332 files changed, 15204 insertions, 1253 deletions
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog
index 9631838..5271057 100644
--- a/libgomp/ChangeLog
+++ b/libgomp/ChangeLog
@@ -1,3 +1,1035 @@
+2025-12-09  Richard Biener  <rguenther@suse.de>
+
+	PR testsuite/120167
+	* testsuite/libgomp.graphite/force-parallel-1.c: Make parloop
+	noipa.
+
+2025-12-09  Andrew Stubbs  <ams@baylibre.com>
+
+	* testsuite/lib/libgomp.exp (check_effective_target_omp_usm): New.
+	* testsuite/libgomp.c++/target-std__array-concurrent-usm.C: Require
+	working Unified Shared Memory to run the test.
+	* testsuite/libgomp.c++/target-std__bitset-concurrent-usm.C: Likewise.
+	* testsuite/libgomp.c++/target-std__deque-concurrent-usm.C: Likewise.
+	* testsuite/libgomp.c++/target-std__forward_list-concurrent-usm.C:
+	Likewise.
+	* testsuite/libgomp.c++/target-std__list-concurrent-usm.C: Likewise.
+	* testsuite/libgomp.c++/target-std__map-concurrent-usm.C: Likewise.
+	* testsuite/libgomp.c++/target-std__multimap-concurrent-usm.C:
+	Likewise.
+	* testsuite/libgomp.c++/target-std__multiset-concurrent-usm.C:
+	Likewise.
+	* testsuite/libgomp.c++/target-std__set-concurrent-usm.C: Likewise.
+	* testsuite/libgomp.c++/target-std__span-concurrent-usm.C: Likewise.
+	* testsuite/libgomp.c++/target-std__valarray-concurrent-usm.C:
+	Likewise.
+	* testsuite/libgomp.c++/target-std__vector-concurrent-usm.C: Likewise.
+	* testsuite/libgomp.c-c++-common/target-implicit-map-4.c: Likewise.
+	* testsuite/libgomp.c-c++-common/target-link-3.c: Likewise.
+	* testsuite/libgomp.c-c++-common/target-link-4.c: Likewise.
+	* testsuite/libgomp.fortran/self_maps.f90: Likewise.
+
+2025-12-04  Andrew Stubbs  <ams@baylibre.com>
+
+	* plugin/plugin-gcn.c (generic_isa_code): New function.
+	(isa_matches_agent): Use generic ISA details to help select an error
+	message on ISA mismatch.
+	* testsuite/lib/libgomp.exp
+	(check_effective_target_offload_target_amdgcn_with_xnack): Use a
+	runtime check.
+
+2025-12-04  Andrew Stubbs  <ams@baylibre.com>
+
+	* testsuite/libgomp.c-c++-common/requires-4.c: Change dg-excess-errors
+	to dg-xfail-if.
+	* testsuite/libgomp.c-c++-common/requires-4a.c: Likewise.
+	* testsuite/libgomp.c-c++-common/requires-5.c: Likewise.
+
+2025-12-03  Andrew Stubbs  <ams@baylibre.com>
+
+	* testsuite/lib/libgomp.exp (libgomp_target_compile): Don't use
+	additional sources if there are no primary sources.
+
+2025-12-01  Andrew Stubbs  <ams@baylibre.com>
+
+	* libgomp.texi (AMD GCN): Mention HSA_XNACK is set automatically.
+
+2025-12-01  Andrew Stubbs  <ams@baylibre.com>
+
+	* Makefile.in: Regenerate.
+	* libgomp-plugin.h (gomp_simple_alloc_init_context): New prototype.
+	(gomp_simple_alloc_register_memory): New prototype.
+	(gomp_simple_alloc): New prototype.
+	(gomp_simple_free): New prototype.
+	(gomp_simple_realloc): New prototype.
+	* libgomp.h (gomp_simple_alloc_init_context): Move to libgomp-plugin.h.
+	(gomp_simple_alloc_register_memory): Likewise.
+	(gomp_simple_alloc): Likewise.
+	(gomp_simple_free): Likewise.
+	(gomp_simple_realloc): Likewise.
+	* libgomp.texi: Update AMD managed memory description.
+	* plugin/Makefrag.am (libgomp_plugin_gcn_la_SOURCES): Add
+	simple-allocator.c and plugin/mutex.c.
+	* plugin/plugin-gcn.c: Include sys/mman.h and unistd.h.
+	(struct hsa_runtime_fn_info): Add hsa_amd_svm_attributes_set_fn.
+	(dump_hsa_system_info): Add HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED and
+	HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT to the GCN_DEBUG output.
+	(init_hsa_runtime_functions): Add hsa_amd_svm_attributes_set.
+	(isa_matches_agent): Add a new error message for the case where the
+	ISA doesn't match but the name does.
+	(managed_ctx): New variable.
+	(managed_heap_create): New function.
+	(GOMP_OFFLOAD_managed_alloc): Likewise.
+	(GOMP_OFFLOAD_managed_free): Likewise.
+	* simple-allocator.c (gomp_fatal): New macro.
+	* testsuite/lib/libgomp.exp (check_effective_target_omp_managedmem):
+	Add amdgcn support checker.
+	(check_effective_target_offload_target_amdgcn_with_xnack): New.
+	* testsuite/libgomp.c-c++-common/requires-4.c: Ignore xnack warning.
+	* testsuite/libgomp.c-c++-common/requires-4a.c: Ignore xnack warning.
+	* testsuite/libgomp.c-c++-common/requires-5.c: Ignore xnack warning.
+	* testsuite/libgomp.c++/alloc-managed-1.C: Add -mxnack=on, if needed.
+	* testsuite/libgomp.c/alloc-managed-1.c: Likewise.
+	* testsuite/libgomp.c/alloc-managed-2.c: Likewise.
+	* testsuite/libgomp.c/alloc-managed-3.c: Likewise.
+	* testsuite/libgomp.c/alloc-managed-4.c: Likewise.
+	* testsuite/libgomp.fortran/alloc-managed-1.f90: Likewise.
+	* plugin/mutex.c: New file.
+
+2025-12-01  Paul-Antoine Arras  <parras@baylibre.com>
+
+	PR fortran/120505
+	* target.c (gomp_map_vars_internal): Allow struct mapping from different
+	containing array elements as long as addresses are in increasing order.
+	* testsuite/libgomp.c-c++-common/map-arrayofstruct-2.c: Adjust
+	dg-output.
+	* testsuite/libgomp.c-c++-common/map-arrayofstruct-3.c: Likewise.
+	* testsuite/libgomp.fortran/map-subarray-5.f90: Likewise.
+	* testsuite/libgomp.fortran/map-subarray-10.f90: New test.
+	* testsuite/libgomp.fortran/map-subarray-9.f90: New test.
+
+2025-11-26  Jakub Jelinek  <jakub@redhat.com>
+
+	* testsuite/libgomp.c++/atomic-12.C (main): Add ()s around array
+	reference index.
+	* testsuite/libgomp.c++/atomic-13.C: Likewise.
+	* testsuite/libgomp.c++/atomic-8.C: Likewise.
+	* testsuite/libgomp.c++/atomic-9.C: Likewise.
+	* testsuite/libgomp.c++/loop-6.C: Use count = count + 1;
+	return count > 0; instead of return ++count > 0;.
+	* testsuite/libgomp.c++/pr38650.C: Add -std=gnu++17.
+	* testsuite/libgomp.c++/target-lambda-1.C (merge_data_func):
+	Use [=,this] instead of just [=] in lambda captures.
+	* testsuite/libgomp.c-c++-common/target-40.c (f1): Use v += 1;
+	instead of v++;.
+	* testsuite/libgomp.c-c++-common/depend-iterator-2.c: Use v = v + 1;
+	instead of v++.
+
+2025-11-25  Frank Scheiner  <frank.scheiner@web.de>
+
+	* affinity-fmt.c: Make char *q a pointer to a const char.
+
+2025-11-25  Arsen Arsenović  <aarsenovic@baylibre.com>
+
+	* testsuite/libgomp.oacc-c-c++-common/atomic_capture-3.c: Copy
+	changes in r11-3059-g8183ebcdc1c843 from atomic_capture-2.c.
+
+2025-11-22  Sandra Loosemore  <sloosemore@baylibre.com>
+
+	* libgomp.texi (OpenMP 5.1): Update "begin declare variant" status.
+
+2025-11-22  Sandra Loosemore  <sloosemore@baylibre.com>
+	    Tobias Burnus  <tburnus@baylibre.com>
+
+	* testsuite/libgomp.c-c++-common/delim-declare-variant-1.c: New.
+	* testsuite/libgomp.c-c++-common/delim-declare-variant-2.c: New.
+
+2025-11-22  Sandra Loosemore  <sloosemore@baylibre.com>
+	    Julian Brown  <julian@codesourcery.com>
+	    waffl3x  <waffl3x@baylibre.com>
+
+	* testsuite/libgomp.c++/bdv_module1.C: New.
+	* testsuite/libgomp.c++/bdv_module1_main.C: New.
+	* testsuite/libgomp.c++/bdv_module2.C: New.
+	* testsuite/libgomp.c++/bdv_module2_impl.C: New.
+	* testsuite/libgomp.c++/bdv_module2_main.C: New.
+	* testsuite/libgomp.c++/bdv_module3.C: New.
+	* testsuite/libgomp.c++/bdv_module3_impl.C: New.
+	* testsuite/libgomp.c++/bdv_module3_main.C: New.
+	* testsuite/libgomp.c++/delim-declare-variant-1.C: New.
+	* testsuite/libgomp.c++/delim-declare-variant-2.C: New.
+	* testsuite/libgomp.c++/delim-declare-variant-7.C: New.
+
+2025-11-21  Arsen Arsenović  <aarsenovic@baylibre.com>
+
+	* testsuite/libgomp.oacc-c-c++-common/data-2-lib.c (explanatory
+	header): Fix typo.
+	(main): Insert waits on kernels reading 'a' into queue 10 before
+	exiting 'a', and waits on kernels reading 'N' into queue 15
+	before exiting 'N'.
+	* testsuite/libgomp.oacc-c-c++-common/data-2.c: Ditto.
+
+2025-11-21  Josef Melcr  <josef.melcr@suse.com>
+
+	* testsuite/libgomp.c/ipcp-cb-spec1.c: Remove LTO requirement.
+	* testsuite/libgomp.c/ipcp-cb-spec2.c: Likewise.
+	* testsuite/libgomp.c/ipcp-cb1.c: Likewise.
+
+2025-11-19  Martin Liska  <martin.liska@hey.com>
+
+	* acinclude.m4: Add detection for wild linker.
+	* configure: Regenerate.
+
+2025-11-18  Tobias Burnus  <tburnus@baylibre.com>
+
+	* libgomp.texi (The libgomp ABI): Update MASTER section by
+	also covering MASKED.
+
+2025-11-17  Jakub Jelinek  <jakub@redhat.com>
+
+	* testsuite/libgomp.c++/allocate-1.C: Replace [: in OpenMP or OpenACC
+	pragmas or attributes with [ : and :] with : ].
+	* testsuite/libgomp.c++/baseptrs-3.C: Likewise.
+	* testsuite/libgomp.c++/baseptrs-5.C: Likewise.
+	* testsuite/libgomp.c++/class-array-1.C: Likewise.
+	* testsuite/libgomp.c++/examples-4/target_data-5.C: Likewise.
+	* testsuite/libgomp.c++/lvalue-tofrom-2.C: Likewise.
+	* testsuite/libgomp.c++/pr101544-1.C: Likewise.
+	* testsuite/libgomp.c++/pr108286.C: Likewise.
+	* testsuite/libgomp.c++/reduction-10.C: Likewise.
+	* testsuite/libgomp.c++/reduction-11.C: Likewise.
+	* testsuite/libgomp.c++/reduction-12.C: Likewise.
+	* testsuite/libgomp.c++/reduction-5.C: Likewise.
+	* testsuite/libgomp.c++/reduction-6.C: Likewise.
+	* testsuite/libgomp.c++/reduction-7.C: Likewise.
+	* testsuite/libgomp.c++/reduction-8.C: Likewise.
+	* testsuite/libgomp.c++/reduction-9.C: Likewise.
+	* testsuite/libgomp.c++/target-18.C: Likewise.
+	* testsuite/libgomp.c++/target-19.C: Likewise.
+	* testsuite/libgomp.c++/target-2.C: Likewise.
+	* testsuite/libgomp.c++/target-22.C: Likewise.
+	* testsuite/libgomp.c++/target-23.C: Likewise.
+	* testsuite/libgomp.c++/target-9.C: Likewise.
+	* testsuite/libgomp.c++/target-flex-100.C: Likewise.
+	* testsuite/libgomp.c++/target-flex-101.C: Likewise.
+	* testsuite/libgomp.c++/target-flex-12.C: Likewise.
+	* testsuite/libgomp.c++/target-flex-2003.C: Likewise.
+	* testsuite/libgomp.c++/target-flex-30.C: Likewise.
+	* testsuite/libgomp.c++/target-flex-300.C: Likewise.
+	* testsuite/libgomp.c++/target-flex-32.C: Likewise.
+	* testsuite/libgomp.c++/target-flex-33.C: Likewise.
+	* testsuite/libgomp.c++/target-flex-41.C: Likewise.
+	* testsuite/libgomp.c++/target-flex-60.C: Likewise.
+	* testsuite/libgomp.c++/target-flex-61.C: Likewise.
+	* testsuite/libgomp.c++/target-flex-62.C: Likewise.
+	* testsuite/libgomp.c++/target-flex-80.C: Likewise.
+	* testsuite/libgomp.c++/target-flex-81.C: Likewise.
+	* testsuite/libgomp.c++/target-has-device-addr-7.C: Likewise.
+	* testsuite/libgomp.c++/target-in-reduction-1.C: Likewise.
+	* testsuite/libgomp.c++/target-in-reduction-2.C: Likewise.
+	* testsuite/libgomp.c++/target-lambda-1.C: Likewise.
+	* testsuite/libgomp.c++/target-lambda-3.C: Likewise.
+	* testsuite/libgomp.c++/target-map-class-1.C: Likewise.
+	* testsuite/libgomp.c++/target-std__array-concurrent.C: Likewise.
+	* testsuite/libgomp.c++/target-std__bitset-concurrent.C: Likewise.
+	* testsuite/libgomp.c++/target-std__deque-concurrent.C: Likewise.
+	* testsuite/libgomp.c++/target-std__flat_map-concurrent.C: Likewise.
+	* testsuite/libgomp.c++/target-std__flat_multimap-concurrent.C:
+	Likewise.
+	* testsuite/libgomp.c++/target-std__flat_multiset-concurrent.C:
+	Likewise.
+	* testsuite/libgomp.c++/target-std__flat_set-concurrent.C: Likewise.
+	* testsuite/libgomp.c++/target-std__forward_list-concurrent.C:
+	Likewise.
+	* testsuite/libgomp.c++/target-std__list-concurrent.C: Likewise.
+	* testsuite/libgomp.c++/target-std__map-concurrent.C: Likewise.
+	* testsuite/libgomp.c++/target-std__multimap-concurrent.C: Likewise.
+	* testsuite/libgomp.c++/target-std__multiset-concurrent.C: Likewise.
+	* testsuite/libgomp.c++/target-std__set-concurrent.C: Likewise.
+	* testsuite/libgomp.c++/target-std__span-concurrent.C: Likewise.
+	* testsuite/libgomp.c++/target-std__unordered_map-concurrent.C:
+	Likewise.
+	* testsuite/libgomp.c++/target-std__unordered_multimap-concurrent.C:
+	Likewise.
+	* testsuite/libgomp.c++/target-std__unordered_multiset-concurrent.C:
+	Likewise.
+	* testsuite/libgomp.c++/target-std__unordered_set-concurrent.C:
+	Likewise.
+	* testsuite/libgomp.c++/target-std__valarray-1.C: Likewise.
+	* testsuite/libgomp.c++/target-std__valarray-concurrent.C: Likewise.
+	* testsuite/libgomp.c++/target-std__vector-concurrent.C: Likewise.
+	* testsuite/libgomp.c++/target-this-3.C: Likewise.
+	* testsuite/libgomp.c++/target-this-4.C: Likewise.
+	* testsuite/libgomp.c++/target-virtual-1.C: Likewise.
+	* testsuite/libgomp.c++/task-reduction-11.C: Likewise.
+	* testsuite/libgomp.c++/task-reduction-12.C: Likewise.
+	* testsuite/libgomp.c++/task-reduction-13.C: Likewise.
+	* testsuite/libgomp.c++/task-reduction-17.C: Likewise.
+	* testsuite/libgomp.c++/task-reduction-18.C: Likewise.
+	* testsuite/libgomp.c++/task-reduction-19.C: Likewise.
+	* testsuite/libgomp.c++/task-reduction-4.C: Likewise.
+	* testsuite/libgomp.c++/task-reduction-5.C: Likewise.
+	* testsuite/libgomp.c++/task-reduction-6.C: Likewise.
+	* testsuite/libgomp.c++/task-reduction-7.C: Likewise.
+	* testsuite/libgomp.c++/taskloop-reduction-2.C: Likewise.
+	* testsuite/libgomp.c++/taskloop-reduction-3.C: Likewise.
+	* testsuite/libgomp.c++/taskloop-reduction-4.C: Likewise.
+	* testsuite/libgomp.c-c++-common/allocate-1.c: Likewise.
+	* testsuite/libgomp.c-c++-common/allocate-3.c: Likewise.
+	* testsuite/libgomp.c-c++-common/baseptrs-2.c: Likewise.
+	* testsuite/libgomp.c-c++-common/dispatch-1.c: Likewise.
+	* testsuite/libgomp.c-c++-common/dispatch-2.c: Likewise.
+	* testsuite/libgomp.c-c++-common/interop-2.c: Likewise.
+	* testsuite/libgomp.c-c++-common/matrix-omp-target-teams-distribute-parallel-for-1.c:
+	Likewise.
+	* testsuite/libgomp.c-c++-common/ptr-attach-1.c: Likewise.
+	* testsuite/libgomp.c-c++-common/ptr-attach-2.c: Likewise.
+	* testsuite/libgomp.c-c++-common/refcount-1.c: Likewise.
+	* testsuite/libgomp.c-c++-common/struct-elem-4.c: Likewise.
+	* testsuite/libgomp.c-c++-common/target-2.c: Likewise.
+	* testsuite/libgomp.c-c++-common/target-has-device-addr-1.c: Likewise.
+	* testsuite/libgomp.c-c++-common/target-implicit-map-2.c: Likewise.
+	* testsuite/libgomp.c-c++-common/target-implicit-map-5.c: Likewise.
+	* testsuite/libgomp.c-c++-common/target-in-reduction-1.c: Likewise.
+	* testsuite/libgomp.c-c++-common/target-in-reduction-2.c: Likewise.
+	* testsuite/libgomp.c-c++-common/target-map-iterators-1.c: Likewise.
+	* testsuite/libgomp.c-c++-common/target-map-iterators-2.c: Likewise.
+	* testsuite/libgomp.c-c++-common/target-map-iterators-3.c: Likewise.
+	* testsuite/libgomp.c-c++-common/target-map-zlas-1.c: Likewise.
+	* testsuite/libgomp.c-c++-common/target-update-iterators-1.c: Likewise.
+	* testsuite/libgomp.c-c++-common/target-update-iterators-2.c: Likewise.
+	* testsuite/libgomp.c-c++-common/target-update-iterators-3.c: Likewise.
+	* testsuite/libgomp.c-c++-common/task-reduction-11.c: Likewise.
+	* testsuite/libgomp.c-c++-common/task-reduction-12.c: Likewise.
+	* testsuite/libgomp.c-c++-common/task-reduction-16.c: Likewise.
+	* testsuite/libgomp.c-c++-common/task-reduction-3.c: Likewise.
+	* testsuite/libgomp.c-c++-common/task-reduction-7.c: Likewise.
+	* testsuite/libgomp.c-c++-common/task-reduction-9.c: Likewise.
+	* testsuite/libgomp.c-c++-common/taskloop-reduction-2.c: Likewise.
+	* testsuite/libgomp.c-c++-common/teams-nteams-icv-1.c: Likewise.
+	* testsuite/libgomp.oacc-c-c++-common/deep-copy-1.c: Likewise.
+	* testsuite/libgomp.oacc-c-c++-common/deep-copy-16.c: Likewise.
+	* testsuite/libgomp.oacc-c-c++-common/deep-copy-3.c: Likewise.
+	* testsuite/libgomp.oacc-c-c++-common/deep-copy-4.c: Likewise.
+	* testsuite/libgomp.oacc-c-c++-common/deep-copy-5.c: Likewise.
+	* testsuite/libgomp.oacc-c-c++-common/deep-copy-6.c: Likewise.
+	* testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c: Likewise.
+	* testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c: Likewise.
+
+2025-11-13  Andrew Stubbs  <ams@codesourcery.com>
+	    Kwok Cheung Yeung  <kcyeung@baylibre.com>
+	    Thomas Schwinge  <tschwinge@baylibre.com>
+
+	* allocator.c (ompx_gnu_max_predefined_alloc): Update to
+	ompx_gnu_managed_mem_alloc.
+	(_Static_assert): Fix assertion messages for allocators and add
+	new assertions for memspace constants.
+	(omp_max_predefined_mem_space): New define.
+	(ompx_gnu_min_predefined_mem_space): New define.
+	(ompx_gnu_max_predefined_mem_space): New define.
+	(MEMSPACE_ALLOC): Add check for non-standard memspaces.
+	(MEMSPACE_CALLOC): Likewise.
+	(MEMSPACE_REALLOC): Likewise.
+	(MEMSPACE_VALIDATE): Likewise.
+	(predefined_ompx_gnu_alloc_mapping): Add ompx_gnu_managed_mem_space.
+	(omp_init_allocator): Add ompx_gnu_managed_mem_space validation.
+	* config/gcn/allocator.c (gcn_memspace_alloc): Add check for
+	non-standard memspaces.
+	(gcn_memspace_calloc): Likewise.
+	(gcn_memspace_realloc): Likewise.
+	(gcn_memspace_validate): Update to validate standard vs non-standard
+	memspaces.
+	* config/linux/allocator.c (linux_memspace_alloc): Add managed
+	memory space handling.
+	(linux_memspace_calloc): Likewise.
+	(linux_memspace_free): Likewise.
+	(linux_memspace_realloc): Likewise (returns NULL for fallback).
+	* config/nvptx/allocator.c (nvptx_memspace_alloc): Add check for
+	non-standard memspaces.
+	(nvptx_memspace_calloc): Likewise.
+	(nvptx_memspace_realloc): Likewise.
+	(nvptx_memspace_validate): Update to validate standard vs non-standard
+	memspaces.
+	* env.c (parse_allocator): Add ompx_gnu_managed_mem_alloc,
+	ompx_gnu_managed_mem_space, and some static asserts so I don't forget
+	them again.
+	* libgomp-plugin.h (GOMP_OFFLOAD_managed_alloc): New declaration.
+	(GOMP_OFFLOAD_managed_free): New declaration.
+	* libgomp.h (gomp_managed_alloc): New declaration.
+	(gomp_managed_free): New declaration.
+	(struct gomp_device_descr): Add managed_alloc_func and
+	managed_free_func fields.
+	* libgomp.texi: Document ompx_gnu_managed_mem_alloc and
+	ompx_gnu_managed_mem_space, add C++ template documentation, and
+	describe NVPTX and AMD support.
+	* omp.h.in: Add ompx_gnu_managed_mem_space and
+	ompx_gnu_managed_mem_alloc enumerators, and gnu_managed_mem C++
+	allocator template.
+	* omp_lib.f90.in: Add Fortran bindings for new allocator and
+	memory space.
+	* omp_lib.h.in: Likewise.
+	* plugin/cuda-lib.def: Add cuMemAllocManaged.
+	* plugin/plugin-nvptx.c (nvptx_alloc): Add managed parameter to
+	support cuMemAllocManaged.
+	(GOMP_OFFLOAD_alloc): Move contents to ...
+	(cleanup_and_alloc): ... this new function, and add managed support.
+	(GOMP_OFFLOAD_managed_alloc): New function.
+	(GOMP_OFFLOAD_managed_free): New function.
+	* target.c (gomp_managed_alloc): New function.
+	(gomp_managed_free): New function.
+	(gomp_load_plugin_for_device): Load optional managed_alloc
+	and managed_free plugin APIs.
+	* testsuite/lib/libgomp.exp: Add check_effective_target_omp_managedmem.
+	* testsuite/libgomp.c++/alloc-managed-1.C: New test.
+	* testsuite/libgomp.c/alloc-managed-1.c: New test.
+	* testsuite/libgomp.c/alloc-managed-2.c: New test.
+	* testsuite/libgomp.c/alloc-managed-3.c: New test.
+	* testsuite/libgomp.c/alloc-managed-4.c: New test.
+	* testsuite/libgomp.fortran/alloc-managed-1.f90: New test.
+
+2025-11-12  Tobias Burnus  <tburnus@baylibre.com>
+
+	PR libgomp/119677
+	* testsuite/libgomp.c-c++-common/target-is-accessible-1.c: Modify
+	test as -5 is now a conforming device number.
+	* testsuite/libgomp.fortran/target-is-accessible-1.f90: Likewise.
+
+2025-11-12  Tobias Burnus  <tburnus@baylibre.com>
+
+	* libgomp.texi (OpenMP Implementation Status): Add TR14.
+
+2025-11-12  Tobias Burnus  <tburnus@baylibre.com>
+
+	PR libgomp/119677
+	* omp.h.in (omp_default_device): New enum value.
+	* omp_lib.f90.in: New parameter.
+	* omp_lib.h.in: Likewise.
+	* target.c (gomp_get_default_device): New. Split off from ...
+	(resolve_device): ... here; call it.
+	(omp_target_alloc, omp_target_free, omp_target_is_present,
+	omp_target_memcpy_check, omp_target_memset, omp_target_memset_async,
+	omp_target_associate_ptr, omp_get_mapped_ptr,
+	omp_target_is_accessible, omp_pause_resource,
+	omp_get_uid_from_device): Handle omp_default_device.
+	* testsuite/libgomp.c/device_uid.c: Likewise.
+	* testsuite/libgomp.fortran/device_uid.f90: Likewise.
+	* testsuite/libgomp.c-c++-common/omp-default-device.c: New test.
+	* testsuite/libgomp.fortran/omp-default-device.f90: New test.
+
+2025-11-05  Tobias Burnus  <tburnus@baylibre.com>
+
+	* testsuite/libgomp.c++/target-std__multimap-concurrent.C: Fix memory
+	freeing of device allocated memory with USM.
+
+2025-11-03  Sam James  <sam@gentoo.org>
+
+	* configure: Regenerate.
+
+2025-11-03  Tobias Burnus  <tburnus@baylibre.com>
+
+	PR libgomp/122543
+	* testsuite/libgomp.fortran/omp_target_memset.f90: Move fptr inside
+	the target to avoid implicit mapping of its uninit pointee.
+	* testsuite/libgomp.fortran/omp_target_memset-2.f90: Likewise.
+
+2025-11-03  Thomas Schwinge  <tschwinge@baylibre.com>
+
+	PR libgomp/122281
+	* testsuite/libgomp.c/pr122281.c: Fix for non-USM offloading
+	execution.
+
+2025-11-03  Tobias Burnus  <tburnus@baylibre.com>
+
+	PR libgomp/122281
+	PR middle-end/105001
+	* testsuite/libgomp.c/pr122281.c: New file.
+
+2025-10-28  Thomas Schwinge  <tschwinge@baylibre.com>
+
+	* env.c (initialize_env): Simplify 'parse_stacksize' call.
+
+2025-10-23  Andrew Stubbs  <ams@baylibre.com>
+
+	* Makefile.am (libgomp_la_SOURCES): Add simple-allocator.c.
+	* Makefile.in: Regenerate.
+	* basic-allocator.c: Mention simple-allocator in the comment.
+	* config/linux/allocator.c: Include unistd.h.
+	(pin_ctx): New variable.
+	(ctxlock): New variable.
+	(linux_init_pin_ctx): New function.
+	(linux_memspace_alloc): Use simple-allocator for pinned memory.
+	(linux_memspace_free): Likewise.
+	(linux_memspace_realloc): Likewise.
+	* libgomp.h (gomp_simple_alloc_init_context): New prototype.
+	(gomp_simple_alloc_register_memory): New prototype.
+	(gomp_simple_alloc): New prototype.
+	(gomp_simple_free): New prototype.
+	(gomp_simple_realloc): New prototype.
+	* libgomp.texi: Update pinned memory trait documentation.
+	* testsuite/libgomp.c/alloc-pinned-8.c: New test.
+	* simple-allocator.c: New file.
+
+2025-10-23  Andrew Stubbs  <ams@baylibre.com>
+	    Thomas Schwinge  <thomas@codesourcery.com>
+
+	* config/linux/allocator.c: Include assert.h.
+	(using_device_for_page_locked): New variable.
+	(linux_memspace_alloc): Add init0 parameter. Support device pinning.
+	(linux_memspace_calloc): Set init0 to true.
+	(linux_memspace_free): Support device pinning.
+	(linux_memspace_realloc): Support device pinning.
+	(MEMSPACE_ALLOC): Set init0 to false.
+	* libgomp-plugin.h
+	(GOMP_OFFLOAD_page_locked_host_alloc): New prototype.
+	(GOMP_OFFLOAD_page_locked_host_free): Likewise.
+	* libgomp.h (gomp_page_locked_host_alloc): Likewise.
+	(gomp_page_locked_host_free): Likewise.
+	(struct gomp_device_descr): Add page_locked_host_alloc_func and
+	page_locked_host_free_func.
+	* libgomp.texi: Adjust the docs for the pinned trait.
+	* plugin/plugin-nvptx.c
+	(GOMP_OFFLOAD_page_locked_host_alloc): New function.
+	(GOMP_OFFLOAD_page_locked_host_free): Likewise.
+	* target.c (device_for_page_locked): New variable.
+	(get_device_for_page_locked): New function.
+	(gomp_page_locked_host_alloc): Likewise.
+	(gomp_page_locked_host_free): Likewise.
+	(gomp_load_plugin_for_device): Add page_locked_host_alloc and
+	page_locked_host_free.
+	* testsuite/libgomp.c/alloc-pinned-1.c: Change expectations for NVPTX
+	devices.
+	* testsuite/libgomp.c/alloc-pinned-2.c: Likewise.
+	* testsuite/libgomp.c/alloc-pinned-3.c: Likewise.
+	* testsuite/libgomp.c/alloc-pinned-4.c: Likewise.
+	* testsuite/libgomp.c/alloc-pinned-5.c: Likewise.
+	* testsuite/libgomp.c/alloc-pinned-6.c: Likewise.
+
+2025-10-20  Josef Melcr  <jmelcr02@gmail.com>
+
+	* testsuite/libgomp.c/ipcp-cb-spec1.c: Moved from
+	gcc/testsuite/gcc.dg/ipa/.
+	* testsuite/libgomp.c/ipcp-cb-spec2.c: Likewise.
+	* testsuite/libgomp.c/ipcp-cb1.c: Likewise.
+
+2025-10-20  Thomas Schwinge  <tschwinge@baylibre.com>
+
+	PR c++/114457
+	PR c++/122268
+	PR c++/120450
+	* testsuite/libgomp.c++/target-flex-101.C: XFAIL GCN, nvptx
+	offloading compilation.
+	* testsuite/libgomp.c++/target-std__flat_map-concurrent.C:
+	Un-XFAIL GCN offloading compilation.
+	* testsuite/libgomp.c++/target-std__flat_multimap-concurrent.C:
+	Likewise.
+
+2025-10-16  Tobias Burnus  <tburnus@baylibre.com>
+
+	* testsuite/libgomp.c/declare-variant-4-gfx10-3-generic.c: Add
+	dg-excess-errors to handle possible missing libgomp multi lib.
+	* testsuite/libgomp.c/declare-variant-4-gfx1030.c: Likewise.
+	* testsuite/libgomp.c/declare-variant-4-gfx1036.c: Likewise.
+	* testsuite/libgomp.c/declare-variant-4-gfx11-generic.c: Likewise.
+	* testsuite/libgomp.c/declare-variant-4-gfx1100.c: Likewise.
+	* testsuite/libgomp.c/declare-variant-4-gfx1103.c: Likewise.
+	* testsuite/libgomp.c/declare-variant-4-gfx9-4-generic.c: Likewise.
+	* testsuite/libgomp.c/declare-variant-4-gfx9-generic.c: Likewise.
+	* testsuite/libgomp.c/declare-variant-4-gfx900.c: Likewise.
+	* testsuite/libgomp.c/declare-variant-4-gfx906.c: Likewise.
+	* testsuite/libgomp.c/declare-variant-4-gfx908.c: Likewise.
+	* testsuite/libgomp.c/declare-variant-4-gfx90a.c: Likewise.
+	* testsuite/libgomp.c/declare-variant-4-gfx90c.c: Likewise.
+	* testsuite/libgomp.c/declare-variant-4-gfx942.c: Likewise.
+	* testsuite/libgomp.c/declare-variant-4-gfx1031.c: New test.
+	* testsuite/libgomp.c/declare-variant-4-gfx1032.c: New test.
+	* testsuite/libgomp.c/declare-variant-4-gfx1033.c: New test.
+	* testsuite/libgomp.c/declare-variant-4-gfx1034.c: New test.
+	* testsuite/libgomp.c/declare-variant-4-gfx1035.c: New test.
+	* testsuite/libgomp.c/declare-variant-4-gfx1101.c: New test.
+	* testsuite/libgomp.c/declare-variant-4-gfx1102.c: New test.
+	* testsuite/libgomp.c/declare-variant-4-gfx1150.c: New test.
+	* testsuite/libgomp.c/declare-variant-4-gfx1151.c: New test.
+	* testsuite/libgomp.c/declare-variant-4-gfx1152.c: New test.
+	* testsuite/libgomp.c/declare-variant-4-gfx1153.c: New test.
+	* testsuite/libgomp.c/declare-variant-4-gfx902.c: New test.
+	* testsuite/libgomp.c/declare-variant-4-gfx904.c: New test.
+	* testsuite/libgomp.c/declare-variant-4-gfx909.c: New test.
+	* testsuite/libgomp.c/declare-variant-4-gfx950.c: New test.
+
+2025-10-15  Tobias Burnus  <tburnus@baylibre.com>
+
+	* libgomp.texi (OpenMP Context Selectors): Add note that there is
+	currently an exact match between ISA and compilation, ignoring
+	compatibilities in both ways.
+	* testsuite/libgomp.c/declare-variant-4.h: Add missing variant
+	functions for specific and generic AMD GPUs.
+	* testsuite/libgomp.c/declare-variant-4-gfx10-3-generic.c: New test.
+	* testsuite/libgomp.c/declare-variant-4-gfx11-generic.c: New test.
+	* testsuite/libgomp.c/declare-variant-4-gfx9-4-generic.c: New test.
+	* testsuite/libgomp.c/declare-variant-4-gfx9-generic.c: New test.
+
+2025-10-10  Tobias Burnus  <tburnus@baylibre.com>
+
+	* plugin/plugin-gcn.c (is_integrated_apu): New; currently '#if 0'.
+	* plugin/plugin-nvptx.c (is_integrated_apu): Likewise.
+
+2025-10-05  Sam James  <sam@gentoo.org>
+
+	* Makefile.in: Regenerate.
+	* aclocal.m4: Regenerate.
+	* configure: Regenerate.
+	* testsuite/Makefile.in: Regenerate.
+
+2025-09-18  Tobias Burnus  <tburnus@baylibre.com>
+	    Sandra Loosemore  <sloosemore@baylibre.com>
+
+	PR middle-end/121922
+	* testsuite/libgomp.c-c++-common/declare-variant-1.c: New test.
+
+2025-09-17  Tobias Burnus  <tburnus@baylibre.com>
+
+	PR libgomp/119857
+	PR libgomp/114445
+	* config/accel/target-indirect.c: Change to use uint128_t instead
+	of a struct as data structure and add GOMP_INDIRECT_ADDR_HMAP as
+	host-accessible variable.
+	(struct indirect_map_t): Remove.
+	(USE_HASHTAB_LOOKUP, INDIRECT_DEV_ADDR, INDIRECT_HOST_ADDR,
+	SET_INDIRECT_HOST_ADDR, SET_INDIRECT_ADDRS): Define.
+	(htab_free): Use __builtin_unreachable.
+	(htab_hash, htab_eq, GOMP_target_map_indirect_ptr,
+	build_indirect_map): Update for new representation and new
+	pointer-to-hash variable.
+	* config/gcn/team.c (gomp_gcn_enter_kernel): Only call
+	build_indirect_map when GOMP_INDIRECT_ADDR_MAP.
+	* config/nvptx/team.c (gomp_nvptx_main): Likewise.
+	* libgomp-plugin.h (GOMP_INDIRECT_ADDR_HMAP): Define.
+	* plugin/plugin-gcn.c: Conditionally include
+	build-target-indirect-htab.h.
+	(USE_HASHTAB_LOOKUP_FOR_INDIRECT): Define.
+	(create_target_indirect_map): New prototype.
+	(GOMP_OFFLOAD_load_image): Update to create the device's
+	indirect-function hash table on the host.
+	* plugin/plugin-nvptx.c: Conditionally include
+	build-target-indirect-htab.h.
+	(USE_HASHTAB_LOOKUP_FOR_INDIRECT): Define.
+	(create_target_indirect_map): New prototype.
+	(GOMP_OFFLOAD_load_image): Update to create the device's
+	indirect-function hash table on the host.
+	* plugin/build-target-indirect-htab.h: New file.
+
+2025-09-17  Tobias Burnus  <tburnus@baylibre.com>
+
+	* libgomp.map (OACC_2.5): Move previously unimplemented
+	acc_{copyout,delete}_finalize_async_{32,64,array}_h_ to ...
+	(OACC_2.6.1): ... here.
+	* libgomp.texi (acc_copyin, acc_present_or_copyin, acc_create,
+	acc_present_or_create, acc_copyout, acc_update_device,
+	acc_update_self, acc_is_present): Use 'type(*), dimension(..)'
+	instead of 'type, dimension(:[,:]...)' for Fortran.
+	(acc_delete): Likewise; change acc_delete_async_finalize to
+	acc_delete_finalize_async.
+	* openacc.f90 (openacc_internal): Add interfaces for
+	acc_{copyout,delete}_finalize_async_{{32,64,array}_h,_l}.
+	(openacc): Add generic interfaces for
+	acc_copyout_finalize_async and acc_delete_finalize_async.
+	(acc_{copyout,delete}_finalize_async_{32,64,array}_h): New.
+	* openacc_lib.h: Add generic interfaces for
+	acc_copyout_finalize_async and acc_delete_finalize_async.
+	* testsuite/libgomp.oacc-fortran/pr92970-1.f90: New test.
+
+2025-09-06  Jakub Jelinek  <jakub@redhat.com>
+
+	* omp.h.in: Fix up formatting of __cplusplus >= 201103L
+	guarded code from libstdc++ style to GCC/libgomp style.
+
+2025-08-06  Kwok Cheung Yeung  <kcyeung@baylibre.com>
+
+	* target.c (gomp_update): Call gomp_merge_iterator_maps.  Free
+	allocated variables.
+	* testsuite/libgomp.c-c++-common/target-update-iterators-1.c: New.
+	* testsuite/libgomp.c-c++-common/target-update-iterators-2.c: New.
+	* testsuite/libgomp.c-c++-common/target-update-iterators-3.c: New.
+
+2025-08-06  Kwok Cheung Yeung  <kcyeung@baylibre.com>
+	    Andrew Stubbs  <ams@baylibre.com>
+
+	* target.c (kind_to_name): New.
+	(gomp_merge_iterator_maps): New.
+	(gomp_map_vars_internal): Call gomp_merge_iterator_maps.  Copy
+	address of only the first iteration to target vars.  Free allocated
+	variables.
+	* testsuite/libgomp.c-c++-common/target-map-iterators-1.c: New.
+	* testsuite/libgomp.c-c++-common/target-map-iterators-2.c: New.
+	* testsuite/libgomp.c-c++-common/target-map-iterators-3.c: New.
+
+2025-07-21  Thomas Schwinge  <tschwinge@baylibre.com>
+
+	PR target/119853
+	PR target/119854
+	* testsuite/libgomp.c++/target-cdtor-1.C: Adjust for
+	'targetm.cxx.use_aeabi_atexit'.
+	* testsuite/libgomp.c++/target-cdtor-2.C: Likewise.
+
+2025-07-18  Andrew Stubbs  <ams@baylibre.com>
+
+	PR target/121156
+	* config/gcn/bar.c (gomp_team_barrier_wait_end): Remove unused
+	"generation" variable.
+	(gomp_team_barrier_wait_cancel_end): Likewise.
+
+2025-07-17  Thomas Schwinge  <tschwinge@baylibre.com>
+
+	PR target/119692
+	* testsuite/libgomp.c++/pr119692-1-4.C: '{ dg-timeout 10 { target offload_device } }'.
+	* testsuite/libgomp.c++/pr119692-1-5.C: Likewise.
+	* testsuite/libgomp.c++/target-exceptions-bad_cast-1.C: Likewise.
+	* testsuite/libgomp.c++/target-exceptions-bad_cast-2.C: Likewise.
+	* testsuite/libgomp.oacc-c++/exceptions-bad_cast-1.C: Likewise.
+	* testsuite/libgomp.oacc-c++/exceptions-bad_cast-2.C: Likewise.
+
+2025-06-24  Tobias Burnus  <tburnus@baylibre.com>
+
+	* libgomp.texi (acc_attach, acc_detach): Update for Fortran
+	version.
+	* openacc.f90 (acc_attach{,_async}, acc_detach{,_finalize}{,_async}):
+	Add.
+	* openacc_lib.h: Likewise.
+	* testsuite/libgomp.oacc-fortran/acc-attach-detach-1.f90: New test.
+	* testsuite/libgomp.oacc-fortran/acc-attach-detach-2.f90: New test.
+
+2025-06-19  Tobias Burnus  <tburnus@baylibre.com>
+
+	* target.c (GOMP_REQUIRES_NAME_BUF_LEN): Define.
+	(GOMP_offload_register_ver, gomp_target_init): Use it for the
+	char buffer size.
+
+2025-06-19  Tobias Burnus  <tburnus@baylibre.com>
+	    waffl3x  <waffl3x@baylibre.com>
+
+	* libgomp.texi (omp_init_allocator): Refer to 'Memory allocation'
+	for available memory spaces.
+	(OMP_ALLOCATOR): Move list of traits and predefined memspaces
+	and allocators to ...
+	(Memory allocation): ... here. Document omp(x)::allocator::*;
+	minor wording tweaks, be more explicit about memkind, pinned and
+	pool_size.
+
+2025-06-17  Tobias Burnus  <tburnus@baylibre.com>
+
+	* testsuite/libgomp.c++/declare_target-2.C: New test.
+
+2025-06-10  Tobias Burnus  <tburnus@baylibre.com>
+
+	* testsuite/libgomp.c/declare-variant-4.h (gfx942): New variant function.
+	* testsuite/libgomp.c/declare-variant-4-gfx942.c: New test.
+
+2025-06-06  Tobias Burnus  <tburnus@baylibre.com>
+	    Sandra Loosemore  <sloosemore@baylibre.com>
+
+	* libgomp.texi (omp_get_num_devices, omp_get_intrinsic_device):
+	Document builtin handling.
+
+2025-06-06  Tobias Burnus  <tburnus@baylibre.com>
+
+	PR target/120530
+	* testsuite/libgomp.c/target-map-zero-sized-3.c (main): Add missing
+	map clause; remove unused variable.
+
+2025-06-04  Tobias Burnus  <tburnus@baylibre.com>
+	    Sandra Loosemore  <sloosemore@baylibre.com>
+
+	* libgomp.texi (omp_interop_{int,ptr,str,rc_desc}): Add note about
+	the 'ret_code' type change in OpenMP 6.
+
+2025-06-03  Jakub Jelinek  <jakub@redhat.com>
+
+	PR libgomp/120444
+	* testsuite/libgomp.c-c++-common/omp_target_memset-3.c (test_it):
+	Change ptr argument type from void * to int8_t *.
+	(main): Change ptr variable type from void * to int8_t * and cast
+	omp_target_alloc result to the latter type.
+
+2025-06-02  Tobias Burnus  <tburnus@baylibre.com>
+
+	PR libgomp/120444
+	* libgomp-plugin.h (GOMP_OFFLOAD_memset): Declare.
+	* libgomp.h (struct gomp_device_descr): Add memset_func.
+	* libgomp.map (GOMP_6.0.1): Add omp_target_memset{,_async}.
+	* libgomp.texi (Device Memory Routines): Document them.
+	* omp.h.in (omp_target_memset, omp_target_memset_async): Declare.
+	* omp_lib.f90.in (omp_target_memset, omp_target_memset_async):
+	Add interfaces.
+	* omp_lib.h.in (omp_target_memset, omp_target_memset_async): Likewise.
+	* plugin/cuda-lib.def: Add cuMemsetD8.
+	* plugin/plugin-gcn.c (struct hsa_runtime_fn_info): Add
+	hsa_amd_memory_fill_fn.
+	(init_hsa_runtime_functions): DLSYM_OPT_FN load it.
+	(GOMP_OFFLOAD_memset): New.
+	* plugin/plugin-nvptx.c (GOMP_OFFLOAD_memset): New.
+	* target.c (omp_target_memset_int, omp_target_memset,
+	omp_target_memset_async_helper, omp_target_memset_async): New.
+	(gomp_load_plugin_for_device): Add DLSYM (memset).
+	* testsuite/libgomp.c-c++-common/omp_target_memset.c: New test.
+	* testsuite/libgomp.c-c++-common/omp_target_memset-2.c: New test.
+	* testsuite/libgomp.c-c++-common/omp_target_memset-3.c: New test.
+	* testsuite/libgomp.fortran/omp_target_memset.f90: New test.
+	* testsuite/libgomp.fortran/omp_target_memset-2.f90: New test.
+
+2025-05-30  Thomas Schwinge  <tschwinge@baylibre.com>
+
+	* testsuite/libgomp.c++/target-std__valarray-1.C: New.
+	* testsuite/libgomp.c++/target-std__valarray-1.output: Likewise.
+
+2025-05-30  Thomas Schwinge  <tschwinge@baylibre.com>
+
+	* testsuite/libgomp.c++/target-std__array-concurrent-usm.C: New.
+	* testsuite/libgomp.c++/target-std__array-concurrent.C: Adjust.
+	* testsuite/libgomp.c++/target-std__bitset-concurrent-usm.C: New.
+	* testsuite/libgomp.c++/target-std__bitset-concurrent.C: Adjust.
+	* testsuite/libgomp.c++/target-std__deque-concurrent-usm.C: New.
+	* testsuite/libgomp.c++/target-std__deque-concurrent.C: Adjust.
+	* testsuite/libgomp.c++/target-std__forward_list-concurrent-usm.C: New.
+	* testsuite/libgomp.c++/target-std__forward_list-concurrent.C: Adjust.
+	* testsuite/libgomp.c++/target-std__list-concurrent-usm.C: New.
+	* testsuite/libgomp.c++/target-std__list-concurrent.C: Adjust.
+	* testsuite/libgomp.c++/target-std__map-concurrent-usm.C: New.
+	* testsuite/libgomp.c++/target-std__map-concurrent.C: Adjust.
+	* testsuite/libgomp.c++/target-std__multimap-concurrent-usm.C: New.
+	* testsuite/libgomp.c++/target-std__multimap-concurrent.C: Adjust.
+	* testsuite/libgomp.c++/target-std__multiset-concurrent-usm.C: New.
+	* testsuite/libgomp.c++/target-std__multiset-concurrent.C: Adjust.
+	* testsuite/libgomp.c++/target-std__set-concurrent-usm.C: New.
+	* testsuite/libgomp.c++/target-std__set-concurrent.C: Adjust.
+	* testsuite/libgomp.c++/target-std__span-concurrent-usm.C: New.
+	* testsuite/libgomp.c++/target-std__span-concurrent.C: Adjust.
+	* testsuite/libgomp.c++/target-std__valarray-concurrent-usm.C: New.
+	* testsuite/libgomp.c++/target-std__valarray-concurrent.C: Adjust.
+	* testsuite/libgomp.c++/target-std__vector-concurrent-usm.C: New.
+	* testsuite/libgomp.c++/target-std__vector-concurrent.C: Adjust.
+
+2025-05-30  Kwok Cheung Yeung  <kcyeung@baylibre.com>
+	    Thomas Schwinge  <tschwinge@baylibre.com>
+
+	* testsuite/libgomp.c++/target-std__array-concurrent.C: New.
+	* testsuite/libgomp.c++/target-std__bitset-concurrent.C: Likewise.
+	* testsuite/libgomp.c++/target-std__deque-concurrent.C: Likewise.
+	* testsuite/libgomp.c++/target-std__flat_map-concurrent.C: Likewise.
+	* testsuite/libgomp.c++/target-std__flat_multimap-concurrent.C: Likewise.
+	* testsuite/libgomp.c++/target-std__flat_multiset-concurrent.C: Likewise.
+	* testsuite/libgomp.c++/target-std__flat_set-concurrent.C: Likewise.
+	* testsuite/libgomp.c++/target-std__forward_list-concurrent.C: Likewise.
+	* testsuite/libgomp.c++/target-std__list-concurrent.C: Likewise.
+	* testsuite/libgomp.c++/target-std__map-concurrent.C: Likewise.
+	* testsuite/libgomp.c++/target-std__multimap-concurrent.C: Likewise.
+	* testsuite/libgomp.c++/target-std__multiset-concurrent.C: Likewise.
+	* testsuite/libgomp.c++/target-std__set-concurrent.C: Likewise.
+	* testsuite/libgomp.c++/target-std__span-concurrent.C: Likewise.
+	* testsuite/libgomp.c++/target-std__unordered_map-concurrent.C: Likewise.
+	* testsuite/libgomp.c++/target-std__unordered_multimap-concurrent.C: Likewise.
+	* testsuite/libgomp.c++/target-std__unordered_multiset-concurrent.C: Likewise.
+	* testsuite/libgomp.c++/target-std__unordered_set-concurrent.C: Likewise.
+	* testsuite/libgomp.c++/target-std__valarray-concurrent.C: Likewise.
+	* testsuite/libgomp.c++/target-std__vector-concurrent.C: Likewise.
+
+2025-05-30  Kwok Cheung Yeung  <kcyeung@baylibre.com>
+
+	* testsuite/libgomp.c++/target-std__cmath.C: New.
+	* testsuite/libgomp.c++/target-std__complex.C: Likewise.
+	* testsuite/libgomp.c++/target-std__numbers.C: Likewise.
+
+2025-05-30  Waffl3x  <waffl3x@baylibre.com>
+	    Thomas Schwinge  <tschwinge@baylibre.com>
+
+	* testsuite/libgomp.c++/target-flex-10.C: New test.
+	* testsuite/libgomp.c++/target-flex-100.C: New test.
+	* testsuite/libgomp.c++/target-flex-101.C: New test.
+	* testsuite/libgomp.c++/target-flex-11.C: New test.
+	* testsuite/libgomp.c++/target-flex-12.C: New test.
+	* testsuite/libgomp.c++/target-flex-2000.C: New test.
+	* testsuite/libgomp.c++/target-flex-2001.C: New test.
+	* testsuite/libgomp.c++/target-flex-2002.C: New test.
+	* testsuite/libgomp.c++/target-flex-2003.C: New test.
+	* testsuite/libgomp.c++/target-flex-30.C: New test.
+	* testsuite/libgomp.c++/target-flex-300.C: New test.
+	* testsuite/libgomp.c++/target-flex-31.C: New test.
+	* testsuite/libgomp.c++/target-flex-32.C: New test.
+	* testsuite/libgomp.c++/target-flex-33.C: New test.
+	* testsuite/libgomp.c++/target-flex-41.C: New test.
+	* testsuite/libgomp.c++/target-flex-60.C: New test.
+	* testsuite/libgomp.c++/target-flex-61.C: New test.
+	* testsuite/libgomp.c++/target-flex-62.C: New test.
+	* testsuite/libgomp.c++/target-flex-70.C: New test.
+	* testsuite/libgomp.c++/target-flex-80.C: New test.
+	* testsuite/libgomp.c++/target-flex-81.C: New test.
+	* testsuite/libgomp.c++/target-flex-90.C: New test.
+	* testsuite/libgomp.c++/target-flex-common.h: New test.
+
+2025-05-30  Thomas Schwinge  <tschwinge@baylibre.com>
+	    Richard Biener  <rguenther@suse.de>
+
+	PR middle-end/119835
+	* testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c:
+	'#pragma GCC optimize "-fno-inline"'.
+	* testsuite/libgomp.c-c++-common/target-abi-struct-1.c: New.
+	* testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c: Adjust.
+
+2025-05-30  Julian Brown  <julian@codesourcery.com>
+
+	* testsuite/libgomp.c-c++-common/declare-mapper-9.c: Enable for C.
+	* testsuite/libgomp.c-c++-common/declare-mapper-10.c: Likewise.
+	* testsuite/libgomp.c-c++-common/declare-mapper-11.c: Likewise.
+	* testsuite/libgomp.c-c++-common/declare-mapper-12.c: Likewise.
+	* testsuite/libgomp.c-c++-common/declare-mapper-13.c: Likewise.
+	* testsuite/libgomp.c-c++-common/declare-mapper-14.c: Likewise.
+
+2025-05-30  Julian Brown  <julian@codesourcery.com>
+	    Tobias Burnus  <tburnus@baylibre.com>
+
+	* testsuite/libgomp.c++/declare-mapper-1.C: New test.
+	* testsuite/libgomp.c++/declare-mapper-2.C: New test.
+	* testsuite/libgomp.c++/declare-mapper-3.C: New test.
+	* testsuite/libgomp.c++/declare-mapper-4.C: New test.
+	* testsuite/libgomp.c++/declare-mapper-5.C: New test.
+	* testsuite/libgomp.c++/declare-mapper-6.C: New test.
+	* testsuite/libgomp.c++/declare-mapper-7.C: New test.
+	* testsuite/libgomp.c++/declare-mapper-8.C: New test.
+	* testsuite/libgomp.c-c++-common/declare-mapper-9.c: New test (only
+	enabled for C++ for now).
+	* testsuite/libgomp.c-c++-common/declare-mapper-10.c: Likewise.
+	* testsuite/libgomp.c-c++-common/declare-mapper-11.c: Likewise.
+	* testsuite/libgomp.c-c++-common/declare-mapper-12.c: Likewise.
+	* testsuite/libgomp.c-c++-common/declare-mapper-13.c: Likewise.
+	* testsuite/libgomp.c-c++-common/declare-mapper-14.c: Likewise.
+
+2025-05-29  Tobias Burnus  <tburnus@baylibre.com>
+
+	PR libgomp/93226
+	* libgomp-plugin.h (GOMP_OFFLOAD_openacc_async_dev2dev): New
+	prototype.
+	* libgomp.h (struct acc_dispatch_t): Add dev2dev_func.
+	(gomp_copy_dev2dev): New prototype.
+	* libgomp.map (OACC_2.6.1): New; add acc_memcpy_device{,_async}.
+	* libgomp.texi (acc_memcpy_device): New.
+	* oacc-mem.c (memcpy_tofrom_device): Change to take from/to
+	device boolean; use memcpy not memmove; add early return if
+	size == 0 or same device + same ptr.
+	(acc_memcpy_to_device, acc_memcpy_to_device_async,
+	acc_memcpy_from_device, acc_memcpy_from_device_async): Update.
+	(acc_memcpy_device, acc_memcpy_device_async): New.
+	* openacc.f90 (acc_memcpy_device, acc_memcpy_device_async):
+	Add interface.
+	* openacc_lib.h (acc_memcpy_device, acc_memcpy_device_async):
+	Likewise.
+	* openacc.h (acc_memcpy_device, acc_memcpy_device_async): Add
+	prototype.
+	* plugin/plugin-gcn.c (GOMP_OFFLOAD_openacc_async_host2dev):
+	Update comment.
+	(GOMP_OFFLOAD_openacc_async_dev2host): Update call.
+	(GOMP_OFFLOAD_openacc_async_dev2dev): New.
+	* plugin/plugin-nvptx.c (cuda_memcpy_dev_sanity_check): New.
+	(GOMP_OFFLOAD_dev2dev): Call it.
+	(GOMP_OFFLOAD_openacc_async_dev2dev): New.
+	* target.c (gomp_copy_dev2dev): New.
+	(gomp_load_plugin_for_device): Load dev2dev and async_dev2dev.
+	* testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c: New test.
+	* testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90: New test.
+
+2025-05-28  Tobias Burnus  <tburnus@baylibre.com>
+
+	PR middle-end/118694
+	* testsuite/libgomp.fortran/metadirective-1.f90: xfail when
+	compiling (also) for nvptx offloading as an error is then expected.
+
+2025-05-23  Tobias Burnus  <tburnus@baylibre.com>
+
+	PR middle-end/118694
+	* testsuite/libgomp.c-c++-common/metadirective-1.c: xfail when
+	compiling (also) for nvptx offloading as an error is then expected.
+
+2025-05-19  Thomas Schwinge  <tschwinge@baylibre.com>
+
+	PR lto/120308
+	* testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c: Add empty
+	structure testing.
+
+2025-05-19  Thomas Schwinge  <tschwinge@baylibre.com>
+
+	* testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c: New.
+	* testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c: Likewise.
+
+2025-05-19  Julian Brown  <julian@codesourcery.com>
+
+	* testsuite/libgomp.oacc-fortran/lib-13.f90: End data region after
+	wait API calls.
+
+2025-05-15  Tobias Burnus  <tburnus@baylibre.com>
+
+	* testsuite/libgomp.fortran/alloc-comp-4.f90: New test.
+
+2025-05-14  Tobias Burnus  <tburnus@baylibre.com>
+
+	* target.c (gomp_attach_pointer): Return bool; accept additional
+	bool to optionally silence the fatal pointee-not-found error.
+	(gomp_map_vars_internal): If the pointee could not be found,
+	check whether it was mapped as GOMP_MAP_ZERO_LEN_ARRAY_SECTION.
+	* libgomp.h (gomp_attach_pointer): Update prototype.
+	* oacc-mem.c (acc_attach_async, goacc_enter_data_internal): Update
+	calls.
+	* testsuite/libgomp.c/target-map-zero-sized.c: New test.
+	* testsuite/libgomp.c/target-map-zero-sized-2.c: New test.
+	* testsuite/libgomp.c/target-map-zero-sized-3.c: New test.
+
+2025-05-12  Thomas Schwinge  <tschwinge@baylibre.com>
+
+	PR target/119692
+	* testsuite/libgomp.c++/pr119692-1-4.C: '{ dg-timeout 10 }'.
+	* testsuite/libgomp.c++/pr119692-1-5.C: Likewise.
+	* testsuite/libgomp.c++/target-exceptions-bad_cast-1.C: Likewise.
+	* testsuite/libgomp.c++/target-exceptions-bad_cast-2.C: Likewise.
+	* testsuite/libgomp.oacc-c++/exceptions-bad_cast-1.C: Likewise.
+	* testsuite/libgomp.oacc-c++/exceptions-bad_cast-2.C: Likewise.
+
+2025-05-12  Thomas Schwinge  <tschwinge@baylibre.com>
+
+	* testsuite/libgomp.c/declare-variant-3-sm61.c: New.
+	* testsuite/libgomp.c/declare-variant-3.h: Adjust.
+
+2025-05-09  Tobias Burnus  <tburnus@baylibre.com>
+
+	* testsuite/libgomp.c/interop-cuda-full.c: Use 'link' instead
+	of 'run' when the default device is "! offload_device_nvptx".
+	* testsuite/libgomp.c/interop-cuda-libonly.c: Likewise.
+	* testsuite/libgomp.c/interop-hip-nvidia-full.c: Likewise.
+	* testsuite/libgomp.c/interop-hip-nvidia-no-headers.c: Likewise.
+	* testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c: Likewise.
+	* testsuite/libgomp.fortran/interop-hip-nvidia-full.F90: Likewise.
+	* testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90: Likewise.
+	* testsuite/libgomp.c/interop-hip-amd-full.c: Use 'link' instead
+	of 'run' when the default device is "! offload_device_gcn".
+	* testsuite/libgomp.c/interop-hip-amd-no-hip-header.c: Likewise.
+	* testsuite/libgomp.fortran/interop-hip-amd-full.F90: Likewise.
+	* testsuite/libgomp.fortran/interop-hip-amd-no-module.F90: Likewise.
+
+2025-05-09  David Malcolm  <dmalcolm@redhat.com>
+
+	PR other/116792
+	* testsuite/lib/libgomp.exp: Add load_lib of scanhtml.exp.
+
+2025-05-07  Tobias Burnus  <tburnus@baylibre.com>
+
+	* testsuite/libgomp.fortran/map-alloc-comp-9.f90: Process differently
+	when USE_USM_REQUIREMENT is set.
+	* testsuite/libgomp.fortran/map-alloc-comp-9-usm.f90: New test.
+
+2025-05-06  Tejas Belagod  <tejas.belagod@arm.com>
+
+	* testsuite/libgomp.c-target/aarch64/udr-sve.c: Fix test.
+
+2025-05-05  Thomas Schwinge  <tschwinge@baylibre.com>
+
+	* testsuite/libgomp.c/interop-hsa.c: GCN offloading only.
+
 2025-05-01  Tobias Burnus  <tobias@codesourcery.com>
 
 	* testsuite/libgomp.fortran/allocate-8a.f90: New test.
diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am
index 19479ae..0902164 100644
--- a/libgomp/Makefile.am
+++ b/libgomp/Makefile.am
@@ -69,8 +69,8 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \
 	mutex.c proc.c sem.c bar.c ptrlock.c time.c fortran.c affinity.c \
 	target.c splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \
 	oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \
-	priority_queue.c affinity-fmt.c teams.c allocator.c oacc-profiling.c \
-	oacc-target.c target-indirect.c target-cxa-dso-dtor.c
+	priority_queue.c affinity-fmt.c teams.c allocator.c simple-allocator.c \
+	oacc-profiling.c oacc-target.c target-indirect.c target-cxa-dso-dtor.c
 
 include $(top_srcdir)/plugin/Makefrag.am
 
diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in
index 6d22b3d..5dca37c 100644
--- a/libgomp/Makefile.in
+++ b/libgomp/Makefile.in
@@ -125,9 +125,11 @@ subdir = .
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
 	$(top_srcdir)/../config/ax_count_cpus.m4 \
+	$(top_srcdir)/../config/clang-plugin.m4 \
 	$(top_srcdir)/../config/depstand.m4 \
 	$(top_srcdir)/../config/enable.m4 \
 	$(top_srcdir)/../config/futex.m4 \
+	$(top_srcdir)/../config/gcc-plugin.m4 \
 	$(top_srcdir)/../config/lead-dot.m4 \
 	$(top_srcdir)/../config/lthostflags.m4 \
 	$(top_srcdir)/../config/multi.m4 \
@@ -185,7 +187,9 @@ am__DEPENDENCIES_1 =
 @PLUGIN_GCN_TRUE@libgomp_plugin_gcn_la_DEPENDENCIES = libgomp.la \
 @PLUGIN_GCN_TRUE@	$(am__DEPENDENCIES_1)
 @PLUGIN_GCN_TRUE@am_libgomp_plugin_gcn_la_OBJECTS =  \
-@PLUGIN_GCN_TRUE@	libgomp_plugin_gcn_la-plugin-gcn.lo
+@PLUGIN_GCN_TRUE@	libgomp_plugin_gcn_la-plugin-gcn.lo \
+@PLUGIN_GCN_TRUE@	libgomp_plugin_gcn_la-simple-allocator.lo \
+@PLUGIN_GCN_TRUE@	libgomp_plugin_gcn_la-mutex.lo
 libgomp_plugin_gcn_la_OBJECTS = $(am_libgomp_plugin_gcn_la_OBJECTS)
 AM_V_lt = $(am__v_lt_@AM_V@)
 am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
@@ -218,9 +222,9 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \
 	affinity.lo target.lo splay-tree.lo libgomp-plugin.lo \
 	oacc-parallel.lo oacc-host.lo oacc-init.lo oacc-mem.lo \
 	oacc-async.lo oacc-plugin.lo oacc-cuda.lo priority_queue.lo \
-	affinity-fmt.lo teams.lo allocator.lo oacc-profiling.lo \
-	oacc-target.lo target-indirect.lo target-cxa-dso-dtor.lo \
-	$(am__objects_1)
+	affinity-fmt.lo teams.lo allocator.lo simple-allocator.lo \
+	oacc-profiling.lo oacc-target.lo target-indirect.lo \
+	target-cxa-dso-dtor.lo $(am__objects_1)
 libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
 AM_V_P = $(am__v_P_@AM_V@)
 am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
@@ -397,6 +401,7 @@ LIBOBJS = @LIBOBJS@
 LIBS = @LIBS@
 LIBTOOL = @LIBTOOL@
 LIPO = @LIPO@
+LLVM_CONFIG = @LLVM_CONFIG@
 LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 MAINT = @MAINT@
@@ -552,9 +557,9 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \
 	fortran.c affinity.c target.c splay-tree.c libgomp-plugin.c \
 	oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \
 	oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
-	affinity-fmt.c teams.c allocator.c oacc-profiling.c \
-	oacc-target.c target-indirect.c target-cxa-dso-dtor.c \
-	$(am__append_3)
+	affinity-fmt.c teams.c allocator.c simple-allocator.c \
+	oacc-profiling.c oacc-target.c target-indirect.c \
+	target-cxa-dso-dtor.c $(am__append_3)
 
 # Nvidia PTX OpenACC plugin.
 @PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
@@ -581,7 +586,9 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \
 
 # AMD GCN plugin
 @PLUGIN_GCN_TRUE@libgomp_plugin_gcn_version_info = -version-info $(libtool_VERSION)
-@PLUGIN_GCN_TRUE@libgomp_plugin_gcn_la_SOURCES = plugin/plugin-gcn.c
+@PLUGIN_GCN_TRUE@libgomp_plugin_gcn_la_SOURCES = plugin/plugin-gcn.c simple-allocator.c \
+@PLUGIN_GCN_TRUE@				plugin/mutex.c
+
 @PLUGIN_GCN_TRUE@libgomp_plugin_gcn_la_CPPFLAGS = $(AM_CPPFLAGS) \
 @PLUGIN_GCN_TRUE@	-D_GNU_SOURCE
 
@@ -757,7 +764,9 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iter.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iter_ull.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgomp-plugin.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgomp_plugin_gcn_la-mutex.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgomp_plugin_gcn_la-plugin-gcn.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgomp_plugin_gcn_la-simple-allocator.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgomp_plugin_nvptx_la-plugin-nvptx.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lock.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loop.Plo@am__quote@
@@ -780,6 +789,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scope.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sections.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/simple-allocator.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/splay-tree.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target-cxa-dso-dtor.Plo@am__quote@
@@ -819,6 +829,20 @@ libgomp_plugin_gcn_la-plugin-gcn.lo: plugin/plugin-gcn.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(libgomp_plugin_gcn_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgomp_plugin_gcn_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libgomp_plugin_gcn_la-plugin-gcn.lo `test -f 'plugin/plugin-gcn.c' || echo '$(srcdir)/'`plugin/plugin-gcn.c
 
+libgomp_plugin_gcn_la-simple-allocator.lo: simple-allocator.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(libgomp_plugin_gcn_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgomp_plugin_gcn_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libgomp_plugin_gcn_la-simple-allocator.lo -MD -MP -MF $(DEPDIR)/libgomp_plugin_gcn_la-simple-allocator.Tpo -c -o libgomp_plugin_gcn_la-simple-allocator.lo `test -f 'simple-allocator.c' || echo '$(srcdir)/'`simple-allocator.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgomp_plugin_gcn_la-simple-allocator.Tpo $(DEPDIR)/libgomp_plugin_gcn_la-simple-allocator.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='simple-allocator.c' object='libgomp_plugin_gcn_la-simple-allocator.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(libgomp_plugin_gcn_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgomp_plugin_gcn_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libgomp_plugin_gcn_la-simple-allocator.lo `test -f 'simple-allocator.c' || echo '$(srcdir)/'`simple-allocator.c
+
+libgomp_plugin_gcn_la-mutex.lo: plugin/mutex.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(libgomp_plugin_gcn_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgomp_plugin_gcn_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libgomp_plugin_gcn_la-mutex.lo -MD -MP -MF $(DEPDIR)/libgomp_plugin_gcn_la-mutex.Tpo -c -o libgomp_plugin_gcn_la-mutex.lo `test -f 'plugin/mutex.c' || echo '$(srcdir)/'`plugin/mutex.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgomp_plugin_gcn_la-mutex.Tpo $(DEPDIR)/libgomp_plugin_gcn_la-mutex.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='plugin/mutex.c' object='libgomp_plugin_gcn_la-mutex.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(libgomp_plugin_gcn_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgomp_plugin_gcn_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libgomp_plugin_gcn_la-mutex.lo `test -f 'plugin/mutex.c' || echo '$(srcdir)/'`plugin/mutex.c
+
 libgomp_plugin_nvptx_la-plugin-nvptx.lo: plugin/plugin-nvptx.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(libgomp_plugin_nvptx_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgomp_plugin_nvptx_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libgomp_plugin_nvptx_la-plugin-nvptx.lo -MD -MP -MF $(DEPDIR)/libgomp_plugin_nvptx_la-plugin-nvptx.Tpo -c -o libgomp_plugin_nvptx_la-plugin-nvptx.lo `test -f 'plugin/plugin-nvptx.c' || echo '$(srcdir)/'`plugin/plugin-nvptx.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgomp_plugin_nvptx_la-plugin-nvptx.Tpo $(DEPDIR)/libgomp_plugin_nvptx_la-plugin-nvptx.Plo
diff --git a/libgomp/acinclude.m4 b/libgomp/acinclude.m4
index 12fbb20..9482059 100644
--- a/libgomp/acinclude.m4
+++ b/libgomp/acinclude.m4
@@ -118,6 +118,7 @@ dnl Sets:
 dnl  with_gnu_ld
 dnl  libgomp_ld_is_gold (possibly)
 dnl  libgomp_ld_is_mold (possibly)
+dnl  libgomp_ld_is_wild (possibly)
 dnl  libgomp_gnu_ld_version (possibly)
 dnl
 dnl The last will be a single integer, e.g., version 1.23.45.0.67.89 will
@@ -151,10 +152,13 @@ AC_DEFUN([LIBGOMP_CHECK_LINKER_FEATURES], [
   # does some of this, but throws away the result.
   libgomp_ld_is_gold=no
   libgomp_ld_is_mold=no
+  libgomp_ld_is_wild=no
   if $LD --version 2>/dev/null | grep 'GNU gold'> /dev/null 2>&1; then
     libgomp_ld_is_gold=yes
   elif $LD --version 2>/dev/null | grep 'mold'> /dev/null 2>&1; then
     libgomp_ld_is_mold=yes
+  elif $LD --version 2>/dev/null | grep 'Wild'> /dev/null 2>&1; then
+    libgomp_ld_is_wild=yes
   fi
   changequote(,)
   ldver=`$LD --version 2>/dev/null |
@@ -312,6 +316,8 @@ if test $enable_symvers != no && test $libgomp_shared_libgcc = yes; then
       enable_symvers=gnu
     elif test $libgomp_ld_is_mold = yes ; then
       enable_symvers=gnu
+    elif test $libgomp_ld_is_wild = yes ; then
+      enable_symvers=gnu
     else
       # The right tools, the right setup, but too old.  Fallbacks?
       AC_MSG_WARN(=== Linker version $libgomp_gnu_ld_version is too old for)
diff --git a/libgomp/aclocal.m4 b/libgomp/aclocal.m4
index 55d9d71..94d1b0a 100644
--- a/libgomp/aclocal.m4
+++ b/libgomp/aclocal.m4
@@ -1169,9 +1169,11 @@ AC_SUBST([am__untar])
 
 m4_include([../config/acx.m4])
 m4_include([../config/ax_count_cpus.m4])
+m4_include([../config/clang-plugin.m4])
 m4_include([../config/depstand.m4])
 m4_include([../config/enable.m4])
 m4_include([../config/futex.m4])
+m4_include([../config/gcc-plugin.m4])
 m4_include([../config/lead-dot.m4])
 m4_include([../config/lthostflags.m4])
 m4_include([../config/multi.m4])
diff --git a/libgomp/affinity-fmt.c b/libgomp/affinity-fmt.c
index 1fae893..8d3df5f 100644
--- a/libgomp/affinity-fmt.c
+++ b/libgomp/affinity-fmt.c
@@ -327,7 +327,7 @@ gomp_display_affinity (char *buffer, size_t size,
 	      }
 	  if (c == '{')
 	    {
-	      char *q = strchr (p + 1, '}');
+	      const char *q = strchr (p + 1, '}');
 	      if (q)
 		gomp_fatal ("unsupported long type name '%.*s' in affinity "
 			    "format", (int) (q - (p + 1)), p + 1);
diff --git a/libgomp/allocator.c b/libgomp/allocator.c
index 4a683d9..44c41ca 100644
--- a/libgomp/allocator.c
+++ b/libgomp/allocator.c
@@ -100,34 +100,57 @@ GOMP_is_alloc (void *ptr)
 
 #define omp_max_predefined_alloc omp_thread_mem_alloc
 #define ompx_gnu_min_predefined_alloc ompx_gnu_pinned_mem_alloc
-#define ompx_gnu_max_predefined_alloc ompx_gnu_pinned_mem_alloc
+#define ompx_gnu_max_predefined_alloc ompx_gnu_managed_mem_alloc
 
 _Static_assert (GOMP_OMP_PREDEF_ALLOC_MAX == omp_thread_mem_alloc,
 		"GOMP_OMP_PREDEF_ALLOC_MAX == omp_thread_mem_alloc");
 _Static_assert (GOMP_OMPX_PREDEF_ALLOC_MIN == ompx_gnu_min_predefined_alloc,
-		"GOMP_OMP_PREDEF_ALLOC_MAX == omp_thread_mem_alloc");
+		"GOMP_OMPX_PREDEF_ALLOC_MIN == ompx_gnu_min_predefined_alloc");
 _Static_assert (GOMP_OMPX_PREDEF_ALLOC_MAX == ompx_gnu_max_predefined_alloc,
-		"GOMP_OMP_PREDEF_ALLOC_MAX == omp_thread_mem_alloc");
+		"GOMP_OMPX_PREDEF_ALLOC_MAX == ompx_gnu_max_predefined_alloc");
 _Static_assert (GOMP_OMP_PREDEF_ALLOC_THREADS == omp_thread_mem_alloc,
 		"GOMP_OMP_PREDEF_ALLOC_THREADS == omp_thread_mem_alloc");
 
+#define omp_max_predefined_mem_space omp_low_lat_mem_space
+#define ompx_gnu_min_predefined_mem_space ompx_gnu_managed_mem_space
+#define ompx_gnu_max_predefined_mem_space ompx_gnu_managed_mem_space
+
+_Static_assert (GOMP_OMP_PREDEF_MEMSPACE_MAX == omp_max_predefined_mem_space,
+		"GOMP_OMP_PREDEF_MEMSPACE_MAX == omp_max_predefined_mem_space");
+_Static_assert (GOMP_OMPX_PREDEF_MEMSPACE_MIN == ompx_gnu_min_predefined_mem_space,
+		"GOMP_OMPX_PREDEF_MEMSPACE_MIN == ompx_gnu_min_predefined_mem_space");
+_Static_assert (GOMP_OMPX_PREDEF_MEMSPACE_MAX == ompx_gnu_max_predefined_mem_space,
+		"GOMP_OMPX_PREDEF_MEMSPACE_MAX == ompx_gnu_max_predefined_mem_space");
+
+#if 0 /* Only for testing that the fall-back macros compile.  */
+#undef MEMSPACE_ALLOC
+#undef MEMSPACE_CALLOC
+#undef MEMSPACE_REALLOC
+#undef MEMSPACE_FREE
+#undef MEMSPACE_VALIDATE
+#endif
+
 /* These macros may be overridden in config/<target>/allocator.c.
    The defaults (no override) are to return NULL for pinned memory requests
-   and pass through to the regular OS calls otherwise.
+   or non-standard memory spaces (these need a deliberate implementation), and
+   pass through to the regular OS calls otherwise.
    The following definitions (ab)use comma operators to avoid unused
    variable errors.  */
 #ifndef MEMSPACE_ALLOC
 #define MEMSPACE_ALLOC(MEMSPACE, SIZE, PIN) \
-  (PIN ? NULL : malloc (((void)(MEMSPACE), (SIZE))))
+  ((PIN) || (MEMSPACE) > GOMP_OMP_PREDEF_MEMSPACE_MAX \
+   ? NULL : malloc (((void)(MEMSPACE), (SIZE))))
 #endif
 #ifndef MEMSPACE_CALLOC
 #define MEMSPACE_CALLOC(MEMSPACE, SIZE, PIN) \
-  (PIN ? NULL : calloc (1, (((void)(MEMSPACE), (SIZE)))))
+  ((PIN) || (MEMSPACE) > GOMP_OMP_PREDEF_MEMSPACE_MAX \
+   ? NULL : calloc (1, (((void)(MEMSPACE), (SIZE)))))
 #endif
 #ifndef MEMSPACE_REALLOC
 #define MEMSPACE_REALLOC(MEMSPACE, ADDR, OLDSIZE, SIZE, OLDPIN, PIN) \
-   ((PIN) || (OLDPIN) ? NULL \
-   : realloc (ADDR, (((void)(MEMSPACE), (void)(OLDSIZE), (SIZE)))))
+   ((PIN) || (OLDPIN) || (MEMSPACE) > GOMP_OMP_PREDEF_MEMSPACE_MAX \
+    ? NULL \
+    : realloc (ADDR, (((void)(MEMSPACE), (void)(OLDSIZE), (SIZE)))))
 #endif
 #ifndef MEMSPACE_FREE
 #define MEMSPACE_FREE(MEMSPACE, ADDR, SIZE, PIN) \
@@ -135,7 +158,8 @@ _Static_assert (GOMP_OMP_PREDEF_ALLOC_THREADS == omp_thread_mem_alloc,
 #endif
 #ifndef MEMSPACE_VALIDATE
 #define MEMSPACE_VALIDATE(MEMSPACE, ACCESS, PIN) \
-  (PIN ? 0 : ((void)(MEMSPACE), (void)(ACCESS), 1))
+  ((PIN) || (MEMSPACE) > GOMP_OMP_PREDEF_MEMSPACE_MAX \
+   ? 0 : ((void)(MEMSPACE), (void)(ACCESS), 1))
 #endif
 
 /* Map the predefined allocators to the correct memory space.
@@ -155,6 +179,7 @@ static const omp_memspace_handle_t predefined_omp_alloc_mapping[] = {
 };
 static const omp_memspace_handle_t predefined_ompx_gnu_alloc_mapping[] = {
   omp_default_mem_space,   /* ompx_gnu_pinned_mem_alloc. */
+  ompx_gnu_managed_mem_space,  /* ompx_gnu_managed_mem_alloc. */
 };
 
 #define ARRAY_SIZE(A) (sizeof (A) / sizeof ((A)[0]))
@@ -389,7 +414,9 @@ omp_init_allocator (omp_memspace_handle_t memspace, int ntraits,
   struct omp_allocator_data *ret;
   int i;
 
-  if (memspace > omp_low_lat_mem_space)
+  if (memspace > omp_max_predefined_mem_space
+      && (memspace < ompx_gnu_min_predefined_mem_space
+	  || memspace > ompx_gnu_max_predefined_mem_space))
     return omp_null_allocator;
   for (i = 0; i < ntraits; i++)
     switch (traits[i].key)
diff --git a/libgomp/basic-allocator.c b/libgomp/basic-allocator.c
index 11804bb..9464e09 100644
--- a/libgomp/basic-allocator.c
+++ b/libgomp/basic-allocator.c
@@ -25,6 +25,11 @@
 /* This is a basic "malloc" implementation intended for use with small,
    low-latency memories.
 
+   Compared to the "simple" allocator, this one is designed to keep the
+   metadata and heap together (no slow memory needed), and prioritize
+   space-efficiency over algorithm speed (the memory already being
+   low-latency).
+
    To use this template, define BASIC_ALLOC_PREFIX, and then #include the
    source file.  The other configuration macros are optional.
 
diff --git a/libgomp/config/accel/target-indirect.c b/libgomp/config/accel/target-indirect.c
index 30e391c..a35fffb 100644
--- a/libgomp/config/accel/target-indirect.c
+++ b/libgomp/config/accel/target-indirect.c
@@ -25,43 +25,68 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <assert.h>
-#include <string.h>
 #include "libgomp.h"
 
-struct indirect_map_t
-{
-  void *host_addr;
-  void *target_addr;
-};
+void *GOMP_INDIRECT_ADDR_MAP = NULL;
+
+#define USE_HASHTAB_LOOKUP
+
+#ifdef USE_HASHTAB_LOOKUP
+
+#include <string.h>  /* For memset.  */
+
+/* Use a hashtab to look up the target address instead of using a linear
+   search.
+
+   With newer libgomp on the host the hash is already initialized on the host
+   (i.e. plugin/plugin-gcn.c).  Thus, build_indirect_map is only used as
+   fallback with older libgomp.  */
 
-typedef struct indirect_map_t *hash_entry_type;
+void *GOMP_INDIRECT_ADDR_HMAP = NULL;
+
+typedef unsigned __int128 hash_entry_type;
+#define INDIRECT_DEV_ADDR(p) ((void *) (uintptr_t) ((p) >> 64))
+#define INDIRECT_HOST_ADDR(p) ((void *) (uintptr_t) (p))
+#define SET_INDIRECT_HOST_ADDR(p, host) ((p) = (unsigned __int128) (uintptr_t) (host))
+#define SET_INDIRECT_ADDRS(p, h, d) \
+  ((p) = (unsigned __int128) (h) + ((unsigned __int128) (d) << 64))
+
+/* Besides the sizes, also the endianness either needs to agree or
+   host-device memcpy needs to take care of this.  */
+_Static_assert (sizeof (unsigned __int128) == 2*sizeof(void*),
+		"indirect_target_map_t size mismatch");
 
 static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
-static inline void htab_free (void *ptr) { free (ptr); }
+static inline void htab_free (void *ptr) { __builtin_unreachable (); }
 
 #include "hashtab.h"
 
 static inline hashval_t
 htab_hash (hash_entry_type element)
 {
-  return hash_pointer (element->host_addr);
+  return hash_pointer (INDIRECT_HOST_ADDR (element));
 }
 
 static inline bool
 htab_eq (hash_entry_type x, hash_entry_type y)
 {
-  return x->host_addr == y->host_addr;
+  return INDIRECT_HOST_ADDR (x) == INDIRECT_HOST_ADDR (y);
 }
 
-void **GOMP_INDIRECT_ADDR_MAP = NULL;
-
-/* Use a hashtab to lookup the target address instead of using a linear
-   search.  */
-#define USE_HASHTAB_LOOKUP
+void *
+GOMP_target_map_indirect_ptr (void *ptr)
+{
+  /* NULL pointers always resolve to NULL.  */
+  if (!ptr)
+    return ptr;
 
-#ifdef USE_HASHTAB_LOOKUP
+  assert (GOMP_INDIRECT_ADDR_HMAP);
 
-static htab_t indirect_htab = NULL;
+  hash_entry_type element;
+  SET_INDIRECT_HOST_ADDR (element, ptr);
+  hash_entry_type entry = htab_find ((htab_t) GOMP_INDIRECT_ADDR_HMAP, element);
+  return entry ? INDIRECT_DEV_ADDR (entry) : ptr;
+}
 
 /* Build the hashtab used for host->target address lookups.  */
 
@@ -69,43 +94,28 @@ void
 build_indirect_map (void)
 {
   size_t num_ind_funcs = 0;
-  void **map_entry;
+  uint64_t *map_entry;
 
-  if (!GOMP_INDIRECT_ADDR_MAP)
+  if (!GOMP_INDIRECT_ADDR_MAP || GOMP_INDIRECT_ADDR_HMAP)
     return;
 
-  if (!indirect_htab)
-    {
-      /* Count the number of entries in the NULL-terminated address map.  */
-      for (map_entry = GOMP_INDIRECT_ADDR_MAP; *map_entry;
-	   map_entry += 2, num_ind_funcs++);
-
-      /* Build hashtab for address lookup.  */
-      indirect_htab = htab_create (num_ind_funcs);
-      map_entry = GOMP_INDIRECT_ADDR_MAP;
-
-      for (int i = 0; i < num_ind_funcs; i++, map_entry += 2)
-	{
-	  struct indirect_map_t element = { *map_entry, NULL };
-	  hash_entry_type *slot = htab_find_slot (&indirect_htab, &element,
-						  INSERT);
-	  *slot = (hash_entry_type) map_entry;
-	}
-    }
-}
+  /* Count the number of entries in the NULL-terminated address map.  */
+  for (map_entry = (uint64_t *) GOMP_INDIRECT_ADDR_MAP; *map_entry;
+    map_entry += 2, num_ind_funcs++);
 
-void *
-GOMP_target_map_indirect_ptr (void *ptr)
-{
-  /* NULL pointers always resolve to NULL.  */
-  if (!ptr)
-    return ptr;
-
-  assert (indirect_htab);
+  /* Build hashtab for address lookup.  */
+  htab_t indirect_htab = htab_create (num_ind_funcs);
+  GOMP_INDIRECT_ADDR_HMAP = (void *) indirect_htab;
 
-  struct indirect_map_t element = { ptr, NULL };
-  hash_entry_type entry = htab_find (indirect_htab, &element);
-  return entry ? entry->target_addr : ptr;
+  map_entry = GOMP_INDIRECT_ADDR_MAP;
+  for (int i = 0; i < num_ind_funcs; i++, map_entry += 2)
+    {
+      hash_entry_type element;
+      SET_INDIRECT_ADDRS (element, *map_entry, *(map_entry + 1));
+      hash_entry_type *slot = htab_find_slot (&indirect_htab, element,
+					      INSERT);
+      *slot = element;
+    }
 }
 
 #else
diff --git a/libgomp/config/gcn/allocator.c b/libgomp/config/gcn/allocator.c
index 92aa2db..969cfa9 100644
--- a/libgomp/config/gcn/allocator.c
+++ b/libgomp/config/gcn/allocator.c
@@ -56,8 +56,12 @@ gcn_memspace_alloc (omp_memspace_handle_t memspace, size_t size)
 
       return __gcn_lowlat_alloc (shared_pool, size);
     }
+  else if (memspace > GOMP_OMP_PREDEF_MEMSPACE_MAX)
+    /* No non-standard memspaces are implemented for device-side amdgcn.  */
+    return NULL;
   else
     return malloc (size);
+
 }
 
 static void *
@@ -69,6 +73,9 @@ gcn_memspace_calloc (omp_memspace_handle_t memspace, size_t size)
 
       return __gcn_lowlat_calloc (shared_pool, size);
     }
+  else if (memspace > GOMP_OMP_PREDEF_MEMSPACE_MAX)
+    /* No non-standard memspaces are implemented for device-side amdgcn.  */
+    return NULL;
   else
     return calloc (1, size);
 }
@@ -96,6 +103,9 @@ gcn_memspace_realloc (omp_memspace_handle_t memspace, void *addr,
 
       return __gcn_lowlat_realloc (shared_pool, addr, oldsize, size);
     }
+  else if (memspace > GOMP_OMP_PREDEF_MEMSPACE_MAX)
+    /* No non-standard memspaces are implemented for device-side amdgcn.  */
+    return NULL;
   else
     return realloc (addr, size);
 }
@@ -105,8 +115,14 @@ gcn_memspace_validate (omp_memspace_handle_t memspace, unsigned access)
 {
   /* Disallow use of low-latency memory when it must be accessible by
      all threads.  */
-  return (memspace != omp_low_lat_mem_space
-	  || access != omp_atv_all);
+  if (memspace == omp_low_lat_mem_space
+      && access == omp_atv_all)
+    return false;
+
+  /* Otherwise, standard memspaces are accepted, even when we don't have
+     anything special to do with them, and non-standard memspaces are assumed
+     to need explicit support.  */
+  return (memspace <= GOMP_OMP_PREDEF_MEMSPACE_MAX);
 }
 
 #define MEMSPACE_ALLOC(MEMSPACE, SIZE, PIN) \
diff --git a/libgomp/config/gcn/bar.c b/libgomp/config/gcn/bar.c
index 52b344c..57ac648 100644
--- a/libgomp/config/gcn/bar.c
+++ b/libgomp/config/gcn/bar.c
@@ -79,7 +79,7 @@ gomp_team_barrier_wake (gomp_barrier_t *bar, int count)
 void
 gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
 {
-  unsigned int generation, gen;
+  unsigned int gen;
 
   if (__builtin_expect (state & BAR_WAS_LAST, 0))
     {
@@ -105,7 +105,6 @@ gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
 	}
     }
 
-  generation = state;
   state &= ~BAR_CANCELLED;
   int retry = 100;
   do
@@ -128,7 +127,6 @@ gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
 	  gomp_barrier_handle_tasks (state);
 	  gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
 	}
-      generation |= gen & BAR_WAITING_FOR_TASK;
     }
   while (gen != state + BAR_INCR);
 }
@@ -152,7 +150,7 @@ bool
 gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
 				   gomp_barrier_state_t state)
 {
-  unsigned int generation, gen;
+  unsigned int gen;
 
   if (__builtin_expect (state & BAR_WAS_LAST, 0))
     {
@@ -184,7 +182,6 @@ gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
   if (__builtin_expect (state & BAR_CANCELLED, 0))
     return true;
 
-  generation = state;
   int retry = 100;
   do
     {
@@ -209,7 +206,6 @@ gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
 	  gomp_barrier_handle_tasks (state);
 	  gen = __atomic_load_n (&bar->generation, MEMMODEL_RELAXED);
 	}
-      generation |= gen & BAR_WAITING_FOR_TASK;
     }
   while (gen != state + BAR_INCR);
 
diff --git a/libgomp/config/gcn/team.c b/libgomp/config/gcn/team.c
index 40827ce..df5e065 100644
--- a/libgomp/config/gcn/team.c
+++ b/libgomp/config/gcn/team.c
@@ -32,6 +32,10 @@
 #define LITTLEENDIAN_CPU
 #include "hsa.h"
 
+#define UNLIKELY(x) (__builtin_expect ((x), 0))
+
+extern void *GOMP_INDIRECT_ADDR_MAP;
+
 /* Defined in basic-allocator.c via config/amdgcn/allocator.c.  */
 void __gcn_lowlat_init (void *heap, size_t size);
 
@@ -57,8 +61,8 @@ gomp_gcn_enter_kernel (void)
       int numthreads = __builtin_gcn_dim_size (1);
       int teamid = __builtin_gcn_dim_pos(0);
 
-      /* Initialize indirect function support.  */
-      if (teamid == 0)
+      /* Initialize indirect function support for older libgomp.  */
+      if (UNLIKELY (GOMP_INDIRECT_ADDR_MAP != NULL && teamid == 0))
 	build_indirect_map ();
 
       /* Set up the global state.
diff --git a/libgomp/config/linux/allocator.c b/libgomp/config/linux/allocator.c
index 8dea959..c144c59 100644
--- a/libgomp/config/linux/allocator.c
+++ b/libgomp/config/linux/allocator.c
@@ -36,6 +36,11 @@
 
 /* Implement malloc routines that can handle pinned memory on Linux.
    
+   Given that pinned memory is typically used to help host <-> device memory
+   transfers, we attempt to allocate such memory using a device (really:
+   libgomp plugin), but fall back to mmap plus mlock if no suitable device is
+   available.
+
    It's possible to use mlock on any heap memory, but using munlock is
    problematic if there are multiple pinned allocations on the same page.
    Tracking all that manually would be possible, but adds overhead. This may
@@ -48,50 +53,120 @@
 
 #define _GNU_SOURCE
 #include <sys/mman.h>
+#include <unistd.h>
 #include <string.h>
+#include <assert.h>
 #include "libgomp.h"
 #ifdef HAVE_INTTYPES_H
 # include <inttypes.h>  /* For PRIu64.  */
 #endif
 
+static int using_device_for_page_locked
+  = /* uninitialized */ -1;
+
+
+static gomp_simple_alloc_ctx_p pin_ctx = NULL;
+static pthread_once_t ctxlock = PTHREAD_ONCE_INIT;
+
+static void
+linux_init_pin_ctx ()
+{
+  pin_ctx = gomp_simple_alloc_init_context ();
+}
+
 static void *
-linux_memspace_alloc (omp_memspace_handle_t memspace, size_t size, int pin)
+linux_memspace_alloc (omp_memspace_handle_t memspace, size_t size, int pin,
+		      bool init0)
 {
-  (void)memspace;
+  void *addr = NULL;
 
-  if (pin)
+  if (memspace == ompx_gnu_managed_mem_space)
+    addr = gomp_managed_alloc (size);
+  else if (pin)
     {
-      /* Note that mmap always returns zeroed memory and is therefore also a
-	 suitable implementation of calloc.  */
-      void *addr = mmap (NULL, size, PROT_READ | PROT_WRITE,
-			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-      if (addr == MAP_FAILED)
-	return NULL;
-
-      if (mlock (addr, size))
+      int using_device = __atomic_load_n (&using_device_for_page_locked,
+					  MEMMODEL_RELAXED);
+      if (using_device != 0)
+	{
+	  using_device = gomp_page_locked_host_alloc (&addr, size);
+	  int using_device_old
+	    = __atomic_exchange_n (&using_device_for_page_locked,
+				   using_device, MEMMODEL_RELAXED);
+	  assert (using_device_old == -1
+		  /* We shouldn't have concurrently changed our mind.  */
+		  || using_device_old == using_device);
+	}
+      if (using_device == 0)
 	{
+	  static int pagesize = 0;
+	  static void *addrhint = NULL;
+
+	  if (!pagesize)
+	    pagesize = sysconf(_SC_PAGE_SIZE);
+
+	  while (1)
+	    {
+	      addr = gomp_simple_alloc (pin_ctx, size);
+	      if (addr)
+		break;
+
+	      /* Round up to a whole page.  */
+	      size_t misalignment = size % pagesize;
+	      size_t mmap_size = (misalignment > 0
+				  ? size + pagesize - misalignment
+				  : size);
+	      void *newpage = mmap (addrhint, mmap_size, PROT_READ | PROT_WRITE,
+				 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	      if (newpage == MAP_FAILED)
+		break;
+	      else
+		{
+		  if (mlock (newpage, size))
+		    {
 #ifdef HAVE_INTTYPES_H
-	  gomp_debug (0, "libgomp: failed to pin %"PRIu64" bytes of"
-		      " memory (ulimit too low?)\n", (uint64_t) size);
+		      gomp_debug (0, "libgomp: failed to pin %"PRIu64" bytes"
+				  " of memory (ulimit too low?)\n",
+				  (uint64_t) size);
 #else
-	  gomp_debug (0, "libgomp: failed to pin %lu bytes of"
-		      " memory (ulimit too low?)\n", (unsigned long) size);
+		      gomp_debug (0, "libgomp: failed to pin %lu bytes of"
+				  " memory (ulimit too low?)\n",
+				  (unsigned long) size);
 #endif
-	  munmap (addr, size);
-	  return NULL;
-	}
+		      munmap (newpage, size);
+		      break;
+		    }
 
-      return addr;
+		  addrhint = newpage + mmap_size;
+
+		  pthread_once (&ctxlock, linux_init_pin_ctx);
+		  gomp_simple_alloc_register_memory (pin_ctx, newpage,
+						     mmap_size);
+		}
+	    }
+	}
     }
   else
-    return malloc (size);
+    addr = malloc (size);
+
+  if (addr && init0)
+    memset (addr, 0, size);
+
+  return addr;
 }
 
 static void *
 linux_memspace_calloc (omp_memspace_handle_t memspace, size_t size, int pin)
 {
-  if (pin)
-    return linux_memspace_alloc (memspace, size, pin);
+  if (memspace == ompx_gnu_managed_mem_space)
+    {
+      void *ret = gomp_managed_alloc (size);
+      if (!ret)
+	return NULL;
+      memset (ret, 0, size);
+      return ret;
+    }
+  else if (pin)
+    return linux_memspace_alloc (memspace, size, pin, true);
   else
     return calloc (1, size);
 }
@@ -100,10 +175,21 @@ static void
 linux_memspace_free (omp_memspace_handle_t memspace, void *addr, size_t size,
 		     int pin)
 {
-  (void)memspace;
-
-  if (pin)
-    munmap (addr, size);
+  if (memspace == ompx_gnu_managed_mem_space)
+    gomp_managed_free (addr);
+  else if (pin)
+    {
+      int using_device
+	= __atomic_load_n (&using_device_for_page_locked,
+			   MEMMODEL_RELAXED);
+      if (using_device == 1)
+	gomp_page_locked_host_free (addr);
+      else
+	/* The "simple" allocator does not (currently) munmap locked pages
+	   (meaning that the number of locked pages never decreases), but it
+	   can reuse the freed memory in subsequent gomp_simple_alloc calls.  */
+	gomp_simple_free (pin_ctx, addr);
+    }
   else
     free (addr);
 }
@@ -112,38 +198,51 @@ static void *
 linux_memspace_realloc (omp_memspace_handle_t memspace, void *addr,
 			size_t oldsize, size_t size, int oldpin, int pin)
 {
-  if (oldpin && pin)
+  if (memspace == ompx_gnu_managed_mem_space)
+    /* Realloc is not implemented for device Managed Memory.  */
+    ;
+  else if (oldpin && pin)
     {
-      void *newaddr = mremap (addr, oldsize, size, MREMAP_MAYMOVE);
-      if (newaddr == MAP_FAILED)
-	return NULL;
-
-      return newaddr;
-    }
-  else if (oldpin || pin)
-    {
-      void *newaddr = linux_memspace_alloc (memspace, size, pin);
-      if (newaddr)
+      int using_device
+	= __atomic_load_n (&using_device_for_page_locked,
+		       MEMMODEL_RELAXED);
+      /* The device plugin API does not support realloc,
+	 but the gomp_simple_alloc allocator does.  */
+      if (using_device == 0)
 	{
-	  memcpy (newaddr, addr, oldsize < size ? oldsize : size);
-	  linux_memspace_free (memspace, addr, oldsize, oldpin);
+	  /* This can fail if there is insufficient pinned memory free.  */
+	  void *newaddr = gomp_simple_realloc (pin_ctx, addr, size);
+	  if (newaddr)
+	    return newaddr;
 	}
-
-      return newaddr;
     }
+  else if (oldpin || pin)
+    /* Moving between pinned and unpinned memory cannot be done in-place.  */
+    ;
   else
     return realloc (addr, size);
+
+  /* In-place reallocation failed.  Fall back to copy.  */
+  void *newaddr = linux_memspace_alloc (memspace, size, pin, false);
+  if (newaddr)
+    {
+      memcpy (newaddr, addr, oldsize < size ? oldsize : size);
+      linux_memspace_free (memspace, addr, oldsize, oldpin);
+    }
+
+  return newaddr;
 }
 
 static int
 linux_memspace_validate (omp_memspace_handle_t, unsigned, int)
 {
-  /* Everything should be accepted on Linux, including pinning.  */
+  /* Everything should be accepted on Linux, including pinning and
+     non-standard memspaces.  */
   return 1;
 }
 
 #define MEMSPACE_ALLOC(MEMSPACE, SIZE, PIN) \
-  linux_memspace_alloc (MEMSPACE, SIZE, PIN)
+  linux_memspace_alloc (MEMSPACE, SIZE, PIN, false)
 #define MEMSPACE_CALLOC(MEMSPACE, SIZE, PIN) \
   linux_memspace_calloc (MEMSPACE, SIZE, PIN)
 #define MEMSPACE_REALLOC(MEMSPACE, ADDR, OLDSIZE, SIZE, OLDPIN, PIN) \
diff --git a/libgomp/config/nvptx/allocator.c b/libgomp/config/nvptx/allocator.c
index 7e9e343..8bbc14a 100644
--- a/libgomp/config/nvptx/allocator.c
+++ b/libgomp/config/nvptx/allocator.c
@@ -61,6 +61,9 @@ nvptx_memspace_alloc (omp_memspace_handle_t memspace, size_t size)
 
       return __nvptx_lowlat_alloc (shared_pool, size);
     }
+  else if (memspace > GOMP_OMP_PREDEF_MEMSPACE_MAX)
+    /* No non-standard memspaces are implemented for device-side nvptx.  */
+    return NULL;
   else
     return malloc (size);
 }
@@ -75,6 +78,9 @@ nvptx_memspace_calloc (omp_memspace_handle_t memspace, size_t size)
 
       return __nvptx_lowlat_calloc (shared_pool, size);
     }
+  else if (memspace > GOMP_OMP_PREDEF_MEMSPACE_MAX)
+    /* No non-standard memspaces are implemented for device-side nvptx.  */
+    return NULL;
   else
     return calloc (1, size);
 }
@@ -104,6 +110,9 @@ nvptx_memspace_realloc (omp_memspace_handle_t memspace, void *addr,
 
       return __nvptx_lowlat_realloc (shared_pool, addr, oldsize, size);
     }
+  else if (memspace > GOMP_OMP_PREDEF_MEMSPACE_MAX)
+    /* No non-standard memspaces are implemented for device-side nvptx.  */
+    return NULL;
   else
     return realloc (addr, size);
 }
@@ -115,12 +124,19 @@ nvptx_memspace_validate (omp_memspace_handle_t memspace, unsigned access)
     || (__PTX_ISA_VERSION_MAJOR__ == 4 && __PTX_ISA_VERSION_MINOR >= 1)
   /* Disallow use of low-latency memory when it must be accessible by
      all threads.  */
-  return (memspace != omp_low_lat_mem_space
-	  || access != omp_atv_all);
+  if (memspace == omp_low_lat_mem_space
+      && access == omp_atv_all)
+    return false;
 #else
   /* Low-latency memory is not available before PTX 4.1.  */
-  return (memspace != omp_low_lat_mem_space);
+  if (memspace == omp_low_lat_mem_space)
+    return false;
 #endif
+
+  /* Otherwise, standard memspaces are accepted, even when we don't have
+     anything special to do with them, and non-standard memspaces are assumed
+     to need explicit support.  */
+  return (memspace <= GOMP_OMP_PREDEF_MEMSPACE_MAX);
 }
 
 #define MEMSPACE_ALLOC(MEMSPACE, SIZE, PIN) \
diff --git a/libgomp/config/nvptx/team.c b/libgomp/config/nvptx/team.c
index 4227344..6a34144 100644
--- a/libgomp/config/nvptx/team.c
+++ b/libgomp/config/nvptx/team.c
@@ -31,6 +31,10 @@
 #include <stdlib.h>
 #include <string.h>
 
+#define UNLIKELY(x) (__builtin_expect ((x), 0))
+
+extern void *GOMP_INDIRECT_ADDR_MAP;
+
 struct gomp_thread *nvptx_thrs __attribute__((shared,nocommon));
 int __gomp_team_num __attribute__((shared,nocommon));
 
@@ -71,10 +75,10 @@ gomp_nvptx_main (void (*fn) (void *), void *fn_data)
       nvptx_thrs = alloca (ntids * sizeof (*nvptx_thrs));
       memset (nvptx_thrs, 0, ntids * sizeof (*nvptx_thrs));
 
-      /* Initialize indirect function support.  */
+      /* Initialize indirect function support for older libgomp.  */
       unsigned int block_id;
       asm ("mov.u32 %0, %%ctaid.x;" : "=r" (block_id));
-      if (block_id == 0)
+      if (UNLIKELY (GOMP_INDIRECT_ADDR_MAP != NULL && block_id == 0))
 	build_indirect_map ();
 
       /* Find the low-latency heap details ....  */
diff --git a/libgomp/configure b/libgomp/configure
index df1fc8d..da48c83 100755
--- a/libgomp/configure
+++ b/libgomp/configure
@@ -688,12 +688,13 @@ ENABLE_DARWIN_AT_RPATH_TRUE
 enable_static
 enable_shared
 lt_host_flags
-CPP
 OTOOL64
 OTOOL
 LIPO
 NMEDIT
 DSYMUTIL
+LLVM_CONFIG
+CPP
 OBJDUMP
 LN_S
 NM
@@ -1636,6 +1637,43 @@ fi
 
 } # ac_fn_c_try_compile
 
+# ac_fn_c_try_cpp LINENO
+# ----------------------
+# Try to preprocess conftest.$ac_ext, and return whether this succeeded.
+ac_fn_c_try_cpp ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  if { { ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err
+  ac_status=$?
+  if test -s conftest.err; then
+    grep -v '^ *+' conftest.err >conftest.er1
+    cat conftest.er1 >&5
+    mv -f conftest.er1 conftest.err
+  fi
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } > conftest.i && {
+	 test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       }; then :
+  ac_retval=0
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+    ac_retval=1
+fi
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+  as_fn_set_status $ac_retval
+
+} # ac_fn_c_try_cpp
+
 # ac_fn_c_try_link LINENO
 # -----------------------
 # Try to link conftest.$ac_ext, and return whether this succeeded.
@@ -1713,43 +1751,6 @@ $as_echo "$ac_res" >&6; }
 
 } # ac_fn_c_check_header_compile
 
-# ac_fn_c_try_cpp LINENO
-# ----------------------
-# Try to preprocess conftest.$ac_ext, and return whether this succeeded.
-ac_fn_c_try_cpp ()
-{
-  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
-  if { { ac_try="$ac_cpp conftest.$ac_ext"
-case "(($ac_try" in
-  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
-  *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
-  (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err
-  ac_status=$?
-  if test -s conftest.err; then
-    grep -v '^ *+' conftest.err >conftest.er1
-    cat conftest.er1 >&5
-    mv -f conftest.er1 conftest.err
-  fi
-  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
-  test $ac_status = 0; } > conftest.i && {
-	 test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
-	 test ! -s conftest.err
-       }; then :
-  ac_retval=0
-else
-  $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-    ac_retval=1
-fi
-  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
-  as_fn_set_status $ac_retval
-
-} # ac_fn_c_try_cpp
-
 # ac_fn_c_try_run LINENO
 # ----------------------
 # Try to link conftest.$ac_ext, and return whether this succeeded. Assumes
@@ -6171,8 +6172,499 @@ test -z "$deplibs_check_method" && deplibs_check_method=unknown
 
 
 
-plugin_option=
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5
+$as_echo_n "checking how to run the C preprocessor... " >&6; }
+# On Suns, sometimes $CPP names a directory.
+if test -n "$CPP" && test -d "$CPP"; then
+  CPP=
+fi
+if test -z "$CPP"; then
+  if ${ac_cv_prog_CPP+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+      # Double quotes because CPP needs to be expanded
+    for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp"
+    do
+      ac_preproc_ok=false
+for ac_c_preproc_warn_flag in '' yes
+do
+  # Use a header file that comes with gcc, so configuring glibc
+  # with a fresh cross-compiler works.
+  # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+  # <limits.h> exists even on freestanding compilers.
+  # On the NeXT, cc -E runs the code through the compiler's parser,
+  # not just through cpp. "Syntax error" is here to catch this case.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+		     Syntax error
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+
+else
+  # Broken: fails on valid input.
+continue
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+  # OK, works on sane cases.  Now check whether nonexistent headers
+  # can be detected and how.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <ac_nonexistent.h>
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+  # Broken: success on invalid input.
+continue
+else
+  # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.i conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then :
+  break
+fi
+
+    done
+    ac_cv_prog_CPP=$CPP
+
+fi
+  CPP=$ac_cv_prog_CPP
+else
+  ac_cv_prog_CPP=$CPP
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5
+$as_echo "$CPP" >&6; }
+ac_preproc_ok=false
+for ac_c_preproc_warn_flag in '' yes
+do
+  # Use a header file that comes with gcc, so configuring glibc
+  # with a fresh cross-compiler works.
+  # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+  # <limits.h> exists even on freestanding compilers.
+  # On the NeXT, cc -E runs the code through the compiler's parser,
+  # not just through cpp. "Syntax error" is here to catch this case.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+		     Syntax error
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+
+else
+  # Broken: fails on valid input.
+continue
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+  # OK, works on sane cases.  Now check whether nonexistent headers
+  # can be detected and how.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <ac_nonexistent.h>
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+  # Broken: success on invalid input.
+continue
+else
+  # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.i conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then :
+
+else
+  { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "C preprocessor \"$CPP\" fails sanity check
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+
+# Try CLANG_PLUGIN_FILE first since GCC_PLUGIN_OPTION may return the
+# wrong plugin_option with clang.
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for clang" >&5
+$as_echo_n "checking for clang... " >&6; }
+if ${clang_cv_is_clang+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+
+    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+#ifdef __clang__
+  yes
+#endif
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+  $EGREP "yes" >/dev/null 2>&1; then :
+  clang_cv_is_clang=yes
+else
+  clang_cv_is_clang=no
+fi
+rm -f conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $clang_cv_is_clang" >&5
+$as_echo "$clang_cv_is_clang" >&6; }
+  if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}llvm-config", so it can be a program name with args.
+set dummy ${ac_tool_prefix}llvm-config; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_LLVM_CONFIG+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$LLVM_CONFIG"; then
+  ac_cv_prog_LLVM_CONFIG="$LLVM_CONFIG" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_LLVM_CONFIG="${ac_tool_prefix}llvm-config"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+LLVM_CONFIG=$ac_cv_prog_LLVM_CONFIG
+if test -n "$LLVM_CONFIG"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $LLVM_CONFIG" >&5
+$as_echo "$LLVM_CONFIG" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_LLVM_CONFIG"; then
+  ac_ct_LLVM_CONFIG=$LLVM_CONFIG
+  # Extract the first word of "llvm-config", so it can be a program name with args.
+set dummy llvm-config; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_LLVM_CONFIG+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_LLVM_CONFIG"; then
+  ac_cv_prog_ac_ct_LLVM_CONFIG="$ac_ct_LLVM_CONFIG" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_LLVM_CONFIG="llvm-config"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_LLVM_CONFIG=$ac_cv_prog_ac_ct_LLVM_CONFIG
+if test -n "$ac_ct_LLVM_CONFIG"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_LLVM_CONFIG" >&5
+$as_echo "$ac_ct_LLVM_CONFIG" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_LLVM_CONFIG" = x; then
+    LLVM_CONFIG=""
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    LLVM_CONFIG=$ac_ct_LLVM_CONFIG
+  fi
+else
+  LLVM_CONFIG="$ac_cv_prog_LLVM_CONFIG"
+fi
+
+  plugin_file=
+  if test $clang_cv_is_clang = yes; then
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for clang plugin file" >&5
+$as_echo_n "checking for clang plugin file... " >&6; }
+    plugin_names="LLVMgold.so"
+    for plugin in $plugin_names; do
+      plugin_file=`${CC} ${CFLAGS} --print-file-name $plugin`
+      if test "$plugin_file" != "$plugin"; then
+	break;
+      fi
+      if test -n "${LLVM_CONFIG}"; then
+	plugin_file=`${LLVM_CONFIG} --libdir`/$plugin
+	if test -f "$plugin_file"; then
+	  break;
+	fi
+      fi
+      plugin_file=
+    done
+    if test -z "$plugin_file"; then
+      { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+    else
+      { $as_echo "$as_me:${as_lineno-$LINENO}: result: $plugin_file" >&5
+$as_echo "$plugin_file" >&6; }
+            if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}ar", so it can be a program name with args.
+set dummy ${ac_tool_prefix}ar; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_AR+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$AR"; then
+  ac_cv_prog_AR="$AR" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_AR="${ac_tool_prefix}ar"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+AR=$ac_cv_prog_AR
+if test -n "$AR"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AR" >&5
+$as_echo "$AR" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_AR"; then
+  ac_ct_AR=$AR
+  # Extract the first word of "ar", so it can be a program name with args.
+set dummy ar; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_AR+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_AR"; then
+  ac_cv_prog_ac_ct_AR="$ac_ct_AR" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_AR="ar"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_AR=$ac_cv_prog_ac_ct_AR
+if test -n "$ac_ct_AR"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_AR" >&5
+$as_echo "$ac_ct_AR" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_AR" = x; then
+    AR=""
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    AR=$ac_ct_AR
+  fi
+else
+  AR="$ac_cv_prog_AR"
+fi
+
+      if test -z "${AR}"; then
+	as_fn_error $? "Required archive tool 'ar' not found on PATH." "$LINENO" 5
+      fi
+      plugin_option="--plugin $plugin_file"
+      touch conftest.c
+      ${AR} $plugin_option rc conftest.a conftest.c
+      if test "$?" != 0; then
+	{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Failed: $AR $plugin_option rc" >&5
+$as_echo "$as_me: WARNING: Failed: $AR $plugin_option rc" >&2;}
+	plugin_file=
+      fi
+      rm -f conftest.*
+    fi
+  fi
+  plugin_file="$plugin_file"
+
+if test -n "$plugin_file"; then
+  plugin_option="--plugin $plugin_file"
+else
+  if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}ar", so it can be a program name with args.
+set dummy ${ac_tool_prefix}ar; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_AR+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$AR"; then
+  ac_cv_prog_AR="$AR" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_AR="${ac_tool_prefix}ar"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+AR=$ac_cv_prog_AR
+if test -n "$AR"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AR" >&5
+$as_echo "$AR" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_AR"; then
+  ac_ct_AR=$AR
+  # Extract the first word of "ar", so it can be a program name with args.
+set dummy ar; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_AR+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_AR"; then
+  ac_cv_prog_ac_ct_AR="$ac_ct_AR" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_AR="ar"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_AR=$ac_cv_prog_ac_ct_AR
+if test -n "$ac_ct_AR"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_AR" >&5
+$as_echo "$ac_ct_AR" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_AR" = x; then
+    AR=""
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    AR=$ac_ct_AR
+  fi
+else
+  AR="$ac_cv_prog_AR"
+fi
+
+if test -z "${AR}"; then
+  as_fn_error $? "Required archive tool 'ar' not found on PATH." "$LINENO" 5
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for -plugin option" >&5
+$as_echo_n "checking for -plugin option... " >&6; }
 plugin_names="liblto_plugin.so liblto_plugin-0.dll cyglto_plugin-0.dll"
+plugin_option=
 for plugin in $plugin_names; do
   plugin_so=`${CC} ${CFLAGS} --print-prog-name $plugin`
   if test x$plugin_so = x$plugin; then
@@ -6183,7 +6675,24 @@ for plugin in $plugin_names; do
     break
   fi
 done
+if test -z "$plugin_option"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $plugin_option" >&5
+$as_echo "$plugin_option" >&6; }
+    touch conftest.c
+  ${AR} $plugin_option rc conftest.a conftest.c
+  if test "$?" != 0; then
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Failed: $AR $plugin_option rc" >&5
+$as_echo "$as_me: WARNING: Failed: $AR $plugin_option rc" >&2;}
+    plugin_option=
+  fi
+  rm -f conftest.*
+fi
+plugin_option="$plugin_option"
 
+fi
 if test -n "$ac_tool_prefix"; then
   # Extract the first word of "${ac_tool_prefix}ar", so it can be a program name with args.
 set dummy ${ac_tool_prefix}ar; ac_word=$2
@@ -6278,17 +6787,15 @@ fi
 
 test -z "$AR" && AR=ar
 if test -n "$plugin_option"; then
-  if $AR --help 2>&1 | grep -q "\--plugin"; then
-    touch conftest.c
-    $AR $plugin_option rc conftest.a conftest.c
-    if test "$?" != 0; then
-      { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Failed: $AR $plugin_option rc" >&5
-$as_echo "$as_me: WARNING: Failed: $AR $plugin_option rc" >&2;}
-    else
+  case "$AR" in
+  *"$plugin_option"*)
+    ;;
+  *)
+    if $AR --help 2>&1 | grep -q "\--plugin"; then
       AR="$AR $plugin_option"
     fi
-    rm -f conftest.*
-  fi
+    ;;
+  esac
 fi
 test -z "$AR_FLAGS" && AR_FLAGS=cru
 
@@ -6495,9 +7002,15 @@ fi
 
 test -z "$RANLIB" && RANLIB=:
 if test -n "$plugin_option" && test "$RANLIB" != ":"; then
-  if $RANLIB --help 2>&1 | grep -q "\--plugin"; then
-    RANLIB="$RANLIB $plugin_option"
-  fi
+  case "$RANLIB" in
+  *"$plugin_option"*)
+    ;;
+  *)
+    if $RANLIB --help 2>&1 | grep -q "\--plugin"; then
+      RANLIB="$RANLIB $plugin_option"
+    fi
+    ;;
+  esac
 fi
 
 
@@ -6826,7 +7339,6 @@ fi
 
 
 
-
 # Check whether --enable-libtool-lock was given.
 if test "${enable_libtool_lock+set}" = set; then :
   enableval=$enable_libtool_lock;
@@ -7650,144 +8162,6 @@ $as_echo "$lt_cv_ld_force_load" >&6; }
     ;;
   esac
 
-ac_ext=c
-ac_cpp='$CPP $CPPFLAGS'
-ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_c_compiler_gnu
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5
-$as_echo_n "checking how to run the C preprocessor... " >&6; }
-# On Suns, sometimes $CPP names a directory.
-if test -n "$CPP" && test -d "$CPP"; then
-  CPP=
-fi
-if test -z "$CPP"; then
-  if ${ac_cv_prog_CPP+:} false; then :
-  $as_echo_n "(cached) " >&6
-else
-      # Double quotes because CPP needs to be expanded
-    for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp"
-    do
-      ac_preproc_ok=false
-for ac_c_preproc_warn_flag in '' yes
-do
-  # Use a header file that comes with gcc, so configuring glibc
-  # with a fresh cross-compiler works.
-  # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
-  # <limits.h> exists even on freestanding compilers.
-  # On the NeXT, cc -E runs the code through the compiler's parser,
-  # not just through cpp. "Syntax error" is here to catch this case.
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-#ifdef __STDC__
-# include <limits.h>
-#else
-# include <assert.h>
-#endif
-		     Syntax error
-_ACEOF
-if ac_fn_c_try_cpp "$LINENO"; then :
-
-else
-  # Broken: fails on valid input.
-continue
-fi
-rm -f conftest.err conftest.i conftest.$ac_ext
-
-  # OK, works on sane cases.  Now check whether nonexistent headers
-  # can be detected and how.
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-#include <ac_nonexistent.h>
-_ACEOF
-if ac_fn_c_try_cpp "$LINENO"; then :
-  # Broken: success on invalid input.
-continue
-else
-  # Passes both tests.
-ac_preproc_ok=:
-break
-fi
-rm -f conftest.err conftest.i conftest.$ac_ext
-
-done
-# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
-rm -f conftest.i conftest.err conftest.$ac_ext
-if $ac_preproc_ok; then :
-  break
-fi
-
-    done
-    ac_cv_prog_CPP=$CPP
-
-fi
-  CPP=$ac_cv_prog_CPP
-else
-  ac_cv_prog_CPP=$CPP
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5
-$as_echo "$CPP" >&6; }
-ac_preproc_ok=false
-for ac_c_preproc_warn_flag in '' yes
-do
-  # Use a header file that comes with gcc, so configuring glibc
-  # with a fresh cross-compiler works.
-  # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
-  # <limits.h> exists even on freestanding compilers.
-  # On the NeXT, cc -E runs the code through the compiler's parser,
-  # not just through cpp. "Syntax error" is here to catch this case.
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-#ifdef __STDC__
-# include <limits.h>
-#else
-# include <assert.h>
-#endif
-		     Syntax error
-_ACEOF
-if ac_fn_c_try_cpp "$LINENO"; then :
-
-else
-  # Broken: fails on valid input.
-continue
-fi
-rm -f conftest.err conftest.i conftest.$ac_ext
-
-  # OK, works on sane cases.  Now check whether nonexistent headers
-  # can be detected and how.
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-#include <ac_nonexistent.h>
-_ACEOF
-if ac_fn_c_try_cpp "$LINENO"; then :
-  # Broken: success on invalid input.
-continue
-else
-  # Passes both tests.
-ac_preproc_ok=:
-break
-fi
-rm -f conftest.err conftest.i conftest.$ac_ext
-
-done
-# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
-rm -f conftest.i conftest.err conftest.$ac_ext
-if $ac_preproc_ok; then :
-
-else
-  { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "C preprocessor \"$CPP\" fails sanity check
-See \`config.log' for more details" "$LINENO" 5; }
-fi
-
-ac_ext=c
-ac_cpp='$CPP $CPPFLAGS'
-ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_c_compiler_gnu
-
-
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5
 $as_echo_n "checking for ANSI C header files... " >&6; }
 if ${ac_cv_header_stdc+:} false; then :
@@ -11469,7 +11843,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11472 "configure"
+#line 11846 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -11575,7 +11949,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11578 "configure"
+#line 11952 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -16170,10 +16544,13 @@ with_gnu_ld=$lt_cv_prog_gnu_ld
   # does some of this, but throws away the result.
   libgomp_ld_is_gold=no
   libgomp_ld_is_mold=no
+  libgomp_ld_is_wild=no
   if $LD --version 2>/dev/null | grep 'GNU gold'> /dev/null 2>&1; then
     libgomp_ld_is_gold=yes
   elif $LD --version 2>/dev/null | grep 'mold'> /dev/null 2>&1; then
     libgomp_ld_is_mold=yes
+  elif $LD --version 2>/dev/null | grep 'Wild'> /dev/null 2>&1; then
+    libgomp_ld_is_wild=yes
   fi
 
   ldver=`$LD --version 2>/dev/null |
@@ -16378,6 +16755,8 @@ if test $enable_symvers != no && test $libgomp_shared_libgcc = yes; then
       enable_symvers=gnu
     elif test $libgomp_ld_is_mold = yes ; then
       enable_symvers=gnu
+    elif test $libgomp_ld_is_wild = yes ; then
+      enable_symvers=gnu
     else
       # The right tools, the right setup, but too old.  Fallbacks?
       { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: === Linker version $libgomp_gnu_ld_version is too old for" >&5
diff --git a/libgomp/env.c b/libgomp/env.c
index 626a753..48bb789 100644
--- a/libgomp/env.c
+++ b/libgomp/env.c
@@ -1231,6 +1231,12 @@ parse_affinity (bool ignore)
   return false;
 }
 
+/* These are reminders to add new allocators to parse_allocator.  */
+_Static_assert (GOMP_OMP_PREDEF_ALLOC_MAX == omp_thread_mem_alloc);
+_Static_assert (GOMP_OMPX_PREDEF_ALLOC_MAX == ompx_gnu_managed_mem_alloc);
+_Static_assert (GOMP_OMP_PREDEF_MEMSPACE_MAX == omp_low_lat_mem_space);
+_Static_assert (GOMP_OMPX_PREDEF_MEMSPACE_MAX == ompx_gnu_managed_mem_space);
+
 /* Parse the OMP_ALLOCATOR environment variable and return the value.  */
 static bool
 parse_allocator (const char *env, const char *val, void *const params[])
@@ -1249,12 +1255,12 @@ parse_allocator (const char *env, const char *val, void *const params[])
     ++val;
   if (0)
     ;
-#define C(v, m) \
+#define C(v, is_memspace) \
   else if (strncasecmp (val, #v, sizeof (#v) - 1) == 0)	\
     {							\
       *ret = v;						\
       val += sizeof (#v) - 1;				\
-      memspace = m;					\
+      memspace = is_memspace;					\
     }
   C (omp_default_mem_alloc, false)
   C (omp_large_cap_mem_alloc, false)
@@ -1265,11 +1271,13 @@ parse_allocator (const char *env, const char *val, void *const params[])
   C (omp_pteam_mem_alloc, false)
   C (omp_thread_mem_alloc, false)
   C (ompx_gnu_pinned_mem_alloc, false)
+  C (ompx_gnu_managed_mem_alloc, false)
   C (omp_default_mem_space, true)
   C (omp_large_cap_mem_space, true)
   C (omp_const_mem_space, true)
   C (omp_high_bw_mem_space, true)
   C (omp_low_lat_mem_space, true)
+  C (ompx_gnu_managed_mem_space, true)
 #undef C
   else
     goto invalid;
@@ -2455,7 +2463,7 @@ initialize_env (void)
       const char *env = getenv ("GOMP_STACKSIZE");
       if (env != NULL
 	  && parse_stacksize ("GOMP_STACKSIZE", env,
-			      (void *[3]) {&none->icvs.stacksize}))
+			      (void *[]) {&none->icvs.stacksize}))
 	gomp_set_icv_flag (&none->flags, GOMP_ICV_STACKSIZE);
     }
   if (none != NULL && gomp_get_icv_flag (none->flags, GOMP_ICV_STACKSIZE))
diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h
index 924fc1f..71e7452 100644
--- a/libgomp/libgomp-plugin.h
+++ b/libgomp/libgomp-plugin.h
@@ -134,7 +134,11 @@ enum gomp_interop_flag
    must be stringified).  */
 #define GOMP_ADDITIONAL_ICVS __gomp_additional_icvs
 
+/* GOMP_INDIRECT_ADDR_HMAP points to a hash table and is to be used by
+   newer libgomp, while GOMP_INDIRECT_ADDR_MAP points to a linear table
+   and exists for backward compatibility.  */
 #define GOMP_INDIRECT_ADDR_MAP __gomp_indirect_addr_map
+#define GOMP_INDIRECT_ADDR_HMAP __gomp_indirect_addr_hmap
 
 /* Miscellaneous functions.  */
 extern void *GOMP_PLUGIN_malloc (size_t) __attribute__ ((malloc));
@@ -167,6 +171,10 @@ extern int GOMP_OFFLOAD_load_image (int, unsigned, const void *,
 extern bool GOMP_OFFLOAD_unload_image (int, unsigned, const void *);
 extern void *GOMP_OFFLOAD_alloc (int, size_t);
 extern bool GOMP_OFFLOAD_free (int, void *);
+extern void *GOMP_OFFLOAD_managed_alloc (int, size_t);
+extern bool GOMP_OFFLOAD_managed_free (int, void *);
+extern bool GOMP_OFFLOAD_page_locked_host_alloc (void **, size_t);
+extern bool GOMP_OFFLOAD_page_locked_host_free (void *);
 extern bool GOMP_OFFLOAD_dev2host (int, void *, const void *, size_t);
 extern bool GOMP_OFFLOAD_host2dev (int, void *, const void *, size_t);
 extern bool GOMP_OFFLOAD_dev2dev (int, void *, const void *, size_t);
@@ -177,6 +185,7 @@ extern int GOMP_OFFLOAD_memcpy3d (int, int, size_t, size_t, size_t, void *,
 				  size_t, size_t, size_t, size_t, size_t,
 				  const void *, size_t, size_t, size_t, size_t,
 				  size_t);
+extern bool GOMP_OFFLOAD_memset (int, void *, int, size_t);
 extern bool GOMP_OFFLOAD_can_run (void *);
 extern void GOMP_OFFLOAD_run (int, void *, void *, void **);
 extern void GOMP_OFFLOAD_async_run (int, void *, void *, void **, void *);
@@ -200,6 +209,8 @@ extern bool GOMP_OFFLOAD_openacc_async_dev2host (int, void *, const void *, size
 						 struct goacc_asyncqueue *);
 extern bool GOMP_OFFLOAD_openacc_async_host2dev (int, void *, const void *, size_t,
 						 struct goacc_asyncqueue *);
+extern bool GOMP_OFFLOAD_openacc_async_dev2dev (int, void *, const void *, size_t,
+						struct goacc_asyncqueue *);
 extern void *GOMP_OFFLOAD_openacc_cuda_get_current_device (void);
 extern void *GOMP_OFFLOAD_openacc_cuda_get_current_context (void);
 extern void *GOMP_OFFLOAD_openacc_cuda_get_stream (struct goacc_asyncqueue *);
@@ -225,6 +236,18 @@ extern const char *GOMP_OFFLOAD_get_interop_type_desc (struct interop_obj_t *,
 						       omp_interop_property_t);
 #endif
 
+/* simple-allocator.c  */
+
+typedef struct gomp_simple_alloc_context *gomp_simple_alloc_ctx_p;
+
+gomp_simple_alloc_ctx_p gomp_simple_alloc_init_context (void);
+void gomp_simple_alloc_register_memory (gomp_simple_alloc_ctx_p ctx,
+				        char *base, size_t size);
+void *gomp_simple_alloc (gomp_simple_alloc_ctx_p ctx, size_t size);
+void gomp_simple_free (gomp_simple_alloc_ctx_p ctx, void *addr);
+void *gomp_simple_realloc (gomp_simple_alloc_ctx_p ctx, void *addr,
+			   size_t newsize);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index d97768f..46db7d4 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -1135,6 +1135,10 @@ extern int gomp_get_num_devices (void);
 extern bool gomp_target_task_fn (void *);
 extern void gomp_target_rev (uint64_t, uint64_t, uint64_t, uint64_t, uint64_t,
 			     int, struct goacc_asyncqueue *);
+extern void *gomp_managed_alloc (size_t size);
+extern void gomp_managed_free (void *device_ptr);
+extern bool gomp_page_locked_host_alloc (void **, size_t);
+extern void gomp_page_locked_host_free (void *);
 
 /* Splay tree definitions.  */
 typedef struct splay_tree_node_s *splay_tree_node;
@@ -1360,6 +1364,7 @@ typedef struct acc_dispatch_t
     __typeof (GOMP_OFFLOAD_openacc_async_exec) *exec_func;
     __typeof (GOMP_OFFLOAD_openacc_async_dev2host) *dev2host_func;
     __typeof (GOMP_OFFLOAD_openacc_async_host2dev) *host2dev_func;
+    __typeof (GOMP_OFFLOAD_openacc_async_dev2dev) *dev2dev_func;
   } async;
 
   __typeof (GOMP_OFFLOAD_openacc_get_property) *get_property_func;
@@ -1418,11 +1423,16 @@ struct gomp_device_descr
   __typeof (GOMP_OFFLOAD_unload_image) *unload_image_func;
   __typeof (GOMP_OFFLOAD_alloc) *alloc_func;
   __typeof (GOMP_OFFLOAD_free) *free_func;
+  __typeof (GOMP_OFFLOAD_managed_alloc) *managed_alloc_func;
+  __typeof (GOMP_OFFLOAD_managed_free) *managed_free_func;
+  __typeof (GOMP_OFFLOAD_page_locked_host_alloc) *page_locked_host_alloc_func;
+  __typeof (GOMP_OFFLOAD_page_locked_host_free) *page_locked_host_free_func;
   __typeof (GOMP_OFFLOAD_dev2host) *dev2host_func;
   __typeof (GOMP_OFFLOAD_host2dev) *host2dev_func;
+  __typeof (GOMP_OFFLOAD_dev2dev) *dev2dev_func;
   __typeof (GOMP_OFFLOAD_memcpy2d) *memcpy2d_func;
   __typeof (GOMP_OFFLOAD_memcpy3d) *memcpy3d_func;
-  __typeof (GOMP_OFFLOAD_dev2dev) *dev2dev_func;
+  __typeof (GOMP_OFFLOAD_memset) *memset_func;
   __typeof (GOMP_OFFLOAD_can_run) *can_run_func;
   __typeof (GOMP_OFFLOAD_run) *run_func;
   __typeof (GOMP_OFFLOAD_async_run) *async_run_func;
@@ -1467,11 +1477,14 @@ extern void gomp_copy_host2dev (struct gomp_device_descr *,
 extern void gomp_copy_dev2host (struct gomp_device_descr *,
 				struct goacc_asyncqueue *, void *, const void *,
 				size_t);
+extern void gomp_copy_dev2dev (struct gomp_device_descr *,
+			       struct goacc_asyncqueue *, void *, const void *,
+			       size_t);
 extern uintptr_t gomp_map_val (struct target_mem_desc *, void **, size_t);
-extern void gomp_attach_pointer (struct gomp_device_descr *,
+extern bool gomp_attach_pointer (struct gomp_device_descr *,
 				 struct goacc_asyncqueue *, splay_tree,
 				 splay_tree_key, uintptr_t, size_t,
-				 struct gomp_coalesce_buf *, bool);
+				 struct gomp_coalesce_buf *, bool, bool);
 extern void gomp_detach_pointer (struct gomp_device_descr *,
 				 struct goacc_asyncqueue *, splay_tree_key,
 				 uintptr_t, bool, struct gomp_coalesce_buf *);
@@ -1663,4 +1676,7 @@ gomp_thread_to_pthread_t (struct gomp_thread *thr)
 }
 #endif
 
+/* simple-allocator.c has its prototypes in libgomp-plugin.h so that they are
+   accessible from both libgomp and the plugins.  */
+
 #endif /* LIBGOMP_H */
diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map
index eae2f53..67e08a3 100644
--- a/libgomp/libgomp.map
+++ b/libgomp/libgomp.map
@@ -453,6 +453,12 @@ GOMP_6.0 {
 	omp_get_uid_from_device_8_;
 } GOMP_5.1.3;
 
+GOMP_6.0.1 {
+  global:
+	omp_target_memset;
+	omp_target_memset_async;
+} GOMP_6.0;
+
 OACC_2.0 {
   global:
 	acc_get_num_devices;
@@ -556,9 +562,6 @@ OACC_2.5 {
 	acc_copyout_finalize_64_h_;
 	acc_copyout_finalize_array_h_;
 	acc_copyout_finalize_async;
-	acc_copyout_finalize_async_32_h_;
-	acc_copyout_finalize_async_64_h_;
-	acc_copyout_finalize_async_array_h_;
 	acc_create_async;
 	acc_create_async_32_h_;
 	acc_create_async_64_h_;
@@ -572,9 +575,6 @@ OACC_2.5 {
 	acc_delete_finalize_64_h_;
 	acc_delete_finalize_array_h_;
 	acc_delete_finalize_async;
-	acc_delete_finalize_async_32_h_;
-	acc_delete_finalize_async_64_h_;
-	acc_delete_finalize_async_array_h_;
 	acc_memcpy_from_device_async;
 	acc_memcpy_to_device_async;
 	acc_update_device_async;
@@ -609,6 +609,18 @@ OACC_2.6 {
 	acc_get_property_string_h_;
 } OACC_2.5.1;
 
+OACC_2.6.1 {
+  global:
+	acc_copyout_finalize_async_32_h_;
+	acc_copyout_finalize_async_64_h_;
+	acc_copyout_finalize_async_array_h_;
+	acc_delete_finalize_async_32_h_;
+	acc_delete_finalize_async_64_h_;
+	acc_delete_finalize_async_array_h_;
+	acc_memcpy_device;
+	acc_memcpy_device_async;
+} OACC_2.6;
+
 GOACC_2.0 {
   global:
 	GOACC_data_end;
diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index 6909c2b..ac96d2f 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -170,6 +170,7 @@ See also @ref{OpenMP Implementation Status}.
 * OpenMP 5.1:: Feature completion status to 5.1 specification
 * OpenMP 5.2:: Feature completion status to 5.2 specification
 * OpenMP 6.0:: Feature completion status to 6.0 specification
+* OpenMP Technical Report 14:: Feature completion status to first 6.1 preview
 @end menu
 
 The @code{_OPENMP} preprocessor macro and Fortran's @code{openmp_version}
@@ -290,8 +291,9 @@ The OpenMP 4.5 specification is fully supported.
 @item @code{omp_all_memory} reserved locator @tab Y @tab
 @item @emph{target_device trait} in OpenMP Context @tab Y
 @item @code{target_device} selector set in context selectors @tab Y @tab
-@item C/C++'s @code{declare variant} directive: elision support of
-      preprocessed code @tab N @tab
+@item C/C++'s delimited @code{declare variant} directive: support elision of
+      preprocessed code and interpret enclosed function definitions
+      as variant functions  @tab Y @tab
 @item @code{declare variant}: new clauses @code{adjust_args} and
       @code{append_args} @tab Y @tab
 @item @code{dispatch} construct @tab Y @tab
@@ -603,7 +605,7 @@ to address of matching mapped list item per 5.1, Sect. 2.21.7.2 @tab N @tab
       @code{omp_get_device_teams_thread_limit}, and
       @code{omp_set_device_teams_thread_limit} routines @tab N @tab
 @item @code{omp_target_memset} and @code{omp_target_memset_async} routines
-      @tab N @tab
+      @tab Y @tab
 @item Fortran version of the interop runtime routines @tab Y @tab
 @item Routines for obtaining memory spaces/allocators for shared/device memory
       @tab N @tab
@@ -651,6 +653,39 @@ to address of matching mapped list item per 5.1, Sect. 2.21.7.2 @tab N @tab
 @end multitable
 
 
+@node OpenMP Technical Report 14
+@section OpenMP Technical Report 14
+
+Technical Report (TR) 14 is the first preview for OpenMP 6.1.
+
+@unnumberedsubsec New features listed in Appendix B of the OpenMP specification
+@multitable @columnfractions .60 .10 .25
+@item The @code{depth} clause to @code{fuse} directive @tab N @tab
+@item The @code{attach} modifier to the @code{map} clause @tab N @tab
+@item The @code{dyn_groupprivate} clause and the
+      @code{omp_get_dyn_groupprivate_ptr}, @code{omp_get_dyn_groupprivate_size},
+      and @code{omp_get_groupprivate_limit} routines @tab N @tab
+@item @code{begin declare_variant} directive in Fortran @tab N @tab
+@item @code{grid} and @code{tile} modifier to the @code{size} clause @tab N @tab
+@item New @code{flatten} loop-transforming directive @tab N @tab
+@item @code{scaled} modifier to @code{simdlen} clause @tab N @tab
+@item New @code{omp_default_device} identifier as conforming device number
+      @tab Y @tab
+@item Clarify when @code{omp_target_is_accessible} routine returns zero
+      @tab N @tab
+@end multitable
+
+@unnumberedsubsec Deprecated features, unless listed above
+@multitable @columnfractions .60 .10 .25
+@item Deprecation of conditional-update-capture structured block without a
+      capture statement @tab N @tab
+@end multitable
+
+@c @unnumberedsubsec Other new OpenMP 6.1 features
+@c @multitable @columnfractions .60 .10 .25
+@c @end multitable
+
+
 
 @c ---------------------------------------------------------------------
 @c OpenMP Runtime Library Routines
@@ -1802,6 +1837,11 @@ Returns the number of available non-host devices.
 
 The effect of running this routine in a @code{target} region is unspecified.
 
+Note that in GCC the function is marked pure, i.e. as always returning the
+same number.  When GCC was not configured to support offloading, it is replaced
+by zero; compile with @option{-fno-builtin-omp_get_num_devices} if a run-time
+function is desired.
+
 @item @emph{C/C++}:
 @multitable @columnfractions .20 .80
 @item @emph{Prototype}: @tab @code{int omp_get_num_devices(void);}
@@ -1812,6 +1852,9 @@ The effect of running this routine in a @code{target} region is unspecified.
 @item @emph{Interface}: @tab @code{integer function omp_get_num_devices()}
 @end multitable
 
+@item @emph{See also}:
+@ref{omp_get_initial_device}
+
 @item @emph{Reference}:
 @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.31.
 @end table
@@ -1950,6 +1993,12 @@ the value of @code{omp_initial_device}.
 
 The effect of running this routine in a @code{target} region is unspecified.
 
+Note that GCC inlines this function unless you compile with
+@option{-fno-builtin-omp_get_initial_device}.  If GCC was not configured to
+support offloading, it expands to constant zero; in non-host code it expands
+to @code{omp_initial_device}; and otherwise it is replaced with a call to
+@code{omp_get_num_devices}.
+
 @item @emph{C/C++}
 @multitable @columnfractions .20 .80
 @item @emph{Prototype}: @tab @code{int omp_get_initial_device(void);}
@@ -1984,8 +2033,8 @@ pointers on devices. They have C linkage and do not throw exceptions.
 * omp_target_memcpy_async:: Copy data between devices asynchronously
 * omp_target_memcpy_rect:: Copy a subvolume of data between devices
 * omp_target_memcpy_rect_async:: Copy a subvolume of data between devices asynchronously
-@c * omp_target_memset:: <fixme>/TR12
-@c * omp_target_memset_async:: <fixme>/TR12
+* omp_target_memset:: Set bytes in device memory
+* omp_target_memset_async:: Set bytes in device memory asynchronously
 * omp_target_associate_ptr:: Associate a device pointer with a host pointer
 * omp_target_disassociate_ptr:: Remove device--host pointer association
 * omp_get_mapped_ptr:: Return device pointer to a host pointer
@@ -2398,6 +2447,98 @@ the initial device.
 @end table
 
 
+@node omp_target_memset
+@subsection @code{omp_target_memset} -- Set bytes in device memory
+@table @asis
+@item @emph{Description}:
+This routine fills memory on the device identified by device number
+@var{device_num}.  Starting from the device address @var{ptr}, the first
+@var{count} bytes are set to the value @var{val}, converted to
+@code{unsigned char}. If @var{count} is zero, the routine has no effect;
+if @var{ptr} is @code{NULL}, the behavior is unspecified.  The function
+returns @var{ptr}.
+
+The @var{device_num} must be a conforming device number and @var{ptr} must be
+a valid device pointer for that device.  Running this routine in a
+@code{target} region except on the initial device is not supported.
+
+@item @emph{C/C++}
+@multitable @columnfractions .20 .80
+@item @emph{Prototype}: @tab @code{void *omp_target_memset(void *ptr,}
+@item                   @tab @code{                        int val,}
+@item                   @tab @code{                        size_t count,}
+@item                   @tab @code{                        int device_num)}
+@end multitable
+
+@item @emph{Fortran}:
+@multitable @columnfractions .20 .80
+@item @emph{Interface}: @tab @code{type(c_ptr) function omp_target_memset( &}
+@item                   @tab @code{    ptr, val, count, device_num) bind(C)}
+@item                   @tab @code{use, intrinsic :: iso_c_binding, only: c_ptr, c_size_t, c_int}
+@item                   @tab @code{type(c_ptr), value :: ptr}
+@item                   @tab @code{integer(c_size_t), value :: count}
+@item                   @tab @code{integer(c_int), value :: val, device_num}
+@end multitable
+
+@item @emph{See also}:
+@ref{omp_target_memset_async}
+
+@item @emph{Reference}:
+@uref{https://www.openmp.org, OpenMP specification v6.0}, Section 25.8.1
+@end table
+
+
+
+@node omp_target_memset_async
+@subsection @code{omp_target_memset_async} -- Set bytes in device memory asynchronously
+@table @asis
+@item @emph{Description}:
+This routine fills memory on the device identified by device number
+@var{device_num}.  Starting from the device address @var{ptr}, the first
+@var{count} bytes are set to the value @var{val}, converted to
+@code{unsigned char}. If @var{count} is zero, the routine has no effect;
+if @var{ptr} is @code{NULL}, the behavior is unspecified.  Task dependence
+is expressed by passing an array of depend objects to @var{depobj_list}, where
+the number of array elements is passed as @var{depobj_count}; if the count is
+zero, the @var{depobj_list} argument is ignored.  In C++ and Fortran, the
+@var{depobj_list} argument can also be omitted in that case.  The function
+returns @var{ptr}.
+
+The @var{device_num} must be a conforming device number and @var{ptr} must be
+a valid device pointer for that device.  Running this routine in a
+@code{target} region except on the initial device is not supported.
+
+@item @emph{C/C++}
+@multitable @columnfractions .20 .80
+@item @emph{Prototype}: @tab @code{void *omp_target_memset_async(void *ptr,}
+@item                   @tab @code{                        int val,}
+@item                   @tab @code{                        size_t count,}
+@item                   @tab @code{                        int device_num,}
+@item                   @tab @code{                        int depobj_count,}
+@item                   @tab @code{                        omp_depend_t *depobj_list)}
+@end multitable
+
+@item @emph{Fortran}:
+@multitable @columnfractions .20 .80
+@item @emph{Interface}: @tab @code{type(c_ptr) function omp_target_memset_async( &}
+@item                   @tab @code{    ptr, val, count, device_num, &}
+@item                   @tab @code{    depobj_count, depobj_list) bind(C)}
+@item                   @tab @code{use, intrinsic :: iso_c_binding, only: c_ptr, c_size_t, c_int}
+@item                   @tab @code{type(c_ptr), value :: ptr}
+@item                   @tab @code{integer(c_size_t), value :: count}
+@item                   @tab @code{integer(c_int), value :: val, device_num, depobj_count}
+@item                   @tab @code{integer(omp_depend_kind), optional :: depobj_list(*)}
+@end multitable
+
+
+@item @emph{See also}:
+@ref{omp_target_memset}
+
+@item @emph{Reference}:
+@uref{https://www.openmp.org, OpenMP specification v6.0}, Section 25.8.2
+@end table
+
+
 
 @node omp_target_associate_ptr
 @subsection @code{omp_target_associate_ptr} -- Associate a device pointer with a host pointer
@@ -3038,6 +3179,11 @@ and Fortran or used with @code{NULL} as argument in C and C++.  If successful,
 In GCC, the effect of running this routine in a @code{target} region that is not
 the initial device is unspecified.
 
+GCC implements the OpenMP 6.0 version of this function for C and C++, which is not
+compatible with its type signature in previous versions of the OpenMP specification.
+In older versions, the type @code{int*} was used for the @var{ret_code} argument
+in place of a pointer to the enumerated type @code{omp_interop_rc_t}.
+
 @c Implementation remark: In GCC, the Fortran interface differs from the one shown
 @c below: the function has C binding and @var{interop} and @var{property_id} are
 @c passed by value, which permits use of the same ABI as the C function.  This does
@@ -3084,6 +3230,11 @@ and Fortran or used with @code{NULL} as argument in C and C++.  If successful,
 In GCC, the effect of running this routine in a @code{target} region that is not
 the initial device is unspecified.
 
+GCC implements the OpenMP 6.0 version of this function for C and C++, which is not
+compatible with its type signature in previous versions of the OpenMP specification.
+In older versions, the type @code{int*} was used for the @var{ret_code} argument
+in place of a pointer to the enumerated type @code{omp_interop_rc_t}.
+
 @c Implementation remark: In GCC, the Fortran interface differs from the one shown
 @c below: the function has C binding and @var{interop} and @var{property_id} are
 @c passed by value, which permits use of the same ABI as the C function.  This does
@@ -3130,6 +3281,11 @@ and Fortran or used with @code{NULL} as argument in C and C++.  If successful,
 In GCC, the effect of running this routine in a @code{target} region that is not
 the initial device is unspecified.
 
+GCC implements the OpenMP 6.0 version of this function for C and C++, which is not
+compatible with its type signature in previous versions of the OpenMP specification.
+In older versions, the type @code{int*} was used for the @var{ret_code} argument
+in place of a pointer to the enumerated type @code{omp_interop_rc_t}.
+
 @c Implementation remark: In GCC, the Fortran interface differs from the one shown
 @c below: @var{interop} and @var{property_id} are passed by value.  This does not
 @c affect the usage of the function when GCC's @code{omp_lib} module or
@@ -3256,6 +3412,11 @@ the @var{ret_code} in human-readable form.
 The behavior is unspecified if value of @var{ret_code} was not set by an
 interoperability routine invoked for @var{interop}.
 
+GCC implements the OpenMP 6.0 version of this function for C and C++, which is not
+compatible with its type signature in previous versions of the OpenMP specification.
+In older versions, the type @code{int} was used for the @var{ret_code} argument
+in place of the enumerated type @code{omp_interop_rc_t}.
+
 @item @emph{C/C++}:
 @multitable @columnfractions .20 .80
 @item @emph{Prototype}: @tab @code{const char *omp_get_interop_rc_desc(const omp_interop_t interop,
@@ -3327,7 +3488,7 @@ traits; if an allocator that fulfills the requirements cannot be created,
 @code{omp_null_allocator} is returned.
 
 The predefined memory spaces and available traits can be found at
-@ref{OMP_ALLOCATOR}, where the trait names have to be prefixed by
+@ref{Memory allocation}, where the trait names have to be prefixed by
 @code{omp_atk_} (e.g. @code{omp_atk_pinned}) and the named trait values by
 @code{omp_atv_} (e.g. @code{omp_atv_true}); additionally, @code{omp_atv_default}
 may be used as trait value to specify that the default value should be used.
@@ -3350,7 +3511,7 @@ may be used as trait value to specify that the default value should be used.
 @end multitable
 
 @item @emph{See also}:
-@ref{OMP_ALLOCATOR}, @ref{Memory allocation}, @ref{omp_destroy_allocator}
+@ref{Memory allocation}, @ref{OMP_ALLOCATOR}, @ref{omp_destroy_allocator}
 
 @item @emph{Reference}:
 @uref{https://www.openmp.org, OpenMP specification v5.0}, Section 3.7.2
@@ -3931,63 +4092,15 @@ The value can either be a predefined allocator or a predefined memory space
 or a predefined memory space followed by a colon and a comma-separated list
 of memory trait and value pairs, separated by @code{=}.
 
+See @ref{Memory allocation} for a list of supported predefined allocators,
+memory spaces, and traits.
+
 Note: The corresponding device environment variables are currently not
 supported.  Therefore, the non-host @var{def-allocator-var} ICVs are always
 initialized to @code{omp_default_mem_alloc}.  However, on all devices,
 the @code{omp_set_default_allocator} API routine can be used to change
 value.
 
-@multitable @columnfractions .45 .45
-@headitem Predefined allocators @tab Associated predefined memory spaces
-@item omp_default_mem_alloc     @tab omp_default_mem_space
-@item omp_large_cap_mem_alloc   @tab omp_large_cap_mem_space
-@item omp_const_mem_alloc       @tab omp_const_mem_space
-@item omp_high_bw_mem_alloc     @tab omp_high_bw_mem_space
-@item omp_low_lat_mem_alloc     @tab omp_low_lat_mem_space
-@item omp_cgroup_mem_alloc      @tab omp_low_lat_mem_space (implementation defined)
-@item omp_pteam_mem_alloc       @tab omp_low_lat_mem_space (implementation defined)
-@item omp_thread_mem_alloc      @tab omp_low_lat_mem_space (implementation defined)
-@item ompx_gnu_pinned_mem_alloc @tab omp_default_mem_space (GNU extension)
-@end multitable
-
-The predefined allocators use the default values for the traits,
-as listed below.  Except that the last three allocators have the
-@code{access} trait set to @code{cgroup}, @code{pteam}, and
-@code{thread}, respectively.
-
-@multitable @columnfractions .25 .40 .25
-@headitem Trait @tab Allowed values @tab Default value
-@item @code{sync_hint} @tab @code{contended}, @code{uncontended},
-                            @code{serialized}, @code{private}
-                       @tab @code{contended}
-@item @code{alignment} @tab Positive integer being a power of two
-                       @tab 1 byte
-@item @code{access}    @tab @code{all}, @code{cgroup},
-                            @code{pteam}, @code{thread}
-                       @tab @code{all}
-@item @code{pool_size} @tab Positive integer
-                       @tab See @ref{Memory allocation}
-@item @code{fallback}  @tab @code{default_mem_fb}, @code{null_fb},
-                            @code{abort_fb}, @code{allocator_fb}
-                       @tab See below
-@item @code{fb_data}   @tab @emph{unsupported as it needs an allocator handle}
-                       @tab (none)
-@item @code{pinned}    @tab @code{true}, @code{false}
-                       @tab See below
-@item @code{partition} @tab @code{environment}, @code{nearest},
-                            @code{blocked}, @code{interleaved}
-                       @tab @code{environment}
-@end multitable
-
-For the @code{fallback} trait, the default value is @code{null_fb} for the
-@code{omp_default_mem_alloc} allocator and any allocator that is associated
-with device memory; for all other allocators, it is @code{default_mem_fb}
-by default.
-
-For the @code{pinned} trait, the default value is @code{true} for
-predefined allocator @code{ompx_gnu_pinned_mem_alloc} (a GNU extension), and
-@code{false} for all others.
-
 Examples:
 @smallexample
 OMP_ALLOCATOR=omp_high_bw_mem_alloc
@@ -4763,6 +4876,7 @@ acceleration device.
                                 present on device.
 * acc_memcpy_to_device::        Copy host memory to device memory.
 * acc_memcpy_from_device::      Copy device memory to host memory.
+* acc_memcpy_device::           Copy memory within a device.
 * acc_attach::                  Let device pointer point to device-pointer target.
 * acc_detach::                  Let device pointer point to host-pointer target.
 
@@ -5282,15 +5396,15 @@ variable or array element and @var{len} specifies the length in bytes.
 @item @emph{Fortran}:
 @multitable @columnfractions .20 .80
 @item @emph{Interface}: @tab @code{subroutine acc_copyin(a)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item @emph{Interface}: @tab @code{subroutine acc_copyin(a, len)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item                   @tab @code{integer len}
 @item @emph{Interface}: @tab @code{subroutine acc_copyin_async(a, async)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item                   @tab @code{integer(acc_handle_kind) :: async}
 @item @emph{Interface}: @tab @code{subroutine acc_copyin_async(a, len, async)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item                   @tab @code{integer len}
 @item                   @tab @code{integer(acc_handle_kind) :: async}
 @end multitable
@@ -5327,14 +5441,14 @@ backward compatibility with OpenACC 2.0; use @ref{acc_copyin} instead.
 @item @emph{Fortran}:
 @multitable @columnfractions .20 .80
 @item @emph{Interface}: @tab @code{subroutine acc_present_or_copyin(a)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item @emph{Interface}: @tab @code{subroutine acc_present_or_copyin(a, len)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item                   @tab @code{integer len}
 @item @emph{Interface}: @tab @code{subroutine acc_pcopyin(a)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item @emph{Interface}: @tab @code{subroutine acc_pcopyin(a, len)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item                   @tab @code{integer len}
 @end multitable
 
@@ -5366,15 +5480,15 @@ array element and @var{len} specifies the length in bytes.
 @item @emph{Fortran}:
 @multitable @columnfractions .20 .80
 @item @emph{Interface}: @tab @code{subroutine acc_create(a)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item @emph{Interface}: @tab @code{subroutine acc_create(a, len)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item                   @tab @code{integer len}
 @item @emph{Interface}: @tab @code{subroutine acc_create_async(a, async)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item                   @tab @code{integer(acc_handle_kind) :: async}
 @item @emph{Interface}: @tab @code{subroutine acc_create_async(a, len, async)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item                   @tab @code{integer len}
 @item                   @tab @code{integer(acc_handle_kind) :: async}
 @end multitable
@@ -5411,14 +5525,14 @@ backward compatibility with OpenACC 2.0; use @ref{acc_create} instead.
 @item @emph{Fortran}:
 @multitable @columnfractions .20 .80
 @item @emph{Interface}: @tab @code{subroutine acc_present_or_create(a)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item @emph{Interface}: @tab @code{subroutine acc_present_or_create(a, len)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item                   @tab @code{integer len}
 @item @emph{Interface}: @tab @code{subroutine acc_pcreate(a)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item @emph{Interface}: @tab @code{subroutine acc_pcreate(a, len)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item                   @tab @code{integer len}
 @end multitable
 
@@ -5451,27 +5565,27 @@ array element and @var{len} specifies the length in bytes.
 @item @emph{Fortran}:
 @multitable @columnfractions .20 .80
 @item @emph{Interface}: @tab @code{subroutine acc_copyout(a)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item @emph{Interface}: @tab @code{subroutine acc_copyout(a, len)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item                   @tab @code{integer len}
 @item @emph{Interface}: @tab @code{subroutine acc_copyout_async(a, async)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item                   @tab @code{integer(acc_handle_kind) :: async}
 @item @emph{Interface}: @tab @code{subroutine acc_copyout_async(a, len, async)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item                   @tab @code{integer len}
 @item                   @tab @code{integer(acc_handle_kind) :: async}
 @item @emph{Interface}: @tab @code{subroutine acc_copyout_finalize(a)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item @emph{Interface}: @tab @code{subroutine acc_copyout_finalize(a, len)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item                   @tab @code{integer len}
 @item @emph{Interface}: @tab @code{subroutine acc_copyout_finalize_async(a, async)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item                   @tab @code{integer(acc_handle_kind) :: async}
 @item @emph{Interface}: @tab @code{subroutine acc_copyout_finalize_async(a, len, async)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item                   @tab @code{integer len}
 @item                   @tab @code{integer(acc_handle_kind) :: async}
 @end multitable
@@ -5505,27 +5619,27 @@ array element and @var{len} specifies the length in bytes.
 @item @emph{Fortran}:
 @multitable @columnfractions .20 .80
 @item @emph{Interface}: @tab @code{subroutine acc_delete(a)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item @emph{Interface}: @tab @code{subroutine acc_delete(a, len)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item                   @tab @code{integer len}
 @item @emph{Interface}: @tab @code{subroutine acc_delete_async(a, async)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item                   @tab @code{integer(acc_handle_kind) :: async}
 @item @emph{Interface}: @tab @code{subroutine acc_delete_async(a, len, async)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item                   @tab @code{integer len}
 @item                   @tab @code{integer(acc_handle_kind) :: async}
 @item @emph{Interface}: @tab @code{subroutine acc_delete_finalize(a)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item @emph{Interface}: @tab @code{subroutine acc_delete_finalize(a, len)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item                   @tab @code{integer len}
-@item @emph{Interface}: @tab @code{subroutine acc_delete_async_finalize(a, async)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item @emph{Interface}: @tab @code{subroutine acc_delete_finalize_async(a, async)}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item                   @tab @code{integer(acc_handle_kind) :: async}
-@item @emph{Interface}: @tab @code{subroutine acc_delete_async_finalize(a, len, async)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item @emph{Interface}: @tab @code{subroutine acc_delete_finalize_async(a, len, async)}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item                   @tab @code{integer len}
 @item                   @tab @code{integer(acc_handle_kind) :: async}
 @end multitable
@@ -5558,15 +5672,15 @@ array element and @var{len} specifies the length in bytes.
 @item @emph{Fortran}:
 @multitable @columnfractions .20 .80
 @item @emph{Interface}: @tab @code{subroutine acc_update_device(a)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item @emph{Interface}: @tab @code{subroutine acc_update_device(a, len)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item                   @tab @code{integer len}
 @item @emph{Interface}: @tab @code{subroutine acc_update_device_async(a, async)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item                   @tab @code{integer(acc_handle_kind) :: async}
 @item @emph{Interface}: @tab @code{subroutine acc_update_device_async(a, len, async)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item                   @tab @code{integer len}
 @item                   @tab @code{integer(acc_handle_kind) :: async}
 @end multitable
@@ -5599,15 +5713,15 @@ array element and @var{len} specifies the length in bytes.
 @item @emph{Fortran}:
 @multitable @columnfractions .20 .80
 @item @emph{Interface}: @tab @code{subroutine acc_update_self(a)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item @emph{Interface}: @tab @code{subroutine acc_update_self(a, len)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item                   @tab @code{integer len}
 @item @emph{Interface}: @tab @code{subroutine acc_update_self_async(a, async)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item                   @tab @code{integer(acc_handle_kind) :: async}
 @item @emph{Interface}: @tab @code{subroutine acc_update_self_async(a, len, async)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item                   @tab @code{integer len}
 @item                   @tab @code{integer(acc_handle_kind) :: async}
 @end multitable
@@ -5750,10 +5864,10 @@ a @code{false} is return to indicate the mapped memory is not present.
 @item @emph{Fortran}:
 @multitable @columnfractions .20 .80
 @item @emph{Interface}: @tab @code{function acc_is_present(a)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item                   @tab @code{logical acc_is_present}
 @item @emph{Interface}: @tab @code{function acc_is_present(a, len)}
-@item                   @tab @code{type, dimension(:[,:]...) :: a}
+@item                   @tab @code{type(*), dimension(..) :: a}
 @item                   @tab @code{integer len}
 @item                   @tab @code{logical acc_is_present}
 @end multitable
@@ -5837,6 +5951,44 @@ This function copies device memory specified by device address of
 
 
 
+@node acc_memcpy_device
+@section @code{acc_memcpy_device} -- Copy memory within a device.
+@table @asis
+@item @emph{Description}
+This function copies device memory from one memory location to another
+on the current device.  It copies @var{bytes} bytes of data from the device
+address, specified by @var{data_dev_src}, to the device address
+@var{data_dev_dest}.  The @code{_async} version performs the transfer
+asynchronously using the queue associated with @var{async_arg}.
+
+@item @emph{C/C++}:
+@multitable @columnfractions .20 .80
+@item @emph{Prototype}: @tab @code{void acc_memcpy_device(d_void* data_dev_dest,}
+@item                   @tab @code{d_void* data_dev_src, size_t bytes);}
+@item @emph{Prototype}: @tab @code{void acc_memcpy_device_async(d_void* data_dev_dest,}
+@item                   @tab @code{d_void* data_dev_src, size_t bytes, int async_arg);}
+@end multitable
+
+@item @emph{Fortran}:
+@multitable @columnfractions .20 .80
+@item @emph{Interface}: @tab @code{subroutine acc_memcpy_device(data_dev_dest, &}
+@item                   @tab @code{data_dev_src, bytes)}
+@item @emph{Interface}: @tab @code{subroutine acc_memcpy_device_async(data_dev_dest, &}
+@item                   @tab @code{data_dev_src, bytes, async_arg)}
+@item                   @tab @code{type(c_ptr), value :: data_dev_dest}
+@item                   @tab @code{type(c_ptr), value :: data_dev_src}
+@item                   @tab @code{integer(c_size_t), value :: bytes}
+@item                   @tab @code{integer(acc_handle_kind), value :: async_arg}
+@end multitable
+
+@item @emph{Reference}:
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
+3.2.33.  @uref{https://www.openacc.org, OpenACC specification v3.3}, section
+3.2.28.
+@end table
+
+
+
 @node acc_attach
 @section @code{acc_attach} -- Let device pointer point to device-pointer target.
 @table @asis
@@ -5850,19 +6002,19 @@ address to pointing to the corresponding device data.
 @item @emph{Prototype}: @tab @code{void acc_attach_async(h_void **ptr_addr, int async);}
 @end multitable
 
-@c @item @emph{Fortran}:
-@c @multitable @columnfractions .20 .80
-@c @item @emph{Interface}: @tab @code{subroutine acc_attach(ptr_addr)}
-@c @item @emph{Interface}: @tab @code{subroutine acc_attach_async(ptr_addr, async_arg)}
-@c @item                   @tab @code{type(*), dimension(..) :: ptr_addr}
-@c @item                   @tab @code{integer(acc_handle_kind), value :: async_arg}
-@c @end multitable
+@item @emph{Fortran}:
+@multitable @columnfractions .20 .80
+@item @emph{Interface}: @tab @code{subroutine acc_attach(ptr_addr)}
+@item @emph{Interface}: @tab @code{subroutine acc_attach_async(ptr_addr, async_arg)}
+@item                   @tab @code{type(*), dimension(..) :: ptr_addr}
+@item                   @tab @code{integer(acc_handle_kind), value :: async_arg}
+@end multitable
 
 @item @emph{Reference}:
 @uref{https://www.openacc.org, OpenACC specification v2.6}, section
 3.2.34.
-@c  @uref{https://www.openacc.org, OpenACC specification v3.3}, section
-@c 3.2.29.
+@uref{https://www.openacc.org, OpenACC specification v3.3}, section
+3.2.29.
 @end table
 
 
@@ -5882,21 +6034,21 @@ address to pointing to the corresponding host data.
 @item @emph{Prototype}: @tab @code{void acc_detach_finalize_async(h_void **ptr_addr, int async);}
 @end multitable
 
-@c @item @emph{Fortran}:
-@c @multitable @columnfractions .20 .80
-@c @item @emph{Interface}: @tab @code{subroutine acc_detach(ptr_addr)}
-@c @item @emph{Interface}: @tab @code{subroutine acc_detach_async(ptr_addr, async_arg)}
-@c @item @emph{Interface}: @tab @code{subroutine acc_detach_finalize(ptr_addr)}
-@c @item @emph{Interface}: @tab @code{subroutine acc_detach_finalize_async(ptr_addr, async_arg)}
-@c @item                   @tab @code{type(*), dimension(..) :: ptr_addr}
-@c @item                   @tab @code{integer(acc_handle_kind), value :: async_arg}
-@c @end multitable
+@item @emph{Fortran}:
+@multitable @columnfractions .20 .80
+@item @emph{Interface}: @tab @code{subroutine acc_detach(ptr_addr)}
+@item @emph{Interface}: @tab @code{subroutine acc_detach_async(ptr_addr, async_arg)}
+@item @emph{Interface}: @tab @code{subroutine acc_detach_finalize(ptr_addr)}
+@item @emph{Interface}: @tab @code{subroutine acc_detach_finalize_async(ptr_addr, async_arg)}
+@item                   @tab @code{type(*), dimension(..) :: ptr_addr}
+@item                   @tab @code{integer(acc_handle_kind), value :: async_arg}
+@end multitable
 
 @item @emph{Reference}:
 @uref{https://www.openacc.org, OpenACC specification v2.6}, section
 3.2.35.
-@c  @uref{https://www.openacc.org, OpenACC specification v3.3}, section
-@c 3.2.29.
+@uref{https://www.openacc.org, OpenACC specification v3.3}, section
+3.2.29.
 @end table
 
 
@@ -6707,6 +6859,13 @@ the following traits are supported in addition; while OpenMP is supported
 on more architectures, GCC currently does not match any @code{arch} or
 @code{isa} traits for those.
 
+Note that for AMD GCN and Nvidia PTX, the @code{isa} is currently an
+exact match between the compiled-for ISA architecture and the matching
+@code{isa} trait value.  For instance, when compiling for @code{gfx942},
+the @code{isa} trait value @code{gfx9-4-generic} is not matched and,
+likewise, @code{gfx942} is not matched when compiling for its generic
+architecture.
+
 @multitable @columnfractions .65 .30
 @headitem @code{arch} @tab @code{isa}
 @item @code{x86}, @code{x86_64}, @code{i386}, @code{i486},
@@ -6718,6 +6877,7 @@ on more architectures, GCC currently does not match any @code{arch} or
       @tab See @code{-march=} in ``Nvidia PTX Options''
 @end multitable
 
+
 @node Memory allocation
 @section Memory allocation
 
@@ -6752,11 +6912,96 @@ The description below applies to:
       @code{_Alignof} and C++'s @code{alignof}.
 @end itemize
 
-For the available predefined allocators and, as applicable, their associated
-predefined memory spaces and for the available traits and their default values,
-see @ref{OMP_ALLOCATOR}.  Predefined allocators without an associated memory
-space use the @code{omp_default_mem_space} memory space.  See additionally
-@ref{Offload-Target Specifics}.
+GCC supports the following predefined allocators and predefined memory spaces:
+
+@multitable @columnfractions .45 .45
+@headitem Predefined allocators @tab Associated predefined memory spaces
+@item omp_default_mem_alloc     @tab omp_default_mem_space
+@item omp_large_cap_mem_alloc   @tab omp_large_cap_mem_space
+@item omp_const_mem_alloc       @tab omp_const_mem_space
+@item omp_high_bw_mem_alloc     @tab omp_high_bw_mem_space
+@item omp_low_lat_mem_alloc     @tab omp_low_lat_mem_space
+@item omp_cgroup_mem_alloc      @tab omp_low_lat_mem_space (implementation defined)
+@item omp_pteam_mem_alloc       @tab omp_low_lat_mem_space (implementation defined)
+@item omp_thread_mem_alloc      @tab omp_low_lat_mem_space (implementation defined)
+@item ompx_gnu_pinned_mem_alloc @tab omp_default_mem_space (GNU extension)
+@item ompx_gnu_managed_mem_alloc @tab ompx_gnu_managed_mem_space (GNU extension)
+@end multitable
+
+Each predefined allocator, including @code{omp_null_allocator}, has a corresponding
+allocator class template that meets the C++ allocator completeness requirements.
+These are located in the @code{omp::allocator} namespace, and the
+@code{ompx::allocator} namespace for GNU extensions.  This allows the
+allocator-aware C++ standard library containers to use OpenMP allocation routines;
+for instance:
+
+@smallexample
+std::vector<int, omp::allocator::cgroup_mem<int>> vec;
+@end smallexample
+
+The following allocator templates are supported:
+
+@multitable @columnfractions .45 .45
+@headitem Predefined allocators @tab Associated allocator template
+@item omp_null_allocator        @tab omp::allocator::null_allocator
+@item omp_default_mem_alloc     @tab omp::allocator::default_mem
+@item omp_large_cap_mem_alloc   @tab omp::allocator::large_cap_mem
+@item omp_const_mem_alloc       @tab omp::allocator::const_mem
+@item omp_high_bw_mem_alloc     @tab omp::allocator::high_bw_mem
+@item omp_low_lat_mem_alloc     @tab omp::allocator::low_lat_mem
+@item omp_cgroup_mem_alloc      @tab omp::allocator::cgroup_mem
+@item omp_pteam_mem_alloc       @tab omp::allocator::pteam_mem
+@item omp_thread_mem_alloc      @tab omp::allocator::thread_mem
+@item ompx_gnu_pinned_mem_alloc @tab ompx::allocator::gnu_pinned_mem
+@item ompx_gnu_managed_mem_alloc @tab ompx::allocator::gnu_managed_mem
+@end multitable
+
+The following traits are available when constructing a new allocator;
+if a trait is not specified or is given the value @code{default}, the
+default value listed below is used for that trait.  The predefined
+allocators use the default values of each trait, except that the
+@code{omp_cgroup_mem_alloc}, @code{omp_pteam_mem_alloc}, and
+@code{omp_thread_mem_alloc} allocators have the @code{access} trait
+set to @code{cgroup}, @code{pteam}, and @code{thread}, respectively.
+For each trait, a named constant prefixed by @code{omp_atk_} exists;
+for each non-numeric value, a named constant prefixed by @code{omp_atv_}
+exists.
+
+@multitable @columnfractions .25 .40 .25
+@headitem Trait @tab Allowed values @tab Default value
+@item @code{sync_hint} @tab @code{contended}, @code{uncontended},
+                            @code{serialized}, @code{private}
+                       @tab @code{contended}
+@item @code{alignment} @tab Positive integer being a power of two
+                       @tab 1 byte
+@item @code{access}    @tab @code{all}, @code{cgroup},
+                            @code{pteam}, @code{thread}
+                       @tab @code{all}
+@item @code{pool_size} @tab Positive integer (bytes)
+                       @tab See below.
+@item @code{fallback}  @tab @code{default_mem_fb}, @code{null_fb},
+                            @code{abort_fb}, @code{allocator_fb}
+                       @tab See below
+@item @code{fb_data}   @tab @emph{allocator handle}
+                       @tab (none)
+@item @code{pinned}    @tab @code{true}, @code{false}
+                       @tab See below
+@item @code{partition} @tab @code{environment}, @code{nearest},
+                            @code{blocked}, @code{interleaved}
+                       @tab @code{environment}
+@end multitable
+
+For the @code{fallback} trait, the default value is @code{null_fb} for the
+@code{omp_default_mem_alloc} allocator and any allocator that is associated
+with device memory; for all other allocators, it is @code{default_mem_fb}
+by default.
+
+For the @code{pinned} trait, the default value is @code{true} for
+predefined allocator @code{ompx_gnu_pinned_mem_alloc} (a GNU extension), and
+@code{false} for all others.
+
+The following description applies to the initial device (the host) and largely
+also to non-host devices; for the latter, also see @ref{Offload-Target Specifics}.
 
 For the memory spaces, the following applies:
 @itemize
@@ -6768,17 +7013,32 @@ For the memory spaces, the following applies:
       unless the memkind library is available
 @item @code{omp_high_bw_mem_space} maps to @code{omp_default_mem_space},
       unless the memkind library is available
+@item @code{ompx_gnu_managed_mem_space} is a GNU extension that provides
+      managed memory accessible by both host and devices.  The memory space is
+      available if the offload target associated with the
+      @var{default-device-var} ICV supports managed memory (see
+      @ref{Offload-Target Specifics}).  This memory is accessible by both the
+      host and the device at the same address, so it need not be mapped with
+      @code{map} clauses.  Instead, use the @code{is_device_ptr} clause or
+      @code{has_device_addr} clause to indicate that the pointer is already
+      accessible on the device.  If managed memory is not supported by the
+      default device, as configured at the moment the allocator is called, then
+      the allocator will use the fall-back setting.  If the default device is
+      configured differently when the memory is freed, via @code{omp_free} or
+      @code{omp_realloc}, the result may be undefined.
 @end itemize
 
 On Linux systems, where the @uref{https://github.com/memkind/memkind, memkind
-library} (@code{libmemkind.so.0}) is available at runtime, it is used when
-creating memory allocators requesting
+library} (@code{libmemkind.so.0}) is available at runtime and the respective
+memkind kind is supported, it is used when creating memory allocators requesting
 
 @itemize
-@item the memory space @code{omp_high_bw_mem_space}
-@item the memory space @code{omp_large_cap_mem_space}
-@item the @code{partition} trait @code{interleaved}; note that for
-      @code{omp_large_cap_mem_space} the allocation will not be interleaved
+@item the @code{partition} trait @code{interleaved} except when the memory space
+      is @code{omp_large_cap_mem_space} (uses @code{MEMKIND_HBW_INTERLEAVE})
+@item the memory space @code{omp_high_bw_mem_space} (uses
+      @code{MEMKIND_HBW_PREFERRED})
+@item the memory space @code{omp_large_cap_mem_space} (uses
+      @code{MEMKIND_DAX_KMEM_ALL} or, if not available, @code{MEMKIND_DAX_KMEM})
 @end itemize
 
 On Linux systems, where the @uref{https://github.com/numactl/numactl, numa
@@ -6803,11 +7063,17 @@ a @code{nearest} allocation.
 
 Additional notes regarding the traits:
 @itemize
-@item The @code{pinned} trait is supported on Linux hosts, but is subject to
-      the OS @code{ulimit}/@code{rlimit} locked memory settings.
+@item The @code{pinned} trait is supported on Linux hosts, but is usually
+      subject to the OS @code{ulimit}/@code{rlimit} locked memory settings (see
+      @ref{Offload-Target Specifics} for exceptions).  The implementation
+      uses a custom allocator to try to use as few memory pages as possible.
+      At present, freed pinned memory is not returned to the OS (although it
+      may be reused by subsequent pinned allocations).
 @item The default for the @code{pool_size} trait is no pool and for every
       (re)allocation the associated library routine is called, which might
-      internally use a memory pool.
+      internally use a memory pool.  Currently, the same applies when a
+      @code{pool_size} has been specified, except that once allocations exceed
+      the pool size, the action of the @code{fallback} trait applies.
 @item For the @code{partition} trait, the partition part size will be the same
       as the requested size (i.e. @code{interleaved} or @code{blocked} has no
       effect), except for @code{interleaved} when the memkind library is
@@ -6816,13 +7082,15 @@ Additional notes regarding the traits:
       that allocated the memory; on Linux, this is in particular the case when
       the memory placement policy is set to preferred.
 @item The @code{access} trait has no effect such that memory is always
-      accessible by all threads.
+      accessible by all threads, except on supported non-host devices.
 @item The @code{sync_hint} trait has no effect.
 @end itemize
 
 See also:
 @ref{Offload-Target Specifics}
 
+
+
 @c ---------------------------------------------------------------------
 @c Offload-Target Specifics
 @c ---------------------------------------------------------------------
@@ -6889,13 +7157,14 @@ The implementation remark:
       such that the next reverse offload region is only executed after the previous
       one returned.
 @item OpenMP code that has a @code{requires} directive with @code{self_maps} or
-      @code{unified_shared_memory} is only supported if all AMD GPUs have the
-      @code{HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT} property; for
-      discrete GPUs, this may require setting the @code{HSA_XNACK} environment
-      variable to @samp{1}; for systems with both an APU and a discrete GPU that
-      does not support XNACK, consider using @code{ROCR_VISIBLE_DEVICES} to
-      enable only the APU.  If not supported, all AMD GPU devices are removed
-      from the list of available devices (``host fallback'').
+      @code{unified_shared_memory} is only supported if @emph{all} the AMD GPUs
+      present have the @code{HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT}
+      property; some systems require the "xnack" feature enabled for this to be
+      true, in which case the runtime will attempt to set the @code{HSA_XNACK}
+      environment variable to @samp{1} automatically (user-set values are not
+      overridden, and the setting only affects the executable itself and any
+      child processes).  If any AMD GPU device is not supported, all AMD GPUs
+      are removed from the list of available devices (``host fallback'').
 @item The available stack size can be changed using the @code{GCN_STACK_SIZE}
       environment variable; the default is 32 kiB per thread.
 @item Low-latency memory (@code{omp_low_lat_mem_space}) is supported when the
@@ -6911,6 +7180,32 @@ The implementation remark:
       @code{omp_thread_mem_alloc}, all use low-latency memory as first
       preference, and fall back to main graphics memory when the low-latency
       pool is exhausted.
+@item Pinned memory allocated using @code{omp_alloc} with the
+      @code{ompx_gnu_pinned_mem_alloc} allocator or the @code{pinned} trait is
+      obtained via the CUDA API when an NVPTX device is present.  This provides
+      a performance boost for NVPTX offload code and also allows unlimited use
+      of pinned memory regardless of the OS @code{ulimit}/@code{rlimit}
+      settings.
+@item Managed memory allocated on the host with the
+      @code{ompx_gnu_managed_mem_alloc} allocator or in the
+      @code{ompx_gnu_managed_mem_space} (both GNU extensions) is
+      equivalent to HIP Managed Memory, although @emph{not} actually allocated
+      using @code{hipMallocManaged}.  This memory is accessible by both the
+      host and the device at the same address, so it need not be mapped with
+      @code{map} clauses.  Instead, use the @code{is_device_ptr} clause or
+      @code{has_device_addr} clause to indicate that the pointer is already
+      accessible on the device.  The ROCm runtime will automatically handle
+      data migration between host and device as needed.  Not all AMD GPU
+      devices support this feature, and many that do require that
+      @code{-mxnack=on} is configured at compile time.  If managed memory is
+      not supported by the default device, as configured at the moment the
+      allocator is called, then the allocator will use the fall-back setting.
+      If the default device is configured differently when the memory is freed,
+      via @code{omp_free} or @code{omp_realloc}, the result may be undefined.
+      If the current device does not support Unified Shared Memory (or it is
+      not enabled with @code{HSA_XNACK=1}) then Managed Memory might still
+      work, but allocations may only be visible to a single device (whichever
+      was the default device when the @emph{first} allocation was made).
 @item The OpenMP routines @code{omp_target_memcpy_rect} and
       @code{omp_target_memcpy_rect_async} and the @code{target update}
       directive for non-contiguous list items use the 3D memory-copy function
@@ -7073,6 +7368,20 @@ The implementation remark:
       @code{omp_thread_mem_alloc}, all use low-latency memory as first
       preference, and fall back to main graphics memory when the low-latency
       pool is exhausted.
+@item Managed memory allocated on the host with the
+      @code{ompx_gnu_managed_mem_alloc} allocator or in the
+      @code{ompx_gnu_managed_mem_space} (both GNU extensions) is placed
+      in the CUDA Managed Memory space using @code{cuMemAllocManaged}.  This
+      memory is accessible by both the host and the device at the same address,
+      so it need not be mapped with @code{map} clauses.  Instead, use the
+      @code{is_device_ptr} clause or @code{has_device_addr} clause to indicate
+      that the pointer is already accessible on the device.  The CUDA runtime
+      will automatically handle data migration between host and device as
+      needed.  If managed memory is not supported by the default device, as
+      configured at the moment the allocator is called, then the allocator will
+      use the fall-back setting.  If the default device is configured
+      differently when the memory is freed, via @code{omp_free} or
+      @code{omp_realloc}, the result may be undefined.
 @item The OpenMP routines @code{omp_target_memcpy_rect} and
       @code{omp_target_memcpy_rect_async} and the @code{target update}
       directive for non-contiguous list items use the 2D and 3D memory-copy
@@ -7181,7 +7490,7 @@ The following sections present notes on the external ABI as
 presented by libgomp.  Only maintainers should need them.
 
 @menu
-* Implementing MASTER construct::
+* Implementing MASKED and MASTER construct::
 * Implementing CRITICAL construct::
 * Implementing ATOMIC construct::
 * Implementing FLUSH construct::
@@ -7199,16 +7508,19 @@ presented by libgomp.  Only maintainers should need them.
 @end menu
 
 
-@node Implementing MASTER construct
-@section Implementing MASTER construct
+@node Implementing MASKED and MASTER construct
+@section Implementing MASKED and MASTER construct
 
 @smallexample
-if (omp_get_thread_num () == 0)
+if (omp_get_thread_num () == thread_num)
   block
 @end smallexample
 
+Here, @var{thread_num} has the value of the argument to the @code{filter}
+clause or zero if not specified.
+
 Alternately, we generate two copies of the parallel subfunction
-and only include this in the version run by the primary thread.
+and only include this in the version run by the @var{thread_num} thread.
 Surely this is not worthwhile though...
 
 
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index 718252b..5b8ba7e 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -171,21 +171,22 @@ acc_free (void *d)
 }
 
 static void
-memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
-		      const char *libfnname)
+memcpy_tofrom_device (bool dev_to, bool dev_from, void *dst, void *src,
+		      size_t s, int async, const char *libfnname)
 {
   /* No need to call lazy open here, as the device pointer must have
      been obtained from a routine that did that.  */
   struct goacc_thread *thr = goacc_thread ();
 
   assert (thr && thr->dev);
+  if (s == 0)
+    return;
 
   if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
     {
-      if (from)
-	memmove (h, d, s);
-      else
-	memmove (d, h, s);
+      if (src == dst)
+	return;
+      memcpy (dst, src, s);
       return;
     }
 
@@ -199,10 +200,15 @@ memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
     }
 
   goacc_aq aq = get_goacc_asyncqueue (async);
-  if (from)
-    gomp_copy_dev2host (thr->dev, aq, h, d, s);
+  if (dev_to && dev_from)
+    {
+      if (dst != src)
+	gomp_copy_dev2dev (thr->dev, aq, dst, src, s);
+    }
+  else if (dev_from)
+    gomp_copy_dev2host (thr->dev, aq, dst, src, s);
   else
-    gomp_copy_host2dev (thr->dev, aq, d, h, s, false, /* TODO: cbuf? */ NULL);
+    gomp_copy_host2dev (thr->dev, aq, dst, src, s, false, /* TODO: cbuf? */ NULL);
 
   if (profiling_p)
     {
@@ -214,25 +220,37 @@ memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
 void
 acc_memcpy_to_device (void *d, void *h, size_t s)
 {
-  memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__);
+  memcpy_tofrom_device (true, false, d, h, s, acc_async_sync, __FUNCTION__);
 }
 
 void
 acc_memcpy_to_device_async (void *d, void *h, size_t s, int async)
 {
-  memcpy_tofrom_device (false, d, h, s, async, __FUNCTION__);
+  memcpy_tofrom_device (true, false, d, h, s, async, __FUNCTION__);
 }
 
 void
 acc_memcpy_from_device (void *h, void *d, size_t s)
 {
-  memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__);
+  memcpy_tofrom_device (false, true, h, d, s, acc_async_sync, __FUNCTION__);
 }
 
 void
 acc_memcpy_from_device_async (void *h, void *d, size_t s, int async)
 {
-  memcpy_tofrom_device (true, d, h, s, async, __FUNCTION__);
+  memcpy_tofrom_device (false, true, h, d, s, async, __FUNCTION__);
+}
+
+void
+acc_memcpy_device (void *dst, void *src, size_t s)
+{
+  memcpy_tofrom_device (true, true, dst, src, s, acc_async_sync, __FUNCTION__);
+}
+
+void
+acc_memcpy_device_async (void *dst, void *src, size_t s, int async)
+{
+  memcpy_tofrom_device (true, true, dst, src, s, async, __FUNCTION__);
 }
 
 /* Return the device pointer that corresponds to host data H.  Or NULL
@@ -951,7 +969,7 @@ acc_attach_async (void **hostaddr, int async)
     }
 
   gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, (uintptr_t) hostaddr,
-		       0, NULL, false);
+		       0, NULL, false, true);
 
   gomp_mutex_unlock (&acc_dev->lock);
 }
@@ -1158,7 +1176,7 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
 	  if ((kinds[i] & 0xff) == GOMP_MAP_ATTACH)
 	    {
 	      gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n,
-				   (uintptr_t) h, s, NULL, false);
+				   (uintptr_t) h, s, NULL, false, true);
 	      /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic
 		 reference counts ('n->refcount', 'n->dynamic_refcount').  */
 	    }
@@ -1176,7 +1194,7 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
 		  = lookup_host (acc_dev, hostaddrs[j], sizeof (void *));
 		gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, m,
 				     (uintptr_t) hostaddrs[j], sizes[j], NULL,
-				     false);
+				     false, true);
 	      }
 
 	  bool processed = false;
diff --git a/libgomp/omp.h.in b/libgomp/omp.h.in
index 8d17db1..329d8dc 100644
--- a/libgomp/omp.h.in
+++ b/libgomp/omp.h.in
@@ -121,6 +121,7 @@ typedef enum omp_memspace_handle_t __GOMP_UINTPTR_T_ENUM
   omp_const_mem_space = 2,
   omp_high_bw_mem_space = 3,
   omp_low_lat_mem_space = 4,
+  ompx_gnu_managed_mem_space = 200,
   __omp_memspace_handle_t_max__ = __UINTPTR_MAX__
 } omp_memspace_handle_t;
 
@@ -136,6 +137,7 @@ typedef enum omp_allocator_handle_t __GOMP_UINTPTR_T_ENUM
   omp_pteam_mem_alloc = 7,
   omp_thread_mem_alloc = 8,
   ompx_gnu_pinned_mem_alloc = 200,
+  ompx_gnu_managed_mem_alloc = 201,
   __omp_allocator_handle_t_max__ = __UINTPTR_MAX__
 } omp_allocator_handle_t;
 
@@ -189,7 +191,8 @@ typedef enum omp_event_handle_t __GOMP_UINTPTR_T_ENUM
 enum
 {
   omp_initial_device = -1,
-  omp_invalid_device = -4
+  omp_invalid_device = -4,
+  omp_default_device = -5
 };
 
 typedef enum omp_interop_t __GOMP_UINTPTR_T_ENUM
@@ -347,6 +350,10 @@ extern int omp_target_memcpy_rect_async (void *, const void *, __SIZE_TYPE__,
 					 const __SIZE_TYPE__ *, int, int, int,
 					 omp_depend_t * __GOMP_DEFAULT_NULL)
   __GOMP_NOTHROW;
+extern void *omp_target_memset (void *, int, __SIZE_TYPE__, int) __GOMP_NOTHROW;
+extern void *omp_target_memset_async (void *, int, __SIZE_TYPE__, int,
+				      int, omp_depend_t * __GOMP_DEFAULT_NULL)
+  __GOMP_NOTHROW;
 extern int omp_target_associate_ptr (const void *, const void *, __SIZE_TYPE__,
 				     __SIZE_TYPE__, int) __GOMP_NOTHROW;
 extern int omp_target_disassociate_ptr (const void *, int) __GOMP_NOTHROW;
@@ -445,104 +452,104 @@ namespace allocator
 namespace __detail
 {
 
-template<typename __T, omp_allocator_handle_t __Handle>
+template <typename __T, omp_allocator_handle_t __Handle>
 struct __allocator_templ
 {
   using value_type = __T;
-  using pointer = __T*;
-  using const_pointer = const __T*;
+  using pointer = __T *;
+  using const_pointer = const __T *;
   using size_type = __SIZE_TYPE__;
   using difference_type = __PTRDIFF_TYPE__;
 
-  __T*
+  __T *
   allocate (size_type __n)
   {
-    if (__SIZE_MAX__ / sizeof(__T) < __n)
+    if (__SIZE_MAX__ / sizeof (__T) < __n)
       std::__throw_bad_array_new_length ();
     void *__p = omp_aligned_alloc (alignof(__T), __n * sizeof(__T), __Handle);
     if (!__p)
       std::__throw_bad_alloc ();
-    return static_cast<__T*>(__p);
+    return static_cast <__T *> (__p);
   }
 
   void
   deallocate (__T *__p, size_type) __GOMP_NOTHROW
   {
-    omp_free (static_cast<void*>(__p), __Handle);
+    omp_free (static_cast <void *> (__p), __Handle);
   }
 };
 
-template<typename __T, typename __U, omp_allocator_handle_t __Handle>
+template <typename __T, typename __U, omp_allocator_handle_t __Handle>
 constexpr bool
-operator== (const __allocator_templ<__T, __Handle>&,
-	    const __allocator_templ<__U, __Handle>&) __GOMP_NOTHROW
+operator== (const __allocator_templ <__T, __Handle> &,
+	    const __allocator_templ <__U, __Handle> &) __GOMP_NOTHROW
 {
   return true;
 }
 
-template<typename __T, omp_allocator_handle_t __Handle,
-	 typename __U, omp_allocator_handle_t __UHandle>
+template <typename __T, omp_allocator_handle_t __Handle,
+	  typename __U, omp_allocator_handle_t __UHandle>
 constexpr bool
-operator== (const __allocator_templ<__T, __Handle>&,
-	    const __allocator_templ<__U, __UHandle>&) __GOMP_NOTHROW
+operator== (const __allocator_templ <__T, __Handle> &,
+	    const __allocator_templ <__U, __UHandle> &) __GOMP_NOTHROW
 {
   return false;
 }
 
-template<typename __T, typename __U, omp_allocator_handle_t __Handle>
+template <typename __T, typename __U, omp_allocator_handle_t __Handle>
 constexpr bool
-operator!= (const __allocator_templ<__T, __Handle>&,
-	    const __allocator_templ<__U, __Handle>&) __GOMP_NOTHROW
+operator!= (const __allocator_templ <__T, __Handle> &,
+	    const __allocator_templ <__U, __Handle> &) __GOMP_NOTHROW
 {
   return false;
 }
 
-template<typename __T, omp_allocator_handle_t __Handle,
-	 typename __U, omp_allocator_handle_t __UHandle>
+template <typename __T, omp_allocator_handle_t __Handle,
+	  typename __U, omp_allocator_handle_t __UHandle>
 constexpr bool
-operator!= (const __allocator_templ<__T, __Handle>&,
-	    const __allocator_templ<__U, __UHandle>&) __GOMP_NOTHROW
+operator!= (const __allocator_templ <__T, __Handle> &,
+	    const __allocator_templ <__U, __UHandle> &) __GOMP_NOTHROW
 {
   return true;
 }
 
 } /* namespace __detail */
 
-template<typename __T>
+template <typename __T>
 struct null_allocator
-  : __detail::__allocator_templ<__T, omp_null_allocator> {};
+  : __detail::__allocator_templ <__T, omp_null_allocator> {};
 
-template<typename __T>
+template <typename __T>
 struct default_mem
-  : __detail::__allocator_templ<__T, omp_default_mem_alloc> {};
+  : __detail::__allocator_templ <__T, omp_default_mem_alloc> {};
 
-template<typename __T>
+template <typename __T>
 struct large_cap_mem
-  : __detail::__allocator_templ<__T, omp_large_cap_mem_alloc> {};
+  : __detail::__allocator_templ <__T, omp_large_cap_mem_alloc> {};
 
-template<typename __T>
+template <typename __T>
 struct const_mem
-  : __detail::__allocator_templ<__T, omp_const_mem_alloc> {};
+  : __detail::__allocator_templ <__T, omp_const_mem_alloc> {};
 
-template<typename __T>
+template <typename __T>
 struct high_bw_mem
-  : __detail::__allocator_templ<__T, omp_high_bw_mem_alloc> {};
+  : __detail::__allocator_templ <__T, omp_high_bw_mem_alloc> {};
 
-template<typename __T>
+template <typename __T>
 struct low_lat_mem
-  : __detail::__allocator_templ<__T, omp_low_lat_mem_alloc> {};
+  : __detail::__allocator_templ <__T, omp_low_lat_mem_alloc> {};
 
-template<typename __T>
+template <typename __T>
 struct cgroup_mem
-  : __detail::__allocator_templ<__T, omp_cgroup_mem_alloc> {};
+  : __detail::__allocator_templ <__T, omp_cgroup_mem_alloc> {};
 
-template<typename __T>
+template <typename __T>
 struct pteam_mem
-  : __detail::__allocator_templ<__T, omp_pteam_mem_alloc> {};
+  : __detail::__allocator_templ <__T, omp_pteam_mem_alloc> {};
 
-template<typename __T>
+template <typename __T>
 struct thread_mem
-  : __detail::__allocator_templ<__T, omp_thread_mem_alloc> {};
+  : __detail::__allocator_templ <__T, omp_thread_mem_alloc> {};
 
 } /* namespace allocator */
 
@@ -554,9 +561,14 @@ namespace ompx
 namespace allocator
 {
 
-template<typename __T>
+template <typename __T>
 struct gnu_pinned_mem
-  : omp::allocator::__detail::__allocator_templ<__T, ompx_gnu_pinned_mem_alloc> {};
+  : omp::allocator::__detail::__allocator_templ <__T,
+						 ompx_gnu_pinned_mem_alloc> {};
+template <typename __T>
+struct gnu_managed_mem
+  : omp::allocator::__detail::__allocator_templ <__T,
+						 ompx_gnu_managed_mem_alloc> {};
 
 } /* namespace allocator */
 
diff --git a/libgomp/omp_lib.f90.in b/libgomp/omp_lib.f90.in
index cb6b95f..1b1a163 100644
--- a/libgomp/omp_lib.f90.in
+++ b/libgomp/omp_lib.f90.in
@@ -164,6 +164,8 @@
                  parameter :: omp_thread_mem_alloc = 8
         integer (kind=omp_allocator_handle_kind), &
                  parameter :: ompx_gnu_pinned_mem_alloc = 200
+        integer (kind=omp_allocator_handle_kind), &
+                 parameter :: ompx_gnu_managed_mem_alloc = 201
         integer (omp_memspace_handle_kind), &
                  parameter :: omp_default_mem_space = 0
         integer (omp_memspace_handle_kind), &
@@ -174,8 +176,11 @@
                  parameter :: omp_high_bw_mem_space = 3
         integer (omp_memspace_handle_kind), &
                  parameter :: omp_low_lat_mem_space = 4
+        integer (omp_memspace_handle_kind), &
+                 parameter :: ompx_gnu_managed_mem_space = 200
         integer, parameter :: omp_initial_device = -1
         integer, parameter :: omp_invalid_device = -4
+        integer, parameter :: omp_default_device = -5
         integer (omp_interop_kind), &
                  parameter :: omp_interop_none = 0_omp_interop_kind
         integer (omp_interop_fr_kind), parameter :: omp_ifr_cuda = 1
@@ -904,6 +909,29 @@
         end interface
 
         interface
+          function omp_target_memset (ptr, val, count, device_num) bind(c)
+            use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_size_t
+            type(c_ptr) :: omp_target_memset
+            type(c_ptr), value :: ptr
+            integer(c_size_t), value :: count
+            integer(c_int), value :: val, device_num
+          end function omp_target_memset
+        end interface
+
+        interface
+          function omp_target_memset_async (ptr, val, count, device_num, &
+                                            depobj_count, depobj_list) bind(c)
+            use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_size_t
+            import :: omp_depend_kind
+            type(c_ptr) :: omp_target_memset_async
+            type(c_ptr), value :: ptr
+            integer(c_size_t), value :: count
+            integer(c_int), value :: val, device_num, depobj_count
+            integer(omp_depend_kind), optional :: depobj_list(*)
+          end function omp_target_memset_async
+        end interface
+
+        interface
           function omp_target_associate_ptr (host_ptr, device_ptr, size, &
                                              device_offset, device_num) bind(c)
             use, intrinsic :: iso_c_binding, only : c_ptr, c_size_t, c_int
diff --git a/libgomp/omp_lib.h.in b/libgomp/omp_lib.h.in
index f7af5ff..9330583 100644
--- a/libgomp/omp_lib.h.in
+++ b/libgomp/omp_lib.h.in
@@ -162,6 +162,7 @@
       integer (omp_allocator_handle_kind) omp_pteam_mem_alloc
       integer (omp_allocator_handle_kind) omp_thread_mem_alloc
       integer (omp_allocator_handle_kind) ompx_gnu_pinned_mem_alloc
+      integer (omp_allocator_handle_kind) ompx_gnu_managed_mem_alloc
       parameter (omp_null_allocator = 0)
       parameter (omp_default_mem_alloc = 1)
       parameter (omp_large_cap_mem_alloc = 2)
@@ -172,19 +173,23 @@
       parameter (omp_pteam_mem_alloc = 7)
       parameter (omp_thread_mem_alloc = 8)
       parameter (ompx_gnu_pinned_mem_alloc = 200)
+      parameter (ompx_gnu_managed_mem_alloc = 201)
       integer (omp_memspace_handle_kind) omp_default_mem_space
       integer (omp_memspace_handle_kind) omp_large_cap_mem_space
       integer (omp_memspace_handle_kind) omp_const_mem_space
       integer (omp_memspace_handle_kind) omp_high_bw_mem_space
       integer (omp_memspace_handle_kind) omp_low_lat_mem_space
+      integer (omp_memspace_handle_kind) ompx_gnu_managed_mem_space
       parameter (omp_default_mem_space = 0)
       parameter (omp_large_cap_mem_space = 1)
       parameter (omp_const_mem_space = 2)
       parameter (omp_high_bw_mem_space = 3)
       parameter (omp_low_lat_mem_space = 4)
-      integer omp_initial_device, omp_invalid_device
+      parameter (ompx_gnu_managed_mem_space = 200)
+      integer omp_initial_device, omp_invalid_device, omp_default_device
       parameter (omp_initial_device = -1)
       parameter (omp_invalid_device = -4)
+      parameter (omp_default_device = -5)
       integer (omp_interop_kind) omp_interop_none
       parameter (omp_interop_none = 0_omp_interop_kind)
       integer (omp_interop_fr_kind) omp_ifr_cuda
@@ -505,6 +510,31 @@
       end interface
 
       interface
+        function omp_target_memset (ptr, val, count, device_num) bind(c)
+          use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_size_t
+          type(c_ptr) omp_target_memset
+          type(c_ptr), value :: ptr
+          integer(c_size_t), value :: count
+          integer(c_int), value :: val, device_num
+        end function omp_target_memset
+      end interface
+
+      interface
+        function omp_target_memset_async (ptr, val, count, device_num,          &
+     &                                    depobj_count, depobj_list)            &
+     &      bind(c)
+          use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_size_t
+          import :: omp_depend_kind
+          type(c_ptr) :: omp_target_memset_async
+          type(c_ptr), value :: ptr
+          integer(c_size_t), value :: count
+          integer(c_int), value :: val, device_num, depobj_count
+          integer(omp_depend_kind), optional :: depobj_list(*)
+        end function omp_target_memset_async
+      end interface
+
+
+      interface
         function omp_target_associate_ptr (host_ptr, device_ptr, size,          &
      &                                     device_offset, device_num)           &
      &      bind(c)
diff --git a/libgomp/openacc.f90 b/libgomp/openacc.f90
index 8ef107e..1d94427 100644
--- a/libgomp/openacc.f90
+++ b/libgomp/openacc.f90
@@ -269,6 +269,30 @@ module openacc_internal
       type (*), dimension (..), contiguous :: a
     end subroutine
 
+    subroutine acc_copyout_finalize_async_32_h (a, len, async)
+      use iso_c_binding, only: c_int32_t
+      use openacc_kinds, only: acc_handle_kind
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int32_t) len
+      integer (acc_handle_kind) async
+    end subroutine
+
+    subroutine acc_copyout_finalize_async_64_h (a, len, async)
+      use iso_c_binding, only: c_int64_t
+      use openacc_kinds, only: acc_handle_kind
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int64_t) len
+      integer (acc_handle_kind) async
+    end subroutine
+
+    subroutine acc_copyout_finalize_async_array_h (a, async)
+      use openacc_kinds, only: acc_handle_kind
+      type (*), dimension (..), contiguous :: a
+      integer (acc_handle_kind) async
+    end subroutine
+
     subroutine acc_delete_32_h (a, len)
       use iso_c_binding, only: c_int32_t
       !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
@@ -458,6 +482,30 @@ module openacc_internal
       integer (acc_handle_kind) async
     end subroutine
 
+    subroutine acc_delete_finalize_async_32_h (a, len, async)
+      use iso_c_binding, only: c_int32_t
+      use openacc_kinds, only: acc_handle_kind
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int32_t) len
+      integer (acc_handle_kind) async
+    end subroutine
+
+    subroutine acc_delete_finalize_async_64_h (a, len, async)
+      use iso_c_binding, only: c_int64_t
+      use openacc_kinds, only: acc_handle_kind
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int64_t) len
+      integer (acc_handle_kind) async
+    end subroutine
+
+    subroutine acc_delete_finalize_async_array_h (a, async)
+      use openacc_kinds, only: acc_handle_kind
+      type (*), dimension (..), contiguous :: a
+      integer (acc_handle_kind) async
+    end subroutine
+
     subroutine acc_update_device_async_32_h (a, len, async)
       use iso_c_binding, only: c_int32_t
       use openacc_kinds, only: acc_handle_kind
@@ -663,6 +711,15 @@ module openacc_internal
       integer (c_size_t), value :: len
     end subroutine
 
+    subroutine acc_copyout_finalize_async_l (a, len, async) &
+        bind (C, name = "acc_copyout_finalize_async")
+      use iso_c_binding, only: c_size_t, c_int
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_size_t), value :: len
+      integer (c_int), value :: async
+    end subroutine
+
     subroutine acc_delete_l (a, len) &
         bind (C, name = "acc_delete")
       use iso_c_binding, only: c_size_t
@@ -679,6 +736,15 @@ module openacc_internal
       integer (c_size_t), value :: len
     end subroutine
 
+    subroutine acc_delete_finalize_async_l (a, len, async) &
+        bind (C, name = "acc_delete_finalize_async")
+      use iso_c_binding, only: c_size_t, c_int
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_size_t), value :: len
+      integer (c_int), value :: async
+    end subroutine
+
     subroutine acc_update_device_l (a, len) &
         bind (C, name = "acc_update_device")
       use iso_c_binding, only: c_size_t
@@ -794,9 +860,13 @@ module openacc
   public :: acc_deviceptr, acc_hostptr, acc_is_present
   public :: acc_copyin_async, acc_create_async, acc_copyout_async
   public :: acc_delete_async, acc_update_device_async, acc_update_self_async
-  public :: acc_copyout_finalize, acc_delete_finalize
+  public :: acc_copyout_finalize, acc_copyout_finalize_async
+  public :: acc_delete_finalize, acc_delete_finalize_async
   public :: acc_memcpy_to_device, acc_memcpy_to_device_async
   public :: acc_memcpy_from_device, acc_memcpy_from_device_async
+  public :: acc_memcpy_device, acc_memcpy_device_async
+  public :: acc_attach, acc_attach_async, acc_detach, acc_detach_async
+  public :: acc_detach_finalize, acc_detach_finalize_async
 
   integer, parameter :: openacc_version = 201711
 
@@ -943,6 +1013,12 @@ module openacc
     procedure :: acc_copyout_finalize_array_h
   end interface
 
+  interface acc_copyout_finalize_async
+    procedure :: acc_copyout_finalize_async_32_h
+    procedure :: acc_copyout_finalize_async_64_h
+    procedure :: acc_copyout_finalize_async_array_h
+  end interface
+
   interface acc_delete
     procedure :: acc_delete_32_h
     procedure :: acc_delete_64_h
@@ -1046,6 +1122,69 @@ module openacc
     end subroutine
   end interface
 
+  interface
+    subroutine acc_memcpy_device (data_dev_dest, data_dev_src, bytes) bind(C)
+      use iso_c_binding, only: c_ptr, c_size_t
+      type(c_ptr), value :: data_dev_dest
+      type(c_ptr), value :: data_dev_src
+      integer(c_size_t), value :: bytes
+    end subroutine
+  end interface
+
+  interface
+    subroutine acc_memcpy_device_async (data_dev_dest, data_dev_src,  &
+                                        bytes, async_arg) bind(C)
+      use iso_c_binding, only: c_ptr, c_size_t
+      import :: acc_handle_kind
+      type(c_ptr), value :: data_dev_dest
+      type(c_ptr), value :: data_dev_src
+      integer(c_size_t), value :: bytes
+      integer(acc_handle_kind), value :: async_arg
+    end subroutine
+  end interface
+
+  interface
+    subroutine acc_attach (ptr_addr) bind(C)
+      type(*), dimension(..) :: ptr_addr
+    end subroutine
+  end interface
+
+  interface
+    subroutine acc_attach_async (ptr_addr, async_arg) bind(C)
+      import :: acc_handle_kind
+      type(*), dimension(..) :: ptr_addr
+      integer(acc_handle_kind), value :: async_arg
+    end subroutine
+  end interface
+
+  interface
+    subroutine acc_detach (ptr_addr) bind(C)
+      type(*), dimension(..) :: ptr_addr
+    end subroutine
+  end interface
+
+  interface
+    subroutine acc_detach_async (ptr_addr, async_arg) bind(C)
+      import :: acc_handle_kind
+      type(*), dimension(..) :: ptr_addr
+      integer(acc_handle_kind), value :: async_arg
+    end subroutine
+  end interface
+
+  interface
+    subroutine acc_detach_finalize (ptr_addr) bind(C)
+      type(*), dimension(..) :: ptr_addr
+    end subroutine
+  end interface
+
+  interface
+    subroutine acc_detach_finalize_async (ptr_addr, async_arg) bind(C)
+      import :: acc_handle_kind
+      type(*), dimension(..) :: ptr_addr
+      integer(acc_handle_kind), value :: async_arg
+    end subroutine
+  end interface
+
   interface acc_copyin_async
     procedure :: acc_copyin_async_32_h
     procedure :: acc_copyin_async_64_h
@@ -1070,6 +1209,12 @@ module openacc
     procedure :: acc_delete_async_array_h
   end interface
 
+  interface acc_delete_finalize_async
+    procedure :: acc_delete_finalize_async_32_h
+    procedure :: acc_delete_finalize_async_64_h
+    procedure :: acc_delete_finalize_async_array_h
+  end interface
+
   interface acc_update_device_async
     procedure :: acc_update_device_async_32_h
     procedure :: acc_update_device_async_64_h
@@ -1373,6 +1518,40 @@ subroutine acc_copyout_finalize_array_h (a)
   call acc_copyout_finalize_l (a, sizeof (a))
 end subroutine
 
+subroutine acc_copyout_finalize_async_32_h (a, len, async)
+  use iso_c_binding, only: c_int32_t, c_size_t, c_int
+  use openacc_internal, only: acc_copyout_finalize_async_l
+  use openacc_kinds, only: acc_handle_kind
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int32_t) len
+  integer (acc_handle_kind) async
+  call acc_copyout_finalize_async_l (a, int (len, kind = c_size_t), &
+                                     int (async, kind = c_int))
+end subroutine
+
+subroutine acc_copyout_finalize_async_64_h (a, len, async)
+  use iso_c_binding, only: c_int64_t, c_size_t, c_int
+  use openacc_internal, only: acc_copyout_finalize_async_l
+  use openacc_kinds, only: acc_handle_kind
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int64_t) len
+  integer (acc_handle_kind) async
+  call acc_copyout_finalize_async_l (a, int (len, kind = c_size_t), &
+                                     int (async, kind = c_int))
+end subroutine
+
+subroutine acc_copyout_finalize_async_array_h (a, async)
+  use iso_c_binding, only: c_int
+  use openacc_internal, only: acc_copyout_finalize_async_l
+  use openacc_kinds, only: acc_handle_kind
+  type (*), dimension (..), contiguous :: a
+  integer (acc_handle_kind) async
+  call acc_copyout_finalize_async_l (a, sizeof (a), int (async, kind = c_int))
+end subroutine
+
+
 subroutine acc_delete_32_h (a, len)
   use iso_c_binding, only: c_int32_t, c_size_t
   use openacc_internal, only: acc_delete_l
@@ -1620,6 +1799,39 @@ subroutine acc_delete_async_array_h (a, async)
   call acc_delete_async_l (a, sizeof (a), int (async, kind = c_int))
 end subroutine
 
+subroutine acc_delete_finalize_async_32_h (a, len, async)
+  use iso_c_binding, only: c_int32_t, c_size_t, c_int
+  use openacc_internal, only: acc_delete_finalize_async_l
+  use openacc_kinds, only: acc_handle_kind
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int32_t) len
+  integer (acc_handle_kind) async
+  call acc_delete_finalize_async_l (a, int (len, kind = c_size_t), &
+                                    int (async, kind = c_int))
+end subroutine
+
+subroutine acc_delete_finalize_async_64_h (a, len, async)
+  use iso_c_binding, only: c_int64_t, c_size_t, c_int
+  use openacc_internal, only: acc_delete_finalize_async_l
+  use openacc_kinds, only: acc_handle_kind
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int64_t) len
+  integer (acc_handle_kind) async
+  call acc_delete_finalize_async_l (a, int (len, kind = c_size_t),&
+                                    int (async, kind = c_int))
+end subroutine
+
+subroutine acc_delete_finalize_async_array_h (a, async)
+  use iso_c_binding, only: c_int
+  use openacc_internal, only: acc_delete_finalize_async_l
+  use openacc_kinds, only: acc_handle_kind
+  type (*), dimension (..), contiguous :: a
+  integer (acc_handle_kind) async
+  call acc_delete_finalize_async_l (a, sizeof (a), int (async, kind = c_int))
+end subroutine
+
 subroutine acc_update_device_async_32_h (a, len, async)
   use iso_c_binding, only: c_int32_t, c_size_t, c_int
   use openacc_internal, only: acc_update_device_async_l
diff --git a/libgomp/openacc.h b/libgomp/openacc.h
index a520bbe..3085b00 100644
--- a/libgomp/openacc.h
+++ b/libgomp/openacc.h
@@ -123,6 +123,7 @@ void *acc_hostptr (void *) __GOACC_NOTHROW;
 int acc_is_present (void *, size_t) __GOACC_NOTHROW;
 void acc_memcpy_to_device (void *, void *, size_t) __GOACC_NOTHROW;
 void acc_memcpy_from_device (void *, void *, size_t) __GOACC_NOTHROW;
+void acc_memcpy_device (void *, void *, size_t) __GOACC_NOTHROW;
 void acc_attach (void **) __GOACC_NOTHROW;
 void acc_attach_async (void **, int) __GOACC_NOTHROW;
 void acc_detach (void **) __GOACC_NOTHROW;
@@ -136,7 +137,7 @@ void acc_delete_finalize_async (void *, size_t, int) __GOACC_NOTHROW;
 void acc_detach_finalize (void **) __GOACC_NOTHROW;
 void acc_detach_finalize_async (void **, int) __GOACC_NOTHROW;
 
-/* Async functions, specified in OpenACC 2.5.  */
+/* Async functions, specified in OpenACC 2.5, acc_memcpy_device in 2.6.  */
 void acc_copyin_async (void *, size_t, int) __GOACC_NOTHROW;
 void acc_create_async (void *, size_t, int) __GOACC_NOTHROW;
 void acc_copyout_async (void *, size_t, int) __GOACC_NOTHROW;
@@ -145,6 +146,7 @@ void acc_update_device_async (void *, size_t, int) __GOACC_NOTHROW;
 void acc_update_self_async (void *, size_t, int) __GOACC_NOTHROW;
 void acc_memcpy_to_device_async (void *, void *, size_t, int) __GOACC_NOTHROW;
 void acc_memcpy_from_device_async (void *, void *, size_t, int) __GOACC_NOTHROW;
+void acc_memcpy_device_async (void *, void *, size_t, int) __GOACC_NOTHROW;
 
 /* CUDA-specific routines.  */
 void *acc_get_current_cuda_device (void) __GOACC_NOTHROW;
diff --git a/libgomp/openacc_lib.h b/libgomp/openacc_lib.h
index b0d287e..d3eaaac 100644
--- a/libgomp/openacc_lib.h
+++ b/libgomp/openacc_lib.h
@@ -350,6 +350,32 @@
         end subroutine
       end interface
 
+      interface acc_copyout_finalize_async
+        subroutine acc_copyout_finalize_async_32_h (a, len, async)
+          use iso_c_binding, only: c_int32_t
+          import acc_handle_kind
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int32_t) len
+          integer (acc_handle_kind) async
+        end subroutine
+
+        subroutine acc_copyout_finalize_async_64_h (a, len, async)
+          use iso_c_binding, only: c_int64_t
+          import acc_handle_kind
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int64_t) len
+          integer (acc_handle_kind) async
+        end subroutine
+
+        subroutine acc_copyout_finalize_async_array_h (a, async_)
+          import acc_handle_kind
+          type (*), dimension (..), contiguous :: a
+          integer (acc_handle_kind) async_
+        end subroutine
+      end interface
+
       interface acc_delete
         subroutine acc_delete_32_h (a, len)
           use iso_c_binding, only: c_int32_t
@@ -390,6 +416,32 @@
         end subroutine
       end interface
 
+      interface acc_delete_finalize_async
+        subroutine acc_delete_finalize_async_32_h (a, len, async)
+          use iso_c_binding, only: c_int32_t
+          import acc_handle_kind
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int32_t) len
+          integer (acc_handle_kind) async
+        end subroutine
+
+        subroutine acc_delete_finalize_async_64_h (a, len, async)
+          use iso_c_binding, only: c_int64_t
+          import acc_handle_kind
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int64_t) len
+          integer (acc_handle_kind) async
+        end subroutine
+
+        subroutine acc_delete_finalize_async_array_h (a, async_)
+          import acc_handle_kind
+          type (*), dimension (..), contiguous :: a
+          integer (acc_handle_kind) async_
+        end subroutine
+      end interface
+
       interface acc_update_device
         subroutine acc_update_device_32_h (a, len)
           use iso_c_binding, only: c_int32_t
@@ -528,6 +580,30 @@
         end subroutine
       end interface
 
+      interface
+        subroutine acc_memcpy_device(data_dev_dest, data_dev_src,       &
+     &                               bytes) bind(C)
+          use iso_c_binding, only: c_ptr, c_size_t
+          type(c_ptr), value :: data_dev_dest
+          type(c_ptr), value :: data_dev_src
+          integer(c_size_t), value :: bytes
+        end subroutine
+      end interface
+
+      interface
+        subroutine acc_memcpy_device_async(data_dev_dest,               &
+     &                                     data_dev_src, bytes,         &
+     &                                     async_arg) bind(C)
+          use iso_c_binding, only: c_ptr, c_size_t
+          import :: acc_handle_kind
+          type(c_ptr), value :: data_dev_dest
+          type(c_ptr), value :: data_dev_src
+          integer(c_size_t), value :: bytes
+          integer(acc_handle_kind), value :: async_arg
+        end subroutine
+      end interface
+
+
       interface acc_copyin_async
         subroutine acc_copyin_async_32_h (a, len, async)
           use iso_c_binding, only: c_int32_t
@@ -683,3 +759,45 @@
           integer (acc_handle_kind) async_
         end subroutine
       end interface
+
+      interface
+        subroutine acc_attach (ptr_addr) bind(C)
+          type(*), dimension(..) :: ptr_addr
+        end subroutine
+      end interface
+
+      interface
+        subroutine acc_attach_async (ptr_addr, async_arg) bind(C)
+          import :: acc_handle_kind
+          type(*), dimension(..) :: ptr_addr
+          integer(acc_handle_kind), value :: async_arg
+        end subroutine
+      end interface
+
+      interface
+        subroutine acc_detach (ptr_addr) bind(C)
+          type(*), dimension(..) :: ptr_addr
+        end subroutine
+      end interface
+
+      interface
+        subroutine acc_detach_async (ptr_addr, async_arg) bind(C)
+          import :: acc_handle_kind
+          type(*), dimension(..) :: ptr_addr
+          integer(acc_handle_kind), value :: async_arg
+        end subroutine
+      end interface
+
+      interface
+        subroutine acc_detach_finalize (ptr_addr) bind(C)
+          type(*), dimension(..) :: ptr_addr
+        end subroutine
+      end interface
+
+      interface
+        subroutine acc_detach_finalize_async(ptr_addr, async_arg)bind(C)
+          import :: acc_handle_kind
+          type(*), dimension(..) :: ptr_addr
+          integer(acc_handle_kind), value :: async_arg
+        end subroutine
+      end interface
diff --git a/libgomp/plugin/Makefrag.am b/libgomp/plugin/Makefrag.am
index 9c273e7..dbc02f3 100644
--- a/libgomp/plugin/Makefrag.am
+++ b/libgomp/plugin/Makefrag.am
@@ -57,7 +57,8 @@ if PLUGIN_GCN
 # AMD GCN plugin
 libgomp_plugin_gcn_version_info = -version-info $(libtool_VERSION)
 toolexeclib_LTLIBRARIES += libgomp-plugin-gcn.la
-libgomp_plugin_gcn_la_SOURCES = plugin/plugin-gcn.c
+libgomp_plugin_gcn_la_SOURCES = plugin/plugin-gcn.c simple-allocator.c \
+				plugin/mutex.c
 libgomp_plugin_gcn_la_CPPFLAGS = $(AM_CPPFLAGS) \
 	-D_GNU_SOURCE
 libgomp_plugin_gcn_la_LDFLAGS = $(libgomp_plugin_gcn_version_info) \
diff --git a/libgomp/plugin/build-target-indirect-htab.h b/libgomp/plugin/build-target-indirect-htab.h
new file mode 100644
index 0000000..d732aca
--- /dev/null
+++ b/libgomp/plugin/build-target-indirect-htab.h
@@ -0,0 +1,83 @@
+/* Copyright (C) 2023-2025 Free Software Foundation, Inc.
+
+   Contributed by Siemens.
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+
+/* This file is used to create a hash table on the host that is supposed
+   to get used on the device - that's for the 'indirect' clause feature.
+
+   In order to have host initialization work, the pointer sizes must be
+   the same - and either the endianness must match or the host-device
+   memcopy has to take care of it.  */
+
+typedef unsigned __int128 hash_entry_type;
+#define INDIRECT_HOST_ADDR(p) ((void *) (uintptr_t) p)
+#define INDIRECT_DEV_ADDR(p) ((void*) (uintptr_t) (p >> 64))
+#define SET_INDIRECT_ADDRS(p, h, d) \
+  p = (((unsigned __int128) h) + (((unsigned __int128) d) << 64))
+
+_Static_assert (sizeof (unsigned __int128) == 2 * sizeof (void*),
+		"hash_entry_type size mismatch");
+
+static inline void *htab_alloc (size_t size) {
+  return malloc (size);
+}
+
+static inline void htab_free (void *ptr) { free (ptr); }
+
+#include "hashtab.h"
+
+static inline hashval_t
+htab_hash (hash_entry_type element)
+{
+  return hash_pointer (INDIRECT_HOST_ADDR (element));
+}
+
+static inline bool
+htab_eq (hash_entry_type x, hash_entry_type y)
+{
+  return INDIRECT_HOST_ADDR (x) == INDIRECT_HOST_ADDR (y);
+}
+
+void*
+create_target_indirect_map (size_t *h_size, size_t count,
+			    uint64_t *host_addrs, uint64_t *device_addrs)
+{
+  assert (htab_find);  /* Silence -Werror=unused-function.  */
+
+  htab_t indirect_htab = htab_create (count);
+
+  hash_entry_type element;
+  for (size_t i = 0; i < count; i++)
+    {
+      SET_INDIRECT_ADDRS (element, host_addrs[i], device_addrs[i]);
+      hash_entry_type *slot = htab_find_slot (&indirect_htab, element,
+					      INSERT);
+      *slot = element;
+    }
+  *h_size = (sizeof (struct htab)
+	     + htab_size (indirect_htab) * sizeof (hash_entry_type));
+  return (void*) indirect_htab;
+}
diff --git a/libgomp/plugin/cuda-lib.def b/libgomp/plugin/cuda-lib.def
index eb562ac..67c783d 100644
--- a/libgomp/plugin/cuda-lib.def
+++ b/libgomp/plugin/cuda-lib.def
@@ -33,6 +33,7 @@ CUDA_ONE_CALL (cuLinkDestroy)
 CUDA_ONE_CALL (cuMemAlloc)
 CUDA_ONE_CALL (cuMemAllocHost)
 CUDA_ONE_CALL (cuMemHostAlloc)
+CUDA_ONE_CALL (cuMemAllocManaged)
 CUDA_ONE_CALL (cuMemcpy)
 CUDA_ONE_CALL (cuMemcpyDtoDAsync)
 CUDA_ONE_CALL (cuMemcpyDtoH)
@@ -42,6 +43,7 @@ CUDA_ONE_CALL (cuMemcpyHtoDAsync)
 CUDA_ONE_CALL (cuMemcpy2D)
 CUDA_ONE_CALL (cuMemcpy2DUnaligned)
 CUDA_ONE_CALL (cuMemcpy3D)
+CUDA_ONE_CALL (cuMemsetD8)
 CUDA_ONE_CALL (cuMemFree)
 CUDA_ONE_CALL (cuMemFreeHost)
 CUDA_ONE_CALL (cuMemGetAddressRange)
diff --git a/libgomp/plugin/mutex.c b/libgomp/plugin/mutex.c
new file mode 100644
index 0000000..e6981ad
--- /dev/null
+++ b/libgomp/plugin/mutex.c
@@ -0,0 +1,58 @@
+/* Mutex implementation for libgomp plugins.
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+
+   Contributed by BayLibre
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This is a minimal implementation of the gomp_mutex_t spinlocks, but
+   without all the dependencies used by the config/linux/mutex implementation.
+
+   At the time of writing, this is only used by simple_alloc which has
+   short-lived locks and should be fine with these.  The actual locks are in
+   a header file, so only the fallback "slow" functions are needed here.  */
+
+#include "config.h"
+#include <unistd.h>
+#include "libgomp.h"
+
+#ifndef HAVE_SYNC_BUILTINS
+#error "HAVE_SYNC_BUILTINS is required to build this"
+#endif
+
+void
+gomp_mutex_lock_slow (gomp_mutex_t *mutex, int oldval)
+{
+  while (oldval == 1)
+    {
+      usleep (1);
+      oldval = __atomic_exchange_n (mutex, 1, __ATOMIC_ACQUIRE);
+    }
+}
+
+void
+gomp_mutex_unlock_slow (gomp_mutex_t *mutex)
+{
+  GOMP_PLUGIN_fatal ("gomp_mutex_unlock_slow should be unreachable");
+}
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index 4b42a59..92de6fb 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -50,6 +50,16 @@
 #include "oacc-plugin.h"
 #include "oacc-int.h"
 #include <assert.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+/* Create hash-table for declare target's indirect clause on the host;
+   see build-target-indirect-htab.h for details.  */
+#define USE_HASHTAB_LOOKUP_FOR_INDIRECT
+#ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT
+static void* create_target_indirect_map (size_t *, size_t,
+					 uint64_t *, uint64_t *);
+#endif
 
 /* These probably won't be in elf.h for a while.  */
 #ifndef R_AMDGPU_NONE
@@ -208,6 +218,8 @@ struct hsa_runtime_fn_info
   hsa_status_t (*hsa_code_object_deserialize_fn)
     (void *serialized_code_object, size_t serialized_code_object_size,
      const char *options, hsa_code_object_t *code_object);
+  hsa_status_t (*hsa_amd_memory_fill_fn)(void *ptr, uint32_t value,
+					 size_t count);
   hsa_status_t (*hsa_amd_memory_lock_fn)
     (void *host_ptr, size_t size, hsa_agent_t *agents, int num_agent,
      void **agent_ptr);
@@ -218,6 +230,9 @@ struct hsa_runtime_fn_info
      const hsa_dim3_t *range, hsa_agent_t copy_agent,
      hsa_amd_copy_direction_t dir, uint32_t num_dep_signals,
      const hsa_signal_t *dep_signals, hsa_signal_t completion_signal);
+  hsa_status_t (*hsa_amd_svm_attributes_set_fn)
+    (void* ptr, size_t size, hsa_amd_svm_attribute_pair_t* attribute_list,
+     size_t attribute_count);
 };
 
 /* As an HIP runtime is dlopened, following structure defines function
@@ -736,6 +751,24 @@ dump_hsa_system_info (void)
     }
   else
     GCN_WARNING ("HSA_SYSTEM_INFO_EXTENSIONS: FAILED\n");
+
+  bool svm_supported;
+  status = hsa_fns.hsa_system_get_info_fn
+    (HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED, &svm_supported);
+  if (status == HSA_STATUS_SUCCESS)
+    GCN_DEBUG ("HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED: %s\n",
+	       (svm_supported ? "TRUE" : "FALSE"));
+  else
+    GCN_WARNING ("HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED: FAILED\n");
+
+  bool svm_accessible;
+  status = hsa_fns.hsa_system_get_info_fn
+    (HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT, &svm_accessible);
+  if (status == HSA_STATUS_SUCCESS)
+    GCN_DEBUG ("HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT: %s\n",
+	       (svm_accessible ? "TRUE" : "FALSE"));
+  else
+    GCN_WARNING ("HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT: FAILED\n");
 }
 
 /* Dump information about the available hardware.  */
@@ -1456,9 +1489,11 @@ init_hsa_runtime_functions (void)
   DLSYM_FN (hsa_signal_load_acquire)
   DLSYM_FN (hsa_queue_destroy)
   DLSYM_FN (hsa_code_object_deserialize)
+  DLSYM_OPT_FN (hsa_amd_memory_fill)
   DLSYM_OPT_FN (hsa_amd_memory_lock)
   DLSYM_OPT_FN (hsa_amd_memory_unlock)
   DLSYM_OPT_FN (hsa_amd_memory_async_copy_rect)
+  DLSYM_OPT_FN (hsa_amd_svm_attributes_set)
   return true;
 #undef DLSYM_OPT_FN
 #undef DLSYM_FN
@@ -1741,6 +1776,22 @@ isa_code(const char *isa) {
   return EF_AMDGPU_MACH_UNSUPPORTED;
 }
 
+/* Returns the code which is used in the GCN object code to identify the
+   generic ISA that corresponds to a specific ISA.  */
+
+static gcn_isa
+generic_isa_code (int isa) {
+  switch(isa)
+    {
+#define EF_AMDGPU_MACH_AMDGCN_NONE 0
+#define GCN_DEVICE(name, NAME, ELF, GCCISA, XNACK, SRAM, WAVE64, CUMODE, \
+		   VGPRS, CO, ARCH, GENERIC_ISA, ...) \
+    case ELF: return EF_AMDGPU_MACH_AMDGCN_ ## GENERIC_ISA;
+#include "../../gcc/config/gcn/gcn-devices.def"
+    }
+  return 0;
+}
+
 /* CDNA2 devices have twice as many VGPRs compared to older devices.  */
 
 static int
@@ -2516,6 +2567,15 @@ isa_matches_agent (struct agent_info *agent, Elf64_Ehdr *image,
 	      "Consider using ROCR_VISIBLE_DEVICES to disable incompatible "
 	      "devices or run with LOADER_ENABLE_LOGGING=1 for more details.",
 	      device_isa_s, agent_isa_s, agent->device_id);
+  else if (strcmp (device_isa_s, agent_isa_s) == 0
+	   || (elf_gcn_isa_is_generic (image)
+	       && generic_isa_code (agent->device_isa) == isa_field))
+    snprintf (msg, sizeof msg,
+	      "GCN code object features do not match for an unknown reason "
+	      "(device %d).\n"
+	      "Try to adjust the HSA_XNACK setting (perhaps?), or use\n"
+	      "ROCR_VISIBLE_DEVICES to disable incompatible devices.\n",
+	      agent->device_id);
   else
     snprintf (msg, sizeof msg,
 	      "GCN code object ISA '%s' is incompatible with GPU ISA '%s' "
@@ -3177,6 +3237,125 @@ wait_queue (struct goacc_asyncqueue *aq)
 }
 
 /* }}}  */
+/* {{{ Managed Memory
+
+   This implements an allocator equivalent to CUDA "Managed" memory, in which
+   the pages automatically migrate between host and device memory, as needed.
+   These allocations are visible from both the host and devices without the
+   need for explicit mappings.  However, OpenMP does need "is_device_ptr" or
+   "has_device_addr" to function properly.
+
+   There isn't a high-level HSA/ROCr API to allocate managed memory, so we
+   use regular memory and register it with the driver by setting it to
+   "coarse-grained" mode, and setting the "accessible by default" attribute
+   on devices where HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT isn't set
+   as standard (as it isn't on systems that don't support USM, or when
+   HSA_XNACK != 1).
+
+   This is in contrast to GOMP_OFFLOAD_alloc which allocates coarse-grained
+   *GPU memory*, which is not visible on the host.
+
+   It would be possible to register memory returned by malloc, but
+   experimentation shows that doing so causes memory faults within the HSA
+   runtime code.  Therefore, the Managed memory space is allocated as a
+   largish block and then subdivided via a custom allocator.  The "simple"
+   allocator is designed specifically to store its free-chain outside of
+   the registered pages so that allocation does not inadvertently cause
+   pages to migrate.
+
+   Note: if the user has multiple mismatched devices, and one or more do
+   not support USM (or XNACK is off), then each page of the Managed heap
+   could end up associated with a different device (by calling omp_alloc
+   before and after omp_set_default_device).  This issue remains
+   an *unhandled* edge-case, at present.  */
+
+gomp_simple_alloc_ctx_p managed_ctx = NULL;
+
+/* Initialize or extend the Managed memory space.  This is called whenever
+   allocation fails.  SIZE is the minimum size required for the failed
+   allocation to succeed; the function may choose a larger size.
+   Note that Linux lazy allocation means that the memory returned isn't
+   guaranteed to actually exist.  */
+
+static bool
+managed_heap_create (struct agent_info *agent, size_t size)
+{
+  static int lock = 0;
+  while (__atomic_exchange_n (&lock, 1, __ATOMIC_ACQUIRE) != 0)
+    ;
+
+  size_t default_size = 1L * 1024 * 1024 * 1024; /* 1GB */
+  if (size < default_size)
+    size = default_size;
+
+  /* Round up to a whole page.  */
+  int pagesize = getpagesize ();
+  int misalignment = size % pagesize;
+  if (misalignment > 0)
+    size += pagesize - misalignment;
+
+  /* Try to get contiguous memory, but it might not be possible.
+     The most recent previous allocation is at the head of the list.  */
+  static void *addrhint = NULL;
+  void *new_pages = mmap (addrhint, size, PROT_READ | PROT_WRITE,
+			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  if (new_pages == MAP_FAILED)
+    {
+      GCN_DEBUG ("Could not allocate Managed Memory heap.\n");
+      __atomic_store_n (&lock, 0, __ATOMIC_RELEASE);
+      return false;
+    }
+
+  /* Register the heap allocation as coarse grained, "Managed" memory.  */
+  struct hsa_amd_svm_attribute_pair_s attr = {
+    HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG,
+    HSA_AMD_SVM_GLOBAL_FLAG_COARSE_GRAINED
+  };
+  hsa_status_t status = hsa_fns.hsa_amd_svm_attributes_set_fn (new_pages, size,
+							       &attr, 1);
+  if (status != HSA_STATUS_SUCCESS)
+    GOMP_PLUGIN_fatal ("Failed to allocate Unified Shared Memory;"
+		       " please update your drivers and/or kernel");
+
+  /* The HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE setting is required on devices
+     without default SVM.  */
+  static int svm_accessible = 0xff; /* Use 0xff as "undefined".  */
+  if (svm_accessible == 0xff)
+    {
+      status = hsa_fns.hsa_system_get_info_fn
+	(HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT, &svm_accessible);
+      if (status != HSA_STATUS_SUCCESS)
+	{
+	  GCN_DEBUG ("warning: failed to query "
+		     " HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT\n");
+	  svm_accessible = false;
+	}
+    }
+  if (svm_accessible == false)
+    {
+      struct hsa_amd_svm_attribute_pair_s attr2;
+      attr2.attribute = HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE;
+      attr2.value = agent->id.handle;
+      status = hsa_fns.hsa_amd_svm_attributes_set_fn (new_pages, size, &attr2,
+						      1);
+      if (status != HSA_STATUS_SUCCESS)
+	GOMP_PLUGIN_fatal ("Failed to allocate Unified Shared Memory;"
+			   " please update your drivers and/or kernel");
+    }
+
+  addrhint = new_pages + size;
+
+  /* Initialize a new Managed memory heap, or add the new memory into an
+     existing Managed memory heap.  */
+  if (!managed_ctx)
+    managed_ctx = gomp_simple_alloc_init_context ();
+  gomp_simple_alloc_register_memory (managed_ctx, new_pages, size);
+
+  __atomic_store_n (&lock, 0, __ATOMIC_RELEASE);
+  return true;
+}
+
+/* }}} */
 /* {{{ OpenACC support  */
 
 /* Execute an OpenACC kernel, synchronously or asynchronously.  */
@@ -3320,6 +3499,61 @@ gcn_exec (struct kernel_info *kernel,
 /* }}}  */
 /* {{{ Generic Plugin API  */
 
+#if 0  /* TODO: Use to enable self-mapping/USM automatically.  */
+/* FIXME: The auto-self-map feature depends on still mapping 'declare target'
+   variables, even if ignoring all other mappings. Cf. PR 115279.  */
+
+/* Return TRUE if the GPU is an APU, i.e. the GPU is integrated with the CPU
+   such that both use the same memory controller such that mapping or memory
+   migration is pointless.  If CHECK_XNACK is TRUE, it additionally requires
+   that the GPU has *no* XNACK support otherwise FALSE is returned.
+
+   In theory, enabling unified-shared memory for APUs should always work,
+   however, with AMD GPUs some APUs (e.g. MI300A) still require XNACK to be
+   enabled as it is required to handle page faults.
+
+   Thus, for unified-shared memory access, either of the following must hold:
+   * HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT is TRUE
+     This implies that all GPUs support USM access, either directly (as APU)
+     or via page migration.  For MI300A, this is only the case if
+     HSA_AMD_SYSTEM_INFO_XNACK_ENABLED is TRUE.
+   * If the GPU is an APU *and* it does not support XNACK.  */
+
+static bool
+is_integrated_apu (struct agent_info *agent, bool check_xnack)
+{
+  enum {
+    HSACO_ATTR_UNSUPPORTED,
+    HSACO_ATTR_OFF,
+    HSACO_ATTR_ON,
+    HSACO_ATTR_ANY,
+    HSACO_ATTR_DEFAULT
+  };
+
+  bool is_apu;
+  uint8_t mem_prop[8];
+  hsa_status_t status;
+
+  status = hsa_fns.hsa_agent_get_info_fn (
+	     agent->id, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_MEMORY_PROPERTIES,
+	     mem_prop);
+  _Static_assert (HSA_AMD_MEMORY_PROPERTY_AGENT_IS_APU < 8,
+		  "HSA_AMD_MEMORY_PROPERTY_AGENT_IS_APU < 8");
+  is_apu = (status == HSA_STATUS_SUCCESS
+	    && (mem_prop[0] & (1 << HSA_AMD_MEMORY_PROPERTY_AGENT_IS_APU)));
+
+  if (check_xnack)
+    switch(agent->device_isa)
+      {
+#define GCN_DEVICE(name, NAME, ELF, ISA, XNACK, ...) \
+      case ELF: return is_apu && (XNACK == HSACO_ATTR_UNSUPPORTED);
+#include "../../gcc/config/gcn/gcn-devices.def"
+      default: return false;  /* Just to be save.  */
+      }
+  return is_apu;
+}
+#endif
+
 /* Return the name of the accelerator, which is "gcn".  */
 
 const char *
@@ -3685,37 +3919,28 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
 			     (void*) ind_funcs_table_addr,
 			     sizeof (ind_funcs_table));
 
-      /* Build host->target address map for indirect functions.  */
-      uint64_t ind_fn_map[ind_func_count * 2 + 1];
-      for (unsigned i = 0; i < ind_func_count; i++)
-	{
-	  ind_fn_map[i * 2] = host_ind_fn_table[i];
-	  ind_fn_map[i * 2 + 1] = ind_funcs_table[i];
-	  GCN_DEBUG ("Indirect function %d: %lx->%lx\n",
-		     i, host_ind_fn_table[i], ind_funcs_table[i]);
-	}
-      ind_fn_map[ind_func_count * 2] = 0;
+      /* For newer binaries, the hash table for 'indirect' is created on the
+	 host. Older binaries don't have GOMP_INDIRECT_ADDR_HMAP on the
+	 device side - and have to create the table themselves using
+	 GOMP_INDIRECT_ADDR_MAP.  */
 
-      /* Write the map onto the target.  */
-      void *map_target_addr
-	= GOMP_OFFLOAD_alloc (agent->device_id, sizeof (ind_fn_map));
-      GCN_DEBUG ("Allocated indirect map at %p\n", map_target_addr);
-
-      GOMP_OFFLOAD_host2dev (agent->device_id, map_target_addr,
-			     (void*) ind_fn_map,
-			     sizeof (ind_fn_map));
-
-      /* Write address of the map onto the target.  */
       hsa_executable_symbol_t symbol;
-
+      bool host_init_htab = true;
+      #ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT
       status
 	= hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL,
-						XSTRING (GOMP_INDIRECT_ADDR_MAP),
+						XSTRING (GOMP_INDIRECT_ADDR_HMAP),
 						agent->id, 0, &symbol);
       if (status != HSA_STATUS_SUCCESS)
+      #endif
+	{
+	  host_init_htab = false;
+	  status = hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL,
+		     XSTRING (GOMP_INDIRECT_ADDR_MAP), agent->id, 0, &symbol);
+	}
+      if (status != HSA_STATUS_SUCCESS)
 	hsa_fatal ("Could not find GOMP_INDIRECT_ADDR_MAP in code object",
 		   status);
-
       uint64_t varptr;
       uint32_t varsize;
 
@@ -3731,9 +3956,51 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
 	hsa_fatal ("Could not extract a variable size from its symbol",
 		   status);
 
-      GCN_DEBUG ("Found GOMP_INDIRECT_ADDR_MAP at %lx with size %d\n",
-		 varptr, varsize);
+      GCN_DEBUG ("Found GOMP_INDIRECT_ADDR_%sMAP at %lx with size %d\n",
+		 host_init_htab ? "H" : "", varptr, varsize);
+
+      void *map_target_addr;
+      if (!host_init_htab)
+	{
+	  /* Build host->target address map for indirect functions.  */
+	  uint64_t ind_fn_map[ind_func_count * 2 + 1];
+	  for (unsigned i = 0; i < ind_func_count; i++)
+	    {
+	      ind_fn_map[i * 2] = host_ind_fn_table[i];
+	      ind_fn_map[i * 2 + 1] = ind_funcs_table[i];
+	      GCN_DEBUG ("Indirect function %d: %lx->%lx\n",
+			 i, host_ind_fn_table[i], ind_funcs_table[i]);
+	    }
+	  ind_fn_map[ind_func_count * 2] = 0;
+	  /* Write the map onto the target.  */
+	  map_target_addr = GOMP_OFFLOAD_alloc (agent->device_id,
+						sizeof (ind_fn_map));
+	  GOMP_OFFLOAD_host2dev (agent->device_id, map_target_addr,
+				 (void*) ind_fn_map, sizeof (ind_fn_map));
+	}
+      #ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT
+      else
+	{
+	  /* FIXME: Handle multi-kernel load and unload, cf. PR 114690.  */
+	  size_t host_map_size;
+	  void *host_map;
+	  host_map = create_target_indirect_map (&host_map_size, ind_func_count,
+						 host_ind_fn_table,
+						 ind_funcs_table);
+	  for (unsigned i = 0; i < ind_func_count; i++)
+	      GCN_DEBUG ("Indirect function %d: %lx->%lx\n",
+			 i, host_ind_fn_table[i], ind_funcs_table[i]);
+	  /* Write the map onto the target.  */
+	  map_target_addr = GOMP_OFFLOAD_alloc (agent->device_id,
+						host_map_size);
+	  GOMP_OFFLOAD_host2dev (agent->device_id, map_target_addr,
+				 host_map, host_map_size);
+	}
+      #endif
 
+      GCN_DEBUG ("Allocated indirect map at %p\n", map_target_addr);
+
+      /* Write address of the map onto the target.  */
       GOMP_OFFLOAD_host2dev (agent->device_id, (void *) varptr,
 			     &map_target_addr,
 			     sizeof (map_target_addr));
@@ -4435,6 +4702,83 @@ init_hip_runtime_functions (void)
   return true;
 }
 
+bool
+GOMP_OFFLOAD_memset (int ord, void *ptr, int val, size_t count)
+{
+  hsa_status_t status = HSA_STATUS_SUCCESS;
+
+  /* Set COUNT bytes of device memory at PTR to the byte value VAL; return
+     true on success, false (after GOMP_PLUGIN_error) on failure.
+
+     hsa_amd_memory_fill is fast, but it is an HSA extension with two
+     requirements: the memory must be 4-byte aligned and, by construction,
+     only multiples of 4 bytes can be filled (uint32_t value argument).
+     So either avoid it, or make up to three calls:
+     - copy 1 to 3 bytes to get alignment (hsa_memory_copy), if unaligned
+     - call hsa_amd_memory_fill
+     - copy remaining 1 to 3 bytes (hsa_memory_copy), if after alignment
+       count is not a multiple of 4 bytes.
+     Multiple calls only pay off with enough data; see the heuristic below.  */
+
+  uint8_t v8 = (uint8_t) val;
+  size_t before = (4 - (uintptr_t) ptr % 4) % 4;  /* 0 to 3 bytes.  */
+  size_t tail = (count - before) % 4;  /* 0 to 3 bytes.  */
+
+  enum {
+    /* Heuristic: prefer alloca to malloc up to ... */
+    alloca_size = 256,  /* bytes */
+    /* Call hsa_amd_memory_fill also when two copy calls are required.  */
+    always_use_fill = 256*1024,  /* bytes */
+    /* Call hsa_amd_memory_fill also when one copy call is required.  */
+    use_fill_one_copy = (128+64)*1024  /* bytes */
+  };
+
+  /* Do not call hsa_amd_memory_fill when any of the following conditions
+     is true. Note that it is always preferred if available and
+     before == tail == 0.  */
+  if (__builtin_expect (!hsa_fns.hsa_amd_memory_fill_fn, 0)
+      || (before && tail && count < always_use_fill)
+      || ((before || tail) && count < use_fill_one_copy))
+    before = count;
+
+  /* Copy call for alignment - or all data, if condition above is true.  */
+  if (before)
+    {
+      void *data;
+      if (before > alloca_size)
+	data = malloc (before * sizeof (uint8_t));
+      else
+	data = alloca (before * sizeof (uint8_t));
+      if (data == NULL)  /* malloc failed; check before memset uses it.  */
+	goto fail;
+      memset (data, val, before);
+      status = hsa_fns.hsa_memory_copy_fn (ptr, data, before);
+      if (before > alloca_size)
+	free (data);
+      if (status != HSA_STATUS_SUCCESS)
+	goto fail;
+      count -= before;
+    }
+
+  if (count == 0)
+    return true;
+  ptr += before;
+
+  /* Unsigned multiply; 'v8 << 24' could overflow the promoted int.  */
+  uint32_t values = (uint32_t) v8 * 0x01010101u;
+  status = hsa_fns.hsa_amd_memory_fill_fn (ptr, values, count / 4);
+  if (tail && status == HSA_STATUS_SUCCESS)
+    {
+      ptr += count - tail;
+      status = hsa_fns.hsa_memory_copy_fn (ptr, &values, tail);
+    }
+  if (status == HSA_STATUS_SUCCESS)
+    return true;
+
+fail:
+  GOMP_PLUGIN_error ("memory set failed");
+  return false;
+}
 
 void
 GOMP_OFFLOAD_interop (struct interop_obj_t *obj, int ord,
@@ -4885,6 +5229,35 @@ GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
 		       GOMP_PLUGIN_target_task_completion, async_data);
 }
 
+/* Allocate memory suitable for Managed Memory.  */
+
+void *
+GOMP_OFFLOAD_managed_alloc (int device, size_t size)
+{
+  struct agent_info *agent = get_agent_info (device);
+  void *mem;
+
+  /* Take SIZE bytes from the managed-memory pool.  Each time the pool
+     cannot satisfy the request, try to create a new heap block and then
+     ask again.  Retrying has to be a loop: another thread may get to the
+     new memory first.  If no further heap block can be created, the
+     allocation fails and NULL is returned.  */
+  while ((mem = gomp_simple_alloc (managed_ctx, size)) == NULL)
+    if (!managed_heap_create (agent, size))
+      return NULL;
+
+  return mem;
+}
+
+/* Free memory allocated via GOMP_OFFLOAD_managed_alloc.  */
+
+bool
+GOMP_OFFLOAD_managed_free (int device, void *ptr)
+{
+  gomp_simple_free (managed_ctx, ptr);  /* DEVICE is not used here.  */
+  return true;  /* Unconditionally reports success.  */
+}
+
 /* }}} */
 /* {{{ OpenACC Plugin API  */
 
@@ -5079,7 +5452,8 @@ GOMP_OFFLOAD_openacc_async_queue_callback (struct goacc_asyncqueue *aq,
   queue_push_callback (aq, fn, data);
 }
 
-/* Queue up an asynchronous data copy from host to DEVICE.  */
+/* Queue up an asynchronous data copy from host to DEVICE.
+   (Also handles dev2host and dev2dev.)  */
 
 bool
 GOMP_OFFLOAD_openacc_async_host2dev (int device, void *dst, const void *src,
@@ -5097,10 +5471,16 @@ bool
 GOMP_OFFLOAD_openacc_async_dev2host (int device, void *dst, const void *src,
 				     size_t n, struct goacc_asyncqueue *aq)
 {
-  struct agent_info *agent = get_agent_info (device);
-  assert (agent == aq->agent);
-  queue_push_copy (aq, dst, src, n);
-  return true;
+  return GOMP_OFFLOAD_openacc_async_host2dev (device, dst, src, n, aq);
+}
+
+/* Queue up an asynchronous data copy from DEVICE to DEVICE.  */
+
+bool
+GOMP_OFFLOAD_openacc_async_dev2dev (int device, void *dst, const void *src,
+				    size_t n, struct goacc_asyncqueue *aq)
+{
+  return GOMP_OFFLOAD_openacc_async_host2dev (device, dst, src, n, aq);
 }
 
 union goacc_property_value
@@ -5160,4 +5540,8 @@ GOMP_OFFLOAD_openacc_destroy_thread_data (void *data)
   free (data);
 }
 
+#ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT
+  #include "build-target-indirect-htab.h"
+#endif
+
 /* }}} */
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index a5cf859..dd8bcf9 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -60,6 +60,14 @@
 #include <errno.h>
 #include <stdlib.h>
 
+/* Create hash-table for declare target's indirect clause on the host;
+   see build-target-indirect-htab.h for details.  */
+#define USE_HASHTAB_LOOKUP_FOR_INDIRECT
+#ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT
+static void* create_target_indirect_map (size_t *, size_t,
+					 uint64_t *, uint64_t *);
+#endif
+
 /* An arbitrary fixed limit (128MB) for the size of the OpenMP soft stacks
    block to cache between kernel invocations.  For soft-stacks blocks bigger
    than this, we will free the block before attempting another GPU memory
@@ -1125,11 +1133,13 @@ nvptx_stacks_free (struct ptx_device *ptx_dev, bool force)
 }
 
 static void *
-nvptx_alloc (size_t s, bool suppress_errors)
+nvptx_alloc (size_t s, bool suppress_errors, bool managed)
 {
   CUdeviceptr d;
 
-  CUresult r = CUDA_CALL_NOCHECK (cuMemAlloc, &d, s);
+  CUresult r = (managed ? CUDA_CALL_NOCHECK (cuMemAllocManaged, &d, s,
+					     CU_MEM_ATTACH_GLOBAL)
+		: CUDA_CALL_NOCHECK (cuMemAlloc, &d, s));
   if (suppress_errors && r == CUDA_ERROR_OUT_OF_MEMORY)
     return NULL;
   else if (r != CUDA_SUCCESS)
@@ -1238,6 +1248,24 @@ nvptx_get_current_cuda_context (void)
   return nvthd->ptx_dev->ctx;
 }
 
+#if 0  /* TODO: Use to enable self-mapping/USM automatically.  */
+/* FIXME: The auto-self-map feature depends on still mapping 'declare target'
+   variables, even if ignoring all other mappings. Cf. PR 115279.  */
+
+/* Return TRUE if the GPU is integrated with host memory, i.e. GPU and
+   host share the same memory controller.  As of Oct 2025, no such
+   Nvidia GPU seems to exist.  */
+static bool
+is_integrated_apu (struct ptx_device *ptx_dev)
+{
+  int integrated;
+  /* True only if the query succeeds and reports an integrated device.  */
+  return (CUDA_CALL_NOCHECK (cuDeviceGetAttribute, &integrated,
+			     CU_DEVICE_ATTRIBUTE_INTEGRATED, ptx_dev->dev)
+	  == CUDA_SUCCESS && integrated == 1);
+}
+#endif
+
 /* Plugin entry points.  */
 
 const char *
@@ -1626,39 +1654,71 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
       if (r != CUDA_SUCCESS)
 	GOMP_PLUGIN_fatal ("cuMemcpyDtoH error: %s", cuda_error (r));
 
-      /* Build host->target address map for indirect functions.  */
-      uint64_t ind_fn_map[ind_fn_entries * 2 + 1];
-      for (unsigned k = 0; k < ind_fn_entries; k++)
-	{
-	  ind_fn_map[k * 2] = host_ind_fn_table[k];
-	  ind_fn_map[k * 2 + 1] = ind_fn_table[k];
-	  GOMP_PLUGIN_debug (0, "Indirect function %d: %lx->%lx\n",
-			     k, host_ind_fn_table[k], ind_fn_table[k]);
-	}
-      ind_fn_map[ind_fn_entries * 2] = 0;
-
-      /* Write the map onto the target.  */
-      void *map_target_addr
-	= GOMP_OFFLOAD_alloc (ord, sizeof (ind_fn_map));
-      GOMP_PLUGIN_debug (0, "Allocated indirect map at %p\n", map_target_addr);
-
-      GOMP_OFFLOAD_host2dev (ord, map_target_addr,
-			     (void*) ind_fn_map,
-			     sizeof (ind_fn_map));
+      /* For newer binaries, the hash table for 'indirect' is created on the
+	 host. Older binaries don't have GOMP_INDIRECT_ADDR_HMAP on the
+	 device side - and have to create the table themselves using
+	 GOMP_INDIRECT_ADDR_MAP.  */
 
-      /* Write address of the map onto the target.  */
       CUdeviceptr varptr;
       size_t varsize;
+      bool host_init_htab = true;
+      #ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT
       r = CUDA_CALL_NOCHECK (cuModuleGetGlobal, &varptr, &varsize,
-			     module, XSTRING (GOMP_INDIRECT_ADDR_MAP));
+			     module, XSTRING (GOMP_INDIRECT_ADDR_HMAP));
+      if (r != CUDA_SUCCESS)
+      #endif
+	{
+	  host_init_htab = false;
+	  r = CUDA_CALL_NOCHECK (cuModuleGetGlobal, &varptr, &varsize,
+				 module, XSTRING (GOMP_INDIRECT_ADDR_MAP));
+	}
       if (r != CUDA_SUCCESS)
 	GOMP_PLUGIN_fatal ("Indirect map variable not found in image: %s",
 			   cuda_error (r));
-
       GOMP_PLUGIN_debug (0,
-			 "Indirect map variable found at %llx with size %ld\n",
+			 "%s-style indirect map variable found at %llx with "
+			 "size %ld\n", host_init_htab ? "New" : "Old",
 			 varptr, varsize);
 
+      void *map_target_addr;
+      if (!host_init_htab)
+	{
+	  /* Build host->target address map for indirect functions.  */
+	  uint64_t ind_fn_map[ind_fn_entries * 2 + 1];
+	  for (unsigned k = 0; k < ind_fn_entries; k++)
+	    {
+	      ind_fn_map[k * 2] = host_ind_fn_table[k];
+	      ind_fn_map[k * 2 + 1] = ind_fn_table[k];
+	      GOMP_PLUGIN_debug (0, "Indirect function %d: %lx->%lx\n",
+				 k, host_ind_fn_table[k], ind_fn_table[k]);
+	    }
+	  ind_fn_map[ind_fn_entries * 2] = 0;
+	  /* Write the map onto the target.  */
+	  map_target_addr = GOMP_OFFLOAD_alloc (ord, sizeof (ind_fn_map));
+	  GOMP_OFFLOAD_host2dev (ord, map_target_addr,
+				 (void *) ind_fn_map, sizeof (ind_fn_map));
+	}
+      #ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT
+      else
+	{
+	  /* FIXME: Handle multi-kernel load and unload, cf. PR 114690.  */
+	  size_t host_map_size;
+	  void *host_map;
+	  host_map = create_target_indirect_map (&host_map_size, ind_fn_entries,
+						 host_ind_fn_table,
+						 ind_fn_table);
+	  for (unsigned k = 0; k < ind_fn_entries; k++)
+	    GOMP_PLUGIN_debug (0, "Indirect function %d: %lx->%lx\n",
+			       k, host_ind_fn_table[k], ind_fn_table[k]);
+	  /* Write the map onto the target.  */
+	  map_target_addr = GOMP_OFFLOAD_alloc (ord, host_map_size);
+	  GOMP_OFFLOAD_host2dev (ord, map_target_addr, host_map, host_map_size);
+	}
+      #endif
+
+      GOMP_PLUGIN_debug (0, "Allocated indirect map at %p\n", map_target_addr);
+
+      /* Write address of the map onto the target.  */
       GOMP_OFFLOAD_host2dev (ord, (void *) varptr, &map_target_addr,
 			     sizeof (map_target_addr));
     }
@@ -1785,8 +1845,8 @@ GOMP_OFFLOAD_unload_image (int ord, unsigned version, const void *target_data)
   return ret;
 }
 
-void *
-GOMP_OFFLOAD_alloc (int ord, size_t size)
+static void *
+cleanup_and_alloc (int ord, size_t size, bool managed)
 {
   if (!nvptx_attach_host_thread_to_device (ord))
     return NULL;
@@ -1809,7 +1869,7 @@ GOMP_OFFLOAD_alloc (int ord, size_t size)
       blocks = tmp;
     }
 
-  void *d = nvptx_alloc (size, true);
+  void *d = nvptx_alloc (size, true, managed);
   if (d)
     return d;
   else
@@ -1817,10 +1877,22 @@ GOMP_OFFLOAD_alloc (int ord, size_t size)
       /* Memory allocation failed.  Try freeing the stacks block, and
 	 retrying.  */
       nvptx_stacks_free (ptx_dev, true);
-      return nvptx_alloc (size, false);
+      return nvptx_alloc (size, false, managed);
     }
 }
 
+void *
+GOMP_OFFLOAD_alloc (int ord, size_t size)
+{
+  return cleanup_and_alloc (ord, size, false);  /* false: not managed.  */
+}
+
+void *
+GOMP_OFFLOAD_managed_alloc (int ord, size_t size)
+{
+  return cleanup_and_alloc (ord, size, true);  /* true: managed memory.  */
+}
+
 bool
 GOMP_OFFLOAD_free (int ord, void *ptr)
 {
@@ -1828,6 +1900,45 @@ GOMP_OFFLOAD_free (int ord, void *ptr)
 	  && nvptx_free (ptr, ptx_devices[ord]));
 }
 
+bool
+GOMP_OFFLOAD_managed_free (int ord, void *ptr)
+{
+  return GOMP_OFFLOAD_free (ord, ptr);  /* Same path as ordinary frees.  */
+}
+
+bool
+GOMP_OFFLOAD_page_locked_host_alloc (void **ptr, size_t size)
+{
+  /* Store page-locked host memory of SIZE bytes in *PTR.  Running out of
+     memory is not an error here: *PTR becomes NULL and true is returned.
+     Only other CUDA failures are reported and make this return false.  */
+  if (size != 0)
+    {
+      /* Given 'CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING', we don't need
+	 'CU_MEMHOSTALLOC_PORTABLE' among the flags here.  */
+      unsigned int no_flags = 0;
+      CUresult res = CUDA_CALL_NOCHECK (cuMemHostAlloc, ptr, size, no_flags);
+      if (res == CUDA_SUCCESS)
+	return true;
+      if (res != CUDA_ERROR_OUT_OF_MEMORY)
+	{
+	  GOMP_PLUGIN_error ("cuMemHostAlloc error: %s", cuda_error (res));
+	  return false;
+	}
+    }
+  /* Either out of memory, or the size == 0 special case that ensures
+     omp_alloc specification compliance.  */
+  *ptr = NULL;
+  return true;
+}
+
+bool
+GOMP_OFFLOAD_page_locked_host_free (void *ptr)
+{
+  CUDA_CALL (cuMemFreeHost, ptr);
+  return true;  /* NOTE(review): CUDA_CALL presumably returns on error.  */
+}
+
 void
 GOMP_OFFLOAD_openacc_exec (void (*fn) (void *),
 			   size_t mapnum  __attribute__((unused)),
@@ -2019,6 +2130,34 @@ GOMP_OFFLOAD_openacc_async_queue_callback (struct goacc_asyncqueue *aq,
 }
 
 static bool
+cuda_memcpy_dev_sanity_check (const void *d1, const void *d2, size_t s)
+{
+  CUdeviceptr pb1, pb2;  /* Base addresses reported for D1 and D2.  */
+  size_t ps1, ps2;  /* Sizes reported for those address ranges.  */
+  if (!s)
+    return true;  /* Nothing to copy, nothing to check.  */
+  if (!d1 || !d2)
+    {
+      GOMP_PLUGIN_error ("invalid device address");
+      return false;
+    }
+  CUDA_CALL (cuMemGetAddressRange, &pb1, &ps1, (CUdeviceptr) d1);
+  CUDA_CALL (cuMemGetAddressRange, &pb2, &ps2, (CUdeviceptr) d2);
+  if (!pb1 || !pb2)
+    {
+      GOMP_PLUGIN_error ("invalid device address");
+      return false;
+    }
+  if ((void *)(d1 + s) > (void *)(pb1 + ps1)
+      || (void *)(d2 + s) > (void *)(pb2 + ps2))
+    {
+      GOMP_PLUGIN_error ("invalid size");
+      return false;  /* S bytes from D1 or D2 run past the reported range.  */
+    }
+  return true;
+}
+
+static bool
 cuda_memcpy_sanity_check (const void *h, const void *d, size_t s)
 {
   CUdeviceptr pb;
@@ -2077,6 +2216,9 @@ GOMP_OFFLOAD_dev2host (int ord, void *dst, const void *src, size_t n)
 bool
 GOMP_OFFLOAD_dev2dev (int ord, void *dst, const void *src, size_t n)
 {
+  if (!nvptx_attach_host_thread_to_device (ord)
+      || !cuda_memcpy_dev_sanity_check (dst, src, n))
+    return false;
   CUDA_CALL (cuMemcpyDtoDAsync, (CUdeviceptr) dst, (CUdeviceptr) src, n, NULL);
   return true;
 }
@@ -2267,6 +2409,15 @@ GOMP_OFFLOAD_memcpy3d (int dst_ord, int src_ord, size_t dim2_size,
 }
 
 bool
+GOMP_OFFLOAD_memset (int ord, void *ptr, int val, size_t count)
+{
+  if (!nvptx_attach_host_thread_to_device (ord))
+    return false;  /* Attaching to device ORD failed.  */
+  CUDA_CALL (cuMemsetD8, (CUdeviceptr) ptr, (unsigned char) val, count);
+  return true;  /* VAL was passed on truncated to unsigned char.  */
+}
+
+bool
 GOMP_OFFLOAD_openacc_async_host2dev (int ord, void *dst, const void *src,
 				     size_t n, struct goacc_asyncqueue *aq)
 {
@@ -2288,6 +2439,18 @@ GOMP_OFFLOAD_openacc_async_dev2host (int ord, void *dst, const void *src,
   return true;
 }
 
+bool
+GOMP_OFFLOAD_openacc_async_dev2dev (int ord, void *dst, const void *src,
+				    size_t n, struct goacc_asyncqueue *aq)
+{
+  if (!nvptx_attach_host_thread_to_device (ord)
+      || !cuda_memcpy_dev_sanity_check (dst, src, n))
+    return false;  /* Attach failed or DST/SRC/N did not validate.  */
+  CUDA_CALL (cuMemcpyDtoDAsync, (CUdeviceptr) dst, (CUdeviceptr) src, n,
+	     aq->cuda_stream);  /* Issued on AQ's CUDA stream.  */
+  return true;
+}
+
 union goacc_property_value
 GOMP_OFFLOAD_openacc_get_property (int n, enum goacc_property prop)
 {
@@ -2846,3 +3009,7 @@ GOMP_OFFLOAD_run (int ord, void *tgt_fn, void *tgt_vars, void **args)
 }
 
 /* TODO: Implement GOMP_OFFLOAD_async_run. */
+
+#ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT
+  #include "build-target-indirect-htab.h"
+#endif
diff --git a/libgomp/simple-allocator.c b/libgomp/simple-allocator.c
new file mode 100644
index 0000000..25ec2c7
--- /dev/null
+++ b/libgomp/simple-allocator.c
@@ -0,0 +1,316 @@
+/* Copyright (C) 2025 Free Software Foundation, Inc.
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This is a simple "malloc" implementation intended for use with device
+   Managed Memory and Pinned Memory.  It allocates memory from a pool allocated
+   and configured by the device plugin, or the OS-specific allocator
+   (for pinned).
+ 
+   Unlike the "basic" allocator, this implementation keeps the allocated/free
+   chain in a side-table (splay tree) to ensure that the allocation routine
+   does not have the side-effect of migrating all the managed memory pages back
+   into host memory.  Keeping the meta-data elsewhere is also useful for pinned
+   memory, which is typically an extremely limited resource.  */
+
+#include <string.h>
+#include "libgomp.h"
+
+/* Use a splay tree to track allocations.  */
+
+typedef struct simple_alloc_splay_tree_node_s *simple_alloc_splay_tree_node;
+typedef struct simple_alloc_splay_tree_s *simple_alloc_splay_tree;
+typedef struct simple_alloc_splay_tree_key_s *simple_alloc_splay_tree_key;
+
+struct simple_alloc_splay_tree_key_s {
+  void *base;
+  size_t size;
+};
+
+static inline int
+simple_alloc_splay_compare (simple_alloc_splay_tree_key x,
+			    simple_alloc_splay_tree_key y)
+{
+  if (x->base == y->base)  /* Keys are ordered by base address only.  */
+    return 0;
+  return x->base > y->base ? 1 : -1;
+}
+
+#define splay_tree_prefix simple_alloc
+#include "splay-tree.h"
+
+/* 128-byte granularity means GPU cache-line aligned.  */
+#define ALIGN(VAR) (((VAR) + 127) & ~127)
+
+/* The context data prevents the need for global state.  */
+struct gomp_simple_alloc_context {
+  struct simple_alloc_splay_tree_s allocations;
+  struct simple_alloc_splay_tree_s free_space;
+  gomp_mutex_t lock;
+};
+
+gomp_simple_alloc_ctx_p
+gomp_simple_alloc_init_context ()
+{
+  return calloc (1, sizeof (struct gomp_simple_alloc_context));  /* Zeroed.  */
+}
+
+/* Coalesce contiguous free space into one entry.  This considers the entries
+   either side of the root node only, so it should be called each time a new
+   entry is inserted into the root.  */
+
+static void
+simple_alloc_coalesce_free_space (gomp_simple_alloc_ctx_p ctx)
+{
+  simple_alloc_splay_tree_node prev, next, node = ctx->free_space.root;
+
+  for (prev = node->left; prev && prev->right; prev = prev->right)
+    ;  /* PREV: rightmost node of the left subtree, i.e. the predecessor.  */
+  for (next = node->right; next && next->left; next = next->left)
+    ;  /* NEXT: leftmost node of the right subtree, i.e. the successor.  */
+
+  /* Coalesce adjacent free chunks.  */
+  if (next
+      && node->key.base + node->key.size == next->key.base)
+    {
+      /* Free chunk follows: absorb it into NODE and release its node.  */
+      node->key.size += next->key.size;
+      simple_alloc_splay_tree_remove (&ctx->free_space, &next->key);
+      free (next);
+    }
+  if (prev
+      && prev->key.base + prev->key.size == node->key.base)
+    {
+      /* Free chunk precedes: absorb NODE into it and release NODE.  */
+      prev->key.size += node->key.size;
+      simple_alloc_splay_tree_remove (&ctx->free_space, &node->key);
+      free (node);
+    }
+}
+
+/* Add a new memory region into the free chain.  This is how our heap is
+   initialized and extended (using memory acquired by an external caller).  If
+   the new region is contiguous with an existing region then any free space
+   will be coalesced.  */
+
+void
+gomp_simple_alloc_register_memory (gomp_simple_alloc_ctx_p ctx, char *base,
+				   size_t size)
+{
+  if (base == NULL || ctx == NULL)
+    return;
+
+  /* Build the tracking node first; without one the region is unusable.  */
+  simple_alloc_splay_tree_node node = malloc (sizeof (*node));
+  if (node == NULL)
+    GOMP_PLUGIN_fatal ("Out of memory allocating %lu bytes",
+		       (unsigned long) sizeof (*node));
+  node->key.base = base;
+  node->key.size = size;
+  node->left = node->right = NULL;
+  gomp_mutex_lock (&ctx->lock);
+  simple_alloc_splay_tree_insert (&ctx->free_space, node);
+  simple_alloc_coalesce_free_space (ctx);
+  gomp_mutex_unlock (&ctx->lock);
+}
+
+/* This splay_tree_foreach callback selects the first free space large enough
+   to hold the allocation needed.  Since the splay_tree walk may start in the
+   middle the "first" isn't necessarily the "leftmost" entry.  */
+
+struct simple_alloc_callback_data {
+  size_t size;
+  simple_alloc_splay_tree_node found;
+};
+
+static int
+simple_alloc_callback (simple_alloc_splay_tree_key key, void *data)
+{
+  struct simple_alloc_callback_data *request = data;
+
+  /* Too small: return 0 (presumably "keep walking" for foreach_lazy).  */
+  if (key->size < request->size)
+    return 0;
+
+  /* Big enough.  KEY is cast back to its node and recorded; 1 is returned
+     to report the match.  */
+  request->found = (simple_alloc_splay_tree_node) key;
+  return 1;
+}
+
+/* Simple "malloc".  Selects and moves an address range from ctx->free_space to
+   ctx->allocations, while leaving any excess in ctx->free_space.  */
+
+void *
+gomp_simple_alloc (gomp_simple_alloc_ctx_p ctx, size_t size)
+{
+  /* Returns the base of a block of at least SIZE bytes taken from CTX's
+     free space, or NULL if CTX is NULL or the request cannot be met.  */
+
+  /* Memory is allocated in N-byte granularity.  Rounding up wraps around
+     for sizes close to the maximum; such requests cannot be satisfied.  */
+  if (ctx == NULL || ALIGN (size) < size)
+    return NULL;
+  size = ALIGN (size);
+
+  gomp_mutex_lock (&ctx->lock);
+
+  /* Find a suitable free block, if any memory is registered and free.  */
+  struct simple_alloc_callback_data cbd = {size, NULL};
+  if (ctx->free_space.root)
+    simple_alloc_splay_tree_foreach_lazy (&ctx->free_space,
+					  simple_alloc_callback, &cbd);
+  simple_alloc_splay_tree_node freenode = cbd.found;
+
+  /* The allocation needs its own tracking node; failing to get one is
+     reported like a full pool (NULL) instead of dereferencing NULL.  */
+  simple_alloc_splay_tree_node allocnode = NULL;
+  if (freenode)
+    allocnode = malloc (sizeof (*allocnode));
+
+  void *result = NULL;
+  if (allocnode)
+    {
+      result = freenode->key.base;
+      allocnode->key.base = result;
+      allocnode->key.size = size;
+      allocnode->left = NULL;
+      allocnode->right = NULL;
+      simple_alloc_splay_tree_insert (&ctx->allocations, allocnode);
+
+      /* Update the free chain.  */
+      size_t stillfree_size = freenode->key.size - size;
+      if (stillfree_size > 0)
+	{
+	  freenode->key.base = freenode->key.base + size;
+	  freenode->key.size = stillfree_size;
+	}
+      else
+	{
+	  simple_alloc_splay_tree_remove (&ctx->free_space, &freenode->key);
+	  free (freenode);
+	}
+    }
+
+  gomp_mutex_unlock (&ctx->lock);
+  return result;
+}
+
+/* Simple "free".  Moves an address range from ctx->allocations to
+   ctx->free_space and merges that record with any contiguous free memory.  */
+
+void
+gomp_simple_free (gomp_simple_alloc_ctx_p ctx, void *addr)
+{
+  if (!ctx)
+    return;
+
+  struct simple_alloc_splay_tree_key_s wanted = {addr};
+  simple_alloc_splay_tree_node node;
+  /* ADDR must be the base of a live allocation.  The lookup yields the key
+     embedded in the tracking node, so the node itself is reused: unlink it
+     from the allocations and re-insert it, size intact, as free space.  */
+  gomp_mutex_lock (&ctx->lock);
+  node = (simple_alloc_splay_tree_node)
+	 simple_alloc_splay_tree_lookup (&ctx->allocations, &wanted);
+  if (node == NULL)
+    GOMP_PLUGIN_fatal ("invalid free");
+  simple_alloc_splay_tree_remove (&ctx->allocations, &wanted);
+  simple_alloc_splay_tree_insert (&ctx->free_space, node);
+  simple_alloc_coalesce_free_space (ctx);
+  gomp_mutex_unlock (&ctx->lock);
+}
+
+/* Simple "realloc".  Works in-place, if possible; reallocates otherwise.  */
+
+void *
+gomp_simple_realloc (gomp_simple_alloc_ctx_p ctx, void *addr, size_t newsize)
+{
+  /* Returns ADDR, a relocated copy, or NULL on failure (ADDR stays valid).  */
+  if (ctx == NULL)
+    return NULL;
+  newsize = ALIGN (newsize);
+  gomp_mutex_lock (&ctx->lock);
+  struct simple_alloc_splay_tree_key_s key = {addr};
+  simple_alloc_splay_tree_key found
+    = simple_alloc_splay_tree_lookup (&ctx->allocations, &key);
+  if (!found)
+    GOMP_PLUGIN_fatal ("invalid realloc");
+  size_t oldsize = found->size;
+  if (newsize < oldsize)
+    {
+      /* Shrinking.  Without a tracking node, just keep the larger size.  */
+      simple_alloc_splay_tree_node newfree = malloc (sizeof (*newfree));
+      if (newfree)
+	{
+	  newfree->key.base = found->base + newsize;
+	  newfree->key.size = oldsize - newsize;
+	  newfree->left = newfree->right = NULL;
+	  /* Record the new size, else a later free releases the tail twice.  */
+	  found->size = newsize;
+	  simple_alloc_splay_tree_insert (&ctx->free_space, newfree);
+	  simple_alloc_coalesce_free_space (ctx);
+	}
+    }
+  else if (newsize > oldsize)
+    {
+      /* Growing: possible in place only if a free chunk starts right after.  */
+      size_t extra = newsize - oldsize;
+      struct simple_alloc_splay_tree_key_s freekey = {addr + oldsize};
+      simple_alloc_splay_tree_key foundfree
+	= simple_alloc_splay_tree_lookup (&ctx->free_space, &freekey);
+      if (!foundfree || foundfree->size < extra)
+	{
+	  /* Must relocate.  Release the lock and use alloc/free.  */
+	  gomp_mutex_unlock (&ctx->lock);
+	  void *newaddr = gomp_simple_alloc (ctx, newsize);
+	  if (!newaddr)
+	    return NULL;
+	  memcpy (newaddr, addr, oldsize);
+	  gomp_simple_free (ctx, addr);
+	  return newaddr;
+	}
+      if (foundfree->size > extra)
+	{
+	  /* Advance the chunk by the amount taken, not by the old size.  */
+	  foundfree->base += extra;
+	  foundfree->size -= extra;
+	}
+      else
+	{
+	  /* Fully consumed: unlink by the still-unmodified key and release.  */
+	  simple_alloc_splay_tree_remove (&ctx->free_space, &freekey);
+	  free (foundfree);
+	}
+      found->size = newsize;
+    }
+
+  gomp_mutex_unlock (&ctx->lock);
+  return addr;
+}
+
+/* Include the splay tree code inline, with the prefixes added.  */
+#define splay_tree_prefix simple_alloc
+#define splay_tree_c
+#define gomp_fatal GOMP_PLUGIN_fatal  /* So it links into a plugin.  */
+#include "splay-tree.h"
diff --git a/libgomp/target.c b/libgomp/target.c
index a64ee96..af7c702 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -139,6 +139,14 @@ gomp_get_num_devices (void)
   return num_devices_openmp;
 }
 
+static int
+gomp_get_default_device ()
+{
+  /* Targets are initialized before the default-device ICV is read.  */
+  gomp_init_targets_once ();
+  return gomp_icv (false)->default_device_var;
+}
+
 static struct gomp_device_descr *
 resolve_device (int device_id, bool remapped)
 {
@@ -148,11 +156,7 @@ resolve_device (int device_id, bool remapped)
 
   if ((remapped && device_id == GOMP_DEVICE_ICV)
       || device_id == GOMP_DEVICE_DEFAULT_OMP_61)
-    {
-      struct gomp_task_icv *icv = gomp_icv (false);
-      device_id = icv->default_device_var;
-      remapped = false;
-    }
+    device_id = gomp_get_default_device ();
 
   if (device_id < 0)
     {
@@ -461,6 +465,19 @@ gomp_copy_dev2host (struct gomp_device_descr *devicep,
     gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
 }
 
+attribute_hidden void
+gomp_copy_dev2dev (struct gomp_device_descr *devicep,
+		   struct goacc_asyncqueue *aq,
+		   void *dst, const void *src, size_t sz)
+{
+  if (__builtin_expect (aq == NULL, 1))  /* Common case: no async queue.  */
+    gomp_device_copy (devicep, devicep->dev2dev_func, "dev", dst,
+		      "dev", src, sz);
+  else
+    goacc_device_copy_async (devicep, devicep->openacc.async.dev2dev_func,
+			     "dev", dst, "dev", src, NULL, sz, aq);
+}
+
 static void
 gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
 {
@@ -800,12 +817,22 @@ gomp_map_fields_existing (struct target_mem_desc *tgt,
 	      (void *) cur_node.host_end);
 }
 
-attribute_hidden void
+/* Update the devptr by setting it to the device address of the host pointee
+   'attach_to'; devptr is obtained from the splay_tree_key n.
+   When the pointer is already attached or the host pointee is either
+   NULL or in memory map, this function returns true.
+   Otherwise, the device pointer is set to point to the host pointee and:
+   - If allow_zero_length_array_sections is set, true is returned.
+   - Else, if fail_if_not_found is set, a fatal error is issued.
+   - Otherwise, false is returned.  */
+
+attribute_hidden bool
 gomp_attach_pointer (struct gomp_device_descr *devicep,
 		     struct goacc_asyncqueue *aq, splay_tree mem_map,
 		     splay_tree_key n, uintptr_t attach_to, size_t bias,
 		     struct gomp_coalesce_buf *cbufp,
-		     bool allow_zero_length_array_sections)
+		     bool allow_zero_length_array_sections,
+		     bool fail_if_not_found)
 {
   struct splay_tree_key_s s;
   size_t size, idx;
@@ -860,7 +887,7 @@ gomp_attach_pointer (struct gomp_device_descr *devicep,
 	  gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
 			      sizeof (void *), true, cbufp);
 
-	  return;
+	  return true;
 	}
 
       s.host_start = target + bias;
@@ -869,15 +896,16 @@ gomp_attach_pointer (struct gomp_device_descr *devicep,
 
       if (!tn)
 	{
-	  if (allow_zero_length_array_sections)
-	    /* When allowing attachment to zero-length array sections, we
-	       copy the host pointer when the target region is not mapped.  */
-	    data = target;
-	  else
+	  /* We copy the host pointer when the target region is not mapped;
+	     for allow_zero_length_array_sections, that's permitted.
+	     Otherwise, it depends on the context. Return false in that
+	     case, unless fail_if_not_found.  */
+	  if (!allow_zero_length_array_sections && fail_if_not_found)
 	    {
 	      gomp_mutex_unlock (&devicep->lock);
 	      gomp_fatal ("pointer target not mapped for attach");
 	    }
+	  data = target;
 	}
       else
 	data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;
@@ -889,10 +917,13 @@ gomp_attach_pointer (struct gomp_device_descr *devicep,
 
       gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
 			  sizeof (void *), true, cbufp);
+      if (!tn && !allow_zero_length_array_sections)
+	return false;
     }
   else
     gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
 		(void *) attach_to, (int) n->aux->attach_count[idx]);
+  return true;
 }
 
 attribute_hidden void
@@ -976,6 +1007,155 @@ gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
     }
 }
 
+static const char *
+kind_to_name (unsigned short kind, bool short_mapkind)
+{
+  if (short_mapkind && GOMP_MAP_IMPLICIT_P (kind))
+    kind &= ~GOMP_MAP_IMPLICIT;
+  /* Return the GOMP_MAP_* enumerator name of the masked kind.  */
+  switch (kind & (short_mapkind ? 0xff : 0x7))
+    {
+    case GOMP_MAP_ALLOC: return "GOMP_MAP_ALLOC";
+    case GOMP_MAP_TO: return "GOMP_MAP_TO";
+    case GOMP_MAP_FROM: return "GOMP_MAP_FROM";
+    case GOMP_MAP_TOFROM: return "GOMP_MAP_TOFROM";
+    case GOMP_MAP_POINTER: return "GOMP_MAP_POINTER";
+    case GOMP_MAP_TO_PSET: return "GOMP_MAP_TO_PSET";
+    case GOMP_MAP_FORCE_PRESENT: return "GOMP_MAP_FORCE_PRESENT";
+    case GOMP_MAP_DELETE: return "GOMP_MAP_DELETE";
+    case GOMP_MAP_FORCE_DEVICEPTR: return "GOMP_MAP_FORCE_DEVICEPTR";
+    case GOMP_MAP_DEVICE_RESIDENT: return "GOMP_MAP_DEVICE_RESIDENT";
+    case GOMP_MAP_LINK: return "GOMP_MAP_LINK";
+    case GOMP_MAP_IF_PRESENT: return "GOMP_MAP_IF_PRESENT";
+    case GOMP_MAP_FIRSTPRIVATE: return "GOMP_MAP_FIRSTPRIVATE";
+    case GOMP_MAP_FIRSTPRIVATE_INT: return "GOMP_MAP_FIRSTPRIVATE_INT";
+    case GOMP_MAP_USE_DEVICE_PTR: return "GOMP_MAP_USE_DEVICE_PTR";
+    case GOMP_MAP_ZERO_LEN_ARRAY_SECTION: return "GOMP_MAP_ZERO_LEN_ARRAY_SECTION";
+    case GOMP_MAP_FORCE_ALLOC: return "GOMP_MAP_FORCE_ALLOC";
+    case GOMP_MAP_FORCE_TO: return "GOMP_MAP_FORCE_TO";
+    case GOMP_MAP_FORCE_FROM: return "GOMP_MAP_FORCE_FROM";
+    case GOMP_MAP_FORCE_TOFROM: return "GOMP_MAP_FORCE_TOFROM";
+    case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
+      return "GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT";
+    case GOMP_MAP_ALWAYS_TO: return "GOMP_MAP_ALWAYS_TO";
+    case GOMP_MAP_ALWAYS_FROM: return "GOMP_MAP_ALWAYS_FROM";
+    case GOMP_MAP_ALWAYS_TOFROM: return "GOMP_MAP_ALWAYS_TOFROM";
+    case GOMP_MAP_ALWAYS_PRESENT_TO: return "GOMP_MAP_ALWAYS_PRESENT_TO";
+    case GOMP_MAP_ALWAYS_PRESENT_FROM: return "GOMP_MAP_ALWAYS_PRESENT_FROM";
+    case GOMP_MAP_ALWAYS_PRESENT_TOFROM: return "GOMP_MAP_ALWAYS_PRESENT_TOFROM";
+    case GOMP_MAP_STRUCT: return "GOMP_MAP_STRUCT";
+    case GOMP_MAP_STRUCT_UNORD: return "GOMP_MAP_STRUCT_UNORD";
+    case GOMP_MAP_ALWAYS_POINTER: return "GOMP_MAP_ALWAYS_POINTER";
+    case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION:
+      return "GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION";
+    case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
+      return "GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION";
+    case GOMP_MAP_RELEASE: return "GOMP_MAP_RELEASE";
+    case GOMP_MAP_ATTACH: return "GOMP_MAP_ATTACH";
+    case GOMP_MAP_DETACH: return "GOMP_MAP_DETACH";
+    case GOMP_MAP_FORCE_DETACH: return "GOMP_MAP_FORCE_DETACH";
+    case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION:
+      return "GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION";
+    default: return "unknown";
+    }
+}
+
+/*  When GCC encounters a clause with an iterator, e.g.:
+
+   #pragma omp target map (iterator(i=0:4), to: x[i])
+
+   it generates an array containing the number of iterations and the
+   address and size of each iteration. e.g.:
+
+   void *omp_iter_data[] = {
+     (void *) 4, // Number of iterations
+     &x[0], (void *) sizeof(x[0]),
+     &x[1], (void *) sizeof(x[1]),
+     &x[2], (void *) sizeof(x[2]),
+     &x[3], (void *) sizeof(x[3])
+   };
+
+   When the construct is lowered, &omp_iter_data is used as the host address
+   for the map (instead of &x[i]), and the size is set to SIZE_MAX to mark
+   the map as an iterator map.
+
+   Map entries containing expanded iterators will be flattened and merged into
+   HOSTADDRS, SIZES and KINDS, and MAPNUM updated.  Returns true if there are
+   any iterators found.  ITERATOR_COUNT holds the iteration count of the
+   iterator that generates each map (and 0 if not generated from an iterator).
+   HOSTADDRS, SIZES, KINDS and ITERATOR_COUNT must be freed afterwards if any
+   merging occurs.  */
+
+static bool
+gomp_merge_iterator_maps (size_t *mapnum, void ***hostaddrs, size_t **sizes,
+			  void **kinds, size_t **iterator_count)
+{
+  bool iterator_p = false;
+  size_t map_count = 0;
+  unsigned short **skinds = (unsigned short **) kinds;
+
+  for (size_t i = 0; i < *mapnum; i++)
+    if ((*sizes)[i] == SIZE_MAX)
+      {
+	uintptr_t *iterator_array = (*hostaddrs)[i];
+	map_count += iterator_array[0];
+	iterator_p = true;
+      }
+    else
+      map_count++;
+
+  if (!iterator_p)
+    return false;
+
+  gomp_debug (1,
+	      "Expanding iterator maps - number of map entries: %u -> %u\n",
+	      (int) *mapnum, (int) map_count);
+  void **new_hostaddrs = (void **) gomp_malloc (map_count * sizeof (void *));
+  size_t *new_sizes = (size_t *) gomp_malloc (map_count * sizeof (size_t));
+  unsigned short *new_kinds
+    = (unsigned short *) gomp_malloc (map_count * sizeof (unsigned short));
+  size_t new_idx = 0;
+  *iterator_count = (size_t *) gomp_malloc (map_count * sizeof (size_t));
+
+  for (size_t i = 0; i < *mapnum; i++)
+    {
+      if ((*sizes)[i] == SIZE_MAX)
+	{
+	  uintptr_t *iterator_array = (*hostaddrs)[i];
+	  size_t count = *iterator_array++;
+	  for (size_t j = 0; j < count; j++)
+	    {
+	      new_hostaddrs[new_idx] = (void *) *iterator_array++;
+	      new_sizes[new_idx] = *iterator_array++;
+	      new_kinds[new_idx] = (*skinds)[i];
+	      (*iterator_count)[new_idx] = j + 1;
+	      gomp_debug (1,
+			  "Expanding map %u <%s>: "
+			  "hostaddrs[%u] = %p, sizes[%u] = %lu\n",
+			  (int) i, kind_to_name (new_kinds[new_idx], true),
+			  (int) new_idx, new_hostaddrs[new_idx],
+			  (int) new_idx, (unsigned long) new_sizes[new_idx]);
+	      new_idx++;
+	    }
+	}
+      else
+	{
+	  new_hostaddrs[new_idx] = (*hostaddrs)[i];
+	  new_sizes[new_idx] = (*sizes)[i];
+	  new_kinds[new_idx] = (*skinds)[i];
+	  (*iterator_count)[new_idx] = 0;
+	  new_idx++;
+	}
+    }
+
+  *mapnum = map_count;
+  *hostaddrs = new_hostaddrs;
+  *sizes = new_sizes;
+  *kinds = new_kinds;
+
+  return true;
+}
+
 static inline __attribute__((always_inline)) struct target_mem_desc *
 gomp_map_vars_internal (struct gomp_device_descr *devicep,
 			struct goacc_asyncqueue *aq, size_t mapnum,
@@ -992,6 +1172,11 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
   const int typemask = short_mapkind ? 0xff : 0x7;
   struct splay_tree_s *mem_map = &devicep->mem_map;
   struct splay_tree_key_s cur_node;
+  bool iterators_p = false;
+  size_t *iterator_count = NULL;
+  if (short_mapkind)  /* OpenMP */
+    iterators_p = gomp_merge_iterator_maps (&mapnum, &hostaddrs, &sizes,
+					    &kinds, &iterator_count);
   struct target_mem_desc *tgt
     = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
   tgt->list_count = mapnum;
@@ -1480,14 +1665,14 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
 	      case GOMP_MAP_STRUCT_UNORD:
 		if (sizes[i] > 1)
 		  {
-		    void *first = hostaddrs[i + 1];
 		    for (size_t j = i + 1; j < i + sizes[i]; j++)
-		      if (hostaddrs[j + 1] != first)
+		      if (hostaddrs[j + 1] < hostaddrs[j])
 			{
 			  gomp_mutex_unlock (&devicep->lock);
-			  gomp_fatal ("Mapped array elements must be the "
-				      "same (%p vs %p)", first,
-				      hostaddrs[j + 1]);
+			  gomp_fatal (
+			    "Mapped array elements must be the same or in "
+			    "increasing address order (got %p > %p)",
+			    hostaddrs[j], hostaddrs[j + 1]);
 			}
 		  }
 		/* Fallthrough.  */
@@ -1587,9 +1772,37 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
 		      bool zlas
 			= ((kind & typemask)
 			   == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
-		      gomp_attach_pointer (devicep, aq, mem_map, n,
-					   (uintptr_t) hostaddrs[i], sizes[i],
-					   cbufp, zlas);
+		      /* For 'target enter data', the map clauses are split;
+			 however, for more complex code with struct and
+			 pointer members, the mapping and the attach can end up
+			 in different sets; or the wrong mapping with the
+			 attach. As there is no way to know whether a size
+			 zero like 'var->ptr[i][:0]' happened in the same
+			 directive or not, the not-attached check is now
+			 fully silenced for 'enter data'.  */
+		      if (openmp_p && (pragma_kind & GOMP_MAP_VARS_ENTER_DATA))
+			zlas = true;
+		      if (!gomp_attach_pointer (devicep, aq, mem_map, n,
+						(uintptr_t) hostaddrs[i], sizes[i],
+						cbufp, zlas, !openmp_p))
+			{
+			  /* Pointee not found; that's an error except for
+			     map(var[:n]) with n == 0; the compiler adds a
+			     runtime condition such that for those the kind is
+			     always GOMP_MAP_ZERO_LEN_ARRAY_SECTION.  */
+			  for (j = i; j > 0; j--)
+			    if (*(void**) hostaddrs[i] == hostaddrs[j-1] - sizes[i]
+				&& sizes[j-1] == 0
+				&& (GOMP_MAP_ZERO_LEN_ARRAY_SECTION
+				    == (get_kind (short_mapkind, kinds, j-1)
+					& typemask)))
+			      break;
+			  if (j == 0)
+			    {
+			      gomp_mutex_unlock (&devicep->lock);
+			      gomp_fatal ("pointer target not mapped for attach");
+			    }
+			}
 		    }
 		  else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
 		    {
@@ -1841,14 +2054,22 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
 
   if (pragma_kind & GOMP_MAP_VARS_TARGET)
     {
+      /* The target variables table is constructed with maps using iterators
+	 unexpanded. Now that the iterator maps are expanded, we will need to
+	 skip all expanded maps after the initial entry, otherwise subsequent
+	 maps will be out-of-sync with their corresponding entry in the
+	 target variables table.  */
+      size_t map_num = 0;
       for (i = 0; i < mapnum; i++)
-	{
-	  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
-	  gomp_copy_host2dev (devicep, aq,
-			      (void *) (tgt->tgt_start + i * sizeof (void *)),
-			      (void *) &cur_node.tgt_offset, sizeof (void *),
-			      true, cbufp);
-	}
+	if (!iterator_count || iterator_count[i] <= 1)
+	  {
+	    cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
+	    gomp_copy_host2dev (devicep, aq,
+				(void *) (tgt->tgt_start + map_num * sizeof (void *)),
+				(void *) &cur_node.tgt_offset, sizeof (void *),
+				true, cbufp);
+	    map_num++;
+	  }
     }
 
   if (cbufp)
@@ -1880,6 +2101,15 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
     }
 
   gomp_mutex_unlock (&devicep->lock);
+
+  if (iterators_p)
+    {
+      free (hostaddrs);
+      free (sizes);
+      free (kinds);
+      free (iterator_count);
+    }
+
   return tgt;
 }
 
@@ -2146,6 +2376,8 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
   size_t i;
   struct splay_tree_key_s cur_node;
   const int typemask = short_mapkind ? 0xff : 0x7;
+  bool iterators_p = false;
+  size_t *iterator_count = NULL;
 
   if (!devicep)
     return;
@@ -2153,6 +2385,10 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
   if (mapnum == 0)
     return;
 
+  if (short_mapkind)  /* OpenMP */
+    iterators_p = gomp_merge_iterator_maps (&mapnum, &hostaddrs, &sizes,
+					    &kinds, &iterator_count);
+
   gomp_mutex_lock (&devicep->lock);
   if (devicep->state == GOMP_DEVICE_FINALIZED)
     {
@@ -2246,6 +2482,14 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
 	  }
       }
   gomp_mutex_unlock (&devicep->lock);
+
+  if (iterators_p)
+    {
+      free (hostaddrs);
+      free (sizes);
+      free (kinds);
+      free (iterator_count);
+    }
 }
 
 static struct gomp_offload_icv_list *
@@ -2586,6 +2830,10 @@ gomp_unload_image_from_device (struct gomp_device_descr *devicep,
     }
 }
 
+#define GOMP_REQUIRES_NAME_BUF_LEN \
+  sizeof ("unified_address, unified_shared_memory, " \
+	  "self_maps, reverse_offload")
+
 static void
 gomp_requires_to_name (char *buf, size_t size, int requires_mask)
 {
@@ -2634,10 +2882,8 @@ GOMP_offload_register_ver (unsigned version, const void *host_table,
 
   if (omp_req && omp_requires_mask && omp_requires_mask != omp_req)
     {
-      char buf1[sizeof ("unified_address, unified_shared_memory, "
-			"self_maps, reverse_offload")];
-      char buf2[sizeof ("unified_address, unified_shared_memory, "
-			"self_maps, reverse_offload")];
+      char buf1[GOMP_REQUIRES_NAME_BUF_LEN];
+      char buf2[GOMP_REQUIRES_NAME_BUF_LEN];
       gomp_requires_to_name (buf2, sizeof (buf2),
 			     omp_req != GOMP_REQUIRES_TARGET_USED
 			     ? omp_req : omp_requires_mask);
@@ -4411,6 +4657,9 @@ GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high,
 void *
 omp_target_alloc (size_t size, int device_num)
 {
+  if (device_num == omp_default_device)
+    device_num = gomp_get_default_device ();
+
   if (device_num == omp_initial_device
       || device_num == gomp_get_num_devices ())
     return malloc (size);
@@ -4432,6 +4681,9 @@ omp_target_alloc (size_t size, int device_num)
 void
 omp_target_free (void *device_ptr, int device_num)
 {
+  if (device_num == omp_default_device)
+    device_num = gomp_get_default_device ();
+
   if (device_num == omp_initial_device
       || device_num == gomp_get_num_devices ())
     {
@@ -4455,9 +4707,163 @@ omp_target_free (void *device_ptr, int device_num)
   gomp_mutex_unlock (&devicep->lock);
 }
 
+void *
+gomp_managed_alloc (size_t size)
+{
+  struct gomp_task_icv *icv = gomp_icv (false);
+  struct gomp_device_descr *devicep = resolve_device (icv->default_device_var,
+						      false);
+  if (devicep == NULL)
+    return NULL;
+
+  void *ret = NULL;
+  gomp_mutex_lock (&devicep->lock);
+  if (devicep->managed_alloc_func)
+    ret = devicep->managed_alloc_func (devicep->target_id, size);
+  gomp_mutex_unlock (&devicep->lock);
+  return ret;
+}
+
+void
+gomp_managed_free (void *device_ptr)
+{
+  if (device_ptr == NULL)
+    return;
+
+  struct gomp_task_icv *icv = gomp_icv (false);
+  struct gomp_device_descr *devicep = resolve_device (icv->default_device_var,
+						      false);
+  if (devicep == NULL)
+    gomp_fatal ("attempted to free managed memory at %p, but the default "
+		"device is set to the host device", device_ptr);
+
+  gomp_mutex_lock (&devicep->lock);
+  if (!devicep->managed_free_func
+      || !devicep->managed_free_func (devicep->target_id, device_ptr))
+    {
+      gomp_mutex_unlock (&devicep->lock);
+      gomp_fatal ("error in freeing managed memory block at %p", device_ptr);
+    }
+  gomp_mutex_unlock (&devicep->lock);
+}
+
+/* Device (really: libgomp plugin) to use for page-locked memory.  We
+   assume there is either none or exactly one such device for the lifetime of
+   the process.  */
+
+static struct gomp_device_descr *device_for_page_locked
+  = /* uninitialized */ (void *) -1;
+
+static struct gomp_device_descr *
+get_device_for_page_locked (void)
+{
+  struct gomp_device_descr *device;
+#ifdef HAVE_SYNC_BUILTINS
+  device
+    = __atomic_load_n (&device_for_page_locked, MEMMODEL_RELAXED);
+  if (device == (void *) -1)
+    {
+      gomp_init_targets_once ();
+
+      device = NULL;
+      for (int i = 0; i < num_devices; ++i)
+	{
+	  /* We consider only the first device of potentially several of the
+	     same type as this functionality is not specific to an individual
+	     offloading device, but instead relates to the host-side
+	     implementation of the respective offloading implementation.  */
+	  if (devices[i].target_id != 0)
+	    continue;
+
+	  if (!devices[i].page_locked_host_alloc_func)
+	    continue;
+
+	  if (device)
+	    gomp_fatal ("Unclear how %s and %s libgomp plugins may"
+			" simultaneously provide functionality"
+			" for page-locked memory",
+			device->name, devices[i].name);
+
+	  device = &devices[i];
+	  gomp_debug (0, "Using device %s for page-locked memory\n",
+		      device->name);
+	}
+
+      struct gomp_device_descr *device_old
+	= __atomic_exchange_n (&device_for_page_locked, device,
+			       MEMMODEL_RELAXED);
+      assert (device_old == (void *) -1
+	      /* We shouldn't have concurrently found a different or no
+		 device.  */
+	      || device_old == device);
+    }
+#else /* !HAVE_SYNC_BUILTINS */
+  (void) &device_for_page_locked;
+  device = NULL;
+#endif /* HAVE_SYNC_BUILTINS */
+
+  return device;
+}
+
+/* Allocate page-locked host memory.
+   Returns whether we have a device capable of that.  */
+
+attribute_hidden bool
+gomp_page_locked_host_alloc (void **ptr, size_t size)
+{
+  struct gomp_device_descr *device = get_device_for_page_locked ();
+  if (device)
+    {
+      gomp_mutex_lock (&device->lock);
+      if (device->state == GOMP_DEVICE_UNINITIALIZED)
+	gomp_init_device (device);
+      else if (device->state == GOMP_DEVICE_FINALIZED)
+	{
+	  gomp_mutex_unlock (&device->lock);
+	  gomp_fatal ("Device %s used for page-locked memory is finalized",
+		      device->name);
+	}
+      gomp_mutex_unlock (&device->lock);
+
+      if (!device->page_locked_host_alloc_func (ptr, size))
+	gomp_fatal ("Failed to allocate page-locked host memory"
+		    " via %s libgomp plugin",
+		    device->name);
+    }
+  return device != NULL;
+}
+
+/* Free page-locked host memory.
+   This must only be called if 'gomp_page_locked_host_alloc' returned
+   'true'.  */
+
+attribute_hidden void
+gomp_page_locked_host_free (void *ptr)
+{
+  struct gomp_device_descr *device = get_device_for_page_locked ();
+  assert (device);
+
+  gomp_mutex_lock (&device->lock);
+  assert (device->state != GOMP_DEVICE_UNINITIALIZED);
+  if (device->state == GOMP_DEVICE_FINALIZED)
+    {
+      gomp_mutex_unlock (&device->lock);
+      return;
+    }
+  gomp_mutex_unlock (&device->lock);
+
+  if (!device->page_locked_host_free_func (ptr))
+    gomp_fatal ("Failed to free page-locked host memory"
+		" via %s libgomp plugin",
+		device->name);
+}
+
 int
 omp_target_is_present (const void *ptr, int device_num)
 {
+  if (device_num == omp_default_device)
+    device_num = gomp_get_default_device ();
+
   if (device_num == omp_initial_device
       || device_num == gomp_get_num_devices ())
     return 1;
@@ -4490,6 +4896,11 @@ omp_target_memcpy_check (int dst_device_num, int src_device_num,
 			 struct gomp_device_descr **dst_devicep,
 			 struct gomp_device_descr **src_devicep)
 {
+  if (dst_device_num == omp_default_device)
+    dst_device_num = gomp_get_default_device ();
+  if (src_device_num == omp_default_device)
+    src_device_num = gomp_get_default_device ();
+
   if (dst_device_num != gomp_get_num_devices ()
       /* Above gomp_get_num_devices has to be called unconditionally.  */
       && dst_device_num != omp_initial_device)
@@ -4948,10 +5359,101 @@ omp_target_memcpy_rect_async (void *dst, const void *src, size_t element_size,
   return 0;
 }
 
+static void
+omp_target_memset_int (void *ptr, int val, size_t count,
+		       struct gomp_device_descr *devicep)
+{
+  if (__builtin_expect (count == 0, 0))
+    return;
+  if (devicep == NULL)
+    {
+      memset (ptr, val, count);
+      return;
+    }
+
+  gomp_mutex_lock (&devicep->lock);
+  int ret = devicep->memset_func (devicep->target_id, ptr, val, count);
+  gomp_mutex_unlock (&devicep->lock);
+  if (!ret)
+    gomp_fatal ("omp_target_memset failed");
+}
+
+void*
+omp_target_memset (void *ptr, int val, size_t count, int device_num)
+{
+  if (device_num == omp_default_device)
+    device_num = gomp_get_default_device ();
+
+  struct gomp_device_descr *devicep;
+  if (device_num == omp_initial_device
+      || device_num == gomp_get_num_devices ()
+      || (devicep = resolve_device (device_num, false)) == NULL
+      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+    devicep = NULL;
+
+  omp_target_memset_int (ptr, val, count, devicep);
+  return ptr;
+}
+
+typedef struct
+{
+  void *ptr;
+  size_t count;
+  struct gomp_device_descr *devicep;
+  int val;
+} omp_target_memset_data;
+
+static void
+omp_target_memset_async_helper (void *args)
+{
+  omp_target_memset_data *a = args;
+  omp_target_memset_int (a->ptr, a->val, a->count, a->devicep);
+}
+
+void*
+omp_target_memset_async (void *ptr, int val, size_t count, int device_num,
+			 int depobj_count, omp_depend_t *depobj_list)
+{
+  void *depend[depobj_count + 5];
+  struct gomp_device_descr *devicep;
+  unsigned flags = 0;
+  int i;
+
+  if (device_num == omp_default_device)
+    device_num = gomp_get_default_device ();
+
+  if (device_num == omp_initial_device
+      || device_num == gomp_get_num_devices ()
+      || (devicep = resolve_device (device_num, false)) == NULL
+      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+    devicep = NULL;
+
+  omp_target_memset_data s = {.ptr = ptr, .val = val, .count = count,
+			      .devicep = devicep};
+  if (depobj_count > 0 && depobj_list != NULL)
+    {
+      flags |= GOMP_TASK_FLAG_DEPEND;
+      depend[0] = 0;
+      depend[1] = (void *) (uintptr_t) depobj_count;
+      depend[2] = depend[3] = depend[4] = 0;
+      for (i = 0; i < depobj_count; ++i)
+	depend[i + 5] = &depobj_list[i];
+    }
+
+  GOMP_task (omp_target_memset_async_helper, &s, NULL, sizeof (s),
+	     __alignof__ (s), true, flags, depend, 0, NULL);
+  return ptr;
+}
+
 int
 omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
 			  size_t size, size_t device_offset, int device_num)
 {
+  if (device_num == omp_default_device)
+    device_num = gomp_get_default_device ();
+
   if (device_num == omp_initial_device
       || device_num == gomp_get_num_devices ())
     return EINVAL;
@@ -5049,6 +5551,9 @@ omp_target_disassociate_ptr (const void *ptr, int device_num)
 void *
 omp_get_mapped_ptr (const void *ptr, int device_num)
 {
+  if (device_num == omp_default_device)
+    device_num = gomp_get_default_device ();
+
   if (device_num == omp_initial_device
       || device_num == omp_get_initial_device ())
     return (void *) ptr;
@@ -5085,6 +5590,9 @@ omp_get_mapped_ptr (const void *ptr, int device_num)
 int
 omp_target_is_accessible (const void *ptr, size_t size, int device_num)
 {
+  if (device_num == omp_default_device)
+    device_num = gomp_get_default_device ();
+
   if (device_num == omp_initial_device
       || device_num == gomp_get_num_devices ())
     return true;
@@ -5102,6 +5610,8 @@ int
 omp_pause_resource (omp_pause_resource_t kind, int device_num)
 {
   (void) kind;
+  if (device_num == omp_default_device)
+    device_num = gomp_get_default_device ();
   if (device_num == omp_initial_device
       || device_num == gomp_get_num_devices ())
     return gomp_pause_host ();
@@ -5412,6 +5922,9 @@ gomp_get_uid_for_device (struct gomp_device_descr *devicep, int device_num)
 const char *
 omp_get_uid_from_device (int device_num)
 {
+  if (device_num == omp_default_device)
+    device_num = gomp_get_default_device ();
+
   if (device_num < omp_initial_device || device_num > gomp_get_num_devices ())
     return NULL;
 
@@ -5494,6 +6007,10 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
   DLSYM (unload_image);
   DLSYM (alloc);
   DLSYM (free);
+  DLSYM_OPT (managed_alloc, managed_alloc);
+  DLSYM_OPT (managed_free, managed_free);
+  DLSYM_OPT (page_locked_host_alloc, page_locked_host_alloc);
+  DLSYM_OPT (page_locked_host_free, page_locked_host_free);
   DLSYM (dev2host);
   DLSYM (host2dev);
   DLSYM_OPT (memcpy2d, memcpy2d);
@@ -5513,6 +6030,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
       DLSYM_OPT (async_run, async_run);
       DLSYM_OPT (can_run, can_run);
       DLSYM (dev2dev);
+      DLSYM (memset);
     }
   if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
     {
@@ -5531,6 +6049,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
 	  || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
 	  || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
 	  || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
+	  || !DLSYM_OPT (openacc.async.dev2dev, openacc_async_dev2dev)
 	  || !DLSYM_OPT (openacc.get_property, openacc_get_property))
 	{
 	  /* Require all the OpenACC handlers if we have
@@ -5647,8 +6166,7 @@ gomp_target_init (void)
 		    found = true;
 		if (found)
 		  {
-		    char buf[sizeof ("unified_address, unified_shared_memory, "
-				     "reverse_offload")];
+		    char buf[GOMP_REQUIRES_NAME_BUF_LEN];
 		    gomp_requires_to_name (buf, sizeof (buf), omp_req);
 		    char *name = (char *) malloc (cur_len + 1);
 		    memcpy (name, cur, cur_len);
diff --git a/libgomp/testsuite/Makefile.in b/libgomp/testsuite/Makefile.in
index 4155350..477bfdb 100644
--- a/libgomp/testsuite/Makefile.in
+++ b/libgomp/testsuite/Makefile.in
@@ -92,9 +92,11 @@ subdir = testsuite
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
 	$(top_srcdir)/../config/ax_count_cpus.m4 \
+	$(top_srcdir)/../config/clang-plugin.m4 \
 	$(top_srcdir)/../config/depstand.m4 \
 	$(top_srcdir)/../config/enable.m4 \
 	$(top_srcdir)/../config/futex.m4 \
+	$(top_srcdir)/../config/gcc-plugin.m4 \
 	$(top_srcdir)/../config/lead-dot.m4 \
 	$(top_srcdir)/../config/lthostflags.m4 \
 	$(top_srcdir)/../config/multi.m4 \
@@ -175,6 +177,7 @@ LIBOBJS = @LIBOBJS@
 LIBS = @LIBS@
 LIBTOOL = @LIBTOOL@
 LIPO = @LIPO@
+LLVM_CONFIG = @LLVM_CONFIG@
 LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 MAINT = @MAINT@
diff --git a/libgomp/testsuite/lib/libgomp.exp b/libgomp/testsuite/lib/libgomp.exp
index 54f2f708..cce2e93 100644
--- a/libgomp/testsuite/lib/libgomp.exp
+++ b/libgomp/testsuite/lib/libgomp.exp
@@ -30,6 +30,7 @@ load_gcc_lib scandump.exp
 load_gcc_lib scanlang.exp
 load_gcc_lib scanrtl.exp
 load_gcc_lib scansarif.exp
+load_gcc_lib scanhtml.exp
 load_gcc_lib scantree.exp
 load_gcc_lib scanltrans.exp
 load_gcc_lib scanoffload.exp
@@ -294,7 +295,9 @@ proc libgomp_target_compile { source dest type options } {
 	set options [concat "$ALWAYS_CFLAGS" $options]
     }
 
-    set options [dg-additional-files-options $options $source $dest $type]
+    if { $source != "" } {
+        set options [dg-additional-files-options $options $source $dest $type]
+    }
 
     set result [target_compile $source $dest $type $options]
 
@@ -721,3 +724,71 @@ int main() {
     return 0;
 } } "-lhipblas" ]
 }
+
+# Return 1 if a 'requires unified_shared_memory' target region runs offloaded.
+
+proc check_effective_target_omp_usm { } {
+    if { [check_effective_target_offload_device_nvptx]
+         || [check_effective_target_offload_target_amdgcn] } {
+	if [check_runtime usm_available_ {
+	    #include <omp.h>
+	    #pragma omp requires unified_shared_memory
+	    int main ()
+	    {
+	      int a;
+	      #pragma omp target map(from: a)
+		a = omp_is_initial_device ();
+	      return a;
+	    }
+	} ] {
+	  return 1
+	}
+    }
+
+    return 0
+}
+
+# return 1 if OpenMP Device Managed Memory is supported
+
+proc check_effective_target_omp_managedmem { } {
+    if { [check_effective_target_offload_device_nvptx] } {
+	return 1
+    }
+
+    if { [libgomp_check_effective_target_offload_target "amdgcn"] } {
+	if [check_runtime_nocache managed_available_ {
+	    #include <omp.h>
+	    #include <stdlib.h>
+	    int main ()
+	    {
+	      const omp_alloctrait_t traits[] = {
+		  { omp_atk_fallback, omp_atv_null_fb }
+	      };
+	      omp_allocator_handle_t managed_no_fallback
+	         = omp_init_allocator (ompx_gnu_managed_mem_space, 1, traits);
+	      void *a = omp_alloc (16, managed_no_fallback);
+	      return a == NULL;
+	    }
+	} ] {
+	  return 1
+	}
+    }
+
+    return 0
+}
+
+# Return 1 if an OpenMP target region built with -mxnack=on for amdgcn runs.
+
+proc check_effective_target_offload_target_amdgcn_with_xnack { } {
+    if { [libgomp_check_effective_target_offload_target "amdgcn"] } {
+	return [check_runtime amd_xnack_ {
+	   int main () {
+	     #pragma omp target
+	       ;
+	     return 0;
+	   }
+	} "-foffload-options=amdgcn-amdhsa=-mxnack=on" ]
+    }
+
+    return 0
+}
diff --git a/libgomp/testsuite/libgomp.c++/alloc-managed-1.C b/libgomp/testsuite/libgomp.c++/alloc-managed-1.C
new file mode 100644
index 0000000..86de0aa
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/alloc-managed-1.C
@@ -0,0 +1,36 @@
+// { dg-do run }
+// { dg-require-effective-target omp_managedmem }
+// { dg-additional-options -foffload-options=amdgcn-amdhsa=-mxnack=on { target offload_target_amdgcn_with_xnack } }
+
+// Check that the ompx::allocator::gnu_managed_mem allocator can allocate
+// Managed Memory, and that host and target can see the data, at the same
+// address, without a mapping.
+
+#include <omp.h>
+#include <cstdint>
+#include <memory>
+
+int
+main ()
+{
+  using Allocator = ompx::allocator::gnu_managed_mem<int>;
+  using Traits = std::allocator_traits<Allocator>;
+
+  Allocator alloc;
+  int *a = Traits::allocate (alloc, 1);
+  if (!a)
+    __builtin_abort ();
+
+  Traits::construct (alloc, a, 42);
+  std::uintptr_t a_p = reinterpret_cast<std::uintptr_t>(a);
+
+  #pragma omp target is_device_ptr(a)
+    {
+      if (*a != 42 || a_p != reinterpret_cast<std::uintptr_t>(a))
+	__builtin_abort ();
+    }
+
+  Traits::destroy (alloc, a);
+  Traits::deallocate (alloc, a, 1);
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c++/allocate-1.C b/libgomp/testsuite/libgomp.c++/allocate-1.C
index 0876719..7b4904f 100644
--- a/libgomp/testsuite/libgomp.c++/allocate-1.C
+++ b/libgomp/testsuite/libgomp.c++/allocate-1.C
@@ -90,7 +90,7 @@ foo (int &x, int &y, int &r, int &l, int (&l2)[4], int &l3, int &n, int *&p,
 	if ((fl & 1) && (((uintptr_t) &l2[0] | (uintptr_t) &l3) & 63) != 0)
 	  abort ();
       }
-    #pragma omp for reduction(+:p[2:px], q[:3], r2) allocate(h: p, q, r2)
+    #pragma omp for reduction(+:p[2:px], q[ :3], r2) allocate(h: p, q, r2)
     for (i = 0; i < 32; i++)
       {
 	p[2] += i;
diff --git a/libgomp/testsuite/libgomp.c++/atomic-12.C b/libgomp/testsuite/libgomp.c++/atomic-12.C
index d1ae9d8..5b1a7f3 100644
--- a/libgomp/testsuite/libgomp.c++/atomic-12.C
+++ b/libgomp/testsuite/libgomp.c++/atomic-12.C
@@ -15,17 +15,17 @@ main ()
   int v, *p;
   p = &x;
   #pragma omp atomic update
-    p[foo (), 0] = 16 + 6 - p[foo (), 0];
+    p[(foo (), 0)] = 16 + 6 - p[(foo (), 0)];
   #pragma omp atomic read
     v = x;
   if (cnt != 2 || v != 16)
     abort ();
   #pragma omp atomic capture
-    v = p[foo () + foo (), 0] = p[foo () + foo (), 0] + 3;
+    v = p[(foo () + foo (), 0)] = p[(foo () + foo (), 0)] + 3;
   if (cnt != 6 || v != 19)
     abort ();
   #pragma omp atomic capture
-    v = p[foo (), 0] = 12 * 1 / 2 + (foo (), 0) + p[foo (), 0];
+    v = p[(foo (), 0)] = 12 * 1 / 2 + ((foo (), 0)) + p[(foo (), 0)];
   if (cnt != 9 || v != 25)
     abort ();
   #pragma omp atomic capture
@@ -46,7 +46,7 @@ main ()
     abort ();
   #pragma omp atomic capture
     {
-      v = p[foo (), 0]; p[foo (), 0] = (foo (), 7) ? 13 : foo () + 6;
+      v = p[(foo (), 0)]; p[(foo (), 0)] = (foo (), 7) ? 13 : foo () + 6;
     }
   if (cnt != 19 || v != 1)
     abort ();
diff --git a/libgomp/testsuite/libgomp.c++/atomic-13.C b/libgomp/testsuite/libgomp.c++/atomic-13.C
index 0569d1c..1c27119 100644
--- a/libgomp/testsuite/libgomp.c++/atomic-13.C
+++ b/libgomp/testsuite/libgomp.c++/atomic-13.C
@@ -17,17 +17,17 @@ bar ()
   T v, *p;
   p = &x;
   #pragma omp atomic update
-    p[foo (), 0] = 16 + 6 - p[foo (), 0];
+    p[(foo (), 0)] = 16 + 6 - p[(foo (), 0)];
   #pragma omp atomic read
     v = x;
   if (cnt != 2 || v != 16)
     abort ();
   #pragma omp atomic capture
-    v = p[foo () + foo (), 0] = p[foo () + foo (), 0] + 3;
+    v = p[(foo () + foo (), 0)] = p[(foo () + foo (), 0)] + 3;
   if (cnt != 6 || v != 19)
     abort ();
   #pragma omp atomic capture
-    v = p[foo (), 0] = 12 * 1 / 2 + (foo (), 0) + p[foo (), 0];
+    v = p[(foo (), 0)] = 12 * 1 / 2 + ((foo (), 0)) + p[(foo (), 0)];
   if (cnt != 9 || v != 25)
     abort ();
   #pragma omp atomic capture
@@ -48,7 +48,7 @@ bar ()
     abort ();
   #pragma omp atomic capture
     {
-      v = p[foo (), 0]; p[foo (), 0] = (foo (), 7) ? 13 : foo () + 6;
+      v = p[(foo (), 0)]; p[(foo (), 0)] = (foo (), 7) ? 13 : foo () + 6;
     }
   if (cnt != 19 || v != 1)
     abort ();
diff --git a/libgomp/testsuite/libgomp.c++/atomic-8.C b/libgomp/testsuite/libgomp.c++/atomic-8.C
index 744b340..9b7fbaa 100644
--- a/libgomp/testsuite/libgomp.c++/atomic-8.C
+++ b/libgomp/testsuite/libgomp.c++/atomic-8.C
@@ -72,22 +72,22 @@ main ()
     abort ();
   p = &x;
   #pragma omp atomic update
-    p[foo (), 0] = p[foo (), 0] - 16;
+    p[(foo (), 0)] = p[(foo (), 0)] - 16;
   #pragma omp atomic read
     v = x;
   if (cnt != 2 || v != 0)
     abort ();
   #pragma omp atomic capture
     {
-      p[foo (), 0] += 6;
-      v = p[foo (), 0];
+      p[(foo (), 0)] += 6;
+      v = p[(foo (), 0)];
     }
   if (cnt != 4 || v != 6)
     abort ();
   #pragma omp atomic capture
     {
-      v = p[foo (), 0];
-      p[foo (), 0] += 6;
+      v = p[(foo (), 0)];
+      p[(foo (), 0)] += 6;
     }
   if (cnt != 6 || v != 6)
     abort ();
@@ -97,15 +97,15 @@ main ()
     abort ();
   #pragma omp atomic capture
     {
-      p[foo (), 0] = p[foo (), 0] + 6;
-      v = p[foo (), 0];
+      p[(foo (), 0)] = p[(foo (), 0)] + 6;
+      v = p[(foo (), 0)];
     }
   if (cnt != 9 || v != 18)
     abort ();
   #pragma omp atomic capture
     {
-      v = p[foo (), 0];
-      p[foo (), 0] = p[foo (), 0] + 6;
+      v = p[(foo (), 0)];
+      p[(foo (), 0)] = p[(foo (), 0)] + 6;
     }
   if (cnt != 12 || v != 18)
     abort ();
@@ -114,23 +114,23 @@ main ()
   if (v != 24)
     abort ();
   #pragma omp atomic capture
-  { v = p[foo (), 0]; p[foo (), 0]++; }
+  { v = p[(foo (), 0)]; p[(foo (), 0)]++; }
   #pragma omp atomic capture
-  { v = p[foo (), 0]; ++p[foo (), 0]; }
+  { v = p[(foo (), 0)]; ++p[(foo (), 0)]; }
   #pragma omp atomic capture
-  { p[foo (), 0]++; v = p[foo (), 0]; }
+  { p[(foo (), 0)]++; v = p[(foo (), 0)]; }
   #pragma omp atomic capture
-  { ++p[foo (), 0]; v = p[foo (), 0]; }
+  { ++p[(foo (), 0)]; v = p[(foo (), 0)]; }
   if (cnt != 20 || v != 28)
     abort ();
   #pragma omp atomic capture
-  { v = p[foo (), 0]; p[foo (), 0]--; }
+  { v = p[(foo (), 0)]; p[(foo (), 0)]--; }
   #pragma omp atomic capture
-  { v = p[foo (), 0]; --p[foo (), 0]; }
+  { v = p[(foo (), 0)]; --p[(foo (), 0)]; }
   #pragma omp atomic capture
-  { p[foo (), 0]--; v = p[foo (), 0]; }
+  { p[(foo (), 0)]--; v = p[(foo (), 0)]; }
   #pragma omp atomic capture
-  { --p[foo (), 0]; v = p[foo (), 0]; }
+  { --p[(foo (), 0)]; v = p[(foo (), 0)]; }
   if (cnt != 28 || v != 24)
     abort ();
   return 0;
diff --git a/libgomp/testsuite/libgomp.c++/atomic-9.C b/libgomp/testsuite/libgomp.c++/atomic-9.C
index ece1bf3..937cc48 100644
--- a/libgomp/testsuite/libgomp.c++/atomic-9.C
+++ b/libgomp/testsuite/libgomp.c++/atomic-9.C
@@ -75,22 +75,22 @@ bar ()
     abort ();
   p = &x;
   #pragma omp atomic update
-    p[foo (), 0] = p[foo (), 0] - 16;
+    p[(foo (), 0)] = p[(foo (), 0)] - 16;
   #pragma omp atomic read
     v = x;
   if (cnt != 2 || v != 0)
     abort ();
   #pragma omp atomic capture
     {
-      p[foo (), 0] += 6;
-      v = p[foo (), 0];
+      p[(foo (), 0)] += 6;
+      v = p[(foo (), 0)];
     }
   if (cnt != 4 || v != 6)
     abort ();
   #pragma omp atomic capture
     {
-      v = p[foo (), 0];
-      p[foo (), 0] += 6;
+      v = p[(foo (), 0)];
+      p[(foo (), 0)] += 6;
     }
   if (cnt != 6 || v != 6)
     abort ();
@@ -100,15 +100,15 @@ bar ()
     abort ();
   #pragma omp atomic capture
     {
-      p[foo (), 0] = p[foo (), 0] + 6;
-      v = p[foo (), 0];
+      p[(foo (), 0)] = p[(foo (), 0)] + 6;
+      v = p[(foo (), 0)];
     }
   if (cnt != 9 || v != 18)
     abort ();
   #pragma omp atomic capture
     {
-      v = p[foo (), 0];
-      p[foo (), 0] = p[foo (), 0] + 6;
+      v = p[(foo (), 0)];
+      p[(foo (), 0)] = p[(foo (), 0)] + 6;
     }
   if (cnt != 12 || v != 18)
     abort ();
@@ -117,23 +117,23 @@ bar ()
   if (v != 24)
     abort ();
   #pragma omp atomic capture
-  { v = p[foo (), 0]; p[foo (), 0]++; }
+  { v = p[(foo (), 0)]; p[(foo (), 0)]++; }
   #pragma omp atomic capture
-  { v = p[foo (), 0]; ++p[foo (), 0]; }
+  { v = p[(foo (), 0)]; ++p[(foo (), 0)]; }
   #pragma omp atomic capture
-  { p[foo (), 0]++; v = p[foo (), 0]; }
+  { p[(foo (), 0)]++; v = p[(foo (), 0)]; }
   #pragma omp atomic capture
-  { ++p[foo (), 0]; v = p[foo (), 0]; }
+  { ++p[(foo (), 0)]; v = p[(foo (), 0)]; }
   if (cnt != 20 || v != 28)
     abort ();
   #pragma omp atomic capture
-  { v = p[foo (), 0]; p[foo (), 0]--; }
+  { v = p[(foo (), 0)]; p[(foo (), 0)]--; }
   #pragma omp atomic capture
-  { v = p[foo (), 0]; --p[foo (), 0]; }
+  { v = p[(foo (), 0)]; --p[(foo (), 0)]; }
   #pragma omp atomic capture
-  { p[foo (), 0]--; v = p[foo (), 0]; }
+  { p[(foo (), 0)]--; v = p[(foo (), 0)]; }
   #pragma omp atomic capture
-  { --p[foo (), 0]; v = p[foo (), 0]; }
+  { --p[(foo (), 0)]; v = p[(foo (), 0)]; }
   if (cnt != 28 || v != 24)
     abort ();
 }
diff --git a/libgomp/testsuite/libgomp.c++/baseptrs-3.C b/libgomp/testsuite/libgomp.c++/baseptrs-3.C
index 39a48a4..4c4ffba 100644
--- a/libgomp/testsuite/libgomp.c++/baseptrs-3.C
+++ b/libgomp/testsuite/libgomp.c++/baseptrs-3.C
@@ -30,7 +30,7 @@ foo0 ()
 
   memset (my_c.a.ptr, 0, sizeof (int) * 10);
 
-  #pragma omp target map (my_c.a.ptr, my_c.a.ptr[:10])
+  #pragma omp target map (my_c.a.ptr, my_c.a.ptr[ :10])
   {
     for (int i = 0; i < 10; i++)
       my_c.a.ptr[i] = i;
@@ -41,7 +41,7 @@ foo0 ()
 
   memset (my_c.b.arr, 0, sizeof (int) * 10);
 
-  #pragma omp target map (my_c.b.arr[:10])
+  #pragma omp target map (my_c.b.arr[ :10])
   {
     for (int i = 0; i < 10; i++)
       my_c.b.arr[i] = i;
@@ -81,7 +81,7 @@ foo ()
 
   memset (my_c.a.ptr, 0, sizeof (int) * 10);
 
-  #pragma omp target map (my_c.a.ptr, my_c.a.ptr[:10])
+  #pragma omp target map (my_c.a.ptr, my_c.a.ptr[ :10])
   {
     for (int i = 0; i < 10; i++)
       my_c.a.ptr[i] = i;
@@ -92,7 +92,7 @@ foo ()
 
   memset (my_c.b.arr, 0, sizeof (int) * 10);
 
-  #pragma omp target map (my_c.b.arr[:10])
+  #pragma omp target map (my_c.b.arr[ :10])
   {
     for (int i = 0; i < 10; i++)
       my_c.b.arr[i] = i;
@@ -116,7 +116,7 @@ bar ()
 
   memset (my_cref.a.ptr, 0, sizeof (int) * 10);
 
-  #pragma omp target map (my_cref.a.ptr, my_cref.a.ptr[:10])
+  #pragma omp target map (my_cref.a.ptr, my_cref.a.ptr[ :10])
   {
     for (int i = 0; i < 10; i++)
       my_cref.a.ptr[i] = i;
@@ -127,7 +127,7 @@ bar ()
 
   memset (my_cref.b.arr, 0, sizeof (int) * 10);
 
-  #pragma omp target map (my_cref.b.arr[:10])
+  #pragma omp target map (my_cref.b.arr[ :10])
   {
     for (int i = 0; i < 10; i++)
       my_cref.b.arr[i] = i;
@@ -157,7 +157,7 @@ foop0 ()
 
   memset (my_c->a->ptr, 0, sizeof (int) * 10);
 
-  #pragma omp target map (my_c->a, my_c->a[:1], my_c->a->ptr, my_c->a->ptr[:10])
+  #pragma omp target map (my_c->a, my_c->a[ :1], my_c->a->ptr, my_c->a->ptr[ :10])
   {
     for (int i = 0; i < 10; i++)
       my_c->a->ptr[i] = i;
@@ -168,7 +168,7 @@ foop0 ()
 
   memset (my_c->b->arr, 0, sizeof (int) * 10);
 
-  #pragma omp target map (my_c->b, my_c->b[:1], my_c->b->arr[:10])
+  #pragma omp target map (my_c->b, my_c->b[ :1], my_c->b->arr[ :10])
   {
     for (int i = 0; i < 10; i++)
       my_c->b->arr[i] = i;
@@ -200,7 +200,7 @@ foop ()
 
   memset (my_c->a->ptr, 0, sizeof (int) * 10);
 
-  #pragma omp target map (my_c->a, my_c->a[:1], my_c->a->ptr, my_c->a->ptr[:10])
+  #pragma omp target map (my_c->a, my_c->a[ :1], my_c->a->ptr, my_c->a->ptr[ :10])
   {
     for (int i = 0; i < 10; i++)
       my_c->a->ptr[i] = i;
@@ -211,7 +211,7 @@ foop ()
 
   memset (my_c->b->arr, 0, sizeof (int) * 10);
 
-  #pragma omp target map (my_c->b, my_c->b[:1], my_c->b->arr[:10])
+  #pragma omp target map (my_c->b, my_c->b[ :1], my_c->b->arr[ :10])
   {
     for (int i = 0; i < 10; i++)
       my_c->b->arr[i] = i;
@@ -237,8 +237,8 @@ barp ()
 
   memset (my_cref->a->ptr, 0, sizeof (int) * 10);
 
-  #pragma omp target map (my_cref->a, my_cref->a[:1], my_cref->a->ptr, \
-			  my_cref->a->ptr[:10])
+  #pragma omp target map (my_cref->a, my_cref->a[ :1], my_cref->a->ptr, \
+			  my_cref->a->ptr[ :10])
   {
     for (int i = 0; i < 10; i++)
       my_cref->a->ptr[i] = i;
@@ -249,7 +249,7 @@ barp ()
 
   memset (my_cref->b->arr, 0, sizeof (int) * 10);
 
-  #pragma omp target map (my_cref->b, my_cref->b[:1], my_cref->b->arr[:10])
+  #pragma omp target map (my_cref->b, my_cref->b[ :1], my_cref->b->arr[ :10])
   {
     for (int i = 0; i < 10; i++)
       my_cref->b->arr[i] = i;
diff --git a/libgomp/testsuite/libgomp.c++/baseptrs-5.C b/libgomp/testsuite/libgomp.c++/baseptrs-5.C
index 16bdfff..5c18bf3 100644
--- a/libgomp/testsuite/libgomp.c++/baseptrs-5.C
+++ b/libgomp/testsuite/libgomp.c++/baseptrs-5.C
@@ -35,8 +35,8 @@ main ()
   memset (my_c->a->ptr2, 0, sizeof (int) * 10);
 
   #pragma omp target map (my_c->a, \
-			  my_c->a->ptr, my_c->a->ptr[:10], \
-			  my_c->a->ptr2, my_c->a->ptr2[:10])
+			  my_c->a->ptr, my_c->a->ptr[ :10], \
+			  my_c->a->ptr2, my_c->a->ptr2[ :10])
   {
     for (int i = 0; i < 10; i++)
       {
diff --git a/libgomp/testsuite/libgomp.c++/bdv_module1.C b/libgomp/testsuite/libgomp.c++/bdv_module1.C
new file mode 100644
index 0000000..0c82430
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/bdv_module1.C
@@ -0,0 +1,23 @@
+// Test that "begin declare variant" in a module interface is
+// visible to things that import the module.
+
+// { dg-additional-sources "bdv_module1_main.C" }
+// { dg-additional-options "-fmodules" }
+
+export module bdv_module1;
+
+export int
+test ()
+{
+  return 0;
+}
+
+#if _OPENMP
+#pragma omp begin declare variant match(construct={parallel})
+export int
+test ()
+{
+  return 1;
+}
+#pragma omp end declare variant
+#endif
diff --git a/libgomp/testsuite/libgomp.c++/bdv_module1_main.C b/libgomp/testsuite/libgomp.c++/bdv_module1_main.C
new file mode 100644
index 0000000..85f1fbf
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/bdv_module1_main.C
@@ -0,0 +1,16 @@
+// { dg-skip-if "" { *-*-* } }
+// Built with bdv_module1.C
+
+import bdv_module1;
+
+int
+main ()
+{
+  if (test () != 0)
+    __builtin_abort ();
+  #pragma omp parallel if(0)
+  {
+    if (test () != 1)
+      __builtin_abort ();
+  }
+}
diff --git a/libgomp/testsuite/libgomp.c++/bdv_module2.C b/libgomp/testsuite/libgomp.c++/bdv_module2.C
new file mode 100644
index 0000000..5152d32
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/bdv_module2.C
@@ -0,0 +1,15 @@
+// Test that "begin declare variant" in a module implementation unit is
+// visible only in that unit.
+
+// { dg-additional-sources "bdv_module2_impl.C bdv_module2_main.C" }
+// { dg-additional-options "-fmodules" }
+
+export module bdv_module2;
+
+export int
+test ()
+{
+  return 0;
+}
+
+export void doit ();
diff --git a/libgomp/testsuite/libgomp.c++/bdv_module2_impl.C b/libgomp/testsuite/libgomp.c++/bdv_module2_impl.C
new file mode 100644
index 0000000..8287ae5
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/bdv_module2_impl.C
@@ -0,0 +1,26 @@
+// { dg-skip-if "" { *-*-* } }
+// Built with bdv_module2.C
+
+module bdv_module2;
+
+#if _OPENMP
+#pragma omp begin declare variant match(construct={teams})
+int
+test ()
+{
+  return -1;
+}
+#pragma omp end declare variant
+#endif
+
+void
+doit ()
+{
+  if (test () != 0)
+    __builtin_abort ();
+  #pragma omp teams
+  {
+    if (test () != -1)
+      __builtin_abort ();
+  }
+}
diff --git a/libgomp/testsuite/libgomp.c++/bdv_module2_main.C b/libgomp/testsuite/libgomp.c++/bdv_module2_main.C
new file mode 100644
index 0000000..e3909f0
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/bdv_module2_main.C
@@ -0,0 +1,20 @@
+// { dg-skip-if "" { *-*-* } }
+// Built with bdv_module2.C
+
+import bdv_module2;
+
+int
+main ()
+{
+  // Calls to test from doit() should invoke the omp teams variant
+  // present in the TU where it is defined.
+  doit ();
+  // Calls to test from here shouldn't.
+  if (test () != 0)
+    __builtin_abort ();
+  #pragma omp teams
+  {
+    if (test () != 0)
+      __builtin_abort ();
+  }
+}
diff --git a/libgomp/testsuite/libgomp.c++/bdv_module3.C b/libgomp/testsuite/libgomp.c++/bdv_module3.C
new file mode 100644
index 0000000..3afe4fb
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/bdv_module3.C
@@ -0,0 +1,27 @@
+// Test that "begin declare variant" in a module interface is
+// visible to things that import the module, and that it works in
+// conjunction with additional "begin declare variant"s local
+// to a module implementation TU.
+
+// { dg-additional-sources "bdv_module3_impl.C bdv_module3_main.C" }
+// { dg-additional-options "-fmodules" }
+
+export module bdv_module3;
+
+export int
+test ()
+{
+  return 0;
+}
+
+#if _OPENMP
+#pragma omp begin declare variant match(construct={parallel})
+export int
+test ()
+{
+  return 1;
+}
+#pragma omp end declare variant
+#endif
+
+export void doit ();
diff --git a/libgomp/testsuite/libgomp.c++/bdv_module3_impl.C b/libgomp/testsuite/libgomp.c++/bdv_module3_impl.C
new file mode 100644
index 0000000..5e79873
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/bdv_module3_impl.C
@@ -0,0 +1,31 @@
+// { dg-skip-if "" { *-*-* } }
+// Built with bdv_module3.C
+
+module bdv_module3;
+
+#if _OPENMP
+#pragma omp begin declare variant match(construct={teams})
+int
+test ()
+{
+  return -1;
+}
+#pragma omp end declare variant
+#endif
+
+void
+doit ()
+{
+  if (test () != 0)
+    __builtin_abort ();
+  #pragma omp teams
+  {
+    if (test () != -1)
+      __builtin_abort ();
+  }
+  #pragma omp parallel if(0)
+  {
+    if (test () != 1)
+      __builtin_abort ();
+  }
+}
diff --git a/libgomp/testsuite/libgomp.c++/bdv_module3_main.C b/libgomp/testsuite/libgomp.c++/bdv_module3_main.C
new file mode 100644
index 0000000..08d9279
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/bdv_module3_main.C
@@ -0,0 +1,25 @@
+// { dg-skip-if "" { *-*-* } }
+// Built with bdv_module3.C
+
+import bdv_module3;
+
+int
+main ()
+{
+  // Calls to test from doit() should invoke the omp teams variant
+  // present in the TU where it is defined.
+  doit ();
+  // Calls to test from here shouldn't.
+  if (test () != 0)
+    __builtin_abort ();
+  #pragma omp teams
+  {
+    if (test () != 0)
+      __builtin_abort ();
+  }
+  #pragma omp parallel if(0)
+  {
+    if (test () != 1)
+      __builtin_abort ();
+  }
+}
diff --git a/libgomp/testsuite/libgomp.c++/class-array-1.C b/libgomp/testsuite/libgomp.c++/class-array-1.C
index d8d3f7f..b6dd34b 100644
--- a/libgomp/testsuite/libgomp.c++/class-array-1.C
+++ b/libgomp/testsuite/libgomp.c++/class-array-1.C
@@ -16,14 +16,14 @@ public:
 
   void incr_with_this (int c)
   {
-#pragma omp target map(this->array[:N])
+#pragma omp target map(this->array[ :N])
     for (int i = 0; i < N; i++)
       array[i] += c;
   }
 
   void incr_without_this (int c)
   {
-#pragma omp target map(array[:N])
+#pragma omp target map(array[ :N])
     for (int i = 0; i < N; i++)
       array[i] += c;
   }
diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-1.C b/libgomp/testsuite/libgomp.c++/declare-mapper-1.C
new file mode 100644
index 0000000..aba4f42
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/declare-mapper-1.C
@@ -0,0 +1,87 @@
+// { dg-do run }
+
+#include <cstdlib>
+#include <cassert>
+
+#define N 64
+
+struct points
+{
+  double *x;
+  double *y;
+  double *z;
+  size_t len;
+};
+
+#pragma omp declare mapper(points p) map(to:p.x, p.y, p.z) \
+				     map(p.x[0:p.len]) \
+				     map(p.y[0:p.len]) \
+				     map(p.z[0:p.len])
+
+struct shape
+{
+  points tmp;
+  points *pts;
+  int metadata[128];
+};
+
+#pragma omp declare mapper(shape s) map(tofrom:s.pts, *s.pts) map(alloc:s.tmp)
+
+void
+alloc_points (points *pts, size_t sz)
+{
+  pts->x = new double[sz];
+  pts->y = new double[sz];
+  pts->z = new double[sz];
+  pts->len = sz;
+  for (int i = 0; i < sz; i++)
+    pts->x[i] = pts->y[i] = pts->z[i] = 0;
+}
+
+int main (int argc, char *argv[])
+{
+  shape myshape;
+  points mypts;
+
+  myshape.pts = &mypts;
+
+  alloc_points (&myshape.tmp, N);
+  myshape.pts = new points;
+  alloc_points (myshape.pts, N);
+
+  #pragma omp target map(myshape)
+  {
+    for (int i = 0; i < N; i++)
+      {
+	myshape.pts->x[i]++;
+	myshape.pts->y[i]++;
+	myshape.pts->z[i]++;
+      }
+  }
+
+  for (int i = 0; i < N; i++)
+    {
+      assert (myshape.pts->x[i] == 1);
+      assert (myshape.pts->y[i] == 1);
+      assert (myshape.pts->z[i] == 1);
+    }
+
+  #pragma omp target
+  {
+    for (int i = 0; i < N; i++)
+      {
+	myshape.pts->x[i]++;
+	myshape.pts->y[i]++;
+	myshape.pts->z[i]++;
+      }
+  }
+
+  for (int i = 0; i < N; i++)
+    {
+      assert (myshape.pts->x[i] == 2);
+      assert (myshape.pts->y[i] == 2);
+      assert (myshape.pts->z[i] == 2);
+    }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-2.C b/libgomp/testsuite/libgomp.c++/declare-mapper-2.C
new file mode 100644
index 0000000..d848fdb
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/declare-mapper-2.C
@@ -0,0 +1,55 @@
+// { dg-do run }
+
+#include <cassert>
+
+#define N 256
+
+struct doublebuf
+{
+  int buf_a[N][N];
+  int buf_b[N][N];
+};
+
+#pragma omp declare mapper(lo:doublebuf b) map(b.buf_a[0:N/2][0:N]) \
+					   map(b.buf_b[0:N/2][0:N])
+
+#pragma omp declare mapper(hi:doublebuf b) map(b.buf_a[N/2:N/2][0:N]) \
+					   map(b.buf_b[N/2:N/2][0:N])
+
+int main (int argc, char *argv[])
+{
+  doublebuf db;
+
+  for (int i = 0; i < N; i++)
+    for (int j = 0; j < N; j++)
+      db.buf_a[i][j] = db.buf_b[i][j] = 0;
+
+  #pragma omp target map(mapper(lo), tofrom:db)
+  {
+    for (int i = 0; i < N / 2; i++)
+      for (int j = 0; j < N; j++)
+	{
+	  db.buf_a[i][j]++;
+	  db.buf_b[i][j]++;
+	}
+  }
+
+  #pragma omp target map(mapper(hi), tofrom:db)
+  {
+    for (int i = N / 2; i < N; i++)
+      for (int j = 0; j < N; j++)
+	{
+	  db.buf_a[i][j]++;
+	  db.buf_b[i][j]++;
+	}
+  }
+
+  for (int i = 0; i < N; i++)
+    for (int j = 0; j < N; j++)
+      {
+	assert (db.buf_a[i][j] == 1);
+	assert (db.buf_b[i][j] == 1);
+      }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-3.C b/libgomp/testsuite/libgomp.c++/declare-mapper-3.C
new file mode 100644
index 0000000..ea9b7de
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/declare-mapper-3.C
@@ -0,0 +1,63 @@
+// { dg-do run }
+
+#include <cstdlib>
+#include <cassert>
+
+struct S {
+  int *myarr;
+};
+
+#pragma omp declare mapper (S s) map(to:s.myarr) map (tofrom: s.myarr[0:20])
+
+namespace A {
+#pragma omp declare mapper (S s) map(to:s.myarr) map (tofrom: s.myarr[0:100])
+}
+
+namespace B {
+#pragma omp declare mapper (S s) map(to:s.myarr) map (tofrom: s.myarr[100:100])
+}
+
+namespace A
+{
+  void incr_a (S my_s)
+  {
+#pragma omp target
+    {
+      for (int i = 0; i < 100; i++)
+	my_s.myarr[i]++;
+    }
+  }
+}
+
+namespace B
+{
+  void incr_b (S my_s)
+  {
+#pragma omp target
+    {
+      for (int i = 100; i < 200; i++)
+	my_s.myarr[i]++;
+    }
+  }
+}
+
+int main (int argc, char *argv[])
+{
+  S my_s;
+
+  my_s.myarr = (int *) calloc (200, sizeof (int));
+
+#pragma omp target
+  {
+    for (int i = 0; i < 20; i++)
+      my_s.myarr[i]++;
+  }
+
+  A::incr_a (my_s);
+  B::incr_b (my_s);
+  // [0:20] is bumped here and in A::incr_a (expect 2); the rest once (expect 1).
+  for (int i = 0; i < 200; i++)
+    assert (my_s.myarr[i] == ((i < 20) ? 2 : 1));
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-4.C b/libgomp/testsuite/libgomp.c++/declare-mapper-4.C
new file mode 100644
index 0000000..f194e63
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/declare-mapper-4.C
@@ -0,0 +1,63 @@
+// { dg-do run }
+
+#include <cstdlib>
+#include <cassert>
+
+struct S {
+  int *myarr;
+};
+
+#pragma omp declare mapper (S s) map(to:s.myarr) map (tofrom: s.myarr[0:20])
+
+namespace A {
+#pragma omp declare mapper (S s) map(to:s.myarr) map (tofrom: s.myarr[0:100])
+}
+
+namespace B {
+#pragma omp declare mapper (S s) map(to:s.myarr) map (tofrom: s.myarr[100:100])
+}
+
+namespace A
+{
+  void incr_a (S &my_s)
+  {
+#pragma omp target
+    {
+      for (int i = 0; i < 100; i++)
+	my_s.myarr[i]++;
+    }
+  }
+}
+
+namespace B
+{
+  void incr_b (S &my_s)
+  {
+#pragma omp target
+    {
+      for (int i = 100; i < 200; i++)
+	my_s.myarr[i]++;
+    }
+  }
+}
+
+int main (int argc, char *argv[])
+{
+  S my_s;
+
+  my_s.myarr = (int *) calloc (200, sizeof (int));
+
+#pragma omp target
+  {
+    for (int i = 0; i < 20; i++)
+      my_s.myarr[i]++;
+  }
+
+  A::incr_a (my_s);
+  B::incr_b (my_s);
+  // [0:20] is bumped here and in A::incr_a (expect 2); the rest once (expect 1).
+  for (int i = 0; i < 200; i++)
+    assert (my_s.myarr[i] == ((i < 20) ? 2 : 1));
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-5.C b/libgomp/testsuite/libgomp.c++/declare-mapper-5.C
new file mode 100644
index 0000000..0030de8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/declare-mapper-5.C
@@ -0,0 +1,52 @@
+// { dg-do run }
+
+#include <cassert>
+
+struct S
+{
+  int *myarr;
+  int len;
+};
+
+class C
+{
+  S smemb;
+#pragma omp declare mapper (custom:S s) map(to:s.myarr) \
+					map(tofrom:s.myarr[0:s.len])
+
+public:
+  C(int l)
+  {
+    smemb.myarr = new int[l];
+    smemb.len = l;
+    for (int i = 0; i < l; i++)
+      smemb.myarr[i] = 0;
+  }
+  void bump();
+  void check();
+};
+
+void
+C::bump ()
+{
+#pragma omp target map(mapper(custom), tofrom: smemb)
+  {
+    for (int i = 0; i < smemb.len; i++)
+      smemb.myarr[i]++;
+  }
+}
+
+void
+C::check ()
+{
+  for (int i = 0; i < smemb.len; i++)
+    assert (smemb.myarr[i] == 1);
+}
+
+int main (int argc, char *argv[])
+{
+  C test (100);
+  test.bump ();
+  test.check ();
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-6.C b/libgomp/testsuite/libgomp.c++/declare-mapper-6.C
new file mode 100644
index 0000000..14ed10d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/declare-mapper-6.C
@@ -0,0 +1,37 @@
+// { dg-do run }
+
+#include <cassert>
+
+template <typename T>
+void adjust (T param)
+{
+#pragma omp declare mapper (T x) map(to:x.len, x.base) \
+				 map(tofrom:x.base[0:x.len])
+
+#pragma omp target
+  for (int i = 0; i < param.len; i++)
+    param.base[i]++;
+}
+
+struct S {
+  int len;
+  int *base;
+};
+
+int main (int argc, char *argv[])
+{
+  S a;
+
+  a.len = 100;
+  a.base = new int[a.len];
+
+  for (int i = 0; i < a.len; i++)
+    a.base[i] = 0;
+
+  adjust (a);
+
+  for (int i = 0; i < a.len; i++)
+    assert (a.base[i] == 1);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-7.C b/libgomp/testsuite/libgomp.c++/declare-mapper-7.C
new file mode 100644
index 0000000..ba4792a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/declare-mapper-7.C
@@ -0,0 +1,59 @@
+// { dg-do run }
+
+#include <cassert>
+
+struct S
+{
+  int *myarr;
+};
+
+struct T
+{
+  S *s;
+};
+
+#pragma omp declare mapper (s100: S x) map(to: x.myarr) \
+				       map(tofrom: x.myarr[0:100])
+// Define this because ...
+#pragma omp declare mapper (default: S x) map(to: x.myarr) \
+					  map(tofrom: x.myarr[0:100])
+
+
+void
+bump (T t)
+{
+  /* Here we have an implicit/default mapper invoking a named mapper.  We
+     need to make sure that can be located properly at gimplification
+     time.  */
+
+// ... the following is invalid in OpenMP - albeit supported by GCC
+// (after disabling:  error: in ‘declare mapper’ directives, parameter to ‘mapper’ modifier must be ‘default’ )
+
+// #pragma omp declare mapper (T t) map(to:t.s) map(mapper(s100), tofrom: t.s[0])
+
+// ... thus, we now use ...
+#pragma omp declare mapper (T t) map(to:t.s) map(mapper(default), tofrom: t.s[0])
+
+#pragma omp target
+  for (int i = 0; i < 100; i++)
+    t.s->myarr[i]++;
+}
+
+int main (int argc, char *argv[])
+{
+  S my_s;
+  T my_t;
+
+  my_s.myarr = new int[100];
+  my_t.s = &my_s;
+
+  for (int i = 0; i < 100; i++)
+    my_s.myarr[i] = 0;
+
+  bump (my_t);
+
+  for (int i = 0; i < 100; i++)
+    assert (my_s.myarr[i] == 1);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-8.C b/libgomp/testsuite/libgomp.c++/declare-mapper-8.C
new file mode 100644
index 0000000..3818e52
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/declare-mapper-8.C
@@ -0,0 +1,61 @@
+// { dg-do run }
+
+#include <cassert>
+
+struct S
+{
+  int *myarr;
+  int len;
+};
+
+template<typename T>
+class C
+{
+  T memb;
+#pragma omp declare mapper (T t) map(to:t.len, t.myarr) \
+				 map(tofrom:t.myarr[0:t.len])
+
+public:
+  C(int sz);
+  ~C();
+  void bump();
+  void check();
+};
+
+template<typename T>
+C<T>::C(int sz)
+{
+  memb.myarr = new int[sz];
+  for (int i = 0; i < sz; i++)
+    memb.myarr[i] = 0;
+  memb.len = sz;
+}
+
+template<typename T>
+C<T>::~C()
+{
+  delete[] memb.myarr;
+}
+
+template<typename T>
+void C<T>::bump()
+{
+#pragma omp target map(memb)
+  for (int i = 0; i < memb.len; i++)
+    memb.myarr[i]++;
+}
+
+template<typename T>
+void C<T>::check()
+{
+  for (int i = 0; i < memb.len; i++)
+    assert (memb.myarr[i] == 1);
+}
+
+int main(int argc, char *argv[])
+{
+  C<S> c_int(100);
+  c_int.bump();
+  c_int.check();
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c++/declare_target-2.C b/libgomp/testsuite/libgomp.c++/declare_target-2.C
new file mode 100644
index 0000000..ab94a55
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/declare_target-2.C
@@ -0,0 +1,25 @@
+// { dg-do link }
+
+// Actually not needed: -fipa-cp is default with -O2:
+// { dg-additional-options "-O2 -fipa-cp" }
+
+// The code failed because 'std::endl' becomes implicitly 'declare target'
+// but not the 'widen' function it calls.  While the linker had no issues
+// (endl is never called, either because it is inlined or optimized away),
+// the IPA-CP (enabled by -O2 and higher) failed as the definition for
+// 'widen' did not exist on the offload side.
+
+#include <iostream>
+
+void func (int m)
+{
+  if (m < 0)
+    std::cout << "should not happen" << std::endl;
+}
+
+
+int main()
+{
+  #pragma omp target
+    func (1);
+}
diff --git a/libgomp/testsuite/libgomp.c++/delim-declare-variant-1.C b/libgomp/testsuite/libgomp.c++/delim-declare-variant-1.C
new file mode 100644
index 0000000..bf146dd
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/delim-declare-variant-1.C
@@ -0,0 +1,29 @@
+/* { dg-additional-options "-foffload=disable" } */
+
+/* Check that variants within a "begin declare variant" directive
+   are attached to the correct overloaded function.  */
+
+int f (int x) { return x; }
+
+#pragma omp begin declare variant match (implementation={vendor("gnu")})
+int f (int x) { return -1; }
+#pragma omp end declare variant
+
+int f (int x, int y) { return x * y; }
+
+#pragma omp begin declare variant match (construct={target})
+int f (int x, int y) { return -2; }
+#pragma omp end declare variant
+
+int f (int x, int y, int z) { return x * y * z; }
+
+#pragma omp begin declare variant match (device={kind("host")})
+int f (int x, int y, int z) { return -3; }
+#pragma omp end declare variant
+
+int main (void)
+{
+  if (f (10) != -1) __builtin_abort ();
+  if (f (10, 20) != 200) __builtin_abort ();   /* no match on this one */
+  if (f (10, 20, 30) != -3) __builtin_abort ();
+}
diff --git a/libgomp/testsuite/libgomp.c++/delim-declare-variant-2.C b/libgomp/testsuite/libgomp.c++/delim-declare-variant-2.C
new file mode 100644
index 0000000..6641768
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/delim-declare-variant-2.C
@@ -0,0 +1,37 @@
+/* Check that "omp begin declare variant" works on methods in a
+   class declaration.  */
+
+class test1 {
+
+ private:
+  int n;
+  static int m;
+
+ public:
+
+  void set_n (int x) { n = x; }
+  int get_n (void) { return n; }
+
+  static void set_m (int x) { m = x; }
+  static int get_m (void) { return m; }
+
+  #pragma omp begin declare variant match (implementation={vendor("gnu")})
+  int get_n (void) { return n * 2; }
+  static int get_m (void) { return m * 2; }
+  #pragma omp end declare variant
+
+  #pragma omp begin declare variant match (construct={target})
+  int get_n (void) { return this->n * 2; }
+  #pragma omp end declare variant
+};
+
+int test1::m;
+
+int main (void)
+{
+  test1 t1;
+  t1.set_n (10);
+  if (t1.get_n () != 20) __builtin_abort ();
+  test1::set_m (1);
+  if (test1::get_m () != 2) __builtin_abort ();
+}
diff --git a/libgomp/testsuite/libgomp.c++/delim-declare-variant-7.C b/libgomp/testsuite/libgomp.c++/delim-declare-variant-7.C
new file mode 100644
index 0000000..60cc5d8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/delim-declare-variant-7.C
@@ -0,0 +1,39 @@
+/* Check that "omp begin declare variant" works on methods in a template
+   class declaration.  */
+
+template <typename T>
+class test1 {
+
+ private:
+  T n;
+  static T m;
+
+ public:
+
+  void set_n (T x) { n = x; }
+  T get_n (void) { return n; }
+
+  static void set_m (T x) { m = x; }
+  static T get_m (void) { return m; }
+
+  #pragma omp begin declare variant match (implementation={vendor("gnu")})
+  T get_n (void) { return n * 2; }
+  static T get_m (void) { return m * 2; }
+  #pragma omp end declare variant
+
+  #pragma omp begin declare variant match (construct={target})
+  T get_n (void) { return this->n * 2; }
+  #pragma omp end declare variant
+};
+
+template <typename T>
+T test1<T>::m;
+
+int main (void)
+{
+  test1<int> t1;
+  t1.set_n (10);
+  if (t1.get_n () != 20) __builtin_abort ();
+  test1<int>::set_m (1);
+  if (test1<int>::get_m () != 2) __builtin_abort ();
+}
diff --git a/libgomp/testsuite/libgomp.c++/examples-4/target_data-5.C b/libgomp/testsuite/libgomp.c++/examples-4/target_data-5.C
index 4298e23..cf7dd4b 100644
--- a/libgomp/testsuite/libgomp.c++/examples-4/target_data-5.C
+++ b/libgomp/testsuite/libgomp.c++/examples-4/target_data-5.C
@@ -33,7 +33,7 @@ void vec_mult_ref (float *&p, float *&v1, float *&v2, int n)
 
 void vec_mult (float *&p, float *&v1, float *&v2, int n)
 {
-  #pragma omp target map(to: v1[0:n], v2[:n]) map(from: p[0:n])
+  #pragma omp target map(to: v1[0:n], v2[ :n]) map(from: p[0:n])
     #pragma omp parallel for
       for (int i = 0; i < n; i++)
 	p[i] = v1[i] * v2[i];
diff --git a/libgomp/testsuite/libgomp.c++/loop-6.C b/libgomp/testsuite/libgomp.c++/loop-6.C
index f4a6925..8c0c2c5 100644
--- a/libgomp/testsuite/libgomp.c++/loop-6.C
+++ b/libgomp/testsuite/libgomp.c++/loop-6.C
@@ -5,7 +5,8 @@ extern "C" void abort (void);
 volatile int count;
 static int test(void)
 {
-  return ++count > 0;
+  count = count + 1;
+  return count > 0;
 }
 
 int i;
diff --git a/libgomp/testsuite/libgomp.c++/lvalue-tofrom-2.C b/libgomp/testsuite/libgomp.c++/lvalue-tofrom-2.C
index adc493b..91835c4 100644
--- a/libgomp/testsuite/libgomp.c++/lvalue-tofrom-2.C
+++ b/libgomp/testsuite/libgomp.c++/lvalue-tofrom-2.C
@@ -23,7 +23,7 @@ void foo()
   for (int i = 0; i < aw.length; i++)
     aw.data[i] = i;
 
-#pragma omp target update from(aw.data[:aw.length])
+#pragma omp target update from(aw.data[ :aw.length])
 
 #pragma omp target exit data map(delete: aw.data, aw.length, \
 				 aw.data[0:aw.length])
@@ -54,7 +54,7 @@ main ()
   for (int i = 0; i < aw.length; i++)
     aw.data[i] = i;
 
-#pragma omp target update from(aw.data[:aw.length])
+#pragma omp target update from(aw.data[ :aw.length])
 
 #pragma omp target exit data map(delete: aw.data, aw.length, \
 				 aw.data[0:aw.length])
diff --git a/libgomp/testsuite/libgomp.c++/pr101544-1.C b/libgomp/testsuite/libgomp.c++/pr101544-1.C
index fcd3e97..22c81aa 100644
--- a/libgomp/testsuite/libgomp.c++/pr101544-1.C
+++ b/libgomp/testsuite/libgomp.c++/pr101544-1.C
@@ -64,8 +64,8 @@ int main() {
   double* inptr = in.data();
   double* outptr = out.data();
 
-#pragma omp target teams distribute parallel for map(inptr[:10], outptr[:10]) is_device_ptr(devPtr)
-#pragma acc parallel loop copy(inptr[:10], outptr[:10]) deviceptr(devPtr)
+#pragma omp target teams distribute parallel for map(inptr[ :10], outptr[ :10]) is_device_ptr(devPtr)
+#pragma acc parallel loop copy(inptr[ :10], outptr[ :10]) deviceptr(devPtr)
   for(int i = 0; i < 10; i++) {
     outptr[i] = devPtr->sag(inptr[i], inptr[i]);
   }
diff --git a/libgomp/testsuite/libgomp.c++/pr108286.C b/libgomp/testsuite/libgomp.c++/pr108286.C
index ee88c2f..3d2fb7f 100644
--- a/libgomp/testsuite/libgomp.c++/pr108286.C
+++ b/libgomp/testsuite/libgomp.c++/pr108286.C
@@ -6,7 +6,7 @@ struct S {
   foo ()
   {
     int res = 0;
-#pragma omp target map(size, ptr[:size], res) nowait
+#pragma omp target map(size, ptr[ :size], res) nowait
     res = ptr[size - 1];
 #pragma omp taskwait
     return res;
diff --git a/libgomp/testsuite/libgomp.c++/pr119692-1-4.C b/libgomp/testsuite/libgomp.c++/pr119692-1-4.C
index 6995f26..d329ad3 100644
--- a/libgomp/testsuite/libgomp.c++/pr119692-1-4.C
+++ b/libgomp/testsuite/libgomp.c++/pr119692-1-4.C
@@ -3,6 +3,9 @@
 /* { dg-additional-options -DDEFAULT=defaultmap(firstprivate) }
    Wrong code for offloading execution.
    { dg-xfail-run-if PR119692 { offload_device } } */
+/* There are configurations where we 'WARNING: program timed out.' while in
+   'dynamic_cast', see <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119692#c6>.
+   { dg-timeout 10 { target offload_device } } ... to make sure that happens quickly.  */
 /* { dg-additional-options -fdump-tree-gimple } */
 
 #include "pr119692-1-1.C"
diff --git a/libgomp/testsuite/libgomp.c++/pr119692-1-5.C b/libgomp/testsuite/libgomp.c++/pr119692-1-5.C
index 02121b6..6bbe186 100644
--- a/libgomp/testsuite/libgomp.c++/pr119692-1-5.C
+++ b/libgomp/testsuite/libgomp.c++/pr119692-1-5.C
@@ -3,6 +3,9 @@
 /* { dg-additional-options -DDEFAULT=defaultmap(to) }
    Wrong code for offloading execution.
    { dg-xfail-run-if PR119692 { offload_device } } */
+/* There are configurations where we 'WARNING: program timed out.' while in
+   'dynamic_cast', see <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119692#c6>.
+   { dg-timeout 10 { target offload_device } } ... to make sure that happens quickly.  */
 /* { dg-additional-options -fdump-tree-gimple } */
 
 #include "pr119692-1-1.C"
diff --git a/libgomp/testsuite/libgomp.c++/pr38650.C b/libgomp/testsuite/libgomp.c++/pr38650.C
index ebe221a..08c8075 100644
--- a/libgomp/testsuite/libgomp.c++/pr38650.C
+++ b/libgomp/testsuite/libgomp.c++/pr38650.C
@@ -1,5 +1,6 @@
 // PR c++/38650
 // { dg-do run }
+// { dg-additional-options "-std=gnu++17" }
 
 #include <cstdlib>
 
diff --git a/libgomp/testsuite/libgomp.c++/reduction-10.C b/libgomp/testsuite/libgomp.c++/reduction-10.C
index 2254430..353a667 100644
--- a/libgomp/testsuite/libgomp.c++/reduction-10.C
+++ b/libgomp/testsuite/libgomp.c++/reduction-10.C
@@ -63,9 +63,9 @@ foo (A<int> (*&x)[3][N], M<int> *y, B<long> (&w)[1][N], int p1, long p2, long p3
   short (&b)[p7] = bb;
   for (int i = 0; i < p7; i++)
     bb[i] = -6;
-  #pragma omp parallel for reduction(+:x[0:p1 + 1][:p2 + N - 2], z[:p3]) \
-			   reduction(*:y[:p4]) reduction(|:a[:p5 - N + 2]) \
-			   reduction(&:w[0:p6 - 3 + N][:p6]) reduction(maxb:b)
+  #pragma omp parallel for reduction(+:x[0:p1 + 1][ :p2 + N - 2], z[ :p3]) \
+			   reduction(*:y[ :p4]) reduction(|:a[ :p5 - N + 2]) \
+			   reduction(&:w[0:p6 - 3 + N][ :p6]) reduction(maxb:b)
   for (int i = 0; i < 128; i++)
     {
       x[i / 64][i % 3][(i / 4) & 1].t += i;
@@ -120,9 +120,9 @@ template <int N>
 void
 S<N>::foo (int p1, long p2, long p3, int p4, int p5, long p6, short p7)
 {
-  #pragma omp parallel for reduction(+:x[0:p1 + 1][:p2][0:N], z[:p3 + N - 2]) \
-			   reduction(*:y[:p4]) reduction(|:a[:p5]) \
-			   reduction(&:w[0:p6 - 3 + N][:p6]) reduction(maxb:b)
+  #pragma omp parallel for reduction(+:x[0:p1 + 1][ :p2][0:N], z[ :p3 + N - 2]) \
+			   reduction(*:y[ :p4]) reduction(|:a[ :p5]) \
+			   reduction(&:w[0:p6 - 3 + N][ :p6]) reduction(maxb:b)
   for (int i = 0; i < 128; i++)
     {
       x[i / 64][i % 3][(i / 4) & 1].t += i;
diff --git a/libgomp/testsuite/libgomp.c++/reduction-11.C b/libgomp/testsuite/libgomp.c++/reduction-11.C
index 67c7320..9e3ee3f 100644
--- a/libgomp/testsuite/libgomp.c++/reduction-11.C
+++ b/libgomp/testsuite/libgomp.c++/reduction-11.C
@@ -7,9 +7,9 @@ foo (int (*&x)[3][2], int *y, long (&w)[1][2], int s, int t)
 {
   unsigned long long a[9] = {};
   short b[5] = {};
-  #pragma omp parallel for reduction(+:x[-1:2][:][0:2], z[t + 2:4]) \
+  #pragma omp parallel for reduction(+:x[-1:2][ : ][0:2], z[t + 2:4]) \
 			   reduction(*:y[-s:3]) reduction(|:a[s + 3:4]) \
-			   reduction(&:w[s + 1:][t:2]) reduction(max:b[2:])
+			   reduction(&:w[s + 1: ][t:2]) reduction(max:b[2: ])
   for (int i = 0; i < 128; i++)
     {
       x[i / 64 - 1][i % 3][(i / 4) & 1] += i;
@@ -59,9 +59,9 @@ struct S
 void
 S::foo (int s, int t)
 {
-  #pragma omp parallel for reduction(+:x[-1:2][:][0:2], z[t + 2:4]) \
+  #pragma omp parallel for reduction(+:x[-1:2][ : ][0:2], z[t + 2:4]) \
 			   reduction(*:y[-s:3]) reduction(|:a[s + 3:4]) \
-			   reduction(&:w[s + 1:][t:2]) reduction(max:b[2:])
+			   reduction(&:w[s + 1: ][t:2]) reduction(max:b[2: ])
   for (int i = 0; i < 128; i++)
     {
       x[i / 64 - 1][i % 3][(i / 4) & 1] += i;
diff --git a/libgomp/testsuite/libgomp.c++/reduction-12.C b/libgomp/testsuite/libgomp.c++/reduction-12.C
index 1495549..ef3c3e5 100644
--- a/libgomp/testsuite/libgomp.c++/reduction-12.C
+++ b/libgomp/testsuite/libgomp.c++/reduction-12.C
@@ -65,9 +65,9 @@ foo (A<int> (*&x)[3][N], M<int> *y, B<long> (&w)[1][N], int p1, long p2, long p3
   short (&b)[p7] = bb;
   for (int i = 0; i < p7; i++)
     bb[i] = -6;
-  #pragma omp parallel for reduction(+:x[-1:p1 + 1][:p2 + N - 2], z[t + N:p3]) \
+  #pragma omp parallel for reduction(+:x[-1:p1 + 1][ :p2 + N - 2], z[t + N:p3]) \
 			   reduction(*:y[-s:p4]) reduction(|:a[s + 3:p5 - N + 2]) \
-			   reduction(&:w[s + 1:p6 - 3 + N][t:p6]) reduction(maxb:b[N:])
+			   reduction(&:w[s + 1:p6 - 3 + N][t:p6]) reduction(maxb:b[N: ])
   for (int i = 0; i < 128; i++)
     {
       x[i / 64 - 1][i % 3][(i / 4) & 1].t += i;
@@ -118,9 +118,9 @@ template <int N>
 void
 S<N>::foo (int p1, long p2, long p3, int p4, int p5, long p6, short p7, int s, int t)
 {
-  #pragma omp parallel for reduction(+:x[-1:p1 + 1][:p2][0:N], z[t + N:p3 + N - 2]) \
+  #pragma omp parallel for reduction(+:x[-1:p1 + 1][ :p2][0:N], z[t + N:p3 + N - 2]) \
 			   reduction(*:y[-s:p4]) reduction(|:a[s + 3:p5]) \
-			   reduction(&:w[s + 1:p6 - 3 + N][t:p6]) reduction(maxb:b[N:])
+			   reduction(&:w[s + 1:p6 - 3 + N][t:p6]) reduction(maxb:b[N: ])
   for (int i = 0; i < 128; i++)
     {
       x[i / 64 - 1][i % 3][(i / 4) & 1].t += i;
diff --git a/libgomp/testsuite/libgomp.c++/reduction-5.C b/libgomp/testsuite/libgomp.c++/reduction-5.C
index 212fd69..127519d 100644
--- a/libgomp/testsuite/libgomp.c++/reduction-5.C
+++ b/libgomp/testsuite/libgomp.c++/reduction-5.C
@@ -5,9 +5,9 @@ foo (int (*&x)[3][2], int *y, long (&w)[1][2])
 {
   unsigned long long a[9] = {};
   short b[5] = {};
-  #pragma omp parallel for reduction(+:x[0:2][:][0:2], z[:4]) \
-			   reduction(*:y[:3]) reduction(|:a[:4]) \
-			   reduction(&:w[0:][:2]) reduction(max:b)
+  #pragma omp parallel for reduction(+:x[0:2][ : ][0:2], z[ :4]) \
+			   reduction(*:y[ :3]) reduction(|:a[ :4]) \
+			   reduction(&:w[0: ][ :2]) reduction(max:b)
   for (int i = 0; i < 128; i++)
     {
       x[i / 64][i % 3][(i / 4) & 1] += i;
@@ -61,9 +61,9 @@ struct S
 void
 S::foo ()
 {
-  #pragma omp parallel for reduction(+:x[0:2][:][0:2], z[:4]) \
-			   reduction(*:y[:3]) reduction(|:a[:4]) \
-			   reduction(&:w[0:][:2]) reduction(max:b)
+  #pragma omp parallel for reduction(+:x[0:2][ : ][0:2], z[ :4]) \
+			   reduction(*:y[ :3]) reduction(|:a[ :4]) \
+			   reduction(&:w[0: ][ :2]) reduction(max:b)
   for (int i = 0; i < 128; i++)
     {
       x[i / 64][i % 3][(i / 4) & 1] += i;
diff --git a/libgomp/testsuite/libgomp.c++/reduction-6.C b/libgomp/testsuite/libgomp.c++/reduction-6.C
index f180ca3..4c06f8d 100644
--- a/libgomp/testsuite/libgomp.c++/reduction-6.C
+++ b/libgomp/testsuite/libgomp.c++/reduction-6.C
@@ -59,9 +59,9 @@ foo (A<int> (*&x)[3][2], M<int> *y, B<long> (&w)[1][2])
   A<unsigned long long> a[9];
   short bb[5] = {};
   short (&b)[5] = bb;
-  #pragma omp parallel for reduction(+:x[0:2][:][0:2], z[:4]) \
-			   reduction(*:y[:3]) reduction(|:a[:4]) \
-			   reduction(&:w[0:][:2]) reduction(maxb:b)
+  #pragma omp parallel for reduction(+:x[0:2][ : ][0:2], z[ :4]) \
+			   reduction(*:y[ :3]) reduction(|:a[ :4]) \
+			   reduction(&:w[0: ][ :2]) reduction(maxb:b)
   for (int i = 0; i < 128; i++)
     {
       x[i / 64][i % 3][(i / 4) & 1].t += i;
@@ -114,9 +114,9 @@ struct S
 void
 S::foo ()
 {
-  #pragma omp parallel for reduction(+:x[0:2][:][0:2], z[:4]) \
-			   reduction(*:y[:3]) reduction(|:a[:4]) \
-			   reduction(&:w[0:][:2]) reduction(maxb:b)
+  #pragma omp parallel for reduction(+:x[0:2][ : ][0:2], z[ :4]) \
+			   reduction(*:y[ :3]) reduction(|:a[ :4]) \
+			   reduction(&:w[0: ][ :2]) reduction(maxb:b)
   for (int i = 0; i < 128; i++)
     {
       x[i / 64][i % 3][(i / 4) & 1].t += i;
diff --git a/libgomp/testsuite/libgomp.c++/reduction-7.C b/libgomp/testsuite/libgomp.c++/reduction-7.C
index 75f9d08..616e3a0 100644
--- a/libgomp/testsuite/libgomp.c++/reduction-7.C
+++ b/libgomp/testsuite/libgomp.c++/reduction-7.C
@@ -12,9 +12,9 @@ foo (int (*&x)[3][2], int *y, long (&w)[1][2], int p1, long p2, long p3, int p4,
 	b[i] = -6;
       a[i] = 0;
     }
-  #pragma omp parallel for reduction(+:x[0:p1 + 1][:p2], z[:p3]) \
-			   reduction(*:y[:p4]) reduction(|:a[:p5]) \
-			   reduction(&:w[0:p6 - 1][:p6]) reduction(max:b)
+  #pragma omp parallel for reduction(+:x[0:p1 + 1][ :p2], z[ :p3]) \
+			   reduction(*:y[ :p4]) reduction(|:a[ :p5]) \
+			   reduction(&:w[0:p6 - 1][ :p6]) reduction(max:b)
   for (int i = 0; i < 128; i++)
     {
       x[i / 64][i % 3][(i / 4) & 1] += i;
@@ -68,9 +68,9 @@ struct S
 void
 S::foo (int p1, long p2, long p3, int p4, int p5, long p6, short p7)
 {
-  #pragma omp parallel for reduction(+:x[0:p1 + 1][:p2], z[:p3]) \
-			   reduction(*:y[:p4]) reduction(|:a[:p5]) \
-			   reduction(&:w[0:p6 - 1][:p6]) reduction(max:b[0:p7])
+  #pragma omp parallel for reduction(+:x[0:p1 + 1][ :p2], z[ :p3]) \
+			   reduction(*:y[ :p4]) reduction(|:a[ :p5]) \
+			   reduction(&:w[0:p6 - 1][ :p6]) reduction(max:b[0:p7])
   for (int i = 0; i < 128; i++)
     {
       x[i / 64][i % 3][(i / 4) & 1] += i;
diff --git a/libgomp/testsuite/libgomp.c++/reduction-8.C b/libgomp/testsuite/libgomp.c++/reduction-8.C
index cffd7cc..6bd5ea6 100644
--- a/libgomp/testsuite/libgomp.c++/reduction-8.C
+++ b/libgomp/testsuite/libgomp.c++/reduction-8.C
@@ -62,9 +62,9 @@ foo (A<int> (*&x)[3][2], M<int> *y, B<long> (&w)[1][2], int p1, long p2, long p3
   short (&b)[p7] = bb;
   for (int i = 0; i < p7; i++)
     bb[i] = -6;
-  #pragma omp parallel for reduction(+:x[0:p1 + 1][:p2], z[:p3]) \
-			   reduction(*:y[:p4]) reduction(|:a[:p5]) \
-			   reduction(&:w[0:p6 - 1][:p6]) reduction(maxb:b)
+  #pragma omp parallel for reduction(+:x[0:p1 + 1][ :p2], z[ :p3]) \
+			   reduction(*:y[ :p4]) reduction(|:a[ :p5]) \
+			   reduction(&:w[0:p6 - 1][ :p6]) reduction(maxb:b)
   for (int i = 0; i < 128; i++)
     {
       x[i / 64][i % 3][(i / 4) & 1].t += i;
@@ -117,9 +117,9 @@ struct S
 void
 S::foo (int p1, long p2, long p3, int p4, int p5, long p6, short p7)
 {
-  #pragma omp parallel for reduction(+:x[0:p1 + 1][:p2][0:2], z[:p3]) \
-			   reduction(*:y[:p4]) reduction(|:a[:p5]) \
-			   reduction(&:w[0:p6 - 1][:p6]) reduction(maxb:b)
+  #pragma omp parallel for reduction(+:x[0:p1 + 1][ :p2][0:2], z[ :p3]) \
+			   reduction(*:y[ :p4]) reduction(|:a[ :p5]) \
+			   reduction(&:w[0:p6 - 1][ :p6]) reduction(maxb:b)
   for (int i = 0; i < 128; i++)
     {
       x[i / 64][i % 3][(i / 4) & 1].t += i;
diff --git a/libgomp/testsuite/libgomp.c++/reduction-9.C b/libgomp/testsuite/libgomp.c++/reduction-9.C
index 117a8f6..d6f3041 100644
--- a/libgomp/testsuite/libgomp.c++/reduction-9.C
+++ b/libgomp/testsuite/libgomp.c++/reduction-9.C
@@ -6,9 +6,9 @@ foo (int (*&x)[3][N], int *y, long (&w)[1][N])
 {
   unsigned long long a[9] = {};
   short b[5] = {};
-  #pragma omp parallel for reduction(+:x[0:N][:][0:N], z[:4]) \
-			   reduction(*:y[:3]) reduction(|:a[:4]) \
-			   reduction(&:w[0:][:N]) reduction(max:b)
+  #pragma omp parallel for reduction(+:x[0:N][ : ][0:N], z[ :4]) \
+			   reduction(*:y[ :3]) reduction(|:a[ :4]) \
+			   reduction(&:w[0: ][ :N]) reduction(max:b)
   for (int i = 0; i < 128; i++)
     {
       x[i / 64][i % 3][(i / 4) & 1] += i;
@@ -64,9 +64,9 @@ template <int N>
 void
 S<N>::foo ()
 {
-  #pragma omp parallel for reduction(+:x[0:N][:][0:N], z[:4]) \
-			   reduction(*:y[:3]) reduction(|:a[:4]) \
-			   reduction(&:w[0:][:N]) reduction(max:b)
+  #pragma omp parallel for reduction(+:x[0:N][ : ][0:N], z[ :4]) \
+			   reduction(*:y[ :3]) reduction(|:a[ :4]) \
+			   reduction(&:w[0: ][ :N]) reduction(max:b)
   for (int i = 0; i < 128; i++)
     {
       x[i / 64][i % 3][(i / 4) & 1] += i;
diff --git a/libgomp/testsuite/libgomp.c++/target-18.C b/libgomp/testsuite/libgomp.c++/target-18.C
index a21ed4e..5d616da 100644
--- a/libgomp/testsuite/libgomp.c++/target-18.C
+++ b/libgomp/testsuite/libgomp.c++/target-18.C
@@ -12,7 +12,7 @@ foo (int *&p, int *&q, int *&r, int n, int m)
     /* For zero length array sections, p points to the start of
        already mapped range, q to the end of it (with nothing mapped
        after it), and r does not point to an mapped range.  */
-    #pragma omp target map(alloc:p[:0]) map(to:q[:0]) map(from:r[:0]) private(i) map(from:err) firstprivate (s)
+    #pragma omp target map(alloc:p[ :0]) map(to:q[ :0]) map(from:r[ :0]) private(i) map(from:err) firstprivate (s)
     {
       err = 0;
       for (i = 0; i < 8; i++)
@@ -51,7 +51,7 @@ foo (int *&p, int *&q, int *&r, int n, int m)
       abort ();
     /* And zero-length array sections, though not known at compile
        time, behave the same.  */
-    #pragma omp target map(p[:n]) map(tofrom:q[:n]) map(alloc:r[:n]) private(i) map(from:err) firstprivate (s)
+    #pragma omp target map(p[ :n]) map(tofrom:q[ :n]) map(alloc:r[ :n]) private(i) map(from:err) firstprivate (s)
     {
       err = 0;
       for (i = 0; i < 8; i++)
@@ -71,7 +71,7 @@ foo (int *&p, int *&q, int *&r, int n, int m)
       abort ();
     /* Non-zero length array sections, though not known at compile,
        behave differently.  */
-    #pragma omp target map(p[:m]) map(tofrom:q[:m]) map(to:r[:m]) private(i) map(from:err)
+    #pragma omp target map(p[ :m]) map(tofrom:q[ :m]) map(to:r[ :m]) private(i) map(from:err)
     {
       err = 0;
       for (i = 0; i < 8; i++)
@@ -87,7 +87,7 @@ foo (int *&p, int *&q, int *&r, int n, int m)
       /* For zero length array sections, p points to the start of
 	 already mapped range, q points to the start of another one,
 	 and r to the end of the second one.  */
-      #pragma omp target map(to:p[:0]) map(from:q[:0]) map(tofrom:r[:0]) private(i) map(from:err)
+      #pragma omp target map(to:p[ :0]) map(from:q[ :0]) map(tofrom:r[ :0]) private(i) map(from:err)
       {
 	err = 0;
 	for (i = 0; i < 8; i++)
@@ -128,7 +128,7 @@ foo (int *&p, int *&q, int *&r, int n, int m)
 	abort ();
       /* And zero-length array sections, though not known at compile
 	 time, behave the same.  */
-      #pragma omp target map(p[:n]) map(alloc:q[:n]) map(from:r[:n]) private(i) map(from:err)
+      #pragma omp target map(p[ :n]) map(alloc:q[ :n]) map(from:r[ :n]) private(i) map(from:err)
       {
 	err = 0;
 	for (i = 0; i < 8; i++)
@@ -149,7 +149,7 @@ foo (int *&p, int *&q, int *&r, int n, int m)
 	abort ();
       /* Non-zero length array sections, though not known at compile,
 	 behave differently.  */
-      #pragma omp target map(p[:m]) map(alloc:q[:m]) map(tofrom:r[:m]) private(i) map(from:err)
+      #pragma omp target map(p[ :m]) map(alloc:q[ :m]) map(tofrom:r[ :m]) private(i) map(from:err)
       {
 	err = 0;
 	for (i = 0; i < 8; i++)
diff --git a/libgomp/testsuite/libgomp.c++/target-19.C b/libgomp/testsuite/libgomp.c++/target-19.C
index 7bae31d..3a00a67 100644
--- a/libgomp/testsuite/libgomp.c++/target-19.C
+++ b/libgomp/testsuite/libgomp.c++/target-19.C
@@ -21,7 +21,7 @@ foo (S s, int (&t)[3], int z)
   }
   if (err) abort ();
   // But explicit zero length array section mapping does.
-  #pragma omp target map(from: err) map(tofrom: s.r[:0], t[:0])
+  #pragma omp target map(from: err) map(tofrom: s.r[ :0], t[ :0])
   {
     if (sep)
       /* Since OpenMP 5.2, if no matching mapped list it has been found,
@@ -32,7 +32,7 @@ foo (S s, int (&t)[3], int z)
   }
   if (err) abort ();
   // Similarly zero length array section, but unknown at compile time.
-  #pragma omp target map(from: err) map(tofrom: s.r[:z], t[:z])
+  #pragma omp target map(from: err) map(tofrom: s.r[ :z], t[ :z])
   {
     if (sep)
       /* Since OpenMP 5.2, if no matching mapped list it has been found,
@@ -44,13 +44,13 @@ foo (S s, int (&t)[3], int z)
   if (err) abort ();
   #pragma omp target enter data map (to: s.r, t)
   // But when already mapped, it binds to existing mappings.
-  #pragma omp target map(from: err) map(tofrom: s.r[:0], t[:0])
+  #pragma omp target map(from: err) map(tofrom: s.r[ :0], t[ :0])
   {
     err = t[0] != 1 || t[1] != 2 || t[2] != 3 || s.r[0] != 6 || s.r[1] != 7;
     sep = 0;
   }
   if (err) abort ();
-  #pragma omp target map(from: err) map(tofrom: s.r[:z], t[:z])
+  #pragma omp target map(from: err) map(tofrom: s.r[ :z], t[ :z])
   {
     err = t[0] != 1 || t[1] != 2 || t[2] != 3 || s.r[0] != 6 || s.r[1] != 7;
     sep = 0;
diff --git a/libgomp/testsuite/libgomp.c++/target-2.C b/libgomp/testsuite/libgomp.c++/target-2.C
index 1eab7f2..bbf2d8a 100644
--- a/libgomp/testsuite/libgomp.c++/target-2.C
+++ b/libgomp/testsuite/libgomp.c++/target-2.C
@@ -32,7 +32,7 @@ fn2 (int x, double (&dr) [1024], double *&er)
   double *&ir = i;
   int j;
   fn1 (hr + 2 * x, ir + 2 * x, x);
-  #pragma omp target map(to: br[:x], cr[0:x], dr[x:x], er[x:x]) \
+  #pragma omp target map(to: br[ :x], cr[0:x], dr[x:x], er[x:x]) \
 		     map(to: fr[0:x], gr[0:x], hr[2 * x:x], ir[2 * x:x]) \
 		     map(tofrom: s)
     #pragma omp parallel for reduction(+:s)
diff --git a/libgomp/testsuite/libgomp.c++/target-22.C b/libgomp/testsuite/libgomp.c++/target-22.C
index 9d9dea0..da7967b 100644
--- a/libgomp/testsuite/libgomp.c++/target-22.C
+++ b/libgomp/testsuite/libgomp.c++/target-22.C
@@ -20,7 +20,7 @@ foo (int *&p, int (&s)[5], int &t, S &u, int n)
   }
   if (err)
     abort ();
-  #pragma omp target data use_device_ptr(p) map(from:err) map(to:q[:4])
+  #pragma omp target data use_device_ptr(p) map(from:err) map(to:q[ :4])
   #pragma omp target is_device_ptr(p) private(i) map(from:err)
   {
     err = 0;
@@ -50,7 +50,7 @@ foo (int *&p, int (&s)[5], int &t, S &u, int n)
   }
   if (err)
     abort ();
-  #pragma omp target data map(to:s[:5]) use_device_addr(s) map(from:err)
+  #pragma omp target data map(to:s[ :5]) use_device_addr(s) map(from:err)
   #pragma omp target is_device_ptr(s) private(i) map(from:err)
   {
     err = 0;
diff --git a/libgomp/testsuite/libgomp.c++/target-23.C b/libgomp/testsuite/libgomp.c++/target-23.C
index 63d3436..66a693e 100644
--- a/libgomp/testsuite/libgomp.c++/target-23.C
+++ b/libgomp/testsuite/libgomp.c++/target-23.C
@@ -16,13 +16,13 @@ main (void)
     s->data[i] = 0;
 
   #pragma omp target enter data map(to: s)
-  #pragma omp target enter data map(to: s->data, s->data[:SZ])
+  #pragma omp target enter data map(to: s->data, s->data[ :SZ])
   #pragma omp target
   {
     for (int i = 0; i < SZ; i++)
       s->data[i] = i;
   }
-  #pragma omp target exit data map(from: s->data, s->data[:SZ])
+  #pragma omp target exit data map(from: s->data, s->data[ :SZ])
   #pragma omp target exit data map(from: s)
 
   for (int i = 0; i < SZ; i++)
diff --git a/libgomp/testsuite/libgomp.c++/target-9.C b/libgomp/testsuite/libgomp.c++/target-9.C
index 83a61cf..1b6d2b4 100644
--- a/libgomp/testsuite/libgomp.c++/target-9.C
+++ b/libgomp/testsuite/libgomp.c++/target-9.C
@@ -21,7 +21,7 @@ foo (int *&p, int (&s)[5], int &t, S &u, int n)
   }
   if (err)
     abort ();
-  #pragma omp target data map(to:q[:4])
+  #pragma omp target data map(to:q[ :4])
   #pragma omp target data use_device_ptr(p) map(from:err)
   #pragma omp target is_device_ptr(p) private(i) map(from:err)
   {
@@ -54,7 +54,7 @@ foo (int *&p, int (&s)[5], int &t, S &u, int n)
   }
   if (err)
     abort ();
-  #pragma omp target data map(to:s[:5])
+  #pragma omp target data map(to:s[ :5])
   #pragma omp target data use_device_addr(s) map(from:err)
   #pragma omp target is_device_ptr(s) private(i) map(from:err)
   {
diff --git a/libgomp/testsuite/libgomp.c++/target-cdtor-1.C b/libgomp/testsuite/libgomp.c++/target-cdtor-1.C
index ecb029e..7e8cc58 100644
--- a/libgomp/testsuite/libgomp.c++/target-cdtor-1.C
+++ b/libgomp/testsuite/libgomp.c++/target-cdtor-1.C
@@ -63,14 +63,19 @@ int main()
   return 0;
 }
 
-/* Verify '__cxa_atexit' calls.
+/* Verify '__cxa_atexit' calls (or '__aeabi_atexit', per 'targetm.cxx.use_aeabi_atexit').
 
    For the host, there are four expected calls:
-   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, } 4 optimized { target cxa_atexit } } }
-   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
-   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
-   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
-   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH2, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, } 4 optimized { target { cxa_atexit && { ! arm_eabi } } } } }
+     { dg-final { scan-tree-dump-times {gimple_call <__aeabi_atexit, } 4 optimized { target { cxa_atexit && arm_eabi } } } }
+   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH1, \&__dso_handle>} 1 optimized { target { cxa_atexit && { ! arm_eabi } } } } }
+     { dg-final { scan-tree-dump-times {gimple_call <__aeabi_atexit, NULL, \&sH1, _ZN1SD1Ev, \&__dso_handle>} 1 optimized { target { cxa_atexit && arm_eabi } } } }
+   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target { cxa_atexit && { ! arm_eabi } } } } }
+     { dg-final { scan-tree-dump-times {gimple_call <__aeabi_atexit, NULL, \&sHD1, _ZN1SD1Ev, \&__dso_handle>} 1 optimized { target { cxa_atexit && arm_eabi } } } }
+   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target { cxa_atexit && { ! arm_eabi } } } } }
+     { dg-final { scan-tree-dump-times {gimple_call <__aeabi_atexit, NULL, \&svHD1, _ZNSt6vectorI1SSaIS0_EED1Ev, \&__dso_handle>} 1 optimized { target { cxa_atexit && arm_eabi } } } }
+   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH2, \&__dso_handle>} 1 optimized { target { cxa_atexit && { ! arm_eabi } } } } }
+     { dg-final { scan-tree-dump-times {gimple_call <__aeabi_atexit, NULL, \&sH2, _ZN1SD1Ev, \&__dso_handle>} 1 optimized { target { cxa_atexit && arm_eabi } } } }
 
    For the device, there are two expected calls:
    { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, } 2 optimized { target cxa_atexit } } }
diff --git a/libgomp/testsuite/libgomp.c++/target-cdtor-2.C b/libgomp/testsuite/libgomp.c++/target-cdtor-2.C
index 75e48ca..9c85122 100644
--- a/libgomp/testsuite/libgomp.c++/target-cdtor-2.C
+++ b/libgomp/testsuite/libgomp.c++/target-cdtor-2.C
@@ -93,14 +93,19 @@ int main()
   return 0;
 }
 
-/* Verify '__cxa_atexit' calls.
+/* Verify '__cxa_atexit' calls (or '__aeabi_atexit', per 'targetm.cxx.use_aeabi_atexit').
 
    For the host, there are four expected calls:
-   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, } 4 optimized { target cxa_atexit } } }
-   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
-   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
-   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
-   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH2, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, } 4 optimized { target { cxa_atexit && { ! arm_eabi } } } } }
+     { dg-final { scan-tree-dump-times {gimple_call <__aeabi_atexit, } 4 optimized { target { cxa_atexit && arm_eabi } } } }
+   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH1, \&__dso_handle>} 1 optimized { target { cxa_atexit && { ! arm_eabi } } } } }
+     { dg-final { scan-tree-dump-times {gimple_call <__aeabi_atexit, NULL, \&sH1, _ZN1SD1Ev, \&__dso_handle>} 1 optimized { target { cxa_atexit && arm_eabi } } } }
+   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target { cxa_atexit && { ! arm_eabi } } } } }
+     { dg-final { scan-tree-dump-times {gimple_call <__aeabi_atexit, NULL, \&sHD1, _ZN1SD1Ev, \&__dso_handle>} 1 optimized { target { cxa_atexit && arm_eabi } } } }
+   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target { cxa_atexit && { ! arm_eabi } } } } }
+     { dg-final { scan-tree-dump-times {gimple_call <__aeabi_atexit, NULL, \&svHD1, _ZNSt6vectorI1SSaIS0_EED1Ev, \&__dso_handle>} 1 optimized { target { cxa_atexit && arm_eabi } } } }
+   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH2, \&__dso_handle>} 1 optimized { target { cxa_atexit && { ! arm_eabi } } } } }
+     { dg-final { scan-tree-dump-times {gimple_call <__aeabi_atexit, NULL, \&sH2, _ZN1SD1Ev, \&__dso_handle>} 1 optimized { target { cxa_atexit && arm_eabi } } } }
 
    For the device, there are two expected calls:
    { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, } 2 optimized { target cxa_atexit } } }
diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-1.C b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-1.C
index 3848295..4158ece 100644
--- a/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-1.C
+++ b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-1.C
@@ -23,3 +23,6 @@
    PR119692.
 
    { dg-shouldfail {'std::bad_cast' exception} } */
+/* There are configurations where we 'WARNING: program timed out.' while in
+   'dynamic_cast', see <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119692#c6>.
+   { dg-timeout 10 { target offload_device } } ... to make sure that happens quickly.  */
diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-2.C b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-2.C
index 8861740..ca72e57 100644
--- a/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-2.C
+++ b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-2.C
@@ -22,3 +22,6 @@
 
    For GCN, nvptx offload execution, there is no 'catch'ing; any exception is fatal.
    { dg-shouldfail {'MyException' exception} { offload_device } } */
+/* There are configurations where we 'WARNING: program timed out.' while in
+   'dynamic_cast', see <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119692#c6>.
+   { dg-timeout 10 { target offload_device } } ... to make sure that happens quickly.  */
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-10.C b/libgomp/testsuite/libgomp.c++/target-flex-10.C
new file mode 100644
index 0000000..8fa9af7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-10.C
@@ -0,0 +1,215 @@
+/* Basic container usage.  */
+
+#include <vector>
+#include <deque>
+#include <list>
+#include <set>
+#include <map>
+#if __cplusplus >= 201103L
+#include <array>
+#include <forward_list>
+#include <unordered_set>
+#include <unordered_map>
+#endif
+
+bool vector_test()
+{
+  bool is_empty;
+  #pragma omp target map(from: is_empty)
+    {
+      std::vector<int> container;  /* Lives only inside the target region.  */
+      is_empty = container.empty();
+    }
+  return is_empty;
+}
+
+bool deque_test()
+{
+  bool is_empty;
+  #pragma omp target map(from: is_empty)
+    {
+      std::deque<int> container;  /* Lives only inside the target region.  */
+      is_empty = container.empty();
+    }
+  return is_empty;
+}
+
+bool list_test()
+{
+  bool is_empty;
+  #pragma omp target map(from: is_empty)
+    {
+      std::list<int> container;  /* Lives only inside the target region.  */
+      is_empty = container.empty();
+    }
+  return is_empty;
+}
+
+bool map_test()
+{
+  bool is_empty;
+  #pragma omp target map(from: is_empty)
+    {
+      std::map<int, int> container;  /* Lives only inside the target region.  */
+      is_empty = container.empty();
+    }
+  return is_empty;
+}
+
+bool set_test()
+{
+  bool is_empty;
+  #pragma omp target map(from: is_empty)
+    {
+      std::set<int> container;  /* Lives only inside the target region.  */
+      is_empty = container.empty();
+    }
+  return is_empty;
+}
+
+bool multimap_test()
+{
+  bool is_empty;
+  #pragma omp target map(from: is_empty)
+    {
+      std::multimap<int, int> container;  /* Lives only inside the target region.  */
+      is_empty = container.empty();
+    }
+  return is_empty;
+}
+
+bool multiset_test()  /* Default-constructs a std::multiset in a target region.  */
+{
+  bool ok;  /* Copied back to the host by map(from:).  */
+  #pragma omp target map(from: ok)
+    {
+      std::multiset<int> multiset;  /* Key type only; default comparator.  */
+      ok = multiset.empty();
+    }
+  return ok;
+}
+
+#if __cplusplus >= 201103L
+
+bool array_test()
+{
+  static constexpr std::size_t element_count = 42;
+  bool zeroed;
+  #pragma omp target map(from: zeroed)
+    {
+      std::array<int, element_count> values{};  /* Value-initialized.  */
+      const bool first_is_zero = values[0] == 0;
+      zeroed = first_is_zero && values[element_count - 1] == 0;
+    }
+  return zeroed;
+}
+
+bool forward_list_test()
+{
+  bool is_empty;
+  #pragma omp target map(from: is_empty)
+    {
+      std::forward_list<int> container;  /* Lives only inside the target region.  */
+      is_empty = container.empty();
+    }
+  return is_empty;
+}
+
+bool unordered_map_test()
+{
+  bool is_empty;
+  #pragma omp target map(from: is_empty)
+    {
+      std::unordered_map<int, int> container;  /* Lives only inside the target region.  */
+      is_empty = container.empty();
+    }
+  return is_empty;
+}
+
+bool unordered_set_test()
+{
+  bool is_empty;
+  #pragma omp target map(from: is_empty)
+    {
+      std::unordered_set<int> container;  /* Lives only inside the target region.  */
+      is_empty = container.empty();
+    }
+  return is_empty;
+}
+
+bool unordered_multimap_test()
+{
+  /* Only default construction and empty() are exercised here.  */
+  bool is_empty;
+  #pragma omp target map(from: is_empty)
+    {
+      std::unordered_multimap<int, int> container;
+      is_empty = container.empty();
+    }
+  return is_empty;
+}
+
+bool unordered_multiset_test()
+{
+  /* Only default construction and empty() are exercised here.  */
+  bool is_empty;
+  #pragma omp target map(from: is_empty)
+    {
+      std::unordered_multiset<int> container;
+      is_empty = container.empty();
+    }
+  return is_empty;
+}
+
+#else
+bool array_test() { return true; }  /* Pre-C++11 stubs: the C++11 containers are not tested.  */
+bool forward_list_test() { return true; }
+bool unordered_map_test() { return true; }
+bool unordered_set_test() { return true; }
+bool unordered_multimap_test() { return true; }
+bool unordered_multiset_test() { return true; }  /* All stubs report success so main() is unaffected.  */
+#endif
+
+int main()
+{
+  const bool vector_ok = vector_test();
+  __builtin_printf("vector            : %s\n", vector_ok ? "PASS" : "FAIL");
+  const bool deque_ok = deque_test();
+  __builtin_printf("deque             : %s\n", deque_ok ? "PASS" : "FAIL");
+  const bool list_ok = list_test();
+  __builtin_printf("list              : %s\n", list_ok ? "PASS" : "FAIL");
+  const bool map_ok = map_test();
+  __builtin_printf("map               : %s\n", map_ok ? "PASS" : "FAIL");
+  const bool set_ok = set_test();
+  __builtin_printf("set               : %s\n", set_ok ? "PASS" : "FAIL");
+  const bool multimap_ok = multimap_test();
+  __builtin_printf("multimap          : %s\n", multimap_ok ? "PASS" : "FAIL");
+  const bool multiset_ok = multiset_test();
+  __builtin_printf("multiset          : %s\n", multiset_ok ? "PASS" : "FAIL");
+  const bool array_ok = array_test();
+  __builtin_printf("array             : %s\n", array_ok ? "PASS" : "FAIL");
+  const bool forward_list_ok = forward_list_test();
+  __builtin_printf("forward_list      : %s\n", forward_list_ok ? "PASS" : "FAIL");
+  const bool unordered_map_ok = unordered_map_test();
+  __builtin_printf("unordered_map     : %s\n", unordered_map_ok ? "PASS" : "FAIL");
+  const bool unordered_set_ok = unordered_set_test();
+  __builtin_printf("unordered_set     : %s\n", unordered_set_ok ? "PASS" : "FAIL");
+  const bool unordered_multimap_ok = unordered_multimap_test();
+  __builtin_printf("unordered_multimap: %s\n", unordered_multimap_ok ? "PASS" : "FAIL");
+  const bool unordered_multiset_ok = unordered_multiset_test();
+  __builtin_printf("unordered_multiset: %s\n", unordered_multiset_ok ? "PASS" : "FAIL");
+  const bool all_ok = vector_ok
+		      && deque_ok
+		      && list_ok
+		      && map_ok
+		      && set_ok
+		      && multimap_ok
+		      && multiset_ok
+		      && array_ok
+		      && forward_list_ok
+		      && unordered_map_ok
+		      && unordered_set_ok
+		      && unordered_multimap_ok
+		      && unordered_multiset_ok;
+  return all_ok ? 0 : 1;  /* Exit status 0 only if every container passed.  */
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-100.C b/libgomp/testsuite/libgomp.c++/target-flex-100.C
new file mode 100644
index 0000000..38b0381
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-100.C
@@ -0,0 +1,210 @@
+/* Container adaptors in target region.
+   Does not test comparison operators other than equality to allow these tests
+   to be generalized to arbitrary input data.  */
+
+#include <algorithm>
+#include <cstdio>
+#include <deque>
+#include <queue>
+#include <stack>
+#include <vector>
+
+#include "target-flex-common.h"
+
+template<typename T, std::size_t Size>
+bool test_stack(T (&arr)[Size])
+{  /* Exercises std::stack (over std::vector) in a target region, filled from arr.  */
+  bool ok;
+  #pragma omp target map(from: ok) map(to: arr[ :Size])
+    {
+      bool inner_ok = true;  /* Copied to 'ok' at the 'end' label.  */
+      const std::size_t half_size = Size / 2;
+      const T first_element = arr[0];
+      const T middle_element = arr[half_size - 1];  /* Top once the upper half is popped.  */
+      const T last_element = arr[Size - 1];
+      typedef std::stack<T, std::vector<T> > stack_type;
+      stack_type stack;
+      VERIFY (stack.empty());
+      VERIFY (stack.size() == 0);
+      {
+	/* Do half with push.  */
+	std::size_t idx = 0;
+	for (; idx < half_size; ++idx)
+	  {
+	    stack.push(arr[idx]);
+	    VERIFY (stack.top() == arr[idx]);
+	  }
+	VERIFY (stack.size() == half_size);
+	VERIFY (static_cast<const stack_type&>(stack).size() == half_size);
+	for (; idx < Size; ++idx)
+	  {
+	    #if __cplusplus >= 201103L
+	      /* Do the rest with emplace if C++11 or higher.  */
+	      stack.emplace(arr[idx]);
+	    #else
+	      /* Otherwise just use push again.  */
+	      stack.push(arr[idx]);
+	    #endif
+	    VERIFY (stack.top() == arr[idx]);
+	  }
+	VERIFY (stack.size() == Size);
+	VERIFY (static_cast<const stack_type&>(stack).size() == Size);
+
+	const stack_type stack_orig = stack_type(std::vector<T>(arr, arr + Size));
+	VERIFY (stack == stack_orig);
+	/* References are contained in their own scope so we don't accidentally
+	   add tests referencing them after they have been invalidated.  */
+	{
+	  const T& const_top = static_cast<const stack_type&>(stack).top();
+	  VERIFY (const_top == last_element);
+	  T& mutable_top = stack.top();
+	  mutable_top = first_element;
+	  VERIFY (const_top == first_element);
+	}
+	/* Must compare unequal whenever the first and last elements differ.  */
+	VERIFY (first_element == last_element || stack != stack_orig);
+	for (std::size_t count = Size - half_size; count != 0; --count)
+	  stack.pop();  /* Drops the upper half, including the overwritten top.  */
+	VERIFY (stack.top() == middle_element);
+	const stack_type stack_half_orig = stack_type(std::vector<T>(arr, arr + half_size));
+	VERIFY (stack == stack_half_orig);
+      }
+      end:
+      ok = inner_ok;
+    }
+  return ok;
+}
+
+template<typename T, std::size_t Size>
+bool test_queue(T (&arr)[Size])
+{  /* Exercises std::queue (over std::deque) in a target region, filled from arr.  */
+  bool ok;
+  #pragma omp target map(from: ok) map(to: arr[ :Size])
+    {
+      bool inner_ok = true;  /* Copied to 'ok' at the 'end' label.  */
+      const std::size_t half_size = Size / 2;
+      const T first_element = arr[0];
+      const T last_element = arr[Size - 1];
+      typedef std::queue<T, std::deque<T> > queue_type;
+      queue_type queue;
+      VERIFY (queue.empty());
+      VERIFY (queue.size() == 0);
+      {
+	/* Do half with push.  */
+	std::size_t idx = 0;
+	for (; idx < half_size; ++idx)
+	  {
+	    queue.push(arr[idx]);
+	    VERIFY (queue.back() == arr[idx]);
+	    VERIFY (queue.front() == first_element);
+	  }
+	VERIFY (queue.size() == half_size);
+	VERIFY (static_cast<const queue_type&>(queue).size() == half_size);
+	for (; idx < Size; ++idx)
+	  {
+	    #if __cplusplus >= 201103L
+	      /* Do the rest with emplace if C++11 or higher.  */
+	      queue.emplace(arr[idx]);
+	    #else
+	      /* Otherwise just use push again.  */
+	      queue.push(arr[idx]);
+	    #endif
+	    VERIFY (queue.back() == arr[idx]);
+	  }
+	VERIFY (queue.size() == Size);
+	VERIFY (static_cast<const queue_type&>(queue).size() == Size);
+
+	const queue_type queue_orig = queue_type(std::deque<T>(arr, arr + Size));
+	VERIFY (queue == queue_orig);
+
+	/* References are contained in their own scope so we don't accidentally
+	   add tests referencing them after they have been invalidated.  */
+	{
+	  const T& const_front = static_cast<const queue_type&>(queue).front();
+	  VERIFY (const_front == first_element);
+	  T& mutable_front = queue.front();
+
+	  const T& const_back = static_cast<const queue_type&>(queue).back();
+	  VERIFY (const_back == last_element);
+	  T& mutable_back = queue.back();
+	  {
+	    using std::swap;
+	    swap(mutable_front, mutable_back);
+	  }
+	  VERIFY (const_front == last_element);
+	  VERIFY (const_back == first_element);
+	  /* Must compare unequal whenever the first and last elements differ.  */
+	  VERIFY (first_element == last_element || queue != queue_orig);
+	  /* Restore the back element for the final comparison; the swapped front is popped below.  */
+	  mutable_back = last_element;
+	}
+
+	const T middle_element = arr[half_size];  /* Front once the lower half is popped.  */
+	for (std::size_t count = Size - half_size; count != 0; --count)
+	  queue.pop();
+	VERIFY (queue.front() == middle_element);
+	const queue_type queue_upper_half = queue_type(std::deque<T>(arr + half_size, arr + Size));
+	VERIFY (queue == queue_upper_half);
+      }
+      end:
+      ok = inner_ok;
+    }
+  return ok;
+}
+
+template<typename T, std::size_t Size>
+bool test_priority_queue(T (&arr)[Size], const T min_value, const T max_value)
+{  /* Exercises std::priority_queue (over std::vector) in a target region.  */
+  bool ok;
+  #pragma omp target map(from: ok) map(to: arr[ :Size])
+    {
+      bool inner_ok = true;  /* Copied to 'ok' at the 'end' label; presumably cleared by VERIFY.  */
+      typedef std::priority_queue<T, std::vector<T> > priority_queue_type;
+      {
+	priority_queue_type pqueue;
+	VERIFY (pqueue.empty());
+	VERIFY (pqueue.size() == 0);
+      }
+      {
+	priority_queue_type pqueue(arr, arr + Size);
+	VERIFY (!pqueue.empty());
+	VERIFY (pqueue.size() == Size);
+	VERIFY (static_cast<const priority_queue_type&>(pqueue).size() == Size);
+	/* Remember the largest element of the original input.  */
+	const T old_max = pqueue.top();
+	/* The checks below assume max_value is not smaller than old_max.  */
+	#if __cplusplus >= 201103L
+	  pqueue.emplace(max_value);
+	#else
+	  pqueue.push(max_value);
+	#endif
+	VERIFY (pqueue.top() == max_value);
+	pqueue.pop();
+	VERIFY (pqueue.top() == old_max);
+	pqueue.push(min_value);
+	VERIFY (pqueue.top() == old_max);
+	pqueue.push(max_value);
+	VERIFY (pqueue.top() == max_value);
+	pqueue.pop();
+	VERIFY (pqueue.top() == old_max);
+	VERIFY (pqueue.size() == Size + 1);  /* Original elements plus min_value.  */
+	/* Pop Size elements; only min_value remains if nothing in arr is smaller.  */
+	for (std::size_t count = Size; count != 0; --count)
+	  pqueue.pop();
+	VERIFY (pqueue.size() == 1);
+	VERIFY (pqueue.top() == min_value);
+      }
+      end:
+      ok = inner_ok;
+    }
+  return ok;
+}
+
+int main()
+{
+  int values[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+  const bool all_passed = test_stack(values)
+			  && test_queue(values)
+			  && test_priority_queue(values, 0, 1000);
+  return all_passed ? 0 : 1;  /* Exit status 0 only if every adaptor passed.  */
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-101.C b/libgomp/testsuite/libgomp.c++/target-flex-101.C
new file mode 100644
index 0000000..9eaa8a9
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-101.C
@@ -0,0 +1,139 @@
+/* { dg-additional-options -std=c++23 } */
+
+/* { dg-ice {TODO PR122268} { offload_target_amdgcn || offload_target_nvptx } }
+   { dg-excess-errors {'mkoffload' failure etc.} { xfail { offload_target_amdgcn || offload_target_nvptx } } } */
+
+/* C++23 container adaptors in target region.
+   Severely needs additional tests.  */
+
+#include <cstdio>
+#include <utility>
+#include <version>
+
+#if __cpp_lib_flat_map >= 202207L
+#define ENABLE_FLAT_MAP 1
+#endif
+#if __cpp_lib_flat_set >= 202207L
+#define ENABLE_FLAT_SET 1
+#endif
+
+#ifdef ENABLE_FLAT_MAP
+#include <flat_map>
+#endif
+#ifdef ENABLE_FLAT_SET
+#include <flat_set>
+#endif
+
+#include "target-flex-common.h"
+
+#ifdef ENABLE_FLAT_MAP
+template<typename K, typename V, typename std::size_t Size>
+bool test_flat_map(std::pair<K, V> (&arr)[Size])
+{  /* Builds a std::flat_map from arr in a target region and looks up each key.  */
+  bool ok;
+  #pragma omp target map(from: ok) map(to: arr[ :Size])
+    {
+      bool inner_ok = true;  /* Copied to 'ok' at the 'end' label; presumably cleared by VERIFY.  */
+      {
+	using flat_map_type = std::flat_map<K, V>;
+	flat_map_type map = {arr, arr + Size};
+	/* Every key of the input must be found again.  */
+	VERIFY (!map.empty());
+	for (const auto& element : arr)
+	  VERIFY (map.contains(element.first));
+      }
+      end:
+      ok = inner_ok;
+    }
+  return ok;
+}
+
+template<typename K, typename V, typename std::size_t Size>
+bool test_flat_multimap(std::pair<K, V> (&arr)[Size])
+{  /* Builds a std::flat_multimap from arr in a target region and looks up each key.  */
+  bool ok;
+  #pragma omp target map(from: ok) map(to: arr[ :Size])
+    {
+      bool inner_ok = true;  /* Copied to 'ok' at the 'end' label; presumably cleared by VERIFY.  */
+      {
+	using flat_multimap_type = std::flat_multimap<K, V>;
+	flat_multimap_type multimap = {arr, arr + Size};
+	/* Every key of the input must be found again.  */
+	VERIFY (!multimap.empty());
+	for (const auto& element : arr)
+	  VERIFY (multimap.contains(element.first));
+      }
+      end:
+      ok = inner_ok;
+    }
+  return ok;
+}
+#else
+template<typename K, typename V, typename std::size_t Size>
+bool test_flat_map(std::pair<K, V> (&)[Size]) { return true; }
+/* Stubs used when ENABLE_FLAT_MAP is not defined; they report success.  */
+template<typename K, typename V, typename std::size_t Size>
+bool test_flat_multimap(std::pair<K, V> (&)[Size]) { return true; }
+#endif
+
+#ifdef ENABLE_FLAT_SET
+template<typename T, typename std::size_t Size>
+bool test_flat_set(T (&arr)[Size])
+{  /* Builds a std::flat_set from arr in a target region and looks up each element.  */
+  bool ok;
+  #pragma omp target map(from: ok) map(to: arr[ :Size])
+    {
+      bool inner_ok = true;  /* Copied to 'ok' at the 'end' label; presumably cleared by VERIFY.  */
+      {
+	using flat_set_type = std::flat_set<T>;
+	flat_set_type set = {arr, arr + Size};
+	/* Every element of the input must be found again.  */
+	VERIFY (!set.empty());
+	for (const auto& element : arr)
+	  VERIFY (set.contains(element));
+      }
+      end:
+      ok = inner_ok;
+    }
+  return ok;
+}
+
+template<typename T, typename std::size_t Size>
+bool test_flat_multiset(T (&arr)[Size])
+{  /* Builds a std::flat_multiset from arr in a target region and looks up each element.  */
+  bool ok;
+  #pragma omp target map(from: ok) map(to: arr[ :Size])
+    {
+      bool inner_ok = true;  /* Copied to 'ok' at the 'end' label; presumably cleared by VERIFY.  */
+      {
+	using flat_multiset_type = std::flat_multiset<T>;
+	flat_multiset_type multiset = {arr, arr + Size};
+	/* Every element of the input must be found again.  */
+	VERIFY (!multiset.empty());
+	for (const auto& element : arr)
+	  VERIFY (multiset.contains(element));
+      }
+      end:
+      ok = inner_ok;
+    }
+  return ok;
+}
+#else
+template<typename T, typename std::size_t Size>
+bool test_flat_set(T (&)[Size]) { return true; }
+/* Stubs used when ENABLE_FLAT_SET is not defined; they report success.  */
+template<typename T, typename std::size_t Size>
+bool test_flat_multiset(T (&)[Size]) { return true; }
+#endif
+
+int main()
+{
+  int values[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+  std::pair<int, int> key_values[10] = {{ 1,  2}, { 2,  4}, { 3,  6}, { 4,  8}, { 5, 10},
+					{ 6, 12}, { 7, 14}, { 8, 16}, { 9, 18}, {10, 20}};
+  const bool all_passed = test_flat_set(values)
+			  && test_flat_multiset(values)
+			  && test_flat_map(key_values)
+			  && test_flat_multimap(key_values);
+  return all_passed ? 0 : 1;  /* Exit status 0 only if every adaptor passed.  */
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-11.C b/libgomp/testsuite/libgomp.c++/target-flex-11.C
new file mode 100644
index 0000000..6d55129
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-11.C
@@ -0,0 +1,444 @@
+/* Check constructors/destructors are called in containers.  */
+
+#include <vector>
+#include <deque>
+#include <list>
+#include <set>
+#include <map>
+#include <utility>
+#if __cplusplus >= 201103L
+#include <array>
+#include <forward_list>
+#include <unordered_set>
+#include <unordered_map>
+#endif
+
+#include "target-flex-common.h"
+
+struct indirect_counter
+{
+  typedef int counter_value_type;
+  counter_value_type *_count_ptr;  /* External tally of live instances.  */
+
+  indirect_counter(counter_value_type *tally) BL_NOEXCEPT : _count_ptr(tally) {
+    *_count_ptr += 1;
+  }
+  indirect_counter(const indirect_counter& source) BL_NOEXCEPT : _count_ptr(source._count_ptr) {
+    *_count_ptr += 1;
+  }
+  /* No move constructor on purpose: we want to copy no matter what.  */
+  ~indirect_counter() {
+    *_count_ptr -= 1;
+  }
+};
+
+bool operator==(const indirect_counter& a, const indirect_counter& b) BL_NOEXCEPT
+  { return a._count_ptr == b._count_ptr; }  /* Same tally means equal.  */
+bool operator<(const indirect_counter& a, const indirect_counter& b) BL_NOEXCEPT
+  { return a._count_ptr < b._count_ptr; }  /* Ordered by tally address.  */
+
+#if __cplusplus >= 201103L
+template<>
+struct std::hash<indirect_counter>
+{
+  std::size_t operator()(const indirect_counter& counter) const noexcept
+    { return std::hash<indirect_counter::counter_value_type *>()(counter._count_ptr); }
+};
+#endif
+
+/* Not a container: sanity check that indirect_counter tracks automatic and temporary objects.  */
+bool automatic_lifetime_test()
+{
+  bool ok;
+  #pragma omp target map(from: ok)
+    {
+      bool inner_ok = true;  /* Copied to 'ok' at the 'end' label; presumably cleared by VERIFY.  */
+      int counter = 0;  /* Number of live indirect_counter objects.  */
+      {
+	indirect_counter c = indirect_counter(&counter);
+	indirect_counter(static_cast<int*>(&counter));  /* Temporary; the cast presumably keeps this from parsing as a declaration.  */
+      }
+      VERIFY (counter == 0);  /* Both objects have been destroyed.  */
+      end:
+      ok = inner_ok;
+    }
+  return ok;
+}
+
+bool vector_test()  /* Checks that std::vector keeps constructions and destructions balanced.  */
+{
+  bool ok;
+  #pragma omp target map(from: ok)
+    {
+      bool inner_ok = true;  /* Copied to 'ok' at the 'end' label; presumably cleared by VERIFY.  */
+      int counter = 0;  /* Number of live indirect_counter objects.  */
+      {
+	std::vector<indirect_counter> vec(42, indirect_counter(&counter));
+	VERIFY (counter == 42);  /* The temporary fill value is already gone.  */
+	vec.resize(32, indirect_counter(&counter));
+	VERIFY (counter == 32);
+	vec.push_back(indirect_counter(&counter));
+	VERIFY (counter == 33);
+	vec.pop_back();
+	VERIFY (counter == 32);
+	vec.pop_back();
+	VERIFY (counter == 31);
+	vec.resize(100, indirect_counter(&counter));
+	VERIFY (counter == 100);
+      }
+      VERIFY (counter == 0);  /* Leaving the scope destroyed every element.  */
+      end:
+      ok = inner_ok;
+    }
+  return ok;
+}
+
+bool deque_test()  /* Checks that std::deque keeps constructions and destructions balanced.  */
+{
+  bool ok;
+  #pragma omp target map(from: ok)
+    {
+      bool inner_ok = true;  /* Copied to 'ok' at the 'end' label; presumably cleared by VERIFY.  */
+      int counter = 0;  /* Number of live indirect_counter objects.  */
+      {
+	std::deque<indirect_counter> vec(42, indirect_counter(&counter));
+	VERIFY (counter == 42);  /* The temporary fill value is already gone.  */
+	vec.resize(32, indirect_counter(&counter));
+	VERIFY (counter == 32);
+	vec.push_back(indirect_counter(&counter));
+	VERIFY (counter == 33);
+	vec.pop_back();
+	VERIFY (counter == 32);
+	vec.pop_back();
+	VERIFY (counter == 31);
+	vec.resize(100, indirect_counter(&counter));
+	VERIFY (counter == 100);
+      }
+      VERIFY (counter == 0);  /* Leaving the scope destroyed every element.  */
+      end:
+      ok = inner_ok;
+    }
+  return ok;
+}
+
+bool list_test()  /* Checks that std::list keeps constructions and destructions balanced.  */
+{
+  bool ok;
+  #pragma omp target map(from: ok)
+    {
+      bool inner_ok = true;  /* Copied to 'ok' at the 'end' label; presumably cleared by VERIFY.  */
+      int counter = 0;  /* Number of live indirect_counter objects.  */
+      {
+	std::list<indirect_counter> list(42, indirect_counter(&counter));
+	VERIFY (counter == 42);  /* The temporary fill value is already gone.  */
+	list.resize(32, indirect_counter(&counter));
+	VERIFY (counter == 32);
+	list.push_back(indirect_counter(&counter));
+	VERIFY (counter == 33);
+	list.pop_back();
+	VERIFY (counter == 32);
+	list.pop_back();
+	VERIFY (counter == 31);
+	list.resize(100, indirect_counter(&counter));
+	VERIFY (counter == 100);
+      }
+      VERIFY (counter == 0);  /* Leaving the scope destroyed every element.  */
+      end:
+      ok = inner_ok;
+    }
+  return ok;
+}
+
+bool map_test()  /* Checks that std::map keeps constructions and destructions balanced.  */
+{
+  bool ok;
+  #pragma omp target map(from: ok)
+    {
+      bool inner_ok = true;  /* Copied to 'ok' at the 'end' label; presumably cleared by VERIFY.  */
+      int counter = 0;  /* Number of live indirect_counter objects.  */
+      {
+	std::map<int, indirect_counter> map;
+	map.insert(std::make_pair(1, indirect_counter(&counter)));
+	VERIFY (counter == 1);
+	map.insert(std::make_pair(1, indirect_counter(&counter)));
+	VERIFY (counter == 1);  /* Key 1 already exists: nothing was added.  */
+	map.insert(std::make_pair(2, indirect_counter(&counter)));
+	VERIFY (counter == 2);
+      }
+      VERIFY (counter == 0);  /* Leaving the scope destroyed every element.  */
+      end:
+      ok = inner_ok;
+    }
+  return ok;
+}
+
+bool set_test()  /* Checks that std::set keeps constructions and destructions balanced.  */
+{
+  bool ok;
+  #pragma omp target map(from: ok)
+    {
+      bool inner_ok = true;  /* Copied to 'ok' at the 'end' label; presumably cleared by VERIFY.  */
+      int counter0 = 0;  /* Two tallies give two distinct keys (ordering is by pointer).  */
+      int counter1 = 0;
+      {
+	std::set<indirect_counter> set;
+	set.insert(indirect_counter(&counter0));
+	VERIFY (counter0 == 1);
+	set.insert(indirect_counter(&counter0));
+	VERIFY (counter0 == 1);  /* Equivalent key: nothing was added.  */
+	set.insert(indirect_counter(&counter1));
+	VERIFY (counter0 == 1 && counter1 == 1);
+      }
+      VERIFY (counter0 == 0 && counter1 == 0);  /* Leaving the scope destroyed every element.  */
+      end:
+      ok = inner_ok;
+    }
+  return ok;
+}
+
+bool multimap_test()  /* Checks that std::multimap keeps constructions and destructions balanced.  */
+{
+  bool ok;
+  #pragma omp target map(from: ok)
+    {
+      bool inner_ok = true;  /* Copied to 'ok' at the 'end' label; presumably cleared by VERIFY.  */
+      int counter = 0;  /* Number of live indirect_counter objects.  */
+      {
+	std::multimap<int, indirect_counter> multimap;
+	multimap.insert(std::make_pair(1, indirect_counter(&counter)));
+	VERIFY (counter == 1);
+	multimap.insert(std::make_pair(1, indirect_counter(&counter)));
+	VERIFY (counter == 2);  /* Duplicate keys are kept.  */
+	multimap.insert(std::make_pair(2, indirect_counter(&counter)));
+	VERIFY (counter == 3);
+      }
+      VERIFY (counter == 0);  /* Leaving the scope destroyed every element.  */
+      end:
+      ok = inner_ok;
+    }
+  return ok;
+}
+
+bool multiset_test()  /* Checks that std::multiset keeps constructions and destructions balanced.  */
+{
+  bool ok;
+  #pragma omp target map(from: ok)
+    {
+      bool inner_ok = true;  /* Copied to 'ok' at the 'end' label; presumably cleared by VERIFY.  */
+      int counter0 = 0;  /* Two tallies give two distinct keys (ordering is by pointer).  */
+      int counter1 = 0;
+      {
+	std::multiset<indirect_counter> multiset;
+	multiset.insert(indirect_counter(&counter0));
+	VERIFY (counter0 == 1);
+	multiset.insert(indirect_counter(&counter0));
+	VERIFY (counter0 == 2);  /* Equivalent keys are kept.  */
+	multiset.insert(indirect_counter(&counter1));
+	VERIFY (counter0 == 2 && counter1 == 1);
+      }
+      VERIFY (counter0 == 0 && counter1 == 0);  /* Leaving the scope destroyed every element.  */
+      end:
+      ok = inner_ok;
+    }
+  return ok;
+}
+
+#if __cplusplus >= 201103L
+
+bool array_test()  /* Checks that std::array keeps constructions and destructions balanced.  */
+{
+  bool ok;
+  #pragma omp target map(from: ok)
+    {
+      bool inner_ok = true;  /* Copied to 'ok' at the 'end' label; presumably cleared by VERIFY.  */
+      int counter = 0;  /* Number of live indirect_counter objects.  */
+      {
+	indirect_counter ic(&counter);
+	std::array<indirect_counter, 10> array{ic, ic, ic, ic, ic,
+					       ic, ic, ic, ic, ic};
+	VERIFY (counter == 11);  /* 'ic' itself plus its ten copies.  */
+      }
+      VERIFY (counter == 0);  /* Leaving the scope destroyed all of them.  */
+      end:
+      ok = inner_ok;
+    }
+  return ok;
+}
+
+bool forward_list_test()  /* Checks that std::forward_list keeps constructions and destructions balanced.  */
+{
+  bool ok;
+  #pragma omp target map(from: ok)
+    {
+      bool inner_ok = true;  /* Copied to 'ok' at the 'end' label; presumably cleared by VERIFY.  */
+      int counter = 0;  /* Number of live indirect_counter objects.  */
+      {
+	std::forward_list<indirect_counter> forward_list(42, indirect_counter(&counter));
+	VERIFY (counter == 42);  /* The temporary fill value is already gone.  */
+	forward_list.resize(32, indirect_counter(&counter));
+	VERIFY (counter == 32);
+	forward_list.push_front(indirect_counter(&counter));
+	VERIFY (counter == 33);
+	forward_list.pop_front();
+	VERIFY (counter == 32);
+	forward_list.pop_front();
+	VERIFY (counter == 31);
+	forward_list.resize(100, indirect_counter(&counter));
+	VERIFY (counter == 100);
+      }
+      VERIFY (counter == 0);  /* Leaving the scope destroyed every element.  */
+      end:
+      ok = inner_ok;
+    }
+  return ok;
+}
+
+bool unordered_map_test()  /* Checks that std::unordered_map keeps constructions and destructions balanced.  */
+{
+  bool ok;
+  #pragma omp target map(from: ok)
+    {
+      bool inner_ok = true;  /* Copied to 'ok' at the 'end' label; presumably cleared by VERIFY.  */
+      int counter = 0;  /* Number of live indirect_counter objects.  */
+      {
+	std::unordered_map<int, indirect_counter> unordered_map;
+	unordered_map.insert({1, indirect_counter(&counter)});
+	VERIFY (counter == 1);
+	unordered_map.insert({1, indirect_counter(&counter)});
+	VERIFY (counter == 1);  /* Key 1 already exists: nothing was added.  */
+	unordered_map.insert({2, indirect_counter(&counter)});
+	VERIFY (counter == 2);
+      }
+      VERIFY (counter == 0);  /* Leaving the scope destroyed every element.  */
+      end:
+      ok = inner_ok;
+    }
+  return ok;
+}
+
+bool unordered_set_test()  /* Checks that std::unordered_set keeps constructions and destructions balanced.  */
+{
+  bool ok;
+  #pragma omp target map(from: ok)
+    {
+      bool inner_ok = true;  /* Copied to 'ok' at the 'end' label; presumably cleared by VERIFY.  */
+      int counter0 = 0;  /* Two tallies give two distinct keys (hash and equality use the pointer).  */
+      int counter1 = 0;
+      {
+	std::unordered_set<indirect_counter> unordered_set;
+	unordered_set.insert(indirect_counter(&counter0));
+	VERIFY (counter0 == 1);
+	unordered_set.insert(indirect_counter(&counter0));
+	VERIFY (counter0 == 1);  /* Equal key: nothing was added.  */
+	unordered_set.insert(indirect_counter(&counter1));
+	VERIFY (counter0 == 1 && counter1 == 1);
+      }
+      VERIFY (counter0 == 0 && counter1 == 0);  /* Leaving the scope destroyed every element.  */
+      end:
+      ok = inner_ok;
+    }
+  return ok;
+}
+
+bool unordered_multimap_test()  /* Checks that std::unordered_multimap keeps constructions and destructions balanced.  */
+{
+  bool ok;
+  #pragma omp target map(from: ok)
+    {
+      bool inner_ok = true;  /* Copied to 'ok' at the 'end' label; presumably cleared by VERIFY.  */
+      int counter = 0;  /* Number of live indirect_counter objects.  */
+      {
+	std::unordered_multimap<int, indirect_counter> unordered_multimap;
+	unordered_multimap.insert({1, indirect_counter(&counter)});
+	VERIFY (counter == 1);
+	unordered_multimap.insert({1, indirect_counter(&counter)});
+	VERIFY (counter == 2);  /* Duplicate keys are kept.  */
+	unordered_multimap.insert({2, indirect_counter(&counter)});
+	VERIFY (counter == 3);
+      }
+      VERIFY (counter == 0);  /* Leaving the scope destroyed every element.  */
+      end:
+      ok = inner_ok;
+    }
+  return ok;
+}
+
+bool unordered_multiset_test()  /* Checks that std::unordered_multiset keeps constructions and destructions balanced.  */
+{
+  bool ok;
+  #pragma omp target map(from: ok)
+    {
+      bool inner_ok = true;  /* Copied to 'ok' at the 'end' label; presumably cleared by VERIFY.  */
+      int counter0 = 0;  /* Two tallies give two distinct keys (hash and equality use the pointer).  */
+      int counter1 = 0;
+      {
+	std::unordered_multiset<indirect_counter> unordered_multiset;
+	unordered_multiset.insert(indirect_counter(&counter0));
+	VERIFY (counter0 == 1);
+	unordered_multiset.insert(indirect_counter(&counter0));
+	VERIFY (counter0 == 2);  /* Equal keys are kept.  */
+	unordered_multiset.insert(indirect_counter(&counter1));
+	VERIFY (counter0 == 2 && counter1 == 1);
+      }
+      VERIFY (counter0 == 0 && counter1 == 0);  /* Leaving the scope destroyed every element.  */
+      end:
+      ok = inner_ok;
+    }
+  return ok;
+}
+
+#else
+bool array_test() { return true; }  /* Pre-C++11 stubs: the C++11 containers are not tested.  */
+bool forward_list_test() { return true; }
+bool unordered_map_test() { return true; }
+bool unordered_set_test() { return true; }
+bool unordered_multimap_test() { return true; }
+bool unordered_multiset_test() { return true; }  /* All stubs report success so main() is unaffected.  */
+#endif
+
+int main()
+{
+  const bool sanity_ok = automatic_lifetime_test();
+  const bool vector_ok = vector_test();
+  const bool deque_ok = deque_test();
+  const bool list_ok = list_test();
+  const bool map_ok = map_test();
+  const bool set_ok = set_test();
+  const bool multimap_ok = multimap_test();
+  const bool multiset_ok = multiset_test();
+  const bool array_ok = array_test();
+  const bool forward_list_ok = forward_list_test();
+  const bool unordered_map_ok = unordered_map_test();
+  const bool unordered_set_ok = unordered_set_test();
+  const bool unordered_multimap_ok = unordered_multimap_test();
+  const bool unordered_multiset_ok = unordered_multiset_test();
+  std::printf("sanity check      : %s\n", sanity_ok ? "PASS" : "FAIL");
+  std::printf("vector            : %s\n", vector_ok ? "PASS" : "FAIL");
+  std::printf("deque             : %s\n", deque_ok ? "PASS" : "FAIL");
+  std::printf("list              : %s\n", list_ok ? "PASS" : "FAIL");
+  std::printf("map               : %s\n", map_ok ? "PASS" : "FAIL");
+  std::printf("set               : %s\n", set_ok ? "PASS" : "FAIL");
+  std::printf("multimap          : %s\n", multimap_ok ? "PASS" : "FAIL");
+  std::printf("multiset          : %s\n", multiset_ok ? "PASS" : "FAIL");
+  std::printf("array             : %s\n", array_ok ? "PASS" : "FAIL");
+  std::printf("forward_list      : %s\n", forward_list_ok ? "PASS" : "FAIL");
+  std::printf("unordered_map     : %s\n", unordered_map_ok ? "PASS" : "FAIL");
+  std::printf("unordered_set     : %s\n", unordered_set_ok ? "PASS" : "FAIL");
+  std::printf("unordered_multimap: %s\n", unordered_multimap_ok ? "PASS" : "FAIL");
+  std::printf("unordered_multiset: %s\n", unordered_multiset_ok ? "PASS" : "FAIL");
+  const bool all_ok = sanity_ok
+		      && vector_ok
+		      && deque_ok
+		      && list_ok
+		      && map_ok
+		      && set_ok
+		      && multimap_ok
+		      && multiset_ok
+		      && array_ok
+		      && forward_list_ok
+		      && unordered_map_ok
+		      && unordered_set_ok
+		      && unordered_multimap_ok
+		      && unordered_multiset_ok;
+  return all_ok ? 0 : 1;  /* Exit status 0 only if every test passed.  */
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-12.C b/libgomp/testsuite/libgomp.c++/target-flex-12.C
new file mode 100644
index 0000000..d4534c7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-12.C
@@ -0,0 +1,736 @@
+/* Populate containers with mapped data, validate, mutate, validate again.
+   The cases using sets do not mutate.
+   Note: Some of the code in here really sucks due to being made to be
+   compatible with c++98.  */
+
+#include <vector>
+#include <deque>
+#include <list>
+#include <set>
+#include <map>
+#if __cplusplus >= 201103L
+#include <array>
+#include <forward_list>
+#include <unordered_set>
+#include <unordered_map>
+#endif
+
+#include <limits>
+#include <iterator>
+
+#include "target-flex-common.h"
+/* Local stand-in for std::enable_if: only the `true' specialization defines the member `type'.  */
+template<bool B, class T = void>
+struct enable_if {};
+ 
+template<class T>
+struct enable_if<true, T> { typedef T type; };
+/* Function object that returns its argument unchanged; used as the default projection by the validators.  */
+struct identity_func
+{
+#if __cplusplus < 201103L  /* Before c++11: one overload each for non-const and const lvalues.  */
+  template<typename T>
+  T& operator()(T& arg) const BL_NOEXCEPT { return arg; }
+  template<typename T>
+  T const& operator()(T const& arg) const BL_NOEXCEPT { return arg; }
+#else  /* c++11 and later: a single forwarding overload.  */
+  template<typename T>
+  constexpr T&& operator()(T&& arg) const BL_NOEXCEPT { return std::forward<T>(arg); }
+#endif
+};
+
+/* Compares [begin0, end0) element-wise with proj applied to [begin1, end1).  */
+template<typename It0, typename It1, typename Proj>
+bool validate_sequential_elements(const It0 begin0, const It0 end0,
+				  const It1 begin1, const It1 end1,
+				  Proj proj) BL_NOEXCEPT
+{
+  It0 lhs = begin0;
+  It1 rhs = begin1;
+  while (lhs != end0)
+    {
+      /* The second range ran out first: a length mismatch fails the test.  */
+      if (rhs == end1)
+	return false;
+      if (*lhs != proj(*rhs))
+	return false;
+      ++lhs;
+      ++rhs;
+    }
+  /* The ranges match only if the second one is exhausted as well.  */
+  return rhs == end1;
+}
+/* Overload without a projection: forwards with identity_func so both ranges are compared as they are.  */
+template<typename It0, typename It1>
+bool validate_sequential_elements(const It0 begin0, const It0 end0,
+				  const It1 begin1, const It1 end1) BL_NOEXCEPT
+{
+  return validate_sequential_elements(begin0, end0, begin1, end1, identity_func());
+}
+
+/* Plain element-by-element copy of [begin, end) through OUT; nothing clever.  */
+template<typename It, typename OutIt>
+void simple_copy(const It begin, const It end, OutIt out) BL_NOEXCEPT
+{
+  for (It cur = begin; cur != end; ++cur)
+    *out++ = *cur;
+}
+/* Replaces each element of [begin, end) with mut_fn applied to its current value.  */
+template<typename It, typename MutateFn>
+void simple_mutate(const It begin, const It end, MutateFn mut_fn) BL_NOEXCEPT
+{
+  for (It it = begin; it != end; ++it)
+    *it = mut_fn(*it);
+}
+/* Builds a std::vector from ARR in a target region, checks it before and after applying MutationFunc, and re-checks both copied-back snapshots on the host.  */
+template<typename MutationFunc, typename T, std::size_t Size>
+bool vector_test(const T (&arr)[Size])
+{
+  bool ok;  /* Receives inner_ok at the end of the target region.  */
+  T out_arr[Size];
+  T out_mut_arr[Size];
+  #pragma omp target map(from: ok, out_arr[ :Size], out_mut_arr[ :Size]) \
+		     map(to: arr[ :Size])
+    {
+      bool inner_ok = true;
+      {
+	std::vector<T> vector(arr, arr + Size);
+	VERIFY (validate_sequential_elements(vector.begin(), vector.end(),
+					     arr, arr + Size));
+	simple_copy(vector.begin(), vector.end(), out_arr);  /* Snapshot before mutation.  */
+	simple_mutate(vector.begin(), vector.end(), MutationFunc());
+	VERIFY (validate_sequential_elements(vector.begin(), vector.end(),
+					     arr, arr + Size, MutationFunc()));
+	simple_copy(vector.begin(), vector.end(), out_mut_arr);  /* Snapshot after mutation.  */
+      }
+      end:  /* NOTE(review): presumably where a failed VERIFY jumps -- confirm in target-flex-common.h.  */
+      ok = inner_ok;
+    }
+  if (!ok)
+    return false;
+  VERIFY_NON_TARGET (validate_sequential_elements(out_arr, out_arr + Size,
+						  arr, arr + Size));
+  VERIFY_NON_TARGET (validate_sequential_elements(out_mut_arr, out_mut_arr + Size,
+						  arr, arr + Size, MutationFunc()));
+  return true;
+}
+/* Builds a std::deque from ARR in a target region, checks it before and after applying MutationFunc, and re-checks both copied-back snapshots on the host.  */
+template<typename MutationFunc, typename T, std::size_t Size>
+bool deque_test(const T (&arr)[Size])
+{
+  bool ok;  /* Receives inner_ok at the end of the target region.  */
+  T out_arr[Size];
+  T out_mut_arr[Size];
+  #pragma omp target map(from: ok, out_arr[ :Size], out_mut_arr[ :Size]) \
+		     map(to: arr[ :Size])
+    {
+      bool inner_ok = true;
+      {
+	std::deque<T> deque(arr, arr + Size);
+	VERIFY (validate_sequential_elements(deque.begin(), deque.end(),
+					     arr, arr + Size));
+	simple_copy(deque.begin(), deque.end(), out_arr);  /* Snapshot before mutation.  */
+	simple_mutate(deque.begin(), deque.end(), MutationFunc());
+	VERIFY (validate_sequential_elements(deque.begin(), deque.end(),
+					     arr, arr + Size, MutationFunc()));
+	simple_copy(deque.begin(), deque.end(), out_mut_arr);  /* Snapshot after mutation.  */
+      }
+      end:  /* NOTE(review): presumably where a failed VERIFY jumps -- confirm in target-flex-common.h.  */
+      ok = inner_ok;
+    }
+  if (!ok)
+    return false;
+  VERIFY_NON_TARGET (validate_sequential_elements(out_arr, out_arr + Size,
+						  arr, arr + Size));
+  VERIFY_NON_TARGET (validate_sequential_elements(out_mut_arr, out_mut_arr + Size,
+						  arr, arr + Size, MutationFunc()));
+  return true;
+}
+/* Builds a std::list from ARR in a target region, checks it before and after applying MutationFunc, and re-checks both copied-back snapshots on the host.  */
+template<typename MutationFunc, typename T, std::size_t Size>
+bool list_test(const T (&arr)[Size])
+{
+  bool ok;  /* Receives inner_ok at the end of the target region.  */
+  T out_arr[Size];
+  T out_mut_arr[Size];
+  #pragma omp target map(from: ok, out_arr[ :Size], out_mut_arr[ :Size]) \
+		     map(to: arr[ :Size])
+    {
+      bool inner_ok = true;
+      {
+	std::list<T> list(arr, arr + Size);
+	VERIFY (validate_sequential_elements(list.begin(), list.end(),
+					     arr, arr + Size));
+	simple_copy(list.begin(), list.end(), out_arr);  /* Snapshot before mutation.  */
+	simple_mutate(list.begin(), list.end(), MutationFunc());
+	VERIFY (validate_sequential_elements(list.begin(), list.end(),
+					     arr, arr + Size, MutationFunc()));
+	simple_copy(list.begin(), list.end(), out_mut_arr);  /* Snapshot after mutation.  */
+      }
+      end:  /* NOTE(review): presumably where a failed VERIFY jumps -- confirm in target-flex-common.h.  */
+      ok = inner_ok;
+    }
+  if (!ok)
+    return false;
+  VERIFY_NON_TARGET (validate_sequential_elements(out_arr, out_arr + Size,
+						  arr, arr + Size));
+  VERIFY_NON_TARGET (validate_sequential_elements(out_mut_arr, out_mut_arr + Size,
+						  arr, arr + Size, MutationFunc()));
+  return true;
+}
+/* Key of an element: the element itself, or `first' when the element is a std::pair.  */
+template<typename T>
+const T& get_key(const T& arg) BL_NOEXCEPT
+  { return arg; }
+template<typename K, typename V>
+const K& get_key(const std::pair<K, V>& pair) BL_NOEXCEPT
+  { return pair.first; }
+template<typename T>
+const T& get_value(const T& arg) BL_NOEXCEPT
+  { return arg; }  /* A bare element is its own value.  */
+template<typename K, typename V>
+const V& get_value(const std::pair<K, V>& pair) BL_NOEXCEPT
+  { return pair.second; }  /* The mapped value, so the result type is V, not K.  */
+/* Maps an element type to its key type: T itself, or K for std::pair<K, V>.  */
+template<typename T>
+struct key_type { typedef T type; };
+template<typename K, typename V>
+struct key_type<std::pair<K, V> > { typedef K type; };
+/* For each element of [compare_begin, compare_end): its key must be found in CONTAINER and the stored value must equal proj applied to the element's value.  */
+template<typename Proj, typename Container, typename It>
+bool validate_associative(const Container& container,
+			  const It compare_begin,
+			  const It compare_end,
+			  Proj proj) BL_NOEXCEPT
+{
+  const typename Container::const_iterator elem_end = container.end();
+  for (It compare_it = compare_begin; compare_it != compare_end; ++compare_it)
+    {
+      const typename Container::const_iterator elem_it = container.find(get_key(*compare_it));
+      /* NOTE(review): VERIFY_NON_TARGET presumably returns false on failure -- confirm in target-flex-common.h.  */
+      VERIFY_NON_TARGET (elem_it != elem_end);
+      VERIFY_NON_TARGET (proj(get_value(*compare_it)) == get_value(*elem_it));
+    }
+  return true;
+}
+/* Overload without a projection: forwards with identity_func so values are compared as they are.  */
+template<typename Container, typename It>
+bool validate_associative(const Container& container,
+			  const It compare_begin,
+			  const It compare_end) BL_NOEXCEPT
+{
+  return validate_associative(container, compare_begin, compare_end, identity_func());
+}
+/* Replaces the `second' member of each element of [begin, end) with mut_fn applied to it; keys are left alone.  */
+template<typename It, typename MutateFn>
+void simple_mutate_map(const It begin, const It end, MutateFn mut_fn) BL_NOEXCEPT
+{
+  for (It it = begin; it != end; ++it)
+    it->second = mut_fn(it->second);
+}
+/* Copies to OUT the first element seen for each distinct key of [begin, end), in input order.  */
+template<typename It, typename OutIter>
+void simple_copy_unique(const It begin, const It end, OutIter out) BL_NOEXCEPT
+{
+  /* In case anyone reads this, I want it to be known that I hate c++98.  */
+  typedef typename key_type<typename std::iterator_traits<It>::value_type>::type key_t;
+  std::set<key_t> already_seen;
+  for (It it = begin; it != end; ++it)
+    {
+      /* insert() reports in .second whether the key is new.  */
+      if (!already_seen.insert(get_key(*it)).second)
+	continue;
+      /* Step OUT only after a write, so skipped repeats leave no gaps.  */
+      *out++ = *it;
+    }
+}
+/* Builds a std::map from ARR in a target region, checks it against the key-unique elements of ARR before and after mutating the mapped values, then re-checks the copied-back contents against a host-built reference map.  */
+template<typename MutationFunc, typename K, typename V, std::size_t Size>
+bool map_test(const std::pair<K, V> (&arr)[Size])
+{
+  std::map<K, V> reference_map(arr, arr + Size);
+  bool ok;  /* Receives inner_ok at the end of the target region.  */
+  /* Both sizes should be the same.  */
+  std::pair<K, V> out_pairs[Size];
+  std::size_t out_size;
+  std::pair<K, V> out_pairs_mut[Size];
+  std::size_t out_size_mut;
+  #pragma omp target map(from: ok, out_pairs[ :Size], out_size, \
+			       out_pairs_mut[ :Size], out_size_mut) \
+		     map(to: arr[ :Size])
+    {
+      bool inner_ok = true;
+      {
+	std::vector<std::pair<K, V> > unique_elems;
+	simple_copy_unique(arr, arr + Size,
+			   std::back_insert_iterator<std::vector<std::pair<K, V> > >(unique_elems));
+
+	std::map<K, V> map(arr, arr + Size);
+	VERIFY (validate_associative(map, unique_elems.begin(), unique_elems.end()));
+	simple_copy(map.begin(), map.end(), out_pairs);  /* Snapshot before mutation.  */
+	out_size = map.size();
+	simple_mutate_map(map.begin(), map.end(), MutationFunc());
+	VERIFY (validate_associative(map, unique_elems.begin(), unique_elems.end(),
+				     MutationFunc()));
+	simple_copy(map.begin(), map.end(), out_pairs_mut);  /* Snapshot after mutation.  */
+	out_size_mut = map.size();
+      }
+      end:  /* NOTE(review): presumably where a failed VERIFY jumps -- confirm in target-flex-common.h.  */
+      ok = inner_ok;
+    }
+  if (!ok)
+    return false;
+  VERIFY_NON_TARGET (out_size == out_size_mut);
+  VERIFY_NON_TARGET (validate_associative(reference_map,
+					  out_pairs, out_pairs + out_size));
+  /* Mutate the reference the same way so it can be compared with the mutated snapshot.  */
+  simple_mutate_map(reference_map.begin(), reference_map.end(), MutationFunc());
+  VERIFY_NON_TARGET (validate_associative(reference_map,
+					  out_pairs_mut, out_pairs_mut + out_size_mut));
+  return true;
+}
+/* Builds a std::set from ARR in a target region, checks it against the unique elements of ARR, then re-checks the copied-back contents against a host-built reference set.  */
+template<typename T, std::size_t Size>
+bool set_test(const T (&arr)[Size])
+{
+  std::set<T> reference_set(arr, arr + Size);
+  bool ok;  /* Receives inner_ok at the end of the target region.  */
+  /* Both sizes should be the same.  */
+  T out_arr[Size];
+  std::size_t out_size;
+  #pragma omp target map(from: ok, out_arr[ :Size], out_size) \
+		     map(to: arr[ :Size])
+    {
+      bool inner_ok = true;
+      {
+	std::vector<T> unique_elems;
+	simple_copy_unique(arr, arr + Size,
+			   std::back_insert_iterator<std::vector<T> >(unique_elems));
+
+	std::set<T> set(arr, arr + Size);
+	VERIFY (validate_associative(set, unique_elems.begin(), unique_elems.end()));
+	simple_copy(set.begin(), set.end(), out_arr);
+	out_size = set.size();  /* Only the first out_size entries of out_arr are written.  */
+	/* Sets can't be mutated, we could create another set with mutated
+	   but it gets a little annoying and probably isn't an interesting test.  */
+      }
+      end:  /* NOTE(review): presumably where a failed VERIFY jumps -- confirm in target-flex-common.h.  */
+      ok = inner_ok;
+    }
+  if (!ok)
+    return false;
+  VERIFY_NON_TARGET (validate_associative(reference_set,
+					  out_arr, out_arr + out_size));
+  return true;
+}
+/* Checks every element of [compare_begin, compare_end) against CONTAINER:
+   its key must occur in the range exactly container.count(key) times, and
+   one of the container entries stored under that key must hold proj applied
+   to the element's value.  */
+template<typename Proj, typename Container, typename It>
+bool validate_multi_associative(const Container& container,
+				const It compare_begin,
+				const It compare_end,
+				Proj proj) BL_NOEXCEPT
+{
+  /* Once again, for the poor soul reviewing these, I hate c++98.  */
+  typedef typename key_type<typename std::iterator_traits<It>::value_type>::type key_t;
+  typedef std::map<key_t, std::size_t> counter_map;
+  typedef typename Container::const_iterator elem_iterator;
+  /* Tally how often each key occurs in the comparison range.  */
+  counter_map key_count_map;
+  for (It it = compare_begin; it != compare_end; ++it)
+    ++key_count_map[get_key(*it)];
+  for (It compare_it = compare_begin; compare_it != compare_end; ++compare_it)
+    {
+      const key_t& key = get_key(*compare_it);
+      typename counter_map::iterator count_it = key_count_map.find(key);
+      std::size_t key_count = count_it != key_count_map.end() ? count_it->second
+							      : std::size_t(0);
+      VERIFY_NON_TARGET (key_count > std::size_t(0) && "this will never happen");
+      /* This gets tested multiple times but that should be fine.  */
+      VERIFY_NON_TARGET (key_count == container.count(key));
+      /* Only entries stored under KEY may satisfy the value check; scanning
+	 on to end() would also accept a value belonging to another key.  */
+      const std::pair<elem_iterator, elem_iterator> same_key
+	= container.equal_range(key);
+      /* This will never happen if the previous case passed.  */
+      VERIFY_NON_TARGET (same_key.first != same_key.second);
+      bool found_element = false;
+      for (elem_iterator elem_it = same_key.first;
+	   elem_it != same_key.second; ++elem_it)
+	if (proj(get_value(*compare_it)) == get_value(*elem_it))
+	  {
+	    found_element = true;
+	    break;
+	  }
+      VERIFY_NON_TARGET (found_element);
+    }
+  return true;
+}
+/* Overload without a projection: forwards with identity_func so values are compared as they are.  */
+template<typename Container, typename It>
+bool validate_multi_associative(const Container& container,
+				const It compare_begin,
+				const It compare_end) BL_NOEXCEPT
+{
+  return validate_multi_associative(container, compare_begin, compare_end, identity_func());
+}
+/* Builds a std::multimap from ARR in a target region, checks it against ARR before and after mutating the mapped values, then re-checks both copied-back snapshots against a host-built reference multimap.  */
+template<typename MutationFunc, typename K, typename V, std::size_t Size>
+bool multimap_test(const std::pair<K, V> (&arr)[Size])
+{
+  std::multimap<K, V> reference_multimap(arr, arr + Size);
+  bool ok;  /* Receives inner_ok at the end of the target region.  */
+  std::pair<K, V> out_pairs[Size];
+  std::pair<K, V> out_pairs_mut[Size];
+  #pragma omp target map(from: ok, out_pairs[ :Size], out_pairs_mut[ :Size]) \
+		     map(to: arr[ :Size])
+    {
+      bool inner_ok = true;
+      {
+	std::multimap<K, V> multimap(arr, arr + Size);
+	VERIFY (validate_multi_associative(multimap, arr, arr + Size));
+	simple_copy(multimap.begin(), multimap.end(), out_pairs);  /* Snapshot before mutation.  */
+	simple_mutate_map(multimap.begin(), multimap.end(), MutationFunc());
+	VERIFY (validate_multi_associative(multimap, arr, arr + Size, MutationFunc()));
+	simple_copy(multimap.begin(), multimap.end(), out_pairs_mut);  /* Snapshot after mutation.  */
+      }
+      end:  /* NOTE(review): presumably where a failed VERIFY jumps -- confirm in target-flex-common.h.  */
+      ok = inner_ok;
+    }
+  if (!ok)
+    return false;
+  VERIFY_NON_TARGET (validate_multi_associative(reference_multimap,
+						out_pairs, out_pairs + Size));
+  simple_mutate_map(reference_multimap.begin(), reference_multimap.end(), MutationFunc());
+  VERIFY_NON_TARGET (validate_multi_associative(reference_multimap,
+						out_pairs_mut, out_pairs_mut + Size));
+  return true;
+}
+/* Builds a std::multiset from ARR in a target region, checks it against ARR, then re-checks the copied-back contents against a host-built reference multiset.  */
+template<typename T, std::size_t Size>
+bool multiset_test(const T (&arr)[Size])
+{
+  std::multiset<T> reference_multiset(arr, arr + Size);
+  bool ok;  /* Receives inner_ok at the end of the target region.  */
+  T out_arr[Size];
+  #pragma omp target map(from: ok, out_arr[ :Size]) \
+		     map(to: arr[ :Size])
+    {
+      bool inner_ok = true;
+      {
+	std::multiset<T> set(arr, arr + Size);
+	VERIFY (validate_multi_associative(set, arr, arr + Size));
+	simple_copy(set.begin(), set.end(), out_arr);
+	/* Sets can't be mutated, we could create another set with mutated
+	   but it gets a little annoying and probably isn't an interesting test.  */
+      }
+      end:  /* NOTE(review): presumably where a failed VERIFY jumps -- confirm in target-flex-common.h.  */
+      ok = inner_ok;
+    }
+  if (!ok)
+    return false;
+  VERIFY_NON_TARGET (validate_multi_associative(reference_multiset,
+						out_arr, out_arr + Size));
+  return true;
+}
+
+#if __cplusplus >= 201103L
+/* In a target region: value-initializes a std::array, checks every element equals T{}, fills it from ARR, checks it before and after applying MutationFunc; both snapshots are re-checked on the host.  */
+template<typename MutationFunc, typename T, std::size_t Size>
+bool array_test(const T (&arr)[Size])
+{
+  bool ok;  /* Receives inner_ok at the end of the target region.  */
+  T out_arr[Size];
+  T out_mut_arr[Size];
+  #pragma omp target map(from: ok, out_arr[ :Size], out_mut_arr[ :Size]) \
+		     map(to: arr[ :Size])
+    {
+      bool inner_ok = true;
+      {
+	std::array<T, Size> std_array{};
+	/* Special case for std::array since it can't be initialized
+	   with iterators.  */
+	{
+	  T zero_val = T{};
+	  for (auto it = std_array.begin(); it != std_array.end(); ++it)
+	    VERIFY (*it == zero_val);
+	}
+	simple_copy(arr, arr + Size, std_array.begin());
+	VERIFY (validate_sequential_elements(std_array.begin(), std_array.end(),
+					     arr, arr + Size));
+	simple_copy(std_array.begin(), std_array.end(), out_arr);  /* Snapshot before mutation.  */
+	simple_mutate(std_array.begin(), std_array.end(), MutationFunc());
+	VERIFY (validate_sequential_elements(std_array.begin(), std_array.end(),
+					     arr, arr + Size, MutationFunc()));
+	simple_copy(std_array.begin(), std_array.end(), out_mut_arr);  /* Snapshot after mutation.  */
+      }
+      end:  /* NOTE(review): presumably where a failed VERIFY jumps -- confirm in target-flex-common.h.  */
+      ok = inner_ok;
+    }
+  if (!ok)
+    return false;
+  VERIFY_NON_TARGET (validate_sequential_elements(out_arr, out_arr + Size,
+						  arr, arr + Size));
+  VERIFY_NON_TARGET (validate_sequential_elements(out_mut_arr, out_mut_arr + Size,
+						  arr, arr + Size, MutationFunc()));
+  return true;
+}
+/* Builds a std::forward_list from ARR in a target region, checks it before and after applying MutationFunc, and re-checks both copied-back snapshots on the host.  */
+template<typename MutationFunc, typename T, std::size_t Size>
+bool forward_list_test(const T (&arr)[Size])
+{
+  bool ok;  /* Receives inner_ok at the end of the target region.  */
+  T out_arr[Size];
+  T out_mut_arr[Size];
+  #pragma omp target map(from: ok, out_arr[ :Size], out_mut_arr[ :Size]) \
+		     map(to: arr[ :Size])
+    {
+      bool inner_ok = true;
+      {
+	std::forward_list<T> fwd_list(arr, arr + Size);
+	VERIFY (validate_sequential_elements(fwd_list.begin(), fwd_list.end(),
+					     arr, arr + Size));
+	simple_copy(fwd_list.begin(), fwd_list.end(), out_arr);  /* Snapshot before mutation.  */
+	simple_mutate(fwd_list.begin(), fwd_list.end(), MutationFunc());
+	VERIFY (validate_sequential_elements(fwd_list.begin(), fwd_list.end(),
+					     arr, arr + Size, MutationFunc()));
+	simple_copy(fwd_list.begin(), fwd_list.end(), out_mut_arr);  /* Snapshot after mutation.  */
+      }
+      end:  /* NOTE(review): presumably where a failed VERIFY jumps -- confirm in target-flex-common.h.  */
+      ok = inner_ok;
+    }
+  if (!ok)
+    return false;
+  VERIFY_NON_TARGET (validate_sequential_elements(out_arr, out_arr + Size,
+						  arr, arr + Size));
+  VERIFY_NON_TARGET (validate_sequential_elements(out_mut_arr, out_mut_arr + Size,
+						  arr, arr + Size, MutationFunc()));
+  return true;
+}
+/* Builds a std::unordered_map from ARR in a target region, checks it against the key-unique elements of ARR before and after mutating the mapped values, then re-checks the copied-back contents against a host-built reference map.  */
+template<typename MutationFunc, typename K, typename V, std::size_t Size>
+bool unordered_map_test(const std::pair<K, V> (&arr)[Size])
+{
+  std::unordered_map<K, V> reference_map(arr, arr + Size);
+  bool ok;  /* Receives inner_ok at the end of the target region.  */
+  /* Both sizes should be the same.  */
+  std::pair<K, V> out_pairs[Size];
+  std::size_t out_size;
+  std::pair<K, V> out_pairs_mut[Size];
+  std::size_t out_size_mut;
+  #pragma omp target map(from: ok, out_pairs[ :Size], out_size, \
+			       out_pairs_mut[ :Size], out_size_mut) \
+		     map(to: arr[ :Size])
+    {
+      bool inner_ok = true;
+      {
+	std::vector<std::pair<K, V> > unique_elems;
+	simple_copy_unique(arr, arr + Size,
+			   std::back_insert_iterator<std::vector<std::pair<K, V> > >(unique_elems));
+
+	std::unordered_map<K, V> map(arr, arr + Size);
+	VERIFY (validate_associative(map, unique_elems.begin(), unique_elems.end()));
+	simple_copy(map.begin(), map.end(), out_pairs);  /* Snapshot before mutation.  */
+	out_size = map.size();
+	simple_mutate_map(map.begin(), map.end(), MutationFunc());
+	VERIFY (validate_associative(map, unique_elems.begin(), unique_elems.end(),
+				     MutationFunc()));
+	simple_copy(map.begin(), map.end(), out_pairs_mut);  /* Snapshot after mutation.  */
+	out_size_mut = map.size();
+      }
+      end:  /* NOTE(review): presumably where a failed VERIFY jumps -- confirm in target-flex-common.h.  */
+      ok = inner_ok;
+    }
+  if (!ok)
+    return false;
+  VERIFY_NON_TARGET (out_size == out_size_mut);
+  VERIFY_NON_TARGET (validate_associative(reference_map,
+					  out_pairs, out_pairs + out_size));
+  /* Mutate the reference the same way so it can be compared with the mutated snapshot.  */
+  simple_mutate_map(reference_map.begin(), reference_map.end(), MutationFunc());
+  VERIFY_NON_TARGET (validate_associative(reference_map,
+					  out_pairs_mut, out_pairs_mut + out_size_mut));
+  return true;
+}
+/* Builds a std::unordered_set from ARR in a target region, checks it against the unique elements of ARR, then re-checks the copied-back contents against a host-built reference set.  */
+template<typename T, std::size_t Size>
+bool unordered_set_test(const T (&arr)[Size])
+{
+  std::unordered_set<T> reference_set(arr, arr + Size);
+  bool ok;  /* Receives inner_ok at the end of the target region.  */
+  /* Both sizes should be the same.  */
+  T out_arr[Size];
+  std::size_t out_size;
+  #pragma omp target map(from: ok, out_arr[ :Size], out_size) \
+		     map(to: arr[ :Size])
+    {
+      bool inner_ok = true;
+      {
+	std::vector<T> unique_elems;
+	simple_copy_unique(arr, arr + Size,
+			   std::back_insert_iterator<std::vector<T> >(unique_elems));
+
+	std::unordered_set<T> set(arr, arr + Size);
+	VERIFY (validate_associative(set, unique_elems.begin(), unique_elems.end()));
+	simple_copy(set.begin(), set.end(), out_arr);
+	out_size = set.size();  /* Only the first out_size entries of out_arr are written.  */
+	/* Sets can't be mutated, we could create another set with mutated
+	   but it gets a little annoying and probably isn't an interesting test.  */
+      }
+      end:  /* NOTE(review): presumably where a failed VERIFY jumps -- confirm in target-flex-common.h.  */
+      ok = inner_ok;
+    }
+  if (!ok)
+    return false;
+  VERIFY_NON_TARGET (validate_associative(reference_set,
+					  out_arr, out_arr + out_size));
+  return true;
+}
+/* Builds a std::unordered_multimap from ARR in a target region, checks it against ARR before and after mutating the mapped values, then re-checks both copied-back snapshots against a host-built reference.  */
+template<typename MutationFunc, typename K, typename V, std::size_t Size>
+bool unordered_multimap_test(const std::pair<K, V> (&arr)[Size])
+{
+  std::unordered_multimap<K, V> reference_multimap(arr, arr + Size);
+  bool ok;  /* Receives inner_ok at the end of the target region.  */
+  std::pair<K, V> out_pairs[Size];
+  std::pair<K, V> out_pairs_mut[Size];
+  #pragma omp target map(from: ok, out_pairs[ :Size], out_pairs_mut[ :Size]) \
+		     map(to: arr[ :Size])
+    {
+      bool inner_ok = true;
+      {
+	std::unordered_multimap<K, V> multimap(arr, arr + Size);
+	VERIFY (validate_multi_associative(multimap, arr, arr + Size));
+	simple_copy(multimap.begin(), multimap.end(), out_pairs);  /* Snapshot before mutation.  */
+	simple_mutate_map(multimap.begin(), multimap.end(), MutationFunc());
+	VERIFY (validate_multi_associative(multimap, arr, arr + Size, MutationFunc()));
+	simple_copy(multimap.begin(), multimap.end(), out_pairs_mut);  /* Snapshot after mutation.  */
+      }
+      end:  /* NOTE(review): presumably where a failed VERIFY jumps -- confirm in target-flex-common.h.  */
+      ok = inner_ok;
+    }
+  if (!ok)
+    return false;
+  VERIFY_NON_TARGET (validate_multi_associative(reference_multimap,
+						out_pairs, out_pairs + Size));
+  simple_mutate_map(reference_multimap.begin(), reference_multimap.end(), MutationFunc());
+  VERIFY_NON_TARGET (validate_multi_associative(reference_multimap,
+						out_pairs_mut, out_pairs_mut + Size));
+  return true;
+}
+/* Builds a std::unordered_multiset from ARR in a target region, checks it against ARR, then re-checks the copied-back contents against a host-built reference.  */
+template<typename T, std::size_t Size>
+bool unordered_multiset_test(const T (&arr)[Size])
+{
+  std::unordered_multiset<T> reference_multiset(arr, arr + Size);
+  bool ok;  /* Receives inner_ok at the end of the target region.  */
+  T out_arr[Size];
+  #pragma omp target map(from: ok, out_arr[ :Size]) \
+		     map(to: arr[ :Size])
+    {
+      bool inner_ok = true;
+      {
+	std::unordered_multiset<T> set(arr, arr + Size);
+	VERIFY (validate_multi_associative(set, arr, arr + Size));
+	simple_copy(set.begin(), set.end(), out_arr);
+	/* Sets can't be mutated, we could create another set with mutated
+	   but it gets a little annoying and probably isn't an interesting test.  */
+      }
+      end:  /* NOTE(review): presumably where a failed VERIFY jumps -- confirm in target-flex-common.h.  */
+      ok = inner_ok;
+    }
+  if (!ok)
+    return false;
+  VERIFY_NON_TARGET (validate_multi_associative(reference_multiset,
+						out_arr, out_arr + Size));
+  return true;
+}
+
+#else
+template<typename, typename T, std::size_t Size> bool array_test(const T (&)[Size]) { return true; }  /* Pre-c++11 stub: always passes.  */
+template<typename, typename T, std::size_t Size> bool forward_list_test(const T (&)[Size]) { return true; }
+template<typename, typename T, std::size_t Size> bool unordered_map_test(const T (&)[Size]) { return true; }
+template<typename T, std::size_t Size> bool unordered_set_test(const T (&)[Size]) { return true; }
+template<typename, typename T, std::size_t Size> bool unordered_multimap_test(const T (&)[Size]) { return true; }
+template<typename T, std::size_t Size> bool unordered_multiset_test(const T (&)[Size]) { return true; }
+#endif
+
+/* This clamps to the representable range to guard against overflowing,
+   assuming std::numeric_limits is specialized for T.  */
+struct multiply_by_2
+{
+  template<typename T>
+  typename enable_if<std::numeric_limits<T>::is_specialized, T>::type
+  operator()(T arg) const BL_NOEXCEPT {
+    /* min() is the smallest positive value for floating types, so their
+       lower bound is -max(); c++98 has no lowest().  */
+    const T lo = std::numeric_limits<T>::is_integer
+		 ? std::numeric_limits<T>::min() : -std::numeric_limits<T>::max();
+    const T hi = std::numeric_limits<T>::max();
+    if (arg < static_cast<T>(0) && lo / static_cast<T>(2) >= arg)
+      return lo;
+    if (!(arg < static_cast<T>(0)) && hi / static_cast<T>(2) <= arg)
+      return hi;
+    return arg * 2;
+  }
+  /* Without a numeric_limits specialization there is nothing to clamp to.  */
+  template<typename T>
+  typename enable_if<!std::numeric_limits<T>::is_specialized, T>::type
+  operator()(T arg) const BL_NOEXCEPT {
+    return arg * 2;
+  }
+};
+/* Runs every container test on an 8-int array or a 10-pair array, prints one PASS/FAIL line per test and returns 0 only if all of them passed.  */
+int main()
+{
+  int data[8] = {0, 1, 2, 3, 4, 5, 6, 7};
+  std::pair<int, int> pairs[10] = {std::pair<int, int>( 1,  2),
+				   std::pair<int, int>( 2,  4),
+				   std::pair<int, int>( 3,  6),
+				   std::pair<int, int>( 4,  8),
+				   std::pair<int, int>( 5, 10),
+				   std::pair<int, int>( 6, 12),
+				   std::pair<int, int>( 7, 14),
+				   std::pair<int, int>( 8, 16),
+				   std::pair<int, int>( 9, 18),
+				   std::pair<int, int>(10, 20)};
+  /* All tests run before anything is printed, so one failure does not hide the others.  */
+  const bool vec_res                = vector_test<multiply_by_2>(data);
+  const bool deque_res              = deque_test<multiply_by_2>(data);
+  const bool list_res               = list_test<multiply_by_2>(data);
+  const bool map_res                = map_test<multiply_by_2>(pairs);
+  const bool set_res                = set_test(data);
+  const bool multimap_res           = multimap_test<multiply_by_2>(pairs);
+  const bool multiset_res           = multiset_test(data);
+  const bool array_res              = array_test<multiply_by_2>(data);
+  const bool forward_list_res       = forward_list_test<multiply_by_2>(data);
+  const bool unordered_map_res      = unordered_map_test<multiply_by_2>(pairs);
+  const bool unordered_set_res      = unordered_set_test(data);
+  const bool unordered_multimap_res = unordered_multimap_test<multiply_by_2>(pairs);
+  const bool unordered_multiset_res = unordered_multiset_test(data);
+  std::printf("vector            : %s\n", vec_res                ? "PASS" : "FAIL");
+  std::printf("deque             : %s\n", deque_res              ? "PASS" : "FAIL");
+  std::printf("list              : %s\n", list_res               ? "PASS" : "FAIL");
+  std::printf("map               : %s\n", map_res                ? "PASS" : "FAIL");
+  std::printf("set               : %s\n", set_res                ? "PASS" : "FAIL");
+  std::printf("multimap          : %s\n", multimap_res           ? "PASS" : "FAIL");
+  std::printf("multiset          : %s\n", multiset_res           ? "PASS" : "FAIL");
+  std::printf("array             : %s\n", array_res              ? "PASS" : "FAIL");
+  std::printf("forward_list      : %s\n", forward_list_res       ? "PASS" : "FAIL");
+  std::printf("unordered_map     : %s\n", unordered_map_res      ? "PASS" : "FAIL");
+  std::printf("unordered_set     : %s\n", unordered_set_res      ? "PASS" : "FAIL");
+  std::printf("unordered_multimap: %s\n", unordered_multimap_res ? "PASS" : "FAIL");
+  std::printf("unordered_multiset: %s\n", unordered_multiset_res ? "PASS" : "FAIL");
+  const bool ok = vec_res
+		  && deque_res
+		  && list_res
+		  && map_res
+		  && set_res
+		  && multimap_res
+		  && multiset_res
+		  && array_res
+		  && forward_list_res
+		  && unordered_map_res
+		  && unordered_set_res
+		  && unordered_multimap_res
+		  && unordered_multiset_res;
+  return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-2000.C b/libgomp/testsuite/libgomp.c++/target-flex-2000.C
new file mode 100644
index 0000000..688c014
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-2000.C
@@ -0,0 +1,32 @@
+/* Tiny tuple test.  */
+
+#include <tuple>
+
+#include "target-flex-common.h"
+/* Maps a std::tuple {'a', arg, 3.14f} to the device, checks element 0 there and copies element 1 back; the host then checks that it equals ARG.  */
+bool test(int arg)
+{
+  bool ok;  /* Receives inner_ok at the end of the target region.  */
+  int out;
+  std::tuple tup = {'a', arg, 3.14f};  /* Element types are deduced from the initializers.  */
+  #pragma omp target map(from: ok, out) map(to: tup)
+    {
+      bool inner_ok = true;
+      {
+	VERIFY (std::get<0>(tup) == 'a');
+	out = std::get<1>(tup);
+      }
+      end:  /* NOTE(review): presumably where a failed VERIFY jumps -- confirm in target-flex-common.h.  */
+      ok = inner_ok;
+    }
+  if (!ok)
+    return false;
+  VERIFY_NON_TARGET (out == arg);
+  return true;
+}
+/* Exit status is 0 when test() passes and 1 otherwise.  */
+int main()
+{
+  volatile int arg = 42u;
+  return test(arg) ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-2001.C b/libgomp/testsuite/libgomp.c++/target-flex-2001.C
new file mode 100644
index 0000000..f1a6c12
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-2001.C
@@ -0,0 +1,61 @@
+/* { dg-additional-options "-std=c++20" } */
+
+/* Functional  */
+
+#include <functional>
+#include <utility>
+
+#include "target-flex-common.h"
+/* Calls FN with A through std::invoke, perfectly forwarding both.  */
+template<typename T,typename Fn>
+auto invoke_unary(T&& a, Fn&& fn) noexcept
+{
+  return std::invoke(std::forward<Fn>(fn),
+		     std::forward<T>(a));
+}
+/* Calls FN with A and B through std::invoke, perfectly forwarding all three.  */
+template<typename T, typename U, typename Fn>
+auto invoke_binary(T&& a, U&& b, Fn&& fn) noexcept
+{
+  return std::invoke(std::forward<Fn>(fn),
+		     std::forward<T>(a),
+		     std::forward<U>(b));
+}
+/* Exercises std::plus, std::bind_front, std::not_fn and std::invoke (via invoke_unary/invoke_binary) with ARG inside a target region; returns whether every check passed.  */
+bool test(unsigned arg)
+{
+  bool ok;  /* Receives inner_ok at the end of the target region.  */
+  #pragma omp target map(from: ok) map(to: arg)
+    {
+      bool inner_ok = true;
+      {
+	VERIFY (std::plus{}(arg, 2) == arg + 2);
+	auto bound_plus_arg = std::bind_front(std::plus{}, arg);
+	VERIFY (bound_plus_arg(10) == arg + 10);
+	VERIFY (bound_plus_arg(20) == arg + 20);
+
+	/* Negating not_equal_to gives an equality predicate.  */
+	VERIFY (std::not_fn(std::not_equal_to{})(arg, arg));
+	VERIFY (invoke_binary(arg, arg, std::not_fn(std::not_equal_to{})));
+	auto bound_equals_arg = std::bind_front(std::not_fn(std::not_equal_to{}), arg);
+	VERIFY (bound_equals_arg(arg));
+	VERIFY (std::not_fn(bound_equals_arg)(arg + 1));
+	VERIFY (invoke_unary(arg, bound_equals_arg));
+
+	/* The same checks with the std::ranges comparison object.  */
+	VERIFY (std::not_fn(std::ranges::not_equal_to{})(arg, arg));
+	VERIFY (invoke_binary(arg, arg, std::not_fn(std::ranges::not_equal_to{})));
+	auto bound_ranges_equals_arg = std::bind_front(std::not_fn(std::ranges::not_equal_to{}), arg);
+	VERIFY (bound_ranges_equals_arg(arg));
+	VERIFY (std::not_fn(bound_ranges_equals_arg)(arg + 1));
+	VERIFY (invoke_unary(arg, bound_ranges_equals_arg));
+      }
+      end:  /* NOTE(review): presumably where a failed VERIFY jumps -- confirm in target-flex-common.h.  */
+      ok = inner_ok;
+    }
+  return ok;
+}
+/* Exit status is 0 when test() passes and 1 otherwise.  */
+int main()
+{
+  volatile unsigned arg = 42u;
+  return test(arg) ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-2002.C b/libgomp/testsuite/libgomp.c++/target-flex-2002.C
new file mode 100644
index 0000000..f738806
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-2002.C
@@ -0,0 +1,97 @@
+/* { dg-additional-options "-std=c++23" } */
+
+/* expected/optional  */
+
+#include <optional>
+#include <expected>
+
+#include "target-flex-common.h"
+/* Returns an optional holding ARG when B is true, and an empty optional otherwise.  */
+std::optional<unsigned> make_optional(bool b, unsigned arg = 0u) noexcept
+{
+  if (b)
+    return std::optional<unsigned>{arg};
+  return std::nullopt;
+}
+/* Checks observers and the or_else/transform/value_or chain on an empty and on an engaged std::optional inside a target region; returns whether every check passed.  */
+bool test_optional(unsigned arg)
+{
+  bool ok;  /* Receives inner_ok at the end of the target region.  */
+  #pragma omp target map(from: ok) map(to: arg)
+    {
+      bool inner_ok = true;
+      {
+	auto null_opt = make_optional(false);
+	VERIFY (!null_opt);
+	VERIFY (!null_opt.has_value());
+	VERIFY (null_opt.value_or(arg * 2u) == arg * 2u);
+	/* Empty: or_else supplies ARG, which transform then doubles.  */
+	VERIFY (null_opt.or_else([&](){ return std::optional<unsigned>{arg}; })
+			.transform([](int a){ return a * 2u; })
+			.value_or(0) == arg * 2u);
+
+	auto opt = make_optional(true, arg);
+	VERIFY (opt);
+	VERIFY (opt.has_value());
+	VERIFY (opt.value() == arg);
+	VERIFY (*opt == arg);
+	VERIFY (opt.value_or(arg + 42) == arg);
+	/* Engaged: the or_else fallback must not be used, so the result is still ARG doubled.  */
+	VERIFY (opt.or_else([&](){ return std::optional<unsigned>{arg + 42}; })
+		   .transform([](int a){ return a * 2u; })
+		   .value_or(0) == arg * 2u);
+      }
+      end:  /* NOTE(review): presumably where a failed VERIFY jumps -- confirm in target-flex-common.h.  */
+      ok = inner_ok;
+    }
+  return ok;
+}
+/* Error payload used with std::expected in this file; carries a single int code.  */
+struct my_error
+{
+  int _e;
+};
+/* Returns an expected holding ARG when B is true, otherwise one holding my_error{-1}.  */
+std::expected<unsigned, my_error> make_expected(bool b, unsigned arg = 0u) noexcept
+{
+  if (b)
+    return std::expected<unsigned, my_error>{arg};
+  return std::unexpected{my_error{-1}};
+}
+/* Checks observers and the or_else/transform/value_or chain on an error-holding and on a value-holding std::expected inside a target region; returns whether every check passed.  */
+bool test_expected(unsigned arg)
+{
+  bool ok;  /* Receives inner_ok at the end of the target region.  */
+  #pragma omp target map(from: ok) map(to: arg)
+    {
+      bool inner_ok = true;
+      {
+	auto unexpected = make_expected(false);
+	VERIFY (!unexpected);
+	VERIFY (!unexpected.has_value());
+	VERIFY (unexpected.error()._e == -1);
+	VERIFY (unexpected.value_or(arg * 2u) == arg * 2u);
+	/* Error state: or_else supplies ARG, which transform then doubles.  */
+	VERIFY (unexpected.or_else([&](my_error e){ return std::expected<unsigned, my_error>{arg}; })
+			  .transform([](int a){ return a * 2u; })
+			  .value_or(0) == arg * 2u);
+
+	auto expected = make_expected(true, arg);
+	VERIFY (expected);
+	VERIFY (expected.has_value());
+	VERIFY (expected.value() == arg);
+	VERIFY (*expected == arg);
+	VERIFY (expected.value_or(arg + 42) == arg);
+	/* Value state: the or_else callback must not be used, so the result is still ARG doubled.  */
+	VERIFY (expected.or_else([&](my_error e){ return std::expected<unsigned, my_error>{std::unexpected{e}}; })
+			.transform([](int a){ return a * 2u; })
+			.value_or(0) == arg * 2u);
+      }
+      end:  /* NOTE(review): presumably where a failed VERIFY jumps -- confirm in target-flex-common.h.  */
+      ok = inner_ok;
+    }
+  return ok;
+}
+/* Exit status is 0 only when both the optional and the expected test pass.  */
+int main()
+{
+  volatile unsigned arg = 42;
+  const bool all_ok = test_optional(arg) && test_expected(arg);
+  return all_ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-2003.C b/libgomp/testsuite/libgomp.c++/target-flex-2003.C
new file mode 100644
index 0000000..9303ee54
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-2003.C
@@ -0,0 +1,176 @@
+/* { dg-additional-options "-std=c++20" } */
+
+/* bit_cast and memcpy  */
+
+#include <bit>
+#include <cstring>
+
+#include "target-flex-common.h"
+
+struct S0
+{
+  int _v0;
+  char _v1;
+  long long _v2;
+};
+
+struct S1
+{
+  int _v0;
+  char _v1;
+  long long _v2;
+};
+
+bool test_bit_cast(int arg)
+{
+  bool ok;
+  S1 s1_out;
+  #pragma omp target map(from: ok, s1_out) map(to: arg)
+    {
+      bool inner_ok = true;
+      {
+	long long v = static_cast<long long>(arg + 42ll);
+	S0 s = {arg, 'a', v};
+	VERIFY (std::bit_cast<S1>(s)._v0 == arg);
+	VERIFY (std::bit_cast<S1>(s)._v1 == 'a');
+	VERIFY (std::bit_cast<S1>(s)._v2 == v);
+	s1_out = std::bit_cast<S1>(s);
+      }
+      end:
+      ok = inner_ok;
+    }
+  if (!ok)
+    return false;
+  long long v = static_cast<long long>(arg + 42ll);
+  VERIFY_NON_TARGET (std::bit_cast<S0>(s1_out)._v0 == arg);
+  VERIFY_NON_TARGET (std::bit_cast<S0>(s1_out)._v1 == 'a');
+  VERIFY_NON_TARGET (std::bit_cast<S0>(s1_out)._v2 == v);
+  return true;
+}
+
+
+struct OutStruct
+{
+  std::size_t _id;
+  void *_next;
+};
+
+struct Extendable1
+{
+  std::size_t _id;
+  void *_next;
+  int _v;
+};
+
+struct Extendable2
+{
+  std::size_t _id;
+  void *_next;
+  char _str[256];
+};
+
+struct Extendable3
+{
+  std::size_t _id;
+  void *_next;
+  const int *_nums;
+  std::size_t _size;
+};
+
+struct ExtendableUnknown
+{
+  std::size_t _id;
+  void *_next;
+};
+
+template<typename To, std::size_t Id>
+To *get_extendable(void *p)
+{
+  while (p != nullptr)
+    {
+      OutStruct out;
+      std::memcpy(&out, p, sizeof(OutStruct));
+      if (out._id == Id)
+	return static_cast<To *>(p);
+      p = out._next;
+    }
+  return nullptr;
+}
+
+bool test_memcpy(int arg, const int *nums, std::size_t nums_size)
+{
+  bool ok;
+  Extendable2 e2_out;
+  #pragma omp target map(from: ok, e2_out) map(to: arg, nums[ :nums_size], nums_size)
+    {
+      bool inner_ok = true;
+      {
+	Extendable3 e3 = {3u, nullptr, nums, nums_size};
+	ExtendableUnknown u1 = {100u, &e3};
+	Extendable2 e2 = {2u, &u1, {'H', 'e', 'l', 'l', 'o', '!', '\000'}};
+	ExtendableUnknown u2 = {101u, &e2};
+	ExtendableUnknown u3 = {102u, &u2};
+	ExtendableUnknown u4 = {142u, &u3};
+	Extendable1 e1 = {1u, &u4, arg};
+
+	void *p = &e1;
+	while (p != nullptr)
+	  {
+	    /* You can always cast a pointer to a struct to a pointer to
+	       the type of its first member.  */
+	    switch (*static_cast<std::size_t *>(p))
+	      {
+		case 1:
+		  {
+		    Extendable1 *e1_p = static_cast<Extendable1 *>(p);
+		    p = e1_p->_next;
+		    VERIFY (e1_p->_v == arg);
+		    break;
+		  }
+		case 2:
+		  {
+		    Extendable2 *e2_p = static_cast<Extendable2 *>(p);
+		    p = e2_p->_next;
+		    VERIFY (std::strcmp(e2_p->_str, "Hello!") == 0);
+		    break;
+		  }
+		case 3:
+		  {
+		    Extendable3 *e3_p = static_cast<Extendable3 *>(p);
+		    p = e3_p->_next;
+		    VERIFY (nums == e3_p->_nums);
+		    VERIFY (nums_size == e3_p->_size);
+		    break;
+		  }
+		default:
+		  {
+		    /* Casting to a pointer to OutStruct invokes undefined
+		       behavior though; memcpy is required to extract the _next
+		       member.  */
+		    OutStruct out;
+		    std::memcpy(&out, p, sizeof(OutStruct));
+		    p = out._next;
+		  }
+	      }
+	  }
+	Extendable2 *e2_p = get_extendable<Extendable2, 2u>(&e1);
+	VERIFY (e2_p != nullptr);
+	e2_out = *e2_p;
+      }
+      end:
+      ok = inner_ok;
+    }
+  if (!ok)
+    return false;
+  VERIFY_NON_TARGET (e2_out._id == 2u);
+  VERIFY_NON_TARGET (std::strcmp(e2_out._str, "Hello!") == 0);
+  return true;
+}
+
+int main()
+{
+  volatile int arg = 42;
+  int arr[8] = {0, 1, 2, 3, 4, 5, 6, 7};
+  return test_bit_cast(arg)
+	 && test_memcpy(arg, arr, 8) ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-30.C b/libgomp/testsuite/libgomp.c++/target-flex-30.C
new file mode 100644
index 0000000..7a7eb39
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-30.C
@@ -0,0 +1,51 @@
+/* std::initializer_list in target region.  */
+
+#include <initializer_list>
+#include <array>
+
+#include "target-flex-common.h"
+
+bool test_initializer_list(int arg)
+{
+  static constexpr std::size_t out_arr_size = 7;
+  int out_arr[out_arr_size];
+  bool ok;
+  #pragma omp target map(from: ok, out_arr[ :out_arr_size]) map(to: arg)
+    {
+      bool inner_ok = true;
+      {
+	auto il = {0, 1, 2, 3, 4, 5, arg};
+
+	int sum = 0;
+	for (auto const& e : il)
+	  sum += e;
+	VERIFY (sum == 0 + 1 + 2 + 3 + 4 + 5 + arg);
+
+	auto* out_it = out_arr;
+	const auto* const out_end = out_arr + out_arr_size;
+	for (auto const& e : il)
+	  {
+	    VERIFY (out_it != out_end);
+	    *out_it = e;
+	    ++out_it;
+	  }
+      }
+      end:
+      ok = inner_ok;
+    }
+  if (!ok)
+    return false;
+
+  std::array<int, out_arr_size> reference_array = {0, 1, 2, 3, 4, 5, arg};
+  const auto *out_arr_it = out_arr;
+  for (auto const& e : reference_array)
+    VERIFY_NON_TARGET (e == *(out_arr_it++));
+
+  return true;
+}
+
+int main()
+{
+  volatile int arg = 42;
+  return test_initializer_list(arg) ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-300.C b/libgomp/testsuite/libgomp.c++/target-flex-300.C
new file mode 100644
index 0000000..ecfd8cb
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-300.C
@@ -0,0 +1,49 @@
+/* { dg-additional-options -std=c++23 } */
+
+/* numerics  */
+
+#include <algorithm>
+#include <numeric>
+#include <ranges>
+#include <span>
+#include <vector>
+
+//TODO PR120454 "C++ constexpr vs. OpenMP implicit mapping"
+#pragma omp declare target(std::ranges::all_of, std::ranges::iota)
+
+#include "target-flex-common.h"
+
+namespace stdr = std::ranges;
+
+bool test(std::size_t arg)
+{
+  bool ok;
+  int midpoint_out;
+  std::vector<int> vec(arg);
+  int *data = vec.data();
+  std::size_t size = vec.size();
+  #pragma omp target defaultmap(none) map(from: ok, midpoint_out) map(tofrom: data[ :size]) map(to: arg, size)
+    {
+      std::span span = {data, size};
+      bool inner_ok = true;
+      {
+	VERIFY (stdr::all_of(span, [](int v){ return v == int{}; }));
+	stdr::iota(span, 0);
+	midpoint_out = *std::midpoint(span.data(), span.data() + span.size());
+      }
+      end:
+      ok = inner_ok;
+    }
+  if (!ok)
+    return false;
+  VERIFY_NON_TARGET (stdr::equal(vec, std::views::iota(0, static_cast<int>(vec.size()))));
+  VERIFY_NON_TARGET (*std::midpoint(vec.data(), vec.data() + vec.size())
+		     == midpoint_out);
+  return true;
+}
+
+int main()
+{
+  volatile std::size_t arg = 42;
+  return test(arg) ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-31.C b/libgomp/testsuite/libgomp.c++/target-flex-31.C
new file mode 100644
index 0000000..adaf18f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-31.C
@@ -0,0 +1,80 @@
+/* std::initializer_list in target region.  */
+
+#include <initializer_list>
+
+#include "target-flex-common.h"
+
+struct S0
+{
+  int _v;
+  S0(std::initializer_list<int> il)
+    : _v(0)
+  {
+    for (auto const& e : il)
+      _v += e;
+  }
+};
+
+struct S1
+{
+  int _v;
+  template<typename T>
+  S1(std::initializer_list<T> il)
+    : _v(0)
+  {
+    for (auto const& e : il)
+      _v += e;
+  }
+};
+
+template<typename T>
+struct S2
+{
+  T _v;
+  S2(std::initializer_list<T> il)
+    : _v(0)
+  {
+    for (auto const& e : il)
+      _v += e;
+  }
+};
+
+#if __cplusplus >= 201703L
+template<typename T>
+S2(std::initializer_list<T>) -> S2<T>;
+#endif
+
+bool test_initializer_list(int arg)
+{
+  bool ok;
+  #pragma omp target map(from: ok) map(to: arg)
+    {
+      bool inner_ok = true;
+      {
+	static constexpr int partial_sum = 0 + 1 + 2 + 3 + 4 + 5;
+
+	S0 s0{0, 1, 2, 3, 4, 5, arg};
+	VERIFY (s0._v == partial_sum + arg);
+
+	S1 s1{0, 1, 2, 3, 4, 5, arg};
+	VERIFY (s1._v == partial_sum + arg);
+
+	S2<int> s2{0, 1, 2, 3, 4, 5, arg};
+	VERIFY (s2._v == partial_sum + arg);
+
+	#if __cplusplus >= 201703L
+	  S2 s2_ctad{0, 1, 2, 3, 4, 5, arg};
+	  VERIFY (s2_ctad._v == partial_sum + arg);
+	#endif
+      }
+      end:
+      ok = inner_ok;
+    }
+  return ok;
+}
+
+int main()
+{
+  volatile int arg = 42;
+  return test_initializer_list(arg) ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-32.C b/libgomp/testsuite/libgomp.c++/target-flex-32.C
new file mode 100644
index 0000000..0ed4fbc
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-32.C
@@ -0,0 +1,50 @@
+/* std::initializer_list constructor of std::vector (explicit template arg) */
+
+#include <vector>
+#include <array>
+
+#include "target-flex-common.h"
+
+bool test_initializer_list(int arg)
+{
+  static constexpr std::size_t out_arr_size = 7;
+  int out_arr[out_arr_size];
+  bool ok;
+  #pragma omp target map(from: ok, out_arr[ :out_arr_size]) map(to: arg)
+    {
+      bool inner_ok = true;
+      {
+	std::vector<int> vec{0, 1, 2, 3, 4, 5, arg};
+	int sum = 0;
+	for (auto const& e : vec)
+	  sum += e;
+	VERIFY (sum == 0 + 1 + 2 + 3 + 4 + 5 + arg);
+
+	auto* out_it = out_arr;
+	const auto* const out_end = out_arr + out_arr_size;
+	for (auto const& e : vec)
+	  {
+	    VERIFY (out_it != out_end);
+	    *out_it = e;
+	    ++out_it;
+	  }
+      }
+      end:
+      ok = inner_ok;
+    }
+  if (!ok)
+    return false;
+
+  std::array<int, out_arr_size> reference_array = {0, 1, 2, 3, 4, 5, arg};
+  const auto *out_arr_it = out_arr;
+  for (auto const& e : reference_array)
+    VERIFY_NON_TARGET (e == *(out_arr_it++));
+
+  return true;
+}
+
+int main()
+{
+  volatile int arg = 42;
+  return test_initializer_list(arg) ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-33.C b/libgomp/testsuite/libgomp.c++/target-flex-33.C
new file mode 100644
index 0000000..6c16c79
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-33.C
@@ -0,0 +1,52 @@
+/* { dg-additional-options "-std=c++17" } */
+
+/* deduced std::initializer_list constructor of std::vector (CTAD) */
+
+#include <vector>
+#include <array>
+
+#include "target-flex-common.h"
+
+bool test_initializer_list(int arg)
+{
+  static constexpr std::size_t out_arr_size = 7;
+  int out_arr[out_arr_size];
+  bool ok;
+  #pragma omp target map(from: ok, out_arr[ :out_arr_size]) map(to: arg)
+    {
+      bool inner_ok = true;
+      {
+	std::vector vec{0, 1, 2, 3, 4, 5, arg};
+	int sum = 0;
+	for (auto const& e : vec)
+	  sum += e;
+	VERIFY (sum == 0 + 1 + 2 + 3 + 4 + 5 + arg);
+
+	auto* out_it = out_arr;
+	const auto* const out_end = out_arr + out_arr_size;
+	for (auto const& e : vec)
+	  {
+	    VERIFY (out_it != out_end);
+	    *out_it = e;
+	    ++out_it;
+	  }
+      }
+      end:
+      ok = inner_ok;
+    }
+  if (!ok)
+    return false;
+
+  std::array<int, out_arr_size> reference_array = {0, 1, 2, 3, 4, 5, arg};
+  const auto *out_arr_it = out_arr;
+  for (auto const& e : reference_array)
+    VERIFY_NON_TARGET (e == *(out_arr_it++));
+
+  return true;
+}
+
+int main()
+{
+  volatile int arg = 42;
+  return test_initializer_list(arg) ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-41.C b/libgomp/testsuite/libgomp.c++/target-flex-41.C
new file mode 100644
index 0000000..7232d92
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-41.C
@@ -0,0 +1,94 @@
+/* { dg-additional-options "-std=c++20" } */
+
+/* <iterator> c++20  */
+
+/* std::common_iterator uses std::variant.  */
+
+#include <vector>
+#include <iterator>
+#include <span>
+
+//TODO PR120454 "C++ constexpr vs. OpenMP implicit mapping"
+#pragma omp declare target(std::ranges::distance, std::ranges::next)
+
+#include "target-flex-common.h"
+
+namespace stdr = std::ranges;
+
+template<typename It0, typename It1>
+bool simple_equal(const It0 begin0, const It0 end0,
+		  const It1 begin1, const It1 end1) BL_NOEXCEPT
+{
+  It0 it0 = begin0;
+  It1 it1 = begin1;
+  for (; it0 != end0; ++it0, ++it1)
+    if (it1 == end1 || *it0 != *it1)
+      return false;
+  return true;
+}
+
+template<typename It, typename OutIt>
+void simple_copy(const It begin, const It end, OutIt out) BL_NOEXCEPT
+{
+  for (It it = begin; it != end; ++it, ++out)
+    *out = *it;
+}
+
+template<typename T, std::size_t Size>
+bool test(const T (&arr)[Size])
+{
+  bool ok;
+  T out_rev_arr[Size];
+  T out_fwd_arr[Size];
+  T out_first_half_arr[Size / 2];
+  #pragma omp target defaultmap(none) \
+		     map(from: ok, out_rev_arr[ :Size], out_fwd_arr[ :Size], \
+			       out_first_half_arr[ :Size / 2]) \
+		     map(to: arr[ :Size])
+    {
+      bool inner_ok = true;
+      {
+	std::span<const T> span = {arr, Size};
+	std::vector<T> rev_vec(std::reverse_iterator{span.end()},
+			       std::reverse_iterator{span.begin()});
+	VERIFY (std::distance(span.begin(), span.end())
+		== std::distance(rev_vec.begin(), rev_vec.end()));
+	VERIFY (stdr::distance(span.begin(), span.end())
+		== stdr::distance(rev_vec.begin(), rev_vec.end()));
+	VERIFY (stdr::distance(span) == stdr::distance(rev_vec));
+	VERIFY (simple_equal(span.begin(), span.end(),
+			     std::reverse_iterator{rev_vec.end()},
+			     std::reverse_iterator{rev_vec.begin()}));
+	simple_copy(rev_vec.begin(), rev_vec.end(), out_rev_arr);
+	simple_copy(std::reverse_iterator{rev_vec.end()},
+		    std::reverse_iterator{rev_vec.begin()},
+		    out_fwd_arr);
+	using counted_iter = std::counted_iterator<decltype(span.begin())>;
+	using common_iter = std::common_iterator<counted_iter,
+						 std::default_sentinel_t>;
+	std::vector<T> front_half;
+	simple_copy(common_iter{counted_iter{span.begin(), Size / 2}},
+		    common_iter{std::default_sentinel},
+		    std::back_insert_iterator{front_half});
+	VERIFY (simple_equal(span.begin(), stdr::next(span.begin(), Size / 2),
+			     front_half.begin(), front_half.end()));
+	simple_copy(front_half.begin(), front_half.end(), out_first_half_arr);
+      }
+      end:
+      ok = inner_ok;
+    }
+  VERIFY_NON_TARGET (simple_equal(std::reverse_iterator{arr + Size},
+				  std::reverse_iterator{arr},
+				  out_rev_arr, out_rev_arr + Size));
+  VERIFY_NON_TARGET (simple_equal(arr, arr + Size,
+				  out_fwd_arr, out_fwd_arr + Size));
+  VERIFY_NON_TARGET (simple_equal(arr, arr + Size / 2,
+				  out_first_half_arr, out_first_half_arr + Size / 2));
+  return ok;
+}
+
+int main()
+{
+  int arr[] = {0, 1, 2, 3, 4, 5, 6, 7};
+  return test(arr) ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-60.C b/libgomp/testsuite/libgomp.c++/target-flex-60.C
new file mode 100644
index 0000000..7e1bf96
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-60.C
@@ -0,0 +1,46 @@
+/* algorithms pre c++20 */
+
+#include <algorithm>
+#include <vector>
+
+#include "target-flex-common.h"
+
+template<typename T, std::size_t Size>
+bool test(const T (&arr)[Size])
+{
+  bool ok;
+  T out_2x_arr[Size];
+  T out_shifted_arr[Size];
+  #pragma omp target map(from: ok, out_2x_arr[ :Size], out_shifted_arr[ :Size]) \
+		     map(to: arr[ :Size])
+    {
+      std::vector<T> vec(Size);
+      std::vector<T> mutated(Size);
+      bool inner_ok = true;
+      {
+	std::copy(arr, arr + Size, vec.begin());
+	VERIFY (std::equal(arr, arr + Size, vec.begin()));
+	std::transform(vec.begin(), vec.end(), mutated.begin(),
+		       [](const T& v){ return v * 2; });
+	std::copy(mutated.begin(), mutated.end(), out_2x_arr);
+	std::rotate(vec.begin(), std::next(vec.begin(), Size / 2), vec.end());
+	std::copy(vec.begin(), vec.end(), out_shifted_arr);
+      }
+      end:
+      ok = inner_ok;
+    }
+  if (!ok)
+    return false;
+  VERIFY_NON_TARGET (std::equal(arr, arr + Size, out_2x_arr,
+				[](const T& a, const T& b){ return a * 2 == b; }));
+  std::vector<T> shifted(arr, arr + Size);
+  std::rotate(shifted.begin(), std::next(shifted.begin(), Size / 2), shifted.end());
+  VERIFY_NON_TARGET (std::equal(out_shifted_arr, out_shifted_arr + Size, shifted.begin()));
+  return true;
+}
+
+int main()
+{
+  int arr[] = {0, 1, 2, 3, 4, 5, 6, 7};
+  return test(arr) ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-61.C b/libgomp/testsuite/libgomp.c++/target-flex-61.C
new file mode 100644
index 0000000..551679f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-61.C
@@ -0,0 +1,54 @@
+/* { dg-additional-options "-std=c++20" } */
+
+/* ranged algorithms c++20 */
+
+#include <algorithm>
+#include <ranges>
+#include <vector>
+
+//TODO PR120454 "C++ constexpr vs. OpenMP implicit mapping"
+#pragma omp declare target(std::ranges::copy, std::ranges::equal, std::ranges::rotate, std::ranges::transform)
+
+#include "target-flex-common.h"
+
+namespace stdr = std::ranges;
+
+template<typename T, std::size_t Size>
+bool test(const T (&arr)[Size])
+{
+  bool ok;
+  T out_2x_arr[Size];
+  T out_shifted_arr[Size];
+  #pragma omp target defaultmap(none) \
+		     map(from: ok, out_2x_arr[ :Size], out_shifted_arr[ :Size]) \
+		     map(to: arr[ :Size])
+    {
+      std::vector<T> vec(Size);
+      std::vector<T> mutated(Size);
+      bool inner_ok = true;
+      {
+	stdr::copy(arr, vec.begin());
+	VERIFY (stdr::equal(arr, vec));
+	stdr::transform(vec, mutated.begin(),
+			[](const T& v){ return v * 2; });
+	stdr::copy(mutated, out_2x_arr);
+	stdr::rotate(vec, std::next(vec.begin(), Size / 2));
+	stdr::copy(vec, out_shifted_arr);
+      }
+      end:
+      ok = inner_ok;
+    }
+  if (!ok)
+    return false;
+  VERIFY_NON_TARGET (stdr::equal(arr, out_2x_arr, stdr::equal_to{}, [](const T& v){ return v * 2; }));
+  std::vector<T> shifted(arr, arr + Size);
+  stdr::rotate(shifted, std::next(shifted.begin(), Size / 2));
+  VERIFY_NON_TARGET (stdr::equal(out_shifted_arr, shifted));
+  return true;
+}
+
+int main()
+{
+  int arr[] = {0, 1, 2, 3, 4, 5, 6, 7};
+  return test(arr) ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-62.C b/libgomp/testsuite/libgomp.c++/target-flex-62.C
new file mode 100644
index 0000000..6fb4345
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-62.C
@@ -0,0 +1,50 @@
+/* { dg-additional-options -std=c++23 } */
+
+/* std::views stuff.  Also tests std::tuple with std::views::zip.  */
+
+#include <algorithm>
+#include <ranges>
+#include <span>
+
+//TODO PR120454 "C++ constexpr vs. OpenMP implicit mapping"
+#pragma omp declare target(std::ranges::all_of, std::ranges::equal, std::ranges::fold_left, std::views::reverse, std::views::zip)
+
+#include "target-flex-common.h"
+
+namespace stdr = std::ranges;
+namespace stdv = std::views;
+
+bool f()
+{
+  const int arr_fwd[8] = {0, 1, 2, 3, 4, 5, 6, 7};
+  const int arr_rev[8] = {7, 6, 5, 4, 3, 2, 1, 0};
+
+  bool ok;
+  #pragma omp target defaultmap(none) map(from: ok) map(to: arr_fwd[ :8], arr_rev[ :8])
+    {
+      std::span<const int> fwd = {arr_fwd, 8};
+      std::span<const int> rev = {arr_rev, 8};
+      bool inner_ok = true;
+      {
+	VERIFY(stdr::equal(fwd, rev | stdv::reverse));
+	VERIFY(stdr::equal(fwd | stdv::drop(4) | stdv::reverse,
+			   rev | stdv::take(4)));
+	for (auto [first, second] : stdv::zip(fwd, rev))
+	  VERIFY(first + second == 7);
+	auto plus = [](int a, int b){ return a + b; };
+	auto is_even = [](int v){ return v % 2 == 0; };
+	VERIFY(stdr::fold_left(fwd | stdv::filter(is_even), 0, plus)
+	       == 12);
+	VERIFY(stdr::all_of(fwd | stdv::transform([](int v){ return v * 2; }),
+			    is_even));
+      }
+      end:
+      ok = inner_ok;
+    }
+  return ok;
+}
+
+int main()
+{
+  return f() ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-70.C b/libgomp/testsuite/libgomp.c++/target-flex-70.C
new file mode 100644
index 0000000..9e9383d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-70.C
@@ -0,0 +1,26 @@
+/* CTAD in target regions.  */
+
+template<typename T>
+struct S
+{
+  T _v;
+};
+
+template<typename T>
+S(T) -> S<T>;
+
+bool f()
+{
+  bool ok;
+  #pragma omp target map(from: ok)
+    {
+      S s{42};
+      ok = s._v == 42;
+    }
+  return ok;
+}
+
+int main()
+{
+  return f() ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-80.C b/libgomp/testsuite/libgomp.c++/target-flex-80.C
new file mode 100644
index 0000000..6e1c4d6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-80.C
@@ -0,0 +1,49 @@
+// { dg-additional-options "-std=c++20" }
+
+/* std::span  */
+
+#include <span>
+
+#include "target-flex-common.h"
+
+template<typename It0, typename It1>
+bool simple_equal(It0 it0, const It0 end0,
+		  It1 it1, const It1 end1) noexcept
+{
+  for (; it0 != end0; ++it0, ++it1)
+    if (it1 == end1 || *it0 != *it1)
+      return false;
+  return true;
+}
+
+template<typename T, std::size_t Size>
+bool test(const T (&arr)[Size])
+{
+  bool ok;
+  T out_arr[Size];
+  #pragma omp target map(from: ok) map(to: arr[ :Size])
+    {
+      std::span span = {arr, Size};
+      bool inner_ok = true;
+      {
+	VERIFY (!span.empty());
+	VERIFY (span.size() == Size);
+	auto out_it = out_arr;
+	for (auto elem : span)
+	  *(out_it++) = elem;
+      }
+      end:
+      ok = inner_ok;
+    }
+  if (!ok)
+    return false;
+  VERIFY_NON_TARGET (simple_equal(arr, arr + Size,
+				  out_arr, out_arr + Size));
+  return true;
+}
+
+int main()
+{
+  int arr[8] = {0, 1, 2, 3, 4, 5, 6, 7};
+  return test(arr) ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-81.C b/libgomp/testsuite/libgomp.c++/target-flex-81.C
new file mode 100644
index 0000000..d7ed69f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-81.C
@@ -0,0 +1,75 @@
+/* { dg-additional-options "-std=c++20" } */
+
+#include <ranges>
+#include <span>
+#include <type_traits>
+#include <vector>
+
+#include "target-flex-common.h"
+
+namespace stdr = std::ranges;
+
+template<typename It0, typename It1>
+bool simple_equal(It0 it0, const It0 end0,
+		  It1 it1, const It1 end1) noexcept
+{
+  for (; it0 != end0; ++it0, ++it1)
+    if (it1 == end1 || *it0 != *it1)
+      return false;
+  return true;
+}
+
+template<typename Rn0, typename Rn1>
+bool simple_equal(Rn0&& rn0, Rn1&& rn1) noexcept
+{
+  return simple_equal(stdr::begin(rn0), stdr::end(rn0),
+		      stdr::begin(rn1), stdr::end(rn1));
+}
+
+template<typename Rn>
+bool test(Rn&& range)
+{
+  using value_type = stdr::range_value_t<std::remove_cvref_t<Rn>>;
+  std::vector<value_type> vec = {stdr::begin(range), stdr::end(range)};
+  value_type *data = vec.data();
+  std::size_t size = vec.size();
+  bool ok;
+  #pragma omp target map(from: ok) map(tofrom: data[ :size]) map(to: size)
+    {
+      std::vector<value_type> orig = {data, data + size};
+      std::span<value_type> span = {data, size};
+      bool inner_ok = true;
+      {
+	auto mul_by_2 = [](const value_type& v){ return v * 2; };
+	VERIFY (simple_equal(orig, span));
+	for (auto& elem : span)
+	  elem = mul_by_2(elem);
+	VERIFY (simple_equal(orig | std::views::transform(mul_by_2), span));
+      }
+      end:
+      ok = inner_ok;
+    }
+  if (!ok)
+    return false;
+  auto mul_by_2 = [](const value_type& v){ return v * 2; };
+  VERIFY_NON_TARGET (simple_equal(range | std::views::transform(mul_by_2), vec));
+  return true;
+}
+
+struct my_int
+{
+  int _v;
+  bool operator==(my_int const&) const = default;
+  my_int operator*(int rhs) const noexcept {
+    return {_v * rhs};
+  }
+};
+
+int main()
+{
+  std::vector<int> ints = {1, 2, 3, 4, 5};
+  const bool ints_res = test(ints);
+  std::vector<my_int> my_ints = {my_int{1}, my_int{2}, my_int{3}, my_int{4}, my_int{5}};
+  const bool my_ints_res = test(my_ints);
+  return ints_res && my_ints_res ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-90.C b/libgomp/testsuite/libgomp.c++/target-flex-90.C
new file mode 100644
index 0000000..b3f1197
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-90.C
@@ -0,0 +1,107 @@
+/* structured bindings  */
+
+#include <array>
+#include <tuple>
+
+#include "target-flex-common.h"
+
+template<typename Array, typename Tuple, typename Struct>
+bool test(Array array, Tuple tuple, Struct s)
+{
+  bool ok;
+  auto array_2nd_in = std::get<2>(array);
+  auto tuple_2nd_in = std::get<2>(tuple);
+  auto s_2nd_in = s._2;
+  decltype(array_2nd_in) array_2nd_out_0;
+  decltype(tuple_2nd_in) tuple_2nd_out_0;
+  decltype(s_2nd_in) s_2nd_out_0;
+  decltype(array_2nd_in) array_2nd_out_1;
+  decltype(tuple_2nd_in) tuple_2nd_out_1;
+  decltype(s_2nd_in) s_2nd_out_1;
+  decltype(array_2nd_in) array_2nd_out_2;
+  decltype(tuple_2nd_in) tuple_2nd_out_2;
+  decltype(s_2nd_in) s_2nd_out_2;
+  #pragma omp target map(from: ok, \
+			       array_2nd_out_0, tuple_2nd_out_0, s_2nd_out_0, \
+			       array_2nd_out_1, tuple_2nd_out_1, s_2nd_out_1, \
+			       array_2nd_out_2, tuple_2nd_out_2, s_2nd_out_2) \
+		     map(to: array_2nd_in, tuple_2nd_in, s_2nd_in, array, tuple, s)
+    {
+      bool inner_ok = true;
+      {
+	{
+	  auto [array_0th, array_1st, array_2nd] = array;
+	  VERIFY (array_2nd_in == array_2nd);
+	  VERIFY (std::get<2>(array) == array_2nd);
+	  array_2nd_out_0 = array_2nd;
+	  auto [tuple_0th, tuple_1st, tuple_2nd] = tuple;
+	  VERIFY (tuple_2nd_in == tuple_2nd);
+	  VERIFY (std::get<2>(tuple) == tuple_2nd);
+	  tuple_2nd_out_0 = tuple_2nd;
+	  auto [s_0th, s_1st, s_2nd] = s;
+	  VERIFY (s_2nd_in == s_2nd);
+	  VERIFY (s._2 == s_2nd);
+	  s_2nd_out_0 = s_2nd;
+	}
+	{
+	  auto& [array_0th, array_1st, array_2nd] = array;
+	  VERIFY (array_2nd_in == array_2nd);
+	  VERIFY (std::get<2>(array) == array_2nd);
+	  array_2nd_out_1 = array_2nd;
+	  auto& [tuple_0th, tuple_1st, tuple_2nd] = tuple;
+	  VERIFY (tuple_2nd_in == tuple_2nd);
+	  VERIFY (std::get<2>(tuple) == tuple_2nd);
+	  tuple_2nd_out_1 = tuple_2nd;
+	  auto& [s_0th, s_1st, s_2nd] = s;
+	  VERIFY (s_2nd_in == s_2nd);
+	  VERIFY (s._2 == s_2nd);
+	  s_2nd_out_1 = s_2nd;
+	}
+	{
+	  const auto& [array_0th, array_1st, array_2nd] = array;
+	  VERIFY (array_2nd_in == array_2nd);
+	  VERIFY (std::get<2>(array) == array_2nd);
+	  array_2nd_out_2 = array_2nd;
+	  const auto& [tuple_0th, tuple_1st, tuple_2nd] = tuple;
+	  VERIFY (tuple_2nd_in == tuple_2nd);
+	  VERIFY (std::get<2>(tuple) == tuple_2nd);
+	  tuple_2nd_out_2 = tuple_2nd;
+	  const auto& [s_0th, s_1st, s_2nd] = s;
+	  VERIFY (s_2nd_in == s_2nd);
+	  VERIFY (s._2 == s_2nd);
+	  s_2nd_out_2 = s_2nd;
+	}
+      }
+      end:
+      ok = inner_ok;
+    }
+  if (!ok)
+    return false;
+  VERIFY_NON_TARGET (array_2nd_out_0 == array_2nd_in);
+  VERIFY_NON_TARGET (tuple_2nd_out_0 == tuple_2nd_in);
+  VERIFY_NON_TARGET (s_2nd_out_0 == s_2nd_in);
+  VERIFY_NON_TARGET (array_2nd_out_1 == array_2nd_in);
+  VERIFY_NON_TARGET (tuple_2nd_out_1 == tuple_2nd_in);
+  VERIFY_NON_TARGET (s_2nd_out_1 == s_2nd_in);
+  VERIFY_NON_TARGET (array_2nd_out_2 == array_2nd_in);
+  VERIFY_NON_TARGET (tuple_2nd_out_2 == tuple_2nd_in);
+  VERIFY_NON_TARGET (s_2nd_out_2 == s_2nd_in);
+
+  return true;
+}
+
+struct S
+{
+  char _0;
+  float _1;
+  int _2;
+};
+
+int main()
+{
+  const bool test_res
+    = test(std::array{0, 1, 2},
+	   std::tuple{'a', 3.14f, 42},
+	   S{'a', 3.14f, 42});
+  return test_res ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-common.h b/libgomp/testsuite/libgomp.c++/target-flex-common.h
new file mode 100644
index 0000000..14523c4
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-common.h
@@ -0,0 +1,40 @@
+#include <cstdio>
+
+#if __cplusplus >= 201103L
+  #define BL_NOEXCEPT noexcept
+#else
+  #define BL_NOEXCEPT throw()
+#endif
+
+#if defined __has_builtin
+# if __has_builtin (__builtin_LINE)
+#  define VERIFY_LINE __builtin_LINE ()
+# endif
+#endif
+#if !defined VERIFY_LINE
+# define VERIFY_LINE __LINE__
+#endif
+
+/* I'm not a huge fan of macros but in the interest of keeping the code that
+   isn't being tested as simple as possible, we use them.  */
+
+#define VERIFY(EXPR) \
+  do { /* EXPR is printed via %s: it may contain '%'.  */			\
+    if (!(EXPR))								\
+      {										\
+	std::printf("VERIFY ln: %d `%s` evaluated to false\n",			\
+		    VERIFY_LINE, #EXPR);					\
+	inner_ok = false;							\
+	goto end;								\
+      }										\
+  } while (false)
+
+#define VERIFY_NON_TARGET(EXPR) \
+  do { /* EXPR is printed via %s: it may contain '%'.  */			\
+    if (!(EXPR))								\
+      {										\
+	std::printf("VERIFY ln: %d `%s` evaluated to false\n",			\
+		    VERIFY_LINE, #EXPR);					\
+	return false;								\
+      }										\
+  } while (false)
diff --git a/libgomp/testsuite/libgomp.c++/target-has-device-addr-7.C b/libgomp/testsuite/libgomp.c++/target-has-device-addr-7.C
index 2c4571b..ae5b09a 100644
--- a/libgomp/testsuite/libgomp.c++/target-has-device-addr-7.C
+++ b/libgomp/testsuite/libgomp.c++/target-has-device-addr-7.C
@@ -18,8 +18,8 @@ void
 bar (T (&x)[])
 {
   x[0] = 24;
-  #pragma omp target data map(x[:2]) use_device_addr(x)
-    #pragma omp target has_device_addr(x[:2])
+  #pragma omp target data map(x[ :2]) use_device_addr(x)
+    #pragma omp target has_device_addr(x[ :2])
       x[0] = 42;
 
   if (x[0] != 42)
diff --git a/libgomp/testsuite/libgomp.c++/target-in-reduction-1.C b/libgomp/testsuite/libgomp.c++/target-in-reduction-1.C
index 21130f5..a1d1e57 100644
--- a/libgomp/testsuite/libgomp.c++/target-in-reduction-1.C
+++ b/libgomp/testsuite/libgomp.c++/target-in-reduction-1.C
@@ -8,9 +8,9 @@ foo (int &x, int *&y, int n, int v)
   int (&w)[n] = wu;
   for (i = 0; i < n; i++)
     w[i] = u[i] = n + i;
-  #pragma omp taskgroup task_reduction (+: x, y[:2], z[1:2], u, w[1:v])
+  #pragma omp taskgroup task_reduction (+: x, y[ :2], z[1:2], u, w[1:v])
   {
-    #pragma omp task in_reduction (+: x, y[:2], z[1:2], u, w[1:v])
+    #pragma omp task in_reduction (+: x, y[ :2], z[1:2], u, w[1:v])
     {
       x++;
       y[0] += 2;
@@ -19,7 +19,7 @@ foo (int &x, int *&y, int n, int v)
       u[0] += 5;
       w[1] += 6;
     }
-    #pragma omp target in_reduction (+: x, y[:2], z[1:2], u, w[1:v])
+    #pragma omp target in_reduction (+: x, y[ :2], z[1:2], u, w[1:v])
     {
       x += 4;
       y[0] += 5;
@@ -28,7 +28,7 @@ foo (int &x, int *&y, int n, int v)
       u[1] += 8;
       w[2] += 7;
     }
-    #pragma omp target in_reduction (+: x, y[:v], z[1:v], u, w[1:2])
+    #pragma omp target in_reduction (+: x, y[ :v], z[1:v], u, w[1:2])
     {
       x += 9;
       y[0] += 10;
@@ -59,9 +59,9 @@ bar (int &x, int *&y, int n, int v)
   for (i = 0; i < n; i++)
     w[i] = u[i] = n + i;
   #pragma omp parallel master
-  #pragma omp taskgroup task_reduction (+: x, y[:2], z[1:2], u, w[1:v])
+  #pragma omp taskgroup task_reduction (+: x, y[ :2], z[1:2], u, w[1:v])
   {
-    #pragma omp task in_reduction (+: x, y[:2], z[1:2], u, w[1:v])
+    #pragma omp task in_reduction (+: x, y[ :2], z[1:2], u, w[1:v])
     {
       x++;
       y[0] += 2;
@@ -70,7 +70,7 @@ bar (int &x, int *&y, int n, int v)
       u[0] += 5;
       w[1] += 6;
     }
-    #pragma omp target in_reduction (+: x, y[:2], z[1:2], u, w[1:v])
+    #pragma omp target in_reduction (+: x, y[ :2], z[1:2], u, w[1:v])
     {
       x += 4;
       y[0] += 5;
@@ -79,7 +79,7 @@ bar (int &x, int *&y, int n, int v)
       u[1] += 8;
       w[2] += 7;
     }
-    #pragma omp target in_reduction (+: x, y[:v], z[1:v], u, w[1:2])
+    #pragma omp target in_reduction (+: x, y[ :v], z[1:v], u, w[1:2])
     {
       x += 9;
       y[0] += 10;
diff --git a/libgomp/testsuite/libgomp.c++/target-in-reduction-2.C b/libgomp/testsuite/libgomp.c++/target-in-reduction-2.C
index 5da0e90..835cec1 100644
--- a/libgomp/testsuite/libgomp.c++/target-in-reduction-2.C
+++ b/libgomp/testsuite/libgomp.c++/target-in-reduction-2.C
@@ -18,9 +18,9 @@ foo (S &x, S *&y, int n, int v)
       w[i].c[0] = u[i].c[0] = 0;
       w[i].c[1] = u[i].c[1] = 0;
     }
-  #pragma omp taskgroup task_reduction (+: x, y[:2], z[1:2], u, w[1:v])
+  #pragma omp taskgroup task_reduction (+: x, y[ :2], z[1:2], u, w[1:v])
   {
-    #pragma omp task in_reduction (+: x, y[:2], z[1:2], u, w[1:v])
+    #pragma omp task in_reduction (+: x, y[ :2], z[1:2], u, w[1:v])
     {
       x.a++;
       x.b++;
@@ -35,7 +35,7 @@ foo (S &x, S *&y, int n, int v)
       w[1].a += 6;
       w[1].b += 16;
     }
-    #pragma omp target in_reduction (+: x, y[:2], z[1:2], u, w[1:v])
+    #pragma omp target in_reduction (+: x, y[ :2], z[1:2], u, w[1:v])
     {
       x.a += 4;
       x.b += 14;
@@ -50,7 +50,7 @@ foo (S &x, S *&y, int n, int v)
       w[2].a += 7;
       w[2].b += 17;
     }
-    #pragma omp target in_reduction (+: x, y[:v], z[1:v], u, w[1:2])
+    #pragma omp target in_reduction (+: x, y[ :v], z[1:v], u, w[1:2])
     {
       x.a += 9;
       x.b += 19;
@@ -101,9 +101,9 @@ bar (S &x, S *&y, int n, int v)
       w[i].c[1] = u[i].c[1] = 0;
     }
   #pragma omp parallel master
-  #pragma omp taskgroup task_reduction (+: x, y[:2], z[1:2], u, w[1:v])
+  #pragma omp taskgroup task_reduction (+: x, y[ :2], z[1:2], u, w[1:v])
   {
-    #pragma omp task in_reduction (+: x, y[:2], z[1:2], u, w[1:v])
+    #pragma omp task in_reduction (+: x, y[ :2], z[1:2], u, w[1:v])
     {
       x.a++;
       x.b++;
@@ -118,7 +118,7 @@ bar (S &x, S *&y, int n, int v)
       w[1].a += 6;
       w[1].b += 16;
     }
-    #pragma omp target in_reduction (+: x, y[:2], z[1:2], u, w[1:v])
+    #pragma omp target in_reduction (+: x, y[ :2], z[1:2], u, w[1:v])
     {
       x.a += 4;
       x.b += 14;
@@ -133,7 +133,7 @@ bar (S &x, S *&y, int n, int v)
       w[2].a += 7;
       w[2].b += 17;
     }
-    #pragma omp target in_reduction (+: x, y[:v], z[1:v], u, w[1:2])
+    #pragma omp target in_reduction (+: x, y[ :v], z[1:v], u, w[1:2])
     {
       x.a += 9;
       x.b += 19;
diff --git a/libgomp/testsuite/libgomp.c++/target-lambda-1.C b/libgomp/testsuite/libgomp.c++/target-lambda-1.C
index 6eb0d0b..e3a71d5 100644
--- a/libgomp/testsuite/libgomp.c++/target-lambda-1.C
+++ b/libgomp/testsuite/libgomp.c++/target-lambda-1.C
@@ -20,7 +20,7 @@ struct S
 
   auto merge_data_func (int *iptr, int &b)
   {
-    auto fn = [=](void) -> bool
+    auto fn = [=,this](void) -> bool
       {
 	bool mapped;
 	uintptr_t hostptr = (uintptr_t) ptr;
@@ -56,12 +56,12 @@ int main (void)
 
   int val = 1;
   int &valref = val;
-  #pragma omp target enter data map(alloc: data1[:N], data2[:N])
+  #pragma omp target enter data map(alloc: data1[ :N], data2[ :N])
 
   omp_target_loop (0, N, [=](int i) { data1[i] = val; });
   omp_target_loop (0, N, [=](int i) { data2[i] = valref + 1; });
 
-  #pragma omp target update from(data1[:N], data2[:N])
+  #pragma omp target update from(data1[ :N], data2[ :N])
 
   for (int i = 0; i < N; i++)
     {
@@ -69,7 +69,7 @@ int main (void)
       if (data2[i] != 2) abort ();
     }
 
-  #pragma omp target exit data map(delete: data1[:N], data2[:N])
+  #pragma omp target exit data map(delete: data1[ :N], data2[ :N])
 
   int b = 8;
   S s = { 4, N, data1 };
@@ -77,13 +77,13 @@ int main (void)
 
   if (f ()) abort ();
 
-  #pragma omp target enter data map(to: data1[:N])
+  #pragma omp target enter data map(to: data1[ :N])
   if (f ()) abort ();
 
-  #pragma omp target enter data map(to: data2[:N])
+  #pragma omp target enter data map(to: data2[ :N])
   if (!f () && !shared_mem) abort ();
 
-  #pragma omp target exit data map(from: data1[:N], data2[:N])
+  #pragma omp target exit data map(from: data1[ :N], data2[ :N])
 
   if (!shared_mem)
   for (int i = 0; i < N; i++)
diff --git a/libgomp/testsuite/libgomp.c++/target-lambda-3.C b/libgomp/testsuite/libgomp.c++/target-lambda-3.C
index 6be8426..6531acd 100644
--- a/libgomp/testsuite/libgomp.c++/target-lambda-3.C
+++ b/libgomp/testsuite/libgomp.c++/target-lambda-3.C
@@ -52,12 +52,12 @@ void run (int dev)
 		     && omp_target_is_present (data2, dev));
   int val = 1;
   int &valref = val;
-  #pragma omp target enter data map(alloc: data1[:N], data2[:N]) device(dev)
+  #pragma omp target enter data map(alloc: data1[ :N], data2[ :N]) device(dev)
 
   omp_target_loop (0, N, [=](int i) { data1[i] = val; }, dev);
   omp_target_loop (0, N, [=](int i) { data2[i] = valref + 1; }, dev);
 
-  #pragma omp target update from(data1[:N], data2[:N]) device(dev)
+  #pragma omp target update from(data1[ :N], data2[ :N]) device(dev)
 
   for (int i = 0; i < N; i++)
     {
@@ -65,20 +65,20 @@ void run (int dev)
       if (data2[i] != 2) abort ();
     }
 
-  #pragma omp target exit data map(delete: data1[:N], data2[:N]) device(dev)
+  #pragma omp target exit data map(delete: data1[ :N], data2[ :N]) device(dev)
 
   int b = 8;
   S s = { 4, N, data1 };
   auto f = s.merge_data_func (data2, b, dev);
   if (f () ^ shared_mem) abort ();
 
-  #pragma omp target enter data map(to: data1[:N]) device(dev)
+  #pragma omp target enter data map(to: data1[ :N]) device(dev)
   if (f () ^ shared_mem) abort ();
 
-  #pragma omp target enter data map(to: data2[:N]) device(dev)
+  #pragma omp target enter data map(to: data2[ :N]) device(dev)
   if (!f ()) abort ();
 
-  #pragma omp target exit data map(from: data1[:N], data2[:N]) device(dev)
+  #pragma omp target exit data map(from: data1[ :N], data2[ :N]) device(dev)
 
   for (int i = 0; i < N; i++)
     {
diff --git a/libgomp/testsuite/libgomp.c++/target-map-class-1.C b/libgomp/testsuite/libgomp.c++/target-map-class-1.C
index ad4802d..bf11a97 100644
--- a/libgomp/testsuite/libgomp.c++/target-map-class-1.C
+++ b/libgomp/testsuite/libgomp.c++/target-map-class-1.C
@@ -30,7 +30,7 @@ test_map_tofrom_class_heap ()
   int *array = new int[N];
   A *obj = new A (array, N);
 
-  #pragma omp target map(from: array[:N]) map(tofrom: obj[:1])
+  #pragma omp target map(from: array[ :N]) map(tofrom: obj[ :1])
     {
       int *tmp_h_array = obj->h_array;
       obj->h_array = array;
@@ -64,7 +64,7 @@ test_map_tofrom_class_stack ()
   int array[N];
   A obj(array, N);
 
-  #pragma omp target map(from: array[:N]) map(tofrom: obj)
+  #pragma omp target map(from: array[ :N]) map(tofrom: obj)
     {
       int *tmp_h_array = obj.h_array;
       obj.h_array = array;
diff --git a/libgomp/testsuite/libgomp.c++/target-std__array-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__array-concurrent-usm.C
new file mode 100644
index 0000000..aa36f71
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__array-concurrent-usm.C
@@ -0,0 +1,6 @@
+/* { dg-require-effective-target omp_usm } */
+#pragma omp requires unified_shared_memory self_maps
+
+#define MEM_SHARED
+
+#include "target-std__array-concurrent.C"
diff --git a/libgomp/testsuite/libgomp.c++/target-std__array-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__array-concurrent.C
new file mode 100644
index 0000000..ee5e094
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__array-concurrent.C
@@ -0,0 +1,62 @@
+// { dg-do run }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <time.h>
+#include <array>
+#include <algorithm>
+
+#define N 50000
+
+void init (int data[])
+{
+  for (int i = 0; i < N; ++i)
+    data[i] = rand ();
+}
+
+#pragma omp declare target
+bool validate (const std::array<int,N> &arr, int data[])
+{
+  for (int i = 0; i < N; ++i)
+    if (arr[i] != data[i] * data[i])
+      return false;
+  return true;
+}
+#pragma omp end declare target
+
+int main (void)
+{
+  int data[N];
+  bool ok;
+  std::array<int,N> arr;
+
+  srand (time (NULL));
+  init (data);
+
+#ifndef MEM_SHARED
+  #pragma omp target data map (to: data[ :N]) map (alloc: arr)
+#endif
+    {
+      #pragma omp target
+	{
+#ifndef MEM_SHARED
+	  new (&arr) std::array<int,N> ();
+#endif
+	  std::copy (data, data + N, arr.begin ());
+	}
+
+      #pragma omp target teams distribute parallel for
+	for (int i = 0; i < N; ++i)
+	  arr[i] *= arr[i];
+
+      #pragma omp target map (from: ok)
+	{
+	  ok = validate (arr, data);
+#ifndef MEM_SHARED
+	  arr.~array ();
+#endif
+	}
+    }
+
+  return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent-usm.C
new file mode 100644
index 0000000..d08ea71
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent-usm.C
@@ -0,0 +1,6 @@
+/* { dg-require-effective-target omp_usm } */
+#pragma omp requires unified_shared_memory self_maps
+
+#define MEM_SHARED
+
+#include "target-std__bitset-concurrent.C"
diff --git a/libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent.C
new file mode 100644
index 0000000..9dc941d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent.C
@@ -0,0 +1,69 @@
+// { dg-do run }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <time.h>
+#include <bitset>
+#include <set>
+#include <algorithm>
+
+#define N 4000
+#define MAX 16384
+
+void init (int data[])
+{
+  std::set<int> _set;
+  for (int i = 0; i < N; ++i)
+    {
+      // Avoid duplicates in data array.
+      do
+	data[i] = rand () % MAX;
+      while (_set.find (data[i]) != _set.end ());
+      _set.insert (data[i]);
+    }
+}
+
+bool validate (int sum, int data[])
+{
+  int total = 0;
+  for (int i = 0; i < N; ++i)
+    total += data[i];
+  return sum == total;
+}
+
+int main (void)
+{
+  int data[N];
+  std::bitset<MAX> _set;
+  int sum = 0;
+
+  srand (time (NULL));
+  init (data);
+
+#ifndef MEM_SHARED
+  #pragma omp target data map (to: data[ :N]) map (alloc: _set)
+#endif
+    {
+      #pragma omp target
+	{
+#ifndef MEM_SHARED
+	  new (&_set) std::bitset<MAX> ();
+#endif
+	  for (int i = 0; i < N; ++i)
+	    _set[data[i]] = true;
+	}
+
+      #pragma omp target teams distribute parallel for reduction (+:sum)
+	for (int i = 0; i < MAX; ++i)
+	  if (_set[i])
+	    sum += i;
+
+#ifndef MEM_SHARED
+      #pragma omp target
+	_set.~bitset ();
+#endif
+    }
+
+  bool ok = validate (sum, data);
+  return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__cmath.C b/libgomp/testsuite/libgomp.c++/target-std__cmath.C
new file mode 100644
index 0000000..aaf7152
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__cmath.C
@@ -0,0 +1,340 @@
+// { dg-do run }
+// { dg-additional-options "-std=c++20" }
+
+#include <cmath>
+#include <numbers>
+
+#define FP_EQUAL(x,y) (std::abs ((x) - (y)) < 1E-6)
+
+#pragma omp declare target
+template<typename T> bool test_basic ()
+{
+  T x = -3.456789;
+  T y = 1.234567;
+  T z = 5.678901;
+  // Identities among the basic <cmath> operations, evaluated for type T.
+  if (std::abs (x) != -x)
+    return false;
+  if (!FP_EQUAL (std::trunc (x / y) * y + std::fmod (x, y), x))
+    return false;
+  if (!FP_EQUAL (x - std::round (x / y) * y, std::remainder (x, y)))
+    return false;
+  if (!FP_EQUAL (std::fma (x, y, z), x * y + z))
+    return false;
+  if (std::fmax (x, y) != (x > y ? x : y))
+    return false;
+  if (std::fmin (x, y) != (x < y ? x : y))
+    return false;
+  if (std::fdim (x, y) != std::fmax (x - y, (T) 0.0))  // <cmath> only
+    return false;
+  if (std::fdim (y, x) != std::fmax (y - x, (T) 0.0))
+    return false;
+  return true;
+}
+
+template<typename T> bool test_exp ()
+{
+  T x = -4.567890;
+  T y = 2.345678;
+
+  if (!FP_EQUAL (std::exp (x), std::pow (std::numbers::e_v<T>, x)))
+    return false;
+  if (!FP_EQUAL (std::exp2 (y), std::pow ((T) 2.0, y)))
+    return false;
+  if (!FP_EQUAL (std::expm1 (y), std::exp (y) - (T) 1.0))
+    return false;
+  if (!FP_EQUAL (std::log (std::exp (x)), x))
+    return false;
+  if (!FP_EQUAL (std::log10 (std::pow ((T) 10.0, y)), y))
+    return false;
+  if (!FP_EQUAL (std::log2 (std::exp2 (y)), y))
+    return false;
+  if (!FP_EQUAL (std::log1p (std::expm1 (y)), y))
+    return false;
+  return true;
+}
+
+template<typename T> bool test_power ()
+{
+  T x = 7.234251;
+  T y = 0.340128;
+
+  if (!FP_EQUAL (std::log (std::pow (x, y)) / std::log (x), y))
+    return false;
+  if (!FP_EQUAL (std::sqrt (x) * std::sqrt (x), x))
+    return false;
+  if (!FP_EQUAL (std::cbrt (x) * std::cbrt (x) * std::cbrt (x), x))
+    return false;
+  if (!FP_EQUAL (std::hypot (x, y), std::sqrt (x * x + y * y)))
+    return false;
+  return true;
+}
+
+template<typename T> bool test_trig ()
+{
+  T theta = std::numbers::pi / 4;
+  T phi = std::numbers::pi / 6;
+
+  if (!FP_EQUAL (std::sin (theta), std::sqrt ((T) 2) / 2))
+    return false;
+  if (!FP_EQUAL (std::sin (phi), 0.5))
+    return false;
+  if (!FP_EQUAL (std::cos (theta), std::sqrt ((T) 2) / 2))
+    return false;
+  if (!FP_EQUAL (std::cos (phi), std::sqrt ((T) 3) / 2))
+    return false;
+  if (!FP_EQUAL (std::tan (theta), 1.0))
+    return false;
+  if (!FP_EQUAL (std::tan (phi), std::sqrt ((T) 3) / 3))
+    return false;
+
+  T x = 0.33245623;
+
+  if (!FP_EQUAL (std::asin (std::sin (x)), x))
+    return false;
+  if (!FP_EQUAL (std::acos (std::cos (x)), x))
+    return false;
+  if (!FP_EQUAL (std::atan (std::tan (x)), x))
+    return false;
+  if (!FP_EQUAL (std::atan2 (std::sin (x), std::cos (x)), x))
+    return false;
+  return true;
+}
+
+template<typename T> bool test_hyperbolic ()
+{
+  T x = 0.7423532;
+
+  if (!FP_EQUAL (std::sinh (x), (std::exp (x) - std::exp (-x)) / (T) 2.0))
+    return false;
+  if (!FP_EQUAL (std::cosh (x), (std::exp (x) + std::exp (-x)) / (T) 2.0))
+    return false;
+  if (!FP_EQUAL (std::tanh (x), std::sinh (x) / std::cosh (x)))
+    return false;
+  if (!FP_EQUAL (std::asinh (std::sinh (x)), x))
+    return false;
+  if (!FP_EQUAL (std::acosh (std::cosh (x)), x))
+    return false;
+  if (!FP_EQUAL (std::atanh (std::tanh (x)), x))
+    return false;
+  return true;
+}
+
+template<typename T> bool test_erf ()
+{
+  if (!FP_EQUAL (std::erf ((T) 0), 0))
+    return false;
+  if (!FP_EQUAL (std::erf ((T) INFINITY), 1))
+    return false;
+  if (!FP_EQUAL (std::erf ((T) -INFINITY), -1))
+    return false;
+  // Cast every argument to T so the overload for T is the one under test.
+  if (!FP_EQUAL (std::erfc ((T) 0), 1))
+    return false;
+  if (!FP_EQUAL (std::erfc ((T) INFINITY), 0))
+    return false;
+  if (!FP_EQUAL (std::erfc ((T) -INFINITY), 2))
+    return false;
+
+  return true;
+}
+
+template<typename T> bool test_gamma ()
+{
+  if (!FP_EQUAL (std::tgamma ((T) 5), 4*3*2*1))
+    return false;
+  if (!FP_EQUAL (std::tgamma ((T) 0.5), std::sqrt (std::numbers::pi_v<T>)))
+    return false;
+  if (!FP_EQUAL (std::tgamma ((T) -0.5), (T) -2 * std::sqrt (std::numbers::pi_v<T>)))
+    return false;
+  if (!FP_EQUAL (std::tgamma ((T) 2.5), (T) 0.75 * std::sqrt (std::numbers::pi_v<T>)))
+    return false;
+  if (!FP_EQUAL (std::tgamma ((T) -2.5), (T) -8.0/15 * std::sqrt (std::numbers::pi_v<T>)))
+    return false;
+
+  if (!FP_EQUAL (std::lgamma ((T) 5), std::log ((T) 4*3*2*1)))
+    return false;
+  if (!FP_EQUAL (std::lgamma ((T) 0.5), std::log (std::sqrt (std::numbers::pi_v<T>))))
+    return false;
+  if (!FP_EQUAL (std::lgamma ((T) 2.5),
+		 std::log ((T) 0.75 * std::sqrt (std::numbers::pi_v<T>))))
+    return false;
+
+  return true;
+}
+
+template<typename T> bool test_rounding ()
+{
+  T x = -2.5678;
+  T y = 3.6789;
+
+  if (std::ceil (x) != -2)
+    return false;
+  if (std::floor (x) != -3)
+    return false;
+  if (std::trunc (x) != -2)
+    return false;
+  if (std::round (x) != -3)
+    return false;
+
+  if (std::ceil (y) != 4)
+    return false;
+  if (std::floor (y) != 3)
+    return false;
+  if (std::trunc (y) != 3)
+    return false;
+  if (std::round (y) != 4)
+    return false;
+
+  /* Not testing std::rint and std::nearbyint due to dependence on
+     floating-point environment.  */
+
+  return true;
+}
+
+template<typename T> bool test_fpmanip ()
+{
+  T x = -2.3456789;
+  T y = 3.6789012;
+  int exp;
+
+  T mantissa = std::frexp (x, &exp);
+  if (std::ldexp (mantissa, exp) != x)
+    return false;
+  if (std::logb (x) + 1 != exp)
+    return false;
+  if (std::ilogb (x) + 1 != exp)
+    return false;
+  if (std::scalbn (x, -exp) != mantissa)
+    return false;
+
+  T next = std::nextafter (x, y);
+  if (!(next > x && next < y))
+    return false;
+
+#if 0
+  /* TODO Due to 'std::nexttoward' using 'long double to', this triggers a
+     '80-bit-precision floating-point numbers unsupported (mode ‘XF’)' error
+     with x86_64 host and nvptx, GCN offload compilers, or
+     '128-bit-precision floating-point numbers unsupported (mode ‘TF’)' error
+     with powerpc64le host and nvptx offload compiler, for example;
+     PR71064 'nvptx offloading: "long double" data type'.
+     It ought to work on systems where the host's 'long double' is the same as
+     'double' ('DF'): aarch64, for example?  */
+  next = std::nexttoward (x, y);
+  if (!(next > x && next < y))
+    return false;
+#endif
+
+  if (std::copysign (x, y) != std::abs (x))
+    return false;
+  if (std::copysign (y, x) != -y)
+    return false;
+
+  return true;
+}
+
+template<typename T> bool test_classify ()
+{
+  T x = -2.3456789;
+  T y = 3.6789012;
+  // x is a negative normal value, y a positive one.
+  if (std::fpclassify (x) != FP_NORMAL || std::fpclassify (y) != FP_NORMAL)
+    return false;
+  if (std::fpclassify ((T) INFINITY) != FP_INFINITE
+      || std::fpclassify ((T) -INFINITY) != FP_INFINITE)
+    return false;
+  if (std::fpclassify ((T) 0.0) != FP_ZERO)
+    return false;
+  if (std::fpclassify ((T) NAN) != FP_NAN)
+    return false;
+  if (!std::isfinite (x) || !std::isfinite (y))
+    return false;
+  if (std::isfinite ((T) INFINITY) || std::isfinite ((T) -INFINITY))
+    return false;
+  if (std::isinf (x) || std::isinf (y))
+    return false;
+  if (!std::isinf ((T) INFINITY) || !std::isinf ((T) -INFINITY))
+    return false;
+  if (std::isnan (x) || std::isnan (y))
+    return false;
+  if (!std::isnan ((T) 0.0 / (T) 0.0))
+    return false;
+  if (!std::isnan ((T) NAN))  // the NAN macro itself, cast to T
+    return false;
+  if (!std::isnormal (x) || !std::isnormal (y))
+    return false;
+  if (std::isnormal ((T) 0.0) || std::isnormal ((T) INFINITY) || std::isnormal ((T) NAN))
+    return false;
+  if (!std::signbit (x) || std::signbit (y))
+    return false;
+
+  return true;
+}
+
+template<typename T> bool test_compare ()
+{
+  T x = 5.6789012;
+  T y = 8.9012345;
+
+  if (std::isgreater (x, y))
+    return false;
+  if (std::isgreater (x, x))
+    return false;
+  if (std::isgreaterequal (x, y))
+    return false;
+  if (!std::isgreaterequal (x, x))
+    return false;
+  if (!std::isless (x, y))
+    return false;
+  if (std::isless (x, x))
+    return false;
+  if (!std::islessequal (x, y))
+    return false;
+  if (!std::islessequal (x, x))
+    return false;
+  if (!std::islessgreater (x, y))
+    return false;
+  if (std::islessgreater (x, x))
+    return false;
+  if (std::isunordered (x, y))
+    return false;
+  if (!std::isunordered (x, NAN))
+    return false;
+  return true;
+}
+#pragma omp end declare target
+
+#define RUN_TEST(func) \
+{ \
+  pass++; \
+  bool ok = test_##func<float> (); \
+  if (!ok) { result = pass; break; } \
+  pass++; \
+  ok = test_##func<double> (); \
+  if (!ok) { result = pass; break; } \
+}
+
+int main (void)
+{
+  int result = 0;
+
+  #pragma omp target map (tofrom: result)
+    do {
+      int pass = 0;
+
+      RUN_TEST (basic);
+      RUN_TEST (exp);
+      RUN_TEST (power);
+      RUN_TEST (trig);
+      RUN_TEST (hyperbolic);
+      RUN_TEST (erf);
+      RUN_TEST (gamma);
+      RUN_TEST (rounding);
+      RUN_TEST (fpmanip);
+      RUN_TEST (classify);
+      RUN_TEST (compare);
+    } while (false);
+
+  return result;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__complex.C b/libgomp/testsuite/libgomp.c++/target-std__complex.C
new file mode 100644
index 0000000..e392d17
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__complex.C
@@ -0,0 +1,175 @@
+// { dg-do run }
+// { dg-additional-options "-std=c++20" }
+
+#include <cmath>
+#include <complex>
+#include <numbers>
+
+using namespace std::complex_literals;
+
+#define FP_EQUAL(x,y) (std::abs ((x) - (y)) < 1E-6)
+#define COMPLEX_EQUAL(x,y) (FP_EQUAL ((x).real (), (y).real ()) \
+			    && FP_EQUAL ((x).imag (), (y).imag ()))
+
+#pragma omp declare target
+template<typename T> bool test_complex ()
+{
+  std::complex<T> z (-1.334, 5.763);
+
+  if (!FP_EQUAL (z.real (), (T) -1.334))
+    return false;
+  if (!FP_EQUAL (z.imag (), (T) 5.763))
+    return false;
+  if (!FP_EQUAL (std::abs (z),
+		 std::sqrt (z.real () * z.real () + z.imag () * z.imag ())))
+    return false;
+  if (!FP_EQUAL (std::arg (z), std::atan2 (z.imag (), z.real ())))
+    return false;
+  if (!FP_EQUAL (std::norm (z), z.real () * z.real () + z.imag () * z.imag ()))
+    return false;
+
+  auto conj = std::conj (z);
+  if (!FP_EQUAL (conj.real (), z.real ())
+      || !FP_EQUAL (conj.imag (), -z.imag ()))
+    return false;
+
+  if (std::proj (z) != z)
+    return false;
+  // Infinite inputs must project to (+inf, -0.0) here; check that for T.
+  auto infz1 = std::proj (std::complex<T> (INFINITY, -1));
+  if (infz1.real () != INFINITY || infz1.imag () != (T) -0.0)
+    return false;
+  auto infz2 = std::proj (std::complex<T> (0, -INFINITY));
+  if (infz2.real () != INFINITY || infz2.imag () != (T) -0.0)
+    return false;
+
+  auto polarz = std::polar ((T) 1.5, std::numbers::pi_v<T> / 4);
+  if (!FP_EQUAL (polarz.real (), (T) 1.5 * std::cos (std::numbers::pi_v<T> / 4))
+      || !FP_EQUAL (polarz.imag (),
+		    (T) 1.5* std::sin (std::numbers::pi_v<T> / 4)))
+    return false;
+
+  return true;
+}
+
+template<typename T> bool test_complex_exp_log ()
+{
+  std::complex<T> z (-1.724, -3.763);
+
+  // Euler's identity
+  auto eulerz = std::exp (std::complex<T> (0, std::numbers::pi));
+  eulerz += 1.0;
+  if (!COMPLEX_EQUAL (eulerz, std::complex<T> ()))
+    return false;
+
+  auto my_exp_z
+    = std::complex<T> (std::exp (z.real ()) * std::cos (z.imag ()),
+		       std::exp (z.real ()) * std::sin (z.imag ()));
+  if (!COMPLEX_EQUAL (std::exp (z), my_exp_z))
+    return false;
+
+  if (!COMPLEX_EQUAL (std::log10 (z),
+		      std::log (z) / std::log (std::complex<T> (10))))
+    return false;
+
+  return true;
+}
+
+template<typename T> bool test_complex_trig ()
+{
+  std::complex<T> z (std::numbers::pi / 8, std::numbers::pi / 10);
+  const std::complex<T> i (0, 1);
+
+  auto my_sin_z
+    = std::complex<T> (std::sin (z.real ()) * std::cosh (z.imag ()),
+		       std::cos (z.real ()) * std::sinh (z.imag ()));
+  if (!COMPLEX_EQUAL (std::sin (z), my_sin_z))
+    return false;
+
+  auto my_cos_z
+    = std::complex<T> (std::cos (z.real ()) * std::cosh (z.imag ()),
+		       -std::sin (z.real ()) * std::sinh (z.imag ()));
+  if (!COMPLEX_EQUAL (std::cos (z), my_cos_z))
+    return false;
+
+  auto my_tan_z
+    = std::complex<T> (std::sin (2*z.real ()), std::sinh (2*z.imag ()))
+      / (std::cos (2*z.real ()) + std::cosh (2*z.imag ()));
+  if (!COMPLEX_EQUAL (std::tan (z), my_tan_z))
+    return false;
+
+  auto my_sinh_z
+    = std::complex<T> (std::sinh (z.real ()) * std::cos (z.imag ()),
+		       std::cosh (z.real ()) * std::sin (z.imag ()));
+  if (!COMPLEX_EQUAL (std::sinh (z), my_sinh_z))
+    return false;
+
+  auto my_cosh_z
+    = std::complex<T> (std::cosh (z.real ()) * std::cos (z.imag ()),
+		       std::sinh (z.real ()) * std::sin (z.imag ()));
+  if (!COMPLEX_EQUAL (std::cosh (z), my_cosh_z))
+    return false;
+
+  auto my_tanh_z
+    = std::complex<T> (std::sinh (2*z.real ()),
+		       std::sin (2*z.imag ()))
+		       / (std::cosh (2*z.real ()) + std::cos (2*z.imag ()));
+  if (!COMPLEX_EQUAL (std::tanh (z), my_tanh_z))
+    return false;
+
+  auto my_asin_z = -i * std::log (i * z + std::sqrt ((T) 1.0 - z*z));
+  if (!COMPLEX_EQUAL (std::asin (z), my_asin_z))
+    return false;
+
+  auto my_acos_z
+    = std::complex<T> (std::numbers::pi / 2)
+		       + i * std::log (i * z + std::sqrt ((T) 1.0 - z*z));
+  if (!COMPLEX_EQUAL (std::acos (z), my_acos_z))
+    return false;
+
+  auto my_atan_z = std::complex<T> (0, -0.5) * (std::log ((i - z) / (i + z)));
+  if (!COMPLEX_EQUAL (std::atan (z), my_atan_z))
+    return false;
+
+  auto my_asinh_z = std::log (z + std::sqrt (z*z + (T) 1.0));
+  if (!COMPLEX_EQUAL (std::asinh (z), my_asinh_z))
+    return false;
+
+  auto my_acosh_z = std::log (z + std::sqrt (z*z - (T) 1.0));
+  if (!COMPLEX_EQUAL (std::acosh (z), my_acosh_z))
+    return false;
+
+  auto my_atanh_z
+    = std::complex<T> (0.5) * (std::log ((T) 1.0 + z) - std::log ((T) 1.0 - z));
+  if (!COMPLEX_EQUAL (std::atanh (z), my_atanh_z))
+    return false;
+
+  return true;
+}
+#pragma omp end declare target
+
+#define RUN_TEST(func) \
+{ \
+  pass++; \
+  bool ok = test_##func<float> (); \
+  if (!ok) { result = pass; break; } \
+  pass++; \
+  ok = test_##func<double> (); \
+  if (!ok) { result = pass; break; } \
+}
+
+int main (void)
+{
+  int result = 0;
+
+  #pragma omp target map (tofrom: result)
+    do {
+      int pass = 0;
+
+      RUN_TEST (complex);
+      RUN_TEST (complex_exp_log);
+      RUN_TEST (complex_trig);
+    } while (false);
+
+  return result;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__deque-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__deque-concurrent-usm.C
new file mode 100644
index 0000000..b30ade4
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__deque-concurrent-usm.C
@@ -0,0 +1,6 @@
+/* { dg-require-effective-target omp_usm } */
+#pragma omp requires unified_shared_memory self_maps
+
+#define MEM_SHARED
+
+#include "target-std__deque-concurrent.C"
diff --git a/libgomp/testsuite/libgomp.c++/target-std__deque-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__deque-concurrent.C
new file mode 100644
index 0000000..d8c9299
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__deque-concurrent.C
@@ -0,0 +1,64 @@
+// { dg-do run }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <time.h>
+#include <deque>
+#include <algorithm>
+
+#define N 50000
+
+void init (int data[])
+{
+  for (int i = 0; i < N; ++i)
+    data[i] = rand ();
+}
+
+#pragma omp declare target
+bool validate (const std::deque<int> &_deque, int data[])
+{
+  for (int i = 0; i < N; ++i)
+    if (_deque[i] != data[i] * data[i])
+      return false;
+  return true;
+}
+#pragma omp end declare target
+
+int main (void)
+{
+  int data[N];
+  bool ok;
+
+  srand (time (NULL));
+  init (data);
+
+#ifdef MEM_SHARED
+  std::deque<int> _deque (std::begin (data), std::end (data));
+#else
+  std::deque<int> _deque;
+#endif
+
+#ifndef MEM_SHARED
+  #pragma omp target data map (to: data[ :N]) map (alloc: _deque)
+#endif
+    {
+#ifndef MEM_SHARED
+      #pragma omp target
+	new (&_deque) std::deque<int> (std::begin (data), std::end (data));
+#endif
+
+      #pragma omp target teams distribute parallel for
+	for (int i = 0; i < N; ++i)
+	  _deque[i] *= _deque[i];
+
+      #pragma omp target map (from: ok)
+	{
+	  ok = validate (_deque, data);
+#ifndef MEM_SHARED
+	  _deque.~deque ();
+#endif
+	}
+    }
+
+  return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__flat_map-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__flat_map-concurrent.C
new file mode 100644
index 0000000..958b75e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__flat_map-concurrent.C
@@ -0,0 +1,67 @@
+// { dg-do run }
+// { dg-additional-options "-std=c++23" }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <time.h>
+#include <set>
+#include <flat_map>
+
+#define N 3000
+
+void init (int data[], bool unique)
+{
+  std::set<int> _set;
+  for (int i = 0; i < N; ++i)
+    {
+      // Avoid duplicates in data array if unique is true.
+      do
+	data[i] = rand ();
+      while (unique && _set.count (data[i]) > 0);
+      _set.insert (data[i]);
+    }
+}
+
+bool validate (long long sum, int keys[], int data[])
+{
+  long long total = 0;
+  for (int i = 0; i < N; ++i)
+    total += (long long) keys[i] * data[i];
+  return sum == total;
+}
+
+int main (void)
+{
+  int keys[N], data[N];
+  std::flat_map<int,int> _map;
+
+  srand (time (NULL));
+  init (keys, true);
+  init (data, false);
+
+  #pragma omp target enter data map (to: keys[ :N], data[ :N]) map (alloc: _map)
+
+  #pragma omp target
+    {
+#ifndef MEM_SHARED
+      new (&_map) std::flat_map<int,int> ();
+#endif
+      for (int i = 0; i < N; ++i)
+	_map[keys[i]] = data[i];
+    }
+
+  long long sum = 0;
+  #pragma omp target teams distribute parallel for reduction (+:sum)
+    for (int i = 0; i < N; ++i)
+      sum += (long long) keys[i] * _map[keys[i]];
+
+#ifndef MEM_SHARED
+  #pragma omp target
+    _map.~flat_map ();
+#endif
+
+  #pragma omp target exit data map (release: _map)
+
+  bool ok = validate (sum, keys, data);
+  return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__flat_multimap-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__flat_multimap-concurrent.C
new file mode 100644
index 0000000..cee6323
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__flat_multimap-concurrent.C
@@ -0,0 +1,66 @@
+// { dg-do run }
+// { dg-additional-options "-std=c++23" }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <time.h>
+#include <flat_map>
+
+// Make sure that KEY_MAX is less than N to ensure some duplicate keys.
+#define N 3000
+#define KEY_MAX 1000
+
+void init (int data[], int max)
+{
+  for (int i = 0; i < N; ++i)
+    data[i] = i % max;
+}
+
+bool validate (long long sum, int keys[], int data[])
+{
+  long long total = 0;
+  for (int i = 0; i < N; ++i)
+    total += (long long) keys[i] * data[i];
+  return sum == total;
+}
+
+int main (void)
+{
+  int keys[N], data[N];
+  std::flat_multimap<int,int> _map;
+
+  srand (time (NULL));
+  init (keys, KEY_MAX);
+  init (data, RAND_MAX);
+
+  #pragma omp target enter data map (to: keys[ :N], data[ :N]) map (alloc: _map)
+
+  #pragma omp target
+    {
+#ifndef MEM_SHARED
+      new (&_map) std::flat_multimap<int,int> ();
+#endif
+      for (int i = 0; i < N; ++i)
+	_map.insert({keys[i], data[i]});
+    }
+
+  long long sum = 0;
+  #pragma omp target teams distribute parallel for reduction (+:sum)
+    for (int i = 0; i < KEY_MAX; ++i)
+      {
+	auto range = _map.equal_range (i);
+	for (auto it = range.first; it != range.second; ++it) {
+	  sum += (long long) it->first * it->second;
+	}
+      }
+
+#ifndef MEM_SHARED
+  #pragma omp target
+    _map.~flat_multimap ();
+#endif
+
+  #pragma omp target exit data map (release: _map)
+
+  bool ok = validate (sum, keys, data);
+  return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__flat_multiset-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__flat_multiset-concurrent.C
new file mode 100644
index 0000000..13d2ff9
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__flat_multiset-concurrent.C
@@ -0,0 +1,60 @@
+// { dg-do run }
+// { dg-additional-options "-std=c++23" }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <time.h>
+#include <flat_set>
+#include <algorithm>
+
+// MAX should be less than N to ensure that some duplicates occur.
+#define N 4000
+#define MAX 1000
+
+void init (int data[])
+{
+  for (int i = 0; i < N; ++i)
+    data[i] = rand () % MAX;
+}
+
+bool validate (int sum, int data[])
+{
+  int total = 0;
+  for (int i = 0; i < N; ++i)
+    total += data[i];
+  return sum == total;
+}
+
+int main (void)
+{
+  int data[N];
+  std::flat_multiset<int> set;
+  int sum = 0;
+
+  srand (time (NULL));
+  init (data);
+
+  #pragma omp target data map (to: data[ :N]) map (alloc: set)
+    {
+      #pragma omp target
+	{
+#ifndef MEM_SHARED
+	  new (&set) std::flat_multiset<int> ();
+#endif
+	  for (int i = 0; i < N; ++i)
+	    set.insert (data[i]);
+	}
+
+      #pragma omp target teams distribute parallel for reduction (+:sum)
+	for (int i = 0; i < MAX; ++i)
+	  sum += i * set.count (i);
+
+#ifndef MEM_SHARED
+      #pragma omp target
+	set.~flat_multiset ();
+#endif
+    }
+
+  bool ok = validate (sum, data);
+  return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__flat_set-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__flat_set-concurrent.C
new file mode 100644
index 0000000..0f4a1a8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__flat_set-concurrent.C
@@ -0,0 +1,67 @@
+// { dg-do run }
+// { dg-additional-options "-std=c++23" }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <time.h>
+#include <flat_set>
+#include <algorithm>
+
+#define N 4000
+#define MAX 16384
+
+void init (int data[])
+{
+  std::flat_set<int> seen;
+  for (int *slot = data; slot != data + N; ++slot)
+    {
+      // Redraw until the value has not been handed out before.
+      do
+	*slot = rand () % MAX;
+      while (seen.count (*slot) > 0);
+      seen.insert (*slot);
+    }
+}
+
+bool validate (int sum, int data[])
+{
+  int expected = 0;	// Host-side reference: plain sum of all N inputs.
+  for (const int *p = data; p != data + N; ++p)
+    expected += *p;
+  return expected == sum;
+}
+
+int main (void)	// Returns 0 when the device-computed sum matches the host sum.
+{
+  int data[N];
+  std::flat_set<int> _set;
+  int sum = 0;
+
+  srand (time (NULL));
+  init (data);
+
+  #pragma omp target data map (to: data[ :N]) map (alloc: _set)
+    {
+      #pragma omp target
+	{
+#ifndef MEM_SHARED
+	  new (&_set) std::flat_set<int> ();	// 'alloc' copies nothing.
+#endif
+	  for (int i = 0; i < N; ++i)
+	    _set.insert (data[i]);
+	}
+      // Concurrent read-only lookups: add every i in [0, MAX) that is present.
+      #pragma omp target teams distribute parallel for reduction (+:sum)
+	for (int i = 0; i < MAX; ++i)
+	  if (_set.count (i) > 0)
+	    sum += i;
+
+#ifndef MEM_SHARED
+      #pragma omp target
+	_set.~flat_set ();	// Destroy in a target region, like the construction.
+#endif
+    }
+
+  bool ok = validate (sum, data);
+  return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent-usm.C
new file mode 100644
index 0000000..65004b2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent-usm.C
@@ -0,0 +1,6 @@
+/* { dg-require-effective-target omp_usm } */
+#pragma omp requires unified_shared_memory self_maps
+
+#define MEM_SHARED
+
+#include "target-std__forward_list-concurrent.C"
diff --git a/libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent.C
new file mode 100644
index 0000000..4a98e47
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent.C
@@ -0,0 +1,83 @@
+// { dg-do run }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <time.h>
+#include <omp.h>
+#include <forward_list>
+#include <algorithm>
+
+#define N 3000
+
+void init (int data[])	// Fill data[0..N) with random values below 2^15.
+{
+  for (int i = 0; i < N; ++i)
+    data[i] = rand () % 0x8000;	// Squares stay < 2^30: no signed overflow.
+}
+
+#pragma omp declare target
+bool validate (const std::forward_list<int> &list, int data[])
+{
+  const int *src = data;
+  for (auto it = list.begin (); it != list.end (); ++it, ++src)
+    {
+      // Every element must be the square of its source value.
+      if (*it != *src * *src)
+	return false;
+    }
+  return true;
+}
+#pragma omp end declare target
+
+int main (void)	// Returns 0 when every list element equals data[i] squared.
+{
+  int data[N];
+  bool ok;
+
+  srand (time (NULL));
+  init (data);
+
+#ifdef MEM_SHARED
+  std::forward_list<int> list (std::begin (data), std::end (data));
+#else
+  std::forward_list<int> list;
+#endif
+
+#ifndef MEM_SHARED
+  #pragma omp target data map (to: data[ :N]) map (alloc: list)
+#endif
+    {
+#ifndef MEM_SHARED
+      #pragma omp target
+	new (&list) std::forward_list<int> (std::begin (data), std::end (data));
+#endif
+      // Each team squares one contiguous chunk of ceil (N / teams) elements.
+      #pragma omp target teams
+	do
+	  {
+	    int len = N / omp_get_num_teams () + (N % omp_get_num_teams () > 0);
+	    int start = len * omp_get_team_num ();
+	    if (start >= N)
+	      break;	// More teams than chunks: nothing left for this one.
+	    if (start + len >= N)
+	      len = N - start;	// Clamp the last chunk to the end of the list.
+	    auto it = list.begin ();
+	    std::advance (it, start);
+	    for (int i = 0; i < len; ++i)
+	      {
+		*it *= *it;
+		++it;
+	      }
+	  } while (false);
+
+      #pragma omp target map (from: ok)
+	{
+	  ok = validate (list, data);
+#ifndef MEM_SHARED
+	  list.~forward_list ();	// Destroy in a target region, like the construction.
+#endif
+	}
+    }
+
+  return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__list-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__list-concurrent-usm.C
new file mode 100644
index 0000000..3cdd44d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__list-concurrent-usm.C
@@ -0,0 +1,6 @@
+/* { dg-require-effective-target omp_usm } */
+#pragma omp requires unified_shared_memory self_maps
+
+#define MEM_SHARED
+
+#include "target-std__list-concurrent.C"
diff --git a/libgomp/testsuite/libgomp.c++/target-std__list-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__list-concurrent.C
new file mode 100644
index 0000000..bede839
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__list-concurrent.C
@@ -0,0 +1,83 @@
+// { dg-do run }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <time.h>
+#include <omp.h>
+#include <list>
+#include <algorithm>
+
+#define N 3000
+
+void init (int data[])	// Fill data[0..N) with random values below 2^15.
+{
+  for (int i = 0; i < N; ++i)
+    data[i] = rand () % 0x8000;	// Squares stay < 2^30: no signed overflow.
+}
+
+#pragma omp declare target
+bool validate (const std::list<int> &_list, int data[])
+{
+  const int *src = data;
+  for (auto it = _list.begin (); it != _list.end (); ++it, ++src)
+    {
+      // Every element must be the square of its source value.
+      if (*it != *src * *src)
+	return false;
+    }
+  return true;
+}
+#pragma omp end declare target
+
+int main (void)	// Returns 0 when every list element equals data[i] squared.
+{
+  int data[N];
+  bool ok;
+
+  srand (time (NULL));
+  init (data);
+
+#ifdef MEM_SHARED
+  std::list<int> _list (std::begin (data), std::end (data));
+#else
+  std::list<int> _list;
+#endif
+
+#ifndef MEM_SHARED
+  #pragma omp target data map (to: data[ :N]) map (alloc: _list)
+#endif
+    {
+#ifndef MEM_SHARED
+      #pragma omp target
+	new (&_list) std::list<int> (std::begin (data), std::end (data));
+#endif
+      // Each team squares one contiguous chunk of ceil (N / teams) elements.
+      #pragma omp target teams
+	do
+	  {
+	    int len = N / omp_get_num_teams () + (N % omp_get_num_teams () > 0);
+	    int start = len * omp_get_team_num ();
+	    if (start >= N)
+	      break;	// More teams than chunks: nothing left for this one.
+	    if (start + len >= N)
+	      len = N - start;	// Clamp the last chunk to the end of the list.
+	    auto it = _list.begin ();
+	    std::advance (it, start);
+	    for (int i = 0; i < len; ++i)
+	      {
+		*it *= *it;
+		++it;
+	      }
+	  } while (false);
+
+      #pragma omp target map (from: ok)
+	{
+	  ok = validate (_list, data);
+#ifndef MEM_SHARED
+	  _list.~list ();	// Destroy in a target region, like the construction.
+#endif
+	}
+    }
+
+  return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__map-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__map-concurrent-usm.C
new file mode 100644
index 0000000..b7d3dd8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__map-concurrent-usm.C
@@ -0,0 +1,6 @@
+/* { dg-require-effective-target omp_usm } */
+#pragma omp requires unified_shared_memory self_maps
+
+#define MEM_SHARED
+
+#include "target-std__map-concurrent.C"
diff --git a/libgomp/testsuite/libgomp.c++/target-std__map-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__map-concurrent.C
new file mode 100644
index 0000000..c54acea
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__map-concurrent.C
@@ -0,0 +1,70 @@
+// { dg-do run }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <time.h>
+#include <set>
+#include <map>
+
+#define N 3000
+
+void init (int data[], bool unique)	// Fill data[0..N) with random values.
+{
+  std::set<int> _set;
+  for (int i = 0; i < N; ++i)
+    {
+      // Avoid duplicates in data array if unique is true.
+      do
+	data[i] = rand () & 0xffffff;	// 24 bits: sum of N products fits long long.
+      while (unique && _set.find (data[i]) != _set.end ());
+      _set.insert (data[i]);
+    }
+}
+
+bool validate (long long sum, int keys[], int data[])
+{
+  long long expected = 0;	// Host reference: sum of key * value products.
+  for (const int *k = keys, *d = data; k != keys + N; ++k, ++d)
+    expected += (long long) *k * *d;
+  return expected == sum;
+}
+
+int main (void)	// Returns 0 when the device sum of key * value matches the host.
+{
+  int keys[N], data[N];
+  std::map<int,int> _map;
+
+  srand (time (NULL));
+  init (keys, true);
+  init (data, false);
+
+#ifndef MEM_SHARED
+  #pragma omp target enter data map (to: keys[ :N], data[ :N]) map (alloc: _map)
+#endif
+
+  #pragma omp target
+    {
+#ifndef MEM_SHARED
+      new (&_map) std::map<int,int> ();	// 'alloc' copies nothing.
+#endif
+      for (int i = 0; i < N; ++i)
+	_map[keys[i]] = data[i];
+    }
+
+  long long sum = 0;
+  #pragma omp target teams distribute parallel for reduction (+:sum)
+    for (int i = 0; i < N; ++i)
+      sum += (long long) keys[i] * _map[keys[i]];
+
+#ifndef MEM_SHARED
+  #pragma omp target
+    _map.~map ();
+#endif
+
+#ifndef MEM_SHARED
+  #pragma omp target exit data map (release: keys[ :N], data[ :N], _map)
+#endif	// Releases every mapping made by 'enter data' above, not just _map.
+
+  bool ok = validate (sum, keys, data);
+  return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent-usm.C
new file mode 100644
index 0000000..f243790
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent-usm.C
@@ -0,0 +1,6 @@
+/* { dg-require-effective-target omp_usm } */
+#pragma omp requires unified_shared_memory self_maps
+
+#define MEM_SHARED
+
+#include "target-std__multimap-concurrent.C"
diff --git a/libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent.C
new file mode 100644
index 0000000..34518a5a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent.C
@@ -0,0 +1,79 @@
+// { dg-do run }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <time.h>
+#include <map>
+#include <omp.h>
+
+// Make sure that KEY_MAX is less than N to ensure some duplicate keys.
+#define N 3000
+#define KEY_MAX 1000
+
+void init (int data[], int max)
+{
+  for (int *slot = data; slot != data + N; ++slot)
+    *slot = rand () % max;	// One draw per slot, each in [0, max).
+}
+
+bool validate (long long sum, int keys[], int data[])
+{
+  long long expected = 0;	// Host reference: sum of key * value products.
+  for (const int *k = keys, *d = data; k != keys + N; ++k, ++d)
+    expected += (long long) *k * *d;
+  return expected == sum;
+}
+
+int main (void)	// Returns 0 when the device sum of key * value matches the host.
+{
+  int keys[N], data[N];
+  std::multimap<int,int> _map;
+
+  srand (time (NULL));
+  init (keys, KEY_MAX);
+  init (data, RAND_MAX);
+
+#ifndef MEM_SHARED
+  #pragma omp target enter data map (to: keys[ :N], data[ :N]) map (alloc: _map)
+#endif
+
+  #pragma omp target
+    {
+#ifndef MEM_SHARED
+      new (&_map) std::multimap<int,int> ();	// 'alloc' copies nothing.
+#endif
+      for (int i = 0; i < N; ++i)
+	_map.insert({keys[i], data[i]});
+    }
+
+  long long sum = 0;
+  #pragma omp target teams distribute parallel for reduction (+:sum)
+    for (int i = 0; i < KEY_MAX; ++i)
+      {
+	auto range = _map.equal_range (i);	// All entries stored under key i.
+	for (auto it = range.first; it != range.second; ++it)
+	  sum += (long long) it->first * it->second;
+      }
+#ifdef MEM_SHARED
+  /* Even with USM, memory allocated on the device (with _map.insert)
+     must be freed on the device.  */
+  if (omp_get_default_device () != omp_initial_device
+      && omp_get_default_device () != omp_get_num_devices ())
+    {
+      #pragma omp target
+	_map.clear ();
+    }
+#endif
+
+#ifndef MEM_SHARED
+  #pragma omp target
+    _map.~multimap ();
+#endif
+
+#ifndef MEM_SHARED
+  #pragma omp target exit data map (release: keys[ :N], data[ :N], _map)
+#endif	// Releases every mapping made by 'enter data' above, not just _map.
+
+  bool ok = validate (sum, keys, data);
+  return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent-usm.C
new file mode 100644
index 0000000..d869e89
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent-usm.C
@@ -0,0 +1,6 @@
+/* { dg-require-effective-target omp_usm } */
+#pragma omp requires unified_shared_memory self_maps
+
+#define MEM_SHARED
+
+#include "target-std__multiset-concurrent.C"
diff --git a/libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent.C
new file mode 100644
index 0000000..bace420
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent.C
@@ -0,0 +1,62 @@
+// { dg-do run }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+#include <set>
+#include <algorithm>
+
+// MAX should be less than N to ensure that some duplicates occur.
+#define N 4000
+#define MAX 1000
+
+void init (int data[])
+{
+  for (int *slot = data; slot != data + N; ++slot)
+    *slot = rand () % MAX;	// One draw per slot, each in [0, MAX).
+}
+
+bool validate (int sum, int data[])
+{
+  int expected = 0;	// Host-side reference: plain sum of all N inputs.
+  for (const int *p = data; p != data + N; ++p)
+    expected += *p;
+  return expected == sum;
+}
+
+int main (void)	// Returns 0 when the device-computed sum matches the host sum.
+{
+  int data[N];
+  std::multiset<int> set;
+  int sum = 0;
+
+  srand (time (NULL));
+  init (data);
+
+#ifndef MEM_SHARED
+  #pragma omp target data map (to: data[ :N]) map (alloc: set)
+#endif
+    {
+      #pragma omp target
+	{
+#ifndef MEM_SHARED
+	  new (&set) std::multiset<int> ();	// 'alloc' copies nothing.
+#endif
+	  for (int i = 0; i < N; ++i)
+	    set.insert (data[i]);
+	}
+      // Concurrent read-only lookups: value i is added once per occurrence.
+      #pragma omp target teams distribute parallel for reduction (+:sum)
+	for (int i = 0; i < MAX; ++i)
+	  sum += i * set.count (i);
+
+#ifndef MEM_SHARED
+      #pragma omp target
+	set.~multiset ();	// Destroy in a target region, like the construction.
+#endif
+    }
+
+  bool ok = validate (sum, data);
+  return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__numbers.C b/libgomp/testsuite/libgomp.c++/target-std__numbers.C
new file mode 100644
index 0000000..a6b3665
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__numbers.C
@@ -0,0 +1,93 @@
+// { dg-do run }
+// { dg-additional-options "-std=c++20" }
+
+#include <cmath>
+#include <numbers>
+
+#define FP_EQUAL(x,y) (std::abs ((x) - (y)) < 1E-6)
+
+#pragma omp declare target
+template<typename T> bool test_pi ()
+{
+  // Check pi through trig identities and against its reciprocal constants.
+  // Short-circuit evaluation keeps the checks in their original order.
+  const T pi = std::numbers::pi_v<T>;
+  const T inv_pi = std::numbers::inv_pi_v<T>;
+  const T inv_sqrtpi = std::numbers::inv_sqrtpi_v<T>;
+
+  return FP_EQUAL (std::sin (pi), (T) 0.0)
+	 && FP_EQUAL (std::cos (pi), (T) -1.0)
+	 && FP_EQUAL (pi * inv_pi, (T) 1.0)
+	 && FP_EQUAL (pi * inv_sqrtpi * inv_sqrtpi, (T) 1.0);
+}
+
+template<typename T> bool test_sqrt ()
+{
+  // Squaring each root constant must give back the radicand.
+  const T root2 = std::numbers::sqrt2_v<T>;
+  const T root3 = std::numbers::sqrt3_v<T>;
+  return FP_EQUAL (root2 * root2, (T) 2.0)
+	 && FP_EQUAL (root3 * root3, (T) 3.0);
+}
+
+template<typename T> bool test_phi ()
+{
+  // Closed form of the golden ratio: (1 + sqrt (5)) / 2.
+  const T one = (T) 1.0, two = (T) 2.0, five = (T) 5.0;
+  const T golden = (one + std::sqrt (five)) / two;
+  return FP_EQUAL (golden, std::numbers::phi_v<T>);
+}
+
+template<typename T> bool test_log ()
+{
+  // Recompute each logarithm constant with <cmath> and compare.
+  const T two = (T) 2.0, ten = (T) 10.0;
+  const T euler = (T) std::numbers::e;
+
+  const bool ln_ok = FP_EQUAL (std::log (two), std::numbers::ln2_v<T>)
+		     && FP_EQUAL (std::log (ten), std::numbers::ln10_v<T>);
+  return ln_ok
+	 && FP_EQUAL (std::log2 (euler), std::numbers::log2e_v<T>)
+	 && FP_EQUAL (std::log10 (euler), std::numbers::log10e_v<T>);
+}
+
+template<typename T> bool test_egamma ()	// Checks std::numbers::egamma_v<T>.
+{
+  T myegamma = 0.0;
+  #pragma omp parallel for reduction(+:myegamma)
+    for (int k = 2; k < 100000; ++k)
+      myegamma += (std::riemann_zeta (k) - 1) / k;	// Term k of the zeta series.
+  myegamma = (T) 1 - myegamma;	// The series (cut off at k < 100000) is 1 - gamma.
+  if (!FP_EQUAL (myegamma, std::numbers::egamma_v<T>))
+    return false;
+  return true;
+}
+#pragma omp end declare target
+
+#define RUN_TEST(func) \
+{ \
+  pass++; /* Float variant; 'pass' counts sub-tests starting at 1.  */ \
+  bool ok = test_##func<float> (); \
+  if (!ok) { result = pass; break; } /* 'break' needs an enclosing loop.  */ \
+  pass++; /* Double variant.  */ \
+  ok = test_##func<double> (); \
+  if (!ok) { result = pass; break; } \
+}
+
+int main (void)	// Runs every constant check inside one target region.
+{
+  int result = 0;	// 0 = all passed, else the number of the failing sub-test.
+
+  #pragma omp target map (tofrom: result)
+    do {
+      int pass = 0;
+
+      RUN_TEST (pi);
+      RUN_TEST (sqrt);
+      RUN_TEST (phi);
+      RUN_TEST (log);
+      RUN_TEST (egamma);
+    } while (false);	// Single-pass loop: gives RUN_TEST's 'break' a target.
+
+  return result;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__set-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__set-concurrent-usm.C
new file mode 100644
index 0000000..5fbf91b2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__set-concurrent-usm.C
@@ -0,0 +1,6 @@
+/* { dg-require-effective-target omp_usm } */
+#pragma omp requires unified_shared_memory self_maps
+
+#define MEM_SHARED
+
+#include "target-std__set-concurrent.C"
diff --git a/libgomp/testsuite/libgomp.c++/target-std__set-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__set-concurrent.C
new file mode 100644
index 0000000..4559778
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__set-concurrent.C
@@ -0,0 +1,68 @@
+// { dg-do run }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <time.h>
+#include <set>
+#include <algorithm>
+
+#define N 4000
+#define MAX 16384
+
+void init (int data[])
+{
+  std::set<int> seen;
+  for (int *slot = data; slot != data + N; ++slot)
+    {
+      // Redraw until the value has not been handed out before.
+      do
+	*slot = rand () % MAX;
+      while (seen.count (*slot) != 0);
+      seen.insert (*slot);
+    }
+}
+
+bool validate (int sum, int data[])
+{
+  int expected = 0;	// Host-side reference: plain sum of all N inputs.
+  for (const int *p = data; p != data + N; ++p)
+    expected += *p;
+  return expected == sum;
+}
+
+int main (void)	// Returns 0 when the device-computed sum matches the host sum.
+{
+  int data[N];
+  std::set<int> _set;
+  int sum = 0;
+
+  srand (time (NULL));
+  init (data);
+
+#ifndef MEM_SHARED
+  #pragma omp target data map (to: data[ :N]) map (alloc: _set)
+#endif
+    {
+      #pragma omp target
+	{
+#ifndef MEM_SHARED
+	  new (&_set) std::set<int> ();	// 'alloc' copies nothing.
+#endif
+	  for (int i = 0; i < N; ++i)
+	    _set.insert (data[i]);
+	}
+      // Concurrent read-only lookups: add every i in [0, MAX) that is present.
+      #pragma omp target teams distribute parallel for reduction (+:sum)
+	for (int i = 0; i < MAX; ++i)
+	  if (_set.find (i) != _set.end ())
+	    sum += i;
+
+#ifndef MEM_SHARED
+      #pragma omp target
+	_set.~set ();	// Destroy in a target region, like the construction.
+#endif
+    }
+
+  bool ok = validate (sum, data);
+  return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__span-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__span-concurrent-usm.C
new file mode 100644
index 0000000..09f9879
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__span-concurrent-usm.C
@@ -0,0 +1,8 @@
+// { dg-additional-options "-std=c++20" }
+/* { dg-require-effective-target omp_usm } */
+
+#pragma omp requires unified_shared_memory self_maps
+
+#define MEM_SHARED
+
+#include "target-std__span-concurrent.C"
diff --git a/libgomp/testsuite/libgomp.c++/target-std__span-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__span-concurrent.C
new file mode 100644
index 0000000..d6186f7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__span-concurrent.C
@@ -0,0 +1,66 @@
+// { dg-do run }
+// { dg-additional-options "-std=c++20" }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <time.h>
+#include <span>
+
+#define N 64
+
+void init (int data[])	// Fill data[0..N) with random values below 2^15.
+{
+  for (int i = 0; i < N; ++i)
+    data[i] = rand () % 0x8000;	// Squares stay < 2^30: no signed overflow.
+}
+
+#pragma omp declare target
+bool validate (const std::span<int, N> &span, int data[])
+{
+  int idx = 0;
+  // Stop at the first element that is not the square of its source value.
+  while (idx < N && span[idx] == data[idx] * data[idx])
+    ++idx;
+  return idx == N;
+}
+#pragma omp end declare target
+
+int main (void)	// Returns 0 when every span element equals data[i] squared.
+{
+  int data[N];
+  bool ok;
+  int elements[N];
+  std::span<int, N> span(elements);
+
+  srand (time (NULL));
+  init (data);
+
+#ifndef MEM_SHARED
+  #pragma omp target enter data map (to: data[ :N]) map (alloc: elements, span)
+#endif
+
+  #pragma omp target
+    {
+#ifndef MEM_SHARED
+      new (&span) std::span<int, N> (elements);	// 'alloc' copies nothing.
+#endif
+      std::copy (data, data + N, span.begin ());
+    }
+
+  #pragma omp target teams distribute parallel for
+    for (int i = 0; i < N; ++i)
+      span[i] *= span[i];
+
+  #pragma omp target map (from: ok)
+    {
+      ok = validate (span, data);
+#ifndef MEM_SHARED
+      span.~span ();
+#endif
+    }
+
+#ifndef MEM_SHARED
+  #pragma omp target exit data map (release: data[ :N], elements, span)
+#endif	// Releases every mapping made by 'enter data' above, data included.
+
+  return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__unordered_map-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__unordered_map-concurrent.C
new file mode 100644
index 0000000..3b259c4
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__unordered_map-concurrent.C
@@ -0,0 +1,66 @@
+// { dg-do run }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <time.h>
+#include <set>
+#include <unordered_map>
+
+#define N 3000
+
+void init (int data[], bool unique)	// Fill data[0..N) with random values.
+{
+  std::set<int> _set;
+  for (int i = 0; i < N; ++i)
+    {
+      // Avoid duplicates in data array if unique is true.
+      do
+	data[i] = rand () & 0xffffff;	// 24 bits: sum of N products fits long long.
+      while (unique && _set.count (data[i]) > 0);
+      _set.insert (data[i]);
+    }
+}
+
+bool validate (long long sum, int keys[], int data[])
+{
+  long long expected = 0;	// Host reference: sum of key * value products.
+  for (const int *k = keys, *d = data; k != keys + N; ++k, ++d)
+    expected += (long long) *k * *d;
+  return expected == sum;
+}
+
+int main (void)	// Returns 0 when the device sum of key * value matches the host.
+{
+  int keys[N], data[N];
+  std::unordered_map<int,int> _map;
+
+  srand (time (NULL));
+  init (keys, true);
+  init (data, false);
+
+  #pragma omp target enter data map (to: keys[ :N], data[ :N]) map (alloc: _map)
+
+  #pragma omp target
+    {
+#ifndef MEM_SHARED
+      new (&_map) std::unordered_map<int,int> ();	// 'alloc' copies nothing.
+#endif
+      for (int i = 0; i < N; ++i)
+	_map[keys[i]] = data[i];
+    }
+
+  long long sum = 0;
+  #pragma omp target teams distribute parallel for reduction (+:sum)
+    for (int i = 0; i < N; ++i)
+      sum += (long long) keys[i] * _map[keys[i]];
+
+#ifndef MEM_SHARED
+  #pragma omp target
+    _map.~unordered_map ();
+#endif
+  // Release every mapping made by 'enter data' above, not just _map.
+  #pragma omp target exit data map (release: keys[ :N], data[ :N], _map)
+
+  bool ok = validate (sum, keys, data);
+  return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__unordered_multimap-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__unordered_multimap-concurrent.C
new file mode 100644
index 0000000..d36d95d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__unordered_multimap-concurrent.C
@@ -0,0 +1,65 @@
+// { dg-do run }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <time.h>
+#include <unordered_map>
+
+// Make sure that KEY_MAX is less than N to ensure some duplicate keys.
+#define N 3000
+#define KEY_MAX 1000
+
+void init (int data[], int max)	// Fill data[0..N) with the pattern i % max.
+{
+  for (int i = 0; i < N; ++i)
+    data[i] = i % max;	// Deterministic; rand () is not used in this test.
+}
+
+bool validate (long long sum, int keys[], int data[])
+{
+  long long expected = 0;	// Host reference: sum of key * value products.
+  for (const int *k = keys, *d = data; k != keys + N; ++k, ++d)
+    expected += (long long) *k * *d;
+  return expected == sum;
+}
+
+int main (void)	// Returns 0 when the device sum of key * value matches the host.
+{
+  int keys[N], data[N];
+  std::unordered_multimap<int,int> _map;
+
+  srand (time (NULL));
+  init (keys, KEY_MAX);
+  init (data, RAND_MAX);
+
+  #pragma omp target enter data map (to: keys[ :N], data[ :N]) map (alloc: _map)
+
+  #pragma omp target
+    {
+#ifndef MEM_SHARED
+      new (&_map) std::unordered_multimap<int,int> ();	// 'alloc' copies nothing.
+#endif
+      for (int i = 0; i < N; ++i)
+	_map.insert({keys[i], data[i]});
+    }
+
+  long long sum = 0;
+  #pragma omp target teams distribute parallel for reduction (+:sum)
+    for (int i = 0; i < KEY_MAX; ++i)
+      {
+	auto range = _map.equal_range (i);	// All entries stored under key i.
+	for (auto it = range.first; it != range.second; ++it) {
+	  sum += (long long) it->first * it->second;
+	}
+      }
+
+#ifndef MEM_SHARED
+  #pragma omp target
+    _map.~unordered_multimap ();
+#endif
+  // Release every mapping made by 'enter data' above, not just _map.
+  #pragma omp target exit data map (release: keys[ :N], data[ :N], _map)
+
+  bool ok = validate (sum, keys, data);
+  return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__unordered_multiset-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__unordered_multiset-concurrent.C
new file mode 100644
index 0000000..3a3df22
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__unordered_multiset-concurrent.C
@@ -0,0 +1,59 @@
+// { dg-do run }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <time.h>
+#include <unordered_set>
+#include <algorithm>
+
+// MAX should be less than N to ensure that some duplicates occur.
+#define N 4000
+#define MAX 1000
+
+void init (int data[])
+{
+  for (int *slot = data; slot != data + N; ++slot)
+    *slot = rand () % MAX;	// One draw per slot, each in [0, MAX).
+}
+
+bool validate (int sum, int data[])
+{
+  int expected = 0;	// Host-side reference: plain sum of all N inputs.
+  for (const int *p = data; p != data + N; ++p)
+    expected += *p;
+  return expected == sum;
+}
+
+int main (void)	// Returns 0 when the device-computed sum matches the host sum.
+{
+  int data[N];
+  std::unordered_multiset<int> set;
+  int sum = 0;
+
+  srand (time (NULL));
+  init (data);
+
+  #pragma omp target data map (to: data[ :N]) map (alloc: set)
+    {
+      #pragma omp target
+	{
+#ifndef MEM_SHARED
+	  new (&set) std::unordered_multiset<int> ();	// 'alloc' copies nothing.
+#endif
+	  for (int i = 0; i < N; ++i)
+	    set.insert (data[i]);
+	}
+      // Concurrent read-only lookups: value i is added once per occurrence.
+      #pragma omp target teams distribute parallel for reduction (+:sum)
+	for (int i = 0; i < MAX; ++i)
+	  sum += i * set.count (i);
+
+#ifndef MEM_SHARED
+      #pragma omp target
+	set.~unordered_multiset ();	// Destroy in a target region as well.
+#endif
+    }
+
+  bool ok = validate (sum, data);
+  return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__unordered_set-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__unordered_set-concurrent.C
new file mode 100644
index 0000000..ca38d33
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__unordered_set-concurrent.C
@@ -0,0 +1,66 @@
+// { dg-do run }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <time.h>
+#include <unordered_set>
+#include <algorithm>
+
+#define N 4000
+#define MAX 16384
+
+void init (int data[])
+{
+  std::unordered_set<int> seen;
+  for (int *slot = data; slot != data + N; ++slot)
+    {
+      // Redraw until the value has not been handed out before.
+      do
+	*slot = rand () % MAX;
+      while (seen.count (*slot) > 0);
+      seen.insert (*slot);
+    }
+}
+
+bool validate (int sum, int data[])
+{
+  int expected = 0;	// Host-side reference: plain sum of all N inputs.
+  for (const int *p = data; p != data + N; ++p)
+    expected += *p;
+  return expected == sum;
+}
+
+int main (void)	// Returns 0 when the device-computed sum matches the host sum.
+{
+  int data[N];
+  std::unordered_set<int> _set;
+  int sum = 0;
+
+  srand (time (NULL));
+  init (data);
+
+  #pragma omp target data map (to: data[ :N]) map (alloc: _set)
+    {
+      #pragma omp target
+	{
+#ifndef MEM_SHARED
+	  new (&_set) std::unordered_set<int> ();	// 'alloc' copies nothing.
+#endif
+	  for (int i = 0; i < N; ++i)
+	    _set.insert (data[i]);
+	}
+      // Concurrent read-only lookups: add every i in [0, MAX) that is present.
+      #pragma omp target teams distribute parallel for reduction (+:sum)
+	for (int i = 0; i < MAX; ++i)
+	  if (_set.count (i) > 0)
+	    sum += i;
+
+#ifndef MEM_SHARED
+      #pragma omp target
+	_set.~unordered_set ();	// Destroy in a target region as well.
+#endif
+    }
+
+  bool ok = validate (sum, data);
+  return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__valarray-1.C b/libgomp/testsuite/libgomp.c++/target-std__valarray-1.C
new file mode 100644
index 0000000..9a77fb2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__valarray-1.C
@@ -0,0 +1,179 @@
+// { dg-additional-options -std=c++20 }
+// { dg-output-file target-std__valarray-1.output }
+
+#include <valarray>
+#include <ostream>
+#include <sstream>
+
+
+/*TODO Work around PR118484 "ICE during IPA pass: cp, segfault in determine_versionability ipa-cp.cc:467".
+
+We can't:
+
+    #pragma omp declare target(std::basic_streambuf<char, std::char_traits<char>>::basic_streambuf)
+
+... because:
+
+    error: overloaded function name ‘std::basic_streambuf<char>::__ct ’ in clause ‘enter’
+
+Therefore, use dummy classes in '#pragma omp declare target':
+*/
+
+#pragma omp declare target
+
+// For 'std::basic_streambuf<char, std::char_traits<char> >::basic_streambuf':
+
+class dummy_basic_streambuf__char	// Part of the PR118484 workaround above.
+  : public std::basic_streambuf<char>
+{
+public:
+  dummy_basic_streambuf__char() {}	// NOTE(review): presumably pulls the base ctor into 'declare target' -- confirm.
+};
+
+// For 'std::basic_ios<char, std::char_traits<char> >::basic_ios()':
+
+class dummy_basic_ios__char	// Part of the PR118484 workaround above.
+  : public std::basic_ios<char>
+{
+public:
+  dummy_basic_ios__char() {}	// NOTE(review): presumably pulls the base ctor into 'declare target' -- confirm.
+};
+
+#pragma omp end declare target
+
+
+int main()	// Runs std::valarray operations in a target region and prints the results.
+{
+  // Due to PR120021 "Offloading vs. C++ 'std::initializer_list'", we can't construct these on the device.
+  std::initializer_list<int> v1_i = {10, 20, 30, 40, 50};
+  const int *v1_i_data = std::data(v1_i);
+  size_t v1_i_size = v1_i.size();
+  std::initializer_list<int> v2_i = {5, 4, 3, 2, 1};
+  const int *v2_i_data = std::data(v2_i);
+  size_t v2_i_size = v2_i.size();
+  std::initializer_list<int> shiftData_i = {1, 2, 3, 4, 5};
+  const int *shiftData_i_data = std::data(shiftData_i);
+  size_t shiftData_i_size = shiftData_i.size();
+#pragma omp target \
+  defaultmap(none) \
+  map(to: v1_i_data[ :v1_i_size], v1_i_size, \
+          v2_i_data[ :v2_i_size], v2_i_size, \
+          shiftData_i_data[ :shiftData_i_size], shiftData_i_size)
+  {
+    /* Manually set up a buffer we can stream into, similar to 'cout << [...]', and print it at the end of region.  */
+    std::stringbuf out_b;
+    std::ostream out(&out_b);
+
+    std::valarray<int> v1(v1_i_data, v1_i_size);
+    out << "\nv1:";
+    for (auto val : v1)
+      out << " " << val;
+
+    std::valarray<int> v2(v2_i_data, v2_i_size);
+    out << "\nv2:";
+    for (auto val : v2)
+      out << " " << val;
+
+    std::valarray<int> sum = v1 + v2;
+    out << "\nv1 + v2:";
+    for (auto val : sum)
+      out << " " << val;
+
+    std::valarray<int> diff = v1 - v2;
+    out << "\nv1 - v2:";
+    for (auto val : diff)
+      out << " " << val;
+
+    std::valarray<int> product = v1 * v2;
+    out << "\nv1 * v2:";
+    for (auto val : product)
+      out << " " << val;
+
+    std::valarray<int> quotient = v1 / v2;
+    out << "\nv1 / v2:";
+    for (auto val : quotient)
+      out << " " << val;
+
+    std::valarray<int> squares = pow(v1, 2);
+    out << "\npow(v1, 2):";
+    for (auto val : squares)
+      out << " " << val;
+
+    std::valarray<int> sinhs = sinh(v2);	// Stored as int: expected output is 74 27 10 3 1.
+    out << "\nsinh(v2):";
+    for (auto val : sinhs)
+      out << " " << val;
+
+    std::valarray<int> logs = log(v1 * v2);	// Stored as int: expected output is 3 4 4 4 3.
+    out << "\nlog(v1 * v2):";
+    for (auto val : logs)
+      out << " " << val;
+
+    std::valarray<int> data(12);
+    for (size_t i = 0; i < data.size(); ++i)
+      data[i] = i;
+    out << "\nOriginal array:";
+    for (auto val : data)
+      out << " " << val;
+
+    std::slice slice1(2, 5, 1);	// Start 2, five elements, stride 1.
+    std::valarray<int> sliced1 = data[slice1];
+    out << "\nSlice(2, 5, 1):";
+    for (auto val : sliced1)
+      out << " " << val;
+
+    std::slice slice2(1, 4, 3);	// Start 1, four elements, stride 3.
+    std::valarray<int> sliced2 = data[slice2];
+    out << "\nSlice(1, 4, 3):";
+    for (auto val : sliced2)
+      out << " " << val;
+
+    data[slice1] = 99;
+    out << "\nArray after slice modification:";
+    for (auto val : data)
+      out << " " << val;
+
+    std::valarray<bool> mask = (v1 > 20);
+    out << "\nElements of v1 > 20:";
+    for (size_t i = 0; i < v1.size(); ++i)
+      {
+	if (mask[i])
+	  out << " " << v1[i];
+      }
+
+    std::valarray<int> masked = v1[mask];
+    out << "\nMasked array:";
+    for (auto val : masked)
+      out << " " << val;
+
+    std::valarray<int> shiftData(shiftData_i_data, shiftData_i_size);
+    out << "\nOriginal shiftData:";
+    for (auto val : shiftData)
+      out << " " << val;
+
+    std::valarray<int> shifted = shiftData.shift(2);
+    out << "\nshift(2):";
+    for (auto val : shifted)
+      out << " " << val;
+
+    std::valarray<int> cshifted = shiftData.cshift(-1);
+    out << "\ncshift(-1):";
+    for (auto val : cshifted)
+      out << " " << val;
+
+    out << "\nSum(v1): " << v1.sum();
+    out << "\nMin(v1): " << v1.min();
+    out << "\nMax(v1): " << v1.max();
+
+    out << "\n";
+
+    /* Terminate with a NUL.  Otherwise, we'd have to use:
+           __builtin_printf("%.*s", (int) out_b_sv.size(), out_b_sv.data());
+       ... which nvptx 'printf', as implemented via PTX 'vprintf', doesn't support (TODO).  */
+    out << '\0';
+    std::string_view out_b_sv = out_b.view();
+    __builtin_printf("%s", out_b_sv.data());	// One print of the whole buffer.
+  }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__valarray-1.output b/libgomp/testsuite/libgomp.c++/target-std__valarray-1.output
new file mode 100644
index 0000000..c441e06
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__valarray-1.output
@@ -0,0 +1,22 @@
+
+v1: 10 20 30 40 50
+v2: 5 4 3 2 1
+v1 + v2: 15 24 33 42 51
+v1 - v2: 5 16 27 38 49
+v1 * v2: 50 80 90 80 50
+v1 / v2: 2 5 10 20 50
+pow(v1, 2): 100 400 900 1600 2500
+sinh(v2): 74 27 10 3 1
+log(v1 * v2): 3 4 4 4 3
+Original array: 0 1 2 3 4 5 6 7 8 9 10 11
+Slice(2, 5, 1): 2 3 4 5 6
+Slice(1, 4, 3): 1 4 7 10
+Array after slice modification: 0 1 99 99 99 99 99 7 8 9 10 11
+Elements of v1 > 20: 30 40 50
+Masked array: 30 40 50
+Original shiftData: 1 2 3 4 5
+shift(2): 3 4 5 0 0
+cshift(-1): 5 1 2 3 4
+Sum(v1): 150
+Min(v1): 10
+Max(v1): 50
diff --git a/libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent-usm.C
new file mode 100644
index 0000000..828b67c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent-usm.C
@@ -0,0 +1,6 @@
+/* { dg-require-effective-target omp_usm } */
+#pragma omp requires unified_shared_memory self_maps
+
+#define MEM_SHARED
+
+#include "target-std__valarray-concurrent.C"
diff --git a/libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent.C
new file mode 100644
index 0000000..81eebe8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent.C
@@ -0,0 +1,66 @@
+// { dg-do run }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <time.h>
+#include <valarray>
+
+#define N 50000
+
+void init (int data[])
+{
+  for (int i = 0; i < N; ++i)
+    data[i] = rand ();
+}
+
+#pragma omp declare target
+bool validate (const std::valarray<int> &arr, int data[])
+{
+  for (int i = 0; i < N; ++i)
+    if (arr[i] != data[i] * data[i] + i)
+      return false;
+  return true;
+}
+#pragma omp end declare target
+
+int main (void)
+{
+  int data[N];
+  bool ok;
+
+  srand (time (NULL));
+  init (data);
+
+#ifdef MEM_SHARED
+  std::valarray<int> arr (data, N);
+#else
+  std::valarray<int> arr;
+#endif
+
+#ifndef MEM_SHARED
+  #pragma omp target data map (to: data[ :N]) map (alloc: arr)
+#endif
+    {
+      #pragma omp target
+	{
+#ifndef MEM_SHARED
+	  new (&arr) std::valarray<int> (data, N);
+#endif
+	  arr *= arr;
+	}
+
+      #pragma omp target teams distribute parallel for
+	for (int i = 0; i < N; ++i)
+	  arr[i] += i;
+
+      #pragma omp target map (from: ok)
+	{
+	  ok = validate (arr, data);
+#ifndef MEM_SHARED
+	  arr.~valarray ();
+#endif
+	}
+    }
+
+  return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__vector-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__vector-concurrent-usm.C
new file mode 100644
index 0000000..835f6d5
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__vector-concurrent-usm.C
@@ -0,0 +1,6 @@
+/* { dg-require-effective-target omp_usm } */
+#pragma omp requires unified_shared_memory self_maps
+
+#define MEM_SHARED
+
+#include "target-std__vector-concurrent.C"
diff --git a/libgomp/testsuite/libgomp.c++/target-std__vector-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__vector-concurrent.C
new file mode 100644
index 0000000..d728194
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__vector-concurrent.C
@@ -0,0 +1,63 @@
+// { dg-do run }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <time.h>
+#include <vector>
+
+#define N 50000
+
+void init (int data[])
+{
+  for (int i = 0; i < N; ++i)
+    data[i] = rand ();
+}
+
+#pragma omp declare target
+bool validate (const std::vector<int> &vec, int data[])
+{
+  for (int i = 0; i < N; ++i)
+    if (vec[i] != data[i] * data[i])
+      return false;
+  return true;
+}
+#pragma omp end declare target
+
+int main (void)
+{
+  int data[N];
+  bool ok;
+
+  srand (time (NULL));
+  init (data);
+
+#ifdef MEM_SHARED
+  std::vector<int> vec (data, data + N);
+#else
+  std::vector<int> vec;
+#endif
+
+#ifndef MEM_SHARED
+  #pragma omp target data map (to: data[ :N]) map (alloc: vec)
+#endif
+    {
+#ifndef MEM_SHARED
+      #pragma omp target
+	new (&vec) std::vector<int> (data, data + N);
+#endif
+
+      #pragma omp target teams distribute parallel for
+	for (int i = 0; i < N; ++i)
+	  vec[i] *= vec[i];
+
+      #pragma omp target map (from: ok)
+	{
+	  ok = validate (vec, data);
+#ifndef MEM_SHARED
+	  vec.~vector ();
+#endif
+	}
+    }
+
+  return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-this-3.C b/libgomp/testsuite/libgomp.c++/target-this-3.C
index 9865824..65b4cdc 100644
--- a/libgomp/testsuite/libgomp.c++/target-this-3.C
+++ b/libgomp/testsuite/libgomp.c++/target-this-3.C
@@ -74,7 +74,7 @@ int main (void)
     if (ptr2[i] != 0)
       abort ();
 
-  #pragma omp target data map(ptr1[:N])
+  #pragma omp target data map(ptr1[ :N])
   mapped = s.set_ptr (val);
 
   if (!mapped)
@@ -87,7 +87,7 @@ int main (void)
     if (ptr1[i] != val)
       abort ();
 
-  #pragma omp target data map(ptr2[:N])
+  #pragma omp target data map(ptr2[ :N])
   mapped = s.set_refptr (val);
 
   if (!mapped)
diff --git a/libgomp/testsuite/libgomp.c++/target-this-4.C b/libgomp/testsuite/libgomp.c++/target-this-4.C
index b2a593d..8f16444 100644
--- a/libgomp/testsuite/libgomp.c++/target-this-4.C
+++ b/libgomp/testsuite/libgomp.c++/target-this-4.C
@@ -83,7 +83,7 @@ int main (void)
     if (ptr2[i] != 0)
       abort ();
 
-  #pragma omp target data map(ptr1[:N], ptr2[:N])
+  #pragma omp target data map(ptr1[ :N], ptr2[ :N])
   {
     if (!p1 ())
       abort ();
diff --git a/libgomp/testsuite/libgomp.c++/target-virtual-1.C b/libgomp/testsuite/libgomp.c++/target-virtual-1.C
index a6ac30e..7b09f8f 100644
--- a/libgomp/testsuite/libgomp.c++/target-virtual-1.C
+++ b/libgomp/testsuite/libgomp.c++/target-virtual-1.C
@@ -15,7 +15,7 @@ struct derived : public base {
     void do_work ()
     {
       int error = 0;
-      #pragma omp target map (tofrom: this[:1], error)
+      #pragma omp target map (tofrom: this[ :1], error)
       {
 	if (scalar != 42 || this->array[0] != 123 || array[4] != 555)
 	  error = 1;
diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-11.C b/libgomp/testsuite/libgomp.c++/task-reduction-11.C
index 542bdd6..faa199a 100644
--- a/libgomp/testsuite/libgomp.c++/task-reduction-11.C
+++ b/libgomp/testsuite/libgomp.c++/task-reduction-11.C
@@ -25,10 +25,10 @@ foo (T &n, T *&c, long long int *&d, T (&m)[3], T *&r, T (&o)[4], T *&p, T (&q)[
 {
   T i;
   for (i = 0; i < 2; i++)
-    #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
-		     in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \
-		     in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \
-		     in_reduction (*: s[1:2], t[2:2][:])
+    #pragma omp task in_reduction (+: a, c[ :2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
+		     in_reduction (+: o[n:n*2], m[1], k[1:2][ : ], p[0], f[2:2]) \
+		     in_reduction (+: q[1:2][ : ], g[n:n*2], e[1], h[0], r[2:2]) \
+		     in_reduction (*: s[1:2], t[2:2][ : ])
     {
       a[0] += 7;
       a[1] += 17;
@@ -84,15 +84,15 @@ test (T &n)
     T (&o)[4] = os;
     #pragma omp parallel reduction (task,+: a, c) reduction (task,*: b[2 * n:3 * n], d) \
 			 reduction (task,+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \
-			 reduction (task,+: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \
-			 reduction (task,*: t[2:2][:], s[1:n + 1])
+			 reduction (task,+: o[n:n*2], m[1], q[1:2][ : ], p[0], r[2:2]) \
+			 reduction (task,*: t[2:2][ : ], s[1:n + 1])
     {
       #pragma omp for
       for (int i = 0; i < 4; i++)
 	#pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \
-			 in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \
-			 in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \
-			 in_reduction (*: s[1:2], t[2:2][:])
+			 in_reduction (+: o[n:n*2], q[1:2][ : ], p[0], m[1], r[2:2]) \
+			 in_reduction (+: g[n:n * 2], e[1], k[1:2][ : ], h[0], f[2:2]) \
+			 in_reduction (*: s[1:2], t[2:2][ : ])
 	{
 	  T j;
 	  a[0] += 2;
@@ -110,11 +110,11 @@ test (T &n)
 	  t[2][1] *= 2;
 	  t[3][1] *= 2;
 	  for (j = 0; j < 2; j++)
-	    #pragma omp task in_reduction (+: a, c[:2]) \
+	    #pragma omp task in_reduction (+: a, c[ :2]) \
 			     in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \
-			     in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \
-			     in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \
-			     in_reduction (*: s[n:2], t[2:2][:])
+			     in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][ :2]) \
+			     in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][ :2]) \
+			     in_reduction (*: s[n:2], t[2:2][ : ])
 	    {
 	      m[1] += 6;
 	      r[2] += 7;
diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-12.C b/libgomp/testsuite/libgomp.c++/task-reduction-12.C
index 02c1a78..8b1f31e 100644
--- a/libgomp/testsuite/libgomp.c++/task-reduction-12.C
+++ b/libgomp/testsuite/libgomp.c++/task-reduction-12.C
@@ -62,10 +62,10 @@ foo (int n, S *c, S *d, S m[3], S *r, S o[4], S *p, S q[4][2])
 {
   int i;
   for (i = 0; i < 2; i++)
-    #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
-		     in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \
-		     in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \
-		     in_reduction (*: s[1:2], t[2:2][:])
+    #pragma omp task in_reduction (+: a, c[ :2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
+		     in_reduction (+: o[n:n*2], m[1], k[1:2][ : ], p[0], f[2:2]) \
+		     in_reduction (+: q[1:2][ : ], g[n:n*2], e[1], h[0], r[2:2]) \
+		     in_reduction (*: s[1:2], t[2:2][ : ])
     {
       a[0].s += 7;
       a[1].s += 17;
@@ -129,15 +129,15 @@ test (int n)
     S o[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } };
     #pragma omp parallel reduction (task, +: a, c) reduction (task, *: b[2 * n:3 * n], d) \
 			 reduction (task, +: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \
-			 reduction (task, +: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \
-			 reduction (task, *: t[2:2][:], s[1:n + 1])
+			 reduction (task, +: o[n:n*2], m[1], q[1:2][ : ], p[0], r[2:2]) \
+			 reduction (task, *: t[2:2][ : ], s[1:n + 1])
     {
       #pragma omp for
       for (int i = 0; i < 4; i++)
 	#pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \
-			 in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \
-			 in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \
-			 in_reduction (*: s[1:2], t[2:2][:])
+			 in_reduction (+: o[n:n*2], q[1:2][ : ], p[0], m[1], r[2:2]) \
+			 in_reduction (+: g[n:n * 2], e[1], k[1:2][ : ], h[0], f[2:2]) \
+			 in_reduction (*: s[1:2], t[2:2][ : ])
 	{
 	  int j;
 	  a[0].s += 2;
@@ -170,11 +170,11 @@ test (int n)
 	    if (b[z + 2].t != 5 && b[z + 2].t != 9)
 	      abort ();
 	  for (j = 0; j < 2; j++)
-	    #pragma omp task in_reduction (+: a, c[:2]) \
+	    #pragma omp task in_reduction (+: a, c[ :2]) \
 			     in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \
-			     in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \
-			     in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \
-			     in_reduction (*: s[n:2], t[2:2][:])
+			     in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][ :2]) \
+			     in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][ :2]) \
+			     in_reduction (*: s[n:2], t[2:2][ : ])
 	    {
 	      m[1].s += 6;
 	      r[2].s += 7;
diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-13.C b/libgomp/testsuite/libgomp.c++/task-reduction-13.C
index 3d0165d..596bcea 100644
--- a/libgomp/testsuite/libgomp.c++/task-reduction-13.C
+++ b/libgomp/testsuite/libgomp.c++/task-reduction-13.C
@@ -71,10 +71,10 @@ foo (T &n, S *&c, S *&d, S (&m)[3], S *&r, S (&o)[4], S *&p, S (&q)[4][2])
 {
   T i;
   for (i = 0; i < 2; i++)
-    #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
-		     in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \
-		     in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \
-		     in_reduction (*: s[1:2], t[2:2][:])
+    #pragma omp task in_reduction (+: a, c[ :2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
+		     in_reduction (+: o[n:n*2], m[1], k[1:2][ : ], p[0], f[2:2]) \
+		     in_reduction (+: q[1:2][ : ], g[n:n*2], e[1], h[0], r[2:2]) \
+		     in_reduction (*: s[1:2], t[2:2][ : ])
     {
       a[0].s += 7;
       a[1].s += 17;
@@ -145,15 +145,15 @@ test (T &n)
     S (&o)[4] = os;
     #pragma omp parallel reduction (task, +: a, c) reduction (task, *: b[2 * n:3 * n], d) \
 			 reduction (task, +: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \
-			 reduction (task, +: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \
-			 reduction (task, *: t[2:2][:], s[1:n + 1])
+			 reduction (task, +: o[n:n*2], m[1], q[1:2][ : ], p[0], r[2:2]) \
+			 reduction (task, *: t[2:2][ : ], s[1:n + 1])
     {
       #pragma omp for
       for (T i = 0; i < 4; i++)
 	#pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \
-			 in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \
-			 in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \
-			 in_reduction (*: s[1:2], t[2:2][:])
+			 in_reduction (+: o[n:n*2], q[1:2][ : ], p[0], m[1], r[2:2]) \
+			 in_reduction (+: g[n:n * 2], e[1], k[1:2][ : ], h[0], f[2:2]) \
+			 in_reduction (*: s[1:2], t[2:2][ : ])
 	{
 	  T j;
 	  a[0].s += 2;
@@ -186,11 +186,11 @@ test (T &n)
 	    if (b[z + 2].t != 5 && b[z + 2].t != 9)
 	      abort ();
 	  for (j = 0; j < 2; j++)
-	    #pragma omp task in_reduction (+: a, c[:2]) \
+	    #pragma omp task in_reduction (+: a, c[ :2]) \
 			     in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \
-			     in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \
-			     in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \
-			     in_reduction (*: s[n:2], t[2:2][:])
+			     in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][ :2]) \
+			     in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][ :2]) \
+			     in_reduction (*: s[n:2], t[2:2][ : ])
 	    {
 	      m[1].s += 6;
 	      r[2].s += 7;
diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-17.C b/libgomp/testsuite/libgomp.c++/task-reduction-17.C
index c00c8e4..5a2b1e3 100644
--- a/libgomp/testsuite/libgomp.c++/task-reduction-17.C
+++ b/libgomp/testsuite/libgomp.c++/task-reduction-17.C
@@ -25,10 +25,10 @@ foo (T &n, T *&c, long long int *&d, T (&m)[3], T *&r, T (&o)[4], T *&p, T (&q)[
 {
   T i;
   for (i = 0; i < 2; i++)
-    #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
-		     in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \
-		     in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \
-		     in_reduction (*: s[1:2], t[2:2][:])
+    #pragma omp task in_reduction (+: a, c[ :2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
+		     in_reduction (+: o[n:n*2], m[1], k[1:2][ : ], p[0], f[2:2]) \
+		     in_reduction (+: q[1:2][ : ], g[n:n*2], e[1], h[0], r[2:2]) \
+		     in_reduction (*: s[1:2], t[2:2][ : ])
     {
       a[0] += 7;
       a[1] += 17;
@@ -86,13 +86,13 @@ test (T &n, I x, I y)
     {
       #pragma omp for reduction (task,+: a, c) reduction (task,*: b[2 * n:3 * n], d) \
 		      reduction (task,+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \
-		      reduction (task,+: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \
-		      reduction (task,*: t[2:2][:], s[1:n + 1]) schedule (dynamic)
+		      reduction (task,+: o[n:n*2], m[1], q[1:2][ : ], p[0], r[2:2]) \
+		      reduction (task,*: t[2:2][ : ], s[1:n + 1]) schedule (dynamic)
       for (I i = x; i != y; i++)
 	#pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \
-			 in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \
-			 in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \
-			 in_reduction (*: s[1:2], t[2:2][:])
+			 in_reduction (+: o[n:n*2], q[1:2][ : ], p[0], m[1], r[2:2]) \
+			 in_reduction (+: g[n:n * 2], e[1], k[1:2][ : ], h[0], f[2:2]) \
+			 in_reduction (*: s[1:2], t[2:2][ : ])
 	{
 	  T j;
 	  a[0] += 2;
@@ -110,11 +110,11 @@ test (T &n, I x, I y)
 	  t[2][1] *= 2;
 	  t[3][1] *= 2;
 	  for (j = 0; j < 2; j++)
-	    #pragma omp task in_reduction (+: a, c[:2]) \
+	    #pragma omp task in_reduction (+: a, c[ :2]) \
 			     in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \
-			     in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \
-			     in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \
-			     in_reduction (*: s[n:2], t[2:2][:])
+			     in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][ :2]) \
+			     in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][ :2]) \
+			     in_reduction (*: s[n:2], t[2:2][ : ])
 	    {
 	      m[1] += 6;
 	      r[2] += 7;
diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-18.C b/libgomp/testsuite/libgomp.c++/task-reduction-18.C
index 99c0e37..68ca151 100644
--- a/libgomp/testsuite/libgomp.c++/task-reduction-18.C
+++ b/libgomp/testsuite/libgomp.c++/task-reduction-18.C
@@ -62,10 +62,10 @@ foo (int n, S *c, S *d, S m[3], S *r, S o[4], S *p, S q[4][2])
 {
   int i;
   for (i = 0; i < 2; i++)
-    #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
-		     in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \
-		     in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \
-		     in_reduction (*: s[1:2], t[2:2][:])
+    #pragma omp task in_reduction (+: a, c[ :2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
+		     in_reduction (+: o[n:n*2], m[1], k[1:2][ : ], p[0], f[2:2]) \
+		     in_reduction (+: q[1:2][ : ], g[n:n*2], e[1], h[0], r[2:2]) \
+		     in_reduction (*: s[1:2], t[2:2][ : ])
     {
       a[0].s += 7;
       a[1].s += 17;
@@ -134,14 +134,14 @@ test (int n)
     {
       #pragma omp for reduction (task, +: a, c) reduction (task, *: b[2 * n:3 * n], d) \
 		      reduction (task, +: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \
-		      reduction (task, +: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \
-		      reduction (task, *: t[2:2][:], s[1:n + 1]) \
+		      reduction (task, +: o[n:n*2], m[1], q[1:2][ : ], p[0], r[2:2]) \
+		      reduction (task, *: t[2:2][ : ], s[1:n + 1]) \
 		      schedule (nonmonotonic: guided, 1)
       for (unsigned long long i = x; i < y; i += z)
 	#pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \
-			 in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \
-			 in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \
-			 in_reduction (*: s[1:2], t[2:2][:])
+			 in_reduction (+: o[n:n*2], q[1:2][ : ], p[0], m[1], r[2:2]) \
+			 in_reduction (+: g[n:n * 2], e[1], k[1:2][ : ], h[0], f[2:2]) \
+			 in_reduction (*: s[1:2], t[2:2][ : ])
 	{
 	  int j;
 	  a[0].s += 2;
@@ -174,11 +174,11 @@ test (int n)
 	    if (b[z + 2].t != 5 && b[z + 2].t != 9)
 	      abort ();
 	  for (j = 0; j < 2; j++)
-	    #pragma omp task in_reduction (+: a, c[:2]) \
+	    #pragma omp task in_reduction (+: a, c[ :2]) \
 			     in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \
-			     in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \
-			     in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \
-			     in_reduction (*: s[n:2], t[2:2][:])
+			     in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][ :2]) \
+			     in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][ :2]) \
+			     in_reduction (*: s[n:2], t[2:2][ : ])
 	    {
 	      m[1].s += 6;
 	      r[2].s += 7;
diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-19.C b/libgomp/testsuite/libgomp.c++/task-reduction-19.C
index 15945c5..baed72a 100644
--- a/libgomp/testsuite/libgomp.c++/task-reduction-19.C
+++ b/libgomp/testsuite/libgomp.c++/task-reduction-19.C
@@ -71,10 +71,10 @@ foo (T &n, S *&c, S *&d, S (&m)[3], S *&r, S (&o)[4], S *&p, S (&q)[4][2])
 {
   T i;
   for (i = 0; i < 2; i++)
-    #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
-		     in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \
-		     in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \
-		     in_reduction (*: s[1:2], t[2:2][:])
+    #pragma omp task in_reduction (+: a, c[ :2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
+		     in_reduction (+: o[n:n*2], m[1], k[1:2][ : ], p[0], f[2:2]) \
+		     in_reduction (+: q[1:2][ : ], g[n:n*2], e[1], h[0], r[2:2]) \
+		     in_reduction (*: s[1:2], t[2:2][ : ])
     {
       a[0].s += 7;
       a[1].s += 17;
@@ -147,14 +147,14 @@ test (T &n)
     {
       #pragma omp for reduction (task, +: a, c) reduction (task, *: b[2 * n:3 * n], d) \
 		      reduction (task, +: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \
-		      reduction (task, +: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \
-		      reduction (task, *: t[2:2][:], s[1:n + 1]) \
+		      reduction (task, +: o[n:n*2], m[1], q[1:2][ : ], p[0], r[2:2]) \
+		      reduction (task, *: t[2:2][ : ], s[1:n + 1]) \
 		      schedule (monotonic: runtime)
       for (T i = 0; i < 4; i++)
 	#pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \
-			 in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \
-			 in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \
-			 in_reduction (*: s[1:2], t[2:2][:])
+			 in_reduction (+: o[n:n*2], q[1:2][ : ], p[0], m[1], r[2:2]) \
+			 in_reduction (+: g[n:n * 2], e[1], k[1:2][ : ], h[0], f[2:2]) \
+			 in_reduction (*: s[1:2], t[2:2][ : ])
 	{
 	  T j;
 	  a[0].s += 2;
@@ -187,11 +187,11 @@ test (T &n)
 	    if (b[z + 2].t != 5 && b[z + 2].t != 9)
 	      abort ();
 	  for (j = 0; j < 2; j++)
-	    #pragma omp task in_reduction (+: a, c[:2]) \
+	    #pragma omp task in_reduction (+: a, c[ :2]) \
 			     in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \
-			     in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \
-			     in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \
-			     in_reduction (*: s[n:2], t[2:2][:])
+			     in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][ :2]) \
+			     in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][ :2]) \
+			     in_reduction (*: s[n:2], t[2:2][ : ])
 	    {
 	      m[1].s += 6;
 	      r[2].s += 7;
diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-4.C b/libgomp/testsuite/libgomp.c++/task-reduction-4.C
index 1d4da79..0edc965 100644
--- a/libgomp/testsuite/libgomp.c++/task-reduction-4.C
+++ b/libgomp/testsuite/libgomp.c++/task-reduction-4.C
@@ -25,10 +25,10 @@ foo (T &n, T *&c, long long int *&d, T (&m)[3], T *&r, T (&o)[4], T *&p, T (&q)[
 {
   T i;
   for (i = 0; i < 2; i++)
-    #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
-		     in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \
-		     in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \
-		     in_reduction (*: s[1:2], t[2:2][:])
+    #pragma omp task in_reduction (+: a, c[ :2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
+		     in_reduction (+: o[n:n*2], m[1], k[1:2][ : ], p[0], f[2:2]) \
+		     in_reduction (+: q[1:2][ : ], g[n:n*2], e[1], h[0], r[2:2]) \
+		     in_reduction (*: s[1:2], t[2:2][ : ])
     {
       a[0] += 7;
       a[1] += 17;
@@ -85,15 +85,15 @@ test (T &n)
     T (&o)[4] = os;
     #pragma omp taskgroup task_reduction (+: a, c) task_reduction (*: b[2 * n:3 * n], d) \
 			  task_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \
-			  task_reduction (+: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \
-			  task_reduction (*: t[2:2][:], s[1:n + 1])
+			  task_reduction (+: o[n:n*2], m[1], q[1:2][ : ], p[0], r[2:2]) \
+			  task_reduction (*: t[2:2][ : ], s[1:n + 1])
     {
       T i;
       for (i = 0; i < 4; i++)
 	#pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \
-			 in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \
-			 in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \
-			 in_reduction (*: s[1:2], t[2:2][:])
+			 in_reduction (+: o[n:n*2], q[1:2][ : ], p[0], m[1], r[2:2]) \
+			 in_reduction (+: g[n:n * 2], e[1], k[1:2][ : ], h[0], f[2:2]) \
+			 in_reduction (*: s[1:2], t[2:2][ : ])
 	{
 	  T j;
 	  a[0] += 2;
@@ -111,11 +111,11 @@ test (T &n)
 	  t[2][1] *= 2;
 	  t[3][1] *= 2;
 	  for (j = 0; j < 2; j++)
-	    #pragma omp task in_reduction (+: a, c[:2]) \
+	    #pragma omp task in_reduction (+: a, c[ :2]) \
 			     in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \
-			     in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \
-			     in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \
-			     in_reduction (*: s[n:2], t[2:2][:])
+			     in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][ :2]) \
+			     in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][ :2]) \
+			     in_reduction (*: s[n:2], t[2:2][ : ])
 	    {
 	      m[1] += 6;
 	      r[2] += 7;
diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-5.C b/libgomp/testsuite/libgomp.c++/task-reduction-5.C
index 59583f1..296bf9d 100644
--- a/libgomp/testsuite/libgomp.c++/task-reduction-5.C
+++ b/libgomp/testsuite/libgomp.c++/task-reduction-5.C
@@ -61,10 +61,10 @@ foo (int n, S *c, S *d, S m[3], S *r, S o[4], S *p, S q[4][2])
 {
   int i;
   for (i = 0; i < 2; i++)
-    #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
-		     in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \
-		     in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \
-		     in_reduction (*: s[1:2], t[2:2][:])
+    #pragma omp task in_reduction (+: a, c[ :2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
+		     in_reduction (+: o[n:n*2], m[1], k[1:2][ : ], p[0], f[2:2]) \
+		     in_reduction (+: q[1:2][ : ], g[n:n*2], e[1], h[0], r[2:2]) \
+		     in_reduction (*: s[1:2], t[2:2][ : ])
     {
       a[0].s += 7;
       a[1].s += 17;
@@ -128,15 +128,15 @@ test (int n)
     S o[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } };
     #pragma omp taskgroup task_reduction (+: a, c) task_reduction (*: b[2 * n:3 * n], d) \
 			  task_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \
-			  task_reduction (+: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \
-			  task_reduction (*: t[2:2][:], s[1:n + 1])
+			  task_reduction (+: o[n:n*2], m[1], q[1:2][ : ], p[0], r[2:2]) \
+			  task_reduction (*: t[2:2][ : ], s[1:n + 1])
     {
       int i;
       for (i = 0; i < 4; i++)
 	#pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \
-			 in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \
-			 in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \
-			 in_reduction (*: s[1:2], t[2:2][:])
+			 in_reduction (+: o[n:n*2], q[1:2][ : ], p[0], m[1], r[2:2]) \
+			 in_reduction (+: g[n:n * 2], e[1], k[1:2][ : ], h[0], f[2:2]) \
+			 in_reduction (*: s[1:2], t[2:2][ : ])
 	{
 	  int j;
 	  a[0].s += 2;
@@ -169,11 +169,11 @@ test (int n)
 	    if (b[z + 2].t != 5 && b[z + 2].t != 9)
 	      abort ();
 	  for (j = 0; j < 2; j++)
-	    #pragma omp task in_reduction (+: a, c[:2]) \
+	    #pragma omp task in_reduction (+: a, c[ :2]) \
 			     in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \
-			     in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \
-			     in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \
-			     in_reduction (*: s[n:2], t[2:2][:])
+			     in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][ :2]) \
+			     in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][ :2]) \
+			     in_reduction (*: s[n:2], t[2:2][ : ])
 	    {
 	      m[1].s += 6;
 	      r[2].s += 7;
diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-6.C b/libgomp/testsuite/libgomp.c++/task-reduction-6.C
index d7f69da..a1dfc87 100644
--- a/libgomp/testsuite/libgomp.c++/task-reduction-6.C
+++ b/libgomp/testsuite/libgomp.c++/task-reduction-6.C
@@ -70,10 +70,10 @@ foo (int &n, S *&c, S *&d, S (&m)[3], S *&r, S (&o)[4], S *&p, S (&q)[4][2])
 {
   int i;
   for (i = 0; i < 2; i++)
-    #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
-		     in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \
-		     in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \
-		     in_reduction (*: s[1:2], t[2:2][:])
+    #pragma omp task in_reduction (+: a, c[ :2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
+		     in_reduction (+: o[n:n*2], m[1], k[1:2][ : ], p[0], f[2:2]) \
+		     in_reduction (+: q[1:2][ : ], g[n:n*2], e[1], h[0], r[2:2]) \
+		     in_reduction (*: s[1:2], t[2:2][ : ])
     {
       a[0].s += 7;
       a[1].s += 17;
@@ -144,15 +144,15 @@ test (int &n)
     S (&o)[4] = os;
     #pragma omp taskgroup task_reduction (+: a, c) task_reduction (*: b[2 * n:3 * n], d) \
 			  task_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \
-			  task_reduction (+: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \
-			  task_reduction (*: t[2:2][:], s[1:n + 1])
+			  task_reduction (+: o[n:n*2], m[1], q[1:2][ : ], p[0], r[2:2]) \
+			  task_reduction (*: t[2:2][ : ], s[1:n + 1])
     {
       int i;
       for (i = 0; i < 4; i++)
 	#pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \
-			 in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \
-			 in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \
-			 in_reduction (*: s[1:2], t[2:2][:])
+			 in_reduction (+: o[n:n*2], q[1:2][ : ], p[0], m[1], r[2:2]) \
+			 in_reduction (+: g[n:n * 2], e[1], k[1:2][ : ], h[0], f[2:2]) \
+			 in_reduction (*: s[1:2], t[2:2][ : ])
 	{
 	  int j;
 	  a[0].s += 2;
@@ -185,11 +185,11 @@ test (int &n)
 	    if (b[z + 2].t != 5 && b[z + 2].t != 9)
 	      abort ();
 	  for (j = 0; j < 2; j++)
-	    #pragma omp task in_reduction (+: a, c[:2]) \
+	    #pragma omp task in_reduction (+: a, c[ :2]) \
 			     in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \
-			     in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \
-			     in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \
-			     in_reduction (*: s[n:2], t[2:2][:])
+			     in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][ :2]) \
+			     in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][ :2]) \
+			     in_reduction (*: s[n:2], t[2:2][ : ])
 	    {
 	      m[1].s += 6;
 	      r[2].s += 7;
diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-7.C b/libgomp/testsuite/libgomp.c++/task-reduction-7.C
index 2a4d82e..72afa5c 100644
--- a/libgomp/testsuite/libgomp.c++/task-reduction-7.C
+++ b/libgomp/testsuite/libgomp.c++/task-reduction-7.C
@@ -4,7 +4,7 @@ extern "C" void abort ();
 void
 bar (int *a, int *b, int *c, int (*d)[2], int (*e)[4], int *f, int *g, size_t n)
 {
-  #pragma omp task in_reduction (*: a[:n], b[3:n], c[n:n], d[0][:n], e[0][1:n], f[:n], g[1:n])
+  #pragma omp task in_reduction (*: a[ :n], b[3:n], c[n:n], d[0][ :n], e[0][1:n], f[ :n], g[1:n])
   {
     a[0] *= 12;
     a[1] *= 13;
@@ -43,10 +43,10 @@ foo (size_t n, void *x, void *y)
       f[i] = 1;
       g[i + 1] = 1;
     }
-  #pragma omp taskgroup task_reduction (*: a, b[3:n], c[n:n], d[0][:n], e[0][1:n], f, g[1:n])
+  #pragma omp taskgroup task_reduction (*: a, b[3:n], c[n:n], d[0][ :n], e[0][1:n], f, g[1:n])
   {
     bar (a, b, c, (int (*)[2]) d, (int (*)[4]) e, &f[0], &g[0], n);
-    #pragma omp task in_reduction (*: a, b[3:n], c[n:n], d[0][:n], e[0][1:n], f, g[1:n])
+    #pragma omp task in_reduction (*: a, b[3:n], c[n:n], d[0][ :n], e[0][1:n], f, g[1:n])
     {
       a[0] *= 2;
       a[1] *= 3;
@@ -98,10 +98,10 @@ baz (size_t n, void *x, void *y)
   {
     int (&f)[n] = fb;
     int (&g)[n * 2] = gb;
-    #pragma omp taskgroup task_reduction (*: a, b[3:n], c[n:n], d[0][:n], e[0][1:n], f, g[1:n])
+    #pragma omp taskgroup task_reduction (*: a, b[3:n], c[n:n], d[0][ :n], e[0][1:n], f, g[1:n])
     {
       bar (a, b, c, (int (*)[2]) d, (int (*)[4]) e, &f[0], &g[0], n);
-      #pragma omp task in_reduction (*: a, b[3:n], c[n:n], d[0][:n], e[0][1:n], f, g[1:n])
+      #pragma omp task in_reduction (*: a, b[3:n], c[n:n], d[0][ :n], e[0][1:n], f, g[1:n])
       {
 	a[0] *= 2;
 	a[1] *= 3;
diff --git a/libgomp/testsuite/libgomp.c++/taskloop-reduction-2.C b/libgomp/testsuite/libgomp.c++/taskloop-reduction-2.C
index f1de5da..698fb23 100644
--- a/libgomp/testsuite/libgomp.c++/taskloop-reduction-2.C
+++ b/libgomp/testsuite/libgomp.c++/taskloop-reduction-2.C
@@ -38,10 +38,10 @@ foo (int &n, int *&c, long long int *&d, int (&m)[3], int *&r, int (&o)[4], int
   int i;
   U u;
   u.u[2] = 8;
-  #pragma omp taskloop in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
-		       in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \
-		       in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \
-		       in_reduction (*: s[1:2], t[2:2][:]) firstprivate (u) nogroup
+  #pragma omp taskloop in_reduction (+: a, c[ :2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
+		       in_reduction (+: o[n:n*2], m[1], k[1:2][ : ], p[0], f[2:2]) \
+		       in_reduction (+: q[1:2][ : ], g[n:n*2], e[1], h[0], r[2:2]) \
+		       in_reduction (*: s[1:2], t[2:2][ : ]) firstprivate (u) nogroup
   for (i = 0; i < 2; i++)
     {
       a[0] += 7;
@@ -103,8 +103,8 @@ test (int &n)
     int i;
     #pragma omp taskloop reduction (+: a, c) reduction (*: b[2 * n:3 * n], d) \
 			 reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \
-			 reduction (+: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \
-			 reduction (*: t[2:2][:], s[1:n + 1]) firstprivate (u)
+			 reduction (+: o[n:n*2], m[1], q[1:2][ : ], p[0], r[2:2]) \
+			 reduction (*: t[2:2][ : ], s[1:n + 1]) firstprivate (u)
     for (i = 0; i < 4; i++)
       {
 	int j;
@@ -125,11 +125,11 @@ test (int &n)
 	if (u.u[2] != 10)
 	  abort ();
 	for (j = 0; j < 2; j++)
-	  #pragma omp task in_reduction (+: a, c[:2]) \
+	  #pragma omp task in_reduction (+: a, c[ :2]) \
 			   in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \
-			   in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \
-			   in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \
-			   in_reduction (*: s[n:2], t[2:2][:]) firstprivate (u)
+			   in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][ :2]) \
+			   in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][ :2]) \
+			   in_reduction (*: s[n:2], t[2:2][ : ]) firstprivate (u)
 	  {
 	    m[1] += 6;
 	    r[2] += 7;
diff --git a/libgomp/testsuite/libgomp.c++/taskloop-reduction-3.C b/libgomp/testsuite/libgomp.c++/taskloop-reduction-3.C
index 0588e47..04b7d1b 100644
--- a/libgomp/testsuite/libgomp.c++/taskloop-reduction-3.C
+++ b/libgomp/testsuite/libgomp.c++/taskloop-reduction-3.C
@@ -60,10 +60,10 @@ void
 foo (int n, S *c, S *d, S m[3], S *r, S o[4], S *p, S q[4][2])
 {
   int i;
-  #pragma omp taskloop in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
-		       in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \
-		       in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \
-		       in_reduction (*: s[1:2], t[2:2][:]) nogroup
+  #pragma omp taskloop in_reduction (+: a, c[ :2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
+		       in_reduction (+: o[n:n*2], m[1], k[1:2][ : ], p[0], f[2:2]) \
+		       in_reduction (+: q[1:2][ : ], g[n:n*2], e[1], h[0], r[2:2]) \
+		       in_reduction (*: s[1:2], t[2:2][ : ]) nogroup
   for (i = 0; i < 2; i++)
     {
       a[0].s += 7;
@@ -129,8 +129,8 @@ test (int n)
     int i;
     #pragma omp taskloop reduction (+: a, c) reduction (*: b[2 * n:3 * n], d) \
 			 reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \
-			 reduction (+: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \
-			 reduction (*: t[2:2][:], s[1:n + 1])
+			 reduction (+: o[n:n*2], m[1], q[1:2][ : ], p[0], r[2:2]) \
+			 reduction (*: t[2:2][ : ], s[1:n + 1])
     for (i = 0; i < 4; i++)
       {
 	int j;
@@ -163,11 +163,11 @@ test (int n)
 	for (int z = 0; z < 3; z++)
 	  if (b[z + 2].t != 5 && b[z + 2].t != 9)
 	    abort ();
-	#pragma omp taskloop in_reduction (+: a, c[:2]) \
+	#pragma omp taskloop in_reduction (+: a, c[ :2]) \
 			     in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \
-			     in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \
-			     in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \
-			     in_reduction (*: s[n:2], t[2:2][:]) nogroup
+			     in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][ :2]) \
+			     in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][ :2]) \
+			     in_reduction (*: s[n:2], t[2:2][ : ]) nogroup
 	for (j = 0; j < 2; j++)
 	  {
 	    m[1].s += 6;
diff --git a/libgomp/testsuite/libgomp.c++/taskloop-reduction-4.C b/libgomp/testsuite/libgomp.c++/taskloop-reduction-4.C
index 41c7040..4afb05c 100644
--- a/libgomp/testsuite/libgomp.c++/taskloop-reduction-4.C
+++ b/libgomp/testsuite/libgomp.c++/taskloop-reduction-4.C
@@ -61,10 +61,10 @@ void
 foo (int n, S *c, S *d, S m[3], S *r, S o[4], S *p, S q[4][2])
 {
   int i;
-  #pragma omp taskloop in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
-		       reduction (default, +: o[n:n*2], m[1], p[0]) in_reduction (+: k[1:2][:], f[2:2]) \
-		       reduction (+: q[1:2][:], r[2:2]) in_reduction (+: g[n:n*2], e[1], h[0]) \
-		       in_reduction (*: s[1:2], t[2:2][:])
+  #pragma omp taskloop in_reduction (+: a, c[ :2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
+		       reduction (default, +: o[n:n*2], m[1], p[0]) in_reduction (+: k[1:2][ : ], f[2:2]) \
+		       reduction (+: q[1:2][ : ], r[2:2]) in_reduction (+: g[n:n*2], e[1], h[0]) \
+		       in_reduction (*: s[1:2], t[2:2][ : ])
   for (i = 0; i < 2; i++)
     {
       a[0].s += 7;
@@ -131,8 +131,8 @@ test (int n)
     int i;
     #pragma omp taskloop reduction (+: a, c) reduction (default, *: b[2 * n:3 * n], d) \
 			 reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \
-			 reduction (+: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \
-			 reduction (*: t[2:2][:], s[1:n + 1])
+			 reduction (+: o[n:n*2], m[1], q[1:2][ : ], p[0], r[2:2]) \
+			 reduction (*: t[2:2][ : ], s[1:n + 1])
     for (i = 0; i < 4; i++)
       {
 	int j;
@@ -165,11 +165,11 @@ test (int n)
 	for (int z = 0; z < 3; z++)
 	  if (b[z + 2].t != 5 && b[z + 2].t != 9)
 	    abort ();
-	#pragma omp taskloop in_reduction (+: a, c[:2]) \
+	#pragma omp taskloop in_reduction (+: a, c[ :2]) \
 			     in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \
-			     in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \
-			     in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \
-			     in_reduction (*: s[n:2], t[2:2][:]) nogroup
+			     in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][ :2]) \
+			     in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][ :2]) \
+			     in_reduction (*: s[n:2], t[2:2][ : ]) nogroup
 	for (j = 0; j < 2; j++)
 	  {
 	    m[1].s += 6;
diff --git a/libgomp/testsuite/libgomp.c-c++-common/allocate-1.c b/libgomp/testsuite/libgomp.c-c++-common/allocate-1.c
index d3af3b8..2e6b22e 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/allocate-1.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/allocate-1.c
@@ -126,7 +126,7 @@ foo (int x, int *p, int *q, int px, omp_allocator_handle_t h, int fl)
 	if ((fl & 2) && (((uintptr_t) &i5) & 63) != 0)
 	  abort ();
       }
-    #pragma omp for reduction(+:p[2:px], q[:3], r2) allocate(h: p, q, r2)
+    #pragma omp for reduction(+:p[2:px], q[ :3], r2) allocate(h: p, q, r2)
     for (i = 0; i < 32; i++)
       {
 	p[2] += i;
diff --git a/libgomp/testsuite/libgomp.c-c++-common/allocate-3.c b/libgomp/testsuite/libgomp.c-c++-common/allocate-3.c
index 0b50744..373064e 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/allocate-3.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/allocate-3.c
@@ -148,7 +148,7 @@ foo (int x, int *p, int *q, int px, omp_allocator_handle_t h, int fl)
 	if ((((uintptr_t) &i5) & 31) != 0)
 	  abort ();
       }
-    #pragma omp for reduction(+:p[2:px], q[:3], r2) allocate(align (16), allocator (h): p, q, r2)
+    #pragma omp for reduction(+:p[2:px], q[ :3], r2) allocate(align (16), allocator (h): p, q, r2)
     for (i = 0; i < 32; i++)
       {
 	p[2] += i;
diff --git a/libgomp/testsuite/libgomp.c-c++-common/baseptrs-2.c b/libgomp/testsuite/libgomp.c-c++-common/baseptrs-2.c
index e335d7d..d8ee3d1 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/baseptrs-2.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/baseptrs-2.c
@@ -39,7 +39,7 @@ int main (int argc, char *argv[])
 
 #pragma omp target map(to: p->b, p->b[0], p->c, p->c[0], p->b->c, p->b->c[0]) \
 		   map(to: p->b->c->ptr, p->c->ptr) \
-		   map(tofrom: p->b->c->ptr[:N], p->c->ptr[:N])
+		   map(tofrom: p->b->c->ptr[ :N], p->c->ptr[ :N])
   {
     for (int i = 0; i < N; i++)
       {
diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-10.c b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-10.c
new file mode 100644
index 0000000..00eb48b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-10.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#define N 64
+
+typedef struct {
+  int *arr;
+  int size;
+} B;
+
+#pragma omp declare mapper (mapB : B myb) map(to: myb.size, myb.arr) \
+					  map(tofrom: myb.arr[0:myb.size])
+// While GCC handles more, only default is ...
+#pragma omp declare mapper (default : B myb) map(to: myb.size, myb.arr) \
+					  map(tofrom: myb.arr[0:myb.size])
+
+struct A {
+  int *arr1;
+  B *arr2;
+  int arr3[N];
+};
+
+int
+main (int argc, char *argv[])
+{
+  struct A var;
+
+  memset (&var, 0, sizeof var);  /* Also zeroes var.arr3.  */
+  var.arr1 = (int *) calloc (N, sizeof (int));
+  var.arr2 = (B *) malloc (sizeof (B));
+  var.arr2->arr = (int *) calloc (N, sizeof (int));  /* int elements.  */
+  var.arr2->size = N;
+
+  {
+    // ... permitted here:
+    #pragma omp declare mapper (struct A x) map(to: x.arr1, x.arr2) \
+			  map(tofrom: x.arr1[0:N]) \
+			  map(mapper(default), tofrom: x.arr2[0:1])
+    #pragma omp target
+    {
+      for (int i = 0; i < N; i++)
+	{
+	  var.arr1[i]++;
+	  var.arr2->arr[i]++;
+	}
+    }
+  }
+
+  for (int i = 0; i < N; i++)
+    {
+      assert (var.arr1[i] == 1);
+      assert (var.arr2->arr[i] == 1);
+      assert (var.arr3[i] == 0);  /* Never written in the target region.  */
+    }
+
+  free (var.arr1);
+  free (var.arr2->arr);
+  free (var.arr2);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-11.c b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-11.c
new file mode 100644
index 0000000..942d6a5
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-11.c
@@ -0,0 +1,59 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#define N 64
+
+typedef struct B_tag {
+  int *arr;
+  int size;
+} B;
+
+#pragma omp declare mapper (B myb) map(to: myb.size, myb.arr) \
+				   map(tofrom: myb.arr[0:myb.size])
+
+struct A {
+  int *arr1;
+  B *arr2;
+  int arr3[N];
+};
+
+int
+main (int argc, char *argv[])
+{
+  struct A var;
+
+  memset (&var, 0, sizeof var);
+  var.arr1 = (int *) calloc (N, sizeof (int));
+  var.arr2 = (B *) malloc (sizeof (B));
+  var.arr2->arr = (int *) calloc (N, sizeof (int));
+  var.arr2->size = N;
+
+  {
+    #pragma omp declare mapper (struct A x) map(to: x.arr1, x.arr2) \
+			map(tofrom: x.arr1[0:N]) map(tofrom: x.arr2[0:1])
+    #pragma omp target
+    {
+      for (int i = 0; i < N; i++)
+	{
+	  var.arr1[i]++;
+	  var.arr2->arr[i]++;
+	}
+    }
+  }
+
+  for (int i = 0; i < N; i++)
+    {
+      assert (var.arr1[i] == 1);
+      assert (var.arr2->arr[i] == 1);
+      assert (var.arr3[i] == 0);
+    }
+
+  free (var.arr1);
+  free (var.arr2->arr);
+  free (var.arr2);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-12.c b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-12.c
new file mode 100644
index 0000000..cfc6a91
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-12.c
@@ -0,0 +1,94 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#define N 64
+
+typedef struct {
+  int *arr;
+  int size;
+} B;
+
+#pragma omp declare mapper (samename : B myb) map(to: myb.size, myb.arr) \
+					      map(tofrom: myb.arr[0:myb.size])
+// While GCC handles more, only default is ...
+#pragma omp declare mapper (default : B myb) map(to: myb.size, myb.arr) \
+					      map(tofrom: myb.arr[0:myb.size])
+typedef struct {
+  int *arr;
+  int size;
+} C;
+
+
+struct A {
+  int *arr1;
+  B *arr2;
+  C *arr3;
+};
+
+int
+main (int argc, char *argv[])
+{
+  struct A var;
+
+  memset (&var, 0, sizeof var);
+  var.arr1 = (int *) calloc (N, sizeof (int));
+  var.arr2 = (B *) malloc (sizeof (B));
+  var.arr2->arr = (int *) calloc (N, sizeof (int));
+  var.arr2->size = N;
+  var.arr3 = (C *) malloc (sizeof (C));
+  var.arr3->arr = (int *) calloc (N, sizeof (int));
+  var.arr3->size = N;
+
+  {
+    // ... permitted here.
+    #pragma omp declare mapper (struct A x) map(to: x.arr1, x.arr2) \
+			map(tofrom: x.arr1[0:N]) \
+			map(mapper(default), tofrom: x.arr2[0:1])
+    #pragma omp target
+    {
+      for (int i = 0; i < N; i++)
+	{
+	  var.arr1[i]++;
+	  var.arr2->arr[i]++;
+	}
+    }
+  }
+
+  {
+    #pragma omp declare mapper (samename : C myc) map(to: myc.size, myc.arr) \
+			map(tofrom: myc.arr[0:myc.size])
+    // While GCC handles more, only default is ...
+    #pragma omp declare mapper (default : C myc) map(to: myc.size, myc.arr) \
+			map(tofrom: myc.arr[0:myc.size])
+    // ... permitted here.
+    #pragma omp declare mapper (struct A x) map(to: x.arr1, x.arr3) \
+			map(tofrom: x.arr1[0:N]) \
+			map(mapper( default ) , tofrom: *x.arr3)
+    #pragma omp target
+    {
+      for (int i = 0; i < N; i++)
+	{
+	  var.arr1[i]++;
+	  var.arr3->arr[i]++;
+	}
+    }
+  }
+
+  for (int i = 0; i < N; i++)
+    {
+      assert (var.arr1[i] == 2);
+      assert (var.arr2->arr[i] == 1);
+      assert (var.arr3->arr[i] == 1);
+    }
+
+  free (var.arr1);
+  free (var.arr2->arr);
+  free (var.arr2);
+  free (var.arr3->arr);
+  free (var.arr3);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-13.c b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-13.c
new file mode 100644
index 0000000..c4784eb
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-13.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+
+#include <assert.h>
+
+struct T {
+  int a;
+  int b;
+  int c;
+};
+
+void foo (void)
+{
+  struct T x;
+  x.a = x.b = x.c = 0;
+
+#pragma omp target
+  {
+    x.a++;
+    x.c++;
+  }
+
+  assert (x.a == 1);
+  assert (x.b == 0);
+  assert (x.c == 1);
+}
+
+// An identity mapper.  This should do the same thing as the default!
+#pragma omp declare mapper (struct T v) map(v)
+
+void bar (void)
+{
+  struct T x;
+  x.a = x.b = x.c = 0;
+
+#pragma omp target
+  {
+    x.b++;
+  }
+
+#pragma omp target map(x)
+  {
+    x.a++;
+  }
+
+  assert (x.a == 1);
+  assert (x.b == 1);
+  assert (x.c == 0);
+}
+
+int main (int argc, char *argv[])
+{
+  foo ();
+  bar ();
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-14.c b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-14.c
new file mode 100644
index 0000000..3e6027e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-14.c
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <assert.h>
+
+struct Z {
+  int *arr;
+};
+
+void baz (struct Z *zarr, int len)
+{
+#pragma omp declare mapper (struct Z myvar) map(to: myvar.arr) \
+					    map(tofrom: myvar.arr[0:len])
+  zarr[0].arr = (int *) calloc (len, sizeof (int));
+  zarr[5].arr = (int *) calloc (len, sizeof (int));
+
+#pragma omp target map(zarr, *zarr)
+  {
+    for (int i = 0; i < len; i++)
+      zarr[0].arr[i]++;
+  }
+
+#pragma omp target map(zarr, zarr[5])
+  {
+    for (int i = 0; i < len; i++)
+      zarr[5].arr[i]++;
+  }
+
+#pragma omp target map(zarr[5])
+  {
+    for (int i = 0; i < len; i++)
+      zarr[5].arr[i]++;
+  }
+
+#pragma omp target map(zarr, zarr[5:1])
+  {
+    for (int i = 0; i < len; i++)
+      zarr[5].arr[i]++;
+  }
+
+  for (int i = 0; i < len; i++)
+    assert (zarr[0].arr[i] == 1);
+
+  for (int i = 0; i < len; i++)
+    assert (zarr[5].arr[i] == 3);
+
+  free (zarr[5].arr);
+  free (zarr[0].arr);
+}
+
+int
+main (int argc, char *argv[])
+{
+  struct Z myzarr[10];
+  baz (myzarr, 256);
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-9.c b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-9.c
new file mode 100644
index 0000000..324d535
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-9.c
@@ -0,0 +1,62 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#define N 64
+
+struct A {
+  int *arr1;
+  float *arr2;
+  int arr3[N];
+};
+
+int
+main (int argc, char *argv[])
+{
+  struct A var;
+
+  memset (&var, 0, sizeof var);
+  var.arr1 = (int *) calloc (N, sizeof (int));
+  var.arr2 = (float *) calloc (N, sizeof (float));
+
+  {
+    #pragma omp declare mapper (struct A x) map(to: x.arr1) \
+					    map(tofrom: x.arr1[0:N])
+    #pragma omp target
+    {
+      for (int i = 0; i < N; i++)
+	var.arr1[i]++;
+    }
+  }
+
+  {
+    #pragma omp declare mapper (struct A x) map(to: x.arr2) \
+					    map(tofrom: x.arr2[0:N])
+    #pragma omp target
+    {
+      for (int i = 0; i < N; i++)
+	var.arr2[i]++;
+    }
+  }
+
+  {
+    #pragma omp declare mapper (struct A x) map(tofrom: x.arr3[0:N])
+    #pragma omp target
+    {
+      for (int i = 0; i < N; i++)
+	var.arr3[i]++;
+    }
+  }
+
+  for (int i = 0; i < N; i++)
+    {
+      assert (var.arr1[i] == 1);
+      assert (var.arr2[i] == 1);
+      assert (var.arr3[i] == 1);
+    }
+
+  free (var.arr1);
+  free (var.arr2);
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-variant-1.c b/libgomp/testsuite/libgomp.c-c++-common/declare-variant-1.c
new file mode 100644
index 0000000..b6fc40e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/declare-variant-1.c
@@ -0,0 +1,40 @@
+/* { dg-do run } */
+/* { dg-additional-options "-fdump-tree-gimple" } */
+
+/* PR middle-end/121922  */
+
+/* Failed to re-check the global flag due to tree sharing.  */
+
+extern int flag;
+int flag = 0;
+
+int
+test_with_flag ()
+{
+  return flag;
+}
+
+#pragma omp declare variant (test_with_flag) match (user={condition(score(10): flag > 1)})
+int
+test ()
+{
+  return 0;
+}
+
+void
+doit ()
+{
+  flag = 0;
+  if (test () != 0) __builtin_abort ();
+  flag = 1;
+  if (test () != 0) __builtin_abort ();
+  flag = 42;
+  if (test () != 42) __builtin_abort ();
+}
+
+int main ()
+{
+  doit ();
+}
+
+/* { dg-final { scan-tree-dump-times "flag\\.\[^=\]*= flag;\[\n\r\]+ *if \\(flag\\.\[^>\]*> 1\\)" 3 "gimple" } } */
diff --git a/libgomp/testsuite/libgomp.c-c++-common/delim-declare-variant-1.c b/libgomp/testsuite/libgomp.c-c++-common/delim-declare-variant-1.c
new file mode 100644
index 0000000..916f8a6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/delim-declare-variant-1.c
@@ -0,0 +1,45 @@
+/* Check basic functionality for the delimited form of "declare variant"
+   - no error re duplicate definitions
+   - variants are registered and correctly resolved at call site.  */
+
+int foo (int a)
+{
+  return a;
+}
+
+int bar (int x)
+{
+  return x;
+}
+
+#pragma omp begin declare variant match (construct={target})
+int foo (int a)
+{
+  return a + 1;
+}
+
+int bar (int x)
+{
+  return x * 2;
+}
+#pragma omp end declare variant
+
+/* Because of the high score value, this variant for "bar" should always be
+   selected even when the one above also matches.  */
+#pragma omp begin declare variant match (implementation={vendor(score(10000):"gnu")})
+int bar (int x)
+{
+  return x * 4;
+}
+#pragma omp end declare variant
+
+int main (void)
+{
+  if (foo (42) != 42) __builtin_abort ();
+  if (bar (3) != 12) __builtin_abort ();
+#pragma omp target
+  {
+    if (foo (42) != 43) __builtin_abort ();
+    if (bar (3) != 12) __builtin_abort ();
+  }
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/delim-declare-variant-2.c b/libgomp/testsuite/libgomp.c-c++-common/delim-declare-variant-2.c
new file mode 100644
index 0000000..152b9f3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/delim-declare-variant-2.c
@@ -0,0 +1,47 @@
+/* Check that the correct function is used;
+   assumes that vendor(gnu) is always true.  */
+
+int inner() { return 1; }
+
+int outer(int is_novar) {
+  int k;
+  if (!is_novar) __builtin_abort();
+
+  k = inner();
+  if (k != 22) __builtin_abort();
+
+  #pragma omp dispatch novariants(1)
+    k = inner();
+  if (k != 1) __builtin_abort();
+  return 3;
+}
+
+#pragma omp begin declare variant match(implementation={vendor(gnu)})
+int outer(int is_novar) {
+  int k;
+  if (is_novar) __builtin_abort();
+
+  k = inner();
+  if (k != 22) __builtin_abort();
+
+  #pragma omp dispatch novariants(1)
+    k = inner();
+  if (k != 1) __builtin_abort();
+  return 44;
+}
+
+int inner() { return 22; }
+#pragma omp end declare variant
+
+int
+main()
+{
+  int j;
+  j = outer(0);
+  if (j != 44) __builtin_abort();
+
+  #pragma omp dispatch novariants(1)
+    j = outer(1);
+  if (j != 3) __builtin_abort();
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/depend-iterator-2.c b/libgomp/testsuite/libgomp.c-c++-common/depend-iterator-2.c
index d9cbfdc..da83c67 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/depend-iterator-2.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/depend-iterator-2.c
@@ -4,53 +4,53 @@ __attribute__((noipa)) void
 foo (int *p, int i)
 {
   #pragma omp task depend (out: p[0])
-  v++;
+  v = v + 1;
   #pragma omp task depend (in: p[0])
-  v++;
+  v = v + 1;
   #pragma omp task depend (inout: p[0])
-  v++;
+  v = v + 1;
   #pragma omp task depend (mutexinoutset: p[0])
-  v++;
+  v = v + 1;
   #pragma omp task depend (out: p[0]) depend (in: p[1])
-  v++;
+  v = v + 1;
   #pragma omp task depend (in: p[0]) depend (inout: p[1])
-  v++;
+  v = v + 1;
   #pragma omp task depend (inout: p[0]) depend (mutexinoutset: p[1])
-  v++;
+  v = v + 1;
   #pragma omp task depend (mutexinoutset: p[0]) depend (out: p[1])
-  v++;
+  v = v + 1;
   #pragma omp task depend (iterator (j=0:2) , out : p[j])
-  v++;
+  v = v + 1;
   #pragma omp task depend (iterator (j=0:2) , in : p[j])
-  v++;
+  v = v + 1;
   #pragma omp task depend (iterator (j=0:2) , inout : p[j])
-  v++;
+  v = v + 1;
   #pragma omp task depend (iterator (j=0:2) , mutexinoutset : p[j])
-  v++;
+  v = v + 1;
   #pragma omp task depend (iterator (j=0:2) , out : p[j]) depend (iterator (j=0:2) , in : p[j + 2])
-  v++;
+  v = v + 1;
   #pragma omp task depend (iterator (j=0:2) , in : p[j]) depend (iterator (j=0:2) , inout : p[j + 2])
-  v++;
+  v = v + 1;
   #pragma omp task depend (iterator (j=0:2) , inout : p[j]) depend (iterator (j=0:2) , mutexinoutset : p[j + 2])
-  v++;
+  v = v + 1;
   #pragma omp task depend (iterator (j=0:2) , mutexinoutset : p[j]) depend (iterator (j=0:2) , out : p[j + 2])
-  v++;
+  v = v + 1;
   #pragma omp task depend (iterator (j=0:i) , out : p[j])
-  v++;
+  v = v + 1;
   #pragma omp task depend (iterator (j=0:i) , in : p[j])
-  v++;
+  v = v + 1;
   #pragma omp task depend (iterator (j=0:i) , inout : p[j])
-  v++;
+  v = v + 1;
   #pragma omp task depend (iterator (j=0:i) , mutexinoutset : p[j])
-  v++;
+  v = v + 1;
   #pragma omp task depend (iterator (j=0:i) , out : p[j]) depend (iterator (j=0:i) , in : p[j + 2])
-  v++;
+  v = v + 1;
   #pragma omp task depend (iterator (j=0:i) , in : p[j]) depend (iterator (j=0:i) , inout : p[j + 2])
-  v++;
+  v = v + 1;
   #pragma omp task depend (iterator (j=0:i) , inout : p[j]) depend (iterator (j=0:i) , mutexinoutset : p[j + 2])
-  v++;
+  v = v + 1;
   #pragma omp task depend (iterator (j=0:i) , mutexinoutset : p[j]) depend (iterator (j=0:i) , out : p[j + 2])
-  v++;
+  v = v + 1;
 }
 
 int
diff --git a/libgomp/testsuite/libgomp.c-c++-common/dispatch-1.c b/libgomp/testsuite/libgomp.c-c++-common/dispatch-1.c
index 0efc075..ab11303 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/dispatch-1.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/dispatch-1.c
@@ -45,7 +45,7 @@ int test (int n)
     }
 
   int f, last_dev = omp_get_num_devices () - 1;
-#pragma omp target data map(to: av[:n]) map(from: d_bv[:n]) device(last_dev) if (n == 1024)
+#pragma omp target data map(to: av[ :n]) map(from: d_bv[ :n]) device(last_dev) if (n == 1024)
   {
     #pragma omp dispatch nocontext(n > 1024) novariants(n < 1024) device(last_dev)
     f = foo (d_bv, av, n);
diff --git a/libgomp/testsuite/libgomp.c-c++-common/dispatch-2.c b/libgomp/testsuite/libgomp.c-c++-common/dispatch-2.c
index faa0d8a..187fc61 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/dispatch-2.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/dispatch-2.c
@@ -53,7 +53,7 @@ int test (int n)
     }
 
   int f, last_dev = omp_get_num_devices () - 1;
-#pragma omp target data map(to: av[:n]) map(from: d_bv[:n]) device(last_dev) if (n == 1024)
+#pragma omp target data map(to: av[ :n]) map(from: d_bv[ :n]) device(last_dev) if (n == 1024)
   {
     #pragma omp dispatch nocontext(n > 1024) novariants(n < 1024) device(last_dev)
     f = foo (d_bv, av, n);
diff --git a/libgomp/testsuite/libgomp.c-c++-common/interop-2.c b/libgomp/testsuite/libgomp.c-c++-common/interop-2.c
index a7526dc..ab98949 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/interop-2.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/interop-2.c
@@ -58,7 +58,7 @@ test_async (const int dev)
      As OpenMP_VV's Issue #863 shows, the overhead is high enough to
      fail even when only doing an atomic integer increment.  */
 
-  #pragma omp target device(dev) map(A) depend(out: A[:N]) nowait
+  #pragma omp target device(dev) map(A) depend(out: A[ :N]) nowait
   for (int i = 0; i < N; i++)
     #pragma omp atomic update
     A[i] += __builtin_sin (2*i*M_PI/N);
@@ -68,11 +68,11 @@ test_async (const int dev)
   if (obj1 == omp_interop_none)
     {
       // Same as below as 'nowait' is ignored.
-      #pragma omp interop destroy(obj1) depend(in: A[:N]) nowait
+      #pragma omp interop destroy(obj1) depend(in: A[ :N]) nowait
     }
   else
     {
-      #pragma omp interop destroy(obj1) depend(in: A[:N])
+      #pragma omp interop destroy(obj1) depend(in: A[ :N])
     }
 
   /* ... this code is only executed once the dependency as been fulfilled.  */
@@ -93,7 +93,7 @@ test_async (const int dev)
 
   /* Integer */
 
-  #pragma omp target device(dev) map(B) depend(out: B[:N]) nowait
+  #pragma omp target device(dev) map(B) depend(out: B[ :N]) nowait
   for (int i = 0; i < N; i++)
     #pragma omp atomic update
     B[i] += 42;
@@ -102,11 +102,11 @@ test_async (const int dev)
   if (obj2 == omp_interop_none)
     {
       // Same as below as 'nowait' is ignored.
-      #pragma omp interop use(obj2) depend(in: B[:N]) nowait
+      #pragma omp interop use(obj2) depend(in: B[ :N]) nowait
     }
   else
     {
-      #pragma omp interop use(obj2) depend(in: B[:N])
+      #pragma omp interop use(obj2) depend(in: B[ :N])
     }
 
   for (int i = 0; i < N; i++)
diff --git a/libgomp/testsuite/libgomp.c-c++-common/map-arrayofstruct-2.c b/libgomp/testsuite/libgomp.c-c++-common/map-arrayofstruct-2.c
index ff7ce0e..55bd607 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/map-arrayofstruct-2.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/map-arrayofstruct-2.c
@@ -54,5 +54,5 @@ int main (void)
 }
 
 /* { dg-output "(\n|\r|\r\n)" { target offload_device_nonshared_as } } */
-/* { dg-output "libgomp: Mapped array elements must be the same .*(\n|\r|\r\n)+" { target offload_device_nonshared_as } } */
+/* { dg-output "libgomp: Mapped array elements must be the same or in increasing address order .*(\n|\r|\r\n)+" { target offload_device_nonshared_as } } */
 /* { dg-shouldfail "" { offload_device_nonshared_as } } */
diff --git a/libgomp/testsuite/libgomp.c-c++-common/map-arrayofstruct-3.c b/libgomp/testsuite/libgomp.c-c++-common/map-arrayofstruct-3.c
index 770ac2a..0352682 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/map-arrayofstruct-3.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/map-arrayofstruct-3.c
@@ -64,5 +64,5 @@ int main (void)
 }
 
 /* { dg-output "(\n|\r|\r\n)" { target offload_device_nonshared_as } } */
-/* { dg-output "libgomp: Mapped array elements must be the same .*(\n|\r|\r\n)+" { target offload_device_nonshared_as } } */
+/* { dg-output "libgomp: Mapped array elements must be the same or in increasing address order .*(\n|\r|\r\n)+" { target offload_device_nonshared_as } } */
 /* { dg-shouldfail "" { offload_device_nonshared_as } } */
diff --git a/libgomp/testsuite/libgomp.c-c++-common/matrix-omp-target-teams-distribute-parallel-for-1.c b/libgomp/testsuite/libgomp.c-c++-common/matrix-omp-target-teams-distribute-parallel-for-1.c
index c86050e..587fb77 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/matrix-omp-target-teams-distribute-parallel-for-1.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/matrix-omp-target-teams-distribute-parallel-for-1.c
@@ -2,7 +2,7 @@
 /* { dg-do run { target { offload_device } } } */
 /* { dg-additional-options "-Wall -Wno-unknown-pragmas" } */
 
-#define COMMON_DIRECTIVE omp target teams distribute parallel for map(tofrom:result[:dim0 * dim1]) map(to:matrix1[0:dim0 * dim1], matrix2[0:dim0 * dim1])
+#define COMMON_DIRECTIVE omp target teams distribute parallel for map(tofrom:result[ :dim0 * dim1]) map(to:matrix1[0:dim0 * dim1], matrix2[0:dim0 * dim1])
 #define COLLAPSE_1 collapse(1)
 #define COLLAPSE_2 collapse(2)
 #define COLLAPSE_3
diff --git a/libgomp/testsuite/libgomp.c-c++-common/metadirective-1.c b/libgomp/testsuite/libgomp.c-c++-common/metadirective-1.c
index a57d6fd..fbe4ac3 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/metadirective-1.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/metadirective-1.c
@@ -1,4 +1,5 @@
-/* { dg-do run } */
+/* { dg-do run { target { ! offload_target_nvptx } } } */
+/* { dg-do compile { target offload_target_nvptx } } */
 
 #define N 100
 
@@ -7,12 +8,17 @@ f (int x[], int y[], int z[])
 {
   int i;
 
+  // The following fails as on the host the target side cannot be
+  // resolved - and the 'teams' or not status affects how 'target'
+  // is called.
+  // Note also the dg-do compile above for offload_target_nvptx
   #pragma omp target map(to: x[0:N], y[0:N]) map(from: z[0:N])
     #pragma omp metadirective \
 	when (device={arch("nvptx")}: teams loop) \
 	default (parallel loop)
       for (i = 0; i < N; i++)
 	z[i] = x[i] * y[i];
+  /* { dg-bogus "'target' construct with nested 'teams' construct contains directives outside of the 'teams' construct" "PR118694" { xfail offload_target_nvptx } .-6 }  */
 }
 
 int
diff --git a/libgomp/testsuite/libgomp.c-c++-common/omp-default-device.c b/libgomp/testsuite/libgomp.c-c++-common/omp-default-device.c
new file mode 100644
index 0000000..5489f01
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/omp-default-device.c
@@ -0,0 +1,59 @@
+#include <omp.h>
+
+#if __cplusplus
+static_assert (omp_default_device < -1
+	       && omp_default_device != omp_invalid_device, "");
+#else
+_Static_assert (omp_default_device < -1
+		&& omp_default_device != omp_invalid_device, "");
+#endif
+
+static int
+is_same_dev (int d1, int d2)
+{
+  int num_dev = omp_get_num_devices ();
+  if (d1 == omp_initial_device)
+    d1 = num_dev;
+  if (d2 == omp_initial_device)
+    d2 = num_dev;
+  return (d1 == d2);
+}
+
+int
+main()
+{
+  int dev = -99;  /* Sentinel; overwritten inside the target region.  */
+  int def_dev = omp_get_default_device ();
+  #pragma omp target map(from: dev) device(omp_default_device)
+    dev = omp_get_device_num ();
+
+  if (!is_same_dev (def_dev, dev))
+    __builtin_abort ();
+
+  for (def_dev = omp_initial_device; def_dev <= omp_get_num_devices ();
+       def_dev++)
+    {
+      const char* uid = omp_get_uid_from_device(def_dev);
+      omp_set_default_device (def_dev);
+      dev = -99;
+      #pragma omp target map(from: dev) device(omp_default_device)
+        dev = omp_get_device_num ();
+      if (!is_same_dev (def_dev, dev))
+        __builtin_abort ();
+
+      /* Passing omp_default_device shall not modify the ICV.  */
+      omp_set_default_device (omp_default_device);
+      if (def_dev != omp_get_default_device ())
+        __builtin_abort ();
+
+      /* Assume the pointer, and not only the string, is the same.  */
+      if (uid != omp_get_uid_from_device (omp_default_device))
+        __builtin_abort ();
+    }
+
+  omp_set_default_device (omp_invalid_device);
+  /* Passing omp_default_device shall not modify the ICV.  */
+  omp_set_default_device (omp_default_device);
+  if (omp_invalid_device != omp_get_default_device ())
+    __builtin_abort ();
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-2.c b/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-2.c
new file mode 100644
index 0000000..b36d2f5
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-2.c
@@ -0,0 +1,62 @@
+// PR libgomp/120444
+// Async version
+
+#include <omp.h>
+
+int main()
+{
+  #pragma omp parallel for
+  for (int dev = omp_initial_device; dev <= omp_get_num_devices (); dev++)
+    {
+      char *ptr = (char *) omp_target_alloc (sizeof(int) * 1024, dev);
+
+      omp_depend_t dep;
+      #pragma omp depobj(dep) depend(inout: ptr)
+
+      /* Play also around with the alignment - as hsa_amd_memory_fill operates
+	 on multiples of 4 bytes (uint32_t).  */
+
+      for (int start = 0; start < 32; start++)
+	for (int tail = 0; tail < 32; tail++)
+	  {
+	    unsigned char val = '0' + start + tail;
+#if __cplusplus
+	    void *ptr2 = omp_target_memset_async (ptr + start, val,
+					    1024 - start - tail, dev, 0);
+#else
+	    void *ptr2 = omp_target_memset_async (ptr + start, val,
+					    1024 - start - tail, dev, 0, nullptr);
+#endif
+	    if (ptr + start != ptr2)
+	      __builtin_abort ();
+
+	    #pragma omp taskwait
+
+	    #pragma omp target device(dev) is_device_ptr(ptr) depend(depobj: dep) nowait
+	      for (int i = start; i < 1024 - start - tail; i++)
+		{
+		  if (ptr[i] != val)
+		    __builtin_abort ();
+		  ptr[i] += 2;
+		}
+
+	    omp_target_memset_async (ptr + start, val + 3,
+				     1024 - start - tail, dev, 1, &dep);
+
+	    #pragma omp target device(dev) is_device_ptr(ptr) depend(depobj: dep) nowait
+	      for (int i = start; i < 1024 - start - tail; i++)
+		{
+		  if (ptr[i] != val + 3)
+		    __builtin_abort ();
+		  ptr[i] += 1;
+		}
+
+	    omp_target_memset_async (ptr + start, val - 3,
+				     1024 - start - tail, dev, 1, &dep);
+
+	    #pragma omp taskwait depend (depobj: dep)
+	  }
+      #pragma omp depobj(dep) destroy
+      omp_target_free (ptr, dev);
+    }
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-3.c b/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-3.c
new file mode 100644
index 0000000..c0e4fa9
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-3.c
@@ -0,0 +1,80 @@
+#include <stddef.h>
+#include <stdint.h>
+#include <omp.h>
+
+#define MIN(x,y) ((x) < (y) ? (x) : (y))
+
+enum { N = 524288 + 8 };
+
+static void
+init_val (int8_t *ptr, int val, size_t count)
+{
+  #pragma omp target is_device_ptr(ptr) firstprivate(val, count)
+  __builtin_memset (ptr, val, count);
+}
+
+static void
+check_val (int8_t *ptr, int val, size_t count)
+{
+  if (count == 0)
+    return;
+  #pragma omp target is_device_ptr(ptr) firstprivate(val, count)
+  for (size_t i = 0; i < count; i++)
+    if (ptr[i] != val) __builtin_abort ();
+}
+
+static void
+test_it (int8_t *ptr, int lshift, size_t count)
+{
+  if (N < count + lshift) __builtin_abort ();
+  if (lshift >= 4) __builtin_abort ();
+  ptr += lshift;
+
+  init_val (ptr, 'z', MIN (count + 32, N - lshift));
+
+  omp_target_memset (ptr, '1', count, omp_get_default_device());
+
+  check_val (ptr, '1', count);
+  check_val (ptr + count, 'z', MIN (32, N - lshift - count));
+}
+
+
+int main()
+{
+  size_t size;
+  int8_t *base = (int8_t *) omp_target_alloc (N + 3, omp_get_default_device());
+  int8_t *ptr = base + (4 - (uintptr_t) base % 4) % 4;  /* Align to 4 bytes.  */
+  if ((uintptr_t) ptr % 4 != 0) __builtin_abort ();
+
+  test_it (ptr, 0, 1);
+  test_it (ptr, 3, 1);
+  test_it (ptr, 0, 4);
+  test_it (ptr, 3, 4);
+  test_it (ptr, 0, 5);
+  test_it (ptr, 3, 5);
+  test_it (ptr, 0, 6);
+  test_it (ptr, 3, 6);
+
+  for (int i = 1; i <= 9; i++)
+    {
+      switch (i)
+	{
+	case 1: size = 16; break; // = 2^4 bytes
+	case 2: size = 32; break; // = 2^5 bytes
+	case 3: size = 64; break; // = 2^6 bytes
+	case 4: size = 128; break; // = 2^7 bytes
+	case 5: size = 256; break; // = 2^8 bytes
+	case 6: size = 512; break; // = 2^9 bytes
+	case 7: size = 65536; break; // = 2^16 bytes
+	case 8: size = 262144; break; // = 2^18 bytes
+	case 9: size = 524288; break; // = 2^19 bytes
+	default: __builtin_abort ();
+	}
+      test_it (ptr, 0, size);
+      test_it (ptr, 3, size);
+      test_it (ptr, 0, size + 1);
+      test_it (ptr, 3, size + 1);
+      test_it (ptr, 3, size + 2);
+    }
+  omp_target_free (base, omp_get_default_device());  /* Free the unshifted ptr.  */
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset.c b/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset.c
new file mode 100644
index 0000000..01909f8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset.c
@@ -0,0 +1,62 @@
+// PR libgomp/120444
+
+#include <omp.h>
+
+int main()
+{
+  for (int dev = omp_initial_device; dev <= omp_get_num_devices (); dev++)
+    {
+      char *ptr = (char *) omp_target_alloc (sizeof(int) * 1024, dev);
+
+      /* Play also around with the alignment - as hsa_amd_memory_fill operates
+	 on multiples of 4 bytes (uint32_t).  */
+
+      for (int start = 0; start < 32; start++)
+	for (int tail = 0; tail < 32; tail++)
+	  {
+	    unsigned char val = '0' + start + tail;
+	    void *ptr2 = omp_target_memset (ptr + start, val,
+					    1024 - start - tail, dev);
+	    if (ptr + start != ptr2)
+	      __builtin_abort ();
+
+	    #pragma omp target device(dev) is_device_ptr(ptr)
+	      for (int i = start; i < 1024 - start - tail; i++)
+		if (ptr[i] != val)
+		  __builtin_abort ();
+
+	  }
+
+      /* Check 'small' values for correctness.  */
+
+      for (int start = 0; start < 32; start++)
+	for (int size = 0; size <= 64 + 32; size++)
+	  {
+	    omp_target_memset (ptr, 'a' - 2, 1024, dev);
+
+	    unsigned char val = '0' + start + size % 32;
+	    void *ptr2 = omp_target_memset (ptr + start, val, size, dev);
+
+	    if (ptr + start != ptr2)
+	      __builtin_abort ();
+
+	    if (size == 0)
+	      continue;
+
+	    #pragma omp target device(dev) is_device_ptr(ptr)
+	    {
+	      for (int i = 0; i < start; i++)
+		if (ptr[i] != 'a' - 2)
+		  __builtin_abort ();
+	      for (int i = start; i < start + size; i++)
+		if (ptr[i] != val)
+		  __builtin_abort ();
+	      for (int i = start + size; i < 1024; i++)
+		if (ptr[i] != 'a' - 2)
+		  __builtin_abort ();
+	    }
+	  }
+
+      omp_target_free (ptr, dev);
+    }
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/ptr-attach-1.c b/libgomp/testsuite/libgomp.c-c++-common/ptr-attach-1.c
index e7deec6..849dd97 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/ptr-attach-1.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/ptr-attach-1.c
@@ -21,7 +21,7 @@ int main (void)
   int *ptr = (int *) malloc (sizeof (int) * N);
   int *orig_ptr = ptr;
 
-  #pragma omp target map (ptr, ptr[:N])
+  #pragma omp target map (ptr, ptr[ :N])
   {
     for (int i = 0; i < N; i++)
       ptr[i] = N - i;
@@ -36,7 +36,7 @@ int main (void)
 
   S s = { 0 };
   s.ptr = ptr;
-  #pragma omp target map (s, s.ptr[:N])
+  #pragma omp target map (s, s.ptr[ :N])
   {
     for (int i = 0; i < N; i++)
       s.ptr[i] = i;
@@ -61,7 +61,7 @@ int main (void)
   for (int i = 0; i < N; i++)
     gp[i] = i - 1;
 
-  #pragma omp target map (gp[:N])
+  #pragma omp target map (gp[ :N])
   {
     for (int i = 0; i < N; i++)
       gp[i] += 1;
diff --git a/libgomp/testsuite/libgomp.c-c++-common/ptr-attach-2.c b/libgomp/testsuite/libgomp.c-c++-common/ptr-attach-2.c
index 889a4a2..60f938f 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/ptr-attach-2.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/ptr-attach-2.c
@@ -18,10 +18,10 @@ void foo (struct L *l)
       l->m.num_blocks[i] = N;
     }
 
-  #pragma omp target enter data map(to:l[:1])
+  #pragma omp target enter data map(to:l[ :1])
   for (int i = 0; i < N; i++)
     {
-      #pragma omp target enter data map(to:l->m.blocks[i][:l->m.num_blocks[i]])
+      #pragma omp target enter data map(to:l->m.blocks[i][ :l->m.num_blocks[i]])
     }
 
   #pragma omp target
@@ -36,9 +36,9 @@ void foo (struct L *l)
 
   for (int i = 0; i < N; i++)
     {
-      #pragma omp target exit data map(from:l->m.blocks[i][:l->m.num_blocks[i]])
+      #pragma omp target exit data map(from:l->m.blocks[i][ :l->m.num_blocks[i]])
     }
-  #pragma omp target exit data map(from:l[:1])
+  #pragma omp target exit data map(from:l[ :1])
 
 
   for (int i = 0; i < N; i++)
diff --git a/libgomp/testsuite/libgomp.c-c++-common/refcount-1.c b/libgomp/testsuite/libgomp.c-c++-common/refcount-1.c
index 5ccd908..0dd197b 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/refcount-1.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/refcount-1.c
@@ -16,7 +16,7 @@ int main (void)
   unsigned char *p = (unsigned char *) &a;
   unsigned char *q = p + 2;
 
-  #pragma omp target enter data map (alloc:p[:1], q[:1])
+  #pragma omp target enter data map (alloc:p[ :1], q[ :1])
 
   if (d != id)
     {
@@ -40,7 +40,7 @@ int main (void)
 	abort ();
     }
 
-  #pragma omp target exit data map (from:q[:1])
+  #pragma omp target exit data map (from:q[ :1])
 
   if (d != id)
     {
diff --git a/libgomp/testsuite/libgomp.c-c++-common/requires-4.c b/libgomp/testsuite/libgomp.c-c++-common/requires-4.c
index 8cb4821..9eae66a 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/requires-4.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/requires-4.c
@@ -3,6 +3,10 @@
 /* { dg-additional-options "-foffload-options=nvptx-none=-misa=sm_35" { target { offload_target_nvptx } } } */
 /* { dg-additional-sources requires-4-aux.c } */
 
+/* GCC explicitly disables XNACK for gfx908 (and others) as the hardware
+   support is limited, which results in a diagnostic.  */
+/* { dg-xfail-if "gfx908 xnack broken" { offload_target_amdgcn } "-foffload=-march=gfx908" } */
+
 /* Check no diagnostic by device-compiler's or host compiler's lto1.
    Other file uses: 'requires reverse_offload', but that's inactive as
    there are no declare target directives, device constructs nor device routines  */
diff --git a/libgomp/testsuite/libgomp.c-c++-common/requires-4a.c b/libgomp/testsuite/libgomp.c-c++-common/requires-4a.c
index 0e0db92..845d4b3 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/requires-4a.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/requires-4a.c
@@ -3,6 +3,10 @@
 /* { dg-additional-options "-foffload-options=nvptx-none=-misa=sm_35" { target { offload_target_nvptx } } } */
 /* { dg-additional-sources requires-4-aux.c } */
 
+/* GCC explicitly disables XNACK for gfx908 (and others) as the hardware
+   support is limited, which results in a diagnostic.  */
+/* { dg-xfail-if "Unified Shared Memory is enabled, but XNACK is disabled" { offload_target_amdgcn } "-foffload=-march=gfx908" } */
+
 /* Same as requires-4.c, but uses heap memory for 'a'.  */
 
 /* Check no diagnostic by device-compiler's or host compiler's lto1.
diff --git a/libgomp/testsuite/libgomp.c-c++-common/requires-5.c b/libgomp/testsuite/libgomp.c-c++-common/requires-5.c
index d43d78d..cddd464 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/requires-5.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/requires-5.c
@@ -1,6 +1,10 @@
 /* { dg-additional-options "-foffload-options=nvptx-none=-misa=sm_35" { target { offload_target_nvptx } } } */
 /* { dg-additional-sources requires-5-aux.c } */
 
+/* GCC explicitly disables XNACK for gfx908 (and others) as the hardware
+   support is limited, which results in a diagnostic.  */
+/* { dg-xfail-if "Unified Shared Memory is enabled, but XNACK is disabled" { offload_target_amdgcn } "-foffload=-march=gfx908" } */
+
 /* Depending on offload device capabilities, it may print something like the
    following (only) if GOMP_DEBUG=1:
    "devices present but 'omp requires unified_address, unified_shared_memory, reverse_offload' cannot be fulfilled"
diff --git a/libgomp/testsuite/libgomp.c-c++-common/struct-elem-4.c b/libgomp/testsuite/libgomp.c-c++-common/struct-elem-4.c
index 9a23b4f..935f7af 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/struct-elem-4.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/struct-elem-4.c
@@ -20,7 +20,7 @@ int main (void)
 
   int *p = &s.b;
   int *q = &s.d;
-  #pragma omp target enter data map (alloc: p[:1], q[:1])
+  #pragma omp target enter data map (alloc: p[ :1], q[ :1])
 
   s.b = 88;
   s.d = 99;
@@ -36,7 +36,7 @@ int main (void)
 	abort ();
     }
 
-  #pragma omp target exit data map (from: q[:1])
+  #pragma omp target exit data map (from: q[ :1])
   if (d != id)
     {
       if (omp_target_is_present (&s, d))
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-2.c b/libgomp/testsuite/libgomp.c-c++-common/target-2.c
index 0ba766c..6854bb5 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/target-2.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-2.c
@@ -23,7 +23,7 @@ fn2 (int x)
   int i;
   fn1 (b, c, x);
   fn1 (e, d + x, x);
-  #pragma omp target map(to: b, c[:x], d[x:x], e) map(tofrom: s)
+  #pragma omp target map(to: b, c[ :x], d[x:x], e) map(tofrom: s)
     #pragma omp parallel for reduction(+:s)
       for (i = 0; i < x; i++)
 	s += b[i] * c[i] + d[x + i] + sizeof (b) - sizeof (c);
@@ -53,10 +53,10 @@ fn4 (int x)
   int i;
   fn1 (b, c, x);
   fn1 (e, d + x, x);
-  #pragma omp target data map(from: b, c[:x], d[x:x], e)
+  #pragma omp target data map(from: b, c[ :x], d[x:x], e)
     {
-      #pragma omp target update to(b, c[:x], d[x:x], e)
-      #pragma omp target map(c[:x], d[x:x], s)
+      #pragma omp target update to(b, c[ :x], d[x:x], e)
+      #pragma omp target map(c[ :x], d[x:x], s)
 	#pragma omp parallel for reduction(+:s)
 	  for (i = 0; i < x; i++)
 	    {
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-40.c b/libgomp/testsuite/libgomp.c-c++-common/target-40.c
index b466069..554860a 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/target-40.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-40.c
@@ -10,7 +10,7 @@ volatile int v;
 #pragma omp declare target to (v)
 typedef void (*fnp1) (void);
 typedef fnp1 (*fnp2) (void);
-void f1 (void) { v++; }
+void f1 (void) { v += 1; }
 void f2 (void) { v += 4; }
 void f3 (void) { v += 16; f1 (); }
 fnp1 f4 (void) { v += 64; return f2; }
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c b/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c
new file mode 100644
index 0000000..9bf949a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c
@@ -0,0 +1,3 @@
+/* { dg-additional-options -O0 } */
+
+#include "target-abi-struct-1.c"
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1.c b/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1.c
new file mode 100644
index 0000000..d9268af
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1.c
@@ -0,0 +1 @@
+#include "../libgomp.oacc-c-c++-common/abi-struct-1.c"
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-has-device-addr-1.c b/libgomp/testsuite/libgomp.c-c++-common/target-has-device-addr-1.c
index fcc5c9e..119f0f2 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/target-has-device-addr-1.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-has-device-addr-1.c
@@ -26,29 +26,29 @@ main ()
     if (y[i] != i)
       __builtin_abort ();
 
-  #pragma omp target data map(y[:N]) use_device_addr(y)
-    #pragma omp target has_device_addr(y[:N])
+  #pragma omp target data map(y[ :N]) use_device_addr(y)
+    #pragma omp target has_device_addr(y[ :N])
       for (int i = 0; i < N; i++)
 	y[i] = i + 2;
   for (int i = 0; i < N; i++)
     if (y[i] != i + 2)
       __builtin_abort ();
 
-  #pragma omp target data map(y[:N]) use_device_addr(y)
+  #pragma omp target data map(y[ :N]) use_device_addr(y)
     #pragma omp target has_device_addr(y[24])
 	y[24] = 42;
   if (y[24] != 42)
     __builtin_abort ();
 
-  #pragma omp target data map(y[:N]) use_device_addr(y)
-    #pragma omp target has_device_addr(y[24:])
+  #pragma omp target data map(y[ :N]) use_device_addr(y)
+    #pragma omp target has_device_addr(y[24: ])
       for (int i = 24; i < N; i++)
 	y[i] = i + 3;
   for (int i = 24; i < N; i++)
     if (y[i] != i + 3)
       __builtin_abort ();
 
-  #pragma omp target data map(y[:N]) use_device_addr(y)
+  #pragma omp target data map(y[ :N]) use_device_addr(y)
     #pragma omp target has_device_addr(y[12:24])
       for (int i = 12; i < 24; i++)
 	y[i] = i + 4;
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-implicit-map-2.c b/libgomp/testsuite/libgomp.c-c++-common/target-implicit-map-2.c
index 4c49cd0..dd3e45b 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/target-implicit-map-2.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-implicit-map-2.c
@@ -18,29 +18,29 @@ main (void)
   for (int i = 0; i < N; i++)
     a.ptr[i] = 0;
 
-  #pragma omp target enter data map(to: a.ptr, a.ptr[:N])
+  #pragma omp target enter data map(to: a.ptr, a.ptr[ :N])
 
   #pragma omp target
   for (int i = 0; i < N; i++)
     a.ptr[i] += 1;
 
-  #pragma omp target update from(a.ptr[:N])
+  #pragma omp target update from(a.ptr[ :N])
 
   for (int i = 0; i < N; i++)
     if (a.ptr[i] != 1)
       abort ();
 
-  #pragma omp target map(a.ptr[:N])
+  #pragma omp target map(a.ptr[ :N])
   for (int i = 0; i < N; i++)
     a.ptr[i] += 1;
 
-  #pragma omp target update from(a.ptr[:N])
+  #pragma omp target update from(a.ptr[ :N])
 
   for (int i = 0; i < N; i++)
     if (a.ptr[i] != 2)
       abort ();
 
-  #pragma omp target exit data map(from:a.ptr, a.ptr[:N])
+  #pragma omp target exit data map(from:a.ptr, a.ptr[ :N])
 
   free (a.ptr);
 
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-implicit-map-4.c b/libgomp/testsuite/libgomp.c-c++-common/target-implicit-map-4.c
index d0b0cd1..97bb97a 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/target-implicit-map-4.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-implicit-map-4.c
@@ -4,6 +4,7 @@
    and for not mapping the stack variables 'A' and 'B' (not mapped
    but accessible -> USM makes this tested feature even more important.)  */
 
+/* { dg-require-effective-target omp_usm } */
 #pragma omp requires unified_shared_memory
 
 /* Ensure that defaultmap(default : pointer) uses correct OpenMP 5.2
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-implicit-map-5.c b/libgomp/testsuite/libgomp.c-c++-common/target-implicit-map-5.c
index 81a7752..087d20b 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/target-implicit-map-5.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-implicit-map-5.c
@@ -19,29 +19,29 @@ main (void)
     a.ptr[i] = 0;
 
   #pragma omp target enter data map(to: a.ptr)
-  #pragma omp target enter data map(to: a.ptr[:N])
+  #pragma omp target enter data map(to: a.ptr[ :N])
 
   #pragma omp target
   for (int i = 0; i < N; i++)
     a.ptr[i] += 1;
 
-  #pragma omp target update from(a.ptr[:N])
+  #pragma omp target update from(a.ptr[ :N])
 
   for (int i = 0; i < N; i++)
     if (a.ptr[i] != 1)
       abort ();
 
-  #pragma omp target map(a.ptr[:N])
+  #pragma omp target map(a.ptr[ :N])
   for (int i = 0; i < N; i++)
     a.ptr[i] += 1;
 
-  #pragma omp target update from(a.ptr[:N])
+  #pragma omp target update from(a.ptr[ :N])
 
   for (int i = 0; i < N; i++)
     if (a.ptr[i] != 2)
       abort ();
 
-  #pragma omp target exit data map(release: a.ptr[:N])
+  #pragma omp target exit data map(release: a.ptr[ :N])
   #pragma omp target exit data map(release: a.ptr)
 
   free (a.ptr);
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-in-reduction-1.c b/libgomp/testsuite/libgomp.c-c++-common/target-in-reduction-1.c
index 813b5d9..3236cf9 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/target-in-reduction-1.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-in-reduction-1.c
@@ -5,9 +5,9 @@ foo (int x, int *y, int n, int v)
   int u[n], w[n], i;
   for (i = 0; i < n; i++)
     w[i] = u[i] = n + i;
-  #pragma omp taskgroup task_reduction (+: x, y[:2], z[1:2], u, w[1:v])
+  #pragma omp taskgroup task_reduction (+: x, y[ :2], z[1:2], u, w[1:v])
   {
-    #pragma omp task in_reduction (+: x, y[:2], z[1:2], u, w[1:v])
+    #pragma omp task in_reduction (+: x, y[ :2], z[1:2], u, w[1:v])
     {
       x++;
       y[0] += 2;
@@ -16,7 +16,7 @@ foo (int x, int *y, int n, int v)
       u[0] += 5;
       w[1] += 6;
     }
-    #pragma omp target in_reduction (+: x, y[:2], z[1:2], u, w[1:v])
+    #pragma omp target in_reduction (+: x, y[ :2], z[1:2], u, w[1:v])
     {
       x += 4;
       y[0] += 5;
@@ -25,7 +25,7 @@ foo (int x, int *y, int n, int v)
       u[1] += 8;
       w[2] += 7;
     }
-    #pragma omp target in_reduction (+: x, y[:v], z[1:v], u, w[1:2])
+    #pragma omp target in_reduction (+: x, y[ :v], z[1:v], u, w[1:2])
     {
       x += 9;
       y[0] += 10;
@@ -53,9 +53,9 @@ bar (int x, int *y, int n, int v)
   for (i = 0; i < n; i++)
     w[i] = u[i] = n + i;
   #pragma omp parallel master
-  #pragma omp taskgroup task_reduction (+: x, y[:2], z[1:2], u, w[1:v])
+  #pragma omp taskgroup task_reduction (+: x, y[ :2], z[1:2], u, w[1:v])
   {
-    #pragma omp task in_reduction (+: x, y[:2], z[1:2], u, w[1:v])
+    #pragma omp task in_reduction (+: x, y[ :2], z[1:2], u, w[1:v])
     {
       x++;
       y[0] += 2;
@@ -64,7 +64,7 @@ bar (int x, int *y, int n, int v)
       u[0] += 5;
       w[1] += 6;
     }
-    #pragma omp target in_reduction (+: x, y[:2], z[1:2], u, w[1:v])
+    #pragma omp target in_reduction (+: x, y[ :2], z[1:2], u, w[1:v])
     {
       x += 4;
       y[0] += 5;
@@ -73,7 +73,7 @@ bar (int x, int *y, int n, int v)
       u[1] += 8;
       w[2] += 7;
     }
-    #pragma omp target in_reduction (+: x, y[:v], z[1:v], u, w[1:2])
+    #pragma omp target in_reduction (+: x, y[ :v], z[1:v], u, w[1:2])
     {
       x += 9;
       y[0] += 10;
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-in-reduction-2.c b/libgomp/testsuite/libgomp.c-c++-common/target-in-reduction-2.c
index dd56965..7e15972 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/target-in-reduction-2.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-in-reduction-2.c
@@ -15,9 +15,9 @@ foo (struct S x, struct S *y, int n, int v)
       w[i].c[0] = u[i].c[0] = 0;
       w[i].c[1] = u[i].c[1] = 0;
     }
-  #pragma omp taskgroup task_reduction (+: x, y[:2], z[1:2], u, w[1:v])
+  #pragma omp taskgroup task_reduction (+: x, y[ :2], z[1:2], u, w[1:v])
   {
-    #pragma omp task in_reduction (+: x, y[:2], z[1:2], u, w[1:v])
+    #pragma omp task in_reduction (+: x, y[ :2], z[1:2], u, w[1:v])
     {
       x.a++;
       x.b++;
@@ -32,7 +32,7 @@ foo (struct S x, struct S *y, int n, int v)
       w[1].a += 6;
       w[1].b += 16;
     }
-    #pragma omp target in_reduction (+: x, y[:2], z[1:2], u, w[1:v]) map(tofrom: x.a, x.b, x.c[:2])
+    #pragma omp target in_reduction (+: x, y[ :2], z[1:2], u, w[1:v]) map(tofrom: x.a, x.b, x.c[ :2])
     {
       x.a += 4;
       x.b += 14;
@@ -47,7 +47,7 @@ foo (struct S x, struct S *y, int n, int v)
       w[2].a += 7;
       w[2].b += 17;
     }
-    #pragma omp target in_reduction (+: x, y[:v], z[1:v], u, w[1:2])
+    #pragma omp target in_reduction (+: x, y[ :v], z[1:v], u, w[1:2])
     {
       x.a += 9;
       x.b += 19;
@@ -95,9 +95,9 @@ bar (struct S x, struct S *y, int n, int v)
       w[i].c[1] = u[i].c[1] = 0;
     }
   #pragma omp parallel master
-  #pragma omp taskgroup task_reduction (+: x, y[:2], z[1:2], u, w[1:v])
+  #pragma omp taskgroup task_reduction (+: x, y[ :2], z[1:2], u, w[1:v])
   {
-    #pragma omp task in_reduction (+: x, y[:2], z[1:2], u, w[1:v])
+    #pragma omp task in_reduction (+: x, y[ :2], z[1:2], u, w[1:v])
     {
       x.a++;
       x.b++;
@@ -112,7 +112,7 @@ bar (struct S x, struct S *y, int n, int v)
       w[1].a += 6;
       w[1].b += 16;
     }
-    #pragma omp target in_reduction (+: x, y[:2], z[1:2], u, w[1:v]) map(tofrom: x.a, x.b, x.c[:2])
+    #pragma omp target in_reduction (+: x, y[ :2], z[1:2], u, w[1:v]) map(tofrom: x.a, x.b, x.c[ :2])
     {
       x.a += 4;
       x.b += 14;
@@ -127,7 +127,7 @@ bar (struct S x, struct S *y, int n, int v)
       w[2].a += 7;
       w[2].b += 17;
     }
-    #pragma omp target in_reduction (+: x, y[:v], z[1:v], u, w[1:2])
+    #pragma omp target in_reduction (+: x, y[ :v], z[1:v], u, w[1:2])
     {
       x.a += 9;
       x.b += 19;
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-is-accessible-1.c b/libgomp/testsuite/libgomp.c-c++-common/target-is-accessible-1.c
index 2e75c63..71d6b2a 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/target-is-accessible-1.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-is-accessible-1.c
@@ -20,7 +20,7 @@ main ()
   if (!omp_target_is_accessible (p, sizeof (int), omp_initial_device))
     __builtin_abort ();
 
-  if (omp_target_is_accessible (p, sizeof (int), -5))
+  if (omp_target_is_accessible (p, sizeof (int), -6 /* omp_default_device - 1 */))
     __builtin_abort ();
 
   if (omp_target_is_accessible (p, sizeof (int), n + 1))
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-link-3.c b/libgomp/testsuite/libgomp.c-c++-common/target-link-3.c
index c707b38..9664235 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/target-link-3.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-link-3.c
@@ -3,6 +3,7 @@
 #include <stdint.h>
 #include <omp.h>
 
+/* { dg-require-effective-target omp_usm } */
 #pragma omp requires unified_shared_memory
 
 int A[3] = {-3,-4,-5};
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-link-4.c b/libgomp/testsuite/libgomp.c-c++-common/target-link-4.c
index 785055e..009c521 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/target-link-4.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-link-4.c
@@ -3,6 +3,7 @@
 #include <stdint.h>
 #include <omp.h>
 
+/* { dg-require-effective-target omp_usm } */
 #pragma omp requires self_maps
 
 int A[3] = {-3,-4,-5};
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-map-iterators-1.c b/libgomp/testsuite/libgomp.c-c++-common/target-map-iterators-1.c
new file mode 100644
index 0000000..af73399
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-map-iterators-1.c
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+/* { dg-require-effective-target offload_device_nonshared_as } */
+
+/* Test transfer of dynamically-allocated arrays to target using map
+   iterators.  */
+
+#include <stdlib.h>
+
+#define DIM1 8
+#define DIM2 15
+
+int mkarray (int *x[])
+{
+  int expected = 0;
+
+  for (int i = 0; i < DIM1; i++)
+    {
+      x[i] = (int *) malloc (DIM2 * sizeof (int));
+      for (int j = 0; j < DIM2; j++)
+	{
+	  x[i][j] = rand ();
+	  expected += x[i][j];
+	}
+    }
+
+  return expected;
+}
+
+int main (void)
+{
+  int *x[DIM1];
+  int y;
+
+  int expected = mkarray (x);
+
+  #pragma omp target enter data map(to: x)
+  #pragma omp target map(iterator(i=0:DIM1), to: x[i][ :DIM2]) \
+		     map(from: y)
+    {
+      y = 0;
+      for (int i = 0; i < DIM1; i++)
+	for (int j = 0; j < DIM2; j++)
+	  y += x[i][j];
+    }
+
+  return y - expected;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-map-iterators-2.c b/libgomp/testsuite/libgomp.c-c++-common/target-map-iterators-2.c
new file mode 100644
index 0000000..ba3954d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-map-iterators-2.c
@@ -0,0 +1,44 @@
+/* { dg-do run } */
+/* { dg-require-effective-target offload_device_nonshared_as } */
+
+/* Test transfer of dynamically-allocated arrays from target using map
+   iterators.  */
+
+#include <stdlib.h>
+
+#define DIM1 8
+#define DIM2 15
+
+void mkarray (int *x[])
+{
+  for (int i = 0; i < DIM1; i++)
+    x[i] = (int *) malloc (DIM2 * sizeof (int));
+}
+
+int main (void)
+{
+  int *x[DIM1];
+  int y, expected;
+
+  mkarray (x);
+
+  #pragma omp target enter data map(alloc: x)
+  #pragma omp target map(iterator(i=0:DIM1), from: x[i][ :DIM2]) \
+		     map(from: expected)
+    {
+      expected = 0;
+      for (int i = 0; i < DIM1; i++)
+	for (int j = 0; j < DIM2; j++)
+	  {
+	    x[i][j] = (i+1) * (j+1);
+	    expected += x[i][j];
+	  }
+    }
+
+  y = 0;
+  for (int i = 0; i < DIM1; i++)
+    for (int j = 0; j < DIM2; j++)
+      y += x[i][j];
+
+  return y - expected;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-map-iterators-3.c b/libgomp/testsuite/libgomp.c-c++-common/target-map-iterators-3.c
new file mode 100644
index 0000000..fc05e0e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-map-iterators-3.c
@@ -0,0 +1,56 @@
+/* { dg-do run } */
+/* { dg-require-effective-target offload_device_nonshared_as } */
+
+/* Test transfer of dynamically-allocated arrays to target using map
+   iterators, with multiple iterators and function calls in the iterator
+   expression.  */
+
+#include <stdlib.h>
+
+#define DIM1 16
+#define DIM2 15
+
+int mkarrays (int *x[], int *y[])
+{
+  int expected = 0;
+
+  for (int i = 0; i < DIM1; i++)
+    {
+      x[i] = (int *) malloc (DIM2 * sizeof (int));
+      y[i] = (int *) malloc (sizeof (int));
+      *y[i] = rand ();
+      for (int j = 0; j < DIM2; j++)
+	{
+	  x[i][j] = rand ();
+	  expected += x[i][j] * *y[i];
+	}
+    }
+
+  return expected;
+}
+
+int f (int i, int j)
+{
+  return i * 4 + j;
+}
+
+int main (void)
+{
+  int *x[DIM1], *y[DIM1];
+  int sum;
+
+  int expected = mkarrays (x, y);
+
+  #pragma omp target enter data map(to: x, y)
+  #pragma omp target map(iterator(i=0:DIM1/4, j=0:4), to: x[f(i, j)][ :DIM2]) \
+		     map(iterator(i=0:DIM1), to: y[i][ :1]) \
+		     map(from: sum)
+    {
+      sum = 0;
+      for (int i = 0; i < DIM1; i++)
+	for (int j = 0; j < DIM2; j++)
+	  sum += x[i][j] * y[i][0];
+    }
+
+  return sum - expected;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-map-zlas-1.c b/libgomp/testsuite/libgomp.c-c++-common/target-map-zlas-1.c
index 1ec0c9a..7ee4830 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/target-map-zlas-1.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-map-zlas-1.c
@@ -18,13 +18,13 @@ main (void)
   for (int i = 0; i < N; i++)
     a.ptr[i] = 0;
 
-  #pragma omp target enter data map(to: a.ptr[:N])
+  #pragma omp target enter data map(to: a.ptr[ :N])
 
-  #pragma omp target map(a, a.ptr[:0])
+  #pragma omp target map(a, a.ptr[ :0])
   for (int i = 0; i < N; i++)
     a.ptr[i] += 1;
 
-  #pragma omp target exit data map(from: a.ptr[:N])
+  #pragma omp target exit data map(from: a.ptr[ :N])
 
   for (int i = 0; i < N; i++)
     if (a.ptr[i] != 1)
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-update-iterators-1.c b/libgomp/testsuite/libgomp.c-c++-common/target-update-iterators-1.c
new file mode 100644
index 0000000..239406d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-update-iterators-1.c
@@ -0,0 +1,65 @@
+/* { dg-do run } */
+
+/* Test target enter data and target update to the target using map
+   iterators.  */
+
+#include <stdlib.h>
+
+#define DIM1 8
+#define DIM2 15
+
+int mkarray (int *x[])
+{
+  int expected = 0;
+  for (int i = 0; i < DIM1; i++)
+    {
+      x[i] = (int *) malloc (DIM2 * sizeof (int));
+      for (int j = 0; j < DIM2; j++)
+	{
+	  x[i][j] = rand ();
+	  expected += x[i][j];
+	}
+    }
+
+  return expected;
+}
+
+int main (void)
+{
+  int *x[DIM1];
+  int sum;
+  int expected = mkarray (x);
+
+  #pragma omp target enter data map(to: x[ :DIM1])
+  #pragma omp target enter data map(iterator(i=0:DIM1), to: x[i][ :DIM2])
+  #pragma omp target map(from: sum)
+    {
+      sum = 0;
+      for (int i = 0; i < DIM1; i++)
+	for (int j = 0; j < DIM2; j++)
+	  sum += x[i][j];
+    }
+
+  if (sum != expected)
+    return 1;
+
+  expected = 0;
+  for (int i = 0; i < DIM1; i++)
+    for (int j = 0; j < DIM2; j++)
+      {
+	x[i][j] *= rand ();
+	expected += x[i][j];
+      }
+
+  #pragma omp target update to(iterator(i=0:DIM1): x[i][ :DIM2])
+
+  #pragma omp target map(from: sum)
+    {
+      sum = 0;
+      for (int i = 0; i < DIM1; i++)
+	for (int j = 0; j < DIM2; j++)
+	  sum += x[i][j];
+    }
+
+  return sum != expected;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-update-iterators-2.c b/libgomp/testsuite/libgomp.c-c++-common/target-update-iterators-2.c
new file mode 100644
index 0000000..c17464c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-update-iterators-2.c
@@ -0,0 +1,58 @@
+/* { dg-do run } */
+/* { dg-require-effective-target offload_device_nonshared_as } */
+
+/* Test target enter data and target update from the target using map
+   iterators.  */
+
+#include <stdlib.h>
+
+#define DIM1 8
+#define DIM2 15
+
+void mkarray (int *x[])
+{
+  for (int i = 0; i < DIM1; i++)
+    {
+      x[i] = (int *) malloc (DIM2 * sizeof (int));
+      for (int j = 0; j < DIM2; j++)
+	x[i][j] = 0;
+    }
+}
+
+int main (void)
+{
+  int *x[DIM1];
+  int sum, expected;
+
+  mkarray (x);
+
+  #pragma omp target enter data map(alloc: x[ :DIM1])
+  #pragma omp target enter data map(iterator(i=0:DIM1), to: x[i][ :DIM2])
+  #pragma omp target map(from: expected)
+    {
+      expected = 0;
+      for (int i = 0; i < DIM1; i++)
+	for (int j = 0; j < DIM2; j++)
+	  {
+	    x[i][j] = (i + 1) * (j + 2);
+	    expected += x[i][j];
+	  }
+    }
+
+  /* Host copy of x should remain unchanged.  */
+  sum = 0;
+  for (int i = 0; i < DIM1; i++)
+    for (int j = 0; j < DIM2; j++)
+      sum += x[i][j];
+  if (sum != 0)
+    return 1;
+
+  #pragma omp target update from(iterator(i=0:DIM1): x[i][ :DIM2])
+
+  /* Host copy should now be updated.  */
+  sum = 0;
+  for (int i = 0; i < DIM1; i++)
+    for (int j = 0; j < DIM2; j++)
+      sum += x[i][j];
+  return sum != expected;  /* sum - expected could truncate to status 0.  */
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-update-iterators-3.c b/libgomp/testsuite/libgomp.c-c++-common/target-update-iterators-3.c
new file mode 100644
index 0000000..465a1bb
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-update-iterators-3.c
@@ -0,0 +1,67 @@
+/* { dg-do run } */
+/* { dg-require-effective-target offload_device_nonshared_as } */
+
+/* Test target enter data and target update to the target using map
+   iterators with a function.  */
+
+#include <stdlib.h>
+
+#define DIM1 8
+#define DIM2 15
+
+void mkarray (int *x[])
+{
+  for (int i = 0; i < DIM1; i++)
+    {
+      x[i] = (int *) malloc (DIM2 * sizeof (int));
+      for (int j = 0; j < DIM2; j++)
+	x[i][j] = rand () % 1000;  /* Keep main's sums within int range.  */
+    }
+}
+
+int f (int i)
+{
+  return i * 2;
+}
+
+int main (void)
+{
+  int *x[DIM1], x_new[DIM1][DIM2];
+  int sum, expected;
+
+  mkarray (x);
+
+  #pragma omp target enter data map(alloc: x[ :DIM1])
+  #pragma omp target enter data map(iterator(i=0:DIM1), to: x[i][ :DIM2])
+
+  /* Update x on host.  */
+  for (int i = 0; i < DIM1; i++)
+    for (int j = 0; j < DIM2; j++)
+      {
+	x_new[i][j] = x[i][j];
+	x[i][j] = (i + 1) * (j + 2);
+      }
+
+  /* Update a subset of x on target.  */
+  #pragma omp target update to(iterator(i=0:DIM1/2): x[f (i)][ :DIM2])
+
+  #pragma omp target map(from: sum)
+    {
+      sum = 0;
+      for (int i = 0; i < DIM1; i++)
+	for (int j = 0; j < DIM2; j++)
+	  sum += x[i][j];
+    }
+
+  /* Calculate expected value on host.  */
+  for (int i = 0; i < DIM1/2; i++)
+    for (int j = 0; j < DIM2; j++)
+      x_new[f (i)][j] = x[f (i)][j];
+
+  expected = 0;
+  for (int i = 0; i < DIM1; i++)
+    for (int j = 0; j < DIM2; j++)
+      expected += x_new[i][j];
+
+  return sum != expected;  /* sum - expected could truncate to status 0.  */
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-11.c b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-11.c
index 038b0e2..5dce545 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-11.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-11.c
@@ -9,7 +9,7 @@ unsigned long int c[2] = { ~0UL, ~0UL };
 void
 bar (int i)
 {
-  #pragma omp task in_reduction (*: b[:3]) in_reduction (&: c[1:]) \
+  #pragma omp task in_reduction (*: b[ :3]) in_reduction (&: c[1: ]) \
 	      in_reduction (+: a)
   {
     a += 4;
diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-12.c b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-12.c
index 0ad9273..5a5273e 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-12.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-12.c
@@ -9,7 +9,7 @@ unsigned long int c[2] = { ~0UL, ~0UL };
 void
 bar (int i)
 {
-  #pragma omp task in_reduction (*: b[:3]) in_reduction (&: c[1:]) \
+  #pragma omp task in_reduction (*: b[ :3]) in_reduction (&: c[1: ]) \
 	      in_reduction (+: a)
   {
     a += 4;
diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-16.c b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-16.c
index 44d32c7..34e0eb4 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-16.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-16.c
@@ -9,7 +9,7 @@ unsigned long int c[2] = { ~0UL, ~0UL };
 void
 bar (int i)
 {
-  #pragma omp task in_reduction (*: b[:3]) in_reduction (&: c[1:]) \
+  #pragma omp task in_reduction (*: b[ :3]) in_reduction (&: c[1: ]) \
 	      in_reduction (+: a)
   {
     a += 4;
diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-3.c b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-3.c
index 8a90e86e..a2140cb 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-3.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-3.c
@@ -18,10 +18,10 @@ foo (int n, int *c, long long int *d, int m[3], int *r, int o[4], int *p, int q[
 {
   int i;
   for (i = 0; i < 2; i++)
-    #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
-		     in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \
-		     in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \
-		     in_reduction (*: s[1:2], t[2:2][:])
+    #pragma omp task in_reduction (+: a, c[ :2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
+		     in_reduction (+: o[n:n*2], m[1], k[1:2][ : ], p[0], f[2:2]) \
+		     in_reduction (+: q[1:2][ : ], g[n:n*2], e[1], h[0], r[2:2]) \
+		     in_reduction (*: s[1:2], t[2:2][ : ])
     {
       a[0] += 7;
       a[1] += 17;
@@ -70,15 +70,15 @@ test (int n)
     int o[4] = { 1, 0, 0, 2 };
     #pragma omp taskgroup task_reduction (+: a, c) task_reduction (*: b[2 * n:3 * n], d) \
 			  task_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \
-			  task_reduction (+: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \
-			  task_reduction (*: t[2:2][:], s[1:n + 1])
+			  task_reduction (+: o[n:n*2], m[1], q[1:2][ : ], p[0], r[2:2]) \
+			  task_reduction (*: t[2:2][ : ], s[1:n + 1])
     {
       int i;
       for (i = 0; i < 4; i++)
 	#pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \
-			 in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \
-			 in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \
-			 in_reduction (*: s[1:2], t[2:2][:])
+			 in_reduction (+: o[n:n*2], q[1:2][ : ], p[0], m[1], r[2:2]) \
+			 in_reduction (+: g[n:n * 2], e[1], k[1:2][ : ], h[0], f[2:2]) \
+			 in_reduction (*: s[1:2], t[2:2][ : ])
 	{
 	  int j;
 	  a[0] += 2;
@@ -96,11 +96,11 @@ test (int n)
 	  t[2][1] *= 2;
 	  t[3][1] *= 2;
 	  for (j = 0; j < 2; j++)
-	    #pragma omp task in_reduction (+: a, c[:2]) \
+	    #pragma omp task in_reduction (+: a, c[ :2]) \
 			     in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \
-			     in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \
-			     in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \
-			     in_reduction (*: s[n:2], t[2:2][:])
+			     in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][ :2]) \
+			     in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][ :2]) \
+			     in_reduction (*: s[n:2], t[2:2][ : ])
 	    {
 	      m[1] += 6;
 	      r[2] += 7;
diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-7.c b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-7.c
index c656f5f..0ef7366 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-7.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-7.c
@@ -18,10 +18,10 @@ foo (int n, int *c, long long int *d, int m[3], int *r, int o[4], int *p, int q[
 {
   int i;
   for (i = 0; i < 2; i++)
-    #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
-		     in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \
-		     in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \
-		     in_reduction (*: s[1:2], t[2:2][:])
+    #pragma omp task in_reduction (+: a, c[ :2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
+		     in_reduction (+: o[n:n*2], m[1], k[1:2][ : ], p[0], f[2:2]) \
+		     in_reduction (+: q[1:2][ : ], g[n:n*2], e[1], h[0], r[2:2]) \
+		     in_reduction (*: s[1:2], t[2:2][ : ])
     {
       a[0] += 7;
       a[1] += 17;
@@ -67,17 +67,17 @@ test (int n)
   t = tt;
   #pragma omp parallel reduction (task, +: a, c) reduction (task, *: b[2 * n:3 * n], d) \
 		       reduction (task, +: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \
-		       reduction (task, +: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \
-		       reduction (task, *: t[2:2][:], s[1:n + 1]) num_threads(4)
+		       reduction (task, +: o[n:n*2], m[1], q[1:2][ : ], p[0], r[2:2]) \
+		       reduction (task, *: t[2:2][ : ], s[1:n + 1]) num_threads(4)
   {
     int i;
     #pragma omp for
     for (i = 0; i < 4; i++)
       {
 	#pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \
-			 in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \
-			 in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \
-			 in_reduction (*: s[1:2], t[2:2][:])
+			 in_reduction (+: o[n:n*2], q[1:2][ : ], p[0], m[1], r[2:2]) \
+			 in_reduction (+: g[n:n * 2], e[1], k[1:2][ : ], h[0], f[2:2]) \
+			 in_reduction (*: s[1:2], t[2:2][ : ])
 	{
 	  int j;
 	  a[0] += 2;
@@ -95,11 +95,11 @@ test (int n)
 	  t[2][1] *= 2;
 	  t[3][1] *= 2;
 	  for (j = 0; j < 2; j++)
-	    #pragma omp task in_reduction (+: a, c[:2]) \
+	    #pragma omp task in_reduction (+: a, c[ :2]) \
 			     in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \
-			     in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \
-			     in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \
-			     in_reduction (*: s[n:2], t[2:2][:])
+			     in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][ :2]) \
+			     in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][ :2]) \
+			     in_reduction (*: s[n:2], t[2:2][ : ])
 	    {
 	      m[1] += 6;
 	      r[2] += 7;
diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-9.c b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-9.c
index 3d71fef..1938cf3 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-9.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-9.c
@@ -18,10 +18,10 @@ foo (int n, int *c, long long int *d, int m[3], int *r, int o[4], int *p, int q[
 {
   int i;
   for (i = 0; i < 2; i++)
-    #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
-		     in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \
-		     in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \
-		     in_reduction (*: s[1:2], t[2:2][:])
+    #pragma omp task in_reduction (+: a, c[ :2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
+		     in_reduction (+: o[n:n*2], m[1], k[1:2][ : ], p[0], f[2:2]) \
+		     in_reduction (+: q[1:2][ : ], g[n:n*2], e[1], h[0], r[2:2]) \
+		     in_reduction (*: s[1:2], t[2:2][ : ])
     {
       a[0] += 7;
       a[1] += 17;
@@ -70,15 +70,15 @@ test (int n)
     int i;
     #pragma omp for reduction (task, +: a, c) reduction (task, *: b[2 * n:3 * n], d) \
 		    reduction (task, +: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \
-		    reduction (task, +: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \
-		    reduction (task, *: t[2:2][:], s[1:n + 1]) \
+		    reduction (task, +: o[n:n*2], m[1], q[1:2][ : ], p[0], r[2:2]) \
+		    reduction (task, *: t[2:2][ : ], s[1:n + 1]) \
 		    schedule(nonmonotonic: runtime)
     for (i = 0; i < 4; i++)
       {
 	#pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \
-			 in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \
-			 in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \
-			 in_reduction (*: s[1:2], t[2:2][:])
+			 in_reduction (+: o[n:n*2], q[1:2][ : ], p[0], m[1], r[2:2]) \
+			 in_reduction (+: g[n:n * 2], e[1], k[1:2][ : ], h[0], f[2:2]) \
+			 in_reduction (*: s[1:2], t[2:2][ : ])
 	{
 	  int j;
 	  a[0] += 2;
@@ -96,11 +96,11 @@ test (int n)
 	  t[2][1] *= 2;
 	  t[3][1] *= 2;
 	  for (j = 0; j < 2; j++)
-	    #pragma omp task in_reduction (+: a, c[:2]) \
+	    #pragma omp task in_reduction (+: a, c[ :2]) \
 			     in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \
-			     in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \
-			     in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \
-			     in_reduction (*: s[n:2], t[2:2][:])
+			     in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][ :2]) \
+			     in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][ :2]) \
+			     in_reduction (*: s[n:2], t[2:2][ : ])
 	    {
 	      m[1] += 6;
 	      r[2] += 7;
diff --git a/libgomp/testsuite/libgomp.c-c++-common/taskloop-reduction-2.c b/libgomp/testsuite/libgomp.c-c++-common/taskloop-reduction-2.c
index 8fc05dc..340c53e 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/taskloop-reduction-2.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/taskloop-reduction-2.c
@@ -17,10 +17,10 @@ void
 foo (int n, int *c, long long int *d, int m[3], int *r, int o[4], int *p, int q[4][2])
 {
   int i;
-  #pragma omp taskloop in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
-		       in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \
-		       in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \
-		       in_reduction (*: s[1:2], t[2:2][:]) nogroup
+  #pragma omp taskloop in_reduction (+: a, c[ :2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
+		       in_reduction (+: o[n:n*2], m[1], k[1:2][ : ], p[0], f[2:2]) \
+		       in_reduction (+: q[1:2][ : ], g[n:n*2], e[1], h[0], r[2:2]) \
+		       in_reduction (*: s[1:2], t[2:2][ : ]) nogroup
   for (i = 0; i < 2; i++)
     {
       a[0] += 7;
@@ -71,8 +71,8 @@ test (int n)
     int i;
     #pragma omp taskloop reduction (+: a, c) reduction (default, *: b[2 * n:3 * n], d) \
 			 reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \
-			 reduction (default, +: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \
-			 reduction (*: t[2:2][:], s[1:n + 1])
+			 reduction (default, +: o[n:n*2], m[1], q[1:2][ : ], p[0], r[2:2]) \
+			 reduction (*: t[2:2][ : ], s[1:n + 1])
     for (i = 0; i < 4; i++)
       {
 	int j;
@@ -90,11 +90,11 @@ test (int n)
 	s[1] *= 2;
 	t[2][1] *= 2;
 	t[3][1] *= 2;
-	#pragma omp taskloop in_reduction (+: a, c[:2]) \
+	#pragma omp taskloop in_reduction (+: a, c[ :2]) \
 			     in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \
-			     in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \
-			     in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \
-			     in_reduction (*: s[n:2], t[2:2][:]) nogroup
+			     in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][ :2]) \
+			     in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][ :2]) \
+			     in_reduction (*: s[n:2], t[2:2][ : ]) nogroup
 	for (j = 0; j < 2; j++)
 	  {
 	    m[1] += 6;
diff --git a/libgomp/testsuite/libgomp.c-c++-common/teams-nteams-icv-1.c b/libgomp/testsuite/libgomp.c-c++-common/teams-nteams-icv-1.c
index c3c2109..f2d0916 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/teams-nteams-icv-1.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/teams-nteams-icv-1.c
@@ -19,7 +19,7 @@
    - OMP_NUM_TEAMS(_DEV(_<dev-num>)) overrides it
      OMP_NUM_TEAMS_ALL overrides it
    - Number of teams is:
-     -> the value specific by num_teams([lower:]upper)
+     -> the value specified by num_teams([lower: ]upper)
 	with lower := upper if unspecified
      -> Otherwise, if nteams-var ICV > 0, #teams <= nteams-var ICV
      -> Otherwise, if nteams-var ICV <= 0, #teams > 1
diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/udr-sve.c b/libgomp/testsuite/libgomp.c-target/aarch64/udr-sve.c
index 03d93cc..02e02dc 100644
--- a/libgomp/testsuite/libgomp.c-target/aarch64/udr-sve.c
+++ b/libgomp/testsuite/libgomp.c-target/aarch64/udr-sve.c
@@ -9,8 +9,8 @@
 void __attribute__ ((noipa))
 parallel_reduction ()
 {
-  int a[8] = {1 ,1, 1, 1, 1, 1, 1, 1};
-  int b[8] = {0 ,0, 0, 0, 0, 0, 0, 0};
+  int a[8] = {1, 1, 1, 1, 1, 1, 1, 1};
+  int b[8] = {0, 0, 0, 0, 0, 0, 0, 0};
   svint32_t va = svld1_s32 (svptrue_b32 (), b);
   int i = 0;
   int64_t res;
@@ -30,8 +30,8 @@ parallel_reduction ()
 void __attribute__ ((noipa))
 for_reduction ()
 {
-  int a[8] = {1 ,1, 1, 1, 1, 1, 1, 1};
-  int b[8] = {0 ,0, 0, 0, 0, 0, 0, 0};
+  int a[8] = {1, 1, 1, 1, 1, 1, 1, 1};
+  int b[8] = {0, 0, 0, 0, 0, 0, 0, 0};
   svint32_t va = svld1_s32 (svptrue_b32 (), b);
   int j;
   int64_t res;
@@ -58,13 +58,13 @@ simd_reduction ()
   for (j = 0; j < 8; j++)
     a[j] = 1;
 
-  #pragma omp simd reduction (+:va, i)
+  #pragma omp simd reduction (+:va)
   for (j = 0; j < 16; j++)
-    va = svld1_s32 (svptrue_b32 (), a);
+    va += svld1_s32 (svptrue_b32 (), a);
 
   res = svaddv_s32 (svptrue_b32 (), va);
 
-  if (res != 8)
+  if (res != 128)
     __builtin_abort ();
 }
 
@@ -72,22 +72,57 @@ void __attribute__ ((noipa))
 inscan_reduction_incl ()
 {
   svint32_t va = svindex_s32 (0, 0);
+  int a[8] = {1, 1, 1, 1, 1, 1, 1, 1};
+  int b[64] = { 0 };
   int j;
   int64_t res = 0;
 
-  #pragma omp parallel
-  #pragma omp for reduction (inscan,+:va) firstprivate (res) lastprivate (res)
+  #pragma omp parallel for reduction (inscan, +:va)
   for (j = 0; j < 8; j++)
     {
-      va = svindex_s32 (1, 0);
+      va += svld1_s32 (svptrue_b32 (), a);
       #pragma omp scan inclusive (va)
-      res += svaddv_s32 (svptrue_b32 (), va);
+      svst1_s32 (svptrue_b32 (), b + j * 8, va);
+    }
+
+  res = svaddv_s32 (svptrue_b32 (), va);
+
+  if (res != 64)
+    __builtin_abort ();
+
+  for (j = 0; j < 64; j+=8)
+    if (b[j] != (j / 8 + 1))
+      __builtin_abort ();
+}
+
+void __attribute__ ((noipa))
+inscan_reduction_excl ()
+{
+  svint32_t va = svindex_s32 (0, 0);
+  int a[8] = {1, 1, 1, 1, 1, 1, 1, 1};
+  int b[64] = { 0 };
+  int j;
+  int64_t res = 0;
+
+  #pragma omp parallel for reduction (inscan, +:va)
+  for (j = 0; j < 8; j++)
+    {
+      svst1_s32 (svptrue_b32 (), b + j * 8, va);
+      #pragma omp scan exclusive (va)
+      va += svld1_s32 (svptrue_b32 (), a);
     }
 
+  res = svaddv_s32 (svptrue_b32 (), va);
+
   if (res != 64)
     __builtin_abort ();
+
+  for (j = 0; j < 64; j+=8)
+    if (b[j] != j / 8)
+      __builtin_abort ();
 }
 
+
 int
 main ()
 {
@@ -95,4 +130,5 @@ main ()
   for_reduction ();
   simd_reduction ();
   inscan_reduction_incl ();
+  inscan_reduction_excl ();
 }
diff --git a/libgomp/testsuite/libgomp.c/alloc-managed-1.c b/libgomp/testsuite/libgomp.c/alloc-managed-1.c
new file mode 100644
index 0000000..88ddcf3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/alloc-managed-1.c
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+/* { dg-require-effective-target omp_managedmem } */
+/* { dg-additional-options "-foffload-options=amdgcn-amdhsa=-mxnack=on" { target offload_target_amdgcn_with_xnack } } */
+
+/* Check that omp_alloc can allocate Managed Memory, and that host and target
+   can see the data, at the same address, without a mapping.  */
+
+#include <omp.h>
+#include <stdint.h>
+
+int
+main ()
+{
+  int *a = (int *) omp_alloc(sizeof(int), ompx_gnu_managed_mem_alloc);
+  if (!a)
+    __builtin_abort ();
+
+  *a = 42;
+  uintptr_t a_p = (uintptr_t)a;
+
+  #pragma omp target is_device_ptr(a)
+    {
+      if (*a != 42 || a_p != (uintptr_t)a)
+	__builtin_abort ();
+    }
+
+  omp_free(a, ompx_gnu_managed_mem_alloc);
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/alloc-managed-2.c b/libgomp/testsuite/libgomp.c/alloc-managed-2.c
new file mode 100644
index 0000000..660f6e6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/alloc-managed-2.c
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+/* { dg-require-effective-target omp_managedmem } */
+/* { dg-additional-options -foffload-options=amdgcn-amdhsa=-mxnack=on { target offload_target_amdgcn_with_xnack } } */
+
+/* Check that omp_calloc can allocate Managed Memory, and that host and target
+   can see the data, at the same address, without a mapping.  */
+
+#include <omp.h>
+#include <stdint.h>
+
+int
+main ()
+{
+  int *a = (int *) omp_calloc(5, sizeof(int), ompx_gnu_managed_mem_alloc);
+  if (!a)
+    __builtin_abort ();
+
+  /* Check that memory is zero-initialized.  */
+  for (int i = 0; i < 5; i++)
+    if (a[i] != 0)
+      __builtin_abort ();
+
+  a[0] = 42;
+  a[4] = 99;
+  uintptr_t a_p = (uintptr_t)a;
+
+  #pragma omp target is_device_ptr(a)
+    {
+      if (a[0] != 42 || a[4] != 99 || a_p != (uintptr_t)a)
+	__builtin_abort ();
+      /* Check zero-initialization on device side.  */
+      for (int i = 1; i < 4; i++)
+	if (a[i] != 0)
+	  __builtin_abort ();
+    }
+
+  omp_free(a, ompx_gnu_managed_mem_alloc);
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/alloc-managed-3.c b/libgomp/testsuite/libgomp.c/alloc-managed-3.c
new file mode 100644
index 0000000..fefdeb3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/alloc-managed-3.c
@@ -0,0 +1,45 @@
+/* { dg-do run } */
+/* { dg-require-effective-target omp_managedmem } */
+/* { dg-additional-options -foffload-options=amdgcn-amdhsa=-mxnack=on { target offload_target_amdgcn_with_xnack } } */
+
+/* Check that omp_realloc can allocate Managed Memory, and that host and target
+   can see the data, at the same address, without a mapping.  */
+
+#include <omp.h>
+#include <stdint.h>
+
+int
+main ()
+{
+  int *a = (int *) omp_alloc(2 * sizeof(int), ompx_gnu_managed_mem_alloc);
+  if (!a)
+    __builtin_abort ();
+
+  a[0] = 42;
+  a[1] = 43;
+
+  /* Reallocate to larger size.  */
+  int *b = (int *) omp_realloc(a, 5 * sizeof(int), ompx_gnu_managed_mem_alloc,
+			       ompx_gnu_managed_mem_alloc);
+  if (!b)
+    __builtin_abort ();
+
+  /* Check that original data is preserved.  */
+  if (b[0] != 42 || b[1] != 43)
+    __builtin_abort ();
+
+  b[2] = 44;
+  b[3] = 45;
+  b[4] = 46;
+  uintptr_t b_p = (uintptr_t)b;
+
+  #pragma omp target is_device_ptr(b)
+    {
+      if (b[0] != 42 || b[1] != 43 || b[2] != 44 || b[3] != 45 || b[4] != 46
+	  || b_p != (uintptr_t)b)
+	__builtin_abort ();
+    }
+
+  omp_free(b, ompx_gnu_managed_mem_alloc);
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/alloc-managed-4.c b/libgomp/testsuite/libgomp.c/alloc-managed-4.c
new file mode 100644
index 0000000..577e3e2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/alloc-managed-4.c
@@ -0,0 +1,23 @@
+/* { dg-do run } */
+/* { dg-require-effective-target omp_managedmem } */
+/* { dg-additional-options -foffload-options=amdgcn-amdhsa=-mxnack=on { target offload_target_amdgcn_with_xnack } } */
+/* { dg-shouldfail "" } */
+/* { dg-output "libgomp: attempted to free managed memory at 0x\[0-9a-f\]+, but the default device is set to the host device" } */
+
+/* Check that omp_free emits an error if the default device has been changed
+   to the host device.  */
+
+#include <omp.h>
+#include <stdint.h>
+
+int
+main ()
+{
+  int *a = (int *) omp_alloc(2 * sizeof(int), ompx_gnu_managed_mem_alloc);
+  if (!a)
+    __builtin_abort ();
+
+  omp_set_default_device (omp_initial_device);
+  omp_free(a, ompx_gnu_managed_mem_alloc);
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/alloc-pinned-1.c b/libgomp/testsuite/libgomp.c/alloc-pinned-1.c
index 672f245..693f903 100644
--- a/libgomp/testsuite/libgomp.c/alloc-pinned-1.c
+++ b/libgomp/testsuite/libgomp.c/alloc-pinned-1.c
@@ -2,6 +2,8 @@
 
 /* { dg-skip-if "Pinning not implemented on this host" { ! *-*-linux-gnu* } } */
 
+/* { dg-additional-options -DOFFLOAD_DEVICE_NVPTX { target offload_device_nvptx } } */
+
 /* Test that pinned memory works.  */
 
 #include <stdio.h>
@@ -63,10 +65,16 @@ verify0 (char *p, size_t s)
 int
 main ()
 {
+#ifdef OFFLOAD_DEVICE_NVPTX
+  /* Go big or go home.
+     The OS ulimit does not affect memory locked via CUDA for NVPTX devices. */
+  const int SIZE = 40 * 1024 * 1024;
+#else
   /* Allocate at least a page each time, allowing space for overhead,
      but stay within the ulimit.  */
   const int SIZE = PAGE_SIZE - 128;
   CHECK_SIZE (SIZE * 5);  // This is intended to help diagnose failures
+#endif
 
   const omp_alloctrait_t traits[] = {
       { omp_atk_pinned, 1 }
@@ -88,21 +96,39 @@ main ()
     abort ();
 
   int amount = get_pinned_mem ();
+#ifdef OFFLOAD_DEVICE_NVPTX
+  /* This doesn't show up as process 'VmLck'ed memory.  */
+  if (amount != 0)
+    abort ();
+#else
   if (amount == 0)
     abort ();
+#endif
 
   p = omp_realloc (p, SIZE * 2, allocator, allocator);
 
   int amount2 = get_pinned_mem ();
+#ifdef OFFLOAD_DEVICE_NVPTX
+  /* This doesn't show up as process 'VmLck'ed memory.  */
+  if (amount2 != 0)
+    abort ();
+#else
   if (amount2 <= amount)
     abort ();
+#endif
 
   /* SIZE*2 ensures that it doesn't slot into the space possibly
      vacated by realloc.  */
   p = omp_calloc (1, SIZE * 2, allocator);
 
+#ifdef OFFLOAD_DEVICE_NVPTX
+  /* This doesn't show up as process 'VmLck'ed memory.  */
+  if (get_pinned_mem () != 0)
+    abort ();
+#else
   if (get_pinned_mem () <= amount2)
     abort ();
+#endif
 
   verify0 (p, SIZE * 2);
 
diff --git a/libgomp/testsuite/libgomp.c/alloc-pinned-2.c b/libgomp/testsuite/libgomp.c/alloc-pinned-2.c
index b6d1d83..e7ac64e 100644
--- a/libgomp/testsuite/libgomp.c/alloc-pinned-2.c
+++ b/libgomp/testsuite/libgomp.c/alloc-pinned-2.c
@@ -2,6 +2,8 @@
 
 /* { dg-skip-if "Pinning not implemented on this host" { ! *-*-linux-gnu* } } */
 
+/* { dg-additional-options -DOFFLOAD_DEVICE_NVPTX { target offload_device_nvptx } } */
+
 /* Test that pinned memory works (pool_size code path).  */
 
 #include <stdio.h>
@@ -63,10 +65,16 @@ verify0 (char *p, size_t s)
 int
 main ()
 {
+#ifdef OFFLOAD_DEVICE_NVPTX
+  /* Go big or go home.
+     The OS ulimit does not affect memory locked via CUDA for NVPTX devices. */
+  const int SIZE = 40 * 1024 * 1024;
+#else
   /* Allocate at least a page each time, allowing space for overhead,
      but stay within the ulimit.  */
   const int SIZE = PAGE_SIZE - 128;
   CHECK_SIZE (SIZE * 5);  // This is intended to help diagnose failures
+#endif
 
   const omp_alloctrait_t traits[] = {
       { omp_atk_pinned, 1 },
@@ -89,16 +97,28 @@ main ()
     abort ();
 
   int amount = get_pinned_mem ();
+#ifdef OFFLOAD_DEVICE_NVPTX
+  /* This doesn't show up as process 'VmLck'ed memory.  */
+  if (amount != 0)
+    abort ();
+#else
   if (amount == 0)
     abort ();
+#endif
 
   p = omp_realloc (p, SIZE * 2, allocator, allocator);
   if (!p)
     abort ();
 
   int amount2 = get_pinned_mem ();
+#ifdef OFFLOAD_DEVICE_NVPTX
+  /* This doesn't show up as process 'VmLck'ed memory.  */
+  if (amount2 != 0)
+    abort ();
+#else
   if (amount2 <= amount)
     abort ();
+#endif
 
   /* SIZE*2 ensures that it doesn't slot into the space possibly
      vacated by realloc.  */
@@ -106,8 +126,14 @@ main ()
   if (!p)
     abort ();
 
+#ifdef OFFLOAD_DEVICE_NVPTX
+  /* This doesn't show up as process 'VmLck'ed memory.  */
+  if (get_pinned_mem () != 0)
+    abort ();
+#else
   if (get_pinned_mem () <= amount2)
     abort ();
+#endif
 
   verify0 (p, SIZE * 2);
 
diff --git a/libgomp/testsuite/libgomp.c/alloc-pinned-3.c b/libgomp/testsuite/libgomp.c/alloc-pinned-3.c
index 11dc818..250cb55 100644
--- a/libgomp/testsuite/libgomp.c/alloc-pinned-3.c
+++ b/libgomp/testsuite/libgomp.c/alloc-pinned-3.c
@@ -1,5 +1,7 @@
 /* { dg-do run } */
 
+/* { dg-additional-options -DOFFLOAD_DEVICE_NVPTX { target offload_device_nvptx } } */
+
 /* Test that pinned memory fails correctly.  */
 
 #include <stdio.h>
@@ -75,8 +77,15 @@ verify0 (char *p, size_t s)
 int
 main ()
 {
+#ifdef OFFLOAD_DEVICE_NVPTX
+  /* Go big or go home.
+     The OS ulimit does not affect memory locked via CUDA for NVPTX devices. */
+  const int SIZE = 40 * 1024 * 1024;
+#else
   /* This needs to be large enough to cover multiple pages.  */
   const int SIZE = PAGE_SIZE * 4;
+#endif
+  const int PIN_LIMIT = PAGE_SIZE * 2;
 
   /* Pinned memory, no fallback.  */
   const omp_alloctrait_t traits1[] = {
@@ -101,23 +110,34 @@ main ()
 #endif
 
   /* Ensure that the limit is smaller than the allocation.  */
-  set_pin_limit (SIZE / 2);
+  set_pin_limit (PIN_LIMIT);
 
   // Sanity check
   if (get_pinned_mem () != 0)
     abort ();
 
-  // Should fail
   void *p1 = omp_alloc (SIZE, allocator1);
+#ifdef OFFLOAD_DEVICE_NVPTX
+  // Doesn't care about 'set_pin_limit'.
+  if (!p1)
+    abort ();
+#else
+  // Should fail
   if (p1)
     abort ();
+#endif
 
-  // Should fail
   void *p2 = omp_calloc (1, SIZE, allocator1);
+#ifdef OFFLOAD_DEVICE_NVPTX
+  // Doesn't care about 'set_pin_limit'.
+  if (!p2)
+    abort ();
+#else
+  // Should fail
   if (p2)
     abort ();
+#endif
 
-  // Should fall back
   void *p3 = omp_alloc (SIZE, allocator2);
   if (!p3)
     abort ();
@@ -128,16 +148,29 @@ main ()
     abort ();
   verify0 (p4, SIZE);
 
-  // Should fail to realloc
   void *notpinned = omp_alloc (SIZE, omp_default_mem_alloc);
   void *p5 = omp_realloc (notpinned, SIZE, allocator1, omp_default_mem_alloc);
+#ifdef OFFLOAD_DEVICE_NVPTX
+  // Doesn't care about 'set_pin_limit'; does reallocate.
+  if (!notpinned || !p5 || p5 == notpinned)
+    abort ();
+#else
+  // Should fail to realloc
   if (!notpinned || p5)
     abort ();
+#endif
 
-  // Should fall back to no realloc needed
+#ifdef OFFLOAD_DEVICE_NVPTX
+  void *p6 = omp_realloc (p5, SIZE, allocator2, allocator1);
+  // Does reallocate.
+  if (p5 == p6)
+    abort ();
+#else
   void *p6 = omp_realloc (notpinned, SIZE, allocator2, omp_default_mem_alloc);
+  // Should fall back to no realloc needed
   if (p6 != notpinned)
     abort ();
+#endif
 
   // No memory should have been pinned
   int amount = get_pinned_mem ();
diff --git a/libgomp/testsuite/libgomp.c/alloc-pinned-4.c b/libgomp/testsuite/libgomp.c/alloc-pinned-4.c
index 2ecd01f..b7a9966 100644
--- a/libgomp/testsuite/libgomp.c/alloc-pinned-4.c
+++ b/libgomp/testsuite/libgomp.c/alloc-pinned-4.c
@@ -1,5 +1,7 @@
 /* { dg-do run } */
 
+/* { dg-additional-options -DOFFLOAD_DEVICE_NVPTX { target offload_device_nvptx } } */
+
 /* Test that pinned memory fails correctly, pool_size code path.  */
 
 #include <stdio.h>
@@ -75,8 +77,15 @@ verify0 (char *p, size_t s)
 int
 main ()
 {
+#ifdef OFFLOAD_DEVICE_NVPTX
+  /* Go big or go home.
+     The OS ulimit does not affect memory locked via CUDA for NVPTX devices. */
+  const int SIZE = 40 * 1024 * 1024;
+#else
   /* This needs to be large enough to cover multiple pages.  */
   const int SIZE = PAGE_SIZE * 4;
+#endif
+  const int PIN_LIMIT = PAGE_SIZE * 2;
 
   /* Pinned memory, no fallback.  */
   const omp_alloctrait_t traits1[] = {
@@ -103,21 +112,33 @@ main ()
 #endif
 
   /* Ensure that the limit is smaller than the allocation.  */
-  set_pin_limit (SIZE / 2);
+  set_pin_limit (PIN_LIMIT);
 
   // Sanity check
   if (get_pinned_mem () != 0)
     abort ();
 
-  // Should fail
   void *p = omp_alloc (SIZE, allocator1);
+#ifdef OFFLOAD_DEVICE_NVPTX
+  // Doesn't care about 'set_pin_limit'.
+  if (!p)
+    abort ();
+#else
+  // Should fail
   if (p)
     abort ();
+#endif
 
-  // Should fail
   p = omp_calloc (1, SIZE, allocator1);
+#ifdef OFFLOAD_DEVICE_NVPTX
+  // Doesn't care about 'set_pin_limit'.
+  if (!p)
+    abort ();
+#else
+  // Should fail
   if (p)
     abort ();
+#endif
 
   // Should fall back
   p = omp_alloc (SIZE, allocator2);
@@ -130,16 +151,29 @@ main ()
     abort ();
   verify0 (p, SIZE);
 
-  // Should fail to realloc
   void *notpinned = omp_alloc (SIZE, omp_default_mem_alloc);
   p = omp_realloc (notpinned, SIZE, allocator1, omp_default_mem_alloc);
+#ifdef OFFLOAD_DEVICE_NVPTX
+  // Doesn't care about 'set_pin_limit'; does reallocate.
+  if (!notpinned || !p || p == notpinned)
+    abort ();
+#else
+  // Should fail to realloc
   if (!notpinned || p)
     abort ();
+#endif
 
-  // Should fall back to no realloc needed
+#ifdef OFFLOAD_DEVICE_NVPTX
+  void *p_ = omp_realloc (p, SIZE, allocator2, allocator1);
+  // Does reallocate.
+  if (p_ == p)
+    abort ();
+#else
   p = omp_realloc (notpinned, SIZE, allocator2, omp_default_mem_alloc);
+  // Should fall back to no realloc needed
   if (p != notpinned)
     abort ();
+#endif
 
   // No memory should have been pinned
   int amount = get_pinned_mem ();
diff --git a/libgomp/testsuite/libgomp.c/alloc-pinned-5.c b/libgomp/testsuite/libgomp.c/alloc-pinned-5.c
index 0ba2feb..cc77764 100644
--- a/libgomp/testsuite/libgomp.c/alloc-pinned-5.c
+++ b/libgomp/testsuite/libgomp.c/alloc-pinned-5.c
@@ -2,6 +2,8 @@
 
 /* { dg-skip-if "Pinning not implemented on this host" { ! *-*-linux-gnu* } } */
 
+/* { dg-additional-options -DOFFLOAD_DEVICE_NVPTX { target offload_device_nvptx } } */
+
 /* Test that ompx_gnu_pinned_mem_alloc works.  */
 
 #include <stdio.h>
@@ -63,10 +65,16 @@ verify0 (char *p, size_t s)
 int
 main ()
 {
+#ifdef OFFLOAD_DEVICE_NVPTX
+  /* Go big or go home.
+     The OS ulimit does not affect memory locked via CUDA for NVPTX devices. */
+  const int SIZE = 40 * 1024 * 1024;
+#else
   /* Allocate at least a page each time, allowing space for overhead,
      but stay within the ulimit.  */
   const int SIZE = PAGE_SIZE - 128;
   CHECK_SIZE (SIZE * 5);
+#endif
 
   // Sanity check
   if (get_pinned_mem () != 0)
@@ -77,22 +85,40 @@ main ()
     abort ();
 
   int amount = get_pinned_mem ();
+#ifdef OFFLOAD_DEVICE_NVPTX
+  /* This doesn't show up as process 'VmLck'ed memory.  */
+  if (amount != 0)
+    abort ();
+#else
   if (amount == 0)
     abort ();
+#endif
 
   p = omp_realloc (p, SIZE * 2, ompx_gnu_pinned_mem_alloc,
 		   ompx_gnu_pinned_mem_alloc);
 
   int amount2 = get_pinned_mem ();
+#ifdef OFFLOAD_DEVICE_NVPTX
+  /* This doesn't show up as process 'VmLck'ed memory.  */
+  if (amount2 != 0)
+    abort ();
+#else
   if (amount2 <= amount)
     abort ();
+#endif
 
   /* SIZE*2 ensures that it doesn't slot into the space possibly
      vacated by realloc.  */
   p = omp_calloc (1, SIZE * 2, ompx_gnu_pinned_mem_alloc);
 
+#ifdef OFFLOAD_DEVICE_NVPTX
+  /* This doesn't show up as process 'VmLck'ed memory.  */
+  if (get_pinned_mem () != 0)
+    abort ();
+#else
   if (get_pinned_mem () <= amount2)
     abort ();
+#endif
 
   verify0 (p, SIZE * 2);
 
diff --git a/libgomp/testsuite/libgomp.c/alloc-pinned-6.c b/libgomp/testsuite/libgomp.c/alloc-pinned-6.c
index 99f1269..6dd5544 100644
--- a/libgomp/testsuite/libgomp.c/alloc-pinned-6.c
+++ b/libgomp/testsuite/libgomp.c/alloc-pinned-6.c
@@ -1,4 +1,5 @@
 /* { dg-do run } */
+/* { dg-additional-options -DOFFLOAD_DEVICE_NVPTX { target offload_device_nvptx } } */
 
 /* Test that ompx_gnu_pinned_mem_alloc fails correctly.  */
 
@@ -66,32 +67,57 @@ set_pin_limit (int size)
 int
 main ()
 {
+#ifdef OFFLOAD_DEVICE_NVPTX
+  /* Go big or go home.
+     The OS ulimit does not affect memory locked via CUDA for NVPTX devices. */
+  const int SIZE = 40 * 1024 * 1024;
+#else
   /* Allocate at least a page each time, but stay within the ulimit.  */
   const int SIZE = PAGE_SIZE * 4;
+#endif
+  const int PIN_LIMIT = PAGE_SIZE*2;
 
   /* Ensure that the limit is smaller than the allocation.  */
-  set_pin_limit (SIZE / 2);
+  set_pin_limit (PIN_LIMIT);
 
   // Sanity check
   if (get_pinned_mem () != 0)
     abort ();
 
-  // Should fail
   void *p = omp_alloc (SIZE, ompx_gnu_pinned_mem_alloc);
+#ifdef OFFLOAD_DEVICE_NVPTX
+  // Doesn't care about 'set_pin_limit'.
+  if (!p)
+    abort ();
+#else
+  // Should fail
   if (p)
     abort ();
+#endif
 
-  // Should fail
   p = omp_calloc (1, SIZE, ompx_gnu_pinned_mem_alloc);
+#ifdef OFFLOAD_DEVICE_NVPTX
+  // Doesn't care about 'set_pin_limit'.
+  if (!p)
+    abort ();
+#else
+  // Should fail
   if (p)
     abort ();
+#endif
 
-  // Should fail to realloc
   void *notpinned = omp_alloc (SIZE, omp_default_mem_alloc);
   p = omp_realloc (notpinned, SIZE, ompx_gnu_pinned_mem_alloc,
 		   omp_default_mem_alloc);
+#ifdef OFFLOAD_DEVICE_NVPTX
+  // Doesn't care about 'set_pin_limit'; does reallocate.
+  if (!notpinned || !p || p == notpinned)
+    abort ();
+#else
+  // Should fail to realloc
   if (!notpinned || p)
     abort ();
+#endif
 
   // No memory should have been pinned
   int amount = get_pinned_mem ();
diff --git a/libgomp/testsuite/libgomp.c/alloc-pinned-8.c b/libgomp/testsuite/libgomp.c/alloc-pinned-8.c
new file mode 100644
index 0000000..0fc737b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/alloc-pinned-8.c
@@ -0,0 +1,136 @@
+/* { dg-do run } */
+
+/* { dg-skip-if "Pinning not implemented on this host" { ! *-*-linux-gnu* } } */
+
+/* { dg-additional-options -DOFFLOAD_DEVICE_NVPTX { target offload_device_nvptx } } */
+
+/* Test that pinned memory works for small allocations.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifdef __linux__
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <sys/mman.h>
+#include <sys/resource.h>
+
+#define PAGE_SIZE sysconf(_SC_PAGESIZE)
+/* Warn, but do not fail, if the 'RLIMIT_MEMLOCK' soft limit cannot be read
+   or is not larger than SIZE bytes.  */
+#define CHECK_SIZE(SIZE) { \
+  struct rlimit limit; \
+  if (getrlimit (RLIMIT_MEMLOCK, &limit) \
+      || limit.rlim_cur <= (SIZE)) \
+    fprintf (stderr, "insufficient lockable memory; please increase ulimit\n"); \
+  }
+
+/* Return the value of the 'VmLck' field of '/proc/<pid>/status' for this
+   process; abort if the file cannot be opened or has no such field.  */
+
+int
+get_pinned_mem ()
+{
+  int pid = getpid ();
+  char buf[100];
+  sprintf (buf, "/proc/%d/status", pid);
+
+  FILE *proc = fopen (buf, "r");
+  if (!proc)
+    abort ();
+  while (fgets (buf, 100, proc))
+    {
+      int val;
+      /* 'sscanf' returns EOF (nonzero) on input failure, so require exactly
+	 one successful conversion.  */
+      if (sscanf (buf, "VmLck: %d", &val) == 1)
+	{
+	  fclose (proc);
+	  return val;
+	}
+    }
+  abort ();
+}
+#else
+#error "OS unsupported"
+#endif
+
+/* Abort unless the first S bytes at P are all zero.  */
+
+static void
+verify0 (char *p, size_t s)
+{
+  for (size_t i = 0; i < s; ++i)
+    if (p[i] != 0)
+      abort ();
+}
+
+#include <omp.h>
+
+int
+main ()
+{
+  /* Choose a small size where all our allocations fit on one page.  */
+  const int SIZE = 10;
+#ifndef OFFLOAD_DEVICE_NVPTX
+  CHECK_SIZE (SIZE*4);
+#endif
+
+  const omp_alloctrait_t traits[] = {
+      { omp_atk_pinned, 1 }
+  };
+  omp_allocator_handle_t allocator = omp_init_allocator (omp_default_mem_space, 1, traits);
+
+  // Sanity check
+  if (get_pinned_mem () != 0)
+    abort ();
+
+  void *p = omp_alloc (SIZE, allocator);
+  if (!p)
+    abort ();
+
+  int amount = get_pinned_mem ();
+#ifdef OFFLOAD_DEVICE_NVPTX
+  /* This doesn't show up as process 'VmLck'ed memory.  */
+  if (amount != 0)
+    abort ();
+#else
+  if (amount == 0)
+    abort ();
+#endif
+
+  p = omp_realloc (p, SIZE * 2, allocator, allocator);
+  if (!p)
+    abort ();
+
+  int amount2 = get_pinned_mem ();
+#ifdef OFFLOAD_DEVICE_NVPTX
+  /* This doesn't show up as process 'VmLck'ed memory.  */
+  if (amount2 != 0)
+    abort ();
+#else
+  /* A small allocation should not allocate another page.  */
+  if (amount2 != amount)
+    abort ();
+#endif
+
+  p = omp_calloc (1, SIZE, allocator);
+  /* Fail cleanly instead of letting 'verify0' dereference a null pointer.  */
+  if (!p)
+    abort ();
+
+#ifdef OFFLOAD_DEVICE_NVPTX
+  /* This doesn't show up as process 'VmLck'ed memory.  */
+  if (get_pinned_mem () != 0)
+    abort ();
+#else
+  /* A small allocation should not allocate another page.  */
+  if (get_pinned_mem () != amount2)
+    abort ();
+#endif
+
+  verify0 (p, SIZE);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-3-sm61.c b/libgomp/testsuite/libgomp.c/declare-variant-3-sm61.c
new file mode 100644
index 0000000..e6941d3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-3-sm61.c
@@ -0,0 +1,8 @@
+/* { dg-do link { target { offload_target_nvptx } } } */
+/* { dg-additional-options -foffload=nvptx-none } */
+/* { dg-additional-options "-foffload=-misa=sm_61 -foffload=-mptx=_" } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-3.h"
+
+/* { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump "= f61 \\(\\);" "optimized" } } */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-3.h b/libgomp/testsuite/libgomp.c/declare-variant-3.h
index c9c8f4a..f5695a2 100644
--- a/libgomp/testsuite/libgomp.c/declare-variant-3.h
+++ b/libgomp/testsuite/libgomp.c/declare-variant-3.h
@@ -37,6 +37,13 @@ f53 (void)
 
 __attribute__ ((noipa))
 int
+f61 (void)
+{
+  return 61;
+}
+
+__attribute__ ((noipa))
+int
 f70 (void)
 {
   return 70;
@@ -68,6 +75,7 @@ f89 (void)
 #pragma omp declare variant (f37) match (device={isa("sm_37")})
 #pragma omp declare variant (f52) match (device={isa("sm_52")})
 #pragma omp declare variant (f53) match (device={isa("sm_53")})
+#pragma omp declare variant (f61) match (device={isa("sm_61")})
 #pragma omp declare variant (f70) match (device={isa("sm_70")})
 #pragma omp declare variant (f75) match (device={isa("sm_75")})
 #pragma omp declare variant (f80) match (device={isa("sm_80")})
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx10-3-generic.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx10-3-generic.c
new file mode 100644
index 0000000..b7b95e6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx10-3-generic.c
@@ -0,0 +1,25 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx10-3-generic } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx10_3_generic \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been built for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to the following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available (as this is a generic config),
+   scan-offload-tree-dump will PASS - but linking fails with the
+   following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1030.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1030.c
index d98d5ef..3703e96 100644
--- a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1030.c
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1030.c
@@ -6,3 +6,28 @@
 #include "declare-variant-4.h"
 
 /* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx1030 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been built for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to the following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compiling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1031.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1031.c
new file mode 100644
index 0000000..e0d6289
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1031.c
@@ -0,0 +1,33 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx1031 } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx1031 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been built for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to the following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compiling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1032.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1032.c
new file mode 100644
index 0000000..46174cc2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1032.c
@@ -0,0 +1,33 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx1032 } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx1032 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been built for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to the following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compiling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1033.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1033.c
new file mode 100644
index 0000000..1bd6e66
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1033.c
@@ -0,0 +1,33 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx1033 } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx1033 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been built for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to the following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compiling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1034.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1034.c
new file mode 100644
index 0000000..4f67a73
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1034.c
@@ -0,0 +1,33 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx1034 } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx1034 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been built for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to the following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compiling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1035.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1035.c
new file mode 100644
index 0000000..a69d5e7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1035.c
@@ -0,0 +1,33 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx1035 } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx1035 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been built for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to the following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compiling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1036.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1036.c
index 93b8641..8c258c4 100644
--- a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1036.c
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1036.c
@@ -6,3 +6,28 @@
 #include "declare-variant-4.h"
 
 /* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx1036 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been built for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to the following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compiling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx11-generic.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx11-generic.c
new file mode 100644
index 0000000..fa9efb4
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx11-generic.c
@@ -0,0 +1,25 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx11-generic } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx11_generic \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been built for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to the following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available (as this is a generic config),
+   scan-offload-tree-dump will PASS - but linking fails with the
+   following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1100.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1100.c
index 6ade352..f0b7c6d 100644
--- a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1100.c
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1100.c
@@ -6,3 +6,28 @@
 #include "declare-variant-4.h"
 
 /* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx1100 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been built for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to the following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compiling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1101.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1101.c
new file mode 100644
index 0000000..213e904
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1101.c
@@ -0,0 +1,33 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx1101 } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx1101 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been built for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to the following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compiling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1102.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1102.c
new file mode 100644
index 0000000..3f68dc8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1102.c
@@ -0,0 +1,33 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx1102 } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx1102 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been built for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to the following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compiling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1103.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1103.c
index 6a6dc4f..c1eed44 100644
--- a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1103.c
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1103.c
@@ -6,3 +6,28 @@
 #include "declare-variant-4.h"
 
 /* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx1103 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been built for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to the following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compiling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1150.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1150.c
new file mode 100644
index 0000000..39d64ca
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1150.c
@@ -0,0 +1,33 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx1150 } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx1150 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been built for GCC. In that case, scan-offload-tree-dump will
+   PASS and linking will yield an XPASS message due to the following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compiling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1151.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1151.c
new file mode 100644
index 0000000..2a0c732
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1151.c
@@ -0,0 +1,33 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx1151 } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx1151 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been built for GCC. In that case, scan-offload-tree-dump will
+   PASS and linking will yield an XPASS message due to the following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compiling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1152.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1152.c
new file mode 100644
index 0000000..3c987dd
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1152.c
@@ -0,0 +1,33 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx1152 } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx1152 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been built for GCC. In that case, scan-offload-tree-dump will
+   PASS and linking will yield an XPASS message due to the following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compiling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1153.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1153.c
new file mode 100644
index 0000000..7d38b82
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1153.c
@@ -0,0 +1,33 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx1153 } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx1153 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been built for GCC. In that case, scan-offload-tree-dump will
+   PASS and linking will yield an XPASS message due to the following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compiling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx9-4-generic.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx9-4-generic.c
new file mode 100644
index 0000000..07d1254
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx9-4-generic.c
@@ -0,0 +1,25 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx9-4-generic } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx9_4_generic \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been built for GCC. In that case, scan-offload-tree-dump will
+   PASS and linking will yield an XPASS message due to the following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available (as this is a generic config),
+   scan-offload-tree-dump will PASS - but linking fails with the
+   following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx9-generic.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx9-generic.c
new file mode 100644
index 0000000..d6ba097
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx9-generic.c
@@ -0,0 +1,25 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx9-generic } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx9_generic \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been built for GCC. In that case, scan-offload-tree-dump will
+   PASS and linking will yield an XPASS message due to the following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available (as this is a generic config),
+   scan-offload-tree-dump will PASS - but linking fails with the
+   following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx900.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx900.c
index f3f5244..37005fc 100644
--- a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx900.c
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx900.c
@@ -6,3 +6,28 @@
 #include "declare-variant-4.h"
 
 /* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx900 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been built for GCC. In that case, scan-offload-tree-dump will
+   PASS and linking will yield an XPASS message due to the following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compiling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx902.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx902.c
new file mode 100644
index 0000000..82981c5
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx902.c
@@ -0,0 +1,33 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx902 } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx902 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been built for GCC. In that case, scan-offload-tree-dump will
+   PASS and linking will yield an XPASS message due to the following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compiling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx904.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx904.c
new file mode 100644
index 0000000..89815fe
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx904.c
@@ -0,0 +1,33 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx904 } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx904 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been built for GCC. In that case, scan-offload-tree-dump will
+   PASS and linking will yield an XPASS message due to the following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compiling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx906.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx906.c
index ac43388..aeef690 100644
--- a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx906.c
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx906.c
@@ -6,3 +6,28 @@
 #include "declare-variant-4.h"
 
 /* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx906 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been built for GCC. In that case, scan-offload-tree-dump will
+   PASS and linking will yield an XPASS message due to the following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compiling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx908.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx908.c
index f60741f..799b546 100644
--- a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx908.c
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx908.c
@@ -6,3 +6,28 @@
 #include "declare-variant-4.h"
 
 /* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx908 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been built for GCC. In that case, scan-offload-tree-dump will
+   PASS and linking will yield an XPASS message due to the following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compiling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx909.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx909.c
new file mode 100644
index 0000000..e8a6f63
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx909.c
@@ -0,0 +1,33 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx909 } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx909 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been built for GCC. In that case, scan-offload-tree-dump will
+   PASS and linking will yield an XPASS message due to the following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compiling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx90a.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx90a.c
index 832d174..de5626e 100644
--- a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx90a.c
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx90a.c
@@ -6,3 +6,28 @@
 #include "declare-variant-4.h"
 
 /* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx90a \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been built for GCC. In that case, scan-offload-tree-dump will
+   PASS and linking will yield an XPASS message due to the following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compiling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx90c.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx90c.c
index 44629a8..dfad7ec 100644
--- a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx90c.c
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx90c.c
@@ -6,3 +6,28 @@
 #include "declare-variant-4.h"
 
 /* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx90c \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been built for GCC. In that case, scan-offload-tree-dump will
+   PASS and linking will yield an XPASS message due to the following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compiling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx942.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx942.c
new file mode 100644
index 0000000..c8c7446
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx942.c
@@ -0,0 +1,33 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx942 } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx942 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been built for GCC. In that case, scan-offload-tree-dump will
+   PASS and linking will yield an XPASS message due to the following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compiling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx950.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx950.c
new file mode 100644
index 0000000..af81f11
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx950.c
@@ -0,0 +1,33 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx950 } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx950 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been built for GCC. In that case, scan-offload-tree-dump will
+   PASS and linking will yield an XPASS message due to the following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compiling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4.h b/libgomp/testsuite/libgomp.c/declare-variant-4.h
index 53788d2..dd97edb 100644
--- a/libgomp/testsuite/libgomp.c/declare-variant-4.h
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4.h
@@ -9,6 +9,20 @@ gfx900 (void)
 
 __attribute__ ((noipa))
 int
+gfx902 (void)
+{
+  return 0x902;
+}
+
+__attribute__ ((noipa))
+int
+gfx904 (void)
+{
+  return 0x904;
+}
+
+__attribute__ ((noipa))
+int
 gfx906 (void)
 {
   return 0x906;
@@ -23,6 +37,13 @@ gfx908 (void)
 
 __attribute__ ((noipa))
 int
+gfx909 (void)
+{
+  return 0x909;
+}
+
+__attribute__ ((noipa))
+int
 gfx90a (void)
 {
   return 0x90a;
@@ -37,6 +58,20 @@ gfx90c (void)
 
 __attribute__ ((noipa))
 int
+gfx942 (void)
+{
+  return 0x942;
+}
+
+__attribute__ ((noipa))
+int
+gfx950 (void)
+{
+  return 0x950;
+}
+
+__attribute__ ((noipa))
+int
 gfx1030 (void)
 {
   return 0x1030;
@@ -44,6 +79,41 @@ gfx1030 (void)
 
 __attribute__ ((noipa))
 int
+gfx1031 (void)
+{
+  return 0x1031;
+}
+
+__attribute__ ((noipa))
+int
+gfx1032 (void)
+{
+  return 0x1032;
+}
+
+__attribute__ ((noipa))
+int
+gfx1033 (void)
+{
+  return 0x1033;
+}
+
+__attribute__ ((noipa))
+int
+gfx1034 (void)
+{
+  return 0x1034;
+}
+
+__attribute__ ((noipa))
+int
+gfx1035 (void)
+{
+  return 0x1035;
+}
+
+__attribute__ ((noipa))
+int
 gfx1036 (void)
 {
   return 0x1036;
@@ -58,20 +128,111 @@ gfx1100 (void)
 
 __attribute__ ((noipa))
 int
+gfx1101 (void)
+{
+  return 0x1101;
+}
+
+__attribute__ ((noipa))
+int
+gfx1102 (void)
+{
+  return 0x1102;
+}
+
+__attribute__ ((noipa))
+int
 gfx1103 (void)
 {
   return 0x1103;
 }
 
+__attribute__ ((noipa))
+int
+gfx1150 (void)
+{
+  return 0x1150;
+}
+
+__attribute__ ((noipa))
+int
+gfx1151 (void)
+{
+  return 0x1151;
+}
+
+__attribute__ ((noipa))
+int
+gfx1152 (void)
+{
+  return 0x1152;
+}
+
+__attribute__ ((noipa))
+int
+gfx1153 (void)
+{
+  return 0x1153;
+}
+
+__attribute__ ((noipa))
+int
+gfx9_generic (void)
+{
+  return 0x90ff;
+}
+
+__attribute__ ((noipa))
+int
+gfx9_4_generic (void)
+{
+  return 0x94ff;
+}
+
+__attribute__ ((noipa))
+int
+gfx10_3_generic (void)
+{
+  return 0x103ff;
+}
+
+__attribute__ ((noipa))
+int
+gfx11_generic (void)
+{
+  return 0x110ff;
+}
+
+
 #pragma omp declare variant(gfx900) match(device = {isa("gfx900")})
+#pragma omp declare variant(gfx902) match(device = {isa("gfx902")})
+#pragma omp declare variant(gfx904) match(device = {isa("gfx904")})
 #pragma omp declare variant(gfx906) match(device = {isa("gfx906")})
 #pragma omp declare variant(gfx908) match(device = {isa("gfx908")})
+#pragma omp declare variant(gfx909) match(device = {isa("gfx909")})
 #pragma omp declare variant(gfx90a) match(device = {isa("gfx90a")})
 #pragma omp declare variant(gfx90c) match(device = {isa("gfx90c")})
+#pragma omp declare variant(gfx942) match(device = {isa("gfx942")})
+#pragma omp declare variant(gfx950) match(device = {isa("gfx950")})
 #pragma omp declare variant(gfx1030) match(device = {isa("gfx1030")})
+#pragma omp declare variant(gfx1031) match(device = {isa("gfx1031")})
+#pragma omp declare variant(gfx1032) match(device = {isa("gfx1032")})
+#pragma omp declare variant(gfx1033) match(device = {isa("gfx1033")})
+#pragma omp declare variant(gfx1034) match(device = {isa("gfx1034")})
+#pragma omp declare variant(gfx1035) match(device = {isa("gfx1035")})
 #pragma omp declare variant(gfx1036) match(device = {isa("gfx1036")})
 #pragma omp declare variant(gfx1100) match(device = {isa("gfx1100")})
+#pragma omp declare variant(gfx1101) match(device = {isa("gfx1101")})
+#pragma omp declare variant(gfx1102) match(device = {isa("gfx1102")})
 #pragma omp declare variant(gfx1103) match(device = {isa("gfx1103")})
+#pragma omp declare variant(gfx1150) match(device = {isa("gfx1150")})
+#pragma omp declare variant(gfx1151) match(device = {isa("gfx1151")})
+#pragma omp declare variant(gfx1152) match(device = {isa("gfx1152")})
+#pragma omp declare variant(gfx1153) match(device = {isa("gfx1153")})
+#pragma omp declare variant(gfx9_generic) match(device = {isa("gfx9-generic")})
+#pragma omp declare variant(gfx9_4_generic) match(device = {isa("gfx9-4-generic")})
+#pragma omp declare variant(gfx10_3_generic) match(device = {isa("gfx10-3-generic")})
+#pragma omp declare variant(gfx11_generic) match(device = {isa("gfx11-generic")})
 __attribute__ ((noipa))
 int
 f (void)
diff --git a/libgomp/testsuite/libgomp.c/device_uid.c b/libgomp/testsuite/libgomp.c/device_uid.c
index 0412d06..83aba0f 100644
--- a/libgomp/testsuite/libgomp.c/device_uid.c
+++ b/libgomp/testsuite/libgomp.c/device_uid.c
@@ -5,10 +5,12 @@
 int main()
 {
   const char **strs = (const char **) malloc (sizeof (char*) * (omp_get_num_devices () + 1));
-  for (int i = omp_invalid_device - 1; i <= omp_get_num_devices () + 1; i++)
+  for (int i = omp_default_device - 1; i <= omp_get_num_devices () + 1; i++)
     {
       const char *str = omp_get_uid_from_device (i);
       int dev = omp_get_device_from_uid (str);
+      if (i == omp_default_device)
+	i = omp_get_default_device ();
 // __builtin_printf("%i -> %s -> %d\n", i, str, dev);
       if (i < omp_initial_device || i > omp_get_num_devices ())
 	{
diff --git a/libgomp/testsuite/libgomp.c/interop-cuda-full.c b/libgomp/testsuite/libgomp.c/interop-cuda-full.c
index 38aa6b1..c48a934 100644
--- a/libgomp/testsuite/libgomp.c/interop-cuda-full.c
+++ b/libgomp/testsuite/libgomp.c/interop-cuda-full.c
@@ -1,3 +1,6 @@
+/* { dg-do run { target { offload_device_nvptx } } } */
+/* { dg-do link { target { ! offload_device_nvptx } } } */
+
 /* { dg-require-effective-target openacc_cuda } */
 /* { dg-require-effective-target openacc_cudart } */
 /* { dg-additional-options "-lcuda -lcudart" } */
diff --git a/libgomp/testsuite/libgomp.c/interop-cuda-libonly.c b/libgomp/testsuite/libgomp.c/interop-cuda-libonly.c
index 17cbb15..bc257a2 100644
--- a/libgomp/testsuite/libgomp.c/interop-cuda-libonly.c
+++ b/libgomp/testsuite/libgomp.c/interop-cuda-libonly.c
@@ -1,3 +1,6 @@
+/* { dg-do run { target { offload_device_nvptx } } } */
+/* { dg-do link { target { ! offload_device_nvptx } } } */
+
 /* { dg-require-effective-target openacc_libcudart } */
 /* { dg-require-effective-target openacc_libcuda } */
 /* { dg-additional-options "-lcuda -lcudart" } */
diff --git a/libgomp/testsuite/libgomp.c/interop-hip-amd-full.c b/libgomp/testsuite/libgomp.c/interop-hip-amd-full.c
index d7725fc..bd44f44 100644
--- a/libgomp/testsuite/libgomp.c/interop-hip-amd-full.c
+++ b/libgomp/testsuite/libgomp.c/interop-hip-amd-full.c
@@ -1,3 +1,6 @@
+/* { dg-do run { target { offload_device_gcn } } } */
+/* { dg-do link { target { ! offload_device_gcn } } } */
+
 /* { dg-require-effective-target gomp_hip_header_amd } */
 /* { dg-require-effective-target gomp_libamdhip64 } */
 /* { dg-additional-options "-lamdhip64" } */
diff --git a/libgomp/testsuite/libgomp.c/interop-hip-amd-no-hip-header.c b/libgomp/testsuite/libgomp.c/interop-hip-amd-no-hip-header.c
index 2584537..91ad987 100644
--- a/libgomp/testsuite/libgomp.c/interop-hip-amd-no-hip-header.c
+++ b/libgomp/testsuite/libgomp.c/interop-hip-amd-no-hip-header.c
@@ -1,3 +1,6 @@
+/* { dg-do run { target { offload_device_gcn } } } */
+/* { dg-do link { target { ! offload_device_gcn } } } */
+
 /* { dg-require-effective-target gomp_libamdhip64 } */
 /* { dg-additional-options "-lamdhip64" } */
 
diff --git a/libgomp/testsuite/libgomp.c/interop-hip-nvidia-full.c b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-full.c
index 79af47d..d5dc236 100644
--- a/libgomp/testsuite/libgomp.c/interop-hip-nvidia-full.c
+++ b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-full.c
@@ -1,3 +1,6 @@
+/* { dg-do run { target { offload_device_nvptx } } } */
+/* { dg-do link { target { ! offload_device_nvptx } } } */
+
 /* { dg-require-effective-target openacc_cudart } */
 /* { dg-require-effective-target openacc_cuda } */
 /* { dg-require-effective-target gomp_hip_header_nvidia } */
diff --git a/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-headers.c b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-headers.c
index 4586398..7cff2cb 100644
--- a/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-headers.c
+++ b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-headers.c
@@ -1,3 +1,6 @@
+/* { dg-do run { target { offload_device_nvptx } } } */
+/* { dg-do link { target { ! offload_device_nvptx } } } */
+
 /* { dg-require-effective-target openacc_libcudart } */
 /* { dg-require-effective-target openacc_libcuda } */
 /* { dg-additional-options "-lcuda -lcudart" } */
diff --git a/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c
index 4186984..7b7dc74 100644
--- a/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c
+++ b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c
@@ -1,3 +1,6 @@
+/* { dg-do run { target { offload_device_nvptx } } } */
+/* { dg-do link { target { ! offload_device_nvptx } } } */
+
 /* { dg-require-effective-target openacc_cudart } */
 /* { dg-require-effective-target openacc_cuda } */
 /* { dg-additional-options "-lcuda -lcudart" } */
diff --git a/libgomp/testsuite/libgomp.c/ipcp-cb-spec1.c b/libgomp/testsuite/libgomp.c/ipcp-cb-spec1.c
new file mode 100644
index 0000000..ff82f4c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/ipcp-cb-spec1.c
@@ -0,0 +1,18 @@
+/* Test that GOMP_task is special cased when cpyfn is NULL.  */
+
+/* { dg-do run } */
+/* { dg-options "-O3 -fopenmp -std=gnu99 -fdump-ipa-cp-details" } */
+/* { dg-require-effective-target fopenmp } */
+
+void test(int c) {
+  for (int i = 0; i < c; i++)
+    if (!__builtin_constant_p(c))
+      __builtin_abort();
+}
+int main() {
+#pragma omp task
+  test(7);
+  return 0;
+}
+
+/* { dg-final { scan-ipa-dump "Creating a specialized node of main._omp_fn" "cp" } } */
diff --git a/libgomp/testsuite/libgomp.c/ipcp-cb-spec2.c b/libgomp/testsuite/libgomp.c/ipcp-cb-spec2.c
new file mode 100644
index 0000000..30894d7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/ipcp-cb-spec2.c
@@ -0,0 +1,20 @@
+/* Check that GOMP_task doesn't produce callback edges when cpyfn is not
+   NULL.  */
+
+/* { dg-do run } */
+/* { dg-options "-O3 -fopenmp -std=gnu99 -fdump-ipa-cp-details" } */
+/* { dg-require-effective-target fopenmp } */
+
+void test(int *a) {
+  for (int i = 0; i < 100; i++) {
+    a[i] = i;
+  }
+}
+int main() {
+  int a[100];
+  __builtin_memset (a, 0, sizeof (a));
+  #pragma omp task
+  test (a);
+}
+
+/* { dg-final { scan-ipa-dump-not "Created callback edge" "cp" } } */
diff --git a/libgomp/testsuite/libgomp.c/ipcp-cb1.c b/libgomp/testsuite/libgomp.c/ipcp-cb1.c
new file mode 100644
index 0000000..e390f04
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/ipcp-cb1.c
@@ -0,0 +1,24 @@
+/* Test that we can propagate constants into outlined OpenMP kernels.
+   This tests the underlying callback attribute and its related edges.  */
+
+/* { dg-do run } */
+/* { dg-options "-O3 -fopenmp -std=gnu99 -fdump-ipa-cp-details" } */
+/* { dg-require-effective-target fopenmp } */
+
+int a[100];
+void test(int c) {
+#pragma omp parallel for
+  for (int i = 0; i < c; i++) {
+    if (!__builtin_constant_p(c)) {
+      __builtin_abort();
+    }
+    a[i] = i;
+  }
+}
+int main() {
+  test(100);
+  return a[5] - 5;
+}
+
+/* { dg-final { scan-ipa-dump "Creating a specialized node of test._omp_fn" "cp" } } */
+/* { dg-final { scan-ipa-dump "Aggregate replacements: 0\\\[0]=100\\(by_ref\\)" "cp" } } */
diff --git a/libgomp/testsuite/libgomp.c/pr122281.c b/libgomp/testsuite/libgomp.c/pr122281.c
new file mode 100644
index 0000000..68fc3be
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/pr122281.c
@@ -0,0 +1,43 @@
+/* { dg-do run }  */
+/* { dg-additional-options "-O3" }  */
+
+/* PR libgomp/122281  */
+/* PR middle-end/105001  */
+
+/* If SIMT is supported, the inner 'omp simd' is duplicated into
+   one SIMT and one SIMD variant. SIMT is currently only supported
+   with nvidia GPUs.  (This only happens with -O1 or higher.)
+
+   The duplication failed for the SIMD case as a tree was shared and
+   the initialization only happened in the SIMT branch, i.e. when
+   compiling for a SIMT-device, all non-SIMD (offload or host devices)
+   accesses failed (segfault) for the atomic update.  */
+
+#include <omp.h>
+
+int __attribute__((noinline, noclone))
+f(int *A, int n, int dev) {
+ int cnt = 0;
+ #pragma omp target map(cnt) map(to:A[0:n]) device(dev)
+ {
+   #pragma omp parallel for simd
+   for (int i = 0; i < n; i++)
+   if (A[i] != 0)
+     {
+       #pragma omp atomic
+       cnt++;
+     }
+ }
+ return cnt;
+}
+
+int main() {
+  int n = 10;
+  int A[10] = {11,22,33,44,55,66,77,88,99,110};
+
+  /* Run over all devices, including the host; the host should be SIMD,
+     some non-host devices might be SIMT.  */
+  for (int dev = omp_initial_device; dev <= omp_get_num_devices(); dev++)
+    if (f (A, n, dev) != 10)
+      __builtin_abort();
+}
diff --git a/libgomp/testsuite/libgomp.c/target-map-zero-sized-2.c b/libgomp/testsuite/libgomp.c/target-map-zero-sized-2.c
new file mode 100644
index 0000000..3220828
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-map-zero-sized-2.c
@@ -0,0 +1,74 @@
+int
+main ()
+{
+  int i, n;
+  int data[] = {1,2};
+  struct S { int **ptrset; };
+
+// -----------------------------------
+
+/* The produced mapping for sptr1->ptrset[i][:n]
+
+   GOMP_MAP_STRUCT (size = 1)
+      GOMP_MAP_ZERO_LEN_ARRAY_SECTION
+   GOMP_MAP_ZERO_LEN_ARRAY_SECTION
+      GOMP_MAP_ATTACH
+   GOMP_MAP_ATTACH -> attaching to 2nd GOMP_MAP_ZERO_LEN_ARRAY_SECTION
+
+which gets split into 3 separate map_vars calls; in particular,
+the latter is separate and points to an unmapped variable.
+
+Thus, it failed with:
+   libgomp: pointer target not mapped for attach  */
+
+  struct S s1, *sptr1;
+  s1.ptrset = (int **) __builtin_malloc (sizeof(void*) * 3);
+  s1.ptrset[0] = data;
+  s1.ptrset[1] = data;
+  s1.ptrset[2] = data;
+  sptr1 = &s1;
+
+  i = 1;
+  n = 0;
+  #pragma omp target enter data map(sptr1[:1], sptr1->ptrset[:3])
+  #pragma omp target enter data map(sptr1->ptrset[i][:n])
+
+  #pragma omp target exit data map(sptr1->ptrset[i][:n])
+  #pragma omp target exit data map(sptr1[:1], sptr1->ptrset[:3])
+
+  __builtin_free (s1.ptrset);
+
+// -----------------------------------
+
+/* The produced mapping for sptr2->ptrset[i][:n] is similar:
+
+   GOMP_MAP_STRUCT (size = 1)
+      GOMP_MAP_ZERO_LEN_ARRAY_SECTION
+   GOMP_MAP_TO  ! this one has now a finite size
+      GOMP_MAP_ATTACH
+   GOMP_MAP_ATTACH -> attach to the GOMP_MAP_TO
+
+As the latter GOMP_MAP_ATTACH has now a pointer target,
+the attachment worked.  */
+
+  struct S s2, *sptr2;
+  s2.ptrset = (int **) __builtin_malloc (sizeof(void*) * 3);
+  s2.ptrset[0] = data;
+  s2.ptrset[1] = data;
+  s2.ptrset[2] = data;
+  sptr2 = &s2;
+
+  i = 1;
+  n = 2;
+  #pragma omp target enter data map(sptr2[:1], sptr2->ptrset[:3])
+  #pragma omp target enter data map(sptr2->ptrset[i][:n])
+
+  #pragma omp target
+    if (sptr2->ptrset[1][0] != 1 || sptr2->ptrset[1][1] != 2)
+      __builtin_abort ();
+
+  #pragma omp target exit data map(sptr2->ptrset[i][:n])
+  #pragma omp target exit data map(sptr2[:1], sptr2->ptrset[:3])
+
+  __builtin_free (s2.ptrset);
+}
diff --git a/libgomp/testsuite/libgomp.c/target-map-zero-sized-3.c b/libgomp/testsuite/libgomp.c/target-map-zero-sized-3.c
new file mode 100644
index 0000000..580c6ad
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-map-zero-sized-3.c
@@ -0,0 +1,50 @@
+int
+main ()
+{
+  int i, n;
+  int data[] = {1,2};
+  struct S {
+    int **ptrset;
+    int **ptrset2;
+  };
+
+  /* This is the same as target-map-zero-sized-2.c, but by mixing
+     mapped and non-mapped items, the mapping before the ATTACH
+     might (or here: is) not actually associated with the
+     pointer used for attaching. Thus, a simple
+
+       if (openmp_p
+	   && (pragma_kind & GOMP_MAP_VARS_ENTER_DATA)
+	   && mapnum == 1)
+     check in target.c's gomp_map_vars_internal will fail
+     as mapnum > 1 but still the map associated with this
+     ATTACH is in a different set.  */
+
+  struct S s1, *sptr1;
+  s1.ptrset = (int **) __builtin_malloc (sizeof(void*) * 3);
+  s1.ptrset2 = (int **) __builtin_malloc (sizeof(void*) * 3);
+  s1.ptrset[0] = data;
+  s1.ptrset[1] = data;
+  s1.ptrset[2] = data;
+  s1.ptrset2[0] = data;
+  s1.ptrset2[1] = data;
+  s1.ptrset2[2] = data;
+  sptr1 = &s1;
+
+  i = 1;
+  n = 0;
+  #pragma omp target enter data map(data)
+  #pragma omp target enter data map(sptr1[:1], sptr1->ptrset[:3], sptr1->ptrset2[:3])
+  #pragma omp target enter data map(sptr1->ptrset[i][:n], sptr1->ptrset2[i][:n])
+
+  #pragma omp target map(sptr1->ptrset[i][:n], sptr1->ptrset2[i][:n])
+    if (sptr1->ptrset2[1][0] != 1 || sptr1->ptrset2[1][1] != 2)
+      __builtin_abort ();
+
+  #pragma omp target exit data map(sptr1->ptrset[i][:n], sptr1->ptrset2[i][:n])
+  #pragma omp target exit data map(sptr1[:1], sptr1->ptrset[:3], sptr1->ptrset2[:3])
+  #pragma omp target exit data map(data)
+
+  __builtin_free (s1.ptrset);
+  __builtin_free (s1.ptrset2);
+}
diff --git a/libgomp/testsuite/libgomp.c/target-map-zero-sized.c b/libgomp/testsuite/libgomp.c/target-map-zero-sized.c
new file mode 100644
index 0000000..7c4ab80
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-map-zero-sized.c
@@ -0,0 +1,107 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O0" }  */
+
+/* Issue showed up in the real world when large data was distributed
+   over multiple MPI processes - such that for one process n == 0
+   happened at run time.
+
+   Before,  map(var[:0])  and  map(var[:n])  with n > 0 were handled;
+   this patch now also handles  map(var[:n]) with n == 0.
+
+   Failed before with "libgomp: pointer target not mapped for attach".  */
+
+/* Here, the base address is shifted - which should have no effect,
+   but must work as well.  */
+void
+with_offset ()
+{
+  struct S {
+     int *ptr1, *ptr2;
+  };
+  struct S s1, s2;
+  int *a, *b, *c, *d;
+  s1.ptr1 = (int *) 0L;
+  s1.ptr2 = (int *) 0xdeedbeef;
+  s2.ptr1 = (int *) 0L;
+  s2.ptr2 = (int *) 0xdeedbeef;
+  a = (int *) 0L;
+  b = (int *) 0xdeedbeef;
+  c = (int *) 0L;
+  d = (int *) 0xdeedbeef;
+
+  int n1, n2, n3, n4;
+  n1 = n2 = n3 = n4 = 0;
+
+  #pragma omp target enter data map(s1.ptr1[4:n1], s1.ptr2[6:n2], a[3:n3], b[2:n4])
+
+  #pragma omp target map(s2.ptr1[4:n1], s2.ptr2[2:n2], c[6:n3], d[9:n4])
+  {
+    if (s2.ptr1 != (void *) 0L || s2.ptr2 != (void *) 0xdeedbeef
+	|| c != (void *) 0L || d != (void *) 0xdeedbeef)
+      __builtin_abort ();
+  }
+
+  #pragma omp target map(s1.ptr1[4:n1], s1.ptr2[6:n2], a[3:n3], b[2:n4])
+  {
+    if (s1.ptr1 != (void *) 0L || s1.ptr2 != (void *) 0xdeedbeef
+	|| a != (void *) 0L || b != (void *) 0xdeedbeef)
+      __builtin_abort ();
+  }
+
+  #pragma omp target
+  {
+    if (s1.ptr1 != (void *) 0L || s1.ptr2 != (void *) 0xdeedbeef
+	|| a != (void *) 0L || b != (void *) 0xdeedbeef)
+      __builtin_abort ();
+  }
+
+  #pragma omp target exit data map(s1.ptr1[4:n1], s1.ptr2[6:n2], a[3:n3], b[2:n4])
+}
+
+int
+main ()
+{
+  struct S {
+     int *ptr1, *ptr2;
+  };
+  struct S s1, s2;
+  int *a, *b, *c, *d;
+  s1.ptr1 = (int *) 0L;
+  s1.ptr2 = (int *) 0xdeedbeef;
+  s2.ptr1 = (int *) 0L;
+  s2.ptr2 = (int *) 0xdeedbeef;
+  a = (int *) 0L;
+  b = (int *) 0xdeedbeef;
+  c = (int *) 0L;
+  d = (int *) 0xdeedbeef;
+
+  int n1, n2, n3, n4;
+  n1 = n2 = n3 = n4 = 0;
+
+  #pragma omp target enter data map(s1.ptr1[:n1], s1.ptr2[:n2], a[:n3], b[:n4])
+
+  #pragma omp target map(s2.ptr1[:n1], s2.ptr2[:n2], c[:n3], d[:n4])
+  {
+    if (s2.ptr1 != (void *) 0L || s2.ptr2 != (void *) 0xdeedbeef
+	|| c != (void *) 0L || d != (void *) 0xdeedbeef)
+      __builtin_abort ();
+  }
+
+  #pragma omp target map(s1.ptr1[:n1], s1.ptr2[:n2], a[:n3], b[:n4])
+  {
+    if (s1.ptr1 != (void *) 0L || s1.ptr2 != (void *) 0xdeedbeef
+	|| a != (void *) 0L || b != (void *) 0xdeedbeef)
+      __builtin_abort ();
+  }
+
+  #pragma omp target
+  {
+    if (s1.ptr1 != (void *) 0L || s1.ptr2 != (void *) 0xdeedbeef
+	|| a != (void *) 0L || b != (void *) 0xdeedbeef)
+      __builtin_abort ();
+  }
+
+  #pragma omp target exit data map(s1.ptr1[:n1], s1.ptr2[:n2], a[:n3], b[:n4])
+
+  with_offset ();
+}
diff --git a/libgomp/testsuite/libgomp.fortran/alloc-comp-4.f90 b/libgomp/testsuite/libgomp.fortran/alloc-comp-4.f90
new file mode 100644
index 0000000..d5e982b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/alloc-comp-4.f90
@@ -0,0 +1,75 @@
+!
+! Check that mapping with map(var%tiles(1)) works.
+!
+! This uses deep mapping to handle the allocatable
+! derived-type components
+!
+! The tricky part is that GCC generates intermittently
+! an SSA_NAME that needs to be resolved.
+!
+module m
+type t
+ integer, allocatable :: den1(:,:), den2(:,:)
+end type t
+
+type t2
+ type(t), allocatable :: tiles(:)
+end type t2
+end
+
+use m
+use iso_c_binding
+implicit none (type, external)
+type(t2), target :: var
+logical :: is_self_map
+type(C_ptr) :: pden1, pden2, ptiles, ptiles1
+
+allocate(var%tiles(1))
+var%tiles(1)%den1 = reshape([1,2,3,4],[2,2])
+var%tiles(1)%den2 = reshape([11,22,33,44],[2,2])
+
+ptiles = c_loc(var%tiles)
+ptiles1 = c_loc(var%tiles(1))
+pden1 = c_loc(var%tiles(1)%den1)
+pden2 = c_loc(var%tiles(1)%den2)
+
+
+is_self_map = .false.
+!$omp target map(to: is_self_map)
+  is_self_map = .true.
+!$omp end target
+
+!$omp target enter data map(var%tiles(1))
+
+!$omp target firstprivate(ptiles, ptiles1, pden1, pden2)
+ if (any (var%tiles(1)%den1 /= reshape([1,2,3,4],[2,2]))) stop 1
+ if (any (var%tiles(1)%den2 /= reshape([11,22,33,44],[2,2]))) stop 2
+ var%tiles(1)%den1 = var%tiles(1)%den1 + 5
+ var%tiles(1)%den2 = var%tiles(1)%den2 + 7
+
+ if (is_self_map) then
+   if (.not. c_associated (ptiles, c_loc(var%tiles))) stop 3
+   if (.not. c_associated (ptiles1, c_loc(var%tiles(1)))) stop 4
+   if (.not. c_associated (pden1, c_loc(var%tiles(1)%den1))) stop 5
+   if (.not. c_associated (pden2, c_loc(var%tiles(1)%den2))) stop 6
+ else
+   if (c_associated (ptiles, c_loc(var%tiles))) stop 3
+   if (c_associated (ptiles1, c_loc(var%tiles(1)))) stop 4
+   if (c_associated (pden1, c_loc(var%tiles(1)%den1))) stop 5
+   if (c_associated (pden2, c_loc(var%tiles(1)%den2))) stop 6
+ endif
+!$omp end target
+
+if (is_self_map) then
+  if (any (var%tiles(1)%den1 /= 5 + reshape([1,2,3,4],[2,2]))) stop 7
+  if (any (var%tiles(1)%den2 /= 7 + reshape([11,22,33,44],[2,2]))) stop 8
+else
+  if (any (var%tiles(1)%den1 /= reshape([1,2,3,4],[2,2]))) stop 7
+  if (any (var%tiles(1)%den2 /= reshape([11,22,33,44],[2,2]))) stop 8
+endif
+
+!$omp target exit data map(var%tiles(1))
+
+if (any (var%tiles(1)%den1 /= 5 + reshape([1,2,3,4],[2,2]))) stop 7
+if (any (var%tiles(1)%den2 /= 7 + reshape([11,22,33,44],[2,2]))) stop 8
+end
diff --git a/libgomp/testsuite/libgomp.fortran/alloc-managed-1.f90 b/libgomp/testsuite/libgomp.fortran/alloc-managed-1.f90
new file mode 100644
index 0000000..e19eb04
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/alloc-managed-1.f90
@@ -0,0 +1,30 @@
+! { dg-do run }
+! { dg-require-effective-target omp_managedmem }
+! { dg-additional-options -foffload-options=amdgcn-amdhsa=-mxnack=on { target offload_target_amdgcn_with_xnack } }
+
+! Check that omp_alloc can allocate Managed Memory, and that host and target
+! can see the data, at the same address, without a mapping.
+
+program main
+  use omp_lib
+  use iso_c_binding
+  implicit none
+
+  type(c_ptr) :: cptr
+  integer, pointer :: a
+  integer(c_intptr_t) :: a_p, a_p2
+
+  cptr = omp_alloc(c_sizeof(a), ompx_gnu_managed_mem_alloc)
+  if (.not. c_associated(cptr)) stop 1
+
+  call c_f_pointer(cptr, a)
+  a = 42
+  a_p = transfer(c_loc(a), a_p)
+
+  !$omp target is_device_ptr(a)
+    a_p2 = transfer(c_loc(a), a_p2)
+    if (a /= 42 .or. a_p /= a_p2) stop 2
+  !$omp end target
+
+  call omp_free(cptr, ompx_gnu_managed_mem_alloc)
+end program main
diff --git a/libgomp/testsuite/libgomp.fortran/device_uid.f90 b/libgomp/testsuite/libgomp.fortran/device_uid.f90
index 504f6ca..9bc02e4 100644
--- a/libgomp/testsuite/libgomp.fortran/device_uid.f90
+++ b/libgomp/testsuite/libgomp.fortran/device_uid.f90
@@ -10,10 +10,13 @@ program main
 
   allocate(strs(0:omp_get_num_devices ()))
 
-  do i = omp_invalid_device - 1, omp_get_num_devices () + 1
+  do j = omp_default_device - 1, omp_get_num_devices () + 1
+    i = j
     str => omp_get_uid_from_device (i)
     dev = omp_get_device_from_uid (str)
 ! print *, i, str, dev
+    if (i == omp_default_device) &
+      i = omp_get_default_device ()
     if (i < omp_initial_device .or. i > omp_get_num_devices ()) then
       if (dev /= omp_invalid_device .or. associated(str)) &
         stop 1
diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip-amd-full.F90 b/libgomp/testsuite/libgomp.fortran/interop-hip-amd-full.F90
index bbd49dd..eb2f437 100644
--- a/libgomp/testsuite/libgomp.fortran/interop-hip-amd-full.F90
+++ b/libgomp/testsuite/libgomp.fortran/interop-hip-amd-full.F90
@@ -1,3 +1,6 @@
+! { dg-do run { target { offload_device_gcn } } }
+! { dg-do link { target { ! offload_device_gcn } } }
+
 ! { dg-require-effective-target gomp_hipfort_module }
 ! { dg-require-effective-target gomp_libamdhip64 }
 ! { dg-additional-options "-lamdhip64" }
diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip-amd-no-module.F90 b/libgomp/testsuite/libgomp.fortran/interop-hip-amd-no-module.F90
index 0afec83..0ebbe80 100644
--- a/libgomp/testsuite/libgomp.fortran/interop-hip-amd-no-module.F90
+++ b/libgomp/testsuite/libgomp.fortran/interop-hip-amd-no-module.F90
@@ -1,3 +1,6 @@
+! { dg-do run { target { offload_device_gcn } } }
+! { dg-do link { target { ! offload_device_gcn } } }
+
 ! { dg-require-effective-target gomp_libamdhip64 }
 ! { dg-additional-options "-lamdhip64" }
 
diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-full.F90 b/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-full.F90
index cef592f..d29a689 100644
--- a/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-full.F90
+++ b/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-full.F90
@@ -1,3 +1,6 @@
+! { dg-do run { target { offload_device_nvptx } } }
+! { dg-do link { target { ! offload_device_nvptx } } }
+
 ! { dg-require-effective-target gomp_hipfort_module }
 ! { dg-require-effective-target openacc_cudart }
 ! { dg-require-effective-target openacc_cuda }
diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90 b/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90
index c1ef29d..2063610 100644
--- a/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90
+++ b/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90
@@ -1,3 +1,6 @@
+! { dg-do run { target { offload_device_nvptx } } }
+! { dg-do link { target { ! offload_device_nvptx } } }
+
 ! { dg-require-effective-target openacc_libcudart }
 ! { dg-require-effective-target openacc_libcuda }
 ! { dg-additional-options "-lcuda -lcudart" }
diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9-usm.f90 b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9-usm.f90
new file mode 100644
index 0000000..90378c0
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9-usm.f90
@@ -0,0 +1,11 @@
+! { dg-additional-options "-cpp -DUSE_USM_REQUIREMENT=1 -Wno-openmp" }
+!
+! We silence the warning:
+!  Mapping of polymorphic list item '...' is unspecified behavior [-Wopenmp]
+!
+! Ensure that polymorphic mapping is diagnosed as undefined behavior
+! Ensure that static access to polymorphic variables works
+
+! Run map-alloc-comp-9.f90 in unified-shared-memory mode
+
+#include "map-alloc-comp-9.f90"
diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9.f90 b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9.f90
index 3cec392..26c73d7 100644
--- a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9.f90
+++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9.f90
@@ -1,8 +1,19 @@
+! { dg-additional-options "-cpp" }
+!
 ! Ensure that polymorphic mapping is diagnosed as undefined behavior
 ! Ensure that static access to polymorphic variables works
 
+! Some extended tests are only run with shared memory
+! To enforce this (where possible) on the device side:
+!   #define USE_USM_REQUIREMENT
+! which is done in map-alloc-comp-9-usm.f90
+
 subroutine test(case)
 implicit none(type, external)
+#ifdef USE_USM_REQUIREMENT
+  !$omp requires unified_shared_memory
+#endif
+
 type t
   integer :: x(4)
 end type t
@@ -73,10 +84,14 @@ var4%y2(2)%y%x%x = -7 * [1111,2222,3333,4444]
 var4%y2(2)%y%x2(1)%x = -8 * [1111,2222,3333,4444]
 var4%y2(2)%y%x2(2)%x = -9 * [1111,2222,3333,4444]
 
+#ifdef USE_USM_REQUIREMENT
+is_shared_mem = .true.
+#else
 is_shared_mem = .false.
 !$omp target map(to: is_shared_mem)
   is_shared_mem = .true.
 !$omp end target
+#endif
 
 if (case == 1) then
   ! implicit mapping
@@ -532,6 +547,10 @@ end subroutine test
 program main
   use omp_lib
   implicit none(type, external)
+#ifdef USE_USM_REQUIREMENT
+  !$omp requires unified_shared_memory
+#endif
+
   interface
     subroutine test(case)
       integer, value :: case
diff --git a/libgomp/testsuite/libgomp.fortran/map-subarray-10.f90 b/libgomp/testsuite/libgomp.fortran/map-subarray-10.f90
new file mode 100644
index 0000000..9afb845
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/map-subarray-10.f90
@@ -0,0 +1,40 @@
+! { dg-do run }
+
+! PR fortran/120505
+
+! This test case checks that explicit mapping of allocatable DT components from
+! different containing array elements followed by implicit deep mapping works.
+
+module m
+type t
+ integer, allocatable :: den1(:,:), den2(:,:)
+end type t
+
+type t2
+ type(t), allocatable :: tiles(:)
+end type t2
+
+type(t2) :: var
+end
+
+use m
+
+allocate(var%tiles(2))
+var%tiles(1)%den1 = reshape([1,2,3,4],[2,2])
+var%tiles(2)%den2 = reshape([11,22,33,44],[2,2])
+
+!$omp target enter data map(var%tiles(1)%den1, var%tiles(2)%den2)
+
+!$omp target
+ if (any (var%tiles(1)%den1 /= reshape([1,2,3,4],[2,2]))) stop 1
+ if (any (var%tiles(2)%den2 /= reshape([11,22,33,44],[2,2]))) stop 1
+ var%tiles(1)%den1 = var%tiles(1)%den1 + 5
+ var%tiles(2)%den2 = var%tiles(2)%den2 + 7
+!$omp end target
+
+!$omp target exit data map(var%tiles(1)%den1, var%tiles(2)%den2)
+
+if (any (var%tiles(1)%den1 /= 5 + reshape([1,2,3,4],[2,2]))) stop 1
+if (any (var%tiles(2)%den2 /= 7 + reshape([11,22,33,44],[2,2]))) stop 1
+
+end
diff --git a/libgomp/testsuite/libgomp.fortran/map-subarray-5.f90 b/libgomp/testsuite/libgomp.fortran/map-subarray-5.f90
index 59ad01a..7bf3102 100644
--- a/libgomp/testsuite/libgomp.fortran/map-subarray-5.f90
+++ b/libgomp/testsuite/libgomp.fortran/map-subarray-5.f90
@@ -50,5 +50,5 @@ end do
 end
 
 ! { dg-output "(\n|\r|\r\n)" { target offload_device_nonshared_as } }
-! { dg-output "libgomp: Mapped array elements must be the same .*(\n|\r|\r\n)+" { target offload_device_nonshared_as } }
+! { dg-output "libgomp: Mapped array elements must be the same or in increasing address order .*(\n|\r|\r\n)+" { target offload_device_nonshared_as } }
 ! { dg-shouldfail "" { offload_device_nonshared_as } }
diff --git a/libgomp/testsuite/libgomp.fortran/map-subarray-9.f90 b/libgomp/testsuite/libgomp.fortran/map-subarray-9.f90
new file mode 100644
index 0000000..f310155
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/map-subarray-9.f90
@@ -0,0 +1,40 @@
+! { dg-do run }
+
+! PR fortran/120505
+
+! This test case checks that explicit mapping of allocatable DT components
+! followed by implicit deep mapping works.
+
+module m
+type t
+ integer, allocatable :: den1(:,:), den2(:,:)
+end type t
+
+type t2
+ type(t), allocatable :: tiles(:)
+end type t2
+
+type(t2) :: var
+end
+
+use m
+
+allocate(var%tiles(1))
+var%tiles(1)%den1 = reshape([1,2,3,4],[2,2])
+var%tiles(1)%den2 = reshape([11,22,33,44],[2,2])
+
+!$omp target enter data map(var%tiles(1)%den1, var%tiles(1)%den2)
+
+!$omp target
+ if (any (var%tiles(1)%den1 /= reshape([1,2,3,4],[2,2]))) stop 1
+ if (any (var%tiles(1)%den2 /= reshape([11,22,33,44],[2,2]))) stop 1
+ var%tiles(1)%den1 = var%tiles(1)%den1 + 5
+ var%tiles(1)%den2 = var%tiles(1)%den2 + 7
+!$omp end target
+
+!$omp target exit data map(var%tiles(1)%den1, var%tiles(1)%den2)
+
+if (any (var%tiles(1)%den1 /= 5 + reshape([1,2,3,4],[2,2]))) stop 1
+if (any (var%tiles(1)%den2 /= 7 + reshape([11,22,33,44],[2,2]))) stop 1
+
+end
diff --git a/libgomp/testsuite/libgomp.fortran/metadirective-1.f90 b/libgomp/testsuite/libgomp.fortran/metadirective-1.f90
index 7b3e09f..d6f4d5b 100644
--- a/libgomp/testsuite/libgomp.fortran/metadirective-1.f90
+++ b/libgomp/testsuite/libgomp.fortran/metadirective-1.f90
@@ -1,4 +1,5 @@
-! { dg-do run }
+! { dg-do run { target { ! offload_target_nvptx } } }
+! { dg-do compile { target offload_target_nvptx } }
 
 program test
   implicit none
@@ -33,6 +34,10 @@ program test
 contains
   subroutine f (x, y, z)
     integer :: x(N), y(N), z(N)
+    ! The following fails as on the host the target side cannot be
+    ! resolved - and the 'teams' or not status affects how 'target'
+    ! is called. -> See PR118694, esp. comment 9.
+    ! Note also the dg-do compile above for offload_target_nvptx
 
     !$omp target map (to: x, y) map(from: z)
       block
@@ -43,6 +48,7 @@ contains
 	  z(i) = x(i) * y(i)
 	enddo
       end block
+    ! { dg-bogus "'target' construct with nested 'teams' construct contains directives outside of the 'teams' construct" "PR118694" { xfail offload_target_nvptx } .-9 }  */
   end subroutine
   subroutine g (x, y, z)
     integer :: x(N), y(N), z(N)
@@ -56,6 +62,7 @@ contains
 	  z(i) = x(i) * y(i)
 	enddo
     end block
+    ! { dg-bogus "'target' construct with nested 'teams' construct contains directives outside of the 'teams' construct" "PR118694" { xfail offload_target_nvptx } .-9 }  */
     !$omp end target
   end subroutine
 end program
diff --git a/libgomp/testsuite/libgomp.fortran/omp-default-device.f90 b/libgomp/testsuite/libgomp.fortran/omp-default-device.f90
new file mode 100644
index 0000000..28e3496
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/omp-default-device.f90
@@ -0,0 +1,61 @@
+program main
+  use omp_lib
+  implicit none (type, external)
+  integer :: dev, def_dev
+
+  if (omp_default_device >= -1 .or. omp_default_device == omp_invalid_device) &
+    error stop 1
+
+  dev = -99
+  def_dev = omp_get_default_device ()
+  !$omp target map(from: dev) device(omp_default_device)
+    dev = omp_get_device_num ()
+  !$omp end target
+
+  if (.not.is_same_dev (def_dev, dev)) &
+    error stop 2
+
+  do def_dev = omp_initial_device, omp_get_num_devices ()
+  block
+    character(:), pointer :: uid
+
+    uid => omp_get_uid_from_device(def_dev)
+    call omp_set_default_device (def_dev)
+    dev = -99
+    !$omp target map(from: dev) device(omp_default_device)
+      dev = omp_get_device_num ()
+    !$omp end target
+    if (.not.is_same_dev (def_dev, dev)) &
+      error stop 3
+
+    ! Shall not modify the ICV.
+    call omp_set_default_device (omp_default_device)
+    if (def_dev /= omp_get_default_device ()) &
+      error stop 4
+
+    ! Assume the ptr and not only the string is the same.
+    if (.not.associated(uid, omp_get_uid_from_device (omp_default_device))) &
+      error stop 5
+  end block
+  end do
+
+  call omp_set_default_device (omp_invalid_device)
+  ! Shall not modify the ICV.
+  call omp_set_default_device (omp_default_device)
+  if (omp_invalid_device /= omp_get_default_device ()) &
+    error stop 6
+
+contains
+
+  logical function is_same_dev (d1, d2)
+    integer, value :: d1, d2
+    integer :: num_dev
+
+    num_dev = omp_get_num_devices ()
+    if (d1 == omp_initial_device) &
+      d1 = num_dev
+    if (d2 == omp_initial_device) &
+      d2 = num_dev
+    is_same_dev = d1 == d2
+  end function is_same_dev
+end program
diff --git a/libgomp/testsuite/libgomp.fortran/omp_target_memset-2.f90 b/libgomp/testsuite/libgomp.fortran/omp_target_memset-2.f90
new file mode 100644
index 0000000..78c66d3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/omp_target_memset-2.f90
@@ -0,0 +1,72 @@
+! PR libgomp/120444
+! Async version
+
+use omp_lib
+use iso_c_binding
+implicit none (type, external)
+integer(c_int) :: dev
+
+!$omp parallel do
+do dev = omp_initial_device, omp_get_num_devices ()
+block
+  integer(c_int) :: i, val, start, tail
+  type(c_ptr) :: ptr, ptr2, tmpptr
+  integer(c_intptr_t) :: intptr
+  integer(c_size_t), parameter :: count = 1024
+  integer(omp_depend_kind) :: dep(1)
+
+  ptr = omp_target_alloc (count, dev)
+
+  !$omp depobj(dep(1)) depend(inout: ptr)
+
+  ! Also vary the alignment of the start and end of the region, as
+  ! hsa_amd_memory_fill operates on multiples of 4 bytes (c_int32_t).
+
+  do start = 0, 31
+    do tail = 0, 31
+      val = iachar('0') + start + tail
+
+      tmpptr = transfer (transfer (ptr, intptr) + start, tmpptr)
+      ptr2 = omp_target_memset_async (tmpptr, val, count - start - tail, dev, 0)
+
+      if (.not. c_associated (tmpptr, ptr2)) stop 1
+
+      !$omp taskwait
+
+      !$omp target device(dev) is_device_ptr(ptr) depend(depobj: dep(1)) nowait
+      block
+        integer(c_int8_t), pointer, contiguous :: fptr(:)
+        call c_f_pointer (ptr, fptr, [count])
+        do i = 1 + start, int(count, c_int) - start - tail
+          if (fptr(i) /= int (val, c_int8_t)) stop 2
+          fptr(i) = fptr(i) + 2_c_int8_t
+        end do
+      end block
+      !$omp end target
+
+      ptr2 = omp_target_memset_async (tmpptr, val + 3, &
+                                      count - start - tail, dev, 1, dep)
+
+      !$omp target device(dev) is_device_ptr(ptr) depend(depobj: dep(1)) nowait
+      block
+        integer(c_int8_t), pointer, contiguous :: fptr(:)
+        call c_f_pointer (ptr, fptr, [count])
+        do i = 1 + start, int(count, c_int) - start - tail
+          if (fptr(i) /= int (val + 3, c_int8_t)) stop 3
+          fptr(i) = fptr(i) - 1_c_int8_t
+        end do
+      end block
+      !$omp end target
+
+      ptr2 = omp_target_memset_async (tmpptr, val - 3, &
+                                      count - start - tail, dev, 1, dep)
+
+      !$omp taskwait depend (depobj: dep(1))
+    end do
+  end do
+
+  !$omp depobj(dep(1)) destroy
+  call omp_target_free (ptr, dev);
+end block
+end do
+end
diff --git a/libgomp/testsuite/libgomp.fortran/omp_target_memset.f90 b/libgomp/testsuite/libgomp.fortran/omp_target_memset.f90
new file mode 100644
index 0000000..91a6baa
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/omp_target_memset.f90
@@ -0,0 +1,41 @@
+! PR libgomp/120444
+
+use omp_lib
+use iso_c_binding
+implicit none (type, external)
+
+integer(c_int) :: dev, i, val, start, tail
+type(c_ptr) :: ptr, ptr2, tmpptr
+integer(c_intptr_t) :: intptr
+integer(c_size_t), parameter :: count = 1024
+
+do dev = omp_initial_device, omp_get_num_devices ()
+  ptr = omp_target_alloc (count, dev)
+
+  ! Also vary the alignment of the start and end of the region, as
+  ! hsa_amd_memory_fill operates on multiples of 4 bytes (c_int32_t).
+
+  do start = 0, 31
+    do tail = 0, 31
+      val = iachar('0') + start + tail
+
+      tmpptr = transfer (transfer (ptr, intptr) + start, tmpptr)
+      ptr2 = omp_target_memset (tmpptr, val, count - start - tail, dev)
+
+      if (.not. c_associated (tmpptr, ptr2)) stop 1
+
+      !$omp target device(dev) is_device_ptr(ptr)
+      block
+        integer(c_int8_t), pointer, contiguous :: fptr(:)
+        call c_f_pointer (ptr, fptr, [count])
+        do i = 1 + start, int(count, c_int) - start - tail
+          if (fptr(i) /= int (val, c_int8_t)) stop 2
+        end do
+      end block
+      !$omp end target
+    end do
+  end do
+
+  call omp_target_free (ptr, dev);
+end do
+end
diff --git a/libgomp/testsuite/libgomp.fortran/self_maps.f90 b/libgomp/testsuite/libgomp.fortran/self_maps.f90
index 208fd1c..6088968 100644
--- a/libgomp/testsuite/libgomp.fortran/self_maps.f90
+++ b/libgomp/testsuite/libgomp.fortran/self_maps.f90
@@ -1,4 +1,5 @@
 ! Basic test whether self_maps work
+! { dg-require-effective-target omp_usm }
 
 module m
   !$omp requires self_maps
diff --git a/libgomp/testsuite/libgomp.fortran/target-is-accessible-1.f90 b/libgomp/testsuite/libgomp.fortran/target-is-accessible-1.f90
index 150df6f..2c25dca 100644
--- a/libgomp/testsuite/libgomp.fortran/target-is-accessible-1.f90
+++ b/libgomp/testsuite/libgomp.fortran/target-is-accessible-1.f90
@@ -22,7 +22,7 @@ program main
   if (omp_target_is_accessible (p, c_sizeof (d), omp_initial_device) /= 1) &
     stop 3
 
-  if (omp_target_is_accessible (p, c_sizeof (d), -5) /= 0) &
+  if (omp_target_is_accessible (p, c_sizeof (d), -6) /= 0) &  ! -6 = omp_default_device - 1
     stop 4
 
   if (omp_target_is_accessible (p, c_sizeof (d), n + 1) /= 0) &
diff --git a/libgomp/testsuite/libgomp.graphite/force-parallel-1.c b/libgomp/testsuite/libgomp.graphite/force-parallel-1.c
index 0393356..b873d7a 100644
--- a/libgomp/testsuite/libgomp.graphite/force-parallel-1.c
+++ b/libgomp/testsuite/libgomp.graphite/force-parallel-1.c
@@ -2,7 +2,7 @@ void abort (void);
 
 int x[10000000];
 
-void parloop (int N)
+void __attribute__((noipa)) parloop (int N)
 {
   int i;
 
diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-1.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-1.C
index 0545601..e6cbe17 100644
--- a/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-1.C
+++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-1.C
@@ -52,3 +52,6 @@ int main()
    PR119692.
 
    { dg-shouldfail {'std::bad_cast' exception} } */
+/* There are configurations where we 'WARNING: program timed out.' while in
+   'dynamic_cast', see <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119692#c6>.
+   { dg-timeout 10 { target offload_device } } ... to make sure that happens quickly.  */
diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2.C
index 24399ef..599425f 100644
--- a/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2.C
+++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2.C
@@ -58,3 +58,6 @@ int main()
 
    For GCN, nvptx offload execution, there is no 'catch'ing; any exception is fatal.
    { dg-shouldfail {'std::bad_cast' exception} { ! openacc_host_selected } } */
+/* There are configurations where we 'WARNING: program timed out.' while in
+   'dynamic_cast', see <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119692#c6>.
+   { dg-timeout 10 { target offload_device } } ... to make sure that happens quickly.  */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c
new file mode 100644
index 0000000..4b54171
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c
@@ -0,0 +1,125 @@
+/* Inspired by 'gcc.target/nvptx/abi-struct-arg.c', 'gcc.target/nvptx/abi-struct-ret.c'.  */
+
+/* See also '../libgomp.c-c++-common/target-abi-struct-1.c'.  */
+
+/* To exercise PR119835 (if optimizations enabled): disable inlining, so that
+   GIMPLE passes still see the functions that return aggregate types.  */
+#pragma GCC optimize "-fno-inline"
+
+typedef struct {} empty;  /* See 'gcc/doc/extend.texi', "Empty Structures".  */
+typedef struct {char a;} schar;
+typedef struct {short a;} sshort;
+typedef struct {int a;} sint;
+typedef struct {long long a;} slonglong;
+typedef struct {int a, b[12];} sint_13;
+
+#pragma omp declare target
+
+#define M(T) ({T t; t.a = sizeof t; t;})
+
+static __SIZE_TYPE__ empty_a;
+#pragma acc declare create(empty_a)
+#pragma acc routine
+static empty rempty(void)
+{
+  return ({empty t; empty_a = sizeof t; t;});
+}
+
+#pragma acc routine
+static schar rschar(void)
+{
+  return M(schar);
+}
+
+#pragma acc routine
+static sshort rsshort(void)
+{
+  return M(sshort);
+}
+
+#pragma acc routine
+static sint rsint(void)
+{
+  return M(sint);
+}
+
+#pragma acc routine
+static slonglong rslonglong(void)
+{
+  return M(slonglong);
+}
+
+#pragma acc routine
+static sint_13 rsint_13(void)
+{
+  return M(sint_13);
+}
+
+#pragma acc routine
+static void aempty(empty empty)
+{
+  (void) empty;
+
+  __SIZE_TYPE__ empty_a_exp;
+#ifndef __cplusplus
+  empty_a_exp = 0;
+#else
+  empty_a_exp = sizeof (char);
+#endif
+  if (empty_a != empty_a_exp)
+    __builtin_abort();
+}
+
+#pragma acc routine
+static void aschar(schar schar)
+{
+  if (schar.a != sizeof (char))
+    __builtin_abort();
+}
+
+#pragma acc routine
+static void asshort(sshort sshort)
+{
+  if (sshort.a != sizeof (short))
+    __builtin_abort();
+}
+
+#pragma acc routine
+static void asint(sint sint)
+{
+  if (sint.a != sizeof (int))
+    __builtin_abort();
+}
+
+#pragma acc routine
+static void aslonglong(slonglong slonglong)
+{
+  if (slonglong.a != sizeof (long long))
+    __builtin_abort();
+}
+
+#pragma acc routine
+static void asint_13(sint_13 sint_13)
+{
+  if (sint_13.a != (sizeof (int) * 13))
+    __builtin_abort();
+}
+
+#pragma omp end declare target
+
+int main()
+{
+#pragma omp target
+#pragma acc serial
+  /* { dg-bogus {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected xfail *-*-* } .-1 } */
+  {
+    aempty(rempty());
+    aschar(rschar());
+    asshort(rsshort());
+    asint(rsint());
+    aslonglong(rslonglong());
+    asint_13(rsint_13());
+  }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c
new file mode 100644
index 0000000..eda651d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c
@@ -0,0 +1,96 @@
+/* { dg-prune-output "using .vector_length \\(32\\)" } */
+
+/* PR libgomp/93226  */
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <openacc.h>
+
+enum { N = 1024 };
+
+static int D[N];
+#pragma acc declare device_resident(D)
+
+#pragma acc routine
+intptr_t init_d()
+{
+  for (int i = 0; i < N; i++)
+    D[i] = 27*i;
+  return (intptr_t) &D[0];
+}
+
+int
+main ()
+{
+  int *a, *b, *e;
+  void *d_a, *d_b, *d_c, *d_d, *d_e, *d_f;
+  intptr_t intptr;
+  bool fail = false;
+
+  a = (int *) malloc (N*sizeof (int));
+  b = (int *) malloc (N*sizeof (int));
+  e = (int *) malloc (N*sizeof (int));
+  d_c = acc_malloc (N*sizeof (int));
+  d_f = acc_malloc (N*sizeof (int));
+
+  memset (e, 0xff, N*sizeof (int));
+  d_e = acc_copyin (e, N*sizeof (int));
+
+  #pragma acc serial copyout(intptr)
+    intptr = init_d ();
+  d_d = (void*) intptr;
+  acc_memcpy_device (d_c, d_d, N*sizeof (int));
+
+  #pragma acc serial copy(fail) deviceptr(d_c) firstprivate(intptr)
+  {
+    int *cc = (int *) d_c;
+    int *dd = (int *) intptr;
+    for (int i = 0; i < N; i++)
+      if (dd[i] != 27*i || cc[i] != 27*i)
+	{
+	  fail = true;
+	  __builtin_abort ();
+	}
+  }
+  if (fail) __builtin_abort ();
+
+  for (int i = 0; i < N; i++)
+    a[i] = 11*i;
+  for (int i = 0; i < N; i++)
+    b[i] = 31*i;
+
+  d_a = acc_copyin (a, N*sizeof (int));
+  acc_copyin_async (b, N*sizeof (int), acc_async_noval);
+
+  #pragma acc parallel deviceptr(d_c) async
+  {
+    int *cc = (int *) d_c;
+    #pragma acc loop
+    for (int i = 0; i < N; i++)
+      cc[i] = -17*i;
+  }
+
+  acc_memcpy_device_async (d_d, d_a, N*sizeof (int), acc_async_noval);
+  acc_memcpy_device_async (d_f, d_c, N*sizeof (int), acc_async_noval);
+  acc_wait (acc_async_noval);
+  d_b = acc_deviceptr (b);
+  acc_memcpy_device_async (d_e, d_b, N*sizeof (int), acc_async_noval);
+  acc_wait (acc_async_noval);
+
+  #pragma acc serial deviceptr(d_d, d_e, d_f) copy(fail)
+  {
+    int *dd = (int *) d_d;
+    int *ee = (int *) d_e;
+    int *ff = (int *) d_f;
+    for (int i = 0; i < N; i++)
+      if (dd[i] != 11*i
+	  || ee[i] != 31*i
+	  || ff[i] != -17*i)
+	{
+	  fail = true;
+	  __builtin_abort ();
+	}
+  }
+  if (fail) __builtin_abort ();
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/atomic_capture-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/atomic_capture-3.c
index b976094..b8a76a1 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/atomic_capture-3.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/atomic_capture-3.c
@@ -38,11 +38,9 @@ main(int argc, char **argv)
       imin = idata[i] < imin ? idata[i] : imin;
     }
 
-  if (imax != 1234 || imin != 0)
+  if (imax != 1234 || imin < 0 || imin > 1)
     abort ();
 
-  return 0;
-
   igot = 0;
   iexp = 32;
 
@@ -444,17 +442,16 @@ main(int argc, char **argv)
     }
   }
 
+  int ones = 0, zeros = 0;
+
   for (i = 0; i < N; i++)
-    if (i % 2 == 0)
-      {
-	if (idata[i] != 1)
-	  abort ();
-      }
-    else
-      {
-	if (idata[i] != 0)
-	  abort ();
-      }
+    if (idata[i] == 1)
+      ones++;
+    else if (idata[i] == 0)
+      zeros++;
+
+  if (ones != N / 2 || zeros != N / 2)
+    abort ();
 
   if (iexp != igot)
     abort ();
@@ -492,17 +489,16 @@ main(int argc, char **argv)
       }
   }
 
+  ones = zeros = 0;
+
   for (i = 0; i < N; i++)
-    if (i % 2 == 0)
-      {
-	if (idata[i] != 0)
-	  abort ();
-      }
-    else
-      {
-	if (idata[i] != 1)
-	  abort ();
-      }
+    if (idata[i] == 1)
+      ones++;
+    else if (idata[i] == 0)
+      zeros++;
+
+  if (ones != N / 2 || zeros != N / 2)
+    abort ();
 
   if (iexp != igot)
     abort ();
@@ -580,7 +576,7 @@ main(int argc, char **argv)
   if (lexp != lgot)
     abort ();
 
-  lgot = 2LL;
+  lgot = 2LL << N;
   lexp = 2LL;
 
 #pragma acc data copy (lgot, ldata[0:N])
@@ -588,7 +584,7 @@ main(int argc, char **argv)
 #pragma acc parallel loop
     for (i = 0; i < N; i++)
       {
-        long long expr = 1LL << N;
+	long long expr = 2LL;
 
 #pragma acc atomic capture
         { lgot = lgot / expr; ldata[i] = lgot; }
@@ -1451,17 +1447,16 @@ main(int argc, char **argv)
       }
   }
 
+  ones = zeros = 0;
+
   for (i = 0; i < N; i++)
-    if (i % 2 == 0)
-      {
-	if (fdata[i] != 1.0)
-	  abort ();
-      }
-    else
-      {
-	if (fdata[i] != 0.0)
-	  abort ();
-      }
+    if (fdata[i] == 1.0)
+      ones++;
+    else if (fdata[i] == 0.0)
+      zeros++;
+
+  if (ones != N / 2 || zeros != N / 2)
+    abort ();
 
   if (fexp != fgot)
     abort ();
@@ -1499,17 +1494,16 @@ main(int argc, char **argv)
       }
   }
 
+  ones = zeros = 0;
+
   for (i = 0; i < N; i++)
-    if (i % 2 == 0)
-      {
-	if (fdata[i] != 0.0)
-	  abort ();
-      }
-    else
-      {
-	if (fdata[i] != 1.0)
-	  abort ();
-      }
+    if (fdata[i] == 1.0)
+      ones++;
+    else if (fdata[i] == 0.0)
+      zeros++;
+
+  if (ones != N / 2 || zeros != N / 2)
+    abort ();
 
   if (fexp != fgot)
     abort ();
@@ -1570,7 +1564,7 @@ main(int argc, char **argv)
     abort ();
 
   fgot = 8192.0*8192.0*64.0;
-  fexp = 1.0;
+  fexp = fgot;
 
 #pragma acc data copy (fgot, fdata[0:N])
   {
@@ -1587,15 +1581,15 @@ main(int argc, char **argv)
   if (fexp != fgot)
     abort ();
 
-  fgot = 4.0;
-  fexp = 4.0;
+  fgot = 2.0 * (1LL << N);
+  fexp = 2.0;
 
 #pragma acc data copy (fgot, fdata[0:N])
   {
 #pragma acc parallel loop
     for (i = 0; i < N; i++)
       {
-        long long expr = 1LL << N;
+	long long expr = 2LL;
 
 #pragma acc atomic capture
         { fgot = fgot / expr; fdata[i] = fgot; }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2-lib.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2-lib.c
index e9d1eda..2af8afc 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2-lib.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2-lib.c
@@ -1,4 +1,4 @@
-/* Test asynchronous, unstructed data regions, runtime library variant.  */
+/* Test asynchronous, unstructured data regions, runtime library variant.  */
 /* See also data-2.c.  */
 
 #include <stdlib.h>
@@ -155,11 +155,23 @@ main (int argc, char **argv)
   for (int ii = 0; ii < N; ii++)
     e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
 
+  /* The kernels above use `a', so wait for them to finish with it before
+     exiting that array.  */
+  acc_wait_async (11, 10);
+  acc_wait_async (12, 10);
+  acc_wait_async (13, 10);
+  acc_wait_async (14, 10);
   acc_copyout_async (a, nbytes, 10);
   acc_copyout_async (b, nbytes, 11);
   acc_copyout_async (c, nbytes, 12);
   acc_copyout_async (d, nbytes, 13);
   acc_copyout_async (e, nbytes, 14);
+
+  /* As for `a', same goes for `N'.  */
+  acc_wait_async (11, 15);
+  acc_wait_async (12, 15);
+  acc_wait_async (13, 15);
+  acc_wait_async (14, 15);
   acc_delete_async (&N, sizeof (int), 15);
   acc_wait_all ();
 
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c
index 2fc4a59..b974277 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c
@@ -1,4 +1,4 @@
-/* Test asynchronous, unstructed data regions, directives variant.  */
+/* Test asynchronous, unstructured data regions, directives variant.  */
 /* See also data-2-lib.c.  */
 
 #include <stdlib.h>
@@ -149,12 +149,15 @@ main (int argc, char **argv)
   for (int ii = 0; ii < N; ii++)
     e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
 
-#pragma acc exit data copyout (a[0:N]) async (10)
+  /* The kernels above use `a', so wait for them to finish with it before
+     exiting that array.  */
+#pragma acc exit data copyout (a[0:N]) async (10) wait (11) wait (12) wait (13) wait (14)
 #pragma acc exit data copyout (b[0:N]) async (11)
 #pragma acc exit data copyout (c[0:N]) async (12)
 #pragma acc exit data copyout (d[0:N]) async (13)
 #pragma acc exit data copyout (e[0:N]) async (14)
-#pragma acc exit data delete (N) async (15)
+  /* As for `a', same goes for `N'.  */
+#pragma acc exit data delete (N) async (15)  wait (11) wait (12) wait (13) wait (14)
 #pragma acc wait
 
   for (i = 0; i < N; i++)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-1.c
index d8d7067..071c946 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-1.c
@@ -13,7 +13,7 @@ main ()
   int n = 100, i;
   struct dc v = { .a = 3, .b = (int *) malloc (sizeof (int) * n) };
 
-#pragma acc parallel loop copy(v.a, v.b[:n])
+#pragma acc parallel loop copy(v.a, v.b[ :n])
   for (i = 0; i < n; i++)
     v.b[i] = v.a;
 
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-16.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-16.c
index a7308e8..0735eae 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-16.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-16.c
@@ -65,11 +65,11 @@ int main (int argc, char* argv[])
 
   for (int i = 0; i < 99; i++)
     {
-#pragma acc enter data copyin(p.m.s2[:1])
-#pragma acc parallel loop copy(p.m.s2->e[:N])
+#pragma acc enter data copyin(p.m.s2[ :1])
+#pragma acc parallel loop copy(p.m.s2->e[ :N])
       for (int j = 0; j < N; j++)
 	p.m.s2->e[j]++;
-#pragma acc exit data delete(p.m.s2[:1])
+#pragma acc exit data delete(p.m.s2[ :1])
     }
 
   for (i = 0; i < N; i++)
@@ -80,16 +80,16 @@ int main (int argc, char* argv[])
 
   for (int i = 0; i < 99; i++)
     {
-#pragma acc enter data copyin(p.m.s2[:1])
-#pragma acc enter data copyin(p.m.s2->f[:1])
-#pragma acc parallel loop copy(p.m.s2->f->a[:N]) copy(p.m.s2->f->c[:N])
+#pragma acc enter data copyin(p.m.s2[ :1])
+#pragma acc enter data copyin(p.m.s2->f[ :1])
+#pragma acc parallel loop copy(p.m.s2->f->a[ :N]) copy(p.m.s2->f->c[ :N])
 	for (int j = 0; j < N; j++)
 	  {
 	    p.m.s2->f->a[j]++;
 	    p.m.s2->f->c[j]++;
 	  }
-#pragma acc exit data delete(p.m.s2->f[:1])
-#pragma acc exit data delete(p.m.s2[:1])
+#pragma acc exit data delete(p.m.s2->f[ :1])
+#pragma acc exit data delete(p.m.s2[ :1])
     }
 
   for (i = 0; i < N; i++)
@@ -100,10 +100,10 @@ int main (int argc, char* argv[])
 
   for (int i = 0; i < 99; i++)
     {
-#pragma acc enter data copyin(p.m.s2[:1]) copyin(p.n.s2[:1])
-#pragma acc enter data copyin(p.m.s2->f[:1]) copyin(p.n.s2->f[:1])
-#pragma acc parallel loop copy(p.m.s2->f->a[:N]) copy(p.m.s2->f->c[:N]) \
-			  copy(p.n.s2->f->a[:N]) copy(p.n.s2->f->c[:N])
+#pragma acc enter data copyin(p.m.s2[ :1]) copyin(p.n.s2[ :1])
+#pragma acc enter data copyin(p.m.s2->f[ :1]) copyin(p.n.s2->f[ :1])
+#pragma acc parallel loop copy(p.m.s2->f->a[ :N]) copy(p.m.s2->f->c[ :N]) \
+			  copy(p.n.s2->f->a[ :N]) copy(p.n.s2->f->c[ :N])
 	for (int j = 0; j < N; j++)
 	  {
 	    p.m.s2->f->a[j]++;
@@ -111,8 +111,8 @@ int main (int argc, char* argv[])
 	    p.n.s2->f->a[j]++;
 	    p.n.s2->f->c[j]++;
 	  }
-#pragma acc exit data delete(p.m.s2->f[:1]) delete(p.n.s2->f[:1])
-#pragma acc exit data delete(p.m.s2[:1]) delete(p.n.s2[:1])
+#pragma acc exit data delete(p.m.s2->f[ :1]) delete(p.n.s2->f[ :1])
+#pragma acc exit data delete(p.m.s2[ :1]) delete(p.n.s2[ :1])
     }
 
   for (i = 0; i < N; i++)
@@ -124,19 +124,19 @@ int main (int argc, char* argv[])
 
   for (int i = 0; i < 99; i++)
     {
-#pragma acc enter data copyin(p.m.s2[:1]) copyin(p.n.s2[:1])
-#pragma acc enter data copyin(p.n.s2->e[:N]) copyin(p.n.s2->f[:1]) \
-		       copyin(p.m.s2->f[:1])
-#pragma acc parallel loop copy(p.m.s2->f->a[:N]) copy(p.n.s2->f->a[:N])
+#pragma acc enter data copyin(p.m.s2[ :1]) copyin(p.n.s2[ :1])
+#pragma acc enter data copyin(p.n.s2->e[ :N]) copyin(p.n.s2->f[ :1]) \
+		       copyin(p.m.s2->f[ :1])
+#pragma acc parallel loop copy(p.m.s2->f->a[ :N]) copy(p.n.s2->f->a[ :N])
 	for (int j = 0; j < N; j++)
 	  {
 	    p.m.s2->f->a[j]++;
 	    p.n.s2->f->a[j]++;
 	    p.n.s2->e[j]++;
 	  }
-#pragma acc exit data delete(p.m.s2->f[:1]) delete(p.n.s2->f[:1]) \
-		      copyout(p.n.s2->e[:N])
-#pragma acc exit data delete(p.m.s2[:1]) delete(p.n.s2[:1])
+#pragma acc exit data delete(p.m.s2->f[ :1]) delete(p.n.s2->f[ :1]) \
+		      copyout(p.n.s2->e[ :N])
+#pragma acc exit data delete(p.m.s2[ :1]) delete(p.n.s2[ :1])
     }
 
   for (i = 0; i < N; i++)
@@ -148,11 +148,11 @@ int main (int argc, char* argv[])
 
   for (int i = 0; i < 99; i++)
     {
-#pragma acc enter data copyin(q->m.s2[:1])
-#pragma acc parallel loop copy(q->m.s2->e[:N])
+#pragma acc enter data copyin(q->m.s2[ :1])
+#pragma acc parallel loop copy(q->m.s2->e[ :N])
       for (int j = 0; j < N; j++)
 	q->m.s2->e[j]++;
-#pragma acc exit data delete(q->m.s2[:1])
+#pragma acc exit data delete(q->m.s2[ :1])
     }
 
   for (i = 0; i < N; i++)
@@ -163,16 +163,16 @@ int main (int argc, char* argv[])
 
   for (int i = 0; i < 99; i++)
     {
-#pragma acc enter data copyin(q->m.s2[:1])
-#pragma acc enter data copyin(q->m.s2->f[:1])
-#pragma acc parallel loop copy(q->m.s2->f->a[:N]) copy(q->m.s2->f->c[:N])
+#pragma acc enter data copyin(q->m.s2[ :1])
+#pragma acc enter data copyin(q->m.s2->f[ :1])
+#pragma acc parallel loop copy(q->m.s2->f->a[ :N]) copy(q->m.s2->f->c[ :N])
 	for (int j = 0; j < N; j++)
 	  {
 	    q->m.s2->f->a[j]++;
 	    q->m.s2->f->c[j]++;
 	  }
-#pragma acc exit data delete(q->m.s2->f[:1])
-#pragma acc exit data delete(q->m.s2[:1])
+#pragma acc exit data delete(q->m.s2->f[ :1])
+#pragma acc exit data delete(q->m.s2[ :1])
     }
 
   for (i = 0; i < N; i++)
@@ -183,10 +183,10 @@ int main (int argc, char* argv[])
 
   for (int i = 0; i < 99; i++)
     {
-#pragma acc enter data copyin(q->m.s2[:1]) copyin(q->n.s2[:1])
-#pragma acc enter data copyin(q->m.s2->f[:1]) copyin(q->n.s2->f[:1])
-#pragma acc parallel loop copy(q->m.s2->f->a[:N]) copy(q->m.s2->f->c[:N]) \
-			  copy(q->n.s2->f->a[:N]) copy(q->n.s2->f->c[:N])
+#pragma acc enter data copyin(q->m.s2[ :1]) copyin(q->n.s2[ :1])
+#pragma acc enter data copyin(q->m.s2->f[ :1]) copyin(q->n.s2->f[ :1])
+#pragma acc parallel loop copy(q->m.s2->f->a[ :N]) copy(q->m.s2->f->c[ :N]) \
+			  copy(q->n.s2->f->a[ :N]) copy(q->n.s2->f->c[ :N])
 	for (int j = 0; j < N; j++)
 	  {
 	    q->m.s2->f->a[j]++;
@@ -194,8 +194,8 @@ int main (int argc, char* argv[])
 	    q->n.s2->f->a[j]++;
 	    q->n.s2->f->c[j]++;
 	  }
-#pragma acc exit data delete(q->m.s2->f[:1]) delete(q->n.s2->f[:1])
-#pragma acc exit data delete(q->m.s2[:1]) delete(q->n.s2[:1])
+#pragma acc exit data delete(q->m.s2->f[ :1]) delete(q->n.s2->f[ :1])
+#pragma acc exit data delete(q->m.s2[ :1]) delete(q->n.s2[ :1])
     }
 
   for (i = 0; i < N; i++)
@@ -207,19 +207,19 @@ int main (int argc, char* argv[])
 
   for (int i = 0; i < 99; i++)
     {
-#pragma acc enter data copyin(q->m.s2[:1]) copyin(q->n.s2[:1])
-#pragma acc enter data copyin(q->n.s2->e[:N]) copyin(q->m.s2->f[:1]) \
-		       copyin(q->n.s2->f[:1])
-#pragma acc parallel loop copy(q->m.s2->f->a[:N]) copy(q->n.s2->f->a[:N])
+#pragma acc enter data copyin(q->m.s2[ :1]) copyin(q->n.s2[ :1])
+#pragma acc enter data copyin(q->n.s2->e[ :N]) copyin(q->m.s2->f[ :1]) \
+		       copyin(q->n.s2->f[ :1])
+#pragma acc parallel loop copy(q->m.s2->f->a[ :N]) copy(q->n.s2->f->a[ :N])
 	for (int j = 0; j < N; j++)
 	  {
 	    q->m.s2->f->a[j]++;
 	    q->n.s2->f->a[j]++;
 	    q->n.s2->e[j]++;
 	  }
-#pragma acc exit data delete(q->m.s2->f[:1]) delete(q->n.s2->f[:1]) \
-		      copyout(q->n.s2->e[:N])
-#pragma acc exit data delete(q->m.s2[:1]) delete(q->n.s2[:1])
+#pragma acc exit data delete(q->m.s2->f[ :1]) delete(q->n.s2->f[ :1]) \
+		      copyout(q->n.s2->e[ :N])
+#pragma acc exit data delete(q->m.s2[ :1]) delete(q->n.s2[ :1])
     }
 
   for (i = 0; i < N; i++)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-3.c
index cec764b..65950c5 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-3.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-3.c
@@ -12,18 +12,18 @@ main ()
   for (i = 0; i < n; i++)
     a[i] = i+1;
 
-#pragma acc enter data copyin(a[:n]) create(b)
+#pragma acc enter data copyin(a[ :n]) create(b)
 
   b = a;
   acc_attach ((void **)&b);
 
-#pragma acc parallel loop present (b[:n])
+#pragma acc parallel loop present (b[ :n])
   for (i = 0; i < n; i++)
     b[i] = i+1;
 
   acc_detach ((void **)&b);
 
-#pragma acc exit data copyout(a[:n], b)
+#pragma acc exit data copyout(a[ :n], b)
 
   for (i = 0; i < 10; i++)
     assert (a[i] == b[i]);
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-4.c
index 8874ca0..acbca9d 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-4.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-4.c
@@ -14,7 +14,7 @@ sum_nodes (struct node *head)
 {
   int i = 0, sum = 0;
 
-#pragma acc parallel reduction(+:sum) present(head[:1])
+#pragma acc parallel reduction(+:sum) present(head[ :1])
   {
     for (; head != NULL; head = head->next)
       sum += head->val;
@@ -37,7 +37,7 @@ insert (struct node *head, int val)
   n->next = head->next;
   head->next = n;
 
-#pragma acc enter data copyin(n[:1])
+#pragma acc enter data copyin(n[ :1])
 #pragma acc enter data attach(head->next)
   if (n->next)
     {
@@ -57,7 +57,7 @@ destroy (struct node *head)
 	{
 #pragma acc exit data detach(n->next)
 	}
-#pragma acc exit data delete (n[:1])
+#pragma acc exit data delete (n[ :1])
       if (head->next)
 	{
 #pragma acc enter data attach(head->next)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-5.c
index 89cafbb..ce2f0d3 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-5.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-5.c
@@ -13,7 +13,7 @@ sum_nodes (struct node *head)
 {
   int i = 0, sum = 0;
 
-#pragma acc parallel reduction(+:sum) present(head[:1])
+#pragma acc parallel reduction(+:sum) present(head[ :1])
   {
     for (; head != NULL; head = head->next)
       sum += head->val;
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-6.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-6.c
index 3911494..9603bdc 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-6.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-6.c
@@ -24,7 +24,7 @@ main ()
     {
 #pragma acc data copy(v)
       {
-#pragma acc data copy(v.b[:n])
+#pragma acc data copy(v.b[ :n])
 	{
 	  for (i = 0; i < n; i++)
 	    {
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c
index 13e5ca2..0bc7351 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c
@@ -31,7 +31,7 @@ main ()
       for (i = 0; i < n; i++)
 	v.b[i] = v.a + i;
 
-#pragma acc exit data copyout(v.b[:n]) finalize
+#pragma acc exit data copyout(v.b[ :n]) finalize
 #pragma acc exit data delete(v.a)
 
       for (i = 0; i < n; i++)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c
index e705f78..a132cb6 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c
@@ -35,15 +35,15 @@ test (unsigned variant)
 
   for (k = 0; k < 16; k++)
     {
-#pragma acc enter data copyin(v.a, v.b[:n], v.c[:n], v.d[:n])
+#pragma acc enter data copyin(v.a, v.b[ :n], v.c[ :n], v.d[ :n])
 
 #pragma acc parallel loop
       for (i = 0; i < n; i++)
 	v.b[i] = v.a + i;
 
-#pragma acc exit data copyout(v.b[:n])
-#pragma acc exit data copyout(v.c[:n])
-#pragma acc exit data copyout(v.d[:n])
+#pragma acc exit data copyout(v.b[ :n])
+#pragma acc exit data copyout(v.c[ :n])
+#pragma acc exit data copyout(v.d[ :n])
 #pragma acc exit data copyout(v.a)
 
       for (i = 0; i < n; i++)
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-1.f90
new file mode 100644
index 0000000..15393b4
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-1.f90
@@ -0,0 +1,25 @@
+! { dg-do compile }
+! { dg-additional-options "-fdump-tree-original" }
+
+use openacc
+implicit none (type, external)
+integer,pointer :: a, b(:)
+integer,allocatable :: c, d(:)
+
+call acc_attach(a)  ! ICE
+call acc_attach_async(b, 4)
+call acc_attach(c)
+
+call acc_detach(a)
+call acc_detach_async(b, 4)
+call acc_detach_finalize(c)
+call acc_detach_finalize_async(d,7)
+end
+
+! { dg-final { scan-tree-dump-times "acc_attach \\(&a\\);" 1 "original" } }
+! { dg-final { scan-tree-dump-times "acc_attach_async \\(&\\(integer\\(kind=4\\)\\\[0:\\\] \\*\\) b.data, 4\\);" 1 "original" } }
+! { dg-final { scan-tree-dump-times "acc_attach \\(&c\\);" 1 "original" } }
+! { dg-final { scan-tree-dump-times "acc_detach \\(&a\\);" 1 "original" } }
+! { dg-final { scan-tree-dump-times "acc_detach_async \\(&\\(integer\\(kind=4\\)\\\[0:\\\] \\*\\) b.data, 4\\);" 1 "original" } }
+! { dg-final { scan-tree-dump-times "acc_detach_finalize \\(&c\\);" 1 "original" } }
+! { dg-final { scan-tree-dump-times "acc_detach_finalize_async \\(&\\(integer\\(kind=4\\)\\\[0:\\\] \\* restrict\\) d.data, 7\\);" 1 "original" } }
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-2.f90
new file mode 100644
index 0000000..b2204ac
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-2.f90
@@ -0,0 +1,62 @@
+! { dg-do run }
+
+use openacc
+implicit none (type, external)
+integer, target :: tgt_a, tgt_b(5)
+
+integer, pointer :: p1, p2(:)
+
+type t
+  integer,pointer :: a => null ()
+  integer,pointer :: b(:) => null ()
+  integer,allocatable :: c, d(:)
+end type t
+
+type(t), target :: var
+
+tgt_a = 51
+tgt_b = [11,22,33,44,55]
+
+var%b => tgt_b
+!$acc enter data copyin(var, tgt_a, tgt_b)
+var%a => tgt_a
+
+call acc_attach(var%a)
+call acc_attach(var%b)
+
+!$acc serial
+! { dg-warning "using .vector_length \\(32\\)., ignoring 1" "" { target openacc_nvidia_accel_selected } .-1 }
+  if (var%a /= 51) stop 1
+  if (any (var%b /= [11,22,33,44,55])) stop 2
+!$acc end serial
+
+call acc_detach(var%a)
+call acc_detach(var%b)
+
+!$acc exit data delete(var, tgt_a, tgt_b)
+
+var%c = 9
+var%d = [1,2,3]
+
+p1 => var%c
+p2 => var%d
+
+!$acc enter data copyin(p1, p2)
+!$acc enter data copyin(var)
+call acc_attach(var%c)
+call acc_attach(var%d)
+
+!$acc serial
+! { dg-warning "using .vector_length \\(32\\)., ignoring 1" "" { target openacc_nvidia_accel_selected } .-1 }
+  if (var%c /= 9) stop 3
+  if (any (var%d /= [1,2,3])) stop 4
+!$acc end serial
+
+call acc_detach(var%c)
+call acc_detach(var%d)
+
+!$acc exit data delete(var, p1, p2)
+
+deallocate(var%d)
+
+end
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90
new file mode 100644
index 0000000..8f3a8f0
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90
@@ -0,0 +1,113 @@
+! { dg-prune-output "using .vector_length \\(32\\)" }
+
+! PR libgomp/93226
+
+module m
+  use iso_c_binding
+  use openacc
+  implicit none (external, type)
+  ! N is the element count of the device_resident array D declared below.
+  integer, parameter :: N = 1024
+
+  integer :: D(N)
+  !$acc declare device_resident(D)
+
+contains
+  ! Set D(i) = 27*i for i = 1..N and return loc(D) as a c_intptr_t integer.
+  integer(c_intptr_t) function init_d()
+    !$acc routine
+    integer :: i
+    do i = 1, N
+      D(i) = 27*i
+    end do
+    init_d = loc(D)
+  end
+end module
+
+program main
+  use m
+  implicit none (external, type)
+  ! d_a .. d_f are the c_ptr handles used with acc_memcpy_device and deviceptr.
+  integer, allocatable, target :: a(:), b(:), e(:)
+  type(c_ptr) :: d_a, d_b, d_c, d_d, d_e, d_f
+  integer(c_intptr_t) intptr
+  integer :: i
+  logical fail
+
+  fail = .false.
+
+  allocate(a(N), b(N), e(N))
+  d_c = acc_malloc (N*c_sizeof (i))
+  d_f = acc_malloc (N*c_sizeof (i))
+  ! Fill e with huge() and copy it in; d_e is what acc_deviceptr returns for it.
+  e = huge(e)
+  call acc_copyin (e, N*c_sizeof (i));
+  d_e = acc_deviceptr (e);
+  ! Call init_d in a compute region and convert the returned address into d_d.
+  !$acc serial copyout(intptr)
+    intptr = init_d ()
+  !$acc end serial
+  d_d = transfer(intptr, d_d)
+  call acc_memcpy_device (d_c, d_d, N*c_sizeof (i))
+  ! After the copy, both d_d and d_c must hold 27*i.
+  !$acc serial copy(fail) copy(a) deviceptr(d_c, d_d) firstprivate(intptr)
+    block
+      integer, pointer :: cc(:), dd(:)
+      call c_f_pointer (d_c, cc, [N])
+      call c_f_pointer (d_d, dd, [N])
+      a = cc
+      do i = 1, N
+        if (dd(i) /= 27*i .or. cc(i) /= 27*i) then
+          fail = .true.
+          stop 1
+        end if
+      end do
+    end block
+  !$acc end serial
+  if (fail) error stop 1
+  ! New host values: a(i) = 11*i, b(i) = 31*i.
+  do i = 1, N
+    a(i) = 11*i
+    b(i) = 31*i
+  end do
+  ! a uses acc_copyin; b uses acc_copyin_async with acc_async_noval.
+  call acc_copyin (a, N*c_sizeof (i))
+  d_a = acc_deviceptr (a)
+  call acc_copyin_async (b, N*c_sizeof (i), acc_async_noval)
+  ! Overwrite the d_c buffer with -17*i in an async parallel region.
+  !$acc parallel deviceptr(d_c) private(i) async
+    block
+      integer, pointer :: cc(:)
+      call c_f_pointer (d_c, cc, [N])
+      !$acc loop
+      do i = 1, N
+        cc(i) = -17*i
+      end do
+    end block
+  !$acc end parallel
+  ! Expected afterwards: d_d holds a (11*i), d_f holds d_c (-17*i), d_e holds b (31*i).
+  call acc_memcpy_device_async (d_d, d_a, N*c_sizeof (i), acc_async_noval)
+  call acc_memcpy_device_async (d_f, d_c, N*c_sizeof (i), acc_async_noval)
+  call acc_wait (acc_async_noval)
+  d_b = acc_deviceptr (b)
+  call acc_memcpy_device_async (d_e, d_b, N*c_sizeof (i), acc_async_noval)
+  call acc_wait (acc_async_noval)
+  ! Check the three destination buffers inside a compute region.
+  !$acc serial deviceptr(d_d, d_e, d_f) private(i) copy(fail)
+    block
+    integer, pointer :: dd(:), ee(:), ff(:)
+    call c_f_pointer (d_d, dd, [N])
+    call c_f_pointer (d_e, ee, [N])
+    call c_f_pointer (d_f, ff, [N])
+    do i = 1, N
+      if (dd(i) /= 11*i        &
+          .or. ee(i) /= 31*i   &
+          .or. ff(i) /= -17*i) then
+        fail = .true.
+        stop 2
+      end if
+    end do
+    end block
+  !$acc end serial
+  if (fail) error stop 2
+end
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-13.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-13.f90
index deb2c28..f6bd27a 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/lib-13.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-13.f90
@@ -19,11 +19,10 @@ program main
         end do
       !$acc end parallel
     end do
-  !$acc end data
 
   call acc_wait_all_async (nprocs + 1)
-
   call acc_wait (nprocs + 1)
+  !$acc end data
 
   if (acc_async_test (1) .neqv. .TRUE.) stop 1
   if (acc_async_test (2) .neqv. .TRUE.) stop 2
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/pr92970-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/pr92970-1.f90
new file mode 100644
index 0000000..c623534
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/pr92970-1.f90
@@ -0,0 +1,71 @@
+! Verify that 'acc_delete' etc. on non-present data is a no-op.
+!
+! Fortran version of libgomp.oacc-c-c++-common/pr92970-1.c
+
+program main
+use openacc
+implicit none (type, external)
+! Neither a nor b is entered into a data region anywhere in this program.
+integer :: a, b, async
+
+! Side remark: 'sizeof' is a GNU extension;
+! for standard conforming code, use c_sizeof or (in bits) storage_size.
+! 'async' is incremented around each async use, so every one gets a new value.
+async = 0
+!$acc exit data copyout (a)
+  call acc_copyout (a, sizeof (a))
+!$acc exit data copyout (a) async (async)
+  async = async + 1
+  call acc_copyout_async (a, sizeof (a), async)
+  async = async + 1
+!$acc exit data copyout (a) finalize
+  call acc_copyout_finalize (a, sizeof (a))
+!$acc exit data copyout (a) finalize async (async)
+  async = async + 1
+  call acc_copyout_finalize_async (a, sizeof (a), async)
+  async = async + 1
+! Same sequence for the delete variants.
+!$acc exit data delete (a)
+  call acc_delete (a, sizeof (a))
+!$acc exit data delete (a) async (async)
+  async = async + 1
+  call acc_delete_async (a, sizeof (a), async)
+  async = async + 1
+!$acc exit data delete (a) finalize
+  call acc_delete_finalize (a, sizeof (a))
+!$acc exit data delete (a) finalize async (async)
+  async = async + 1
+  call acc_delete_finalize_async (a, sizeof (a), async)
+  async = async + 1
+
+
+! Same but taking the byte size from the argument
+
+!$acc exit data copyout (b)
+  call acc_copyout (b)
+!$acc exit data copyout (b) async (async)
+  async = async + 1
+  call acc_copyout_async (b, async)
+  async = async + 1
+!$acc exit data copyout (b) finalize
+  call acc_copyout_finalize (b)
+!$acc exit data copyout (b) finalize async (async)
+  async = async + 1
+  call acc_copyout_finalize_async (b, async)
+  async = async + 1
+! Same sequence for the delete variants, again without an explicit size.
+!$acc exit data delete (b)
+  call acc_delete (b)
+!$acc exit data delete (b) async (async)
+  async = async + 1
+  call acc_delete_async (b, async)
+  async = async + 1
+!$acc exit data delete (b) finalize
+  call acc_delete_finalize (b)
+!$acc exit data delete (b) finalize async (async)
+  async = async + 1
+  call acc_delete_finalize_async (b, async)
+  async = async + 1
+! The program ends with a call to acc_wait_all.
+  call acc_wait_all ()
+end