Diffstat (limited to 'libgomp/testsuite')
-rw-r--r--  libgomp/testsuite/lib/libgomp.exp | 134
-rw-r--r--  libgomp/testsuite/libgomp.c++/allocator-1.C | 13
-rw-r--r--  libgomp/testsuite/libgomp.c++/allocator-2.C | 9
-rw-r--r--  libgomp/testsuite/libgomp.c++/declare-mapper-1.C | 87
-rw-r--r--  libgomp/testsuite/libgomp.c++/declare-mapper-2.C | 55
-rw-r--r--  libgomp/testsuite/libgomp.c++/declare-mapper-3.C | 63
-rw-r--r--  libgomp/testsuite/libgomp.c++/declare-mapper-4.C | 63
-rw-r--r--  libgomp/testsuite/libgomp.c++/declare-mapper-5.C | 52
-rw-r--r--  libgomp/testsuite/libgomp.c++/declare-mapper-6.C | 37
-rw-r--r--  libgomp/testsuite/libgomp.c++/declare-mapper-7.C | 59
-rw-r--r--  libgomp/testsuite/libgomp.c++/declare-mapper-8.C | 61
-rw-r--r--  libgomp/testsuite/libgomp.c++/declare_target-2.C | 25
-rw-r--r--  libgomp/testsuite/libgomp.c++/pr106445-1-O0.C | 3
-rw-r--r--  libgomp/testsuite/libgomp.c++/pr106445-1.C | 18
-rw-r--r--  libgomp/testsuite/libgomp.c++/pr119692-1-4.C | 3
-rw-r--r--  libgomp/testsuite/libgomp.c++/pr119692-1-5.C | 3
-rw-r--r--  libgomp/testsuite/libgomp.c++/pr96390.C | 2
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-cdtor-1.C | 104
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-cdtor-2.C | 140
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-1.C | 3
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-2.C | 3
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C | 6
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C | 6
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1.C | 12
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-exceptions-throw-1.C | 3
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-exceptions-throw-2.C | 3
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-flex-10.C | 215
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-flex-100.C | 210
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-flex-101.C | 136
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-flex-11.C | 444
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-flex-12.C | 736
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-flex-2000.C | 32
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-flex-2001.C | 61
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-flex-2002.C | 97
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-flex-2003.C | 176
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-flex-30.C | 51
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-flex-300.C | 49
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-flex-31.C | 80
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-flex-32.C | 50
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-flex-33.C | 52
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-flex-41.C | 94
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-flex-60.C | 46
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-flex-61.C | 54
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-flex-62.C | 50
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-flex-70.C | 26
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-flex-80.C | 49
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-flex-81.C | 75
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-flex-90.C | 107
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-flex-common.h | 40
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__array-concurrent-usm.C | 5
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__array-concurrent.C | 62
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent-usm.C | 5
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent.C | 69
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__cmath.C | 340
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__complex.C | 175
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__deque-concurrent-usm.C | 5
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__deque-concurrent.C | 64
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__flat_map-concurrent.C | 71
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__flat_multimap-concurrent.C | 70
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__flat_multiset-concurrent.C | 60
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__flat_set-concurrent.C | 67
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent-usm.C | 5
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent.C | 83
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__list-concurrent-usm.C | 5
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__list-concurrent.C | 83
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__map-concurrent-usm.C | 5
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__map-concurrent.C | 70
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent-usm.C | 5
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent.C | 68
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent-usm.C | 5
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent.C | 62
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__numbers.C | 93
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__set-concurrent-usm.C | 5
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__set-concurrent.C | 68
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__span-concurrent-usm.C | 7
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__span-concurrent.C | 66
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__unordered_map-concurrent.C | 66
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__unordered_multimap-concurrent.C | 65
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__unordered_multiset-concurrent.C | 59
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__unordered_set-concurrent.C | 66
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__valarray-1.C | 179
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__valarray-1.output | 22
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent-usm.C | 5
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent.C | 66
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__vector-concurrent-usm.C | 5
-rw-r--r--  libgomp/testsuite/libgomp.c++/target-std__vector-concurrent.C | 63
-rw-r--r--  libgomp/testsuite/libgomp.c-c++-common/declare-mapper-10.c | 64
-rw-r--r--  libgomp/testsuite/libgomp.c-c++-common/declare-mapper-11.c | 59
-rw-r--r--  libgomp/testsuite/libgomp.c-c++-common/declare-mapper-12.c | 94
-rw-r--r--  libgomp/testsuite/libgomp.c-c++-common/declare-mapper-13.c | 55
-rw-r--r--  libgomp/testsuite/libgomp.c-c++-common/declare-mapper-14.c | 57
-rw-r--r--  libgomp/testsuite/libgomp.c-c++-common/declare-mapper-9.c | 62
-rw-r--r--  libgomp/testsuite/libgomp.c-c++-common/interop-2.c | 129
-rw-r--r--  libgomp/testsuite/libgomp.c-c++-common/metadirective-1.c | 8
-rw-r--r--  libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-2.c | 62
-rw-r--r--  libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-3.c | 80
-rw-r--r--  libgomp/testsuite/libgomp.c-c++-common/omp_target_memset.c | 62
-rw-r--r--  libgomp/testsuite/libgomp.c-c++-common/pr96390.c | 2
-rw-r--r--  libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c | 3
-rw-r--r--  libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1.c | 1
-rw-r--r--  libgomp/testsuite/libgomp.c-c++-common/target-cdtor-1.c | 89
-rw-r--r--  libgomp/testsuite/libgomp.c-target/aarch64/udr-sve.c | 58
-rw-r--r--  libgomp/testsuite/libgomp.c/declare-variant-3-sm61.c | 8
-rw-r--r--  libgomp/testsuite/libgomp.c/declare-variant-3.h | 8
-rw-r--r--  libgomp/testsuite/libgomp.c/declare-variant-4-gfx942.c | 8
-rw-r--r--  libgomp/testsuite/libgomp.c/declare-variant-4.h | 8
-rw-r--r--  libgomp/testsuite/libgomp.c/interop-cublas-full.c | 176
-rw-r--r--  libgomp/testsuite/libgomp.c/interop-cublas-libonly.c | 7
-rw-r--r--  libgomp/testsuite/libgomp.c/interop-cuda-full.c | 162
-rw-r--r--  libgomp/testsuite/libgomp.c/interop-cuda-libonly.c | 11
-rw-r--r--  libgomp/testsuite/libgomp.c/interop-hip-amd-full.c | 10
-rw-r--r--  libgomp/testsuite/libgomp.c/interop-hip-amd-no-hip-header.c | 11
-rw-r--r--  libgomp/testsuite/libgomp.c/interop-hip-nvidia-full.c | 11
-rw-r--r--  libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-headers.c | 13
-rw-r--r--  libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c | 12
-rw-r--r--  libgomp/testsuite/libgomp.c/interop-hip.h | 234
-rw-r--r--  libgomp/testsuite/libgomp.c/interop-hipblas-amd-full.c | 7
-rw-r--r--  libgomp/testsuite/libgomp.c/interop-hipblas-amd-no-hip-header.c | 8
-rw-r--r--  libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-full.c | 7
-rw-r--r--  libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-headers.c | 9
-rw-r--r--  libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-hip-header.c | 8
-rw-r--r--  libgomp/testsuite/libgomp.c/interop-hipblas.h | 240
-rw-r--r--  libgomp/testsuite/libgomp.c/interop-hsa.c | 205
-rw-r--r--  libgomp/testsuite/libgomp.c/target-map-zero-sized-2.c | 74
-rw-r--r--  libgomp/testsuite/libgomp.c/target-map-zero-sized-3.c | 50
-rw-r--r--  libgomp/testsuite/libgomp.c/target-map-zero-sized.c | 107
-rw-r--r--  libgomp/testsuite/libgomp.fortran/alloc-comp-4.f90 | 75
-rw-r--r--  libgomp/testsuite/libgomp.fortran/allocate-8a.f90 | 45
-rw-r--r--  libgomp/testsuite/libgomp.fortran/interop-hip-amd-full.F90 | 10
-rw-r--r--  libgomp/testsuite/libgomp.fortran/interop-hip-amd-no-module.F90 | 9
-rw-r--r--  libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-full.F90 | 12
-rw-r--r--  libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90 | 11
-rw-r--r--  libgomp/testsuite/libgomp.fortran/interop-hip.h | 214
-rw-r--r--  libgomp/testsuite/libgomp.fortran/map-alloc-comp-9-usm.f90 | 11
-rw-r--r--  libgomp/testsuite/libgomp.fortran/map-alloc-comp-9.f90 | 19
-rw-r--r--  libgomp/testsuite/libgomp.fortran/metadirective-1.f90 | 9
-rw-r--r--  libgomp/testsuite/libgomp.fortran/omp_target_memset-2.f90 | 67
-rw-r--r--  libgomp/testsuite/libgomp.fortran/omp_target_memset.f90 | 39
-rw-r--r--  libgomp/testsuite/libgomp.fortran/target-enter-data-8.f90 | 532
-rw-r--r--  libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-1.C | 3
-rw-r--r--  libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2.C | 3
-rw-r--r--  libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-1.C | 3
-rw-r--r--  libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2.C | 3
-rw-r--r--  libgomp/testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c | 125
-rw-r--r--  libgomp/testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c | 96
-rw-r--r--  libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-1.f90 | 25
-rw-r--r--  libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-2.f90 | 62
-rw-r--r--  libgomp/testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90 | 113
-rw-r--r--  libgomp/testsuite/libgomp.oacc-fortran/lib-13.f90 | 3
149 files changed, 9840 insertions, 44 deletions
diff --git a/libgomp/testsuite/lib/libgomp.exp b/libgomp/testsuite/lib/libgomp.exp
index bc38e3c..fd475ac 100644
--- a/libgomp/testsuite/lib/libgomp.exp
+++ b/libgomp/testsuite/lib/libgomp.exp
@@ -30,6 +30,7 @@ load_gcc_lib scandump.exp
load_gcc_lib scanlang.exp
load_gcc_lib scanrtl.exp
load_gcc_lib scansarif.exp
+load_gcc_lib scanhtml.exp
load_gcc_lib scantree.exp
load_gcc_lib scanltrans.exp
load_gcc_lib scanoffload.exp
@@ -553,7 +554,23 @@ int main() {
} } "-lcuda" ]
}
-# Return 1 if cublas_v2.h and -lcublas are available.
+# Return 1 if -lcuda is available (header not required).
+
+proc check_effective_target_openacc_libcuda { } {
+ return [check_no_compiler_messages openacc_libcuda executable {
+typedef enum { CUDA_SUCCESS } CUresult;
+typedef int CUdevice;
+CUresult cuDeviceGet (CUdevice *, int);
+int main() {
+ CUdevice dev;
+ CUresult r = cuDeviceGet (&dev, 0);
+ if (r != CUDA_SUCCESS)
+ return 1;
+ return 0;
+} } "-lcuda" ]
+}
+
+# Return 1 if cublas_v2.h, cuda.h, -lcublas and -lcuda are available.
proc check_effective_target_openacc_cublas { } {
return [check_no_compiler_messages openacc_cublas executable {
@@ -573,7 +590,25 @@ int main() {
} } "-lcuda -lcublas" ]
}
-# Return 1 if cuda_runtime_api.h and -lcudart are available.
+# Return 1 if -lcublas is available (header not required).
+
+proc check_effective_target_openacc_libcublas { } {
+ return [check_no_compiler_messages openacc_libcublas executable {
+typedef enum { CUBLAS_STATUS_SUCCESS } cublasStatus_t;
+typedef struct cublasContext* cublasHandle_t;
+#define cublasCreate cublasCreate_v2
+cublasStatus_t cublasCreate_v2 (cublasHandle_t *);
+int main() {
+ cublasStatus_t s;
+ cublasHandle_t h;
+ s = cublasCreate (&h);
+ if (s != CUBLAS_STATUS_SUCCESS)
+ return 1;
+ return 0;
+} } "-lcublas" ]
+}
+
+# Return 1 if cuda_runtime_api.h, cuda.h, -lcuda and -lcudart are available.
proc check_effective_target_openacc_cudart { } {
return [check_no_compiler_messages openacc_cudart executable {
@@ -592,3 +627,98 @@ int main() {
return 0;
} } "-lcuda -lcudart" ]
}
+
+# Return 1 if -lcudart is available (no header required).
+
+proc check_effective_target_openacc_libcudart { } {
+ return [check_no_compiler_messages openacc_libcudart executable {
+typedef int cudaError_t;
+cudaError_t cudaGetDevice(int *);
+enum { cudaSuccess };
+int main() {
+ cudaError_t e;
+ int devn;
+ e = cudaGetDevice (&devn);
+ if (e != cudaSuccess)
+ return 1;
+ return 0;
+} } "-lcudart" ]
+}
+
+# Return 1 if hip/hip_runtime_api.h is available (no link check; AMD platform).
+
+proc check_effective_target_gomp_hip_header_amd { } {
+ return [check_no_compiler_messages gomp_hip_header_amd assembly {
+#define __HIP_PLATFORM_AMD__
+#include <hip/hip_runtime_api.h>
+int main() {
+ hipDevice_t dev;
+ hipError_t r = hipDeviceGet (&dev, 0);
+ if (r != hipSuccess)
+ return 1;
+ return 0;
+} }]
+}
+
+# Return 1 if hip/hip_runtime_api.h is available (no link check; Nvidia/CUDA platform).
+
+proc check_effective_target_gomp_hip_header_nvidia { } {
+ return [check_no_compiler_messages gomp_hip_header_nvidia assembly {
+#define __HIP_PLATFORM_NVIDIA__
+#include <hip/hip_runtime_api.h>
+int main() {
+ hipDevice_t dev;
+ hipError_t r = hipDeviceGet (&dev, 0);
+ if (r != hipSuccess)
+ return 1;
+ return 0;
+} } "-Wno-deprecated-declarations"]
+}
+
+# Return 1 if the Fortran hipfort module is available (no link check).
+
+proc check_effective_target_gomp_hipfort_module { } {
+ return [check_no_compiler_messages gomp_hipfort_module assembly {
+! Fortran
+use hipfort
+implicit none
+integer(kind(hipSuccess)) :: r
+integer(c_int) :: dev
+r = hipDeviceGet (dev, 0)
+if (r /= hipSuccess) error stop
+end
+}]
+}
+
+# Return 1 if AMD HIP's -lamdhip64 is available (no header required).
+
+proc check_effective_target_gomp_libamdhip64 { } {
+ return [check_no_compiler_messages gomp_libamdhip64 executable {
+typedef int hipError_t;
+typedef int hipDevice_t;
+enum { hipSuccess = 0 };
+hipError_t hipDeviceGet(hipDevice_t*, int);
+int main() {
+ hipDevice_t dev;
+ hipError_t r = hipDeviceGet (&dev, 0);
+ if (r != hipSuccess)
+ return 1;
+ return 0;
+} } "-lamdhip64" ]
+}
+
+# Return 1 if HIP's -lhipblas is available (no header required).
+
+proc check_effective_target_gomp_libhipblas { } {
+ return [check_no_compiler_messages gomp_libhipblas executable {
+typedef enum { HIPBLAS_STATUS_SUCCESS = 0 } hipblasStatus_t;
+typedef void* hipblasHandle_t;
+hipblasStatus_t hipblasCreate (hipblasHandle_t*);
+int main() {
+ hipblasHandle_t handle;
+ hipblasStatus_t stat = hipblasCreate (&handle);
+ if (stat != HIPBLAS_STATUS_SUCCESS)
+ return 1;
+ return 0;
+} } "-lhipblas" ]
+}
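
As a usage sketch only (not taken from this patch; the exact directives the new interop tests use may differ), a testcase gated on one of the effective targets added above would combine 'dg-require-effective-target' with the corresponding link option.  The HIP declarations below mirror those in the check itself, so no HIP header is needed:

/* { dg-do run } */
/* { dg-require-effective-target gomp_libamdhip64 } */
/* { dg-additional-options "-lamdhip64" } */

/* Minimal HIP API declarations, matching the effective-target check.  */
typedef int hipError_t;
typedef int hipDevice_t;
enum { hipSuccess = 0 };
extern "C" hipError_t hipDeviceGet (hipDevice_t *, int);

int
main ()
{
  hipDevice_t dev;
  /* The effective target only guarantees that -lamdhip64 links; the call
     itself may still fail at run time when no device is present.  */
  if (hipDeviceGet (&dev, 0) != hipSuccess)
    __builtin_printf ("no HIP device available\n");
  return 0;
}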
diff --git a/libgomp/testsuite/libgomp.c++/allocator-1.C b/libgomp/testsuite/libgomp.c++/allocator-1.C
index f820722..49425386 100644
--- a/libgomp/testsuite/libgomp.c++/allocator-1.C
+++ b/libgomp/testsuite/libgomp.c++/allocator-1.C
@@ -78,7 +78,10 @@ void test_inequality ()
CHECK_INEQUALITY (omp::allocator::cgroup_mem, void);
CHECK_INEQUALITY (omp::allocator::pteam_mem, void);
CHECK_INEQUALITY (omp::allocator::thread_mem, void);
+#ifdef __gnu_linux__
+ /* Pinning not implemented on other targets. */
CHECK_INEQUALITY (ompx::allocator::gnu_pinned_mem, void);
+#endif
/* And again with the same type passed to the allocator. */
CHECK_INEQUALITY (omp::allocator::null_allocator, T);
CHECK_INEQUALITY (omp::allocator::default_mem, T);
@@ -89,7 +92,9 @@ void test_inequality ()
CHECK_INEQUALITY (omp::allocator::cgroup_mem, T);
CHECK_INEQUALITY (omp::allocator::pteam_mem, T);
CHECK_INEQUALITY (omp::allocator::thread_mem, T);
+#ifdef __gnu_linux__
CHECK_INEQUALITY (ompx::allocator::gnu_pinned_mem, T);
+#endif
}
#undef CHECK_INEQUALITY
@@ -121,7 +126,9 @@ int main ()
test<int, omp::allocator::cgroup_mem>(42);
test<int, omp::allocator::pteam_mem>(42);
test<int, omp::allocator::thread_mem>(42);
+#ifdef __gnu_linux__
test<int, ompx::allocator::gnu_pinned_mem>(42);
+#endif
test<long long, omp::allocator::null_allocator>(42);
test<long long, omp::allocator::default_mem>(42);
@@ -132,7 +139,9 @@ int main ()
test<long long, omp::allocator::cgroup_mem>(42);
test<long long, omp::allocator::pteam_mem>(42);
test<long long, omp::allocator::thread_mem>(42);
+#ifdef __gnu_linux__
test<long long, ompx::allocator::gnu_pinned_mem>(42);
+#endif
test<S, omp::allocator::null_allocator>( S{42, true, 128.f});
test<S, omp::allocator::default_mem>( S{42, true, 128.f});
@@ -143,7 +152,9 @@ int main ()
test<S, omp::allocator::cgroup_mem>( S{42, true, 128.f});
test<S, omp::allocator::pteam_mem>( S{42, true, 128.f});
test<S, omp::allocator::thread_mem>( S{42, true, 128.f});
+#ifdef __gnu_linux__
test<S, ompx::allocator::gnu_pinned_mem>(S{42, true, 128.f});
+#endif
test_inequality<int, omp::allocator::null_allocator>();
test_inequality<int, omp::allocator::default_mem>();
@@ -154,5 +165,7 @@ int main ()
test_inequality<int, omp::allocator::cgroup_mem>();
test_inequality<int, omp::allocator::pteam_mem>();
test_inequality<int, omp::allocator::thread_mem>();
+#ifdef __gnu_linux__
test_inequality<int, ompx::allocator::gnu_pinned_mem>();
+#endif
}
diff --git a/libgomp/testsuite/libgomp.c++/allocator-2.C b/libgomp/testsuite/libgomp.c++/allocator-2.C
index d25b755..ca94fc7 100644
--- a/libgomp/testsuite/libgomp.c++/allocator-2.C
+++ b/libgomp/testsuite/libgomp.c++/allocator-2.C
@@ -86,7 +86,10 @@ int main ()
f<int, omp::allocator::cgroup_mem >(0, 1, 2, 3);
f<int, omp::allocator::pteam_mem >(0, 1, 2, 3);
f<int, omp::allocator::thread_mem >(0, 1, 2, 3);
+#ifdef __gnu_linux__
+ /* Pinning not implemented on other targets. */
f<int, ompx::allocator::gnu_pinned_mem>(0, 1, 2, 3);
+#endif
f<long long, omp::allocator::null_allocator >(0, 1, 2, 3);
f<long long, omp::allocator::default_mem >(0, 1, 2, 3);
@@ -97,7 +100,9 @@ int main ()
f<long long, omp::allocator::cgroup_mem >(0, 1, 2, 3);
f<long long, omp::allocator::pteam_mem >(0, 1, 2, 3);
f<long long, omp::allocator::thread_mem >(0, 1, 2, 3);
+#ifdef __gnu_linux__
f<long long, ompx::allocator::gnu_pinned_mem>(0, 1, 2, 3);
+#endif
S0 s0_0{ 42, true, 111128.f};
S0 s0_1{ 142, false, 11128.f};
@@ -112,7 +117,9 @@ int main ()
f<S0, omp::allocator::cgroup_mem >(s0_0, s0_1, s0_2, s0_3);
f<S0, omp::allocator::pteam_mem >(s0_0, s0_1, s0_2, s0_3);
f<S0, omp::allocator::thread_mem >(s0_0, s0_1, s0_2, s0_3);
+#ifdef __gnu_linux__
f<S0, ompx::allocator::gnu_pinned_mem>(s0_0, s0_1, s0_2, s0_3);
+#endif
S1 s1_0{ 42, true, 111128.f};
S1 s1_1{ 142, false, 11128.f};
@@ -128,5 +135,7 @@ int main ()
f<S1, omp::allocator::cgroup_mem >(s1_0, s1_1, s1_2, s1_3);
f<S1, omp::allocator::pteam_mem >(s1_0, s1_1, s1_2, s1_3);
f<S1, omp::allocator::thread_mem >(s1_0, s1_1, s1_2, s1_3);
+#ifdef __gnu_linux__
f<S1, ompx::allocator::gnu_pinned_mem>(s1_0, s1_1, s1_2, s1_3);
+#endif
}
diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-1.C b/libgomp/testsuite/libgomp.c++/declare-mapper-1.C
new file mode 100644
index 0000000..aba4f42
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/declare-mapper-1.C
@@ -0,0 +1,87 @@
+// { dg-do run }
+
+#include <cstdlib>
+#include <cassert>
+
+#define N 64
+
+struct points
+{
+ double *x;
+ double *y;
+ double *z;
+ size_t len;
+};
+
+#pragma omp declare mapper(points p) map(to:p.x, p.y, p.z) \
+ map(p.x[0:p.len]) \
+ map(p.y[0:p.len]) \
+ map(p.z[0:p.len])
+
+struct shape
+{
+ points tmp;
+ points *pts;
+ int metadata[128];
+};
+
+#pragma omp declare mapper(shape s) map(tofrom:s.pts, *s.pts) map(alloc:s.tmp)
+
+void
+alloc_points (points *pts, size_t sz)
+{
+ pts->x = new double[sz];
+ pts->y = new double[sz];
+ pts->z = new double[sz];
+ pts->len = sz;
+ for (int i = 0; i < sz; i++)
+ pts->x[i] = pts->y[i] = pts->z[i] = 0;
+}
+
+int main (int argc, char *argv[])
+{
+ shape myshape;
+ points mypts;
+
+ myshape.pts = &mypts;
+
+ alloc_points (&myshape.tmp, N);
+ myshape.pts = new points;
+ alloc_points (myshape.pts, N);
+
+ #pragma omp target map(myshape)
+ {
+ for (int i = 0; i < N; i++)
+ {
+ myshape.pts->x[i]++;
+ myshape.pts->y[i]++;
+ myshape.pts->z[i]++;
+ }
+ }
+
+ for (int i = 0; i < N; i++)
+ {
+ assert (myshape.pts->x[i] == 1);
+ assert (myshape.pts->y[i] == 1);
+ assert (myshape.pts->z[i] == 1);
+ }
+
+ #pragma omp target
+ {
+ for (int i = 0; i < N; i++)
+ {
+ myshape.pts->x[i]++;
+ myshape.pts->y[i]++;
+ myshape.pts->z[i]++;
+ }
+ }
+
+ for (int i = 0; i < N; i++)
+ {
+ assert (myshape.pts->x[i] == 2);
+ assert (myshape.pts->y[i] == 2);
+ assert (myshape.pts->z[i] == 2);
+ }
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-2.C b/libgomp/testsuite/libgomp.c++/declare-mapper-2.C
new file mode 100644
index 0000000..d848fdb
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/declare-mapper-2.C
@@ -0,0 +1,55 @@
+// { dg-do run }
+
+#include <cassert>
+
+#define N 256
+
+struct doublebuf
+{
+ int buf_a[N][N];
+ int buf_b[N][N];
+};
+
+#pragma omp declare mapper(lo:doublebuf b) map(b.buf_a[0:N/2][0:N]) \
+ map(b.buf_b[0:N/2][0:N])
+
+#pragma omp declare mapper(hi:doublebuf b) map(b.buf_a[N/2:N/2][0:N]) \
+ map(b.buf_b[N/2:N/2][0:N])
+
+int main (int argc, char *argv[])
+{
+ doublebuf db;
+
+ for (int i = 0; i < N; i++)
+ for (int j = 0; j < N; j++)
+ db.buf_a[i][j] = db.buf_b[i][j] = 0;
+
+ #pragma omp target map(mapper(lo), tofrom:db)
+ {
+ for (int i = 0; i < N / 2; i++)
+ for (int j = 0; j < N; j++)
+ {
+ db.buf_a[i][j]++;
+ db.buf_b[i][j]++;
+ }
+ }
+
+ #pragma omp target map(mapper(hi), tofrom:db)
+ {
+ for (int i = N / 2; i < N; i++)
+ for (int j = 0; j < N; j++)
+ {
+ db.buf_a[i][j]++;
+ db.buf_b[i][j]++;
+ }
+ }
+
+ for (int i = 0; i < N; i++)
+ for (int j = 0; j < N; j++)
+ {
+ assert (db.buf_a[i][j] == 1);
+ assert (db.buf_b[i][j] == 1);
+ }
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-3.C b/libgomp/testsuite/libgomp.c++/declare-mapper-3.C
new file mode 100644
index 0000000..ea9b7de
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/declare-mapper-3.C
@@ -0,0 +1,63 @@
+// { dg-do run }
+
+#include <cstdlib>
+#include <cassert>
+
+struct S {
+ int *myarr;
+};
+
+#pragma omp declare mapper (S s) map(to:s.myarr) map (tofrom: s.myarr[0:20])
+
+namespace A {
+#pragma omp declare mapper (S s) map(to:s.myarr) map (tofrom: s.myarr[0:100])
+}
+
+namespace B {
+#pragma omp declare mapper (S s) map(to:s.myarr) map (tofrom: s.myarr[100:100])
+}
+
+namespace A
+{
+ void incr_a (S my_s)
+ {
+#pragma omp target
+ {
+ for (int i = 0; i < 100; i++)
+ my_s.myarr[i]++;
+ }
+ }
+}
+
+namespace B
+{
+ void incr_b (S my_s)
+ {
+#pragma omp target
+ {
+ for (int i = 100; i < 200; i++)
+ my_s.myarr[i]++;
+ }
+ }
+}
+
+int main (int argc, char *argv[])
+{
+ S my_s;
+
+ my_s.myarr = (int *) calloc (200, sizeof (int));
+
+#pragma omp target
+ {
+ for (int i = 0; i < 20; i++)
+ my_s.myarr[i]++;
+ }
+
+ A::incr_a (my_s);
+ B::incr_b (my_s);
+
+ for (int i = 0; i < 200; i++)
+    assert (my_s.myarr[i] == ((i < 20) ? 2 : 1));
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-4.C b/libgomp/testsuite/libgomp.c++/declare-mapper-4.C
new file mode 100644
index 0000000..f194e63
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/declare-mapper-4.C
@@ -0,0 +1,63 @@
+// { dg-do run }
+
+#include <cstdlib>
+#include <cassert>
+
+struct S {
+ int *myarr;
+};
+
+#pragma omp declare mapper (S s) map(to:s.myarr) map (tofrom: s.myarr[0:20])
+
+namespace A {
+#pragma omp declare mapper (S s) map(to:s.myarr) map (tofrom: s.myarr[0:100])
+}
+
+namespace B {
+#pragma omp declare mapper (S s) map(to:s.myarr) map (tofrom: s.myarr[100:100])
+}
+
+namespace A
+{
+ void incr_a (S &my_s)
+ {
+#pragma omp target
+ {
+ for (int i = 0; i < 100; i++)
+ my_s.myarr[i]++;
+ }
+ }
+}
+
+namespace B
+{
+ void incr_b (S &my_s)
+ {
+#pragma omp target
+ {
+ for (int i = 100; i < 200; i++)
+ my_s.myarr[i]++;
+ }
+ }
+}
+
+int main (int argc, char *argv[])
+{
+ S my_s;
+
+ my_s.myarr = (int *) calloc (200, sizeof (int));
+
+#pragma omp target
+ {
+ for (int i = 0; i < 20; i++)
+ my_s.myarr[i]++;
+ }
+
+ A::incr_a (my_s);
+ B::incr_b (my_s);
+
+ for (int i = 0; i < 200; i++)
+    assert (my_s.myarr[i] == ((i < 20) ? 2 : 1));
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-5.C b/libgomp/testsuite/libgomp.c++/declare-mapper-5.C
new file mode 100644
index 0000000..0030de8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/declare-mapper-5.C
@@ -0,0 +1,52 @@
+// { dg-do run }
+
+#include <cassert>
+
+struct S
+{
+ int *myarr;
+ int len;
+};
+
+class C
+{
+ S smemb;
+#pragma omp declare mapper (custom:S s) map(to:s.myarr) \
+ map(tofrom:s.myarr[0:s.len])
+
+public:
+ C(int l)
+ {
+ smemb.myarr = new int[l];
+ smemb.len = l;
+ for (int i = 0; i < l; i++)
+ smemb.myarr[i] = 0;
+ }
+ void bump();
+ void check();
+};
+
+void
+C::bump ()
+{
+#pragma omp target map(mapper(custom), tofrom: smemb)
+ {
+ for (int i = 0; i < smemb.len; i++)
+ smemb.myarr[i]++;
+ }
+}
+
+void
+C::check ()
+{
+ for (int i = 0; i < smemb.len; i++)
+ assert (smemb.myarr[i] == 1);
+}
+
+int main (int argc, char *argv[])
+{
+ C test (100);
+ test.bump ();
+ test.check ();
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-6.C b/libgomp/testsuite/libgomp.c++/declare-mapper-6.C
new file mode 100644
index 0000000..14ed10d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/declare-mapper-6.C
@@ -0,0 +1,37 @@
+// { dg-do run }
+
+#include <cassert>
+
+template <typename T>
+void adjust (T param)
+{
+#pragma omp declare mapper (T x) map(to:x.len, x.base) \
+ map(tofrom:x.base[0:x.len])
+
+#pragma omp target
+ for (int i = 0; i < param.len; i++)
+ param.base[i]++;
+}
+
+struct S {
+ int len;
+ int *base;
+};
+
+int main (int argc, char *argv[])
+{
+ S a;
+
+ a.len = 100;
+ a.base = new int[a.len];
+
+ for (int i = 0; i < a.len; i++)
+ a.base[i] = 0;
+
+ adjust (a);
+
+ for (int i = 0; i < a.len; i++)
+ assert (a.base[i] == 1);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-7.C b/libgomp/testsuite/libgomp.c++/declare-mapper-7.C
new file mode 100644
index 0000000..ba4792a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/declare-mapper-7.C
@@ -0,0 +1,59 @@
+// { dg-do run }
+
+#include <cassert>
+
+struct S
+{
+ int *myarr;
+};
+
+struct T
+{
+ S *s;
+};
+
+#pragma omp declare mapper (s100: S x) map(to: x.myarr) \
+ map(tofrom: x.myarr[0:100])
+// Define this because ...
+#pragma omp declare mapper (default: S x) map(to: x.myarr) \
+ map(tofrom: x.myarr[0:100])
+
+
+void
+bump (T t)
+{
+ /* Here we have an implicit/default mapper invoking a named mapper. We
+ need to make sure that can be located properly at gimplification
+ time. */
+
+// ... the following is invalid in OpenMP - albeit supported by GCC
+// (after disabling: error: in ‘declare mapper’ directives, parameter to ‘mapper’ modifier must be ‘default’ )
+
+// #pragma omp declare mapper (T t) map(to:t.s) map(mapper(s100), tofrom: t.s[0])
+
+// ... thus, we now use ...
+#pragma omp declare mapper (T t) map(to:t.s) map(mapper(default), tofrom: t.s[0])
+
+#pragma omp target
+ for (int i = 0; i < 100; i++)
+ t.s->myarr[i]++;
+}
+
+int main (int argc, char *argv[])
+{
+ S my_s;
+ T my_t;
+
+ my_s.myarr = new int[100];
+ my_t.s = &my_s;
+
+ for (int i = 0; i < 100; i++)
+ my_s.myarr[i] = 0;
+
+ bump (my_t);
+
+ for (int i = 0; i < 100; i++)
+ assert (my_s.myarr[i] == 1);
+
+ return 0;
+}
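
For contrast with the 'mapper(default)' workaround above, and purely as a sketch (the helper name 'bump_explicit' is illustrative and not part of the testcase): a named mapper such as 's100' may be selected directly in the map clause of a target construct, as done elsewhere in this patch (see declare-mapper-2.C and declare-mapper-5.C):

void
bump_explicit (S s)
{
  /* Invoke the named mapper 's100' explicitly; it maps s.myarr and
     s.myarr[0:100] as specified in the 'declare mapper' directive above.  */
  #pragma omp target map(mapper(s100), tofrom: s)
  for (int i = 0; i < 100; i++)
    s.myarr[i]++;
}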
diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-8.C b/libgomp/testsuite/libgomp.c++/declare-mapper-8.C
new file mode 100644
index 0000000..3818e52
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/declare-mapper-8.C
@@ -0,0 +1,61 @@
+// { dg-do run }
+
+#include <cassert>
+
+struct S
+{
+ int *myarr;
+ int len;
+};
+
+template<typename T>
+class C
+{
+ T memb;
+#pragma omp declare mapper (T t) map(to:t.len, t.myarr) \
+ map(tofrom:t.myarr[0:t.len])
+
+public:
+ C(int sz);
+ ~C();
+ void bump();
+ void check();
+};
+
+template<typename T>
+C<T>::C(int sz)
+{
+ memb.myarr = new int[sz];
+ for (int i = 0; i < sz; i++)
+ memb.myarr[i] = 0;
+ memb.len = sz;
+}
+
+template<typename T>
+C<T>::~C()
+{
+ delete[] memb.myarr;
+}
+
+template<typename T>
+void C<T>::bump()
+{
+#pragma omp target map(memb)
+ for (int i = 0; i < memb.len; i++)
+ memb.myarr[i]++;
+}
+
+template<typename T>
+void C<T>::check()
+{
+ for (int i = 0; i < memb.len; i++)
+ assert (memb.myarr[i] == 1);
+}
+
+int main(int argc, char *argv[])
+{
+ C<S> c_int(100);
+ c_int.bump();
+ c_int.check();
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c++/declare_target-2.C b/libgomp/testsuite/libgomp.c++/declare_target-2.C
new file mode 100644
index 0000000..ab94a55
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/declare_target-2.C
@@ -0,0 +1,25 @@
+// { dg-do link }
+
+// Actually not needed: -fipa-cp is default with -O2:
+// { dg-additional-options "-O2 -fipa-cp" }
+
+// The code failed because 'std::endl' becomes implicitly 'declare target'
+// but not the 'widen' function it calls. While the linker had no issues
+// (endl is never called, either because it is inlined or optimized away),
+// the IPA-CP (enabled by -O2 and higher) failed as the definition for
+// 'widen' did not exist on the offload side.
+
+#include <iostream>
+
+void func (int m)
+{
+ if (m < 0)
+ std::cout << "should not happen" << std::endl;
+}
+
+
+int main()
+{
+ #pragma omp target
+ func (1);
+}
diff --git a/libgomp/testsuite/libgomp.c++/pr106445-1-O0.C b/libgomp/testsuite/libgomp.c++/pr106445-1-O0.C
new file mode 100644
index 0000000..bcd499c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/pr106445-1-O0.C
@@ -0,0 +1,3 @@
+// { dg-additional-options -O0 }
+
+#include "pr106445-1.C"
diff --git a/libgomp/testsuite/libgomp.c++/pr106445-1.C b/libgomp/testsuite/libgomp.c++/pr106445-1.C
new file mode 100644
index 0000000..329ce62
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/pr106445-1.C
@@ -0,0 +1,18 @@
+#include <vector>
+
+int main()
+{
+#pragma omp target
+ {
+ {
+ std::vector<int> v;
+ if (!v.empty())
+ __builtin_abort();
+ }
+ {
+ std::vector<int> v(100);
+ if (v.capacity() < 100)
+ __builtin_abort();
+ }
+ }
+}
diff --git a/libgomp/testsuite/libgomp.c++/pr119692-1-4.C b/libgomp/testsuite/libgomp.c++/pr119692-1-4.C
index 6995f26..af9fe1c 100644
--- a/libgomp/testsuite/libgomp.c++/pr119692-1-4.C
+++ b/libgomp/testsuite/libgomp.c++/pr119692-1-4.C
@@ -3,6 +3,9 @@
/* { dg-additional-options -DDEFAULT=defaultmap(firstprivate) }
Wrong code for offloading execution.
{ dg-xfail-run-if PR119692 { offload_device } } */
+/* There are configurations where we 'WARNING: program timed out.' while in
+ 'dynamic_cast', see <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119692#c6>.
+ { dg-timeout 10 } ... to make sure that happens quickly. */
/* { dg-additional-options -fdump-tree-gimple } */
#include "pr119692-1-1.C"
diff --git a/libgomp/testsuite/libgomp.c++/pr119692-1-5.C b/libgomp/testsuite/libgomp.c++/pr119692-1-5.C
index 02121b6..e5c6e07 100644
--- a/libgomp/testsuite/libgomp.c++/pr119692-1-5.C
+++ b/libgomp/testsuite/libgomp.c++/pr119692-1-5.C
@@ -3,6 +3,9 @@
/* { dg-additional-options -DDEFAULT=defaultmap(to) }
Wrong code for offloading execution.
{ dg-xfail-run-if PR119692 { offload_device } } */
+/* There are configurations where we 'WARNING: program timed out.' while in
+ 'dynamic_cast', see <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119692#c6>.
+ { dg-timeout 10 } ... to make sure that happens quickly. */
/* { dg-additional-options -fdump-tree-gimple } */
#include "pr119692-1-1.C"
diff --git a/libgomp/testsuite/libgomp.c++/pr96390.C b/libgomp/testsuite/libgomp.c++/pr96390.C
index 1f3c3e0..be19601 100644
--- a/libgomp/testsuite/libgomp.c++/pr96390.C
+++ b/libgomp/testsuite/libgomp.c++/pr96390.C
@@ -1,6 +1,4 @@
/* { dg-additional-options "-O0 -fdump-tree-omplower" } */
-/* { dg-additional-options "-foffload=-Wa,--verify" { target offload_target_nvptx } } */
-/* { dg-xfail-if "PR 97106/PR 97102 - .alias not (yet) supported for nvptx" { offload_target_nvptx } } */
#include <cstdlib>
#include <type_traits>
diff --git a/libgomp/testsuite/libgomp.c++/target-cdtor-1.C b/libgomp/testsuite/libgomp.c++/target-cdtor-1.C
new file mode 100644
index 0000000..ecb029e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-cdtor-1.C
@@ -0,0 +1,104 @@
+/* Offloaded C++ objects construction and destruction. */
+
+/* { dg-additional-options -fdump-tree-optimized-raw-asmname }
+ { dg-additional-options -foffload-options=-fdump-tree-optimized-raw-asmname } */
+
+#include <omp.h>
+#include <vector>
+
+#pragma omp declare target
+
+struct S
+{
+ int x;
+
+ S()
+ : x(-1)
+ {
+ __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device());
+ }
+ S(int x)
+ : x(x)
+ {
+ __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device());
+ }
+ ~S()
+ {
+ __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device());
+ }
+};
+
+#pragma omp end declare target
+
+S sH1(7);
+
+#pragma omp declare target
+
+S sHD1(5);
+
+std::vector<S> svHD1(2);
+
+#pragma omp end declare target
+
+S sH2(3);
+
+int main()
+{
+ int c = 0;
+
+ __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+
+#pragma omp target map(c)
+ {
+ __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+ }
+
+#pragma omp target map(c)
+ {
+ __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+ }
+
+ __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+
+ return 0;
+}
+
+/* Verify '__cxa_atexit' calls.
+
+ For the host, there are four expected calls:
+ { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, } 4 optimized { target cxa_atexit } } }
+ { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+ { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+ { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+ { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH2, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+
+ For the device, there are two expected calls:
+ { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, } 2 optimized { target cxa_atexit } } }
+ { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+ { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+*/
+
+/* C++ objects are constructed in order of appearance (..., and destructed in reverse order).
+ { dg-output {S, 7, 1[\r\n]+} }
+ { dg-output {S, 5, 1[\r\n]+} }
+ { dg-output {S, -1, 1[\r\n]+} }
+ { dg-output {S, -1, 1[\r\n]+} }
+ { dg-output {S, 3, 1[\r\n]+} }
+ { dg-output {main:1, 1[\r\n]+} }
+ { dg-output {S, 5, 0[\r\n]+} { target offload_device } }
+ { dg-output {S, -1, 0[\r\n]+} { target offload_device } }
+ { dg-output {S, -1, 0[\r\n]+} { target offload_device } }
+ { dg-output {main:2, 1[\r\n]+} { target { ! offload_device } } }
+ { dg-output {main:2, 0[\r\n]+} { target offload_device } }
+ { dg-output {main:3, 1[\r\n]+} { target { ! offload_device } } }
+ { dg-output {main:3, 0[\r\n]+} { target offload_device } }
+ { dg-output {main:4, 1[\r\n]+} }
+ { dg-output {~S, -1, 0[\r\n]+} { target offload_device } }
+ { dg-output {~S, -1, 0[\r\n]+} { target offload_device } }
+ { dg-output {~S, 5, 0[\r\n]+} { target offload_device } }
+ { dg-output {~S, 3, 1[\r\n]+} }
+ { dg-output {~S, -1, 1[\r\n]+} }
+ { dg-output {~S, -1, 1[\r\n]+} }
+ { dg-output {~S, 5, 1[\r\n]+} }
+ { dg-output {~S, 7, 1[\r\n]+} }
+*/
diff --git a/libgomp/testsuite/libgomp.c++/target-cdtor-2.C b/libgomp/testsuite/libgomp.c++/target-cdtor-2.C
new file mode 100644
index 0000000..75e48ca
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-cdtor-2.C
@@ -0,0 +1,140 @@
+/* Offloaded 'constructor' and 'destructor' functions, and C++ objects construction and destruction. */
+
+/* { dg-require-effective-target init_priority } */
+
+/* { dg-additional-options -fdump-tree-optimized-raw-asmname }
+ { dg-additional-options -foffload-options=-fdump-tree-optimized-raw-asmname } */
+
+#include <omp.h>
+#include <vector>
+
+#pragma omp declare target
+
+struct S
+{
+ int x;
+
+ S()
+ : x(-1)
+ {
+ __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device());
+ }
+ S(int x)
+ : x(x)
+ {
+ __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device());
+ }
+ ~S()
+ {
+ __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device());
+ }
+};
+
+#pragma omp end declare target
+
+S sH1 __attribute__((init_priority(1500))) (7);
+
+#pragma omp declare target
+
+S sHD1 __attribute__((init_priority(2000))) (5);
+
+std::vector<S> svHD1 __attribute__((init_priority(1000))) (2);
+
+static void
+__attribute__((constructor(20000)))
+initDH1()
+{
+ __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+static void
+__attribute__((destructor(20000)))
+finiDH1()
+{
+ __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+#pragma omp end declare target
+
+S sH2 __attribute__((init_priority(500))) (3);
+
+static void
+__attribute__((constructor(10000)))
+initH1()
+{
+ __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+static void
+__attribute__((destructor(10000)))
+finiH1()
+{
+ __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+int main()
+{
+ int c = 0;
+
+ __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+
+#pragma omp target map(c)
+ {
+ __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+ }
+
+#pragma omp target map(c)
+ {
+ __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+ }
+
+ __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+
+ return 0;
+}
+
+/* Verify '__cxa_atexit' calls.
+
+ For the host, there are four expected calls:
+ { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, } 4 optimized { target cxa_atexit } } }
+ { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+ { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+ { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+ { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH2, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+
+ For the device, there are two expected calls:
+ { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, } 2 optimized { target cxa_atexit } } }
+ { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+ { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+*/
+
+/* Defined order in which 'constructor' functions, and 'destructor' functions are run, and C++ objects are constructed (..., and destructed in reverse order).
+ { dg-output {S, 3, 1[\r\n]+} }
+ { dg-output {S, -1, 1[\r\n]+} }
+ { dg-output {S, -1, 1[\r\n]+} }
+ { dg-output {S, 7, 1[\r\n]+} }
+ { dg-output {S, 5, 1[\r\n]+} }
+ { dg-output {initH1, 1[\r\n]+} }
+ { dg-output {initDH1, 1[\r\n]+} }
+ { dg-output {main:1, 1[\r\n]+} }
+ { dg-output {S, -1, 0[\r\n]+} { target offload_device } }
+ { dg-output {S, -1, 0[\r\n]+} { target offload_device } }
+ { dg-output {S, 5, 0[\r\n]+} { target offload_device } }
+ { dg-output {initDH1, 0[\r\n]+} { target offload_device } }
+ { dg-output {main:2, 1[\r\n]+} { target { ! offload_device } } }
+ { dg-output {main:2, 0[\r\n]+} { target offload_device } }
+ { dg-output {main:3, 1[\r\n]+} { target { ! offload_device } } }
+ { dg-output {main:3, 0[\r\n]+} { target offload_device } }
+ { dg-output {main:4, 1[\r\n]+} }
+ { dg-output {~S, 5, 0[\r\n]+} { target offload_device } }
+ { dg-output {~S, -1, 0[\r\n]+} { target offload_device } }
+ { dg-output {~S, -1, 0[\r\n]+} { target offload_device } }
+ { dg-output {finiDH1, 0[\r\n]+} { target offload_device } }
+ { dg-output {~S, 5, 1[\r\n]+} }
+ { dg-output {~S, 7, 1[\r\n]+} }
+ { dg-output {~S, -1, 1[\r\n]+} }
+ { dg-output {~S, -1, 1[\r\n]+} }
+ { dg-output {~S, 3, 1[\r\n]+} }
+ { dg-output {finiDH1, 1[\r\n]+} }
+ { dg-output {finiH1, 1[\r\n]+} }
+*/
diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-1.C b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-1.C
index 3848295..a862652 100644
--- a/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-1.C
+++ b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-1.C
@@ -23,3 +23,6 @@
PR119692.
{ dg-shouldfail {'std::bad_cast' exception} } */
+/* There are configurations where we 'WARNING: program timed out.' while in
+ 'dynamic_cast', see <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119692#c6>.
+ { dg-timeout 10 } ... to make sure that happens quickly. */
diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-2.C b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-2.C
index 8861740..ff15c9f 100644
--- a/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-2.C
+++ b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-2.C
@@ -22,3 +22,6 @@
For GCN, nvptx offload execution, there is no 'catch'ing; any exception is fatal.
{ dg-shouldfail {'MyException' exception} { offload_device } } */
+/* There are configurations where we 'WARNING: program timed out.' while in
+ 'dynamic_cast', see <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119692#c6>.
+ { dg-timeout 10 } ... to make sure that happens quickly. */
diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C
index 3cdedf4..d4dccf1 100644
--- a/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C
+++ b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C
@@ -14,8 +14,10 @@
/* In this specific C++ arrangement, distilled from PR118794, GCC synthesizes
'__builtin_eh_pointer', '__builtin_unwind_resume' calls as dead code in 'f':
- { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } }
- { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } }
+ { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } }
+ { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } }
+ ..., just 'targetm.arm_eabi_unwinder' is different:
+ { dg-final { scan-tree-dump-times {gimple_call <__builtin_cxa_end_cleanup, } 1 optimized { target { arm_eabi || tic6x-*-* } } } }
{ dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } }
{ dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } }
Given '-O0' and '-foffload-options=-mno-fake-exceptions', offload compilation fails:
diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C
index ef996cf..724e34b 100644
--- a/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C
+++ b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C
@@ -14,8 +14,10 @@
/* In this specific C++ arrangement, distilled from PR118794, GCC synthesizes
'__builtin_eh_pointer', '__builtin_unwind_resume' calls as dead code in 'f':
- { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } }
- { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } }
+ { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } }
+ { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } }
+ ..., just 'targetm.arm_eabi_unwinder' is different:
+ { dg-final { scan-tree-dump-times {gimple_call <__builtin_cxa_end_cleanup, } 1 optimized { target { arm_eabi || tic6x-*-* } } } }
{ dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } }
{ dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } }
Given '-O0' and '-foffload-options=-mno-fake-exceptions', offload compilation fails:
diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1.C b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1.C
index a73e7f8..24eb7a5 100644
--- a/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1.C
+++ b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1.C
@@ -9,10 +9,6 @@
/* See also '../../../gcc/testsuite/g++.target/gcn/exceptions-pr118794-1.C',
'../../../gcc/testsuite/g++.target/nvptx/exceptions-pr118794-1.C'. */
-/* Help nvptx offloading overcome a code generation issue;
- PR106445, PR118518. */
-#define ALWAYS_INLINE __attribute__((always_inline))
-
#pragma omp begin declare target
bool ok = false;
@@ -20,12 +16,10 @@ bool ok = false;
template <typename T>
struct C
{
- ALWAYS_INLINE
C()
{
ok = true;
}
- ALWAYS_INLINE
C(int) {};
~C() {};
@@ -57,7 +51,9 @@ int main()
/* In this specific C++ arrangement, distilled from PR118794, GCC synthesizes
'__builtin_eh_pointer', '__builtin_unwind_resume' calls as dead code in 'f':
- { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } }
- { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } }
+ { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } }
+ { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } }
+ ..., just 'targetm.arm_eabi_unwinder' is different:
+ { dg-final { scan-tree-dump-times {gimple_call <__builtin_cxa_end_cleanup, } 1 optimized { target { arm_eabi || tic6x-*-* } } } }
{ dg-final { scan-offload-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } }
{ dg-final { scan-offload-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } } */
diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-1.C b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-1.C
index 2467061..a4e7a10 100644
--- a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-1.C
+++ b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-1.C
@@ -4,9 +4,6 @@
{ dg-additional-options -fexceptions } */
/* { dg-additional-options -fdump-tree-optimized-raw }
{ dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */
-/* { dg-bogus {Size expression must be absolute\.} PR119737 { target offload_target_amdgcn xfail *-*-* } 0 }
- { dg-ice PR119737 { offload_target_amdgcn } }
- { dg-excess-errors {'mkoffload' failures etc.} { xfail offload_target_amdgcn } } */
#include "../libgomp.oacc-c++/exceptions-throw-1.C"
diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2.C b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2.C
index e85e6c3..97f4845 100644
--- a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2.C
+++ b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2.C
@@ -4,9 +4,6 @@
{ dg-additional-options -fexceptions } */
/* { dg-additional-options -fdump-tree-optimized-raw }
{ dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */
-/* { dg-bogus {Size expression must be absolute\.} PR119737 { target offload_target_amdgcn xfail *-*-* } 0 }
- { dg-ice PR119737 { offload_target_amdgcn } }
- { dg-excess-errors {'mkoffload' failures etc.} { xfail offload_target_amdgcn } } */
#include "../libgomp.oacc-c++/exceptions-throw-2.C"
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-10.C b/libgomp/testsuite/libgomp.c++/target-flex-10.C
new file mode 100644
index 0000000..8fa9af7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-10.C
@@ -0,0 +1,215 @@
+/* Basic container usage. */
+
+#include <vector>
+#include <deque>
+#include <list>
+#include <set>
+#include <map>
+#if __cplusplus >= 201103L
+#include <array>
+#include <forward_list>
+#include <unordered_set>
+#include <unordered_map>
+#endif
+
+bool vector_test()
+{
+ bool ok;
+ #pragma omp target map(from: ok)
+ {
+ std::vector<int> vector;
+ ok = vector.empty();
+ }
+ return ok;
+}
+
+bool deque_test()
+{
+ bool ok;
+ #pragma omp target map(from: ok)
+ {
+ std::deque<int> deque;
+ ok = deque.empty();
+ }
+ return ok;
+}
+
+bool list_test()
+{
+ bool ok;
+ #pragma omp target map(from: ok)
+ {
+ std::list<int> list;
+ ok = list.empty();
+ }
+ return ok;
+}
+
+bool map_test()
+{
+ bool ok;
+ #pragma omp target map(from: ok)
+ {
+ std::map<int, int> map;
+ ok = map.empty();
+ }
+ return ok;
+}
+
+bool set_test()
+{
+ bool ok;
+ #pragma omp target map(from: ok)
+ {
+ std::set<int> set;
+ ok = set.empty();
+ }
+ return ok;
+}
+
+bool multimap_test()
+{
+ bool ok;
+ #pragma omp target map(from: ok)
+ {
+ std::multimap<int, int> multimap;
+ ok = multimap.empty();
+ }
+ return ok;
+}
+
+bool multiset_test()
+{
+ bool ok;
+ #pragma omp target map(from: ok)
+ {
+    std::multiset<int> multiset;
+ ok = multiset.empty();
+ }
+ return ok;
+}
+
+#if __cplusplus >= 201103L
+
+bool array_test()
+{
+ static constexpr std::size_t array_size = 42;
+ bool ok;
+ #pragma omp target map(from: ok)
+ {
+ std::array<int, array_size> array{};
+ ok = array[0] == 0
+ && array[array_size - 1] == 0;
+ }
+ return ok;
+}
+
+bool forward_list_test()
+{
+ bool ok;
+ #pragma omp target map(from: ok)
+ {
+ std::forward_list<int> forward_list;
+ ok = forward_list.empty();
+ }
+ return ok;
+}
+
+bool unordered_map_test()
+{
+ bool ok;
+ #pragma omp target map(from: ok)
+ {
+ std::unordered_map<int, int> unordered_map;
+ ok = unordered_map.empty();
+ }
+ return ok;
+}
+
+bool unordered_set_test()
+{
+ bool ok;
+ #pragma omp target map(from: ok)
+ {
+ std::unordered_set<int> unordered_set;
+ ok = unordered_set.empty();
+ }
+ return ok;
+}
+
+bool unordered_multimap_test()
+{
+
+ bool ok;
+ #pragma omp target map(from: ok)
+ {
+ std::unordered_multimap<int, int> unordered_multimap;
+ ok = unordered_multimap.empty();
+ }
+ return ok;
+}
+
+bool unordered_multiset_test()
+{
+
+ bool ok;
+ #pragma omp target map(from: ok)
+ {
+ std::unordered_multiset<int> unordered_multiset;
+ ok = unordered_multiset.empty();
+ }
+ return ok;
+}
+
+#else
+bool array_test() { return true; }
+bool forward_list_test() { return true; }
+bool unordered_map_test() { return true; }
+bool unordered_set_test() { return true; }
+bool unordered_multimap_test() { return true; }
+bool unordered_multiset_test() { return true; }
+#endif
+
+int main()
+{
+ const bool vec_res = vector_test();
+ __builtin_printf("vector : %s\n", vec_res ? "PASS" : "FAIL");
+ const bool deque_res = deque_test();
+ __builtin_printf("deque : %s\n", deque_res ? "PASS" : "FAIL");
+ const bool list_res = list_test();
+ __builtin_printf("list : %s\n", list_res ? "PASS" : "FAIL");
+ const bool map_res = map_test();
+ __builtin_printf("map : %s\n", map_res ? "PASS" : "FAIL");
+ const bool set_res = set_test();
+ __builtin_printf("set : %s\n", set_res ? "PASS" : "FAIL");
+ const bool multimap_res = multimap_test();
+ __builtin_printf("multimap : %s\n", multimap_res ? "PASS" : "FAIL");
+ const bool multiset_res = multiset_test();
+ __builtin_printf("multiset : %s\n", multiset_res ? "PASS" : "FAIL");
+ const bool array_res = array_test();
+ __builtin_printf("array : %s\n", array_res ? "PASS" : "FAIL");
+ const bool forward_list_res = forward_list_test();
+ __builtin_printf("forward_list : %s\n", forward_list_res ? "PASS" : "FAIL");
+ const bool unordered_map_res = unordered_map_test();
+ __builtin_printf("unordered_map : %s\n", unordered_map_res ? "PASS" : "FAIL");
+ const bool unordered_set_res = unordered_set_test();
+ __builtin_printf("unordered_set : %s\n", unordered_set_res ? "PASS" : "FAIL");
+ const bool unordered_multimap_res = unordered_multimap_test();
+ __builtin_printf("unordered_multimap: %s\n", unordered_multimap_res ? "PASS" : "FAIL");
+ const bool unordered_multiset_res = unordered_multiset_test();
+ __builtin_printf("unordered_multiset: %s\n", unordered_multiset_res ? "PASS" : "FAIL");
+ const bool ok = vec_res
+ && deque_res
+ && list_res
+ && map_res
+ && set_res
+ && multimap_res
+ && multiset_res
+ && array_res
+ && forward_list_res
+ && unordered_map_res
+ && unordered_set_res
+ && unordered_multimap_res
+ && unordered_multiset_res;
+ return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-100.C b/libgomp/testsuite/libgomp.c++/target-flex-100.C
new file mode 100644
index 0000000..7ab047f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-100.C
@@ -0,0 +1,210 @@
+/* Container adaptors in target region.
+ Does not test comparison operators other than equality to allow these tests
+ to be generalized to arbitrary input data. */
+
+#include <algorithm>
+#include <cstdio>
+#include <deque>
+#include <queue>
+#include <stack>
+#include <vector>
+
+#include "target-flex-common.h"
+
+template<typename T, std::size_t Size>
+bool test_stack(T (&arr)[Size])
+{
+ bool ok;
+ #pragma omp target map(from: ok) map(to: arr[:Size])
+ {
+ bool inner_ok = true;
+ const std::size_t half_size = Size / 2;
+ const T first_element = arr[0];
+ const T middle_element = arr[half_size - 1];
+ const T last_element = arr[Size - 1];
+ typedef std::stack<T, std::vector<T> > stack_type;
+ stack_type stack;
+ VERIFY (stack.empty());
+ VERIFY (stack.size() == 0);
+ {
+ /* Do half with push. */
+ std::size_t idx = 0;
+ for (; idx < half_size; ++idx)
+ {
+ stack.push(arr[idx]);
+ VERIFY (stack.top() == arr[idx]);
+ }
+ VERIFY (stack.size() == half_size);
+ VERIFY (static_cast<const stack_type&>(stack).size() == half_size);
+ for (; idx < Size; ++idx)
+ {
+ #if __cplusplus >= 201103L
+ /* Do the rest with emplace if C++11 or higher. */
+ stack.emplace(arr[idx]);
+ #else
+ /* Otherwise just use push again. */
+ stack.push(arr[idx]);
+ #endif
+ VERIFY (stack.top() == arr[idx]);
+ }
+ VERIFY (stack.size() == Size);
+ VERIFY (static_cast<const stack_type&>(stack).size() == Size);
+
+ const stack_type stack_orig = stack_type(std::vector<T>(arr, arr + Size));
+ VERIFY (stack == stack_orig);
+    /* References are contained in their own scope so we don't accidentally
+ add tests referencing them after they have been invalidated. */
+ {
+ const T& const_top = static_cast<const stack_type&>(stack).top();
+ VERIFY (const_top == last_element);
+ T& mutable_top = stack.top();
+ mutable_top = first_element;
+ VERIFY (const_top == first_element);
+ }
+ /* Will only compare inequal if the first and last elements are different. */
+ VERIFY (first_element != last_element || stack != stack_orig);
+ for (std::size_t count = Size - half_size; count != 0; --count)
+ stack.pop();
+ VERIFY (stack.top() == middle_element);
+ const stack_type stack_half_orig = stack_type(std::vector<T>(arr, arr + half_size));
+ VERIFY (stack == stack_half_orig);
+ }
+ end:
+ ok = inner_ok;
+ }
+ return ok;
+}
+
+template<typename T, std::size_t Size>
+bool test_queue(T (&arr)[Size])
+{
+ bool ok;
+ #pragma omp target map(from: ok) map(to: arr[:Size])
+ {
+ bool inner_ok = true;
+ const std::size_t half_size = Size / 2;
+ const T first_element = arr[0];
+ const T last_element = arr[Size - 1];
+ typedef std::queue<T, std::deque<T> > queue_type;
+ queue_type queue;
+ VERIFY (queue.empty());
+ VERIFY (queue.size() == 0);
+ {
+ /* Do half with push. */
+ std::size_t idx = 0;
+ for (; idx < half_size; ++idx)
+ {
+ queue.push(arr[idx]);
+ VERIFY (queue.back() == arr[idx]);
+ VERIFY (queue.front() == first_element);
+ }
+ VERIFY (queue.size() == half_size);
+ VERIFY (static_cast<const queue_type&>(queue).size() == half_size);
+ for (; idx < Size; ++idx)
+ {
+ #if __cplusplus >= 201103L
+ /* Do the rest with emplace if C++11 or higher. */
+ queue.emplace(arr[idx]);
+ #else
+ /* Otherwise just use push again. */
+ queue.push(arr[idx]);
+ #endif
+ VERIFY (queue.back() == arr[idx]);
+ }
+ VERIFY (queue.size() == Size);
+ VERIFY (static_cast<const queue_type&>(queue).size() == Size);
+
+ const queue_type queue_orig = queue_type(std::deque<T>(arr, arr + Size));
+ VERIFY (queue == queue_orig);
+
+ /* References are contained in their own scope so we don't accidentally
+ add tests referencing them after they have been invalidated. */
+ {
+ const T& const_front = static_cast<const queue_type&>(queue).front();
+ VERIFY (const_front == first_element);
+ T& mutable_front = queue.front();
+
+ const T& const_back = static_cast<const queue_type&>(queue).back();
+ VERIFY (const_back == last_element);
+ T& mutable_back = queue.back();
+ {
+ using std::swap;
+ swap(mutable_front, mutable_back);
+ }
+ VERIFY (const_front == last_element);
+ VERIFY (const_back == first_element);
+ /* Will only compare unequal if the first and last elements are different. */
+ VERIFY (first_element != last_element || queue != queue_orig);
+ /* Return the last element to normal for the next comparison. */
+ mutable_back = last_element;
+ }
+
+ const T middle_element = arr[half_size];
+ for (std::size_t count = Size - half_size; count != 0; --count)
+ queue.pop();
+ VERIFY (queue.front() == middle_element);
+ const queue_type queue_upper_half = queue_type(std::deque<T>(arr + half_size, arr + Size));
+ VERIFY (queue == queue_upper_half);
+ }
+ end:
+ ok = inner_ok;
+ }
+ return ok;
+}
+
+template<typename T, std::size_t Size>
+bool test_priority_queue(T (&arr)[Size], const T min_value, const T max_value)
+{
+ bool ok;
+ #pragma omp target map(from: ok) map(to: arr[:Size])
+ {
+ bool inner_ok = true;
+ typedef std::priority_queue<T, std::vector<T> > priority_queue_type;
+ {
+ priority_queue_type pqueue;
+ VERIFY (pqueue.empty());
+ VERIFY (pqueue.size() == 0);
+ }
+ {
+ priority_queue_type pqueue(arr, arr + Size);
+ VERIFY (!pqueue.empty());
+ VERIFY (pqueue.size() == Size);
+ VERIFY (static_cast<const priority_queue_type&>(pqueue).size() == Size);
+
+ const T old_max = pqueue.top();
+
+ #if __cplusplus >= 201103L
+ pqueue.emplace(max_value);
+ #else
+ pqueue.push(max_value);
+ #endif
+ VERIFY (pqueue.top() == max_value);
+ pqueue.pop();
+ VERIFY (pqueue.top() == old_max);
+ pqueue.push(min_value);
+ VERIFY (pqueue.top() == old_max);
+ pqueue.push(max_value);
+ VERIFY (pqueue.top() == max_value);
+ pqueue.pop();
+ VERIFY (pqueue.top() == old_max);
+ VERIFY (pqueue.size() == Size + 1);
+
+ for (std::size_t count = Size; count != 0; --count)
+ pqueue.pop();
+ VERIFY (pqueue.size() == 1);
+ VERIFY (pqueue.top() == min_value);
+ }
+ end:
+ ok = inner_ok;
+ }
+ return ok;
+}
+
+int main()
+{
+ int arr[10] = {0,1,2,3,4,5,6,7,8,9};
+
+ return test_stack(arr)
+ && test_queue(arr)
+ && test_priority_queue(arr, 0, 1000) ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-101.C b/libgomp/testsuite/libgomp.c++/target-flex-101.C
new file mode 100644
index 0000000..be9037e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-101.C
@@ -0,0 +1,136 @@
+/* { dg-additional-options -std=c++23 } */
+
+/* C++23 container adaptors in target region.
+ More thorough tests are still needed. */
+
+#include <cstdio>
+#include <utility>
+#include <version>
+
+#if __cpp_lib_flat_map >= 202207L
+#define ENABLE_FLAT_MAP 1
+#endif
+#if __cpp_lib_flat_set >= 202207L
+#define ENABLE_FLAT_SET 1
+#endif
+
+#ifdef ENABLE_FLAT_MAP
+#include <flat_map>
+#endif
+#ifdef ENABLE_FLAT_SET
+#include <flat_set>
+#endif
+
+#include "target-flex-common.h"
+
+#ifdef ENABLE_FLAT_MAP
+template<typename K, typename V, typename std::size_t Size>
+bool test_flat_map(std::pair<K, V> (&arr)[Size])
+{
+ bool ok;
+ #pragma omp target map(from: ok) map(to: arr[:Size])
+ {
+ bool inner_ok = true;
+ {
+ using flat_map_type = std::flat_map<K, V>;
+ flat_map_type map = {arr, arr + Size};
+
+ VERIFY (!map.empty());
+ for (const auto& element : arr)
+ VERIFY (map.contains(element.first));
+ }
+ end:
+ ok = inner_ok;
+ }
+ return ok;
+}
+
+template<typename K, typename V, typename std::size_t Size>
+bool test_flat_multimap(std::pair<K, V> (&arr)[Size])
+{
+ bool ok;
+ #pragma omp target map(from: ok) map(to: arr[:Size])
+ {
+ bool inner_ok = true;
+ {
+ using flat_multimap_type = std::flat_multimap<K, V>;
+ flat_multimap_type multimap = {arr, arr + Size};
+
+ VERIFY (!multimap.empty());
+ for (const auto& element : arr)
+ VERIFY (multimap.contains(element.first));
+ }
+ end:
+ ok = inner_ok;
+ }
+ return ok;
+}
+#else
+template<typename K, typename V, typename std::size_t Size>
+bool test_flat_map(std::pair<K, V> (&arr)[Size]) { return true; }
+
+template<typename K, typename V, typename std::size_t Size>
+bool test_flat_multimap(std::pair<K, V> (&arr)[Size]) { return true; }
+#endif
+
+#ifdef ENABLE_FLAT_SET
+template<typename T, typename std::size_t Size>
+bool test_flat_set(T (&arr)[Size])
+{
+ bool ok;
+ #pragma omp target map(from: ok) map(to: arr[:Size])
+ {
+ bool inner_ok = true;
+ {
+ using flat_set_type = std::flat_set<T>;
+ flat_set_type set = {arr, arr + Size};
+
+ VERIFY (!set.empty());
+ for (const auto& element : arr)
+ VERIFY (set.contains(element));
+ }
+ end:
+ ok = inner_ok;
+ }
+ return ok;
+}
+
+template<typename T, typename std::size_t Size>
+bool test_flat_multiset(T (&arr)[Size])
+{
+ bool ok;
+ #pragma omp target map(from: ok) map(to: arr[:Size])
+ {
+ bool inner_ok = true;
+ {
+ using flat_multiset_type = std::flat_multiset<T>;
+ flat_multiset_type multiset = {arr, arr + Size};
+
+ VERIFY (!multiset.empty());
+ for (const auto& element : arr)
+ VERIFY (multiset.contains(element));
+ }
+ end:
+ ok = inner_ok;
+ }
+ return ok;
+}
+#else
+template<typename T, typename std::size_t Size>
+bool test_flat_set(T (&arr)[Size]) { return true; }
+
+template<typename T, typename std::size_t Size>
+bool test_flat_multiset(T (&arr)[Size]) { return true; }
+#endif
+
+int main()
+{
+ int arr[10] = {0,1,2,3,4,5,6,7,8,9};
+ std::pair<int, int> pairs[10] = {{ 1, 2}, { 2, 4}, { 3, 6}, { 4, 8}, { 5, 10},
+ { 6, 12}, { 7, 14}, { 8, 16}, { 9, 18}, {10, 20}};
+
+ return test_flat_set(arr)
+ && test_flat_multiset(arr)
+ && test_flat_map(pairs)
+ && test_flat_multimap(pairs) ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-11.C b/libgomp/testsuite/libgomp.c++/target-flex-11.C
new file mode 100644
index 0000000..6d55129
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-11.C
@@ -0,0 +1,444 @@
+/* Check constructors/destructors are called in containers. */
+
+#include <vector>
+#include <deque>
+#include <list>
+#include <set>
+#include <map>
+#include <utility>
+#if __cplusplus >= 201103L
+#include <array>
+#include <forward_list>
+#include <unordered_set>
+#include <unordered_map>
+#endif
+
+#include "target-flex-common.h"
+
+struct indirect_counter
+{
+ typedef int counter_value_type;
+ counter_value_type *_count_ptr;
+
+ indirect_counter(counter_value_type *count_ptr) BL_NOEXCEPT : _count_ptr(count_ptr) {
+ ++(*_count_ptr);
+ }
+ indirect_counter(const indirect_counter& other) BL_NOEXCEPT : _count_ptr(other._count_ptr) {
+ ++(*_count_ptr);
+ }
+ /* Don't declare a move constructor; we want to copy no matter what. */
+ ~indirect_counter() {
+ --(*_count_ptr);
+ }
+};
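+
+/* Illustrative sketch of the intended counting behaviour (not part of the
+ checks below): with int n = 0;
+ { indirect_counter a(&n); // n == 1
+ indirect_counter b = a; } // n == 2 inside the scope
+ leaves n == 0 again, so a non-zero count after a container has been
+ destroyed indicates missing constructor/destructor calls. */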
+
+bool operator==(indirect_counter const& lhs, indirect_counter const& rhs) BL_NOEXCEPT
+ { return lhs._count_ptr == rhs._count_ptr; }
+bool operator<(indirect_counter const& lhs, indirect_counter const& rhs) BL_NOEXCEPT
+ { return lhs._count_ptr < rhs._count_ptr; }
+
+#if __cplusplus >= 201103L
+template<>
+struct std::hash<indirect_counter>
+{
+ std::size_t operator()(const indirect_counter& ic) const noexcept
+ { return std::hash<indirect_counter::counter_value_type *>{}(ic._count_ptr); }
+};
+#endif
+
+/* Not a container, just a sanity check really. */
+bool automatic_lifetime_test()
+{
+ bool ok;
+ #pragma omp target map(from: ok)
+ {
+ bool inner_ok = true;
+ int counter = 0;
+ {
+ indirect_counter c = indirect_counter(&counter);
+ indirect_counter(static_cast<int*>(&counter));
+ }
+ VERIFY (counter == 0);
+ end:
+ ok = inner_ok;
+ }
+ return ok;
+}
+
+bool vector_test()
+{
+ bool ok;
+ #pragma omp target map(from: ok)
+ {
+ bool inner_ok = true;
+ int counter = 0;
+ {
+ std::vector<indirect_counter> vec(42, indirect_counter(&counter));
+ VERIFY (counter == 42);
+ vec.resize(32, indirect_counter(&counter));
+ VERIFY (counter == 32);
+ vec.push_back(indirect_counter(&counter));
+ VERIFY (counter == 33);
+ vec.pop_back();
+ VERIFY (counter == 32);
+ vec.pop_back();
+ VERIFY (counter == 31);
+ vec.resize(100, indirect_counter(&counter));
+ VERIFY (counter == 100);
+ }
+ VERIFY (counter == 0);
+ end:
+ ok = inner_ok;
+ }
+ return ok;
+}
+
+bool deque_test()
+{
+ bool ok;
+ #pragma omp target map(from: ok)
+ {
+ bool inner_ok = true;
+ int counter = 0;
+ {
+ std::deque<indirect_counter> deque(42, indirect_counter(&counter));
+ VERIFY (counter == 42);
+ deque.resize(32, indirect_counter(&counter));
+ VERIFY (counter == 32);
+ deque.push_back(indirect_counter(&counter));
+ VERIFY (counter == 33);
+ deque.pop_back();
+ VERIFY (counter == 32);
+ deque.pop_back();
+ VERIFY (counter == 31);
+ deque.resize(100, indirect_counter(&counter));
+ VERIFY (counter == 100);
+ }
+ VERIFY (counter == 0);
+ end:
+ ok = inner_ok;
+ }
+ return ok;
+}
+
+bool list_test()
+{
+ bool ok;
+ #pragma omp target map(from: ok)
+ {
+ bool inner_ok = true;
+ int counter = 0;
+ {
+ std::list<indirect_counter> list(42, indirect_counter(&counter));
+ VERIFY (counter == 42);
+ list.resize(32, indirect_counter(&counter));
+ VERIFY (counter == 32);
+ list.push_back(indirect_counter(&counter));
+ VERIFY (counter == 33);
+ list.pop_back();
+ VERIFY (counter == 32);
+ list.pop_back();
+ VERIFY (counter == 31);
+ list.resize(100, indirect_counter(&counter));
+ VERIFY (counter == 100);
+ }
+ VERIFY (counter == 0);
+ end:
+ ok = inner_ok;
+ }
+ return ok;
+}
+
+bool map_test()
+{
+ bool ok;
+ #pragma omp target map(from: ok)
+ {
+ bool inner_ok = true;
+ int counter = 0;
+ {
+ std::map<int, indirect_counter> map;
+ map.insert(std::make_pair(1, indirect_counter(&counter)));
+ VERIFY (counter == 1);
+ map.insert(std::make_pair(1, indirect_counter(&counter)));
+ VERIFY (counter == 1);
+ map.insert(std::make_pair(2, indirect_counter(&counter)));
+ VERIFY (counter == 2);
+ }
+ VERIFY (counter == 0);
+ end:
+ ok = inner_ok;
+ }
+ return ok;
+}
+
+bool set_test()
+{
+ bool ok;
+ #pragma omp target map(from: ok)
+ {
+ bool inner_ok = true;
+ int counter0 = 0;
+ int counter1 = 0;
+ {
+ std::set<indirect_counter> set;
+ set.insert(indirect_counter(&counter0));
+ VERIFY (counter0 == 1);
+ set.insert(indirect_counter(&counter0));
+ VERIFY (counter0 == 1);
+ set.insert(indirect_counter(&counter1));
+ VERIFY (counter0 == 1 && counter1 == 1);
+ }
+ VERIFY (counter0 == 0 && counter1 == 0);
+ end:
+ ok = inner_ok;
+ }
+ return ok;
+}
+
+bool multimap_test()
+{
+ bool ok;
+ #pragma omp target map(from: ok)
+ {
+ bool inner_ok = true;
+ int counter = 0;
+ {
+ std::multimap<int, indirect_counter> multimap;
+ multimap.insert(std::make_pair(1, indirect_counter(&counter)));
+ VERIFY (counter == 1);
+ multimap.insert(std::make_pair(1, indirect_counter(&counter)));
+ VERIFY (counter == 2);
+ multimap.insert(std::make_pair(2, indirect_counter(&counter)));
+ VERIFY (counter == 3);
+ }
+ VERIFY (counter == 0);
+ end:
+ ok = inner_ok;
+ }
+ return ok;
+}
+
+bool multiset_test()
+{
+ bool ok;
+ #pragma omp target map(from: ok)
+ {
+ bool inner_ok = true;
+ int counter0 = 0;
+ int counter1 = 0;
+ {
+ std::multiset<indirect_counter> multiset;
+ multiset.insert(indirect_counter(&counter0));
+ VERIFY (counter0 == 1);
+ multiset.insert(indirect_counter(&counter0));
+ VERIFY (counter0 == 2);
+ multiset.insert(indirect_counter(&counter1));
+ VERIFY (counter0 == 2 && counter1 == 1);
+ }
+ VERIFY (counter0 == 0 && counter1 == 0);
+ end:
+ ok = inner_ok;
+ }
+ return ok;
+}
+
+#if __cplusplus >= 201103L
+
+bool array_test()
+{
+ bool ok;
+ #pragma omp target map(from: ok)
+ {
+ bool inner_ok = true;
+ int counter = 0;
+ {
+ indirect_counter ic(&counter);
+ std::array<indirect_counter, 10> array{ic, ic, ic, ic, ic,
+ ic, ic, ic, ic, ic};
+ VERIFY (counter == 11);
+ }
+ VERIFY (counter == 0);
+ end:
+ ok = inner_ok;
+ }
+ return ok;
+}
+
+bool forward_list_test()
+{
+ bool ok;
+ #pragma omp target map(from: ok)
+ {
+ bool inner_ok = true;
+ int counter = 0;
+ {
+ std::forward_list<indirect_counter> forward_list(42, indirect_counter(&counter));
+ VERIFY (counter == 42);
+ forward_list.resize(32, indirect_counter(&counter));
+ VERIFY (counter == 32);
+ forward_list.push_front(indirect_counter(&counter));
+ VERIFY (counter == 33);
+ forward_list.pop_front();
+ VERIFY (counter == 32);
+ forward_list.pop_front();
+ VERIFY (counter == 31);
+ forward_list.resize(100, indirect_counter(&counter));
+ VERIFY (counter == 100);
+ }
+ VERIFY (counter == 0);
+ end:
+ ok = inner_ok;
+ }
+ return ok;
+}
+
+bool unordered_map_test()
+{
+ bool ok;
+ #pragma omp target map(from: ok)
+ {
+ bool inner_ok = true;
+ int counter = 0;
+ {
+ std::unordered_map<int, indirect_counter> unordered_map;
+ unordered_map.insert({1, indirect_counter(&counter)});
+ VERIFY (counter == 1);
+ unordered_map.insert({1, indirect_counter(&counter)});
+ VERIFY (counter == 1);
+ unordered_map.insert({2, indirect_counter(&counter)});
+ VERIFY (counter == 2);
+ }
+ VERIFY (counter == 0);
+ end:
+ ok = inner_ok;
+ }
+ return ok;
+}
+
+bool unordered_set_test()
+{
+ bool ok;
+ #pragma omp target map(from: ok)
+ {
+ bool inner_ok = true;
+ int counter0 = 0;
+ int counter1 = 0;
+ {
+ std::unordered_set<indirect_counter> unordered_set;
+ unordered_set.insert(indirect_counter(&counter0));
+ VERIFY (counter0 == 1);
+ unordered_set.insert(indirect_counter(&counter0));
+ VERIFY (counter0 == 1);
+ unordered_set.insert(indirect_counter(&counter1));
+ VERIFY (counter0 == 1 && counter1 == 1);
+ }
+ VERIFY (counter0 == 0 && counter1 == 0);
+ end:
+ ok = inner_ok;
+ }
+ return ok;
+}
+
+bool unordered_multimap_test()
+{
+ bool ok;
+ #pragma omp target map(from: ok)
+ {
+ bool inner_ok = true;
+ int counter = 0;
+ {
+ std::unordered_multimap<int, indirect_counter> unordered_multimap;
+ unordered_multimap.insert({1, indirect_counter(&counter)});
+ VERIFY (counter == 1);
+ unordered_multimap.insert({1, indirect_counter(&counter)});
+ VERIFY (counter == 2);
+ unordered_multimap.insert({2, indirect_counter(&counter)});
+ VERIFY (counter == 3);
+ }
+ VERIFY (counter == 0);
+ end:
+ ok = inner_ok;
+ }
+ return ok;
+}
+
+bool unordered_multiset_test()
+{
+ bool ok;
+ #pragma omp target map(from: ok)
+ {
+ bool inner_ok = true;
+ int counter0 = 0;
+ int counter1 = 0;
+ {
+ std::unordered_multiset<indirect_counter> unordered_multiset;
+ unordered_multiset.insert(indirect_counter(&counter0));
+ VERIFY (counter0 == 1);
+ unordered_multiset.insert(indirect_counter(&counter0));
+ VERIFY (counter0 == 2);
+ unordered_multiset.insert(indirect_counter(&counter1));
+ VERIFY (counter0 == 2 && counter1 == 1);
+ }
+ VERIFY (counter0 == 0 && counter1 == 0);
+ end:
+ ok = inner_ok;
+ }
+ return ok;
+}
+
+#else
+bool array_test() { return true; }
+bool forward_list_test() { return true; }
+bool unordered_map_test() { return true; }
+bool unordered_set_test() { return true; }
+bool unordered_multimap_test() { return true; }
+bool unordered_multiset_test() { return true; }
+#endif
+
+int main()
+{
+ const bool auto_res = automatic_lifetime_test();
+ const bool vec_res = vector_test();
+ const bool deque_res = deque_test();
+ const bool list_res = list_test();
+ const bool map_res = map_test();
+ const bool set_res = set_test();
+ const bool multimap_res = multimap_test();
+ const bool multiset_res = multiset_test();
+ const bool array_res = array_test();
+ const bool forward_list_res = forward_list_test();
+ const bool unordered_map_res = unordered_map_test();
+ const bool unordered_set_res = unordered_set_test();
+ const bool unordered_multimap_res = unordered_multimap_test();
+ const bool unordered_multiset_res = unordered_multiset_test();
+ std::printf("sanity check : %s\n", auto_res ? "PASS" : "FAIL");
+ std::printf("vector : %s\n", vec_res ? "PASS" : "FAIL");
+ std::printf("deque : %s\n", deque_res ? "PASS" : "FAIL");
+ std::printf("list : %s\n", list_res ? "PASS" : "FAIL");
+ std::printf("map : %s\n", map_res ? "PASS" : "FAIL");
+ std::printf("set : %s\n", set_res ? "PASS" : "FAIL");
+ std::printf("multimap : %s\n", multimap_res ? "PASS" : "FAIL");
+ std::printf("multiset : %s\n", multiset_res ? "PASS" : "FAIL");
+ std::printf("array : %s\n", array_res ? "PASS" : "FAIL");
+ std::printf("forward_list : %s\n", forward_list_res ? "PASS" : "FAIL");
+ std::printf("unordered_map : %s\n", unordered_map_res ? "PASS" : "FAIL");
+ std::printf("unordered_set : %s\n", unordered_set_res ? "PASS" : "FAIL");
+ std::printf("unordered_multimap: %s\n", unordered_multimap_res ? "PASS" : "FAIL");
+ std::printf("unordered_multiset: %s\n", unordered_multiset_res ? "PASS" : "FAIL");
+ const bool ok = auto_res
+ && vec_res
+ && deque_res
+ && list_res
+ && map_res
+ && set_res
+ && multimap_res
+ && multiset_res
+ && array_res
+ && forward_list_res
+ && unordered_map_res
+ && unordered_set_res
+ && unordered_multimap_res
+ && unordered_multiset_res;
+ return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-12.C b/libgomp/testsuite/libgomp.c++/target-flex-12.C
new file mode 100644
index 0000000..024fb73
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-12.C
@@ -0,0 +1,736 @@
+/* Populated with mapped data, validate, mutate, validate again.
+ The cases using sets do not mutate.
+ Note: Some of the code in here is clumsy because it has to remain
+ compatible with C++98. */
+
+#include <vector>
+#include <deque>
+#include <list>
+#include <set>
+#include <map>
+#if __cplusplus >= 201103L
+#include <array>
+#include <forward_list>
+#include <unordered_set>
+#include <unordered_map>
+#endif
+
+#include <limits>
+#include <iterator>
+
+#include "target-flex-common.h"
+
+template<bool B, class T = void>
+struct enable_if {};
+
+template<class T>
+struct enable_if<true, T> { typedef T type; };
+
+struct identity_func
+{
+#if __cplusplus < 201103L
+ template<typename T>
+ T& operator()(T& arg) const BL_NOEXCEPT { return arg; }
+ template<typename T>
+ T const& operator()(T const& arg) const BL_NOEXCEPT { return arg; }
+#else
+ template<typename T>
+ constexpr T&& operator()(T&& arg) const BL_NOEXCEPT { return std::forward<T>(arg); }
+#endif
+};
+
+/* Applies projection to the second iterator. */
+template<typename It0, typename It1, typename Proj>
+bool validate_sequential_elements(const It0 begin0, const It0 end0,
+ const It1 begin1, const It1 end1,
+ Proj proj) BL_NOEXCEPT
+{
+ It0 it0 = begin0;
+ It1 it1 = begin1;
+ for (; it0 != end0; ++it0, ++it1)
+ {
+ /* Sizes mismatch; don't bother aborting, just fail the test. */
+ if (it1 == end1)
+ return false;
+ if (*it0 != proj(*it1))
+ return false;
+ }
+ /* Sizes mismatch, do as above. */
+ if (it1 != end1)
+ return false;
+ return true;
+}
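+
+/* For example (illustrative only): with MutationFunc as the projection and c
+ some container built from a hypothetical input array arr,
+ validate_sequential_elements(c.begin(), c.end(), arr, arr + Size,
+ MutationFunc()) checks that both ranges have the same length and that
+ each element of c equals MutationFunc()(arr[i]). */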
+
+template<typename It0, typename It1>
+bool validate_sequential_elements(const It0 begin0, const It0 end0,
+ const It1 begin1, const It1 end1) BL_NOEXCEPT
+{
+ return validate_sequential_elements(begin0, end0, begin1, end1, identity_func());
+}
+
+/* Inefficient, but simple. */
+template<typename It, typename OutIt>
+void simple_copy(const It begin, const It end, OutIt out) BL_NOEXCEPT
+{
+ for (It it = begin; it != end; ++it, ++out)
+ *out = *it;
+}
+
+template<typename It, typename MutateFn>
+void simple_mutate(const It begin, const It end, MutateFn mut_fn) BL_NOEXCEPT
+{
+ for (It it = begin; it != end; ++it)
+ *it = mut_fn(*it);
+}
+
+template<typename MutationFunc, typename T, std::size_t Size>
+bool vector_test(const T (&arr)[Size])
+{
+ bool ok;
+ T out_arr[Size];
+ T out_mut_arr[Size];
+ #pragma omp target map(from: ok, out_arr[:Size], out_mut_arr[:Size]) \
+ map(to: arr[:Size])
+ {
+ bool inner_ok = true;
+ {
+ std::vector<T> vector(arr, arr + Size);
+ VERIFY (validate_sequential_elements(vector.begin(), vector.end(),
+ arr, arr + Size));
+ simple_copy(vector.begin(), vector.end(), out_arr);
+ simple_mutate(vector.begin(), vector.end(), MutationFunc());
+ VERIFY (validate_sequential_elements(vector.begin(), vector.end(),
+ arr, arr + Size, MutationFunc()));
+ simple_copy(vector.begin(), vector.end(), out_mut_arr);
+ }
+ end:
+ ok = inner_ok;
+ }
+ if (!ok)
+ return false;
+ VERIFY_NON_TARGET (validate_sequential_elements(out_arr, out_arr + Size,
+ arr, arr + Size));
+ VERIFY_NON_TARGET (validate_sequential_elements(out_mut_arr, out_mut_arr + Size,
+ arr, arr + Size, MutationFunc()));
+ return true;
+}
+
+template<typename MutationFunc, typename T, std::size_t Size>
+bool deque_test(const T (&arr)[Size])
+{
+ bool ok;
+ T out_arr[Size];
+ T out_mut_arr[Size];
+ #pragma omp target map(from: ok, out_arr[:Size], out_mut_arr[:Size]) \
+ map(to: arr[:Size])
+ {
+ bool inner_ok = true;
+ {
+ std::deque<T> deque(arr, arr + Size);
+ VERIFY (validate_sequential_elements(deque.begin(), deque.end(),
+ arr, arr + Size));
+ simple_copy(deque.begin(), deque.end(), out_arr);
+ simple_mutate(deque.begin(), deque.end(), MutationFunc());
+ VERIFY (validate_sequential_elements(deque.begin(), deque.end(),
+ arr, arr + Size, MutationFunc()));
+ simple_copy(deque.begin(), deque.end(), out_mut_arr);
+ }
+ end:
+ ok = inner_ok;
+ }
+ if (!ok)
+ return false;
+ VERIFY_NON_TARGET (validate_sequential_elements(out_arr, out_arr + Size,
+ arr, arr + Size));
+ VERIFY_NON_TARGET (validate_sequential_elements(out_mut_arr, out_mut_arr + Size,
+ arr, arr + Size, MutationFunc()));
+ return true;
+}
+
+template<typename MutationFunc, typename T, std::size_t Size>
+bool list_test(const T (&arr)[Size])
+{
+ bool ok;
+ T out_arr[Size];
+ T out_mut_arr[Size];
+ #pragma omp target map(from: ok, out_arr[:Size], out_mut_arr[:Size]) \
+ map(to: arr[:Size])
+ {
+ bool inner_ok = true;
+ {
+ std::list<T> list(arr, arr + Size);
+ VERIFY (validate_sequential_elements(list.begin(), list.end(),
+ arr, arr + Size));
+ simple_copy(list.begin(), list.end(), out_arr);
+ simple_mutate(list.begin(), list.end(), MutationFunc());
+ VERIFY (validate_sequential_elements(list.begin(), list.end(),
+ arr, arr + Size, MutationFunc()));
+ simple_copy(list.begin(), list.end(), out_mut_arr);
+ }
+ end:
+ ok = inner_ok;
+ }
+ if (!ok)
+ return false;
+ VERIFY_NON_TARGET (validate_sequential_elements(out_arr, out_arr + Size,
+ arr, arr + Size));
+ VERIFY_NON_TARGET (validate_sequential_elements(out_mut_arr, out_mut_arr + Size,
+ arr, arr + Size, MutationFunc()));
+ return true;
+}
+
+template<typename T>
+const T& get_key(const T& arg) BL_NOEXCEPT
+ { return arg; }
+template<typename K, typename V>
+const K& get_key(const std::pair<K, V>& pair) BL_NOEXCEPT
+ { return pair.first; }
+template<typename T>
+const T& get_value(const T& arg) BL_NOEXCEPT
+ { return arg; }
+template<typename K, typename V>
+const K& get_value(const std::pair<K, V>& pair) BL_NOEXCEPT
+ { return pair.second; }
+
+template<typename T>
+struct key_type { typedef T type; };
+template<typename K, typename V>
+struct key_type<std::pair<K, V> > { typedef K type; };
+
+template<typename Proj, typename Container, typename It>
+bool validate_associative(const Container& container,
+ const It compare_begin,
+ const It compare_end,
+ Proj proj) BL_NOEXCEPT
+{
+ const typename Container::const_iterator elem_end = container.end();
+ for (It compare_it = compare_begin; compare_it != compare_end; ++compare_it)
+ {
+ const typename Container::const_iterator elem_it = container.find(get_key(*compare_it));
+ VERIFY_NON_TARGET (elem_it != elem_end);
+ VERIFY_NON_TARGET (proj(get_value(*compare_it)) == get_value(*elem_it));
+ }
+ return true;
+}
+
+template<typename Container, typename It>
+bool validate_associative(const Container& container,
+ const It compare_begin,
+ const It compare_end) BL_NOEXCEPT
+{
+ return validate_associative(container, compare_begin, compare_end, identity_func());
+}
+
+template<typename It, typename MutateFn>
+void simple_mutate_map(const It begin, const It end, MutateFn mut_fn) BL_NOEXCEPT
+{
+ for (It it = begin; it != end; ++it)
+ it->second = mut_fn(it->second);
+}
+
+template<typename It, typename OutIter>
+void simple_copy_unique(const It begin, const It end, OutIter out) BL_NOEXCEPT
+{
+ /* In case anyone reads this, I want it to be known that I hate C++98. */
+ typedef typename key_type<typename std::iterator_traits<It>::value_type>::type key_t;
+ std::set<key_t> already_seen;
+ for (It it = begin; it != end; ++it, ++out)
+ {
+ key_t key = get_key(*it);
+ if (already_seen.find(key) != already_seen.end())
+ continue;
+ already_seen.insert(key);
+ *out = *it;
+ }
+}
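+
+/* For example (illustrative only): given the pairs {1,2}, {1,4}, {2,6},
+ simple_copy_unique writes only {1,2} and {2,6}, keeping the first
+ occurrence of each key just as std::map::insert does. */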
+
+template<typename MutationFunc, typename K, typename V, std::size_t Size>
+bool map_test(const std::pair<K, V> (&arr)[Size])
+{
+ std::map<K, V> reference_map(arr, arr + Size);
+ bool ok;
+ /* Both sizes should be the same. */
+ std::pair<K, V> out_pairs[Size];
+ std::size_t out_size;
+ std::pair<K, V> out_pairs_mut[Size];
+ std::size_t out_size_mut;
+ #pragma omp target map(from: ok, out_pairs[:Size], out_size, \
+ out_pairs_mut[:Size], out_size_mut) \
+ map(to: arr[:Size])
+ {
+ bool inner_ok = true;
+ {
+ std::vector<std::pair<K, V> > unique_elems;
+ simple_copy_unique(arr, arr + Size,
+ std::back_insert_iterator<std::vector<std::pair<K, V> > >(unique_elems));
+
+ std::map<K, V> map(arr, arr + Size);
+ VERIFY (validate_associative(map, unique_elems.begin(), unique_elems.end()));
+ simple_copy(map.begin(), map.end(), out_pairs);
+ out_size = map.size();
+ simple_mutate_map(map.begin(), map.end(), MutationFunc());
+ VERIFY (validate_associative(map, unique_elems.begin(), unique_elems.end(),
+ MutationFunc()));
+ simple_copy(map.begin(), map.end(), out_pairs_mut);
+ out_size_mut = map.size();
+ }
+ end:
+ ok = inner_ok;
+ }
+ if (!ok)
+ return false;
+ VERIFY_NON_TARGET (out_size == out_size_mut);
+ VERIFY_NON_TARGET (validate_associative(reference_map,
+ out_pairs, out_pairs + out_size));
+ simple_mutate_map(reference_map.begin(), reference_map.end(), MutationFunc());
+ VERIFY_NON_TARGET (validate_associative(reference_map,
+ out_pairs_mut, out_pairs_mut + out_size_mut));
+ return true;
+}
+
+template<typename T, std::size_t Size>
+bool set_test(const T (&arr)[Size])
+{
+ std::set<T> reference_set(arr, arr + Size);
+ bool ok;
+ /* Both sizes should be the same. */
+ T out_arr[Size];
+ std::size_t out_size;
+ #pragma omp target map(from: ok, out_arr[:Size], out_size) \
+ map(to: arr[:Size])
+ {
+ bool inner_ok = true;
+ {
+ std::vector<T> unique_elems;
+ simple_copy_unique(arr, arr + Size,
+ std::back_insert_iterator<std::vector<T> >(unique_elems));
+
+ std::set<T> set(arr, arr + Size);
+ VERIFY (validate_associative(set, unique_elems.begin(), unique_elems.end()));
+ simple_copy(set.begin(), set.end(), out_arr);
+ out_size = set.size();
+ /* Sets can't be mutated; we could create another set with mutated
+ elements, but it gets a little annoying and probably isn't an interesting test. */
+ }
+ end:
+ ok = inner_ok;
+ }
+ if (!ok)
+ return false;
+ VERIFY_NON_TARGET (validate_associative(reference_set,
+ out_arr, out_arr + out_size));
+ return true;
+}
+
+template<typename Proj, typename Container, typename It>
+bool validate_multi_associative(const Container& container,
+ const It compare_begin,
+ const It compare_end,
+ Proj proj) BL_NOEXCEPT
+{
+ /* Once again, for the poor soul reviewing these, I hate C++98. */
+ typedef typename key_type<typename std::iterator_traits<It>::value_type>::type key_t;
+ typedef std::map<key_t, std::size_t> counter_map;
+ counter_map key_count_map;
+ for (It it = compare_begin; it != compare_end; ++it)
+ {
+ const key_t& key = get_key(*it);
+ typename counter_map::iterator counter_it
+ = key_count_map.find(key);
+ if (counter_it != key_count_map.end())
+ ++counter_it->second;
+ else
+ key_count_map.insert(std::pair<const key_t, std::size_t>(key, std::size_t(1)));
+ }
+ const typename Container::const_iterator elem_end = container.end();
+ for (It compare_it = compare_begin; compare_it != compare_end; ++compare_it)
+ {
+ const key_t& key = get_key(*compare_it);
+ typename counter_map::iterator count_it = key_count_map.find(key);
+ std::size_t key_count = count_it != key_count_map.end() ? count_it->second
+ : std::size_t(0);
+ VERIFY_NON_TARGET (key_count > std::size_t(0) && "this will never happen");
+ /* This gets tested multiple times but that should be fine. */
+ VERIFY_NON_TARGET (key_count == container.count(key));
+ typename Container::const_iterator elem_it = container.find(key);
+ /* This will never happen if the previous case passed. */
+ VERIFY_NON_TARGET (elem_it != elem_end);
+ bool found_element = false;
+ for (; elem_it != elem_end; ++elem_it)
+ if (proj(get_value(*compare_it)) == get_value(*elem_it))
+ {
+ found_element = true;
+ break;
+ }
+ VERIFY_NON_TARGET (found_element);
+ }
+ return true;
+}
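+
+/* For example (illustrative only): for the keys {1, 1, 2} this helper
+ checks that the container reports count(1) == 2 and count(2) == 1 and
+ that a matching value can be found among the elements that share a key. */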
+
+template<typename Container, typename It>
+bool validate_multi_associative(const Container& container,
+ const It compare_begin,
+ const It compare_end) BL_NOEXCEPT
+{
+ return validate_multi_associative(container, compare_begin, compare_end, identity_func());
+}
+
+template<typename MutationFunc, typename K, typename V, std::size_t Size>
+bool multimap_test(const std::pair<K, V> (&arr)[Size])
+{
+ std::multimap<K, V> reference_multimap(arr, arr + Size);
+ bool ok;
+ std::pair<K, V> out_pairs[Size];
+ std::pair<K, V> out_pairs_mut[Size];
+ #pragma omp target map(from: ok, out_pairs[:Size], out_pairs_mut[:Size]) \
+ map(to: arr[:Size])
+ {
+ bool inner_ok = true;
+ {
+ std::multimap<K, V> multimap(arr, arr + Size);
+ VERIFY (validate_multi_associative(multimap, arr, arr + Size));
+ simple_copy(multimap.begin(), multimap.end(), out_pairs);
+ simple_mutate_map(multimap.begin(), multimap.end(), MutationFunc());
+ VERIFY (validate_multi_associative(multimap, arr, arr + Size, MutationFunc()));
+ simple_copy(multimap.begin(), multimap.end(), out_pairs_mut);
+ }
+ end:
+ ok = inner_ok;
+ }
+ if (!ok)
+ return false;
+ VERIFY_NON_TARGET (validate_multi_associative(reference_multimap,
+ out_pairs, out_pairs + Size));
+ simple_mutate_map(reference_multimap.begin(), reference_multimap.end(), MutationFunc());
+ VERIFY_NON_TARGET (validate_multi_associative(reference_multimap,
+ out_pairs_mut, out_pairs_mut + Size));
+ return true;
+}
+
+template<typename T, std::size_t Size>
+bool multiset_test(const T (&arr)[Size])
+{
+ std::multiset<T> reference_multiset(arr, arr + Size);
+ bool ok;
+ T out_arr[Size];
+ #pragma omp target map(from: ok, out_arr[:Size]) \
+ map(to: arr[:Size])
+ {
+ bool inner_ok = true;
+ {
+ std::multiset<T> set(arr, arr + Size);
+ VERIFY (validate_multi_associative(set, arr, arr + Size));
+ simple_copy(set.begin(), set.end(), out_arr);
+ /* Sets can't be mutated; we could create another set with mutated
+ elements, but it gets a little annoying and probably isn't an interesting test. */
+ }
+ end:
+ ok = inner_ok;
+ }
+ if (!ok)
+ return false;
+ VERIFY_NON_TARGET (validate_multi_associative(reference_multiset,
+ out_arr, out_arr + Size));
+ return true;
+}
+
+#if __cplusplus >= 201103L
+
+template<typename MutationFunc, typename T, std::size_t Size>
+bool array_test(const T (&arr)[Size])
+{
+ bool ok;
+ T out_arr[Size];
+ T out_mut_arr[Size];
+ #pragma omp target map(from: ok, out_arr[:Size], out_mut_arr[:Size]) \
+ map(to: arr[:Size])
+ {
+ bool inner_ok = true;
+ {
+ std::array<T, Size> std_array{};
+ /* Special case for std::array since it can't be initialized
+ with iterators. */
+ {
+ T zero_val = T{};
+ for (auto it = std_array.begin(); it != std_array.end(); ++it)
+ VERIFY (*it == zero_val);
+ }
+ simple_copy(arr, arr + Size, std_array.begin());
+ VERIFY (validate_sequential_elements(std_array.begin(), std_array.end(),
+ arr, arr + Size));
+ simple_copy(std_array.begin(), std_array.end(), out_arr);
+ simple_mutate(std_array.begin(), std_array.end(), MutationFunc());
+ VERIFY (validate_sequential_elements(std_array.begin(), std_array.end(),
+ arr, arr + Size, MutationFunc()));
+ simple_copy(std_array.begin(), std_array.end(), out_mut_arr);
+ }
+ end:
+ ok = inner_ok;
+ }
+ if (!ok)
+ return false;
+ VERIFY_NON_TARGET (validate_sequential_elements(out_arr, out_arr + Size,
+ arr, arr + Size));
+ VERIFY_NON_TARGET (validate_sequential_elements(out_mut_arr, out_mut_arr + Size,
+ arr, arr + Size, MutationFunc()));
+ return true;
+}
+
+template<typename MutationFunc, typename T, std::size_t Size>
+bool forward_list_test(const T (&arr)[Size])
+{
+ bool ok;
+ T out_arr[Size];
+ T out_mut_arr[Size];
+ #pragma omp target map(from: ok, out_arr[:Size], out_mut_arr[:Size]) \
+ map(to: arr[:Size])
+ {
+ bool inner_ok = true;
+ {
+ std::forward_list<T> fwd_list(arr, arr + Size);
+ VERIFY (validate_sequential_elements(fwd_list.begin(), fwd_list.end(),
+ arr, arr + Size));
+ simple_copy(fwd_list.begin(), fwd_list.end(), out_arr);
+ simple_mutate(fwd_list.begin(), fwd_list.end(), MutationFunc());
+ VERIFY (validate_sequential_elements(fwd_list.begin(), fwd_list.end(),
+ arr, arr + Size, MutationFunc()));
+ simple_copy(fwd_list.begin(), fwd_list.end(), out_mut_arr);
+ }
+ end:
+ ok = inner_ok;
+ }
+ if (!ok)
+ return false;
+ VERIFY_NON_TARGET (validate_sequential_elements(out_arr, out_arr + Size,
+ arr, arr + Size));
+ VERIFY_NON_TARGET (validate_sequential_elements(out_mut_arr, out_mut_arr + Size,
+ arr, arr + Size, MutationFunc()));
+ return true;
+}
+
+template<typename MutationFunc, typename K, typename V, std::size_t Size>
+bool unordered_map_test(const std::pair<K, V> (&arr)[Size])
+{
+ std::unordered_map<K, V> reference_map(arr, arr + Size);
+ bool ok;
+ /* Both sizes should be the same. */
+ std::pair<K, V> out_pairs[Size];
+ std::size_t out_size;
+ std::pair<K, V> out_pairs_mut[Size];
+ std::size_t out_size_mut;
+ #pragma omp target map(from: ok, out_pairs[:Size], out_size, \
+ out_pairs_mut[:Size], out_size_mut) \
+ map(to: arr[:Size])
+ {
+ bool inner_ok = true;
+ {
+ std::vector<std::pair<K, V> > unique_elems;
+ simple_copy_unique(arr, arr + Size,
+ std::back_insert_iterator<std::vector<std::pair<K, V> > >(unique_elems));
+
+ std::unordered_map<K, V> map(arr, arr + Size);
+ VERIFY (validate_associative(map, unique_elems.begin(), unique_elems.end()));
+ simple_copy(map.begin(), map.end(), out_pairs);
+ out_size = map.size();
+ simple_mutate_map(map.begin(), map.end(), MutationFunc());
+ VERIFY (validate_associative(map, unique_elems.begin(), unique_elems.end(),
+ MutationFunc()));
+ simple_copy(map.begin(), map.end(), out_pairs_mut);
+ out_size_mut = map.size();
+ }
+ end:
+ ok = inner_ok;
+ }
+ if (!ok)
+ return false;
+ VERIFY_NON_TARGET (out_size == out_size_mut);
+ VERIFY_NON_TARGET (validate_associative(reference_map,
+ out_pairs, out_pairs + out_size));
+ simple_mutate_map(reference_map.begin(), reference_map.end(), MutationFunc());
+ VERIFY_NON_TARGET (validate_associative(reference_map,
+ out_pairs_mut, out_pairs_mut + out_size_mut));
+ return true;
+}
+
+template<typename T, std::size_t Size>
+bool unordered_set_test(const T (&arr)[Size])
+{
+ std::unordered_set<T> reference_set(arr, arr + Size);
+ bool ok;
+ /* Both sizes should be the same. */
+ T out_arr[Size];
+ std::size_t out_size;
+ #pragma omp target map(from: ok, out_arr[:Size], out_size) \
+ map(to: arr[:Size])
+ {
+ bool inner_ok = true;
+ {
+ std::vector<T> unique_elems;
+ simple_copy_unique(arr, arr + Size,
+ std::back_insert_iterator<std::vector<T> >(unique_elems));
+
+ std::unordered_set<T> set(arr, arr + Size);
+ VERIFY (validate_associative(set, unique_elems.begin(), unique_elems.end()));
+ simple_copy(set.begin(), set.end(), out_arr);
+ out_size = set.size();
+ /* Sets can't be mutated; we could create another set with mutated
+ elements, but it gets a little annoying and probably isn't an interesting test. */
+ }
+ end:
+ ok = inner_ok;
+ }
+ if (!ok)
+ return false;
+ VERIFY_NON_TARGET (validate_associative(reference_set,
+ out_arr, out_arr + out_size));
+ return true;
+}
+
+template<typename MutationFunc, typename K, typename V, std::size_t Size>
+bool unordered_multimap_test(const std::pair<K, V> (&arr)[Size])
+{
+ std::unordered_multimap<K, V> reference_multimap(arr, arr + Size);
+ bool ok;
+ std::pair<K, V> out_pairs[Size];
+ std::pair<K, V> out_pairs_mut[Size];
+ #pragma omp target map(from: ok, out_pairs[:Size], out_pairs_mut[:Size]) \
+ map(to: arr[:Size])
+ {
+ bool inner_ok = true;
+ {
+ std::unordered_multimap<K, V> multimap(arr, arr + Size);
+ VERIFY (validate_multi_associative(multimap, arr, arr + Size));
+ simple_copy(multimap.begin(), multimap.end(), out_pairs);
+ simple_mutate_map(multimap.begin(), multimap.end(), MutationFunc());
+ VERIFY (validate_multi_associative(multimap, arr, arr + Size, MutationFunc()));
+ simple_copy(multimap.begin(), multimap.end(), out_pairs_mut);
+ }
+ end:
+ ok = inner_ok;
+ }
+ if (!ok)
+ return false;
+ VERIFY_NON_TARGET (validate_multi_associative(reference_multimap,
+ out_pairs, out_pairs + Size));
+ simple_mutate_map(reference_multimap.begin(), reference_multimap.end(), MutationFunc());
+ VERIFY_NON_TARGET (validate_multi_associative(reference_multimap,
+ out_pairs_mut, out_pairs_mut + Size));
+ return true;
+}
+
+template<typename T, std::size_t Size>
+bool unordered_multiset_test(const T (&arr)[Size])
+{
+ std::unordered_multiset<T> reference_multiset(arr, arr + Size);
+ bool ok;
+ T out_arr[Size];
+ #pragma omp target map(from: ok, out_arr[:Size]) \
+ map(to: arr[:Size])
+ {
+ bool inner_ok = true;
+ {
+ std::unordered_multiset<T> set(arr, arr + Size);
+ VERIFY (validate_multi_associative(set, arr, arr + Size));
+ simple_copy(set.begin(), set.end(), out_arr);
+ /* Sets can't be mutated; we could create another set with mutated
+ elements, but it gets a little annoying and probably isn't an interesting test. */
+ }
+ end:
+ ok = inner_ok;
+ }
+ if (!ok)
+ return false;
+ VERIFY_NON_TARGET (validate_multi_associative(reference_multiset,
+ out_arr, out_arr + Size));
+ return true;
+}
+
+#else
+template<typename, typename T, std::size_t Size> bool array_test(const T (&arr)[Size]) { return true; }
+template<typename, typename T, std::size_t Size> bool forward_list_test(const T (&arr)[Size]) { return true; }
+template<typename, typename T, std::size_t Size> bool unordered_map_test(const T (&arr)[Size]) { return true; }
+template<typename T, std::size_t Size> bool unordered_set_test(const T (&arr)[Size]) { return true; }
+template<typename, typename T, std::size_t Size> bool unordered_multimap_test(const T (&arr)[Size]) { return true; }
+template<typename T, std::size_t Size> bool unordered_multiset_test(const T (&arr)[Size]) { return true; }
+#endif
+
+/* This clamps to the minimum or maximum value to guard against overflow,
+ assuming std::numeric_limits is specialized for T. */
+struct multiply_by_2
+{
+ template<typename T>
+ typename enable_if<std::numeric_limits<T>::is_specialized, T>::type
+ operator()(T arg) const BL_NOEXCEPT {
+ if (arg < static_cast<T>(0))
+ {
+ if (std::numeric_limits<T>::min() / static_cast<T>(2) >= arg)
+ return std::numeric_limits<T>::min();
+ }
+ else
+ {
+ if (std::numeric_limits<T>::max() / static_cast<T>(2) <= arg)
+ return std::numeric_limits<T>::max();
+ }
+ return arg * 2;
+ }
+ template<typename T>
+ typename enable_if<!std::numeric_limits<T>::is_specialized, T>::type
+ operator()(T arg) const BL_NOEXCEPT {
+ return arg * 2;
+ }
+};
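+
+/* Illustrative only: for T = int, multiply_by_2()(100) == 200, while
+ multiply_by_2()(std::numeric_limits<int>::max()) and
+ multiply_by_2()(std::numeric_limits<int>::min()) saturate to the maximum
+ and minimum values respectively instead of overflowing. */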
+
+int main()
+{
+ int data[8] = {0, 1, 2, 3, 4, 5, 6, 7};
+ std::pair<int, int> pairs[10] = {std::pair<int, int>( 1, 2),
+ std::pair<int, int>( 2, 4),
+ std::pair<int, int>( 3, 6),
+ std::pair<int, int>( 4, 8),
+ std::pair<int, int>( 5, 10),
+ std::pair<int, int>( 6, 12),
+ std::pair<int, int>( 7, 14),
+ std::pair<int, int>( 8, 16),
+ std::pair<int, int>( 9, 18),
+ std::pair<int, int>(10, 20)};
+ const bool vec_res = vector_test<multiply_by_2>(data);
+ const bool deque_res = deque_test<multiply_by_2>(data);
+ const bool list_res = list_test<multiply_by_2>(data);
+ const bool map_res = map_test<multiply_by_2>(pairs);
+ const bool set_res = set_test(data);
+ const bool multimap_res = multimap_test<multiply_by_2>(pairs);
+ const bool multiset_res = multiset_test(data);
+ const bool array_res = array_test<multiply_by_2>(data);
+ const bool forward_list_res = forward_list_test<multiply_by_2>(data);
+ const bool unordered_map_res = unordered_map_test<multiply_by_2>(pairs);
+ const bool unordered_set_res = unordered_set_test(data);
+ const bool unordered_multimap_res = unordered_multimap_test<multiply_by_2>(pairs);
+ const bool unordered_multiset_res = unordered_multiset_test(data);
+ std::printf("vector : %s\n", vec_res ? "PASS" : "FAIL");
+ std::printf("deque : %s\n", deque_res ? "PASS" : "FAIL");
+ std::printf("list : %s\n", list_res ? "PASS" : "FAIL");
+ std::printf("map : %s\n", map_res ? "PASS" : "FAIL");
+ std::printf("set : %s\n", set_res ? "PASS" : "FAIL");
+ std::printf("multimap : %s\n", multimap_res ? "PASS" : "FAIL");
+ std::printf("multiset : %s\n", multiset_res ? "PASS" : "FAIL");
+ std::printf("array : %s\n", array_res ? "PASS" : "FAIL");
+ std::printf("forward_list : %s\n", forward_list_res ? "PASS" : "FAIL");
+ std::printf("unordered_map : %s\n", unordered_map_res ? "PASS" : "FAIL");
+ std::printf("unordered_set : %s\n", unordered_set_res ? "PASS" : "FAIL");
+ std::printf("unordered_multimap: %s\n", unordered_multimap_res ? "PASS" : "FAIL");
+ std::printf("unordered_multiset: %s\n", unordered_multiset_res ? "PASS" : "FAIL");
+ const bool ok = vec_res
+ && deque_res
+ && list_res
+ && map_res
+ && set_res
+ && multimap_res
+ && multiset_res
+ && array_res
+ && forward_list_res
+ && unordered_map_res
+ && unordered_set_res
+ && unordered_multimap_res
+ && unordered_multiset_res;
+ return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-2000.C b/libgomp/testsuite/libgomp.c++/target-flex-2000.C
new file mode 100644
index 0000000..688c014
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-2000.C
@@ -0,0 +1,32 @@
+/* Tiny tuple test. */
+
+#include <tuple>
+
+#include "target-flex-common.h"
+
+bool test(int arg)
+{
+ bool ok;
+ int out;
+ std::tuple tup = {'a', arg, 3.14f};
+ #pragma omp target map(from: ok, out) map(to: tup)
+ {
+ bool inner_ok = true;
+ {
+ VERIFY (std::get<0>(tup) == 'a');
+ out = std::get<1>(tup);
+ }
+ end:
+ ok = inner_ok;
+ }
+ if (!ok)
+ return false;
+ VERIFY_NON_TARGET (out == arg);
+ return true;
+}
+
+int main()
+{
+ volatile int arg = 42u;
+ return test(arg) ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-2001.C b/libgomp/testsuite/libgomp.c++/target-flex-2001.C
new file mode 100644
index 0000000..f1a6c12
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-2001.C
@@ -0,0 +1,61 @@
+/* { dg-additional-options "-std=c++20" } */
+
+/* Functional */
+
+#include <functional>
+#include <utility>
+
+#include "target-flex-common.h"
+
+template<typename T, typename Fn>
+auto invoke_unary(T&& a, Fn&& fn) noexcept
+{
+ return std::invoke(std::forward<Fn>(fn),
+ std::forward<T>(a));
+}
+
+template<typename T, typename U, typename Fn>
+auto invoke_binary(T&& a, U&& b, Fn&& fn) noexcept
+{
+ return std::invoke(std::forward<Fn>(fn),
+ std::forward<T>(a),
+ std::forward<U>(b));
+}
+
+bool test(unsigned arg)
+{
+ bool ok;
+ #pragma omp target map(from: ok) map(to: arg)
+ {
+ bool inner_ok = true;
+ {
+ VERIFY (std::plus{}(arg, 2) == arg + 2);
+ auto bound_plus_arg = std::bind_front(std::plus{}, arg);
+ VERIFY (bound_plus_arg(10) == arg + 10);
+ VERIFY (bound_plus_arg(20) == arg + 20);
+
+ VERIFY (std::not_fn(std::not_equal_to{})(arg, arg));
+ VERIFY (invoke_binary(arg, arg, std::not_fn(std::not_equal_to{})));
+ auto bound_equals_arg = std::bind_front(std::not_fn(std::not_equal_to{}), arg);
+ VERIFY (bound_equals_arg(arg));
+ VERIFY (std::not_fn(bound_equals_arg)(arg + 1));
+ VERIFY (invoke_unary(arg, bound_equals_arg));
+
+ VERIFY (std::not_fn(std::ranges::not_equal_to{})(arg, arg));
+ VERIFY (invoke_binary(arg, arg, std::not_fn(std::ranges::not_equal_to{})));
+ auto bound_ranges_equals_arg = std::bind_front(std::not_fn(std::ranges::not_equal_to{}), arg);
+ VERIFY (bound_ranges_equals_arg(arg));
+ VERIFY (std::not_fn(bound_ranges_equals_arg)(arg + 1));
+ VERIFY (invoke_unary(arg, bound_ranges_equals_arg));
+ }
+ end:
+ ok = inner_ok;
+ }
+ return ok;
+}
+
+int main()
+{
+ volatile unsigned arg = 42u;
+ return test(arg) ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-2002.C b/libgomp/testsuite/libgomp.c++/target-flex-2002.C
new file mode 100644
index 0000000..f738806
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-2002.C
@@ -0,0 +1,97 @@
+/* { dg-additional-options "-std=c++23" } */
+
+/* expected/optional */
+
+#include <optional>
+#include <expected>
+
+#include "target-flex-common.h"
+
+std::optional<unsigned> make_optional(bool b, unsigned arg = 0u) noexcept
+{
+ if (!b)
+ return std::nullopt;
+ return {arg};
+}
+
+bool test_optional(unsigned arg)
+{
+ bool ok;
+ #pragma omp target map(from: ok) map(to: arg)
+ {
+ bool inner_ok = true;
+ {
+ auto null_opt = make_optional(false);
+ VERIFY (!null_opt);
+ VERIFY (!null_opt.has_value());
+ VERIFY (null_opt.value_or(arg * 2u) == arg * 2u);
+ VERIFY (null_opt.or_else([&](){ return std::optional<unsigned>{arg}; })
+ .transform([](int a){ return a * 2u; })
+ .value_or(0) == arg * 2u);
+
+ auto opt = make_optional(true, arg);
+ VERIFY (opt);
+ VERIFY (opt.has_value());
+ VERIFY (opt.value() == arg);
+ VERIFY (*opt == arg);
+ VERIFY (opt.value_or(arg + 42) == arg);
+ VERIFY (opt.or_else([&](){ return std::optional<unsigned>{arg + 42}; })
+ .transform([](int a){ return a * 2u; })
+ .value_or(0) == arg * 2u);
+ }
+ end:
+ ok = inner_ok;
+ }
+ return ok;
+}
+
+struct my_error
+{
+ int _e;
+};
+
+std::expected<unsigned, my_error> make_expected(bool b, unsigned arg = 0u) noexcept
+{
+ if (!b)
+ return std::unexpected{my_error{-1}};
+ return {arg};
+}
+
+bool test_expected(unsigned arg)
+{
+ bool ok;
+ #pragma omp target map(from: ok) map(to: arg)
+ {
+ bool inner_ok = true;
+ {
+ auto unexpected = make_expected(false);
+ VERIFY (!unexpected);
+ VERIFY (!unexpected.has_value());
+ VERIFY (unexpected.error()._e == -1);
+ VERIFY (unexpected.value_or(arg * 2u) == arg * 2u);
+ VERIFY (unexpected.or_else([&](my_error e){ return std::expected<unsigned, my_error>{arg}; })
+ .transform([](int a){ return a * 2u; })
+ .value_or(0) == arg * 2u);
+
+ auto expected = make_expected(true, arg);
+ VERIFY (expected);
+ VERIFY (expected.has_value());
+ VERIFY (expected.value() == arg);
+ VERIFY (*expected == arg);
+ VERIFY (expected.value_or(arg + 42) == arg);
+ VERIFY (expected.or_else([&](my_error e){ return std::expected<unsigned, my_error>{std::unexpected{e}}; })
+ .transform([](int a){ return a * 2u; })
+ .value_or(0) == arg * 2u);
+ }
+ end:
+ ok = inner_ok;
+ }
+ return ok;
+}
+
+int main()
+{
+ volatile unsigned arg = 42;
+ return test_optional(arg)
+ && test_expected(arg) ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-2003.C b/libgomp/testsuite/libgomp.c++/target-flex-2003.C
new file mode 100644
index 0000000..8e8ca8e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-2003.C
@@ -0,0 +1,176 @@
+/* { dg-additional-options "-std=c++20" } */
+
+/* bit_cast and memcpy */
+
+#include <bit>
+#include <cstring>
+
+#include "target-flex-common.h"
+
+struct S0
+{
+ int _v0;
+ char _v1;
+ long long _v2;
+};
+
+struct S1
+{
+ int _v0;
+ char _v1;
+ long long _v2;
+};
+
+bool test_bit_cast(int arg)
+{
+ bool ok;
+ S1 s1_out;
+ #pragma omp target map(from: ok, s1_out) map(to: arg)
+ {
+ bool inner_ok = true;
+ {
+ long long v = static_cast<long long>(arg + 42ll);
+ S0 s = {arg, 'a', v};
+ VERIFY (std::bit_cast<S1>(s)._v0 == arg);
+ VERIFY (std::bit_cast<S1>(s)._v1 == 'a');
+ VERIFY (std::bit_cast<S1>(s)._v2 == v);
+ s1_out = std::bit_cast<S1>(s);
+ }
+ end:
+ ok = inner_ok;
+ }
+ if (!ok)
+ return false;
+ long long v = static_cast<long long>(arg + 42ll);
+ VERIFY_NON_TARGET (std::bit_cast<S0>(s1_out)._v0 == arg);
+ VERIFY_NON_TARGET (std::bit_cast<S0>(s1_out)._v1 == 'a');
+ VERIFY_NON_TARGET (std::bit_cast<S0>(s1_out)._v2 == v);
+ return true;
+}
+
+
+struct OutStruct
+{
+ std::size_t _id;
+ void *_next;
+};
+
+struct Extendable1
+{
+ std::size_t _id;
+ void *_next;
+ int _v;
+};
+
+struct Extendable2
+{
+ std::size_t _id;
+ void *_next;
+ char _str[256];
+};
+
+struct Extendable3
+{
+ std::size_t _id;
+ void *_next;
+ const int *_nums;
+ std::size_t _size;
+};
+
+struct ExtendableUnknown
+{
+ std::size_t _id;
+ void *_next;
+};
+
+template<typename To, std::size_t Id>
+To *get_extendable(void *p)
+{
+ while (p != nullptr)
+ {
+ OutStruct out;
+ std::memcpy(&out, p, sizeof(OutStruct));
+ if (out._id == Id)
+ return static_cast<To *>(p);
+ p = out._next;
+ }
+ return nullptr;
+}
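+
+/* Illustrative only: every Extendable* type shares OutStruct's initial
+ members (_id and _next), so the generic walk reads just that common
+ prefix, e.g.
+ OutStruct head;
+ std::memcpy(&head, p, sizeof(OutStruct));
+ and then follows head._next without dereferencing p as an OutStruct. */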
+
+bool test_memcpy(int arg, const int *nums, std::size_t nums_size)
+{
+ bool ok;
+ Extendable2 e2_out;
+ #pragma omp target map(from: ok, e2_out) map(to: arg, nums[:nums_size], nums_size)
+ {
+ bool inner_ok = true;
+ {
+ Extendable3 e3 = {3u, nullptr, nums, nums_size};
+ ExtendableUnknown u1 = {100u, &e3};
+ Extendable2 e2 = {2u, &u1, {'H', 'e', 'l', 'l', 'o', '!', '\000'}};
+ ExtendableUnknown u2 = {101u, &e2};
+ ExtendableUnknown u3 = {102u, &u2};
+ ExtendableUnknown u4 = {142u, &u3};
+ Extendable1 e1 = {1u, &u4, arg};
+
+ void *p = &e1;
+ while (p != nullptr)
+ {
+ /* You can always cast a pointer to a standard-layout struct to a
+ pointer to the type of its first member. */
+ switch (*static_cast<std::size_t *>(p))
+ {
+ case 1:
+ {
+ Extendable1 *e1_p = static_cast<Extendable1 *>(p);
+ p = e1_p->_next;
+ VERIFY (e1_p->_v == arg);
+ break;
+ }
+ case 2:
+ {
+ Extendable2 *e2_p = static_cast<Extendable2 *>(p);
+ p = e2_p->_next;
+ VERIFY (std::strcmp(e2_p->_str, "Hello!") == 0);
+ break;
+ }
+ case 3:
+ {
+ Extendable3 *e3_p = static_cast<Extendable3 *>(p);
+ p = e3_p->_next;
+ VERIFY (nums == e3_p->_nums);
+ VERIFY (nums_size == e3_p->_size);
+ break;
+ }
+ default:
+ {
+ /* Casting p to a pointer to OutStruct would invoke undefined
+ behavior, though; memcpy is required to extract the _next
+ member. */
+ OutStruct out;
+ std::memcpy(&out, p, sizeof(OutStruct));
+ p = out._next;
+ }
+ }
+ }
+ Extendable2 *e2_p = get_extendable<Extendable2, 2u>(&e1);
+ VERIFY (e2_p != nullptr);
+ e2_out = *e2_p;
+ }
+ end:
+ ok = inner_ok;
+ }
+ if (!ok)
+ return false;
+ VERIFY_NON_TARGET (e2_out._id == 2u);
+ VERIFY_NON_TARGET (std::strcmp(e2_out._str, "Hello!") == 0);
+ return true;
+}
+
+int main()
+{
+ volatile int arg = 42;
+ int arr[8] = {0, 1, 2, 3, 4, 5, 6, 7};
+ return test_bit_cast(arg)
+ && test_memcpy(arg, arr, 8) ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-30.C b/libgomp/testsuite/libgomp.c++/target-flex-30.C
new file mode 100644
index 0000000..c66075b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-30.C
@@ -0,0 +1,51 @@
+/* std::initializer_list in target region. */
+
+#include <initializer_list>
+#include <array>
+
+#include "target-flex-common.h"
+
+bool test_initializer_list(int arg)
+{
+ static constexpr std::size_t out_arr_size = 7;
+ int out_arr[out_arr_size];
+ bool ok;
+ #pragma omp target map(from: ok, out_arr[:out_arr_size]) map(to: arg)
+ {
+ bool inner_ok = true;
+ {
+ auto il = {0, 1, 2, 3, 4, 5, arg};
+
+ int sum = 0;
+ for (auto const& e : il)
+ sum += e;
+ VERIFY (sum == 0 + 1 + 2 + 3 + 4 + 5 + arg);
+
+ auto* out_it = out_arr;
+ const auto* const out_end = out_arr + out_arr_size;
+ for (auto const& e : il)
+ {
+ VERIFY (out_it != out_end);
+ *out_it = e;
+ ++out_it;
+ }
+ }
+ end:
+ ok = inner_ok;
+ }
+ if (!ok)
+ return false;
+
+ std::array<int, out_arr_size> reference_array = {0, 1, 2, 3, 4, 5, arg};
+ const auto *out_arr_it = out_arr;
+ for (auto const& e : reference_array)
+ VERIFY_NON_TARGET (e == *(out_arr_it++));
+
+ return true;
+}
+
+int main()
+{
+ volatile int arg = 42;
+ return test_initializer_list(arg) ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-300.C b/libgomp/testsuite/libgomp.c++/target-flex-300.C
new file mode 100644
index 0000000..ef9e5a9
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-300.C
@@ -0,0 +1,49 @@
+/* { dg-additional-options -std=c++23 } */
+
+/* numerics */
+
+#include <algorithm>
+#include <numeric>
+#include <ranges>
+#include <span>
+#include <vector>
+
+//TODO PR120454 "C++ constexpr vs. OpenMP implicit mapping"
+#pragma omp declare target(std::ranges::all_of, std::ranges::iota)
+
+#include "target-flex-common.h"
+
+namespace stdr = std::ranges;
+
+bool test(std::size_t arg)
+{
+ bool ok;
+ int midpoint_out;
+ std::vector<int> vec(arg);
+ int *data = vec.data();
+ std::size_t size = vec.size();
+ #pragma omp target defaultmap(none) map(from: ok, midpoint_out) map(tofrom: data[:size]) map(to: arg, size)
+ {
+ std::span span = {data, size};
+ bool inner_ok = true;
+ {
+ VERIFY (stdr::all_of(span, [](int v){ return v == int{}; }));
+ stdr::iota(span, 0);
+ midpoint_out = *std::midpoint(span.data(), span.data() + span.size());
+ }
+ end:
+ ok = inner_ok;
+ }
+ if (!ok)
+ return false;
+ VERIFY_NON_TARGET (stdr::equal(vec, std::views::iota(0, static_cast<int>(vec.size()))));
+ VERIFY_NON_TARGET (*std::midpoint(vec.data(), vec.data() + vec.size())
+ == midpoint_out);
+ return true;
+}
+
+int main()
+{
+ volatile std::size_t arg = 42;
+ return test(arg) ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-31.C b/libgomp/testsuite/libgomp.c++/target-flex-31.C
new file mode 100644
index 0000000..adaf18f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-31.C
@@ -0,0 +1,80 @@
+/* std::initializer_list in target region. */
+
+#include <initializer_list>
+
+#include "target-flex-common.h"
+
+struct S0
+{
+ int _v;
+ S0(std::initializer_list<int> il)
+ : _v(0)
+ {
+ for (auto const& e : il)
+ _v += e;
+ }
+};
+
+struct S1
+{
+ int _v;
+ template<typename T>
+ S1(std::initializer_list<T> il)
+ : _v(0)
+ {
+ for (auto const& e : il)
+ _v += e;
+ }
+};
+
+template<typename T>
+struct S2
+{
+ T _v;
+ S2(std::initializer_list<T> il)
+ : _v(0)
+ {
+ for (auto const& e : il)
+ _v += e;
+ }
+};
+
+#if __cplusplus >= 201703L
+template<typename T>
+S2(std::initializer_list<T>) -> S2<T>;
+#endif
+
+bool test_initializer_list(int arg)
+{
+ bool ok;
+ #pragma omp target map(from: ok) map(to: arg)
+ {
+ bool inner_ok = true;
+ {
+ static constexpr int partial_sum = 0 + 1 + 2 + 3 + 4 + 5;
+
+ S0 s0{0, 1, 2, 3, 4, 5, arg};
+ VERIFY (s0._v == partial_sum + arg);
+
+ S1 s1{0, 1, 2, 3, 4, 5, arg};
+ VERIFY (s1._v == partial_sum + arg);
+
+ S2<int> s2{0, 1, 2, 3, 4, 5, arg};
+ VERIFY (s2._v == partial_sum + arg);
+
+ #if __cplusplus >= 201703L
+ S2 s2_ctad{0, 1, 2, 3, 4, 5, arg};
+ VERIFY (s2_ctad._v == partial_sum + arg);
+ #endif
+ }
+ end:
+ ok = inner_ok;
+ }
+ return ok;
+}
+
+int main()
+{
+ volatile int arg = 42;
+ return test_initializer_list(arg) ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-32.C b/libgomp/testsuite/libgomp.c++/target-flex-32.C
new file mode 100644
index 0000000..7f74401a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-32.C
@@ -0,0 +1,50 @@
+/* std::initializer_list constructor of std::vector (explicit template arg) */
+
+#include <vector>
+#include <array>
+
+#include "target-flex-common.h"
+
+bool test_initializer_list(int arg)
+{
+ static constexpr std::size_t out_arr_size = 7;
+ int out_arr[out_arr_size];
+ bool ok;
+ #pragma omp target map(from: ok, out_arr[:out_arr_size]) map(to: arg)
+ {
+ bool inner_ok = true;
+ {
+ std::vector<int> vec{0, 1, 2, 3, 4, 5, arg};
+ int sum = 0;
+ for (auto const& e : vec)
+ sum += e;
+ VERIFY (sum == 0 + 1 + 2 + 3 + 4 + 5 + arg);
+
+ auto* out_it = out_arr;
+ const auto* const out_end = out_arr + out_arr_size;
+ for (auto const& e : vec)
+ {
+ VERIFY (out_it != out_end);
+ *out_it = e;
+ ++out_it;
+ }
+ }
+ end:
+ ok = inner_ok;
+ }
+ if (!ok)
+ return false;
+
+ std::array<int, out_arr_size> reference_array = {0, 1, 2, 3, 4, 5, arg};
+ const auto *out_arr_it = out_arr;
+ for (auto const& e : reference_array)
+ VERIFY_NON_TARGET (e == *(out_arr_it++));
+
+ return true;
+}
+
+int main()
+{
+ volatile int arg = 42;
+ return test_initializer_list(arg) ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-33.C b/libgomp/testsuite/libgomp.c++/target-flex-33.C
new file mode 100644
index 0000000..bb8a39b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-33.C
@@ -0,0 +1,52 @@
+/* { dg-additional-options "-std=c++17" } */
+
+/* deduced std::initializer_list constructor of std::vector (CTAD) */
+
+#include <vector>
+#include <array>
+
+#include "target-flex-common.h"
+
+bool test_initializer_list(int arg)
+{
+ static constexpr std::size_t out_arr_size = 7;
+ int out_arr[out_arr_size];
+ bool ok;
+ #pragma omp target map(from: ok, out_arr[:out_arr_size]) map(to: arg)
+ {
+ bool inner_ok = true;
+ {
+ std::vector vec{0, 1, 2, 3, 4, 5, arg};
+ int sum = 0;
+ for (auto const& e : vec)
+ sum += e;
+ VERIFY (sum == 0 + 1 + 2 + 3 + 4 + 5 + arg);
+
+ auto* out_it = out_arr;
+ const auto* const out_end = out_arr + out_arr_size;
+ for (auto const& e : vec)
+ {
+ VERIFY (out_it != out_end);
+ *out_it = e;
+ ++out_it;
+ }
+ }
+ end:
+ ok = inner_ok;
+ }
+ if (!ok)
+ return false;
+
+ std::array<int, out_arr_size> reference_array = {0, 1, 2, 3, 4, 5, arg};
+ const auto *out_arr_it = out_arr;
+ for (auto const& e : reference_array)
+ VERIFY_NON_TARGET (e == *(out_arr_it++));
+
+ return true;
+}
+
+int main()
+{
+ volatile int arg = 42;
+ return test_initializer_list(arg) ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-41.C b/libgomp/testsuite/libgomp.c++/target-flex-41.C
new file mode 100644
index 0000000..4d36341
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-41.C
@@ -0,0 +1,94 @@
+/* { dg-additional-options "-std=c++20" } */
+
+/* <iterator> c++20 */
+
+/* std::common_iterator uses std::variant. */
+
+#include <vector>
+#include <iterator>
+#include <span>
+
+//TODO PR120454 "C++ constexpr vs. OpenMP implicit mapping"
+#pragma omp declare target(std::ranges::distance, std::ranges::next)
+
+#include "target-flex-common.h"
+
+namespace stdr = std::ranges;
+
+template<typename It0, typename It1>
+bool simple_equal(const It0 begin0, const It0 end0,
+ const It1 begin1, const It1 end1) BL_NOEXCEPT
+{
+ It0 it0 = begin0;
+ It1 it1 = begin1;
+ for (; it0 != end0; ++it0, ++it1)
+ if (it1 == end1 || *it0 != *it1)
+ return false;
+ return true;
+}
+
+template<typename It, typename OutIt>
+void simple_copy(const It begin, const It end, OutIt out) BL_NOEXCEPT
+{
+ for (It it = begin; it != end; ++it, ++out)
+ *out = *it;
+}
+
+template<typename T, std::size_t Size>
+bool test(const T (&arr)[Size])
+{
+ bool ok;
+ T out_rev_arr[Size];
+ T out_fwd_arr[Size];
+ T out_first_half_arr[Size / 2];
+ #pragma omp target defaultmap(none) \
+ map(from: ok, out_rev_arr[:Size], out_fwd_arr[:Size], \
+ out_first_half_arr[:Size / 2]) \
+ map(to: arr[:Size])
+ {
+ bool inner_ok = true;
+ {
+ std::span<const T> span = {arr, Size};
+ std::vector<T> rev_vec(std::reverse_iterator{span.end()},
+ std::reverse_iterator{span.begin()});
+ VERIFY (std::distance(span.begin(), span.end())
+ == std::distance(rev_vec.begin(), rev_vec.end()));
+ VERIFY (stdr::distance(span.begin(), span.end())
+ == stdr::distance(rev_vec.begin(), rev_vec.end()));
+ VERIFY (stdr::distance(span) == stdr::distance(rev_vec));
+ VERIFY (simple_equal(span.begin(), span.end(),
+ std::reverse_iterator{rev_vec.end()},
+ std::reverse_iterator{rev_vec.begin()}));
+ simple_copy(rev_vec.begin(), rev_vec.end(), out_rev_arr);
+ simple_copy(std::reverse_iterator{rev_vec.end()},
+ std::reverse_iterator{rev_vec.begin()},
+ out_fwd_arr);
+ using counted_iter = std::counted_iterator<decltype(span.begin())>;
+ using common_iter = std::common_iterator<counted_iter,
+ std::default_sentinel_t>;
+ std::vector<T> front_half;
+ simple_copy(common_iter{counted_iter{span.begin(), Size / 2}},
+ common_iter{std::default_sentinel},
+ std::back_insert_iterator{front_half});
+ VERIFY (simple_equal(span.begin(), stdr::next(span.begin(), Size / 2),
+ front_half.begin(), front_half.end()));
+ simple_copy(front_half.begin(), front_half.end(), out_first_half_arr);
+ }
+ end:
+ ok = inner_ok;
+ }
+ VERIFY_NON_TARGET (simple_equal(std::reverse_iterator{arr + Size},
+ std::reverse_iterator{arr},
+ out_rev_arr, out_rev_arr + Size));
+ VERIFY_NON_TARGET (simple_equal(arr, arr + Size,
+ out_fwd_arr, out_fwd_arr + Size));
+ VERIFY_NON_TARGET (simple_equal(arr, arr + Size / 2,
+ out_first_half_arr, out_first_half_arr + Size / 2));
+ return ok;
+}
+
+int main()
+{
+ int arr[] = {0, 1, 2, 3, 4, 5, 6, 7};
+ return test(arr) ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-60.C b/libgomp/testsuite/libgomp.c++/target-flex-60.C
new file mode 100644
index 0000000..014b9f5
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-60.C
@@ -0,0 +1,46 @@
+/* algorithms, pre-C++20 */
+
+#include <algorithm>
+#include <vector>
+
+#include "target-flex-common.h"
+
+template<typename T, std::size_t Size>
+bool test(const T (&arr)[Size])
+{
+ bool ok;
+ T out_2x_arr[Size];
+ T out_shifted_arr[Size];
+ #pragma omp target map(from: ok, out_2x_arr[:Size], out_shifted_arr[:Size]) \
+ map(to: arr[:Size])
+ {
+ std::vector<T> vec(Size);
+ std::vector<T> mutated(Size);
+ bool inner_ok = true;
+ {
+ std::copy(arr, arr + Size, vec.begin());
+ VERIFY (std::equal(arr, arr + Size, vec.begin()));
+ std::transform(vec.begin(), vec.end(), mutated.begin(),
+ [](const T& v){ return v * 2; });
+ std::copy(mutated.begin(), mutated.end(), out_2x_arr);
+ std::rotate(vec.begin(), std::next(vec.begin(), Size / 2), vec.end());
+ std::copy(vec.begin(), vec.end(), out_shifted_arr);
+ }
+ end:
+ ok = inner_ok;
+ }
+ if (!ok)
+ return false;
+ VERIFY_NON_TARGET (std::equal(arr, arr + Size, out_2x_arr,
+ [](const T& a, const T& b){ return a * 2 == b; }));
+ std::vector<T> shifted(arr, arr + Size);
+ std::rotate(shifted.begin(), std::next(shifted.begin(), Size / 2), shifted.end());
+ VERIFY_NON_TARGET (std::equal(out_shifted_arr, out_shifted_arr + Size, shifted.begin()));
+ return true;
+}
+
+int main()
+{
+ int arr[] = {0, 1, 2, 3, 4, 5, 6, 7};
+ return test(arr) ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-61.C b/libgomp/testsuite/libgomp.c++/target-flex-61.C
new file mode 100644
index 0000000..9070c2d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-61.C
@@ -0,0 +1,54 @@
+/* { dg-additional-options "-std=c++20" } */
+
+/* ranges algorithms, C++20 */
+
+#include <algorithm>
+#include <ranges>
+#include <vector>
+
+//TODO PR120454 "C++ constexpr vs. OpenMP implicit mapping"
+#pragma omp declare target(std::ranges::copy, std::ranges::equal, std::ranges::rotate, std::ranges::transform)
+
+#include "target-flex-common.h"
+
+namespace stdr = std::ranges;
+
+template<typename T, std::size_t Size>
+bool test(const T (&arr)[Size])
+{
+ bool ok;
+ T out_2x_arr[Size];
+ T out_shifted_arr[Size];
+ #pragma omp target defaultmap(none) \
+ map(from: ok, out_2x_arr[:Size], out_shifted_arr[:Size]) \
+ map(to: arr[:Size])
+ {
+ std::vector<T> vec(Size);
+ std::vector<T> mutated(Size);
+ bool inner_ok = true;
+ {
+ stdr::copy(arr, vec.begin());
+ VERIFY (stdr::equal(arr, vec));
+ stdr::transform(vec, mutated.begin(),
+ [](const T& v){ return v * 2; });
+ stdr::copy(mutated, out_2x_arr);
+ stdr::rotate(vec, std::next(vec.begin(), Size / 2));
+ stdr::copy(vec, out_shifted_arr);
+ }
+ end:
+ ok = inner_ok;
+ }
+ if (!ok)
+ return false;
+ VERIFY_NON_TARGET (stdr::equal(arr, out_2x_arr, stdr::equal_to{}, [](const T& v){ return v * 2; }));
+ std::vector<T> shifted(arr, arr + Size);
+ stdr::rotate(shifted, std::next(shifted.begin(), Size / 2));
+ VERIFY_NON_TARGET (stdr::equal(out_shifted_arr, shifted));
+ return true;
+}
+
+int main()
+{
+ int arr[] = {0, 1, 2, 3, 4, 5, 6, 7};
+ return test(arr) ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-62.C b/libgomp/testsuite/libgomp.c++/target-flex-62.C
new file mode 100644
index 0000000..ef6b942
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-62.C
@@ -0,0 +1,50 @@
+/* { dg-additional-options -std=c++23 } */
+
+/* std::views adaptors. Also tests std::tuple with std::views::zip. */
+
+#include <algorithm>
+#include <ranges>
+#include <span>
+
+//TODO PR120454 "C++ constexpr vs. OpenMP implicit mapping"
+#pragma omp declare target(std::ranges::all_of, std::ranges::equal, std::ranges::fold_left, std::views::reverse, std::views::zip)
+
+#include "target-flex-common.h"
+
+namespace stdr = std::ranges;
+namespace stdv = std::views;
+
+bool f()
+{
+ const int arr_fwd[8] = {0, 1, 2, 3, 4, 5, 6, 7};
+ const int arr_rev[8] = {7, 6, 5, 4, 3, 2, 1, 0};
+
+ bool ok;
+ #pragma omp target defaultmap(none) map(from: ok) map(to: arr_fwd[:8], arr_rev[:8])
+ {
+ std::span<const int> fwd = {arr_fwd, 8};
+ std::span<const int> rev = {arr_rev, 8};
+ bool inner_ok = true;
+ {
+ VERIFY(stdr::equal(fwd, rev | stdv::reverse));
+ VERIFY(stdr::equal(fwd | stdv::drop(4) | stdv::reverse,
+ rev | stdv::take(4)));
+ for (auto [first, second] : stdv::zip(fwd, rev))
+ VERIFY(first + second == 7);
+ auto plus = [](int a, int b){ return a + b; };
+ auto is_even = [](int v){ return v % 2 == 0; };
+ VERIFY(stdr::fold_left(fwd | stdv::filter(is_even), 0, plus)
+ == 12);
+ VERIFY(stdr::all_of(fwd | stdv::transform([](int v){ return v * 2; }),
+ is_even));
+ }
+ end:
+ ok = inner_ok;
+ }
+ return ok;
+}
+
+int main()
+{
+ return f() ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-70.C b/libgomp/testsuite/libgomp.c++/target-flex-70.C
new file mode 100644
index 0000000..9e9383d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-70.C
@@ -0,0 +1,26 @@
+/* CTAD in target regions. */
+
+template<typename T>
+struct S
+{
+ T _v;
+};
+
+template<typename T>
+S(T) -> S<T>;
+
+bool f()
+{
+ bool ok;
+ #pragma omp target map(from: ok)
+ {
+ S s{42};
+ ok = s._v == 42;
+ }
+ return ok;
+}
+
+int main()
+{
+ return f() ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-80.C b/libgomp/testsuite/libgomp.c++/target-flex-80.C
new file mode 100644
index 0000000..f41a1bb
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-80.C
@@ -0,0 +1,49 @@
+// { dg-additional-options "-std=c++20" }
+
+/* std::span */
+
+#include <span>
+
+#include "target-flex-common.h"
+
+template<typename It0, typename It1>
+bool simple_equal(It0 it0, const It0 end0,
+ It1 it1, const It1 end1) noexcept
+{
+ for (; it0 != end0; ++it0, ++it1)
+ if (it1 == end1 || *it0 != *it1)
+ return false;
+ return true;
+}
+
+template<typename T, std::size_t Size>
+bool test(const T (&arr)[Size])
+{
+ bool ok;
+ T out_arr[Size];
+ #pragma omp target map(from: ok) map(to: arr[:Size])
+ {
+ std::span span = {arr, Size};
+ bool inner_ok = true;
+ {
+ VERIFY (!span.empty());
+ VERIFY (span.size() == Size);
+ auto out_it = out_arr;
+ for (auto elem : span)
+ *(out_it++) = elem;
+ }
+ end:
+ ok = inner_ok;
+ }
+ if (!ok)
+ return false;
+ VERIFY_NON_TARGET (simple_equal(arr, arr + Size,
+ out_arr, out_arr + Size));
+ return true;
+}
+
+int main()
+{
+ int arr[8] = {0, 1, 2, 3, 4, 5, 6, 7};
+ return test(arr) ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-81.C b/libgomp/testsuite/libgomp.c++/target-flex-81.C
new file mode 100644
index 0000000..a86fefb
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-81.C
@@ -0,0 +1,75 @@
+/* { dg-additional-options "-std=c++20" } */
+
+#include <ranges>
+#include <span>
+#include <type_traits>
+#include <vector>
+
+#include "target-flex-common.h"
+
+namespace stdr = std::ranges;
+
+template<typename It0, typename It1>
+bool simple_equal(It0 it0, const It0 end0,
+ It1 it1, const It1 end1) noexcept
+{
+ for (; it0 != end0; ++it0, ++it1)
+ if (it1 == end1 || *it0 != *it1)
+ return false;
+ return true;
+}
+
+template<typename Rn0, typename Rn1>
+bool simple_equal(Rn0&& rn0, Rn1&& rn1) noexcept
+{
+ return simple_equal(stdr::begin(rn0), stdr::end(rn0),
+ stdr::begin(rn1), stdr::end(rn1));
+}
+
+template<typename Rn>
+bool test(Rn&& range)
+{
+ using value_type = stdr::range_value_t<std::remove_cvref_t<Rn>>;
+ std::vector<value_type> vec = {stdr::begin(range), stdr::end(range)};
+ value_type *data = vec.data();
+ std::size_t size = vec.size();
+ bool ok;
+ #pragma omp target map(from: ok) map(tofrom: data[:size]) map(to: size)
+ {
+ std::vector<value_type> orig = {data, data + size};
+ std::span<value_type> span = {data, size};
+ bool inner_ok = true;
+ {
+ auto mul_by_2 = [](const value_type& v){ return v * 2; };
+ VERIFY (simple_equal(orig, span));
+ for (auto& elem : span)
+ elem = mul_by_2(elem);
+ VERIFY (simple_equal(orig | std::views::transform(mul_by_2), span));
+ }
+ end:
+ ok = inner_ok;
+ }
+ if (!ok)
+ return false;
+ auto mul_by_2 = [](const value_type& v){ return v * 2; };
+ VERIFY_NON_TARGET (simple_equal(range | std::views::transform(mul_by_2), vec));
+ return true;
+}
+
+struct my_int
+{
+ int _v;
+ bool operator==(my_int const&) const = default;
+ my_int operator*(int rhs) const noexcept {
+ return {_v * rhs};
+ }
+};
+
+int main()
+{
+ std::vector<int> ints = {1, 2, 3, 4, 5};
+ const bool ints_res = test(ints);
+ std::vector<my_int> my_ints = {my_int{1}, my_int{2}, my_int{3}, my_int{4}, my_int{5}};
+ const bool my_ints_res = test(my_ints);
+ return ints_res && my_ints_res ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-90.C b/libgomp/testsuite/libgomp.c++/target-flex-90.C
new file mode 100644
index 0000000..b3f1197
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-90.C
@@ -0,0 +1,107 @@
+/* structured bindings */
+
+#include <array>
+#include <tuple>
+
+#include "target-flex-common.h"
+
+template<typename Array, typename Tuple, typename Struct>
+bool test(Array array, Tuple tuple, Struct s)
+{
+ bool ok;
+ auto array_2nd_in = std::get<2>(array);
+ auto tuple_2nd_in = std::get<2>(tuple);
+ auto s_2nd_in = s._2;
+ decltype(array_2nd_in) array_2nd_out_0;
+ decltype(tuple_2nd_in) tuple_2nd_out_0;
+ decltype(s_2nd_in) s_2nd_out_0;
+ decltype(array_2nd_in) array_2nd_out_1;
+ decltype(tuple_2nd_in) tuple_2nd_out_1;
+ decltype(s_2nd_in) s_2nd_out_1;
+ decltype(array_2nd_in) array_2nd_out_2;
+ decltype(tuple_2nd_in) tuple_2nd_out_2;
+ decltype(s_2nd_in) s_2nd_out_2;
+ #pragma omp target map(from: ok, \
+ array_2nd_out_0, tuple_2nd_out_0, s_2nd_out_0, \
+ array_2nd_out_1, tuple_2nd_out_1, s_2nd_out_1, \
+ array_2nd_out_2, tuple_2nd_out_2, s_2nd_out_2) \
+ map(to: array_2nd_in, tuple_2nd_in, s_2nd_in, array, tuple, s)
+ {
+ bool inner_ok = true;
+ {
+ {
+ auto [array_0th, array_1st, array_2nd] = array;
+ VERIFY (array_2nd_in == array_2nd);
+ VERIFY (std::get<2>(array) == array_2nd);
+ array_2nd_out_0 = array_2nd;
+ auto [tuple_0th, tuple_1st, tuple_2nd] = tuple;
+ VERIFY (tuple_2nd_in == tuple_2nd);
+ VERIFY (std::get<2>(tuple) == tuple_2nd);
+ tuple_2nd_out_0 = tuple_2nd;
+ auto [s_0th, s_1st, s_2nd] = s;
+ VERIFY (s_2nd_in == s_2nd);
+ VERIFY (s._2 == s_2nd);
+ s_2nd_out_0 = s_2nd;
+ }
+ {
+ auto& [array_0th, array_1st, array_2nd] = array;
+ VERIFY (array_2nd_in == array_2nd);
+ VERIFY (std::get<2>(array) == array_2nd);
+ array_2nd_out_1 = array_2nd;
+ auto& [tuple_0th, tuple_1st, tuple_2nd] = tuple;
+ VERIFY (tuple_2nd_in == tuple_2nd);
+ VERIFY (std::get<2>(tuple) == tuple_2nd);
+ tuple_2nd_out_1 = tuple_2nd;
+ auto& [s_0th, s_1st, s_2nd] = s;
+ VERIFY (s_2nd_in == s_2nd);
+ VERIFY (s._2 == s_2nd);
+ s_2nd_out_1 = s_2nd;
+ }
+ {
+ const auto& [array_0th, array_1st, array_2nd] = array;
+ VERIFY (array_2nd_in == array_2nd);
+ VERIFY (std::get<2>(array) == array_2nd);
+ array_2nd_out_2 = array_2nd;
+ const auto& [tuple_0th, tuple_1st, tuple_2nd] = tuple;
+ VERIFY (tuple_2nd_in == tuple_2nd);
+ VERIFY (std::get<2>(tuple) == tuple_2nd);
+ tuple_2nd_out_2 = tuple_2nd;
+ const auto& [s_0th, s_1st, s_2nd] = s;
+ VERIFY (s_2nd_in == s_2nd);
+ VERIFY (s._2 == s_2nd);
+ s_2nd_out_2 = s_2nd;
+ }
+ }
+ end:
+ ok = inner_ok;
+ }
+ if (!ok)
+ return false;
+ VERIFY_NON_TARGET (array_2nd_out_0 == array_2nd_in);
+ VERIFY_NON_TARGET (tuple_2nd_out_0 == tuple_2nd_in);
+ VERIFY_NON_TARGET (s_2nd_out_0 == s_2nd_in);
+ VERIFY_NON_TARGET (array_2nd_out_1 == array_2nd_in);
+ VERIFY_NON_TARGET (tuple_2nd_out_1 == tuple_2nd_in);
+ VERIFY_NON_TARGET (s_2nd_out_1 == s_2nd_in);
+ VERIFY_NON_TARGET (array_2nd_out_2 == array_2nd_in);
+ VERIFY_NON_TARGET (tuple_2nd_out_2 == tuple_2nd_in);
+ VERIFY_NON_TARGET (s_2nd_out_2 == s_2nd_in);
+
+ return true;
+}
+
+struct S
+{
+ char _0;
+ float _1;
+ int _2;
+};
+
+int main()
+{
+ const bool test_res
+ = test(std::array{0, 1, 2},
+ std::tuple{'a', 3.14f, 42},
+ S{'a', 3.14f, 42});
+ return test_res ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-flex-common.h b/libgomp/testsuite/libgomp.c++/target-flex-common.h
new file mode 100644
index 0000000..14523c4
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-flex-common.h
@@ -0,0 +1,40 @@
+#include <cstdio>
+
+#if __cplusplus >= 201103L
+ #define BL_NOEXCEPT noexcept
+#else
+ #define BL_NOEXCEPT throw()
+#endif
+
+#if defined __has_builtin
+# if __has_builtin (__builtin_LINE)
+# define VERIFY_LINE __builtin_LINE ()
+# endif
+#endif
+#if !defined VERIFY_LINE
+# define VERIFY_LINE __LINE__
+#endif
+
+/* I'm not a huge fan of macros, but in the interest of keeping the code that
+   isn't being tested as simple as possible, we use them. */
+
+#define VERIFY(EXPR) \
+ do { \
+ if (!(EXPR)) \
+ { \
+ std::printf("VERIFY ln: %d `" #EXPR "` evaluated to false\n", \
+ VERIFY_LINE); \
+ inner_ok = false; \
+ goto end; \
+ } \
+ } while (false)
+
+#define VERIFY_NON_TARGET(EXPR) \
+ do { \
+ if (!(EXPR)) \
+ { \
+ std::printf("VERIFY ln: %d `" #EXPR "` evaluated to false\n", \
+ VERIFY_LINE); \
+ return false; \
+ } \
+ } while (false)
diff --git a/libgomp/testsuite/libgomp.c++/target-std__array-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__array-concurrent-usm.C
new file mode 100644
index 0000000..9923783
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__array-concurrent-usm.C
@@ -0,0 +1,5 @@
+#pragma omp requires unified_shared_memory self_maps
+
+#define MEM_SHARED
+
+#include "target-std__array-concurrent.C"
diff --git a/libgomp/testsuite/libgomp.c++/target-std__array-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__array-concurrent.C
new file mode 100644
index 0000000..c42105a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__array-concurrent.C
@@ -0,0 +1,62 @@
+// { dg-do run }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <time.h>
+#include <array>
+#include <algorithm>
+
+#define N 50000
+
+void init (int data[])
+{
+ for (int i = 0; i < N; ++i)
+ data[i] = rand ();
+}
+
+#pragma omp declare target
+bool validate (const std::array<int,N> &arr, int data[])
+{
+ for (int i = 0; i < N; ++i)
+ if (arr[i] != data[i] * data[i])
+ return false;
+ return true;
+}
+#pragma omp end declare target
+
+int main (void)
+{
+ int data[N];
+ bool ok;
+ std::array<int,N> arr;
+
+ srand (time (NULL));
+ init (data);
+
+#ifndef MEM_SHARED
+ #pragma omp target data map (to: data[:N]) map (alloc: arr)
+#endif
+ {
+ #pragma omp target
+ {
+#ifndef MEM_SHARED
+ new (&arr) std::array<int,N> ();
+#endif
+ std::copy (data, data + N, arr.begin ());
+ }
+
+ #pragma omp target teams distribute parallel for
+ for (int i = 0; i < N; ++i)
+ arr[i] *= arr[i];
+
+ #pragma omp target map (from: ok)
+ {
+ ok = validate (arr, data);
+#ifndef MEM_SHARED
+ arr.~array ();
+#endif
+ }
+ }
+
+ return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent-usm.C
new file mode 100644
index 0000000..9023ef8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent-usm.C
@@ -0,0 +1,5 @@
+#pragma omp requires unified_shared_memory self_maps
+
+#define MEM_SHARED
+
+#include "target-std__bitset-concurrent.C"
diff --git a/libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent.C
new file mode 100644
index 0000000..4fcce93
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent.C
@@ -0,0 +1,69 @@
+// { dg-do run }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <time.h>
+#include <bitset>
+#include <set>
+#include <algorithm>
+
+#define N 4000
+#define MAX 16384
+
+void init (int data[])
+{
+ std::set<int> _set;
+ for (int i = 0; i < N; ++i)
+ {
+ // Avoid duplicates in data array.
+ do
+ data[i] = rand () % MAX;
+ while (_set.find (data[i]) != _set.end ());
+ _set.insert (data[i]);
+ }
+}
+
+bool validate (int sum, int data[])
+{
+ int total = 0;
+ for (int i = 0; i < N; ++i)
+ total += data[i];
+ return sum == total;
+}
+
+int main (void)
+{
+ int data[N];
+ std::bitset<MAX> _set;
+ int sum = 0;
+
+ srand (time (NULL));
+ init (data);
+
+#ifndef MEM_SHARED
+ #pragma omp target data map (to: data[:N]) map (alloc: _set)
+#endif
+ {
+ #pragma omp target
+ {
+#ifndef MEM_SHARED
+ new (&_set) std::bitset<MAX> ();
+#endif
+ for (int i = 0; i < N; ++i)
+ _set[data[i]] = true;
+ }
+
+ #pragma omp target teams distribute parallel for reduction (+:sum)
+ for (int i = 0; i < MAX; ++i)
+ if (_set[i])
+ sum += i;
+
+#ifndef MEM_SHARED
+ #pragma omp target
+ _set.~bitset ();
+#endif
+ }
+
+ bool ok = validate (sum, data);
+ return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__cmath.C b/libgomp/testsuite/libgomp.c++/target-std__cmath.C
new file mode 100644
index 0000000..aaf7152
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__cmath.C
@@ -0,0 +1,340 @@
+// { dg-do run }
+// { dg-additional-options "-std=c++20" }
+
+#include <cmath>
+#include <numbers>
+
+#define FP_EQUAL(x,y) (std::abs ((x) - (y)) < 1E-6)
+
+#pragma omp declare target
+template<typename T> bool test_basic ()
+{
+ T x = -3.456789;
+ T y = 1.234567;
+ T z = 5.678901;
+
+ if (std::abs (x) != -x)
+ return false;
+ if (!FP_EQUAL (std::trunc (x / y) * y + std::fmod (x, y), x))
+ return false;
+ if (!FP_EQUAL (x - std::round (x / y) * y, std::remainder (x, y)))
+ return false;
+ if (!FP_EQUAL (std::fma (x, y, z), x * y + z))
+ return false;
+ if (std::fmax (x, y) != (x > y ? x : y))
+ return false;
+ if (std::fmin (x, y) != (x < y ? x : y))
+ return false;
+ if (std::fdim (x, y) != std::max(x - y, (T) 0.0))
+ return false;
+ if (std::fdim (y, x) != std::max(y - x, (T) 0.0))
+ return false;
+ return true;
+}
+
+template<typename T> bool test_exp ()
+{
+ T x = -4.567890;
+ T y = 2.345678;
+
+ if (!FP_EQUAL (std::exp (x), std::pow (std::numbers::e_v<T>, x)))
+ return false;
+ if (!FP_EQUAL (std::exp2 (y), std::pow ((T) 2.0, y)))
+ return false;
+ if (!FP_EQUAL (std::expm1 (y), std::exp (y) - (T) 1.0))
+ return false;
+ if (!FP_EQUAL (std::log (std::exp (x)), x))
+ return false;
+ if (!FP_EQUAL (std::log10 (std::pow ((T) 10.0, y)), y))
+ return false;
+ if (!FP_EQUAL (std::log2 (std::exp2 (y)), y))
+ return false;
+ if (!FP_EQUAL (std::log1p (std::expm1 (y)), y))
+ return false;
+ return true;
+}
+
+template<typename T> bool test_power ()
+{
+ T x = 7.234251;
+ T y = 0.340128;
+
+ if (!FP_EQUAL (std::log (std::pow (x, y)) / std::log (x), y))
+ return false;
+ if (!FP_EQUAL (std::sqrt (x) * std::sqrt (x), x))
+ return false;
+ if (!FP_EQUAL (std::cbrt (x) * std::cbrt (x) * std::cbrt (x), x))
+ return false;
+ if (!FP_EQUAL (std::hypot (x, y), std::sqrt (x * x + y * y)))
+ return false;
+ return true;
+}
+
+template<typename T> bool test_trig ()
+{
+ T theta = std::numbers::pi / 4;
+ T phi = std::numbers::pi / 6;
+
+ if (!FP_EQUAL (std::sin (theta), std::sqrt ((T) 2) / 2))
+ return false;
+ if (!FP_EQUAL (std::sin (phi), 0.5))
+ return false;
+ if (!FP_EQUAL (std::cos (theta), std::sqrt ((T) 2) / 2))
+ return false;
+ if (!FP_EQUAL (std::cos (phi), std::sqrt ((T) 3) / 2))
+ return false;
+ if (!FP_EQUAL (std::tan (theta), 1.0))
+ return false;
+ if (!FP_EQUAL (std::tan (phi), std::sqrt ((T) 3) / 3))
+ return false;
+
+ T x = 0.33245623;
+
+ if (!FP_EQUAL (std::asin (std::sin (x)), x))
+ return false;
+ if (!FP_EQUAL (std::acos (std::cos (x)), x))
+ return false;
+ if (!FP_EQUAL (std::atan (std::tan (x)), x))
+ return false;
+ if (!FP_EQUAL (std::atan2 (std::sin (x), std::cos (x)), x))
+ return false;
+ return true;
+}
+
+template<typename T> bool test_hyperbolic ()
+{
+ T x = 0.7423532;
+
+ if (!FP_EQUAL (std::sinh (x), (std::exp (x) - std::exp (-x)) / (T) 2.0))
+ return false;
+ if (!FP_EQUAL (std::cosh (x), (std::exp (x) + std::exp (-x)) / (T) 2.0))
+ return false;
+ if (!FP_EQUAL (std::tanh (x), std::sinh (x) / std::cosh (x)))
+ return false;
+ if (!FP_EQUAL (std::asinh (std::sinh (x)), x))
+ return false;
+ if (!FP_EQUAL (std::acosh (std::cosh (x)), x))
+ return false;
+ if (!FP_EQUAL (std::atanh (std::tanh (x)), x))
+ return false;
+ return true;
+}
+
+template<typename T> bool test_erf ()
+{
+ if (!FP_EQUAL (std::erf ((T) 0), 0))
+ return false;
+ if (!FP_EQUAL (std::erf ((T) INFINITY), 1))
+ return false;
+ if (!FP_EQUAL (std::erf ((T) -INFINITY), -1))
+ return false;
+
+ if (!FP_EQUAL (std::erfc (0), 1))
+ return false;
+ if (!FP_EQUAL (std::erfc ((T) INFINITY), 0))
+ return false;
+ if (!FP_EQUAL (std::erfc ((T) -INFINITY), 2))
+ return false;
+
+ return true;
+}
+
+template<typename T> bool test_gamma ()
+{
+ if (!FP_EQUAL (std::tgamma ((T) 5), 4*3*2*1))
+ return false;
+ if (!FP_EQUAL (std::tgamma ((T) 0.5), std::sqrt (std::numbers::pi_v<T>)))
+ return false;
+ if (!FP_EQUAL (std::tgamma ((T) -0.5), (T) -2 * std::sqrt (std::numbers::pi_v<T>)))
+ return false;
+ if (!FP_EQUAL (std::tgamma ((T) 2.5), (T) 0.75 * std::sqrt (std::numbers::pi_v<T>)))
+ return false;
+ if (!FP_EQUAL (std::tgamma ((T) -2.5), (T) -8.0/15 * std::sqrt (std::numbers::pi_v<T>)))
+ return false;
+
+ if (!FP_EQUAL (std::lgamma ((T) 5), std::log ((T) 4*3*2*1)))
+ return false;
+ if (!FP_EQUAL (std::lgamma ((T) 0.5), std::log (std::sqrt (std::numbers::pi_v<T>))))
+ return false;
+ if (!FP_EQUAL (std::lgamma ((T) 2.5),
+ std::log ((T) 0.75 * std::sqrt (std::numbers::pi_v<T>))))
+ return false;
+
+ return true;
+}
+
+template<typename T> bool test_rounding ()
+{
+ T x = -2.5678;
+ T y = 3.6789;
+
+ if (std::ceil (x) != -2)
+ return false;
+ if (std::floor (x) != -3)
+ return false;
+ if (std::trunc (x) != -2)
+ return false;
+ if (std::round (x) != -3)
+ return false;
+
+ if (std::ceil (y) != 4)
+ return false;
+ if (std::floor (y) != 3)
+ return false;
+ if (std::trunc (y) != 3)
+ return false;
+ if (std::round (y) != 4)
+ return false;
+
+ /* Not testing std::rint and std::nearbyint due to dependence on
+ floating-point environment. */
+
+ return true;
+}
+
+template<typename T> bool test_fpmanip ()
+{
+ T x = -2.3456789;
+ T y = 3.6789012;
+ int exp;
+
+ T mantissa = std::frexp (x, &exp);
+ if (std::ldexp (mantissa, exp) != x)
+ return false;
+ if (std::logb (x) + 1 != exp)
+ return false;
+ if (std::ilogb (x) + 1 != exp)
+ return false;
+ if (std::scalbn (x, -exp) != mantissa)
+ return false;
+
+ T next = std::nextafter (x, y);
+ if (!(next > x && next < y))
+ return false;
+
+#if 0
+ /* TODO Due to 'std::nexttoward' using 'long double to', this triggers a
+ '80-bit-precision floating-point numbers unsupported (mode ‘XF’)' error
+ with x86_64 host and nvptx, GCN offload compilers, or
+ '128-bit-precision floating-point numbers unsupported (mode ‘TF’)' error
+ with powerpc64le host and nvptx offload compiler, for example;
+ PR71064 'nvptx offloading: "long double" data type'.
+ It ought to work on systems where the host's 'long double' is the same as
+ 'double' ('DF'): aarch64, for example? */
+ next = std::nexttoward (x, y);
+ if (!(next > x && next < y))
+ return false;
+#endif
+
+ if (std::copysign (x, y) != std::abs (x))
+ return false;
+ if (std::copysign (y, x) != -y)
+ return false;
+
+ return true;
+}
+
+template<typename T> bool test_classify ()
+{
+ T x = -2.3456789;
+ T y = 3.6789012;
+
+ if (std::fpclassify (x) != FP_NORMAL || std::fpclassify (y) != FP_NORMAL)
+ return false;
+ if (std::fpclassify ((T) INFINITY) != FP_INFINITE
+ || std::fpclassify ((T) -INFINITY) != FP_INFINITE)
+ return false;
+ if (std::fpclassify ((T) 0.0) != FP_ZERO)
+ return false;
+ if (std::fpclassify ((T) NAN) != FP_NAN)
+ return false;
+ if (!std::isfinite (x) || !std::isfinite (y))
+ return false;
+ if (std::isfinite ((T) INFINITY) || std::isfinite ((T) -INFINITY))
+ return false;
+ if (std::isinf (x) || std::isinf (y))
+ return false;
+ if (!std::isinf ((T) INFINITY) || !std::isinf ((T) -INFINITY))
+ return false;
+ if (std::isnan (x) || std::isnan (y))
+ return false;
+ if (!std::isnan ((T) 0.0 / (T) 0.0))
+ return false;
+ if (std::isnan (x) || std::isnan (y))
+ return false;
+ if (!std::isnormal (x) || !std::isnormal (y))
+ return false;
+ if (std::isnormal ((T) 0.0) || std::isnormal ((T) INFINITY) || std::isnormal ((T) NAN))
+ return false;
+ if (!std::signbit (x) || std::signbit (y))
+ return false;
+
+ return true;
+}
+
+template<typename T> bool test_compare ()
+{
+ T x = 5.6789012;
+ T y = 8.9012345;
+
+ if (std::isgreater (x, y))
+ return false;
+ if (std::isgreater (x, x))
+ return false;
+ if (std::isgreaterequal (x, y))
+ return false;
+ if (!std::isgreaterequal (x, x))
+ return false;
+ if (!std::isless (x, y))
+ return false;
+ if (std::isless (x, x))
+ return false;
+ if (!std::islessequal (x, y))
+ return false;
+ if (!std::islessequal (x, x))
+ return false;
+ if (!std::islessgreater (x, y))
+ return false;
+ if (std::islessgreater (x, x))
+ return false;
+ if (std::isunordered (x, y))
+ return false;
+ if (!std::isunordered (x, NAN))
+ return false;
+ return true;
+}
+#pragma omp end declare target
+
+#define RUN_TEST(func) \
+{ \
+ pass++; \
+ bool ok = test_##func<float> (); \
+ if (!ok) { result = pass; break; } \
+ pass++; \
+ ok = test_##func<double> (); \
+ if (!ok) { result = pass; break; } \
+}
+
+int main (void)
+{
+ int result = 0;
+
+ #pragma omp target map (tofrom: result)
+ do {
+ int pass = 0;
+
+ RUN_TEST (basic);
+ RUN_TEST (exp);
+ RUN_TEST (power);
+ RUN_TEST (trig);
+ RUN_TEST (hyperbolic);
+ RUN_TEST (erf);
+ RUN_TEST (gamma);
+ RUN_TEST (rounding);
+ RUN_TEST (fpmanip);
+ RUN_TEST (classify);
+ RUN_TEST (compare);
+ } while (false);
+
+ return result;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__complex.C b/libgomp/testsuite/libgomp.c++/target-std__complex.C
new file mode 100644
index 0000000..e392d17
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__complex.C
@@ -0,0 +1,175 @@
+// { dg-do run }
+// { dg-additional-options "-std=c++20" }
+
+#include <cmath>
+#include <complex>
+#include <numbers>
+
+using namespace std::complex_literals;
+
+#define FP_EQUAL(x,y) (std::abs ((x) - (y)) < 1E-6)
+#define COMPLEX_EQUAL(x,y) (FP_EQUAL ((x).real (), (y).real ()) \
+ && FP_EQUAL ((x).imag (), (y).imag ()))
+
+#pragma omp declare target
+template<typename T> bool test_complex ()
+{
+ std::complex<T> z (-1.334, 5.763);
+
+ if (!FP_EQUAL (z.real (), (T) -1.334))
+ return false;
+ if (!FP_EQUAL (z.imag (), (T) 5.763))
+ return false;
+ if (!FP_EQUAL (std::abs (z),
+ std::sqrt (z.real () * z.real () + z.imag () * z.imag ())))
+ return false;
+ if (!FP_EQUAL (std::arg (z), std::atan2 (z.imag (), z.real ())))
+ return false;
+ if (!FP_EQUAL (std::norm (z), z.real () * z.real () + z.imag () * z.imag ()))
+ return false;
+
+ auto conj = std::conj (z);
+ if (!FP_EQUAL (conj.real (), z.real ())
+ || !FP_EQUAL (conj.imag (), -z.imag ()))
+ return false;
+
+ if (std::proj (z) != z)
+ return false;
+
+ auto infz1 = std::proj (std::complex<float> (INFINITY, -1));
+ if (infz1.real () != INFINITY || infz1.imag () != (T) -0.0)
+ return false;
+ auto infz2 = std::proj (std::complex<float> (0, -INFINITY));
+ if (infz2.real () != INFINITY || infz2.imag () != (T) -0.0)
+ return false;
+
+ auto polarz = std::polar ((T) 1.5, std::numbers::pi_v<T> / 4);
+ if (!FP_EQUAL (polarz.real (), (T) 1.5 * std::cos (std::numbers::pi_v<T> / 4))
+ || !FP_EQUAL (polarz.imag (),
+ (T) 1.5* std::sin (std::numbers::pi_v<T> / 4)))
+ return false;
+
+ return true;
+}
+
+template<typename T> bool test_complex_exp_log ()
+{
+ std::complex<T> z (-1.724, -3.763);
+
+ // Euler's identity
+ auto eulerz = std::exp (std::complex<T> (0, std::numbers::pi));
+ eulerz += 1.0;
+ if (!COMPLEX_EQUAL (eulerz, std::complex<T> ()))
+ return false;
+
+ auto my_exp_z
+ = std::complex<T> (std::exp (z.real ()) * std::cos (z.imag ()),
+ std::exp (z.real ()) * std::sin (z.imag ()));
+ if (!COMPLEX_EQUAL (std::exp (z), my_exp_z))
+ return false;
+
+ if (!COMPLEX_EQUAL (std::log10 (z),
+ std::log (z) / std::log (std::complex<T> (10))))
+ return false;
+
+ return true;
+}
+
+template<typename T> bool test_complex_trig ()
+{
+ std::complex<T> z (std::numbers::pi / 8, std::numbers::pi / 10);
+ const std::complex<T> i (0, 1);
+
+ auto my_sin_z
+ = std::complex<T> (std::sin (z.real ()) * std::cosh (z.imag ()),
+ std::cos (z.real ()) * std::sinh (z.imag ()));
+ if (!COMPLEX_EQUAL (std::sin (z), my_sin_z))
+ return false;
+
+ auto my_cos_z
+ = std::complex<T> (std::cos (z.real ()) * std::cosh (z.imag ()),
+ -std::sin (z.real ()) * std::sinh (z.imag ()));
+ if (!COMPLEX_EQUAL (std::cos (z), my_cos_z))
+ return false;
+
+ auto my_tan_z
+ = std::complex<T> (std::sin (2*z.real ()), std::sinh (2*z.imag ()))
+ / (std::cos (2*z.real ()) + std::cosh (2*z.imag ()));
+ if (!COMPLEX_EQUAL (std::tan (z), my_tan_z))
+ return false;
+
+ auto my_sinh_z
+ = std::complex<T> (std::sinh (z.real ()) * std::cos (z.imag ()),
+ std::cosh (z.real ()) * std::sin (z.imag ()));
+ if (!COMPLEX_EQUAL (std::sinh (z), my_sinh_z))
+ return false;
+
+ auto my_cosh_z
+ = std::complex<T> (std::cosh (z.real ()) * std::cos (z.imag ()),
+ std::sinh (z.real ()) * std::sin (z.imag ()));
+ if (!COMPLEX_EQUAL (std::cosh (z), my_cosh_z))
+ return false;
+
+ auto my_tanh_z
+ = std::complex<T> (std::sinh (2*z.real ()),
+ std::sin (2*z.imag ()))
+ / (std::cosh (2*z.real ()) + std::cos (2*z.imag ()));
+ if (!COMPLEX_EQUAL (std::tanh (z), my_tanh_z))
+ return false;
+
+ auto my_asin_z = -i * std::log (i * z + std::sqrt ((T) 1.0 - z*z));
+ if (!COMPLEX_EQUAL (std::asin (z), my_asin_z))
+ return false;
+
+ auto my_acos_z
+ = std::complex<T> (std::numbers::pi / 2)
+ + i * std::log (i * z + std::sqrt ((T) 1.0 - z*z));
+ if (!COMPLEX_EQUAL (std::acos (z), my_acos_z))
+ return false;
+
+ auto my_atan_z = std::complex<T> (0, -0.5) * (std::log ((i - z) / (i + z)));
+ if (!COMPLEX_EQUAL (std::atan (z), my_atan_z))
+ return false;
+
+ auto my_asinh_z = std::log (z + std::sqrt (z*z + (T) 1.0));
+ if (!COMPLEX_EQUAL (std::asinh (z), my_asinh_z))
+ return false;
+
+ auto my_acosh_z = std::log (z + std::sqrt (z*z - (T) 1.0));
+ if (!COMPLEX_EQUAL (std::acosh (z), my_acosh_z))
+ return false;
+
+ auto my_atanh_z
+ = std::complex<T> (0.5) * (std::log ((T) 1.0 + z) - std::log ((T) 1.0 - z));
+ if (!COMPLEX_EQUAL (std::atanh (z), my_atanh_z))
+ return false;
+
+ return true;
+}
+#pragma omp end declare target
+
+#define RUN_TEST(func) \
+{ \
+ pass++; \
+ bool ok = test_##func<float> (); \
+ if (!ok) { result = pass; break; } \
+ pass++; \
+ ok = test_##func<double> (); \
+ if (!ok) { result = pass; break; } \
+}
+
+int main (void)
+{
+ int result = 0;
+
+ #pragma omp target map (tofrom: result)
+ do {
+ int pass = 0;
+
+ RUN_TEST (complex);
+ RUN_TEST (complex_exp_log);
+ RUN_TEST (complex_trig);
+ } while (false);
+
+ return result;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__deque-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__deque-concurrent-usm.C
new file mode 100644
index 0000000..863a1de
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__deque-concurrent-usm.C
@@ -0,0 +1,5 @@
+#pragma omp requires unified_shared_memory self_maps
+
+#define MEM_SHARED
+
+#include "target-std__deque-concurrent.C"
diff --git a/libgomp/testsuite/libgomp.c++/target-std__deque-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__deque-concurrent.C
new file mode 100644
index 0000000..9c2d6fa
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__deque-concurrent.C
@@ -0,0 +1,64 @@
+// { dg-do run }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <time.h>
+#include <deque>
+#include <algorithm>
+
+#define N 50000
+
+void init (int data[])
+{
+ for (int i = 0; i < N; ++i)
+ data[i] = rand ();
+}
+
+#pragma omp declare target
+bool validate (const std::deque<int> &_deque, int data[])
+{
+ for (int i = 0; i < N; ++i)
+ if (_deque[i] != data[i] * data[i])
+ return false;
+ return true;
+}
+#pragma omp end declare target
+
+int main (void)
+{
+ int data[N];
+ bool ok;
+
+ srand (time (NULL));
+ init (data);
+
+#ifdef MEM_SHARED
+ std::deque<int> _deque (std::begin (data), std::end (data));
+#else
+ std::deque<int> _deque;
+#endif
+
+#ifndef MEM_SHARED
+ #pragma omp target data map (to: data[:N]) map (alloc: _deque)
+#endif
+ {
+#ifndef MEM_SHARED
+ #pragma omp target
+ new (&_deque) std::deque<int> (std::begin (data), std::end (data));
+#endif
+
+ #pragma omp target teams distribute parallel for
+ for (int i = 0; i < N; ++i)
+ _deque[i] *= _deque[i];
+
+ #pragma omp target map (from: ok)
+ {
+ ok = validate (_deque, data);
+#ifndef MEM_SHARED
+ _deque.~deque ();
+#endif
+ }
+ }
+
+ return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__flat_map-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__flat_map-concurrent.C
new file mode 100644
index 0000000..9e59907
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__flat_map-concurrent.C
@@ -0,0 +1,71 @@
+// { dg-do run }
+// { dg-additional-options "-std=c++23" }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+/* { dg-ice {TODO PR120450} { offload_target_amdgcn && { ! offload_device_shared_as } } }
+ { dg-excess-errors {'mkoffload' failure etc.} { xfail { offload_target_amdgcn && { ! offload_device_shared_as } } } }
+ (For effective-target 'offload_device_shared_as', we've got '-DMEM_SHARED', and therefore don't invoke the constructor with placement new.) */
+
+#include <stdlib.h>
+#include <time.h>
+#include <set>
+#include <flat_map>
+
+#define N 3000
+
+void init (int data[], bool unique)
+{
+ std::set<int> _set;
+ for (int i = 0; i < N; ++i)
+ {
+ // Avoid duplicates in data array if unique is true.
+ do
+ data[i] = rand ();
+ while (unique && _set.count (data[i]) > 0);
+ _set.insert (data[i]);
+ }
+}
+
+bool validate (long long sum, int keys[], int data[])
+{
+ long long total = 0;
+ for (int i = 0; i < N; ++i)
+ total += (long long) keys[i] * data[i];
+ return sum == total;
+}
+
+int main (void)
+{
+ int keys[N], data[N];
+ std::flat_map<int,int> _map;
+
+ srand (time (NULL));
+ init (keys, true);
+ init (data, false);
+
+ #pragma omp target enter data map (to: keys[:N], data[:N]) map (alloc: _map)
+
+ #pragma omp target
+ {
+#ifndef MEM_SHARED
+ new (&_map) std::flat_map<int,int> ();
+#endif
+ for (int i = 0; i < N; ++i)
+ _map[keys[i]] = data[i];
+ }
+
+ long long sum = 0;
+ #pragma omp target teams distribute parallel for reduction (+:sum)
+ for (int i = 0; i < N; ++i)
+ sum += (long long) keys[i] * _map[keys[i]];
+
+#ifndef MEM_SHARED
+ #pragma omp target
+ _map.~flat_map ();
+#endif
+
+ #pragma omp target exit data map (release: _map)
+
+ bool ok = validate (sum, keys, data);
+ return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__flat_multimap-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__flat_multimap-concurrent.C
new file mode 100644
index 0000000..1dc60c8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__flat_multimap-concurrent.C
@@ -0,0 +1,70 @@
+// { dg-do run }
+// { dg-additional-options "-std=c++23" }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+/* { dg-ice {TODO PR120450} { offload_target_amdgcn && { ! offload_device_shared_as } } }
+ { dg-excess-errors {'mkoffload' failure etc.} { xfail { offload_target_amdgcn && { ! offload_device_shared_as } } } }
+ (For effective-target 'offload_device_shared_as', we've got '-DMEM_SHARED', and therefore don't invoke the constructor with placement new.) */
+
+#include <stdlib.h>
+#include <time.h>
+#include <flat_map>
+
+// Make sure that KEY_MAX is less than N to ensure some duplicate keys.
+#define N 3000
+#define KEY_MAX 1000
+
+void init (int data[], int max)
+{
+ for (int i = 0; i < N; ++i)
+ data[i] = i % max;
+}
+
+bool validate (long long sum, int keys[], int data[])
+{
+ long long total = 0;
+ for (int i = 0; i < N; ++i)
+ total += (long long) keys[i] * data[i];
+ return sum == total;
+}
+
+int main (void)
+{
+ int keys[N], data[N];
+ std::flat_multimap<int,int> _map;
+
+ srand (time (NULL));
+ init (keys, KEY_MAX);
+ init (data, RAND_MAX);
+
+ #pragma omp target enter data map (to: keys[:N], data[:N]) map (alloc: _map)
+
+ #pragma omp target
+ {
+#ifndef MEM_SHARED
+ new (&_map) std::flat_multimap<int,int> ();
+#endif
+ for (int i = 0; i < N; ++i)
+ _map.insert({keys[i], data[i]});
+ }
+
+ long long sum = 0;
+ #pragma omp target teams distribute parallel for reduction (+:sum)
+ for (int i = 0; i < KEY_MAX; ++i)
+ {
+ auto range = _map.equal_range (i);
+ for (auto it = range.first; it != range.second; ++it) {
+ sum += (long long) it->first * it->second;
+ }
+ }
+
+#ifndef MEM_SHARED
+ #pragma omp target
+ _map.~flat_multimap ();
+#endif
+
+ #pragma omp target exit data map (release: _map)
+
+ bool ok = validate (sum, keys, data);
+ return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__flat_multiset-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__flat_multiset-concurrent.C
new file mode 100644
index 0000000..59b59bf
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__flat_multiset-concurrent.C
@@ -0,0 +1,60 @@
+// { dg-do run }
+// { dg-additional-options "-std=c++23" }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <time.h>
+#include <flat_set>
+#include <algorithm>
+
+// MAX should be less than N to ensure that some duplicates occur.
+#define N 4000
+#define MAX 1000
+
+void init (int data[])
+{
+ for (int i = 0; i < N; ++i)
+ data[i] = rand () % MAX;
+}
+
+bool validate (int sum, int data[])
+{
+ int total = 0;
+ for (int i = 0; i < N; ++i)
+ total += data[i];
+ return sum == total;
+}
+
+int main (void)
+{
+ int data[N];
+ std::flat_multiset<int> set;
+ int sum = 0;
+
+ srand (time (NULL));
+ init (data);
+
+ #pragma omp target data map (to: data[:N]) map (alloc: set)
+ {
+ #pragma omp target
+ {
+#ifndef MEM_SHARED
+ new (&set) std::flat_multiset<int> ();
+#endif
+ for (int i = 0; i < N; ++i)
+ set.insert (data[i]);
+ }
+
+ #pragma omp target teams distribute parallel for reduction (+:sum)
+ for (int i = 0; i < MAX; ++i)
+ sum += i * set.count (i);
+
+#ifndef MEM_SHARED
+ #pragma omp target
+ set.~flat_multiset ();
+#endif
+ }
+
+ bool ok = validate (sum, data);
+ return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__flat_set-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__flat_set-concurrent.C
new file mode 100644
index 0000000..b255cd5
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__flat_set-concurrent.C
@@ -0,0 +1,67 @@
+// { dg-do run }
+// { dg-additional-options "-std=c++23" }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <time.h>
+#include <flat_set>
+#include <algorithm>
+
+#define N 4000
+#define MAX 16384
+
+void init (int data[])
+{
+ std::flat_set<int> _set;
+ for (int i = 0; i < N; ++i)
+ {
+ // Avoid duplicates in data array.
+ do
+ data[i] = rand () % MAX;
+ while (_set.count (data[i]) != 0);
+ _set.insert (data[i]);
+ }
+}
+
+bool validate (int sum, int data[])
+{
+ int total = 0;
+ for (int i = 0; i < N; ++i)
+ total += data[i];
+ return sum == total;
+}
+
+int main (void)
+{
+ int data[N];
+ std::flat_set<int> _set;
+ int sum = 0;
+
+ srand (time (NULL));
+ init (data);
+
+ #pragma omp target data map (to: data[:N]) map (alloc: _set)
+ {
+ #pragma omp target
+ {
+#ifndef MEM_SHARED
+ new (&_set) std::flat_set<int> ();
+#endif
+ for (int i = 0; i < N; ++i)
+ _set.insert (data[i]);
+ }
+
+ #pragma omp target teams distribute parallel for reduction (+:sum)
+ for (int i = 0; i < MAX; ++i)
+ if (_set.count (i) > 0)
+ sum += i;
+
+#ifndef MEM_SHARED
+ #pragma omp target
+ _set.~flat_set ();
+#endif
+ }
+
+ bool ok = validate (sum, data);
+ return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent-usm.C
new file mode 100644
index 0000000..60d5cee
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent-usm.C
@@ -0,0 +1,5 @@
+#pragma omp requires unified_shared_memory self_maps
+
+#define MEM_SHARED
+
+#include "target-std__forward_list-concurrent.C"
diff --git a/libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent.C
new file mode 100644
index 0000000..6b0ee65
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent.C
@@ -0,0 +1,83 @@
+// { dg-do run }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <time.h>
+#include <omp.h>
+#include <forward_list>
+#include <algorithm>
+
+#define N 3000
+
+void init (int data[])
+{
+ for (int i = 0; i < N; ++i)
+ data[i] = rand ();
+}
+
+#pragma omp declare target
+bool validate (const std::forward_list<int> &list, int data[])
+{
+ int i = 0;
+ for (auto &v : list)
+ {
+ if (v != data[i] * data[i])
+ return false;
+ ++i;
+ }
+ return true;
+}
+#pragma omp end declare target
+
+int main (void)
+{
+ int data[N];
+ bool ok;
+
+ srand (time (NULL));
+ init (data);
+
+#ifdef MEM_SHARED
+ std::forward_list<int> list (std::begin (data), std::end (data));
+#else
+ std::forward_list<int> list;
+#endif
+
+#ifndef MEM_SHARED
+ #pragma omp target data map (to: data[:N]) map (alloc: list)
+#endif
+ {
+#ifndef MEM_SHARED
+ #pragma omp target
+ new (&list) std::forward_list<int> (std::begin (data), std::end (data));
+#endif
+
+ #pragma omp target teams
+ do
+ {
+ int len = N / omp_get_num_teams () + (N % omp_get_num_teams () > 0);
+ int start = len * omp_get_team_num ();
+ if (start >= N)
+ break;
+ if (start + len >= N)
+ len = N - start;
+ auto it = list.begin ();
+ std::advance (it, start);
+ for (int i = 0; i < len; ++i)
+ {
+ *it *= *it;
+ ++it;
+ }
+ } while (false);
+
+ #pragma omp target map (from: ok)
+ {
+ ok = validate (list, data);
+#ifndef MEM_SHARED
+ list.~forward_list ();
+#endif
+ }
+ }
+
+ return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__list-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__list-concurrent-usm.C
new file mode 100644
index 0000000..5057bf9
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__list-concurrent-usm.C
@@ -0,0 +1,5 @@
+#pragma omp requires unified_shared_memory self_maps
+
+#define MEM_SHARED
+
+#include "target-std__list-concurrent.C"
diff --git a/libgomp/testsuite/libgomp.c++/target-std__list-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__list-concurrent.C
new file mode 100644
index 0000000..1f44a17
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__list-concurrent.C
@@ -0,0 +1,83 @@
+// { dg-do run }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <time.h>
+#include <omp.h>
+#include <list>
+#include <algorithm>
+
+#define N 3000
+
+void init (int data[])
+{
+ for (int i = 0; i < N; ++i)
+ data[i] = rand ();
+}
+
+#pragma omp declare target
+bool validate (const std::list<int> &_list, int data[])
+{
+ int i = 0;
+ for (auto &v : _list)
+ {
+ if (v != data[i] * data[i])
+ return false;
+ ++i;
+ }
+ return true;
+}
+#pragma omp end declare target
+
+int main (void)
+{
+ int data[N];
+ bool ok;
+
+ srand (time (NULL));
+ init (data);
+
+#ifdef MEM_SHARED
+ std::list<int> _list (std::begin (data), std::end (data));
+#else
+ std::list<int> _list;
+#endif
+
+#ifndef MEM_SHARED
+ #pragma omp target data map (to: data[:N]) map (alloc: _list)
+#endif
+ {
+#ifndef MEM_SHARED
+ #pragma omp target
+ new (&_list) std::list<int> (std::begin (data), std::end (data));
+#endif
+
+ #pragma omp target teams
+ do
+ {
+ int len = N / omp_get_num_teams () + (N % omp_get_num_teams () > 0);
+ int start = len * omp_get_team_num ();
+ if (start >= N)
+ break;
+ if (start + len >= N)
+ len = N - start;
+ auto it = _list.begin ();
+ std::advance (it, start);
+ for (int i = 0; i < len; ++i)
+ {
+ *it *= *it;
+ ++it;
+ }
+ } while (false);
+
+ #pragma omp target map (from: ok)
+ {
+ ok = validate (_list, data);
+#ifndef MEM_SHARED
+ _list.~list ();
+#endif
+ }
+ }
+
+ return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__map-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__map-concurrent-usm.C
new file mode 100644
index 0000000..fe37426
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__map-concurrent-usm.C
@@ -0,0 +1,5 @@
+#pragma omp requires unified_shared_memory self_maps
+
+#define MEM_SHARED
+
+#include "target-std__map-concurrent.C"
diff --git a/libgomp/testsuite/libgomp.c++/target-std__map-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__map-concurrent.C
new file mode 100644
index 0000000..36556ef
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__map-concurrent.C
@@ -0,0 +1,70 @@
+// { dg-do run }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <time.h>
+#include <set>
+#include <map>
+
+#define N 3000
+
+void init (int data[], bool unique)
+{
+ std::set<int> _set;
+ for (int i = 0; i < N; ++i)
+ {
+ // Avoid duplicates in data array if unique is true.
+ do
+ data[i] = rand ();
+ while (unique && _set.find (data[i]) != _set.end ());
+ _set.insert (data[i]);
+ }
+}
+
+bool validate (long long sum, int keys[], int data[])
+{
+ long long total = 0;
+ for (int i = 0; i < N; ++i)
+ total += (long long) keys[i] * data[i];
+ return sum == total;
+}
+
+int main (void)
+{
+ int keys[N], data[N];
+ std::map<int,int> _map;
+
+ srand (time (NULL));
+ init (keys, true);
+ init (data, false);
+
+#ifndef MEM_SHARED
+ #pragma omp target enter data map (to: keys[:N], data[:N]) map (alloc: _map)
+#endif
+
+ #pragma omp target
+ {
+#ifndef MEM_SHARED
+ new (&_map) std::map<int,int> ();
+#endif
+ for (int i = 0; i < N; ++i)
+ _map[keys[i]] = data[i];
+ }
+
+ long long sum = 0;
+ #pragma omp target teams distribute parallel for reduction (+:sum)
+ for (int i = 0; i < N; ++i)
+ sum += (long long) keys[i] * _map[keys[i]];
+
+#ifndef MEM_SHARED
+ #pragma omp target
+ _map.~map ();
+#endif
+
+#ifndef MEM_SHARED
+ #pragma omp target exit data map (release: _map)
+#endif
+
+ bool ok = validate (sum, keys, data);
+ return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent-usm.C
new file mode 100644
index 0000000..79f9245
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent-usm.C
@@ -0,0 +1,5 @@
+#pragma omp requires unified_shared_memory self_maps
+
+#define MEM_SHARED
+
+#include "target-std__multimap-concurrent.C"
diff --git a/libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent.C
new file mode 100644
index 0000000..6a4a4e8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent.C
@@ -0,0 +1,68 @@
+// { dg-do run }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <time.h>
+#include <map>
+
+// Make sure that KEY_MAX is less than N to ensure some duplicate keys.
+#define N 3000
+#define KEY_MAX 1000
+
+void init (int data[], int max)
+{
+ for (int i = 0; i < N; ++i)
+ data[i] = rand () % max;
+}
+
+bool validate (long long sum, int keys[], int data[])
+{
+ long long total = 0;
+ for (int i = 0; i < N; ++i)
+ total += (long long) keys[i] * data[i];
+ return sum == total;
+}
+
+int main (void)
+{
+ int keys[N], data[N];
+ std::multimap<int,int> _map;
+
+ srand (time (NULL));
+ init (keys, KEY_MAX);
+ init (data, RAND_MAX);
+
+#ifndef MEM_SHARED
+ #pragma omp target enter data map (to: keys[:N], data[:N]) map (alloc: _map)
+#endif
+
+ #pragma omp target
+ {
+#ifndef MEM_SHARED
+ new (&_map) std::multimap<int,int> ();
+#endif
+ for (int i = 0; i < N; ++i)
+ _map.insert({keys[i], data[i]});
+ }
+
+ long long sum = 0;
+ #pragma omp target teams distribute parallel for reduction (+:sum)
+ for (int i = 0; i < KEY_MAX; ++i)
+ {
+ auto range = _map.equal_range (i);
+ for (auto it = range.first; it != range.second; ++it)
+ sum += (long long) it->first * it->second;
+ }
+
+#ifndef MEM_SHARED
+ #pragma omp target
+ _map.~multimap ();
+#endif
+
+#ifndef MEM_SHARED
+ #pragma omp target exit data map (release: _map)
+#endif
+
+ bool ok = validate (sum, keys, data);
+ return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent-usm.C
new file mode 100644
index 0000000..2d80756
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent-usm.C
@@ -0,0 +1,5 @@
+#pragma omp requires unified_shared_memory self_maps
+
+#define MEM_SHARED
+
+#include "target-std__multiset-concurrent.C"
diff --git a/libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent.C
new file mode 100644
index 0000000..b12402e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent.C
@@ -0,0 +1,62 @@
+// { dg-do run }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+#include <set>
+#include <algorithm>
+
+// MAX should be less than N to ensure that some duplicates occur.
+#define N 4000
+#define MAX 1000
+
+void init (int data[])
+{
+ for (int i = 0; i < N; ++i)
+ data[i] = rand () % MAX;
+}
+
+bool validate (int sum, int data[])
+{
+ int total = 0;
+ for (int i = 0; i < N; ++i)
+ total += data[i];
+ return sum == total;
+}
+
+int main (void)
+{
+ int data[N];
+ std::multiset<int> set;
+ int sum = 0;
+
+ srand (time (NULL));
+ init (data);
+
+#ifndef MEM_SHARED
+ #pragma omp target data map (to: data[:N]) map (alloc: set)
+#endif
+ {
+ #pragma omp target
+ {
+#ifndef MEM_SHARED
+ new (&set) std::multiset<int> ();
+#endif
+ for (int i = 0; i < N; ++i)
+ set.insert (data[i]);
+ }
+
+ #pragma omp target teams distribute parallel for reduction (+:sum)
+ for (int i = 0; i < MAX; ++i)
+ sum += i * set.count (i);
+
+#ifndef MEM_SHARED
+ #pragma omp target
+ set.~multiset ();
+#endif
+ }
+
+ bool ok = validate (sum, data);
+ return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__numbers.C b/libgomp/testsuite/libgomp.c++/target-std__numbers.C
new file mode 100644
index 0000000..a6b3665
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__numbers.C
@@ -0,0 +1,93 @@
+// { dg-do run }
+// { dg-additional-options "-std=c++20" }
+
+#include <cmath>
+#include <numbers>
+
+#define FP_EQUAL(x,y) (std::abs ((x) - (y)) < 1E-6)
+
+#pragma omp declare target
+template<typename T> bool test_pi ()
+{
+ if (!FP_EQUAL (std::sin (std::numbers::pi_v<T>), (T) 0.0))
+ return false;
+ if (!FP_EQUAL (std::cos (std::numbers::pi_v<T>), (T) -1.0))
+ return false;
+ if (!FP_EQUAL (std::numbers::pi_v<T> * std::numbers::inv_pi_v<T>, (T) 1.0))
+ return false;
+ if (!FP_EQUAL (std::numbers::pi_v<T> * std::numbers::inv_sqrtpi_v<T>
+ * std::numbers::inv_sqrtpi_v<T>, (T) 1.0))
+ return false;
+ return true;
+}
+
+template<typename T> bool test_sqrt ()
+{
+ if (!FP_EQUAL (std::numbers::sqrt2_v<T> * std::numbers::sqrt2_v<T>, (T) 2.0))
+ return false;
+ if (!FP_EQUAL (std::numbers::sqrt3_v<T> * std::numbers::sqrt3_v<T>, (T) 3.0))
+ return false;
+ return true;
+}
+
+template<typename T> bool test_phi ()
+{
+ T myphi = ((T) 1.0 + std::sqrt ((T) 5.0)) / (T) 2.0;
+ if (!FP_EQUAL (myphi, std::numbers::phi_v<T>))
+ return false;
+ return true;
+}
+
+template<typename T> bool test_log ()
+{
+ if (!FP_EQUAL (std::log ((T) 2.0), std::numbers::ln2_v<T>))
+ return false;
+ if (!FP_EQUAL (std::log ((T) 10.0), std::numbers::ln10_v<T>))
+ return false;
+ if (!FP_EQUAL (std::log2 ((T) std::numbers::e), std::numbers::log2e_v<T>))
+ return false;
+ if (!FP_EQUAL (std::log10 ((T) std::numbers::e), std::numbers::log10e_v<T>))
+ return false;
+ return true;
+}
+
+template<typename T> bool test_egamma ()
+{
+ T myegamma = 0.0;
+ #pragma omp parallel for reduction(+:myegamma)
+ for (int k = 2; k < 100000; ++k)
+ myegamma += (std::riemann_zeta (k) - 1) / k;
+ myegamma = (T) 1 - myegamma;
+ if (!FP_EQUAL (myegamma, std::numbers::egamma_v<T>))
+ return false;
+ return true;
+}
+#pragma omp end declare target
+
+#define RUN_TEST(func) \
+{ \
+ pass++; \
+ bool ok = test_##func<float> (); \
+ if (!ok) { result = pass; break; } \
+ pass++; \
+ ok = test_##func<double> (); \
+ if (!ok) { result = pass; break; } \
+}
+
+int main (void)
+{
+ int result = 0;
+
+ #pragma omp target map (tofrom: result)
+ do {
+ int pass = 0;
+
+ RUN_TEST (pi);
+ RUN_TEST (sqrt);
+ RUN_TEST (phi);
+ RUN_TEST (log);
+ RUN_TEST (egamma);
+ } while (false);
+
+ return result;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__set-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__set-concurrent-usm.C
new file mode 100644
index 0000000..54f62e3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__set-concurrent-usm.C
@@ -0,0 +1,5 @@
+#pragma omp requires unified_shared_memory self_maps
+
+#define MEM_SHARED
+
+#include "target-std__set-concurrent.C"
diff --git a/libgomp/testsuite/libgomp.c++/target-std__set-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__set-concurrent.C
new file mode 100644
index 0000000..cd23128
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__set-concurrent.C
@@ -0,0 +1,68 @@
+// { dg-do run }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <time.h>
+#include <set>
+#include <algorithm>
+
+#define N 4000
+#define MAX 16384
+
+void init (int data[])
+{
+ std::set<int> _set;
+ for (int i = 0; i < N; ++i)
+ {
+ // Avoid duplicates in data array.
+ do
+ data[i] = rand () % MAX;
+ while (_set.find (data[i]) != _set.end ());
+ _set.insert (data[i]);
+ }
+}
+
+bool validate (int sum, int data[])
+{
+ int total = 0;
+ for (int i = 0; i < N; ++i)
+ total += data[i];
+ return sum == total;
+}
+
+int main (void)
+{
+ int data[N];
+ std::set<int> _set;
+ int sum = 0;
+
+ srand (time (NULL));
+ init (data);
+
+#ifndef MEM_SHARED
+ #pragma omp target data map (to: data[:N]) map (alloc: _set)
+#endif
+ {
+ #pragma omp target
+ {
+#ifndef MEM_SHARED
+ new (&_set) std::set<int> ();
+#endif
+ for (int i = 0; i < N; ++i)
+ _set.insert (data[i]);
+ }
+
+ #pragma omp target teams distribute parallel for reduction (+:sum)
+ for (int i = 0; i < MAX; ++i)
+ if (_set.find (i) != _set.end ())
+ sum += i;
+
+#ifndef MEM_SHARED
+ #pragma omp target
+ _set.~set ();
+#endif
+ }
+
+ bool ok = validate (sum, data);
+ return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__span-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__span-concurrent-usm.C
new file mode 100644
index 0000000..7ef16bf
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__span-concurrent-usm.C
@@ -0,0 +1,7 @@
+// { dg-additional-options "-std=c++20" }
+
+#pragma omp requires unified_shared_memory self_maps
+
+#define MEM_SHARED
+
+#include "target-std__span-concurrent.C"
diff --git a/libgomp/testsuite/libgomp.c++/target-std__span-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__span-concurrent.C
new file mode 100644
index 0000000..046b3c1
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__span-concurrent.C
@@ -0,0 +1,66 @@
+// { dg-do run }
+// { dg-additional-options "-std=c++20" }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <time.h>
+#include <algorithm>
+#include <span>
+
+#define N 64
+
+void init (int data[])
+{
+ for (int i = 0; i < N; ++i)
+ data[i] = rand ();
+}
+
+#pragma omp declare target
+bool validate (const std::span<int, N> &span, int data[])
+{
+ for (int i = 0; i < N; ++i)
+ if (span[i] != data[i] * data[i])
+ return false;
+ return true;
+}
+#pragma omp end declare target
+
+int main (void)
+{
+ int data[N];
+ bool ok;
+ int elements[N];
+ std::span<int, N> span(elements);
+
+ srand (time (NULL));
+ init (data);
+
+#ifndef MEM_SHARED
+ #pragma omp target enter data map (to: data[:N]) map (alloc: elements, span)
+#endif
+
+ #pragma omp target
+ {
+#ifndef MEM_SHARED
+ new (&span) std::span<int, N> (elements);
+#endif
+ std::copy (data, data + N, span.begin ());
+ }
+
+ #pragma omp target teams distribute parallel for
+ for (int i = 0; i < N; ++i)
+ span[i] *= span[i];
+
+ #pragma omp target map (from: ok)
+ {
+ ok = validate (span, data);
+#ifndef MEM_SHARED
+ span.~span ();
+#endif
+ }
+
+#ifndef MEM_SHARED
+ #pragma omp target exit data map (release: elements, span)
+#endif
+
+ return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__unordered_map-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__unordered_map-concurrent.C
new file mode 100644
index 0000000..00d7943
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__unordered_map-concurrent.C
@@ -0,0 +1,66 @@
+// { dg-do run }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <time.h>
+#include <set>
+#include <unordered_map>
+
+#define N 3000
+
+void init (int data[], bool unique)
+{
+ std::set<int> _set;
+ for (int i = 0; i < N; ++i)
+ {
+ // Avoid duplicates in data array if unique is true.
+ do
+ data[i] = rand ();
+ while (unique && _set.count (data[i]) > 0);
+ _set.insert (data[i]);
+ }
+}
+
+bool validate (long long sum, int keys[], int data[])
+{
+ long long total = 0;
+ for (int i = 0; i < N; ++i)
+ total += (long long) keys[i] * data[i];
+ return sum == total;
+}
+
+int main (void)
+{
+ int keys[N], data[N];
+ std::unordered_map<int,int> _map;
+
+ srand (time (NULL));
+ init (keys, true);
+ init (data, false);
+
+ #pragma omp target enter data map (to: keys[:N], data[:N]) map (alloc: _map)
+
+ #pragma omp target
+ {
+#ifndef MEM_SHARED
+ new (&_map) std::unordered_map<int,int> ();
+#endif
+ for (int i = 0; i < N; ++i)
+ _map[keys[i]] = data[i];
+ }
+
+ long long sum = 0;
+ #pragma omp target teams distribute parallel for reduction (+:sum)
+ for (int i = 0; i < N; ++i)
+ sum += (long long) keys[i] * _map[keys[i]];
+
+#ifndef MEM_SHARED
+ #pragma omp target
+ _map.~unordered_map ();
+#endif
+
+ #pragma omp target exit data map (release: _map)
+
+ bool ok = validate (sum, keys, data);
+ return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__unordered_multimap-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__unordered_multimap-concurrent.C
new file mode 100644
index 0000000..2567634
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__unordered_multimap-concurrent.C
@@ -0,0 +1,65 @@
+// { dg-do run }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <time.h>
+#include <unordered_map>
+
+// Make sure that KEY_MAX is less than N to ensure some duplicate keys.
+#define N 3000
+#define KEY_MAX 1000
+
+void init (int data[], int max)
+{
+ for (int i = 0; i < N; ++i)
+ data[i] = i % max;
+}
+
+bool validate (long long sum, int keys[], int data[])
+{
+ long long total = 0;
+ for (int i = 0; i < N; ++i)
+ total += (long long) keys[i] * data[i];
+ return sum == total;
+}
+
+int main (void)
+{
+ int keys[N], data[N];
+ std::unordered_multimap<int,int> _map;
+
+ srand (time (NULL));
+ init (keys, KEY_MAX);
+ init (data, RAND_MAX);
+
+ #pragma omp target enter data map (to: keys[:N], data[:N]) map (alloc: _map)
+
+ #pragma omp target
+ {
+#ifndef MEM_SHARED
+ new (&_map) std::unordered_multimap<int,int> ();
+#endif
+ for (int i = 0; i < N; ++i)
+ _map.insert({keys[i], data[i]});
+ }
+
+ long long sum = 0;
+ #pragma omp target teams distribute parallel for reduction (+:sum)
+ for (int i = 0; i < KEY_MAX; ++i)
+ {
+ auto range = _map.equal_range (i);
+ for (auto it = range.first; it != range.second; ++it) {
+ sum += (long long) it->first * it->second;
+ }
+ }
+
+#ifndef MEM_SHARED
+ #pragma omp target
+ _map.~unordered_multimap ();
+#endif
+
+ #pragma omp target exit data map (release: _map)
+
+ bool ok = validate (sum, keys, data);
+ return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__unordered_multiset-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__unordered_multiset-concurrent.C
new file mode 100644
index 0000000..da6c875
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__unordered_multiset-concurrent.C
@@ -0,0 +1,59 @@
+// { dg-do run }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <time.h>
+#include <unordered_set>
+#include <algorithm>
+
+// MAX should be less than N to ensure that some duplicates occur.
+#define N 4000
+#define MAX 1000
+
+void init (int data[])
+{
+ for (int i = 0; i < N; ++i)
+ data[i] = rand () % MAX;
+}
+
+bool validate (int sum, int data[])
+{
+ int total = 0;
+ for (int i = 0; i < N; ++i)
+ total += data[i];
+ return sum == total;
+}
+
+int main (void)
+{
+ int data[N];
+ std::unordered_multiset<int> set;
+ int sum = 0;
+
+ srand (time (NULL));
+ init (data);
+
+ #pragma omp target data map (to: data[:N]) map (alloc: set)
+ {
+ #pragma omp target
+ {
+#ifndef MEM_SHARED
+ new (&set) std::unordered_multiset<int> ();
+#endif
+ for (int i = 0; i < N; ++i)
+ set.insert (data[i]);
+ }
+
+ #pragma omp target teams distribute parallel for reduction (+:sum)
+ for (int i = 0; i < MAX; ++i)
+ sum += i * set.count (i);
+
+#ifndef MEM_SHARED
+ #pragma omp target
+ set.~unordered_multiset ();
+#endif
+ }
+
+ bool ok = validate (sum, data);
+ return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__unordered_set-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__unordered_set-concurrent.C
new file mode 100644
index 0000000..b7bd935
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__unordered_set-concurrent.C
@@ -0,0 +1,66 @@
+// { dg-do run }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <time.h>
+#include <unordered_set>
+#include <algorithm>
+
+#define N 4000
+#define MAX 16384
+
+void init (int data[])
+{
+ std::unordered_set<int> _set;
+ for (int i = 0; i < N; ++i)
+ {
+ // Avoid duplicates in data array.
+ do
+ data[i] = rand () % MAX;
+ while (_set.count (data[i]) != 0);
+ _set.insert (data[i]);
+ }
+}
+
+bool validate (int sum, int data[])
+{
+ int total = 0;
+ for (int i = 0; i < N; ++i)
+ total += data[i];
+ return sum == total;
+}
+
+int main (void)
+{
+ int data[N];
+ std::unordered_set<int> _set;
+ int sum = 0;
+
+ srand (time (NULL));
+ init (data);
+
+ #pragma omp target data map (to: data[:N]) map (alloc: _set)
+ {
+ #pragma omp target
+ {
+#ifndef MEM_SHARED
+ new (&_set) std::unordered_set<int> ();
+#endif
+ for (int i = 0; i < N; ++i)
+ _set.insert (data[i]);
+ }
+
+ #pragma omp target teams distribute parallel for reduction (+:sum)
+ for (int i = 0; i < MAX; ++i)
+ if (_set.count (i) > 0)
+ sum += i;
+
+#ifndef MEM_SHARED
+ #pragma omp target
+ _set.~unordered_set ();
+#endif
+ }
+
+ bool ok = validate (sum, data);
+ return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__valarray-1.C b/libgomp/testsuite/libgomp.c++/target-std__valarray-1.C
new file mode 100644
index 0000000..865cde2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__valarray-1.C
@@ -0,0 +1,179 @@
+// { dg-additional-options -std=c++20 }
+// { dg-output-file target-std__valarray-1.output }
+
+#include <valarray>
+#include <ostream>
+#include <sstream>
+
+
+/*TODO Work around PR118484 "ICE during IPA pass: cp, segfault in determine_versionability ipa-cp.cc:467".
+
+We can't:
+
+ #pragma omp declare target(std::basic_streambuf<char, std::char_traits<char>>::basic_streambuf)
+
+... because:
+
+ error: overloaded function name ‘std::basic_streambuf<char>::__ct ’ in clause ‘enter’
+
+Therefore, use dummy classes in '#pragma omp declare target':
+*/
+
+#pragma omp declare target
+
+// For 'std::basic_streambuf<char, std::char_traits<char> >::basic_streambuf':
+
+class dummy_basic_streambuf__char
+ : public std::basic_streambuf<char>
+{
+public:
+ dummy_basic_streambuf__char() {}
+};
+
+// For 'std::basic_ios<char, std::char_traits<char> >::basic_ios()':
+
+class dummy_basic_ios__char
+ : public std::basic_ios<char>
+{
+public:
+ dummy_basic_ios__char() {}
+};
+
+#pragma omp end declare target
+
+
+int main()
+{
+ // Due to PR120021 "Offloading vs. C++ 'std::initializer_list'", we can't construct these on the device.
+ std::initializer_list<int> v1_i = {10, 20, 30, 40, 50};
+ const int *v1_i_data = std::data(v1_i);
+ size_t v1_i_size = v1_i.size();
+ std::initializer_list<int> v2_i = {5, 4, 3, 2, 1};
+ const int *v2_i_data = std::data(v2_i);
+ size_t v2_i_size = v2_i.size();
+ std::initializer_list<int> shiftData_i = {1, 2, 3, 4, 5};
+ const int *shiftData_i_data = std::data(shiftData_i);
+ size_t shiftData_i_size = shiftData_i.size();
+#pragma omp target \
+ defaultmap(none) \
+ map(to: v1_i_data[:v1_i_size], v1_i_size, \
+ v2_i_data[:v2_i_size], v2_i_size, \
+ shiftData_i_data[:shiftData_i_size], shiftData_i_size)
+ {
+ /* Manually set up a buffer we can stream into, similar to 'cout << [...]', and print it at the end of the region.  */
+ std::stringbuf out_b;
+ std::ostream out(&out_b);
+
+ std::valarray<int> v1(v1_i_data, v1_i_size);
+ out << "\nv1:";
+ for (auto val : v1)
+ out << " " << val;
+
+ std::valarray<int> v2(v2_i_data, v2_i_size);
+ out << "\nv2:";
+ for (auto val : v2)
+ out << " " << val;
+
+ std::valarray<int> sum = v1 + v2;
+ out << "\nv1 + v2:";
+ for (auto val : sum)
+ out << " " << val;
+
+ std::valarray<int> diff = v1 - v2;
+ out << "\nv1 - v2:";
+ for (auto val : diff)
+ out << " " << val;
+
+ std::valarray<int> product = v1 * v2;
+ out << "\nv1 * v2:";
+ for (auto val : product)
+ out << " " << val;
+
+ std::valarray<int> quotient = v1 / v2;
+ out << "\nv1 / v2:";
+ for (auto val : quotient)
+ out << " " << val;
+
+ std::valarray<int> squares = pow(v1, 2);
+ out << "\npow(v1, 2):";
+ for (auto val : squares)
+ out << " " << val;
+
+ std::valarray<int> sinhs = sinh(v2);
+ out << "\nsinh(v2):";
+ for (auto val : sinhs)
+ out << " " << val;
+
+ std::valarray<int> logs = log(v1 * v2);
+ out << "\nlog(v1 * v2):";
+ for (auto val : logs)
+ out << " " << val;
+
+ std::valarray<int> data(12);
+ for (size_t i = 0; i < data.size(); ++i)
+ data[i] = i;
+ out << "\nOriginal array:";
+ for (auto val : data)
+ out << " " << val;
+
+ std::slice slice1(2, 5, 1);
+ std::valarray<int> sliced1 = data[slice1];
+ out << "\nSlice(2, 5, 1):";
+ for (auto val : sliced1)
+ out << " " << val;
+
+ std::slice slice2(1, 4, 3);
+ std::valarray<int> sliced2 = data[slice2];
+ out << "\nSlice(1, 4, 3):";
+ for (auto val : sliced2)
+ out << " " << val;
+
+ data[slice1] = 99;
+ out << "\nArray after slice modification:";
+ for (auto val : data)
+ out << " " << val;
+
+ std::valarray<bool> mask = (v1 > 20);
+ out << "\nElements of v1 > 20:";
+ for (size_t i = 0; i < v1.size(); ++i)
+ {
+ if (mask[i])
+ out << " " << v1[i];
+ }
+
+ std::valarray<int> masked = v1[mask];
+ out << "\nMasked array:";
+ for (auto val : masked)
+ out << " " << val;
+
+ std::valarray<int> shiftData(shiftData_i_data, shiftData_i_size);
+ out << "\nOriginal shiftData:";
+ for (auto val : shiftData)
+ out << " " << val;
+
+ std::valarray<int> shifted = shiftData.shift(2);
+ out << "\nshift(2):";
+ for (auto val : shifted)
+ out << " " << val;
+
+ std::valarray<int> cshifted = shiftData.cshift(-1);
+ out << "\ncshift(-1):";
+ for (auto val : cshifted)
+ out << " " << val;
+
+ out << "\nSum(v1): " << v1.sum();
+ out << "\nMin(v1): " << v1.min();
+ out << "\nMax(v1): " << v1.max();
+
+ out << "\n";
+
+ /* Terminate with a NUL. Otherwise, we'd have to use:
+ __builtin_printf("%.*s", (int) out_b_sv.size(), out_b_sv.data());
+ ... which nvptx 'printf', as implemented via PTX 'vprintf', doesn't support (TODO). */
+ out << '\0';
+ std::string_view out_b_sv = out_b.view();
+ __builtin_printf("%s", out_b_sv.data());
+ }
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__valarray-1.output b/libgomp/testsuite/libgomp.c++/target-std__valarray-1.output
new file mode 100644
index 0000000..c441e06
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__valarray-1.output
@@ -0,0 +1,22 @@
+
+v1: 10 20 30 40 50
+v2: 5 4 3 2 1
+v1 + v2: 15 24 33 42 51
+v1 - v2: 5 16 27 38 49
+v1 * v2: 50 80 90 80 50
+v1 / v2: 2 5 10 20 50
+pow(v1, 2): 100 400 900 1600 2500
+sinh(v2): 74 27 10 3 1
+log(v1 * v2): 3 4 4 4 3
+Original array: 0 1 2 3 4 5 6 7 8 9 10 11
+Slice(2, 5, 1): 2 3 4 5 6
+Slice(1, 4, 3): 1 4 7 10
+Array after slice modification: 0 1 99 99 99 99 99 7 8 9 10 11
+Elements of v1 > 20: 30 40 50
+Masked array: 30 40 50
+Original shiftData: 1 2 3 4 5
+shift(2): 3 4 5 0 0
+cshift(-1): 5 1 2 3 4
+Sum(v1): 150
+Min(v1): 10
+Max(v1): 50
diff --git a/libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent-usm.C
new file mode 100644
index 0000000..41ec80e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent-usm.C
@@ -0,0 +1,5 @@
+#pragma omp requires unified_shared_memory self_maps
+
+#define MEM_SHARED
+
+#include "target-std__valarray-concurrent.C"
diff --git a/libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent.C
new file mode 100644
index 0000000..8933072b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent.C
@@ -0,0 +1,66 @@
+// { dg-do run }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <time.h>
+#include <valarray>
+
+#define N 50000
+
+void init (int data[])
+{
+ for (int i = 0; i < N; ++i)
+ data[i] = rand ();
+}
+
+#pragma omp declare target
+bool validate (const std::valarray<int> &arr, int data[])
+{
+ for (int i = 0; i < N; ++i)
+ if (arr[i] != data[i] * data[i] + i)
+ return false;
+ return true;
+}
+#pragma omp end declare target
+
+int main (void)
+{
+ int data[N];
+ bool ok;
+
+ srand (time (NULL));
+ init (data);
+
+#ifdef MEM_SHARED
+ std::valarray<int> arr (data, N);
+#else
+ std::valarray<int> arr;
+#endif
+
+#ifndef MEM_SHARED
+ #pragma omp target data map (to: data[:N]) map (alloc: arr)
+#endif
+ {
+ #pragma omp target
+ {
+#ifndef MEM_SHARED
+ new (&arr) std::valarray<int> (data, N);
+#endif
+ arr *= arr;
+ }
+
+ #pragma omp target teams distribute parallel for
+ for (int i = 0; i < N; ++i)
+ arr[i] += i;
+
+ #pragma omp target map (from: ok)
+ {
+ ok = validate (arr, data);
+#ifndef MEM_SHARED
+ arr.~valarray ();
+#endif
+ }
+ }
+
+ return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-std__vector-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__vector-concurrent-usm.C
new file mode 100644
index 0000000..967bff3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__vector-concurrent-usm.C
@@ -0,0 +1,5 @@
+#pragma omp requires unified_shared_memory self_maps
+
+#define MEM_SHARED
+
+#include "target-std__vector-concurrent.C"
diff --git a/libgomp/testsuite/libgomp.c++/target-std__vector-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__vector-concurrent.C
new file mode 100644
index 0000000..a94b4cf
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-std__vector-concurrent.C
@@ -0,0 +1,63 @@
+// { dg-do run }
+// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } }
+
+#include <stdlib.h>
+#include <time.h>
+#include <vector>
+
+#define N 50000
+
+void init (int data[])
+{
+ for (int i = 0; i < N; ++i)
+ data[i] = rand ();
+}
+
+#pragma omp declare target
+bool validate (const std::vector<int> &vec, int data[])
+{
+ for (int i = 0; i < N; ++i)
+ if (vec[i] != data[i] * data[i])
+ return false;
+ return true;
+}
+#pragma omp end declare target
+
+int main (void)
+{
+ int data[N];
+ bool ok;
+
+ srand (time (NULL));
+ init (data);
+
+#ifdef MEM_SHARED
+ std::vector<int> vec (data, data + N);
+#else
+ std::vector<int> vec;
+#endif
+
+#ifndef MEM_SHARED
+ #pragma omp target data map (to: data[:N]) map (alloc: vec)
+#endif
+ {
+#ifndef MEM_SHARED
+ #pragma omp target
+ new (&vec) std::vector<int> (data, data + N);
+#endif
+
+ #pragma omp target teams distribute parallel for
+ for (int i = 0; i < N; ++i)
+ vec[i] *= vec[i];
+
+ #pragma omp target map (from: ok)
+ {
+ ok = validate (vec, data);
+#ifndef MEM_SHARED
+ vec.~vector ();
+#endif
+ }
+ }
+
+ return ok ? 0 : 1;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-10.c b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-10.c
new file mode 100644
index 0000000..00eb48b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-10.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#define N 64
+
+typedef struct {
+ int *arr;
+ int size;
+} B;
+
+#pragma omp declare mapper (mapB : B myb) map(to: myb.size, myb.arr) \
+ map(tofrom: myb.arr[0:myb.size])
+// While GCC handles more, only default is ...
+#pragma omp declare mapper (default : B myb) map(to: myb.size, myb.arr) \
+ map(tofrom: myb.arr[0:myb.size])
+
+struct A {
+ int *arr1;
+ B *arr2;
+ int arr3[N];
+};
+
+int
+main (int argc, char *argv[])
+{
+ struct A var;
+
+ memset (&var, 0, sizeof var);
+ var.arr1 = (int *) calloc (N, sizeof (int));
+ var.arr2 = (B *) malloc (sizeof (B));
+ var.arr2->arr = (int *) calloc (N, sizeof (int));
+ var.arr2->size = N;
+
+ {
+ // ... permitted here:
+ #pragma omp declare mapper (struct A x) map(to: x.arr1, x.arr2) \
+ map(tofrom: x.arr1[0:N]) \
+ map(mapper(default), tofrom: x.arr2[0:1])
+ #pragma omp target
+ {
+ for (int i = 0; i < N; i++)
+ {
+ var.arr1[i]++;
+ var.arr2->arr[i]++;
+ }
+ }
+ }
+
+ for (int i = 0; i < N; i++)
+ {
+ assert (var.arr1[i] == 1);
+ assert (var.arr2->arr[i] == 1);
+ assert (var.arr3[i] == 0);
+ }
+
+ free (var.arr1);
+ free (var.arr2->arr);
+ free (var.arr2);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-11.c b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-11.c
new file mode 100644
index 0000000..942d6a5
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-11.c
@@ -0,0 +1,59 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#define N 64
+
+typedef struct B_tag {
+ int *arr;
+ int size;
+} B;
+
+#pragma omp declare mapper (B myb) map(to: myb.size, myb.arr) \
+ map(tofrom: myb.arr[0:myb.size])
+
+struct A {
+ int *arr1;
+ B *arr2;
+ int arr3[N];
+};
+
+int
+main (int argc, char *argv[])
+{
+ struct A var;
+
+ memset (&var, 0, sizeof var);
+ var.arr1 = (int *) calloc (N, sizeof (int));
+ var.arr2 = (B *) malloc (sizeof (B));
+ var.arr2->arr = (int *) calloc (N, sizeof (int));
+ var.arr2->size = N;
+
+ {
+ #pragma omp declare mapper (struct A x) map(to: x.arr1, x.arr2) \
+ map(tofrom: x.arr1[0:N]) map(tofrom: x.arr2[0:1])
+ #pragma omp target
+ {
+ for (int i = 0; i < N; i++)
+ {
+ var.arr1[i]++;
+ var.arr2->arr[i]++;
+ }
+ }
+ }
+
+ for (int i = 0; i < N; i++)
+ {
+ assert (var.arr1[i] == 1);
+ assert (var.arr2->arr[i] == 1);
+ assert (var.arr3[i] == 0);
+ }
+
+ free (var.arr1);
+ free (var.arr2->arr);
+ free (var.arr2);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-12.c b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-12.c
new file mode 100644
index 0000000..cfc6a91
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-12.c
@@ -0,0 +1,94 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#define N 64
+
+typedef struct {
+ int *arr;
+ int size;
+} B;
+
+#pragma omp declare mapper (samename : B myb) map(to: myb.size, myb.arr) \
+ map(tofrom: myb.arr[0:myb.size])
+// While GCC handles more, only default is ...
+#pragma omp declare mapper (default : B myb) map(to: myb.size, myb.arr) \
+ map(tofrom: myb.arr[0:myb.size])
+typedef struct {
+ int *arr;
+ int size;
+} C;
+
+
+struct A {
+ int *arr1;
+ B *arr2;
+ C *arr3;
+};
+
+int
+main (int argc, char *argv[])
+{
+ struct A var;
+
+ memset (&var, 0, sizeof var);
+ var.arr1 = (int *) calloc (N, sizeof (int));
+ var.arr2 = (B *) malloc (sizeof (B));
+ var.arr2->arr = (int *) calloc (N, sizeof (int));
+ var.arr2->size = N;
+ var.arr3 = (C *) malloc (sizeof (C));
+ var.arr3->arr = (int *) calloc (N, sizeof (int));
+ var.arr3->size = N;
+
+ {
+ // ... permitted here.
+ #pragma omp declare mapper (struct A x) map(to: x.arr1, x.arr2) \
+ map(tofrom: x.arr1[0:N]) \
+ map(mapper(default), tofrom: x.arr2[0:1])
+ #pragma omp target
+ {
+ for (int i = 0; i < N; i++)
+ {
+ var.arr1[i]++;
+ var.arr2->arr[i]++;
+ }
+ }
+ }
+
+ {
+ #pragma omp declare mapper (samename : C myc) map(to: myc.size, myc.arr) \
+ map(tofrom: myc.arr[0:myc.size])
+ // While GCC handles more, only default is ...
+ #pragma omp declare mapper (default : C myc) map(to: myc.size, myc.arr) \
+ map(tofrom: myc.arr[0:myc.size])
+ // ... permitted here.
+ #pragma omp declare mapper (struct A x) map(to: x.arr1, x.arr3) \
+ map(tofrom: x.arr1[0:N]) \
+ map(mapper( default ) , tofrom: *x.arr3)
+ #pragma omp target
+ {
+ for (int i = 0; i < N; i++)
+ {
+ var.arr1[i]++;
+ var.arr3->arr[i]++;
+ }
+ }
+ }
+
+ for (int i = 0; i < N; i++)
+ {
+ assert (var.arr1[i] == 2);
+ assert (var.arr2->arr[i] == 1);
+ assert (var.arr3->arr[i] == 1);
+ }
+
+ free (var.arr1);
+ free (var.arr2->arr);
+ free (var.arr2);
+ free (var.arr3->arr);
+ free (var.arr3);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-13.c b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-13.c
new file mode 100644
index 0000000..c4784eb
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-13.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+
+#include <assert.h>
+
+struct T {
+ int a;
+ int b;
+ int c;
+};
+
+void foo (void)
+{
+ struct T x;
+ x.a = x.b = x.c = 0;
+
+#pragma omp target
+ {
+ x.a++;
+ x.c++;
+ }
+
+ assert (x.a == 1);
+ assert (x.b == 0);
+ assert (x.c == 1);
+}
+
+// An identity mapper. This should do the same thing as the default!
+#pragma omp declare mapper (struct T v) map(v)
+
+void bar (void)
+{
+ struct T x;
+ x.a = x.b = x.c = 0;
+
+#pragma omp target
+ {
+ x.b++;
+ }
+
+#pragma omp target map(x)
+ {
+ x.a++;
+ }
+
+ assert (x.a == 1);
+ assert (x.b == 1);
+ assert (x.c == 0);
+}
+
+int main (int argc, char *argv[])
+{
+ foo ();
+ bar ();
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-14.c b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-14.c
new file mode 100644
index 0000000..3e6027e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-14.c
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <assert.h>
+
+struct Z {
+ int *arr;
+};
+
+void baz (struct Z *zarr, int len)
+{
+#pragma omp declare mapper (struct Z myvar) map(to: myvar.arr) \
+ map(tofrom: myvar.arr[0:len])
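+ /* The mapper above applies whenever a 'struct Z' object is mapped below, so
+ mapping 'zarr[0]' or 'zarr[5]' also maps the pointed-to 'arr' array.  */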
+ zarr[0].arr = (int *) calloc (len, sizeof (int));
+ zarr[5].arr = (int *) calloc (len, sizeof (int));
+
+#pragma omp target map(zarr, *zarr)
+ {
+ for (int i = 0; i < len; i++)
+ zarr[0].arr[i]++;
+ }
+
+#pragma omp target map(zarr, zarr[5])
+ {
+ for (int i = 0; i < len; i++)
+ zarr[5].arr[i]++;
+ }
+
+#pragma omp target map(zarr[5])
+ {
+ for (int i = 0; i < len; i++)
+ zarr[5].arr[i]++;
+ }
+
+#pragma omp target map(zarr, zarr[5:1])
+ {
+ for (int i = 0; i < len; i++)
+ zarr[5].arr[i]++;
+ }
+
+ for (int i = 0; i < len; i++)
+ assert (zarr[0].arr[i] == 1);
+
+ for (int i = 0; i < len; i++)
+ assert (zarr[5].arr[i] == 3);
+
+ free (zarr[5].arr);
+ free (zarr[0].arr);
+}
+
+int
+main (int argc, char *argv[])
+{
+ struct Z myzarr[10];
+ baz (myzarr, 256);
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-9.c b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-9.c
new file mode 100644
index 0000000..324d535
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-9.c
@@ -0,0 +1,62 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#define N 64
+
+struct A {
+ int *arr1;
+ float *arr2;
+ int arr3[N];
+};
+
+int
+main (int argc, char *argv[])
+{
+ struct A var;
+
+ memset (&var, 0, sizeof var);
+ var.arr1 = (int *) calloc (N, sizeof (int));
+ var.arr2 = (float *) calloc (N, sizeof (float));
+
+ {
+ #pragma omp declare mapper (struct A x) map(to: x.arr1) \
+ map(tofrom: x.arr1[0:N])
+ #pragma omp target
+ {
+ for (int i = 0; i < N; i++)
+ var.arr1[i]++;
+ }
+ }
+
+ {
+ #pragma omp declare mapper (struct A x) map(to: x.arr2) \
+ map(tofrom: x.arr2[0:N])
+ #pragma omp target
+ {
+ for (int i = 0; i < N; i++)
+ var.arr2[i]++;
+ }
+ }
+
+ {
+ #pragma omp declare mapper (struct A x) map(tofrom: x.arr3[0:N])
+ #pragma omp target
+ {
+ for (int i = 0; i < N; i++)
+ var.arr3[i]++;
+ }
+ }
+
+ for (int i = 0; i < N; i++)
+ {
+ assert (var.arr1[i] == 1);
+ assert (var.arr2[i] == 1);
+ assert (var.arr3[i] == 1);
+ }
+
+ free (var.arr1);
+ free (var.arr2);
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/interop-2.c b/libgomp/testsuite/libgomp.c-c++-common/interop-2.c
new file mode 100644
index 0000000..a7526dc
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/interop-2.c
@@ -0,0 +1,129 @@
+/* { dg-do run } */
+/* { dg-additional-options "-lm" } */
+
+/* Note: At the time this program was written, nvptx was not asynchronous
+ enough to trigger the issue (with a 'nowait' added); however, on AMD GPUs,
+ it triggered.  */
+
+/* Test whether nowait / dependency is handled correctly.
+ Motivated by OpenMP_VV's 5.1/interop/test_interop_target.c
+
+ The code only creates a streaming object without actually using it,
+ except for dependency tracking.
+
+ Note that there is a difference between having a streaming (targetsync)
+ object and not having one (= omp_interop_none) - at least if one assumes
+ that omp_interop_none does not include 'targetsync' as (effective) interop
+ type.  In that case, 'nowait' has no effect and the 'depend' clause applies
+ to an included task; otherwise, the code continues with the dependency
+ attached only to the generated task for the object about to be destroyed
+ or used.
+
+ The OpenMP spec states (here 6.0):
+ "If the interop-type set includes 'targetsync', an empty mergeable task is
+ generated. If the 'nowait' clause is not present on the construct then
+ the task is also an included task. If the interop-type set does not
+ include 'targetsync', the 'nowait' clause has no effect. Any depend
+ clauses that are present on the construct apply to the generated task. */
+
+#include <omp.h>
+
+void
+test_async (const int dev)
+{
+ constexpr int N = 2048;
+ constexpr int ulp = 4;
+ constexpr double M_PI = 2.0 * __builtin_acos (0.0);
+ omp_interop_t obj1, obj2;
+ double A[N] = { };
+ int B[N] = { };
+
+ /* Create interop object. */
+ #pragma omp interop device(dev) init(targetsync : obj1, obj2)
+
+ if (dev == omp_initial_device || dev == omp_get_num_devices ())
+ {
+ if (obj1 != omp_interop_none || obj2 != omp_interop_none)
+ __builtin_abort ();
+ }
+ else
+ {
+ if (obj1 == omp_interop_none || obj2 == omp_interop_none)
+ __builtin_abort ();
+ }
+
+ /* DOUBLE */
+
+ /* Now update A in the background, slowly enough that the code afterwards
+ is reached while the update is still running asynchronously.  As
+ OpenMP_VV's Issue #863 shows, the overhead is high enough to fail even
+ when only doing an atomic integer increment.  */
+
+ #pragma omp target device(dev) map(A) depend(out: A[:N]) nowait
+ for (int i = 0; i < N; i++)
+ #pragma omp atomic update
+ A[i] += __builtin_sin (2*i*M_PI/N);
+
+ /* DESTROY takes care of the dependency such that ... */
+
+ if (obj1 == omp_interop_none)
+ {
+ // Same as below as 'nowait' is ignored.
+ #pragma omp interop destroy(obj1) depend(in: A[:N]) nowait
+ }
+ else
+ {
+ #pragma omp interop destroy(obj1) depend(in: A[:N])
+ }
+
+ /* ... this code is only executed once the dependency has been fulfilled.  */
+
+ /* Check the values - part I: a quick check; skip A[0] as sin(0) == 0.  */
+ for (int i = 1; i < N; i++)
+ if (A[i] == 0.0)
+ __builtin_abort ();
+
+ /* Check the values - part II: thoroughly.  */
+ for (int i = 0; i < N; i++)
+ {
+ double x = A[i];
+ double y = __builtin_sin (2*i*M_PI/N);
+ if (__builtin_fabs (x - y) > ulp * __builtin_fabs (x+y) * __DBL_EPSILON__)
+ __builtin_abort ();
+ }
+
+ /* Integer */
+
+ #pragma omp target device(dev) map(B) depend(out: B[:N]) nowait
+ for (int i = 0; i < N; i++)
+ #pragma omp atomic update
+ B[i] += 42;
+
+ /* Same - but using USE. */
+ if (obj2 == omp_interop_none)
+ {
+ // Same as below as 'nowait' is ignored.
+ #pragma omp interop use(obj2) depend(in: B[:N]) nowait
+ }
+ else
+ {
+ #pragma omp interop use(obj2) depend(in: B[:N])
+ }
+
+ for (int i = 0; i < N; i++)
+ if (B[i] != 42)
+ __builtin_abort ();
+
+ #pragma omp interop destroy(obj2)
+}
+
+int
+main ()
+{
+ int ndev = omp_get_num_devices ();
+
+ for (int dev = 0; dev <= ndev; dev++)
+ test_async (dev);
+ test_async (omp_initial_device);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/metadirective-1.c b/libgomp/testsuite/libgomp.c-c++-common/metadirective-1.c
index a57d6fd..fbe4ac3 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/metadirective-1.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/metadirective-1.c
@@ -1,4 +1,5 @@
-/* { dg-do run } */
+/* { dg-do run { target { ! offload_target_nvptx } } } */
+/* { dg-do compile { target offload_target_nvptx } } */
#define N 100
@@ -7,12 +8,17 @@ f (int x[], int y[], int z[])
{
int i;
+ // The following fails because, on the host side, the device cannot be
+ // resolved - and whether 'teams' is used or not affects how 'target'
+ // is called.
+ // Note also the dg-do compile above for offload_target_nvptx.
#pragma omp target map(to: x[0:N], y[0:N]) map(from: z[0:N])
#pragma omp metadirective \
when (device={arch("nvptx")}: teams loop) \
default (parallel loop)
for (i = 0; i < N; i++)
z[i] = x[i] * y[i];
+ /* { dg-bogus "'target' construct with nested 'teams' construct contains directives outside of the 'teams' construct" "PR118694" { xfail offload_target_nvptx } .-6 } */
}
int
diff --git a/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-2.c b/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-2.c
new file mode 100644
index 0000000..b36d2f5
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-2.c
@@ -0,0 +1,62 @@
+// PR libgomp/120444
+// Async version
+
+#include <omp.h>
+
+int main()
+{
+ #pragma omp parallel for
+ for (int dev = omp_initial_device; dev <= omp_get_num_devices (); dev++)
+ {
+ char *ptr = (char *) omp_target_alloc (sizeof(int) * 1024, dev);
+
+ omp_depend_t dep;
+ #pragma omp depobj(dep) depend(inout: ptr)
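+ /* 'dep' carries an inout dependence on 'ptr'; the async memsets and 'nowait'
+ target regions below that use it are thereby serialized with each other
+ (the first memset_async passes no dependences and is followed by a plain
+ taskwait).  */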
+
+ /* Also play around with the alignment, as hsa_amd_memory_fill operates
+ on multiples of 4 bytes (uint32_t).  */
+
+ for (int start = 0; start < 32; start++)
+ for (int tail = 0; tail < 32; tail++)
+ {
+ unsigned char val = '0' + start + tail;
+#if __cplusplus
+ void *ptr2 = omp_target_memset_async (ptr + start, val,
+ 1024 - start - tail, dev, 0);
+#else
+ void *ptr2 = omp_target_memset_async (ptr + start, val,
+ 1024 - start - tail, dev, 0, nullptr);
+#endif
+ if (ptr + start != ptr2)
+ __builtin_abort ();
+
+ #pragma omp taskwait
+
+ #pragma omp target device(dev) is_device_ptr(ptr) depend(depobj: dep) nowait
+ for (int i = start; i < 1024 - start - tail; i++)
+ {
+ if (ptr[i] != val)
+ __builtin_abort ();
+ ptr[i] += 2;
+ }
+
+ omp_target_memset_async (ptr + start, val + 3,
+ 1024 - start - tail, dev, 1, &dep);
+
+ #pragma omp target device(dev) is_device_ptr(ptr) depend(depobj: dep) nowait
+ for (int i = start; i < 1024 - start - tail; i++)
+ {
+ if (ptr[i] != val + 3)
+ __builtin_abort ();
+ ptr[i] += 1;
+ }
+
+ omp_target_memset_async (ptr + start, val - 3,
+ 1024 - start - tail, dev, 1, &dep);
+
+ #pragma omp taskwait depend (depobj: dep)
+ }
+ #pragma omp depobj(dep) destroy
+ omp_target_free (ptr, dev);
+ }
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-3.c b/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-3.c
new file mode 100644
index 0000000..c0e4fa9
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-3.c
@@ -0,0 +1,80 @@
+#include <stddef.h>
+#include <stdint.h>
+#include <omp.h>
+
+#define MIN(x,y) ((x) < (y) ? x : y)
+
+enum { N = 524288 + 8 };
+
+static void
+init_val (int8_t *ptr, int val, size_t count)
+{
+ #pragma omp target is_device_ptr(ptr) firstprivate(val, count)
+ __builtin_memset (ptr, val, count);
+}
+
+static void
+check_val (int8_t *ptr, int val, size_t count)
+{
+ if (count == 0)
+ return;
+ #pragma omp target is_device_ptr(ptr) firstprivate(val, count)
+ for (size_t i = 0; i < count; i++)
+ if (ptr[i] != val) __builtin_abort ();
+}
+
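+/* Shift PTR by LSHIFT bytes, pre-fill the region with 'z', set COUNT bytes to
+ '1' via omp_target_memset, and then verify both the filled bytes and up to 32
+ trailing guard bytes that must still contain 'z'.  */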
+static void
+test_it (int8_t *ptr, int lshift, size_t count)
+{
+ if (N < count + lshift) __builtin_abort ();
+ if (lshift >= 4) __builtin_abort ();
+ ptr += lshift;
+
+ init_val (ptr, 'z', MIN (count + 32, N - lshift));
+
+ omp_target_memset (ptr, '1', count, omp_get_default_device());
+
+ check_val (ptr, '1', count);
+ check_val (ptr + count, 'z', MIN (32, N - lshift - count));
+}
+
+
+int main()
+{
+ size_t size;
+ int8_t *ptr = (int8_t *) omp_target_alloc (N + 3, omp_get_default_device());
+ ptr += (4 - (uintptr_t) ptr % 4) % 4;
+ if ((uintptr_t) ptr % 4 != 0) __builtin_abort ();
+
+ test_it (ptr, 0, 1);
+ test_it (ptr, 3, 1);
+ test_it (ptr, 0, 4);
+ test_it (ptr, 3, 4);
+ test_it (ptr, 0, 5);
+ test_it (ptr, 3, 5);
+ test_it (ptr, 0, 6);
+ test_it (ptr, 3, 6);
+
+ for (int i = 1; i <= 9; i++)
+ {
+ switch (i)
+ {
+ case 1: size = 16; break; // = 2^4 bytes
+ case 2: size = 32; break; // = 2^5 bytes
+ case 3: size = 64; break; // = 2^6 bytes
+ case 4: size = 128; break; // = 2^7 bytes
+ case 5: size = 256; break; // = 2^8 bytes
+ case 6: size = 512; break; // = 2^9 bytes
+ case 7: size = 65536; break; // = 2^16 bytes
+ case 8: size = 262144; break; // = 2^18 bytes
+ case 9: size = 524288; break; // = 2^19 bytes
+ default: __builtin_abort ();
+ }
+ test_it (ptr, 0, size);
+ test_it (ptr, 3, size);
+ test_it (ptr, 0, size + 1);
+ test_it (ptr, 3, size + 1);
+ test_it (ptr, 3, size + 2);
+ }
+ omp_target_free (ptr, omp_get_default_device());
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset.c b/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset.c
new file mode 100644
index 0000000..01909f8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset.c
@@ -0,0 +1,62 @@
+// PR libgomp/120444
+
+#include <omp.h>
+
+int main()
+{
+ for (int dev = omp_initial_device; dev < omp_get_num_devices (); dev++)
+ {
+ char *ptr = (char *) omp_target_alloc (sizeof(int) * 1024, dev);
+
+ /* Also play around with the alignment, as hsa_amd_memory_fill operates
+ on multiples of 4 bytes (uint32_t).  */
+
+ for (int start = 0; start < 32; start++)
+ for (int tail = 0; tail < 32; tail++)
+ {
+ unsigned char val = '0' + start + tail;
+ void *ptr2 = omp_target_memset (ptr + start, val,
+ 1024 - start - tail, dev);
+ if (ptr + start != ptr2)
+ __builtin_abort ();
+
+ #pragma omp target device(dev) is_device_ptr(ptr)
+ for (int i = start; i < 1024 - start - tail; i++)
+ if (ptr[i] != val)
+ __builtin_abort ();
+
+ }
+
+ /* Check 'small' values for correctness. */
+
+ for (int start = 0; start < 32; start++)
+ for (int size = 0; size <= 64 + 32; size++)
+ {
+ omp_target_memset (ptr, 'a' - 2, 1024, dev);
+
+ unsigned char val = '0' + start + size % 32;
+ void *ptr2 = omp_target_memset (ptr + start, val, size, dev);
+
+ if (ptr + start != ptr2)
+ __builtin_abort ();
+
+ if (size == 0)
+ continue;
+
+ #pragma omp target device(dev) is_device_ptr(ptr)
+ {
+ for (int i = 0; i < start; i++)
+ if (ptr[i] != 'a' - 2)
+ __builtin_abort ();
+ for (int i = start; i < start + size; i++)
+ if (ptr[i] != val)
+ __builtin_abort ();
+ for (int i = start + size + 1; i < 1024; i++)
+ if (ptr[i] != 'a' - 2)
+ __builtin_abort ();
+ }
+ }
+
+ omp_target_free (ptr, dev);
+ }
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/pr96390.c b/libgomp/testsuite/libgomp.c-c++-common/pr96390.c
index b89f934..ca7865d 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/pr96390.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/pr96390.c
@@ -1,7 +1,7 @@
/* { dg-additional-options "-O0 -fdump-tree-omplower" } */
/* { dg-additional-options "-foffload=-Wa,--verify" { target offload_target_nvptx } } */
/* { dg-require-alias "" } */
-/* { dg-xfail-if "PR 97102/PR 97106 - .alias not (yet) supported for nvptx" { offload_target_nvptx } } */
+/* { dg-xfail-if PR105018 { offload_target_nvptx } } */
#ifdef __cplusplus
extern "C" {
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c b/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c
new file mode 100644
index 0000000..9bf949a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c
@@ -0,0 +1,3 @@
+/* { dg-additional-options -O0 } */
+
+#include "target-abi-struct-1.c"
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1.c b/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1.c
new file mode 100644
index 0000000..d9268af
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1.c
@@ -0,0 +1 @@
+#include "../libgomp.oacc-c-c++-common/abi-struct-1.c"
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-cdtor-1.c b/libgomp/testsuite/libgomp.c-c++-common/target-cdtor-1.c
new file mode 100644
index 0000000..e6099cf
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-cdtor-1.c
@@ -0,0 +1,89 @@
+/* Offloaded 'constructor' and 'destructor' functions. */
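+
+/* The 'declare target' constructors/destructors (the initHD and finiHD
+ functions) run on the host and, for offload devices, also on the device - see
+ the 'dg-output' patterns at the end; initH1 and finiH2 run on the host only.  */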
+
+#include <omp.h>
+
+#pragma omp declare target
+
+static void
+__attribute__((constructor))
+initHD1()
+{
+ __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+static void
+__attribute__((constructor))
+initHD2()
+{
+ __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+static void
+__attribute__((destructor))
+finiHD1()
+{
+ __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+static void
+__attribute__((destructor))
+finiHD2()
+{
+ __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+#pragma omp end declare target
+
+static void
+__attribute__((constructor))
+initH1()
+{
+ __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+static void
+__attribute__((destructor))
+finiH2()
+{
+ __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+int main()
+{
+ int c = 0;
+
+ __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+
+#pragma omp target map(c)
+ {
+ __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+ }
+
+#pragma omp target map(c)
+ {
+ __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+ }
+
+ __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+
+ return 0;
+}
+
+/* The order in which same-priority 'constructor' and 'destructor' functions are run is undefined.
+ { dg-output {init[^,]+, 1[\r\n]+} }
+ { dg-output {init[^,]+, 1[\r\n]+} }
+ { dg-output {init[^,]+, 1[\r\n]+} }
+ { dg-output {main:1, 1[\r\n]+} }
+ { dg-output {initHD[^,]+, 0[\r\n]+} { target offload_device } }
+ { dg-output {initHD[^,]+, 0[\r\n]+} { target offload_device } }
+ { dg-output {main:2, 1[\r\n]+} { target { ! offload_device } } }
+ { dg-output {main:2, 0[\r\n]+} { target offload_device } }
+ { dg-output {main:3, 1[\r\n]+} { target { ! offload_device } } }
+ { dg-output {main:3, 0[\r\n]+} { target offload_device } }
+ { dg-output {main:4, 1[\r\n]+} }
+ { dg-output {finiHD[^,]+, 0[\r\n]+} { target offload_device } }
+ { dg-output {finiHD[^,]+, 0[\r\n]+} { target offload_device } }
+ { dg-output {fini[^,]+, 1[\r\n]+} }
+ { dg-output {fini[^,]+, 1[\r\n]+} }
+ { dg-output {fini[^,]+, 1[\r\n]+} }
+*/
diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/udr-sve.c b/libgomp/testsuite/libgomp.c-target/aarch64/udr-sve.c
index 03d93cc..02e02dc 100644
--- a/libgomp/testsuite/libgomp.c-target/aarch64/udr-sve.c
+++ b/libgomp/testsuite/libgomp.c-target/aarch64/udr-sve.c
@@ -9,8 +9,8 @@
void __attribute__ ((noipa))
parallel_reduction ()
{
- int a[8] = {1 ,1, 1, 1, 1, 1, 1, 1};
- int b[8] = {0 ,0, 0, 0, 0, 0, 0, 0};
+ int a[8] = {1, 1, 1, 1, 1, 1, 1, 1};
+ int b[8] = {0, 0, 0, 0, 0, 0, 0, 0};
svint32_t va = svld1_s32 (svptrue_b32 (), b);
int i = 0;
int64_t res;
@@ -30,8 +30,8 @@ parallel_reduction ()
void __attribute__ ((noipa))
for_reduction ()
{
- int a[8] = {1 ,1, 1, 1, 1, 1, 1, 1};
- int b[8] = {0 ,0, 0, 0, 0, 0, 0, 0};
+ int a[8] = {1, 1, 1, 1, 1, 1, 1, 1};
+ int b[8] = {0, 0, 0, 0, 0, 0, 0, 0};
svint32_t va = svld1_s32 (svptrue_b32 (), b);
int j;
int64_t res;
@@ -58,13 +58,13 @@ simd_reduction ()
for (j = 0; j < 8; j++)
a[j] = 1;
- #pragma omp simd reduction (+:va, i)
+ #pragma omp simd reduction (+:va)
for (j = 0; j < 16; j++)
- va = svld1_s32 (svptrue_b32 (), a);
+ va += svld1_s32 (svptrue_b32 (), a);
res = svaddv_s32 (svptrue_b32 (), va);
- if (res != 8)
+ if (res != 128)
__builtin_abort ();
}
@@ -72,22 +72,57 @@ void __attribute__ ((noipa))
inscan_reduction_incl ()
{
svint32_t va = svindex_s32 (0, 0);
+ int a[8] = {1, 1, 1, 1, 1, 1, 1, 1};
+ int b[64] = { 0 };
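+ /* Each iteration adds a vector of eight 1s to va and stores the inclusive
+ prefix into b, so b[j * 8] == j + 1 and the final total is 8 * 8 == 64.  */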
int j;
int64_t res = 0;
- #pragma omp parallel
- #pragma omp for reduction (inscan,+:va) firstprivate (res) lastprivate (res)
+ #pragma omp parallel for reduction (inscan, +:va)
for (j = 0; j < 8; j++)
{
- va = svindex_s32 (1, 0);
+ va += svld1_s32 (svptrue_b32 (), a);
#pragma omp scan inclusive (va)
- res += svaddv_s32 (svptrue_b32 (), va);
+ svst1_s32 (svptrue_b32 (), b + j * 8, va);
+ }
+
+ res = svaddv_s32 (svptrue_b32 (), va);
+
+ if (res != 64)
+ __builtin_abort ();
+
+ for (j = 0; j < 64; j+=8)
+ if (b[j] != (j / 8 + 1))
+ __builtin_abort ();
+}
+
+void __attribute__ ((noipa))
+inscan_reduction_excl ()
+{
+ svint32_t va = svindex_s32 (0, 0);
+ int a[8] = {1, 1, 1, 1, 1, 1, 1, 1};
+ int b[64] = { 0 };
+ int j;
+ int64_t res = 0;
+
+ #pragma omp parallel for reduction (inscan, +:va)
+ for (j = 0; j < 8; j++)
+ {
+ svst1_s32 (svptrue_b32 (), b + j * 8, va);
+ #pragma omp scan exclusive (va)
+ va += svld1_s32 (svptrue_b32 (), a);
}
+ res = svaddv_s32 (svptrue_b32 (), va);
+
if (res != 64)
__builtin_abort ();
+
+ for (j = 0; j < 64; j+=8)
+ if (b[j] != j / 8)
+ __builtin_abort ();
}
+
int
main ()
{
@@ -95,4 +130,5 @@ main ()
for_reduction ();
simd_reduction ();
inscan_reduction_incl ();
+ inscan_reduction_excl ();
}
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-3-sm61.c b/libgomp/testsuite/libgomp.c/declare-variant-3-sm61.c
new file mode 100644
index 0000000..e6941d3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-3-sm61.c
@@ -0,0 +1,8 @@
+/* { dg-do link { target { offload_target_nvptx } } } */
+/* { dg-additional-options -foffload=nvptx-none } */
+/* { dg-additional-options "-foffload=-misa=sm_61 -foffload=-mptx=_" } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-3.h"
+
+/* { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump "= f61 \\(\\);" "optimized" } } */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-3.h b/libgomp/testsuite/libgomp.c/declare-variant-3.h
index c9c8f4a..f5695a2 100644
--- a/libgomp/testsuite/libgomp.c/declare-variant-3.h
+++ b/libgomp/testsuite/libgomp.c/declare-variant-3.h
@@ -37,6 +37,13 @@ f53 (void)
__attribute__ ((noipa))
int
+f61 (void)
+{
+ return 61;
+}
+
+__attribute__ ((noipa))
+int
f70 (void)
{
return 70;
@@ -68,6 +75,7 @@ f89 (void)
#pragma omp declare variant (f37) match (device={isa("sm_37")})
#pragma omp declare variant (f52) match (device={isa("sm_52")})
#pragma omp declare variant (f53) match (device={isa("sm_53")})
+#pragma omp declare variant (f61) match (device={isa("sm_61")})
#pragma omp declare variant (f70) match (device={isa("sm_70")})
#pragma omp declare variant (f75) match (device={isa("sm_75")})
#pragma omp declare variant (f80) match (device={isa("sm_80")})
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx942.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx942.c
new file mode 100644
index 0000000..d1df550
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx942.c
@@ -0,0 +1,8 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx942 } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx942 \\(\\);" "optimized" } } */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4.h b/libgomp/testsuite/libgomp.c/declare-variant-4.h
index 53788d2..2257f4c 100644
--- a/libgomp/testsuite/libgomp.c/declare-variant-4.h
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4.h
@@ -37,6 +37,13 @@ gfx90c (void)
__attribute__ ((noipa))
int
+gfx942 (void)
+{
+ return 0x942;
+}
+
+__attribute__ ((noipa))
+int
gfx1030 (void)
{
return 0x1030;
@@ -68,6 +75,7 @@ gfx1103 (void)
#pragma omp declare variant(gfx908) match(device = {isa("gfx908")})
#pragma omp declare variant(gfx90a) match(device = {isa("gfx90a")})
#pragma omp declare variant(gfx90c) match(device = {isa("gfx90c")})
+#pragma omp declare variant(gfx942) match(device = {isa("gfx942")})
#pragma omp declare variant(gfx1030) match(device = {isa("gfx1030")})
#pragma omp declare variant(gfx1036) match(device = {isa("gfx1036")})
#pragma omp declare variant(gfx1100) match(device = {isa("gfx1100")})
diff --git a/libgomp/testsuite/libgomp.c/interop-cublas-full.c b/libgomp/testsuite/libgomp.c/interop-cublas-full.c
new file mode 100644
index 0000000..2df5277
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-cublas-full.c
@@ -0,0 +1,176 @@
+/* { dg-require-effective-target openacc_cublas } */
+/* { dg-additional-options "-lcublas" } */
+
+/* NOTE: This file is also included by libgomp.c/interop-cublas-libonly.c
+ to test the fallback version.  */
+
+/* Check whether cuBLAS' daxpy works with an interop object.
+ daxpy(N, DA, DX, INCX, DY, INCY)
+ calculates (for INCX = INCY = 1):
+ DY(1:N) = DY(1:N) + DA * DX(1:N)
+ and otherwise operates on N array elements, taking every INCX-th or INCY-th
+ element, respectively.
+
+ Based on the interop example in OpenMP's example document.  */
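+
+/* For example, with N = 3, DA = 2.0, DX = {1, 2, 3}, DY = {10, 20, 30} and
+ INCX = INCY = 1, daxpy updates DY to {12, 24, 36}.  */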
+
+/* Minimal check whether CUDA works - by checking whether the API routines
+ seem to work. This includes a fallback if the header is not
+ available. */
+
+#include <assert.h>
+#include <omp.h>
+#include "../libgomp.c-c++-common/on_device_arch.h"
+
+
+#if __has_include(<cuda.h>) && __has_include(<cudaTypedefs.h>) && __has_include(<cuda_runtime.h>) && __has_include(<cublas_v2.h>) && !defined(USE_CUDA_FALLBACK_HEADER)
+ #include <cuda.h>
+ #include <cudaTypedefs.h>
+ #include <cuda_runtime.h>
+ #include <cublas_v2.h>
+
+#else
+ /* Add a poor man's fallback declaration. */
+ #if USE_CUDA_FALLBACK_HEADER
+ // Don't warn.
+ #elif !__has_include(<cuda.h>)
+ #warning "Using GCC's cuda.h as fallback for cuda.h"
+ #elif !__has_include(<cudaTypedefs.h>)
+ #warning "Using GCC's cuda.h as fallback for cudaTypedefs.h"
+ #elif !__has_include(<cuda_runtime.h>)
+ #warning "Using GCC's cuda.h as fallback for cuda_runtime.h"
+ #else
+ #warning "Using GCC's cuda.h as fallback for cublas_v2.h"
+ #endif
+ #include "../../../include/cuda/cuda.h"
+
+ typedef enum {
+ CUBLAS_STATUS_SUCCESS = 0,
+ } cublasStatus_t;
+
+ typedef CUstream cudaStream_t;
+ typedef struct cublasContext* cublasHandle_t;
+
+ #define cublasCreate cublasCreate_v2
+ cublasStatus_t cublasCreate_v2 (cublasHandle_t *);
+
+ #define cublasSetStream cublasSetStream_v2
+ cublasStatus_t cublasSetStream_v2 (cublasHandle_t, cudaStream_t);
+
+ #define cublasDaxpy cublasDaxpy_v2
+ cublasStatus_t cublasDaxpy_v2(cublasHandle_t, int, const double*, const double*, int, double*, int);
+#endif
+
+static int used_variant = 0;
+
+void
+run_cuBlasdaxpy (int n, double da, const double *dx, int incx, double *dy, int incy, omp_interop_t obj)
+{
+ used_variant = 1;
+
+ omp_interop_rc_t res;
+ cublasStatus_t stat;
+
+ omp_intptr_t fr = omp_get_interop_int(obj, omp_ipr_fr_id, &res);
+ assert (res == omp_irc_success && fr == omp_ifr_cuda);
+
+ cudaStream_t stream = (cudaStream_t) omp_get_interop_ptr (obj, omp_ipr_targetsync, &res);
+ assert (res == omp_irc_success);
+
+ cublasHandle_t handle;
+ stat = cublasCreate (&handle);
+ assert (stat == CUBLAS_STATUS_SUCCESS);
+
+ stat = cublasSetStream (handle, stream);
+ assert (stat == CUBLAS_STATUS_SUCCESS);
+
+ /* 'da' can be in host or device space, 'dx' and 'dy' must be in device space. */
+ stat = cublasDaxpy (handle, n, &da, dx, 1, dy, 1);
+ assert (stat == CUBLAS_STATUS_SUCCESS);
+}
+
+
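+/* On an nvptx offload device, the '#pragma omp dispatch' call in run_test
+ resolves to run_cuBlasdaxpy: 'dx' and 'dy' are translated to device pointers
+ and an interop object (preferring a CUDA stream) is appended; otherwise the
+ plain host implementation run_daxpy is used.  */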
+#pragma omp declare variant(run_cuBlasdaxpy) \
+ match(construct={dispatch}, target_device={kind(nohost), arch("nvptx")}) \
+ adjust_args(need_device_ptr : dx, dy) \
+ append_args(interop(targetsync, prefer_type("cuda")))
+
+void
+run_daxpy (int n, double da, const double *dx, int incx, double *dy, int incy)
+{
+ used_variant = 2;
+
+ if (incx == 1 && incy == 1)
+ #pragma omp simd
+ for (int i = 0; i < n; i++)
+ dy[i] += da * dx[i];
+ else
+ {
+ int ix = 0;
+ int iy = 0;
+ for (int i = 0; i < n; i++)
+ {
+ dy[iy] += da * dx[ix];
+ ix += incx;
+ iy += incy;
+ }
+ }
+}
+
+
+void
+run_test (int dev)
+{
+ constexpr int N = 1024;
+
+ // A  = {0, 1, 2, ..., N-1}
+ // B  = {0, -1, -2, ..., -(N-1)}
+ // B' = daxpy (N, 3, A, incx=1, B, incy=1)
+ //    = B + 3*A
+ // -> B' = {0, 2, 4, 6, ... }
+
+ double A[N], B[N];
+ double factor = 3.0;
+ for (int i = 0; i < N; i++)
+ {
+ A[i] = i;
+ B[i] = -i;
+ }
+
+ if (dev != omp_initial_device && dev != omp_get_num_devices ())
+ {
+ #pragma omp target enter data device(dev) map(A, B)
+ }
+
+ used_variant = 99;
+ #pragma omp dispatch device(dev)
+ run_daxpy (N, factor, A, 1, B, 1);
+
+ if (dev != omp_initial_device && dev != omp_get_num_devices ())
+ {
+ #pragma omp target exit data device(dev) map(release: A) map(from: B)
+
+ int tmp = omp_get_default_device ();
+ omp_set_default_device (dev);
+ if (on_device_arch_nvptx ())
+ assert (used_variant == 1);
+ else
+ assert (used_variant == 2);
+ omp_set_default_device (tmp);
+ }
+ else
+ assert (used_variant == 2);
+
+ for (int i = 0; i < N; i++)
+ assert (B[i] == 2*i);
+}
+
+int
+main ()
+{
+ int ndev = omp_get_num_devices ();
+
+ for (int dev = 0; dev <= ndev; dev++)
+ run_test (dev);
+ run_test (omp_initial_device);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/interop-cublas-libonly.c b/libgomp/testsuite/libgomp.c/interop-cublas-libonly.c
new file mode 100644
index 0000000..89c0652
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-cublas-libonly.c
@@ -0,0 +1,7 @@
+/* { dg-require-effective-target openacc_libcublas } */
+/* { dg-additional-options "-lcublas" } */
+
+/* Same as interop-cublas-full.c, but also works if the header is not available. */
+
+#define USE_CUDA_FALLBACK_HEADER 1
+#include "interop-cublas-full.c"
diff --git a/libgomp/testsuite/libgomp.c/interop-cuda-full.c b/libgomp/testsuite/libgomp.c/interop-cuda-full.c
new file mode 100644
index 0000000..c48a934
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-cuda-full.c
@@ -0,0 +1,162 @@
+/* { dg-do run { target { offload_device_nvptx } } } */
+/* { dg-do link { target { ! offload_device_nvptx } } } */
+
+/* { dg-require-effective-target openacc_cuda } */
+/* { dg-require-effective-target openacc_cudart } */
+/* { dg-additional-options "-lcuda -lcudart" } */
+
+/* NOTE: This file is also included by libgomp.c/interop-cuda-libonly.c
+ to test the fallback version, which defines USE_CUDA_FALLBACK_HEADER.  */
+
+/* Minimal check whether CUDA works - by checking whether the API routines
+ seem to work. This includes a fallback if the header is not
+ available. */
+
+#include <assert.h>
+#include <omp.h>
+
+#if __has_include(<cuda.h>) && __has_include(<cudaTypedefs.h>) && __has_include(<cuda_runtime.h>) && !defined(USE_CUDA_FALLBACK_HEADER)
+ #include <cuda.h>
+ #include <cudaTypedefs.h>
+ #include <cuda_runtime.h>
+
+#else
+ /* Add a poor man's fallback declaration. */
+ #if USE_CUDA_FALLBACK_HEADER
+ // Don't warn.
+ #elif !__has_include(<cuda.h>)
+ #warning "Using GCC's cuda.h as fallback for cuda.h"
+ #elif !__has_include(<cudaTypedefs.h>)
+ #warning "Using GCC's cuda.h as fallback for cudaTypedefs.h"
+ #else
+ #warning "Using GCC's cuda.h as fallback for cuda_runtime.h"
+ #endif
+ #include "../../../include/cuda/cuda.h"
+
+ typedef int cudaError_t;
+ typedef CUstream cudaStream_t;
+ enum {
+ cudaSuccess = 0
+ };
+
+ enum cudaDeviceAttr {
+ cudaDevAttrClockRate = 13,
+ cudaDevAttrMaxGridDimX = 5
+ };
+
+ cudaError_t cudaDeviceGetAttribute (int *, enum cudaDeviceAttr, int);
+ cudaError_t cudaStreamQuery(cudaStream_t);
+ CUresult cuCtxGetApiVersion(CUcontext, unsigned int *);
+ CUresult cuStreamGetCtx (CUstream, CUcontext *);
+#endif
+
+int
+main ()
+{
+ int ivar;
+ unsigned uvar;
+ omp_interop_rc_t res;
+ omp_interop_t obj_cuda = omp_interop_none;
+ omp_interop_t obj_cuda_driver = omp_interop_none;
+ cudaError_t cuda_err;
+ CUresult cu_err;
+
+ #pragma omp interop init(target, targetsync, prefer_type("cuda") : obj_cuda) \
+ init(target, targetsync, prefer_type("cuda_driver") : obj_cuda_driver) \
+
+ omp_interop_fr_t fr = (omp_interop_fr_t) omp_get_interop_int (obj_cuda, omp_ipr_fr_id, &res);
+ assert (res == omp_irc_success);
+ assert (fr == omp_ifr_cuda);
+
+ fr = (omp_interop_fr_t) omp_get_interop_int (obj_cuda_driver, omp_ipr_fr_id, &res);
+ assert (res == omp_irc_success);
+ assert (fr == omp_ifr_cuda_driver);
+
+ ivar = (int) omp_get_interop_int (obj_cuda, omp_ipr_vendor, &res);
+ assert (res == omp_irc_success);
+ assert (ivar == 11);
+
+ ivar = (int) omp_get_interop_int (obj_cuda_driver, omp_ipr_vendor, &res);
+ assert (res == omp_irc_success);
+ assert (ivar == 11);
+
+
+ /* Check whether the omp_ipr_device -> cudaDevice_t yields a valid device. */
+
+ CUdevice cu_dev = (int) omp_get_interop_int (obj_cuda_driver, omp_ipr_device, &res);
+ assert (res == omp_irc_success);
+
+  /* Assume the clock rate is available and > 1 GHz; the value is in kHz.  */
+ cu_err = cuDeviceGetAttribute (&ivar, cudaDevAttrClockRate, cu_dev);
+ assert (cu_err == CUDA_SUCCESS);
+ assert (ivar > 1000000 /* kHz */);
+
+ /* Assume that the MaxGridDimX is available and > 1024. */
+ cu_err = cuDeviceGetAttribute (&ivar, cudaDevAttrMaxGridDimX, cu_dev);
+ assert (cu_err == CUDA_SUCCESS);
+ assert (ivar > 1024);
+
+ int cuda_dev = (int) omp_get_interop_int (obj_cuda, omp_ipr_device, &res);
+ assert (res == omp_irc_success);
+ assert (cuda_dev == (CUdevice) cu_dev); // Assume they are the same ...
+
+  /* Assume the clock rate is available and > 1 GHz; the value is in kHz.  */
+ cuda_err = cudaDeviceGetAttribute (&ivar, cudaDevAttrClockRate, cuda_dev);
+ assert (cuda_err == cudaSuccess);
+ assert (ivar > 1000000 /* kHz */);
+
+ /* Assume that the MaxGridDimX is available and > 1024. */
+ cuda_err = cudaDeviceGetAttribute (&ivar, cudaDevAttrMaxGridDimX, cuda_dev);
+ assert (cuda_err == cudaSuccess);
+ assert (ivar > 1024);
+
+
+
+
+ /* Check whether the omp_ipr_device_context -> CUcontext yields a context. */
+
+ CUcontext cu_ctx = (CUcontext) omp_get_interop_ptr (obj_cuda_driver, omp_ipr_device_context, &res);
+ assert (res == omp_irc_success);
+
+  /* Assume the API version is > 0.  */
+ uvar = 99;
+ cu_err = cuCtxGetApiVersion (cu_ctx, &uvar);
+ assert (cu_err == CUDA_SUCCESS);
+ assert (uvar > 0);
+
+
+ /* Check whether the omp_ipr_targetsync -> cudaStream_t yields a stream. */
+
+ cudaStream_t cuda_sm = (cudaStream_t) omp_get_interop_ptr (obj_cuda, omp_ipr_targetsync, &res);
+ assert (res == omp_irc_success);
+
+  CUstream cu_sm = (CUstream) omp_get_interop_ptr (obj_cuda_driver, omp_ipr_targetsync, &res);
+ assert (res == omp_irc_success);
+
+  assert ((void*) cu_sm != (void*) cuda_sm); // The types are compatible, but each interop object should have created its own stream.
+
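+  /* Check that the stream belongs to the device obtained above.  Before
+     CUDA 12.8 there is no cudaStreamGetDevice, so go via the stream's
+     context instead.  */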
+ int dev_stream = 99;
+#if CUDA_VERSION >= 12080
+ cuda_err = cudaStreamGetDevice (cuda_sm, &dev_stream);
+ assert (cuda_err == cudaSuccess);
+#else
+  cu_err = cuStreamGetCtx (cu_sm, &cu_ctx) != CUDA_SUCCESS;
+  if (cu_err == CUDA_SUCCESS)
+    cu_err = cuCtxPushCurrent (cu_ctx) != CUDA_SUCCESS;
+  if (cu_err == CUDA_SUCCESS)
+    cu_err = cuCtxGetDevice (&dev_stream) != CUDA_SUCCESS;
+  if (cu_err == CUDA_SUCCESS)
+    cu_err = cuCtxPopCurrent (&cu_ctx) != CUDA_SUCCESS;
+  assert (cu_err == CUDA_SUCCESS);
+#endif
+ assert (dev_stream == cuda_dev);
+
+  /* All jobs should have been completed (as there were none).  */
+ cuda_err = cudaStreamQuery (cuda_sm);
+ assert (cuda_err == cudaSuccess);
+
+ cu_err = cuStreamQuery (cu_sm);
+ assert (cu_err == CUDA_SUCCESS);
+
+ #pragma omp interop destroy(obj_cuda, obj_cuda_driver)
+}
diff --git a/libgomp/testsuite/libgomp.c/interop-cuda-libonly.c b/libgomp/testsuite/libgomp.c/interop-cuda-libonly.c
new file mode 100644
index 0000000..bc257a2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-cuda-libonly.c
@@ -0,0 +1,11 @@
+/* { dg-do run { target { offload_device_nvptx } } } */
+/* { dg-do link { target { ! offload_device_nvptx } } } */
+
+/* { dg-require-effective-target openacc_libcudart } */
+/* { dg-require-effective-target openacc_libcuda } */
+/* { dg-additional-options "-lcuda -lcudart" } */
+
+/* Same as interop-cuda-full.c, but also works if the header is not available. */
+
+#define USE_CUDA_FALLBACK_HEADER 1
+#include "interop-cuda-full.c"
diff --git a/libgomp/testsuite/libgomp.c/interop-hip-amd-full.c b/libgomp/testsuite/libgomp.c/interop-hip-amd-full.c
new file mode 100644
index 0000000..bd44f44
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hip-amd-full.c
@@ -0,0 +1,10 @@
+/* { dg-do run { target { offload_device_gcn } } } */
+/* { dg-do link { target { ! offload_device_gcn } } } */
+
+/* { dg-require-effective-target gomp_hip_header_amd } */
+/* { dg-require-effective-target gomp_libamdhip64 } */
+/* { dg-additional-options "-lamdhip64" } */
+
+#define __HIP_PLATFORM_AMD__ 1
+
+#include "interop-hip.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hip-amd-no-hip-header.c b/libgomp/testsuite/libgomp.c/interop-hip-amd-no-hip-header.c
new file mode 100644
index 0000000..91ad987
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hip-amd-no-hip-header.c
@@ -0,0 +1,11 @@
+/* { dg-do run { target { offload_device_gcn } } } */
+/* { dg-do link { target { ! offload_device_gcn } } } */
+
+/* { dg-require-effective-target gomp_libamdhip64 } */
+/* { dg-additional-options "-lamdhip64" } */
+
+#define __HIP_PLATFORM_AMD__ 1
+
+#define USE_HIP_FALLBACK_HEADER 1
+
+#include "interop-hip.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hip-nvidia-full.c b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-full.c
new file mode 100644
index 0000000..d5dc236
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-full.c
@@ -0,0 +1,11 @@
+/* { dg-do run { target { offload_device_nvptx } } } */
+/* { dg-do link { target { ! offload_device_nvptx } } } */
+
+/* { dg-require-effective-target openacc_cudart } */
+/* { dg-require-effective-target openacc_cuda } */
+/* { dg-require-effective-target gomp_hip_header_nvidia } */
+/* { dg-additional-options "-lcuda -lcudart -Wno-deprecated-declarations" } */
+
+#define __HIP_PLATFORM_NVIDIA__ 1
+
+#include "interop-hip.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-headers.c b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-headers.c
new file mode 100644
index 0000000..7cff2cb
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-headers.c
@@ -0,0 +1,13 @@
+/* { dg-do run { target { offload_device_nvptx } } } */
+/* { dg-do link { target { ! offload_device_nvptx } } } */
+
+/* { dg-require-effective-target openacc_libcudart } */
+/* { dg-require-effective-target openacc_libcuda } */
+/* { dg-additional-options "-lcuda -lcudart" } */
+
+#define __HIP_PLATFORM_NVIDIA__ 1
+
+#define USE_HIP_FALLBACK_HEADER 1
+#define USE_CUDA_FALLBACK_HEADER 1
+
+#include "interop-hip.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c
new file mode 100644
index 0000000..7b7dc74
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c
@@ -0,0 +1,12 @@
+/* { dg-do run { target { offload_device_nvptx } } } */
+/* { dg-do link { target { ! offload_device_nvptx } } } */
+
+/* { dg-require-effective-target openacc_cudart } */
+/* { dg-require-effective-target openacc_cuda } */
+/* { dg-additional-options "-lcuda -lcudart" } */
+
+#define __HIP_PLATFORM_NVIDIA__ 1
+
+#define USE_HIP_FALLBACK_HEADER 1
+
+#include "interop-hip.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hip.h b/libgomp/testsuite/libgomp.c/interop-hip.h
new file mode 100644
index 0000000..20a1ccb
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hip.h
@@ -0,0 +1,234 @@
+/* Minimal check whether HIP works - by checking whether the API routines
+ seem to work. This includes various fallbacks if the header is not
+ available. */
+
+#include <assert.h>
+#include <omp.h>
+
+#if !defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__)
+ #error "Either __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__ must be defined"
+#endif
+
+#if defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__)
+  #error "Only one of __HIP_PLATFORM_AMD__ and __HIP_PLATFORM_NVIDIA__ may be defined"
+#endif
+
+#if __has_include(<hip/hip_runtime_api.h>) && !defined(USE_HIP_FALLBACK_HEADER)
+ #include <hip/hip_runtime_api.h>
+
+#elif defined(__HIP_PLATFORM_AMD__)
+ /* Add a poor man's fallback declaration. */
+ #if !defined(USE_HIP_FALLBACK_HEADER)
+ #warning "Using fallback declaration for <hip/hip_runtime_api.h> for __HIP_PLATFORM_AMD__"
+ #endif
+
+ typedef struct ihipStream_t* hipStream_t;
+ typedef struct ihipCtx_t* hipCtx_t;
+ typedef int hipError_t;
+ typedef int hipDevice_t;
+ enum {
+ hipSuccess = 0,
+ hipErrorNotSupported = 801
+ };
+
+ typedef enum hipDeviceAttribute_t {
+ hipDeviceAttributeClockRate = 5,
+ hipDeviceAttributeMaxGridDimX = 29
+ } hipDeviceAttribute_t;
+
+ hipError_t hipDeviceGetAttribute (int *, hipDeviceAttribute_t, hipDevice_t);
+ hipError_t hipCtxGetApiVersion (hipCtx_t, int *);
+ hipError_t hipStreamGetDevice (hipStream_t, hipDevice_t *);
+ hipError_t hipStreamQuery (hipStream_t);
+
+#elif defined(__HIP_PLATFORM_NVIDIA__)
+ /* Add a poor man's fallback declaration. */
+ #if !defined(USE_HIP_FALLBACK_HEADER)
+ #warning "Using fallback declaration for <hip/hip_runtime_api.h> for __HIP_PLATFORM_NVIDIA__"
+ #endif
+
+ #if __has_include(<cuda.h>) && __has_include(<cudaTypedefs.h>) && __has_include(<cuda_runtime.h>) && !defined(USE_CUDA_FALLBACK_HEADER)
+ #include <cuda.h>
+ #include <cudaTypedefs.h>
+ #include <cuda_runtime.h>
+ #else
+ #if defined(USE_CUDA_FALLBACK_HEADER)
+ // no warning
+ #elif !__has_include(<cuda.h>)
+ #warning "Using GCC's cuda.h as fallback for cuda.h"
+ #elif !__has_include(<cudaTypedefs.h>)
+ #warning "Using GCC's cuda.h as fallback for cudaTypedefs.h"
+ #else
+ #warning "Using GCC's cuda.h as fallback for cuda_runtime.h"
+ #endif
+
+ #include "../../../include/cuda/cuda.h"
+
+ typedef int cudaError_t;
+ enum {
+ cudaSuccess = 0
+ };
+
+ enum cudaDeviceAttr {
+ cudaDevAttrClockRate = 13,
+ cudaDevAttrMaxGridDimX = 5
+ };
+
+ cudaError_t cudaDeviceGetAttribute (int *, enum cudaDeviceAttr, int);
+ CUresult cuCtxGetApiVersion(CUcontext, unsigned int *);
+ CUresult cuStreamGetCtx (CUstream, CUcontext *);
+ #endif
+
+ typedef CUstream hipStream_t;
+ typedef CUcontext hipCtx_t;
+ typedef CUdevice hipDevice_t;
+
+ typedef int hipError_t;
+ enum {
+ hipSuccess = 0,
+ hipErrorNotSupported = 801
+ };
+
+
+ typedef enum hipDeviceAttribute_t {
+ hipDeviceAttributeClockRate = 5,
+ hipDeviceAttributeMaxGridDimX = 29
+ } hipDeviceAttribute_t;
+
+ inline static hipError_t
+ hipDeviceGetAttribute (int *ival, hipDeviceAttribute_t attr, hipDevice_t dev)
+ {
+ enum cudaDeviceAttr cuattr;
+ switch (attr)
+ {
+ case hipDeviceAttributeClockRate:
+ cuattr = cudaDevAttrClockRate;
+ break;
+ case hipDeviceAttributeMaxGridDimX:
+ cuattr = cudaDevAttrMaxGridDimX;
+ break;
+ default:
+ assert (0);
+ }
+ return cudaDeviceGetAttribute (ival, cuattr, dev) != cudaSuccess;
+ }
+
+ inline static hipError_t
+ hipCtxGetApiVersion (hipCtx_t ctx, int *ver)
+ {
+ unsigned uver;
+ hipError_t err;
+ err = cuCtxGetApiVersion (ctx, &uver) != CUDA_SUCCESS;
+ *ver = (int) uver;
+ return err;
+ }
+
+ inline static hipError_t
+ hipStreamGetDevice (hipStream_t stream, hipDevice_t *dev)
+ {
+#if CUDA_VERSION >= 12080
+ return cudaStreamGetDevice (stream, dev);
+#else
+ hipError_t err;
+ CUcontext ctx;
+ err = cuStreamGetCtx (stream, &ctx) != CUDA_SUCCESS;
+ if (err == hipSuccess)
+ err = cuCtxPushCurrent (ctx) != CUDA_SUCCESS;
+ if (err == hipSuccess)
+ err = cuCtxGetDevice (dev) != CUDA_SUCCESS;
+ if (err == hipSuccess)
+ err = cuCtxPopCurrent (&ctx) != CUDA_SUCCESS;
+ return err;
+#endif
+ }
+
+ inline static hipError_t
+ hipStreamQuery (hipStream_t stream)
+ {
+ return cuStreamQuery (stream) != CUDA_SUCCESS;
+ }
+
+#else
+ #error "should be unreachable"
+#endif
+
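+/* Below, only the HIP spellings are used; on the NVIDIA platform they map
+   onto the CUDA driver/runtime API, either via <hip/hip_runtime_api.h> or
+   via the fallback typedefs and wrappers above.  */
+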
+int
+main ()
+{
+ int ivar;
+ omp_interop_rc_t res;
+ omp_interop_t obj = omp_interop_none;
+ hipError_t hip_err;
+
+ #pragma omp interop init(target, targetsync, prefer_type("hip") : obj)
+
+ omp_interop_fr_t fr = (omp_interop_fr_t) omp_get_interop_int (obj, omp_ipr_fr_id, &res);
+ assert (res == omp_irc_success);
+ assert (fr == omp_ifr_hip);
+
+ ivar = (int) omp_get_interop_int (obj, omp_ipr_vendor, &res);
+ assert (res == omp_irc_success);
+ int vendor_is_amd = ivar == 1;
+ #if defined(__HIP_PLATFORM_AMD__)
+ assert (ivar == 1);
+ #elif defined(__HIP_PLATFORM_NVIDIA__)
+ assert (ivar == 11);
+ #else
+ assert (0);
+ #endif
+
+
+ /* Check whether the omp_ipr_device -> hipDevice_t yields a valid device. */
+
+ hipDevice_t hip_dev = (int) omp_get_interop_int (obj, omp_ipr_device, &res);
+ assert (res == omp_irc_success);
+
+  /* Assume the clock rate is available and > 1 GHz; the value is in kHz.  */
+ hip_err = hipDeviceGetAttribute (&ivar, hipDeviceAttributeClockRate, hip_dev);
+ assert (hip_err == hipSuccess);
+ assert (ivar > 1000000 /* kHz */);
+
+ /* Assume that the MaxGridDimX is available and > 1024. */
+ hip_err = hipDeviceGetAttribute (&ivar, hipDeviceAttributeMaxGridDimX, hip_dev);
+ assert (hip_err == hipSuccess);
+ assert (ivar > 1024);
+
+
+ /* Check whether the omp_ipr_device_context -> hipCtx_t yields a context. */
+
+ hipCtx_t hip_ctx = (hipCtx_t) omp_get_interop_ptr (obj, omp_ipr_device_context, &res);
+ assert (res == omp_irc_success);
+
+ /* Assume API Version > 0 for Nvidia, hipErrorNotSupported for AMD. */
+ ivar = -99;
+ #pragma GCC diagnostic push
+ #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+ hip_err = hipCtxGetApiVersion (hip_ctx, &ivar);
+ #pragma GCC diagnostic pop
+
+ if (vendor_is_amd)
+ assert (hip_err == hipErrorNotSupported && ivar == -99);
+ else
+ {
+ assert (hip_err == hipSuccess);
+ assert (ivar > 0);
+ }
+
+
+ /* Check whether the omp_ipr_targetsync -> hipStream_t yields a stream. */
+
+ hipStream_t hip_sm = (hipStream_t) omp_get_interop_ptr (obj, omp_ipr_targetsync, &res);
+ assert (res == omp_irc_success);
+
+ hipDevice_t dev_stream = 99;
+ hip_err = hipStreamGetDevice (hip_sm, &dev_stream);
+ assert (hip_err == hipSuccess);
+ assert (dev_stream == hip_dev);
+
+  /* All jobs should have been completed (as there were none).  */
+ hip_err = hipStreamQuery (hip_sm);
+ assert (hip_err == hipSuccess);
+
+ #pragma omp interop destroy(obj)
+}
diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas-amd-full.c b/libgomp/testsuite/libgomp.c/interop-hipblas-amd-full.c
new file mode 100644
index 0000000..53c05bd
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hipblas-amd-full.c
@@ -0,0 +1,7 @@
+/* { dg-require-effective-target gomp_hip_header_amd } */
+/* { dg-require-effective-target gomp_libhipblas } */
+/* { dg-additional-options "-lhipblas" } */
+
+#define __HIP_PLATFORM_AMD__ 1
+
+#include "interop-hipblas.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas-amd-no-hip-header.c b/libgomp/testsuite/libgomp.c/interop-hipblas-amd-no-hip-header.c
new file mode 100644
index 0000000..0ea3133
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hipblas-amd-no-hip-header.c
@@ -0,0 +1,8 @@
+/* { dg-require-effective-target gomp_libhipblas } */
+/* { dg-additional-options "-lhipblas" } */
+
+#define __HIP_PLATFORM_AMD__ 1
+
+#define USE_HIP_FALLBACK_HEADER 1
+
+#include "interop-hipblas.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-full.c b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-full.c
new file mode 100644
index 0000000..ed428c6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-full.c
@@ -0,0 +1,7 @@
+/* { dg-require-effective-target openacc_cublas } */
+/* { dg-require-effective-target gomp_hip_header_nvidia } */
+/* { dg-additional-options "-lcublas -Wno-deprecated-declarations" } */
+
+#define __HIP_PLATFORM_NVIDIA__ 1
+
+#include "interop-hipblas.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-headers.c b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-headers.c
new file mode 100644
index 0000000..1a31b30
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-headers.c
@@ -0,0 +1,9 @@
+/* { dg-require-effective-target openacc_libcublas } */
+/* { dg-additional-options "-lcublas" } */
+
+#define __HIP_PLATFORM_NVIDIA__ 1
+
+#define USE_HIP_FALLBACK_HEADER 1
+#define USE_CUDA_FALLBACK_HEADER 1
+
+#include "interop-hipblas.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-hip-header.c b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-hip-header.c
new file mode 100644
index 0000000..f85c13b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-hip-header.c
@@ -0,0 +1,8 @@
+/* { dg-require-effective-target openacc_cublas } */
+/* { dg-additional-options "-lcublas" } */
+
+#define __HIP_PLATFORM_NVIDIA__ 1
+
+#define USE_HIP_FALLBACK_HEADER 1
+
+#include "interop-hipblas.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas.h b/libgomp/testsuite/libgomp.c/interop-hipblas.h
new file mode 100644
index 0000000..d7cb174
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hipblas.h
@@ -0,0 +1,240 @@
+/* Check whether hipBlas' daxpy works with an interop object.
+ daxpy(N, DA, DX, INCX, DY, INCY)
+   calculates (for INCX = INCY = 1):
+ DY(1:N) = DY(1:N) + DA * DX(1:N)
+   and otherwise over N array elements, taking every INCX-th or INCY-th one, respectively.
+
+Based on the interop example in OpenMP's Examples document.  */
+
+/* Minimal check whether HIP works - by checking whether the API routines
+ seem to work. This includes a fallback if the header is not
+ available. */
+
+#if !defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__)
+ #error "Either __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__ must be defined"
+#endif
+
+#if defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__)
+  #error "Only one of __HIP_PLATFORM_AMD__ and __HIP_PLATFORM_NVIDIA__ may be defined"
+#endif
+
+
+#include <assert.h>
+#include <omp.h>
+#include "../libgomp.c-c++-common/on_device_arch.h"
+
+
+#if __has_include(<hipblas/hipblas.h>) && (__has_include(<library_types.h>) || !defined(__HIP_PLATFORM_NVIDIA__)) && !defined(USE_HIP_FALLBACK_HEADER)
+ #ifdef __HIP_PLATFORM_NVIDIA__
+ /* There seems to be an issue with hip/library_types.h including
+       CUDA's "library_types.h".  Include CUDA's header explicitly here.
+       Could possibly be worked around by using -isystem instead of -I. */
+ #include <library_types.h>
+
+    /* For some reason, the following symbols do not seem to get
+ mapped from HIP to CUDA, causing link errors. */
+ #define hipblasSetStream cublasSetStream_v2
+ #define hipblasDaxpy cublasDaxpy_v2
+ #define hipblasCreate cublasCreate_v2
+ #endif
+ #include <hipblas/hipblas.h>
+
+#elif defined(__HIP_PLATFORM_AMD__)
+ /* Add a poor man's fallback declaration. */
+ #if !defined(USE_HIP_FALLBACK_HEADER)
+ #warning "Using fallback declaration for <hipblas/hipblas.h> for __HIP_PLATFORM_AMD__"
+ #endif
+
+ typedef enum
+ {
+ HIPBLAS_STATUS_SUCCESS = 0
+
+ } hipblasStatus_t;
+
+ typedef struct ihipStream_t* hipStream_t;
+ typedef void* hipblasHandle_t;
+
+ hipblasStatus_t hipblasCreate (hipblasHandle_t*);
+ hipblasStatus_t hipblasSetStream (hipblasHandle_t, hipStream_t);
+ hipblasStatus_t hipblasDaxpy (hipblasHandle_t, int, const double*, const double*, int, double*, int);
+
+#else
+ /* Add a poor man's fallback declaration. */
+ #if !defined(USE_HIP_FALLBACK_HEADER)
+    #warning "Using fallback declaration for <hipblas/hipblas.h> for __HIP_PLATFORM_NVIDIA__"
+ #endif
+
+ #if __has_include(<cuda.h>) && __has_include(<cudaTypedefs.h>) && __has_include(<cuda_runtime.h>) && __has_include(<cublas_v2.h>) && !defined(USE_CUDA_FALLBACK_HEADER)
+ #include <cuda.h>
+ #include <cudaTypedefs.h>
+ #include <cuda_runtime.h>
+ #include <cublas_v2.h>
+
+ #else
+ /* Add a poor man's fallback declaration. */
+ #if defined(USE_CUDA_FALLBACK_HEADER)
+ // no warning
+ #elif !__has_include(<cuda.h>)
+ #warning "Using GCC's cuda.h as fallback for cuda.h"
+ #elif !__has_include(<cudaTypedefs.h>)
+ #warning "Using GCC's cuda.h as fallback for cudaTypedefs.h"
+ #elif !__has_include(<cuda_runtime.h>)
+ #warning "Using GCC's cuda.h as fallback for cuda_runtime.h"
+ #else
+ #warning "Using GCC's cuda.h as fallback for cublas_v2.h"
+ #endif
+ #include "../../../include/cuda/cuda.h"
+
+ typedef enum {
+ CUBLAS_STATUS_SUCCESS = 0,
+ } cublasStatus_t;
+
+ typedef CUstream cudaStream_t;
+ typedef struct cublasContext* cublasHandle_t;
+
+ #define cublasCreate cublasCreate_v2
+ cublasStatus_t cublasCreate_v2 (cublasHandle_t *);
+
+ #define cublasSetStream cublasSetStream_v2
+ cublasStatus_t cublasSetStream_v2 (cublasHandle_t, cudaStream_t);
+
+ #define cublasDaxpy cublasDaxpy_v2
+ cublasStatus_t cublasDaxpy_v2(cublasHandle_t, int, const double*, const double*, int, double*, int);
+ #endif
+
+ #define HIPBLAS_STATUS_SUCCESS CUBLAS_STATUS_SUCCESS
+ #define hipblasStatus_t cublasStatus_t
+ #define hipStream_t cudaStream_t
+ #define hipblasHandle_t cublasHandle_t
+ #define hipblasCreate cublasCreate
+ #define hipblasSetStream cublasSetStream
+ #define hipblasDaxpy cublasDaxpy
+#endif
+
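+/* Below, only the hipBLAS spellings are used; on the NVIDIA platform the
+   defines above map them onto the corresponding cuBLAS entry points.  */
+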
+static int used_variant = 0;
+
+void
+run_hipBlasdaxpy (int n, double da, const double *dx, int incx, double *dy, int incy, omp_interop_t obj)
+{
+ used_variant = 1;
+
+ omp_interop_rc_t res;
+ hipblasStatus_t stat;
+
+ omp_intptr_t fr = omp_get_interop_int(obj, omp_ipr_fr_id, &res);
+ assert (res == omp_irc_success && fr == omp_ifr_hip);
+
+ hipStream_t stream = (hipStream_t) omp_get_interop_ptr (obj, omp_ipr_targetsync, &res);
+ assert (res == omp_irc_success);
+
+ hipblasHandle_t handle;
+ stat = hipblasCreate (&handle);
+ assert (stat == HIPBLAS_STATUS_SUCCESS);
+
+ stat = hipblasSetStream (handle, stream);
+ assert (stat == HIPBLAS_STATUS_SUCCESS);
+
+ /* 'da' can be in host or device space, 'dx' and 'dy' must be in device space. */
+  stat = hipblasDaxpy (handle, n, &da, dx, incx, dy, incy);
+ assert (stat == HIPBLAS_STATUS_SUCCESS);
+}
+
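+/* Declare run_hipBlasdaxpy as a variant of run_daxpy: it is selected when
+   run_daxpy is called via 'dispatch' for a non-host device of the matching
+   architecture; the interop object is appended as trailing argument and
+   dx/dy are converted to device pointers.  */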
+#if defined(__HIP_PLATFORM_AMD__)
+#pragma omp declare variant(run_hipBlasdaxpy) \
+ match(construct={dispatch}, target_device={kind(nohost), arch("amdgcn")}) \
+ adjust_args(need_device_ptr : dx, dy) \
+ append_args(interop(targetsync, prefer_type("hip")))
+#elif defined(__HIP_PLATFORM_NVIDIA__)
+#pragma omp declare variant(run_hipBlasdaxpy) \
+ match(construct={dispatch}, target_device={kind(nohost), arch("nvptx")}) \
+ adjust_args(need_device_ptr : dx, dy) \
+ append_args(interop(targetsync, prefer_type("hip")))
+#else
+ #error "wrong platform"
+#endif
+
+void
+run_daxpy (int n, double da, const double *dx, int incx, double *dy, int incy)
+{
+ used_variant = 2;
+
+ if (incx == 1 && incy == 1)
+ #pragma omp simd
+ for (int i = 0; i < n; i++)
+ dy[i] += da * dx[i];
+ else
+ {
+ int ix = 0;
+ int iy = 0;
+ for (int i = 0; i < n; i++)
+ {
+ dy[iy] += da * dx[ix];
+ ix += incx;
+ iy += incy;
+ }
+ }
+}
+
+
+void
+run_test (int dev)
+{
+ constexpr int N = 1024;
+
+  // A = {0, 1, 2, ..., N-1}
+  // B = {0, -1, -2, ..., -(N-1)}
+ // B' = daxpy (N, 3, A, incx=1, B, incy=1)
+ // = B + 3*A
+ // -> B' = {0, 2, 4, 6, ... }
+
+ double A[N], B[N];
+ double factor = 3.0;
+ for (int i = 0; i < N; i++)
+ {
+ A[i] = i;
+ B[i] = -i;
+ }
+
+ if (dev != omp_initial_device && dev != omp_get_num_devices ())
+ {
+ #pragma omp target enter data device(dev) map(A, B)
+ }
+
+ used_variant = 99;
+ #pragma omp dispatch device(dev)
+ run_daxpy (N, factor, A, 1, B, 1);
+
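+  /* Check which variant was dispatched: on an offload device of the matching
+     architecture the hipBLAS variant (1) must have run, otherwise the plain
+     host fallback (2).  */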
+ if (dev != omp_initial_device && dev != omp_get_num_devices ())
+ {
+ #pragma omp target exit data device(dev) map(release: A) map(from: B)
+
+ int tmp = omp_get_default_device ();
+ omp_set_default_device (dev);
+#if defined(__HIP_PLATFORM_AMD__)
+ if (on_device_arch_gcn ())
+#else
+ if (on_device_arch_nvptx ())
+#endif
+ assert (used_variant == 1);
+ else
+ assert (used_variant == 2);
+ omp_set_default_device (tmp);
+ }
+ else
+ assert (used_variant == 2);
+
+ for (int i = 0; i < N; i++)
+ assert (B[i] == 2*i);
+}
+
+int
+main ()
+{
+ int ndev = omp_get_num_devices ();
+
+ for (int dev = 0; dev <= ndev; dev++)
+ run_test (dev);
+ run_test (omp_initial_device);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/interop-hsa.c b/libgomp/testsuite/libgomp.c/interop-hsa.c
new file mode 100644
index 0000000..21ac91c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hsa.c
@@ -0,0 +1,205 @@
+/* { dg-additional-options "-ldl" } */
+/* { dg-require-effective-target offload_device_gcn }
+ The 'asm' insert is valid for GCN only:
+ { dg-additional-options -foffload=amdgcn-amdhsa } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <omp.h>
+#include <assert.h>
+#include <dlfcn.h>
+#include "../../../include/hsa.h"
+#include "../../config/gcn/libgomp-gcn.h"
+
+#define STACKSIZE (100 * 1024)
+#define HEAPSIZE (10 * 1024 * 1024)
+#define ARENASIZE HEAPSIZE
+
+/* This code fragment must be optimized or else the host-fallback kernel has
+ * invalid ASM inserts. The rest of the file can be compiled safely at -O0. */
+#pragma omp declare target
+uintptr_t __attribute__((optimize("O1")))
+get_kernel_ptr ()
+{
+ uintptr_t val;
+ if (!omp_is_initial_device ())
+ /* "main._omp_fn.0" is the name GCC gives the first OpenMP target
+ * region in the "main" function.
+ * The ".kd" suffix is added by the LLVM assembler when it creates the
+ * kernel meta-data, and this is what we need to launch a kernel. */
+ asm ("s_getpc_b64 %0\n\t"
+ "s_add_u32 %L0, %L0, main._omp_fn.0.kd@rel32@lo+4\n\t"
+ "s_addc_u32 %H0, %H0, main._omp_fn.0.kd@rel32@hi+4"
+ : "=Sg"(val));
+ return val;
+}
+#pragma omp end declare target
+
+int
+main(int argc, char** argv)
+{
+
+ /* Load the HSA runtime DLL. */
+ void *hsalib = dlopen ("libhsa-runtime64.so.1", RTLD_LAZY);
+ assert (hsalib);
+
+ hsa_status_t (*hsa_signal_create) (hsa_signal_value_t initial_value,
+ uint32_t num_consumers,
+ const hsa_agent_t *consumers,
+ hsa_signal_t *signal)
+ = dlsym (hsalib, "hsa_signal_create");
+ assert (hsa_signal_create);
+
+ uint64_t (*hsa_queue_load_write_index_relaxed) (const hsa_queue_t *queue)
+ = dlsym (hsalib, "hsa_queue_load_write_index_relaxed");
+ assert (hsa_queue_load_write_index_relaxed);
+
+ void (*hsa_signal_store_relaxed) (hsa_signal_t signal,
+ hsa_signal_value_t value)
+ = dlsym (hsalib, "hsa_signal_store_relaxed");
+ assert (hsa_signal_store_relaxed);
+
+ hsa_signal_value_t (*hsa_signal_wait_relaxed) (hsa_signal_t signal,
+ hsa_signal_condition_t condition,
+ hsa_signal_value_t compare_value,
+ uint64_t timeout_hint,
+ hsa_wait_state_t wait_state_hint)
+ = dlsym (hsalib, "hsa_signal_wait_relaxed");
+ assert (hsa_signal_wait_relaxed);
+
+ void (*hsa_queue_store_write_index_relaxed) (const hsa_queue_t *queue,
+ uint64_t value)
+ = dlsym (hsalib, "hsa_queue_store_write_index_relaxed");
+ assert (hsa_queue_store_write_index_relaxed);
+
+ hsa_status_t (*hsa_signal_destroy) (hsa_signal_t signal)
+ = dlsym (hsalib, "hsa_signal_destroy");
+ assert (hsa_signal_destroy);
+
+ /* Set up the device data environment. */
+ int test_data_value = 0;
+#pragma omp target enter data map(test_data_value)
+
+ /* Get the interop details. */
+ int device_num = omp_get_default_device();
+ hsa_agent_t *gpu_agent;
+ hsa_queue_t *hsa_queue = NULL;
+
+ omp_interop_t interop = omp_interop_none;
+#pragma omp interop init(target, targetsync, prefer_type("hsa"): interop) device(device_num)
+ assert (interop != omp_interop_none);
+
+ omp_interop_rc_t retcode;
+ omp_interop_fr_t fr = omp_get_interop_int (interop, omp_ipr_fr_id, &retcode);
+ assert (retcode == omp_irc_success);
+ assert (fr == omp_ifr_hsa);
+
+ gpu_agent = omp_get_interop_ptr(interop, omp_ipr_device, &retcode);
+ assert (retcode == omp_irc_success);
+
+ hsa_queue = omp_get_interop_ptr(interop, omp_ipr_targetsync, &retcode);
+ assert (retcode == omp_irc_success);
+ assert (hsa_queue);
+
+ /* Call an offload kernel via OpenMP/libgomp.
+ *
+ * This kernel serves two purposes:
+ * 1) Lookup the device-side load-address of itself (thus avoiding the
+ * need to access the libgomp internals).
+ * 2) Count how many times it is called.
+ * We then call it once using OpenMP, and once manually, and check
+ * the counter reads "2". */
+ uint64_t kernel_object = 0;
+#pragma omp target map(from:kernel_object) map(present,alloc:test_data_value)
+ {
+ kernel_object = get_kernel_ptr ();
+ ++test_data_value;
+ }
+
+ assert (kernel_object != 0);
+
+ /* Configure the same kernel to run again, using HSA manually this time. */
+ hsa_status_t status;
+ hsa_signal_t signal;
+ status = hsa_signal_create(1, 0, NULL, &signal);
+ assert (status == HSA_STATUS_SUCCESS);
+
+ /* The kernel is built by GCC for OpenMP, so we need to pass the same
+ * data pointers that libgomp would pass in. */
+ struct {
+ uintptr_t test_data_value;
+ uintptr_t kernel_object;
+ } tgtaddrs;
+
+#pragma omp target data use_device_addr(test_data_value)
+ {
+ tgtaddrs.test_data_value = (uintptr_t)&test_data_value;
+ tgtaddrs.kernel_object = (uintptr_t)omp_target_alloc (8, device_num);
+ }
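+  /* The 8-byte buffer allocated above provides a device-side slot for
+     'kernel_object', mirroring what 'map(from: kernel_object)' set up in the
+     libgomp-driven launch; its content is not read back here.  */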
+
+ /* We also need to duplicate the launch ABI used by plugin-gcn.c. */
+ struct kernargs_abi args; /* From libgomp-gcn.h. */
+ args.dummy1 = (int64_t)&tgtaddrs;
+ args.out_ptr = (int64_t)malloc (sizeof (struct output)); /* Host side. */
+ args.heap_ptr = (int64_t)omp_target_alloc (HEAPSIZE, device_num);
+ args.arena_ptr = (int64_t)omp_target_alloc (ARENASIZE, device_num);
+ args.stack_ptr = (int64_t)omp_target_alloc (STACKSIZE, device_num);
+ args.arena_size_per_team = ARENASIZE;
+ args.stack_size_per_thread = STACKSIZE;
+
+ /* Build the HSA dispatch packet, and insert it into the queue. */
+ uint64_t packet_id = hsa_queue_load_write_index_relaxed (hsa_queue);
+ const uint32_t queueMask = hsa_queue->size - 1;
+ hsa_kernel_dispatch_packet_t *dispatch_packet =
+ &(((hsa_kernel_dispatch_packet_t *)
+ (hsa_queue->base_address))[packet_id & queueMask]);
+
+ dispatch_packet->setup = 3 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
+ dispatch_packet->workgroup_size_x = 1;
+ dispatch_packet->workgroup_size_y = 64;
+ dispatch_packet->workgroup_size_z = 1;
+ dispatch_packet->grid_size_x = 1;
+ dispatch_packet->grid_size_y = 64;
+ dispatch_packet->grid_size_z = 1;
+ dispatch_packet->completion_signal = signal;
+ dispatch_packet->kernel_object = kernel_object;
+ dispatch_packet->kernarg_address = &args;
+ dispatch_packet->private_segment_size = 0;
+ dispatch_packet->group_segment_size = 1536;
+
+ uint16_t header = 0;
+ header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
+ header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
+ header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
+
+ /* Finish writing the packet header with an atomic release. */
+ __atomic_store_n((uint16_t*)dispatch_packet, header, __ATOMIC_RELEASE);
+
+ hsa_queue_store_write_index_relaxed (hsa_queue, packet_id + 1);
+
+  /* Run the kernel and wait for it to complete. */
+ hsa_signal_store_relaxed(hsa_queue->doorbell_signal, packet_id);
+ while (hsa_signal_wait_relaxed(signal, HSA_SIGNAL_CONDITION_LT, 1,
+ UINT64_MAX, HSA_WAIT_STATE_ACTIVE) != 0)
+ ;
+
+ /* Clean up HSA. */
+ hsa_signal_destroy(signal);
+ free ((void*)args.out_ptr);
+ omp_target_free ((void*)args.heap_ptr, device_num);
+ omp_target_free ((void*)args.arena_ptr, device_num);
+ omp_target_free ((void*)args.stack_ptr, device_num);
+ omp_target_free ((void*)tgtaddrs.kernel_object, device_num);
+
+ /* Clean up OpenMP. */
+ #pragma omp interop destroy(interop)
+
+ /* Bring the data back from the device. */
+#pragma omp target exit data map(test_data_value)
+
+ /* Ensure the kernel was called twice. Once by OpenMP, once by HSA. */
+ assert (test_data_value == 2);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/target-map-zero-sized-2.c b/libgomp/testsuite/libgomp.c/target-map-zero-sized-2.c
new file mode 100644
index 0000000..3220828
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-map-zero-sized-2.c
@@ -0,0 +1,74 @@
+int
+main ()
+{
+ int i, n;
+ int data[] = {1,2};
+ struct S { int **ptrset; };
+
+// -----------------------------------
+
+/* The produced mapping for sptr1->ptrset[i][:n]
+
+ GOMP_MAP_STRUCT (size = 1)
+ GOMP_MAP_ZERO_LEN_ARRAY_SECTION
+ GOMP_MAP_ZERO_LEN_ARRAY_SECTION
+ GOMP_MAP_ATTACH
+ GOMP_MAP_ATTACH -> attaching to 2nd GOMP_MAP_ZERO_LEN_ARRAY_SECTION
+
+which get split into 3 separate map_vars calls; in particular,
+the last one is separate and points to an unmapped variable.
+
+Thus, it failed with:
+ libgomp: pointer target not mapped for attach */
+
+ struct S s1, *sptr1;
+ s1.ptrset = (int **) __builtin_malloc (sizeof(void*) * 3);
+ s1.ptrset[0] = data;
+ s1.ptrset[1] = data;
+ s1.ptrset[2] = data;
+ sptr1 = &s1;
+
+ i = 1;
+ n = 0;
+ #pragma omp target enter data map(sptr1[:1], sptr1->ptrset[:3])
+ #pragma omp target enter data map(sptr1->ptrset[i][:n])
+
+ #pragma omp target exit data map(sptr1->ptrset[i][:n])
+ #pragma omp target exit data map(sptr1[:1], sptr1->ptrset[:3])
+
+ __builtin_free (s1.ptrset);
+
+// -----------------------------------
+
+/* The produced mapping for sptr2->ptrset[i][:n] is similar:
+
+ GOMP_MAP_STRUCT (size = 1)
+ GOMP_MAP_ZERO_LEN_ARRAY_SECTION
+      GOMP_MAP_TO  ! this one now has a finite size
+ GOMP_MAP_ATTACH
+ GOMP_MAP_ATTACH -> attach to the GOMP_MAP_TO
+
+As the latter GOMP_MAP_ATTACH now has a pointer target,
+the attachment works.  */
+
+ struct S s2, *sptr2;
+ s2.ptrset = (int **) __builtin_malloc (sizeof(void*) * 3);
+ s2.ptrset[0] = data;
+ s2.ptrset[1] = data;
+ s2.ptrset[2] = data;
+ sptr2 = &s2;
+
+ i = 1;
+ n = 2;
+ #pragma omp target enter data map(sptr2[:1], sptr2->ptrset[:3])
+ #pragma omp target enter data map(sptr2->ptrset[i][:n])
+
+ #pragma omp target
+ if (sptr2->ptrset[1][0] != 1 || sptr2->ptrset[1][1] != 2)
+ __builtin_abort ();
+
+ #pragma omp target exit data map(sptr2->ptrset[i][:n])
+ #pragma omp target exit data map(sptr2[:1], sptr2->ptrset[:3])
+
+ __builtin_free (s2.ptrset);
+}
diff --git a/libgomp/testsuite/libgomp.c/target-map-zero-sized-3.c b/libgomp/testsuite/libgomp.c/target-map-zero-sized-3.c
new file mode 100644
index 0000000..580c6ad
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-map-zero-sized-3.c
@@ -0,0 +1,50 @@
+int
+main ()
+{
+ int i, n;
+ int data[] = {1,2};
+ struct S {
+ int **ptrset;
+ int **ptrset2;
+ };
+
+  /* This is the same as target-map-zero-sized-2.c, but by mixing
+     mapped and non-mapped items, the mapping before the ATTACH
+     might not be (and here is not) actually associated with the
+     pointer used for attaching.  Thus, a simple
+
+       if (openmp_p
+           && (pragma_kind & GOMP_MAP_VARS_ENTER_DATA)
+           && mapnum == 1)
+
+     check in target.c's gomp_map_vars_internal fails, as mapnum > 1
+     while the map associated with this ATTACH is still in a
+     different set.  */
+
+ struct S s1, *sptr1;
+ s1.ptrset = (int **) __builtin_malloc (sizeof(void*) * 3);
+ s1.ptrset2 = (int **) __builtin_malloc (sizeof(void*) * 3);
+ s1.ptrset[0] = data;
+ s1.ptrset[1] = data;
+ s1.ptrset[2] = data;
+ s1.ptrset2[0] = data;
+ s1.ptrset2[1] = data;
+ s1.ptrset2[2] = data;
+ sptr1 = &s1;
+
+ i = 1;
+ n = 0;
+ #pragma omp target enter data map(data)
+ #pragma omp target enter data map(sptr1[:1], sptr1->ptrset[:3], sptr1->ptrset2[:3])
+ #pragma omp target enter data map(sptr1->ptrset[i][:n], sptr1->ptrset2[i][:n])
+
+ #pragma omp target map(sptr1->ptrset[i][:n], sptr1->ptrset2[i][:n])
+ if (sptr1->ptrset2[1][0] != 1 || sptr1->ptrset2[1][1] != 2)
+ __builtin_abort ();
+
+ #pragma omp target exit data map(sptr1->ptrset[i][:n], sptr1->ptrset2[i][:n])
+ #pragma omp target exit data map(sptr1[:1], sptr1->ptrset[:3], sptr1->ptrset2[:3])
+ #pragma omp target exit data map(data)
+
+ __builtin_free (s1.ptrset);
+ __builtin_free (s1.ptrset2);
+}
diff --git a/libgomp/testsuite/libgomp.c/target-map-zero-sized.c b/libgomp/testsuite/libgomp.c/target-map-zero-sized.c
new file mode 100644
index 0000000..7c4ab80
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-map-zero-sized.c
@@ -0,0 +1,107 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O0" } */
+
+/* The issue showed up in the real world when large data was distributed
+   over multiple MPI processes - such that for one process n == 0
+   happened at run time.
+
+   Previously, map(var[:0]) and map(var[:n]) with n > 0 were handled;
+   this patch now also handles map(var[:n]) where n is 0 at run time.
+
+   This failed before with "libgomp: pointer target not mapped for attach".  */
+
+/* Here, the base address is shifted - which should have no effect,
+ but must work as well. */
+void
+with_offset ()
+{
+ struct S {
+ int *ptr1, *ptr2;
+ };
+ struct S s1, s2;
+ int *a, *b, *c, *d;
+ s1.ptr1 = (int *) 0L;
+ s1.ptr2 = (int *) 0xdeedbeef;
+ s2.ptr1 = (int *) 0L;
+ s2.ptr2 = (int *) 0xdeedbeef;
+ a = (int *) 0L;
+ b = (int *) 0xdeedbeef;
+ c = (int *) 0L;
+ d = (int *) 0xdeedbeef;
+
+ int n1, n2, n3, n4;
+ n1 = n2 = n3 = n4 = 0;
+
+ #pragma omp target enter data map(s1.ptr1[4:n1], s1.ptr2[6:n2], a[3:n3], b[2:n4])
+
+ #pragma omp target map(s2.ptr1[4:n1], s2.ptr2[2:n2], c[6:n3], d[9:n4])
+ {
+ if (s2.ptr1 != (void *) 0L || s2.ptr2 != (void *) 0xdeedbeef
+ || c != (void *) 0L || d != (void *) 0xdeedbeef)
+ __builtin_abort ();
+ }
+
+ #pragma omp target map(s1.ptr1[4:n1], s1.ptr2[6:n2], a[3:n3], b[2:n4])
+ {
+ if (s1.ptr1 != (void *) 0L || s1.ptr2 != (void *) 0xdeedbeef
+ || a != (void *) 0L || b != (void *) 0xdeedbeef)
+ __builtin_abort ();
+ }
+
+ #pragma omp target
+ {
+ if (s1.ptr1 != (void *) 0L || s1.ptr2 != (void *) 0xdeedbeef
+ || a != (void *) 0L || b != (void *) 0xdeedbeef)
+ __builtin_abort ();
+ }
+
+ #pragma omp target exit data map(s1.ptr1[4:n1], s1.ptr2[6:n2], a[3:n3], b[2:n4])
+}
+
+int
+main ()
+{
+ struct S {
+ int *ptr1, *ptr2;
+ };
+ struct S s1, s2;
+ int *a, *b, *c, *d;
+ s1.ptr1 = (int *) 0L;
+ s1.ptr2 = (int *) 0xdeedbeef;
+ s2.ptr1 = (int *) 0L;
+ s2.ptr2 = (int *) 0xdeedbeef;
+ a = (int *) 0L;
+ b = (int *) 0xdeedbeef;
+ c = (int *) 0L;
+ d = (int *) 0xdeedbeef;
+
+ int n1, n2, n3, n4;
+ n1 = n2 = n3 = n4 = 0;
+
+ #pragma omp target enter data map(s1.ptr1[:n1], s1.ptr2[:n2], a[:n3], b[:n4])
+
+ #pragma omp target map(s2.ptr1[:n1], s2.ptr2[:n2], c[:n3], d[:n4])
+ {
+ if (s2.ptr1 != (void *) 0L || s2.ptr2 != (void *) 0xdeedbeef
+ || c != (void *) 0L || d != (void *) 0xdeedbeef)
+ __builtin_abort ();
+ }
+
+ #pragma omp target map(s1.ptr1[:n1], s1.ptr2[:n2], a[:n3], b[:n4])
+ {
+ if (s1.ptr1 != (void *) 0L || s1.ptr2 != (void *) 0xdeedbeef
+ || a != (void *) 0L || b != (void *) 0xdeedbeef)
+ __builtin_abort ();
+ }
+
+ #pragma omp target
+ {
+ if (s1.ptr1 != (void *) 0L || s1.ptr2 != (void *) 0xdeedbeef
+ || a != (void *) 0L || b != (void *) 0xdeedbeef)
+ __builtin_abort ();
+ }
+
+ #pragma omp target exit data map(s1.ptr1[:n1], s1.ptr2[:n2], a[:n3], b[:n4])
+
+ with_offset ();
+}
diff --git a/libgomp/testsuite/libgomp.fortran/alloc-comp-4.f90 b/libgomp/testsuite/libgomp.fortran/alloc-comp-4.f90
new file mode 100644
index 0000000..d5e982b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/alloc-comp-4.f90
@@ -0,0 +1,75 @@
+!
+! Check that mapping with map(var%tiles(1)) works.
+!
+! This uses deep mapping to handle the allocatable
+! derived-type components
+!
+! The tricky part is that GCC intermittently generates
+! an SSA_NAME that needs to be resolved.
+!
+module m
+type t
+ integer, allocatable :: den1(:,:), den2(:,:)
+end type t
+
+type t2
+ type(t), allocatable :: tiles(:)
+end type t2
+end
+
+use m
+use iso_c_binding
+implicit none (type, external)
+type(t2), target :: var
+logical :: is_self_map
+type(C_ptr) :: pden1, pden2, ptiles, ptiles1
+
+allocate(var%tiles(1))
+var%tiles(1)%den1 = reshape([1,2,3,4],[2,2])
+var%tiles(1)%den2 = reshape([11,22,33,44],[2,2])
+
+ptiles = c_loc(var%tiles)
+ptiles1 = c_loc(var%tiles(1))
+pden1 = c_loc(var%tiles(1)%den1)
+pden2 = c_loc(var%tiles(1)%den2)
+
+
+is_self_map = .false.
+!$omp target map(to: is_self_map)
+ is_self_map = .true.
+!$omp end target
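+
+! is_self_map is true only when the target region runs in the same address
+! space as the host (host fallback or shared/unified memory), i.e. when host
+! pointers remain valid inside the target region.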
+
+!$omp target enter data map(var%tiles(1))
+
+!$omp target firstprivate(ptiles, ptiles1, pden1, pden2)
+ if (any (var%tiles(1)%den1 /= reshape([1,2,3,4],[2,2]))) stop 1
+ if (any (var%tiles(1)%den2 /= reshape([11,22,33,44],[2,2]))) stop 2
+ var%tiles(1)%den1 = var%tiles(1)%den1 + 5
+ var%tiles(1)%den2 = var%tiles(1)%den2 + 7
+
+ if (is_self_map) then
+ if (.not. c_associated (ptiles, c_loc(var%tiles))) stop 3
+ if (.not. c_associated (ptiles1, c_loc(var%tiles(1)))) stop 4
+ if (.not. c_associated (pden1, c_loc(var%tiles(1)%den1))) stop 5
+ if (.not. c_associated (pden2, c_loc(var%tiles(1)%den2))) stop 6
+ else
+ if (c_associated (ptiles, c_loc(var%tiles))) stop 3
+ if (c_associated (ptiles1, c_loc(var%tiles(1)))) stop 4
+ if (c_associated (pden1, c_loc(var%tiles(1)%den1))) stop 5
+ if (c_associated (pden2, c_loc(var%tiles(1)%den2))) stop 6
+ endif
+!$omp end target
+
+if (is_self_map) then
+ if (any (var%tiles(1)%den1 /= 5 + reshape([1,2,3,4],[2,2]))) stop 7
+ if (any (var%tiles(1)%den2 /= 7 + reshape([11,22,33,44],[2,2]))) stop 8
+else
+ if (any (var%tiles(1)%den1 /= reshape([1,2,3,4],[2,2]))) stop 7
+ if (any (var%tiles(1)%den2 /= reshape([11,22,33,44],[2,2]))) stop 8
+endif
+
+!$omp target exit data map(var%tiles(1))
+
+if (any (var%tiles(1)%den1 /= 5 + reshape([1,2,3,4],[2,2]))) stop 7
+if (any (var%tiles(1)%den2 /= 7 + reshape([11,22,33,44],[2,2]))) stop 8
+end
diff --git a/libgomp/testsuite/libgomp.fortran/allocate-8a.f90 b/libgomp/testsuite/libgomp.fortran/allocate-8a.f90
new file mode 100644
index 0000000..5f6c8c1
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/allocate-8a.f90
@@ -0,0 +1,45 @@
+! { dg-additional-options "-fopenmp-allocators" }
+! { dg-additional-options "-fdump-tree-omplower" }
+program main
+ use iso_c_binding
+ use omp_lib
+ implicit none (type, external)
+ integer(omp_allocator_handle_kind):: alloc_h
+ integer :: i, N
+ integer(c_intptr_t) :: intptr
+ integer, allocatable :: A(:)
+ type(omp_alloctrait):: traits(1) = [omp_alloctrait(omp_atk_alignment, 128)]
+
+ N = 10
+ alloc_h = omp_init_allocator(omp_default_mem_space, 1, traits)
+
+ !$omp allocate(A) allocator(alloc_h)
+ allocate(A(N))
+ a(:) = [(i, i=1,N)]
+ if (mod (transfer (loc(a), intptr),128) /= 0) &
+ stop 1
+ if (any (a /= [(i, i=1,N)])) &
+ stop 2
+ deallocate(A)
+ !$omp allocate(A) allocator(alloc_h) align(512)
+ allocate(A(N))
+ block
+ integer, allocatable :: B(:)
+ !$omp allocators allocate(allocator(alloc_h), align(256) : B)
+ allocate(B(N))
+ B(:) = [(2*i, i=1,N)]
+ A(:) = B
+ if (mod (transfer (loc(B), intptr), 256) /= 0) &
+ stop 1
+ ! end of scope deallocation
+ end block
+ if (mod (transfer (loc(a), intptr),512) /= 0) &
+ stop 1
+ if (any (a /= [(2*i, i=1,N)])) &
+ stop 2
+  deallocate(A) ! Must deallocate here - before the allocator is destroyed
+ call omp_destroy_allocator(alloc_h)
+ ! No auto dealloc of A because it is SAVE
+end
+! { dg-final { scan-tree-dump-times "__builtin_GOMP_alloc \\(" 3 "omplower" } }
+! { dg-final { scan-tree-dump-times "__builtin_GOMP_free \\(" 3 "omplower" } }
diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip-amd-full.F90 b/libgomp/testsuite/libgomp.fortran/interop-hip-amd-full.F90
new file mode 100644
index 0000000..eb2f437
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/interop-hip-amd-full.F90
@@ -0,0 +1,10 @@
+! { dg-do run { target { offload_device_gcn } } }
+! { dg-do link { target { ! offload_device_gcn } } }
+
+! { dg-require-effective-target gomp_hipfort_module }
+! { dg-require-effective-target gomp_libamdhip64 }
+! { dg-additional-options "-lamdhip64" }
+
+#define HAVE_HIPFORT 1
+
+#include "interop-hip.h"
diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip-amd-no-module.F90 b/libgomp/testsuite/libgomp.fortran/interop-hip-amd-no-module.F90
new file mode 100644
index 0000000..0ebbe80
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/interop-hip-amd-no-module.F90
@@ -0,0 +1,9 @@
+! { dg-do run { target { offload_device_gcn } } }
+! { dg-do link { target { ! offload_device_gcn } } }
+
+! { dg-require-effective-target gomp_libamdhip64 }
+! { dg-additional-options "-lamdhip64" }
+
+#define USE_HIP_FALLBACK_MODULE 1
+
+#include "interop-hip.h"
diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-full.F90 b/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-full.F90
new file mode 100644
index 0000000..d29a689
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-full.F90
@@ -0,0 +1,12 @@
+! { dg-do run { target { offload_device_nvptx } } }
+! { dg-do link { target { ! offload_device_nvptx } } }
+
+! { dg-require-effective-target gomp_hipfort_module }
+! { dg-require-effective-target openacc_cudart }
+! { dg-require-effective-target openacc_cuda }
+! { dg-additional-options "-lcuda -lcudart" }
+
+#define HAVE_HIPFORT 1
+#define USE_CUDA_NAMES 1
+
+#include "interop-hip.h"
diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90 b/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90
new file mode 100644
index 0000000..2063610
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90
@@ -0,0 +1,11 @@
+! { dg-do run { target { offload_device_nvptx } } }
+! { dg-do link { target { ! offload_device_nvptx } } }
+
+! { dg-require-effective-target openacc_libcudart }
+! { dg-require-effective-target openacc_libcuda }
+! { dg-additional-options "-lcuda -lcudart" }
+
+#define USE_CUDA_NAMES 1
+#define USE_HIP_FALLBACK_MODULE 1
+
+#include "interop-hip.h"
diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip.h b/libgomp/testsuite/libgomp.fortran/interop-hip.h
new file mode 100644
index 0000000..753ccce
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/interop-hip.h
@@ -0,0 +1,214 @@
+! Minimal check whether HIP works - by checking whether the API routines
+! seem to work. This includes a fallback if hipfort is not available
+
+#ifndef HAVE_HIPFORT
+#ifndef USE_HIP_FALLBACK_MODULE
+#if USE_CUDA_NAMES
+#warning "Using fallback implementation for module hipfort as HAVE_HIPFORT is undefined (for NVIDIA/CUDA)"
+#else
+#warning "Using fallback implementation for module hipfort as HAVE_HIPFORT is undefined - assume AMD as USE_CUDA_NAMES is unset"
+#endif
+#endif
+module hipfort ! Minimal implementation for the testsuite
+ implicit none
+
+ enum, bind(c)
+ enumerator :: hipSuccess = 0
+ enumerator :: hipErrorNotSupported = 801
+ end enum
+
+ enum, bind(c)
+ enumerator :: hipDeviceAttributeClockRate = 5
+ enumerator :: hipDeviceAttributeMaxGridDimX = 29
+ end enum
+
+ interface
+ integer(kind(hipSuccess)) function hipDeviceGetAttribute (ip, attr, dev) &
+#if USE_CUDA_NAMES
+ bind(c, name="cudaDeviceGetAttribute")
+#else
+ bind(c, name="hipDeviceGetAttribute")
+#endif
+ use iso_c_binding, only: c_ptr, c_int
+ import
+ implicit none
+ type(c_ptr), value :: ip
+ integer(kind(hipDeviceAttributeClockRate)), value :: attr
+ integer(c_int), value :: dev
+ end
+
+ integer(kind(hipSuccess)) function hipCtxGetApiVersion (ctx, ip) &
+#if USE_CUDA_NAMES
+ bind(c, name="cudaCtxGetApiVersion")
+#else
+ bind(c, name="hipCtxGetApiVersion")
+#endif
+ use iso_c_binding, only: c_ptr
+ import
+ implicit none
+ type(c_ptr), value :: ctx, ip
+ end
+
+ integer(kind(hipSuccess)) function hipStreamQuery (stream) &
+#if USE_CUDA_NAMES
+ bind(c, name="cudaStreamQuery")
+#else
+ bind(c, name="hipStreamQuery")
+#endif
+ use iso_c_binding, only: c_ptr
+ import
+ implicit none
+ type(c_ptr), value :: stream
+ end
+
+ integer(kind(hipSuccess)) function hipStreamGetFlags (stream, flags) &
+#if USE_CUDA_NAMES
+ bind(c, name="cudaStreamGetFlags")
+#else
+ bind(c, name="hipStreamGetFlags")
+#endif
+ use iso_c_binding, only: c_ptr
+ import
+ implicit none
+ type(c_ptr), value :: stream
+ type(c_ptr), value :: flags
+ end
+ end interface
+end module
+#endif
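+
+! The code below only uses the hipfort API names; when the fallback module
+! above is used with USE_CUDA_NAMES, those names bind to the corresponding
+! CUDA runtime routines instead.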
+
+program main
+ use iso_c_binding, only: c_ptr, c_int, c_loc
+ use omp_lib
+ use hipfort
+ implicit none (type, external)
+
+! Only supported since CUDA 12.8 - skip for better compatibility
+! ! Manually implement hipStreamGetDevice as hipfort misses it
+! ! -> https://github.com/ROCm/hipfort/issues/238
+! interface
+! integer(kind(hipSuccess)) function my_hipStreamGetDevice(stream, dev) &
+!#if USE_CUDA_NAMES
+! bind(c, name="cudaStreamGetDevice")
+!#else
+! bind(c, name="hipStreamGetDevice")
+!#endif
+! use iso_c_binding, only: c_ptr, c_int
+! import
+! implicit none
+! type(c_ptr), value :: stream
+! integer(c_int) :: dev
+! end
+! end interface
+
+ integer(c_int), target :: ivar
+ integer(omp_interop_rc_kind) :: res
+ integer(omp_interop_kind) :: obj
+ integer(omp_interop_fr_kind) :: fr
+ integer(kind(hipSuccess)) :: hip_err
+ integer(c_int) :: hip_dev, dev_stream
+ type(c_ptr) :: hip_ctx, hip_sm
+
+ logical :: vendor_is_amd
+
+ obj = omp_interop_none
+
+ !$omp interop init(target, targetsync, prefer_type("hip") : obj)
+
+ fr = omp_get_interop_int (obj, omp_ipr_fr_id, res)
+ if (res /= omp_irc_success) error stop 1
+ if (fr /= omp_ifr_hip) error stop 1
+
+ ivar = omp_get_interop_int (obj, omp_ipr_vendor, res)
+ if (ivar == 1) then ! AMD
+ vendor_is_amd = .true.
+ else if (ivar == 11) then ! Nvidia
+ vendor_is_amd = .false.
+ else
+ error stop 1 ! Unknown
+ endif
+#if USE_CUDA_NAMES
+ if (vendor_is_amd) error stop 1
+#else
+ if (.not. vendor_is_amd) error stop 1
+#endif
+
+ ! Check whether the omp_ipr_device -> hipDevice_t yields a valid device.
+
+ hip_dev = omp_get_interop_int (obj, omp_ipr_device, res)
+ if (res /= omp_irc_success) error stop 1
+
+! AMD messed up the attribute handling in Fortran, missing the
+! translation table it has for C.
+block
+ enum, bind(c)
+ enumerator :: cudaDevAttrClockRate = 13
+ enumerator :: cudaDevAttrMaxGridDimX = 5
+ end enum
+
+  ! Assume the clock rate is available and > 1 GHz; the value is in kHz.
+ ! c_loc is completely bogus, but as AMD messed up the interface ...
+ ! Cf. https://github.com/ROCm/hipfort/issues/239
+if (vendor_is_amd) then
+ hip_err = hipDeviceGetAttribute (c_loc(ivar), hipDeviceAttributeClockRate, hip_dev)
+else
+ hip_err = hipDeviceGetAttribute (c_loc(ivar), cudaDevAttrClockRate, hip_dev)
+endif
+ if (hip_err /= hipSuccess) error stop 1
+ if (ivar <= 1000000) error stop 1 ! in kHz
+
+ ! Assume that the MaxGridDimX is available and > 1024
+ ! c_loc is completely bogus, but as AMD messed up the interface ...
+ ! Cf. https://github.com/ROCm/hipfort/issues/239
+if (vendor_is_amd) then
+ hip_err = hipDeviceGetAttribute (c_loc(ivar), hipDeviceAttributeMaxGridDimX, hip_dev)
+else
+ hip_err = hipDeviceGetAttribute (c_loc(ivar), cudaDevAttrMaxGridDimX, hip_dev)
+endif
+ if (hip_err /= hipSuccess) error stop 1
+ if (ivar <= 1024) error stop 1
+end block
+
+
+ ! Check whether the omp_ipr_device_context -> hipCtx_t yields a context.
+
+ hip_ctx = omp_get_interop_ptr (obj, omp_ipr_device_context, res)
+ if (res /= omp_irc_success) error stop 1
+
+! ! Assume API Version > 0 for Nvidia, hipErrorNotSupported for AMD.
+! ivar = -99
+! ! AMD deprecated hipCtxGetApiVersion (in C/C++)
+! hip_err = hipCtxGetApiVersion (hip_ctx, c_loc(ivar))
+!
+! if (vendor_is_amd) then
+! if (hip_err /= hipErrorNotSupported .or. ivar /= -99) error stop 1
+! else
+! if (hip_err /= hipSuccess) error stop 1
+! if (ivar <= 0) error stop 1
+! end if
+
+
+ ! Check whether the omp_ipr_targetsync -> hipStream_t yields a stream.
+
+ hip_sm = omp_get_interop_ptr (obj, omp_ipr_targetsync, res)
+ if (res /= omp_irc_success) error stop 1
+
+! Skip as this is only in CUDA 12.8
+! dev_stream = 99
+! ! Not (yet) implemented: https://github.com/ROCm/hipfort/issues/238
+! ! hip_err = hipStreamGetDevice (hip_sm, dev_stream)
+! hip_err = my_hipStreamGetDevice (hip_sm, dev_stream)
+! if (hip_err /= hipSuccess) error stop 1
+! if (dev_stream /= hip_dev) error stop 1
+
+ ! Get flags of the stream
+ hip_err = hipStreamGetFlags (hip_sm, c_loc (ivar))
+ if (hip_err /= hipSuccess) error stop 1
+ ! Accept any value
+
+  ! All jobs should have been completed (as there were none)
+ hip_err = hipStreamQuery (hip_sm)
+ if (hip_err /= hipSuccess) error stop 1
+
+ !$omp interop destroy(obj)
+end
diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9-usm.f90 b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9-usm.f90
new file mode 100644
index 0000000..90378c0
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9-usm.f90
@@ -0,0 +1,11 @@
+! { dg-additional-options "-cpp -DUSE_USM_REQUIREMENT=1 -Wno-openmp" }
+!
+! We silence the warning:
+! Mapping of polymorphic list item '...' is unspecified behavior [-Wopenmp]
+!
+! Ensure that polymorphic mapping is diagnosed as undefined behavior
+! Ensure that static access to polymorphic variables works
+
+! Run map-alloc-comp-9.f90 in unified-shared-memory mode
+
+#include "map-alloc-comp-9.f90"
diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9.f90 b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9.f90
index 3cec392..26c73d7 100644
--- a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9.f90
+++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9.f90
@@ -1,8 +1,19 @@
+! { dg-additional-options "-cpp" }
+!
! Ensure that polymorphic mapping is diagnosed as undefined behavior
! Ensure that static access to polymorphic variables works
+! Some extended tests are only run with shared memory
+! To enforce this (where possible) on the device side:
+! #define USE_USM_REQUIREMENT
+! which is done in map-alloc-comp-9-usm.f90
+
subroutine test(case)
implicit none(type, external)
+#ifdef USE_USM_REQUIREMENT
+ !$omp requires unified_shared_memory
+#endif
+
type t
integer :: x(4)
end type t
@@ -73,10 +84,14 @@ var4%y2(2)%y%x%x = -7 * [1111,2222,3333,4444]
var4%y2(2)%y%x2(1)%x = -8 * [1111,2222,3333,4444]
var4%y2(2)%y%x2(2)%x = -9 * [1111,2222,3333,4444]
+#ifdef USE_USM_REQUIREMENT
+is_shared_mem = .true.
+#else
is_shared_mem = .false.
!$omp target map(to: is_shared_mem)
is_shared_mem = .true.
!$omp end target
+#endif
if (case == 1) then
! implicit mapping
@@ -532,6 +547,10 @@ end subroutine test
program main
use omp_lib
implicit none(type, external)
+#ifdef USE_USM_REQUIREMENT
+ !$omp requires unified_shared_memory
+#endif
+
interface
subroutine test(case)
integer, value :: case
diff --git a/libgomp/testsuite/libgomp.fortran/metadirective-1.f90 b/libgomp/testsuite/libgomp.fortran/metadirective-1.f90
index 7b3e09f..d6f4d5b 100644
--- a/libgomp/testsuite/libgomp.fortran/metadirective-1.f90
+++ b/libgomp/testsuite/libgomp.fortran/metadirective-1.f90
@@ -1,4 +1,5 @@
-! { dg-do run }
+! { dg-do run { target { ! offload_target_nvptx } } }
+! { dg-do compile { target offload_target_nvptx } }
program test
implicit none
@@ -33,6 +34,10 @@ program test
contains
subroutine f (x, y, z)
integer :: x(N), y(N), z(N)
+    ! The following fails because, on the host, the target side cannot be
+    ! resolved - and whether or not a 'teams' construct is present affects
+    ! how 'target' is called.  See PR118694, esp. comment 9.
+    ! Note also the dg-do compile above for offload_target_nvptx.
!$omp target map (to: x, y) map(from: z)
block
@@ -43,6 +48,7 @@ contains
z(i) = x(i) * y(i)
enddo
end block
+    ! { dg-bogus "'target' construct with nested 'teams' construct contains directives outside of the 'teams' construct" "PR118694" { xfail offload_target_nvptx } .-9 }
end subroutine
subroutine g (x, y, z)
integer :: x(N), y(N), z(N)
@@ -56,6 +62,7 @@ contains
z(i) = x(i) * y(i)
enddo
end block
+    ! { dg-bogus "'target' construct with nested 'teams' construct contains directives outside of the 'teams' construct" "PR118694" { xfail offload_target_nvptx } .-9 }
!$omp end target
end subroutine
end program
diff --git a/libgomp/testsuite/libgomp.fortran/omp_target_memset-2.f90 b/libgomp/testsuite/libgomp.fortran/omp_target_memset-2.f90
new file mode 100644
index 0000000..2641086
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/omp_target_memset-2.f90
@@ -0,0 +1,67 @@
+! PR libgomp/120444
+! Async version
+
+use omp_lib
+use iso_c_binding
+implicit none (type, external)
+integer(c_int) :: dev
+
+!$omp parallel do
+do dev = omp_initial_device, omp_get_num_devices ()
+block
+ integer(c_int) :: i, val, start, tail
+ type(c_ptr) :: ptr, ptr2, tmpptr
+ integer(c_int8_t), pointer, contiguous :: fptr(:)
+ integer(c_intptr_t) :: intptr
+ integer(c_size_t), parameter :: count = 1024
+ integer(omp_depend_kind) :: dep(1)
+
+ ptr = omp_target_alloc (count, dev)
+
+ !$omp depobj(dep(1)) depend(inout: ptr)
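+  ! The depend object is used to order the asynchronous memset calls with the
+  ! 'nowait' target regions below that read and update the buffer.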
+
+  ! Also play around with the alignment, as hsa_amd_memory_fill operates
+ ! on multiples of 4 bytes (c_int32_t)
+
+ do start = 0, 31
+ do tail = 0, 31
+ val = iachar('0') + start + tail
+
+ tmpptr = transfer (transfer (ptr, intptr) + start, tmpptr)
+ ptr2 = omp_target_memset_async (tmpptr, val, count - start - tail, dev, 0)
+
+ if (.not. c_associated (tmpptr, ptr2)) stop 1
+
+ !$omp taskwait
+
+ !$omp target device(dev) is_device_ptr(ptr) depend(depobj: dep(1)) nowait
+ do i = 1 + start, int(count, c_int) - start - tail
+ call c_f_pointer (ptr, fptr, [count])
+ if (fptr(i) /= int (val, c_int8_t)) stop 2
+ fptr(i) = fptr(i) + 2_c_int8_t
+ end do
+ !$omp end target
+
+ ptr2 = omp_target_memset_async (tmpptr, val + 3, &
+ count - start - tail, dev, 1, dep)
+
+ !$omp target device(dev) is_device_ptr(ptr) depend(depobj: dep(1)) nowait
+ do i = 1 + start, int(count, c_int) - start - tail
+ call c_f_pointer (ptr, fptr, [count])
+ if (fptr(i) /= int (val + 3, c_int8_t)) stop 3
+ fptr(i) = fptr(i) - 1_c_int8_t
+ end do
+ !$omp end target
+
+ ptr2 = omp_target_memset_async (tmpptr, val - 3, &
+ count - start - tail, dev, 1, dep)
+
+ !$omp taskwait depend (depobj: dep(1))
+ end do
+ end do
+
+ !$omp depobj(dep(1)) destroy
+ call omp_target_free (ptr, dev)
+end block
+end do
+end
diff --git a/libgomp/testsuite/libgomp.fortran/omp_target_memset.f90 b/libgomp/testsuite/libgomp.fortran/omp_target_memset.f90
new file mode 100644
index 0000000..1ee184a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/omp_target_memset.f90
@@ -0,0 +1,39 @@
+! PR libgomp/120444
+
+use omp_lib
+use iso_c_binding
+implicit none (type, external)
+
+integer(c_int) :: dev, i, val, start, tail
+type(c_ptr) :: ptr, ptr2, tmpptr
+integer(c_int8_t), pointer, contiguous :: fptr(:)
+integer(c_intptr_t) :: intptr
+integer(c_size_t), parameter :: count = 1024
+
+do dev = omp_initial_device, omp_get_num_devices ()
+ ptr = omp_target_alloc (count, dev)
+
+ ! Also vary the alignment, as hsa_amd_memory_fill operates
+ ! on multiples of 4 bytes (c_int32_t).
+
+ do start = 0, 31
+ do tail = 0, 31
+ val = iachar('0') + start + tail
+
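+ ! Advance the device pointer by 'start' bytes: convert the c_ptr to an
+ ! integer, add the offset, and convert back to a c_ptr.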
+ tmpptr = transfer (transfer (ptr, intptr) + start, tmpptr)
+ ptr2 = omp_target_memset (tmpptr, val, count - start - tail, dev)
+
+ if (.not. c_associated (tmpptr, ptr2)) stop 1
+
+ !$omp target device(dev) is_device_ptr(ptr)
+ do i = 1 + start, int(count, c_int) - start - tail
+ call c_f_pointer (ptr, fptr, [count])
+ if (fptr(i) /= int (val, c_int8_t)) stop 2
+ end do
+ !$omp end target
+ end do
+ end do
+
+ call omp_target_free (ptr, dev)
+end do
+end
diff --git a/libgomp/testsuite/libgomp.fortran/target-enter-data-8.f90 b/libgomp/testsuite/libgomp.fortran/target-enter-data-8.f90
new file mode 100644
index 0000000..c6d671c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/target-enter-data-8.f90
@@ -0,0 +1,532 @@
+! { dg-additional-options "-cpp" }
+
+! FIXME: Some tests do not work yet; for now, they are disabled via '#if 0'.
+
+! Check that 'map(alloc:' properly works with
+! - deferred-length character strings
+! - arrays with array descriptors
+! For those, the array descriptor / string length must be mapped with 'to:'
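+!
+! Each variable below is exercised with essentially the same pattern:
+!   !$omp target enter data map(alloc: x)   ! allocate 'x' on the device
+!   !$omp target map(alloc: x)              ! check bounds/length and assign values
+!   !$omp target exit data map(from: x)     ! copy back, then check again on the host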
+
+program main
+implicit none
+
+type t
+ integer :: ic(2:5), ic2
+ character(len=11) :: ccstr(3:4), ccstr2
+ character(len=11,kind=4) :: cc4str(3:7), cc4str2
+ integer, pointer :: pc(:), pc2
+ character(len=:), pointer :: pcstr(:), pcstr2
+ character(len=:,kind=4), pointer :: pc4str(:), pc4str2
+end type t
+
+type(t) :: dt
+
+integer :: ii(5), ii2
+character(len=11) :: clstr(-1:1), clstr2
+character(len=11,kind=4) :: cl4str(0:3), cl4str2
+integer, pointer :: ip(:), ip2
+integer, allocatable :: ia(:), ia2
+character(len=:), pointer :: pstr(:), pstr2
+character(len=:), allocatable :: astr(:), astr2
+character(len=:,kind=4), pointer :: p4str(:), p4str2
+character(len=:,kind=4), allocatable :: a4str(:), a4str2
+
+
+allocate(dt%pc(5), dt%pc2)
+allocate(character(len=2) :: dt%pcstr(2))
+allocate(character(len=4) :: dt%pcstr2)
+
+allocate(character(len=3,kind=4) :: dt%pc4str(2:3))
+allocate(character(len=5,kind=4) :: dt%pc4str2)
+
+allocate(ip(5), ip2, ia(8), ia2)
+allocate(character(len=2) :: pstr(-2:0))
+allocate(character(len=4) :: pstr2)
+allocate(character(len=6) :: astr(3:5))
+allocate(character(len=8) :: astr2)
+
+allocate(character(len=3,kind=4) :: p4str(2:4))
+allocate(character(len=5,kind=4) :: p4str2)
+allocate(character(len=7,kind=4) :: a4str(-2:3))
+allocate(character(len=9,kind=4) :: a4str2)
+
+
+! integer :: ic(2:5), ic2
+
+!$omp target enter data map(alloc: dt%ic)
+!$omp target map(alloc: dt%ic)
+ if (size(dt%ic) /= 4) error stop
+ if (lbound(dt%ic, 1) /= 2) error stop
+ if (ubound(dt%ic, 1) /= 5) error stop
+ dt%ic = [22, 33, 44, 55]
+!$omp end target
+!$omp target exit data map(from: dt%ic)
+if (size(dt%ic) /= 4) error stop
+if (lbound(dt%ic, 1) /= 2) error stop
+if (ubound(dt%ic, 1) /= 5) error stop
+if (any (dt%ic /= [22, 33, 44, 55])) error stop
+
+!$omp target enter data map(alloc: dt%ic2)
+!$omp target map(alloc: dt%ic2)
+ dt%ic2 = 42
+!$omp end target
+!$omp target exit data map(from: dt%ic2)
+if (dt%ic2 /= 42) error stop
+
+
+! character(len=11) :: ccstr(3:4), ccstr2
+
+!$omp target enter data map(alloc: dt%ccstr)
+!$omp target map(alloc: dt%ccstr)
+ if (len(dt%ccstr) /= 11) error stop
+ if (size(dt%ccstr) /= 2) error stop
+ if (lbound(dt%ccstr, 1) /= 3) error stop
+ if (ubound(dt%ccstr, 1) /= 4) error stop
+ dt%ccstr = ["12345678901", "abcdefghijk"]
+!$omp end target
+!$omp target exit data map(from: dt%ccstr)
+if (len(dt%ccstr) /= 11) error stop
+if (size(dt%ccstr) /= 2) error stop
+if (lbound(dt%ccstr, 1) /= 3) error stop
+if (ubound(dt%ccstr, 1) /= 4) error stop
+if (any (dt%ccstr /= ["12345678901", "abcdefghijk"])) error stop
+
+!$omp target enter data map(alloc: dt%ccstr2)
+!$omp target map(alloc: dt%ccstr2)
+ if (len(dt%ccstr2) /= 11) error stop
+ dt%ccstr2 = "ABCDEFGHIJK"
+!$omp end target
+!$omp target exit data map(from: dt%ccstr2)
+if (len(dt%ccstr2) /= 11) error stop
+if (dt%ccstr2 /= "ABCDEFGHIJK") error stop
+
+
+! character(len=11,kind=4) :: cc4str(3:7), cc4str2
+
+#if 0
+! Value check fails
+!$omp target enter data map(alloc: dt%cc4str)
+!$omp target map(alloc: dt%cc4str)
+ if (len(dt%cc4str) /= 11) error stop
+ if (size(dt%cc4str) /= 5) error stop
+ if (lbound(dt%cc4str, 1) /= 3) error stop
+ if (ubound(dt%cc4str, 1) /= 7) error stop
+ dt%cc4str = [4_"12345678901", 4_"abcdefghijk", &
+ 4_"qerftcea6ds", 4_"a1f9g37ga4.", &
+ 4_"45ngwj56sj2"]
+!$omp end target
+!$omp target exit data map(from: dt%cc4str)
+if (len(dt%cc4str) /= 11) error stop
+if (size(dt%cc4str) /= 5) error stop
+if (lbound(dt%cc4str, 1) /= 3) error stop
+if (ubound(dt%cc4str, 1) /= 7) error stop
+if (dt%cc4str(3) /= 4_"12345678901") error stop
+if (dt%cc4str(4) /= 4_"abcdefghijk") error stop
+if (dt%cc4str(5) /= 4_"qerftcea6ds") error stop
+if (dt%cc4str(6) /= 4_"a1f9g37ga4.") error stop
+if (dt%cc4str(7) /= 4_"45ngwj56sj2") error stop
+#endif
+
+!$omp target enter data map(alloc: dt%cc4str2)
+!$omp target map(alloc: dt%cc4str2)
+ if (len(dt%cc4str2) /= 11) error stop
+ dt%cc4str2 = 4_"ABCDEFGHIJK"
+!$omp end target
+!$omp target exit data map(from: dt%cc4str2)
+if (len(dt%cc4str2) /= 11) error stop
+if (dt%cc4str2 /= 4_"ABCDEFGHIJK") error stop
+
+
+! integer, pointer :: pc(:), pc2
+! allocate(dt%pc(5), dt%pc2)
+
+!$omp target enter data map(alloc: dt%pc)
+!$omp target map(alloc: dt%pc)
+ if (.not. associated(dt%pc)) error stop
+ if (size(dt%pc) /= 5) error stop
+ if (lbound(dt%pc, 1) /= 1) error stop
+ if (ubound(dt%pc, 1) /= 5) error stop
+ dt%pc = [11, 22, 33, 44, 55]
+!$omp end target
+!$omp target exit data map(from: dt%pc)
+if (.not. associated(dt%pc)) error stop
+if (size(dt%pc) /= 5) error stop
+if (lbound(dt%pc, 1) /= 1) error stop
+if (ubound(dt%pc, 1) /= 5) error stop
+if (any (dt%pc /= [11, 22, 33, 44, 55])) error stop
+
+!$omp target enter data map(alloc: dt%pc2)
+!$omp target map(alloc: dt%pc2)
+ if (.not. associated(dt%pc2)) error stop
+ dt%pc2 = 99
+!$omp end target
+!$omp target exit data map(from: dt%pc2)
+if (dt%pc2 /= 99) error stop
+if (.not. associated(dt%pc2)) error stop
+
+
+! character(len=:), pointer :: pcstr(:), pcstr2
+! allocate(character(len=2) :: dt%pcstr(2))
+! allocate(character(len=4) :: dt%pcstr2)
+
+!$omp target enter data map(alloc: dt%pcstr)
+!$omp target map(alloc: dt%pcstr)
+ if (.not. associated(dt%pcstr)) error stop
+ if (len(dt%pcstr) /= 2) error stop
+ if (size(dt%pcstr) /= 2) error stop
+ if (lbound(dt%pcstr, 1) /= 1) error stop
+ if (ubound(dt%pcstr, 1) /= 2) error stop
+ dt%pcstr = ["01", "jk"]
+!$omp end target
+!$omp target exit data map(from: dt%pcstr)
+if (.not. associated(dt%pcstr)) error stop
+if (len(dt%pcstr) /= 2) error stop
+if (size(dt%pcstr) /= 2) error stop
+if (lbound(dt%pcstr, 1) /= 1) error stop
+if (ubound(dt%pcstr, 1) /= 2) error stop
+if (any (dt%pcstr /= ["01", "jk"])) error stop
+
+
+!$omp target enter data map(alloc: dt%pcstr2)
+!$omp target map(alloc: dt%pcstr2)
+ if (.not. associated(dt%pcstr2)) error stop
+ if (len(dt%pcstr2) /= 4) error stop
+ dt%pcstr2 = "HIJK"
+!$omp end target
+!$omp target exit data map(from: dt%pcstr2)
+if (.not. associated(dt%pcstr2)) error stop
+if (len(dt%pcstr2) /= 4) error stop
+if (dt%pcstr2 /= "HIJK") error stop
+
+
+! character(len=:,kind=4), pointer :: pc4str(:), pc4str2
+! allocate(character(len=3,kind=4) :: dt%pc4str(2:3))
+! allocate(character(len=5,kind=4) :: dt%pc4str2)
+
+!$omp target enter data map(alloc: dt%pc4str)
+!$omp target map(alloc: dt%pc4str)
+ if (.not. associated(dt%pc4str)) error stop
+ if (len(dt%pc4str) /= 3) error stop
+ if (size(dt%pc4str) /= 2) error stop
+ if (lbound(dt%pc4str, 1) /= 2) error stop
+ if (ubound(dt%pc4str, 1) /= 3) error stop
+ dt%pc4str = [4_"456", 4_"tzu"]
+!$omp end target
+!$omp target exit data map(from: dt%pc4str)
+if (.not. associated(dt%pc4str)) error stop
+if (len(dt%pc4str) /= 3) error stop
+if (size(dt%pc4str) /= 2) error stop
+if (lbound(dt%pc4str, 1) /= 2) error stop
+if (ubound(dt%pc4str, 1) /= 3) error stop
+if (dt%pc4str(2) /= 4_"456") error stop
+if (dt%pc4str(3) /= 4_"tzu") error stop
+
+!$omp target enter data map(alloc: dt%pc4str2)
+!$omp target map(alloc: dt%pc4str2)
+ if (.not. associated(dt%pc4str2)) error stop
+ if (len(dt%pc4str2) /= 5) error stop
+ dt%pc4str2 = 4_"98765"
+!$omp end target
+!$omp target exit data map(from: dt%pc4str2)
+if (.not. associated(dt%pc4str2)) error stop
+if (len(dt%pc4str2) /= 5) error stop
+if (dt%pc4str2 /= 4_"98765") error stop
+
+
+! integer :: ii(5), ii2
+
+!$omp target enter data map(alloc: ii)
+!$omp target map(alloc: ii)
+ if (size(ii) /= 5) error stop
+ if (lbound(ii, 1) /= 1) error stop
+ if (ubound(ii, 1) /= 5) error stop
+ ii = [-1, -2, -3, -4, -5]
+!$omp end target
+!$omp target exit data map(from: ii)
+if (size(ii) /= 5) error stop
+if (lbound(ii, 1) /= 1) error stop
+if (ubound(ii, 1) /= 5) error stop
+if (any (ii /= [-1, -2, -3, -4, -5])) error stop
+
+!$omp target enter data map(alloc: ii2)
+!$omp target map(alloc: ii2)
+ ii2 = -410
+!$omp end target
+!$omp target exit data map(from: ii2)
+if (ii2 /= -410) error stop
+
+
+! character(len=11) :: clstr(-1:1), clstr2
+
+!$omp target enter data map(alloc: clstr)
+!$omp target map(alloc: clstr)
+ if (len(clstr) /= 11) error stop
+ if (size(clstr) /= 3) error stop
+ if (lbound(clstr, 1) /= -1) error stop
+ if (ubound(clstr, 1) /= 1) error stop
+ clstr = ["12345678901", "abcdefghijk", "ABCDEFGHIJK"]
+!$omp end target
+!$omp target exit data map(from: clstr)
+if (len(clstr) /= 11) error stop
+if (size(clstr) /= 3) error stop
+if (lbound(clstr, 1) /= -1) error stop
+if (ubound(clstr, 1) /= 1) error stop
+if (any (clstr /= ["12345678901", "abcdefghijk", "ABCDEFGHIJK"])) error stop
+
+!$omp target enter data map(alloc: clstr2)
+!$omp target map(alloc: clstr2)
+ if (len(clstr2) /= 11) error stop
+ clstr2 = "ABCDEFghijk"
+!$omp end target
+!$omp target exit data map(from: clstr2)
+if (len(clstr2) /= 11) error stop
+if (clstr2 /= "ABCDEFghijk") error stop
+
+
+! character(len=11,kind=4) :: cl4str(0:3), cl4str2
+
+!$omp target enter data map(alloc: cl4str)
+!$omp target map(alloc: cl4str)
+ if (len(cl4str) /= 11) error stop
+ if (size(cl4str) /= 4) error stop
+ if (lbound(cl4str, 1) /= 0) error stop
+ if (ubound(cl4str, 1) /= 3) error stop
+ cl4str = [4_"12345678901", 4_"abcdefghijk", &
+ 4_"qerftcea6ds", 4_"a1f9g37ga4."]
+!$omp end target
+!$omp target exit data map(from: cl4str)
+if (len(cl4str) /= 11) error stop
+if (size(cl4str) /= 4) error stop
+if (lbound(cl4str, 1) /= 0) error stop
+if (ubound(cl4str, 1) /= 3) error stop
+if (cl4str(0) /= 4_"12345678901") error stop
+if (cl4str(1) /= 4_"abcdefghijk") error stop
+if (cl4str(2) /= 4_"qerftcea6ds") error stop
+if (cl4str(3) /= 4_"a1f9g37ga4.") error stop
+
+!$omp target enter data map(alloc: cl4str2)
+!$omp target map(alloc: cl4str2)
+ if (len(cl4str2) /= 11) error stop
+ cl4str2 = 4_"ABCDEFGHIJK"
+!$omp end target
+!$omp target exit data map(from: cl4str2)
+if (len(cl4str2) /= 11) error stop
+if (cl4str2 /= 4_"ABCDEFGHIJK") error stop
+
+
+! allocate(ip(5), ip2, ia(8), ia2)
+
+!$omp target enter data map(alloc: ip)
+!$omp target map(alloc: ip)
+ if (.not. associated(ip)) error stop
+ if (size(ip) /= 5) error stop
+ if (lbound(ip, 1) /= 1) error stop
+ if (ubound(ip, 1) /= 5) error stop
+ ip = [11, 22, 33, 44, 55]
+!$omp end target
+!$omp target exit data map(from: ip)
+if (.not. associated(ip)) error stop
+if (size(ip) /= 5) error stop
+if (lbound(ip, 1) /= 1) error stop
+if (ubound(ip, 1) /= 5) error stop
+if (any (ip /= [11, 22, 33, 44, 55])) error stop
+
+!$omp target enter data map(alloc: ip2)
+!$omp target map(alloc: ip2)
+ if (.not. associated(ip2)) error stop
+ ip2 = 99
+!$omp end target
+!$omp target exit data map(from: ip2)
+if (ip2 /= 99) error stop
+if (.not. associated(ip2)) error stop
+
+
+! allocate(ip(5), ip2, ia(8), ia2)
+
+!$omp target enter data map(alloc: ia)
+!$omp target map(alloc: ia)
+ if (.not. allocated(ia)) error stop
+ if (size(ia) /= 8) error stop
+ if (lbound(ia, 1) /= 1) error stop
+ if (ubound(ia, 1) /= 8) error stop
+ ia = [1,2,3,4,5,6,7,8]
+!$omp end target
+!$omp target exit data map(from: ia)
+if (.not. allocated(ia)) error stop
+if (size(ia) /= 8) error stop
+if (lbound(ia, 1) /= 1) error stop
+if (ubound(ia, 1) /= 8) error stop
+if (any (ia /= [1,2,3,4,5,6,7,8])) error stop
+
+!$omp target enter data map(alloc: ia2)
+!$omp target map(alloc: ia2)
+ if (.not. allocated(ia2)) error stop
+ ia2 = 102
+!$omp end target
+!$omp target exit data map(from: ia2)
+if (ia2 /= 102) error stop
+if (.not. allocated(ia2)) error stop
+
+
+! character(len=:), pointer :: pstr(:), pstr2
+! allocate(character(len=2) :: pstr(-2:0))
+! allocate(character(len=4) :: pstr2)
+
+!$omp target enter data map(alloc: pstr)
+!$omp target map(alloc: pstr)
+ if (.not. associated(pstr)) error stop
+ if (len(pstr) /= 2) error stop
+ if (size(pstr) /= 3) error stop
+ if (lbound(pstr, 1) /= -2) error stop
+ if (ubound(pstr, 1) /= 0) error stop
+ pstr = ["01", "jk", "aq"]
+!$omp end target
+!$omp target exit data map(from: pstr)
+if (.not. associated(pstr)) error stop
+if (len(pstr) /= 2) error stop
+if (size(pstr) /= 3) error stop
+if (lbound(pstr, 1) /= -2) error stop
+if (ubound(pstr, 1) /= 0) error stop
+if (any (pstr /= ["01", "jk", "aq"])) error stop
+
+!$omp target enter data map(alloc: pstr2)
+!$omp target map(alloc: pstr2)
+ if (.not. associated(pstr2)) error stop
+ if (len(pstr2) /= 4) error stop
+ pstr2 = "HIJK"
+!$omp end target
+!$omp target exit data map(from: pstr2)
+if (.not. associated(pstr2)) error stop
+if (len(pstr2) /= 4) error stop
+if (pstr2 /= "HIJK") error stop
+
+
+! character(len=:), allocatable :: astr(:), astr2
+! allocate(character(len=6) :: astr(3:5))
+! allocate(character(len=8) :: astr2)
+
+
+!$omp target enter data map(alloc: astr)
+!$omp target map(alloc: astr)
+ if (.not. allocated(astr)) error stop
+ if (len(astr) /= 6) error stop
+ if (size(astr) /= 3) error stop
+ if (lbound(astr, 1) /= 3) error stop
+ if (ubound(astr, 1) /= 5) error stop
+ astr = ["01db45", "jk$D%S", "zutg47"]
+!$omp end target
+!$omp target exit data map(from: astr)
+if (.not. allocated(astr)) error stop
+if (len(astr) /= 6) error stop
+if (size(astr) /= 3) error stop
+if (lbound(astr, 1) /= 3) error stop
+if (ubound(astr, 1) /= 5) error stop
+if (any (astr /= ["01db45", "jk$D%S", "zutg47"])) error stop
+
+
+!$omp target enter data map(alloc: astr2)
+!$omp target map(alloc: astr2)
+ if (.not. allocated(astr2)) error stop
+ if (len(astr2) /= 8) error stop
+ astr2 = "HIJKhijk"
+!$omp end target
+!$omp target exit data map(from: astr2)
+if (.not. allocated(astr2)) error stop
+if (len(astr2) /= 8) error stop
+if (astr2 /= "HIJKhijk") error stop
+
+
+! character(len=:,kind=4), pointer :: p4str(:), p4str2
+! allocate(character(len=3,kind=4) :: p4str(2:4))
+! allocate(character(len=5,kind=4) :: p4str2)
+
+! FAILS with value check
+
+!$omp target enter data map(alloc: p4str)
+!$omp target map(alloc: p4str)
+ if (.not. associated(p4str)) error stop
+ if (len(p4str) /= 3) error stop
+ if (size(p4str) /= 3) error stop
+ if (lbound(p4str, 1) /= 2) error stop
+ if (ubound(p4str, 1) /= 4) error stop
+ p4str(:) = [4_"f85", 4_"8af", 4_"A%F"]
+!$omp end target
+!$omp target exit data map(from: p4str)
+if (.not. associated(p4str)) error stop
+if (len(p4str) /= 3) error stop
+if (size(p4str) /= 3) error stop
+if (lbound(p4str, 1) /= 2) error stop
+if (ubound(p4str, 1) /= 4) error stop
+if (p4str(2) /= 4_"f85") error stop
+if (p4str(3) /= 4_"8af") error stop
+if (p4str(4) /= 4_"A%F") error stop
+
+!$omp target enter data map(alloc: p4str2)
+!$omp target map(alloc: p4str2)
+ if (.not. associated(p4str2)) error stop
+ if (len(p4str2) /= 5) error stop
+ p4str2 = 4_"9875a"
+!$omp end target
+!$omp target exit data map(from: p4str2)
+if (.not. associated(p4str2)) error stop
+if (len(p4str2) /= 5) error stop
+if (p4str2 /= 4_"9875a") error stop
+
+
+! character(len=:,kind=4), allocatable :: a4str(:), a4str2
+! allocate(character(len=7,kind=4) :: a4str(-2:3))
+! allocate(character(len=9,kind=4) :: a4str2)
+
+!$omp target enter data map(alloc: a4str)
+!$omp target map(alloc: a4str)
+ if (.not. allocated(a4str)) error stop
+ if (len(a4str) /= 7) error stop
+ if (size(a4str) /= 6) error stop
+ if (lbound(a4str, 1) /= -2) error stop
+ if (ubound(a4str, 1) /= 3) error stop
+ ! See PR fortran/107508 why '(:)' is required
+ a4str(:) = [4_"sf456aq", 4_"3dtzu24", 4_"_4fh7sm", 4_"=ff85s7", 4_"j=8af4d", 4_".,A%Fsz"]
+!$omp end target
+!$omp target exit data map(from: a4str)
+if (.not. allocated(a4str)) error stop
+if (len(a4str) /= 7) error stop
+if (size(a4str) /= 6) error stop
+if (lbound(a4str, 1) /= -2) error stop
+if (ubound(a4str, 1) /= 3) error stop
+if (a4str(-2) /= 4_"sf456aq") error stop
+if (a4str(-1) /= 4_"3dtzu24") error stop
+if (a4str(0) /= 4_"_4fh7sm") error stop
+if (a4str(1) /= 4_"=ff85s7") error stop
+if (a4str(2) /= 4_"j=8af4d") error stop
+if (a4str(3) /= 4_".,A%Fsz") error stop
+
+!$omp target enter data map(alloc: a4str2)
+!$omp target map(alloc: a4str2)
+ if (.not. allocated(a4str2)) error stop
+ if (len(a4str2) /= 9) error stop
+ a4str2 = 4_"98765a23d"
+!$omp end target
+!$omp target exit data map(from: a4str2)
+if (.not. allocated(a4str2)) error stop
+if (len(a4str2) /= 9) error stop
+if (a4str2 /= 4_"98765a23d") error stop
+
+
+deallocate(dt%pc, dt%pc2)
+deallocate(dt%pcstr)
+deallocate(dt%pcstr2)
+
+deallocate(dt%pc4str)
+deallocate(dt%pc4str2)
+
+deallocate(ip, ip2, ia, ia2)
+deallocate(pstr)
+deallocate(pstr2)
+deallocate(astr)
+deallocate(astr2)
+
+deallocate(p4str)
+deallocate(p4str2)
+deallocate(a4str)
+deallocate(a4str2)
+
+end
diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-1.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-1.C
index 0545601..6957a6c 100644
--- a/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-1.C
+++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-1.C
@@ -52,3 +52,6 @@ int main()
PR119692.
{ dg-shouldfail {'std::bad_cast' exception} } */
+/* There are configurations where we 'WARNING: program timed out.' while in
+ 'dynamic_cast', see <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119692#c6>.
+ { dg-timeout 10 } ... to make sure that happens quickly. */
diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2.C
index 24399ef..0f84cf2 100644
--- a/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2.C
+++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2.C
@@ -58,3 +58,6 @@ int main()
For GCN, nvptx offload execution, there is no 'catch'ing; any exception is fatal.
{ dg-shouldfail {'std::bad_cast' exception} { ! openacc_host_selected } } */
+/* There are configurations where we 'WARNING: program timed out.' while in
+ 'dynamic_cast', see <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119692#c6>.
+ { dg-timeout 10 } ... to make sure that happens quickly. */
diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-1.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-1.C
index f2ef751..08c5766 100644
--- a/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-1.C
+++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-1.C
@@ -4,9 +4,6 @@
{ dg-additional-options -fexceptions } */
/* { dg-additional-options -fdump-tree-optimized-raw }
{ dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */
-/* { dg-bogus {Size expression must be absolute\.} PR119737 { target { openacc_radeon_accel_selected && __OPTIMIZE__ } xfail *-*-* } 0 }
- { dg-ice PR119737 { openacc_radeon_accel_selected && __OPTIMIZE__ } }
- { dg-excess-errors {'mkoffload' failure etc.} { xfail { openacc_radeon_accel_selected && __OPTIMIZE__ } } } */
/* See also '../libgomp.c++/target-exceptions-throw-1.C'. */
diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2.C
index f6dc970..a7408cd 100644
--- a/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2.C
+++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2.C
@@ -6,9 +6,6 @@
{ dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */
/* { dg-bogus {undefined symbol: typeinfo name for MyException} PR119806 { target { openacc_radeon_accel_selected && { ! __OPTIMIZE__ } } xfail *-*-* } 0 }
{ dg-excess-errors {'mkoffload' failure etc.} { xfail { openacc_radeon_accel_selected && { ! __OPTIMIZE__ } } } } */
-/* { dg-bogus {Size expression must be absolute\.} PR119737 { target { openacc_radeon_accel_selected && __OPTIMIZE__ } xfail *-*-* } 0 }
- { dg-ice PR119737 { openacc_radeon_accel_selected && __OPTIMIZE__ } }
- { dg-excess-errors {'mkoffload' failures etc.} { xfail { openacc_radeon_accel_selected && __OPTIMIZE__ } } } */
/* { dg-bogus {Initial value type mismatch} PR119806 { target { openacc_nvidia_accel_selected && { ! __OPTIMIZE__ } } xfail *-*-* } 0 }
{ dg-excess-errors {'mkoffload' failure etc.} { xfail { openacc_nvidia_accel_selected && { ! __OPTIMIZE__ } } } } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c
new file mode 100644
index 0000000..4b54171
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c
@@ -0,0 +1,125 @@
+/* Inspired by 'gcc.target/nvptx/abi-struct-arg.c', 'gcc.target/nvptx/abi-struct-ret.c'. */
+
+/* See also '../libgomp.c-c++-common/target-abi-struct-1.c'. */
+
+/* To exercise PR119835 (if optimizations enabled): disable inlining, so that
+ GIMPLE passes still see the functions that return aggregate types. */
+#pragma GCC optimize "-fno-inline"
+
+typedef struct {} empty; /* See 'gcc/doc/extend.texi', "Empty Structures". */
+typedef struct {char a;} schar;
+typedef struct {short a;} sshort;
+typedef struct {int a;} sint;
+typedef struct {long long a;} slonglong;
+typedef struct {int a, b[12];} sint_13;
+
+#pragma omp declare target
+
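+/* Construct a T whose member 'a' records 'sizeof (T)'; the corresponding
+   consumer routine below then checks that value on the device.  */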
+#define M(T) ({T t; t.a = sizeof t; t;})
+
+static __SIZE_TYPE__ empty_a;
+#pragma acc declare create(empty_a)
+#pragma acc routine
+static empty rempty(void)
+{
+ return ({empty t; empty_a = sizeof t; t;});
+}
+
+#pragma acc routine
+static schar rschar(void)
+{
+ return M(schar);
+}
+
+#pragma acc routine
+static sshort rsshort(void)
+{
+ return M(sshort);
+}
+
+#pragma acc routine
+static sint rsint(void)
+{
+ return M(sint);
+}
+
+#pragma acc routine
+static slonglong rslonglong(void)
+{
+ return M(slonglong);
+}
+
+#pragma acc routine
+static sint_13 rsint_13(void)
+{
+ return M(sint_13);
+}
+
+#pragma acc routine
+static void aempty(empty empty)
+{
+ (void) empty;
+
+ __SIZE_TYPE__ empty_a_exp;
+#ifndef __cplusplus
+ empty_a_exp = 0;
+#else
+ empty_a_exp = sizeof (char);
+#endif
+ if (empty_a != empty_a_exp)
+ __builtin_abort();
+}
+
+#pragma acc routine
+static void aschar(schar schar)
+{
+ if (schar.a != sizeof (char))
+ __builtin_abort();
+}
+
+#pragma acc routine
+static void asshort(sshort sshort)
+{
+ if (sshort.a != sizeof (short))
+ __builtin_abort();
+}
+
+#pragma acc routine
+static void asint(sint sint)
+{
+ if (sint.a != sizeof (int))
+ __builtin_abort();
+}
+
+#pragma acc routine
+static void aslonglong(slonglong slonglong)
+{
+ if (slonglong.a != sizeof (long long))
+ __builtin_abort();
+}
+
+#pragma acc routine
+static void asint_13(sint_13 sint_13)
+{
+ if (sint_13.a != (sizeof (int) * 13))
+ __builtin_abort();
+}
+
+#pragma omp end declare target
+
+int main()
+{
+#pragma omp target
+#pragma acc serial
+ /* { dg-bogus {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected xfail *-*-* } .-1 } */
+ {
+ aempty(rempty());
+ aschar(rschar());
+ asshort(rsshort());
+ asint(rsint());
+ aslonglong(rslonglong());
+ asint_13(rsint_13());
+ }
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c
new file mode 100644
index 0000000..eda651d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c
@@ -0,0 +1,96 @@
+/* { dg-prune-output "using .vector_length \\(32\\)" } */
+
+/* PR libgomp/93226 */
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdbool.h>
+#include <openacc.h>
+
+enum { N = 1024 };
+
+static int D[N];
+#pragma acc declare device_resident(D)
+
+#pragma acc routine
+intptr_t init_d()
+{
+ for (int i = 0; i < N; i++)
+ D[i] = 27*i;
+ return (intptr_t) &D[0];
+}
+
+int
+main ()
+{
+ int *a, *b, *e;
+ void *d_a, *d_b, *d_c, *d_d, *d_e, *d_f;
+ intptr_t intptr;
+ bool fail = false;
+
+ a = (int *) malloc (N*sizeof (int));
+ b = (int *) malloc (N*sizeof (int));
+ e = (int *) malloc (N*sizeof (int));
+ d_c = acc_malloc (N*sizeof (int));
+ d_f = acc_malloc (N*sizeof (int));
+
+ memset (e, 0xff, N*sizeof (int));
+ d_e = acc_copyin (e, N*sizeof (int));
+
+ #pragma acc serial copyout(intptr)
+ intptr = init_d ();
+ d_d = (void*) intptr;
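+ /* 'd_d' is the device address of the device-resident array 'D', as returned by 'init_d'.  */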
+ acc_memcpy_device (d_c, d_d, N*sizeof (int));
+
+ #pragma acc serial copy(fail) deviceptr(d_c) firstprivate(intptr)
+ {
+ int *cc = (int *) d_c;
+ int *dd = (int *) intptr;
+ for (int i = 0; i < N; i++)
+ if (dd[i] != 27*i || cc[i] != 27*i)
+ {
+ fail = true;
+ __builtin_abort ();
+ }
+ }
+ if (fail) __builtin_abort ();
+
+ for (int i = 0; i < N; i++)
+ a[i] = 11*i;
+ for (int i = 0; i < N; i++)
+ b[i] = 31*i;
+
+ d_a = acc_copyin (a, N*sizeof (int));
+ acc_copyin_async (b, N*sizeof (int), acc_async_noval);
+
+ #pragma acc parallel deviceptr(d_c) async
+ {
+ int *cc = (int *) d_c;
+ #pragma acc loop
+ for (int i = 0; i < N; i++)
+ cc[i] = -17*i;
+ }
+
+ acc_memcpy_device_async (d_d, d_a, N*sizeof (int), acc_async_noval);
+ acc_memcpy_device_async (d_f, d_c, N*sizeof (int), acc_async_noval);
+ acc_wait (acc_async_noval);
+ d_b = acc_deviceptr (b);
+ acc_memcpy_device_async (d_e, d_b, N*sizeof (int), acc_async_noval);
+ acc_wait (acc_async_noval);
+
+ #pragma acc serial deviceptr(d_d, d_e, d_f) copy(fail)
+ {
+ int *dd = (int *) d_d;
+ int *ee = (int *) d_e;
+ int *ff = (int *) d_f;
+ for (int i = 0; i < N; i++)
+ if (dd[i] != 11*i
+ || ee[i] != 31*i
+ || ff[i] != -17*i)
+ {
+ fail = true;
+ __builtin_abort ();
+ }
+ }
+ if (fail) __builtin_abort ();
+}
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-1.f90
new file mode 100644
index 0000000..15393b4
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-1.f90
@@ -0,0 +1,25 @@
+! { dg-do compile }
+! { dg-additional-options "-fdump-tree-original" }
+
+use openacc
+implicit none (type, external)
+integer,pointer :: a, b(:)
+integer,allocatable :: c, d(:)
+
+call acc_attach(a) ! ICE
+call acc_attach_async(b, 4)
+call acc_attach(c)
+
+call acc_detach(a)
+call acc_detach_async(b, 4)
+call acc_detach_finalize(c)
+call acc_detach_finalize_async(d,7)
+end
+
+! { dg-final { scan-tree-dump-times "acc_attach \\(&a\\);" 1 "original" } }
+! { dg-final { scan-tree-dump-times "acc_attach_async \\(&\\(integer\\(kind=4\\)\\\[0:\\\] \\*\\) b.data, 4\\);" 1 "original" } }
+! { dg-final { scan-tree-dump-times "acc_attach \\(&c\\);" 1 "original" } }
+! { dg-final { scan-tree-dump-times "acc_detach \\(&a\\);" 1 "original" } }
+! { dg-final { scan-tree-dump-times "acc_detach_async \\(&\\(integer\\(kind=4\\)\\\[0:\\\] \\*\\) b.data, 4\\);" 1 "original" } }
+! { dg-final { scan-tree-dump-times "acc_detach_finalize \\(&c\\);" 1 "original" } }
+! { dg-final { scan-tree-dump-times "acc_detach_finalize_async \\(&\\(integer\\(kind=4\\)\\\[0:\\\] \\* restrict\\) d.data, 7\\);" 1 "original" } }
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-2.f90
new file mode 100644
index 0000000..b2204ac
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-2.f90
@@ -0,0 +1,62 @@
+! { dg-do run }
+
+use openacc
+implicit none (type, external)
+integer, target :: tgt_a, tgt_b(5)
+
+integer, pointer :: p1, p2(:)
+
+type t
+ integer,pointer :: a => null ()
+ integer,pointer :: b(:) => null ()
+ integer,allocatable :: c, d(:)
+end type t
+
+type(t), target :: var
+
+tgt_a = 51
+tgt_b = [11,22,33,44,55]
+
+var%b => tgt_b
+!$acc enter data copyin(var, tgt_a, tgt_b)
+var%a => tgt_a
+
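+! Attach the pointer components, so that on the device 'var%a' and 'var%b'
+! point to the device copies of 'tgt_a' and 'tgt_b'.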
+call acc_attach(var%a)
+call acc_attach(var%b)
+
+!$acc serial
+! { dg-warning "using .vector_length \\(32\\)., ignoring 1" "" { target openacc_nvidia_accel_selected } .-1 }
+ if (var%a /= 51) stop 1
+ if (any (var%b /= [11,22,33,44,55])) stop 2
+!$acc end serial
+
+call acc_detach(var%a)
+call acc_detach(var%b)
+
+!$acc exit data delete(var, tgt_a, tgt_b)
+
+var%c = 9
+var%d = [1,2,3]
+
+p1 => var%c
+p2 => var%d
+
+!$acc enter data copyin(p1, p2)
+!$acc enter data copyin(var)
+call acc_attach(var%c)
+call acc_attach(var%d)
+
+!$acc serial
+! { dg-warning "using .vector_length \\(32\\)., ignoring 1" "" { target openacc_nvidia_accel_selected } .-1 }
+ if (var%c /= 9) stop 3
+ if (any (var%d /= [1,2,3])) stop 4
+!$acc end serial
+
+call acc_detach(var%c)
+call acc_detach(var%d)
+
+!$acc exit data delete(var, p1, p2)
+
+deallocate(var%d)
+
+end
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90
new file mode 100644
index 0000000..8f3a8f0
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90
@@ -0,0 +1,113 @@
+! { dg-prune-output "using .vector_length \\(32\\)" }
+
+! PR libgomp/93226
+
+module m
+ use iso_c_binding
+ use openacc
+ implicit none (external, type)
+
+ integer, parameter :: N = 1024
+
+ integer :: D(N)
+ !$acc declare device_resident(D)
+
+contains
+
+ integer(c_intptr_t) function init_d()
+ !$acc routine
+ integer :: i
+ do i = 1, N
+ D(i) = 27*i
+ end do
+ init_d = loc(D)
+ end
+end module
+
+program main
+ use m
+ implicit none (external, type)
+
+ integer, allocatable, target :: a(:), b(:), e(:)
+ type(c_ptr) :: d_a, d_b, d_c, d_d, d_e, d_f
+ integer(c_intptr_t) intptr
+ integer :: i
+ logical fail
+
+ fail = .false.
+
+ allocate(a(N), b(N), e(N))
+ d_c = acc_malloc (N*c_sizeof (i))
+ d_f = acc_malloc (N*c_sizeof (i))
+
+ e = huge(e)
+ call acc_copyin (e, N*c_sizeof (i))
+ d_e = acc_deviceptr (e)
+
+ !$acc serial copyout(intptr)
+ intptr = init_d ()
+ !$acc end serial
+ d_d = transfer(intptr, d_d)
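+ ! 'd_d' is the device address of the device-resident array 'D', as returned by 'init_d'.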
+ call acc_memcpy_device (d_c, d_d, N*c_sizeof (i))
+
+ !$acc serial copy(fail) copy(a) deviceptr(d_c, d_d) firstprivate(intptr)
+ block
+ integer, pointer :: cc(:), dd(:)
+ call c_f_pointer (d_c, cc, [N])
+ call c_f_pointer (d_d, dd, [N])
+ a = cc
+ do i = 1, N
+ if (dd(i) /= 27*i .or. cc(i) /= 27*i) then
+ fail = .true.
+ stop 1
+ end if
+ end do
+ end block
+ !$acc end serial
+ if (fail) error stop 1
+
+ do i = 1, N
+ a(i) = 11*i
+ b(i) = 31*i
+ end do
+
+ call acc_copyin (a, N*c_sizeof (i))
+ d_a = acc_deviceptr (a)
+ call acc_copyin_async (b, N*c_sizeof (i), acc_async_noval)
+
+ !$acc parallel deviceptr(d_c) private(i) async
+ block
+ integer, pointer :: cc(:)
+ call c_f_pointer (d_c, cc, [N])
+ !$acc loop
+ do i = 1, N
+ cc(i) = -17*i
+ end do
+ end block
+ !$acc end parallel
+
+ call acc_memcpy_device_async (d_d, d_a, N*c_sizeof (i), acc_async_noval)
+ call acc_memcpy_device_async (d_f, d_c, N*c_sizeof (i), acc_async_noval)
+ call acc_wait (acc_async_noval)
+ d_b = acc_deviceptr (b)
+ call acc_memcpy_device_async (d_e, d_b, N*c_sizeof (i), acc_async_noval)
+ call acc_wait (acc_async_noval)
+
+ !$acc serial deviceptr(d_d, d_e, d_f) private(i) copy(fail)
+ block
+ integer, pointer :: dd(:), ee(:), ff(:)
+ call c_f_pointer (d_d, dd, [N])
+ call c_f_pointer (d_e, ee, [N])
+ call c_f_pointer (d_f, ff, [N])
+ do i = 1, N
+ if (dd(i) /= 11*i &
+ .or. ee(i) /= 31*i &
+ .or. ff(i) /= -17*i) then
+ fail = .true.
+ stop 2
+ end if
+ end do
+ end block
+ !$acc end serial
+ if (fail) error stop 2
+end
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-13.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-13.f90
index deb2c28..f6bd27a 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/lib-13.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-13.f90
@@ -19,11 +19,10 @@ program main
end do
!$acc end parallel
end do
- !$acc end data
call acc_wait_all_async (nprocs + 1)
-
call acc_wait (nprocs + 1)
+ !$acc end data
if (acc_async_test (1) .neqv. .TRUE.) stop 1
if (acc_async_test (2) .neqv. .TRUE.) stop 2