Diffstat (limited to 'libgomp')
206 files changed, 16346 insertions, 156 deletions
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index a60e51a..d3384b0 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,634 @@ +2025-06-24 Tobias Burnus <tburnus@baylibre.com> + + * libgomp.texi (acc_attach, acc_detach): Update for Fortran + version. + * openacc.f90 (acc_attach{,_async}, acc_detach{,_finalize}{,_async}): + Add. + * openacc_lib.h: Likewise. + * testsuite/libgomp.oacc-fortran/acc-attach-detach-1.f90: New test. + * testsuite/libgomp.oacc-fortran/acc-attach-detach-2.f90: New test. + +2025-06-19 Tobias Burnus <tburnus@baylibre.com> + + * target.c (GOMP_REQUIRES_NAME_BUF_LEN): Define. + (GOMP_offload_register_ver, gomp_target_init): Use it for the + char buffer size. + +2025-06-19 Tobias Burnus <tburnus@baylibre.com> + waffl3x <waffl3x@baylibre.com> + + * libgomp.texi (omp_init_allocator): Refer to 'Memory allocation' + for available memory spaces. + (OMP_ALLOCATOR): Move list of traits and predefined memspaces + and allocators to ... + (Memory allocation): ... here. Document omp(x)::allocator::*; + minor wording tweaks, be more explicit about memkind, pinned and + pool_size. + +2025-06-17 Tobias Burnus <tburnus@baylibre.com> + + * testsuite/libgomp.c++/declare_target-2.C: New test. + +2025-06-10 Tobias Burnus <tburnus@baylibre.com> + + * testsuite/libgomp.c/declare-variant-4.h (gfx942): New variant function. + * testsuite/libgomp.c/declare-variant-4-gfx942.c: New test. + +2025-06-06 Tobias Burnus <tburnus@baylibre.com> + Sandra Loosemore <sloosemore@baylibre.com> + + * libgomp.texi (omp_get_num_devices, omp_get_intrinsic_device): + Document builtin handling. + +2025-06-06 Tobias Burnus <tburnus@baylibre.com> + + PR target/120530 + * testsuite/libgomp.c/target-map-zero-sized-3.c (main): Add missing + map clause; remove unused variable. + +2025-06-04 Tobias Burnus <tburnus@baylibre.com> + Sandra Loosemore <sloosemore@baylibre.com> + + * libgomp.texi (omp_interop_{int,ptr,str,rc_desc}): Add note about + the 'ret_code' type change in OpenMP 6. + +2025-06-03 Jakub Jelinek <jakub@redhat.com> + + PR libgomp/120444 + * testsuite/libgomp.c-c++-common/omp_target_memset-3.c (test_it): + Change ptr argument type from void * to int8_t *. + (main): Change ptr variable type from void * to int8_t * and cast + omp_target_alloc result to the latter type. + +2025-06-02 Tobias Burnus <tburnus@baylibre.com> + + PR libgomp/120444 + * libgomp-plugin.h (GOMP_OFFLOAD_memset): Declare. + * libgomp.h (struct gomp_device_descr): Add memset_func. + * libgomp.map (GOMP_6.0.1): Add omp_target_memset{,_async}. + * libgomp.texi (Device Memory Routines): Document them. + * omp.h.in (omp_target_memset, omp_target_memset_async): Declare. + * omp_lib.f90.in (omp_target_memset, omp_target_memset_async): + Add interfaces. + * omp_lib.h.in (omp_target_memset, omp_target_memset_async): Likewise. + * plugin/cuda-lib.def: Add cuMemsetD8. + * plugin/plugin-gcn.c (struct hsa_runtime_fn_info): Add + hsa_amd_memory_fill_fn. + (init_hsa_runtime_functions): DLSYM_OPT_FN load it. + (GOMP_OFFLOAD_memset): New. + * plugin/plugin-nvptx.c (GOMP_OFFLOAD_memset): New. + * target.c (omp_target_memset_int, omp_target_memset, + omp_target_memset_async_helper, omp_target_memset_async): New. + (gomp_load_plugin_for_device): Add DLSYM (memset). + * testsuite/libgomp.c-c++-common/omp_target_memset.c: New test. + * testsuite/libgomp.c-c++-common/omp_target_memset-2.c: New test. + * testsuite/libgomp.c-c++-common/omp_target_memset-3.c: New test. + * testsuite/libgomp.fortran/omp_target_memset.f90: New test. 
+ * testsuite/libgomp.fortran/omp_target_memset-2.f90: New test. + +2025-05-30 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-std__valarray-1.C: New. + * testsuite/libgomp.c++/target-std__valarray-1.output: Likewise. + +2025-05-30 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-std__array-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__array-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__bitset-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__bitset-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__deque-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__deque-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__forward_list-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__forward_list-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__list-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__list-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__map-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__map-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__multimap-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__multimap-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__multiset-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__multiset-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__set-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__set-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__span-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__span-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__valarray-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__valarray-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__vector-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__vector-concurrent.C: Adjust. + +2025-05-30 Kwok Cheung Yeung <kcyeung@baylibre.com> + Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-std__array-concurrent.C: New. + * testsuite/libgomp.c++/target-std__bitset-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__deque-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__flat_map-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__flat_multimap-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__flat_multiset-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__flat_set-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__forward_list-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__list-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__map-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__multimap-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__multiset-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__set-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__span-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__unordered_map-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__unordered_multimap-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__unordered_multiset-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__unordered_set-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__valarray-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__vector-concurrent.C: Likewise. 
+ +2025-05-30 Kwok Cheung Yeung <kcyeung@baylibre.com> + + * testsuite/libgomp.c++/target-std__cmath.C: New. + * testsuite/libgomp.c++/target-std__complex.C: Likewise. + * testsuite/libgomp.c++/target-std__numbers.C: Likewise. + +2025-05-30 Waffl3x <waffl3x@baylibre.com> + Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-flex-10.C: New test. + * testsuite/libgomp.c++/target-flex-100.C: New test. + * testsuite/libgomp.c++/target-flex-101.C: New test. + * testsuite/libgomp.c++/target-flex-11.C: New test. + * testsuite/libgomp.c++/target-flex-12.C: New test. + * testsuite/libgomp.c++/target-flex-2000.C: New test. + * testsuite/libgomp.c++/target-flex-2001.C: New test. + * testsuite/libgomp.c++/target-flex-2002.C: New test. + * testsuite/libgomp.c++/target-flex-2003.C: New test. + * testsuite/libgomp.c++/target-flex-30.C: New test. + * testsuite/libgomp.c++/target-flex-300.C: New test. + * testsuite/libgomp.c++/target-flex-31.C: New test. + * testsuite/libgomp.c++/target-flex-32.C: New test. + * testsuite/libgomp.c++/target-flex-33.C: New test. + * testsuite/libgomp.c++/target-flex-41.C: New test. + * testsuite/libgomp.c++/target-flex-60.C: New test. + * testsuite/libgomp.c++/target-flex-61.C: New test. + * testsuite/libgomp.c++/target-flex-62.C: New test. + * testsuite/libgomp.c++/target-flex-70.C: New test. + * testsuite/libgomp.c++/target-flex-80.C: New test. + * testsuite/libgomp.c++/target-flex-81.C: New test. + * testsuite/libgomp.c++/target-flex-90.C: New test. + * testsuite/libgomp.c++/target-flex-common.h: New test. + +2025-05-30 Thomas Schwinge <tschwinge@baylibre.com> + Richard Biener <rguenther@suse.de> + + PR middle-end/119835 + * testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c: + '#pragma GCC optimize "-fno-inline"'. + * testsuite/libgomp.c-c++-common/target-abi-struct-1.c: New. + * testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c: Adjust. + +2025-05-30 Julian Brown <julian@codesourcery.com> + + * testsuite/libgomp.c-c++-common/declare-mapper-9.c: Enable for C. + * testsuite/libgomp.c-c++-common/declare-mapper-10.c: Likewise. + * testsuite/libgomp.c-c++-common/declare-mapper-11.c: Likewise. + * testsuite/libgomp.c-c++-common/declare-mapper-12.c: Likewise. + * testsuite/libgomp.c-c++-common/declare-mapper-13.c: Likewise. + * testsuite/libgomp.c-c++-common/declare-mapper-14.c: Likewise. + +2025-05-30 Julian Brown <julian@codesourcery.com> + Tobias Burnus <tburnus@baylibre.com> + + * testsuite/libgomp.c++/declare-mapper-1.C: New test. + * testsuite/libgomp.c++/declare-mapper-2.C: New test. + * testsuite/libgomp.c++/declare-mapper-3.C: New test. + * testsuite/libgomp.c++/declare-mapper-4.C: New test. + * testsuite/libgomp.c++/declare-mapper-5.C: New test. + * testsuite/libgomp.c++/declare-mapper-6.C: New test. + * testsuite/libgomp.c++/declare-mapper-7.C: New test. + * testsuite/libgomp.c++/declare-mapper-8.C: New test. + * testsuite/libgomp.c-c++-common/declare-mapper-9.c: New test (only + enabled for C++ for now). + * testsuite/libgomp.c-c++-common/declare-mapper-10.c: Likewise. + * testsuite/libgomp.c-c++-common/declare-mapper-11.c: Likewise. + * testsuite/libgomp.c-c++-common/declare-mapper-12.c: Likewise. + * testsuite/libgomp.c-c++-common/declare-mapper-13.c: Likewise. + * testsuite/libgomp.c-c++-common/declare-mapper-14.c: Likewise. + +2025-05-29 Tobias Burnus <tburnus@baylibre.com> + + PR libgomp/93226 + * libgomp-plugin.h (GOMP_OFFLOAD_openacc_async_dev2dev): New + prototype. + * libgomp.h (struct acc_dispatch_t): Add dev2dev_func. 
+ (gomp_copy_dev2dev): New prototype. + * libgomp.map (OACC_2.6.1): New; add acc_memcpy_device{,_async}. + * libgomp.texi (acc_memcpy_device): New. + * oacc-mem.c (memcpy_tofrom_device): Change to take from/to + device boolean; use memcpy not memmove; add early return if + size == 0 or same device + same ptr. + (acc_memcpy_to_device, acc_memcpy_to_device_async, + acc_memcpy_from_device, acc_memcpy_from_device_async): Update. + (acc_memcpy_device, acc_memcpy_device_async): New. + * openacc.f90 (acc_memcpy_device, acc_memcpy_device_async): + Add interface. + * openacc_lib.h (acc_memcpy_device, acc_memcpy_device_async): + Likewise. + * openacc.h (acc_memcpy_device, acc_memcpy_device_async): Add + prototype. + * plugin/plugin-gcn.c (GOMP_OFFLOAD_openacc_async_host2dev): + Update comment. + (GOMP_OFFLOAD_openacc_async_dev2host): Update call. + (GOMP_OFFLOAD_openacc_async_dev2dev): New. + * plugin/plugin-nvptx.c (cuda_memcpy_dev_sanity_check): New. + (GOMP_OFFLOAD_dev2dev): Call it. + (GOMP_OFFLOAD_openacc_async_dev2dev): New. + * target.c (gomp_copy_dev2dev): New. + (gomp_load_plugin_for_device): Load dev2dev and async_dev2dev. + * testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c: New test. + * testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90: New test. + +2025-05-28 Tobias Burnus <tburnus@baylibre.com> + + PR middle-end/118694 + * testsuite/libgomp.fortran/metadirective-1.f90: xfail when + compiling (also) for nvptx offloading as an error is then expected. + +2025-05-23 Tobias Burnus <tburnus@baylibre.com> + + PR middle-end/118694 + * testsuite/libgomp.c-c++-common/metadirective-1.c: xfail when + compiling (also) for nvptx offloading as an error is then expected. + +2025-05-19 Thomas Schwinge <tschwinge@baylibre.com> + + PR lto/120308 + * testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c: Add empty + structure testing. + +2025-05-19 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c: New. + * testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c: Likewise. + +2025-05-19 Julian Brown <julian@codesourcery.com> + + * testsuite/libgomp.oacc-fortran/lib-13.f90: End data region after + wait API calls. + +2025-05-15 Tobias Burnus <tburnus@baylibre.com> + + * testsuite/libgomp.fortran/alloc-comp-4.f90: New test. + +2025-05-14 Tobias Burnus <tburnus@baylibre.com> + + * target.c (gomp_attach_pointer): Return bool; accept additional + bool to optionally silence the fatal pointee-not-found error. + (gomp_map_vars_internal): If the pointee could not be found, + check whether it was mapped as GOMP_MAP_ZERO_LEN_ARRAY_SECTION. + * libgomp.h (gomp_attach_pointer): Update prototype. + * oacc-mem.c (acc_attach_async, goacc_enter_data_internal): Update + calls. + * testsuite/libgomp.c/target-map-zero-sized.c: New test. + * testsuite/libgomp.c/target-map-zero-sized-2.c: New test. + * testsuite/libgomp.c/target-map-zero-sized-3.c: New test. + +2025-05-12 Thomas Schwinge <tschwinge@baylibre.com> + + PR target/119692 + * testsuite/libgomp.c++/pr119692-1-4.C: '{ dg-timeout 10 }'. + * testsuite/libgomp.c++/pr119692-1-5.C: Likewise. + * testsuite/libgomp.c++/target-exceptions-bad_cast-1.C: Likewise. + * testsuite/libgomp.c++/target-exceptions-bad_cast-2.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-bad_cast-1.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-bad_cast-2.C: Likewise. + +2025-05-12 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c/declare-variant-3-sm61.c: New. 
+ * testsuite/libgomp.c/declare-variant-3.h: Adjust. + +2025-05-09 Tobias Burnus <tburnus@baylibre.com> + + * testsuite/libgomp.c/interop-cuda-full.c: Use 'link' instead + of 'run' when the default device is "! offload_device_nvptx". + * testsuite/libgomp.c/interop-cuda-libonly.c: Likewise. + * testsuite/libgomp.c/interop-hip-nvidia-full.c: Likewise. + * testsuite/libgomp.c/interop-hip-nvidia-no-headers.c: Likewise. + * testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c: Likewise. + * testsuite/libgomp.fortran/interop-hip-nvidia-full.F90: Likewise. + * testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90: Likewise. + * testsuite/libgomp.c/interop-hip-amd-full.c: Use 'link' instead + of 'run' when the default device is "! offload_device_gcn". + * testsuite/libgomp.c/interop-hip-amd-no-hip-header.c: Likewise. + * testsuite/libgomp.fortran/interop-hip-amd-full.F90: Likewise. + * testsuite/libgomp.fortran/interop-hip-amd-no-module.F90: Likewise. + +2025-05-09 David Malcolm <dmalcolm@redhat.com> + + PR other/116792 + * testsuite/lib/libgomp.exp: Add load_lib of scanhtml.exp. + +2025-05-07 Tobias Burnus <tburnus@baylibre.com> + + * testsuite/libgomp.fortran/map-alloc-comp-9.f90: Process differently + when USE_USM_REQUIREMENT is set. + * testsuite/libgomp.fortran/map-alloc-comp-9-usm.f90: New test. + +2025-05-06 Tejas Belagod <tejas.belagod@arm.com> + + * testsuite/libgomp.c-target/aarch64/udr-sve.c: Fix test. + +2025-05-05 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c/interop-hsa.c: GCN offloading only. + +2025-05-01 Tobias Burnus <tobias@codesourcery.com> + + * testsuite/libgomp.fortran/allocate-8a.f90: New test. + +2025-04-25 Andrew Stubbs <ams@baylibre.com> + + * testsuite/libgomp.c/interop-hsa.c: New test. + +2025-04-25 Thomas Schwinge <tschwinge@baylibre.com> + + PR target/119853 + PR target/119854 + * target-cxa-dso-dtor.c: New. + * config/accel/target-cxa-dso-dtor.c: Likewise. + * Makefile.am (libgomp_la_SOURCES): Add it. + * Makefile.in: Regenerate. + * testsuite/libgomp.c++/target-cdtor-1.C: New. + * testsuite/libgomp.c++/target-cdtor-2.C: Likewise. + +2025-04-25 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c-c++-common/target-cdtor-1.c: New. + +2025-04-25 Andrew Pinski <quic_apinski@quicinc.com> + Thomas Schwinge <tschwinge@baylibre.com> + + PR target/119737 + * testsuite/libgomp.c++/target-exceptions-throw-1.C: Remove + PR119737 XFAILing. + * testsuite/libgomp.c++/target-exceptions-throw-2.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-throw-1.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-throw-2.C: Likewise. + +2025-04-25 Thomas Schwinge <tschwinge@baylibre.com> + + PR target/118794 + * testsuite/libgomp.c++/target-exceptions-pr118794-1.C: Adjust for + 'targetm.arm_eabi_unwinder'. + * testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C: + Likewise. + * testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C: + Likewise. + +2025-04-24 Tobias Burnus <tburnus@baylibre.com> + + * testsuite/lib/libgomp.exp + (check_effective_target_gomp_hip_header_nvidia): Compile with + "-Wno-deprecated-declarations". + * testsuite/libgomp.c/interop-hip-nvidia-full.c: Likewise. + * testsuite/libgomp.c/interop-hipblas-nvidia-full.c: Likewise. + * testsuite/libgomp.c/interop-hipblas.h: Add workarounds + when using the HIP headers with __HIP_PLATFORM_NVIDIA__. 
+ +2025-04-24 Tobias Burnus <tburnus@baylibre.com> + + * testsuite/lib/libgomp.exp (check_effective_target_openacc_cublas, + check_effective_target_openacc_cudart): Update description as + the check requires more. + (check_effective_target_openacc_libcuda, + check_effective_target_openacc_libcublas, + check_effective_target_openacc_libcudart, + check_effective_target_gomp_hip_header_amd, + check_effective_target_gomp_hip_header_nvidia, + check_effective_target_gomp_hipfort_module, + check_effective_target_gomp_libamdhip64, + check_effective_target_gomp_libhipblas): New. + * testsuite/libgomp.c-c++-common/interop-2.c: New test. + * testsuite/libgomp.c/interop-cublas-full.c: New test. + * testsuite/libgomp.c/interop-cublas-libonly.c: New test. + * testsuite/libgomp.c/interop-cuda-full.c: New test. + * testsuite/libgomp.c/interop-cuda-libonly.c: New test. + * testsuite/libgomp.c/interop-hip-amd-full.c: New test. + * testsuite/libgomp.c/interop-hip-amd-no-hip-header.c: New test. + * testsuite/libgomp.c/interop-hip-nvidia-full.c: New test. + * testsuite/libgomp.c/interop-hip-nvidia-no-headers.c: New test. + * testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c: New test. + * testsuite/libgomp.c/interop-hip.h: New test. + * testsuite/libgomp.c/interop-hipblas-amd-full.c: New test. + * testsuite/libgomp.c/interop-hipblas-amd-no-hip-header.c: New test. + * testsuite/libgomp.c/interop-hipblas-nvidia-full.c: New test. + * testsuite/libgomp.c/interop-hipblas-nvidia-no-headers.c: New test. + * testsuite/libgomp.c/interop-hipblas-nvidia-no-hip-header.c: New test. + * testsuite/libgomp.c/interop-hipblas.h: New test. + * testsuite/libgomp.fortran/interop-hip-amd-full.F90: New test. + * testsuite/libgomp.fortran/interop-hip-amd-no-module.F90: New test. + * testsuite/libgomp.fortran/interop-hip-nvidia-full.F90: New test. + * testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90: New test. + * testsuite/libgomp.fortran/interop-hip.h: New test. + +2025-04-23 Tobias Burnus <tburnus@baylibre.com> + + * testsuite/libgomp.fortran/target-enter-data-8.f90: New test. + +2025-04-17 Jakub Jelinek <jakub@redhat.com> + + PR libgomp/119849 + * testsuite/libgomp.c++/allocator-1.C (test_inequality, main): Guard + ompx::allocator::gnu_pinned_mem uses with #ifdef __gnu_linux__. + * testsuite/libgomp.c++/allocator-2.C (main): Likewise. + +2025-04-17 Tobias Burnus <tburnus@baylibre.com> + + * libgomp.texi (gcn interop, nvptx interop): For HIP with C/C++, add + a note about setting a preprocessor define. + +2025-04-16 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-exceptions-pr118794-1.C: Remove + 'ALWAYS_INLINE' workaround. + +2025-04-16 Thomas Schwinge <tschwinge@baylibre.com> + + PR target/106445 + * testsuite/libgomp.c++/pr106445-1.C: New. + * testsuite/libgomp.c++/pr106445-1-O0.C: Likewise. + +2025-04-16 Thomas Schwinge <tschwinge@baylibre.com> + + PR target/97106 + * testsuite/libgomp.c++/pr96390.C: Un-XFAIL nvptx offloading. + * testsuite/libgomp.c-c++-common/pr96390.c: Adjust. + +2025-04-15 Tobias Burnus <tburnus@baylibre.com> + + * libgomp.texi (gcn, nvptx): Mention self_maps clause + besides unified_shared_memory in the requirements item. + +2025-04-15 waffl3x <waffl3x@baylibre.com> + + * omp.h.in: Add omp::allocator::* and ompx::allocator::* allocators. + (__detail::__allocator_templ<T, omp_allocator_handle_t>): + New struct template. + (null_allocator<T>): New struct template. + (default_mem<T>): Likewise. + (large_cap_mem<T>): Likewise. + (const_mem<T>): Likewise. 
+ (high_bw_mem<T>): Likewise. + (low_lat_mem<T>): Likewise. + (cgroup_mem<T>): Likewise. + (pteam_mem<T>): Likewise. + (thread_mem<T>): Likewise. + (ompx::allocator::gnu_pinned_mem<T>): Likewise. + * testsuite/libgomp.c++/allocator-1.C: New test. + * testsuite/libgomp.c++/allocator-2.C: New test. + +2025-04-15 Tobias Burnus <tburnus@baylibre.com> + + * libgomp.texi (5.0 Impl. Status): Mark mapping alloc comps as 'Y'. + * testsuite/libgomp.fortran/allocatable-comp.f90: New test. + * testsuite/libgomp.fortran/map-alloc-comp-3.f90: New test. + * testsuite/libgomp.fortran/map-alloc-comp-4.f90: New test. + * testsuite/libgomp.fortran/map-alloc-comp-5.f90: New test. + * testsuite/libgomp.fortran/map-alloc-comp-6.f90: New test. + * testsuite/libgomp.fortran/map-alloc-comp-7.f90: New test. + * testsuite/libgomp.fortran/map-alloc-comp-8.f90: New test. + * testsuite/libgomp.fortran/map-alloc-comp-9.f90: New test. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + PR target/118794 + * testsuite/libgomp.c++/target-exceptions-bad_cast-2-offload-sorry-GCN.C: + Set '-foffload-options=-mno-fake-exceptions'. + * testsuite/libgomp.c++/target-exceptions-bad_cast-2-offload-sorry-nvptx.C: + Likewise. + * testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C: + Likewise. + * testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C: + Likewise. + * testsuite/libgomp.c++/target-exceptions-throw-2-offload-sorry-GCN.C: + Likewise. + * testsuite/libgomp.c++/target-exceptions-throw-2-offload-sorry-nvptx.C: + Likewise. + * testsuite/libgomp.oacc-c++/exceptions-bad_cast-2-offload-sorry-GCN.C: + Likewise. + * testsuite/libgomp.oacc-c++/exceptions-bad_cast-2-offload-sorry-nvptx.C: + Likewise. + * testsuite/libgomp.oacc-c++/exceptions-throw-2-offload-sorry-GCN.C: + Likewise. + * testsuite/libgomp.oacc-c++/exceptions-throw-2-offload-sorry-nvptx.C: + Likewise. + * testsuite/libgomp.c++/target-exceptions-bad_cast-2.C: Adjust. + * testsuite/libgomp.c++/target-exceptions-pr118794-1.C: Likewise. + * testsuite/libgomp.c++/target-exceptions-throw-2.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-bad_cast-2.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-throw-2.C: Likewise. + * testsuite/libgomp.c++/target-exceptions-throw-2-O0.C: New. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-exceptions-throw-3.C: New. + * testsuite/libgomp.oacc-c++/exceptions-throw-3.C: Likewise. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-exceptions-throw-2.C: New. + * testsuite/libgomp.c++/target-exceptions-throw-2-offload-sorry-GCN.C: Likewise. + * testsuite/libgomp.c++/target-exceptions-throw-2-offload-sorry-nvptx.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-throw-2.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-throw-2-offload-sorry-GCN.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-throw-2-offload-sorry-nvptx.C: Likewise. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-exceptions-throw-1.C: New. + * testsuite/libgomp.c++/target-exceptions-throw-1-O0.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-throw-1.C: Likewise. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-exceptions-bad_cast-3.C: New. + * testsuite/libgomp.oacc-c++/exceptions-bad_cast-3.C: Likewise. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-exceptions-bad_cast-2.C: New. 
+ * testsuite/libgomp.c++/target-exceptions-bad_cast-2-offload-sorry-GCN.C: Likewise. + * testsuite/libgomp.c++/target-exceptions-bad_cast-2-offload-sorry-nvptx.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-bad_cast-2.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-bad_cast-2-offload-sorry-GCN.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-bad_cast-2-offload-sorry-nvptx.C: Likewise. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-exceptions-bad_cast-1.C: New. + * testsuite/libgomp.oacc-c++/exceptions-bad_cast-1.C: Likewise. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + PR target/118794 + * testsuite/libgomp.c++/target-exceptions-pr118794-1.C: New. + * testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C: + Likewise. + * testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C: + Likewise. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + PR c++/119692 + * testsuite/libgomp.c++/pr119692-1-1.C: New. + * testsuite/libgomp.c++/pr119692-1-2.C: Likewise. + * testsuite/libgomp.c++/pr119692-1-3.C: Likewise. + * testsuite/libgomp.c++/pr119692-1-4.C: Likewise. + * testsuite/libgomp.c++/pr119692-1-5.C: Likewise. + * testsuite/libgomp.oacc-c++/pr119692-1-1.C: Likewise. + * testsuite/libgomp.oacc-c++/pr119692-1-2.C: Likewise. + * testsuite/libgomp.oacc-c++/pr119692-1-3.C: Likewise. + +2025-04-10 Richard Sandiford <richard.sandiford@arm.com> + + * testsuite/libgomp.c-target/aarch64/firstprivate.c: Add +sve pragma. + * testsuite/libgomp.c-target/aarch64/lastprivate.c: Likewise. + * testsuite/libgomp.c-target/aarch64/private.c: Likewise. + * testsuite/libgomp.c-target/aarch64/shared.c: Likewise. + * testsuite/libgomp.c-target/aarch64/simd-aligned.c: Likewise. + * testsuite/libgomp.c-target/aarch64/simd-nontemporal.c: Likewise. + * testsuite/libgomp.c-target/aarch64/threadprivate.c: Likewise. + * testsuite/libgomp.c-target/aarch64/udr-sve.c: Add an -march option. + (for_reduction): Use "+=" in the reduction loop. + +2025-04-08 Tobias Burnus <tburnus@baylibre.com> + + PR middle-end/119662 + * testsuite/libgomp.c/append-args-fr-1.c: New test. + * testsuite/libgomp.c/append-args-fr.h: New test. + +2025-04-08 Tobias Burnus <tburnus@baylibre.com> + + * Makefile.am (%.mod): Add -Wno-c-binding-type. + * Makefile.in: Regenerate. + +2025-04-08 Tejas Belagod <tejas.belagod@arm.com> + + * testsuite/libgomp.c-target/aarch64/aarch64.exp: Test driver. + * testsuite/libgomp.c-target/aarch64/firstprivate.c: New test. + * testsuite/libgomp.c-target/aarch64/lastprivate.c: Likewise. + * testsuite/libgomp.c-target/aarch64/private.c: Likewise. + * testsuite/libgomp.c-target/aarch64/shared.c: Likewise. + * testsuite/libgomp.c-target/aarch64/simd-aligned.c: Likewise. + * testsuite/libgomp.c-target/aarch64/simd-nontemporal.c: Likewise. + * testsuite/libgomp.c-target/aarch64/threadprivate.c: Likewise. + * testsuite/libgomp.c-target/aarch64/udr-sve.c: Likewise. + +2025-04-07 Tobias Burnus <tburnus@baylibre.com> + + * libgomp.texi (omp_target_memcpy_rect_async, + omp_target_memcpy_rect): Add @ref to 'Offload-Target Specifics'. + (AMD Radeon (GCN)): Document how memcpy_rect is implemented. + (nvptx): Move item about memcpy_rect item down; use present tense. 
+ 2025-03-26 Thomas Schwinge <thomas@codesourcery.com> PR driver/101544 diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am index 855f0af..19479ae 100644 --- a/libgomp/Makefile.am +++ b/libgomp/Makefile.am @@ -70,7 +70,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \ target.c splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \ oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \ priority_queue.c affinity-fmt.c teams.c allocator.c oacc-profiling.c \ - oacc-target.c target-indirect.c + oacc-target.c target-indirect.c target-cxa-dso-dtor.c include $(top_srcdir)/plugin/Makefrag.am @@ -97,7 +97,7 @@ openacc_kinds.mod: openacc.mod openacc.mod: openacc.lo : %.mod: %.f90 - $(FC) $(FCFLAGS) -cpp -fopenmp -fsyntax-only $< + $(FC) $(FCFLAGS) -cpp -fopenmp -fsyntax-only -Wno-c-binding-type $< fortran.lo: libgomp_f.h fortran.o: libgomp_f.h env.lo: libgomp_f.h diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in index 25cb6fc..6d22b3d 100644 --- a/libgomp/Makefile.in +++ b/libgomp/Makefile.in @@ -219,7 +219,8 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \ oacc-parallel.lo oacc-host.lo oacc-init.lo oacc-mem.lo \ oacc-async.lo oacc-plugin.lo oacc-cuda.lo priority_queue.lo \ affinity-fmt.lo teams.lo allocator.lo oacc-profiling.lo \ - oacc-target.lo target-indirect.lo $(am__objects_1) + oacc-target.lo target-indirect.lo target-cxa-dso-dtor.lo \ + $(am__objects_1) libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) @@ -552,7 +553,8 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \ oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \ oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \ affinity-fmt.c teams.c allocator.c oacc-profiling.c \ - oacc-target.c target-indirect.c $(am__append_3) + oacc-target.c target-indirect.c target-cxa-dso-dtor.c \ + $(am__append_3) # Nvidia PTX OpenACC plugin. @PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION) @@ -780,6 +782,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/splay-tree.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target-cxa-dso-dtor.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target-indirect.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/task.Plo@am__quote@ @@ -1388,7 +1391,7 @@ openacc_kinds.mod: openacc.mod openacc.mod: openacc.lo : %.mod: %.f90 - $(FC) $(FCFLAGS) -cpp -fopenmp -fsyntax-only $< + $(FC) $(FCFLAGS) -cpp -fopenmp -fsyntax-only -Wno-c-binding-type $< fortran.lo: libgomp_f.h fortran.o: libgomp_f.h env.lo: libgomp_f.h diff --git a/libgomp/config/accel/target-cxa-dso-dtor.c b/libgomp/config/accel/target-cxa-dso-dtor.c new file mode 100644 index 0000000..e40a5f0 --- /dev/null +++ b/libgomp/config/accel/target-cxa-dso-dtor.c @@ -0,0 +1,62 @@ +/* Host/device compatibility: Itanium C++ ABI, DSO Object Destruction API + + Copyright (C) 2025 Free Software Foundation, Inc. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). 
+ + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "libgomp.h" + +extern void __cxa_finalize (void *); + +/* See <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#dso-dtor>. + + Even if the device is '!DEFAULT_USE_CXA_ATEXIT', we may see '__cxa_atexit' + calls, referencing '__dso_handle', via a 'DEFAULT_USE_CXA_ATEXIT' host. + '__cxa_atexit' is provided by newlib, but use of '__dso_handle' for nvptx + results in 'ld' error: + + unresolved symbol __dso_handle + collect2: error: ld returned 1 exit status + nvptx mkoffload: fatal error: [...]/x86_64-pc-linux-gnu-accel-nvptx-none-gcc returned 1 exit status + + ..., or for GCN get an implicit definition (running with + '--trace-symbol=__dso_handle'): + + ./a.xamdgcn-amdhsa.mkoffload.hsaco-a.xamdgcn-amdhsa.mkoffload.2.o: reference to __dso_handle + <internal>: definition of __dso_handle + + ..., which might be fine, but let's just make it explicit. */ + +/* There are no DSOs; this is the main program. */ +attribute_hidden void * const __dso_handle = 0; + +/* If this file gets linked in, that means that '__dso_handle' has been + referenced (for '__cxa_atexit'), and in that case, we also have to run + '__cxa_finalize'. Make that happen by overriding the weak libgcc dummy + function '__GCC_offload___cxa_finalize'. 
*/ + +void +__GCC_offload___cxa_finalize (void *dso_handle) +{ + __cxa_finalize (dso_handle); +} diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h index 924fc1f..191106b 100644 --- a/libgomp/libgomp-plugin.h +++ b/libgomp/libgomp-plugin.h @@ -177,6 +177,7 @@ extern int GOMP_OFFLOAD_memcpy3d (int, int, size_t, size_t, size_t, void *, size_t, size_t, size_t, size_t, size_t, const void *, size_t, size_t, size_t, size_t, size_t); +extern bool GOMP_OFFLOAD_memset (int, void *, int, size_t); extern bool GOMP_OFFLOAD_can_run (void *); extern void GOMP_OFFLOAD_run (int, void *, void *, void **); extern void GOMP_OFFLOAD_async_run (int, void *, void *, void **, void *); @@ -200,6 +201,8 @@ extern bool GOMP_OFFLOAD_openacc_async_dev2host (int, void *, const void *, size struct goacc_asyncqueue *); extern bool GOMP_OFFLOAD_openacc_async_host2dev (int, void *, const void *, size_t, struct goacc_asyncqueue *); +extern bool GOMP_OFFLOAD_openacc_async_dev2dev (int, void *, const void *, size_t, + struct goacc_asyncqueue *); extern void *GOMP_OFFLOAD_openacc_cuda_get_current_device (void); extern void *GOMP_OFFLOAD_openacc_cuda_get_current_context (void); extern void *GOMP_OFFLOAD_openacc_cuda_get_stream (struct goacc_asyncqueue *); diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index d97768f..a433983 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -1360,6 +1360,7 @@ typedef struct acc_dispatch_t __typeof (GOMP_OFFLOAD_openacc_async_exec) *exec_func; __typeof (GOMP_OFFLOAD_openacc_async_dev2host) *dev2host_func; __typeof (GOMP_OFFLOAD_openacc_async_host2dev) *host2dev_func; + __typeof (GOMP_OFFLOAD_openacc_async_dev2dev) *dev2dev_func; } async; __typeof (GOMP_OFFLOAD_openacc_get_property) *get_property_func; @@ -1420,9 +1421,10 @@ struct gomp_device_descr __typeof (GOMP_OFFLOAD_free) *free_func; __typeof (GOMP_OFFLOAD_dev2host) *dev2host_func; __typeof (GOMP_OFFLOAD_host2dev) *host2dev_func; + __typeof (GOMP_OFFLOAD_dev2dev) *dev2dev_func; __typeof (GOMP_OFFLOAD_memcpy2d) *memcpy2d_func; __typeof (GOMP_OFFLOAD_memcpy3d) *memcpy3d_func; - __typeof (GOMP_OFFLOAD_dev2dev) *dev2dev_func; + __typeof (GOMP_OFFLOAD_memset) *memset_func; __typeof (GOMP_OFFLOAD_can_run) *can_run_func; __typeof (GOMP_OFFLOAD_run) *run_func; __typeof (GOMP_OFFLOAD_async_run) *async_run_func; @@ -1467,11 +1469,14 @@ extern void gomp_copy_host2dev (struct gomp_device_descr *, extern void gomp_copy_dev2host (struct gomp_device_descr *, struct goacc_asyncqueue *, void *, const void *, size_t); +extern void gomp_copy_dev2dev (struct gomp_device_descr *, + struct goacc_asyncqueue *, void *, const void *, + size_t); extern uintptr_t gomp_map_val (struct target_mem_desc *, void **, size_t); -extern void gomp_attach_pointer (struct gomp_device_descr *, +extern bool gomp_attach_pointer (struct gomp_device_descr *, struct goacc_asyncqueue *, splay_tree, splay_tree_key, uintptr_t, size_t, - struct gomp_coalesce_buf *, bool); + struct gomp_coalesce_buf *, bool, bool); extern void gomp_detach_pointer (struct gomp_device_descr *, struct goacc_asyncqueue *, splay_tree_key, uintptr_t, bool, struct gomp_coalesce_buf *); diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map index eae2f53..f6aee7c 100644 --- a/libgomp/libgomp.map +++ b/libgomp/libgomp.map @@ -453,6 +453,12 @@ GOMP_6.0 { omp_get_uid_from_device_8_; } GOMP_5.1.3; +GOMP_6.0.1 { + global: + omp_target_memset; + omp_target_memset_async; +} GOMP_6.0; + OACC_2.0 { global: acc_get_num_devices; @@ -609,6 +615,12 @@ OACC_2.6 { acc_get_property_string_h_; } 
OACC_2.5.1; +OACC_2.6.1 { + global: + acc_memcpy_device; + acc_memcpy_device_async; +} OACC_2.6; + GOACC_2.0 { global: GOACC_data_end; diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi index 4217c29..5518033 100644 --- a/libgomp/libgomp.texi +++ b/libgomp/libgomp.texi @@ -258,7 +258,7 @@ The OpenMP 4.5 specification is fully supported. device memory mapped by an array section @tab P @tab @item Mapping of Fortran pointer and allocatable variables, including pointer and allocatable components of variables - @tab P @tab Mapping of vars with allocatable components unsupported + @tab Y @tab @item @code{defaultmap} extensions @tab Y @tab @item @code{declare mapper} directive @tab N @tab @item @code{omp_get_supported_active_levels} routine @tab Y @tab @@ -603,7 +603,7 @@ to address of matching mapped list item per 5.1, Sect. 2.21.7.2 @tab N @tab @code{omp_get_device_teams_thread_limit}, and @code{omp_set_device_teams_thread_limit} routines @tab N @tab @item @code{omp_target_memset} and @code{omp_target_memset_async} routines - @tab N @tab + @tab Y @tab @item Fortran version of the interop runtime routines @tab Y @tab @item Routines for obtaining memory spaces/allocators for shared/device memory @tab N @tab @@ -1802,6 +1802,11 @@ Returns the number of available non-host devices. The effect of running this routine in a @code{target} region is unspecified. +Note that in GCC the function is marked pure, i.e. as returning always the +same number. When GCC was not configured to support offloading, it is replaced +by zero; compile with @option{-fno-builtin-omp_get_num_devices} if a run-time +function is desired. + @item @emph{C/C++}: @multitable @columnfractions .20 .80 @item @emph{Prototype}: @tab @code{int omp_get_num_devices(void);} @@ -1812,6 +1817,9 @@ The effect of running this routine in a @code{target} region is unspecified. @item @emph{Interface}: @tab @code{integer function omp_get_num_devices()} @end multitable +@item @emph{See also}: +@ref{omp_get_initial_device} + @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.31. @end table @@ -1950,6 +1958,12 @@ the value of @code{omp_initial_device}. The effect of running this routine in a @code{target} region is unspecified. +Note that GCC inlines this function unless you compile with +@option{-fno-builtin-omp_get_initial_device}. If GCC was not configured to +support offloading, it expands to constant zero; in non-host code it expands +to @code{omp_initial_device}; and otherwise it is replaced with a call to +@code{omp_get_num_devices}. + @item @emph{C/C++} @multitable @columnfractions .20 .80 @item @emph{Prototype}: @tab @code{int omp_get_initial_device(void);} @@ -1984,8 +1998,8 @@ pointers on devices. They have C linkage and do not throw exceptions. * omp_target_memcpy_async:: Copy data between devices asynchronously * omp_target_memcpy_rect:: Copy a subvolume of data between devices * omp_target_memcpy_rect_async:: Copy a subvolume of data between devices asynchronously -@c * omp_target_memset:: <fixme>/TR12 -@c * omp_target_memset_async:: <fixme>/TR12 +* omp_target_memset:: Set bytes in device memory +* omp_target_memset_async:: Set bytes in device memory asynchronously * omp_target_associate_ptr:: Associate a device pointer with a host pointer * omp_target_disassociate_ptr:: Remove device--host pointer association * omp_get_mapped_ptr:: Return device pointer to a host pointer @@ -2316,7 +2330,7 @@ the initial device. 
@end multitable @item @emph{See also}: -@ref{omp_target_memcpy_rect_async}, @ref{omp_target_memcpy} +@ref{omp_target_memcpy_rect_async}, @ref{omp_target_memcpy}, @ref{Offload-Target Specifics} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v5.1}, Section 3.8.6 @end table @@ -2391,13 +2405,105 @@ the initial device. @end multitable @item @emph{See also}: -@ref{omp_target_memcpy_rect}, @ref{omp_target_memcpy_async} +@ref{omp_target_memcpy_rect}, @ref{omp_target_memcpy_async}, @ref{Offload-Target Specifics} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v5.1}, Section 3.8.8 @end table +@node omp_target_memset +@subsection @code{omp_target_memset} -- Set bytes in device memory +@table @asis +@item @emph{Description}: +This routine fills memory on the device identified by device number +@var{device_num}. Starting from the device address @var{ptr}, the first +@var{count} bytes are set to the value @var{val}, converted to +@code{unsigned char}. If @var{count} is zero, the routine has no effect; +if @var{ptr} is @code{NULL}, the behavior is unspecified. The function +returns @var{ptr}. + +The @var{device_num} must be a conforming device number and @var{ptr} must be +a valid device pointer for that device. Running this routine in a +@code{target} region except on the initial device is not supported. + +@item @emph{C/C++} +@multitable @columnfractions .20 .80 +@item @emph{Prototype}: @tab @code{void *omp_target_memset(void *ptr,} +@item @tab @code{ int val,} +@item @tab @code{ size_t count,} +@item @tab @code{ int device_num)} +@end multitable + +@item @emph{Fortran}: +@multitable @columnfractions .20 .80 +@item @emph{Interface}: @tab @code{type(c_ptr) function omp_target_memset( &} +@item @tab @code{ ptr, val, count, device_num) bind(C)} +@item @tab @code{use, intrinsic :: iso_c_binding, only: c_ptr, c_size_t, c_int} +@item @tab @code{type(c_ptr), value :: ptr} +@item @tab @code{integer(c_size_t), value :: count} +@item @tab @code{integer(c_int), value :: val, device_num} +@end multitable + +@item @emph{See also}: +@ref{omp_target_memset_async} + +@item @emph{Reference}: +@uref{https://www.openmp.org, OpenMP specification v6.0}, Section 25.8.1 +@end table + + + +@node omp_target_memset_async +@subsection @code{omp_target_memset_async} -- Set bytes in device memory asynchronously +@table @asis +@item @emph{Description}: +This routine fills memory on the device identified by device number +@var{device_num}. Starting from the device address @var{ptr}, the first +@var{count} bytes are set to the value @var{val}, converted to +@code{unsigned char}. If @var{count} is zero, the routine has no effect; +if @var{ptr} is @code{NULL}, the behavior is unspecified. Task dependence +is expressed by passing an array of depend objects to @var{depobj_list}, where +the number of array elements is passed as @var{depobj_count}; if the count is +zero, the @var{depobj_list} argument is ignored. In C++ and Fortran, the +@var{depobj_list} argument can also be omitted in that case. The function +returns @var{ptr}. + +The @var{device_num} must be a conforming device number and @var{ptr} must be +a valid device pointer for that device. Running this routine in a +@code{target} region except on the initial device is not supported.
+ +@item @emph{C/C++} +@multitable @columnfractions .20 .80 +@item @emph{Prototype}: @tab @code{void *omp_target_memset_async(void *ptr,} +@item @tab @code{ int val,} +@item @tab @code{ size_t count,} +@item @tab @code{ int device_num,} +@item @tab @code{ int depobj_count,} +@item @tab @code{ omp_depend_t *depobj_list)} +@end multitable + +@item @emph{Fortran}: +@multitable @columnfractions .20 .80 +@item @emph{Interface}: @tab @code{type(c_ptr) function omp_target_memset_async( &} +@item @tab @code{ ptr, val, count, device_num, &} +@item @tab @code{ depobj_count, depobj_list) bind(C)} +@item @tab @code{use, intrinsic :: iso_c_binding, only: c_ptr, c_size_t, c_int} +@item @tab @code{type(c_ptr), value :: ptr} +@item @tab @code{integer(c_size_t), value :: count} +@item @tab @code{integer(c_int), value :: val, device_num, depobj_count} +@item @tab @code{integer(omp_depend_kind), optional :: depobj_list(*)} +@end multitable + + +@item @emph{See also}: +@ref{omp_target_memset} + +@item @emph{Reference}: +@uref{https://www.openmp.org, OpenMP specification v6.0}, Section 25.8.2 +@end table + + @node omp_target_associate_ptr @subsection @code{omp_target_associate_ptr} -- Associate a device pointer with a host pointer @@ -3038,6 +3144,11 @@ and Fortran or used with @code{NULL} as argument in C and C++. If successful, In GCC, the effect of running this routine in a @code{target} region that is not the initial device is unspecified. +GCC implements the OpenMP 6.0 version of this function for C and C++, which is not +compatible with its type signature in previous versions of the OpenMP specification. +In older versions, the type @code{int*} was used for the @var{ret_code} argument +in place of a pointer to the enumerated type @code{omp_interop_rc_t}. + @c Implementation remark: In GCC, the Fortran interface differs from the one shown @c below: the function has C binding and @var{interop} and @var{property_id} are @c passed by value, which permits use of the same ABI as the C function. This does @@ -3084,6 +3195,11 @@ and Fortran or used with @code{NULL} as argument in C and C++. If successful, In GCC, the effect of running this routine in a @code{target} region that is not the initial device is unspecified. +GCC implements the OpenMP 6.0 version of this function for C and C++, which is not +compatible with its type signature in previous versions of the OpenMP specification. +In older versions, the type @code{int*} was used for the @var{ret_code} argument +in place of a pointer to the enumerated type @code{omp_interop_rc_t}. + @c Implementation remark: In GCC, the Fortran interface differs from the one shown @c below: the function has C binding and @var{interop} and @var{property_id} are @c passed by value, which permits use of the same ABI as the C function. This does @@ -3130,6 +3246,11 @@ and Fortran or used with @code{NULL} as argument in C and C++. If successful, In GCC, the effect of running this routine in a @code{target} region that is not the initial device is unspecified. +GCC implements the OpenMP 6.0 version of this function for C and C++, which is not +compatible with its type signature in previous versions of the OpenMP specification. +In older versions, the type @code{int*} was used for the @var{ret_code} argument +in place of a pointer to the enumerated type @code{omp_interop_rc_t}. + @c Implementation remark: In GCC, the Fortran interface differs from the one shown @c below: @var{interop} and @var{property_id} are passed by value.
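[Editorial illustration, not part of the patch: a minimal sketch of the omp_target_memset routine documented above, filling a device buffer and copying it back for checking. The device-number handling, buffer size, fill value, and variable names are assumptions made for this example; the _async variant additionally takes the depobj_count/depobj_list pair.]

/* Illustrative only: fill a device buffer via omp_target_memset and copy
   it back for checking; sizes, the fill value, and names are made up.  */
#include <omp.h>
#include <stdlib.h>

int
main (void)
{
  int dev = omp_get_default_device ();
  int host_dev = omp_get_initial_device ();
  size_t n = 1024;

  unsigned char *devbuf = (unsigned char *) omp_target_alloc (n, dev);
  if (devbuf == NULL)
    return 0;   /* No device memory available; nothing to do.  */

  /* Set all N bytes on device DEV to 0xab; the routine returns PTR.  */
  omp_target_memset (devbuf, 0xab, n, dev);

  unsigned char *host = (unsigned char *) malloc (n);
  /* Copy the device buffer back to the host for checking.  */
  omp_target_memcpy (host, devbuf, n, 0, 0, host_dev, dev);
  for (size_t i = 0; i < n; i++)
    if (host[i] != 0xab)
      abort ();

  free (host);
  omp_target_free (devbuf, dev);
  return 0;
}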
This does not @c affect the usage of the function when GCC's @code{omp_lib} module or @@ -3256,6 +3377,11 @@ the @var{ret_code} in human-readable form. The behavior is unspecified if value of @var{ret_code} was not set by an interoperability routine invoked for @var{interop}. +GCC implements the OpenMP 6.0 version of this function for C and C++, which is not +compatible with its type signature in previous versions of the OpenMP specification. +In older versions, the type @code{int} was used for the @var{ret_code} argument +in place of the enumerated type @code{omp_interop_rc_t}. + @item @emph{C/C++}: @multitable @columnfractions .20 .80 @item @emph{Prototype}: @tab @code{const char *omp_get_interop_rc_desc(const omp_interop_t interop, @@ -3327,7 +3453,7 @@ traits; if an allocator that fulfills the requirements cannot be created, @code{omp_null_allocator} is returned. The predefined memory spaces and available traits can be found at -@ref{OMP_ALLOCATOR}, where the trait names have to be prefixed by +@ref{Memory allocation}, where the trait names have to be prefixed by @code{omp_atk_} (e.g. @code{omp_atk_pinned}) and the named trait values by @code{omp_atv_} (e.g. @code{omp_atv_true}); additionally, @code{omp_atv_default} may be used as trait value to specify that the default value should be used. @@ -3350,7 +3476,7 @@ may be used as trait value to specify that the default value should be used. @end multitable @item @emph{See also}: -@ref{OMP_ALLOCATOR}, @ref{Memory allocation}, @ref{omp_destroy_allocator} +@ref{Memory allocation}, @ref{OMP_ALLOCATOR}, @ref{omp_destroy_allocator} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v5.0}, Section 3.7.2 @@ -3931,63 +4057,15 @@ The value can either be a predefined allocator or a predefined memory space or a predefined memory space followed by a colon and a comma-separated list of memory trait and value pairs, separated by @code{=}. +See @ref{Memory allocation} for a list of supported prefedined allocators, +memory spaces, and traits. + Note: The corresponding device environment variables are currently not supported. Therefore, the non-host @var{def-allocator-var} ICVs are always initialized to @code{omp_default_mem_alloc}. However, on all devices, the @code{omp_set_default_allocator} API routine can be used to change value. -@multitable @columnfractions .45 .45 -@headitem Predefined allocators @tab Associated predefined memory spaces -@item omp_default_mem_alloc @tab omp_default_mem_space -@item omp_large_cap_mem_alloc @tab omp_large_cap_mem_space -@item omp_const_mem_alloc @tab omp_const_mem_space -@item omp_high_bw_mem_alloc @tab omp_high_bw_mem_space -@item omp_low_lat_mem_alloc @tab omp_low_lat_mem_space -@item omp_cgroup_mem_alloc @tab omp_low_lat_mem_space (implementation defined) -@item omp_pteam_mem_alloc @tab omp_low_lat_mem_space (implementation defined) -@item omp_thread_mem_alloc @tab omp_low_lat_mem_space (implementation defined) -@item ompx_gnu_pinned_mem_alloc @tab omp_default_mem_space (GNU extension) -@end multitable - -The predefined allocators use the default values for the traits, -as listed below. Except that the last three allocators have the -@code{access} trait set to @code{cgroup}, @code{pteam}, and -@code{thread}, respectively. 
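[Editorial illustration, not part of the patch: a minimal sketch of the omp_init_allocator usage described above, using the omp_atk_/omp_atv_ trait naming (pinned memory, 64-byte alignment). The chosen memory space, traits, and variable names are arbitrary assumptions.]

/* Illustrative only: build an allocator with the pinned and alignment
   traits, then allocate and release through it.  */
#include <omp.h>

int
main (void)
{
  omp_alloctrait_t traits[] = {
    { omp_atk_pinned,    omp_atv_true },
    { omp_atk_alignment, 64 }
  };
  omp_allocator_handle_t al
    = omp_init_allocator (omp_default_mem_space, 2, traits);
  if (al == omp_null_allocator)
    return 1;   /* The requested traits could not be fulfilled.  */

  double *p = (double *) omp_alloc (1024 * sizeof (double), al);
  if (p)
    {
      p[0] = 42.0;
      omp_free (p, al);
    }
  omp_destroy_allocator (al);
  return 0;
}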
- -@multitable @columnfractions .25 .40 .25 -@headitem Trait @tab Allowed values @tab Default value -@item @code{sync_hint} @tab @code{contended}, @code{uncontended}, - @code{serialized}, @code{private} - @tab @code{contended} -@item @code{alignment} @tab Positive integer being a power of two - @tab 1 byte -@item @code{access} @tab @code{all}, @code{cgroup}, - @code{pteam}, @code{thread} - @tab @code{all} -@item @code{pool_size} @tab Positive integer - @tab See @ref{Memory allocation} -@item @code{fallback} @tab @code{default_mem_fb}, @code{null_fb}, - @code{abort_fb}, @code{allocator_fb} - @tab See below -@item @code{fb_data} @tab @emph{unsupported as it needs an allocator handle} - @tab (none) -@item @code{pinned} @tab @code{true}, @code{false} - @tab See below -@item @code{partition} @tab @code{environment}, @code{nearest}, - @code{blocked}, @code{interleaved} - @tab @code{environment} -@end multitable - -For the @code{fallback} trait, the default value is @code{null_fb} for the -@code{omp_default_mem_alloc} allocator and any allocator that is associated -with device memory; for all other allocators, it is @code{default_mem_fb} -by default. - -For the @code{pinned} trait, the default value is @code{true} for -predefined allocator @code{ompx_gnu_pinned_mem_alloc} (a GNU extension), and -@code{false} for all others. - Examples: @smallexample OMP_ALLOCATOR=omp_high_bw_mem_alloc @@ -4763,6 +4841,7 @@ acceleration device. present on device. * acc_memcpy_to_device:: Copy host memory to device memory. * acc_memcpy_from_device:: Copy device memory to host memory. +* acc_memcpy_device:: Copy memory within a device. * acc_attach:: Let device pointer point to device-pointer target. * acc_detach:: Let device pointer point to host-pointer target. @@ -5837,6 +5916,44 @@ This function copies device memory specified by device address of +@node acc_memcpy_device +@section @code{acc_memcpy_device} -- Copy memory within a device. +@table @asis +@item @emph{Description} +This function copies device memory from one memory location to another +on the current device. It copies @var{bytes} bytes of data from the device +address, specified by @var{data_dev_src}, to the device address +@var{data_dev_dest}. The @code{_async} version performs the transfer +asynchronously using the queue associated with @var{async_arg}. + +@item @emph{C/C++}: +@multitable @columnfractions .20 .80 +@item @emph{Prototype}: @tab @code{void acc_memcpy_device(d_void* data_dev_dest,} +@item @tab @code{d_void* data_dev_src, size_t bytes);} +@item @emph{Prototype}: @tab @code{void acc_memcpy_device_async(d_void* data_dev_dest,} +@item @tab @code{d_void* data_dev_src, size_t bytes, int async_arg);} +@end multitable + +@item @emph{Fortran}: +@multitable @columnfractions .20 .80 +@item @emph{Interface}: @tab @code{subroutine acc_memcpy_device(data_dev_dest, &} +@item @tab @code{data_dev_src, bytes)} +@item @emph{Interface}: @tab @code{subroutine acc_memcpy_device_async(data_dev_dest, &} +@item @tab @code{data_dev_src, bytes, async_arg)} +@item @tab @code{type(c_ptr), value :: data_dev_dest} +@item @tab @code{type(c_ptr), value :: data_dev_src} +@item @tab @code{integer(c_size_t), value :: bytes} +@item @tab @code{integer(acc_handle_kind), value :: async_arg} +@end multitable + +@item @emph{Reference}: +@uref{https://www.openacc.org, OpenACC specification v2.6}, section +3.2.33. @uref{https://www.openacc.org, OpenACC specification v3.3}, section +3.2.28. 
+@end table + + + @node acc_attach @section @code{acc_attach} -- Let device pointer point to device-pointer target. @table @asis @@ -5850,19 +5967,19 @@ address to pointing to the corresponding device data. @item @emph{Prototype}: @tab @code{void acc_attach_async(h_void **ptr_addr, int async);} @end multitable -@c @item @emph{Fortran}: -@c @multitable @columnfractions .20 .80 -@c @item @emph{Interface}: @tab @code{subroutine acc_attach(ptr_addr)} -@c @item @emph{Interface}: @tab @code{subroutine acc_attach_async(ptr_addr, async_arg)} -@c @item @tab @code{type(*), dimension(..) :: ptr_addr} -@c @item @tab @code{integer(acc_handle_kind), value :: async_arg} -@c @end multitable +@item @emph{Fortran}: +@multitable @columnfractions .20 .80 +@item @emph{Interface}: @tab @code{subroutine acc_attach(ptr_addr)} +@item @emph{Interface}: @tab @code{subroutine acc_attach_async(ptr_addr, async_arg)} +@item @tab @code{type(*), dimension(..) :: ptr_addr} +@item @tab @code{integer(acc_handle_kind), value :: async_arg} +@end multitable @item @emph{Reference}: @uref{https://www.openacc.org, OpenACC specification v2.6}, section 3.2.34. -@c @uref{https://www.openacc.org, OpenACC specification v3.3}, section -@c 3.2.29. + @uref{https://www.openacc.org, OpenACC specification v3.3}, section +3.2.29. @end table @@ -5882,21 +5999,21 @@ address to pointing to the corresponding host data. @item @emph{Prototype}: @tab @code{void acc_detach_finalize_async(h_void **ptr_addr, int async);} @end multitable -@c @item @emph{Fortran}: -@c @multitable @columnfractions .20 .80 -@c @item @emph{Interface}: @tab @code{subroutine acc_detach(ptr_addr)} -@c @item @emph{Interface}: @tab @code{subroutine acc_detach_async(ptr_addr, async_arg)} -@c @item @emph{Interface}: @tab @code{subroutine acc_detach_finalize(ptr_addr)} -@c @item @emph{Interface}: @tab @code{subroutine acc_detach_finalize_async(ptr_addr, async_arg)} -@c @item @tab @code{type(*), dimension(..) :: ptr_addr} -@c @item @tab @code{integer(acc_handle_kind), value :: async_arg} -@c @end multitable +@item @emph{Fortran}: +@multitable @columnfractions .20 .80 +@item @emph{Interface}: @tab @code{subroutine acc_detach(ptr_addr)} +@item @emph{Interface}: @tab @code{subroutine acc_detach_async(ptr_addr, async_arg)} +@item @emph{Interface}: @tab @code{subroutine acc_detach_finalize(ptr_addr)} +@item @emph{Interface}: @tab @code{subroutine acc_detach_finalize_async(ptr_addr, async_arg)} +@item @tab @code{type(*), dimension(..) :: ptr_addr} +@item @tab @code{integer(acc_handle_kind), value :: async_arg} +@end multitable @item @emph{Reference}: @uref{https://www.openacc.org, OpenACC specification v2.6}, section 3.2.35. -@c @uref{https://www.openacc.org, OpenACC specification v3.3}, section -@c 3.2.29. +@uref{https://www.openacc.org, OpenACC specification v3.3}, section +3.2.29. @end table @@ -6718,6 +6835,7 @@ on more architectures, GCC currently does not match any @code{arch} or @tab See @code{-march=} in ``Nvidia PTX Options'' @end multitable + @node Memory allocation @section Memory allocation @@ -6752,11 +6870,94 @@ The description below applies to: @code{_Alignof} and C++'s @code{alignof}. @end itemize -For the available predefined allocators and, as applicable, their associated -predefined memory spaces and for the available traits and their default values, -see @ref{OMP_ALLOCATOR}. Predefined allocators without an associated memory -space use the @code{omp_default_mem_space} memory space. See additionally -@ref{Offload-Target Specifics}. 
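[Editorial illustration, not part of the patch: a minimal sketch of the acc_memcpy_device routine documented earlier in this section, duplicating a buffer on the current device without a host round trip. Buffer size and variable names are assumptions made for this example.]

/* Illustrative only: device-to-device copy with acc_memcpy_device.  */
#include <openacc.h>
#include <stdlib.h>
#include <string.h>

int
main (void)
{
  size_t n = 256 * sizeof (float);
  float *host = (float *) malloc (n);
  for (int i = 0; i < 256; i++)
    host[i] = i;

  void *dev_a = acc_malloc (n);
  void *dev_b = acc_malloc (n);
  acc_memcpy_to_device (dev_a, host, n);
  /* Copy within the current device; no transfer through the host.  */
  acc_memcpy_device (dev_b, dev_a, n);

  float *check = (float *) calloc (256, sizeof (float));
  acc_memcpy_from_device (check, dev_b, n);
  if (memcmp (host, check, n) != 0)
    abort ();

  acc_free (dev_a);
  acc_free (dev_b);
  free (host);
  free (check);
  return 0;
}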
+GCC supports the following predefined allocators and predefined memory spaces: + +@multitable @columnfractions .45 .45 +@headitem Predefined allocators @tab Associated predefined memory spaces +@item omp_default_mem_alloc @tab omp_default_mem_space +@item omp_large_cap_mem_alloc @tab omp_large_cap_mem_space +@item omp_const_mem_alloc @tab omp_const_mem_space +@item omp_high_bw_mem_alloc @tab omp_high_bw_mem_space +@item omp_low_lat_mem_alloc @tab omp_low_lat_mem_space +@item omp_cgroup_mem_alloc @tab omp_low_lat_mem_space (implementation defined) +@item omp_pteam_mem_alloc @tab omp_low_lat_mem_space (implementation defined) +@item omp_thread_mem_alloc @tab omp_low_lat_mem_space (implementation defined) +@item ompx_gnu_pinned_mem_alloc @tab omp_default_mem_space (GNU extension) +@end multitable + +Each predefined allocator, including @code{omp_null_allocator}, has a corresponding +allocator class template that meets the C++ allocator completeness requirements. +These are located in the @code{omp::allocator} namespace, and in the +@code{ompx::allocator} namespace for GNU extensions. This allows +allocator-aware C++ standard library containers to use OpenMP allocation routines; +for instance: + +@smallexample +std::vector<int, omp::allocator::cgroup_mem<int>> vec; +@end smallexample + +The following allocator templates are supported: + +@multitable @columnfractions .45 .45 +@headitem Predefined allocators @tab Associated allocator template +@item omp_null_allocator @tab omp::allocator::null_allocator +@item omp_default_mem_alloc @tab omp::allocator::default_mem +@item omp_large_cap_mem_alloc @tab omp::allocator::large_cap_mem +@item omp_const_mem_alloc @tab omp::allocator::const_mem +@item omp_high_bw_mem_alloc @tab omp::allocator::high_bw_mem +@item omp_low_lat_mem_alloc @tab omp::allocator::low_lat_mem +@item omp_cgroup_mem_alloc @tab omp::allocator::cgroup_mem +@item omp_pteam_mem_alloc @tab omp::allocator::pteam_mem +@item omp_thread_mem_alloc @tab omp::allocator::thread_mem +@item ompx_gnu_pinned_mem_alloc @tab ompx::allocator::gnu_pinned_mem +@end multitable + +The following traits are available when constructing a new allocator; +if a trait is not specified or is given the value @code{default}, the +default value listed below is used for that trait. The predefined +allocators use the default values of each trait, except that the +@code{omp_cgroup_mem_alloc}, @code{omp_pteam_mem_alloc}, and +@code{omp_thread_mem_alloc} allocators have the @code{access} trait +set to @code{cgroup}, @code{pteam}, and @code{thread}, respectively. +For each trait, a named constant prefixed by @code{omp_atk_} exists; +for each non-numeric value, a named constant prefixed by @code{omp_atv_} +exists. + +@multitable @columnfractions .25 .40 .25 +@headitem Trait @tab Allowed values @tab Default value +@item @code{sync_hint} @tab @code{contended}, @code{uncontended}, + @code{serialized}, @code{private} + @tab @code{contended} +@item @code{alignment} @tab Positive integer being a power of two + @tab 1 byte +@item @code{access} @tab @code{all}, @code{cgroup}, + @code{pteam}, @code{thread} + @tab @code{all} +@item @code{pool_size} @tab Positive integer (bytes) + @tab See below.
+@item @code{fallback} @tab @code{default_mem_fb}, @code{null_fb}, + @code{abort_fb}, @code{allocator_fb} + @tab See below +@item @code{fb_data} @tab @emph{allocator handle} + @tab (none) +@item @code{pinned} @tab @code{true}, @code{false} + @tab See below +@item @code{partition} @tab @code{environment}, @code{nearest}, + @code{blocked}, @code{interleaved} + @tab @code{environment} +@end multitable + +For the @code{fallback} trait, the default value is @code{null_fb} for the +@code{omp_default_mem_alloc} allocator and any allocator that is associated +with device memory; for all other allocators, it is @code{default_mem_fb} +by default. + +For the @code{pinned} trait, the default value is @code{true} for +predefined allocator @code{ompx_gnu_pinned_mem_alloc} (a GNU extension), and +@code{false} for all others. + +The following description applies to the initial device (the host) and largely +also to non-host devices; for the latter, also see @ref{Offload-Target Specifics}. For the memory spaces, the following applies: @itemize @end itemize On Linux systems, where the @uref{https://github.com/memkind/memkind, memkind -library} (@code{libmemkind.so.0}) is available at runtime, it is used when -creating memory allocators requesting +library} (@code{libmemkind.so.0}) is available at runtime and the respective +memkind kind is supported, it is used when creating memory allocators requesting @itemize -@item the memory space @code{omp_high_bw_mem_space} -@item the memory space @code{omp_large_cap_mem_space} -@item the @code{partition} trait @code{interleaved}; note that for - @code{omp_large_cap_mem_space} the allocation will not be interleaved +@item the @code{partition} trait @code{interleaved} except when the memory space + is @code{omp_large_cap_mem_space} (uses @code{MEMKIND_HBW_INTERLEAVE}) +@item the memory space @code{omp_high_bw_mem_space} (uses + @code{MEMKIND_HBW_PREFERRED}) +@item the memory space @code{omp_large_cap_mem_space} (uses + @code{MEMKIND_DAX_KMEM_ALL} or, if not available, @code{MEMKIND_DAX_KMEM}) @end itemize On Linux systems, where the @uref{https://github.com/numactl/numactl, numa @@ -6804,10 +7007,15 @@ a @code{nearest} allocation. Additional notes regarding the traits: @itemize @item The @code{pinned} trait is supported on Linux hosts, but is subject to - the OS @code{ulimit}/@code{rlimit} locked memory settings. + the OS @code{ulimit}/@code{rlimit} locked memory settings. It currently + uses @code{mmap} and is therefore optimized for few allocations, including + large data. If the conditions for numa or memkind allocations are + fulfilled, those allocators are used instead. @item The default for the @code{pool_size} trait is no pool and for every (re)allocation the associated library routine is called, which might - internally use a memory pool. + internally use a memory pool. Currently, the same applies when a + @code{pool_size} has been specified, except that once allocations exceed + the pool size, the action of the @code{fallback} trait applies. @item For the @code{partition} trait, the partition part size will be the same as the requested size (i.e. @code{interleaved} or @code{blocked} has no effect), except for @code{interleaved} when the memkind library is @@ -6816,13 +7024,15 @@ Additional notes regarding the traits: that allocated the memory; on Linux, this is in particular the case when the memory placement policy is set to preferred.
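The traits in the table above are combined at run time via @code{omp_init_allocator}; the following hedged sketch (not part of this patch; the trait values are arbitrary examples) requests 64-byte alignment and the @code{null_fb} fallback for an allocator in @code{omp_default_mem_space}:

/* Hedged illustration only: build an allocator with explicit traits.  */
#include <omp.h>
#include <stdio.h>

int
main (void)
{
  omp_alloctrait_t traits[2];
  traits[0].key = omp_atk_alignment;
  traits[0].value = 64;                 /* Power of two, in bytes.  */
  traits[1].key = omp_atk_fallback;
  traits[1].value = omp_atv_null_fb;    /* Return NULL rather than aborting.  */

  omp_allocator_handle_t al
    = omp_init_allocator (omp_default_mem_space, 2, traits);
  if (al == omp_null_allocator)
    return 1;

  double *v = (double *) omp_alloc (1024 * sizeof (double), al);
  if (v == NULL)                        /* Possible because of null_fb.  */
    return 1;
  v[0] = 42.0;
  printf ("%g\n", v[0]);

  omp_free (v, al);
  omp_destroy_allocator (al);
  return 0;
}

With @code{null_fb}, a failing allocation yields a null pointer instead of falling back to @code{omp_default_mem_alloc} or aborting.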
@item The @code{access} trait has no effect such that memory is always - accessible by all threads. + accessible by all threads. (Except on supported no-host devices.) @item The @code{sync_hint} trait has no effect. @end itemize See also: @ref{Offload-Target Specifics} + + @c --------------------------------------------------------------------- @c Offload-Target Specifics @c --------------------------------------------------------------------- @@ -6888,7 +7098,7 @@ The implementation remark: @code{device(ancestor:1)}) are processed serially per @code{target} region such that the next reverse offload region is only executed after the previous one returned. -@item OpenMP code that has a @code{requires} directive with +@item OpenMP code that has a @code{requires} directive with @code{self_maps} or @code{unified_shared_memory} is only supported if all AMD GPUs have the @code{HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT} property; for discrete GPUs, this may require setting the @code{HSA_XNACK} environment @@ -6911,6 +7121,11 @@ The implementation remark: @code{omp_thread_mem_alloc}, all use low-latency memory as first preference, and fall back to main graphics memory when the low-latency pool is exhausted. +@item The OpenMP routines @code{omp_target_memcpy_rect} and + @code{omp_target_memcpy_rect_async} and the @code{target update} + directive for non-contiguous list items use the 3D memory-copy function + of the HSA library. Higher dimensions call this functions in a loop and + are therefore supported. @item The unique identifier (UID), used with OpenMP's API UID routines, is the value returned by the HSA runtime library for @code{HSA_AMD_AGENT_INFO_UUID}. For GPUs, it is currently @samp{GPU-} followed by 16 lower-case hex digits, @@ -6940,6 +7155,9 @@ or string (str) data type, call @code{omp_get_interop_int}, Note that @code{device_num} is the OpenMP device number while @code{device} is the HIP device number or HSA device handle. +When using HIP with C and C++, the @code{__HIP_PLATFORM_AMD__} preprocessor +macro must be defined before including the HIP header files. + For the API routine call, add the prefix @code{omp_ipr_} to the property name; for instance: @smallexample @@ -7040,7 +7258,7 @@ The implementation remark: Per device, reverse offload regions are processed serially such that the next reverse offload region is only executed after the previous one returned. -@item OpenMP code that has a @code{requires} directive with +@item OpenMP code that has a @code{requires} directive with @code{self_maps} or @code{unified_shared_memory} runs on nvptx devices if and only if all of those support the @code{pageableMemoryAccess} property;@footnote{ @uref{https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#um-requirements}} @@ -7048,11 +7266,6 @@ The implementation remark: devices (``host fallback''). @item The default per-warp stack size is 128 kiB; see also @code{-msoft-stack} in the GCC manual. -@item The OpenMP routines @code{omp_target_memcpy_rect} and - @code{omp_target_memcpy_rect_async} and the @code{target update} - directive for non-contiguous list items will use the 2D and 3D - memory-copy functions of the CUDA library. Higher dimensions will - call those functions in a loop and are therefore supported. 
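For reference, the @code{omp_target_memcpy_rect} routine mentioned in the remarks above performs strided, rectangular copies; here is a hedged C sketch (not part of this patch; the matrix shape, offsets, and device handling are illustrative assumptions):

/* Hedged illustration only: copy a 2x3 sub-rectangle of a 4x5 host matrix
   to a device allocation with a single 2-D omp_target_memcpy_rect call.  */
#include <omp.h>
#include <stdlib.h>

int
main (void)
{
  enum { ROWS = 4, COLS = 5 };
  int host[ROWS][COLS];
  for (int i = 0; i < ROWS; ++i)
    for (int j = 0; j < COLS; ++j)
      host[i][j] = i * COLS + j;

  int dev = omp_get_default_device ();
  int host_dev = omp_get_initial_device ();
  void *devbuf = omp_target_alloc (ROWS * COLS * sizeof (int), dev);
  if (devbuf == NULL)
    return 0;  /* Allocation failed; nothing to demonstrate.  */

  size_t volume[2]  = { 2, 3 };        /* 2 rows x 3 columns.  */
  size_t offsets[2] = { 1, 1 };        /* Start at element [1][1].  */
  size_t dims[2]    = { ROWS, COLS };  /* Full matrix extents.  */

  int rc = omp_target_memcpy_rect (devbuf, host, sizeof (int), 2, volume,
                                   offsets, offsets, dims, dims,
                                   dev, host_dev);
  omp_target_free (devbuf, dev);
  return rc == 0 ? 0 : 1;
}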
@item Low-latency memory (@code{omp_low_lat_mem_space}) is supported when the the @code{access} trait is set to @code{cgroup}, and libgomp has been built for PTX ISA version 4.1 or higher (such as in GCC's @@ -7070,6 +7283,11 @@ The implementation remark: @code{omp_thread_mem_alloc}, all use low-latency memory as first preference, and fall back to main graphics memory when the low-latency pool is exhausted. +@item The OpenMP routines @code{omp_target_memcpy_rect} and + @code{omp_target_memcpy_rect_async} and the @code{target update} + directive for non-contiguous list items use the 2D and 3D memory-copy + functions of the CUDA library. Higher dimensions call those functions + in a loop and are therefore supported. @item The unique identifier (UID), used with OpenMP's API UID routines, consists of the @samp{GPU-} prefix followed by the 16-bytes UUID as returned by the CUDA runtime library. This UUID is output in grouped lower-case @@ -7102,6 +7320,9 @@ or string (str) data type, call @code{omp_get_interop_int}, Note that @code{device_num} is the OpenMP device number while @code{device} is the CUDA, CUDA Driver, or HIP device number. +When using HIP with C and C++, the @code{__HIP_PLATFORM_NVIDIA__} preprocessor +macro must be defined before including the HIP header files. + For the API routine call, add the prefix @code{omp_ipr_} to the property name; for instance: @smallexample diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index 718252b..5b8ba7e 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -171,21 +171,22 @@ acc_free (void *d) } static void -memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async, - const char *libfnname) +memcpy_tofrom_device (bool dev_to, bool dev_from, void *dst, void *src, + size_t s, int async, const char *libfnname) { /* No need to call lazy open here, as the device pointer must have been obtained from a routine that did that. */ struct goacc_thread *thr = goacc_thread (); assert (thr && thr->dev); + if (s == 0) + return; if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) { - if (from) - memmove (h, d, s); - else - memmove (d, h, s); + if (src == dst) + return; + memcpy (dst, src, s); return; } @@ -199,10 +200,15 @@ memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async, } goacc_aq aq = get_goacc_asyncqueue (async); - if (from) - gomp_copy_dev2host (thr->dev, aq, h, d, s); + if (dev_to && dev_from) + { + if (dst != src) + gomp_copy_dev2dev (thr->dev, aq, dst, src, s); + } + else if (dev_from) + gomp_copy_dev2host (thr->dev, aq, dst, src, s); else - gomp_copy_host2dev (thr->dev, aq, d, h, s, false, /* TODO: cbuf? */ NULL); + gomp_copy_host2dev (thr->dev, aq, dst, src, s, false, /* TODO: cbuf? 
*/ NULL); if (profiling_p) { @@ -214,25 +220,37 @@ memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async, void acc_memcpy_to_device (void *d, void *h, size_t s) { - memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__); + memcpy_tofrom_device (true, false, d, h, s, acc_async_sync, __FUNCTION__); } void acc_memcpy_to_device_async (void *d, void *h, size_t s, int async) { - memcpy_tofrom_device (false, d, h, s, async, __FUNCTION__); + memcpy_tofrom_device (true, false, d, h, s, async, __FUNCTION__); } void acc_memcpy_from_device (void *h, void *d, size_t s) { - memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__); + memcpy_tofrom_device (false, true, h, d, s, acc_async_sync, __FUNCTION__); } void acc_memcpy_from_device_async (void *h, void *d, size_t s, int async) { - memcpy_tofrom_device (true, d, h, s, async, __FUNCTION__); + memcpy_tofrom_device (false, true, h, d, s, async, __FUNCTION__); +} + +void +acc_memcpy_device (void *dst, void *src, size_t s) +{ + memcpy_tofrom_device (true, true, dst, src, s, acc_async_sync, __FUNCTION__); +} + +void +acc_memcpy_device_async (void *dst, void *src, size_t s, int async) +{ + memcpy_tofrom_device (true, true, dst, src, s, async, __FUNCTION__); } /* Return the device pointer that corresponds to host data H. Or NULL @@ -951,7 +969,7 @@ acc_attach_async (void **hostaddr, int async) } gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, (uintptr_t) hostaddr, - 0, NULL, false); + 0, NULL, false, true); gomp_mutex_unlock (&acc_dev->lock); } @@ -1158,7 +1176,7 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, if ((kinds[i] & 0xff) == GOMP_MAP_ATTACH) { gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, - (uintptr_t) h, s, NULL, false); + (uintptr_t) h, s, NULL, false, true); /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference counts ('n->refcount', 'n->dynamic_refcount'). */ } @@ -1176,7 +1194,7 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, = lookup_host (acc_dev, hostaddrs[j], sizeof (void *)); gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, m, (uintptr_t) hostaddrs[j], sizes[j], NULL, - false); + false, true); } bool processed = false; diff --git a/libgomp/omp.h.in b/libgomp/omp.h.in index d5e8be4..4f2bc46 100644 --- a/libgomp/omp.h.in +++ b/libgomp/omp.h.in @@ -347,6 +347,10 @@ extern int omp_target_memcpy_rect_async (void *, const void *, __SIZE_TYPE__, const __SIZE_TYPE__ *, int, int, int, omp_depend_t * __GOMP_DEFAULT_NULL) __GOMP_NOTHROW; +extern void *omp_target_memset (void *, int, __SIZE_TYPE__, int) __GOMP_NOTHROW; +extern void *omp_target_memset_async (void *, int, __SIZE_TYPE__, int, + int, omp_depend_t * __GOMP_DEFAULT_NULL) + __GOMP_NOTHROW; extern int omp_target_associate_ptr (const void *, const void *, __SIZE_TYPE__, __SIZE_TYPE__, int) __GOMP_NOTHROW; extern int omp_target_disassociate_ptr (const void *, int) __GOMP_NOTHROW; @@ -432,4 +436,136 @@ extern const char *omp_get_uid_from_device (int) __GOMP_NOTHROW; } #endif +#if __cplusplus >= 201103L + +/* std::__throw_bad_alloc and std::__throw_bad_array_new_length. 
*/ +#include <bits/functexcept.h> + +namespace omp +{ +namespace allocator +{ + +namespace __detail +{ + +template<typename __T, omp_allocator_handle_t __Handle> +struct __allocator_templ +{ + using value_type = __T; + using pointer = __T*; + using const_pointer = const __T*; + using size_type = __SIZE_TYPE__; + using difference_type = __PTRDIFF_TYPE__; + + __T* + allocate (size_type __n) + { + if (__SIZE_MAX__ / sizeof(__T) < __n) + std::__throw_bad_array_new_length (); + void *__p = omp_aligned_alloc (alignof(__T), __n * sizeof(__T), __Handle); + if (!__p) + std::__throw_bad_alloc (); + return static_cast<__T*>(__p); + } + + void + deallocate (__T *__p, size_type) __GOMP_NOTHROW + { + omp_free (static_cast<void*>(__p), __Handle); + } +}; + +template<typename __T, typename __U, omp_allocator_handle_t __Handle> +constexpr bool +operator== (const __allocator_templ<__T, __Handle>&, + const __allocator_templ<__U, __Handle>&) __GOMP_NOTHROW +{ + return true; +} + +template<typename __T, omp_allocator_handle_t __Handle, + typename __U, omp_allocator_handle_t __UHandle> +constexpr bool +operator== (const __allocator_templ<__T, __Handle>&, + const __allocator_templ<__U, __UHandle>&) __GOMP_NOTHROW +{ + return false; +} + +template<typename __T, typename __U, omp_allocator_handle_t __Handle> +constexpr bool +operator!= (const __allocator_templ<__T, __Handle>&, + const __allocator_templ<__U, __Handle>&) __GOMP_NOTHROW +{ + return false; +} + +template<typename __T, omp_allocator_handle_t __Handle, + typename __U, omp_allocator_handle_t __UHandle> +constexpr bool +operator!= (const __allocator_templ<__T, __Handle>&, + const __allocator_templ<__U, __UHandle>&) __GOMP_NOTHROW +{ + return true; +} + +} /* namespace __detail */ + +template<typename __T> +struct null_allocator + : __detail::__allocator_templ<__T, omp_null_allocator> {}; + +template<typename __T> +struct default_mem + : __detail::__allocator_templ<__T, omp_default_mem_alloc> {}; + +template<typename __T> +struct large_cap_mem + : __detail::__allocator_templ<__T, omp_large_cap_mem_alloc> {}; + +template<typename __T> +struct const_mem + : __detail::__allocator_templ<__T, omp_const_mem_alloc> {}; + +template<typename __T> +struct high_bw_mem + : __detail::__allocator_templ<__T, omp_high_bw_mem_alloc> {}; + +template<typename __T> +struct low_lat_mem + : __detail::__allocator_templ<__T, omp_low_lat_mem_alloc> {}; + +template<typename __T> +struct cgroup_mem + : __detail::__allocator_templ<__T, omp_cgroup_mem_alloc> {}; + +template<typename __T> +struct pteam_mem + : __detail::__allocator_templ<__T, omp_pteam_mem_alloc> {}; + +template<typename __T> +struct thread_mem + : __detail::__allocator_templ<__T, omp_thread_mem_alloc> {}; + +} /* namespace allocator */ + +} /* namespace omp */ + +namespace ompx +{ + +namespace allocator +{ + +template<typename __T> +struct gnu_pinned_mem + : omp::allocator::__detail::__allocator_templ<__T, ompx_gnu_pinned_mem_alloc> {}; + +} /* namespace allocator */ + +} /* namespace ompx */ + +#endif /* __cplusplus */ + #endif /* _OMP_H */ diff --git a/libgomp/omp_lib.f90.in b/libgomp/omp_lib.f90.in index cb6b95f..ce866c0 100644 --- a/libgomp/omp_lib.f90.in +++ b/libgomp/omp_lib.f90.in @@ -904,6 +904,29 @@ end interface interface + function omp_target_memset (ptr, val, count, device_num) bind(c) + use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_size_t + type(c_ptr) :: omp_target_memset + type(c_ptr), value :: ptr + integer(c_size_t), value :: count + integer(c_int), value :: val, device_num + end function 
omp_target_memset + end interface + + interface + function omp_target_memset_async (ptr, val, count, device_num, & + depobj_count, depobj_list) bind(c) + use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_size_t + import :: omp_depend_kind + type(c_ptr) :: omp_target_memset_async + type(c_ptr), value :: ptr + integer(c_size_t), value :: count + integer(c_int), value :: val, device_num, depobj_count + integer(omp_depend_kind), optional :: depobj_list(*) + end function omp_target_memset_async + end interface + + interface function omp_target_associate_ptr (host_ptr, device_ptr, size, & device_offset, device_num) bind(c) use, intrinsic :: iso_c_binding, only : c_ptr, c_size_t, c_int diff --git a/libgomp/omp_lib.h.in b/libgomp/omp_lib.h.in index f7af5ff..9047095 100644 --- a/libgomp/omp_lib.h.in +++ b/libgomp/omp_lib.h.in @@ -505,6 +505,31 @@ end interface interface + function omp_target_memset (ptr, val, count, device_num) bind(c) + use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_size_t + type(c_ptr) omp_target_memset + type(c_ptr), value :: ptr + integer(c_size_t), value :: count + integer(c_int), value :: val, device_num + end function omp_target_memset + end interface + + interface + function omp_target_memset_async (ptr, val, count, device_num, & + & depobj_count, depobj_list) & + & bind(c) + use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_size_t + import :: omp_depend_kind + type(c_ptr) :: omp_target_memset_async + type(c_ptr), value :: ptr + integer(c_size_t), value :: count + integer(c_int), value :: val, device_num, depobj_count + integer(omp_depend_kind), optional :: depobj_list(*) + end function omp_target_memset_async + end interface + + + interface function omp_target_associate_ptr (host_ptr, device_ptr, size, & & device_offset, device_num) & & bind(c) diff --git a/libgomp/openacc.f90 b/libgomp/openacc.f90 index 8ef107e..3f2db45 100644 --- a/libgomp/openacc.f90 +++ b/libgomp/openacc.f90 @@ -797,6 +797,9 @@ module openacc public :: acc_copyout_finalize, acc_delete_finalize public :: acc_memcpy_to_device, acc_memcpy_to_device_async public :: acc_memcpy_from_device, acc_memcpy_from_device_async + public :: acc_memcpy_device, acc_memcpy_device_async + public :: acc_attach, acc_attach_async, acc_detach, acc_detach_async + public :: acc_detach_finalize, acc_detach_finalize_async integer, parameter :: openacc_version = 201711 @@ -1046,6 +1049,69 @@ module openacc end subroutine end interface + interface + subroutine acc_memcpy_device (data_dev_dest, data_dev_src, bytes) bind(C) + use iso_c_binding, only: c_ptr, c_size_t + type(c_ptr), value :: data_dev_dest + type(c_ptr), value :: data_dev_src + integer(c_size_t), value :: bytes + end subroutine + end interface + + interface + subroutine acc_memcpy_device_async (data_dev_dest, data_dev_src, & + bytes, async_arg) bind(C) + use iso_c_binding, only: c_ptr, c_size_t + import :: acc_handle_kind + type(c_ptr), value :: data_dev_dest + type(c_ptr), value :: data_dev_src + integer(c_size_t), value :: bytes + integer(acc_handle_kind), value :: async_arg + end subroutine + end interface + + interface + subroutine acc_attach (ptr_addr) bind(C) + type(*), dimension(..) :: ptr_addr + end subroutine + end interface + + interface + subroutine acc_attach_async (ptr_addr, async_arg) bind(C) + import :: acc_handle_kind + type(*), dimension(..) :: ptr_addr + integer(acc_handle_kind), value :: async_arg + end subroutine + end interface + + interface + subroutine acc_detach (ptr_addr) bind(C) + type(*), dimension(..) 
:: ptr_addr + end subroutine + end interface + + interface + subroutine acc_detach_async (ptr_addr, async_arg) bind(C) + import :: acc_handle_kind + type(*), dimension(..) :: ptr_addr + integer(acc_handle_kind), value :: async_arg + end subroutine + end interface + + interface + subroutine acc_detach_finalize (ptr_addr) bind(C) + type(*), dimension(..) :: ptr_addr + end subroutine + end interface + + interface + subroutine acc_detach_finalize_async (ptr_addr, async_arg) bind(C) + import :: acc_handle_kind + type(*), dimension(..) :: ptr_addr + integer(acc_handle_kind), value :: async_arg + end subroutine + end interface + interface acc_copyin_async procedure :: acc_copyin_async_32_h procedure :: acc_copyin_async_64_h diff --git a/libgomp/openacc.h b/libgomp/openacc.h index a520bbe..3085b00 100644 --- a/libgomp/openacc.h +++ b/libgomp/openacc.h @@ -123,6 +123,7 @@ void *acc_hostptr (void *) __GOACC_NOTHROW; int acc_is_present (void *, size_t) __GOACC_NOTHROW; void acc_memcpy_to_device (void *, void *, size_t) __GOACC_NOTHROW; void acc_memcpy_from_device (void *, void *, size_t) __GOACC_NOTHROW; +void acc_memcpy_device (void *, void *, size_t) __GOACC_NOTHROW; void acc_attach (void **) __GOACC_NOTHROW; void acc_attach_async (void **, int) __GOACC_NOTHROW; void acc_detach (void **) __GOACC_NOTHROW; @@ -136,7 +137,7 @@ void acc_delete_finalize_async (void *, size_t, int) __GOACC_NOTHROW; void acc_detach_finalize (void **) __GOACC_NOTHROW; void acc_detach_finalize_async (void **, int) __GOACC_NOTHROW; -/* Async functions, specified in OpenACC 2.5. */ +/* Async functions, specified in OpenACC 2.5, acc_memcpy_device in 2.6. */ void acc_copyin_async (void *, size_t, int) __GOACC_NOTHROW; void acc_create_async (void *, size_t, int) __GOACC_NOTHROW; void acc_copyout_async (void *, size_t, int) __GOACC_NOTHROW; @@ -145,6 +146,7 @@ void acc_update_device_async (void *, size_t, int) __GOACC_NOTHROW; void acc_update_self_async (void *, size_t, int) __GOACC_NOTHROW; void acc_memcpy_to_device_async (void *, void *, size_t, int) __GOACC_NOTHROW; void acc_memcpy_from_device_async (void *, void *, size_t, int) __GOACC_NOTHROW; +void acc_memcpy_device_async (void *, void *, size_t, int) __GOACC_NOTHROW; /* CUDA-specific routines. */ void *acc_get_current_cuda_device (void) __GOACC_NOTHROW; diff --git a/libgomp/openacc_lib.h b/libgomp/openacc_lib.h index b0d287e..dbdc4d7 100644 --- a/libgomp/openacc_lib.h +++ b/libgomp/openacc_lib.h @@ -528,6 +528,30 @@ end subroutine end interface + interface + subroutine acc_memcpy_device(data_dev_dest, data_dev_src, & + & bytes) bind(C) + use iso_c_binding, only: c_ptr, c_size_t + type(c_ptr), value :: data_dev_dest + type(c_ptr), value :: data_dev_src + integer(c_size_t), value :: bytes + end subroutine + end interface + + interface + subroutine acc_memcpy_device_async(data_dev_dest, & + & data_dev_src, bytes, & + & async_arg) bind(C) + use iso_c_binding, only: c_ptr, c_size_t + import :: acc_handle_kind + type(c_ptr), value :: data_dev_dest + type(c_ptr), value :: data_dev_src + integer(c_size_t), value :: bytes + integer(acc_handle_kind), value :: async_arg + end subroutine + end interface + + interface acc_copyin_async subroutine acc_copyin_async_32_h (a, len, async) use iso_c_binding, only: c_int32_t @@ -683,3 +707,45 @@ integer (acc_handle_kind) async_ end subroutine end interface + + interface + subroutine acc_attach (ptr_addr) bind(C) + type(*), dimension(..) 
:: ptr_addr + end subroutine + end interface + + interface + subroutine acc_attach_async (ptr_addr, async_arg) bind(C) + import :: acc_handle_kind + type(*), dimension(..) :: ptr_addr + integer(acc_handle_kind), value :: async_arg + end subroutine + end interface + + interface + subroutine acc_detach (ptr_addr) bind(C) + type(*), dimension(..) :: ptr_addr + end subroutine + end interface + + interface + subroutine acc_detach_async (ptr_addr, async_arg) bind(C) + import :: acc_handle_kind + type(*), dimension(..) :: ptr_addr + integer(acc_handle_kind), value :: async_arg + end subroutine + end interface + + interface + subroutine acc_detach_finalize (ptr_addr) bind(C) + type(*), dimension(..) :: ptr_addr + end subroutine + end interface + + interface + subroutine acc_detach_finalize_async(ptr_addr, async_arg)bind(C) + import :: acc_handle_kind + type(*), dimension(..) :: ptr_addr + integer(acc_handle_kind), value :: async_arg + end subroutine + end interface diff --git a/libgomp/plugin/cuda-lib.def b/libgomp/plugin/cuda-lib.def index eb562ac..7f4ddcc 100644 --- a/libgomp/plugin/cuda-lib.def +++ b/libgomp/plugin/cuda-lib.def @@ -42,6 +42,7 @@ CUDA_ONE_CALL (cuMemcpyHtoDAsync) CUDA_ONE_CALL (cuMemcpy2D) CUDA_ONE_CALL (cuMemcpy2DUnaligned) CUDA_ONE_CALL (cuMemcpy3D) +CUDA_ONE_CALL (cuMemsetD8) CUDA_ONE_CALL (cuMemFree) CUDA_ONE_CALL (cuMemFreeHost) CUDA_ONE_CALL (cuMemGetAddressRange) diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c index 4b42a59..498b549 100644 --- a/libgomp/plugin/plugin-gcn.c +++ b/libgomp/plugin/plugin-gcn.c @@ -208,6 +208,8 @@ struct hsa_runtime_fn_info hsa_status_t (*hsa_code_object_deserialize_fn) (void *serialized_code_object, size_t serialized_code_object_size, const char *options, hsa_code_object_t *code_object); + hsa_status_t (*hsa_amd_memory_fill_fn)(void *ptr, uint32_t value, + size_t count); hsa_status_t (*hsa_amd_memory_lock_fn) (void *host_ptr, size_t size, hsa_agent_t *agents, int num_agent, void **agent_ptr); @@ -1456,6 +1458,7 @@ init_hsa_runtime_functions (void) DLSYM_FN (hsa_signal_load_acquire) DLSYM_FN (hsa_queue_destroy) DLSYM_FN (hsa_code_object_deserialize) + DLSYM_OPT_FN (hsa_amd_memory_fill) DLSYM_OPT_FN (hsa_amd_memory_lock) DLSYM_OPT_FN (hsa_amd_memory_unlock) DLSYM_OPT_FN (hsa_amd_memory_async_copy_rect) @@ -4435,6 +4438,83 @@ init_hip_runtime_functions (void) return true; } +bool +GOMP_OFFLOAD_memset (int ord, void *ptr, int val, size_t count) +{ + hsa_status_t status = HSA_STATUS_SUCCESS; + + /* A memset feature is only provided via hsa_amd_memory_fill; while it + is fast, it is an HSA extension and it has two requirements: The memory + must be aligned to multiples of 4 bytes - and, by construction, only + multiples of 4 bytes can be filled (uint32_t value argument). + + This means: Either not using that function or up to three function calls: + - copy 1 to 3 bytes to get alignment (hsa_memory_copy), if unaligned + - call hsa_amd_memory_fill + - copy remaining 1 to 3 bytes (hsa_memory_copy), if after alignment + count is not a multiple of 4 bytes. + + Having more than one function call is only profitable if there is + enough data to process; see below for the used heuristic values. */ + + uint8_t v8 = (uint8_t) val; + size_t before = (4 - (uintptr_t) ptr % 4) % 4; /* 0 to 3 bytes. */ + size_t tail = (count - before) % 4; /* 0 to 3 bytes. */ + + /* Heuristic */ + enum { + /* Prefer alloca to malloc up to ... */ + alloca_size = 256, /* bytes */ + /* Call hsa_amd_memory_fill also when two copy calls are required. 
*/ + always_use_fill = 256*1024, /* bytes */ + /* Call hsa_amd_memory_fill also when on copy call is required. */ + use_fill_one_copy = (128+64)*1024 /* bytes */ + }; + + /* Do not call hsa_amd_memory_fill when any of the following conditions + is true. Note that it is always preferred if available and + before == tail == 0. */ + if (__builtin_expect (!hsa_fns.hsa_amd_memory_fill_fn, 0) + || (before && tail && count < always_use_fill) + || ((before || tail) && count < use_fill_one_copy)) + before = count; + + /* Copy call for alignment - or all data, if condition above is true. */ + if (before) + { + void *data; + if (before > alloca_size) + data = malloc (before * sizeof (uint8_t)); + else + data = alloca (before * sizeof (uint8_t)); + memset (data, val, before); + status = hsa_fns.hsa_memory_copy_fn (ptr, data, before); + if (before > alloca_size) + free (data); + if (data == 0 || status != HSA_STATUS_SUCCESS) + goto fail; + count -= before; + } + + if (count == 0) + return true; + + ptr += before; + + uint32_t values = v8 | (v8 << 8) | (v8 << 16) | (v8 << 24); + status = hsa_fns.hsa_amd_memory_fill_fn (ptr, values, count / 4); + if (tail && status == HSA_STATUS_SUCCESS) + { + ptr += count - tail; + status = hsa_fns.hsa_memory_copy_fn (ptr, &values, tail); + } + if (status == HSA_STATUS_SUCCESS) + return true; + +fail: + GOMP_PLUGIN_error ("memory set failed"); + return false; +} void GOMP_OFFLOAD_interop (struct interop_obj_t *obj, int ord, @@ -5079,7 +5159,8 @@ GOMP_OFFLOAD_openacc_async_queue_callback (struct goacc_asyncqueue *aq, queue_push_callback (aq, fn, data); } -/* Queue up an asynchronous data copy from host to DEVICE. */ +/* Queue up an asynchronous data copy from host to DEVICE. + (Also handles dev2host and dev2dev.) */ bool GOMP_OFFLOAD_openacc_async_host2dev (int device, void *dst, const void *src, @@ -5097,10 +5178,16 @@ bool GOMP_OFFLOAD_openacc_async_dev2host (int device, void *dst, const void *src, size_t n, struct goacc_asyncqueue *aq) { - struct agent_info *agent = get_agent_info (device); - assert (agent == aq->agent); - queue_push_copy (aq, dst, src, n); - return true; + return GOMP_OFFLOAD_openacc_async_host2dev (device, dst, src, n, aq); +} + +/* Queue up an asynchronous data copy from DEVICE to DEVICE. 
*/ + +bool +GOMP_OFFLOAD_openacc_async_dev2dev (int device, void *dst, const void *src, + size_t n, struct goacc_asyncqueue *aq) +{ + return GOMP_OFFLOAD_openacc_async_host2dev (device, dst, src, n, aq); } union goacc_property_value diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index a5cf859..0ba445e 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -2019,6 +2019,34 @@ GOMP_OFFLOAD_openacc_async_queue_callback (struct goacc_asyncqueue *aq, } static bool +cuda_memcpy_dev_sanity_check (const void *d1, const void *d2, size_t s) +{ + CUdeviceptr pb1, pb2; + size_t ps1, ps2; + if (!s) + return true; + if (!d1 || !d2) + { + GOMP_PLUGIN_error ("invalid device address"); + return false; + } + CUDA_CALL (cuMemGetAddressRange, &pb1, &ps1, (CUdeviceptr) d1); + CUDA_CALL (cuMemGetAddressRange, &pb2, &ps2, (CUdeviceptr) d2); + if (!pb1 || !pb2) + { + GOMP_PLUGIN_error ("invalid device address"); + return false; + } + if ((void *)(d1 + s) > (void *)(pb1 + ps1) + || (void *)(d2 + s) > (void *)(pb2 + ps2)) + { + GOMP_PLUGIN_error ("invalid size"); + return false; + } + return true; +} + +static bool cuda_memcpy_sanity_check (const void *h, const void *d, size_t s) { CUdeviceptr pb; @@ -2077,6 +2105,9 @@ GOMP_OFFLOAD_dev2host (int ord, void *dst, const void *src, size_t n) bool GOMP_OFFLOAD_dev2dev (int ord, void *dst, const void *src, size_t n) { + if (!nvptx_attach_host_thread_to_device (ord) + || !cuda_memcpy_dev_sanity_check (dst, src, n)) + return false; CUDA_CALL (cuMemcpyDtoDAsync, (CUdeviceptr) dst, (CUdeviceptr) src, n, NULL); return true; } @@ -2267,6 +2298,15 @@ GOMP_OFFLOAD_memcpy3d (int dst_ord, int src_ord, size_t dim2_size, } bool +GOMP_OFFLOAD_memset (int ord, void *ptr, int val, size_t count) +{ + if (!nvptx_attach_host_thread_to_device (ord)) + return false; + CUDA_CALL (cuMemsetD8, (CUdeviceptr) ptr, (unsigned char) val, count); + return true; +} + +bool GOMP_OFFLOAD_openacc_async_host2dev (int ord, void *dst, const void *src, size_t n, struct goacc_asyncqueue *aq) { @@ -2288,6 +2328,18 @@ GOMP_OFFLOAD_openacc_async_dev2host (int ord, void *dst, const void *src, return true; } +bool +GOMP_OFFLOAD_openacc_async_dev2dev (int ord, void *dst, const void *src, + size_t n, struct goacc_asyncqueue *aq) +{ + if (!nvptx_attach_host_thread_to_device (ord) + || !cuda_memcpy_dev_sanity_check (dst, src, n)) + return false; + CUDA_CALL (cuMemcpyDtoDAsync, (CUdeviceptr) dst, (CUdeviceptr) src, n, + aq->cuda_stream); + return true; +} + union goacc_property_value GOMP_OFFLOAD_openacc_get_property (int n, enum goacc_property prop) { diff --git a/libgomp/target-cxa-dso-dtor.c b/libgomp/target-cxa-dso-dtor.c new file mode 100644 index 0000000..d1a898d --- /dev/null +++ b/libgomp/target-cxa-dso-dtor.c @@ -0,0 +1,3 @@ +/* Host/device compatibility: Itanium C++ ABI, DSO Object Destruction API */ + +/* Nothing needed here. 
*/ diff --git a/libgomp/target.c b/libgomp/target.c index a64ee96..cda092b 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -461,6 +461,19 @@ gomp_copy_dev2host (struct gomp_device_descr *devicep, gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz); } +attribute_hidden void +gomp_copy_dev2dev (struct gomp_device_descr *devicep, + struct goacc_asyncqueue *aq, + void *dst, const void *src, size_t sz) +{ + if (__builtin_expect (aq != NULL, 0)) + goacc_device_copy_async (devicep, devicep->openacc.async.dev2dev_func, + "dev", dst, "dev", src, NULL, sz, aq); + else + gomp_device_copy (devicep, devicep->dev2dev_func, "dev", dst, + "dev", src, sz); +} + static void gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr) { @@ -800,12 +813,22 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, (void *) cur_node.host_end); } -attribute_hidden void +/* Update the devptr by setting it to the device address of the host pointee + 'attach_to'; devptr is obtained from the splay_tree_key n. + When the pointer is already attached or the host pointee is either + NULL or in memory map, this function returns true. + Otherwise, the device pointer is set to point to the host pointee and: + - If allow_zero_length_array_sections is set, true is returned. + - Else, if fail_if_not_found is set, a fatal error is issued. + - Otherwise, false is returned. */ + +attribute_hidden bool gomp_attach_pointer (struct gomp_device_descr *devicep, struct goacc_asyncqueue *aq, splay_tree mem_map, splay_tree_key n, uintptr_t attach_to, size_t bias, struct gomp_coalesce_buf *cbufp, - bool allow_zero_length_array_sections) + bool allow_zero_length_array_sections, + bool fail_if_not_found) { struct splay_tree_key_s s; size_t size, idx; @@ -860,7 +883,7 @@ gomp_attach_pointer (struct gomp_device_descr *devicep, gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data, sizeof (void *), true, cbufp); - return; + return true; } s.host_start = target + bias; @@ -869,15 +892,16 @@ gomp_attach_pointer (struct gomp_device_descr *devicep, if (!tn) { - if (allow_zero_length_array_sections) - /* When allowing attachment to zero-length array sections, we - copy the host pointer when the target region is not mapped. */ - data = target; - else + /* We copy the host pointer when the target region is not mapped; + for allow_zero_length_array_sections, that's permitted. + Otherwise, it depends on the context. Return false in that + case, unless fail_if_not_found. 
*/ + if (!allow_zero_length_array_sections && fail_if_not_found) { gomp_mutex_unlock (&devicep->lock); gomp_fatal ("pointer target not mapped for attach"); } + data = target; } else data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start; @@ -889,10 +913,13 @@ gomp_attach_pointer (struct gomp_device_descr *devicep, gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data, sizeof (void *), true, cbufp); + if (!tn && !allow_zero_length_array_sections) + return false; } else gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__, (void *) attach_to, (int) n->aux->attach_count[idx]); + return true; } attribute_hidden void @@ -1587,9 +1614,37 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, bool zlas = ((kind & typemask) == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION); - gomp_attach_pointer (devicep, aq, mem_map, n, - (uintptr_t) hostaddrs[i], sizes[i], - cbufp, zlas); + /* For 'target enter data', the map clauses are split; + however, for more complex code with struct and + pointer members, the mapping and the attach can end up + in different sets; or the wrong mapping with the + attach. As there is no way to know whether a size + zero like 'var->ptr[i][:0]' happend in the same + directive or not, the not-attached check is now + fully silenced for 'enter data'. */ + if (openmp_p && (pragma_kind & GOMP_MAP_VARS_ENTER_DATA)) + zlas = true; + if (!gomp_attach_pointer (devicep, aq, mem_map, n, + (uintptr_t) hostaddrs[i], sizes[i], + cbufp, zlas, !openmp_p)) + { + /* Pointee not found; that's an error except for + map(var[:n]) with n == 0; the compiler adds a + runtime condition such that for those the kind is + always GOMP_MAP_ZERO_LEN_ARRAY_SECTION. */ + for (j = i; j > 0; j--) + if (*(void**) hostaddrs[i] == hostaddrs[j-1] - sizes[i] + && sizes[j-1] == 0 + && (GOMP_MAP_ZERO_LEN_ARRAY_SECTION + == (get_kind (short_mapkind, kinds, j-1) + & typemask))) + break; + if (j == 0) + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("pointer target not mapped for attach"); + } + } } else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0) { @@ -2586,6 +2641,10 @@ gomp_unload_image_from_device (struct gomp_device_descr *devicep, } } +#define GOMP_REQUIRES_NAME_BUF_LEN \ + sizeof ("unified_address, unified_shared_memory, " \ + "self_maps, reverse_offload") + static void gomp_requires_to_name (char *buf, size_t size, int requires_mask) { @@ -2634,10 +2693,8 @@ GOMP_offload_register_ver (unsigned version, const void *host_table, if (omp_req && omp_requires_mask && omp_requires_mask != omp_req) { - char buf1[sizeof ("unified_address, unified_shared_memory, " - "self_maps, reverse_offload")]; - char buf2[sizeof ("unified_address, unified_shared_memory, " - "self_maps, reverse_offload")]; + char buf1[GOMP_REQUIRES_NAME_BUF_LEN]; + char buf2[GOMP_REQUIRES_NAME_BUF_LEN]; gomp_requires_to_name (buf2, sizeof (buf2), omp_req != GOMP_REQUIRES_TARGET_USED ? 
omp_req : omp_requires_mask); @@ -4948,6 +5005,88 @@ omp_target_memcpy_rect_async (void *dst, const void *src, size_t element_size, return 0; } +static void +omp_target_memset_int (void *ptr, int val, size_t count, + struct gomp_device_descr *devicep) +{ + if (__builtin_expect (count == 0, 0)) + return; + if (devicep == NULL) + { + memset (ptr, val, count); + return; + } + + gomp_mutex_lock (&devicep->lock); + int ret = devicep->memset_func (devicep->target_id, ptr, val, count); + gomp_mutex_unlock (&devicep->lock); + if (!ret) + gomp_fatal ("omp_target_memset failed"); +} + +void* +omp_target_memset (void *ptr, int val, size_t count, int device_num) +{ + struct gomp_device_descr *devicep; + if (device_num == omp_initial_device + || device_num == gomp_get_num_devices () + || (devicep = resolve_device (device_num, false)) == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + devicep = NULL; + + omp_target_memset_int (ptr, val, count, devicep); + return ptr; +} + +typedef struct +{ + void *ptr; + size_t count; + struct gomp_device_descr *devicep; + int val; +} omp_target_memset_data; + +static void +omp_target_memset_async_helper (void *args) +{ + omp_target_memset_data *a = args; + omp_target_memset_int (a->ptr, a->val, a->count, a->devicep); +} + +void* +omp_target_memset_async (void *ptr, int val, size_t count, int device_num, + int depobj_count, omp_depend_t *depobj_list) +{ + void *depend[depobj_count + 5]; + struct gomp_device_descr *devicep; + unsigned flags = 0; + int i; + + if (device_num == omp_initial_device + || device_num == gomp_get_num_devices () + || (devicep = resolve_device (device_num, false)) == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + devicep = NULL; + + omp_target_memset_data s = {.ptr = ptr, .val = val, .count = count, + .devicep = devicep}; + if (depobj_count > 0 && depobj_list != NULL) + { + flags |= GOMP_TASK_FLAG_DEPEND; + depend[0] = 0; + depend[1] = (void *) (uintptr_t) depobj_count; + depend[2] = depend[3] = depend[4] = 0; + for (i = 0; i < depobj_count; ++i) + depend[i + 5] = &depobj_list[i]; + } + + GOMP_task (omp_target_memset_async_helper, &s, NULL, sizeof (s), + __alignof__ (s), true, flags, depend, 0, NULL); + return ptr; +} + int omp_target_associate_ptr (const void *host_ptr, const void *device_ptr, size_t size, size_t device_offset, int device_num) @@ -5513,6 +5652,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device, DLSYM_OPT (async_run, async_run); DLSYM_OPT (can_run, can_run); DLSYM (dev2dev); + DLSYM (memset); } if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200) { @@ -5531,6 +5671,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device, || !DLSYM_OPT (openacc.async.exec, openacc_async_exec) || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host) || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev) + || !DLSYM_OPT (openacc.async.dev2dev, openacc_async_dev2dev) || !DLSYM_OPT (openacc.get_property, openacc_get_property)) { /* Require all the OpenACC handlers if we have @@ -5647,8 +5788,7 @@ gomp_target_init (void) found = true; if (found) { - char buf[sizeof ("unified_address, unified_shared_memory, " - "reverse_offload")]; + char buf[GOMP_REQUIRES_NAME_BUF_LEN]; gomp_requires_to_name (buf, sizeof (buf), omp_req); char *name = (char *) malloc (cur_len + 1); memcpy (name, cur, cur_len); diff --git a/libgomp/testsuite/lib/libgomp.exp 
b/libgomp/testsuite/lib/libgomp.exp index bc38e3c..fd475ac 100644 --- a/libgomp/testsuite/lib/libgomp.exp +++ b/libgomp/testsuite/lib/libgomp.exp @@ -30,6 +30,7 @@ load_gcc_lib scandump.exp load_gcc_lib scanlang.exp load_gcc_lib scanrtl.exp load_gcc_lib scansarif.exp +load_gcc_lib scanhtml.exp load_gcc_lib scantree.exp load_gcc_lib scanltrans.exp load_gcc_lib scanoffload.exp @@ -553,7 +554,23 @@ int main() { } } "-lcuda" ] } -# Return 1 if cublas_v2.h and -lcublas are available. +# Return 1 if -lcuda is available (header not required). + +proc check_effective_target_openacc_libcuda { } { + return [check_no_compiler_messages openacc_libcuda executable { +typedef enum { CUDA_SUCCESS } CUresult; +typedef int CUdevice; +CUresult cuDeviceGet (CUdevice *, int); +int main() { + CUdevice dev; + CUresult r = cuDeviceGet (&dev, 0); + if (r != CUDA_SUCCESS) + return 1; + return 0; +} } "-lcuda" ] +} + +# Return 1 if cublas_v2.h, cuda.h, -lcublas and -lcuda are available. proc check_effective_target_openacc_cublas { } { return [check_no_compiler_messages openacc_cublas executable { @@ -573,7 +590,25 @@ int main() { } } "-lcuda -lcublas" ] } -# Return 1 if cuda_runtime_api.h and -lcudart are available. +# Return 1 if -lcublas is available header not required). + +proc check_effective_target_openacc_libcublas { } { + return [check_no_compiler_messages openacc_libcublas executable { +typedef enum { CUBLAS_STATUS_SUCCESS } cublasStatus_t; +typedef struct cublasContext* cublasHandle_t; +#define cublasCreate cublasCreate_v2 +cublasStatus_t cublasCreate_v2 (cublasHandle_t *); +int main() { + cublasStatus_t s; + cublasHandle_t h; + s = cublasCreate (&h); + if (s != CUBLAS_STATUS_SUCCESS) + return 1; + return 0; +} } "-lcublas" ] +} + +# Return 1 if cuda_runtime_api.h, cuda.h, -lcuda and -lcudart are available. proc check_effective_target_openacc_cudart { } { return [check_no_compiler_messages openacc_cudart executable { @@ -592,3 +627,98 @@ int main() { return 0; } } "-lcuda -lcudart" ] } + +# Return 1 if -lcudart is available (no header required). + +proc check_effective_target_openacc_libcudart { } { + return [check_no_compiler_messages openacc_libcudart executable { +typedef int cudaError_t; +cudaError_t cudaGetDevice(int *); +enum { cudaSuccess }; +int main() { + cudaError_t e; + int devn; + e = cudaGetDevice (&devn); + if (e != cudaSuccess) + return 1; + return 0; +} } "-lcudart" ] +} + +# Return 1 if hip.h is available (no link check; AMD platform). + +proc check_effective_target_gomp_hip_header_amd { } { + return [check_no_compiler_messages gomp_hip_header_amd assembly { +#define __HIP_PLATFORM_AMD__ +#include <hip/hip_runtime_api.h> +int main() { + hipDevice_t dev; + hipError_t r = hipDeviceGet (&dev, 0); + if (r != hipSuccess) + return 1; + return 0; +} }] +} + +# Return 1 if hip.h is available (no link check; Nvidia/CUDA platform). + +proc check_effective_target_gomp_hip_header_nvidia { } { + return [check_no_compiler_messages gomp_hip_header_nvidia assembly { +#define __HIP_PLATFORM_NVIDIA__ +#include <hip/hip_runtime_api.h> +int main() { + hipDevice_t dev; + hipError_t r = hipDeviceGet (&dev, 0); + if (r != hipSuccess) + return 1; + return 0; +} } "-Wno-deprecated-declarations"] +} + +# Return 1 if the Fortran hipfort module is available (no link check) + +proc check_effective_target_gomp_hipfort_module { } { + return [check_no_compiler_messages gomp_hipfort_module assembly { +! 
Fortran +use hipfort +implicit none +integer(kind(hipSuccess)) :: r +integer(c_int) :: dev +r = hipDeviceGet (dev, 0) +if (r /= hipSuccess) error stop +end +}] +} + +# Return 1 if AMD HIP's -lamdhip64 is available (no header required). + +proc check_effective_target_gomp_libamdhip64 { } { + return [check_no_compiler_messages gomp_libamdhip64 executable { +typedef int hipError_t; +typedef int hipDevice_t; +enum { hipSuccess = 0 }; +hipError_t hipDeviceGet(hipDevice_t*, int); +int main() { + hipDevice_t dev; + hipError_t r = hipDeviceGet (&dev, 0); + if (r != hipSuccess) + return 1; + return 0; +} } "-lamdhip64" ] +} + +# Return 1 if AMD HIP's -lamdhip64 is available (no header required). + +proc check_effective_target_gomp_libhipblas { } { + return [check_no_compiler_messages gomp_libhipblas executable { +typedef enum { HIPBLAS_STATUS_SUCCESS = 0 } hipblasStatus_t; +typedef void* hipblasHandle_t; +hipblasStatus_t hipblasCreate (hipblasHandle_t*); +int main() { + hipblasHandle_t handle; + hipblasStatus_t stat = hipblasCreate (&handle); + if (stat != HIPBLAS_STATUS_SUCCESS) + return 1; + return 0; +} } "-lhipblas" ] +} diff --git a/libgomp/testsuite/libgomp.c++/allocator-1.C b/libgomp/testsuite/libgomp.c++/allocator-1.C new file mode 100644 index 0000000..49425386 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/allocator-1.C @@ -0,0 +1,171 @@ +// { dg-do run } + +#include <omp.h> +#include <memory> +#include <limits> + +template<typename T, template<typename> class Alloc> +void test (T const initial_value = T()) +{ + using Allocator = Alloc<T>; + Allocator a; + using Traits = std::allocator_traits<Allocator>; + static_assert (__is_same(typename Traits::allocator_type, Allocator )); + static_assert (__is_same(typename Traits::value_type, T )); + static_assert (__is_same(typename Traits::pointer, T* )); + static_assert (__is_same(typename Traits::const_pointer, T const* )); + static_assert (__is_same(typename Traits::void_pointer, void* )); + static_assert (__is_same(typename Traits::const_void_pointer, void const* )); + static_assert (__is_same(typename Traits::difference_type, __PTRDIFF_TYPE__)); + static_assert (__is_same(typename Traits::size_type, __SIZE_TYPE__ )); + static_assert (Traits::propagate_on_container_copy_assignment::value == false); + static_assert (Traits::propagate_on_container_move_assignment::value == false); + static_assert (Traits::propagate_on_container_swap::value == false); + static_assert (Traits::is_always_equal::value == true); + + static constexpr __SIZE_TYPE__ correct_max_size + = std::numeric_limits<__SIZE_TYPE__>::max () / sizeof (T); + if (Traits::max_size (a) != correct_max_size) + __builtin_abort (); + + static constexpr __SIZE_TYPE__ alloc_count = 1; + T *p = Traits::allocate (a, alloc_count); + if (p == nullptr) + __builtin_abort (); + Traits::construct (a, p, initial_value); + if (*p != initial_value) + __builtin_abort (); + Traits::destroy (a, p); + Traits::deallocate (a, p, alloc_count); + /* Not interesting but might as well test it. */ + static_cast<void>(Traits::select_on_container_copy_construction (a)); + + if (!(a == Allocator())) + __builtin_abort (); + if (a != Allocator()) + __builtin_abort (); + if (!(a == Alloc<void>())) + __builtin_abort (); + if (a != Alloc<void>()) + __builtin_abort (); +} + +#define CHECK_INEQUALITY(other_alloc_templ, type) \ +do { \ + /* Skip tests for itself, those are equal. Intantiate each */ \ + /* one with void so we can easily tell if they are the same. 
*/ \ + if (!__is_same (AllocTempl<void>, other_alloc_templ<void>)) \ + { \ + other_alloc_templ<type> other; \ + if (a == other) \ + __builtin_abort (); \ + if (!(a != other)) \ + __builtin_abort (); \ + } \ +} while (false) + +template<typename T, template<typename> class AllocTempl> +void test_inequality () +{ + using Allocator = AllocTempl<T>; + Allocator a; + CHECK_INEQUALITY (omp::allocator::null_allocator, void); + CHECK_INEQUALITY (omp::allocator::default_mem, void); + CHECK_INEQUALITY (omp::allocator::large_cap_mem, void); + CHECK_INEQUALITY (omp::allocator::const_mem, void); + CHECK_INEQUALITY (omp::allocator::high_bw_mem, void); + CHECK_INEQUALITY (omp::allocator::low_lat_mem, void); + CHECK_INEQUALITY (omp::allocator::cgroup_mem, void); + CHECK_INEQUALITY (omp::allocator::pteam_mem, void); + CHECK_INEQUALITY (omp::allocator::thread_mem, void); +#ifdef __gnu_linux__ + /* Pinning not implemented on other targets. */ + CHECK_INEQUALITY (ompx::allocator::gnu_pinned_mem, void); +#endif + /* And again with the same type passed to the allocator. */ + CHECK_INEQUALITY (omp::allocator::null_allocator, T); + CHECK_INEQUALITY (omp::allocator::default_mem, T); + CHECK_INEQUALITY (omp::allocator::large_cap_mem, T); + CHECK_INEQUALITY (omp::allocator::const_mem, T); + CHECK_INEQUALITY (omp::allocator::high_bw_mem, T); + CHECK_INEQUALITY (omp::allocator::low_lat_mem, T); + CHECK_INEQUALITY (omp::allocator::cgroup_mem, T); + CHECK_INEQUALITY (omp::allocator::pteam_mem, T); + CHECK_INEQUALITY (omp::allocator::thread_mem, T); +#ifdef __gnu_linux__ + CHECK_INEQUALITY (ompx::allocator::gnu_pinned_mem, T); +#endif +} + +#undef CHECK_INEQUALITY + +struct S +{ + int _v0; + bool _v1; + float _v2; + + bool operator== (S const& other) const noexcept { + return _v0 == other._v0 + && _v1 == other._v1 + && _v2 == other._v2; + } + bool operator!= (S const& other) const noexcept { + return !this->operator==(other); + } +}; + +int main () +{ + test<int, omp::allocator::null_allocator>(42); + test<int, omp::allocator::default_mem>(42); + test<int, omp::allocator::large_cap_mem>(42); + test<int, omp::allocator::const_mem>(42); + test<int, omp::allocator::high_bw_mem>(42); + test<int, omp::allocator::low_lat_mem>(42); + test<int, omp::allocator::cgroup_mem>(42); + test<int, omp::allocator::pteam_mem>(42); + test<int, omp::allocator::thread_mem>(42); +#ifdef __gnu_linux__ + test<int, ompx::allocator::gnu_pinned_mem>(42); +#endif + + test<long long, omp::allocator::null_allocator>(42); + test<long long, omp::allocator::default_mem>(42); + test<long long, omp::allocator::large_cap_mem>(42); + test<long long, omp::allocator::const_mem>(42); + test<long long, omp::allocator::high_bw_mem>(42); + test<long long, omp::allocator::low_lat_mem>(42); + test<long long, omp::allocator::cgroup_mem>(42); + test<long long, omp::allocator::pteam_mem>(42); + test<long long, omp::allocator::thread_mem>(42); +#ifdef __gnu_linux__ + test<long long, ompx::allocator::gnu_pinned_mem>(42); +#endif + + test<S, omp::allocator::null_allocator>( S{42, true, 128.f}); + test<S, omp::allocator::default_mem>( S{42, true, 128.f}); + test<S, omp::allocator::large_cap_mem>( S{42, true, 128.f}); + test<S, omp::allocator::const_mem>( S{42, true, 128.f}); + test<S, omp::allocator::high_bw_mem>( S{42, true, 128.f}); + test<S, omp::allocator::low_lat_mem>( S{42, true, 128.f}); + test<S, omp::allocator::cgroup_mem>( S{42, true, 128.f}); + test<S, omp::allocator::pteam_mem>( S{42, true, 128.f}); + test<S, omp::allocator::thread_mem>( S{42, true, 128.f}); 
+#ifdef __gnu_linux__ + test<S, ompx::allocator::gnu_pinned_mem>(S{42, true, 128.f}); +#endif + + test_inequality<int, omp::allocator::null_allocator>(); + test_inequality<int, omp::allocator::default_mem>(); + test_inequality<int, omp::allocator::large_cap_mem>(); + test_inequality<int, omp::allocator::const_mem>(); + test_inequality<int, omp::allocator::high_bw_mem>(); + test_inequality<int, omp::allocator::low_lat_mem>(); + test_inequality<int, omp::allocator::cgroup_mem>(); + test_inequality<int, omp::allocator::pteam_mem>(); + test_inequality<int, omp::allocator::thread_mem>(); +#ifdef __gnu_linux__ + test_inequality<int, ompx::allocator::gnu_pinned_mem>(); +#endif +} diff --git a/libgomp/testsuite/libgomp.c++/allocator-2.C b/libgomp/testsuite/libgomp.c++/allocator-2.C new file mode 100644 index 0000000..ca94fc7 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/allocator-2.C @@ -0,0 +1,141 @@ +// { dg-do run } +// { dg-additional-options "-Wno-psabi" } + +#include <omp.h> +#include <vector> + +template<typename T> +bool ptr_is_aligned(T *ptr, std::size_t alignment) +{ + /* ALIGNMENT must be a power of 2. */ + if ((alignment & (alignment - 1)) != 0) + __builtin_abort (); + __UINTPTR_TYPE__ ptr_value + = reinterpret_cast<__UINTPTR_TYPE__>(static_cast<void*>(ptr)); + return (ptr_value % alignment) == 0; +} + +template<typename T, template<typename> class Alloc> +void f (T v0, T v1, T v2, T v3) +{ + std::vector<T, Alloc<T>> vec; + vec.push_back (v0); + vec.push_back (v1); + vec.push_back (v2); + vec.push_back (v3); + if (vec.at (0) != v0) + __builtin_abort (); + if (vec.at (1) != v1) + __builtin_abort (); + if (vec.at (2) != v2) + __builtin_abort (); + if (vec.at (3) != v3) + __builtin_abort (); + if (!ptr_is_aligned (&vec.at (0), alignof (T))) + __builtin_abort (); + if (!ptr_is_aligned (&vec.at (1), alignof (T))) + __builtin_abort (); + if (!ptr_is_aligned (&vec.at (2), alignof (T))) + __builtin_abort (); + if (!ptr_is_aligned (&vec.at (3), alignof (T))) + __builtin_abort (); +} + +struct S0 +{ + int _v0; + bool _v1; + float _v2; + + bool operator== (S0 const& other) const noexcept { + return _v0 == other._v0 + && _v1 == other._v1 + && _v2 == other._v2; + } + bool operator!= (S0 const& other) const noexcept { + return !this->operator==(other); + } +}; + +struct alignas(128) S1 +{ + int _v0; + bool _v1; + float _v2; + + bool operator== (S1 const& other) const noexcept { + return _v0 == other._v0 + && _v1 == other._v1 + && _v2 == other._v2; + } + bool operator!= (S1 const& other) const noexcept { + return !this->operator==(other); + } +}; + +/* Note: the test for const_mem should be disabled in the future. */ + +int main () +{ + f<int, omp::allocator::null_allocator >(0, 1, 2, 3); + f<int, omp::allocator::default_mem >(0, 1, 2, 3); + f<int, omp::allocator::large_cap_mem >(0, 1, 2, 3); + f<int, omp::allocator::const_mem >(0, 1, 2, 3); + f<int, omp::allocator::high_bw_mem >(0, 1, 2, 3); + f<int, omp::allocator::low_lat_mem >(0, 1, 2, 3); + f<int, omp::allocator::cgroup_mem >(0, 1, 2, 3); + f<int, omp::allocator::pteam_mem >(0, 1, 2, 3); + f<int, omp::allocator::thread_mem >(0, 1, 2, 3); +#ifdef __gnu_linux__ + /* Pinning not implemented on other targets. 
*/ + f<int, ompx::allocator::gnu_pinned_mem>(0, 1, 2, 3); +#endif + + f<long long, omp::allocator::null_allocator >(0, 1, 2, 3); + f<long long, omp::allocator::default_mem >(0, 1, 2, 3); + f<long long, omp::allocator::large_cap_mem >(0, 1, 2, 3); + f<long long, omp::allocator::const_mem >(0, 1, 2, 3); + f<long long, omp::allocator::high_bw_mem >(0, 1, 2, 3); + f<long long, omp::allocator::low_lat_mem >(0, 1, 2, 3); + f<long long, omp::allocator::cgroup_mem >(0, 1, 2, 3); + f<long long, omp::allocator::pteam_mem >(0, 1, 2, 3); + f<long long, omp::allocator::thread_mem >(0, 1, 2, 3); +#ifdef __gnu_linux__ + f<long long, ompx::allocator::gnu_pinned_mem>(0, 1, 2, 3); +#endif + + S0 s0_0{ 42, true, 111128.f}; + S0 s0_1{ 142, false, 11128.f}; + S0 s0_2{ 1142, true, 1128.f}; + S0 s0_3{11142, false, 128.f}; + f<S0, omp::allocator::null_allocator >(s0_0, s0_1, s0_2, s0_3); + f<S0, omp::allocator::default_mem >(s0_0, s0_1, s0_2, s0_3); + f<S0, omp::allocator::large_cap_mem >(s0_0, s0_1, s0_2, s0_3); + f<S0, omp::allocator::const_mem >(s0_0, s0_1, s0_2, s0_3); + f<S0, omp::allocator::high_bw_mem >(s0_0, s0_1, s0_2, s0_3); + f<S0, omp::allocator::low_lat_mem >(s0_0, s0_1, s0_2, s0_3); + f<S0, omp::allocator::cgroup_mem >(s0_0, s0_1, s0_2, s0_3); + f<S0, omp::allocator::pteam_mem >(s0_0, s0_1, s0_2, s0_3); + f<S0, omp::allocator::thread_mem >(s0_0, s0_1, s0_2, s0_3); +#ifdef __gnu_linux__ + f<S0, ompx::allocator::gnu_pinned_mem>(s0_0, s0_1, s0_2, s0_3); +#endif + + S1 s1_0{ 42, true, 111128.f}; + S1 s1_1{ 142, false, 11128.f}; + S1 s1_2{ 1142, true, 1128.f}; + S1 s1_3{11142, false, 128.f}; + + f<S1, omp::allocator::null_allocator >(s1_0, s1_1, s1_2, s1_3); + f<S1, omp::allocator::default_mem >(s1_0, s1_1, s1_2, s1_3); + f<S1, omp::allocator::large_cap_mem >(s1_0, s1_1, s1_2, s1_3); + f<S1, omp::allocator::const_mem >(s1_0, s1_1, s1_2, s1_3); + f<S1, omp::allocator::high_bw_mem >(s1_0, s1_1, s1_2, s1_3); + f<S1, omp::allocator::low_lat_mem >(s1_0, s1_1, s1_2, s1_3); + f<S1, omp::allocator::cgroup_mem >(s1_0, s1_1, s1_2, s1_3); + f<S1, omp::allocator::pteam_mem >(s1_0, s1_1, s1_2, s1_3); + f<S1, omp::allocator::thread_mem >(s1_0, s1_1, s1_2, s1_3); +#ifdef __gnu_linux__ + f<S1, ompx::allocator::gnu_pinned_mem>(s1_0, s1_1, s1_2, s1_3); +#endif +} diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-1.C b/libgomp/testsuite/libgomp.c++/declare-mapper-1.C new file mode 100644 index 0000000..aba4f42 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/declare-mapper-1.C @@ -0,0 +1,87 @@ +// { dg-do run } + +#include <cstdlib> +#include <cassert> + +#define N 64 + +struct points +{ + double *x; + double *y; + double *z; + size_t len; +}; + +#pragma omp declare mapper(points p) map(to:p.x, p.y, p.z) \ + map(p.x[0:p.len]) \ + map(p.y[0:p.len]) \ + map(p.z[0:p.len]) + +struct shape +{ + points tmp; + points *pts; + int metadata[128]; +}; + +#pragma omp declare mapper(shape s) map(tofrom:s.pts, *s.pts) map(alloc:s.tmp) + +void +alloc_points (points *pts, size_t sz) +{ + pts->x = new double[sz]; + pts->y = new double[sz]; + pts->z = new double[sz]; + pts->len = sz; + for (int i = 0; i < sz; i++) + pts->x[i] = pts->y[i] = pts->z[i] = 0; +} + +int main (int argc, char *argv[]) +{ + shape myshape; + points mypts; + + myshape.pts = &mypts; + + alloc_points (&myshape.tmp, N); + myshape.pts = new points; + alloc_points (myshape.pts, N); + + #pragma omp target map(myshape) + { + for (int i = 0; i < N; i++) + { + myshape.pts->x[i]++; + myshape.pts->y[i]++; + myshape.pts->z[i]++; + } + } + + for (int i = 0; i < N; i++) + 
{ + assert (myshape.pts->x[i] == 1); + assert (myshape.pts->y[i] == 1); + assert (myshape.pts->z[i] == 1); + } + + #pragma omp target + { + for (int i = 0; i < N; i++) + { + myshape.pts->x[i]++; + myshape.pts->y[i]++; + myshape.pts->z[i]++; + } + } + + for (int i = 0; i < N; i++) + { + assert (myshape.pts->x[i] == 2); + assert (myshape.pts->y[i] == 2); + assert (myshape.pts->z[i] == 2); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-2.C b/libgomp/testsuite/libgomp.c++/declare-mapper-2.C new file mode 100644 index 0000000..d848fdb --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/declare-mapper-2.C @@ -0,0 +1,55 @@ +// { dg-do run } + +#include <cassert> + +#define N 256 + +struct doublebuf +{ + int buf_a[N][N]; + int buf_b[N][N]; +}; + +#pragma omp declare mapper(lo:doublebuf b) map(b.buf_a[0:N/2][0:N]) \ + map(b.buf_b[0:N/2][0:N]) + +#pragma omp declare mapper(hi:doublebuf b) map(b.buf_a[N/2:N/2][0:N]) \ + map(b.buf_b[N/2:N/2][0:N]) + +int main (int argc, char *argv[]) +{ + doublebuf db; + + for (int i = 0; i < N; i++) + for (int j = 0; j < N; j++) + db.buf_a[i][j] = db.buf_b[i][j] = 0; + + #pragma omp target map(mapper(lo), tofrom:db) + { + for (int i = 0; i < N / 2; i++) + for (int j = 0; j < N; j++) + { + db.buf_a[i][j]++; + db.buf_b[i][j]++; + } + } + + #pragma omp target map(mapper(hi), tofrom:db) + { + for (int i = N / 2; i < N; i++) + for (int j = 0; j < N; j++) + { + db.buf_a[i][j]++; + db.buf_b[i][j]++; + } + } + + for (int i = 0; i < N; i++) + for (int j = 0; j < N; j++) + { + assert (db.buf_a[i][j] == 1); + assert (db.buf_b[i][j] == 1); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-3.C b/libgomp/testsuite/libgomp.c++/declare-mapper-3.C new file mode 100644 index 0000000..ea9b7de --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/declare-mapper-3.C @@ -0,0 +1,63 @@ +// { dg-do run } + +#include <cstdlib> +#include <cassert> + +struct S { + int *myarr; +}; + +#pragma omp declare mapper (S s) map(to:s.myarr) map (tofrom: s.myarr[0:20]) + +namespace A { +#pragma omp declare mapper (S s) map(to:s.myarr) map (tofrom: s.myarr[0:100]) +} + +namespace B { +#pragma omp declare mapper (S s) map(to:s.myarr) map (tofrom: s.myarr[100:100]) +} + +namespace A +{ + void incr_a (S my_s) + { +#pragma omp target + { + for (int i = 0; i < 100; i++) + my_s.myarr[i]++; + } + } +} + +namespace B +{ + void incr_b (S my_s) + { +#pragma omp target + { + for (int i = 100; i < 200; i++) + my_s.myarr[i]++; + } + } +} + +int main (int argc, char *argv[]) +{ + S my_s; + + my_s.myarr = (int *) calloc (200, sizeof (int)); + +#pragma omp target + { + for (int i = 0; i < 20; i++) + my_s.myarr[i]++; + } + + A::incr_a (my_s); + B::incr_b (my_s); + + for (int i = 0; i < 200; i++) + assert (my_s.myarr[i] == ((i < 20) ? 2 : 1));
+ + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-4.C b/libgomp/testsuite/libgomp.c++/declare-mapper-4.C new file mode 100644 index 0000000..f194e63 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/declare-mapper-4.C @@ -0,0 +1,63 @@ +// { dg-do run } + +#include <cstdlib> +#include <cassert> + +struct S { + int *myarr; +}; + +#pragma omp declare mapper (S s) map(to:s.myarr) map (tofrom: s.myarr[0:20]) + +namespace A { +#pragma omp declare mapper (S s) map(to:s.myarr) map (tofrom: s.myarr[0:100]) +} + +namespace B { +#pragma omp declare mapper (S s) map(to:s.myarr) map (tofrom: s.myarr[100:100]) +} + +namespace A +{ + void incr_a (S &my_s) + { +#pragma omp target + { + for (int i = 0; i < 100; i++) + my_s.myarr[i]++; + } + } +} + +namespace B +{ + void incr_b (S &my_s) + { +#pragma omp target + { + for (int i = 100; i < 200; i++) + my_s.myarr[i]++; + } + } +} + +int main (int argc, char *argv[]) +{ + S my_s; + + my_s.myarr = (int *) calloc (200, sizeof (int)); + +#pragma omp target + { + for (int i = 0; i < 20; i++) + my_s.myarr[i]++; + } + + A::incr_a (my_s); + B::incr_b (my_s); + + for (int i = 0; i < 200; i++) + assert (my_s.myarr[i] == ((i < 20) ? 2 : 1)); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-5.C b/libgomp/testsuite/libgomp.c++/declare-mapper-5.C new file mode 100644 index 0000000..0030de8 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/declare-mapper-5.C @@ -0,0 +1,52 @@ +// { dg-do run } + +#include <cassert> + +struct S +{ + int *myarr; + int len; +}; + +class C +{ + S smemb; +#pragma omp declare mapper (custom:S s) map(to:s.myarr) \ + map(tofrom:s.myarr[0:s.len]) + +public: + C(int l) + { + smemb.myarr = new int[l]; + smemb.len = l; + for (int i = 0; i < l; i++) + smemb.myarr[i] = 0; + } + void bump(); + void check(); +}; + +void +C::bump () +{ +#pragma omp target map(mapper(custom), tofrom: smemb) + { + for (int i = 0; i < smemb.len; i++) + smemb.myarr[i]++; + } +} + +void +C::check () +{ + for (int i = 0; i < smemb.len; i++) + assert (smemb.myarr[i] == 1); +} + +int main (int argc, char *argv[]) +{ + C test (100); + test.bump (); + test.check (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-6.C b/libgomp/testsuite/libgomp.c++/declare-mapper-6.C new file mode 100644 index 0000000..14ed10d --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/declare-mapper-6.C @@ -0,0 +1,37 @@ +// { dg-do run } + +#include <cassert> + +template <typename T> +void adjust (T param) +{ +#pragma omp declare mapper (T x) map(to:x.len, x.base) \ + map(tofrom:x.base[0:x.len]) + +#pragma omp target + for (int i = 0; i < param.len; i++) + param.base[i]++; +} + +struct S { + int len; + int *base; +}; + +int main (int argc, char *argv[]) +{ + S a; + + a.len = 100; + a.base = new int[a.len]; + + for (int i = 0; i < a.len; i++) + a.base[i] = 0; + + adjust (a); + + for (int i = 0; i < a.len; i++) + assert (a.base[i] == 1); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-7.C b/libgomp/testsuite/libgomp.c++/declare-mapper-7.C new file mode 100644 index 0000000..ba4792a --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/declare-mapper-7.C @@ -0,0 +1,59 @@ +// { dg-do run } + +#include <cassert> + +struct S +{ + int *myarr; +}; + +struct T +{ + S *s; +}; + +#pragma omp declare mapper (s100: S x) map(to: x.myarr) \ + map(tofrom: x.myarr[0:100]) +// Define this because ...
+#pragma omp declare mapper (default: S x) map(to: x.myarr) \ + map(tofrom: x.myarr[0:100]) + + +void +bump (T t) +{ + /* Here we have an implicit/default mapper invoking a named mapper. We + need to make sure that can be located properly at gimplification + time. */ + +// ... the following is invalid in OpenMP - albeit supported by GCC +// (after disabling: error: in ‘declare mapper’ directives, parameter to ‘mapper’ modifier must be ‘default’) + +// #pragma omp declare mapper (T t) map(to:t.s) map(mapper(s100), tofrom: t.s[0]) + +// ... thus, we now use ... +#pragma omp declare mapper (T t) map(to:t.s) map(mapper(default), tofrom: t.s[0]) + +#pragma omp target + for (int i = 0; i < 100; i++) + t.s->myarr[i]++; +} + +int main (int argc, char *argv[]) +{ + S my_s; + T my_t; + + my_s.myarr = new int[100]; + my_t.s = &my_s; + + for (int i = 0; i < 100; i++) + my_s.myarr[i] = 0; + + bump (my_t); + + for (int i = 0; i < 100; i++) + assert (my_s.myarr[i] == 1); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-8.C b/libgomp/testsuite/libgomp.c++/declare-mapper-8.C new file mode 100644 index 0000000..3818e52 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/declare-mapper-8.C @@ -0,0 +1,61 @@ +// { dg-do run } + +#include <cassert> + +struct S +{ + int *myarr; + int len; +}; + +template<typename T> +class C +{ + T memb; +#pragma omp declare mapper (T t) map(to:t.len, t.myarr) \ + map(tofrom:t.myarr[0:t.len]) + +public: + C(int sz); + ~C(); + void bump(); + void check(); +}; + +template<typename T> +C<T>::C(int sz) +{ + memb.myarr = new int[sz]; + for (int i = 0; i < sz; i++) + memb.myarr[i] = 0; + memb.len = sz; +} + +template<typename T> +C<T>::~C() +{ + delete[] memb.myarr; +} + +template<typename T> +void C<T>::bump() +{ +#pragma omp target map(memb) + for (int i = 0; i < memb.len; i++) + memb.myarr[i]++; +} + +template<typename T> +void C<T>::check() +{ + for (int i = 0; i < memb.len; i++) + assert (memb.myarr[i] == 1); +} + +int main(int argc, char *argv[]) +{ + C<S> c_int(100); + c_int.bump(); + c_int.check(); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/declare_target-2.C b/libgomp/testsuite/libgomp.c++/declare_target-2.C new file mode 100644 index 0000000..ab94a55 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/declare_target-2.C @@ -0,0 +1,25 @@ +// { dg-do link } + +// Actually not needed: -fipa-cp is default with -O2: +// { dg-additional-options "-O2 -fipa-cp" } + +// The code failed because 'std::endl' becomes implicitly 'declare target' +// but not the 'widen' function it calls.  While the linker had no issues +// (endl is never called, either because it is inlined or optimized away), +// the IPA-CP (enabled by -O2 and higher) failed as the definition for +// 'widen' did not exist on the offload side.
+ +#include <iostream> + +void func (int m) +{ + if (m < 0) + std::cout << "should not happen" << std::endl; +} + + +int main() +{ + #pragma omp target + func (1); +} diff --git a/libgomp/testsuite/libgomp.c++/pr106445-1-O0.C b/libgomp/testsuite/libgomp.c++/pr106445-1-O0.C new file mode 100644 index 0000000..bcd499c --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/pr106445-1-O0.C @@ -0,0 +1,3 @@ +// { dg-additional-options -O0 } + +#include "pr106445-1.C" diff --git a/libgomp/testsuite/libgomp.c++/pr106445-1.C b/libgomp/testsuite/libgomp.c++/pr106445-1.C new file mode 100644 index 0000000..329ce62 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/pr106445-1.C @@ -0,0 +1,18 @@ +#include <vector> + +int main() +{ +#pragma omp target + { + { + std::vector<int> v; + if (!v.empty()) + __builtin_abort(); + } + { + std::vector<int> v(100); + if (v.capacity() < 100) + __builtin_abort(); + } + } +} diff --git a/libgomp/testsuite/libgomp.c++/pr119692-1-1.C b/libgomp/testsuite/libgomp.c++/pr119692-1-1.C new file mode 100644 index 0000000..1f59b15 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/pr119692-1-1.C @@ -0,0 +1,10 @@ +/* PR119692 "C++ 'typeinfo', 'vtable' vs. OpenACC, OpenMP 'target' offloading" */ + +/* { dg-additional-options -UDEFAULT } + Wrong code for offloading execution. + { dg-additional-options -foffload=disable } */ +/* { dg-additional-options -fdump-tree-gimple } */ + +#include "../libgomp.oacc-c++/pr119692-1-1.C" + +/* { dg-final { scan-tree-dump-not {(?n)#pragma omp target .* map\(tofrom:_ZTI2C2 \[len: [0-9]+\] \[runtime_implicit\]\) map\(tofrom:_ZTI2C1 \[len: [0-9]+\] \[runtime_implicit\]\) map\(tofrom:_ZTV2C1 \[len: [0-9]+\] \[runtime_implicit\]\)$} gimple { xfail *-*-* } } } */ diff --git a/libgomp/testsuite/libgomp.c++/pr119692-1-2.C b/libgomp/testsuite/libgomp.c++/pr119692-1-2.C new file mode 100644 index 0000000..e7ac818 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/pr119692-1-2.C @@ -0,0 +1,11 @@ +/* PR119692 "C++ 'typeinfo', 'vtable' vs. OpenACC, OpenMP 'target' offloading" */ + +/* { dg-additional-options -DDEFAULT=defaultmap(none) } + Fails to compile. + { dg-do compile } */ + +#include "pr119692-1-1.C" + +/* { dg-bogus {error: '_ZTV2C1' not specified in enclosing 'target'} PR119692 { xfail *-*-* } 0 } + { dg-bogus {error: '_ZTI2C2' not specified in enclosing 'target'} PR119692 { xfail *-*-* } 0 } + { dg-bogus {error: '_ZTI2C1' not specified in enclosing 'target'} PR119692 { xfail *-*-* } 0 } */ diff --git a/libgomp/testsuite/libgomp.c++/pr119692-1-3.C b/libgomp/testsuite/libgomp.c++/pr119692-1-3.C new file mode 100644 index 0000000..733feb8 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/pr119692-1-3.C @@ -0,0 +1,10 @@ +/* PR119692 "C++ 'typeinfo', 'vtable' vs. OpenACC, OpenMP 'target' offloading" */ + +/* { dg-additional-options -DDEFAULT=defaultmap(present) } + Wrong code for offloading execution. 
+ { dg-xfail-run-if PR119692 { offload_device } } */ +/* { dg-additional-options -fdump-tree-gimple } */ + +#include "pr119692-1-1.C" + +/* { dg-final { scan-tree-dump-not {(?n)#pragma omp target .* defaultmap\(present\) map\(force_present:_ZTI2C2 \[len: [0-9]+\] \[runtime_implicit\]\) map\(force_present:_ZTI2C1 \[len: [0-9]+\] \[runtime_implicit\]\) map\(force_present:_ZTV2C1 \[len: [0-9]+\] \[runtime_implicit\]\)$} gimple { xfail *-*-* } } } */ diff --git a/libgomp/testsuite/libgomp.c++/pr119692-1-4.C b/libgomp/testsuite/libgomp.c++/pr119692-1-4.C new file mode 100644 index 0000000..af9fe1c --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/pr119692-1-4.C @@ -0,0 +1,13 @@ +/* PR119692 "C++ 'typeinfo', 'vtable' vs. OpenACC, OpenMP 'target' offloading" */ + +/* { dg-additional-options -DDEFAULT=defaultmap(firstprivate) } + Wrong code for offloading execution. + { dg-xfail-run-if PR119692 { offload_device } } */ +/* There are configurations where we 'WARNING: program timed out.' while in + 'dynamic_cast', see <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119692#c6>. + { dg-timeout 10 } ... to make sure that happens quickly. */ +/* { dg-additional-options -fdump-tree-gimple } */ + +#include "pr119692-1-1.C" + +/* { dg-final { scan-tree-dump-not {(?n)#pragma omp target .* defaultmap\(firstprivate\) firstprivate\(_ZTI2C2\) firstprivate\(_ZTI2C1\) firstprivate\(_ZTV2C1\)$} gimple { xfail *-*-* } } } */ diff --git a/libgomp/testsuite/libgomp.c++/pr119692-1-5.C b/libgomp/testsuite/libgomp.c++/pr119692-1-5.C new file mode 100644 index 0000000..e5c6e07 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/pr119692-1-5.C @@ -0,0 +1,13 @@ +/* PR119692 "C++ 'typeinfo', 'vtable' vs. OpenACC, OpenMP 'target' offloading" */ + +/* { dg-additional-options -DDEFAULT=defaultmap(to) } + Wrong code for offloading execution. + { dg-xfail-run-if PR119692 { offload_device } } */ +/* There are configurations where we 'WARNING: program timed out.' while in + 'dynamic_cast', see <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119692#c6>. + { dg-timeout 10 } ... to make sure that happens quickly. */ +/* { dg-additional-options -fdump-tree-gimple } */ + +#include "pr119692-1-1.C" + +/* { dg-final { scan-tree-dump-not {(?n)#pragma omp target .* defaultmap\(to\) map\(to:_ZTI2C2 \[len: [0-9]+\] \[runtime_implicit\]\) map\(to:_ZTI2C1 \[len: [0-9]+\] \[runtime_implicit\]\) map\(to:_ZTV2C1 \[len: [0-9]+\] \[runtime_implicit\]\)$} gimple { xfail *-*-* } } } */ diff --git a/libgomp/testsuite/libgomp.c++/pr96390.C b/libgomp/testsuite/libgomp.c++/pr96390.C index 1f3c3e0..be19601 100644 --- a/libgomp/testsuite/libgomp.c++/pr96390.C +++ b/libgomp/testsuite/libgomp.c++/pr96390.C @@ -1,6 +1,4 @@ /* { dg-additional-options "-O0 -fdump-tree-omplower" } */ -/* { dg-additional-options "-foffload=-Wa,--verify" { target offload_target_nvptx } } */ -/* { dg-xfail-if "PR 97106/PR 97102 - .alias not (yet) supported for nvptx" { offload_target_nvptx } } */ #include <cstdlib> #include <type_traits> diff --git a/libgomp/testsuite/libgomp.c++/target-cdtor-1.C b/libgomp/testsuite/libgomp.c++/target-cdtor-1.C new file mode 100644 index 0000000..ecb029e --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-cdtor-1.C @@ -0,0 +1,104 @@ +/* Offloaded C++ objects construction and destruction. 
*/ + +/* { dg-additional-options -fdump-tree-optimized-raw-asmname } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw-asmname } */ + +#include <omp.h> +#include <vector> + +#pragma omp declare target + +struct S +{ + int x; + + S() + : x(-1) + { + __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device()); + } + S(int x) + : x(x) + { + __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device()); + } + ~S() + { + __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device()); + } +}; + +#pragma omp end declare target + +S sH1(7); + +#pragma omp declare target + +S sHD1(5); + +std::vector<S> svHD1(2); + +#pragma omp end declare target + +S sH2(3); + +int main() +{ + int c = 0; + + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + +#pragma omp target map(c) + { + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + } + +#pragma omp target map(c) + { + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + } + + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + + return 0; +} + +/* Verify '__cxa_atexit' calls. + + For the host, there are four expected calls: + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, } 4 optimized { target cxa_atexit } } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH2, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + + For the device, there are two expected calls: + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, } 2 optimized { target cxa_atexit } } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } +*/ + +/* C++ objects are constructed in order of appearance (..., and destructed in reverse order). + { dg-output {S, 7, 1[\r\n]+} } + { dg-output {S, 5, 1[\r\n]+} } + { dg-output {S, -1, 1[\r\n]+} } + { dg-output {S, -1, 1[\r\n]+} } + { dg-output {S, 3, 1[\r\n]+} } + { dg-output {main:1, 1[\r\n]+} } + { dg-output {S, 5, 0[\r\n]+} { target offload_device } } + { dg-output {S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {main:2, 1[\r\n]+} { target { ! offload_device } } } + { dg-output {main:2, 0[\r\n]+} { target offload_device } } + { dg-output {main:3, 1[\r\n]+} { target { ! 
offload_device } } } + { dg-output {main:3, 0[\r\n]+} { target offload_device } } + { dg-output {main:4, 1[\r\n]+} } + { dg-output {~S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {~S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {~S, 5, 0[\r\n]+} { target offload_device } } + { dg-output {~S, 3, 1[\r\n]+} } + { dg-output {~S, -1, 1[\r\n]+} } + { dg-output {~S, -1, 1[\r\n]+} } + { dg-output {~S, 5, 1[\r\n]+} } + { dg-output {~S, 7, 1[\r\n]+} } +*/ diff --git a/libgomp/testsuite/libgomp.c++/target-cdtor-2.C b/libgomp/testsuite/libgomp.c++/target-cdtor-2.C new file mode 100644 index 0000000..75e48ca --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-cdtor-2.C @@ -0,0 +1,140 @@ +/* Offloaded 'constructor' and 'destructor' functions, and C++ objects construction and destruction. */ + +/* { dg-require-effective-target init_priority } */ + +/* { dg-additional-options -fdump-tree-optimized-raw-asmname } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw-asmname } */ + +#include <omp.h> +#include <vector> + +#pragma omp declare target + +struct S +{ + int x; + + S() + : x(-1) + { + __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device()); + } + S(int x) + : x(x) + { + __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device()); + } + ~S() + { + __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device()); + } +}; + +#pragma omp end declare target + +S sH1 __attribute__((init_priority(1500))) (7); + +#pragma omp declare target + +S sHD1 __attribute__((init_priority(2000))) (5); + +std::vector<S> svHD1 __attribute__((init_priority(1000))) (2); + +static void +__attribute__((constructor(20000))) +initDH1() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +static void +__attribute__((destructor(20000))) +finiDH1() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +#pragma omp end declare target + +S sH2 __attribute__((init_priority(500))) (3); + +static void +__attribute__((constructor(10000))) +initH1() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +static void +__attribute__((destructor(10000))) +finiH1() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +int main() +{ + int c = 0; + + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + +#pragma omp target map(c) + { + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + } + +#pragma omp target map(c) + { + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + } + + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + + return 0; +} + +/* Verify '__cxa_atexit' calls. 
+ + For the host, there are four expected calls: + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, } 4 optimized { target cxa_atexit } } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH2, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + + For the device, there are two expected calls: + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, } 2 optimized { target cxa_atexit } } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } +*/ + +/* Defined order in which 'constructor' functions, and 'destructor' functions are run, and C++ objects are constructed (..., and destructed in reverse order). + { dg-output {S, 3, 1[\r\n]+} } + { dg-output {S, -1, 1[\r\n]+} } + { dg-output {S, -1, 1[\r\n]+} } + { dg-output {S, 7, 1[\r\n]+} } + { dg-output {S, 5, 1[\r\n]+} } + { dg-output {initH1, 1[\r\n]+} } + { dg-output {initDH1, 1[\r\n]+} } + { dg-output {main:1, 1[\r\n]+} } + { dg-output {S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {S, 5, 0[\r\n]+} { target offload_device } } + { dg-output {initDH1, 0[\r\n]+} { target offload_device } } + { dg-output {main:2, 1[\r\n]+} { target { ! offload_device } } } + { dg-output {main:2, 0[\r\n]+} { target offload_device } } + { dg-output {main:3, 1[\r\n]+} { target { ! offload_device } } } + { dg-output {main:3, 0[\r\n]+} { target offload_device } } + { dg-output {main:4, 1[\r\n]+} } + { dg-output {~S, 5, 0[\r\n]+} { target offload_device } } + { dg-output {~S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {~S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {finiDH1, 0[\r\n]+} { target offload_device } } + { dg-output {~S, 5, 1[\r\n]+} } + { dg-output {~S, 7, 1[\r\n]+} } + { dg-output {~S, -1, 1[\r\n]+} } + { dg-output {~S, -1, 1[\r\n]+} } + { dg-output {~S, 3, 1[\r\n]+} } + { dg-output {finiDH1, 1[\r\n]+} } + { dg-output {finiH1, 1[\r\n]+} } +*/ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-1.C b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-1.C new file mode 100644 index 0000000..a862652 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-1.C @@ -0,0 +1,28 @@ +/* 'std::bad_cast' exception in OpenMP 'target' region. 
*/ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "../libgomp.oacc-c++/exceptions-bad_cast-1.C" + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + For host execution, we print something like: + terminate called after throwing an instance of 'std::bad_cast' + what(): std::bad_cast + Aborted (core dumped) + { dg-output {.*std::bad_cast} { target { ! offload_device } } } + For GCN, nvptx offload execution, we don't print anything, but just 'abort'. + + TODO For GCN, nvptx offload execution, this currently doesn't 'abort' due to + the 'std::bad_cast' exception, but rather due to SIGSEGV in 'dynamic_cast'; + PR119692. + + { dg-shouldfail {'std::bad_cast' exception} } */ +/* There are configurations where we 'WARNING: program timed out.' while in + 'dynamic_cast', see <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119692#c6>. + { dg-timeout 10 } ... to make sure that happens quickly. */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-2-offload-sorry-GCN.C b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-2-offload-sorry-GCN.C new file mode 100644 index 0000000..93884df --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-2-offload-sorry-GCN.C @@ -0,0 +1,19 @@ +/* 'std::bad_cast' exception in OpenMP 'target' region, caught, '-foffload-options=-mno-fake-exceptions'. */ + +/* As this test case involves an expected offload compilation failure, we have to handle each offload target individually. + { dg-do link { target offload_target_amdgcn } } + { dg-additional-options -foffload=amdgcn-amdhsa } */ +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -foffload-options=-mno-fake-exceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "target-exceptions-bad_cast-2.C" + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + Given '-foffload-options=-mno-fake-exceptions', offload compilation fails: + { dg-regexp {[^\r\n]+: In function 'main[^']+':[\r\n]+(?:[^\r\n]+: sorry, unimplemented: exception handling not supported[\r\n]+)+} } + (Note, using 'dg-regexp' instead of 'dg-message', as the former runs before the auto-mark-UNSUPPORTED.) + { dg-excess-errors {'mkoffload' failure etc.} } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-2-offload-sorry-nvptx.C b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-2-offload-sorry-nvptx.C new file mode 100644 index 0000000..83ec89b --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-2-offload-sorry-nvptx.C @@ -0,0 +1,19 @@ +/* 'std::bad_cast' exception in OpenMP 'target' region, caught, '-foffload-options=-mno-fake-exceptions'. */ + +/* As this test case involves an expected offload compilation failure, we have to handle each offload target individually. 
+ { dg-do link { target offload_target_nvptx } } + { dg-additional-options -foffload=nvptx-none } */ +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -foffload-options=-mno-fake-exceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "target-exceptions-bad_cast-2.C" + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + Given '-foffload-options=-mno-fake-exceptions', offload compilation fails: + { dg-regexp {[^\r\n]+: In function 'main[^']+':[\r\n]+(?:[^\r\n]+: sorry, unimplemented: exception handling not supported[\r\n]+)+} } + (Note, using 'dg-regexp' instead of 'dg-message', as the former runs before the auto-mark-UNSUPPORTED.) + { dg-excess-errors {'mkoffload' failure etc.} } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-2.C b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-2.C new file mode 100644 index 0000000..ff15c9f --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-2.C @@ -0,0 +1,27 @@ +/* 'std::bad_cast' exception in OpenMP 'target' region, caught. */ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ +/* { dg-bogus {_ZTISt8bad_cast} PR119734 { target offload_target_nvptx xfail *-*-* } 0 } + { dg-excess-errors {'mkoffload' failure etc.} { xfail offload_target_nvptx } } */ + +#include "../libgomp.oacc-c++/exceptions-bad_cast-2.C" + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + { dg-output {.*caught 'std::bad_cast'[\r\n]+} { target { ! offload_device } } } + For GCN, nvptx offload execution, we don't print anything, but just 'abort'. + + TODO For GCN, nvptx offload execution, this currently doesn't 'abort' due to + the 'std::bad_cast' exception, but rather due to SIGSEGV in 'dynamic_cast'; + PR119692. + + For GCN, nvptx offload execution, there is no 'catch'ing; any exception is fatal. + { dg-shouldfail {'MyException' exception} { offload_device } } */ +/* There are configurations where we 'WARNING: program timed out.' while in + 'dynamic_cast', see <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119692#c6>. + { dg-timeout 10 } ... to make sure that happens quickly. */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-3.C b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-3.C new file mode 100644 index 0000000..efed64f --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-3.C @@ -0,0 +1,17 @@ +/* 'std::bad_cast' exception in OpenMP 'target' region, dead code. */ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -DDEFAULT=defaultmap(to) } + ... to avoid wrong code for offloading execution; PR119692. + With this, the device code still isn't correct, but the defects are in dead code. 
+ { dg-additional-options -fdump-tree-gimple } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "../libgomp.oacc-c++/exceptions-bad_cast-3.C" + +/* { dg-final { scan-tree-dump-not {(?n)#pragma omp target .* defaultmap\(to\) map\(to:_ZTI2C2 \[len: [0-9]+\] \[runtime_implicit\]\) map\(to:_ZTI2C1 \[len: [0-9]+\] \[runtime_implicit\]\) map\(to:_ZTV2C1 \[len: [0-9]+\] \[runtime_implicit\]\)$} gimple { xfail *-*-* } } } */ + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C new file mode 100644 index 0000000..d4dccf1 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C @@ -0,0 +1,26 @@ +/* Exception handling constructs in dead code, '-foffload-options=-mno-fake-exceptions'. */ + +/* As this test case involves an expected offload compilation failure, we have to handle each offload target individually. + { dg-do link { target offload_target_amdgcn } } + { dg-additional-options -foffload=amdgcn-amdhsa } */ +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -foffload-options=-mno-fake-exceptions } */ +/* { dg-additional-options -O0 } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "target-exceptions-pr118794-1.C" + +/* In this specific C++ arrangement, distilled from PR118794, GCC synthesizes + '__builtin_eh_pointer', '__builtin_unwind_resume' calls as dead code in 'f': + { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } } + { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } } + ..., just 'targetm.arm_eabi_unwinder' is different: + { dg-final { scan-tree-dump-times {gimple_call <__builtin_cxa_end_cleanup, } 1 optimized { target { arm_eabi || tic6x-*-* } } } } + { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } } + { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } } + Given '-O0' and '-foffload-options=-mno-fake-exceptions', offload compilation fails: + { dg-regexp {[^\r\n]+: In function 'f':[\r\n]+(?:[^\r\n]+: sorry, unimplemented: exception handling not supported[\r\n]+)+} } + (Note, using 'dg-regexp' instead of 'dg-message', as the former runs before the auto-mark-UNSUPPORTED.) + { dg-excess-errors {'mkoffload' failure etc.} } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C new file mode 100644 index 0000000..724e34b --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C @@ -0,0 +1,26 @@ +/* Exception handling constructs in dead code, '-foffload-options=-mno-fake-exceptions'. 
*/ + +/* As this test case involves an expected offload compilation failure, we have to handle each offload target individually. + { dg-do link { target offload_target_nvptx } } + { dg-additional-options -foffload=nvptx-none } */ +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -foffload-options=-mno-fake-exceptions } */ +/* { dg-additional-options -O0 } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "target-exceptions-pr118794-1.C" + +/* In this specific C++ arrangement, distilled from PR118794, GCC synthesizes + '__builtin_eh_pointer', '__builtin_unwind_resume' calls as dead code in 'f': + { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } } + { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } } + ..., just 'targetm.arm_eabi_unwinder' is different: + { dg-final { scan-tree-dump-times {gimple_call <__builtin_cxa_end_cleanup, } 1 optimized { target { arm_eabi || tic6x-*-* } } } } + { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } } + { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } } + Given '-O0' and '-foffload-options=-mno-fake-exceptions', offload compilation fails: + { dg-regexp {[^\r\n]+: In function 'f':[\r\n]+(?:[^\r\n]+: sorry, unimplemented: exception handling not supported[\r\n]+)+} } + (Note, using 'dg-regexp' instead of 'dg-message', as the former runs before the auto-mark-UNSUPPORTED.) + { dg-excess-errors {'mkoffload' failure etc.} } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1.C b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1.C new file mode 100644 index 0000000..24eb7a5 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1.C @@ -0,0 +1,59 @@ +/* Exception handling constructs in dead code. */ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -O0 } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +/* See also '../../../gcc/testsuite/g++.target/gcn/exceptions-pr118794-1.C', + '../../../gcc/testsuite/g++.target/nvptx/exceptions-pr118794-1.C'. */ + +#pragma omp begin declare target + +bool ok = false; + +template <typename T> +struct C +{ + C() + { + ok = true; + } + C(int) {}; + ~C() {}; + + __attribute__((noipa)) + void m() + { + C c; + } +}; + +inline void f() +{ + C<double> c(1); + c.m(); +} + +#pragma omp end declare target + +int main() +{ +#pragma omp target + { + f(); + } +#pragma omp target update from(ok) + if (!ok) + __builtin_abort(); +} + +/* In this specific C++ arrangement, distilled from PR118794, GCC synthesizes + '__builtin_eh_pointer', '__builtin_unwind_resume' calls as dead code in 'f': + { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } } + { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized { target { ! 
{ arm_eabi || tic6x-*-* } } } } } + ..., just 'targetm.arm_eabi_unwinder' is different: + { dg-final { scan-tree-dump-times {gimple_call <__builtin_cxa_end_cleanup, } 1 optimized { target { arm_eabi || tic6x-*-* } } } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-1-O0.C b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-1-O0.C new file mode 100644 index 0000000..00d7c13 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-1-O0.C @@ -0,0 +1,23 @@ +/* 'throw' in OpenMP 'target' region. */ + +/* { dg-additional-options -O0 } */ +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "target-exceptions-throw-1.C" + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + For host execution, we print something like: + terminate called after throwing an instance of 'MyException' + Aborted (core dumped) + { dg-output {.*MyException} { target { ! offload_device } } } + For GCN, nvptx offload execution, we don't print anything, but just 'abort'. + + { dg-shouldfail {'MyException' exception} } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-1.C b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-1.C new file mode 100644 index 0000000..a4e7a10 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-1.C @@ -0,0 +1,22 @@ +/* 'throw' in OpenMP 'target' region. */ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "../libgomp.oacc-c++/exceptions-throw-1.C" + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + For host execution, we print something like: + terminate called after throwing an instance of 'MyException' + Aborted (core dumped) + { dg-output {.*MyException} { target { ! offload_device } } } + For GCN, nvptx offload execution, we don't print anything, but just 'abort'. + + { dg-shouldfail {'MyException' exception} } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2-O0.C b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2-O0.C new file mode 100644 index 0000000..b7a311d --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2-O0.C @@ -0,0 +1,25 @@ +/* 'throw' in OpenMP 'target' region, caught. 
*/ + +/* { dg-additional-options -O0 } */ +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ +/* { dg-bogus {undefined symbol: typeinfo name for MyException} PR119806 { target offload_target_amdgcn xfail *-*-* } 0 } + { dg-excess-errors {'mkoffload' failure etc.} { xfail offload_target_amdgcn } } */ +/* { dg-bogus {Initial value type mismatch} PR119806 { target offload_target_nvptx xfail *-*-* } 0 } + { dg-excess-errors {'mkoffload' failure etc.} { xfail offload_target_nvptx } } */ + +#include "target-exceptions-throw-2.C" + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-output {.*caught 'MyException'[\r\n]+} { target { ! offload_device } } } + For GCN, nvptx offload execution, we don't print anything, but just 'abort'. + + For GCN, nvptx offload execution, there is no 'catch'ing; any exception is fatal. + { dg-shouldfail {'MyException' exception} { offload_device } } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2-offload-sorry-GCN.C b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2-offload-sorry-GCN.C new file mode 100644 index 0000000..9905b1f --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2-offload-sorry-GCN.C @@ -0,0 +1,21 @@ +/* 'throw' in OpenMP 'target' region, caught, -foffload-options=-mno-fake-exceptions. */ + +/* As this test case involves an expected offload compilation failure, we have to handle each offload target individually. + { dg-do link { target offload_target_amdgcn } } + { dg-additional-options -foffload=amdgcn-amdhsa } */ +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -foffload-options=-mno-fake-exceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "target-exceptions-throw-2.C" + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + Given '-foffload-options=-mno-fake-exceptions', offload compilation fails: + { dg-regexp {[^\r\n]+: In function 'main[^']+':[\r\n]+(?:[^\r\n]+: sorry, unimplemented: exception handling not supported[\r\n]+)+} } + (Note, using 'dg-regexp' instead of 'dg-message', as the former runs before the auto-mark-UNSUPPORTED.) 
+ { dg-excess-errors {'mkoffload' failure etc.} } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2-offload-sorry-nvptx.C b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2-offload-sorry-nvptx.C new file mode 100644 index 0000000..da267d6 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2-offload-sorry-nvptx.C @@ -0,0 +1,21 @@ +/* 'throw' in OpenMP 'target' region, caught, '-foffload-options=-mno-fake-exceptions'. */ + +/* As this test case involves an expected offload compilation failure, we have to handle each offload target individually. + { dg-do link { target offload_target_nvptx } } + { dg-additional-options -foffload=nvptx-none } */ +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -foffload-options=-mno-fake-exceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "target-exceptions-throw-2.C" + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + Given '-foffload-options=-mno-fake-exceptions', offload compilation fails: + { dg-regexp {[^\r\n]+: In function 'main[^']+':[\r\n]+(?:[^\r\n]+: sorry, unimplemented: exception handling not supported[\r\n]+)+} } + (Note, using 'dg-regexp' instead of 'dg-message', as the former runs before the auto-mark-UNSUPPORTED.) + { dg-excess-errors {'mkoffload' failure etc.} } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2.C b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2.C new file mode 100644 index 0000000..97f4845 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2.C @@ -0,0 +1,20 @@ +/* 'throw' in OpenMP 'target' region, caught. */ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "../libgomp.oacc-c++/exceptions-throw-2.C" + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-output {.*caught 'MyException'[\r\n]+} { target { ! offload_device } } } + For GCN, nvptx offload execution, we don't print anything, but just 'abort'. + + For GCN, nvptx offload execution, there is no 'catch'ing; any exception is fatal. + { dg-shouldfail {'MyException' exception} { offload_device } } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-3.C b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-3.C new file mode 100644 index 0000000..c35180d --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-3.C @@ -0,0 +1,19 @@ +/* 'throw' in OpenMP 'target' region, dead code. 
*/ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -DDEFAULT=defaultmap(to) } + ... to avoid wrong code for offloading execution; PR119692. + With this, the device code still isn't correct, but the defects are in dead code. + { dg-additional-options -fdump-tree-gimple } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "../libgomp.oacc-c++/exceptions-throw-3.C" + +/* { dg-final { scan-tree-dump-not {(?n)#pragma omp target .* defaultmap\(to\) map\(to:_ZTI11MyException \[len: [0-9]+\] \[runtime_implicit\]\)$} gimple { xfail *-*-* } } } */ + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } */ diff --git a/libgomp/testsuite/libgomp.c++/target-flex-10.C b/libgomp/testsuite/libgomp.c++/target-flex-10.C new file mode 100644 index 0000000..8fa9af7 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-10.C @@ -0,0 +1,215 @@ +/* Basic container usage. */ + +#include <vector> +#include <deque> +#include <list> +#include <set> +#include <map> +#if __cplusplus >= 201103L +#include <array> +#include <forward_list> +#include <unordered_set> +#include <unordered_map> +#endif + +bool vector_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::vector<int> vector; + ok = vector.empty(); + } + return ok; +} + +bool deque_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::deque<int> deque; + ok = deque.empty(); + } + return ok; +} + +bool list_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::list<int> list; + ok = list.empty(); + } + return ok; +} + +bool map_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::map<int, int> map; + ok = map.empty(); + } + return ok; +} + +bool set_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::set<int> set; + ok = set.empty(); + } + return ok; +} + +bool multimap_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::multimap<int, int> multimap; + ok = multimap.empty(); + } + return ok; +} + +bool multiset_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::multiset<int, int> multiset; + ok = multiset.empty(); + } + return ok; +} + +#if __cplusplus >= 201103L + +bool array_test() +{ + static constexpr std::size_t array_size = 42; + bool ok; + #pragma omp target map(from: ok) + { + std::array<int, array_size> array{}; + ok = array[0] == 0 + && array[array_size - 1] == 0; + } + return ok; +} + +bool forward_list_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::forward_list<int> forward_list; + ok = forward_list.empty(); + } + return ok; +} + +bool unordered_map_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::unordered_map<int, int> unordered_map; + ok = unordered_map.empty(); + } + return ok; +} + +bool unordered_set_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::unordered_set<int> unordered_set; + ok = unordered_set.empty(); + } + return ok; +} + +bool unordered_multimap_test() +{ + + bool ok; + #pragma omp target map(from: ok) + { + std::unordered_multimap<int, int> unordered_multimap; + ok = unordered_multimap.empty(); + } + 
return ok; +} + +bool unordered_multiset_test() +{ + + bool ok; + #pragma omp target map(from: ok) + { + std::unordered_multiset<int> unordered_multiset; + ok = unordered_multiset.empty(); + } + return ok; +} + +#else +bool array_test() { return true; } +bool forward_list_test() { return true; } +bool unordered_map_test() { return true; } +bool unordered_set_test() { return true; } +bool unordered_multimap_test() { return true; } +bool unordered_multiset_test() { return true; } +#endif + +int main() +{ + const bool vec_res = vector_test(); + __builtin_printf("vector : %s\n", vec_res ? "PASS" : "FAIL"); + const bool deque_res = deque_test(); + __builtin_printf("deque : %s\n", deque_res ? "PASS" : "FAIL"); + const bool list_res = list_test(); + __builtin_printf("list : %s\n", list_res ? "PASS" : "FAIL"); + const bool map_res = map_test(); + __builtin_printf("map : %s\n", map_res ? "PASS" : "FAIL"); + const bool set_res = set_test(); + __builtin_printf("set : %s\n", set_res ? "PASS" : "FAIL"); + const bool multimap_res = multimap_test(); + __builtin_printf("multimap : %s\n", multimap_res ? "PASS" : "FAIL"); + const bool multiset_res = multiset_test(); + __builtin_printf("multiset : %s\n", multiset_res ? "PASS" : "FAIL"); + const bool array_res = array_test(); + __builtin_printf("array : %s\n", array_res ? "PASS" : "FAIL"); + const bool forward_list_res = forward_list_test(); + __builtin_printf("forward_list : %s\n", forward_list_res ? "PASS" : "FAIL"); + const bool unordered_map_res = unordered_map_test(); + __builtin_printf("unordered_map : %s\n", unordered_map_res ? "PASS" : "FAIL"); + const bool unordered_set_res = unordered_set_test(); + __builtin_printf("unordered_set : %s\n", unordered_set_res ? "PASS" : "FAIL"); + const bool unordered_multimap_res = unordered_multimap_test(); + __builtin_printf("unordered_multimap: %s\n", unordered_multimap_res ? "PASS" : "FAIL"); + const bool unordered_multiset_res = unordered_multiset_test(); + __builtin_printf("unordered_multiset: %s\n", unordered_multiset_res ? "PASS" : "FAIL"); + const bool ok = vec_res + && deque_res + && list_res + && map_res + && set_res + && multimap_res + && multiset_res + && array_res + && forward_list_res + && unordered_map_res + && unordered_set_res + && unordered_multimap_res + && unordered_multiset_res; + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-100.C b/libgomp/testsuite/libgomp.c++/target-flex-100.C new file mode 100644 index 0000000..7ab047f --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-100.C @@ -0,0 +1,210 @@ +/* Container adaptors in target region. + Does not test comparison operators other than equality to allow these tests + to be generalized to arbitrary input data. */ + +#include <algorithm> +#include <cstdio> +#include <deque> +#include <queue> +#include <stack> +#include <vector> + +#include "target-flex-common.h" + +template<typename T, std::size_t Size> +bool test_stack(T (&arr)[Size]) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arr[:Size]) + { + bool inner_ok = true; + const std::size_t half_size = Size / 2; + const T first_element = arr[0]; + const T middle_element = arr[half_size - 1]; + const T last_element = arr[Size - 1]; + typedef std::stack<T, std::vector<T> > stack_type; + stack_type stack; + VERIFY (stack.empty()); + VERIFY (stack.size() == 0); + { + /* Do half with push. 
*/ + std::size_t idx = 0; + for (; idx < half_size; ++idx) + { + stack.push(arr[idx]); + VERIFY (stack.top() == arr[idx]); + } + VERIFY (stack.size() == half_size); + VERIFY (static_cast<const stack_type&>(stack).size() == half_size); + for (; idx < Size; ++idx) + { + #if __cplusplus >= 201103L + /* Do the rest with emplace if C++11 or higher. */ + stack.emplace(arr[idx]); + #else + /* Otherwise just use push again. */ + stack.push(arr[idx]); + #endif + VERIFY (stack.top() == arr[idx]); + } + VERIFY (stack.size() == Size); + VERIFY (static_cast<const stack_type&>(stack).size() == Size); + + const stack_type stack_orig = stack_type(std::vector<T>(arr, arr + Size)); + VERIFY (stack == stack_orig); + /* References are contained in their own scope so we don't accidentally + add tests referencing them after they have been invalidated. */ + { + const T& const_top = static_cast<const stack_type&>(stack).top(); + VERIFY (const_top == last_element); + T& mutable_top = stack.top(); + mutable_top = first_element; + VERIFY (const_top == first_element); + } + /* Will only compare unequal if the first and last elements are different. */ + VERIFY (first_element != last_element || stack != stack_orig); + for (std::size_t count = Size - half_size; count != 0; --count) + stack.pop(); + VERIFY (stack.top() == middle_element); + const stack_type stack_half_orig = stack_type(std::vector<T>(arr, arr + half_size)); + VERIFY (stack == stack_half_orig); + } + end: + ok = inner_ok; + } + return ok; +} + +template<typename T, std::size_t Size> +bool test_queue(T (&arr)[Size]) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arr[:Size]) + { + bool inner_ok = true; + const std::size_t half_size = Size / 2; + const T first_element = arr[0]; + const T last_element = arr[Size - 1]; + typedef std::queue<T, std::deque<T> > queue_type; + queue_type queue; + VERIFY (queue.empty()); + VERIFY (queue.size() == 0); + { + /* Do half with push. */ + std::size_t idx = 0; + for (; idx < half_size; ++idx) + { + queue.push(arr[idx]); + VERIFY (queue.back() == arr[idx]); + VERIFY (queue.front() == first_element); + } + VERIFY (queue.size() == half_size); + VERIFY (static_cast<const queue_type&>(queue).size() == half_size); + for (; idx < Size; ++idx) + { + #if __cplusplus >= 201103L + /* Do the rest with emplace if C++11 or higher. */ + queue.emplace(arr[idx]); + #else + /* Otherwise just use push again. */ + queue.push(arr[idx]); + #endif + VERIFY (queue.back() == arr[idx]); + } + VERIFY (queue.size() == Size); + VERIFY (static_cast<const queue_type&>(queue).size() == Size); + + const queue_type queue_orig = queue_type(std::deque<T>(arr, arr + Size)); + VERIFY (queue == queue_orig); + + /* References are contained in their own scope so we don't accidentally + add tests referencing them after they have been invalidated. */ + { + const T& const_front = static_cast<const queue_type&>(queue).front(); + VERIFY (const_front == first_element); + T& mutable_front = queue.front(); + + const T& const_back = static_cast<const queue_type&>(queue).back(); + VERIFY (const_back == last_element); + T& mutable_back = queue.back(); + { + using std::swap; + swap(mutable_front, mutable_back); + } + VERIFY (const_front == last_element); + VERIFY (const_back == first_element); + /* Will only compare unequal if the first and last elements are different. */ + VERIFY (first_element != last_element || queue != queue_orig); + /* Return the last element to normal for the next comparison. 
*/ + mutable_back = last_element; + } + + const T middle_element = arr[half_size]; + for (std::size_t count = Size - half_size; count != 0; --count) + queue.pop(); + VERIFY (queue.front() == middle_element); + const queue_type queue_upper_half = queue_type(std::deque<T>(arr + half_size, arr + Size)); + VERIFY (queue == queue_upper_half); + } + end: + ok = inner_ok; + } + return ok; +} + +template<typename T, std::size_t Size> +bool test_priority_queue(T (&arr)[Size], const T min_value, const T max_value) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arr[:Size]) + { + bool inner_ok = true; + typedef std::priority_queue<T, std::vector<T> > priority_queue_type; + { + priority_queue_type pqueue; + VERIFY (pqueue.empty()); + VERIFY (pqueue.size() == 0); + } + { + priority_queue_type pqueue(arr, arr + Size); + VERIFY (!pqueue.empty()); + VERIFY (pqueue.size() == Size); + VERIFY (static_cast<const priority_queue_type&>(pqueue).size() == Size); + + const T old_max = pqueue.top(); + + #if __cplusplus >= 201103L + pqueue.emplace(max_value); + #else + pqueue.push(max_value); + #endif + VERIFY (pqueue.top() == max_value); + pqueue.pop(); + VERIFY (pqueue.top() == old_max); + pqueue.push(min_value); + VERIFY (pqueue.top() == old_max); + pqueue.push(max_value); + VERIFY (pqueue.top() == max_value); + pqueue.pop(); + VERIFY (pqueue.top() == old_max); + VERIFY (pqueue.size() == Size + 1); + + for (std::size_t count = Size; count != 0; --count) + pqueue.pop(); + VERIFY (pqueue.size() == 1); + VERIFY (pqueue.top() == min_value); + } + end: + ok = inner_ok; + } + return ok; +} + +int main() +{ + int arr[10] = {0,1,2,3,4,5,6,7,8,9}; + + return test_stack(arr) + && test_queue(arr) + && test_priority_queue(arr, 0, 1000) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-101.C b/libgomp/testsuite/libgomp.c++/target-flex-101.C new file mode 100644 index 0000000..be9037e --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-101.C @@ -0,0 +1,136 @@ +/* { dg-additional-options -std=c++23 } */ + +/* C++23 container adaptors in target region. + Severely needs additional tests. 
*/ + +#include <cstdio> +#include <utility> +#include <version> + +#if __cpp_lib_flat_map >= 202207L +#define ENABLE_FLAT_MAP 1 +#endif +#if __cpp_lib_flat_set >= 202207L +#define ENABLE_FLAT_SET 1 +#endif + +#ifdef ENABLE_FLAT_MAP +#include <flat_map> +#endif +#ifdef ENABLE_FLAT_SET +#include <flat_set> +#endif + +#include "target-flex-common.h" + +#ifdef ENABLE_FLAT_MAP +template<typename K, typename V, typename std::size_t Size> +bool test_flat_map(std::pair<K, V> (&arr)[Size]) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arr[:Size]) + { + bool inner_ok = true; + { + using flat_map_type = std::flat_map<K, V>; + flat_map_type map = {arr, arr + Size}; + + VERIFY (!map.empty()); + for (const auto& element : arr) + VERIFY (map.contains(element.first)); + } + end: + ok = inner_ok; + } + return ok; +} + +template<typename K, typename V, typename std::size_t Size> +bool test_flat_multimap(std::pair<K, V> (&arr)[Size]) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arr[:Size]) + { + bool inner_ok = true; + { + using flat_multimap_type = std::flat_multimap<K, V>; + flat_multimap_type multimap = {arr, arr + Size}; + + VERIFY (!multimap.empty()); + for (const auto& element : arr) + VERIFY (multimap.contains(element.first)); + } + end: + ok = inner_ok; + } + return ok; +} +#else +template<typename K, typename V, typename std::size_t Size> +bool test_flat_map(std::pair<K, V> (&arr)[Size]) { return true; } + +template<typename K, typename V, typename std::size_t Size> +bool test_flat_multimap(std::pair<K, V> (&arr)[Size]) { return true; } +#endif + +#ifdef ENABLE_FLAT_SET +template<typename T, typename std::size_t Size> +bool test_flat_set(T (&arr)[Size]) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arr[:Size]) + { + bool inner_ok = true; + { + using flat_set_type = std::flat_set<T>; + flat_set_type set = {arr, arr + Size}; + + VERIFY (!set.empty()); + for (const auto& element : arr) + VERIFY (set.contains(element)); + } + end: + ok = inner_ok; + } + return ok; +} + +template<typename T, typename std::size_t Size> +bool test_flat_multiset(T (&arr)[Size]) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arr[:Size]) + { + bool inner_ok = true; + { + using flat_multiset_type = std::flat_multiset<T>; + flat_multiset_type multiset = {arr, arr + Size}; + + VERIFY (!multiset.empty()); + for (const auto& element : arr) + VERIFY (multiset.contains(element)); + } + end: + ok = inner_ok; + } + return ok; +} +#else +template<typename T, typename std::size_t Size> +bool test_flat_set(T (&arr)[Size]) { return true; } + +template<typename T, typename std::size_t Size> +bool test_flat_multiset(T (&arr)[Size]) { return true; } +#endif + +int main() +{ + int arr[10] = {0,1,2,3,4,5,6,7,8,9}; + std::pair<int, int> pairs[10] = {{ 1, 2}, { 2, 4}, { 3, 6}, { 4, 8}, { 5, 10}, + { 6, 12}, { 7, 14}, { 8, 16}, { 9, 18}, {10, 20}}; + + return test_flat_set(arr) + && test_flat_multiset(arr) + && test_flat_map(pairs) + && test_flat_multimap(pairs) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-11.C b/libgomp/testsuite/libgomp.c++/target-flex-11.C new file mode 100644 index 0000000..6d55129 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-11.C @@ -0,0 +1,444 @@ +/* Check constructors/destructors are called in containers. 
*/ + +#include <vector> +#include <deque> +#include <list> +#include <set> +#include <map> +#include <utility> +#if __cplusplus >= 201103L +#include <array> +#include <forward_list> +#include <unordered_set> +#include <unordered_map> +#endif + +#include "target-flex-common.h" + +struct indirect_counter +{ + typedef int counter_value_type; + counter_value_type *_count_ptr; + + indirect_counter(counter_value_type *count_ptr) BL_NOEXCEPT : _count_ptr(count_ptr) { + ++(*_count_ptr); + } + indirect_counter(const indirect_counter& other) BL_NOEXCEPT : _count_ptr(other._count_ptr) { + ++(*_count_ptr); + } + /* Don't declare a move constructor, we want to copy no matter what. */ + ~indirect_counter() { + --(*_count_ptr); + } +}; + +bool operator==(indirect_counter const& lhs, indirect_counter const& rhs) BL_NOEXCEPT + { return lhs._count_ptr == rhs._count_ptr; } +bool operator<(indirect_counter const& lhs, indirect_counter const& rhs) BL_NOEXCEPT + { return lhs._count_ptr < rhs._count_ptr; } + +#if __cplusplus >= 201103L +template<> +struct std::hash<indirect_counter> +{ + std::size_t operator()(const indirect_counter& ic) const noexcept + { return std::hash<indirect_counter::counter_value_type *>{}(ic._count_ptr); } +}; +#endif + +/* Not a container, just a sanity check really. */ +bool automatic_lifetime_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + indirect_counter c = indirect_counter(&counter); + indirect_counter(static_cast<int*>(&counter)); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool vector_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + std::vector<indirect_counter> vec(42, indirect_counter(&counter)); + VERIFY (counter == 42); + vec.resize(32, indirect_counter(&counter)); + VERIFY (counter == 32); + vec.push_back(indirect_counter(&counter)); + VERIFY (counter == 33); + vec.pop_back(); + VERIFY (counter == 32); + vec.pop_back(); + VERIFY (counter == 31); + vec.resize(100, indirect_counter(&counter)); + VERIFY (counter == 100); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool deque_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + std::deque<indirect_counter> vec(42, indirect_counter(&counter)); + VERIFY (counter == 42); + vec.resize(32, indirect_counter(&counter)); + VERIFY (counter == 32); + vec.push_back(indirect_counter(&counter)); + VERIFY (counter == 33); + vec.pop_back(); + VERIFY (counter == 32); + vec.pop_back(); + VERIFY (counter == 31); + vec.resize(100, indirect_counter(&counter)); + VERIFY (counter == 100); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool list_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + std::list<indirect_counter> list(42, indirect_counter(&counter)); + VERIFY (counter == 42); + list.resize(32, indirect_counter(&counter)); + VERIFY (counter == 32); + list.push_back(indirect_counter(&counter)); + VERIFY (counter == 33); + list.pop_back(); + VERIFY (counter == 32); + list.pop_back(); + VERIFY (counter == 31); + list.resize(100, indirect_counter(&counter)); + VERIFY (counter == 100); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool map_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + std::map<int, indirect_counter> map; + 
map.insert(std::make_pair(1, indirect_counter(&counter))); + VERIFY (counter == 1); + map.insert(std::make_pair(1, indirect_counter(&counter))); + VERIFY (counter == 1); + map.insert(std::make_pair(2, indirect_counter(&counter))); + VERIFY (counter == 2); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool set_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter0 = 0; + int counter1 = 0; + { + std::set<indirect_counter> set; + set.insert(indirect_counter(&counter0)); + VERIFY (counter0 == 1); + set.insert(indirect_counter(&counter0)); + VERIFY (counter0 == 1); + set.insert(indirect_counter(&counter1)); + VERIFY (counter0 == 1 && counter1 == 1); + } + VERIFY (counter0 == 0 && counter1 == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool multimap_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + std::multimap<int, indirect_counter> multimap; + multimap.insert(std::make_pair(1, indirect_counter(&counter))); + VERIFY (counter == 1); + multimap.insert(std::make_pair(1, indirect_counter(&counter))); + VERIFY (counter == 2); + multimap.insert(std::make_pair(2, indirect_counter(&counter))); + VERIFY (counter == 3); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool multiset_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter0 = 0; + int counter1 = 0; + { + std::multiset<indirect_counter> multiset; + multiset.insert(indirect_counter(&counter0)); + VERIFY (counter0 == 1); + multiset.insert(indirect_counter(&counter0)); + VERIFY (counter0 == 2); + multiset.insert(indirect_counter(&counter1)); + VERIFY (counter0 == 2 && counter1 == 1); + } + VERIFY (counter0 == 0 && counter1 == 0); + end: + ok = inner_ok; + } + return ok; +} + +#if __cplusplus >= 201103L + +bool array_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + indirect_counter ic(&counter); + std::array<indirect_counter, 10> array{ic, ic, ic, ic, ic, + ic, ic, ic, ic, ic}; + VERIFY (counter == 11); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool forward_list_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + std::forward_list<indirect_counter> forward_list(42, indirect_counter(&counter)); + VERIFY (counter == 42); + forward_list.resize(32, indirect_counter(&counter)); + VERIFY (counter == 32); + forward_list.push_front(indirect_counter(&counter)); + VERIFY (counter == 33); + forward_list.pop_front(); + VERIFY (counter == 32); + forward_list.pop_front(); + VERIFY (counter == 31); + forward_list.resize(100, indirect_counter(&counter)); + VERIFY (counter == 100); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool unordered_map_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + std::unordered_map<int, indirect_counter> unordered_map; + unordered_map.insert({1, indirect_counter(&counter)}); + VERIFY (counter == 1); + unordered_map.insert({1, indirect_counter(&counter)}); + VERIFY (counter == 1); + unordered_map.insert({2, indirect_counter(&counter)}); + VERIFY (counter == 2); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool unordered_set_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter0 = 0; + int counter1 = 0; + { + 
std::unordered_set<indirect_counter> unordered_set; + unordered_set.insert(indirect_counter(&counter0)); + VERIFY (counter0 == 1); + unordered_set.insert(indirect_counter(&counter0)); + VERIFY (counter0 == 1); + unordered_set.insert(indirect_counter(&counter1)); + VERIFY (counter0 == 1 && counter1 == 1); + } + VERIFY (counter0 == 0 && counter1 == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool unordered_multimap_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + std::unordered_multimap<int, indirect_counter> unordered_multimap; + unordered_multimap.insert({1, indirect_counter(&counter)}); + VERIFY (counter == 1); + unordered_multimap.insert({1, indirect_counter(&counter)}); + VERIFY (counter == 2); + unordered_multimap.insert({2, indirect_counter(&counter)}); + VERIFY (counter == 3); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool unordered_multiset_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter0 = 0; + int counter1 = 0; + { + std::unordered_multiset<indirect_counter> unordered_multiset; + unordered_multiset.insert(indirect_counter(&counter0)); + VERIFY (counter0 == 1); + unordered_multiset.insert(indirect_counter(&counter0)); + VERIFY (counter0 == 2); + unordered_multiset.insert(indirect_counter(&counter1)); + VERIFY (counter0 == 2 && counter1 == 1); + } + VERIFY (counter0 == 0 && counter1 == 0); + end: + ok = inner_ok; + } + return ok; +} + +#else +bool array_test() { return true; } +bool forward_list_test() { return true; } +bool unordered_map_test() { return true; } +bool unordered_set_test() { return true; } +bool unordered_multimap_test() { return true; } +bool unordered_multiset_test() { return true; } +#endif + +int main() +{ + const bool auto_res = automatic_lifetime_test(); + const bool vec_res = vector_test(); + const bool deque_res = deque_test(); + const bool list_res = list_test(); + const bool map_res = map_test(); + const bool set_res = set_test(); + const bool multimap_res = multimap_test(); + const bool multiset_res = multiset_test(); + const bool array_res = array_test(); + const bool forward_list_res = forward_list_test(); + const bool unordered_map_res = unordered_map_test(); + const bool unordered_set_res = unordered_set_test(); + const bool unordered_multimap_res = unordered_multimap_test(); + const bool unordered_multiset_res = unordered_multiset_test(); + std::printf("sanity check : %s\n", auto_res ? "PASS" : "FAIL"); + std::printf("vector : %s\n", vec_res ? "PASS" : "FAIL"); + std::printf("deque : %s\n", deque_res ? "PASS" : "FAIL"); + std::printf("list : %s\n", list_res ? "PASS" : "FAIL"); + std::printf("map : %s\n", map_res ? "PASS" : "FAIL"); + std::printf("set : %s\n", set_res ? "PASS" : "FAIL"); + std::printf("multimap : %s\n", multimap_res ? "PASS" : "FAIL"); + std::printf("multiset : %s\n", multiset_res ? "PASS" : "FAIL"); + std::printf("array : %s\n", array_res ? "PASS" : "FAIL"); + std::printf("forward_list : %s\n", forward_list_res ? "PASS" : "FAIL"); + std::printf("unordered_map : %s\n", unordered_map_res ? "PASS" : "FAIL"); + std::printf("unordered_set : %s\n", unordered_set_res ? "PASS" : "FAIL"); + std::printf("unordered_multimap: %s\n", unordered_multimap_res ? "PASS" : "FAIL"); + std::printf("unordered_multiset: %s\n", unordered_multiset_res ? 
"PASS" : "FAIL"); + const bool ok = auto_res + && vec_res + && deque_res + && list_res + && map_res + && set_res + && multimap_res + && multiset_res + && array_res + && forward_list_res + && unordered_map_res + && unordered_set_res + && unordered_multimap_res + && unordered_multiset_res; + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-12.C b/libgomp/testsuite/libgomp.c++/target-flex-12.C new file mode 100644 index 0000000..024fb73 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-12.C @@ -0,0 +1,736 @@ +/* Populated with mapped data, validate, mutate, validate again. + The cases using sets do not mutate. + Note: Some of the code in here really sucks due to being made to be + compatible with c++98. */ + +#include <vector> +#include <deque> +#include <list> +#include <set> +#include <map> +#if __cplusplus >= 201103L +#include <array> +#include <forward_list> +#include <unordered_set> +#include <unordered_map> +#endif + +#include <limits> +#include <iterator> + +#include "target-flex-common.h" + +template<bool B, class T = void> +struct enable_if {}; + +template<class T> +struct enable_if<true, T> { typedef T type; }; + +struct identity_func +{ +#if __cplusplus < 201103L + template<typename T> + T& operator()(T& arg) const BL_NOEXCEPT { return arg; } + template<typename T> + T const& operator()(T const& arg) const BL_NOEXCEPT { return arg; } +#else + template<typename T> + constexpr T&& operator()(T&& arg) const BL_NOEXCEPT { return std::forward<T>(arg); } +#endif +}; + +/* Applies projection to the second iterator. */ +template<typename It0, typename It1, typename Proj> +bool validate_sequential_elements(const It0 begin0, const It0 end0, + const It1 begin1, const It1 end1, + Proj proj) BL_NOEXCEPT +{ + It0 it0 = begin0; + It1 it1 = begin1; + for (; it0 != end0; ++it0, ++it1) + { + /* Sizes mismatch, don't bother aborting though just fail the test. */ + if (it1 == end1) + return false; + if (*it0 != proj(*it1)) + return false; + } + /* Sizes mismatch, do as above. */ + if (it1 != end1) + return false; + return true; +} + +template<typename It0, typename It1> +bool validate_sequential_elements(const It0 begin0, const It0 end0, + const It1 begin1, const It1 end1) BL_NOEXCEPT +{ + return validate_sequential_elements(begin0, end0, begin1, end1, identity_func()); +} + +/* Inefficient, but simple. 
*/ +template<typename It, typename OutIt> +void simple_copy(const It begin, const It end, OutIt out) BL_NOEXCEPT +{ + for (It it = begin; it != end; ++it, ++out) + *out = *it; +} + +template<typename It, typename MutateFn> +void simple_mutate(const It begin, const It end, MutateFn mut_fn) BL_NOEXCEPT +{ + for (It it = begin; it != end; ++it) + *it = mut_fn(*it); +} + +template<typename MutationFunc, typename T, std::size_t Size> +bool vector_test(const T (&arr)[Size]) +{ + bool ok; + T out_arr[Size]; + T out_mut_arr[Size]; + #pragma omp target map(from: ok, out_arr[:Size], out_mut_arr[:Size]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::vector<T> vector(arr, arr + Size); + VERIFY (validate_sequential_elements(vector.begin(), vector.end(), + arr, arr + Size)); + simple_copy(vector.begin(), vector.end(), out_arr); + simple_mutate(vector.begin(), vector.end(), MutationFunc()); + VERIFY (validate_sequential_elements(vector.begin(), vector.end(), + arr, arr + Size, MutationFunc())); + simple_copy(vector.begin(), vector.end(), out_mut_arr); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_sequential_elements(out_arr, out_arr + Size, + arr, arr + Size)); + VERIFY_NON_TARGET (validate_sequential_elements(out_mut_arr, out_mut_arr + Size, + arr, arr + Size, MutationFunc())); + return true; +} + +template<typename MutationFunc, typename T, std::size_t Size> +bool deque_test(const T (&arr)[Size]) +{ + bool ok; + T out_arr[Size]; + T out_mut_arr[Size]; + #pragma omp target map(from: ok, out_arr[:Size], out_mut_arr[:Size]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::deque<T> deque(arr, arr + Size); + VERIFY (validate_sequential_elements(deque.begin(), deque.end(), + arr, arr + Size)); + simple_copy(deque.begin(), deque.end(), out_arr); + simple_mutate(deque.begin(), deque.end(), MutationFunc()); + VERIFY (validate_sequential_elements(deque.begin(), deque.end(), + arr, arr + Size, MutationFunc())); + simple_copy(deque.begin(), deque.end(), out_mut_arr); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_sequential_elements(out_arr, out_arr + Size, + arr, arr + Size)); + VERIFY_NON_TARGET (validate_sequential_elements(out_mut_arr, out_mut_arr + Size, + arr, arr + Size, MutationFunc())); + return true; +} + +template<typename MutationFunc, typename T, std::size_t Size> +bool list_test(const T (&arr)[Size]) +{ + bool ok; + T out_arr[Size]; + T out_mut_arr[Size]; + #pragma omp target map(from: ok, out_arr[:Size], out_mut_arr[:Size]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::list<T> list(arr, arr + Size); + VERIFY (validate_sequential_elements(list.begin(), list.end(), + arr, arr + Size)); + simple_copy(list.begin(), list.end(), out_arr); + simple_mutate(list.begin(), list.end(), MutationFunc()); + VERIFY (validate_sequential_elements(list.begin(), list.end(), + arr, arr + Size, MutationFunc())); + simple_copy(list.begin(), list.end(), out_mut_arr); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_sequential_elements(out_arr, out_arr + Size, + arr, arr + Size)); + VERIFY_NON_TARGET (validate_sequential_elements(out_mut_arr, out_mut_arr + Size, + arr, arr + Size, MutationFunc())); + return true; +} + +template<typename T> +const T& get_key(const T& arg) BL_NOEXCEPT + { return arg; } +template<typename K, typename V> +const K& get_key(const std::pair<K, V>& pair) BL_NOEXCEPT + { return pair.first; } +template<typename T> +const T& 
get_value(const T& arg) BL_NOEXCEPT + { return arg; } +template<typename K, typename V> +const V& get_value(const std::pair<K, V>& pair) BL_NOEXCEPT + { return pair.second; } + +template<typename T> +struct key_type { typedef T type; }; +template<typename K, typename V> +struct key_type<std::pair<K, V> > { typedef K type; }; + +template<typename Proj, typename Container, typename It> +bool validate_associative(const Container& container, + const It compare_begin, + const It compare_end, + Proj proj) BL_NOEXCEPT +{ + const typename Container::const_iterator elem_end = container.end(); + for (It compare_it = compare_begin; compare_it != compare_end; ++compare_it) + { + const typename Container::const_iterator elem_it = container.find(get_key(*compare_it)); + VERIFY_NON_TARGET (elem_it != elem_end); + VERIFY_NON_TARGET (proj(get_value(*compare_it)) == get_value(*elem_it)); + } + return true; +} + +template<typename Container, typename It> +bool validate_associative(const Container& container, + const It compare_begin, + const It compare_end) BL_NOEXCEPT +{ + return validate_associative(container, compare_begin, compare_end, identity_func()); +} + +template<typename It, typename MutateFn> +void simple_mutate_map(const It begin, const It end, MutateFn mut_fn) BL_NOEXCEPT +{ + for (It it = begin; it != end; ++it) + it->second = mut_fn(it->second); +} + +template<typename It, typename OutIter> +void simple_copy_unique(const It begin, const It end, OutIter out) BL_NOEXCEPT +{ + /* In case anyone reads this, I want it to be known that I hate c++98. */ + typedef typename key_type<typename std::iterator_traits<It>::value_type>::type key_t; + std::set<key_t> already_seen; + for (It it = begin; it != end; ++it, ++out) + { + key_t key = get_key(*it); + if (already_seen.find(key) != already_seen.end()) + continue; + already_seen.insert(key); + *out = *it; + } +} + +template<typename MutationFunc, typename K, typename V, std::size_t Size> +bool map_test(const std::pair<K, V> (&arr)[Size]) +{ + std::map<K, V> reference_map(arr, arr + Size); + bool ok; + /* Both sizes should be the same. 
*/ + std::pair<K, V> out_pairs[Size]; + std::size_t out_size; + std::pair<K, V> out_pairs_mut[Size]; + std::size_t out_size_mut; + #pragma omp target map(from: ok, out_pairs[:Size], out_size, \ + out_pairs_mut[:Size], out_size_mut) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::vector<std::pair<K, V> > unique_elems; + simple_copy_unique(arr, arr + Size, + std::back_insert_iterator<std::vector<std::pair<K, V> > >(unique_elems)); + + std::map<K, V> map(arr, arr + Size); + VERIFY (validate_associative(map, unique_elems.begin(), unique_elems.end())); + simple_copy(map.begin(), map.end(), out_pairs); + out_size = map.size(); + simple_mutate_map(map.begin(), map.end(), MutationFunc()); + VERIFY (validate_associative(map, unique_elems.begin(), unique_elems.end(), + MutationFunc())); + simple_copy(map.begin(), map.end(), out_pairs_mut); + out_size_mut = map.size(); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (out_size == out_size_mut); + VERIFY_NON_TARGET (validate_associative(reference_map, + out_pairs, out_pairs + out_size)); + simple_mutate_map(reference_map.begin(), reference_map.end(), MutationFunc()); + VERIFY_NON_TARGET (validate_associative(reference_map, + out_pairs_mut, out_pairs_mut + out_size_mut)); + return true; +} + +template<typename T, std::size_t Size> +bool set_test(const T (&arr)[Size]) +{ + std::set<T> reference_set(arr, arr + Size); + bool ok; + /* Both sizes should be the same. */ + T out_arr[Size]; + std::size_t out_size; + #pragma omp target map(from: ok, out_arr[:Size], out_size) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::vector<T> unique_elems; + simple_copy_unique(arr, arr + Size, + std::back_insert_iterator<std::vector<T> >(unique_elems)); + + std::set<T> set(arr, arr + Size); + VERIFY (validate_associative(set, unique_elems.begin(), unique_elems.end())); + simple_copy(set.begin(), set.end(), out_arr); + out_size = set.size(); + /* Sets can't be mutated, we could create another set with mutated + but it gets a little annoying and probably isn't an interesting test. */ + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_associative(reference_set, + out_arr, out_arr + out_size)); + return true; +} + +template<typename Proj, typename Container, typename It> +bool validate_multi_associative(const Container& container, + const It compare_begin, + const It compare_end, + Proj proj) BL_NOEXCEPT +{ + /* Once again, for the poor soul reviewing these, I hate c++98. */ + typedef typename key_type<typename std::iterator_traits<It>::value_type>::type key_t; + typedef std::map<key_t, std::size_t> counter_map; + counter_map key_count_map; + for (It it = compare_begin; it != compare_end; ++it) + { + const key_t& key = get_key(*it); + typename counter_map::iterator counter_it + = key_count_map.find(key); + if (counter_it != key_count_map.end()) + ++counter_it->second; + else + key_count_map.insert(std::pair<const key_t, std::size_t>(key, std::size_t(1))); + } + const typename Container::const_iterator elem_end = container.end(); + for (It compare_it = compare_begin; compare_it != compare_end; ++compare_it) + { + const key_t& key = get_key(*compare_it); + typename counter_map::iterator count_it = key_count_map.find(key); + std::size_t key_count = count_it != key_count_map.end() ? count_it->second + : std::size_t(0); + VERIFY_NON_TARGET (key_count > std::size_t(0) && "this will never happen"); + /* This gets tested multiple times but that should be fine. 
*/ + VERIFY_NON_TARGET (key_count == container.count(key)); + typename Container::const_iterator elem_it = container.find(key); + /* This will never happen if the previous case passed. */ + VERIFY_NON_TARGET (elem_it != elem_end); + bool found_element = false; + for (; elem_it != elem_end; ++elem_it) + if (proj(get_value(*compare_it)) == get_value(*elem_it)) + { + found_element = true; + break; + } + VERIFY_NON_TARGET (found_element); + } + return true; +} + +template<typename Container, typename It> +bool validate_multi_associative(const Container& container, + const It compare_begin, + const It compare_end) BL_NOEXCEPT +{ + return validate_multi_associative(container, compare_begin, compare_end, identity_func()); +} + +template<typename MutationFunc, typename K, typename V, std::size_t Size> +bool multimap_test(const std::pair<K, V> (&arr)[Size]) +{ + std::multimap<K, V> reference_multimap(arr, arr + Size); + bool ok; + std::pair<K, V> out_pairs[Size]; + std::pair<K, V> out_pairs_mut[Size]; + #pragma omp target map(from: ok, out_pairs[:Size], out_pairs_mut[:Size]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::multimap<K, V> multimap(arr, arr + Size); + VERIFY (validate_multi_associative(multimap, arr, arr + Size)); + simple_copy(multimap.begin(), multimap.end(), out_pairs); + simple_mutate_map(multimap.begin(), multimap.end(), MutationFunc()); + VERIFY (validate_multi_associative(multimap, arr, arr + Size, MutationFunc())); + simple_copy(multimap.begin(), multimap.end(), out_pairs_mut); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_multi_associative(reference_multimap, + out_pairs, out_pairs + Size)); + simple_mutate_map(reference_multimap.begin(), reference_multimap.end(), MutationFunc()); + VERIFY_NON_TARGET (validate_multi_associative(reference_multimap, + out_pairs_mut, out_pairs_mut + Size)); + return true; +} + +template<typename T, std::size_t Size> +bool multiset_test(const T (&arr)[Size]) +{ + std::multiset<T> reference_multiset(arr, arr + Size); + bool ok; + T out_arr[Size]; + #pragma omp target map(from: ok, out_arr[:Size]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::multiset<T> set(arr, arr + Size); + VERIFY (validate_multi_associative(set, arr, arr + Size)); + simple_copy(set.begin(), set.end(), out_arr); + /* Sets can't be mutated, we could create another set with mutated + but it gets a little annoying and probably isn't an interesting test. */ + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_multi_associative(reference_multiset, + out_arr, out_arr + Size)); + return true; +} + +#if __cplusplus >= 201103L + +template<typename MutationFunc, typename T, std::size_t Size> +bool array_test(const T (&arr)[Size]) +{ + bool ok; + T out_arr[Size]; + T out_mut_arr[Size]; + #pragma omp target map(from: ok, out_arr[:Size], out_mut_arr[:Size]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::array<T, Size> std_array{}; + /* Special case for std::array since it can't be initialized + with iterators. 
*/ + { + T zero_val = T{}; + for (auto it = std_array.begin(); it != std_array.end(); ++it) + VERIFY (*it == zero_val); + } + simple_copy(arr, arr + Size, std_array.begin()); + VERIFY (validate_sequential_elements(std_array.begin(), std_array.end(), + arr, arr + Size)); + simple_copy(std_array.begin(), std_array.end(), out_arr); + simple_mutate(std_array.begin(), std_array.end(), MutationFunc()); + VERIFY (validate_sequential_elements(std_array.begin(), std_array.end(), + arr, arr + Size, MutationFunc())); + simple_copy(std_array.begin(), std_array.end(), out_mut_arr); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_sequential_elements(out_arr, out_arr + Size, + arr, arr + Size)); + VERIFY_NON_TARGET (validate_sequential_elements(out_mut_arr, out_mut_arr + Size, + arr, arr + Size, MutationFunc())); + return true; +} + +template<typename MutationFunc, typename T, std::size_t Size> +bool forward_list_test(const T (&arr)[Size]) +{ + bool ok; + T out_arr[Size]; + T out_mut_arr[Size]; + #pragma omp target map(from: ok, out_arr[:Size], out_mut_arr[:Size]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::forward_list<T> fwd_list(arr, arr + Size); + VERIFY (validate_sequential_elements(fwd_list.begin(), fwd_list.end(), + arr, arr + Size)); + simple_copy(fwd_list.begin(), fwd_list.end(), out_arr); + simple_mutate(fwd_list.begin(), fwd_list.end(), MutationFunc()); + VERIFY (validate_sequential_elements(fwd_list.begin(), fwd_list.end(), + arr, arr + Size, MutationFunc())); + simple_copy(fwd_list.begin(), fwd_list.end(), out_mut_arr); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_sequential_elements(out_arr, out_arr + Size, + arr, arr + Size)); + VERIFY_NON_TARGET (validate_sequential_elements(out_mut_arr, out_mut_arr + Size, + arr, arr + Size, MutationFunc())); + return true; +} + +template<typename MutationFunc, typename K, typename V, std::size_t Size> +bool unordered_map_test(const std::pair<K, V> (&arr)[Size]) +{ + std::unordered_map<K, V> reference_map(arr, arr + Size); + bool ok; + /* Both sizes should be the same. 
*/ + std::pair<K, V> out_pairs[Size]; + std::size_t out_size; + std::pair<K, V> out_pairs_mut[Size]; + std::size_t out_size_mut; + #pragma omp target map(from: ok, out_pairs[:Size], out_size, \ + out_pairs_mut[:Size], out_size_mut) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::vector<std::pair<K, V> > unique_elems; + simple_copy_unique(arr, arr + Size, + std::back_insert_iterator<std::vector<std::pair<K, V> > >(unique_elems)); + + std::unordered_map<K, V> map(arr, arr + Size); + VERIFY (validate_associative(map, unique_elems.begin(), unique_elems.end())); + simple_copy(map.begin(), map.end(), out_pairs); + out_size = map.size(); + simple_mutate_map(map.begin(), map.end(), MutationFunc()); + VERIFY (validate_associative(map, unique_elems.begin(), unique_elems.end(), + MutationFunc())); + simple_copy(map.begin(), map.end(), out_pairs_mut); + out_size_mut = map.size(); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (out_size == out_size_mut); + VERIFY_NON_TARGET (validate_associative(reference_map, + out_pairs, out_pairs + out_size)); + simple_mutate_map(reference_map.begin(), reference_map.end(), MutationFunc()); + VERIFY_NON_TARGET (validate_associative(reference_map, + out_pairs_mut, out_pairs_mut + out_size_mut)); + return true; +} + +template<typename T, std::size_t Size> +bool unordered_set_test(const T (&arr)[Size]) +{ + std::unordered_set<T> reference_set(arr, arr + Size); + bool ok; + /* Both sizes should be the same. */ + T out_arr[Size]; + std::size_t out_size; + #pragma omp target map(from: ok, out_arr[:Size], out_size) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::vector<T> unique_elems; + simple_copy_unique(arr, arr + Size, + std::back_insert_iterator<std::vector<T> >(unique_elems)); + + std::unordered_set<T> set(arr, arr + Size); + VERIFY (validate_associative(set, unique_elems.begin(), unique_elems.end())); + simple_copy(set.begin(), set.end(), out_arr); + out_size = set.size(); + /* Sets can't be mutated, we could create another set with mutated + but it gets a little annoying and probably isn't an interesting test. 
*/ + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_associative(reference_set, + out_arr, out_arr + out_size)); + return true; +} + +template<typename MutationFunc, typename K, typename V, std::size_t Size> +bool unordered_multimap_test(const std::pair<K, V> (&arr)[Size]) +{ + std::unordered_multimap<K, V> reference_multimap(arr, arr + Size); + bool ok; + std::pair<K, V> out_pairs[Size]; + std::pair<K, V> out_pairs_mut[Size]; + #pragma omp target map(from: ok, out_pairs[:Size], out_pairs_mut[:Size]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::unordered_multimap<K, V> multimap(arr, arr + Size); + VERIFY (validate_multi_associative(multimap, arr, arr + Size)); + simple_copy(multimap.begin(), multimap.end(), out_pairs); + simple_mutate_map(multimap.begin(), multimap.end(), MutationFunc()); + VERIFY (validate_multi_associative(multimap, arr, arr + Size, MutationFunc())); + simple_copy(multimap.begin(), multimap.end(), out_pairs_mut); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_multi_associative(reference_multimap, + out_pairs, out_pairs + Size)); + simple_mutate_map(reference_multimap.begin(), reference_multimap.end(), MutationFunc()); + VERIFY_NON_TARGET (validate_multi_associative(reference_multimap, + out_pairs_mut, out_pairs_mut + Size)); + return true; +} + +template<typename T, std::size_t Size> +bool unordered_multiset_test(const T (&arr)[Size]) +{ + std::unordered_multiset<T> reference_multiset(arr, arr + Size); + bool ok; + T out_arr[Size]; + #pragma omp target map(from: ok, out_arr[:Size]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::unordered_multiset<T> set(arr, arr + Size); + VERIFY (validate_multi_associative(set, arr, arr + Size)); + simple_copy(set.begin(), set.end(), out_arr); + /* Sets can't be mutated, we could create another set with mutated + but it gets a little annoying and probably isn't an interesting test. */ + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_multi_associative(reference_multiset, + out_arr, out_arr + Size)); + return true; +} + +#else +template<typename, typename T, std::size_t Size> bool array_test(const T (&arr)[Size]) { return true; } +template<typename, typename T, std::size_t Size> bool forward_list_test(const T (&arr)[Size]) { return true; } +template<typename, typename T, std::size_t Size> bool unordered_map_test(const T (&arr)[Size]) { return true; } +template<typename T, std::size_t Size> bool unordered_set_test(const T (&arr)[Size]) { return true; } +template<typename, typename T, std::size_t Size> bool unordered_multimap_test(const T (&arr)[Size]) { return true; } +template<typename T, std::size_t Size> bool unordered_multiset_test(const T (&arr)[Size]) { return true; } +#endif + +/* This clamps to the maximum value to guard against overflowing, + assuming std::numeric_limits is specialized for T. 
*/ +struct multiply_by_2 +{ + template<typename T> + typename enable_if<std::numeric_limits<T>::is_specialized, T>::type + operator()(T arg) const BL_NOEXCEPT { + if (arg < static_cast<T>(0)) + { + if (std::numeric_limits<T>::min() / static_cast<T>(2) >= arg) + return std::numeric_limits<T>::min(); + } + else + { + if (std::numeric_limits<T>::max() / static_cast<T>(2) <= arg) + return std::numeric_limits<T>::max(); + } + return arg * 2; + } + template<typename T> + typename enable_if<!std::numeric_limits<T>::is_specialized, T>::type + operator()(T arg) const BL_NOEXCEPT { + return arg * 2; + } +}; + +int main() +{ + int data[8] = {0, 1, 2, 3, 4, 5, 6, 7}; + std::pair<int, int> pairs[10] = {std::pair<int, int>( 1, 2), + std::pair<int, int>( 2, 4), + std::pair<int, int>( 3, 6), + std::pair<int, int>( 4, 8), + std::pair<int, int>( 5, 10), + std::pair<int, int>( 6, 12), + std::pair<int, int>( 7, 14), + std::pair<int, int>( 8, 16), + std::pair<int, int>( 9, 18), + std::pair<int, int>(10, 20)}; + const bool vec_res = vector_test<multiply_by_2>(data); + const bool deque_res = deque_test<multiply_by_2>(data); + const bool list_res = list_test<multiply_by_2>(data); + const bool map_res = map_test<multiply_by_2>(pairs); + const bool set_res = set_test(data); + const bool multimap_res = multimap_test<multiply_by_2>(pairs); + const bool multiset_res = multiset_test(data); + const bool array_res = array_test<multiply_by_2>(data); + const bool forward_list_res = forward_list_test<multiply_by_2>(data); + const bool unordered_map_res = unordered_map_test<multiply_by_2>(pairs); + const bool unordered_set_res = unordered_set_test(data); + const bool unordered_multimap_res = unordered_multimap_test<multiply_by_2>(pairs); + const bool unordered_multiset_res = unordered_multiset_test(data); + std::printf("vector : %s\n", vec_res ? "PASS" : "FAIL"); + std::printf("deque : %s\n", deque_res ? "PASS" : "FAIL"); + std::printf("list : %s\n", list_res ? "PASS" : "FAIL"); + std::printf("map : %s\n", map_res ? "PASS" : "FAIL"); + std::printf("set : %s\n", set_res ? "PASS" : "FAIL"); + std::printf("multimap : %s\n", multimap_res ? "PASS" : "FAIL"); + std::printf("multiset : %s\n", multiset_res ? "PASS" : "FAIL"); + std::printf("array : %s\n", array_res ? "PASS" : "FAIL"); + std::printf("forward_list : %s\n", forward_list_res ? "PASS" : "FAIL"); + std::printf("unordered_map : %s\n", unordered_map_res ? "PASS" : "FAIL"); + std::printf("unordered_set : %s\n", unordered_set_res ? "PASS" : "FAIL"); + std::printf("unordered_multimap: %s\n", unordered_multimap_res ? "PASS" : "FAIL"); + std::printf("unordered_multiset: %s\n", unordered_multiset_res ? "PASS" : "FAIL"); + const bool ok = vec_res + && deque_res + && list_res + && map_res + && set_res + && multimap_res + && multiset_res + && array_res + && forward_list_res + && unordered_map_res + && unordered_set_res + && unordered_multimap_res + && unordered_multiset_res; + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-2000.C b/libgomp/testsuite/libgomp.c++/target-flex-2000.C new file mode 100644 index 0000000..688c014 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-2000.C @@ -0,0 +1,32 @@ +/* Tiny tuple test. 
*/ + +#include <tuple> + +#include "target-flex-common.h" + +bool test(int arg) +{ + bool ok; + int out; + std::tuple tup = {'a', arg, 3.14f}; + #pragma omp target map(from: ok, out) map(to: tup) + { + bool inner_ok = true; + { + VERIFY (std::get<0>(tup) == 'a'); + out = std::get<1>(tup); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (out == arg); + return true; +} + +int main() +{ + volatile int arg = 42u; + return test(arg) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-2001.C b/libgomp/testsuite/libgomp.c++/target-flex-2001.C new file mode 100644 index 0000000..f1a6c12 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-2001.C @@ -0,0 +1,61 @@ +/* { dg-additional-options "-std=c++20" } */ + +/* Functional */ + +#include <functional> +#include <utility> + +#include "target-flex-common.h" + +template<typename T,typename Fn> +auto invoke_unary(T&& a, Fn&& fn) noexcept +{ + return std::invoke(std::forward<Fn>(fn), + std::forward<T>(a)); +} + +template<typename T, typename U, typename Fn> +auto invoke_binary(T&& a, U&& b, Fn&& fn) noexcept +{ + return std::invoke(std::forward<Fn>(fn), + std::forward<T>(a), + std::forward<U>(b)); +} + +bool test(unsigned arg) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arg) + { + bool inner_ok = true; + { + VERIFY (std::plus{}(arg, 2) == arg + 2); + auto bound_plus_arg = std::bind_front(std::plus{}, arg); + VERIFY (bound_plus_arg(10) == arg + 10); + VERIFY (bound_plus_arg(20) == arg + 20); + + VERIFY (std::not_fn(std::not_equal_to{})(arg, arg)); + VERIFY (invoke_binary(arg, arg, std::not_fn(std::not_equal_to{}))); + auto bound_equals_arg = std::bind_front(std::not_fn(std::not_equal_to{}), arg); + VERIFY (bound_equals_arg(arg)); + VERIFY (std::not_fn(bound_equals_arg)(arg + 1)); + VERIFY (invoke_unary(arg, bound_equals_arg)); + + VERIFY (std::not_fn(std::ranges::not_equal_to{})(arg, arg)); + VERIFY (invoke_binary(arg, arg, std::not_fn(std::ranges::not_equal_to{}))); + auto bound_ranges_equals_arg = std::bind_front(std::not_fn(std::ranges::not_equal_to{}), arg); + VERIFY (bound_ranges_equals_arg(arg)); + VERIFY (std::not_fn(bound_ranges_equals_arg)(arg + 1)); + VERIFY (invoke_unary(arg, bound_ranges_equals_arg)); + } + end: + ok = inner_ok; + } + return ok; +} + +int main() +{ + volatile unsigned arg = 42u; + return test(arg) ? 
0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-2002.C b/libgomp/testsuite/libgomp.c++/target-flex-2002.C new file mode 100644 index 0000000..f738806 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-2002.C @@ -0,0 +1,97 @@ +/* { dg-additional-options "-std=c++23" } */ + +/* expected/optional */ + +#include <optional> +#include <expected> + +#include "target-flex-common.h" + +std::optional<unsigned> make_optional(bool b, unsigned arg = 0u) noexcept +{ + if (!b) + return std::nullopt; + return {arg}; +} + +bool test_optional(unsigned arg) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arg) + { + bool inner_ok = true; + { + auto null_opt = make_optional(false); + VERIFY (!null_opt); + VERIFY (!null_opt.has_value()); + VERIFY (null_opt.value_or(arg * 2u) == arg * 2u); + VERIFY (null_opt.or_else([&](){ return std::optional<unsigned>{arg}; }) + .transform([](int a){ return a * 2u; }) + .value_or(0) == arg * 2u); + + auto opt = make_optional(true, arg); + VERIFY (opt); + VERIFY (opt.has_value()); + VERIFY (opt.value() == arg); + VERIFY (*opt == arg); + VERIFY (opt.value_or(arg + 42) == arg); + VERIFY (opt.or_else([&](){ return std::optional<unsigned>{arg + 42}; }) + .transform([](int a){ return a * 2u; }) + .value_or(0) == arg * 2u); + } + end: + ok = inner_ok; + } + return ok; +} + +struct my_error +{ + int _e; +}; + +std::expected<unsigned, my_error> make_expected(bool b, unsigned arg = 0u) noexcept +{ + if (!b) + return std::unexpected{my_error{-1}}; + return {arg}; +} + +bool test_expected(unsigned arg) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arg) + { + bool inner_ok = true; + { + auto unexpected = make_expected(false); + VERIFY (!unexpected); + VERIFY (!unexpected.has_value()); + VERIFY (unexpected.error()._e == -1); + VERIFY (unexpected.value_or(arg * 2u) == arg * 2u); + VERIFY (unexpected.or_else([&](my_error e){ return std::expected<unsigned, my_error>{arg}; }) + .transform([](int a){ return a * 2u; }) + .value_or(0) == arg * 2u); + + auto expected = make_expected(true, arg); + VERIFY (expected); + VERIFY (expected.has_value()); + VERIFY (expected.value() == arg); + VERIFY (*expected == arg); + VERIFY (expected.value_or(arg + 42) == arg); + VERIFY (expected.or_else([&](my_error e){ return std::expected<unsigned, my_error>{std::unexpected{e}}; }) + .transform([](int a){ return a * 2u; }) + .value_or(0) == arg * 2u); + } + end: + ok = inner_ok; + } + return ok; +} + +int main() +{ + volatile unsigned arg = 42; + return test_optional(arg) + && test_expected(arg) ? 
0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-2003.C b/libgomp/testsuite/libgomp.c++/target-flex-2003.C new file mode 100644 index 0000000..8e8ca8e --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-2003.C @@ -0,0 +1,176 @@ +/* { dg-additional-options "-std=c++20" } */ + +/* bit_cast and memcpy */ + +#include <bit> +#include <cstring> + +#include "target-flex-common.h" + +struct S0 +{ + int _v0; + char _v1; + long long _v2; +}; + +struct S1 +{ + int _v0; + char _v1; + long long _v2; +}; + +bool test_bit_cast(int arg) +{ + bool ok; + S1 s1_out; + #pragma omp target map(from: ok, s1_out) map(to: arg) + { + bool inner_ok = true; + { + long long v = static_cast<long long>(arg + 42ll); + S0 s = {arg, 'a', v}; + VERIFY (std::bit_cast<S1>(s)._v0 == arg); + VERIFY (std::bit_cast<S1>(s)._v1 == 'a'); + VERIFY (std::bit_cast<S1>(s)._v2 == v); + s1_out = std::bit_cast<S1>(s); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + long long v = static_cast<long long>(arg + 42ll); + VERIFY_NON_TARGET (std::bit_cast<S0>(s1_out)._v0 == arg); + VERIFY_NON_TARGET (std::bit_cast<S0>(s1_out)._v1 == 'a'); + VERIFY_NON_TARGET (std::bit_cast<S0>(s1_out)._v2 == v); + return true; +} + + +struct OutStruct +{ + std::size_t _id; + void *_next; +}; + +struct Extendable1 +{ + std::size_t _id; + void *_next; + int _v; +}; + +struct Extendable2 +{ + std::size_t _id; + void *_next; + char _str[256]; +}; + +struct Extendable3 +{ + std::size_t _id; + void *_next; + const int *_nums; + std::size_t _size; +}; + +struct ExtendableUnknown +{ + std::size_t _id; + void *_next; +}; + +template<typename To, std::size_t Id> +To *get_extendable(void *p) +{ + while (p != nullptr) + { + OutStruct out; + std::memcpy(&out, p, sizeof(OutStruct)); + if (out._id == Id) + return static_cast<To *>(p); + p = out._next; + } + return nullptr; +} + +bool test_memcpy(int arg, const int *nums, std::size_t nums_size) +{ + bool ok; + Extendable2 e2_out; + #pragma omp target map(from: ok, e2_out) map(to: arg, nums[:nums_size], nums_size) + { + bool inner_ok = true; + { + Extendable3 e3 = {3u, nullptr, nums, nums_size}; + ExtendableUnknown u1 = {100u, &e3}; + Extendable2 e2 = {2u, &u1, {'H', 'e', 'l', 'l', 'o', '!', '\000'}}; + ExtendableUnknown u2 = {101u, &e2}; + ExtendableUnknown u3 = {102u, &u2}; + ExtendableUnknown u4 = {142u, &u3}; + Extendable1 e1 = {1u, &u4, arg}; + + void *p = &e1; + while (p != nullptr) + { + /* You can always cast a pointer to a struct to a pointer to + the type of it's first member. */ + switch (*static_cast<std::size_t *>(p)) + { + case 1: + { + Extendable1 *e1_p = static_cast<Extendable1 *>(p); + p = e1_p->_next; + VERIFY (e1_p->_v == arg); + break; + } + case 2: + { + Extendable2 *e2_p = static_cast<Extendable2 *>(p); + p = e2_p->_next; + VERIFY (std::strcmp(e2_p->_str, "Hello!") == 0); + break; + } + case 3: + { + Extendable3 *e3_p = static_cast<Extendable3 *>(p); + p = e3_p->_next; + VERIFY (nums == e3_p->_nums); + VERIFY (nums_size == e3_p->_size); + break; + } + default: + { + /* Casting to a pointer to OutStruct invokes undefined + behavior though, memcpy is required to extract the _next + member. 
*/ + OutStruct out; + std::memcpy(&out, p, sizeof(OutStruct)); + p = out._next; + } + } + } + Extendable2 *e2_p = get_extendable<Extendable2, 2u>(&e1); + VERIFY (e2_p != nullptr); + e2_out = *e2_p; + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (e2_out._id == 2u); + VERIFY_NON_TARGET (std::strcmp(e2_out._str, "Hello!") == 0); + return true; +} + +int main() +{ + volatile int arg = 42; + int arr[8] = {0, 1, 2, 3, 4, 5, 6, 7}; + return test_bit_cast(arg) + && test_memcpy(arg, arr, 8) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-30.C b/libgomp/testsuite/libgomp.c++/target-flex-30.C new file mode 100644 index 0000000..c66075b --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-30.C @@ -0,0 +1,51 @@ +/* std::initializer_list in target region. */ + +#include <initializer_list> +#include <array> + +#include "target-flex-common.h" + +bool test_initializer_list(int arg) +{ + static constexpr std::size_t out_arr_size = 7; + int out_arr[out_arr_size]; + bool ok; + #pragma omp target map(from: ok, out_arr[:out_arr_size]) map(to: arg) + { + bool inner_ok = true; + { + auto il = {0, 1, 2, 3, 4, 5, arg}; + + int sum = 0; + for (auto const& e : il) + sum += e; + VERIFY (sum == 0 + 1 + 2 + 3 + 4 + 5 + arg); + + auto* out_it = out_arr; + const auto* const out_end = out_arr + out_arr_size; + for (auto const& e : il) + { + VERIFY (out_it != out_end); + *out_it = e; + ++out_it; + } + } + end: + ok = inner_ok; + } + if (!ok) + return false; + + std::array<int, out_arr_size> reference_array = {0, 1, 2, 3, 4, 5, arg}; + const auto *out_arr_it = out_arr; + for (auto const& e : reference_array) + VERIFY_NON_TARGET (e == *(out_arr_it++)); + + return true; +} + +int main() +{ + volatile int arg = 42; + return test_initializer_list(arg) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-300.C b/libgomp/testsuite/libgomp.c++/target-flex-300.C new file mode 100644 index 0000000..ef9e5a9 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-300.C @@ -0,0 +1,49 @@ +/* { dg-additional-options -std=c++23 } */ + +/* numerics */ + +#include <algorithm> +#include <numeric> +#include <ranges> +#include <span> +#include <vector> + +//TODO PR120454 "C++ constexpr vs. OpenMP implicit mapping" +#pragma omp declare target(std::ranges::all_of, std::ranges::iota) + +#include "target-flex-common.h" + +namespace stdr = std::ranges; + +bool test(std::size_t arg) +{ + bool ok; + int midpoint_out; + std::vector<int> vec(arg); + int *data = vec.data(); + std::size_t size = vec.size(); + #pragma omp target defaultmap(none) map(from: ok, midpoint_out) map(tofrom: data[:size]) map(to: arg, size) + { + std::span span = {data, size}; + bool inner_ok = true; + { + VERIFY (stdr::all_of(span, [](int v){ return v == int{}; })); + stdr::iota(span, 0); + midpoint_out = *std::midpoint(span.data(), span.data() + span.size()); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (stdr::equal(vec, std::views::iota(0, static_cast<int>(vec.size())))); + VERIFY_NON_TARGET (*std::midpoint(vec.data(), vec.data() + vec.size()) + == midpoint_out); + return true; +} + +int main() +{ + volatile std::size_t arg = 42; + return test(arg) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-31.C b/libgomp/testsuite/libgomp.c++/target-flex-31.C new file mode 100644 index 0000000..adaf18f --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-31.C @@ -0,0 +1,80 @@ +/* std::initializer_list in target region. 
*/ + +#include <initializer_list> + +#include "target-flex-common.h" + +struct S0 +{ + int _v; + S0(std::initializer_list<int> il) + : _v(0) + { + for (auto const& e : il) + _v += e; + } +}; + +struct S1 +{ + int _v; + template<typename T> + S1(std::initializer_list<T> il) + : _v(0) + { + for (auto const& e : il) + _v += e; + } +}; + +template<typename T> +struct S2 +{ + T _v; + S2(std::initializer_list<T> il) + : _v(0) + { + for (auto const& e : il) + _v += e; + } +}; + +#if __cplusplus >= 201703L +template<typename T> +S2(std::initializer_list<T>) -> S2<T>; +#endif + +bool test_initializer_list(int arg) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arg) + { + bool inner_ok = true; + { + static constexpr int partial_sum = 0 + 1 + 2 + 3 + 4 + 5; + + S0 s0{0, 1, 2, 3, 4, 5, arg}; + VERIFY (s0._v == partial_sum + arg); + + S1 s1{0, 1, 2, 3, 4, 5, arg}; + VERIFY (s1._v == partial_sum + arg); + + S2<int> s2{0, 1, 2, 3, 4, 5, arg}; + VERIFY (s2._v == partial_sum + arg); + + #if __cplusplus >= 201703L + S2 s2_ctad{0, 1, 2, 3, 4, 5, arg}; + VERIFY (s2_ctad._v == partial_sum + arg); + #endif + } + end: + ok = inner_ok; + } + return ok; +} + +int main() +{ + volatile int arg = 42; + return test_initializer_list(arg) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-32.C b/libgomp/testsuite/libgomp.c++/target-flex-32.C new file mode 100644 index 0000000..7f74401a --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-32.C @@ -0,0 +1,50 @@ +/* std::initializer_list constructor of std::vector (explicit template arg) */ + +#include <vector> +#include <array> + +#include "target-flex-common.h" + +bool test_initializer_list(int arg) +{ + static constexpr std::size_t out_arr_size = 7; + int out_arr[out_arr_size]; + bool ok; + #pragma omp target map(from: ok, out_arr[:out_arr_size]) map(to: arg) + { + bool inner_ok = true; + { + std::vector<int> vec{0, 1, 2, 3, 4, 5, arg}; + int sum = 0; + for (auto const& e : vec) + sum += e; + VERIFY (sum == 0 + 1 + 2 + 3 + 4 + 5 + arg); + + auto* out_it = out_arr; + const auto* const out_end = out_arr + out_arr_size; + for (auto const& e : vec) + { + VERIFY (out_it != out_end); + *out_it = e; + ++out_it; + } + } + end: + ok = inner_ok; + } + if (!ok) + return false; + + std::array<int, out_arr_size> reference_array = {0, 1, 2, 3, 4, 5, arg}; + const auto *out_arr_it = out_arr; + for (auto const& e : reference_array) + VERIFY_NON_TARGET (e == *(out_arr_it++)); + + return true; +} + +int main() +{ + volatile int arg = 42; + return test_initializer_list(arg) ? 
0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-33.C b/libgomp/testsuite/libgomp.c++/target-flex-33.C new file mode 100644 index 0000000..bb8a39b --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-33.C @@ -0,0 +1,52 @@ +/* { dg-additional-options "-std=c++17" } */ + +/* deduced std::initializer_list constructor of std::vector (CTAD) */ + +#include <vector> +#include <array> + +#include "target-flex-common.h" + +bool test_initializer_list(int arg) +{ + static constexpr std::size_t out_arr_size = 7; + int out_arr[out_arr_size]; + bool ok; + #pragma omp target map(from: ok, out_arr[:out_arr_size]) map(to: arg) + { + bool inner_ok = true; + { + std::vector vec{0, 1, 2, 3, 4, 5, arg}; + int sum = 0; + for (auto const& e : vec) + sum += e; + VERIFY (sum == 0 + 1 + 2 + 3 + 4 + 5 + arg); + + auto* out_it = out_arr; + const auto* const out_end = out_arr + out_arr_size; + for (auto const& e : vec) + { + VERIFY (out_it != out_end); + *out_it = e; + ++out_it; + } + } + end: + ok = inner_ok; + } + if (!ok) + return false; + + std::array<int, out_arr_size> reference_array = {0, 1, 2, 3, 4, 5, arg}; + const auto *out_arr_it = out_arr; + for (auto const& e : reference_array) + VERIFY_NON_TARGET (e == *(out_arr_it++)); + + return true; +} + +int main() +{ + volatile int arg = 42; + return test_initializer_list(arg) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-41.C b/libgomp/testsuite/libgomp.c++/target-flex-41.C new file mode 100644 index 0000000..4d36341 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-41.C @@ -0,0 +1,94 @@ +/* { dg-additional-options "-std=c++20" } */ + +/* <iterator> c++20 */ + +/* std::common_iterator uses std::variant. */ + +#include <vector> +#include <iterator> +#include <span> + +//TODO PR120454 "C++ constexpr vs. 
OpenMP implicit mapping" +#pragma omp declare target(std::ranges::distance, std::ranges::next) + +#include "target-flex-common.h" + +namespace stdr = std::ranges; + +template<typename It0, typename It1> +bool simple_equal(const It0 begin0, const It0 end0, + const It1 begin1, const It1 end1) BL_NOEXCEPT +{ + It0 it0 = begin0; + It1 it1 = begin1; + for (; it0 != end0; ++it0, ++it1) + if (it1 == end1 || *it0 != *it1) + return false; + return true; +} + +template<typename It, typename OutIt> +void simple_copy(const It begin, const It end, OutIt out) BL_NOEXCEPT +{ + for (It it = begin; it != end; ++it, ++out) + *out = *it; +} + +template<typename T, std::size_t Size> +bool test(const T (&arr)[Size]) +{ + bool ok; + T out_rev_arr[Size]; + T out_fwd_arr[Size]; + T out_first_half_arr[Size / 2]; + #pragma omp target defaultmap(none) \ + map(from: ok, out_rev_arr[:Size], out_fwd_arr[:Size], \ + out_first_half_arr[:Size / 2]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::span<const T> span = {arr, Size}; + std::vector<T> rev_vec(std::reverse_iterator{span.end()}, + std::reverse_iterator{span.begin()}); + VERIFY (std::distance(span.begin(), span.end()) + == std::distance(rev_vec.begin(), rev_vec.end())); + VERIFY (stdr::distance(span.begin(), span.end()) + == stdr::distance(rev_vec.begin(), rev_vec.end())); + VERIFY (stdr::distance(span) == stdr::distance(rev_vec)); + VERIFY (simple_equal(span.begin(), span.end(), + std::reverse_iterator{rev_vec.end()}, + std::reverse_iterator{rev_vec.begin()})); + simple_copy(rev_vec.begin(), rev_vec.end(), out_rev_arr); + simple_copy(std::reverse_iterator{rev_vec.end()}, + std::reverse_iterator{rev_vec.begin()}, + out_fwd_arr); + using counted_iter = std::counted_iterator<decltype(span.begin())>; + using common_iter = std::common_iterator<counted_iter, + std::default_sentinel_t>; + std::vector<T> front_half; + simple_copy(common_iter{counted_iter{span.begin(), Size / 2}}, + common_iter{std::default_sentinel}, + std::back_insert_iterator{front_half}); + VERIFY (simple_equal(span.begin(), stdr::next(span.begin(), Size / 2), + front_half.begin(), front_half.end())); + simple_copy(front_half.begin(), front_half.end(), out_first_half_arr); + } + end: + ok = inner_ok; + } + VERIFY_NON_TARGET (simple_equal(std::reverse_iterator{arr + Size}, + std::reverse_iterator{arr}, + out_rev_arr, out_rev_arr + Size)); + VERIFY_NON_TARGET (simple_equal(arr, arr + Size, + out_fwd_arr, out_fwd_arr + Size)); + VERIFY_NON_TARGET (simple_equal(arr, arr + Size / 2, + out_first_half_arr, out_first_half_arr + Size / 2)); + return ok; +} + +int main() +{ + int arr[] = {0, 1, 2, 3, 4, 5, 6, 7}; + return test(arr) ? 
0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-60.C b/libgomp/testsuite/libgomp.c++/target-flex-60.C new file mode 100644 index 0000000..014b9f5 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-60.C @@ -0,0 +1,46 @@ +/* algorithms pre c++20 */ + +#include <algorithm> +#include <vector> + +#include "target-flex-common.h" + +template<typename T, std::size_t Size> +bool test(const T (&arr)[Size]) +{ + bool ok; + T out_2x_arr[Size]; + T out_shifted_arr[Size]; + #pragma omp target map(from: ok, out_2x_arr[:Size], out_shifted_arr[:Size]) \ + map(to: arr[:Size]) + { + std::vector<T> vec(Size); + std::vector<T> mutated(Size); + bool inner_ok = true; + { + std::copy(arr, arr + Size, vec.begin()); + VERIFY (std::equal(arr, arr + Size, vec.begin())); + std::transform(vec.begin(), vec.end(), mutated.begin(), + [](const T& v){ return v * 2; }); + std::copy(mutated.begin(), mutated.end(), out_2x_arr); + std::rotate(vec.begin(), std::next(vec.begin(), Size / 2), vec.end()); + std::copy(vec.begin(), vec.end(), out_shifted_arr); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (std::equal(arr, arr + Size, out_2x_arr, + [](const T& a, const T& b){ return a * 2 == b; })); + std::vector<T> shifted(arr, arr + Size); + std::rotate(shifted.begin(), std::next(shifted.begin(), Size / 2), shifted.end()); + VERIFY_NON_TARGET (std::equal(out_shifted_arr, out_shifted_arr + Size, shifted.begin())); + return true; +} + +int main() +{ + int arr[] = {0, 1, 2, 3, 4, 5, 6, 7}; + return test(arr) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-61.C b/libgomp/testsuite/libgomp.c++/target-flex-61.C new file mode 100644 index 0000000..9070c2d --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-61.C @@ -0,0 +1,54 @@ +/* { dg-additional-options "-std=c++20" } */ + +/* ranged algorithms c++20 */ + +#include <algorithm> +#include <ranges> +#include <vector> + +//TODO PR120454 "C++ constexpr vs. OpenMP implicit mapping" +#pragma omp declare target(std::ranges::copy, std::ranges::equal, std::ranges::rotate, std::ranges::transform) + +#include "target-flex-common.h" + +namespace stdr = std::ranges; + +template<typename T, std::size_t Size> +bool test(const T (&arr)[Size]) +{ + bool ok; + T out_2x_arr[Size]; + T out_shifted_arr[Size]; + #pragma omp target defaultmap(none) \ + map(from: ok, out_2x_arr[:Size], out_shifted_arr[:Size]) \ + map(to: arr[:Size]) + { + std::vector<T> vec(Size); + std::vector<T> mutated(Size); + bool inner_ok = true; + { + stdr::copy(arr, vec.begin()); + VERIFY (stdr::equal(arr, vec)); + stdr::transform(vec, mutated.begin(), + [](const T& v){ return v * 2; }); + stdr::copy(mutated, out_2x_arr); + stdr::rotate(vec, std::next(vec.begin(), Size / 2)); + stdr::copy(vec, out_shifted_arr); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (stdr::equal(arr, out_2x_arr, stdr::equal_to{}, [](const T& v){ return v * 2; })); + std::vector<T> shifted(arr, arr + Size); + stdr::rotate(shifted, std::next(shifted.begin(), Size / 2)); + VERIFY_NON_TARGET (stdr::equal(out_shifted_arr, shifted)); + return true; +} + +int main() +{ + int arr[] = {0, 1, 2, 3, 4, 5, 6, 7}; + return test(arr) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-62.C b/libgomp/testsuite/libgomp.c++/target-flex-62.C new file mode 100644 index 0000000..ef6b942 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-62.C @@ -0,0 +1,50 @@ +/* { dg-additional-options -std=c++23 } */ + +/* std::views stuff. 
Also tests std::tuple with std::views::zip. */ + +#include <algorithm> +#include <ranges> +#include <span> + +//TODO PR120454 "C++ constexpr vs. OpenMP implicit mapping" +#pragma omp declare target(std::ranges::all_of, std::ranges::equal, std::ranges::fold_left, std::views::reverse, std::views::zip) + +#include "target-flex-common.h" + +namespace stdr = std::ranges; +namespace stdv = std::views; + +bool f() +{ + const int arr_fwd[8] = {0, 1, 2, 3, 4, 5, 6, 7}; + const int arr_rev[8] = {7, 6, 5, 4, 3, 2, 1, 0}; + + bool ok; + #pragma omp target defaultmap(none) map(from: ok) map(to: arr_fwd[:8], arr_rev[:8]) + { + std::span<const int> fwd = {arr_fwd, 8}; + std::span<const int> rev = {arr_rev, 8}; + bool inner_ok = true; + { + VERIFY(stdr::equal(fwd, rev | stdv::reverse)); + VERIFY(stdr::equal(fwd | stdv::drop(4) | stdv::reverse, + rev | stdv::take(4))); + for (auto [first, second] : stdv::zip(fwd, rev)) + VERIFY(first + second == 7); + auto plus = [](int a, int b){ return a + b; }; + auto is_even = [](int v){ return v % 2 == 0; }; + VERIFY(stdr::fold_left(fwd | stdv::filter(is_even), 0, plus) + == 12); + VERIFY(stdr::all_of(fwd | stdv::transform([](int v){ return v * 2; }), + is_even)); + } + end: + ok = inner_ok; + } + return ok; +} + +int main() +{ + return f() ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-70.C b/libgomp/testsuite/libgomp.c++/target-flex-70.C new file mode 100644 index 0000000..9e9383d --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-70.C @@ -0,0 +1,26 @@ +/* CTAD in target regions. */ + +template<typename T> +struct S +{ + T _v; +}; + +template<typename T> +S(T) -> S<T>; + +bool f() +{ + bool ok; + #pragma omp target map(from: ok) + { + S s{42}; + ok = s._v == 42; + } + return ok; +} + +int main() +{ + return f() ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-80.C b/libgomp/testsuite/libgomp.c++/target-flex-80.C new file mode 100644 index 0000000..f41a1bb --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-80.C @@ -0,0 +1,49 @@ +// { dg-additional-options "-std=c++20" } + +/* std::span */ + +#include <span> + +#include "target-flex-common.h" + +template<typename It0, typename It1> +bool simple_equal(It0 it0, const It0 end0, + It1 it1, const It1 end1) noexcept +{ + for (; it0 != end0; ++it0, ++it1) + if (it1 == end1 || *it0 != *it1) + return false; + return true; +} + +template<typename T, std::size_t Size> +bool test(const T (&arr)[Size]) +{ + bool ok; + T out_arr[Size]; + #pragma omp target map(from: ok) map(to: arr[:Size]) + { + std::span span = {arr, Size}; + bool inner_ok = true; + { + VERIFY (!span.empty()); + VERIFY (span.size() == Size); + auto out_it = out_arr; + for (auto elem : span) + *(out_it++) = elem; + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (simple_equal(arr, arr + Size, + out_arr, out_arr + Size)); + return true; +} + +int main() +{ + int arr[8] = {0, 1, 2, 3, 4, 5, 6, 7}; + return test(arr) ? 
0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-81.C b/libgomp/testsuite/libgomp.c++/target-flex-81.C new file mode 100644 index 0000000..a86fefb --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-81.C @@ -0,0 +1,75 @@ +/* { dg-additional-options "-std=c++20" } */ + +#include <ranges> +#include <span> +#include <type_traits> +#include <vector> + +#include "target-flex-common.h" + +namespace stdr = std::ranges; + +template<typename It0, typename It1> +bool simple_equal(It0 it0, const It0 end0, + It1 it1, const It1 end1) noexcept +{ + for (; it0 != end0; ++it0, ++it1) + if (it1 == end1 || *it0 != *it1) + return false; + return true; +} + +template<typename Rn0, typename Rn1> +bool simple_equal(Rn0&& rn0, Rn1&& rn1) noexcept +{ + return simple_equal(stdr::begin(rn0), stdr::end(rn0), + stdr::begin(rn1), stdr::end(rn1)); +} + +template<typename Rn> +bool test(Rn&& range) +{ + using value_type = stdr::range_value_t<std::remove_cvref_t<Rn>>; + std::vector<value_type> vec = {stdr::begin(range), stdr::end(range)}; + value_type *data = vec.data(); + std::size_t size = vec.size(); + bool ok; + #pragma omp target map(from: ok) map(tofrom: data[:size]) map(to: size) + { + std::vector<value_type> orig = {data, data + size}; + std::span<value_type> span = {data, size}; + bool inner_ok = true; + { + auto mul_by_2 = [](const value_type& v){ return v * 2; }; + VERIFY (simple_equal(orig, span)); + for (auto& elem : span) + elem = mul_by_2(elem); + VERIFY (simple_equal(orig | std::views::transform(mul_by_2), span)); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + auto mul_by_2 = [](const value_type& v){ return v * 2; }; + VERIFY_NON_TARGET (simple_equal(range | std::views::transform(mul_by_2), vec)); + return true; +} + +struct my_int +{ + int _v; + bool operator==(my_int const&) const = default; + my_int operator*(int rhs) const noexcept { + return {_v * rhs}; + } +}; + +int main() +{ + std::vector<int> ints = {1, 2, 3, 4, 5}; + const bool ints_res = test(ints); + std::vector<my_int> my_ints = {my_int{1}, my_int{2}, my_int{3}, my_int{4}, my_int{5}}; + const bool my_ints_res = test(my_ints); + return ints_res && my_ints_res ? 
0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-90.C b/libgomp/testsuite/libgomp.c++/target-flex-90.C new file mode 100644 index 0000000..b3f1197 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-90.C @@ -0,0 +1,107 @@ +/* structured bindings */ + +#include <array> +#include <tuple> + +#include "target-flex-common.h" + +template<typename Array, typename Tuple, typename Struct> +bool test(Array array, Tuple tuple, Struct s) +{ + bool ok; + auto array_2nd_in = std::get<2>(array); + auto tuple_2nd_in = std::get<2>(tuple); + auto s_2nd_in = s._2; + decltype(array_2nd_in) array_2nd_out_0; + decltype(tuple_2nd_in) tuple_2nd_out_0; + decltype(s_2nd_in) s_2nd_out_0; + decltype(array_2nd_in) array_2nd_out_1; + decltype(tuple_2nd_in) tuple_2nd_out_1; + decltype(s_2nd_in) s_2nd_out_1; + decltype(array_2nd_in) array_2nd_out_2; + decltype(tuple_2nd_in) tuple_2nd_out_2; + decltype(s_2nd_in) s_2nd_out_2; + #pragma omp target map(from: ok, \ + array_2nd_out_0, tuple_2nd_out_0, s_2nd_out_0, \ + array_2nd_out_1, tuple_2nd_out_1, s_2nd_out_1, \ + array_2nd_out_2, tuple_2nd_out_2, s_2nd_out_2) \ + map(to: array_2nd_in, tuple_2nd_in, s_2nd_in, array, tuple, s) + { + bool inner_ok = true; + { + { + auto [array_0th, array_1st, array_2nd] = array; + VERIFY (array_2nd_in == array_2nd); + VERIFY (std::get<2>(array) == array_2nd); + array_2nd_out_0 = array_2nd; + auto [tuple_0th, tuple_1st, tuple_2nd] = tuple; + VERIFY (tuple_2nd_in == tuple_2nd); + VERIFY (std::get<2>(tuple) == tuple_2nd); + tuple_2nd_out_0 = tuple_2nd; + auto [s_0th, s_1st, s_2nd] = s; + VERIFY (s_2nd_in == s_2nd); + VERIFY (s._2 == s_2nd); + s_2nd_out_0 = s_2nd; + } + { + auto& [array_0th, array_1st, array_2nd] = array; + VERIFY (array_2nd_in == array_2nd); + VERIFY (std::get<2>(array) == array_2nd); + array_2nd_out_1 = array_2nd; + auto& [tuple_0th, tuple_1st, tuple_2nd] = tuple; + VERIFY (tuple_2nd_in == tuple_2nd); + VERIFY (std::get<2>(tuple) == tuple_2nd); + tuple_2nd_out_1 = tuple_2nd; + auto& [s_0th, s_1st, s_2nd] = s; + VERIFY (s_2nd_in == s_2nd); + VERIFY (s._2 == s_2nd); + s_2nd_out_1 = s_2nd; + } + { + const auto& [array_0th, array_1st, array_2nd] = array; + VERIFY (array_2nd_in == array_2nd); + VERIFY (std::get<2>(array) == array_2nd); + array_2nd_out_2 = array_2nd; + const auto& [tuple_0th, tuple_1st, tuple_2nd] = tuple; + VERIFY (tuple_2nd_in == tuple_2nd); + VERIFY (std::get<2>(tuple) == tuple_2nd); + tuple_2nd_out_2 = tuple_2nd; + const auto& [s_0th, s_1st, s_2nd] = s; + VERIFY (s_2nd_in == s_2nd); + VERIFY (s._2 == s_2nd); + s_2nd_out_2 = s_2nd; + } + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (array_2nd_out_0 == array_2nd_in); + VERIFY_NON_TARGET (tuple_2nd_out_0 == tuple_2nd_in); + VERIFY_NON_TARGET (s_2nd_out_0 == s_2nd_in); + VERIFY_NON_TARGET (array_2nd_out_1 == array_2nd_in); + VERIFY_NON_TARGET (tuple_2nd_out_1 == tuple_2nd_in); + VERIFY_NON_TARGET (s_2nd_out_1 == s_2nd_in); + VERIFY_NON_TARGET (array_2nd_out_2 == array_2nd_in); + VERIFY_NON_TARGET (tuple_2nd_out_2 == tuple_2nd_in); + VERIFY_NON_TARGET (s_2nd_out_2 == s_2nd_in); + + return true; +} + +struct S +{ + char _0; + float _1; + int _2; +}; + +int main() +{ + const bool test_res + = test(std::array{0, 1, 2}, + std::tuple{'a', 3.14f, 42}, + S{'a', 3.14f, 42}); + return test_res ? 
0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-common.h b/libgomp/testsuite/libgomp.c++/target-flex-common.h new file mode 100644 index 0000000..14523c4 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-common.h @@ -0,0 +1,40 @@ +#include <cstdio> + +#if __cplusplus >= 201103L + #define BL_NOEXCEPT noexcept +#else + #define BL_NOEXCEPT throw() +#endif + +#if defined __has_builtin +# if __has_builtin (__builtin_LINE) +# define VERIFY_LINE __builtin_LINE () +# endif +#endif +#if !defined VERIFY_LINE +# define VERIFY_LINE __LINE__ +#endif + +/* I'm not a huge fan of macros but in the interest of keeping the code that + isn't being tested as simple as possible, we use them. */ + +#define VERIFY(EXPR) \ + do { \ + if (!(EXPR)) \ + { \ + std::printf("VERIFY ln: %d `" #EXPR "` evaluated to false\n", \ + VERIFY_LINE); \ + inner_ok = false; \ + goto end; \ + } \ + } while (false) + +#define VERIFY_NON_TARGET(EXPR) \ + do { \ + if (!(EXPR)) \ + { \ + std::printf("VERIFY ln: %d `" #EXPR "` evaluated to false\n", \ + VERIFY_LINE); \ + return false; \ + } \ + } while (false) diff --git a/libgomp/testsuite/libgomp.c++/target-std__array-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__array-concurrent-usm.C new file mode 100644 index 0000000..9923783 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__array-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__array-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__array-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__array-concurrent.C new file mode 100644 index 0000000..c42105a --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__array-concurrent.C @@ -0,0 +1,62 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <array> +#include <algorithm> + +#define N 50000 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand (); +} + +#pragma omp declare target +bool validate (const std::array<int,N> &arr, int data[]) +{ + for (int i = 0; i < N; ++i) + if (arr[i] != data[i] * data[i]) + return false; + return true; +} +#pragma omp end declare target + +int main (void) +{ + int data[N]; + bool ok; + std::array<int,N> arr; + + srand (time (NULL)); + init (data); + +#ifndef MEM_SHARED + #pragma omp target data map (to: data[:N]) map (alloc: arr) +#endif + { + #pragma omp target + { +#ifndef MEM_SHARED + new (&arr) std::array<int,N> (); +#endif + std::copy (data, data + N, arr.begin ()); + } + + #pragma omp target teams distribute parallel for + for (int i = 0; i < N; ++i) + arr[i] *= arr[i]; + + #pragma omp target map (from: ok) + { + ok = validate (arr, data); +#ifndef MEM_SHARED + arr.~array (); +#endif + } + } + + return ok ? 
0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent-usm.C new file mode 100644 index 0000000..9023ef8 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__bitset-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent.C new file mode 100644 index 0000000..4fcce93 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent.C @@ -0,0 +1,69 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <bitset> +#include <set> +#include <algorithm> + +#define N 4000 +#define MAX 16384 + +void init (int data[]) +{ + std::set<int> _set; + for (int i = 0; i < N; ++i) + { + // Avoid duplicates in data array. + do + data[i] = rand () % MAX; + while (_set.find (data[i]) != _set.end ()); + _set.insert (data[i]); + } +} + +bool validate (int sum, int data[]) +{ + int total = 0; + for (int i = 0; i < N; ++i) + total += data[i]; + return sum == total; +} + +int main (void) +{ + int data[N]; + std::bitset<MAX> _set; + int sum = 0; + + srand (time (NULL)); + init (data); + +#ifndef MEM_SHARED + #pragma omp target data map (to: data[:N]) map (alloc: _set) +#endif + { + #pragma omp target + { +#ifndef MEM_SHARED + new (&_set) std::bitset<MAX> (); +#endif + for (int i = 0; i < N; ++i) + _set[data[i]] = true; + } + + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < MAX; ++i) + if (_set[i]) + sum += i; + +#ifndef MEM_SHARED + #pragma omp target + _set.~bitset (); +#endif + } + + bool ok = validate (sum, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__cmath.C b/libgomp/testsuite/libgomp.c++/target-std__cmath.C new file mode 100644 index 0000000..aaf7152 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__cmath.C @@ -0,0 +1,340 @@ +// { dg-do run } +// { dg-additional-options "-std=c++20" } + +#include <cmath> +#include <numbers> + +#define FP_EQUAL(x,y) (std::abs ((x) - (y)) < 1E-6) + +#pragma omp declare target +template<typename T> bool test_basic () +{ + T x = -3.456789; + T y = 1.234567; + T z = 5.678901; + + if (std::abs (x) != -x) + return false; + if (!FP_EQUAL (std::trunc (x / y) * y + std::fmod (x, y), x)) + return false; + if (!FP_EQUAL (x - std::round (x / y) * y, std::remainder (x, y))) + return false; + if (!FP_EQUAL (std::fma (x, y, z), x * y + z)) + return false; + if (std::fmax (x, y) != (x > y ? x : y)) + return false; + if (std::fmin (x, y) != (x < y ? 
x : y)) + return false; + if (std::fdim (x, y) != std::max(x - y, (T) 0.0)) + return false; + if (std::fdim (y, x) != std::max(y - x, (T) 0.0)) + return false; + return true; +} + +template<typename T> bool test_exp () +{ + T x = -4.567890; + T y = 2.345678; + + if (!FP_EQUAL (std::exp (x), std::pow (std::numbers::e_v<T>, x))) + return false; + if (!FP_EQUAL (std::exp2 (y), std::pow ((T) 2.0, y))) + return false; + if (!FP_EQUAL (std::expm1 (y), std::exp (y) - (T) 1.0)) + return false; + if (!FP_EQUAL (std::log (std::exp (x)), x)) + return false; + if (!FP_EQUAL (std::log10 (std::pow ((T) 10.0, y)), y)) + return false; + if (!FP_EQUAL (std::log2 (std::exp2 (y)), y)) + return false; + if (!FP_EQUAL (std::log1p (std::expm1 (y)), y)) + return false; + return true; +} + +template<typename T> bool test_power () +{ + T x = 7.234251; + T y = 0.340128; + + if (!FP_EQUAL (std::log (std::pow (x, y)) / std::log (x), y)) + return false; + if (!FP_EQUAL (std::sqrt (x) * std::sqrt (x), x)) + return false; + if (!FP_EQUAL (std::cbrt (x) * std::cbrt (x) * std::cbrt (x), x)) + return false; + if (!FP_EQUAL (std::hypot (x, y), std::sqrt (x * x + y * y))) + return false; + return true; +} + +template<typename T> bool test_trig () +{ + T theta = std::numbers::pi / 4; + T phi = std::numbers::pi / 6; + + if (!FP_EQUAL (std::sin (theta), std::sqrt ((T) 2) / 2)) + return false; + if (!FP_EQUAL (std::sin (phi), 0.5)) + return false; + if (!FP_EQUAL (std::cos (theta), std::sqrt ((T) 2) / 2)) + return false; + if (!FP_EQUAL (std::cos (phi), std::sqrt ((T) 3) / 2)) + return false; + if (!FP_EQUAL (std::tan (theta), 1.0)) + return false; + if (!FP_EQUAL (std::tan (phi), std::sqrt ((T) 3) / 3)) + return false; + + T x = 0.33245623; + + if (!FP_EQUAL (std::asin (std::sin (x)), x)) + return false; + if (!FP_EQUAL (std::acos (std::cos (x)), x)) + return false; + if (!FP_EQUAL (std::atan (std::tan (x)), x)) + return false; + if (!FP_EQUAL (std::atan2 (std::sin (x), std::cos (x)), x)) + return false; + return true; +} + +template<typename T> bool test_hyperbolic () +{ + T x = 0.7423532; + + if (!FP_EQUAL (std::sinh (x), (std::exp (x) - std::exp (-x)) / (T) 2.0)) + return false; + if (!FP_EQUAL (std::cosh (x), (std::exp (x) + std::exp (-x)) / (T) 2.0)) + return false; + if (!FP_EQUAL (std::tanh (x), std::sinh (x) / std::cosh (x))) + return false; + if (!FP_EQUAL (std::asinh (std::sinh (x)), x)) + return false; + if (!FP_EQUAL (std::acosh (std::cosh (x)), x)) + return false; + if (!FP_EQUAL (std::atanh (std::tanh (x)), x)) + return false; + return true; +} + +template<typename T> bool test_erf () +{ + if (!FP_EQUAL (std::erf ((T) 0), 0)) + return false; + if (!FP_EQUAL (std::erf ((T) INFINITY), 1)) + return false; + if (!FP_EQUAL (std::erf ((T) -INFINITY), -1)) + return false; + + if (!FP_EQUAL (std::erfc (0), 1)) + return false; + if (!FP_EQUAL (std::erfc ((T) INFINITY), 0)) + return false; + if (!FP_EQUAL (std::erfc ((T) -INFINITY), 2)) + return false; + + return true; +} + +template<typename T> bool test_gamma () +{ + if (!FP_EQUAL (std::tgamma ((T) 5), 4*3*2*1)) + return false; + if (!FP_EQUAL (std::tgamma ((T) 0.5), std::sqrt (std::numbers::pi_v<T>))) + return false; + if (!FP_EQUAL (std::tgamma ((T) -0.5), (T) -2 * std::sqrt (std::numbers::pi_v<T>))) + return false; + if (!FP_EQUAL (std::tgamma ((T) 2.5), (T) 0.75 * std::sqrt (std::numbers::pi_v<T>))) + return false; + if (!FP_EQUAL (std::tgamma ((T) -2.5), (T) -8.0/15 * std::sqrt (std::numbers::pi_v<T>))) + return false; + + if (!FP_EQUAL (std::lgamma ((T) 5), std::log 
((T) 4*3*2*1))) + return false; + if (!FP_EQUAL (std::lgamma ((T) 0.5), std::log (std::sqrt (std::numbers::pi_v<T>)))) + return false; + if (!FP_EQUAL (std::lgamma ((T) 2.5), + std::log ((T) 0.75 * std::sqrt (std::numbers::pi_v<T>)))) + return false; + + return true; +} + +template<typename T> bool test_rounding () +{ + T x = -2.5678; + T y = 3.6789; + + if (std::ceil (x) != -2) + return false; + if (std::floor (x) != -3) + return false; + if (std::trunc (x) != -2) + return false; + if (std::round (x) != -3) + return false; + + if (std::ceil (y) != 4) + return false; + if (std::floor (y) != 3) + return false; + if (std::trunc (y) != 3) + return false; + if (std::round (y) != 4) + return false; + + /* Not testing std::rint and std::nearbyint due to dependence on + floating-point environment. */ + + return true; +} + +template<typename T> bool test_fpmanip () +{ + T x = -2.3456789; + T y = 3.6789012; + int exp; + + T mantissa = std::frexp (x, &exp); + if (std::ldexp (mantissa, exp) != x) + return false; + if (std::logb (x) + 1 != exp) + return false; + if (std::ilogb (x) + 1 != exp) + return false; + if (std::scalbn (x, -exp) != mantissa) + return false; + + T next = std::nextafter (x, y); + if (!(next > x && next < y)) + return false; + +#if 0 + /* TODO Due to 'std::nexttoward' using 'long double to', this triggers a + '80-bit-precision floating-point numbers unsupported (mode ‘XF’)' error + with x86_64 host and nvptx, GCN offload compilers, or + '128-bit-precision floating-point numbers unsupported (mode ‘TF’)' error + with powerpc64le host and nvptx offload compiler, for example; + PR71064 'nvptx offloading: "long double" data type'. + It ought to work on systems where the host's 'long double' is the same as + 'double' ('DF'): aarch64, for example? 
*/ + next = std::nexttoward (x, y); + if (!(next > x && next < y)) + return false; +#endif + + if (std::copysign (x, y) != std::abs (x)) + return false; + if (std::copysign (y, x) != -y) + return false; + + return true; +} + +template<typename T> bool test_classify () +{ + T x = -2.3456789; + T y = 3.6789012; + + if (std::fpclassify (x) != FP_NORMAL || std::fpclassify (y) != FP_NORMAL) + return false; + if (std::fpclassify ((T) INFINITY) != FP_INFINITE + || std::fpclassify ((T) -INFINITY) != FP_INFINITE) + return false; + if (std::fpclassify ((T) 0.0) != FP_ZERO) + return false; + if (std::fpclassify ((T) NAN) != FP_NAN) + return false; + if (!std::isfinite (x) || !std::isfinite (y)) + return false; + if (std::isfinite ((T) INFINITY) || std::isfinite ((T) -INFINITY)) + return false; + if (std::isinf (x) || std::isinf (y)) + return false; + if (!std::isinf ((T) INFINITY) || !std::isinf ((T) -INFINITY)) + return false; + if (std::isnan (x) || std::isnan (y)) + return false; + if (!std::isnan ((T) 0.0 / (T) 0.0)) + return false; + if (std::isnan (x) || std::isnan (y)) + return false; + if (!std::isnormal (x) || !std::isnormal (y)) + return false; + if (std::isnormal ((T) 0.0) || std::isnormal ((T) INFINITY) || std::isnormal ((T) NAN)) + return false; + if (!std::signbit (x) || std::signbit (y)) + return false; + + return true; +} + +template<typename T> bool test_compare () +{ + T x = 5.6789012; + T y = 8.9012345; + + if (std::isgreater (x, y)) + return false; + if (std::isgreater (x, x)) + return false; + if (std::isgreaterequal (x, y)) + return false; + if (!std::isgreaterequal (x, x)) + return false; + if (!std::isless (x, y)) + return false; + if (std::isless (x, x)) + return false; + if (!std::islessequal (x, y)) + return false; + if (!std::islessequal (x, x)) + return false; + if (!std::islessgreater (x, y)) + return false; + if (std::islessgreater (x, x)) + return false; + if (std::isunordered (x, y)) + return false; + if (!std::isunordered (x, NAN)) + return false; + return true; +} +#pragma omp end declare target + +#define RUN_TEST(func) \ +{ \ + pass++; \ + bool ok = test_##func<float> (); \ + if (!ok) { result = pass; break; } \ + pass++; \ + ok = test_##func<double> (); \ + if (!ok) { result = pass; break; } \ +} + +int main (void) +{ + int result = 0; + + #pragma omp target map (tofrom: result) + do { + int pass = 0; + + RUN_TEST (basic); + RUN_TEST (exp); + RUN_TEST (power); + RUN_TEST (trig); + RUN_TEST (hyperbolic); + RUN_TEST (erf); + RUN_TEST (gamma); + RUN_TEST (rounding); + RUN_TEST (fpmanip); + RUN_TEST (classify); + RUN_TEST (compare); + } while (false); + + return result; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__complex.C b/libgomp/testsuite/libgomp.c++/target-std__complex.C new file mode 100644 index 0000000..e392d17 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__complex.C @@ -0,0 +1,175 @@ +// { dg-do run } +// { dg-additional-options "-std=c++20" } + +#include <cmath> +#include <complex> +#include <numbers> + +using namespace std::complex_literals; + +#define FP_EQUAL(x,y) (std::abs ((x) - (y)) < 1E-6) +#define COMPLEX_EQUAL(x,y) (FP_EQUAL ((x).real (), (y).real ()) \ + && FP_EQUAL ((x).imag (), (y).imag ())) + +#pragma omp declare target +template<typename T> bool test_complex () +{ + std::complex<T> z (-1.334, 5.763); + + if (!FP_EQUAL (z.real (), (T) -1.334)) + return false; + if (!FP_EQUAL (z.imag (), (T) 5.763)) + return false; + if (!FP_EQUAL (std::abs (z), + std::sqrt (z.real () * z.real () + z.imag () * z.imag ()))) + return 
false; + if (!FP_EQUAL (std::arg (z), std::atan2 (z.imag (), z.real ()))) + return false; + if (!FP_EQUAL (std::norm (z), z.real () * z.real () + z.imag () * z.imag ())) + return false; + + auto conj = std::conj (z); + if (!FP_EQUAL (conj.real (), z.real ()) + || !FP_EQUAL (conj.imag (), -z.imag ())) + return false; + + if (std::proj (z) != z) + return false; + + auto infz1 = std::proj (std::complex<float> (INFINITY, -1)); + if (infz1.real () != INFINITY || infz1.imag () != (T) -0.0) + return false; + auto infz2 = std::proj (std::complex<float> (0, -INFINITY)); + if (infz2.real () != INFINITY || infz2.imag () != (T) -0.0) + return false; + + auto polarz = std::polar ((T) 1.5, std::numbers::pi_v<T> / 4); + if (!FP_EQUAL (polarz.real (), (T) 1.5 * std::cos (std::numbers::pi_v<T> / 4)) + || !FP_EQUAL (polarz.imag (), + (T) 1.5* std::sin (std::numbers::pi_v<T> / 4))) + return false; + + return true; +} + +template<typename T> bool test_complex_exp_log () +{ + std::complex<T> z (-1.724, -3.763); + + // Euler's identity + auto eulerz = std::exp (std::complex<T> (0, std::numbers::pi)); + eulerz += 1.0; + if (!COMPLEX_EQUAL (eulerz, std::complex<T> ())) + return false; + + auto my_exp_z + = std::complex<T> (std::exp (z.real ()) * std::cos (z.imag ()), + std::exp (z.real ()) * std::sin (z.imag ())); + if (!COMPLEX_EQUAL (std::exp (z), my_exp_z)) + return false; + + if (!COMPLEX_EQUAL (std::log10 (z), + std::log (z) / std::log (std::complex<T> (10)))) + return false; + + return true; +} + +template<typename T> bool test_complex_trig () +{ + std::complex<T> z (std::numbers::pi / 8, std::numbers::pi / 10); + const std::complex<T> i (0, 1); + + auto my_sin_z + = std::complex<T> (std::sin (z.real ()) * std::cosh (z.imag ()), + std::cos (z.real ()) * std::sinh (z.imag ())); + if (!COMPLEX_EQUAL (std::sin (z), my_sin_z)) + return false; + + auto my_cos_z + = std::complex<T> (std::cos (z.real ()) * std::cosh (z.imag ()), + -std::sin (z.real ()) * std::sinh (z.imag ())); + if (!COMPLEX_EQUAL (std::cos (z), my_cos_z)) + return false; + + auto my_tan_z + = std::complex<T> (std::sin (2*z.real ()), std::sinh (2*z.imag ())) + / (std::cos (2*z.real ()) + std::cosh (2*z.imag ())); + if (!COMPLEX_EQUAL (std::tan (z), my_tan_z)) + return false; + + auto my_sinh_z + = std::complex<T> (std::sinh (z.real ()) * std::cos (z.imag ()), + std::cosh (z.real ()) * std::sin (z.imag ())); + if (!COMPLEX_EQUAL (std::sinh (z), my_sinh_z)) + return false; + + auto my_cosh_z + = std::complex<T> (std::cosh (z.real ()) * std::cos (z.imag ()), + std::sinh (z.real ()) * std::sin (z.imag ())); + if (!COMPLEX_EQUAL (std::cosh (z), my_cosh_z)) + return false; + + auto my_tanh_z + = std::complex<T> (std::sinh (2*z.real ()), + std::sin (2*z.imag ())) + / (std::cosh (2*z.real ()) + std::cos (2*z.imag ())); + if (!COMPLEX_EQUAL (std::tanh (z), my_tanh_z)) + return false; + + auto my_asin_z = -i * std::log (i * z + std::sqrt ((T) 1.0 - z*z)); + if (!COMPLEX_EQUAL (std::asin (z), my_asin_z)) + return false; + + auto my_acos_z + = std::complex<T> (std::numbers::pi / 2) + + i * std::log (i * z + std::sqrt ((T) 1.0 - z*z)); + if (!COMPLEX_EQUAL (std::acos (z), my_acos_z)) + return false; + + auto my_atan_z = std::complex<T> (0, -0.5) * (std::log ((i - z) / (i + z))); + if (!COMPLEX_EQUAL (std::atan (z), my_atan_z)) + return false; + + auto my_asinh_z = std::log (z + std::sqrt (z*z + (T) 1.0)); + if (!COMPLEX_EQUAL (std::asinh (z), my_asinh_z)) + return false; + + auto my_acosh_z = std::log (z + std::sqrt (z*z - (T) 1.0)); + if (!COMPLEX_EQUAL 
(std::acosh (z), my_acosh_z)) + return false; + + auto my_atanh_z + = std::complex<T> (0.5) * (std::log ((T) 1.0 + z) - std::log ((T) 1.0 - z)); + if (!COMPLEX_EQUAL (std::atanh (z), my_atanh_z)) + return false; + + return true; +} +#pragma omp end declare target + +#define RUN_TEST(func) \ +{ \ + pass++; \ + bool ok = test_##func<float> (); \ + if (!ok) { result = pass; break; } \ + pass++; \ + ok = test_##func<double> (); \ + if (!ok) { result = pass; break; } \ +} + +int main (void) +{ + int result = 0; + + #pragma omp target map (tofrom: result) + do { + int pass = 0; + + RUN_TEST (complex); + RUN_TEST (complex_exp_log); + RUN_TEST (complex_trig); + } while (false); + + return result; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__deque-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__deque-concurrent-usm.C new file mode 100644 index 0000000..863a1de --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__deque-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__deque-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__deque-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__deque-concurrent.C new file mode 100644 index 0000000..9c2d6fa --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__deque-concurrent.C @@ -0,0 +1,64 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <deque> +#include <algorithm> + +#define N 50000 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand (); +} + +#pragma omp declare target +bool validate (const std::deque<int> &_deque, int data[]) +{ + for (int i = 0; i < N; ++i) + if (_deque[i] != data[i] * data[i]) + return false; + return true; +} +#pragma omp end declare target + +int main (void) +{ + int data[N]; + bool ok; + + srand (time (NULL)); + init (data); + +#ifdef MEM_SHARED + std::deque<int> _deque (std::begin (data), std::end (data)); +#else + std::deque<int> _deque; +#endif + +#ifndef MEM_SHARED + #pragma omp target data map (to: data[:N]) map (alloc: _deque) +#endif + { +#ifndef MEM_SHARED + #pragma omp target + new (&_deque) std::deque<int> (std::begin (data), std::end (data)); +#endif + + #pragma omp target teams distribute parallel for + for (int i = 0; i < N; ++i) + _deque[i] *= _deque[i]; + + #pragma omp target map (from: ok) + { + ok = validate (_deque, data); +#ifndef MEM_SHARED + _deque.~deque (); +#endif + } + } + + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__flat_map-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__flat_map-concurrent.C new file mode 100644 index 0000000..9e59907 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__flat_map-concurrent.C @@ -0,0 +1,71 @@ +// { dg-do run } +// { dg-additional-options "-std=c++23" } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +/* { dg-ice {TODO PR120450} { offload_target_amdgcn && { ! offload_device_shared_as } } } + { dg-excess-errors {'mkoffload' failure etc.} { xfail { offload_target_amdgcn && { ! offload_device_shared_as } } } } + (For effective-target 'offload_device_shared_as', we've got '-DMEM_SHARED', and therefore don't invoke the constructor with placement new.) 
*/ + +#include <stdlib.h> +#include <time.h> +#include <set> +#include <flat_map> + +#define N 3000 + +void init (int data[], bool unique) +{ + std::set<int> _set; + for (int i = 0; i < N; ++i) + { + // Avoid duplicates in data array if unique is true. + do + data[i] = rand (); + while (unique && _set.count (data[i]) > 0); + _set.insert (data[i]); + } +} + +bool validate (long long sum, int keys[], int data[]) +{ + long long total = 0; + for (int i = 0; i < N; ++i) + total += (long long) keys[i] * data[i]; + return sum == total; +} + +int main (void) +{ + int keys[N], data[N]; + std::flat_map<int,int> _map; + + srand (time (NULL)); + init (keys, true); + init (data, false); + + #pragma omp target enter data map (to: keys[:N], data[:N]) map (alloc: _map) + + #pragma omp target + { +#ifndef MEM_SHARED + new (&_map) std::flat_map<int,int> (); +#endif + for (int i = 0; i < N; ++i) + _map[keys[i]] = data[i]; + } + + long long sum = 0; + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < N; ++i) + sum += (long long) keys[i] * _map[keys[i]]; + +#ifndef MEM_SHARED + #pragma omp target + _map.~flat_map (); +#endif + + #pragma omp target exit data map (release: _map) + + bool ok = validate (sum, keys, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__flat_multimap-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__flat_multimap-concurrent.C new file mode 100644 index 0000000..1dc60c8 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__flat_multimap-concurrent.C @@ -0,0 +1,70 @@ +// { dg-do run } +// { dg-additional-options "-std=c++23" } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +/* { dg-ice {TODO PR120450} { offload_target_amdgcn && { ! offload_device_shared_as } } } + { dg-excess-errors {'mkoffload' failure etc.} { xfail { offload_target_amdgcn && { ! offload_device_shared_as } } } } + (For effective-target 'offload_device_shared_as', we've got '-DMEM_SHARED', and therefore don't invoke the constructor with placement new.) */ + +#include <stdlib.h> +#include <time.h> +#include <flat_map> + +// Make sure that KEY_MAX is less than N to ensure some duplicate keys. +#define N 3000 +#define KEY_MAX 1000 + +void init (int data[], int max) +{ + for (int i = 0; i < N; ++i) + data[i] = i % max; +} + +bool validate (long long sum, int keys[], int data[]) +{ + long long total = 0; + for (int i = 0; i < N; ++i) + total += (long long) keys[i] * data[i]; + return sum == total; +} + +int main (void) +{ + int keys[N], data[N]; + std::flat_multimap<int,int> _map; + + srand (time (NULL)); + init (keys, KEY_MAX); + init (data, RAND_MAX); + + #pragma omp target enter data map (to: keys[:N], data[:N]) map (alloc: _map) + + #pragma omp target + { +#ifndef MEM_SHARED + new (&_map) std::flat_multimap<int,int> (); +#endif + for (int i = 0; i < N; ++i) + _map.insert({keys[i], data[i]}); + } + + long long sum = 0; + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < KEY_MAX; ++i) + { + auto range = _map.equal_range (i); + for (auto it = range.first; it != range.second; ++it) { + sum += (long long) it->first * it->second; + } + } + +#ifndef MEM_SHARED + #pragma omp target + _map.~flat_multimap (); +#endif + + #pragma omp target exit data map (release: _map) + + bool ok = validate (sum, keys, data); + return ok ? 
0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__flat_multiset-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__flat_multiset-concurrent.C new file mode 100644 index 0000000..59b59bf --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__flat_multiset-concurrent.C @@ -0,0 +1,60 @@ +// { dg-do run } +// { dg-additional-options "-std=c++23" } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <flat_set> +#include <algorithm> + +// MAX should be less than N to ensure that some duplicates occur. +#define N 4000 +#define MAX 1000 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand () % MAX; +} + +bool validate (int sum, int data[]) +{ + int total = 0; + for (int i = 0; i < N; ++i) + total += data[i]; + return sum == total; +} + +int main (void) +{ + int data[N]; + std::flat_multiset<int> set; + int sum = 0; + + srand (time (NULL)); + init (data); + + #pragma omp target data map (to: data[:N]) map (alloc: set) + { + #pragma omp target + { +#ifndef MEM_SHARED + new (&set) std::flat_multiset<int> (); +#endif + for (int i = 0; i < N; ++i) + set.insert (data[i]); + } + + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < MAX; ++i) + sum += i * set.count (i); + +#ifndef MEM_SHARED + #pragma omp target + set.~flat_multiset (); +#endif + } + + bool ok = validate (sum, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__flat_set-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__flat_set-concurrent.C new file mode 100644 index 0000000..b255cd5 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__flat_set-concurrent.C @@ -0,0 +1,67 @@ +// { dg-do run } +// { dg-additional-options "-std=c++23" } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <flat_set> +#include <algorithm> + +#define N 4000 +#define MAX 16384 + +void init (int data[]) +{ + std::flat_set<int> _set; + for (int i = 0; i < N; ++i) + { + // Avoid duplicates in data array. + do + data[i] = rand () % MAX; + while (_set.count (data[i]) != 0); + _set.insert (data[i]); + } +} + +bool validate (int sum, int data[]) +{ + int total = 0; + for (int i = 0; i < N; ++i) + total += data[i]; + return sum == total; +} + +int main (void) +{ + int data[N]; + std::flat_set<int> _set; + int sum = 0; + + srand (time (NULL)); + init (data); + + #pragma omp target data map (to: data[:N]) map (alloc: _set) + { + #pragma omp target + { +#ifndef MEM_SHARED + new (&_set) std::flat_set<int> (); +#endif + for (int i = 0; i < N; ++i) + _set.insert (data[i]); + } + + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < MAX; ++i) + if (_set.count (i) > 0) + sum += i; + +#ifndef MEM_SHARED + #pragma omp target + _set.~flat_set (); +#endif + } + + bool ok = validate (sum, data); + return ok ? 
0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent-usm.C new file mode 100644 index 0000000..60d5cee --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__forward_list-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent.C new file mode 100644 index 0000000..6b0ee65 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent.C @@ -0,0 +1,83 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <omp.h> +#include <forward_list> +#include <algorithm> + +#define N 3000 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand (); +} + +#pragma omp declare target +bool validate (const std::forward_list<int> &list, int data[]) +{ + int i = 0; + for (auto &v : list) + { + if (v != data[i] * data[i]) + return false; + ++i; + } + return true; +} +#pragma omp end declare target + +int main (void) +{ + int data[N]; + bool ok; + + srand (time (NULL)); + init (data); + +#ifdef MEM_SHARED + std::forward_list<int> list (std::begin (data), std::end (data)); +#else + std::forward_list<int> list; +#endif + +#ifndef MEM_SHARED + #pragma omp target data map (to: data[:N]) map (alloc: list) +#endif + { +#ifndef MEM_SHARED + #pragma omp target + new (&list) std::forward_list<int> (std::begin (data), std::end (data)); +#endif + + #pragma omp target teams + do + { + int len = N / omp_get_num_teams () + (N % omp_get_num_teams () > 0); + int start = len * omp_get_team_num (); + if (start >= N) + break; + if (start + len >= N) + len = N - start; + auto it = list.begin (); + std::advance (it, start); + for (int i = 0; i < len; ++i) + { + *it *= *it; + ++it; + } + } while (false); + + #pragma omp target map (from: ok) + { + ok = validate (list, data); +#ifndef MEM_SHARED + list.~forward_list (); +#endif + } + } + + return ok ? 
0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__list-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__list-concurrent-usm.C new file mode 100644 index 0000000..5057bf9 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__list-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__list-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__list-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__list-concurrent.C new file mode 100644 index 0000000..1f44a17 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__list-concurrent.C @@ -0,0 +1,83 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <omp.h> +#include <list> +#include <algorithm> + +#define N 3000 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand (); +} + +#pragma omp declare target +bool validate (const std::list<int> &_list, int data[]) +{ + int i = 0; + for (auto &v : _list) + { + if (v != data[i] * data[i]) + return false; + ++i; + } + return true; +} +#pragma omp end declare target + +int main (void) +{ + int data[N]; + bool ok; + + srand (time (NULL)); + init (data); + +#ifdef MEM_SHARED + std::list<int> _list (std::begin (data), std::end (data)); +#else + std::list<int> _list; +#endif + +#ifndef MEM_SHARED + #pragma omp target data map (to: data[:N]) map (alloc: _list) +#endif + { +#ifndef MEM_SHARED + #pragma omp target + new (&_list) std::list<int> (std::begin (data), std::end (data)); +#endif + + #pragma omp target teams + do + { + int len = N / omp_get_num_teams () + (N % omp_get_num_teams () > 0); + int start = len * omp_get_team_num (); + if (start >= N) + break; + if (start + len >= N) + len = N - start; + auto it = _list.begin (); + std::advance (it, start); + for (int i = 0; i < len; ++i) + { + *it *= *it; + ++it; + } + } while (false); + + #pragma omp target map (from: ok) + { + ok = validate (_list, data); +#ifndef MEM_SHARED + _list.~list (); +#endif + } + } + + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__map-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__map-concurrent-usm.C new file mode 100644 index 0000000..fe37426 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__map-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__map-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__map-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__map-concurrent.C new file mode 100644 index 0000000..36556ef --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__map-concurrent.C @@ -0,0 +1,70 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <set> +#include <map> + +#define N 3000 + +void init (int data[], bool unique) +{ + std::set<int> _set; + for (int i = 0; i < N; ++i) + { + // Avoid duplicates in data array if unique is true. 
+ do + data[i] = rand (); + while (unique && _set.find (data[i]) != _set.end ()); + _set.insert (data[i]); + } +} + +bool validate (long long sum, int keys[], int data[]) +{ + long long total = 0; + for (int i = 0; i < N; ++i) + total += (long long) keys[i] * data[i]; + return sum == total; +} + +int main (void) +{ + int keys[N], data[N]; + std::map<int,int> _map; + + srand (time (NULL)); + init (keys, true); + init (data, false); + +#ifndef MEM_SHARED + #pragma omp target enter data map (to: keys[:N], data[:N]) map (alloc: _map) +#endif + + #pragma omp target + { +#ifndef MEM_SHARED + new (&_map) std::map<int,int> (); +#endif + for (int i = 0; i < N; ++i) + _map[keys[i]] = data[i]; + } + + long long sum = 0; + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < N; ++i) + sum += (long long) keys[i] * _map[keys[i]]; + +#ifndef MEM_SHARED + #pragma omp target + _map.~map (); +#endif + +#ifndef MEM_SHARED + #pragma omp target exit data map (release: _map) +#endif + + bool ok = validate (sum, keys, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent-usm.C new file mode 100644 index 0000000..79f9245 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__multimap-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent.C new file mode 100644 index 0000000..6a4a4e8 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent.C @@ -0,0 +1,68 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <map> + +// Make sure that KEY_MAX is less than N to ensure some duplicate keys. +#define N 3000 +#define KEY_MAX 1000 + +void init (int data[], int max) +{ + for (int i = 0; i < N; ++i) + data[i] = rand () % max; +} + +bool validate (long long sum, int keys[], int data[]) +{ + long long total = 0; + for (int i = 0; i < N; ++i) + total += (long long) keys[i] * data[i]; + return sum == total; +} + +int main (void) +{ + int keys[N], data[N]; + std::multimap<int,int> _map; + + srand (time (NULL)); + init (keys, KEY_MAX); + init (data, RAND_MAX); + +#ifndef MEM_SHARED + #pragma omp target enter data map (to: keys[:N], data[:N]) map (alloc: _map) +#endif + + #pragma omp target + { +#ifndef MEM_SHARED + new (&_map) std::multimap<int,int> (); +#endif + for (int i = 0; i < N; ++i) + _map.insert({keys[i], data[i]}); + } + + long long sum = 0; + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < KEY_MAX; ++i) + { + auto range = _map.equal_range (i); + for (auto it = range.first; it != range.second; ++it) + sum += (long long) it->first * it->second; + } + +#ifndef MEM_SHARED + #pragma omp target + _map.~multimap (); +#endif + +#ifndef MEM_SHARED + #pragma omp target exit data map (release: _map) +#endif + + bool ok = validate (sum, keys, data); + return ok ? 
0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent-usm.C new file mode 100644 index 0000000..2d80756 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__multiset-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent.C new file mode 100644 index 0000000..b12402e --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent.C @@ -0,0 +1,62 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <stdio.h> +#include <time.h> +#include <set> +#include <algorithm> + +// MAX should be less than N to ensure that some duplicates occur. +#define N 4000 +#define MAX 1000 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand () % MAX; +} + +bool validate (int sum, int data[]) +{ + int total = 0; + for (int i = 0; i < N; ++i) + total += data[i]; + return sum == total; +} + +int main (void) +{ + int data[N]; + std::multiset<int> set; + int sum = 0; + + srand (time (NULL)); + init (data); + +#ifndef MEM_SHARED + #pragma omp target data map (to: data[:N]) map (alloc: set) +#endif + { + #pragma omp target + { +#ifndef MEM_SHARED + new (&set) std::multiset<int> (); +#endif + for (int i = 0; i < N; ++i) + set.insert (data[i]); + } + + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < MAX; ++i) + sum += i * set.count (i); + +#ifndef MEM_SHARED + #pragma omp target + set.~multiset (); +#endif + } + + bool ok = validate (sum, data); + return ok ? 
0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__numbers.C b/libgomp/testsuite/libgomp.c++/target-std__numbers.C new file mode 100644 index 0000000..a6b3665 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__numbers.C @@ -0,0 +1,93 @@ +// { dg-do run } +// { dg-additional-options "-std=c++20" } + +#include <cmath> +#include <numbers> + +#define FP_EQUAL(x,y) (std::abs ((x) - (y)) < 1E-6) + +#pragma omp declare target +template<typename T> bool test_pi () +{ + if (!FP_EQUAL (std::sin (std::numbers::pi_v<T>), (T) 0.0)) + return false; + if (!FP_EQUAL (std::cos (std::numbers::pi_v<T>), (T) -1.0)) + return false; + if (!FP_EQUAL (std::numbers::pi_v<T> * std::numbers::inv_pi_v<T>, (T) 1.0)) + return false; + if (!FP_EQUAL (std::numbers::pi_v<T> * std::numbers::inv_sqrtpi_v<T> + * std::numbers::inv_sqrtpi_v<T>, (T) 1.0)) + return false; + return true; +} + +template<typename T> bool test_sqrt () +{ + if (!FP_EQUAL (std::numbers::sqrt2_v<T> * std::numbers::sqrt2_v<T>, (T) 2.0)) + return false; + if (!FP_EQUAL (std::numbers::sqrt3_v<T> * std::numbers::sqrt3_v<T>, (T) 3.0)) + return false; + return true; +} + +template<typename T> bool test_phi () +{ + T myphi = ((T) 1.0 + std::sqrt ((T) 5.0)) / (T) 2.0; + if (!FP_EQUAL (myphi, std::numbers::phi_v<T>)) + return false; + return true; +} + +template<typename T> bool test_log () +{ + if (!FP_EQUAL (std::log ((T) 2.0), std::numbers::ln2_v<T>)) + return false; + if (!FP_EQUAL (std::log ((T) 10.0), std::numbers::ln10_v<T>)) + return false; + if (!FP_EQUAL (std::log2 ((T) std::numbers::e), std::numbers::log2e_v<T>)) + return false; + if (!FP_EQUAL (std::log10 ((T) std::numbers::e), std::numbers::log10e_v<T>)) + return false; + return true; +} + +template<typename T> bool test_egamma () +{ + T myegamma = 0.0; + #pragma omp parallel for reduction(+:myegamma) + for (int k = 2; k < 100000; ++k) + myegamma += (std::riemann_zeta (k) - 1) / k; + myegamma = (T) 1 - myegamma; + if (!FP_EQUAL (myegamma, std::numbers::egamma_v<T>)) + return false; + return true; +} +#pragma omp end declare target + +#define RUN_TEST(func) \ +{ \ + pass++; \ + bool ok = test_##func<float> (); \ + if (!ok) { result = pass; break; } \ + pass++; \ + ok = test_##func<double> (); \ + if (!ok) { result = pass; break; } \ +} + +int main (void) +{ + int result = 0; + + #pragma omp target map (tofrom: result) + do { + int pass = 0; + + RUN_TEST (pi); + RUN_TEST (sqrt); + RUN_TEST (phi); + RUN_TEST (log); + RUN_TEST (egamma); + } while (false); + + return result; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__set-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__set-concurrent-usm.C new file mode 100644 index 0000000..54f62e3 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__set-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__set-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__set-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__set-concurrent.C new file mode 100644 index 0000000..cd23128 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__set-concurrent.C @@ -0,0 +1,68 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <set> +#include <algorithm> + +#define N 4000 +#define MAX 16384 + +void init (int data[]) +{ + std::set<int> _set; + for (int i = 0; i < N; ++i) + { + // Avoid duplicates in data array. 
+ do + data[i] = rand () % MAX; + while (_set.find (data[i]) != _set.end ()); + _set.insert (data[i]); + } +} + +bool validate (int sum, int data[]) +{ + int total = 0; + for (int i = 0; i < N; ++i) + total += data[i]; + return sum == total; +} + +int main (void) +{ + int data[N]; + std::set<int> _set; + int sum = 0; + + srand (time (NULL)); + init (data); + +#ifndef MEM_SHARED + #pragma omp target data map (to: data[:N]) map (alloc: _set) +#endif + { + #pragma omp target + { +#ifndef MEM_SHARED + new (&_set) std::set<int> (); +#endif + for (int i = 0; i < N; ++i) + _set.insert (data[i]); + } + + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < MAX; ++i) + if (_set.find (i) != _set.end ()) + sum += i; + +#ifndef MEM_SHARED + #pragma omp target + _set.~set (); +#endif + } + + bool ok = validate (sum, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__span-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__span-concurrent-usm.C new file mode 100644 index 0000000..7ef16bf --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__span-concurrent-usm.C @@ -0,0 +1,7 @@ +// { dg-additional-options "-std=c++20" } + +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__span-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__span-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__span-concurrent.C new file mode 100644 index 0000000..046b3c1 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__span-concurrent.C @@ -0,0 +1,66 @@ +// { dg-do run } +// { dg-additional-options "-std=c++20" } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <span> + +#define N 64 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand (); +} + +#pragma omp declare target +bool validate (const std::span<int, N> &span, int data[]) +{ + for (int i = 0; i < N; ++i) + if (span[i] != data[i] * data[i]) + return false; + return true; +} +#pragma omp end declare target + +int main (void) +{ + int data[N]; + bool ok; + int elements[N]; + std::span<int, N> span(elements); + + srand (time (NULL)); + init (data); + +#ifndef MEM_SHARED + #pragma omp target enter data map (to: data[:N]) map (alloc: elements, span) +#endif + + #pragma omp target + { +#ifndef MEM_SHARED + new (&span) std::span<int, N> (elements); +#endif + std::copy (data, data + N, span.begin ()); + } + + #pragma omp target teams distribute parallel for + for (int i = 0; i < N; ++i) + span[i] *= span[i]; + + #pragma omp target map (from: ok) + { + ok = validate (span, data); +#ifndef MEM_SHARED + span.~span (); +#endif + } + +#ifndef MEM_SHARED + #pragma omp target exit data map (release: elements, span) +#endif + + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__unordered_map-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__unordered_map-concurrent.C new file mode 100644 index 0000000..00d7943 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__unordered_map-concurrent.C @@ -0,0 +1,66 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <set> +#include <unordered_map> + +#define N 3000 + +void init (int data[], bool unique) +{ + std::set<int> _set; + for (int i = 0; i < N; ++i) + { + // Avoid duplicates in data array if unique is true. 
+ do + data[i] = rand (); + while (unique && _set.count (data[i]) > 0); + _set.insert (data[i]); + } +} + +bool validate (long long sum, int keys[], int data[]) +{ + long long total = 0; + for (int i = 0; i < N; ++i) + total += (long long) keys[i] * data[i]; + return sum == total; +} + +int main (void) +{ + int keys[N], data[N]; + std::unordered_map<int,int> _map; + + srand (time (NULL)); + init (keys, true); + init (data, false); + + #pragma omp target enter data map (to: keys[:N], data[:N]) map (alloc: _map) + + #pragma omp target + { +#ifndef MEM_SHARED + new (&_map) std::unordered_map<int,int> (); +#endif + for (int i = 0; i < N; ++i) + _map[keys[i]] = data[i]; + } + + long long sum = 0; + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < N; ++i) + sum += (long long) keys[i] * _map[keys[i]]; + +#ifndef MEM_SHARED + #pragma omp target + _map.~unordered_map (); +#endif + + #pragma omp target exit data map (release: _map) + + bool ok = validate (sum, keys, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__unordered_multimap-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__unordered_multimap-concurrent.C new file mode 100644 index 0000000..2567634 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__unordered_multimap-concurrent.C @@ -0,0 +1,65 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <unordered_map> + +// Make sure that KEY_MAX is less than N to ensure some duplicate keys. +#define N 3000 +#define KEY_MAX 1000 + +void init (int data[], int max) +{ + for (int i = 0; i < N; ++i) + data[i] = i % max; +} + +bool validate (long long sum, int keys[], int data[]) +{ + long long total = 0; + for (int i = 0; i < N; ++i) + total += (long long) keys[i] * data[i]; + return sum == total; +} + +int main (void) +{ + int keys[N], data[N]; + std::unordered_multimap<int,int> _map; + + srand (time (NULL)); + init (keys, KEY_MAX); + init (data, RAND_MAX); + + #pragma omp target enter data map (to: keys[:N], data[:N]) map (alloc: _map) + + #pragma omp target + { +#ifndef MEM_SHARED + new (&_map) std::unordered_multimap<int,int> (); +#endif + for (int i = 0; i < N; ++i) + _map.insert({keys[i], data[i]}); + } + + long long sum = 0; + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < KEY_MAX; ++i) + { + auto range = _map.equal_range (i); + for (auto it = range.first; it != range.second; ++it) { + sum += (long long) it->first * it->second; + } + } + +#ifndef MEM_SHARED + #pragma omp target + _map.~unordered_multimap (); +#endif + + #pragma omp target exit data map (release: _map) + + bool ok = validate (sum, keys, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__unordered_multiset-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__unordered_multiset-concurrent.C new file mode 100644 index 0000000..da6c875 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__unordered_multiset-concurrent.C @@ -0,0 +1,59 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <unordered_set> +#include <algorithm> + +// MAX should be less than N to ensure that some duplicates occur. 
+#define N 4000 +#define MAX 1000 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand () % MAX; +} + +bool validate (int sum, int data[]) +{ + int total = 0; + for (int i = 0; i < N; ++i) + total += data[i]; + return sum == total; +} + +int main (void) +{ + int data[N]; + std::unordered_multiset<int> set; + int sum = 0; + + srand (time (NULL)); + init (data); + + #pragma omp target data map (to: data[:N]) map (alloc: set) + { + #pragma omp target + { +#ifndef MEM_SHARED + new (&set) std::unordered_multiset<int> (); +#endif + for (int i = 0; i < N; ++i) + set.insert (data[i]); + } + + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < MAX; ++i) + sum += i * set.count (i); + +#ifndef MEM_SHARED + #pragma omp target + set.~unordered_multiset (); +#endif + } + + bool ok = validate (sum, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__unordered_set-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__unordered_set-concurrent.C new file mode 100644 index 0000000..b7bd935 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__unordered_set-concurrent.C @@ -0,0 +1,66 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <unordered_set> +#include <algorithm> + +#define N 4000 +#define MAX 16384 + +void init (int data[]) +{ + std::unordered_set<int> _set; + for (int i = 0; i < N; ++i) + { + // Avoid duplicates in data array. + do + data[i] = rand () % MAX; + while (_set.count (data[i]) != 0); + _set.insert (data[i]); + } +} + +bool validate (int sum, int data[]) +{ + int total = 0; + for (int i = 0; i < N; ++i) + total += data[i]; + return sum == total; +} + +int main (void) +{ + int data[N]; + std::unordered_set<int> _set; + int sum = 0; + + srand (time (NULL)); + init (data); + + #pragma omp target data map (to: data[:N]) map (alloc: _set) + { + #pragma omp target + { +#ifndef MEM_SHARED + new (&_set) std::unordered_set<int> (); +#endif + for (int i = 0; i < N; ++i) + _set.insert (data[i]); + } + + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < MAX; ++i) + if (_set.count (i) > 0) + sum += i; + +#ifndef MEM_SHARED + #pragma omp target + _set.~unordered_set (); +#endif + } + + bool ok = validate (sum, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__valarray-1.C b/libgomp/testsuite/libgomp.c++/target-std__valarray-1.C new file mode 100644 index 0000000..865cde2 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__valarray-1.C @@ -0,0 +1,179 @@ +// { dg-additional-options -std=c++20 } +// { dg-output-file target-std__valarray-1.output } + +#include <valarray> +#include <ostream> +#include <sstream> + + +/*TODO Work around PR118484 "ICE during IPA pass: cp, segfault in determine_versionability ipa-cp.cc:467". + +We can't: + + #pragma omp declare target(std::basic_streambuf<char, std::char_traits<char>>::basic_streambuf) + +... 
because: + + error: overloaded function name ‘std::basic_streambuf<char>::__ct ’ in clause ‘enter’ + +Therefore, use dummy classes in '#pragma omp declare target': +*/ + +#pragma omp declare target + +// For 'std::basic_streambuf<char, std::char_traits<char> >::basic_streambuf': + +class dummy_basic_streambuf__char + : public std::basic_streambuf<char> +{ +public: + dummy_basic_streambuf__char() {} +}; + +// For 'std::basic_ios<char, std::char_traits<char> >::basic_ios()': + +class dummy_basic_ios__char + : public std::basic_ios<char> +{ +public: + dummy_basic_ios__char() {} +}; + +#pragma omp end declare target + + +int main() +{ + // Due to PR120021 "Offloading vs. C++ 'std::initializer_list'", we can't construct these on the device. + std::initializer_list<int> v1_i = {10, 20, 30, 40, 50}; + const int *v1_i_data = std::data(v1_i); + size_t v1_i_size = v1_i.size(); + std::initializer_list<int> v2_i = {5, 4, 3, 2, 1}; + const int *v2_i_data = std::data(v2_i); + size_t v2_i_size = v2_i.size(); + std::initializer_list<int> shiftData_i = {1, 2, 3, 4, 5}; + const int *shiftData_i_data = std::data(shiftData_i); + size_t shiftData_i_size = shiftData_i.size(); +#pragma omp target \ + defaultmap(none) \ + map(to: v1_i_data[:v1_i_size], v1_i_size, \ + v2_i_data[:v2_i_size], v2_i_size, \ + shiftData_i_data[:shiftData_i_size], shiftData_i_size) + { + /* Manually set up a buffer we can stream into, similar to 'cout << [...]', and print it at the end of region. */ + std::stringbuf out_b; + std::ostream out(&out_b); + + std::valarray<int> v1(v1_i_data, v1_i_size); + out << "\nv1:"; + for (auto val : v1) + out << " " << val; + + std::valarray<int> v2(v2_i_data, v2_i_size); + out << "\nv2:"; + for (auto val : v2) + out << " " << val; + + std::valarray<int> sum = v1 + v2; + out << "\nv1 + v2:"; + for (auto val : sum) + out << " " << val; + + std::valarray<int> diff = v1 - v2; + out << "\nv1 - v2:"; + for (auto val : diff) + out << " " << val; + + std::valarray<int> product = v1 * v2; + out << "\nv1 * v2:"; + for (auto val : product) + out << " " << val; + + std::valarray<int> quotient = v1 / v2; + out << "\nv1 / v2:"; + for (auto val : quotient) + out << " " << val; + + std::valarray<int> squares = pow(v1, 2); + out << "\npow(v1, 2):"; + for (auto val : squares) + out << " " << val; + + std::valarray<int> sinhs = sinh(v2); + out << "\nsinh(v2):"; + for (auto val : sinhs) + out << " " << val; + + std::valarray<int> logs = log(v1 * v2); + out << "\nlog(v1 * v2):"; + for (auto val : logs) + out << " " << val; + + std::valarray<int> data(12); + for (size_t i = 0; i < data.size(); ++i) + data[i] = i; + out << "\nOriginal array:"; + for (auto val : data) + out << " " << val; + + std::slice slice1(2, 5, 1); + std::valarray<int> sliced1 = data[slice1]; + out << "\nSlice(2, 5, 1):"; + for (auto val : sliced1) + out << " " << val; + + std::slice slice2(1, 4, 3); + std::valarray<int> sliced2 = data[slice2]; + out << "\nSlice(1, 4, 3):"; + for (auto val : sliced2) + out << " " << val; + + data[slice1] = 99; + out << "\nArray after slice modification:"; + for (auto val : data) + out << " " << val; + + std::valarray<bool> mask = (v1 > 20); + out << "\nElements of v1 > 20:"; + for (size_t i = 0; i < v1.size(); ++i) + { + if (mask[i]) + out << " " << v1[i]; + } + + std::valarray<int> masked = v1[mask]; + out << "\nMasked array:"; + for (auto val : masked) + out << " " << val; + + std::valarray<int> shiftData(shiftData_i_data, shiftData_i_size); + out << "\nOriginal shiftData:"; + for (auto val : shiftData) + out << " " 
<< val; + + std::valarray<int> shifted = shiftData.shift(2); + out << "\nshift(2):"; + for (auto val : shifted) + out << " " << val; + + std::valarray<int> cshifted = shiftData.cshift(-1); + out << "\ncshift(-1):"; + for (auto val : cshifted) + out << " " << val; + + out << "\nSum(v1): " << v1.sum(); + out << "\nMin(v1): " << v1.min(); + out << "\nMax(v1): " << v1.max(); + + out << "\n"; + + /* Terminate with a NUL. Otherwise, we'd have to use: + __builtin_printf("%.*s", (int) out_b_sv.size(), out_b_sv.data()); + ... which nvptx 'printf', as implemented via PTX 'vprintf', doesn't support (TODO). */ + out << '\0'; + std::string_view out_b_sv = out_b.view(); + __builtin_printf("%s", out_b_sv.data()); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__valarray-1.output b/libgomp/testsuite/libgomp.c++/target-std__valarray-1.output new file mode 100644 index 0000000..c441e06 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__valarray-1.output @@ -0,0 +1,22 @@ + +v1: 10 20 30 40 50 +v2: 5 4 3 2 1 +v1 + v2: 15 24 33 42 51 +v1 - v2: 5 16 27 38 49 +v1 * v2: 50 80 90 80 50 +v1 / v2: 2 5 10 20 50 +pow(v1, 2): 100 400 900 1600 2500 +sinh(v2): 74 27 10 3 1 +log(v1 * v2): 3 4 4 4 3 +Original array: 0 1 2 3 4 5 6 7 8 9 10 11 +Slice(2, 5, 1): 2 3 4 5 6 +Slice(1, 4, 3): 1 4 7 10 +Array after slice modification: 0 1 99 99 99 99 99 7 8 9 10 11 +Elements of v1 > 20: 30 40 50 +Masked array: 30 40 50 +Original shiftData: 1 2 3 4 5 +shift(2): 3 4 5 0 0 +cshift(-1): 5 1 2 3 4 +Sum(v1): 150 +Min(v1): 10 +Max(v1): 50 diff --git a/libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent-usm.C new file mode 100644 index 0000000..41ec80e --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__valarray-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent.C new file mode 100644 index 0000000..8933072b --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent.C @@ -0,0 +1,66 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <valarray> + +#define N 50000 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand (); +} + +#pragma omp declare target +bool validate (const std::valarray<int> &arr, int data[]) +{ + for (int i = 0; i < N; ++i) + if (arr[i] != data[i] * data[i] + i) + return false; + return true; +} +#pragma omp end declare target + +int main (void) +{ + int data[N]; + bool ok; + + srand (time (NULL)); + init (data); + +#ifdef MEM_SHARED + std::valarray<int> arr (data, N); +#else + std::valarray<int> arr; +#endif + +#ifndef MEM_SHARED + #pragma omp target data map (to: data[:N]) map (alloc: arr) +#endif + { + #pragma omp target + { +#ifndef MEM_SHARED + new (&arr) std::valarray<int> (data, N); +#endif + arr *= arr; + } + + #pragma omp target teams distribute parallel for + for (int i = 0; i < N; ++i) + arr[i] += i; + + #pragma omp target map (from: ok) + { + ok = validate (arr, data); +#ifndef MEM_SHARED + arr.~valarray (); +#endif + } + } + + return ok ? 
0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__vector-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__vector-concurrent-usm.C new file mode 100644 index 0000000..967bff3 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__vector-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__vector-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__vector-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__vector-concurrent.C new file mode 100644 index 0000000..a94b4cf --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__vector-concurrent.C @@ -0,0 +1,63 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <vector> + +#define N 50000 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand (); +} + +#pragma omp declare target +bool validate (const std::vector<int> &vec, int data[]) +{ + for (int i = 0; i < N; ++i) + if (vec[i] != data[i] * data[i]) + return false; + return true; +} +#pragma omp end declare target + +int main (void) +{ + int data[N]; + bool ok; + + srand (time (NULL)); + init (data); + +#ifdef MEM_SHARED + std::vector<int> vec (data, data + N); +#else + std::vector<int> vec; +#endif + +#ifndef MEM_SHARED + #pragma omp target data map (to: data[:N]) map (alloc: vec) +#endif + { +#ifndef MEM_SHARED + #pragma omp target + new (&vec) std::vector<int> (data, data + N); +#endif + + #pragma omp target teams distribute parallel for + for (int i = 0; i < N; ++i) + vec[i] *= vec[i]; + + #pragma omp target map (from: ok) + { + ok = validate (vec, data); +#ifndef MEM_SHARED + vec.~vector (); +#endif + } + } + + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-10.c b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-10.c new file mode 100644 index 0000000..00eb48b --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-10.c @@ -0,0 +1,64 @@ +/* { dg-do run } */ + +#include <string.h> +#include <stdlib.h> +#include <assert.h> + +#define N 64 + +typedef struct { + int *arr; + int size; +} B; + +#pragma omp declare mapper (mapB : B myb) map(to: myb.size, myb.arr) \ + map(tofrom: myb.arr[0:myb.size]) +// While GCC handles more, only default is ... +#pragma omp declare mapper (default : B myb) map(to: myb.size, myb.arr) \ + map(tofrom: myb.arr[0:myb.size]) + +struct A { + int *arr1; + B *arr2; + int arr3[N]; +}; + +int +main (int argc, char *argv[]) +{ + struct A var; + + memset (&var, 0, sizeof var); + var.arr1 = (int *) calloc (N, sizeof (int)); + var.arr2 = (B *) malloc (sizeof (B)); + var.arr2->arr = (int *) calloc (N, sizeof (float)); + var.arr2->size = N; + + { + // ... 
permitted here: + #pragma omp declare mapper (struct A x) map(to: x.arr1, x.arr2) \ + map(tofrom: x.arr1[0:N]) \ + map(mapper(default), tofrom: x.arr2[0:1]) + #pragma omp target + { + for (int i = 0; i < N; i++) + { + var.arr1[i]++; + var.arr2->arr[i]++; + } + } + } + + for (int i = 0; i < N; i++) + { + assert (var.arr1[i] == 1); + assert (var.arr2->arr[i] == 1); + assert (var.arr3[i] == 0); + } + + free (var.arr1); + free (var.arr2->arr); + free (var.arr2); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-11.c b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-11.c new file mode 100644 index 0000000..942d6a5 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-11.c @@ -0,0 +1,59 @@ +/* { dg-do run } */ + +#include <string.h> +#include <stdlib.h> +#include <assert.h> + +#define N 64 + +typedef struct B_tag { + int *arr; + int size; +} B; + +#pragma omp declare mapper (B myb) map(to: myb.size, myb.arr) \ + map(tofrom: myb.arr[0:myb.size]) + +struct A { + int *arr1; + B *arr2; + int arr3[N]; +}; + +int +main (int argc, char *argv[]) +{ + struct A var; + + memset (&var, 0, sizeof var); + var.arr1 = (int *) calloc (N, sizeof (int)); + var.arr2 = (B *) malloc (sizeof (B)); + var.arr2->arr = (int *) calloc (N, sizeof (int)); + var.arr2->size = N; + + { + #pragma omp declare mapper (struct A x) map(to: x.arr1, x.arr2) \ + map(tofrom: x.arr1[0:N]) map(tofrom: x.arr2[0:1]) + #pragma omp target + { + for (int i = 0; i < N; i++) + { + var.arr1[i]++; + var.arr2->arr[i]++; + } + } + } + + for (int i = 0; i < N; i++) + { + assert (var.arr1[i] == 1); + assert (var.arr2->arr[i] == 1); + assert (var.arr3[i] == 0); + } + + free (var.arr1); + free (var.arr2->arr); + free (var.arr2); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-12.c b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-12.c new file mode 100644 index 0000000..cfc6a91 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-12.c @@ -0,0 +1,94 @@ +/* { dg-do run } */ + +#include <string.h> +#include <stdlib.h> +#include <assert.h> + +#define N 64 + +typedef struct { + int *arr; + int size; +} B; + +#pragma omp declare mapper (samename : B myb) map(to: myb.size, myb.arr) \ + map(tofrom: myb.arr[0:myb.size]) +// While GCC handles more, only default is ... +#pragma omp declare mapper (default : B myb) map(to: myb.size, myb.arr) \ + map(tofrom: myb.arr[0:myb.size]) +typedef struct { + int *arr; + int size; +} C; + + +struct A { + int *arr1; + B *arr2; + C *arr3; +}; + +int +main (int argc, char *argv[]) +{ + struct A var; + + memset (&var, 0, sizeof var); + var.arr1 = (int *) calloc (N, sizeof (int)); + var.arr2 = (B *) malloc (sizeof (B)); + var.arr2->arr = (int *) calloc (N, sizeof (int)); + var.arr2->size = N; + var.arr3 = (C *) malloc (sizeof (C)); + var.arr3->arr = (int *) calloc (N, sizeof (int)); + var.arr3->size = N; + + { + // ... permitted here. + #pragma omp declare mapper (struct A x) map(to: x.arr1, x.arr2) \ + map(tofrom: x.arr1[0:N]) \ + map(mapper(default), tofrom: x.arr2[0:1]) + #pragma omp target + { + for (int i = 0; i < N; i++) + { + var.arr1[i]++; + var.arr2->arr[i]++; + } + } + } + + { + #pragma omp declare mapper (samename : C myc) map(to: myc.size, myc.arr) \ + map(tofrom: myc.arr[0:myc.size]) + // While GCC handles more, only default is ... + #pragma omp declare mapper (default : C myc) map(to: myc.size, myc.arr) \ + map(tofrom: myc.arr[0:myc.size]) + // ... permitted here. 
+ #pragma omp declare mapper (struct A x) map(to: x.arr1, x.arr3) \ + map(tofrom: x.arr1[0:N]) \ + map(mapper( default ) , tofrom: *x.arr3) + #pragma omp target + { + for (int i = 0; i < N; i++) + { + var.arr1[i]++; + var.arr3->arr[i]++; + } + } + } + + for (int i = 0; i < N; i++) + { + assert (var.arr1[i] == 2); + assert (var.arr2->arr[i] == 1); + assert (var.arr3->arr[i] == 1); + } + + free (var.arr1); + free (var.arr2->arr); + free (var.arr2); + free (var.arr3->arr); + free (var.arr3); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-13.c b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-13.c new file mode 100644 index 0000000..c4784eb --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-13.c @@ -0,0 +1,55 @@ +/* { dg-do run } */ + +#include <assert.h> + +struct T { + int a; + int b; + int c; +}; + +void foo (void) +{ + struct T x; + x.a = x.b = x.c = 0; + +#pragma omp target + { + x.a++; + x.c++; + } + + assert (x.a == 1); + assert (x.b == 0); + assert (x.c == 1); +} + +// An identity mapper. This should do the same thing as the default! +#pragma omp declare mapper (struct T v) map(v) + +void bar (void) +{ + struct T x; + x.a = x.b = x.c = 0; + +#pragma omp target + { + x.b++; + } + +#pragma omp target map(x) + { + x.a++; + } + + assert (x.a == 1); + assert (x.b == 1); + assert (x.c == 0); +} + +int main (int argc, char *argv[]) +{ + foo (); + bar (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-14.c b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-14.c new file mode 100644 index 0000000..3e6027e --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-14.c @@ -0,0 +1,57 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <assert.h> + +struct Z { + int *arr; +}; + +void baz (struct Z *zarr, int len) +{ +#pragma omp declare mapper (struct Z myvar) map(to: myvar.arr) \ + map(tofrom: myvar.arr[0:len]) + zarr[0].arr = (int *) calloc (len, sizeof (int)); + zarr[5].arr = (int *) calloc (len, sizeof (int)); + +#pragma omp target map(zarr, *zarr) + { + for (int i = 0; i < len; i++) + zarr[0].arr[i]++; + } + +#pragma omp target map(zarr, zarr[5]) + { + for (int i = 0; i < len; i++) + zarr[5].arr[i]++; + } + +#pragma omp target map(zarr[5]) + { + for (int i = 0; i < len; i++) + zarr[5].arr[i]++; + } + +#pragma omp target map(zarr, zarr[5:1]) + { + for (int i = 0; i < len; i++) + zarr[5].arr[i]++; + } + + for (int i = 0; i < len; i++) + assert (zarr[0].arr[i] == 1); + + for (int i = 0; i < len; i++) + assert (zarr[5].arr[i] == 3); + + free (zarr[5].arr); + free (zarr[0].arr); +} + +int +main (int argc, char *argv[]) +{ + struct Z myzarr[10]; + baz (myzarr, 256); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-9.c b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-9.c new file mode 100644 index 0000000..324d535 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-9.c @@ -0,0 +1,62 @@ +/* { dg-do run } */ + +#include <string.h> +#include <stdlib.h> +#include <assert.h> + +#define N 64 + +struct A { + int *arr1; + float *arr2; + int arr3[N]; +}; + +int +main (int argc, char *argv[]) +{ + struct A var; + + memset (&var, 0, sizeof var); + var.arr1 = (int *) calloc (N, sizeof (int)); + var.arr2 = (float *) calloc (N, sizeof (float)); + + { + #pragma omp declare mapper (struct A x) map(to: x.arr1) \ + map(tofrom: x.arr1[0:N]) + #pragma omp target + { + for (int i = 0; i < N; i++) + var.arr1[i]++; + } + } + + { + 
#pragma omp declare mapper (struct A x) map(to: x.arr2) \ + map(tofrom: x.arr2[0:N]) + #pragma omp target + { + for (int i = 0; i < N; i++) + var.arr2[i]++; + } + } + + { + #pragma omp declare mapper (struct A x) map(tofrom: x.arr3[0:N]) + #pragma omp target + { + for (int i = 0; i < N; i++) + var.arr3[i]++; + } + } + + for (int i = 0; i < N; i++) + { + assert (var.arr1[i] == 1); + assert (var.arr2[i] == 1); + assert (var.arr3[i] == 1); + } + + free (var.arr1); + free (var.arr2); +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/interop-2.c b/libgomp/testsuite/libgomp.c-c++-common/interop-2.c new file mode 100644 index 0000000..a7526dc --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/interop-2.c @@ -0,0 +1,129 @@ +/* { dg-do run } */ +/* { dg-additional-options "-lm" } */ + +/* Note: At the time this program was written, Nvptx was not asynchronous + enough to trigger the issue (with a 'nowait' added); however, on + AMD GPUs, it triggered. */ + +/* Test whether nowait / dependency is handled correctly. + Motivated by OpenMP_VV's 5.1/interop/test_interop_target.c + + The code only creates a streaming object without actually using it, + except for dependency tracking. + + Note that there is a difference between having a streaming (targetsync) object + and not (= omp_interop_none); at least if one assumes that omp_interop_none + does not include 'targetsync' as (effective) interop type - in that case, + 'nowait' has no effect and the 'depend' is active as an included task, otherwise + the code continues with the depend being active only for the about to be + destroyed or used thread. + + The OpenMP spec states (here 6.0): + "If the interop-type set includes 'targetsync', an empty mergeable task is + generated. If the 'nowait' clause is not present on the construct then + the task is also an included task. If the interop-type set does not + include 'targetsync', the 'nowait' clause has no effect. Any depend + clauses that are present on the construct apply to the generated task." */ + +#include <omp.h> + +void +test_async (const int dev) +{ + constexpr int N = 2048; + constexpr int ulp = 4; + constexpr double M_PI = 2.0 * __builtin_acos (0.0); + omp_interop_t obj1, obj2; + double A[N] = { }; + int B[N] = { }; + + /* Create interop object. */ + #pragma omp interop device(dev) init(targetsync : obj1, obj2) + + if (dev == omp_initial_device || dev == omp_get_num_devices ()) + { + if (obj1 != omp_interop_none || obj2 != omp_interop_none) + __builtin_abort (); + } + else + { + if (obj1 == omp_interop_none || obj2 == omp_interop_none) + __builtin_abort (); + } + + /* DOUBLE */ + + /* Now in the background update it, slowly enough that the + code afterwards is reached while still running asynchronously. + As OpenMP_VV's Issue #863 shows, the overhead is high enough to + fail even when only doing an atomic integer increment. */ + + #pragma omp target device(dev) map(A) depend(out: A[:N]) nowait + for (int i = 0; i < N; i++) + #pragma omp atomic update + A[i] += __builtin_sin (2*i*M_PI/N); + + /* DESTROY takes care of the dependency such that ... */ + + if (obj1 == omp_interop_none) + { + // Same as below as 'nowait' is ignored. + #pragma omp interop destroy(obj1) depend(in: A[:N]) nowait + } + else + { + #pragma omp interop destroy(obj1) depend(in: A[:N]) + } + + /* ... this code is only executed once the dependency has been fulfilled. */ + + /* Check the value - part I: quick, avoid A[0] == sin(0) = 0.
*/ + for (int i = 1; i < N; i++) + if (A[i] == 0.0) + __builtin_abort (); + + /* Check the value - part II: thoroughly */ + for (int i = 0; i < N; i++) + { + double x = A[i]; + double y = __builtin_sin (2*i*M_PI/N); + if (__builtin_fabs (x - y) > ulp * __builtin_fabs (x+y) * __DBL_EPSILON__) + __builtin_abort (); + } + + /* Integer */ + + #pragma omp target device(dev) map(B) depend(out: B[:N]) nowait + for (int i = 0; i < N; i++) + #pragma omp atomic update + B[i] += 42; + + /* Same - but using USE. */ + if (obj2 == omp_interop_none) + { + // Same as below as 'nowait' is ignored. + #pragma omp interop use(obj2) depend(in: B[:N]) nowait + } + else + { + #pragma omp interop use(obj2) depend(in: B[:N]) + } + + for (int i = 0; i < N; i++) + if (B[i] != 42) + __builtin_abort (); + + #pragma omp interop destroy(obj2) +} + +int +main () +{ + int ndev = omp_get_num_devices (); + + for (int dev = 0; dev <= ndev; dev++) + test_async (dev); + test_async (omp_initial_device); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/metadirective-1.c b/libgomp/testsuite/libgomp.c-c++-common/metadirective-1.c index a57d6fd..fbe4ac3 100644 --- a/libgomp/testsuite/libgomp.c-c++-common/metadirective-1.c +++ b/libgomp/testsuite/libgomp.c-c++-common/metadirective-1.c @@ -1,4 +1,5 @@ -/* { dg-do run } */ +/* { dg-do run { target { ! offload_target_nvptx } } } */ +/* { dg-do compile { target offload_target_nvptx } } */ #define N 100 @@ -7,12 +8,17 @@ f (int x[], int y[], int z[]) { int i; + // The following fails as on the host the target side cannot be + // resolved - and the 'teams' or not status affects how 'target' + // is called. + // Note also the dg-do compile above for offload_target_nvptx #pragma omp target map(to: x[0:N], y[0:N]) map(from: z[0:N]) #pragma omp metadirective \ when (device={arch("nvptx")}: teams loop) \ default (parallel loop) for (i = 0; i < N; i++) z[i] = x[i] * y[i]; + /* { dg-bogus "'target' construct with nested 'teams' construct contains directives outside of the 'teams' construct" "PR118694" { xfail offload_target_nvptx } .-6 } */ } int diff --git a/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-2.c b/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-2.c new file mode 100644 index 0000000..b36d2f5 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-2.c @@ -0,0 +1,62 @@ +// PR libgomp/120444 +// Async version + +#include <omp.h> + +int main() +{ + #pragma omp parallel for + for (int dev = omp_initial_device; dev <= omp_get_num_devices (); dev++) + { + char *ptr = (char *) omp_target_alloc (sizeof(int) * 1024, dev); + + omp_depend_t dep; + #pragma omp depobj(dep) depend(inout: ptr) + + /* Play also around with the alignment - as hsa_amd_memory_fill operates + on multiples of 4 bytes (uint32_t).
*/ + + for (int start = 0; start < 32; start++) + for (int tail = 0; tail < 32; tail++) + { + unsigned char val = '0' + start + tail; +#if __cplusplus + void *ptr2 = omp_target_memset_async (ptr + start, val, + 1024 - start - tail, dev, 0); +#else + void *ptr2 = omp_target_memset_async (ptr + start, val, + 1024 - start - tail, dev, 0, nullptr); +#endif + if (ptr + start != ptr2) + __builtin_abort (); + + #pragma omp taskwait + + #pragma omp target device(dev) is_device_ptr(ptr) depend(depobj: dep) nowait + for (int i = start; i < 1024 - start - tail; i++) + { + if (ptr[i] != val) + __builtin_abort (); + ptr[i] += 2; + } + + omp_target_memset_async (ptr + start, val + 3, + 1024 - start - tail, dev, 1, &dep); + + #pragma omp target device(dev) is_device_ptr(ptr) depend(depobj: dep) nowait + for (int i = start; i < 1024 - start - tail; i++) + { + if (ptr[i] != val + 3) + __builtin_abort (); + ptr[i] += 1; + } + + omp_target_memset_async (ptr + start, val - 3, + 1024 - start - tail, dev, 1, &dep); + + #pragma omp taskwait depend (depobj: dep) + } + #pragma omp depobj(dep) destroy + omp_target_free (ptr, dev); + } +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-3.c b/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-3.c new file mode 100644 index 0000000..c0e4fa9 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-3.c @@ -0,0 +1,80 @@ +#include <stddef.h> +#include <stdint.h> +#include <omp.h> + +#define MIN(x,y) ((x) < (y) ? x : y) + +enum { N = 524288 + 8 }; + +static void +init_val (int8_t *ptr, int val, size_t count) +{ + #pragma omp target is_device_ptr(ptr) firstprivate(val, count) + __builtin_memset (ptr, val, count); +} + +static void +check_val (int8_t *ptr, int val, size_t count) +{ + if (count == 0) + return; + #pragma omp target is_device_ptr(ptr) firstprivate(val, count) + for (size_t i = 0; i < count; i++) + if (ptr[i] != val) __builtin_abort (); +} + +static void +test_it (int8_t *ptr, int lshift, size_t count) +{ + if (N < count + lshift) __builtin_abort (); + if (lshift >= 4) __builtin_abort (); + ptr += lshift; + + init_val (ptr, 'z', MIN (count + 32, N - lshift)); + + omp_target_memset (ptr, '1', count, omp_get_default_device()); + + check_val (ptr, '1', count); + check_val (ptr + count, 'z', MIN (32, N - lshift - count)); +} + + +int main() +{ + size_t size; + int8_t *ptr = (int8_t *) omp_target_alloc (N + 3, omp_get_default_device()); + ptr += (4 - (uintptr_t) ptr % 4) % 4; + if ((uintptr_t) ptr % 4 != 0) __builtin_abort (); + + test_it (ptr, 0, 1); + test_it (ptr, 3, 1); + test_it (ptr, 0, 4); + test_it (ptr, 3, 4); + test_it (ptr, 0, 5); + test_it (ptr, 3, 5); + test_it (ptr, 0, 6); + test_it (ptr, 3, 6); + + for (int i = 1; i <= 9; i++) + { + switch (i) + { + case 1: size = 16; break; // = 2^4 bytes + case 2: size = 32; break; // = 2^5 bytes + case 3: size = 64; break; // = 2^6 bytes + case 4: size = 128; break; // = 2^7 bytes + case 5: size = 256; break; // = 2^8 bytes + case 6: size = 512; break; // = 2^9 bytes + case 7: size = 65536; break; // = 2^16 bytes + case 8: size = 262144; break; // = 2^18 bytes + case 9: size = 524288; break; // = 2^19 bytes + default: __builtin_abort (); + } + test_it (ptr, 0, size); + test_it (ptr, 3, size); + test_it (ptr, 0, size + 1); + test_it (ptr, 3, size + 1); + test_it (ptr, 3, size + 2); + } + omp_target_free (ptr, omp_get_default_device()); +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset.c b/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset.c
new file mode 100644 index 0000000..01909f8 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset.c @@ -0,0 +1,62 @@ +// PR libgomp/120444 + +#include <omp.h> + +int main() +{ + for (int dev = omp_initial_device; dev < omp_get_num_devices (); dev++) + { + char *ptr = (char *) omp_target_alloc (sizeof(int) * 1024, dev); + + /* Play also around with the alignment - as hsa_amd_memory_fill operates + on multiples of 4 bytes (uint32_t). */ + + for (int start = 0; start < 32; start++) + for (int tail = 0; tail < 32; tail++) + { + unsigned char val = '0' + start + tail; + void *ptr2 = omp_target_memset (ptr + start, val, + 1024 - start - tail, dev); + if (ptr + start != ptr2) + __builtin_abort (); + + #pragma omp target device(dev) is_device_ptr(ptr) + for (int i = start; i < 1024 - start - tail; i++) + if (ptr[i] != val) + __builtin_abort (); + + } + + /* Check 'small' values for correctness. */ + + for (int start = 0; start < 32; start++) + for (int size = 0; size <= 64 + 32; size++) + { + omp_target_memset (ptr, 'a' - 2, 1024, dev); + + unsigned char val = '0' + start + size % 32; + void *ptr2 = omp_target_memset (ptr + start, val, size, dev); + + if (ptr + start != ptr2) + __builtin_abort (); + + if (size == 0) + continue; + + #pragma omp target device(dev) is_device_ptr(ptr) + { + for (int i = 0; i < start; i++) + if (ptr[i] != 'a' - 2) + __builtin_abort (); + for (int i = start; i < start + size; i++) + if (ptr[i] != val) + __builtin_abort (); + for (int i = start + size + 1; i < 1024; i++) + if (ptr[i] != 'a' - 2) + __builtin_abort (); + } + } + + omp_target_free (ptr, dev); + } +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/pr96390.c b/libgomp/testsuite/libgomp.c-c++-common/pr96390.c index b89f934..ca7865d 100644 --- a/libgomp/testsuite/libgomp.c-c++-common/pr96390.c +++ b/libgomp/testsuite/libgomp.c-c++-common/pr96390.c @@ -1,7 +1,7 @@ /* { dg-additional-options "-O0 -fdump-tree-omplower" } */ /* { dg-additional-options "-foffload=-Wa,--verify" { target offload_target_nvptx } } */ /* { dg-require-alias "" } */ -/* { dg-xfail-if "PR 97102/PR 97106 - .alias not (yet) supported for nvptx" { offload_target_nvptx } } */ +/* { dg-xfail-if PR105018 { offload_target_nvptx } } */ #ifdef __cplusplus extern "C" { diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c b/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c new file mode 100644 index 0000000..9bf949a --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c @@ -0,0 +1,3 @@ +/* { dg-additional-options -O0 } */ + +#include "target-abi-struct-1.c" diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1.c b/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1.c new file mode 100644 index 0000000..d9268af --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1.c @@ -0,0 +1 @@ +#include "../libgomp.oacc-c-c++-common/abi-struct-1.c" diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-cdtor-1.c b/libgomp/testsuite/libgomp.c-c++-common/target-cdtor-1.c new file mode 100644 index 0000000..e6099cf --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/target-cdtor-1.c @@ -0,0 +1,89 @@ +/* Offloaded 'constructor' and 'destructor' functions. 
*/ + +#include <omp.h> + +#pragma omp declare target + +static void +__attribute__((constructor)) +initHD1() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +static void +__attribute__((constructor)) +initHD2() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +static void +__attribute__((destructor)) +finiHD1() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +static void +__attribute__((destructor)) +finiHD2() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +#pragma omp end declare target + +static void +__attribute__((constructor)) +initH1() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +static void +__attribute__((destructor)) +finiH2() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +int main() +{ + int c = 0; + + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + +#pragma omp target map(c) + { + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + } + +#pragma omp target map(c) + { + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + } + + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + + return 0; +} + +/* The order is undefined, in which same-priority 'constructor' functions, and 'destructor' functions are run. + { dg-output {init[^,]+, 1[\r\n]+} } + { dg-output {init[^,]+, 1[\r\n]+} } + { dg-output {init[^,]+, 1[\r\n]+} } + { dg-output {main:1, 1[\r\n]+} } + { dg-output {initHD[^,]+, 0[\r\n]+} { target offload_device } } + { dg-output {initHD[^,]+, 0[\r\n]+} { target offload_device } } + { dg-output {main:2, 1[\r\n]+} { target { ! offload_device } } } + { dg-output {main:2, 0[\r\n]+} { target offload_device } } + { dg-output {main:3, 1[\r\n]+} { target { ! offload_device } } } + { dg-output {main:3, 0[\r\n]+} { target offload_device } } + { dg-output {main:4, 1[\r\n]+} } + { dg-output {finiHD[^,]+, 0[\r\n]+} { target offload_device } } + { dg-output {finiHD[^,]+, 0[\r\n]+} { target offload_device } } + { dg-output {fini[^,]+, 1[\r\n]+} } + { dg-output {fini[^,]+, 1[\r\n]+} } + { dg-output {fini[^,]+, 1[\r\n]+} } +*/ diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/aarch64.exp b/libgomp/testsuite/libgomp.c-target/aarch64/aarch64.exp new file mode 100644 index 0000000..02d5503 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-target/aarch64/aarch64.exp @@ -0,0 +1,57 @@ +# Copyright (C) 2006-2025 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +# Load support procs. +load_lib libgomp-dg.exp +load_gcc_lib gcc-dg.exp + +# Exit immediately if this isn't an AArch64 target. 
+if {![istarget aarch64*-*-*] } then { + return +} + +lappend ALWAYS_CFLAGS "compiler=$GCC_UNDER_TEST" + +if { [check_effective_target_aarch64_sve] } { + set sve_flags "" +} else { + set sve_flags "-march=armv8.2-a+sve" +} + +# Initialize `dg'. +dg-init + +#if ![check_effective_target_fopenmp] { +# return +#} + +# Turn on OpenMP. +lappend ALWAYS_CFLAGS "additional_flags=-fopenmp" + +# Gather a list of all tests. +set tests [lsort [find $srcdir/$subdir *.c]] + +set ld_library_path $always_ld_library_path +append ld_library_path [gcc-set-multilib-library-path $GCC_UNDER_TEST] +set_ld_library_path_env_vars + +# Main loop. +dg-runtest $tests "" $sve_flags + +# All done. +dg-finish diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/firstprivate.c b/libgomp/testsuite/libgomp.c-target/aarch64/firstprivate.c new file mode 100644 index 0000000..58674e2 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-target/aarch64/firstprivate.c @@ -0,0 +1,129 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-msve-vector-bits=256 -fopenmp -O2" } */ + +#pragma GCC target "+sve" + +#include <arm_sve.h> +#include <omp.h> + +static void __attribute__ ((noipa)) +vec_compare (svint32_t *x, svint32_t y) +{ + svbool_t p = svnot_b_z (svptrue_b32 (), svcmpeq_s32 (svptrue_b32 (), *x, y)); + + if (svptest_any (svptrue_b32 (), p)) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +firstprivate_sections () +{ + int b[8], c[8]; + svint32_t vb, vc; + int i; + +#pragma omp parallel for + for (i = 0; i < 8; i++) + { + b[i] = i; + c[i] = i + 1; + } + + vb = svld1_s32 (svptrue_b32 (), b); + vc = svld1_s32 (svptrue_b32 (), c); + +#pragma omp parallel sections firstprivate (vb, vc) + { + #pragma omp section + vec_compare (&vb, svindex_s32 (0, 1)); + vec_compare (&vc, svindex_s32 (1, 1)); + + #pragma omp section + vec_compare (&vb, svindex_s32 (0, 1)); + vec_compare (&vc, svindex_s32 (1, 1)); + } + +} + +void __attribute__ ((noipa)) +firstprivate_for () +{ + + int a[32], b[32], c[32]; + svint32_t va, vb, vc; + int i; + +#pragma omp parallel for + for (i = 0; i < 32; i++) + { + b[i] = i; + c[i] = i + 1; + } + + vb = svindex_s32 (1, 0); + vc = svindex_s32 (0, 1); + +#pragma omp parallel for firstprivate (vb, vc) private (va) + for (i = 0; i < 4; i++) + { + svint32_t tb, tc; + vec_compare (&vb, svindex_s32 (1, 0)); + vec_compare (&vc, svindex_s32 (0, 1)); + tb = svld1_s32 (svptrue_b32 (), b + i * 8); + tc = svld1_s32 (svptrue_b32 (), c + i * 8); + va = svadd_s32_z (svptrue_b32 (), vb, vc); + va = svadd_s32_z (svptrue_b32 (), va, tb); + va = svadd_s32_z (svptrue_b32 (), va, tc); + svst1_s32 (svptrue_b32 (), a + i * 8, va); + } + + for (i = 0; i < 32; i++) + if (a[i] != b[i] + c[i] + vb[i % 8] + vc[i % 8]) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +firstprivate_distribute () +{ + + int a[32], b[32], c[32]; + svint32_t va, vb, vc; + int i; + +#pragma omp parallel for + for (i = 0; i < 32; i++) + { + b[i] = i; + c[i] = i + 1; + } + + vb = svindex_s32 (1, 0); + vc = svindex_s32 (0, 1); + +#pragma omp teams +#pragma omp distribute firstprivate (vb, vc) private (va) + for (i = 0; i < 4; i++) + { + svint32_t tb, tc; + vec_compare (&vb, svindex_s32 (1, 0)); + vec_compare (&vc, svindex_s32 (0, 1)); + tb = svld1_s32 (svptrue_b32 (), b + i * 8); + tc = svld1_s32 (svptrue_b32 (), c + i * 8); + va = svadd_s32_z (svptrue_b32 (), vb, vc); + va = svadd_s32_z (svptrue_b32 (), va, tb); + va = svadd_s32_z (svptrue_b32 (), va, tc); + svst1_s32 (svptrue_b32 (), a + i * 8, va); + } + + for (i = 0; i < 32; i++) + if 
(a[i] != b[i] + c[i] + vb[i % 8] + vc[i % 8]) + __builtin_abort (); +} + +int +main () +{ + firstprivate_for (); + firstprivate_sections (); + firstprivate_distribute (); +} diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/lastprivate.c b/libgomp/testsuite/libgomp.c-target/aarch64/lastprivate.c new file mode 100644 index 0000000..2f93d7b --- /dev/null +++ b/libgomp/testsuite/libgomp.c-target/aarch64/lastprivate.c @@ -0,0 +1,171 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-msve-vector-bits=256 -fopenmp -O2" } */ + +#pragma GCC target "+sve" + +#include <arm_sve.h> +#include <omp.h> + +static svint32_t __attribute__ ((noipa)) +foo (svint32_t *vb, svint32_t *vc, int tn) +{ + svint32_t temp = svindex_s32 (tn, 0); + temp = svadd_s32_z (svptrue_b32 (), temp, *vb); + return svadd_s32_z (svptrue_b32 (), temp, *vc); +} + +void __attribute__ ((noipa)) +lastprivate_sections () +{ + int a[8], b[8], c[8]; + svint32_t va, vb, vc; + int i; + +#pragma omp parallel for + for (i = 0; i < 8; i++) + { + b[i] = i; + c[i] = i + 1; + } + +#pragma omp parallel sections lastprivate (vb, vc) num_threads (2) + { + #pragma omp section + vb = svld1_s32 (svptrue_b32 (), b); + #pragma omp section + vb = svld1_s32 (svptrue_b32 (), b); + vc = svld1_s32 (svptrue_b32 (), c); + } + + va = svadd_s32_z (svptrue_b32 (), vb, vc); + svst1_s32 (svptrue_b32 (), a, va); + + for (i = 0; i < 8; i++) + if (a[i] != b[i] + c[i]) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +lastprivate_for () +{ + int a[32], b[32], c[32]; + int aa[8], bb[8], cc[8]; + svint32_t va, vb, vc; + int i, tn; + +#pragma omp parallel for + for (i = 0; i < 32; i++) + { + b[i] = i; + c[i] = i + 1; + } + +#pragma omp parallel for lastprivate (va, vb, vc, tn) + for (i = 0; i < 4; i++) + { + vb = svld1_s32 (svptrue_b32 (), b + i * 8); + vc = svld1_s32 (svptrue_b32 (), c + i * 8); + tn = i; + va = foo (&vb, &vc, tn); + svst1_s32 (svptrue_b32 (), a + i * 8, va); + } + + svst1_s32 (svptrue_b32 (), aa, va); + svst1_s32 (svptrue_b32 (), bb, vb); + svst1_s32 (svptrue_b32 (), cc, vc); + + for (i = 0; i < 8; i++) + if (aa[i] != bb[i] + cc[i] + tn) + __builtin_abort (); + + for (i = 0; i < 32; i++) + if (a[i] != b[i] + c[i] + i / 8) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +lastprivate_simd () +{ + + int a[64], b[64], c[64]; + int aa[8], bb[8], cc[8]; + svint32_t va, vb, vc; + int i; + +#pragma omp parallel for + for (i = 0; i < 64; i++) + { + b[i] = i; + c[i] = i + 1; + } + +#pragma omp simd lastprivate (va, vb, vc) + for (i = 0; i < 8; i++) + { + vb = svld1_s32 (svptrue_b32 (), b + i * 8); + vc = svld1_s32 (svptrue_b32 (), c + i * 8); + va = svadd_s32_z (svptrue_b32 (), vb, vc); + svst1_s32 (svptrue_b32 (), a + i * 8, va); + } + + svst1_s32 (svptrue_b32 (), aa, va); + svst1_s32 (svptrue_b32 (), bb, vb); + svst1_s32 (svptrue_b32 (), cc, vc); + + for (i = 0; i < 8; i++) + if (aa[i] != bb[i] + cc[i]) + __builtin_abort (); + + for (i = 0; i < 64; i++) + if (a[i] != b[i] + c[i]) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +lastprivate_distribute () +{ + + int a[32], b[32], c[32]; + int aa[8], bb[8], cc[8]; + svint32_t va, vb, vc; + int i, tn; + +#pragma omp parallel for + for (i = 0; i < 32; i++) + { + b[i] = i; + c[i] = i + 1; + } + +#pragma omp teams +#pragma omp distribute lastprivate (va, vb, vc, tn) + for (i = 0; i < 4; i++) + { + vb = svld1_s32 (svptrue_b32 (), b + i * 8); + vc = svld1_s32 (svptrue_b32 (), c + i * 8); + tn = i; + va = foo (&vb, &vc, tn); + svst1_s32 (svptrue_b32 (), a + i * 8, va); + 
} + + svst1_s32 (svptrue_b32 (), aa, va); + svst1_s32 (svptrue_b32 (), bb, vb); + svst1_s32 (svptrue_b32 (), cc, vc); + + for (i = 0; i < 8; i++) + if (aa[i] != bb[i] + cc[i] + tn) + __builtin_abort (); + + for (i = 0; i < 32; i++) + if (a[i] != b[i] + c[i] + i / 8) + __builtin_abort (); +} + +int +main () +{ + lastprivate_for (); + lastprivate_sections (); + lastprivate_simd (); + lastprivate_distribute (); +} diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/private.c b/libgomp/testsuite/libgomp.c-target/aarch64/private.c new file mode 100644 index 0000000..fed5370 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-target/aarch64/private.c @@ -0,0 +1,107 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-msve-vector-bits=256 -fopenmp -O2" } */ + +#pragma GCC target "+sve" + +#include <arm_sve.h> +#include <omp.h> + +static void __attribute__ ((noipa)) +compare_vec (svint32_t *x, svint32_t y) +{ + svbool_t p = svnot_b_z (svptrue_b32 (), svcmpeq_s32 (svptrue_b32 (), *x, y)); + + if (svptest_any (svptrue_b32 (), p)) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +private () +{ + svint32_t a; +#pragma omp parallel private (a) num_threads (10) + { + a = svindex_s32 (omp_get_thread_num (), 0); + +#pragma omp barrier + compare_vec (&a, svindex_s32 (omp_get_thread_num (), 0)); + } +} + +void __attribute__ ((noipa)) +firstprivate () +{ + svint32_t a = svindex_s32 (1,1); + svint32_t b; + +#pragma omp parallel private (b) firstprivate (a) num_threads (12) + { + compare_vec (&a, svindex_s32 (1, 1)); + b = svindex_s32 (omp_get_thread_num (), 0); + +#pragma omp barrier + compare_vec (&a, svindex_s32 (1, 1)); + compare_vec (&b, svindex_s32 (omp_get_thread_num (), 0)); + if (omp_get_thread_num () == 5) + { + a = svindex_s32 (1, 2); + b = svindex_s32 (10, 0); + } + +#pragma omp barrier + if (omp_get_thread_num () == 5) + { + compare_vec (&a, svindex_s32 (1, 2)); + compare_vec (&b, svindex_s32 (10, 0)); + } + else + { + compare_vec (&a, svindex_s32 (1, 1)); + compare_vec (&b, svindex_s32 (omp_get_thread_num (), 0)); + } + } +} + +void __attribute__ ((noipa)) +lastprivate () +{ + svint32_t a = svindex_s32 (1,1); + svint32_t b; + int i; + +#pragma omp parallel for private (a) lastprivate (b) + for (i = 0; i < 16; i++) + { + b = svindex_s32 (i, 0); + + compare_vec (&b, svindex_s32 (i, 0)); + if (i == 5) + { + a = svindex_s32 (1, 2); + b = svindex_s32 (10, 0); + } + else + a = svindex_s32 (1, 1); + + if (i == 5) + { + compare_vec (&a, svindex_s32 (1, 2)); + compare_vec (&b, svindex_s32 (10, 0)); + } + else + { + compare_vec (&a, svindex_s32 (1, 1)); + compare_vec (&b, svindex_s32 (i, 0)); + } + } + + compare_vec (&b, svindex_s32 (15, 0)); +} + +int +main () +{ + private (); + firstprivate (); + lastprivate (); +} diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/shared.c b/libgomp/testsuite/libgomp.c-target/aarch64/shared.c new file mode 100644 index 0000000..340a668 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-target/aarch64/shared.c @@ -0,0 +1,266 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-msve-vector-bits=256 -fopenmp -O2" } */ + +#pragma GCC target "+sve" + +#include <arm_sve.h> +#include <stdlib.h> +#include <omp.h> + +static void __attribute__ ((noipa)) +compare_vec (svint32_t x, svint32_t y) +{ + svbool_t p = svnot_b_z (svptrue_b32 (), svcmpeq_s32 (svptrue_b32 (), x, y)); + + if (svptest_any (svptrue_b32 (), p)) + __builtin_abort (); +} + +static void __attribute__ ((noipa)) +compare_vecb (svbool_t x, svbool_t y) +{ + svbool_t p = 
sveor_b_z (svptrue_b32 (), x, y); + + if (svptest_any (svptrue_b32 (), p)) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +implicit_shared_default (svint32_t a, svint32_t b, svbool_t p) +{ + +#pragma omp parallel default (shared) num_threads (10) + { + /* 'a', 'b' and 'p' are implicitly shared. */ + compare_vec (a, svindex_s32 (0, 1)); + compare_vec (b, svindex_s32 (8, 1)); + compare_vecb (p, svptrue_b32 ()); + +#pragma omp barrier + if (omp_get_thread_num () == 2) + a = svadd_s32_z (p, a, b); + +#pragma omp barrier + if (omp_get_thread_num () == 0) + { + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svindex_s32 (8, 1)); + compare_vecb (p, svptrue_b32 ()); + b = svadd_s32_z (p, a, b); + } + +#pragma omp barrier + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); + +#pragma omp barrier + if (omp_get_thread_num () == 0 || omp_get_thread_num () == 2) + { + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); + } + } +} + +void __attribute__ ((noipa)) +explicit_shared (svint32_t a, svint32_t b, svbool_t p) +{ + +#pragma omp parallel shared (a, b, p) num_threads (12) + { + /* 'a', 'b' and 'p' are explicitly shared. */ + compare_vec (a, svindex_s32 (0, 1)); + compare_vec (b, svindex_s32 (8, 1)); + compare_vecb (p, svptrue_b32 ()); + +#pragma omp barrier + if (omp_get_thread_num () == 2) + a = svadd_s32_z (p, a, b); + +#pragma omp barrier + if (omp_get_thread_num () == 0) + { + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svindex_s32 (8, 1)); + compare_vecb (p, svptrue_b32 ()); + b = svadd_s32_z (p, a, b); + } + +#pragma omp barrier + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); + +#pragma omp barrier + if (omp_get_thread_num () == 0 || omp_get_thread_num () == 2) + { + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); + } + } +} + +void __attribute__ ((noipa)) +implicit_shared_no_default (svint32_t a, svint32_t b, svbool_t p) +{ + +#pragma omp parallel num_threads (16) + { + /* 'a', 'b' and 'p' are implicitly shared without default clause. */ + compare_vec (a, svindex_s32 (0, 1)); + compare_vec (b, svindex_s32 (8, 1)); + compare_vecb (p, svptrue_b32 ()); + +#pragma omp barrier + if (omp_get_thread_num () == 12) + a = svadd_s32_z (p, a, b); + +#pragma omp barrier + if (omp_get_thread_num () == 15) + { + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svindex_s32 (8, 1)); + compare_vecb (p, svptrue_b32 ()); + b = svadd_s32_z (p, a, b); + } + +#pragma omp barrier + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); + +#pragma omp barrier + if (omp_get_thread_num () == 12 || omp_get_thread_num () == 15) + { + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); + } + } + +} + +void __attribute__ ((noipa)) +mix_shared (svint32_t b, svbool_t p) +{ + + svint32_t a = svindex_s32 (0, 0); + int *m = (int *) malloc (8 * sizeof (int)); + int i; + +#pragma omp parallel for + for (i = 0; i < 8; i++) + m[i] = i; + +#pragma omp parallel num_threads (16) + { + compare_vec (a, svindex_s32 (0, 0)); + compare_vec (b, svindex_s32 (8, 1)); + +#pragma omp barrier + /* 'm' is predetermined shared here. 'a' is implicitly shared here. 
*/ + if (omp_get_thread_num () == 10) + a = svld1_s32 (svptrue_b32 (), m); + +#pragma omp barrier + /* 'a', 'b' and 'p' are implicitly shared without default clause. */ + compare_vec (a, svindex_s32 (0, 1)); + compare_vec (b, svindex_s32 (8, 1)); + compare_vecb (p, svptrue_b32 ()); + +#pragma omp barrier + if (omp_get_thread_num () == 12) + a = svadd_s32_z (p, a, b); + +#pragma omp barrier + if (omp_get_thread_num () == 15) + { + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svindex_s32 (8, 1)); + compare_vecb (p, svptrue_b32 ()); + b = svadd_s32_z (p, a, b); + } + +#pragma omp barrier + if (omp_get_thread_num () == 12 || omp_get_thread_num () == 15) + { + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); + } + +#pragma omp barrier + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); + } +} + +#define N __ARM_FEATURE_SVE_BITS +#define FIXED_ATTR __attribute__((arm_sve_vector_bits (N))) + +typedef svint32_t v8si FIXED_ATTR; + +void __attribute__ ((noipa)) +predetermined_shared_static (int n) +{ + + int *m = (int *) malloc (8 * sizeof (int)); + int i; + +#pragma omp parallel for + /* 'm' is predetermined shared here. */ + for (i = 0; i < 8; i++) + m[i] = i; + + static v8si a = { 0, 1, 2, 3, 4, 5, 6, 7 }; + +#pragma omp parallel num_threads (16) + { + /* 'a' is implicit shared here. */ + if (n == 0) + compare_vec (a, svindex_s32 (0, 1)); + + if (n == 1) + compare_vec (a, svindex_s32 (1, 1)); + +#pragma omp barrier + if (omp_get_thread_num () == 12) + { + if (n == 0) + compare_vec (a, svindex_s32 (0, 1)); + + if (n == 1) + compare_vec (a, svindex_s32 (1, 1)); + + a = svadd_s32_z (svptrue_b32 (), a, svindex_s32 (1, 0)); + } + +#pragma omp barrier + if (n == 0) + compare_vec (a, svindex_s32 (1, 1)); + + if (n == 1) + compare_vec (a, svindex_s32 (2, 1)); + } +} + + +int +main () +{ + svint32_t x = svindex_s32 (0, 1); + svint32_t y = svindex_s32 (8, 1); + svbool_t p = svptrue_b32 (); + + /* Implicit shared. */ + implicit_shared_default (x, y, p); + + /* Explicit shared. */ + explicit_shared (x, y, p); + + /* Implicit shared with no default clause. */ + implicit_shared_no_default (x, y, p); + + /* Mix shared. */ + mix_shared (y, p); + + /* Predetermined and static shared. 
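     Called twice: the static vector inside predetermined_shared_static keeps the increment done by the first call, which the second call then checks.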
*/ + predetermined_shared_static (0); + predetermined_shared_static (1); +} diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/simd-aligned.c b/libgomp/testsuite/libgomp.c-target/aarch64/simd-aligned.c new file mode 100644 index 0000000..14642c9 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-target/aarch64/simd-aligned.c @@ -0,0 +1,51 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-msve-vector-bits=256 -fopenmp -O2" } */ + +#pragma GCC target "+sve" + +#include <arm_sve.h> +#include <stdint.h> + +#define N 256 + +int a[N] __attribute__ ((aligned (64))); +int b[N] __attribute__ ((aligned (64))); + +void __attribute__ ((noipa)) +foo (int *p, int *q, svint32_t *onesp) +{ + svint32_t va, vc; + int i; + uint64_t sz = svcntw (); + +#pragma omp simd aligned(p, q : 64) aligned (onesp : 128) \ + private (va, vc) nontemporal (va, vc) + for (i = 0; i < N; i++) + { + if (i % sz == 0) + { + va = svld1_s32 (svptrue_b32 (), p); + vc = svadd_s32_z (svptrue_b32 (), va, *onesp); + svst1_s32 (svptrue_b32 (), q, vc); + q += sz; + } + } +} + +int +main () +{ + svint32_t ones __attribute__ ((aligned(128))) = svindex_s32 (1, 0); + + for (int i = 0; i < N; i++) + { + a[i] = 1; + b[i] = 0; + } + + foo (a, b, &ones); + + for (int i = 0; i < N; i++) + if (b[i] != 2) + __builtin_abort (); +} diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/simd-nontemporal.c b/libgomp/testsuite/libgomp.c-target/aarch64/simd-nontemporal.c new file mode 100644 index 0000000..6fe4616 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-target/aarch64/simd-nontemporal.c @@ -0,0 +1,51 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-msve-vector-bits=256 -fopenmp -O2" } */ + +#pragma GCC target "+sve" + +#include <arm_sve.h> +#include <stdint.h> + +#define N 256 + +int a[N] __attribute__ ((aligned (64))); +int b[N] __attribute__ ((aligned (64))); + +void __attribute__ ((noipa)) +foo (int *p, int *q) +{ + svint32_t va, vb, vc; + int i; + uint64_t sz = svcntw (); + +#pragma omp simd aligned(p, q : 64) private (va, vb, vc) \ + nontemporal (va, vb, vc) + for (i = 0; i < N; i++) + { + if (i % sz == 0) + { + va = svld1_s32 (svptrue_b32 (), p); + vb = svindex_s32 (1, 0); + vc = svadd_s32_z (svptrue_b32 (), va, vb); + svst1_s32 (svptrue_b32 (), q, vc); + q += sz; + } + } +} + +int +main () +{ + + for (int i = 0; i < N; i++) + { + a[i] = 1; + b[i] = 0; + } + + foo (a, b); + + for (int i = 0; i < N; i++) + if (b[i] != 2) + __builtin_abort (); +} diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/threadprivate.c b/libgomp/testsuite/libgomp.c-target/aarch64/threadprivate.c new file mode 100644 index 0000000..aa7d2f9 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-target/aarch64/threadprivate.c @@ -0,0 +1,47 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-msve-vector-bits=256 -fopenmp -O2" } */ + +#pragma GCC target "+sve" + +#include <arm_sve.h> +#include <stdint.h> + +typedef __SVInt32_t v8si __attribute__ ((arm_sve_vector_bits(256))); + +v8si vec1; +#pragma omp threadprivate (vec1) + +void __attribute__ ((noipa)) +foo () +{ + int64_t res = 0; + + vec1 = svindex_s32 (1, 0); + +#pragma omp parallel copyin (vec1) firstprivate (res) num_threads(10) + { + res = svaddv_s32 (svptrue_b32 (), vec1); + +#pragma omp barrier + if (res != 8LL) + __builtin_abort (); + } +} + +int +main () +{ + int64_t res = 0; + +#pragma omp parallel firstprivate (res) num_threads(10) + { + vec1 = svindex_s32 (1, 0); + res = svaddv_s32 (svptrue_b32 (), vec1); + +#pragma omp barrier + if (res != 
8LL) + __builtin_abort (); + } + + foo (); +} diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/udr-sve.c b/libgomp/testsuite/libgomp.c-target/aarch64/udr-sve.c new file mode 100644 index 0000000..02e02dc --- /dev/null +++ b/libgomp/testsuite/libgomp.c-target/aarch64/udr-sve.c @@ -0,0 +1,134 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-march=armv8-a+sve -msve-vector-bits=256 -fopenmp -O2" } */ + +#include <arm_sve.h> + +#pragma omp declare reduction (+:svint32_t: omp_out = svadd_s32_z (svptrue_b32(), omp_in, omp_out)) \ + initializer (omp_priv = svindex_s32 (0, 0)) + +void __attribute__ ((noipa)) +parallel_reduction () +{ + int a[8] = {1, 1, 1, 1, 1, 1, 1, 1}; + int b[8] = {0, 0, 0, 0, 0, 0, 0, 0}; + svint32_t va = svld1_s32 (svptrue_b32 (), b); + int i = 0; + int64_t res; + + #pragma omp parallel reduction (+:va, i) + { + va = svld1_s32 (svptrue_b32 (), a); + i++; + } + + res = svaddv_s32 (svptrue_b32 (), va); + + if (res != i * 8) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +for_reduction () +{ + int a[8] = {1, 1, 1, 1, 1, 1, 1, 1}; + int b[8] = {0, 0, 0, 0, 0, 0, 0, 0}; + svint32_t va = svld1_s32 (svptrue_b32 (), b); + int j; + int64_t res; + + #pragma omp parallel for reduction (+:va) + for (j = 0; j < 8; j++) + va += svld1_s32 (svptrue_b32 (), a); + + res = svaddv_s32 (svptrue_b32 (), va); + + if (res != 64) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +simd_reduction () +{ + int a[8]; + svint32_t va = svindex_s32 (0, 0); + int i = 0; + int j; + int64_t res = 0; + + for (j = 0; j < 8; j++) + a[j] = 1; + + #pragma omp simd reduction (+:va) + for (j = 0; j < 16; j++) + va += svld1_s32 (svptrue_b32 (), a); + + res = svaddv_s32 (svptrue_b32 (), va); + + if (res != 128) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +inscan_reduction_incl () +{ + svint32_t va = svindex_s32 (0, 0); + int a[8] = {1, 1, 1, 1, 1, 1, 1, 1}; + int b[64] = { 0 }; + int j; + int64_t res = 0; + + #pragma omp parallel for reduction (inscan, +:va) + for (j = 0; j < 8; j++) + { + va += svld1_s32 (svptrue_b32 (), a); + #pragma omp scan inclusive (va) + svst1_s32 (svptrue_b32 (), b + j * 8, va); + } + + res = svaddv_s32 (svptrue_b32 (), va); + + if (res != 64) + __builtin_abort (); + + for (j = 0; j < 64; j+=8) + if (b[j] != (j / 8 + 1)) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +inscan_reduction_excl () +{ + svint32_t va = svindex_s32 (0, 0); + int a[8] = {1, 1, 1, 1, 1, 1, 1, 1}; + int b[64] = { 0 }; + int j; + int64_t res = 0; + + #pragma omp parallel for reduction (inscan, +:va) + for (j = 0; j < 8; j++) + { + svst1_s32 (svptrue_b32 (), b + j * 8, va); + #pragma omp scan exclusive (va) + va += svld1_s32 (svptrue_b32 (), a); + } + + res = svaddv_s32 (svptrue_b32 (), va); + + if (res != 64) + __builtin_abort (); + + for (j = 0; j < 64; j+=8) + if (b[j] != j / 8) + __builtin_abort (); +} + + +int +main () +{ + parallel_reduction (); + for_reduction (); + simd_reduction (); + inscan_reduction_incl (); + inscan_reduction_excl (); +} diff --git a/libgomp/testsuite/libgomp.c/append-args-fr-1.c b/libgomp/testsuite/libgomp.c/append-args-fr-1.c new file mode 100644 index 0000000..2fd7eda --- /dev/null +++ b/libgomp/testsuite/libgomp.c/append-args-fr-1.c @@ -0,0 +1,232 @@ +/* { dg-do run } */ + +#include "append-args-fr.h" + +enum { host_device, nvptx_device, gcn_device } used_device_type, used_device_type2; +static int used_device_num, used_device_num2; +static omp_interop_fr_t expected_fr, expected_fr2; +static _Bool is_targetsync, 
is_targetsync2; + +void +check_interop (omp_interop_t obj) +{ + if (used_device_type == host_device) + check_host (obj); + else if (used_device_type == nvptx_device) + check_nvptx (obj, used_device_num, expected_fr, is_targetsync); + else if (used_device_type == gcn_device) + check_gcn (obj, used_device_num, expected_fr, is_targetsync); + else + __builtin_unreachable (); + + #pragma omp interop use(obj) +} + +void +check_interop2 (omp_interop_t obj, omp_interop_t obj2) +{ + check_interop (obj); + + #pragma omp interop use(obj2) + + if (used_device_type2 == host_device) + check_host (obj2); + else if (used_device_type2 == nvptx_device) + check_nvptx (obj2, used_device_num2, expected_fr2, is_targetsync2); + else if (used_device_type2 == gcn_device) + check_gcn (obj2, used_device_num2, expected_fr2, is_targetsync2); + else + __builtin_unreachable (); +} + + +/* Check no args + one interop arg - and no prefer_type. */ + +int f0_1_tg_ (omp_interop_t obj) { check_interop (obj); return 4242; } +#pragma omp declare variant(f0_1_tg_) match(construct={dispatch}) append_args(interop(target)) +int f0_1_tg () { assert (false); return 42; } + +void f0_1_tgsy_ (omp_interop_t obj) { check_interop (obj); } +#pragma omp declare variant(f0_1_tgsy_) match(construct={dispatch}) append_args(interop(targetsync)) +void f0_1_tgsy () { assert (false); } + +int f0_1_tgtgsy_ (omp_interop_t obj) { check_interop (obj); return 3333; } +#pragma omp declare variant(f0_1_tgtgsy_) match(construct={dispatch}) append_args(interop(targetsync,target)) +int f0_1_tgtgsy () { assert (false); return 33; } + + +/* And with PREFER_TYPE. */ + +// nv: cuda, gcn: -, -, hip +void f0_1_tgsy_c_cd_hi_hs_ (omp_interop_t obj) { check_interop (obj); } +#pragma omp declare variant(f0_1_tgsy_c_cd_hi_hs_) match(construct={dispatch}) \ + append_args(interop(targetsync, prefer_type("cuda","cuda_driver", "hip", "hsa"))) +void f0_1_tgsy_c_cd_hi_hs () { assert (false); } + +// nv: -, cuda_driver, gcn: hsa +void f0_1_tgsy_hs_cd_c_hi_ (omp_interop_t obj) { check_interop (obj); } +#pragma omp declare variant(f0_1_tgsy_hs_cd_c_hi_) match(construct={dispatch}) \ + append_args(interop(targetsync, prefer_type({attr("ompx_foo")}, {fr("hsa")}, {attr("ompx_bar"), fr("cuda_driver"), attr("ompx_foobar")},{fr("cuda")}, {fr("hip")}))) +void f0_1_tgsy_hs_cd_c_hi () { assert (false); } + +// nv: -, hip, gcn: hsa +void f0_1_tgsy_hs_hi_cd_c_ (omp_interop_t obj) { check_interop (obj); } +#pragma omp declare variant(f0_1_tgsy_hs_hi_cd_c_) match(construct={dispatch}) \ + append_args(interop(targetsync, prefer_type("hsa", "hip", "cuda_driver", "cuda"))) +void f0_1_tgsy_hs_hi_cd_c () { assert (false); } + + +void +check_f0 () +{ + if (used_device_type == nvptx_device) + expected_fr = omp_ifr_cuda; + else if (used_device_type == gcn_device) + expected_fr = omp_ifr_hip; + else /* host; variable shall not be accessed */ + expected_fr = omp_ifr_level_zero; + + int i; + if (used_device_num == DEFAULT_DEVICE) + { + is_targetsync = 0; + #pragma omp dispatch + i = f0_1_tg (); + assert (i == 4242); + + is_targetsync = 1; + #pragma omp dispatch + f0_1_tgsy (); + + #pragma omp dispatch + i = f0_1_tgtgsy (); + assert (i == 3333); + + + if (used_device_type == nvptx_device) + expected_fr = omp_ifr_cuda; + else if (used_device_type == gcn_device) + expected_fr = omp_ifr_hip; + #pragma omp dispatch + f0_1_tgsy_c_cd_hi_hs (); + + if (used_device_type == nvptx_device) + expected_fr = omp_ifr_cuda_driver; + else if (used_device_type == gcn_device) + expected_fr = omp_ifr_hsa; + #pragma omp 
dispatch + f0_1_tgsy_hs_cd_c_hi (); + + if (used_device_type == nvptx_device) + expected_fr = omp_ifr_hip; + else if (used_device_type == gcn_device) + expected_fr = omp_ifr_hsa; + #pragma omp dispatch + f0_1_tgsy_hs_hi_cd_c (); + } + else + { + is_targetsync = 0; + #pragma omp dispatch device(used_device_num) + i = f0_1_tg (); + assert (i == 4242); + + is_targetsync = 1; + #pragma omp dispatch device(used_device_num) + f0_1_tgsy (); + + #pragma omp dispatch device(used_device_num) + i = f0_1_tgtgsy (); + assert (i == 3333); + + + if (used_device_type == nvptx_device) + expected_fr = omp_ifr_cuda; + else if (used_device_type == gcn_device) + expected_fr = omp_ifr_hip; + #pragma omp dispatch device(used_device_num) + f0_1_tgsy_c_cd_hi_hs (); + + if (used_device_type == nvptx_device) + expected_fr = omp_ifr_cuda_driver; + else if (used_device_type == gcn_device) + expected_fr = omp_ifr_hsa; + #pragma omp dispatch device(used_device_num) + f0_1_tgsy_hs_cd_c_hi (); + + if (used_device_type == nvptx_device) + expected_fr = omp_ifr_hip; + else if (used_device_type == gcn_device) + expected_fr = omp_ifr_hsa; + #pragma omp dispatch device(used_device_num) + f0_1_tgsy_hs_hi_cd_c (); + } +} + + + +void +do_check (int dev) +{ + int num_dev = omp_get_num_devices (); + const char *dev_type; + if (dev != DEFAULT_DEVICE) + omp_set_default_device (dev); + int is_nvptx = on_device_arch_nvptx (); + int is_gcn = on_device_arch_gcn (); + int is_host; + + if (dev != DEFAULT_DEVICE) + is_host = dev == -1 || dev == num_dev; + else + { + int def_dev = omp_get_default_device (); + is_host = def_dev == -1 || def_dev == num_dev; + } + + assert (is_nvptx + is_gcn + is_host == 1); + + if (num_dev > 0 && dev != DEFAULT_DEVICE) + { + if (is_host) + omp_set_default_device (0); + else + omp_set_default_device (-1); + } + + used_device_num = dev; + if (is_host) + { + dev_type = "host"; + used_device_type = host_device; + } + else if (is_nvptx) + { + dev_type = "nvptx"; + used_device_type = nvptx_device; + } + else if (is_gcn) + { + dev_type = "gcn"; + used_device_type = gcn_device; + } + + printf ("Running on the %s device (%d)\n", dev_type, dev); + check_f0 (); +} + + + +int +main () +{ + do_check (DEFAULT_DEVICE); + int ndev = omp_get_num_devices (); + for (int dev = -1; dev < ndev; dev++) + do_check (dev); + for (int dev = -1; dev < ndev; dev++) + { + omp_set_default_device (dev); + do_check (DEFAULT_DEVICE); + } +} diff --git a/libgomp/testsuite/libgomp.c/append-args-fr.h b/libgomp/testsuite/libgomp.c/append-args-fr.h new file mode 100644 index 0000000..9f6ca04 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/append-args-fr.h @@ -0,0 +1,305 @@ +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <omp.h> +#include "../libgomp.c-c++-common/on_device_arch.h" + +/* Provides: */ + +#define DEFAULT_DEVICE -99 + +void check_host (omp_interop_t obj); +void check_nvptx (omp_interop_t obj, int dev, omp_interop_fr_t expected_fr, _Bool is_targetsync); +void check_gcn (omp_interop_t obj, int dev, omp_interop_fr_t expected_fr, _Bool is_targetsync); + + +/* The following assumes that when a nvptx device is available, + cuda/cuda_driver/hip are supported. + And that likewise when a gcn device is available that the + plugin also can not only the HSA but also the HIP library + such that hsa/hip are supported. + For the host, omp_interop_none is expected. + + Otherwise, it only does some basic tests without checking + that the returned result really makes sense. 
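   DEFAULT_DEVICE (-99) is a sentinel: with it, the dispatch construct is used without a device() clause, so the current default device applies.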
*/ + +void check_type (omp_interop_t obj) +{ + const char *type; + + type = omp_get_interop_type_desc (obj, omp_ipr_fr_id); + if (obj != omp_interop_none) + assert (strcmp (type, "omp_interop_t") == 0); + else + assert (type == NULL); + + type = omp_get_interop_type_desc (obj, omp_ipr_fr_name); + if (obj != omp_interop_none) + assert (strcmp (type, "const char *") == 0); + else + assert (type == NULL); + + type = omp_get_interop_type_desc (obj, omp_ipr_vendor); + if (obj != omp_interop_none) + assert (strcmp (type, "int") == 0); + else + assert (type == NULL); + + type = omp_get_interop_type_desc (obj, omp_ipr_vendor_name); + if (obj != omp_interop_none) + assert (strcmp (type, "const char *") == 0); + else + assert (type == NULL); + + type = omp_get_interop_type_desc (obj, omp_ipr_device_num); + if (obj != omp_interop_none) + assert (strcmp (type, "int") == 0); + else + assert (type == NULL); + + if (obj != omp_interop_none) + return; + assert (omp_get_interop_type_desc (obj, omp_ipr_platform) == NULL); + assert (omp_get_interop_type_desc (obj, omp_ipr_device) == NULL); + assert (omp_get_interop_type_desc (obj, omp_ipr_device_context) == NULL); + assert (omp_get_interop_type_desc (obj, omp_ipr_targetsync) == NULL); +} + + +void +check_host (omp_interop_t obj) +{ + assert (obj == omp_interop_none); + check_type (obj); +} + + +void +check_nvptx (omp_interop_t obj, int dev, omp_interop_fr_t expected_fr, _Bool is_targetsync) +{ + assert (obj != omp_interop_none && obj != (omp_interop_t) -1L); + + omp_interop_rc_t ret_code = omp_irc_no_value; + omp_interop_fr_t fr = (omp_interop_fr_t) omp_get_interop_int (obj, omp_ipr_fr_id, &ret_code); + + assert (ret_code == omp_irc_success); + assert (fr == expected_fr); + + ret_code = omp_irc_no_value; + const char *fr_name = omp_get_interop_str (obj, omp_ipr_fr_name, &ret_code); + + assert (ret_code == omp_irc_success); + if (fr == omp_ifr_cuda) + assert (strcmp (fr_name, "cuda") == 0); + else if (fr == omp_ifr_cuda_driver) + assert (strcmp (fr_name, "cuda_driver") == 0); + else if (fr == omp_ifr_hip) + assert (strcmp (fr_name, "hip") == 0); + else + assert (0); + + ret_code = omp_irc_no_value; + int vendor = (int) omp_get_interop_int (obj, omp_ipr_vendor, &ret_code); + assert (ret_code == omp_irc_success); + assert (vendor == 11); /* Nvidia */ + + ret_code = omp_irc_no_value; + const char *vendor_name = omp_get_interop_str (obj, omp_ipr_vendor_name, &ret_code); + assert (ret_code == omp_irc_success); + assert (strcmp (vendor_name, "nvidia") == 0); + + ret_code = omp_irc_no_value; + int dev_num = (int) omp_get_interop_int (obj, omp_ipr_device_num, &ret_code); + assert (ret_code == omp_irc_success); + if (dev == DEFAULT_DEVICE) + assert (dev_num == omp_get_default_device ()); + else + assert (dev_num == dev); + + /* Platform: N/A. */ + ret_code = omp_irc_success; + (void) omp_get_interop_int (obj, omp_ipr_platform, &ret_code); + assert (ret_code == omp_irc_no_value); + ret_code = omp_irc_success; + (void) omp_get_interop_ptr (obj, omp_ipr_platform, &ret_code); + assert (ret_code == omp_irc_no_value); + ret_code = omp_irc_success; + (void) omp_get_interop_str (obj, omp_ipr_platform, &ret_code); + assert (ret_code == omp_irc_no_value); + + /* Device: int / CUdevice / hipDevice_t -- all internally an 'int'. */ + ret_code = omp_irc_no_value; + int fr_device = (int) omp_get_interop_int (obj, omp_ipr_device, &ret_code); + + /* CUDA also starts from 0 and goes to < n with cudaGetDeviceCount(&cn). 
*/ + assert (ret_code == omp_irc_success); + assert (fr_device >= 0 && fr_device < omp_get_num_devices ()); + + /* Device context: N/A / CUcontext / hipCtx_t -- a pointer. */ + ret_code = omp_irc_out_of_range; + void *ctx = omp_get_interop_ptr (obj, omp_ipr_device_context, &ret_code); + + if (fr == omp_ifr_cuda) + { + assert (ret_code == omp_irc_no_value); + assert (ctx == NULL); + } + else + { + assert (ret_code == omp_irc_success); + assert (ctx != NULL); + } + + /* Stream/targetsync: cudaStream_t / CUstream / hipStream_t -- a pointer. */ + ret_code = omp_irc_out_of_range; + void *stream = omp_get_interop_ptr (obj, omp_ipr_targetsync, &ret_code); + + if (is_targetsync) /* no targetsync */ + { + assert (ret_code == omp_irc_success); + assert (stream != NULL); + } + else + { + assert (ret_code == omp_irc_no_value); + assert (stream == NULL); + } + + check_type (obj); + if (fr == omp_ifr_cuda) + { + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "int") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "N/A") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "cudaStream_t") == 0); + } + else if (fr == omp_ifr_cuda_driver) + { + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "CUdevice") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "CUcontext") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "CUstream") == 0); + } + else + { + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "hipDevice_t") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "hipCtx_t") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "hipStream_t") == 0); + } +} + + +void +check_gcn (omp_interop_t obj, int dev, omp_interop_fr_t expected_fr, _Bool is_targetsync) +{ + assert (obj != omp_interop_none && obj != (omp_interop_t) -1L); + + omp_interop_rc_t ret_code = omp_irc_no_value; + omp_interop_fr_t fr = (omp_interop_fr_t) omp_get_interop_int (obj, omp_ipr_fr_id, &ret_code); + + assert (ret_code == omp_irc_success); + assert (fr == expected_fr); + + ret_code = omp_irc_no_value; + const char *fr_name = omp_get_interop_str (obj, omp_ipr_fr_name, &ret_code); + + assert (ret_code == omp_irc_success); + if (fr == omp_ifr_hip) + assert (strcmp (fr_name, "hip") == 0); + else if (fr == omp_ifr_hsa) + assert (strcmp (fr_name, "hsa") == 0); + else + assert (0); + + ret_code = omp_irc_no_value; + int vendor = (int) omp_get_interop_int (obj, omp_ipr_vendor, &ret_code); + assert (ret_code == omp_irc_success); + assert (vendor == 1); /* Amd */ + + ret_code = omp_irc_no_value; + const char *vendor_name = omp_get_interop_str (obj, omp_ipr_vendor_name, &ret_code); + assert (ret_code == omp_irc_success); + assert (strcmp (vendor_name, "amd") == 0); + + ret_code = omp_irc_no_value; + int dev_num = (int) omp_get_interop_int (obj, omp_ipr_device_num, &ret_code); + assert (ret_code == omp_irc_success); + if (dev == DEFAULT_DEVICE) + assert (dev_num == omp_get_default_device ()); + else + assert (dev_num == dev); + + /* Platform: N/A. 
*/ + ret_code = omp_irc_success; + (void) omp_get_interop_int (obj, omp_ipr_platform, &ret_code); + assert (ret_code == omp_irc_no_value); + ret_code = omp_irc_success; + (void) omp_get_interop_ptr (obj, omp_ipr_platform, &ret_code); + assert (ret_code == omp_irc_no_value); + ret_code = omp_irc_success; + (void) omp_get_interop_str (obj, omp_ipr_platform, &ret_code); + assert (ret_code == omp_irc_no_value); + + /* Device: hipDevice_t / hsa_agent_t* -- hip is internally an 'int'. */ + ret_code = omp_irc_no_value; + if (fr == omp_ifr_hip) + { + /* HIP also starts from 0 and goes to < n as with cudaGetDeviceCount(&cn). */ + int fr_device = (int) omp_get_interop_int (obj, omp_ipr_device, &ret_code); + assert (ret_code == omp_irc_success); + assert (fr_device >= 0 && fr_device < omp_get_num_devices ()); + } + else + { + void *agent = omp_get_interop_ptr (obj, omp_ipr_device, &ret_code); + assert (ret_code == omp_irc_success); + assert (agent != NULL); + } + + /* Device context: hipCtx_t / N/A -- a pointer. */ + ret_code = omp_irc_out_of_range; + void *ctx = omp_get_interop_ptr (obj, omp_ipr_device_context, &ret_code); + if (fr == omp_ifr_hip) + { + assert (ret_code == omp_irc_success); + assert (ctx != NULL); + } + else + { + assert (ret_code == omp_irc_no_value); + assert (ctx == NULL); + } + + /* Stream/targetsync: cudaStream_t / CUstream / hipStream_t -- a pointer. */ + ret_code = omp_irc_out_of_range; + void *stream = omp_get_interop_ptr (obj, omp_ipr_targetsync, &ret_code); + + if (is_targetsync) + { + assert (ret_code == omp_irc_success); + assert (stream != NULL); + } + else + { + assert (ret_code == omp_irc_no_value); + assert (stream == NULL); + } + + check_type (obj); + if (fr == omp_ifr_hip) + { + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "hipDevice_t") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "hipCtx_t") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "hipStream_t") == 0); + } + else + { + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "hsa_agent_t *") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "N/A") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "hsa_queue_t *") == 0); + } +} diff --git a/libgomp/testsuite/libgomp.c/declare-variant-3-sm61.c b/libgomp/testsuite/libgomp.c/declare-variant-3-sm61.c new file mode 100644 index 0000000..e6941d3 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/declare-variant-3-sm61.c @@ -0,0 +1,8 @@ +/* { dg-do link { target { offload_target_nvptx } } } */ +/* { dg-additional-options -foffload=nvptx-none } */ +/* { dg-additional-options "-foffload=-misa=sm_61 -foffload=-mptx=_" } */ +/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */ + +#include "declare-variant-3.h" + +/* { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump "= f61 \\(\\);" "optimized" } } */ diff --git a/libgomp/testsuite/libgomp.c/declare-variant-3.h b/libgomp/testsuite/libgomp.c/declare-variant-3.h index c9c8f4a..f5695a2 100644 --- a/libgomp/testsuite/libgomp.c/declare-variant-3.h +++ b/libgomp/testsuite/libgomp.c/declare-variant-3.h @@ -37,6 +37,13 @@ f53 (void) __attribute__ ((noipa)) int +f61 (void) +{ + return 61; +} + +__attribute__ ((noipa)) +int f70 (void) { return 70; @@ -68,6 +75,7 @@ f89 (void) #pragma 
omp declare variant (f37) match (device={isa("sm_37")}) #pragma omp declare variant (f52) match (device={isa("sm_52")}) #pragma omp declare variant (f53) match (device={isa("sm_53")}) +#pragma omp declare variant (f61) match (device={isa("sm_61")}) #pragma omp declare variant (f70) match (device={isa("sm_70")}) #pragma omp declare variant (f75) match (device={isa("sm_75")}) #pragma omp declare variant (f80) match (device={isa("sm_80")}) diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx942.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx942.c new file mode 100644 index 0000000..d1df550 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx942.c @@ -0,0 +1,8 @@ +/* { dg-do link { target { offload_target_amdgcn } } } */ +/* { dg-additional-options -foffload=amdgcn-amdhsa } */ +/* { dg-additional-options -foffload=-march=gfx942 } */ +/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */ + +#include "declare-variant-4.h" + +/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx942 \\(\\);" "optimized" } } */ diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4.h b/libgomp/testsuite/libgomp.c/declare-variant-4.h index 53788d2..2257f4c 100644 --- a/libgomp/testsuite/libgomp.c/declare-variant-4.h +++ b/libgomp/testsuite/libgomp.c/declare-variant-4.h @@ -37,6 +37,13 @@ gfx90c (void) __attribute__ ((noipa)) int +gfx942 (void) +{ + return 0x942; +} + +__attribute__ ((noipa)) +int gfx1030 (void) { return 0x1030; @@ -68,6 +75,7 @@ gfx1103 (void) #pragma omp declare variant(gfx908) match(device = {isa("gfx908")}) #pragma omp declare variant(gfx90a) match(device = {isa("gfx90a")}) #pragma omp declare variant(gfx90c) match(device = {isa("gfx90c")}) +#pragma omp declare variant(gfx942) match(device = {isa("gfx942")}) #pragma omp declare variant(gfx1030) match(device = {isa("gfx1030")}) #pragma omp declare variant(gfx1036) match(device = {isa("gfx1036")}) #pragma omp declare variant(gfx1100) match(device = {isa("gfx1100")}) diff --git a/libgomp/testsuite/libgomp.c/interop-cublas-full.c b/libgomp/testsuite/libgomp.c/interop-cublas-full.c new file mode 100644 index 0000000..2df5277 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-cublas-full.c @@ -0,0 +1,176 @@ +/* { dg-require-effective-target openacc_cublas } */ +/* { dg-additional-options "-lcublas" } */ + +/* NOTE: This file is also included by libgomp.c-c++-common/interop-cudablas-libonly.c + to test the fallback version. */ + +/* Check whether cuBlas' daxpy works with an interop object. + daxpy(N, DA, DX, INCX, DY, INCY) + calculates (for DX = DY = 1): + DY(1:N) = DY(1:N) + DA * DX(1:N) + and otherwise N array elements, taking every INCX-th or INCY-th one, repectively. + +Based on the interop example in OpenMP's example document */ + +/* Minimal check whether CUDA works - by checking whether the API routines + seem to work. This includes a fallback if the header is not + available. */ + +#include <assert.h> +#include <omp.h> +#include "../libgomp.c-c++-common/on_device_arch.h" + + +#if __has_include(<cuda.h>) && __has_include(<cudaTypedefs.h>) && __has_include(<cuda_runtime.h>) && __has_include(<cublas_v2.h>) && !defined(USE_CUDA_FALLBACK_HEADER) + #include <cuda.h> + #include <cudaTypedefs.h> + #include <cuda_runtime.h> + #include <cublas_v2.h> + +#else + /* Add a poor man's fallback declaration. */ + #if USE_CUDA_FALLBACK_HEADER + // Don't warn. 
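+    // (interop-cublas-libonly.c defines USE_CUDA_FALLBACK_HEADER before including this file, so falling back is deliberate there.)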
+ #elif !__has_include(<cuda.h>) + #warning "Using GCC's cuda.h as fallback for cuda.h" + #elif !__has_include(<cudaTypedefs.h>) + #warning "Using GCC's cuda.h as fallback for cudaTypedefs.h" + #elif !__has_include(<cuda_runtime.h>) + #warning "Using GCC's cuda.h as fallback for cuda_runtime.h" + #else + #warning "Using GCC's cuda.h as fallback for cublas_v2.h" + #endif + #include "../../../include/cuda/cuda.h" + + typedef enum { + CUBLAS_STATUS_SUCCESS = 0, + } cublasStatus_t; + + typedef CUstream cudaStream_t; + typedef struct cublasContext* cublasHandle_t; + + #define cublasCreate cublasCreate_v2 + cublasStatus_t cublasCreate_v2 (cublasHandle_t *); + + #define cublasSetStream cublasSetStream_v2 + cublasStatus_t cublasSetStream_v2 (cublasHandle_t, cudaStream_t); + + #define cublasDaxpy cublasDaxpy_v2 + cublasStatus_t cublasDaxpy_v2(cublasHandle_t, int, const double*, const double*, int, double*, int); +#endif + +static int used_variant = 0; + +void +run_cuBlasdaxpy (int n, double da, const double *dx, int incx, double *dy, int incy, omp_interop_t obj) +{ + used_variant = 1; + + omp_interop_rc_t res; + cublasStatus_t stat; + + omp_intptr_t fr = omp_get_interop_int(obj, omp_ipr_fr_id, &res); + assert (res == omp_irc_success && fr == omp_ifr_cuda); + + cudaStream_t stream = (cudaStream_t) omp_get_interop_ptr (obj, omp_ipr_targetsync, &res); + assert (res == omp_irc_success); + + cublasHandle_t handle; + stat = cublasCreate (&handle); + assert (stat == CUBLAS_STATUS_SUCCESS); + + stat = cublasSetStream (handle, stream); + assert (stat == CUBLAS_STATUS_SUCCESS); + + /* 'da' can be in host or device space, 'dx' and 'dy' must be in device space. */ + stat = cublasDaxpy (handle, n, &da, dx, 1, dy, 1) ; + assert (stat == CUBLAS_STATUS_SUCCESS); +} + + +#pragma omp declare variant(run_cuBlasdaxpy) \ + match(construct={dispatch}, target_device={kind(nohost), arch("nvptx")}) \ + adjust_args(need_device_ptr : dx, dy) \ + append_args(interop(targetsync, prefer_type("cuda"))) + +void +run_daxpy (int n, double da, const double *dx, int incx, double *dy, int incy) +{ + used_variant = 2; + + if (incx == 1 && incy == 1) + #pragma omp simd + for (int i = 0; i < n; i++) + dy[i] += da * dx[i]; + else + { + int ix = 0; + int iy = 0; + for (int i = 0; i < n; i++) + { + dy[iy] += da * dx[ix]; + ix += incx; + iy += incy; + } + } +} + + +void +run_test (int dev) +{ + constexpr int N = 1024; + + // A = {1,2,...,N} + // B = {-1, -2, ..., N} + // B' = daxpy (N, 3, A, incx=1, B, incy=1) + // = B + 3*A + // -> B' = {0, 2, 4, 6, ... 
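  //    i.e. B'[i] = -i + 3*i = 2*i, which is what the final assert checks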
} + + double A[N], B[N]; + double factor = 3.0; + for (int i = 0; i < N; i++) + { + A[i] = i; + B[i] = -i; + } + + if (dev != omp_initial_device && dev != omp_get_num_devices ()) + { + #pragma omp target enter data device(dev) map(A, B) + } + + used_variant = 99; + #pragma omp dispatch device(dev) + run_daxpy (N, factor, A, 1, B, 1); + + if (dev != omp_initial_device && dev != omp_get_num_devices ()) + { + #pragma omp target exit data device(dev) map(release: A) map(from: B) + + int tmp = omp_get_default_device (); + omp_set_default_device (dev); + if (on_device_arch_nvptx ()) + assert (used_variant == 1); + else + assert (used_variant == 2); + omp_set_default_device (tmp); + } + else + assert (used_variant == 2); + + for (int i = 0; i < N; i++) + assert (B[i] == 2*i); +} + +int +main () +{ + int ndev = omp_get_num_devices (); + + for (int dev = 0; dev <= ndev; dev++) + run_test (dev); + run_test (omp_initial_device); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/interop-cublas-libonly.c b/libgomp/testsuite/libgomp.c/interop-cublas-libonly.c new file mode 100644 index 0000000..89c0652 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-cublas-libonly.c @@ -0,0 +1,7 @@ +/* { dg-require-effective-target openacc_libcublas } */ +/* { dg-additional-options "-lcublas" } */ + +/* Same as interop-cudablas-full.c, but also works if the header is not available. */ + +#define USE_CUDA_FALLBACK_HEADER 1 +#include "interop-cublas-full.c" diff --git a/libgomp/testsuite/libgomp.c/interop-cuda-full.c b/libgomp/testsuite/libgomp.c/interop-cuda-full.c new file mode 100644 index 0000000..c48a934 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-cuda-full.c @@ -0,0 +1,162 @@ +/* { dg-do run { target { offload_device_nvptx } } } */ +/* { dg-do link { target { ! offload_device_nvptx } } } */ + +/* { dg-require-effective-target openacc_cuda } */ +/* { dg-require-effective-target openacc_cudart } */ +/* { dg-additional-options "-lcuda -lcudart" } */ + +/* NOTE: This file is also included by libgomp.c-c++-common/interop-cuda-libonly.c + to test the fallback version, which defines USE_CUDA_FALLBACK_HEADER. */ + +/* Minimal check whether CUDA works - by checking whether the API routines + seem to work. This includes a fallback if the header is not + available. */ + +#include <assert.h> +#include <omp.h> + +#if __has_include(<cuda.h>) && __has_include(<cudaTypedefs.h>) && __has_include(<cuda_runtime.h>) && !defined(USE_CUDA_FALLBACK_HEADER) + #include <cuda.h> + #include <cudaTypedefs.h> + #include <cuda_runtime.h> + +#else + /* Add a poor man's fallback declaration. */ + #if USE_CUDA_FALLBACK_HEADER + // Don't warn. 
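+    // (interop-cuda-libonly.c defines USE_CUDA_FALLBACK_HEADER before including this file, so falling back is deliberate there.)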
+ #elif !__has_include(<cuda.h>) + #warning "Using GCC's cuda.h as fallback for cuda.h" + #elif !__has_include(<cudaTypedefs.h>) + #warning "Using GCC's cuda.h as fallback for cudaTypedefs.h" + #else + #warning "Using GCC's cuda.h as fallback for cuda_runtime.h" + #endif + #include "../../../include/cuda/cuda.h" + + typedef int cudaError_t; + typedef CUstream cudaStream_t; + enum { + cudaSuccess = 0 + }; + + enum cudaDeviceAttr { + cudaDevAttrClockRate = 13, + cudaDevAttrMaxGridDimX = 5 + }; + + cudaError_t cudaDeviceGetAttribute (int *, enum cudaDeviceAttr, int); + cudaError_t cudaStreamQuery(cudaStream_t); + CUresult cuCtxGetApiVersion(CUcontext, unsigned int *); + CUresult cuStreamGetCtx (CUstream, CUcontext *); +#endif + +int +main () +{ + int ivar; + unsigned uvar; + omp_interop_rc_t res; + omp_interop_t obj_cuda = omp_interop_none; + omp_interop_t obj_cuda_driver = omp_interop_none; + cudaError_t cuda_err; + CUresult cu_err; + + #pragma omp interop init(target, targetsync, prefer_type("cuda") : obj_cuda) \ + init(target, targetsync, prefer_type("cuda_driver") : obj_cuda_driver) \ + + omp_interop_fr_t fr = (omp_interop_fr_t) omp_get_interop_int (obj_cuda, omp_ipr_fr_id, &res); + assert (res == omp_irc_success); + assert (fr == omp_ifr_cuda); + + fr = (omp_interop_fr_t) omp_get_interop_int (obj_cuda_driver, omp_ipr_fr_id, &res); + assert (res == omp_irc_success); + assert (fr == omp_ifr_cuda_driver); + + ivar = (int) omp_get_interop_int (obj_cuda, omp_ipr_vendor, &res); + assert (res == omp_irc_success); + assert (ivar == 11); + + ivar = (int) omp_get_interop_int (obj_cuda_driver, omp_ipr_vendor, &res); + assert (res == omp_irc_success); + assert (ivar == 11); + + + /* Check whether the omp_ipr_device -> cudaDevice_t yields a valid device. */ + + CUdevice cu_dev = (int) omp_get_interop_int (obj_cuda_driver, omp_ipr_device, &res); + assert (res == omp_irc_success); + + /* Assume a clock size is available and > 1 GHz; value is in kHz. */ + cu_err = cuDeviceGetAttribute (&ivar, cudaDevAttrClockRate, cu_dev); + assert (cu_err == CUDA_SUCCESS); + assert (ivar > 1000000 /* kHz */); + + /* Assume that the MaxGridDimX is available and > 1024. */ + cu_err = cuDeviceGetAttribute (&ivar, cudaDevAttrMaxGridDimX, cu_dev); + assert (cu_err == CUDA_SUCCESS); + assert (ivar > 1024); + + int cuda_dev = (int) omp_get_interop_int (obj_cuda, omp_ipr_device, &res); + assert (res == omp_irc_success); + assert (cuda_dev == (CUdevice) cu_dev); // Assume they are the same ... + + /* Assume a clock size is available and > 1 GHz; value is in kHz. */ + cuda_err = cudaDeviceGetAttribute (&ivar, cudaDevAttrClockRate, cuda_dev); + assert (cuda_err == cudaSuccess); + assert (ivar > 1000000 /* kHz */); + + /* Assume that the MaxGridDimX is available and > 1024. */ + cuda_err = cudaDeviceGetAttribute (&ivar, cudaDevAttrMaxGridDimX, cuda_dev); + assert (cuda_err == cudaSuccess); + assert (ivar > 1024); + + + + + /* Check whether the omp_ipr_device_context -> CUcontext yields a context. */ + + CUcontext cu_ctx = (CUcontext) omp_get_interop_ptr (obj_cuda_driver, omp_ipr_device_context, &res); + assert (res == omp_irc_success); + + /* Assume API Version > 0 for Nvidia, cudaErrorNotSupported for AMD. */ + uvar = 99; + cu_err = cuCtxGetApiVersion (cu_ctx, &uvar); + assert (cu_err == CUDA_SUCCESS); + assert (uvar > 0); + + + /* Check whether the omp_ipr_targetsync -> cudaStream_t yields a stream. 
*/ + + cudaStream_t cuda_sm = (cudaStream_t) omp_get_interop_ptr (obj_cuda, omp_ipr_targetsync, &res); + assert (res == omp_irc_success); + + CUstream cu_sm = (cudaStream_t) omp_get_interop_ptr (obj_cuda_driver, omp_ipr_targetsync, &res); + assert (res == omp_irc_success); + + assert ((void*) cu_sm != (void*) cuda_sm); // Type compatible but should have created two streams + + int dev_stream = 99; +#if CUDA_VERSION >= 12080 + cuda_err = cudaStreamGetDevice (cuda_sm, &dev_stream); + assert (cuda_err == cudaSuccess); +#else + cu_err = cuStreamGetCtx (cu_sm, &cu_ctx) != CUDA_SUCCESS; + if (cu_err == CUDA_SUCCESS) + cuda_err = cuCtxPushCurrent (cu_ctx) != CUDA_SUCCESS; + if (cu_err == CUDA_SUCCESS) + cuda_err = cuCtxGetDevice (&dev_stream) != CUDA_SUCCESS; + if (cu_err == CUDA_SUCCESS) + cu_err = cuCtxPopCurrent (&cu_ctx) != CUDA_SUCCESS; + assert (cu_err == CUDA_SUCCESS); +#endif + assert (dev_stream == cuda_dev); + + /* All jobs should have been completed (as there were none none) */ + cuda_err = cudaStreamQuery (cuda_sm); + assert (cuda_err == cudaSuccess); + + cu_err = cuStreamQuery (cu_sm); + assert (cu_err == CUDA_SUCCESS); + + #pragma omp interop destroy(obj_cuda, obj_cuda_driver) +} diff --git a/libgomp/testsuite/libgomp.c/interop-cuda-libonly.c b/libgomp/testsuite/libgomp.c/interop-cuda-libonly.c new file mode 100644 index 0000000..bc257a2 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-cuda-libonly.c @@ -0,0 +1,11 @@ +/* { dg-do run { target { offload_device_nvptx } } } */ +/* { dg-do link { target { ! offload_device_nvptx } } } */ + +/* { dg-require-effective-target openacc_libcudart } */ +/* { dg-require-effective-target openacc_libcuda } */ +/* { dg-additional-options "-lcuda -lcudart" } */ + +/* Same as interop-cuda-full.c, but also works if the header is not available. */ + +#define USE_CUDA_FALLBACK_HEADER 1 +#include "interop-cuda-full.c" diff --git a/libgomp/testsuite/libgomp.c/interop-hip-amd-full.c b/libgomp/testsuite/libgomp.c/interop-hip-amd-full.c new file mode 100644 index 0000000..bd44f44 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hip-amd-full.c @@ -0,0 +1,10 @@ +/* { dg-do run { target { offload_device_gcn } } } */ +/* { dg-do link { target { ! offload_device_gcn } } } */ + +/* { dg-require-effective-target gomp_hip_header_amd } */ +/* { dg-require-effective-target gomp_libamdhip64 } */ +/* { dg-additional-options "-lamdhip64" } */ + +#define __HIP_PLATFORM_AMD__ 1 + +#include "interop-hip.h" diff --git a/libgomp/testsuite/libgomp.c/interop-hip-amd-no-hip-header.c b/libgomp/testsuite/libgomp.c/interop-hip-amd-no-hip-header.c new file mode 100644 index 0000000..91ad987 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hip-amd-no-hip-header.c @@ -0,0 +1,11 @@ +/* { dg-do run { target { offload_device_gcn } } } */ +/* { dg-do link { target { ! offload_device_gcn } } } */ + +/* { dg-require-effective-target gomp_libamdhip64 } */ +/* { dg-additional-options "-lamdhip64" } */ + +#define __HIP_PLATFORM_AMD__ 1 + +#define USE_HIP_FALLBACK_HEADER 1 + +#include "interop-hip.h" diff --git a/libgomp/testsuite/libgomp.c/interop-hip-nvidia-full.c b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-full.c new file mode 100644 index 0000000..d5dc236 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-full.c @@ -0,0 +1,11 @@ +/* { dg-do run { target { offload_device_nvptx } } } */ +/* { dg-do link { target { ! 
offload_device_nvptx } } } */ + +/* { dg-require-effective-target openacc_cudart } */ +/* { dg-require-effective-target openacc_cuda } */ +/* { dg-require-effective-target gomp_hip_header_nvidia } */ +/* { dg-additional-options "-lcuda -lcudart -Wno-deprecated-declarations" } */ + +#define __HIP_PLATFORM_NVIDIA__ 1 + +#include "interop-hip.h" diff --git a/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-headers.c b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-headers.c new file mode 100644 index 0000000..7cff2cb --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-headers.c @@ -0,0 +1,13 @@ +/* { dg-do run { target { offload_device_nvptx } } } */ +/* { dg-do link { target { ! offload_device_nvptx } } } */ + +/* { dg-require-effective-target openacc_libcudart } */ +/* { dg-require-effective-target openacc_libcuda } */ +/* { dg-additional-options "-lcuda -lcudart" } */ + +#define __HIP_PLATFORM_NVIDIA__ 1 + +#define USE_HIP_FALLBACK_HEADER 1 +#define USE_CUDA_FALLBACK_HEADER 1 + +#include "interop-hip.h" diff --git a/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c new file mode 100644 index 0000000..7b7dc74 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c @@ -0,0 +1,12 @@ +/* { dg-do run { target { offload_device_nvptx } } } */ +/* { dg-do link { target { ! offload_device_nvptx } } } */ + +/* { dg-require-effective-target openacc_cudart } */ +/* { dg-require-effective-target openacc_cuda } */ +/* { dg-additional-options "-lcuda -lcudart" } */ + +#define __HIP_PLATFORM_NVIDIA__ 1 + +#define USE_HIP_FALLBACK_HEADER 1 + +#include "interop-hip.h" diff --git a/libgomp/testsuite/libgomp.c/interop-hip.h b/libgomp/testsuite/libgomp.c/interop-hip.h new file mode 100644 index 0000000..20a1ccb --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hip.h @@ -0,0 +1,234 @@ +/* Minimal check whether HIP works - by checking whether the API routines + seem to work. This includes various fallbacks if the header is not + available. */ + +#include <assert.h> +#include <omp.h> + +#if !defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) + #error "Either __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__ must be defined" +#endif + +#if defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) + #error "Either __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__ must be defined" +#endif + +#if __has_include(<hip/hip_runtime_api.h>) && !defined(USE_HIP_FALLBACK_HEADER) + #include <hip/hip_runtime_api.h> + +#elif defined(__HIP_PLATFORM_AMD__) + /* Add a poor man's fallback declaration. */ + #if !defined(USE_HIP_FALLBACK_HEADER) + #warning "Using fallback declaration for <hip/hip_runtime_api.h> for __HIP_PLATFORM_AMD__" + #endif + + typedef struct ihipStream_t* hipStream_t; + typedef struct ihipCtx_t* hipCtx_t; + typedef int hipError_t; + typedef int hipDevice_t; + enum { + hipSuccess = 0, + hipErrorNotSupported = 801 + }; + + typedef enum hipDeviceAttribute_t { + hipDeviceAttributeClockRate = 5, + hipDeviceAttributeMaxGridDimX = 29 + } hipDeviceAttribute_t; + + hipError_t hipDeviceGetAttribute (int *, hipDeviceAttribute_t, hipDevice_t); + hipError_t hipCtxGetApiVersion (hipCtx_t, int *); + hipError_t hipStreamGetDevice (hipStream_t, hipDevice_t *); + hipError_t hipStreamQuery (hipStream_t); + +#elif defined(__HIP_PLATFORM_NVIDIA__) + /* Add a poor man's fallback declaration. 
*/ + #if !defined(USE_HIP_FALLBACK_HEADER) + #warning "Using fallback declaration for <hip/hip_runtime_api.h> for __HIP_PLATFORM_NVIDIA__" + #endif + + #if __has_include(<cuda.h>) && __has_include(<cudaTypedefs.h>) && __has_include(<cuda_runtime.h>) && !defined(USE_CUDA_FALLBACK_HEADER) + #include <cuda.h> + #include <cudaTypedefs.h> + #include <cuda_runtime.h> + #else + #if defined(USE_CUDA_FALLBACK_HEADER) + // no warning + #elif !__has_include(<cuda.h>) + #warning "Using GCC's cuda.h as fallback for cuda.h" + #elif !__has_include(<cudaTypedefs.h>) + #warning "Using GCC's cuda.h as fallback for cudaTypedefs.h" + #else + #warning "Using GCC's cuda.h as fallback for cuda_runtime.h" + #endif + + #include "../../../include/cuda/cuda.h" + + typedef int cudaError_t; + enum { + cudaSuccess = 0 + }; + + enum cudaDeviceAttr { + cudaDevAttrClockRate = 13, + cudaDevAttrMaxGridDimX = 5 + }; + + cudaError_t cudaDeviceGetAttribute (int *, enum cudaDeviceAttr, int); + CUresult cuCtxGetApiVersion(CUcontext, unsigned int *); + CUresult cuStreamGetCtx (CUstream, CUcontext *); + #endif + + typedef CUstream hipStream_t; + typedef CUcontext hipCtx_t; + typedef CUdevice hipDevice_t; + + typedef int hipError_t; + typedef int hipDevice_t; + enum { + hipSuccess = 0, + hipErrorNotSupported = 801 + }; + + + typedef enum hipDeviceAttribute_t { + hipDeviceAttributeClockRate = 5, + hipDeviceAttributeMaxGridDimX = 29 + } hipDeviceAttribute_t; + + inline static hipError_t + hipDeviceGetAttribute (int *ival, hipDeviceAttribute_t attr, hipDevice_t dev) + { + enum cudaDeviceAttr cuattr; + switch (attr) + { + case hipDeviceAttributeClockRate: + cuattr = cudaDevAttrClockRate; + break; + case hipDeviceAttributeMaxGridDimX: + cuattr = cudaDevAttrMaxGridDimX; + break; + default: + assert (0); + } + return cudaDeviceGetAttribute (ival, cuattr, dev) != cudaSuccess; + } + + inline static hipError_t + hipCtxGetApiVersion (hipCtx_t ctx, int *ver) + { + unsigned uver; + hipError_t err; + err = cuCtxGetApiVersion (ctx, &uver) != CUDA_SUCCESS; + *ver = (int) uver; + return err; + } + + inline static hipError_t + hipStreamGetDevice (hipStream_t stream, hipDevice_t *dev) + { +#if CUDA_VERSION >= 12080 + return cudaStreamGetDevice (stream, dev); +#else + hipError_t err; + CUcontext ctx; + err = cuStreamGetCtx (stream, &ctx) != CUDA_SUCCESS; + if (err == hipSuccess) + err = cuCtxPushCurrent (ctx) != CUDA_SUCCESS; + if (err == hipSuccess) + err = cuCtxGetDevice (dev) != CUDA_SUCCESS; + if (err == hipSuccess) + err = cuCtxPopCurrent (&ctx) != CUDA_SUCCESS; + return err; +#endif + } + + inline static hipError_t + hipStreamQuery (hipStream_t stream) + { + return cuStreamQuery (stream) != CUDA_SUCCESS; + } + +#else + #error "should be unreachable" +#endif + +int +main () +{ + int ivar; + omp_interop_rc_t res; + omp_interop_t obj = omp_interop_none; + hipError_t hip_err; + + #pragma omp interop init(target, targetsync, prefer_type("hip") : obj) + + omp_interop_fr_t fr = (omp_interop_fr_t) omp_get_interop_int (obj, omp_ipr_fr_id, &res); + assert (res == omp_irc_success); + assert (fr == omp_ifr_hip); + + ivar = (int) omp_get_interop_int (obj, omp_ipr_vendor, &res); + assert (res == omp_irc_success); + int vendor_is_amd = ivar == 1; + #if defined(__HIP_PLATFORM_AMD__) + assert (ivar == 1); + #elif defined(__HIP_PLATFORM_NVIDIA__) + assert (ivar == 11); + #else + assert (0); + #endif + + + /* Check whether the omp_ipr_device -> hipDevice_t yields a valid device. 
*/ + + hipDevice_t hip_dev = (int) omp_get_interop_int (obj, omp_ipr_device, &res); + assert (res == omp_irc_success); + + /* Assume a clock size is available and > 1 GHz; value is in kHz. */ + hip_err = hipDeviceGetAttribute (&ivar, hipDeviceAttributeClockRate, hip_dev); + assert (hip_err == hipSuccess); + assert (ivar > 1000000 /* kHz */); + + /* Assume that the MaxGridDimX is available and > 1024. */ + hip_err = hipDeviceGetAttribute (&ivar, hipDeviceAttributeMaxGridDimX, hip_dev); + assert (hip_err == hipSuccess); + assert (ivar > 1024); + + + /* Check whether the omp_ipr_device_context -> hipCtx_t yields a context. */ + + hipCtx_t hip_ctx = (hipCtx_t) omp_get_interop_ptr (obj, omp_ipr_device_context, &res); + assert (res == omp_irc_success); + + /* Assume API Version > 0 for Nvidia, hipErrorNotSupported for AMD. */ + ivar = -99; + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wdeprecated-declarations" + hip_err = hipCtxGetApiVersion (hip_ctx, &ivar); + #pragma GCC diagnostic pop + + if (vendor_is_amd) + assert (hip_err == hipErrorNotSupported && ivar == -99); + else + { + assert (hip_err == hipSuccess); + assert (ivar > 0); + } + + + /* Check whether the omp_ipr_targetsync -> hipStream_t yields a stream. */ + + hipStream_t hip_sm = (hipStream_t) omp_get_interop_ptr (obj, omp_ipr_targetsync, &res); + assert (res == omp_irc_success); + + hipDevice_t dev_stream = 99; + hip_err = hipStreamGetDevice (hip_sm, &dev_stream); + assert (hip_err == hipSuccess); + assert (dev_stream == hip_dev); + + /* All jobs should have been completed (as there were none none) */ + hip_err = hipStreamQuery (hip_sm); + assert (hip_err == hipSuccess); + + #pragma omp interop destroy(obj) +} diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas-amd-full.c b/libgomp/testsuite/libgomp.c/interop-hipblas-amd-full.c new file mode 100644 index 0000000..53c05bd --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hipblas-amd-full.c @@ -0,0 +1,7 @@ +/* { dg-require-effective-target gomp_hip_header_amd } */ +/* { dg-require-effective-target gomp_libhipblas } */ +/* { dg-additional-options "-lhipblas" } */ + +#define __HIP_PLATFORM_AMD__ 1 + +#include "interop-hipblas.h" diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas-amd-no-hip-header.c b/libgomp/testsuite/libgomp.c/interop-hipblas-amd-no-hip-header.c new file mode 100644 index 0000000..0ea3133 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hipblas-amd-no-hip-header.c @@ -0,0 +1,8 @@ +/* { dg-require-effective-target gomp_libhipblas } */ +/* { dg-additional-options "-lhipblas" } */ + +#define __HIP_PLATFORM_AMD__ 1 + +#define USE_HIP_FALLBACK_HEADER 1 + +#include "interop-hipblas.h" diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-full.c b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-full.c new file mode 100644 index 0000000..ed428c6 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-full.c @@ -0,0 +1,7 @@ +/* { dg-require-effective-target openacc_cublas } */ +/* { dg-require-effective-target gomp_hip_header_nvidia } */ +/* { dg-additional-options "-lcublas -Wno-deprecated-declarations" } */ + +#define __HIP_PLATFORM_NVIDIA__ 1 + +#include "interop-hipblas.h" diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-headers.c b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-headers.c new file mode 100644 index 0000000..1a31b30 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-headers.c @@ -0,0 +1,9 @@ +/* { dg-require-effective-target openacc_libcublas 
} */ +/* { dg-additional-options "-lcublas" } */ + +#define __HIP_PLATFORM_NVIDIA__ 1 + +#define USE_HIP_FALLBACK_HEADER 1 +#define USE_CUDA_FALLBACK_HEADER 1 + +#include "interop-hipblas.h" diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-hip-header.c b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-hip-header.c new file mode 100644 index 0000000..f85c13b --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-hip-header.c @@ -0,0 +1,8 @@ +/* { dg-require-effective-target openacc_cublas } */ +/* { dg-additional-options "-lcublas" } */ + +#define __HIP_PLATFORM_NVIDIA__ 1 + +#define USE_HIP_FALLBACK_HEADER 1 + +#include "interop-hipblas.h" diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas.h b/libgomp/testsuite/libgomp.c/interop-hipblas.h new file mode 100644 index 0000000..d7cb174 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hipblas.h @@ -0,0 +1,240 @@ +/* Check whether hipBlas' daxpy works with an interop object. + daxpy(N, DA, DX, INCX, DY, INCY) + calculates (for DX = DY = 1): + DY(1:N) = DY(1:N) + DA * DX(1:N) + and otherwise N array elements, taking every INCX-th or INCY-th one, repectively. + +Based on the interop example in OpenMP's example document */ + +/* Minimal check whether HIP works - by checking whether the API routines + seem to work. This includes a fallback if the header is not + available. */ + +#if !defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) + #error "Either __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__ must be defined" +#endif + +#if defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) + #error "Either __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__ must be defined" +#endif + + +#include <assert.h> +#include <omp.h> +#include "../libgomp.c-c++-common/on_device_arch.h" + + +#if __has_include(<hipblas/hipblas.h>) && (__has_include(<library_types.h>) || !defined(__HIP_PLATFORM_NVIDIA__)) && !defined(USE_HIP_FALLBACK_HEADER) + #ifdef __HIP_PLATFORM_NVIDIA__ + /* There seems to be an issue with hip/library_types.h including + CUDA's "library_types.h". Include CUDA's one explicitly here. + Could possibly worked around by using -isystem vs. -I. */ + #include <library_types.h> + + /* For some reasons, the following symbols do not seem to get + mapped from HIP to CUDA, causing link errors. */ + #define hipblasSetStream cublasSetStream_v2 + #define hipblasDaxpy cublasDaxpy_v2 + #define hipblasCreate cublasCreate_v2 + #endif + #include <hipblas/hipblas.h> + +#elif defined(__HIP_PLATFORM_AMD__) + /* Add a poor man's fallback declaration. */ + #if !defined(USE_HIP_FALLBACK_HEADER) + #warning "Using fallback declaration for <hipblas/hipblas.h> for __HIP_PLATFORM_AMD__" + #endif + + typedef enum + { + HIPBLAS_STATUS_SUCCESS = 0 + + } hipblasStatus_t; + + typedef struct ihipStream_t* hipStream_t; + typedef void* hipblasHandle_t; + + hipblasStatus_t hipblasCreate (hipblasHandle_t*); + hipblasStatus_t hipblasSetStream (hipblasHandle_t, hipStream_t); + hipblasStatus_t hipblasDaxpy (hipblasHandle_t, int, const double*, const double*, int, double*, int); + +#else + /* Add a poor man's fallback declaration. 
*/ + #if !defined(USE_HIP_FALLBACK_HEADER) + #warning "Using fallback declaration for <hipblas/hipblas.h> for __HIP_PLATFORM_NVIDA__" + #endif + + #if __has_include(<cuda.h>) && __has_include(<cudaTypedefs.h>) && __has_include(<cuda_runtime.h>) && __has_include(<cublas_v2.h>) && !defined(USE_CUDA_FALLBACK_HEADER) + #include <cuda.h> + #include <cudaTypedefs.h> + #include <cuda_runtime.h> + #include <cublas_v2.h> + + #else + /* Add a poor man's fallback declaration. */ + #if defined(USE_CUDA_FALLBACK_HEADER) + // no warning + #elif !__has_include(<cuda.h>) + #warning "Using GCC's cuda.h as fallback for cuda.h" + #elif !__has_include(<cudaTypedefs.h>) + #warning "Using GCC's cuda.h as fallback for cudaTypedefs.h" + #elif !__has_include(<cuda_runtime.h>) + #warning "Using GCC's cuda.h as fallback for cuda_runtime.h" + #else + #warning "Using GCC's cuda.h as fallback for cublas_v2.h" + #endif + #include "../../../include/cuda/cuda.h" + + typedef enum { + CUBLAS_STATUS_SUCCESS = 0, + } cublasStatus_t; + + typedef CUstream cudaStream_t; + typedef struct cublasContext* cublasHandle_t; + + #define cublasCreate cublasCreate_v2 + cublasStatus_t cublasCreate_v2 (cublasHandle_t *); + + #define cublasSetStream cublasSetStream_v2 + cublasStatus_t cublasSetStream_v2 (cublasHandle_t, cudaStream_t); + + #define cublasDaxpy cublasDaxpy_v2 + cublasStatus_t cublasDaxpy_v2(cublasHandle_t, int, const double*, const double*, int, double*, int); + #endif + + #define HIPBLAS_STATUS_SUCCESS CUBLAS_STATUS_SUCCESS + #define hipblasStatus_t cublasStatus_t + #define hipStream_t cudaStream_t + #define hipblasHandle_t cublasHandle_t + #define hipblasCreate cublasCreate + #define hipblasSetStream cublasSetStream + #define hipblasDaxpy cublasDaxpy +#endif + +static int used_variant = 0; + +void +run_hipBlasdaxpy (int n, double da, const double *dx, int incx, double *dy, int incy, omp_interop_t obj) +{ + used_variant = 1; + + omp_interop_rc_t res; + hipblasStatus_t stat; + + omp_intptr_t fr = omp_get_interop_int(obj, omp_ipr_fr_id, &res); + assert (res == omp_irc_success && fr == omp_ifr_hip); + + hipStream_t stream = (hipStream_t) omp_get_interop_ptr (obj, omp_ipr_targetsync, &res); + assert (res == omp_irc_success); + + hipblasHandle_t handle; + stat = hipblasCreate (&handle); + assert (stat == HIPBLAS_STATUS_SUCCESS); + + stat = hipblasSetStream (handle, stream); + assert (stat == HIPBLAS_STATUS_SUCCESS); + + /* 'da' can be in host or device space, 'dx' and 'dy' must be in device space. 
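     That is why the declare variant directive below requests adjust_args (need_device_ptr : dx, dy).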
*/ + stat = hipblasDaxpy (handle, n, &da, dx, 1, dy, 1) ; + assert (stat == HIPBLAS_STATUS_SUCCESS); +} + +#if defined(__HIP_PLATFORM_AMD__) +#pragma omp declare variant(run_hipBlasdaxpy) \ + match(construct={dispatch}, target_device={kind(nohost), arch("amdgcn")}) \ + adjust_args(need_device_ptr : dx, dy) \ + append_args(interop(targetsync, prefer_type("hip"))) +#elif defined(__HIP_PLATFORM_NVIDIA__) +#pragma omp declare variant(run_hipBlasdaxpy) \ + match(construct={dispatch}, target_device={kind(nohost), arch("nvptx")}) \ + adjust_args(need_device_ptr : dx, dy) \ + append_args(interop(targetsync, prefer_type("hip"))) +#else + #error "wrong platform" +#endif + +void +run_daxpy (int n, double da, const double *dx, int incx, double *dy, int incy) +{ + used_variant = 2; + + if (incx == 1 && incy == 1) + #pragma omp simd + for (int i = 0; i < n; i++) + dy[i] += da * dx[i]; + else + { + int ix = 0; + int iy = 0; + for (int i = 0; i < n; i++) + { + dy[iy] += da * dx[ix]; + ix += incx; + iy += incy; + } + } +} + + +void +run_test (int dev) +{ + constexpr int N = 1024; + + // A = {1,2,...,N} + // B = {-1, -2, ..., N} + // B' = daxpy (N, 3, A, incx=1, B, incy=1) + // = B + 3*A + // -> B' = {0, 2, 4, 6, ... } + + double A[N], B[N]; + double factor = 3.0; + for (int i = 0; i < N; i++) + { + A[i] = i; + B[i] = -i; + } + + if (dev != omp_initial_device && dev != omp_get_num_devices ()) + { + #pragma omp target enter data device(dev) map(A, B) + } + + used_variant = 99; + #pragma omp dispatch device(dev) + run_daxpy (N, factor, A, 1, B, 1); + + if (dev != omp_initial_device && dev != omp_get_num_devices ()) + { + #pragma omp target exit data device(dev) map(release: A) map(from: B) + + int tmp = omp_get_default_device (); + omp_set_default_device (dev); +#if defined(__HIP_PLATFORM_AMD__) + if (on_device_arch_gcn ()) +#else + if (on_device_arch_nvptx ()) +#endif + assert (used_variant == 1); + else + assert (used_variant == 2); + omp_set_default_device (tmp); + } + else + assert (used_variant == 2); + + for (int i = 0; i < N; i++) + assert (B[i] == 2*i); +} + +int +main () +{ + int ndev = omp_get_num_devices (); + + for (int dev = 0; dev <= ndev; dev++) + run_test (dev); + run_test (omp_initial_device); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/interop-hsa.c b/libgomp/testsuite/libgomp.c/interop-hsa.c new file mode 100644 index 0000000..21ac91c --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hsa.c @@ -0,0 +1,205 @@ +/* { dg-additional-options "-ldl" } */ +/* { dg-require-effective-target offload_device_gcn } + The 'asm' insert is valid for GCN only: + { dg-additional-options -foffload=amdgcn-amdhsa } */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <omp.h> +#include <assert.h> +#include <dlfcn.h> +#include "../../../include/hsa.h" +#include "../../config/gcn/libgomp-gcn.h" + +#define STACKSIZE (100 * 1024) +#define HEAPSIZE (10 * 1024 * 1024) +#define ARENASIZE HEAPSIZE + +/* This code fragment must be optimized or else the host-fallback kernel has + * invalid ASM inserts. The rest of the file can be compiled safely at -O0. */ +#pragma omp declare target +uintptr_t __attribute__((optimize("O1"))) +get_kernel_ptr () +{ + uintptr_t val; + if (!omp_is_initial_device ()) + /* "main._omp_fn.0" is the name GCC gives the first OpenMP target + * region in the "main" function. + * The ".kd" suffix is added by the LLVM assembler when it creates the + * kernel meta-data, and this is what we need to launch a kernel. 
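+   * The asm below fetches the current PC with s_getpc_b64 and adds the
+   * rel32 offset of main._omp_fn.0.kd, i.e. it computes the address of
+   * that kernel descriptor at run time without touching libgomp's tables.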
*/ + asm ("s_getpc_b64 %0\n\t" + "s_add_u32 %L0, %L0, main._omp_fn.0.kd@rel32@lo+4\n\t" + "s_addc_u32 %H0, %H0, main._omp_fn.0.kd@rel32@hi+4" + : "=Sg"(val)); + return val; +} +#pragma omp end declare target + +int +main(int argc, char** argv) +{ + + /* Load the HSA runtime DLL. */ + void *hsalib = dlopen ("libhsa-runtime64.so.1", RTLD_LAZY); + assert (hsalib); + + hsa_status_t (*hsa_signal_create) (hsa_signal_value_t initial_value, + uint32_t num_consumers, + const hsa_agent_t *consumers, + hsa_signal_t *signal) + = dlsym (hsalib, "hsa_signal_create"); + assert (hsa_signal_create); + + uint64_t (*hsa_queue_load_write_index_relaxed) (const hsa_queue_t *queue) + = dlsym (hsalib, "hsa_queue_load_write_index_relaxed"); + assert (hsa_queue_load_write_index_relaxed); + + void (*hsa_signal_store_relaxed) (hsa_signal_t signal, + hsa_signal_value_t value) + = dlsym (hsalib, "hsa_signal_store_relaxed"); + assert (hsa_signal_store_relaxed); + + hsa_signal_value_t (*hsa_signal_wait_relaxed) (hsa_signal_t signal, + hsa_signal_condition_t condition, + hsa_signal_value_t compare_value, + uint64_t timeout_hint, + hsa_wait_state_t wait_state_hint) + = dlsym (hsalib, "hsa_signal_wait_relaxed"); + assert (hsa_signal_wait_relaxed); + + void (*hsa_queue_store_write_index_relaxed) (const hsa_queue_t *queue, + uint64_t value) + = dlsym (hsalib, "hsa_queue_store_write_index_relaxed"); + assert (hsa_queue_store_write_index_relaxed); + + hsa_status_t (*hsa_signal_destroy) (hsa_signal_t signal) + = dlsym (hsalib, "hsa_signal_destroy"); + assert (hsa_signal_destroy); + + /* Set up the device data environment. */ + int test_data_value = 0; +#pragma omp target enter data map(test_data_value) + + /* Get the interop details. */ + int device_num = omp_get_default_device(); + hsa_agent_t *gpu_agent; + hsa_queue_t *hsa_queue = NULL; + + omp_interop_t interop = omp_interop_none; +#pragma omp interop init(target, targetsync, prefer_type("hsa"): interop) device(device_num) + assert (interop != omp_interop_none); + + omp_interop_rc_t retcode; + omp_interop_fr_t fr = omp_get_interop_int (interop, omp_ipr_fr_id, &retcode); + assert (retcode == omp_irc_success); + assert (fr == omp_ifr_hsa); + + gpu_agent = omp_get_interop_ptr(interop, omp_ipr_device, &retcode); + assert (retcode == omp_irc_success); + + hsa_queue = omp_get_interop_ptr(interop, omp_ipr_targetsync, &retcode); + assert (retcode == omp_irc_success); + assert (hsa_queue); + + /* Call an offload kernel via OpenMP/libgomp. + * + * This kernel serves two purposes: + * 1) Lookup the device-side load-address of itself (thus avoiding the + * need to access the libgomp internals). + * 2) Count how many times it is called. + * We then call it once using OpenMP, and once manually, and check + * the counter reads "2". */ + uint64_t kernel_object = 0; +#pragma omp target map(from:kernel_object) map(present,alloc:test_data_value) + { + kernel_object = get_kernel_ptr (); + ++test_data_value; + } + + assert (kernel_object != 0); + + /* Configure the same kernel to run again, using HSA manually this time. */ + hsa_status_t status; + hsa_signal_t signal; + status = hsa_signal_create(1, 0, NULL, &signal); + assert (status == HSA_STATUS_SUCCESS); + + /* The kernel is built by GCC for OpenMP, so we need to pass the same + * data pointers that libgomp would pass in. 
*/ + struct { + uintptr_t test_data_value; + uintptr_t kernel_object; + } tgtaddrs; + +#pragma omp target data use_device_addr(test_data_value) + { + tgtaddrs.test_data_value = (uintptr_t)&test_data_value; + tgtaddrs.kernel_object = (uintptr_t)omp_target_alloc (8, device_num); + } + + /* We also need to duplicate the launch ABI used by plugin-gcn.c. */ + struct kernargs_abi args; /* From libgomp-gcn.h. */ + args.dummy1 = (int64_t)&tgtaddrs; + args.out_ptr = (int64_t)malloc (sizeof (struct output)); /* Host side. */ + args.heap_ptr = (int64_t)omp_target_alloc (HEAPSIZE, device_num); + args.arena_ptr = (int64_t)omp_target_alloc (ARENASIZE, device_num); + args.stack_ptr = (int64_t)omp_target_alloc (STACKSIZE, device_num); + args.arena_size_per_team = ARENASIZE; + args.stack_size_per_thread = STACKSIZE; + + /* Build the HSA dispatch packet, and insert it into the queue. */ + uint64_t packet_id = hsa_queue_load_write_index_relaxed (hsa_queue); + const uint32_t queueMask = hsa_queue->size - 1; + hsa_kernel_dispatch_packet_t *dispatch_packet = + &(((hsa_kernel_dispatch_packet_t *) + (hsa_queue->base_address))[packet_id & queueMask]); + + dispatch_packet->setup = 3 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + dispatch_packet->workgroup_size_x = 1; + dispatch_packet->workgroup_size_y = 64; + dispatch_packet->workgroup_size_z = 1; + dispatch_packet->grid_size_x = 1; + dispatch_packet->grid_size_y = 64; + dispatch_packet->grid_size_z = 1; + dispatch_packet->completion_signal = signal; + dispatch_packet->kernel_object = kernel_object; + dispatch_packet->kernarg_address = &args; + dispatch_packet->private_segment_size = 0; + dispatch_packet->group_segment_size = 1536; + + uint16_t header = 0; + header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + + /* Finish writing the packet header with an atomic release. */ + __atomic_store_n((uint16_t*)dispatch_packet, header, __ATOMIC_RELEASE); + + hsa_queue_store_write_index_relaxed (hsa_queue, packet_id + 1); + + ;/* Run the kernel and wait for it to complete. */ + hsa_signal_store_relaxed(hsa_queue->doorbell_signal, packet_id); + while (hsa_signal_wait_relaxed(signal, HSA_SIGNAL_CONDITION_LT, 1, + UINT64_MAX, HSA_WAIT_STATE_ACTIVE) != 0) + ; + + /* Clean up HSA. */ + hsa_signal_destroy(signal); + free ((void*)args.out_ptr); + omp_target_free ((void*)args.heap_ptr, device_num); + omp_target_free ((void*)args.arena_ptr, device_num); + omp_target_free ((void*)args.stack_ptr, device_num); + omp_target_free ((void*)tgtaddrs.kernel_object, device_num); + + /* Clean up OpenMP. */ + #pragma omp interop destroy(interop) + + /* Bring the data back from the device. */ +#pragma omp target exit data map(test_data_value) + + /* Ensure the kernel was called twice. Once by OpenMP, once by HSA. 
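+   * The kernel increments test_data_value once per invocation, and the
+   * exit data map above copies the final value back to the host.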
*/ + assert (test_data_value == 2); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/target-map-zero-sized-2.c b/libgomp/testsuite/libgomp.c/target-map-zero-sized-2.c new file mode 100644 index 0000000..3220828 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-map-zero-sized-2.c @@ -0,0 +1,74 @@ +int +main () +{ + int i, n; + int data[] = {1,2}; + struct S { int **ptrset; }; + +// ----------------------------------- + +/* The produced mapping for sptr1->ptrset[i][:n] + + GOMP_MAP_STRUCT (size = 1) + GOMP_MAP_ZERO_LEN_ARRAY_SECTION + GOMP_MAP_ZERO_LEN_ARRAY_SECTION + GOMP_MAP_ATTACH + GOMP_MAP_ATTACH -> attaching to 2nd GOMP_MAP_ZERO_LEN_ARRAY_SECTION + +which get split into 3 separate map_vars call; in particular, +the latter is separate and points to an unmpapped variable. + +Thus, it failed with: + libgomp: pointer target not mapped for attach */ + + struct S s1, *sptr1; + s1.ptrset = (int **) __builtin_malloc (sizeof(void*) * 3); + s1.ptrset[0] = data; + s1.ptrset[1] = data; + s1.ptrset[2] = data; + sptr1 = &s1; + + i = 1; + n = 0; + #pragma omp target enter data map(sptr1[:1], sptr1->ptrset[:3]) + #pragma omp target enter data map(sptr1->ptrset[i][:n]) + + #pragma omp target exit data map(sptr1->ptrset[i][:n]) + #pragma omp target exit data map(sptr1[:1], sptr1->ptrset[:3]) + + __builtin_free (s1.ptrset); + +// ----------------------------------- + +/* The produced mapping for sptr2->ptrset[i][:n] is similar: + + GOMP_MAP_STRUCT (size = 1) + GOMP_MAP_ZERO_LEN_ARRAY_SECTION + GOMP_MAP_TO ! this one has now a finite size + GOMP_MAP_ATTACH + GOMP_MAP_ATTACH -> attach to the GOMP_MAP_TO + +As the latter GOMP_MAP_ATTACH has now a pointer target, +the attachment worked. */ + + struct S s2, *sptr2; + s2.ptrset = (int **) __builtin_malloc (sizeof(void*) * 3); + s2.ptrset[0] = data; + s2.ptrset[1] = data; + s2.ptrset[2] = data; + sptr2 = &s2; + + i = 1; + n = 2; + #pragma omp target enter data map(sptr2[:1], sptr2->ptrset[:3]) + #pragma omp target enter data map(sptr2->ptrset[i][:n]) + + #pragma omp target + if (sptr2->ptrset[1][0] != 1 || sptr2->ptrset[1][1] != 2) + __builtin_abort (); + + #pragma omp target exit data map(sptr2->ptrset[i][:n]) + #pragma omp target exit data map(sptr2[:1], sptr2->ptrset[:3]) + + __builtin_free (s2.ptrset); +} diff --git a/libgomp/testsuite/libgomp.c/target-map-zero-sized-3.c b/libgomp/testsuite/libgomp.c/target-map-zero-sized-3.c new file mode 100644 index 0000000..580c6ad --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-map-zero-sized-3.c @@ -0,0 +1,50 @@ +int +main () +{ + int i, n; + int data[] = {1,2}; + struct S { + int **ptrset; + int **ptrset2; + }; + + /* This is the same as target-map-zero-sized-3.c, but by mixing + mapped and non-mapped items, the mapping before the ATTACH + might (or here: is) not actually associated with the the + pointer used for attaching. Thus, if one does a simple + + if (openmp_p + && (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) + && mapnum == 1) + check in target.c's gomp_map_vars_internal will fail + as mapnum > 1 but still the map associated with this + ATTACH is in a different set. 
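+     In other words: the single 'enter data' directive with the two
+     zero-sized sections below already has mapnum > 1, while the pointer
+     targets needed by its ATTACH entries were mapped by the earlier
+     directives.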
*/ + + struct S s1, *sptr1; + s1.ptrset = (int **) __builtin_malloc (sizeof(void*) * 3); + s1.ptrset2 = (int **) __builtin_malloc (sizeof(void*) * 3); + s1.ptrset[0] = data; + s1.ptrset[1] = data; + s1.ptrset[2] = data; + s1.ptrset2[0] = data; + s1.ptrset2[1] = data; + s1.ptrset2[2] = data; + sptr1 = &s1; + + i = 1; + n = 0; + #pragma omp target enter data map(data) + #pragma omp target enter data map(sptr1[:1], sptr1->ptrset[:3], sptr1->ptrset2[:3]) + #pragma omp target enter data map(sptr1->ptrset[i][:n], sptr1->ptrset2[i][:n]) + + #pragma omp target map(sptr1->ptrset[i][:n], sptr1->ptrset2[i][:n]) + if (sptr1->ptrset2[1][0] != 1 || sptr1->ptrset2[1][1] != 2) + __builtin_abort (); + + #pragma omp target exit data map(sptr1->ptrset[i][:n], sptr1->ptrset2[i][:n]) + #pragma omp target exit data map(sptr1[:1], sptr1->ptrset[:3], sptr1->ptrset2[:3]) + #pragma omp target exit data map(data) + + __builtin_free (s1.ptrset); + __builtin_free (s1.ptrset2); +} diff --git a/libgomp/testsuite/libgomp.c/target-map-zero-sized.c b/libgomp/testsuite/libgomp.c/target-map-zero-sized.c new file mode 100644 index 0000000..7c4ab80 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-map-zero-sized.c @@ -0,0 +1,107 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O0" } */ + +/* Issue showed up in the real world when large data was distributed + over multiple MPI progresses - such that for one process n == 0 + happend at run time. + + Before map(var[:0]) and map(var[:n]) with n > 0 was handled, + this patch now also handles map(var[:n]) with n == 0. + + Failed before with "libgomp: pointer target not mapped for attach". */ + +/* Here, the base address is shifted - which should have no effect, + but must work as well. */ +void +with_offset () +{ + struct S { + int *ptr1, *ptr2; + }; + struct S s1, s2; + int *a, *b, *c, *d; + s1.ptr1 = (int *) 0L; + s1.ptr2 = (int *) 0xdeedbeef; + s2.ptr1 = (int *) 0L; + s2.ptr2 = (int *) 0xdeedbeef; + a = (int *) 0L; + b = (int *) 0xdeedbeef; + c = (int *) 0L; + d = (int *) 0xdeedbeef; + + int n1, n2, n3, n4; + n1 = n2 = n3 = n4 = 0; + + #pragma omp target enter data map(s1.ptr1[4:n1], s1.ptr2[6:n2], a[3:n3], b[2:n4]) + + #pragma omp target map(s2.ptr1[4:n1], s2.ptr2[2:n2], c[6:n3], d[9:n4]) + { + if (s2.ptr1 != (void *) 0L || s2.ptr2 != (void *) 0xdeedbeef + || c != (void *) 0L || d != (void *) 0xdeedbeef) + __builtin_abort (); + } + + #pragma omp target map(s1.ptr1[4:n1], s1.ptr2[6:n2], a[3:n3], b[2:n4]) + { + if (s1.ptr1 != (void *) 0L || s1.ptr2 != (void *) 0xdeedbeef + || a != (void *) 0L || b != (void *) 0xdeedbeef) + __builtin_abort (); + } + + #pragma omp target + { + if (s1.ptr1 != (void *) 0L || s1.ptr2 != (void *) 0xdeedbeef + || a != (void *) 0L || b != (void *) 0xdeedbeef) + __builtin_abort (); + } + + #pragma omp target exit data map(s1.ptr1[4:n1], s1.ptr2[6:n2], a[3:n3], b[2:n4]) +} + +int +main () +{ + struct S { + int *ptr1, *ptr2; + }; + struct S s1, s2; + int *a, *b, *c, *d; + s1.ptr1 = (int *) 0L; + s1.ptr2 = (int *) 0xdeedbeef; + s2.ptr1 = (int *) 0L; + s2.ptr2 = (int *) 0xdeedbeef; + a = (int *) 0L; + b = (int *) 0xdeedbeef; + c = (int *) 0L; + d = (int *) 0xdeedbeef; + + int n1, n2, n3, n4; + n1 = n2 = n3 = n4 = 0; + + #pragma omp target enter data map(s1.ptr1[:n1], s1.ptr2[:n2], a[:n3], b[:n4]) + + #pragma omp target map(s2.ptr1[:n1], s2.ptr2[:n2], c[:n3], d[:n4]) + { + if (s2.ptr1 != (void *) 0L || s2.ptr2 != (void *) 0xdeedbeef + || c != (void *) 0L || d != (void *) 0xdeedbeef) + __builtin_abort (); + } + + #pragma omp target map(s1.ptr1[:n1], 
s1.ptr2[:n2], a[:n3], b[:n4]) + { + if (s1.ptr1 != (void *) 0L || s1.ptr2 != (void *) 0xdeedbeef + || a != (void *) 0L || b != (void *) 0xdeedbeef) + __builtin_abort (); + } + + #pragma omp target + { + if (s1.ptr1 != (void *) 0L || s1.ptr2 != (void *) 0xdeedbeef + || a != (void *) 0L || b != (void *) 0xdeedbeef) + __builtin_abort (); + } + + #pragma omp target exit data map(s1.ptr1[:n1], s1.ptr2[:n2], a[:n3], b[:n4]) + + with_offset (); +} diff --git a/libgomp/testsuite/libgomp.fortran/alloc-comp-4.f90 b/libgomp/testsuite/libgomp.fortran/alloc-comp-4.f90 new file mode 100644 index 0000000..d5e982b --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/alloc-comp-4.f90 @@ -0,0 +1,75 @@ +! +! Check that mapping with map(var%tiles(1)) works. +! +! This uses deep mapping to handle the allocatable +! derived-type components +! +! The tricky part is that GCC generates intermittently +! an SSA_NAME that needs to be resolved. +! +module m +type t + integer, allocatable :: den1(:,:), den2(:,:) +end type t + +type t2 + type(t), allocatable :: tiles(:) +end type t2 +end + +use m +use iso_c_binding +implicit none (type, external) +type(t2), target :: var +logical :: is_self_map +type(C_ptr) :: pden1, pden2, ptiles, ptiles1 + +allocate(var%tiles(1)) +var%tiles(1)%den1 = reshape([1,2,3,4],[2,2]) +var%tiles(1)%den2 = reshape([11,22,33,44],[2,2]) + +ptiles = c_loc(var%tiles) +ptiles1 = c_loc(var%tiles(1)) +pden1 = c_loc(var%tiles(1)%den1) +pden2 = c_loc(var%tiles(1)%den2) + + +is_self_map = .false. +!$omp target map(to: is_self_map) + is_self_map = .true. +!$omp end target + +!$omp target enter data map(var%tiles(1)) + +!$omp target firstprivate(ptiles, ptiles1, pden1, pden2) + if (any (var%tiles(1)%den1 /= reshape([1,2,3,4],[2,2]))) stop 1 + if (any (var%tiles(1)%den2 /= reshape([11,22,33,44],[2,2]))) stop 2 + var%tiles(1)%den1 = var%tiles(1)%den1 + 5 + var%tiles(1)%den2 = var%tiles(1)%den2 + 7 + + if (is_self_map) then + if (.not. c_associated (ptiles, c_loc(var%tiles))) stop 3 + if (.not. c_associated (ptiles1, c_loc(var%tiles(1)))) stop 4 + if (.not. c_associated (pden1, c_loc(var%tiles(1)%den1))) stop 5 + if (.not. c_associated (pden2, c_loc(var%tiles(1)%den2))) stop 6 + else + if (c_associated (ptiles, c_loc(var%tiles))) stop 3 + if (c_associated (ptiles1, c_loc(var%tiles(1)))) stop 4 + if (c_associated (pden1, c_loc(var%tiles(1)%den1))) stop 5 + if (c_associated (pden2, c_loc(var%tiles(1)%den2))) stop 6 + endif +!$omp end target + +if (is_self_map) then + if (any (var%tiles(1)%den1 /= 5 + reshape([1,2,3,4],[2,2]))) stop 7 + if (any (var%tiles(1)%den2 /= 7 + reshape([11,22,33,44],[2,2]))) stop 8 +else + if (any (var%tiles(1)%den1 /= reshape([1,2,3,4],[2,2]))) stop 7 + if (any (var%tiles(1)%den2 /= reshape([11,22,33,44],[2,2]))) stop 8 +endif + +!$omp target exit data map(var%tiles(1)) + +if (any (var%tiles(1)%den1 /= 5 + reshape([1,2,3,4],[2,2]))) stop 7 +if (any (var%tiles(1)%den2 /= 7 + reshape([11,22,33,44],[2,2]))) stop 8 +end diff --git a/libgomp/testsuite/libgomp.fortran/allocatable-comp.f90 b/libgomp/testsuite/libgomp.fortran/allocatable-comp.f90 new file mode 100644 index 0000000..383ecba --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/allocatable-comp.f90 @@ -0,0 +1,53 @@ +implicit none +type t + integer, allocatable :: a, b(:) +end type t +type(t) :: x, y, z +integer :: i + +!$omp target map(to: x) + if (allocated(x%a)) stop 1 + if (allocated(x%b)) stop 2 +!$omp end target + +allocate(x%a, x%b(-4:6)) +x%b(:) = [(i, i=-4,6)] + +!$omp target map(to: x) + if (.not. 
allocated(x%a)) stop 3 + if (.not. allocated(x%b)) stop 4 + if (lbound(x%b,1) /= -4) stop 5 + if (ubound(x%b,1) /= 6) stop 6 + if (any (x%b /= [(i, i=-4,6)])) stop 7 +!$omp end target + + +! The following only works with arrays due to +! PR fortran/96668 + +!$omp target enter data map(to: y, z) + +!$omp target map(to: y, z) + if (allocated(y%b)) stop 8 + if (allocated(z%b)) stop 9 +!$omp end target + +allocate(y%b(5), z%b(3)) +y%b = 42 +z%b = 99 + +! (implicitly) 'tofrom' mapped +! Planned for OpenMP 6.0 (but common extension) +! OpenMP <= 5.0 unclear +!$omp target map(to: y) + if (.not.allocated(y%b)) stop 10 + if (any (y%b /= 42)) stop 11 +!$omp end target + +! always map: OpenMP 5.1 (clarified) +!$omp target map(always, tofrom: z) + if (.not.allocated(z%b)) stop 12 + if (any (z%b /= 99)) stop 13 +!$omp end target + +end diff --git a/libgomp/testsuite/libgomp.fortran/allocate-8a.f90 b/libgomp/testsuite/libgomp.fortran/allocate-8a.f90 new file mode 100644 index 0000000..5f6c8c1 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/allocate-8a.f90 @@ -0,0 +1,45 @@ +! { dg-additional-options "-fopenmp-allocators" } +! { dg-additional-options "-fdump-tree-omplower" } +program main + use iso_c_binding + use omp_lib + implicit none (type, external) + integer(omp_allocator_handle_kind):: alloc_h + integer :: i, N + integer(c_intptr_t) :: intptr + integer, allocatable :: A(:) + type(omp_alloctrait):: traits(1) = [omp_alloctrait(omp_atk_alignment, 128)] + + N = 10 + alloc_h = omp_init_allocator(omp_default_mem_space, 1, traits) + + !$omp allocate(A) allocator(alloc_h) + allocate(A(N)) + a(:) = [(i, i=1,N)] + if (mod (transfer (loc(a), intptr),128) /= 0) & + stop 1 + if (any (a /= [(i, i=1,N)])) & + stop 2 + deallocate(A) + !$omp allocate(A) allocator(alloc_h) align(512) + allocate(A(N)) + block + integer, allocatable :: B(:) + !$omp allocators allocate(allocator(alloc_h), align(256) : B) + allocate(B(N)) + B(:) = [(2*i, i=1,N)] + A(:) = B + if (mod (transfer (loc(B), intptr), 256) /= 0) & + stop 1 + ! end of scope deallocation + end block + if (mod (transfer (loc(a), intptr),512) /= 0) & + stop 1 + if (any (a /= [(2*i, i=1,N)])) & + stop 2 + deallocate(A) ! Must deallocate here - before deallocator is destroyed + call omp_destroy_allocator(alloc_h) + ! No auto dealloc of A because it is SAVE +end +! { dg-final { scan-tree-dump-times "__builtin_GOMP_alloc \\(" 3 "omplower" } } +! { dg-final { scan-tree-dump-times "__builtin_GOMP_free \\(" 3 "omplower" } } diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip-amd-full.F90 b/libgomp/testsuite/libgomp.fortran/interop-hip-amd-full.F90 new file mode 100644 index 0000000..eb2f437 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/interop-hip-amd-full.F90 @@ -0,0 +1,10 @@ +! { dg-do run { target { offload_device_gcn } } } +! { dg-do link { target { ! offload_device_gcn } } } + +! { dg-require-effective-target gomp_hipfort_module } +! { dg-require-effective-target gomp_libamdhip64 } +! { dg-additional-options "-lamdhip64" } + +#define HAVE_HIPFORT 1 + +#include "interop-hip.h" diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip-amd-no-module.F90 b/libgomp/testsuite/libgomp.fortran/interop-hip-amd-no-module.F90 new file mode 100644 index 0000000..0ebbe80 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/interop-hip-amd-no-module.F90 @@ -0,0 +1,9 @@ +! { dg-do run { target { offload_device_gcn } } } +! { dg-do link { target { ! offload_device_gcn } } } + +! { dg-require-effective-target gomp_libamdhip64 } +! 
{ dg-additional-options "-lamdhip64" } + +#define USE_HIP_FALLBACK_MODULE 1 + +#include "interop-hip.h" diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-full.F90 b/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-full.F90 new file mode 100644 index 0000000..d29a689 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-full.F90 @@ -0,0 +1,12 @@ +! { dg-do run { target { offload_device_nvptx } } } +! { dg-do link { target { ! offload_device_nvptx } } } + +! { dg-require-effective-target gomp_hipfort_module } +! { dg-require-effective-target openacc_cudart } +! { dg-require-effective-target openacc_cuda } +! { dg-additional-options "-lcuda -lcudart" } + +#define HAVE_HIPFORT 1 +#define USE_CUDA_NAMES 1 + +#include "interop-hip.h" diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90 b/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90 new file mode 100644 index 0000000..2063610 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90 @@ -0,0 +1,11 @@ +! { dg-do run { target { offload_device_nvptx } } } +! { dg-do link { target { ! offload_device_nvptx } } } + +! { dg-require-effective-target openacc_libcudart } +! { dg-require-effective-target openacc_libcuda } +! { dg-additional-options "-lcuda -lcudart" } + +#define USE_CUDA_NAMES 1 +#define USE_HIP_FALLBACK_MODULE 1 + +#include "interop-hip.h" diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip.h b/libgomp/testsuite/libgomp.fortran/interop-hip.h new file mode 100644 index 0000000..753ccce --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/interop-hip.h @@ -0,0 +1,214 @@ +! Minimal check whether HIP works - by checking whether the API routines +! seem to work. This includes a fallback if hipfort is not available + +#ifndef HAVE_HIPFORT +#ifndef USE_HIP_FALLBACK_MODULE +#if USE_CUDA_NAMES +#warning "Using fallback implementation for module hipfort as HAVE_HIPFORT is undefined (for NVIDA/CUDA)" +#else +#warning "Using fallback implementation for module hipfort as HAVE_HIPFORT is undefined - assume AMD as USE_CUDA_NAMES is unset" +#endif +#endif +module hipfort ! 
Minimal implementation for the testsuite + implicit none + + enum, bind(c) + enumerator :: hipSuccess = 0 + enumerator :: hipErrorNotSupported = 801 + end enum + + enum, bind(c) + enumerator :: hipDeviceAttributeClockRate = 5 + enumerator :: hipDeviceAttributeMaxGridDimX = 29 + end enum + + interface + integer(kind(hipSuccess)) function hipDeviceGetAttribute (ip, attr, dev) & +#if USE_CUDA_NAMES + bind(c, name="cudaDeviceGetAttribute") +#else + bind(c, name="hipDeviceGetAttribute") +#endif + use iso_c_binding, only: c_ptr, c_int + import + implicit none + type(c_ptr), value :: ip + integer(kind(hipDeviceAttributeClockRate)), value :: attr + integer(c_int), value :: dev + end + + integer(kind(hipSuccess)) function hipCtxGetApiVersion (ctx, ip) & +#if USE_CUDA_NAMES + bind(c, name="cudaCtxGetApiVersion") +#else + bind(c, name="hipCtxGetApiVersion") +#endif + use iso_c_binding, only: c_ptr + import + implicit none + type(c_ptr), value :: ctx, ip + end + + integer(kind(hipSuccess)) function hipStreamQuery (stream) & +#if USE_CUDA_NAMES + bind(c, name="cudaStreamQuery") +#else + bind(c, name="hipStreamQuery") +#endif + use iso_c_binding, only: c_ptr + import + implicit none + type(c_ptr), value :: stream + end + + integer(kind(hipSuccess)) function hipStreamGetFlags (stream, flags) & +#if USE_CUDA_NAMES + bind(c, name="cudaStreamGetFlags") +#else + bind(c, name="hipStreamGetFlags") +#endif + use iso_c_binding, only: c_ptr + import + implicit none + type(c_ptr), value :: stream + type(c_ptr), value :: flags + end + end interface +end module +#endif + +program main + use iso_c_binding, only: c_ptr, c_int, c_loc + use omp_lib + use hipfort + implicit none (type, external) + +! Only supported since CUDA 12.8 - skip for better compatibility +! ! Manally implement hipStreamGetDevice as hipfort misses it +! ! -> https://github.com/ROCm/hipfort/issues/238 +! interface +! integer(kind(hipSuccess)) function my_hipStreamGetDevice(stream, dev) & +!#if USE_CUDA_NAMES +! bind(c, name="cudaStreamGetDevice") +!#else +! bind(c, name="hipStreamGetDevice") +!#endif +! use iso_c_binding, only: c_ptr, c_int +! import +! implicit none +! type(c_ptr), value :: stream +! integer(c_int) :: dev +! end +! end interface + + integer(c_int), target :: ivar + integer(omp_interop_rc_kind) :: res + integer(omp_interop_kind) :: obj + integer(omp_interop_fr_kind) :: fr + integer(kind(hipSuccess)) :: hip_err + integer(c_int) :: hip_dev, dev_stream + type(c_ptr) :: hip_ctx, hip_sm + + logical :: vendor_is_amd + + obj = omp_interop_none + + !$omp interop init(target, targetsync, prefer_type("hip") : obj) + + fr = omp_get_interop_int (obj, omp_ipr_fr_id, res) + if (res /= omp_irc_success) error stop 1 + if (fr /= omp_ifr_hip) error stop 1 + + ivar = omp_get_interop_int (obj, omp_ipr_vendor, res) + if (ivar == 1) then ! AMD + vendor_is_amd = .true. + else if (ivar == 11) then ! Nvidia + vendor_is_amd = .false. + else + error stop 1 ! Unknown + endif +#if USE_CUDA_NAMES + if (vendor_is_amd) error stop 1 +#else + if (.not. vendor_is_amd) error stop 1 +#endif + + ! Check whether the omp_ipr_device -> hipDevice_t yields a valid device. + + hip_dev = omp_get_interop_int (obj, omp_ipr_device, res) + if (res /= omp_irc_success) error stop 1 + +! AMD messed up in Fortran with the attribute handling, missing the +! translation table it has for C. +block + enum, bind(c) + enumerator :: cudaDevAttrClockRate = 13 + enumerator :: cudaDevAttrMaxGridDimX = 5 + end enum + + ! Assume a clock size is available and > 1 GHz; value is in kHz. + ! 
c_loc is completely bogus, but as AMD messed up the interface ... + ! Cf. https://github.com/ROCm/hipfort/issues/239 +if (vendor_is_amd) then + hip_err = hipDeviceGetAttribute (c_loc(ivar), hipDeviceAttributeClockRate, hip_dev) +else + hip_err = hipDeviceGetAttribute (c_loc(ivar), cudaDevAttrClockRate, hip_dev) +endif + if (hip_err /= hipSuccess) error stop 1 + if (ivar <= 1000000) error stop 1 ! in kHz + + ! Assume that the MaxGridDimX is available and > 1024 + ! c_loc is completely bogus, but as AMD messed up the interface ... + ! Cf. https://github.com/ROCm/hipfort/issues/239 +if (vendor_is_amd) then + hip_err = hipDeviceGetAttribute (c_loc(ivar), hipDeviceAttributeMaxGridDimX, hip_dev) +else + hip_err = hipDeviceGetAttribute (c_loc(ivar), cudaDevAttrMaxGridDimX, hip_dev) +endif + if (hip_err /= hipSuccess) error stop 1 + if (ivar <= 1024) error stop 1 +end block + + + ! Check whether the omp_ipr_device_context -> hipCtx_t yields a context. + + hip_ctx = omp_get_interop_ptr (obj, omp_ipr_device_context, res) + if (res /= omp_irc_success) error stop 1 + +! ! Assume API Version > 0 for Nvidia, hipErrorNotSupported for AMD. */ +! ivar = -99 +! ! AMD deprectated hipCtxGetApiVersion (in C/C++) +! hip_err = hipCtxGetApiVersion (hip_ctx, c_loc(ivar)) +! +! if (vendor_is_amd) then +! if (hip_err /= hipErrorNotSupported .or. ivar /= -99) error stop 1 +! else +! if (hip_err /= hipSuccess) error stop 1 +! if (ivar <= 0) error stop 1 +! end if + + + ! Check whether the omp_ipr_targetsync -> hipStream_t yields a stream. + + hip_sm = omp_get_interop_ptr (obj, omp_ipr_targetsync, res) + if (res /= omp_irc_success) error stop 1 + +! Skip as this is only in CUDA 12.8 +! dev_stream = 99 +! ! Not (yet) implemented: https://github.com/ROCm/hipfort/issues/238 +! ! hip_err = hipStreamGetDevice (hip_sm, dev_stream) +! hip_err = my_hipStreamGetDevice (hip_sm, dev_stream) +! if (hip_err /= hipSuccess) error stop 1 +! if (dev_stream /= hip_dev) error stop 1 + + ! Get flags of the stream + hip_err = hipStreamGetFlags (hip_sm, c_loc (ivar)) + if (hip_err /= hipSuccess) error stop 1 + ! Accept any value + + ! All jobs should have been completed (as there were none none) + hip_err = hipStreamQuery (hip_sm) + if (hip_err /= hipSuccess) error stop 1 + + !$omp interop destroy(obj) +end diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-3.f90 b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-3.f90 new file mode 100644 index 0000000..9d48c7c --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-3.f90 @@ -0,0 +1,121 @@ +type t2 + integer x, y, z +end type t2 +type t + integer, allocatable :: A + integer, allocatable :: B(:) + type(t2), allocatable :: C + type(t2), allocatable :: D(:,:) +end type t + +type t3 + type(t) :: Q + type(t) :: R(5) +end type + +type(t) :: var, var2 +type(t3) :: var3, var4 + +! -------------------------------------- +! Assign + allocate +var%A = 45 +var%B = [1,2,3] +var%C = t2(6,5,4) +var%D = reshape([t2(1,2,3), t2(4,5,6), t2(11,12,13), t2(14,15,16)], [2,2]) + +! 
Assign + allocate +var2%A = 145 +var2%B = [991,992,993] +var2%C = t2(996,995,994) +var2%D = reshape([t2(199,299,399), t2(499,599,699), t2(1199,1299,1399), t2(1499,1599,1699)], [2,2]) + + +!$omp target map(to: var) map(tofrom: var2) + call foo(var, var2) +!$omp end target + +if (var2%A /= 45) stop 9 +if (any (var2%B /= [1,2,3])) stop 10 +if (var2%C%x /= 6) stop 11 +if (var2%C%y /= 5) stop 11 +if (var2%C%z /= 4) stop 11 +if (any (var2%D(:,:)%x /= reshape([1, 4, 11, 14], [2,2]))) stop 12 +if (any (var2%D(:,:)%y /= reshape([2, 5, 12, 15], [2,2]))) stop 12 +if (any (var2%D(:,:)%z /= reshape([3, 6, 13, 16], [2,2]))) stop 12 + +! -------------------------------------- +! Assign + allocate +var3%Q%A = 45 +var3%Q%B = [1,2,3] +var3%Q%C = t2(6,5,4) +var3%Q%D = reshape([t2(1,2,3), t2(4,5,6), t2(11,12,13), t2(14,15,16)], [2,2]) + +var3%R(2)%A = 45 +var3%R(2)%B = [1,2,3] +var3%R(2)%C = t2(6,5,4) +var3%R(2)%D = reshape([t2(1,2,3), t2(4,5,6), t2(11,12,13), t2(14,15,16)], [2,2]) + +! Assign + allocate +var4%Q%A = 145 +var4%Q%B = [991,992,993] +var4%Q%C = t2(996,995,994) +var4%Q%D = reshape([t2(199,299,399), t2(499,599,699), t2(1199,1299,1399), t2(1499,1599,1699)], [2,2]) + +var4%R(3)%A = 145 +var4%R(3)%B = [991,992,993] +var4%R(3)%C = t2(996,995,994) +var4%R(3)%D = reshape([t2(199,299,399), t2(499,599,699), t2(1199,1299,1399), t2(1499,1599,1699)], [2,2]) + +!$omp target map(to: var3%Q) map(tofrom: var4%Q) + call foo(var3%Q, var4%Q) +!$omp end target + +!$omp target map(to: var3%R(2)) map(tofrom: var4%R(3)) + call foo(var3%R(2), var4%R(3)) +!$omp end target + +if (var4%Q%A /= 45) stop 13 +if (any (var4%Q%B /= [1,2,3])) stop 14 +if (var4%Q%C%x /= 6) stop 15 +if (var4%Q%C%y /= 5) stop 15 +if (var4%Q%C%z /= 4) stop 15 +if (any (var4%Q%D(:,:)%x /= reshape([1, 4, 11, 14], [2,2]))) stop 16 +if (any (var4%Q%D(:,:)%y /= reshape([2, 5, 12, 15], [2,2]))) stop 16 +if (any (var4%Q%D(:,:)%z /= reshape([3, 6, 13, 16], [2,2]))) stop 16 + +if (var4%R(3)%A /= 45) stop 17 +if (any (var4%R(3)%B /= [1,2,3])) stop 18 +if (var4%R(3)%C%x /= 6) stop 19 +if (var4%R(3)%C%y /= 5) stop 19 +if (var4%R(3)%C%z /= 4) stop 19 +if (any (var4%R(3)%D(:,:)%x /= reshape([1, 4, 11, 14], [2,2]))) stop 20 +if (any (var4%R(3)%D(:,:)%y /= reshape([2, 5, 12, 15], [2,2]))) stop 20 +if (any (var4%R(3)%D(:,:)%z /= reshape([3, 6, 13, 16], [2,2]))) stop 20 + +contains + subroutine foo(x, y) + type(t) :: x, y + if (x%A /= 45) stop 1 + if (any (x%B /= [1,2,3])) stop 2 + if (x%C%x /= 6) stop 3 + if (x%C%y /= 5) stop 3 + if (x%C%z /= 4) stop 3 + if (any (x%D(:,:)%x /= reshape([1, 4, 11, 14], [2,2]))) stop 4 + if (any (x%D(:,:)%y /= reshape([2, 5, 12, 15], [2,2]))) stop 4 + if (any (x%D(:,:)%z /= reshape([3, 6, 13, 16], [2,2]))) stop 4 + + if (y%A /= 145) stop 5 + if (any (y%B /= [991,992,993])) stop 6 + if (y%C%x /= 996) stop 7 + if (y%C%y /= 995) stop 7 + if (y%C%z /= 994) stop 7 + if (any (y%D(:,:)%x /= reshape([199, 499, 1199, 1499], [2,2]))) stop 8 + if (any (y%D(:,:)%y /= reshape([299, 599, 1299, 1599], [2,2]))) stop 8 + if (any (y%D(:,:)%z /= reshape([399, 699, 1399, 1699], [2,2]))) stop 8 + + y%A = x%A + y%B(:) = x%B + y%C = x%C + y%D(:,:) = x%D(:,:) + end +end diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-4.f90 b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-4.f90 new file mode 100644 index 0000000..fb9859d --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-4.f90 @@ -0,0 +1,124 @@ +type t2 + integer x, y, z +end type t2 +type t + integer, allocatable :: A + integer, allocatable :: B(:) + type(t2), allocatable :: C 
+ type(t2), allocatable :: D(:,:) +end type t + +type t3 + type(t) :: Q + type(t) :: R(5) +end type + +type(t) :: var, var2 +type(t3) :: var3, var4 + +! -------------------------------------- +! Assign + allocate +var%A = 45 +var%B = [1,2,3] +var%C = t2(6,5,4) +var%D = reshape([t2(1,2,3), t2(4,5,6), t2(11,12,13), t2(14,15,16)], [2,2]) + +! Assign + allocate +var2%A = 145 +var2%B = [991,992,993] +var2%C = t2(996,995,994) +var2%D = reshape([t2(199,299,399), t2(499,599,699), t2(1199,1299,1399), t2(1499,1599,1699)], [2,2]) + + +!$omp target map(to: var%A, var%B, var%C, var%D) & +!$omp& map(tofrom: var2%A, var2%B, var2%C, var2%D) + call foo(var, var2) +!$omp end target + +if (var2%A /= 45) stop 9 +if (any (var2%B /= [1,2,3])) stop 10 +if (var2%C%x /= 6) stop 11 +if (var2%C%y /= 5) stop 11 +if (var2%C%z /= 4) stop 11 +if (any (var2%D(:,:)%x /= reshape([1, 4, 11, 14], [2,2]))) stop 12 +if (any (var2%D(:,:)%y /= reshape([2, 5, 12, 15], [2,2]))) stop 12 +if (any (var2%D(:,:)%z /= reshape([3, 6, 13, 16], [2,2]))) stop 12 + +! -------------------------------------- +! Assign + allocate +var3%Q%A = 45 +var3%Q%B = [1,2,3] +var3%Q%C = t2(6,5,4) +var3%Q%D = reshape([t2(1,2,3), t2(4,5,6), t2(11,12,13), t2(14,15,16)], [2,2]) + +var3%R(2)%A = 45 +var3%R(2)%B = [1,2,3] +var3%R(2)%C = t2(6,5,4) +var3%R(2)%D = reshape([t2(1,2,3), t2(4,5,6), t2(11,12,13), t2(14,15,16)], [2,2]) + +! Assign + allocate +var4%Q%A = 145 +var4%Q%B = [991,992,993] +var4%Q%C = t2(996,995,994) +var4%Q%D = reshape([t2(199,299,399), t2(499,599,699), t2(1199,1299,1399), t2(1499,1599,1699)], [2,2]) + +var4%R(3)%A = 145 +var4%R(3)%B = [991,992,993] +var4%R(3)%C = t2(996,995,994) +var4%R(3)%D = reshape([t2(199,299,399), t2(499,599,699), t2(1199,1299,1399), t2(1499,1599,1699)], [2,2]) + +!$omp target map(to: var3%Q%A, var3%Q%B, var3%Q%C, var3%Q%D) & +!$omp& map(tofrom: var4%Q%A, var4%Q%B, var4%Q%C, var4%Q%D) + call foo(var3%Q, var4%Q) +!$omp end target + +if (var4%Q%A /= 45) stop 13 +if (any (var4%Q%B /= [1,2,3])) stop 14 +if (var4%Q%C%x /= 6) stop 15 +if (var4%Q%C%y /= 5) stop 15 +if (var4%Q%C%z /= 4) stop 15 +if (any (var4%Q%D(:,:)%x /= reshape([1, 4, 11, 14], [2,2]))) stop 16 +if (any (var4%Q%D(:,:)%y /= reshape([2, 5, 12, 15], [2,2]))) stop 16 +if (any (var4%Q%D(:,:)%z /= reshape([3, 6, 13, 16], [2,2]))) stop 16 + +!$omp target map(to: var3%R(2)%A, var3%R(2)%B, var3%R(2)%C, var3%R(2)%D) & +!$omp& map(tofrom: var4%R(3)%A, var4%R(3)%B, var4%R(3)%C, var4%R(3)%D) + call foo(var3%R(2), var4%R(3)) +!$omp end target + +if (var4%R(3)%A /= 45) stop 17 +if (any (var4%R(3)%B /= [1,2,3])) stop 18 +if (var4%R(3)%C%x /= 6) stop 19 +if (var4%R(3)%C%y /= 5) stop 19 +if (var4%R(3)%C%z /= 4) stop 19 +if (any (var4%R(3)%D(:,:)%x /= reshape([1, 4, 11, 14], [2,2]))) stop 20 +if (any (var4%R(3)%D(:,:)%y /= reshape([2, 5, 12, 15], [2,2]))) stop 20 +if (any (var4%R(3)%D(:,:)%z /= reshape([3, 6, 13, 16], [2,2]))) stop 20 + +contains + subroutine foo(x, y) + type(t) :: x, y + if (x%A /= 45) stop 1 + if (any (x%B /= [1,2,3])) stop 2 + if (x%C%x /= 6) stop 3 + if (x%C%y /= 5) stop 3 + if (x%C%z /= 4) stop 3 + if (any (x%D(:,:)%x /= reshape([1, 4, 11, 14], [2,2]))) stop 4 + if (any (x%D(:,:)%y /= reshape([2, 5, 12, 15], [2,2]))) stop 4 + if (any (x%D(:,:)%z /= reshape([3, 6, 13, 16], [2,2]))) stop 4 + + if (y%A /= 145) stop 5 + if (any (y%B /= [991,992,993])) stop 6 + if (y%C%x /= 996) stop 7 + if (y%C%y /= 995) stop 7 + if (y%C%z /= 994) stop 7 + if (any (y%D(:,:)%x /= reshape([199, 499, 1199, 1499], [2,2]))) stop 8 + if (any (y%D(:,:)%y /= reshape([299, 599, 1299, 
1599], [2,2]))) stop 8 + if (any (y%D(:,:)%z /= reshape([399, 699, 1399, 1699], [2,2]))) stop 8 + + y%A = x%A + y%B(:) = x%B + y%C = x%C + y%D(:,:) = x%D(:,:) + end +end diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-5.f90 b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-5.f90 new file mode 100644 index 0000000..b2e36b2 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-5.f90 @@ -0,0 +1,53 @@ +implicit none +type t + integer, allocatable :: a, b(:) +end type t +type(t) :: x, y, z +integer :: i + +!$omp target + if (allocated(x%a)) stop 1 + if (allocated(x%b)) stop 2 +!$omp end target + +allocate(x%a, x%b(-4:6)) +x%b(:) = [(i, i=-4,6)] + +!$omp target + if (.not. allocated(x%a)) stop 3 + if (.not. allocated(x%b)) stop 4 + if (lbound(x%b,1) /= -4) stop 5 + if (ubound(x%b,1) /= 6) stop 6 + if (any (x%b /= [(i, i=-4,6)])) stop 7 +!$omp end target + + +! The following only works with arrays due to +! PR fortran/96668 + +!$omp target enter data map(to: y, z) + +!$omp target + if (allocated(y%b)) stop 8 + if (allocated(z%b)) stop 9 +!$omp end target + +allocate(y%b(5), z%b(3)) +y%b = 42 +z%b = 99 + +! (implicitly) 'tofrom' mapped +! Planned for OpenMP 6.0 (but common extension) +! OpenMP <= 5.0 unclear +!$omp target + if (.not.allocated(y%b)) stop 10 + if (any (y%b /= 42)) stop 11 +!$omp end target + +! always map: OpenMP 5.1 (clarified) +!$omp target map(always, tofrom: z) + if (.not.allocated(z%b)) stop 12 + if (any (z%b /= 99)) stop 13 +!$omp end target + +end diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-6.f90 b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-6.f90 new file mode 100644 index 0000000..48d4aea --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-6.f90 @@ -0,0 +1,308 @@ +! NOTE: This code uses POINTER. +! While map(p, var%p) etc. maps the ptr/ptr comp p / var%p (incl. allocatable comps), +! map(var) does not map var%p. + +use iso_c_binding +implicit none +type t2 + integer, allocatable :: x, y, z +end type t2 +type t + integer, pointer :: A => null() + integer, pointer :: B(:) => null() + type(t2), pointer :: C => null() + type(t2), pointer :: D(:,:) => null() +end type t + +type t3 + type(t) :: Q + type(t) :: R(5) +end type + +type(t) :: var, var2 +type(t3) :: var3, var4 +integer(c_intptr_t) :: iptr + +! -------------------------------------- +! Assign + allocate +allocate (var%A, source=45) +allocate (var%B(3), source=[1,2,3]) +allocate (var%C) +var%C%x = 6; var%C%y = 5; var%C%z = 4 +allocate (var%D(2,2)) +var%D(1,1)%x = 1 +var%D(1,1)%y = 2 +var%D(1,1)%z = 3 +var%D(2,1)%x = 4 +var%D(2,1)%y = 5 +var%D(2,1)%z = 6 +var%D(1,2)%x = 11 +var%D(1,2)%y = 12 +var%D(1,2)%z = 13 +var%D(2,2)%x = 14 +var%D(2,2)%y = 15 +var%D(2,2)%z = 16 + +! Assign + allocate +allocate (var2%A, source=145) +allocate (var2%B, source=[991,992,993]) +allocate (var2%C) +var2%C%x = 996; var2%C%y = 995; var2%C%z = 994 +allocate (var2%D(2,2)) +var2%D(1,1)%x = 199 +var2%D(1,1)%y = 299 +var2%D(1,1)%z = 399 +var2%D(2,1)%x = 499 +var2%D(2,1)%y = 599 +var2%D(2,1)%z = 699 +var2%D(1,2)%x = 1199 +var2%D(1,2)%y = 1299 +var2%D(1,2)%z = 1399 +var2%D(2,2)%x = 1499 +var2%D(2,2)%y = 1599 +var2%D(2,2)%z = 1699 + +block + integer(c_intptr_t) :: loc_a, loc_b, loc_c, loc_d, loc2_a, loc2_b, loc2_c, loc2_d + loc_a = loc (var%a) + loc_b = loc (var%b) + loc_c = loc (var%d) + loc_d = loc (var%d) + loc2_a = loc (var2%a) + loc2_b = loc (var2%b) + loc2_c = loc (var2%c) + loc2_d = loc (var2%d) + ! 
var/var2 are mapped, but the pointer components aren't + !$omp target map(to: var) map(tofrom: var2) + if (loc_a /= loc (var%a)) stop 31 + if (loc_b /= loc (var%b)) stop 32 + if (loc_c /= loc (var%d)) stop 33 + if (loc_d /= loc (var%d)) stop 34 + if (loc2_a /= loc (var2%a)) stop 35 + if (loc2_b /= loc (var2%b)) stop 36 + if (loc2_c /= loc (var2%c)) stop 37 + if (loc2_d /= loc (var2%d)) stop 38 + !$omp end target + if (loc_a /= loc (var%a)) stop 41 + if (loc_b /= loc (var%b)) stop 42 + if (loc_c /= loc (var%d)) stop 43 + if (loc_d /= loc (var%d)) stop 44 + if (loc2_a /= loc (var2%a)) stop 45 + if (loc2_b /= loc (var2%b)) stop 46 + if (loc2_c /= loc (var2%c)) stop 47 + if (loc2_d /= loc (var2%d)) stop 48 +end block + +block + ! Map only (all) components, but this maps also the alloc comps + !$omp target map(to: var%a, var%b, var%c, var%d) map(tofrom: var2%a, var2%b, var2%c, var2%d) + call foo (var,var2) + !$omp end target +end block + +if (var2%A /= 45) stop 9 +if (any (var2%B /= [1,2,3])) stop 10 +if (var2%C%x /= 6) stop 11 +if (var2%C%y /= 5) stop 11 +if (var2%C%z /= 4) stop 11 +block + integer :: tmp_x(2,2), tmp_y(2,2), tmp_z(2,2), i, j + tmp_x = reshape([1, 4, 11, 14], [2,2]) + tmp_y = reshape([2, 5, 12, 15], [2,2]) + tmp_z = reshape([3, 6, 13, 16], [2,2]) + do j = 1, 2 + do i = 1, 2 + if (var2%D(i,j)%x /= tmp_x(i,j)) stop 12 + if (var2%D(i,j)%y /= tmp_y(i,j)) stop 12 + if (var2%D(i,j)%z /= tmp_z(i,j)) stop 12 + end do + end do +end block + +! Extra deallocates due to PR fortran/104697 +deallocate(var%C%x, var%C%y, var%C%z) +deallocate(var%D(1,1)%x, var%D(1,1)%y, var%D(1,1)%z) +deallocate(var%D(2,1)%x, var%D(2,1)%y, var%D(2,1)%z) +deallocate(var%D(1,2)%x, var%D(1,2)%y, var%D(1,2)%z) +deallocate(var%D(2,2)%x, var%D(2,2)%y, var%D(2,2)%z) +deallocate(var%A, var%B, var%C, var%D) + +deallocate(var2%C%x, var2%C%y, var2%C%z) +deallocate(var2%D(1,1)%x, var2%D(1,1)%y, var2%D(1,1)%z) +deallocate(var2%D(2,1)%x, var2%D(2,1)%y, var2%D(2,1)%z) +deallocate(var2%D(1,2)%x, var2%D(1,2)%y, var2%D(1,2)%z) +deallocate(var2%D(2,2)%x, var2%D(2,2)%y, var2%D(2,2)%z) +deallocate(var2%A, var2%B, var2%C, var2%D) + +! -------------------------------------- +! Assign + allocate +allocate (var3%Q%A, source=45) +allocate (var3%Q%B, source=[1,2,3]) +allocate (var3%Q%C, source=t2(6,5,4)) +allocate (var3%Q%D(2,2)) +var3%Q%D(1,1) = t2(1,2,3) +var3%Q%D(2,1) = t2(4,5,6) +var3%Q%D(1,2) = t2(11,12,13) +var3%Q%D(2,2) = t2(14,15,16) + +allocate (var3%R(2)%A, source=45) +allocate (var3%R(2)%B, source=[1,2,3]) +allocate (var3%R(2)%C, source=t2(6,5,4)) +allocate (var3%R(2)%D(2,2)) +var3%R(2)%D(1,1) = t2(1,2,3) +var3%R(2)%D(2,1) = t2(4,5,6) +var3%R(2)%D(1,2) = t2(11,12,13) +var3%R(2)%D(2,2) = t2(14,15,16) + +! 
Assign + allocate +allocate (var4%Q%A, source=145) +allocate (var4%Q%B, source=[991,992,993]) +allocate (var4%Q%C, source=t2(996,995,994)) +allocate (var4%Q%D(2,2)) +var4%Q%D(1,1) = t2(199,299,399) +var4%Q%D(2,1) = t2(499,599,699) +var4%Q%D(1,2) = t2(1199,1299,1399) +var4%Q%D(2,2) = t2(1499,1599,1699) + +allocate (var4%R(3)%A, source=145) +allocate (var4%R(3)%B, source=[991,992,993]) +allocate (var4%R(3)%C, source=t2(996,995,994)) +allocate (var4%R(3)%D(2,2)) +var4%R(3)%D(1,1) = t2(199,299,399) +var4%R(3)%D(2,1) = t2(499,599,699) +var4%R(3)%D(1,2) = t2(1199,1299,1399) +var4%R(3)%D(2,2) = t2(1499,1599,1699) + +!$omp target map(to: var3%Q%A, var3%Q%B, var3%Q%C, var3%Q%D) & +!$omp& map(tofrom: var4%Q%A, var4%Q%B, var4%Q%C, var4%Q%D) + call foo(var3%Q, var4%Q) +!$omp end target + +iptr = loc(var3%R(2)%A) + +!$omp target map(to: var3%R(2)%A, var3%R(2)%B, var3%R(2)%C, var3%R(2)%D) & +!$omp& map(tofrom: var4%R(3)%A, var4%R(3)%B, var4%R(3)%C, var4%R(3)%D) + call foo(var3%R(2), var4%R(3)) +!$omp end target + +if (var4%Q%A /= 45) stop 13 +if (any (var4%Q%B /= [1,2,3])) stop 14 +if (var4%Q%C%x /= 6) stop 15 +if (var4%Q%C%y /= 5) stop 15 +if (var4%Q%C%z /= 4) stop 15 +block + integer :: tmp_x(2,2), tmp_y(2,2), tmp_z(2,2), i, j + tmp_x = reshape([1, 4, 11, 14], [2,2]) + tmp_y = reshape([2, 5, 12, 15], [2,2]) + tmp_z = reshape([3, 6, 13, 16], [2,2]) + do j = 1, 2 + do i = 1, 2 + if (var4%Q%D(i,j)%x /= tmp_x(i,j)) stop 16 + if (var4%Q%D(i,j)%y /= tmp_y(i,j)) stop 16 + if (var4%Q%D(i,j)%z /= tmp_z(i,j)) stop 16 + end do + end do +end block + +! Cf. PR fortran/104696 +! { dg-output "valid mapping, OK" { xfail { offload_device_nonshared_as } } } +if (iptr /= loc(var3%R(2)%A)) then + print *, "invalid mapping, cf. PR fortran/104696" +else + +if (var4%R(3)%A /= 45) stop 17 +if (any (var4%R(3)%B /= [1,2,3])) stop 18 +if (var4%R(3)%C%x /= 6) stop 19 +if (var4%R(3)%C%y /= 5) stop 19 +if (var4%R(3)%C%z /= 4) stop 19 +block + integer :: tmp_x(2,2), tmp_y(2,2), tmp_z(2,2), i, j + tmp_x = reshape([1, 4, 11, 14], [2,2]) + tmp_y = reshape([2, 5, 12, 15], [2,2]) + tmp_z = reshape([3, 6, 13, 16], [2,2]) + do j = 1, 2 + do i = 1, 2 + if (var4%R(3)%D(i,j)%x /= tmp_x(i,j)) stop 20 + if (var4%R(3)%D(i,j)%y /= tmp_y(i,j)) stop 20 + if (var4%R(3)%D(i,j)%z /= tmp_z(i,j)) stop 20 + end do + end do +end block + +! 
Extra deallocates due to PR fortran/104697 +deallocate(var3%Q%C%x, var3%Q%D(1,1)%x, var3%Q%D(2,1)%x, var3%Q%D(1,2)%x, var3%Q%D(2,2)%x) +deallocate(var3%Q%C%y, var3%Q%D(1,1)%y, var3%Q%D(2,1)%y, var3%Q%D(1,2)%y, var3%Q%D(2,2)%y) +deallocate(var3%Q%C%z, var3%Q%D(1,1)%z, var3%Q%D(2,1)%z, var3%Q%D(1,2)%z, var3%Q%D(2,2)%z) +deallocate(var3%Q%A, var3%Q%B, var3%Q%C, var3%Q%D) + +deallocate(var4%Q%C%x, var4%Q%D(1,1)%x, var4%Q%D(2,1)%x, var4%Q%D(1,2)%x, var4%Q%D(2,2)%x) +deallocate(var4%Q%C%y, var4%Q%D(1,1)%y, var4%Q%D(2,1)%y, var4%Q%D(1,2)%y, var4%Q%D(2,2)%y) +deallocate(var4%Q%C%z, var4%Q%D(1,1)%z, var4%Q%D(2,1)%z, var4%Q%D(1,2)%z, var4%Q%D(2,2)%z) +deallocate(var4%Q%A, var4%Q%B, var4%Q%C, var4%Q%D) + +deallocate(var3%R(2)%C%x, var3%R(2)%D(1,1)%x, var3%R(2)%D(2,1)%x, var3%R(2)%D(1,2)%x, var3%R(2)%D(2,2)%x) +deallocate(var3%R(2)%C%y, var3%R(2)%D(1,1)%y, var3%R(2)%D(2,1)%y, var3%R(2)%D(1,2)%y, var3%R(2)%D(2,2)%y) +deallocate(var3%R(2)%C%z, var3%R(2)%D(1,1)%z, var3%R(2)%D(2,1)%z, var3%R(2)%D(1,2)%z, var3%R(2)%D(2,2)%z) +deallocate(var3%R(2)%A, var3%R(2)%B, var3%R(2)%C, var3%R(2)%D) + +deallocate(var4%R(3)%C%x, var4%R(3)%D(1,1)%x, var4%R(3)%D(2,1)%x, var4%R(3)%D(1,2)%x, var4%R(3)%D(2,2)%x) +deallocate(var4%R(3)%C%y, var4%R(3)%D(1,1)%y, var4%R(3)%D(2,1)%y, var4%R(3)%D(1,2)%y, var4%R(3)%D(2,2)%y) +deallocate(var4%R(3)%C%z, var4%R(3)%D(1,1)%z, var4%R(3)%D(2,1)%z, var4%R(3)%D(1,2)%z, var4%R(3)%D(2,2)%z) +deallocate(var4%R(3)%A, var4%R(3)%B, var4%R(3)%C, var4%R(3)%D) + + print *, "valid mapping, OK" +endif + +contains + subroutine foo(x, y) + type(t) :: x, y + intent(in) :: x + intent(inout) :: y + integer :: tmp_x(2,2), tmp_y(2,2), tmp_z(2,2), i, j + if (x%A /= 45) stop 1 + if (any (x%B /= [1,2,3])) stop 2 + if (x%C%x /= 6) stop 3 + if (x%C%y /= 5) stop 3 + if (x%C%z /= 4) stop 3 + + tmp_x = reshape([1, 4, 11, 14], [2,2]) + tmp_y = reshape([2, 5, 12, 15], [2,2]) + tmp_z = reshape([3, 6, 13, 16], [2,2]) + do j = 1, 2 + do i = 1, 2 + if (x%D(i,j)%x /= tmp_x(i,j)) stop 4 + if (x%D(i,j)%y /= tmp_y(i,j)) stop 4 + if (x%D(i,j)%z /= tmp_z(i,j)) stop 4 + end do + end do + + if (y%A /= 145) stop 5 + if (any (y%B /= [991,992,993])) stop 6 + if (y%C%x /= 996) stop 7 + if (y%C%y /= 995) stop 7 + if (y%C%z /= 994) stop 7 + tmp_x = reshape([199, 499, 1199, 1499], [2,2]) + tmp_y = reshape([299, 599, 1299, 1599], [2,2]) + tmp_z = reshape([399, 699, 1399, 1699], [2,2]) + do j = 1, 2 + do i = 1, 2 + if (y%D(i,j)%x /= tmp_x(i,j)) stop 8 + if (y%D(i,j)%y /= tmp_y(i,j)) stop 8 + if (y%D(i,j)%z /= tmp_z(i,j)) stop 8 + end do + end do + + y%A = x%A + y%B(:) = x%B + y%C%x = x%C%x + y%C%y = x%C%y + y%C%z = x%C%z + do j = 1, 2 + do i = 1, 2 + y%D(i,j)%x = x%D(i,j)%x + y%D(i,j)%y = x%D(i,j)%y + y%D(i,j)%z = x%D(i,j)%z + end do + end do + end +end diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-7.f90 b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-7.f90 new file mode 100644 index 0000000..1493c5f --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-7.f90 @@ -0,0 +1,672 @@ +module m + implicit none (type, external) + type t + integer, allocatable :: arr(:,:) + integer :: var + integer, allocatable :: slr + end type t + +contains + + subroutine check_it (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array, & + opt_scalar, opt_array, a_opt_scalar, a_opt_array) + type(t), intent(inout) :: & + scalar, array(:,:), opt_scalar, opt_array(:,:), a_scalar, a_array(:,:), & + a_opt_scalar, a_opt_array(:,:), & + l_scalar, l_array(:,:), la_scalar, 
la_array(:,:) + optional :: opt_scalar, opt_array, a_opt_scalar, a_opt_array + allocatable :: a_scalar, a_array, a_opt_scalar, a_opt_array, la_scalar, la_array + logical, value :: is_present, dummy_alloced, inner_alloc + integer :: i, j, k, l + + ! CHECK VALUE + if (scalar%var /= 42) stop 1 + if (l_scalar%var /= 42) stop 1 + if (is_present) then + if (opt_scalar%var /= 42) stop 2 + end if + if (any (shape(array) /= [3,2])) stop 1 + if (any (shape(l_array) /= [3,2])) stop 1 + if (is_present) then + if (any (shape(opt_array) /= [3,2])) stop 1 + end if + do j = 1, 2 + do i = 1, 3 + if (array(i,j)%var /= i*97 + 100*41*j) stop 3 + if (l_array(i,j)%var /= i*97 + 100*41*j) stop 3 + if (is_present) then + if (opt_array(i,j)%var /= i*97 + 100*41*j) stop 4 + end if + end do + end do + + if (dummy_alloced) then + if (a_scalar%var /= 42) stop 1 + if (la_scalar%var /= 42) stop 1 + if (is_present) then + if (a_opt_scalar%var /= 42) stop 1 + end if + if (any (shape(a_array) /= [3,2])) stop 1 + if (any (shape(la_array) /= [3,2])) stop 1 + if (is_present) then + if (any (shape(a_opt_array) /= [3,2])) stop 1 + end if + do j = 1, 2 + do i = 1, 3 + if (a_array(i,j)%var /= i*97 + 100*41*j) stop 1 + if (la_array(i,j)%var /= i*97 + 100*41*j) stop 1 + if (is_present) then + if (a_opt_array(i,j)%var /= i*97 + 100*41*j) stop 1 + end if + end do + end do + else + if (allocated (a_scalar)) stop 1 + if (allocated (la_scalar)) stop 1 + if (allocated (a_array)) stop 1 + if (allocated (la_array)) stop 1 + if (is_present) then + if (allocated (a_opt_scalar)) stop 1 + if (allocated (a_opt_array)) stop 1 + end if + end if + + if (inner_alloc) then + if (scalar%slr /= 467) stop 5 + if (l_scalar%slr /= 467) stop 5 + if (a_scalar%slr /= 467) stop 6 + if (la_scalar%slr /= 467) stop 6 + if (is_present) then + if (opt_scalar%slr /= 467) stop 7 + if (a_opt_scalar%slr /= 467) stop 8 + end if + do j = 1, 2 + do i = 1, 3 + if (array(i,j)%slr /= (i*97 + 100*41*j) + 467) stop 9 + if (l_array(i,j)%slr /= (i*97 + 100*41*j) + 467) stop 9 + if (a_array(i,j)%slr /= (i*97 + 100*41*j) + 467) stop 10 + if (la_array(i,j)%slr /= (i*97 + 100*41*j) + 467) stop 10 + if (is_present) then + if (opt_array(i,j)%slr /= (i*97 + 100*41*j) + 467) stop 11 + if (a_opt_array(i,j)%slr /= (i*97 + 100*41*j) + 467) stop 12 + end if + end do + end do + + do l = 1, 5 + do k = 1, 4 + if (any (shape(scalar%arr) /= [4,5])) stop 1 + if (any (shape(l_scalar%arr) /= [4,5])) stop 1 + if (any (shape(a_scalar%arr) /= [4,5])) stop 1 + if (any (shape(la_scalar%arr) /= [4,5])) stop 1 + if (scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467) stop 13 + if (l_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467) stop 13 + if (a_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467) stop 14 + if (la_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467) stop 14 + if (is_present) then + if (any (shape(opt_scalar%arr) /= [4,5])) stop 1 + if (any (shape(a_opt_scalar%arr) /= [4,5])) stop 1 + if (opt_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467) stop 15 + if (a_opt_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467) stop 16 + end if + end do + end do + do j = 1, 2 + do i = 1, 3 + if (any (shape(array(i,j)%arr) /= [i,j])) stop 1 + if (any (shape(l_array(i,j)%arr) /= [i,j])) stop 1 + if (any (shape(a_array(i,j)%arr) /= [i,j])) stop 1 + if (any (shape(la_array(i,j)%arr) /= [i,j])) stop 1 + if (is_present) then + if (any (shape(opt_array(i,j)%arr) /= [i,j])) stop 1 + if (any (shape(a_opt_array(i,j)%arr) /= [i,j])) stop 1 + endif + do l = 1, j + do k = 1, i + if (array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l) stop 17 + if 
(l_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l) stop 17 + if (a_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l) stop 18 + if (la_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l) stop 18 + if (is_present) then + if (opt_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l) stop 19 + if (a_opt_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l) stop 20 + end if + end do + end do + end do + end do + else if (dummy_alloced) then + if (allocated (scalar%slr)) stop 1 + if (allocated (l_scalar%slr)) stop 1 + if (allocated (a_scalar%slr)) stop 1 + if (allocated (la_scalar%slr)) stop 1 + if (is_present) then + if (allocated (opt_scalar%slr)) stop 1 + if (allocated (a_opt_scalar%slr)) stop 1 + endif + if (allocated (scalar%arr)) stop 1 + if (allocated (l_scalar%arr)) stop 1 + if (allocated (a_scalar%arr)) stop 1 + if (allocated (la_scalar%arr)) stop 1 + if (is_present) then + if (allocated (opt_scalar%arr)) stop 1 + if (allocated (a_opt_scalar%arr)) stop 1 + endif + end if + + ! SET VALUE + scalar%var = 42 + 13 + l_scalar%var = 42 + 13 + if (is_present) then + opt_scalar%var = 42 + 13 + endif + do j = 1, 2 + do i = 1, 3 + array(i,j)%var = i*97 + 100*41*j + 13 + l_array(i,j)%var = i*97 + 100*41*j + 13 + if (is_present) then + opt_array(i,j)%var = i*97 + 100*41*j + 13 + end if + end do + end do + + if (dummy_alloced) then + a_scalar%var = 42 + 13 + la_scalar%var = 42 + 13 + if (is_present) then + a_opt_scalar%var = 42 + 13 + endif + do j = 1, 2 + do i = 1, 3 + a_array(i,j)%var = i*97 + 100*41*j + 13 + la_array(i,j)%var = i*97 + 100*41*j + 13 + if (is_present) then + a_opt_array(i,j)%var = i*97 + 100*41*j + 13 + endif + end do + end do + end if + + if (inner_alloc) then + scalar%slr = 467 + 13 + l_scalar%slr = 467 + 13 + a_scalar%slr = 467 + 13 + la_scalar%slr = 467 + 13 + if (is_present) then + opt_scalar%slr = 467 + 13 + a_opt_scalar%slr = 467 + 13 + end if + do j = 1, 2 + do i = 1, 3 + array(i,j)%slr = (i*97 + 100*41*j) + 467 + 13 + l_array(i,j)%slr = (i*97 + 100*41*j) + 467 + 13 + a_array(i,j)%slr = (i*97 + 100*41*j) + 467 + 13 + la_array(i,j)%slr = (i*97 + 100*41*j) + 467 + 13 + if (is_present) then + opt_array(i,j)%slr = (i*97 + 100*41*j) + 467 + 13 + a_opt_array(i,j)%slr = (i*97 + 100*41*j) + 467 + 13 + end if + end do + end do + + do l = 1, 5 + do k = 1, 4 + scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + 13 + l_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + 13 + a_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + 13 + la_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + 13 + if (is_present) then + opt_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + 13 + a_opt_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + 13 + end if + end do + end do + do j = 1, 2 + do i = 1, 3 + do l = 1, j + do k = 1, i + array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + 13 + l_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + 13 + a_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + 13 + la_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + 13 + if (is_present) then + opt_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + 13 + a_opt_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + 13 + end if + end do + end do + end do + end do + end if + + end subroutine + subroutine check_reset (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array, & + opt_scalar, opt_array, a_opt_scalar, a_opt_array) + type(t), intent(inout) :: & + scalar, array(:,:), opt_scalar, opt_array(:,:), a_scalar, 
a_array(:,:), & + a_opt_scalar, a_opt_array(:,:), & + l_scalar, l_array(:,:), la_scalar, la_array(:,:) + optional :: opt_scalar, opt_array, a_opt_scalar, a_opt_array + allocatable :: a_scalar, a_array, a_opt_scalar, a_opt_array, la_scalar, la_array + logical, value :: is_present, dummy_alloced, inner_alloc + integer :: i, j, k, l + + ! CHECK VALUE + if (scalar%var /= 42 + 13) stop 1 + if (l_scalar%var /= 42 + 13) stop 1 + if (is_present) then + if (opt_scalar%var /= 42 + 13) stop 2 + end if + if (any (shape(array) /= [3,2])) stop 1 + if (any (shape(l_array) /= [3,2])) stop 1 + if (is_present) then + if (any (shape(opt_array) /= [3,2])) stop 1 + end if + do j = 1, 2 + do i = 1, 3 + if (array(i,j)%var /= i*97 + 100*41*j + 13) stop 3 + if (l_array(i,j)%var /= i*97 + 100*41*j + 13) stop 3 + if (is_present) then + if (opt_array(i,j)%var /= i*97 + 100*41*j + 13) stop 4 + end if + end do + end do + + if (dummy_alloced) then + if (a_scalar%var /= 42 + 13) stop 1 + if (la_scalar%var /= 42 + 13) stop 1 + if (is_present) then + if (a_opt_scalar%var /= 42 + 13) stop 1 + end if + if (any (shape(a_array) /= [3,2])) stop 1 + if (any (shape(la_array) /= [3,2])) stop 1 + if (is_present) then + if (any (shape(a_opt_array) /= [3,2])) stop 1 + end if + do j = 1, 2 + do i = 1, 3 + if (a_array(i,j)%var /= i*97 + 100*41*j + 13) stop 1 + if (la_array(i,j)%var /= i*97 + 100*41*j + 13) stop 1 + if (is_present) then + if (a_opt_array(i,j)%var /= i*97 + 100*41*j + 13) stop 1 + end if + end do + end do + else + if (allocated (a_scalar)) stop 1 + if (allocated (la_scalar)) stop 1 + if (allocated (a_array)) stop 1 + if (allocated (la_array)) stop 1 + if (is_present) then + if (allocated (a_opt_scalar)) stop 1 + if (allocated (a_opt_array)) stop 1 + end if + end if + + if (inner_alloc) then + if (scalar%slr /= 467 + 13) stop 5 + if (l_scalar%slr /= 467 + 13) stop 5 + if (a_scalar%slr /= 467 + 13) stop 6 + if (la_scalar%slr /= 467 + 13) stop 6 + if (is_present) then + if (opt_scalar%slr /= 467 + 13) stop 7 + if (a_opt_scalar%slr /= 467 + 13) stop 8 + end if + do j = 1, 2 + do i = 1, 3 + if (array(i,j)%slr /= (i*97 + 100*41*j) + 467 + 13) stop 9 + if (l_array(i,j)%slr /= (i*97 + 100*41*j) + 467 + 13) stop 9 + if (a_array(i,j)%slr /= (i*97 + 100*41*j) + 467 + 13) stop 10 + if (la_array(i,j)%slr /= (i*97 + 100*41*j) + 467 + 13) stop 10 + if (is_present) then + if (opt_array(i,j)%slr /= (i*97 + 100*41*j) + 467 + 13) stop 11 + if (a_opt_array(i,j)%slr /= (i*97 + 100*41*j) + 467 + 13) stop 12 + end if + end do + end do + + do l = 1, 5 + do k = 1, 4 + if (any (shape(scalar%arr) /= [4,5])) stop 1 + if (any (shape(l_scalar%arr) /= [4,5])) stop 1 + if (any (shape(a_scalar%arr) /= [4,5])) stop 1 + if (any (shape(la_scalar%arr) /= [4,5])) stop 1 + if (scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467 + 13) stop 13 + if (l_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467 + 13) stop 13 + if (a_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467 + 13) stop 14 + if (la_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467 + 13) stop 14 + if (is_present) then + if (any (shape(opt_scalar%arr) /= [4,5])) stop 1 + if (any (shape(a_opt_scalar%arr) /= [4,5])) stop 1 + if (opt_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467 + 13) stop 15 + if (a_opt_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467 + 13) stop 16 + end if + end do + end do + do j = 1, 2 + do i = 1, 3 + if (any (shape(array(i,j)%arr) /= [i,j])) stop 1 + if (any (shape(l_array(i,j)%arr) /= [i,j])) stop 1 + if (any (shape(a_array(i,j)%arr) /= [i,j])) stop 1 + if (any (shape(la_array(i,j)%arr) /= [i,j])) stop 1 + if 
(is_present) then + if (any (shape(opt_array(i,j)%arr) /= [i,j])) stop 1 + if (any (shape(a_opt_array(i,j)%arr) /= [i,j])) stop 1 + endif + do l = 1, j + do k = 1, i + if (array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l + 13) stop 17 + if (l_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l + 13) stop 17 + if (a_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l + 13) stop 18 + if (la_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l + 13) stop 18 + if (is_present) then + if (opt_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l + 13) stop 19 + if (a_opt_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l + 13) stop 20 + end if + end do + end do + end do + end do + else if (dummy_alloced) then + if (allocated (scalar%slr)) stop 1 + if (allocated (l_scalar%slr)) stop 1 + if (allocated (a_scalar%slr)) stop 1 + if (allocated (la_scalar%slr)) stop 1 + if (is_present) then + if (allocated (opt_scalar%slr)) stop 1 + if (allocated (a_opt_scalar%slr)) stop 1 + endif + if (allocated (scalar%arr)) stop 1 + if (allocated (l_scalar%arr)) stop 1 + if (allocated (a_scalar%arr)) stop 1 + if (allocated (la_scalar%arr)) stop 1 + if (is_present) then + if (allocated (opt_scalar%arr)) stop 1 + if (allocated (a_opt_scalar%arr)) stop 1 + endif + end if + + ! (RE)SET VALUE + scalar%var = 42 + l_scalar%var = 42 + if (is_present) then + opt_scalar%var = 42 + endif + do j = 1, 2 + do i = 1, 3 + array(i,j)%var = i*97 + 100*41*j + l_array(i,j)%var = i*97 + 100*41*j + if (is_present) then + opt_array(i,j)%var = i*97 + 100*41*j + end if + end do + end do + + if (dummy_alloced) then + a_scalar%var = 42 + la_scalar%var = 42 + if (is_present) then + a_opt_scalar%var = 42 + endif + do j = 1, 2 + do i = 1, 3 + a_array(i,j)%var = i*97 + 100*41*j + la_array(i,j)%var = i*97 + 100*41*j + if (is_present) then + a_opt_array(i,j)%var = i*97 + 100*41*j + endif + end do + end do + end if + + if (inner_alloc) then + scalar%slr = 467 + l_scalar%slr = 467 + a_scalar%slr = 467 + la_scalar%slr = 467 + if (is_present) then + opt_scalar%slr = 467 + a_opt_scalar%slr = 467 + end if + do j = 1, 2 + do i = 1, 3 + array(i,j)%slr = (i*97 + 100*41*j) + 467 + l_array(i,j)%slr = (i*97 + 100*41*j) + 467 + a_array(i,j)%slr = (i*97 + 100*41*j) + 467 + la_array(i,j)%slr = (i*97 + 100*41*j) + 467 + if (is_present) then + opt_array(i,j)%slr = (i*97 + 100*41*j) + 467 + a_opt_array(i,j)%slr = (i*97 + 100*41*j) + 467 + end if + end do + end do + + do l = 1, 5 + do k = 1, 4 + scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + l_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + a_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + la_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + if (is_present) then + opt_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + a_opt_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + end if + end do + end do + do j = 1, 2 + do i = 1, 3 + do l = 1, j + do k = 1, i + array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + l_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + a_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + la_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + if (is_present) then + opt_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + a_opt_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + end if + end do + end do + end do + end do + end if + end subroutine + + subroutine test(scalar, array, a_scalar, a_array, opt_scalar, opt_array, & + a_opt_scalar, a_opt_array) + type(t) :: scalar, array(:,:), opt_scalar, opt_array(:,:), a_scalar, a_array(:,:) + 
type(t) :: a_opt_scalar, a_opt_array(:,:) + type(t) :: l_scalar, l_array(3,2), la_scalar, la_array(:,:) + allocatable :: a_scalar, a_array, a_opt_scalar, a_opt_array, la_scalar, la_array + optional :: opt_scalar, opt_array, a_opt_scalar, a_opt_array + + integer :: i, j, k, l + logical :: is_present, dummy_alloced, local_alloced, inner_alloc + is_present = present(opt_scalar) + dummy_alloced = allocated(a_scalar) + inner_alloc = allocated(scalar%slr) + + l_scalar%var = 42 + do j = 1, 2 + do i = 1, 3 + l_array(i,j)%var = i*97 + 100*41*j + end do + end do + + if (dummy_alloced) then + allocate(la_scalar, la_array(3,2)) + a_scalar%var = 42 + la_scalar%var = 42 + do j = 1, 2 + do i = 1, 3 + l_array(i,j)%var = i*97 + 100*41*j + la_array(i,j)%var = i*97 + 100*41*j + end do + end do + end if + + if (inner_alloc) then + l_scalar%slr = 467 + la_scalar%slr = 467 + do j = 1, 2 + do i = 1, 3 + l_array(i,j)%slr = (i*97 + 100*41*j) + 467 + la_array(i,j)%slr = (i*97 + 100*41*j) + 467 + end do + end do + + allocate(l_scalar%arr(4,5), la_scalar%arr(4,5)) + do l = 1, 5 + do k = 1, 4 + l_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + la_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + end do + end do + do j = 1, 2 + do i = 1, 3 + allocate(l_array(i,j)%arr(i,j), la_array(i,j)%arr(i,j)) + do l = 1, j + do k = 1, i + l_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + la_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + end do + end do + end do + end do + end if + + ! implicit mapping + !$omp target + if (is_present) then + call check_it (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array, & + opt_scalar, opt_array, a_opt_scalar, a_opt_array) + else + call check_it (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array) + end if + !$omp end target + + if (is_present) then + call check_reset (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array, & + opt_scalar, opt_array, a_opt_scalar, a_opt_array) + else + call check_reset (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array) + endif + + ! 
explicit mapping + !$omp target map(scalar, array, opt_scalar, opt_array, a_scalar, a_array) & + !$omp& map(a_opt_scalar, a_opt_array) & + !$omp& map(l_scalar, l_array, la_scalar, la_array) + if (is_present) then + call check_it (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array, & + opt_scalar, opt_array, a_opt_scalar, a_opt_array) + else + call check_it (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array) + endif + !$omp end target + + if (is_present) then + call check_reset (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array, & + opt_scalar, opt_array, a_opt_scalar, a_opt_array) + else + call check_reset (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array) + endif + end subroutine +end module + +program main + use m + implicit none (type, external) + type(t) :: scalar, array(3,2), opt_scalar, opt_array(3,2), a_scalar, a_array(:,:) + type(t) :: a_opt_scalar, a_opt_array(:,:) + allocatable :: a_scalar, a_array, a_opt_scalar, a_opt_array + integer :: i, j, k, l, n + + scalar%var = 42 + opt_scalar%var = 42 + do j = 1, 2 + do i = 1, 3 + array(i,j)%var = i*97 + 100*41*j + opt_array(i,j)%var = i*97 + 100*41*j + end do + end do + + ! unallocated + call test (scalar, array, a_scalar, a_array) + call test (scalar, array, a_scalar, a_array, opt_scalar, opt_array, a_opt_scalar, a_opt_array) + + ! allocated + allocate(a_scalar, a_opt_scalar, a_array(3,2), a_opt_array(3,2)) + a_scalar%var = 42 + a_opt_scalar%var = 42 + do j = 1, 2 + do i = 1, 3 + a_array(i,j)%var = i*97 + 100*41*j + a_opt_array(i,j)%var = i*97 + 100*41*j + end do + end do + + call test (scalar, array, a_scalar, a_array) + call test (scalar, array, a_scalar, a_array, opt_scalar, opt_array, a_opt_scalar, a_opt_array) + + ! 
comps allocated + scalar%slr = 467 + a_scalar%slr = 467 + opt_scalar%slr = 467 + a_opt_scalar%slr = 467 + do j = 1, 2 + do i = 1, 3 + array(i,j)%slr = (i*97 + 100*41*j) + 467 + a_array(i,j)%slr = (i*97 + 100*41*j) + 467 + opt_array(i,j)%slr = (i*97 + 100*41*j) + 467 + a_opt_array(i,j)%slr = (i*97 + 100*41*j) + 467 + end do + end do + + allocate(scalar%arr(4,5), a_scalar%arr(4,5), opt_scalar%arr(4,5), a_opt_scalar%arr(4,5)) + do l = 1, 5 + do k = 1, 4 + scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + a_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + opt_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + a_opt_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + end do + end do + do j = 1, 2 + do i = 1, 3 + allocate(array(i,j)%arr(i,j), a_array(i,j)%arr(i,j), opt_array(i,j)%arr(i,j), a_opt_array(i,j)%arr(i,j)) + do l = 1, j + do k = 1, i + array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + a_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + opt_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + a_opt_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + end do + end do + end do + end do + + call test (scalar, array, a_scalar, a_array) + call test (scalar, array, a_scalar, a_array, opt_scalar, opt_array, a_opt_scalar, a_opt_array) + + deallocate(a_scalar, a_opt_scalar, a_array, a_opt_array) +end diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-8.f90 b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-8.f90 new file mode 100644 index 0000000..f5a286e --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-8.f90 @@ -0,0 +1,268 @@ +module m + implicit none (type, external) + type t + integer, allocatable :: A(:) + end type t + type t2 + type(t), allocatable :: vT + integer, allocatable :: x + end type t2 + +contains + + subroutine test_alloc() + type(t) :: var + type(t), allocatable :: var2 + + allocate(var2) + allocate(var%A(4), var2%A(5)) + + !$omp target enter data map(alloc: var, var2) + !$omp target + if (.not. allocated(Var2)) stop 1 + if (.not. allocated(Var%A)) stop 2 + if (.not. allocated(Var2%A)) stop 3 + if (lbound(var%A, 1) /= 1 .or. ubound(var%A, 1) /= 4) stop 4 + if (lbound(var2%A, 1) /= 1 .or. ubound(var2%A, 1) /= 5) stop 5 + var%A = [1,2,3,4] + var2%A = [11,22,33,44,55] + !$omp end target + !$omp target exit data map(from: var, var2) + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%A)) error stop + if (.not. allocated(Var2%A)) error stop + if (lbound(var%A, 1) /= 1 .or. ubound(var%A, 1) /= 4) error stop + if (lbound(var2%A, 1) /= 1 .or. ubound(var2%A, 1) /= 5) error stop + if (any(var%A /= [1,2,3,4])) error stop + if (any(var2%A /= [11,22,33,44,55])) error stop + end subroutine test_alloc + + subroutine test2_alloc() + type(t2) :: var + type(t2), allocatable :: var2 + + allocate(var2) + allocate(var%x, var2%x) + + !$omp target enter data map(alloc: var, var2) + !$omp target + if (.not. allocated(Var2)) stop 6 + if (.not. allocated(Var%x)) stop 7 + if (.not. allocated(Var2%x)) stop 8 + var%x = 42 + var2%x = 43 + !$omp end target + !$omp target exit data map(from: var, var2) + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%x)) error stop + if (.not. allocated(Var2%x)) error stop + if (var%x /= 42) error stop + if (var2%x /= 43) error stop + + allocate(var%vt, var2%vt) + allocate(var%vt%A(-1:3), var2%vt%A(0:4)) + + !$omp target enter data map(alloc: var, var2) + !$omp target + if (.not. allocated(Var2)) stop 11 + if (.not. allocated(Var%x)) stop 12 + if (.not. allocated(Var2%x)) stop 13 + if (.not. 
allocated(Var%vt)) stop 14 + if (.not. allocated(Var2%vt)) stop 15 + if (.not. allocated(Var%vt%a)) stop 16 + if (.not. allocated(Var2%vt%a)) stop 17 + var%x = 42 + var2%x = 43 + if (lbound(var%vt%A, 1) /= -1 .or. ubound(var%vt%A, 1) /= 3) stop 4 + if (lbound(var2%vt%A, 1) /= 0 .or. ubound(var2%vt%A, 1) /= 4) stop 5 + var%vt%A = [1,2,3,4,5] + var2%vt%A = [11,22,33,44,55] + !$omp end target + !$omp target exit data map(from: var, var2) + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%x)) error stop + if (.not. allocated(Var2%x)) error stop + if (.not. allocated(Var%vt)) error stop + if (.not. allocated(Var2%vt)) error stop + if (.not. allocated(Var%vt%a)) error stop + if (.not. allocated(Var2%vt%a)) error stop + if (var%x /= 42) error stop + if (var2%x /= 43) error stop + if (lbound(var%vt%A, 1) /= -1 .or. ubound(var%vt%A, 1) /= 3) error stop + if (lbound(var2%vt%A, 1) /= 0 .or. ubound(var2%vt%A, 1) /= 4) error stop + if (any(var%vt%A /= [1,2,3,4,5])) error stop + if (any(var2%vt%A /= [11,22,33,44,55])) error stop + end subroutine test2_alloc + + + subroutine test_alloc_target() + type(t) :: var + type(t), allocatable :: var2 + + allocate(var2) + allocate(var%A(4), var2%A(5)) + + !$omp target map(alloc: var, var2) + if (.not. allocated(Var2)) stop 1 + if (.not. allocated(Var%A)) stop 2 + if (.not. allocated(Var2%A)) stop 3 + if (lbound(var%A, 1) /= 1 .or. ubound(var%A, 1) /= 4) stop 4 + if (lbound(var2%A, 1) /= 1 .or. ubound(var2%A, 1) /= 5) stop 5 + var%A = [1,2,3,4] + var2%A = [11,22,33,44,55] + !$omp end target + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%A)) error stop + if (.not. allocated(Var2%A)) error stop + if (lbound(var%A, 1) /= 1 .or. ubound(var%A, 1) /= 4) error stop + if (lbound(var2%A, 1) /= 1 .or. ubound(var2%A, 1) /= 5) error stop + end subroutine test_alloc_target + + subroutine test2_alloc_target() + type(t2) :: var + type(t2), allocatable :: var2 + + allocate(var2) + allocate(var%x, var2%x) + + !$omp target map(alloc: var, var2) + if (.not. allocated(Var2)) stop 6 + if (.not. allocated(Var%x)) stop 7 + if (.not. allocated(Var2%x)) stop 8 + var%x = 42 + var2%x = 43 + !$omp end target + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%x)) error stop + if (.not. allocated(Var2%x)) error stop + + allocate(var%vt, var2%vt) + allocate(var%vt%A(-1:3), var2%vt%A(0:4)) + + !$omp target map(alloc: var, var2) + if (.not. allocated(Var2)) stop 11 + if (.not. allocated(Var%x)) stop 12 + if (.not. allocated(Var2%x)) stop 13 + if (.not. allocated(Var%vt)) stop 14 + if (.not. allocated(Var2%vt)) stop 15 + if (.not. allocated(Var%vt%a)) stop 16 + if (.not. allocated(Var2%vt%a)) stop 17 + var%x = 42 + var2%x = 43 + if (lbound(var%vt%A, 1) /= -1 .or. ubound(var%vt%A, 1) /= 3) stop 4 + if (lbound(var2%vt%A, 1) /= 0 .or. ubound(var2%vt%A, 1) /= 4) stop 5 + var%vt%A = [1,2,3,4,5] + var2%vt%A = [11,22,33,44,55] + !$omp end target + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%x)) error stop + if (.not. allocated(Var2%x)) error stop + if (.not. allocated(Var%vt)) error stop + if (.not. allocated(Var2%vt)) error stop + if (.not. allocated(Var%vt%a)) error stop + if (.not. allocated(Var2%vt%a)) error stop + if (lbound(var%vt%A, 1) /= -1 .or. ubound(var%vt%A, 1) /= 3) error stop + if (lbound(var2%vt%A, 1) /= 0 .or. 
ubound(var2%vt%A, 1) /= 4) error stop + end subroutine test2_alloc_target + + + + subroutine test_from() + type(t) :: var + type(t), allocatable :: var2 + + allocate(var2) + allocate(var%A(4), var2%A(5)) + + !$omp target map(from: var, var2) + if (.not. allocated(Var2)) stop 1 + if (.not. allocated(Var%A)) stop 2 + if (.not. allocated(Var2%A)) stop 3 + if (lbound(var%A, 1) /= 1 .or. ubound(var%A, 1) /= 4) stop 4 + if (lbound(var2%A, 1) /= 1 .or. ubound(var2%A, 1) /= 5) stop 5 + var%A = [1,2,3,4] + var2%A = [11,22,33,44,55] + !$omp end target + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%A)) error stop + if (.not. allocated(Var2%A)) error stop + if (lbound(var%A, 1) /= 1 .or. ubound(var%A, 1) /= 4) error stop + if (lbound(var2%A, 1) /= 1 .or. ubound(var2%A, 1) /= 5) error stop + if (any(var%A /= [1,2,3,4])) error stop + if (any(var2%A /= [11,22,33,44,55])) error stop + end subroutine test_from + + subroutine test2_from() + type(t2) :: var + type(t2), allocatable :: var2 + + allocate(var2) + allocate(var%x, var2%x) + + !$omp target map(from: var, var2) + if (.not. allocated(Var2)) stop 6 + if (.not. allocated(Var%x)) stop 7 + if (.not. allocated(Var2%x)) stop 8 + var%x = 42 + var2%x = 43 + !$omp end target + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%x)) error stop + if (.not. allocated(Var2%x)) error stop + if (var%x /= 42) error stop + if (var2%x /= 43) error stop + + allocate(var%vt, var2%vt) + allocate(var%vt%A(-1:3), var2%vt%A(0:4)) + + !$omp target map(from: var, var2) + if (.not. allocated(Var2)) stop 11 + if (.not. allocated(Var%x)) stop 12 + if (.not. allocated(Var2%x)) stop 13 + if (.not. allocated(Var%vt)) stop 14 + if (.not. allocated(Var2%vt)) stop 15 + if (.not. allocated(Var%vt%a)) stop 16 + if (.not. allocated(Var2%vt%a)) stop 17 + var%x = 42 + var2%x = 43 + if (lbound(var%vt%A, 1) /= -1 .or. ubound(var%vt%A, 1) /= 3) stop 4 + if (lbound(var2%vt%A, 1) /= 0 .or. ubound(var2%vt%A, 1) /= 4) stop 5 + var%vt%A = [1,2,3,4,5] + var2%vt%A = [11,22,33,44,55] + !$omp end target + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%x)) error stop + if (.not. allocated(Var2%x)) error stop + if (.not. allocated(Var%vt)) error stop + if (.not. allocated(Var2%vt)) error stop + if (.not. allocated(Var%vt%a)) error stop + if (.not. allocated(Var2%vt%a)) error stop + if (var%x /= 42) error stop + if (var2%x /= 43) error stop + if (lbound(var%vt%A, 1) /= -1 .or. ubound(var%vt%A, 1) /= 3) error stop + if (lbound(var2%vt%A, 1) /= 0 .or. ubound(var2%vt%A, 1) /= 4) error stop + if (any(var%vt%A /= [1,2,3,4,5])) error stop + if (any(var2%vt%A /= [11,22,33,44,55])) error stop + end subroutine test2_from + +end module m + +use m + implicit none (type, external) + call test_alloc + call test2_alloc + call test_alloc_target + call test2_alloc_target + + call test_from + call test2_from +end diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9-usm.f90 b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9-usm.f90 new file mode 100644 index 0000000..90378c0 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9-usm.f90 @@ -0,0 +1,11 @@ +! { dg-additional-options "-cpp -DUSE_USM_REQUIREMENT=1 -Wno-openmp" } +! +! We silence the warning: +! Mapping of polymorphic list item '...' is unspecified behavior [-Wopenmp] +! +! Ensure that polymorphic mapping is diagnosed as undefined behavior +! Ensure that static access to polymorphic variables works + +! 
Run map-alloc-comp-9.f90 in unified-shared-memory mode + +#include "map-alloc-comp-9.f90" diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9.f90 b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9.f90 new file mode 100644 index 0000000..26c73d7 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9.f90 @@ -0,0 +1,578 @@ +! { dg-additional-options "-cpp" } +! +! Ensure that polymorphic mapping is diagnosed as undefined behavior +! Ensure that static access to polymorphic variables works + +! Some extended tests are only run with shared memory +! To enforce this (where possible) on the device side: +! #define USE_USM_REQUIREMENT +! which is done in map-alloc-comp-9-usm.f90 + +subroutine test(case) +implicit none(type, external) +#ifdef USE_USM_REQUIREMENT + !$omp requires unified_shared_memory +#endif + +type t + integer :: x(4) +end type t + +type ta + integer, allocatable :: x(:) +end type ta + +type t2 + class(t), allocatable :: x + class(t), allocatable :: x2(:) +end type t2 + +type t3 + type(t2) :: y + type(t2) :: y2(2) +end type t3 + +type t4 + type(t3), allocatable :: y + type(t3), allocatable :: y2(:) +end type t4 + +integer, value :: case + +logical :: is_shared_mem + +! Mangle stack addresses +integer, volatile :: case_var(100*case) + +type(t), allocatable :: var1 +type(ta), allocatable :: var1a +class(t), allocatable :: var2 +type(t2), allocatable :: var3 +type(t4), allocatable :: var4 + +case_var(100) = 0 +!print *, 'case', case + +var1 = t([1,2,3,4]) +var1a = ta([-1,-2,-3,-4,-5]) + +var2 = t([11,22,33,44]) + +allocate(t2 :: var3) +allocate(t :: var3%x) +allocate(t :: var3%x2(2)) +var3%x%x = [111,222,333,444] +var3%x2(1)%x = 2*[111,222,333,444] +var3%x2(2)%x = 3*[111,222,333,444] + +allocate(t4 :: var4) +allocate(t3 :: var4%y) +allocate(t3 :: var4%y2(2)) +allocate(t :: var4%y%y%x) +allocate(t :: var4%y%y%x2(2)) +allocate(t :: var4%y2(1)%y%x) +allocate(t :: var4%y2(1)%y%x2(2)) +allocate(t :: var4%y2(2)%y%x) +allocate(t :: var4%y2(2)%y%x2(2)) +var4%y%y%x%x = -1 * [1111,2222,3333,4444] +var4%y%y%x2(1)%x = -2 * [1111,2222,3333,4444] +var4%y%y%x2(2)%x = -3 * [1111,2222,3333,4444] +var4%y2(1)%y%x%x = -4 * [1111,2222,3333,4444] +var4%y2(1)%y%x2(1)%x = -5 * [1111,2222,3333,4444] +var4%y2(1)%y%x2(2)%x = -6 * [1111,2222,3333,4444] +var4%y2(2)%y%x%x = -7 * [1111,2222,3333,4444] +var4%y2(2)%y%x2(1)%x = -8 * [1111,2222,3333,4444] +var4%y2(2)%y%x2(2)%x = -9 * [1111,2222,3333,4444] + +#ifdef USE_USM_REQUIREMENT +is_shared_mem = .true. +#else +is_shared_mem = .false. +!$omp target map(to: is_shared_mem) + is_shared_mem = .true. +!$omp end target +#endif + +if (case == 1) then + ! implicit mapping + !$omp target + if (any (var1%x /= [1,2,3,4])) stop 1 + var1%x = 2 * var1%x + !$omp end target + + !$omp target + if (any (var1a%x /= [-1,-2,-3,-4])) stop 2 + var1a%x = 3 * var1a%x + !$omp end target + + !$omp target ! { dg-warning "Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var2%x /= [11,22,33,44])) stop 3 + var2%x = 4 * var2%x + !$omp end target + + !$omp target ! { dg-warning "Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var3%x%x /= [111,222,333,444])) stop 4 + var3%x%x = 5 * var3%x%x + if (is_shared_mem) then ! 
For stride data, this accesses the host's _vtab + if (any (var3%x2(1)%x /= 2*[111,222,333,444])) stop 4 + if (any (var3%x2(2)%x /= 3*[111,222,333,444])) stop 4 + var3%x2(1)%x = 5 * var3%x2(1)%x + var3%x2(2)%x = 5 * var3%x2(2)%x + end if + !$omp end target + + !$omp target ! { dg-warning "Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var4%y%y%x%x /= -1 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y%y%x2(1)%x /= -2 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y%y%x2(2)%x /= -3 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(1)%y%x%x /= -4 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(1)%y%x2(1)%x /= -5 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(1)%y%x2(2)%x /= -6 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(2)%y%x%x /= -7 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(2)%y%x2(1)%x /= -8 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(2)%y%x2(2)%x /= -9 * [1111,2222,3333,4444])) stop 5 + end if + var4%y%y%x%x = 6 * var4%y%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y%y%x2(1)%x = 6 * var4%y%y%x2(1)%x + var4%y%y%x2(2)%x = 6 * var4%y%y%x2(2)%x + endif + var4%y2(1)%y%x%x = 6 * var4%y2(1)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(1)%y%x2(1)%x = 6 * var4%y2(1)%y%x2(1)%x + var4%y2(1)%y%x2(2)%x = 6 * var4%y2(1)%y%x2(2)%x + endif + var4%y2(2)%y%x%x = 6 * var4%y2(2)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(2)%y%x2(1)%x = 6 * var4%y2(2)%y%x2(1)%x + var4%y2(2)%y%x2(2)%x = 6 * var4%y2(2)%y%x2(2)%x + endif + !$omp end target + +else if (case == 2) then + ! Use target with defaultmap(TO) + + !$omp target defaultmap(to : all) + if (any (var1%x /= [1,2,3,4])) stop 1 + var1%x = 2 * var1%x + !$omp end target + + !$omp target defaultmap(to : all) + if (any (var1a%x /= [-1,-2,-3,-4])) stop 2 + var1a%x = 3 * var1a%x + !$omp end target + + !$omp target defaultmap(to : all) ! { dg-warning "Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var2%x /= [11,22,33,44])) stop 3 + var2%x = 4 * var2%x + !$omp end target + + !$omp target defaultmap(to : all) ! { dg-warning "Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var3%x%x /= [111,222,333,444])) stop 4 + var3%x%x = 5 * var3%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var3%x2(1)%x /= 2*[111,222,333,444])) stop 4 + if (any (var3%x2(2)%x /= 3*[111,222,333,444])) stop 4 + var3%x2(1)%x = 5 * var3%x2(1)%x + var3%x2(2)%x = 5 * var3%x2(2)%x + endif + !$omp end target + + !$omp target defaultmap(to : all) firstprivate(is_shared_mem) ! { dg-warning "Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var4%y%y%x%x /= -1 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y%y%x2(1)%x /= -2 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y%y%x2(2)%x /= -3 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(1)%y%x%x /= -4 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! 
For stride data, this accesses the host's _vtab + if (any (var4%y2(1)%y%x2(1)%x /= -5 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(1)%y%x2(2)%x /= -6 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(2)%y%x%x /= -7 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(2)%y%x2(1)%x /= -8 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(2)%y%x2(2)%x /= -9 * [1111,2222,3333,4444])) stop 5 + endif + var4%y%y%x%x = 6 * var4%y%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y%y%x2(1)%x = 6 * var4%y%y%x2(1)%x + var4%y%y%x2(2)%x = 6 * var4%y%y%x2(2)%x + endif + var4%y2(1)%y%x%x = 6 * var4%y2(1)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(1)%y%x2(1)%x = 6 * var4%y2(1)%y%x2(1)%x + var4%y2(1)%y%x2(2)%x = 6 * var4%y2(1)%y%x2(2)%x + endif + var4%y2(2)%y%x%x = 6 * var4%y2(2)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(2)%y%x2(1)%x = 6 * var4%y2(2)%y%x2(1)%x + var4%y2(2)%y%x2(2)%x = 6 * var4%y2(2)%y%x2(2)%x + endif + !$omp end target + +else if (case == 3) then + ! Use target with map clause + + !$omp target map(tofrom: var1) + if (any (var1%x /= [1,2,3,4])) stop 1 + var1%x = 2 * var1%x + !$omp end target + + !$omp target map(tofrom: var1a) + if (any (var1a%x /= [-1,-2,-3,-4])) stop 2 + var1a%x = 3 * var1a%x + !$omp end target + + !$omp target map(tofrom: var2) ! { dg-warning "28: Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var2%x /= [11,22,33,44])) stop 3 + var2%x = 4 * var2%x + !$omp end target + + !$omp target map(tofrom: var3) ! { dg-warning "28: Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var3%x%x /= [111,222,333,444])) stop 4 + var3%x%x = 5 * var3%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var3%x2(1)%x /= 2*[111,222,333,444])) stop 4 + if (any (var3%x2(2)%x /= 3*[111,222,333,444])) stop 4 + var3%x2(1)%x = 5 * var3%x2(1)%x + var3%x2(2)%x = 5 * var3%x2(2)%x + endif + !$omp end target + + !$omp target map(tofrom: var4) ! { dg-warning "28: Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var4%y%y%x%x /= -1 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y%y%x2(1)%x /= -2 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y%y%x2(2)%x /= -3 * [1111,2222,3333,4444])) stop 5 + end if + if (any (var4%y2(1)%y%x%x /= -4 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(1)%y%x2(1)%x /= -5 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(1)%y%x2(2)%x /= -6 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(2)%y%x%x /= -7 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(2)%y%x2(1)%x /= -8 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(2)%y%x2(2)%x /= -9 * [1111,2222,3333,4444])) stop 5 + endif + var4%y%y%x%x = 6 * var4%y%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y%y%x2(1)%x = 6 * var4%y%y%x2(1)%x + var4%y%y%x2(2)%x = 6 * var4%y%y%x2(2)%x + endif + var4%y2(1)%y%x%x = 6 * var4%y2(1)%y%x%x + if (is_shared_mem) then ! 
For stride data, this accesses the host's _vtab + var4%y2(1)%y%x2(1)%x = 6 * var4%y2(1)%y%x2(1)%x + var4%y2(1)%y%x2(2)%x = 6 * var4%y2(1)%y%x2(2)%x + endif + var4%y2(2)%y%x%x = 6 * var4%y2(2)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(2)%y%x2(1)%x = 6 * var4%y2(2)%y%x2(1)%x + var4%y2(2)%y%x2(2)%x = 6 * var4%y2(2)%y%x2(2)%x + endif + !$omp end target + +else if (case == 4) then + ! Use target with map clause -- NOTE: This uses TO not TOFROM + + !$omp target map(to: var1) + if (any (var1%x /= [1,2,3,4])) stop 1 + var1%x = 2 * var1%x + !$omp end target + + !$omp target map(to: var1a) + if (any (var1a%x /= [-1,-2,-3,-4])) stop 2 + var1a%x = 3 * var1a%x + !$omp end target + + !$omp target map(to: var2) ! { dg-warning "24: Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var2%x /= [11,22,33,44])) stop 3 + var2%x = 4 * var2%x + !$omp end target + + !$omp target map(to: var3) ! { dg-warning "24: Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var3%x%x /= [111,222,333,444])) stop 4 + var3%x%x = 5 * var3%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var3%x2(1)%x /= 2*[111,222,333,444])) stop 4 + if (any (var3%x2(2)%x /= 3*[111,222,333,444])) stop 4 + var3%x2(1)%x = 5 * var3%x2(1)%x + var3%x2(2)%x = 5 * var3%x2(2)%x + endif + !$omp end target + + !$omp target map(to: var4) ! { dg-warning "24: Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var4%y%y%x%x /= -1 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y%y%x2(1)%x /= -2 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y%y%x2(2)%x /= -3 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(1)%y%x%x /= -4 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(1)%y%x2(1)%x /= -5 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(1)%y%x2(2)%x /= -6 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(2)%y%x%x /= -7 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(2)%y%x2(1)%x /= -8 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(2)%y%x2(2)%x /= -9 * [1111,2222,3333,4444])) stop 5 + endif + var4%y%y%x%x = 6 * var4%y%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y%y%x2(1)%x = 6 * var4%y%y%x2(1)%x + var4%y%y%x2(2)%x = 6 * var4%y%y%x2(2)%x + endif + var4%y2(1)%y%x%x = 6 * var4%y2(1)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(1)%y%x2(1)%x = 6 * var4%y2(1)%y%x2(1)%x + var4%y2(1)%y%x2(2)%x = 6 * var4%y2(1)%y%x2(2)%x + endif + var4%y2(2)%y%x%x = 6 * var4%y2(2)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(2)%y%x2(1)%x = 6 * var4%y2(2)%y%x2(1)%x + var4%y2(2)%y%x2(2)%x = 6 * var4%y2(2)%y%x2(2)%x + endif + !$omp end target + +else if (case == 5) then + ! Use target enter/exit data + target with explicit map + !$omp target enter data map(to: var1) + !$omp target enter data map(to: var1a) + !$omp target enter data map(to: var2) ! { dg-warning "35: Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + !$omp target enter data map(to: var3) ! 
{ dg-warning "35: Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + !$omp target enter data map(to: var4) ! { dg-warning "35: Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + + !$omp target map(to: var1) + if (any (var1%x /= [1,2,3,4])) stop 1 + var1%x = 2 * var1%x + !$omp end target + + !$omp target map(to: var1a) + if (any (var1a%x /= [-1,-2,-3,-4])) stop 2 + var1a%x = 3 * var1a%x + !$omp end target + + !$omp target map(to: var2) ! { dg-warning "24: Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var2%x /= [11,22,33,44])) stop 3 + var2%x = 4 * var2%x + !$omp end target + + !$omp target map(to: var3) ! { dg-warning "24: Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var3%x%x /= [111,222,333,444])) stop 4 + var3%x%x = 5 * var3%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var3%x2(1)%x /= 2*[111,222,333,444])) stop 4 + if (any (var3%x2(2)%x /= 3*[111,222,333,444])) stop 4 + var3%x2(1)%x = 5 * var3%x2(1)%x + var3%x2(2)%x = 5 * var3%x2(2)%x + endif + !$omp end target + + !$omp target map(to: var4) ! { dg-warning "24: Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var4%y%y%x%x /= -1 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y%y%x2(1)%x /= -2 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y%y%x2(2)%x /= -3 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(1)%y%x%x /= -4 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(1)%y%x2(1)%x /= -5 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(1)%y%x2(2)%x /= -6 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(2)%y%x%x /= -7 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(2)%y%x2(1)%x /= -8 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(2)%y%x2(2)%x /= -9 * [1111,2222,3333,4444])) stop 5 + endif + var4%y%y%x%x = 6 * var4%y%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y%y%x2(1)%x = 6 * var4%y%y%x2(1)%x + var4%y%y%x2(2)%x = 6 * var4%y%y%x2(2)%x + endif + var4%y2(1)%y%x%x = 6 * var4%y2(1)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(1)%y%x2(1)%x = 6 * var4%y2(1)%y%x2(1)%x + var4%y2(1)%y%x2(2)%x = 6 * var4%y2(1)%y%x2(2)%x + endif + var4%y2(2)%y%x%x = 6 * var4%y2(2)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(2)%y%x2(1)%x = 6 * var4%y2(2)%y%x2(1)%x + var4%y2(2)%y%x2(2)%x = 6 * var4%y2(2)%y%x2(2)%x + endif + !$omp end target + + !$omp target exit data map(from: var1) + !$omp target exit data map(from: var1a) + !$omp target exit data map(from: var2) ! { dg-warning "36: Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + !$omp target exit data map(from: var3) ! { dg-warning "36: Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + !$omp target exit data map(from: var4) ! { dg-warning "36: Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + +else if (case == 6) then + ! 
Use target enter/exit data + target with implicit map + + !$omp target enter data map(to: var1) + !$omp target enter data map(to: var1a) + !$omp target enter data map(to: var2) ! { dg-warning "35: Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + !$omp target enter data map(to: var3) ! { dg-warning "35: Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + !$omp target enter data map(to: var4) ! { dg-warning "35: Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + + !$omp target + if (any (var1%x /= [1,2,3,4])) stop 1 + var1%x = 2 * var1%x + !$omp end target + + !$omp target + if (any (var1a%x /= [-1,-2,-3,-4])) stop 2 + var1a%x = 3 * var1a%x + !$omp end target + + !$omp target ! { dg-warning "Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var2%x /= [11,22,33,44])) stop 3 + var2%x = 4 * var2%x + !$omp end target + + !$omp target ! { dg-warning "Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var3%x%x /= [111,222,333,444])) stop 4 + var3%x%x = 5 * var3%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var3%x2(1)%x /= 2*[111,222,333,444])) stop 4 + if (any (var3%x2(2)%x /= 3*[111,222,333,444])) stop 4 + var3%x2(1)%x = 5 * var3%x2(1)%x + var3%x2(2)%x = 5 * var3%x2(2)%x + endif + !$omp end target + + !$omp target ! { dg-warning "Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var4%y%y%x%x /= -1 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y%y%x2(1)%x /= -2 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y%y%x2(2)%x /= -3 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(1)%y%x%x /= -4 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(1)%y%x2(1)%x /= -5 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(1)%y%x2(2)%x /= -6 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(2)%y%x%x /= -7 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(2)%y%x2(1)%x /= -8 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(2)%y%x2(2)%x /= -9 * [1111,2222,3333,4444])) stop 5 + endif + var4%y%y%x%x = 6 * var4%y%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y%y%x2(1)%x = 6 * var4%y%y%x2(1)%x + var4%y%y%x2(2)%x = 6 * var4%y%y%x2(2)%x + endif + var4%y2(1)%y%x%x = 6 * var4%y2(1)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(1)%y%x2(1)%x = 6 * var4%y2(1)%y%x2(1)%x + var4%y2(1)%y%x2(2)%x = 6 * var4%y2(1)%y%x2(2)%x + endif + var4%y2(2)%y%x%x = 6 * var4%y2(2)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(2)%y%x2(1)%x = 6 * var4%y2(2)%y%x2(1)%x + var4%y2(2)%y%x2(2)%x = 6 * var4%y2(2)%y%x2(2)%x + endif + !$omp end target + + !$omp target exit data map(from: var1) + !$omp target exit data map(from: var1a) + !$omp target exit data map(from: var2) ! { dg-warning "36: Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + !$omp target exit data map(from: var3) ! 
{ dg-warning "36: Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + !$omp target exit data map(from: var4) ! { dg-warning "36: Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + +else + error stop +end if + +if ((case /= 2 .and. case /= 4) .or. is_shared_mem) then + ! The target update should have been active, check for the updated values + if (any (var1%x /= 2 * [1,2,3,4])) stop 11 + if (any (var1a%x /= 3 * [-1,-2,-3,-4])) stop 22 + if (any (var2%x /= 4 * [11,22,33,44])) stop 33 + + if (any (var3%x%x /= 5 * [111,222,333,444])) stop 44 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var3%x2(1)%x /= 2 * 5 * [111,222,333,444])) stop 44 + if (any (var3%x2(2)%x /= 3 * 5 * [111,222,333,444])) stop 44 + endif + + if (any (var4%y%y%x%x /= -1 * 6 * [1111,2222,3333,4444])) stop 55 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y%y%x2(1)%x /= -2 * 6 * [1111,2222,3333,4444])) stop 55 + if (any (var4%y%y%x2(2)%x /= -3 * 6 * [1111,2222,3333,4444])) stop 55 + endif + if (any (var4%y2(1)%y%x%x /= -4 * 6 * [1111,2222,3333,4444])) stop 55 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(1)%y%x2(1)%x /= -5 * 6 * [1111,2222,3333,4444])) stop 55 + if (any (var4%y2(1)%y%x2(2)%x /= -6 * 6 * [1111,2222,3333,4444])) stop 55 + endif + if (any (var4%y2(2)%y%x%x /= -7 * 6 * [1111,2222,3333,4444])) stop 55 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(2)%y%x2(1)%x /= -8 * 6 * [1111,2222,3333,4444])) stop 55 + if (any (var4%y2(2)%y%x2(2)%x /= -9 * 6 * [1111,2222,3333,4444])) stop 55 + endif +else + ! The old host values should still be there as 'to:' created a device copy + if (any (var1%x /= [1,2,3,4])) stop 12 + if (any (var1a%x /= [-1,-2,-3,-4])) stop 22 + if (any (var2%x /= [11,22,33,44])) stop 33 + + if (any (var3%x%x /= [111,222,333,444])) stop 44 + ! .not. is_shared_mem: + ! if (any (var3%x2(1)%x /= 2*[111,222,333,444])) stop 44 + ! if (any (var3%x2(2)%x /= 3*[111,222,333,444])) stop 44 + + if (any (var4%y%y%x%x /= -1 * [1111,2222,3333,4444])) stop 55 + if (any (var4%y%y%x2(1)%x /= -2 * [1111,2222,3333,4444])) stop 55 + if (any (var4%y%y%x2(2)%x /= -3 * [1111,2222,3333,4444])) stop 55 + if (any (var4%y2(1)%y%x%x /= -4 * [1111,2222,3333,4444])) stop 55 + ! .not. is_shared_mem: + !if (any (var4%y2(1)%y%x2(1)%x /= -5 * [1111,2222,3333,4444])) stop 55 + !if (any (var4%y2(1)%y%x2(2)%x /= -6 * [1111,2222,3333,4444])) stop 55 + if (any (var4%y2(2)%y%x%x /= -7 * [1111,2222,3333,4444])) stop 55 + ! .not. is_shared_mem: + !if (any (var4%y2(2)%y%x2(1)%x /= -8 * [1111,2222,3333,4444])) stop 55 + !if (any (var4%y2(2)%y%x2(2)%x /= -9 * [1111,2222,3333,4444])) stop 55 +end if +if (case_var(100) /= 0) stop 123 +end subroutine test + +program main + use omp_lib + implicit none(type, external) +#ifdef USE_USM_REQUIREMENT + !$omp requires unified_shared_memory +#endif + + interface + subroutine test(case) + integer, value :: case + end + end interface + integer :: dev + call run_it(omp_get_default_device()) + do dev = 0, omp_get_num_devices() + call run_it(dev) + end do + call run_it(omp_initial_device) +! print *, 'all done' +contains +subroutine run_it(dev) + integer, value :: dev +! 
print *, 'DEVICE', dev + call omp_set_default_device(dev) + call test(1) + call test(2) + call test(3) + call test(4) + call test(5) + call test(6) +end +end diff --git a/libgomp/testsuite/libgomp.fortran/metadirective-1.f90 b/libgomp/testsuite/libgomp.fortran/metadirective-1.f90 index 7b3e09f..d6f4d5b 100644 --- a/libgomp/testsuite/libgomp.fortran/metadirective-1.f90 +++ b/libgomp/testsuite/libgomp.fortran/metadirective-1.f90 @@ -1,4 +1,5 @@ -! { dg-do run } +! { dg-do run { target { ! offload_target_nvptx } } } +! { dg-do compile { target offload_target_nvptx } } program test implicit none @@ -33,6 +34,10 @@ program test contains subroutine f (x, y, z) integer :: x(N), y(N), z(N) + ! The following fails as on the host the target side cannot be + ! resolved - and the 'teams' or not status affects how 'target' + ! is called. -> See PR118694, esp. comment 9. + ! Note also the dg-do compile above for offload_target_nvptx !$omp target map (to: x, y) map(from: z) block @@ -43,6 +48,7 @@ contains z(i) = x(i) * y(i) enddo end block + ! { dg-bogus "'target' construct with nested 'teams' construct contains directives outside of the 'teams' construct" "PR118694" { xfail offload_target_nvptx } .-9 } */ end subroutine subroutine g (x, y, z) integer :: x(N), y(N), z(N) @@ -56,6 +62,7 @@ contains z(i) = x(i) * y(i) enddo end block + ! { dg-bogus "'target' construct with nested 'teams' construct contains directives outside of the 'teams' construct" "PR118694" { xfail offload_target_nvptx } .-9 } */ !$omp end target end subroutine end program diff --git a/libgomp/testsuite/libgomp.fortran/omp_target_memset-2.f90 b/libgomp/testsuite/libgomp.fortran/omp_target_memset-2.f90 new file mode 100644 index 0000000..2641086 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/omp_target_memset-2.f90 @@ -0,0 +1,67 @@ +! PR libgomp/120444 +! Async version + +use omp_lib +use iso_c_binding +implicit none (type, external) +integer(c_int) :: dev + +!$omp parallel do +do dev = omp_initial_device, omp_get_num_devices () +block + integer(c_int) :: i, val, start, tail + type(c_ptr) :: ptr, ptr2, tmpptr + integer(c_int8_t), pointer, contiguous :: fptr(:) + integer(c_intptr_t) :: intptr + integer(c_size_t), parameter :: count = 1024 + integer(omp_depend_kind) :: dep(1) + + ptr = omp_target_alloc (count, dev) + + !$omp depobj(dep(1)) depend(inout: ptr) + + ! Play also around with the alignment - as hsa_amd_memory_fill operates + ! on multiples of 4 bytes (c_int32_t) + + do start = 0, 31 + do tail = 0, 31 + val = iachar('0') + start + tail + + tmpptr = transfer (transfer (ptr, intptr) + start, tmpptr) + ptr2 = omp_target_memset_async (tmpptr, val, count - start - tail, dev, 0) + + if (.not. 
c_associated (tmpptr, ptr2)) stop 1 + + !$omp taskwait + + !$omp target device(dev) is_device_ptr(ptr) depend(depobj: dep(1)) nowait + do i = 1 + start, int(count, c_int) - start - tail + call c_f_pointer (ptr, fptr, [count]) + if (fptr(i) /= int (val, c_int8_t)) stop 2 + fptr(i) = fptr(i) + 2_c_int8_t + end do + !$omp end target + + ptr2 = omp_target_memset_async (tmpptr, val + 3, & + count - start - tail, dev, 1, dep) + + !$omp target device(dev) is_device_ptr(ptr) depend(depobj: dep(1)) nowait + do i = 1 + start, int(count, c_int) - start - tail + call c_f_pointer (ptr, fptr, [count]) + if (fptr(i) /= int (val + 3, c_int8_t)) stop 3 + fptr(i) = fptr(i) - 1_c_int8_t + end do + !$omp end target + + ptr2 = omp_target_memset_async (tmpptr, val - 3, & + count - start - tail, dev, 1, dep) + + !$omp taskwait depend (depobj: dep(1)) + end do + end do + + !$omp depobj(dep(1)) destroy + call omp_target_free (ptr, dev); +end block +end do +end diff --git a/libgomp/testsuite/libgomp.fortran/omp_target_memset.f90 b/libgomp/testsuite/libgomp.fortran/omp_target_memset.f90 new file mode 100644 index 0000000..1ee184a --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/omp_target_memset.f90 @@ -0,0 +1,39 @@ +! PR libgomp/120444 + +use omp_lib +use iso_c_binding +implicit none (type, external) + +integer(c_int) :: dev, i, val, start, tail +type(c_ptr) :: ptr, ptr2, tmpptr +integer(c_int8_t), pointer, contiguous :: fptr(:) +integer(c_intptr_t) :: intptr +integer(c_size_t), parameter :: count = 1024 + +do dev = omp_initial_device, omp_get_num_devices () + ptr = omp_target_alloc (count, dev) + + ! Play also around with the alignment - as hsa_amd_memory_fill operates + ! on multiples of 4 bytes (c_int32_t) + + do start = 0, 31 + do tail = 0, 31 + val = iachar('0') + start + tail + + tmpptr = transfer (transfer (ptr, intptr) + start, tmpptr) + ptr2 = omp_target_memset (tmpptr, val, count - start - tail, dev) + + if (.not. c_associated (tmpptr, ptr2)) stop 1 + + !$omp target device(dev) is_device_ptr(ptr) + do i = 1 + start, int(count, c_int) - start - tail + call c_f_pointer (ptr, fptr, [count]) + if (fptr(i) /= int (val, c_int8_t)) stop 2 + end do + !$omp end target + end do + end do + + call omp_target_free (ptr, dev); +end do +end diff --git a/libgomp/testsuite/libgomp.fortran/target-enter-data-8.f90 b/libgomp/testsuite/libgomp.fortran/target-enter-data-8.f90 new file mode 100644 index 0000000..c6d671c --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/target-enter-data-8.f90 @@ -0,0 +1,532 @@ +! { dg-additional-options "-cpp" } + +! FIXME: Some tests do not work yet. Those are for now in '#if 0' + +! Check that 'map(alloc:' properly works with +! - deferred-length character strings +! - arrays with array descriptors +! 
For those, the array descriptor / string length must be mapped with 'to:' + +program main +implicit none + +type t + integer :: ic(2:5), ic2 + character(len=11) :: ccstr(3:4), ccstr2 + character(len=11,kind=4) :: cc4str(3:7), cc4str2 + integer, pointer :: pc(:), pc2 + character(len=:), pointer :: pcstr(:), pcstr2 + character(len=:,kind=4), pointer :: pc4str(:), pc4str2 +end type t + +type(t) :: dt + +integer :: ii(5), ii2 +character(len=11) :: clstr(-1:1), clstr2 +character(len=11,kind=4) :: cl4str(0:3), cl4str2 +integer, pointer :: ip(:), ip2 +integer, allocatable :: ia(:), ia2 +character(len=:), pointer :: pstr(:), pstr2 +character(len=:), allocatable :: astr(:), astr2 +character(len=:,kind=4), pointer :: p4str(:), p4str2 +character(len=:,kind=4), allocatable :: a4str(:), a4str2 + + +allocate(dt%pc(5), dt%pc2) +allocate(character(len=2) :: dt%pcstr(2)) +allocate(character(len=4) :: dt%pcstr2) + +allocate(character(len=3,kind=4) :: dt%pc4str(2:3)) +allocate(character(len=5,kind=4) :: dt%pc4str2) + +allocate(ip(5), ip2, ia(8), ia2) +allocate(character(len=2) :: pstr(-2:0)) +allocate(character(len=4) :: pstr2) +allocate(character(len=6) :: astr(3:5)) +allocate(character(len=8) :: astr2) + +allocate(character(len=3,kind=4) :: p4str(2:4)) +allocate(character(len=5,kind=4) :: p4str2) +allocate(character(len=7,kind=4) :: a4str(-2:3)) +allocate(character(len=9,kind=4) :: a4str2) + + +! integer :: ic(2:5), ic2 + +!$omp target enter data map(alloc: dt%ic) +!$omp target map(alloc: dt%ic) + if (size(dt%ic) /= 4) error stop + if (lbound(dt%ic, 1) /= 2) error stop + if (ubound(dt%ic, 1) /= 5) error stop + dt%ic = [22, 33, 44, 55] +!$omp end target +!$omp target exit data map(from: dt%ic) +if (size(dt%ic) /= 4) error stop +if (lbound(dt%ic, 1) /= 2) error stop +if (ubound(dt%ic, 1) /= 5) error stop +if (any (dt%ic /= [22, 33, 44, 55])) error stop + +!$omp target enter data map(alloc: dt%ic2) +!$omp target map(alloc: dt%ic2) + dt%ic2 = 42 +!$omp end target +!$omp target exit data map(from: dt%ic2) +if (dt%ic2 /= 42) error stop + + +! character(len=11) :: ccstr(3:4), ccstr2 + +!$omp target enter data map(alloc: dt%ccstr) +!$omp target map(alloc: dt%ccstr) + if (len(dt%ccstr) /= 11) error stop + if (size(dt%ccstr) /= 2) error stop + if (lbound(dt%ccstr, 1) /= 3) error stop + if (ubound(dt%ccstr, 1) /= 4) error stop + dt%ccstr = ["12345678901", "abcdefghijk"] +!$omp end target +!$omp target exit data map(from: dt%ccstr) +if (len(dt%ccstr) /= 11) error stop +if (size(dt%ccstr) /= 2) error stop +if (lbound(dt%ccstr, 1) /= 3) error stop +if (ubound(dt%ccstr, 1) /= 4) error stop +if (any (dt%ccstr /= ["12345678901", "abcdefghijk"])) error stop + +!$omp target enter data map(alloc: dt%ccstr2) +!$omp target map(alloc: dt%ccstr2) + if (len(dt%ccstr2) /= 11) error stop + dt%ccstr2 = "ABCDEFGHIJK" +!$omp end target +!$omp target exit data map(from: dt%ccstr2) +if (len(dt%ccstr2) /= 11) error stop +if (dt%ccstr2 /= "ABCDEFGHIJK") error stop + + +! character(len=11,kind=4) :: cc4str(3:7), cc4str2 + +#if 0 +! 
Value check fails +!$omp target map(alloc: dt%cc4str) + if (len(dt%cc4str) /= 11) error stop + if (size(dt%cc4str) /= 5) error stop + if (lbound(dt%cc4str, 1) /= 3) error stop + if (ubound(dt%cc4str, 1) /= 7) error stop + dt%cc4str = [4_"12345678901", 4_"abcdefghijk", & + 4_"qerftcea6ds", 4_"a1f9g37ga4.", & + 4_"45ngwj56sj2"] +!$omp end target +!$omp target exit data map(from: dt%cc4str) +if (len(dt%cc4str) /= 11) error stop +if (size(dt%cc4str) /= 5) error stop +if (lbound(dt%cc4str, 1) /= 3) error stop +if (ubound(dt%cc4str, 1) /= 7) error stop +if (dt%cc4str(3) /= 4_"12345678901") error stop +if (dt%cc4str(4) /= 4_"abcdefghijk") error stop +if (dt%cc4str(5) /= 4_"qerftcea6ds") error stop +if (dt%cc4str(6) /= 4_"a1f9g37ga4.") error stop +if (dt%cc4str(7) /= 4_"45ngwj56sj2") error stop +#endif + +!$omp target enter data map(alloc: dt%cc4str2) +!$omp target map(alloc: dt%cc4str2) + if (len(dt%cc4str2) /= 11) error stop + dt%cc4str2 = 4_"ABCDEFGHIJK" +!$omp end target +!$omp target exit data map(from: dt%cc4str2) +if (len(dt%cc4str2) /= 11) error stop +if (dt%cc4str2 /= 4_"ABCDEFGHIJK") error stop + + +! integer, pointer :: pc(:), pc2 +! allocate(dt%pc(5), dt%pc2) + +!$omp target enter data map(alloc: dt%pc) +!$omp target map(alloc: dt%pc) + if (.not. associated(dt%pc)) error stop + if (size(dt%pc) /= 5) error stop + if (lbound(dt%pc, 1) /= 1) error stop + if (ubound(dt%pc, 1) /= 5) error stop + dt%pc = [11, 22, 33, 44, 55] +!$omp end target +!$omp target exit data map(from: dt%pc) +if (.not. associated(dt%pc)) error stop +if (size(dt%pc) /= 5) error stop +if (lbound(dt%pc, 1) /= 1) error stop +if (ubound(dt%pc, 1) /= 5) error stop +if (any (dt%pc /= [11, 22, 33, 44, 55])) error stop + +!$omp target enter data map(alloc: dt%pc2) +!$omp target map(alloc: dt%pc2) + if (.not. associated(dt%pc2)) error stop + dt%pc2 = 99 +!$omp end target +!$omp target exit data map(from: dt%pc2) +if (dt%pc2 /= 99) error stop +if (.not. associated(dt%pc2)) error stop + + +! character(len=:), pointer :: pcstr(:), pcstr2 +! allocate(character(len=2) :: dt%pcstr(2)) +! allocate(character(len=4) :: dt%pcstr2) + +!$omp target enter data map(alloc: dt%pcstr) +!$omp target map(alloc: dt%pcstr) + if (.not. associated(dt%pcstr)) error stop + if (len(dt%pcstr) /= 2) error stop + if (size(dt%pcstr) /= 2) error stop + if (lbound(dt%pcstr, 1) /= 1) error stop + if (ubound(dt%pcstr, 1) /= 2) error stop + dt%pcstr = ["01", "jk"] +!$omp end target +!$omp target exit data map(from: dt%pcstr) +if (.not. associated(dt%pcstr)) error stop +if (len(dt%pcstr) /= 2) error stop +if (size(dt%pcstr) /= 2) error stop +if (lbound(dt%pcstr, 1) /= 1) error stop +if (ubound(dt%pcstr, 1) /= 2) error stop +if (any (dt%pcstr /= ["01", "jk"])) error stop + + +!$omp target enter data map(alloc: dt%pcstr2) +!$omp target map(alloc: dt%pcstr2) + if (.not. associated(dt%pcstr2)) error stop + if (len(dt%pcstr2) /= 4) error stop + dt%pcstr2 = "HIJK" +!$omp end target +!$omp target exit data map(from: dt%pcstr2) +if (.not. associated(dt%pcstr2)) error stop +if (len(dt%pcstr2) /= 4) error stop +if (dt%pcstr2 /= "HIJK") error stop + + +! character(len=:,kind=4), pointer :: pc4str(:), pc4str2 +! allocate(character(len=3,kind=4) :: dt%pc4str(2:3)) +! allocate(character(len=5,kind=4) :: dt%pc4str2) + +!$omp target enter data map(alloc: dt%pc4str) +!$omp target map(alloc: dt%pc4str) + if (.not. 
associated(dt%pc4str)) error stop + if (len(dt%pc4str) /= 3) error stop + if (size(dt%pc4str) /= 2) error stop + if (lbound(dt%pc4str, 1) /= 2) error stop + if (ubound(dt%pc4str, 1) /= 3) error stop + dt%pc4str = [4_"456", 4_"tzu"] +!$omp end target +!$omp target exit data map(from: dt%pc4str) +if (.not. associated(dt%pc4str)) error stop +if (len(dt%pc4str) /= 3) error stop +if (size(dt%pc4str) /= 2) error stop +if (lbound(dt%pc4str, 1) /= 2) error stop +if (ubound(dt%pc4str, 1) /= 3) error stop +if (dt%pc4str(2) /= 4_"456") error stop +if (dt%pc4str(3) /= 4_"tzu") error stop + +!$omp target enter data map(alloc: dt%pc4str2) +!$omp target map(alloc: dt%pc4str2) + if (.not. associated(dt%pc4str2)) error stop + if (len(dt%pc4str2) /= 5) error stop + dt%pc4str2 = 4_"98765" +!$omp end target +!$omp target exit data map(from: dt%pc4str2) +if (.not. associated(dt%pc4str2)) error stop +if (len(dt%pc4str2) /= 5) error stop +if (dt%pc4str2 /= 4_"98765") error stop + + +! integer :: ii(5), ii2 + +!$omp target enter data map(alloc: ii) +!$omp target map(alloc: ii) + if (size(ii) /= 5) error stop + if (lbound(ii, 1) /= 1) error stop + if (ubound(ii, 1) /= 5) error stop + ii = [-1, -2, -3, -4, -5] +!$omp end target +!$omp target exit data map(from: ii) +if (size(ii) /= 5) error stop +if (lbound(ii, 1) /= 1) error stop +if (ubound(ii, 1) /= 5) error stop +if (any (ii /= [-1, -2, -3, -4, -5])) error stop + +!$omp target enter data map(alloc: ii2) +!$omp target map(alloc: ii2) + ii2 = -410 +!$omp end target +!$omp target exit data map(from: ii2) +if (ii2 /= -410) error stop + + +! character(len=11) :: clstr(-1:1), clstr2 + +!$omp target enter data map(alloc: clstr) +!$omp target map(alloc: clstr) + if (len(clstr) /= 11) error stop + if (size(clstr) /= 3) error stop + if (lbound(clstr, 1) /= -1) error stop + if (ubound(clstr, 1) /= 1) error stop + clstr = ["12345678901", "abcdefghijk", "ABCDEFGHIJK"] +!$omp end target +!$omp target exit data map(from: clstr) +if (len(clstr) /= 11) error stop +if (size(clstr) /= 3) error stop +if (lbound(clstr, 1) /= -1) error stop +if (ubound(clstr, 1) /= 1) error stop +if (any (clstr /= ["12345678901", "abcdefghijk", "ABCDEFGHIJK"])) error stop + +!$omp target enter data map(alloc: clstr2) +!$omp target map(alloc: clstr2) + if (len(clstr2) /= 11) error stop + clstr2 = "ABCDEFghijk" +!$omp end target +!$omp target exit data map(from: clstr2) +if (len(clstr2) /= 11) error stop +if (clstr2 /= "ABCDEFghijk") error stop + + +! character(len=11,kind=4) :: cl4str(0:3), cl4str2 + +!$omp target enter data map(alloc: cl4str) +!$omp target map(alloc: cl4str) + if (len(cl4str) /= 11) error stop + if (size(cl4str) /= 4) error stop + if (lbound(cl4str, 1) /= 0) error stop + if (ubound(cl4str, 1) /= 3) error stop + cl4str = [4_"12345678901", 4_"abcdefghijk", & + 4_"qerftcea6ds", 4_"a1f9g37ga4."] +!$omp end target +!$omp target exit data map(from: cl4str) +if (len(cl4str) /= 11) error stop +if (size(cl4str) /= 4) error stop +if (lbound(cl4str, 1) /= 0) error stop +if (ubound(cl4str, 1) /= 3) error stop +if (cl4str(0) /= 4_"12345678901") error stop +if (cl4str(1) /= 4_"abcdefghijk") error stop +if (cl4str(2) /= 4_"qerftcea6ds") error stop +if (cl4str(3) /= 4_"a1f9g37ga4.") error stop + +!$omp target enter data map(alloc: cl4str2) +!$omp target map(alloc: cl4str2) + if (len(cl4str2) /= 11) error stop + cl4str2 = 4_"ABCDEFGHIJK" +!$omp end target +!$omp target exit data map(from: cl4str2) +if (len(cl4str2) /= 11) error stop +if (cl4str2 /= 4_"ABCDEFGHIJK") error stop + + +! 
allocate(ip(5), ip2, ia(8), ia2) + +!$omp target enter data map(alloc: ip) +!$omp target map(alloc: ip) + if (.not. associated(ip)) error stop + if (size(ip) /= 5) error stop + if (lbound(ip, 1) /= 1) error stop + if (ubound(ip, 1) /= 5) error stop + ip = [11, 22, 33, 44, 55] +!$omp end target +!$omp target exit data map(from: ip) +if (.not. associated(ip)) error stop +if (size(ip) /= 5) error stop +if (lbound(ip, 1) /= 1) error stop +if (ubound(ip, 1) /= 5) error stop +if (any (ip /= [11, 22, 33, 44, 55])) error stop + +!$omp target enter data map(alloc: ip2) +!$omp target map(alloc: ip2) + if (.not. associated(ip2)) error stop + ip2 = 99 +!$omp end target +!$omp target exit data map(from: ip2) +if (ip2 /= 99) error stop +if (.not. associated(ip2)) error stop + + +! allocate(ip(5), ip2, ia(8), ia2) + +!$omp target enter data map(alloc: ia) +!$omp target map(alloc: ia) + if (.not. allocated(ia)) error stop + if (size(ia) /= 8) error stop + if (lbound(ia, 1) /= 1) error stop + if (ubound(ia, 1) /= 8) error stop + ia = [1,2,3,4,5,6,7,8] +!$omp end target +!$omp target exit data map(from: ia) +if (.not. allocated(ia)) error stop +if (size(ia) /= 8) error stop +if (lbound(ia, 1) /= 1) error stop +if (ubound(ia, 1) /= 8) error stop +if (any (ia /= [1,2,3,4,5,6,7,8])) error stop + +!$omp target enter data map(alloc: ia2) +!$omp target map(alloc: ia2) + if (.not. allocated(ia2)) error stop + ia2 = 102 +!$omp end target +!$omp target exit data map(from: ia2) +if (ia2 /= 102) error stop +if (.not. allocated(ia2)) error stop + + +! character(len=:), pointer :: pstr(:), pstr2 +! allocate(character(len=2) :: pstr(-2:0)) +! allocate(character(len=4) :: pstr2) + +!$omp target enter data map(alloc: pstr) +!$omp target map(alloc: pstr) + if (.not. associated(pstr)) error stop + if (len(pstr) /= 2) error stop + if (size(pstr) /= 3) error stop + if (lbound(pstr, 1) /= -2) error stop + if (ubound(pstr, 1) /= 0) error stop + pstr = ["01", "jk", "aq"] +!$omp end target +!$omp target exit data map(from: pstr) +if (.not. associated(pstr)) error stop +if (len(pstr) /= 2) error stop +if (size(pstr) /= 3) error stop +if (lbound(pstr, 1) /= -2) error stop +if (ubound(pstr, 1) /= 0) error stop +if (any (pstr /= ["01", "jk", "aq"])) error stop + +!$omp target enter data map(alloc: pstr2) +!$omp target map(alloc: pstr2) + if (.not. associated(pstr2)) error stop + if (len(pstr2) /= 4) error stop + pstr2 = "HIJK" +!$omp end target +!$omp target exit data map(from: pstr2) +if (.not. associated(pstr2)) error stop +if (len(pstr2) /= 4) error stop +if (pstr2 /= "HIJK") error stop + + +! character(len=:), allocatable :: astr(:), astr2 +! allocate(character(len=6) :: astr(3:5)) +! allocate(character(len=8) :: astr2) + + +!$omp target enter data map(alloc: astr) +!$omp target map(alloc: astr) + if (.not. allocated(astr)) error stop + if (len(astr) /= 6) error stop + if (size(astr) /= 3) error stop + if (lbound(astr, 1) /= 3) error stop + if (ubound(astr, 1) /= 5) error stop + astr = ["01db45", "jk$D%S", "zutg47"] +!$omp end target +!$omp target exit data map(from: astr) +if (.not. allocated(astr)) error stop +if (len(astr) /= 6) error stop +if (size(astr) /= 3) error stop +if (lbound(astr, 1) /= 3) error stop +if (ubound(astr, 1) /= 5) error stop +if (any (astr /= ["01db45", "jk$D%S", "zutg47"])) error stop + + +!$omp target enter data map(alloc: astr2) +!$omp target map(alloc: astr2) + if (.not. 
allocated(astr2)) error stop + if (len(astr2) /= 8) error stop + astr2 = "HIJKhijk" +!$omp end target +!$omp target exit data map(from: astr2) +if (.not. allocated(astr2)) error stop +if (len(astr2) /= 8) error stop +if (astr2 /= "HIJKhijk") error stop + + +! character(len=:,kind=4), pointer :: p4str(:), p4str2 +! allocate(character(len=3,kind=4) :: p4str(2:4)) +! allocate(character(len=5,kind=4) :: p4str2) + +! FAILS with value check + +!$omp target enter data map(alloc: p4str) +!$omp target map(alloc: p4str) + if (.not. associated(p4str)) error stop + if (len(p4str) /= 3) error stop + if (size(p4str) /= 3) error stop + if (lbound(p4str, 1) /= 2) error stop + if (ubound(p4str, 1) /= 4) error stop + p4str(:) = [4_"f85", 4_"8af", 4_"A%F"] +!$omp end target +!$omp target exit data map(from: p4str) +if (.not. associated(p4str)) error stop +if (len(p4str) /= 3) error stop +if (size(p4str) /= 3) error stop +if (lbound(p4str, 1) /= 2) error stop +if (ubound(p4str, 1) /= 4) error stop +if (p4str(2) /= 4_"f85") error stop +if (p4str(3) /= 4_"8af") error stop +if (p4str(4) /= 4_"A%F") error stop + +!$omp target enter data map(alloc: p4str2) +!$omp target map(alloc: p4str2) + if (.not. associated(p4str2)) error stop + if (len(p4str2) /= 5) error stop + p4str2 = 4_"9875a" +!$omp end target +!$omp target exit data map(from: p4str2) +if (.not. associated(p4str2)) error stop +if (len(p4str2) /= 5) error stop +if (p4str2 /= 4_"9875a") error stop + + +! character(len=:,kind=4), allocatable :: a4str(:), a4str2 +! allocate(character(len=7,kind=4) :: a4str(-2:3)) +! allocate(character(len=9,kind=4) :: a4str2) + +!$omp target enter data map(alloc: a4str) +!$omp target map(alloc: a4str) + if (.not. allocated(a4str)) error stop + if (len(a4str) /= 7) error stop + if (size(a4str) /= 6) error stop + if (lbound(a4str, 1) /= -2) error stop + if (ubound(a4str, 1) /= 3) error stop + ! See PR fortran/107508 why '(:)' is required + a4str(:) = [4_"sf456aq", 4_"3dtzu24", 4_"_4fh7sm", 4_"=ff85s7", 4_"j=8af4d", 4_".,A%Fsz"] +!$omp end target +!$omp target exit data map(from: a4str) +if (.not. allocated(a4str)) error stop +if (len(a4str) /= 7) error stop +if (size(a4str) /= 6) error stop +if (lbound(a4str, 1) /= -2) error stop +if (ubound(a4str, 1) /= 3) error stop +if (a4str(-2) /= 4_"sf456aq") error stop +if (a4str(-1) /= 4_"3dtzu24") error stop +if (a4str(0) /= 4_"_4fh7sm") error stop +if (a4str(1) /= 4_"=ff85s7") error stop +if (a4str(2) /= 4_"j=8af4d") error stop +if (a4str(3) /= 4_".,A%Fsz") error stop + +!$omp target enter data map(alloc: a4str2) +!$omp target map(alloc: a4str2) + if (.not. allocated(a4str2)) error stop + if (len(a4str2) /= 9) error stop + a4str2 = 4_"98765a23d" +!$omp end target +!$omp target exit data map(from: a4str2) +if (.not. allocated(a4str2)) error stop +if (len(a4str2) /= 9) error stop +if (a4str2 /= 4_"98765a23d") error stop + + +deallocate(dt%pc, dt%pc2) +deallocate(dt%pcstr) +deallocate(dt%pcstr2) + +deallocate(dt%pc4str) +deallocate(dt%pc4str2) + +deallocate(ip, ip2, ia, ia2) +deallocate(pstr) +deallocate(pstr2) +deallocate(astr) +deallocate(astr2) + +deallocate(p4str) +deallocate(p4str2) +deallocate(a4str) +deallocate(a4str2) + +end diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-1.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-1.C new file mode 100644 index 0000000..6957a6c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-1.C @@ -0,0 +1,57 @@ +/* 'std::bad_cast' exception in OpenACC compute region. 
*/ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +/* See also '../libgomp.c++/target-exceptions-bad_cast-1.C'. */ + +/* See also '../../../gcc/testsuite/g++.target/gcn/exceptions-bad_cast-1.C', + '../../../gcc/testsuite/g++.target/nvptx/exceptions-bad_cast-1.C'. */ + +#include <iostream> + +struct C1 +{ + virtual void f() + {} +}; + +struct C2 : C1 +{ +}; + +int main() +{ + std::cerr << "CheCKpOInT\n"; +#pragma omp target +#pragma acc serial + /* { dg-bogus {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected xfail *-*-* } .-1 } */ + { + C1 c1; + [[maybe_unused]] + C2 &c2 = dynamic_cast<C2 &>(c1); + /* 'std::bad_cast' is thrown. */ + } +} + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + For host execution, we print something like: + terminate called after throwing an instance of 'std::bad_cast' + what(): std::bad_cast + Aborted (core dumped) + { dg-output {.*std::bad_cast} { target openacc_host_selected } } + For GCN, nvptx offload execution, we don't print anything, but just 'abort'. + + TODO For GCN, nvptx offload execution, this currently doesn't 'abort' due to + the 'std::bad_cast' exception, but rather due to SIGSEGV in 'dynamic_cast'; + PR119692. + + { dg-shouldfail {'std::bad_cast' exception} } */ +/* There are configurations where we 'WARNING: program timed out.' while in + 'dynamic_cast', see <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119692#c6>. + { dg-timeout 10 } ... to make sure that happens quickly. */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2-offload-sorry-GCN.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2-offload-sorry-GCN.C new file mode 100644 index 0000000..8260966 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2-offload-sorry-GCN.C @@ -0,0 +1,18 @@ +/* 'std::bad_cast' exception in OpenACC compute region, caught, '-foffload-options=-mno-fake-exceptions'. */ + +/* As this test case involves an expected offload compilation failure, we have to handle each offload target individually. + { dg-do link { target openacc_radeon_accel_selected } } */ +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -foffload-options=-mno-fake-exceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "exceptions-bad_cast-2.C" + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + Given '-foffload-options=-mno-fake-exceptions', offload compilation fails: + { dg-regexp {[^\r\n]+: In function 'main[^']+':[\r\n]+(?:[^\r\n]+: sorry, unimplemented: exception handling not supported[\r\n]+)+} } + (Note, using 'dg-regexp' instead of 'dg-message', as the former runs before the auto-mark-UNSUPPORTED.) 
+ { dg-excess-errors {'mkoffload' failure etc.} } */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2-offload-sorry-nvptx.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2-offload-sorry-nvptx.C new file mode 100644 index 0000000..86d3f6c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2-offload-sorry-nvptx.C @@ -0,0 +1,20 @@ +/* 'std::bad_cast' exception in OpenACC compute region, caught, '-foffload-options=-mno-fake-exceptions'. */ + +/* As this test case involves an expected offload compilation failure, we have to handle each offload target individually. + { dg-do link { target openacc_nvidia_accel_selected } } */ +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -foffload-options=-mno-fake-exceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "exceptions-bad_cast-2.C" + +/* { dg-bogus {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected xfail *-*-* } 0 } */ + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + Given '-foffload-options=-mno-fake-exceptions', offload compilation fails: + { dg-regexp {[^\r\n]+: In function 'main[^']+':[\r\n]+(?:[^\r\n]+: sorry, unimplemented: exception handling not supported[\r\n]+)+} } + (Note, using 'dg-regexp' instead of 'dg-message', as the former runs before the auto-mark-UNSUPPORTED.) + { dg-excess-errors {'mkoffload' failure etc.} } */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2.C new file mode 100644 index 0000000..0f84cf2 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2.C @@ -0,0 +1,63 @@ +/* 'std::bad_cast' exception in OpenACC compute region, caught. */ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ +/* { dg-bogus {_ZTISt8bad_cast} PR119734 { target openacc_nvidia_accel_selected xfail *-*-* } 0 } + { dg-excess-errors {'mkoffload' failure etc.} { xfail openacc_nvidia_accel_selected } } */ + +/* See also '../libgomp.c++/target-exceptions-bad_cast-2.C'. */ + +/* See also '../../../gcc/testsuite/g++.target/gcn/exceptions-bad_cast-2.C', + '../../../gcc/testsuite/g++.target/nvptx/exceptions-bad_cast-2.C'. */ + +#include <iostream> +#include <typeinfo> + +struct C1 +{ + virtual void f() + {} +}; + +struct C2 : C1 +{ +}; + +int main() +{ + std::cerr << "CheCKpOInT\n"; +#pragma omp target +#pragma acc serial + { + C1 c1; + try + { + [[maybe_unused]] + C2 &c2 = dynamic_cast<C2 &>(c1); + /* 'std::bad_cast' is thrown. */ + } + catch (const std::bad_cast &e) + { + __builtin_printf("caught '%s'\n", e.what()); + } + } +} + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + { dg-output {.*caught 'std::bad_cast'[\r\n]+} { target openacc_host_selected } } + For GCN, nvptx offload execution, we don't print anything, but just 'abort'. 
+ + TODO For GCN, nvptx offload execution, this currently doesn't 'abort' due to + the 'std::bad_cast' exception, but rather due to SIGSEGV in 'dynamic_cast'; + PR119692. + + For GCN, nvptx offload execution, there is no 'catch'ing; any exception is fatal. + { dg-shouldfail {'std::bad_cast' exception} { ! openacc_host_selected } } */ +/* There are configurations where we 'WARNING: program timed out.' while in + 'dynamic_cast', see <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119692#c6>. + { dg-timeout 10 } ... to make sure that happens quickly. */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-3.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-3.C new file mode 100644 index 0000000..4fa419f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-3.C @@ -0,0 +1,49 @@ +/* 'std::bad_cast' exception in OpenACC compute region, dead code. */ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* Wrong code for offloading execution. + { dg-skip-if PR119692 { ! openacc_host_selected } } + { dg-additional-options -fdump-tree-gimple } */ +/* { dg-additional-options -fdump-tree-optimized-raw } */ + +/* See also '../libgomp.c++/target-exceptions-bad_cast-3.C'. */ + +/* See also '../../../gcc/testsuite/g++.target/gcn/exceptions-bad_cast-3.C', + '../../../gcc/testsuite/g++.target/nvptx/exceptions-bad_cast-3.C'. */ + +/* For PR119692 workarounds. */ +#ifndef DEFAULT +# define DEFAULT +#endif + +struct C1 +{ + virtual void f() + {} +}; + +struct C2 : C1 +{ +}; + +int main() +{ +#pragma omp target DEFAULT +#pragma acc serial DEFAULT + { + C1 c1; + bool a = false; + asm volatile ("" : : "r" (&a) : "memory"); + if (a) + { + [[maybe_unused]] + C2 &c2 = dynamic_cast<C2 &>(c1); + /* 'std::bad_cast' is thrown. */ + } + } +} + +/* { dg-final { scan-tree-dump-not {(?n)#pragma omp target oacc_serial map\(tofrom:_ZTI2C2 \[len: [0-9]+\]\) map\(tofrom:_ZTI2C1 \[len: [0-9]+\]\) map\(tofrom:_ZTV2C1 \[len: [0-9]+\]\)$} gimple { xfail *-*-* } } } */ + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-1.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-1.C new file mode 100644 index 0000000..08c5766 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-1.C @@ -0,0 +1,43 @@ +/* 'throw' in OpenACC compute region. */ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +/* See also '../libgomp.c++/target-exceptions-throw-1.C'. */ + +/* See also '../../../gcc/testsuite/g++.target/gcn/exceptions-throw-1.C', + '../../../gcc/testsuite/g++.target/nvptx/exceptions-throw-1.C'. 
*/ + +#include <iostream> + +class MyException +{ +}; + +int main() +{ + std::cerr << "CheCKpOInT\n"; +#pragma omp target +#pragma acc serial + /* { dg-bogus {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected xfail *-*-* } .-1 } */ + { + MyException e1; + throw e1; + } +} + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + For host execution, we print something like: + terminate called after throwing an instance of 'MyException' + Aborted (core dumped) + { dg-output {.*MyException} { target openacc_host_selected } } + For GCN, nvptx offload execution, we don't print anything, but just 'abort'. + + { dg-shouldfail {'MyException' exception} } */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2-offload-sorry-GCN.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2-offload-sorry-GCN.C new file mode 100644 index 0000000..40be837 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2-offload-sorry-GCN.C @@ -0,0 +1,20 @@ +/* 'throw' in OpenACC compute region, caught, '-foffload-options=-mno-fake-exceptions'. */ + +/* As this test case involves an expected offload compilation failure, we have to handle each offload target individually. + { dg-do link { target openacc_radeon_accel_selected } } */ +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -foffload-options=-mno-fake-exceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "exceptions-throw-2.C" + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + Given '-foffload-options=-mno-fake-exceptions', offload compilation fails: + { dg-regexp {[^\r\n]+: In function 'main[^']+':[\r\n]+(?:[^\r\n]+: sorry, unimplemented: exception handling not supported[\r\n]+)+} } + (Note, using 'dg-regexp' instead of 'dg-message', as the former runs before the auto-mark-UNSUPPORTED.) + { dg-excess-errors {'mkoffload' failure etc.} } */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2-offload-sorry-nvptx.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2-offload-sorry-nvptx.C new file mode 100644 index 0000000..9461455 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2-offload-sorry-nvptx.C @@ -0,0 +1,22 @@ +/* 'throw' in OpenACC compute region, caught, '-foffload-options=-mno-fake-exceptions'. */ + +/* As this test case involves an expected offload compilation failure, we have to handle each offload target individually. 
+ { dg-do link { target openacc_nvidia_accel_selected } } */ +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -foffload-options=-mno-fake-exceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "exceptions-throw-2.C" + +/* { dg-bogus {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected xfail *-*-* } 0 } */ + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + Given '-foffload-options=-mno-fake-exceptions', offload compilation fails: + { dg-regexp {[^\r\n]+: In function 'main[^']+':[\r\n]+(?:[^\r\n]+: sorry, unimplemented: exception handling not supported[\r\n]+)+} } + (Note, using 'dg-regexp' instead of 'dg-message', as the former runs before the auto-mark-UNSUPPORTED.) + { dg-excess-errors {'mkoffload' failure etc.} } */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2.C new file mode 100644 index 0000000..a7408cd --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2.C @@ -0,0 +1,52 @@ +/* 'throw' in OpenACC compute region, caught. */ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ +/* { dg-bogus {undefined symbol: typeinfo name for MyException} PR119806 { target { openacc_radeon_accel_selected && { ! __OPTIMIZE__ } } xfail *-*-* } 0 } + { dg-excess-errors {'mkoffload' failure etc.} { xfail { openacc_radeon_accel_selected && { ! __OPTIMIZE__ } } } } */ +/* { dg-bogus {Initial value type mismatch} PR119806 { target { openacc_nvidia_accel_selected && { ! __OPTIMIZE__ } } xfail *-*-* } 0 } + { dg-excess-errors {'mkoffload' failure etc.} { xfail { openacc_nvidia_accel_selected && { ! __OPTIMIZE__ } } } } */ + +/* See also '../libgomp.c++/target-exceptions-throw-2.C'. */ + +/* See also '../../../gcc/testsuite/g++.target/gcn/exceptions-throw-2.C', + '../../../gcc/testsuite/g++.target/nvptx/exceptions-throw-2.C'. 
*/ + +#include <iostream> + +class MyException +{ +}; + +int main() +{ + std::cerr << "CheCKpOInT\n"; +#pragma omp target +#pragma acc serial + /* { dg-bogus {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected xfail *-*-* } .-1 } */ + { + try + { + MyException e1; + throw e1; + } + catch (const MyException &e) + { + __builtin_printf("caught '%s'\n", "MyException"); + } + } +} + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-output {.*caught 'MyException'[\r\n]+} { target openacc_host_selected } } + For GCN, nvptx offload execution, we don't print anything, but just 'abort'. + + For GCN, nvptx offload execution, there is no 'catch'ing; any exception is fatal. + { dg-shouldfail {'MyException' exception} { ! openacc_host_selected } } */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-3.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-3.C new file mode 100644 index 0000000..74a62b3 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-3.C @@ -0,0 +1,43 @@ +/* 'throw' in OpenACC compute region, dead code. */ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* Wrong code for offloading execution. + { dg-skip-if PR119692 { ! openacc_host_selected } } + { dg-additional-options -fdump-tree-gimple } */ +/* { dg-additional-options -fdump-tree-optimized-raw } */ + +/* See also '../libgomp.c++/target-exceptions-throw-3.C'. */ + +/* See also '../../../gcc/testsuite/g++.target/gcn/exceptions-throw-3.C', + '../../../gcc/testsuite/g++.target/nvptx/exceptions-throw-3.C'. */ + +/* For PR119692 workarounds. */ +#ifndef DEFAULT +# define DEFAULT +#endif + +class MyException +{ +}; + +int main() +{ +#pragma omp target DEFAULT +#pragma acc serial DEFAULT + /* { dg-bogus {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected xfail *-*-* } .-1 } */ + { + bool a = false; + asm volatile ("" : : "r" (&a) : "memory"); + if (a) + { + MyException e1; + throw e1; + } + } +} + +/* { dg-final { scan-tree-dump-not {(?n)#pragma omp target oacc_serial map\(tofrom:_ZTI11MyException \[len: [0-9]+\]\)$} gimple { xfail *-*-* } } } */ + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/pr119692-1-1.C b/libgomp/testsuite/libgomp.oacc-c++/pr119692-1-1.C new file mode 100644 index 0000000..5c3e037 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/pr119692-1-1.C @@ -0,0 +1,42 @@ +/* PR119692 "C++ 'typeinfo', 'vtable' vs. OpenACC, OpenMP 'target' offloading" */ + +/* { dg-additional-options -UDEFAULT } + Wrong code for offloading execution. + { dg-skip-if PR119692 { ! openacc_host_selected } } */ +/* { dg-additional-options -fdump-tree-gimple } */ + +/* See also '../libgomp.c++/pr119692-1-1.C'. */ + +/* See also '../../../gcc/testsuite/g++.target/gcn/pr119692-1-1.C', + '../../../gcc/testsuite/g++.target/nvptx/pr119692-1-1.C'. 
*/ + +#ifndef DEFAULT +# define DEFAULT +#endif + +struct C1 +{ + virtual void f() + {} +}; + +struct C2 : C1 +{ +}; + +int main() +{ +#pragma omp target DEFAULT +#pragma acc serial DEFAULT + /* { dg-bogus {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected xfail *-*-* } .-1 } */ + { + C1 c1; + C1 *c1p = &c1; + asm volatile ("" : : "r" (&c1p) : "memory"); + C2 *c2 = dynamic_cast<C2 *>(c1p); + if (c2) + __builtin_abort(); + } +} + +/* { dg-final { scan-tree-dump-not {(?n)#pragma omp target oacc_serial map\(tofrom:_ZTI2C2 \[len: [0-9]+\]\) map\(tofrom:_ZTI2C1 \[len: [0-9]+\]\) map\(tofrom:_ZTV2C1 \[len: [0-9]+\]\)$} gimple { xfail *-*-* } } } */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/pr119692-1-2.C b/libgomp/testsuite/libgomp.oacc-c++/pr119692-1-2.C new file mode 100644 index 0000000..207b183 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/pr119692-1-2.C @@ -0,0 +1,12 @@ +/* PR119692 "C++ 'typeinfo', 'vtable' vs. OpenACC, OpenMP 'target' offloading" */ + +/* { dg-additional-options -DDEFAULT=default(none) } + Wrong code for offloading execution. + { dg-skip-if PR119692 { ! openacc_host_selected } } */ +/* { dg-additional-options -fdump-tree-gimple } */ + +#include "pr119692-1-1.C" + +/* { dg-bogus {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected xfail *-*-* } 0 } */ + +/* { dg-final { scan-tree-dump-not {(?n)#pragma omp target oacc_serial default\(none\) map\(tofrom:_ZTI2C2 \[len: [0-9]+\]\) map\(tofrom:_ZTI2C1 \[len: [0-9]+\]\) map\(tofrom:_ZTV2C1 \[len: [0-9]+\]\)$} gimple { xfail *-*-* } } } */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/pr119692-1-3.C b/libgomp/testsuite/libgomp.oacc-c++/pr119692-1-3.C new file mode 100644 index 0000000..e9b44de --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/pr119692-1-3.C @@ -0,0 +1,12 @@ +/* PR119692 "C++ 'typeinfo', 'vtable' vs. OpenACC, OpenMP 'target' offloading" */ + +/* { dg-additional-options -DDEFAULT=default(present) } + Wrong code for offloading execution. + { dg-xfail-run-if PR119692 { ! openacc_host_selected } } */ +/* { dg-additional-options -fdump-tree-gimple } */ + +#include "pr119692-1-1.C" + +/* { dg-bogus {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected xfail *-*-* } 0 } */ + +/* { dg-final { scan-tree-dump-not {(?n)#pragma omp target oacc_serial default\(present\) map\(force_present:_ZTI2C2 \[len: [0-9]+\]\) map\(force_present:_ZTI2C1 \[len: [0-9]+\]\) map\(force_present:_ZTV2C1 \[len: [0-9]+\]\)$} gimple { xfail *-*-* } } } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c new file mode 100644 index 0000000..4b54171 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c @@ -0,0 +1,125 @@ +/* Inspired by 'gcc.target/nvptx/abi-struct-arg.c', 'gcc.target/nvptx/abi-struct-ret.c'. */ + +/* See also '../libgomp.c-c++-common/target-abi-struct-1.c'. */ + +/* To exercise PR119835 (if optimizations enabled): disable inlining, so that + GIMPLE passes still see the functions that return aggregate types. */ +#pragma GCC optimize "-fno-inline" + +typedef struct {} empty; /* See 'gcc/doc/extend.texi', "Empty Structures". 
*/ +typedef struct {char a;} schar; +typedef struct {short a;} sshort; +typedef struct {int a;} sint; +typedef struct {long long a;} slonglong; +typedef struct {int a, b[12];} sint_13; + +#pragma omp declare target + +#define M(T) ({T t; t.a = sizeof t; t;}) + +static __SIZE_TYPE__ empty_a; +#pragma acc declare create(empty_a) +#pragma acc routine +static empty rempty(void) +{ + return ({empty t; empty_a = sizeof t; t;}); +} + +#pragma acc routine +static schar rschar(void) +{ + return M(schar); +} + +#pragma acc routine +static sshort rsshort(void) +{ + return M(sshort); +} + +#pragma acc routine +static sint rsint(void) +{ + return M(sint); +} + +#pragma acc routine +static slonglong rslonglong(void) +{ + return M(slonglong); +} + +#pragma acc routine +static sint_13 rsint_13(void) +{ + return M(sint_13); +} + +#pragma acc routine +static void aempty(empty empty) +{ + (void) empty; + + __SIZE_TYPE__ empty_a_exp; +#ifndef __cplusplus + empty_a_exp = 0; +#else + empty_a_exp = sizeof (char); +#endif + if (empty_a != empty_a_exp) + __builtin_abort(); +} + +#pragma acc routine +static void aschar(schar schar) +{ + if (schar.a != sizeof (char)) + __builtin_abort(); +} + +#pragma acc routine +static void asshort(sshort sshort) +{ + if (sshort.a != sizeof (short)) + __builtin_abort(); +} + +#pragma acc routine +static void asint(sint sint) +{ + if (sint.a != sizeof (int)) + __builtin_abort(); +} + +#pragma acc routine +static void aslonglong(slonglong slonglong) +{ + if (slonglong.a != sizeof (long long)) + __builtin_abort(); +} + +#pragma acc routine +static void asint_13(sint_13 sint_13) +{ + if (sint_13.a != (sizeof (int) * 13)) + __builtin_abort(); +} + +#pragma omp end declare target + +int main() +{ +#pragma omp target +#pragma acc serial + /* { dg-bogus {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected xfail *-*-* } .-1 } */ + { + aempty(rempty()); + aschar(rschar()); + asshort(rsshort()); + asint(rsint()); + aslonglong(rslonglong()); + asint_13(rsint_13()); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c new file mode 100644 index 0000000..eda651d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c @@ -0,0 +1,96 @@ +/* { dg-prune-output "using .vector_length \\(32\\)" } */ + +/* PR libgomp/93226 */ + +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <openacc.h> + +enum { N = 1024 }; + +static int D[N]; +#pragma acc declare device_resident(D) + +#pragma acc routine +intptr_t init_d() +{ + for (int i = 0; i < N; i++) + D[i] = 27*i; + return (intptr_t) &D[0]; +} + +int +main () +{ + int *a, *b, *e; + void *d_a, *d_b, *d_c, *d_d, *d_e, *d_f; + intptr_t intptr; + bool fail = false; + + a = (int *) malloc (N*sizeof (int)); + b = (int *) malloc (N*sizeof (int)); + e = (int *) malloc (N*sizeof (int)); + d_c = acc_malloc (N*sizeof (int)); + d_f = acc_malloc (N*sizeof (int)); + + memset (e, 0xff, N*sizeof (int)); + d_e = acc_copyin (e, N*sizeof (int)); + + #pragma acc serial copyout(intptr) + intptr = init_d (); + d_d = (void*) intptr; + acc_memcpy_device (d_c, d_d, N*sizeof (int)); + + #pragma acc serial copy(fail) deviceptr(d_c) firstprivate(intptr) + { + int *cc = (int *) d_c; + int *dd = (int *) intptr; + for (int i = 0; i < N; i++) + if (dd[i] != 27*i || cc[i] != 27*i) + { + fail = true; + __builtin_abort (); + } + } + if (fail) __builtin_abort (); + + for (int i = 0; i < N; i++) + 
a[i] = 11*i; + for (int i = 0; i < N; i++) + b[i] = 31*i; + + d_a = acc_copyin (a, N*sizeof (int)); + acc_copyin_async (b, N*sizeof (int), acc_async_noval); + + #pragma acc parallel deviceptr(d_c) async + { + int *cc = (int *) d_c; + #pragma acc loop + for (int i = 0; i < N; i++) + cc[i] = -17*i; + } + + acc_memcpy_device_async (d_d, d_a, N*sizeof (int), acc_async_noval); + acc_memcpy_device_async (d_f, d_c, N*sizeof (int), acc_async_noval); + acc_wait (acc_async_noval); + d_b = acc_deviceptr (b); + acc_memcpy_device_async (d_e, d_b, N*sizeof (int), acc_async_noval); + acc_wait (acc_async_noval); + + #pragma acc serial deviceptr(d_d, d_e, d_f) copy(fail) + { + int *dd = (int *) d_d; + int *ee = (int *) d_e; + int *ff = (int *) d_f; + for (int i = 0; i < N; i++) + if (dd[i] != 11*i + || ee[i] != 31*i + || ff[i] != -17*i) + { + fail = true; + __builtin_abort (); + } + } + if (fail) __builtin_abort (); +} diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-1.f90 new file mode 100644 index 0000000..15393b4 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-1.f90 @@ -0,0 +1,25 @@ +! { dg-do compile } +! { dg-additional-options "-fdump-tree-original" } + +use openacc +implicit none (type, external) +integer,pointer :: a, b(:) +integer,allocatable :: c, d(:) + +call acc_attach(a) ! ICE +call acc_attach_async(b, 4) +call acc_attach(c) + +call acc_detach(a) +call acc_detach_async(b, 4) +call acc_detach_finalize(c) +call acc_detach_finalize_async(d,7) +end + +! { dg-final { scan-tree-dump-times "acc_attach \\(&a\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times "acc_attach_async \\(&\\(integer\\(kind=4\\)\\\[0:\\\] \\*\\) b.data, 4\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times "acc_attach \\(&c\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times "acc_detach \\(&a\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times "acc_detach_async \\(&\\(integer\\(kind=4\\)\\\[0:\\\] \\*\\) b.data, 4\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times "acc_detach_finalize \\(&c\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times "acc_detach_finalize_async \\(&\\(integer\\(kind=4\\)\\\[0:\\\] \\* restrict\\) d.data, 7\\);" 1 "original" } } diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-2.f90 new file mode 100644 index 0000000..b2204ac --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-2.f90 @@ -0,0 +1,62 @@ +! { dg-do run } + +use openacc +implicit none (type, external) +integer, target :: tgt_a, tgt_b(5) + +integer, pointer :: p1, p2(:) + +type t + integer,pointer :: a => null () + integer,pointer :: b(:) => null () + integer,allocatable :: c, d(:) +end type t + +type(t), target :: var + +tgt_a = 51 +tgt_b = [11,22,33,44,55] + +var%b => tgt_b +!$acc enter data copyin(var, tgt_a, tgt_b) +var%a => tgt_a + +call acc_attach(var%a) +call acc_attach(var%b) + +!$acc serial +! { dg-warning "using .vector_length \\(32\\)., ignoring 1" "" { target openacc_nvidia_accel_selected } .-1 } + if (var%a /= 51) stop 1 + if (any (var%b /= [11,22,33,44,55])) stop 2 +!$acc end serial + +call acc_detach(var%a) +call acc_detach(var%b) + +!$acc exit data delete(var, tgt_a, tgt_b) + +var%c = 9 +var%d = [1,2,3] + +p1 => var%c +p2 => var%d + +!$acc enter data copyin(p1, p2) +!$acc enter data copyin(var) +call acc_attach(var%c) +call acc_attach(var%d) + +!$acc serial +! 
{ dg-warning "using .vector_length \\(32\\)., ignoring 1" "" { target openacc_nvidia_accel_selected } .-1 } + if (var%c /= 9) stop 3 + if (any (var%d /= [1,2,3])) stop 4 +!$acc end serial + +call acc_detach(var%c) +call acc_detach(var%d) + +!$acc exit data delete(var, p1, p2) + +deallocate(var%d) + +end diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90 new file mode 100644 index 0000000..8f3a8f0 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90 @@ -0,0 +1,113 @@ +! { dg-prune-output "using .vector_length \\(32\\)" } + +! PR libgomp/93226 */ + +module m + use iso_c_binding + use openacc + implicit none (external, type) + + integer, parameter :: N = 1024 + + integer :: D(N) + !$acc declare device_resident(D) + +contains + + integer(c_intptr_t) function init_d() + !$acc routine + integer :: i + do i = 1, N + D(i) = 27*i + end do + init_d = loc(D) + end +end module + +program main + use m + implicit none (external, type) + + integer, allocatable, target :: a(:), b(:), e(:) + type(c_ptr) :: d_a, d_b, d_c, d_d, d_e, d_f + integer(c_intptr_t) intptr + integer :: i + logical fail + + fail = .false. + + allocate(a(N), b(N), e(N)) + d_c = acc_malloc (N*c_sizeof (i)) + d_f = acc_malloc (N*c_sizeof (i)) + + e = huge(e) + call acc_copyin (e, N*c_sizeof (i)); + d_e = acc_deviceptr (e); + + !$acc serial copyout(intptr) + intptr = init_d () + !$acc end serial + d_d = transfer(intptr, d_d) + call acc_memcpy_device (d_c, d_d, N*c_sizeof (i)) + + !$acc serial copy(fail) copy(a) deviceptr(d_c, d_d) firstprivate(intptr) + block + integer, pointer :: cc(:), dd(:) + call c_f_pointer (d_c, cc, [N]) + call c_f_pointer (d_d, dd, [N]) + a = cc + do i = 1, N + if (dd(i) /= 27*i .or. cc(i) /= 27*i) then + fail = .true. + stop 1 + end if + end do + end block + !$acc end serial + if (fail) error stop 1 + + do i = 1, N + a(i) = 11*i + b(i) = 31*i + end do + + call acc_copyin (a, N*c_sizeof (i)) + d_a = acc_deviceptr (a) + call acc_copyin_async (b, N*c_sizeof (i), acc_async_noval) + + !$acc parallel deviceptr(d_c) private(i) async + block + integer, pointer :: cc(:) + call c_f_pointer (d_c, cc, [N]) + !$acc loop + do i = 1, N + cc(i) = -17*i + end do + end block + !$acc end parallel + + call acc_memcpy_device_async (d_d, d_a, N*c_sizeof (i), acc_async_noval) + call acc_memcpy_device_async (d_f, d_c, N*c_sizeof (i), acc_async_noval) + call acc_wait (acc_async_noval) + d_b = acc_deviceptr (b) + call acc_memcpy_device_async (d_e, d_b, N*c_sizeof (i), acc_async_noval) + call acc_wait (acc_async_noval) + + !$acc serial deviceptr(d_d, d_e, d_f) private(i) copy(fail) + block + integer, pointer :: dd(:), ee(:), ff(:) + call c_f_pointer (d_d, dd, [N]) + call c_f_pointer (d_e, ee, [N]) + call c_f_pointer (d_f, ff, [N]) + do i = 1, N + if (dd(i) /= 11*i & + .or. ee(i) /= 31*i & + .or. ff(i) /= -17*i) then + fail = .true. + stop 2 + end if + end do + end block + !$acc end serial + if (fail) error stop 2 +end diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-13.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-13.f90 index deb2c28..f6bd27a 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/lib-13.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-13.f90 @@ -19,11 +19,10 @@ program main end do !$acc end parallel end do - !$acc end data call acc_wait_all_async (nprocs + 1) - call acc_wait (nprocs + 1) + !$acc end data if (acc_async_test (1) .neqv. .TRUE.) stop 1 if (acc_async_test (2) .neqv. .TRUE.) stop 2 |
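The new Fortran deep-mapping test above exercises the same 'target enter data' / 'target exit data' idiom across many component kinds (fixed arrays, deferred-length strings, pointers, allocatables). Reduced to a single allocatable component, the pattern looks roughly like this; an illustrative sketch only, with names not taken from the patch:

program sketch
  implicit none
  type t
     integer, allocatable :: a(:)
  end type t
  type(t) :: x

  allocate (x%a(4))

  ! Create the component (and its descriptor) on the device up front.
  !$omp target enter data map(alloc: x%a)
  !$omp target map(alloc: x%a)
    ! Bounds and allocation status must be visible inside the region.
    if (.not. allocated (x%a)) error stop
    x%a = [1, 2, 3, 4]
  !$omp end target
  ! Copy the device data back and release the device allocation.
  !$omp target exit data map(from: x%a)

  if (any (x%a /= [1, 2, 3, 4])) error stop
  deallocate (x%a)
end program sketch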
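Likewise, the new acc-attach-detach tests pair 'acc_attach'/'acc_detach' with mapping of both the containing variable and the pointer target. A reduced sketch of that pattern, assuming the Fortran interfaces in the openacc module that the tests rely on; variable names are illustrative:

program attach_sketch
  use openacc
  implicit none
  type t
     integer, pointer :: p(:) => null ()
  end type t
  type(t), target :: var
  integer, target :: buf(3)

  buf = [1, 2, 3]
  var%p => buf

  ! Map the derived-type variable and the pointer target ...
  !$acc enter data copyin(var, buf)
  ! ... then make the device copy of var%p point at the device copy of buf.
  call acc_attach (var%p)

  !$acc serial
    if (any (var%p /= [1, 2, 3])) stop 1
  !$acc end serial

  ! Restore the device pointer before unmapping.
  call acc_detach (var%p)
  !$acc exit data delete(var, buf)
end program attach_sketch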