diff options
Diffstat (limited to 'libgomp')
218 files changed, 18419 insertions, 209 deletions
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index 6507ba0..d3384b0 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,744 @@ +2025-06-24 Tobias Burnus <tburnus@baylibre.com> + + * libgomp.texi (acc_attach, acc_detach): Update for Fortran + version. + * openacc.f90 (acc_attach{,_async}, acc_detach{,_finalize}{,_async}): + Add. + * openacc_lib.h: Likewise. + * testsuite/libgomp.oacc-fortran/acc-attach-detach-1.f90: New test. + * testsuite/libgomp.oacc-fortran/acc-attach-detach-2.f90: New test. + +2025-06-19 Tobias Burnus <tburnus@baylibre.com> + + * target.c (GOMP_REQUIRES_NAME_BUF_LEN): Define. + (GOMP_offload_register_ver, gomp_target_init): Use it for the + char buffer size. + +2025-06-19 Tobias Burnus <tburnus@baylibre.com> + waffl3x <waffl3x@baylibre.com> + + * libgomp.texi (omp_init_allocator): Refer to 'Memory allocation' + for available memory spaces. + (OMP_ALLOCATOR): Move list of traits and predefined memspaces + and allocators to ... + (Memory allocation): ... here. Document omp(x)::allocator::*; + minor wording tweaks, be more explicit about memkind, pinned and + pool_size. + +2025-06-17 Tobias Burnus <tburnus@baylibre.com> + + * testsuite/libgomp.c++/declare_target-2.C: New test. + +2025-06-10 Tobias Burnus <tburnus@baylibre.com> + + * testsuite/libgomp.c/declare-variant-4.h (gfx942): New variant function. + * testsuite/libgomp.c/declare-variant-4-gfx942.c: New test. + +2025-06-06 Tobias Burnus <tburnus@baylibre.com> + Sandra Loosemore <sloosemore@baylibre.com> + + * libgomp.texi (omp_get_num_devices, omp_get_intrinsic_device): + Document builtin handling. + +2025-06-06 Tobias Burnus <tburnus@baylibre.com> + + PR target/120530 + * testsuite/libgomp.c/target-map-zero-sized-3.c (main): Add missing + map clause; remove unused variable. + +2025-06-04 Tobias Burnus <tburnus@baylibre.com> + Sandra Loosemore <sloosemore@baylibre.com> + + * libgomp.texi (omp_interop_{int,ptr,str,rc_desc}): Add note about + the 'ret_code' type change in OpenMP 6. + +2025-06-03 Jakub Jelinek <jakub@redhat.com> + + PR libgomp/120444 + * testsuite/libgomp.c-c++-common/omp_target_memset-3.c (test_it): + Change ptr argument type from void * to int8_t *. + (main): Change ptr variable type from void * to int8_t * and cast + omp_target_alloc result to the latter type. + +2025-06-02 Tobias Burnus <tburnus@baylibre.com> + + PR libgomp/120444 + * libgomp-plugin.h (GOMP_OFFLOAD_memset): Declare. + * libgomp.h (struct gomp_device_descr): Add memset_func. + * libgomp.map (GOMP_6.0.1): Add omp_target_memset{,_async}. + * libgomp.texi (Device Memory Routines): Document them. + * omp.h.in (omp_target_memset, omp_target_memset_async): Declare. + * omp_lib.f90.in (omp_target_memset, omp_target_memset_async): + Add interfaces. + * omp_lib.h.in (omp_target_memset, omp_target_memset_async): Likewise. + * plugin/cuda-lib.def: Add cuMemsetD8. + * plugin/plugin-gcn.c (struct hsa_runtime_fn_info): Add + hsa_amd_memory_fill_fn. + (init_hsa_runtime_functions): DLSYM_OPT_FN load it. + (GOMP_OFFLOAD_memset): New. + * plugin/plugin-nvptx.c (GOMP_OFFLOAD_memset): New. + * target.c (omp_target_memset_int, omp_target_memset, + omp_target_memset_async_helper, omp_target_memset_async): New. + (gomp_load_plugin_for_device): Add DLSYM (memset). + * testsuite/libgomp.c-c++-common/omp_target_memset.c: New test. + * testsuite/libgomp.c-c++-common/omp_target_memset-2.c: New test. + * testsuite/libgomp.c-c++-common/omp_target_memset-3.c: New test. + * testsuite/libgomp.fortran/omp_target_memset.f90: New test. + * testsuite/libgomp.fortran/omp_target_memset-2.f90: New test. + +2025-05-30 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-std__valarray-1.C: New. + * testsuite/libgomp.c++/target-std__valarray-1.output: Likewise. + +2025-05-30 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-std__array-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__array-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__bitset-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__bitset-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__deque-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__deque-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__forward_list-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__forward_list-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__list-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__list-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__map-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__map-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__multimap-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__multimap-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__multiset-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__multiset-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__set-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__set-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__span-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__span-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__valarray-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__valarray-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__vector-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__vector-concurrent.C: Adjust. + +2025-05-30 Kwok Cheung Yeung <kcyeung@baylibre.com> + Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-std__array-concurrent.C: New. + * testsuite/libgomp.c++/target-std__bitset-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__deque-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__flat_map-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__flat_multimap-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__flat_multiset-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__flat_set-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__forward_list-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__list-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__map-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__multimap-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__multiset-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__set-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__span-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__unordered_map-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__unordered_multimap-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__unordered_multiset-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__unordered_set-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__valarray-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__vector-concurrent.C: Likewise. + +2025-05-30 Kwok Cheung Yeung <kcyeung@baylibre.com> + + * testsuite/libgomp.c++/target-std__cmath.C: New. + * testsuite/libgomp.c++/target-std__complex.C: Likewise. + * testsuite/libgomp.c++/target-std__numbers.C: Likewise. + +2025-05-30 Waffl3x <waffl3x@baylibre.com> + Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-flex-10.C: New test. + * testsuite/libgomp.c++/target-flex-100.C: New test. + * testsuite/libgomp.c++/target-flex-101.C: New test. + * testsuite/libgomp.c++/target-flex-11.C: New test. + * testsuite/libgomp.c++/target-flex-12.C: New test. + * testsuite/libgomp.c++/target-flex-2000.C: New test. + * testsuite/libgomp.c++/target-flex-2001.C: New test. + * testsuite/libgomp.c++/target-flex-2002.C: New test. + * testsuite/libgomp.c++/target-flex-2003.C: New test. + * testsuite/libgomp.c++/target-flex-30.C: New test. + * testsuite/libgomp.c++/target-flex-300.C: New test. + * testsuite/libgomp.c++/target-flex-31.C: New test. + * testsuite/libgomp.c++/target-flex-32.C: New test. + * testsuite/libgomp.c++/target-flex-33.C: New test. + * testsuite/libgomp.c++/target-flex-41.C: New test. + * testsuite/libgomp.c++/target-flex-60.C: New test. + * testsuite/libgomp.c++/target-flex-61.C: New test. + * testsuite/libgomp.c++/target-flex-62.C: New test. + * testsuite/libgomp.c++/target-flex-70.C: New test. + * testsuite/libgomp.c++/target-flex-80.C: New test. + * testsuite/libgomp.c++/target-flex-81.C: New test. + * testsuite/libgomp.c++/target-flex-90.C: New test. + * testsuite/libgomp.c++/target-flex-common.h: New test. + +2025-05-30 Thomas Schwinge <tschwinge@baylibre.com> + Richard Biener <rguenther@suse.de> + + PR middle-end/119835 + * testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c: + '#pragma GCC optimize "-fno-inline"'. + * testsuite/libgomp.c-c++-common/target-abi-struct-1.c: New. + * testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c: Adjust. + +2025-05-30 Julian Brown <julian@codesourcery.com> + + * testsuite/libgomp.c-c++-common/declare-mapper-9.c: Enable for C. + * testsuite/libgomp.c-c++-common/declare-mapper-10.c: Likewise. + * testsuite/libgomp.c-c++-common/declare-mapper-11.c: Likewise. + * testsuite/libgomp.c-c++-common/declare-mapper-12.c: Likewise. + * testsuite/libgomp.c-c++-common/declare-mapper-13.c: Likewise. + * testsuite/libgomp.c-c++-common/declare-mapper-14.c: Likewise. + +2025-05-30 Julian Brown <julian@codesourcery.com> + Tobias Burnus <tburnus@baylibre.com> + + * testsuite/libgomp.c++/declare-mapper-1.C: New test. + * testsuite/libgomp.c++/declare-mapper-2.C: New test. + * testsuite/libgomp.c++/declare-mapper-3.C: New test. + * testsuite/libgomp.c++/declare-mapper-4.C: New test. + * testsuite/libgomp.c++/declare-mapper-5.C: New test. + * testsuite/libgomp.c++/declare-mapper-6.C: New test. + * testsuite/libgomp.c++/declare-mapper-7.C: New test. + * testsuite/libgomp.c++/declare-mapper-8.C: New test. + * testsuite/libgomp.c-c++-common/declare-mapper-9.c: New test (only + enabled for C++ for now). + * testsuite/libgomp.c-c++-common/declare-mapper-10.c: Likewise. + * testsuite/libgomp.c-c++-common/declare-mapper-11.c: Likewise. + * testsuite/libgomp.c-c++-common/declare-mapper-12.c: Likewise. + * testsuite/libgomp.c-c++-common/declare-mapper-13.c: Likewise. + * testsuite/libgomp.c-c++-common/declare-mapper-14.c: Likewise. + +2025-05-29 Tobias Burnus <tburnus@baylibre.com> + + PR libgomp/93226 + * libgomp-plugin.h (GOMP_OFFLOAD_openacc_async_dev2dev): New + prototype. + * libgomp.h (struct acc_dispatch_t): Add dev2dev_func. + (gomp_copy_dev2dev): New prototype. + * libgomp.map (OACC_2.6.1): New; add acc_memcpy_device{,_async}. + * libgomp.texi (acc_memcpy_device): New. + * oacc-mem.c (memcpy_tofrom_device): Change to take from/to + device boolean; use memcpy not memmove; add early return if + size == 0 or same device + same ptr. + (acc_memcpy_to_device, acc_memcpy_to_device_async, + acc_memcpy_from_device, acc_memcpy_from_device_async): Update. + (acc_memcpy_device, acc_memcpy_device_async): New. + * openacc.f90 (acc_memcpy_device, acc_memcpy_device_async): + Add interface. + * openacc_lib.h (acc_memcpy_device, acc_memcpy_device_async): + Likewise. + * openacc.h (acc_memcpy_device, acc_memcpy_device_async): Add + prototype. + * plugin/plugin-gcn.c (GOMP_OFFLOAD_openacc_async_host2dev): + Update comment. + (GOMP_OFFLOAD_openacc_async_dev2host): Update call. + (GOMP_OFFLOAD_openacc_async_dev2dev): New. + * plugin/plugin-nvptx.c (cuda_memcpy_dev_sanity_check): New. + (GOMP_OFFLOAD_dev2dev): Call it. + (GOMP_OFFLOAD_openacc_async_dev2dev): New. + * target.c (gomp_copy_dev2dev): New. + (gomp_load_plugin_for_device): Load dev2dev and async_dev2dev. + * testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c: New test. + * testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90: New test. + +2025-05-28 Tobias Burnus <tburnus@baylibre.com> + + PR middle-end/118694 + * testsuite/libgomp.fortran/metadirective-1.f90: xfail when + compiling (also) for nvptx offloading as an error is then expected. + +2025-05-23 Tobias Burnus <tburnus@baylibre.com> + + PR middle-end/118694 + * testsuite/libgomp.c-c++-common/metadirective-1.c: xfail when + compiling (also) for nvptx offloading as an error is then expected. + +2025-05-19 Thomas Schwinge <tschwinge@baylibre.com> + + PR lto/120308 + * testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c: Add empty + structure testing. + +2025-05-19 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c: New. + * testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c: Likewise. + +2025-05-19 Julian Brown <julian@codesourcery.com> + + * testsuite/libgomp.oacc-fortran/lib-13.f90: End data region after + wait API calls. + +2025-05-15 Tobias Burnus <tburnus@baylibre.com> + + * testsuite/libgomp.fortran/alloc-comp-4.f90: New test. + +2025-05-14 Tobias Burnus <tburnus@baylibre.com> + + * target.c (gomp_attach_pointer): Return bool; accept additional + bool to optionally silence the fatal pointee-not-found error. + (gomp_map_vars_internal): If the pointee could not be found, + check whether it was mapped as GOMP_MAP_ZERO_LEN_ARRAY_SECTION. + * libgomp.h (gomp_attach_pointer): Update prototype. + * oacc-mem.c (acc_attach_async, goacc_enter_data_internal): Update + calls. + * testsuite/libgomp.c/target-map-zero-sized.c: New test. + * testsuite/libgomp.c/target-map-zero-sized-2.c: New test. + * testsuite/libgomp.c/target-map-zero-sized-3.c: New test. + +2025-05-12 Thomas Schwinge <tschwinge@baylibre.com> + + PR target/119692 + * testsuite/libgomp.c++/pr119692-1-4.C: '{ dg-timeout 10 }'. + * testsuite/libgomp.c++/pr119692-1-5.C: Likewise. + * testsuite/libgomp.c++/target-exceptions-bad_cast-1.C: Likewise. + * testsuite/libgomp.c++/target-exceptions-bad_cast-2.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-bad_cast-1.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-bad_cast-2.C: Likewise. + +2025-05-12 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c/declare-variant-3-sm61.c: New. + * testsuite/libgomp.c/declare-variant-3.h: Adjust. + +2025-05-09 Tobias Burnus <tburnus@baylibre.com> + + * testsuite/libgomp.c/interop-cuda-full.c: Use 'link' instead + of 'run' when the default device is "! offload_device_nvptx". + * testsuite/libgomp.c/interop-cuda-libonly.c: Likewise. + * testsuite/libgomp.c/interop-hip-nvidia-full.c: Likewise. + * testsuite/libgomp.c/interop-hip-nvidia-no-headers.c: Likewise. + * testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c: Likewise. + * testsuite/libgomp.fortran/interop-hip-nvidia-full.F90: Likewise. + * testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90: Likewise. + * testsuite/libgomp.c/interop-hip-amd-full.c: Use 'link' instead + of 'run' when the default device is "! offload_device_gcn". + * testsuite/libgomp.c/interop-hip-amd-no-hip-header.c: Likewise. + * testsuite/libgomp.fortran/interop-hip-amd-full.F90: Likewise. + * testsuite/libgomp.fortran/interop-hip-amd-no-module.F90: Likewise. + +2025-05-09 David Malcolm <dmalcolm@redhat.com> + + PR other/116792 + * testsuite/lib/libgomp.exp: Add load_lib of scanhtml.exp. + +2025-05-07 Tobias Burnus <tburnus@baylibre.com> + + * testsuite/libgomp.fortran/map-alloc-comp-9.f90: Process differently + when USE_USM_REQUIREMENT is set. + * testsuite/libgomp.fortran/map-alloc-comp-9-usm.f90: New test. + +2025-05-06 Tejas Belagod <tejas.belagod@arm.com> + + * testsuite/libgomp.c-target/aarch64/udr-sve.c: Fix test. + +2025-05-05 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c/interop-hsa.c: GCN offloading only. + +2025-05-01 Tobias Burnus <tobias@codesourcery.com> + + * testsuite/libgomp.fortran/allocate-8a.f90: New test. + +2025-04-25 Andrew Stubbs <ams@baylibre.com> + + * testsuite/libgomp.c/interop-hsa.c: New test. + +2025-04-25 Thomas Schwinge <tschwinge@baylibre.com> + + PR target/119853 + PR target/119854 + * target-cxa-dso-dtor.c: New. + * config/accel/target-cxa-dso-dtor.c: Likewise. + * Makefile.am (libgomp_la_SOURCES): Add it. + * Makefile.in: Regenerate. + * testsuite/libgomp.c++/target-cdtor-1.C: New. + * testsuite/libgomp.c++/target-cdtor-2.C: Likewise. + +2025-04-25 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c-c++-common/target-cdtor-1.c: New. + +2025-04-25 Andrew Pinski <quic_apinski@quicinc.com> + Thomas Schwinge <tschwinge@baylibre.com> + + PR target/119737 + * testsuite/libgomp.c++/target-exceptions-throw-1.C: Remove + PR119737 XFAILing. + * testsuite/libgomp.c++/target-exceptions-throw-2.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-throw-1.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-throw-2.C: Likewise. + +2025-04-25 Thomas Schwinge <tschwinge@baylibre.com> + + PR target/118794 + * testsuite/libgomp.c++/target-exceptions-pr118794-1.C: Adjust for + 'targetm.arm_eabi_unwinder'. + * testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C: + Likewise. + * testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C: + Likewise. + +2025-04-24 Tobias Burnus <tburnus@baylibre.com> + + * testsuite/lib/libgomp.exp + (check_effective_target_gomp_hip_header_nvidia): Compile with + "-Wno-deprecated-declarations". + * testsuite/libgomp.c/interop-hip-nvidia-full.c: Likewise. + * testsuite/libgomp.c/interop-hipblas-nvidia-full.c: Likewise. + * testsuite/libgomp.c/interop-hipblas.h: Add workarounds + when using the HIP headers with __HIP_PLATFORM_NVIDIA__. + +2025-04-24 Tobias Burnus <tburnus@baylibre.com> + + * testsuite/lib/libgomp.exp (check_effective_target_openacc_cublas, + check_effective_target_openacc_cudart): Update description as + the check requires more. + (check_effective_target_openacc_libcuda, + check_effective_target_openacc_libcublas, + check_effective_target_openacc_libcudart, + check_effective_target_gomp_hip_header_amd, + check_effective_target_gomp_hip_header_nvidia, + check_effective_target_gomp_hipfort_module, + check_effective_target_gomp_libamdhip64, + check_effective_target_gomp_libhipblas): New. + * testsuite/libgomp.c-c++-common/interop-2.c: New test. + * testsuite/libgomp.c/interop-cublas-full.c: New test. + * testsuite/libgomp.c/interop-cublas-libonly.c: New test. + * testsuite/libgomp.c/interop-cuda-full.c: New test. + * testsuite/libgomp.c/interop-cuda-libonly.c: New test. + * testsuite/libgomp.c/interop-hip-amd-full.c: New test. + * testsuite/libgomp.c/interop-hip-amd-no-hip-header.c: New test. + * testsuite/libgomp.c/interop-hip-nvidia-full.c: New test. + * testsuite/libgomp.c/interop-hip-nvidia-no-headers.c: New test. + * testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c: New test. + * testsuite/libgomp.c/interop-hip.h: New test. + * testsuite/libgomp.c/interop-hipblas-amd-full.c: New test. + * testsuite/libgomp.c/interop-hipblas-amd-no-hip-header.c: New test. + * testsuite/libgomp.c/interop-hipblas-nvidia-full.c: New test. + * testsuite/libgomp.c/interop-hipblas-nvidia-no-headers.c: New test. + * testsuite/libgomp.c/interop-hipblas-nvidia-no-hip-header.c: New test. + * testsuite/libgomp.c/interop-hipblas.h: New test. + * testsuite/libgomp.fortran/interop-hip-amd-full.F90: New test. + * testsuite/libgomp.fortran/interop-hip-amd-no-module.F90: New test. + * testsuite/libgomp.fortran/interop-hip-nvidia-full.F90: New test. + * testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90: New test. + * testsuite/libgomp.fortran/interop-hip.h: New test. + +2025-04-23 Tobias Burnus <tburnus@baylibre.com> + + * testsuite/libgomp.fortran/target-enter-data-8.f90: New test. + +2025-04-17 Jakub Jelinek <jakub@redhat.com> + + PR libgomp/119849 + * testsuite/libgomp.c++/allocator-1.C (test_inequality, main): Guard + ompx::allocator::gnu_pinned_mem uses with #ifdef __gnu_linux__. + * testsuite/libgomp.c++/allocator-2.C (main): Likewise. + +2025-04-17 Tobias Burnus <tburnus@baylibre.com> + + * libgomp.texi (gcn interop, nvptx interop): For HIP with C/C++, add + a note about setting a preprocessor define. + +2025-04-16 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-exceptions-pr118794-1.C: Remove + 'ALWAYS_INLINE' workaround. + +2025-04-16 Thomas Schwinge <tschwinge@baylibre.com> + + PR target/106445 + * testsuite/libgomp.c++/pr106445-1.C: New. + * testsuite/libgomp.c++/pr106445-1-O0.C: Likewise. + +2025-04-16 Thomas Schwinge <tschwinge@baylibre.com> + + PR target/97106 + * testsuite/libgomp.c++/pr96390.C: Un-XFAIL nvptx offloading. + * testsuite/libgomp.c-c++-common/pr96390.c: Adjust. + +2025-04-15 Tobias Burnus <tburnus@baylibre.com> + + * libgomp.texi (gcn, nvptx): Mention self_maps clause + besides unified_shared_memory in the requirements item. + +2025-04-15 waffl3x <waffl3x@baylibre.com> + + * omp.h.in: Add omp::allocator::* and ompx::allocator::* allocators. + (__detail::__allocator_templ<T, omp_allocator_handle_t>): + New struct template. + (null_allocator<T>): New struct template. + (default_mem<T>): Likewise. + (large_cap_mem<T>): Likewise. + (const_mem<T>): Likewise. + (high_bw_mem<T>): Likewise. + (low_lat_mem<T>): Likewise. + (cgroup_mem<T>): Likewise. + (pteam_mem<T>): Likewise. + (thread_mem<T>): Likewise. + (ompx::allocator::gnu_pinned_mem<T>): Likewise. + * testsuite/libgomp.c++/allocator-1.C: New test. + * testsuite/libgomp.c++/allocator-2.C: New test. + +2025-04-15 Tobias Burnus <tburnus@baylibre.com> + + * libgomp.texi (5.0 Impl. Status): Mark mapping alloc comps as 'Y'. + * testsuite/libgomp.fortran/allocatable-comp.f90: New test. + * testsuite/libgomp.fortran/map-alloc-comp-3.f90: New test. + * testsuite/libgomp.fortran/map-alloc-comp-4.f90: New test. + * testsuite/libgomp.fortran/map-alloc-comp-5.f90: New test. + * testsuite/libgomp.fortran/map-alloc-comp-6.f90: New test. + * testsuite/libgomp.fortran/map-alloc-comp-7.f90: New test. + * testsuite/libgomp.fortran/map-alloc-comp-8.f90: New test. + * testsuite/libgomp.fortran/map-alloc-comp-9.f90: New test. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + PR target/118794 + * testsuite/libgomp.c++/target-exceptions-bad_cast-2-offload-sorry-GCN.C: + Set '-foffload-options=-mno-fake-exceptions'. + * testsuite/libgomp.c++/target-exceptions-bad_cast-2-offload-sorry-nvptx.C: + Likewise. + * testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C: + Likewise. + * testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C: + Likewise. + * testsuite/libgomp.c++/target-exceptions-throw-2-offload-sorry-GCN.C: + Likewise. + * testsuite/libgomp.c++/target-exceptions-throw-2-offload-sorry-nvptx.C: + Likewise. + * testsuite/libgomp.oacc-c++/exceptions-bad_cast-2-offload-sorry-GCN.C: + Likewise. + * testsuite/libgomp.oacc-c++/exceptions-bad_cast-2-offload-sorry-nvptx.C: + Likewise. + * testsuite/libgomp.oacc-c++/exceptions-throw-2-offload-sorry-GCN.C: + Likewise. + * testsuite/libgomp.oacc-c++/exceptions-throw-2-offload-sorry-nvptx.C: + Likewise. + * testsuite/libgomp.c++/target-exceptions-bad_cast-2.C: Adjust. + * testsuite/libgomp.c++/target-exceptions-pr118794-1.C: Likewise. + * testsuite/libgomp.c++/target-exceptions-throw-2.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-bad_cast-2.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-throw-2.C: Likewise. + * testsuite/libgomp.c++/target-exceptions-throw-2-O0.C: New. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-exceptions-throw-3.C: New. + * testsuite/libgomp.oacc-c++/exceptions-throw-3.C: Likewise. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-exceptions-throw-2.C: New. + * testsuite/libgomp.c++/target-exceptions-throw-2-offload-sorry-GCN.C: Likewise. + * testsuite/libgomp.c++/target-exceptions-throw-2-offload-sorry-nvptx.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-throw-2.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-throw-2-offload-sorry-GCN.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-throw-2-offload-sorry-nvptx.C: Likewise. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-exceptions-throw-1.C: New. + * testsuite/libgomp.c++/target-exceptions-throw-1-O0.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-throw-1.C: Likewise. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-exceptions-bad_cast-3.C: New. + * testsuite/libgomp.oacc-c++/exceptions-bad_cast-3.C: Likewise. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-exceptions-bad_cast-2.C: New. + * testsuite/libgomp.c++/target-exceptions-bad_cast-2-offload-sorry-GCN.C: Likewise. + * testsuite/libgomp.c++/target-exceptions-bad_cast-2-offload-sorry-nvptx.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-bad_cast-2.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-bad_cast-2-offload-sorry-GCN.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-bad_cast-2-offload-sorry-nvptx.C: Likewise. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-exceptions-bad_cast-1.C: New. + * testsuite/libgomp.oacc-c++/exceptions-bad_cast-1.C: Likewise. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + PR target/118794 + * testsuite/libgomp.c++/target-exceptions-pr118794-1.C: New. + * testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C: + Likewise. + * testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C: + Likewise. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + PR c++/119692 + * testsuite/libgomp.c++/pr119692-1-1.C: New. + * testsuite/libgomp.c++/pr119692-1-2.C: Likewise. + * testsuite/libgomp.c++/pr119692-1-3.C: Likewise. + * testsuite/libgomp.c++/pr119692-1-4.C: Likewise. + * testsuite/libgomp.c++/pr119692-1-5.C: Likewise. + * testsuite/libgomp.oacc-c++/pr119692-1-1.C: Likewise. + * testsuite/libgomp.oacc-c++/pr119692-1-2.C: Likewise. + * testsuite/libgomp.oacc-c++/pr119692-1-3.C: Likewise. + +2025-04-10 Richard Sandiford <richard.sandiford@arm.com> + + * testsuite/libgomp.c-target/aarch64/firstprivate.c: Add +sve pragma. + * testsuite/libgomp.c-target/aarch64/lastprivate.c: Likewise. + * testsuite/libgomp.c-target/aarch64/private.c: Likewise. + * testsuite/libgomp.c-target/aarch64/shared.c: Likewise. + * testsuite/libgomp.c-target/aarch64/simd-aligned.c: Likewise. + * testsuite/libgomp.c-target/aarch64/simd-nontemporal.c: Likewise. + * testsuite/libgomp.c-target/aarch64/threadprivate.c: Likewise. + * testsuite/libgomp.c-target/aarch64/udr-sve.c: Add an -march option. + (for_reduction): Use "+=" in the reduction loop. + +2025-04-08 Tobias Burnus <tburnus@baylibre.com> + + PR middle-end/119662 + * testsuite/libgomp.c/append-args-fr-1.c: New test. + * testsuite/libgomp.c/append-args-fr.h: New test. + +2025-04-08 Tobias Burnus <tburnus@baylibre.com> + + * Makefile.am (%.mod): Add -Wno-c-binding-type. + * Makefile.in: Regenerate. + +2025-04-08 Tejas Belagod <tejas.belagod@arm.com> + + * testsuite/libgomp.c-target/aarch64/aarch64.exp: Test driver. + * testsuite/libgomp.c-target/aarch64/firstprivate.c: New test. + * testsuite/libgomp.c-target/aarch64/lastprivate.c: Likewise. + * testsuite/libgomp.c-target/aarch64/private.c: Likewise. + * testsuite/libgomp.c-target/aarch64/shared.c: Likewise. + * testsuite/libgomp.c-target/aarch64/simd-aligned.c: Likewise. + * testsuite/libgomp.c-target/aarch64/simd-nontemporal.c: Likewise. + * testsuite/libgomp.c-target/aarch64/threadprivate.c: Likewise. + * testsuite/libgomp.c-target/aarch64/udr-sve.c: Likewise. + +2025-04-07 Tobias Burnus <tburnus@baylibre.com> + + * libgomp.texi (omp_target_memcpy_rect_async, + omp_target_memcpy_rect): Add @ref to 'Offload-Target Specifics'. + (AMD Radeon (GCN)): Document how memcpy_rect is implemented. + (nvptx): Move item about memcpy_rect item down; use present tense. + +2025-03-26 Thomas Schwinge <thomas@codesourcery.com> + + PR driver/101544 + * testsuite/libgomp.c++/pr101544-1-O0.C: Remove + '-foffload-options=-lstdc++'. + * testsuite/libgomp.c++/pr101544-1.C: Likewise. + * testsuite/libgomp.oacc-c++/pr101544-1.C: Likewise. + +2025-03-26 Tobias Burnus <tburnus@baylibre.com> + + * libgomp.texi (OpenMP 5.1): Add @ref to offload-target specifics + for 'interop'. + (OpenMP 6.0): Mark dispatch's interop clause as implemented. + (omp_get_interop_int, omp_get_interop_str, + omp_get_interop_ptr, omp_get_interop_type_desc): Add @ref to + Offload-Target Specifics; change ret_code argument type to + 'omp_interop_rc_t *'. + (Offload-Target Specifics): Document the supported OpenMP + interop foreign runtimes on AMD and Nvidia GPUs. + +2025-03-25 Sandra Loosemore <sloosemore@baylibre.com> + Tobias Burnus <tburnus@baylibre.com> + + * libgomp.texi (OpenMP 5.1): Mark append_args as fully supported. + +2025-03-24 Tobias Burnus <tburnus@baylibre.com> + + * target.c (gomp_interop_internal): Set the 'device_num' member + when initializing an interop object. + +2025-03-24 Tobias Burnus <tburnus@baylibre.com> + + * plugin/plugin-nvptx.c (GOMP_OFFLOAD_interop): Set context for + stream creation to use the specified device. + +2025-03-24 Thomas Schwinge <tschwinge@baylibre.com> + + PR libgomp/96835 + * testsuite/libgomp.c++/pr96835-1.C: New. + * testsuite/libgomp.c++/pr96835-1-O0.C: Likewise. + * testsuite/libgomp.oacc-c++/pr96835-1.C: Likewise. + +2025-03-24 Thomas Schwinge <thomas@codesourcery.com> + + PR target/101544 + * testsuite/libgomp.c++/pr101544-1.C: New. + * testsuite/libgomp.c++/pr101544-1-O0.C: Likewise. + * testsuite/libgomp.oacc-c++/pr101544-1.C: Likewise. + +2025-03-21 Tobias Burnus <tburnus@baylibre.com> + + * testsuite/libgomp.fortran/get-mapped-ptr-1.f90: Use -6 + not -5 as non-conforming device number. + +2025-03-21 Tobias Burnus <tburnus@baylibre.com> + + * plugin/plugin-gcn.c (_LIBGOMP_PLUGIN_INCLUDE): Define. + (struct hsa_runtime_fn_info): Add two queue functions. + (hipError_t, hipCtx_t, hipStream_s, hipStream_t): New types. + (struct hip_runtime_fn_info): New. + (hip_runtime_lib, hip_fns): New global vars. + (init_environment_variables): Handle hip_runtime_lib. + (init_hsa_runtime_functions): Load the two queue functions. + (init_hip_runtime_functions, GOMP_OFFLOAD_interop, + GOMP_OFFLOAD_get_interop_int, GOMP_OFFLOAD_get_interop_ptr, + GOMP_OFFLOAD_get_interop_str, + GOMP_OFFLOAD_get_interop_type_desc): New. + * plugin/plugin-nvptx.c (_LIBGOMP_PLUGIN_INCLUDE): Define. + (GOMP_OFFLOAD_interop, GOMP_OFFLOAD_get_interop_int, + GOMP_OFFLOAD_get_interop_ptr, GOMP_OFFLOAD_get_interop_str, + GOMP_OFFLOAD_get_interop_type_desc): New. + * testsuite/libgomp.c/interop-fr-1.c: New test. + * testsuite/libgomp.c-c++-common/get-mapped-ptr-1.c: Use -6 + not -5 as non-conforming device number. + +2025-03-21 Paul-Antoine Arras <parras@baylibre.com> + Tobias Burnus <tburnus@baylibre.com> + + * icv-device.c (omp_set_default_device): Check + GOMP_DEVICE_DEFAULT_OMP_61. + * libgomp-plugin.h (struct interop_obj_t): New. + (enum gomp_interop_flag): New. + (GOMP_OFFLOAD_interop): Declare. + (GOMP_OFFLOAD_get_interop_int): Declare. + (GOMP_OFFLOAD_get_interop_ptr): Declare. + (GOMP_OFFLOAD_get_interop_str): Declare. + (GOMP_OFFLOAD_get_interop_type_desc): Declare. + * libgomp.h (_LIBGOMP_OMP_LOCK_DEFINED): Define. + (struct gomp_device_descr): Add interop_func, get_interop_int_func, + get_interop_ptr_func, get_interop_str_func, get_interop_type_desc_func. + * libgomp.map: Add GOMP_interop. + * libgomp_g.h (GOMP_interop): Declare. + * target.c (resolve_device): Handle GOMP_DEVICE_DEFAULT_OMP_61. + (omp_get_interop_int): Replace stub with actual implementation. + (omp_get_interop_ptr): Likewise. + (omp_get_interop_str): Likewise. + (omp_get_interop_type_desc): Likewise. + (struct interop_data_t): Define. + (gomp_interop_internal): New function. + (GOMP_interop): Likewise. + (gomp_load_plugin_for_device): Load symbols for get_interop_int, + get_interop_ptr, get_interop_str and get_interop_type_desc. + * testsuite/libgomp.c-c++-common/interop-1.c: New test. + +2025-03-21 Tobias Burnus <tburnus@baylibre.com> + + * testsuite/lib/libgomp.exp (libgomp_init): Add + -fdiagnostics-plain-output to additional_flags; remove + -fno-diagnostics-show-caret and -fdiagnostics-color=never. + 2025-03-17 Tobias Burnus <tburnus@baylibre.com> PR fortran/115271 diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am index 855f0af..19479ae 100644 --- a/libgomp/Makefile.am +++ b/libgomp/Makefile.am @@ -70,7 +70,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \ target.c splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \ oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \ priority_queue.c affinity-fmt.c teams.c allocator.c oacc-profiling.c \ - oacc-target.c target-indirect.c + oacc-target.c target-indirect.c target-cxa-dso-dtor.c include $(top_srcdir)/plugin/Makefrag.am @@ -97,7 +97,7 @@ openacc_kinds.mod: openacc.mod openacc.mod: openacc.lo : %.mod: %.f90 - $(FC) $(FCFLAGS) -cpp -fopenmp -fsyntax-only $< + $(FC) $(FCFLAGS) -cpp -fopenmp -fsyntax-only -Wno-c-binding-type $< fortran.lo: libgomp_f.h fortran.o: libgomp_f.h env.lo: libgomp_f.h diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in index 25cb6fc..6d22b3d 100644 --- a/libgomp/Makefile.in +++ b/libgomp/Makefile.in @@ -219,7 +219,8 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \ oacc-parallel.lo oacc-host.lo oacc-init.lo oacc-mem.lo \ oacc-async.lo oacc-plugin.lo oacc-cuda.lo priority_queue.lo \ affinity-fmt.lo teams.lo allocator.lo oacc-profiling.lo \ - oacc-target.lo target-indirect.lo $(am__objects_1) + oacc-target.lo target-indirect.lo target-cxa-dso-dtor.lo \ + $(am__objects_1) libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) @@ -552,7 +553,8 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \ oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \ oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \ affinity-fmt.c teams.c allocator.c oacc-profiling.c \ - oacc-target.c target-indirect.c $(am__append_3) + oacc-target.c target-indirect.c target-cxa-dso-dtor.c \ + $(am__append_3) # Nvidia PTX OpenACC plugin. @PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION) @@ -780,6 +782,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/splay-tree.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target-cxa-dso-dtor.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target-indirect.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/task.Plo@am__quote@ @@ -1388,7 +1391,7 @@ openacc_kinds.mod: openacc.mod openacc.mod: openacc.lo : %.mod: %.f90 - $(FC) $(FCFLAGS) -cpp -fopenmp -fsyntax-only $< + $(FC) $(FCFLAGS) -cpp -fopenmp -fsyntax-only -Wno-c-binding-type $< fortran.lo: libgomp_f.h fortran.o: libgomp_f.h env.lo: libgomp_f.h diff --git a/libgomp/config/accel/target-cxa-dso-dtor.c b/libgomp/config/accel/target-cxa-dso-dtor.c new file mode 100644 index 0000000..e40a5f0 --- /dev/null +++ b/libgomp/config/accel/target-cxa-dso-dtor.c @@ -0,0 +1,62 @@ +/* Host/device compatibility: Itanium C++ ABI, DSO Object Destruction API + + Copyright (C) 2025 Free Software Foundation, Inc. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "libgomp.h" + +extern void __cxa_finalize (void *); + +/* See <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#dso-dtor>. + + Even if the device is '!DEFAULT_USE_CXA_ATEXIT', we may see '__cxa_atexit' + calls, referencing '__dso_handle', via a 'DEFAULT_USE_CXA_ATEXIT' host. + '__cxa_atexit' is provided by newlib, but use of '__dso_handle' for nvptx + results in 'ld' error: + + unresolved symbol __dso_handle + collect2: error: ld returned 1 exit status + nvptx mkoffload: fatal error: [...]/x86_64-pc-linux-gnu-accel-nvptx-none-gcc returned 1 exit status + + ..., or for GCN get an implicit definition (running with + '--trace-symbol=__dso_handle'): + + ./a.xamdgcn-amdhsa.mkoffload.hsaco-a.xamdgcn-amdhsa.mkoffload.2.o: reference to __dso_handle + <internal>: definition of __dso_handle + + ..., which might be fine, but let's just make it explicit. */ + +/* There are no DSOs; this is the main program. */ +attribute_hidden void * const __dso_handle = 0; + +/* If this file gets linked in, that means that '__dso_handle' has been + referenced (for '__cxa_atexit'), and in that case, we also have to run + '__cxa_finalize'. Make that happen by overriding the weak libgcc dummy + function '__GCC_offload___cxa_finalize'. */ + +void +__GCC_offload___cxa_finalize (void *dso_handle) +{ + __cxa_finalize (dso_handle); +} diff --git a/libgomp/icv-device.c b/libgomp/icv-device.c index ba06f50..40bf7cd 100644 --- a/libgomp/icv-device.c +++ b/libgomp/icv-device.c @@ -32,8 +32,11 @@ void omp_set_default_device (int device_num) { - struct gomp_task_icv *icv = gomp_icv (true); - icv->default_device_var = device_num; + if (device_num != GOMP_DEVICE_DEFAULT_OMP_61) + { + struct gomp_task_icv *icv = gomp_icv (true); + icv->default_device_var = device_num; + } } ialias (omp_set_default_device) diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h index 62bf43d..191106b 100644 --- a/libgomp/libgomp-plugin.h +++ b/libgomp/libgomp-plugin.h @@ -33,6 +33,14 @@ #include <stddef.h> #include <stdint.h> +#ifdef _LIBGOMP_PLUGIN_INCLUDE + /* Include 'omp.h' for the interop definitions. */ + #define _LIBGOMP_OMP_LOCK_DEFINED 1 + typedef struct omp_lock_t omp_lock_t; + typedef struct omp_nest_lock_t omp_nest_lock_t; + #include "omp.h.in" +#endif + #ifdef __cplusplus extern "C" { #endif @@ -101,6 +109,25 @@ struct addr_pair uintptr_t end; }; + +#ifdef _LIBGOMP_OMP_LOCK_DEFINED +/* Only define when omp.h.in was included, as in plugin/ and in libgomp.h. */ +struct interop_obj_t +{ + void *stream; + void *device_data; + omp_interop_fr_t fr; + int device_num; +}; + +enum gomp_interop_flag +{ + gomp_interop_flag_init, + gomp_interop_flag_use, + gomp_interop_flag_destroy +}; +#endif + /* This following symbol is used to name the target side variable struct that holds the designated ICVs of the target device. The symbol needs to be available to libgomp code and the offload plugin (which in the latter case @@ -150,6 +177,7 @@ extern int GOMP_OFFLOAD_memcpy3d (int, int, size_t, size_t, size_t, void *, size_t, size_t, size_t, size_t, size_t, const void *, size_t, size_t, size_t, size_t, size_t); +extern bool GOMP_OFFLOAD_memset (int, void *, int, size_t); extern bool GOMP_OFFLOAD_can_run (void *); extern void GOMP_OFFLOAD_run (int, void *, void *, void **); extern void GOMP_OFFLOAD_async_run (int, void *, void *, void **, void *); @@ -173,6 +201,8 @@ extern bool GOMP_OFFLOAD_openacc_async_dev2host (int, void *, const void *, size struct goacc_asyncqueue *); extern bool GOMP_OFFLOAD_openacc_async_host2dev (int, void *, const void *, size_t, struct goacc_asyncqueue *); +extern bool GOMP_OFFLOAD_openacc_async_dev2dev (int, void *, const void *, size_t, + struct goacc_asyncqueue *); extern void *GOMP_OFFLOAD_openacc_cuda_get_current_device (void); extern void *GOMP_OFFLOAD_openacc_cuda_get_current_context (void); extern void *GOMP_OFFLOAD_openacc_cuda_get_stream (struct goacc_asyncqueue *); @@ -181,6 +211,23 @@ extern int GOMP_OFFLOAD_openacc_cuda_set_stream (struct goacc_asyncqueue *, extern union goacc_property_value GOMP_OFFLOAD_openacc_get_property (int, enum goacc_property); +#ifdef _LIBGOMP_OMP_LOCK_DEFINED +/* Only define when omp.h.in was included, as in plugin/ and in libgomp.h. */ +extern void GOMP_OFFLOAD_interop (struct interop_obj_t *, int, + enum gomp_interop_flag, bool, const char *); +extern intptr_t GOMP_OFFLOAD_get_interop_int (struct interop_obj_t *, + omp_interop_property_t, + omp_interop_rc_t *); +extern void *GOMP_OFFLOAD_get_interop_ptr (struct interop_obj_t *, + omp_interop_property_t, + omp_interop_rc_t *); +extern const char *GOMP_OFFLOAD_get_interop_str (struct interop_obj_t *obj, + omp_interop_property_t, + omp_interop_rc_t *); +extern const char *GOMP_OFFLOAD_get_interop_type_desc (struct interop_obj_t *, + omp_interop_property_t); +#endif + #ifdef __cplusplus } #endif diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 44ad980..a433983 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -43,7 +43,14 @@ #include "config.h" #include <stdint.h> + +/* Include omp.h by parts. */ +#include "omp-lock.h" +#define _LIBGOMP_OMP_LOCK_DEFINED 1 +#include "omp.h.in" + #include "libgomp-plugin.h" + #include "gomp-constants.h" #ifdef HAVE_PTHREAD_H @@ -1353,6 +1360,7 @@ typedef struct acc_dispatch_t __typeof (GOMP_OFFLOAD_openacc_async_exec) *exec_func; __typeof (GOMP_OFFLOAD_openacc_async_dev2host) *dev2host_func; __typeof (GOMP_OFFLOAD_openacc_async_host2dev) *host2dev_func; + __typeof (GOMP_OFFLOAD_openacc_async_dev2dev) *dev2dev_func; } async; __typeof (GOMP_OFFLOAD_openacc_get_property) *get_property_func; @@ -1413,12 +1421,18 @@ struct gomp_device_descr __typeof (GOMP_OFFLOAD_free) *free_func; __typeof (GOMP_OFFLOAD_dev2host) *dev2host_func; __typeof (GOMP_OFFLOAD_host2dev) *host2dev_func; + __typeof (GOMP_OFFLOAD_dev2dev) *dev2dev_func; __typeof (GOMP_OFFLOAD_memcpy2d) *memcpy2d_func; __typeof (GOMP_OFFLOAD_memcpy3d) *memcpy3d_func; - __typeof (GOMP_OFFLOAD_dev2dev) *dev2dev_func; + __typeof (GOMP_OFFLOAD_memset) *memset_func; __typeof (GOMP_OFFLOAD_can_run) *can_run_func; __typeof (GOMP_OFFLOAD_run) *run_func; __typeof (GOMP_OFFLOAD_async_run) *async_run_func; + __typeof (GOMP_OFFLOAD_interop) *interop_func; + __typeof (GOMP_OFFLOAD_get_interop_int) *get_interop_int_func; + __typeof (GOMP_OFFLOAD_get_interop_ptr) *get_interop_ptr_func; + __typeof (GOMP_OFFLOAD_get_interop_str) *get_interop_str_func; + __typeof (GOMP_OFFLOAD_get_interop_type_desc) *get_interop_type_desc_func; /* Splay tree containing information about mapped memory regions. */ struct splay_tree_s mem_map; @@ -1455,11 +1469,14 @@ extern void gomp_copy_host2dev (struct gomp_device_descr *, extern void gomp_copy_dev2host (struct gomp_device_descr *, struct goacc_asyncqueue *, void *, const void *, size_t); +extern void gomp_copy_dev2dev (struct gomp_device_descr *, + struct goacc_asyncqueue *, void *, const void *, + size_t); extern uintptr_t gomp_map_val (struct target_mem_desc *, void **, size_t); -extern void gomp_attach_pointer (struct gomp_device_descr *, +extern bool gomp_attach_pointer (struct gomp_device_descr *, struct goacc_asyncqueue *, splay_tree, splay_tree_key, uintptr_t, size_t, - struct gomp_coalesce_buf *, bool); + struct gomp_coalesce_buf *, bool, bool); extern void gomp_detach_pointer (struct gomp_device_descr *, struct goacc_asyncqueue *, splay_tree_key, uintptr_t, bool, struct gomp_coalesce_buf *); @@ -1501,11 +1518,6 @@ gomp_work_share_init_done (void) /* Now that we're back to default visibility, include the globals. */ #include "libgomp_g.h" -/* Include omp.h by parts. */ -#include "omp-lock.h" -#define _LIBGOMP_OMP_LOCK_DEFINED 1 -#include "omp.h.in" - #if !defined (HAVE_ATTRIBUTE_VISIBILITY) \ || !defined (HAVE_ATTRIBUTE_ALIAS) \ || !defined (HAVE_AS_SYMVER_DIRECTIVE) \ diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map index 4530b3a..f6aee7c 100644 --- a/libgomp/libgomp.map +++ b/libgomp/libgomp.map @@ -430,6 +430,7 @@ GOMP_5.1.2 { GOMP_5.1.3 { global: + GOMP_interop; omp_get_num_interop_properties; omp_get_interop_int; omp_get_interop_ptr; @@ -452,6 +453,12 @@ GOMP_6.0 { omp_get_uid_from_device_8_; } GOMP_5.1.3; +GOMP_6.0.1 { + global: + omp_target_memset; + omp_target_memset_async; +} GOMP_6.0; + OACC_2.0 { global: acc_get_num_devices; @@ -608,6 +615,12 @@ OACC_2.6 { acc_get_property_string_h_; } OACC_2.5.1; +OACC_2.6.1 { + global: + acc_memcpy_device; + acc_memcpy_device_async; +} OACC_2.6; + GOACC_2.0 { global: GOACC_data_end; diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi index d1cf9be..5518033 100644 --- a/libgomp/libgomp.texi +++ b/libgomp/libgomp.texi @@ -258,7 +258,7 @@ The OpenMP 4.5 specification is fully supported. device memory mapped by an array section @tab P @tab @item Mapping of Fortran pointer and allocatable variables, including pointer and allocatable components of variables - @tab P @tab Mapping of vars with allocatable components unsupported + @tab Y @tab @item @code{defaultmap} extensions @tab Y @tab @item @code{declare mapper} directive @tab N @tab @item @code{omp_get_supported_active_levels} routine @tab Y @tab @@ -293,8 +293,7 @@ The OpenMP 4.5 specification is fully supported. @item C/C++'s @code{declare variant} directive: elision support of preprocessed code @tab N @tab @item @code{declare variant}: new clauses @code{adjust_args} and - @code{append_args} @tab P @tab For @code{append_args}, all interop objects - must be specified in the @code{interop} clause of @code{dispatch} + @code{append_args} @tab Y @tab @item @code{dispatch} construct @tab Y @tab @item device-specific ICV settings with environment variables @tab Y @tab @item @code{assume} and @code{assumes} directives @tab Y @tab @@ -314,7 +313,7 @@ The OpenMP 4.5 specification is fully supported. clauses @tab N @tab @item Indirect calls to the device version of a procedure or function in @code{target} regions @tab Y @tab -@item @code{interop} directive @tab N @tab +@item @code{interop} directive @tab Y @tab Cf. @ref{Offload-Target Specifics} @item @code{omp_interop_t} object support in runtime routines @tab Y @tab @item @code{nowait} clause in @code{taskwait} directive @tab Y @tab @item Extensions to the @code{atomic} directive @tab Y @tab @@ -545,7 +544,7 @@ to address of matching mapped list item per 5.1, Sect. 2.21.7.2 @tab N @tab @tab N @tab @item Semicolon-separated list to @code{uses_allocators} @tab N @tab @item New @code{need_device_addr} modifier to @code{adjust_args} clause @tab N @tab -@item @code{interop} clause to @code{dispatch} @tab N @tab +@item @code{interop} clause to @code{dispatch} @tab Y @tab @item Scope requirement changes for @code{declare_target} @tab N @tab @item @code{message} and @code{severity} clauses to @code{parallel} directive @tab N @tab @@ -604,7 +603,7 @@ to address of matching mapped list item per 5.1, Sect. 2.21.7.2 @tab N @tab @code{omp_get_device_teams_thread_limit}, and @code{omp_set_device_teams_thread_limit} routines @tab N @tab @item @code{omp_target_memset} and @code{omp_target_memset_async} routines - @tab N @tab + @tab Y @tab @item Fortran version of the interop runtime routines @tab Y @tab @item Routines for obtaining memory spaces/allocators for shared/device memory @tab N @tab @@ -1803,6 +1802,11 @@ Returns the number of available non-host devices. The effect of running this routine in a @code{target} region is unspecified. +Note that in GCC the function is marked pure, i.e. as returning always the +same number. When GCC was not configured to support offloading, it is replaced +by zero; compile with @option{-fno-builtin-omp_get_num_devices} if a run-time +function is desired. + @item @emph{C/C++}: @multitable @columnfractions .20 .80 @item @emph{Prototype}: @tab @code{int omp_get_num_devices(void);} @@ -1813,6 +1817,9 @@ The effect of running this routine in a @code{target} region is unspecified. @item @emph{Interface}: @tab @code{integer function omp_get_num_devices()} @end multitable +@item @emph{See also}: +@ref{omp_get_initial_device} + @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.31. @end table @@ -1951,6 +1958,12 @@ the value of @code{omp_initial_device}. The effect of running this routine in a @code{target} region is unspecified. +Note that GCC inlines this function unless you compile with +@option{-fno-builtin-omp_get_initial_device}. If GCC was not configured to +support offloading, it expands to constant zero; in non-host code it expands +to @code{omp_initial_device}; and otherwise it is replaced with a call to +@code{omp_get_num_devices}. + @item @emph{C/C++} @multitable @columnfractions .20 .80 @item @emph{Prototype}: @tab @code{int omp_get_initial_device(void);} @@ -1985,8 +1998,8 @@ pointers on devices. They have C linkage and do not throw exceptions. * omp_target_memcpy_async:: Copy data between devices asynchronously * omp_target_memcpy_rect:: Copy a subvolume of data between devices * omp_target_memcpy_rect_async:: Copy a subvolume of data between devices asynchronously -@c * omp_target_memset:: <fixme>/TR12 -@c * omp_target_memset_async:: <fixme>/TR12 +* omp_target_memset:: Set bytes in device memory +* omp_target_memset_async:: Set bytes in device memory asynchronously * omp_target_associate_ptr:: Associate a device pointer with a host pointer * omp_target_disassociate_ptr:: Remove device--host pointer association * omp_get_mapped_ptr:: Return device pointer to a host pointer @@ -2317,7 +2330,7 @@ the initial device. @end multitable @item @emph{See also}: -@ref{omp_target_memcpy_rect_async}, @ref{omp_target_memcpy} +@ref{omp_target_memcpy_rect_async}, @ref{omp_target_memcpy}, @ref{Offload-Target Specifics} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v5.1}, Section 3.8.6 @@ -2392,13 +2405,105 @@ the initial device. @end multitable @item @emph{See also}: -@ref{omp_target_memcpy_rect}, @ref{omp_target_memcpy_async} +@ref{omp_target_memcpy_rect}, @ref{omp_target_memcpy_async}, @ref{Offload-Target Specifics} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v5.1}, Section 3.8.8 @end table +@node omp_target_memset +@subsection @code{omp_target_memset} -- Set bytes in device memory +@table @asis +@item @emph{Description}: +This routine fills memory on the device identified by device number +@var{device_num}. Starting from the device address @var{ptr}, the first +@var{count} bytes are set to the value @var{val}, converted to +@code{unsigned char}. If @var{count} is zero, the routine has no effect; +if @var{ptr} is @code{NULL}, the behavior is unspecified. The function +returns @var{ptr}. + +The @var{device_num} must be a conforming device number and @var{ptr} must be +a valid device pointer for that device. Running this routine in a +@code{target} region except on the initial device is not supported. + +@item @emph{C/C++} +@multitable @columnfractions .20 .80 +@item @emph{Prototype}: @tab @code{void *omp_target_memcpy(void *ptr,} +@item @tab @code{ int val,} +@item @tab @code{ size_t count,} +@item @tab @code{ int device_num)} +@end multitable + +@item @emph{Fortran}: +@multitable @columnfractions .20 .80 +@item @emph{Interface}: @tab @code{type(c_ptr) function omp_target_memset( &} +@item @tab @code{ ptr, val, count, device_num) bind(C)} +@item @tab @code{use, intrinsic :: iso_c_binding, only: c_ptr, c_size_t, c_int} +@item @tab @code{type(c_ptr), value :: ptr} +@item @tab @code{integer(c_size_t), value :: count} +@item @tab @code{integer(c_int), value :: val, device_num} +@end multitable + +@item @emph{See also}: +@ref{omp_target_memset_async} + +@item @emph{Reference}: +@uref{https://www.openmp.org, OpenMP specification v6.0}, Section 25.8.1 +@end table + + + +@node omp_target_memset_async +@subsection @code{omp_target_memset} -- Set bytes in device memory asynchronously +@table @asis +@item @emph{Description}: +This routine fills memory on the device identified by device number +@var{device_num}. Starting from the device address @var{ptr}, the first +@var{count} bytes are set to the value @var{val}, converted to +@code{unsigned char}. If @var{count} is zero, the routine has no effect; +if @var{ptr} is @code{NULL}, the behavior is unspecified. Task dependence +is expressed by passing an array of depend objects to @var{depobj_list}, where +the number of array elements is passed as @var{depobj_count}; if the count is +zero, the @var{depobj_list} argument is ignored. In C++ and Fortran, the +@var{depobj_list} argument can also be omitted in that case. The function +returns @var{ptr}. + +The @var{device_num} must be a conforming device number and @var{ptr} must be +a valid device pointer for that device. Running this routine in a +@code{target} region except on the initial device is not supported. + +@item @emph{C/C++} +@multitable @columnfractions .20 .80 +@item @emph{Prototype}: @tab @code{void *omp_target_memcpy_async(void *ptr,} +@item @tab @code{ int val,} +@item @tab @code{ size_t count,} +@item @tab @code{ int device_num,} +@item @tab @code{ int depobj_count,} +@item @tab @code{ omp_depend_t *depobj_list)} +@end multitable + +@item @emph{Fortran}: +@multitable @columnfractions .20 .80 +@item @emph{Interface}: @tab @code{type(c_ptr) function omp_target_memset_async( &} +@item @tab @code{ ptr, val, count, device_num, &} +@item @tab @code{ depobj_count, depobj_list) bind(C)} +@item @tab @code{use, intrinsic :: iso_c_binding, only: c_ptr, c_size_t, c_int} +@item @tab @code{type(c_ptr), value :: ptr} +@item @tab @code{integer(c_size_t), value :: count} +@item @tab @code{integer(c_int), value :: val, device_num, depobj_count} +@item @tab @code{integer(omp_depend_kind), optional :: depobj_list(*)} +@end multitable + + +@item @emph{See also}: +@ref{omp_target_memset} + +@item @emph{Reference}: +@uref{https://www.openmp.org, OpenMP specification v6.0}, Section 25.8.2 +@end table + + @node omp_target_associate_ptr @subsection @code{omp_target_associate_ptr} -- Associate a device pointer with a host pointer @@ -3039,6 +3144,11 @@ and Fortran or used with @code{NULL} as argument in C and C++. If successful, In GCC, the effect of running this routine in a @code{target} region that is not the initial device is unspecified. +GCC implements the OpenMP 6.0 version of this function for C and C++, which is not +compatible with its type signature in previous versions of the OpenMP specification. +In older versions, the type @code{int*} was used for the @var{ret_code} argument +in place of a pointer to the enumerated type @code{omp_interop_rc_t}. + @c Implementation remark: In GCC, the Fortran interface differs from the one shown @c below: the function has C binding and @var{interop} and @var{property_id} are @c passed by value, which permits use of the same ABI as the C function. This does @@ -3048,7 +3158,7 @@ the initial device is unspecified. @item @emph{C/C++}: @multitable @columnfractions .20 .80 @item @emph{Prototype}: @tab @code{omp_intptr_t omp_get_interop_int(const omp_interop_t interop, - omp_interop_property_t property_id, int *ret_code)} + omp_interop_property_t property_id, omp_interop_rc_t *ret_code)} @end multitable @item @emph{Fortran}: @@ -3062,7 +3172,8 @@ the initial device is unspecified. @end multitable @item @emph{See also}: -@ref{omp_get_interop_ptr}, @ref{omp_get_interop_str}, @ref{omp_get_interop_rc_desc} +@ref{omp_get_interop_ptr}, @ref{omp_get_interop_str}, @ref{omp_get_interop_rc_desc}, +@ref{Offload-Target Specifics} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v5.1}, Section 3.12.2, @@ -3084,6 +3195,11 @@ and Fortran or used with @code{NULL} as argument in C and C++. If successful, In GCC, the effect of running this routine in a @code{target} region that is not the initial device is unspecified. +GCC implements the OpenMP 6.0 version of this function for C and C++, which is not +compatible with its type signature in previous versions of the OpenMP specification. +In older versions, the type @code{int*} was used for the @var{ret_code} argument +in place of a pointer to the enumerated type @code{omp_interop_rc_t}. + @c Implementation remark: In GCC, the Fortran interface differs from the one shown @c below: the function has C binding and @var{interop} and @var{property_id} are @c passed by value, which permits use of the same ABI as the C function. This does @@ -3093,7 +3209,7 @@ the initial device is unspecified. @item @emph{C/C++}: @multitable @columnfractions .20 .80 @item @emph{Prototype}: @tab @code{void *omp_get_interop_ptr(const omp_interop_t interop, - omp_interop_property_t property_id, int *ret_code)} + omp_interop_property_t property_id, omp_interop_rc_t *ret_code)} @end multitable @item @emph{Fortran}: @@ -3107,7 +3223,8 @@ the initial device is unspecified. @end multitable @item @emph{See also}: -@ref{omp_get_interop_int}, @ref{omp_get_interop_str}, @ref{omp_get_interop_rc_desc} +@ref{omp_get_interop_int}, @ref{omp_get_interop_str}, @ref{omp_get_interop_rc_desc}, +@ref{Offload-Target Specifics} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v5.1}, Section 3.12.3, @@ -3129,6 +3246,11 @@ and Fortran or used with @code{NULL} as argument in C and C++. If successful, In GCC, the effect of running this routine in a @code{target} region that is not the initial device is unspecified. +GCC implements the OpenMP 6.0 version of this function for C and C++, which is not +compatible with its type signature in previous versions of the OpenMP specification. +In older versions, the type @code{int*} was used for the @var{ret_code} argument +in place of a pointer to the enumerated type @code{omp_interop_rc_t}. + @c Implementation remark: In GCC, the Fortran interface differs from the one shown @c below: @var{interop} and @var{property_id} are passed by value. This does not @c affect the usage of the function when GCC's @code{omp_lib} module or @@ -3137,7 +3259,7 @@ the initial device is unspecified. @item @emph{C/C++}: @multitable @columnfractions .20 .80 @item @emph{Prototype}: @tab @code{const char *omp_get_interop_str(const omp_interop_t interop, - omp_interop_property_t property_id, int *ret_code)} + omp_interop_property_t property_id, omp_interop_rc_t *ret_code)} @end multitable @item @emph{Fortran}: @@ -3151,7 +3273,8 @@ the initial device is unspecified. @end multitable @item @emph{See also}: -@ref{omp_get_interop_int}, @ref{omp_get_interop_ptr}, @ref{omp_get_interop_rc_desc} +@ref{omp_get_interop_int}, @ref{omp_get_interop_ptr}, @ref{omp_get_interop_rc_desc}, +@ref{Offload-Target Specifics} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v5.1}, Section 3.12.4, @@ -3234,7 +3357,8 @@ a null pointer is returned. The effect of running this routine in a @end multitable @item @emph{See also}: -@ref{omp_get_num_interop_properties}, @ref{omp_get_interop_name} +@ref{omp_get_num_interop_properties}, @ref{omp_get_interop_name}, +@ref{Offload-Target Specifics} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v5.1}, Section 3.12.6, @@ -3253,6 +3377,11 @@ the @var{ret_code} in human-readable form. The behavior is unspecified if value of @var{ret_code} was not set by an interoperability routine invoked for @var{interop}. +GCC implements the OpenMP 6.0 version of this function for C and C++, which is not +compatible with its type signature in previous versions of the OpenMP specification. +In older versions, the type @code{int} was used for the @var{ret_code} argument +in place of the enumerated type @code{omp_interop_rc_t}. + @item @emph{C/C++}: @multitable @columnfractions .20 .80 @item @emph{Prototype}: @tab @code{const char *omp_get_interop_rc_desc(const omp_interop_t interop, @@ -3324,7 +3453,7 @@ traits; if an allocator that fulfills the requirements cannot be created, @code{omp_null_allocator} is returned. The predefined memory spaces and available traits can be found at -@ref{OMP_ALLOCATOR}, where the trait names have to be prefixed by +@ref{Memory allocation}, where the trait names have to be prefixed by @code{omp_atk_} (e.g. @code{omp_atk_pinned}) and the named trait values by @code{omp_atv_} (e.g. @code{omp_atv_true}); additionally, @code{omp_atv_default} may be used as trait value to specify that the default value should be used. @@ -3347,7 +3476,7 @@ may be used as trait value to specify that the default value should be used. @end multitable @item @emph{See also}: -@ref{OMP_ALLOCATOR}, @ref{Memory allocation}, @ref{omp_destroy_allocator} +@ref{Memory allocation}, @ref{OMP_ALLOCATOR}, @ref{omp_destroy_allocator} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v5.0}, Section 3.7.2 @@ -3928,63 +4057,15 @@ The value can either be a predefined allocator or a predefined memory space or a predefined memory space followed by a colon and a comma-separated list of memory trait and value pairs, separated by @code{=}. +See @ref{Memory allocation} for a list of supported prefedined allocators, +memory spaces, and traits. + Note: The corresponding device environment variables are currently not supported. Therefore, the non-host @var{def-allocator-var} ICVs are always initialized to @code{omp_default_mem_alloc}. However, on all devices, the @code{omp_set_default_allocator} API routine can be used to change value. -@multitable @columnfractions .45 .45 -@headitem Predefined allocators @tab Associated predefined memory spaces -@item omp_default_mem_alloc @tab omp_default_mem_space -@item omp_large_cap_mem_alloc @tab omp_large_cap_mem_space -@item omp_const_mem_alloc @tab omp_const_mem_space -@item omp_high_bw_mem_alloc @tab omp_high_bw_mem_space -@item omp_low_lat_mem_alloc @tab omp_low_lat_mem_space -@item omp_cgroup_mem_alloc @tab omp_low_lat_mem_space (implementation defined) -@item omp_pteam_mem_alloc @tab omp_low_lat_mem_space (implementation defined) -@item omp_thread_mem_alloc @tab omp_low_lat_mem_space (implementation defined) -@item ompx_gnu_pinned_mem_alloc @tab omp_default_mem_space (GNU extension) -@end multitable - -The predefined allocators use the default values for the traits, -as listed below. Except that the last three allocators have the -@code{access} trait set to @code{cgroup}, @code{pteam}, and -@code{thread}, respectively. - -@multitable @columnfractions .25 .40 .25 -@headitem Trait @tab Allowed values @tab Default value -@item @code{sync_hint} @tab @code{contended}, @code{uncontended}, - @code{serialized}, @code{private} - @tab @code{contended} -@item @code{alignment} @tab Positive integer being a power of two - @tab 1 byte -@item @code{access} @tab @code{all}, @code{cgroup}, - @code{pteam}, @code{thread} - @tab @code{all} -@item @code{pool_size} @tab Positive integer - @tab See @ref{Memory allocation} -@item @code{fallback} @tab @code{default_mem_fb}, @code{null_fb}, - @code{abort_fb}, @code{allocator_fb} - @tab See below -@item @code{fb_data} @tab @emph{unsupported as it needs an allocator handle} - @tab (none) -@item @code{pinned} @tab @code{true}, @code{false} - @tab See below -@item @code{partition} @tab @code{environment}, @code{nearest}, - @code{blocked}, @code{interleaved} - @tab @code{environment} -@end multitable - -For the @code{fallback} trait, the default value is @code{null_fb} for the -@code{omp_default_mem_alloc} allocator and any allocator that is associated -with device memory; for all other allocators, it is @code{default_mem_fb} -by default. - -For the @code{pinned} trait, the default value is @code{true} for -predefined allocator @code{ompx_gnu_pinned_mem_alloc} (a GNU extension), and -@code{false} for all others. - Examples: @smallexample OMP_ALLOCATOR=omp_high_bw_mem_alloc @@ -4760,6 +4841,7 @@ acceleration device. present on device. * acc_memcpy_to_device:: Copy host memory to device memory. * acc_memcpy_from_device:: Copy device memory to host memory. +* acc_memcpy_device:: Copy memory within a device. * acc_attach:: Let device pointer point to device-pointer target. * acc_detach:: Let device pointer point to host-pointer target. @@ -5834,6 +5916,44 @@ This function copies device memory specified by device address of +@node acc_memcpy_device +@section @code{acc_memcpy_device} -- Copy memory within a device. +@table @asis +@item @emph{Description} +This function copies device memory from one memory location to another +on the current device. It copies @var{bytes} bytes of data from the device +address, specified by @var{data_dev_src}, to the device address +@var{data_dev_dest}. The @code{_async} version performs the transfer +asynchronously using the queue associated with @var{async_arg}. + +@item @emph{C/C++}: +@multitable @columnfractions .20 .80 +@item @emph{Prototype}: @tab @code{void acc_memcpy_device(d_void* data_dev_dest,} +@item @tab @code{d_void* data_dev_src, size_t bytes);} +@item @emph{Prototype}: @tab @code{void acc_memcpy_device_async(d_void* data_dev_dest,} +@item @tab @code{d_void* data_dev_src, size_t bytes, int async_arg);} +@end multitable + +@item @emph{Fortran}: +@multitable @columnfractions .20 .80 +@item @emph{Interface}: @tab @code{subroutine acc_memcpy_device(data_dev_dest, &} +@item @tab @code{data_dev_src, bytes)} +@item @emph{Interface}: @tab @code{subroutine acc_memcpy_device_async(data_dev_dest, &} +@item @tab @code{data_dev_src, bytes, async_arg)} +@item @tab @code{type(c_ptr), value :: data_dev_dest} +@item @tab @code{type(c_ptr), value :: data_dev_src} +@item @tab @code{integer(c_size_t), value :: bytes} +@item @tab @code{integer(acc_handle_kind), value :: async_arg} +@end multitable + +@item @emph{Reference}: +@uref{https://www.openacc.org, OpenACC specification v2.6}, section +3.2.33. @uref{https://www.openacc.org, OpenACC specification v3.3}, section +3.2.28. +@end table + + + @node acc_attach @section @code{acc_attach} -- Let device pointer point to device-pointer target. @table @asis @@ -5847,19 +5967,19 @@ address to pointing to the corresponding device data. @item @emph{Prototype}: @tab @code{void acc_attach_async(h_void **ptr_addr, int async);} @end multitable -@c @item @emph{Fortran}: -@c @multitable @columnfractions .20 .80 -@c @item @emph{Interface}: @tab @code{subroutine acc_attach(ptr_addr)} -@c @item @emph{Interface}: @tab @code{subroutine acc_attach_async(ptr_addr, async_arg)} -@c @item @tab @code{type(*), dimension(..) :: ptr_addr} -@c @item @tab @code{integer(acc_handle_kind), value :: async_arg} -@c @end multitable +@item @emph{Fortran}: +@multitable @columnfractions .20 .80 +@item @emph{Interface}: @tab @code{subroutine acc_attach(ptr_addr)} +@item @emph{Interface}: @tab @code{subroutine acc_attach_async(ptr_addr, async_arg)} +@item @tab @code{type(*), dimension(..) :: ptr_addr} +@item @tab @code{integer(acc_handle_kind), value :: async_arg} +@end multitable @item @emph{Reference}: @uref{https://www.openacc.org, OpenACC specification v2.6}, section 3.2.34. -@c @uref{https://www.openacc.org, OpenACC specification v3.3}, section -@c 3.2.29. + @uref{https://www.openacc.org, OpenACC specification v3.3}, section +3.2.29. @end table @@ -5879,21 +5999,21 @@ address to pointing to the corresponding host data. @item @emph{Prototype}: @tab @code{void acc_detach_finalize_async(h_void **ptr_addr, int async);} @end multitable -@c @item @emph{Fortran}: -@c @multitable @columnfractions .20 .80 -@c @item @emph{Interface}: @tab @code{subroutine acc_detach(ptr_addr)} -@c @item @emph{Interface}: @tab @code{subroutine acc_detach_async(ptr_addr, async_arg)} -@c @item @emph{Interface}: @tab @code{subroutine acc_detach_finalize(ptr_addr)} -@c @item @emph{Interface}: @tab @code{subroutine acc_detach_finalize_async(ptr_addr, async_arg)} -@c @item @tab @code{type(*), dimension(..) :: ptr_addr} -@c @item @tab @code{integer(acc_handle_kind), value :: async_arg} -@c @end multitable +@item @emph{Fortran}: +@multitable @columnfractions .20 .80 +@item @emph{Interface}: @tab @code{subroutine acc_detach(ptr_addr)} +@item @emph{Interface}: @tab @code{subroutine acc_detach_async(ptr_addr, async_arg)} +@item @emph{Interface}: @tab @code{subroutine acc_detach_finalize(ptr_addr)} +@item @emph{Interface}: @tab @code{subroutine acc_detach_finalize_async(ptr_addr, async_arg)} +@item @tab @code{type(*), dimension(..) :: ptr_addr} +@item @tab @code{integer(acc_handle_kind), value :: async_arg} +@end multitable @item @emph{Reference}: @uref{https://www.openacc.org, OpenACC specification v2.6}, section 3.2.35. -@c @uref{https://www.openacc.org, OpenACC specification v3.3}, section -@c 3.2.29. +@uref{https://www.openacc.org, OpenACC specification v3.3}, section +3.2.29. @end table @@ -6715,6 +6835,7 @@ on more architectures, GCC currently does not match any @code{arch} or @tab See @code{-march=} in ``Nvidia PTX Options'' @end multitable + @node Memory allocation @section Memory allocation @@ -6749,11 +6870,94 @@ The description below applies to: @code{_Alignof} and C++'s @code{alignof}. @end itemize -For the available predefined allocators and, as applicable, their associated -predefined memory spaces and for the available traits and their default values, -see @ref{OMP_ALLOCATOR}. Predefined allocators without an associated memory -space use the @code{omp_default_mem_space} memory space. See additionally -@ref{Offload-Target Specifics}. +GCC supports the following predefined allocators and predefined memory spaces: + +@multitable @columnfractions .45 .45 +@headitem Predefined allocators @tab Associated predefined memory spaces +@item omp_default_mem_alloc @tab omp_default_mem_space +@item omp_large_cap_mem_alloc @tab omp_large_cap_mem_space +@item omp_const_mem_alloc @tab omp_const_mem_space +@item omp_high_bw_mem_alloc @tab omp_high_bw_mem_space +@item omp_low_lat_mem_alloc @tab omp_low_lat_mem_space +@item omp_cgroup_mem_alloc @tab omp_low_lat_mem_space (implementation defined) +@item omp_pteam_mem_alloc @tab omp_low_lat_mem_space (implementation defined) +@item omp_thread_mem_alloc @tab omp_low_lat_mem_space (implementation defined) +@item ompx_gnu_pinned_mem_alloc @tab omp_default_mem_space (GNU extension) +@end multitable + +Each predefined allocator, including @code{omp_null_allocator}, has a corresponding +allocator class template that meet the C++ allocator completeness requirements. +These are located in the @code{omp::allocator} namespace, and the +@code{ompx::allocator} namespace for gnu extensions. This allows the +allocator-aware C++ standard library containers to use OpenMP allocation routines; +for instance: + +@smallexample +std::vector<int, omp::allocator::cgroup_mem<int>> vec; +@end smallexample + +The following allocator templates are supported: + +@multitable @columnfractions .45 .45 +@headitem Predefined allocators @tab Associated allocator template +@item omp_null_allocator @tab omp::allocator::null_allocator +@item omp_default_mem_alloc @tab omp::allocator::default_mem +@item omp_large_cap_mem_alloc @tab omp::allocator::large_cap_mem +@item omp_const_mem_alloc @tab omp::allocator::const_mem +@item omp_high_bw_mem_alloc @tab omp::allocator::high_bw_mem +@item omp_low_lat_mem_alloc @tab omp::allocator::low_lat_mem +@item omp_cgroup_mem_alloc @tab omp::allocator::cgroup_mem +@item omp_pteam_mem_alloc @tab omp::allocator::pteam_mem +@item omp_thread_mem_alloc @tab omp::allocator::thread_mem +@item ompx_gnu_pinned_mem_alloc @tab ompx::allocator::gnu_pinned_mem +@end multitable + +The following traits are available when constructing a new allocator; +if a trait is not specified or with the value @code{default}, the +specified default value is used for that trait. The predefined +allocators use the default values of each trait, except that the +@code{omp_cgroup_mem_alloc}, @code{omp_pteam_mem_alloc}, and +@code{omp_thread_mem_alloc} allocators have the @code{access} trait +set to @code{cgroup}, @code{pteam}, and @code{thread}, respectively. +For each trait, a named constant prefixed by @code{omp_atk_} exists; +for each non-numeric value, a named constant prefixed by @code{omp_atv_} +exists. + +@multitable @columnfractions .25 .40 .25 +@headitem Trait @tab Allowed values @tab Default value +@item @code{sync_hint} @tab @code{contended}, @code{uncontended}, + @code{serialized}, @code{private} + @tab @code{contended} +@item @code{alignment} @tab Positive integer being a power of two + @tab 1 byte +@item @code{access} @tab @code{all}, @code{cgroup}, + @code{pteam}, @code{thread} + @tab @code{all} +@item @code{pool_size} @tab Positive integer (bytes) + @tab See below. +@item @code{fallback} @tab @code{default_mem_fb}, @code{null_fb}, + @code{abort_fb}, @code{allocator_fb} + @tab See below +@item @code{fb_data} @tab @emph{allocator handle} + @tab (none) +@item @code{pinned} @tab @code{true}, @code{false} + @tab See below +@item @code{partition} @tab @code{environment}, @code{nearest}, + @code{blocked}, @code{interleaved} + @tab @code{environment} +@end multitable + +For the @code{fallback} trait, the default value is @code{null_fb} for the +@code{omp_default_mem_alloc} allocator and any allocator that is associated +with device memory; for all other allocators, it is @code{default_mem_fb} +by default. + +For the @code{pinned} trait, the default value is @code{true} for +predefined allocator @code{ompx_gnu_pinned_mem_alloc} (a GNU extension), and +@code{false} for all others. + +The following description applies to the initial device (the host) and largely +also to non-host devices; for the latter, also see @ref{Offload-Target Specifics}. For the memory spaces, the following applies: @itemize @@ -6768,14 +6972,16 @@ For the memory spaces, the following applies: @end itemize On Linux systems, where the @uref{https://github.com/memkind/memkind, memkind -library} (@code{libmemkind.so.0}) is available at runtime, it is used when -creating memory allocators requesting +library} (@code{libmemkind.so.0}) is available at runtime and the respective +memkind kind is supported, it is used when creating memory allocators requesting @itemize -@item the memory space @code{omp_high_bw_mem_space} -@item the memory space @code{omp_large_cap_mem_space} -@item the @code{partition} trait @code{interleaved}; note that for - @code{omp_large_cap_mem_space} the allocation will not be interleaved +@item the @code{partition} trait @code{interleaved} except when the memory space + is @code{omp_large_cap_mem_space} (uses @code{MEMKIND_HBW_INTERLEAVE}) +@item the memory space is @code{omp_high_bw_mem_space} (uses + @code{MEMKIND_HBW_PREFERRED}) +@item the memory space is @code{omp_large_cap_mem_space} (uses + @code{MEMKIND_DAX_KMEM_ALL} or, if not available, @code{MEMKIND_DAX_KMEM}) @end itemize On Linux systems, where the @uref{https://github.com/numactl/numactl, numa @@ -6801,10 +7007,15 @@ a @code{nearest} allocation. Additional notes regarding the traits: @itemize @item The @code{pinned} trait is supported on Linux hosts, but is subject to - the OS @code{ulimit}/@code{rlimit} locked memory settings. + the OS @code{ulimit}/@code{rlimit} locked memory settings. It currently + uses @code{mmap} and is therefore optimized for few allocations, including + large data. If the conditions for numa or memkind allocations are + fulfilled, those allocators are used instead. @item The default for the @code{pool_size} trait is no pool and for every (re)allocation the associated library routine is called, which might - internally use a memory pool. + internally use a memory pool. Currently, the same applies when a + @code{pool_size} has been specified, except that once allocations exceed + the the pool size, the action of the @code{fallback} trait applies. @item For the @code{partition} trait, the partition part size will be the same as the requested size (i.e. @code{interleaved} or @code{blocked} has no effect), except for @code{interleaved} when the memkind library is @@ -6813,13 +7024,15 @@ Additional notes regarding the traits: that allocated the memory; on Linux, this is in particular the case when the memory placement policy is set to preferred. @item The @code{access} trait has no effect such that memory is always - accessible by all threads. + accessible by all threads. (Except on supported no-host devices.) @item The @code{sync_hint} trait has no effect. @end itemize See also: @ref{Offload-Target Specifics} + + @c --------------------------------------------------------------------- @c Offload-Target Specifics @c --------------------------------------------------------------------- @@ -6837,6 +7050,10 @@ The following sections present notes on the offload-target specifics @node AMD Radeon @section AMD Radeon (GCN) +@menu +* Foreign-runtime support for AMD GPUs:: +@end menu + On the hardware side, there is the hierarchy (fine to coarse): @itemize @item work item (thread) @@ -6881,7 +7098,7 @@ The implementation remark: @code{device(ancestor:1)}) are processed serially per @code{target} region such that the next reverse offload region is only executed after the previous one returned. -@item OpenMP code that has a @code{requires} directive with +@item OpenMP code that has a @code{requires} directive with @code{self_maps} or @code{unified_shared_memory} is only supported if all AMD GPUs have the @code{HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT} property; for discrete GPUs, this may require setting the @code{HSA_XNACK} environment @@ -6904,6 +7121,11 @@ The implementation remark: @code{omp_thread_mem_alloc}, all use low-latency memory as first preference, and fall back to main graphics memory when the low-latency pool is exhausted. +@item The OpenMP routines @code{omp_target_memcpy_rect} and + @code{omp_target_memcpy_rect_async} and the @code{target update} + directive for non-contiguous list items use the 3D memory-copy function + of the HSA library. Higher dimensions call this functions in a loop and + are therefore supported. @item The unique identifier (UID), used with OpenMP's API UID routines, is the value returned by the HSA runtime library for @code{HSA_AMD_AGENT_INFO_UUID}. For GPUs, it is currently @samp{GPU-} followed by 16 lower-case hex digits, @@ -6912,10 +7134,78 @@ The implementation remark: @end itemize +@node Foreign-runtime support for AMD GPUs +@subsection OpenMP @code{interop} -- Foreign-Runtime Support for AMD GPUs + +On AMD GPUs, the foreign runtimes are HIP (C++ Heterogeneous-Compute Interface +for Portability) and HSA (Heterogeneous System Architecture), +where HIP is the default. The interop object is created using OpenMP's +@code{interop} directive or, implicitly, when invoking a @code{declare variant} +procedure that has the @code{append_args} clause. In either case, the +@code{prefer_type} modifier determines whether HIP or HSA is used. + +When specifying the @code{targetsync} modifier: For HIP, a stream is +created using @code{hipStreamCreate}. For HSA, a queue is created of type +@code{HSA_QUEUE_TYPE_MULTI} with a queue size of 64. + +Invoke the @ref{Interoperability Routines} on an interop object to obtain +the following properties. For properties with integral (int), pointer (ptr), +or string (str) data type, call @code{omp_get_interop_int}, +@code{omp_get_interop_ptr}, or @code{omp_get_interop_str}, respectively. +Note that @code{device_num} is the OpenMP device number +while @code{device} is the HIP device number or HSA device handle. + +When using HIP with C and C++, the @code{__HIP_PLATFORM_AMD__} preprocessor +macro must be defined before including the HIP header files. + +For the API routine call, add the prefix @code{omp_ipr_} to the property name; +for instance: +@smallexample +omp_interop_rc_t ret; +int device_num = omp_get_interop_int (my_interop_obj, omp_ipr_device_num, &ret); +@end smallexample + +@noindent +Available properties for an HIP interop object: + +@multitable @columnfractions .20 .35 .20 .20 +@headitem Property @tab C data type @tab API routine @tab value (if constant) +@item @code{fr_id} @tab @code{omp_interop_fr_t} @tab int @tab @code{omp_fr_hip} +@item @code{fr_name} @tab @code{const char *} @tab str @tab @code{"hip"} +@item @code{vendor} @tab @code{int} @tab int @tab @code{1} +@item @code{vendor_name} @tab @code{const char *} @tab str @tab @code{"amd"} +@item @code{device_num} @tab @code{int} @tab int @tab +@item @code{platform} @tab N/A @tab @tab +@item @code{device} @tab @code{hipDevice_t} @tab int @tab +@item @code{device_context} @tab @code{hipCtx_t} @tab ptr @tab +@item @code{targetsync} @tab @code{hipStream_t} @tab ptr @tab +@end multitable + +@noindent +Available properties for an HSA interop object: + +@multitable @columnfractions .20 .35 .20 .20 +@headitem Property @tab C data type @tab API routine @tab value (if constant) +@item @code{fr_id} @tab @code{omp_interop_fr_t} @tab int @tab @code{omp_fr_hsa} +@item @code{fr_name} @tab @code{const char *} @tab str @tab @code{"hsa"} +@item @code{vendor} @tab @code{int} @tab int @tab @code{1} +@item @code{vendor_name} @tab @code{const char *} @tab str @tab @code{"amd"} +@item @code{device_num} @tab @code{int} @tab int @tab +@item @code{platform} @tab N/A @tab @tab +@item @code{device} @tab @code{hsa_agent *} @tab ptr @tab +@item @code{device_context} @tab N/A @tab @tab +@item @code{targetsync} @tab @code{hsa_queue *} @tab ptr @tab +@end multitable + + @node nvptx @section nvptx +@menu +* Foreign-runtime support for Nvidia GPUs:: +@end menu + On the hardware side, there is the hierarchy (fine to coarse): @itemize @item thread @@ -6968,7 +7258,7 @@ The implementation remark: Per device, reverse offload regions are processed serially such that the next reverse offload region is only executed after the previous one returned. -@item OpenMP code that has a @code{requires} directive with +@item OpenMP code that has a @code{requires} directive with @code{self_maps} or @code{unified_shared_memory} runs on nvptx devices if and only if all of those support the @code{pageableMemoryAccess} property;@footnote{ @uref{https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#um-requirements}} @@ -6976,11 +7266,6 @@ The implementation remark: devices (``host fallback''). @item The default per-warp stack size is 128 kiB; see also @code{-msoft-stack} in the GCC manual. -@item The OpenMP routines @code{omp_target_memcpy_rect} and - @code{omp_target_memcpy_rect_async} and the @code{target update} - directive for non-contiguous list items will use the 2D and 3D - memory-copy functions of the CUDA library. Higher dimensions will - call those functions in a loop and are therefore supported. @item Low-latency memory (@code{omp_low_lat_mem_space}) is supported when the the @code{access} trait is set to @code{cgroup}, and libgomp has been built for PTX ISA version 4.1 or higher (such as in GCC's @@ -6998,6 +7283,11 @@ The implementation remark: @code{omp_thread_mem_alloc}, all use low-latency memory as first preference, and fall back to main graphics memory when the low-latency pool is exhausted. +@item The OpenMP routines @code{omp_target_memcpy_rect} and + @code{omp_target_memcpy_rect_async} and the @code{target update} + directive for non-contiguous list items use the 2D and 3D memory-copy + functions of the CUDA library. Higher dimensions call those functions + in a loop and are therefore supported. @item The unique identifier (UID), used with OpenMP's API UID routines, consists of the @samp{GPU-} prefix followed by the 16-bytes UUID as returned by the CUDA runtime library. This UUID is output in grouped lower-case @@ -7008,6 +7298,88 @@ The implementation remark: @end itemize +@node Foreign-runtime support for Nvidia GPUs +@subsection OpenMP @code{interop} -- Foreign-Runtime Support for Nvidia GPUs + +On Nvidia GPUs, the foreign runtimes APIs are the CUDA runtime API, the CUDA +driver API, and HIP, the C++ Heterogeneous-Compute Interface for Portability +that is---on CUDA-based systems---a very thin layer on top of the CUDA API. By +default, CUDA is used. The interop object is created using OpenMP's +@code{interop} directive or, implicitly, when invoking a @code{declare variant} +procedure that has the @code{append_args} clause. In either case, the +@code{prefer_type} modifier determines whether CUDA, CUDA driver, or HSA is +used. + +When specifying the @code{targetsync} modifier, a CUDA stream is created using +the @code{CU_STREAM_DEFAULT} flag. + +Invoke the @ref{Interoperability Routines} on an interop object to obtain +the following properties. For properties with integral (int), pointer (ptr), +or string (str) data type, call @code{omp_get_interop_int}, +@code{omp_get_interop_ptr}, or @code{omp_get_interop_str}, respectively. +Note that @code{device_num} is the OpenMP device number while @code{device} +is the CUDA, CUDA Driver, or HIP device number. + +When using HIP with C and C++, the @code{__HIP_PLATFORM_NVIDIA__} preprocessor +macro must be defined before including the HIP header files. + +For the API routine call, add the prefix @code{omp_ipr_} to the property name; +for instance: +@smallexample +omp_interop_rc_t ret; +int device_num = omp_get_interop_int (my_interop_obj, omp_ipr_device_num, &ret); +@end smallexample + +@noindent +Available properties for a CUDA runtime API interop object: + +@multitable @columnfractions .20 .35 .20 .20 +@headitem Property @tab C data type @tab API routine @tab value (if constant) +@item @code{fr_id} @tab @code{omp_interop_fr_t} @tab int @tab @code{omp_fr_cuda} +@item @code{fr_name} @tab @code{const char *} @tab str @tab @code{"cuda"} +@item @code{vendor} @tab @code{int} @tab int @tab @code{11} +@item @code{vendor_name} @tab @code{const char *} @tab str @tab @code{"nvidia"} +@item @code{device_num} @tab @code{int} @tab int @tab +@item @code{platform} @tab N/A @tab @tab +@item @code{device} @tab @code{int} @tab int @tab +@item @code{device_context} @tab N/A @tab @tab +@item @code{targetsync} @tab @code{cudaStream_t} @tab ptr @tab +@end multitable + +@noindent +Available properties for a CUDA driver API interop object: + +@multitable @columnfractions .20 .35 .20 .20 +@headitem Property @tab C data type @tab API routine @tab value (if constant) +@item @code{fr_id} @tab @code{omp_interop_fr_t} @tab int @tab @code{omp_fr_cuda_driver} +@item @code{fr_name} @tab @code{const char *} @tab str @tab @code{"cuda_driver"} +@item @code{vendor} @tab @code{int} @tab int @tab @code{11} +@item @code{vendor_name} @tab @code{const char *} @tab str @tab @code{"nvidia"} +@item @code{device_num} @tab @code{int} @tab int @tab +@item @code{platform} @tab N/A @tab @tab +@item @code{device} @tab @code{CUdevice} @tab int @tab +@item @code{device_context} @tab @code{CUcontext} @tab ptr @tab +@item @code{targetsync} @tab @code{CUstream} @tab ptr @tab +@end multitable + +@noindent +Available properties for an HIP interop object: + +@multitable @columnfractions .20 .35 .20 .20 +@headitem Property @tab C data type @tab API routine @tab value (if constant) +@item @code{fr_id} @tab @code{omp_interop_fr_t} @tab int @tab @code{omp_fr_hip} +@item @code{fr_name} @tab @code{const char *} @tab str @tab @code{"hip"} +@item @code{vendor} @tab @code{int} @tab int @tab @code{11} +@item @code{vendor_name} @tab @code{const char *} @tab str @tab @code{"nvidia"} +@item @code{device_num} @tab @code{int} @tab int @tab +@item @code{platform} @tab N/A @tab @tab +@item @code{device} @tab @code{hipDevice_t} @tab int @tab +@item @code{device_context} @tab @code{hipCtx_t} @tab ptr @tab +@item @code{targetsync} @tab @code{hipStream_t} @tab ptr @tab +@end multitable + + + @c --------------------------------------------------------------------- @c The libgomp ABI @c --------------------------------------------------------------------- diff --git a/libgomp/libgomp_g.h b/libgomp/libgomp_g.h index eed800b..8993ec6 100644 --- a/libgomp/libgomp_g.h +++ b/libgomp/libgomp_g.h @@ -358,6 +358,10 @@ extern void GOMP_target_enter_exit_data (int, size_t, void **, size_t *, extern void GOMP_teams (unsigned int, unsigned int); extern bool GOMP_teams4 (unsigned int, unsigned int, unsigned int, bool); extern void *GOMP_target_map_indirect_ptr (void *); +struct interop_obj_t; +extern void GOMP_interop (int, int, struct interop_obj_t ***, const int *, + const char **, int, struct interop_obj_t **, int, + struct interop_obj_t ***, unsigned, void **); /* teams.c */ diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index 718252b..5b8ba7e 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -171,21 +171,22 @@ acc_free (void *d) } static void -memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async, - const char *libfnname) +memcpy_tofrom_device (bool dev_to, bool dev_from, void *dst, void *src, + size_t s, int async, const char *libfnname) { /* No need to call lazy open here, as the device pointer must have been obtained from a routine that did that. */ struct goacc_thread *thr = goacc_thread (); assert (thr && thr->dev); + if (s == 0) + return; if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) { - if (from) - memmove (h, d, s); - else - memmove (d, h, s); + if (src == dst) + return; + memcpy (dst, src, s); return; } @@ -199,10 +200,15 @@ memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async, } goacc_aq aq = get_goacc_asyncqueue (async); - if (from) - gomp_copy_dev2host (thr->dev, aq, h, d, s); + if (dev_to && dev_from) + { + if (dst != src) + gomp_copy_dev2dev (thr->dev, aq, dst, src, s); + } + else if (dev_from) + gomp_copy_dev2host (thr->dev, aq, dst, src, s); else - gomp_copy_host2dev (thr->dev, aq, d, h, s, false, /* TODO: cbuf? */ NULL); + gomp_copy_host2dev (thr->dev, aq, dst, src, s, false, /* TODO: cbuf? */ NULL); if (profiling_p) { @@ -214,25 +220,37 @@ memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async, void acc_memcpy_to_device (void *d, void *h, size_t s) { - memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__); + memcpy_tofrom_device (true, false, d, h, s, acc_async_sync, __FUNCTION__); } void acc_memcpy_to_device_async (void *d, void *h, size_t s, int async) { - memcpy_tofrom_device (false, d, h, s, async, __FUNCTION__); + memcpy_tofrom_device (true, false, d, h, s, async, __FUNCTION__); } void acc_memcpy_from_device (void *h, void *d, size_t s) { - memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__); + memcpy_tofrom_device (false, true, h, d, s, acc_async_sync, __FUNCTION__); } void acc_memcpy_from_device_async (void *h, void *d, size_t s, int async) { - memcpy_tofrom_device (true, d, h, s, async, __FUNCTION__); + memcpy_tofrom_device (false, true, h, d, s, async, __FUNCTION__); +} + +void +acc_memcpy_device (void *dst, void *src, size_t s) +{ + memcpy_tofrom_device (true, true, dst, src, s, acc_async_sync, __FUNCTION__); +} + +void +acc_memcpy_device_async (void *dst, void *src, size_t s, int async) +{ + memcpy_tofrom_device (true, true, dst, src, s, async, __FUNCTION__); } /* Return the device pointer that corresponds to host data H. Or NULL @@ -951,7 +969,7 @@ acc_attach_async (void **hostaddr, int async) } gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, (uintptr_t) hostaddr, - 0, NULL, false); + 0, NULL, false, true); gomp_mutex_unlock (&acc_dev->lock); } @@ -1158,7 +1176,7 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, if ((kinds[i] & 0xff) == GOMP_MAP_ATTACH) { gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, - (uintptr_t) h, s, NULL, false); + (uintptr_t) h, s, NULL, false, true); /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference counts ('n->refcount', 'n->dynamic_refcount'). */ } @@ -1176,7 +1194,7 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, = lookup_host (acc_dev, hostaddrs[j], sizeof (void *)); gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, m, (uintptr_t) hostaddrs[j], sizes[j], NULL, - false); + false, true); } bool processed = false; diff --git a/libgomp/omp.h.in b/libgomp/omp.h.in index d5e8be4..4f2bc46 100644 --- a/libgomp/omp.h.in +++ b/libgomp/omp.h.in @@ -347,6 +347,10 @@ extern int omp_target_memcpy_rect_async (void *, const void *, __SIZE_TYPE__, const __SIZE_TYPE__ *, int, int, int, omp_depend_t * __GOMP_DEFAULT_NULL) __GOMP_NOTHROW; +extern void *omp_target_memset (void *, int, __SIZE_TYPE__, int) __GOMP_NOTHROW; +extern void *omp_target_memset_async (void *, int, __SIZE_TYPE__, int, + int, omp_depend_t * __GOMP_DEFAULT_NULL) + __GOMP_NOTHROW; extern int omp_target_associate_ptr (const void *, const void *, __SIZE_TYPE__, __SIZE_TYPE__, int) __GOMP_NOTHROW; extern int omp_target_disassociate_ptr (const void *, int) __GOMP_NOTHROW; @@ -432,4 +436,136 @@ extern const char *omp_get_uid_from_device (int) __GOMP_NOTHROW; } #endif +#if __cplusplus >= 201103L + +/* std::__throw_bad_alloc and std::__throw_bad_array_new_length. */ +#include <bits/functexcept.h> + +namespace omp +{ +namespace allocator +{ + +namespace __detail +{ + +template<typename __T, omp_allocator_handle_t __Handle> +struct __allocator_templ +{ + using value_type = __T; + using pointer = __T*; + using const_pointer = const __T*; + using size_type = __SIZE_TYPE__; + using difference_type = __PTRDIFF_TYPE__; + + __T* + allocate (size_type __n) + { + if (__SIZE_MAX__ / sizeof(__T) < __n) + std::__throw_bad_array_new_length (); + void *__p = omp_aligned_alloc (alignof(__T), __n * sizeof(__T), __Handle); + if (!__p) + std::__throw_bad_alloc (); + return static_cast<__T*>(__p); + } + + void + deallocate (__T *__p, size_type) __GOMP_NOTHROW + { + omp_free (static_cast<void*>(__p), __Handle); + } +}; + +template<typename __T, typename __U, omp_allocator_handle_t __Handle> +constexpr bool +operator== (const __allocator_templ<__T, __Handle>&, + const __allocator_templ<__U, __Handle>&) __GOMP_NOTHROW +{ + return true; +} + +template<typename __T, omp_allocator_handle_t __Handle, + typename __U, omp_allocator_handle_t __UHandle> +constexpr bool +operator== (const __allocator_templ<__T, __Handle>&, + const __allocator_templ<__U, __UHandle>&) __GOMP_NOTHROW +{ + return false; +} + +template<typename __T, typename __U, omp_allocator_handle_t __Handle> +constexpr bool +operator!= (const __allocator_templ<__T, __Handle>&, + const __allocator_templ<__U, __Handle>&) __GOMP_NOTHROW +{ + return false; +} + +template<typename __T, omp_allocator_handle_t __Handle, + typename __U, omp_allocator_handle_t __UHandle> +constexpr bool +operator!= (const __allocator_templ<__T, __Handle>&, + const __allocator_templ<__U, __UHandle>&) __GOMP_NOTHROW +{ + return true; +} + +} /* namespace __detail */ + +template<typename __T> +struct null_allocator + : __detail::__allocator_templ<__T, omp_null_allocator> {}; + +template<typename __T> +struct default_mem + : __detail::__allocator_templ<__T, omp_default_mem_alloc> {}; + +template<typename __T> +struct large_cap_mem + : __detail::__allocator_templ<__T, omp_large_cap_mem_alloc> {}; + +template<typename __T> +struct const_mem + : __detail::__allocator_templ<__T, omp_const_mem_alloc> {}; + +template<typename __T> +struct high_bw_mem + : __detail::__allocator_templ<__T, omp_high_bw_mem_alloc> {}; + +template<typename __T> +struct low_lat_mem + : __detail::__allocator_templ<__T, omp_low_lat_mem_alloc> {}; + +template<typename __T> +struct cgroup_mem + : __detail::__allocator_templ<__T, omp_cgroup_mem_alloc> {}; + +template<typename __T> +struct pteam_mem + : __detail::__allocator_templ<__T, omp_pteam_mem_alloc> {}; + +template<typename __T> +struct thread_mem + : __detail::__allocator_templ<__T, omp_thread_mem_alloc> {}; + +} /* namespace allocator */ + +} /* namespace omp */ + +namespace ompx +{ + +namespace allocator +{ + +template<typename __T> +struct gnu_pinned_mem + : omp::allocator::__detail::__allocator_templ<__T, ompx_gnu_pinned_mem_alloc> {}; + +} /* namespace allocator */ + +} /* namespace ompx */ + +#endif /* __cplusplus */ + #endif /* _OMP_H */ diff --git a/libgomp/omp_lib.f90.in b/libgomp/omp_lib.f90.in index cb6b95f..ce866c0 100644 --- a/libgomp/omp_lib.f90.in +++ b/libgomp/omp_lib.f90.in @@ -904,6 +904,29 @@ end interface interface + function omp_target_memset (ptr, val, count, device_num) bind(c) + use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_size_t + type(c_ptr) :: omp_target_memset + type(c_ptr), value :: ptr + integer(c_size_t), value :: count + integer(c_int), value :: val, device_num + end function omp_target_memset + end interface + + interface + function omp_target_memset_async (ptr, val, count, device_num, & + depobj_count, depobj_list) bind(c) + use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_size_t + import :: omp_depend_kind + type(c_ptr) :: omp_target_memset_async + type(c_ptr), value :: ptr + integer(c_size_t), value :: count + integer(c_int), value :: val, device_num, depobj_count + integer(omp_depend_kind), optional :: depobj_list(*) + end function omp_target_memset_async + end interface + + interface function omp_target_associate_ptr (host_ptr, device_ptr, size, & device_offset, device_num) bind(c) use, intrinsic :: iso_c_binding, only : c_ptr, c_size_t, c_int diff --git a/libgomp/omp_lib.h.in b/libgomp/omp_lib.h.in index f7af5ff..9047095 100644 --- a/libgomp/omp_lib.h.in +++ b/libgomp/omp_lib.h.in @@ -505,6 +505,31 @@ end interface interface + function omp_target_memset (ptr, val, count, device_num) bind(c) + use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_size_t + type(c_ptr) omp_target_memset + type(c_ptr), value :: ptr + integer(c_size_t), value :: count + integer(c_int), value :: val, device_num + end function omp_target_memset + end interface + + interface + function omp_target_memset_async (ptr, val, count, device_num, & + & depobj_count, depobj_list) & + & bind(c) + use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_size_t + import :: omp_depend_kind + type(c_ptr) :: omp_target_memset_async + type(c_ptr), value :: ptr + integer(c_size_t), value :: count + integer(c_int), value :: val, device_num, depobj_count + integer(omp_depend_kind), optional :: depobj_list(*) + end function omp_target_memset_async + end interface + + + interface function omp_target_associate_ptr (host_ptr, device_ptr, size, & & device_offset, device_num) & & bind(c) diff --git a/libgomp/openacc.f90 b/libgomp/openacc.f90 index 8ef107e..3f2db45 100644 --- a/libgomp/openacc.f90 +++ b/libgomp/openacc.f90 @@ -797,6 +797,9 @@ module openacc public :: acc_copyout_finalize, acc_delete_finalize public :: acc_memcpy_to_device, acc_memcpy_to_device_async public :: acc_memcpy_from_device, acc_memcpy_from_device_async + public :: acc_memcpy_device, acc_memcpy_device_async + public :: acc_attach, acc_attach_async, acc_detach, acc_detach_async + public :: acc_detach_finalize, acc_detach_finalize_async integer, parameter :: openacc_version = 201711 @@ -1046,6 +1049,69 @@ module openacc end subroutine end interface + interface + subroutine acc_memcpy_device (data_dev_dest, data_dev_src, bytes) bind(C) + use iso_c_binding, only: c_ptr, c_size_t + type(c_ptr), value :: data_dev_dest + type(c_ptr), value :: data_dev_src + integer(c_size_t), value :: bytes + end subroutine + end interface + + interface + subroutine acc_memcpy_device_async (data_dev_dest, data_dev_src, & + bytes, async_arg) bind(C) + use iso_c_binding, only: c_ptr, c_size_t + import :: acc_handle_kind + type(c_ptr), value :: data_dev_dest + type(c_ptr), value :: data_dev_src + integer(c_size_t), value :: bytes + integer(acc_handle_kind), value :: async_arg + end subroutine + end interface + + interface + subroutine acc_attach (ptr_addr) bind(C) + type(*), dimension(..) :: ptr_addr + end subroutine + end interface + + interface + subroutine acc_attach_async (ptr_addr, async_arg) bind(C) + import :: acc_handle_kind + type(*), dimension(..) :: ptr_addr + integer(acc_handle_kind), value :: async_arg + end subroutine + end interface + + interface + subroutine acc_detach (ptr_addr) bind(C) + type(*), dimension(..) :: ptr_addr + end subroutine + end interface + + interface + subroutine acc_detach_async (ptr_addr, async_arg) bind(C) + import :: acc_handle_kind + type(*), dimension(..) :: ptr_addr + integer(acc_handle_kind), value :: async_arg + end subroutine + end interface + + interface + subroutine acc_detach_finalize (ptr_addr) bind(C) + type(*), dimension(..) :: ptr_addr + end subroutine + end interface + + interface + subroutine acc_detach_finalize_async (ptr_addr, async_arg) bind(C) + import :: acc_handle_kind + type(*), dimension(..) :: ptr_addr + integer(acc_handle_kind), value :: async_arg + end subroutine + end interface + interface acc_copyin_async procedure :: acc_copyin_async_32_h procedure :: acc_copyin_async_64_h diff --git a/libgomp/openacc.h b/libgomp/openacc.h index a520bbe..3085b00 100644 --- a/libgomp/openacc.h +++ b/libgomp/openacc.h @@ -123,6 +123,7 @@ void *acc_hostptr (void *) __GOACC_NOTHROW; int acc_is_present (void *, size_t) __GOACC_NOTHROW; void acc_memcpy_to_device (void *, void *, size_t) __GOACC_NOTHROW; void acc_memcpy_from_device (void *, void *, size_t) __GOACC_NOTHROW; +void acc_memcpy_device (void *, void *, size_t) __GOACC_NOTHROW; void acc_attach (void **) __GOACC_NOTHROW; void acc_attach_async (void **, int) __GOACC_NOTHROW; void acc_detach (void **) __GOACC_NOTHROW; @@ -136,7 +137,7 @@ void acc_delete_finalize_async (void *, size_t, int) __GOACC_NOTHROW; void acc_detach_finalize (void **) __GOACC_NOTHROW; void acc_detach_finalize_async (void **, int) __GOACC_NOTHROW; -/* Async functions, specified in OpenACC 2.5. */ +/* Async functions, specified in OpenACC 2.5, acc_memcpy_device in 2.6. */ void acc_copyin_async (void *, size_t, int) __GOACC_NOTHROW; void acc_create_async (void *, size_t, int) __GOACC_NOTHROW; void acc_copyout_async (void *, size_t, int) __GOACC_NOTHROW; @@ -145,6 +146,7 @@ void acc_update_device_async (void *, size_t, int) __GOACC_NOTHROW; void acc_update_self_async (void *, size_t, int) __GOACC_NOTHROW; void acc_memcpy_to_device_async (void *, void *, size_t, int) __GOACC_NOTHROW; void acc_memcpy_from_device_async (void *, void *, size_t, int) __GOACC_NOTHROW; +void acc_memcpy_device_async (void *, void *, size_t, int) __GOACC_NOTHROW; /* CUDA-specific routines. */ void *acc_get_current_cuda_device (void) __GOACC_NOTHROW; diff --git a/libgomp/openacc_lib.h b/libgomp/openacc_lib.h index b0d287e..dbdc4d7 100644 --- a/libgomp/openacc_lib.h +++ b/libgomp/openacc_lib.h @@ -528,6 +528,30 @@ end subroutine end interface + interface + subroutine acc_memcpy_device(data_dev_dest, data_dev_src, & + & bytes) bind(C) + use iso_c_binding, only: c_ptr, c_size_t + type(c_ptr), value :: data_dev_dest + type(c_ptr), value :: data_dev_src + integer(c_size_t), value :: bytes + end subroutine + end interface + + interface + subroutine acc_memcpy_device_async(data_dev_dest, & + & data_dev_src, bytes, & + & async_arg) bind(C) + use iso_c_binding, only: c_ptr, c_size_t + import :: acc_handle_kind + type(c_ptr), value :: data_dev_dest + type(c_ptr), value :: data_dev_src + integer(c_size_t), value :: bytes + integer(acc_handle_kind), value :: async_arg + end subroutine + end interface + + interface acc_copyin_async subroutine acc_copyin_async_32_h (a, len, async) use iso_c_binding, only: c_int32_t @@ -683,3 +707,45 @@ integer (acc_handle_kind) async_ end subroutine end interface + + interface + subroutine acc_attach (ptr_addr) bind(C) + type(*), dimension(..) :: ptr_addr + end subroutine + end interface + + interface + subroutine acc_attach_async (ptr_addr, async_arg) bind(C) + import :: acc_handle_kind + type(*), dimension(..) :: ptr_addr + integer(acc_handle_kind), value :: async_arg + end subroutine + end interface + + interface + subroutine acc_detach (ptr_addr) bind(C) + type(*), dimension(..) :: ptr_addr + end subroutine + end interface + + interface + subroutine acc_detach_async (ptr_addr, async_arg) bind(C) + import :: acc_handle_kind + type(*), dimension(..) :: ptr_addr + integer(acc_handle_kind), value :: async_arg + end subroutine + end interface + + interface + subroutine acc_detach_finalize (ptr_addr) bind(C) + type(*), dimension(..) :: ptr_addr + end subroutine + end interface + + interface + subroutine acc_detach_finalize_async(ptr_addr, async_arg)bind(C) + import :: acc_handle_kind + type(*), dimension(..) :: ptr_addr + integer(acc_handle_kind), value :: async_arg + end subroutine + end interface diff --git a/libgomp/plugin/cuda-lib.def b/libgomp/plugin/cuda-lib.def index eb562ac..7f4ddcc 100644 --- a/libgomp/plugin/cuda-lib.def +++ b/libgomp/plugin/cuda-lib.def @@ -42,6 +42,7 @@ CUDA_ONE_CALL (cuMemcpyHtoDAsync) CUDA_ONE_CALL (cuMemcpy2D) CUDA_ONE_CALL (cuMemcpy2DUnaligned) CUDA_ONE_CALL (cuMemcpy3D) +CUDA_ONE_CALL (cuMemsetD8) CUDA_ONE_CALL (cuMemFree) CUDA_ONE_CALL (cuMemFreeHost) CUDA_ONE_CALL (cuMemGetAddressRange) diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c index 5c65778..498b549 100644 --- a/libgomp/plugin/plugin-gcn.c +++ b/libgomp/plugin/plugin-gcn.c @@ -41,7 +41,9 @@ #include <hsa_ext_amd.h> #include <dlfcn.h> #include <signal.h> +#define _LIBGOMP_PLUGIN_INCLUDE 1 #include "libgomp-plugin.h" +#undef _LIBGOMP_PLUGIN_INCLUDE #include "config/gcn/libgomp-gcn.h" /* For struct output. */ #include "gomp-constants.h" #include <elf.h> @@ -190,6 +192,8 @@ struct hsa_runtime_fn_info uint64_t (*hsa_queue_add_write_index_release_fn) (const hsa_queue_t *queue, uint64_t value); uint64_t (*hsa_queue_load_read_index_acquire_fn) (const hsa_queue_t *queue); + uint64_t (*hsa_queue_load_read_index_relaxed_fn) (const hsa_queue_t *queue); + uint64_t (*hsa_queue_load_write_index_relaxed_fn) (const hsa_queue_t *queue); void (*hsa_signal_store_relaxed_fn) (hsa_signal_t signal, hsa_signal_value_t value); void (*hsa_signal_store_release_fn) (hsa_signal_t signal, @@ -204,6 +208,8 @@ struct hsa_runtime_fn_info hsa_status_t (*hsa_code_object_deserialize_fn) (void *serialized_code_object, size_t serialized_code_object_size, const char *options, hsa_code_object_t *code_object); + hsa_status_t (*hsa_amd_memory_fill_fn)(void *ptr, uint32_t value, + size_t count); hsa_status_t (*hsa_amd_memory_lock_fn) (void *host_ptr, size_t size, hsa_agent_t *agents, int num_agent, void **agent_ptr); @@ -216,6 +222,25 @@ struct hsa_runtime_fn_info const hsa_signal_t *dep_signals, hsa_signal_t completion_signal); }; +/* As an HIP runtime is dlopened, following structure defines function + pointers utilized by the interop feature of this plugin. + Add suffient type declarations to get this work. */ + +typedef int hipError_t; /* Actually an enum; 0 == success. */ +typedef void* hipCtx_t; +struct hipStream_s; +typedef struct hipStream_s* hipStream_t; + +struct hip_runtime_fn_info +{ + hipError_t (*hipStreamCreate_fn) (hipStream_t *); + hipError_t (*hipStreamDestroy_fn) (hipStream_t); + hipError_t (*hipStreamSynchronize_fn) (hipStream_t); + hipError_t (*hipCtxGetCurrent_fn) (hipCtx_t *ctx); + hipError_t (*hipSetDevice_fn) (int deviceId); + hipError_t (*hipGetDevice_fn) (int *deviceId); +}; + /* Structure describing the run-time and grid properties of an HSA kernel lauch. This needs to match the format passed to GOMP_OFFLOAD_run. */ @@ -553,9 +578,11 @@ struct hsa_context_info static struct hsa_context_info hsa_context; /* HSA runtime functions that are initialized in init_hsa_context. */ - static struct hsa_runtime_fn_info hsa_fns; +/* HIP runtime functions that are initialized in init_hip_runtime_functions. */ +static struct hip_runtime_fn_info hip_fns; + /* Heap space, allocated target-side, provided for use of newlib malloc. Each module should have it's own heap allocated. Beware that heap usage increases with OpenMP teams. See also arenas. */ @@ -578,10 +605,11 @@ static bool debug; static bool suppress_host_fallback; -/* Flag to locate HSA runtime shared library that is dlopened +/* Flag to locate HSA and HIP runtime shared libraries that are dlopened by this plug-in. */ static const char *hsa_runtime_lib; +static const char *hip_runtime_lib; /* Flag to decide if the runtime should support also CPU devices (can be a simulator). */ @@ -1068,6 +1096,10 @@ init_environment_variables (void) if (hsa_runtime_lib == NULL) hsa_runtime_lib = "libhsa-runtime64.so.1"; + hip_runtime_lib = secure_getenv ("HIP_RUNTIME_LIB"); + if (hip_runtime_lib == NULL) + hip_runtime_lib = "libamdhip64.so"; + support_cpu_devices = secure_getenv ("GCN_SUPPORT_CPU_DEVICES"); const char *x = secure_getenv ("GCN_NUM_TEAMS"); @@ -1418,12 +1450,15 @@ init_hsa_runtime_functions (void) DLSYM_FN (hsa_executable_iterate_symbols) DLSYM_FN (hsa_queue_add_write_index_release) DLSYM_FN (hsa_queue_load_read_index_acquire) + DLSYM_FN (hsa_queue_load_read_index_relaxed) + DLSYM_FN (hsa_queue_load_write_index_relaxed) DLSYM_FN (hsa_signal_wait_acquire) DLSYM_FN (hsa_signal_store_relaxed) DLSYM_FN (hsa_signal_store_release) DLSYM_FN (hsa_signal_load_acquire) DLSYM_FN (hsa_queue_destroy) DLSYM_FN (hsa_code_object_deserialize) + DLSYM_OPT_FN (hsa_amd_memory_fill) DLSYM_OPT_FN (hsa_amd_memory_lock) DLSYM_OPT_FN (hsa_amd_memory_unlock) DLSYM_OPT_FN (hsa_amd_memory_async_copy_rect) @@ -4365,6 +4400,511 @@ unlock: return retval; } + +static bool +init_hip_runtime_functions (void) +{ + bool inited = false; + if (inited) + return hip_fns.hipStreamCreate_fn != NULL; + inited = true; + + void *handle = dlopen (hip_runtime_lib, RTLD_LAZY); + if (handle == NULL) + return false; + +#define DLSYM_OPT_FN(function) \ + hip_fns.function##_fn = dlsym (handle, #function) + + DLSYM_OPT_FN (hipStreamCreate); + DLSYM_OPT_FN (hipStreamDestroy); + DLSYM_OPT_FN (hipStreamSynchronize); + DLSYM_OPT_FN (hipCtxGetCurrent); + DLSYM_OPT_FN (hipGetDevice); + DLSYM_OPT_FN (hipSetDevice); +#undef DLSYM_OPT_FN + + if (!hip_fns.hipStreamCreate_fn + || !hip_fns.hipStreamDestroy_fn + || !hip_fns.hipStreamSynchronize_fn + || !hip_fns.hipCtxGetCurrent_fn + || !hip_fns.hipGetDevice_fn + || !hip_fns.hipSetDevice_fn) + { + hip_fns.hipStreamCreate_fn = NULL; + return false; + } + + return true; +} + +bool +GOMP_OFFLOAD_memset (int ord, void *ptr, int val, size_t count) +{ + hsa_status_t status = HSA_STATUS_SUCCESS; + + /* A memset feature is only provided via hsa_amd_memory_fill; while it + is fast, it is an HSA extension and it has two requirements: The memory + must be aligned to multiples of 4 bytes - and, by construction, only + multiples of 4 bytes can be filled (uint32_t value argument). + + This means: Either not using that function or up to three function calls: + - copy 1 to 3 bytes to get alignment (hsa_memory_copy), if unaligned + - call hsa_amd_memory_fill + - copy remaining 1 to 3 bytes (hsa_memory_copy), if after alignment + count is not a multiple of 4 bytes. + + Having more than one function call is only profitable if there is + enough data to process; see below for the used heuristic values. */ + + uint8_t v8 = (uint8_t) val; + size_t before = (4 - (uintptr_t) ptr % 4) % 4; /* 0 to 3 bytes. */ + size_t tail = (count - before) % 4; /* 0 to 3 bytes. */ + + /* Heuristic */ + enum { + /* Prefer alloca to malloc up to ... */ + alloca_size = 256, /* bytes */ + /* Call hsa_amd_memory_fill also when two copy calls are required. */ + always_use_fill = 256*1024, /* bytes */ + /* Call hsa_amd_memory_fill also when on copy call is required. */ + use_fill_one_copy = (128+64)*1024 /* bytes */ + }; + + /* Do not call hsa_amd_memory_fill when any of the following conditions + is true. Note that it is always preferred if available and + before == tail == 0. */ + if (__builtin_expect (!hsa_fns.hsa_amd_memory_fill_fn, 0) + || (before && tail && count < always_use_fill) + || ((before || tail) && count < use_fill_one_copy)) + before = count; + + /* Copy call for alignment - or all data, if condition above is true. */ + if (before) + { + void *data; + if (before > alloca_size) + data = malloc (before * sizeof (uint8_t)); + else + data = alloca (before * sizeof (uint8_t)); + memset (data, val, before); + status = hsa_fns.hsa_memory_copy_fn (ptr, data, before); + if (before > alloca_size) + free (data); + if (data == 0 || status != HSA_STATUS_SUCCESS) + goto fail; + count -= before; + } + + if (count == 0) + return true; + + ptr += before; + + uint32_t values = v8 | (v8 << 8) | (v8 << 16) | (v8 << 24); + status = hsa_fns.hsa_amd_memory_fill_fn (ptr, values, count / 4); + if (tail && status == HSA_STATUS_SUCCESS) + { + ptr += count - tail; + status = hsa_fns.hsa_memory_copy_fn (ptr, &values, tail); + } + if (status == HSA_STATUS_SUCCESS) + return true; + +fail: + GOMP_PLUGIN_error ("memory set failed"); + return false; +} + +void +GOMP_OFFLOAD_interop (struct interop_obj_t *obj, int ord, + enum gomp_interop_flag action, bool targetsync, + const char *prefer_type) +{ + if ((action == gomp_interop_flag_destroy || action == gomp_interop_flag_use) + && !obj->stream) + return; + if ((action == gomp_interop_flag_destroy || action == gomp_interop_flag_use) + && obj->fr == omp_ifr_hsa) + { + /* Wait until the queue is is empty. */ + bool is_empty; + uint64_t read_index, write_index; + hsa_queue_t *queue = (hsa_queue_t *) obj->stream; + do + { + read_index = hsa_fns.hsa_queue_load_read_index_relaxed_fn (queue); + write_index = hsa_fns.hsa_queue_load_write_index_relaxed_fn (queue); + is_empty = (read_index == write_index); + } + while (!is_empty); + + if (action == gomp_interop_flag_destroy) + { + hsa_status_t status = hsa_fns.hsa_queue_destroy_fn (queue); + if (status != HSA_STATUS_SUCCESS) + hsa_fatal ("Error destroying interop hsa_queue_t", status); + } + return; + } + if (action == gomp_interop_flag_destroy) + { + hipError_t err = hip_fns.hipStreamDestroy_fn ((hipStream_t) obj->stream); + if (err != 0) + GOMP_PLUGIN_fatal ("Error destroying interop hipStream_t: %d", err); + return; + } + if (action == gomp_interop_flag_use) + { + hipError_t err + = hip_fns.hipStreamSynchronize_fn ((hipStream_t) obj->stream); + if (err != 0) + GOMP_PLUGIN_fatal ("Error synchronizing interop hipStream_t: %d", err); + return; + } + + bool fr_set = false; + + /* Check for the preferred type; cf. parser in C/C++/Fortran or + dump_omp_init_prefer_type for the format. + Accept the first '{...}' block that specifies a 'fr' that we support. + Currently, no 'attr(...)' are supported. */ + if (prefer_type) + while (prefer_type[0] == (char) GOMP_INTEROP_IFR_SEPARATOR) + { + /* '{' item block starts. */ + prefer_type++; + /* 'fr(...)' block */ + while (prefer_type[0] != (char) GOMP_INTEROP_IFR_SEPARATOR) + { + omp_interop_fr_t fr = (omp_interop_fr_t) prefer_type[0]; + if (fr == omp_ifr_hip) + { + obj->fr = omp_ifr_hip; + fr_set = true; + } + if (fr == omp_ifr_hsa) + { + obj->fr = omp_ifr_hsa; + fr_set = true; + } + prefer_type++; + } + prefer_type++; + /* 'attr(...)' block */ + while (prefer_type[0] != '\0') + { + /* const char *attr = prefer_type; */ + prefer_type += strlen (prefer_type) + 1; + } + prefer_type++; + /* end of '}'. */ + if (fr_set) + break; + } + + /* Prefer HIP, use HSA as fallback. The warning is only printed if GCN_DEBUG + is set and does not distinguishes between on prefer_type or hip prefer_type + nor whether a later/lower preference also specifies 'hsa'. + The assumption is that the user code handles HSA gracefully, but likely + just by falling back to the host version. On the other hand, have_hip is + likely true if HSA is available. */ + if (!fr_set || obj->fr == omp_ifr_hip) + { + bool have_hip = init_hip_runtime_functions (); + if (have_hip) + obj->fr = omp_ifr_hip; + else + { + GCN_WARNING ("interop object requested, using HSA instead of HIP " + "as %s could not be loaded", hip_runtime_lib); + obj->fr = omp_ifr_hsa; + } + } + + _Static_assert (sizeof (uint64_t) == sizeof (hsa_agent_t), + "sizeof (uint64_t) == sizeof (hsa_agent_t)"); + struct agent_info *agent = get_agent_info (ord); + obj->device_data = agent; + + if (targetsync && obj->fr == omp_ifr_hsa) + { + hsa_status_t status; + /* Queue size must be (for GPUs) a power of 2 >= 40, i.e. at least 64 and + maximally HSA_AGENT_INFO_QUEUE_MAX_SIZE. Arbitrary choice: */ + uint32_t queue_size = ASYNC_QUEUE_SIZE; + status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size, + HSA_QUEUE_TYPE_MULTI, + NULL, NULL, UINT32_MAX, UINT32_MAX, + (hsa_queue_t **) &obj->stream); + if (status != HSA_STATUS_SUCCESS) + hsa_fatal ("Error creating interop hsa_queue_t", status); + } + else if (targetsync) + { + hipError_t err; + int dev_curr; + err = hip_fns.hipGetDevice_fn (&dev_curr); + if (!err && ord != dev_curr) + err = hip_fns.hipSetDevice_fn (ord); + if (!err) + err = hip_fns.hipStreamCreate_fn ((hipStream_t *) &obj->stream); + if (!err && ord != dev_curr) + err = hip_fns.hipSetDevice_fn (dev_curr); + if (err != 0) + GOMP_PLUGIN_fatal ("Error creating interop hipStream_t: %d", err); + } +} + +intptr_t +GOMP_OFFLOAD_get_interop_int (struct interop_obj_t *obj, + omp_interop_property_t property_id, + omp_interop_rc_t *ret_code) +{ + if (obj->fr != omp_ifr_hip && obj->fr != omp_ifr_hsa) + { + if (ret_code) + *ret_code = omp_irc_no_value; /* Hmm. */ + return 0; + } + switch (property_id) + { + case omp_ipr_fr_id: + if (ret_code) + *ret_code = omp_irc_success; + return obj->fr; + case omp_ipr_fr_name: + if (ret_code) + *ret_code = omp_irc_type_str; + return 0; + case omp_ipr_vendor: + if (ret_code) + *ret_code = omp_irc_success; + return 1; /* amd */ + case omp_ipr_vendor_name: + if (ret_code) + *ret_code = omp_irc_type_str; + return 0; + case omp_ipr_device_num: + if (ret_code) + *ret_code = omp_irc_success; + return obj->device_num; + case omp_ipr_platform: + if (ret_code) + *ret_code = omp_irc_no_value; + return 0; + case omp_ipr_device: + if (obj->fr == omp_ifr_hsa) + { + if (ret_code) + *ret_code = omp_irc_type_ptr; + return 0; + } + if (ret_code) + *ret_code = omp_irc_success; + return ((struct agent_info *) obj->device_data)->device_id; + case omp_ipr_device_context: + if (ret_code && obj->fr == omp_ifr_hsa) + *ret_code = omp_irc_no_value; + else if (ret_code) + *ret_code = omp_irc_type_ptr; + return 0; + case omp_ipr_targetsync: + if (ret_code && !obj->stream) + *ret_code = omp_irc_no_value; + else if (ret_code) + *ret_code = omp_irc_type_ptr; + return 0; + default: + break; + } + __builtin_unreachable (); + return 0; +} + +void * +GOMP_OFFLOAD_get_interop_ptr (struct interop_obj_t *obj, + omp_interop_property_t property_id, + omp_interop_rc_t *ret_code) +{ + if (obj->fr != omp_ifr_hip && obj->fr != omp_ifr_hsa) + { + if (ret_code) + *ret_code = omp_irc_no_value; /* Hmm. */ + return 0; + } + switch (property_id) + { + case omp_ipr_fr_id: + if (ret_code) + *ret_code = omp_irc_type_int; + return NULL; + case omp_ipr_fr_name: + if (ret_code) + *ret_code = omp_irc_type_str; + return NULL; + case omp_ipr_vendor: + if (ret_code) + *ret_code = omp_irc_type_str; + return NULL; + case omp_ipr_vendor_name: + if (ret_code) + *ret_code = omp_irc_type_str; + return NULL; + case omp_ipr_device_num: + if (ret_code) + *ret_code = omp_irc_type_int; + return NULL; + case omp_ipr_platform: + if (ret_code) + *ret_code = omp_irc_no_value; + return NULL; + case omp_ipr_device: + if (obj->fr == omp_ifr_hsa) + { + if (ret_code) + *ret_code = omp_irc_success; + /* hsa_agent_t is an struct containing a single uint64_t. */ + return &((struct agent_info *) obj->device_data)->id; + } + else + { + if (ret_code) + *ret_code = omp_irc_type_int; + return NULL; + } + case omp_ipr_device_context: + if (obj->fr == omp_ifr_hsa) + { + if (ret_code) + *ret_code = omp_irc_no_value; + return NULL; + } + else + { + hipCtx_t ctx; + int dev_curr; + int dev = ((struct agent_info *) obj->device_data)->device_id; + hipError_t err; + err = hip_fns.hipGetDevice_fn (&dev_curr); + if (!err && dev != dev_curr) + err = hip_fns.hipSetDevice_fn (dev); + if (!err) + err = hip_fns.hipCtxGetCurrent_fn (&ctx); + if (!err && dev != dev_curr) + err = hip_fns.hipSetDevice_fn (dev_curr); + if (err) + GOMP_PLUGIN_fatal ("Error obtaining hipCtx_t for device %d: %d", + obj->device_num, err); + if (ret_code) + *ret_code = omp_irc_success; + return ctx; + } + case omp_ipr_targetsync: + if (!obj->stream) + { + if (ret_code) + *ret_code = omp_irc_no_value; + return NULL; + } + if (ret_code) + *ret_code = omp_irc_success; + return obj->stream; + default: + break; + } + __builtin_unreachable (); + return NULL; +} + +const char * +GOMP_OFFLOAD_get_interop_str (struct interop_obj_t *obj, + omp_interop_property_t property_id, + omp_interop_rc_t *ret_code) +{ + if (obj->fr != omp_ifr_hip && obj->fr != omp_ifr_hsa) + { + if (ret_code) + *ret_code = omp_irc_no_value; /* Hmm. */ + return 0; + } + switch (property_id) + { + case omp_ipr_fr_id: + if (ret_code) + *ret_code = omp_irc_type_int; + return NULL; + case omp_ipr_fr_name: + if (ret_code) + *ret_code = omp_irc_success; + if (obj->fr == omp_ifr_hip) + return "hip"; + if (obj->fr == omp_ifr_hsa) + return "hsa"; + case omp_ipr_vendor: + if (ret_code) + *ret_code = omp_irc_type_int; + return NULL; + case omp_ipr_vendor_name: + if (ret_code) + *ret_code = omp_irc_success; + return "amd"; + case omp_ipr_device_num: + if (ret_code) + *ret_code = omp_irc_type_int; + return NULL; + case omp_ipr_platform: + if (ret_code) + *ret_code = omp_irc_no_value; + return NULL; + case omp_ipr_device: + if (ret_code && obj->fr == omp_ifr_hsa) + *ret_code = omp_irc_type_ptr; + else if (ret_code) + *ret_code = omp_irc_type_int; + return NULL; + case omp_ipr_device_context: + if (ret_code && obj->fr == omp_ifr_hsa) + *ret_code = omp_irc_no_value; + else if (ret_code) + *ret_code = omp_irc_type_ptr; + return NULL; + case omp_ipr_targetsync: + if (ret_code && !obj->stream) + *ret_code = omp_irc_no_value; + else if (ret_code) + *ret_code = omp_irc_type_ptr; + return NULL; + default: + break; + } + __builtin_unreachable (); + return 0; +} + +const char * +GOMP_OFFLOAD_get_interop_type_desc (struct interop_obj_t *obj, + omp_interop_property_t property_id) +{ + _Static_assert (omp_ipr_targetsync == omp_ipr_first, + "omp_ipr_targetsync == omp_ipr_first"); + _Static_assert (omp_ipr_platform - omp_ipr_first + 1 == 4, + "omp_ipr_platform - omp_ipr_first + 1 == 4"); + static const char *desc_hip[] = {"N/A", /* platform */ + "hipDevice_t", /* device */ + "hipCtx_t", /* device_context */ + "hipStream_t"}; /* targetsync */ + static const char *desc_hsa[] = {"N/A", /* platform */ + "hsa_agent_t *", /* device */ + "N/A", /* device_context */ + "hsa_queue_t *"}; /* targetsync */ + if (obj->fr == omp_ifr_hip) + return desc_hip[omp_ipr_platform - property_id]; + else + return desc_hsa[omp_ipr_platform - property_id]; + return NULL; +} + /* }}} */ /* {{{ OpenMP Plugin API */ @@ -4619,7 +5159,8 @@ GOMP_OFFLOAD_openacc_async_queue_callback (struct goacc_asyncqueue *aq, queue_push_callback (aq, fn, data); } -/* Queue up an asynchronous data copy from host to DEVICE. */ +/* Queue up an asynchronous data copy from host to DEVICE. + (Also handles dev2host and dev2dev.) */ bool GOMP_OFFLOAD_openacc_async_host2dev (int device, void *dst, const void *src, @@ -4637,10 +5178,16 @@ bool GOMP_OFFLOAD_openacc_async_dev2host (int device, void *dst, const void *src, size_t n, struct goacc_asyncqueue *aq) { - struct agent_info *agent = get_agent_info (device); - assert (agent == aq->agent); - queue_push_copy (aq, dst, src, n); - return true; + return GOMP_OFFLOAD_openacc_async_host2dev (device, dst, src, n, aq); +} + +/* Queue up an asynchronous data copy from DEVICE to DEVICE. */ + +bool +GOMP_OFFLOAD_openacc_async_dev2dev (int device, void *dst, const void *src, + size_t n, struct goacc_asyncqueue *aq) +{ + return GOMP_OFFLOAD_openacc_async_host2dev (device, dst, src, n, aq); } union goacc_property_value diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index c47461e..0ba445e 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -35,7 +35,9 @@ #include "openacc.h" #include "config.h" #include "symcat.h" +#define _LIBGOMP_PLUGIN_INCLUDE 1 #include "libgomp-plugin.h" +#undef _LIBGOMP_PLUGIN_INCLUDE #include "oacc-plugin.h" #include "gomp-constants.h" #include "oacc-int.h" @@ -2017,6 +2019,34 @@ GOMP_OFFLOAD_openacc_async_queue_callback (struct goacc_asyncqueue *aq, } static bool +cuda_memcpy_dev_sanity_check (const void *d1, const void *d2, size_t s) +{ + CUdeviceptr pb1, pb2; + size_t ps1, ps2; + if (!s) + return true; + if (!d1 || !d2) + { + GOMP_PLUGIN_error ("invalid device address"); + return false; + } + CUDA_CALL (cuMemGetAddressRange, &pb1, &ps1, (CUdeviceptr) d1); + CUDA_CALL (cuMemGetAddressRange, &pb2, &ps2, (CUdeviceptr) d2); + if (!pb1 || !pb2) + { + GOMP_PLUGIN_error ("invalid device address"); + return false; + } + if ((void *)(d1 + s) > (void *)(pb1 + ps1) + || (void *)(d2 + s) > (void *)(pb2 + ps2)) + { + GOMP_PLUGIN_error ("invalid size"); + return false; + } + return true; +} + +static bool cuda_memcpy_sanity_check (const void *h, const void *d, size_t s) { CUdeviceptr pb; @@ -2075,6 +2105,9 @@ GOMP_OFFLOAD_dev2host (int ord, void *dst, const void *src, size_t n) bool GOMP_OFFLOAD_dev2dev (int ord, void *dst, const void *src, size_t n) { + if (!nvptx_attach_host_thread_to_device (ord) + || !cuda_memcpy_dev_sanity_check (dst, src, n)) + return false; CUDA_CALL (cuMemcpyDtoDAsync, (CUdeviceptr) dst, (CUdeviceptr) src, n, NULL); return true; } @@ -2265,6 +2298,15 @@ GOMP_OFFLOAD_memcpy3d (int dst_ord, int src_ord, size_t dim2_size, } bool +GOMP_OFFLOAD_memset (int ord, void *ptr, int val, size_t count) +{ + if (!nvptx_attach_host_thread_to_device (ord)) + return false; + CUDA_CALL (cuMemsetD8, (CUdeviceptr) ptr, (unsigned char) val, count); + return true; +} + +bool GOMP_OFFLOAD_openacc_async_host2dev (int ord, void *dst, const void *src, size_t n, struct goacc_asyncqueue *aq) { @@ -2286,6 +2328,18 @@ GOMP_OFFLOAD_openacc_async_dev2host (int ord, void *dst, const void *src, return true; } +bool +GOMP_OFFLOAD_openacc_async_dev2dev (int ord, void *dst, const void *src, + size_t n, struct goacc_asyncqueue *aq) +{ + if (!nvptx_attach_host_thread_to_device (ord) + || !cuda_memcpy_dev_sanity_check (dst, src, n)) + return false; + CUDA_CALL (cuMemcpyDtoDAsync, (CUdeviceptr) dst, (CUdeviceptr) src, n, + aq->cuda_stream); + return true; +} + union goacc_property_value GOMP_OFFLOAD_openacc_get_property (int n, enum goacc_property prop) { @@ -2425,6 +2479,320 @@ nvptx_stacks_acquire (struct ptx_device *ptx_dev, size_t size, int num) return (void *) ptx_dev->omp_stacks.ptr; } +void +GOMP_OFFLOAD_interop (struct interop_obj_t *obj, int ord, + enum gomp_interop_flag action, bool targetsync, + const char *prefer_type) +{ + obj->fr = omp_ifr_cuda; + + if (action == gomp_interop_flag_destroy) + { + if (obj->stream) + CUDA_CALL_ASSERT (cuStreamDestroy, obj->stream); + return; + } + if (action == gomp_interop_flag_use) + { + if (obj->stream) + CUDA_CALL_ASSERT (cuStreamSynchronize, obj->stream); + return; + } + + /* Check for the preferred type; cf. parser in C/C++/Fortran or + dump_omp_init_prefer_type for the format. + Accept the first '{...}' block that specifies a 'fr' that we support. + Currently, no 'attr(...)' are supported. */ + if (prefer_type) + while (prefer_type[0] == (char) GOMP_INTEROP_IFR_SEPARATOR) + { + bool found = false; + /* '{' item block starts. */ + prefer_type++; + /* 'fr(...)' block */ + while (prefer_type[0] != (char) GOMP_INTEROP_IFR_SEPARATOR) + { + omp_interop_fr_t fr = (omp_interop_fr_t) prefer_type[0]; + if (fr == omp_ifr_cuda + || fr == omp_ifr_cuda_driver + || fr == omp_ifr_hip) + { + obj->fr = fr; + found = true; + } + prefer_type++; + } + prefer_type++; + /* 'attr(...)' block */ + while (prefer_type[0] != '\0') + { + /* const char *attr = prefer_type; */ + prefer_type += strlen (prefer_type) + 1; + } + prefer_type++; + /* end of '}'. */ + if (found) + break; + } + + struct ptx_device *ptx_dev = obj->device_data = ptx_devices[ord]; + + if (targetsync) + { + CUstream stream = NULL; + CUdevice cur_ctx_dev; + CUresult res = CUDA_CALL_NOCHECK (cuCtxGetDevice, &cur_ctx_dev); + if (res != CUDA_SUCCESS && res != CUDA_ERROR_INVALID_CONTEXT) + GOMP_PLUGIN_fatal ("cuCtxGetDevice error: %s", cuda_error (res)); + if (res != CUDA_ERROR_INVALID_CONTEXT && ptx_dev->dev == cur_ctx_dev) + CUDA_CALL_ASSERT (cuStreamCreate, &stream, CU_STREAM_DEFAULT); + else + { + CUcontext old_ctx; + assert (ptx_dev->ctx); + CUDA_CALL_ASSERT (cuCtxPushCurrent, ptx_dev->ctx); + CUDA_CALL_ASSERT (cuStreamCreate, &stream, CU_STREAM_DEFAULT); + if (res != CUDA_ERROR_INVALID_CONTEXT) + CUDA_CALL_ASSERT (cuCtxPopCurrent, &old_ctx); + } + obj->stream = stream; + } +} + + +intptr_t +GOMP_OFFLOAD_get_interop_int (struct interop_obj_t *obj, + omp_interop_property_t property_id, + omp_interop_rc_t *ret_code) +{ + if (obj->fr != omp_ifr_cuda + && obj->fr != omp_ifr_cuda_driver + && obj->fr != omp_ifr_hip) + { + if (ret_code) + *ret_code = omp_irc_no_value; /* Hmm. */ + return 0; + } + switch (property_id) + { + case omp_ipr_fr_id: + if (ret_code) + *ret_code = omp_irc_success; + return obj->fr; + case omp_ipr_fr_name: + if (ret_code) + *ret_code = omp_irc_type_str; + return 0; + case omp_ipr_vendor: + if (ret_code) + *ret_code = omp_irc_success; + return 11; /* nvidia */ + case omp_ipr_vendor_name: + if (ret_code) + *ret_code = omp_irc_type_str; + return 0; + case omp_ipr_device_num: + if (ret_code) + *ret_code = omp_irc_success; + return obj->device_num; + case omp_ipr_platform: + if (ret_code) + *ret_code = omp_irc_no_value; + return 0; + case omp_ipr_device: + if (ret_code) + *ret_code = omp_irc_success; + return ((struct ptx_device *) obj->device_data)->dev; + case omp_ipr_device_context: + if (ret_code && obj->fr == omp_ifr_cuda) + *ret_code = omp_irc_no_value; + else if (ret_code) + *ret_code = omp_irc_type_ptr; + return 0; + case omp_ipr_targetsync: + if (!obj->stream) + { + if (ret_code) + *ret_code = omp_irc_no_value; + return 0; + } + /* ptr fits into (u)intptr_t */ + if (ret_code) + *ret_code = omp_irc_success; + return (uintptr_t) obj->stream; + default: + break; + } + __builtin_unreachable (); + return 0; +} + +void * +GOMP_OFFLOAD_get_interop_ptr (struct interop_obj_t *obj, + omp_interop_property_t property_id, + omp_interop_rc_t *ret_code) +{ + if (obj->fr != omp_ifr_cuda + && obj->fr != omp_ifr_cuda_driver + && obj->fr != omp_ifr_hip) + { + if (ret_code) + *ret_code = omp_irc_no_value; /* Hmm. */ + return 0; + } + switch (property_id) + { + case omp_ipr_fr_id: + if (ret_code) + *ret_code = omp_irc_type_int; + return NULL; + case omp_ipr_fr_name: + if (ret_code) + *ret_code = omp_irc_type_str; + return NULL; + case omp_ipr_vendor: + if (ret_code) + *ret_code = omp_irc_type_int; + return NULL; + case omp_ipr_vendor_name: + if (ret_code) + *ret_code = omp_irc_type_str; + return NULL; + case omp_ipr_device_num: + if (ret_code) + *ret_code = omp_irc_type_int; + return NULL; + case omp_ipr_platform: + if (ret_code) + *ret_code = omp_irc_no_value; + return NULL; + case omp_ipr_device: + if (ret_code) + *ret_code = omp_irc_type_int; + return NULL; + case omp_ipr_device_context: + if (obj->fr == omp_ifr_cuda) + { + if (ret_code) + *ret_code = omp_irc_no_value; + return NULL; + } + if (ret_code) + *ret_code = omp_irc_success; + return ((struct ptx_device *) obj->device_data)->ctx; + case omp_ipr_targetsync: + if (!obj->stream) + { + if (ret_code) + *ret_code = omp_irc_no_value; + return NULL; + } + if (ret_code) + *ret_code = omp_irc_success; + return obj->stream; + default: + break; + } + __builtin_unreachable (); + return NULL; +} + +const char * +GOMP_OFFLOAD_get_interop_str (struct interop_obj_t *obj, + omp_interop_property_t property_id, + omp_interop_rc_t *ret_code) +{ + if (obj->fr != omp_ifr_cuda + && obj->fr != omp_ifr_cuda_driver + && obj->fr != omp_ifr_hip) + { + if (ret_code) + *ret_code = omp_irc_no_value; /* Hmm. */ + return 0; + } + switch (property_id) + { + case omp_ipr_fr_id: + if (ret_code) + *ret_code = omp_irc_type_int; + return NULL; + case omp_ipr_fr_name: + if (ret_code) + *ret_code = omp_irc_success; + if (obj->fr == omp_ifr_cuda) + return "cuda"; + if (obj->fr == omp_ifr_cuda_driver) + return "cuda_driver"; + if (obj->fr == omp_ifr_hip) + return "hip"; + break; + case omp_ipr_vendor: + if (ret_code) + *ret_code = omp_irc_type_int; + return NULL; + case omp_ipr_vendor_name: + if (ret_code) + *ret_code = omp_irc_success; + return "nvidia"; + case omp_ipr_device_num: + if (ret_code) + *ret_code = omp_irc_type_int; + return NULL; + case omp_ipr_platform: + if (ret_code) + *ret_code = omp_irc_no_value; + return NULL; + case omp_ipr_device: + if (ret_code) + *ret_code = omp_irc_type_ptr; + return NULL; + case omp_ipr_device_context: + if (ret_code && obj->fr == omp_ifr_cuda) + *ret_code = omp_irc_no_value; + else if (ret_code) + *ret_code = omp_irc_type_ptr; + return NULL; + case omp_ipr_targetsync: + if (ret_code && !obj->stream) + *ret_code = omp_irc_no_value; + else if (ret_code) + *ret_code = omp_irc_type_ptr; + return NULL; + default: + break; + } + __builtin_unreachable (); + return NULL; +} + +const char * +GOMP_OFFLOAD_get_interop_type_desc (struct interop_obj_t *obj, + omp_interop_property_t property_id) +{ + _Static_assert (omp_ipr_targetsync == omp_ipr_first, + "omp_ipr_targetsync == omp_ipr_first"); + _Static_assert (omp_ipr_platform - omp_ipr_first + 1 == 4, + "omp_ipr_platform - omp_ipr_first + 1 == 4"); + static const char *desc_cuda[] = {"N/A", /* platform */ + "int", /* device */ + "N/A", /* device_context */ + "cudaStream_t"}; /* targetsync */ + static const char *desc_cuda_driver[] = {"N/A", /* platform */ + "CUdevice", /* device */ + "CUcontext", /* device_context */ + "CUstream"}; /* targetsync */ + static const char *desc_hip[] = {"N/A", /* platform */ + "hipDevice_t", /* device */ + "hipCtx_t", /* device_context */ + "hipStream_t"}; /* targetsync */ + if (obj->fr == omp_ifr_cuda) + return desc_cuda[omp_ipr_platform - property_id]; + if (obj->fr == omp_ifr_cuda_driver) + return desc_cuda_driver[omp_ipr_platform - property_id]; + else + return desc_hip[omp_ipr_platform - property_id]; + return NULL; +} void GOMP_OFFLOAD_run (int ord, void *tgt_fn, void *tgt_vars, void **args) diff --git a/libgomp/target-cxa-dso-dtor.c b/libgomp/target-cxa-dso-dtor.c new file mode 100644 index 0000000..d1a898d --- /dev/null +++ b/libgomp/target-cxa-dso-dtor.c @@ -0,0 +1,3 @@ +/* Host/device compatibility: Itanium C++ ABI, DSO Object Destruction API */ + +/* Nothing needed here. */ diff --git a/libgomp/target.c b/libgomp/target.c index dbc4535..cda092b 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -146,7 +146,8 @@ resolve_device (int device_id, bool remapped) called, which must be done before using default_device_var. */ int num_devices = gomp_get_num_devices (); - if (remapped && device_id == GOMP_DEVICE_ICV) + if ((remapped && device_id == GOMP_DEVICE_ICV) + || device_id == GOMP_DEVICE_DEFAULT_OMP_61) { struct gomp_task_icv *icv = gomp_icv (false); device_id = icv->default_device_var; @@ -460,6 +461,19 @@ gomp_copy_dev2host (struct gomp_device_descr *devicep, gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz); } +attribute_hidden void +gomp_copy_dev2dev (struct gomp_device_descr *devicep, + struct goacc_asyncqueue *aq, + void *dst, const void *src, size_t sz) +{ + if (__builtin_expect (aq != NULL, 0)) + goacc_device_copy_async (devicep, devicep->openacc.async.dev2dev_func, + "dev", dst, "dev", src, NULL, sz, aq); + else + gomp_device_copy (devicep, devicep->dev2dev_func, "dev", dst, + "dev", src, sz); +} + static void gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr) { @@ -799,12 +813,22 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, (void *) cur_node.host_end); } -attribute_hidden void +/* Update the devptr by setting it to the device address of the host pointee + 'attach_to'; devptr is obtained from the splay_tree_key n. + When the pointer is already attached or the host pointee is either + NULL or in memory map, this function returns true. + Otherwise, the device pointer is set to point to the host pointee and: + - If allow_zero_length_array_sections is set, true is returned. + - Else, if fail_if_not_found is set, a fatal error is issued. + - Otherwise, false is returned. */ + +attribute_hidden bool gomp_attach_pointer (struct gomp_device_descr *devicep, struct goacc_asyncqueue *aq, splay_tree mem_map, splay_tree_key n, uintptr_t attach_to, size_t bias, struct gomp_coalesce_buf *cbufp, - bool allow_zero_length_array_sections) + bool allow_zero_length_array_sections, + bool fail_if_not_found) { struct splay_tree_key_s s; size_t size, idx; @@ -859,7 +883,7 @@ gomp_attach_pointer (struct gomp_device_descr *devicep, gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data, sizeof (void *), true, cbufp); - return; + return true; } s.host_start = target + bias; @@ -868,15 +892,16 @@ gomp_attach_pointer (struct gomp_device_descr *devicep, if (!tn) { - if (allow_zero_length_array_sections) - /* When allowing attachment to zero-length array sections, we - copy the host pointer when the target region is not mapped. */ - data = target; - else + /* We copy the host pointer when the target region is not mapped; + for allow_zero_length_array_sections, that's permitted. + Otherwise, it depends on the context. Return false in that + case, unless fail_if_not_found. */ + if (!allow_zero_length_array_sections && fail_if_not_found) { gomp_mutex_unlock (&devicep->lock); gomp_fatal ("pointer target not mapped for attach"); } + data = target; } else data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start; @@ -888,10 +913,13 @@ gomp_attach_pointer (struct gomp_device_descr *devicep, gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data, sizeof (void *), true, cbufp); + if (!tn && !allow_zero_length_array_sections) + return false; } else gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__, (void *) attach_to, (int) n->aux->attach_count[idx]); + return true; } attribute_hidden void @@ -1586,9 +1614,37 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, bool zlas = ((kind & typemask) == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION); - gomp_attach_pointer (devicep, aq, mem_map, n, - (uintptr_t) hostaddrs[i], sizes[i], - cbufp, zlas); + /* For 'target enter data', the map clauses are split; + however, for more complex code with struct and + pointer members, the mapping and the attach can end up + in different sets; or the wrong mapping with the + attach. As there is no way to know whether a size + zero like 'var->ptr[i][:0]' happend in the same + directive or not, the not-attached check is now + fully silenced for 'enter data'. */ + if (openmp_p && (pragma_kind & GOMP_MAP_VARS_ENTER_DATA)) + zlas = true; + if (!gomp_attach_pointer (devicep, aq, mem_map, n, + (uintptr_t) hostaddrs[i], sizes[i], + cbufp, zlas, !openmp_p)) + { + /* Pointee not found; that's an error except for + map(var[:n]) with n == 0; the compiler adds a + runtime condition such that for those the kind is + always GOMP_MAP_ZERO_LEN_ARRAY_SECTION. */ + for (j = i; j > 0; j--) + if (*(void**) hostaddrs[i] == hostaddrs[j-1] - sizes[i] + && sizes[j-1] == 0 + && (GOMP_MAP_ZERO_LEN_ARRAY_SECTION + == (get_kind (short_mapkind, kinds, j-1) + & typemask))) + break; + if (j == 0) + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("pointer target not mapped for attach"); + } + } } else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0) { @@ -2585,6 +2641,10 @@ gomp_unload_image_from_device (struct gomp_device_descr *devicep, } } +#define GOMP_REQUIRES_NAME_BUF_LEN \ + sizeof ("unified_address, unified_shared_memory, " \ + "self_maps, reverse_offload") + static void gomp_requires_to_name (char *buf, size_t size, int requires_mask) { @@ -2633,10 +2693,8 @@ GOMP_offload_register_ver (unsigned version, const void *host_table, if (omp_req && omp_requires_mask && omp_requires_mask != omp_req) { - char buf1[sizeof ("unified_address, unified_shared_memory, " - "self_maps, reverse_offload")]; - char buf2[sizeof ("unified_address, unified_shared_memory, " - "self_maps, reverse_offload")]; + char buf1[GOMP_REQUIRES_NAME_BUF_LEN]; + char buf2[GOMP_REQUIRES_NAME_BUF_LEN]; gomp_requires_to_name (buf2, sizeof (buf2), omp_req != GOMP_REQUIRES_TARGET_USED ? omp_req : omp_requires_mask); @@ -4947,6 +5005,88 @@ omp_target_memcpy_rect_async (void *dst, const void *src, size_t element_size, return 0; } +static void +omp_target_memset_int (void *ptr, int val, size_t count, + struct gomp_device_descr *devicep) +{ + if (__builtin_expect (count == 0, 0)) + return; + if (devicep == NULL) + { + memset (ptr, val, count); + return; + } + + gomp_mutex_lock (&devicep->lock); + int ret = devicep->memset_func (devicep->target_id, ptr, val, count); + gomp_mutex_unlock (&devicep->lock); + if (!ret) + gomp_fatal ("omp_target_memset failed"); +} + +void* +omp_target_memset (void *ptr, int val, size_t count, int device_num) +{ + struct gomp_device_descr *devicep; + if (device_num == omp_initial_device + || device_num == gomp_get_num_devices () + || (devicep = resolve_device (device_num, false)) == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + devicep = NULL; + + omp_target_memset_int (ptr, val, count, devicep); + return ptr; +} + +typedef struct +{ + void *ptr; + size_t count; + struct gomp_device_descr *devicep; + int val; +} omp_target_memset_data; + +static void +omp_target_memset_async_helper (void *args) +{ + omp_target_memset_data *a = args; + omp_target_memset_int (a->ptr, a->val, a->count, a->devicep); +} + +void* +omp_target_memset_async (void *ptr, int val, size_t count, int device_num, + int depobj_count, omp_depend_t *depobj_list) +{ + void *depend[depobj_count + 5]; + struct gomp_device_descr *devicep; + unsigned flags = 0; + int i; + + if (device_num == omp_initial_device + || device_num == gomp_get_num_devices () + || (devicep = resolve_device (device_num, false)) == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + devicep = NULL; + + omp_target_memset_data s = {.ptr = ptr, .val = val, .count = count, + .devicep = devicep}; + if (depobj_count > 0 && depobj_list != NULL) + { + flags |= GOMP_TASK_FLAG_DEPEND; + depend[0] = 0; + depend[1] = (void *) (uintptr_t) depobj_count; + depend[2] = depend[3] = depend[4] = 0; + for (i = 0; i < depobj_count; ++i) + depend[i + 5] = &depobj_list[i]; + } + + GOMP_task (omp_target_memset_async_helper, &s, NULL, sizeof (s), + __alignof__ (s), true, flags, depend, 0, NULL); + return ptr; +} + int omp_target_associate_ptr (const void *host_ptr, const void *device_ptr, size_t size, size_t device_offset, int device_num) @@ -5136,45 +5276,78 @@ omp_get_num_interop_properties (const omp_interop_t interop } omp_intptr_t -omp_get_interop_int (const omp_interop_t interop __attribute__ ((unused)), +omp_get_interop_int (const omp_interop_t interop, omp_interop_property_t property_id, omp_interop_rc_t *ret_code) { - if (ret_code == NULL) - return 0; + struct interop_obj_t *obj = (struct interop_obj_t *) interop; + struct gomp_device_descr *devicep; + if (property_id < omp_ipr_first || property_id >= 0) - *ret_code = omp_irc_out_of_range; - else - *ret_code = omp_irc_empty; /* Assume omp_interop_none. */ - return 0; + { + if (ret_code) + *ret_code = omp_irc_out_of_range; + return 0; + } + if (obj == NULL + || (devicep = resolve_device (obj->device_num, false)) == NULL + || devicep->get_interop_int_func == NULL) + { + if (ret_code) + *ret_code = omp_irc_empty; /* Assume omp_interop_none. */ + return 0; + } + return devicep->get_interop_int_func (obj, property_id, ret_code); } void * -omp_get_interop_ptr (const omp_interop_t interop __attribute__ ((unused)), +omp_get_interop_ptr (const omp_interop_t interop, omp_interop_property_t property_id, omp_interop_rc_t *ret_code) { - if (ret_code == NULL) - return NULL; + struct interop_obj_t *obj = (struct interop_obj_t *) interop; + struct gomp_device_descr *devicep; + if (property_id < omp_ipr_first || property_id >= 0) - *ret_code = omp_irc_out_of_range; - else - *ret_code = omp_irc_empty; /* Assume omp_interop_none. */ - return NULL; + { + if (ret_code) + *ret_code = omp_irc_out_of_range; + return 0; + } + if (obj == NULL + || (devicep = resolve_device (obj->device_num, false)) == NULL + || devicep->get_interop_int_func == NULL) + { + if (ret_code) + *ret_code = omp_irc_empty; /* Assume omp_interop_none. */ + return 0; + } + return devicep->get_interop_ptr_func (obj, property_id, ret_code); } const char * -omp_get_interop_str (const omp_interop_t interop __attribute__ ((unused)), +omp_get_interop_str (const omp_interop_t interop, omp_interop_property_t property_id, omp_interop_rc_t *ret_code) { - if (ret_code == NULL) - return NULL; + struct interop_obj_t *obj = (struct interop_obj_t *) interop; + struct gomp_device_descr *devicep; + if (property_id < omp_ipr_first || property_id >= 0) - *ret_code = omp_irc_out_of_range; - else - *ret_code = omp_irc_empty; /* Assume omp_interop_none. */ - return NULL; + { + if (ret_code) + *ret_code = omp_irc_out_of_range; + return 0; + } + if (obj == NULL + || (devicep = resolve_device (obj->device_num, false)) == NULL + || devicep->get_interop_int_func == NULL) + { + if (ret_code) + *ret_code = omp_irc_empty; /* Assume omp_interop_none. */ + return 0; + } + return devicep->get_interop_str_func (obj, property_id, ret_code); } const char * @@ -5194,18 +5367,24 @@ omp_get_interop_type_desc (const omp_interop_t interop, omp_interop_property_t property_id) { static const char *desc[omp_ipr_fr_id - omp_ipr_device_num + 1] - = {"omp_interop_t", /* fr_id */ - "const char*", /* fr_name */ + = {"omp_interop_t", /* fr_id */ + "const char *", /* fr_name */ "int", /* vendor */ "const char *", /* vendor_name */ "int"}; /* device_num */ + + struct interop_obj_t *obj = (struct interop_obj_t *) interop; + struct gomp_device_descr *devicep; + if (property_id > omp_ipr_fr_id || property_id < omp_ipr_first) return NULL; - if (interop == omp_interop_none) + if (obj == NULL + || (devicep = resolve_device (obj->device_num, false)) == NULL + || devicep->get_interop_int_func == NULL) return NULL; if (property_id >= omp_ipr_device_num) return desc[omp_ipr_fr_id - property_id]; - return NULL; /* FIXME: Call plugin. */ + return devicep->get_interop_type_desc_func (obj, property_id); } const char * @@ -5236,6 +5415,120 @@ ialias (omp_get_interop_name) ialias (omp_get_interop_type_desc) ialias (omp_get_interop_rc_desc) +struct interop_data_t +{ + int device_num, n_init, n_use, n_destroy; + struct interop_obj_t ***init; + struct interop_obj_t **use; + struct interop_obj_t ***destroy; + const int *target_targetsync; + const char **prefer_type; +}; + +static void +gomp_interop_internal (void *data) +{ + struct interop_data_t *args = (struct interop_data_t *) data; + struct gomp_device_descr *devicep; + + /* Destroy objects to free resources. */ + for (int i = 0; i < args->n_destroy; i++) + { + struct interop_obj_t **obj = args->destroy[i]; + if (*obj == NULL /* omp_interop_none */) + continue; + devicep = resolve_device ((*obj)->device_num, false); + if (devicep != NULL && devicep->interop_func) + devicep->interop_func (*obj, devicep->target_id, + gomp_interop_flag_destroy, false, NULL); + free (*obj); + *obj = NULL; + } + + /* Init streams next to give 'use' more time for completion. */ + if (args->n_init) + { + devicep = resolve_device (args->device_num, false); + for (int i = 0; i < args->n_init; i++) + { + struct interop_obj_t **obj = args->init[i]; + bool targetsync + = (args->target_targetsync[i] & GOMP_INTEROP_TARGETSYNC); + const char *prefer_type + = (args->prefer_type ? args->prefer_type[i] : NULL); + if (devicep == NULL || !devicep->interop_func) + { + *obj = NULL; + continue; + } + *obj = + (struct interop_obj_t *) calloc (1, sizeof (struct interop_obj_t)); + (*obj)->device_num = devicep->target_id; + devicep->interop_func (*obj, devicep->target_id, + gomp_interop_flag_init, targetsync, + prefer_type); + } + } + + for (int i = 0; i < args->n_use; i++) + { + struct interop_obj_t *obj = args->use[i]; + if (obj == NULL) + continue; + devicep = resolve_device (obj->device_num, false); + if (devicep != NULL && devicep->interop_func) + devicep->interop_func (obj, devicep->target_id, + gomp_interop_flag_use, false, NULL); + } +} + +/* Process the OpenMP interop directive. 'init' and 'destroy' take an array + of 'omp_interop_t *', 'use' an array of 'omp_interop_t', where + 'omp_interop_t' is internally 'struct interop_obj_t *'; + 'flags' is used for the 'nowait' clause. */ + +void +GOMP_interop (int device_num, int n_init, struct interop_obj_t ***init, + const int *target_targetsync, const char **prefer_type, int n_use, + struct interop_obj_t **use, int n_destroy, + struct interop_obj_t ***destroy, unsigned int flags, + void **depend) +{ + struct interop_data_t args; + args.device_num = device_num; + args.n_init = n_init; + args.n_use = n_use; + args.n_destroy = n_destroy; + args.init = init; + args.target_targetsync = target_targetsync; + args.prefer_type = prefer_type; + args.use = use; + args.destroy = destroy; + + /* No need to create a task for 'init' as that should be fast. */ + bool use_task = false; + if (flags & GOMP_INTEROP_FLAG_NOWAIT) + { + for (int i = 0; i < n_use && !use_task; i++) + if (args.use[i]) + use_task |= args.use[i]->stream != NULL; + for (int i = 0; i < n_destroy && !use_task; i++) + if (*args.destroy[i]) + use_task |= (*args.destroy[i])->stream != NULL; + } + + if (use_task) + GOMP_task (gomp_interop_internal, &args, NULL, sizeof (args), + __alignof__ (args), true, depend ? GOMP_TASK_FLAG_DEPEND : 0, + depend, 0, NULL); + else + { + gomp_interop_internal (&args); + if (depend) + GOMP_taskwait_depend (depend); + } +} + static const char * gomp_get_uid_for_device (struct gomp_device_descr *devicep, int device_num) { @@ -5344,6 +5637,14 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device, DLSYM (host2dev); DLSYM_OPT (memcpy2d, memcpy2d); DLSYM_OPT (memcpy3d, memcpy3d); + if (DLSYM_OPT (interop, interop)) + { + DLSYM (get_interop_int); + DLSYM (get_interop_ptr); + DLSYM (get_interop_str); + DLSYM (get_interop_type_desc); + } + device->capabilities = device->get_caps_func (); if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) { @@ -5351,6 +5652,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device, DLSYM_OPT (async_run, async_run); DLSYM_OPT (can_run, can_run); DLSYM (dev2dev); + DLSYM (memset); } if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200) { @@ -5369,6 +5671,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device, || !DLSYM_OPT (openacc.async.exec, openacc_async_exec) || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host) || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev) + || !DLSYM_OPT (openacc.async.dev2dev, openacc_async_dev2dev) || !DLSYM_OPT (openacc.get_property, openacc_get_property)) { /* Require all the OpenACC handlers if we have @@ -5485,8 +5788,7 @@ gomp_target_init (void) found = true; if (found) { - char buf[sizeof ("unified_address, unified_shared_memory, " - "reverse_offload")]; + char buf[GOMP_REQUIRES_NAME_BUF_LEN]; gomp_requires_to_name (buf, sizeof (buf), omp_req); char *name = (char *) malloc (cur_len + 1); memcpy (name, cur, cur_len); diff --git a/libgomp/testsuite/lib/libgomp.exp b/libgomp/testsuite/lib/libgomp.exp index fd21371..fd475ac 100644 --- a/libgomp/testsuite/lib/libgomp.exp +++ b/libgomp/testsuite/lib/libgomp.exp @@ -30,6 +30,7 @@ load_gcc_lib scandump.exp load_gcc_lib scanlang.exp load_gcc_lib scanrtl.exp load_gcc_lib scansarif.exp +load_gcc_lib scanhtml.exp load_gcc_lib scantree.exp load_gcc_lib scanltrans.exp load_gcc_lib scanoffload.exp @@ -233,11 +234,8 @@ proc libgomp_init { args } { # error-message parsing machinery. lappend ALWAYS_CFLAGS "additional_flags=-fmessage-length=0" - # Disable caret - lappend ALWAYS_CFLAGS "additional_flags=-fno-diagnostics-show-caret" - - # Disable color diagnostics - lappend ALWAYS_CFLAGS "additional_flags=-fdiagnostics-color=never" + # Disable caret, color, URL diagnostics + lappend ALWAYS_CFLAGS "additional_flags=-fdiagnostics-plain-output" # Help GCC to find offload compilers' 'mkoffload'. global offload_additional_options @@ -556,7 +554,23 @@ int main() { } } "-lcuda" ] } -# Return 1 if cublas_v2.h and -lcublas are available. +# Return 1 if -lcuda is available (header not required). + +proc check_effective_target_openacc_libcuda { } { + return [check_no_compiler_messages openacc_libcuda executable { +typedef enum { CUDA_SUCCESS } CUresult; +typedef int CUdevice; +CUresult cuDeviceGet (CUdevice *, int); +int main() { + CUdevice dev; + CUresult r = cuDeviceGet (&dev, 0); + if (r != CUDA_SUCCESS) + return 1; + return 0; +} } "-lcuda" ] +} + +# Return 1 if cublas_v2.h, cuda.h, -lcublas and -lcuda are available. proc check_effective_target_openacc_cublas { } { return [check_no_compiler_messages openacc_cublas executable { @@ -576,7 +590,25 @@ int main() { } } "-lcuda -lcublas" ] } -# Return 1 if cuda_runtime_api.h and -lcudart are available. +# Return 1 if -lcublas is available header not required). + +proc check_effective_target_openacc_libcublas { } { + return [check_no_compiler_messages openacc_libcublas executable { +typedef enum { CUBLAS_STATUS_SUCCESS } cublasStatus_t; +typedef struct cublasContext* cublasHandle_t; +#define cublasCreate cublasCreate_v2 +cublasStatus_t cublasCreate_v2 (cublasHandle_t *); +int main() { + cublasStatus_t s; + cublasHandle_t h; + s = cublasCreate (&h); + if (s != CUBLAS_STATUS_SUCCESS) + return 1; + return 0; +} } "-lcublas" ] +} + +# Return 1 if cuda_runtime_api.h, cuda.h, -lcuda and -lcudart are available. proc check_effective_target_openacc_cudart { } { return [check_no_compiler_messages openacc_cudart executable { @@ -595,3 +627,98 @@ int main() { return 0; } } "-lcuda -lcudart" ] } + +# Return 1 if -lcudart is available (no header required). + +proc check_effective_target_openacc_libcudart { } { + return [check_no_compiler_messages openacc_libcudart executable { +typedef int cudaError_t; +cudaError_t cudaGetDevice(int *); +enum { cudaSuccess }; +int main() { + cudaError_t e; + int devn; + e = cudaGetDevice (&devn); + if (e != cudaSuccess) + return 1; + return 0; +} } "-lcudart" ] +} + +# Return 1 if hip.h is available (no link check; AMD platform). + +proc check_effective_target_gomp_hip_header_amd { } { + return [check_no_compiler_messages gomp_hip_header_amd assembly { +#define __HIP_PLATFORM_AMD__ +#include <hip/hip_runtime_api.h> +int main() { + hipDevice_t dev; + hipError_t r = hipDeviceGet (&dev, 0); + if (r != hipSuccess) + return 1; + return 0; +} }] +} + +# Return 1 if hip.h is available (no link check; Nvidia/CUDA platform). + +proc check_effective_target_gomp_hip_header_nvidia { } { + return [check_no_compiler_messages gomp_hip_header_nvidia assembly { +#define __HIP_PLATFORM_NVIDIA__ +#include <hip/hip_runtime_api.h> +int main() { + hipDevice_t dev; + hipError_t r = hipDeviceGet (&dev, 0); + if (r != hipSuccess) + return 1; + return 0; +} } "-Wno-deprecated-declarations"] +} + +# Return 1 if the Fortran hipfort module is available (no link check) + +proc check_effective_target_gomp_hipfort_module { } { + return [check_no_compiler_messages gomp_hipfort_module assembly { +! Fortran +use hipfort +implicit none +integer(kind(hipSuccess)) :: r +integer(c_int) :: dev +r = hipDeviceGet (dev, 0) +if (r /= hipSuccess) error stop +end +}] +} + +# Return 1 if AMD HIP's -lamdhip64 is available (no header required). + +proc check_effective_target_gomp_libamdhip64 { } { + return [check_no_compiler_messages gomp_libamdhip64 executable { +typedef int hipError_t; +typedef int hipDevice_t; +enum { hipSuccess = 0 }; +hipError_t hipDeviceGet(hipDevice_t*, int); +int main() { + hipDevice_t dev; + hipError_t r = hipDeviceGet (&dev, 0); + if (r != hipSuccess) + return 1; + return 0; +} } "-lamdhip64" ] +} + +# Return 1 if AMD HIP's -lamdhip64 is available (no header required). + +proc check_effective_target_gomp_libhipblas { } { + return [check_no_compiler_messages gomp_libhipblas executable { +typedef enum { HIPBLAS_STATUS_SUCCESS = 0 } hipblasStatus_t; +typedef void* hipblasHandle_t; +hipblasStatus_t hipblasCreate (hipblasHandle_t*); +int main() { + hipblasHandle_t handle; + hipblasStatus_t stat = hipblasCreate (&handle); + if (stat != HIPBLAS_STATUS_SUCCESS) + return 1; + return 0; +} } "-lhipblas" ] +} diff --git a/libgomp/testsuite/libgomp.c++/allocator-1.C b/libgomp/testsuite/libgomp.c++/allocator-1.C new file mode 100644 index 0000000..49425386 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/allocator-1.C @@ -0,0 +1,171 @@ +// { dg-do run } + +#include <omp.h> +#include <memory> +#include <limits> + +template<typename T, template<typename> class Alloc> +void test (T const initial_value = T()) +{ + using Allocator = Alloc<T>; + Allocator a; + using Traits = std::allocator_traits<Allocator>; + static_assert (__is_same(typename Traits::allocator_type, Allocator )); + static_assert (__is_same(typename Traits::value_type, T )); + static_assert (__is_same(typename Traits::pointer, T* )); + static_assert (__is_same(typename Traits::const_pointer, T const* )); + static_assert (__is_same(typename Traits::void_pointer, void* )); + static_assert (__is_same(typename Traits::const_void_pointer, void const* )); + static_assert (__is_same(typename Traits::difference_type, __PTRDIFF_TYPE__)); + static_assert (__is_same(typename Traits::size_type, __SIZE_TYPE__ )); + static_assert (Traits::propagate_on_container_copy_assignment::value == false); + static_assert (Traits::propagate_on_container_move_assignment::value == false); + static_assert (Traits::propagate_on_container_swap::value == false); + static_assert (Traits::is_always_equal::value == true); + + static constexpr __SIZE_TYPE__ correct_max_size + = std::numeric_limits<__SIZE_TYPE__>::max () / sizeof (T); + if (Traits::max_size (a) != correct_max_size) + __builtin_abort (); + + static constexpr __SIZE_TYPE__ alloc_count = 1; + T *p = Traits::allocate (a, alloc_count); + if (p == nullptr) + __builtin_abort (); + Traits::construct (a, p, initial_value); + if (*p != initial_value) + __builtin_abort (); + Traits::destroy (a, p); + Traits::deallocate (a, p, alloc_count); + /* Not interesting but might as well test it. */ + static_cast<void>(Traits::select_on_container_copy_construction (a)); + + if (!(a == Allocator())) + __builtin_abort (); + if (a != Allocator()) + __builtin_abort (); + if (!(a == Alloc<void>())) + __builtin_abort (); + if (a != Alloc<void>()) + __builtin_abort (); +} + +#define CHECK_INEQUALITY(other_alloc_templ, type) \ +do { \ + /* Skip tests for itself, those are equal. Intantiate each */ \ + /* one with void so we can easily tell if they are the same. */ \ + if (!__is_same (AllocTempl<void>, other_alloc_templ<void>)) \ + { \ + other_alloc_templ<type> other; \ + if (a == other) \ + __builtin_abort (); \ + if (!(a != other)) \ + __builtin_abort (); \ + } \ +} while (false) + +template<typename T, template<typename> class AllocTempl> +void test_inequality () +{ + using Allocator = AllocTempl<T>; + Allocator a; + CHECK_INEQUALITY (omp::allocator::null_allocator, void); + CHECK_INEQUALITY (omp::allocator::default_mem, void); + CHECK_INEQUALITY (omp::allocator::large_cap_mem, void); + CHECK_INEQUALITY (omp::allocator::const_mem, void); + CHECK_INEQUALITY (omp::allocator::high_bw_mem, void); + CHECK_INEQUALITY (omp::allocator::low_lat_mem, void); + CHECK_INEQUALITY (omp::allocator::cgroup_mem, void); + CHECK_INEQUALITY (omp::allocator::pteam_mem, void); + CHECK_INEQUALITY (omp::allocator::thread_mem, void); +#ifdef __gnu_linux__ + /* Pinning not implemented on other targets. */ + CHECK_INEQUALITY (ompx::allocator::gnu_pinned_mem, void); +#endif + /* And again with the same type passed to the allocator. */ + CHECK_INEQUALITY (omp::allocator::null_allocator, T); + CHECK_INEQUALITY (omp::allocator::default_mem, T); + CHECK_INEQUALITY (omp::allocator::large_cap_mem, T); + CHECK_INEQUALITY (omp::allocator::const_mem, T); + CHECK_INEQUALITY (omp::allocator::high_bw_mem, T); + CHECK_INEQUALITY (omp::allocator::low_lat_mem, T); + CHECK_INEQUALITY (omp::allocator::cgroup_mem, T); + CHECK_INEQUALITY (omp::allocator::pteam_mem, T); + CHECK_INEQUALITY (omp::allocator::thread_mem, T); +#ifdef __gnu_linux__ + CHECK_INEQUALITY (ompx::allocator::gnu_pinned_mem, T); +#endif +} + +#undef CHECK_INEQUALITY + +struct S +{ + int _v0; + bool _v1; + float _v2; + + bool operator== (S const& other) const noexcept { + return _v0 == other._v0 + && _v1 == other._v1 + && _v2 == other._v2; + } + bool operator!= (S const& other) const noexcept { + return !this->operator==(other); + } +}; + +int main () +{ + test<int, omp::allocator::null_allocator>(42); + test<int, omp::allocator::default_mem>(42); + test<int, omp::allocator::large_cap_mem>(42); + test<int, omp::allocator::const_mem>(42); + test<int, omp::allocator::high_bw_mem>(42); + test<int, omp::allocator::low_lat_mem>(42); + test<int, omp::allocator::cgroup_mem>(42); + test<int, omp::allocator::pteam_mem>(42); + test<int, omp::allocator::thread_mem>(42); +#ifdef __gnu_linux__ + test<int, ompx::allocator::gnu_pinned_mem>(42); +#endif + + test<long long, omp::allocator::null_allocator>(42); + test<long long, omp::allocator::default_mem>(42); + test<long long, omp::allocator::large_cap_mem>(42); + test<long long, omp::allocator::const_mem>(42); + test<long long, omp::allocator::high_bw_mem>(42); + test<long long, omp::allocator::low_lat_mem>(42); + test<long long, omp::allocator::cgroup_mem>(42); + test<long long, omp::allocator::pteam_mem>(42); + test<long long, omp::allocator::thread_mem>(42); +#ifdef __gnu_linux__ + test<long long, ompx::allocator::gnu_pinned_mem>(42); +#endif + + test<S, omp::allocator::null_allocator>( S{42, true, 128.f}); + test<S, omp::allocator::default_mem>( S{42, true, 128.f}); + test<S, omp::allocator::large_cap_mem>( S{42, true, 128.f}); + test<S, omp::allocator::const_mem>( S{42, true, 128.f}); + test<S, omp::allocator::high_bw_mem>( S{42, true, 128.f}); + test<S, omp::allocator::low_lat_mem>( S{42, true, 128.f}); + test<S, omp::allocator::cgroup_mem>( S{42, true, 128.f}); + test<S, omp::allocator::pteam_mem>( S{42, true, 128.f}); + test<S, omp::allocator::thread_mem>( S{42, true, 128.f}); +#ifdef __gnu_linux__ + test<S, ompx::allocator::gnu_pinned_mem>(S{42, true, 128.f}); +#endif + + test_inequality<int, omp::allocator::null_allocator>(); + test_inequality<int, omp::allocator::default_mem>(); + test_inequality<int, omp::allocator::large_cap_mem>(); + test_inequality<int, omp::allocator::const_mem>(); + test_inequality<int, omp::allocator::high_bw_mem>(); + test_inequality<int, omp::allocator::low_lat_mem>(); + test_inequality<int, omp::allocator::cgroup_mem>(); + test_inequality<int, omp::allocator::pteam_mem>(); + test_inequality<int, omp::allocator::thread_mem>(); +#ifdef __gnu_linux__ + test_inequality<int, ompx::allocator::gnu_pinned_mem>(); +#endif +} diff --git a/libgomp/testsuite/libgomp.c++/allocator-2.C b/libgomp/testsuite/libgomp.c++/allocator-2.C new file mode 100644 index 0000000..ca94fc7 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/allocator-2.C @@ -0,0 +1,141 @@ +// { dg-do run } +// { dg-additional-options "-Wno-psabi" } + +#include <omp.h> +#include <vector> + +template<typename T> +bool ptr_is_aligned(T *ptr, std::size_t alignment) +{ + /* ALIGNMENT must be a power of 2. */ + if ((alignment & (alignment - 1)) != 0) + __builtin_abort (); + __UINTPTR_TYPE__ ptr_value + = reinterpret_cast<__UINTPTR_TYPE__>(static_cast<void*>(ptr)); + return (ptr_value % alignment) == 0; +} + +template<typename T, template<typename> class Alloc> +void f (T v0, T v1, T v2, T v3) +{ + std::vector<T, Alloc<T>> vec; + vec.push_back (v0); + vec.push_back (v1); + vec.push_back (v2); + vec.push_back (v3); + if (vec.at (0) != v0) + __builtin_abort (); + if (vec.at (1) != v1) + __builtin_abort (); + if (vec.at (2) != v2) + __builtin_abort (); + if (vec.at (3) != v3) + __builtin_abort (); + if (!ptr_is_aligned (&vec.at (0), alignof (T))) + __builtin_abort (); + if (!ptr_is_aligned (&vec.at (1), alignof (T))) + __builtin_abort (); + if (!ptr_is_aligned (&vec.at (2), alignof (T))) + __builtin_abort (); + if (!ptr_is_aligned (&vec.at (3), alignof (T))) + __builtin_abort (); +} + +struct S0 +{ + int _v0; + bool _v1; + float _v2; + + bool operator== (S0 const& other) const noexcept { + return _v0 == other._v0 + && _v1 == other._v1 + && _v2 == other._v2; + } + bool operator!= (S0 const& other) const noexcept { + return !this->operator==(other); + } +}; + +struct alignas(128) S1 +{ + int _v0; + bool _v1; + float _v2; + + bool operator== (S1 const& other) const noexcept { + return _v0 == other._v0 + && _v1 == other._v1 + && _v2 == other._v2; + } + bool operator!= (S1 const& other) const noexcept { + return !this->operator==(other); + } +}; + +/* Note: the test for const_mem should be disabled in the future. */ + +int main () +{ + f<int, omp::allocator::null_allocator >(0, 1, 2, 3); + f<int, omp::allocator::default_mem >(0, 1, 2, 3); + f<int, omp::allocator::large_cap_mem >(0, 1, 2, 3); + f<int, omp::allocator::const_mem >(0, 1, 2, 3); + f<int, omp::allocator::high_bw_mem >(0, 1, 2, 3); + f<int, omp::allocator::low_lat_mem >(0, 1, 2, 3); + f<int, omp::allocator::cgroup_mem >(0, 1, 2, 3); + f<int, omp::allocator::pteam_mem >(0, 1, 2, 3); + f<int, omp::allocator::thread_mem >(0, 1, 2, 3); +#ifdef __gnu_linux__ + /* Pinning not implemented on other targets. */ + f<int, ompx::allocator::gnu_pinned_mem>(0, 1, 2, 3); +#endif + + f<long long, omp::allocator::null_allocator >(0, 1, 2, 3); + f<long long, omp::allocator::default_mem >(0, 1, 2, 3); + f<long long, omp::allocator::large_cap_mem >(0, 1, 2, 3); + f<long long, omp::allocator::const_mem >(0, 1, 2, 3); + f<long long, omp::allocator::high_bw_mem >(0, 1, 2, 3); + f<long long, omp::allocator::low_lat_mem >(0, 1, 2, 3); + f<long long, omp::allocator::cgroup_mem >(0, 1, 2, 3); + f<long long, omp::allocator::pteam_mem >(0, 1, 2, 3); + f<long long, omp::allocator::thread_mem >(0, 1, 2, 3); +#ifdef __gnu_linux__ + f<long long, ompx::allocator::gnu_pinned_mem>(0, 1, 2, 3); +#endif + + S0 s0_0{ 42, true, 111128.f}; + S0 s0_1{ 142, false, 11128.f}; + S0 s0_2{ 1142, true, 1128.f}; + S0 s0_3{11142, false, 128.f}; + f<S0, omp::allocator::null_allocator >(s0_0, s0_1, s0_2, s0_3); + f<S0, omp::allocator::default_mem >(s0_0, s0_1, s0_2, s0_3); + f<S0, omp::allocator::large_cap_mem >(s0_0, s0_1, s0_2, s0_3); + f<S0, omp::allocator::const_mem >(s0_0, s0_1, s0_2, s0_3); + f<S0, omp::allocator::high_bw_mem >(s0_0, s0_1, s0_2, s0_3); + f<S0, omp::allocator::low_lat_mem >(s0_0, s0_1, s0_2, s0_3); + f<S0, omp::allocator::cgroup_mem >(s0_0, s0_1, s0_2, s0_3); + f<S0, omp::allocator::pteam_mem >(s0_0, s0_1, s0_2, s0_3); + f<S0, omp::allocator::thread_mem >(s0_0, s0_1, s0_2, s0_3); +#ifdef __gnu_linux__ + f<S0, ompx::allocator::gnu_pinned_mem>(s0_0, s0_1, s0_2, s0_3); +#endif + + S1 s1_0{ 42, true, 111128.f}; + S1 s1_1{ 142, false, 11128.f}; + S1 s1_2{ 1142, true, 1128.f}; + S1 s1_3{11142, false, 128.f}; + + f<S1, omp::allocator::null_allocator >(s1_0, s1_1, s1_2, s1_3); + f<S1, omp::allocator::default_mem >(s1_0, s1_1, s1_2, s1_3); + f<S1, omp::allocator::large_cap_mem >(s1_0, s1_1, s1_2, s1_3); + f<S1, omp::allocator::const_mem >(s1_0, s1_1, s1_2, s1_3); + f<S1, omp::allocator::high_bw_mem >(s1_0, s1_1, s1_2, s1_3); + f<S1, omp::allocator::low_lat_mem >(s1_0, s1_1, s1_2, s1_3); + f<S1, omp::allocator::cgroup_mem >(s1_0, s1_1, s1_2, s1_3); + f<S1, omp::allocator::pteam_mem >(s1_0, s1_1, s1_2, s1_3); + f<S1, omp::allocator::thread_mem >(s1_0, s1_1, s1_2, s1_3); +#ifdef __gnu_linux__ + f<S1, ompx::allocator::gnu_pinned_mem>(s1_0, s1_1, s1_2, s1_3); +#endif +} diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-1.C b/libgomp/testsuite/libgomp.c++/declare-mapper-1.C new file mode 100644 index 0000000..aba4f42 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/declare-mapper-1.C @@ -0,0 +1,87 @@ +// { dg-do run } + +#include <cstdlib> +#include <cassert> + +#define N 64 + +struct points +{ + double *x; + double *y; + double *z; + size_t len; +}; + +#pragma omp declare mapper(points p) map(to:p.x, p.y, p.z) \ + map(p.x[0:p.len]) \ + map(p.y[0:p.len]) \ + map(p.z[0:p.len]) + +struct shape +{ + points tmp; + points *pts; + int metadata[128]; +}; + +#pragma omp declare mapper(shape s) map(tofrom:s.pts, *s.pts) map(alloc:s.tmp) + +void +alloc_points (points *pts, size_t sz) +{ + pts->x = new double[sz]; + pts->y = new double[sz]; + pts->z = new double[sz]; + pts->len = sz; + for (int i = 0; i < sz; i++) + pts->x[i] = pts->y[i] = pts->z[i] = 0; +} + +int main (int argc, char *argv[]) +{ + shape myshape; + points mypts; + + myshape.pts = &mypts; + + alloc_points (&myshape.tmp, N); + myshape.pts = new points; + alloc_points (myshape.pts, N); + + #pragma omp target map(myshape) + { + for (int i = 0; i < N; i++) + { + myshape.pts->x[i]++; + myshape.pts->y[i]++; + myshape.pts->z[i]++; + } + } + + for (int i = 0; i < N; i++) + { + assert (myshape.pts->x[i] == 1); + assert (myshape.pts->y[i] == 1); + assert (myshape.pts->z[i] == 1); + } + + #pragma omp target + { + for (int i = 0; i < N; i++) + { + myshape.pts->x[i]++; + myshape.pts->y[i]++; + myshape.pts->z[i]++; + } + } + + for (int i = 0; i < N; i++) + { + assert (myshape.pts->x[i] == 2); + assert (myshape.pts->y[i] == 2); + assert (myshape.pts->z[i] == 2); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-2.C b/libgomp/testsuite/libgomp.c++/declare-mapper-2.C new file mode 100644 index 0000000..d848fdb --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/declare-mapper-2.C @@ -0,0 +1,55 @@ +// { dg-do run } + +#include <cassert> + +#define N 256 + +struct doublebuf +{ + int buf_a[N][N]; + int buf_b[N][N]; +}; + +#pragma omp declare mapper(lo:doublebuf b) map(b.buf_a[0:N/2][0:N]) \ + map(b.buf_b[0:N/2][0:N]) + +#pragma omp declare mapper(hi:doublebuf b) map(b.buf_a[N/2:N/2][0:N]) \ + map(b.buf_b[N/2:N/2][0:N]) + +int main (int argc, char *argv[]) +{ + doublebuf db; + + for (int i = 0; i < N; i++) + for (int j = 0; j < N; j++) + db.buf_a[i][j] = db.buf_b[i][j] = 0; + + #pragma omp target map(mapper(lo), tofrom:db) + { + for (int i = 0; i < N / 2; i++) + for (int j = 0; j < N; j++) + { + db.buf_a[i][j]++; + db.buf_b[i][j]++; + } + } + + #pragma omp target map(mapper(hi), tofrom:db) + { + for (int i = N / 2; i < N; i++) + for (int j = 0; j < N; j++) + { + db.buf_a[i][j]++; + db.buf_b[i][j]++; + } + } + + for (int i = 0; i < N; i++) + for (int j = 0; j < N; j++) + { + assert (db.buf_a[i][j] == 1); + assert (db.buf_b[i][j] == 1); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-3.C b/libgomp/testsuite/libgomp.c++/declare-mapper-3.C new file mode 100644 index 0000000..ea9b7de --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/declare-mapper-3.C @@ -0,0 +1,63 @@ +// { dg-do run } + +#include <cstdlib> +#include <cassert> + +struct S { + int *myarr; +}; + +#pragma omp declare mapper (S s) map(to:s.myarr) map (tofrom: s.myarr[0:20]) + +namespace A { +#pragma omp declare mapper (S s) map(to:s.myarr) map (tofrom: s.myarr[0:100]) +} + +namespace B { +#pragma omp declare mapper (S s) map(to:s.myarr) map (tofrom: s.myarr[100:100]) +} + +namespace A +{ + void incr_a (S my_s) + { +#pragma omp target + { + for (int i = 0; i < 100; i++) + my_s.myarr[i]++; + } + } +} + +namespace B +{ + void incr_b (S my_s) + { +#pragma omp target + { + for (int i = 100; i < 200; i++) + my_s.myarr[i]++; + } + } +} + +int main (int argc, char *argv[]) +{ + S my_s; + + my_s.myarr = (int *) calloc (200, sizeof (int)); + +#pragma omp target + { + for (int i = 0; i < 20; i++) + my_s.myarr[i]++; + } + + A::incr_a (my_s); + B::incr_b (my_s); + + for (int i = 0; i < 200; i++) + assert (my_s.myarr[i] == (i < 20) ? 2 : 1); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-4.C b/libgomp/testsuite/libgomp.c++/declare-mapper-4.C new file mode 100644 index 0000000..f194e63 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/declare-mapper-4.C @@ -0,0 +1,63 @@ +// { dg-do run } + +#include <cstdlib> +#include <cassert> + +struct S { + int *myarr; +}; + +#pragma omp declare mapper (S s) map(to:s.myarr) map (tofrom: s.myarr[0:20]) + +namespace A { +#pragma omp declare mapper (S s) map(to:s.myarr) map (tofrom: s.myarr[0:100]) +} + +namespace B { +#pragma omp declare mapper (S s) map(to:s.myarr) map (tofrom: s.myarr[100:100]) +} + +namespace A +{ + void incr_a (S &my_s) + { +#pragma omp target + { + for (int i = 0; i < 100; i++) + my_s.myarr[i]++; + } + } +} + +namespace B +{ + void incr_b (S &my_s) + { +#pragma omp target + { + for (int i = 100; i < 200; i++) + my_s.myarr[i]++; + } + } +} + +int main (int argc, char *argv[]) +{ + S my_s; + + my_s.myarr = (int *) calloc (200, sizeof (int)); + +#pragma omp target + { + for (int i = 0; i < 20; i++) + my_s.myarr[i]++; + } + + A::incr_a (my_s); + B::incr_b (my_s); + + for (int i = 0; i < 200; i++) + assert (my_s.myarr[i] == (i < 20) ? 2 : 1); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-5.C b/libgomp/testsuite/libgomp.c++/declare-mapper-5.C new file mode 100644 index 0000000..0030de8 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/declare-mapper-5.C @@ -0,0 +1,52 @@ +// { dg-do run } + +#include <cassert> + +struct S +{ + int *myarr; + int len; +}; + +class C +{ + S smemb; +#pragma omp declare mapper (custom:S s) map(to:s.myarr) \ + map(tofrom:s.myarr[0:s.len]) + +public: + C(int l) + { + smemb.myarr = new int[l]; + smemb.len = l; + for (int i = 0; i < l; i++) + smemb.myarr[i] = 0; + } + void bump(); + void check(); +}; + +void +C::bump () +{ +#pragma omp target map(mapper(custom), tofrom: smemb) + { + for (int i = 0; i < smemb.len; i++) + smemb.myarr[i]++; + } +} + +void +C::check () +{ + for (int i = 0; i < smemb.len; i++) + assert (smemb.myarr[i] == 1); +} + +int main (int argc, char *argv[]) +{ + C test (100); + test.bump (); + test.check (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-6.C b/libgomp/testsuite/libgomp.c++/declare-mapper-6.C new file mode 100644 index 0000000..14ed10d --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/declare-mapper-6.C @@ -0,0 +1,37 @@ +// { dg-do run } + +#include <cassert> + +template <typename T> +void adjust (T param) +{ +#pragma omp declare mapper (T x) map(to:x.len, x.base) \ + map(tofrom:x.base[0:x.len]) + +#pragma omp target + for (int i = 0; i < param.len; i++) + param.base[i]++; +} + +struct S { + int len; + int *base; +}; + +int main (int argc, char *argv[]) +{ + S a; + + a.len = 100; + a.base = new int[a.len]; + + for (int i = 0; i < a.len; i++) + a.base[i] = 0; + + adjust (a); + + for (int i = 0; i < a.len; i++) + assert (a.base[i] == 1); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-7.C b/libgomp/testsuite/libgomp.c++/declare-mapper-7.C new file mode 100644 index 0000000..ba4792a --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/declare-mapper-7.C @@ -0,0 +1,59 @@ +// { dg-do run } + +#include <cassert> + +struct S +{ + int *myarr; +}; + +struct T +{ + S *s; +}; + +#pragma omp declare mapper (s100: S x) map(to: x.myarr) \ + map(tofrom: x.myarr[0:100]) +// Define this because ... +#pragma omp declare mapper (default: S x) map(to: x.myarr) \ + map(tofrom: x.myarr[0:100]) + + +void +bump (T t) +{ + /* Here we have an implicit/default mapper invoking a named mapper. We + need to make sure that can be located properly at gimplification + time. */ + +// ... the following is invalid in OpenMP - albeit supported by GCC +// (after disabling: error: in ‘declare mapper’ directives, parameter to ‘mapper’ modifier must be ‘default’ ) + +// #pragma omp declare mapper (T t) map(to:t.s) map(mapper(s100), tofrom: t.s[0]) + +// ... thus, we now use ... +#pragma omp declare mapper (T t) map(to:t.s) map(mapper(default), tofrom: t.s[0]) + +#pragma omp target + for (int i = 0; i < 100; i++) + t.s->myarr[i]++; +} + +int main (int argc, char *argv[]) +{ + S my_s; + T my_t; + + my_s.myarr = new int[100]; + my_t.s = &my_s; + + for (int i = 0; i < 100; i++) + my_s.myarr[i] = 0; + + bump (my_t); + + for (int i = 0; i < 100; i++) + assert (my_s.myarr[i] == 1); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-8.C b/libgomp/testsuite/libgomp.c++/declare-mapper-8.C new file mode 100644 index 0000000..3818e52 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/declare-mapper-8.C @@ -0,0 +1,61 @@ +// { dg-do run } + +#include <cassert> + +struct S +{ + int *myarr; + int len; +}; + +template<typename T> +class C +{ + T memb; +#pragma omp declare mapper (T t) map(to:t.len, t.myarr) \ + map(tofrom:t.myarr[0:t.len]) + +public: + C(int sz); + ~C(); + void bump(); + void check(); +}; + +template<typename T> +C<T>::C(int sz) +{ + memb.myarr = new int[sz]; + for (int i = 0; i < sz; i++) + memb.myarr[i] = 0; + memb.len = sz; +} + +template<typename T> +C<T>::~C() +{ + delete[] memb.myarr; +} + +template<typename T> +void C<T>::bump() +{ +#pragma omp target map(memb) + for (int i = 0; i < memb.len; i++) + memb.myarr[i]++; +} + +template<typename T> +void C<T>::check() +{ + for (int i = 0; i < memb.len; i++) + assert (memb.myarr[i] == 1); +} + +int main(int argc, char *argv[]) +{ + C<S> c_int(100); + c_int.bump(); + c_int.check(); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/declare_target-2.C b/libgomp/testsuite/libgomp.c++/declare_target-2.C new file mode 100644 index 0000000..ab94a55 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/declare_target-2.C @@ -0,0 +1,25 @@ +// { dg-do link } + +// Actually not needed: -fipa-cp is default with -O2: +// { dg-additional-options "-O2 -fipa-cp" } + +// The code failed because 'std::endl' becoḿes implicitly 'declare target' +// but not the 'widen' function it calls. While the linker had no issues +// (endl is never called, either because it is inlined or optimized away), +// the IPA-CP (enabled by -O2 and higher) failed as the definition for +// 'widen' did not exist on the offload side. + +#include <iostream> + +void func (int m) +{ + if (m < 0) + std::cout << "should not happen" << std::endl; +} + + +int main() +{ + #pragma omp target + func (1); +} diff --git a/libgomp/testsuite/libgomp.c++/pr101544-1-O0.C b/libgomp/testsuite/libgomp.c++/pr101544-1-O0.C new file mode 100644 index 0000000..c8a73dc --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/pr101544-1-O0.C @@ -0,0 +1,3 @@ +// { dg-additional-options -O0 } + +#include "pr101544-1.C" diff --git a/libgomp/testsuite/libgomp.c++/pr101544-1.C b/libgomp/testsuite/libgomp.c++/pr101544-1.C new file mode 100644 index 0000000..fcd3e97 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/pr101544-1.C @@ -0,0 +1,82 @@ +// See also '../libgomp.oacc-c++/pr101544-1.C'. +#ifndef ALWAYS_INLINE +# define ALWAYS_INLINE +#endif + +//===--- declare_target_base_class.cpp --------------------------------------===// +// +// OpenMP API Version 4.5 Nov 2015 +// +// This test was suggested by members of NERSC. This test defines a declare +// target region which includes only a base class and a 'concrete' device +// pointer. +// +// Test suggestion comes from Chris Daily and Rahulkumar Gayatri from NERSC +////===----------------------------------------------------------------------===// + +#include <new> +#include <vector> +#include <iostream> + +#pragma omp declare target +//#pragma acc routine //TODO error: '#pragma acc routine' not immediately followed by function declaration or definition +class S { +public: + //#pragma acc routine //TODO error: '#pragma acc routine' must be at file scope + ALWAYS_INLINE + S() : _devPtr(nullptr) {} + //#pragma acc routine //TODO error: '#pragma acc routine' must be at file scope + ALWAYS_INLINE + double sag(double x, double y) { + return x + y; + } + S* cloneToDevice() { + S* ptr; +#pragma omp target map(ptr) +#pragma acc serial copy(ptr) + { + ptr = new S(); + } + _devPtr = ptr; + return ptr; + } +private: + S* _devPtr; +}; +//#pragma acc routine (S) //TODO error: 'class S' does not refer to a function +//#pragma acc routine (S::S) //TODO error: '#pragma acc routine' names a set of overloads +//#pragma acc routine (S::sag) //TODO error: '#pragma acc routine' names a set of overloads +#pragma omp end declare target + +int main() { + int errors = 0; + + S s; + S* devPtr = s.cloneToDevice(); + + std::vector<double> in(10, 0.0); + for(int i = 0; i < 10; i++) { + in[i] = i; + } + + std::vector<double> out(10, 0.0); + + double* inptr = in.data(); + double* outptr = out.data(); + +#pragma omp target teams distribute parallel for map(inptr[:10], outptr[:10]) is_device_ptr(devPtr) +#pragma acc parallel loop copy(inptr[:10], outptr[:10]) deviceptr(devPtr) + for(int i = 0; i < 10; i++) { + outptr[i] = devPtr->sag(inptr[i], inptr[i]); + } + + for(int i = 0; i < 10; i++) { + if (out[i] != i * 2) + { + ++errors; + std::cerr << "ERROR: " << "i = " << i << ": " << out[i] << " != " << (i * 2) << "\n"; + } + } + + return errors ? 1 : 0; +} diff --git a/libgomp/testsuite/libgomp.c++/pr106445-1-O0.C b/libgomp/testsuite/libgomp.c++/pr106445-1-O0.C new file mode 100644 index 0000000..bcd499c --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/pr106445-1-O0.C @@ -0,0 +1,3 @@ +// { dg-additional-options -O0 } + +#include "pr106445-1.C" diff --git a/libgomp/testsuite/libgomp.c++/pr106445-1.C b/libgomp/testsuite/libgomp.c++/pr106445-1.C new file mode 100644 index 0000000..329ce62 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/pr106445-1.C @@ -0,0 +1,18 @@ +#include <vector> + +int main() +{ +#pragma omp target + { + { + std::vector<int> v; + if (!v.empty()) + __builtin_abort(); + } + { + std::vector<int> v(100); + if (v.capacity() < 100) + __builtin_abort(); + } + } +} diff --git a/libgomp/testsuite/libgomp.c++/pr119692-1-1.C b/libgomp/testsuite/libgomp.c++/pr119692-1-1.C new file mode 100644 index 0000000..1f59b15 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/pr119692-1-1.C @@ -0,0 +1,10 @@ +/* PR119692 "C++ 'typeinfo', 'vtable' vs. OpenACC, OpenMP 'target' offloading" */ + +/* { dg-additional-options -UDEFAULT } + Wrong code for offloading execution. + { dg-additional-options -foffload=disable } */ +/* { dg-additional-options -fdump-tree-gimple } */ + +#include "../libgomp.oacc-c++/pr119692-1-1.C" + +/* { dg-final { scan-tree-dump-not {(?n)#pragma omp target .* map\(tofrom:_ZTI2C2 \[len: [0-9]+\] \[runtime_implicit\]\) map\(tofrom:_ZTI2C1 \[len: [0-9]+\] \[runtime_implicit\]\) map\(tofrom:_ZTV2C1 \[len: [0-9]+\] \[runtime_implicit\]\)$} gimple { xfail *-*-* } } } */ diff --git a/libgomp/testsuite/libgomp.c++/pr119692-1-2.C b/libgomp/testsuite/libgomp.c++/pr119692-1-2.C new file mode 100644 index 0000000..e7ac818 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/pr119692-1-2.C @@ -0,0 +1,11 @@ +/* PR119692 "C++ 'typeinfo', 'vtable' vs. OpenACC, OpenMP 'target' offloading" */ + +/* { dg-additional-options -DDEFAULT=defaultmap(none) } + Fails to compile. + { dg-do compile } */ + +#include "pr119692-1-1.C" + +/* { dg-bogus {error: '_ZTV2C1' not specified in enclosing 'target'} PR119692 { xfail *-*-* } 0 } + { dg-bogus {error: '_ZTI2C2' not specified in enclosing 'target'} PR119692 { xfail *-*-* } 0 } + { dg-bogus {error: '_ZTI2C1' not specified in enclosing 'target'} PR119692 { xfail *-*-* } 0 } */ diff --git a/libgomp/testsuite/libgomp.c++/pr119692-1-3.C b/libgomp/testsuite/libgomp.c++/pr119692-1-3.C new file mode 100644 index 0000000..733feb8 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/pr119692-1-3.C @@ -0,0 +1,10 @@ +/* PR119692 "C++ 'typeinfo', 'vtable' vs. OpenACC, OpenMP 'target' offloading" */ + +/* { dg-additional-options -DDEFAULT=defaultmap(present) } + Wrong code for offloading execution. + { dg-xfail-run-if PR119692 { offload_device } } */ +/* { dg-additional-options -fdump-tree-gimple } */ + +#include "pr119692-1-1.C" + +/* { dg-final { scan-tree-dump-not {(?n)#pragma omp target .* defaultmap\(present\) map\(force_present:_ZTI2C2 \[len: [0-9]+\] \[runtime_implicit\]\) map\(force_present:_ZTI2C1 \[len: [0-9]+\] \[runtime_implicit\]\) map\(force_present:_ZTV2C1 \[len: [0-9]+\] \[runtime_implicit\]\)$} gimple { xfail *-*-* } } } */ diff --git a/libgomp/testsuite/libgomp.c++/pr119692-1-4.C b/libgomp/testsuite/libgomp.c++/pr119692-1-4.C new file mode 100644 index 0000000..af9fe1c --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/pr119692-1-4.C @@ -0,0 +1,13 @@ +/* PR119692 "C++ 'typeinfo', 'vtable' vs. OpenACC, OpenMP 'target' offloading" */ + +/* { dg-additional-options -DDEFAULT=defaultmap(firstprivate) } + Wrong code for offloading execution. + { dg-xfail-run-if PR119692 { offload_device } } */ +/* There are configurations where we 'WARNING: program timed out.' while in + 'dynamic_cast', see <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119692#c6>. + { dg-timeout 10 } ... to make sure that happens quickly. */ +/* { dg-additional-options -fdump-tree-gimple } */ + +#include "pr119692-1-1.C" + +/* { dg-final { scan-tree-dump-not {(?n)#pragma omp target .* defaultmap\(firstprivate\) firstprivate\(_ZTI2C2\) firstprivate\(_ZTI2C1\) firstprivate\(_ZTV2C1\)$} gimple { xfail *-*-* } } } */ diff --git a/libgomp/testsuite/libgomp.c++/pr119692-1-5.C b/libgomp/testsuite/libgomp.c++/pr119692-1-5.C new file mode 100644 index 0000000..e5c6e07 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/pr119692-1-5.C @@ -0,0 +1,13 @@ +/* PR119692 "C++ 'typeinfo', 'vtable' vs. OpenACC, OpenMP 'target' offloading" */ + +/* { dg-additional-options -DDEFAULT=defaultmap(to) } + Wrong code for offloading execution. + { dg-xfail-run-if PR119692 { offload_device } } */ +/* There are configurations where we 'WARNING: program timed out.' while in + 'dynamic_cast', see <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119692#c6>. + { dg-timeout 10 } ... to make sure that happens quickly. */ +/* { dg-additional-options -fdump-tree-gimple } */ + +#include "pr119692-1-1.C" + +/* { dg-final { scan-tree-dump-not {(?n)#pragma omp target .* defaultmap\(to\) map\(to:_ZTI2C2 \[len: [0-9]+\] \[runtime_implicit\]\) map\(to:_ZTI2C1 \[len: [0-9]+\] \[runtime_implicit\]\) map\(to:_ZTV2C1 \[len: [0-9]+\] \[runtime_implicit\]\)$} gimple { xfail *-*-* } } } */ diff --git a/libgomp/testsuite/libgomp.c++/pr96390.C b/libgomp/testsuite/libgomp.c++/pr96390.C index 1f3c3e0..be19601 100644 --- a/libgomp/testsuite/libgomp.c++/pr96390.C +++ b/libgomp/testsuite/libgomp.c++/pr96390.C @@ -1,6 +1,4 @@ /* { dg-additional-options "-O0 -fdump-tree-omplower" } */ -/* { dg-additional-options "-foffload=-Wa,--verify" { target offload_target_nvptx } } */ -/* { dg-xfail-if "PR 97106/PR 97102 - .alias not (yet) supported for nvptx" { offload_target_nvptx } } */ #include <cstdlib> #include <type_traits> diff --git a/libgomp/testsuite/libgomp.c++/pr96835-1-O0.C b/libgomp/testsuite/libgomp.c++/pr96835-1-O0.C new file mode 100644 index 0000000..85e4290 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/pr96835-1-O0.C @@ -0,0 +1,3 @@ +// { dg-additional-options -O0 } + +#include "pr96835-1.C" diff --git a/libgomp/testsuite/libgomp.c++/pr96835-1.C b/libgomp/testsuite/libgomp.c++/pr96835-1.C new file mode 100644 index 0000000..c9f6475 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/pr96835-1.C @@ -0,0 +1,45 @@ +// See also '../libgomp.oacc-c++/pr96835-1.C'. +#ifndef ALWAYS_INLINE +# define ALWAYS_INLINE +#endif + +#pragma omp declare target + +template<int sz> +struct vector { + int values_[sz]; + vector(); + ALWAYS_INLINE + vector(int const& init_val); + ALWAYS_INLINE + int dot(vector o) { + int res = 0; + for (int i = 0; i < sz; ++ i) + res += values_[i] * o.values_[i]; + return res; + } +}; + +template<int sz> +vector<sz>::vector(int const& init_val) { + for (int i = 0; i < sz; ++ i) values_[i] = init_val; +} +template<int sz> +vector<sz>::vector() : vector(0) { +} + +#pragma omp end declare target + +int main() { + int res = 0; + #pragma omp target map(from:res) + #pragma acc serial copyout(res) + { + vector<4> v1(1); + vector<4> v2(2); + res = v1.dot(v2); + } + if (res != 8) + __builtin_abort(); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/target-cdtor-1.C b/libgomp/testsuite/libgomp.c++/target-cdtor-1.C new file mode 100644 index 0000000..ecb029e --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-cdtor-1.C @@ -0,0 +1,104 @@ +/* Offloaded C++ objects construction and destruction. */ + +/* { dg-additional-options -fdump-tree-optimized-raw-asmname } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw-asmname } */ + +#include <omp.h> +#include <vector> + +#pragma omp declare target + +struct S +{ + int x; + + S() + : x(-1) + { + __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device()); + } + S(int x) + : x(x) + { + __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device()); + } + ~S() + { + __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device()); + } +}; + +#pragma omp end declare target + +S sH1(7); + +#pragma omp declare target + +S sHD1(5); + +std::vector<S> svHD1(2); + +#pragma omp end declare target + +S sH2(3); + +int main() +{ + int c = 0; + + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + +#pragma omp target map(c) + { + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + } + +#pragma omp target map(c) + { + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + } + + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + + return 0; +} + +/* Verify '__cxa_atexit' calls. + + For the host, there are four expected calls: + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, } 4 optimized { target cxa_atexit } } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH2, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + + For the device, there are two expected calls: + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, } 2 optimized { target cxa_atexit } } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } +*/ + +/* C++ objects are constructed in order of appearance (..., and destructed in reverse order). + { dg-output {S, 7, 1[\r\n]+} } + { dg-output {S, 5, 1[\r\n]+} } + { dg-output {S, -1, 1[\r\n]+} } + { dg-output {S, -1, 1[\r\n]+} } + { dg-output {S, 3, 1[\r\n]+} } + { dg-output {main:1, 1[\r\n]+} } + { dg-output {S, 5, 0[\r\n]+} { target offload_device } } + { dg-output {S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {main:2, 1[\r\n]+} { target { ! offload_device } } } + { dg-output {main:2, 0[\r\n]+} { target offload_device } } + { dg-output {main:3, 1[\r\n]+} { target { ! offload_device } } } + { dg-output {main:3, 0[\r\n]+} { target offload_device } } + { dg-output {main:4, 1[\r\n]+} } + { dg-output {~S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {~S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {~S, 5, 0[\r\n]+} { target offload_device } } + { dg-output {~S, 3, 1[\r\n]+} } + { dg-output {~S, -1, 1[\r\n]+} } + { dg-output {~S, -1, 1[\r\n]+} } + { dg-output {~S, 5, 1[\r\n]+} } + { dg-output {~S, 7, 1[\r\n]+} } +*/ diff --git a/libgomp/testsuite/libgomp.c++/target-cdtor-2.C b/libgomp/testsuite/libgomp.c++/target-cdtor-2.C new file mode 100644 index 0000000..75e48ca --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-cdtor-2.C @@ -0,0 +1,140 @@ +/* Offloaded 'constructor' and 'destructor' functions, and C++ objects construction and destruction. */ + +/* { dg-require-effective-target init_priority } */ + +/* { dg-additional-options -fdump-tree-optimized-raw-asmname } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw-asmname } */ + +#include <omp.h> +#include <vector> + +#pragma omp declare target + +struct S +{ + int x; + + S() + : x(-1) + { + __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device()); + } + S(int x) + : x(x) + { + __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device()); + } + ~S() + { + __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device()); + } +}; + +#pragma omp end declare target + +S sH1 __attribute__((init_priority(1500))) (7); + +#pragma omp declare target + +S sHD1 __attribute__((init_priority(2000))) (5); + +std::vector<S> svHD1 __attribute__((init_priority(1000))) (2); + +static void +__attribute__((constructor(20000))) +initDH1() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +static void +__attribute__((destructor(20000))) +finiDH1() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +#pragma omp end declare target + +S sH2 __attribute__((init_priority(500))) (3); + +static void +__attribute__((constructor(10000))) +initH1() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +static void +__attribute__((destructor(10000))) +finiH1() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +int main() +{ + int c = 0; + + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + +#pragma omp target map(c) + { + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + } + +#pragma omp target map(c) + { + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + } + + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + + return 0; +} + +/* Verify '__cxa_atexit' calls. + + For the host, there are four expected calls: + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, } 4 optimized { target cxa_atexit } } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH2, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + + For the device, there are two expected calls: + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, } 2 optimized { target cxa_atexit } } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } +*/ + +/* Defined order in which 'constructor' functions, and 'destructor' functions are run, and C++ objects are constructed (..., and destructed in reverse order). + { dg-output {S, 3, 1[\r\n]+} } + { dg-output {S, -1, 1[\r\n]+} } + { dg-output {S, -1, 1[\r\n]+} } + { dg-output {S, 7, 1[\r\n]+} } + { dg-output {S, 5, 1[\r\n]+} } + { dg-output {initH1, 1[\r\n]+} } + { dg-output {initDH1, 1[\r\n]+} } + { dg-output {main:1, 1[\r\n]+} } + { dg-output {S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {S, 5, 0[\r\n]+} { target offload_device } } + { dg-output {initDH1, 0[\r\n]+} { target offload_device } } + { dg-output {main:2, 1[\r\n]+} { target { ! offload_device } } } + { dg-output {main:2, 0[\r\n]+} { target offload_device } } + { dg-output {main:3, 1[\r\n]+} { target { ! offload_device } } } + { dg-output {main:3, 0[\r\n]+} { target offload_device } } + { dg-output {main:4, 1[\r\n]+} } + { dg-output {~S, 5, 0[\r\n]+} { target offload_device } } + { dg-output {~S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {~S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {finiDH1, 0[\r\n]+} { target offload_device } } + { dg-output {~S, 5, 1[\r\n]+} } + { dg-output {~S, 7, 1[\r\n]+} } + { dg-output {~S, -1, 1[\r\n]+} } + { dg-output {~S, -1, 1[\r\n]+} } + { dg-output {~S, 3, 1[\r\n]+} } + { dg-output {finiDH1, 1[\r\n]+} } + { dg-output {finiH1, 1[\r\n]+} } +*/ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-1.C b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-1.C new file mode 100644 index 0000000..a862652 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-1.C @@ -0,0 +1,28 @@ +/* 'std::bad_cast' exception in OpenMP 'target' region. */ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "../libgomp.oacc-c++/exceptions-bad_cast-1.C" + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + For host execution, we print something like: + terminate called after throwing an instance of 'std::bad_cast' + what(): std::bad_cast + Aborted (core dumped) + { dg-output {.*std::bad_cast} { target { ! offload_device } } } + For GCN, nvptx offload execution, we don't print anything, but just 'abort'. + + TODO For GCN, nvptx offload execution, this currently doesn't 'abort' due to + the 'std::bad_cast' exception, but rather due to SIGSEGV in 'dynamic_cast'; + PR119692. + + { dg-shouldfail {'std::bad_cast' exception} } */ +/* There are configurations where we 'WARNING: program timed out.' while in + 'dynamic_cast', see <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119692#c6>. + { dg-timeout 10 } ... to make sure that happens quickly. */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-2-offload-sorry-GCN.C b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-2-offload-sorry-GCN.C new file mode 100644 index 0000000..93884df --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-2-offload-sorry-GCN.C @@ -0,0 +1,19 @@ +/* 'std::bad_cast' exception in OpenMP 'target' region, caught, '-foffload-options=-mno-fake-exceptions'. */ + +/* As this test case involves an expected offload compilation failure, we have to handle each offload target individually. + { dg-do link { target offload_target_amdgcn } } + { dg-additional-options -foffload=amdgcn-amdhsa } */ +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -foffload-options=-mno-fake-exceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "target-exceptions-bad_cast-2.C" + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + Given '-foffload-options=-mno-fake-exceptions', offload compilation fails: + { dg-regexp {[^\r\n]+: In function 'main[^']+':[\r\n]+(?:[^\r\n]+: sorry, unimplemented: exception handling not supported[\r\n]+)+} } + (Note, using 'dg-regexp' instead of 'dg-message', as the former runs before the auto-mark-UNSUPPORTED.) + { dg-excess-errors {'mkoffload' failure etc.} } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-2-offload-sorry-nvptx.C b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-2-offload-sorry-nvptx.C new file mode 100644 index 0000000..83ec89b --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-2-offload-sorry-nvptx.C @@ -0,0 +1,19 @@ +/* 'std::bad_cast' exception in OpenMP 'target' region, caught, '-foffload-options=-mno-fake-exceptions'. */ + +/* As this test case involves an expected offload compilation failure, we have to handle each offload target individually. + { dg-do link { target offload_target_nvptx } } + { dg-additional-options -foffload=nvptx-none } */ +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -foffload-options=-mno-fake-exceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "target-exceptions-bad_cast-2.C" + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + Given '-foffload-options=-mno-fake-exceptions', offload compilation fails: + { dg-regexp {[^\r\n]+: In function 'main[^']+':[\r\n]+(?:[^\r\n]+: sorry, unimplemented: exception handling not supported[\r\n]+)+} } + (Note, using 'dg-regexp' instead of 'dg-message', as the former runs before the auto-mark-UNSUPPORTED.) + { dg-excess-errors {'mkoffload' failure etc.} } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-2.C b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-2.C new file mode 100644 index 0000000..ff15c9f --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-2.C @@ -0,0 +1,27 @@ +/* 'std::bad_cast' exception in OpenMP 'target' region, caught. */ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ +/* { dg-bogus {_ZTISt8bad_cast} PR119734 { target offload_target_nvptx xfail *-*-* } 0 } + { dg-excess-errors {'mkoffload' failure etc.} { xfail offload_target_nvptx } } */ + +#include "../libgomp.oacc-c++/exceptions-bad_cast-2.C" + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + { dg-output {.*caught 'std::bad_cast'[\r\n]+} { target { ! offload_device } } } + For GCN, nvptx offload execution, we don't print anything, but just 'abort'. + + TODO For GCN, nvptx offload execution, this currently doesn't 'abort' due to + the 'std::bad_cast' exception, but rather due to SIGSEGV in 'dynamic_cast'; + PR119692. + + For GCN, nvptx offload execution, there is no 'catch'ing; any exception is fatal. + { dg-shouldfail {'MyException' exception} { offload_device } } */ +/* There are configurations where we 'WARNING: program timed out.' while in + 'dynamic_cast', see <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119692#c6>. + { dg-timeout 10 } ... to make sure that happens quickly. */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-3.C b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-3.C new file mode 100644 index 0000000..efed64f --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-3.C @@ -0,0 +1,17 @@ +/* 'std::bad_cast' exception in OpenMP 'target' region, dead code. */ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -DDEFAULT=defaultmap(to) } + ... to avoid wrong code for offloading execution; PR119692. + With this, the device code still isn't correct, but the defects are in dead code. + { dg-additional-options -fdump-tree-gimple } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "../libgomp.oacc-c++/exceptions-bad_cast-3.C" + +/* { dg-final { scan-tree-dump-not {(?n)#pragma omp target .* defaultmap\(to\) map\(to:_ZTI2C2 \[len: [0-9]+\] \[runtime_implicit\]\) map\(to:_ZTI2C1 \[len: [0-9]+\] \[runtime_implicit\]\) map\(to:_ZTV2C1 \[len: [0-9]+\] \[runtime_implicit\]\)$} gimple { xfail *-*-* } } } */ + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C new file mode 100644 index 0000000..d4dccf1 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C @@ -0,0 +1,26 @@ +/* Exception handling constructs in dead code, '-foffload-options=-mno-fake-exceptions'. */ + +/* As this test case involves an expected offload compilation failure, we have to handle each offload target individually. + { dg-do link { target offload_target_amdgcn } } + { dg-additional-options -foffload=amdgcn-amdhsa } */ +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -foffload-options=-mno-fake-exceptions } */ +/* { dg-additional-options -O0 } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "target-exceptions-pr118794-1.C" + +/* In this specific C++ arrangement, distilled from PR118794, GCC synthesizes + '__builtin_eh_pointer', '__builtin_unwind_resume' calls as dead code in 'f': + { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } } + { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } } + ..., just 'targetm.arm_eabi_unwinder' is different: + { dg-final { scan-tree-dump-times {gimple_call <__builtin_cxa_end_cleanup, } 1 optimized { target { arm_eabi || tic6x-*-* } } } } + { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } } + { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } } + Given '-O0' and '-foffload-options=-mno-fake-exceptions', offload compilation fails: + { dg-regexp {[^\r\n]+: In function 'f':[\r\n]+(?:[^\r\n]+: sorry, unimplemented: exception handling not supported[\r\n]+)+} } + (Note, using 'dg-regexp' instead of 'dg-message', as the former runs before the auto-mark-UNSUPPORTED.) + { dg-excess-errors {'mkoffload' failure etc.} } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C new file mode 100644 index 0000000..724e34b --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C @@ -0,0 +1,26 @@ +/* Exception handling constructs in dead code, '-foffload-options=-mno-fake-exceptions'. */ + +/* As this test case involves an expected offload compilation failure, we have to handle each offload target individually. + { dg-do link { target offload_target_nvptx } } + { dg-additional-options -foffload=nvptx-none } */ +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -foffload-options=-mno-fake-exceptions } */ +/* { dg-additional-options -O0 } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "target-exceptions-pr118794-1.C" + +/* In this specific C++ arrangement, distilled from PR118794, GCC synthesizes + '__builtin_eh_pointer', '__builtin_unwind_resume' calls as dead code in 'f': + { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } } + { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } } + ..., just 'targetm.arm_eabi_unwinder' is different: + { dg-final { scan-tree-dump-times {gimple_call <__builtin_cxa_end_cleanup, } 1 optimized { target { arm_eabi || tic6x-*-* } } } } + { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } } + { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } } + Given '-O0' and '-foffload-options=-mno-fake-exceptions', offload compilation fails: + { dg-regexp {[^\r\n]+: In function 'f':[\r\n]+(?:[^\r\n]+: sorry, unimplemented: exception handling not supported[\r\n]+)+} } + (Note, using 'dg-regexp' instead of 'dg-message', as the former runs before the auto-mark-UNSUPPORTED.) + { dg-excess-errors {'mkoffload' failure etc.} } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1.C b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1.C new file mode 100644 index 0000000..24eb7a5 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1.C @@ -0,0 +1,59 @@ +/* Exception handling constructs in dead code. */ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -O0 } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +/* See also '../../../gcc/testsuite/g++.target/gcn/exceptions-pr118794-1.C', + '../../../gcc/testsuite/g++.target/nvptx/exceptions-pr118794-1.C'. */ + +#pragma omp begin declare target + +bool ok = false; + +template <typename T> +struct C +{ + C() + { + ok = true; + } + C(int) {}; + ~C() {}; + + __attribute__((noipa)) + void m() + { + C c; + } +}; + +inline void f() +{ + C<double> c(1); + c.m(); +} + +#pragma omp end declare target + +int main() +{ +#pragma omp target + { + f(); + } +#pragma omp target update from(ok) + if (!ok) + __builtin_abort(); +} + +/* In this specific C++ arrangement, distilled from PR118794, GCC synthesizes + '__builtin_eh_pointer', '__builtin_unwind_resume' calls as dead code in 'f': + { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } } + { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } } + ..., just 'targetm.arm_eabi_unwinder' is different: + { dg-final { scan-tree-dump-times {gimple_call <__builtin_cxa_end_cleanup, } 1 optimized { target { arm_eabi || tic6x-*-* } } } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-1-O0.C b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-1-O0.C new file mode 100644 index 0000000..00d7c13 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-1-O0.C @@ -0,0 +1,23 @@ +/* 'throw' in OpenMP 'target' region. */ + +/* { dg-additional-options -O0 } */ +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "target-exceptions-throw-1.C" + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + For host execution, we print something like: + terminate called after throwing an instance of 'MyException' + Aborted (core dumped) + { dg-output {.*MyException} { target { ! offload_device } } } + For GCN, nvptx offload execution, we don't print anything, but just 'abort'. + + { dg-shouldfail {'MyException' exception} } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-1.C b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-1.C new file mode 100644 index 0000000..a4e7a10 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-1.C @@ -0,0 +1,22 @@ +/* 'throw' in OpenMP 'target' region. */ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "../libgomp.oacc-c++/exceptions-throw-1.C" + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + For host execution, we print something like: + terminate called after throwing an instance of 'MyException' + Aborted (core dumped) + { dg-output {.*MyException} { target { ! offload_device } } } + For GCN, nvptx offload execution, we don't print anything, but just 'abort'. + + { dg-shouldfail {'MyException' exception} } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2-O0.C b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2-O0.C new file mode 100644 index 0000000..b7a311d --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2-O0.C @@ -0,0 +1,25 @@ +/* 'throw' in OpenMP 'target' region, caught. */ + +/* { dg-additional-options -O0 } */ +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ +/* { dg-bogus {undefined symbol: typeinfo name for MyException} PR119806 { target offload_target_amdgcn xfail *-*-* } 0 } + { dg-excess-errors {'mkoffload' failure etc.} { xfail offload_target_amdgcn } } */ +/* { dg-bogus {Initial value type mismatch} PR119806 { target offload_target_nvptx xfail *-*-* } 0 } + { dg-excess-errors {'mkoffload' failure etc.} { xfail offload_target_nvptx } } */ + +#include "target-exceptions-throw-2.C" + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-output {.*caught 'MyException'[\r\n]+} { target { ! offload_device } } } + For GCN, nvptx offload execution, we don't print anything, but just 'abort'. + + For GCN, nvptx offload execution, there is no 'catch'ing; any exception is fatal. + { dg-shouldfail {'MyException' exception} { offload_device } } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2-offload-sorry-GCN.C b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2-offload-sorry-GCN.C new file mode 100644 index 0000000..9905b1f --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2-offload-sorry-GCN.C @@ -0,0 +1,21 @@ +/* 'throw' in OpenMP 'target' region, caught, -foffload-options=-mno-fake-exceptions. */ + +/* As this test case involves an expected offload compilation failure, we have to handle each offload target individually. + { dg-do link { target offload_target_amdgcn } } + { dg-additional-options -foffload=amdgcn-amdhsa } */ +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -foffload-options=-mno-fake-exceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "target-exceptions-throw-2.C" + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + Given '-foffload-options=-mno-fake-exceptions', offload compilation fails: + { dg-regexp {[^\r\n]+: In function 'main[^']+':[\r\n]+(?:[^\r\n]+: sorry, unimplemented: exception handling not supported[\r\n]+)+} } + (Note, using 'dg-regexp' instead of 'dg-message', as the former runs before the auto-mark-UNSUPPORTED.) + { dg-excess-errors {'mkoffload' failure etc.} } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2-offload-sorry-nvptx.C b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2-offload-sorry-nvptx.C new file mode 100644 index 0000000..da267d6 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2-offload-sorry-nvptx.C @@ -0,0 +1,21 @@ +/* 'throw' in OpenMP 'target' region, caught, '-foffload-options=-mno-fake-exceptions'. */ + +/* As this test case involves an expected offload compilation failure, we have to handle each offload target individually. + { dg-do link { target offload_target_nvptx } } + { dg-additional-options -foffload=nvptx-none } */ +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -foffload-options=-mno-fake-exceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "target-exceptions-throw-2.C" + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + Given '-foffload-options=-mno-fake-exceptions', offload compilation fails: + { dg-regexp {[^\r\n]+: In function 'main[^']+':[\r\n]+(?:[^\r\n]+: sorry, unimplemented: exception handling not supported[\r\n]+)+} } + (Note, using 'dg-regexp' instead of 'dg-message', as the former runs before the auto-mark-UNSUPPORTED.) + { dg-excess-errors {'mkoffload' failure etc.} } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2.C b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2.C new file mode 100644 index 0000000..97f4845 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2.C @@ -0,0 +1,20 @@ +/* 'throw' in OpenMP 'target' region, caught. */ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "../libgomp.oacc-c++/exceptions-throw-2.C" + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-output {.*caught 'MyException'[\r\n]+} { target { ! offload_device } } } + For GCN, nvptx offload execution, we don't print anything, but just 'abort'. + + For GCN, nvptx offload execution, there is no 'catch'ing; any exception is fatal. + { dg-shouldfail {'MyException' exception} { offload_device } } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-3.C b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-3.C new file mode 100644 index 0000000..c35180d --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-3.C @@ -0,0 +1,19 @@ +/* 'throw' in OpenMP 'target' region, dead code. */ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -DDEFAULT=defaultmap(to) } + ... to avoid wrong code for offloading execution; PR119692. + With this, the device code still isn't correct, but the defects are in dead code. + { dg-additional-options -fdump-tree-gimple } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "../libgomp.oacc-c++/exceptions-throw-3.C" + +/* { dg-final { scan-tree-dump-not {(?n)#pragma omp target .* defaultmap\(to\) map\(to:_ZTI11MyException \[len: [0-9]+\] \[runtime_implicit\]\)$} gimple { xfail *-*-* } } } */ + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } */ diff --git a/libgomp/testsuite/libgomp.c++/target-flex-10.C b/libgomp/testsuite/libgomp.c++/target-flex-10.C new file mode 100644 index 0000000..8fa9af7 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-10.C @@ -0,0 +1,215 @@ +/* Basic container usage. */ + +#include <vector> +#include <deque> +#include <list> +#include <set> +#include <map> +#if __cplusplus >= 201103L +#include <array> +#include <forward_list> +#include <unordered_set> +#include <unordered_map> +#endif + +bool vector_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::vector<int> vector; + ok = vector.empty(); + } + return ok; +} + +bool deque_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::deque<int> deque; + ok = deque.empty(); + } + return ok; +} + +bool list_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::list<int> list; + ok = list.empty(); + } + return ok; +} + +bool map_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::map<int, int> map; + ok = map.empty(); + } + return ok; +} + +bool set_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::set<int> set; + ok = set.empty(); + } + return ok; +} + +bool multimap_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::multimap<int, int> multimap; + ok = multimap.empty(); + } + return ok; +} + +bool multiset_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::multiset<int, int> multiset; + ok = multiset.empty(); + } + return ok; +} + +#if __cplusplus >= 201103L + +bool array_test() +{ + static constexpr std::size_t array_size = 42; + bool ok; + #pragma omp target map(from: ok) + { + std::array<int, array_size> array{}; + ok = array[0] == 0 + && array[array_size - 1] == 0; + } + return ok; +} + +bool forward_list_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::forward_list<int> forward_list; + ok = forward_list.empty(); + } + return ok; +} + +bool unordered_map_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::unordered_map<int, int> unordered_map; + ok = unordered_map.empty(); + } + return ok; +} + +bool unordered_set_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::unordered_set<int> unordered_set; + ok = unordered_set.empty(); + } + return ok; +} + +bool unordered_multimap_test() +{ + + bool ok; + #pragma omp target map(from: ok) + { + std::unordered_multimap<int, int> unordered_multimap; + ok = unordered_multimap.empty(); + } + return ok; +} + +bool unordered_multiset_test() +{ + + bool ok; + #pragma omp target map(from: ok) + { + std::unordered_multiset<int> unordered_multiset; + ok = unordered_multiset.empty(); + } + return ok; +} + +#else +bool array_test() { return true; } +bool forward_list_test() { return true; } +bool unordered_map_test() { return true; } +bool unordered_set_test() { return true; } +bool unordered_multimap_test() { return true; } +bool unordered_multiset_test() { return true; } +#endif + +int main() +{ + const bool vec_res = vector_test(); + __builtin_printf("vector : %s\n", vec_res ? "PASS" : "FAIL"); + const bool deque_res = deque_test(); + __builtin_printf("deque : %s\n", deque_res ? "PASS" : "FAIL"); + const bool list_res = list_test(); + __builtin_printf("list : %s\n", list_res ? "PASS" : "FAIL"); + const bool map_res = map_test(); + __builtin_printf("map : %s\n", map_res ? "PASS" : "FAIL"); + const bool set_res = set_test(); + __builtin_printf("set : %s\n", set_res ? "PASS" : "FAIL"); + const bool multimap_res = multimap_test(); + __builtin_printf("multimap : %s\n", multimap_res ? "PASS" : "FAIL"); + const bool multiset_res = multiset_test(); + __builtin_printf("multiset : %s\n", multiset_res ? "PASS" : "FAIL"); + const bool array_res = array_test(); + __builtin_printf("array : %s\n", array_res ? "PASS" : "FAIL"); + const bool forward_list_res = forward_list_test(); + __builtin_printf("forward_list : %s\n", forward_list_res ? "PASS" : "FAIL"); + const bool unordered_map_res = unordered_map_test(); + __builtin_printf("unordered_map : %s\n", unordered_map_res ? "PASS" : "FAIL"); + const bool unordered_set_res = unordered_set_test(); + __builtin_printf("unordered_set : %s\n", unordered_set_res ? "PASS" : "FAIL"); + const bool unordered_multimap_res = unordered_multimap_test(); + __builtin_printf("unordered_multimap: %s\n", unordered_multimap_res ? "PASS" : "FAIL"); + const bool unordered_multiset_res = unordered_multiset_test(); + __builtin_printf("unordered_multiset: %s\n", unordered_multiset_res ? "PASS" : "FAIL"); + const bool ok = vec_res + && deque_res + && list_res + && map_res + && set_res + && multimap_res + && multiset_res + && array_res + && forward_list_res + && unordered_map_res + && unordered_set_res + && unordered_multimap_res + && unordered_multiset_res; + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-100.C b/libgomp/testsuite/libgomp.c++/target-flex-100.C new file mode 100644 index 0000000..7ab047f --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-100.C @@ -0,0 +1,210 @@ +/* Container adaptors in target region. + Does not test comparison operators other than equality to allow these tests + to be generalized to arbitrary input data. */ + +#include <algorithm> +#include <cstdio> +#include <deque> +#include <queue> +#include <stack> +#include <vector> + +#include "target-flex-common.h" + +template<typename T, std::size_t Size> +bool test_stack(T (&arr)[Size]) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arr[:Size]) + { + bool inner_ok = true; + const std::size_t half_size = Size / 2; + const T first_element = arr[0]; + const T middle_element = arr[half_size - 1]; + const T last_element = arr[Size - 1]; + typedef std::stack<T, std::vector<T> > stack_type; + stack_type stack; + VERIFY (stack.empty()); + VERIFY (stack.size() == 0); + { + /* Do half with push. */ + std::size_t idx = 0; + for (; idx < half_size; ++idx) + { + stack.push(arr[idx]); + VERIFY (stack.top() == arr[idx]); + } + VERIFY (stack.size() == half_size); + VERIFY (static_cast<const stack_type&>(stack).size() == half_size); + for (; idx < Size; ++idx) + { + #if __cplusplus >= 201103L + /* Do the rest with emplace if C++11 or higher. */ + stack.emplace(arr[idx]); + #else + /* Otherwise just use push again. */ + stack.push(arr[idx]); + #endif + VERIFY (stack.top() == arr[idx]); + } + VERIFY (stack.size() == Size); + VERIFY (static_cast<const stack_type&>(stack).size() == Size); + + const stack_type stack_orig = stack_type(std::vector<T>(arr, arr + Size)); + VERIFY (stack == stack_orig); + /* References are contained in their own scope so we don't accidently + add tests referencing them after they have been invalidated. */ + { + const T& const_top = static_cast<const stack_type&>(stack).top(); + VERIFY (const_top == last_element); + T& mutable_top = stack.top(); + mutable_top = first_element; + VERIFY (const_top == first_element); + } + /* Will only compare inequal if the first and last elements are different. */ + VERIFY (first_element != last_element || stack != stack_orig); + for (std::size_t count = Size - half_size; count != 0; --count) + stack.pop(); + VERIFY (stack.top() == middle_element); + const stack_type stack_half_orig = stack_type(std::vector<T>(arr, arr + half_size)); + VERIFY (stack == stack_half_orig); + } + end: + ok = inner_ok; + } + return ok; +} + +template<typename T, std::size_t Size> +bool test_queue(T (&arr)[Size]) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arr[:Size]) + { + bool inner_ok = true; + const std::size_t half_size = Size / 2; + const T first_element = arr[0]; + const T last_element = arr[Size - 1]; + typedef std::queue<T, std::deque<T> > queue_type; + queue_type queue; + VERIFY (queue.empty()); + VERIFY (queue.size() == 0); + { + /* Do half with push. */ + std::size_t idx = 0; + for (; idx < half_size; ++idx) + { + queue.push(arr[idx]); + VERIFY (queue.back() == arr[idx]); + VERIFY (queue.front() == first_element); + } + VERIFY (queue.size() == half_size); + VERIFY (static_cast<const queue_type&>(queue).size() == half_size); + for (; idx < Size; ++idx) + { + #if __cplusplus >= 201103L + /* Do the rest with emplace if C++11 or higher. */ + queue.emplace(arr[idx]); + #else + /* Otherwise just use push again. */ + queue.push(arr[idx]); + #endif + VERIFY (queue.back() == arr[idx]); + } + VERIFY (queue.size() == Size); + VERIFY (static_cast<const queue_type&>(queue).size() == Size); + + const queue_type queue_orig = queue_type(std::deque<T>(arr, arr + Size)); + VERIFY (queue == queue_orig); + + /* References are contained in their own scope so we don't accidently + add tests referencing them after they have been invalidated. */ + { + const T& const_front = static_cast<const queue_type&>(queue).front(); + VERIFY (const_front == first_element); + T& mutable_front = queue.front(); + + const T& const_back = static_cast<const queue_type&>(queue).back(); + VERIFY (const_back == last_element); + T& mutable_back = queue.back(); + { + using std::swap; + swap(mutable_front, mutable_back); + } + VERIFY (const_front == last_element); + VERIFY (const_back == first_element); + /* Will only compare inequal if the first and last elements are different. */ + VERIFY (first_element != last_element || queue != queue_orig); + /* Return the last element to normal for the next comparison. */ + mutable_back = last_element; + } + + const T middle_element = arr[half_size]; + for (std::size_t count = Size - half_size; count != 0; --count) + queue.pop(); + VERIFY (queue.front() == middle_element); + const queue_type queue_upper_half = queue_type(std::deque<T>(arr + half_size, arr + Size)); + VERIFY (queue == queue_upper_half); + } + end: + ok = inner_ok; + } + return ok; +} + +template<typename T, std::size_t Size> +bool test_priority_queue(T (&arr)[Size], const T min_value, const T max_value) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arr[:Size]) + { + bool inner_ok = true; + typedef std::priority_queue<T, std::vector<T> > priority_queue_type; + { + priority_queue_type pqueue; + VERIFY (pqueue.empty()); + VERIFY (pqueue.size() == 0); + } + { + priority_queue_type pqueue(arr, arr + Size); + VERIFY (!pqueue.empty()); + VERIFY (pqueue.size() == Size); + VERIFY (static_cast<const priority_queue_type&>(pqueue).size() == Size); + + const T old_max = pqueue.top(); + + #if __cplusplus >= 201103L + pqueue.emplace(max_value); + #else + pqueue.push(max_value); + #endif + VERIFY (pqueue.top() == max_value); + pqueue.pop(); + VERIFY (pqueue.top() == old_max); + pqueue.push(min_value); + VERIFY (pqueue.top() == old_max); + pqueue.push(max_value); + VERIFY (pqueue.top() == max_value); + pqueue.pop(); + VERIFY (pqueue.top() == old_max); + VERIFY (pqueue.size() == Size + 1); + + for (std::size_t count = Size; count != 0; --count) + pqueue.pop(); + VERIFY (pqueue.size() == 1); + VERIFY (pqueue.top() == min_value); + } + end: + ok = inner_ok; + } + return ok; +} + +int main() +{ + int arr[10] = {0,1,2,3,4,5,6,7,8,9}; + + return test_stack(arr) + && test_queue(arr) + && test_priority_queue(arr, 0, 1000) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-101.C b/libgomp/testsuite/libgomp.c++/target-flex-101.C new file mode 100644 index 0000000..be9037e --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-101.C @@ -0,0 +1,136 @@ +/* { dg-additional-options -std=c++23 } */ + +/* C++23 container adaptors in target region. + Severely needs additional tests. */ + +#include <cstdio> +#include <utility> +#include <version> + +#if __cpp_lib_flat_map >= 202207L +#define ENABLE_FLAT_MAP 1 +#endif +#if __cpp_lib_flat_set >= 202207L +#define ENABLE_FLAT_SET 1 +#endif + +#ifdef ENABLE_FLAT_MAP +#include <flat_map> +#endif +#ifdef ENABLE_FLAT_SET +#include <flat_set> +#endif + +#include "target-flex-common.h" + +#ifdef ENABLE_FLAT_MAP +template<typename K, typename V, typename std::size_t Size> +bool test_flat_map(std::pair<K, V> (&arr)[Size]) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arr[:Size]) + { + bool inner_ok = true; + { + using flat_map_type = std::flat_map<K, V>; + flat_map_type map = {arr, arr + Size}; + + VERIFY (!map.empty()); + for (const auto& element : arr) + VERIFY (map.contains(element.first)); + } + end: + ok = inner_ok; + } + return ok; +} + +template<typename K, typename V, typename std::size_t Size> +bool test_flat_multimap(std::pair<K, V> (&arr)[Size]) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arr[:Size]) + { + bool inner_ok = true; + { + using flat_map_type = std::flat_map<K, V>; + flat_map_type map = {arr, arr + Size}; + + VERIFY (!map.empty()); + for (const auto& element : arr) + VERIFY (map.contains(element.first)); + } + end: + ok = inner_ok; + } + return ok; +} +#else +template<typename K, typename V, typename std::size_t Size> +bool test_flat_map(std::pair<K, V> (&arr)[Size]) { return true; } + +template<typename K, typename V, typename std::size_t Size> +bool test_flat_multimap(std::pair<K, V> (&arr)[Size]) { return true; } +#endif + +#ifdef ENABLE_FLAT_SET +template<typename T, typename std::size_t Size> +bool test_flat_set(T (&arr)[Size]) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arr[:Size]) + { + bool inner_ok = true; + { + using flat_set_type = std::flat_set<T>; + flat_set_type set = {arr, arr + Size}; + + VERIFY (!set.empty()); + for (const auto& element : arr) + VERIFY (set.contains(element)); + } + end: + ok = inner_ok; + } + return ok; +} + +template<typename T, typename std::size_t Size> +bool test_flat_multiset(T (&arr)[Size]) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arr[:Size]) + { + bool inner_ok = true; + { + using flat_multiset_type = std::flat_multiset<T>; + flat_multiset_type multiset = {arr, arr + Size}; + + VERIFY (!multiset.empty()); + for (const auto& element : arr) + VERIFY (multiset.contains(element)); + } + end: + ok = inner_ok; + } + return ok; +} +#else +template<typename T, typename std::size_t Size> +bool test_flat_set(T (&arr)[Size]) { return true; } + +template<typename T, typename std::size_t Size> +bool test_flat_multiset(T (&arr)[Size]) { return true; } +#endif + +int main() +{ + int arr[10] = {0,1,2,3,4,5,6,7,8,9}; + std::pair<int, int> pairs[10] = {{ 1, 2}, { 2, 4}, { 3, 6}, { 4, 8}, { 5, 10}, + { 6, 12}, { 7, 14}, { 8, 16}, { 9, 18}, {10, 20}}; + + return test_flat_set(arr) + && test_flat_multiset(arr) + && test_flat_map(pairs) + && test_flat_multimap(pairs) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-11.C b/libgomp/testsuite/libgomp.c++/target-flex-11.C new file mode 100644 index 0000000..6d55129 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-11.C @@ -0,0 +1,444 @@ +/* Check constructors/destructors are called in containers. */ + +#include <vector> +#include <deque> +#include <list> +#include <set> +#include <map> +#include <utility> +#if __cplusplus >= 201103L +#include <array> +#include <forward_list> +#include <unordered_set> +#include <unordered_map> +#endif + +#include "target-flex-common.h" + +struct indirect_counter +{ + typedef int counter_value_type; + counter_value_type *_count_ptr; + + indirect_counter(counter_value_type *count_ptr) BL_NOEXCEPT : _count_ptr(count_ptr) { + ++(*_count_ptr); + } + indirect_counter(const indirect_counter& other) BL_NOEXCEPT : _count_ptr(other._count_ptr) { + ++(*_count_ptr); + } + /* Don't declare a move constructor, we want to copy no matter what. */ + ~indirect_counter() { + --(*_count_ptr); + } +}; + +bool operator==(indirect_counter const& lhs, indirect_counter const& rhs) BL_NOEXCEPT + { return lhs._count_ptr == rhs._count_ptr; } +bool operator<(indirect_counter const& lhs, indirect_counter const& rhs) BL_NOEXCEPT + { return lhs._count_ptr < rhs._count_ptr; } + +#if __cplusplus >= 201103L +template<> +struct std::hash<indirect_counter> +{ + std::size_t operator()(const indirect_counter& ic) const noexcept + { return std::hash<indirect_counter::counter_value_type *>{}(ic._count_ptr); } +}; +#endif + +/* Not a container, just a sanity check really. */ +bool automatic_lifetime_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + indirect_counter c = indirect_counter(&counter); + indirect_counter(static_cast<int*>(&counter)); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool vector_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + std::vector<indirect_counter> vec(42, indirect_counter(&counter)); + VERIFY (counter == 42); + vec.resize(32, indirect_counter(&counter)); + VERIFY (counter == 32); + vec.push_back(indirect_counter(&counter)); + VERIFY (counter == 33); + vec.pop_back(); + VERIFY (counter == 32); + vec.pop_back(); + VERIFY (counter == 31); + vec.resize(100, indirect_counter(&counter)); + VERIFY (counter == 100); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool deque_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + std::deque<indirect_counter> vec(42, indirect_counter(&counter)); + VERIFY (counter == 42); + vec.resize(32, indirect_counter(&counter)); + VERIFY (counter == 32); + vec.push_back(indirect_counter(&counter)); + VERIFY (counter == 33); + vec.pop_back(); + VERIFY (counter == 32); + vec.pop_back(); + VERIFY (counter == 31); + vec.resize(100, indirect_counter(&counter)); + VERIFY (counter == 100); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool list_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + std::list<indirect_counter> list(42, indirect_counter(&counter)); + VERIFY (counter == 42); + list.resize(32, indirect_counter(&counter)); + VERIFY (counter == 32); + list.push_back(indirect_counter(&counter)); + VERIFY (counter == 33); + list.pop_back(); + VERIFY (counter == 32); + list.pop_back(); + VERIFY (counter == 31); + list.resize(100, indirect_counter(&counter)); + VERIFY (counter == 100); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool map_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + std::map<int, indirect_counter> map; + map.insert(std::make_pair(1, indirect_counter(&counter))); + VERIFY (counter == 1); + map.insert(std::make_pair(1, indirect_counter(&counter))); + VERIFY (counter == 1); + map.insert(std::make_pair(2, indirect_counter(&counter))); + VERIFY (counter == 2); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool set_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter0 = 0; + int counter1 = 0; + { + std::set<indirect_counter> set; + set.insert(indirect_counter(&counter0)); + VERIFY (counter0 == 1); + set.insert(indirect_counter(&counter0)); + VERIFY (counter0 == 1); + set.insert(indirect_counter(&counter1)); + VERIFY (counter0 == 1 && counter1 == 1); + } + VERIFY (counter0 == 0 && counter1 == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool multimap_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + std::multimap<int, indirect_counter> multimap; + multimap.insert(std::make_pair(1, indirect_counter(&counter))); + VERIFY (counter == 1); + multimap.insert(std::make_pair(1, indirect_counter(&counter))); + VERIFY (counter == 2); + multimap.insert(std::make_pair(2, indirect_counter(&counter))); + VERIFY (counter == 3); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool multiset_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter0 = 0; + int counter1 = 0; + { + std::multiset<indirect_counter> multiset; + multiset.insert(indirect_counter(&counter0)); + VERIFY (counter0 == 1); + multiset.insert(indirect_counter(&counter0)); + VERIFY (counter0 == 2); + multiset.insert(indirect_counter(&counter1)); + VERIFY (counter0 == 2 && counter1 == 1); + } + VERIFY (counter0 == 0 && counter1 == 0); + end: + ok = inner_ok; + } + return ok; +} + +#if __cplusplus >= 201103L + +bool array_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + indirect_counter ic(&counter); + std::array<indirect_counter, 10> array{ic, ic, ic, ic, ic, + ic, ic, ic, ic, ic}; + VERIFY (counter == 11); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool forward_list_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + std::forward_list<indirect_counter> forward_list(42, indirect_counter(&counter)); + VERIFY (counter == 42); + forward_list.resize(32, indirect_counter(&counter)); + VERIFY (counter == 32); + forward_list.push_front(indirect_counter(&counter)); + VERIFY (counter == 33); + forward_list.pop_front(); + VERIFY (counter == 32); + forward_list.pop_front(); + VERIFY (counter == 31); + forward_list.resize(100, indirect_counter(&counter)); + VERIFY (counter == 100); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool unordered_map_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + std::unordered_map<int, indirect_counter> unordered_map; + unordered_map.insert({1, indirect_counter(&counter)}); + VERIFY (counter == 1); + unordered_map.insert({1, indirect_counter(&counter)}); + VERIFY (counter == 1); + unordered_map.insert({2, indirect_counter(&counter)}); + VERIFY (counter == 2); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool unordered_set_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter0 = 0; + int counter1 = 0; + { + std::unordered_set<indirect_counter> unordered_set; + unordered_set.insert(indirect_counter(&counter0)); + VERIFY (counter0 == 1); + unordered_set.insert(indirect_counter(&counter0)); + VERIFY (counter0 == 1); + unordered_set.insert(indirect_counter(&counter1)); + VERIFY (counter0 == 1 && counter1 == 1); + } + VERIFY (counter0 == 0 && counter1 == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool unordered_multimap_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + std::unordered_multimap<int, indirect_counter> unordered_multimap; + unordered_multimap.insert({1, indirect_counter(&counter)}); + VERIFY (counter == 1); + unordered_multimap.insert({1, indirect_counter(&counter)}); + VERIFY (counter == 2); + unordered_multimap.insert({2, indirect_counter(&counter)}); + VERIFY (counter == 3); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool unordered_multiset_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter0 = 0; + int counter1 = 0; + { + std::unordered_multiset<indirect_counter> unordered_multiset; + unordered_multiset.insert(indirect_counter(&counter0)); + VERIFY (counter0 == 1); + unordered_multiset.insert(indirect_counter(&counter0)); + VERIFY (counter0 == 2); + unordered_multiset.insert(indirect_counter(&counter1)); + VERIFY (counter0 == 2 && counter1 == 1); + } + VERIFY (counter0 == 0 && counter1 == 0); + end: + ok = inner_ok; + } + return ok; +} + +#else +bool array_test() { return true; } +bool forward_list_test() { return true; } +bool unordered_map_test() { return true; } +bool unordered_set_test() { return true; } +bool unordered_multimap_test() { return true; } +bool unordered_multiset_test() { return true; } +#endif + +int main() +{ + const bool auto_res = automatic_lifetime_test(); + const bool vec_res = vector_test(); + const bool deque_res = deque_test(); + const bool list_res = list_test(); + const bool map_res = map_test(); + const bool set_res = set_test(); + const bool multimap_res = multimap_test(); + const bool multiset_res = multiset_test(); + const bool array_res = array_test(); + const bool forward_list_res = forward_list_test(); + const bool unordered_map_res = unordered_map_test(); + const bool unordered_set_res = unordered_set_test(); + const bool unordered_multimap_res = unordered_multimap_test(); + const bool unordered_multiset_res = unordered_multiset_test(); + std::printf("sanity check : %s\n", auto_res ? "PASS" : "FAIL"); + std::printf("vector : %s\n", vec_res ? "PASS" : "FAIL"); + std::printf("deque : %s\n", deque_res ? "PASS" : "FAIL"); + std::printf("list : %s\n", list_res ? "PASS" : "FAIL"); + std::printf("map : %s\n", map_res ? "PASS" : "FAIL"); + std::printf("set : %s\n", set_res ? "PASS" : "FAIL"); + std::printf("multimap : %s\n", multimap_res ? "PASS" : "FAIL"); + std::printf("multiset : %s\n", multiset_res ? "PASS" : "FAIL"); + std::printf("array : %s\n", array_res ? "PASS" : "FAIL"); + std::printf("forward_list : %s\n", forward_list_res ? "PASS" : "FAIL"); + std::printf("unordered_map : %s\n", unordered_map_res ? "PASS" : "FAIL"); + std::printf("unordered_set : %s\n", unordered_set_res ? "PASS" : "FAIL"); + std::printf("unordered_multimap: %s\n", unordered_multimap_res ? "PASS" : "FAIL"); + std::printf("unordered_multiset: %s\n", unordered_multiset_res ? "PASS" : "FAIL"); + const bool ok = auto_res + && vec_res + && deque_res + && list_res + && map_res + && set_res + && multimap_res + && multiset_res + && array_res + && forward_list_res + && unordered_map_res + && unordered_set_res + && unordered_multimap_res + && unordered_multiset_res; + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-12.C b/libgomp/testsuite/libgomp.c++/target-flex-12.C new file mode 100644 index 0000000..024fb73 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-12.C @@ -0,0 +1,736 @@ +/* Populated with mapped data, validate, mutate, validate again. + The cases using sets do not mutate. + Note: Some of the code in here really sucks due to being made to be + compatible with c++98. */ + +#include <vector> +#include <deque> +#include <list> +#include <set> +#include <map> +#if __cplusplus >= 201103L +#include <array> +#include <forward_list> +#include <unordered_set> +#include <unordered_map> +#endif + +#include <limits> +#include <iterator> + +#include "target-flex-common.h" + +template<bool B, class T = void> +struct enable_if {}; + +template<class T> +struct enable_if<true, T> { typedef T type; }; + +struct identity_func +{ +#if __cplusplus < 201103L + template<typename T> + T& operator()(T& arg) const BL_NOEXCEPT { return arg; } + template<typename T> + T const& operator()(T const& arg) const BL_NOEXCEPT { return arg; } +#else + template<typename T> + constexpr T&& operator()(T&& arg) const BL_NOEXCEPT { return std::forward<T>(arg); } +#endif +}; + +/* Applies projection to the second iterator. */ +template<typename It0, typename It1, typename Proj> +bool validate_sequential_elements(const It0 begin0, const It0 end0, + const It1 begin1, const It1 end1, + Proj proj) BL_NOEXCEPT +{ + It0 it0 = begin0; + It1 it1 = begin1; + for (; it0 != end0; ++it0, ++it1) + { + /* Sizes mismatch, don't bother aborting though just fail the test. */ + if (it1 == end1) + return false; + if (*it0 != proj(*it1)) + return false; + } + /* Sizes mismatch, do as above. */ + if (it1 != end1) + return false; + return true; +} + +template<typename It0, typename It1> +bool validate_sequential_elements(const It0 begin0, const It0 end0, + const It1 begin1, const It1 end1) BL_NOEXCEPT +{ + return validate_sequential_elements(begin0, end0, begin1, end1, identity_func()); +} + +/* Inefficient, but simple. */ +template<typename It, typename OutIt> +void simple_copy(const It begin, const It end, OutIt out) BL_NOEXCEPT +{ + for (It it = begin; it != end; ++it, ++out) + *out = *it; +} + +template<typename It, typename MutateFn> +void simple_mutate(const It begin, const It end, MutateFn mut_fn) BL_NOEXCEPT +{ + for (It it = begin; it != end; ++it) + *it = mut_fn(*it); +} + +template<typename MutationFunc, typename T, std::size_t Size> +bool vector_test(const T (&arr)[Size]) +{ + bool ok; + T out_arr[Size]; + T out_mut_arr[Size]; + #pragma omp target map(from: ok, out_arr[:Size], out_mut_arr[:Size]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::vector<T> vector(arr, arr + Size); + VERIFY (validate_sequential_elements(vector.begin(), vector.end(), + arr, arr + Size)); + simple_copy(vector.begin(), vector.end(), out_arr); + simple_mutate(vector.begin(), vector.end(), MutationFunc()); + VERIFY (validate_sequential_elements(vector.begin(), vector.end(), + arr, arr + Size, MutationFunc())); + simple_copy(vector.begin(), vector.end(), out_mut_arr); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_sequential_elements(out_arr, out_arr + Size, + arr, arr + Size)); + VERIFY_NON_TARGET (validate_sequential_elements(out_mut_arr, out_mut_arr + Size, + arr, arr + Size, MutationFunc())); + return true; +} + +template<typename MutationFunc, typename T, std::size_t Size> +bool deque_test(const T (&arr)[Size]) +{ + bool ok; + T out_arr[Size]; + T out_mut_arr[Size]; + #pragma omp target map(from: ok, out_arr[:Size], out_mut_arr[:Size]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::deque<T> deque(arr, arr + Size); + VERIFY (validate_sequential_elements(deque.begin(), deque.end(), + arr, arr + Size)); + simple_copy(deque.begin(), deque.end(), out_arr); + simple_mutate(deque.begin(), deque.end(), MutationFunc()); + VERIFY (validate_sequential_elements(deque.begin(), deque.end(), + arr, arr + Size, MutationFunc())); + simple_copy(deque.begin(), deque.end(), out_mut_arr); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_sequential_elements(out_arr, out_arr + Size, + arr, arr + Size)); + VERIFY_NON_TARGET (validate_sequential_elements(out_mut_arr, out_mut_arr + Size, + arr, arr + Size, MutationFunc())); + return true; +} + +template<typename MutationFunc, typename T, std::size_t Size> +bool list_test(const T (&arr)[Size]) +{ + bool ok; + T out_arr[Size]; + T out_mut_arr[Size]; + #pragma omp target map(from: ok, out_arr[:Size], out_mut_arr[:Size]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::list<T> list(arr, arr + Size); + VERIFY (validate_sequential_elements(list.begin(), list.end(), + arr, arr + Size)); + simple_copy(list.begin(), list.end(), out_arr); + simple_mutate(list.begin(), list.end(), MutationFunc()); + VERIFY (validate_sequential_elements(list.begin(), list.end(), + arr, arr + Size, MutationFunc())); + simple_copy(list.begin(), list.end(), out_mut_arr); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_sequential_elements(out_arr, out_arr + Size, + arr, arr + Size)); + VERIFY_NON_TARGET (validate_sequential_elements(out_mut_arr, out_mut_arr + Size, + arr, arr + Size, MutationFunc())); + return true; +} + +template<typename T> +const T& get_key(const T& arg) BL_NOEXCEPT + { return arg; } +template<typename K, typename V> +const K& get_key(const std::pair<K, V>& pair) BL_NOEXCEPT + { return pair.first; } +template<typename T> +const T& get_value(const T& arg) BL_NOEXCEPT + { return arg; } +template<typename K, typename V> +const K& get_value(const std::pair<K, V>& pair) BL_NOEXCEPT + { return pair.second; } + +template<typename T> +struct key_type { typedef T type; }; +template<typename K, typename V> +struct key_type<std::pair<K, V> > { typedef K type; }; + +template<typename Proj, typename Container, typename It> +bool validate_associative(const Container& container, + const It compare_begin, + const It compare_end, + Proj proj) BL_NOEXCEPT +{ + const typename Container::const_iterator elem_end = container.end(); + for (It compare_it = compare_begin; compare_it != compare_end; ++compare_it) + { + const typename Container::const_iterator elem_it = container.find(get_key(*compare_it)); + VERIFY_NON_TARGET (elem_it != elem_end); + VERIFY_NON_TARGET (proj(get_value(*compare_it)) == get_value(*elem_it)); + } + return true; +} + +template<typename Container, typename It> +bool validate_associative(const Container& container, + const It compare_begin, + const It compare_end) BL_NOEXCEPT +{ + return validate_associative(container, compare_begin, compare_end, identity_func()); +} + +template<typename It, typename MutateFn> +void simple_mutate_map(const It begin, const It end, MutateFn mut_fn) BL_NOEXCEPT +{ + for (It it = begin; it != end; ++it) + it->second = mut_fn(it->second); +} + +template<typename It, typename OutIter> +void simple_copy_unique(const It begin, const It end, OutIter out) BL_NOEXCEPT +{ + /* In case anyone reads this, I want it to be known that I hate c++98. */ + typedef typename key_type<typename std::iterator_traits<It>::value_type>::type key_t; + std::set<key_t> already_seen; + for (It it = begin; it != end; ++it, ++out) + { + key_t key = get_key(*it); + if (already_seen.find(key) != already_seen.end()) + continue; + already_seen.insert(key); + *out = *it; + } +} + +template<typename MutationFunc, typename K, typename V, std::size_t Size> +bool map_test(const std::pair<K, V> (&arr)[Size]) +{ + std::map<K, V> reference_map(arr, arr + Size); + bool ok; + /* Both sizes should be the same. */ + std::pair<K, V> out_pairs[Size]; + std::size_t out_size; + std::pair<K, V> out_pairs_mut[Size]; + std::size_t out_size_mut; + #pragma omp target map(from: ok, out_pairs[:Size], out_size, \ + out_pairs_mut[:Size], out_size_mut) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::vector<std::pair<K, V> > unique_elems; + simple_copy_unique(arr, arr + Size, + std::back_insert_iterator<std::vector<std::pair<K, V> > >(unique_elems)); + + std::map<K, V> map(arr, arr + Size); + VERIFY (validate_associative(map, unique_elems.begin(), unique_elems.end())); + simple_copy(map.begin(), map.end(), out_pairs); + out_size = map.size(); + simple_mutate_map(map.begin(), map.end(), MutationFunc()); + VERIFY (validate_associative(map, unique_elems.begin(), unique_elems.end(), + MutationFunc())); + simple_copy(map.begin(), map.end(), out_pairs_mut); + out_size_mut = map.size(); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (out_size == out_size_mut); + VERIFY_NON_TARGET (validate_associative(reference_map, + out_pairs, out_pairs + out_size)); + simple_mutate_map(reference_map.begin(), reference_map.end(), MutationFunc()); + VERIFY_NON_TARGET (validate_associative(reference_map, + out_pairs_mut, out_pairs_mut + out_size_mut)); + return true; +} + +template<typename T, std::size_t Size> +bool set_test(const T (&arr)[Size]) +{ + std::set<T> reference_set(arr, arr + Size); + bool ok; + /* Both sizes should be the same. */ + T out_arr[Size]; + std::size_t out_size; + #pragma omp target map(from: ok, out_arr[:Size], out_size) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::vector<T> unique_elems; + simple_copy_unique(arr, arr + Size, + std::back_insert_iterator<std::vector<T> >(unique_elems)); + + std::set<T> set(arr, arr + Size); + VERIFY (validate_associative(set, unique_elems.begin(), unique_elems.end())); + simple_copy(set.begin(), set.end(), out_arr); + out_size = set.size(); + /* Sets can't be mutated, we could create another set with mutated + but it gets a little annoying and probably isn't an interesting test. */ + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_associative(reference_set, + out_arr, out_arr + out_size)); + return true; +} + +template<typename Proj, typename Container, typename It> +bool validate_multi_associative(const Container& container, + const It compare_begin, + const It compare_end, + Proj proj) BL_NOEXCEPT +{ + /* Once again, for the poor soul reviewing these, I hate c++98. */ + typedef typename key_type<typename std::iterator_traits<It>::value_type>::type key_t; + typedef std::map<key_t, std::size_t> counter_map; + counter_map key_count_map; + for (It it = compare_begin; it != compare_end; ++it) + { + const key_t& key = get_key(*it); + typename counter_map::iterator counter_it + = key_count_map.find(key); + if (counter_it != key_count_map.end()) + ++counter_it->second; + else + key_count_map.insert(std::pair<const key_t, std::size_t>(key, std::size_t(1))); + } + const typename Container::const_iterator elem_end = container.end(); + for (It compare_it = compare_begin; compare_it != compare_end; ++compare_it) + { + const key_t& key = get_key(*compare_it); + typename counter_map::iterator count_it = key_count_map.find(key); + std::size_t key_count = count_it != key_count_map.end() ? count_it->second + : std::size_t(0); + VERIFY_NON_TARGET (key_count > std::size_t(0) && "this will never happen"); + /* This gets tested multiple times but that should be fine. */ + VERIFY_NON_TARGET (key_count == container.count(key)); + typename Container::const_iterator elem_it = container.find(key); + /* This will never happen if the previous case passed. */ + VERIFY_NON_TARGET (elem_it != elem_end); + bool found_element = false; + for (; elem_it != elem_end; ++elem_it) + if (proj(get_value(*compare_it)) == get_value(*elem_it)) + { + found_element = true; + break; + } + VERIFY_NON_TARGET (found_element); + } + return true; +} + +template<typename Container, typename It> +bool validate_multi_associative(const Container& container, + const It compare_begin, + const It compare_end) BL_NOEXCEPT +{ + return validate_multi_associative(container, compare_begin, compare_end, identity_func()); +} + +template<typename MutationFunc, typename K, typename V, std::size_t Size> +bool multimap_test(const std::pair<K, V> (&arr)[Size]) +{ + std::multimap<K, V> reference_multimap(arr, arr + Size); + bool ok; + std::pair<K, V> out_pairs[Size]; + std::pair<K, V> out_pairs_mut[Size]; + #pragma omp target map(from: ok, out_pairs[:Size], out_pairs_mut[:Size]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::multimap<K, V> multimap(arr, arr + Size); + VERIFY (validate_multi_associative(multimap, arr, arr + Size)); + simple_copy(multimap.begin(), multimap.end(), out_pairs); + simple_mutate_map(multimap.begin(), multimap.end(), MutationFunc()); + VERIFY (validate_multi_associative(multimap, arr, arr + Size, MutationFunc())); + simple_copy(multimap.begin(), multimap.end(), out_pairs_mut); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_multi_associative(reference_multimap, + out_pairs, out_pairs + Size)); + simple_mutate_map(reference_multimap.begin(), reference_multimap.end(), MutationFunc()); + VERIFY_NON_TARGET (validate_multi_associative(reference_multimap, + out_pairs_mut, out_pairs_mut + Size)); + return true; +} + +template<typename T, std::size_t Size> +bool multiset_test(const T (&arr)[Size]) +{ + std::multiset<T> reference_multiset(arr, arr + Size); + bool ok; + T out_arr[Size]; + #pragma omp target map(from: ok, out_arr[:Size]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::multiset<T> set(arr, arr + Size); + VERIFY (validate_multi_associative(set, arr, arr + Size)); + simple_copy(set.begin(), set.end(), out_arr); + /* Sets can't be mutated, we could create another set with mutated + but it gets a little annoying and probably isn't an interesting test. */ + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_multi_associative(reference_multiset, + out_arr, out_arr + Size)); + return true; +} + +#if __cplusplus >= 201103L + +template<typename MutationFunc, typename T, std::size_t Size> +bool array_test(const T (&arr)[Size]) +{ + bool ok; + T out_arr[Size]; + T out_mut_arr[Size]; + #pragma omp target map(from: ok, out_arr[:Size], out_mut_arr[:Size]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::array<T, Size> std_array{}; + /* Special case for std::array since it can't be initialized + with iterators. */ + { + T zero_val = T{}; + for (auto it = std_array.begin(); it != std_array.end(); ++it) + VERIFY (*it == zero_val); + } + simple_copy(arr, arr + Size, std_array.begin()); + VERIFY (validate_sequential_elements(std_array.begin(), std_array.end(), + arr, arr + Size)); + simple_copy(std_array.begin(), std_array.end(), out_arr); + simple_mutate(std_array.begin(), std_array.end(), MutationFunc()); + VERIFY (validate_sequential_elements(std_array.begin(), std_array.end(), + arr, arr + Size, MutationFunc())); + simple_copy(std_array.begin(), std_array.end(), out_mut_arr); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_sequential_elements(out_arr, out_arr + Size, + arr, arr + Size)); + VERIFY_NON_TARGET (validate_sequential_elements(out_mut_arr, out_mut_arr + Size, + arr, arr + Size, MutationFunc())); + return true; +} + +template<typename MutationFunc, typename T, std::size_t Size> +bool forward_list_test(const T (&arr)[Size]) +{ + bool ok; + T out_arr[Size]; + T out_mut_arr[Size]; + #pragma omp target map(from: ok, out_arr[:Size], out_mut_arr[:Size]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::forward_list<T> fwd_list(arr, arr + Size); + VERIFY (validate_sequential_elements(fwd_list.begin(), fwd_list.end(), + arr, arr + Size)); + simple_copy(fwd_list.begin(), fwd_list.end(), out_arr); + simple_mutate(fwd_list.begin(), fwd_list.end(), MutationFunc()); + VERIFY (validate_sequential_elements(fwd_list.begin(), fwd_list.end(), + arr, arr + Size, MutationFunc())); + simple_copy(fwd_list.begin(), fwd_list.end(), out_mut_arr); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_sequential_elements(out_arr, out_arr + Size, + arr, arr + Size)); + VERIFY_NON_TARGET (validate_sequential_elements(out_mut_arr, out_mut_arr + Size, + arr, arr + Size, MutationFunc())); + return true; +} + +template<typename MutationFunc, typename K, typename V, std::size_t Size> +bool unordered_map_test(const std::pair<K, V> (&arr)[Size]) +{ + std::unordered_map<K, V> reference_map(arr, arr + Size); + bool ok; + /* Both sizes should be the same. */ + std::pair<K, V> out_pairs[Size]; + std::size_t out_size; + std::pair<K, V> out_pairs_mut[Size]; + std::size_t out_size_mut; + #pragma omp target map(from: ok, out_pairs[:Size], out_size, \ + out_pairs_mut[:Size], out_size_mut) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::vector<std::pair<K, V> > unique_elems; + simple_copy_unique(arr, arr + Size, + std::back_insert_iterator<std::vector<std::pair<K, V> > >(unique_elems)); + + std::unordered_map<K, V> map(arr, arr + Size); + VERIFY (validate_associative(map, unique_elems.begin(), unique_elems.end())); + simple_copy(map.begin(), map.end(), out_pairs); + out_size = map.size(); + simple_mutate_map(map.begin(), map.end(), MutationFunc()); + VERIFY (validate_associative(map, unique_elems.begin(), unique_elems.end(), + MutationFunc())); + simple_copy(map.begin(), map.end(), out_pairs_mut); + out_size_mut = map.size(); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (out_size == out_size_mut); + VERIFY_NON_TARGET (validate_associative(reference_map, + out_pairs, out_pairs + out_size)); + simple_mutate_map(reference_map.begin(), reference_map.end(), MutationFunc()); + VERIFY_NON_TARGET (validate_associative(reference_map, + out_pairs_mut, out_pairs_mut + out_size_mut)); + return true; +} + +template<typename T, std::size_t Size> +bool unordered_set_test(const T (&arr)[Size]) +{ + std::unordered_set<T> reference_set(arr, arr + Size); + bool ok; + /* Both sizes should be the same. */ + T out_arr[Size]; + std::size_t out_size; + #pragma omp target map(from: ok, out_arr[:Size], out_size) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::vector<T> unique_elems; + simple_copy_unique(arr, arr + Size, + std::back_insert_iterator<std::vector<T> >(unique_elems)); + + std::unordered_set<T> set(arr, arr + Size); + VERIFY (validate_associative(set, unique_elems.begin(), unique_elems.end())); + simple_copy(set.begin(), set.end(), out_arr); + out_size = set.size(); + /* Sets can't be mutated, we could create another set with mutated + but it gets a little annoying and probably isn't an interesting test. */ + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_associative(reference_set, + out_arr, out_arr + out_size)); + return true; +} + +template<typename MutationFunc, typename K, typename V, std::size_t Size> +bool unordered_multimap_test(const std::pair<K, V> (&arr)[Size]) +{ + std::unordered_multimap<K, V> reference_multimap(arr, arr + Size); + bool ok; + std::pair<K, V> out_pairs[Size]; + std::pair<K, V> out_pairs_mut[Size]; + #pragma omp target map(from: ok, out_pairs[:Size], out_pairs_mut[:Size]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::unordered_multimap<K, V> multimap(arr, arr + Size); + VERIFY (validate_multi_associative(multimap, arr, arr + Size)); + simple_copy(multimap.begin(), multimap.end(), out_pairs); + simple_mutate_map(multimap.begin(), multimap.end(), MutationFunc()); + VERIFY (validate_multi_associative(multimap, arr, arr + Size, MutationFunc())); + simple_copy(multimap.begin(), multimap.end(), out_pairs_mut); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_multi_associative(reference_multimap, + out_pairs, out_pairs + Size)); + simple_mutate_map(reference_multimap.begin(), reference_multimap.end(), MutationFunc()); + VERIFY_NON_TARGET (validate_multi_associative(reference_multimap, + out_pairs_mut, out_pairs_mut + Size)); + return true; +} + +template<typename T, std::size_t Size> +bool unordered_multiset_test(const T (&arr)[Size]) +{ + std::unordered_multiset<T> reference_multiset(arr, arr + Size); + bool ok; + T out_arr[Size]; + #pragma omp target map(from: ok, out_arr[:Size]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::unordered_multiset<T> set(arr, arr + Size); + VERIFY (validate_multi_associative(set, arr, arr + Size)); + simple_copy(set.begin(), set.end(), out_arr); + /* Sets can't be mutated, we could create another set with mutated + but it gets a little annoying and probably isn't an interesting test. */ + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_multi_associative(reference_multiset, + out_arr, out_arr + Size)); + return true; +} + +#else +template<typename, typename T, std::size_t Size> bool array_test(const T (&arr)[Size]) { return true; } +template<typename, typename T, std::size_t Size> bool forward_list_test(const T (&arr)[Size]) { return true; } +template<typename, typename T, std::size_t Size> bool unordered_map_test(const T (&arr)[Size]) { return true; } +template<typename T, std::size_t Size> bool unordered_set_test(const T (&arr)[Size]) { return true; } +template<typename, typename T, std::size_t Size> bool unordered_multimap_test(const T (&arr)[Size]) { return true; } +template<typename T, std::size_t Size> bool unordered_multiset_test(const T (&arr)[Size]) { return true; } +#endif + +/* This clamps to the maximum value to guard against overflowing, + assuming std::numeric_limits is specialized for T. */ +struct multiply_by_2 +{ + template<typename T> + typename enable_if<std::numeric_limits<T>::is_specialized, T>::type + operator()(T arg) const BL_NOEXCEPT { + if (arg < static_cast<T>(0)) + { + if (std::numeric_limits<T>::min() / static_cast<T>(2) >= arg) + return std::numeric_limits<T>::min(); + } + else + { + if (std::numeric_limits<T>::max() / static_cast<T>(2) <= arg) + return std::numeric_limits<T>::max(); + } + return arg * 2; + } + template<typename T> + typename enable_if<!std::numeric_limits<T>::is_specialized, T>::type + operator()(T arg) const BL_NOEXCEPT { + return arg * 2; + } +}; + +int main() +{ + int data[8] = {0, 1, 2, 3, 4, 5, 6, 7}; + std::pair<int, int> pairs[10] = {std::pair<int, int>( 1, 2), + std::pair<int, int>( 2, 4), + std::pair<int, int>( 3, 6), + std::pair<int, int>( 4, 8), + std::pair<int, int>( 5, 10), + std::pair<int, int>( 6, 12), + std::pair<int, int>( 7, 14), + std::pair<int, int>( 8, 16), + std::pair<int, int>( 9, 18), + std::pair<int, int>(10, 20)}; + const bool vec_res = vector_test<multiply_by_2>(data); + const bool deque_res = deque_test<multiply_by_2>(data); + const bool list_res = list_test<multiply_by_2>(data); + const bool map_res = map_test<multiply_by_2>(pairs); + const bool set_res = set_test(data); + const bool multimap_res = multimap_test<multiply_by_2>(pairs); + const bool multiset_res = multiset_test(data); + const bool array_res = array_test<multiply_by_2>(data); + const bool forward_list_res = forward_list_test<multiply_by_2>(data); + const bool unordered_map_res = unordered_map_test<multiply_by_2>(pairs); + const bool unordered_set_res = unordered_set_test(data); + const bool unordered_multimap_res = unordered_multimap_test<multiply_by_2>(pairs); + const bool unordered_multiset_res = unordered_multiset_test(data); + std::printf("vector : %s\n", vec_res ? "PASS" : "FAIL"); + std::printf("deque : %s\n", deque_res ? "PASS" : "FAIL"); + std::printf("list : %s\n", list_res ? "PASS" : "FAIL"); + std::printf("map : %s\n", map_res ? "PASS" : "FAIL"); + std::printf("set : %s\n", set_res ? "PASS" : "FAIL"); + std::printf("multimap : %s\n", multimap_res ? "PASS" : "FAIL"); + std::printf("multiset : %s\n", multiset_res ? "PASS" : "FAIL"); + std::printf("array : %s\n", array_res ? "PASS" : "FAIL"); + std::printf("forward_list : %s\n", forward_list_res ? "PASS" : "FAIL"); + std::printf("unordered_map : %s\n", unordered_map_res ? "PASS" : "FAIL"); + std::printf("unordered_set : %s\n", unordered_set_res ? "PASS" : "FAIL"); + std::printf("unordered_multimap: %s\n", unordered_multimap_res ? "PASS" : "FAIL"); + std::printf("unordered_multiset: %s\n", unordered_multiset_res ? "PASS" : "FAIL"); + const bool ok = vec_res + && deque_res + && list_res + && map_res + && set_res + && multimap_res + && multiset_res + && array_res + && forward_list_res + && unordered_map_res + && unordered_set_res + && unordered_multimap_res + && unordered_multiset_res; + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-2000.C b/libgomp/testsuite/libgomp.c++/target-flex-2000.C new file mode 100644 index 0000000..688c014 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-2000.C @@ -0,0 +1,32 @@ +/* Tiny tuple test. */ + +#include <tuple> + +#include "target-flex-common.h" + +bool test(int arg) +{ + bool ok; + int out; + std::tuple tup = {'a', arg, 3.14f}; + #pragma omp target map(from: ok, out) map(to: tup) + { + bool inner_ok = true; + { + VERIFY (std::get<0>(tup) == 'a'); + out = std::get<1>(tup); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (out == arg); + return true; +} + +int main() +{ + volatile int arg = 42u; + return test(arg) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-2001.C b/libgomp/testsuite/libgomp.c++/target-flex-2001.C new file mode 100644 index 0000000..f1a6c12 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-2001.C @@ -0,0 +1,61 @@ +/* { dg-additional-options "-std=c++20" } */ + +/* Functional */ + +#include <functional> +#include <utility> + +#include "target-flex-common.h" + +template<typename T,typename Fn> +auto invoke_unary(T&& a, Fn&& fn) noexcept +{ + return std::invoke(std::forward<Fn>(fn), + std::forward<T>(a)); +} + +template<typename T, typename U, typename Fn> +auto invoke_binary(T&& a, U&& b, Fn&& fn) noexcept +{ + return std::invoke(std::forward<Fn>(fn), + std::forward<T>(a), + std::forward<U>(b)); +} + +bool test(unsigned arg) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arg) + { + bool inner_ok = true; + { + VERIFY (std::plus{}(arg, 2) == arg + 2); + auto bound_plus_arg = std::bind_front(std::plus{}, arg); + VERIFY (bound_plus_arg(10) == arg + 10); + VERIFY (bound_plus_arg(20) == arg + 20); + + VERIFY (std::not_fn(std::not_equal_to{})(arg, arg)); + VERIFY (invoke_binary(arg, arg, std::not_fn(std::not_equal_to{}))); + auto bound_equals_arg = std::bind_front(std::not_fn(std::not_equal_to{}), arg); + VERIFY (bound_equals_arg(arg)); + VERIFY (std::not_fn(bound_equals_arg)(arg + 1)); + VERIFY (invoke_unary(arg, bound_equals_arg)); + + VERIFY (std::not_fn(std::ranges::not_equal_to{})(arg, arg)); + VERIFY (invoke_binary(arg, arg, std::not_fn(std::ranges::not_equal_to{}))); + auto bound_ranges_equals_arg = std::bind_front(std::not_fn(std::ranges::not_equal_to{}), arg); + VERIFY (bound_ranges_equals_arg(arg)); + VERIFY (std::not_fn(bound_ranges_equals_arg)(arg + 1)); + VERIFY (invoke_unary(arg, bound_ranges_equals_arg)); + } + end: + ok = inner_ok; + } + return ok; +} + +int main() +{ + volatile unsigned arg = 42u; + return test(arg) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-2002.C b/libgomp/testsuite/libgomp.c++/target-flex-2002.C new file mode 100644 index 0000000..f738806 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-2002.C @@ -0,0 +1,97 @@ +/* { dg-additional-options "-std=c++23" } */ + +/* expected/optional */ + +#include <optional> +#include <expected> + +#include "target-flex-common.h" + +std::optional<unsigned> make_optional(bool b, unsigned arg = 0u) noexcept +{ + if (!b) + return std::nullopt; + return {arg}; +} + +bool test_optional(unsigned arg) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arg) + { + bool inner_ok = true; + { + auto null_opt = make_optional(false); + VERIFY (!null_opt); + VERIFY (!null_opt.has_value()); + VERIFY (null_opt.value_or(arg * 2u) == arg * 2u); + VERIFY (null_opt.or_else([&](){ return std::optional<unsigned>{arg}; }) + .transform([](int a){ return a * 2u; }) + .value_or(0) == arg * 2u); + + auto opt = make_optional(true, arg); + VERIFY (opt); + VERIFY (opt.has_value()); + VERIFY (opt.value() == arg); + VERIFY (*opt == arg); + VERIFY (opt.value_or(arg + 42) == arg); + VERIFY (opt.or_else([&](){ return std::optional<unsigned>{arg + 42}; }) + .transform([](int a){ return a * 2u; }) + .value_or(0) == arg * 2u); + } + end: + ok = inner_ok; + } + return ok; +} + +struct my_error +{ + int _e; +}; + +std::expected<unsigned, my_error> make_expected(bool b, unsigned arg = 0u) noexcept +{ + if (!b) + return std::unexpected{my_error{-1}}; + return {arg}; +} + +bool test_expected(unsigned arg) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arg) + { + bool inner_ok = true; + { + auto unexpected = make_expected(false); + VERIFY (!unexpected); + VERIFY (!unexpected.has_value()); + VERIFY (unexpected.error()._e == -1); + VERIFY (unexpected.value_or(arg * 2u) == arg * 2u); + VERIFY (unexpected.or_else([&](my_error e){ return std::expected<unsigned, my_error>{arg}; }) + .transform([](int a){ return a * 2u; }) + .value_or(0) == arg * 2u); + + auto expected = make_expected(true, arg); + VERIFY (expected); + VERIFY (expected.has_value()); + VERIFY (expected.value() == arg); + VERIFY (*expected == arg); + VERIFY (expected.value_or(arg + 42) == arg); + VERIFY (expected.or_else([&](my_error e){ return std::expected<unsigned, my_error>{std::unexpected{e}}; }) + .transform([](int a){ return a * 2u; }) + .value_or(0) == arg * 2u); + } + end: + ok = inner_ok; + } + return ok; +} + +int main() +{ + volatile unsigned arg = 42; + return test_optional(arg) + && test_expected(arg) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-2003.C b/libgomp/testsuite/libgomp.c++/target-flex-2003.C new file mode 100644 index 0000000..8e8ca8e --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-2003.C @@ -0,0 +1,176 @@ +/* { dg-additional-options "-std=c++20" } */ + +/* bit_cast and memcpy */ + +#include <bit> +#include <cstring> + +#include "target-flex-common.h" + +struct S0 +{ + int _v0; + char _v1; + long long _v2; +}; + +struct S1 +{ + int _v0; + char _v1; + long long _v2; +}; + +bool test_bit_cast(int arg) +{ + bool ok; + S1 s1_out; + #pragma omp target map(from: ok, s1_out) map(to: arg) + { + bool inner_ok = true; + { + long long v = static_cast<long long>(arg + 42ll); + S0 s = {arg, 'a', v}; + VERIFY (std::bit_cast<S1>(s)._v0 == arg); + VERIFY (std::bit_cast<S1>(s)._v1 == 'a'); + VERIFY (std::bit_cast<S1>(s)._v2 == v); + s1_out = std::bit_cast<S1>(s); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + long long v = static_cast<long long>(arg + 42ll); + VERIFY_NON_TARGET (std::bit_cast<S0>(s1_out)._v0 == arg); + VERIFY_NON_TARGET (std::bit_cast<S0>(s1_out)._v1 == 'a'); + VERIFY_NON_TARGET (std::bit_cast<S0>(s1_out)._v2 == v); + return true; +} + + +struct OutStruct +{ + std::size_t _id; + void *_next; +}; + +struct Extendable1 +{ + std::size_t _id; + void *_next; + int _v; +}; + +struct Extendable2 +{ + std::size_t _id; + void *_next; + char _str[256]; +}; + +struct Extendable3 +{ + std::size_t _id; + void *_next; + const int *_nums; + std::size_t _size; +}; + +struct ExtendableUnknown +{ + std::size_t _id; + void *_next; +}; + +template<typename To, std::size_t Id> +To *get_extendable(void *p) +{ + while (p != nullptr) + { + OutStruct out; + std::memcpy(&out, p, sizeof(OutStruct)); + if (out._id == Id) + return static_cast<To *>(p); + p = out._next; + } + return nullptr; +} + +bool test_memcpy(int arg, const int *nums, std::size_t nums_size) +{ + bool ok; + Extendable2 e2_out; + #pragma omp target map(from: ok, e2_out) map(to: arg, nums[:nums_size], nums_size) + { + bool inner_ok = true; + { + Extendable3 e3 = {3u, nullptr, nums, nums_size}; + ExtendableUnknown u1 = {100u, &e3}; + Extendable2 e2 = {2u, &u1, {'H', 'e', 'l', 'l', 'o', '!', '\000'}}; + ExtendableUnknown u2 = {101u, &e2}; + ExtendableUnknown u3 = {102u, &u2}; + ExtendableUnknown u4 = {142u, &u3}; + Extendable1 e1 = {1u, &u4, arg}; + + void *p = &e1; + while (p != nullptr) + { + /* You can always cast a pointer to a struct to a pointer to + the type of it's first member. */ + switch (*static_cast<std::size_t *>(p)) + { + case 1: + { + Extendable1 *e1_p = static_cast<Extendable1 *>(p); + p = e1_p->_next; + VERIFY (e1_p->_v == arg); + break; + } + case 2: + { + Extendable2 *e2_p = static_cast<Extendable2 *>(p); + p = e2_p->_next; + VERIFY (std::strcmp(e2_p->_str, "Hello!") == 0); + break; + } + case 3: + { + Extendable3 *e3_p = static_cast<Extendable3 *>(p); + p = e3_p->_next; + VERIFY (nums == e3_p->_nums); + VERIFY (nums_size == e3_p->_size); + break; + } + default: + { + /* Casting to a pointer to OutStruct invokes undefined + behavior though, memcpy is required to extract the _next + member. */ + OutStruct out; + std::memcpy(&out, p, sizeof(OutStruct)); + p = out._next; + } + } + } + Extendable2 *e2_p = get_extendable<Extendable2, 2u>(&e1); + VERIFY (e2_p != nullptr); + e2_out = *e2_p; + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (e2_out._id == 2u); + VERIFY_NON_TARGET (std::strcmp(e2_out._str, "Hello!") == 0); + return true; +} + +int main() +{ + volatile int arg = 42; + int arr[8] = {0, 1, 2, 3, 4, 5, 6, 7}; + return test_bit_cast(arg) + && test_memcpy(arg, arr, 8) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-30.C b/libgomp/testsuite/libgomp.c++/target-flex-30.C new file mode 100644 index 0000000..c66075b --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-30.C @@ -0,0 +1,51 @@ +/* std::initializer_list in target region. */ + +#include <initializer_list> +#include <array> + +#include "target-flex-common.h" + +bool test_initializer_list(int arg) +{ + static constexpr std::size_t out_arr_size = 7; + int out_arr[out_arr_size]; + bool ok; + #pragma omp target map(from: ok, out_arr[:out_arr_size]) map(to: arg) + { + bool inner_ok = true; + { + auto il = {0, 1, 2, 3, 4, 5, arg}; + + int sum = 0; + for (auto const& e : il) + sum += e; + VERIFY (sum == 0 + 1 + 2 + 3 + 4 + 5 + arg); + + auto* out_it = out_arr; + const auto* const out_end = out_arr + out_arr_size; + for (auto const& e : il) + { + VERIFY (out_it != out_end); + *out_it = e; + ++out_it; + } + } + end: + ok = inner_ok; + } + if (!ok) + return false; + + std::array<int, out_arr_size> reference_array = {0, 1, 2, 3, 4, 5, arg}; + const auto *out_arr_it = out_arr; + for (auto const& e : reference_array) + VERIFY_NON_TARGET (e == *(out_arr_it++)); + + return true; +} + +int main() +{ + volatile int arg = 42; + return test_initializer_list(arg) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-300.C b/libgomp/testsuite/libgomp.c++/target-flex-300.C new file mode 100644 index 0000000..ef9e5a9 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-300.C @@ -0,0 +1,49 @@ +/* { dg-additional-options -std=c++23 } */ + +/* numerics */ + +#include <algorithm> +#include <numeric> +#include <ranges> +#include <span> +#include <vector> + +//TODO PR120454 "C++ constexpr vs. OpenMP implicit mapping" +#pragma omp declare target(std::ranges::all_of, std::ranges::iota) + +#include "target-flex-common.h" + +namespace stdr = std::ranges; + +bool test(std::size_t arg) +{ + bool ok; + int midpoint_out; + std::vector<int> vec(arg); + int *data = vec.data(); + std::size_t size = vec.size(); + #pragma omp target defaultmap(none) map(from: ok, midpoint_out) map(tofrom: data[:size]) map(to: arg, size) + { + std::span span = {data, size}; + bool inner_ok = true; + { + VERIFY (stdr::all_of(span, [](int v){ return v == int{}; })); + stdr::iota(span, 0); + midpoint_out = *std::midpoint(span.data(), span.data() + span.size()); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (stdr::equal(vec, std::views::iota(0, static_cast<int>(vec.size())))); + VERIFY_NON_TARGET (*std::midpoint(vec.data(), vec.data() + vec.size()) + == midpoint_out); + return true; +} + +int main() +{ + volatile std::size_t arg = 42; + return test(arg) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-31.C b/libgomp/testsuite/libgomp.c++/target-flex-31.C new file mode 100644 index 0000000..adaf18f --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-31.C @@ -0,0 +1,80 @@ +/* std::initializer_list in target region. */ + +#include <initializer_list> + +#include "target-flex-common.h" + +struct S0 +{ + int _v; + S0(std::initializer_list<int> il) + : _v(0) + { + for (auto const& e : il) + _v += e; + } +}; + +struct S1 +{ + int _v; + template<typename T> + S1(std::initializer_list<T> il) + : _v(0) + { + for (auto const& e : il) + _v += e; + } +}; + +template<typename T> +struct S2 +{ + T _v; + S2(std::initializer_list<T> il) + : _v(0) + { + for (auto const& e : il) + _v += e; + } +}; + +#if __cplusplus >= 201703L +template<typename T> +S2(std::initializer_list<T>) -> S2<T>; +#endif + +bool test_initializer_list(int arg) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arg) + { + bool inner_ok = true; + { + static constexpr int partial_sum = 0 + 1 + 2 + 3 + 4 + 5; + + S0 s0{0, 1, 2, 3, 4, 5, arg}; + VERIFY (s0._v == partial_sum + arg); + + S1 s1{0, 1, 2, 3, 4, 5, arg}; + VERIFY (s1._v == partial_sum + arg); + + S2<int> s2{0, 1, 2, 3, 4, 5, arg}; + VERIFY (s2._v == partial_sum + arg); + + #if __cplusplus >= 201703L + S2 s2_ctad{0, 1, 2, 3, 4, 5, arg}; + VERIFY (s2_ctad._v == partial_sum + arg); + #endif + } + end: + ok = inner_ok; + } + return ok; +} + +int main() +{ + volatile int arg = 42; + return test_initializer_list(arg) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-32.C b/libgomp/testsuite/libgomp.c++/target-flex-32.C new file mode 100644 index 0000000..7f74401a --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-32.C @@ -0,0 +1,50 @@ +/* std::initializer_list constructor of std::vector (explicit template arg) */ + +#include <vector> +#include <array> + +#include "target-flex-common.h" + +bool test_initializer_list(int arg) +{ + static constexpr std::size_t out_arr_size = 7; + int out_arr[out_arr_size]; + bool ok; + #pragma omp target map(from: ok, out_arr[:out_arr_size]) map(to: arg) + { + bool inner_ok = true; + { + std::vector<int> vec{0, 1, 2, 3, 4, 5, arg}; + int sum = 0; + for (auto const& e : vec) + sum += e; + VERIFY (sum == 0 + 1 + 2 + 3 + 4 + 5 + arg); + + auto* out_it = out_arr; + const auto* const out_end = out_arr + out_arr_size; + for (auto const& e : vec) + { + VERIFY (out_it != out_end); + *out_it = e; + ++out_it; + } + } + end: + ok = inner_ok; + } + if (!ok) + return false; + + std::array<int, out_arr_size> reference_array = {0, 1, 2, 3, 4, 5, arg}; + const auto *out_arr_it = out_arr; + for (auto const& e : reference_array) + VERIFY_NON_TARGET (e == *(out_arr_it++)); + + return true; +} + +int main() +{ + volatile int arg = 42; + return test_initializer_list(arg) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-33.C b/libgomp/testsuite/libgomp.c++/target-flex-33.C new file mode 100644 index 0000000..bb8a39b --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-33.C @@ -0,0 +1,52 @@ +/* { dg-additional-options "-std=c++17" } */ + +/* deduced std::initializer_list constructor of std::vector (CTAD) */ + +#include <vector> +#include <array> + +#include "target-flex-common.h" + +bool test_initializer_list(int arg) +{ + static constexpr std::size_t out_arr_size = 7; + int out_arr[out_arr_size]; + bool ok; + #pragma omp target map(from: ok, out_arr[:out_arr_size]) map(to: arg) + { + bool inner_ok = true; + { + std::vector vec{0, 1, 2, 3, 4, 5, arg}; + int sum = 0; + for (auto const& e : vec) + sum += e; + VERIFY (sum == 0 + 1 + 2 + 3 + 4 + 5 + arg); + + auto* out_it = out_arr; + const auto* const out_end = out_arr + out_arr_size; + for (auto const& e : vec) + { + VERIFY (out_it != out_end); + *out_it = e; + ++out_it; + } + } + end: + ok = inner_ok; + } + if (!ok) + return false; + + std::array<int, out_arr_size> reference_array = {0, 1, 2, 3, 4, 5, arg}; + const auto *out_arr_it = out_arr; + for (auto const& e : reference_array) + VERIFY_NON_TARGET (e == *(out_arr_it++)); + + return true; +} + +int main() +{ + volatile int arg = 42; + return test_initializer_list(arg) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-41.C b/libgomp/testsuite/libgomp.c++/target-flex-41.C new file mode 100644 index 0000000..4d36341 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-41.C @@ -0,0 +1,94 @@ +/* { dg-additional-options "-std=c++20" } */ + +/* <iterator> c++20 */ + +/* std::common_iterator uses std::variant. */ + +#include <vector> +#include <iterator> +#include <span> + +//TODO PR120454 "C++ constexpr vs. OpenMP implicit mapping" +#pragma omp declare target(std::ranges::distance, std::ranges::next) + +#include "target-flex-common.h" + +namespace stdr = std::ranges; + +template<typename It0, typename It1> +bool simple_equal(const It0 begin0, const It0 end0, + const It1 begin1, const It1 end1) BL_NOEXCEPT +{ + It0 it0 = begin0; + It1 it1 = begin1; + for (; it0 != end0; ++it0, ++it1) + if (it1 == end1 || *it0 != *it1) + return false; + return true; +} + +template<typename It, typename OutIt> +void simple_copy(const It begin, const It end, OutIt out) BL_NOEXCEPT +{ + for (It it = begin; it != end; ++it, ++out) + *out = *it; +} + +template<typename T, std::size_t Size> +bool test(const T (&arr)[Size]) +{ + bool ok; + T out_rev_arr[Size]; + T out_fwd_arr[Size]; + T out_first_half_arr[Size / 2]; + #pragma omp target defaultmap(none) \ + map(from: ok, out_rev_arr[:Size], out_fwd_arr[:Size], \ + out_first_half_arr[:Size / 2]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::span<const T> span = {arr, Size}; + std::vector<T> rev_vec(std::reverse_iterator{span.end()}, + std::reverse_iterator{span.begin()}); + VERIFY (std::distance(span.begin(), span.end()) + == std::distance(rev_vec.begin(), rev_vec.end())); + VERIFY (stdr::distance(span.begin(), span.end()) + == stdr::distance(rev_vec.begin(), rev_vec.end())); + VERIFY (stdr::distance(span) == stdr::distance(rev_vec)); + VERIFY (simple_equal(span.begin(), span.end(), + std::reverse_iterator{rev_vec.end()}, + std::reverse_iterator{rev_vec.begin()})); + simple_copy(rev_vec.begin(), rev_vec.end(), out_rev_arr); + simple_copy(std::reverse_iterator{rev_vec.end()}, + std::reverse_iterator{rev_vec.begin()}, + out_fwd_arr); + using counted_iter = std::counted_iterator<decltype(span.begin())>; + using common_iter = std::common_iterator<counted_iter, + std::default_sentinel_t>; + std::vector<T> front_half; + simple_copy(common_iter{counted_iter{span.begin(), Size / 2}}, + common_iter{std::default_sentinel}, + std::back_insert_iterator{front_half}); + VERIFY (simple_equal(span.begin(), stdr::next(span.begin(), Size / 2), + front_half.begin(), front_half.end())); + simple_copy(front_half.begin(), front_half.end(), out_first_half_arr); + } + end: + ok = inner_ok; + } + VERIFY_NON_TARGET (simple_equal(std::reverse_iterator{arr + Size}, + std::reverse_iterator{arr}, + out_rev_arr, out_rev_arr + Size)); + VERIFY_NON_TARGET (simple_equal(arr, arr + Size, + out_fwd_arr, out_fwd_arr + Size)); + VERIFY_NON_TARGET (simple_equal(arr, arr + Size / 2, + out_first_half_arr, out_first_half_arr + Size / 2)); + return ok; +} + +int main() +{ + int arr[] = {0, 1, 2, 3, 4, 5, 6, 7}; + return test(arr) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-60.C b/libgomp/testsuite/libgomp.c++/target-flex-60.C new file mode 100644 index 0000000..014b9f5 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-60.C @@ -0,0 +1,46 @@ +/* algorithms pre c++20 */ + +#include <algorithm> +#include <vector> + +#include "target-flex-common.h" + +template<typename T, std::size_t Size> +bool test(const T (&arr)[Size]) +{ + bool ok; + T out_2x_arr[Size]; + T out_shifted_arr[Size]; + #pragma omp target map(from: ok, out_2x_arr[:Size], out_shifted_arr[:Size]) \ + map(to: arr[:Size]) + { + std::vector<T> vec(Size); + std::vector<T> mutated(Size); + bool inner_ok = true; + { + std::copy(arr, arr + Size, vec.begin()); + VERIFY (std::equal(arr, arr + Size, vec.begin())); + std::transform(vec.begin(), vec.end(), mutated.begin(), + [](const T& v){ return v * 2; }); + std::copy(mutated.begin(), mutated.end(), out_2x_arr); + std::rotate(vec.begin(), std::next(vec.begin(), Size / 2), vec.end()); + std::copy(vec.begin(), vec.end(), out_shifted_arr); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (std::equal(arr, arr + Size, out_2x_arr, + [](const T& a, const T& b){ return a * 2 == b; })); + std::vector<T> shifted(arr, arr + Size); + std::rotate(shifted.begin(), std::next(shifted.begin(), Size / 2), shifted.end()); + VERIFY_NON_TARGET (std::equal(out_shifted_arr, out_shifted_arr + Size, shifted.begin())); + return true; +} + +int main() +{ + int arr[] = {0, 1, 2, 3, 4, 5, 6, 7}; + return test(arr) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-61.C b/libgomp/testsuite/libgomp.c++/target-flex-61.C new file mode 100644 index 0000000..9070c2d --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-61.C @@ -0,0 +1,54 @@ +/* { dg-additional-options "-std=c++20" } */ + +/* ranged algorithms c++20 */ + +#include <algorithm> +#include <ranges> +#include <vector> + +//TODO PR120454 "C++ constexpr vs. OpenMP implicit mapping" +#pragma omp declare target(std::ranges::copy, std::ranges::equal, std::ranges::rotate, std::ranges::transform) + +#include "target-flex-common.h" + +namespace stdr = std::ranges; + +template<typename T, std::size_t Size> +bool test(const T (&arr)[Size]) +{ + bool ok; + T out_2x_arr[Size]; + T out_shifted_arr[Size]; + #pragma omp target defaultmap(none) \ + map(from: ok, out_2x_arr[:Size], out_shifted_arr[:Size]) \ + map(to: arr[:Size]) + { + std::vector<T> vec(Size); + std::vector<T> mutated(Size); + bool inner_ok = true; + { + stdr::copy(arr, vec.begin()); + VERIFY (stdr::equal(arr, vec)); + stdr::transform(vec, mutated.begin(), + [](const T& v){ return v * 2; }); + stdr::copy(mutated, out_2x_arr); + stdr::rotate(vec, std::next(vec.begin(), Size / 2)); + stdr::copy(vec, out_shifted_arr); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (stdr::equal(arr, out_2x_arr, stdr::equal_to{}, [](const T& v){ return v * 2; })); + std::vector<T> shifted(arr, arr + Size); + stdr::rotate(shifted, std::next(shifted.begin(), Size / 2)); + VERIFY_NON_TARGET (stdr::equal(out_shifted_arr, shifted)); + return true; +} + +int main() +{ + int arr[] = {0, 1, 2, 3, 4, 5, 6, 7}; + return test(arr) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-62.C b/libgomp/testsuite/libgomp.c++/target-flex-62.C new file mode 100644 index 0000000..ef6b942 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-62.C @@ -0,0 +1,50 @@ +/* { dg-additional-options -std=c++23 } */ + +/* std::views stuff. Also tests std::tuple with std::views::zip. */ + +#include <algorithm> +#include <ranges> +#include <span> + +//TODO PR120454 "C++ constexpr vs. OpenMP implicit mapping" +#pragma omp declare target(std::ranges::all_of, std::ranges::equal, std::ranges::fold_left, std::views::reverse, std::views::zip) + +#include "target-flex-common.h" + +namespace stdr = std::ranges; +namespace stdv = std::views; + +bool f() +{ + const int arr_fwd[8] = {0, 1, 2, 3, 4, 5, 6, 7}; + const int arr_rev[8] = {7, 6, 5, 4, 3, 2, 1, 0}; + + bool ok; + #pragma omp target defaultmap(none) map(from: ok) map(to: arr_fwd[:8], arr_rev[:8]) + { + std::span<const int> fwd = {arr_fwd, 8}; + std::span<const int> rev = {arr_rev, 8}; + bool inner_ok = true; + { + VERIFY(stdr::equal(fwd, rev | stdv::reverse)); + VERIFY(stdr::equal(fwd | stdv::drop(4) | stdv::reverse, + rev | stdv::take(4))); + for (auto [first, second] : stdv::zip(fwd, rev)) + VERIFY(first + second == 7); + auto plus = [](int a, int b){ return a + b; }; + auto is_even = [](int v){ return v % 2 == 0; }; + VERIFY(stdr::fold_left(fwd | stdv::filter(is_even), 0, plus) + == 12); + VERIFY(stdr::all_of(fwd | stdv::transform([](int v){ return v * 2; }), + is_even)); + } + end: + ok = inner_ok; + } + return ok; +} + +int main() +{ + return f() ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-70.C b/libgomp/testsuite/libgomp.c++/target-flex-70.C new file mode 100644 index 0000000..9e9383d --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-70.C @@ -0,0 +1,26 @@ +/* CTAD in target regions. */ + +template<typename T> +struct S +{ + T _v; +}; + +template<typename T> +S(T) -> S<T>; + +bool f() +{ + bool ok; + #pragma omp target map(from: ok) + { + S s{42}; + ok = s._v == 42; + } + return ok; +} + +int main() +{ + return f() ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-80.C b/libgomp/testsuite/libgomp.c++/target-flex-80.C new file mode 100644 index 0000000..f41a1bb --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-80.C @@ -0,0 +1,49 @@ +// { dg-additional-options "-std=c++20" } + +/* std::span */ + +#include <span> + +#include "target-flex-common.h" + +template<typename It0, typename It1> +bool simple_equal(It0 it0, const It0 end0, + It1 it1, const It1 end1) noexcept +{ + for (; it0 != end0; ++it0, ++it1) + if (it1 == end1 || *it0 != *it1) + return false; + return true; +} + +template<typename T, std::size_t Size> +bool test(const T (&arr)[Size]) +{ + bool ok; + T out_arr[Size]; + #pragma omp target map(from: ok) map(to: arr[:Size]) + { + std::span span = {arr, Size}; + bool inner_ok = true; + { + VERIFY (!span.empty()); + VERIFY (span.size() == Size); + auto out_it = out_arr; + for (auto elem : span) + *(out_it++) = elem; + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (simple_equal(arr, arr + Size, + out_arr, out_arr + Size)); + return true; +} + +int main() +{ + int arr[8] = {0, 1, 2, 3, 4, 5, 6, 7}; + return test(arr) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-81.C b/libgomp/testsuite/libgomp.c++/target-flex-81.C new file mode 100644 index 0000000..a86fefb --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-81.C @@ -0,0 +1,75 @@ +/* { dg-additional-options "-std=c++20" } */ + +#include <ranges> +#include <span> +#include <type_traits> +#include <vector> + +#include "target-flex-common.h" + +namespace stdr = std::ranges; + +template<typename It0, typename It1> +bool simple_equal(It0 it0, const It0 end0, + It1 it1, const It1 end1) noexcept +{ + for (; it0 != end0; ++it0, ++it1) + if (it1 == end1 || *it0 != *it1) + return false; + return true; +} + +template<typename Rn0, typename Rn1> +bool simple_equal(Rn0&& rn0, Rn1&& rn1) noexcept +{ + return simple_equal(stdr::begin(rn0), stdr::end(rn0), + stdr::begin(rn1), stdr::end(rn1)); +} + +template<typename Rn> +bool test(Rn&& range) +{ + using value_type = stdr::range_value_t<std::remove_cvref_t<Rn>>; + std::vector<value_type> vec = {stdr::begin(range), stdr::end(range)}; + value_type *data = vec.data(); + std::size_t size = vec.size(); + bool ok; + #pragma omp target map(from: ok) map(tofrom: data[:size]) map(to: size) + { + std::vector<value_type> orig = {data, data + size}; + std::span<value_type> span = {data, size}; + bool inner_ok = true; + { + auto mul_by_2 = [](const value_type& v){ return v * 2; }; + VERIFY (simple_equal(orig, span)); + for (auto& elem : span) + elem = mul_by_2(elem); + VERIFY (simple_equal(orig | std::views::transform(mul_by_2), span)); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + auto mul_by_2 = [](const value_type& v){ return v * 2; }; + VERIFY_NON_TARGET (simple_equal(range | std::views::transform(mul_by_2), vec)); + return true; +} + +struct my_int +{ + int _v; + bool operator==(my_int const&) const = default; + my_int operator*(int rhs) const noexcept { + return {_v * rhs}; + } +}; + +int main() +{ + std::vector<int> ints = {1, 2, 3, 4, 5}; + const bool ints_res = test(ints); + std::vector<my_int> my_ints = {my_int{1}, my_int{2}, my_int{3}, my_int{4}, my_int{5}}; + const bool my_ints_res = test(my_ints); + return ints_res && my_ints_res ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-90.C b/libgomp/testsuite/libgomp.c++/target-flex-90.C new file mode 100644 index 0000000..b3f1197 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-90.C @@ -0,0 +1,107 @@ +/* structured bindings */ + +#include <array> +#include <tuple> + +#include "target-flex-common.h" + +template<typename Array, typename Tuple, typename Struct> +bool test(Array array, Tuple tuple, Struct s) +{ + bool ok; + auto array_2nd_in = std::get<2>(array); + auto tuple_2nd_in = std::get<2>(tuple); + auto s_2nd_in = s._2; + decltype(array_2nd_in) array_2nd_out_0; + decltype(tuple_2nd_in) tuple_2nd_out_0; + decltype(s_2nd_in) s_2nd_out_0; + decltype(array_2nd_in) array_2nd_out_1; + decltype(tuple_2nd_in) tuple_2nd_out_1; + decltype(s_2nd_in) s_2nd_out_1; + decltype(array_2nd_in) array_2nd_out_2; + decltype(tuple_2nd_in) tuple_2nd_out_2; + decltype(s_2nd_in) s_2nd_out_2; + #pragma omp target map(from: ok, \ + array_2nd_out_0, tuple_2nd_out_0, s_2nd_out_0, \ + array_2nd_out_1, tuple_2nd_out_1, s_2nd_out_1, \ + array_2nd_out_2, tuple_2nd_out_2, s_2nd_out_2) \ + map(to: array_2nd_in, tuple_2nd_in, s_2nd_in, array, tuple, s) + { + bool inner_ok = true; + { + { + auto [array_0th, array_1st, array_2nd] = array; + VERIFY (array_2nd_in == array_2nd); + VERIFY (std::get<2>(array) == array_2nd); + array_2nd_out_0 = array_2nd; + auto [tuple_0th, tuple_1st, tuple_2nd] = tuple; + VERIFY (tuple_2nd_in == tuple_2nd); + VERIFY (std::get<2>(tuple) == tuple_2nd); + tuple_2nd_out_0 = tuple_2nd; + auto [s_0th, s_1st, s_2nd] = s; + VERIFY (s_2nd_in == s_2nd); + VERIFY (s._2 == s_2nd); + s_2nd_out_0 = s_2nd; + } + { + auto& [array_0th, array_1st, array_2nd] = array; + VERIFY (array_2nd_in == array_2nd); + VERIFY (std::get<2>(array) == array_2nd); + array_2nd_out_1 = array_2nd; + auto& [tuple_0th, tuple_1st, tuple_2nd] = tuple; + VERIFY (tuple_2nd_in == tuple_2nd); + VERIFY (std::get<2>(tuple) == tuple_2nd); + tuple_2nd_out_1 = tuple_2nd; + auto& [s_0th, s_1st, s_2nd] = s; + VERIFY (s_2nd_in == s_2nd); + VERIFY (s._2 == s_2nd); + s_2nd_out_1 = s_2nd; + } + { + const auto& [array_0th, array_1st, array_2nd] = array; + VERIFY (array_2nd_in == array_2nd); + VERIFY (std::get<2>(array) == array_2nd); + array_2nd_out_2 = array_2nd; + const auto& [tuple_0th, tuple_1st, tuple_2nd] = tuple; + VERIFY (tuple_2nd_in == tuple_2nd); + VERIFY (std::get<2>(tuple) == tuple_2nd); + tuple_2nd_out_2 = tuple_2nd; + const auto& [s_0th, s_1st, s_2nd] = s; + VERIFY (s_2nd_in == s_2nd); + VERIFY (s._2 == s_2nd); + s_2nd_out_2 = s_2nd; + } + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (array_2nd_out_0 == array_2nd_in); + VERIFY_NON_TARGET (tuple_2nd_out_0 == tuple_2nd_in); + VERIFY_NON_TARGET (s_2nd_out_0 == s_2nd_in); + VERIFY_NON_TARGET (array_2nd_out_1 == array_2nd_in); + VERIFY_NON_TARGET (tuple_2nd_out_1 == tuple_2nd_in); + VERIFY_NON_TARGET (s_2nd_out_1 == s_2nd_in); + VERIFY_NON_TARGET (array_2nd_out_2 == array_2nd_in); + VERIFY_NON_TARGET (tuple_2nd_out_2 == tuple_2nd_in); + VERIFY_NON_TARGET (s_2nd_out_2 == s_2nd_in); + + return true; +} + +struct S +{ + char _0; + float _1; + int _2; +}; + +int main() +{ + const bool test_res + = test(std::array{0, 1, 2}, + std::tuple{'a', 3.14f, 42}, + S{'a', 3.14f, 42}); + return test_res ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-common.h b/libgomp/testsuite/libgomp.c++/target-flex-common.h new file mode 100644 index 0000000..14523c4 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-common.h @@ -0,0 +1,40 @@ +#include <cstdio> + +#if __cplusplus >= 201103L + #define BL_NOEXCEPT noexcept +#else + #define BL_NOEXCEPT throw() +#endif + +#if defined __has_builtin +# if __has_builtin (__builtin_LINE) +# define VERIFY_LINE __builtin_LINE () +# endif +#endif +#if !defined VERIFY_LINE +# define VERIFY_LINE __LINE__ +#endif + +/* I'm not a huge fan of macros but in the interest of keeping the code that + isn't being tested as simple as possible, we use them. */ + +#define VERIFY(EXPR) \ + do { \ + if (!(EXPR)) \ + { \ + std::printf("VERIFY ln: %d `" #EXPR "` evaluated to false\n", \ + VERIFY_LINE); \ + inner_ok = false; \ + goto end; \ + } \ + } while (false) + +#define VERIFY_NON_TARGET(EXPR) \ + do { \ + if (!(EXPR)) \ + { \ + std::printf("VERIFY ln: %d `" #EXPR "` evaluated to false\n", \ + VERIFY_LINE); \ + return false; \ + } \ + } while (false) diff --git a/libgomp/testsuite/libgomp.c++/target-std__array-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__array-concurrent-usm.C new file mode 100644 index 0000000..9923783 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__array-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__array-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__array-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__array-concurrent.C new file mode 100644 index 0000000..c42105a --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__array-concurrent.C @@ -0,0 +1,62 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <array> +#include <algorithm> + +#define N 50000 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand (); +} + +#pragma omp declare target +bool validate (const std::array<int,N> &arr, int data[]) +{ + for (int i = 0; i < N; ++i) + if (arr[i] != data[i] * data[i]) + return false; + return true; +} +#pragma omp end declare target + +int main (void) +{ + int data[N]; + bool ok; + std::array<int,N> arr; + + srand (time (NULL)); + init (data); + +#ifndef MEM_SHARED + #pragma omp target data map (to: data[:N]) map (alloc: arr) +#endif + { + #pragma omp target + { +#ifndef MEM_SHARED + new (&arr) std::array<int,N> (); +#endif + std::copy (data, data + N, arr.begin ()); + } + + #pragma omp target teams distribute parallel for + for (int i = 0; i < N; ++i) + arr[i] *= arr[i]; + + #pragma omp target map (from: ok) + { + ok = validate (arr, data); +#ifndef MEM_SHARED + arr.~array (); +#endif + } + } + + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent-usm.C new file mode 100644 index 0000000..9023ef8 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__bitset-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent.C new file mode 100644 index 0000000..4fcce93 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent.C @@ -0,0 +1,69 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <bitset> +#include <set> +#include <algorithm> + +#define N 4000 +#define MAX 16384 + +void init (int data[]) +{ + std::set<int> _set; + for (int i = 0; i < N; ++i) + { + // Avoid duplicates in data array. + do + data[i] = rand () % MAX; + while (_set.find (data[i]) != _set.end ()); + _set.insert (data[i]); + } +} + +bool validate (int sum, int data[]) +{ + int total = 0; + for (int i = 0; i < N; ++i) + total += data[i]; + return sum == total; +} + +int main (void) +{ + int data[N]; + std::bitset<MAX> _set; + int sum = 0; + + srand (time (NULL)); + init (data); + +#ifndef MEM_SHARED + #pragma omp target data map (to: data[:N]) map (alloc: _set) +#endif + { + #pragma omp target + { +#ifndef MEM_SHARED + new (&_set) std::bitset<MAX> (); +#endif + for (int i = 0; i < N; ++i) + _set[data[i]] = true; + } + + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < MAX; ++i) + if (_set[i]) + sum += i; + +#ifndef MEM_SHARED + #pragma omp target + _set.~bitset (); +#endif + } + + bool ok = validate (sum, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__cmath.C b/libgomp/testsuite/libgomp.c++/target-std__cmath.C new file mode 100644 index 0000000..aaf7152 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__cmath.C @@ -0,0 +1,340 @@ +// { dg-do run } +// { dg-additional-options "-std=c++20" } + +#include <cmath> +#include <numbers> + +#define FP_EQUAL(x,y) (std::abs ((x) - (y)) < 1E-6) + +#pragma omp declare target +template<typename T> bool test_basic () +{ + T x = -3.456789; + T y = 1.234567; + T z = 5.678901; + + if (std::abs (x) != -x) + return false; + if (!FP_EQUAL (std::trunc (x / y) * y + std::fmod (x, y), x)) + return false; + if (!FP_EQUAL (x - std::round (x / y) * y, std::remainder (x, y))) + return false; + if (!FP_EQUAL (std::fma (x, y, z), x * y + z)) + return false; + if (std::fmax (x, y) != (x > y ? x : y)) + return false; + if (std::fmin (x, y) != (x < y ? x : y)) + return false; + if (std::fdim (x, y) != std::max(x - y, (T) 0.0)) + return false; + if (std::fdim (y, x) != std::max(y - x, (T) 0.0)) + return false; + return true; +} + +template<typename T> bool test_exp () +{ + T x = -4.567890; + T y = 2.345678; + + if (!FP_EQUAL (std::exp (x), std::pow (std::numbers::e_v<T>, x))) + return false; + if (!FP_EQUAL (std::exp2 (y), std::pow ((T) 2.0, y))) + return false; + if (!FP_EQUAL (std::expm1 (y), std::exp (y) - (T) 1.0)) + return false; + if (!FP_EQUAL (std::log (std::exp (x)), x)) + return false; + if (!FP_EQUAL (std::log10 (std::pow ((T) 10.0, y)), y)) + return false; + if (!FP_EQUAL (std::log2 (std::exp2 (y)), y)) + return false; + if (!FP_EQUAL (std::log1p (std::expm1 (y)), y)) + return false; + return true; +} + +template<typename T> bool test_power () +{ + T x = 7.234251; + T y = 0.340128; + + if (!FP_EQUAL (std::log (std::pow (x, y)) / std::log (x), y)) + return false; + if (!FP_EQUAL (std::sqrt (x) * std::sqrt (x), x)) + return false; + if (!FP_EQUAL (std::cbrt (x) * std::cbrt (x) * std::cbrt (x), x)) + return false; + if (!FP_EQUAL (std::hypot (x, y), std::sqrt (x * x + y * y))) + return false; + return true; +} + +template<typename T> bool test_trig () +{ + T theta = std::numbers::pi / 4; + T phi = std::numbers::pi / 6; + + if (!FP_EQUAL (std::sin (theta), std::sqrt ((T) 2) / 2)) + return false; + if (!FP_EQUAL (std::sin (phi), 0.5)) + return false; + if (!FP_EQUAL (std::cos (theta), std::sqrt ((T) 2) / 2)) + return false; + if (!FP_EQUAL (std::cos (phi), std::sqrt ((T) 3) / 2)) + return false; + if (!FP_EQUAL (std::tan (theta), 1.0)) + return false; + if (!FP_EQUAL (std::tan (phi), std::sqrt ((T) 3) / 3)) + return false; + + T x = 0.33245623; + + if (!FP_EQUAL (std::asin (std::sin (x)), x)) + return false; + if (!FP_EQUAL (std::acos (std::cos (x)), x)) + return false; + if (!FP_EQUAL (std::atan (std::tan (x)), x)) + return false; + if (!FP_EQUAL (std::atan2 (std::sin (x), std::cos (x)), x)) + return false; + return true; +} + +template<typename T> bool test_hyperbolic () +{ + T x = 0.7423532; + + if (!FP_EQUAL (std::sinh (x), (std::exp (x) - std::exp (-x)) / (T) 2.0)) + return false; + if (!FP_EQUAL (std::cosh (x), (std::exp (x) + std::exp (-x)) / (T) 2.0)) + return false; + if (!FP_EQUAL (std::tanh (x), std::sinh (x) / std::cosh (x))) + return false; + if (!FP_EQUAL (std::asinh (std::sinh (x)), x)) + return false; + if (!FP_EQUAL (std::acosh (std::cosh (x)), x)) + return false; + if (!FP_EQUAL (std::atanh (std::tanh (x)), x)) + return false; + return true; +} + +template<typename T> bool test_erf () +{ + if (!FP_EQUAL (std::erf ((T) 0), 0)) + return false; + if (!FP_EQUAL (std::erf ((T) INFINITY), 1)) + return false; + if (!FP_EQUAL (std::erf ((T) -INFINITY), -1)) + return false; + + if (!FP_EQUAL (std::erfc (0), 1)) + return false; + if (!FP_EQUAL (std::erfc ((T) INFINITY), 0)) + return false; + if (!FP_EQUAL (std::erfc ((T) -INFINITY), 2)) + return false; + + return true; +} + +template<typename T> bool test_gamma () +{ + if (!FP_EQUAL (std::tgamma ((T) 5), 4*3*2*1)) + return false; + if (!FP_EQUAL (std::tgamma ((T) 0.5), std::sqrt (std::numbers::pi_v<T>))) + return false; + if (!FP_EQUAL (std::tgamma ((T) -0.5), (T) -2 * std::sqrt (std::numbers::pi_v<T>))) + return false; + if (!FP_EQUAL (std::tgamma ((T) 2.5), (T) 0.75 * std::sqrt (std::numbers::pi_v<T>))) + return false; + if (!FP_EQUAL (std::tgamma ((T) -2.5), (T) -8.0/15 * std::sqrt (std::numbers::pi_v<T>))) + return false; + + if (!FP_EQUAL (std::lgamma ((T) 5), std::log ((T) 4*3*2*1))) + return false; + if (!FP_EQUAL (std::lgamma ((T) 0.5), std::log (std::sqrt (std::numbers::pi_v<T>)))) + return false; + if (!FP_EQUAL (std::lgamma ((T) 2.5), + std::log ((T) 0.75 * std::sqrt (std::numbers::pi_v<T>)))) + return false; + + return true; +} + +template<typename T> bool test_rounding () +{ + T x = -2.5678; + T y = 3.6789; + + if (std::ceil (x) != -2) + return false; + if (std::floor (x) != -3) + return false; + if (std::trunc (x) != -2) + return false; + if (std::round (x) != -3) + return false; + + if (std::ceil (y) != 4) + return false; + if (std::floor (y) != 3) + return false; + if (std::trunc (y) != 3) + return false; + if (std::round (y) != 4) + return false; + + /* Not testing std::rint and std::nearbyint due to dependence on + floating-point environment. */ + + return true; +} + +template<typename T> bool test_fpmanip () +{ + T x = -2.3456789; + T y = 3.6789012; + int exp; + + T mantissa = std::frexp (x, &exp); + if (std::ldexp (mantissa, exp) != x) + return false; + if (std::logb (x) + 1 != exp) + return false; + if (std::ilogb (x) + 1 != exp) + return false; + if (std::scalbn (x, -exp) != mantissa) + return false; + + T next = std::nextafter (x, y); + if (!(next > x && next < y)) + return false; + +#if 0 + /* TODO Due to 'std::nexttoward' using 'long double to', this triggers a + '80-bit-precision floating-point numbers unsupported (mode ‘XF’)' error + with x86_64 host and nvptx, GCN offload compilers, or + '128-bit-precision floating-point numbers unsupported (mode ‘TF’)' error + with powerpc64le host and nvptx offload compiler, for example; + PR71064 'nvptx offloading: "long double" data type'. + It ought to work on systems where the host's 'long double' is the same as + 'double' ('DF'): aarch64, for example? */ + next = std::nexttoward (x, y); + if (!(next > x && next < y)) + return false; +#endif + + if (std::copysign (x, y) != std::abs (x)) + return false; + if (std::copysign (y, x) != -y) + return false; + + return true; +} + +template<typename T> bool test_classify () +{ + T x = -2.3456789; + T y = 3.6789012; + + if (std::fpclassify (x) != FP_NORMAL || std::fpclassify (y) != FP_NORMAL) + return false; + if (std::fpclassify ((T) INFINITY) != FP_INFINITE + || std::fpclassify ((T) -INFINITY) != FP_INFINITE) + return false; + if (std::fpclassify ((T) 0.0) != FP_ZERO) + return false; + if (std::fpclassify ((T) NAN) != FP_NAN) + return false; + if (!std::isfinite (x) || !std::isfinite (y)) + return false; + if (std::isfinite ((T) INFINITY) || std::isfinite ((T) -INFINITY)) + return false; + if (std::isinf (x) || std::isinf (y)) + return false; + if (!std::isinf ((T) INFINITY) || !std::isinf ((T) -INFINITY)) + return false; + if (std::isnan (x) || std::isnan (y)) + return false; + if (!std::isnan ((T) 0.0 / (T) 0.0)) + return false; + if (std::isnan (x) || std::isnan (y)) + return false; + if (!std::isnormal (x) || !std::isnormal (y)) + return false; + if (std::isnormal ((T) 0.0) || std::isnormal ((T) INFINITY) || std::isnormal ((T) NAN)) + return false; + if (!std::signbit (x) || std::signbit (y)) + return false; + + return true; +} + +template<typename T> bool test_compare () +{ + T x = 5.6789012; + T y = 8.9012345; + + if (std::isgreater (x, y)) + return false; + if (std::isgreater (x, x)) + return false; + if (std::isgreaterequal (x, y)) + return false; + if (!std::isgreaterequal (x, x)) + return false; + if (!std::isless (x, y)) + return false; + if (std::isless (x, x)) + return false; + if (!std::islessequal (x, y)) + return false; + if (!std::islessequal (x, x)) + return false; + if (!std::islessgreater (x, y)) + return false; + if (std::islessgreater (x, x)) + return false; + if (std::isunordered (x, y)) + return false; + if (!std::isunordered (x, NAN)) + return false; + return true; +} +#pragma omp end declare target + +#define RUN_TEST(func) \ +{ \ + pass++; \ + bool ok = test_##func<float> (); \ + if (!ok) { result = pass; break; } \ + pass++; \ + ok = test_##func<double> (); \ + if (!ok) { result = pass; break; } \ +} + +int main (void) +{ + int result = 0; + + #pragma omp target map (tofrom: result) + do { + int pass = 0; + + RUN_TEST (basic); + RUN_TEST (exp); + RUN_TEST (power); + RUN_TEST (trig); + RUN_TEST (hyperbolic); + RUN_TEST (erf); + RUN_TEST (gamma); + RUN_TEST (rounding); + RUN_TEST (fpmanip); + RUN_TEST (classify); + RUN_TEST (compare); + } while (false); + + return result; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__complex.C b/libgomp/testsuite/libgomp.c++/target-std__complex.C new file mode 100644 index 0000000..e392d17 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__complex.C @@ -0,0 +1,175 @@ +// { dg-do run } +// { dg-additional-options "-std=c++20" } + +#include <cmath> +#include <complex> +#include <numbers> + +using namespace std::complex_literals; + +#define FP_EQUAL(x,y) (std::abs ((x) - (y)) < 1E-6) +#define COMPLEX_EQUAL(x,y) (FP_EQUAL ((x).real (), (y).real ()) \ + && FP_EQUAL ((x).imag (), (y).imag ())) + +#pragma omp declare target +template<typename T> bool test_complex () +{ + std::complex<T> z (-1.334, 5.763); + + if (!FP_EQUAL (z.real (), (T) -1.334)) + return false; + if (!FP_EQUAL (z.imag (), (T) 5.763)) + return false; + if (!FP_EQUAL (std::abs (z), + std::sqrt (z.real () * z.real () + z.imag () * z.imag ()))) + return false; + if (!FP_EQUAL (std::arg (z), std::atan2 (z.imag (), z.real ()))) + return false; + if (!FP_EQUAL (std::norm (z), z.real () * z.real () + z.imag () * z.imag ())) + return false; + + auto conj = std::conj (z); + if (!FP_EQUAL (conj.real (), z.real ()) + || !FP_EQUAL (conj.imag (), -z.imag ())) + return false; + + if (std::proj (z) != z) + return false; + + auto infz1 = std::proj (std::complex<float> (INFINITY, -1)); + if (infz1.real () != INFINITY || infz1.imag () != (T) -0.0) + return false; + auto infz2 = std::proj (std::complex<float> (0, -INFINITY)); + if (infz2.real () != INFINITY || infz2.imag () != (T) -0.0) + return false; + + auto polarz = std::polar ((T) 1.5, std::numbers::pi_v<T> / 4); + if (!FP_EQUAL (polarz.real (), (T) 1.5 * std::cos (std::numbers::pi_v<T> / 4)) + || !FP_EQUAL (polarz.imag (), + (T) 1.5* std::sin (std::numbers::pi_v<T> / 4))) + return false; + + return true; +} + +template<typename T> bool test_complex_exp_log () +{ + std::complex<T> z (-1.724, -3.763); + + // Euler's identity + auto eulerz = std::exp (std::complex<T> (0, std::numbers::pi)); + eulerz += 1.0; + if (!COMPLEX_EQUAL (eulerz, std::complex<T> ())) + return false; + + auto my_exp_z + = std::complex<T> (std::exp (z.real ()) * std::cos (z.imag ()), + std::exp (z.real ()) * std::sin (z.imag ())); + if (!COMPLEX_EQUAL (std::exp (z), my_exp_z)) + return false; + + if (!COMPLEX_EQUAL (std::log10 (z), + std::log (z) / std::log (std::complex<T> (10)))) + return false; + + return true; +} + +template<typename T> bool test_complex_trig () +{ + std::complex<T> z (std::numbers::pi / 8, std::numbers::pi / 10); + const std::complex<T> i (0, 1); + + auto my_sin_z + = std::complex<T> (std::sin (z.real ()) * std::cosh (z.imag ()), + std::cos (z.real ()) * std::sinh (z.imag ())); + if (!COMPLEX_EQUAL (std::sin (z), my_sin_z)) + return false; + + auto my_cos_z + = std::complex<T> (std::cos (z.real ()) * std::cosh (z.imag ()), + -std::sin (z.real ()) * std::sinh (z.imag ())); + if (!COMPLEX_EQUAL (std::cos (z), my_cos_z)) + return false; + + auto my_tan_z + = std::complex<T> (std::sin (2*z.real ()), std::sinh (2*z.imag ())) + / (std::cos (2*z.real ()) + std::cosh (2*z.imag ())); + if (!COMPLEX_EQUAL (std::tan (z), my_tan_z)) + return false; + + auto my_sinh_z + = std::complex<T> (std::sinh (z.real ()) * std::cos (z.imag ()), + std::cosh (z.real ()) * std::sin (z.imag ())); + if (!COMPLEX_EQUAL (std::sinh (z), my_sinh_z)) + return false; + + auto my_cosh_z + = std::complex<T> (std::cosh (z.real ()) * std::cos (z.imag ()), + std::sinh (z.real ()) * std::sin (z.imag ())); + if (!COMPLEX_EQUAL (std::cosh (z), my_cosh_z)) + return false; + + auto my_tanh_z + = std::complex<T> (std::sinh (2*z.real ()), + std::sin (2*z.imag ())) + / (std::cosh (2*z.real ()) + std::cos (2*z.imag ())); + if (!COMPLEX_EQUAL (std::tanh (z), my_tanh_z)) + return false; + + auto my_asin_z = -i * std::log (i * z + std::sqrt ((T) 1.0 - z*z)); + if (!COMPLEX_EQUAL (std::asin (z), my_asin_z)) + return false; + + auto my_acos_z + = std::complex<T> (std::numbers::pi / 2) + + i * std::log (i * z + std::sqrt ((T) 1.0 - z*z)); + if (!COMPLEX_EQUAL (std::acos (z), my_acos_z)) + return false; + + auto my_atan_z = std::complex<T> (0, -0.5) * (std::log ((i - z) / (i + z))); + if (!COMPLEX_EQUAL (std::atan (z), my_atan_z)) + return false; + + auto my_asinh_z = std::log (z + std::sqrt (z*z + (T) 1.0)); + if (!COMPLEX_EQUAL (std::asinh (z), my_asinh_z)) + return false; + + auto my_acosh_z = std::log (z + std::sqrt (z*z - (T) 1.0)); + if (!COMPLEX_EQUAL (std::acosh (z), my_acosh_z)) + return false; + + auto my_atanh_z + = std::complex<T> (0.5) * (std::log ((T) 1.0 + z) - std::log ((T) 1.0 - z)); + if (!COMPLEX_EQUAL (std::atanh (z), my_atanh_z)) + return false; + + return true; +} +#pragma omp end declare target + +#define RUN_TEST(func) \ +{ \ + pass++; \ + bool ok = test_##func<float> (); \ + if (!ok) { result = pass; break; } \ + pass++; \ + ok = test_##func<double> (); \ + if (!ok) { result = pass; break; } \ +} + +int main (void) +{ + int result = 0; + + #pragma omp target map (tofrom: result) + do { + int pass = 0; + + RUN_TEST (complex); + RUN_TEST (complex_exp_log); + RUN_TEST (complex_trig); + } while (false); + + return result; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__deque-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__deque-concurrent-usm.C new file mode 100644 index 0000000..863a1de --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__deque-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__deque-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__deque-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__deque-concurrent.C new file mode 100644 index 0000000..9c2d6fa --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__deque-concurrent.C @@ -0,0 +1,64 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <deque> +#include <algorithm> + +#define N 50000 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand (); +} + +#pragma omp declare target +bool validate (const std::deque<int> &_deque, int data[]) +{ + for (int i = 0; i < N; ++i) + if (_deque[i] != data[i] * data[i]) + return false; + return true; +} +#pragma omp end declare target + +int main (void) +{ + int data[N]; + bool ok; + + srand (time (NULL)); + init (data); + +#ifdef MEM_SHARED + std::deque<int> _deque (std::begin (data), std::end (data)); +#else + std::deque<int> _deque; +#endif + +#ifndef MEM_SHARED + #pragma omp target data map (to: data[:N]) map (alloc: _deque) +#endif + { +#ifndef MEM_SHARED + #pragma omp target + new (&_deque) std::deque<int> (std::begin (data), std::end (data)); +#endif + + #pragma omp target teams distribute parallel for + for (int i = 0; i < N; ++i) + _deque[i] *= _deque[i]; + + #pragma omp target map (from: ok) + { + ok = validate (_deque, data); +#ifndef MEM_SHARED + _deque.~deque (); +#endif + } + } + + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__flat_map-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__flat_map-concurrent.C new file mode 100644 index 0000000..9e59907 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__flat_map-concurrent.C @@ -0,0 +1,71 @@ +// { dg-do run } +// { dg-additional-options "-std=c++23" } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +/* { dg-ice {TODO PR120450} { offload_target_amdgcn && { ! offload_device_shared_as } } } + { dg-excess-errors {'mkoffload' failure etc.} { xfail { offload_target_amdgcn && { ! offload_device_shared_as } } } } + (For effective-target 'offload_device_shared_as', we've got '-DMEM_SHARED', and therefore don't invoke the constructor with placement new.) */ + +#include <stdlib.h> +#include <time.h> +#include <set> +#include <flat_map> + +#define N 3000 + +void init (int data[], bool unique) +{ + std::set<int> _set; + for (int i = 0; i < N; ++i) + { + // Avoid duplicates in data array if unique is true. + do + data[i] = rand (); + while (unique && _set.count (data[i]) > 0); + _set.insert (data[i]); + } +} + +bool validate (long long sum, int keys[], int data[]) +{ + long long total = 0; + for (int i = 0; i < N; ++i) + total += (long long) keys[i] * data[i]; + return sum == total; +} + +int main (void) +{ + int keys[N], data[N]; + std::flat_map<int,int> _map; + + srand (time (NULL)); + init (keys, true); + init (data, false); + + #pragma omp target enter data map (to: keys[:N], data[:N]) map (alloc: _map) + + #pragma omp target + { +#ifndef MEM_SHARED + new (&_map) std::flat_map<int,int> (); +#endif + for (int i = 0; i < N; ++i) + _map[keys[i]] = data[i]; + } + + long long sum = 0; + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < N; ++i) + sum += (long long) keys[i] * _map[keys[i]]; + +#ifndef MEM_SHARED + #pragma omp target + _map.~flat_map (); +#endif + + #pragma omp target exit data map (release: _map) + + bool ok = validate (sum, keys, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__flat_multimap-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__flat_multimap-concurrent.C new file mode 100644 index 0000000..1dc60c8 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__flat_multimap-concurrent.C @@ -0,0 +1,70 @@ +// { dg-do run } +// { dg-additional-options "-std=c++23" } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +/* { dg-ice {TODO PR120450} { offload_target_amdgcn && { ! offload_device_shared_as } } } + { dg-excess-errors {'mkoffload' failure etc.} { xfail { offload_target_amdgcn && { ! offload_device_shared_as } } } } + (For effective-target 'offload_device_shared_as', we've got '-DMEM_SHARED', and therefore don't invoke the constructor with placement new.) */ + +#include <stdlib.h> +#include <time.h> +#include <flat_map> + +// Make sure that KEY_MAX is less than N to ensure some duplicate keys. +#define N 3000 +#define KEY_MAX 1000 + +void init (int data[], int max) +{ + for (int i = 0; i < N; ++i) + data[i] = i % max; +} + +bool validate (long long sum, int keys[], int data[]) +{ + long long total = 0; + for (int i = 0; i < N; ++i) + total += (long long) keys[i] * data[i]; + return sum == total; +} + +int main (void) +{ + int keys[N], data[N]; + std::flat_multimap<int,int> _map; + + srand (time (NULL)); + init (keys, KEY_MAX); + init (data, RAND_MAX); + + #pragma omp target enter data map (to: keys[:N], data[:N]) map (alloc: _map) + + #pragma omp target + { +#ifndef MEM_SHARED + new (&_map) std::flat_multimap<int,int> (); +#endif + for (int i = 0; i < N; ++i) + _map.insert({keys[i], data[i]}); + } + + long long sum = 0; + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < KEY_MAX; ++i) + { + auto range = _map.equal_range (i); + for (auto it = range.first; it != range.second; ++it) { + sum += (long long) it->first * it->second; + } + } + +#ifndef MEM_SHARED + #pragma omp target + _map.~flat_multimap (); +#endif + + #pragma omp target exit data map (release: _map) + + bool ok = validate (sum, keys, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__flat_multiset-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__flat_multiset-concurrent.C new file mode 100644 index 0000000..59b59bf --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__flat_multiset-concurrent.C @@ -0,0 +1,60 @@ +// { dg-do run } +// { dg-additional-options "-std=c++23" } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <flat_set> +#include <algorithm> + +// MAX should be less than N to ensure that some duplicates occur. +#define N 4000 +#define MAX 1000 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand () % MAX; +} + +bool validate (int sum, int data[]) +{ + int total = 0; + for (int i = 0; i < N; ++i) + total += data[i]; + return sum == total; +} + +int main (void) +{ + int data[N]; + std::flat_multiset<int> set; + int sum = 0; + + srand (time (NULL)); + init (data); + + #pragma omp target data map (to: data[:N]) map (alloc: set) + { + #pragma omp target + { +#ifndef MEM_SHARED + new (&set) std::flat_multiset<int> (); +#endif + for (int i = 0; i < N; ++i) + set.insert (data[i]); + } + + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < MAX; ++i) + sum += i * set.count (i); + +#ifndef MEM_SHARED + #pragma omp target + set.~flat_multiset (); +#endif + } + + bool ok = validate (sum, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__flat_set-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__flat_set-concurrent.C new file mode 100644 index 0000000..b255cd5 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__flat_set-concurrent.C @@ -0,0 +1,67 @@ +// { dg-do run } +// { dg-additional-options "-std=c++23" } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <flat_set> +#include <algorithm> + +#define N 4000 +#define MAX 16384 + +void init (int data[]) +{ + std::flat_set<int> _set; + for (int i = 0; i < N; ++i) + { + // Avoid duplicates in data array. + do + data[i] = rand () % MAX; + while (_set.count (data[i]) != 0); + _set.insert (data[i]); + } +} + +bool validate (int sum, int data[]) +{ + int total = 0; + for (int i = 0; i < N; ++i) + total += data[i]; + return sum == total; +} + +int main (void) +{ + int data[N]; + std::flat_set<int> _set; + int sum = 0; + + srand (time (NULL)); + init (data); + + #pragma omp target data map (to: data[:N]) map (alloc: _set) + { + #pragma omp target + { +#ifndef MEM_SHARED + new (&_set) std::flat_set<int> (); +#endif + for (int i = 0; i < N; ++i) + _set.insert (data[i]); + } + + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < MAX; ++i) + if (_set.count (i) > 0) + sum += i; + +#ifndef MEM_SHARED + #pragma omp target + _set.~flat_set (); +#endif + } + + bool ok = validate (sum, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent-usm.C new file mode 100644 index 0000000..60d5cee --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__forward_list-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent.C new file mode 100644 index 0000000..6b0ee65 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent.C @@ -0,0 +1,83 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <omp.h> +#include <forward_list> +#include <algorithm> + +#define N 3000 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand (); +} + +#pragma omp declare target +bool validate (const std::forward_list<int> &list, int data[]) +{ + int i = 0; + for (auto &v : list) + { + if (v != data[i] * data[i]) + return false; + ++i; + } + return true; +} +#pragma omp end declare target + +int main (void) +{ + int data[N]; + bool ok; + + srand (time (NULL)); + init (data); + +#ifdef MEM_SHARED + std::forward_list<int> list (std::begin (data), std::end (data)); +#else + std::forward_list<int> list; +#endif + +#ifndef MEM_SHARED + #pragma omp target data map (to: data[:N]) map (alloc: list) +#endif + { +#ifndef MEM_SHARED + #pragma omp target + new (&list) std::forward_list<int> (std::begin (data), std::end (data)); +#endif + + #pragma omp target teams + do + { + int len = N / omp_get_num_teams () + (N % omp_get_num_teams () > 0); + int start = len * omp_get_team_num (); + if (start >= N) + break; + if (start + len >= N) + len = N - start; + auto it = list.begin (); + std::advance (it, start); + for (int i = 0; i < len; ++i) + { + *it *= *it; + ++it; + } + } while (false); + + #pragma omp target map (from: ok) + { + ok = validate (list, data); +#ifndef MEM_SHARED + list.~forward_list (); +#endif + } + } + + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__list-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__list-concurrent-usm.C new file mode 100644 index 0000000..5057bf9 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__list-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__list-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__list-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__list-concurrent.C new file mode 100644 index 0000000..1f44a17 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__list-concurrent.C @@ -0,0 +1,83 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <omp.h> +#include <list> +#include <algorithm> + +#define N 3000 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand (); +} + +#pragma omp declare target +bool validate (const std::list<int> &_list, int data[]) +{ + int i = 0; + for (auto &v : _list) + { + if (v != data[i] * data[i]) + return false; + ++i; + } + return true; +} +#pragma omp end declare target + +int main (void) +{ + int data[N]; + bool ok; + + srand (time (NULL)); + init (data); + +#ifdef MEM_SHARED + std::list<int> _list (std::begin (data), std::end (data)); +#else + std::list<int> _list; +#endif + +#ifndef MEM_SHARED + #pragma omp target data map (to: data[:N]) map (alloc: _list) +#endif + { +#ifndef MEM_SHARED + #pragma omp target + new (&_list) std::list<int> (std::begin (data), std::end (data)); +#endif + + #pragma omp target teams + do + { + int len = N / omp_get_num_teams () + (N % omp_get_num_teams () > 0); + int start = len * omp_get_team_num (); + if (start >= N) + break; + if (start + len >= N) + len = N - start; + auto it = _list.begin (); + std::advance (it, start); + for (int i = 0; i < len; ++i) + { + *it *= *it; + ++it; + } + } while (false); + + #pragma omp target map (from: ok) + { + ok = validate (_list, data); +#ifndef MEM_SHARED + _list.~list (); +#endif + } + } + + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__map-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__map-concurrent-usm.C new file mode 100644 index 0000000..fe37426 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__map-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__map-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__map-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__map-concurrent.C new file mode 100644 index 0000000..36556ef --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__map-concurrent.C @@ -0,0 +1,70 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <set> +#include <map> + +#define N 3000 + +void init (int data[], bool unique) +{ + std::set<int> _set; + for (int i = 0; i < N; ++i) + { + // Avoid duplicates in data array if unique is true. + do + data[i] = rand (); + while (unique && _set.find (data[i]) != _set.end ()); + _set.insert (data[i]); + } +} + +bool validate (long long sum, int keys[], int data[]) +{ + long long total = 0; + for (int i = 0; i < N; ++i) + total += (long long) keys[i] * data[i]; + return sum == total; +} + +int main (void) +{ + int keys[N], data[N]; + std::map<int,int> _map; + + srand (time (NULL)); + init (keys, true); + init (data, false); + +#ifndef MEM_SHARED + #pragma omp target enter data map (to: keys[:N], data[:N]) map (alloc: _map) +#endif + + #pragma omp target + { +#ifndef MEM_SHARED + new (&_map) std::map<int,int> (); +#endif + for (int i = 0; i < N; ++i) + _map[keys[i]] = data[i]; + } + + long long sum = 0; + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < N; ++i) + sum += (long long) keys[i] * _map[keys[i]]; + +#ifndef MEM_SHARED + #pragma omp target + _map.~map (); +#endif + +#ifndef MEM_SHARED + #pragma omp target exit data map (release: _map) +#endif + + bool ok = validate (sum, keys, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent-usm.C new file mode 100644 index 0000000..79f9245 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__multimap-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent.C new file mode 100644 index 0000000..6a4a4e8 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent.C @@ -0,0 +1,68 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <map> + +// Make sure that KEY_MAX is less than N to ensure some duplicate keys. +#define N 3000 +#define KEY_MAX 1000 + +void init (int data[], int max) +{ + for (int i = 0; i < N; ++i) + data[i] = rand () % max; +} + +bool validate (long long sum, int keys[], int data[]) +{ + long long total = 0; + for (int i = 0; i < N; ++i) + total += (long long) keys[i] * data[i]; + return sum == total; +} + +int main (void) +{ + int keys[N], data[N]; + std::multimap<int,int> _map; + + srand (time (NULL)); + init (keys, KEY_MAX); + init (data, RAND_MAX); + +#ifndef MEM_SHARED + #pragma omp target enter data map (to: keys[:N], data[:N]) map (alloc: _map) +#endif + + #pragma omp target + { +#ifndef MEM_SHARED + new (&_map) std::multimap<int,int> (); +#endif + for (int i = 0; i < N; ++i) + _map.insert({keys[i], data[i]}); + } + + long long sum = 0; + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < KEY_MAX; ++i) + { + auto range = _map.equal_range (i); + for (auto it = range.first; it != range.second; ++it) + sum += (long long) it->first * it->second; + } + +#ifndef MEM_SHARED + #pragma omp target + _map.~multimap (); +#endif + +#ifndef MEM_SHARED + #pragma omp target exit data map (release: _map) +#endif + + bool ok = validate (sum, keys, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent-usm.C new file mode 100644 index 0000000..2d80756 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__multiset-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent.C new file mode 100644 index 0000000..b12402e --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent.C @@ -0,0 +1,62 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <stdio.h> +#include <time.h> +#include <set> +#include <algorithm> + +// MAX should be less than N to ensure that some duplicates occur. +#define N 4000 +#define MAX 1000 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand () % MAX; +} + +bool validate (int sum, int data[]) +{ + int total = 0; + for (int i = 0; i < N; ++i) + total += data[i]; + return sum == total; +} + +int main (void) +{ + int data[N]; + std::multiset<int> set; + int sum = 0; + + srand (time (NULL)); + init (data); + +#ifndef MEM_SHARED + #pragma omp target data map (to: data[:N]) map (alloc: set) +#endif + { + #pragma omp target + { +#ifndef MEM_SHARED + new (&set) std::multiset<int> (); +#endif + for (int i = 0; i < N; ++i) + set.insert (data[i]); + } + + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < MAX; ++i) + sum += i * set.count (i); + +#ifndef MEM_SHARED + #pragma omp target + set.~multiset (); +#endif + } + + bool ok = validate (sum, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__numbers.C b/libgomp/testsuite/libgomp.c++/target-std__numbers.C new file mode 100644 index 0000000..a6b3665 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__numbers.C @@ -0,0 +1,93 @@ +// { dg-do run } +// { dg-additional-options "-std=c++20" } + +#include <cmath> +#include <numbers> + +#define FP_EQUAL(x,y) (std::abs ((x) - (y)) < 1E-6) + +#pragma omp declare target +template<typename T> bool test_pi () +{ + if (!FP_EQUAL (std::sin (std::numbers::pi_v<T>), (T) 0.0)) + return false; + if (!FP_EQUAL (std::cos (std::numbers::pi_v<T>), (T) -1.0)) + return false; + if (!FP_EQUAL (std::numbers::pi_v<T> * std::numbers::inv_pi_v<T>, (T) 1.0)) + return false; + if (!FP_EQUAL (std::numbers::pi_v<T> * std::numbers::inv_sqrtpi_v<T> + * std::numbers::inv_sqrtpi_v<T>, (T) 1.0)) + return false; + return true; +} + +template<typename T> bool test_sqrt () +{ + if (!FP_EQUAL (std::numbers::sqrt2_v<T> * std::numbers::sqrt2_v<T>, (T) 2.0)) + return false; + if (!FP_EQUAL (std::numbers::sqrt3_v<T> * std::numbers::sqrt3_v<T>, (T) 3.0)) + return false; + return true; +} + +template<typename T> bool test_phi () +{ + T myphi = ((T) 1.0 + std::sqrt ((T) 5.0)) / (T) 2.0; + if (!FP_EQUAL (myphi, std::numbers::phi_v<T>)) + return false; + return true; +} + +template<typename T> bool test_log () +{ + if (!FP_EQUAL (std::log ((T) 2.0), std::numbers::ln2_v<T>)) + return false; + if (!FP_EQUAL (std::log ((T) 10.0), std::numbers::ln10_v<T>)) + return false; + if (!FP_EQUAL (std::log2 ((T) std::numbers::e), std::numbers::log2e_v<T>)) + return false; + if (!FP_EQUAL (std::log10 ((T) std::numbers::e), std::numbers::log10e_v<T>)) + return false; + return true; +} + +template<typename T> bool test_egamma () +{ + T myegamma = 0.0; + #pragma omp parallel for reduction(+:myegamma) + for (int k = 2; k < 100000; ++k) + myegamma += (std::riemann_zeta (k) - 1) / k; + myegamma = (T) 1 - myegamma; + if (!FP_EQUAL (myegamma, std::numbers::egamma_v<T>)) + return false; + return true; +} +#pragma omp end declare target + +#define RUN_TEST(func) \ +{ \ + pass++; \ + bool ok = test_##func<float> (); \ + if (!ok) { result = pass; break; } \ + pass++; \ + ok = test_##func<double> (); \ + if (!ok) { result = pass; break; } \ +} + +int main (void) +{ + int result = 0; + + #pragma omp target map (tofrom: result) + do { + int pass = 0; + + RUN_TEST (pi); + RUN_TEST (sqrt); + RUN_TEST (phi); + RUN_TEST (log); + RUN_TEST (egamma); + } while (false); + + return result; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__set-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__set-concurrent-usm.C new file mode 100644 index 0000000..54f62e3 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__set-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__set-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__set-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__set-concurrent.C new file mode 100644 index 0000000..cd23128 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__set-concurrent.C @@ -0,0 +1,68 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <set> +#include <algorithm> + +#define N 4000 +#define MAX 16384 + +void init (int data[]) +{ + std::set<int> _set; + for (int i = 0; i < N; ++i) + { + // Avoid duplicates in data array. + do + data[i] = rand () % MAX; + while (_set.find (data[i]) != _set.end ()); + _set.insert (data[i]); + } +} + +bool validate (int sum, int data[]) +{ + int total = 0; + for (int i = 0; i < N; ++i) + total += data[i]; + return sum == total; +} + +int main (void) +{ + int data[N]; + std::set<int> _set; + int sum = 0; + + srand (time (NULL)); + init (data); + +#ifndef MEM_SHARED + #pragma omp target data map (to: data[:N]) map (alloc: _set) +#endif + { + #pragma omp target + { +#ifndef MEM_SHARED + new (&_set) std::set<int> (); +#endif + for (int i = 0; i < N; ++i) + _set.insert (data[i]); + } + + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < MAX; ++i) + if (_set.find (i) != _set.end ()) + sum += i; + +#ifndef MEM_SHARED + #pragma omp target + _set.~set (); +#endif + } + + bool ok = validate (sum, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__span-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__span-concurrent-usm.C new file mode 100644 index 0000000..7ef16bf --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__span-concurrent-usm.C @@ -0,0 +1,7 @@ +// { dg-additional-options "-std=c++20" } + +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__span-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__span-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__span-concurrent.C new file mode 100644 index 0000000..046b3c1 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__span-concurrent.C @@ -0,0 +1,66 @@ +// { dg-do run } +// { dg-additional-options "-std=c++20" } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <span> + +#define N 64 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand (); +} + +#pragma omp declare target +bool validate (const std::span<int, N> &span, int data[]) +{ + for (int i = 0; i < N; ++i) + if (span[i] != data[i] * data[i]) + return false; + return true; +} +#pragma omp end declare target + +int main (void) +{ + int data[N]; + bool ok; + int elements[N]; + std::span<int, N> span(elements); + + srand (time (NULL)); + init (data); + +#ifndef MEM_SHARED + #pragma omp target enter data map (to: data[:N]) map (alloc: elements, span) +#endif + + #pragma omp target + { +#ifndef MEM_SHARED + new (&span) std::span<int, N> (elements); +#endif + std::copy (data, data + N, span.begin ()); + } + + #pragma omp target teams distribute parallel for + for (int i = 0; i < N; ++i) + span[i] *= span[i]; + + #pragma omp target map (from: ok) + { + ok = validate (span, data); +#ifndef MEM_SHARED + span.~span (); +#endif + } + +#ifndef MEM_SHARED + #pragma omp target exit data map (release: elements, span) +#endif + + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__unordered_map-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__unordered_map-concurrent.C new file mode 100644 index 0000000..00d7943 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__unordered_map-concurrent.C @@ -0,0 +1,66 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <set> +#include <unordered_map> + +#define N 3000 + +void init (int data[], bool unique) +{ + std::set<int> _set; + for (int i = 0; i < N; ++i) + { + // Avoid duplicates in data array if unique is true. + do + data[i] = rand (); + while (unique && _set.count (data[i]) > 0); + _set.insert (data[i]); + } +} + +bool validate (long long sum, int keys[], int data[]) +{ + long long total = 0; + for (int i = 0; i < N; ++i) + total += (long long) keys[i] * data[i]; + return sum == total; +} + +int main (void) +{ + int keys[N], data[N]; + std::unordered_map<int,int> _map; + + srand (time (NULL)); + init (keys, true); + init (data, false); + + #pragma omp target enter data map (to: keys[:N], data[:N]) map (alloc: _map) + + #pragma omp target + { +#ifndef MEM_SHARED + new (&_map) std::unordered_map<int,int> (); +#endif + for (int i = 0; i < N; ++i) + _map[keys[i]] = data[i]; + } + + long long sum = 0; + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < N; ++i) + sum += (long long) keys[i] * _map[keys[i]]; + +#ifndef MEM_SHARED + #pragma omp target + _map.~unordered_map (); +#endif + + #pragma omp target exit data map (release: _map) + + bool ok = validate (sum, keys, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__unordered_multimap-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__unordered_multimap-concurrent.C new file mode 100644 index 0000000..2567634 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__unordered_multimap-concurrent.C @@ -0,0 +1,65 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <unordered_map> + +// Make sure that KEY_MAX is less than N to ensure some duplicate keys. +#define N 3000 +#define KEY_MAX 1000 + +void init (int data[], int max) +{ + for (int i = 0; i < N; ++i) + data[i] = i % max; +} + +bool validate (long long sum, int keys[], int data[]) +{ + long long total = 0; + for (int i = 0; i < N; ++i) + total += (long long) keys[i] * data[i]; + return sum == total; +} + +int main (void) +{ + int keys[N], data[N]; + std::unordered_multimap<int,int> _map; + + srand (time (NULL)); + init (keys, KEY_MAX); + init (data, RAND_MAX); + + #pragma omp target enter data map (to: keys[:N], data[:N]) map (alloc: _map) + + #pragma omp target + { +#ifndef MEM_SHARED + new (&_map) std::unordered_multimap<int,int> (); +#endif + for (int i = 0; i < N; ++i) + _map.insert({keys[i], data[i]}); + } + + long long sum = 0; + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < KEY_MAX; ++i) + { + auto range = _map.equal_range (i); + for (auto it = range.first; it != range.second; ++it) { + sum += (long long) it->first * it->second; + } + } + +#ifndef MEM_SHARED + #pragma omp target + _map.~unordered_multimap (); +#endif + + #pragma omp target exit data map (release: _map) + + bool ok = validate (sum, keys, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__unordered_multiset-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__unordered_multiset-concurrent.C new file mode 100644 index 0000000..da6c875 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__unordered_multiset-concurrent.C @@ -0,0 +1,59 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <unordered_set> +#include <algorithm> + +// MAX should be less than N to ensure that some duplicates occur. +#define N 4000 +#define MAX 1000 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand () % MAX; +} + +bool validate (int sum, int data[]) +{ + int total = 0; + for (int i = 0; i < N; ++i) + total += data[i]; + return sum == total; +} + +int main (void) +{ + int data[N]; + std::unordered_multiset<int> set; + int sum = 0; + + srand (time (NULL)); + init (data); + + #pragma omp target data map (to: data[:N]) map (alloc: set) + { + #pragma omp target + { +#ifndef MEM_SHARED + new (&set) std::unordered_multiset<int> (); +#endif + for (int i = 0; i < N; ++i) + set.insert (data[i]); + } + + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < MAX; ++i) + sum += i * set.count (i); + +#ifndef MEM_SHARED + #pragma omp target + set.~unordered_multiset (); +#endif + } + + bool ok = validate (sum, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__unordered_set-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__unordered_set-concurrent.C new file mode 100644 index 0000000..b7bd935 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__unordered_set-concurrent.C @@ -0,0 +1,66 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <unordered_set> +#include <algorithm> + +#define N 4000 +#define MAX 16384 + +void init (int data[]) +{ + std::unordered_set<int> _set; + for (int i = 0; i < N; ++i) + { + // Avoid duplicates in data array. + do + data[i] = rand () % MAX; + while (_set.count (data[i]) != 0); + _set.insert (data[i]); + } +} + +bool validate (int sum, int data[]) +{ + int total = 0; + for (int i = 0; i < N; ++i) + total += data[i]; + return sum == total; +} + +int main (void) +{ + int data[N]; + std::unordered_set<int> _set; + int sum = 0; + + srand (time (NULL)); + init (data); + + #pragma omp target data map (to: data[:N]) map (alloc: _set) + { + #pragma omp target + { +#ifndef MEM_SHARED + new (&_set) std::unordered_set<int> (); +#endif + for (int i = 0; i < N; ++i) + _set.insert (data[i]); + } + + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < MAX; ++i) + if (_set.count (i) > 0) + sum += i; + +#ifndef MEM_SHARED + #pragma omp target + _set.~unordered_set (); +#endif + } + + bool ok = validate (sum, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__valarray-1.C b/libgomp/testsuite/libgomp.c++/target-std__valarray-1.C new file mode 100644 index 0000000..865cde2 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__valarray-1.C @@ -0,0 +1,179 @@ +// { dg-additional-options -std=c++20 } +// { dg-output-file target-std__valarray-1.output } + +#include <valarray> +#include <ostream> +#include <sstream> + + +/*TODO Work around PR118484 "ICE during IPA pass: cp, segfault in determine_versionability ipa-cp.cc:467". + +We can't: + + #pragma omp declare target(std::basic_streambuf<char, std::char_traits<char>>::basic_streambuf) + +... because: + + error: overloaded function name ‘std::basic_streambuf<char>::__ct ’ in clause ‘enter’ + +Therefore, use dummy classes in '#pragma omp declare target': +*/ + +#pragma omp declare target + +// For 'std::basic_streambuf<char, std::char_traits<char> >::basic_streambuf': + +class dummy_basic_streambuf__char + : public std::basic_streambuf<char> +{ +public: + dummy_basic_streambuf__char() {} +}; + +// For 'std::basic_ios<char, std::char_traits<char> >::basic_ios()': + +class dummy_basic_ios__char + : public std::basic_ios<char> +{ +public: + dummy_basic_ios__char() {} +}; + +#pragma omp end declare target + + +int main() +{ + // Due to PR120021 "Offloading vs. C++ 'std::initializer_list'", we can't construct these on the device. + std::initializer_list<int> v1_i = {10, 20, 30, 40, 50}; + const int *v1_i_data = std::data(v1_i); + size_t v1_i_size = v1_i.size(); + std::initializer_list<int> v2_i = {5, 4, 3, 2, 1}; + const int *v2_i_data = std::data(v2_i); + size_t v2_i_size = v2_i.size(); + std::initializer_list<int> shiftData_i = {1, 2, 3, 4, 5}; + const int *shiftData_i_data = std::data(shiftData_i); + size_t shiftData_i_size = shiftData_i.size(); +#pragma omp target \ + defaultmap(none) \ + map(to: v1_i_data[:v1_i_size], v1_i_size, \ + v2_i_data[:v2_i_size], v2_i_size, \ + shiftData_i_data[:shiftData_i_size], shiftData_i_size) + { + /* Manually set up a buffer we can stream into, similar to 'cout << [...]', and print it at the end of region. */ + std::stringbuf out_b; + std::ostream out(&out_b); + + std::valarray<int> v1(v1_i_data, v1_i_size); + out << "\nv1:"; + for (auto val : v1) + out << " " << val; + + std::valarray<int> v2(v2_i_data, v2_i_size); + out << "\nv2:"; + for (auto val : v2) + out << " " << val; + + std::valarray<int> sum = v1 + v2; + out << "\nv1 + v2:"; + for (auto val : sum) + out << " " << val; + + std::valarray<int> diff = v1 - v2; + out << "\nv1 - v2:"; + for (auto val : diff) + out << " " << val; + + std::valarray<int> product = v1 * v2; + out << "\nv1 * v2:"; + for (auto val : product) + out << " " << val; + + std::valarray<int> quotient = v1 / v2; + out << "\nv1 / v2:"; + for (auto val : quotient) + out << " " << val; + + std::valarray<int> squares = pow(v1, 2); + out << "\npow(v1, 2):"; + for (auto val : squares) + out << " " << val; + + std::valarray<int> sinhs = sinh(v2); + out << "\nsinh(v2):"; + for (auto val : sinhs) + out << " " << val; + + std::valarray<int> logs = log(v1 * v2); + out << "\nlog(v1 * v2):"; + for (auto val : logs) + out << " " << val; + + std::valarray<int> data(12); + for (size_t i = 0; i < data.size(); ++i) + data[i] = i; + out << "\nOriginal array:"; + for (auto val : data) + out << " " << val; + + std::slice slice1(2, 5, 1); + std::valarray<int> sliced1 = data[slice1]; + out << "\nSlice(2, 5, 1):"; + for (auto val : sliced1) + out << " " << val; + + std::slice slice2(1, 4, 3); + std::valarray<int> sliced2 = data[slice2]; + out << "\nSlice(1, 4, 3):"; + for (auto val : sliced2) + out << " " << val; + + data[slice1] = 99; + out << "\nArray after slice modification:"; + for (auto val : data) + out << " " << val; + + std::valarray<bool> mask = (v1 > 20); + out << "\nElements of v1 > 20:"; + for (size_t i = 0; i < v1.size(); ++i) + { + if (mask[i]) + out << " " << v1[i]; + } + + std::valarray<int> masked = v1[mask]; + out << "\nMasked array:"; + for (auto val : masked) + out << " " << val; + + std::valarray<int> shiftData(shiftData_i_data, shiftData_i_size); + out << "\nOriginal shiftData:"; + for (auto val : shiftData) + out << " " << val; + + std::valarray<int> shifted = shiftData.shift(2); + out << "\nshift(2):"; + for (auto val : shifted) + out << " " << val; + + std::valarray<int> cshifted = shiftData.cshift(-1); + out << "\ncshift(-1):"; + for (auto val : cshifted) + out << " " << val; + + out << "\nSum(v1): " << v1.sum(); + out << "\nMin(v1): " << v1.min(); + out << "\nMax(v1): " << v1.max(); + + out << "\n"; + + /* Terminate with a NUL. Otherwise, we'd have to use: + __builtin_printf("%.*s", (int) out_b_sv.size(), out_b_sv.data()); + ... which nvptx 'printf', as implemented via PTX 'vprintf', doesn't support (TODO). */ + out << '\0'; + std::string_view out_b_sv = out_b.view(); + __builtin_printf("%s", out_b_sv.data()); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__valarray-1.output b/libgomp/testsuite/libgomp.c++/target-std__valarray-1.output new file mode 100644 index 0000000..c441e06 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__valarray-1.output @@ -0,0 +1,22 @@ + +v1: 10 20 30 40 50 +v2: 5 4 3 2 1 +v1 + v2: 15 24 33 42 51 +v1 - v2: 5 16 27 38 49 +v1 * v2: 50 80 90 80 50 +v1 / v2: 2 5 10 20 50 +pow(v1, 2): 100 400 900 1600 2500 +sinh(v2): 74 27 10 3 1 +log(v1 * v2): 3 4 4 4 3 +Original array: 0 1 2 3 4 5 6 7 8 9 10 11 +Slice(2, 5, 1): 2 3 4 5 6 +Slice(1, 4, 3): 1 4 7 10 +Array after slice modification: 0 1 99 99 99 99 99 7 8 9 10 11 +Elements of v1 > 20: 30 40 50 +Masked array: 30 40 50 +Original shiftData: 1 2 3 4 5 +shift(2): 3 4 5 0 0 +cshift(-1): 5 1 2 3 4 +Sum(v1): 150 +Min(v1): 10 +Max(v1): 50 diff --git a/libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent-usm.C new file mode 100644 index 0000000..41ec80e --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__valarray-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent.C new file mode 100644 index 0000000..8933072b --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent.C @@ -0,0 +1,66 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <valarray> + +#define N 50000 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand (); +} + +#pragma omp declare target +bool validate (const std::valarray<int> &arr, int data[]) +{ + for (int i = 0; i < N; ++i) + if (arr[i] != data[i] * data[i] + i) + return false; + return true; +} +#pragma omp end declare target + +int main (void) +{ + int data[N]; + bool ok; + + srand (time (NULL)); + init (data); + +#ifdef MEM_SHARED + std::valarray<int> arr (data, N); +#else + std::valarray<int> arr; +#endif + +#ifndef MEM_SHARED + #pragma omp target data map (to: data[:N]) map (alloc: arr) +#endif + { + #pragma omp target + { +#ifndef MEM_SHARED + new (&arr) std::valarray<int> (data, N); +#endif + arr *= arr; + } + + #pragma omp target teams distribute parallel for + for (int i = 0; i < N; ++i) + arr[i] += i; + + #pragma omp target map (from: ok) + { + ok = validate (arr, data); +#ifndef MEM_SHARED + arr.~valarray (); +#endif + } + } + + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__vector-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__vector-concurrent-usm.C new file mode 100644 index 0000000..967bff3b --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__vector-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__vector-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__vector-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__vector-concurrent.C new file mode 100644 index 0000000..a94b4cf --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__vector-concurrent.C @@ -0,0 +1,63 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <vector> + +#define N 50000 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand (); +} + +#pragma omp declare target +bool validate (const std::vector<int> &vec, int data[]) +{ + for (int i = 0; i < N; ++i) + if (vec[i] != data[i] * data[i]) + return false; + return true; +} +#pragma omp end declare target + +int main (void) +{ + int data[N]; + bool ok; + + srand (time (NULL)); + init (data); + +#ifdef MEM_SHARED + std::vector<int> vec (data, data + N); +#else + std::vector<int> vec; +#endif + +#ifndef MEM_SHARED + #pragma omp target data map (to: data[:N]) map (alloc: vec) +#endif + { +#ifndef MEM_SHARED + #pragma omp target + new (&vec) std::vector<int> (data, data + N); +#endif + + #pragma omp target teams distribute parallel for + for (int i = 0; i < N; ++i) + vec[i] *= vec[i]; + + #pragma omp target map (from: ok) + { + ok = validate (vec, data); +#ifndef MEM_SHARED + vec.~vector (); +#endif + } + } + + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-10.c b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-10.c new file mode 100644 index 0000000..00eb48b --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-10.c @@ -0,0 +1,64 @@ +/* { dg-do run } */ + +#include <string.h> +#include <stdlib.h> +#include <assert.h> + +#define N 64 + +typedef struct { + int *arr; + int size; +} B; + +#pragma omp declare mapper (mapB : B myb) map(to: myb.size, myb.arr) \ + map(tofrom: myb.arr[0:myb.size]) +// While GCC handles more, only default is ... +#pragma omp declare mapper (default : B myb) map(to: myb.size, myb.arr) \ + map(tofrom: myb.arr[0:myb.size]) + +struct A { + int *arr1; + B *arr2; + int arr3[N]; +}; + +int +main (int argc, char *argv[]) +{ + struct A var; + + memset (&var, 0, sizeof var); + var.arr1 = (int *) calloc (N, sizeof (int)); + var.arr2 = (B *) malloc (sizeof (B)); + var.arr2->arr = (int *) calloc (N, sizeof (float)); + var.arr2->size = N; + + { + // ... permitted here: + #pragma omp declare mapper (struct A x) map(to: x.arr1, x.arr2) \ + map(tofrom: x.arr1[0:N]) \ + map(mapper(default), tofrom: x.arr2[0:1]) + #pragma omp target + { + for (int i = 0; i < N; i++) + { + var.arr1[i]++; + var.arr2->arr[i]++; + } + } + } + + for (int i = 0; i < N; i++) + { + assert (var.arr1[i] == 1); + assert (var.arr2->arr[i] == 1); + assert (var.arr3[i] == 0); + } + + free (var.arr1); + free (var.arr2->arr); + free (var.arr2); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-11.c b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-11.c new file mode 100644 index 0000000..942d6a5 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-11.c @@ -0,0 +1,59 @@ +/* { dg-do run } */ + +#include <string.h> +#include <stdlib.h> +#include <assert.h> + +#define N 64 + +typedef struct B_tag { + int *arr; + int size; +} B; + +#pragma omp declare mapper (B myb) map(to: myb.size, myb.arr) \ + map(tofrom: myb.arr[0:myb.size]) + +struct A { + int *arr1; + B *arr2; + int arr3[N]; +}; + +int +main (int argc, char *argv[]) +{ + struct A var; + + memset (&var, 0, sizeof var); + var.arr1 = (int *) calloc (N, sizeof (int)); + var.arr2 = (B *) malloc (sizeof (B)); + var.arr2->arr = (int *) calloc (N, sizeof (int)); + var.arr2->size = N; + + { + #pragma omp declare mapper (struct A x) map(to: x.arr1, x.arr2) \ + map(tofrom: x.arr1[0:N]) map(tofrom: x.arr2[0:1]) + #pragma omp target + { + for (int i = 0; i < N; i++) + { + var.arr1[i]++; + var.arr2->arr[i]++; + } + } + } + + for (int i = 0; i < N; i++) + { + assert (var.arr1[i] == 1); + assert (var.arr2->arr[i] == 1); + assert (var.arr3[i] == 0); + } + + free (var.arr1); + free (var.arr2->arr); + free (var.arr2); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-12.c b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-12.c new file mode 100644 index 0000000..cfc6a91 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-12.c @@ -0,0 +1,94 @@ +/* { dg-do run } */ + +#include <string.h> +#include <stdlib.h> +#include <assert.h> + +#define N 64 + +typedef struct { + int *arr; + int size; +} B; + +#pragma omp declare mapper (samename : B myb) map(to: myb.size, myb.arr) \ + map(tofrom: myb.arr[0:myb.size]) +// While GCC handles more, only default is ... +#pragma omp declare mapper (default : B myb) map(to: myb.size, myb.arr) \ + map(tofrom: myb.arr[0:myb.size]) +typedef struct { + int *arr; + int size; +} C; + + +struct A { + int *arr1; + B *arr2; + C *arr3; +}; + +int +main (int argc, char *argv[]) +{ + struct A var; + + memset (&var, 0, sizeof var); + var.arr1 = (int *) calloc (N, sizeof (int)); + var.arr2 = (B *) malloc (sizeof (B)); + var.arr2->arr = (int *) calloc (N, sizeof (int)); + var.arr2->size = N; + var.arr3 = (C *) malloc (sizeof (C)); + var.arr3->arr = (int *) calloc (N, sizeof (int)); + var.arr3->size = N; + + { + // ... permitted here. + #pragma omp declare mapper (struct A x) map(to: x.arr1, x.arr2) \ + map(tofrom: x.arr1[0:N]) \ + map(mapper(default), tofrom: x.arr2[0:1]) + #pragma omp target + { + for (int i = 0; i < N; i++) + { + var.arr1[i]++; + var.arr2->arr[i]++; + } + } + } + + { + #pragma omp declare mapper (samename : C myc) map(to: myc.size, myc.arr) \ + map(tofrom: myc.arr[0:myc.size]) + // While GCC handles more, only default is ... + #pragma omp declare mapper (default : C myc) map(to: myc.size, myc.arr) \ + map(tofrom: myc.arr[0:myc.size]) + // ... permitted here. + #pragma omp declare mapper (struct A x) map(to: x.arr1, x.arr3) \ + map(tofrom: x.arr1[0:N]) \ + map(mapper( default ) , tofrom: *x.arr3) + #pragma omp target + { + for (int i = 0; i < N; i++) + { + var.arr1[i]++; + var.arr3->arr[i]++; + } + } + } + + for (int i = 0; i < N; i++) + { + assert (var.arr1[i] == 2); + assert (var.arr2->arr[i] == 1); + assert (var.arr3->arr[i] == 1); + } + + free (var.arr1); + free (var.arr2->arr); + free (var.arr2); + free (var.arr3->arr); + free (var.arr3); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-13.c b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-13.c new file mode 100644 index 0000000..c4784eb --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-13.c @@ -0,0 +1,55 @@ +/* { dg-do run } */ + +#include <assert.h> + +struct T { + int a; + int b; + int c; +}; + +void foo (void) +{ + struct T x; + x.a = x.b = x.c = 0; + +#pragma omp target + { + x.a++; + x.c++; + } + + assert (x.a == 1); + assert (x.b == 0); + assert (x.c == 1); +} + +// An identity mapper. This should do the same thing as the default! +#pragma omp declare mapper (struct T v) map(v) + +void bar (void) +{ + struct T x; + x.a = x.b = x.c = 0; + +#pragma omp target + { + x.b++; + } + +#pragma omp target map(x) + { + x.a++; + } + + assert (x.a == 1); + assert (x.b == 1); + assert (x.c == 0); +} + +int main (int argc, char *argv[]) +{ + foo (); + bar (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-14.c b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-14.c new file mode 100644 index 0000000..3e6027e --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-14.c @@ -0,0 +1,57 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <assert.h> + +struct Z { + int *arr; +}; + +void baz (struct Z *zarr, int len) +{ +#pragma omp declare mapper (struct Z myvar) map(to: myvar.arr) \ + map(tofrom: myvar.arr[0:len]) + zarr[0].arr = (int *) calloc (len, sizeof (int)); + zarr[5].arr = (int *) calloc (len, sizeof (int)); + +#pragma omp target map(zarr, *zarr) + { + for (int i = 0; i < len; i++) + zarr[0].arr[i]++; + } + +#pragma omp target map(zarr, zarr[5]) + { + for (int i = 0; i < len; i++) + zarr[5].arr[i]++; + } + +#pragma omp target map(zarr[5]) + { + for (int i = 0; i < len; i++) + zarr[5].arr[i]++; + } + +#pragma omp target map(zarr, zarr[5:1]) + { + for (int i = 0; i < len; i++) + zarr[5].arr[i]++; + } + + for (int i = 0; i < len; i++) + assert (zarr[0].arr[i] == 1); + + for (int i = 0; i < len; i++) + assert (zarr[5].arr[i] == 3); + + free (zarr[5].arr); + free (zarr[0].arr); +} + +int +main (int argc, char *argv[]) +{ + struct Z myzarr[10]; + baz (myzarr, 256); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-9.c b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-9.c new file mode 100644 index 0000000..324d535 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-9.c @@ -0,0 +1,62 @@ +/* { dg-do run } */ + +#include <string.h> +#include <stdlib.h> +#include <assert.h> + +#define N 64 + +struct A { + int *arr1; + float *arr2; + int arr3[N]; +}; + +int +main (int argc, char *argv[]) +{ + struct A var; + + memset (&var, 0, sizeof var); + var.arr1 = (int *) calloc (N, sizeof (int)); + var.arr2 = (float *) calloc (N, sizeof (float)); + + { + #pragma omp declare mapper (struct A x) map(to: x.arr1) \ + map(tofrom: x.arr1[0:N]) + #pragma omp target + { + for (int i = 0; i < N; i++) + var.arr1[i]++; + } + } + + { + #pragma omp declare mapper (struct A x) map(to: x.arr2) \ + map(tofrom: x.arr2[0:N]) + #pragma omp target + { + for (int i = 0; i < N; i++) + var.arr2[i]++; + } + } + + { + #pragma omp declare mapper (struct A x) map(tofrom: x.arr3[0:N]) + #pragma omp target + { + for (int i = 0; i < N; i++) + var.arr3[i]++; + } + } + + for (int i = 0; i < N; i++) + { + assert (var.arr1[i] == 1); + assert (var.arr2[i] == 1); + assert (var.arr3[i] == 1); + } + + free (var.arr1); + free (var.arr2); +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/get-mapped-ptr-1.c b/libgomp/testsuite/libgomp.c-c++-common/get-mapped-ptr-1.c index 4708ae8..90d1a72 100644 --- a/libgomp/testsuite/libgomp.c-c++-common/get-mapped-ptr-1.c +++ b/libgomp/testsuite/libgomp.c-c++-common/get-mapped-ptr-1.c @@ -21,7 +21,7 @@ main () if (omp_target_associate_ptr (q, p, sizeof (int), 0, d) != 0) return 0; - if (omp_get_mapped_ptr (q, -5) != NULL) + if (omp_get_mapped_ptr (q, -6) != NULL) abort (); if (omp_get_mapped_ptr (q, omp_get_num_devices () + 1) != NULL) diff --git a/libgomp/testsuite/libgomp.c-c++-common/interop-1.c b/libgomp/testsuite/libgomp.c-c++-common/interop-1.c new file mode 100644 index 0000000..149f387 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/interop-1.c @@ -0,0 +1,43 @@ +/* { dg-do run } */ + +#include <omp.h> +#include <stdlib.h> + +int +main () +{ + int dev = omp_get_num_devices (); + int x[6]; + omp_interop_t obj1 = omp_interop_none; +#pragma omp interop init(targetsync : obj1) depend(in : x) device(dev) + if (obj1 != omp_interop_none) + abort (); + +#pragma omp interop use(obj1) +#pragma omp interop destroy(obj1) depend(out : x) + if (obj1 != omp_interop_none) + abort (); + + omp_set_default_device (dev); + omp_interop_t obj2; + +#pragma omp interop init( \ + target, targetsync, \ + prefer_type({fr("hip"), attr("ompx_gnu_prio:1", "ompx_gnu_debug")}, \ + {attr("ompx_gnu_nicest"), attr("ompx_something")}) : obj1, \ + obj2) nowait + if (obj1 != omp_interop_none || obj2 != omp_interop_none) + abort (); +#pragma omp interop use(obj1, obj2) nowait + + omp_interop_t obj3 = __omp_interop_t_max__; + +#pragma omp interop init(target : obj3) use(obj2) destroy(obj1) nowait + if (obj1 != omp_interop_none || obj3 != omp_interop_none) + abort (); +#pragma omp interop destroy(obj3, obj2) nowait + if (obj2 != omp_interop_none || obj3 != omp_interop_none) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/interop-2.c b/libgomp/testsuite/libgomp.c-c++-common/interop-2.c new file mode 100644 index 0000000..a7526dc --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/interop-2.c @@ -0,0 +1,129 @@ +/* { dg-do run } */ +/* { dg-additional-options "-lm" } */ + +/* Note: At the time this program was written, Nvptx was not asynchronous + enough to trigger the issue (with a 'nowait' added); however, one + AMD GPUs, it triggered. */ + +/* Test whether nowait / dependency is handled correctly. + Motivated by OpenMP_VV's 5.1/interop/test_interop_target.c + + The code actually only creates a streaming object without actually using it, + except for dependency tracking. + + Note that there is a difference between having a steaming (targetsync) object + and not (= omp_interop_none); at least if one assumes that omp_interop_none + does not include 'targetsync' as (effective) interop type - in that case, + 'nowait' has no effect and the 'depend' is active as included task, otherwise + the code continues with the depend being active only for the about to be + destroyed or used thread. + + The OpenMP spec states (here 6.0): + "If the interop-type set includes 'targetsync', an empty mergeable task is + generated. If the 'nowait' clause is not present on the construct then + the task is also an included task. If the interop-type set does not + include 'targetsync', the 'nowait' clause has no effect. Any depend + clauses that are present on the construct apply to the generated task. */ + +#include <omp.h> + +void +test_async (const int dev) +{ + constexpr int N = 2048; + constexpr int ulp = 4; + constexpr double M_PI = 2.0 * __builtin_acos (0.0); + omp_interop_t obj1, obj2; + double A[N] = { }; + int B[N] = { }; + + /* Create interop object. */ + #pragma omp interop device(dev) init(targetsync : obj1, obj2) + + if (dev == omp_initial_device || dev == omp_get_num_devices ()) + { + if (obj1 != omp_interop_none || obj2 != omp_interop_none) + __builtin_abort (); + } + else + { + if (obj1 == omp_interop_none || obj2 == omp_interop_none) + __builtin_abort (); + } + + /* DOUBLE */ + + /* Now in the background update it, slowly enough that the + code afterwards is reached while still running asynchronously. + As OpenMP_VV's Issue #863 shows, the overhead is high enough to + fail even when only doing an atomic integer increment. */ + + #pragma omp target device(dev) map(A) depend(out: A[:N]) nowait + for (int i = 0; i < N; i++) + #pragma omp atomic update + A[i] += __builtin_sin (2*i*M_PI/N); + + /* DESTROY take care of the dependeny such that ... */ + + if (obj1 == omp_interop_none) + { + // Same as below as 'nowait' is ignored. + #pragma omp interop destroy(obj1) depend(in: A[:N]) nowait + } + else + { + #pragma omp interop destroy(obj1) depend(in: A[:N]) + } + + /* ... this code is only executed once the dependency as been fulfilled. */ + + /* Check the value - part I: quick, avoid A[0] == sin(0) = 0. */ + for (int i = 1; i < N; i++) + if (A[i] == 0.0) + __builtin_abort (); + + /* Check the value - part II: throughly */ + for (int i = 0; i < N; i++) + { + double x = A[i]; + double y = __builtin_sin (2*i*M_PI/N); + if (__builtin_fabs (x - y) > ulp * __builtin_fabs (x+y) * __DBL_EPSILON__) + __builtin_abort (); + } + + /* Integer */ + + #pragma omp target device(dev) map(B) depend(out: B[:N]) nowait + for (int i = 0; i < N; i++) + #pragma omp atomic update + B[i] += 42; + + /* Same - but using USE. */ + if (obj2 == omp_interop_none) + { + // Same as below as 'nowait' is ignored. + #pragma omp interop use(obj2) depend(in: B[:N]) nowait + } + else + { + #pragma omp interop use(obj2) depend(in: B[:N]) + } + + for (int i = 0; i < N; i++) + if (B[i] != 42) + __builtin_abort (); + + #pragma omp interop destroy(obj2) +} + +int +main () +{ + int ndev = omp_get_num_devices (); + + for (int dev = 0; dev <= ndev; dev++) + test_async (dev); + test_async (omp_initial_device); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/metadirective-1.c b/libgomp/testsuite/libgomp.c-c++-common/metadirective-1.c index a57d6fd..fbe4ac3 100644 --- a/libgomp/testsuite/libgomp.c-c++-common/metadirective-1.c +++ b/libgomp/testsuite/libgomp.c-c++-common/metadirective-1.c @@ -1,4 +1,5 @@ -/* { dg-do run } */ +/* { dg-do run { target { ! offload_target_nvptx } } } */ +/* { dg-do compile { target offload_target_nvptx } } */ #define N 100 @@ -7,12 +8,17 @@ f (int x[], int y[], int z[]) { int i; + // The following fails as on the host the target side cannot be + // resolved - and the 'teams' or not status affects how 'target' + // is called. + // Note also the dg-do compile above for offload_target_nvptx #pragma omp target map(to: x[0:N], y[0:N]) map(from: z[0:N]) #pragma omp metadirective \ when (device={arch("nvptx")}: teams loop) \ default (parallel loop) for (i = 0; i < N; i++) z[i] = x[i] * y[i]; + /* { dg-bogus "'target' construct with nested 'teams' construct contains directives outside of the 'teams' construct" "PR118694" { xfail offload_target_nvptx } .-6 } */ } int diff --git a/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-2.c b/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-2.c new file mode 100644 index 0000000..b36d2f5 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-2.c @@ -0,0 +1,62 @@ +// PR libgomp/120444 +// Async version + +#include <omp.h> + +int main() +{ + #pragma omp parallel for + for (int dev = omp_initial_device; dev <= omp_get_num_devices (); dev++) + { + char *ptr = (char *) omp_target_alloc (sizeof(int) * 1024, dev); + + omp_depend_t dep; + #pragma omp depobj(dep) depend(inout: ptr) + + /* Play also around with the alignment - as hsa_amd_memory_fill operates + on multiples of 4 bytes (uint32_t). */ + + for (int start = 0; start < 32; start++) + for (int tail = 0; tail < 32; tail++) + { + unsigned char val = '0' + start + tail; +#if __cplusplus + void *ptr2 = omp_target_memset_async (ptr + start, val, + 1024 - start - tail, dev, 0); +#else + void *ptr2 = omp_target_memset_async (ptr + start, val, + 1024 - start - tail, dev, 0, nullptr); +#endif + if (ptr + start != ptr2) + __builtin_abort (); + + #pragma omp taskwait + + #pragma omp target device(dev) is_device_ptr(ptr) depend(depobj: dep) nowait + for (int i = start; i < 1024 - start - tail; i++) + { + if (ptr[i] != val) + __builtin_abort (); + ptr[i] += 2; + } + + omp_target_memset_async (ptr + start, val + 3, + 1024 - start - tail, dev, 1, &dep); + + #pragma omp target device(dev) is_device_ptr(ptr) depend(depobj: dep) nowait + for (int i = start; i < 1024 - start - tail; i++) + { + if (ptr[i] != val + 3) + __builtin_abort (); + ptr[i] += 1; + } + + omp_target_memset_async (ptr + start, val - 3, + 1024 - start - tail, dev, 1, &dep); + + #pragma omp taskwait depend (depobj: dep) + } + #pragma omp depobj(dep) destroy + omp_target_free (ptr, dev); + } +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-3.c b/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-3.c new file mode 100644 index 0000000..c0e4fa9 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-3.c @@ -0,0 +1,80 @@ +#include <stddef.h> +#include <stdint.h> +#include <omp.h> + +#define MIN(x,y) ((x) < (y) ? x : y) + +enum { N = 524288 + 8 }; + +static void +init_val (int8_t *ptr, int val, size_t count) +{ + #pragma omp target is_device_ptr(ptr) firstprivate(val, count) + __builtin_memset (ptr, val, count); +} + +static void +check_val (int8_t *ptr, int val, size_t count) +{ + if (count == 0) + return; + #pragma omp target is_device_ptr(ptr) firstprivate(val, count) + for (size_t i = 0; i < count; i++) + if (ptr[i] != val) __builtin_abort (); +} + +static void +test_it (int8_t *ptr, int lshift, size_t count) +{ + if (N < count + lshift) __builtin_abort (); + if (lshift >= 4) __builtin_abort (); + ptr += lshift; + + init_val (ptr, 'z', MIN (count + 32, N - lshift)); + + omp_target_memset (ptr, '1', count, omp_get_default_device()); + + check_val (ptr, '1', count); + check_val (ptr + count, 'z', MIN (32, N - lshift - count)); +} + + +int main() +{ + size_t size; + int8_t *ptr = (int8_t *) omp_target_alloc (N + 3, omp_get_default_device()); + ptr += (4 - (uintptr_t) ptr % 4) % 4; + if ((uintptr_t) ptr % 4 != 0) __builtin_abort (); + + test_it (ptr, 0, 1); + test_it (ptr, 3, 1); + test_it (ptr, 0, 4); + test_it (ptr, 3, 4); + test_it (ptr, 0, 5); + test_it (ptr, 3, 5); + test_it (ptr, 0, 6); + test_it (ptr, 3, 6); + + for (int i = 1; i <= 9; i++) + { + switch (i) + { + case 1: size = 16; break; // = 2^4 bytes + case 2: size = 32; break; // = 2^5 bytes + case 3: size = 64; break; // = 2^7 bytes + case 4: size = 128; break; // = 2^7 bytes + case 5: size = 256; break; // = 2^8 bytes + case 6: size = 512; break; // = 2^9 bytes + case 7: size = 65536; break; // = 2^16 bytes + case 8: size = 262144; break; // = 2^18 bytes + case 9: size = 524288; break; // = 2^20 bytes + default: __builtin_abort (); + } + test_it (ptr, 0, size); + test_it (ptr, 3, size); + test_it (ptr, 0, size + 1); + test_it (ptr, 3, size + 1); + test_it (ptr, 3, size + 2); + } + omp_target_free (ptr, omp_get_default_device()); +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset.c b/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset.c new file mode 100644 index 0000000..01909f8 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset.c @@ -0,0 +1,62 @@ +// PR libgomp/120444 + +#include <omp.h> + +int main() +{ + for (int dev = omp_initial_device; dev < omp_get_num_devices (); dev++) + { + char *ptr = (char *) omp_target_alloc (sizeof(int) * 1024, dev); + + /* Play also around with the alignment - as hsa_amd_memory_fill operates + on multiples of 4 bytes (uint32_t). */ + + for (int start = 0; start < 32; start++) + for (int tail = 0; tail < 32; tail++) + { + unsigned char val = '0' + start + tail; + void *ptr2 = omp_target_memset (ptr + start, val, + 1024 - start - tail, dev); + if (ptr + start != ptr2) + __builtin_abort (); + + #pragma omp target device(dev) is_device_ptr(ptr) + for (int i = start; i < 1024 - start - tail; i++) + if (ptr[i] != val) + __builtin_abort (); + + } + + /* Check 'small' values for correctness. */ + + for (int start = 0; start < 32; start++) + for (int size = 0; size <= 64 + 32; size++) + { + omp_target_memset (ptr, 'a' - 2, 1024, dev); + + unsigned char val = '0' + start + size % 32; + void *ptr2 = omp_target_memset (ptr + start, val, size, dev); + + if (ptr + start != ptr2) + __builtin_abort (); + + if (size == 0) + continue; + + #pragma omp target device(dev) is_device_ptr(ptr) + { + for (int i = 0; i < start; i++) + if (ptr[i] != 'a' - 2) + __builtin_abort (); + for (int i = start; i < start + size; i++) + if (ptr[i] != val) + __builtin_abort (); + for (int i = start + size + 1; i < 1024; i++) + if (ptr[i] != 'a' - 2) + __builtin_abort (); + } + } + + omp_target_free (ptr, dev); + } +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/pr96390.c b/libgomp/testsuite/libgomp.c-c++-common/pr96390.c index b89f934..ca7865d 100644 --- a/libgomp/testsuite/libgomp.c-c++-common/pr96390.c +++ b/libgomp/testsuite/libgomp.c-c++-common/pr96390.c @@ -1,7 +1,7 @@ /* { dg-additional-options "-O0 -fdump-tree-omplower" } */ /* { dg-additional-options "-foffload=-Wa,--verify" { target offload_target_nvptx } } */ /* { dg-require-alias "" } */ -/* { dg-xfail-if "PR 97102/PR 97106 - .alias not (yet) supported for nvptx" { offload_target_nvptx } } */ +/* { dg-xfail-if PR105018 { offload_target_nvptx } } */ #ifdef __cplusplus extern "C" { diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c b/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c new file mode 100644 index 0000000..9bf949a --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c @@ -0,0 +1,3 @@ +/* { dg-additional-options -O0 } */ + +#include "target-abi-struct-1.c" diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1.c b/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1.c new file mode 100644 index 0000000..d9268af --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1.c @@ -0,0 +1 @@ +#include "../libgomp.oacc-c-c++-common/abi-struct-1.c" diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-cdtor-1.c b/libgomp/testsuite/libgomp.c-c++-common/target-cdtor-1.c new file mode 100644 index 0000000..e6099cf --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/target-cdtor-1.c @@ -0,0 +1,89 @@ +/* Offloaded 'constructor' and 'destructor' functions. */ + +#include <omp.h> + +#pragma omp declare target + +static void +__attribute__((constructor)) +initHD1() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +static void +__attribute__((constructor)) +initHD2() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +static void +__attribute__((destructor)) +finiHD1() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +static void +__attribute__((destructor)) +finiHD2() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +#pragma omp end declare target + +static void +__attribute__((constructor)) +initH1() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +static void +__attribute__((destructor)) +finiH2() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +int main() +{ + int c = 0; + + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + +#pragma omp target map(c) + { + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + } + +#pragma omp target map(c) + { + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + } + + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + + return 0; +} + +/* The order is undefined, in which same-priority 'constructor' functions, and 'destructor' functions are run. + { dg-output {init[^,]+, 1[\r\n]+} } + { dg-output {init[^,]+, 1[\r\n]+} } + { dg-output {init[^,]+, 1[\r\n]+} } + { dg-output {main:1, 1[\r\n]+} } + { dg-output {initHD[^,]+, 0[\r\n]+} { target offload_device } } + { dg-output {initHD[^,]+, 0[\r\n]+} { target offload_device } } + { dg-output {main:2, 1[\r\n]+} { target { ! offload_device } } } + { dg-output {main:2, 0[\r\n]+} { target offload_device } } + { dg-output {main:3, 1[\r\n]+} { target { ! offload_device } } } + { dg-output {main:3, 0[\r\n]+} { target offload_device } } + { dg-output {main:4, 1[\r\n]+} } + { dg-output {finiHD[^,]+, 0[\r\n]+} { target offload_device } } + { dg-output {finiHD[^,]+, 0[\r\n]+} { target offload_device } } + { dg-output {fini[^,]+, 1[\r\n]+} } + { dg-output {fini[^,]+, 1[\r\n]+} } + { dg-output {fini[^,]+, 1[\r\n]+} } +*/ diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/aarch64.exp b/libgomp/testsuite/libgomp.c-target/aarch64/aarch64.exp new file mode 100644 index 0000000..02d5503 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-target/aarch64/aarch64.exp @@ -0,0 +1,57 @@ +# Copyright (C) 2006-2025 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +# Load support procs. +load_lib libgomp-dg.exp +load_gcc_lib gcc-dg.exp + +# Exit immediately if this isn't an AArch64 target. +if {![istarget aarch64*-*-*] } then { + return +} + +lappend ALWAYS_CFLAGS "compiler=$GCC_UNDER_TEST" + +if { [check_effective_target_aarch64_sve] } { + set sve_flags "" +} else { + set sve_flags "-march=armv8.2-a+sve" +} + +# Initialize `dg'. +dg-init + +#if ![check_effective_target_fopenmp] { +# return +#} + +# Turn on OpenMP. +lappend ALWAYS_CFLAGS "additional_flags=-fopenmp" + +# Gather a list of all tests. +set tests [lsort [find $srcdir/$subdir *.c]] + +set ld_library_path $always_ld_library_path +append ld_library_path [gcc-set-multilib-library-path $GCC_UNDER_TEST] +set_ld_library_path_env_vars + +# Main loop. +dg-runtest $tests "" $sve_flags + +# All done. +dg-finish diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/firstprivate.c b/libgomp/testsuite/libgomp.c-target/aarch64/firstprivate.c new file mode 100644 index 0000000..58674e2 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-target/aarch64/firstprivate.c @@ -0,0 +1,129 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-msve-vector-bits=256 -fopenmp -O2" } */ + +#pragma GCC target "+sve" + +#include <arm_sve.h> +#include <omp.h> + +static void __attribute__ ((noipa)) +vec_compare (svint32_t *x, svint32_t y) +{ + svbool_t p = svnot_b_z (svptrue_b32 (), svcmpeq_s32 (svptrue_b32 (), *x, y)); + + if (svptest_any (svptrue_b32 (), p)) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +firstprivate_sections () +{ + int b[8], c[8]; + svint32_t vb, vc; + int i; + +#pragma omp parallel for + for (i = 0; i < 8; i++) + { + b[i] = i; + c[i] = i + 1; + } + + vb = svld1_s32 (svptrue_b32 (), b); + vc = svld1_s32 (svptrue_b32 (), c); + +#pragma omp parallel sections firstprivate (vb, vc) + { + #pragma omp section + vec_compare (&vb, svindex_s32 (0, 1)); + vec_compare (&vc, svindex_s32 (1, 1)); + + #pragma omp section + vec_compare (&vb, svindex_s32 (0, 1)); + vec_compare (&vc, svindex_s32 (1, 1)); + } + +} + +void __attribute__ ((noipa)) +firstprivate_for () +{ + + int a[32], b[32], c[32]; + svint32_t va, vb, vc; + int i; + +#pragma omp parallel for + for (i = 0; i < 32; i++) + { + b[i] = i; + c[i] = i + 1; + } + + vb = svindex_s32 (1, 0); + vc = svindex_s32 (0, 1); + +#pragma omp parallel for firstprivate (vb, vc) private (va) + for (i = 0; i < 4; i++) + { + svint32_t tb, tc; + vec_compare (&vb, svindex_s32 (1, 0)); + vec_compare (&vc, svindex_s32 (0, 1)); + tb = svld1_s32 (svptrue_b32 (), b + i * 8); + tc = svld1_s32 (svptrue_b32 (), c + i * 8); + va = svadd_s32_z (svptrue_b32 (), vb, vc); + va = svadd_s32_z (svptrue_b32 (), va, tb); + va = svadd_s32_z (svptrue_b32 (), va, tc); + svst1_s32 (svptrue_b32 (), a + i * 8, va); + } + + for (i = 0; i < 32; i++) + if (a[i] != b[i] + c[i] + vb[i % 8] + vc[i % 8]) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +firstprivate_distribute () +{ + + int a[32], b[32], c[32]; + svint32_t va, vb, vc; + int i; + +#pragma omp parallel for + for (i = 0; i < 32; i++) + { + b[i] = i; + c[i] = i + 1; + } + + vb = svindex_s32 (1, 0); + vc = svindex_s32 (0, 1); + +#pragma omp teams +#pragma omp distribute firstprivate (vb, vc) private (va) + for (i = 0; i < 4; i++) + { + svint32_t tb, tc; + vec_compare (&vb, svindex_s32 (1, 0)); + vec_compare (&vc, svindex_s32 (0, 1)); + tb = svld1_s32 (svptrue_b32 (), b + i * 8); + tc = svld1_s32 (svptrue_b32 (), c + i * 8); + va = svadd_s32_z (svptrue_b32 (), vb, vc); + va = svadd_s32_z (svptrue_b32 (), va, tb); + va = svadd_s32_z (svptrue_b32 (), va, tc); + svst1_s32 (svptrue_b32 (), a + i * 8, va); + } + + for (i = 0; i < 32; i++) + if (a[i] != b[i] + c[i] + vb[i % 8] + vc[i % 8]) + __builtin_abort (); +} + +int +main () +{ + firstprivate_for (); + firstprivate_sections (); + firstprivate_distribute (); +} diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/lastprivate.c b/libgomp/testsuite/libgomp.c-target/aarch64/lastprivate.c new file mode 100644 index 0000000..2f93d7b --- /dev/null +++ b/libgomp/testsuite/libgomp.c-target/aarch64/lastprivate.c @@ -0,0 +1,171 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-msve-vector-bits=256 -fopenmp -O2" } */ + +#pragma GCC target "+sve" + +#include <arm_sve.h> +#include <omp.h> + +static svint32_t __attribute__ ((noipa)) +foo (svint32_t *vb, svint32_t *vc, int tn) +{ + svint32_t temp = svindex_s32 (tn, 0); + temp = svadd_s32_z (svptrue_b32 (), temp, *vb); + return svadd_s32_z (svptrue_b32 (), temp, *vc); +} + +void __attribute__ ((noipa)) +lastprivate_sections () +{ + int a[8], b[8], c[8]; + svint32_t va, vb, vc; + int i; + +#pragma omp parallel for + for (i = 0; i < 8; i++) + { + b[i] = i; + c[i] = i + 1; + } + +#pragma omp parallel sections lastprivate (vb, vc) num_threads (2) + { + #pragma omp section + vb = svld1_s32 (svptrue_b32 (), b); + #pragma omp section + vb = svld1_s32 (svptrue_b32 (), b); + vc = svld1_s32 (svptrue_b32 (), c); + } + + va = svadd_s32_z (svptrue_b32 (), vb, vc); + svst1_s32 (svptrue_b32 (), a, va); + + for (i = 0; i < 8; i++) + if (a[i] != b[i] + c[i]) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +lastprivate_for () +{ + int a[32], b[32], c[32]; + int aa[8], bb[8], cc[8]; + svint32_t va, vb, vc; + int i, tn; + +#pragma omp parallel for + for (i = 0; i < 32; i++) + { + b[i] = i; + c[i] = i + 1; + } + +#pragma omp parallel for lastprivate (va, vb, vc, tn) + for (i = 0; i < 4; i++) + { + vb = svld1_s32 (svptrue_b32 (), b + i * 8); + vc = svld1_s32 (svptrue_b32 (), c + i * 8); + tn = i; + va = foo (&vb, &vc, tn); + svst1_s32 (svptrue_b32 (), a + i * 8, va); + } + + svst1_s32 (svptrue_b32 (), aa, va); + svst1_s32 (svptrue_b32 (), bb, vb); + svst1_s32 (svptrue_b32 (), cc, vc); + + for (i = 0; i < 8; i++) + if (aa[i] != bb[i] + cc[i] + tn) + __builtin_abort (); + + for (i = 0; i < 32; i++) + if (a[i] != b[i] + c[i] + i / 8) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +lastprivate_simd () +{ + + int a[64], b[64], c[64]; + int aa[8], bb[8], cc[8]; + svint32_t va, vb, vc; + int i; + +#pragma omp parallel for + for (i = 0; i < 64; i++) + { + b[i] = i; + c[i] = i + 1; + } + +#pragma omp simd lastprivate (va, vb, vc) + for (i = 0; i < 8; i++) + { + vb = svld1_s32 (svptrue_b32 (), b + i * 8); + vc = svld1_s32 (svptrue_b32 (), c + i * 8); + va = svadd_s32_z (svptrue_b32 (), vb, vc); + svst1_s32 (svptrue_b32 (), a + i * 8, va); + } + + svst1_s32 (svptrue_b32 (), aa, va); + svst1_s32 (svptrue_b32 (), bb, vb); + svst1_s32 (svptrue_b32 (), cc, vc); + + for (i = 0; i < 8; i++) + if (aa[i] != bb[i] + cc[i]) + __builtin_abort (); + + for (i = 0; i < 64; i++) + if (a[i] != b[i] + c[i]) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +lastprivate_distribute () +{ + + int a[32], b[32], c[32]; + int aa[8], bb[8], cc[8]; + svint32_t va, vb, vc; + int i, tn; + +#pragma omp parallel for + for (i = 0; i < 32; i++) + { + b[i] = i; + c[i] = i + 1; + } + +#pragma omp teams +#pragma omp distribute lastprivate (va, vb, vc, tn) + for (i = 0; i < 4; i++) + { + vb = svld1_s32 (svptrue_b32 (), b + i * 8); + vc = svld1_s32 (svptrue_b32 (), c + i * 8); + tn = i; + va = foo (&vb, &vc, tn); + svst1_s32 (svptrue_b32 (), a + i * 8, va); + } + + svst1_s32 (svptrue_b32 (), aa, va); + svst1_s32 (svptrue_b32 (), bb, vb); + svst1_s32 (svptrue_b32 (), cc, vc); + + for (i = 0; i < 8; i++) + if (aa[i] != bb[i] + cc[i] + tn) + __builtin_abort (); + + for (i = 0; i < 32; i++) + if (a[i] != b[i] + c[i] + i / 8) + __builtin_abort (); +} + +int +main () +{ + lastprivate_for (); + lastprivate_sections (); + lastprivate_simd (); + lastprivate_distribute (); +} diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/private.c b/libgomp/testsuite/libgomp.c-target/aarch64/private.c new file mode 100644 index 0000000..fed5370 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-target/aarch64/private.c @@ -0,0 +1,107 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-msve-vector-bits=256 -fopenmp -O2" } */ + +#pragma GCC target "+sve" + +#include <arm_sve.h> +#include <omp.h> + +static void __attribute__ ((noipa)) +compare_vec (svint32_t *x, svint32_t y) +{ + svbool_t p = svnot_b_z (svptrue_b32 (), svcmpeq_s32 (svptrue_b32 (), *x, y)); + + if (svptest_any (svptrue_b32 (), p)) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +private () +{ + svint32_t a; +#pragma omp parallel private (a) num_threads (10) + { + a = svindex_s32 (omp_get_thread_num (), 0); + +#pragma omp barrier + compare_vec (&a, svindex_s32 (omp_get_thread_num (), 0)); + } +} + +void __attribute__ ((noipa)) +firstprivate () +{ + svint32_t a = svindex_s32 (1,1); + svint32_t b; + +#pragma omp parallel private (b) firstprivate (a) num_threads (12) + { + compare_vec (&a, svindex_s32 (1, 1)); + b = svindex_s32 (omp_get_thread_num (), 0); + +#pragma omp barrier + compare_vec (&a, svindex_s32 (1, 1)); + compare_vec (&b, svindex_s32 (omp_get_thread_num (), 0)); + if (omp_get_thread_num () == 5) + { + a = svindex_s32 (1, 2); + b = svindex_s32 (10, 0); + } + +#pragma omp barrier + if (omp_get_thread_num () == 5) + { + compare_vec (&a, svindex_s32 (1, 2)); + compare_vec (&b, svindex_s32 (10, 0)); + } + else + { + compare_vec (&a, svindex_s32 (1, 1)); + compare_vec (&b, svindex_s32 (omp_get_thread_num (), 0)); + } + } +} + +void __attribute__ ((noipa)) +lastprivate () +{ + svint32_t a = svindex_s32 (1,1); + svint32_t b; + int i; + +#pragma omp parallel for private (a) lastprivate (b) + for (i = 0; i < 16; i++) + { + b = svindex_s32 (i, 0); + + compare_vec (&b, svindex_s32 (i, 0)); + if (i == 5) + { + a = svindex_s32 (1, 2); + b = svindex_s32 (10, 0); + } + else + a = svindex_s32 (1, 1); + + if (i == 5) + { + compare_vec (&a, svindex_s32 (1, 2)); + compare_vec (&b, svindex_s32 (10, 0)); + } + else + { + compare_vec (&a, svindex_s32 (1, 1)); + compare_vec (&b, svindex_s32 (i, 0)); + } + } + + compare_vec (&b, svindex_s32 (15, 0)); +} + +int +main () +{ + private (); + firstprivate (); + lastprivate (); +} diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/shared.c b/libgomp/testsuite/libgomp.c-target/aarch64/shared.c new file mode 100644 index 0000000..340a668 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-target/aarch64/shared.c @@ -0,0 +1,266 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-msve-vector-bits=256 -fopenmp -O2" } */ + +#pragma GCC target "+sve" + +#include <arm_sve.h> +#include <stdlib.h> +#include <omp.h> + +static void __attribute__ ((noipa)) +compare_vec (svint32_t x, svint32_t y) +{ + svbool_t p = svnot_b_z (svptrue_b32 (), svcmpeq_s32 (svptrue_b32 (), x, y)); + + if (svptest_any (svptrue_b32 (), p)) + __builtin_abort (); +} + +static void __attribute__ ((noipa)) +compare_vecb (svbool_t x, svbool_t y) +{ + svbool_t p = sveor_b_z (svptrue_b32 (), x, y); + + if (svptest_any (svptrue_b32 (), p)) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +implicit_shared_default (svint32_t a, svint32_t b, svbool_t p) +{ + +#pragma omp parallel default (shared) num_threads (10) + { + /* 'a', 'b' and 'p' are implicitly shared. */ + compare_vec (a, svindex_s32 (0, 1)); + compare_vec (b, svindex_s32 (8, 1)); + compare_vecb (p, svptrue_b32 ()); + +#pragma omp barrier + if (omp_get_thread_num () == 2) + a = svadd_s32_z (p, a, b); + +#pragma omp barrier + if (omp_get_thread_num () == 0) + { + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svindex_s32 (8, 1)); + compare_vecb (p, svptrue_b32 ()); + b = svadd_s32_z (p, a, b); + } + +#pragma omp barrier + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); + +#pragma omp barrier + if (omp_get_thread_num () == 0 || omp_get_thread_num () == 2) + { + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); + } + } +} + +void __attribute__ ((noipa)) +explicit_shared (svint32_t a, svint32_t b, svbool_t p) +{ + +#pragma omp parallel shared (a, b, p) num_threads (12) + { + /* 'a', 'b' and 'p' are explicitly shared. */ + compare_vec (a, svindex_s32 (0, 1)); + compare_vec (b, svindex_s32 (8, 1)); + compare_vecb (p, svptrue_b32 ()); + +#pragma omp barrier + if (omp_get_thread_num () == 2) + a = svadd_s32_z (p, a, b); + +#pragma omp barrier + if (omp_get_thread_num () == 0) + { + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svindex_s32 (8, 1)); + compare_vecb (p, svptrue_b32 ()); + b = svadd_s32_z (p, a, b); + } + +#pragma omp barrier + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); + +#pragma omp barrier + if (omp_get_thread_num () == 0 || omp_get_thread_num () == 2) + { + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); + } + } +} + +void __attribute__ ((noipa)) +implicit_shared_no_default (svint32_t a, svint32_t b, svbool_t p) +{ + +#pragma omp parallel num_threads (16) + { + /* 'a', 'b' and 'p' are implicitly shared without default clause. */ + compare_vec (a, svindex_s32 (0, 1)); + compare_vec (b, svindex_s32 (8, 1)); + compare_vecb (p, svptrue_b32 ()); + +#pragma omp barrier + if (omp_get_thread_num () == 12) + a = svadd_s32_z (p, a, b); + +#pragma omp barrier + if (omp_get_thread_num () == 15) + { + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svindex_s32 (8, 1)); + compare_vecb (p, svptrue_b32 ()); + b = svadd_s32_z (p, a, b); + } + +#pragma omp barrier + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); + +#pragma omp barrier + if (omp_get_thread_num () == 12 || omp_get_thread_num () == 15) + { + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); + } + } + +} + +void __attribute__ ((noipa)) +mix_shared (svint32_t b, svbool_t p) +{ + + svint32_t a = svindex_s32 (0, 0); + int *m = (int *) malloc (8 * sizeof (int)); + int i; + +#pragma omp parallel for + for (i = 0; i < 8; i++) + m[i] = i; + +#pragma omp parallel num_threads (16) + { + compare_vec (a, svindex_s32 (0, 0)); + compare_vec (b, svindex_s32 (8, 1)); + +#pragma omp barrier + /* 'm' is predetermined shared here. 'a' is implicitly shared here. */ + if (omp_get_thread_num () == 10) + a = svld1_s32 (svptrue_b32 (), m); + +#pragma omp barrier + /* 'a', 'b' and 'p' are implicitly shared without default clause. */ + compare_vec (a, svindex_s32 (0, 1)); + compare_vec (b, svindex_s32 (8, 1)); + compare_vecb (p, svptrue_b32 ()); + +#pragma omp barrier + if (omp_get_thread_num () == 12) + a = svadd_s32_z (p, a, b); + +#pragma omp barrier + if (omp_get_thread_num () == 15) + { + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svindex_s32 (8, 1)); + compare_vecb (p, svptrue_b32 ()); + b = svadd_s32_z (p, a, b); + } + +#pragma omp barrier + if (omp_get_thread_num () == 12 || omp_get_thread_num () == 15) + { + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); + } + +#pragma omp barrier + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); + } +} + +#define N __ARM_FEATURE_SVE_BITS +#define FIXED_ATTR __attribute__((arm_sve_vector_bits (N))) + +typedef svint32_t v8si FIXED_ATTR; + +void __attribute__ ((noipa)) +predetermined_shared_static (int n) +{ + + int *m = (int *) malloc (8 * sizeof (int)); + int i; + +#pragma omp parallel for + /* 'm' is predetermined shared here. */ + for (i = 0; i < 8; i++) + m[i] = i; + + static v8si a = { 0, 1, 2, 3, 4, 5, 6, 7 }; + +#pragma omp parallel num_threads (16) + { + /* 'a' is implicit shared here. */ + if (n == 0) + compare_vec (a, svindex_s32 (0, 1)); + + if (n == 1) + compare_vec (a, svindex_s32 (1, 1)); + +#pragma omp barrier + if (omp_get_thread_num () == 12) + { + if (n == 0) + compare_vec (a, svindex_s32 (0, 1)); + + if (n == 1) + compare_vec (a, svindex_s32 (1, 1)); + + a = svadd_s32_z (svptrue_b32 (), a, svindex_s32 (1, 0)); + } + +#pragma omp barrier + if (n == 0) + compare_vec (a, svindex_s32 (1, 1)); + + if (n == 1) + compare_vec (a, svindex_s32 (2, 1)); + } +} + + +int +main () +{ + svint32_t x = svindex_s32 (0, 1); + svint32_t y = svindex_s32 (8, 1); + svbool_t p = svptrue_b32 (); + + /* Implicit shared. */ + implicit_shared_default (x, y, p); + + /* Explicit shared. */ + explicit_shared (x, y, p); + + /* Implicit shared with no default clause. */ + implicit_shared_no_default (x, y, p); + + /* Mix shared. */ + mix_shared (y, p); + + /* Predetermined and static shared. */ + predetermined_shared_static (0); + predetermined_shared_static (1); +} diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/simd-aligned.c b/libgomp/testsuite/libgomp.c-target/aarch64/simd-aligned.c new file mode 100644 index 0000000..14642c9 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-target/aarch64/simd-aligned.c @@ -0,0 +1,51 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-msve-vector-bits=256 -fopenmp -O2" } */ + +#pragma GCC target "+sve" + +#include <arm_sve.h> +#include <stdint.h> + +#define N 256 + +int a[N] __attribute__ ((aligned (64))); +int b[N] __attribute__ ((aligned (64))); + +void __attribute__ ((noipa)) +foo (int *p, int *q, svint32_t *onesp) +{ + svint32_t va, vc; + int i; + uint64_t sz = svcntw (); + +#pragma omp simd aligned(p, q : 64) aligned (onesp : 128) \ + private (va, vc) nontemporal (va, vc) + for (i = 0; i < N; i++) + { + if (i % sz == 0) + { + va = svld1_s32 (svptrue_b32 (), p); + vc = svadd_s32_z (svptrue_b32 (), va, *onesp); + svst1_s32 (svptrue_b32 (), q, vc); + q += sz; + } + } +} + +int +main () +{ + svint32_t ones __attribute__ ((aligned(128))) = svindex_s32 (1, 0); + + for (int i = 0; i < N; i++) + { + a[i] = 1; + b[i] = 0; + } + + foo (a, b, &ones); + + for (int i = 0; i < N; i++) + if (b[i] != 2) + __builtin_abort (); +} diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/simd-nontemporal.c b/libgomp/testsuite/libgomp.c-target/aarch64/simd-nontemporal.c new file mode 100644 index 0000000..6fe4616 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-target/aarch64/simd-nontemporal.c @@ -0,0 +1,51 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-msve-vector-bits=256 -fopenmp -O2" } */ + +#pragma GCC target "+sve" + +#include <arm_sve.h> +#include <stdint.h> + +#define N 256 + +int a[N] __attribute__ ((aligned (64))); +int b[N] __attribute__ ((aligned (64))); + +void __attribute__ ((noipa)) +foo (int *p, int *q) +{ + svint32_t va, vb, vc; + int i; + uint64_t sz = svcntw (); + +#pragma omp simd aligned(p, q : 64) private (va, vb, vc) \ + nontemporal (va, vb, vc) + for (i = 0; i < N; i++) + { + if (i % sz == 0) + { + va = svld1_s32 (svptrue_b32 (), p); + vb = svindex_s32 (1, 0); + vc = svadd_s32_z (svptrue_b32 (), va, vb); + svst1_s32 (svptrue_b32 (), q, vc); + q += sz; + } + } +} + +int +main () +{ + + for (int i = 0; i < N; i++) + { + a[i] = 1; + b[i] = 0; + } + + foo (a, b); + + for (int i = 0; i < N; i++) + if (b[i] != 2) + __builtin_abort (); +} diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/threadprivate.c b/libgomp/testsuite/libgomp.c-target/aarch64/threadprivate.c new file mode 100644 index 0000000..aa7d2f9 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-target/aarch64/threadprivate.c @@ -0,0 +1,47 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-msve-vector-bits=256 -fopenmp -O2" } */ + +#pragma GCC target "+sve" + +#include <arm_sve.h> +#include <stdint.h> + +typedef __SVInt32_t v8si __attribute__ ((arm_sve_vector_bits(256))); + +v8si vec1; +#pragma omp threadprivate (vec1) + +void __attribute__ ((noipa)) +foo () +{ + int64_t res = 0; + + vec1 = svindex_s32 (1, 0); + +#pragma omp parallel copyin (vec1) firstprivate (res) num_threads(10) + { + res = svaddv_s32 (svptrue_b32 (), vec1); + +#pragma omp barrier + if (res != 8LL) + __builtin_abort (); + } +} + +int +main () +{ + int64_t res = 0; + +#pragma omp parallel firstprivate (res) num_threads(10) + { + vec1 = svindex_s32 (1, 0); + res = svaddv_s32 (svptrue_b32 (), vec1); + +#pragma omp barrier + if (res != 8LL) + __builtin_abort (); + } + + foo (); +} diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/udr-sve.c b/libgomp/testsuite/libgomp.c-target/aarch64/udr-sve.c new file mode 100644 index 0000000..02e02dc --- /dev/null +++ b/libgomp/testsuite/libgomp.c-target/aarch64/udr-sve.c @@ -0,0 +1,134 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-march=armv8-a+sve -msve-vector-bits=256 -fopenmp -O2" } */ + +#include <arm_sve.h> + +#pragma omp declare reduction (+:svint32_t: omp_out = svadd_s32_z (svptrue_b32(), omp_in, omp_out)) \ + initializer (omp_priv = svindex_s32 (0, 0)) + +void __attribute__ ((noipa)) +parallel_reduction () +{ + int a[8] = {1, 1, 1, 1, 1, 1, 1, 1}; + int b[8] = {0, 0, 0, 0, 0, 0, 0, 0}; + svint32_t va = svld1_s32 (svptrue_b32 (), b); + int i = 0; + int64_t res; + + #pragma omp parallel reduction (+:va, i) + { + va = svld1_s32 (svptrue_b32 (), a); + i++; + } + + res = svaddv_s32 (svptrue_b32 (), va); + + if (res != i * 8) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +for_reduction () +{ + int a[8] = {1, 1, 1, 1, 1, 1, 1, 1}; + int b[8] = {0, 0, 0, 0, 0, 0, 0, 0}; + svint32_t va = svld1_s32 (svptrue_b32 (), b); + int j; + int64_t res; + + #pragma omp parallel for reduction (+:va) + for (j = 0; j < 8; j++) + va += svld1_s32 (svptrue_b32 (), a); + + res = svaddv_s32 (svptrue_b32 (), va); + + if (res != 64) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +simd_reduction () +{ + int a[8]; + svint32_t va = svindex_s32 (0, 0); + int i = 0; + int j; + int64_t res = 0; + + for (j = 0; j < 8; j++) + a[j] = 1; + + #pragma omp simd reduction (+:va) + for (j = 0; j < 16; j++) + va += svld1_s32 (svptrue_b32 (), a); + + res = svaddv_s32 (svptrue_b32 (), va); + + if (res != 128) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +inscan_reduction_incl () +{ + svint32_t va = svindex_s32 (0, 0); + int a[8] = {1, 1, 1, 1, 1, 1, 1, 1}; + int b[64] = { 0 }; + int j; + int64_t res = 0; + + #pragma omp parallel for reduction (inscan, +:va) + for (j = 0; j < 8; j++) + { + va += svld1_s32 (svptrue_b32 (), a); + #pragma omp scan inclusive (va) + svst1_s32 (svptrue_b32 (), b + j * 8, va); + } + + res = svaddv_s32 (svptrue_b32 (), va); + + if (res != 64) + __builtin_abort (); + + for (j = 0; j < 64; j+=8) + if (b[j] != (j / 8 + 1)) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +inscan_reduction_excl () +{ + svint32_t va = svindex_s32 (0, 0); + int a[8] = {1, 1, 1, 1, 1, 1, 1, 1}; + int b[64] = { 0 }; + int j; + int64_t res = 0; + + #pragma omp parallel for reduction (inscan, +:va) + for (j = 0; j < 8; j++) + { + svst1_s32 (svptrue_b32 (), b + j * 8, va); + #pragma omp scan exclusive (va) + va += svld1_s32 (svptrue_b32 (), a); + } + + res = svaddv_s32 (svptrue_b32 (), va); + + if (res != 64) + __builtin_abort (); + + for (j = 0; j < 64; j+=8) + if (b[j] != j / 8) + __builtin_abort (); +} + + +int +main () +{ + parallel_reduction (); + for_reduction (); + simd_reduction (); + inscan_reduction_incl (); + inscan_reduction_excl (); +} diff --git a/libgomp/testsuite/libgomp.c/append-args-fr-1.c b/libgomp/testsuite/libgomp.c/append-args-fr-1.c new file mode 100644 index 0000000..2fd7eda --- /dev/null +++ b/libgomp/testsuite/libgomp.c/append-args-fr-1.c @@ -0,0 +1,232 @@ +/* { dg-do run } */ + +#include "append-args-fr.h" + +enum { host_device, nvptx_device, gcn_device } used_device_type, used_device_type2; +static int used_device_num, used_device_num2; +static omp_interop_fr_t expected_fr, expected_fr2; +static _Bool is_targetsync, is_targetsync2; + +void +check_interop (omp_interop_t obj) +{ + if (used_device_type == host_device) + check_host (obj); + else if (used_device_type == nvptx_device) + check_nvptx (obj, used_device_num, expected_fr, is_targetsync); + else if (used_device_type == gcn_device) + check_gcn (obj, used_device_num, expected_fr, is_targetsync); + else + __builtin_unreachable (); + + #pragma omp interop use(obj) +} + +void +check_interop2 (omp_interop_t obj, omp_interop_t obj2) +{ + check_interop (obj); + + #pragma omp interop use(obj2) + + if (used_device_type2 == host_device) + check_host (obj2); + else if (used_device_type2 == nvptx_device) + check_nvptx (obj2, used_device_num2, expected_fr2, is_targetsync2); + else if (used_device_type2 == gcn_device) + check_gcn (obj2, used_device_num2, expected_fr2, is_targetsync2); + else + __builtin_unreachable (); +} + + +/* Check no args + one interop arg - and no prefer_type. */ + +int f0_1_tg_ (omp_interop_t obj) { check_interop (obj); return 4242; } +#pragma omp declare variant(f0_1_tg_) match(construct={dispatch}) append_args(interop(target)) +int f0_1_tg () { assert (false); return 42; } + +void f0_1_tgsy_ (omp_interop_t obj) { check_interop (obj); } +#pragma omp declare variant(f0_1_tgsy_) match(construct={dispatch}) append_args(interop(targetsync)) +void f0_1_tgsy () { assert (false); } + +int f0_1_tgtgsy_ (omp_interop_t obj) { check_interop (obj); return 3333; } +#pragma omp declare variant(f0_1_tgtgsy_) match(construct={dispatch}) append_args(interop(targetsync,target)) +int f0_1_tgtgsy () { assert (false); return 33; } + + +/* And with PREFER_TYPE. */ + +// nv: cuda, gcn: -, -, hip +void f0_1_tgsy_c_cd_hi_hs_ (omp_interop_t obj) { check_interop (obj); } +#pragma omp declare variant(f0_1_tgsy_c_cd_hi_hs_) match(construct={dispatch}) \ + append_args(interop(targetsync, prefer_type("cuda","cuda_driver", "hip", "hsa"))) +void f0_1_tgsy_c_cd_hi_hs () { assert (false); } + +// nv: -, cuda_driver, gcn: hsa +void f0_1_tgsy_hs_cd_c_hi_ (omp_interop_t obj) { check_interop (obj); } +#pragma omp declare variant(f0_1_tgsy_hs_cd_c_hi_) match(construct={dispatch}) \ + append_args(interop(targetsync, prefer_type({attr("ompx_foo")}, {fr("hsa")}, {attr("ompx_bar"), fr("cuda_driver"), attr("ompx_foobar")},{fr("cuda")}, {fr("hip")}))) +void f0_1_tgsy_hs_cd_c_hi () { assert (false); } + +// nv: -, hip, gcn: hsa +void f0_1_tgsy_hs_hi_cd_c_ (omp_interop_t obj) { check_interop (obj); } +#pragma omp declare variant(f0_1_tgsy_hs_hi_cd_c_) match(construct={dispatch}) \ + append_args(interop(targetsync, prefer_type("hsa", "hip", "cuda_driver", "cuda"))) +void f0_1_tgsy_hs_hi_cd_c () { assert (false); } + + +void +check_f0 () +{ + if (used_device_type == nvptx_device) + expected_fr = omp_ifr_cuda; + else if (used_device_type == gcn_device) + expected_fr = omp_ifr_hip; + else /* host; variable shall not be accessed */ + expected_fr = omp_ifr_level_zero; + + int i; + if (used_device_num == DEFAULT_DEVICE) + { + is_targetsync = 0; + #pragma omp dispatch + i = f0_1_tg (); + assert (i == 4242); + + is_targetsync = 1; + #pragma omp dispatch + f0_1_tgsy (); + + #pragma omp dispatch + i = f0_1_tgtgsy (); + assert (i == 3333); + + + if (used_device_type == nvptx_device) + expected_fr = omp_ifr_cuda; + else if (used_device_type == gcn_device) + expected_fr = omp_ifr_hip; + #pragma omp dispatch + f0_1_tgsy_c_cd_hi_hs (); + + if (used_device_type == nvptx_device) + expected_fr = omp_ifr_cuda_driver; + else if (used_device_type == gcn_device) + expected_fr = omp_ifr_hsa; + #pragma omp dispatch + f0_1_tgsy_hs_cd_c_hi (); + + if (used_device_type == nvptx_device) + expected_fr = omp_ifr_hip; + else if (used_device_type == gcn_device) + expected_fr = omp_ifr_hsa; + #pragma omp dispatch + f0_1_tgsy_hs_hi_cd_c (); + } + else + { + is_targetsync = 0; + #pragma omp dispatch device(used_device_num) + i = f0_1_tg (); + assert (i == 4242); + + is_targetsync = 1; + #pragma omp dispatch device(used_device_num) + f0_1_tgsy (); + + #pragma omp dispatch device(used_device_num) + i = f0_1_tgtgsy (); + assert (i == 3333); + + + if (used_device_type == nvptx_device) + expected_fr = omp_ifr_cuda; + else if (used_device_type == gcn_device) + expected_fr = omp_ifr_hip; + #pragma omp dispatch device(used_device_num) + f0_1_tgsy_c_cd_hi_hs (); + + if (used_device_type == nvptx_device) + expected_fr = omp_ifr_cuda_driver; + else if (used_device_type == gcn_device) + expected_fr = omp_ifr_hsa; + #pragma omp dispatch device(used_device_num) + f0_1_tgsy_hs_cd_c_hi (); + + if (used_device_type == nvptx_device) + expected_fr = omp_ifr_hip; + else if (used_device_type == gcn_device) + expected_fr = omp_ifr_hsa; + #pragma omp dispatch device(used_device_num) + f0_1_tgsy_hs_hi_cd_c (); + } +} + + + +void +do_check (int dev) +{ + int num_dev = omp_get_num_devices (); + const char *dev_type; + if (dev != DEFAULT_DEVICE) + omp_set_default_device (dev); + int is_nvptx = on_device_arch_nvptx (); + int is_gcn = on_device_arch_gcn (); + int is_host; + + if (dev != DEFAULT_DEVICE) + is_host = dev == -1 || dev == num_dev; + else + { + int def_dev = omp_get_default_device (); + is_host = def_dev == -1 || def_dev == num_dev; + } + + assert (is_nvptx + is_gcn + is_host == 1); + + if (num_dev > 0 && dev != DEFAULT_DEVICE) + { + if (is_host) + omp_set_default_device (0); + else + omp_set_default_device (-1); + } + + used_device_num = dev; + if (is_host) + { + dev_type = "host"; + used_device_type = host_device; + } + else if (is_nvptx) + { + dev_type = "nvptx"; + used_device_type = nvptx_device; + } + else if (is_gcn) + { + dev_type = "gcn"; + used_device_type = gcn_device; + } + + printf ("Running on the %s device (%d)\n", dev_type, dev); + check_f0 (); +} + + + +int +main () +{ + do_check (DEFAULT_DEVICE); + int ndev = omp_get_num_devices (); + for (int dev = -1; dev < ndev; dev++) + do_check (dev); + for (int dev = -1; dev < ndev; dev++) + { + omp_set_default_device (dev); + do_check (DEFAULT_DEVICE); + } +} diff --git a/libgomp/testsuite/libgomp.c/append-args-fr.h b/libgomp/testsuite/libgomp.c/append-args-fr.h new file mode 100644 index 0000000..9f6ca04 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/append-args-fr.h @@ -0,0 +1,305 @@ +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <omp.h> +#include "../libgomp.c-c++-common/on_device_arch.h" + +/* Provides: */ + +#define DEFAULT_DEVICE -99 + +void check_host (omp_interop_t obj); +void check_nvptx (omp_interop_t obj, int dev, omp_interop_fr_t expected_fr, _Bool is_targetsync); +void check_gcn (omp_interop_t obj, int dev, omp_interop_fr_t expected_fr, _Bool is_targetsync); + + +/* The following assumes that when a nvptx device is available, + cuda/cuda_driver/hip are supported. + And that likewise when a gcn device is available that the + plugin also can not only the HSA but also the HIP library + such that hsa/hip are supported. + For the host, omp_interop_none is expected. + + Otherwise, it only does some basic tests without checking + that the returned result really makes sense. */ + +void check_type (omp_interop_t obj) +{ + const char *type; + + type = omp_get_interop_type_desc (obj, omp_ipr_fr_id); + if (obj != omp_interop_none) + assert (strcmp (type, "omp_interop_t") == 0); + else + assert (type == NULL); + + type = omp_get_interop_type_desc (obj, omp_ipr_fr_name); + if (obj != omp_interop_none) + assert (strcmp (type, "const char *") == 0); + else + assert (type == NULL); + + type = omp_get_interop_type_desc (obj, omp_ipr_vendor); + if (obj != omp_interop_none) + assert (strcmp (type, "int") == 0); + else + assert (type == NULL); + + type = omp_get_interop_type_desc (obj, omp_ipr_vendor_name); + if (obj != omp_interop_none) + assert (strcmp (type, "const char *") == 0); + else + assert (type == NULL); + + type = omp_get_interop_type_desc (obj, omp_ipr_device_num); + if (obj != omp_interop_none) + assert (strcmp (type, "int") == 0); + else + assert (type == NULL); + + if (obj != omp_interop_none) + return; + assert (omp_get_interop_type_desc (obj, omp_ipr_platform) == NULL); + assert (omp_get_interop_type_desc (obj, omp_ipr_device) == NULL); + assert (omp_get_interop_type_desc (obj, omp_ipr_device_context) == NULL); + assert (omp_get_interop_type_desc (obj, omp_ipr_targetsync) == NULL); +} + + +void +check_host (omp_interop_t obj) +{ + assert (obj == omp_interop_none); + check_type (obj); +} + + +void +check_nvptx (omp_interop_t obj, int dev, omp_interop_fr_t expected_fr, _Bool is_targetsync) +{ + assert (obj != omp_interop_none && obj != (omp_interop_t) -1L); + + omp_interop_rc_t ret_code = omp_irc_no_value; + omp_interop_fr_t fr = (omp_interop_fr_t) omp_get_interop_int (obj, omp_ipr_fr_id, &ret_code); + + assert (ret_code == omp_irc_success); + assert (fr == expected_fr); + + ret_code = omp_irc_no_value; + const char *fr_name = omp_get_interop_str (obj, omp_ipr_fr_name, &ret_code); + + assert (ret_code == omp_irc_success); + if (fr == omp_ifr_cuda) + assert (strcmp (fr_name, "cuda") == 0); + else if (fr == omp_ifr_cuda_driver) + assert (strcmp (fr_name, "cuda_driver") == 0); + else if (fr == omp_ifr_hip) + assert (strcmp (fr_name, "hip") == 0); + else + assert (0); + + ret_code = omp_irc_no_value; + int vendor = (int) omp_get_interop_int (obj, omp_ipr_vendor, &ret_code); + assert (ret_code == omp_irc_success); + assert (vendor == 11); /* Nvidia */ + + ret_code = omp_irc_no_value; + const char *vendor_name = omp_get_interop_str (obj, omp_ipr_vendor_name, &ret_code); + assert (ret_code == omp_irc_success); + assert (strcmp (vendor_name, "nvidia") == 0); + + ret_code = omp_irc_no_value; + int dev_num = (int) omp_get_interop_int (obj, omp_ipr_device_num, &ret_code); + assert (ret_code == omp_irc_success); + if (dev == DEFAULT_DEVICE) + assert (dev_num == omp_get_default_device ()); + else + assert (dev_num == dev); + + /* Platform: N/A. */ + ret_code = omp_irc_success; + (void) omp_get_interop_int (obj, omp_ipr_platform, &ret_code); + assert (ret_code == omp_irc_no_value); + ret_code = omp_irc_success; + (void) omp_get_interop_ptr (obj, omp_ipr_platform, &ret_code); + assert (ret_code == omp_irc_no_value); + ret_code = omp_irc_success; + (void) omp_get_interop_str (obj, omp_ipr_platform, &ret_code); + assert (ret_code == omp_irc_no_value); + + /* Device: int / CUdevice / hipDevice_t -- all internally an 'int'. */ + ret_code = omp_irc_no_value; + int fr_device = (int) omp_get_interop_int (obj, omp_ipr_device, &ret_code); + + /* CUDA also starts from 0 and goes to < n with cudaGetDeviceCount(&cn). */ + assert (ret_code == omp_irc_success); + assert (fr_device >= 0 && fr_device < omp_get_num_devices ()); + + /* Device context: N/A / CUcontext / hipCtx_t -- a pointer. */ + ret_code = omp_irc_out_of_range; + void *ctx = omp_get_interop_ptr (obj, omp_ipr_device_context, &ret_code); + + if (fr == omp_ifr_cuda) + { + assert (ret_code == omp_irc_no_value); + assert (ctx == NULL); + } + else + { + assert (ret_code == omp_irc_success); + assert (ctx != NULL); + } + + /* Stream/targetsync: cudaStream_t / CUstream / hipStream_t -- a pointer. */ + ret_code = omp_irc_out_of_range; + void *stream = omp_get_interop_ptr (obj, omp_ipr_targetsync, &ret_code); + + if (is_targetsync) /* no targetsync */ + { + assert (ret_code == omp_irc_success); + assert (stream != NULL); + } + else + { + assert (ret_code == omp_irc_no_value); + assert (stream == NULL); + } + + check_type (obj); + if (fr == omp_ifr_cuda) + { + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "int") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "N/A") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "cudaStream_t") == 0); + } + else if (fr == omp_ifr_cuda_driver) + { + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "CUdevice") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "CUcontext") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "CUstream") == 0); + } + else + { + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "hipDevice_t") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "hipCtx_t") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "hipStream_t") == 0); + } +} + + +void +check_gcn (omp_interop_t obj, int dev, omp_interop_fr_t expected_fr, _Bool is_targetsync) +{ + assert (obj != omp_interop_none && obj != (omp_interop_t) -1L); + + omp_interop_rc_t ret_code = omp_irc_no_value; + omp_interop_fr_t fr = (omp_interop_fr_t) omp_get_interop_int (obj, omp_ipr_fr_id, &ret_code); + + assert (ret_code == omp_irc_success); + assert (fr == expected_fr); + + ret_code = omp_irc_no_value; + const char *fr_name = omp_get_interop_str (obj, omp_ipr_fr_name, &ret_code); + + assert (ret_code == omp_irc_success); + if (fr == omp_ifr_hip) + assert (strcmp (fr_name, "hip") == 0); + else if (fr == omp_ifr_hsa) + assert (strcmp (fr_name, "hsa") == 0); + else + assert (0); + + ret_code = omp_irc_no_value; + int vendor = (int) omp_get_interop_int (obj, omp_ipr_vendor, &ret_code); + assert (ret_code == omp_irc_success); + assert (vendor == 1); /* Amd */ + + ret_code = omp_irc_no_value; + const char *vendor_name = omp_get_interop_str (obj, omp_ipr_vendor_name, &ret_code); + assert (ret_code == omp_irc_success); + assert (strcmp (vendor_name, "amd") == 0); + + ret_code = omp_irc_no_value; + int dev_num = (int) omp_get_interop_int (obj, omp_ipr_device_num, &ret_code); + assert (ret_code == omp_irc_success); + if (dev == DEFAULT_DEVICE) + assert (dev_num == omp_get_default_device ()); + else + assert (dev_num == dev); + + /* Platform: N/A. */ + ret_code = omp_irc_success; + (void) omp_get_interop_int (obj, omp_ipr_platform, &ret_code); + assert (ret_code == omp_irc_no_value); + ret_code = omp_irc_success; + (void) omp_get_interop_ptr (obj, omp_ipr_platform, &ret_code); + assert (ret_code == omp_irc_no_value); + ret_code = omp_irc_success; + (void) omp_get_interop_str (obj, omp_ipr_platform, &ret_code); + assert (ret_code == omp_irc_no_value); + + /* Device: hipDevice_t / hsa_agent_t* -- hip is internally an 'int'. */ + ret_code = omp_irc_no_value; + if (fr == omp_ifr_hip) + { + /* HIP also starts from 0 and goes to < n as with cudaGetDeviceCount(&cn). */ + int fr_device = (int) omp_get_interop_int (obj, omp_ipr_device, &ret_code); + assert (ret_code == omp_irc_success); + assert (fr_device >= 0 && fr_device < omp_get_num_devices ()); + } + else + { + void *agent = omp_get_interop_ptr (obj, omp_ipr_device, &ret_code); + assert (ret_code == omp_irc_success); + assert (agent != NULL); + } + + /* Device context: hipCtx_t / N/A -- a pointer. */ + ret_code = omp_irc_out_of_range; + void *ctx = omp_get_interop_ptr (obj, omp_ipr_device_context, &ret_code); + if (fr == omp_ifr_hip) + { + assert (ret_code == omp_irc_success); + assert (ctx != NULL); + } + else + { + assert (ret_code == omp_irc_no_value); + assert (ctx == NULL); + } + + /* Stream/targetsync: cudaStream_t / CUstream / hipStream_t -- a pointer. */ + ret_code = omp_irc_out_of_range; + void *stream = omp_get_interop_ptr (obj, omp_ipr_targetsync, &ret_code); + + if (is_targetsync) + { + assert (ret_code == omp_irc_success); + assert (stream != NULL); + } + else + { + assert (ret_code == omp_irc_no_value); + assert (stream == NULL); + } + + check_type (obj); + if (fr == omp_ifr_hip) + { + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "hipDevice_t") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "hipCtx_t") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "hipStream_t") == 0); + } + else + { + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "hsa_agent_t *") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "N/A") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "hsa_queue_t *") == 0); + } +} diff --git a/libgomp/testsuite/libgomp.c/declare-variant-3-sm61.c b/libgomp/testsuite/libgomp.c/declare-variant-3-sm61.c new file mode 100644 index 0000000..e6941d3 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/declare-variant-3-sm61.c @@ -0,0 +1,8 @@ +/* { dg-do link { target { offload_target_nvptx } } } */ +/* { dg-additional-options -foffload=nvptx-none } */ +/* { dg-additional-options "-foffload=-misa=sm_61 -foffload=-mptx=_" } */ +/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */ + +#include "declare-variant-3.h" + +/* { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump "= f61 \\(\\);" "optimized" } } */ diff --git a/libgomp/testsuite/libgomp.c/declare-variant-3.h b/libgomp/testsuite/libgomp.c/declare-variant-3.h index c9c8f4a..f5695a2 100644 --- a/libgomp/testsuite/libgomp.c/declare-variant-3.h +++ b/libgomp/testsuite/libgomp.c/declare-variant-3.h @@ -37,6 +37,13 @@ f53 (void) __attribute__ ((noipa)) int +f61 (void) +{ + return 61; +} + +__attribute__ ((noipa)) +int f70 (void) { return 70; @@ -68,6 +75,7 @@ f89 (void) #pragma omp declare variant (f37) match (device={isa("sm_37")}) #pragma omp declare variant (f52) match (device={isa("sm_52")}) #pragma omp declare variant (f53) match (device={isa("sm_53")}) +#pragma omp declare variant (f61) match (device={isa("sm_61")}) #pragma omp declare variant (f70) match (device={isa("sm_70")}) #pragma omp declare variant (f75) match (device={isa("sm_75")}) #pragma omp declare variant (f80) match (device={isa("sm_80")}) diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx942.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx942.c new file mode 100644 index 0000000..d1df550 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx942.c @@ -0,0 +1,8 @@ +/* { dg-do link { target { offload_target_amdgcn } } } */ +/* { dg-additional-options -foffload=amdgcn-amdhsa } */ +/* { dg-additional-options -foffload=-march=gfx942 } */ +/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */ + +#include "declare-variant-4.h" + +/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx942 \\(\\);" "optimized" } } */ diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4.h b/libgomp/testsuite/libgomp.c/declare-variant-4.h index 53788d2..2257f4c 100644 --- a/libgomp/testsuite/libgomp.c/declare-variant-4.h +++ b/libgomp/testsuite/libgomp.c/declare-variant-4.h @@ -37,6 +37,13 @@ gfx90c (void) __attribute__ ((noipa)) int +gfx942 (void) +{ + return 0x942; +} + +__attribute__ ((noipa)) +int gfx1030 (void) { return 0x1030; @@ -68,6 +75,7 @@ gfx1103 (void) #pragma omp declare variant(gfx908) match(device = {isa("gfx908")}) #pragma omp declare variant(gfx90a) match(device = {isa("gfx90a")}) #pragma omp declare variant(gfx90c) match(device = {isa("gfx90c")}) +#pragma omp declare variant(gfx942) match(device = {isa("gfx942")}) #pragma omp declare variant(gfx1030) match(device = {isa("gfx1030")}) #pragma omp declare variant(gfx1036) match(device = {isa("gfx1036")}) #pragma omp declare variant(gfx1100) match(device = {isa("gfx1100")}) diff --git a/libgomp/testsuite/libgomp.c/interop-cublas-full.c b/libgomp/testsuite/libgomp.c/interop-cublas-full.c new file mode 100644 index 0000000..2df5277 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-cublas-full.c @@ -0,0 +1,176 @@ +/* { dg-require-effective-target openacc_cublas } */ +/* { dg-additional-options "-lcublas" } */ + +/* NOTE: This file is also included by libgomp.c-c++-common/interop-cudablas-libonly.c + to test the fallback version. */ + +/* Check whether cuBlas' daxpy works with an interop object. + daxpy(N, DA, DX, INCX, DY, INCY) + calculates (for DX = DY = 1): + DY(1:N) = DY(1:N) + DA * DX(1:N) + and otherwise N array elements, taking every INCX-th or INCY-th one, repectively. + +Based on the interop example in OpenMP's example document */ + +/* Minimal check whether CUDA works - by checking whether the API routines + seem to work. This includes a fallback if the header is not + available. */ + +#include <assert.h> +#include <omp.h> +#include "../libgomp.c-c++-common/on_device_arch.h" + + +#if __has_include(<cuda.h>) && __has_include(<cudaTypedefs.h>) && __has_include(<cuda_runtime.h>) && __has_include(<cublas_v2.h>) && !defined(USE_CUDA_FALLBACK_HEADER) + #include <cuda.h> + #include <cudaTypedefs.h> + #include <cuda_runtime.h> + #include <cublas_v2.h> + +#else + /* Add a poor man's fallback declaration. */ + #if USE_CUDA_FALLBACK_HEADER + // Don't warn. + #elif !__has_include(<cuda.h>) + #warning "Using GCC's cuda.h as fallback for cuda.h" + #elif !__has_include(<cudaTypedefs.h>) + #warning "Using GCC's cuda.h as fallback for cudaTypedefs.h" + #elif !__has_include(<cuda_runtime.h>) + #warning "Using GCC's cuda.h as fallback for cuda_runtime.h" + #else + #warning "Using GCC's cuda.h as fallback for cublas_v2.h" + #endif + #include "../../../include/cuda/cuda.h" + + typedef enum { + CUBLAS_STATUS_SUCCESS = 0, + } cublasStatus_t; + + typedef CUstream cudaStream_t; + typedef struct cublasContext* cublasHandle_t; + + #define cublasCreate cublasCreate_v2 + cublasStatus_t cublasCreate_v2 (cublasHandle_t *); + + #define cublasSetStream cublasSetStream_v2 + cublasStatus_t cublasSetStream_v2 (cublasHandle_t, cudaStream_t); + + #define cublasDaxpy cublasDaxpy_v2 + cublasStatus_t cublasDaxpy_v2(cublasHandle_t, int, const double*, const double*, int, double*, int); +#endif + +static int used_variant = 0; + +void +run_cuBlasdaxpy (int n, double da, const double *dx, int incx, double *dy, int incy, omp_interop_t obj) +{ + used_variant = 1; + + omp_interop_rc_t res; + cublasStatus_t stat; + + omp_intptr_t fr = omp_get_interop_int(obj, omp_ipr_fr_id, &res); + assert (res == omp_irc_success && fr == omp_ifr_cuda); + + cudaStream_t stream = (cudaStream_t) omp_get_interop_ptr (obj, omp_ipr_targetsync, &res); + assert (res == omp_irc_success); + + cublasHandle_t handle; + stat = cublasCreate (&handle); + assert (stat == CUBLAS_STATUS_SUCCESS); + + stat = cublasSetStream (handle, stream); + assert (stat == CUBLAS_STATUS_SUCCESS); + + /* 'da' can be in host or device space, 'dx' and 'dy' must be in device space. */ + stat = cublasDaxpy (handle, n, &da, dx, 1, dy, 1) ; + assert (stat == CUBLAS_STATUS_SUCCESS); +} + + +#pragma omp declare variant(run_cuBlasdaxpy) \ + match(construct={dispatch}, target_device={kind(nohost), arch("nvptx")}) \ + adjust_args(need_device_ptr : dx, dy) \ + append_args(interop(targetsync, prefer_type("cuda"))) + +void +run_daxpy (int n, double da, const double *dx, int incx, double *dy, int incy) +{ + used_variant = 2; + + if (incx == 1 && incy == 1) + #pragma omp simd + for (int i = 0; i < n; i++) + dy[i] += da * dx[i]; + else + { + int ix = 0; + int iy = 0; + for (int i = 0; i < n; i++) + { + dy[iy] += da * dx[ix]; + ix += incx; + iy += incy; + } + } +} + + +void +run_test (int dev) +{ + constexpr int N = 1024; + + // A = {1,2,...,N} + // B = {-1, -2, ..., N} + // B' = daxpy (N, 3, A, incx=1, B, incy=1) + // = B + 3*A + // -> B' = {0, 2, 4, 6, ... } + + double A[N], B[N]; + double factor = 3.0; + for (int i = 0; i < N; i++) + { + A[i] = i; + B[i] = -i; + } + + if (dev != omp_initial_device && dev != omp_get_num_devices ()) + { + #pragma omp target enter data device(dev) map(A, B) + } + + used_variant = 99; + #pragma omp dispatch device(dev) + run_daxpy (N, factor, A, 1, B, 1); + + if (dev != omp_initial_device && dev != omp_get_num_devices ()) + { + #pragma omp target exit data device(dev) map(release: A) map(from: B) + + int tmp = omp_get_default_device (); + omp_set_default_device (dev); + if (on_device_arch_nvptx ()) + assert (used_variant == 1); + else + assert (used_variant == 2); + omp_set_default_device (tmp); + } + else + assert (used_variant == 2); + + for (int i = 0; i < N; i++) + assert (B[i] == 2*i); +} + +int +main () +{ + int ndev = omp_get_num_devices (); + + for (int dev = 0; dev <= ndev; dev++) + run_test (dev); + run_test (omp_initial_device); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/interop-cublas-libonly.c b/libgomp/testsuite/libgomp.c/interop-cublas-libonly.c new file mode 100644 index 0000000..89c0652 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-cublas-libonly.c @@ -0,0 +1,7 @@ +/* { dg-require-effective-target openacc_libcublas } */ +/* { dg-additional-options "-lcublas" } */ + +/* Same as interop-cudablas-full.c, but also works if the header is not available. */ + +#define USE_CUDA_FALLBACK_HEADER 1 +#include "interop-cublas-full.c" diff --git a/libgomp/testsuite/libgomp.c/interop-cuda-full.c b/libgomp/testsuite/libgomp.c/interop-cuda-full.c new file mode 100644 index 0000000..c48a934 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-cuda-full.c @@ -0,0 +1,162 @@ +/* { dg-do run { target { offload_device_nvptx } } } */ +/* { dg-do link { target { ! offload_device_nvptx } } } */ + +/* { dg-require-effective-target openacc_cuda } */ +/* { dg-require-effective-target openacc_cudart } */ +/* { dg-additional-options "-lcuda -lcudart" } */ + +/* NOTE: This file is also included by libgomp.c-c++-common/interop-cuda-libonly.c + to test the fallback version, which defines USE_CUDA_FALLBACK_HEADER. */ + +/* Minimal check whether CUDA works - by checking whether the API routines + seem to work. This includes a fallback if the header is not + available. */ + +#include <assert.h> +#include <omp.h> + +#if __has_include(<cuda.h>) && __has_include(<cudaTypedefs.h>) && __has_include(<cuda_runtime.h>) && !defined(USE_CUDA_FALLBACK_HEADER) + #include <cuda.h> + #include <cudaTypedefs.h> + #include <cuda_runtime.h> + +#else + /* Add a poor man's fallback declaration. */ + #if USE_CUDA_FALLBACK_HEADER + // Don't warn. + #elif !__has_include(<cuda.h>) + #warning "Using GCC's cuda.h as fallback for cuda.h" + #elif !__has_include(<cudaTypedefs.h>) + #warning "Using GCC's cuda.h as fallback for cudaTypedefs.h" + #else + #warning "Using GCC's cuda.h as fallback for cuda_runtime.h" + #endif + #include "../../../include/cuda/cuda.h" + + typedef int cudaError_t; + typedef CUstream cudaStream_t; + enum { + cudaSuccess = 0 + }; + + enum cudaDeviceAttr { + cudaDevAttrClockRate = 13, + cudaDevAttrMaxGridDimX = 5 + }; + + cudaError_t cudaDeviceGetAttribute (int *, enum cudaDeviceAttr, int); + cudaError_t cudaStreamQuery(cudaStream_t); + CUresult cuCtxGetApiVersion(CUcontext, unsigned int *); + CUresult cuStreamGetCtx (CUstream, CUcontext *); +#endif + +int +main () +{ + int ivar; + unsigned uvar; + omp_interop_rc_t res; + omp_interop_t obj_cuda = omp_interop_none; + omp_interop_t obj_cuda_driver = omp_interop_none; + cudaError_t cuda_err; + CUresult cu_err; + + #pragma omp interop init(target, targetsync, prefer_type("cuda") : obj_cuda) \ + init(target, targetsync, prefer_type("cuda_driver") : obj_cuda_driver) \ + + omp_interop_fr_t fr = (omp_interop_fr_t) omp_get_interop_int (obj_cuda, omp_ipr_fr_id, &res); + assert (res == omp_irc_success); + assert (fr == omp_ifr_cuda); + + fr = (omp_interop_fr_t) omp_get_interop_int (obj_cuda_driver, omp_ipr_fr_id, &res); + assert (res == omp_irc_success); + assert (fr == omp_ifr_cuda_driver); + + ivar = (int) omp_get_interop_int (obj_cuda, omp_ipr_vendor, &res); + assert (res == omp_irc_success); + assert (ivar == 11); + + ivar = (int) omp_get_interop_int (obj_cuda_driver, omp_ipr_vendor, &res); + assert (res == omp_irc_success); + assert (ivar == 11); + + + /* Check whether the omp_ipr_device -> cudaDevice_t yields a valid device. */ + + CUdevice cu_dev = (int) omp_get_interop_int (obj_cuda_driver, omp_ipr_device, &res); + assert (res == omp_irc_success); + + /* Assume a clock size is available and > 1 GHz; value is in kHz. */ + cu_err = cuDeviceGetAttribute (&ivar, cudaDevAttrClockRate, cu_dev); + assert (cu_err == CUDA_SUCCESS); + assert (ivar > 1000000 /* kHz */); + + /* Assume that the MaxGridDimX is available and > 1024. */ + cu_err = cuDeviceGetAttribute (&ivar, cudaDevAttrMaxGridDimX, cu_dev); + assert (cu_err == CUDA_SUCCESS); + assert (ivar > 1024); + + int cuda_dev = (int) omp_get_interop_int (obj_cuda, omp_ipr_device, &res); + assert (res == omp_irc_success); + assert (cuda_dev == (CUdevice) cu_dev); // Assume they are the same ... + + /* Assume a clock size is available and > 1 GHz; value is in kHz. */ + cuda_err = cudaDeviceGetAttribute (&ivar, cudaDevAttrClockRate, cuda_dev); + assert (cuda_err == cudaSuccess); + assert (ivar > 1000000 /* kHz */); + + /* Assume that the MaxGridDimX is available and > 1024. */ + cuda_err = cudaDeviceGetAttribute (&ivar, cudaDevAttrMaxGridDimX, cuda_dev); + assert (cuda_err == cudaSuccess); + assert (ivar > 1024); + + + + + /* Check whether the omp_ipr_device_context -> CUcontext yields a context. */ + + CUcontext cu_ctx = (CUcontext) omp_get_interop_ptr (obj_cuda_driver, omp_ipr_device_context, &res); + assert (res == omp_irc_success); + + /* Assume API Version > 0 for Nvidia, cudaErrorNotSupported for AMD. */ + uvar = 99; + cu_err = cuCtxGetApiVersion (cu_ctx, &uvar); + assert (cu_err == CUDA_SUCCESS); + assert (uvar > 0); + + + /* Check whether the omp_ipr_targetsync -> cudaStream_t yields a stream. */ + + cudaStream_t cuda_sm = (cudaStream_t) omp_get_interop_ptr (obj_cuda, omp_ipr_targetsync, &res); + assert (res == omp_irc_success); + + CUstream cu_sm = (cudaStream_t) omp_get_interop_ptr (obj_cuda_driver, omp_ipr_targetsync, &res); + assert (res == omp_irc_success); + + assert ((void*) cu_sm != (void*) cuda_sm); // Type compatible but should have created two streams + + int dev_stream = 99; +#if CUDA_VERSION >= 12080 + cuda_err = cudaStreamGetDevice (cuda_sm, &dev_stream); + assert (cuda_err == cudaSuccess); +#else + cu_err = cuStreamGetCtx (cu_sm, &cu_ctx) != CUDA_SUCCESS; + if (cu_err == CUDA_SUCCESS) + cuda_err = cuCtxPushCurrent (cu_ctx) != CUDA_SUCCESS; + if (cu_err == CUDA_SUCCESS) + cuda_err = cuCtxGetDevice (&dev_stream) != CUDA_SUCCESS; + if (cu_err == CUDA_SUCCESS) + cu_err = cuCtxPopCurrent (&cu_ctx) != CUDA_SUCCESS; + assert (cu_err == CUDA_SUCCESS); +#endif + assert (dev_stream == cuda_dev); + + /* All jobs should have been completed (as there were none none) */ + cuda_err = cudaStreamQuery (cuda_sm); + assert (cuda_err == cudaSuccess); + + cu_err = cuStreamQuery (cu_sm); + assert (cu_err == CUDA_SUCCESS); + + #pragma omp interop destroy(obj_cuda, obj_cuda_driver) +} diff --git a/libgomp/testsuite/libgomp.c/interop-cuda-libonly.c b/libgomp/testsuite/libgomp.c/interop-cuda-libonly.c new file mode 100644 index 0000000..bc257a2 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-cuda-libonly.c @@ -0,0 +1,11 @@ +/* { dg-do run { target { offload_device_nvptx } } } */ +/* { dg-do link { target { ! offload_device_nvptx } } } */ + +/* { dg-require-effective-target openacc_libcudart } */ +/* { dg-require-effective-target openacc_libcuda } */ +/* { dg-additional-options "-lcuda -lcudart" } */ + +/* Same as interop-cuda-full.c, but also works if the header is not available. */ + +#define USE_CUDA_FALLBACK_HEADER 1 +#include "interop-cuda-full.c" diff --git a/libgomp/testsuite/libgomp.c/interop-fr-1.c b/libgomp/testsuite/libgomp.c/interop-fr-1.c new file mode 100644 index 0000000..9310c95 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-fr-1.c @@ -0,0 +1,577 @@ +/* { dg-do run } */ + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <omp.h> +#include "../libgomp.c-c++-common/on_device_arch.h" + +#define DEFAULT_DEVICE -99 + +/* The following assumes that when a nvptx device is available, + cuda/cuda_driver/hip are supported. + And that likewise when a gcn device is available that the + plugin also can not only the HSA but also the HIP library + such that hsa/hip are supported. + For the host, omp_interop_none is expected. + + Otherwise, it only does some basic tests without checking + that the returned result really makes sense. */ + +void check_host (int); +void check_nvptx (int); +void check_gcn (int); + +void check_type (omp_interop_t obj) +{ + const char *type; + + type = omp_get_interop_type_desc (obj, omp_ipr_fr_id); + if (obj != omp_interop_none) + assert (strcmp (type, "omp_interop_t") == 0); + else + assert (type == NULL); + + type = omp_get_interop_type_desc (obj, omp_ipr_fr_name); + if (obj != omp_interop_none) + assert (strcmp (type, "const char *") == 0); + else + assert (type == NULL); + + type = omp_get_interop_type_desc (obj, omp_ipr_vendor); + if (obj != omp_interop_none) + assert (strcmp (type, "int") == 0); + else + assert (type == NULL); + + type = omp_get_interop_type_desc (obj, omp_ipr_vendor_name); + if (obj != omp_interop_none) + assert (strcmp (type, "const char *") == 0); + else + assert (type == NULL); + + type = omp_get_interop_type_desc (obj, omp_ipr_device_num); + if (obj != omp_interop_none) + assert (strcmp (type, "int") == 0); + else + assert (type == NULL); + + if (obj != omp_interop_none) + return; + assert (omp_get_interop_type_desc (obj, omp_ipr_platform) == NULL); + assert (omp_get_interop_type_desc (obj, omp_ipr_device) == NULL); + assert (omp_get_interop_type_desc (obj, omp_ipr_device_context) == NULL); + assert (omp_get_interop_type_desc (obj, omp_ipr_targetsync) == NULL); +} + +void +do_check (int dev) +{ + int num_dev = omp_get_num_devices (); + const char *dev_type; + if (dev != DEFAULT_DEVICE) + omp_set_default_device (dev); + int is_nvptx = on_device_arch_nvptx (); + int is_gcn = on_device_arch_gcn (); + int is_host; + + if (dev != DEFAULT_DEVICE) + is_host = dev == -1 || dev == num_dev; + else + { + int def_dev = omp_get_default_device (); + is_host = def_dev == -1 || def_dev == num_dev; + } + + assert (is_nvptx + is_gcn + is_host == 1); + + if (num_dev > 0 && dev != DEFAULT_DEVICE) + { + if (is_host) + omp_set_default_device (0); + else + omp_set_default_device (-1); + } + + if (is_host) + dev_type = "host"; + else if (is_nvptx) + dev_type = "nvptx"; + else if (is_gcn) + dev_type = "gcn"; + + printf ("Running on the %s device (%d)\n", dev_type, dev); + if (is_host) + check_host (dev); + else if (is_nvptx) + check_nvptx (dev); + else if (is_gcn) + check_gcn (dev); +} + + +void +check_host (int dev) +{ + omp_interop_t obj = (omp_interop_t) -1L; + if (dev == DEFAULT_DEVICE) { + #pragma omp interop init(target : obj) + } else { + #pragma omp interop init(target : obj) device(dev) + } + assert (obj == omp_interop_none); + check_type (obj); + + obj = (omp_interop_t) -1L; + if (dev == DEFAULT_DEVICE) { + #pragma omp interop init(target, prefer_type({attr("ompx_foo")}, {attr("ompx_bar"), fr("cuda"), attr("ompx_foobar")},{fr("cuda_driver")}, {fr("hip")}, {fr("hsa")}) : obj) + } else { + #pragma omp interop init(target, prefer_type({attr("ompx_foo")}, {attr("ompx_bar"), fr("cuda"), attr("ompx_foobar")},{fr("cuda_driver")}, {fr("hip")}, {fr("hsa")}) : obj) device(dev) + } + assert (obj == omp_interop_none); + check_type (obj); + + obj = (omp_interop_t) -1L; + if (dev == DEFAULT_DEVICE) { + #pragma omp interop init(targetsync : obj) + } else { + #pragma omp interop init(targetsync : obj) device(dev) + } + assert (obj == omp_interop_none); + check_type (obj); + + obj = (omp_interop_t) -1L; + if (dev == DEFAULT_DEVICE) { + #pragma omp interop init(targetsync, prefer_type("cuda","cuda_driver", "hip", "hsa") : obj) + } else { + #pragma omp interop init(targetsync, prefer_type("cuda","cuda_driver", "hip", "hsa") : obj) device(dev) + } + assert (obj == omp_interop_none); + check_type (obj); +} + + +void +check_nvptx (int dev) +{ + for (int variant = 0; variant <= 7; variant++) + { + omp_interop_t obj = (omp_interop_t) -1L; + switch (variant) + { + /* Expect 'cuda'. */ + case 0: + { + if (dev == DEFAULT_DEVICE) { + #pragma omp interop init(target : obj) + } else { + #pragma omp interop init(target : obj) device(dev) + } + break; + } + case 1: + { + if (dev == DEFAULT_DEVICE) { + #pragma omp interop init(targetsync : obj) + } else { + #pragma omp interop init(targetsync : obj) device(dev) + } + break; + } + case 2: + { + if (dev == DEFAULT_DEVICE) { + #pragma omp interop init(target, prefer_type({attr("ompx_foo")}, {fr("hsa")}, {attr("ompx_bar"), fr("cuda"), attr("ompx_foobar")},{fr("cuda_driver")}, {fr("hip")}) : obj) + } else { + #pragma omp interop init(target, prefer_type({attr("ompx_foo")}, {fr("hsa")}, {attr("ompx_bar"), fr("cuda"), attr("ompx_foobar")},{fr("cuda_driver")}, {fr("hip")}) : obj) device(dev) + } + break; + } + case 3: + { + if (dev == DEFAULT_DEVICE) { + #pragma omp interop init(targetsync, prefer_type("hsa", "cuda", "cuda_driver", "hip") : obj) + } else { + #pragma omp interop init(targetsync, prefer_type("hsa", "cuda", "cuda_driver", "hip") : obj) device(dev) + } + break; + } + + /* Expect 'cuda_driver'. */ + case 4: + { + if (dev == DEFAULT_DEVICE) { + #pragma omp interop init(target, prefer_type("hsa", "cuda_driver", "hip", "cuda") : obj) + } else { + #pragma omp interop init(target, prefer_type("hsa", "cuda_driver", "hip", "cuda") : obj) device(dev) + } + break; + } + case 5: + { + if (dev == DEFAULT_DEVICE) { + #pragma omp interop init(targetsync, prefer_type("hsa", "cuda_driver", "hip", "cuda") : obj) + } else { + #pragma omp interop init(targetsync, prefer_type("hsa", "cuda_driver", "hip", "cuda") : obj) device(dev) + } + break; + } + + /* Expect 'hip'. */ + case 6: + { + if (dev == DEFAULT_DEVICE) { + #pragma omp interop init(target, prefer_type("hsa", "hip", "cuda", "cuda_driver") : obj) + } else { + #pragma omp interop init(target, prefer_type("hsa", "hip", "cuda", "cuda_driver") : obj) device(dev) + } + break; + } + case 7: + { + if (dev == DEFAULT_DEVICE) { + #pragma omp interop init(targetsync, prefer_type("hsa", "hip", "cuda", "cuda_driver") : obj) + } else { + #pragma omp interop init(targetsync, prefer_type("hsa", "hip", "cuda", "cuda_driver") : obj) device(dev) + } + break; + } + default: + abort (); + } + assert (obj != omp_interop_none && obj != (omp_interop_t) -1L); + + omp_interop_rc_t ret_code = omp_irc_no_value; + omp_interop_fr_t fr = (omp_interop_fr_t) omp_get_interop_int (obj, omp_ipr_fr_id, &ret_code); + + assert (ret_code == omp_irc_success); + if (variant >= 0 && variant <= 3) + assert (fr == omp_ifr_cuda); + else if (variant <= 5) + assert (fr == omp_ifr_cuda_driver); + else if (variant <= 7) + assert (fr == omp_ifr_hip); + else + assert (0); + + ret_code = omp_irc_no_value; + const char *fr_name = omp_get_interop_str (obj, omp_ipr_fr_name, &ret_code); + + assert (ret_code == omp_irc_success); + if (fr == omp_ifr_cuda) + assert (strcmp (fr_name, "cuda") == 0); + else if (fr == omp_ifr_cuda_driver) + assert (strcmp (fr_name, "cuda_driver") == 0); + else if (fr == omp_ifr_hip) + assert (strcmp (fr_name, "hip") == 0); + else + assert (0); + + ret_code = omp_irc_no_value; + int vendor = (int) omp_get_interop_int (obj, omp_ipr_vendor, &ret_code); + assert (ret_code == omp_irc_success); + assert (vendor == 11); /* Nvidia */ + + ret_code = omp_irc_no_value; + const char *vendor_name = omp_get_interop_str (obj, omp_ipr_vendor_name, &ret_code); + assert (ret_code == omp_irc_success); + assert (strcmp (vendor_name, "nvidia") == 0); + + ret_code = omp_irc_no_value; + int dev_num = (int) omp_get_interop_int (obj, omp_ipr_device_num, &ret_code); + assert (ret_code == omp_irc_success); + if (dev == DEFAULT_DEVICE) + assert (dev_num == omp_get_default_device ()); + else + assert (dev_num == dev); + + /* Platform: N/A. */ + ret_code = omp_irc_success; + (void) omp_get_interop_int (obj, omp_ipr_platform, &ret_code); + assert (ret_code == omp_irc_no_value); + ret_code = omp_irc_success; + (void) omp_get_interop_ptr (obj, omp_ipr_platform, &ret_code); + assert (ret_code == omp_irc_no_value); + ret_code = omp_irc_success; + (void) omp_get_interop_str (obj, omp_ipr_platform, &ret_code); + assert (ret_code == omp_irc_no_value); + + /* Device: int / CUdevice / hipDevice_t -- all internally an 'int'. */ + ret_code = omp_irc_no_value; + int fr_device = (int) omp_get_interop_int (obj, omp_ipr_device, &ret_code); + + /* CUDA also starts from 0 and goes to < n with cudaGetDeviceCount(&cn). */ + assert (ret_code == omp_irc_success); + assert (fr_device >= 0 && fr_device < omp_get_num_devices ()); + + /* Device context: N/A / CUcontext / hipCtx_t -- a pointer. */ + ret_code = omp_irc_out_of_range; + void *ctx = omp_get_interop_ptr (obj, omp_ipr_device_context, &ret_code); + + if (fr == omp_ifr_cuda) + { + assert (ret_code == omp_irc_no_value); + assert (ctx == NULL); + } + else + { + assert (ret_code == omp_irc_success); + assert (ctx != NULL); + } + + /* Stream/targetsync: cudaStream_t / CUstream / hipStream_t -- a pointer. */ + ret_code = omp_irc_out_of_range; + void *stream = omp_get_interop_ptr (obj, omp_ipr_targetsync, &ret_code); + + if (variant % 2 == 0) /* no targetsync */ + { + assert (ret_code == omp_irc_no_value); + assert (stream == NULL); + } + else + { + assert (ret_code == omp_irc_success); + assert (stream != NULL); + } + + check_type (obj); + if (fr == omp_ifr_cuda) + { + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "int") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "N/A") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "cudaStream_t") == 0); + } + else if (fr == omp_ifr_cuda_driver) + { + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "CUdevice") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "CUcontext") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "CUstream") == 0); + } + else + { + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "hipDevice_t") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "hipCtx_t") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "hipStream_t") == 0); + } + + if (dev == DEFAULT_DEVICE) { + #pragma omp interop use(obj) + #pragma omp interop destroy(obj) + } else { + #pragma omp interop use(obj) device(dev) + #pragma omp interop destroy(obj) device(dev) + } + } +} + + +void +check_gcn (int dev) +{ + for (int variant = 0; variant <= 5; variant++) + { + omp_interop_t obj = (omp_interop_t) -1L; + switch (variant) + { + /* Expect 'hip'. */ + case 0: + { + if (dev == DEFAULT_DEVICE) { + #pragma omp interop init(target : obj) + } else { + #pragma omp interop init(target : obj) device(dev) + } + break; + } + case 1: + { + if (dev == DEFAULT_DEVICE) { + #pragma omp interop init(targetsync : obj) + } else { + #pragma omp interop init(targetsync : obj) device(dev) + } + break; + } + case 2: + { + if (dev == DEFAULT_DEVICE) { + #pragma omp interop init(target, prefer_type({attr("ompx_foo")}, {fr("cuda")}, {fr("cuda_driver")}, {attr("ompx_bar"), fr("hip"), attr("ompx_foobar")},{fr("hsa")}) : obj) + } else { + #pragma omp interop init(target, prefer_type({attr("ompx_foo")}, {fr("cuda")}, {fr("cuda_driver")}, {attr("ompx_bar"), fr("hip"), attr("ompx_foobar")},{fr("hsa")}) : obj) device(dev) + } + break; + } + case 3: + { + if (dev == DEFAULT_DEVICE) { + #pragma omp interop init(targetsync, prefer_type("cuda", "cuda_driver", "hip", "hsa") : obj) + } else { + #pragma omp interop init(targetsync, prefer_type("cuda", "cuda_driver", "hip", "hsa") : obj) device(dev) + } + break; + } + + /* Expect 'hsa'. */ + case 4: + { + if (dev == DEFAULT_DEVICE) { + #pragma omp interop init(target, prefer_type("cuda", "cuda_driver", "hsa", "hip") : obj) + } else { + #pragma omp interop init(target, prefer_type("cuda", "cuda_driver", "hsa", "hip") : obj) device(dev) + } + break; + } + case 5: + { + if (dev == DEFAULT_DEVICE) { + #pragma omp interop init(targetsync, prefer_type("cuda", "cuda_driver", "hsa", "hip") : obj) + } else { + #pragma omp interop init(targetsync, prefer_type("cuda", "cuda_driver", "hsa", "hip") : obj) device(dev) + } + break; + } + default: + abort (); + } + assert (obj != omp_interop_none && obj != (omp_interop_t) -1L); + + omp_interop_rc_t ret_code = omp_irc_no_value; + omp_interop_fr_t fr = (omp_interop_fr_t) omp_get_interop_int (obj, omp_ipr_fr_id, &ret_code); + + assert (ret_code == omp_irc_success); + if (variant >= 0 && variant <= 3) + assert (fr == omp_ifr_hip); + else if (variant <= 5) + assert (fr == omp_ifr_hsa); + else + assert (0); + + ret_code = omp_irc_no_value; + const char *fr_name = omp_get_interop_str (obj, omp_ipr_fr_name, &ret_code); + + assert (ret_code == omp_irc_success); + if (fr == omp_ifr_hip) + assert (strcmp (fr_name, "hip") == 0); + else if (fr == omp_ifr_hsa) + assert (strcmp (fr_name, "hsa") == 0); + else + assert (0); + + ret_code = omp_irc_no_value; + int vendor = (int) omp_get_interop_int (obj, omp_ipr_vendor, &ret_code); + assert (ret_code == omp_irc_success); + assert (vendor == 1); /* Amd */ + + ret_code = omp_irc_no_value; + const char *vendor_name = omp_get_interop_str (obj, omp_ipr_vendor_name, &ret_code); + assert (ret_code == omp_irc_success); + assert (strcmp (vendor_name, "amd") == 0); + + ret_code = omp_irc_no_value; + int dev_num = (int) omp_get_interop_int (obj, omp_ipr_device_num, &ret_code); + assert (ret_code == omp_irc_success); + if (dev == DEFAULT_DEVICE) + assert (dev_num == omp_get_default_device ()); + else + assert (dev_num == dev); + + /* Platform: N/A. */ + ret_code = omp_irc_success; + (void) omp_get_interop_int (obj, omp_ipr_platform, &ret_code); + assert (ret_code == omp_irc_no_value); + ret_code = omp_irc_success; + (void) omp_get_interop_ptr (obj, omp_ipr_platform, &ret_code); + assert (ret_code == omp_irc_no_value); + ret_code = omp_irc_success; + (void) omp_get_interop_str (obj, omp_ipr_platform, &ret_code); + assert (ret_code == omp_irc_no_value); + + /* Device: hipDevice_t / hsa_agent_t* -- hip is internally an 'int'. */ + ret_code = omp_irc_no_value; + if (fr == omp_ifr_hip) + { + /* HIP also starts from 0 and goes to < n as with cudaGetDeviceCount(&cn). */ + int fr_device = (int) omp_get_interop_int (obj, omp_ipr_device, &ret_code); + assert (ret_code == omp_irc_success); + assert (fr_device >= 0 && fr_device < omp_get_num_devices ()); + } + else + { + void *agent = omp_get_interop_ptr (obj, omp_ipr_device, &ret_code); + assert (ret_code == omp_irc_success); + assert (agent != NULL); + } + + /* Device context: hipCtx_t / N/A -- a pointer. */ + ret_code = omp_irc_out_of_range; + void *ctx = omp_get_interop_ptr (obj, omp_ipr_device_context, &ret_code); + if (fr == omp_ifr_hip) + { + assert (ret_code == omp_irc_success); + assert (ctx != NULL); + } + else + { + assert (ret_code == omp_irc_no_value); + assert (ctx == NULL); + } + + /* Stream/targetsync: cudaStream_t / CUstream / hipStream_t -- a pointer. */ + ret_code = omp_irc_out_of_range; + void *stream = omp_get_interop_ptr (obj, omp_ipr_targetsync, &ret_code); + + if (variant % 2 == 0) /* no targetsync */ + { + assert (ret_code == omp_irc_no_value); + assert (stream == NULL); + } + else + { + assert (ret_code == omp_irc_success); + assert (stream != NULL); + } + + check_type (obj); + if (fr == omp_ifr_hip) + { + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "hipDevice_t") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "hipCtx_t") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "hipStream_t") == 0); + } + else + { + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "hsa_agent_t *") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "N/A") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "hsa_queue_t *") == 0); + } + + if (dev == DEFAULT_DEVICE) { + #pragma omp interop use(obj) + #pragma omp interop destroy(obj) + } else { + #pragma omp interop use(obj) device(dev) + #pragma omp interop destroy(obj) device(dev) + } + } +} + + +int +main () +{ + do_check (DEFAULT_DEVICE); + int ndev = omp_get_num_devices (); + for (int dev = -1; dev < ndev; dev++) + do_check (dev); + for (int dev = -1; dev < ndev; dev++) + { + omp_set_default_device (dev); + do_check (DEFAULT_DEVICE); + } +} diff --git a/libgomp/testsuite/libgomp.c/interop-hip-amd-full.c b/libgomp/testsuite/libgomp.c/interop-hip-amd-full.c new file mode 100644 index 0000000..bd44f44 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hip-amd-full.c @@ -0,0 +1,10 @@ +/* { dg-do run { target { offload_device_gcn } } } */ +/* { dg-do link { target { ! offload_device_gcn } } } */ + +/* { dg-require-effective-target gomp_hip_header_amd } */ +/* { dg-require-effective-target gomp_libamdhip64 } */ +/* { dg-additional-options "-lamdhip64" } */ + +#define __HIP_PLATFORM_AMD__ 1 + +#include "interop-hip.h" diff --git a/libgomp/testsuite/libgomp.c/interop-hip-amd-no-hip-header.c b/libgomp/testsuite/libgomp.c/interop-hip-amd-no-hip-header.c new file mode 100644 index 0000000..91ad987 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hip-amd-no-hip-header.c @@ -0,0 +1,11 @@ +/* { dg-do run { target { offload_device_gcn } } } */ +/* { dg-do link { target { ! offload_device_gcn } } } */ + +/* { dg-require-effective-target gomp_libamdhip64 } */ +/* { dg-additional-options "-lamdhip64" } */ + +#define __HIP_PLATFORM_AMD__ 1 + +#define USE_HIP_FALLBACK_HEADER 1 + +#include "interop-hip.h" diff --git a/libgomp/testsuite/libgomp.c/interop-hip-nvidia-full.c b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-full.c new file mode 100644 index 0000000..d5dc236 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-full.c @@ -0,0 +1,11 @@ +/* { dg-do run { target { offload_device_nvptx } } } */ +/* { dg-do link { target { ! offload_device_nvptx } } } */ + +/* { dg-require-effective-target openacc_cudart } */ +/* { dg-require-effective-target openacc_cuda } */ +/* { dg-require-effective-target gomp_hip_header_nvidia } */ +/* { dg-additional-options "-lcuda -lcudart -Wno-deprecated-declarations" } */ + +#define __HIP_PLATFORM_NVIDIA__ 1 + +#include "interop-hip.h" diff --git a/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-headers.c b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-headers.c new file mode 100644 index 0000000..7cff2cb --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-headers.c @@ -0,0 +1,13 @@ +/* { dg-do run { target { offload_device_nvptx } } } */ +/* { dg-do link { target { ! offload_device_nvptx } } } */ + +/* { dg-require-effective-target openacc_libcudart } */ +/* { dg-require-effective-target openacc_libcuda } */ +/* { dg-additional-options "-lcuda -lcudart" } */ + +#define __HIP_PLATFORM_NVIDIA__ 1 + +#define USE_HIP_FALLBACK_HEADER 1 +#define USE_CUDA_FALLBACK_HEADER 1 + +#include "interop-hip.h" diff --git a/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c new file mode 100644 index 0000000..7b7dc74 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c @@ -0,0 +1,12 @@ +/* { dg-do run { target { offload_device_nvptx } } } */ +/* { dg-do link { target { ! offload_device_nvptx } } } */ + +/* { dg-require-effective-target openacc_cudart } */ +/* { dg-require-effective-target openacc_cuda } */ +/* { dg-additional-options "-lcuda -lcudart" } */ + +#define __HIP_PLATFORM_NVIDIA__ 1 + +#define USE_HIP_FALLBACK_HEADER 1 + +#include "interop-hip.h" diff --git a/libgomp/testsuite/libgomp.c/interop-hip.h b/libgomp/testsuite/libgomp.c/interop-hip.h new file mode 100644 index 0000000..20a1ccb --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hip.h @@ -0,0 +1,234 @@ +/* Minimal check whether HIP works - by checking whether the API routines + seem to work. This includes various fallbacks if the header is not + available. */ + +#include <assert.h> +#include <omp.h> + +#if !defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) + #error "Either __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__ must be defined" +#endif + +#if defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) + #error "Either __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__ must be defined" +#endif + +#if __has_include(<hip/hip_runtime_api.h>) && !defined(USE_HIP_FALLBACK_HEADER) + #include <hip/hip_runtime_api.h> + +#elif defined(__HIP_PLATFORM_AMD__) + /* Add a poor man's fallback declaration. */ + #if !defined(USE_HIP_FALLBACK_HEADER) + #warning "Using fallback declaration for <hip/hip_runtime_api.h> for __HIP_PLATFORM_AMD__" + #endif + + typedef struct ihipStream_t* hipStream_t; + typedef struct ihipCtx_t* hipCtx_t; + typedef int hipError_t; + typedef int hipDevice_t; + enum { + hipSuccess = 0, + hipErrorNotSupported = 801 + }; + + typedef enum hipDeviceAttribute_t { + hipDeviceAttributeClockRate = 5, + hipDeviceAttributeMaxGridDimX = 29 + } hipDeviceAttribute_t; + + hipError_t hipDeviceGetAttribute (int *, hipDeviceAttribute_t, hipDevice_t); + hipError_t hipCtxGetApiVersion (hipCtx_t, int *); + hipError_t hipStreamGetDevice (hipStream_t, hipDevice_t *); + hipError_t hipStreamQuery (hipStream_t); + +#elif defined(__HIP_PLATFORM_NVIDIA__) + /* Add a poor man's fallback declaration. */ + #if !defined(USE_HIP_FALLBACK_HEADER) + #warning "Using fallback declaration for <hip/hip_runtime_api.h> for __HIP_PLATFORM_NVIDIA__" + #endif + + #if __has_include(<cuda.h>) && __has_include(<cudaTypedefs.h>) && __has_include(<cuda_runtime.h>) && !defined(USE_CUDA_FALLBACK_HEADER) + #include <cuda.h> + #include <cudaTypedefs.h> + #include <cuda_runtime.h> + #else + #if defined(USE_CUDA_FALLBACK_HEADER) + // no warning + #elif !__has_include(<cuda.h>) + #warning "Using GCC's cuda.h as fallback for cuda.h" + #elif !__has_include(<cudaTypedefs.h>) + #warning "Using GCC's cuda.h as fallback for cudaTypedefs.h" + #else + #warning "Using GCC's cuda.h as fallback for cuda_runtime.h" + #endif + + #include "../../../include/cuda/cuda.h" + + typedef int cudaError_t; + enum { + cudaSuccess = 0 + }; + + enum cudaDeviceAttr { + cudaDevAttrClockRate = 13, + cudaDevAttrMaxGridDimX = 5 + }; + + cudaError_t cudaDeviceGetAttribute (int *, enum cudaDeviceAttr, int); + CUresult cuCtxGetApiVersion(CUcontext, unsigned int *); + CUresult cuStreamGetCtx (CUstream, CUcontext *); + #endif + + typedef CUstream hipStream_t; + typedef CUcontext hipCtx_t; + typedef CUdevice hipDevice_t; + + typedef int hipError_t; + typedef int hipDevice_t; + enum { + hipSuccess = 0, + hipErrorNotSupported = 801 + }; + + + typedef enum hipDeviceAttribute_t { + hipDeviceAttributeClockRate = 5, + hipDeviceAttributeMaxGridDimX = 29 + } hipDeviceAttribute_t; + + inline static hipError_t + hipDeviceGetAttribute (int *ival, hipDeviceAttribute_t attr, hipDevice_t dev) + { + enum cudaDeviceAttr cuattr; + switch (attr) + { + case hipDeviceAttributeClockRate: + cuattr = cudaDevAttrClockRate; + break; + case hipDeviceAttributeMaxGridDimX: + cuattr = cudaDevAttrMaxGridDimX; + break; + default: + assert (0); + } + return cudaDeviceGetAttribute (ival, cuattr, dev) != cudaSuccess; + } + + inline static hipError_t + hipCtxGetApiVersion (hipCtx_t ctx, int *ver) + { + unsigned uver; + hipError_t err; + err = cuCtxGetApiVersion (ctx, &uver) != CUDA_SUCCESS; + *ver = (int) uver; + return err; + } + + inline static hipError_t + hipStreamGetDevice (hipStream_t stream, hipDevice_t *dev) + { +#if CUDA_VERSION >= 12080 + return cudaStreamGetDevice (stream, dev); +#else + hipError_t err; + CUcontext ctx; + err = cuStreamGetCtx (stream, &ctx) != CUDA_SUCCESS; + if (err == hipSuccess) + err = cuCtxPushCurrent (ctx) != CUDA_SUCCESS; + if (err == hipSuccess) + err = cuCtxGetDevice (dev) != CUDA_SUCCESS; + if (err == hipSuccess) + err = cuCtxPopCurrent (&ctx) != CUDA_SUCCESS; + return err; +#endif + } + + inline static hipError_t + hipStreamQuery (hipStream_t stream) + { + return cuStreamQuery (stream) != CUDA_SUCCESS; + } + +#else + #error "should be unreachable" +#endif + +int +main () +{ + int ivar; + omp_interop_rc_t res; + omp_interop_t obj = omp_interop_none; + hipError_t hip_err; + + #pragma omp interop init(target, targetsync, prefer_type("hip") : obj) + + omp_interop_fr_t fr = (omp_interop_fr_t) omp_get_interop_int (obj, omp_ipr_fr_id, &res); + assert (res == omp_irc_success); + assert (fr == omp_ifr_hip); + + ivar = (int) omp_get_interop_int (obj, omp_ipr_vendor, &res); + assert (res == omp_irc_success); + int vendor_is_amd = ivar == 1; + #if defined(__HIP_PLATFORM_AMD__) + assert (ivar == 1); + #elif defined(__HIP_PLATFORM_NVIDIA__) + assert (ivar == 11); + #else + assert (0); + #endif + + + /* Check whether the omp_ipr_device -> hipDevice_t yields a valid device. */ + + hipDevice_t hip_dev = (int) omp_get_interop_int (obj, omp_ipr_device, &res); + assert (res == omp_irc_success); + + /* Assume a clock size is available and > 1 GHz; value is in kHz. */ + hip_err = hipDeviceGetAttribute (&ivar, hipDeviceAttributeClockRate, hip_dev); + assert (hip_err == hipSuccess); + assert (ivar > 1000000 /* kHz */); + + /* Assume that the MaxGridDimX is available and > 1024. */ + hip_err = hipDeviceGetAttribute (&ivar, hipDeviceAttributeMaxGridDimX, hip_dev); + assert (hip_err == hipSuccess); + assert (ivar > 1024); + + + /* Check whether the omp_ipr_device_context -> hipCtx_t yields a context. */ + + hipCtx_t hip_ctx = (hipCtx_t) omp_get_interop_ptr (obj, omp_ipr_device_context, &res); + assert (res == omp_irc_success); + + /* Assume API Version > 0 for Nvidia, hipErrorNotSupported for AMD. */ + ivar = -99; + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wdeprecated-declarations" + hip_err = hipCtxGetApiVersion (hip_ctx, &ivar); + #pragma GCC diagnostic pop + + if (vendor_is_amd) + assert (hip_err == hipErrorNotSupported && ivar == -99); + else + { + assert (hip_err == hipSuccess); + assert (ivar > 0); + } + + + /* Check whether the omp_ipr_targetsync -> hipStream_t yields a stream. */ + + hipStream_t hip_sm = (hipStream_t) omp_get_interop_ptr (obj, omp_ipr_targetsync, &res); + assert (res == omp_irc_success); + + hipDevice_t dev_stream = 99; + hip_err = hipStreamGetDevice (hip_sm, &dev_stream); + assert (hip_err == hipSuccess); + assert (dev_stream == hip_dev); + + /* All jobs should have been completed (as there were none none) */ + hip_err = hipStreamQuery (hip_sm); + assert (hip_err == hipSuccess); + + #pragma omp interop destroy(obj) +} diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas-amd-full.c b/libgomp/testsuite/libgomp.c/interop-hipblas-amd-full.c new file mode 100644 index 0000000..53c05bd --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hipblas-amd-full.c @@ -0,0 +1,7 @@ +/* { dg-require-effective-target gomp_hip_header_amd } */ +/* { dg-require-effective-target gomp_libhipblas } */ +/* { dg-additional-options "-lhipblas" } */ + +#define __HIP_PLATFORM_AMD__ 1 + +#include "interop-hipblas.h" diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas-amd-no-hip-header.c b/libgomp/testsuite/libgomp.c/interop-hipblas-amd-no-hip-header.c new file mode 100644 index 0000000..0ea3133 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hipblas-amd-no-hip-header.c @@ -0,0 +1,8 @@ +/* { dg-require-effective-target gomp_libhipblas } */ +/* { dg-additional-options "-lhipblas" } */ + +#define __HIP_PLATFORM_AMD__ 1 + +#define USE_HIP_FALLBACK_HEADER 1 + +#include "interop-hipblas.h" diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-full.c b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-full.c new file mode 100644 index 0000000..ed428c6 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-full.c @@ -0,0 +1,7 @@ +/* { dg-require-effective-target openacc_cublas } */ +/* { dg-require-effective-target gomp_hip_header_nvidia } */ +/* { dg-additional-options "-lcublas -Wno-deprecated-declarations" } */ + +#define __HIP_PLATFORM_NVIDIA__ 1 + +#include "interop-hipblas.h" diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-headers.c b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-headers.c new file mode 100644 index 0000000..1a31b30 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-headers.c @@ -0,0 +1,9 @@ +/* { dg-require-effective-target openacc_libcublas } */ +/* { dg-additional-options "-lcublas" } */ + +#define __HIP_PLATFORM_NVIDIA__ 1 + +#define USE_HIP_FALLBACK_HEADER 1 +#define USE_CUDA_FALLBACK_HEADER 1 + +#include "interop-hipblas.h" diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-hip-header.c b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-hip-header.c new file mode 100644 index 0000000..f85c13b --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-hip-header.c @@ -0,0 +1,8 @@ +/* { dg-require-effective-target openacc_cublas } */ +/* { dg-additional-options "-lcublas" } */ + +#define __HIP_PLATFORM_NVIDIA__ 1 + +#define USE_HIP_FALLBACK_HEADER 1 + +#include "interop-hipblas.h" diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas.h b/libgomp/testsuite/libgomp.c/interop-hipblas.h new file mode 100644 index 0000000..d7cb174 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hipblas.h @@ -0,0 +1,240 @@ +/* Check whether hipBlas' daxpy works with an interop object. + daxpy(N, DA, DX, INCX, DY, INCY) + calculates (for DX = DY = 1): + DY(1:N) = DY(1:N) + DA * DX(1:N) + and otherwise N array elements, taking every INCX-th or INCY-th one, repectively. + +Based on the interop example in OpenMP's example document */ + +/* Minimal check whether HIP works - by checking whether the API routines + seem to work. This includes a fallback if the header is not + available. */ + +#if !defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) + #error "Either __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__ must be defined" +#endif + +#if defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) + #error "Either __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__ must be defined" +#endif + + +#include <assert.h> +#include <omp.h> +#include "../libgomp.c-c++-common/on_device_arch.h" + + +#if __has_include(<hipblas/hipblas.h>) && (__has_include(<library_types.h>) || !defined(__HIP_PLATFORM_NVIDIA__)) && !defined(USE_HIP_FALLBACK_HEADER) + #ifdef __HIP_PLATFORM_NVIDIA__ + /* There seems to be an issue with hip/library_types.h including + CUDA's "library_types.h". Include CUDA's one explicitly here. + Could possibly worked around by using -isystem vs. -I. */ + #include <library_types.h> + + /* For some reasons, the following symbols do not seem to get + mapped from HIP to CUDA, causing link errors. */ + #define hipblasSetStream cublasSetStream_v2 + #define hipblasDaxpy cublasDaxpy_v2 + #define hipblasCreate cublasCreate_v2 + #endif + #include <hipblas/hipblas.h> + +#elif defined(__HIP_PLATFORM_AMD__) + /* Add a poor man's fallback declaration. */ + #if !defined(USE_HIP_FALLBACK_HEADER) + #warning "Using fallback declaration for <hipblas/hipblas.h> for __HIP_PLATFORM_AMD__" + #endif + + typedef enum + { + HIPBLAS_STATUS_SUCCESS = 0 + + } hipblasStatus_t; + + typedef struct ihipStream_t* hipStream_t; + typedef void* hipblasHandle_t; + + hipblasStatus_t hipblasCreate (hipblasHandle_t*); + hipblasStatus_t hipblasSetStream (hipblasHandle_t, hipStream_t); + hipblasStatus_t hipblasDaxpy (hipblasHandle_t, int, const double*, const double*, int, double*, int); + +#else + /* Add a poor man's fallback declaration. */ + #if !defined(USE_HIP_FALLBACK_HEADER) + #warning "Using fallback declaration for <hipblas/hipblas.h> for __HIP_PLATFORM_NVIDA__" + #endif + + #if __has_include(<cuda.h>) && __has_include(<cudaTypedefs.h>) && __has_include(<cuda_runtime.h>) && __has_include(<cublas_v2.h>) && !defined(USE_CUDA_FALLBACK_HEADER) + #include <cuda.h> + #include <cudaTypedefs.h> + #include <cuda_runtime.h> + #include <cublas_v2.h> + + #else + /* Add a poor man's fallback declaration. */ + #if defined(USE_CUDA_FALLBACK_HEADER) + // no warning + #elif !__has_include(<cuda.h>) + #warning "Using GCC's cuda.h as fallback for cuda.h" + #elif !__has_include(<cudaTypedefs.h>) + #warning "Using GCC's cuda.h as fallback for cudaTypedefs.h" + #elif !__has_include(<cuda_runtime.h>) + #warning "Using GCC's cuda.h as fallback for cuda_runtime.h" + #else + #warning "Using GCC's cuda.h as fallback for cublas_v2.h" + #endif + #include "../../../include/cuda/cuda.h" + + typedef enum { + CUBLAS_STATUS_SUCCESS = 0, + } cublasStatus_t; + + typedef CUstream cudaStream_t; + typedef struct cublasContext* cublasHandle_t; + + #define cublasCreate cublasCreate_v2 + cublasStatus_t cublasCreate_v2 (cublasHandle_t *); + + #define cublasSetStream cublasSetStream_v2 + cublasStatus_t cublasSetStream_v2 (cublasHandle_t, cudaStream_t); + + #define cublasDaxpy cublasDaxpy_v2 + cublasStatus_t cublasDaxpy_v2(cublasHandle_t, int, const double*, const double*, int, double*, int); + #endif + + #define HIPBLAS_STATUS_SUCCESS CUBLAS_STATUS_SUCCESS + #define hipblasStatus_t cublasStatus_t + #define hipStream_t cudaStream_t + #define hipblasHandle_t cublasHandle_t + #define hipblasCreate cublasCreate + #define hipblasSetStream cublasSetStream + #define hipblasDaxpy cublasDaxpy +#endif + +static int used_variant = 0; + +void +run_hipBlasdaxpy (int n, double da, const double *dx, int incx, double *dy, int incy, omp_interop_t obj) +{ + used_variant = 1; + + omp_interop_rc_t res; + hipblasStatus_t stat; + + omp_intptr_t fr = omp_get_interop_int(obj, omp_ipr_fr_id, &res); + assert (res == omp_irc_success && fr == omp_ifr_hip); + + hipStream_t stream = (hipStream_t) omp_get_interop_ptr (obj, omp_ipr_targetsync, &res); + assert (res == omp_irc_success); + + hipblasHandle_t handle; + stat = hipblasCreate (&handle); + assert (stat == HIPBLAS_STATUS_SUCCESS); + + stat = hipblasSetStream (handle, stream); + assert (stat == HIPBLAS_STATUS_SUCCESS); + + /* 'da' can be in host or device space, 'dx' and 'dy' must be in device space. */ + stat = hipblasDaxpy (handle, n, &da, dx, 1, dy, 1) ; + assert (stat == HIPBLAS_STATUS_SUCCESS); +} + +#if defined(__HIP_PLATFORM_AMD__) +#pragma omp declare variant(run_hipBlasdaxpy) \ + match(construct={dispatch}, target_device={kind(nohost), arch("amdgcn")}) \ + adjust_args(need_device_ptr : dx, dy) \ + append_args(interop(targetsync, prefer_type("hip"))) +#elif defined(__HIP_PLATFORM_NVIDIA__) +#pragma omp declare variant(run_hipBlasdaxpy) \ + match(construct={dispatch}, target_device={kind(nohost), arch("nvptx")}) \ + adjust_args(need_device_ptr : dx, dy) \ + append_args(interop(targetsync, prefer_type("hip"))) +#else + #error "wrong platform" +#endif + +void +run_daxpy (int n, double da, const double *dx, int incx, double *dy, int incy) +{ + used_variant = 2; + + if (incx == 1 && incy == 1) + #pragma omp simd + for (int i = 0; i < n; i++) + dy[i] += da * dx[i]; + else + { + int ix = 0; + int iy = 0; + for (int i = 0; i < n; i++) + { + dy[iy] += da * dx[ix]; + ix += incx; + iy += incy; + } + } +} + + +void +run_test (int dev) +{ + constexpr int N = 1024; + + // A = {1,2,...,N} + // B = {-1, -2, ..., N} + // B' = daxpy (N, 3, A, incx=1, B, incy=1) + // = B + 3*A + // -> B' = {0, 2, 4, 6, ... } + + double A[N], B[N]; + double factor = 3.0; + for (int i = 0; i < N; i++) + { + A[i] = i; + B[i] = -i; + } + + if (dev != omp_initial_device && dev != omp_get_num_devices ()) + { + #pragma omp target enter data device(dev) map(A, B) + } + + used_variant = 99; + #pragma omp dispatch device(dev) + run_daxpy (N, factor, A, 1, B, 1); + + if (dev != omp_initial_device && dev != omp_get_num_devices ()) + { + #pragma omp target exit data device(dev) map(release: A) map(from: B) + + int tmp = omp_get_default_device (); + omp_set_default_device (dev); +#if defined(__HIP_PLATFORM_AMD__) + if (on_device_arch_gcn ()) +#else + if (on_device_arch_nvptx ()) +#endif + assert (used_variant == 1); + else + assert (used_variant == 2); + omp_set_default_device (tmp); + } + else + assert (used_variant == 2); + + for (int i = 0; i < N; i++) + assert (B[i] == 2*i); +} + +int +main () +{ + int ndev = omp_get_num_devices (); + + for (int dev = 0; dev <= ndev; dev++) + run_test (dev); + run_test (omp_initial_device); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/interop-hsa.c b/libgomp/testsuite/libgomp.c/interop-hsa.c new file mode 100644 index 0000000..21ac91c --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hsa.c @@ -0,0 +1,205 @@ +/* { dg-additional-options "-ldl" } */ +/* { dg-require-effective-target offload_device_gcn } + The 'asm' insert is valid for GCN only: + { dg-additional-options -foffload=amdgcn-amdhsa } */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <omp.h> +#include <assert.h> +#include <dlfcn.h> +#include "../../../include/hsa.h" +#include "../../config/gcn/libgomp-gcn.h" + +#define STACKSIZE (100 * 1024) +#define HEAPSIZE (10 * 1024 * 1024) +#define ARENASIZE HEAPSIZE + +/* This code fragment must be optimized or else the host-fallback kernel has + * invalid ASM inserts. The rest of the file can be compiled safely at -O0. */ +#pragma omp declare target +uintptr_t __attribute__((optimize("O1"))) +get_kernel_ptr () +{ + uintptr_t val; + if (!omp_is_initial_device ()) + /* "main._omp_fn.0" is the name GCC gives the first OpenMP target + * region in the "main" function. + * The ".kd" suffix is added by the LLVM assembler when it creates the + * kernel meta-data, and this is what we need to launch a kernel. */ + asm ("s_getpc_b64 %0\n\t" + "s_add_u32 %L0, %L0, main._omp_fn.0.kd@rel32@lo+4\n\t" + "s_addc_u32 %H0, %H0, main._omp_fn.0.kd@rel32@hi+4" + : "=Sg"(val)); + return val; +} +#pragma omp end declare target + +int +main(int argc, char** argv) +{ + + /* Load the HSA runtime DLL. */ + void *hsalib = dlopen ("libhsa-runtime64.so.1", RTLD_LAZY); + assert (hsalib); + + hsa_status_t (*hsa_signal_create) (hsa_signal_value_t initial_value, + uint32_t num_consumers, + const hsa_agent_t *consumers, + hsa_signal_t *signal) + = dlsym (hsalib, "hsa_signal_create"); + assert (hsa_signal_create); + + uint64_t (*hsa_queue_load_write_index_relaxed) (const hsa_queue_t *queue) + = dlsym (hsalib, "hsa_queue_load_write_index_relaxed"); + assert (hsa_queue_load_write_index_relaxed); + + void (*hsa_signal_store_relaxed) (hsa_signal_t signal, + hsa_signal_value_t value) + = dlsym (hsalib, "hsa_signal_store_relaxed"); + assert (hsa_signal_store_relaxed); + + hsa_signal_value_t (*hsa_signal_wait_relaxed) (hsa_signal_t signal, + hsa_signal_condition_t condition, + hsa_signal_value_t compare_value, + uint64_t timeout_hint, + hsa_wait_state_t wait_state_hint) + = dlsym (hsalib, "hsa_signal_wait_relaxed"); + assert (hsa_signal_wait_relaxed); + + void (*hsa_queue_store_write_index_relaxed) (const hsa_queue_t *queue, + uint64_t value) + = dlsym (hsalib, "hsa_queue_store_write_index_relaxed"); + assert (hsa_queue_store_write_index_relaxed); + + hsa_status_t (*hsa_signal_destroy) (hsa_signal_t signal) + = dlsym (hsalib, "hsa_signal_destroy"); + assert (hsa_signal_destroy); + + /* Set up the device data environment. */ + int test_data_value = 0; +#pragma omp target enter data map(test_data_value) + + /* Get the interop details. */ + int device_num = omp_get_default_device(); + hsa_agent_t *gpu_agent; + hsa_queue_t *hsa_queue = NULL; + + omp_interop_t interop = omp_interop_none; +#pragma omp interop init(target, targetsync, prefer_type("hsa"): interop) device(device_num) + assert (interop != omp_interop_none); + + omp_interop_rc_t retcode; + omp_interop_fr_t fr = omp_get_interop_int (interop, omp_ipr_fr_id, &retcode); + assert (retcode == omp_irc_success); + assert (fr == omp_ifr_hsa); + + gpu_agent = omp_get_interop_ptr(interop, omp_ipr_device, &retcode); + assert (retcode == omp_irc_success); + + hsa_queue = omp_get_interop_ptr(interop, omp_ipr_targetsync, &retcode); + assert (retcode == omp_irc_success); + assert (hsa_queue); + + /* Call an offload kernel via OpenMP/libgomp. + * + * This kernel serves two purposes: + * 1) Lookup the device-side load-address of itself (thus avoiding the + * need to access the libgomp internals). + * 2) Count how many times it is called. + * We then call it once using OpenMP, and once manually, and check + * the counter reads "2". */ + uint64_t kernel_object = 0; +#pragma omp target map(from:kernel_object) map(present,alloc:test_data_value) + { + kernel_object = get_kernel_ptr (); + ++test_data_value; + } + + assert (kernel_object != 0); + + /* Configure the same kernel to run again, using HSA manually this time. */ + hsa_status_t status; + hsa_signal_t signal; + status = hsa_signal_create(1, 0, NULL, &signal); + assert (status == HSA_STATUS_SUCCESS); + + /* The kernel is built by GCC for OpenMP, so we need to pass the same + * data pointers that libgomp would pass in. */ + struct { + uintptr_t test_data_value; + uintptr_t kernel_object; + } tgtaddrs; + +#pragma omp target data use_device_addr(test_data_value) + { + tgtaddrs.test_data_value = (uintptr_t)&test_data_value; + tgtaddrs.kernel_object = (uintptr_t)omp_target_alloc (8, device_num); + } + + /* We also need to duplicate the launch ABI used by plugin-gcn.c. */ + struct kernargs_abi args; /* From libgomp-gcn.h. */ + args.dummy1 = (int64_t)&tgtaddrs; + args.out_ptr = (int64_t)malloc (sizeof (struct output)); /* Host side. */ + args.heap_ptr = (int64_t)omp_target_alloc (HEAPSIZE, device_num); + args.arena_ptr = (int64_t)omp_target_alloc (ARENASIZE, device_num); + args.stack_ptr = (int64_t)omp_target_alloc (STACKSIZE, device_num); + args.arena_size_per_team = ARENASIZE; + args.stack_size_per_thread = STACKSIZE; + + /* Build the HSA dispatch packet, and insert it into the queue. */ + uint64_t packet_id = hsa_queue_load_write_index_relaxed (hsa_queue); + const uint32_t queueMask = hsa_queue->size - 1; + hsa_kernel_dispatch_packet_t *dispatch_packet = + &(((hsa_kernel_dispatch_packet_t *) + (hsa_queue->base_address))[packet_id & queueMask]); + + dispatch_packet->setup = 3 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + dispatch_packet->workgroup_size_x = 1; + dispatch_packet->workgroup_size_y = 64; + dispatch_packet->workgroup_size_z = 1; + dispatch_packet->grid_size_x = 1; + dispatch_packet->grid_size_y = 64; + dispatch_packet->grid_size_z = 1; + dispatch_packet->completion_signal = signal; + dispatch_packet->kernel_object = kernel_object; + dispatch_packet->kernarg_address = &args; + dispatch_packet->private_segment_size = 0; + dispatch_packet->group_segment_size = 1536; + + uint16_t header = 0; + header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + + /* Finish writing the packet header with an atomic release. */ + __atomic_store_n((uint16_t*)dispatch_packet, header, __ATOMIC_RELEASE); + + hsa_queue_store_write_index_relaxed (hsa_queue, packet_id + 1); + + ;/* Run the kernel and wait for it to complete. */ + hsa_signal_store_relaxed(hsa_queue->doorbell_signal, packet_id); + while (hsa_signal_wait_relaxed(signal, HSA_SIGNAL_CONDITION_LT, 1, + UINT64_MAX, HSA_WAIT_STATE_ACTIVE) != 0) + ; + + /* Clean up HSA. */ + hsa_signal_destroy(signal); + free ((void*)args.out_ptr); + omp_target_free ((void*)args.heap_ptr, device_num); + omp_target_free ((void*)args.arena_ptr, device_num); + omp_target_free ((void*)args.stack_ptr, device_num); + omp_target_free ((void*)tgtaddrs.kernel_object, device_num); + + /* Clean up OpenMP. */ + #pragma omp interop destroy(interop) + + /* Bring the data back from the device. */ +#pragma omp target exit data map(test_data_value) + + /* Ensure the kernel was called twice. Once by OpenMP, once by HSA. */ + assert (test_data_value == 2); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/target-map-zero-sized-2.c b/libgomp/testsuite/libgomp.c/target-map-zero-sized-2.c new file mode 100644 index 0000000..3220828 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-map-zero-sized-2.c @@ -0,0 +1,74 @@ +int +main () +{ + int i, n; + int data[] = {1,2}; + struct S { int **ptrset; }; + +// ----------------------------------- + +/* The produced mapping for sptr1->ptrset[i][:n] + + GOMP_MAP_STRUCT (size = 1) + GOMP_MAP_ZERO_LEN_ARRAY_SECTION + GOMP_MAP_ZERO_LEN_ARRAY_SECTION + GOMP_MAP_ATTACH + GOMP_MAP_ATTACH -> attaching to 2nd GOMP_MAP_ZERO_LEN_ARRAY_SECTION + +which get split into 3 separate map_vars call; in particular, +the latter is separate and points to an unmpapped variable. + +Thus, it failed with: + libgomp: pointer target not mapped for attach */ + + struct S s1, *sptr1; + s1.ptrset = (int **) __builtin_malloc (sizeof(void*) * 3); + s1.ptrset[0] = data; + s1.ptrset[1] = data; + s1.ptrset[2] = data; + sptr1 = &s1; + + i = 1; + n = 0; + #pragma omp target enter data map(sptr1[:1], sptr1->ptrset[:3]) + #pragma omp target enter data map(sptr1->ptrset[i][:n]) + + #pragma omp target exit data map(sptr1->ptrset[i][:n]) + #pragma omp target exit data map(sptr1[:1], sptr1->ptrset[:3]) + + __builtin_free (s1.ptrset); + +// ----------------------------------- + +/* The produced mapping for sptr2->ptrset[i][:n] is similar: + + GOMP_MAP_STRUCT (size = 1) + GOMP_MAP_ZERO_LEN_ARRAY_SECTION + GOMP_MAP_TO ! this one has now a finite size + GOMP_MAP_ATTACH + GOMP_MAP_ATTACH -> attach to the GOMP_MAP_TO + +As the latter GOMP_MAP_ATTACH has now a pointer target, +the attachment worked. */ + + struct S s2, *sptr2; + s2.ptrset = (int **) __builtin_malloc (sizeof(void*) * 3); + s2.ptrset[0] = data; + s2.ptrset[1] = data; + s2.ptrset[2] = data; + sptr2 = &s2; + + i = 1; + n = 2; + #pragma omp target enter data map(sptr2[:1], sptr2->ptrset[:3]) + #pragma omp target enter data map(sptr2->ptrset[i][:n]) + + #pragma omp target + if (sptr2->ptrset[1][0] != 1 || sptr2->ptrset[1][1] != 2) + __builtin_abort (); + + #pragma omp target exit data map(sptr2->ptrset[i][:n]) + #pragma omp target exit data map(sptr2[:1], sptr2->ptrset[:3]) + + __builtin_free (s2.ptrset); +} diff --git a/libgomp/testsuite/libgomp.c/target-map-zero-sized-3.c b/libgomp/testsuite/libgomp.c/target-map-zero-sized-3.c new file mode 100644 index 0000000..580c6ad --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-map-zero-sized-3.c @@ -0,0 +1,50 @@ +int +main () +{ + int i, n; + int data[] = {1,2}; + struct S { + int **ptrset; + int **ptrset2; + }; + + /* This is the same as target-map-zero-sized-3.c, but by mixing + mapped and non-mapped items, the mapping before the ATTACH + might (or here: is) not actually associated with the the + pointer used for attaching. Thus, if one does a simple + + if (openmp_p + && (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) + && mapnum == 1) + check in target.c's gomp_map_vars_internal will fail + as mapnum > 1 but still the map associated with this + ATTACH is in a different set. */ + + struct S s1, *sptr1; + s1.ptrset = (int **) __builtin_malloc (sizeof(void*) * 3); + s1.ptrset2 = (int **) __builtin_malloc (sizeof(void*) * 3); + s1.ptrset[0] = data; + s1.ptrset[1] = data; + s1.ptrset[2] = data; + s1.ptrset2[0] = data; + s1.ptrset2[1] = data; + s1.ptrset2[2] = data; + sptr1 = &s1; + + i = 1; + n = 0; + #pragma omp target enter data map(data) + #pragma omp target enter data map(sptr1[:1], sptr1->ptrset[:3], sptr1->ptrset2[:3]) + #pragma omp target enter data map(sptr1->ptrset[i][:n], sptr1->ptrset2[i][:n]) + + #pragma omp target map(sptr1->ptrset[i][:n], sptr1->ptrset2[i][:n]) + if (sptr1->ptrset2[1][0] != 1 || sptr1->ptrset2[1][1] != 2) + __builtin_abort (); + + #pragma omp target exit data map(sptr1->ptrset[i][:n], sptr1->ptrset2[i][:n]) + #pragma omp target exit data map(sptr1[:1], sptr1->ptrset[:3], sptr1->ptrset2[:3]) + #pragma omp target exit data map(data) + + __builtin_free (s1.ptrset); + __builtin_free (s1.ptrset2); +} diff --git a/libgomp/testsuite/libgomp.c/target-map-zero-sized.c b/libgomp/testsuite/libgomp.c/target-map-zero-sized.c new file mode 100644 index 0000000..7c4ab80 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-map-zero-sized.c @@ -0,0 +1,107 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O0" } */ + +/* Issue showed up in the real world when large data was distributed + over multiple MPI progresses - such that for one process n == 0 + happend at run time. + + Before map(var[:0]) and map(var[:n]) with n > 0 was handled, + this patch now also handles map(var[:n]) with n == 0. + + Failed before with "libgomp: pointer target not mapped for attach". */ + +/* Here, the base address is shifted - which should have no effect, + but must work as well. */ +void +with_offset () +{ + struct S { + int *ptr1, *ptr2; + }; + struct S s1, s2; + int *a, *b, *c, *d; + s1.ptr1 = (int *) 0L; + s1.ptr2 = (int *) 0xdeedbeef; + s2.ptr1 = (int *) 0L; + s2.ptr2 = (int *) 0xdeedbeef; + a = (int *) 0L; + b = (int *) 0xdeedbeef; + c = (int *) 0L; + d = (int *) 0xdeedbeef; + + int n1, n2, n3, n4; + n1 = n2 = n3 = n4 = 0; + + #pragma omp target enter data map(s1.ptr1[4:n1], s1.ptr2[6:n2], a[3:n3], b[2:n4]) + + #pragma omp target map(s2.ptr1[4:n1], s2.ptr2[2:n2], c[6:n3], d[9:n4]) + { + if (s2.ptr1 != (void *) 0L || s2.ptr2 != (void *) 0xdeedbeef + || c != (void *) 0L || d != (void *) 0xdeedbeef) + __builtin_abort (); + } + + #pragma omp target map(s1.ptr1[4:n1], s1.ptr2[6:n2], a[3:n3], b[2:n4]) + { + if (s1.ptr1 != (void *) 0L || s1.ptr2 != (void *) 0xdeedbeef + || a != (void *) 0L || b != (void *) 0xdeedbeef) + __builtin_abort (); + } + + #pragma omp target + { + if (s1.ptr1 != (void *) 0L || s1.ptr2 != (void *) 0xdeedbeef + || a != (void *) 0L || b != (void *) 0xdeedbeef) + __builtin_abort (); + } + + #pragma omp target exit data map(s1.ptr1[4:n1], s1.ptr2[6:n2], a[3:n3], b[2:n4]) +} + +int +main () +{ + struct S { + int *ptr1, *ptr2; + }; + struct S s1, s2; + int *a, *b, *c, *d; + s1.ptr1 = (int *) 0L; + s1.ptr2 = (int *) 0xdeedbeef; + s2.ptr1 = (int *) 0L; + s2.ptr2 = (int *) 0xdeedbeef; + a = (int *) 0L; + b = (int *) 0xdeedbeef; + c = (int *) 0L; + d = (int *) 0xdeedbeef; + + int n1, n2, n3, n4; + n1 = n2 = n3 = n4 = 0; + + #pragma omp target enter data map(s1.ptr1[:n1], s1.ptr2[:n2], a[:n3], b[:n4]) + + #pragma omp target map(s2.ptr1[:n1], s2.ptr2[:n2], c[:n3], d[:n4]) + { + if (s2.ptr1 != (void *) 0L || s2.ptr2 != (void *) 0xdeedbeef + || c != (void *) 0L || d != (void *) 0xdeedbeef) + __builtin_abort (); + } + + #pragma omp target map(s1.ptr1[:n1], s1.ptr2[:n2], a[:n3], b[:n4]) + { + if (s1.ptr1 != (void *) 0L || s1.ptr2 != (void *) 0xdeedbeef + || a != (void *) 0L || b != (void *) 0xdeedbeef) + __builtin_abort (); + } + + #pragma omp target + { + if (s1.ptr1 != (void *) 0L || s1.ptr2 != (void *) 0xdeedbeef + || a != (void *) 0L || b != (void *) 0xdeedbeef) + __builtin_abort (); + } + + #pragma omp target exit data map(s1.ptr1[:n1], s1.ptr2[:n2], a[:n3], b[:n4]) + + with_offset (); +} diff --git a/libgomp/testsuite/libgomp.fortran/alloc-comp-4.f90 b/libgomp/testsuite/libgomp.fortran/alloc-comp-4.f90 new file mode 100644 index 0000000..d5e982b --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/alloc-comp-4.f90 @@ -0,0 +1,75 @@ +! +! Check that mapping with map(var%tiles(1)) works. +! +! This uses deep mapping to handle the allocatable +! derived-type components +! +! The tricky part is that GCC generates intermittently +! an SSA_NAME that needs to be resolved. +! +module m +type t + integer, allocatable :: den1(:,:), den2(:,:) +end type t + +type t2 + type(t), allocatable :: tiles(:) +end type t2 +end + +use m +use iso_c_binding +implicit none (type, external) +type(t2), target :: var +logical :: is_self_map +type(C_ptr) :: pden1, pden2, ptiles, ptiles1 + +allocate(var%tiles(1)) +var%tiles(1)%den1 = reshape([1,2,3,4],[2,2]) +var%tiles(1)%den2 = reshape([11,22,33,44],[2,2]) + +ptiles = c_loc(var%tiles) +ptiles1 = c_loc(var%tiles(1)) +pden1 = c_loc(var%tiles(1)%den1) +pden2 = c_loc(var%tiles(1)%den2) + + +is_self_map = .false. +!$omp target map(to: is_self_map) + is_self_map = .true. +!$omp end target + +!$omp target enter data map(var%tiles(1)) + +!$omp target firstprivate(ptiles, ptiles1, pden1, pden2) + if (any (var%tiles(1)%den1 /= reshape([1,2,3,4],[2,2]))) stop 1 + if (any (var%tiles(1)%den2 /= reshape([11,22,33,44],[2,2]))) stop 2 + var%tiles(1)%den1 = var%tiles(1)%den1 + 5 + var%tiles(1)%den2 = var%tiles(1)%den2 + 7 + + if (is_self_map) then + if (.not. c_associated (ptiles, c_loc(var%tiles))) stop 3 + if (.not. c_associated (ptiles1, c_loc(var%tiles(1)))) stop 4 + if (.not. c_associated (pden1, c_loc(var%tiles(1)%den1))) stop 5 + if (.not. c_associated (pden2, c_loc(var%tiles(1)%den2))) stop 6 + else + if (c_associated (ptiles, c_loc(var%tiles))) stop 3 + if (c_associated (ptiles1, c_loc(var%tiles(1)))) stop 4 + if (c_associated (pden1, c_loc(var%tiles(1)%den1))) stop 5 + if (c_associated (pden2, c_loc(var%tiles(1)%den2))) stop 6 + endif +!$omp end target + +if (is_self_map) then + if (any (var%tiles(1)%den1 /= 5 + reshape([1,2,3,4],[2,2]))) stop 7 + if (any (var%tiles(1)%den2 /= 7 + reshape([11,22,33,44],[2,2]))) stop 8 +else + if (any (var%tiles(1)%den1 /= reshape([1,2,3,4],[2,2]))) stop 7 + if (any (var%tiles(1)%den2 /= reshape([11,22,33,44],[2,2]))) stop 8 +endif + +!$omp target exit data map(var%tiles(1)) + +if (any (var%tiles(1)%den1 /= 5 + reshape([1,2,3,4],[2,2]))) stop 7 +if (any (var%tiles(1)%den2 /= 7 + reshape([11,22,33,44],[2,2]))) stop 8 +end diff --git a/libgomp/testsuite/libgomp.fortran/allocatable-comp.f90 b/libgomp/testsuite/libgomp.fortran/allocatable-comp.f90 new file mode 100644 index 0000000..383ecba --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/allocatable-comp.f90 @@ -0,0 +1,53 @@ +implicit none +type t + integer, allocatable :: a, b(:) +end type t +type(t) :: x, y, z +integer :: i + +!$omp target map(to: x) + if (allocated(x%a)) stop 1 + if (allocated(x%b)) stop 2 +!$omp end target + +allocate(x%a, x%b(-4:6)) +x%b(:) = [(i, i=-4,6)] + +!$omp target map(to: x) + if (.not. allocated(x%a)) stop 3 + if (.not. allocated(x%b)) stop 4 + if (lbound(x%b,1) /= -4) stop 5 + if (ubound(x%b,1) /= 6) stop 6 + if (any (x%b /= [(i, i=-4,6)])) stop 7 +!$omp end target + + +! The following only works with arrays due to +! PR fortran/96668 + +!$omp target enter data map(to: y, z) + +!$omp target map(to: y, z) + if (allocated(y%b)) stop 8 + if (allocated(z%b)) stop 9 +!$omp end target + +allocate(y%b(5), z%b(3)) +y%b = 42 +z%b = 99 + +! (implicitly) 'tofrom' mapped +! Planned for OpenMP 6.0 (but common extension) +! OpenMP <= 5.0 unclear +!$omp target map(to: y) + if (.not.allocated(y%b)) stop 10 + if (any (y%b /= 42)) stop 11 +!$omp end target + +! always map: OpenMP 5.1 (clarified) +!$omp target map(always, tofrom: z) + if (.not.allocated(z%b)) stop 12 + if (any (z%b /= 99)) stop 13 +!$omp end target + +end diff --git a/libgomp/testsuite/libgomp.fortran/allocate-8a.f90 b/libgomp/testsuite/libgomp.fortran/allocate-8a.f90 new file mode 100644 index 0000000..5f6c8c1 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/allocate-8a.f90 @@ -0,0 +1,45 @@ +! { dg-additional-options "-fopenmp-allocators" } +! { dg-additional-options "-fdump-tree-omplower" } +program main + use iso_c_binding + use omp_lib + implicit none (type, external) + integer(omp_allocator_handle_kind):: alloc_h + integer :: i, N + integer(c_intptr_t) :: intptr + integer, allocatable :: A(:) + type(omp_alloctrait):: traits(1) = [omp_alloctrait(omp_atk_alignment, 128)] + + N = 10 + alloc_h = omp_init_allocator(omp_default_mem_space, 1, traits) + + !$omp allocate(A) allocator(alloc_h) + allocate(A(N)) + a(:) = [(i, i=1,N)] + if (mod (transfer (loc(a), intptr),128) /= 0) & + stop 1 + if (any (a /= [(i, i=1,N)])) & + stop 2 + deallocate(A) + !$omp allocate(A) allocator(alloc_h) align(512) + allocate(A(N)) + block + integer, allocatable :: B(:) + !$omp allocators allocate(allocator(alloc_h), align(256) : B) + allocate(B(N)) + B(:) = [(2*i, i=1,N)] + A(:) = B + if (mod (transfer (loc(B), intptr), 256) /= 0) & + stop 1 + ! end of scope deallocation + end block + if (mod (transfer (loc(a), intptr),512) /= 0) & + stop 1 + if (any (a /= [(2*i, i=1,N)])) & + stop 2 + deallocate(A) ! Must deallocate here - before deallocator is destroyed + call omp_destroy_allocator(alloc_h) + ! No auto dealloc of A because it is SAVE +end +! { dg-final { scan-tree-dump-times "__builtin_GOMP_alloc \\(" 3 "omplower" } } +! { dg-final { scan-tree-dump-times "__builtin_GOMP_free \\(" 3 "omplower" } } diff --git a/libgomp/testsuite/libgomp.fortran/get-mapped-ptr-1.f90 b/libgomp/testsuite/libgomp.fortran/get-mapped-ptr-1.f90 index 9b6334f..be60acc 100644 --- a/libgomp/testsuite/libgomp.fortran/get-mapped-ptr-1.f90 +++ b/libgomp/testsuite/libgomp.fortran/get-mapped-ptr-1.f90 @@ -19,7 +19,7 @@ program main if (omp_target_associate_ptr (c_loc (q), p, c_sizeof (q), & 0_c_size_t, d) == 0) then - if(c_associated (omp_get_mapped_ptr (c_loc (q), -5))) & + if(c_associated (omp_get_mapped_ptr (c_loc (q), -6))) & stop 1 if(c_associated (omp_get_mapped_ptr (c_loc (q), & diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip-amd-full.F90 b/libgomp/testsuite/libgomp.fortran/interop-hip-amd-full.F90 new file mode 100644 index 0000000..eb2f437 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/interop-hip-amd-full.F90 @@ -0,0 +1,10 @@ +! { dg-do run { target { offload_device_gcn } } } +! { dg-do link { target { ! offload_device_gcn } } } + +! { dg-require-effective-target gomp_hipfort_module } +! { dg-require-effective-target gomp_libamdhip64 } +! { dg-additional-options "-lamdhip64" } + +#define HAVE_HIPFORT 1 + +#include "interop-hip.h" diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip-amd-no-module.F90 b/libgomp/testsuite/libgomp.fortran/interop-hip-amd-no-module.F90 new file mode 100644 index 0000000..0ebbe80 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/interop-hip-amd-no-module.F90 @@ -0,0 +1,9 @@ +! { dg-do run { target { offload_device_gcn } } } +! { dg-do link { target { ! offload_device_gcn } } } + +! { dg-require-effective-target gomp_libamdhip64 } +! { dg-additional-options "-lamdhip64" } + +#define USE_HIP_FALLBACK_MODULE 1 + +#include "interop-hip.h" diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-full.F90 b/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-full.F90 new file mode 100644 index 0000000..d29a689 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-full.F90 @@ -0,0 +1,12 @@ +! { dg-do run { target { offload_device_nvptx } } } +! { dg-do link { target { ! offload_device_nvptx } } } + +! { dg-require-effective-target gomp_hipfort_module } +! { dg-require-effective-target openacc_cudart } +! { dg-require-effective-target openacc_cuda } +! { dg-additional-options "-lcuda -lcudart" } + +#define HAVE_HIPFORT 1 +#define USE_CUDA_NAMES 1 + +#include "interop-hip.h" diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90 b/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90 new file mode 100644 index 0000000..2063610 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90 @@ -0,0 +1,11 @@ +! { dg-do run { target { offload_device_nvptx } } } +! { dg-do link { target { ! offload_device_nvptx } } } + +! { dg-require-effective-target openacc_libcudart } +! { dg-require-effective-target openacc_libcuda } +! { dg-additional-options "-lcuda -lcudart" } + +#define USE_CUDA_NAMES 1 +#define USE_HIP_FALLBACK_MODULE 1 + +#include "interop-hip.h" diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip.h b/libgomp/testsuite/libgomp.fortran/interop-hip.h new file mode 100644 index 0000000..753ccce --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/interop-hip.h @@ -0,0 +1,214 @@ +! Minimal check whether HIP works - by checking whether the API routines +! seem to work. This includes a fallback if hipfort is not available + +#ifndef HAVE_HIPFORT +#ifndef USE_HIP_FALLBACK_MODULE +#if USE_CUDA_NAMES +#warning "Using fallback implementation for module hipfort as HAVE_HIPFORT is undefined (for NVIDA/CUDA)" +#else +#warning "Using fallback implementation for module hipfort as HAVE_HIPFORT is undefined - assume AMD as USE_CUDA_NAMES is unset" +#endif +#endif +module hipfort ! Minimal implementation for the testsuite + implicit none + + enum, bind(c) + enumerator :: hipSuccess = 0 + enumerator :: hipErrorNotSupported = 801 + end enum + + enum, bind(c) + enumerator :: hipDeviceAttributeClockRate = 5 + enumerator :: hipDeviceAttributeMaxGridDimX = 29 + end enum + + interface + integer(kind(hipSuccess)) function hipDeviceGetAttribute (ip, attr, dev) & +#if USE_CUDA_NAMES + bind(c, name="cudaDeviceGetAttribute") +#else + bind(c, name="hipDeviceGetAttribute") +#endif + use iso_c_binding, only: c_ptr, c_int + import + implicit none + type(c_ptr), value :: ip + integer(kind(hipDeviceAttributeClockRate)), value :: attr + integer(c_int), value :: dev + end + + integer(kind(hipSuccess)) function hipCtxGetApiVersion (ctx, ip) & +#if USE_CUDA_NAMES + bind(c, name="cudaCtxGetApiVersion") +#else + bind(c, name="hipCtxGetApiVersion") +#endif + use iso_c_binding, only: c_ptr + import + implicit none + type(c_ptr), value :: ctx, ip + end + + integer(kind(hipSuccess)) function hipStreamQuery (stream) & +#if USE_CUDA_NAMES + bind(c, name="cudaStreamQuery") +#else + bind(c, name="hipStreamQuery") +#endif + use iso_c_binding, only: c_ptr + import + implicit none + type(c_ptr), value :: stream + end + + integer(kind(hipSuccess)) function hipStreamGetFlags (stream, flags) & +#if USE_CUDA_NAMES + bind(c, name="cudaStreamGetFlags") +#else + bind(c, name="hipStreamGetFlags") +#endif + use iso_c_binding, only: c_ptr + import + implicit none + type(c_ptr), value :: stream + type(c_ptr), value :: flags + end + end interface +end module +#endif + +program main + use iso_c_binding, only: c_ptr, c_int, c_loc + use omp_lib + use hipfort + implicit none (type, external) + +! Only supported since CUDA 12.8 - skip for better compatibility +! ! Manally implement hipStreamGetDevice as hipfort misses it +! ! -> https://github.com/ROCm/hipfort/issues/238 +! interface +! integer(kind(hipSuccess)) function my_hipStreamGetDevice(stream, dev) & +!#if USE_CUDA_NAMES +! bind(c, name="cudaStreamGetDevice") +!#else +! bind(c, name="hipStreamGetDevice") +!#endif +! use iso_c_binding, only: c_ptr, c_int +! import +! implicit none +! type(c_ptr), value :: stream +! integer(c_int) :: dev +! end +! end interface + + integer(c_int), target :: ivar + integer(omp_interop_rc_kind) :: res + integer(omp_interop_kind) :: obj + integer(omp_interop_fr_kind) :: fr + integer(kind(hipSuccess)) :: hip_err + integer(c_int) :: hip_dev, dev_stream + type(c_ptr) :: hip_ctx, hip_sm + + logical :: vendor_is_amd + + obj = omp_interop_none + + !$omp interop init(target, targetsync, prefer_type("hip") : obj) + + fr = omp_get_interop_int (obj, omp_ipr_fr_id, res) + if (res /= omp_irc_success) error stop 1 + if (fr /= omp_ifr_hip) error stop 1 + + ivar = omp_get_interop_int (obj, omp_ipr_vendor, res) + if (ivar == 1) then ! AMD + vendor_is_amd = .true. + else if (ivar == 11) then ! Nvidia + vendor_is_amd = .false. + else + error stop 1 ! Unknown + endif +#if USE_CUDA_NAMES + if (vendor_is_amd) error stop 1 +#else + if (.not. vendor_is_amd) error stop 1 +#endif + + ! Check whether the omp_ipr_device -> hipDevice_t yields a valid device. + + hip_dev = omp_get_interop_int (obj, omp_ipr_device, res) + if (res /= omp_irc_success) error stop 1 + +! AMD messed up in Fortran with the attribute handling, missing the +! translation table it has for C. +block + enum, bind(c) + enumerator :: cudaDevAttrClockRate = 13 + enumerator :: cudaDevAttrMaxGridDimX = 5 + end enum + + ! Assume a clock size is available and > 1 GHz; value is in kHz. + ! c_loc is completely bogus, but as AMD messed up the interface ... + ! Cf. https://github.com/ROCm/hipfort/issues/239 +if (vendor_is_amd) then + hip_err = hipDeviceGetAttribute (c_loc(ivar), hipDeviceAttributeClockRate, hip_dev) +else + hip_err = hipDeviceGetAttribute (c_loc(ivar), cudaDevAttrClockRate, hip_dev) +endif + if (hip_err /= hipSuccess) error stop 1 + if (ivar <= 1000000) error stop 1 ! in kHz + + ! Assume that the MaxGridDimX is available and > 1024 + ! c_loc is completely bogus, but as AMD messed up the interface ... + ! Cf. https://github.com/ROCm/hipfort/issues/239 +if (vendor_is_amd) then + hip_err = hipDeviceGetAttribute (c_loc(ivar), hipDeviceAttributeMaxGridDimX, hip_dev) +else + hip_err = hipDeviceGetAttribute (c_loc(ivar), cudaDevAttrMaxGridDimX, hip_dev) +endif + if (hip_err /= hipSuccess) error stop 1 + if (ivar <= 1024) error stop 1 +end block + + + ! Check whether the omp_ipr_device_context -> hipCtx_t yields a context. + + hip_ctx = omp_get_interop_ptr (obj, omp_ipr_device_context, res) + if (res /= omp_irc_success) error stop 1 + +! ! Assume API Version > 0 for Nvidia, hipErrorNotSupported for AMD. */ +! ivar = -99 +! ! AMD deprectated hipCtxGetApiVersion (in C/C++) +! hip_err = hipCtxGetApiVersion (hip_ctx, c_loc(ivar)) +! +! if (vendor_is_amd) then +! if (hip_err /= hipErrorNotSupported .or. ivar /= -99) error stop 1 +! else +! if (hip_err /= hipSuccess) error stop 1 +! if (ivar <= 0) error stop 1 +! end if + + + ! Check whether the omp_ipr_targetsync -> hipStream_t yields a stream. + + hip_sm = omp_get_interop_ptr (obj, omp_ipr_targetsync, res) + if (res /= omp_irc_success) error stop 1 + +! Skip as this is only in CUDA 12.8 +! dev_stream = 99 +! ! Not (yet) implemented: https://github.com/ROCm/hipfort/issues/238 +! ! hip_err = hipStreamGetDevice (hip_sm, dev_stream) +! hip_err = my_hipStreamGetDevice (hip_sm, dev_stream) +! if (hip_err /= hipSuccess) error stop 1 +! if (dev_stream /= hip_dev) error stop 1 + + ! Get flags of the stream + hip_err = hipStreamGetFlags (hip_sm, c_loc (ivar)) + if (hip_err /= hipSuccess) error stop 1 + ! Accept any value + + ! All jobs should have been completed (as there were none none) + hip_err = hipStreamQuery (hip_sm) + if (hip_err /= hipSuccess) error stop 1 + + !$omp interop destroy(obj) +end diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-3.f90 b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-3.f90 new file mode 100644 index 0000000..9d48c7c --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-3.f90 @@ -0,0 +1,121 @@ +type t2 + integer x, y, z +end type t2 +type t + integer, allocatable :: A + integer, allocatable :: B(:) + type(t2), allocatable :: C + type(t2), allocatable :: D(:,:) +end type t + +type t3 + type(t) :: Q + type(t) :: R(5) +end type + +type(t) :: var, var2 +type(t3) :: var3, var4 + +! -------------------------------------- +! Assign + allocate +var%A = 45 +var%B = [1,2,3] +var%C = t2(6,5,4) +var%D = reshape([t2(1,2,3), t2(4,5,6), t2(11,12,13), t2(14,15,16)], [2,2]) + +! Assign + allocate +var2%A = 145 +var2%B = [991,992,993] +var2%C = t2(996,995,994) +var2%D = reshape([t2(199,299,399), t2(499,599,699), t2(1199,1299,1399), t2(1499,1599,1699)], [2,2]) + + +!$omp target map(to: var) map(tofrom: var2) + call foo(var, var2) +!$omp end target + +if (var2%A /= 45) stop 9 +if (any (var2%B /= [1,2,3])) stop 10 +if (var2%C%x /= 6) stop 11 +if (var2%C%y /= 5) stop 11 +if (var2%C%z /= 4) stop 11 +if (any (var2%D(:,:)%x /= reshape([1, 4, 11, 14], [2,2]))) stop 12 +if (any (var2%D(:,:)%y /= reshape([2, 5, 12, 15], [2,2]))) stop 12 +if (any (var2%D(:,:)%z /= reshape([3, 6, 13, 16], [2,2]))) stop 12 + +! -------------------------------------- +! Assign + allocate +var3%Q%A = 45 +var3%Q%B = [1,2,3] +var3%Q%C = t2(6,5,4) +var3%Q%D = reshape([t2(1,2,3), t2(4,5,6), t2(11,12,13), t2(14,15,16)], [2,2]) + +var3%R(2)%A = 45 +var3%R(2)%B = [1,2,3] +var3%R(2)%C = t2(6,5,4) +var3%R(2)%D = reshape([t2(1,2,3), t2(4,5,6), t2(11,12,13), t2(14,15,16)], [2,2]) + +! Assign + allocate +var4%Q%A = 145 +var4%Q%B = [991,992,993] +var4%Q%C = t2(996,995,994) +var4%Q%D = reshape([t2(199,299,399), t2(499,599,699), t2(1199,1299,1399), t2(1499,1599,1699)], [2,2]) + +var4%R(3)%A = 145 +var4%R(3)%B = [991,992,993] +var4%R(3)%C = t2(996,995,994) +var4%R(3)%D = reshape([t2(199,299,399), t2(499,599,699), t2(1199,1299,1399), t2(1499,1599,1699)], [2,2]) + +!$omp target map(to: var3%Q) map(tofrom: var4%Q) + call foo(var3%Q, var4%Q) +!$omp end target + +!$omp target map(to: var3%R(2)) map(tofrom: var4%R(3)) + call foo(var3%R(2), var4%R(3)) +!$omp end target + +if (var4%Q%A /= 45) stop 13 +if (any (var4%Q%B /= [1,2,3])) stop 14 +if (var4%Q%C%x /= 6) stop 15 +if (var4%Q%C%y /= 5) stop 15 +if (var4%Q%C%z /= 4) stop 15 +if (any (var4%Q%D(:,:)%x /= reshape([1, 4, 11, 14], [2,2]))) stop 16 +if (any (var4%Q%D(:,:)%y /= reshape([2, 5, 12, 15], [2,2]))) stop 16 +if (any (var4%Q%D(:,:)%z /= reshape([3, 6, 13, 16], [2,2]))) stop 16 + +if (var4%R(3)%A /= 45) stop 17 +if (any (var4%R(3)%B /= [1,2,3])) stop 18 +if (var4%R(3)%C%x /= 6) stop 19 +if (var4%R(3)%C%y /= 5) stop 19 +if (var4%R(3)%C%z /= 4) stop 19 +if (any (var4%R(3)%D(:,:)%x /= reshape([1, 4, 11, 14], [2,2]))) stop 20 +if (any (var4%R(3)%D(:,:)%y /= reshape([2, 5, 12, 15], [2,2]))) stop 20 +if (any (var4%R(3)%D(:,:)%z /= reshape([3, 6, 13, 16], [2,2]))) stop 20 + +contains + subroutine foo(x, y) + type(t) :: x, y + if (x%A /= 45) stop 1 + if (any (x%B /= [1,2,3])) stop 2 + if (x%C%x /= 6) stop 3 + if (x%C%y /= 5) stop 3 + if (x%C%z /= 4) stop 3 + if (any (x%D(:,:)%x /= reshape([1, 4, 11, 14], [2,2]))) stop 4 + if (any (x%D(:,:)%y /= reshape([2, 5, 12, 15], [2,2]))) stop 4 + if (any (x%D(:,:)%z /= reshape([3, 6, 13, 16], [2,2]))) stop 4 + + if (y%A /= 145) stop 5 + if (any (y%B /= [991,992,993])) stop 6 + if (y%C%x /= 996) stop 7 + if (y%C%y /= 995) stop 7 + if (y%C%z /= 994) stop 7 + if (any (y%D(:,:)%x /= reshape([199, 499, 1199, 1499], [2,2]))) stop 8 + if (any (y%D(:,:)%y /= reshape([299, 599, 1299, 1599], [2,2]))) stop 8 + if (any (y%D(:,:)%z /= reshape([399, 699, 1399, 1699], [2,2]))) stop 8 + + y%A = x%A + y%B(:) = x%B + y%C = x%C + y%D(:,:) = x%D(:,:) + end +end diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-4.f90 b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-4.f90 new file mode 100644 index 0000000..fb9859d --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-4.f90 @@ -0,0 +1,124 @@ +type t2 + integer x, y, z +end type t2 +type t + integer, allocatable :: A + integer, allocatable :: B(:) + type(t2), allocatable :: C + type(t2), allocatable :: D(:,:) +end type t + +type t3 + type(t) :: Q + type(t) :: R(5) +end type + +type(t) :: var, var2 +type(t3) :: var3, var4 + +! -------------------------------------- +! Assign + allocate +var%A = 45 +var%B = [1,2,3] +var%C = t2(6,5,4) +var%D = reshape([t2(1,2,3), t2(4,5,6), t2(11,12,13), t2(14,15,16)], [2,2]) + +! Assign + allocate +var2%A = 145 +var2%B = [991,992,993] +var2%C = t2(996,995,994) +var2%D = reshape([t2(199,299,399), t2(499,599,699), t2(1199,1299,1399), t2(1499,1599,1699)], [2,2]) + + +!$omp target map(to: var%A, var%B, var%C, var%D) & +!$omp& map(tofrom: var2%A, var2%B, var2%C, var2%D) + call foo(var, var2) +!$omp end target + +if (var2%A /= 45) stop 9 +if (any (var2%B /= [1,2,3])) stop 10 +if (var2%C%x /= 6) stop 11 +if (var2%C%y /= 5) stop 11 +if (var2%C%z /= 4) stop 11 +if (any (var2%D(:,:)%x /= reshape([1, 4, 11, 14], [2,2]))) stop 12 +if (any (var2%D(:,:)%y /= reshape([2, 5, 12, 15], [2,2]))) stop 12 +if (any (var2%D(:,:)%z /= reshape([3, 6, 13, 16], [2,2]))) stop 12 + +! -------------------------------------- +! Assign + allocate +var3%Q%A = 45 +var3%Q%B = [1,2,3] +var3%Q%C = t2(6,5,4) +var3%Q%D = reshape([t2(1,2,3), t2(4,5,6), t2(11,12,13), t2(14,15,16)], [2,2]) + +var3%R(2)%A = 45 +var3%R(2)%B = [1,2,3] +var3%R(2)%C = t2(6,5,4) +var3%R(2)%D = reshape([t2(1,2,3), t2(4,5,6), t2(11,12,13), t2(14,15,16)], [2,2]) + +! Assign + allocate +var4%Q%A = 145 +var4%Q%B = [991,992,993] +var4%Q%C = t2(996,995,994) +var4%Q%D = reshape([t2(199,299,399), t2(499,599,699), t2(1199,1299,1399), t2(1499,1599,1699)], [2,2]) + +var4%R(3)%A = 145 +var4%R(3)%B = [991,992,993] +var4%R(3)%C = t2(996,995,994) +var4%R(3)%D = reshape([t2(199,299,399), t2(499,599,699), t2(1199,1299,1399), t2(1499,1599,1699)], [2,2]) + +!$omp target map(to: var3%Q%A, var3%Q%B, var3%Q%C, var3%Q%D) & +!$omp& map(tofrom: var4%Q%A, var4%Q%B, var4%Q%C, var4%Q%D) + call foo(var3%Q, var4%Q) +!$omp end target + +if (var4%Q%A /= 45) stop 13 +if (any (var4%Q%B /= [1,2,3])) stop 14 +if (var4%Q%C%x /= 6) stop 15 +if (var4%Q%C%y /= 5) stop 15 +if (var4%Q%C%z /= 4) stop 15 +if (any (var4%Q%D(:,:)%x /= reshape([1, 4, 11, 14], [2,2]))) stop 16 +if (any (var4%Q%D(:,:)%y /= reshape([2, 5, 12, 15], [2,2]))) stop 16 +if (any (var4%Q%D(:,:)%z /= reshape([3, 6, 13, 16], [2,2]))) stop 16 + +!$omp target map(to: var3%R(2)%A, var3%R(2)%B, var3%R(2)%C, var3%R(2)%D) & +!$omp& map(tofrom: var4%R(3)%A, var4%R(3)%B, var4%R(3)%C, var4%R(3)%D) + call foo(var3%R(2), var4%R(3)) +!$omp end target + +if (var4%R(3)%A /= 45) stop 17 +if (any (var4%R(3)%B /= [1,2,3])) stop 18 +if (var4%R(3)%C%x /= 6) stop 19 +if (var4%R(3)%C%y /= 5) stop 19 +if (var4%R(3)%C%z /= 4) stop 19 +if (any (var4%R(3)%D(:,:)%x /= reshape([1, 4, 11, 14], [2,2]))) stop 20 +if (any (var4%R(3)%D(:,:)%y /= reshape([2, 5, 12, 15], [2,2]))) stop 20 +if (any (var4%R(3)%D(:,:)%z /= reshape([3, 6, 13, 16], [2,2]))) stop 20 + +contains + subroutine foo(x, y) + type(t) :: x, y + if (x%A /= 45) stop 1 + if (any (x%B /= [1,2,3])) stop 2 + if (x%C%x /= 6) stop 3 + if (x%C%y /= 5) stop 3 + if (x%C%z /= 4) stop 3 + if (any (x%D(:,:)%x /= reshape([1, 4, 11, 14], [2,2]))) stop 4 + if (any (x%D(:,:)%y /= reshape([2, 5, 12, 15], [2,2]))) stop 4 + if (any (x%D(:,:)%z /= reshape([3, 6, 13, 16], [2,2]))) stop 4 + + if (y%A /= 145) stop 5 + if (any (y%B /= [991,992,993])) stop 6 + if (y%C%x /= 996) stop 7 + if (y%C%y /= 995) stop 7 + if (y%C%z /= 994) stop 7 + if (any (y%D(:,:)%x /= reshape([199, 499, 1199, 1499], [2,2]))) stop 8 + if (any (y%D(:,:)%y /= reshape([299, 599, 1299, 1599], [2,2]))) stop 8 + if (any (y%D(:,:)%z /= reshape([399, 699, 1399, 1699], [2,2]))) stop 8 + + y%A = x%A + y%B(:) = x%B + y%C = x%C + y%D(:,:) = x%D(:,:) + end +end diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-5.f90 b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-5.f90 new file mode 100644 index 0000000..b2e36b2 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-5.f90 @@ -0,0 +1,53 @@ +implicit none +type t + integer, allocatable :: a, b(:) +end type t +type(t) :: x, y, z +integer :: i + +!$omp target + if (allocated(x%a)) stop 1 + if (allocated(x%b)) stop 2 +!$omp end target + +allocate(x%a, x%b(-4:6)) +x%b(:) = [(i, i=-4,6)] + +!$omp target + if (.not. allocated(x%a)) stop 3 + if (.not. allocated(x%b)) stop 4 + if (lbound(x%b,1) /= -4) stop 5 + if (ubound(x%b,1) /= 6) stop 6 + if (any (x%b /= [(i, i=-4,6)])) stop 7 +!$omp end target + + +! The following only works with arrays due to +! PR fortran/96668 + +!$omp target enter data map(to: y, z) + +!$omp target + if (allocated(y%b)) stop 8 + if (allocated(z%b)) stop 9 +!$omp end target + +allocate(y%b(5), z%b(3)) +y%b = 42 +z%b = 99 + +! (implicitly) 'tofrom' mapped +! Planned for OpenMP 6.0 (but common extension) +! OpenMP <= 5.0 unclear +!$omp target + if (.not.allocated(y%b)) stop 10 + if (any (y%b /= 42)) stop 11 +!$omp end target + +! always map: OpenMP 5.1 (clarified) +!$omp target map(always, tofrom: z) + if (.not.allocated(z%b)) stop 12 + if (any (z%b /= 99)) stop 13 +!$omp end target + +end diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-6.f90 b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-6.f90 new file mode 100644 index 0000000..48d4aea --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-6.f90 @@ -0,0 +1,308 @@ +! NOTE: This code uses POINTER. +! While map(p, var%p) etc. maps the ptr/ptr comp p / var%p (incl. allocatable comps), +! map(var) does not map var%p. + +use iso_c_binding +implicit none +type t2 + integer, allocatable :: x, y, z +end type t2 +type t + integer, pointer :: A => null() + integer, pointer :: B(:) => null() + type(t2), pointer :: C => null() + type(t2), pointer :: D(:,:) => null() +end type t + +type t3 + type(t) :: Q + type(t) :: R(5) +end type + +type(t) :: var, var2 +type(t3) :: var3, var4 +integer(c_intptr_t) :: iptr + +! -------------------------------------- +! Assign + allocate +allocate (var%A, source=45) +allocate (var%B(3), source=[1,2,3]) +allocate (var%C) +var%C%x = 6; var%C%y = 5; var%C%z = 4 +allocate (var%D(2,2)) +var%D(1,1)%x = 1 +var%D(1,1)%y = 2 +var%D(1,1)%z = 3 +var%D(2,1)%x = 4 +var%D(2,1)%y = 5 +var%D(2,1)%z = 6 +var%D(1,2)%x = 11 +var%D(1,2)%y = 12 +var%D(1,2)%z = 13 +var%D(2,2)%x = 14 +var%D(2,2)%y = 15 +var%D(2,2)%z = 16 + +! Assign + allocate +allocate (var2%A, source=145) +allocate (var2%B, source=[991,992,993]) +allocate (var2%C) +var2%C%x = 996; var2%C%y = 995; var2%C%z = 994 +allocate (var2%D(2,2)) +var2%D(1,1)%x = 199 +var2%D(1,1)%y = 299 +var2%D(1,1)%z = 399 +var2%D(2,1)%x = 499 +var2%D(2,1)%y = 599 +var2%D(2,1)%z = 699 +var2%D(1,2)%x = 1199 +var2%D(1,2)%y = 1299 +var2%D(1,2)%z = 1399 +var2%D(2,2)%x = 1499 +var2%D(2,2)%y = 1599 +var2%D(2,2)%z = 1699 + +block + integer(c_intptr_t) :: loc_a, loc_b, loc_c, loc_d, loc2_a, loc2_b, loc2_c, loc2_d + loc_a = loc (var%a) + loc_b = loc (var%b) + loc_c = loc (var%d) + loc_d = loc (var%d) + loc2_a = loc (var2%a) + loc2_b = loc (var2%b) + loc2_c = loc (var2%c) + loc2_d = loc (var2%d) + ! var/var2 are mapped, but the pointer components aren't + !$omp target map(to: var) map(tofrom: var2) + if (loc_a /= loc (var%a)) stop 31 + if (loc_b /= loc (var%b)) stop 32 + if (loc_c /= loc (var%d)) stop 33 + if (loc_d /= loc (var%d)) stop 34 + if (loc2_a /= loc (var2%a)) stop 35 + if (loc2_b /= loc (var2%b)) stop 36 + if (loc2_c /= loc (var2%c)) stop 37 + if (loc2_d /= loc (var2%d)) stop 38 + !$omp end target + if (loc_a /= loc (var%a)) stop 41 + if (loc_b /= loc (var%b)) stop 42 + if (loc_c /= loc (var%d)) stop 43 + if (loc_d /= loc (var%d)) stop 44 + if (loc2_a /= loc (var2%a)) stop 45 + if (loc2_b /= loc (var2%b)) stop 46 + if (loc2_c /= loc (var2%c)) stop 47 + if (loc2_d /= loc (var2%d)) stop 48 +end block + +block + ! Map only (all) components, but this maps also the alloc comps + !$omp target map(to: var%a, var%b, var%c, var%d) map(tofrom: var2%a, var2%b, var2%c, var2%d) + call foo (var,var2) + !$omp end target +end block + +if (var2%A /= 45) stop 9 +if (any (var2%B /= [1,2,3])) stop 10 +if (var2%C%x /= 6) stop 11 +if (var2%C%y /= 5) stop 11 +if (var2%C%z /= 4) stop 11 +block + integer :: tmp_x(2,2), tmp_y(2,2), tmp_z(2,2), i, j + tmp_x = reshape([1, 4, 11, 14], [2,2]) + tmp_y = reshape([2, 5, 12, 15], [2,2]) + tmp_z = reshape([3, 6, 13, 16], [2,2]) + do j = 1, 2 + do i = 1, 2 + if (var2%D(i,j)%x /= tmp_x(i,j)) stop 12 + if (var2%D(i,j)%y /= tmp_y(i,j)) stop 12 + if (var2%D(i,j)%z /= tmp_z(i,j)) stop 12 + end do + end do +end block + +! Extra deallocates due to PR fortran/104697 +deallocate(var%C%x, var%C%y, var%C%z) +deallocate(var%D(1,1)%x, var%D(1,1)%y, var%D(1,1)%z) +deallocate(var%D(2,1)%x, var%D(2,1)%y, var%D(2,1)%z) +deallocate(var%D(1,2)%x, var%D(1,2)%y, var%D(1,2)%z) +deallocate(var%D(2,2)%x, var%D(2,2)%y, var%D(2,2)%z) +deallocate(var%A, var%B, var%C, var%D) + +deallocate(var2%C%x, var2%C%y, var2%C%z) +deallocate(var2%D(1,1)%x, var2%D(1,1)%y, var2%D(1,1)%z) +deallocate(var2%D(2,1)%x, var2%D(2,1)%y, var2%D(2,1)%z) +deallocate(var2%D(1,2)%x, var2%D(1,2)%y, var2%D(1,2)%z) +deallocate(var2%D(2,2)%x, var2%D(2,2)%y, var2%D(2,2)%z) +deallocate(var2%A, var2%B, var2%C, var2%D) + +! -------------------------------------- +! Assign + allocate +allocate (var3%Q%A, source=45) +allocate (var3%Q%B, source=[1,2,3]) +allocate (var3%Q%C, source=t2(6,5,4)) +allocate (var3%Q%D(2,2)) +var3%Q%D(1,1) = t2(1,2,3) +var3%Q%D(2,1) = t2(4,5,6) +var3%Q%D(1,2) = t2(11,12,13) +var3%Q%D(2,2) = t2(14,15,16) + +allocate (var3%R(2)%A, source=45) +allocate (var3%R(2)%B, source=[1,2,3]) +allocate (var3%R(2)%C, source=t2(6,5,4)) +allocate (var3%R(2)%D(2,2)) +var3%R(2)%D(1,1) = t2(1,2,3) +var3%R(2)%D(2,1) = t2(4,5,6) +var3%R(2)%D(1,2) = t2(11,12,13) +var3%R(2)%D(2,2) = t2(14,15,16) + +! Assign + allocate +allocate (var4%Q%A, source=145) +allocate (var4%Q%B, source=[991,992,993]) +allocate (var4%Q%C, source=t2(996,995,994)) +allocate (var4%Q%D(2,2)) +var4%Q%D(1,1) = t2(199,299,399) +var4%Q%D(2,1) = t2(499,599,699) +var4%Q%D(1,2) = t2(1199,1299,1399) +var4%Q%D(2,2) = t2(1499,1599,1699) + +allocate (var4%R(3)%A, source=145) +allocate (var4%R(3)%B, source=[991,992,993]) +allocate (var4%R(3)%C, source=t2(996,995,994)) +allocate (var4%R(3)%D(2,2)) +var4%R(3)%D(1,1) = t2(199,299,399) +var4%R(3)%D(2,1) = t2(499,599,699) +var4%R(3)%D(1,2) = t2(1199,1299,1399) +var4%R(3)%D(2,2) = t2(1499,1599,1699) + +!$omp target map(to: var3%Q%A, var3%Q%B, var3%Q%C, var3%Q%D) & +!$omp& map(tofrom: var4%Q%A, var4%Q%B, var4%Q%C, var4%Q%D) + call foo(var3%Q, var4%Q) +!$omp end target + +iptr = loc(var3%R(2)%A) + +!$omp target map(to: var3%R(2)%A, var3%R(2)%B, var3%R(2)%C, var3%R(2)%D) & +!$omp& map(tofrom: var4%R(3)%A, var4%R(3)%B, var4%R(3)%C, var4%R(3)%D) + call foo(var3%R(2), var4%R(3)) +!$omp end target + +if (var4%Q%A /= 45) stop 13 +if (any (var4%Q%B /= [1,2,3])) stop 14 +if (var4%Q%C%x /= 6) stop 15 +if (var4%Q%C%y /= 5) stop 15 +if (var4%Q%C%z /= 4) stop 15 +block + integer :: tmp_x(2,2), tmp_y(2,2), tmp_z(2,2), i, j + tmp_x = reshape([1, 4, 11, 14], [2,2]) + tmp_y = reshape([2, 5, 12, 15], [2,2]) + tmp_z = reshape([3, 6, 13, 16], [2,2]) + do j = 1, 2 + do i = 1, 2 + if (var4%Q%D(i,j)%x /= tmp_x(i,j)) stop 16 + if (var4%Q%D(i,j)%y /= tmp_y(i,j)) stop 16 + if (var4%Q%D(i,j)%z /= tmp_z(i,j)) stop 16 + end do + end do +end block + +! Cf. PR fortran/104696 +! { dg-output "valid mapping, OK" { xfail { offload_device_nonshared_as } } } +if (iptr /= loc(var3%R(2)%A)) then + print *, "invalid mapping, cf. PR fortran/104696" +else + +if (var4%R(3)%A /= 45) stop 17 +if (any (var4%R(3)%B /= [1,2,3])) stop 18 +if (var4%R(3)%C%x /= 6) stop 19 +if (var4%R(3)%C%y /= 5) stop 19 +if (var4%R(3)%C%z /= 4) stop 19 +block + integer :: tmp_x(2,2), tmp_y(2,2), tmp_z(2,2), i, j + tmp_x = reshape([1, 4, 11, 14], [2,2]) + tmp_y = reshape([2, 5, 12, 15], [2,2]) + tmp_z = reshape([3, 6, 13, 16], [2,2]) + do j = 1, 2 + do i = 1, 2 + if (var4%R(3)%D(i,j)%x /= tmp_x(i,j)) stop 20 + if (var4%R(3)%D(i,j)%y /= tmp_y(i,j)) stop 20 + if (var4%R(3)%D(i,j)%z /= tmp_z(i,j)) stop 20 + end do + end do +end block + +! Extra deallocates due to PR fortran/104697 +deallocate(var3%Q%C%x, var3%Q%D(1,1)%x, var3%Q%D(2,1)%x, var3%Q%D(1,2)%x, var3%Q%D(2,2)%x) +deallocate(var3%Q%C%y, var3%Q%D(1,1)%y, var3%Q%D(2,1)%y, var3%Q%D(1,2)%y, var3%Q%D(2,2)%y) +deallocate(var3%Q%C%z, var3%Q%D(1,1)%z, var3%Q%D(2,1)%z, var3%Q%D(1,2)%z, var3%Q%D(2,2)%z) +deallocate(var3%Q%A, var3%Q%B, var3%Q%C, var3%Q%D) + +deallocate(var4%Q%C%x, var4%Q%D(1,1)%x, var4%Q%D(2,1)%x, var4%Q%D(1,2)%x, var4%Q%D(2,2)%x) +deallocate(var4%Q%C%y, var4%Q%D(1,1)%y, var4%Q%D(2,1)%y, var4%Q%D(1,2)%y, var4%Q%D(2,2)%y) +deallocate(var4%Q%C%z, var4%Q%D(1,1)%z, var4%Q%D(2,1)%z, var4%Q%D(1,2)%z, var4%Q%D(2,2)%z) +deallocate(var4%Q%A, var4%Q%B, var4%Q%C, var4%Q%D) + +deallocate(var3%R(2)%C%x, var3%R(2)%D(1,1)%x, var3%R(2)%D(2,1)%x, var3%R(2)%D(1,2)%x, var3%R(2)%D(2,2)%x) +deallocate(var3%R(2)%C%y, var3%R(2)%D(1,1)%y, var3%R(2)%D(2,1)%y, var3%R(2)%D(1,2)%y, var3%R(2)%D(2,2)%y) +deallocate(var3%R(2)%C%z, var3%R(2)%D(1,1)%z, var3%R(2)%D(2,1)%z, var3%R(2)%D(1,2)%z, var3%R(2)%D(2,2)%z) +deallocate(var3%R(2)%A, var3%R(2)%B, var3%R(2)%C, var3%R(2)%D) + +deallocate(var4%R(3)%C%x, var4%R(3)%D(1,1)%x, var4%R(3)%D(2,1)%x, var4%R(3)%D(1,2)%x, var4%R(3)%D(2,2)%x) +deallocate(var4%R(3)%C%y, var4%R(3)%D(1,1)%y, var4%R(3)%D(2,1)%y, var4%R(3)%D(1,2)%y, var4%R(3)%D(2,2)%y) +deallocate(var4%R(3)%C%z, var4%R(3)%D(1,1)%z, var4%R(3)%D(2,1)%z, var4%R(3)%D(1,2)%z, var4%R(3)%D(2,2)%z) +deallocate(var4%R(3)%A, var4%R(3)%B, var4%R(3)%C, var4%R(3)%D) + + print *, "valid mapping, OK" +endif + +contains + subroutine foo(x, y) + type(t) :: x, y + intent(in) :: x + intent(inout) :: y + integer :: tmp_x(2,2), tmp_y(2,2), tmp_z(2,2), i, j + if (x%A /= 45) stop 1 + if (any (x%B /= [1,2,3])) stop 2 + if (x%C%x /= 6) stop 3 + if (x%C%y /= 5) stop 3 + if (x%C%z /= 4) stop 3 + + tmp_x = reshape([1, 4, 11, 14], [2,2]) + tmp_y = reshape([2, 5, 12, 15], [2,2]) + tmp_z = reshape([3, 6, 13, 16], [2,2]) + do j = 1, 2 + do i = 1, 2 + if (x%D(i,j)%x /= tmp_x(i,j)) stop 4 + if (x%D(i,j)%y /= tmp_y(i,j)) stop 4 + if (x%D(i,j)%z /= tmp_z(i,j)) stop 4 + end do + end do + + if (y%A /= 145) stop 5 + if (any (y%B /= [991,992,993])) stop 6 + if (y%C%x /= 996) stop 7 + if (y%C%y /= 995) stop 7 + if (y%C%z /= 994) stop 7 + tmp_x = reshape([199, 499, 1199, 1499], [2,2]) + tmp_y = reshape([299, 599, 1299, 1599], [2,2]) + tmp_z = reshape([399, 699, 1399, 1699], [2,2]) + do j = 1, 2 + do i = 1, 2 + if (y%D(i,j)%x /= tmp_x(i,j)) stop 8 + if (y%D(i,j)%y /= tmp_y(i,j)) stop 8 + if (y%D(i,j)%z /= tmp_z(i,j)) stop 8 + end do + end do + + y%A = x%A + y%B(:) = x%B + y%C%x = x%C%x + y%C%y = x%C%y + y%C%z = x%C%z + do j = 1, 2 + do i = 1, 2 + y%D(i,j)%x = x%D(i,j)%x + y%D(i,j)%y = x%D(i,j)%y + y%D(i,j)%z = x%D(i,j)%z + end do + end do + end +end diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-7.f90 b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-7.f90 new file mode 100644 index 0000000..1493c5f --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-7.f90 @@ -0,0 +1,672 @@ +module m + implicit none (type, external) + type t + integer, allocatable :: arr(:,:) + integer :: var + integer, allocatable :: slr + end type t + +contains + + subroutine check_it (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array, & + opt_scalar, opt_array, a_opt_scalar, a_opt_array) + type(t), intent(inout) :: & + scalar, array(:,:), opt_scalar, opt_array(:,:), a_scalar, a_array(:,:), & + a_opt_scalar, a_opt_array(:,:), & + l_scalar, l_array(:,:), la_scalar, la_array(:,:) + optional :: opt_scalar, opt_array, a_opt_scalar, a_opt_array + allocatable :: a_scalar, a_array, a_opt_scalar, a_opt_array, la_scalar, la_array + logical, value :: is_present, dummy_alloced, inner_alloc + integer :: i, j, k, l + + ! CHECK VALUE + if (scalar%var /= 42) stop 1 + if (l_scalar%var /= 42) stop 1 + if (is_present) then + if (opt_scalar%var /= 42) stop 2 + end if + if (any (shape(array) /= [3,2])) stop 1 + if (any (shape(l_array) /= [3,2])) stop 1 + if (is_present) then + if (any (shape(opt_array) /= [3,2])) stop 1 + end if + do j = 1, 2 + do i = 1, 3 + if (array(i,j)%var /= i*97 + 100*41*j) stop 3 + if (l_array(i,j)%var /= i*97 + 100*41*j) stop 3 + if (is_present) then + if (opt_array(i,j)%var /= i*97 + 100*41*j) stop 4 + end if + end do + end do + + if (dummy_alloced) then + if (a_scalar%var /= 42) stop 1 + if (la_scalar%var /= 42) stop 1 + if (is_present) then + if (a_opt_scalar%var /= 42) stop 1 + end if + if (any (shape(a_array) /= [3,2])) stop 1 + if (any (shape(la_array) /= [3,2])) stop 1 + if (is_present) then + if (any (shape(a_opt_array) /= [3,2])) stop 1 + end if + do j = 1, 2 + do i = 1, 3 + if (a_array(i,j)%var /= i*97 + 100*41*j) stop 1 + if (la_array(i,j)%var /= i*97 + 100*41*j) stop 1 + if (is_present) then + if (a_opt_array(i,j)%var /= i*97 + 100*41*j) stop 1 + end if + end do + end do + else + if (allocated (a_scalar)) stop 1 + if (allocated (la_scalar)) stop 1 + if (allocated (a_array)) stop 1 + if (allocated (la_array)) stop 1 + if (is_present) then + if (allocated (a_opt_scalar)) stop 1 + if (allocated (a_opt_array)) stop 1 + end if + end if + + if (inner_alloc) then + if (scalar%slr /= 467) stop 5 + if (l_scalar%slr /= 467) stop 5 + if (a_scalar%slr /= 467) stop 6 + if (la_scalar%slr /= 467) stop 6 + if (is_present) then + if (opt_scalar%slr /= 467) stop 7 + if (a_opt_scalar%slr /= 467) stop 8 + end if + do j = 1, 2 + do i = 1, 3 + if (array(i,j)%slr /= (i*97 + 100*41*j) + 467) stop 9 + if (l_array(i,j)%slr /= (i*97 + 100*41*j) + 467) stop 9 + if (a_array(i,j)%slr /= (i*97 + 100*41*j) + 467) stop 10 + if (la_array(i,j)%slr /= (i*97 + 100*41*j) + 467) stop 10 + if (is_present) then + if (opt_array(i,j)%slr /= (i*97 + 100*41*j) + 467) stop 11 + if (a_opt_array(i,j)%slr /= (i*97 + 100*41*j) + 467) stop 12 + end if + end do + end do + + do l = 1, 5 + do k = 1, 4 + if (any (shape(scalar%arr) /= [4,5])) stop 1 + if (any (shape(l_scalar%arr) /= [4,5])) stop 1 + if (any (shape(a_scalar%arr) /= [4,5])) stop 1 + if (any (shape(la_scalar%arr) /= [4,5])) stop 1 + if (scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467) stop 13 + if (l_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467) stop 13 + if (a_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467) stop 14 + if (la_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467) stop 14 + if (is_present) then + if (any (shape(opt_scalar%arr) /= [4,5])) stop 1 + if (any (shape(a_opt_scalar%arr) /= [4,5])) stop 1 + if (opt_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467) stop 15 + if (a_opt_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467) stop 16 + end if + end do + end do + do j = 1, 2 + do i = 1, 3 + if (any (shape(array(i,j)%arr) /= [i,j])) stop 1 + if (any (shape(l_array(i,j)%arr) /= [i,j])) stop 1 + if (any (shape(a_array(i,j)%arr) /= [i,j])) stop 1 + if (any (shape(la_array(i,j)%arr) /= [i,j])) stop 1 + if (is_present) then + if (any (shape(opt_array(i,j)%arr) /= [i,j])) stop 1 + if (any (shape(a_opt_array(i,j)%arr) /= [i,j])) stop 1 + endif + do l = 1, j + do k = 1, i + if (array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l) stop 17 + if (l_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l) stop 17 + if (a_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l) stop 18 + if (la_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l) stop 18 + if (is_present) then + if (opt_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l) stop 19 + if (a_opt_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l) stop 20 + end if + end do + end do + end do + end do + else if (dummy_alloced) then + if (allocated (scalar%slr)) stop 1 + if (allocated (l_scalar%slr)) stop 1 + if (allocated (a_scalar%slr)) stop 1 + if (allocated (la_scalar%slr)) stop 1 + if (is_present) then + if (allocated (opt_scalar%slr)) stop 1 + if (allocated (a_opt_scalar%slr)) stop 1 + endif + if (allocated (scalar%arr)) stop 1 + if (allocated (l_scalar%arr)) stop 1 + if (allocated (a_scalar%arr)) stop 1 + if (allocated (la_scalar%arr)) stop 1 + if (is_present) then + if (allocated (opt_scalar%arr)) stop 1 + if (allocated (a_opt_scalar%arr)) stop 1 + endif + end if + + ! SET VALUE + scalar%var = 42 + 13 + l_scalar%var = 42 + 13 + if (is_present) then + opt_scalar%var = 42 + 13 + endif + do j = 1, 2 + do i = 1, 3 + array(i,j)%var = i*97 + 100*41*j + 13 + l_array(i,j)%var = i*97 + 100*41*j + 13 + if (is_present) then + opt_array(i,j)%var = i*97 + 100*41*j + 13 + end if + end do + end do + + if (dummy_alloced) then + a_scalar%var = 42 + 13 + la_scalar%var = 42 + 13 + if (is_present) then + a_opt_scalar%var = 42 + 13 + endif + do j = 1, 2 + do i = 1, 3 + a_array(i,j)%var = i*97 + 100*41*j + 13 + la_array(i,j)%var = i*97 + 100*41*j + 13 + if (is_present) then + a_opt_array(i,j)%var = i*97 + 100*41*j + 13 + endif + end do + end do + end if + + if (inner_alloc) then + scalar%slr = 467 + 13 + l_scalar%slr = 467 + 13 + a_scalar%slr = 467 + 13 + la_scalar%slr = 467 + 13 + if (is_present) then + opt_scalar%slr = 467 + 13 + a_opt_scalar%slr = 467 + 13 + end if + do j = 1, 2 + do i = 1, 3 + array(i,j)%slr = (i*97 + 100*41*j) + 467 + 13 + l_array(i,j)%slr = (i*97 + 100*41*j) + 467 + 13 + a_array(i,j)%slr = (i*97 + 100*41*j) + 467 + 13 + la_array(i,j)%slr = (i*97 + 100*41*j) + 467 + 13 + if (is_present) then + opt_array(i,j)%slr = (i*97 + 100*41*j) + 467 + 13 + a_opt_array(i,j)%slr = (i*97 + 100*41*j) + 467 + 13 + end if + end do + end do + + do l = 1, 5 + do k = 1, 4 + scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + 13 + l_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + 13 + a_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + 13 + la_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + 13 + if (is_present) then + opt_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + 13 + a_opt_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + 13 + end if + end do + end do + do j = 1, 2 + do i = 1, 3 + do l = 1, j + do k = 1, i + array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + 13 + l_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + 13 + a_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + 13 + la_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + 13 + if (is_present) then + opt_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + 13 + a_opt_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + 13 + end if + end do + end do + end do + end do + end if + + end subroutine + subroutine check_reset (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array, & + opt_scalar, opt_array, a_opt_scalar, a_opt_array) + type(t), intent(inout) :: & + scalar, array(:,:), opt_scalar, opt_array(:,:), a_scalar, a_array(:,:), & + a_opt_scalar, a_opt_array(:,:), & + l_scalar, l_array(:,:), la_scalar, la_array(:,:) + optional :: opt_scalar, opt_array, a_opt_scalar, a_opt_array + allocatable :: a_scalar, a_array, a_opt_scalar, a_opt_array, la_scalar, la_array + logical, value :: is_present, dummy_alloced, inner_alloc + integer :: i, j, k, l + + ! CHECK VALUE + if (scalar%var /= 42 + 13) stop 1 + if (l_scalar%var /= 42 + 13) stop 1 + if (is_present) then + if (opt_scalar%var /= 42 + 13) stop 2 + end if + if (any (shape(array) /= [3,2])) stop 1 + if (any (shape(l_array) /= [3,2])) stop 1 + if (is_present) then + if (any (shape(opt_array) /= [3,2])) stop 1 + end if + do j = 1, 2 + do i = 1, 3 + if (array(i,j)%var /= i*97 + 100*41*j + 13) stop 3 + if (l_array(i,j)%var /= i*97 + 100*41*j + 13) stop 3 + if (is_present) then + if (opt_array(i,j)%var /= i*97 + 100*41*j + 13) stop 4 + end if + end do + end do + + if (dummy_alloced) then + if (a_scalar%var /= 42 + 13) stop 1 + if (la_scalar%var /= 42 + 13) stop 1 + if (is_present) then + if (a_opt_scalar%var /= 42 + 13) stop 1 + end if + if (any (shape(a_array) /= [3,2])) stop 1 + if (any (shape(la_array) /= [3,2])) stop 1 + if (is_present) then + if (any (shape(a_opt_array) /= [3,2])) stop 1 + end if + do j = 1, 2 + do i = 1, 3 + if (a_array(i,j)%var /= i*97 + 100*41*j + 13) stop 1 + if (la_array(i,j)%var /= i*97 + 100*41*j + 13) stop 1 + if (is_present) then + if (a_opt_array(i,j)%var /= i*97 + 100*41*j + 13) stop 1 + end if + end do + end do + else + if (allocated (a_scalar)) stop 1 + if (allocated (la_scalar)) stop 1 + if (allocated (a_array)) stop 1 + if (allocated (la_array)) stop 1 + if (is_present) then + if (allocated (a_opt_scalar)) stop 1 + if (allocated (a_opt_array)) stop 1 + end if + end if + + if (inner_alloc) then + if (scalar%slr /= 467 + 13) stop 5 + if (l_scalar%slr /= 467 + 13) stop 5 + if (a_scalar%slr /= 467 + 13) stop 6 + if (la_scalar%slr /= 467 + 13) stop 6 + if (is_present) then + if (opt_scalar%slr /= 467 + 13) stop 7 + if (a_opt_scalar%slr /= 467 + 13) stop 8 + end if + do j = 1, 2 + do i = 1, 3 + if (array(i,j)%slr /= (i*97 + 100*41*j) + 467 + 13) stop 9 + if (l_array(i,j)%slr /= (i*97 + 100*41*j) + 467 + 13) stop 9 + if (a_array(i,j)%slr /= (i*97 + 100*41*j) + 467 + 13) stop 10 + if (la_array(i,j)%slr /= (i*97 + 100*41*j) + 467 + 13) stop 10 + if (is_present) then + if (opt_array(i,j)%slr /= (i*97 + 100*41*j) + 467 + 13) stop 11 + if (a_opt_array(i,j)%slr /= (i*97 + 100*41*j) + 467 + 13) stop 12 + end if + end do + end do + + do l = 1, 5 + do k = 1, 4 + if (any (shape(scalar%arr) /= [4,5])) stop 1 + if (any (shape(l_scalar%arr) /= [4,5])) stop 1 + if (any (shape(a_scalar%arr) /= [4,5])) stop 1 + if (any (shape(la_scalar%arr) /= [4,5])) stop 1 + if (scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467 + 13) stop 13 + if (l_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467 + 13) stop 13 + if (a_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467 + 13) stop 14 + if (la_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467 + 13) stop 14 + if (is_present) then + if (any (shape(opt_scalar%arr) /= [4,5])) stop 1 + if (any (shape(a_opt_scalar%arr) /= [4,5])) stop 1 + if (opt_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467 + 13) stop 15 + if (a_opt_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467 + 13) stop 16 + end if + end do + end do + do j = 1, 2 + do i = 1, 3 + if (any (shape(array(i,j)%arr) /= [i,j])) stop 1 + if (any (shape(l_array(i,j)%arr) /= [i,j])) stop 1 + if (any (shape(a_array(i,j)%arr) /= [i,j])) stop 1 + if (any (shape(la_array(i,j)%arr) /= [i,j])) stop 1 + if (is_present) then + if (any (shape(opt_array(i,j)%arr) /= [i,j])) stop 1 + if (any (shape(a_opt_array(i,j)%arr) /= [i,j])) stop 1 + endif + do l = 1, j + do k = 1, i + if (array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l + 13) stop 17 + if (l_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l + 13) stop 17 + if (a_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l + 13) stop 18 + if (la_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l + 13) stop 18 + if (is_present) then + if (opt_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l + 13) stop 19 + if (a_opt_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l + 13) stop 20 + end if + end do + end do + end do + end do + else if (dummy_alloced) then + if (allocated (scalar%slr)) stop 1 + if (allocated (l_scalar%slr)) stop 1 + if (allocated (a_scalar%slr)) stop 1 + if (allocated (la_scalar%slr)) stop 1 + if (is_present) then + if (allocated (opt_scalar%slr)) stop 1 + if (allocated (a_opt_scalar%slr)) stop 1 + endif + if (allocated (scalar%arr)) stop 1 + if (allocated (l_scalar%arr)) stop 1 + if (allocated (a_scalar%arr)) stop 1 + if (allocated (la_scalar%arr)) stop 1 + if (is_present) then + if (allocated (opt_scalar%arr)) stop 1 + if (allocated (a_opt_scalar%arr)) stop 1 + endif + end if + + ! (RE)SET VALUE + scalar%var = 42 + l_scalar%var = 42 + if (is_present) then + opt_scalar%var = 42 + endif + do j = 1, 2 + do i = 1, 3 + array(i,j)%var = i*97 + 100*41*j + l_array(i,j)%var = i*97 + 100*41*j + if (is_present) then + opt_array(i,j)%var = i*97 + 100*41*j + end if + end do + end do + + if (dummy_alloced) then + a_scalar%var = 42 + la_scalar%var = 42 + if (is_present) then + a_opt_scalar%var = 42 + endif + do j = 1, 2 + do i = 1, 3 + a_array(i,j)%var = i*97 + 100*41*j + la_array(i,j)%var = i*97 + 100*41*j + if (is_present) then + a_opt_array(i,j)%var = i*97 + 100*41*j + endif + end do + end do + end if + + if (inner_alloc) then + scalar%slr = 467 + l_scalar%slr = 467 + a_scalar%slr = 467 + la_scalar%slr = 467 + if (is_present) then + opt_scalar%slr = 467 + a_opt_scalar%slr = 467 + end if + do j = 1, 2 + do i = 1, 3 + array(i,j)%slr = (i*97 + 100*41*j) + 467 + l_array(i,j)%slr = (i*97 + 100*41*j) + 467 + a_array(i,j)%slr = (i*97 + 100*41*j) + 467 + la_array(i,j)%slr = (i*97 + 100*41*j) + 467 + if (is_present) then + opt_array(i,j)%slr = (i*97 + 100*41*j) + 467 + a_opt_array(i,j)%slr = (i*97 + 100*41*j) + 467 + end if + end do + end do + + do l = 1, 5 + do k = 1, 4 + scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + l_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + a_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + la_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + if (is_present) then + opt_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + a_opt_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + end if + end do + end do + do j = 1, 2 + do i = 1, 3 + do l = 1, j + do k = 1, i + array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + l_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + a_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + la_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + if (is_present) then + opt_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + a_opt_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + end if + end do + end do + end do + end do + end if + end subroutine + + subroutine test(scalar, array, a_scalar, a_array, opt_scalar, opt_array, & + a_opt_scalar, a_opt_array) + type(t) :: scalar, array(:,:), opt_scalar, opt_array(:,:), a_scalar, a_array(:,:) + type(t) :: a_opt_scalar, a_opt_array(:,:) + type(t) :: l_scalar, l_array(3,2), la_scalar, la_array(:,:) + allocatable :: a_scalar, a_array, a_opt_scalar, a_opt_array, la_scalar, la_array + optional :: opt_scalar, opt_array, a_opt_scalar, a_opt_array + + integer :: i, j, k, l + logical :: is_present, dummy_alloced, local_alloced, inner_alloc + is_present = present(opt_scalar) + dummy_alloced = allocated(a_scalar) + inner_alloc = allocated(scalar%slr) + + l_scalar%var = 42 + do j = 1, 2 + do i = 1, 3 + l_array(i,j)%var = i*97 + 100*41*j + end do + end do + + if (dummy_alloced) then + allocate(la_scalar, la_array(3,2)) + a_scalar%var = 42 + la_scalar%var = 42 + do j = 1, 2 + do i = 1, 3 + l_array(i,j)%var = i*97 + 100*41*j + la_array(i,j)%var = i*97 + 100*41*j + end do + end do + end if + + if (inner_alloc) then + l_scalar%slr = 467 + la_scalar%slr = 467 + do j = 1, 2 + do i = 1, 3 + l_array(i,j)%slr = (i*97 + 100*41*j) + 467 + la_array(i,j)%slr = (i*97 + 100*41*j) + 467 + end do + end do + + allocate(l_scalar%arr(4,5), la_scalar%arr(4,5)) + do l = 1, 5 + do k = 1, 4 + l_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + la_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + end do + end do + do j = 1, 2 + do i = 1, 3 + allocate(l_array(i,j)%arr(i,j), la_array(i,j)%arr(i,j)) + do l = 1, j + do k = 1, i + l_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + la_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + end do + end do + end do + end do + end if + + ! implicit mapping + !$omp target + if (is_present) then + call check_it (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array, & + opt_scalar, opt_array, a_opt_scalar, a_opt_array) + else + call check_it (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array) + end if + !$omp end target + + if (is_present) then + call check_reset (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array, & + opt_scalar, opt_array, a_opt_scalar, a_opt_array) + else + call check_reset (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array) + endif + + ! explicit mapping + !$omp target map(scalar, array, opt_scalar, opt_array, a_scalar, a_array) & + !$omp& map(a_opt_scalar, a_opt_array) & + !$omp& map(l_scalar, l_array, la_scalar, la_array) + if (is_present) then + call check_it (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array, & + opt_scalar, opt_array, a_opt_scalar, a_opt_array) + else + call check_it (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array) + endif + !$omp end target + + if (is_present) then + call check_reset (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array, & + opt_scalar, opt_array, a_opt_scalar, a_opt_array) + else + call check_reset (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array) + endif + end subroutine +end module + +program main + use m + implicit none (type, external) + type(t) :: scalar, array(3,2), opt_scalar, opt_array(3,2), a_scalar, a_array(:,:) + type(t) :: a_opt_scalar, a_opt_array(:,:) + allocatable :: a_scalar, a_array, a_opt_scalar, a_opt_array + integer :: i, j, k, l, n + + scalar%var = 42 + opt_scalar%var = 42 + do j = 1, 2 + do i = 1, 3 + array(i,j)%var = i*97 + 100*41*j + opt_array(i,j)%var = i*97 + 100*41*j + end do + end do + + ! unallocated + call test (scalar, array, a_scalar, a_array) + call test (scalar, array, a_scalar, a_array, opt_scalar, opt_array, a_opt_scalar, a_opt_array) + + ! allocated + allocate(a_scalar, a_opt_scalar, a_array(3,2), a_opt_array(3,2)) + a_scalar%var = 42 + a_opt_scalar%var = 42 + do j = 1, 2 + do i = 1, 3 + a_array(i,j)%var = i*97 + 100*41*j + a_opt_array(i,j)%var = i*97 + 100*41*j + end do + end do + + call test (scalar, array, a_scalar, a_array) + call test (scalar, array, a_scalar, a_array, opt_scalar, opt_array, a_opt_scalar, a_opt_array) + + ! comps allocated + scalar%slr = 467 + a_scalar%slr = 467 + opt_scalar%slr = 467 + a_opt_scalar%slr = 467 + do j = 1, 2 + do i = 1, 3 + array(i,j)%slr = (i*97 + 100*41*j) + 467 + a_array(i,j)%slr = (i*97 + 100*41*j) + 467 + opt_array(i,j)%slr = (i*97 + 100*41*j) + 467 + a_opt_array(i,j)%slr = (i*97 + 100*41*j) + 467 + end do + end do + + allocate(scalar%arr(4,5), a_scalar%arr(4,5), opt_scalar%arr(4,5), a_opt_scalar%arr(4,5)) + do l = 1, 5 + do k = 1, 4 + scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + a_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + opt_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + a_opt_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + end do + end do + do j = 1, 2 + do i = 1, 3 + allocate(array(i,j)%arr(i,j), a_array(i,j)%arr(i,j), opt_array(i,j)%arr(i,j), a_opt_array(i,j)%arr(i,j)) + do l = 1, j + do k = 1, i + array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + a_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + opt_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + a_opt_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + end do + end do + end do + end do + + call test (scalar, array, a_scalar, a_array) + call test (scalar, array, a_scalar, a_array, opt_scalar, opt_array, a_opt_scalar, a_opt_array) + + deallocate(a_scalar, a_opt_scalar, a_array, a_opt_array) +end diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-8.f90 b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-8.f90 new file mode 100644 index 0000000..f5a286e --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-8.f90 @@ -0,0 +1,268 @@ +module m + implicit none (type, external) + type t + integer, allocatable :: A(:) + end type t + type t2 + type(t), allocatable :: vT + integer, allocatable :: x + end type t2 + +contains + + subroutine test_alloc() + type(t) :: var + type(t), allocatable :: var2 + + allocate(var2) + allocate(var%A(4), var2%A(5)) + + !$omp target enter data map(alloc: var, var2) + !$omp target + if (.not. allocated(Var2)) stop 1 + if (.not. allocated(Var%A)) stop 2 + if (.not. allocated(Var2%A)) stop 3 + if (lbound(var%A, 1) /= 1 .or. ubound(var%A, 1) /= 4) stop 4 + if (lbound(var2%A, 1) /= 1 .or. ubound(var2%A, 1) /= 5) stop 5 + var%A = [1,2,3,4] + var2%A = [11,22,33,44,55] + !$omp end target + !$omp target exit data map(from: var, var2) + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%A)) error stop + if (.not. allocated(Var2%A)) error stop + if (lbound(var%A, 1) /= 1 .or. ubound(var%A, 1) /= 4) error stop + if (lbound(var2%A, 1) /= 1 .or. ubound(var2%A, 1) /= 5) error stop + if (any(var%A /= [1,2,3,4])) error stop + if (any(var2%A /= [11,22,33,44,55])) error stop + end subroutine test_alloc + + subroutine test2_alloc() + type(t2) :: var + type(t2), allocatable :: var2 + + allocate(var2) + allocate(var%x, var2%x) + + !$omp target enter data map(alloc: var, var2) + !$omp target + if (.not. allocated(Var2)) stop 6 + if (.not. allocated(Var%x)) stop 7 + if (.not. allocated(Var2%x)) stop 8 + var%x = 42 + var2%x = 43 + !$omp end target + !$omp target exit data map(from: var, var2) + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%x)) error stop + if (.not. allocated(Var2%x)) error stop + if (var%x /= 42) error stop + if (var2%x /= 43) error stop + + allocate(var%vt, var2%vt) + allocate(var%vt%A(-1:3), var2%vt%A(0:4)) + + !$omp target enter data map(alloc: var, var2) + !$omp target + if (.not. allocated(Var2)) stop 11 + if (.not. allocated(Var%x)) stop 12 + if (.not. allocated(Var2%x)) stop 13 + if (.not. allocated(Var%vt)) stop 14 + if (.not. allocated(Var2%vt)) stop 15 + if (.not. allocated(Var%vt%a)) stop 16 + if (.not. allocated(Var2%vt%a)) stop 17 + var%x = 42 + var2%x = 43 + if (lbound(var%vt%A, 1) /= -1 .or. ubound(var%vt%A, 1) /= 3) stop 4 + if (lbound(var2%vt%A, 1) /= 0 .or. ubound(var2%vt%A, 1) /= 4) stop 5 + var%vt%A = [1,2,3,4,5] + var2%vt%A = [11,22,33,44,55] + !$omp end target + !$omp target exit data map(from: var, var2) + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%x)) error stop + if (.not. allocated(Var2%x)) error stop + if (.not. allocated(Var%vt)) error stop + if (.not. allocated(Var2%vt)) error stop + if (.not. allocated(Var%vt%a)) error stop + if (.not. allocated(Var2%vt%a)) error stop + if (var%x /= 42) error stop + if (var2%x /= 43) error stop + if (lbound(var%vt%A, 1) /= -1 .or. ubound(var%vt%A, 1) /= 3) error stop + if (lbound(var2%vt%A, 1) /= 0 .or. ubound(var2%vt%A, 1) /= 4) error stop + if (any(var%vt%A /= [1,2,3,4,5])) error stop + if (any(var2%vt%A /= [11,22,33,44,55])) error stop + end subroutine test2_alloc + + + subroutine test_alloc_target() + type(t) :: var + type(t), allocatable :: var2 + + allocate(var2) + allocate(var%A(4), var2%A(5)) + + !$omp target map(alloc: var, var2) + if (.not. allocated(Var2)) stop 1 + if (.not. allocated(Var%A)) stop 2 + if (.not. allocated(Var2%A)) stop 3 + if (lbound(var%A, 1) /= 1 .or. ubound(var%A, 1) /= 4) stop 4 + if (lbound(var2%A, 1) /= 1 .or. ubound(var2%A, 1) /= 5) stop 5 + var%A = [1,2,3,4] + var2%A = [11,22,33,44,55] + !$omp end target + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%A)) error stop + if (.not. allocated(Var2%A)) error stop + if (lbound(var%A, 1) /= 1 .or. ubound(var%A, 1) /= 4) error stop + if (lbound(var2%A, 1) /= 1 .or. ubound(var2%A, 1) /= 5) error stop + end subroutine test_alloc_target + + subroutine test2_alloc_target() + type(t2) :: var + type(t2), allocatable :: var2 + + allocate(var2) + allocate(var%x, var2%x) + + !$omp target map(alloc: var, var2) + if (.not. allocated(Var2)) stop 6 + if (.not. allocated(Var%x)) stop 7 + if (.not. allocated(Var2%x)) stop 8 + var%x = 42 + var2%x = 43 + !$omp end target + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%x)) error stop + if (.not. allocated(Var2%x)) error stop + + allocate(var%vt, var2%vt) + allocate(var%vt%A(-1:3), var2%vt%A(0:4)) + + !$omp target map(alloc: var, var2) + if (.not. allocated(Var2)) stop 11 + if (.not. allocated(Var%x)) stop 12 + if (.not. allocated(Var2%x)) stop 13 + if (.not. allocated(Var%vt)) stop 14 + if (.not. allocated(Var2%vt)) stop 15 + if (.not. allocated(Var%vt%a)) stop 16 + if (.not. allocated(Var2%vt%a)) stop 17 + var%x = 42 + var2%x = 43 + if (lbound(var%vt%A, 1) /= -1 .or. ubound(var%vt%A, 1) /= 3) stop 4 + if (lbound(var2%vt%A, 1) /= 0 .or. ubound(var2%vt%A, 1) /= 4) stop 5 + var%vt%A = [1,2,3,4,5] + var2%vt%A = [11,22,33,44,55] + !$omp end target + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%x)) error stop + if (.not. allocated(Var2%x)) error stop + if (.not. allocated(Var%vt)) error stop + if (.not. allocated(Var2%vt)) error stop + if (.not. allocated(Var%vt%a)) error stop + if (.not. allocated(Var2%vt%a)) error stop + if (lbound(var%vt%A, 1) /= -1 .or. ubound(var%vt%A, 1) /= 3) error stop + if (lbound(var2%vt%A, 1) /= 0 .or. ubound(var2%vt%A, 1) /= 4) error stop + end subroutine test2_alloc_target + + + + subroutine test_from() + type(t) :: var + type(t), allocatable :: var2 + + allocate(var2) + allocate(var%A(4), var2%A(5)) + + !$omp target map(from: var, var2) + if (.not. allocated(Var2)) stop 1 + if (.not. allocated(Var%A)) stop 2 + if (.not. allocated(Var2%A)) stop 3 + if (lbound(var%A, 1) /= 1 .or. ubound(var%A, 1) /= 4) stop 4 + if (lbound(var2%A, 1) /= 1 .or. ubound(var2%A, 1) /= 5) stop 5 + var%A = [1,2,3,4] + var2%A = [11,22,33,44,55] + !$omp end target + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%A)) error stop + if (.not. allocated(Var2%A)) error stop + if (lbound(var%A, 1) /= 1 .or. ubound(var%A, 1) /= 4) error stop + if (lbound(var2%A, 1) /= 1 .or. ubound(var2%A, 1) /= 5) error stop + if (any(var%A /= [1,2,3,4])) error stop + if (any(var2%A /= [11,22,33,44,55])) error stop + end subroutine test_from + + subroutine test2_from() + type(t2) :: var + type(t2), allocatable :: var2 + + allocate(var2) + allocate(var%x, var2%x) + + !$omp target map(from: var, var2) + if (.not. allocated(Var2)) stop 6 + if (.not. allocated(Var%x)) stop 7 + if (.not. allocated(Var2%x)) stop 8 + var%x = 42 + var2%x = 43 + !$omp end target + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%x)) error stop + if (.not. allocated(Var2%x)) error stop + if (var%x /= 42) error stop + if (var2%x /= 43) error stop + + allocate(var%vt, var2%vt) + allocate(var%vt%A(-1:3), var2%vt%A(0:4)) + + !$omp target map(from: var, var2) + if (.not. allocated(Var2)) stop 11 + if (.not. allocated(Var%x)) stop 12 + if (.not. allocated(Var2%x)) stop 13 + if (.not. allocated(Var%vt)) stop 14 + if (.not. allocated(Var2%vt)) stop 15 + if (.not. allocated(Var%vt%a)) stop 16 + if (.not. allocated(Var2%vt%a)) stop 17 + var%x = 42 + var2%x = 43 + if (lbound(var%vt%A, 1) /= -1 .or. ubound(var%vt%A, 1) /= 3) stop 4 + if (lbound(var2%vt%A, 1) /= 0 .or. ubound(var2%vt%A, 1) /= 4) stop 5 + var%vt%A = [1,2,3,4,5] + var2%vt%A = [11,22,33,44,55] + !$omp end target + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%x)) error stop + if (.not. allocated(Var2%x)) error stop + if (.not. allocated(Var%vt)) error stop + if (.not. allocated(Var2%vt)) error stop + if (.not. allocated(Var%vt%a)) error stop + if (.not. allocated(Var2%vt%a)) error stop + if (var%x /= 42) error stop + if (var2%x /= 43) error stop + if (lbound(var%vt%A, 1) /= -1 .or. ubound(var%vt%A, 1) /= 3) error stop + if (lbound(var2%vt%A, 1) /= 0 .or. ubound(var2%vt%A, 1) /= 4) error stop + if (any(var%vt%A /= [1,2,3,4,5])) error stop + if (any(var2%vt%A /= [11,22,33,44,55])) error stop + end subroutine test2_from + +end module m + +use m + implicit none (type, external) + call test_alloc + call test2_alloc + call test_alloc_target + call test2_alloc_target + + call test_from + call test2_from +end diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9-usm.f90 b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9-usm.f90 new file mode 100644 index 0000000..90378c0 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9-usm.f90 @@ -0,0 +1,11 @@ +! { dg-additional-options "-cpp -DUSE_USM_REQUIREMENT=1 -Wno-openmp" } +! +! We silence the warning: +! Mapping of polymorphic list item '...' is unspecified behavior [-Wopenmp] +! +! Ensure that polymorphic mapping is diagnosed as undefined behavior +! Ensure that static access to polymorphic variables works + +! Run map-alloc-comp-9.f90 in unified-shared-memory mode + +#include "map-alloc-comp-9.f90" diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9.f90 b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9.f90 new file mode 100644 index 0000000..26c73d7 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9.f90 @@ -0,0 +1,578 @@ +! { dg-additional-options "-cpp" } +! +! Ensure that polymorphic mapping is diagnosed as undefined behavior +! Ensure that static access to polymorphic variables works + +! Some extended tests are only run with shared memory +! To enforce this (where possible) on the device side: +! #define USE_USM_REQUIREMENT +! which is done in map-alloc-comp-9-usm.f90 + +subroutine test(case) +implicit none(type, external) +#ifdef USE_USM_REQUIREMENT + !$omp requires unified_shared_memory +#endif + +type t + integer :: x(4) +end type t + +type ta + integer, allocatable :: x(:) +end type ta + +type t2 + class(t), allocatable :: x + class(t), allocatable :: x2(:) +end type t2 + +type t3 + type(t2) :: y + type(t2) :: y2(2) +end type t3 + +type t4 + type(t3), allocatable :: y + type(t3), allocatable :: y2(:) +end type t4 + +integer, value :: case + +logical :: is_shared_mem + +! Mangle stack addresses +integer, volatile :: case_var(100*case) + +type(t), allocatable :: var1 +type(ta), allocatable :: var1a +class(t), allocatable :: var2 +type(t2), allocatable :: var3 +type(t4), allocatable :: var4 + +case_var(100) = 0 +!print *, 'case', case + +var1 = t([1,2,3,4]) +var1a = ta([-1,-2,-3,-4,-5]) + +var2 = t([11,22,33,44]) + +allocate(t2 :: var3) +allocate(t :: var3%x) +allocate(t :: var3%x2(2)) +var3%x%x = [111,222,333,444] +var3%x2(1)%x = 2*[111,222,333,444] +var3%x2(2)%x = 3*[111,222,333,444] + +allocate(t4 :: var4) +allocate(t3 :: var4%y) +allocate(t3 :: var4%y2(2)) +allocate(t :: var4%y%y%x) +allocate(t :: var4%y%y%x2(2)) +allocate(t :: var4%y2(1)%y%x) +allocate(t :: var4%y2(1)%y%x2(2)) +allocate(t :: var4%y2(2)%y%x) +allocate(t :: var4%y2(2)%y%x2(2)) +var4%y%y%x%x = -1 * [1111,2222,3333,4444] +var4%y%y%x2(1)%x = -2 * [1111,2222,3333,4444] +var4%y%y%x2(2)%x = -3 * [1111,2222,3333,4444] +var4%y2(1)%y%x%x = -4 * [1111,2222,3333,4444] +var4%y2(1)%y%x2(1)%x = -5 * [1111,2222,3333,4444] +var4%y2(1)%y%x2(2)%x = -6 * [1111,2222,3333,4444] +var4%y2(2)%y%x%x = -7 * [1111,2222,3333,4444] +var4%y2(2)%y%x2(1)%x = -8 * [1111,2222,3333,4444] +var4%y2(2)%y%x2(2)%x = -9 * [1111,2222,3333,4444] + +#ifdef USE_USM_REQUIREMENT +is_shared_mem = .true. +#else +is_shared_mem = .false. +!$omp target map(to: is_shared_mem) + is_shared_mem = .true. +!$omp end target +#endif + +if (case == 1) then + ! implicit mapping + !$omp target + if (any (var1%x /= [1,2,3,4])) stop 1 + var1%x = 2 * var1%x + !$omp end target + + !$omp target + if (any (var1a%x /= [-1,-2,-3,-4])) stop 2 + var1a%x = 3 * var1a%x + !$omp end target + + !$omp target ! { dg-warning "Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var2%x /= [11,22,33,44])) stop 3 + var2%x = 4 * var2%x + !$omp end target + + !$omp target ! { dg-warning "Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var3%x%x /= [111,222,333,444])) stop 4 + var3%x%x = 5 * var3%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var3%x2(1)%x /= 2*[111,222,333,444])) stop 4 + if (any (var3%x2(2)%x /= 3*[111,222,333,444])) stop 4 + var3%x2(1)%x = 5 * var3%x2(1)%x + var3%x2(2)%x = 5 * var3%x2(2)%x + end if + !$omp end target + + !$omp target ! { dg-warning "Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var4%y%y%x%x /= -1 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y%y%x2(1)%x /= -2 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y%y%x2(2)%x /= -3 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(1)%y%x%x /= -4 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(1)%y%x2(1)%x /= -5 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(1)%y%x2(2)%x /= -6 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(2)%y%x%x /= -7 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(2)%y%x2(1)%x /= -8 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(2)%y%x2(2)%x /= -9 * [1111,2222,3333,4444])) stop 5 + end if + var4%y%y%x%x = 6 * var4%y%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y%y%x2(1)%x = 6 * var4%y%y%x2(1)%x + var4%y%y%x2(2)%x = 6 * var4%y%y%x2(2)%x + endif + var4%y2(1)%y%x%x = 6 * var4%y2(1)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(1)%y%x2(1)%x = 6 * var4%y2(1)%y%x2(1)%x + var4%y2(1)%y%x2(2)%x = 6 * var4%y2(1)%y%x2(2)%x + endif + var4%y2(2)%y%x%x = 6 * var4%y2(2)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(2)%y%x2(1)%x = 6 * var4%y2(2)%y%x2(1)%x + var4%y2(2)%y%x2(2)%x = 6 * var4%y2(2)%y%x2(2)%x + endif + !$omp end target + +else if (case == 2) then + ! Use target with defaultmap(TO) + + !$omp target defaultmap(to : all) + if (any (var1%x /= [1,2,3,4])) stop 1 + var1%x = 2 * var1%x + !$omp end target + + !$omp target defaultmap(to : all) + if (any (var1a%x /= [-1,-2,-3,-4])) stop 2 + var1a%x = 3 * var1a%x + !$omp end target + + !$omp target defaultmap(to : all) ! { dg-warning "Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var2%x /= [11,22,33,44])) stop 3 + var2%x = 4 * var2%x + !$omp end target + + !$omp target defaultmap(to : all) ! { dg-warning "Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var3%x%x /= [111,222,333,444])) stop 4 + var3%x%x = 5 * var3%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var3%x2(1)%x /= 2*[111,222,333,444])) stop 4 + if (any (var3%x2(2)%x /= 3*[111,222,333,444])) stop 4 + var3%x2(1)%x = 5 * var3%x2(1)%x + var3%x2(2)%x = 5 * var3%x2(2)%x + endif + !$omp end target + + !$omp target defaultmap(to : all) firstprivate(is_shared_mem) ! { dg-warning "Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var4%y%y%x%x /= -1 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y%y%x2(1)%x /= -2 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y%y%x2(2)%x /= -3 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(1)%y%x%x /= -4 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(1)%y%x2(1)%x /= -5 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(1)%y%x2(2)%x /= -6 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(2)%y%x%x /= -7 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(2)%y%x2(1)%x /= -8 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(2)%y%x2(2)%x /= -9 * [1111,2222,3333,4444])) stop 5 + endif + var4%y%y%x%x = 6 * var4%y%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y%y%x2(1)%x = 6 * var4%y%y%x2(1)%x + var4%y%y%x2(2)%x = 6 * var4%y%y%x2(2)%x + endif + var4%y2(1)%y%x%x = 6 * var4%y2(1)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(1)%y%x2(1)%x = 6 * var4%y2(1)%y%x2(1)%x + var4%y2(1)%y%x2(2)%x = 6 * var4%y2(1)%y%x2(2)%x + endif + var4%y2(2)%y%x%x = 6 * var4%y2(2)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(2)%y%x2(1)%x = 6 * var4%y2(2)%y%x2(1)%x + var4%y2(2)%y%x2(2)%x = 6 * var4%y2(2)%y%x2(2)%x + endif + !$omp end target + +else if (case == 3) then + ! Use target with map clause + + !$omp target map(tofrom: var1) + if (any (var1%x /= [1,2,3,4])) stop 1 + var1%x = 2 * var1%x + !$omp end target + + !$omp target map(tofrom: var1a) + if (any (var1a%x /= [-1,-2,-3,-4])) stop 2 + var1a%x = 3 * var1a%x + !$omp end target + + !$omp target map(tofrom: var2) ! { dg-warning "28: Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var2%x /= [11,22,33,44])) stop 3 + var2%x = 4 * var2%x + !$omp end target + + !$omp target map(tofrom: var3) ! { dg-warning "28: Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var3%x%x /= [111,222,333,444])) stop 4 + var3%x%x = 5 * var3%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var3%x2(1)%x /= 2*[111,222,333,444])) stop 4 + if (any (var3%x2(2)%x /= 3*[111,222,333,444])) stop 4 + var3%x2(1)%x = 5 * var3%x2(1)%x + var3%x2(2)%x = 5 * var3%x2(2)%x + endif + !$omp end target + + !$omp target map(tofrom: var4) ! { dg-warning "28: Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var4%y%y%x%x /= -1 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y%y%x2(1)%x /= -2 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y%y%x2(2)%x /= -3 * [1111,2222,3333,4444])) stop 5 + end if + if (any (var4%y2(1)%y%x%x /= -4 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(1)%y%x2(1)%x /= -5 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(1)%y%x2(2)%x /= -6 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(2)%y%x%x /= -7 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(2)%y%x2(1)%x /= -8 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(2)%y%x2(2)%x /= -9 * [1111,2222,3333,4444])) stop 5 + endif + var4%y%y%x%x = 6 * var4%y%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y%y%x2(1)%x = 6 * var4%y%y%x2(1)%x + var4%y%y%x2(2)%x = 6 * var4%y%y%x2(2)%x + endif + var4%y2(1)%y%x%x = 6 * var4%y2(1)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(1)%y%x2(1)%x = 6 * var4%y2(1)%y%x2(1)%x + var4%y2(1)%y%x2(2)%x = 6 * var4%y2(1)%y%x2(2)%x + endif + var4%y2(2)%y%x%x = 6 * var4%y2(2)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(2)%y%x2(1)%x = 6 * var4%y2(2)%y%x2(1)%x + var4%y2(2)%y%x2(2)%x = 6 * var4%y2(2)%y%x2(2)%x + endif + !$omp end target + +else if (case == 4) then + ! Use target with map clause -- NOTE: This uses TO not TOFROM + + !$omp target map(to: var1) + if (any (var1%x /= [1,2,3,4])) stop 1 + var1%x = 2 * var1%x + !$omp end target + + !$omp target map(to: var1a) + if (any (var1a%x /= [-1,-2,-3,-4])) stop 2 + var1a%x = 3 * var1a%x + !$omp end target + + !$omp target map(to: var2) ! { dg-warning "24: Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var2%x /= [11,22,33,44])) stop 3 + var2%x = 4 * var2%x + !$omp end target + + !$omp target map(to: var3) ! { dg-warning "24: Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var3%x%x /= [111,222,333,444])) stop 4 + var3%x%x = 5 * var3%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var3%x2(1)%x /= 2*[111,222,333,444])) stop 4 + if (any (var3%x2(2)%x /= 3*[111,222,333,444])) stop 4 + var3%x2(1)%x = 5 * var3%x2(1)%x + var3%x2(2)%x = 5 * var3%x2(2)%x + endif + !$omp end target + + !$omp target map(to: var4) ! { dg-warning "24: Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var4%y%y%x%x /= -1 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y%y%x2(1)%x /= -2 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y%y%x2(2)%x /= -3 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(1)%y%x%x /= -4 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(1)%y%x2(1)%x /= -5 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(1)%y%x2(2)%x /= -6 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(2)%y%x%x /= -7 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(2)%y%x2(1)%x /= -8 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(2)%y%x2(2)%x /= -9 * [1111,2222,3333,4444])) stop 5 + endif + var4%y%y%x%x = 6 * var4%y%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y%y%x2(1)%x = 6 * var4%y%y%x2(1)%x + var4%y%y%x2(2)%x = 6 * var4%y%y%x2(2)%x + endif + var4%y2(1)%y%x%x = 6 * var4%y2(1)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(1)%y%x2(1)%x = 6 * var4%y2(1)%y%x2(1)%x + var4%y2(1)%y%x2(2)%x = 6 * var4%y2(1)%y%x2(2)%x + endif + var4%y2(2)%y%x%x = 6 * var4%y2(2)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(2)%y%x2(1)%x = 6 * var4%y2(2)%y%x2(1)%x + var4%y2(2)%y%x2(2)%x = 6 * var4%y2(2)%y%x2(2)%x + endif + !$omp end target + +else if (case == 5) then + ! Use target enter/exit data + target with explicit map + !$omp target enter data map(to: var1) + !$omp target enter data map(to: var1a) + !$omp target enter data map(to: var2) ! { dg-warning "35: Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + !$omp target enter data map(to: var3) ! { dg-warning "35: Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + !$omp target enter data map(to: var4) ! { dg-warning "35: Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + + !$omp target map(to: var1) + if (any (var1%x /= [1,2,3,4])) stop 1 + var1%x = 2 * var1%x + !$omp end target + + !$omp target map(to: var1a) + if (any (var1a%x /= [-1,-2,-3,-4])) stop 2 + var1a%x = 3 * var1a%x + !$omp end target + + !$omp target map(to: var2) ! { dg-warning "24: Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var2%x /= [11,22,33,44])) stop 3 + var2%x = 4 * var2%x + !$omp end target + + !$omp target map(to: var3) ! { dg-warning "24: Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var3%x%x /= [111,222,333,444])) stop 4 + var3%x%x = 5 * var3%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var3%x2(1)%x /= 2*[111,222,333,444])) stop 4 + if (any (var3%x2(2)%x /= 3*[111,222,333,444])) stop 4 + var3%x2(1)%x = 5 * var3%x2(1)%x + var3%x2(2)%x = 5 * var3%x2(2)%x + endif + !$omp end target + + !$omp target map(to: var4) ! { dg-warning "24: Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var4%y%y%x%x /= -1 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y%y%x2(1)%x /= -2 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y%y%x2(2)%x /= -3 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(1)%y%x%x /= -4 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(1)%y%x2(1)%x /= -5 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(1)%y%x2(2)%x /= -6 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(2)%y%x%x /= -7 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(2)%y%x2(1)%x /= -8 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(2)%y%x2(2)%x /= -9 * [1111,2222,3333,4444])) stop 5 + endif + var4%y%y%x%x = 6 * var4%y%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y%y%x2(1)%x = 6 * var4%y%y%x2(1)%x + var4%y%y%x2(2)%x = 6 * var4%y%y%x2(2)%x + endif + var4%y2(1)%y%x%x = 6 * var4%y2(1)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(1)%y%x2(1)%x = 6 * var4%y2(1)%y%x2(1)%x + var4%y2(1)%y%x2(2)%x = 6 * var4%y2(1)%y%x2(2)%x + endif + var4%y2(2)%y%x%x = 6 * var4%y2(2)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(2)%y%x2(1)%x = 6 * var4%y2(2)%y%x2(1)%x + var4%y2(2)%y%x2(2)%x = 6 * var4%y2(2)%y%x2(2)%x + endif + !$omp end target + + !$omp target exit data map(from: var1) + !$omp target exit data map(from: var1a) + !$omp target exit data map(from: var2) ! { dg-warning "36: Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + !$omp target exit data map(from: var3) ! { dg-warning "36: Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + !$omp target exit data map(from: var4) ! { dg-warning "36: Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + +else if (case == 6) then + ! Use target enter/exit data + target with implicit map + + !$omp target enter data map(to: var1) + !$omp target enter data map(to: var1a) + !$omp target enter data map(to: var2) ! { dg-warning "35: Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + !$omp target enter data map(to: var3) ! { dg-warning "35: Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + !$omp target enter data map(to: var4) ! { dg-warning "35: Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + + !$omp target + if (any (var1%x /= [1,2,3,4])) stop 1 + var1%x = 2 * var1%x + !$omp end target + + !$omp target + if (any (var1a%x /= [-1,-2,-3,-4])) stop 2 + var1a%x = 3 * var1a%x + !$omp end target + + !$omp target ! { dg-warning "Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var2%x /= [11,22,33,44])) stop 3 + var2%x = 4 * var2%x + !$omp end target + + !$omp target ! { dg-warning "Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var3%x%x /= [111,222,333,444])) stop 4 + var3%x%x = 5 * var3%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var3%x2(1)%x /= 2*[111,222,333,444])) stop 4 + if (any (var3%x2(2)%x /= 3*[111,222,333,444])) stop 4 + var3%x2(1)%x = 5 * var3%x2(1)%x + var3%x2(2)%x = 5 * var3%x2(2)%x + endif + !$omp end target + + !$omp target ! { dg-warning "Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var4%y%y%x%x /= -1 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y%y%x2(1)%x /= -2 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y%y%x2(2)%x /= -3 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(1)%y%x%x /= -4 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(1)%y%x2(1)%x /= -5 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(1)%y%x2(2)%x /= -6 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(2)%y%x%x /= -7 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(2)%y%x2(1)%x /= -8 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(2)%y%x2(2)%x /= -9 * [1111,2222,3333,4444])) stop 5 + endif + var4%y%y%x%x = 6 * var4%y%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y%y%x2(1)%x = 6 * var4%y%y%x2(1)%x + var4%y%y%x2(2)%x = 6 * var4%y%y%x2(2)%x + endif + var4%y2(1)%y%x%x = 6 * var4%y2(1)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(1)%y%x2(1)%x = 6 * var4%y2(1)%y%x2(1)%x + var4%y2(1)%y%x2(2)%x = 6 * var4%y2(1)%y%x2(2)%x + endif + var4%y2(2)%y%x%x = 6 * var4%y2(2)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(2)%y%x2(1)%x = 6 * var4%y2(2)%y%x2(1)%x + var4%y2(2)%y%x2(2)%x = 6 * var4%y2(2)%y%x2(2)%x + endif + !$omp end target + + !$omp target exit data map(from: var1) + !$omp target exit data map(from: var1a) + !$omp target exit data map(from: var2) ! { dg-warning "36: Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + !$omp target exit data map(from: var3) ! { dg-warning "36: Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + !$omp target exit data map(from: var4) ! { dg-warning "36: Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + +else + error stop +end if + +if ((case /= 2 .and. case /= 4) .or. is_shared_mem) then + ! The target update should have been active, check for the updated values + if (any (var1%x /= 2 * [1,2,3,4])) stop 11 + if (any (var1a%x /= 3 * [-1,-2,-3,-4])) stop 22 + if (any (var2%x /= 4 * [11,22,33,44])) stop 33 + + if (any (var3%x%x /= 5 * [111,222,333,444])) stop 44 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var3%x2(1)%x /= 2 * 5 * [111,222,333,444])) stop 44 + if (any (var3%x2(2)%x /= 3 * 5 * [111,222,333,444])) stop 44 + endif + + if (any (var4%y%y%x%x /= -1 * 6 * [1111,2222,3333,4444])) stop 55 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y%y%x2(1)%x /= -2 * 6 * [1111,2222,3333,4444])) stop 55 + if (any (var4%y%y%x2(2)%x /= -3 * 6 * [1111,2222,3333,4444])) stop 55 + endif + if (any (var4%y2(1)%y%x%x /= -4 * 6 * [1111,2222,3333,4444])) stop 55 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(1)%y%x2(1)%x /= -5 * 6 * [1111,2222,3333,4444])) stop 55 + if (any (var4%y2(1)%y%x2(2)%x /= -6 * 6 * [1111,2222,3333,4444])) stop 55 + endif + if (any (var4%y2(2)%y%x%x /= -7 * 6 * [1111,2222,3333,4444])) stop 55 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(2)%y%x2(1)%x /= -8 * 6 * [1111,2222,3333,4444])) stop 55 + if (any (var4%y2(2)%y%x2(2)%x /= -9 * 6 * [1111,2222,3333,4444])) stop 55 + endif +else + ! The old host values should still be there as 'to:' created a device copy + if (any (var1%x /= [1,2,3,4])) stop 12 + if (any (var1a%x /= [-1,-2,-3,-4])) stop 22 + if (any (var2%x /= [11,22,33,44])) stop 33 + + if (any (var3%x%x /= [111,222,333,444])) stop 44 + ! .not. is_shared_mem: + ! if (any (var3%x2(1)%x /= 2*[111,222,333,444])) stop 44 + ! if (any (var3%x2(2)%x /= 3*[111,222,333,444])) stop 44 + + if (any (var4%y%y%x%x /= -1 * [1111,2222,3333,4444])) stop 55 + if (any (var4%y%y%x2(1)%x /= -2 * [1111,2222,3333,4444])) stop 55 + if (any (var4%y%y%x2(2)%x /= -3 * [1111,2222,3333,4444])) stop 55 + if (any (var4%y2(1)%y%x%x /= -4 * [1111,2222,3333,4444])) stop 55 + ! .not. is_shared_mem: + !if (any (var4%y2(1)%y%x2(1)%x /= -5 * [1111,2222,3333,4444])) stop 55 + !if (any (var4%y2(1)%y%x2(2)%x /= -6 * [1111,2222,3333,4444])) stop 55 + if (any (var4%y2(2)%y%x%x /= -7 * [1111,2222,3333,4444])) stop 55 + ! .not. is_shared_mem: + !if (any (var4%y2(2)%y%x2(1)%x /= -8 * [1111,2222,3333,4444])) stop 55 + !if (any (var4%y2(2)%y%x2(2)%x /= -9 * [1111,2222,3333,4444])) stop 55 +end if +if (case_var(100) /= 0) stop 123 +end subroutine test + +program main + use omp_lib + implicit none(type, external) +#ifdef USE_USM_REQUIREMENT + !$omp requires unified_shared_memory +#endif + + interface + subroutine test(case) + integer, value :: case + end + end interface + integer :: dev + call run_it(omp_get_default_device()) + do dev = 0, omp_get_num_devices() + call run_it(dev) + end do + call run_it(omp_initial_device) +! print *, 'all done' +contains +subroutine run_it(dev) + integer, value :: dev +! print *, 'DEVICE', dev + call omp_set_default_device(dev) + call test(1) + call test(2) + call test(3) + call test(4) + call test(5) + call test(6) +end +end diff --git a/libgomp/testsuite/libgomp.fortran/metadirective-1.f90 b/libgomp/testsuite/libgomp.fortran/metadirective-1.f90 index 7b3e09f..d6f4d5b 100644 --- a/libgomp/testsuite/libgomp.fortran/metadirective-1.f90 +++ b/libgomp/testsuite/libgomp.fortran/metadirective-1.f90 @@ -1,4 +1,5 @@ -! { dg-do run } +! { dg-do run { target { ! offload_target_nvptx } } } +! { dg-do compile { target offload_target_nvptx } } program test implicit none @@ -33,6 +34,10 @@ program test contains subroutine f (x, y, z) integer :: x(N), y(N), z(N) + ! The following fails as on the host the target side cannot be + ! resolved - and the 'teams' or not status affects how 'target' + ! is called. -> See PR118694, esp. comment 9. + ! Note also the dg-do compile above for offload_target_nvptx !$omp target map (to: x, y) map(from: z) block @@ -43,6 +48,7 @@ contains z(i) = x(i) * y(i) enddo end block + ! { dg-bogus "'target' construct with nested 'teams' construct contains directives outside of the 'teams' construct" "PR118694" { xfail offload_target_nvptx } .-9 } */ end subroutine subroutine g (x, y, z) integer :: x(N), y(N), z(N) @@ -56,6 +62,7 @@ contains z(i) = x(i) * y(i) enddo end block + ! { dg-bogus "'target' construct with nested 'teams' construct contains directives outside of the 'teams' construct" "PR118694" { xfail offload_target_nvptx } .-9 } */ !$omp end target end subroutine end program diff --git a/libgomp/testsuite/libgomp.fortran/omp_target_memset-2.f90 b/libgomp/testsuite/libgomp.fortran/omp_target_memset-2.f90 new file mode 100644 index 0000000..2641086 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/omp_target_memset-2.f90 @@ -0,0 +1,67 @@ +! PR libgomp/120444 +! Async version + +use omp_lib +use iso_c_binding +implicit none (type, external) +integer(c_int) :: dev + +!$omp parallel do +do dev = omp_initial_device, omp_get_num_devices () +block + integer(c_int) :: i, val, start, tail + type(c_ptr) :: ptr, ptr2, tmpptr + integer(c_int8_t), pointer, contiguous :: fptr(:) + integer(c_intptr_t) :: intptr + integer(c_size_t), parameter :: count = 1024 + integer(omp_depend_kind) :: dep(1) + + ptr = omp_target_alloc (count, dev) + + !$omp depobj(dep(1)) depend(inout: ptr) + + ! Play also around with the alignment - as hsa_amd_memory_fill operates + ! on multiples of 4 bytes (c_int32_t) + + do start = 0, 31 + do tail = 0, 31 + val = iachar('0') + start + tail + + tmpptr = transfer (transfer (ptr, intptr) + start, tmpptr) + ptr2 = omp_target_memset_async (tmpptr, val, count - start - tail, dev, 0) + + if (.not. c_associated (tmpptr, ptr2)) stop 1 + + !$omp taskwait + + !$omp target device(dev) is_device_ptr(ptr) depend(depobj: dep(1)) nowait + do i = 1 + start, int(count, c_int) - start - tail + call c_f_pointer (ptr, fptr, [count]) + if (fptr(i) /= int (val, c_int8_t)) stop 2 + fptr(i) = fptr(i) + 2_c_int8_t + end do + !$omp end target + + ptr2 = omp_target_memset_async (tmpptr, val + 3, & + count - start - tail, dev, 1, dep) + + !$omp target device(dev) is_device_ptr(ptr) depend(depobj: dep(1)) nowait + do i = 1 + start, int(count, c_int) - start - tail + call c_f_pointer (ptr, fptr, [count]) + if (fptr(i) /= int (val + 3, c_int8_t)) stop 3 + fptr(i) = fptr(i) - 1_c_int8_t + end do + !$omp end target + + ptr2 = omp_target_memset_async (tmpptr, val - 3, & + count - start - tail, dev, 1, dep) + + !$omp taskwait depend (depobj: dep(1)) + end do + end do + + !$omp depobj(dep(1)) destroy + call omp_target_free (ptr, dev); +end block +end do +end diff --git a/libgomp/testsuite/libgomp.fortran/omp_target_memset.f90 b/libgomp/testsuite/libgomp.fortran/omp_target_memset.f90 new file mode 100644 index 0000000..1ee184a --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/omp_target_memset.f90 @@ -0,0 +1,39 @@ +! PR libgomp/120444 + +use omp_lib +use iso_c_binding +implicit none (type, external) + +integer(c_int) :: dev, i, val, start, tail +type(c_ptr) :: ptr, ptr2, tmpptr +integer(c_int8_t), pointer, contiguous :: fptr(:) +integer(c_intptr_t) :: intptr +integer(c_size_t), parameter :: count = 1024 + +do dev = omp_initial_device, omp_get_num_devices () + ptr = omp_target_alloc (count, dev) + + ! Play also around with the alignment - as hsa_amd_memory_fill operates + ! on multiples of 4 bytes (c_int32_t) + + do start = 0, 31 + do tail = 0, 31 + val = iachar('0') + start + tail + + tmpptr = transfer (transfer (ptr, intptr) + start, tmpptr) + ptr2 = omp_target_memset (tmpptr, val, count - start - tail, dev) + + if (.not. c_associated (tmpptr, ptr2)) stop 1 + + !$omp target device(dev) is_device_ptr(ptr) + do i = 1 + start, int(count, c_int) - start - tail + call c_f_pointer (ptr, fptr, [count]) + if (fptr(i) /= int (val, c_int8_t)) stop 2 + end do + !$omp end target + end do + end do + + call omp_target_free (ptr, dev); +end do +end diff --git a/libgomp/testsuite/libgomp.fortran/target-enter-data-8.f90 b/libgomp/testsuite/libgomp.fortran/target-enter-data-8.f90 new file mode 100644 index 0000000..c6d671c --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/target-enter-data-8.f90 @@ -0,0 +1,532 @@ +! { dg-additional-options "-cpp" } + +! FIXME: Some tests do not work yet. Those are for now in '#if 0' + +! Check that 'map(alloc:' properly works with +! - deferred-length character strings +! - arrays with array descriptors +! For those, the array descriptor / string length must be mapped with 'to:' + +program main +implicit none + +type t + integer :: ic(2:5), ic2 + character(len=11) :: ccstr(3:4), ccstr2 + character(len=11,kind=4) :: cc4str(3:7), cc4str2 + integer, pointer :: pc(:), pc2 + character(len=:), pointer :: pcstr(:), pcstr2 + character(len=:,kind=4), pointer :: pc4str(:), pc4str2 +end type t + +type(t) :: dt + +integer :: ii(5), ii2 +character(len=11) :: clstr(-1:1), clstr2 +character(len=11,kind=4) :: cl4str(0:3), cl4str2 +integer, pointer :: ip(:), ip2 +integer, allocatable :: ia(:), ia2 +character(len=:), pointer :: pstr(:), pstr2 +character(len=:), allocatable :: astr(:), astr2 +character(len=:,kind=4), pointer :: p4str(:), p4str2 +character(len=:,kind=4), allocatable :: a4str(:), a4str2 + + +allocate(dt%pc(5), dt%pc2) +allocate(character(len=2) :: dt%pcstr(2)) +allocate(character(len=4) :: dt%pcstr2) + +allocate(character(len=3,kind=4) :: dt%pc4str(2:3)) +allocate(character(len=5,kind=4) :: dt%pc4str2) + +allocate(ip(5), ip2, ia(8), ia2) +allocate(character(len=2) :: pstr(-2:0)) +allocate(character(len=4) :: pstr2) +allocate(character(len=6) :: astr(3:5)) +allocate(character(len=8) :: astr2) + +allocate(character(len=3,kind=4) :: p4str(2:4)) +allocate(character(len=5,kind=4) :: p4str2) +allocate(character(len=7,kind=4) :: a4str(-2:3)) +allocate(character(len=9,kind=4) :: a4str2) + + +! integer :: ic(2:5), ic2 + +!$omp target enter data map(alloc: dt%ic) +!$omp target map(alloc: dt%ic) + if (size(dt%ic) /= 4) error stop + if (lbound(dt%ic, 1) /= 2) error stop + if (ubound(dt%ic, 1) /= 5) error stop + dt%ic = [22, 33, 44, 55] +!$omp end target +!$omp target exit data map(from: dt%ic) +if (size(dt%ic) /= 4) error stop +if (lbound(dt%ic, 1) /= 2) error stop +if (ubound(dt%ic, 1) /= 5) error stop +if (any (dt%ic /= [22, 33, 44, 55])) error stop + +!$omp target enter data map(alloc: dt%ic2) +!$omp target map(alloc: dt%ic2) + dt%ic2 = 42 +!$omp end target +!$omp target exit data map(from: dt%ic2) +if (dt%ic2 /= 42) error stop + + +! character(len=11) :: ccstr(3:4), ccstr2 + +!$omp target enter data map(alloc: dt%ccstr) +!$omp target map(alloc: dt%ccstr) + if (len(dt%ccstr) /= 11) error stop + if (size(dt%ccstr) /= 2) error stop + if (lbound(dt%ccstr, 1) /= 3) error stop + if (ubound(dt%ccstr, 1) /= 4) error stop + dt%ccstr = ["12345678901", "abcdefghijk"] +!$omp end target +!$omp target exit data map(from: dt%ccstr) +if (len(dt%ccstr) /= 11) error stop +if (size(dt%ccstr) /= 2) error stop +if (lbound(dt%ccstr, 1) /= 3) error stop +if (ubound(dt%ccstr, 1) /= 4) error stop +if (any (dt%ccstr /= ["12345678901", "abcdefghijk"])) error stop + +!$omp target enter data map(alloc: dt%ccstr2) +!$omp target map(alloc: dt%ccstr2) + if (len(dt%ccstr2) /= 11) error stop + dt%ccstr2 = "ABCDEFGHIJK" +!$omp end target +!$omp target exit data map(from: dt%ccstr2) +if (len(dt%ccstr2) /= 11) error stop +if (dt%ccstr2 /= "ABCDEFGHIJK") error stop + + +! character(len=11,kind=4) :: cc4str(3:7), cc4str2 + +#if 0 +! Value check fails +!$omp target map(alloc: dt%cc4str) + if (len(dt%cc4str) /= 11) error stop + if (size(dt%cc4str) /= 5) error stop + if (lbound(dt%cc4str, 1) /= 3) error stop + if (ubound(dt%cc4str, 1) /= 7) error stop + dt%cc4str = [4_"12345678901", 4_"abcdefghijk", & + 4_"qerftcea6ds", 4_"a1f9g37ga4.", & + 4_"45ngwj56sj2"] +!$omp end target +!$omp target exit data map(from: dt%cc4str) +if (len(dt%cc4str) /= 11) error stop +if (size(dt%cc4str) /= 5) error stop +if (lbound(dt%cc4str, 1) /= 3) error stop +if (ubound(dt%cc4str, 1) /= 7) error stop +if (dt%cc4str(3) /= 4_"12345678901") error stop +if (dt%cc4str(4) /= 4_"abcdefghijk") error stop +if (dt%cc4str(5) /= 4_"qerftcea6ds") error stop +if (dt%cc4str(6) /= 4_"a1f9g37ga4.") error stop +if (dt%cc4str(7) /= 4_"45ngwj56sj2") error stop +#endif + +!$omp target enter data map(alloc: dt%cc4str2) +!$omp target map(alloc: dt%cc4str2) + if (len(dt%cc4str2) /= 11) error stop + dt%cc4str2 = 4_"ABCDEFGHIJK" +!$omp end target +!$omp target exit data map(from: dt%cc4str2) +if (len(dt%cc4str2) /= 11) error stop +if (dt%cc4str2 /= 4_"ABCDEFGHIJK") error stop + + +! integer, pointer :: pc(:), pc2 +! allocate(dt%pc(5), dt%pc2) + +!$omp target enter data map(alloc: dt%pc) +!$omp target map(alloc: dt%pc) + if (.not. associated(dt%pc)) error stop + if (size(dt%pc) /= 5) error stop + if (lbound(dt%pc, 1) /= 1) error stop + if (ubound(dt%pc, 1) /= 5) error stop + dt%pc = [11, 22, 33, 44, 55] +!$omp end target +!$omp target exit data map(from: dt%pc) +if (.not. associated(dt%pc)) error stop +if (size(dt%pc) /= 5) error stop +if (lbound(dt%pc, 1) /= 1) error stop +if (ubound(dt%pc, 1) /= 5) error stop +if (any (dt%pc /= [11, 22, 33, 44, 55])) error stop + +!$omp target enter data map(alloc: dt%pc2) +!$omp target map(alloc: dt%pc2) + if (.not. associated(dt%pc2)) error stop + dt%pc2 = 99 +!$omp end target +!$omp target exit data map(from: dt%pc2) +if (dt%pc2 /= 99) error stop +if (.not. associated(dt%pc2)) error stop + + +! character(len=:), pointer :: pcstr(:), pcstr2 +! allocate(character(len=2) :: dt%pcstr(2)) +! allocate(character(len=4) :: dt%pcstr2) + +!$omp target enter data map(alloc: dt%pcstr) +!$omp target map(alloc: dt%pcstr) + if (.not. associated(dt%pcstr)) error stop + if (len(dt%pcstr) /= 2) error stop + if (size(dt%pcstr) /= 2) error stop + if (lbound(dt%pcstr, 1) /= 1) error stop + if (ubound(dt%pcstr, 1) /= 2) error stop + dt%pcstr = ["01", "jk"] +!$omp end target +!$omp target exit data map(from: dt%pcstr) +if (.not. associated(dt%pcstr)) error stop +if (len(dt%pcstr) /= 2) error stop +if (size(dt%pcstr) /= 2) error stop +if (lbound(dt%pcstr, 1) /= 1) error stop +if (ubound(dt%pcstr, 1) /= 2) error stop +if (any (dt%pcstr /= ["01", "jk"])) error stop + + +!$omp target enter data map(alloc: dt%pcstr2) +!$omp target map(alloc: dt%pcstr2) + if (.not. associated(dt%pcstr2)) error stop + if (len(dt%pcstr2) /= 4) error stop + dt%pcstr2 = "HIJK" +!$omp end target +!$omp target exit data map(from: dt%pcstr2) +if (.not. associated(dt%pcstr2)) error stop +if (len(dt%pcstr2) /= 4) error stop +if (dt%pcstr2 /= "HIJK") error stop + + +! character(len=:,kind=4), pointer :: pc4str(:), pc4str2 +! allocate(character(len=3,kind=4) :: dt%pc4str(2:3)) +! allocate(character(len=5,kind=4) :: dt%pc4str2) + +!$omp target enter data map(alloc: dt%pc4str) +!$omp target map(alloc: dt%pc4str) + if (.not. associated(dt%pc4str)) error stop + if (len(dt%pc4str) /= 3) error stop + if (size(dt%pc4str) /= 2) error stop + if (lbound(dt%pc4str, 1) /= 2) error stop + if (ubound(dt%pc4str, 1) /= 3) error stop + dt%pc4str = [4_"456", 4_"tzu"] +!$omp end target +!$omp target exit data map(from: dt%pc4str) +if (.not. associated(dt%pc4str)) error stop +if (len(dt%pc4str) /= 3) error stop +if (size(dt%pc4str) /= 2) error stop +if (lbound(dt%pc4str, 1) /= 2) error stop +if (ubound(dt%pc4str, 1) /= 3) error stop +if (dt%pc4str(2) /= 4_"456") error stop +if (dt%pc4str(3) /= 4_"tzu") error stop + +!$omp target enter data map(alloc: dt%pc4str2) +!$omp target map(alloc: dt%pc4str2) + if (.not. associated(dt%pc4str2)) error stop + if (len(dt%pc4str2) /= 5) error stop + dt%pc4str2 = 4_"98765" +!$omp end target +!$omp target exit data map(from: dt%pc4str2) +if (.not. associated(dt%pc4str2)) error stop +if (len(dt%pc4str2) /= 5) error stop +if (dt%pc4str2 /= 4_"98765") error stop + + +! integer :: ii(5), ii2 + +!$omp target enter data map(alloc: ii) +!$omp target map(alloc: ii) + if (size(ii) /= 5) error stop + if (lbound(ii, 1) /= 1) error stop + if (ubound(ii, 1) /= 5) error stop + ii = [-1, -2, -3, -4, -5] +!$omp end target +!$omp target exit data map(from: ii) +if (size(ii) /= 5) error stop +if (lbound(ii, 1) /= 1) error stop +if (ubound(ii, 1) /= 5) error stop +if (any (ii /= [-1, -2, -3, -4, -5])) error stop + +!$omp target enter data map(alloc: ii2) +!$omp target map(alloc: ii2) + ii2 = -410 +!$omp end target +!$omp target exit data map(from: ii2) +if (ii2 /= -410) error stop + + +! character(len=11) :: clstr(-1:1), clstr2 + +!$omp target enter data map(alloc: clstr) +!$omp target map(alloc: clstr) + if (len(clstr) /= 11) error stop + if (size(clstr) /= 3) error stop + if (lbound(clstr, 1) /= -1) error stop + if (ubound(clstr, 1) /= 1) error stop + clstr = ["12345678901", "abcdefghijk", "ABCDEFGHIJK"] +!$omp end target +!$omp target exit data map(from: clstr) +if (len(clstr) /= 11) error stop +if (size(clstr) /= 3) error stop +if (lbound(clstr, 1) /= -1) error stop +if (ubound(clstr, 1) /= 1) error stop +if (any (clstr /= ["12345678901", "abcdefghijk", "ABCDEFGHIJK"])) error stop + +!$omp target enter data map(alloc: clstr2) +!$omp target map(alloc: clstr2) + if (len(clstr2) /= 11) error stop + clstr2 = "ABCDEFghijk" +!$omp end target +!$omp target exit data map(from: clstr2) +if (len(clstr2) /= 11) error stop +if (clstr2 /= "ABCDEFghijk") error stop + + +! character(len=11,kind=4) :: cl4str(0:3), cl4str2 + +!$omp target enter data map(alloc: cl4str) +!$omp target map(alloc: cl4str) + if (len(cl4str) /= 11) error stop + if (size(cl4str) /= 4) error stop + if (lbound(cl4str, 1) /= 0) error stop + if (ubound(cl4str, 1) /= 3) error stop + cl4str = [4_"12345678901", 4_"abcdefghijk", & + 4_"qerftcea6ds", 4_"a1f9g37ga4."] +!$omp end target +!$omp target exit data map(from: cl4str) +if (len(cl4str) /= 11) error stop +if (size(cl4str) /= 4) error stop +if (lbound(cl4str, 1) /= 0) error stop +if (ubound(cl4str, 1) /= 3) error stop +if (cl4str(0) /= 4_"12345678901") error stop +if (cl4str(1) /= 4_"abcdefghijk") error stop +if (cl4str(2) /= 4_"qerftcea6ds") error stop +if (cl4str(3) /= 4_"a1f9g37ga4.") error stop + +!$omp target enter data map(alloc: cl4str2) +!$omp target map(alloc: cl4str2) + if (len(cl4str2) /= 11) error stop + cl4str2 = 4_"ABCDEFGHIJK" +!$omp end target +!$omp target exit data map(from: cl4str2) +if (len(cl4str2) /= 11) error stop +if (cl4str2 /= 4_"ABCDEFGHIJK") error stop + + +! allocate(ip(5), ip2, ia(8), ia2) + +!$omp target enter data map(alloc: ip) +!$omp target map(alloc: ip) + if (.not. associated(ip)) error stop + if (size(ip) /= 5) error stop + if (lbound(ip, 1) /= 1) error stop + if (ubound(ip, 1) /= 5) error stop + ip = [11, 22, 33, 44, 55] +!$omp end target +!$omp target exit data map(from: ip) +if (.not. associated(ip)) error stop +if (size(ip) /= 5) error stop +if (lbound(ip, 1) /= 1) error stop +if (ubound(ip, 1) /= 5) error stop +if (any (ip /= [11, 22, 33, 44, 55])) error stop + +!$omp target enter data map(alloc: ip2) +!$omp target map(alloc: ip2) + if (.not. associated(ip2)) error stop + ip2 = 99 +!$omp end target +!$omp target exit data map(from: ip2) +if (ip2 /= 99) error stop +if (.not. associated(ip2)) error stop + + +! allocate(ip(5), ip2, ia(8), ia2) + +!$omp target enter data map(alloc: ia) +!$omp target map(alloc: ia) + if (.not. allocated(ia)) error stop + if (size(ia) /= 8) error stop + if (lbound(ia, 1) /= 1) error stop + if (ubound(ia, 1) /= 8) error stop + ia = [1,2,3,4,5,6,7,8] +!$omp end target +!$omp target exit data map(from: ia) +if (.not. allocated(ia)) error stop +if (size(ia) /= 8) error stop +if (lbound(ia, 1) /= 1) error stop +if (ubound(ia, 1) /= 8) error stop +if (any (ia /= [1,2,3,4,5,6,7,8])) error stop + +!$omp target enter data map(alloc: ia2) +!$omp target map(alloc: ia2) + if (.not. allocated(ia2)) error stop + ia2 = 102 +!$omp end target +!$omp target exit data map(from: ia2) +if (ia2 /= 102) error stop +if (.not. allocated(ia2)) error stop + + +! character(len=:), pointer :: pstr(:), pstr2 +! allocate(character(len=2) :: pstr(-2:0)) +! allocate(character(len=4) :: pstr2) + +!$omp target enter data map(alloc: pstr) +!$omp target map(alloc: pstr) + if (.not. associated(pstr)) error stop + if (len(pstr) /= 2) error stop + if (size(pstr) /= 3) error stop + if (lbound(pstr, 1) /= -2) error stop + if (ubound(pstr, 1) /= 0) error stop + pstr = ["01", "jk", "aq"] +!$omp end target +!$omp target exit data map(from: pstr) +if (.not. associated(pstr)) error stop +if (len(pstr) /= 2) error stop +if (size(pstr) /= 3) error stop +if (lbound(pstr, 1) /= -2) error stop +if (ubound(pstr, 1) /= 0) error stop +if (any (pstr /= ["01", "jk", "aq"])) error stop + +!$omp target enter data map(alloc: pstr2) +!$omp target map(alloc: pstr2) + if (.not. associated(pstr2)) error stop + if (len(pstr2) /= 4) error stop + pstr2 = "HIJK" +!$omp end target +!$omp target exit data map(from: pstr2) +if (.not. associated(pstr2)) error stop +if (len(pstr2) /= 4) error stop +if (pstr2 /= "HIJK") error stop + + +! character(len=:), allocatable :: astr(:), astr2 +! allocate(character(len=6) :: astr(3:5)) +! allocate(character(len=8) :: astr2) + + +!$omp target enter data map(alloc: astr) +!$omp target map(alloc: astr) + if (.not. allocated(astr)) error stop + if (len(astr) /= 6) error stop + if (size(astr) /= 3) error stop + if (lbound(astr, 1) /= 3) error stop + if (ubound(astr, 1) /= 5) error stop + astr = ["01db45", "jk$D%S", "zutg47"] +!$omp end target +!$omp target exit data map(from: astr) +if (.not. allocated(astr)) error stop +if (len(astr) /= 6) error stop +if (size(astr) /= 3) error stop +if (lbound(astr, 1) /= 3) error stop +if (ubound(astr, 1) /= 5) error stop +if (any (astr /= ["01db45", "jk$D%S", "zutg47"])) error stop + + +!$omp target enter data map(alloc: astr2) +!$omp target map(alloc: astr2) + if (.not. allocated(astr2)) error stop + if (len(astr2) /= 8) error stop + astr2 = "HIJKhijk" +!$omp end target +!$omp target exit data map(from: astr2) +if (.not. allocated(astr2)) error stop +if (len(astr2) /= 8) error stop +if (astr2 /= "HIJKhijk") error stop + + +! character(len=:,kind=4), pointer :: p4str(:), p4str2 +! allocate(character(len=3,kind=4) :: p4str(2:4)) +! allocate(character(len=5,kind=4) :: p4str2) + +! FAILS with value check + +!$omp target enter data map(alloc: p4str) +!$omp target map(alloc: p4str) + if (.not. associated(p4str)) error stop + if (len(p4str) /= 3) error stop + if (size(p4str) /= 3) error stop + if (lbound(p4str, 1) /= 2) error stop + if (ubound(p4str, 1) /= 4) error stop + p4str(:) = [4_"f85", 4_"8af", 4_"A%F"] +!$omp end target +!$omp target exit data map(from: p4str) +if (.not. associated(p4str)) error stop +if (len(p4str) /= 3) error stop +if (size(p4str) /= 3) error stop +if (lbound(p4str, 1) /= 2) error stop +if (ubound(p4str, 1) /= 4) error stop +if (p4str(2) /= 4_"f85") error stop +if (p4str(3) /= 4_"8af") error stop +if (p4str(4) /= 4_"A%F") error stop + +!$omp target enter data map(alloc: p4str2) +!$omp target map(alloc: p4str2) + if (.not. associated(p4str2)) error stop + if (len(p4str2) /= 5) error stop + p4str2 = 4_"9875a" +!$omp end target +!$omp target exit data map(from: p4str2) +if (.not. associated(p4str2)) error stop +if (len(p4str2) /= 5) error stop +if (p4str2 /= 4_"9875a") error stop + + +! character(len=:,kind=4), allocatable :: a4str(:), a4str2 +! allocate(character(len=7,kind=4) :: a4str(-2:3)) +! allocate(character(len=9,kind=4) :: a4str2) + +!$omp target enter data map(alloc: a4str) +!$omp target map(alloc: a4str) + if (.not. allocated(a4str)) error stop + if (len(a4str) /= 7) error stop + if (size(a4str) /= 6) error stop + if (lbound(a4str, 1) /= -2) error stop + if (ubound(a4str, 1) /= 3) error stop + ! See PR fortran/107508 why '(:)' is required + a4str(:) = [4_"sf456aq", 4_"3dtzu24", 4_"_4fh7sm", 4_"=ff85s7", 4_"j=8af4d", 4_".,A%Fsz"] +!$omp end target +!$omp target exit data map(from: a4str) +if (.not. allocated(a4str)) error stop +if (len(a4str) /= 7) error stop +if (size(a4str) /= 6) error stop +if (lbound(a4str, 1) /= -2) error stop +if (ubound(a4str, 1) /= 3) error stop +if (a4str(-2) /= 4_"sf456aq") error stop +if (a4str(-1) /= 4_"3dtzu24") error stop +if (a4str(0) /= 4_"_4fh7sm") error stop +if (a4str(1) /= 4_"=ff85s7") error stop +if (a4str(2) /= 4_"j=8af4d") error stop +if (a4str(3) /= 4_".,A%Fsz") error stop + +!$omp target enter data map(alloc: a4str2) +!$omp target map(alloc: a4str2) + if (.not. allocated(a4str2)) error stop + if (len(a4str2) /= 9) error stop + a4str2 = 4_"98765a23d" +!$omp end target +!$omp target exit data map(from: a4str2) +if (.not. allocated(a4str2)) error stop +if (len(a4str2) /= 9) error stop +if (a4str2 /= 4_"98765a23d") error stop + + +deallocate(dt%pc, dt%pc2) +deallocate(dt%pcstr) +deallocate(dt%pcstr2) + +deallocate(dt%pc4str) +deallocate(dt%pc4str2) + +deallocate(ip, ip2, ia, ia2) +deallocate(pstr) +deallocate(pstr2) +deallocate(astr) +deallocate(astr2) + +deallocate(p4str) +deallocate(p4str2) +deallocate(a4str) +deallocate(a4str2) + +end diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-1.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-1.C new file mode 100644 index 0000000..6957a6c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-1.C @@ -0,0 +1,57 @@ +/* 'std::bad_cast' exception in OpenACC compute region. */ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +/* See also '../libgomp.c++/target-exceptions-bad_cast-1.C'. */ + +/* See also '../../../gcc/testsuite/g++.target/gcn/exceptions-bad_cast-1.C', + '../../../gcc/testsuite/g++.target/nvptx/exceptions-bad_cast-1.C'. */ + +#include <iostream> + +struct C1 +{ + virtual void f() + {} +}; + +struct C2 : C1 +{ +}; + +int main() +{ + std::cerr << "CheCKpOInT\n"; +#pragma omp target +#pragma acc serial + /* { dg-bogus {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected xfail *-*-* } .-1 } */ + { + C1 c1; + [[maybe_unused]] + C2 &c2 = dynamic_cast<C2 &>(c1); + /* 'std::bad_cast' is thrown. */ + } +} + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + For host execution, we print something like: + terminate called after throwing an instance of 'std::bad_cast' + what(): std::bad_cast + Aborted (core dumped) + { dg-output {.*std::bad_cast} { target openacc_host_selected } } + For GCN, nvptx offload execution, we don't print anything, but just 'abort'. + + TODO For GCN, nvptx offload execution, this currently doesn't 'abort' due to + the 'std::bad_cast' exception, but rather due to SIGSEGV in 'dynamic_cast'; + PR119692. + + { dg-shouldfail {'std::bad_cast' exception} } */ +/* There are configurations where we 'WARNING: program timed out.' while in + 'dynamic_cast', see <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119692#c6>. + { dg-timeout 10 } ... to make sure that happens quickly. */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2-offload-sorry-GCN.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2-offload-sorry-GCN.C new file mode 100644 index 0000000..8260966 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2-offload-sorry-GCN.C @@ -0,0 +1,18 @@ +/* 'std::bad_cast' exception in OpenACC compute region, caught, '-foffload-options=-mno-fake-exceptions'. */ + +/* As this test case involves an expected offload compilation failure, we have to handle each offload target individually. + { dg-do link { target openacc_radeon_accel_selected } } */ +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -foffload-options=-mno-fake-exceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "exceptions-bad_cast-2.C" + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + Given '-foffload-options=-mno-fake-exceptions', offload compilation fails: + { dg-regexp {[^\r\n]+: In function 'main[^']+':[\r\n]+(?:[^\r\n]+: sorry, unimplemented: exception handling not supported[\r\n]+)+} } + (Note, using 'dg-regexp' instead of 'dg-message', as the former runs before the auto-mark-UNSUPPORTED.) + { dg-excess-errors {'mkoffload' failure etc.} } */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2-offload-sorry-nvptx.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2-offload-sorry-nvptx.C new file mode 100644 index 0000000..86d3f6c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2-offload-sorry-nvptx.C @@ -0,0 +1,20 @@ +/* 'std::bad_cast' exception in OpenACC compute region, caught, '-foffload-options=-mno-fake-exceptions'. */ + +/* As this test case involves an expected offload compilation failure, we have to handle each offload target individually. + { dg-do link { target openacc_nvidia_accel_selected } } */ +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -foffload-options=-mno-fake-exceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "exceptions-bad_cast-2.C" + +/* { dg-bogus {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected xfail *-*-* } 0 } */ + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + Given '-foffload-options=-mno-fake-exceptions', offload compilation fails: + { dg-regexp {[^\r\n]+: In function 'main[^']+':[\r\n]+(?:[^\r\n]+: sorry, unimplemented: exception handling not supported[\r\n]+)+} } + (Note, using 'dg-regexp' instead of 'dg-message', as the former runs before the auto-mark-UNSUPPORTED.) + { dg-excess-errors {'mkoffload' failure etc.} } */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2.C new file mode 100644 index 0000000..0f84cf2 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2.C @@ -0,0 +1,63 @@ +/* 'std::bad_cast' exception in OpenACC compute region, caught. */ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ +/* { dg-bogus {_ZTISt8bad_cast} PR119734 { target openacc_nvidia_accel_selected xfail *-*-* } 0 } + { dg-excess-errors {'mkoffload' failure etc.} { xfail openacc_nvidia_accel_selected } } */ + +/* See also '../libgomp.c++/target-exceptions-bad_cast-2.C'. */ + +/* See also '../../../gcc/testsuite/g++.target/gcn/exceptions-bad_cast-2.C', + '../../../gcc/testsuite/g++.target/nvptx/exceptions-bad_cast-2.C'. */ + +#include <iostream> +#include <typeinfo> + +struct C1 +{ + virtual void f() + {} +}; + +struct C2 : C1 +{ +}; + +int main() +{ + std::cerr << "CheCKpOInT\n"; +#pragma omp target +#pragma acc serial + { + C1 c1; + try + { + [[maybe_unused]] + C2 &c2 = dynamic_cast<C2 &>(c1); + /* 'std::bad_cast' is thrown. */ + } + catch (const std::bad_cast &e) + { + __builtin_printf("caught '%s'\n", e.what()); + } + } +} + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + { dg-output {.*caught 'std::bad_cast'[\r\n]+} { target openacc_host_selected } } + For GCN, nvptx offload execution, we don't print anything, but just 'abort'. + + TODO For GCN, nvptx offload execution, this currently doesn't 'abort' due to + the 'std::bad_cast' exception, but rather due to SIGSEGV in 'dynamic_cast'; + PR119692. + + For GCN, nvptx offload execution, there is no 'catch'ing; any exception is fatal. + { dg-shouldfail {'std::bad_cast' exception} { ! openacc_host_selected } } */ +/* There are configurations where we 'WARNING: program timed out.' while in + 'dynamic_cast', see <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119692#c6>. + { dg-timeout 10 } ... to make sure that happens quickly. */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-3.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-3.C new file mode 100644 index 0000000..4fa419f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-3.C @@ -0,0 +1,49 @@ +/* 'std::bad_cast' exception in OpenACC compute region, dead code. */ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* Wrong code for offloading execution. + { dg-skip-if PR119692 { ! openacc_host_selected } } + { dg-additional-options -fdump-tree-gimple } */ +/* { dg-additional-options -fdump-tree-optimized-raw } */ + +/* See also '../libgomp.c++/target-exceptions-bad_cast-3.C'. */ + +/* See also '../../../gcc/testsuite/g++.target/gcn/exceptions-bad_cast-3.C', + '../../../gcc/testsuite/g++.target/nvptx/exceptions-bad_cast-3.C'. */ + +/* For PR119692 workarounds. */ +#ifndef DEFAULT +# define DEFAULT +#endif + +struct C1 +{ + virtual void f() + {} +}; + +struct C2 : C1 +{ +}; + +int main() +{ +#pragma omp target DEFAULT +#pragma acc serial DEFAULT + { + C1 c1; + bool a = false; + asm volatile ("" : : "r" (&a) : "memory"); + if (a) + { + [[maybe_unused]] + C2 &c2 = dynamic_cast<C2 &>(c1); + /* 'std::bad_cast' is thrown. */ + } + } +} + +/* { dg-final { scan-tree-dump-not {(?n)#pragma omp target oacc_serial map\(tofrom:_ZTI2C2 \[len: [0-9]+\]\) map\(tofrom:_ZTI2C1 \[len: [0-9]+\]\) map\(tofrom:_ZTV2C1 \[len: [0-9]+\]\)$} gimple { xfail *-*-* } } } */ + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-1.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-1.C new file mode 100644 index 0000000..08c5766 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-1.C @@ -0,0 +1,43 @@ +/* 'throw' in OpenACC compute region. */ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +/* See also '../libgomp.c++/target-exceptions-throw-1.C'. */ + +/* See also '../../../gcc/testsuite/g++.target/gcn/exceptions-throw-1.C', + '../../../gcc/testsuite/g++.target/nvptx/exceptions-throw-1.C'. */ + +#include <iostream> + +class MyException +{ +}; + +int main() +{ + std::cerr << "CheCKpOInT\n"; +#pragma omp target +#pragma acc serial + /* { dg-bogus {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected xfail *-*-* } .-1 } */ + { + MyException e1; + throw e1; + } +} + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + For host execution, we print something like: + terminate called after throwing an instance of 'MyException' + Aborted (core dumped) + { dg-output {.*MyException} { target openacc_host_selected } } + For GCN, nvptx offload execution, we don't print anything, but just 'abort'. + + { dg-shouldfail {'MyException' exception} } */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2-offload-sorry-GCN.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2-offload-sorry-GCN.C new file mode 100644 index 0000000..40be837 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2-offload-sorry-GCN.C @@ -0,0 +1,20 @@ +/* 'throw' in OpenACC compute region, caught, '-foffload-options=-mno-fake-exceptions'. */ + +/* As this test case involves an expected offload compilation failure, we have to handle each offload target individually. + { dg-do link { target openacc_radeon_accel_selected } } */ +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -foffload-options=-mno-fake-exceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "exceptions-throw-2.C" + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + Given '-foffload-options=-mno-fake-exceptions', offload compilation fails: + { dg-regexp {[^\r\n]+: In function 'main[^']+':[\r\n]+(?:[^\r\n]+: sorry, unimplemented: exception handling not supported[\r\n]+)+} } + (Note, using 'dg-regexp' instead of 'dg-message', as the former runs before the auto-mark-UNSUPPORTED.) + { dg-excess-errors {'mkoffload' failure etc.} } */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2-offload-sorry-nvptx.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2-offload-sorry-nvptx.C new file mode 100644 index 0000000..9461455 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2-offload-sorry-nvptx.C @@ -0,0 +1,22 @@ +/* 'throw' in OpenACC compute region, caught, '-foffload-options=-mno-fake-exceptions'. */ + +/* As this test case involves an expected offload compilation failure, we have to handle each offload target individually. + { dg-do link { target openacc_nvidia_accel_selected } } */ +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -foffload-options=-mno-fake-exceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "exceptions-throw-2.C" + +/* { dg-bogus {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected xfail *-*-* } 0 } */ + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + Given '-foffload-options=-mno-fake-exceptions', offload compilation fails: + { dg-regexp {[^\r\n]+: In function 'main[^']+':[\r\n]+(?:[^\r\n]+: sorry, unimplemented: exception handling not supported[\r\n]+)+} } + (Note, using 'dg-regexp' instead of 'dg-message', as the former runs before the auto-mark-UNSUPPORTED.) + { dg-excess-errors {'mkoffload' failure etc.} } */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2.C new file mode 100644 index 0000000..a7408cd --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2.C @@ -0,0 +1,52 @@ +/* 'throw' in OpenACC compute region, caught. */ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ +/* { dg-bogus {undefined symbol: typeinfo name for MyException} PR119806 { target { openacc_radeon_accel_selected && { ! __OPTIMIZE__ } } xfail *-*-* } 0 } + { dg-excess-errors {'mkoffload' failure etc.} { xfail { openacc_radeon_accel_selected && { ! __OPTIMIZE__ } } } } */ +/* { dg-bogus {Initial value type mismatch} PR119806 { target { openacc_nvidia_accel_selected && { ! __OPTIMIZE__ } } xfail *-*-* } 0 } + { dg-excess-errors {'mkoffload' failure etc.} { xfail { openacc_nvidia_accel_selected && { ! __OPTIMIZE__ } } } } */ + +/* See also '../libgomp.c++/target-exceptions-throw-2.C'. */ + +/* See also '../../../gcc/testsuite/g++.target/gcn/exceptions-throw-2.C', + '../../../gcc/testsuite/g++.target/nvptx/exceptions-throw-2.C'. */ + +#include <iostream> + +class MyException +{ +}; + +int main() +{ + std::cerr << "CheCKpOInT\n"; +#pragma omp target +#pragma acc serial + /* { dg-bogus {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected xfail *-*-* } .-1 } */ + { + try + { + MyException e1; + throw e1; + } + catch (const MyException &e) + { + __builtin_printf("caught '%s'\n", "MyException"); + } + } +} + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-output {.*caught 'MyException'[\r\n]+} { target openacc_host_selected } } + For GCN, nvptx offload execution, we don't print anything, but just 'abort'. + + For GCN, nvptx offload execution, there is no 'catch'ing; any exception is fatal. + { dg-shouldfail {'MyException' exception} { ! openacc_host_selected } } */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-3.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-3.C new file mode 100644 index 0000000..74a62b3 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-3.C @@ -0,0 +1,43 @@ +/* 'throw' in OpenACC compute region, dead code. */ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* Wrong code for offloading execution. + { dg-skip-if PR119692 { ! openacc_host_selected } } + { dg-additional-options -fdump-tree-gimple } */ +/* { dg-additional-options -fdump-tree-optimized-raw } */ + +/* See also '../libgomp.c++/target-exceptions-throw-3.C'. */ + +/* See also '../../../gcc/testsuite/g++.target/gcn/exceptions-throw-3.C', + '../../../gcc/testsuite/g++.target/nvptx/exceptions-throw-3.C'. */ + +/* For PR119692 workarounds. */ +#ifndef DEFAULT +# define DEFAULT +#endif + +class MyException +{ +}; + +int main() +{ +#pragma omp target DEFAULT +#pragma acc serial DEFAULT + /* { dg-bogus {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected xfail *-*-* } .-1 } */ + { + bool a = false; + asm volatile ("" : : "r" (&a) : "memory"); + if (a) + { + MyException e1; + throw e1; + } + } +} + +/* { dg-final { scan-tree-dump-not {(?n)#pragma omp target oacc_serial map\(tofrom:_ZTI11MyException \[len: [0-9]+\]\)$} gimple { xfail *-*-* } } } */ + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/pr101544-1.C b/libgomp/testsuite/libgomp.oacc-c++/pr101544-1.C new file mode 100644 index 0000000..d4d28a6 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/pr101544-1.C @@ -0,0 +1,6 @@ +// { dg-additional-options -fno-inline } for stable results regarding OpenACC 'routine'. +// But actually, as none of the '#pragma acc routine' syntax is accepted, force inlining: +#define ALWAYS_INLINE __attribute__((always_inline)) + +#include "../libgomp.c++/pr101544-1.C" +//TODO { dg-prune-output {using 'vector_length \(32\)', ignoring 1} } diff --git a/libgomp/testsuite/libgomp.oacc-c++/pr119692-1-1.C b/libgomp/testsuite/libgomp.oacc-c++/pr119692-1-1.C new file mode 100644 index 0000000..5c3e037 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/pr119692-1-1.C @@ -0,0 +1,42 @@ +/* PR119692 "C++ 'typeinfo', 'vtable' vs. OpenACC, OpenMP 'target' offloading" */ + +/* { dg-additional-options -UDEFAULT } + Wrong code for offloading execution. + { dg-skip-if PR119692 { ! openacc_host_selected } } */ +/* { dg-additional-options -fdump-tree-gimple } */ + +/* See also '../libgomp.c++/pr119692-1-1.C'. */ + +/* See also '../../../gcc/testsuite/g++.target/gcn/pr119692-1-1.C', + '../../../gcc/testsuite/g++.target/nvptx/pr119692-1-1.C'. */ + +#ifndef DEFAULT +# define DEFAULT +#endif + +struct C1 +{ + virtual void f() + {} +}; + +struct C2 : C1 +{ +}; + +int main() +{ +#pragma omp target DEFAULT +#pragma acc serial DEFAULT + /* { dg-bogus {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected xfail *-*-* } .-1 } */ + { + C1 c1; + C1 *c1p = &c1; + asm volatile ("" : : "r" (&c1p) : "memory"); + C2 *c2 = dynamic_cast<C2 *>(c1p); + if (c2) + __builtin_abort(); + } +} + +/* { dg-final { scan-tree-dump-not {(?n)#pragma omp target oacc_serial map\(tofrom:_ZTI2C2 \[len: [0-9]+\]\) map\(tofrom:_ZTI2C1 \[len: [0-9]+\]\) map\(tofrom:_ZTV2C1 \[len: [0-9]+\]\)$} gimple { xfail *-*-* } } } */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/pr119692-1-2.C b/libgomp/testsuite/libgomp.oacc-c++/pr119692-1-2.C new file mode 100644 index 0000000..207b183 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/pr119692-1-2.C @@ -0,0 +1,12 @@ +/* PR119692 "C++ 'typeinfo', 'vtable' vs. OpenACC, OpenMP 'target' offloading" */ + +/* { dg-additional-options -DDEFAULT=default(none) } + Wrong code for offloading execution. + { dg-skip-if PR119692 { ! openacc_host_selected } } */ +/* { dg-additional-options -fdump-tree-gimple } */ + +#include "pr119692-1-1.C" + +/* { dg-bogus {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected xfail *-*-* } 0 } */ + +/* { dg-final { scan-tree-dump-not {(?n)#pragma omp target oacc_serial default\(none\) map\(tofrom:_ZTI2C2 \[len: [0-9]+\]\) map\(tofrom:_ZTI2C1 \[len: [0-9]+\]\) map\(tofrom:_ZTV2C1 \[len: [0-9]+\]\)$} gimple { xfail *-*-* } } } */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/pr119692-1-3.C b/libgomp/testsuite/libgomp.oacc-c++/pr119692-1-3.C new file mode 100644 index 0000000..e9b44de --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/pr119692-1-3.C @@ -0,0 +1,12 @@ +/* PR119692 "C++ 'typeinfo', 'vtable' vs. OpenACC, OpenMP 'target' offloading" */ + +/* { dg-additional-options -DDEFAULT=default(present) } + Wrong code for offloading execution. + { dg-xfail-run-if PR119692 { ! openacc_host_selected } } */ +/* { dg-additional-options -fdump-tree-gimple } */ + +#include "pr119692-1-1.C" + +/* { dg-bogus {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected xfail *-*-* } 0 } */ + +/* { dg-final { scan-tree-dump-not {(?n)#pragma omp target oacc_serial default\(present\) map\(force_present:_ZTI2C2 \[len: [0-9]+\]\) map\(force_present:_ZTI2C1 \[len: [0-9]+\]\) map\(force_present:_ZTV2C1 \[len: [0-9]+\]\)$} gimple { xfail *-*-* } } } */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/pr96835-1.C b/libgomp/testsuite/libgomp.oacc-c++/pr96835-1.C new file mode 100644 index 0000000..0a6ee22 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/pr96835-1.C @@ -0,0 +1,6 @@ +// { dg-additional-options -fno-inline } for stable results regarding OpenACC 'routine'. +// But actually, as none of the '#pragma acc routine' syntax is accepted (see '../libgomp.c++/pr101544-1.C'), force inlining: +#define ALWAYS_INLINE inline __attribute__((always_inline)) + +#include "../libgomp.c++/pr96835-1.C" +//TODO { dg-prune-output {using 'vector_length \(32\)', ignoring 1} } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c new file mode 100644 index 0000000..4b54171 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c @@ -0,0 +1,125 @@ +/* Inspired by 'gcc.target/nvptx/abi-struct-arg.c', 'gcc.target/nvptx/abi-struct-ret.c'. */ + +/* See also '../libgomp.c-c++-common/target-abi-struct-1.c'. */ + +/* To exercise PR119835 (if optimizations enabled): disable inlining, so that + GIMPLE passes still see the functions that return aggregate types. */ +#pragma GCC optimize "-fno-inline" + +typedef struct {} empty; /* See 'gcc/doc/extend.texi', "Empty Structures". */ +typedef struct {char a;} schar; +typedef struct {short a;} sshort; +typedef struct {int a;} sint; +typedef struct {long long a;} slonglong; +typedef struct {int a, b[12];} sint_13; + +#pragma omp declare target + +#define M(T) ({T t; t.a = sizeof t; t;}) + +static __SIZE_TYPE__ empty_a; +#pragma acc declare create(empty_a) +#pragma acc routine +static empty rempty(void) +{ + return ({empty t; empty_a = sizeof t; t;}); +} + +#pragma acc routine +static schar rschar(void) +{ + return M(schar); +} + +#pragma acc routine +static sshort rsshort(void) +{ + return M(sshort); +} + +#pragma acc routine +static sint rsint(void) +{ + return M(sint); +} + +#pragma acc routine +static slonglong rslonglong(void) +{ + return M(slonglong); +} + +#pragma acc routine +static sint_13 rsint_13(void) +{ + return M(sint_13); +} + +#pragma acc routine +static void aempty(empty empty) +{ + (void) empty; + + __SIZE_TYPE__ empty_a_exp; +#ifndef __cplusplus + empty_a_exp = 0; +#else + empty_a_exp = sizeof (char); +#endif + if (empty_a != empty_a_exp) + __builtin_abort(); +} + +#pragma acc routine +static void aschar(schar schar) +{ + if (schar.a != sizeof (char)) + __builtin_abort(); +} + +#pragma acc routine +static void asshort(sshort sshort) +{ + if (sshort.a != sizeof (short)) + __builtin_abort(); +} + +#pragma acc routine +static void asint(sint sint) +{ + if (sint.a != sizeof (int)) + __builtin_abort(); +} + +#pragma acc routine +static void aslonglong(slonglong slonglong) +{ + if (slonglong.a != sizeof (long long)) + __builtin_abort(); +} + +#pragma acc routine +static void asint_13(sint_13 sint_13) +{ + if (sint_13.a != (sizeof (int) * 13)) + __builtin_abort(); +} + +#pragma omp end declare target + +int main() +{ +#pragma omp target +#pragma acc serial + /* { dg-bogus {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected xfail *-*-* } .-1 } */ + { + aempty(rempty()); + aschar(rschar()); + asshort(rsshort()); + asint(rsint()); + aslonglong(rslonglong()); + asint_13(rsint_13()); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c new file mode 100644 index 0000000..eda651d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c @@ -0,0 +1,96 @@ +/* { dg-prune-output "using .vector_length \\(32\\)" } */ + +/* PR libgomp/93226 */ + +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <openacc.h> + +enum { N = 1024 }; + +static int D[N]; +#pragma acc declare device_resident(D) + +#pragma acc routine +intptr_t init_d() +{ + for (int i = 0; i < N; i++) + D[i] = 27*i; + return (intptr_t) &D[0]; +} + +int +main () +{ + int *a, *b, *e; + void *d_a, *d_b, *d_c, *d_d, *d_e, *d_f; + intptr_t intptr; + bool fail = false; + + a = (int *) malloc (N*sizeof (int)); + b = (int *) malloc (N*sizeof (int)); + e = (int *) malloc (N*sizeof (int)); + d_c = acc_malloc (N*sizeof (int)); + d_f = acc_malloc (N*sizeof (int)); + + memset (e, 0xff, N*sizeof (int)); + d_e = acc_copyin (e, N*sizeof (int)); + + #pragma acc serial copyout(intptr) + intptr = init_d (); + d_d = (void*) intptr; + acc_memcpy_device (d_c, d_d, N*sizeof (int)); + + #pragma acc serial copy(fail) deviceptr(d_c) firstprivate(intptr) + { + int *cc = (int *) d_c; + int *dd = (int *) intptr; + for (int i = 0; i < N; i++) + if (dd[i] != 27*i || cc[i] != 27*i) + { + fail = true; + __builtin_abort (); + } + } + if (fail) __builtin_abort (); + + for (int i = 0; i < N; i++) + a[i] = 11*i; + for (int i = 0; i < N; i++) + b[i] = 31*i; + + d_a = acc_copyin (a, N*sizeof (int)); + acc_copyin_async (b, N*sizeof (int), acc_async_noval); + + #pragma acc parallel deviceptr(d_c) async + { + int *cc = (int *) d_c; + #pragma acc loop + for (int i = 0; i < N; i++) + cc[i] = -17*i; + } + + acc_memcpy_device_async (d_d, d_a, N*sizeof (int), acc_async_noval); + acc_memcpy_device_async (d_f, d_c, N*sizeof (int), acc_async_noval); + acc_wait (acc_async_noval); + d_b = acc_deviceptr (b); + acc_memcpy_device_async (d_e, d_b, N*sizeof (int), acc_async_noval); + acc_wait (acc_async_noval); + + #pragma acc serial deviceptr(d_d, d_e, d_f) copy(fail) + { + int *dd = (int *) d_d; + int *ee = (int *) d_e; + int *ff = (int *) d_f; + for (int i = 0; i < N; i++) + if (dd[i] != 11*i + || ee[i] != 31*i + || ff[i] != -17*i) + { + fail = true; + __builtin_abort (); + } + } + if (fail) __builtin_abort (); +} diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-1.f90 new file mode 100644 index 0000000..15393b4 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-1.f90 @@ -0,0 +1,25 @@ +! { dg-do compile } +! { dg-additional-options "-fdump-tree-original" } + +use openacc +implicit none (type, external) +integer,pointer :: a, b(:) +integer,allocatable :: c, d(:) + +call acc_attach(a) ! ICE +call acc_attach_async(b, 4) +call acc_attach(c) + +call acc_detach(a) +call acc_detach_async(b, 4) +call acc_detach_finalize(c) +call acc_detach_finalize_async(d,7) +end + +! { dg-final { scan-tree-dump-times "acc_attach \\(&a\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times "acc_attach_async \\(&\\(integer\\(kind=4\\)\\\[0:\\\] \\*\\) b.data, 4\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times "acc_attach \\(&c\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times "acc_detach \\(&a\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times "acc_detach_async \\(&\\(integer\\(kind=4\\)\\\[0:\\\] \\*\\) b.data, 4\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times "acc_detach_finalize \\(&c\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times "acc_detach_finalize_async \\(&\\(integer\\(kind=4\\)\\\[0:\\\] \\* restrict\\) d.data, 7\\);" 1 "original" } } diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-2.f90 new file mode 100644 index 0000000..b2204ac --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-2.f90 @@ -0,0 +1,62 @@ +! { dg-do run } + +use openacc +implicit none (type, external) +integer, target :: tgt_a, tgt_b(5) + +integer, pointer :: p1, p2(:) + +type t + integer,pointer :: a => null () + integer,pointer :: b(:) => null () + integer,allocatable :: c, d(:) +end type t + +type(t), target :: var + +tgt_a = 51 +tgt_b = [11,22,33,44,55] + +var%b => tgt_b +!$acc enter data copyin(var, tgt_a, tgt_b) +var%a => tgt_a + +call acc_attach(var%a) +call acc_attach(var%b) + +!$acc serial +! { dg-warning "using .vector_length \\(32\\)., ignoring 1" "" { target openacc_nvidia_accel_selected } .-1 } + if (var%a /= 51) stop 1 + if (any (var%b /= [11,22,33,44,55])) stop 2 +!$acc end serial + +call acc_detach(var%a) +call acc_detach(var%b) + +!$acc exit data delete(var, tgt_a, tgt_b) + +var%c = 9 +var%d = [1,2,3] + +p1 => var%c +p2 => var%d + +!$acc enter data copyin(p1, p2) +!$acc enter data copyin(var) +call acc_attach(var%c) +call acc_attach(var%d) + +!$acc serial +! { dg-warning "using .vector_length \\(32\\)., ignoring 1" "" { target openacc_nvidia_accel_selected } .-1 } + if (var%c /= 9) stop 3 + if (any (var%d /= [1,2,3])) stop 4 +!$acc end serial + +call acc_detach(var%c) +call acc_detach(var%d) + +!$acc exit data delete(var, p1, p2) + +deallocate(var%d) + +end diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90 new file mode 100644 index 0000000..8f3a8f0 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90 @@ -0,0 +1,113 @@ +! { dg-prune-output "using .vector_length \\(32\\)" } + +! PR libgomp/93226 */ + +module m + use iso_c_binding + use openacc + implicit none (external, type) + + integer, parameter :: N = 1024 + + integer :: D(N) + !$acc declare device_resident(D) + +contains + + integer(c_intptr_t) function init_d() + !$acc routine + integer :: i + do i = 1, N + D(i) = 27*i + end do + init_d = loc(D) + end +end module + +program main + use m + implicit none (external, type) + + integer, allocatable, target :: a(:), b(:), e(:) + type(c_ptr) :: d_a, d_b, d_c, d_d, d_e, d_f + integer(c_intptr_t) intptr + integer :: i + logical fail + + fail = .false. + + allocate(a(N), b(N), e(N)) + d_c = acc_malloc (N*c_sizeof (i)) + d_f = acc_malloc (N*c_sizeof (i)) + + e = huge(e) + call acc_copyin (e, N*c_sizeof (i)); + d_e = acc_deviceptr (e); + + !$acc serial copyout(intptr) + intptr = init_d () + !$acc end serial + d_d = transfer(intptr, d_d) + call acc_memcpy_device (d_c, d_d, N*c_sizeof (i)) + + !$acc serial copy(fail) copy(a) deviceptr(d_c, d_d) firstprivate(intptr) + block + integer, pointer :: cc(:), dd(:) + call c_f_pointer (d_c, cc, [N]) + call c_f_pointer (d_d, dd, [N]) + a = cc + do i = 1, N + if (dd(i) /= 27*i .or. cc(i) /= 27*i) then + fail = .true. + stop 1 + end if + end do + end block + !$acc end serial + if (fail) error stop 1 + + do i = 1, N + a(i) = 11*i + b(i) = 31*i + end do + + call acc_copyin (a, N*c_sizeof (i)) + d_a = acc_deviceptr (a) + call acc_copyin_async (b, N*c_sizeof (i), acc_async_noval) + + !$acc parallel deviceptr(d_c) private(i) async + block + integer, pointer :: cc(:) + call c_f_pointer (d_c, cc, [N]) + !$acc loop + do i = 1, N + cc(i) = -17*i + end do + end block + !$acc end parallel + + call acc_memcpy_device_async (d_d, d_a, N*c_sizeof (i), acc_async_noval) + call acc_memcpy_device_async (d_f, d_c, N*c_sizeof (i), acc_async_noval) + call acc_wait (acc_async_noval) + d_b = acc_deviceptr (b) + call acc_memcpy_device_async (d_e, d_b, N*c_sizeof (i), acc_async_noval) + call acc_wait (acc_async_noval) + + !$acc serial deviceptr(d_d, d_e, d_f) private(i) copy(fail) + block + integer, pointer :: dd(:), ee(:), ff(:) + call c_f_pointer (d_d, dd, [N]) + call c_f_pointer (d_e, ee, [N]) + call c_f_pointer (d_f, ff, [N]) + do i = 1, N + if (dd(i) /= 11*i & + .or. ee(i) /= 31*i & + .or. ff(i) /= -17*i) then + fail = .true. + stop 2 + end if + end do + end block + !$acc end serial + if (fail) error stop 2 +end diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-13.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-13.f90 index deb2c28..f6bd27a 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/lib-13.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-13.f90 @@ -19,11 +19,10 @@ program main end do !$acc end parallel end do - !$acc end data call acc_wait_all_async (nprocs + 1) - call acc_wait (nprocs + 1) + !$acc end data if (acc_async_test (1) .neqv. .TRUE.) stop 1 if (acc_async_test (2) .neqv. .TRUE.) stop 2 |