diff options
Diffstat (limited to 'libgomp')
100 files changed, 7257 insertions, 25 deletions
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index 4aab62b..2c044a7 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,188 @@ +2025-06-02 Tobias Burnus <tburnus@baylibre.com> + + PR libgomp/120444 + * libgomp-plugin.h (GOMP_OFFLOAD_memset): Declare. + * libgomp.h (struct gomp_device_descr): Add memset_func. + * libgomp.map (GOMP_6.0.1): Add omp_target_memset{,_async}. + * libgomp.texi (Device Memory Routines): Document them. + * omp.h.in (omp_target_memset, omp_target_memset_async): Declare. + * omp_lib.f90.in (omp_target_memset, omp_target_memset_async): + Add interfaces. + * omp_lib.h.in (omp_target_memset, omp_target_memset_async): Likewise. + * plugin/cuda-lib.def: Add cuMemsetD8. + * plugin/plugin-gcn.c (struct hsa_runtime_fn_info): Add + hsa_amd_memory_fill_fn. + (init_hsa_runtime_functions): DLSYM_OPT_FN load it. + (GOMP_OFFLOAD_memset): New. + * plugin/plugin-nvptx.c (GOMP_OFFLOAD_memset): New. + * target.c (omp_target_memset_int, omp_target_memset, + omp_target_memset_async_helper, omp_target_memset_async): New. + (gomp_load_plugin_for_device): Add DLSYM (memset). + * testsuite/libgomp.c-c++-common/omp_target_memset.c: New test. + * testsuite/libgomp.c-c++-common/omp_target_memset-2.c: New test. + * testsuite/libgomp.c-c++-common/omp_target_memset-3.c: New test. + * testsuite/libgomp.fortran/omp_target_memset.f90: New test. + * testsuite/libgomp.fortran/omp_target_memset-2.f90: New test. + +2025-05-30 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-std__valarray-1.C: New. + * testsuite/libgomp.c++/target-std__valarray-1.output: Likewise. + +2025-05-30 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-std__array-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__array-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__bitset-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__bitset-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__deque-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__deque-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__forward_list-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__forward_list-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__list-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__list-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__map-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__map-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__multimap-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__multimap-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__multiset-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__multiset-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__set-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__set-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__span-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__span-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__valarray-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__valarray-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__vector-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__vector-concurrent.C: Adjust. + +2025-05-30 Kwok Cheung Yeung <kcyeung@baylibre.com> + Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-std__array-concurrent.C: New. + * testsuite/libgomp.c++/target-std__bitset-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__deque-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__flat_map-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__flat_multimap-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__flat_multiset-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__flat_set-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__forward_list-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__list-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__map-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__multimap-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__multiset-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__set-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__span-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__unordered_map-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__unordered_multimap-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__unordered_multiset-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__unordered_set-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__valarray-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__vector-concurrent.C: Likewise. + +2025-05-30 Kwok Cheung Yeung <kcyeung@baylibre.com> + + * testsuite/libgomp.c++/target-std__cmath.C: New. + * testsuite/libgomp.c++/target-std__complex.C: Likewise. + * testsuite/libgomp.c++/target-std__numbers.C: Likewise. + +2025-05-30 Waffl3x <waffl3x@baylibre.com> + Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-flex-10.C: New test. + * testsuite/libgomp.c++/target-flex-100.C: New test. + * testsuite/libgomp.c++/target-flex-101.C: New test. + * testsuite/libgomp.c++/target-flex-11.C: New test. + * testsuite/libgomp.c++/target-flex-12.C: New test. + * testsuite/libgomp.c++/target-flex-2000.C: New test. + * testsuite/libgomp.c++/target-flex-2001.C: New test. + * testsuite/libgomp.c++/target-flex-2002.C: New test. + * testsuite/libgomp.c++/target-flex-2003.C: New test. + * testsuite/libgomp.c++/target-flex-30.C: New test. + * testsuite/libgomp.c++/target-flex-300.C: New test. + * testsuite/libgomp.c++/target-flex-31.C: New test. + * testsuite/libgomp.c++/target-flex-32.C: New test. + * testsuite/libgomp.c++/target-flex-33.C: New test. + * testsuite/libgomp.c++/target-flex-41.C: New test. + * testsuite/libgomp.c++/target-flex-60.C: New test. + * testsuite/libgomp.c++/target-flex-61.C: New test. + * testsuite/libgomp.c++/target-flex-62.C: New test. + * testsuite/libgomp.c++/target-flex-70.C: New test. + * testsuite/libgomp.c++/target-flex-80.C: New test. + * testsuite/libgomp.c++/target-flex-81.C: New test. + * testsuite/libgomp.c++/target-flex-90.C: New test. + * testsuite/libgomp.c++/target-flex-common.h: New test. + +2025-05-30 Thomas Schwinge <tschwinge@baylibre.com> + Richard Biener <rguenther@suse.de> + + PR middle-end/119835 + * testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c: + '#pragma GCC optimize "-fno-inline"'. + * testsuite/libgomp.c-c++-common/target-abi-struct-1.c: New. + * testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c: Adjust. + +2025-05-30 Julian Brown <julian@codesourcery.com> + + * testsuite/libgomp.c-c++-common/declare-mapper-9.c: Enable for C. + * testsuite/libgomp.c-c++-common/declare-mapper-10.c: Likewise. + * testsuite/libgomp.c-c++-common/declare-mapper-11.c: Likewise. + * testsuite/libgomp.c-c++-common/declare-mapper-12.c: Likewise. + * testsuite/libgomp.c-c++-common/declare-mapper-13.c: Likewise. + * testsuite/libgomp.c-c++-common/declare-mapper-14.c: Likewise. + +2025-05-30 Julian Brown <julian@codesourcery.com> + Tobias Burnus <tburnus@baylibre.com> + + * testsuite/libgomp.c++/declare-mapper-1.C: New test. + * testsuite/libgomp.c++/declare-mapper-2.C: New test. + * testsuite/libgomp.c++/declare-mapper-3.C: New test. + * testsuite/libgomp.c++/declare-mapper-4.C: New test. + * testsuite/libgomp.c++/declare-mapper-5.C: New test. + * testsuite/libgomp.c++/declare-mapper-6.C: New test. + * testsuite/libgomp.c++/declare-mapper-7.C: New test. + * testsuite/libgomp.c++/declare-mapper-8.C: New test. + * testsuite/libgomp.c-c++-common/declare-mapper-9.c: New test (only + enabled for C++ for now). + * testsuite/libgomp.c-c++-common/declare-mapper-10.c: Likewise. + * testsuite/libgomp.c-c++-common/declare-mapper-11.c: Likewise. + * testsuite/libgomp.c-c++-common/declare-mapper-12.c: Likewise. + * testsuite/libgomp.c-c++-common/declare-mapper-13.c: Likewise. + * testsuite/libgomp.c-c++-common/declare-mapper-14.c: Likewise. + +2025-05-29 Tobias Burnus <tburnus@baylibre.com> + + PR libgomp/93226 + * libgomp-plugin.h (GOMP_OFFLOAD_openacc_async_dev2dev): New + prototype. + * libgomp.h (struct acc_dispatch_t): Add dev2dev_func. + (gomp_copy_dev2dev): New prototype. + * libgomp.map (OACC_2.6.1): New; add acc_memcpy_device{,_async}. + * libgomp.texi (acc_memcpy_device): New. + * oacc-mem.c (memcpy_tofrom_device): Change to take from/to + device boolean; use memcpy not memmove; add early return if + size == 0 or same device + same ptr. + (acc_memcpy_to_device, acc_memcpy_to_device_async, + acc_memcpy_from_device, acc_memcpy_from_device_async): Update. + (acc_memcpy_device, acc_memcpy_device_async): New. + * openacc.f90 (acc_memcpy_device, acc_memcpy_device_async): + Add interface. + * openacc_lib.h (acc_memcpy_device, acc_memcpy_device_async): + Likewise. + * openacc.h (acc_memcpy_device, acc_memcpy_device_async): Add + prototype. + * plugin/plugin-gcn.c (GOMP_OFFLOAD_openacc_async_host2dev): + Update comment. + (GOMP_OFFLOAD_openacc_async_dev2host): Update call. + (GOMP_OFFLOAD_openacc_async_dev2dev): New. + * plugin/plugin-nvptx.c (cuda_memcpy_dev_sanity_check): New. + (GOMP_OFFLOAD_dev2dev): Call it. + (GOMP_OFFLOAD_openacc_async_dev2dev): New. + * target.c (gomp_copy_dev2dev): New. + (gomp_load_plugin_for_device): Load dev2dev and async_dev2dev. + * testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c: New test. + * testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90: New test. + 2025-05-28 Tobias Burnus <tburnus@baylibre.com> PR middle-end/118694 diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h index 924fc1f..191106b 100644 --- a/libgomp/libgomp-plugin.h +++ b/libgomp/libgomp-plugin.h @@ -177,6 +177,7 @@ extern int GOMP_OFFLOAD_memcpy3d (int, int, size_t, size_t, size_t, void *, size_t, size_t, size_t, size_t, size_t, const void *, size_t, size_t, size_t, size_t, size_t); +extern bool GOMP_OFFLOAD_memset (int, void *, int, size_t); extern bool GOMP_OFFLOAD_can_run (void *); extern void GOMP_OFFLOAD_run (int, void *, void *, void **); extern void GOMP_OFFLOAD_async_run (int, void *, void *, void **, void *); @@ -200,6 +201,8 @@ extern bool GOMP_OFFLOAD_openacc_async_dev2host (int, void *, const void *, size struct goacc_asyncqueue *); extern bool GOMP_OFFLOAD_openacc_async_host2dev (int, void *, const void *, size_t, struct goacc_asyncqueue *); +extern bool GOMP_OFFLOAD_openacc_async_dev2dev (int, void *, const void *, size_t, + struct goacc_asyncqueue *); extern void *GOMP_OFFLOAD_openacc_cuda_get_current_device (void); extern void *GOMP_OFFLOAD_openacc_cuda_get_current_context (void); extern void *GOMP_OFFLOAD_openacc_cuda_get_stream (struct goacc_asyncqueue *); diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 6030f9d..a433983 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -1360,6 +1360,7 @@ typedef struct acc_dispatch_t __typeof (GOMP_OFFLOAD_openacc_async_exec) *exec_func; __typeof (GOMP_OFFLOAD_openacc_async_dev2host) *dev2host_func; __typeof (GOMP_OFFLOAD_openacc_async_host2dev) *host2dev_func; + __typeof (GOMP_OFFLOAD_openacc_async_dev2dev) *dev2dev_func; } async; __typeof (GOMP_OFFLOAD_openacc_get_property) *get_property_func; @@ -1420,9 +1421,10 @@ struct gomp_device_descr __typeof (GOMP_OFFLOAD_free) *free_func; __typeof (GOMP_OFFLOAD_dev2host) *dev2host_func; __typeof (GOMP_OFFLOAD_host2dev) *host2dev_func; + __typeof (GOMP_OFFLOAD_dev2dev) *dev2dev_func; __typeof (GOMP_OFFLOAD_memcpy2d) *memcpy2d_func; __typeof (GOMP_OFFLOAD_memcpy3d) *memcpy3d_func; - __typeof (GOMP_OFFLOAD_dev2dev) *dev2dev_func; + __typeof (GOMP_OFFLOAD_memset) *memset_func; __typeof (GOMP_OFFLOAD_can_run) *can_run_func; __typeof (GOMP_OFFLOAD_run) *run_func; __typeof (GOMP_OFFLOAD_async_run) *async_run_func; @@ -1467,6 +1469,9 @@ extern void gomp_copy_host2dev (struct gomp_device_descr *, extern void gomp_copy_dev2host (struct gomp_device_descr *, struct goacc_asyncqueue *, void *, const void *, size_t); +extern void gomp_copy_dev2dev (struct gomp_device_descr *, + struct goacc_asyncqueue *, void *, const void *, + size_t); extern uintptr_t gomp_map_val (struct target_mem_desc *, void **, size_t); extern bool gomp_attach_pointer (struct gomp_device_descr *, struct goacc_asyncqueue *, splay_tree, diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map index eae2f53..f6aee7c 100644 --- a/libgomp/libgomp.map +++ b/libgomp/libgomp.map @@ -453,6 +453,12 @@ GOMP_6.0 { omp_get_uid_from_device_8_; } GOMP_5.1.3; +GOMP_6.0.1 { + global: + omp_target_memset; + omp_target_memset_async; +} GOMP_6.0; + OACC_2.0 { global: acc_get_num_devices; @@ -609,6 +615,12 @@ OACC_2.6 { acc_get_property_string_h_; } OACC_2.5.1; +OACC_2.6.1 { + global: + acc_memcpy_device; + acc_memcpy_device_async; +} OACC_2.6; + GOACC_2.0 { global: GOACC_data_end; diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi index 6909c2b..8e487bc 100644 --- a/libgomp/libgomp.texi +++ b/libgomp/libgomp.texi @@ -603,7 +603,7 @@ to address of matching mapped list item per 5.1, Sect. 2.21.7.2 @tab N @tab @code{omp_get_device_teams_thread_limit}, and @code{omp_set_device_teams_thread_limit} routines @tab N @tab @item @code{omp_target_memset} and @code{omp_target_memset_async} routines - @tab N @tab + @tab Y @tab @item Fortran version of the interop runtime routines @tab Y @tab @item Routines for obtaining memory spaces/allocators for shared/device memory @tab N @tab @@ -1984,8 +1984,8 @@ pointers on devices. They have C linkage and do not throw exceptions. * omp_target_memcpy_async:: Copy data between devices asynchronously * omp_target_memcpy_rect:: Copy a subvolume of data between devices * omp_target_memcpy_rect_async:: Copy a subvolume of data between devices asynchronously -@c * omp_target_memset:: <fixme>/TR12 -@c * omp_target_memset_async:: <fixme>/TR12 +* omp_target_memset:: Set bytes in device memory +* omp_target_memset_async:: Set bytes in device memory asynchronously * omp_target_associate_ptr:: Associate a device pointer with a host pointer * omp_target_disassociate_ptr:: Remove device--host pointer association * omp_get_mapped_ptr:: Return device pointer to a host pointer @@ -2398,6 +2398,98 @@ the initial device. @end table +@node omp_target_memset +@subsection @code{omp_target_memset} -- Set bytes in device memory +@table @asis +@item @emph{Description}: +This routine fills memory on the device identified by device number +@var{device_num}. Starting from the device address @var{ptr}, the first +@var{count} bytes are set to the value @var{val}, converted to +@code{unsigned char}. If @var{count} is zero, the routine has no effect; +if @var{ptr} is @code{NULL}, the behavior is unspecified. The function +returns @var{ptr}. + +The @var{device_num} must be a conforming device number and @var{ptr} must be +a valid device pointer for that device. Running this routine in a +@code{target} region except on the initial device is not supported. + +@item @emph{C/C++} +@multitable @columnfractions .20 .80 +@item @emph{Prototype}: @tab @code{void *omp_target_memcpy(void *ptr,} +@item @tab @code{ int val,} +@item @tab @code{ size_t count,} +@item @tab @code{ int device_num)} +@end multitable + +@item @emph{Fortran}: +@multitable @columnfractions .20 .80 +@item @emph{Interface}: @tab @code{type(c_ptr) function omp_target_memset( &} +@item @tab @code{ ptr, val, count, device_num) bind(C)} +@item @tab @code{use, intrinsic :: iso_c_binding, only: c_ptr, c_size_t, c_int} +@item @tab @code{type(c_ptr), value :: ptr} +@item @tab @code{integer(c_size_t), value :: count} +@item @tab @code{integer(c_int), value :: val, device_num} +@end multitable + +@item @emph{See also}: +@ref{omp_target_memset_async} + +@item @emph{Reference}: +@uref{https://www.openmp.org, OpenMP specification v6.0}, Section 25.8.1 +@end table + + + +@node omp_target_memset_async +@subsection @code{omp_target_memset} -- Set bytes in device memory asynchronously +@table @asis +@item @emph{Description}: +This routine fills memory on the device identified by device number +@var{device_num}. Starting from the device address @var{ptr}, the first +@var{count} bytes are set to the value @var{val}, converted to +@code{unsigned char}. If @var{count} is zero, the routine has no effect; +if @var{ptr} is @code{NULL}, the behavior is unspecified. Task dependence +is expressed by passing an array of depend objects to @var{depobj_list}, where +the number of array elements is passed as @var{depobj_count}; if the count is +zero, the @var{depobj_list} argument is ignored. In C++ and Fortran, the +@var{depobj_list} argument can also be omitted in that case. The function +returns @var{ptr}. + +The @var{device_num} must be a conforming device number and @var{ptr} must be +a valid device pointer for that device. Running this routine in a +@code{target} region except on the initial device is not supported. + +@item @emph{C/C++} +@multitable @columnfractions .20 .80 +@item @emph{Prototype}: @tab @code{void *omp_target_memcpy_async(void *ptr,} +@item @tab @code{ int val,} +@item @tab @code{ size_t count,} +@item @tab @code{ int device_num,} +@item @tab @code{ int depobj_count,} +@item @tab @code{ omp_depend_t *depobj_list)} +@end multitable + +@item @emph{Fortran}: +@multitable @columnfractions .20 .80 +@item @emph{Interface}: @tab @code{type(c_ptr) function omp_target_memset_async( &} +@item @tab @code{ ptr, val, count, device_num, &} +@item @tab @code{ depobj_count, depobj_list) bind(C)} +@item @tab @code{use, intrinsic :: iso_c_binding, only: c_ptr, c_size_t, c_int} +@item @tab @code{type(c_ptr), value :: ptr} +@item @tab @code{integer(c_size_t), value :: count} +@item @tab @code{integer(c_int), value :: val, device_num, depobj_count} +@item @tab @code{integer(omp_depend_kind), optional :: depobj_list(*)} +@end multitable + + +@item @emph{See also}: +@ref{omp_target_memset} + +@item @emph{Reference}: +@uref{https://www.openmp.org, OpenMP specification v6.0}, Section 25.8.2 +@end table + + @node omp_target_associate_ptr @subsection @code{omp_target_associate_ptr} -- Associate a device pointer with a host pointer @@ -4763,6 +4855,7 @@ acceleration device. present on device. * acc_memcpy_to_device:: Copy host memory to device memory. * acc_memcpy_from_device:: Copy device memory to host memory. +* acc_memcpy_device:: Copy memory within a device. * acc_attach:: Let device pointer point to device-pointer target. * acc_detach:: Let device pointer point to host-pointer target. @@ -5837,6 +5930,44 @@ This function copies device memory specified by device address of +@node acc_memcpy_device +@section @code{acc_memcpy_device} -- Copy memory within a device. +@table @asis +@item @emph{Description} +This function copies device memory from one memory location to another +on the current device. It copies @var{bytes} bytes of data from the device +address, specified by @var{data_dev_src}, to the device address +@var{data_dev_dest}. The @code{_async} version performs the transfer +asnychronously using the queue associated with @var{async_arg}. + +@item @emph{C/C++}: +@multitable @columnfractions .20 .80 +@item @emph{Prototype}: @tab @code{void acc_memcpy_device(d_void* data_dev_dest,} +@item @tab @code{d_void* data_dev_src, size_t bytes);} +@item @emph{Prototype}: @tab @code{void acc_memcpy_device_async(d_void* data_dev_dest,} +@item @tab @code{d_void* data_dev_src, size_t bytes, int async_arg);} +@end multitable + +@item @emph{Fortran}: +@multitable @columnfractions .20 .80 +@item @emph{Interface}: @tab @code{subroutine acc_memcpy_device(data_dev_dest, &} +@item @tab @code{data_dev_src, bytes)} +@item @emph{Interface}: @tab @code{subroutine acc_memcpy_device_async(data_dev_dest, &} +@item @tab @code{data_dev_src, bytes, async_arg)} +@item @tab @code{type(c_ptr), value :: data_dev_dest} +@item @tab @code{type(c_ptr), value :: data_dev_src} +@item @tab @code{integer(c_size_t), value :: bytes} +@item @tab @code{integer(acc_handle_kind), value :: async_arg} +@end multitable + +@item @emph{Reference}: +@uref{https://www.openacc.org, OpenACC specification v2.6}, section +3.2.33. @uref{https://www.openacc.org, OpenACC specification v3.3}, section +3.2.28. +@end table + + + @node acc_attach @section @code{acc_attach} -- Let device pointer point to device-pointer target. @table @asis diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index 0482ed3..5b8ba7e 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -171,21 +171,22 @@ acc_free (void *d) } static void -memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async, - const char *libfnname) +memcpy_tofrom_device (bool dev_to, bool dev_from, void *dst, void *src, + size_t s, int async, const char *libfnname) { /* No need to call lazy open here, as the device pointer must have been obtained from a routine that did that. */ struct goacc_thread *thr = goacc_thread (); assert (thr && thr->dev); + if (s == 0) + return; if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) { - if (from) - memmove (h, d, s); - else - memmove (d, h, s); + if (src == dst) + return; + memcpy (dst, src, s); return; } @@ -199,10 +200,15 @@ memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async, } goacc_aq aq = get_goacc_asyncqueue (async); - if (from) - gomp_copy_dev2host (thr->dev, aq, h, d, s); + if (dev_to && dev_from) + { + if (dst != src) + gomp_copy_dev2dev (thr->dev, aq, dst, src, s); + } + else if (dev_from) + gomp_copy_dev2host (thr->dev, aq, dst, src, s); else - gomp_copy_host2dev (thr->dev, aq, d, h, s, false, /* TODO: cbuf? */ NULL); + gomp_copy_host2dev (thr->dev, aq, dst, src, s, false, /* TODO: cbuf? */ NULL); if (profiling_p) { @@ -214,25 +220,37 @@ memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async, void acc_memcpy_to_device (void *d, void *h, size_t s) { - memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__); + memcpy_tofrom_device (true, false, d, h, s, acc_async_sync, __FUNCTION__); } void acc_memcpy_to_device_async (void *d, void *h, size_t s, int async) { - memcpy_tofrom_device (false, d, h, s, async, __FUNCTION__); + memcpy_tofrom_device (true, false, d, h, s, async, __FUNCTION__); } void acc_memcpy_from_device (void *h, void *d, size_t s) { - memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__); + memcpy_tofrom_device (false, true, h, d, s, acc_async_sync, __FUNCTION__); } void acc_memcpy_from_device_async (void *h, void *d, size_t s, int async) { - memcpy_tofrom_device (true, d, h, s, async, __FUNCTION__); + memcpy_tofrom_device (false, true, h, d, s, async, __FUNCTION__); +} + +void +acc_memcpy_device (void *dst, void *src, size_t s) +{ + memcpy_tofrom_device (true, true, dst, src, s, acc_async_sync, __FUNCTION__); +} + +void +acc_memcpy_device_async (void *dst, void *src, size_t s, int async) +{ + memcpy_tofrom_device (true, true, dst, src, s, async, __FUNCTION__); } /* Return the device pointer that corresponds to host data H. Or NULL diff --git a/libgomp/omp.h.in b/libgomp/omp.h.in index 8d17db1..4f2bc46 100644 --- a/libgomp/omp.h.in +++ b/libgomp/omp.h.in @@ -347,6 +347,10 @@ extern int omp_target_memcpy_rect_async (void *, const void *, __SIZE_TYPE__, const __SIZE_TYPE__ *, int, int, int, omp_depend_t * __GOMP_DEFAULT_NULL) __GOMP_NOTHROW; +extern void *omp_target_memset (void *, int, __SIZE_TYPE__, int) __GOMP_NOTHROW; +extern void *omp_target_memset_async (void *, int, __SIZE_TYPE__, int, + int, omp_depend_t * __GOMP_DEFAULT_NULL) + __GOMP_NOTHROW; extern int omp_target_associate_ptr (const void *, const void *, __SIZE_TYPE__, __SIZE_TYPE__, int) __GOMP_NOTHROW; extern int omp_target_disassociate_ptr (const void *, int) __GOMP_NOTHROW; diff --git a/libgomp/omp_lib.f90.in b/libgomp/omp_lib.f90.in index cb6b95f..ce866c0 100644 --- a/libgomp/omp_lib.f90.in +++ b/libgomp/omp_lib.f90.in @@ -904,6 +904,29 @@ end interface interface + function omp_target_memset (ptr, val, count, device_num) bind(c) + use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_size_t + type(c_ptr) :: omp_target_memset + type(c_ptr), value :: ptr + integer(c_size_t), value :: count + integer(c_int), value :: val, device_num + end function omp_target_memset + end interface + + interface + function omp_target_memset_async (ptr, val, count, device_num, & + depobj_count, depobj_list) bind(c) + use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_size_t + import :: omp_depend_kind + type(c_ptr) :: omp_target_memset_async + type(c_ptr), value :: ptr + integer(c_size_t), value :: count + integer(c_int), value :: val, device_num, depobj_count + integer(omp_depend_kind), optional :: depobj_list(*) + end function omp_target_memset_async + end interface + + interface function omp_target_associate_ptr (host_ptr, device_ptr, size, & device_offset, device_num) bind(c) use, intrinsic :: iso_c_binding, only : c_ptr, c_size_t, c_int diff --git a/libgomp/omp_lib.h.in b/libgomp/omp_lib.h.in index f7af5ff..9047095 100644 --- a/libgomp/omp_lib.h.in +++ b/libgomp/omp_lib.h.in @@ -505,6 +505,31 @@ end interface interface + function omp_target_memset (ptr, val, count, device_num) bind(c) + use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_size_t + type(c_ptr) omp_target_memset + type(c_ptr), value :: ptr + integer(c_size_t), value :: count + integer(c_int), value :: val, device_num + end function omp_target_memset + end interface + + interface + function omp_target_memset_async (ptr, val, count, device_num, & + & depobj_count, depobj_list) & + & bind(c) + use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_size_t + import :: omp_depend_kind + type(c_ptr) :: omp_target_memset_async + type(c_ptr), value :: ptr + integer(c_size_t), value :: count + integer(c_int), value :: val, device_num, depobj_count + integer(omp_depend_kind), optional :: depobj_list(*) + end function omp_target_memset_async + end interface + + + interface function omp_target_associate_ptr (host_ptr, device_ptr, size, & & device_offset, device_num) & & bind(c) diff --git a/libgomp/openacc.f90 b/libgomp/openacc.f90 index 8ef107e..9d51f01 100644 --- a/libgomp/openacc.f90 +++ b/libgomp/openacc.f90 @@ -797,6 +797,7 @@ module openacc public :: acc_copyout_finalize, acc_delete_finalize public :: acc_memcpy_to_device, acc_memcpy_to_device_async public :: acc_memcpy_from_device, acc_memcpy_from_device_async + public :: acc_memcpy_device, acc_memcpy_device_async integer, parameter :: openacc_version = 201711 @@ -1046,6 +1047,27 @@ module openacc end subroutine end interface + interface + subroutine acc_memcpy_device (data_dev_dest, data_dev_src, bytes) bind(C) + use iso_c_binding, only: c_ptr, c_size_t + type(c_ptr), value :: data_dev_dest + type(c_ptr), value :: data_dev_src + integer(c_size_t), value :: bytes + end subroutine + end interface + + interface + subroutine acc_memcpy_device_async (data_dev_dest, data_dev_src, & + bytes, async_arg) bind(C) + use iso_c_binding, only: c_ptr, c_size_t + import :: acc_handle_kind + type(c_ptr), value :: data_dev_dest + type(c_ptr), value :: data_dev_src + integer(c_size_t), value :: bytes + integer(acc_handle_kind), value :: async_arg + end subroutine + end interface + interface acc_copyin_async procedure :: acc_copyin_async_32_h procedure :: acc_copyin_async_64_h diff --git a/libgomp/openacc.h b/libgomp/openacc.h index a520bbe..3085b00 100644 --- a/libgomp/openacc.h +++ b/libgomp/openacc.h @@ -123,6 +123,7 @@ void *acc_hostptr (void *) __GOACC_NOTHROW; int acc_is_present (void *, size_t) __GOACC_NOTHROW; void acc_memcpy_to_device (void *, void *, size_t) __GOACC_NOTHROW; void acc_memcpy_from_device (void *, void *, size_t) __GOACC_NOTHROW; +void acc_memcpy_device (void *, void *, size_t) __GOACC_NOTHROW; void acc_attach (void **) __GOACC_NOTHROW; void acc_attach_async (void **, int) __GOACC_NOTHROW; void acc_detach (void **) __GOACC_NOTHROW; @@ -136,7 +137,7 @@ void acc_delete_finalize_async (void *, size_t, int) __GOACC_NOTHROW; void acc_detach_finalize (void **) __GOACC_NOTHROW; void acc_detach_finalize_async (void **, int) __GOACC_NOTHROW; -/* Async functions, specified in OpenACC 2.5. */ +/* Async functions, specified in OpenACC 2.5, acc_memcpy_device in 2.6. */ void acc_copyin_async (void *, size_t, int) __GOACC_NOTHROW; void acc_create_async (void *, size_t, int) __GOACC_NOTHROW; void acc_copyout_async (void *, size_t, int) __GOACC_NOTHROW; @@ -145,6 +146,7 @@ void acc_update_device_async (void *, size_t, int) __GOACC_NOTHROW; void acc_update_self_async (void *, size_t, int) __GOACC_NOTHROW; void acc_memcpy_to_device_async (void *, void *, size_t, int) __GOACC_NOTHROW; void acc_memcpy_from_device_async (void *, void *, size_t, int) __GOACC_NOTHROW; +void acc_memcpy_device_async (void *, void *, size_t, int) __GOACC_NOTHROW; /* CUDA-specific routines. */ void *acc_get_current_cuda_device (void) __GOACC_NOTHROW; diff --git a/libgomp/openacc_lib.h b/libgomp/openacc_lib.h index b0d287e..9333c48 100644 --- a/libgomp/openacc_lib.h +++ b/libgomp/openacc_lib.h @@ -528,6 +528,30 @@ end subroutine end interface + interface + subroutine acc_memcpy_device(data_dev_dest, data_dev_src, & + & bytes) bind(C) + use iso_c_binding, only: c_ptr, c_size_t + type(c_ptr), value :: data_dev_dest + type(c_ptr), value :: data_dev_src + integer(c_size_t), value :: bytes + end subroutine + end interface + + interface + subroutine acc_memcpy_device_async(data_dev_dest, & + & data_dev_src, bytes, & + & async_arg) bind(C) + use iso_c_binding, only: c_ptr, c_size_t + import :: acc_handle_kind + type(c_ptr), value :: data_dev_dest + type(c_ptr), value :: data_dev_src + integer(c_size_t), value :: bytes + integer(acc_handle_kind), value :: async_arg + end subroutine + end interface + + interface acc_copyin_async subroutine acc_copyin_async_32_h (a, len, async) use iso_c_binding, only: c_int32_t diff --git a/libgomp/plugin/cuda-lib.def b/libgomp/plugin/cuda-lib.def index eb562ac..7f4ddcc 100644 --- a/libgomp/plugin/cuda-lib.def +++ b/libgomp/plugin/cuda-lib.def @@ -42,6 +42,7 @@ CUDA_ONE_CALL (cuMemcpyHtoDAsync) CUDA_ONE_CALL (cuMemcpy2D) CUDA_ONE_CALL (cuMemcpy2DUnaligned) CUDA_ONE_CALL (cuMemcpy3D) +CUDA_ONE_CALL (cuMemsetD8) CUDA_ONE_CALL (cuMemFree) CUDA_ONE_CALL (cuMemFreeHost) CUDA_ONE_CALL (cuMemGetAddressRange) diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c index 4b42a59..498b549 100644 --- a/libgomp/plugin/plugin-gcn.c +++ b/libgomp/plugin/plugin-gcn.c @@ -208,6 +208,8 @@ struct hsa_runtime_fn_info hsa_status_t (*hsa_code_object_deserialize_fn) (void *serialized_code_object, size_t serialized_code_object_size, const char *options, hsa_code_object_t *code_object); + hsa_status_t (*hsa_amd_memory_fill_fn)(void *ptr, uint32_t value, + size_t count); hsa_status_t (*hsa_amd_memory_lock_fn) (void *host_ptr, size_t size, hsa_agent_t *agents, int num_agent, void **agent_ptr); @@ -1456,6 +1458,7 @@ init_hsa_runtime_functions (void) DLSYM_FN (hsa_signal_load_acquire) DLSYM_FN (hsa_queue_destroy) DLSYM_FN (hsa_code_object_deserialize) + DLSYM_OPT_FN (hsa_amd_memory_fill) DLSYM_OPT_FN (hsa_amd_memory_lock) DLSYM_OPT_FN (hsa_amd_memory_unlock) DLSYM_OPT_FN (hsa_amd_memory_async_copy_rect) @@ -4435,6 +4438,83 @@ init_hip_runtime_functions (void) return true; } +bool +GOMP_OFFLOAD_memset (int ord, void *ptr, int val, size_t count) +{ + hsa_status_t status = HSA_STATUS_SUCCESS; + + /* A memset feature is only provided via hsa_amd_memory_fill; while it + is fast, it is an HSA extension and it has two requirements: The memory + must be aligned to multiples of 4 bytes - and, by construction, only + multiples of 4 bytes can be filled (uint32_t value argument). + + This means: Either not using that function or up to three function calls: + - copy 1 to 3 bytes to get alignment (hsa_memory_copy), if unaligned + - call hsa_amd_memory_fill + - copy remaining 1 to 3 bytes (hsa_memory_copy), if after alignment + count is not a multiple of 4 bytes. + + Having more than one function call is only profitable if there is + enough data to process; see below for the used heuristic values. */ + + uint8_t v8 = (uint8_t) val; + size_t before = (4 - (uintptr_t) ptr % 4) % 4; /* 0 to 3 bytes. */ + size_t tail = (count - before) % 4; /* 0 to 3 bytes. */ + + /* Heuristic */ + enum { + /* Prefer alloca to malloc up to ... */ + alloca_size = 256, /* bytes */ + /* Call hsa_amd_memory_fill also when two copy calls are required. */ + always_use_fill = 256*1024, /* bytes */ + /* Call hsa_amd_memory_fill also when on copy call is required. */ + use_fill_one_copy = (128+64)*1024 /* bytes */ + }; + + /* Do not call hsa_amd_memory_fill when any of the following conditions + is true. Note that it is always preferred if available and + before == tail == 0. */ + if (__builtin_expect (!hsa_fns.hsa_amd_memory_fill_fn, 0) + || (before && tail && count < always_use_fill) + || ((before || tail) && count < use_fill_one_copy)) + before = count; + + /* Copy call for alignment - or all data, if condition above is true. */ + if (before) + { + void *data; + if (before > alloca_size) + data = malloc (before * sizeof (uint8_t)); + else + data = alloca (before * sizeof (uint8_t)); + memset (data, val, before); + status = hsa_fns.hsa_memory_copy_fn (ptr, data, before); + if (before > alloca_size) + free (data); + if (data == 0 || status != HSA_STATUS_SUCCESS) + goto fail; + count -= before; + } + + if (count == 0) + return true; + + ptr += before; + + uint32_t values = v8 | (v8 << 8) | (v8 << 16) | (v8 << 24); + status = hsa_fns.hsa_amd_memory_fill_fn (ptr, values, count / 4); + if (tail && status == HSA_STATUS_SUCCESS) + { + ptr += count - tail; + status = hsa_fns.hsa_memory_copy_fn (ptr, &values, tail); + } + if (status == HSA_STATUS_SUCCESS) + return true; + +fail: + GOMP_PLUGIN_error ("memory set failed"); + return false; +} void GOMP_OFFLOAD_interop (struct interop_obj_t *obj, int ord, @@ -5079,7 +5159,8 @@ GOMP_OFFLOAD_openacc_async_queue_callback (struct goacc_asyncqueue *aq, queue_push_callback (aq, fn, data); } -/* Queue up an asynchronous data copy from host to DEVICE. */ +/* Queue up an asynchronous data copy from host to DEVICE. + (Also handles dev2host and dev2dev.) */ bool GOMP_OFFLOAD_openacc_async_host2dev (int device, void *dst, const void *src, @@ -5097,10 +5178,16 @@ bool GOMP_OFFLOAD_openacc_async_dev2host (int device, void *dst, const void *src, size_t n, struct goacc_asyncqueue *aq) { - struct agent_info *agent = get_agent_info (device); - assert (agent == aq->agent); - queue_push_copy (aq, dst, src, n); - return true; + return GOMP_OFFLOAD_openacc_async_host2dev (device, dst, src, n, aq); +} + +/* Queue up an asynchronous data copy from DEVICE to DEVICE. */ + +bool +GOMP_OFFLOAD_openacc_async_dev2dev (int device, void *dst, const void *src, + size_t n, struct goacc_asyncqueue *aq) +{ + return GOMP_OFFLOAD_openacc_async_host2dev (device, dst, src, n, aq); } union goacc_property_value diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index a5cf859..0ba445e 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -2019,6 +2019,34 @@ GOMP_OFFLOAD_openacc_async_queue_callback (struct goacc_asyncqueue *aq, } static bool +cuda_memcpy_dev_sanity_check (const void *d1, const void *d2, size_t s) +{ + CUdeviceptr pb1, pb2; + size_t ps1, ps2; + if (!s) + return true; + if (!d1 || !d2) + { + GOMP_PLUGIN_error ("invalid device address"); + return false; + } + CUDA_CALL (cuMemGetAddressRange, &pb1, &ps1, (CUdeviceptr) d1); + CUDA_CALL (cuMemGetAddressRange, &pb2, &ps2, (CUdeviceptr) d2); + if (!pb1 || !pb2) + { + GOMP_PLUGIN_error ("invalid device address"); + return false; + } + if ((void *)(d1 + s) > (void *)(pb1 + ps1) + || (void *)(d2 + s) > (void *)(pb2 + ps2)) + { + GOMP_PLUGIN_error ("invalid size"); + return false; + } + return true; +} + +static bool cuda_memcpy_sanity_check (const void *h, const void *d, size_t s) { CUdeviceptr pb; @@ -2077,6 +2105,9 @@ GOMP_OFFLOAD_dev2host (int ord, void *dst, const void *src, size_t n) bool GOMP_OFFLOAD_dev2dev (int ord, void *dst, const void *src, size_t n) { + if (!nvptx_attach_host_thread_to_device (ord) + || !cuda_memcpy_dev_sanity_check (dst, src, n)) + return false; CUDA_CALL (cuMemcpyDtoDAsync, (CUdeviceptr) dst, (CUdeviceptr) src, n, NULL); return true; } @@ -2267,6 +2298,15 @@ GOMP_OFFLOAD_memcpy3d (int dst_ord, int src_ord, size_t dim2_size, } bool +GOMP_OFFLOAD_memset (int ord, void *ptr, int val, size_t count) +{ + if (!nvptx_attach_host_thread_to_device (ord)) + return false; + CUDA_CALL (cuMemsetD8, (CUdeviceptr) ptr, (unsigned char) val, count); + return true; +} + +bool GOMP_OFFLOAD_openacc_async_host2dev (int ord, void *dst, const void *src, size_t n, struct goacc_asyncqueue *aq) { @@ -2288,6 +2328,18 @@ GOMP_OFFLOAD_openacc_async_dev2host (int ord, void *dst, const void *src, return true; } +bool +GOMP_OFFLOAD_openacc_async_dev2dev (int ord, void *dst, const void *src, + size_t n, struct goacc_asyncqueue *aq) +{ + if (!nvptx_attach_host_thread_to_device (ord) + || !cuda_memcpy_dev_sanity_check (dst, src, n)) + return false; + CUDA_CALL (cuMemcpyDtoDAsync, (CUdeviceptr) dst, (CUdeviceptr) src, n, + aq->cuda_stream); + return true; +} + union goacc_property_value GOMP_OFFLOAD_openacc_get_property (int n, enum goacc_property prop) { diff --git a/libgomp/target.c b/libgomp/target.c index 9674ff4..a2a4a72 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -461,6 +461,19 @@ gomp_copy_dev2host (struct gomp_device_descr *devicep, gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz); } +attribute_hidden void +gomp_copy_dev2dev (struct gomp_device_descr *devicep, + struct goacc_asyncqueue *aq, + void *dst, const void *src, size_t sz) +{ + if (__builtin_expect (aq != NULL, 0)) + goacc_device_copy_async (devicep, devicep->openacc.async.dev2dev_func, + "dev", dst, "dev", src, NULL, sz, aq); + else + gomp_device_copy (devicep, devicep->dev2dev_func, "dev", dst, + "dev", src, sz); +} + static void gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr) { @@ -4990,6 +5003,88 @@ omp_target_memcpy_rect_async (void *dst, const void *src, size_t element_size, return 0; } +static void +omp_target_memset_int (void *ptr, int val, size_t count, + struct gomp_device_descr *devicep) +{ + if (__builtin_expect (count == 0, 0)) + return; + if (devicep == NULL) + { + memset (ptr, val, count); + return; + } + + gomp_mutex_lock (&devicep->lock); + int ret = devicep->memset_func (devicep->target_id, ptr, val, count); + gomp_mutex_unlock (&devicep->lock); + if (!ret) + gomp_fatal ("omp_target_memset failed"); +} + +void* +omp_target_memset (void *ptr, int val, size_t count, int device_num) +{ + struct gomp_device_descr *devicep; + if (device_num == omp_initial_device + || device_num == gomp_get_num_devices () + || (devicep = resolve_device (device_num, false)) == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + devicep = NULL; + + omp_target_memset_int (ptr, val, count, devicep); + return ptr; +} + +typedef struct +{ + void *ptr; + size_t count; + struct gomp_device_descr *devicep; + int val; +} omp_target_memset_data; + +static void +omp_target_memset_async_helper (void *args) +{ + omp_target_memset_data *a = args; + omp_target_memset_int (a->ptr, a->val, a->count, a->devicep); +} + +void* +omp_target_memset_async (void *ptr, int val, size_t count, int device_num, + int depobj_count, omp_depend_t *depobj_list) +{ + void *depend[depobj_count + 5]; + struct gomp_device_descr *devicep; + unsigned flags = 0; + int i; + + if (device_num == omp_initial_device + || device_num == gomp_get_num_devices () + || (devicep = resolve_device (device_num, false)) == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + devicep = NULL; + + omp_target_memset_data s = {.ptr = ptr, .val = val, .count = count, + .devicep = devicep}; + if (depobj_count > 0 && depobj_list != NULL) + { + flags |= GOMP_TASK_FLAG_DEPEND; + depend[0] = 0; + depend[1] = (void *) (uintptr_t) depobj_count; + depend[2] = depend[3] = depend[4] = 0; + for (i = 0; i < depobj_count; ++i) + depend[i + 5] = &depobj_list[i]; + } + + GOMP_task (omp_target_memset_async_helper, &s, NULL, sizeof (s), + __alignof__ (s), true, flags, depend, 0, NULL); + return ptr; +} + int omp_target_associate_ptr (const void *host_ptr, const void *device_ptr, size_t size, size_t device_offset, int device_num) @@ -5555,6 +5650,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device, DLSYM_OPT (async_run, async_run); DLSYM_OPT (can_run, can_run); DLSYM (dev2dev); + DLSYM (memset); } if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200) { @@ -5573,6 +5669,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device, || !DLSYM_OPT (openacc.async.exec, openacc_async_exec) || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host) || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev) + || !DLSYM_OPT (openacc.async.dev2dev, openacc_async_dev2dev) || !DLSYM_OPT (openacc.get_property, openacc_get_property)) { /* Require all the OpenACC handlers if we have diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-1.C b/libgomp/testsuite/libgomp.c++/declare-mapper-1.C new file mode 100644 index 0000000..aba4f42 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/declare-mapper-1.C @@ -0,0 +1,87 @@ +// { dg-do run } + +#include <cstdlib> +#include <cassert> + +#define N 64 + +struct points +{ + double *x; + double *y; + double *z; + size_t len; +}; + +#pragma omp declare mapper(points p) map(to:p.x, p.y, p.z) \ + map(p.x[0:p.len]) \ + map(p.y[0:p.len]) \ + map(p.z[0:p.len]) + +struct shape +{ + points tmp; + points *pts; + int metadata[128]; +}; + +#pragma omp declare mapper(shape s) map(tofrom:s.pts, *s.pts) map(alloc:s.tmp) + +void +alloc_points (points *pts, size_t sz) +{ + pts->x = new double[sz]; + pts->y = new double[sz]; + pts->z = new double[sz]; + pts->len = sz; + for (int i = 0; i < sz; i++) + pts->x[i] = pts->y[i] = pts->z[i] = 0; +} + +int main (int argc, char *argv[]) +{ + shape myshape; + points mypts; + + myshape.pts = &mypts; + + alloc_points (&myshape.tmp, N); + myshape.pts = new points; + alloc_points (myshape.pts, N); + + #pragma omp target map(myshape) + { + for (int i = 0; i < N; i++) + { + myshape.pts->x[i]++; + myshape.pts->y[i]++; + myshape.pts->z[i]++; + } + } + + for (int i = 0; i < N; i++) + { + assert (myshape.pts->x[i] == 1); + assert (myshape.pts->y[i] == 1); + assert (myshape.pts->z[i] == 1); + } + + #pragma omp target + { + for (int i = 0; i < N; i++) + { + myshape.pts->x[i]++; + myshape.pts->y[i]++; + myshape.pts->z[i]++; + } + } + + for (int i = 0; i < N; i++) + { + assert (myshape.pts->x[i] == 2); + assert (myshape.pts->y[i] == 2); + assert (myshape.pts->z[i] == 2); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-2.C b/libgomp/testsuite/libgomp.c++/declare-mapper-2.C new file mode 100644 index 0000000..d848fdb --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/declare-mapper-2.C @@ -0,0 +1,55 @@ +// { dg-do run } + +#include <cassert> + +#define N 256 + +struct doublebuf +{ + int buf_a[N][N]; + int buf_b[N][N]; +}; + +#pragma omp declare mapper(lo:doublebuf b) map(b.buf_a[0:N/2][0:N]) \ + map(b.buf_b[0:N/2][0:N]) + +#pragma omp declare mapper(hi:doublebuf b) map(b.buf_a[N/2:N/2][0:N]) \ + map(b.buf_b[N/2:N/2][0:N]) + +int main (int argc, char *argv[]) +{ + doublebuf db; + + for (int i = 0; i < N; i++) + for (int j = 0; j < N; j++) + db.buf_a[i][j] = db.buf_b[i][j] = 0; + + #pragma omp target map(mapper(lo), tofrom:db) + { + for (int i = 0; i < N / 2; i++) + for (int j = 0; j < N; j++) + { + db.buf_a[i][j]++; + db.buf_b[i][j]++; + } + } + + #pragma omp target map(mapper(hi), tofrom:db) + { + for (int i = N / 2; i < N; i++) + for (int j = 0; j < N; j++) + { + db.buf_a[i][j]++; + db.buf_b[i][j]++; + } + } + + for (int i = 0; i < N; i++) + for (int j = 0; j < N; j++) + { + assert (db.buf_a[i][j] == 1); + assert (db.buf_b[i][j] == 1); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-3.C b/libgomp/testsuite/libgomp.c++/declare-mapper-3.C new file mode 100644 index 0000000..ea9b7de --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/declare-mapper-3.C @@ -0,0 +1,63 @@ +// { dg-do run } + +#include <cstdlib> +#include <cassert> + +struct S { + int *myarr; +}; + +#pragma omp declare mapper (S s) map(to:s.myarr) map (tofrom: s.myarr[0:20]) + +namespace A { +#pragma omp declare mapper (S s) map(to:s.myarr) map (tofrom: s.myarr[0:100]) +} + +namespace B { +#pragma omp declare mapper (S s) map(to:s.myarr) map (tofrom: s.myarr[100:100]) +} + +namespace A +{ + void incr_a (S my_s) + { +#pragma omp target + { + for (int i = 0; i < 100; i++) + my_s.myarr[i]++; + } + } +} + +namespace B +{ + void incr_b (S my_s) + { +#pragma omp target + { + for (int i = 100; i < 200; i++) + my_s.myarr[i]++; + } + } +} + +int main (int argc, char *argv[]) +{ + S my_s; + + my_s.myarr = (int *) calloc (200, sizeof (int)); + +#pragma omp target + { + for (int i = 0; i < 20; i++) + my_s.myarr[i]++; + } + + A::incr_a (my_s); + B::incr_b (my_s); + + for (int i = 0; i < 200; i++) + assert (my_s.myarr[i] == (i < 20) ? 2 : 1); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-4.C b/libgomp/testsuite/libgomp.c++/declare-mapper-4.C new file mode 100644 index 0000000..f194e63 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/declare-mapper-4.C @@ -0,0 +1,63 @@ +// { dg-do run } + +#include <cstdlib> +#include <cassert> + +struct S { + int *myarr; +}; + +#pragma omp declare mapper (S s) map(to:s.myarr) map (tofrom: s.myarr[0:20]) + +namespace A { +#pragma omp declare mapper (S s) map(to:s.myarr) map (tofrom: s.myarr[0:100]) +} + +namespace B { +#pragma omp declare mapper (S s) map(to:s.myarr) map (tofrom: s.myarr[100:100]) +} + +namespace A +{ + void incr_a (S &my_s) + { +#pragma omp target + { + for (int i = 0; i < 100; i++) + my_s.myarr[i]++; + } + } +} + +namespace B +{ + void incr_b (S &my_s) + { +#pragma omp target + { + for (int i = 100; i < 200; i++) + my_s.myarr[i]++; + } + } +} + +int main (int argc, char *argv[]) +{ + S my_s; + + my_s.myarr = (int *) calloc (200, sizeof (int)); + +#pragma omp target + { + for (int i = 0; i < 20; i++) + my_s.myarr[i]++; + } + + A::incr_a (my_s); + B::incr_b (my_s); + + for (int i = 0; i < 200; i++) + assert (my_s.myarr[i] == (i < 20) ? 2 : 1); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-5.C b/libgomp/testsuite/libgomp.c++/declare-mapper-5.C new file mode 100644 index 0000000..0030de8 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/declare-mapper-5.C @@ -0,0 +1,52 @@ +// { dg-do run } + +#include <cassert> + +struct S +{ + int *myarr; + int len; +}; + +class C +{ + S smemb; +#pragma omp declare mapper (custom:S s) map(to:s.myarr) \ + map(tofrom:s.myarr[0:s.len]) + +public: + C(int l) + { + smemb.myarr = new int[l]; + smemb.len = l; + for (int i = 0; i < l; i++) + smemb.myarr[i] = 0; + } + void bump(); + void check(); +}; + +void +C::bump () +{ +#pragma omp target map(mapper(custom), tofrom: smemb) + { + for (int i = 0; i < smemb.len; i++) + smemb.myarr[i]++; + } +} + +void +C::check () +{ + for (int i = 0; i < smemb.len; i++) + assert (smemb.myarr[i] == 1); +} + +int main (int argc, char *argv[]) +{ + C test (100); + test.bump (); + test.check (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-6.C b/libgomp/testsuite/libgomp.c++/declare-mapper-6.C new file mode 100644 index 0000000..14ed10d --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/declare-mapper-6.C @@ -0,0 +1,37 @@ +// { dg-do run } + +#include <cassert> + +template <typename T> +void adjust (T param) +{ +#pragma omp declare mapper (T x) map(to:x.len, x.base) \ + map(tofrom:x.base[0:x.len]) + +#pragma omp target + for (int i = 0; i < param.len; i++) + param.base[i]++; +} + +struct S { + int len; + int *base; +}; + +int main (int argc, char *argv[]) +{ + S a; + + a.len = 100; + a.base = new int[a.len]; + + for (int i = 0; i < a.len; i++) + a.base[i] = 0; + + adjust (a); + + for (int i = 0; i < a.len; i++) + assert (a.base[i] == 1); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-7.C b/libgomp/testsuite/libgomp.c++/declare-mapper-7.C new file mode 100644 index 0000000..ba4792a --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/declare-mapper-7.C @@ -0,0 +1,59 @@ +// { dg-do run } + +#include <cassert> + +struct S +{ + int *myarr; +}; + +struct T +{ + S *s; +}; + +#pragma omp declare mapper (s100: S x) map(to: x.myarr) \ + map(tofrom: x.myarr[0:100]) +// Define this because ... +#pragma omp declare mapper (default: S x) map(to: x.myarr) \ + map(tofrom: x.myarr[0:100]) + + +void +bump (T t) +{ + /* Here we have an implicit/default mapper invoking a named mapper. We + need to make sure that can be located properly at gimplification + time. */ + +// ... the following is invalid in OpenMP - albeit supported by GCC +// (after disabling: error: in ‘declare mapper’ directives, parameter to ‘mapper’ modifier must be ‘default’ ) + +// #pragma omp declare mapper (T t) map(to:t.s) map(mapper(s100), tofrom: t.s[0]) + +// ... thus, we now use ... +#pragma omp declare mapper (T t) map(to:t.s) map(mapper(default), tofrom: t.s[0]) + +#pragma omp target + for (int i = 0; i < 100; i++) + t.s->myarr[i]++; +} + +int main (int argc, char *argv[]) +{ + S my_s; + T my_t; + + my_s.myarr = new int[100]; + my_t.s = &my_s; + + for (int i = 0; i < 100; i++) + my_s.myarr[i] = 0; + + bump (my_t); + + for (int i = 0; i < 100; i++) + assert (my_s.myarr[i] == 1); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-8.C b/libgomp/testsuite/libgomp.c++/declare-mapper-8.C new file mode 100644 index 0000000..3818e52 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/declare-mapper-8.C @@ -0,0 +1,61 @@ +// { dg-do run } + +#include <cassert> + +struct S +{ + int *myarr; + int len; +}; + +template<typename T> +class C +{ + T memb; +#pragma omp declare mapper (T t) map(to:t.len, t.myarr) \ + map(tofrom:t.myarr[0:t.len]) + +public: + C(int sz); + ~C(); + void bump(); + void check(); +}; + +template<typename T> +C<T>::C(int sz) +{ + memb.myarr = new int[sz]; + for (int i = 0; i < sz; i++) + memb.myarr[i] = 0; + memb.len = sz; +} + +template<typename T> +C<T>::~C() +{ + delete[] memb.myarr; +} + +template<typename T> +void C<T>::bump() +{ +#pragma omp target map(memb) + for (int i = 0; i < memb.len; i++) + memb.myarr[i]++; +} + +template<typename T> +void C<T>::check() +{ + for (int i = 0; i < memb.len; i++) + assert (memb.myarr[i] == 1); +} + +int main(int argc, char *argv[]) +{ + C<S> c_int(100); + c_int.bump(); + c_int.check(); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-10.C b/libgomp/testsuite/libgomp.c++/target-flex-10.C new file mode 100644 index 0000000..8fa9af7 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-10.C @@ -0,0 +1,215 @@ +/* Basic container usage. */ + +#include <vector> +#include <deque> +#include <list> +#include <set> +#include <map> +#if __cplusplus >= 201103L +#include <array> +#include <forward_list> +#include <unordered_set> +#include <unordered_map> +#endif + +bool vector_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::vector<int> vector; + ok = vector.empty(); + } + return ok; +} + +bool deque_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::deque<int> deque; + ok = deque.empty(); + } + return ok; +} + +bool list_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::list<int> list; + ok = list.empty(); + } + return ok; +} + +bool map_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::map<int, int> map; + ok = map.empty(); + } + return ok; +} + +bool set_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::set<int> set; + ok = set.empty(); + } + return ok; +} + +bool multimap_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::multimap<int, int> multimap; + ok = multimap.empty(); + } + return ok; +} + +bool multiset_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::multiset<int, int> multiset; + ok = multiset.empty(); + } + return ok; +} + +#if __cplusplus >= 201103L + +bool array_test() +{ + static constexpr std::size_t array_size = 42; + bool ok; + #pragma omp target map(from: ok) + { + std::array<int, array_size> array{}; + ok = array[0] == 0 + && array[array_size - 1] == 0; + } + return ok; +} + +bool forward_list_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::forward_list<int> forward_list; + ok = forward_list.empty(); + } + return ok; +} + +bool unordered_map_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::unordered_map<int, int> unordered_map; + ok = unordered_map.empty(); + } + return ok; +} + +bool unordered_set_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::unordered_set<int> unordered_set; + ok = unordered_set.empty(); + } + return ok; +} + +bool unordered_multimap_test() +{ + + bool ok; + #pragma omp target map(from: ok) + { + std::unordered_multimap<int, int> unordered_multimap; + ok = unordered_multimap.empty(); + } + return ok; +} + +bool unordered_multiset_test() +{ + + bool ok; + #pragma omp target map(from: ok) + { + std::unordered_multiset<int> unordered_multiset; + ok = unordered_multiset.empty(); + } + return ok; +} + +#else +bool array_test() { return true; } +bool forward_list_test() { return true; } +bool unordered_map_test() { return true; } +bool unordered_set_test() { return true; } +bool unordered_multimap_test() { return true; } +bool unordered_multiset_test() { return true; } +#endif + +int main() +{ + const bool vec_res = vector_test(); + __builtin_printf("vector : %s\n", vec_res ? "PASS" : "FAIL"); + const bool deque_res = deque_test(); + __builtin_printf("deque : %s\n", deque_res ? "PASS" : "FAIL"); + const bool list_res = list_test(); + __builtin_printf("list : %s\n", list_res ? "PASS" : "FAIL"); + const bool map_res = map_test(); + __builtin_printf("map : %s\n", map_res ? "PASS" : "FAIL"); + const bool set_res = set_test(); + __builtin_printf("set : %s\n", set_res ? "PASS" : "FAIL"); + const bool multimap_res = multimap_test(); + __builtin_printf("multimap : %s\n", multimap_res ? "PASS" : "FAIL"); + const bool multiset_res = multiset_test(); + __builtin_printf("multiset : %s\n", multiset_res ? "PASS" : "FAIL"); + const bool array_res = array_test(); + __builtin_printf("array : %s\n", array_res ? "PASS" : "FAIL"); + const bool forward_list_res = forward_list_test(); + __builtin_printf("forward_list : %s\n", forward_list_res ? "PASS" : "FAIL"); + const bool unordered_map_res = unordered_map_test(); + __builtin_printf("unordered_map : %s\n", unordered_map_res ? "PASS" : "FAIL"); + const bool unordered_set_res = unordered_set_test(); + __builtin_printf("unordered_set : %s\n", unordered_set_res ? "PASS" : "FAIL"); + const bool unordered_multimap_res = unordered_multimap_test(); + __builtin_printf("unordered_multimap: %s\n", unordered_multimap_res ? "PASS" : "FAIL"); + const bool unordered_multiset_res = unordered_multiset_test(); + __builtin_printf("unordered_multiset: %s\n", unordered_multiset_res ? "PASS" : "FAIL"); + const bool ok = vec_res + && deque_res + && list_res + && map_res + && set_res + && multimap_res + && multiset_res + && array_res + && forward_list_res + && unordered_map_res + && unordered_set_res + && unordered_multimap_res + && unordered_multiset_res; + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-100.C b/libgomp/testsuite/libgomp.c++/target-flex-100.C new file mode 100644 index 0000000..7ab047f --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-100.C @@ -0,0 +1,210 @@ +/* Container adaptors in target region. + Does not test comparison operators other than equality to allow these tests + to be generalized to arbitrary input data. */ + +#include <algorithm> +#include <cstdio> +#include <deque> +#include <queue> +#include <stack> +#include <vector> + +#include "target-flex-common.h" + +template<typename T, std::size_t Size> +bool test_stack(T (&arr)[Size]) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arr[:Size]) + { + bool inner_ok = true; + const std::size_t half_size = Size / 2; + const T first_element = arr[0]; + const T middle_element = arr[half_size - 1]; + const T last_element = arr[Size - 1]; + typedef std::stack<T, std::vector<T> > stack_type; + stack_type stack; + VERIFY (stack.empty()); + VERIFY (stack.size() == 0); + { + /* Do half with push. */ + std::size_t idx = 0; + for (; idx < half_size; ++idx) + { + stack.push(arr[idx]); + VERIFY (stack.top() == arr[idx]); + } + VERIFY (stack.size() == half_size); + VERIFY (static_cast<const stack_type&>(stack).size() == half_size); + for (; idx < Size; ++idx) + { + #if __cplusplus >= 201103L + /* Do the rest with emplace if C++11 or higher. */ + stack.emplace(arr[idx]); + #else + /* Otherwise just use push again. */ + stack.push(arr[idx]); + #endif + VERIFY (stack.top() == arr[idx]); + } + VERIFY (stack.size() == Size); + VERIFY (static_cast<const stack_type&>(stack).size() == Size); + + const stack_type stack_orig = stack_type(std::vector<T>(arr, arr + Size)); + VERIFY (stack == stack_orig); + /* References are contained in their own scope so we don't accidently + add tests referencing them after they have been invalidated. */ + { + const T& const_top = static_cast<const stack_type&>(stack).top(); + VERIFY (const_top == last_element); + T& mutable_top = stack.top(); + mutable_top = first_element; + VERIFY (const_top == first_element); + } + /* Will only compare inequal if the first and last elements are different. */ + VERIFY (first_element != last_element || stack != stack_orig); + for (std::size_t count = Size - half_size; count != 0; --count) + stack.pop(); + VERIFY (stack.top() == middle_element); + const stack_type stack_half_orig = stack_type(std::vector<T>(arr, arr + half_size)); + VERIFY (stack == stack_half_orig); + } + end: + ok = inner_ok; + } + return ok; +} + +template<typename T, std::size_t Size> +bool test_queue(T (&arr)[Size]) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arr[:Size]) + { + bool inner_ok = true; + const std::size_t half_size = Size / 2; + const T first_element = arr[0]; + const T last_element = arr[Size - 1]; + typedef std::queue<T, std::deque<T> > queue_type; + queue_type queue; + VERIFY (queue.empty()); + VERIFY (queue.size() == 0); + { + /* Do half with push. */ + std::size_t idx = 0; + for (; idx < half_size; ++idx) + { + queue.push(arr[idx]); + VERIFY (queue.back() == arr[idx]); + VERIFY (queue.front() == first_element); + } + VERIFY (queue.size() == half_size); + VERIFY (static_cast<const queue_type&>(queue).size() == half_size); + for (; idx < Size; ++idx) + { + #if __cplusplus >= 201103L + /* Do the rest with emplace if C++11 or higher. */ + queue.emplace(arr[idx]); + #else + /* Otherwise just use push again. */ + queue.push(arr[idx]); + #endif + VERIFY (queue.back() == arr[idx]); + } + VERIFY (queue.size() == Size); + VERIFY (static_cast<const queue_type&>(queue).size() == Size); + + const queue_type queue_orig = queue_type(std::deque<T>(arr, arr + Size)); + VERIFY (queue == queue_orig); + + /* References are contained in their own scope so we don't accidently + add tests referencing them after they have been invalidated. */ + { + const T& const_front = static_cast<const queue_type&>(queue).front(); + VERIFY (const_front == first_element); + T& mutable_front = queue.front(); + + const T& const_back = static_cast<const queue_type&>(queue).back(); + VERIFY (const_back == last_element); + T& mutable_back = queue.back(); + { + using std::swap; + swap(mutable_front, mutable_back); + } + VERIFY (const_front == last_element); + VERIFY (const_back == first_element); + /* Will only compare inequal if the first and last elements are different. */ + VERIFY (first_element != last_element || queue != queue_orig); + /* Return the last element to normal for the next comparison. */ + mutable_back = last_element; + } + + const T middle_element = arr[half_size]; + for (std::size_t count = Size - half_size; count != 0; --count) + queue.pop(); + VERIFY (queue.front() == middle_element); + const queue_type queue_upper_half = queue_type(std::deque<T>(arr + half_size, arr + Size)); + VERIFY (queue == queue_upper_half); + } + end: + ok = inner_ok; + } + return ok; +} + +template<typename T, std::size_t Size> +bool test_priority_queue(T (&arr)[Size], const T min_value, const T max_value) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arr[:Size]) + { + bool inner_ok = true; + typedef std::priority_queue<T, std::vector<T> > priority_queue_type; + { + priority_queue_type pqueue; + VERIFY (pqueue.empty()); + VERIFY (pqueue.size() == 0); + } + { + priority_queue_type pqueue(arr, arr + Size); + VERIFY (!pqueue.empty()); + VERIFY (pqueue.size() == Size); + VERIFY (static_cast<const priority_queue_type&>(pqueue).size() == Size); + + const T old_max = pqueue.top(); + + #if __cplusplus >= 201103L + pqueue.emplace(max_value); + #else + pqueue.push(max_value); + #endif + VERIFY (pqueue.top() == max_value); + pqueue.pop(); + VERIFY (pqueue.top() == old_max); + pqueue.push(min_value); + VERIFY (pqueue.top() == old_max); + pqueue.push(max_value); + VERIFY (pqueue.top() == max_value); + pqueue.pop(); + VERIFY (pqueue.top() == old_max); + VERIFY (pqueue.size() == Size + 1); + + for (std::size_t count = Size; count != 0; --count) + pqueue.pop(); + VERIFY (pqueue.size() == 1); + VERIFY (pqueue.top() == min_value); + } + end: + ok = inner_ok; + } + return ok; +} + +int main() +{ + int arr[10] = {0,1,2,3,4,5,6,7,8,9}; + + return test_stack(arr) + && test_queue(arr) + && test_priority_queue(arr, 0, 1000) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-101.C b/libgomp/testsuite/libgomp.c++/target-flex-101.C new file mode 100644 index 0000000..be9037e --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-101.C @@ -0,0 +1,136 @@ +/* { dg-additional-options -std=c++23 } */ + +/* C++23 container adaptors in target region. + Severely needs additional tests. */ + +#include <cstdio> +#include <utility> +#include <version> + +#if __cpp_lib_flat_map >= 202207L +#define ENABLE_FLAT_MAP 1 +#endif +#if __cpp_lib_flat_set >= 202207L +#define ENABLE_FLAT_SET 1 +#endif + +#ifdef ENABLE_FLAT_MAP +#include <flat_map> +#endif +#ifdef ENABLE_FLAT_SET +#include <flat_set> +#endif + +#include "target-flex-common.h" + +#ifdef ENABLE_FLAT_MAP +template<typename K, typename V, typename std::size_t Size> +bool test_flat_map(std::pair<K, V> (&arr)[Size]) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arr[:Size]) + { + bool inner_ok = true; + { + using flat_map_type = std::flat_map<K, V>; + flat_map_type map = {arr, arr + Size}; + + VERIFY (!map.empty()); + for (const auto& element : arr) + VERIFY (map.contains(element.first)); + } + end: + ok = inner_ok; + } + return ok; +} + +template<typename K, typename V, typename std::size_t Size> +bool test_flat_multimap(std::pair<K, V> (&arr)[Size]) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arr[:Size]) + { + bool inner_ok = true; + { + using flat_map_type = std::flat_map<K, V>; + flat_map_type map = {arr, arr + Size}; + + VERIFY (!map.empty()); + for (const auto& element : arr) + VERIFY (map.contains(element.first)); + } + end: + ok = inner_ok; + } + return ok; +} +#else +template<typename K, typename V, typename std::size_t Size> +bool test_flat_map(std::pair<K, V> (&arr)[Size]) { return true; } + +template<typename K, typename V, typename std::size_t Size> +bool test_flat_multimap(std::pair<K, V> (&arr)[Size]) { return true; } +#endif + +#ifdef ENABLE_FLAT_SET +template<typename T, typename std::size_t Size> +bool test_flat_set(T (&arr)[Size]) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arr[:Size]) + { + bool inner_ok = true; + { + using flat_set_type = std::flat_set<T>; + flat_set_type set = {arr, arr + Size}; + + VERIFY (!set.empty()); + for (const auto& element : arr) + VERIFY (set.contains(element)); + } + end: + ok = inner_ok; + } + return ok; +} + +template<typename T, typename std::size_t Size> +bool test_flat_multiset(T (&arr)[Size]) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arr[:Size]) + { + bool inner_ok = true; + { + using flat_multiset_type = std::flat_multiset<T>; + flat_multiset_type multiset = {arr, arr + Size}; + + VERIFY (!multiset.empty()); + for (const auto& element : arr) + VERIFY (multiset.contains(element)); + } + end: + ok = inner_ok; + } + return ok; +} +#else +template<typename T, typename std::size_t Size> +bool test_flat_set(T (&arr)[Size]) { return true; } + +template<typename T, typename std::size_t Size> +bool test_flat_multiset(T (&arr)[Size]) { return true; } +#endif + +int main() +{ + int arr[10] = {0,1,2,3,4,5,6,7,8,9}; + std::pair<int, int> pairs[10] = {{ 1, 2}, { 2, 4}, { 3, 6}, { 4, 8}, { 5, 10}, + { 6, 12}, { 7, 14}, { 8, 16}, { 9, 18}, {10, 20}}; + + return test_flat_set(arr) + && test_flat_multiset(arr) + && test_flat_map(pairs) + && test_flat_multimap(pairs) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-11.C b/libgomp/testsuite/libgomp.c++/target-flex-11.C new file mode 100644 index 0000000..6d55129 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-11.C @@ -0,0 +1,444 @@ +/* Check constructors/destructors are called in containers. */ + +#include <vector> +#include <deque> +#include <list> +#include <set> +#include <map> +#include <utility> +#if __cplusplus >= 201103L +#include <array> +#include <forward_list> +#include <unordered_set> +#include <unordered_map> +#endif + +#include "target-flex-common.h" + +struct indirect_counter +{ + typedef int counter_value_type; + counter_value_type *_count_ptr; + + indirect_counter(counter_value_type *count_ptr) BL_NOEXCEPT : _count_ptr(count_ptr) { + ++(*_count_ptr); + } + indirect_counter(const indirect_counter& other) BL_NOEXCEPT : _count_ptr(other._count_ptr) { + ++(*_count_ptr); + } + /* Don't declare a move constructor, we want to copy no matter what. */ + ~indirect_counter() { + --(*_count_ptr); + } +}; + +bool operator==(indirect_counter const& lhs, indirect_counter const& rhs) BL_NOEXCEPT + { return lhs._count_ptr == rhs._count_ptr; } +bool operator<(indirect_counter const& lhs, indirect_counter const& rhs) BL_NOEXCEPT + { return lhs._count_ptr < rhs._count_ptr; } + +#if __cplusplus >= 201103L +template<> +struct std::hash<indirect_counter> +{ + std::size_t operator()(const indirect_counter& ic) const noexcept + { return std::hash<indirect_counter::counter_value_type *>{}(ic._count_ptr); } +}; +#endif + +/* Not a container, just a sanity check really. */ +bool automatic_lifetime_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + indirect_counter c = indirect_counter(&counter); + indirect_counter(static_cast<int*>(&counter)); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool vector_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + std::vector<indirect_counter> vec(42, indirect_counter(&counter)); + VERIFY (counter == 42); + vec.resize(32, indirect_counter(&counter)); + VERIFY (counter == 32); + vec.push_back(indirect_counter(&counter)); + VERIFY (counter == 33); + vec.pop_back(); + VERIFY (counter == 32); + vec.pop_back(); + VERIFY (counter == 31); + vec.resize(100, indirect_counter(&counter)); + VERIFY (counter == 100); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool deque_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + std::deque<indirect_counter> vec(42, indirect_counter(&counter)); + VERIFY (counter == 42); + vec.resize(32, indirect_counter(&counter)); + VERIFY (counter == 32); + vec.push_back(indirect_counter(&counter)); + VERIFY (counter == 33); + vec.pop_back(); + VERIFY (counter == 32); + vec.pop_back(); + VERIFY (counter == 31); + vec.resize(100, indirect_counter(&counter)); + VERIFY (counter == 100); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool list_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + std::list<indirect_counter> list(42, indirect_counter(&counter)); + VERIFY (counter == 42); + list.resize(32, indirect_counter(&counter)); + VERIFY (counter == 32); + list.push_back(indirect_counter(&counter)); + VERIFY (counter == 33); + list.pop_back(); + VERIFY (counter == 32); + list.pop_back(); + VERIFY (counter == 31); + list.resize(100, indirect_counter(&counter)); + VERIFY (counter == 100); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool map_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + std::map<int, indirect_counter> map; + map.insert(std::make_pair(1, indirect_counter(&counter))); + VERIFY (counter == 1); + map.insert(std::make_pair(1, indirect_counter(&counter))); + VERIFY (counter == 1); + map.insert(std::make_pair(2, indirect_counter(&counter))); + VERIFY (counter == 2); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool set_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter0 = 0; + int counter1 = 0; + { + std::set<indirect_counter> set; + set.insert(indirect_counter(&counter0)); + VERIFY (counter0 == 1); + set.insert(indirect_counter(&counter0)); + VERIFY (counter0 == 1); + set.insert(indirect_counter(&counter1)); + VERIFY (counter0 == 1 && counter1 == 1); + } + VERIFY (counter0 == 0 && counter1 == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool multimap_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + std::multimap<int, indirect_counter> multimap; + multimap.insert(std::make_pair(1, indirect_counter(&counter))); + VERIFY (counter == 1); + multimap.insert(std::make_pair(1, indirect_counter(&counter))); + VERIFY (counter == 2); + multimap.insert(std::make_pair(2, indirect_counter(&counter))); + VERIFY (counter == 3); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool multiset_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter0 = 0; + int counter1 = 0; + { + std::multiset<indirect_counter> multiset; + multiset.insert(indirect_counter(&counter0)); + VERIFY (counter0 == 1); + multiset.insert(indirect_counter(&counter0)); + VERIFY (counter0 == 2); + multiset.insert(indirect_counter(&counter1)); + VERIFY (counter0 == 2 && counter1 == 1); + } + VERIFY (counter0 == 0 && counter1 == 0); + end: + ok = inner_ok; + } + return ok; +} + +#if __cplusplus >= 201103L + +bool array_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + indirect_counter ic(&counter); + std::array<indirect_counter, 10> array{ic, ic, ic, ic, ic, + ic, ic, ic, ic, ic}; + VERIFY (counter == 11); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool forward_list_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + std::forward_list<indirect_counter> forward_list(42, indirect_counter(&counter)); + VERIFY (counter == 42); + forward_list.resize(32, indirect_counter(&counter)); + VERIFY (counter == 32); + forward_list.push_front(indirect_counter(&counter)); + VERIFY (counter == 33); + forward_list.pop_front(); + VERIFY (counter == 32); + forward_list.pop_front(); + VERIFY (counter == 31); + forward_list.resize(100, indirect_counter(&counter)); + VERIFY (counter == 100); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool unordered_map_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + std::unordered_map<int, indirect_counter> unordered_map; + unordered_map.insert({1, indirect_counter(&counter)}); + VERIFY (counter == 1); + unordered_map.insert({1, indirect_counter(&counter)}); + VERIFY (counter == 1); + unordered_map.insert({2, indirect_counter(&counter)}); + VERIFY (counter == 2); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool unordered_set_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter0 = 0; + int counter1 = 0; + { + std::unordered_set<indirect_counter> unordered_set; + unordered_set.insert(indirect_counter(&counter0)); + VERIFY (counter0 == 1); + unordered_set.insert(indirect_counter(&counter0)); + VERIFY (counter0 == 1); + unordered_set.insert(indirect_counter(&counter1)); + VERIFY (counter0 == 1 && counter1 == 1); + } + VERIFY (counter0 == 0 && counter1 == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool unordered_multimap_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + std::unordered_multimap<int, indirect_counter> unordered_multimap; + unordered_multimap.insert({1, indirect_counter(&counter)}); + VERIFY (counter == 1); + unordered_multimap.insert({1, indirect_counter(&counter)}); + VERIFY (counter == 2); + unordered_multimap.insert({2, indirect_counter(&counter)}); + VERIFY (counter == 3); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool unordered_multiset_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter0 = 0; + int counter1 = 0; + { + std::unordered_multiset<indirect_counter> unordered_multiset; + unordered_multiset.insert(indirect_counter(&counter0)); + VERIFY (counter0 == 1); + unordered_multiset.insert(indirect_counter(&counter0)); + VERIFY (counter0 == 2); + unordered_multiset.insert(indirect_counter(&counter1)); + VERIFY (counter0 == 2 && counter1 == 1); + } + VERIFY (counter0 == 0 && counter1 == 0); + end: + ok = inner_ok; + } + return ok; +} + +#else +bool array_test() { return true; } +bool forward_list_test() { return true; } +bool unordered_map_test() { return true; } +bool unordered_set_test() { return true; } +bool unordered_multimap_test() { return true; } +bool unordered_multiset_test() { return true; } +#endif + +int main() +{ + const bool auto_res = automatic_lifetime_test(); + const bool vec_res = vector_test(); + const bool deque_res = deque_test(); + const bool list_res = list_test(); + const bool map_res = map_test(); + const bool set_res = set_test(); + const bool multimap_res = multimap_test(); + const bool multiset_res = multiset_test(); + const bool array_res = array_test(); + const bool forward_list_res = forward_list_test(); + const bool unordered_map_res = unordered_map_test(); + const bool unordered_set_res = unordered_set_test(); + const bool unordered_multimap_res = unordered_multimap_test(); + const bool unordered_multiset_res = unordered_multiset_test(); + std::printf("sanity check : %s\n", auto_res ? "PASS" : "FAIL"); + std::printf("vector : %s\n", vec_res ? "PASS" : "FAIL"); + std::printf("deque : %s\n", deque_res ? "PASS" : "FAIL"); + std::printf("list : %s\n", list_res ? "PASS" : "FAIL"); + std::printf("map : %s\n", map_res ? "PASS" : "FAIL"); + std::printf("set : %s\n", set_res ? "PASS" : "FAIL"); + std::printf("multimap : %s\n", multimap_res ? "PASS" : "FAIL"); + std::printf("multiset : %s\n", multiset_res ? "PASS" : "FAIL"); + std::printf("array : %s\n", array_res ? "PASS" : "FAIL"); + std::printf("forward_list : %s\n", forward_list_res ? "PASS" : "FAIL"); + std::printf("unordered_map : %s\n", unordered_map_res ? "PASS" : "FAIL"); + std::printf("unordered_set : %s\n", unordered_set_res ? "PASS" : "FAIL"); + std::printf("unordered_multimap: %s\n", unordered_multimap_res ? "PASS" : "FAIL"); + std::printf("unordered_multiset: %s\n", unordered_multiset_res ? "PASS" : "FAIL"); + const bool ok = auto_res + && vec_res + && deque_res + && list_res + && map_res + && set_res + && multimap_res + && multiset_res + && array_res + && forward_list_res + && unordered_map_res + && unordered_set_res + && unordered_multimap_res + && unordered_multiset_res; + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-12.C b/libgomp/testsuite/libgomp.c++/target-flex-12.C new file mode 100644 index 0000000..024fb73 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-12.C @@ -0,0 +1,736 @@ +/* Populated with mapped data, validate, mutate, validate again. + The cases using sets do not mutate. + Note: Some of the code in here really sucks due to being made to be + compatible with c++98. */ + +#include <vector> +#include <deque> +#include <list> +#include <set> +#include <map> +#if __cplusplus >= 201103L +#include <array> +#include <forward_list> +#include <unordered_set> +#include <unordered_map> +#endif + +#include <limits> +#include <iterator> + +#include "target-flex-common.h" + +template<bool B, class T = void> +struct enable_if {}; + +template<class T> +struct enable_if<true, T> { typedef T type; }; + +struct identity_func +{ +#if __cplusplus < 201103L + template<typename T> + T& operator()(T& arg) const BL_NOEXCEPT { return arg; } + template<typename T> + T const& operator()(T const& arg) const BL_NOEXCEPT { return arg; } +#else + template<typename T> + constexpr T&& operator()(T&& arg) const BL_NOEXCEPT { return std::forward<T>(arg); } +#endif +}; + +/* Applies projection to the second iterator. */ +template<typename It0, typename It1, typename Proj> +bool validate_sequential_elements(const It0 begin0, const It0 end0, + const It1 begin1, const It1 end1, + Proj proj) BL_NOEXCEPT +{ + It0 it0 = begin0; + It1 it1 = begin1; + for (; it0 != end0; ++it0, ++it1) + { + /* Sizes mismatch, don't bother aborting though just fail the test. */ + if (it1 == end1) + return false; + if (*it0 != proj(*it1)) + return false; + } + /* Sizes mismatch, do as above. */ + if (it1 != end1) + return false; + return true; +} + +template<typename It0, typename It1> +bool validate_sequential_elements(const It0 begin0, const It0 end0, + const It1 begin1, const It1 end1) BL_NOEXCEPT +{ + return validate_sequential_elements(begin0, end0, begin1, end1, identity_func()); +} + +/* Inefficient, but simple. */ +template<typename It, typename OutIt> +void simple_copy(const It begin, const It end, OutIt out) BL_NOEXCEPT +{ + for (It it = begin; it != end; ++it, ++out) + *out = *it; +} + +template<typename It, typename MutateFn> +void simple_mutate(const It begin, const It end, MutateFn mut_fn) BL_NOEXCEPT +{ + for (It it = begin; it != end; ++it) + *it = mut_fn(*it); +} + +template<typename MutationFunc, typename T, std::size_t Size> +bool vector_test(const T (&arr)[Size]) +{ + bool ok; + T out_arr[Size]; + T out_mut_arr[Size]; + #pragma omp target map(from: ok, out_arr[:Size], out_mut_arr[:Size]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::vector<T> vector(arr, arr + Size); + VERIFY (validate_sequential_elements(vector.begin(), vector.end(), + arr, arr + Size)); + simple_copy(vector.begin(), vector.end(), out_arr); + simple_mutate(vector.begin(), vector.end(), MutationFunc()); + VERIFY (validate_sequential_elements(vector.begin(), vector.end(), + arr, arr + Size, MutationFunc())); + simple_copy(vector.begin(), vector.end(), out_mut_arr); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_sequential_elements(out_arr, out_arr + Size, + arr, arr + Size)); + VERIFY_NON_TARGET (validate_sequential_elements(out_mut_arr, out_mut_arr + Size, + arr, arr + Size, MutationFunc())); + return true; +} + +template<typename MutationFunc, typename T, std::size_t Size> +bool deque_test(const T (&arr)[Size]) +{ + bool ok; + T out_arr[Size]; + T out_mut_arr[Size]; + #pragma omp target map(from: ok, out_arr[:Size], out_mut_arr[:Size]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::deque<T> deque(arr, arr + Size); + VERIFY (validate_sequential_elements(deque.begin(), deque.end(), + arr, arr + Size)); + simple_copy(deque.begin(), deque.end(), out_arr); + simple_mutate(deque.begin(), deque.end(), MutationFunc()); + VERIFY (validate_sequential_elements(deque.begin(), deque.end(), + arr, arr + Size, MutationFunc())); + simple_copy(deque.begin(), deque.end(), out_mut_arr); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_sequential_elements(out_arr, out_arr + Size, + arr, arr + Size)); + VERIFY_NON_TARGET (validate_sequential_elements(out_mut_arr, out_mut_arr + Size, + arr, arr + Size, MutationFunc())); + return true; +} + +template<typename MutationFunc, typename T, std::size_t Size> +bool list_test(const T (&arr)[Size]) +{ + bool ok; + T out_arr[Size]; + T out_mut_arr[Size]; + #pragma omp target map(from: ok, out_arr[:Size], out_mut_arr[:Size]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::list<T> list(arr, arr + Size); + VERIFY (validate_sequential_elements(list.begin(), list.end(), + arr, arr + Size)); + simple_copy(list.begin(), list.end(), out_arr); + simple_mutate(list.begin(), list.end(), MutationFunc()); + VERIFY (validate_sequential_elements(list.begin(), list.end(), + arr, arr + Size, MutationFunc())); + simple_copy(list.begin(), list.end(), out_mut_arr); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_sequential_elements(out_arr, out_arr + Size, + arr, arr + Size)); + VERIFY_NON_TARGET (validate_sequential_elements(out_mut_arr, out_mut_arr + Size, + arr, arr + Size, MutationFunc())); + return true; +} + +template<typename T> +const T& get_key(const T& arg) BL_NOEXCEPT + { return arg; } +template<typename K, typename V> +const K& get_key(const std::pair<K, V>& pair) BL_NOEXCEPT + { return pair.first; } +template<typename T> +const T& get_value(const T& arg) BL_NOEXCEPT + { return arg; } +template<typename K, typename V> +const K& get_value(const std::pair<K, V>& pair) BL_NOEXCEPT + { return pair.second; } + +template<typename T> +struct key_type { typedef T type; }; +template<typename K, typename V> +struct key_type<std::pair<K, V> > { typedef K type; }; + +template<typename Proj, typename Container, typename It> +bool validate_associative(const Container& container, + const It compare_begin, + const It compare_end, + Proj proj) BL_NOEXCEPT +{ + const typename Container::const_iterator elem_end = container.end(); + for (It compare_it = compare_begin; compare_it != compare_end; ++compare_it) + { + const typename Container::const_iterator elem_it = container.find(get_key(*compare_it)); + VERIFY_NON_TARGET (elem_it != elem_end); + VERIFY_NON_TARGET (proj(get_value(*compare_it)) == get_value(*elem_it)); + } + return true; +} + +template<typename Container, typename It> +bool validate_associative(const Container& container, + const It compare_begin, + const It compare_end) BL_NOEXCEPT +{ + return validate_associative(container, compare_begin, compare_end, identity_func()); +} + +template<typename It, typename MutateFn> +void simple_mutate_map(const It begin, const It end, MutateFn mut_fn) BL_NOEXCEPT +{ + for (It it = begin; it != end; ++it) + it->second = mut_fn(it->second); +} + +template<typename It, typename OutIter> +void simple_copy_unique(const It begin, const It end, OutIter out) BL_NOEXCEPT +{ + /* In case anyone reads this, I want it to be known that I hate c++98. */ + typedef typename key_type<typename std::iterator_traits<It>::value_type>::type key_t; + std::set<key_t> already_seen; + for (It it = begin; it != end; ++it, ++out) + { + key_t key = get_key(*it); + if (already_seen.find(key) != already_seen.end()) + continue; + already_seen.insert(key); + *out = *it; + } +} + +template<typename MutationFunc, typename K, typename V, std::size_t Size> +bool map_test(const std::pair<K, V> (&arr)[Size]) +{ + std::map<K, V> reference_map(arr, arr + Size); + bool ok; + /* Both sizes should be the same. */ + std::pair<K, V> out_pairs[Size]; + std::size_t out_size; + std::pair<K, V> out_pairs_mut[Size]; + std::size_t out_size_mut; + #pragma omp target map(from: ok, out_pairs[:Size], out_size, \ + out_pairs_mut[:Size], out_size_mut) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::vector<std::pair<K, V> > unique_elems; + simple_copy_unique(arr, arr + Size, + std::back_insert_iterator<std::vector<std::pair<K, V> > >(unique_elems)); + + std::map<K, V> map(arr, arr + Size); + VERIFY (validate_associative(map, unique_elems.begin(), unique_elems.end())); + simple_copy(map.begin(), map.end(), out_pairs); + out_size = map.size(); + simple_mutate_map(map.begin(), map.end(), MutationFunc()); + VERIFY (validate_associative(map, unique_elems.begin(), unique_elems.end(), + MutationFunc())); + simple_copy(map.begin(), map.end(), out_pairs_mut); + out_size_mut = map.size(); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (out_size == out_size_mut); + VERIFY_NON_TARGET (validate_associative(reference_map, + out_pairs, out_pairs + out_size)); + simple_mutate_map(reference_map.begin(), reference_map.end(), MutationFunc()); + VERIFY_NON_TARGET (validate_associative(reference_map, + out_pairs_mut, out_pairs_mut + out_size_mut)); + return true; +} + +template<typename T, std::size_t Size> +bool set_test(const T (&arr)[Size]) +{ + std::set<T> reference_set(arr, arr + Size); + bool ok; + /* Both sizes should be the same. */ + T out_arr[Size]; + std::size_t out_size; + #pragma omp target map(from: ok, out_arr[:Size], out_size) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::vector<T> unique_elems; + simple_copy_unique(arr, arr + Size, + std::back_insert_iterator<std::vector<T> >(unique_elems)); + + std::set<T> set(arr, arr + Size); + VERIFY (validate_associative(set, unique_elems.begin(), unique_elems.end())); + simple_copy(set.begin(), set.end(), out_arr); + out_size = set.size(); + /* Sets can't be mutated, we could create another set with mutated + but it gets a little annoying and probably isn't an interesting test. */ + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_associative(reference_set, + out_arr, out_arr + out_size)); + return true; +} + +template<typename Proj, typename Container, typename It> +bool validate_multi_associative(const Container& container, + const It compare_begin, + const It compare_end, + Proj proj) BL_NOEXCEPT +{ + /* Once again, for the poor soul reviewing these, I hate c++98. */ + typedef typename key_type<typename std::iterator_traits<It>::value_type>::type key_t; + typedef std::map<key_t, std::size_t> counter_map; + counter_map key_count_map; + for (It it = compare_begin; it != compare_end; ++it) + { + const key_t& key = get_key(*it); + typename counter_map::iterator counter_it + = key_count_map.find(key); + if (counter_it != key_count_map.end()) + ++counter_it->second; + else + key_count_map.insert(std::pair<const key_t, std::size_t>(key, std::size_t(1))); + } + const typename Container::const_iterator elem_end = container.end(); + for (It compare_it = compare_begin; compare_it != compare_end; ++compare_it) + { + const key_t& key = get_key(*compare_it); + typename counter_map::iterator count_it = key_count_map.find(key); + std::size_t key_count = count_it != key_count_map.end() ? count_it->second + : std::size_t(0); + VERIFY_NON_TARGET (key_count > std::size_t(0) && "this will never happen"); + /* This gets tested multiple times but that should be fine. */ + VERIFY_NON_TARGET (key_count == container.count(key)); + typename Container::const_iterator elem_it = container.find(key); + /* This will never happen if the previous case passed. */ + VERIFY_NON_TARGET (elem_it != elem_end); + bool found_element = false; + for (; elem_it != elem_end; ++elem_it) + if (proj(get_value(*compare_it)) == get_value(*elem_it)) + { + found_element = true; + break; + } + VERIFY_NON_TARGET (found_element); + } + return true; +} + +template<typename Container, typename It> +bool validate_multi_associative(const Container& container, + const It compare_begin, + const It compare_end) BL_NOEXCEPT +{ + return validate_multi_associative(container, compare_begin, compare_end, identity_func()); +} + +template<typename MutationFunc, typename K, typename V, std::size_t Size> +bool multimap_test(const std::pair<K, V> (&arr)[Size]) +{ + std::multimap<K, V> reference_multimap(arr, arr + Size); + bool ok; + std::pair<K, V> out_pairs[Size]; + std::pair<K, V> out_pairs_mut[Size]; + #pragma omp target map(from: ok, out_pairs[:Size], out_pairs_mut[:Size]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::multimap<K, V> multimap(arr, arr + Size); + VERIFY (validate_multi_associative(multimap, arr, arr + Size)); + simple_copy(multimap.begin(), multimap.end(), out_pairs); + simple_mutate_map(multimap.begin(), multimap.end(), MutationFunc()); + VERIFY (validate_multi_associative(multimap, arr, arr + Size, MutationFunc())); + simple_copy(multimap.begin(), multimap.end(), out_pairs_mut); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_multi_associative(reference_multimap, + out_pairs, out_pairs + Size)); + simple_mutate_map(reference_multimap.begin(), reference_multimap.end(), MutationFunc()); + VERIFY_NON_TARGET (validate_multi_associative(reference_multimap, + out_pairs_mut, out_pairs_mut + Size)); + return true; +} + +template<typename T, std::size_t Size> +bool multiset_test(const T (&arr)[Size]) +{ + std::multiset<T> reference_multiset(arr, arr + Size); + bool ok; + T out_arr[Size]; + #pragma omp target map(from: ok, out_arr[:Size]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::multiset<T> set(arr, arr + Size); + VERIFY (validate_multi_associative(set, arr, arr + Size)); + simple_copy(set.begin(), set.end(), out_arr); + /* Sets can't be mutated, we could create another set with mutated + but it gets a little annoying and probably isn't an interesting test. */ + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_multi_associative(reference_multiset, + out_arr, out_arr + Size)); + return true; +} + +#if __cplusplus >= 201103L + +template<typename MutationFunc, typename T, std::size_t Size> +bool array_test(const T (&arr)[Size]) +{ + bool ok; + T out_arr[Size]; + T out_mut_arr[Size]; + #pragma omp target map(from: ok, out_arr[:Size], out_mut_arr[:Size]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::array<T, Size> std_array{}; + /* Special case for std::array since it can't be initialized + with iterators. */ + { + T zero_val = T{}; + for (auto it = std_array.begin(); it != std_array.end(); ++it) + VERIFY (*it == zero_val); + } + simple_copy(arr, arr + Size, std_array.begin()); + VERIFY (validate_sequential_elements(std_array.begin(), std_array.end(), + arr, arr + Size)); + simple_copy(std_array.begin(), std_array.end(), out_arr); + simple_mutate(std_array.begin(), std_array.end(), MutationFunc()); + VERIFY (validate_sequential_elements(std_array.begin(), std_array.end(), + arr, arr + Size, MutationFunc())); + simple_copy(std_array.begin(), std_array.end(), out_mut_arr); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_sequential_elements(out_arr, out_arr + Size, + arr, arr + Size)); + VERIFY_NON_TARGET (validate_sequential_elements(out_mut_arr, out_mut_arr + Size, + arr, arr + Size, MutationFunc())); + return true; +} + +template<typename MutationFunc, typename T, std::size_t Size> +bool forward_list_test(const T (&arr)[Size]) +{ + bool ok; + T out_arr[Size]; + T out_mut_arr[Size]; + #pragma omp target map(from: ok, out_arr[:Size], out_mut_arr[:Size]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::forward_list<T> fwd_list(arr, arr + Size); + VERIFY (validate_sequential_elements(fwd_list.begin(), fwd_list.end(), + arr, arr + Size)); + simple_copy(fwd_list.begin(), fwd_list.end(), out_arr); + simple_mutate(fwd_list.begin(), fwd_list.end(), MutationFunc()); + VERIFY (validate_sequential_elements(fwd_list.begin(), fwd_list.end(), + arr, arr + Size, MutationFunc())); + simple_copy(fwd_list.begin(), fwd_list.end(), out_mut_arr); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_sequential_elements(out_arr, out_arr + Size, + arr, arr + Size)); + VERIFY_NON_TARGET (validate_sequential_elements(out_mut_arr, out_mut_arr + Size, + arr, arr + Size, MutationFunc())); + return true; +} + +template<typename MutationFunc, typename K, typename V, std::size_t Size> +bool unordered_map_test(const std::pair<K, V> (&arr)[Size]) +{ + std::unordered_map<K, V> reference_map(arr, arr + Size); + bool ok; + /* Both sizes should be the same. */ + std::pair<K, V> out_pairs[Size]; + std::size_t out_size; + std::pair<K, V> out_pairs_mut[Size]; + std::size_t out_size_mut; + #pragma omp target map(from: ok, out_pairs[:Size], out_size, \ + out_pairs_mut[:Size], out_size_mut) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::vector<std::pair<K, V> > unique_elems; + simple_copy_unique(arr, arr + Size, + std::back_insert_iterator<std::vector<std::pair<K, V> > >(unique_elems)); + + std::unordered_map<K, V> map(arr, arr + Size); + VERIFY (validate_associative(map, unique_elems.begin(), unique_elems.end())); + simple_copy(map.begin(), map.end(), out_pairs); + out_size = map.size(); + simple_mutate_map(map.begin(), map.end(), MutationFunc()); + VERIFY (validate_associative(map, unique_elems.begin(), unique_elems.end(), + MutationFunc())); + simple_copy(map.begin(), map.end(), out_pairs_mut); + out_size_mut = map.size(); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (out_size == out_size_mut); + VERIFY_NON_TARGET (validate_associative(reference_map, + out_pairs, out_pairs + out_size)); + simple_mutate_map(reference_map.begin(), reference_map.end(), MutationFunc()); + VERIFY_NON_TARGET (validate_associative(reference_map, + out_pairs_mut, out_pairs_mut + out_size_mut)); + return true; +} + +template<typename T, std::size_t Size> +bool unordered_set_test(const T (&arr)[Size]) +{ + std::unordered_set<T> reference_set(arr, arr + Size); + bool ok; + /* Both sizes should be the same. */ + T out_arr[Size]; + std::size_t out_size; + #pragma omp target map(from: ok, out_arr[:Size], out_size) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::vector<T> unique_elems; + simple_copy_unique(arr, arr + Size, + std::back_insert_iterator<std::vector<T> >(unique_elems)); + + std::unordered_set<T> set(arr, arr + Size); + VERIFY (validate_associative(set, unique_elems.begin(), unique_elems.end())); + simple_copy(set.begin(), set.end(), out_arr); + out_size = set.size(); + /* Sets can't be mutated, we could create another set with mutated + but it gets a little annoying and probably isn't an interesting test. */ + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_associative(reference_set, + out_arr, out_arr + out_size)); + return true; +} + +template<typename MutationFunc, typename K, typename V, std::size_t Size> +bool unordered_multimap_test(const std::pair<K, V> (&arr)[Size]) +{ + std::unordered_multimap<K, V> reference_multimap(arr, arr + Size); + bool ok; + std::pair<K, V> out_pairs[Size]; + std::pair<K, V> out_pairs_mut[Size]; + #pragma omp target map(from: ok, out_pairs[:Size], out_pairs_mut[:Size]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::unordered_multimap<K, V> multimap(arr, arr + Size); + VERIFY (validate_multi_associative(multimap, arr, arr + Size)); + simple_copy(multimap.begin(), multimap.end(), out_pairs); + simple_mutate_map(multimap.begin(), multimap.end(), MutationFunc()); + VERIFY (validate_multi_associative(multimap, arr, arr + Size, MutationFunc())); + simple_copy(multimap.begin(), multimap.end(), out_pairs_mut); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_multi_associative(reference_multimap, + out_pairs, out_pairs + Size)); + simple_mutate_map(reference_multimap.begin(), reference_multimap.end(), MutationFunc()); + VERIFY_NON_TARGET (validate_multi_associative(reference_multimap, + out_pairs_mut, out_pairs_mut + Size)); + return true; +} + +template<typename T, std::size_t Size> +bool unordered_multiset_test(const T (&arr)[Size]) +{ + std::unordered_multiset<T> reference_multiset(arr, arr + Size); + bool ok; + T out_arr[Size]; + #pragma omp target map(from: ok, out_arr[:Size]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::unordered_multiset<T> set(arr, arr + Size); + VERIFY (validate_multi_associative(set, arr, arr + Size)); + simple_copy(set.begin(), set.end(), out_arr); + /* Sets can't be mutated, we could create another set with mutated + but it gets a little annoying and probably isn't an interesting test. */ + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_multi_associative(reference_multiset, + out_arr, out_arr + Size)); + return true; +} + +#else +template<typename, typename T, std::size_t Size> bool array_test(const T (&arr)[Size]) { return true; } +template<typename, typename T, std::size_t Size> bool forward_list_test(const T (&arr)[Size]) { return true; } +template<typename, typename T, std::size_t Size> bool unordered_map_test(const T (&arr)[Size]) { return true; } +template<typename T, std::size_t Size> bool unordered_set_test(const T (&arr)[Size]) { return true; } +template<typename, typename T, std::size_t Size> bool unordered_multimap_test(const T (&arr)[Size]) { return true; } +template<typename T, std::size_t Size> bool unordered_multiset_test(const T (&arr)[Size]) { return true; } +#endif + +/* This clamps to the maximum value to guard against overflowing, + assuming std::numeric_limits is specialized for T. */ +struct multiply_by_2 +{ + template<typename T> + typename enable_if<std::numeric_limits<T>::is_specialized, T>::type + operator()(T arg) const BL_NOEXCEPT { + if (arg < static_cast<T>(0)) + { + if (std::numeric_limits<T>::min() / static_cast<T>(2) >= arg) + return std::numeric_limits<T>::min(); + } + else + { + if (std::numeric_limits<T>::max() / static_cast<T>(2) <= arg) + return std::numeric_limits<T>::max(); + } + return arg * 2; + } + template<typename T> + typename enable_if<!std::numeric_limits<T>::is_specialized, T>::type + operator()(T arg) const BL_NOEXCEPT { + return arg * 2; + } +}; + +int main() +{ + int data[8] = {0, 1, 2, 3, 4, 5, 6, 7}; + std::pair<int, int> pairs[10] = {std::pair<int, int>( 1, 2), + std::pair<int, int>( 2, 4), + std::pair<int, int>( 3, 6), + std::pair<int, int>( 4, 8), + std::pair<int, int>( 5, 10), + std::pair<int, int>( 6, 12), + std::pair<int, int>( 7, 14), + std::pair<int, int>( 8, 16), + std::pair<int, int>( 9, 18), + std::pair<int, int>(10, 20)}; + const bool vec_res = vector_test<multiply_by_2>(data); + const bool deque_res = deque_test<multiply_by_2>(data); + const bool list_res = list_test<multiply_by_2>(data); + const bool map_res = map_test<multiply_by_2>(pairs); + const bool set_res = set_test(data); + const bool multimap_res = multimap_test<multiply_by_2>(pairs); + const bool multiset_res = multiset_test(data); + const bool array_res = array_test<multiply_by_2>(data); + const bool forward_list_res = forward_list_test<multiply_by_2>(data); + const bool unordered_map_res = unordered_map_test<multiply_by_2>(pairs); + const bool unordered_set_res = unordered_set_test(data); + const bool unordered_multimap_res = unordered_multimap_test<multiply_by_2>(pairs); + const bool unordered_multiset_res = unordered_multiset_test(data); + std::printf("vector : %s\n", vec_res ? "PASS" : "FAIL"); + std::printf("deque : %s\n", deque_res ? "PASS" : "FAIL"); + std::printf("list : %s\n", list_res ? "PASS" : "FAIL"); + std::printf("map : %s\n", map_res ? "PASS" : "FAIL"); + std::printf("set : %s\n", set_res ? "PASS" : "FAIL"); + std::printf("multimap : %s\n", multimap_res ? "PASS" : "FAIL"); + std::printf("multiset : %s\n", multiset_res ? "PASS" : "FAIL"); + std::printf("array : %s\n", array_res ? "PASS" : "FAIL"); + std::printf("forward_list : %s\n", forward_list_res ? "PASS" : "FAIL"); + std::printf("unordered_map : %s\n", unordered_map_res ? "PASS" : "FAIL"); + std::printf("unordered_set : %s\n", unordered_set_res ? "PASS" : "FAIL"); + std::printf("unordered_multimap: %s\n", unordered_multimap_res ? "PASS" : "FAIL"); + std::printf("unordered_multiset: %s\n", unordered_multiset_res ? "PASS" : "FAIL"); + const bool ok = vec_res + && deque_res + && list_res + && map_res + && set_res + && multimap_res + && multiset_res + && array_res + && forward_list_res + && unordered_map_res + && unordered_set_res + && unordered_multimap_res + && unordered_multiset_res; + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-2000.C b/libgomp/testsuite/libgomp.c++/target-flex-2000.C new file mode 100644 index 0000000..688c014 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-2000.C @@ -0,0 +1,32 @@ +/* Tiny tuple test. */ + +#include <tuple> + +#include "target-flex-common.h" + +bool test(int arg) +{ + bool ok; + int out; + std::tuple tup = {'a', arg, 3.14f}; + #pragma omp target map(from: ok, out) map(to: tup) + { + bool inner_ok = true; + { + VERIFY (std::get<0>(tup) == 'a'); + out = std::get<1>(tup); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (out == arg); + return true; +} + +int main() +{ + volatile int arg = 42u; + return test(arg) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-2001.C b/libgomp/testsuite/libgomp.c++/target-flex-2001.C new file mode 100644 index 0000000..f1a6c12 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-2001.C @@ -0,0 +1,61 @@ +/* { dg-additional-options "-std=c++20" } */ + +/* Functional */ + +#include <functional> +#include <utility> + +#include "target-flex-common.h" + +template<typename T,typename Fn> +auto invoke_unary(T&& a, Fn&& fn) noexcept +{ + return std::invoke(std::forward<Fn>(fn), + std::forward<T>(a)); +} + +template<typename T, typename U, typename Fn> +auto invoke_binary(T&& a, U&& b, Fn&& fn) noexcept +{ + return std::invoke(std::forward<Fn>(fn), + std::forward<T>(a), + std::forward<U>(b)); +} + +bool test(unsigned arg) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arg) + { + bool inner_ok = true; + { + VERIFY (std::plus{}(arg, 2) == arg + 2); + auto bound_plus_arg = std::bind_front(std::plus{}, arg); + VERIFY (bound_plus_arg(10) == arg + 10); + VERIFY (bound_plus_arg(20) == arg + 20); + + VERIFY (std::not_fn(std::not_equal_to{})(arg, arg)); + VERIFY (invoke_binary(arg, arg, std::not_fn(std::not_equal_to{}))); + auto bound_equals_arg = std::bind_front(std::not_fn(std::not_equal_to{}), arg); + VERIFY (bound_equals_arg(arg)); + VERIFY (std::not_fn(bound_equals_arg)(arg + 1)); + VERIFY (invoke_unary(arg, bound_equals_arg)); + + VERIFY (std::not_fn(std::ranges::not_equal_to{})(arg, arg)); + VERIFY (invoke_binary(arg, arg, std::not_fn(std::ranges::not_equal_to{}))); + auto bound_ranges_equals_arg = std::bind_front(std::not_fn(std::ranges::not_equal_to{}), arg); + VERIFY (bound_ranges_equals_arg(arg)); + VERIFY (std::not_fn(bound_ranges_equals_arg)(arg + 1)); + VERIFY (invoke_unary(arg, bound_ranges_equals_arg)); + } + end: + ok = inner_ok; + } + return ok; +} + +int main() +{ + volatile unsigned arg = 42u; + return test(arg) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-2002.C b/libgomp/testsuite/libgomp.c++/target-flex-2002.C new file mode 100644 index 0000000..f738806 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-2002.C @@ -0,0 +1,97 @@ +/* { dg-additional-options "-std=c++23" } */ + +/* expected/optional */ + +#include <optional> +#include <expected> + +#include "target-flex-common.h" + +std::optional<unsigned> make_optional(bool b, unsigned arg = 0u) noexcept +{ + if (!b) + return std::nullopt; + return {arg}; +} + +bool test_optional(unsigned arg) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arg) + { + bool inner_ok = true; + { + auto null_opt = make_optional(false); + VERIFY (!null_opt); + VERIFY (!null_opt.has_value()); + VERIFY (null_opt.value_or(arg * 2u) == arg * 2u); + VERIFY (null_opt.or_else([&](){ return std::optional<unsigned>{arg}; }) + .transform([](int a){ return a * 2u; }) + .value_or(0) == arg * 2u); + + auto opt = make_optional(true, arg); + VERIFY (opt); + VERIFY (opt.has_value()); + VERIFY (opt.value() == arg); + VERIFY (*opt == arg); + VERIFY (opt.value_or(arg + 42) == arg); + VERIFY (opt.or_else([&](){ return std::optional<unsigned>{arg + 42}; }) + .transform([](int a){ return a * 2u; }) + .value_or(0) == arg * 2u); + } + end: + ok = inner_ok; + } + return ok; +} + +struct my_error +{ + int _e; +}; + +std::expected<unsigned, my_error> make_expected(bool b, unsigned arg = 0u) noexcept +{ + if (!b) + return std::unexpected{my_error{-1}}; + return {arg}; +} + +bool test_expected(unsigned arg) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arg) + { + bool inner_ok = true; + { + auto unexpected = make_expected(false); + VERIFY (!unexpected); + VERIFY (!unexpected.has_value()); + VERIFY (unexpected.error()._e == -1); + VERIFY (unexpected.value_or(arg * 2u) == arg * 2u); + VERIFY (unexpected.or_else([&](my_error e){ return std::expected<unsigned, my_error>{arg}; }) + .transform([](int a){ return a * 2u; }) + .value_or(0) == arg * 2u); + + auto expected = make_expected(true, arg); + VERIFY (expected); + VERIFY (expected.has_value()); + VERIFY (expected.value() == arg); + VERIFY (*expected == arg); + VERIFY (expected.value_or(arg + 42) == arg); + VERIFY (expected.or_else([&](my_error e){ return std::expected<unsigned, my_error>{std::unexpected{e}}; }) + .transform([](int a){ return a * 2u; }) + .value_or(0) == arg * 2u); + } + end: + ok = inner_ok; + } + return ok; +} + +int main() +{ + volatile unsigned arg = 42; + return test_optional(arg) + && test_expected(arg) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-2003.C b/libgomp/testsuite/libgomp.c++/target-flex-2003.C new file mode 100644 index 0000000..8e8ca8e --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-2003.C @@ -0,0 +1,176 @@ +/* { dg-additional-options "-std=c++20" } */ + +/* bit_cast and memcpy */ + +#include <bit> +#include <cstring> + +#include "target-flex-common.h" + +struct S0 +{ + int _v0; + char _v1; + long long _v2; +}; + +struct S1 +{ + int _v0; + char _v1; + long long _v2; +}; + +bool test_bit_cast(int arg) +{ + bool ok; + S1 s1_out; + #pragma omp target map(from: ok, s1_out) map(to: arg) + { + bool inner_ok = true; + { + long long v = static_cast<long long>(arg + 42ll); + S0 s = {arg, 'a', v}; + VERIFY (std::bit_cast<S1>(s)._v0 == arg); + VERIFY (std::bit_cast<S1>(s)._v1 == 'a'); + VERIFY (std::bit_cast<S1>(s)._v2 == v); + s1_out = std::bit_cast<S1>(s); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + long long v = static_cast<long long>(arg + 42ll); + VERIFY_NON_TARGET (std::bit_cast<S0>(s1_out)._v0 == arg); + VERIFY_NON_TARGET (std::bit_cast<S0>(s1_out)._v1 == 'a'); + VERIFY_NON_TARGET (std::bit_cast<S0>(s1_out)._v2 == v); + return true; +} + + +struct OutStruct +{ + std::size_t _id; + void *_next; +}; + +struct Extendable1 +{ + std::size_t _id; + void *_next; + int _v; +}; + +struct Extendable2 +{ + std::size_t _id; + void *_next; + char _str[256]; +}; + +struct Extendable3 +{ + std::size_t _id; + void *_next; + const int *_nums; + std::size_t _size; +}; + +struct ExtendableUnknown +{ + std::size_t _id; + void *_next; +}; + +template<typename To, std::size_t Id> +To *get_extendable(void *p) +{ + while (p != nullptr) + { + OutStruct out; + std::memcpy(&out, p, sizeof(OutStruct)); + if (out._id == Id) + return static_cast<To *>(p); + p = out._next; + } + return nullptr; +} + +bool test_memcpy(int arg, const int *nums, std::size_t nums_size) +{ + bool ok; + Extendable2 e2_out; + #pragma omp target map(from: ok, e2_out) map(to: arg, nums[:nums_size], nums_size) + { + bool inner_ok = true; + { + Extendable3 e3 = {3u, nullptr, nums, nums_size}; + ExtendableUnknown u1 = {100u, &e3}; + Extendable2 e2 = {2u, &u1, {'H', 'e', 'l', 'l', 'o', '!', '\000'}}; + ExtendableUnknown u2 = {101u, &e2}; + ExtendableUnknown u3 = {102u, &u2}; + ExtendableUnknown u4 = {142u, &u3}; + Extendable1 e1 = {1u, &u4, arg}; + + void *p = &e1; + while (p != nullptr) + { + /* You can always cast a pointer to a struct to a pointer to + the type of it's first member. */ + switch (*static_cast<std::size_t *>(p)) + { + case 1: + { + Extendable1 *e1_p = static_cast<Extendable1 *>(p); + p = e1_p->_next; + VERIFY (e1_p->_v == arg); + break; + } + case 2: + { + Extendable2 *e2_p = static_cast<Extendable2 *>(p); + p = e2_p->_next; + VERIFY (std::strcmp(e2_p->_str, "Hello!") == 0); + break; + } + case 3: + { + Extendable3 *e3_p = static_cast<Extendable3 *>(p); + p = e3_p->_next; + VERIFY (nums == e3_p->_nums); + VERIFY (nums_size == e3_p->_size); + break; + } + default: + { + /* Casting to a pointer to OutStruct invokes undefined + behavior though, memcpy is required to extract the _next + member. */ + OutStruct out; + std::memcpy(&out, p, sizeof(OutStruct)); + p = out._next; + } + } + } + Extendable2 *e2_p = get_extendable<Extendable2, 2u>(&e1); + VERIFY (e2_p != nullptr); + e2_out = *e2_p; + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (e2_out._id == 2u); + VERIFY_NON_TARGET (std::strcmp(e2_out._str, "Hello!") == 0); + return true; +} + +int main() +{ + volatile int arg = 42; + int arr[8] = {0, 1, 2, 3, 4, 5, 6, 7}; + return test_bit_cast(arg) + && test_memcpy(arg, arr, 8) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-30.C b/libgomp/testsuite/libgomp.c++/target-flex-30.C new file mode 100644 index 0000000..c66075b --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-30.C @@ -0,0 +1,51 @@ +/* std::initializer_list in target region. */ + +#include <initializer_list> +#include <array> + +#include "target-flex-common.h" + +bool test_initializer_list(int arg) +{ + static constexpr std::size_t out_arr_size = 7; + int out_arr[out_arr_size]; + bool ok; + #pragma omp target map(from: ok, out_arr[:out_arr_size]) map(to: arg) + { + bool inner_ok = true; + { + auto il = {0, 1, 2, 3, 4, 5, arg}; + + int sum = 0; + for (auto const& e : il) + sum += e; + VERIFY (sum == 0 + 1 + 2 + 3 + 4 + 5 + arg); + + auto* out_it = out_arr; + const auto* const out_end = out_arr + out_arr_size; + for (auto const& e : il) + { + VERIFY (out_it != out_end); + *out_it = e; + ++out_it; + } + } + end: + ok = inner_ok; + } + if (!ok) + return false; + + std::array<int, out_arr_size> reference_array = {0, 1, 2, 3, 4, 5, arg}; + const auto *out_arr_it = out_arr; + for (auto const& e : reference_array) + VERIFY_NON_TARGET (e == *(out_arr_it++)); + + return true; +} + +int main() +{ + volatile int arg = 42; + return test_initializer_list(arg) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-300.C b/libgomp/testsuite/libgomp.c++/target-flex-300.C new file mode 100644 index 0000000..ef9e5a9 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-300.C @@ -0,0 +1,49 @@ +/* { dg-additional-options -std=c++23 } */ + +/* numerics */ + +#include <algorithm> +#include <numeric> +#include <ranges> +#include <span> +#include <vector> + +//TODO PR120454 "C++ constexpr vs. OpenMP implicit mapping" +#pragma omp declare target(std::ranges::all_of, std::ranges::iota) + +#include "target-flex-common.h" + +namespace stdr = std::ranges; + +bool test(std::size_t arg) +{ + bool ok; + int midpoint_out; + std::vector<int> vec(arg); + int *data = vec.data(); + std::size_t size = vec.size(); + #pragma omp target defaultmap(none) map(from: ok, midpoint_out) map(tofrom: data[:size]) map(to: arg, size) + { + std::span span = {data, size}; + bool inner_ok = true; + { + VERIFY (stdr::all_of(span, [](int v){ return v == int{}; })); + stdr::iota(span, 0); + midpoint_out = *std::midpoint(span.data(), span.data() + span.size()); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (stdr::equal(vec, std::views::iota(0, static_cast<int>(vec.size())))); + VERIFY_NON_TARGET (*std::midpoint(vec.data(), vec.data() + vec.size()) + == midpoint_out); + return true; +} + +int main() +{ + volatile std::size_t arg = 42; + return test(arg) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-31.C b/libgomp/testsuite/libgomp.c++/target-flex-31.C new file mode 100644 index 0000000..adaf18f --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-31.C @@ -0,0 +1,80 @@ +/* std::initializer_list in target region. */ + +#include <initializer_list> + +#include "target-flex-common.h" + +struct S0 +{ + int _v; + S0(std::initializer_list<int> il) + : _v(0) + { + for (auto const& e : il) + _v += e; + } +}; + +struct S1 +{ + int _v; + template<typename T> + S1(std::initializer_list<T> il) + : _v(0) + { + for (auto const& e : il) + _v += e; + } +}; + +template<typename T> +struct S2 +{ + T _v; + S2(std::initializer_list<T> il) + : _v(0) + { + for (auto const& e : il) + _v += e; + } +}; + +#if __cplusplus >= 201703L +template<typename T> +S2(std::initializer_list<T>) -> S2<T>; +#endif + +bool test_initializer_list(int arg) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arg) + { + bool inner_ok = true; + { + static constexpr int partial_sum = 0 + 1 + 2 + 3 + 4 + 5; + + S0 s0{0, 1, 2, 3, 4, 5, arg}; + VERIFY (s0._v == partial_sum + arg); + + S1 s1{0, 1, 2, 3, 4, 5, arg}; + VERIFY (s1._v == partial_sum + arg); + + S2<int> s2{0, 1, 2, 3, 4, 5, arg}; + VERIFY (s2._v == partial_sum + arg); + + #if __cplusplus >= 201703L + S2 s2_ctad{0, 1, 2, 3, 4, 5, arg}; + VERIFY (s2_ctad._v == partial_sum + arg); + #endif + } + end: + ok = inner_ok; + } + return ok; +} + +int main() +{ + volatile int arg = 42; + return test_initializer_list(arg) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-32.C b/libgomp/testsuite/libgomp.c++/target-flex-32.C new file mode 100644 index 0000000..7f74401a --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-32.C @@ -0,0 +1,50 @@ +/* std::initializer_list constructor of std::vector (explicit template arg) */ + +#include <vector> +#include <array> + +#include "target-flex-common.h" + +bool test_initializer_list(int arg) +{ + static constexpr std::size_t out_arr_size = 7; + int out_arr[out_arr_size]; + bool ok; + #pragma omp target map(from: ok, out_arr[:out_arr_size]) map(to: arg) + { + bool inner_ok = true; + { + std::vector<int> vec{0, 1, 2, 3, 4, 5, arg}; + int sum = 0; + for (auto const& e : vec) + sum += e; + VERIFY (sum == 0 + 1 + 2 + 3 + 4 + 5 + arg); + + auto* out_it = out_arr; + const auto* const out_end = out_arr + out_arr_size; + for (auto const& e : vec) + { + VERIFY (out_it != out_end); + *out_it = e; + ++out_it; + } + } + end: + ok = inner_ok; + } + if (!ok) + return false; + + std::array<int, out_arr_size> reference_array = {0, 1, 2, 3, 4, 5, arg}; + const auto *out_arr_it = out_arr; + for (auto const& e : reference_array) + VERIFY_NON_TARGET (e == *(out_arr_it++)); + + return true; +} + +int main() +{ + volatile int arg = 42; + return test_initializer_list(arg) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-33.C b/libgomp/testsuite/libgomp.c++/target-flex-33.C new file mode 100644 index 0000000..bb8a39b --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-33.C @@ -0,0 +1,52 @@ +/* { dg-additional-options "-std=c++17" } */ + +/* deduced std::initializer_list constructor of std::vector (CTAD) */ + +#include <vector> +#include <array> + +#include "target-flex-common.h" + +bool test_initializer_list(int arg) +{ + static constexpr std::size_t out_arr_size = 7; + int out_arr[out_arr_size]; + bool ok; + #pragma omp target map(from: ok, out_arr[:out_arr_size]) map(to: arg) + { + bool inner_ok = true; + { + std::vector vec{0, 1, 2, 3, 4, 5, arg}; + int sum = 0; + for (auto const& e : vec) + sum += e; + VERIFY (sum == 0 + 1 + 2 + 3 + 4 + 5 + arg); + + auto* out_it = out_arr; + const auto* const out_end = out_arr + out_arr_size; + for (auto const& e : vec) + { + VERIFY (out_it != out_end); + *out_it = e; + ++out_it; + } + } + end: + ok = inner_ok; + } + if (!ok) + return false; + + std::array<int, out_arr_size> reference_array = {0, 1, 2, 3, 4, 5, arg}; + const auto *out_arr_it = out_arr; + for (auto const& e : reference_array) + VERIFY_NON_TARGET (e == *(out_arr_it++)); + + return true; +} + +int main() +{ + volatile int arg = 42; + return test_initializer_list(arg) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-41.C b/libgomp/testsuite/libgomp.c++/target-flex-41.C new file mode 100644 index 0000000..4d36341 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-41.C @@ -0,0 +1,94 @@ +/* { dg-additional-options "-std=c++20" } */ + +/* <iterator> c++20 */ + +/* std::common_iterator uses std::variant. */ + +#include <vector> +#include <iterator> +#include <span> + +//TODO PR120454 "C++ constexpr vs. OpenMP implicit mapping" +#pragma omp declare target(std::ranges::distance, std::ranges::next) + +#include "target-flex-common.h" + +namespace stdr = std::ranges; + +template<typename It0, typename It1> +bool simple_equal(const It0 begin0, const It0 end0, + const It1 begin1, const It1 end1) BL_NOEXCEPT +{ + It0 it0 = begin0; + It1 it1 = begin1; + for (; it0 != end0; ++it0, ++it1) + if (it1 == end1 || *it0 != *it1) + return false; + return true; +} + +template<typename It, typename OutIt> +void simple_copy(const It begin, const It end, OutIt out) BL_NOEXCEPT +{ + for (It it = begin; it != end; ++it, ++out) + *out = *it; +} + +template<typename T, std::size_t Size> +bool test(const T (&arr)[Size]) +{ + bool ok; + T out_rev_arr[Size]; + T out_fwd_arr[Size]; + T out_first_half_arr[Size / 2]; + #pragma omp target defaultmap(none) \ + map(from: ok, out_rev_arr[:Size], out_fwd_arr[:Size], \ + out_first_half_arr[:Size / 2]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::span<const T> span = {arr, Size}; + std::vector<T> rev_vec(std::reverse_iterator{span.end()}, + std::reverse_iterator{span.begin()}); + VERIFY (std::distance(span.begin(), span.end()) + == std::distance(rev_vec.begin(), rev_vec.end())); + VERIFY (stdr::distance(span.begin(), span.end()) + == stdr::distance(rev_vec.begin(), rev_vec.end())); + VERIFY (stdr::distance(span) == stdr::distance(rev_vec)); + VERIFY (simple_equal(span.begin(), span.end(), + std::reverse_iterator{rev_vec.end()}, + std::reverse_iterator{rev_vec.begin()})); + simple_copy(rev_vec.begin(), rev_vec.end(), out_rev_arr); + simple_copy(std::reverse_iterator{rev_vec.end()}, + std::reverse_iterator{rev_vec.begin()}, + out_fwd_arr); + using counted_iter = std::counted_iterator<decltype(span.begin())>; + using common_iter = std::common_iterator<counted_iter, + std::default_sentinel_t>; + std::vector<T> front_half; + simple_copy(common_iter{counted_iter{span.begin(), Size / 2}}, + common_iter{std::default_sentinel}, + std::back_insert_iterator{front_half}); + VERIFY (simple_equal(span.begin(), stdr::next(span.begin(), Size / 2), + front_half.begin(), front_half.end())); + simple_copy(front_half.begin(), front_half.end(), out_first_half_arr); + } + end: + ok = inner_ok; + } + VERIFY_NON_TARGET (simple_equal(std::reverse_iterator{arr + Size}, + std::reverse_iterator{arr}, + out_rev_arr, out_rev_arr + Size)); + VERIFY_NON_TARGET (simple_equal(arr, arr + Size, + out_fwd_arr, out_fwd_arr + Size)); + VERIFY_NON_TARGET (simple_equal(arr, arr + Size / 2, + out_first_half_arr, out_first_half_arr + Size / 2)); + return ok; +} + +int main() +{ + int arr[] = {0, 1, 2, 3, 4, 5, 6, 7}; + return test(arr) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-60.C b/libgomp/testsuite/libgomp.c++/target-flex-60.C new file mode 100644 index 0000000..014b9f5 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-60.C @@ -0,0 +1,46 @@ +/* algorithms pre c++20 */ + +#include <algorithm> +#include <vector> + +#include "target-flex-common.h" + +template<typename T, std::size_t Size> +bool test(const T (&arr)[Size]) +{ + bool ok; + T out_2x_arr[Size]; + T out_shifted_arr[Size]; + #pragma omp target map(from: ok, out_2x_arr[:Size], out_shifted_arr[:Size]) \ + map(to: arr[:Size]) + { + std::vector<T> vec(Size); + std::vector<T> mutated(Size); + bool inner_ok = true; + { + std::copy(arr, arr + Size, vec.begin()); + VERIFY (std::equal(arr, arr + Size, vec.begin())); + std::transform(vec.begin(), vec.end(), mutated.begin(), + [](const T& v){ return v * 2; }); + std::copy(mutated.begin(), mutated.end(), out_2x_arr); + std::rotate(vec.begin(), std::next(vec.begin(), Size / 2), vec.end()); + std::copy(vec.begin(), vec.end(), out_shifted_arr); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (std::equal(arr, arr + Size, out_2x_arr, + [](const T& a, const T& b){ return a * 2 == b; })); + std::vector<T> shifted(arr, arr + Size); + std::rotate(shifted.begin(), std::next(shifted.begin(), Size / 2), shifted.end()); + VERIFY_NON_TARGET (std::equal(out_shifted_arr, out_shifted_arr + Size, shifted.begin())); + return true; +} + +int main() +{ + int arr[] = {0, 1, 2, 3, 4, 5, 6, 7}; + return test(arr) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-61.C b/libgomp/testsuite/libgomp.c++/target-flex-61.C new file mode 100644 index 0000000..9070c2d --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-61.C @@ -0,0 +1,54 @@ +/* { dg-additional-options "-std=c++20" } */ + +/* ranged algorithms c++20 */ + +#include <algorithm> +#include <ranges> +#include <vector> + +//TODO PR120454 "C++ constexpr vs. OpenMP implicit mapping" +#pragma omp declare target(std::ranges::copy, std::ranges::equal, std::ranges::rotate, std::ranges::transform) + +#include "target-flex-common.h" + +namespace stdr = std::ranges; + +template<typename T, std::size_t Size> +bool test(const T (&arr)[Size]) +{ + bool ok; + T out_2x_arr[Size]; + T out_shifted_arr[Size]; + #pragma omp target defaultmap(none) \ + map(from: ok, out_2x_arr[:Size], out_shifted_arr[:Size]) \ + map(to: arr[:Size]) + { + std::vector<T> vec(Size); + std::vector<T> mutated(Size); + bool inner_ok = true; + { + stdr::copy(arr, vec.begin()); + VERIFY (stdr::equal(arr, vec)); + stdr::transform(vec, mutated.begin(), + [](const T& v){ return v * 2; }); + stdr::copy(mutated, out_2x_arr); + stdr::rotate(vec, std::next(vec.begin(), Size / 2)); + stdr::copy(vec, out_shifted_arr); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (stdr::equal(arr, out_2x_arr, stdr::equal_to{}, [](const T& v){ return v * 2; })); + std::vector<T> shifted(arr, arr + Size); + stdr::rotate(shifted, std::next(shifted.begin(), Size / 2)); + VERIFY_NON_TARGET (stdr::equal(out_shifted_arr, shifted)); + return true; +} + +int main() +{ + int arr[] = {0, 1, 2, 3, 4, 5, 6, 7}; + return test(arr) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-62.C b/libgomp/testsuite/libgomp.c++/target-flex-62.C new file mode 100644 index 0000000..ef6b942 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-62.C @@ -0,0 +1,50 @@ +/* { dg-additional-options -std=c++23 } */ + +/* std::views stuff. Also tests std::tuple with std::views::zip. */ + +#include <algorithm> +#include <ranges> +#include <span> + +//TODO PR120454 "C++ constexpr vs. OpenMP implicit mapping" +#pragma omp declare target(std::ranges::all_of, std::ranges::equal, std::ranges::fold_left, std::views::reverse, std::views::zip) + +#include "target-flex-common.h" + +namespace stdr = std::ranges; +namespace stdv = std::views; + +bool f() +{ + const int arr_fwd[8] = {0, 1, 2, 3, 4, 5, 6, 7}; + const int arr_rev[8] = {7, 6, 5, 4, 3, 2, 1, 0}; + + bool ok; + #pragma omp target defaultmap(none) map(from: ok) map(to: arr_fwd[:8], arr_rev[:8]) + { + std::span<const int> fwd = {arr_fwd, 8}; + std::span<const int> rev = {arr_rev, 8}; + bool inner_ok = true; + { + VERIFY(stdr::equal(fwd, rev | stdv::reverse)); + VERIFY(stdr::equal(fwd | stdv::drop(4) | stdv::reverse, + rev | stdv::take(4))); + for (auto [first, second] : stdv::zip(fwd, rev)) + VERIFY(first + second == 7); + auto plus = [](int a, int b){ return a + b; }; + auto is_even = [](int v){ return v % 2 == 0; }; + VERIFY(stdr::fold_left(fwd | stdv::filter(is_even), 0, plus) + == 12); + VERIFY(stdr::all_of(fwd | stdv::transform([](int v){ return v * 2; }), + is_even)); + } + end: + ok = inner_ok; + } + return ok; +} + +int main() +{ + return f() ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-70.C b/libgomp/testsuite/libgomp.c++/target-flex-70.C new file mode 100644 index 0000000..9e9383d --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-70.C @@ -0,0 +1,26 @@ +/* CTAD in target regions. */ + +template<typename T> +struct S +{ + T _v; +}; + +template<typename T> +S(T) -> S<T>; + +bool f() +{ + bool ok; + #pragma omp target map(from: ok) + { + S s{42}; + ok = s._v == 42; + } + return ok; +} + +int main() +{ + return f() ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-80.C b/libgomp/testsuite/libgomp.c++/target-flex-80.C new file mode 100644 index 0000000..f41a1bb --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-80.C @@ -0,0 +1,49 @@ +// { dg-additional-options "-std=c++20" } + +/* std::span */ + +#include <span> + +#include "target-flex-common.h" + +template<typename It0, typename It1> +bool simple_equal(It0 it0, const It0 end0, + It1 it1, const It1 end1) noexcept +{ + for (; it0 != end0; ++it0, ++it1) + if (it1 == end1 || *it0 != *it1) + return false; + return true; +} + +template<typename T, std::size_t Size> +bool test(const T (&arr)[Size]) +{ + bool ok; + T out_arr[Size]; + #pragma omp target map(from: ok) map(to: arr[:Size]) + { + std::span span = {arr, Size}; + bool inner_ok = true; + { + VERIFY (!span.empty()); + VERIFY (span.size() == Size); + auto out_it = out_arr; + for (auto elem : span) + *(out_it++) = elem; + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (simple_equal(arr, arr + Size, + out_arr, out_arr + Size)); + return true; +} + +int main() +{ + int arr[8] = {0, 1, 2, 3, 4, 5, 6, 7}; + return test(arr) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-81.C b/libgomp/testsuite/libgomp.c++/target-flex-81.C new file mode 100644 index 0000000..a86fefb --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-81.C @@ -0,0 +1,75 @@ +/* { dg-additional-options "-std=c++20" } */ + +#include <ranges> +#include <span> +#include <type_traits> +#include <vector> + +#include "target-flex-common.h" + +namespace stdr = std::ranges; + +template<typename It0, typename It1> +bool simple_equal(It0 it0, const It0 end0, + It1 it1, const It1 end1) noexcept +{ + for (; it0 != end0; ++it0, ++it1) + if (it1 == end1 || *it0 != *it1) + return false; + return true; +} + +template<typename Rn0, typename Rn1> +bool simple_equal(Rn0&& rn0, Rn1&& rn1) noexcept +{ + return simple_equal(stdr::begin(rn0), stdr::end(rn0), + stdr::begin(rn1), stdr::end(rn1)); +} + +template<typename Rn> +bool test(Rn&& range) +{ + using value_type = stdr::range_value_t<std::remove_cvref_t<Rn>>; + std::vector<value_type> vec = {stdr::begin(range), stdr::end(range)}; + value_type *data = vec.data(); + std::size_t size = vec.size(); + bool ok; + #pragma omp target map(from: ok) map(tofrom: data[:size]) map(to: size) + { + std::vector<value_type> orig = {data, data + size}; + std::span<value_type> span = {data, size}; + bool inner_ok = true; + { + auto mul_by_2 = [](const value_type& v){ return v * 2; }; + VERIFY (simple_equal(orig, span)); + for (auto& elem : span) + elem = mul_by_2(elem); + VERIFY (simple_equal(orig | std::views::transform(mul_by_2), span)); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + auto mul_by_2 = [](const value_type& v){ return v * 2; }; + VERIFY_NON_TARGET (simple_equal(range | std::views::transform(mul_by_2), vec)); + return true; +} + +struct my_int +{ + int _v; + bool operator==(my_int const&) const = default; + my_int operator*(int rhs) const noexcept { + return {_v * rhs}; + } +}; + +int main() +{ + std::vector<int> ints = {1, 2, 3, 4, 5}; + const bool ints_res = test(ints); + std::vector<my_int> my_ints = {my_int{1}, my_int{2}, my_int{3}, my_int{4}, my_int{5}}; + const bool my_ints_res = test(my_ints); + return ints_res && my_ints_res ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-90.C b/libgomp/testsuite/libgomp.c++/target-flex-90.C new file mode 100644 index 0000000..b3f1197 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-90.C @@ -0,0 +1,107 @@ +/* structured bindings */ + +#include <array> +#include <tuple> + +#include "target-flex-common.h" + +template<typename Array, typename Tuple, typename Struct> +bool test(Array array, Tuple tuple, Struct s) +{ + bool ok; + auto array_2nd_in = std::get<2>(array); + auto tuple_2nd_in = std::get<2>(tuple); + auto s_2nd_in = s._2; + decltype(array_2nd_in) array_2nd_out_0; + decltype(tuple_2nd_in) tuple_2nd_out_0; + decltype(s_2nd_in) s_2nd_out_0; + decltype(array_2nd_in) array_2nd_out_1; + decltype(tuple_2nd_in) tuple_2nd_out_1; + decltype(s_2nd_in) s_2nd_out_1; + decltype(array_2nd_in) array_2nd_out_2; + decltype(tuple_2nd_in) tuple_2nd_out_2; + decltype(s_2nd_in) s_2nd_out_2; + #pragma omp target map(from: ok, \ + array_2nd_out_0, tuple_2nd_out_0, s_2nd_out_0, \ + array_2nd_out_1, tuple_2nd_out_1, s_2nd_out_1, \ + array_2nd_out_2, tuple_2nd_out_2, s_2nd_out_2) \ + map(to: array_2nd_in, tuple_2nd_in, s_2nd_in, array, tuple, s) + { + bool inner_ok = true; + { + { + auto [array_0th, array_1st, array_2nd] = array; + VERIFY (array_2nd_in == array_2nd); + VERIFY (std::get<2>(array) == array_2nd); + array_2nd_out_0 = array_2nd; + auto [tuple_0th, tuple_1st, tuple_2nd] = tuple; + VERIFY (tuple_2nd_in == tuple_2nd); + VERIFY (std::get<2>(tuple) == tuple_2nd); + tuple_2nd_out_0 = tuple_2nd; + auto [s_0th, s_1st, s_2nd] = s; + VERIFY (s_2nd_in == s_2nd); + VERIFY (s._2 == s_2nd); + s_2nd_out_0 = s_2nd; + } + { + auto& [array_0th, array_1st, array_2nd] = array; + VERIFY (array_2nd_in == array_2nd); + VERIFY (std::get<2>(array) == array_2nd); + array_2nd_out_1 = array_2nd; + auto& [tuple_0th, tuple_1st, tuple_2nd] = tuple; + VERIFY (tuple_2nd_in == tuple_2nd); + VERIFY (std::get<2>(tuple) == tuple_2nd); + tuple_2nd_out_1 = tuple_2nd; + auto& [s_0th, s_1st, s_2nd] = s; + VERIFY (s_2nd_in == s_2nd); + VERIFY (s._2 == s_2nd); + s_2nd_out_1 = s_2nd; + } + { + const auto& [array_0th, array_1st, array_2nd] = array; + VERIFY (array_2nd_in == array_2nd); + VERIFY (std::get<2>(array) == array_2nd); + array_2nd_out_2 = array_2nd; + const auto& [tuple_0th, tuple_1st, tuple_2nd] = tuple; + VERIFY (tuple_2nd_in == tuple_2nd); + VERIFY (std::get<2>(tuple) == tuple_2nd); + tuple_2nd_out_2 = tuple_2nd; + const auto& [s_0th, s_1st, s_2nd] = s; + VERIFY (s_2nd_in == s_2nd); + VERIFY (s._2 == s_2nd); + s_2nd_out_2 = s_2nd; + } + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (array_2nd_out_0 == array_2nd_in); + VERIFY_NON_TARGET (tuple_2nd_out_0 == tuple_2nd_in); + VERIFY_NON_TARGET (s_2nd_out_0 == s_2nd_in); + VERIFY_NON_TARGET (array_2nd_out_1 == array_2nd_in); + VERIFY_NON_TARGET (tuple_2nd_out_1 == tuple_2nd_in); + VERIFY_NON_TARGET (s_2nd_out_1 == s_2nd_in); + VERIFY_NON_TARGET (array_2nd_out_2 == array_2nd_in); + VERIFY_NON_TARGET (tuple_2nd_out_2 == tuple_2nd_in); + VERIFY_NON_TARGET (s_2nd_out_2 == s_2nd_in); + + return true; +} + +struct S +{ + char _0; + float _1; + int _2; +}; + +int main() +{ + const bool test_res + = test(std::array{0, 1, 2}, + std::tuple{'a', 3.14f, 42}, + S{'a', 3.14f, 42}); + return test_res ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-common.h b/libgomp/testsuite/libgomp.c++/target-flex-common.h new file mode 100644 index 0000000..14523c4 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-common.h @@ -0,0 +1,40 @@ +#include <cstdio> + +#if __cplusplus >= 201103L + #define BL_NOEXCEPT noexcept +#else + #define BL_NOEXCEPT throw() +#endif + +#if defined __has_builtin +# if __has_builtin (__builtin_LINE) +# define VERIFY_LINE __builtin_LINE () +# endif +#endif +#if !defined VERIFY_LINE +# define VERIFY_LINE __LINE__ +#endif + +/* I'm not a huge fan of macros but in the interest of keeping the code that + isn't being tested as simple as possible, we use them. */ + +#define VERIFY(EXPR) \ + do { \ + if (!(EXPR)) \ + { \ + std::printf("VERIFY ln: %d `" #EXPR "` evaluated to false\n", \ + VERIFY_LINE); \ + inner_ok = false; \ + goto end; \ + } \ + } while (false) + +#define VERIFY_NON_TARGET(EXPR) \ + do { \ + if (!(EXPR)) \ + { \ + std::printf("VERIFY ln: %d `" #EXPR "` evaluated to false\n", \ + VERIFY_LINE); \ + return false; \ + } \ + } while (false) diff --git a/libgomp/testsuite/libgomp.c++/target-std__array-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__array-concurrent-usm.C new file mode 100644 index 0000000..9923783 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__array-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__array-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__array-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__array-concurrent.C new file mode 100644 index 0000000..c42105a --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__array-concurrent.C @@ -0,0 +1,62 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <array> +#include <algorithm> + +#define N 50000 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand (); +} + +#pragma omp declare target +bool validate (const std::array<int,N> &arr, int data[]) +{ + for (int i = 0; i < N; ++i) + if (arr[i] != data[i] * data[i]) + return false; + return true; +} +#pragma omp end declare target + +int main (void) +{ + int data[N]; + bool ok; + std::array<int,N> arr; + + srand (time (NULL)); + init (data); + +#ifndef MEM_SHARED + #pragma omp target data map (to: data[:N]) map (alloc: arr) +#endif + { + #pragma omp target + { +#ifndef MEM_SHARED + new (&arr) std::array<int,N> (); +#endif + std::copy (data, data + N, arr.begin ()); + } + + #pragma omp target teams distribute parallel for + for (int i = 0; i < N; ++i) + arr[i] *= arr[i]; + + #pragma omp target map (from: ok) + { + ok = validate (arr, data); +#ifndef MEM_SHARED + arr.~array (); +#endif + } + } + + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent-usm.C new file mode 100644 index 0000000..9023ef8 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__bitset-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent.C new file mode 100644 index 0000000..4fcce93 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent.C @@ -0,0 +1,69 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <bitset> +#include <set> +#include <algorithm> + +#define N 4000 +#define MAX 16384 + +void init (int data[]) +{ + std::set<int> _set; + for (int i = 0; i < N; ++i) + { + // Avoid duplicates in data array. + do + data[i] = rand () % MAX; + while (_set.find (data[i]) != _set.end ()); + _set.insert (data[i]); + } +} + +bool validate (int sum, int data[]) +{ + int total = 0; + for (int i = 0; i < N; ++i) + total += data[i]; + return sum == total; +} + +int main (void) +{ + int data[N]; + std::bitset<MAX> _set; + int sum = 0; + + srand (time (NULL)); + init (data); + +#ifndef MEM_SHARED + #pragma omp target data map (to: data[:N]) map (alloc: _set) +#endif + { + #pragma omp target + { +#ifndef MEM_SHARED + new (&_set) std::bitset<MAX> (); +#endif + for (int i = 0; i < N; ++i) + _set[data[i]] = true; + } + + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < MAX; ++i) + if (_set[i]) + sum += i; + +#ifndef MEM_SHARED + #pragma omp target + _set.~bitset (); +#endif + } + + bool ok = validate (sum, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__cmath.C b/libgomp/testsuite/libgomp.c++/target-std__cmath.C new file mode 100644 index 0000000..aaf7152 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__cmath.C @@ -0,0 +1,340 @@ +// { dg-do run } +// { dg-additional-options "-std=c++20" } + +#include <cmath> +#include <numbers> + +#define FP_EQUAL(x,y) (std::abs ((x) - (y)) < 1E-6) + +#pragma omp declare target +template<typename T> bool test_basic () +{ + T x = -3.456789; + T y = 1.234567; + T z = 5.678901; + + if (std::abs (x) != -x) + return false; + if (!FP_EQUAL (std::trunc (x / y) * y + std::fmod (x, y), x)) + return false; + if (!FP_EQUAL (x - std::round (x / y) * y, std::remainder (x, y))) + return false; + if (!FP_EQUAL (std::fma (x, y, z), x * y + z)) + return false; + if (std::fmax (x, y) != (x > y ? x : y)) + return false; + if (std::fmin (x, y) != (x < y ? x : y)) + return false; + if (std::fdim (x, y) != std::max(x - y, (T) 0.0)) + return false; + if (std::fdim (y, x) != std::max(y - x, (T) 0.0)) + return false; + return true; +} + +template<typename T> bool test_exp () +{ + T x = -4.567890; + T y = 2.345678; + + if (!FP_EQUAL (std::exp (x), std::pow (std::numbers::e_v<T>, x))) + return false; + if (!FP_EQUAL (std::exp2 (y), std::pow ((T) 2.0, y))) + return false; + if (!FP_EQUAL (std::expm1 (y), std::exp (y) - (T) 1.0)) + return false; + if (!FP_EQUAL (std::log (std::exp (x)), x)) + return false; + if (!FP_EQUAL (std::log10 (std::pow ((T) 10.0, y)), y)) + return false; + if (!FP_EQUAL (std::log2 (std::exp2 (y)), y)) + return false; + if (!FP_EQUAL (std::log1p (std::expm1 (y)), y)) + return false; + return true; +} + +template<typename T> bool test_power () +{ + T x = 7.234251; + T y = 0.340128; + + if (!FP_EQUAL (std::log (std::pow (x, y)) / std::log (x), y)) + return false; + if (!FP_EQUAL (std::sqrt (x) * std::sqrt (x), x)) + return false; + if (!FP_EQUAL (std::cbrt (x) * std::cbrt (x) * std::cbrt (x), x)) + return false; + if (!FP_EQUAL (std::hypot (x, y), std::sqrt (x * x + y * y))) + return false; + return true; +} + +template<typename T> bool test_trig () +{ + T theta = std::numbers::pi / 4; + T phi = std::numbers::pi / 6; + + if (!FP_EQUAL (std::sin (theta), std::sqrt ((T) 2) / 2)) + return false; + if (!FP_EQUAL (std::sin (phi), 0.5)) + return false; + if (!FP_EQUAL (std::cos (theta), std::sqrt ((T) 2) / 2)) + return false; + if (!FP_EQUAL (std::cos (phi), std::sqrt ((T) 3) / 2)) + return false; + if (!FP_EQUAL (std::tan (theta), 1.0)) + return false; + if (!FP_EQUAL (std::tan (phi), std::sqrt ((T) 3) / 3)) + return false; + + T x = 0.33245623; + + if (!FP_EQUAL (std::asin (std::sin (x)), x)) + return false; + if (!FP_EQUAL (std::acos (std::cos (x)), x)) + return false; + if (!FP_EQUAL (std::atan (std::tan (x)), x)) + return false; + if (!FP_EQUAL (std::atan2 (std::sin (x), std::cos (x)), x)) + return false; + return true; +} + +template<typename T> bool test_hyperbolic () +{ + T x = 0.7423532; + + if (!FP_EQUAL (std::sinh (x), (std::exp (x) - std::exp (-x)) / (T) 2.0)) + return false; + if (!FP_EQUAL (std::cosh (x), (std::exp (x) + std::exp (-x)) / (T) 2.0)) + return false; + if (!FP_EQUAL (std::tanh (x), std::sinh (x) / std::cosh (x))) + return false; + if (!FP_EQUAL (std::asinh (std::sinh (x)), x)) + return false; + if (!FP_EQUAL (std::acosh (std::cosh (x)), x)) + return false; + if (!FP_EQUAL (std::atanh (std::tanh (x)), x)) + return false; + return true; +} + +template<typename T> bool test_erf () +{ + if (!FP_EQUAL (std::erf ((T) 0), 0)) + return false; + if (!FP_EQUAL (std::erf ((T) INFINITY), 1)) + return false; + if (!FP_EQUAL (std::erf ((T) -INFINITY), -1)) + return false; + + if (!FP_EQUAL (std::erfc (0), 1)) + return false; + if (!FP_EQUAL (std::erfc ((T) INFINITY), 0)) + return false; + if (!FP_EQUAL (std::erfc ((T) -INFINITY), 2)) + return false; + + return true; +} + +template<typename T> bool test_gamma () +{ + if (!FP_EQUAL (std::tgamma ((T) 5), 4*3*2*1)) + return false; + if (!FP_EQUAL (std::tgamma ((T) 0.5), std::sqrt (std::numbers::pi_v<T>))) + return false; + if (!FP_EQUAL (std::tgamma ((T) -0.5), (T) -2 * std::sqrt (std::numbers::pi_v<T>))) + return false; + if (!FP_EQUAL (std::tgamma ((T) 2.5), (T) 0.75 * std::sqrt (std::numbers::pi_v<T>))) + return false; + if (!FP_EQUAL (std::tgamma ((T) -2.5), (T) -8.0/15 * std::sqrt (std::numbers::pi_v<T>))) + return false; + + if (!FP_EQUAL (std::lgamma ((T) 5), std::log ((T) 4*3*2*1))) + return false; + if (!FP_EQUAL (std::lgamma ((T) 0.5), std::log (std::sqrt (std::numbers::pi_v<T>)))) + return false; + if (!FP_EQUAL (std::lgamma ((T) 2.5), + std::log ((T) 0.75 * std::sqrt (std::numbers::pi_v<T>)))) + return false; + + return true; +} + +template<typename T> bool test_rounding () +{ + T x = -2.5678; + T y = 3.6789; + + if (std::ceil (x) != -2) + return false; + if (std::floor (x) != -3) + return false; + if (std::trunc (x) != -2) + return false; + if (std::round (x) != -3) + return false; + + if (std::ceil (y) != 4) + return false; + if (std::floor (y) != 3) + return false; + if (std::trunc (y) != 3) + return false; + if (std::round (y) != 4) + return false; + + /* Not testing std::rint and std::nearbyint due to dependence on + floating-point environment. */ + + return true; +} + +template<typename T> bool test_fpmanip () +{ + T x = -2.3456789; + T y = 3.6789012; + int exp; + + T mantissa = std::frexp (x, &exp); + if (std::ldexp (mantissa, exp) != x) + return false; + if (std::logb (x) + 1 != exp) + return false; + if (std::ilogb (x) + 1 != exp) + return false; + if (std::scalbn (x, -exp) != mantissa) + return false; + + T next = std::nextafter (x, y); + if (!(next > x && next < y)) + return false; + +#if 0 + /* TODO Due to 'std::nexttoward' using 'long double to', this triggers a + '80-bit-precision floating-point numbers unsupported (mode ‘XF’)' error + with x86_64 host and nvptx, GCN offload compilers, or + '128-bit-precision floating-point numbers unsupported (mode ‘TF’)' error + with powerpc64le host and nvptx offload compiler, for example; + PR71064 'nvptx offloading: "long double" data type'. + It ought to work on systems where the host's 'long double' is the same as + 'double' ('DF'): aarch64, for example? */ + next = std::nexttoward (x, y); + if (!(next > x && next < y)) + return false; +#endif + + if (std::copysign (x, y) != std::abs (x)) + return false; + if (std::copysign (y, x) != -y) + return false; + + return true; +} + +template<typename T> bool test_classify () +{ + T x = -2.3456789; + T y = 3.6789012; + + if (std::fpclassify (x) != FP_NORMAL || std::fpclassify (y) != FP_NORMAL) + return false; + if (std::fpclassify ((T) INFINITY) != FP_INFINITE + || std::fpclassify ((T) -INFINITY) != FP_INFINITE) + return false; + if (std::fpclassify ((T) 0.0) != FP_ZERO) + return false; + if (std::fpclassify ((T) NAN) != FP_NAN) + return false; + if (!std::isfinite (x) || !std::isfinite (y)) + return false; + if (std::isfinite ((T) INFINITY) || std::isfinite ((T) -INFINITY)) + return false; + if (std::isinf (x) || std::isinf (y)) + return false; + if (!std::isinf ((T) INFINITY) || !std::isinf ((T) -INFINITY)) + return false; + if (std::isnan (x) || std::isnan (y)) + return false; + if (!std::isnan ((T) 0.0 / (T) 0.0)) + return false; + if (std::isnan (x) || std::isnan (y)) + return false; + if (!std::isnormal (x) || !std::isnormal (y)) + return false; + if (std::isnormal ((T) 0.0) || std::isnormal ((T) INFINITY) || std::isnormal ((T) NAN)) + return false; + if (!std::signbit (x) || std::signbit (y)) + return false; + + return true; +} + +template<typename T> bool test_compare () +{ + T x = 5.6789012; + T y = 8.9012345; + + if (std::isgreater (x, y)) + return false; + if (std::isgreater (x, x)) + return false; + if (std::isgreaterequal (x, y)) + return false; + if (!std::isgreaterequal (x, x)) + return false; + if (!std::isless (x, y)) + return false; + if (std::isless (x, x)) + return false; + if (!std::islessequal (x, y)) + return false; + if (!std::islessequal (x, x)) + return false; + if (!std::islessgreater (x, y)) + return false; + if (std::islessgreater (x, x)) + return false; + if (std::isunordered (x, y)) + return false; + if (!std::isunordered (x, NAN)) + return false; + return true; +} +#pragma omp end declare target + +#define RUN_TEST(func) \ +{ \ + pass++; \ + bool ok = test_##func<float> (); \ + if (!ok) { result = pass; break; } \ + pass++; \ + ok = test_##func<double> (); \ + if (!ok) { result = pass; break; } \ +} + +int main (void) +{ + int result = 0; + + #pragma omp target map (tofrom: result) + do { + int pass = 0; + + RUN_TEST (basic); + RUN_TEST (exp); + RUN_TEST (power); + RUN_TEST (trig); + RUN_TEST (hyperbolic); + RUN_TEST (erf); + RUN_TEST (gamma); + RUN_TEST (rounding); + RUN_TEST (fpmanip); + RUN_TEST (classify); + RUN_TEST (compare); + } while (false); + + return result; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__complex.C b/libgomp/testsuite/libgomp.c++/target-std__complex.C new file mode 100644 index 0000000..e392d17 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__complex.C @@ -0,0 +1,175 @@ +// { dg-do run } +// { dg-additional-options "-std=c++20" } + +#include <cmath> +#include <complex> +#include <numbers> + +using namespace std::complex_literals; + +#define FP_EQUAL(x,y) (std::abs ((x) - (y)) < 1E-6) +#define COMPLEX_EQUAL(x,y) (FP_EQUAL ((x).real (), (y).real ()) \ + && FP_EQUAL ((x).imag (), (y).imag ())) + +#pragma omp declare target +template<typename T> bool test_complex () +{ + std::complex<T> z (-1.334, 5.763); + + if (!FP_EQUAL (z.real (), (T) -1.334)) + return false; + if (!FP_EQUAL (z.imag (), (T) 5.763)) + return false; + if (!FP_EQUAL (std::abs (z), + std::sqrt (z.real () * z.real () + z.imag () * z.imag ()))) + return false; + if (!FP_EQUAL (std::arg (z), std::atan2 (z.imag (), z.real ()))) + return false; + if (!FP_EQUAL (std::norm (z), z.real () * z.real () + z.imag () * z.imag ())) + return false; + + auto conj = std::conj (z); + if (!FP_EQUAL (conj.real (), z.real ()) + || !FP_EQUAL (conj.imag (), -z.imag ())) + return false; + + if (std::proj (z) != z) + return false; + + auto infz1 = std::proj (std::complex<float> (INFINITY, -1)); + if (infz1.real () != INFINITY || infz1.imag () != (T) -0.0) + return false; + auto infz2 = std::proj (std::complex<float> (0, -INFINITY)); + if (infz2.real () != INFINITY || infz2.imag () != (T) -0.0) + return false; + + auto polarz = std::polar ((T) 1.5, std::numbers::pi_v<T> / 4); + if (!FP_EQUAL (polarz.real (), (T) 1.5 * std::cos (std::numbers::pi_v<T> / 4)) + || !FP_EQUAL (polarz.imag (), + (T) 1.5* std::sin (std::numbers::pi_v<T> / 4))) + return false; + + return true; +} + +template<typename T> bool test_complex_exp_log () +{ + std::complex<T> z (-1.724, -3.763); + + // Euler's identity + auto eulerz = std::exp (std::complex<T> (0, std::numbers::pi)); + eulerz += 1.0; + if (!COMPLEX_EQUAL (eulerz, std::complex<T> ())) + return false; + + auto my_exp_z + = std::complex<T> (std::exp (z.real ()) * std::cos (z.imag ()), + std::exp (z.real ()) * std::sin (z.imag ())); + if (!COMPLEX_EQUAL (std::exp (z), my_exp_z)) + return false; + + if (!COMPLEX_EQUAL (std::log10 (z), + std::log (z) / std::log (std::complex<T> (10)))) + return false; + + return true; +} + +template<typename T> bool test_complex_trig () +{ + std::complex<T> z (std::numbers::pi / 8, std::numbers::pi / 10); + const std::complex<T> i (0, 1); + + auto my_sin_z + = std::complex<T> (std::sin (z.real ()) * std::cosh (z.imag ()), + std::cos (z.real ()) * std::sinh (z.imag ())); + if (!COMPLEX_EQUAL (std::sin (z), my_sin_z)) + return false; + + auto my_cos_z + = std::complex<T> (std::cos (z.real ()) * std::cosh (z.imag ()), + -std::sin (z.real ()) * std::sinh (z.imag ())); + if (!COMPLEX_EQUAL (std::cos (z), my_cos_z)) + return false; + + auto my_tan_z + = std::complex<T> (std::sin (2*z.real ()), std::sinh (2*z.imag ())) + / (std::cos (2*z.real ()) + std::cosh (2*z.imag ())); + if (!COMPLEX_EQUAL (std::tan (z), my_tan_z)) + return false; + + auto my_sinh_z + = std::complex<T> (std::sinh (z.real ()) * std::cos (z.imag ()), + std::cosh (z.real ()) * std::sin (z.imag ())); + if (!COMPLEX_EQUAL (std::sinh (z), my_sinh_z)) + return false; + + auto my_cosh_z + = std::complex<T> (std::cosh (z.real ()) * std::cos (z.imag ()), + std::sinh (z.real ()) * std::sin (z.imag ())); + if (!COMPLEX_EQUAL (std::cosh (z), my_cosh_z)) + return false; + + auto my_tanh_z + = std::complex<T> (std::sinh (2*z.real ()), + std::sin (2*z.imag ())) + / (std::cosh (2*z.real ()) + std::cos (2*z.imag ())); + if (!COMPLEX_EQUAL (std::tanh (z), my_tanh_z)) + return false; + + auto my_asin_z = -i * std::log (i * z + std::sqrt ((T) 1.0 - z*z)); + if (!COMPLEX_EQUAL (std::asin (z), my_asin_z)) + return false; + + auto my_acos_z + = std::complex<T> (std::numbers::pi / 2) + + i * std::log (i * z + std::sqrt ((T) 1.0 - z*z)); + if (!COMPLEX_EQUAL (std::acos (z), my_acos_z)) + return false; + + auto my_atan_z = std::complex<T> (0, -0.5) * (std::log ((i - z) / (i + z))); + if (!COMPLEX_EQUAL (std::atan (z), my_atan_z)) + return false; + + auto my_asinh_z = std::log (z + std::sqrt (z*z + (T) 1.0)); + if (!COMPLEX_EQUAL (std::asinh (z), my_asinh_z)) + return false; + + auto my_acosh_z = std::log (z + std::sqrt (z*z - (T) 1.0)); + if (!COMPLEX_EQUAL (std::acosh (z), my_acosh_z)) + return false; + + auto my_atanh_z + = std::complex<T> (0.5) * (std::log ((T) 1.0 + z) - std::log ((T) 1.0 - z)); + if (!COMPLEX_EQUAL (std::atanh (z), my_atanh_z)) + return false; + + return true; +} +#pragma omp end declare target + +#define RUN_TEST(func) \ +{ \ + pass++; \ + bool ok = test_##func<float> (); \ + if (!ok) { result = pass; break; } \ + pass++; \ + ok = test_##func<double> (); \ + if (!ok) { result = pass; break; } \ +} + +int main (void) +{ + int result = 0; + + #pragma omp target map (tofrom: result) + do { + int pass = 0; + + RUN_TEST (complex); + RUN_TEST (complex_exp_log); + RUN_TEST (complex_trig); + } while (false); + + return result; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__deque-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__deque-concurrent-usm.C new file mode 100644 index 0000000..863a1de --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__deque-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__deque-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__deque-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__deque-concurrent.C new file mode 100644 index 0000000..9c2d6fa --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__deque-concurrent.C @@ -0,0 +1,64 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <deque> +#include <algorithm> + +#define N 50000 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand (); +} + +#pragma omp declare target +bool validate (const std::deque<int> &_deque, int data[]) +{ + for (int i = 0; i < N; ++i) + if (_deque[i] != data[i] * data[i]) + return false; + return true; +} +#pragma omp end declare target + +int main (void) +{ + int data[N]; + bool ok; + + srand (time (NULL)); + init (data); + +#ifdef MEM_SHARED + std::deque<int> _deque (std::begin (data), std::end (data)); +#else + std::deque<int> _deque; +#endif + +#ifndef MEM_SHARED + #pragma omp target data map (to: data[:N]) map (alloc: _deque) +#endif + { +#ifndef MEM_SHARED + #pragma omp target + new (&_deque) std::deque<int> (std::begin (data), std::end (data)); +#endif + + #pragma omp target teams distribute parallel for + for (int i = 0; i < N; ++i) + _deque[i] *= _deque[i]; + + #pragma omp target map (from: ok) + { + ok = validate (_deque, data); +#ifndef MEM_SHARED + _deque.~deque (); +#endif + } + } + + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__flat_map-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__flat_map-concurrent.C new file mode 100644 index 0000000..9e59907 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__flat_map-concurrent.C @@ -0,0 +1,71 @@ +// { dg-do run } +// { dg-additional-options "-std=c++23" } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +/* { dg-ice {TODO PR120450} { offload_target_amdgcn && { ! offload_device_shared_as } } } + { dg-excess-errors {'mkoffload' failure etc.} { xfail { offload_target_amdgcn && { ! offload_device_shared_as } } } } + (For effective-target 'offload_device_shared_as', we've got '-DMEM_SHARED', and therefore don't invoke the constructor with placement new.) */ + +#include <stdlib.h> +#include <time.h> +#include <set> +#include <flat_map> + +#define N 3000 + +void init (int data[], bool unique) +{ + std::set<int> _set; + for (int i = 0; i < N; ++i) + { + // Avoid duplicates in data array if unique is true. + do + data[i] = rand (); + while (unique && _set.count (data[i]) > 0); + _set.insert (data[i]); + } +} + +bool validate (long long sum, int keys[], int data[]) +{ + long long total = 0; + for (int i = 0; i < N; ++i) + total += (long long) keys[i] * data[i]; + return sum == total; +} + +int main (void) +{ + int keys[N], data[N]; + std::flat_map<int,int> _map; + + srand (time (NULL)); + init (keys, true); + init (data, false); + + #pragma omp target enter data map (to: keys[:N], data[:N]) map (alloc: _map) + + #pragma omp target + { +#ifndef MEM_SHARED + new (&_map) std::flat_map<int,int> (); +#endif + for (int i = 0; i < N; ++i) + _map[keys[i]] = data[i]; + } + + long long sum = 0; + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < N; ++i) + sum += (long long) keys[i] * _map[keys[i]]; + +#ifndef MEM_SHARED + #pragma omp target + _map.~flat_map (); +#endif + + #pragma omp target exit data map (release: _map) + + bool ok = validate (sum, keys, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__flat_multimap-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__flat_multimap-concurrent.C new file mode 100644 index 0000000..1dc60c8 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__flat_multimap-concurrent.C @@ -0,0 +1,70 @@ +// { dg-do run } +// { dg-additional-options "-std=c++23" } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +/* { dg-ice {TODO PR120450} { offload_target_amdgcn && { ! offload_device_shared_as } } } + { dg-excess-errors {'mkoffload' failure etc.} { xfail { offload_target_amdgcn && { ! offload_device_shared_as } } } } + (For effective-target 'offload_device_shared_as', we've got '-DMEM_SHARED', and therefore don't invoke the constructor with placement new.) */ + +#include <stdlib.h> +#include <time.h> +#include <flat_map> + +// Make sure that KEY_MAX is less than N to ensure some duplicate keys. +#define N 3000 +#define KEY_MAX 1000 + +void init (int data[], int max) +{ + for (int i = 0; i < N; ++i) + data[i] = i % max; +} + +bool validate (long long sum, int keys[], int data[]) +{ + long long total = 0; + for (int i = 0; i < N; ++i) + total += (long long) keys[i] * data[i]; + return sum == total; +} + +int main (void) +{ + int keys[N], data[N]; + std::flat_multimap<int,int> _map; + + srand (time (NULL)); + init (keys, KEY_MAX); + init (data, RAND_MAX); + + #pragma omp target enter data map (to: keys[:N], data[:N]) map (alloc: _map) + + #pragma omp target + { +#ifndef MEM_SHARED + new (&_map) std::flat_multimap<int,int> (); +#endif + for (int i = 0; i < N; ++i) + _map.insert({keys[i], data[i]}); + } + + long long sum = 0; + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < KEY_MAX; ++i) + { + auto range = _map.equal_range (i); + for (auto it = range.first; it != range.second; ++it) { + sum += (long long) it->first * it->second; + } + } + +#ifndef MEM_SHARED + #pragma omp target + _map.~flat_multimap (); +#endif + + #pragma omp target exit data map (release: _map) + + bool ok = validate (sum, keys, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__flat_multiset-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__flat_multiset-concurrent.C new file mode 100644 index 0000000..59b59bf --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__flat_multiset-concurrent.C @@ -0,0 +1,60 @@ +// { dg-do run } +// { dg-additional-options "-std=c++23" } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <flat_set> +#include <algorithm> + +// MAX should be less than N to ensure that some duplicates occur. +#define N 4000 +#define MAX 1000 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand () % MAX; +} + +bool validate (int sum, int data[]) +{ + int total = 0; + for (int i = 0; i < N; ++i) + total += data[i]; + return sum == total; +} + +int main (void) +{ + int data[N]; + std::flat_multiset<int> set; + int sum = 0; + + srand (time (NULL)); + init (data); + + #pragma omp target data map (to: data[:N]) map (alloc: set) + { + #pragma omp target + { +#ifndef MEM_SHARED + new (&set) std::flat_multiset<int> (); +#endif + for (int i = 0; i < N; ++i) + set.insert (data[i]); + } + + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < MAX; ++i) + sum += i * set.count (i); + +#ifndef MEM_SHARED + #pragma omp target + set.~flat_multiset (); +#endif + } + + bool ok = validate (sum, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__flat_set-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__flat_set-concurrent.C new file mode 100644 index 0000000..b255cd5 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__flat_set-concurrent.C @@ -0,0 +1,67 @@ +// { dg-do run } +// { dg-additional-options "-std=c++23" } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <flat_set> +#include <algorithm> + +#define N 4000 +#define MAX 16384 + +void init (int data[]) +{ + std::flat_set<int> _set; + for (int i = 0; i < N; ++i) + { + // Avoid duplicates in data array. + do + data[i] = rand () % MAX; + while (_set.count (data[i]) != 0); + _set.insert (data[i]); + } +} + +bool validate (int sum, int data[]) +{ + int total = 0; + for (int i = 0; i < N; ++i) + total += data[i]; + return sum == total; +} + +int main (void) +{ + int data[N]; + std::flat_set<int> _set; + int sum = 0; + + srand (time (NULL)); + init (data); + + #pragma omp target data map (to: data[:N]) map (alloc: _set) + { + #pragma omp target + { +#ifndef MEM_SHARED + new (&_set) std::flat_set<int> (); +#endif + for (int i = 0; i < N; ++i) + _set.insert (data[i]); + } + + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < MAX; ++i) + if (_set.count (i) > 0) + sum += i; + +#ifndef MEM_SHARED + #pragma omp target + _set.~flat_set (); +#endif + } + + bool ok = validate (sum, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent-usm.C new file mode 100644 index 0000000..60d5cee --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__forward_list-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent.C new file mode 100644 index 0000000..6b0ee65 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent.C @@ -0,0 +1,83 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <omp.h> +#include <forward_list> +#include <algorithm> + +#define N 3000 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand (); +} + +#pragma omp declare target +bool validate (const std::forward_list<int> &list, int data[]) +{ + int i = 0; + for (auto &v : list) + { + if (v != data[i] * data[i]) + return false; + ++i; + } + return true; +} +#pragma omp end declare target + +int main (void) +{ + int data[N]; + bool ok; + + srand (time (NULL)); + init (data); + +#ifdef MEM_SHARED + std::forward_list<int> list (std::begin (data), std::end (data)); +#else + std::forward_list<int> list; +#endif + +#ifndef MEM_SHARED + #pragma omp target data map (to: data[:N]) map (alloc: list) +#endif + { +#ifndef MEM_SHARED + #pragma omp target + new (&list) std::forward_list<int> (std::begin (data), std::end (data)); +#endif + + #pragma omp target teams + do + { + int len = N / omp_get_num_teams () + (N % omp_get_num_teams () > 0); + int start = len * omp_get_team_num (); + if (start >= N) + break; + if (start + len >= N) + len = N - start; + auto it = list.begin (); + std::advance (it, start); + for (int i = 0; i < len; ++i) + { + *it *= *it; + ++it; + } + } while (false); + + #pragma omp target map (from: ok) + { + ok = validate (list, data); +#ifndef MEM_SHARED + list.~forward_list (); +#endif + } + } + + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__list-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__list-concurrent-usm.C new file mode 100644 index 0000000..5057bf9 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__list-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__list-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__list-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__list-concurrent.C new file mode 100644 index 0000000..1f44a17 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__list-concurrent.C @@ -0,0 +1,83 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <omp.h> +#include <list> +#include <algorithm> + +#define N 3000 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand (); +} + +#pragma omp declare target +bool validate (const std::list<int> &_list, int data[]) +{ + int i = 0; + for (auto &v : _list) + { + if (v != data[i] * data[i]) + return false; + ++i; + } + return true; +} +#pragma omp end declare target + +int main (void) +{ + int data[N]; + bool ok; + + srand (time (NULL)); + init (data); + +#ifdef MEM_SHARED + std::list<int> _list (std::begin (data), std::end (data)); +#else + std::list<int> _list; +#endif + +#ifndef MEM_SHARED + #pragma omp target data map (to: data[:N]) map (alloc: _list) +#endif + { +#ifndef MEM_SHARED + #pragma omp target + new (&_list) std::list<int> (std::begin (data), std::end (data)); +#endif + + #pragma omp target teams + do + { + int len = N / omp_get_num_teams () + (N % omp_get_num_teams () > 0); + int start = len * omp_get_team_num (); + if (start >= N) + break; + if (start + len >= N) + len = N - start; + auto it = _list.begin (); + std::advance (it, start); + for (int i = 0; i < len; ++i) + { + *it *= *it; + ++it; + } + } while (false); + + #pragma omp target map (from: ok) + { + ok = validate (_list, data); +#ifndef MEM_SHARED + _list.~list (); +#endif + } + } + + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__map-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__map-concurrent-usm.C new file mode 100644 index 0000000..fe37426 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__map-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__map-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__map-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__map-concurrent.C new file mode 100644 index 0000000..36556ef --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__map-concurrent.C @@ -0,0 +1,70 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <set> +#include <map> + +#define N 3000 + +void init (int data[], bool unique) +{ + std::set<int> _set; + for (int i = 0; i < N; ++i) + { + // Avoid duplicates in data array if unique is true. + do + data[i] = rand (); + while (unique && _set.find (data[i]) != _set.end ()); + _set.insert (data[i]); + } +} + +bool validate (long long sum, int keys[], int data[]) +{ + long long total = 0; + for (int i = 0; i < N; ++i) + total += (long long) keys[i] * data[i]; + return sum == total; +} + +int main (void) +{ + int keys[N], data[N]; + std::map<int,int> _map; + + srand (time (NULL)); + init (keys, true); + init (data, false); + +#ifndef MEM_SHARED + #pragma omp target enter data map (to: keys[:N], data[:N]) map (alloc: _map) +#endif + + #pragma omp target + { +#ifndef MEM_SHARED + new (&_map) std::map<int,int> (); +#endif + for (int i = 0; i < N; ++i) + _map[keys[i]] = data[i]; + } + + long long sum = 0; + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < N; ++i) + sum += (long long) keys[i] * _map[keys[i]]; + +#ifndef MEM_SHARED + #pragma omp target + _map.~map (); +#endif + +#ifndef MEM_SHARED + #pragma omp target exit data map (release: _map) +#endif + + bool ok = validate (sum, keys, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent-usm.C new file mode 100644 index 0000000..79f9245 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__multimap-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent.C new file mode 100644 index 0000000..6a4a4e8 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent.C @@ -0,0 +1,68 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <map> + +// Make sure that KEY_MAX is less than N to ensure some duplicate keys. +#define N 3000 +#define KEY_MAX 1000 + +void init (int data[], int max) +{ + for (int i = 0; i < N; ++i) + data[i] = rand () % max; +} + +bool validate (long long sum, int keys[], int data[]) +{ + long long total = 0; + for (int i = 0; i < N; ++i) + total += (long long) keys[i] * data[i]; + return sum == total; +} + +int main (void) +{ + int keys[N], data[N]; + std::multimap<int,int> _map; + + srand (time (NULL)); + init (keys, KEY_MAX); + init (data, RAND_MAX); + +#ifndef MEM_SHARED + #pragma omp target enter data map (to: keys[:N], data[:N]) map (alloc: _map) +#endif + + #pragma omp target + { +#ifndef MEM_SHARED + new (&_map) std::multimap<int,int> (); +#endif + for (int i = 0; i < N; ++i) + _map.insert({keys[i], data[i]}); + } + + long long sum = 0; + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < KEY_MAX; ++i) + { + auto range = _map.equal_range (i); + for (auto it = range.first; it != range.second; ++it) + sum += (long long) it->first * it->second; + } + +#ifndef MEM_SHARED + #pragma omp target + _map.~multimap (); +#endif + +#ifndef MEM_SHARED + #pragma omp target exit data map (release: _map) +#endif + + bool ok = validate (sum, keys, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent-usm.C new file mode 100644 index 0000000..2d80756 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__multiset-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent.C new file mode 100644 index 0000000..b12402e --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent.C @@ -0,0 +1,62 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <stdio.h> +#include <time.h> +#include <set> +#include <algorithm> + +// MAX should be less than N to ensure that some duplicates occur. +#define N 4000 +#define MAX 1000 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand () % MAX; +} + +bool validate (int sum, int data[]) +{ + int total = 0; + for (int i = 0; i < N; ++i) + total += data[i]; + return sum == total; +} + +int main (void) +{ + int data[N]; + std::multiset<int> set; + int sum = 0; + + srand (time (NULL)); + init (data); + +#ifndef MEM_SHARED + #pragma omp target data map (to: data[:N]) map (alloc: set) +#endif + { + #pragma omp target + { +#ifndef MEM_SHARED + new (&set) std::multiset<int> (); +#endif + for (int i = 0; i < N; ++i) + set.insert (data[i]); + } + + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < MAX; ++i) + sum += i * set.count (i); + +#ifndef MEM_SHARED + #pragma omp target + set.~multiset (); +#endif + } + + bool ok = validate (sum, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__numbers.C b/libgomp/testsuite/libgomp.c++/target-std__numbers.C new file mode 100644 index 0000000..a6b3665 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__numbers.C @@ -0,0 +1,93 @@ +// { dg-do run } +// { dg-additional-options "-std=c++20" } + +#include <cmath> +#include <numbers> + +#define FP_EQUAL(x,y) (std::abs ((x) - (y)) < 1E-6) + +#pragma omp declare target +template<typename T> bool test_pi () +{ + if (!FP_EQUAL (std::sin (std::numbers::pi_v<T>), (T) 0.0)) + return false; + if (!FP_EQUAL (std::cos (std::numbers::pi_v<T>), (T) -1.0)) + return false; + if (!FP_EQUAL (std::numbers::pi_v<T> * std::numbers::inv_pi_v<T>, (T) 1.0)) + return false; + if (!FP_EQUAL (std::numbers::pi_v<T> * std::numbers::inv_sqrtpi_v<T> + * std::numbers::inv_sqrtpi_v<T>, (T) 1.0)) + return false; + return true; +} + +template<typename T> bool test_sqrt () +{ + if (!FP_EQUAL (std::numbers::sqrt2_v<T> * std::numbers::sqrt2_v<T>, (T) 2.0)) + return false; + if (!FP_EQUAL (std::numbers::sqrt3_v<T> * std::numbers::sqrt3_v<T>, (T) 3.0)) + return false; + return true; +} + +template<typename T> bool test_phi () +{ + T myphi = ((T) 1.0 + std::sqrt ((T) 5.0)) / (T) 2.0; + if (!FP_EQUAL (myphi, std::numbers::phi_v<T>)) + return false; + return true; +} + +template<typename T> bool test_log () +{ + if (!FP_EQUAL (std::log ((T) 2.0), std::numbers::ln2_v<T>)) + return false; + if (!FP_EQUAL (std::log ((T) 10.0), std::numbers::ln10_v<T>)) + return false; + if (!FP_EQUAL (std::log2 ((T) std::numbers::e), std::numbers::log2e_v<T>)) + return false; + if (!FP_EQUAL (std::log10 ((T) std::numbers::e), std::numbers::log10e_v<T>)) + return false; + return true; +} + +template<typename T> bool test_egamma () +{ + T myegamma = 0.0; + #pragma omp parallel for reduction(+:myegamma) + for (int k = 2; k < 100000; ++k) + myegamma += (std::riemann_zeta (k) - 1) / k; + myegamma = (T) 1 - myegamma; + if (!FP_EQUAL (myegamma, std::numbers::egamma_v<T>)) + return false; + return true; +} +#pragma omp end declare target + +#define RUN_TEST(func) \ +{ \ + pass++; \ + bool ok = test_##func<float> (); \ + if (!ok) { result = pass; break; } \ + pass++; \ + ok = test_##func<double> (); \ + if (!ok) { result = pass; break; } \ +} + +int main (void) +{ + int result = 0; + + #pragma omp target map (tofrom: result) + do { + int pass = 0; + + RUN_TEST (pi); + RUN_TEST (sqrt); + RUN_TEST (phi); + RUN_TEST (log); + RUN_TEST (egamma); + } while (false); + + return result; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__set-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__set-concurrent-usm.C new file mode 100644 index 0000000..54f62e3 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__set-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__set-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__set-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__set-concurrent.C new file mode 100644 index 0000000..cd23128 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__set-concurrent.C @@ -0,0 +1,68 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <set> +#include <algorithm> + +#define N 4000 +#define MAX 16384 + +void init (int data[]) +{ + std::set<int> _set; + for (int i = 0; i < N; ++i) + { + // Avoid duplicates in data array. + do + data[i] = rand () % MAX; + while (_set.find (data[i]) != _set.end ()); + _set.insert (data[i]); + } +} + +bool validate (int sum, int data[]) +{ + int total = 0; + for (int i = 0; i < N; ++i) + total += data[i]; + return sum == total; +} + +int main (void) +{ + int data[N]; + std::set<int> _set; + int sum = 0; + + srand (time (NULL)); + init (data); + +#ifndef MEM_SHARED + #pragma omp target data map (to: data[:N]) map (alloc: _set) +#endif + { + #pragma omp target + { +#ifndef MEM_SHARED + new (&_set) std::set<int> (); +#endif + for (int i = 0; i < N; ++i) + _set.insert (data[i]); + } + + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < MAX; ++i) + if (_set.find (i) != _set.end ()) + sum += i; + +#ifndef MEM_SHARED + #pragma omp target + _set.~set (); +#endif + } + + bool ok = validate (sum, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__span-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__span-concurrent-usm.C new file mode 100644 index 0000000..7ef16bf --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__span-concurrent-usm.C @@ -0,0 +1,7 @@ +// { dg-additional-options "-std=c++20" } + +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__span-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__span-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__span-concurrent.C new file mode 100644 index 0000000..046b3c1 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__span-concurrent.C @@ -0,0 +1,66 @@ +// { dg-do run } +// { dg-additional-options "-std=c++20" } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <span> + +#define N 64 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand (); +} + +#pragma omp declare target +bool validate (const std::span<int, N> &span, int data[]) +{ + for (int i = 0; i < N; ++i) + if (span[i] != data[i] * data[i]) + return false; + return true; +} +#pragma omp end declare target + +int main (void) +{ + int data[N]; + bool ok; + int elements[N]; + std::span<int, N> span(elements); + + srand (time (NULL)); + init (data); + +#ifndef MEM_SHARED + #pragma omp target enter data map (to: data[:N]) map (alloc: elements, span) +#endif + + #pragma omp target + { +#ifndef MEM_SHARED + new (&span) std::span<int, N> (elements); +#endif + std::copy (data, data + N, span.begin ()); + } + + #pragma omp target teams distribute parallel for + for (int i = 0; i < N; ++i) + span[i] *= span[i]; + + #pragma omp target map (from: ok) + { + ok = validate (span, data); +#ifndef MEM_SHARED + span.~span (); +#endif + } + +#ifndef MEM_SHARED + #pragma omp target exit data map (release: elements, span) +#endif + + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__unordered_map-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__unordered_map-concurrent.C new file mode 100644 index 0000000..00d7943 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__unordered_map-concurrent.C @@ -0,0 +1,66 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <set> +#include <unordered_map> + +#define N 3000 + +void init (int data[], bool unique) +{ + std::set<int> _set; + for (int i = 0; i < N; ++i) + { + // Avoid duplicates in data array if unique is true. + do + data[i] = rand (); + while (unique && _set.count (data[i]) > 0); + _set.insert (data[i]); + } +} + +bool validate (long long sum, int keys[], int data[]) +{ + long long total = 0; + for (int i = 0; i < N; ++i) + total += (long long) keys[i] * data[i]; + return sum == total; +} + +int main (void) +{ + int keys[N], data[N]; + std::unordered_map<int,int> _map; + + srand (time (NULL)); + init (keys, true); + init (data, false); + + #pragma omp target enter data map (to: keys[:N], data[:N]) map (alloc: _map) + + #pragma omp target + { +#ifndef MEM_SHARED + new (&_map) std::unordered_map<int,int> (); +#endif + for (int i = 0; i < N; ++i) + _map[keys[i]] = data[i]; + } + + long long sum = 0; + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < N; ++i) + sum += (long long) keys[i] * _map[keys[i]]; + +#ifndef MEM_SHARED + #pragma omp target + _map.~unordered_map (); +#endif + + #pragma omp target exit data map (release: _map) + + bool ok = validate (sum, keys, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__unordered_multimap-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__unordered_multimap-concurrent.C new file mode 100644 index 0000000..2567634 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__unordered_multimap-concurrent.C @@ -0,0 +1,65 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <unordered_map> + +// Make sure that KEY_MAX is less than N to ensure some duplicate keys. +#define N 3000 +#define KEY_MAX 1000 + +void init (int data[], int max) +{ + for (int i = 0; i < N; ++i) + data[i] = i % max; +} + +bool validate (long long sum, int keys[], int data[]) +{ + long long total = 0; + for (int i = 0; i < N; ++i) + total += (long long) keys[i] * data[i]; + return sum == total; +} + +int main (void) +{ + int keys[N], data[N]; + std::unordered_multimap<int,int> _map; + + srand (time (NULL)); + init (keys, KEY_MAX); + init (data, RAND_MAX); + + #pragma omp target enter data map (to: keys[:N], data[:N]) map (alloc: _map) + + #pragma omp target + { +#ifndef MEM_SHARED + new (&_map) std::unordered_multimap<int,int> (); +#endif + for (int i = 0; i < N; ++i) + _map.insert({keys[i], data[i]}); + } + + long long sum = 0; + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < KEY_MAX; ++i) + { + auto range = _map.equal_range (i); + for (auto it = range.first; it != range.second; ++it) { + sum += (long long) it->first * it->second; + } + } + +#ifndef MEM_SHARED + #pragma omp target + _map.~unordered_multimap (); +#endif + + #pragma omp target exit data map (release: _map) + + bool ok = validate (sum, keys, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__unordered_multiset-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__unordered_multiset-concurrent.C new file mode 100644 index 0000000..da6c875 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__unordered_multiset-concurrent.C @@ -0,0 +1,59 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <unordered_set> +#include <algorithm> + +// MAX should be less than N to ensure that some duplicates occur. +#define N 4000 +#define MAX 1000 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand () % MAX; +} + +bool validate (int sum, int data[]) +{ + int total = 0; + for (int i = 0; i < N; ++i) + total += data[i]; + return sum == total; +} + +int main (void) +{ + int data[N]; + std::unordered_multiset<int> set; + int sum = 0; + + srand (time (NULL)); + init (data); + + #pragma omp target data map (to: data[:N]) map (alloc: set) + { + #pragma omp target + { +#ifndef MEM_SHARED + new (&set) std::unordered_multiset<int> (); +#endif + for (int i = 0; i < N; ++i) + set.insert (data[i]); + } + + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < MAX; ++i) + sum += i * set.count (i); + +#ifndef MEM_SHARED + #pragma omp target + set.~unordered_multiset (); +#endif + } + + bool ok = validate (sum, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__unordered_set-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__unordered_set-concurrent.C new file mode 100644 index 0000000..b7bd935 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__unordered_set-concurrent.C @@ -0,0 +1,66 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <unordered_set> +#include <algorithm> + +#define N 4000 +#define MAX 16384 + +void init (int data[]) +{ + std::unordered_set<int> _set; + for (int i = 0; i < N; ++i) + { + // Avoid duplicates in data array. + do + data[i] = rand () % MAX; + while (_set.count (data[i]) != 0); + _set.insert (data[i]); + } +} + +bool validate (int sum, int data[]) +{ + int total = 0; + for (int i = 0; i < N; ++i) + total += data[i]; + return sum == total; +} + +int main (void) +{ + int data[N]; + std::unordered_set<int> _set; + int sum = 0; + + srand (time (NULL)); + init (data); + + #pragma omp target data map (to: data[:N]) map (alloc: _set) + { + #pragma omp target + { +#ifndef MEM_SHARED + new (&_set) std::unordered_set<int> (); +#endif + for (int i = 0; i < N; ++i) + _set.insert (data[i]); + } + + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < MAX; ++i) + if (_set.count (i) > 0) + sum += i; + +#ifndef MEM_SHARED + #pragma omp target + _set.~unordered_set (); +#endif + } + + bool ok = validate (sum, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__valarray-1.C b/libgomp/testsuite/libgomp.c++/target-std__valarray-1.C new file mode 100644 index 0000000..865cde2 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__valarray-1.C @@ -0,0 +1,179 @@ +// { dg-additional-options -std=c++20 } +// { dg-output-file target-std__valarray-1.output } + +#include <valarray> +#include <ostream> +#include <sstream> + + +/*TODO Work around PR118484 "ICE during IPA pass: cp, segfault in determine_versionability ipa-cp.cc:467". + +We can't: + + #pragma omp declare target(std::basic_streambuf<char, std::char_traits<char>>::basic_streambuf) + +... because: + + error: overloaded function name ‘std::basic_streambuf<char>::__ct ’ in clause ‘enter’ + +Therefore, use dummy classes in '#pragma omp declare target': +*/ + +#pragma omp declare target + +// For 'std::basic_streambuf<char, std::char_traits<char> >::basic_streambuf': + +class dummy_basic_streambuf__char + : public std::basic_streambuf<char> +{ +public: + dummy_basic_streambuf__char() {} +}; + +// For 'std::basic_ios<char, std::char_traits<char> >::basic_ios()': + +class dummy_basic_ios__char + : public std::basic_ios<char> +{ +public: + dummy_basic_ios__char() {} +}; + +#pragma omp end declare target + + +int main() +{ + // Due to PR120021 "Offloading vs. C++ 'std::initializer_list'", we can't construct these on the device. + std::initializer_list<int> v1_i = {10, 20, 30, 40, 50}; + const int *v1_i_data = std::data(v1_i); + size_t v1_i_size = v1_i.size(); + std::initializer_list<int> v2_i = {5, 4, 3, 2, 1}; + const int *v2_i_data = std::data(v2_i); + size_t v2_i_size = v2_i.size(); + std::initializer_list<int> shiftData_i = {1, 2, 3, 4, 5}; + const int *shiftData_i_data = std::data(shiftData_i); + size_t shiftData_i_size = shiftData_i.size(); +#pragma omp target \ + defaultmap(none) \ + map(to: v1_i_data[:v1_i_size], v1_i_size, \ + v2_i_data[:v2_i_size], v2_i_size, \ + shiftData_i_data[:shiftData_i_size], shiftData_i_size) + { + /* Manually set up a buffer we can stream into, similar to 'cout << [...]', and print it at the end of region. */ + std::stringbuf out_b; + std::ostream out(&out_b); + + std::valarray<int> v1(v1_i_data, v1_i_size); + out << "\nv1:"; + for (auto val : v1) + out << " " << val; + + std::valarray<int> v2(v2_i_data, v2_i_size); + out << "\nv2:"; + for (auto val : v2) + out << " " << val; + + std::valarray<int> sum = v1 + v2; + out << "\nv1 + v2:"; + for (auto val : sum) + out << " " << val; + + std::valarray<int> diff = v1 - v2; + out << "\nv1 - v2:"; + for (auto val : diff) + out << " " << val; + + std::valarray<int> product = v1 * v2; + out << "\nv1 * v2:"; + for (auto val : product) + out << " " << val; + + std::valarray<int> quotient = v1 / v2; + out << "\nv1 / v2:"; + for (auto val : quotient) + out << " " << val; + + std::valarray<int> squares = pow(v1, 2); + out << "\npow(v1, 2):"; + for (auto val : squares) + out << " " << val; + + std::valarray<int> sinhs = sinh(v2); + out << "\nsinh(v2):"; + for (auto val : sinhs) + out << " " << val; + + std::valarray<int> logs = log(v1 * v2); + out << "\nlog(v1 * v2):"; + for (auto val : logs) + out << " " << val; + + std::valarray<int> data(12); + for (size_t i = 0; i < data.size(); ++i) + data[i] = i; + out << "\nOriginal array:"; + for (auto val : data) + out << " " << val; + + std::slice slice1(2, 5, 1); + std::valarray<int> sliced1 = data[slice1]; + out << "\nSlice(2, 5, 1):"; + for (auto val : sliced1) + out << " " << val; + + std::slice slice2(1, 4, 3); + std::valarray<int> sliced2 = data[slice2]; + out << "\nSlice(1, 4, 3):"; + for (auto val : sliced2) + out << " " << val; + + data[slice1] = 99; + out << "\nArray after slice modification:"; + for (auto val : data) + out << " " << val; + + std::valarray<bool> mask = (v1 > 20); + out << "\nElements of v1 > 20:"; + for (size_t i = 0; i < v1.size(); ++i) + { + if (mask[i]) + out << " " << v1[i]; + } + + std::valarray<int> masked = v1[mask]; + out << "\nMasked array:"; + for (auto val : masked) + out << " " << val; + + std::valarray<int> shiftData(shiftData_i_data, shiftData_i_size); + out << "\nOriginal shiftData:"; + for (auto val : shiftData) + out << " " << val; + + std::valarray<int> shifted = shiftData.shift(2); + out << "\nshift(2):"; + for (auto val : shifted) + out << " " << val; + + std::valarray<int> cshifted = shiftData.cshift(-1); + out << "\ncshift(-1):"; + for (auto val : cshifted) + out << " " << val; + + out << "\nSum(v1): " << v1.sum(); + out << "\nMin(v1): " << v1.min(); + out << "\nMax(v1): " << v1.max(); + + out << "\n"; + + /* Terminate with a NUL. Otherwise, we'd have to use: + __builtin_printf("%.*s", (int) out_b_sv.size(), out_b_sv.data()); + ... which nvptx 'printf', as implemented via PTX 'vprintf', doesn't support (TODO). */ + out << '\0'; + std::string_view out_b_sv = out_b.view(); + __builtin_printf("%s", out_b_sv.data()); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__valarray-1.output b/libgomp/testsuite/libgomp.c++/target-std__valarray-1.output new file mode 100644 index 0000000..c441e06 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__valarray-1.output @@ -0,0 +1,22 @@ + +v1: 10 20 30 40 50 +v2: 5 4 3 2 1 +v1 + v2: 15 24 33 42 51 +v1 - v2: 5 16 27 38 49 +v1 * v2: 50 80 90 80 50 +v1 / v2: 2 5 10 20 50 +pow(v1, 2): 100 400 900 1600 2500 +sinh(v2): 74 27 10 3 1 +log(v1 * v2): 3 4 4 4 3 +Original array: 0 1 2 3 4 5 6 7 8 9 10 11 +Slice(2, 5, 1): 2 3 4 5 6 +Slice(1, 4, 3): 1 4 7 10 +Array after slice modification: 0 1 99 99 99 99 99 7 8 9 10 11 +Elements of v1 > 20: 30 40 50 +Masked array: 30 40 50 +Original shiftData: 1 2 3 4 5 +shift(2): 3 4 5 0 0 +cshift(-1): 5 1 2 3 4 +Sum(v1): 150 +Min(v1): 10 +Max(v1): 50 diff --git a/libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent-usm.C new file mode 100644 index 0000000..41ec80e --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__valarray-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent.C new file mode 100644 index 0000000..8933072b --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent.C @@ -0,0 +1,66 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <valarray> + +#define N 50000 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand (); +} + +#pragma omp declare target +bool validate (const std::valarray<int> &arr, int data[]) +{ + for (int i = 0; i < N; ++i) + if (arr[i] != data[i] * data[i] + i) + return false; + return true; +} +#pragma omp end declare target + +int main (void) +{ + int data[N]; + bool ok; + + srand (time (NULL)); + init (data); + +#ifdef MEM_SHARED + std::valarray<int> arr (data, N); +#else + std::valarray<int> arr; +#endif + +#ifndef MEM_SHARED + #pragma omp target data map (to: data[:N]) map (alloc: arr) +#endif + { + #pragma omp target + { +#ifndef MEM_SHARED + new (&arr) std::valarray<int> (data, N); +#endif + arr *= arr; + } + + #pragma omp target teams distribute parallel for + for (int i = 0; i < N; ++i) + arr[i] += i; + + #pragma omp target map (from: ok) + { + ok = validate (arr, data); +#ifndef MEM_SHARED + arr.~valarray (); +#endif + } + } + + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__vector-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__vector-concurrent-usm.C new file mode 100644 index 0000000..967bff3 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__vector-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__vector-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__vector-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__vector-concurrent.C new file mode 100644 index 0000000..a94b4cf --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__vector-concurrent.C @@ -0,0 +1,63 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <vector> + +#define N 50000 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand (); +} + +#pragma omp declare target +bool validate (const std::vector<int> &vec, int data[]) +{ + for (int i = 0; i < N; ++i) + if (vec[i] != data[i] * data[i]) + return false; + return true; +} +#pragma omp end declare target + +int main (void) +{ + int data[N]; + bool ok; + + srand (time (NULL)); + init (data); + +#ifdef MEM_SHARED + std::vector<int> vec (data, data + N); +#else + std::vector<int> vec; +#endif + +#ifndef MEM_SHARED + #pragma omp target data map (to: data[:N]) map (alloc: vec) +#endif + { +#ifndef MEM_SHARED + #pragma omp target + new (&vec) std::vector<int> (data, data + N); +#endif + + #pragma omp target teams distribute parallel for + for (int i = 0; i < N; ++i) + vec[i] *= vec[i]; + + #pragma omp target map (from: ok) + { + ok = validate (vec, data); +#ifndef MEM_SHARED + vec.~vector (); +#endif + } + } + + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-10.c b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-10.c new file mode 100644 index 0000000..00eb48b --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-10.c @@ -0,0 +1,64 @@ +/* { dg-do run } */ + +#include <string.h> +#include <stdlib.h> +#include <assert.h> + +#define N 64 + +typedef struct { + int *arr; + int size; +} B; + +#pragma omp declare mapper (mapB : B myb) map(to: myb.size, myb.arr) \ + map(tofrom: myb.arr[0:myb.size]) +// While GCC handles more, only default is ... +#pragma omp declare mapper (default : B myb) map(to: myb.size, myb.arr) \ + map(tofrom: myb.arr[0:myb.size]) + +struct A { + int *arr1; + B *arr2; + int arr3[N]; +}; + +int +main (int argc, char *argv[]) +{ + struct A var; + + memset (&var, 0, sizeof var); + var.arr1 = (int *) calloc (N, sizeof (int)); + var.arr2 = (B *) malloc (sizeof (B)); + var.arr2->arr = (int *) calloc (N, sizeof (float)); + var.arr2->size = N; + + { + // ... permitted here: + #pragma omp declare mapper (struct A x) map(to: x.arr1, x.arr2) \ + map(tofrom: x.arr1[0:N]) \ + map(mapper(default), tofrom: x.arr2[0:1]) + #pragma omp target + { + for (int i = 0; i < N; i++) + { + var.arr1[i]++; + var.arr2->arr[i]++; + } + } + } + + for (int i = 0; i < N; i++) + { + assert (var.arr1[i] == 1); + assert (var.arr2->arr[i] == 1); + assert (var.arr3[i] == 0); + } + + free (var.arr1); + free (var.arr2->arr); + free (var.arr2); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-11.c b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-11.c new file mode 100644 index 0000000..942d6a5 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-11.c @@ -0,0 +1,59 @@ +/* { dg-do run } */ + +#include <string.h> +#include <stdlib.h> +#include <assert.h> + +#define N 64 + +typedef struct B_tag { + int *arr; + int size; +} B; + +#pragma omp declare mapper (B myb) map(to: myb.size, myb.arr) \ + map(tofrom: myb.arr[0:myb.size]) + +struct A { + int *arr1; + B *arr2; + int arr3[N]; +}; + +int +main (int argc, char *argv[]) +{ + struct A var; + + memset (&var, 0, sizeof var); + var.arr1 = (int *) calloc (N, sizeof (int)); + var.arr2 = (B *) malloc (sizeof (B)); + var.arr2->arr = (int *) calloc (N, sizeof (int)); + var.arr2->size = N; + + { + #pragma omp declare mapper (struct A x) map(to: x.arr1, x.arr2) \ + map(tofrom: x.arr1[0:N]) map(tofrom: x.arr2[0:1]) + #pragma omp target + { + for (int i = 0; i < N; i++) + { + var.arr1[i]++; + var.arr2->arr[i]++; + } + } + } + + for (int i = 0; i < N; i++) + { + assert (var.arr1[i] == 1); + assert (var.arr2->arr[i] == 1); + assert (var.arr3[i] == 0); + } + + free (var.arr1); + free (var.arr2->arr); + free (var.arr2); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-12.c b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-12.c new file mode 100644 index 0000000..cfc6a91 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-12.c @@ -0,0 +1,94 @@ +/* { dg-do run } */ + +#include <string.h> +#include <stdlib.h> +#include <assert.h> + +#define N 64 + +typedef struct { + int *arr; + int size; +} B; + +#pragma omp declare mapper (samename : B myb) map(to: myb.size, myb.arr) \ + map(tofrom: myb.arr[0:myb.size]) +// While GCC handles more, only default is ... +#pragma omp declare mapper (default : B myb) map(to: myb.size, myb.arr) \ + map(tofrom: myb.arr[0:myb.size]) +typedef struct { + int *arr; + int size; +} C; + + +struct A { + int *arr1; + B *arr2; + C *arr3; +}; + +int +main (int argc, char *argv[]) +{ + struct A var; + + memset (&var, 0, sizeof var); + var.arr1 = (int *) calloc (N, sizeof (int)); + var.arr2 = (B *) malloc (sizeof (B)); + var.arr2->arr = (int *) calloc (N, sizeof (int)); + var.arr2->size = N; + var.arr3 = (C *) malloc (sizeof (C)); + var.arr3->arr = (int *) calloc (N, sizeof (int)); + var.arr3->size = N; + + { + // ... permitted here. + #pragma omp declare mapper (struct A x) map(to: x.arr1, x.arr2) \ + map(tofrom: x.arr1[0:N]) \ + map(mapper(default), tofrom: x.arr2[0:1]) + #pragma omp target + { + for (int i = 0; i < N; i++) + { + var.arr1[i]++; + var.arr2->arr[i]++; + } + } + } + + { + #pragma omp declare mapper (samename : C myc) map(to: myc.size, myc.arr) \ + map(tofrom: myc.arr[0:myc.size]) + // While GCC handles more, only default is ... + #pragma omp declare mapper (default : C myc) map(to: myc.size, myc.arr) \ + map(tofrom: myc.arr[0:myc.size]) + // ... permitted here. + #pragma omp declare mapper (struct A x) map(to: x.arr1, x.arr3) \ + map(tofrom: x.arr1[0:N]) \ + map(mapper( default ) , tofrom: *x.arr3) + #pragma omp target + { + for (int i = 0; i < N; i++) + { + var.arr1[i]++; + var.arr3->arr[i]++; + } + } + } + + for (int i = 0; i < N; i++) + { + assert (var.arr1[i] == 2); + assert (var.arr2->arr[i] == 1); + assert (var.arr3->arr[i] == 1); + } + + free (var.arr1); + free (var.arr2->arr); + free (var.arr2); + free (var.arr3->arr); + free (var.arr3); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-13.c b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-13.c new file mode 100644 index 0000000..c4784eb --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-13.c @@ -0,0 +1,55 @@ +/* { dg-do run } */ + +#include <assert.h> + +struct T { + int a; + int b; + int c; +}; + +void foo (void) +{ + struct T x; + x.a = x.b = x.c = 0; + +#pragma omp target + { + x.a++; + x.c++; + } + + assert (x.a == 1); + assert (x.b == 0); + assert (x.c == 1); +} + +// An identity mapper. This should do the same thing as the default! +#pragma omp declare mapper (struct T v) map(v) + +void bar (void) +{ + struct T x; + x.a = x.b = x.c = 0; + +#pragma omp target + { + x.b++; + } + +#pragma omp target map(x) + { + x.a++; + } + + assert (x.a == 1); + assert (x.b == 1); + assert (x.c == 0); +} + +int main (int argc, char *argv[]) +{ + foo (); + bar (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-14.c b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-14.c new file mode 100644 index 0000000..3e6027e --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-14.c @@ -0,0 +1,57 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <assert.h> + +struct Z { + int *arr; +}; + +void baz (struct Z *zarr, int len) +{ +#pragma omp declare mapper (struct Z myvar) map(to: myvar.arr) \ + map(tofrom: myvar.arr[0:len]) + zarr[0].arr = (int *) calloc (len, sizeof (int)); + zarr[5].arr = (int *) calloc (len, sizeof (int)); + +#pragma omp target map(zarr, *zarr) + { + for (int i = 0; i < len; i++) + zarr[0].arr[i]++; + } + +#pragma omp target map(zarr, zarr[5]) + { + for (int i = 0; i < len; i++) + zarr[5].arr[i]++; + } + +#pragma omp target map(zarr[5]) + { + for (int i = 0; i < len; i++) + zarr[5].arr[i]++; + } + +#pragma omp target map(zarr, zarr[5:1]) + { + for (int i = 0; i < len; i++) + zarr[5].arr[i]++; + } + + for (int i = 0; i < len; i++) + assert (zarr[0].arr[i] == 1); + + for (int i = 0; i < len; i++) + assert (zarr[5].arr[i] == 3); + + free (zarr[5].arr); + free (zarr[0].arr); +} + +int +main (int argc, char *argv[]) +{ + struct Z myzarr[10]; + baz (myzarr, 256); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-9.c b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-9.c new file mode 100644 index 0000000..324d535 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-9.c @@ -0,0 +1,62 @@ +/* { dg-do run } */ + +#include <string.h> +#include <stdlib.h> +#include <assert.h> + +#define N 64 + +struct A { + int *arr1; + float *arr2; + int arr3[N]; +}; + +int +main (int argc, char *argv[]) +{ + struct A var; + + memset (&var, 0, sizeof var); + var.arr1 = (int *) calloc (N, sizeof (int)); + var.arr2 = (float *) calloc (N, sizeof (float)); + + { + #pragma omp declare mapper (struct A x) map(to: x.arr1) \ + map(tofrom: x.arr1[0:N]) + #pragma omp target + { + for (int i = 0; i < N; i++) + var.arr1[i]++; + } + } + + { + #pragma omp declare mapper (struct A x) map(to: x.arr2) \ + map(tofrom: x.arr2[0:N]) + #pragma omp target + { + for (int i = 0; i < N; i++) + var.arr2[i]++; + } + } + + { + #pragma omp declare mapper (struct A x) map(tofrom: x.arr3[0:N]) + #pragma omp target + { + for (int i = 0; i < N; i++) + var.arr3[i]++; + } + } + + for (int i = 0; i < N; i++) + { + assert (var.arr1[i] == 1); + assert (var.arr2[i] == 1); + assert (var.arr3[i] == 1); + } + + free (var.arr1); + free (var.arr2); +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-2.c b/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-2.c new file mode 100644 index 0000000..b36d2f5 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-2.c @@ -0,0 +1,62 @@ +// PR libgomp/120444 +// Async version + +#include <omp.h> + +int main() +{ + #pragma omp parallel for + for (int dev = omp_initial_device; dev <= omp_get_num_devices (); dev++) + { + char *ptr = (char *) omp_target_alloc (sizeof(int) * 1024, dev); + + omp_depend_t dep; + #pragma omp depobj(dep) depend(inout: ptr) + + /* Play also around with the alignment - as hsa_amd_memory_fill operates + on multiples of 4 bytes (uint32_t). */ + + for (int start = 0; start < 32; start++) + for (int tail = 0; tail < 32; tail++) + { + unsigned char val = '0' + start + tail; +#if __cplusplus + void *ptr2 = omp_target_memset_async (ptr + start, val, + 1024 - start - tail, dev, 0); +#else + void *ptr2 = omp_target_memset_async (ptr + start, val, + 1024 - start - tail, dev, 0, nullptr); +#endif + if (ptr + start != ptr2) + __builtin_abort (); + + #pragma omp taskwait + + #pragma omp target device(dev) is_device_ptr(ptr) depend(depobj: dep) nowait + for (int i = start; i < 1024 - start - tail; i++) + { + if (ptr[i] != val) + __builtin_abort (); + ptr[i] += 2; + } + + omp_target_memset_async (ptr + start, val + 3, + 1024 - start - tail, dev, 1, &dep); + + #pragma omp target device(dev) is_device_ptr(ptr) depend(depobj: dep) nowait + for (int i = start; i < 1024 - start - tail; i++) + { + if (ptr[i] != val + 3) + __builtin_abort (); + ptr[i] += 1; + } + + omp_target_memset_async (ptr + start, val - 3, + 1024 - start - tail, dev, 1, &dep); + + #pragma omp taskwait depend (depobj: dep) + } + #pragma omp depobj(dep) destroy + omp_target_free (ptr, dev); + } +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-3.c b/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-3.c new file mode 100644 index 0000000..c0e4fa9 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-3.c @@ -0,0 +1,80 @@ +#include <stddef.h> +#include <stdint.h> +#include <omp.h> + +#define MIN(x,y) ((x) < (y) ? x : y) + +enum { N = 524288 + 8 }; + +static void +init_val (int8_t *ptr, int val, size_t count) +{ + #pragma omp target is_device_ptr(ptr) firstprivate(val, count) + __builtin_memset (ptr, val, count); +} + +static void +check_val (int8_t *ptr, int val, size_t count) +{ + if (count == 0) + return; + #pragma omp target is_device_ptr(ptr) firstprivate(val, count) + for (size_t i = 0; i < count; i++) + if (ptr[i] != val) __builtin_abort (); +} + +static void +test_it (int8_t *ptr, int lshift, size_t count) +{ + if (N < count + lshift) __builtin_abort (); + if (lshift >= 4) __builtin_abort (); + ptr += lshift; + + init_val (ptr, 'z', MIN (count + 32, N - lshift)); + + omp_target_memset (ptr, '1', count, omp_get_default_device()); + + check_val (ptr, '1', count); + check_val (ptr + count, 'z', MIN (32, N - lshift - count)); +} + + +int main() +{ + size_t size; + int8_t *ptr = (int8_t *) omp_target_alloc (N + 3, omp_get_default_device()); + ptr += (4 - (uintptr_t) ptr % 4) % 4; + if ((uintptr_t) ptr % 4 != 0) __builtin_abort (); + + test_it (ptr, 0, 1); + test_it (ptr, 3, 1); + test_it (ptr, 0, 4); + test_it (ptr, 3, 4); + test_it (ptr, 0, 5); + test_it (ptr, 3, 5); + test_it (ptr, 0, 6); + test_it (ptr, 3, 6); + + for (int i = 1; i <= 9; i++) + { + switch (i) + { + case 1: size = 16; break; // = 2^4 bytes + case 2: size = 32; break; // = 2^5 bytes + case 3: size = 64; break; // = 2^7 bytes + case 4: size = 128; break; // = 2^7 bytes + case 5: size = 256; break; // = 2^8 bytes + case 6: size = 512; break; // = 2^9 bytes + case 7: size = 65536; break; // = 2^16 bytes + case 8: size = 262144; break; // = 2^18 bytes + case 9: size = 524288; break; // = 2^20 bytes + default: __builtin_abort (); + } + test_it (ptr, 0, size); + test_it (ptr, 3, size); + test_it (ptr, 0, size + 1); + test_it (ptr, 3, size + 1); + test_it (ptr, 3, size + 2); + } + omp_target_free (ptr, omp_get_default_device()); +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset.c b/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset.c new file mode 100644 index 0000000..01909f8 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset.c @@ -0,0 +1,62 @@ +// PR libgomp/120444 + +#include <omp.h> + +int main() +{ + for (int dev = omp_initial_device; dev < omp_get_num_devices (); dev++) + { + char *ptr = (char *) omp_target_alloc (sizeof(int) * 1024, dev); + + /* Play also around with the alignment - as hsa_amd_memory_fill operates + on multiples of 4 bytes (uint32_t). */ + + for (int start = 0; start < 32; start++) + for (int tail = 0; tail < 32; tail++) + { + unsigned char val = '0' + start + tail; + void *ptr2 = omp_target_memset (ptr + start, val, + 1024 - start - tail, dev); + if (ptr + start != ptr2) + __builtin_abort (); + + #pragma omp target device(dev) is_device_ptr(ptr) + for (int i = start; i < 1024 - start - tail; i++) + if (ptr[i] != val) + __builtin_abort (); + + } + + /* Check 'small' values for correctness. */ + + for (int start = 0; start < 32; start++) + for (int size = 0; size <= 64 + 32; size++) + { + omp_target_memset (ptr, 'a' - 2, 1024, dev); + + unsigned char val = '0' + start + size % 32; + void *ptr2 = omp_target_memset (ptr + start, val, size, dev); + + if (ptr + start != ptr2) + __builtin_abort (); + + if (size == 0) + continue; + + #pragma omp target device(dev) is_device_ptr(ptr) + { + for (int i = 0; i < start; i++) + if (ptr[i] != 'a' - 2) + __builtin_abort (); + for (int i = start; i < start + size; i++) + if (ptr[i] != val) + __builtin_abort (); + for (int i = start + size + 1; i < 1024; i++) + if (ptr[i] != 'a' - 2) + __builtin_abort (); + } + } + + omp_target_free (ptr, dev); + } +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c b/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c index 35ec75d..9bf949a 100644 --- a/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c +++ b/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c @@ -1,3 +1,3 @@ /* { dg-additional-options -O0 } */ -#include "../libgomp.oacc-c-c++-common/abi-struct-1.c" +#include "target-abi-struct-1.c" diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1.c b/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1.c new file mode 100644 index 0000000..d9268af --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1.c @@ -0,0 +1 @@ +#include "../libgomp.oacc-c-c++-common/abi-struct-1.c" diff --git a/libgomp/testsuite/libgomp.fortran/omp_target_memset-2.f90 b/libgomp/testsuite/libgomp.fortran/omp_target_memset-2.f90 new file mode 100644 index 0000000..2641086 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/omp_target_memset-2.f90 @@ -0,0 +1,67 @@ +! PR libgomp/120444 +! Async version + +use omp_lib +use iso_c_binding +implicit none (type, external) +integer(c_int) :: dev + +!$omp parallel do +do dev = omp_initial_device, omp_get_num_devices () +block + integer(c_int) :: i, val, start, tail + type(c_ptr) :: ptr, ptr2, tmpptr + integer(c_int8_t), pointer, contiguous :: fptr(:) + integer(c_intptr_t) :: intptr + integer(c_size_t), parameter :: count = 1024 + integer(omp_depend_kind) :: dep(1) + + ptr = omp_target_alloc (count, dev) + + !$omp depobj(dep(1)) depend(inout: ptr) + + ! Play also around with the alignment - as hsa_amd_memory_fill operates + ! on multiples of 4 bytes (c_int32_t) + + do start = 0, 31 + do tail = 0, 31 + val = iachar('0') + start + tail + + tmpptr = transfer (transfer (ptr, intptr) + start, tmpptr) + ptr2 = omp_target_memset_async (tmpptr, val, count - start - tail, dev, 0) + + if (.not. c_associated (tmpptr, ptr2)) stop 1 + + !$omp taskwait + + !$omp target device(dev) is_device_ptr(ptr) depend(depobj: dep(1)) nowait + do i = 1 + start, int(count, c_int) - start - tail + call c_f_pointer (ptr, fptr, [count]) + if (fptr(i) /= int (val, c_int8_t)) stop 2 + fptr(i) = fptr(i) + 2_c_int8_t + end do + !$omp end target + + ptr2 = omp_target_memset_async (tmpptr, val + 3, & + count - start - tail, dev, 1, dep) + + !$omp target device(dev) is_device_ptr(ptr) depend(depobj: dep(1)) nowait + do i = 1 + start, int(count, c_int) - start - tail + call c_f_pointer (ptr, fptr, [count]) + if (fptr(i) /= int (val + 3, c_int8_t)) stop 3 + fptr(i) = fptr(i) - 1_c_int8_t + end do + !$omp end target + + ptr2 = omp_target_memset_async (tmpptr, val - 3, & + count - start - tail, dev, 1, dep) + + !$omp taskwait depend (depobj: dep(1)) + end do + end do + + !$omp depobj(dep(1)) destroy + call omp_target_free (ptr, dev); +end block +end do +end diff --git a/libgomp/testsuite/libgomp.fortran/omp_target_memset.f90 b/libgomp/testsuite/libgomp.fortran/omp_target_memset.f90 new file mode 100644 index 0000000..1ee184a --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/omp_target_memset.f90 @@ -0,0 +1,39 @@ +! PR libgomp/120444 + +use omp_lib +use iso_c_binding +implicit none (type, external) + +integer(c_int) :: dev, i, val, start, tail +type(c_ptr) :: ptr, ptr2, tmpptr +integer(c_int8_t), pointer, contiguous :: fptr(:) +integer(c_intptr_t) :: intptr +integer(c_size_t), parameter :: count = 1024 + +do dev = omp_initial_device, omp_get_num_devices () + ptr = omp_target_alloc (count, dev) + + ! Play also around with the alignment - as hsa_amd_memory_fill operates + ! on multiples of 4 bytes (c_int32_t) + + do start = 0, 31 + do tail = 0, 31 + val = iachar('0') + start + tail + + tmpptr = transfer (transfer (ptr, intptr) + start, tmpptr) + ptr2 = omp_target_memset (tmpptr, val, count - start - tail, dev) + + if (.not. c_associated (tmpptr, ptr2)) stop 1 + + !$omp target device(dev) is_device_ptr(ptr) + do i = 1 + start, int(count, c_int) - start - tail + call c_f_pointer (ptr, fptr, [count]) + if (fptr(i) /= int (val, c_int8_t)) stop 2 + end do + !$omp end target + end do + end do + + call omp_target_free (ptr, dev); +end do +end diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c index 8078655..4b54171 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c @@ -1,6 +1,10 @@ /* Inspired by 'gcc.target/nvptx/abi-struct-arg.c', 'gcc.target/nvptx/abi-struct-ret.c'. */ -/* See also '../libgomp.c-c++-common/target-abi-struct-1-O0.c'. */ +/* See also '../libgomp.c-c++-common/target-abi-struct-1.c'. */ + +/* To exercise PR119835 (if optimizations enabled): disable inlining, so that + GIMPLE passes still see the functions that return aggregate types. */ +#pragma GCC optimize "-fno-inline" typedef struct {} empty; /* See 'gcc/doc/extend.texi', "Empty Structures". */ typedef struct {char a;} schar; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c new file mode 100644 index 0000000..eda651d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c @@ -0,0 +1,96 @@ +/* { dg-prune-output "using .vector_length \\(32\\)" } */ + +/* PR libgomp/93226 */ + +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <openacc.h> + +enum { N = 1024 }; + +static int D[N]; +#pragma acc declare device_resident(D) + +#pragma acc routine +intptr_t init_d() +{ + for (int i = 0; i < N; i++) + D[i] = 27*i; + return (intptr_t) &D[0]; +} + +int +main () +{ + int *a, *b, *e; + void *d_a, *d_b, *d_c, *d_d, *d_e, *d_f; + intptr_t intptr; + bool fail = false; + + a = (int *) malloc (N*sizeof (int)); + b = (int *) malloc (N*sizeof (int)); + e = (int *) malloc (N*sizeof (int)); + d_c = acc_malloc (N*sizeof (int)); + d_f = acc_malloc (N*sizeof (int)); + + memset (e, 0xff, N*sizeof (int)); + d_e = acc_copyin (e, N*sizeof (int)); + + #pragma acc serial copyout(intptr) + intptr = init_d (); + d_d = (void*) intptr; + acc_memcpy_device (d_c, d_d, N*sizeof (int)); + + #pragma acc serial copy(fail) deviceptr(d_c) firstprivate(intptr) + { + int *cc = (int *) d_c; + int *dd = (int *) intptr; + for (int i = 0; i < N; i++) + if (dd[i] != 27*i || cc[i] != 27*i) + { + fail = true; + __builtin_abort (); + } + } + if (fail) __builtin_abort (); + + for (int i = 0; i < N; i++) + a[i] = 11*i; + for (int i = 0; i < N; i++) + b[i] = 31*i; + + d_a = acc_copyin (a, N*sizeof (int)); + acc_copyin_async (b, N*sizeof (int), acc_async_noval); + + #pragma acc parallel deviceptr(d_c) async + { + int *cc = (int *) d_c; + #pragma acc loop + for (int i = 0; i < N; i++) + cc[i] = -17*i; + } + + acc_memcpy_device_async (d_d, d_a, N*sizeof (int), acc_async_noval); + acc_memcpy_device_async (d_f, d_c, N*sizeof (int), acc_async_noval); + acc_wait (acc_async_noval); + d_b = acc_deviceptr (b); + acc_memcpy_device_async (d_e, d_b, N*sizeof (int), acc_async_noval); + acc_wait (acc_async_noval); + + #pragma acc serial deviceptr(d_d, d_e, d_f) copy(fail) + { + int *dd = (int *) d_d; + int *ee = (int *) d_e; + int *ff = (int *) d_f; + for (int i = 0; i < N; i++) + if (dd[i] != 11*i + || ee[i] != 31*i + || ff[i] != -17*i) + { + fail = true; + __builtin_abort (); + } + } + if (fail) __builtin_abort (); +} diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90 new file mode 100644 index 0000000..8f3a8f0 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90 @@ -0,0 +1,113 @@ +! { dg-prune-output "using .vector_length \\(32\\)" } + +! PR libgomp/93226 */ + +module m + use iso_c_binding + use openacc + implicit none (external, type) + + integer, parameter :: N = 1024 + + integer :: D(N) + !$acc declare device_resident(D) + +contains + + integer(c_intptr_t) function init_d() + !$acc routine + integer :: i + do i = 1, N + D(i) = 27*i + end do + init_d = loc(D) + end +end module + +program main + use m + implicit none (external, type) + + integer, allocatable, target :: a(:), b(:), e(:) + type(c_ptr) :: d_a, d_b, d_c, d_d, d_e, d_f + integer(c_intptr_t) intptr + integer :: i + logical fail + + fail = .false. + + allocate(a(N), b(N), e(N)) + d_c = acc_malloc (N*c_sizeof (i)) + d_f = acc_malloc (N*c_sizeof (i)) + + e = huge(e) + call acc_copyin (e, N*c_sizeof (i)); + d_e = acc_deviceptr (e); + + !$acc serial copyout(intptr) + intptr = init_d () + !$acc end serial + d_d = transfer(intptr, d_d) + call acc_memcpy_device (d_c, d_d, N*c_sizeof (i)) + + !$acc serial copy(fail) copy(a) deviceptr(d_c, d_d) firstprivate(intptr) + block + integer, pointer :: cc(:), dd(:) + call c_f_pointer (d_c, cc, [N]) + call c_f_pointer (d_d, dd, [N]) + a = cc + do i = 1, N + if (dd(i) /= 27*i .or. cc(i) /= 27*i) then + fail = .true. + stop 1 + end if + end do + end block + !$acc end serial + if (fail) error stop 1 + + do i = 1, N + a(i) = 11*i + b(i) = 31*i + end do + + call acc_copyin (a, N*c_sizeof (i)) + d_a = acc_deviceptr (a) + call acc_copyin_async (b, N*c_sizeof (i), acc_async_noval) + + !$acc parallel deviceptr(d_c) private(i) async + block + integer, pointer :: cc(:) + call c_f_pointer (d_c, cc, [N]) + !$acc loop + do i = 1, N + cc(i) = -17*i + end do + end block + !$acc end parallel + + call acc_memcpy_device_async (d_d, d_a, N*c_sizeof (i), acc_async_noval) + call acc_memcpy_device_async (d_f, d_c, N*c_sizeof (i), acc_async_noval) + call acc_wait (acc_async_noval) + d_b = acc_deviceptr (b) + call acc_memcpy_device_async (d_e, d_b, N*c_sizeof (i), acc_async_noval) + call acc_wait (acc_async_noval) + + !$acc serial deviceptr(d_d, d_e, d_f) private(i) copy(fail) + block + integer, pointer :: dd(:), ee(:), ff(:) + call c_f_pointer (d_d, dd, [N]) + call c_f_pointer (d_e, ee, [N]) + call c_f_pointer (d_f, ff, [N]) + do i = 1, N + if (dd(i) /= 11*i & + .or. ee(i) /= 31*i & + .or. ff(i) /= -17*i) then + fail = .true. + stop 2 + end if + end do + end block + !$acc end serial + if (fail) error stop 2 +end |