diff options
author | Ian Lance Taylor <iant@golang.org> | 2023-03-29 09:01:23 -0700 |
---|---|---|
committer | Ian Lance Taylor <iant@golang.org> | 2023-03-29 09:01:23 -0700 |
commit | 6612f4f8cb9b0d5af18ec69ad04e56debc3e6ced (patch) | |
tree | 1deecdcfbf185c7044bc861d0ace51285c96cb62 /libgomp | |
parent | 795cffe109e28b248a54b8ee583cbae48368c2a7 (diff) | |
parent | aa8f4242efc99f24de73c59d53996f28db28c13f (diff) | |
download | gcc-6612f4f8cb9b0d5af18ec69ad04e56debc3e6ced.zip gcc-6612f4f8cb9b0d5af18ec69ad04e56debc3e6ced.tar.gz gcc-6612f4f8cb9b0d5af18ec69ad04e56debc3e6ced.tar.bz2 |
Merge from trunk revision aa8f4242efc99f24de73c59d53996f28db28c13f.
Diffstat (limited to 'libgomp')
259 files changed, 10849 insertions, 931 deletions
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index d3b4758..c50e591 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,1758 @@ +2023-03-28 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE> + + * testsuite/libgomp.oacc-c-c++-common/routine-nohost-2.c: Add + weak_undefined options. + +2023-03-24 Tobias Burnus <tobias@codesourcery.com> + + * libgomp.texi (Offload-Target Specifics): Grammar fix. + +2023-03-24 Thomas Schwinge <thomas@codesourcery.com> + + PR fortran/104949 + * target.c (gomp_map_vars_internal) <GOMP_MAP_FIRSTPRIVATE>: Add + caveat/safeguard. + +2023-03-10 Thomas Schwinge <thomas@codesourcery.com> + + PR libgomp/90596 + * target.c (gomp_map_vars_internal): Allow for + 'param_kind == GOMP_MAP_VARS_OPENACC | GOMP_MAP_VARS_TARGET'. + * oacc-parallel.c (GOACC_parallel_keyed): Pass + 'GOMP_MAP_VARS_TARGET' to 'goacc_map_vars'. + * plugin/plugin-gcn.c (alloc_by_agent, gcn_exec) + (GOMP_OFFLOAD_openacc_exec, GOMP_OFFLOAD_openacc_async_exec): + Adjust, simplify. + (gomp_offload_free): Remove. + * plugin/plugin-nvptx.c (nvptx_exec, GOMP_OFFLOAD_openacc_exec) + (GOMP_OFFLOAD_openacc_async_exec): Adjust, simplify. + (cuda_free_argmem): Remove. + * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: + Adjust. + +2023-03-10 Thomas Schwinge <thomas@codesourcery.com> + + * target.c (gomp_copy_host2dev, gomp_map_vars_internal): Allow + libgomp 'cbuf' buffering with OpenACC 'async' for 'ephemeral' + data. + +2023-03-10 Thomas Schwinge <thomas@codesourcery.com> + + * target.c (gomp_map_vars_internal): Use 'OFFSET_INLINED' for + 'GOMP_MAP_IF_PRESENT'. + * plugin/plugin-gcn.c (gcn_exec, GOMP_OFFLOAD_openacc_exec) + (GOMP_OFFLOAD_openacc_async_exec): Adjust. + * plugin/plugin-nvptx.c (nvptx_exec, GOMP_OFFLOAD_openacc_exec) + (GOMP_OFFLOAD_openacc_async_exec): Likewise. + * testsuite/libgomp.oacc-c-c++-common/no_create-1.c: Add 'async' + testing. + * testsuite/libgomp.oacc-c-c++-common/no_create-2.c: Likewise. 
+ +2023-03-10 Thomas Schwinge <thomas@codesourcery.com> + + * oacc-async.c (goacc_wait): Remove 'acc_async_test' -> skip + shortcut. + +2023-03-10 Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/libgomp.oacc-c-c++-common/data-3.c: Document/verify + another aspect of OpenACC 'async' semantics. + +2023-03-10 Thomas Schwinge <thomas@codesourcery.com> + + * plugin/plugin-gcn.c (gcn_exec): Fix 'acc_ev_enqueue_launch_end' + position. + * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: + Verify 'acc_ev_alloc', 'acc_ev_free'. + +2023-03-09 Hongyu Wang <hongyu.wang@intel.com> + + PR libgomp/109062 + * env.c (wait_policy): Initialize to -1. + (initialize_icvs): Initialize icvs->wait_policy to -1. + * testsuite/libgomp.c-c++-common/pr109062.c: New test. + +2023-03-08 Tobias Burnus <tobias@codesourcery.com> + + * libgomp.texi (Offload-Target Specifics): Mention GCN_STACK_SIZE. + +2023-03-02 Kwok Cheung Yeung <kcy@codesourcery.com> + Paul-Antoine Arras <pa@codesourcery.com> + + * testsuite/libgomp.c/simd-math-1.c: New testcase. + +2023-03-01 Tobias Burnus <tobias@codesourcery.com> + + PR middle-end/108546 + * testsuite/libgomp.fortran/is_device_ptr-3.f90: New test. + * testsuite/libgomp.fortran/use_device_ptr-optional-4.f90: New test. + +2023-02-22 Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/libgomp.fortran/alloc-10.f90: Use + '-Wno-complain-wrong-lang'. + * testsuite/libgomp.fortran/alloc-11.f90: Likewise. + * testsuite/libgomp.fortran/alloc-7.f90: Likewise. + * testsuite/libgomp.fortran/alloc-9.f90: Likewise. + * testsuite/libgomp.fortran/allocate-1.f90: Likewise. + * testsuite/libgomp.fortran/depend-4.f90: Likewise. + * testsuite/libgomp.fortran/depend-5.f90: Likewise. + * testsuite/libgomp.fortran/depend-6.f90: Likewise. + * testsuite/libgomp.fortran/depend-7.f90: Likewise. + * testsuite/libgomp.fortran/depend-inoutset-1.f90: Likewise. + * testsuite/libgomp.fortran/examples-4/declare_target-1.f90: + Likewise. 
+ * testsuite/libgomp.fortran/examples-4/declare_target-2.f90: + Likewise. + * testsuite/libgomp.fortran/order-reproducible-1.f90: Likewise. + * testsuite/libgomp.fortran/order-reproducible-2.f90: Likewise. + * testsuite/libgomp.oacc-fortran/parallel-dims.f90: Likewise. + * testsuite/libgomp.fortran/task-detach-6.f90: Remove left-over + 'dg-prune-output'. + +2023-02-16 Jakub Jelinek <jakub@redhat.com> + + * libgomp.texi: Fix typos - theads -> threads. + +2023-02-16 Jakub Jelinek <jakub@redhat.com> + + * testsuite/libgomp.fortran/target-nowait-array-section.f90: Fix + comment typo and improve its wording. + +2023-02-15 Tobias Burnus <tobias@codesourcery.com> + + * target.c (gomp_target_rev): Dereference ptr + to get device address. + * testsuite/libgomp.fortran/reverse-offload-5.f90: Add test + for unallocated allocatable. + +2023-02-15 Tobias Burnus <tobias@codesourcery.com> + + * target.c (gomp_map_vars_internal): Add 'i > 0' before doing a + kind check. + (GOMP_target_enter_exit_data): If the next map item is + GOMP_MAP_ALWAYS_POINTER map it together with the current item. + * testsuite/libgomp.fortran/target-enter-data-3.f90: New test. + +2023-02-09 Tobias Burnus <tobias@codesourcery.com> + + PR fortran/107424 + * testsuite/libgomp.fortran/non-rectangular-loop-1.f90: New test. + * testsuite/libgomp.fortran/non-rectangular-loop-1a.f90: New test. + * testsuite/libgomp.fortran/non-rectangular-loop-2.f90: New test. + * testsuite/libgomp.fortran/non-rectangular-loop-3.f90: New test. + * testsuite/libgomp.fortran/non-rectangular-loop-4.f90: New test. + * testsuite/libgomp.fortran/non-rectangular-loop-5.f90: New test. + +2023-02-07 Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/libgomp.fortran/reverse-offload-6.f90: Fix nvptx + offloading compilation. + +2023-02-03 Tobias Burnus <tobias@codesourcery.com> + + * target.c (gomp_target_rev): Handle mapnum == 0 and avoid + freeing not allocated memory. 
+ * testsuite/libgomp.fortran/reverse-offload-6.f90: New test. + +2023-02-03 Tobias Burnus <tobias@codesourcery.com> + + * libgomp.texi (5.0 Impl. Status, gcn specifics): Update for + reverse offload. + * plugin/plugin-gcn.c (GOMP_OFFLOAD_get_num_devices): Accept + reverse-offload requirement. + +2023-02-02 Andrew Stubbs <ams@codesourcery.com> + + * config/gcn/libgomp-gcn.h (DEFAULT_GCN_STACK_SIZE): New define. + (DEFAULT_TEAM_ARENA_SIZE): New define. + (struct heap): Move to this file. + (struct kernargs_abi): Likewise. + * config/gcn/team.c (gomp_gcn_enter_kernel): Use team arena size from + the kernargs. + * libgomp.h: Include libgomp-gcn.h. + (TEAM_ARENA_SIZE): Remove. + (team_malloc): Update the error message. + * plugin/plugin-gcn.c (struct kernargs): Move common content to + struct kernargs_abi. + (struct agent_info): Rename team arenas to ephemeral memories. + (struct team_arena_list): Rename .... + (struct ephemeral_memories_list): to this. + (struct heap): Delete. + (team_arena_size): New variable. + (stack_size): New variable. + (print_kernel_dispatch): Update debug messages. + (init_environment_variables): Read GCN_TEAM_ARENA_SIZE. + Read GCN_STACK_SIZE. + (get_team_arena): Rename ... + (configure_ephemeral_memories): ... to this, and set up stacks. + (release_team_arena): Rename ... + (release_ephemeral_memories): ... to this. + (destroy_team_arenas): Rename ... + (destroy_ephemeral_memories): ... to this. + (create_kernel_dispatch): Add num_threads parameter. + Adjust for kernargs_abi refactor and ephemeral memories. + (release_kernel_dispatch): Adjust for ephemeral memories. + (run_kernel): Pass thread-count to create_kernel_dispatch. + (GOMP_OFFLOAD_init_device): Adjust for ephemeral memories. + (GOMP_OFFLOAD_fini_device): Adjust for ephemeral memories. + +2023-02-02 Tobias Burnus <tobias@codesourcery.com> + + * libgomp.texi (OpenMP TR11): Fix item for 'strict' modifier. 
+ +2023-02-01 Tobias Burnus <tobias@codesourcery.com> + + * testsuite/libgomp.fortran/allocate-3.f90: Fix ALIGN + usage, remove unused -fdump-tree-original. + * testsuite/libgomp.fortran/allocate-4.f90: New. + +2023-02-01 Tobias Burnus <tobias@codesourcery.com> + + * libgomp.texi (5.0 Impl. Status): Update 'requires' and 'ancestor'. + (GCN): Add item about 'omp requires'. + (nvptx): Likewise; add item about reverse offload. + +2023-01-27 Tobias Burnus <tobias@codesourcery.com> + + PR fortran/108558 + * testsuite/libgomp.fortran/has_device_addr.f90: New test. + +2023-01-23 Tobias Burnus <tobias@codesourcery.com> + + * libgomp.texi (OpenMP 5.0): Set non-rectangular + loop nest back to 'P' as Fortran support is incomplete. + +2023-01-19 Jakub Jelinek <jakub@redhat.com> + + PR middle-end/108459 + * testsuite/libgomp.c/pr108459.c: New test. + +2023-01-17 Martin Liska <mliska@suse.cz> + + * Makefile.in: Regenerate. + * configure: Regenerate. + +2023-01-07 LIU Hao <lh_mouse@126.com> + + PR middle-end/108300 + * config/mingw32/proc.c: Define `WIN32_LEAN_AND_MEAN` before + <windows.h>. + +2023-01-05 Jakub Jelinek <jakub@redhat.com> + + PR c++/108286 + * testsuite/libgomp.c++/pr108286.C: New test. + +2023-01-02 Jakub Jelinek <jakub@redhat.com> + + * libgomp.texi: Bump @copying's copyright year. + +2022-12-21 Chung-Lin Tang <cltang@codesourcery.com> + + PR target/99555 + * config/nvptx/bar.c (generation_to_barrier): Remove. + (futex_wait,futex_wake,do_spin,do_wait): Remove. + (GOMP_WAIT_H): Remove. + (#include "../linux/bar.c"): Remove. + (gomp_barrier_wait_end): New function. + (gomp_barrier_wait): Likewise. + (gomp_barrier_wait_last): Likewise. + (gomp_team_barrier_wait_end): Likewise. + (gomp_team_barrier_wait): Likewise. + (gomp_team_barrier_wait_final): Likewise. + (gomp_team_barrier_wait_cancel_end): Likewise. + (gomp_team_barrier_wait_cancel): Likewise. + (gomp_team_barrier_cancel): Likewise. + * config/nvptx/bar.h (gomp_barrier_t): Remove waiters, lock fields. 
+ (gomp_barrier_init): Remove init of waiters, lock fields. + (gomp_team_barrier_wake): Remove prototype, add new static inline + function. + +2022-12-21 Jakub Jelinek <jakub@redhat.com> + + PR c++/108180 + * testsuite/libgomp.c++/pr108180.C: New test. + +2022-12-16 Tobias Burnus <tobias@codesourcery.com> + + PR libfortran/108056 + * testsuite/libgomp.fortran/allocate-4.f90: Remove + accidentally added file. + +2022-12-15 Tobias Burnus <tobias@codesourcery.com> + + PR libfortran/108056 + * testsuite/libgomp.fortran/allocate-4.f90: New file. + +2022-12-14 Julian Brown <julian@codesourcery.com> + + * testsuite/libgomp.fortran/combined-directive-splitting-1.f90: New + test. + +2022-12-10 Tobias Burnus <tobias@codesourcery.com> + + * libgomp.h (struct target_mem_desc): Predeclare; move + below after 'reverse_splay_tree_node' and add rev_array + member. + (struct reverse_splay_tree_key_s, reverse_splay_compare): New. + (reverse_splay_tree_node, reverse_splay_tree, + reverse_splay_tree_key): New typedef. + (struct gomp_device_descr): Add mem_map_rev member. + * oacc-host.c (host_dispatch): NULL init .mem_map_rev. + * plugin/plugin-nvptx.c (GOMP_OFFLOAD_get_num_devices): Claim + support for GOMP_REQUIRES_REVERSE_OFFLOAD. + * splay-tree.h (splay_tree_callback_stop): New typedef; like + splay_tree_callback but returning int not void. + (splay_tree_foreach_lazy): Define; like splay_tree_foreach but + taking splay_tree_callback_stop as argument. + * splay-tree.c (splay_tree_foreach_internal_lazy, + splay_tree_foreach_lazy): New; but early exit if callback returns + nonzero. + * target.c: Instantiate splay_tree_c with splay_tree_prefix 'reverse'. + (gomp_map_lookup_rev): New. + (gomp_load_image_to_device): Handle reverse-offload function + lookup table. + (gomp_unload_image_from_device): Free devicep->mem_map_rev. 
+ (struct gomp_splay_tree_rev_lookup_data, gomp_splay_tree_rev_lookup, + gomp_map_rev_lookup, struct cpy_data, gomp_map_cdata_lookup_int, + gomp_map_cdata_lookup): New auxiliary structs and functions for + gomp_target_rev. + (gomp_target_rev): Implement reverse offloading and its mapping. + (gomp_target_init): Init current_device.mem_map_rev.root. + * testsuite/libgomp.fortran/reverse-offload-2.f90: New test. + * testsuite/libgomp.fortran/reverse-offload-3.f90: New test. + * testsuite/libgomp.fortran/reverse-offload-4.f90: New test. + * testsuite/libgomp.fortran/reverse-offload-5.f90: New test. + * testsuite/libgomp.fortran/reverse-offload-5a.f90: New test without + mapping of on-device allocated variables. + +2022-12-09 Tobias Burnus <tobias@codesourcery.com> + + * libgomp.texi (5.1 Impl. Status): Split allocate clause/directive + item about 'align'; mark clause as 'Y' and directive as 'N'. + * testsuite/libgomp.fortran/allocate-2.f90: New test. + * testsuite/libgomp.fortran/allocate-3.f90: New test. + +2022-12-06 Marcel Vollweiler <marcel@codesourcery.com> + + * config/gcn/icv-device.c (omp_get_teams_thread_limit): Added to + allow processing of device-specific values. + (omp_set_teams_thread_limit): Likewise. + (ialias): Likewise. + * config/nvptx/icv-device.c (omp_get_teams_thread_limit): Likewise. + (omp_set_teams_thread_limit): Likewise. + (ialias): Likewise. + * icv-device.c (omp_get_teams_thread_limit): Likewise. + (ialias): Likewise. + (omp_set_teams_thread_limit): Likewise. + * icv.c (omp_set_teams_thread_limit): Removed. + (omp_get_teams_thread_limit): Likewise. + (ialias): Likewise. + * libgomp.texi: Updated documentation for nvptx and gcn corresponding + to the limitation of the number of teams. + * plugin/plugin-gcn.c (limit_teams): New helper function that limits + the number of teams by twice the number of compute units. + (parse_target_attributes): Limit the number of teams on gcn offload + devices. 
+ * target.c (get_gomp_offload_icvs): Added teams_thread_limit_var + handling. + (gomp_load_image_to_device): Added a size check for the ICVs struct + variable. + (gomp_copy_back_icvs): New function that is used in GOMP_target_ext to + copy back the ICV values from device to host. + (GOMP_target_ext): Update the number of teams and threads in the kernel + args also considering device-specific values. + * testsuite/libgomp.c-c++-common/icv-4.c: Fixed an error in the reading + of OMP_TEAMS_THREAD_LIMIT from the environment. + * testsuite/libgomp.c-c++-common/icv-5.c: Extended. + * testsuite/libgomp.c-c++-common/icv-6.c: Extended. + * testsuite/libgomp.c-c++-common/icv-7.c: Extended. + * testsuite/libgomp.c-c++-common/icv-9.c: New test. + * testsuite/libgomp.fortran/icv-5.f90: New test. + * testsuite/libgomp.fortran/icv-6.f90: New test. + +2022-12-06 Tobias Burnus <tobias@codesourcery.com> + + * libgomp.texi (OpenMP 5.2): Add missing 'the'. + (TR11): Add missing '@tab N @tab'. + +2022-11-30 Tobias Burnus <tobias@codesourcery.com> + + * libgomp.texi (OpenMP Context Selectors): Add 'gfx803' to gcn's isa. + +2022-11-30 Paul-Antoine Arras <pa@codesourcery.com> + + * testsuite/libgomp.c/declare-variant-4-fiji.c: New test. + * testsuite/libgomp.c/declare-variant-4-gfx803.c: New test. + * testsuite/libgomp.c/declare-variant-4-gfx900.c: New test. + * testsuite/libgomp.c/declare-variant-4-gfx906.c: New test. + * testsuite/libgomp.c/declare-variant-4-gfx908.c: New test. + * testsuite/libgomp.c/declare-variant-4-gfx90a.c: New test. + * testsuite/libgomp.c/declare-variant-4.h: New header file. + +2022-11-28 Tobias Burnus <tobias@codesourcery.com> + + * libgomp.texi (OpenMP 5.2): Mark end-directive as Y. + +2022-11-25 Sandra Loosemore <sandra@codesourcery.com> + + * testsuite/lib/libgomp.exp: Load scanoffloadipa.exp library. + * testsuite/libgomp.c/target-simd-clone-1.c: New. + * testsuite/libgomp.c/target-simd-clone-2.c: New. + * testsuite/libgomp.c/target-simd-clone-3.c: New. 
+ +2022-11-25 Tobias Burnus <tobias@codesourcery.com> + + * plugin/plugin-nvptx.c (GOMP_OFFLOAD_load_image): Use unsigned int + for 'i' to match 'fn_entries'; regard absent GOMP_REV_OFFLOAD_VAR + as valid and the code having no reverse-offload code. + * testsuite/libgomp.c-c++-common/reverse-offload-2.c: New test. + +2022-11-25 Tobias Burnus <tobias@codesourcery.com> + + * libgomp.texi (OpenMP Implementation Status): Add three 5.1 items + and status for Technical Report (TR) 11. + +2022-11-21 Tobias Burnus <tobias@codesourcery.com> + + * config/gcn/libgomp-gcn.h (struct output): + Remove 'msg_u64' from the union, change + value_u64[2] to value_u64[6]. + * config/gcn/target.c (GOMP_target_ext): Update accordingly. + * plugin/plugin-gcn.c (process_reverse_offload, console_output): + Likewise. + +2022-11-19 Tobias Burnus <tobias@codesourcery.com> + + * config/gcn/libgomp-gcn.h: New file; contains + struct output, declared previously in plugin-gcn.c. + * config/gcn/target.c: Include it. + (GOMP_ADDITIONAL_ICVS): Declare as extern var. + (GOMP_target_ext): Handle reverse offload. + * plugin/plugin-gcn.c: Include libgomp-gcn.h. + (struct kernargs): Replace struct def by the one + from libgomp-gcn.h for output_data. + (process_reverse_offload): New. + (console_output): Call it. + +2022-11-16 Tobias Burnus <tobias@codesourcery.com> + Andrew Stubbs <ams@codesourcery.com> + + * config/gcn/team.c (gomp_gcn_enter_kernel): Use + __builtin_gcn_kernarg_ptr instead of asm ("s8"). + +2022-11-14 Martin Liska <mliska@suse.cz> + + Revert: + 2022-11-14 Martin Liska <mliska@suse.cz> + + * doc/amd-radeon-gcn.rst: New file. + * doc/conf.py: New file. + * doc/copyright.rst: New file. + * doc/cuda-streams-usage.rst: New file. + * doc/enabling-openacc.rst: New file. + * doc/enabling-openmp.rst: New file. + * doc/first-invocation-nvidia-cublas-library-api.rst: New file. + * doc/first-invocation-openacc-library-api.rst: New file. + * doc/funding.rst: New file. 
+ * doc/general-public-license-3.rst: New file. + * doc/gnu-free-documentation-license.rst: New file. + * doc/implementation-status-and-implementation-defined-behavior.rst: New file. + * doc/index.rst: New file. + * doc/indices-and-tables.rst: New file. + * doc/introduction.rst: New file. + * doc/memory-allocation-with-libmemkind.rst: New file. + * doc/nvptx.rst: New file. + * doc/offload-target-specifics.rst: New file. + * doc/openacc-environment-variables.rst: New file. + * doc/openacc-environment-variables/accdevicenum.rst: New file. + * doc/openacc-environment-variables/accdevicetype.rst: New file. + * doc/openacc-environment-variables/accproflib.rst: New file. + * doc/openacc-environment-variables/gccaccnotify.rst: New file. + * doc/openacc-introduction.rst: New file. + * doc/openacc-library-and-environment-variables.rst: New file. + * doc/openacc-library-interoperability.rst: New file. + * doc/openacc-profiling-interface.rst: New file. + * doc/openacc-runtime-library-routines.rst: New file. + * doc/openacc-runtime-library-routines/accasynctest.rst: New file. + * doc/openacc-runtime-library-routines/accasynctestall.rst: New file. + * doc/openacc-runtime-library-routines/accattach.rst: New file. + * doc/openacc-runtime-library-routines/acccopyin.rst: New file. + * doc/openacc-runtime-library-routines/acccopyout.rst: New file. + * doc/openacc-runtime-library-routines/acccreate.rst: New file. + * doc/openacc-runtime-library-routines/accdelete.rst: New file. + * doc/openacc-runtime-library-routines/accdetach.rst: New file. + * doc/openacc-runtime-library-routines/accdeviceptr.rst: New file. + * doc/openacc-runtime-library-routines/accfree.rst: New file. + * doc/openacc-runtime-library-routines/accgetcudastream.rst: New file. + * doc/openacc-runtime-library-routines/accgetcurrentcudacontext.rst: New file. + * doc/openacc-runtime-library-routines/accgetcurrentcudadevice.rst: New file. + * doc/openacc-runtime-library-routines/accgetdevicenum.rst: New file. 
+ * doc/openacc-runtime-library-routines/accgetdevicetype.rst: New file. + * doc/openacc-runtime-library-routines/accgetnumdevices.rst: New file. + * doc/openacc-runtime-library-routines/accgetproperty.rst: New file. + * doc/openacc-runtime-library-routines/acchostptr.rst: New file. + * doc/openacc-runtime-library-routines/accinit.rst: New file. + * doc/openacc-runtime-library-routines/accispresent.rst: New file. + * doc/openacc-runtime-library-routines/accmalloc.rst: New file. + * doc/openacc-runtime-library-routines/accmapdata.rst: New file. + * doc/openacc-runtime-library-routines/accmemcpyfromdevice.rst: New file. + * doc/openacc-runtime-library-routines/accmemcpytodevice.rst: New file. + * doc/openacc-runtime-library-routines/accondevice.rst: New file. + * doc/openacc-runtime-library-routines/accpresentorcopyin.rst: New file. + * doc/openacc-runtime-library-routines/accpresentorcreate.rst: New file. + * doc/openacc-runtime-library-routines/accproflookup.rst: New file. + * doc/openacc-runtime-library-routines/accprofregister.rst: New file. + * doc/openacc-runtime-library-routines/accprofunregister.rst: New file. + * doc/openacc-runtime-library-routines/accregisterlibrary.rst: New file. + * doc/openacc-runtime-library-routines/accsetcudastream.rst: New file. + * doc/openacc-runtime-library-routines/accsetdevicenum.rst: New file. + * doc/openacc-runtime-library-routines/accsetdevicetype.rst: New file. + * doc/openacc-runtime-library-routines/accshutdown.rst: New file. + * doc/openacc-runtime-library-routines/accunmapdata.rst: New file. + * doc/openacc-runtime-library-routines/accupdatedevice.rst: New file. + * doc/openacc-runtime-library-routines/accupdateself.rst: New file. + * doc/openacc-runtime-library-routines/accwait.rst: New file. + * doc/openacc-runtime-library-routines/accwaitall.rst: New file. + * doc/openacc-runtime-library-routines/accwaitallasync.rst: New file. + * doc/openacc-runtime-library-routines/accwaitasync.rst: New file. 
+ * doc/openmp-context-selectors.rst: New file. + * doc/openmp-environment-variables.rst: New file. + * doc/openmp-environment-variables/gompcpuaffinity.rst: New file. + * doc/openmp-environment-variables/gompdebug.rst: New file. + * doc/openmp-environment-variables/gomprtemsthreadpools.rst: New file. + * doc/openmp-environment-variables/gompspincount.rst: New file. + * doc/openmp-environment-variables/gompstacksize.rst: New file. + * doc/openmp-environment-variables/ompcancellation.rst: New file. + * doc/openmp-environment-variables/ompdefaultdevice.rst: New file. + * doc/openmp-environment-variables/ompdisplayenv.rst: New file. + * doc/openmp-environment-variables/ompdynamic.rst: New file. + * doc/openmp-environment-variables/ompmaxactivelevels.rst: New file. + * doc/openmp-environment-variables/ompmaxtaskpriority.rst: New file. + * doc/openmp-environment-variables/ompnested.rst: New file. + * doc/openmp-environment-variables/ompnumteams.rst: New file. + * doc/openmp-environment-variables/ompnumthreads.rst: New file. + * doc/openmp-environment-variables/ompplaces.rst: New file. + * doc/openmp-environment-variables/ompprocbind.rst: New file. + * doc/openmp-environment-variables/ompschedule.rst: New file. + * doc/openmp-environment-variables/ompstacksize.rst: New file. + * doc/openmp-environment-variables/omptargetoffload.rst: New file. + * doc/openmp-environment-variables/ompteamsthreadlimit.rst: New file. + * doc/openmp-environment-variables/ompthreadlimit.rst: New file. + * doc/openmp-environment-variables/ompwaitpolicy.rst: New file. + * doc/openmp-implementation-specifics.rst: New file. + * doc/openmp-implementation-status.rst: New file. + * doc/openmp-implementation-status/openmp-45.rst: New file. + * doc/openmp-implementation-status/openmp-50.rst: New file. + * doc/openmp-implementation-status/openmp-51.rst: New file. + * doc/openmp-implementation-status/openmp-52.rst: New file. + * doc/openmp-runtime-library-routines.rst: New file. 
+ * doc/openmp-runtime-library-routines/ompdestroylock.rst: New file. + * doc/openmp-runtime-library-routines/ompdestroynestlock.rst: New file. + * doc/openmp-runtime-library-routines/ompfulfillevent.rst: New file. + * doc/openmp-runtime-library-routines/ompgetactivelevel.rst: New file. + * doc/openmp-runtime-library-routines/ompgetancestorthreadnum.rst: New file. + * doc/openmp-runtime-library-routines/ompgetcancellation.rst: New file. + * doc/openmp-runtime-library-routines/ompgetdefaultdevice.rst: New file. + * doc/openmp-runtime-library-routines/ompgetdevicenum.rst: New file. + * doc/openmp-runtime-library-routines/ompgetdynamic.rst: New file. + * doc/openmp-runtime-library-routines/ompgetinitialdevice.rst: New file. + * doc/openmp-runtime-library-routines/ompgetlevel.rst: New file. + * doc/openmp-runtime-library-routines/ompgetmaxactivelevels.rst: New file. + * doc/openmp-runtime-library-routines/ompgetmaxtaskpriority.rst: New file. + * doc/openmp-runtime-library-routines/ompgetmaxteams.rst: New file. + * doc/openmp-runtime-library-routines/ompgetmaxthreads.rst: New file. + * doc/openmp-runtime-library-routines/ompgetnested.rst: New file. + * doc/openmp-runtime-library-routines/ompgetnumdevices.rst: New file. + * doc/openmp-runtime-library-routines/ompgetnumprocs.rst: New file. + * doc/openmp-runtime-library-routines/ompgetnumteams.rst: New file. + * doc/openmp-runtime-library-routines/ompgetnumthreads.rst: New file. + * doc/openmp-runtime-library-routines/ompgetprocbind.rst: New file. + * doc/openmp-runtime-library-routines/ompgetschedule.rst: New file. + * doc/openmp-runtime-library-routines/ompgetsupportedactivelevels.rst: New file. + * doc/openmp-runtime-library-routines/ompgetteamnum.rst: New file. + * doc/openmp-runtime-library-routines/ompgetteamsize.rst: New file. + * doc/openmp-runtime-library-routines/ompgetteamsthreadlimit.rst: New file. + * doc/openmp-runtime-library-routines/ompgetthreadlimit.rst: New file. 
+ * doc/openmp-runtime-library-routines/ompgetthreadnum.rst: New file. + * doc/openmp-runtime-library-routines/ompgetwtick.rst: New file. + * doc/openmp-runtime-library-routines/ompgetwtime.rst: New file. + * doc/openmp-runtime-library-routines/ompinfinal.rst: New file. + * doc/openmp-runtime-library-routines/ompinitlock.rst: New file. + * doc/openmp-runtime-library-routines/ompinitnestlock.rst: New file. + * doc/openmp-runtime-library-routines/ompinparallel.rst: New file. + * doc/openmp-runtime-library-routines/ompisinitialdevice.rst: New file. + * doc/openmp-runtime-library-routines/ompsetdefaultdevice.rst: New file. + * doc/openmp-runtime-library-routines/ompsetdynamic.rst: New file. + * doc/openmp-runtime-library-routines/ompsetlock.rst: New file. + * doc/openmp-runtime-library-routines/ompsetmaxactivelevels.rst: New file. + * doc/openmp-runtime-library-routines/ompsetnested.rst: New file. + * doc/openmp-runtime-library-routines/ompsetnestlock.rst: New file. + * doc/openmp-runtime-library-routines/ompsetnumteams.rst: New file. + * doc/openmp-runtime-library-routines/ompsetnumthreads.rst: New file. + * doc/openmp-runtime-library-routines/ompsetschedule.rst: New file. + * doc/openmp-runtime-library-routines/ompsetteamsthreadlimit.rst: New file. + * doc/openmp-runtime-library-routines/omptestlock.rst: New file. + * doc/openmp-runtime-library-routines/omptestnestlock.rst: New file. + * doc/openmp-runtime-library-routines/ompunsetlock.rst: New file. + * doc/openmp-runtime-library-routines/ompunsetnestlock.rst: New file. + * doc/reporting-bugs.rst: New file. + * doc/the-libgomp-abi.rst: New file. + * doc/the-libgomp-abi/implementing-atomic-construct.rst: New file. + * doc/the-libgomp-abi/implementing-barrier-construct.rst: New file. + * doc/the-libgomp-abi/implementing-critical-construct.rst: New file. + * doc/the-libgomp-abi/implementing-firstprivate-lastprivate-copyin-and-copyprivate-clauses.rst: + New file. 
+ * doc/the-libgomp-abi/implementing-flush-construct.rst: New file. + * doc/the-libgomp-abi/implementing-for-construct.rst: New file. + * doc/the-libgomp-abi/implementing-master-construct.rst: New file. + * doc/the-libgomp-abi/implementing-openaccs-parallel-construct.rst: New file. + * doc/the-libgomp-abi/implementing-ordered-construct.rst: New file. + * doc/the-libgomp-abi/implementing-parallel-construct.rst: New file. + * doc/the-libgomp-abi/implementing-private-clause.rst: New file. + * doc/the-libgomp-abi/implementing-reduction-clause.rst: New file. + * doc/the-libgomp-abi/implementing-sections-construct.rst: New file. + * doc/the-libgomp-abi/implementing-single-construct.rst: New file. + * doc/the-libgomp-abi/implementing-threadprivate-construct.rst: New file. + +2022-11-14 Martin Liska <mliska@suse.cz> + + Revert: + 2022-11-14 Martin Liska <mliska@suse.cz> + + * Makefile.in: Support Sphinx based documentation. + +2022-11-14 Martin Liska <mliska@suse.cz> + + Revert: + 2022-11-14 Martin Liska <mliska@suse.cz> + + * libgomp.texi: Removed. + +2022-11-14 Martin Liska <mliska@suse.cz> + + Revert: + 2022-11-14 Martin Liska <mliska@suse.cz> + + * Makefile.in: Support --with-sphinx-build. + * configure.ac: Likewise.. + * configure: Regenerate. + +2022-11-14 Martin Liska <mliska@suse.cz> + + Revert: + 2022-11-09 Martin Liska <mliska@suse.cz> + + * Makefile.in: Build info pages conditionally. + +2022-11-14 Martin Liska <mliska@suse.cz> + + Revert: + 2022-11-14 Martin Liska <mliska@suse.cz> + + * doc/amd-radeon-gcn.rst: + Add trailing newline. + * doc/copyright.rst: + Add trailing newline. + * doc/cuda-streams-usage.rst: + Add trailing newline. + * doc/enabling-openacc.rst: + Add trailing newline. + * doc/enabling-openmp.rst: + Add trailing newline. + * doc/first-invocation-nvidia-cublas-library-api.rst: + Add trailing newline. + * doc/first-invocation-openacc-library-api.rst: + Add trailing newline. + * doc/funding.rst: + Add trailing newline. 
+ * doc/general-public-license-3.rst: + Add trailing newline. + * doc/gnu-free-documentation-license.rst: + Add trailing newline. + * doc/implementation-status-and-implementation-defined-behavior.rst: + Add trailing newline. + * doc/index.rst: + Add trailing newline. + * doc/indices-and-tables.rst: + Add trailing newline. + * doc/introduction.rst: + Add trailing newline. + * doc/memory-allocation-with-libmemkind.rst: + Add trailing newline. + * doc/nvptx.rst: + Add trailing newline. + * doc/offload-target-specifics.rst: + Add trailing newline. + * doc/openacc-environment-variables.rst: + Add trailing newline. + * doc/openacc-environment-variables/accdevicenum.rst: + Add trailing newline. + * doc/openacc-environment-variables/accdevicetype.rst: + Add trailing newline. + * doc/openacc-environment-variables/accproflib.rst: + Add trailing newline. + * doc/openacc-environment-variables/gccaccnotify.rst: + Add trailing newline. + * doc/openacc-introduction.rst: + Add trailing newline. + * doc/openacc-library-and-environment-variables.rst: + Add trailing newline. + * doc/openacc-library-interoperability.rst: + Add trailing newline. + * doc/openacc-profiling-interface.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accasynctest.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accasynctestall.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accattach.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/acccopyin.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/acccopyout.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/acccreate.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accdelete.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accdetach.rst: + Add trailing newline. 
+ * doc/openacc-runtime-library-routines/accdeviceptr.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accfree.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accgetcudastream.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accgetcurrentcudacontext.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accgetcurrentcudadevice.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accgetdevicenum.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accgetdevicetype.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accgetnumdevices.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accgetproperty.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/acchostptr.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accinit.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accispresent.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accmalloc.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accmapdata.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accmemcpyfromdevice.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accmemcpytodevice.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accondevice.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accpresentorcopyin.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accpresentorcreate.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accproflookup.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accprofregister.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accprofunregister.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accregisterlibrary.rst: + Add trailing newline. 
+ * doc/openacc-runtime-library-routines/accsetcudastream.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accsetdevicenum.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accsetdevicetype.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accshutdown.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accunmapdata.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accupdatedevice.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accupdateself.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accwait.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accwaitall.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accwaitallasync.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accwaitasync.rst: + Add trailing newline. + * doc/openmp-context-selectors.rst: + Add trailing newline. + * doc/openmp-environment-variables.rst: + Add trailing newline. + * doc/openmp-environment-variables/gompcpuaffinity.rst: + Add trailing newline. + * doc/openmp-environment-variables/gompdebug.rst: + Add trailing newline. + * doc/openmp-environment-variables/gomprtemsthreadpools.rst: + Add trailing newline. + * doc/openmp-environment-variables/gompspincount.rst: + Add trailing newline. + * doc/openmp-environment-variables/gompstacksize.rst: + Add trailing newline. + * doc/openmp-environment-variables/ompcancellation.rst: + Add trailing newline. + * doc/openmp-environment-variables/ompdefaultdevice.rst: + Add trailing newline. + * doc/openmp-environment-variables/ompdisplayenv.rst: + Add trailing newline. + * doc/openmp-environment-variables/ompdynamic.rst: + Add trailing newline. + * doc/openmp-environment-variables/ompmaxactivelevels.rst: + Add trailing newline. + * doc/openmp-environment-variables/ompmaxtaskpriority.rst: + Add trailing newline. 
+ * doc/openmp-environment-variables/ompnested.rst: + Add trailing newline. + * doc/openmp-environment-variables/ompnumteams.rst: + Add trailing newline. + * doc/openmp-environment-variables/ompnumthreads.rst: + Add trailing newline. + * doc/openmp-environment-variables/ompplaces.rst: + Add trailing newline. + * doc/openmp-environment-variables/ompprocbind.rst: + Add trailing newline. + * doc/openmp-environment-variables/ompschedule.rst: + Add trailing newline. + * doc/openmp-environment-variables/ompstacksize.rst: + Add trailing newline. + * doc/openmp-environment-variables/omptargetoffload.rst: + Add trailing newline. + * doc/openmp-environment-variables/ompteamsthreadlimit.rst: + Add trailing newline. + * doc/openmp-environment-variables/ompthreadlimit.rst: + Add trailing newline. + * doc/openmp-environment-variables/ompwaitpolicy.rst: + Add trailing newline. + * doc/openmp-implementation-specifics.rst: + Add trailing newline. + * doc/openmp-implementation-status.rst: + Add trailing newline. + * doc/openmp-implementation-status/openmp-45.rst: + Add trailing newline. + * doc/openmp-implementation-status/openmp-50.rst: + Add trailing newline. + * doc/openmp-implementation-status/openmp-51.rst: + Add trailing newline. + * doc/openmp-implementation-status/openmp-52.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompdestroylock.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompdestroynestlock.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompfulfillevent.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetactivelevel.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetancestorthreadnum.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetcancellation.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetdefaultdevice.rst: + Add trailing newline. 
+ * doc/openmp-runtime-library-routines/ompgetdevicenum.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetdynamic.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetinitialdevice.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetlevel.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetmaxactivelevels.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetmaxtaskpriority.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetmaxteams.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetmaxthreads.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetnested.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetnumdevices.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetnumprocs.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetnumteams.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetnumthreads.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetprocbind.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetschedule.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetsupportedactivelevels.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetteamnum.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetteamsize.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetteamsthreadlimit.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetthreadlimit.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetthreadnum.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetwtick.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetwtime.rst: + Add trailing newline. 
+ * doc/openmp-runtime-library-routines/ompinfinal.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompinitlock.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompinitnestlock.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompinparallel.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompisinitialdevice.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompsetdefaultdevice.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompsetdynamic.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompsetlock.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompsetmaxactivelevels.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompsetnested.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompsetnestlock.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompsetnumteams.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompsetnumthreads.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompsetschedule.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompsetteamsthreadlimit.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/omptestlock.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/omptestnestlock.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompunsetlock.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompunsetnestlock.rst: + Add trailing newline. + * doc/reporting-bugs.rst: + Add trailing newline. + * doc/the-libgomp-abi.rst: + Add trailing newline. + * doc/the-libgomp-abi/implementing-atomic-construct.rst: + Add trailing newline. + * doc/the-libgomp-abi/implementing-barrier-construct.rst: + Add trailing newline. + * doc/the-libgomp-abi/implementing-critical-construct.rst: + Add trailing newline. 
+ * doc/the-libgomp-abi/implementing-firstprivate-lastprivate-copyin-and-copyprivate-clauses.rst: + Add trailing newline. + * doc/the-libgomp-abi/implementing-flush-construct.rst: + Add trailing newline. + * doc/the-libgomp-abi/implementing-for-construct.rst: + Add trailing newline. + * doc/the-libgomp-abi/implementing-master-construct.rst: + Add trailing newline. + * doc/the-libgomp-abi/implementing-openaccs-parallel-construct.rst: + Add trailing newline. + * doc/the-libgomp-abi/implementing-ordered-construct.rst: + Add trailing newline. + * doc/the-libgomp-abi/implementing-parallel-construct.rst: + Add trailing newline. + * doc/the-libgomp-abi/implementing-private-clause.rst: + Add trailing newline. + * doc/the-libgomp-abi/implementing-reduction-clause.rst: + Add trailing newline. + * doc/the-libgomp-abi/implementing-sections-construct.rst: + Add trailing newline. + * doc/the-libgomp-abi/implementing-single-construct.rst: + Add trailing newline. + * doc/the-libgomp-abi/implementing-threadprivate-construct.rst: + Add trailing newline. + +2022-11-14 Martin Liska <mliska@suse.cz> + + Revert: + 2022-11-09 Martin Liska <mliska@suse.cz> + + * Makefile.in: Add missing HAS_SPHINX_BUILD. + +2022-11-14 Martin Liska <mliska@suse.cz> + + Revert: + 2022-11-14 Martin Liska <mliska@suse.cz> + + * doc/conf.py: Add newline at last line. + +2022-11-14 Martin Liska <mliska@suse.cz> + + Revert: + 2022-11-14 Martin Liska <mliska@suse.cz> + + PR other/107620 + * configure: Regenerate. + * configure.ac: Always set sphinx-build. + +2022-11-13 Martin Liska <mliska@suse.cz> + + PR other/107620 + * configure: Regenerate. + * configure.ac: Always set sphinx-build. + +2022-11-12 Jakub Jelinek <jakub@redhat.com> + + PR libgomp/107641 + * env.c (parse_unsigned_long): Cast params[2] to uintptr_t rather than + unsigned long. Change type of upper from unsigned to unsigned long. + +2022-11-10 Martin Liska <mliska@suse.cz> + + * doc/conf.py: Add newline at last line. 
+ +2022-11-09 Martin Liska <mliska@suse.cz> + + * Makefile.in: Add missing HAS_SPHINX_BUILD. + +2022-11-09 Martin Liska <mliska@suse.cz> + + * doc/amd-radeon-gcn.rst: + Add trailing newline. + * doc/copyright.rst: + Add trailing newline. + * doc/cuda-streams-usage.rst: + Add trailing newline. + * doc/enabling-openacc.rst: + Add trailing newline. + * doc/enabling-openmp.rst: + Add trailing newline. + * doc/first-invocation-nvidia-cublas-library-api.rst: + Add trailing newline. + * doc/first-invocation-openacc-library-api.rst: + Add trailing newline. + * doc/funding.rst: + Add trailing newline. + * doc/general-public-license-3.rst: + Add trailing newline. + * doc/gnu-free-documentation-license.rst: + Add trailing newline. + * doc/implementation-status-and-implementation-defined-behavior.rst: + Add trailing newline. + * doc/index.rst: + Add trailing newline. + * doc/indices-and-tables.rst: + Add trailing newline. + * doc/introduction.rst: + Add trailing newline. + * doc/memory-allocation-with-libmemkind.rst: + Add trailing newline. + * doc/nvptx.rst: + Add trailing newline. + * doc/offload-target-specifics.rst: + Add trailing newline. + * doc/openacc-environment-variables.rst: + Add trailing newline. + * doc/openacc-environment-variables/accdevicenum.rst: + Add trailing newline. + * doc/openacc-environment-variables/accdevicetype.rst: + Add trailing newline. + * doc/openacc-environment-variables/accproflib.rst: + Add trailing newline. + * doc/openacc-environment-variables/gccaccnotify.rst: + Add trailing newline. + * doc/openacc-introduction.rst: + Add trailing newline. + * doc/openacc-library-and-environment-variables.rst: + Add trailing newline. + * doc/openacc-library-interoperability.rst: + Add trailing newline. + * doc/openacc-profiling-interface.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accasynctest.rst: + Add trailing newline. 
+ * doc/openacc-runtime-library-routines/accasynctestall.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accattach.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/acccopyin.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/acccopyout.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/acccreate.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accdelete.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accdetach.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accdeviceptr.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accfree.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accgetcudastream.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accgetcurrentcudacontext.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accgetcurrentcudadevice.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accgetdevicenum.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accgetdevicetype.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accgetnumdevices.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accgetproperty.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/acchostptr.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accinit.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accispresent.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accmalloc.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accmapdata.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accmemcpyfromdevice.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accmemcpytodevice.rst: + Add trailing newline. 
+ * doc/openacc-runtime-library-routines/accondevice.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accpresentorcopyin.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accpresentorcreate.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accproflookup.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accprofregister.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accprofunregister.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accregisterlibrary.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accsetcudastream.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accsetdevicenum.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accsetdevicetype.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accshutdown.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accunmapdata.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accupdatedevice.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accupdateself.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accwait.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accwaitall.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accwaitallasync.rst: + Add trailing newline. + * doc/openacc-runtime-library-routines/accwaitasync.rst: + Add trailing newline. + * doc/openmp-context-selectors.rst: + Add trailing newline. + * doc/openmp-environment-variables.rst: + Add trailing newline. + * doc/openmp-environment-variables/gompcpuaffinity.rst: + Add trailing newline. + * doc/openmp-environment-variables/gompdebug.rst: + Add trailing newline. + * doc/openmp-environment-variables/gomprtemsthreadpools.rst: + Add trailing newline. + * doc/openmp-environment-variables/gompspincount.rst: + Add trailing newline. 
+ * doc/openmp-environment-variables/gompstacksize.rst: + Add trailing newline. + * doc/openmp-environment-variables/ompcancellation.rst: + Add trailing newline. + * doc/openmp-environment-variables/ompdefaultdevice.rst: + Add trailing newline. + * doc/openmp-environment-variables/ompdisplayenv.rst: + Add trailing newline. + * doc/openmp-environment-variables/ompdynamic.rst: + Add trailing newline. + * doc/openmp-environment-variables/ompmaxactivelevels.rst: + Add trailing newline. + * doc/openmp-environment-variables/ompmaxtaskpriority.rst: + Add trailing newline. + * doc/openmp-environment-variables/ompnested.rst: + Add trailing newline. + * doc/openmp-environment-variables/ompnumteams.rst: + Add trailing newline. + * doc/openmp-environment-variables/ompnumthreads.rst: + Add trailing newline. + * doc/openmp-environment-variables/ompplaces.rst: + Add trailing newline. + * doc/openmp-environment-variables/ompprocbind.rst: + Add trailing newline. + * doc/openmp-environment-variables/ompschedule.rst: + Add trailing newline. + * doc/openmp-environment-variables/ompstacksize.rst: + Add trailing newline. + * doc/openmp-environment-variables/omptargetoffload.rst: + Add trailing newline. + * doc/openmp-environment-variables/ompteamsthreadlimit.rst: + Add trailing newline. + * doc/openmp-environment-variables/ompthreadlimit.rst: + Add trailing newline. + * doc/openmp-environment-variables/ompwaitpolicy.rst: + Add trailing newline. + * doc/openmp-implementation-specifics.rst: + Add trailing newline. + * doc/openmp-implementation-status.rst: + Add trailing newline. + * doc/openmp-implementation-status/openmp-45.rst: + Add trailing newline. + * doc/openmp-implementation-status/openmp-50.rst: + Add trailing newline. + * doc/openmp-implementation-status/openmp-51.rst: + Add trailing newline. + * doc/openmp-implementation-status/openmp-52.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines.rst: + Add trailing newline. 
+ * doc/openmp-runtime-library-routines/ompdestroylock.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompdestroynestlock.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompfulfillevent.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetactivelevel.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetancestorthreadnum.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetcancellation.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetdefaultdevice.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetdevicenum.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetdynamic.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetinitialdevice.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetlevel.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetmaxactivelevels.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetmaxtaskpriority.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetmaxteams.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetmaxthreads.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetnested.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetnumdevices.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetnumprocs.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetnumteams.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetnumthreads.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetprocbind.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetschedule.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetsupportedactivelevels.rst: + Add trailing newline. 
+ * doc/openmp-runtime-library-routines/ompgetteamnum.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetteamsize.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetteamsthreadlimit.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetthreadlimit.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetthreadnum.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetwtick.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompgetwtime.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompinfinal.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompinitlock.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompinitnestlock.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompinparallel.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompisinitialdevice.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompsetdefaultdevice.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompsetdynamic.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompsetlock.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompsetmaxactivelevels.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompsetnested.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompsetnestlock.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompsetnumteams.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompsetnumthreads.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompsetschedule.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompsetteamsthreadlimit.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/omptestlock.rst: + Add trailing newline. 
+ * doc/openmp-runtime-library-routines/omptestnestlock.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompunsetlock.rst: + Add trailing newline. + * doc/openmp-runtime-library-routines/ompunsetnestlock.rst: + Add trailing newline. + * doc/reporting-bugs.rst: + Add trailing newline. + * doc/the-libgomp-abi.rst: + Add trailing newline. + * doc/the-libgomp-abi/implementing-atomic-construct.rst: + Add trailing newline. + * doc/the-libgomp-abi/implementing-barrier-construct.rst: + Add trailing newline. + * doc/the-libgomp-abi/implementing-critical-construct.rst: + Add trailing newline. + * doc/the-libgomp-abi/implementing-firstprivate-lastprivate-copyin-and-copyprivate-clauses.rst: + Add trailing newline. + * doc/the-libgomp-abi/implementing-flush-construct.rst: + Add trailing newline. + * doc/the-libgomp-abi/implementing-for-construct.rst: + Add trailing newline. + * doc/the-libgomp-abi/implementing-master-construct.rst: + Add trailing newline. + * doc/the-libgomp-abi/implementing-openaccs-parallel-construct.rst: + Add trailing newline. + * doc/the-libgomp-abi/implementing-ordered-construct.rst: + Add trailing newline. + * doc/the-libgomp-abi/implementing-parallel-construct.rst: + Add trailing newline. + * doc/the-libgomp-abi/implementing-private-clause.rst: + Add trailing newline. + * doc/the-libgomp-abi/implementing-reduction-clause.rst: + Add trailing newline. + * doc/the-libgomp-abi/implementing-sections-construct.rst: + Add trailing newline. + * doc/the-libgomp-abi/implementing-single-construct.rst: + Add trailing newline. + * doc/the-libgomp-abi/implementing-threadprivate-construct.rst: + Add trailing newline. + +2022-11-09 Martin Liska <mliska@suse.cz> + + * Makefile.in: Build info pages conditionally. + +2022-11-09 Martin Liska <mliska@suse.cz> + + * Makefile.in: Support --with-sphinx-build. + * configure.ac: Likewise. + * configure: Regenerate. + +2022-11-09 Martin Liska <mliska@suse.cz> + + * libgomp.texi: Removed. 
+ +2022-11-09 Martin Liska <mliska@suse.cz> + + * Makefile.in: Support Sphinx based documentation. + +2022-11-09 Martin Liska <mliska@suse.cz> + + * doc/amd-radeon-gcn.rst: New file. + * doc/conf.py: New file. + * doc/copyright.rst: New file. + * doc/cuda-streams-usage.rst: New file. + * doc/enabling-openacc.rst: New file. + * doc/enabling-openmp.rst: New file. + * doc/first-invocation-nvidia-cublas-library-api.rst: New file. + * doc/first-invocation-openacc-library-api.rst: New file. + * doc/funding.rst: New file. + * doc/general-public-license-3.rst: New file. + * doc/gnu-free-documentation-license.rst: New file. + * doc/implementation-status-and-implementation-defined-behavior.rst: New file. + * doc/index.rst: New file. + * doc/indices-and-tables.rst: New file. + * doc/introduction.rst: New file. + * doc/memory-allocation-with-libmemkind.rst: New file. + * doc/nvptx.rst: New file. + * doc/offload-target-specifics.rst: New file. + * doc/openacc-environment-variables.rst: New file. + * doc/openacc-environment-variables/accdevicenum.rst: New file. + * doc/openacc-environment-variables/accdevicetype.rst: New file. + * doc/openacc-environment-variables/accproflib.rst: New file. + * doc/openacc-environment-variables/gccaccnotify.rst: New file. + * doc/openacc-introduction.rst: New file. + * doc/openacc-library-and-environment-variables.rst: New file. + * doc/openacc-library-interoperability.rst: New file. + * doc/openacc-profiling-interface.rst: New file. + * doc/openacc-runtime-library-routines.rst: New file. + * doc/openacc-runtime-library-routines/accasynctest.rst: New file. + * doc/openacc-runtime-library-routines/accasynctestall.rst: New file. + * doc/openacc-runtime-library-routines/accattach.rst: New file. + * doc/openacc-runtime-library-routines/acccopyin.rst: New file. + * doc/openacc-runtime-library-routines/acccopyout.rst: New file. + * doc/openacc-runtime-library-routines/acccreate.rst: New file. 
+ * doc/openacc-runtime-library-routines/accdelete.rst: New file. + * doc/openacc-runtime-library-routines/accdetach.rst: New file. + * doc/openacc-runtime-library-routines/accdeviceptr.rst: New file. + * doc/openacc-runtime-library-routines/accfree.rst: New file. + * doc/openacc-runtime-library-routines/accgetcudastream.rst: New file. + * doc/openacc-runtime-library-routines/accgetcurrentcudacontext.rst: New file. + * doc/openacc-runtime-library-routines/accgetcurrentcudadevice.rst: New file. + * doc/openacc-runtime-library-routines/accgetdevicenum.rst: New file. + * doc/openacc-runtime-library-routines/accgetdevicetype.rst: New file. + * doc/openacc-runtime-library-routines/accgetnumdevices.rst: New file. + * doc/openacc-runtime-library-routines/accgetproperty.rst: New file. + * doc/openacc-runtime-library-routines/acchostptr.rst: New file. + * doc/openacc-runtime-library-routines/accinit.rst: New file. + * doc/openacc-runtime-library-routines/accispresent.rst: New file. + * doc/openacc-runtime-library-routines/accmalloc.rst: New file. + * doc/openacc-runtime-library-routines/accmapdata.rst: New file. + * doc/openacc-runtime-library-routines/accmemcpyfromdevice.rst: New file. + * doc/openacc-runtime-library-routines/accmemcpytodevice.rst: New file. + * doc/openacc-runtime-library-routines/accondevice.rst: New file. + * doc/openacc-runtime-library-routines/accpresentorcopyin.rst: New file. + * doc/openacc-runtime-library-routines/accpresentorcreate.rst: New file. + * doc/openacc-runtime-library-routines/accproflookup.rst: New file. + * doc/openacc-runtime-library-routines/accprofregister.rst: New file. + * doc/openacc-runtime-library-routines/accprofunregister.rst: New file. + * doc/openacc-runtime-library-routines/accregisterlibrary.rst: New file. + * doc/openacc-runtime-library-routines/accsetcudastream.rst: New file. + * doc/openacc-runtime-library-routines/accsetdevicenum.rst: New file. + * doc/openacc-runtime-library-routines/accsetdevicetype.rst: New file. 
+ * doc/openacc-runtime-library-routines/accshutdown.rst: New file. + * doc/openacc-runtime-library-routines/accunmapdata.rst: New file. + * doc/openacc-runtime-library-routines/accupdatedevice.rst: New file. + * doc/openacc-runtime-library-routines/accupdateself.rst: New file. + * doc/openacc-runtime-library-routines/accwait.rst: New file. + * doc/openacc-runtime-library-routines/accwaitall.rst: New file. + * doc/openacc-runtime-library-routines/accwaitallasync.rst: New file. + * doc/openacc-runtime-library-routines/accwaitasync.rst: New file. + * doc/openmp-context-selectors.rst: New file. + * doc/openmp-environment-variables.rst: New file. + * doc/openmp-environment-variables/gompcpuaffinity.rst: New file. + * doc/openmp-environment-variables/gompdebug.rst: New file. + * doc/openmp-environment-variables/gomprtemsthreadpools.rst: New file. + * doc/openmp-environment-variables/gompspincount.rst: New file. + * doc/openmp-environment-variables/gompstacksize.rst: New file. + * doc/openmp-environment-variables/ompcancellation.rst: New file. + * doc/openmp-environment-variables/ompdefaultdevice.rst: New file. + * doc/openmp-environment-variables/ompdisplayenv.rst: New file. + * doc/openmp-environment-variables/ompdynamic.rst: New file. + * doc/openmp-environment-variables/ompmaxactivelevels.rst: New file. + * doc/openmp-environment-variables/ompmaxtaskpriority.rst: New file. + * doc/openmp-environment-variables/ompnested.rst: New file. + * doc/openmp-environment-variables/ompnumteams.rst: New file. + * doc/openmp-environment-variables/ompnumthreads.rst: New file. + * doc/openmp-environment-variables/ompplaces.rst: New file. + * doc/openmp-environment-variables/ompprocbind.rst: New file. + * doc/openmp-environment-variables/ompschedule.rst: New file. + * doc/openmp-environment-variables/ompstacksize.rst: New file. + * doc/openmp-environment-variables/omptargetoffload.rst: New file. + * doc/openmp-environment-variables/ompteamsthreadlimit.rst: New file. 
+ * doc/openmp-environment-variables/ompthreadlimit.rst: New file. + * doc/openmp-environment-variables/ompwaitpolicy.rst: New file. + * doc/openmp-implementation-specifics.rst: New file. + * doc/openmp-implementation-status.rst: New file. + * doc/openmp-implementation-status/openmp-45.rst: New file. + * doc/openmp-implementation-status/openmp-50.rst: New file. + * doc/openmp-implementation-status/openmp-51.rst: New file. + * doc/openmp-implementation-status/openmp-52.rst: New file. + * doc/openmp-runtime-library-routines.rst: New file. + * doc/openmp-runtime-library-routines/ompdestroylock.rst: New file. + * doc/openmp-runtime-library-routines/ompdestroynestlock.rst: New file. + * doc/openmp-runtime-library-routines/ompfulfillevent.rst: New file. + * doc/openmp-runtime-library-routines/ompgetactivelevel.rst: New file. + * doc/openmp-runtime-library-routines/ompgetancestorthreadnum.rst: New file. + * doc/openmp-runtime-library-routines/ompgetcancellation.rst: New file. + * doc/openmp-runtime-library-routines/ompgetdefaultdevice.rst: New file. + * doc/openmp-runtime-library-routines/ompgetdevicenum.rst: New file. + * doc/openmp-runtime-library-routines/ompgetdynamic.rst: New file. + * doc/openmp-runtime-library-routines/ompgetinitialdevice.rst: New file. + * doc/openmp-runtime-library-routines/ompgetlevel.rst: New file. + * doc/openmp-runtime-library-routines/ompgetmaxactivelevels.rst: New file. + * doc/openmp-runtime-library-routines/ompgetmaxtaskpriority.rst: New file. + * doc/openmp-runtime-library-routines/ompgetmaxteams.rst: New file. + * doc/openmp-runtime-library-routines/ompgetmaxthreads.rst: New file. + * doc/openmp-runtime-library-routines/ompgetnested.rst: New file. + * doc/openmp-runtime-library-routines/ompgetnumdevices.rst: New file. + * doc/openmp-runtime-library-routines/ompgetnumprocs.rst: New file. + * doc/openmp-runtime-library-routines/ompgetnumteams.rst: New file. + * doc/openmp-runtime-library-routines/ompgetnumthreads.rst: New file. 
+ * doc/openmp-runtime-library-routines/ompgetprocbind.rst: New file. + * doc/openmp-runtime-library-routines/ompgetschedule.rst: New file. + * doc/openmp-runtime-library-routines/ompgetsupportedactivelevels.rst: New file. + * doc/openmp-runtime-library-routines/ompgetteamnum.rst: New file. + * doc/openmp-runtime-library-routines/ompgetteamsize.rst: New file. + * doc/openmp-runtime-library-routines/ompgetteamsthreadlimit.rst: New file. + * doc/openmp-runtime-library-routines/ompgetthreadlimit.rst: New file. + * doc/openmp-runtime-library-routines/ompgetthreadnum.rst: New file. + * doc/openmp-runtime-library-routines/ompgetwtick.rst: New file. + * doc/openmp-runtime-library-routines/ompgetwtime.rst: New file. + * doc/openmp-runtime-library-routines/ompinfinal.rst: New file. + * doc/openmp-runtime-library-routines/ompinitlock.rst: New file. + * doc/openmp-runtime-library-routines/ompinitnestlock.rst: New file. + * doc/openmp-runtime-library-routines/ompinparallel.rst: New file. + * doc/openmp-runtime-library-routines/ompisinitialdevice.rst: New file. + * doc/openmp-runtime-library-routines/ompsetdefaultdevice.rst: New file. + * doc/openmp-runtime-library-routines/ompsetdynamic.rst: New file. + * doc/openmp-runtime-library-routines/ompsetlock.rst: New file. + * doc/openmp-runtime-library-routines/ompsetmaxactivelevels.rst: New file. + * doc/openmp-runtime-library-routines/ompsetnested.rst: New file. + * doc/openmp-runtime-library-routines/ompsetnestlock.rst: New file. + * doc/openmp-runtime-library-routines/ompsetnumteams.rst: New file. + * doc/openmp-runtime-library-routines/ompsetnumthreads.rst: New file. + * doc/openmp-runtime-library-routines/ompsetschedule.rst: New file. + * doc/openmp-runtime-library-routines/ompsetteamsthreadlimit.rst: New file. + * doc/openmp-runtime-library-routines/omptestlock.rst: New file. + * doc/openmp-runtime-library-routines/omptestnestlock.rst: New file. + * doc/openmp-runtime-library-routines/ompunsetlock.rst: New file. 
+ * doc/openmp-runtime-library-routines/ompunsetnestlock.rst: New file. + * doc/reporting-bugs.rst: New file. + * doc/the-libgomp-abi.rst: New file. + * doc/the-libgomp-abi/implementing-atomic-construct.rst: New file. + * doc/the-libgomp-abi/implementing-barrier-construct.rst: New file. + * doc/the-libgomp-abi/implementing-critical-construct.rst: New file. + * doc/the-libgomp-abi/implementing-firstprivate-lastprivate-copyin-and-copyprivate-clauses.rst: + New file. + * doc/the-libgomp-abi/implementing-flush-construct.rst: New file. + * doc/the-libgomp-abi/implementing-for-construct.rst: New file. + * doc/the-libgomp-abi/implementing-master-construct.rst: New file. + * doc/the-libgomp-abi/implementing-openaccs-parallel-construct.rst: New file. + * doc/the-libgomp-abi/implementing-ordered-construct.rst: New file. + * doc/the-libgomp-abi/implementing-parallel-construct.rst: New file. + * doc/the-libgomp-abi/implementing-private-clause.rst: New file. + * doc/the-libgomp-abi/implementing-reduction-clause.rst: New file. + * doc/the-libgomp-abi/implementing-sections-construct.rst: New file. + * doc/the-libgomp-abi/implementing-single-construct.rst: New file. + * doc/the-libgomp-abi/implementing-threadprivate-construct.rst: New file. + +2022-11-04 Thomas Schwinge <thomas@codesourcery.com> + + * libgomp-plugin.h (OFFLOAD_TARGET_TYPE_INTEL_MIC): Remove. + * libgomp.texi (OpenMP Context Selectors): Remove Intel MIC + documentation. + * plugin/configfrag.ac <enable_offload_targets> + [*-intelmic-* | *-intelmicemul-*]: Remove. + * configure: Regenerate. + * testsuite/lib/libgomp.exp (libgomp_init): Remove 'liboffloadmic' + handling. + (offload_target_to_openacc_device_type) + [$offload_target = *-intelmic*]: Remove. + (check_effective_target_offload_device_intel_mic) + (check_effective_target_offload_device_any_intel_mic): Remove. 
+ * testsuite/libgomp.c-c++-common/on_device_arch.h + (device_arch_intel_mic, on_device_arch_intel_mic, any_device_arch) + (any_device_arch_intel_mic): Remove. + * testsuite/libgomp.c-c++-common/target-45.c: Remove + 'offload_device_any_intel_mic' XFAIL. + * testsuite/libgomp.fortran/target10.f90: Likewise. + +2022-11-03 Tobias Burnus <tobias@codesourcery.com> + + * testsuite/libgomp.fortran/target-11.f90: New test. + * testsuite/libgomp.fortran/target-13.f90: New test. + +2022-11-02 Thomas Schwinge <thomas@codesourcery.com> + + PR libgomp/106643 + PR fortran/96668 + * oacc-mem.c (goacc_enter_data_internal): Support + OpenACC 'declare create' with Fortran allocatable arrays, part II. + * testsuite/libgomp.oacc-fortran/declare-allocatable-array_descriptor-1-directive.f90: + Adjust. + * testsuite/libgomp.oacc-fortran/pr106643-1.f90: New. + +2022-11-02 Thomas Schwinge <thomas@codesourcery.com> + + PR libgomp/106643 + * oacc-mem.c (goacc_enter_data_internal): Support + OpenACC 'declare create' with Fortran allocatable arrays, part I. + * testsuite/libgomp.oacc-fortran/declare-allocatable-1-directive.f90: + New. + * testsuite/libgomp.oacc-fortran/declare-allocatable-array_descriptor-1-directive.f90: + New. + +2022-11-02 Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/libgomp.oacc-fortran/declare-allocatable-array_descriptor-1-runtime.f90: + New. + +2022-11-02 Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/libgomp.oacc-fortran/declare-allocatable-1-runtime.f90: + New. + +2022-11-02 Cesar Philippidis <cesar@codesourcery.com> + Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/libgomp.oacc-fortran/declare-allocatable-1.f90: New. + +2022-10-28 Julian Brown <julian@codesourcery.com> + Thomas Schwinge <thomas@codesourcery.com> + + PR middle-end/90115 + * testsuite/libgomp.oacc-fortran/declare-1.f90: Adjust scan output. + * testsuite/libgomp.oacc-fortran/host_data-5.F90: Likewise. + * testsuite/libgomp.oacc-fortran/if-1.f90: Likewise. 
+ * testsuite/libgomp.oacc-fortran/print-1.f90: Likewise. + * testsuite/libgomp.oacc-fortran/privatized-ref-2.f90: Likewise. + +2022-10-24 Thomas Schwinge <thomas@codesourcery.com> + + * plugin/plugin-nvptx.c (nvptx_open_device): Initialize + 'ptx_dev->rev_data'. + +2022-10-24 Tobias Burnus <tobias@codesourcery.com> + + * config/nvptx/icv-device.c (GOMP_DEVICE_NUM_VAR): Remove + 'static' for this variable. + * config/nvptx/libgomp-nvptx.h: New file. + * config/nvptx/target.c: Include it. + (GOMP_ADDITIONAL_ICVS): Declare extern var. + (GOMP_REV_OFFLOAD_VAR): Declare var. + (GOMP_target_ext): Handle reverse offload. + * libgomp-plugin.h (GOMP_PLUGIN_target_rev): New prototype. + * libgomp-plugin.c (GOMP_PLUGIN_target_rev): New, call ... + * target.c (gomp_target_rev): ... this new stub function. + * libgomp.h (gomp_target_rev): Declare. + * libgomp.map (GOMP_PLUGIN_1.4): New; add GOMP_PLUGIN_target_rev. + * plugin/cuda-lib.def (cuMemHostAlloc): Add. + * plugin/plugin-nvptx.c: Include libgomp-nvptx.h. + (struct ptx_device): Add rev_data member. + (nvptx_open_device): Remove async_engines query, last used in + r10-304-g1f4c5b9b; add unified-address assert check. + (GOMP_OFFLOAD_get_num_devices): Claim unified address + support. + (GOMP_OFFLOAD_load_image): Free rev_fn_table if no + offload functions exist. Make offload var available + on host and device. + (rev_off_dev_to_host_cpy, rev_off_host_to_dev_cpy): New. + (GOMP_OFFLOAD_run): Handle reverse offload. + +2022-10-21 Thomas Schwinge <thomas@codesourcery.com> + + PR tree-optimization/107195 + PR target/107344 + * testsuite/libgomp.oacc-c-c++-common/nvptx-sese-1.c: Restore SESE + regions checking. + +2022-10-20 Tobias Burnus <tobias@codesourcery.com> + + * testsuite/lib/libgomp.exp (check_effective_target_offload_device_gcn): + New. + * testsuite/libgomp.c-c++-common/on_device_arch.h (device_arch_gcn, + on_device_arch_gcn): New. 
+ * testsuite/libgomp.c-c++-common/requires-4a.c: New test; copied from + requires-4.c but using heap-allocated memory. + +2022-10-20 Thomas Schwinge <thomas@codesourcery.com> + + PR target/105421 + * testsuite/libgomp.oacc-c-c++-common/private-big-1.c: New. + +2022-10-17 Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/libgomp.c/reverse-offload-sm30.c: Fix nvptx-specific + '-foffload-options' syntax. + +2022-10-13 Tobias Burnus <tobias@codesourcery.com> + + * testsuite/libgomp.fortran/task-7.f90: New test. + * testsuite/libgomp.fortran/task-8.f90: New test. + * testsuite/libgomp.fortran/task-in-explicit-1.f90: New test. + * testsuite/libgomp.fortran/task-in-explicit-2.f90: New test. + * testsuite/libgomp.fortran/task-in-explicit-3.f90: New test. + * testsuite/libgomp.fortran/task-reduction-17.f90: New test. + * testsuite/libgomp.fortran/task-reduction-18.f90: New test. + +2022-10-12 Jakub Jelinek <jakub@redhat.com> + + * libgomp.texi (OpenMP 5.2): Fix up allocator -> allocate directive + in deprecation bullet. + +2022-10-12 Jakub Jelinek <jakub@redhat.com> + + * omp.h.in (omp_in_explicit_task): Declare. + * omp_lib.h.in (omp_in_explicit_task): Likewise. + * omp_lib.f90.in (omp_in_explicit_task): New interface. + * libgomp.map (OMP_5.2): New symbol version, export + omp_in_explicit_task and omp_in_explicit_task_. + * task.c (omp_in_explicit_task): New function. + * fortran.c (omp_in_explicit_task): Add ialias_redirect. + (omp_in_explicit_task_): New function. + * libgomp.texi (OpenMP 5.2): Mark omp_in_explicit_task as implemented. + * testsuite/libgomp.c-c++-common/task-in-explicit-1.c: New test. + * testsuite/libgomp.c-c++-common/task-in-explicit-2.c: New test. + * testsuite/libgomp.c-c++-common/task-in-explicit-3.c: New test. + +2022-10-12 Jakub Jelinek <jakub@redhat.com> + + * task.c (gomp_create_artificial_team): Fix up handling of invocations + from within explicit task. + * target.c (GOMP_target_ext): Likewise. 
+ * testsuite/libgomp.c/task-7.c: New test. + * testsuite/libgomp.c/task-8.c: New test. + * testsuite/libgomp.c-c++-common/task-reduction-17.c: New test. + * testsuite/libgomp.c-c++-common/task-reduction-18.c: New test. + +2022-10-12 Martin Liska <mliska@suse.cz> + + * configure: Regenerate. + +2022-10-11 Olivier Hainque <hainque@adacore.com> + Olivier Hainque <hainque@adacore.com> + + * configure: Regenerate. + +2022-10-05 Tobias Burnus <tobias@codesourcery.com> + + * libgomp.texi (OpenMP 5.1 Impl. Status): Mark 'assume' as 'Y'. + +2022-10-04 Jakub Jelinek <jakub@redhat.com> + + * libgomp.texi (Support begin/end declare target syntax in C/C++): + Mark as implemented. + +2022-09-30 Tobias Burnus <tobias@codesourcery.com> + + PR fortran/105318 + * testsuite/libgomp.fortran/is_device_ptr-2.f90: New test. + +2022-09-28 Tobias Burnus <tobias@codesourcery.com> + + * libgomp.texi (OpenMP 5.1): Mark 'assume' as implemented + for C/C++. Remove duplicated 'begin declare target' entry. + +2022-09-24 Jakub Jelinek <jakub@redhat.com> + + PR c/106981 + * testsuite/libgomp.c-c++-common/pr106981.c: New test. + 2022-09-14 Julian Brown <julian@codesourcery.com> * testsuite/libgomp.oacc-c-c++-common/deep-copy-15.c: New test. @@ -4468,7 +6223,7 @@ * oacc-mem.c (goacc_enter_datum): Use 'tgt' returned from 'gomp_map_vars'. - (acc_map_data): Clean up accordingly. + (acc_map_data): Clean up accordingly. 2020-06-04 Thomas Schwinge <thomas@codesourcery.com> @@ -17962,7 +19717,7 @@ Initial implementation and checkin. -Copyright (C) 2005-2022 Free Software Foundation, Inc. +Copyright (C) 2005-2023 Free Software Foundation, Inc. Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in index 8ffd45c..2c81cca 100644 --- a/libgomp/Makefile.in +++ b/libgomp/Makefile.in @@ -16,7 +16,7 @@ # Plugins for offload execution, Makefile.am fragment. 
# -# Copyright (C) 2014-2022 Free Software Foundation, Inc. +# Copyright (C) 2014-2023 Free Software Foundation, Inc. # # Contributed by Mentor Embedded. # diff --git a/libgomp/acc_prof.h b/libgomp/acc_prof.h index 1a60d35..f70fe25 100644 --- a/libgomp/acc_prof.h +++ b/libgomp/acc_prof.h @@ -1,6 +1,6 @@ /* OpenACC Profiling Interface - Copyright (C) 2019-2022 Free Software Foundation, Inc. + Copyright (C) 2019-2023 Free Software Foundation, Inc. Contributed by Mentor, a Siemens Business. diff --git a/libgomp/affinity-fmt.c b/libgomp/affinity-fmt.c index 30b7ef9..85700a2 100644 --- a/libgomp/affinity-fmt.c +++ b/libgomp/affinity-fmt.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2018-2022 Free Software Foundation, Inc. +/* Copyright (C) 2018-2023 Free Software Foundation, Inc. Contributed by Jakub Jelinek <jakub@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/affinity.c b/libgomp/affinity.c index ce90af7..082e5b6 100644 --- a/libgomp/affinity.c +++ b/libgomp/affinity.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2006-2022 Free Software Foundation, Inc. +/* Copyright (C) 2006-2023 Free Software Foundation, Inc. Contributed by Jakub Jelinek <jakub@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/alloc.c b/libgomp/alloc.c index c1bd72d..bdcb1c6 100644 --- a/libgomp/alloc.c +++ b/libgomp/alloc.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/allocator.c b/libgomp/allocator.c index b04820b..c49931c 100644 --- a/libgomp/allocator.c +++ b/libgomp/allocator.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2020-2022 Free Software Foundation, Inc. +/* Copyright (C) 2020-2023 Free Software Foundation, Inc. Contributed by Jakub Jelinek <jakub@redhat.com>. 
This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/atomic.c b/libgomp/atomic.c index 8a57ea5..8fb3559 100644 --- a/libgomp/atomic.c +++ b/libgomp/atomic.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/barrier.c b/libgomp/barrier.c index 1b773b4..c9a822d 100644 --- a/libgomp/barrier.c +++ b/libgomp/barrier.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/accel/lock.c b/libgomp/config/accel/lock.c index b823445..3f81b15 100644 --- a/libgomp/config/accel/lock.c +++ b/libgomp/config/accel/lock.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2016-2022 Free Software Foundation, Inc. +/* Copyright (C) 2016-2023 Free Software Foundation, Inc. Contributed by Alexander Monakov <amonakov@ispras.ru>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/accel/mutex.h b/libgomp/config/accel/mutex.h index 2d52eb5c..c167f1c 100644 --- a/libgomp/config/accel/mutex.h +++ b/libgomp/config/accel/mutex.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2022 Free Software Foundation, Inc. +/* Copyright (C) 2015-2023 Free Software Foundation, Inc. Contributed by Alexander Monakov <amonakov@ispras.ru> This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/accel/oacc-init.c b/libgomp/config/accel/oacc-init.c index 1203efc..7c91509 100644 --- a/libgomp/config/accel/oacc-init.c +++ b/libgomp/config/accel/oacc-init.c @@ -1,6 +1,6 @@ /* OpenACC Runtime initialization routines - Copyright (C) 2014-2022 Free Software Foundation, Inc. 
+ Copyright (C) 2014-2023 Free Software Foundation, Inc. Contributed by Mentor Embedded. diff --git a/libgomp/config/accel/openacc.f90 b/libgomp/config/accel/openacc.f90 index c50e323..2e3e5c7 100644 --- a/libgomp/config/accel/openacc.f90 +++ b/libgomp/config/accel/openacc.f90 @@ -1,6 +1,6 @@ ! OpenACC Runtime Library Definitions. -! Copyright (C) 2014-2022 Free Software Foundation, Inc. +! Copyright (C) 2014-2023 Free Software Foundation, Inc. ! Contributed by Tobias Burnus <burnus@net-b.de> ! and Mentor Embedded. diff --git a/libgomp/config/accel/pool.h b/libgomp/config/accel/pool.h index 668a86d..72d8b8e 100644 --- a/libgomp/config/accel/pool.h +++ b/libgomp/config/accel/pool.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2022 Free Software Foundation, Inc. +/* Copyright (C) 2015-2023 Free Software Foundation, Inc. Contributed by Alexander Monakov <amonakov@ispras.ru> This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/accel/proc.c b/libgomp/config/accel/proc.c index 19f98c5..7824b6f 100644 --- a/libgomp/config/accel/proc.c +++ b/libgomp/config/accel/proc.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2022 Free Software Foundation, Inc. +/* Copyright (C) 2015-2023 Free Software Foundation, Inc. Contributed by Alexander Monakov <amonakov@ispras.ru> This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/accel/ptrlock.h b/libgomp/config/accel/ptrlock.h index db86f99..9391252 100644 --- a/libgomp/config/accel/ptrlock.h +++ b/libgomp/config/accel/ptrlock.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2022 Free Software Foundation, Inc. +/* Copyright (C) 2015-2023 Free Software Foundation, Inc. 
Contributed by Alexander Monakov <amonakov@ispras.ru> This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/accel/sem.h b/libgomp/config/accel/sem.h index ebe64f2c..fb6fe60 100644 --- a/libgomp/config/accel/sem.h +++ b/libgomp/config/accel/sem.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2022 Free Software Foundation, Inc. +/* Copyright (C) 2015-2023 Free Software Foundation, Inc. Contributed by Alexander Monakov <amonakov@ispras.ru> This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/accel/thread-stacksize.h b/libgomp/config/accel/thread-stacksize.h index 86515e4..04ae108 100644 --- a/libgomp/config/accel/thread-stacksize.h +++ b/libgomp/config/accel/thread-stacksize.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2017-2022 Free Software Foundation, Inc. +/* Copyright (C) 2017-2023 Free Software Foundation, Inc. Contributed by Jakub Jelinek <jakub@redhat.com> This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/bsd/proc.c b/libgomp/config/bsd/proc.c index 64ab27d..9e841ee 100644 --- a/libgomp/config/bsd/proc.c +++ b/libgomp/config/bsd/proc.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/darwin/plugin-suffix.h b/libgomp/config/darwin/plugin-suffix.h index 7c1ad31..7481e6e 100644 --- a/libgomp/config/darwin/plugin-suffix.h +++ b/libgomp/config/darwin/plugin-suffix.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2022 Free Software Foundation, Inc. +/* Copyright (C) 2015-2023 Free Software Foundation, Inc. 
Contributed by Jack Howarth <howarth.at.gcc@gmail.com> This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/darwin/thread-stacksize.h b/libgomp/config/darwin/thread-stacksize.h index 477bbb1..e408038 100644 --- a/libgomp/config/darwin/thread-stacksize.h +++ b/libgomp/config/darwin/thread-stacksize.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2017-2022 Free Software Foundation, Inc. +/* Copyright (C) 2017-2023 Free Software Foundation, Inc. Contributed by Jakub Jelinek <jakub@redhat.com> This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/gcn/affinity-fmt.c b/libgomp/config/gcn/affinity-fmt.c index b8abfa9..0901bf5 100644 --- a/libgomp/config/gcn/affinity-fmt.c +++ b/libgomp/config/gcn/affinity-fmt.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2018-2022 Free Software Foundation, Inc. +/* Copyright (C) 2018-2023 Free Software Foundation, Inc. This file is part of the GNU Offloading and Multi Processing Library (libgomp). diff --git a/libgomp/config/gcn/bar.c b/libgomp/config/gcn/bar.c index b2aac72..4b72f09 100644 --- a/libgomp/config/gcn/bar.c +++ b/libgomp/config/gcn/bar.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2022 Free Software Foundation, Inc. +/* Copyright (C) 2015-2023 Free Software Foundation, Inc. Contributed by Mentor Embedded. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/gcn/bar.h b/libgomp/config/gcn/bar.h index 19d3a62..fe9ce63 100644 --- a/libgomp/config/gcn/bar.h +++ b/libgomp/config/gcn/bar.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2022 Free Software Foundation, Inc. +/* Copyright (C) 2015-2023 Free Software Foundation, Inc. Contributed by Mentor Embedded. 
This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/gcn/doacross.h b/libgomp/config/gcn/doacross.h index 33981aa..82a92f6 100644 --- a/libgomp/config/gcn/doacross.h +++ b/libgomp/config/gcn/doacross.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2022 Free Software Foundation, Inc. +/* Copyright (C) 2015-2023 Free Software Foundation, Inc. Contributed by Mentor Embedded. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/gcn/icv-device.c b/libgomp/config/gcn/icv-device.c index bf757ba..2ab8546 100644 --- a/libgomp/config/gcn/icv-device.c +++ b/libgomp/config/gcn/icv-device.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2022 Free Software Foundation, Inc. +/* Copyright (C) 2015-2023 Free Software Foundation, Inc. Contributed by Mentor Embedded. This file is part of the GNU Offloading and Multi Processing Library @@ -81,6 +81,19 @@ omp_set_num_teams (int num_teams) GOMP_ADDITIONAL_ICVS.nteams = num_teams; } +int +omp_get_teams_thread_limit (void) +{ + return GOMP_ADDITIONAL_ICVS.teams_thread_limit; +} + +void +omp_set_teams_thread_limit (int thread_limit) +{ + if (thread_limit >= 0) + GOMP_ADDITIONAL_ICVS.teams_thread_limit = thread_limit; +} + ialias (omp_set_default_device) ialias (omp_get_default_device) ialias (omp_get_initial_device) @@ -89,3 +102,5 @@ ialias (omp_is_initial_device) ialias (omp_get_device_num) ialias (omp_get_max_teams) ialias (omp_set_num_teams) +ialias (omp_get_teams_thread_limit) +ialias (omp_set_teams_thread_limit) diff --git a/libgomp/config/gcn/libgomp-gcn.h b/libgomp/config/gcn/libgomp-gcn.h new file mode 100644 index 0000000..f62b7dd --- /dev/null +++ b/libgomp/config/gcn/libgomp-gcn.h @@ -0,0 +1,92 @@ +/* Copyright (C) 2022-2023 Free Software Foundation, Inc. + Contributed by Tobias Burnus <tobias@codesourcery.com>. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). 
+ + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This file contains defines and type definitions shared between the + GCN target's libgomp.a and the plugin-gcn.c, but that is only + needed for this target. */ + +#ifndef LIBGOMP_GCN_H +#define LIBGOMP_GCN_H 1 + +#define DEFAULT_GCN_STACK_SIZE (32*1024) +#define DEFAULT_TEAM_ARENA_SIZE (64*1024) + +struct heap +{ + int64_t size; + char data[0]; +}; + +/* This struct defines the (unofficial) ABI-defined values the compiler + expects to find in first bytes of the kernargs space. + The plugin may choose to place additional data later in the kernargs + memory allocation, but those are not in any fixed location. */ +struct kernargs_abi { + /* Leave space for the real kernel arguments. + OpenACC and OpenMP only use one pointer. */ + int64_t dummy1; + int64_t dummy2; + + /* A pointer to struct output, below, for console output data. */ + int64_t out_ptr; /* Offset 16. */ + + /* A pointer to struct heap. */ + int64_t heap_ptr; /* Offset 24. */ + + /* A pointer to the ephemeral memory areas. + The team arena is only needed for OpenMP. 
+ Each should have enough space for all the teams and threads. */ + int64_t arena_ptr; /* Offset 32. */ + int64_t stack_ptr; /* Offset 40. */ + int arena_size_per_team; /* Offset 48. */ + int stack_size_per_thread; /* Offset 52. */ +}; + +/* This struct is also used in Newlib's libc/sys/amdgcn/write.c. */ +struct output +{ + int return_value; + unsigned int next_output; + struct printf_data { + int written; + char msg[128]; + int type; + union { + int64_t ivalue; + double dvalue; + char text[128]; + uint64_t value_u64[16]; + }; + } queue[1024]; + unsigned int consumed; +}; + +#if (__SIZEOF_SHORT__ != 2 \ + || __SIZEOF_SIZE_T__ != 8 \ + || __SIZEOF_POINTER__ != 8) +#error "Data-type conversion required for rev_offload" +#endif + +#endif /* LIBGOMP_GCN_H */ diff --git a/libgomp/config/gcn/oacc-target.c b/libgomp/config/gcn/oacc-target.c index d73f13a..fd6b18d 100644 --- a/libgomp/config/gcn/oacc-target.c +++ b/libgomp/config/gcn/oacc-target.c @@ -1,5 +1,5 @@ /* Oversized reductions lock variable - Copyright (C) 2017-2022 Free Software Foundation, Inc. + Copyright (C) 2017-2023 Free Software Foundation, Inc. Contributed by Mentor Graphics. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/gcn/simple-bar.h b/libgomp/config/gcn/simple-bar.h index 0a9681d..abf99ee 100644 --- a/libgomp/config/gcn/simple-bar.h +++ b/libgomp/config/gcn/simple-bar.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2022 Free Software Foundation, Inc. +/* Copyright (C) 2015-2023 Free Software Foundation, Inc. Contributed by Mentor Embedded. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/gcn/target.c b/libgomp/config/gcn/target.c index c8484fa..c6691fd 100644 --- a/libgomp/config/gcn/target.c +++ b/libgomp/config/gcn/target.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2017-2022 Free Software Foundation, Inc. +/* Copyright (C) 2017-2023 Free Software Foundation, Inc. Contributed by Mentor Embedded. 
This file is part of the GNU Offloading and Multi Processing Library @@ -24,8 +24,11 @@ <http://www.gnu.org/licenses/>. */ #include "libgomp.h" +#include "libgomp-gcn.h" #include <limits.h> +extern volatile struct gomp_offload_icvs GOMP_ADDITIONAL_ICVS; + bool GOMP_teams4 (unsigned int num_teams_lower, unsigned int num_teams_upper, unsigned int thread_limit, bool first) @@ -75,16 +78,43 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, void **hostaddrs, size_t *sizes, unsigned short *kinds, unsigned int flags, void **depend, void **args) { - (void) device; - (void) fn; - (void) mapnum; - (void) hostaddrs; - (void) sizes; - (void) kinds; (void) flags; (void) depend; (void) args; - __builtin_unreachable (); + + if (device != GOMP_DEVICE_HOST_FALLBACK || fn == NULL) + return; + + /* The output data is at ((void*) kernargs)[2]. */ + register void **kernargs = (void**) __builtin_gcn_kernarg_ptr (); + struct output *data = (struct output *) kernargs[2]; + /* Reserve one slot. */ + unsigned int index = __atomic_fetch_add (&data->next_output, 1, + __ATOMIC_ACQUIRE); + + if ((unsigned int) (index + 1) < data->consumed) + abort (); /* Overflow. */ + + /* Spinlock while the host catches up. */ + if (index >= 1024) + while (__atomic_load_n (&data->consumed, __ATOMIC_ACQUIRE) + <= (index - 1024)) + asm ("s_sleep 64"); + + unsigned int slot = index % 1024; + data->queue[slot].value_u64[0] = (uint64_t) fn; + data->queue[slot].value_u64[1] = (uint64_t) mapnum; + data->queue[slot].value_u64[2] = (uint64_t) hostaddrs; + data->queue[slot].value_u64[3] = (uint64_t) sizes; + data->queue[slot].value_u64[4] = (uint64_t) kinds; + data->queue[slot].value_u64[5] = (uint64_t) GOMP_ADDITIONAL_ICVS.device_num; + + data->queue[slot].type = 4; /* Reverse offload. */ + __atomic_store_n (&data->queue[slot].written, 1, __ATOMIC_RELEASE); + + /* Spinlock while the host catches up. 
*/ + while (__atomic_load_n (&data->queue[slot].written, __ATOMIC_ACQUIRE) != 0) + asm ("s_sleep 64"); } void diff --git a/libgomp/config/gcn/task.c b/libgomp/config/gcn/task.c index cfa25d5..2f5bdcb 100644 --- a/libgomp/config/gcn/task.c +++ b/libgomp/config/gcn/task.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2017-2022 Free Software Foundation, Inc. +/* Copyright (C) 2017-2023 Free Software Foundation, Inc. Contributed by Mentor Embedded. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/gcn/team.c b/libgomp/config/gcn/team.c index 254dd4d..f03207c 100644 --- a/libgomp/config/gcn/team.c +++ b/libgomp/config/gcn/team.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2017-2022 Free Software Foundation, Inc. +/* Copyright (C) 2017-2023 Free Software Foundation, Inc. Contributed by Mentor Embedded. This file is part of the GNU Offloading and Multi Processing Library @@ -60,14 +60,16 @@ gomp_gcn_enter_kernel (void) /* Initialize the team arena for optimized memory allocation. The arena has been allocated on the host side, and the address passed in via the kernargs. Each team takes a small slice of it. */ - register void **kernargs asm("s8"); - void *team_arena = (kernargs[4] + TEAM_ARENA_SIZE*teamid); + struct kernargs_abi *kernargs = + (struct kernargs_abi*) __builtin_gcn_kernarg_ptr (); + void *team_arena = ((void*)kernargs->arena_ptr + + kernargs->arena_size_per_team * teamid); void * __lds *arena_start = (void * __lds *)TEAM_ARENA_START; void * __lds *arena_free = (void * __lds *)TEAM_ARENA_FREE; void * __lds *arena_end = (void * __lds *)TEAM_ARENA_END; *arena_start = team_arena; *arena_free = team_arena; - *arena_end = team_arena + TEAM_ARENA_SIZE; + *arena_end = team_arena + kernargs->arena_size_per_team; /* Allocate and initialize the team-local-storage data. 
*/ struct gomp_thread *thrs = team_malloc_cleared (sizeof (*thrs) diff --git a/libgomp/config/gcn/teams.c b/libgomp/config/gcn/teams.c index 29b9e61..77a6685 100644 --- a/libgomp/config/gcn/teams.c +++ b/libgomp/config/gcn/teams.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2022 Free Software Foundation, Inc. +/* Copyright (C) 2015-2023 Free Software Foundation, Inc. This file is part of the GNU Offloading and Multi Processing Library (libgomp). diff --git a/libgomp/config/gcn/time.c b/libgomp/config/gcn/time.c index 1ce351c..b16023b 100644 --- a/libgomp/config/gcn/time.c +++ b/libgomp/config/gcn/time.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2022 Free Software Foundation, Inc. +/* Copyright (C) 2015-2023 Free Software Foundation, Inc. Contributed by Mentor Embedded. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/hpux/plugin-suffix.h b/libgomp/config/hpux/plugin-suffix.h index a4c48a4..23fb7fb 100644 --- a/libgomp/config/hpux/plugin-suffix.h +++ b/libgomp/config/hpux/plugin-suffix.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2022 Free Software Foundation, Inc. +/* Copyright (C) 2015-2023 Free Software Foundation, Inc. Contributed by Jack Howarth <howarth.at.gcc@gmail.com> This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/linux/affinity.c b/libgomp/config/linux/affinity.c index 5de02b0..6dcd23c 100644 --- a/libgomp/config/linux/affinity.c +++ b/libgomp/config/linux/affinity.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2006-2022 Free Software Foundation, Inc. +/* Copyright (C) 2006-2023 Free Software Foundation, Inc. Contributed by Jakub Jelinek <jakub@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/linux/allocator.c b/libgomp/config/linux/allocator.c index b73acce..15babcd 100644 --- a/libgomp/config/linux/allocator.c +++ b/libgomp/config/linux/allocator.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2022 Free Software Foundation, Inc. 
+/* Copyright (C) 2022-2023 Free Software Foundation, Inc. Contributed by Jakub Jelinek <jakub@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/linux/alpha/futex.h b/libgomp/config/linux/alpha/futex.h index 0921c55..30b968b 100644 --- a/libgomp/config/linux/alpha/futex.h +++ b/libgomp/config/linux/alpha/futex.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/linux/bar.c b/libgomp/config/linux/bar.c index 159f418..8898c4c 100644 --- a/libgomp/config/linux/bar.c +++ b/libgomp/config/linux/bar.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/linux/bar.h b/libgomp/config/linux/bar.h index 8590837..48520b2 100644 --- a/libgomp/config/linux/bar.h +++ b/libgomp/config/linux/bar.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/linux/doacross.h b/libgomp/config/linux/doacross.h index 8d88c94..64ab3eb 100644 --- a/libgomp/config/linux/doacross.h +++ b/libgomp/config/linux/doacross.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2022 Free Software Foundation, Inc. +/* Copyright (C) 2015-2023 Free Software Foundation, Inc. Contributed by Jakub Jelinek <jakub@redhat.com>. 
This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/linux/futex.h b/libgomp/config/linux/futex.h index e01bd96..3d5126c 100644 --- a/libgomp/config/linux/futex.h +++ b/libgomp/config/linux/futex.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2022 Free Software Foundation, Inc. +/* Copyright (C) 2010-2023 Free Software Foundation, Inc. Contributed by ARM Ltd. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/linux/ia64/futex.h b/libgomp/config/linux/ia64/futex.h index 79e6fc4..1fb3dee 100644 --- a/libgomp/config/linux/ia64/futex.h +++ b/libgomp/config/linux/ia64/futex.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/linux/lock.c b/libgomp/config/linux/lock.c index c238e1e..a2bac76 100644 --- a/libgomp/config/linux/lock.c +++ b/libgomp/config/linux/lock.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/linux/mips/futex.h b/libgomp/config/linux/mips/futex.h index 00af7a6..e4e929d 100644 --- a/libgomp/config/linux/mips/futex.h +++ b/libgomp/config/linux/mips/futex.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Ilie Garbacea <ilie@mips.com>, Chao-ying Fu <fu@mips.com>. 
This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/linux/mutex.c b/libgomp/config/linux/mutex.c index 419fec4..9c7d472 100644 --- a/libgomp/config/linux/mutex.c +++ b/libgomp/config/linux/mutex.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/linux/mutex.h b/libgomp/config/linux/mutex.h index be38d14..1a1811f 100644 --- a/libgomp/config/linux/mutex.h +++ b/libgomp/config/linux/mutex.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/linux/powerpc/futex.h b/libgomp/config/linux/powerpc/futex.h index d4c8743..7f95db8 100644 --- a/libgomp/config/linux/powerpc/futex.h +++ b/libgomp/config/linux/powerpc/futex.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/linux/proc.c b/libgomp/config/linux/proc.c index 396617f..4b624cd 100644 --- a/libgomp/config/linux/proc.c +++ b/libgomp/config/linux/proc.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Jakub Jelinek <jakub@redhat.com>. 
This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/linux/proc.h b/libgomp/config/linux/proc.h index f0530a5..41bb730 100644 --- a/libgomp/config/linux/proc.h +++ b/libgomp/config/linux/proc.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2022 Free Software Foundation, Inc. +/* Copyright (C) 2011-2023 Free Software Foundation, Inc. Contributed by Uros Bizjak <ubizjak@gmail.com> This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/linux/ptrlock.c b/libgomp/config/linux/ptrlock.c index 399bac9..3405fc5 100644 --- a/libgomp/config/linux/ptrlock.c +++ b/libgomp/config/linux/ptrlock.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2008-2022 Free Software Foundation, Inc. +/* Copyright (C) 2008-2023 Free Software Foundation, Inc. Contributed by Jakub Jelinek <jakub@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/linux/ptrlock.h b/libgomp/config/linux/ptrlock.h index ae72bbf..e630464 100644 --- a/libgomp/config/linux/ptrlock.h +++ b/libgomp/config/linux/ptrlock.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2008-2022 Free Software Foundation, Inc. +/* Copyright (C) 2008-2023 Free Software Foundation, Inc. Contributed by Jakub Jelinek <jakub@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/linux/s390/futex.h b/libgomp/config/linux/s390/futex.h index fb8c6ee..99ef7c1 100644 --- a/libgomp/config/linux/s390/futex.h +++ b/libgomp/config/linux/s390/futex.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Jakub Jelinek <jakub@redhat.com>. 
This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/linux/sem.c b/libgomp/config/linux/sem.c index b4dc1db..c2b5f30 100644 --- a/libgomp/config/linux/sem.c +++ b/libgomp/config/linux/sem.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/linux/sem.h b/libgomp/config/linux/sem.h index 5828d16..d0ac657 100644 --- a/libgomp/config/linux/sem.h +++ b/libgomp/config/linux/sem.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/linux/sparc/futex.h b/libgomp/config/linux/sparc/futex.h index 0d9305f..77dafde 100644 --- a/libgomp/config/linux/sparc/futex.h +++ b/libgomp/config/linux/sparc/futex.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Jakub Jelinek <jakub@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/linux/tile/futex.h b/libgomp/config/linux/tile/futex.h index d86d8c9..901049c 100644 --- a/libgomp/config/linux/tile/futex.h +++ b/libgomp/config/linux/tile/futex.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2022 Free Software Foundation, Inc. +/* Copyright (C) 2011-2023 Free Software Foundation, Inc. 
Contributed by Walter Lee (walt@tilera.com) This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/linux/wait.h b/libgomp/config/linux/wait.h index 0ba9db5..29d745f 100644 --- a/libgomp/config/linux/wait.h +++ b/libgomp/config/linux/wait.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2008-2022 Free Software Foundation, Inc. +/* Copyright (C) 2008-2023 Free Software Foundation, Inc. Contributed by Jakub Jelinek <jakub@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/linux/x86/futex.h b/libgomp/config/linux/x86/futex.h index bdb360d..c7728ec 100644 --- a/libgomp/config/linux/x86/futex.h +++ b/libgomp/config/linux/x86/futex.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/mingw32/affinity-fmt.c b/libgomp/config/mingw32/affinity-fmt.c index 89f0a12..be682f7 100644 --- a/libgomp/config/mingw32/affinity-fmt.c +++ b/libgomp/config/mingw32/affinity-fmt.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2018-2022 Free Software Foundation, Inc. +/* Copyright (C) 2018-2023 Free Software Foundation, Inc. Contributed by Jakub Jelinek <jakub@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/mingw32/proc.c b/libgomp/config/mingw32/proc.c index 2e20115..b170b39 100644 --- a/libgomp/config/mingw32/proc.c +++ b/libgomp/config/mingw32/proc.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2022 Free Software Foundation, Inc. +/* Copyright (C) 2007-2023 Free Software Foundation, Inc. Contributed by Danny Smith <dannysmith@users.sourceforge.net> This file is part of the GNU Offloading and Multi Processing Library @@ -30,6 +30,7 @@ The following implementation uses win32 API routines. 
*/ #include "libgomp.h" +#define WIN32_LEAN_AND_MEAN #include <windows.h> /* Count the CPU's currently available to this process. */ diff --git a/libgomp/config/mingw32/time.c b/libgomp/config/mingw32/time.c index 938a836..ee7c71d 100644 --- a/libgomp/config/mingw32/time.c +++ b/libgomp/config/mingw32/time.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2006-2022 Free Software Foundation, Inc. +/* Copyright (C) 2006-2023 Free Software Foundation, Inc. Contributed by Francois-Xavier Coudert <coudert@clipper.ens.fr> This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/nvptx/affinity-fmt.c b/libgomp/config/nvptx/affinity-fmt.c index 378b78c..01a16e7 100644 --- a/libgomp/config/nvptx/affinity-fmt.c +++ b/libgomp/config/nvptx/affinity-fmt.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2018-2022 Free Software Foundation, Inc. +/* Copyright (C) 2018-2023 Free Software Foundation, Inc. This file is part of the GNU Offloading and Multi Processing Library (libgomp). diff --git a/libgomp/config/nvptx/bar.c b/libgomp/config/nvptx/bar.c index eee2107..14aafca 100644 --- a/libgomp/config/nvptx/bar.c +++ b/libgomp/config/nvptx/bar.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2022 Free Software Foundation, Inc. +/* Copyright (C) 2015-2023 Free Software Foundation, Inc. Contributed by Alexander Monakov <amonakov@ispras.ru> This file is part of the GNU Offloading and Multi Processing Library @@ -30,137 +30,156 @@ #include <limits.h> #include "libgomp.h" -/* For cpu_relax. */ -#include "doacross.h" - -/* Assuming ADDR is &bar->generation, return bar. Copied from - rtems/bar.c. */ +void +gomp_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state) +{ + if (__builtin_expect (state & BAR_WAS_LAST, 0)) + { + /* Next time we'll be awaiting TOTAL threads again. 
*/ + bar->awaited = bar->total; + __atomic_store_n (&bar->generation, bar->generation + BAR_INCR, + MEMMODEL_RELEASE); + } + if (bar->total > 1) + asm ("bar.sync 1, %0;" : : "r" (32 * bar->total)); +} -static gomp_barrier_t * -generation_to_barrier (int *addr) +void +gomp_barrier_wait (gomp_barrier_t *bar) { - char *bar - = (char *) addr - __builtin_offsetof (gomp_barrier_t, generation); - return (gomp_barrier_t *)bar; + gomp_barrier_wait_end (bar, gomp_barrier_wait_start (bar)); } -/* Implement futex_wait-like behaviour to plug into the linux/bar.c - implementation. Assumes ADDR is &bar->generation. */ +/* Like gomp_barrier_wait, except that if the encountering thread + is not the last one to hit the barrier, it returns immediately. + The intended usage is that a thread which intends to gomp_barrier_destroy + this barrier calls gomp_barrier_wait, while all other threads + call gomp_barrier_wait_last. When gomp_barrier_wait returns, + the barrier can be safely destroyed. */ -static inline void -futex_wait (int *addr, int val) +void +gomp_barrier_wait_last (gomp_barrier_t *bar) { - gomp_barrier_t *bar = generation_to_barrier (addr); + /* The above described behavior matches 'bar.arrive' perfectly. */ + if (bar->total > 1) + asm ("bar.arrive 1, %0;" : : "r" (32 * bar->total)); +} - if (bar->total < 2) - /* A barrier with less than two threads, nop. */ - return; +/* Barriers are implemented mainly using 'bar.red.or', which combines a bar.sync + operation with a OR-reduction of "team->task_count != 0" across all threads. + Task processing is done only after synchronization and verifying that + task_count was non-zero in at least one of the team threads. 
- gomp_mutex_lock (&bar->lock); + This use of simple-barriers, and queueing of tasks till the end, is deemed + more efficient performance-wise for GPUs in the common offloading case, as + opposed to implementing futex-wait/wake operations to simultaneously process + tasks in a CPU-thread manner (which is not easy to implement efficiently + on GPUs). */ - /* Futex semantics: only go to sleep if *addr == val. */ - if (__builtin_expect (__atomic_load_n (addr, MEMMODEL_ACQUIRE) != val, 0)) - { - gomp_mutex_unlock (&bar->lock); - return; - } +void +gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state) +{ + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; - /* Register as waiter. */ - unsigned int waiters - = __atomic_add_fetch (&bar->waiters, 1, MEMMODEL_ACQ_REL); - if (waiters == 0) - __builtin_abort (); - unsigned int waiter_id = waiters; + bool run_tasks = (team->task_count != 0); + if (bar->total > 1) + run_tasks = __builtin_nvptx_bar_red_or (1, 32 * bar->total, true, + (team->task_count != 0)); - if (waiters > 1) + if (__builtin_expect (state & BAR_WAS_LAST, 0)) { - /* Wake other threads in bar.sync. */ - asm volatile ("bar.sync 1, %0;" : : "r" (32 * waiters)); - - /* Ensure that they have updated waiters. */ - asm volatile ("bar.sync 1, %0;" : : "r" (32 * waiters)); + /* Next time we'll be awaiting TOTAL threads again. */ + bar->awaited = bar->total; + team->work_share_cancelled = 0; } - gomp_mutex_unlock (&bar->lock); - - while (1) + if (__builtin_expect (run_tasks == true, 0)) { - /* Wait for next thread in barrier. */ - asm volatile ("bar.sync 1, %0;" : : "r" (32 * (waiters + 1))); + while (__atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE) + & BAR_TASK_PENDING) + gomp_barrier_handle_tasks (state); - /* Get updated waiters. */ - unsigned int updated_waiters - = __atomic_load_n (&bar->waiters, MEMMODEL_ACQUIRE); - - /* Notify that we have updated waiters. 
*/ - asm volatile ("bar.sync 1, %0;" : : "r" (32 * (waiters + 1))); - - waiters = updated_waiters; + if (bar->total > 1) + asm volatile ("bar.sync 1, %0;" : : "r" (32 * bar->total)); + } +} - if (waiter_id > waiters) - /* A wake happened, and we're in the group of woken threads. */ - break; +void +gomp_team_barrier_wait (gomp_barrier_t *bar) +{ + gomp_team_barrier_wait_end (bar, gomp_barrier_wait_start (bar)); +} - /* Continue waiting. */ - } +void +gomp_team_barrier_wait_final (gomp_barrier_t *bar) +{ + gomp_barrier_state_t state = gomp_barrier_wait_final_start (bar); + if (__builtin_expect (state & BAR_WAS_LAST, 0)) + bar->awaited_final = bar->total; + gomp_team_barrier_wait_end (bar, state); } -/* Implement futex_wake-like behaviour to plug into the linux/bar.c - implementation. Assumes ADDR is &bar->generation. */ +/* See also comments for gomp_team_barrier_wait_end. */ -static inline void -futex_wake (int *addr, int count) +bool +gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar, + gomp_barrier_state_t state) { - gomp_barrier_t *bar = generation_to_barrier (addr); + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; - if (bar->total < 2) - /* A barrier with less than two threads, nop. */ - return; + bool run_tasks = (team->task_count != 0); + if (bar->total > 1) + run_tasks = __builtin_nvptx_bar_red_or (1, 32 * bar->total, true, + (team->task_count != 0)); + if (state & BAR_CANCELLED) + return true; - gomp_mutex_lock (&bar->lock); - unsigned int waiters = __atomic_load_n (&bar->waiters, MEMMODEL_ACQUIRE); - if (waiters == 0) + if (__builtin_expect (state & BAR_WAS_LAST, 0)) { - /* No threads to wake. */ - gomp_mutex_unlock (&bar->lock); - return; + /* Note: BAR_CANCELLED should never be set in state here, because + cancellation means that at least one of the threads has been + cancelled, thus on a cancellable barrier we should never see + all threads to arrive. */ + + /* Next time we'll be awaiting TOTAL threads again. 
*/ + bar->awaited = bar->total; + team->work_share_cancelled = 0; } - if (count == INT_MAX) - /* Release all threads. */ - __atomic_store_n (&bar->waiters, 0, MEMMODEL_RELEASE); - else if (count < bar->total) - /* Release count threads. */ - __atomic_add_fetch (&bar->waiters, -count, MEMMODEL_ACQ_REL); - else - /* Count has an illegal value. */ - __builtin_abort (); - - /* Wake other threads in bar.sync. */ - asm volatile ("bar.sync 1, %0;" : : "r" (32 * (waiters + 1))); + if (__builtin_expect (run_tasks == true, 0)) + { + while (__atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE) + & BAR_TASK_PENDING) + gomp_barrier_handle_tasks (state); - /* Let them get the updated waiters. */ - asm volatile ("bar.sync 1, %0;" : : "r" (32 * (waiters + 1))); + if (bar->total > 1) + asm volatile ("bar.sync 1, %0;" : : "r" (32 * bar->total)); + } - gomp_mutex_unlock (&bar->lock); + return false; } -/* Copied from linux/wait.h. */ - -static inline int do_spin (int *addr, int val) +bool +gomp_team_barrier_wait_cancel (gomp_barrier_t *bar) { - /* The current implementation doesn't spin. */ - return 1; + return gomp_team_barrier_wait_cancel_end (bar, gomp_barrier_wait_start (bar)); } -/* Copied from linux/wait.h. */ - -static inline void do_wait (int *addr, int val) +void +gomp_team_barrier_cancel (struct gomp_team *team) { - if (do_spin (addr, val)) - futex_wait (addr, val); -} + gomp_mutex_lock (&team->task_lock); + if (team->barrier.generation & BAR_CANCELLED) + { + gomp_mutex_unlock (&team->task_lock); + return; + } + team->barrier.generation |= BAR_CANCELLED; + gomp_mutex_unlock (&team->task_lock); -/* Reuse the linux implementation. */ -#define GOMP_WAIT_H 1 -#include "../linux/bar.c" + /* The 'exit' instruction cancels this thread and also fullfills any other + CTA threads waiting on barriers. 
*/ + asm volatile ("exit;"); +} diff --git a/libgomp/config/nvptx/bar.h b/libgomp/config/nvptx/bar.h index 28bf7f4..095b8c2 100644 --- a/libgomp/config/nvptx/bar.h +++ b/libgomp/config/nvptx/bar.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2022 Free Software Foundation, Inc. +/* Copyright (C) 2015-2023 Free Software Foundation, Inc. Contributed by Alexander Monakov <amonakov@ispras.ru> This file is part of the GNU Offloading and Multi Processing Library @@ -38,8 +38,6 @@ typedef struct unsigned generation; unsigned awaited; unsigned awaited_final; - unsigned waiters; - gomp_mutex_t lock; } gomp_barrier_t; typedef unsigned int gomp_barrier_state_t; @@ -59,8 +57,6 @@ static inline void gomp_barrier_init (gomp_barrier_t *bar, unsigned count) bar->awaited = count; bar->awaited_final = count; bar->generation = 0; - bar->waiters = 0; - gomp_mutex_init (&bar->lock); } static inline void gomp_barrier_reinit (gomp_barrier_t *bar, unsigned count) @@ -83,10 +79,16 @@ extern void gomp_team_barrier_wait_end (gomp_barrier_t *, extern bool gomp_team_barrier_wait_cancel (gomp_barrier_t *); extern bool gomp_team_barrier_wait_cancel_end (gomp_barrier_t *, gomp_barrier_state_t); -extern void gomp_team_barrier_wake (gomp_barrier_t *, int); struct gomp_team; extern void gomp_team_barrier_cancel (struct gomp_team *); +static inline void +gomp_team_barrier_wake (gomp_barrier_t *bar, int count) +{ + /* We never "wake up" threads on nvptx. Threads wait at barrier + instructions till barrier fullfilled. Do nothing here. */ +} + static inline gomp_barrier_state_t gomp_barrier_wait_start (gomp_barrier_t *bar) { diff --git a/libgomp/config/nvptx/doacross.h b/libgomp/config/nvptx/doacross.h index 8f0ffdc..2ebbd51 100644 --- a/libgomp/config/nvptx/doacross.h +++ b/libgomp/config/nvptx/doacross.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2022 Free Software Foundation, Inc. +/* Copyright (C) 2015-2023 Free Software Foundation, Inc. 
Contributed by Alexander Monakov <amonakov@ispras.ru> This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/nvptx/error.c b/libgomp/config/nvptx/error.c index ab99130..0e8827d 100644 --- a/libgomp/config/nvptx/error.c +++ b/libgomp/config/nvptx/error.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2022 Free Software Foundation, Inc. +/* Copyright (C) 2015-2023 Free Software Foundation, Inc. Contributed by Alexander Monakov <amonakov@ispras.ru> This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/nvptx/icv-device.c b/libgomp/config/nvptx/icv-device.c index 6f869be..0c58ed1 100644 --- a/libgomp/config/nvptx/icv-device.c +++ b/libgomp/config/nvptx/icv-device.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2022 Free Software Foundation, Inc. +/* Copyright (C) 2015-2023 Free Software Foundation, Inc. Contributed by Alexander Monakov <amonakov@ispras.ru> This file is part of the GNU Offloading and Multi Processing Library @@ -30,7 +30,7 @@ /* This is set to the ICV values of current GPU during device initialization, when the offload image containing this libgomp portion is loaded. 
*/ -static volatile struct gomp_offload_icvs GOMP_ADDITIONAL_ICVS; +volatile struct gomp_offload_icvs GOMP_ADDITIONAL_ICVS; void omp_set_default_device (int device_num __attribute__((unused))) @@ -81,6 +81,19 @@ omp_set_num_teams (int num_teams) GOMP_ADDITIONAL_ICVS.nteams = num_teams; } +int +omp_get_teams_thread_limit (void) +{ + return GOMP_ADDITIONAL_ICVS.teams_thread_limit; +} + +void +omp_set_teams_thread_limit (int thread_limit) +{ + if (thread_limit >= 0) + GOMP_ADDITIONAL_ICVS.teams_thread_limit = thread_limit; +} + ialias (omp_set_default_device) ialias (omp_get_default_device) ialias (omp_get_initial_device) @@ -89,3 +102,5 @@ ialias (omp_is_initial_device) ialias (omp_get_device_num) ialias (omp_get_max_teams) ialias (omp_set_num_teams) +ialias (omp_get_teams_thread_limit) +ialias (omp_set_teams_thread_limit) diff --git a/libgomp/config/nvptx/libgomp-nvptx.h b/libgomp/config/nvptx/libgomp-nvptx.h new file mode 100644 index 0000000..96de869 --- /dev/null +++ b/libgomp/config/nvptx/libgomp-nvptx.h @@ -0,0 +1,51 @@ +/* Copyright (C) 2022-2023 Free Software Foundation, Inc. + Contributed by Tobias Burnus <tobias@codesourcery.com>. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. 
+ + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This file contains defines and type definitions shared between the + nvptx target's libgomp.a and the plugin-nvptx.c, but that is only + needef for this target. */ + +#ifndef LIBGOMP_NVPTX_H +#define LIBGOMP_NVPTX_H 1 + +#define GOMP_REV_OFFLOAD_VAR __gomp_rev_offload_var + +struct rev_offload { + uint64_t fn; + uint64_t mapnum; + uint64_t addrs; + uint64_t sizes; + uint64_t kinds; + int32_t dev_num; +}; + +#if (__SIZEOF_SHORT__ != 2 \ + || __SIZEOF_SIZE_T__ != 8 \ + || __SIZEOF_POINTER__ != 8) +#error "Data-type conversion required for rev_offload" +#endif + +#endif /* LIBGOMP_NVPTX_H */ + diff --git a/libgomp/config/nvptx/simple-bar.h b/libgomp/config/nvptx/simple-bar.h index 836fcc6..167e5b6 100644 --- a/libgomp/config/nvptx/simple-bar.h +++ b/libgomp/config/nvptx/simple-bar.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2022 Free Software Foundation, Inc. +/* Copyright (C) 2015-2023 Free Software Foundation, Inc. Contributed by Alexander Monakov <amonakov@ispras.ru> This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/nvptx/target.c b/libgomp/config/nvptx/target.c index 11108d2..f102d7d 100644 --- a/libgomp/config/nvptx/target.c +++ b/libgomp/config/nvptx/target.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2013-2022 Free Software Foundation, Inc. +/* Copyright (C) 2013-2023 Free Software Foundation, Inc. Contributed by Jakub Jelinek <jakub@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library @@ -24,9 +24,12 @@ <http://www.gnu.org/licenses/>. */ #include "libgomp.h" +#include "libgomp-nvptx.h" /* For struct rev_offload + GOMP_REV_OFFLOAD_VAR. 
*/ #include <limits.h> extern int __gomp_team_num __attribute__((shared)); +extern volatile struct gomp_offload_icvs GOMP_ADDITIONAL_ICVS; +volatile struct rev_offload *GOMP_REV_OFFLOAD_VAR; bool GOMP_teams4 (unsigned int num_teams_lower, unsigned int num_teams_upper, @@ -88,16 +91,53 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, void **hostaddrs, size_t *sizes, unsigned short *kinds, unsigned int flags, void **depend, void **args) { - (void) device; - (void) fn; - (void) mapnum; - (void) hostaddrs; - (void) sizes; - (void) kinds; + static int lock = 0; /* == gomp_mutex_t lock; gomp_mutex_init (&lock); */ (void) flags; (void) depend; (void) args; - __builtin_unreachable (); + + if (device != GOMP_DEVICE_HOST_FALLBACK + || fn == NULL + || GOMP_REV_OFFLOAD_VAR == NULL) + return; + + gomp_mutex_lock (&lock); + + GOMP_REV_OFFLOAD_VAR->mapnum = mapnum; + GOMP_REV_OFFLOAD_VAR->addrs = (uint64_t) hostaddrs; + GOMP_REV_OFFLOAD_VAR->sizes = (uint64_t) sizes; + GOMP_REV_OFFLOAD_VAR->kinds = (uint64_t) kinds; + GOMP_REV_OFFLOAD_VAR->dev_num = GOMP_ADDITIONAL_ICVS.device_num; + + /* Set 'fn' to trigger processing on the host; wait for completion, + which is flagged by setting 'fn' back to 0 on the host. 
*/ + uint64_t addr_struct_fn = (uint64_t) &GOMP_REV_OFFLOAD_VAR->fn; +#if __PTX_SM__ >= 700 + asm volatile ("st.global.release.sys.u64 [%0], %1;" + : : "r"(addr_struct_fn), "r" (fn) : "memory"); +#else + __sync_synchronize (); /* membar.sys */ + asm volatile ("st.volatile.global.u64 [%0], %1;" + : : "r"(addr_struct_fn), "r" (fn) : "memory"); +#endif + +#if __PTX_SM__ >= 700 + uint64_t fn2; + do + { + asm volatile ("ld.acquire.sys.global.u64 %0, [%1];" + : "=r" (fn2) : "r" (addr_struct_fn) : "memory"); + } + while (fn2 != 0); +#else + /* ld.global.u64 %r64,[__gomp_rev_offload_var]; + ld.u64 %r36,[%r64]; + membar.sys; */ + while (__atomic_load_n (&GOMP_REV_OFFLOAD_VAR->fn, __ATOMIC_ACQUIRE) != 0) + ; /* spin */ +#endif + + gomp_mutex_unlock (&lock); } void diff --git a/libgomp/config/nvptx/task.c b/libgomp/config/nvptx/task.c index 39a2289..a1f1b93 100644 --- a/libgomp/config/nvptx/task.c +++ b/libgomp/config/nvptx/task.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2022 Free Software Foundation, Inc. +/* Copyright (C) 2015-2023 Free Software Foundation, Inc. Contributed by Alexander Monakov <amonakov@ispras.ru>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/nvptx/team.c b/libgomp/config/nvptx/team.c index 6923416..af5f317 100644 --- a/libgomp/config/nvptx/team.c +++ b/libgomp/config/nvptx/team.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2022 Free Software Foundation, Inc. +/* Copyright (C) 2015-2023 Free Software Foundation, Inc. Contributed by Alexander Monakov <amonakov@ispras.ru> This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/nvptx/teams.c b/libgomp/config/nvptx/teams.c index ea9a777..923840c 100644 --- a/libgomp/config/nvptx/teams.c +++ b/libgomp/config/nvptx/teams.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2022 Free Software Foundation, Inc. +/* Copyright (C) 2015-2023 Free Software Foundation, Inc. 
Contributed by Alexander Monakov <amonakov@ispras.ru> This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/nvptx/time.c b/libgomp/config/nvptx/time.c index ff0da9b..77bed8d 100644 --- a/libgomp/config/nvptx/time.c +++ b/libgomp/config/nvptx/time.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2022 Free Software Foundation, Inc. +/* Copyright (C) 2015-2023 Free Software Foundation, Inc. Contributed by Dmitry Melnik <dm@ispras.ru> This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/posix/bar.c b/libgomp/config/posix/bar.c index cf2bdf7..98851f4 100644 --- a/libgomp/config/posix/bar.c +++ b/libgomp/config/posix/bar.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/posix/bar.h b/libgomp/config/posix/bar.h index a1a18eb..6ae7d22 100644 --- a/libgomp/config/posix/bar.h +++ b/libgomp/config/posix/bar.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/posix/doacross.h b/libgomp/config/posix/doacross.h index f4dd56b..7f16a25 100644 --- a/libgomp/config/posix/doacross.h +++ b/libgomp/config/posix/doacross.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2022 Free Software Foundation, Inc. +/* Copyright (C) 2015-2023 Free Software Foundation, Inc. Contributed by Jakub Jelinek <jakub@redhat.com>. 
This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/posix/lock.c b/libgomp/config/posix/lock.c index 7df4297..9c7d636 100644 --- a/libgomp/config/posix/lock.c +++ b/libgomp/config/posix/lock.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/posix/mutex.h b/libgomp/config/posix/mutex.h index c2ffa1b..79b012a 100644 --- a/libgomp/config/posix/mutex.h +++ b/libgomp/config/posix/mutex.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/posix/plugin-suffix.h b/libgomp/config/posix/plugin-suffix.h index cf03f64..d0a7b11 100644 --- a/libgomp/config/posix/plugin-suffix.h +++ b/libgomp/config/posix/plugin-suffix.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2022 Free Software Foundation, Inc. +/* Copyright (C) 2015-2023 Free Software Foundation, Inc. Contributed by Jack Howarth <howarth.at.gcc@gmail.com> This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/posix/pool.h b/libgomp/config/posix/pool.h index ddd253d..7f30f44 100644 --- a/libgomp/config/posix/pool.h +++ b/libgomp/config/posix/pool.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Sebastian Huber <sebastian.huber@embedded-brains.de>. 
This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/posix/proc.c b/libgomp/config/posix/proc.c index 3e70fe4..c08ca3e 100644 --- a/libgomp/config/posix/proc.c +++ b/libgomp/config/posix/proc.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/posix/ptrlock.h b/libgomp/config/posix/ptrlock.h index c352ae5..f4aa8d8 100644 --- a/libgomp/config/posix/ptrlock.h +++ b/libgomp/config/posix/ptrlock.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2008-2022 Free Software Foundation, Inc. +/* Copyright (C) 2008-2023 Free Software Foundation, Inc. Contributed by Jakub Jelinek <jakub@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/posix/sem.c b/libgomp/config/posix/sem.c index 1042dc3..5d3608b 100644 --- a/libgomp/config/posix/sem.c +++ b/libgomp/config/posix/sem.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/posix/sem.h b/libgomp/config/posix/sem.h index 20dd4ee..bf136df 100644 --- a/libgomp/config/posix/sem.h +++ b/libgomp/config/posix/sem.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. 
This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/posix/simple-bar.h b/libgomp/config/posix/simple-bar.h index ebc6402..024ec5e 100644 --- a/libgomp/config/posix/simple-bar.h +++ b/libgomp/config/posix/simple-bar.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2022 Free Software Foundation, Inc. +/* Copyright (C) 2015-2023 Free Software Foundation, Inc. Contributed by Alexander Monakov <amonakov@ispras.ru> This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/posix/thread-stacksize.h b/libgomp/config/posix/thread-stacksize.h index f2300b1..92000f8 100644 --- a/libgomp/config/posix/thread-stacksize.h +++ b/libgomp/config/posix/thread-stacksize.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2017-2022 Free Software Foundation, Inc. +/* Copyright (C) 2017-2023 Free Software Foundation, Inc. Contributed by Jakub Jelinek <jakub@redhat.com> This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/posix/time.c b/libgomp/config/posix/time.c index 748645d..1d3265d 100644 --- a/libgomp/config/posix/time.c +++ b/libgomp/config/posix/time.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/rtems/affinity-fmt.c b/libgomp/config/rtems/affinity-fmt.c index b192dd3..f4bc3e1 100644 --- a/libgomp/config/rtems/affinity-fmt.c +++ b/libgomp/config/rtems/affinity-fmt.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2018-2022 Free Software Foundation, Inc. +/* Copyright (C) 2018-2023 Free Software Foundation, Inc. This file is part of the GNU Offloading and Multi Processing Library (libgomp). 
diff --git a/libgomp/config/rtems/bar.c b/libgomp/config/rtems/bar.c index ab0c585..133feab 100644 --- a/libgomp/config/rtems/bar.c +++ b/libgomp/config/rtems/bar.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Sebastian Huber <sebastian.huber@embedded-brains.de>. This file is part of the GNU OpenMP Library (libgomp). diff --git a/libgomp/config/rtems/bar.h b/libgomp/config/rtems/bar.h index 71a3c5c..67c76e7 100644 --- a/libgomp/config/rtems/bar.h +++ b/libgomp/config/rtems/bar.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Sebastian Huber <sebastian.huber@embedded-brains.de>. This file is part of the GNU OpenMP Library (libgomp). diff --git a/libgomp/config/rtems/mutex.h b/libgomp/config/rtems/mutex.h index 0349973..e112fc1 100644 --- a/libgomp/config/rtems/mutex.h +++ b/libgomp/config/rtems/mutex.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2022 Free Software Foundation, Inc. +/* Copyright (C) 2015-2023 Free Software Foundation, Inc. Contributed by Sebastian Huber <sebastian.huber@embedded-brains.de>. This file is part of the GNU OpenMP Library (libgomp). diff --git a/libgomp/config/rtems/pool.h b/libgomp/config/rtems/pool.h index 774060a..1ea34b5 100644 --- a/libgomp/config/rtems/pool.h +++ b/libgomp/config/rtems/pool.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2022 Free Software Foundation, Inc. +/* Copyright (C) 2015-2023 Free Software Foundation, Inc. Contributed by Sebastian Huber <sebastian.huber@embedded-brains.de>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/rtems/proc.c b/libgomp/config/rtems/proc.c index 3b7bf06..0f396b5 100644 --- a/libgomp/config/rtems/proc.c +++ b/libgomp/config/rtems/proc.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2022 Free Software Foundation, Inc. 
+/* Copyright (C) 2015-2023 Free Software Foundation, Inc. Contributed by Sebastian Huber <sebastian.huber@embedded-brains.de>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/config/rtems/sem.h b/libgomp/config/rtems/sem.h index 1d2098d..300b9ea 100644 --- a/libgomp/config/rtems/sem.h +++ b/libgomp/config/rtems/sem.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2022 Free Software Foundation, Inc. +/* Copyright (C) 2015-2023 Free Software Foundation, Inc. Contributed by Sebastian Huber <sebastian.huber@embedded-brains.de>. This file is part of the GNU OpenMP Library (libgomp). diff --git a/libgomp/configure b/libgomp/configure index b11170e..fd0e337 100755 --- a/libgomp/configure +++ b/libgomp/configure @@ -6173,6 +6173,11 @@ sysv4 | sysv4.3*) tpf*) lt_cv_deplibs_check_method=pass_all ;; +vxworks*) + # Assume VxWorks cross toolchains are built on Linux, possibly + # as canadian for Windows hosts. + lt_cv_deplibs_check_method=pass_all + ;; esac fi @@ -10953,6 +10958,25 @@ uts4*) shlibpath_var=LD_LIBRARY_PATH ;; +# Shared libraries for VwWorks, >= 7 only at this stage +# and (fpic) still incompatible with "large" code models +# in a few configurations. Only for RTP mode in any case, +# and upon explicit request at configure time. 
+vxworks7*) + dynamic_linker=no + case ${with_multisubdir}-${enable_shared} in + *large*) + ;; + *mrtp*-yes) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + dynamic_linker="$host_os module_loader" + ;; + esac + ;; *) dynamic_linker=no ;; @@ -11394,7 +11418,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 11397 "configure" +#line 11421 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -11500,7 +11524,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 11503 "configure" +#line 11527 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -14587,6 +14611,25 @@ uts4*) shlibpath_var=LD_LIBRARY_PATH ;; +# Shared libraries for VwWorks, >= 7 only at this stage +# and (fpic) still incompatible with "large" code models +# in a few configurations. Only for RTP mode in any case, +# and upon explicit request at configure time. +vxworks7*) + dynamic_linker=no + case ${with_multisubdir}-${enable_shared} in + *large*) + ;; + *mrtp*-yes) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + dynamic_linker="$host_os module_loader" + ;; + esac + ;; *) dynamic_linker=no ;; @@ -15010,7 +15053,7 @@ _ACEOF # Plugins for offload execution, configure.ac fragment. -*- mode: autoconf -*- # -# Copyright (C) 2014-2022 Free Software Foundation, Inc. +# Copyright (C) 2014-2023 Free Software Foundation, Inc. # # Contributed by Mentor Embedded. 
# @@ -15157,9 +15200,6 @@ if test x"$enable_offload_targets" != x; then tgt=`echo $tgt | sed 's/=.*//'` tgt_plugin= case $tgt in - *-intelmic-* | *-intelmicemul-*) - tgt_plugin=intelmic - ;; nvptx*) case "${target}" in aarch64*-*-* | powerpc64le-*-* | x86_64-*-*) diff --git a/libgomp/critical.c b/libgomp/critical.c index faf05c1..27af6f3 100644 --- a/libgomp/critical.c +++ b/libgomp/critical.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/env.c b/libgomp/env.c index 0249966..e7a035b 100644 --- a/libgomp/env.c +++ b/libgomp/env.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library @@ -124,7 +124,7 @@ int goacc_default_dims[GOMP_DIM_MAX]; #ifndef LIBGOMP_OFFLOADED_ONLY -static int wait_policy; +static int wait_policy = -1; static unsigned long stacksize = GOMP_DEFAULT_STACKSIZE; static void @@ -283,7 +283,7 @@ parse_unsigned_long_1 (const char *env, const char *val, unsigned long *pvalue, static bool parse_unsigned_long (const char *env, const char *val, void *const params[]) { - unsigned upper = (unsigned long) params[2]; + unsigned long upper = (uintptr_t) params[2]; unsigned long pvalue = 0; bool ret = parse_unsigned_long_1 (env, val, &pvalue, (bool) params[1]); if (!ret) @@ -1981,7 +1981,7 @@ initialize_icvs (struct gomp_initial_icvs *icvs) icvs->bind_var = gomp_default_icv_values.bind_var; icvs->nteams_var = gomp_default_icv_values.nteams_var; icvs->teams_thread_limit_var = gomp_default_icv_values.teams_thread_limit_var; - icvs->wait_policy = 0; + icvs->wait_policy = -1; } /* Helper function for initialize_env to add a device specific ICV 
value diff --git a/libgomp/error.c b/libgomp/error.c index 50ed85e..1fe62df 100644 --- a/libgomp/error.c +++ b/libgomp/error.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/fortran.c b/libgomp/fortran.c index d984ce5..dd8b73d 100644 --- a/libgomp/fortran.c +++ b/libgomp/fortran.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Jakub Jelinek <jakub@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library @@ -76,6 +76,7 @@ ialias_redirect (omp_get_ancestor_thread_num) ialias_redirect (omp_get_team_size) ialias_redirect (omp_get_active_level) ialias_redirect (omp_in_final) +ialias_redirect (omp_in_explicit_task) ialias_redirect (omp_get_cancellation) ialias_redirect (omp_get_proc_bind) ialias_redirect (omp_get_num_places) @@ -482,6 +483,12 @@ omp_in_final_ (void) return omp_in_final (); } +int32_t +omp_in_explicit_task_ (void) +{ + return omp_in_explicit_task (); +} + void omp_set_num_teams_ (const int32_t *num_teams) { diff --git a/libgomp/hashtab.h b/libgomp/hashtab.h index 1c9c908..9426c51 100644 --- a/libgomp/hashtab.h +++ b/libgomp/hashtab.h @@ -1,5 +1,5 @@ /* An expandable hash tables datatype. - Copyright (C) 1999-2022 Free Software Foundation, Inc. + Copyright (C) 1999-2023 Free Software Foundation, Inc. Contributed by Vladimir Makarov <vmakarov@cygnus.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/icv-device.c b/libgomp/icv-device.c index d8acf0e..a2bbedc 100644 --- a/libgomp/icv-device.c +++ b/libgomp/icv-device.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. 
Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library @@ -97,3 +97,20 @@ omp_set_num_teams (int num_teams) } ialias (omp_set_num_teams) + +int +omp_get_teams_thread_limit (void) +{ + return gomp_teams_thread_limit_var; +} + +ialias (omp_get_teams_thread_limit) + +void +omp_set_teams_thread_limit (int thread_limit) +{ + if (thread_limit >= 0) + gomp_teams_thread_limit_var = thread_limit; +} + +ialias (omp_set_teams_thread_limit) diff --git a/libgomp/icv.c b/libgomp/icv.c index df423c0..a0a555d 100644 --- a/libgomp/icv.c +++ b/libgomp/icv.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library @@ -148,19 +148,6 @@ omp_get_supported_active_levels (void) return gomp_supported_active_levels; } -void -omp_set_teams_thread_limit (int thread_limit) -{ - if (thread_limit >= 0) - gomp_teams_thread_limit_var = thread_limit; -} - -int -omp_get_teams_thread_limit (void) -{ - return gomp_teams_thread_limit_var; -} - int omp_get_cancellation (void) { @@ -261,8 +248,6 @@ ialias (omp_get_thread_limit) ialias (omp_set_max_active_levels) ialias (omp_get_max_active_levels) ialias (omp_get_supported_active_levels) -ialias (omp_set_teams_thread_limit) -ialias (omp_get_teams_thread_limit) ialias (omp_get_cancellation) ialias (omp_get_proc_bind) ialias (omp_get_max_task_priority) diff --git a/libgomp/iter.c b/libgomp/iter.c index 40e9124..d6f9817 100644 --- a/libgomp/iter.c +++ b/libgomp/iter.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. 
This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/iter_ull.c b/libgomp/iter_ull.c index 491af74..527a76b 100644 --- a/libgomp/iter_ull.c +++ b/libgomp/iter_ull.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/libgomp-plugin.c b/libgomp/libgomp-plugin.c index 9d4cc62..27e7c94 100644 --- a/libgomp/libgomp-plugin.c +++ b/libgomp/libgomp-plugin.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2014-2022 Free Software Foundation, Inc. +/* Copyright (C) 2014-2023 Free Software Foundation, Inc. Contributed by Mentor Embedded. @@ -78,3 +78,15 @@ GOMP_PLUGIN_fatal (const char *msg, ...) gomp_vfatal (msg, ap); va_end (ap); } + +void +GOMP_PLUGIN_target_rev (uint64_t fn_ptr, uint64_t mapnum, uint64_t devaddrs_ptr, + uint64_t sizes_ptr, uint64_t kinds_ptr, int dev_num, + void (*dev_to_host_cpy) (void *, const void *, size_t, + void *), + void (*host_to_dev_cpy) (void *, const void *, size_t, + void *), void *token) +{ + gomp_target_rev (fn_ptr, mapnum, devaddrs_ptr, sizes_ptr, kinds_ptr, dev_num, + dev_to_host_cpy, host_to_dev_cpy, token); +} diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h index 6ab5ac6..28267f7 100644 --- a/libgomp/libgomp-plugin.h +++ b/libgomp/libgomp-plugin.h @@ -1,6 +1,6 @@ /* The libgomp plugin API. - Copyright (C) 2014-2022 Free Software Foundation, Inc. + Copyright (C) 2014-2023 Free Software Foundation, Inc. Contributed by Mentor Embedded. @@ -49,7 +49,6 @@ enum offload_target_type OFFLOAD_TARGET_TYPE_HOST = 2, /* OFFLOAD_TARGET_TYPE_HOST_NONSHM = 3 removed. */ OFFLOAD_TARGET_TYPE_NVIDIA_PTX = 5, - OFFLOAD_TARGET_TYPE_INTEL_MIC = 6, OFFLOAD_TARGET_TYPE_HSA = 7, OFFLOAD_TARGET_TYPE_GCN = 8 }; @@ -121,6 +120,13 @@ extern void GOMP_PLUGIN_error (const char *, ...) 
extern void GOMP_PLUGIN_fatal (const char *, ...) __attribute__ ((noreturn, format (printf, 1, 2))); +extern void GOMP_PLUGIN_target_rev (uint64_t, uint64_t, uint64_t, uint64_t, + uint64_t, int, + void (*) (void *, const void *, size_t, + void *), + void (*) (void *, const void *, size_t, + void *), void *); + /* Prototypes for functions implemented by libgomp plugins. */ extern const char *GOMP_OFFLOAD_get_name (void); extern unsigned int GOMP_OFFLOAD_get_caps (void); diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 7519274..ba8fe34 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library @@ -112,8 +112,8 @@ extern void gomp_aligned_free (void *); /* Optimized allocators for team-specific data that will die with the team. */ #ifdef __AMDGCN__ +#include "libgomp-gcn.h" /* The arena is initialized in config/gcn/team.c. */ -#define TEAM_ARENA_SIZE 64*1024 /* Must match the value in plugin-gcn.c. */ #define TEAM_ARENA_START 16 /* LDS offset of free pointer. */ #define TEAM_ARENA_FREE 24 /* LDS offset of free pointer. */ #define TEAM_ARENA_END 32 /* LDS offset of end pointer. */ @@ -135,7 +135,8 @@ team_malloc (size_t size) { /* While this is experimental, let's make sure we know when OOM happens. */ - const char msg[] = "GCN team arena exhausted\n"; + const char msg[] = "GCN team arena exhausted;" + " configure with GCN_TEAM_ARENA_SIZE=bytes\n"; write (2, msg, sizeof(msg)-1); /* Fall back to using the heap (slowly). 
*/ @@ -1128,6 +1129,11 @@ extern int gomp_pause_host (void); extern void gomp_init_targets_once (void); extern int gomp_get_num_devices (void); extern bool gomp_target_task_fn (void *); +extern void gomp_target_rev (uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, + int, + void (*) (void *, const void *, size_t, void *), + void (*) (void *, const void *, size_t, void *), + void *); /* Splay tree definitions. */ typedef struct splay_tree_node_s *splay_tree_node; @@ -1152,29 +1158,7 @@ struct target_var_desc { uintptr_t length; }; -struct target_mem_desc { - /* Reference count. */ - uintptr_t refcount; - /* All the splay nodes allocated together. */ - splay_tree_node array; - /* Start of the target region. */ - uintptr_t tgt_start; - /* End of the targer region. */ - uintptr_t tgt_end; - /* Handle to free. */ - void *to_free; - /* Previous target_mem_desc. */ - struct target_mem_desc *prev; - /* Number of items in following list. */ - size_t list_count; - - /* Corresponding target device descriptor. */ - struct gomp_device_descr *device_descr; - - /* List of target items to remove (or decrease refcount) - at the end of region. */ - struct target_var_desc list[]; -}; +struct target_mem_desc; /* Special value for refcount - mask to indicate existence of special values. Right now we allocate 3 bits. */ @@ -1268,6 +1252,58 @@ splay_compare (splay_tree_key x, splay_tree_key y) #include "splay-tree.h" +/* Reverse offload splay-tree handling (functions only). */ + +struct reverse_splay_tree_key_s { + /* Address of the device object. 
*/ + uint64_t dev; + splay_tree_key k; +}; + +typedef struct reverse_splay_tree_node_s *reverse_splay_tree_node; +typedef struct reverse_splay_tree_s *reverse_splay_tree; +typedef struct reverse_splay_tree_key_s *reverse_splay_tree_key; + +static inline int +reverse_splay_compare (reverse_splay_tree_key x, reverse_splay_tree_key y) +{ + if (x->dev < y->dev) + return -1; + if (x->dev > y->dev) + return 1; + return 0; +} + +#define splay_tree_prefix reverse +#include "splay-tree.h" + +struct target_mem_desc { + /* Reference count. */ + uintptr_t refcount; + /* All the splay nodes allocated together. */ + splay_tree_node array; + /* Likewise for the reverse lookup device->host for reverse offload. */ + reverse_splay_tree_node rev_array; + /* Start of the target region. */ + uintptr_t tgt_start; + /* End of the targer region. */ + uintptr_t tgt_end; + /* Handle to free. */ + void *to_free; + /* Previous target_mem_desc. */ + struct target_mem_desc *prev; + /* Number of items in following list. */ + size_t list_count; + + /* Corresponding target device descriptor. */ + struct gomp_device_descr *device_descr; + + /* List of target items to remove (or decrease refcount) + at the end of region. */ + struct target_var_desc list[]; +}; + + typedef struct acc_dispatch_t { /* Execute. */ @@ -1362,6 +1398,7 @@ struct gomp_device_descr /* Splay tree containing information about mapped memory regions. */ struct splay_tree_s mem_map; + struct reverse_splay_tree_s mem_map_rev; /* Mutex for the mutable data. 
*/ gomp_mutex_t lock; diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map index 46d5f10..ce6b719 100644 --- a/libgomp/libgomp.map +++ b/libgomp/libgomp.map @@ -234,6 +234,12 @@ OMP_5.1.1 { omp_target_memcpy_rect_async; } OMP_5.1; +OMP_5.2 { + global: + omp_in_explicit_task; + omp_in_explicit_task_; +} OMP_5.1.1; + GOMP_1.0 { global: GOMP_atomic_end; @@ -622,3 +628,8 @@ GOMP_PLUGIN_1.3 { GOMP_PLUGIN_goacc_profiling_dispatch; GOMP_PLUGIN_goacc_thread; } GOMP_PLUGIN_1.2; + +GOMP_PLUGIN_1.4 { + global: + GOMP_PLUGIN_target_rev; +} GOMP_PLUGIN_1.3; diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi index 5aef987..dc6b4ac 100644 --- a/libgomp/libgomp.texi +++ b/libgomp/libgomp.texi @@ -7,7 +7,7 @@ @copying -Copyright @copyright{} 2006-2022 Free Software Foundation, Inc. +Copyright @copyright{} 2006-2023 Free Software Foundation, Inc. Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.3 or @@ -162,6 +162,7 @@ See also @ref{OpenMP Implementation Status}. * OpenMP 5.0:: Feature completion status to 5.0 specification * OpenMP 5.1:: Feature completion status to 5.1 specification * OpenMP 5.2:: Feature completion status to 5.2 specification +* OpenMP Technical Report 11:: Feature completion status to first 6.0 preview @end menu The @code{_OPENMP} preprocessor macro and Fortran's @code{openmp_version} @@ -191,10 +192,10 @@ The OpenMP 4.5 specification is fully supported. 
env variable @tab Y @tab @item Nested-parallel changes to @emph{max-active-levels-var} ICV @tab Y @tab @item @code{requires} directive @tab P - @tab complete but no non-host devices provides @code{unified_address}, - @code{unified_shared_memory} or @code{reverse_offload} + @tab complete but no non-host devices provides @code{unified_address} or + @code{unified_shared_memory} @item @code{teams} construct outside an enclosing target region @tab Y @tab -@item Non-rectangular loop nests @tab Y @tab +@item Non-rectangular loop nests @tab P @tab Full support for C/C++, partial for Fortran @item @code{!=} as relational-op in canonical loop form for C/C++ @tab Y @tab @item @code{nonmonotonic} as default loop schedule modifier for worksharing-loop constructs @tab Y @tab @@ -226,8 +227,7 @@ The OpenMP 4.5 specification is fully supported. @item @code{allocate} directive @tab N @tab @item @code{allocate} clause @tab P @tab Initial support @item @code{use_device_addr} clause on @code{target data} @tab Y @tab -@item @code{ancestor} modifier on @code{device} clause - @tab Y @tab See comment for @code{requires} +@item @code{ancestor} modifier on @code{device} clause @tab Y @tab @item Implicit declare target directive @tab Y @tab @item Discontiguous array section with @code{target update} construct @tab N @tab @@ -287,7 +287,7 @@ The OpenMP 4.5 specification is fully supported. @code{append_args} @tab N @tab @item @code{dispatch} construct @tab N @tab @item device-specific ICV settings with environment variables @tab Y @tab -@item @code{assume} directive @tab N @tab +@item @code{assume} and @code{assumes} directives @tab Y @tab @item @code{nothing} directive @tab Y @tab @item @code{error} directive @tab Y @tab @item @code{masked} construct @tab Y @tab @@ -295,8 +295,8 @@ The OpenMP 4.5 specification is fully supported. 
@item Loop transformation constructs @tab N @tab @item @code{strict} modifier in the @code{grainsize} and @code{num_tasks} clauses of the @code{taskloop} construct @tab Y @tab -@item @code{align} clause/modifier in @code{allocate} directive/clause - and @code{allocator} directive @tab P @tab C/C++ on clause only +@item @code{align} clause in @code{allocate} directive @tab N @tab +@item @code{align} modifier in @code{allocate} clause @tab Y @tab @item @code{thread_limit} clause to @code{target} construct @tab Y @tab @item @code{has_device_addr} clause to @code{target} construct @tab Y @tab @item Iterators in @code{target update} motion clauses and @code{map} @@ -345,12 +345,14 @@ The OpenMP 4.5 specification is fully supported. @item Support of structured block sequences in C/C++ @tab Y @tab @item @code{unconstrained} and @code{reproducible} modifiers on @code{order} clause @tab Y @tab -@item Support @code{begin/end declare target} syntax in C/C++ @tab N @tab +@item Support @code{begin/end declare target} syntax in C/C++ @tab Y @tab @item Pointer predetermined firstprivate getting initialized to address of matching mapped list item per 5.1, Sect. 2.21.7.2 @tab N @tab -@item @code{begin declare target} directive @tab N @tab @item For Fortran, diagnose placing declarative before/between @code{USE}, @code{IMPORT}, and @code{IMPLICIT} as invalid @tab N @tab +@item Optional comma between directive and clause in the @code{#pragma} form @tab Y @tab +@item @code{indirect} clause in @code{declare target} @tab N @tab +@item @code{device_type(nohost)}/@code{device_type(host)} for variables @tab N @tab @end multitable @@ -361,8 +363,8 @@ to address of matching mapped list item per 5.1, Sect. 
2.21.7.2 @tab N @tab @multitable @columnfractions .60 .10 .25 @headitem Description @tab Status @tab Comments -@item @code{omp_in_explicit_task} routine and @emph{implicit-task-var} ICV - @tab N @tab +@item @code{omp_in_explicit_task} routine and @emph{explicit-task-var} ICV + @tab Y @tab @item @code{omp}/@code{ompx}/@code{omx} sentinels and @code{omp_}/@code{ompx_} namespaces @tab N/A @tab warning for @code{ompx/omx} sentinels@footnote{The @code{ompx} @@ -372,7 +374,7 @@ to address of matching mapped list item per 5.1, Sect. 2.21.7.2 @tab N @tab a warning enabled by default and, for fixed-source code, the @code{omx} sentinel is warned for with with @code{-Wsurprising} (enabled by @code{-Wall}). Unknown clauses are always rejected with an error.} -@item Clauses on @code{end} directive can be on directive @tab N @tab +@item Clauses on @code{end} directive can be on directive @tab Y @tab @item Deprecation of no-argument @code{destroy} clause on @code{depobj} @tab N @tab @item @code{linear} clause syntax changes and @code{step} modifier @tab Y @tab @@ -388,7 +390,7 @@ to address of matching mapped list item per 5.1, Sect. 2.21.7.2 @tab N @tab @item Extended list of directives permitted in Fortran pure procedures @tab N @tab @item New @code{allocators} directive for Fortran @tab N @tab -@item Deprecation of @code{allocator} directive for Fortran +@item Deprecation of @code{allocate} directive for Fortran allocatables/pointers @tab N @tab @item Optional paired @code{end} directive with @code{dispatch} @tab N @tab @item New @code{memspace} and @code{traits} modifiers for @code{uses_allocators} @@ -403,7 +405,7 @@ to address of matching mapped list item per 5.1, Sect. 
2.21.7.2 @tab N @tab @item @code{allocate} and @code{firstprivate} clauses on @code{scope} @tab Y @tab @item @code{ompt_callback_work} @tab N @tab -@item Default map-type for @code{map} clause in @code{target enter/exit data} +@item Default map-type for the @code{map} clause in @code{target enter/exit data} @tab Y @tab @item New @code{doacross} clause as alias for @code{depend} with @code{source}/@code{sink} modifier @tab Y @tab @@ -426,6 +428,70 @@ to address of matching mapped list item per 5.1, Sect. 2.21.7.2 @tab N @tab @end multitable +@node OpenMP Technical Report 11 +@section OpenMP Technical Report 11 + +Technical Report (TR) 11 is the first preview for OpenMP 6.0. + +@unnumberedsubsec New features listed in Appendix B of the OpenMP specification +@multitable @columnfractions .60 .10 .25 +@item Features deprecated in versions 5.2, 5.1 and 5.0 were removed + @tab N/A @tab Backward compatibility +@item The @code{decl} attribute was added to the C++ attribute syntax + @tab N @tab +@item @code{_ALL} suffix to the device-scope environment variables + @tab P @tab Host device number wrongly accepted +@item For Fortran, @emph{locator list} can be also function reference with + data pointer result @tab N @tab +@item Ref-count change for @code{use_device_ptr}/@code{use_device_addr} + @tab N @tab +@item Implicit reduction identifiers of C++ classes + @tab N @tab +@item Change of the @emph{map-type} property from @emph{ultimate} to + @emph{default} @tab N @tab +@item Concept of @emph{assumed-size arrays} in C and C++ + @tab N @tab +@item Mapping of @emph{assumed-size arrays} in C, C++ and Fortran + @tab N @tab +@item @code{groupprivate} directive @tab N @tab +@item @code{local} clause to declare target directive @tab N @tab +@item @code{part_size} allocator trait @tab N @tab +@item @code{pin_device}, @code{preferred_device} and @code{target_access} + allocator traits + @tab N @tab +@item @code{access} allocator trait changes @tab N @tab +@item Extension of 
@code{interop} operation of @code{append_args}, allowing all + modifiers of the @code{init} clause + @tab N @tab +@item @code{interop} clause to @code{dispatch} @tab N @tab +@item @code{apply} code to loop-transforming constructs @tab N @tab +@item @code{omp_curr_progress_width} identifier @tab N @tab +@item @code{safesync} clause to the @code{parallel} construct @tab N @tab +@item @code{omp_get_max_progress_width} runtime routine @tab N @tab +@item @code{strict} modifier keyword to @code{num_threads} @tab N @tab +@item @code{memscope} clause to @code{atomic} and @code{flush} @tab N @tab +@item Routines for obtaining memory spaces/allocators for shared/device memory + @tab N @tab +@item @code{omp_get_memspace_num_resources} routine @tab N @tab +@item @code{omp_get_submemspace} routine @tab N @tab +@item @code{ompt_get_buffer_limits} OMPT routine @tab N @tab +@item Extension of @code{OMP_DEFAULT_DEVICE} and new + @code{OMP_AVAILABLE_DEVICES} environment vars @tab N @tab +@item Supporting increments with abstract names in @code{OMP_PLACES} @tab N @tab +@end multitable + +@unnumberedsubsec Other new TR 11 features +@multitable @columnfractions .60 .10 .25 +@item Relaxed Fortran restrictions to the @code{aligned} clause @tab N @tab +@item Mapping lambda captures @tab N @tab +@item For Fortran, atomic compare with storing the comparison result + @tab N @tab +@item @code{aligned} clause changes for @code{simd} and @code{declare simd} + @tab N @tab +@end multitable + + + @c --------------------------------------------------------------------- @c OpenMP Runtime Library Routines @c --------------------------------------------------------------------- @@ -458,7 +524,7 @@ linkage, and do not throw exceptions. 
* omp_get_num_procs:: Number of processors online * omp_get_num_teams:: Number of teams * omp_get_num_threads:: Size of the active team -* omp_get_proc_bind:: Whether theads may be moved between CPUs +* omp_get_proc_bind:: Whether threads may be moved between CPUs * omp_get_schedule:: Obtain the runtime scheduling method * omp_get_supported_active_levels:: Maximum number of active regions supported * omp_get_team_num:: Get team number @@ -965,7 +1031,7 @@ one thread per CPU online is used. @node omp_get_proc_bind -@section @code{omp_get_proc_bind} -- Whether theads may be moved between CPUs +@section @code{omp_get_proc_bind} -- Whether threads may be moved between CPUs @table @asis @item @emph{Description}: This functions returns the currently active thread affinity policy, which is @@ -1879,8 +1945,8 @@ beginning with @env{GOMP_} are GNU extensions. * OMP_NESTED:: Nested parallel regions * OMP_NUM_TEAMS:: Specifies the number of teams to use by teams region * OMP_NUM_THREADS:: Specifies the number of threads to use -* OMP_PROC_BIND:: Whether theads may be moved between CPUs -* OMP_PLACES:: Specifies on which CPUs the theads should be placed +* OMP_PROC_BIND:: Whether threads may be moved between CPUs +* OMP_PLACES:: Specifies on which CPUs the threads should be placed * OMP_STACKSIZE:: Set default thread stack size * OMP_SCHEDULE:: How threads are scheduled * OMP_TARGET_OFFLOAD:: Controls offloading behaviour @@ -2078,12 +2144,12 @@ nesting by default. If undefined one thread per CPU is used. @node OMP_PROC_BIND -@section @env{OMP_PROC_BIND} -- Whether theads may be moved between CPUs +@section @env{OMP_PROC_BIND} -- Whether threads may be moved between CPUs @cindex Environment Variable @table @asis @item @emph{Description}: Specifies whether threads may be moved between processors. If set to -@code{TRUE}, OpenMP theads should not be moved; if set to @code{FALSE} +@code{TRUE}, OpenMP threads should not be moved; if set to @code{FALSE} they may be moved. 
Alternatively, a comma separated list with the values @code{PRIMARY}, @code{MASTER}, @code{CLOSE} and @code{SPREAD} can be used to specify the thread affinity policy for the corresponding nesting @@ -2108,7 +2174,7 @@ When undefined, @env{OMP_PROC_BIND} defaults to @code{TRUE} when @node OMP_PLACES -@section @env{OMP_PLACES} -- Specifies on which CPUs the theads should be placed +@section @env{OMP_PLACES} -- Specifies on which CPUs the threads should be placed @cindex Environment Variable @table @asis @item @emph{Description}: @@ -3888,7 +3954,7 @@ same context. @section First invocation: OpenACC library API In this second use case (see below), a function in the OpenACC library is -called prior to any of the functions in the CUBLAS library. More specificially, +called prior to any of the functions in the CUBLAS library. More specifically, the function @code{acc_set_device_num()}. In the use case presented here, the function @code{acc_set_device_num()} @@ -4304,13 +4370,14 @@ offloading devices (it's not clear if they should be): @multitable @columnfractions .60 .10 .25 @headitem @code{arch} @tab @code{kind} @tab @code{isa} -@item @code{intel_mic}, @code{x86}, @code{x86_64}, @code{i386}, @code{i486}, +@item @code{x86}, @code{x86_64}, @code{i386}, @code{i486}, @code{i586}, @code{i686}, @code{ia32} @tab @code{host} @tab See @code{-m...} flags in ``x86 Options'' (without @code{-m}) @item @code{amdgcn}, @code{gcn} @tab @code{gpu} - @tab See @code{-march=} in ``AMD GCN Options'' + @tab See @code{-march=} in ``AMD GCN Options''@footnote{Additionally, + @code{gfx803} is supported as an alias for @code{fiji}.} @item @code{nvptx} @tab @code{gpu} @tab See @code{-march=} in ``Nvidia PTX Options'' @@ -4352,7 +4419,7 @@ On the hardware side, there is the hierarchy (fine to coarse): @item work item (thread) @item wavefront @item work group -@item compute unite (CU) +@item compute unit (CU) @end itemize All OpenMP and OpenACC levels are used, i.e. 
@@ -4367,7 +4434,8 @@ All OpenMP and OpenACC levels are used, i.e. The used sizes are @itemize @item Number of teams is the specified @code{num_teams} (OpenMP) or - @code{num_gangs} (OpenACC) or otherwise the number of CU + @code{num_gangs} (OpenACC) or otherwise the number of CU. It is limited + by two times the number of CU. @item Number of wavefronts is 4 for gfx900 and 16 otherwise; @code{num_threads} (OpenMP) and @code{num_workers} (OpenACC) overrides this if smaller. @@ -4386,6 +4454,15 @@ The implementation remark: @item I/O within OpenMP target regions and OpenACC parallel/kernels is supported using the C library @code{printf} functions and the Fortran @code{print}/@code{write} statements. +@item Reverse offload regions (i.e. @code{target} regions with + @code{device(ancestor:1)}) are processed serially per @code{target} region + such that the next reverse offload region is only executed after the previous + one returned. +@item OpenMP code that has a requires directive with @code{unified_address} or + @code{unified_shared_memory} will remove any GCN device from the list of + available devices (``host fallback''). +@item The available stack size can be changed using the @code{GCN_STACK_SIZE} + environment variable; the default is 32 kiB per thread. @end itemize @@ -4413,6 +4490,8 @@ The used sizes are @itemize @item The @code{warp_size} is always 32 @item CUDA kernel launched: @code{dim=@{#teams,1,1@}, blocks=@{#threads,warp_size,1@}}. +@item The number of teams is limited by the number of blocks the device can + host simultaneously. @end itemize Additional information can be obtained by setting the environment variable to @@ -4424,7 +4503,7 @@ which caches the JIT in the user's directory (see CUDA documentation; can be tuned by the environment variables @code{CUDA_CACHE_@{DISABLE,MAXSIZE,PATH@}}. 
Note: While PTX ISA is generic, the @code{-mptx=} and @code{-march=} commandline -options still affect the used PTX ISA code and, thus, the requirments on +options still affect the used PTX ISA code and, thus, the requirements on CUDA version and hardware. The implementation remark: @@ -4435,6 +4514,15 @@ The implementation remark: @item Compilation OpenMP code that contains @code{requires reverse_offload} requires at least @code{-march=sm_35}, compiling for @code{-march=sm_30} is not supported. +@item For code containing reverse offload (i.e. @code{target} regions with + @code{device(ancestor:1)}), there is a slight performance penalty + for @emph{all} target regions, consisting mostly of shutdown delay. + Per device, reverse offload regions are processed serially such that + the next reverse offload region is only executed after the previous + one returned. +@item OpenMP code that has a requires directive with @code{unified_address} + or @code{unified_shared_memory} will remove any nvptx device from the + list of available devices (``host fallback''). @end itemize diff --git a/libgomp/libgomp_f.h.in b/libgomp/libgomp_f.h.in index 7063364..d8e61c8 100644 --- a/libgomp/libgomp_f.h.in +++ b/libgomp/libgomp_f.h.in @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Jakub Jelinek <jakub@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/libgomp_g.h b/libgomp/libgomp_g.h index 84b9f2c..5c1675c 100644 --- a/libgomp/libgomp_g.h +++ b/libgomp/libgomp_g.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. 
This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/lock.c b/libgomp/lock.c index ec7e3a3..3095181 100644 --- a/libgomp/lock.c +++ b/libgomp/lock.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/loop.c b/libgomp/loop.c index be85162..d6450fe 100644 --- a/libgomp/loop.c +++ b/libgomp/loop.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/loop_ull.c b/libgomp/loop_ull.c index 6027372..c0068fa 100644 --- a/libgomp/loop_ull.c +++ b/libgomp/loop_ull.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/oacc-async.c b/libgomp/oacc-async.c index 026df3c..82d00b6 100644 --- a/libgomp/oacc-async.c +++ b/libgomp/oacc-async.c @@ -1,6 +1,6 @@ /* OpenACC Runtime Library Definitions. - Copyright (C) 2013-2022 Free Software Foundation, Inc. + Copyright (C) 2013-2023 Free Software Foundation, Inc. Contributed by Mentor Embedded. @@ -410,9 +410,6 @@ goacc_wait (int async, int num_waits, va_list *ap) break; } - if (acc_async_test (qid)) - continue; - if (async == acc_async_sync) acc_wait (qid); else if (qid == async) diff --git a/libgomp/oacc-cuda.c b/libgomp/oacc-cuda.c index 91b602d..314b2da 100644 --- a/libgomp/oacc-cuda.c +++ b/libgomp/oacc-cuda.c @@ -1,6 +1,6 @@ /* OpenACC Runtime Library: CUDA support glue. - Copyright (C) 2014-2022 Free Software Foundation, Inc. 
+ Copyright (C) 2014-2023 Free Software Foundation, Inc. Contributed by Mentor Embedded. diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c index 4e3971a..b75e8be 100644 --- a/libgomp/oacc-host.c +++ b/libgomp/oacc-host.c @@ -1,6 +1,6 @@ /* OpenACC Runtime Library: acc_device_host. - Copyright (C) 2013-2022 Free Software Foundation, Inc. + Copyright (C) 2013-2023 Free Software Foundation, Inc. Contributed by Mentor Embedded. @@ -284,6 +284,7 @@ static struct gomp_device_descr host_dispatch = .run_func = host_run, .mem_map = { NULL }, + .mem_map_rev = { NULL }, /* .lock initialized in goacc_host_init. */ .state = GOMP_DEVICE_UNINITIALIZED, diff --git a/libgomp/oacc-init.c b/libgomp/oacc-init.c index 42c3e74e..3077da3 100644 --- a/libgomp/oacc-init.c +++ b/libgomp/oacc-init.c @@ -1,6 +1,6 @@ /* OpenACC Runtime initialization routines - Copyright (C) 2013-2022 Free Software Foundation, Inc. + Copyright (C) 2013-2023 Free Software Foundation, Inc. Contributed by Mentor Embedded. diff --git a/libgomp/oacc-int.h b/libgomp/oacc-int.h index 2642be9..051c882 100644 --- a/libgomp/oacc-int.h +++ b/libgomp/oacc-int.h @@ -1,6 +1,6 @@ /* OpenACC Runtime - internal declarations - Copyright (C) 2013-2022 Free Software Foundation, Inc. + Copyright (C) 2013-2023 Free Software Foundation, Inc. Contributed by Mentor Embedded. diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index 73b2710..fe63274 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -1,6 +1,6 @@ /* OpenACC Runtime initialization routines - Copyright (C) 2013-2022 Free Software Foundation, Inc. + Copyright (C) 2013-2023 Free Software Foundation, Inc. Contributed by Mentor Embedded. 
@@ -1150,8 +1150,7 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, } else if (n && groupnum > 1) { - assert (n->refcount != REFCOUNT_INFINITY - && n->refcount != REFCOUNT_LINK); + assert (n->refcount != REFCOUNT_LINK); for (size_t j = i + 1; j <= group_last; j++) if ((kinds[j] & 0xff) == GOMP_MAP_ATTACH) @@ -1166,6 +1165,44 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, bool processed = false; struct target_mem_desc *tgt = n->tgt; + + /* Minimal OpenACC variant corresponding to PR96668 + "[OpenMP] Re-mapping allocated but previously unallocated + allocatable does not work" 'libgomp/target.c' changes, so that + OpenACC 'declare' code à la PR106643 + "[gfortran + OpenACC] Allocate in module causes refcount error" + has a chance to work. */ + if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET + && tgt->list_count == 0) + { + /* 'declare target'. */ + assert (n->refcount == REFCOUNT_INFINITY); + + for (size_t k = 1; k < groupnum; k++) + { + /* The only thing we expect to see here. */ + assert ((kinds[i + k] & 0xff) == GOMP_MAP_POINTER); + } + + /* Let 'goacc_map_vars' -> 'gomp_map_vars_internal' handle + this. */ + gomp_mutex_unlock (&acc_dev->lock); + struct target_mem_desc *tgt_ + = goacc_map_vars (acc_dev, aq, groupnum, &hostaddrs[i], NULL, + &sizes[i], &kinds[i], true, + GOMP_MAP_VARS_ENTER_DATA); + assert (tgt_ == NULL); + gomp_mutex_lock (&acc_dev->lock); + + /* Given that 'goacc_exit_data_internal'/'goacc_exit_datum_1' + will always see 'n->refcount == REFCOUNT_INFINITY', + there's no need to adjust 'n->dynamic_refcount' here. */ + + processed = true; + } + else + assert (n->refcount != REFCOUNT_INFINITY); + for (size_t j = 0; j < tgt->list_count; j++) if (tgt->list[j].key == n) { diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c index 9f97cbd..363e665 100644 --- a/libgomp/oacc-parallel.c +++ b/libgomp/oacc-parallel.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2013-2022 Free Software Foundation, Inc. 
+/* Copyright (C) 2013-2023 Free Software Foundation, Inc. Contributed by Mentor Embedded. @@ -108,8 +108,6 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), va_list ap; struct goacc_thread *thr; struct gomp_device_descr *acc_dev; - struct target_mem_desc *tgt; - void **devaddrs; unsigned int i; struct splay_tree_key_s k; splay_tree_key tgt_fn_key; @@ -290,8 +288,10 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), goacc_aq aq = get_goacc_asyncqueue (async); - tgt = goacc_map_vars (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds, - true, 0); + struct target_mem_desc *tgt + = goacc_map_vars (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds, true, + GOMP_MAP_VARS_TARGET); + if (profiling_p) { prof_info.event_type = acc_ev_enter_data_end; @@ -301,10 +301,7 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), &api_info); } - devaddrs = gomp_alloca (sizeof (void *) * mapnum); - for (i = 0; i < mapnum; i++) - devaddrs[i] = (void *) gomp_map_val (tgt, hostaddrs, i); - + void **devaddrs = (void **) tgt->tgt_start; if (aq == NULL) acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, dims, tgt); diff --git a/libgomp/oacc-plugin.c b/libgomp/oacc-plugin.c index 721907f..b0d0cb1 100644 --- a/libgomp/oacc-plugin.c +++ b/libgomp/oacc-plugin.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2014-2022 Free Software Foundation, Inc. +/* Copyright (C) 2014-2023 Free Software Foundation, Inc. Contributed by Mentor Embedded. diff --git a/libgomp/oacc-plugin.h b/libgomp/oacc-plugin.h index 153833d..6bbc3fb 100644 --- a/libgomp/oacc-plugin.h +++ b/libgomp/oacc-plugin.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2014-2022 Free Software Foundation, Inc. +/* Copyright (C) 2014-2023 Free Software Foundation, Inc. Contributed by Mentor Embedded. 
diff --git a/libgomp/oacc-profiling.c b/libgomp/oacc-profiling.c index 3df6eeb..467bca3 100644 --- a/libgomp/oacc-profiling.c +++ b/libgomp/oacc-profiling.c @@ -1,6 +1,6 @@ /* OpenACC Profiling Interface - Copyright (C) 2019-2022 Free Software Foundation, Inc. + Copyright (C) 2019-2023 Free Software Foundation, Inc. Contributed by Mentor, a Siemens Business. diff --git a/libgomp/omp.h.in b/libgomp/omp.h.in index 925a650..bd1286c 100644 --- a/libgomp/omp.h.in +++ b/libgomp/omp.h.in @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library @@ -244,6 +244,7 @@ extern int omp_get_team_size (int) __GOMP_NOTHROW; extern int omp_get_active_level (void) __GOMP_NOTHROW; extern int omp_in_final (void) __GOMP_NOTHROW; +extern int omp_in_explicit_task (void) __GOMP_NOTHROW; extern int omp_get_cancellation (void) __GOMP_NOTHROW; extern omp_proc_bind_t omp_get_proc_bind (void) __GOMP_NOTHROW; diff --git a/libgomp/omp_lib.f90.in b/libgomp/omp_lib.f90.in index 7ba115f..e451527 100644 --- a/libgomp/omp_lib.f90.in +++ b/libgomp/omp_lib.f90.in @@ -1,4 +1,4 @@ -! Copyright (C) 2005-2022 Free Software Foundation, Inc. +! Copyright (C) 2005-2023 Free Software Foundation, Inc. ! Contributed by Jakub Jelinek <jakub@redhat.com>. ! This file is part of the GNU Offloading and Multi Processing Library @@ -445,6 +445,12 @@ end interface interface + function omp_in_explicit_task () + logical (4) :: omp_in_explicit_task + end function omp_in_explicit_task + end interface + + interface function omp_get_cancellation () logical (4) :: omp_get_cancellation end function omp_get_cancellation diff --git a/libgomp/omp_lib.h.in b/libgomp/omp_lib.h.in index 3626836..5fd2274 100644 --- a/libgomp/omp_lib.h.in +++ b/libgomp/omp_lib.h.in @@ -1,4 +1,4 @@ -! Copyright (C) 2005-2022 Free Software Foundation, Inc. +! 
Copyright (C) 2005-2023 Free Software Foundation, Inc. ! Contributed by Jakub Jelinek <jakub@redhat.com>. ! This file is part of the GNU Offloading and Multi Processing Library @@ -220,6 +220,8 @@ external omp_in_final logical(4) omp_in_final + external omp_in_explicit_task + logical(4) omp_in_explicit_task external omp_get_cancellation logical(4) omp_get_cancellation diff --git a/libgomp/openacc.f90 b/libgomp/openacc.f90 index e80e3a2..e6d548f 100644 --- a/libgomp/openacc.f90 +++ b/libgomp/openacc.f90 @@ -1,6 +1,6 @@ ! OpenACC Runtime Library Definitions. -! Copyright (C) 2014-2022 Free Software Foundation, Inc. +! Copyright (C) 2014-2023 Free Software Foundation, Inc. ! Contributed by Tobias Burnus <burnus@net-b.de> ! and Mentor Embedded. diff --git a/libgomp/openacc.h b/libgomp/openacc.h index 1ca6300..09aba52 100644 --- a/libgomp/openacc.h +++ b/libgomp/openacc.h @@ -1,6 +1,6 @@ /* OpenACC Runtime Library User-facing Declarations - Copyright (C) 2013-2022 Free Software Foundation, Inc. + Copyright (C) 2013-2023 Free Software Foundation, Inc. Contributed by Mentor Embedded. diff --git a/libgomp/openacc_lib.h b/libgomp/openacc_lib.h index 1c81edd..74f3e87 100644 --- a/libgomp/openacc_lib.h +++ b/libgomp/openacc_lib.h @@ -1,6 +1,6 @@ ! OpenACC Runtime Library Definitions. -*- mode: fortran -*- -! Copyright (C) 2014-2022 Free Software Foundation, Inc. +! Copyright (C) 2014-2023 Free Software Foundation, Inc. ! Contributed by Tobias Burnus <burnus@net-b.de> ! and Mentor Embedded. diff --git a/libgomp/ordered.c b/libgomp/ordered.c index ca5cf09..969f6b1 100644 --- a/libgomp/ordered.c +++ b/libgomp/ordered.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. 
This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/parallel.c b/libgomp/parallel.c index f772299..936b580 100644 --- a/libgomp/parallel.c +++ b/libgomp/parallel.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/plugin/Makefrag.am b/libgomp/plugin/Makefrag.am index 5aad9ab..218aacb 100644 --- a/libgomp/plugin/Makefrag.am +++ b/libgomp/plugin/Makefrag.am @@ -1,6 +1,6 @@ # Plugins for offload execution, Makefile.am fragment. # -# Copyright (C) 2014-2022 Free Software Foundation, Inc. +# Copyright (C) 2014-2023 Free Software Foundation, Inc. # # Contributed by Mentor Embedded. # diff --git a/libgomp/plugin/configfrag.ac b/libgomp/plugin/configfrag.ac index ab03f94..2fd26cd 100644 --- a/libgomp/plugin/configfrag.ac +++ b/libgomp/plugin/configfrag.ac @@ -1,6 +1,6 @@ # Plugins for offload execution, configure.ac fragment. -*- mode: autoconf -*- # -# Copyright (C) 2014-2022 Free Software Foundation, Inc. +# Copyright (C) 2014-2023 Free Software Foundation, Inc. # # Contributed by Mentor Embedded. 
# @@ -59,9 +59,6 @@ if test x"$enable_offload_targets" != x; then tgt=`echo $tgt | sed 's/=.*//'` tgt_plugin= case $tgt in - *-intelmic-* | *-intelmicemul-*) - tgt_plugin=intelmic - ;; nvptx*) case "${target}" in aarch64*-*-* | powerpc64le-*-* | x86_64-*-*) diff --git a/libgomp/plugin/cuda-lib.def b/libgomp/plugin/cuda-lib.def index cd91b39..dff42d6 100644 --- a/libgomp/plugin/cuda-lib.def +++ b/libgomp/plugin/cuda-lib.def @@ -29,6 +29,7 @@ CUDA_ONE_CALL_MAYBE_NULL (cuLinkCreate_v2) CUDA_ONE_CALL (cuLinkDestroy) CUDA_ONE_CALL (cuMemAlloc) CUDA_ONE_CALL (cuMemAllocHost) +CUDA_ONE_CALL (cuMemHostAlloc) CUDA_ONE_CALL (cuMemcpy) CUDA_ONE_CALL (cuMemcpyDtoDAsync) CUDA_ONE_CALL (cuMemcpyDtoH) diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c index 04b122f..3478037 100644 --- a/libgomp/plugin/plugin-gcn.c +++ b/libgomp/plugin/plugin-gcn.c @@ -1,6 +1,6 @@ /* Plugin for AMD GCN execution. - Copyright (C) 2013-2022 Free Software Foundation, Inc. + Copyright (C) 2013-2023 Free Software Foundation, Inc. Contributed by Mentor Embedded @@ -42,6 +42,7 @@ #include <dlfcn.h> #include <signal.h> #include "libgomp-plugin.h" +#include "config/gcn/libgomp-gcn.h" /* For struct output. */ #include "gomp-constants.h" #include <elf.h> #include "oacc-plugin.h" @@ -236,37 +237,10 @@ struct kernel_dispatch in libgomp target code. */ struct kernargs { - /* Leave space for the real kernel arguments. - OpenACC and OpenMP only use one pointer. */ - int64_t dummy1; - int64_t dummy2; - - /* A pointer to struct output, below, for console output data. */ - int64_t out_ptr; - - /* A pointer to struct heap, below. */ - int64_t heap_ptr; - - /* A pointer to an ephemeral memory arena. - Only needed for OpenMP. */ - int64_t arena_ptr; + struct kernargs_abi abi; /* Output data. 
*/ - struct output { - int return_value; - unsigned int next_output; - struct printf_data { - int written; - char msg[128]; - int type; - union { - int64_t ivalue; - double dvalue; - char text[128]; - }; - } queue[1024]; - unsigned int consumed; - } output_data; + struct output output_data; }; /* A queue entry for a future asynchronous launch. */ @@ -439,9 +413,9 @@ struct agent_info /* The HSA memory region from which to allocate device data. */ hsa_region_t data_region; - /* Allocated team arenas. */ - struct team_arena_list *team_arena_list; - pthread_mutex_t team_arena_write_lock; + /* Allocated ephemeral memories (team arena and stack space). */ + struct ephemeral_memories_list *ephemeral_memories_list; + pthread_mutex_t ephemeral_memories_write_lock; /* Read-write lock that protects kernels which are running or about to be run from interference with loading and unloading of images. Needs to be @@ -523,17 +497,18 @@ struct module_info }; /* A linked list of memory arenas allocated on the device. - These are only used by OpenMP, as a means to optimize per-team malloc. */ + These are used by OpenMP, as a means to optimize per-team malloc, + and for host-accessible stack space. */ -struct team_arena_list +struct ephemeral_memories_list { - struct team_arena_list *next; + struct ephemeral_memories_list *next; - /* The number of teams determines the size of the allocation. */ - int num_teams; - /* The device address of the arena itself. */ - void *arena; - /* A flag to prevent two asynchronous kernels trying to use the same arena. + /* The size is determined by the number of teams and threads. */ + size_t size; + /* The device address of the allocated memory. */ + void *address; + /* A flag to prevent two asynchronous kernels trying to use the same memory. The mutex is locked until the kernel exits. */ pthread_mutex_t in_use; }; @@ -552,15 +527,6 @@ struct hsa_context_info char driver_version_s[30]; }; -/* Format of the on-device heap. 
- - This must match the definition in Newlib and gcn-run. */ - -struct heap { - int64_t size; - char data[0]; -}; - /* }}} */ /* {{{ Global variables */ @@ -578,6 +544,11 @@ static struct hsa_runtime_fn_info hsa_fns; static size_t gcn_kernel_heap_size = DEFAULT_GCN_HEAP_SIZE; +/* Ephemeral memory sizes for each kernel launch. */ + +static int team_arena_size = DEFAULT_TEAM_ARENA_SIZE; +static int stack_size = DEFAULT_GCN_STACK_SIZE; + /* Flag to decide whether print to stderr information about what is going on. Set in init_debug depending on environment variables. */ @@ -1033,9 +1004,13 @@ print_kernel_dispatch (struct kernel_dispatch *dispatch, unsigned indent) fprintf (stderr, "%*squeue: %p\n", indent, "", dispatch->queue); fprintf (stderr, "%*skernarg_address: %p\n", indent, "", kernargs); fprintf (stderr, "%*sheap address: %p\n", indent, "", - (void*)kernargs->heap_ptr); - fprintf (stderr, "%*sarena address: %p\n", indent, "", - (void*)kernargs->arena_ptr); + (void*)kernargs->abi.heap_ptr); + fprintf (stderr, "%*sarena address: %p (%d bytes per workgroup)\n", indent, + "", (void*)kernargs->abi.arena_ptr, + kernargs->abi.arena_size_per_team); + fprintf (stderr, "%*sstack address: %p (%d bytes per wavefront)\n", indent, + "", (void*)kernargs->abi.stack_ptr, + kernargs->abi.stack_size_per_thread); fprintf (stderr, "%*sobject: %lu\n", indent, "", dispatch->object); fprintf (stderr, "%*sprivate_segment_size: %u\n", indent, "", dispatch->private_segment_size); @@ -1095,6 +1070,22 @@ init_environment_variables (void) if (tmp) gcn_kernel_heap_size = tmp; } + + const char *arena = secure_getenv ("GCN_TEAM_ARENA_SIZE"); + if (arena) + { + int tmp = atoi (arena); + if (tmp) + team_arena_size = tmp;; + } + + const char *stack = secure_getenv ("GCN_STACK_SIZE"); + if (stack) + { + int tmp = atoi (stack); + if (tmp) + stack_size = tmp;; + } } /* Return malloc'd string with name of SYMBOL. 
*/ @@ -1163,6 +1154,18 @@ limit_worker_threads (int threads) return threads; } +/* This sets the maximum number of teams to twice the number of GPU Compute + Units to avoid memory waste and corresponding memory access faults. */ + +static int +limit_teams (int teams, struct agent_info *agent) +{ + int max_teams = 2 * get_cu_count (agent); + if (teams > max_teams) + teams = max_teams; + return teams; +} + /* Parse the target attributes INPUT provided by the compiler and return true if we should run anything all. If INPUT is NULL, fill DEF with default values, then store INPUT or DEF into *RESULT. @@ -1207,7 +1210,7 @@ parse_target_attributes (void **input, switch (id & GOMP_TARGET_ARG_ID_MASK) { case GOMP_TARGET_ARG_NUM_TEAMS: - gcn_teams = val; + gcn_teams = limit_teams (val, agent); break; case GOMP_TARGET_ARG_THREAD_LIMIT: gcn_threads = limit_worker_threads (val); @@ -1694,85 +1697,103 @@ isa_code(const char *isa) { /* }}} */ /* {{{ Run */ -/* Create or reuse a team arena. +/* Create or reuse a team arena and stack space. Team arenas are used by OpenMP to avoid calling malloc multiple times while setting up each team. This is purely a performance optimization. - Allocating an arena also costs performance, albeit on the host side, so - this function will reuse an existing arena if a large enough one is idle. - The arena is released, but not deallocated, when the kernel exits. */ + The stack space is used by all kernels. We must allocate it in such a + way that the reverse offload implementation can access the data. -static void * -get_team_arena (struct agent_info *agent, int num_teams) + Allocating this memory costs performance, so this function will reuse an + existing allocation if a large enough one is idle. + The memory lock is released, but not deallocated, when the kernel exits. 
*/ + +static void +configure_ephemeral_memories (struct kernel_info *kernel, + struct kernargs_abi *kernargs, int num_teams, + int num_threads) { - struct team_arena_list **next_ptr = &agent->team_arena_list; - struct team_arena_list *item; + struct agent_info *agent = kernel->agent; + struct ephemeral_memories_list **next_ptr = &agent->ephemeral_memories_list; + struct ephemeral_memories_list *item; + + int actual_arena_size = (kernel->kind == KIND_OPENMP + ? team_arena_size : 0); + int actual_arena_total_size = actual_arena_size * num_teams; + size_t size = (actual_arena_total_size + + num_teams * num_threads * stack_size); for (item = *next_ptr; item; next_ptr = &item->next, item = item->next) { - if (item->num_teams < num_teams) - continue; - - if (pthread_mutex_trylock (&item->in_use)) + if (item->size < size) continue; - return item->arena; + if (pthread_mutex_trylock (&item->in_use) == 0) + break; } - GCN_DEBUG ("Creating a new arena for %d teams\n", num_teams); - - if (pthread_mutex_lock (&agent->team_arena_write_lock)) + if (!item) { - GOMP_PLUGIN_error ("Could not lock a GCN agent program mutex"); - return false; - } - item = malloc (sizeof (*item)); - item->num_teams = num_teams; - item->next = NULL; - *next_ptr = item; + GCN_DEBUG ("Creating a new %sstack for %d teams with %d threads" + " (%zd bytes)\n", (actual_arena_size ? 
"arena and " : ""), + num_teams, num_threads, size); - if (pthread_mutex_init (&item->in_use, NULL)) - { - GOMP_PLUGIN_error ("Failed to initialize a GCN team arena write mutex"); - return false; - } - if (pthread_mutex_lock (&item->in_use)) - { - GOMP_PLUGIN_error ("Could not lock a GCN agent program mutex"); - return false; - } - if (pthread_mutex_unlock (&agent->team_arena_write_lock)) - { - GOMP_PLUGIN_error ("Could not unlock a GCN agent program mutex"); - return false; - } + if (pthread_mutex_lock (&agent->ephemeral_memories_write_lock)) + { + GOMP_PLUGIN_error ("Could not lock a GCN agent program mutex"); + return; + } + item = malloc (sizeof (*item)); + item->size = size; + item->next = NULL; + *next_ptr = item; - const int TEAM_ARENA_SIZE = 64*1024; /* Must match libgomp.h. */ - hsa_status_t status; - status = hsa_fns.hsa_memory_allocate_fn (agent->data_region, - TEAM_ARENA_SIZE*num_teams, - &item->arena); - if (status != HSA_STATUS_SUCCESS) - hsa_fatal ("Could not allocate memory for GCN kernel arena", status); - status = hsa_fns.hsa_memory_assign_agent_fn (item->arena, agent->id, - HSA_ACCESS_PERMISSION_RW); - if (status != HSA_STATUS_SUCCESS) - hsa_fatal ("Could not assign arena memory to device", status); + if (pthread_mutex_init (&item->in_use, NULL)) + { + GOMP_PLUGIN_error ("Failed to initialize a GCN memory write mutex"); + return; + } + if (pthread_mutex_lock (&item->in_use)) + { + GOMP_PLUGIN_error ("Could not lock a GCN agent program mutex"); + return; + } + if (pthread_mutex_unlock (&agent->ephemeral_memories_write_lock)) + { + GOMP_PLUGIN_error ("Could not unlock a GCN agent program mutex"); + return; + } + + hsa_status_t status; + status = hsa_fns.hsa_memory_allocate_fn (agent->data_region, size, + &item->address); + if (status != HSA_STATUS_SUCCESS) + hsa_fatal ("Could not allocate memory for GCN kernel arena", status); + status = hsa_fns.hsa_memory_assign_agent_fn (item->address, agent->id, + HSA_ACCESS_PERMISSION_RW); + if (status != 
HSA_STATUS_SUCCESS) + hsa_fatal ("Could not assign arena & stack memory to device", status); + } - return item->arena; + kernargs->arena_ptr = (actual_arena_total_size + ? (uint64_t)item->address + : 0); + kernargs->stack_ptr = (uint64_t)item->address + actual_arena_total_size; + kernargs->arena_size_per_team = actual_arena_size; + kernargs->stack_size_per_thread = stack_size; } -/* Mark a team arena available for reuse. */ +/* Mark an ephemeral memory space available for reuse. */ static void -release_team_arena (struct agent_info* agent, void *arena) +release_ephemeral_memories (struct agent_info* agent, void *address) { - struct team_arena_list *item; + struct ephemeral_memories_list *item; - for (item = agent->team_arena_list; item; item = item->next) + for (item = agent->ephemeral_memories_list; item; item = item->next) { - if (item->arena == arena) + if (item->address == address) { if (pthread_mutex_unlock (&item->in_use)) GOMP_PLUGIN_error ("Could not unlock a GCN agent program mutex"); @@ -1785,22 +1806,22 @@ release_team_arena (struct agent_info* agent, void *arena) /* Clean up all the allocated team arenas. 
*/ static bool -destroy_team_arenas (struct agent_info *agent) +destroy_ephemeral_memories (struct agent_info *agent) { - struct team_arena_list *item, *next; + struct ephemeral_memories_list *item, *next; - for (item = agent->team_arena_list; item; item = next) + for (item = agent->ephemeral_memories_list; item; item = next) { next = item->next; - hsa_fns.hsa_memory_free_fn (item->arena); + hsa_fns.hsa_memory_free_fn (item->address); if (pthread_mutex_destroy (&item->in_use)) { - GOMP_PLUGIN_error ("Failed to destroy a GCN team arena mutex"); + GOMP_PLUGIN_error ("Failed to destroy a GCN memory mutex"); return false; } free (item); } - agent->team_arena_list = NULL; + agent->ephemeral_memories_list = NULL; return true; } @@ -1812,13 +1833,6 @@ alloc_by_agent (struct agent_info *agent, size_t size) { GCN_DEBUG ("Allocating %zu bytes on device %d\n", size, agent->device_id); - /* Zero-size allocations are invalid, so in order to return a valid pointer - we need to pass a valid size. One source of zero-size allocations is - kernargs for kernels that have no inputs or outputs (the kernel may - only use console output, for example). */ - if (size == 0) - size = 4; - void *ptr; hsa_status_t status = hsa_fns.hsa_memory_allocate_fn (agent->data_region, size, &ptr); @@ -1872,7 +1886,8 @@ alloc_by_agent (struct agent_info *agent, size_t size) the necessary device signals and memory allocations. */ static struct kernel_dispatch * -create_kernel_dispatch (struct kernel_info *kernel, int num_teams) +create_kernel_dispatch (struct kernel_info *kernel, int num_teams, + int num_threads) { struct agent_info *agent = kernel->agent; struct kernel_dispatch *shadow @@ -1907,7 +1922,7 @@ create_kernel_dispatch (struct kernel_info *kernel, int num_teams) struct kernargs *kernargs = shadow->kernarg_address; /* Zero-initialize the output_data (minimum needed). 
*/ - kernargs->out_ptr = (int64_t)&kernargs->output_data; + kernargs->abi.out_ptr = (int64_t)&kernargs->output_data; kernargs->output_data.next_output = 0; for (unsigned i = 0; i < (sizeof (kernargs->output_data.queue) @@ -1917,13 +1932,10 @@ create_kernel_dispatch (struct kernel_info *kernel, int num_teams) kernargs->output_data.consumed = 0; /* Pass in the heap location. */ - kernargs->heap_ptr = (int64_t)kernel->module->heap; + kernargs->abi.heap_ptr = (int64_t)kernel->module->heap; - /* Create an arena. */ - if (kernel->kind == KIND_OPENMP) - kernargs->arena_ptr = (int64_t)get_team_arena (agent, num_teams); - else - kernargs->arena_ptr = 0; + /* Create the ephemeral memory spaces. */ + configure_ephemeral_memories (kernel, &kernargs->abi, num_teams, num_threads); /* Ensure we can recognize unset return values. */ kernargs->output_data.return_value = 0xcafe0000; @@ -1931,6 +1943,15 @@ create_kernel_dispatch (struct kernel_info *kernel, int num_teams) return shadow; } +static void +process_reverse_offload (uint64_t fn, uint64_t mapnum, uint64_t hostaddrs, + uint64_t sizes, uint64_t kinds, uint64_t dev_num64) +{ + int dev_num = dev_num64; + GOMP_PLUGIN_target_rev (fn, mapnum, hostaddrs, sizes, kinds, dev_num, + NULL, NULL, NULL); +} + /* Output any data written to console output from the kernel. It is expected that this function is polled during kernel execution. 
@@ -1975,6 +1996,11 @@ console_output (struct kernel_info *kernel, struct kernargs *kernargs, case 1: printf ("%.128s%f\n", data->msg, data->dvalue); break; case 2: printf ("%.128s%.128s\n", data->msg, data->text); break; case 3: printf ("%.128s%.128s", data->msg, data->text); break; + case 4: + process_reverse_offload (data->value_u64[0], data->value_u64[1], + data->value_u64[2], data->value_u64[3], + data->value_u64[4], data->value_u64[5]); + break; default: printf ("GCN print buffer error!\n"); break; } data->written = 0; @@ -1993,9 +2019,10 @@ release_kernel_dispatch (struct kernel_dispatch *shadow) GCN_DEBUG ("Released kernel dispatch: %p\n", shadow); struct kernargs *kernargs = shadow->kernarg_address; - void *arena = (void *)kernargs->arena_ptr; - if (arena) - release_team_arena (shadow->agent, arena); + void *addr = (void *)kernargs->abi.arena_ptr; + if (!addr) + addr = (void *)kernargs->abi.stack_ptr; + release_ephemeral_memories (shadow->agent, addr); hsa_fns.hsa_memory_free_fn (shadow->kernarg_address); @@ -2225,7 +2252,8 @@ run_kernel (struct kernel_info *kernel, void *vars, packet->workgroup_size_z); struct kernel_dispatch *shadow - = create_kernel_dispatch (kernel, packet->grid_size_x); + = create_kernel_dispatch (kernel, packet->grid_size_x, + packet->grid_size_z); shadow->queue = command_q; if (debug) @@ -2954,15 +2982,6 @@ copy_data (void *data_) free (data); } -/* Free device data. This is intended for use as an async callback event. */ - -static void -gomp_offload_free (void *ptr) -{ - GCN_DEBUG ("Async thread ?:?: Freeing %p\n", ptr); - GOMP_OFFLOAD_free (0, ptr); -} - /* Request an asynchronous data copy, to or from a device, on a given queue. The event will be registered as a callback. */ @@ -3029,7 +3048,7 @@ wait_queue (struct goacc_asyncqueue *aq) /* Execute an OpenACC kernel, synchronously or asynchronously. 
*/ static void -gcn_exec (struct kernel_info *kernel, size_t mapnum, void **hostaddrs, +gcn_exec (struct kernel_info *kernel, void **devaddrs, unsigned *dims, void *targ_mem_desc, bool async, struct goacc_asyncqueue *aq) { @@ -3039,13 +3058,6 @@ gcn_exec (struct kernel_info *kernel, size_t mapnum, void **hostaddrs, /* If we get here then this must be an OpenACC kernel. */ kernel->kind = KIND_OPENACC; - /* devaddrs must be double-indirect on the target. */ - void **ind_da = alloc_by_agent (kernel->agent, sizeof (void*) * mapnum); - for (size_t i = 0; i < mapnum; i++) - hsa_fns.hsa_memory_copy_fn (&ind_da[i], - devaddrs[i] ? &devaddrs[i] : &hostaddrs[i], - sizeof (void *)); - struct hsa_kernel_description *hsa_kernel_desc = NULL; for (unsigned i = 0; i < kernel->module->image_desc->kernel_count; i++) { @@ -3157,18 +3169,9 @@ gcn_exec (struct kernel_info *kernel, size_t mapnum, void **hostaddrs, } if (!async) - { - run_kernel (kernel, ind_da, &kla, NULL, false); - gomp_offload_free (ind_da); - } + run_kernel (kernel, devaddrs, &kla, NULL, false); else - { - queue_push_launch (aq, kernel, ind_da, &kla); - if (DEBUG_QUEUES) - GCN_DEBUG ("queue_push_callback %d:%d gomp_offload_free, %p\n", - aq->agent->device_id, aq->id, ind_da); - queue_push_callback (aq, gomp_offload_free, ind_da); - } + queue_push_launch (aq, kernel, devaddrs, &kla); if (profiling_dispatch_p) { @@ -3227,7 +3230,8 @@ GOMP_OFFLOAD_get_num_devices (unsigned int omp_requires_mask) return 0; /* Return -1 if no omp_requires_mask cannot be fulfilled but devices were present. 
*/ - if (hsa_context.agent_count > 0 && omp_requires_mask != 0) + if (hsa_context.agent_count > 0 + && (omp_requires_mask & ~GOMP_REQUIRES_REVERSE_OFFLOAD) != 0) return -1; return hsa_context.agent_count; } @@ -3267,14 +3271,14 @@ GOMP_OFFLOAD_init_device (int n) GOMP_PLUGIN_error ("Failed to initialize a GCN agent queue mutex"); return false; } - if (pthread_mutex_init (&agent->team_arena_write_lock, NULL)) + if (pthread_mutex_init (&agent->ephemeral_memories_write_lock, NULL)) { GOMP_PLUGIN_error ("Failed to initialize a GCN team arena write mutex"); return false; } agent->async_queues = NULL; agent->omp_async_queue = NULL; - agent->team_arena_list = NULL; + agent->ephemeral_memories_list = NULL; uint32_t queue_size; hsa_status_t status; @@ -3627,7 +3631,7 @@ GOMP_OFFLOAD_fini_device (int n) agent->module = NULL; } - if (!destroy_team_arenas (agent)) + if (!destroy_ephemeral_memories (agent)) return false; if (!destroy_hsa_program (agent)) @@ -3653,9 +3657,9 @@ GOMP_OFFLOAD_fini_device (int n) GOMP_PLUGIN_error ("Failed to destroy a GCN agent queue mutex"); return false; } - if (pthread_mutex_destroy (&agent->team_arena_write_lock)) + if (pthread_mutex_destroy (&agent->ephemeral_memories_write_lock)) { - GOMP_PLUGIN_error ("Failed to destroy a GCN team arena mutex"); + GOMP_PLUGIN_error ("Failed to destroy a GCN memory mutex"); return false; } agent->initialized = false; @@ -3849,28 +3853,30 @@ GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars, already-loaded KERNEL. 
*/ void -GOMP_OFFLOAD_openacc_exec (void (*fn_ptr) (void *), size_t mapnum, - void **hostaddrs, void **devaddrs, unsigned *dims, +GOMP_OFFLOAD_openacc_exec (void (*fn_ptr) (void *), + size_t mapnum __attribute__((unused)), + void **hostaddrs __attribute__((unused)), + void **devaddrs, unsigned *dims, void *targ_mem_desc) { struct kernel_info *kernel = (struct kernel_info *) fn_ptr; - gcn_exec (kernel, mapnum, hostaddrs, devaddrs, dims, targ_mem_desc, false, - NULL); + gcn_exec (kernel, devaddrs, dims, targ_mem_desc, false, NULL); } /* Run an asynchronous OpenACC kernel on the specified queue. */ void -GOMP_OFFLOAD_openacc_async_exec (void (*fn_ptr) (void *), size_t mapnum, - void **hostaddrs, void **devaddrs, +GOMP_OFFLOAD_openacc_async_exec (void (*fn_ptr) (void *), + size_t mapnum __attribute__((unused)), + void **hostaddrs __attribute__((unused)), + void **devaddrs, unsigned *dims, void *targ_mem_desc, struct goacc_asyncqueue *aq) { struct kernel_info *kernel = (struct kernel_info *) fn_ptr; - gcn_exec (kernel, mapnum, hostaddrs, devaddrs, dims, targ_mem_desc, true, - aq); + gcn_exec (kernel, devaddrs, dims, targ_mem_desc, true, aq); } /* Create a new asynchronous thread and queue for running future kernels. */ diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index ba6b229..b3481c4 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -1,6 +1,6 @@ /* Plugin for NVPTX execution. - Copyright (C) 2013-2022 Free Software Foundation, Inc. + Copyright (C) 2013-2023 Free Software Foundation, Inc. Contributed by Mentor Embedded. @@ -40,6 +40,9 @@ #include "gomp-constants.h" #include "oacc-int.h" +/* For struct rev_offload + GOMP_REV_OFFLOAD_VAR. 
*/ +#include "config/nvptx/libgomp-nvptx.h" + #include <pthread.h> #ifndef PLUGIN_NVPTX_INCLUDE_SYSTEM_CUDA_H # include "cuda/cuda.h" @@ -329,6 +332,7 @@ struct ptx_device pthread_mutex_t lock; } omp_stacks; + struct rev_offload *rev_data; struct ptx_device *next; }; @@ -423,7 +427,7 @@ nvptx_open_device (int n) struct ptx_device *ptx_dev; CUdevice dev, ctx_dev; CUresult r; - int async_engines, pi; + int pi; CUDA_CALL_ERET (NULL, cuDeviceGet, &dev, n); @@ -519,10 +523,12 @@ nvptx_open_device (int n) CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR, dev); ptx_dev->max_threads_per_multiprocessor = pi; - r = CUDA_CALL_NOCHECK (cuDeviceGetAttribute, &async_engines, - CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT, dev); - if (r != CUDA_SUCCESS) - async_engines = 1; + /* Required below for reverse offload as implemented, but with compute + capability >= 2.0 and 64bit device processes, this should be universally be + the case; hence, an assert. */ + r = CUDA_CALL_NOCHECK (cuDeviceGetAttribute, &pi, + CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, dev); + assert (r == CUDA_SUCCESS && pi); for (int i = 0; i != GOMP_DIM_MAX; i++) ptx_dev->default_dims[i] = 0; @@ -540,6 +546,8 @@ nvptx_open_device (int n) ptx_dev->omp_stacks.size = 0; pthread_mutex_init (&ptx_dev->omp_stacks.lock, NULL); + ptx_dev->rev_data = NULL; + return ptx_dev; } @@ -734,8 +742,7 @@ link_ptx (CUmodule *module, const struct targ_ptx_obj *ptx_objs, } static void -nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs, - unsigned *dims, void *targ_mem_desc, +nvptx_exec (void (*fn), unsigned *dims, void *targ_mem_desc, CUdeviceptr dp, CUstream stream) { struct targ_fn_descriptor *targ_fn = (struct targ_fn_descriptor *) fn; @@ -1179,8 +1186,12 @@ GOMP_OFFLOAD_get_num_devices (unsigned int omp_requires_mask) { int num_devices = nvptx_get_num_devices (); /* Return -1 if no omp_requires_mask cannot be fulfilled but - devices were present. 
*/ - if (num_devices > 0 && omp_requires_mask != 0) + devices were present. Unified-shared address: see comment in + nvptx_open_device for CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING. */ + if (num_devices > 0 + && ((omp_requires_mask + & ~(GOMP_REQUIRES_UNIFIED_ADDRESS + | GOMP_REQUIRES_REVERSE_OFFLOAD)) != 0)) return -1; return num_devices; } @@ -1381,6 +1392,7 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, { CUdeviceptr var; size_t bytes; + unsigned int i; r = CUDA_CALL_NOCHECK (cuModuleGetGlobal, &var, &bytes, module, "$offload_func_table"); if (r != CUDA_SUCCESS) @@ -1390,6 +1402,48 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, r = CUDA_CALL_NOCHECK (cuMemcpyDtoH, *rev_fn_table, var, bytes); if (r != CUDA_SUCCESS) GOMP_PLUGIN_fatal ("cuMemcpyDtoH error: %s", cuda_error (r)); + /* Free if only NULL entries. */ + for (i = 0; i < fn_entries; ++i) + if ((*rev_fn_table)[i] != 0) + break; + if (i == fn_entries) + { + free (*rev_fn_table); + *rev_fn_table = NULL; + } + } + + if (rev_fn_table && *rev_fn_table && dev->rev_data == NULL) + { + /* Get the on-device GOMP_REV_OFFLOAD_VAR variable. It should be + available but it might be not. One reason could be: if the user code + has 'omp target device(ancestor:1)' in pure hostcode, GOMP_target_ext + is not called on the device and, hence, it and GOMP_REV_OFFLOAD_VAR + are not linked in. */ + CUdeviceptr device_rev_offload_var; + size_t device_rev_offload_size; + CUresult r = CUDA_CALL_NOCHECK (cuModuleGetGlobal, + &device_rev_offload_var, + &device_rev_offload_size, module, + XSTRING (GOMP_REV_OFFLOAD_VAR)); + if (r != CUDA_SUCCESS) + { + free (*rev_fn_table); + *rev_fn_table = NULL; + } + else + { + /* cuMemHostAlloc memory is accessible on the device, if + unified-shared address is supported; this is assumed - see comment + in nvptx_open_device for CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING. 
*/ + CUDA_CALL_ASSERT (cuMemHostAlloc, (void **) &dev->rev_data, + sizeof (*dev->rev_data), CU_MEMHOSTALLOC_DEVICEMAP); + CUdeviceptr dp = (CUdeviceptr) dev->rev_data; + r = CUDA_CALL_NOCHECK (cuMemcpyHtoD, device_rev_offload_var, &dp, + sizeof (dp)); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuMemcpyHtoD error: %s", cuda_error (r)); + } } nvptx_set_clocktick (module, dev); @@ -1474,70 +1528,16 @@ GOMP_OFFLOAD_free (int ord, void *ptr) } void -GOMP_OFFLOAD_openacc_exec (void (*fn) (void *), size_t mapnum, - void **hostaddrs, void **devaddrs, +GOMP_OFFLOAD_openacc_exec (void (*fn) (void *), + size_t mapnum __attribute__((unused)), + void **hostaddrs __attribute__((unused)), + void **devaddrs, unsigned *dims, void *targ_mem_desc) { - GOMP_PLUGIN_debug (0, " %s: prepare mappings\n", __FUNCTION__); + GOMP_PLUGIN_debug (0, "nvptx %s\n", __FUNCTION__); - struct goacc_thread *thr = GOMP_PLUGIN_goacc_thread (); - acc_prof_info *prof_info = thr->prof_info; - acc_event_info data_event_info; - acc_api_info *api_info = thr->api_info; - bool profiling_p = __builtin_expect (prof_info != NULL, false); - - void **hp = NULL; - CUdeviceptr dp = 0; - - if (mapnum > 0) - { - size_t s = mapnum * sizeof (void *); - hp = alloca (s); - for (int i = 0; i < mapnum; i++) - hp[i] = (devaddrs[i] ? devaddrs[i] : hostaddrs[i]); - CUDA_CALL_ASSERT (cuMemAlloc, &dp, s); - if (profiling_p) - goacc_profiling_acc_ev_alloc (thr, (void *) dp, s); - } - - /* Copy the (device) pointers to arguments to the device (dp and hp might in - fact have the same value on a unified-memory system). */ - if (mapnum > 0) - { - if (profiling_p) - { - prof_info->event_type = acc_ev_enqueue_upload_start; - - data_event_info.data_event.event_type = prof_info->event_type; - data_event_info.data_event.valid_bytes - = _ACC_DATA_EVENT_INFO_VALID_BYTES; - data_event_info.data_event.parent_construct - = acc_construct_parallel; - data_event_info.data_event.implicit = 1; /* Always implicit. 
*/ - data_event_info.data_event.tool_info = NULL; - data_event_info.data_event.var_name = NULL; - data_event_info.data_event.bytes = mapnum * sizeof (void *); - data_event_info.data_event.host_ptr = hp; - data_event_info.data_event.device_ptr = (const void *) dp; - - api_info->device_api = acc_device_api_cuda; - - GOMP_PLUGIN_goacc_profiling_dispatch (prof_info, &data_event_info, - api_info); - } - CUDA_CALL_ASSERT (cuMemcpyHtoD, dp, (void *) hp, - mapnum * sizeof (void *)); - if (profiling_p) - { - prof_info->event_type = acc_ev_enqueue_upload_end; - data_event_info.data_event.event_type = prof_info->event_type; - GOMP_PLUGIN_goacc_profiling_dispatch (prof_info, &data_event_info, - api_info); - } - } - - nvptx_exec (fn, mapnum, hostaddrs, devaddrs, dims, targ_mem_desc, - dp, NULL); + CUdeviceptr dp = (CUdeviceptr) devaddrs; + nvptx_exec (fn, dims, targ_mem_desc, dp, NULL); CUresult r = CUDA_CALL_NOCHECK (cuStreamSynchronize, NULL); const char *maybe_abort_msg = "(perhaps abort was called)"; @@ -1546,98 +1546,20 @@ GOMP_OFFLOAD_openacc_exec (void (*fn) (void *), size_t mapnum, maybe_abort_msg); else if (r != CUDA_SUCCESS) GOMP_PLUGIN_fatal ("cuStreamSynchronize error: %s", cuda_error (r)); - - CUDA_CALL_ASSERT (cuMemFree, dp); - if (profiling_p) - goacc_profiling_acc_ev_free (thr, (void *) dp); -} - -static void -cuda_free_argmem (void *ptr) -{ - void **block = (void **) ptr; - nvptx_free (block[0], (struct ptx_device *) block[1]); - free (block); } void -GOMP_OFFLOAD_openacc_async_exec (void (*fn) (void *), size_t mapnum, - void **hostaddrs, void **devaddrs, +GOMP_OFFLOAD_openacc_async_exec (void (*fn) (void *), + size_t mapnum __attribute__((unused)), + void **hostaddrs __attribute__((unused)), + void **devaddrs, unsigned *dims, void *targ_mem_desc, struct goacc_asyncqueue *aq) { - GOMP_PLUGIN_debug (0, " %s: prepare mappings\n", __FUNCTION__); - - struct goacc_thread *thr = GOMP_PLUGIN_goacc_thread (); - acc_prof_info *prof_info = thr->prof_info; - 
acc_event_info data_event_info; - acc_api_info *api_info = thr->api_info; - bool profiling_p = __builtin_expect (prof_info != NULL, false); - - void **hp = NULL; - CUdeviceptr dp = 0; - void **block = NULL; + GOMP_PLUGIN_debug (0, "nvptx %s\n", __FUNCTION__); - if (mapnum > 0) - { - size_t s = mapnum * sizeof (void *); - block = (void **) GOMP_PLUGIN_malloc (2 * sizeof (void *) + s); - hp = block + 2; - for (int i = 0; i < mapnum; i++) - hp[i] = (devaddrs[i] ? devaddrs[i] : hostaddrs[i]); - CUDA_CALL_ASSERT (cuMemAlloc, &dp, s); - if (profiling_p) - goacc_profiling_acc_ev_alloc (thr, (void *) dp, s); - } - - /* Copy the (device) pointers to arguments to the device (dp and hp might in - fact have the same value on a unified-memory system). */ - if (mapnum > 0) - { - if (profiling_p) - { - prof_info->event_type = acc_ev_enqueue_upload_start; - - data_event_info.data_event.event_type = prof_info->event_type; - data_event_info.data_event.valid_bytes - = _ACC_DATA_EVENT_INFO_VALID_BYTES; - data_event_info.data_event.parent_construct - = acc_construct_parallel; - data_event_info.data_event.implicit = 1; /* Always implicit. 
*/ - data_event_info.data_event.tool_info = NULL; - data_event_info.data_event.var_name = NULL; - data_event_info.data_event.bytes = mapnum * sizeof (void *); - data_event_info.data_event.host_ptr = hp; - data_event_info.data_event.device_ptr = (const void *) dp; - - api_info->device_api = acc_device_api_cuda; - - GOMP_PLUGIN_goacc_profiling_dispatch (prof_info, &data_event_info, - api_info); - } - - CUDA_CALL_ASSERT (cuMemcpyHtoDAsync, dp, (void *) hp, - mapnum * sizeof (void *), aq->cuda_stream); - block[0] = (void *) dp; - - struct nvptx_thread *nvthd = - (struct nvptx_thread *) GOMP_PLUGIN_acc_thread (); - block[1] = (void *) nvthd->ptx_dev; - - if (profiling_p) - { - prof_info->event_type = acc_ev_enqueue_upload_end; - data_event_info.data_event.event_type = prof_info->event_type; - GOMP_PLUGIN_goacc_profiling_dispatch (prof_info, &data_event_info, - api_info); - } - } - - nvptx_exec (fn, mapnum, hostaddrs, devaddrs, dims, targ_mem_desc, - dp, aq->cuda_stream); - - if (mapnum > 0) - GOMP_OFFLOAD_openacc_async_queue_callback (aq, cuda_free_argmem, block); + CUdeviceptr dp = (CUdeviceptr) devaddrs; + nvptx_exec (fn, dims, targ_mem_desc, dp, aq->cuda_stream); } void * @@ -2001,6 +1923,23 @@ nvptx_stacks_acquire (struct ptx_device *ptx_dev, size_t size, int num) return (void *) ptx_dev->omp_stacks.ptr; } + +void +rev_off_dev_to_host_cpy (void *dest, const void *src, size_t size, + CUstream stream) +{ + CUDA_CALL_ASSERT (cuMemcpyDtoHAsync, dest, (CUdeviceptr) src, size, stream); + CUDA_CALL_ASSERT (cuStreamSynchronize, stream); +} + +void +rev_off_host_to_dev_cpy (void *dest, const void *src, size_t size, + CUstream stream) +{ + CUDA_CALL_ASSERT (cuMemcpyHtoDAsync, (CUdeviceptr) dest, src, size, stream); + CUDA_CALL_ASSERT (cuStreamSynchronize, stream); +} + void GOMP_OFFLOAD_run (int ord, void *tgt_fn, void *tgt_vars, void **args) { @@ -2035,6 +1974,8 @@ GOMP_OFFLOAD_run (int ord, void *tgt_fn, void *tgt_vars, void **args) nvptx_adjust_launch_bounds (tgt_fn, 
ptx_dev, &teams, &threads); size_t stack_size = nvptx_stacks_size (); + bool reverse_offload = ptx_dev->rev_data != NULL; + CUstream copy_stream = NULL; pthread_mutex_lock (&ptx_dev->omp_stacks.lock); void *stacks = nvptx_stacks_acquire (ptx_dev, stack_size, teams * threads); @@ -2048,12 +1989,41 @@ GOMP_OFFLOAD_run (int ord, void *tgt_fn, void *tgt_vars, void **args) GOMP_PLUGIN_debug (0, " %s: kernel %s: launch" " [(teams: %u), 1, 1] [(lanes: 32), (threads: %u), 1]\n", __FUNCTION__, fn_name, teams, threads); + if (reverse_offload) + CUDA_CALL_ASSERT (cuStreamCreate, &copy_stream, CU_STREAM_NON_BLOCKING); r = CUDA_CALL_NOCHECK (cuLaunchKernel, function, teams, 1, 1, 32, threads, 1, 0, NULL, NULL, config); if (r != CUDA_SUCCESS) GOMP_PLUGIN_fatal ("cuLaunchKernel error: %s", cuda_error (r)); - - r = CUDA_CALL_NOCHECK (cuCtxSynchronize, ); + if (reverse_offload) + while (true) + { + r = CUDA_CALL_NOCHECK (cuStreamQuery, NULL); + if (r == CUDA_SUCCESS) + break; + if (r == CUDA_ERROR_LAUNCH_FAILED) + GOMP_PLUGIN_fatal ("cuStreamQuery error: %s %s\n", cuda_error (r), + maybe_abort_msg); + else if (r != CUDA_ERROR_NOT_READY) + GOMP_PLUGIN_fatal ("cuStreamQuery error: %s", cuda_error (r)); + + if (__atomic_load_n (&ptx_dev->rev_data->fn, __ATOMIC_ACQUIRE) != 0) + { + struct rev_offload *rev_data = ptx_dev->rev_data; + GOMP_PLUGIN_target_rev (rev_data->fn, rev_data->mapnum, + rev_data->addrs, rev_data->sizes, + rev_data->kinds, rev_data->dev_num, + rev_off_dev_to_host_cpy, + rev_off_host_to_dev_cpy, copy_stream); + CUDA_CALL_ASSERT (cuStreamSynchronize, copy_stream); + __atomic_store_n (&rev_data->fn, 0, __ATOMIC_RELEASE); + } + usleep (1); + } + else + r = CUDA_CALL_NOCHECK (cuCtxSynchronize, ); + if (reverse_offload) + CUDA_CALL_ASSERT (cuStreamDestroy, copy_stream); if (r == CUDA_ERROR_LAUNCH_FAILED) GOMP_PLUGIN_fatal ("cuCtxSynchronize error: %s %s\n", cuda_error (r), maybe_abort_msg); diff --git a/libgomp/priority_queue.c b/libgomp/priority_queue.c index 5496526..ad425d0 
100644 --- a/libgomp/priority_queue.c +++ b/libgomp/priority_queue.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2022 Free Software Foundation, Inc. +/* Copyright (C) 2015-2023 Free Software Foundation, Inc. Contributed by Aldy Hernandez <aldyh@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/priority_queue.h b/libgomp/priority_queue.h index e032d07..d644a12 100644 --- a/libgomp/priority_queue.h +++ b/libgomp/priority_queue.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2022 Free Software Foundation, Inc. +/* Copyright (C) 2015-2023 Free Software Foundation, Inc. Contributed by Aldy Hernandez <aldyh@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/scope.c b/libgomp/scope.c index b351b3f..f0c602d 100644 --- a/libgomp/scope.c +++ b/libgomp/scope.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2021-2022 Free Software Foundation, Inc. +/* Copyright (C) 2021-2023 Free Software Foundation, Inc. Contributed by Jakub Jelinek <jakub@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/sections.c b/libgomp/sections.c index 7751d5a..cdd810c 100644 --- a/libgomp/sections.c +++ b/libgomp/sections.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/secure_getenv.h b/libgomp/secure_getenv.h index d24dff1..2be3388 100644 --- a/libgomp/secure_getenv.h +++ b/libgomp/secure_getenv.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2017-2022 Free Software Foundation, Inc. +/* Copyright (C) 2017-2023 Free Software Foundation, Inc. This file is part of GCC. 
diff --git a/libgomp/single.c b/libgomp/single.c index 79a3f8e..35a801a 100644 --- a/libgomp/single.c +++ b/libgomp/single.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/splay-tree.c b/libgomp/splay-tree.c index 2d8742f..02695d4 100644 --- a/libgomp/splay-tree.c +++ b/libgomp/splay-tree.c @@ -1,5 +1,5 @@ /* A splay-tree datatype. - Copyright (C) 1998-2022 Free Software Foundation, Inc. + Copyright (C) 1998-2023 Free Software Foundation, Inc. Contributed by Mark Mitchell (mark@markmitchell.com). This file is part of the GNU Offloading and Multi Processing Library @@ -236,3 +236,25 @@ splay_tree_foreach (splay_tree sp, splay_tree_callback func, void *data) { splay_tree_foreach_internal (sp->root, func, data); } + +/* Like above, except when func returns != 0, stop early. */ + +static int +splay_tree_foreach_internal_lazy (splay_tree_node node, + splay_tree_callback_stop func, void *data) +{ + if (!node) + return 0; + if (func (&node->key, data)) + return 1; + if (splay_tree_foreach_internal_lazy (node->left, func, data)) + return 1; + /* Yeah, whatever. GCC can fix my tail recursion. */ + return splay_tree_foreach_internal_lazy (node->right, func, data); +} + +attribute_hidden void +splay_tree_foreach_lazy (splay_tree sp, splay_tree_callback_stop func, void *data) +{ + splay_tree_foreach_internal_lazy (sp->root, func, data); +} diff --git a/libgomp/splay-tree.h b/libgomp/splay-tree.h index 992381c..978f1e4 100644 --- a/libgomp/splay-tree.h +++ b/libgomp/splay-tree.h @@ -1,5 +1,5 @@ /* A splay-tree datatype. - Copyright (C) 1998-2022 Free Software Foundation, Inc. + Copyright (C) 1998-2023 Free Software Foundation, Inc. Contributed by Mark Mitchell (mark@markmitchell.com). 
This file is part of the GNU Offloading and Multi Processing Library @@ -78,8 +78,12 @@ typedef struct splay_tree_key_s *splay_tree_key; splay_tree_name (splay_tree_prefix, splay_tree_remove) # define splay_tree_foreach \ splay_tree_name (splay_tree_prefix, splay_tree_foreach) +# define splay_tree_foreach_lazy \ + splay_tree_name (splay_tree_prefix, splay_tree_foreach_lazy) # define splay_tree_callback \ splay_tree_name (splay_tree_prefix, splay_tree_callback) +# define splay_tree_callback_stop \ + splay_tree_name (splay_tree_prefix, splay_tree_callback_stop) #endif #ifndef splay_tree_c @@ -99,11 +103,13 @@ struct splay_tree_s { }; typedef void (*splay_tree_callback) (splay_tree_key, void *); +typedef int (*splay_tree_callback_stop) (splay_tree_key, void *); extern splay_tree_key splay_tree_lookup (splay_tree, splay_tree_key); extern void splay_tree_insert (splay_tree, splay_tree_node); extern void splay_tree_remove (splay_tree, splay_tree_key); extern void splay_tree_foreach (splay_tree, splay_tree_callback, void *); +extern void splay_tree_foreach_lazy (splay_tree, splay_tree_callback_stop, void *); #else /* splay_tree_c */ # ifdef splay_tree_prefix # include "splay-tree.c" @@ -125,6 +131,8 @@ extern void splay_tree_foreach (splay_tree, splay_tree_callback, void *); # undef splay_tree_insert # undef splay_tree_remove # undef splay_tree_foreach +# undef splay_tree_foreach_lazy # undef splay_tree_callback +# undef splay_tree_callback_stop # undef splay_tree_prefix #endif diff --git a/libgomp/target.c b/libgomp/target.c index 5763483..b30c6a50 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2013-2022 Free Software Foundation, Inc. +/* Copyright (C) 2013-2023 Free Software Foundation, Inc. Contributed by Jakub Jelinek <jakub@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library @@ -45,6 +45,12 @@ #include "plugin-suffix.h" #endif +/* Define another splay tree instantiation - for reverse offload. 
*/ +#define splay_tree_prefix reverse +#define splay_tree_c +#include "splay-tree.h" + + typedef uintptr_t *hash_entry_type; static inline void * htab_alloc (size_t size) { return gomp_malloc (size); } static inline void htab_free (void *ptr) { free (ptr); } @@ -200,6 +206,12 @@ gomp_map_lookup (splay_tree mem_map, splay_tree_key key) return splay_tree_lookup (mem_map, key); } +static inline reverse_splay_tree_key +gomp_map_lookup_rev (reverse_splay_tree mem_map_rev, reverse_splay_tree_key key) +{ + return reverse_splay_tree_lookup (mem_map_rev, key); +} + static inline splay_tree_key gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key) { @@ -298,10 +310,8 @@ struct gomp_coalesce_buf This must not be used for asynchronous copies, because the host data might not be computed yet (by an earlier asynchronous compute region, for - example). - TODO ... but we could allow CBUF usage for EPHEMERAL data? (Open question: - is it more performant to use libgomp CBUF buffering or individual device - asyncronous copying?) */ + example). The exception is for EPHEMERAL data, that we know is available + already "by construction". */ static inline void gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len) @@ -365,30 +375,6 @@ gomp_copy_host2dev (struct gomp_device_descr *devicep, void *d, const void *h, size_t sz, bool ephemeral, struct gomp_coalesce_buf *cbuf) { - if (__builtin_expect (aq != NULL, 0)) - { - /* See 'gomp_coalesce_buf_add'. */ - assert (!cbuf); - - void *h_buf = (void *) h; - if (ephemeral) - { - /* We're queueing up an asynchronous copy from data that may - disappear before the transfer takes place (i.e. because it is a - stack local in a function that is no longer executing). Make a - copy of the data into a temporary buffer in those cases. 
*/ - h_buf = gomp_malloc (sz); - memcpy (h_buf, h, sz); - } - goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func, - "dev", d, "host", h_buf, h, sz, aq); - if (ephemeral) - /* Free temporary buffer once the transfer has completed. */ - devicep->openacc.async.queue_callback_func (aq, free, h_buf); - - return; - } - if (cbuf) { uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start; @@ -408,6 +394,12 @@ gomp_copy_host2dev (struct gomp_device_descr *devicep, gomp_mutex_unlock (&devicep->lock); gomp_fatal ("internal libgomp cbuf error"); } + + /* In an asynchronous context, verify that CBUF isn't used + with non-EPHEMERAL data; see 'gomp_coalesce_buf_add'. */ + if (__builtin_expect (aq != NULL, 0)) + assert (ephemeral); + memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start), h, sz); return; @@ -418,7 +410,28 @@ gomp_copy_host2dev (struct gomp_device_descr *devicep, } } - gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz); + if (__builtin_expect (aq != NULL, 0)) + { + void *h_buf = (void *) h; + if (ephemeral) + { + /* We're queueing up an asynchronous copy from data that may + disappear before the transfer takes place (i.e. because it is a + stack local in a function that is no longer executing). As we've + not been able to use CBUF, make a copy of the data into a + temporary buffer. */ + h_buf = gomp_malloc (sz); + memcpy (h_buf, h, sz); + } + goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func, + "dev", d, "host", h_buf, h, sz, aq); + if (ephemeral) + /* Free once the transfer has completed. 
*/ + devicep->openacc.async.queue_callback_func (aq, free, h_buf); + } + else + gomp_device_copy (devicep, devicep->host2dev_func, + "dev", d, "host", h, sz); } attribute_hidden void @@ -970,13 +983,13 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, cbuf.chunk_cnt = -1; cbuf.use_cnt = 0; cbuf.buf = NULL; - if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET) + if (mapnum > 1 || (pragma_kind & GOMP_MAP_VARS_TARGET)) { size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk); cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size); cbuf.chunk_cnt = 0; } - if (pragma_kind == GOMP_MAP_VARS_TARGET) + if (pragma_kind & GOMP_MAP_VARS_TARGET) { size_t align = 4 * sizeof (void *); tgt_align = align; @@ -1195,7 +1208,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, { /* Not present, hence, skip entry - including its MAP_POINTER, when existing. */ - tgt->list[i].offset = OFFSET_POINTER; + tgt->list[i].offset = OFFSET_INLINED; if (i + 1 < mapnum && ((typemask & get_kind (short_mapkind, kinds, i + 1)) == GOMP_MAP_POINTER)) @@ -1249,7 +1262,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, tgt->tgt_start = (uintptr_t) tgt->to_free; tgt->tgt_end = tgt->tgt_start + sizes[0]; } - else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET) + else if (not_found_cnt || (pragma_kind & GOMP_MAP_VARS_TARGET)) { /* Allocate tgt_align aligned tgt_size block of memory. 
*/ /* FIXME: Perhaps change interface to allocate properly aligned @@ -1287,7 +1300,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, } tgt_size = 0; - if (pragma_kind == GOMP_MAP_VARS_TARGET) + if (pragma_kind & GOMP_MAP_VARS_TARGET) tgt_size = mapnum * sizeof (void *); tgt->array = NULL; @@ -1383,6 +1396,11 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, { uintptr_t target = (uintptr_t) hostaddrs[i]; void *devptr = *(void**) hostaddrs[i+1] + sizes[i+1]; + /* Per + <https://inbox.sourceware.org/gcc-patches/87o7pe12ke.fsf@euler.schwinge.homeip.net> + "OpenMP: Handle descriptors in target's firstprivate [PR104949]" + this probably needs revision for 'aq' usage. */ + assert (!aq); gomp_copy_host2dev (devicep, aq, devptr, &target, sizeof (void *), false, cbufp); ++i; @@ -1468,8 +1486,9 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, gomp_mutex_unlock (&devicep->lock); gomp_fatal ("always pointer not mapped"); } - if ((get_kind (short_mapkind, kinds, i - 1) & typemask) - != GOMP_MAP_ALWAYS_POINTER) + if (i > 0 + && ((get_kind (short_mapkind, kinds, i - 1) & typemask) + != GOMP_MAP_ALWAYS_POINTER)) cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1); if (cur_node.tgt_offset) cur_node.tgt_offset -= sizes[i]; @@ -1674,7 +1693,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, - k->host_start), sizes[j], cbufp, false); } - } + } i = j - 1; break; case GOMP_MAP_FORCE_PRESENT: @@ -1724,7 +1743,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, } } - if (pragma_kind == GOMP_MAP_VARS_TARGET) + if (pragma_kind & GOMP_MAP_VARS_TARGET) { for (i = 0; i < mapnum; i++) { @@ -1738,9 +1757,6 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, if (cbufp) { - /* See 'gomp_coalesce_buf_add'. 
*/ - assert (!aq); - long c = 0; for (c = 0; c < cbuf.chunk_cnt; ++c) gomp_copy_host2dev (devicep, aq, @@ -1748,8 +1764,12 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, (char *) cbuf.buf + (cbuf.chunks[c].start - cbuf.chunks[0].start), cbuf.chunks[c].end - cbuf.chunks[c].start, - true, NULL); - free (cbuf.buf); + false, NULL); + if (aq) + /* Free once the transfer has completed. */ + devicep->openacc.async.queue_callback_func (aq, free, cbuf.buf); + else + free (cbuf.buf); cbuf.buf = NULL; cbufp = NULL; } @@ -2155,6 +2175,19 @@ get_gomp_offload_icvs (int dev_num) new->icvs.nteams = gomp_default_icv_values.nteams_var; if (dev_x != NULL + && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_TEAMS_THREAD_LIMIT)) + new->icvs.teams_thread_limit = dev_x->icvs.teams_thread_limit_var; + else if (dev != NULL + && gomp_get_icv_flag (dev->flags, GOMP_ICV_TEAMS_THREAD_LIMIT)) + new->icvs.teams_thread_limit = dev->icvs.teams_thread_limit_var; + else if (all != NULL + && gomp_get_icv_flag (all->flags, GOMP_ICV_TEAMS_THREAD_LIMIT)) + new->icvs.teams_thread_limit = all->icvs.teams_thread_limit_var; + else + new->icvs.teams_thread_limit + = gomp_default_icv_values.teams_thread_limit_var; + + if (dev_x != NULL && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_DEFAULT_DEVICE)) new->icvs.default_device = dev_x->icvs.default_device_var; else if (dev != NULL @@ -2192,11 +2225,16 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, /* Load image to device and get target addresses for the image. */ struct addr_pair *target_table = NULL; + uint64_t *rev_target_fn_table = NULL; int i, num_target_entries; + /* With reverse offload, insert also target-host addresses. */ + bool rev_lookup = omp_requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD; + num_target_entries = devicep->load_image_func (devicep->target_id, version, - target_data, &target_table, NULL); + target_data, &target_table, + rev_lookup ? 
&rev_target_fn_table : NULL); if (num_target_entries != num_funcs + num_vars /* "+1" due to the additional ICV struct. */ @@ -2215,6 +2253,10 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, /* "+1" due to the additional ICV struct. */ tgt->array = gomp_malloc ((num_funcs + num_vars + 1) * sizeof (*tgt->array)); + if (rev_target_fn_table) + tgt->rev_array = gomp_malloc (num_funcs * sizeof (*tgt->rev_array)); + else + tgt->rev_array = NULL; tgt->refcount = REFCOUNT_INFINITY; tgt->tgt_start = 0; tgt->tgt_end = 0; @@ -2223,6 +2265,7 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, tgt->list_count = 0; tgt->device_descr = devicep; splay_tree_node array = tgt->array; + reverse_splay_tree_node rev_array = tgt->rev_array; for (i = 0; i < num_funcs; i++) { @@ -2237,6 +2280,17 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, array->left = NULL; array->right = NULL; splay_tree_insert (&devicep->mem_map, array); + if (rev_target_fn_table) + { + reverse_splay_tree_key k2 = &rev_array->key; + k2->dev = rev_target_fn_table[i]; + k2->k = k; + rev_array->left = NULL; + rev_array->right = NULL; + if (k2->dev != 0) + reverse_splay_tree_insert (&devicep->mem_map_rev, rev_array); + rev_array++; + } array++; } @@ -2290,7 +2344,14 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, int dev_num = (int) (devicep - &devices[0]); struct gomp_offload_icvs *icvs = get_gomp_offload_icvs (dev_num); size_t var_size = var->end - var->start; - + if (var_size != sizeof (struct gomp_offload_icvs)) + { + gomp_mutex_unlock (&devicep->lock); + if (is_register_lock) + gomp_mutex_unlock (&register_lock); + gomp_fatal ("offload plugin managed 'icv struct' not of expected " + "format"); + } /* Copy the ICVs variable to place on device memory, hereby actually designating its device number into effect. 
*/ gomp_copy_host2dev (devicep, NULL, (void *) var->start, icvs, @@ -2349,6 +2410,15 @@ gomp_unload_image_from_device (struct gomp_device_descr *devicep, gomp_mutex_unlock (&devicep->lock); gomp_fatal ("image unload fail"); } + if (devicep->mem_map_rev.root) + { + /* Free reverse offload splay tree + data; 'tgt->rev_array' is the only + real allocation. */ + assert (node && node->tgt && node->tgt->rev_array); + assert (devicep->mem_map_rev.root->key.k->tgt == node->tgt); + free (node->tgt->rev_array); + devicep->mem_map_rev.root = NULL; + } /* Remove mappings from splay tree. */ int i; @@ -2769,6 +2839,20 @@ clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags) return flags; } +static void +gomp_copy_back_icvs (struct gomp_device_descr *devicep, int device) +{ + struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device); + if (item == NULL) + return; + + void *host_ptr = &item->icvs; + void *dev_ptr = omp_get_mapped_ptr (host_ptr, device); + if (dev_ptr != NULL) + gomp_copy_dev2host (devicep, NULL, host_ptr, dev_ptr, + sizeof (struct gomp_offload_icvs)); +} + /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present, and several arguments have been added: FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h. @@ -2801,6 +2885,146 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, size_t tgt_align = 0, tgt_size = 0; bool fpc_done = false; + /* Obtain the original TEAMS and THREADS values from ARGS. */ + intptr_t orig_teams = 1, orig_threads = 0; + size_t num_args = 0, len = 1, teams_len = 1, threads_len = 1; + void **tmpargs = args; + while (*tmpargs) + { + intptr_t id = (intptr_t) *tmpargs++, val; + if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM) + { + val = (intptr_t) *tmpargs++; + len = 2; + } + else + { + val = id >> GOMP_TARGET_ARG_VALUE_SHIFT; + len = 1; + } + num_args += len; + if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL) + continue; + val = val > INT_MAX ? 
INT_MAX : val; + if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS) + { + orig_teams = val; + teams_len = len; + } + else if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT) + { + orig_threads = val; + threads_len = len; + } + } + + intptr_t new_teams = orig_teams, new_threads = orig_threads; + /* ORIG_TEAMS == -2: No explicit teams construct specified. Set to 1. + ORIG_TEAMS == -1: TEAMS construct with NUM_TEAMS clause specified, but the + value could not be determined. No change. + ORIG_TEAMS == 0: TEAMS construct without NUM_TEAMS clause. + Set device-specific value. + ORIG_TEAMS > 0: Value was already set through e.g. NUM_TEAMS clause. + No change. */ + if (orig_teams == -2) + new_teams = 1; + else if (orig_teams == 0) + { + struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device); + if (item != NULL) + new_teams = item->icvs.nteams; + } + /* The device-specific teams-thread-limit is only set if (a) an explicit TEAMS + region exists, i.e. ORIG_TEAMS > -2, and (b) THREADS was not already set by + e.g. a THREAD_LIMIT clause. */ + if (orig_teams > -2 && orig_threads == 0) + { + struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device); + if (item != NULL) + new_threads = item->icvs.teams_thread_limit; + } + + /* Copy and change the arguments list only if TEAMS or THREADS need to be + updated. */ + void **new_args = args; + if (orig_teams != new_teams || orig_threads != new_threads) + { + size_t tms_len = (orig_teams == new_teams + ? teams_len + : (new_teams > -(1 << 15) && new_teams < (1 << 15) + ? 1 : 2)); + size_t ths_len = (orig_threads == new_threads + ? threads_len + : (new_threads > -(1 << 15) && new_threads < (1 << 15) + ? 1 : 2)); + /* One additional item after the last arg must be NULL. 
*/ + size_t new_args_cnt = num_args - teams_len - threads_len + tms_len + + ths_len + 1; + new_args = (void **) gomp_alloca (new_args_cnt * sizeof (void*)); + + tmpargs = args; + void **tmp_new_args = new_args; + /* Copy all args except TEAMS and THREADS. TEAMS and THREADS are copied + too if they have not been changed and skipped otherwise. */ + while (*tmpargs) + { + intptr_t id = (intptr_t) *tmpargs; + if (((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS + && orig_teams != new_teams) + || ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT + && orig_threads != new_threads)) + { + tmpargs++; + if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM) + tmpargs++; + } + else + { + *tmp_new_args++ = *tmpargs++; + if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM) + *tmp_new_args++ = *tmpargs++; + } + } + + /* Add the new TEAMS arg to the new args list if it has been changed. */ + if (orig_teams != new_teams) + { + intptr_t new_val = new_teams; + if (tms_len == 1) + { + new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT) + | GOMP_TARGET_ARG_NUM_TEAMS; + *tmp_new_args++ = (void *) new_val; + } + else + { + *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM + | GOMP_TARGET_ARG_NUM_TEAMS); + *tmp_new_args++ = (void *) new_val; + } + } + + /* Add the new THREADS arg to the new args list if it has been changed. 
*/ + if (orig_threads != new_threads) + { + intptr_t new_val = new_threads; + if (ths_len == 1) + { + new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT) + | GOMP_TARGET_ARG_THREAD_LIMIT; + *tmp_new_args++ = (void *) new_val; + } + else + { + *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM + | GOMP_TARGET_ARG_THREAD_LIMIT); + *tmp_new_args++ = (void *) new_val; + } + } + + *tmp_new_args = NULL; + } + flags = clear_unsupported_flags (devicep, flags); if (flags & GOMP_TARGET_FLAG_NOWAIT) @@ -2813,6 +3037,7 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, { struct gomp_team *team = gomp_new_team (1); struct gomp_task *task = thr->task; + struct gomp_task **implicit_task = &task; struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv; team->prev_ts = thr->ts; thr->ts.team = team; @@ -2825,21 +3050,29 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, thr->ts.static_trip = 0; thr->task = &team->implicit_task[0]; gomp_init_task (thr->task, NULL, icv); - if (task) + while (*implicit_task + && (*implicit_task)->kind != GOMP_TASK_IMPLICIT) + implicit_task = &(*implicit_task)->parent; + if (*implicit_task) { - thr->task = task; + thr->task = *implicit_task; gomp_end_task (); - free (task); + free (*implicit_task); thr->task = &team->implicit_task[0]; } else pthread_setspecific (gomp_thread_destructor, thr); + if (implicit_task != &task) + { + *implicit_task = thr->task; + thr->task = task; + } } if (thr->ts.team && !thr->task->final_task) { gomp_create_target_task (devicep, fn, mapnum, hostaddrs, - sizes, kinds, flags, depend, args, + sizes, kinds, flags, depend, new_args, GOMP_TARGET_TASK_BEFORE_MAP); return; } @@ -2885,7 +3118,7 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, tgt_align, tgt_size); } } - gomp_target_fallback (fn, hostaddrs, devicep, args); + gomp_target_fallback (fn, hostaddrs, devicep, new_args); return; } @@ -2915,7 +3148,7 @@ GOMP_target_ext (int device, void (*fn) (void *), 
size_t mapnum, } devicep->run_func (devicep->target_id, fn_addr, tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs, - args); + new_args); if (tgt_vars) { htab_clear (refcount_set); @@ -2923,6 +3156,563 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, } if (refcount_set) htab_free (refcount_set); + + /* Copy back ICVs from device to host. + HOST_PTR is expected to exist since it was added in + gomp_load_image_to_device if not already available. */ + gomp_copy_back_icvs (devicep, device); + +} + + +/* Reverse lookup (device addr -> host addr) for reverse offload. We avoid + keeping track of all variable handling - assuming that reverse offload occurs + ony very rarely. Downside is that the reverse search is slow. */ + +struct gomp_splay_tree_rev_lookup_data { + uintptr_t tgt_start; + uintptr_t tgt_end; + splay_tree_key key; +}; + +static int +gomp_splay_tree_rev_lookup (splay_tree_key key, void *d) +{ + struct gomp_splay_tree_rev_lookup_data *data; + data = (struct gomp_splay_tree_rev_lookup_data *)d; + uintptr_t tgt_start = key->tgt->tgt_start + key->tgt_offset; + + if (tgt_start > data->tgt_start || key->tgt->list_count == 0) + return 0; + + size_t j; + for (j = 0; j < key->tgt->list_count; j++) + if (key->tgt->list[j].key == key) + break; + assert (j < key->tgt->list_count); + uintptr_t tgt_end = tgt_start + key->tgt->list[j].length; + + if ((tgt_start == data->tgt_start && tgt_end == data->tgt_end) + || (tgt_end > data->tgt_start && tgt_start < data->tgt_end)) + { + data->key = key; + return 1; + } + return 0; +} + +static inline splay_tree_key +gomp_map_rev_lookup (splay_tree mem_map, uint64_t tgt_start, uint64_t tgt_end, + bool zero_len) +{ + struct gomp_splay_tree_rev_lookup_data data; + data.key = NULL; + data.tgt_start = tgt_start; + data.tgt_end = tgt_end; + + if (tgt_start != tgt_end) + { + splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data); + return data.key; + } + + data.tgt_end++; + splay_tree_foreach_lazy 
(mem_map, gomp_splay_tree_rev_lookup, &data); + if (data.key != NULL || zero_len) + return data.key; + data.tgt_end--; + + data.tgt_start--; + splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data); + return data.key; +} + +struct cpy_data +{ + uint64_t devaddr; + bool present, aligned; +}; + + +/* Search just mapped reverse-offload data; returns index if found, + otherwise >= n. */ + +static inline int +gomp_map_cdata_lookup_int (struct cpy_data *d, uint64_t *devaddrs, + unsigned short *kinds, uint64_t *sizes, size_t n, + uint64_t tgt_start, uint64_t tgt_end) +{ + const bool short_mapkind = true; + const int typemask = short_mapkind ? 0xff : 0x7; + size_t i; + for (i = 0; i < n; i++) + { + bool is_struct = ((get_kind (short_mapkind, kinds, i) & typemask) + == GOMP_MAP_STRUCT); + uint64_t dev_end; + if (!is_struct) + dev_end = d[i].devaddr + sizes[i]; + else + { + if (i + sizes[i] < n) + dev_end = d[i + sizes[i]].devaddr + sizes[i + sizes[i]]; + else + dev_end = devaddrs[i + sizes[i]] + sizes[i + sizes[i]]; + } + if ((d[i].devaddr == tgt_start && dev_end == tgt_end) + || (dev_end > tgt_start && d[i].devaddr < tgt_end)) + break; + if (is_struct) + i += sizes[i]; + } + return i; +} + +static inline int +gomp_map_cdata_lookup (struct cpy_data *d, uint64_t *devaddrs, + unsigned short *kinds, uint64_t *sizes, + size_t n, uint64_t tgt_start, uint64_t tgt_end, + bool zero_len) +{ + size_t i; + if (tgt_start != tgt_end) + return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n, + tgt_start, tgt_end); + tgt_end++; + i = gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n, + tgt_start, tgt_end); + if (i < n || zero_len) + return i; + tgt_end--; + + tgt_start--; + return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n, + tgt_start, tgt_end); +} + +/* Handle reverse offload. This is called by the device plugins for a + reverse offload; it is not called if the outer target runs on the host. 
+ The mapping is simplified device-affecting constructs (except for target + with device(ancestor:1)) must not be encountered; in particular not + target (enter/exit) data. */ + +void +gomp_target_rev (uint64_t fn_ptr, uint64_t mapnum, uint64_t devaddrs_ptr, + uint64_t sizes_ptr, uint64_t kinds_ptr, int dev_num, + void (*dev_to_host_cpy) (void *, const void *, size_t, void*), + void (*host_to_dev_cpy) (void *, const void *, size_t, void*), + void *token) +{ + /* Return early if there is no offload code. */ + if (sizeof (OFFLOAD_PLUGINS) == sizeof ("")) + return; + /* Currently, this fails because of calculate_firstprivate_requirements + below; it could be fixed but additional code needs to be updated to + handle 32bit hosts - thus, it is not worthwhile. */ + if (sizeof (void *) != sizeof (uint64_t)) + gomp_fatal ("Reverse offload of 32bit hosts not supported."); + + struct cpy_data *cdata = NULL; + uint64_t *devaddrs; + uint64_t *sizes; + unsigned short *kinds; + const bool short_mapkind = true; + const int typemask = short_mapkind ? 
0xff : 0x7; + struct gomp_device_descr *devicep = resolve_device (dev_num, false); + + reverse_splay_tree_key n; + struct reverse_splay_tree_key_s k; + k.dev = fn_ptr; + + gomp_mutex_lock (&devicep->lock); + n = gomp_map_lookup_rev (&devicep->mem_map_rev, &k); + gomp_mutex_unlock (&devicep->lock); + + if (n == NULL) + gomp_fatal ("Cannot find reverse-offload function"); + void (*host_fn)() = (void (*)()) n->k->host_start; + + if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) || mapnum == 0) + { + devaddrs = (uint64_t *) (uintptr_t) devaddrs_ptr; + sizes = (uint64_t *) (uintptr_t) sizes_ptr; + kinds = (unsigned short *) (uintptr_t) kinds_ptr; + } + else + { + devaddrs = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t)); + sizes = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t)); + kinds = (unsigned short *) gomp_malloc (mapnum * sizeof (unsigned short)); + if (dev_to_host_cpy) + { + dev_to_host_cpy (devaddrs, (const void *) (uintptr_t) devaddrs_ptr, + mapnum * sizeof (uint64_t), token); + dev_to_host_cpy (sizes, (const void *) (uintptr_t) sizes_ptr, + mapnum * sizeof (uint64_t), token); + dev_to_host_cpy (kinds, (const void *) (uintptr_t) kinds_ptr, + mapnum * sizeof (unsigned short), token); + } + else + { + gomp_copy_dev2host (devicep, NULL, devaddrs, + (const void *) (uintptr_t) devaddrs_ptr, + mapnum * sizeof (uint64_t)); + gomp_copy_dev2host (devicep, NULL, sizes, + (const void *) (uintptr_t) sizes_ptr, + mapnum * sizeof (uint64_t)); + gomp_copy_dev2host (devicep, NULL, kinds, (const void *) (uintptr_t) kinds_ptr, + mapnum * sizeof (unsigned short)); + } + } + + size_t tgt_align = 0, tgt_size = 0; + + /* If actually executed on 32bit systems, the casts lead to wrong code; + but 32bit with offloading is not supported; see top of this function. 
*/ + calculate_firstprivate_requirements (mapnum, (void *) (uintptr_t) sizes, + (void *) (uintptr_t) kinds, + &tgt_align, &tgt_size); + + if (tgt_align) + { + char *tgt = gomp_alloca (tgt_size + tgt_align - 1); + uintptr_t al = (uintptr_t) tgt & (tgt_align - 1); + if (al) + tgt += tgt_align - al; + tgt_size = 0; + for (uint64_t i = 0; i < mapnum; i++) + if (get_kind (short_mapkind, kinds, i) == GOMP_MAP_FIRSTPRIVATE + && devaddrs[i] != 0) + { + size_t align = (size_t) 1 << (kinds[i] >> 8); + tgt_size = (tgt_size + align - 1) & ~(align - 1); + if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + memcpy (tgt + tgt_size, (void *) (uintptr_t) devaddrs[i], + (size_t) sizes[i]); + else if (dev_to_host_cpy) + dev_to_host_cpy (tgt + tgt_size, (void *) (uintptr_t) devaddrs[i], + (size_t) sizes[i], token); + else + gomp_copy_dev2host (devicep, NULL, tgt + tgt_size, + (void *) (uintptr_t) devaddrs[i], + (size_t) sizes[i]); + devaddrs[i] = (uint64_t) (uintptr_t) tgt + tgt_size; + tgt_size = tgt_size + sizes[i]; + if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + && i + 1 < mapnum + && ((get_kind (short_mapkind, kinds, i) & typemask) + == GOMP_MAP_ATTACH)) + { + *(uint64_t*) (uintptr_t) (devaddrs[i+1] + sizes[i+1]) + = (uint64_t) devaddrs[i]; + ++i; + } + } + } + + if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0) + { + size_t j, struct_cpy = 0; + splay_tree_key n2; + cdata = gomp_alloca (sizeof (*cdata) * mapnum); + memset (cdata, '\0', sizeof (*cdata) * mapnum); + gomp_mutex_lock (&devicep->lock); + for (uint64_t i = 0; i < mapnum; i++) + { + if (devaddrs[i] == 0) + continue; + n = NULL; + int kind = get_kind (short_mapkind, kinds, i) & typemask; + switch (kind) + { + case GOMP_MAP_FIRSTPRIVATE: + case GOMP_MAP_FIRSTPRIVATE_INT: + continue; + + case GOMP_MAP_DELETE: + case GOMP_MAP_RELEASE: + case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION: + /* Assume it is present; look it up - but ignore otherwise. 
*/ + case GOMP_MAP_ALLOC: + case GOMP_MAP_FROM: + case GOMP_MAP_FORCE_ALLOC: + case GOMP_MAP_FORCE_FROM: + case GOMP_MAP_ALWAYS_FROM: + case GOMP_MAP_TO: + case GOMP_MAP_TOFROM: + case GOMP_MAP_FORCE_TO: + case GOMP_MAP_FORCE_TOFROM: + case GOMP_MAP_ALWAYS_TO: + case GOMP_MAP_ALWAYS_TOFROM: + case GOMP_MAP_ZERO_LEN_ARRAY_SECTION: + cdata[i].devaddr = devaddrs[i]; + bool zero_len = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION + || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION); + j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i, + devaddrs[i], + devaddrs[i] + sizes[i], zero_len); + if (j < i) + { + n2 = NULL; + cdata[i].present = true; + devaddrs[i] = devaddrs[j] + devaddrs[i] - cdata[j].devaddr; + } + else + { + n2 = gomp_map_rev_lookup (&devicep->mem_map, + devaddrs[i], + devaddrs[i] + sizes[i], zero_len); + cdata[i].present = n2 != NULL; + } + if (!cdata[i].present + && kind != GOMP_MAP_DELETE + && kind != GOMP_MAP_RELEASE + && kind != GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION) + { + cdata[i].aligned = true; + size_t align = (size_t) 1 << (kinds[i] >> 8); + devaddrs[i] + = (uint64_t) (uintptr_t) gomp_aligned_alloc (align, + sizes[i]); + } + else if (n2 != NULL) + devaddrs[i] = (n2->host_start + cdata[i].devaddr + - (n2->tgt->tgt_start + n2->tgt_offset)); + if (((!cdata[i].present || struct_cpy) + && (kind == GOMP_MAP_TO || kind == GOMP_MAP_TOFROM)) + || kind == GOMP_MAP_FORCE_TO + || kind == GOMP_MAP_FORCE_TOFROM + || kind == GOMP_MAP_ALWAYS_TO + || kind == GOMP_MAP_ALWAYS_TOFROM) + { + if (dev_to_host_cpy) + dev_to_host_cpy ((void *) (uintptr_t) devaddrs[i], + (void *) (uintptr_t) cdata[i].devaddr, + sizes[i], token); + else + gomp_copy_dev2host (devicep, NULL, + (void *) (uintptr_t) devaddrs[i], + (void *) (uintptr_t) cdata[i].devaddr, + sizes[i]); + } + if (struct_cpy) + struct_cpy--; + break; + case GOMP_MAP_ATTACH: + case GOMP_MAP_POINTER: + case GOMP_MAP_ALWAYS_POINTER: + n2 = gomp_map_rev_lookup (&devicep->mem_map, + devaddrs[i] + sizes[i], + 
devaddrs[i] + sizes[i] + + sizeof (void*), false); + cdata[i].present = n2 != NULL; + cdata[i].devaddr = devaddrs[i]; + if (n2) + devaddrs[i] = (n2->host_start + cdata[i].devaddr + - (n2->tgt->tgt_start + n2->tgt_offset)); + else + { + j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i, + devaddrs[i] + sizes[i], + devaddrs[i] + sizes[i] + + sizeof (void*), false); + if (j < i) + { + cdata[i].present = true; + devaddrs[i] = (devaddrs[j] + devaddrs[i] + - cdata[j].devaddr); + } + } + if (!cdata[i].present) + devaddrs[i] = (uintptr_t) gomp_malloc (sizeof (void*)); + /* Assume that when present, the pointer is already correct. */ + if (!n2) + *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[i]) + = devaddrs[i-1]; + break; + case GOMP_MAP_TO_PSET: + /* Assume that when present, the pointers are fine and no 'to:' + is required. */ + n2 = gomp_map_rev_lookup (&devicep->mem_map, + devaddrs[i], devaddrs[i] + sizes[i], + false); + cdata[i].present = n2 != NULL; + cdata[i].devaddr = devaddrs[i]; + if (n2) + devaddrs[i] = (n2->host_start + cdata[i].devaddr + - (n2->tgt->tgt_start + n2->tgt_offset)); + else + { + j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i, + devaddrs[i], + devaddrs[i] + sizes[i], false); + if (j < i) + { + cdata[i].present = true; + devaddrs[i] = (devaddrs[j] + devaddrs[i] + - cdata[j].devaddr); + } + } + if (!cdata[i].present) + { + cdata[i].aligned = true; + size_t align = (size_t) 1 << (kinds[i] >> 8); + devaddrs[i] + = (uint64_t) (uintptr_t) gomp_aligned_alloc (align, + sizes[i]); + if (dev_to_host_cpy) + dev_to_host_cpy ((void *) (uintptr_t) devaddrs[i], + (void *) (uintptr_t) cdata[i].devaddr, + sizes[i], token); + else + gomp_copy_dev2host (devicep, NULL, + (void *) (uintptr_t) devaddrs[i], + (void *) (uintptr_t) cdata[i].devaddr, + sizes[i]); + } + for (j = i + 1; j < mapnum; j++) + { + kind = get_kind (short_mapkind, kinds, j) & typemask; + if (!GOMP_MAP_ALWAYS_POINTER_P (kind) + && !GOMP_MAP_POINTER_P (kind)) + break; + if 
(devaddrs[j] < devaddrs[i]) + break; + if (cdata[i].present) + continue; + if (devaddrs[j] == 0) + { + *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[j]) = 0; + continue; + } + int k; + n2 = NULL; + /* Dereference devaddrs[j] to get the device addr. */ + assert (devaddrs[j] - sizes[j] == cdata[i].devaddr); + devaddrs[j] = *(uint64_t *) (uintptr_t) (devaddrs[i] + + sizes[j]); + cdata[j].present = true; + cdata[j].devaddr = devaddrs[j]; + if (devaddrs[j] == 0) + continue; + k = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, j, + devaddrs[j], + devaddrs[j] + sizeof (void*), + false); + if (k < j) + devaddrs[j] = (devaddrs[k] + devaddrs[j] + - cdata[k].devaddr); + else + { + n2 = gomp_map_rev_lookup (&devicep->mem_map, + devaddrs[j], + devaddrs[j] + sizeof (void*), + false); + if (n2 == NULL) + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("Pointer target wasn't mapped"); + } + devaddrs[j] = (n2->host_start + cdata[j].devaddr + - (n2->tgt->tgt_start + n2->tgt_offset)); + } + *(void **) (uintptr_t) (devaddrs[i] + sizes[j]) + = (void *) (uintptr_t) devaddrs[j]; + } + i = j -1; + break; + case GOMP_MAP_STRUCT: + n2 = gomp_map_rev_lookup (&devicep->mem_map, devaddrs[i+1], + devaddrs[i + sizes[i]] + + sizes[i + sizes[i]], false); + cdata[i].present = n2 != NULL; + cdata[i].devaddr = devaddrs[i]; + struct_cpy = cdata[i].present ? 
0 : sizes[i]; + if (!n2) + { + size_t sz = (size_t) (devaddrs[i + sizes[i]] + - devaddrs[i+1] + + sizes[i + sizes[i]]); + size_t align = (size_t) 1 << (kinds[i] >> 8); + cdata[i].aligned = true; + devaddrs[i] = (uintptr_t) gomp_aligned_alloc (align, sz); + devaddrs[i] -= devaddrs[i+1] - cdata[i].devaddr; + } + else + devaddrs[i] = (n2->host_start + cdata[i].devaddr + - (n2->tgt->tgt_start + n2->tgt_offset)); + break; + default: + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("gomp_target_rev unhandled kind 0x%.4x", kinds[i]); + } + } + gomp_mutex_unlock (&devicep->lock); + } + + host_fn (devaddrs); + + if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0) + { + uint64_t struct_cpy = 0; + bool clean_struct = false; + for (uint64_t i = 0; i < mapnum; i++) + { + if (cdata[i].devaddr == 0) + continue; + int kind = get_kind (short_mapkind, kinds, i) & typemask; + bool copy = !cdata[i].present || struct_cpy; + switch (kind) + { + case GOMP_MAP_FORCE_FROM: + case GOMP_MAP_FORCE_TOFROM: + case GOMP_MAP_ALWAYS_FROM: + case GOMP_MAP_ALWAYS_TOFROM: + copy = true; + /* FALLTHRU */ + case GOMP_MAP_FROM: + case GOMP_MAP_TOFROM: + if (copy && host_to_dev_cpy) + host_to_dev_cpy ((void *) (uintptr_t) cdata[i].devaddr, + (void *) (uintptr_t) devaddrs[i], + sizes[i], token); + else if (copy) + gomp_copy_host2dev (devicep, NULL, + (void *) (uintptr_t) cdata[i].devaddr, + (void *) (uintptr_t) devaddrs[i], + sizes[i], false, NULL); + default: + break; + } + if (struct_cpy) + { + struct_cpy--; + continue; + } + if (kind == GOMP_MAP_STRUCT && !cdata[i].present) + { + clean_struct = true; + struct_cpy = sizes[i]; + } + else if (!cdata[i].present && cdata[i].aligned) + gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]); + else if (!cdata[i].present) + free ((void *) (uintptr_t) devaddrs[i]); + } + if (clean_struct) + for (uint64_t i = 0; i < mapnum; i++) + if (!cdata[i].present + && ((get_kind (short_mapkind, kinds, i) & typemask) + == GOMP_MAP_STRUCT)) + { + 
devaddrs[i] += cdata[i+1].devaddr - cdata[i].devaddr; + gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]); + } + + free (devaddrs); + free (sizes); + free (kinds); + } } /* Host fallback for GOMP_target_data{,_ext} routines. */ @@ -3309,7 +4099,10 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, GOMP_MAP_VARS_ENTER_DATA); i += j - i - 1; } - else if (i + 1 < mapnum && (kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH) + else if (i + 1 < mapnum + && ((kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH + || ((kinds[i + 1] & 0xff) == GOMP_MAP_ALWAYS_POINTER + && (kinds[i] & 0xff) != GOMP_MAP_ALWAYS_POINTER))) { /* An attach operation must be processed together with the mapped base-pointer list item. */ @@ -4302,6 +5095,7 @@ gomp_target_init (void) /* current_device.capabilities has already been set. */ current_device.type = current_device.get_type_func (); current_device.mem_map.root = NULL; + current_device.mem_map_rev.root = NULL; current_device.state = GOMP_DEVICE_UNINITIALIZED; for (i = 0; i < new_num_devs; i++) { diff --git a/libgomp/task.c b/libgomp/task.c index 30cd046..6907b33 100644 --- a/libgomp/task.c +++ b/libgomp/task.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2022 Free Software Foundation, Inc. +/* Copyright (C) 2007-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library @@ -2465,6 +2465,7 @@ gomp_create_artificial_team (void) struct gomp_task_icv *icv; struct gomp_team *team = gomp_new_team (1); struct gomp_task *task = thr->task; + struct gomp_task **implicit_task = &task; icv = task ? 
&task->icv : &gomp_global_icv; team->prev_ts = thr->ts; thr->ts.team = team; @@ -2477,17 +2478,25 @@ gomp_create_artificial_team (void) thr->ts.static_trip = 0; thr->task = &team->implicit_task[0]; gomp_init_task (thr->task, NULL, icv); - if (task) + while (*implicit_task + && (*implicit_task)->kind != GOMP_TASK_IMPLICIT) + implicit_task = &(*implicit_task)->parent; + if (*implicit_task) { - thr->task = task; + thr->task = *implicit_task; gomp_end_task (); - free (task); + free (*implicit_task); thr->task = &team->implicit_task[0]; } #ifdef LIBGOMP_USE_PTHREADS else pthread_setspecific (gomp_thread_destructor, thr); #endif + if (implicit_task != &task) + { + *implicit_task = thr->task; + thr->task = task; + } } /* The format of data is: @@ -2678,6 +2687,16 @@ omp_in_final (void) ialias (omp_in_final) +int +omp_in_explicit_task (void) +{ + struct gomp_thread *thr = gomp_thread (); + struct gomp_task *task = thr->task; + return task && task->kind != GOMP_TASK_IMPLICIT; +} + +ialias (omp_in_explicit_task) + void omp_fulfill_event (omp_event_handle_t event) { diff --git a/libgomp/taskloop.c b/libgomp/taskloop.c index af175f4..9fc3380 100644 --- a/libgomp/taskloop.c +++ b/libgomp/taskloop.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2015-2022 Free Software Foundation, Inc. +/* Copyright (C) 2015-2023 Free Software Foundation, Inc. Contributed by Jakub Jelinek <jakub@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/team.c b/libgomp/team.c index cb6875d..54dfca8 100644 --- a/libgomp/team.c +++ b/libgomp/team.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. 
This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/teams.c b/libgomp/teams.c index fc5f5d3..3747f74 100644 --- a/libgomp/teams.c +++ b/libgomp/teams.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2018-2022 Free Software Foundation, Inc. +/* Copyright (C) 2018-2023 Free Software Foundation, Inc. Contributed by Jakub Jelinek <jakub@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library diff --git a/libgomp/testsuite/config/default.exp b/libgomp/testsuite/config/default.exp index 7ac3f31..b7afc82 100644 --- a/libgomp/testsuite/config/default.exp +++ b/libgomp/testsuite/config/default.exp @@ -1,4 +1,4 @@ -# Copyright (C) 1997-2022 Free Software Foundation, Inc. +# Copyright (C) 1997-2023 Free Software Foundation, Inc. # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/libgomp/testsuite/lib/libgomp.exp b/libgomp/testsuite/lib/libgomp.exp index 107a3c2..e12236e 100644 --- a/libgomp/testsuite/lib/libgomp.exp +++ b/libgomp/testsuite/lib/libgomp.exp @@ -32,6 +32,7 @@ load_gcc_lib scansarif.exp load_gcc_lib scantree.exp load_gcc_lib scanltranstree.exp load_gcc_lib scanoffload.exp +load_gcc_lib scanoffloadipa.exp load_gcc_lib scanoffloadtree.exp load_gcc_lib scanoffloadrtl.exp load_gcc_lib scanipa.exp @@ -119,18 +120,6 @@ proc libgomp_init { args } { # Compute what needs to be put into LD_LIBRARY_PATH set always_ld_library_path ".:${blddir}/.libs" - # Add liboffloadmic build directory in LD_LIBRARY_PATH to support - # Intel MIC offloading testing. 
- global offload_plugins - if { [string match "*,intelmic,*" ",$offload_plugins,"] } { - append always_ld_library_path ":${blddir}/../liboffloadmic/.libs" - append always_ld_library_path ":${blddir}/../liboffloadmic/plugin/.libs" - # libstdc++ is required by liboffloadmic - append always_ld_library_path ":${blddir}/../libstdc++-v3/src/.libs" - # libgcc_s is required by libstdc++ - append always_ld_library_path ":${blddir}/../libgcc" - } - global offload_additional_lib_paths if { $offload_additional_lib_paths != "" } { append always_ld_library_path "${offload_additional_lib_paths}" @@ -313,9 +302,6 @@ proc offload_target_to_openacc_device_type { offload_target } { disable { return "host" } - *-intelmic* { - return "" - } nvptx* { return "nvidia" } @@ -415,6 +401,18 @@ proc check_effective_target_offload_device_nvptx { } { } ] } +# Return 1 if using a GCN offload device. +proc check_effective_target_offload_device_gcn { } { + return [check_runtime_nocache offload_device_gcn { + #include <omp.h> + #include "testsuite/libgomp.c-c++-common/on_device_arch.h" + int main () + { + return !on_device_arch_gcn (); + } + } ] +} + # Return 1 if at least one Nvidia GPU is accessible. proc check_effective_target_openacc_nvidia_accel_present { } { @@ -437,28 +435,6 @@ proc check_effective_target_openacc_nvidia_accel_selected { } { return [string match "nvidia" $openacc_device_type] } -# Return 1 if using Intel MIC offload device. -proc check_effective_target_offload_device_intel_mic { } { - return [check_runtime_nocache offload_device_intel_mic { - #include "testsuite/libgomp.c-c++-common/on_device_arch.h" - int main () - { - return !on_device_arch_intel_mic (); - } - } ] -} - -# Return 1 if any Intel MIC offload device is available. 
-proc check_effective_target_offload_device_any_intel_mic { } { - return [check_runtime_nocache offload_device_any_intel_mic { - #include "testsuite/libgomp.c-c++-common/on_device_arch.h" - int main () - { - return !any_device_arch_intel_mic (); - } - } ] -} - # Return 1 if the OpenACC 'host' device type is selected. proc check_effective_target_openacc_host_selected { } { diff --git a/libgomp/testsuite/libgomp.c++/pr108180.C b/libgomp/testsuite/libgomp.c++/pr108180.C new file mode 100644 index 0000000..452910c --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/pr108180.C @@ -0,0 +1,55 @@ +// PR c++/108180 +// { dg-do run } + +struct A { + A () { ++a; } + A (A &&) = delete; + A (const A &) { ++a; } + A &operator= (const A &) = delete; + A &operator= (A &&) = delete; + ~A () { --a; } + static int a; +}; +int A::a = 0; + +struct B { + A a; + template <int N> + int + foo () + { + int res = 0; + #pragma omp parallel for if(false) firstprivate(a) + for (int i = 0; i < 64; ++i) + res += i; + return res; + } + int + bar () + { + int res = 0; + #pragma omp parallel for if(false) firstprivate(a) + for (int i = 0; i < 64; ++i) + res += i; + return res; + } +}; + +int +main () +{ + { + B b; + if (b.foo<0> () != 2016) + __builtin_abort (); + } + if (A::a != 0) + __builtin_abort (); + { + B b; + if (b.bar () != 2016) + __builtin_abort (); + } + if (A::a != 0) + __builtin_abort (); +} diff --git a/libgomp/testsuite/libgomp.c++/pr108286.C b/libgomp/testsuite/libgomp.c++/pr108286.C new file mode 100644 index 0000000..ee88c2f --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/pr108286.C @@ -0,0 +1,29 @@ +// PR c++/108286 +// { dg-do run } + +struct S { + int + foo () + { + int res = 0; +#pragma omp target map(size, ptr[:size], res) nowait + res = ptr[size - 1]; +#pragma omp taskwait + return res; + } + + unsigned size; + int *ptr; +}; + +int +main () +{ + S s; + int buf[5]; + s.size = 5; + s.ptr = buf; + buf[4] = 42; + if (s.foo () != 42) + __builtin_abort (); +} diff --git 
a/libgomp/testsuite/libgomp.c-c++-common/icv-4.c b/libgomp/testsuite/libgomp.c-c++-common/icv-4.c index b987a33..9da0d63 100644 --- a/libgomp/testsuite/libgomp.c-c++-common/icv-4.c +++ b/libgomp/testsuite/libgomp.c-c++-common/icv-4.c @@ -16,7 +16,7 @@ main () } else omp_set_num_teams (6); - if (getenv ("OMP_TEAMS_THREAD_LIMIT") == NULL + if (getenv ("OMP_TEAMS_THREAD_LIMIT") != NULL && strcmp (getenv ("OMP_TEAMS_THREAD_LIMIT"), "12") == 0) { if (omp_get_teams_thread_limit () != 12) diff --git a/libgomp/testsuite/libgomp.c-c++-common/icv-5.c b/libgomp/testsuite/libgomp.c-c++-common/icv-5.c index 431cfc7..72d7af6 100644 --- a/libgomp/testsuite/libgomp.c-c++-common/icv-5.c +++ b/libgomp/testsuite/libgomp.c-c++-common/icv-5.c @@ -1,25 +1,203 @@ /* { dg-do run } */ -/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV_0 "42" } */ -/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV_1 "43" } */ -/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV_2 "44" } */ -/* { dg-set-target-env-var OMP_NUM_TEAMS_ALL "45" } */ -/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV "46" } */ -/* { dg-set-target-env-var OMP_NUM_TEAMS "47" } */ +/* { dg-set-target-env-var OMP_NUM_TEAMS_ALL "3" } */ +/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV "4" } */ +/* { dg-set-target-env-var OMP_NUM_TEAMS "5" } */ +/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV_0 "6" } */ +/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV_1 "7" } */ +/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV_2 "8" } */ +/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_ALL "2" } */ +/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV "3" } */ +/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT "4" } */ +/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV_0 "5" } */ +/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV_1 "6" } */ +/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV_2 "7" } */ #include <omp.h> #include <stdlib.h> +#include <unistd.h> int main () { - if (omp_get_max_teams () != 47) + if (omp_get_max_teams () != 5 + || omp_get_teams_thread_limit () 
!= 4) abort (); + #pragma omp teams + { + if (omp_get_num_teams () > 5 + || omp_get_team_num () >= 5) + abort (); + #pragma omp parallel + if (omp_get_thread_limit () > 4 + || omp_get_thread_num () >= 4) + abort (); + } + + omp_set_num_teams (4); + omp_set_teams_thread_limit (3); + if (omp_get_max_teams () != 4 + || omp_get_teams_thread_limit () != 3) + abort (); + + #pragma omp teams + { + if (omp_get_num_teams () > 4 + || omp_get_team_num () >= 4) + abort (); + #pragma omp parallel + if (omp_get_thread_limit () > 3 + || omp_get_thread_num () >= 3) + abort (); + } + + #pragma omp teams num_teams(3) thread_limit(2) + { + if (omp_get_num_teams () != 3 + || omp_get_team_num () >= 3) + abort (); + #pragma omp parallel + if (omp_get_thread_limit () > 2 + || omp_get_thread_num () >= 2) + abort (); + } + + #pragma omp teams num_teams(5) thread_limit(4) + { + if (omp_get_num_teams () != 5 + || omp_get_team_num () >= 5) + abort (); + #pragma omp parallel + if (omp_get_thread_limit () > 4 + || omp_get_thread_num () >= 4) + abort (); + } + int num_devices = omp_get_num_devices () > 3 ? 3 : omp_get_num_devices (); - for (int i=0; i < num_devices; i++) - #pragma omp target device (i) - if (omp_get_max_teams () != 42 + i) + + for (int i = 0; i < num_devices; i++) + { + #pragma omp target device (i) + if (omp_get_max_teams () != 6 + i + || omp_get_teams_thread_limit () != 5 + i) + abort (); + + #pragma omp target device (i) + #pragma omp teams + #pragma omp parallel + if (omp_get_thread_limit () > 5 + i + || omp_get_thread_num () >= 5 + i) + abort (); + + #pragma omp target device (i) + { + omp_set_num_teams (5 + i); + omp_set_teams_thread_limit (4 + i); + if (omp_get_max_teams () != 5 + i + || omp_get_teams_thread_limit () != 4 + i) + abort (); + } + + /* omp_set_num_teams and omp_set_teams_thread_limit above set the value + of nteams-var and teams-thread-limit-var ICVs on device 'i', which has + scope 'device' and should be available in subsequent target regions. 
*/ + #pragma omp target device (i) + if (omp_get_max_teams () != 5 + i + || omp_get_teams_thread_limit () != 4 + i) + abort (); + + #pragma omp target device (i) + #pragma omp teams + { + if (omp_get_num_teams () > 5 + i + || omp_get_team_num () >= 5 + i) + abort (); + #pragma omp parallel + if (omp_get_thread_limit () > 4 + i + || omp_get_thread_num () >= 4 + i) + abort (); + } + + #pragma omp target device (i) + #pragma omp teams num_teams(6 + i) thread_limit(5 + i) + { + if (omp_get_num_teams () > 6 + i + || omp_get_team_num () >= 6 + i) + abort (); + #pragma omp parallel + if (omp_get_thread_limit () > 5 + i + || omp_get_thread_num () >= 5 + i + || omp_get_num_teams () > 6 + i + || omp_get_team_num () >= 6 + i) + abort (); + } + + #pragma omp target device (i) + #pragma omp teams num_teams(4 + i) thread_limit(3 + i) + { + if (omp_get_num_teams () > 4 + i + || omp_get_team_num () >= 4 + i) + abort (); + #pragma omp parallel + if (omp_get_thread_limit () > 3 + i + || omp_get_thread_num () >= 3 + i + || omp_get_num_teams () > 4 + i + || omp_get_team_num () >= 4 + i) + abort (); + } + + #pragma omp target device (i) + #pragma omp teams thread_limit(3 + i) num_teams(4 + i) + { + if (omp_get_num_teams () > 4 + i + || omp_get_team_num () >= 4 + i) + abort (); + #pragma omp parallel + if (omp_get_thread_limit () > 3 + i + || omp_get_thread_num () >= 3 + i + || omp_get_num_teams () > 4 + i + || omp_get_team_num () >= 4 + i) + abort (); + } + + /* The NUM_TEAMS and THREAD_LIMIT clauses should not change the values + of the corresponding ICVs. */ + #pragma omp target device (i) + if (omp_get_max_teams () != 5 + i + || omp_get_teams_thread_limit () != 4 + i) + abort (); + + /* This tests a large number of teams and threads. If it is larger than + 2^15+1 then the according argument in the kernels arguments list + is encoded with two items instead of one. 
*/ + intptr_t large_num_teams = 66000; + intptr_t large_threads_limit = 67000; + #pragma omp target device (i) + { + omp_set_num_teams (large_num_teams + i); + omp_set_teams_thread_limit (large_threads_limit + i); + if (omp_get_max_teams () != large_num_teams + i + || omp_get_teams_thread_limit () != large_threads_limit + i) + abort (); + } + + #pragma omp target device (i) + if (omp_get_max_teams () != large_num_teams + i + || omp_get_teams_thread_limit () != large_threads_limit + i) abort (); + #pragma omp target device (i) + #pragma omp teams + { + if (omp_get_num_teams () > large_num_teams + i + || omp_get_team_num () >= large_num_teams + i) + abort (); + #pragma omp parallel + if (omp_get_thread_limit () > large_threads_limit + i + || omp_get_thread_num () >= large_threads_limit + i) + abort (); + } + } + return 0; } diff --git a/libgomp/testsuite/libgomp.c-c++-common/icv-6.c b/libgomp/testsuite/libgomp.c-c++-common/icv-6.c index e199a18..1b17f2d 100644 --- a/libgomp/testsuite/libgomp.c-c++-common/icv-6.c +++ b/libgomp/testsuite/libgomp.c-c++-common/icv-6.c @@ -1,9 +1,10 @@ /* { dg-do run } */ -/* { dg-set-target-env-var OMP_NUM_TEAMS_ALL "42" } */ -/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV "43" } */ +/* { dg-set-target-env-var OMP_NUM_TEAMS_ALL "3" } */ +/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV "4" } */ +/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_ALL "2" } */ +/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV "3" } */ /* { dg-set-target-env-var OMP_SCHEDULE_ALL "guided,4" } */ /* { dg-set-target-env-var OMP_DYNAMIC_ALL "true" } */ -/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_ALL "44" } */ /* { dg-set-target-env-var OMP_THREAD_LIMIT_ALL "45" } */ /* { dg-set-target-env-var OMP_NUM_THREADS_ALL "46,3,2" } */ /* { dg-set-target-env-var OMP_MAX_ACTIVE_LEVELS_ALL "47" } */ @@ -12,8 +13,8 @@ /* This tests the hierarchical usage of ICVs on the device, i.e. 
if OMP_NUM_TEAMS_DEV_<device_num> is not configured, then the value of - OMP_NUM_TEAMS_DEV should be used. And if there is no environment variable - without suffix, then the corresponding _ALL variant should be used. */ + OMP_NUM_TEAMS_DEV should be used. And if OMP_NUM_TEAMS (without suffix) is + not defined, then OMP_NUM_TEAMS_ALL should be used for the host. */ #include <omp.h> #include <stdlib.h> @@ -26,10 +27,10 @@ main () int chunk_size; omp_get_schedule(&kind, &chunk_size); - if ((!getenv ("OMP_NUM_TEAMS") && omp_get_max_teams () != 42) + if ((!getenv ("OMP_NUM_TEAMS") && omp_get_max_teams () != 3) || (!getenv ("OMP_DYNAMIC") && !omp_get_dynamic ()) || (!getenv ("OMP_SCHEDULE") && (kind != 3 || chunk_size != 4)) - || (!getenv ("OMP_TEAMS_THREAD_LIMIT") && omp_get_teams_thread_limit () != 44) + || (!getenv ("OMP_TEAMS_THREAD_LIMIT") && omp_get_teams_thread_limit () != 2) || (!getenv ("OMP_THREAD_LIMIT") && omp_get_thread_limit () != 45) || (!getenv ("OMP_NUM_THREADS") && omp_get_max_threads () != 46) || (!getenv ("OMP_PROC_BIND") && omp_get_proc_bind () != omp_proc_bind_spread) @@ -44,9 +45,51 @@ main () name[sizeof ("OMP_NUM_TEAMS_DEV_1") - 2] = '0' + i; if (getenv (name)) continue; - #pragma omp target device (i) - if (omp_get_max_teams () != 43) + + #pragma omp target device (i) + if (omp_get_max_teams () != 4 + || omp_get_teams_thread_limit () != 3) abort (); + #pragma omp target device (i) + #pragma omp teams + { + if (omp_get_num_teams () > 4 + || omp_get_team_num () >= 4) + abort (); + #pragma omp parallel + if (omp_get_thread_limit () > 3 + || omp_get_thread_num () >= 3) + abort (); + } + + #pragma omp target device (i) + { + omp_set_num_teams (3 + i); + omp_set_teams_thread_limit (2 + i); + if (omp_get_max_teams () != 3 + i + || omp_get_teams_thread_limit () != 2 + i) + abort (); + } + + /* omp_set_num_teams above set the value of nteams-var ICV on device 'i', + which has scope 'device' and should be available in subsequent target + regions. 
*/ + #pragma omp target device (i) + if (omp_get_max_teams () != 3 + i + || omp_get_teams_thread_limit () != 2 + i) + abort (); + + #pragma omp target device (i) + #pragma omp teams + { + if (omp_get_num_teams () > 3 + i + || omp_get_team_num () >= 3 + i) + abort (); + #pragma omp parallel + if (omp_get_thread_limit () > 2 + i + || omp_get_thread_num () >= 2 + i) + abort (); + } } return 0; diff --git a/libgomp/testsuite/libgomp.c-c++-common/icv-7.c b/libgomp/testsuite/libgomp.c-c++-common/icv-7.c index 70a716d..bbbd6df 100644 --- a/libgomp/testsuite/libgomp.c-c++-common/icv-7.c +++ b/libgomp/testsuite/libgomp.c-c++-common/icv-7.c @@ -1,5 +1,6 @@ /* { dg-do run } */ -/* { dg-set-target-env-var OMP_NUM_TEAMS_ALL "42" } */ +/* { dg-set-target-env-var OMP_NUM_TEAMS_ALL "7" } */ +/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_ALL "2" } */ /* This tests the hierarchical usage of ICVs on the host and on devices, i.e. if OMP_NUM_TEAMS_DEV_<device_num>, OMP_NUM_TEAMS_DEV, and @@ -9,18 +10,90 @@ #include <omp.h> #include <stdlib.h> +#include <string.h> int main () { - if (omp_get_max_teams () != 42) + if ((!getenv ("OMP_NUM_TEAMS") && omp_get_max_teams () != 7) + || (!getenv ("OMP_TEAMS_THREAD_LIMIT") && omp_get_teams_thread_limit () != 2)) abort (); + #pragma omp teams + if ((!getenv ("OMP_NUM_TEAMS")) + && (omp_get_num_teams () > 7 || omp_get_team_num () >= 7)) + abort (); + + omp_set_num_teams (9); + omp_set_teams_thread_limit (3); + if (omp_get_max_teams () != 9 + || omp_get_teams_thread_limit () != 3) + abort (); + + #pragma omp teams + if (omp_get_num_teams () > 9 + || omp_get_team_num () >= 9) + abort (); + + #pragma omp teams num_teams(5) + if (omp_get_num_teams () > 5 + || omp_get_team_num () >= 5) + abort (); + + if (getenv ("OMP_NUM_TEAMS_DEV") || getenv ("OMP_TEAMS_THREAD_LIMIT_DEV")) + return 0; + int num_devices = omp_get_num_devices () > 3 ? 
3 : omp_get_num_devices (); - for (int i=0; i < num_devices; i++) - #pragma omp target device (i) - if (omp_get_max_teams () != 42) + for (int i = 0; i < num_devices; i++) + { + char nteams[sizeof ("OMP_NUM_TEAMS_DEV_1")]; + strcpy (nteams, "OMP_NUM_TEAMS_DEV_1"); + nteams[sizeof ("OMP_NUM_TEAMS_DEV_1") - 2] = '0' + i; + char teams_thread_limit[sizeof ("OMP_TEAMS_THREAD_LIMIT_DEV_1")]; + strcpy (teams_thread_limit, "OMP_TEAMS_THREAD_LIMIT_DEV_1"); + teams_thread_limit[sizeof ("OMP_TEAMS_THREAD_LIMIT_DEV_1") - 2] = '0' + i; + if (getenv (nteams) || getenv (teams_thread_limit)) + continue; + + #pragma omp target device (i) + if (omp_get_max_teams () != 7 + || omp_get_teams_thread_limit () != 2) + abort (); + + #pragma omp target device (i) + #pragma omp teams + if (omp_get_num_teams () > 7 + || omp_get_team_num () >= 7) + abort (); + + #pragma omp target device (i) + { + omp_set_num_teams (8 + i); + omp_set_teams_thread_limit (4 + i); + if (omp_get_max_teams () != 8 + i + || omp_get_teams_thread_limit () != 4 + i) + abort (); + } + + /* omp_set_num_teams above set the value of nteams-var ICV on device 'i', + which has scope 'device' and should be available in subsequent target + regions. */ + #pragma omp target device (i) + if (omp_get_max_teams () != 8 + i + || omp_get_teams_thread_limit () != 4 + i) + abort (); + + #pragma omp target device (i) + #pragma omp teams + if (omp_get_num_teams () > 8 + i + || omp_get_team_num () >= 8 + i) + abort (); + + #pragma omp target device (i) + #pragma omp teams num_teams(5 + i) + if (omp_get_num_teams () != 5 + i) abort (); + } return 0; } diff --git a/libgomp/testsuite/libgomp.c-c++-common/icv-9.c b/libgomp/testsuite/libgomp.c-c++-common/icv-9.c new file mode 100644 index 0000000..21b874f --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/icv-9.c @@ -0,0 +1,72 @@ +/* { dg-do run } */ + +/* This tests usage of ICVs on the host and on devices if no corresponding + environment variables are configured. 
*/ + +#include <omp.h> +#include <stdlib.h> + +int +main () +{ + if (omp_get_max_teams () != 0 + || omp_get_teams_thread_limit () != 0) + abort (); + + omp_set_num_teams (9); + omp_set_teams_thread_limit (2); + if (omp_get_max_teams () != 9 + || omp_get_teams_thread_limit () != 2) + abort (); + + #pragma omp teams + if (omp_get_num_teams () > 9 + || omp_get_team_num () >= 9) + abort (); + + #pragma omp teams num_teams(5) + if (omp_get_num_teams () > 5 + || omp_get_team_num () >= 5) + abort (); + + int num_devices = omp_get_num_devices () > 3 ? 3 : omp_get_num_devices (); + for (int i = 0; i < num_devices; i++) + { + #pragma omp target device (i) + if (omp_get_max_teams () != 0 + || omp_get_teams_thread_limit () != 0) + abort (); + + #pragma omp target device (i) + { + omp_set_num_teams (8 + i); + omp_set_teams_thread_limit (3 + i); + if (omp_get_max_teams () != 8 + i + || omp_get_teams_thread_limit () != 3 + i) + abort (); + } + + /* omp_set_num_teams above set the value of nteams-var ICV on device 'i', + which has scope 'device' and should be available in subsequent target + regions. */ + #pragma omp target device (i) + if (omp_get_max_teams () != 8 + i + || omp_get_teams_thread_limit () != 3 + i) + abort (); + + #pragma omp target device (i) + #pragma omp teams + if (omp_get_num_teams () > 8 + i + || omp_get_team_num () >= 8 + i) + abort (); + + /* NUM_TEAMS clause has priority over previously set NUM_TEAMS value. 
*/ + #pragma omp target device (i) + #pragma omp teams num_teams(5 + i) + if (omp_get_num_teams () > 5 + i + || omp_get_team_num () >= 5 + i) + abort (); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/on_device_arch.h b/libgomp/testsuite/libgomp.c-c++-common/on_device_arch.h index f92743b..3fb5021 100644 --- a/libgomp/testsuite/libgomp.c-c++-common/on_device_arch.h +++ b/libgomp/testsuite/libgomp.c-c++-common/on_device_arch.h @@ -8,13 +8,13 @@ device_arch_nvptx (void) } /* static */ int -device_arch_intel_mic (void) +device_arch_gcn (void) { - return GOMP_DEVICE_INTEL_MIC; + return GOMP_DEVICE_GCN; } #pragma omp declare variant (device_arch_nvptx) match(construct={target},device={arch(nvptx)}) -#pragma omp declare variant (device_arch_intel_mic) match(construct={target},device={arch(intel_mic)}) +#pragma omp declare variant (device_arch_gcn) match(construct={target},device={arch(gcn)}) /* static */ int device_arch (void) { @@ -38,29 +38,7 @@ on_device_arch_nvptx () } int -on_device_arch_intel_mic () +on_device_arch_gcn () { - return on_device_arch (GOMP_DEVICE_INTEL_MIC); -} - -static int -any_device_arch (int d) -{ - int nd = omp_get_num_devices (); - for (int i = 0; i < nd; ++i) - { - int d_cur; - #pragma omp target device(i) map(from:d_cur) - d_cur = device_arch (); - if (d_cur == d) - return 1; - } - - return 0; -} - -int -any_device_arch_intel_mic () -{ - return any_device_arch (GOMP_DEVICE_INTEL_MIC); + return on_device_arch (GOMP_DEVICE_GCN); } diff --git a/libgomp/testsuite/libgomp.c-c++-common/pr106981.c b/libgomp/testsuite/libgomp.c-c++-common/pr106981.c new file mode 100644 index 0000000..ed48d27 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/pr106981.c @@ -0,0 +1,19 @@ +/* PR c/106981 */ + +int +main () +{ + int a[0x101]; + unsigned int b = 0x100; + if ((unsigned char) b || (unsigned short) b != 0x100) + return 0; + a[0] = 0; + a[0x100] = 42; + #pragma omp atomic update + a[(unsigned char) b] = a[(unsigned short) b] 
+ a[(unsigned char) b]; + #pragma omp atomic update + a[(unsigned char) b] = a[(unsigned char) b] + a[(unsigned short) b]; + if (a[0] != 84 || a[0x100] != 42) + __builtin_abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/pr109062.c b/libgomp/testsuite/libgomp.c-c++-common/pr109062.c new file mode 100644 index 0000000..5c7c287 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/pr109062.c @@ -0,0 +1,14 @@ +/* { dg-do run } */ + +#include <omp.h> +#include <stdlib.h> + +int +main () +{ + omp_display_env (1); + + return 0; +} + +/* { dg-output ".*\\\[host] GOMP_SPINCOUNT = '300000'.*" { target native } } */ diff --git a/libgomp/testsuite/libgomp.c-c++-common/requires-4a.c b/libgomp/testsuite/libgomp.c-c++-common/requires-4a.c new file mode 100644 index 0000000..4fb9783 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/requires-4a.c @@ -0,0 +1,39 @@ +/* { dg-additional-options "-flto" } */ +/* { dg-additional-options "-foffload-options=nvptx-none=-misa=sm_35" { target { offload_target_nvptx } } } */ +/* { dg-additional-sources requires-4-aux.c } */ + +/* Same as requires-4.c, but uses heap memory for 'a'. */ + +/* Check no diagnostic by device-compiler's or host compiler's lto1. + Other file uses: 'requires reverse_offload', but that's inactive as + there are no declare target directives, device constructs nor device routines */ + +/* Depending on offload device capabilities, it may print something like the + following (only) if GOMP_DEBUG=1: + "devices present but 'omp requires unified_address, unified_shared_memory, reverse_offload' cannot be fulfilled" + and in that case does host-fallback execution. + + No offload devices support USM at present, so we may verify host-fallback + execution by presence of separate memory spaces. 
*/ + +#pragma omp requires unified_address,unified_shared_memory + +int *a; +extern void foo (void); + +int +main (void) +{ + a = (int *) __builtin_calloc (sizeof (int), 10); + #pragma omp target map(to: a) + for (int i = 0; i < 10; i++) + a[i] = i; + + for (int i = 0; i < 10; i++) + if (a[i] != i) + __builtin_abort (); + + foo (); + __builtin_free (a); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/reverse-offload-2.c b/libgomp/testsuite/libgomp.c-c++-common/reverse-offload-2.c new file mode 100644 index 0000000..33bd384 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/reverse-offload-2.c @@ -0,0 +1,49 @@ +/* { dg-do run } */ +/* { dg-additional-options "-foffload-options=nvptx-none=-misa=sm_35" { target { offload_target_nvptx } } } */ + +#pragma omp requires reverse_offload + +int +main () +{ + int A[10]; + int y; + + for (int i = 0; i < 10; i++) + A[i] = 2*i; + + y = 42; + + /* Pointlessly copy to the default device. */ + #pragma omp target data map(to: A) + { + /* Not enclosed in a target region (= i.e. running on the host); the + following is valid - it runs on the current device (= host). 
*/ + #pragma omp target device ( ancestor:1 ) firstprivate(y) map(to: A) + { + if (y != 42) + __builtin_abort (); + for (int i = 0; i < 10; i++) + if (A[i] != 2*i) + __builtin_abort (); + for (int i = 0; i < 10; i++) + if (A[i] != 2*i) + A[i] = 4*i; + y = 31; + } + + if (y != 42) + __builtin_abort (); + for (int i = 0; i < 10; i++) + if (A[i] != 2*i) + __builtin_abort (); + } + + if (y != 42) + __builtin_abort (); + for (int i = 0; i < 10; i++) + if (A[i] != 2*i) + __builtin_abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-45.c b/libgomp/testsuite/libgomp.c-c++-common/target-45.c index 27bbedd..73c105d 100644 --- a/libgomp/testsuite/libgomp.c-c++-common/target-45.c +++ b/libgomp/testsuite/libgomp.c-c++-common/target-45.c @@ -1,5 +1,3 @@ -/* { dg-xfail-run-if TODO { offload_device_any_intel_mic } } */ - #include <omp.h> #include <stdlib.h> diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-in-explicit-1.c b/libgomp/testsuite/libgomp.c-c++-common/task-in-explicit-1.c new file mode 100644 index 0000000..a228d93 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/task-in-explicit-1.c @@ -0,0 +1,106 @@ +/* { dg-do run } */ + +#include <omp.h> +#include <stdlib.h> + +int +main () +{ + if (omp_in_explicit_task ()) + abort (); + #pragma omp task + if (!omp_in_explicit_task ()) + abort (); + #pragma omp task final (1) + { + if (!omp_in_explicit_task ()) + abort (); + #pragma omp task + if (!omp_in_explicit_task ()) + abort (); + } + #pragma omp parallel + { + if (omp_in_explicit_task ()) + abort (); + #pragma omp task if (0) + { + if (!omp_in_explicit_task ()) + abort (); + #pragma omp task if (0) + if (!omp_in_explicit_task ()) + abort (); + } + #pragma omp task final (1) + if (!omp_in_explicit_task ()) + abort (); + #pragma omp barrier + if (omp_in_explicit_task ()) + abort (); + #pragma omp taskloop num_tasks (24) + for (int i = 0; i < 32; ++i) + if (!omp_in_explicit_task ()) + abort (); + #pragma omp masked + #pragma omp 
task + if (!omp_in_explicit_task ()) + abort (); + #pragma omp barrier + if (omp_in_explicit_task ()) + abort (); + } + #pragma omp target + { + if (omp_in_explicit_task ()) + abort (); + #pragma omp task if (0) + if (!omp_in_explicit_task ()) + abort (); + #pragma omp task + if (!omp_in_explicit_task ()) + abort (); + } + #pragma omp target teams + { + #pragma omp distribute + for (int i = 0; i < 4; ++i) + if (omp_in_explicit_task ()) + abort (); + else + { + #pragma omp parallel + { + if (omp_in_explicit_task ()) + abort (); + #pragma omp task + if (!omp_in_explicit_task ()) + abort (); + #pragma omp barrier + if (omp_in_explicit_task ()) + abort (); + } + } + } + #pragma omp teams + { + #pragma omp distribute + for (int i = 0; i < 4; ++i) + if (omp_in_explicit_task ()) + abort (); + else + { + #pragma omp parallel + { + if (omp_in_explicit_task ()) + abort (); + #pragma omp task + if (!omp_in_explicit_task ()) + abort (); + #pragma omp barrier + if (omp_in_explicit_task ()) + abort (); + } + } + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-in-explicit-2.c b/libgomp/testsuite/libgomp.c-c++-common/task-in-explicit-2.c new file mode 100644 index 0000000..44df596 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/task-in-explicit-2.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ + +#include <omp.h> +#include <stdlib.h> + +int +main () +{ + #pragma omp task + { + if (!omp_in_explicit_task ()) + abort (); + #pragma omp task + { + if (!omp_in_explicit_task ()) + abort (); + #pragma omp target nowait + if (omp_in_explicit_task ()) + abort (); + if (!omp_in_explicit_task ()) + abort (); + #pragma omp taskwait + } + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-in-explicit-3.c b/libgomp/testsuite/libgomp.c-c++-common/task-in-explicit-3.c new file mode 100644 index 0000000..40eb94d --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/task-in-explicit-3.c @@ -0,0 +1,36 @@ +/* { dg-do run } */ + +#include <omp.h> 
+#include <stdlib.h> + +int a; + +int +main () +{ + #pragma omp task + { + if (!omp_in_explicit_task ()) + abort (); + #pragma omp task + { + if (!omp_in_explicit_task ()) + abort (); + #pragma omp taskgroup task_reduction (+: a) + { + if (!omp_in_explicit_task ()) + abort (); + #pragma omp task in_reduction (+: a) + { + ++a; + if (!omp_in_explicit_task ()) + abort (); + } + } + if (!omp_in_explicit_task ()) + abort (); + #pragma omp taskwait + } + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-17.c b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-17.c new file mode 100644 index 0000000..4a8d1e8 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-17.c @@ -0,0 +1,36 @@ +/* { dg-do run } */ + +#include <omp.h> +#include <stdlib.h> + +int a; + +int +main () +{ + #pragma omp task final (1) + { + if (!omp_in_final ()) + abort (); + #pragma omp task + { + if (!omp_in_final ()) + abort (); + #pragma omp taskgroup task_reduction (+: a) + { + if (!omp_in_final ()) + abort (); + #pragma omp task in_reduction (+: a) + { + ++a; + if (!omp_in_final ()) + abort (); + } + } + if (!omp_in_final ()) + abort (); + #pragma omp taskwait + } + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-18.c b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-18.c new file mode 100644 index 0000000..483f440 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-18.c @@ -0,0 +1,17 @@ +/* { dg-do run } */ + +int a; + +int +main () +{ + #pragma omp task + { + #pragma omp taskgroup task_reduction (+: a) + { + #pragma omp task in_reduction (+: a) + ++a; + } + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/affinity-1.c b/libgomp/testsuite/libgomp.c/affinity-1.c index 9725249..bdb869c 100644 --- a/libgomp/testsuite/libgomp.c/affinity-1.c +++ b/libgomp/testsuite/libgomp.c/affinity-1.c @@ -1,5 +1,5 @@ /* Affinity tests. 
- Copyright (C) 2013-2022 Free Software Foundation, Inc. + Copyright (C) 2013-2023 Free Software Foundation, Inc. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-fiji.c b/libgomp/testsuite/libgomp.c/declare-variant-4-fiji.c new file mode 100644 index 0000000..ae2af1c --- /dev/null +++ b/libgomp/testsuite/libgomp.c/declare-variant-4-fiji.c @@ -0,0 +1,8 @@ +/* { dg-do run { target { offload_target_amdgcn } } } */ +/* { dg-skip-if "fiji/gfx803 only" { ! amdgcn-*-* } { "*" } { "-foffload=-march=fiji" } } */ +/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */ + +#define USE_FIJI_FOR_GFX803 +#include "declare-variant-4.h" + +/* { dg-final { scan-offload-tree-dump "= gfx803 \\(\\);" "optimized" } } */ diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx803.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx803.c new file mode 100644 index 0000000..e0437a0 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx803.c @@ -0,0 +1,7 @@ +/* { dg-do run { target { offload_target_amdgcn } } } */ +/* { dg-skip-if "fiji/gfx803 only" { ! amdgcn-*-* } { "*" } { "-foffload=-march=fiji" } } */ +/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */ + +#include "declare-variant-4.h" + +/* { dg-final { scan-offload-tree-dump "= gfx803 \\(\\);" "optimized" } } */ diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx900.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx900.c new file mode 100644 index 0000000..8de0372 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx900.c @@ -0,0 +1,7 @@ +/* { dg-do run { target { offload_target_amdgcn } } } */ +/* { dg-skip-if "gfx900 only" { ! 
amdgcn-*-* } { "*" } { "-foffload=-march=gfx900" } } */ +/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */ + +#include "declare-variant-4.h" + +/* { dg-final { scan-offload-tree-dump "= gfx900 \\(\\);" "optimized" } } */ diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx906.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx906.c new file mode 100644 index 0000000..be6f193 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx906.c @@ -0,0 +1,7 @@ +/* { dg-do run { target { offload_target_amdgcn } } } */ +/* { dg-skip-if "gfx906 only" { ! amdgcn-*-* } { "*" } { "-foffload=-march=gfx906" } } */ +/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */ + +#include "declare-variant-4.h" + +/* { dg-final { scan-offload-tree-dump "= gfx906 \\(\\);" "optimized" } } */ diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx908.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx908.c new file mode 100644 index 0000000..311fad9 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx908.c @@ -0,0 +1,7 @@ +/* { dg-do run { target { offload_target_amdgcn } } } */ +/* { dg-skip-if "gfx908 only" { ! amdgcn-*-* } { "*" } { "-foffload=-march=gfx908" } } */ +/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */ + +#include "declare-variant-4.h" + +/* { dg-final { scan-offload-tree-dump "= gfx908 \\(\\);" "optimized" } } */ diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx90a.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx90a.c new file mode 100644 index 0000000..96cc14c --- /dev/null +++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx90a.c @@ -0,0 +1,7 @@ +/* { dg-do run { target { offload_target_amdgcn } } } */ +/* { dg-skip-if "gfx90a only" { ! 
amdgcn-*-* } { "*" } { "-foffload=-march=gfx90a" } } */ +/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */ + +#include "declare-variant-4.h" + +/* { dg-final { scan-offload-tree-dump "= gfx90a \\(\\);" "optimized" } } */ diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4.h b/libgomp/testsuite/libgomp.c/declare-variant-4.h new file mode 100644 index 0000000..2d7c1ef --- /dev/null +++ b/libgomp/testsuite/libgomp.c/declare-variant-4.h @@ -0,0 +1,63 @@ +#pragma omp declare target +int +gfx803 (void) +{ + return 0x803; +} + +int +gfx900 (void) +{ + return 0x900; +} + +int +gfx906 (void) +{ + return 0x906; +} + +int +gfx908 (void) +{ + return 0x908; +} + +int +gfx90a (void) +{ + return 0x90a; +} + +#ifdef USE_FIJI_FOR_GFX803 +#pragma omp declare variant(gfx803) match(device = {isa("fiji")}) +#else +#pragma omp declare variant(gfx803) match(device = {isa("gfx803")}) +#endif +#pragma omp declare variant(gfx900) match(device = {isa("gfx900")}) +#pragma omp declare variant(gfx906) match(device = {isa("gfx906")}) +#pragma omp declare variant(gfx908) match(device = {isa("gfx908")}) +#pragma omp declare variant(gfx90a) match(device = {isa("gfx90a")}) +int +f (void) +{ + return 0; +} + +#pragma omp end declare target + +int +main (void) +{ + int v = 0; + +#pragma omp target map(from : v) + v = f (); + + if (v == 0) + __builtin_abort (); + + __builtin_printf ("AMDGCN accelerator: gfx%x\n", v); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/pr108459.c b/libgomp/testsuite/libgomp.c/pr108459.c new file mode 100644 index 0000000..87ce981 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/pr108459.c @@ -0,0 +1,41 @@ +/* PR middle-end/108459 */ + +char a[17][17]; + +__attribute__((noipa)) void +foo (int x, int y) +{ + #pragma omp for collapse(2) + for (int i = 1; i <= 16; i++) + for (int j = i * x + y; j <= 16; j++) + a[i][j] = 1; +} + +int +main () +{ + #pragma omp parallel + foo (1, 1); + for (int i = 0; i <= 16; i++) + for (int j = 0; j <= 16; j++) + 
if (i >= 1 && j >= i + 1) + { + if (a[i][j] != 1) + __builtin_abort (); + a[i][j] = 0; + } + else if (a[i][j]) + __builtin_abort (); + #pragma omp parallel + foo (2, -2); + for (int i = 0; i <= 16; i++) + for (int j = 0; j <= 16; j++) + if (i >= 1 && j >= 2 * i - 2) + { + if (a[i][j] != 1) + __builtin_abort (); + } + else if (a[i][j]) + __builtin_abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/reverse-offload-sm30.c b/libgomp/testsuite/libgomp.c/reverse-offload-sm30.c index fbfeae1..7f10fd4 100644 --- a/libgomp/testsuite/libgomp.c/reverse-offload-sm30.c +++ b/libgomp/testsuite/libgomp.c/reverse-offload-sm30.c @@ -1,5 +1,5 @@ /* { dg-do link { target { offload_target_nvptx } } } */ -/* { dg-additional-options "-foffload-options=nvptx-none=-march=sm_30 -foffload=-mptx=_" } */ +/* { dg-additional-options "-foffload-options=nvptx-none=-march=sm_30 -foffload-options=nvptx-none=-mptx=_" } */ #pragma omp requires reverse_offload diff --git a/libgomp/testsuite/libgomp.c/simd-math-1.c b/libgomp/testsuite/libgomp.c/simd-math-1.c new file mode 100644 index 0000000..947bf60 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/simd-math-1.c @@ -0,0 +1,217 @@ +/* Check that the SIMD versions of math routines give the same (or + sufficiently close) results as their scalar equivalents. */ + +/* { dg-do run } */ +/* { dg-options "-O2 -ftree-vectorize -fno-math-errno" } */ +/* { dg-additional-options -foffload-options=amdgcn-amdhsa=-mstack-size=3000000 { target offload_target_amdgcn } } */ +/* { dg-additional-options -foffload-options=-lm } */ + +#undef PRINT_RESULT +#define VERBOSE 0 +#define EARLY_EXIT 1 + +#include <math.h> +#include <stdlib.h> + +#ifdef PRINT_RESULT + #include <stdio.h> + #define PRINTF printf +#else + static void null_printf (const char *f, ...) 
{ } + + #define PRINTF null_printf +#endif + +#define N 512 +#define EPSILON_float 1e-5 +#define EPSILON_double 1e-10 + +static int xfail = 0; +static int failed = 0; + +int deviation_float (float x, float y) +{ + union { + float f; + unsigned u; + } u, v; + + u.f = x; + v.f = y; + + unsigned mask = 0x80000000U; + int i; + + for (i = 32; i > 0; i--) + if ((u.u ^ v.u) & mask) + break; + else + mask >>= 1; + + return i; +} + +int deviation_double (double x, double y) +{ + union { + double d; + unsigned long long u; + } u, v; + + u.d = x; + v.d = y; + + unsigned long long mask = 0x8000000000000000ULL; + int i; + + for (i = 64; i > 0; i--) + if ((u.u ^ v.u) & mask) + break; + else + mask >>= 1; + + return i; +} + +#define TEST_FUN_XFAIL(TFLOAT, LOW, HIGH, FUN) \ + xfail = 1; \ + TEST_FUN (TFLOAT, LOW, HIGH, FUN); \ + xfail = 0; + +#define TEST_FUN(TFLOAT, LOW, HIGH, FUN) \ +__attribute__((optimize("no-tree-vectorize"))) \ +__attribute__((optimize("no-unsafe-math-optimizations"))) \ +void check_##FUN (TFLOAT res[N], TFLOAT a[N]) \ +{ \ + for (int i = 0; i < N; i++) { \ + TFLOAT expected = FUN (a[i]); \ + TFLOAT diff = __builtin_fabs (expected - res[i]); \ + int deviation = deviation_##TFLOAT (expected, res[i]); \ + int fail = isnan (res[i]) != isnan (expected) \ + || isinf (res[i]) != isinf (expected) \ + || (diff > EPSILON_##TFLOAT && deviation > 10); \ + if (VERBOSE || fail) \ + PRINTF (#FUN "(%f) = %f, expected = %f, diff = %f, deviation = %d %s\n", \ + a[i], res[i], expected, diff, deviation, fail ? 
"(!)" : ""); \ + failed |= (fail && !xfail); \ + if (EARLY_EXIT && failed) \ + exit (1); \ + } \ +} \ +void test_##FUN (void) \ +{ \ + TFLOAT res[N], a[N]; \ + for (int i = 0; i < N; i++) \ + a[i] = LOW + ((HIGH - LOW) / N) * i; \ + _Pragma ("omp target parallel for simd map(to:a) map(from:res)") \ + for (int i = 0; i < N; i++) \ + res[i] = FUN (a[i]); \ + check_##FUN (res, a); \ +}\ +test_##FUN (); + +#define TEST_FUN2(TFLOAT, LOW1, HIGH1, LOW2, HIGH2, FUN) \ +__attribute__((optimize("no-tree-vectorize"))) \ +__attribute__((optimize("no-unsafe-math-optimizations"))) \ +void check_##FUN (TFLOAT res[N], TFLOAT a[N], TFLOAT b[N]) \ +{ \ + int failed = 0; \ + for (int i = 0; i < N; i++) { \ + TFLOAT expected = FUN (a[i], b[i]); \ + TFLOAT diff = __builtin_fabs (expected - res[i]); \ + int deviation = deviation_##TFLOAT (expected, res[i]); \ + int fail = isnan (res[i]) != isnan (expected) \ + || isinf (res[i]) != isinf (expected) \ + || (diff > EPSILON_##TFLOAT && deviation > 10); \ + failed |= fail; \ + if (VERBOSE || fail) \ + PRINTF (#FUN "(%f,%f) = %f, expected = %f, diff = %f, deviation = %d %s\n", \ + a[i], b[i], res[i], expected, diff, deviation, fail ? 
"(!)" : ""); \ + if (EARLY_EXIT && fail) \ + exit (1); \ + } \ +} \ +void test_##FUN (void) \ +{ \ + TFLOAT res[N], a[N], b[N]; \ + for (int i = 0; i < N; i++) { \ + a[i] = LOW1 + ((HIGH1 - LOW1) / N) * i; \ + b[i] = LOW2 + ((HIGH2 - LOW2) / N) * i; \ + } \ + _Pragma ("omp target parallel for simd map(to:a) map(from:res)") \ + for (int i = 0; i < N; i++) \ + res[i] = FUN (a[i], b[i]); \ + check_##FUN (res, a, b); \ +}\ +test_##FUN (); + +int main (void) +{ + TEST_FUN (float, -1.1, 1.1, acosf); + TEST_FUN (float, -10, 10, acoshf); + TEST_FUN (float, -1.1, 1.1, asinf); + TEST_FUN (float, -10, 10, asinhf); + TEST_FUN (float, -1.1, 1.1, atanf); + TEST_FUN2 (float, -2.0, 2.0, 2.0, -2.0, atan2f); + TEST_FUN (float, -2.0, 2.0, atanhf); + TEST_FUN2 (float, -10.0, 10.0, 5.0, -15.0, copysignf); + TEST_FUN (float, -3.14159265359, 3.14159265359, cosf); + TEST_FUN (float, -3.14159265359, 3.14159265359, coshf); + TEST_FUN (float, -10.0, 10.0, erff); + TEST_FUN (float, -10.0, 10.0, expf); + TEST_FUN (float, -10.0, 10.0, exp2f); + TEST_FUN2 (float, -10.0, 10.0, 100.0, -25.0, fmodf); + TEST_FUN (float, -10.0, 10.0, gammaf); + TEST_FUN2 (float, -10.0, 10.0, 15.0, -5.0,hypotf); + TEST_FUN (float, -10.0, 10.0, lgammaf); + TEST_FUN (float, -1.0, 50.0, logf); + TEST_FUN (float, -1.0, 500.0, log10f); + TEST_FUN (float, -1.0, 64.0, log2f); + TEST_FUN2 (float, -100.0, 100.0, 100.0, -100.0, powf); + TEST_FUN2 (float, -50.0, 100.0, -2.0, 40.0, remainderf); + TEST_FUN (float, -50.0, 50.0, rintf); + TEST_FUN2 (float, -50.0, 50.0, -10.0, 32.0, __builtin_scalbf); + TEST_FUN (float, -10.0, 10.0, __builtin_significandf); + TEST_FUN (float, -3.14159265359, 3.14159265359, sinf); + TEST_FUN (float, -3.14159265359, 3.14159265359, sinhf); + TEST_FUN (float, -0.1, 10000.0, sqrtf); + TEST_FUN (float, -5.0, 5.0, tanf); + TEST_FUN (float, -3.14159265359, 3.14159265359, tanhf); + /* Newlib's version of tgammaf is known to have poor accuracy. 
*/ + TEST_FUN_XFAIL (float, -10.0, 10.0, tgammaf); + + TEST_FUN (double, -1.1, 1.1, acos); + TEST_FUN (double, -10, 10, acosh); + TEST_FUN (double, -1.1, 1.1, asin); + TEST_FUN (double, -10, 10, asinh); + TEST_FUN (double, -1.1, 1.1, atan); + TEST_FUN2 (double, -2.0, 2.0, 2.0, -2.0, atan2); + TEST_FUN (double, -2.0, 2.0, atanh); + TEST_FUN2 (double, -10.0, 10.0, 5.0, -15.0, copysign); + TEST_FUN (double, -3.14159265359, 3.14159265359, cos); + TEST_FUN (double, -3.14159265359, 3.14159265359, cosh); + TEST_FUN (double, -10.0, 10.0, erf); + TEST_FUN (double, -10.0, 10.0, exp); + TEST_FUN (double, -10.0, 10.0, exp2); + TEST_FUN2 (double, -10.0, 10.0, 100.0, -25.0, fmod); + TEST_FUN (double, -10.0, 10.0, gamma); + TEST_FUN2 (double, -10.0, 10.0, 15.0, -5.0, hypot); + TEST_FUN (double, -10.0, 10.0, lgamma); + TEST_FUN (double, -1.0, 50.0, log); + TEST_FUN (double, -1.0, 500.0, log10); + TEST_FUN (double, -1.0, 64.0, log2); + TEST_FUN2 (double, -100.0, 100.0, 100.0, -100.0, pow); + TEST_FUN2 (double, -50.0, 100.0, -2.0, 40.0, remainder); + TEST_FUN (double, -50.0, 50.0, rint); + TEST_FUN2 (double, -50.0, 50.0, -10.0, 32.0, __builtin_scalb); + TEST_FUN (double, -10.0, 10.0, __builtin_significand); + TEST_FUN (double, -3.14159265359, 3.14159265359, sin); + TEST_FUN (double, -3.14159265359, 3.14159265359, sinh); + TEST_FUN (double, -0.1, 10000.0, sqrt); + TEST_FUN (double, -5.0, 5.0, tan); + TEST_FUN (double, -3.14159265359, 3.14159265359, tanh); + /* Newlib's version of tgamma is known to have poor accuracy. */ + TEST_FUN_XFAIL (double, -10.0, 10.0, tgamma); + + return failed; +} diff --git a/libgomp/testsuite/libgomp.c/sort-1.c b/libgomp/testsuite/libgomp.c/sort-1.c index b47a346..daf7865 100644 --- a/libgomp/testsuite/libgomp.c/sort-1.c +++ b/libgomp/testsuite/libgomp.c/sort-1.c @@ -1,5 +1,5 @@ /* Test and benchmark of a couple of parallel sorting algorithms. - Copyright (C) 2008-2022 Free Software Foundation, Inc. + Copyright (C) 2008-2023 Free Software Foundation, Inc. 
GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free diff --git a/libgomp/testsuite/libgomp.c/target-simd-clone-1.c b/libgomp/testsuite/libgomp.c/target-simd-clone-1.c new file mode 100644 index 0000000..a9defc4 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-simd-clone-1.c @@ -0,0 +1,43 @@ +/* { dg-do link { target { offload_target_amdgcn } } } */ +/* { dg-additional-options "-O2 -foffload-options=-fdump-ipa-simdclone-details" } */ + +/* Test that simd clones for the offload processor are generated for + functions with "declare target" when enabled by default at -O2. */ + +#pragma omp declare target +__attribute__ ((__noinline__)) int addit (int a, int b) +{ + return a + b; +} + +__attribute__ ((__noinline__)) +void callit (int *a, int *b, int *c) +{ + int i; + #pragma omp for simd + for (i = 0; i < 16; i++) + c[i] = addit (a[i], b[i]); +} +#pragma omp end declare target + +int main (void) +{ + int aa[16], bb[16], cc[16]; + int i; + for (i = 0; i < 16; i++) + { + aa[i] = i; + bb[i] = -i; + } + callit (aa, bb, cc); + for (i = 0; i < 16; i++) + if (cc[i] != 0) + return 1; + return 0; +} + +/* Although addit has external linkage, we expect clones to be generated as + for a function with internal linkage. 
*/ + +/* { dg-final { scan-offload-ipa-dump "Generated local clone _ZGV.*N.*_addit" "simdclone" } } */ +/* { dg-final { scan-offload-ipa-dump "Generated local clone _ZGV.*M.*_addit" "simdclone" } } */ diff --git a/libgomp/testsuite/libgomp.c/target-simd-clone-2.c b/libgomp/testsuite/libgomp.c/target-simd-clone-2.c new file mode 100644 index 0000000..05a38ae --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-simd-clone-2.c @@ -0,0 +1,39 @@ +/* { dg-do link { target { offload_target_amdgcn } } } */ +/* { dg-additional-options "-foffload-options=-fdump-ipa-simdclone-details -foffload-options=-fno-openmp-target-simd-clone" } */ + +/* Test that simd clones for the offload processor are not generated for + functions with "declare target" when explicitly disabled. */ + +#pragma omp declare target +__attribute__ ((__noinline__)) int addit (int a, int b) +{ + return a + b; +} + +__attribute__ ((__noinline__)) +void callit (int *a, int *b, int *c) +{ + int i; + #pragma omp for simd + for (i = 0; i < 16; i++) + c[i] = addit (a[i], b[i]); +} +#pragma omp end declare target + +int main (void) +{ + int aa[16], bb[16], cc[16]; + int i; + for (i = 0; i < 16; i++) + { + aa[i] = i; + bb[i] = -i; + } + callit (aa, bb, cc); + for (i = 0; i < 16; i++) + if (cc[i] != 0) + return 1; + return 0; +} + +/* { dg-final { scan-offload-ipa-dump-not "Generated .* clone" "simdclone" } } */ diff --git a/libgomp/testsuite/libgomp.c/target-simd-clone-3.c b/libgomp/testsuite/libgomp.c/target-simd-clone-3.c new file mode 100644 index 0000000..bde091e --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-simd-clone-3.c @@ -0,0 +1,40 @@ +/* { dg-do link { target { offload_target_amdgcn } } } */ +/* { dg-additional-options "-O2 -foffload-options=-fdump-ipa-simdclone-details" } */ + +/* Test that device simd clones are not generated for functions with the host + "declare target" clause only. 
*/ + +__attribute__ ((__noinline__)) int addit (int a, int b) +{ + return a + b; +} +#pragma omp declare target to(addit) device_type(host) + +#pragma omp declare target +void callit (int *a, int *b, int *c) +{ + int i; + #pragma omp for simd + for (i = 0; i < 16; i++) + c[i] = addit (a[i], b[i]); +} +#pragma omp end declare target + +int main (void) +{ + int aa[16], bb[16], cc[16]; + int i; + for (i = 0; i < 16; i++) + { + aa[i] = i; + bb[i] = -i; + } + callit (aa, bb, cc); + for (i = 0; i < 16; i++) + if (cc[i] != 0) + return 1; + return 0; +} + +/* { dg-final { scan-offload-ipa-dump "device doesn't match" "simdclone" { target x86_64-*-* } } } */ +/* { dg-final { scan-offload-ipa-dump-not "Generated .* clone" "simdclone" { target x86_64-*-* } } } */ diff --git a/libgomp/testsuite/libgomp.c/task-7.c b/libgomp/testsuite/libgomp.c/task-7.c new file mode 100644 index 0000000..0307575 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/task-7.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ + +#include <omp.h> +#include <stdlib.h> + +int +main () +{ + #pragma omp task final (1) + { + if (!omp_in_final ()) + abort (); + #pragma omp task + { + if (!omp_in_final ()) + abort (); + #pragma omp target nowait + if (omp_in_final ()) + abort (); + if (!omp_in_final ()) + abort (); + #pragma omp taskwait + } + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/task-8.c b/libgomp/testsuite/libgomp.c/task-8.c new file mode 100644 index 0000000..f03aef6 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/task-8.c @@ -0,0 +1,14 @@ +/* { dg-do run } */ + +int +main () +{ + int i = 0; + #pragma omp task + { + #pragma omp target nowait private (i) + i = 1; + #pragma omp taskwait + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.fortran/alloc-10.f90 b/libgomp/testsuite/libgomp.fortran/alloc-10.f90 index 3eab859..834a2d2 100644 --- a/libgomp/testsuite/libgomp.fortran/alloc-10.f90 +++ b/libgomp/testsuite/libgomp.fortran/alloc-10.f90 @@ -1,5 +1,6 @@ ! { dg-additional-sources alloc-7.c } -! 
{ dg-prune-output "command-line option '-fintrinsic-modules-path=.*' is valid for Fortran but not for C" } +! { dg-additional-options -Wno-complain-wrong-lang } + module m use omp_lib use iso_c_binding diff --git a/libgomp/testsuite/libgomp.fortran/alloc-11.f90 b/libgomp/testsuite/libgomp.fortran/alloc-11.f90 index b02c68f..e85e2fa 100644 --- a/libgomp/testsuite/libgomp.fortran/alloc-11.f90 +++ b/libgomp/testsuite/libgomp.fortran/alloc-11.f90 @@ -1,5 +1,6 @@ ! { dg-additional-sources alloc-7.c } -! { dg-prune-output "command-line option '-fintrinsic-modules-path=.*' is valid for Fortran but not for C" } +! { dg-additional-options -Wno-complain-wrong-lang } + module m use omp_lib use iso_c_binding diff --git a/libgomp/testsuite/libgomp.fortran/alloc-7.f90 b/libgomp/testsuite/libgomp.fortran/alloc-7.f90 index 57ecd83..8ef40ff 100644 --- a/libgomp/testsuite/libgomp.fortran/alloc-7.f90 +++ b/libgomp/testsuite/libgomp.fortran/alloc-7.f90 @@ -1,5 +1,6 @@ ! { dg-additional-sources alloc-7.c } -! { dg-prune-output "command-line option '-fintrinsic-modules-path=.*' is valid for Fortran but not for C" } +! { dg-additional-options -Wno-complain-wrong-lang } + module m use omp_lib use iso_c_binding diff --git a/libgomp/testsuite/libgomp.fortran/alloc-9.f90 b/libgomp/testsuite/libgomp.fortran/alloc-9.f90 index 1da1416..4a1c6a7 100644 --- a/libgomp/testsuite/libgomp.fortran/alloc-9.f90 +++ b/libgomp/testsuite/libgomp.fortran/alloc-9.f90 @@ -1,5 +1,6 @@ ! { dg-additional-sources alloc-7.c } -! { dg-prune-output "command-line option '-fintrinsic-modules-path=.*' is valid for Fortran but not for C" } +! { dg-additional-options -Wno-complain-wrong-lang } + module m use omp_lib use iso_c_binding diff --git a/libgomp/testsuite/libgomp.fortran/allocate-1.f90 b/libgomp/testsuite/libgomp.fortran/allocate-1.f90 index 1547d2b..c10820e 100644 --- a/libgomp/testsuite/libgomp.fortran/allocate-1.f90 +++ b/libgomp/testsuite/libgomp.fortran/allocate-1.f90 @@ -1,6 +1,6 @@ ! { dg-do run } ! 
{ dg-additional-sources allocate-1.c } -! { dg-prune-output "command-line option '-fintrinsic-modules-path=.*' is valid for Fortran but not for C" } +! { dg-additional-options -Wno-complain-wrong-lang } module m use omp_lib diff --git a/libgomp/testsuite/libgomp.fortran/allocate-2.f90 b/libgomp/testsuite/libgomp.fortran/allocate-2.f90 new file mode 100644 index 0000000..347656a --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/allocate-2.f90 @@ -0,0 +1,25 @@ +! { dg-do compile } +! { dg-additional-options "-fdump-tree-original" } + +use omp_lib +implicit none +integer :: q, x,y,z + +!$omp parallel & +!$omp& allocate(omp_low_lat_mem_alloc : x) & +!$omp& allocate(omp_cgroup_mem_alloc : y) & +!$omp& allocate(omp_pteam_mem_alloc : z) & +!$omp& firstprivate(q, x,y,z) +!$omp end parallel + +!$omp parallel & +!$omp& allocate(align ( 64 ), allocator(omp_default_mem_alloc) : x) & +!$omp& allocate(allocator(omp_large_cap_mem_alloc) : y) & +!$omp& allocate(allocator ( omp_high_bw_mem_alloc ) , align ( 32 ) : z) & +!$omp& allocate(align (16 ): q) & +!$omp& firstprivate(q, x,y,z) +!$omp end parallel +end + +! { dg-final { scan-tree-dump-times "#pragma omp parallel firstprivate\\(q\\) firstprivate\\(x\\) firstprivate\\(y\\) firstprivate\\(z\\) allocate\\(allocator\\(5\\):x\\) allocate\\(allocator\\(6\\):y\\) allocate\\(allocator\\(7\\):z\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "#pragma omp parallel firstprivate\\(q\\) firstprivate\\(x\\) firstprivate\\(y\\) firstprivate\\(z\\) allocate\\(allocator\\(1\\),align\\(64\\):x\\) allocate\\(allocator\\(2\\):y\\) allocate\\(allocator\\(4\\),align\\(32\\):z\\) allocate\\(align\\(16\\):q\\)" 1 "original" } } diff --git a/libgomp/testsuite/libgomp.fortran/allocate-3.f90 b/libgomp/testsuite/libgomp.fortran/allocate-3.f90 new file mode 100644 index 0000000..1fa0bb9 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/allocate-3.f90 @@ -0,0 +1,28 @@ +! 
{ dg-do compile } + +use omp_lib +implicit none +integer :: q, x,y,z + +!$omp parallel allocate(align ( 64 ) x) ! { dg-error "37:Expected ':' at" } +!$omp parallel allocate(align ( 64 ), x) ! { dg-error "37:Expected ':' at" } +!$omp parallel allocate(allocator ( omp_high_bw_mem_alloc ) x) ! { dg-error "60:Expected ':' at" } +!$omp parallel allocate(allocator ( omp_high_bw_mem_alloc ) , x) ! { dg-error "60:Expected ':' at" } + +!$omp parallel allocate( omp_high_bw_mem_alloc, align(12) : x) ! { dg-error "26:Expected variable list at" } +!$omp parallel allocate( align(12), omp_high_bw_mem_alloc : x) ! { dg-error "35:Expected ':' at" } + +!$omp parallel allocate( omp_high_bw_mem_alloc x) ! { dg-error "26:Expected variable list at" } + +!$omp parallel allocate( omp_high_bw_mem_alloc , x) firstprivate(x) ! { dg-error "'omp_high_bw_mem_alloc' specified in 'allocate' clause at \\(1\\) but not in an explicit privatization clause" } +! { dg-error "Object 'omp_high_bw_mem_alloc' is not a variable" "" { target *-*-* } .-1 } +!$omp end parallel + +!$omp parallel allocate( omp_high_bw_mem_alloc , x) firstprivate(x, omp_high_bw_mem_alloc) +! { dg-error "Object 'omp_high_bw_mem_alloc' is not a variable" "" { target *-*-* } .-1 } +!$omp end parallel + +!$omp parallel allocate( align(128) : x) firstprivate(x) ! OK +!$omp end parallel + +end diff --git a/libgomp/testsuite/libgomp.fortran/allocate-4.f90 b/libgomp/testsuite/libgomp.fortran/allocate-4.f90 new file mode 100644 index 0000000..ddb507b --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/allocate-4.f90 @@ -0,0 +1,42 @@ +! { dg-do compile } + + +subroutine test() +use iso_c_binding, only: c_intptr_t +implicit none +integer, parameter :: omp_allocator_handle_kind = 1 !! <<< +integer (kind=omp_allocator_handle_kind), & + parameter :: omp_high_bw_mem_alloc = 4 +integer :: q, x,y,z +integer, parameter :: cnst(2) = [64, 101] + +!$omp parallel allocate( omp_high_bw_mem_alloc : x) firstprivate(x) ! 
{ dg-error "Expected integer expression of the 'omp_allocator_handle_kind' kind" } +!$omp end parallel + +!$omp parallel allocate( allocator (omp_high_bw_mem_alloc) : x) firstprivate(x) ! { dg-error "Expected integer expression of the 'omp_allocator_handle_kind' kind" } +!$omp end parallel + +!$omp parallel allocate( align (q) : x) firstprivate(x) ! { dg-error "32:ALIGN modifier requires at \\(1\\) a scalar positive constant integer alignment expression that is a power of two" } +!$omp end parallel + +!$omp parallel allocate( align (32) : x) firstprivate(x) ! OK +!$omp end parallel + +!$omp parallel allocate( align(q) : x) firstprivate(x) ! { dg-error "31:ALIGN modifier requires at \\(1\\) a scalar positive constant integer alignment expression that is a power of two" } +!$omp end parallel + +!$omp parallel allocate( align(cnst(1)) : x ) firstprivate(x) ! OK +!$omp end parallel + +!$omp parallel allocate( align(cnst(2)) : x) firstprivate(x) ! { dg-error "31:ALIGN modifier requires at \\(1\\) a scalar positive constant integer alignment expression that is a power of two" } +!$omp end parallel + +!$omp parallel allocate( align( 31) :x) firstprivate(x) ! { dg-error "32:ALIGN modifier requires at \\(1\\) a scalar positive constant integer alignment expression that is a power of two" } +!$omp end parallel + +!$omp parallel allocate( align (32.0): x) firstprivate(x) ! { dg-error "32:ALIGN modifier requires at \\(1\\) a scalar positive constant integer alignment expression that is a power of two" } +!$omp end parallel + +!$omp parallel allocate( align(cnst ) : x ) firstprivate(x) ! 
{ dg-error "31:ALIGN modifier requires at \\(1\\) a scalar positive constant integer alignment expression that is a power of two" } +!$omp end parallel +end diff --git a/libgomp/testsuite/libgomp.fortran/combined-directive-splitting-1.f90 b/libgomp/testsuite/libgomp.fortran/combined-directive-splitting-1.f90 new file mode 100644 index 0000000..e662a2b --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/combined-directive-splitting-1.f90 @@ -0,0 +1,41 @@ +module m + integer :: a = 1 + !$omp declare target enter(a) +end module m + +module m2 +contains +subroutine bar() + use m + implicit none + !$omp declare target + a = a + 5 +end subroutine bar +end module m2 + +program p + use m + use m2 + implicit none + integer :: b, i + + !$omp target parallel do map(always, tofrom: a) firstprivate(a) + do i = 1, 1 + a = 7 + call bar() + if (a /= 7) error stop 1 + a = a + 8 + end do + if (a /= 6) error stop 2 + + b = 3 + !$omp target parallel do map(always, tofrom: a) firstprivate(b) + do i = 1, 1 + a = 3 + call bar () + if (a /= 8) error stop 3 + a = a + b + end do + if (a /= 11) error stop 4 +end program p + diff --git a/libgomp/testsuite/libgomp.fortran/depend-4.f90 b/libgomp/testsuite/libgomp.fortran/depend-4.f90 index 80d00ca..35b47e9 100644 --- a/libgomp/testsuite/libgomp.fortran/depend-4.f90 +++ b/libgomp/testsuite/libgomp.fortran/depend-4.f90 @@ -1,7 +1,7 @@ ! { dg-additional-options "-fdump-tree-gimple" } ! ! { dg-additional-sources my-usleep.c } -! { dg-prune-output "command-line option '-fintrinsic-modules-path=.*' is valid for Fortran but not for C" } +! { dg-additional-options -Wno-complain-wrong-lang } ! ! Ensure that 'depend(...: var)' and 'depobj(...) depend(...: var)' ! 
depend on the same variable when 'var' is a pointer diff --git a/libgomp/testsuite/libgomp.fortran/depend-5.f90 b/libgomp/testsuite/libgomp.fortran/depend-5.f90 index b812b6d..7c3f297 100644 --- a/libgomp/testsuite/libgomp.fortran/depend-5.f90 +++ b/libgomp/testsuite/libgomp.fortran/depend-5.f90 @@ -1,5 +1,5 @@ ! { dg-additional-sources my-usleep.c } -! { dg-prune-output "command-line option '-fintrinsic-modules-path=.*' is valid for Fortran but not for C" } +! { dg-additional-options -Wno-complain-wrong-lang } module m implicit none diff --git a/libgomp/testsuite/libgomp.fortran/depend-6.f90 b/libgomp/testsuite/libgomp.fortran/depend-6.f90 index b5032e9..bb085f2 100644 --- a/libgomp/testsuite/libgomp.fortran/depend-6.f90 +++ b/libgomp/testsuite/libgomp.fortran/depend-6.f90 @@ -1,5 +1,5 @@ ! { dg-additional-sources my-usleep.c } -! { dg-prune-output "command-line option '-fintrinsic-modules-path=.*' is valid for Fortran but not for C" } +! { dg-additional-options -Wno-complain-wrong-lang } module m use omp_lib diff --git a/libgomp/testsuite/libgomp.fortran/depend-7.f90 b/libgomp/testsuite/libgomp.fortran/depend-7.f90 index 771a59c..86e310c 100644 --- a/libgomp/testsuite/libgomp.fortran/depend-7.f90 +++ b/libgomp/testsuite/libgomp.fortran/depend-7.f90 @@ -1,5 +1,5 @@ ! { dg-additional-sources my-usleep.c } -! { dg-prune-output "command-line option '-fintrinsic-modules-path=.*' is valid for Fortran but not for C" } +! { dg-additional-options -Wno-complain-wrong-lang } program main implicit none diff --git a/libgomp/testsuite/libgomp.fortran/depend-inoutset-1.f90 b/libgomp/testsuite/libgomp.fortran/depend-inoutset-1.f90 index 46161c3..6953b59 100644 --- a/libgomp/testsuite/libgomp.fortran/depend-inoutset-1.f90 +++ b/libgomp/testsuite/libgomp.fortran/depend-inoutset-1.f90 @@ -1,5 +1,5 @@ ! { dg-additional-sources my-usleep.c } -! { dg-prune-output "command-line option '-fintrinsic-modules-path=.*' is valid for Fortran but not for C" } +! 
{ dg-additional-options -Wno-complain-wrong-lang } program main use omp_lib diff --git a/libgomp/testsuite/libgomp.fortran/examples-4/declare_target-1.f90 b/libgomp/testsuite/libgomp.fortran/examples-4/declare_target-1.f90 index 5fbf036..fcc7f51 100644 --- a/libgomp/testsuite/libgomp.fortran/examples-4/declare_target-1.f90 +++ b/libgomp/testsuite/libgomp.fortran/examples-4/declare_target-1.f90 @@ -1,6 +1,6 @@ ! { dg-do run } ! { dg-additional-sources ../on_device_arch.c } -! { dg-prune-output "command-line option '-fintrinsic-modules-path=.*' is valid for Fortran but not for C" } +! { dg-additional-options -Wno-complain-wrong-lang } module e_53_1_mod integer :: THRESHOLD = 20 diff --git a/libgomp/testsuite/libgomp.fortran/examples-4/declare_target-2.f90 b/libgomp/testsuite/libgomp.fortran/examples-4/declare_target-2.f90 index 1bcdac9..d43d9ab 100644 --- a/libgomp/testsuite/libgomp.fortran/examples-4/declare_target-2.f90 +++ b/libgomp/testsuite/libgomp.fortran/examples-4/declare_target-2.f90 @@ -1,6 +1,6 @@ ! { dg-do run } ! { dg-additional-sources ../on_device_arch.c } -! { dg-prune-output "command-line option '-fintrinsic-modules-path=.*' is valid for Fortran but not for C" } +! { dg-additional-options -Wno-complain-wrong-lang } program e_53_2 !$omp declare target (fib) diff --git a/libgomp/testsuite/libgomp.fortran/has_device_addr.f90 b/libgomp/testsuite/libgomp.fortran/has_device_addr.f90 new file mode 100644 index 0000000..95cc778 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/has_device_addr.f90 @@ -0,0 +1,59 @@ +! { dg-additional-options "-fdump-tree-original" } + +! +! PR fortran/108558 +! + +! { dg-final { scan-tree-dump-times "#pragma omp target has_device_addr\\(x\\) has_device_addr\\(y\\)" 2 "original" } } +! { dg-final { scan-tree-dump-times "#pragma omp target data map\\(tofrom:x\\) map\\(tofrom:y\\)" 2 "original" } } +! { dg-final { scan-tree-dump-times "#pragma omp target data use_device_addr\\(x\\) use_device_addr\\(y\\)" 1 "original" } } +! 
{ dg-final { scan-tree-dump-times "#pragma omp target update from\\(y\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "#pragma omp target data map\\(tofrom:x\\) map\\(tofrom:y\\) use_device_addr\\(x\\) use_device_addr\\(y\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "#pragma omp teams" 2 "original" } } +! { dg-final { scan-tree-dump-times "#pragma omp distribute" 2 "original" } } +! { dg-final { scan-tree-dump-times "#pragma omp parallel" 2 "original" } } +! { dg-final { scan-tree-dump-times "#pragma omp for nowait" 2 "original" } } + +module m +contains +subroutine vectorAdd(x, y, N) + implicit none + integer :: N + integer(4) :: x(N), y(N) + integer :: i + + !$omp target teams distribute parallel do has_device_addr(x, y) + do i = 1, N + y(i) = x(i) + y(i) + end do +end subroutine vectorAdd +end module m + +program main + use m + implicit none + integer, parameter :: N = 9876 + integer(4) :: x(N), y(N) + integer :: i + + x(:) = 1 + y(:) = 2 + + !$omp target data map(x, y) + !$omp target data use_device_addr(x, y) + call vectorAdd(x, y, N) + !$omp end target data + !$omp target update from(y) + if (any (y /= 3)) error stop + !$omp end target data + + x = 1 + y = 2 + !$omp target data map(x, y) use_device_addr(x, y) + !$omp target teams distribute parallel do has_device_addr(x, y) + do i = 1, N + y(i) = x(i) + y(i) + end do + !$omp end target data + if (any (y /= 3)) error stop +end program diff --git a/libgomp/testsuite/libgomp.fortran/icv-5.f90 b/libgomp/testsuite/libgomp.fortran/icv-5.f90 new file mode 100644 index 0000000..05a35fa --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/icv-5.f90 @@ -0,0 +1,226 @@ +! { dg-set-target-env-var OMP_NUM_TEAMS_ALL "3" } +! { dg-set-target-env-var OMP_NUM_TEAMS_DEV "4" } +! { dg-set-target-env-var OMP_NUM_TEAMS "5" } +! { dg-set-target-env-var OMP_NUM_TEAMS_DEV_0 "6" } +! { dg-set-target-env-var OMP_NUM_TEAMS_DEV_1 "7" } +! { dg-set-target-env-var OMP_NUM_TEAMS_DEV_2 "8" } +! 
{ dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_ALL "2" } +! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV "3" } +! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT "4" } +! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV_0 "5" } +! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV_1 "6" } +! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV_2 "7" } + +use omp_lib +implicit none (type, external) + integer :: num_devices, i, large_num_teams, large_threads_limit + logical :: err + + if (omp_get_num_devices () > 3) then + num_devices = 3 + else + num_devices = omp_get_num_devices () + end if + + do i=0,num_devices-1 + + ! Testing NUM_TEAMS. + if (env_is_set_dev ("OMP_NUM_TEAMS_DEV_", i, 6 + i)) then + err = .false. + !$omp target device(i) map(tofrom: err) + if (omp_get_max_teams () /= 6 + i) err = .true. + !$omp end target + if (err) stop 1 + + err = .false. + !$omp target device(i) map(tofrom: err) + !$omp teams + if (omp_get_num_teams () > 6 + i .or. omp_get_team_num () >= 6 + i) & + err = .true. + !$omp end teams + !$omp end target + if (err) stop 2 + + err = .false. + !$omp target device(i) map(tofrom: err) + call omp_set_num_teams (5 + i) + if (omp_get_max_teams () /= 5 + i) err = .true. + !$omp end target + if (err) stop 3 + + err = .false. + !$omp target device(i) map(tofrom: err) + if (omp_get_max_teams () /= 5 + i) err = .true. + !$omp end target + if (err) stop 4 + + err = .false. + !$omp target device(i) map(tofrom: err) + !$omp teams + if (omp_get_num_teams () > 5 + i .or. omp_get_team_num () >= 5 + i) & + err = .true. + !$omp end teams + !$omp end target + if (err) stop 5 + + err = .false. + !$omp target device(i) map(tofrom: err) + !$omp teams num_teams(6 + i) + if (omp_get_num_teams () > 6 + i .or. omp_get_team_num () >= 6 + i) & + err = .true. + !$omp end teams + !$omp end target + if (err) stop 6 + + err = .false. + !$omp target device(i) map(tofrom: err) + !$omp teams num_teams(4 + i) + if (omp_get_num_teams () > 4 + i .or. 
omp_get_team_num () >= 4 + i) & + err = .true. + !$omp end teams + !$omp end target + if (err) stop 7 + + large_num_teams = 66000 + err = .false. + !$omp target device(i) map(tofrom: err) + call omp_set_num_teams (large_num_teams + i) + if (omp_get_max_teams () /= large_num_teams + i) err = .true. + !$omp end target + if (err) stop 8 + + err = .false. + !$omp target device(i) map(tofrom: err) + if (omp_get_max_teams () /= large_num_teams + i) err = .true. + !$omp end target + if (err) stop 9 + + err = .false. + !$omp target device(i) map(tofrom: err) + !$omp teams + if (omp_get_num_teams () > large_num_teams + i & + .or. omp_get_team_num () >= large_num_teams + i) err = .true. + !$omp end teams + !$omp end target + if (err) stop 10 + end if + + ! Testing TEAMS-THREAD-LIMIT + if (env_is_set_dev ("OMP_TEAMS_THREAD_LIMIT_DEV_", i, 5 + i)) then + err = .false. + !$omp target device(i) map(tofrom: err) + if (omp_get_teams_thread_limit () /= 5 + i) err = .true. + !$omp end target + if (err) stop 11 + + err = .false. + !$omp target device(i) map(tofrom: err) + !$omp teams + !$omp parallel + if (omp_get_thread_limit () > 5 + i .or. omp_get_thread_num () >= 5 + i) & + err = .true. + !$omp end parallel + !$omp end teams + !$omp end target + if (err) stop 12 + + err = .false. + !$omp target device(i) map(tofrom: err) + call omp_set_teams_thread_limit (4 + i) + if (omp_get_teams_thread_limit () /= 4 + i) err = .true. + !$omp end target + if (err) stop 13 + + err = .false. + !$omp target device(i) map(tofrom: err) + if (omp_get_teams_thread_limit () /= 4 + i) err = .true. + !$omp end target + if (err) stop 14 + + err = .false. + !$omp target device(i) map(tofrom: err) + !$omp teams + !$omp parallel + if (omp_get_thread_limit () > 4 + i .or. omp_get_thread_num () >= 4 + i) & + err = .true. + !$omp end parallel + !$omp end teams + !$omp end target + if (err) stop 15 + + err = .false. 
+ !$omp target device(i) map(tofrom: err) + !$omp teams thread_limit(5 + i) + !$omp parallel + if (omp_get_thread_limit () > 5 + i .or. omp_get_thread_num () >= 5 + i) & + err = .true. + !$omp end parallel + !$omp end teams + !$omp end target + if (err) stop 16 + + err = .false. + !$omp target device(i) map(tofrom: err) + !$omp teams thread_limit(3 + i) + !$omp parallel + if (omp_get_thread_limit () > 3 + i .or. omp_get_thread_num () >= 3 + i) & + err = .true. + !$omp end parallel + !$omp end teams + !$omp end target + if (err) stop 17 + + large_threads_limit = 67000 + err = .false. + !$omp target device(i) map(tofrom: err) + call omp_set_teams_thread_limit (large_threads_limit + i) + if (omp_get_teams_thread_limit () /= large_threads_limit + i) err = .true. + !$omp end target + if (err) stop 18 + + err = .false. + !$omp target device(i) map(tofrom: err) + if (omp_get_teams_thread_limit () /= large_threads_limit + i) err = .true. + !$omp end target + if (err) stop 19 + + err = .false. + !$omp target device(i) map(tofrom: err) + !$omp teams + !$omp parallel + if (omp_get_thread_limit () > large_threads_limit + i & + .or. omp_get_thread_num () >= large_threads_limit + i) err = .true. + !$omp end parallel + !$omp end teams + !$omp end target + if (err) stop 20 + end if + + end do + +contains + logical function env_is_set (name, val) + character(len=*) :: name, val + character(len=40) :: val2 + integer :: stat + call get_environment_variable (name, val2, status=stat) + if (stat == 0) then + if (val == val2) then + env_is_set = .true. + return + end if + else if (stat /= 1) then + error stop 30 + endif + env_is_set = .false. 
+ end + logical function env_is_set_dev (name, dev_num, val) + character(len=*) :: name + integer :: dev_num, val + character(len=64) :: dev_num_str, env_var, val_str + dev_num_str = ADJUSTL(dev_num_str) + env_var = name // dev_num_str + val_str = ADJUSTL(val_str) + env_is_set_dev = env_is_set (TRIM(env_var), TRIM(val_str)) + end +end diff --git a/libgomp/testsuite/libgomp.fortran/icv-6.f90 b/libgomp/testsuite/libgomp.fortran/icv-6.f90 new file mode 100644 index 0000000..c8e6a0d --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/icv-6.f90 @@ -0,0 +1,140 @@ +! { dg-set-target-env-var OMP_NUM_TEAMS_ALL "3" } +! { dg-set-target-env-var OMP_NUM_TEAMS_DEV "4" } +! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_ALL "2" } +! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV "3" } + +! This test considers the hierarchical usage of ICVs on the device, i.e. if +! e.g. OMP_NUM_TEAMS_DEV_<device_num> is not configured, then the value of +! OMP_NUM_TEAMS_DEV should be used for the targets. + +use omp_lib +implicit none (type, external) + integer :: num_devices, i, stat, tmp + logical :: err + character(len=40) :: val + + ! The following environment variables should not be set. + call get_environment_variable ("OMP_NUM_TEAMS_DEV_0", val, status=stat) + if (stat /= 1) return + call get_environment_variable ("OMP_NUM_TEAMS_DEV_1", val, status=stat) + if (stat /= 1) return + call get_environment_variable ("OMP_NUM_TEAMS_DEV_2", val, status=stat) + if (stat /= 1) return + call get_environment_variable ("OMP_TEAMS_THREAD_LIMIT_DEV_0", val, status=stat) + if (stat /= 1) return + call get_environment_variable ("OMP_TEAMS_THREAD_LIMIT_DEV_1", val, status=stat) + if (stat /= 1) return + call get_environment_variable ("OMP_TEAMS_THREAD_LIMIT_DEV_2", val, status=stat) + if (stat /= 1) return + + if (omp_get_num_devices () > 3) then + num_devices = 3 + else + num_devices = omp_get_num_devices () + end if + + do i=0,num_devices-1 + + ! Testing NUM_TEAMS. 
+ if (env_is_set ("OMP_NUM_TEAMS_DEV", "4")) then + err = .false. + !$omp target device(i) map(tofrom: err) + if (omp_get_max_teams () /= 4) err = .true. + !$omp end target + if (err) stop 1 + + err = .false. + !$omp target device(i) map(tofrom: err) + !$omp teams + if (omp_get_num_teams () > 4 .or. omp_get_team_num () >= 4) & + err = .true. + !$omp end teams + !$omp end target + if (err) stop 2 + + err = .false. + !$omp target device(i) map(tofrom: err) + call omp_set_num_teams (3 + i) + if (omp_get_max_teams () /= 3 + i) err = .true. + !$omp end target + if (err) stop 3 + + err = .false. + !$omp target device(i) map(tofrom: err) + if (omp_get_max_teams () /= 3 + i) err = .true. + !$omp end target + if (err) stop 4 + + err = .false. + !$omp target device(i) map(tofrom: err) + !$omp teams + if (omp_get_num_teams () > 3 + i .or. omp_get_team_num () >= 3 + i) & + err = .true. + !$omp end teams + !$omp end target + if (err) stop 5 + end if + + ! Testing TEAMS-THREAD-LIMIT + if (env_is_set ("OMP_TEAMS_THREAD_LIMIT_DEV", "3")) then + err = .false. + !$omp target device(i) map(tofrom: err) + if (omp_get_teams_thread_limit () /= 3) err = .true. + !$omp end target + if (err) stop 6 + + err = .false. + !$omp target device(i) map(tofrom: err) + !$omp teams + !$omp parallel + if (omp_get_thread_limit () > 3 .or. omp_get_thread_num () >= 3) & + err = .true. + !$omp end parallel + !$omp end teams + !$omp end target + if (err) stop 7 + + err = .false. + !$omp target device(i) map(tofrom: err) + call omp_set_teams_thread_limit (2 + i) + if (omp_get_teams_thread_limit () /= 2 + i) err = .true. + !$omp end target + if (err) stop 8 + + err = .false. + !$omp target device(i) map(tofrom: err) + if (omp_get_teams_thread_limit () /= 2 + i) err = .true. + !$omp end target + if (err) stop 9 + + err = .false. + !$omp target device(i) map(tofrom: err) + !$omp teams + !$omp parallel + if (omp_get_thread_limit () > 2 + i .or. omp_get_thread_num () >= 2 + i) & + err = .true. 
+ !$omp end parallel + !$omp end teams + !$omp end target + if (err) stop 10 + end if + + end do + +contains + logical function env_is_set (name, val) + character(len=*) :: name, val + character(len=40) :: val2 + integer :: stat + call get_environment_variable (name, val2, status=stat) + if (stat == 0) then + if (val == val2) then + env_is_set = .true. + return + end if + else if (stat /= 1) then + error stop 10 + endif + env_is_set = .false. + end +end diff --git a/libgomp/testsuite/libgomp.fortran/is_device_ptr-2.f90 b/libgomp/testsuite/libgomp.fortran/is_device_ptr-2.f90 new file mode 100644 index 0000000..5b7fab0 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/is_device_ptr-2.f90 @@ -0,0 +1,159 @@ +! { dg-additional-options "-fdump-tree-original" } +! +! Since OpenMP 5.1, non-TYPE(c_ptr) arguments to is_device_ptr +! map to has_device_ptr - check this! +! +! PR fortran/105318 +! +module m + use iso_c_binding, only: c_ptr, c_loc, c_f_pointer, c_associated + implicit none (type, external) +contains + subroutine one (as, ar, asp, arp, asa, ara, cptr_a) + integer, target :: AS, AR(5) + integer, pointer :: ASP, ARP(:) + integer, allocatable :: ASA, ARA(:) + + type(c_ptr) :: cptr_a + + !$omp target is_device_ptr(as, ar, asp, arp, asa, ara, cptr_a) + if (.not. c_associated (cptr_a, c_loc(as))) stop 18 + if (as /= 5) stop 19 + if (any (ar /= [1,2,3,4,5])) stop 20 + if (asp /= 9) stop 21 + if (any (arp /= [2,4,6])) stop 22 + !$omp end target + end + + subroutine two (cptr_v) + type(c_ptr), value :: cptr_v + integer, pointer :: xx + + xx => null() + !$omp target is_device_ptr(cptr_v) + if (.not. 
c_associated (cptr_v)) stop 23 + call c_f_pointer (cptr_v, xx) + if (xx /= 5) stop 24 + xx => null() + !$omp end target + end + + subroutine three (os, or, osp, orp, osa, ora, cptr_o) + integer, optional, target :: OS, OR(5) + integer, optional, pointer :: OSP, ORP(:) + integer, optional, allocatable :: OSA, ORA(:) + + type(c_ptr) :: cptr_o + + !$omp target is_device_ptr(os, or, osp, orp, osa, ora, cptr_o) + if (.not. c_associated (cptr_o, c_loc(os))) stop 25 + if (os /= 5) stop 26 + if (any (or /= [1,2,3,4,5])) stop 27 + if (osp /= 9) stop 28 + if (any (orp /= [2,4,6])) stop 29 + !$omp end target + end + + subroutine four(NVS, NVSO) + use omp_lib, only: omp_initial_device, omp_invalid_device + integer, value :: NVS + integer, optional, value :: NVSO + integer :: NS, NR(5) + logical, volatile :: false_ + + false_ = .false. + + !$omp target is_device_ptr (NS, NR, NVS, NVSO) device(omp_initial_device) + NVS = 5 + NVSO = 5 + NS = 5 + NR(1) = 7 + !$omp end target + + if (false_) then + !$omp target device(omp_invalid_device) + !$omp end target + end if + end subroutine + +end module m + +program main + use iso_c_binding, only: c_ptr, c_loc, c_f_pointer, c_associated + use m + implicit none (type, external) + + integer, target :: IS, IR(5) + integer, pointer :: ISP, IRP(:) + integer, allocatable :: ISA, IRA(:) + integer :: xxx, xxxx + + type(c_ptr) :: cptr_i + + is = 5 + ir = [1,2,3,4,5] + allocate(ISP, source=9) + allocate(IRP, source=[2,4,6]) + + !$omp target data map(is, ir, isp, irp, isa, ira) & + !$omp& use_device_ptr(is, ir, isp, irp, isa, ira) + + cptr_i = c_loc(is) + !$omp target is_device_ptr(is, ir, isp, irp, isa, ira, cptr_i) + if (.not. 
c_associated (cptr_i, c_loc(is))) stop 30 + if (is /= 5) stop 31 + if (any (ir /= [1,2,3,4,5])) stop 32 + if (isp /= 9) stop 33 + if (any (irp /= [2,4,6])) stop 34 + !$omp end target + + call one (is, ir, isp, irp, isa, ira, cptr_i) + call two (cptr_i) + call three (is, ir, isp, irp, isa, ira, cptr_i) + + !$omp end target data + + call four(xxx, xxxx) +end + +! { dg-final { scan-tree-dump-not "use_device_ptr" "original" } } +! { dg-final { scan-tree-dump "use_device_addr\\(ira\\)" "original" } } +! { dg-final { scan-tree-dump "use_device_addr\\(isa\\)" "original" } } +! { dg-final { scan-tree-dump "use_device_addr\\(irp\\)" "original" } } +! { dg-final { scan-tree-dump "use_device_addr\\(isp\\)" "original" } } +! { dg-final { scan-tree-dump "use_device_addr\\(ir\\)" "original" } } +! { dg-final { scan-tree-dump "use_device_addr\\(is\\)" "original" } } + +! { dg-final { scan-tree-dump-not "use_device_addr\\(cptr" "original" } } +! { dg-final { scan-tree-dump-not "use_device_ptr\\(o" "original" } } +! { dg-final { scan-tree-dump-not "use_device_ptr\\(a" "original" } } +! { dg-final { scan-tree-dump-not "use_device_ptr\\(i" "original" } } + +! { dg-final { scan-tree-dump "is_device_ptr\\(cptr_o\\)" "original" } } +! { dg-final { scan-tree-dump "has_device_addr\\(ora\\)" "original" } } +! { dg-final { scan-tree-dump "has_device_addr\\(osa\\)" "original" } } +! { dg-final { scan-tree-dump "has_device_addr\\(orp\\)" "original" } } +! { dg-final { scan-tree-dump "has_device_addr\\(osp\\)" "original" } } +! { dg-final { scan-tree-dump "has_device_addr\\(or\\)" "original" } } +! { dg-final { scan-tree-dump "has_device_addr\\(os\\)" "original" } } +! { dg-final { scan-tree-dump "is_device_ptr\\(cptr_v\\)" "original" } } +! { dg-final { scan-tree-dump "is_device_ptr\\(cptr_a\\)" "original" } } +! { dg-final { scan-tree-dump "has_device_addr\\(ara\\)" "original" } } +! { dg-final { scan-tree-dump "has_device_addr\\(asa\\)" "original" } } +! 
{ dg-final { scan-tree-dump "has_device_addr\\(arp\\)" "original" } } +! { dg-final { scan-tree-dump "has_device_addr\\(asp\\)" "original" } } +! { dg-final { scan-tree-dump "has_device_addr\\(ar\\)" "original" } } +! { dg-final { scan-tree-dump "has_device_addr\\(as\\)" "original" } } +! { dg-final { scan-tree-dump "use_device_addr\\(is\\)" "original" } } +! { dg-final { scan-tree-dump "use_device_addr\\(ir\\)" "original" } } +! { dg-final { scan-tree-dump "use_device_addr\\(isp\\)" "original" } } +! { dg-final { scan-tree-dump "use_device_addr\\(irp\\)" "original" } } +! { dg-final { scan-tree-dump "use_device_addr\\(isa\\)" "original" } } +! { dg-final { scan-tree-dump "use_device_addr\\(ira\\)" "original" } } +! { dg-final { scan-tree-dump "is_device_ptr\\(cptr_i\\)" "original" } } +! { dg-final { scan-tree-dump "has_device_addr\\(ira\\)" "original" } } +! { dg-final { scan-tree-dump "has_device_addr\\(isa\\)" "original" } } +! { dg-final { scan-tree-dump "has_device_addr\\(irp\\)" "original" } } +! { dg-final { scan-tree-dump "has_device_addr\\(isp\\)" "original" } } +! { dg-final { scan-tree-dump "has_device_addr\\(ir\\)" "original" } } +! { dg-final { scan-tree-dump "has_device_addr\\(is\\)" "original" } } diff --git a/libgomp/testsuite/libgomp.fortran/is_device_ptr-3.f90 b/libgomp/testsuite/libgomp.fortran/is_device_ptr-3.f90 new file mode 100644 index 0000000..ab9f00e --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/is_device_ptr-3.f90 @@ -0,0 +1,46 @@ +module m + use iso_c_binding + implicit none +contains + subroutine s(x,y,z) + type(c_ptr), optional :: x + integer, pointer, optional :: y + integer, allocatable, optional :: z + logical is_present, is_null + is_present = present(x) + if (is_present) & + is_null = .not. c_associated(x) + + !$omp target is_device_ptr(x) has_device_addr(y) has_device_addr(z) + if (is_present) then + if (is_null) then + if (c_associated(x)) stop 1 + if (associated(y)) stop 2 + if (allocated(z)) stop 3 + else + if (.not. 
c_associated(x, c_loc(y))) stop 4 + if (y /= 7) stop 5 + if (z /= 9) stop 6 + end if + end if + !$omp end target + end +end + +use m +implicit none +integer, pointer :: p +integer, allocatable :: a +p => null() +call s() +!$omp target data map(p,a) use_device_addr(p,a) + call s(c_null_ptr, p, a) +!$omp end target data +allocate(p,a) +p = 7 +a = 9 +!$omp target data map(p,a) use_device_addr(p,a) + call s(c_loc(p), p, a) +!$omp end target data +deallocate(p,a) +end diff --git a/libgomp/testsuite/libgomp.fortran/non-rectangular-loop-1.f90 b/libgomp/testsuite/libgomp.fortran/non-rectangular-loop-1.f90 new file mode 100644 index 0000000..dbbd18a --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/non-rectangular-loop-1.f90 @@ -0,0 +1,668 @@ +! { dg-do run } +! { dg-additional-options "-msse2" { target sse2_runtime } } +! { dg-additional-options "-mavx" { target avx_runtime } } + +! PR fortran/107424 + +! Nonrectangular loop nests checks + +! See PR or non-rectangular-loop-1a.f90 for the commented tests +! Hint: Those use step for loop vars part of nonrectangular loop nests + +module m + implicit none (type, external) +contains + +! The 'k' loop uses i or j as start value +! but a constant end value such that 'lastprivate' +! should be well-defined +subroutine lastprivate_check_simd_1 + integer :: n,m,p, i,j,k + + n = 11 + m = 23 + p = 27 + + ! Use 'i' or 'j', unit step on 'i' or on 'j' -> 4 loops + ! Then same, except use non-unit step for 'k' + +! !$omp simd collapse(3) lastprivate(k) +! do i = 1, n +! do j = 1, m, 2 +! do k = j - 41, p +! if (k < 1 - 41 .or. k > p) error stop +! end do +! end do +! end do +! if (k /= p + 1) error stop + +! !$omp simd collapse(3) lastprivate(k) +! do i = 1, n, 2 +! do j = 1, m +! do k = i - 41, p +! if (k < 1 - 41 .or. k > p) error stop +! end do +! end do +! end do +! if (k /= p + 1) error stop + + !$omp simd collapse(3) lastprivate(k) + do i = 1, n, 2 + do j = 1, m + do k = j - 41, p + if (k < 1 - 41 .or. 
k > p) then + print *, i, j, k,p, " -> i, j, k, p (k < 1 - 41 .or. k > p)" + error stop + end if + end do + end do + end do + if (k /= p + 1) error stop + + k = -43 + m = 0 + !$omp simd collapse(3) lastprivate(k) + do i = 1, n, 2 + do j = 1, m + do k = j - 41, p + if (k < 1 - 41 .or. k > p) then + print *, i, j, k,p, " -> i, j, k, p (k < 1 - 41 .or. k > p)" + error stop + end if + end do + end do + end do + if (k /= -43) error stop + + m = 23 + + !$omp simd collapse(3) lastprivate(k) + do i = 1, n + do j = 1, m, 2 + do k = i - 41, p + if (k < 1 - 41 .or. k > p) error stop + end do + end do + end do + if (k /= p + 1) error stop + + n = -5 + k = - 70 + !$omp simd collapse(3) lastprivate(k) + do i = 1, n + do j = 1, m, 2 + do k = i - 41, p + if (k < 1 - 41 .or. k > p) error stop + end do + end do + end do + if (k /= -70) error stop + + n = 11 + + ! Same but 'private' for all (i,j) vars + +! !$omp simd collapse(3) lastprivate(k) private(i,j) +! do i = 1, n +! do j = 1, m, 2 +! do k = j - 41, p +! if (k < 1 - 41 .or. k > p) error stop +! end do +! end do +! end do +! if (k /= p + 1) error stop +! +! !$omp simd collapse(3) lastprivate(k) private(i,j) +! do i = 1, n, 2 +! do j = 1, m +! do k = i - 41, p +! if (k < 1 - 41 .or. k > p) error stop +! end do +! end do +! end do +! if (k /= p + 1) error stop + + !$omp simd collapse(3) lastprivate(k) private(i,j) + do i = 1, n, 2 + do j = 1, m + do k = j - 41, p + if (k < 1 - 41 .or. k > p) error stop + end do + end do + end do + if (k /= p + 1) error stop + + !$omp simd collapse(3) lastprivate(k) private(i,j) + do i = 1, n + do j = 1, m, 2 + do k = i - 41, p + if (k < 1 - 41 .or. k > p) error stop + end do + end do + end do + if (k /= p + 1) error stop + + ! Same - but with lastprivate(i,j) + +! !$omp simd collapse(3) lastprivate(k) lastprivate(i,j) +! do i = 1, n +! do j = 1, m, 2 +! do k = j - 41, p +! if (k < 1 - 41 .or. k > p) error stop +! end do +! end do +! end do +! if (k /= p + 1) error stop +! if (i /= n + 1 .or. 
j /= m + 2) error stop + +! !$omp simd collapse(3) lastprivate(k) lastprivate(i,j) +! do i = 1, n, 2 +! do j = 1, m +! do k = i - 41, p +! if (k < 1 - 41 .or. k > p) error stop +! end do +! end do +! end do +! if (k /= p + 1) error stop +! if (i /= n + 2 .or. j /= m + 1) error stop + + !$omp simd collapse(3) lastprivate(k) lastprivate(i,j) + do i = 1, n, 2 + do j = 1, m + do k = j - 41, p + if (k < 1 - 41 .or. k > p) error stop + end do + end do + end do + if (k /= p + 1) error stop + if (i /= n + 2 .or. j /= m + 1) error stop + + !$omp simd collapse(3) lastprivate(k) lastprivate(i,j) + do i = 1, n + do j = 1, m, 2 + do k = i - 41, p + if (k < 1 - 41 .or. k > p) error stop + end do + end do + end do + if (k /= p + 1) error stop + if (i /= n + 1 .or. j /= m + 2) error stop +end subroutine lastprivate_check_simd_1 + + +! Same but with do simd +subroutine lastprivate_check_do_simd_1 + integer :: n,m,p, i,j,k + + n = 11 + m = 23 + p = 27 + + ! Use 'i' or 'j', unit step on 'i' or on 'j' -> 4 loops + ! Then same, except use non-unit step for 'k' + +! !$omp parallel do simd collapse(3) lastprivate(k) +! do i = 1, n +! do j = 1, m, 2 +! do k = j - 41, p +! if (k < 1 - 41 .or. k > p) error stop +! end do +! end do +! end do +! if (k /= p + 1) error stop + +! !$omp parallel do simd collapse(3) lastprivate(k) +! do i = 1, n, 2 +! do j = 1, m +! do k = i - 41, p +! if (k < 1 - 41 .or. k > p) error stop +! end do +! end do +! end do +! if (k /= p + 1) error stop + + !$omp parallel do simd collapse(3) lastprivate(k) + do i = 1, n, 2 + do j = 1, m + do k = j - 41, p + if (k < 1 - 41 .or. k > p) error stop + end do + end do + end do + if (k /= p + 1) error stop + + !$omp parallel do simd collapse(3) lastprivate(k) + do i = 1, n + do j = 1, m, 2 + do k = i - 41, p + if (k < 1 - 41 .or. k > p) error stop + end do + end do + end do + if (k /= p + 1) error stop + + ! Same but 'private' for all (i,j) vars + +! !$omp parallel do simd collapse(3) lastprivate(k) private(i,j) +! 
do i = 1, n +! do j = 1, m, 2 +! do k = j - 41, p +! if (k < 1 - 41 .or. k > p) error stop +! end do +! end do +! end do +! if (k /= p + 1) error stop + +! !$omp parallel do simd collapse(3) lastprivate(k) private(i,j) +! do i = 1, n, 2 +! do j = 1, m +! do k = i - 41, p +! if (k < 1 - 41 .or. k > p) error stop +! end do +! end do +! end do +! if (k /= p + 1) error stop + + !$omp parallel do simd collapse(3) lastprivate(k) private(i,j) + do i = 1, n, 2 + do j = 1, m + do k = j - 41, p + if (k < 1 - 41 .or. k > p) error stop + end do + end do + end do + if (k /= p + 1) error stop + + !$omp parallel do simd collapse(3) lastprivate(k) private(i,j) + do i = 1, n + do j = 1, m, 2 + do k = i - 41, p + if (k < 1 - 41 .or. k > p) error stop + end do + end do + end do + if (k /= p + 1) error stop + + ! Same - but with lastprivate(i,j) + +! !$omp parallel do simd collapse(3) lastprivate(k) lastprivate(i,j) +! do i = 1, n +! do j = 1, m, 2 +! do k = j - 41, p +! if (k < 1 - 41 .or. k > p) error stop +! end do +! end do +! end do +! if (k /= p + 1) error stop +! if (i /= n + 1 .or. j /= m + 2) error stop + +! !$omp parallel do simd collapse(3) lastprivate(k) lastprivate(i,j) +! do i = 1, n, 2 +! do j = 1, m +! do k = i - 41, p +! if (k < 1 - 41 .or. k > p) error stop +! end do +! end do +! end do +! if (k /= p + 1) error stop +! if (i /= n + 2 .or. j /= m + 1) error stop + + !$omp parallel do simd collapse(3) lastprivate(k) lastprivate(i,j) + do i = 1, n, 2 + do j = 1, m + do k = j - 41, p + if (k < 1 - 41 .or. k > p) error stop + end do + end do + end do + if (k /= p + 1) error stop + if (i /= n + 2 .or. j /= m + 1) error stop + + !$omp parallel do simd collapse(3) lastprivate(k) lastprivate(i,j) + do i = 1, n + do j = 1, m, 2 + do k = i - 41, p + if (k < 1 - 41 .or. k > p) error stop + end do + end do + end do + if (k /= p + 1) error stop + if (i /= n + 1 .or. j /= m + 2) error stop +end subroutine lastprivate_check_do_simd_1 + + + +! 
Same but with do +subroutine lastprivate_check_do_1 + integer :: n,m,p, i,j,k + + n = 11 + m = 23 + p = 27 + + ! Use 'i' or 'j', unit step on 'i' or on 'j' -> 4 loops + ! Then same, except use non-unit step for 'k' + +! !$omp parallel do collapse(3) lastprivate(k) +! do i = 1, n +! do j = 1, m, 2 +! do k = j - 41, p +! if (k < 1 - 41 .or. k > p) error stop +! end do +! end do +! end do +! if (k /= p + 1) error stop + +! !$omp parallel do collapse(3) lastprivate(k) +! do i = 1, n, 2 +! do j = 1, m +! do k = i - 41, p +! if (k < 1 - 41 .or. k > p) error stop +! end do +! end do +! end do +! if (k /= p + 1) error stop + + !$omp parallel do collapse(3) lastprivate(k) + do i = 1, n, 2 + do j = 1, m + do k = j - 41, p + if (k < 1 - 41 .or. k > p) error stop + end do + end do + end do + if (k /= p + 1) error stop + + !$omp parallel do collapse(3) lastprivate(k) + do i = 1, n + do j = 1, m, 2 + do k = i - 41, p + if (k < 1 - 41 .or. k > p) error stop + end do + end do + end do + if (k /= p + 1) error stop + + ! Same but 'private' for all (i,j) vars + +! !$omp parallel do collapse(3) lastprivate(k) private(i,j) +! do i = 1, n +! do j = 1, m, 2 +! do k = j - 41, p +! if (k < 1 - 41 .or. k > p) error stop +! end do +! end do +! end do +! if (k /= p + 1) error stop + +! !$omp parallel do collapse(3) lastprivate(k) private(i,j) +! do i = 1, n, 2 +! do j = 1, m +! do k = i - 41, p +! if (k < 1 - 41 .or. k > p) error stop +! end do +! end do +! end do +! if (k /= p + 1) error stop + + !$omp parallel do collapse(3) lastprivate(k) private(i,j) + do i = 1, n, 2 + do j = 1, m + do k = j - 41, p + if (k < 1 - 41 .or. k > p) error stop + end do + end do + end do + if (k /= p + 1) error stop + + !$omp parallel do collapse(3) lastprivate(k) private(i,j) + do i = 1, n + do j = 1, m, 2 + do k = i - 41, p + if (k < 1 - 41 .or. k > p) error stop + end do + end do + end do + if (k /= p + 1) error stop + + ! Same - but with lastprivate(i,j) + +! 
!$omp parallel do collapse(3) lastprivate(k) lastprivate(i,j) +! do i = 1, n +! do j = 1, m, 2 +! do k = j - 41, p +! if (k < 1 - 41 .or. k > p) error stop +! end do +! end do +! end do +! if (k /= p + 1) error stop +! if (i /= n + 1 .or. j /= m + 2) error stop + +! !$omp parallel do collapse(3) lastprivate(k) lastprivate(i,j) +! do i = 1, n, 2 +! do j = 1, m +! do k = i - 41, p +! if (k < 1 - 41 .or. k > p) error stop +! end do +! end do +! end do +! if (k /= p + 1) error stop +! if (i /= n + 2 .or. j /= m + 1) error stop + + !$omp parallel do collapse(3) lastprivate(k) lastprivate(i,j) + do i = 1, n, 2 + do j = 1, m + do k = j - 41, p + if (k < 1 - 41 .or. k > p) error stop + end do + end do + end do + if (k /= p + 1) error stop + if (i /= n + 2 .or. j /= m + 1) error stop + + !$omp parallel do collapse(3) lastprivate(k) lastprivate(i,j) + do i = 1, n + do j = 1, m, 2 + do k = i - 41, p + if (k < 1 - 41 .or. k > p) error stop + end do + end do + end do + if (k /= p + 1) error stop + if (i /= n + 1 .or. j /= m + 2) error stop +end subroutine lastprivate_check_do_1 + + + +subroutine lastprivate_check_2 + integer :: n,m,p, i,j,k,ll + + n = 11 + m = 23 + p = 27 + +! !$omp parallel do simd collapse(3) lastprivate(p) +! do i = 1, n +! do j = 1, m,2 +! do k = 1, j + 41 +! do ll = 1, p, 2 +! if (k > 23 + 41 .or. k < 1) error stop +! end do +! end do +! end do +! end do +! if (ll /= 29) error stop + +! !$omp simd collapse(3) lastprivate(p) +! do i = 1, n +! do j = 1, m,2 +! do k = 1, j + 41 +! do ll = 1, p, 2 +! if (k > 23 + 41 .or. k < 1) error stop +! end do +! end do +! end do +! end do +! if (ll /= 29) error stop + +! !$omp simd collapse(3) lastprivate(k) +! do i = 1, n,2 +! do j = 1, m +! do k = 1, i + 41 +! if (k > 11 + 41 .or. k < 1) error stop +! end do +! end do +! end do +!if (k /= 53) then +! print *, k, 53 +! error stop +!endif + +!$omp simd collapse(3) lastprivate(k) +do i = 1, n,2 + do j = 1, m + do k = 1, j + 41 + if (k > 23 + 41 .or. 
k < 1) error stop + end do + end do +end do +if (k /= 65) then + print *, k, 65 + error stop +endif + + +!$omp simd collapse(3) lastprivate(k) +do i = 1, n + do j = 1, m,2 + do k = 1, i + 41 + if (k > 11 + 41 .or. k < 1) error stop + end do + end do +end do +if (k /= 53) then + print *, k, 53 + error stop +endif + +! - Same but without 'private': +!!$omp simd collapse(3) lastprivate(k) +!do i = 1, n +! do j = 1, m,2 +! do k = 1, j + 41 +! if (k > 23 + 41 .or. k < 1) error stop +! end do +! end do +!end do +!if (k /= 65) then +! print *, k, 65 +! error stop +!endif + + +!!$omp simd collapse(3) lastprivate(k) +!do i = 1, n,2 +! do j = 1, m +! do k = 1, i + 41 +! if (k > 11 + 41 .or. k < 1) error stop +! end do +! end do +!end do +!if (k /= 53) then +! print *, k, 53 +! error stop +!endif + +!$omp simd collapse(3) lastprivate(k) +do i = 1, n,2 + do j = 1, m + do k = 1, j + 41 + if (k > 23 + 41 .or. k < 1) error stop + end do + end do +end do +if (k /= 65) then + print *, k, 65 + error stop +endif + + +!$omp simd collapse(3) lastprivate(k) +do i = 1, n + do j = 1, m,2 + do k = 1, i + 41 + if (k > 11 + 41 .or. k < 1) error stop + end do + end do +end do +if (k /= 53) then + print *, k, 53 + error stop +endif + +! - all with lastprivate +!!$omp simd collapse(3) lastprivate(k) lastprivate(i, j) +!do i = 1, n +! do j = 1, m,2 +! do k = 1, j + 41 +! if (k > 23 + 41 .or. k < 1) error stop +! end do +! end do +!end do +!if (k /= 65) then +! print *, k, 65 +! error stop +!endif + + +!!$omp simd collapse(3) lastprivate(k) lastprivate(i, j) +!do i = 1, n,2 +! do j = 1, m +! do k = 1, i + 41 +! if (k > 11 + 41 .or. k < 1) error stop +! end do +! end do +!end do +!if (k /= 53) then +! print *, k, 53 +! error stop +!endif + +!$omp simd collapse(3) lastprivate(k) lastprivate(i, j) +do i = 1, n,2 + do j = 1, m + do k = 1, j + 41 + if (k > 23 + 41 .or. 
k < 1) error stop + end do + end do +end do +if (k /= 65) then + print *, k, 65 + error stop +endif + + +!$omp simd collapse(3) lastprivate(k) lastprivate(i, j) +do i = 1, n + do j = 1, m,2 + do k = 1, i + 41 + if (k > 11 + 41 .or. k < 1) error stop + end do + end do +end do +if (k /= 53) then + print *, k, 53 + error stop +endif + +end +end module m + +program main + use m + implicit none (type, external) + call lastprivate_check_simd_1 + call lastprivate_check_do_simd_1 + call lastprivate_check_do_1 + call lastprivate_check_2 +end diff --git a/libgomp/testsuite/libgomp.fortran/non-rectangular-loop-1a.f90 b/libgomp/testsuite/libgomp.fortran/non-rectangular-loop-1a.f90 new file mode 100644 index 0000000..77aa887 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/non-rectangular-loop-1a.f90 @@ -0,0 +1,374 @@ +! { dg-do compile } +! { dg-additional-options "-msse2" { target sse2_runtime } } +! { dg-additional-options "-mavx" { target avx_runtime } } + +! PR fortran/107424 + +! Nonrectangular loop nests checks + +! ======================================================== +! NOTE: The testcases are from non-rectangular-loop-1.f90, +! but commented there. Feel free to remove this +! file + uncomment them in non-rectangular-loop-1.f90 +! Otherwise, you need to change it to 'dg-do run'! +! ======================================================== + +module m + implicit none (type, external) +contains + +! The 'k' loop uses i or j as start value +! but a constant end value such that 'lastprivate' +! should be well-defined +subroutine lastprivate_check_simd_1 + integer :: n,m,p, i,j,k + + n = 11 + m = 23 + p = 27 + + ! Use 'i' or 'j', unit step on 'i' or on 'j' -> 4 loops + ! Then same, except use non-unit step for 'k' + + !$omp simd collapse(3) lastprivate(k) + do i = 1, n + do j = 1, m, 2 ! { dg-message "sorry, unimplemented: non-rectangular loop nest with step other than constant 1 or -1 for 'j'" } + do k = j - 41, p ! { dg-note "Used here" } + if (k < 1 - 41 .or. 
k > p) error stop + end do + end do + end do + if (k /= p + 1) error stop + + !$omp simd collapse(3) lastprivate(k) + do i = 1, n, 2 ! { dg-message "sorry, unimplemented: non-rectangular loop nest with step other than constant 1 or -1 for 'i'" } + do j = 1, m + do k = i - 41, p ! { dg-note "Used here" } + if (k < 1 - 41 .or. k > p) error stop + end do + end do + end do + if (k /= p + 1) error stop + + ! Same but 'private' for all (i,j) vars + + !$omp simd collapse(3) lastprivate(k) private(i,j) + do i = 1, n + do j = 1, m, 2 ! { dg-message "sorry, unimplemented: non-rectangular loop nest with step other than constant 1 or -1 for 'j'" } + do k = j - 41, p ! { dg-note "Used here" } + if (k < 1 - 41 .or. k > p) error stop + end do + end do + end do + if (k /= p + 1) error stop + + !$omp simd collapse(3) lastprivate(k) private(i,j) + do i = 1, n, 2 ! { dg-message "sorry, unimplemented: non-rectangular loop nest with step other than constant 1 or -1 for 'i'" } + do j = 1, m + do k = i - 41, p ! { dg-note "Used here" } + if (k < 1 - 41 .or. k > p) error stop + end do + end do + end do + if (k /= p + 1) error stop + + ! Same - but with lastprivate(i,j) + + !$omp simd collapse(3) lastprivate(k) lastprivate(i,j) + do i = 1, n + do j = 1, m, 2 ! { dg-message "sorry, unimplemented: non-rectangular loop nest with step other than constant 1 or -1 for 'j'" } + do k = j - 41, p ! { dg-note "Used here" } + if (k < 1 - 41 .or. k > p) error stop + end do + end do + end do + if (k /= p + 1) error stop + if (i /= n + 1 .or. j /= m + 2) error stop + + !$omp simd collapse(3) lastprivate(k) lastprivate(i,j) + do i = 1, n, 2 ! { dg-message "sorry, unimplemented: non-rectangular loop nest with step other than constant 1 or -1 for 'i'" } + do j = 1, m + do k = i - 41, p ! { dg-note "Used here" } + if (k < 1 - 41 .or. k > p) error stop + end do + end do + end do + if (k /= p + 1) error stop + if (i /= n + 2 .or. j /= m + 1) error stop + +end subroutine lastprivate_check_simd_1 + + +! 
Same but with do simd +subroutine lastprivate_check_do_simd_1 + integer :: n,m,p, i,j,k + + n = 11 + m = 23 + p = 27 + + ! Use 'i' or 'j', unit step on 'i' or on 'j' -> 4 loops + ! Then same, except use non-unit step for 'k' + + !$omp parallel do simd collapse(3) lastprivate(k) + do i = 1, n + do j = 1, m, 2 ! { dg-message "sorry, unimplemented: non-rectangular loop nest with step other than constant 1 or -1 for 'j'" } + do k = j - 41, p ! { dg-note "Used here" } + if (k < 1 - 41 .or. k > p) error stop + end do + end do + end do + if (k /= p + 1) error stop + + !$omp parallel do simd collapse(3) lastprivate(k) + do i = 1, n, 2 ! { dg-message "sorry, unimplemented: non-rectangular loop nest with step other than constant 1 or -1 for 'i'" } + do j = 1, m + do k = i - 41, p ! { dg-note "Used here" } + if (k < 1 - 41 .or. k > p) error stop + end do + end do + end do + if (k /= p + 1) error stop + + ! Same but 'private' for all (i,j) vars + + !$omp parallel do simd collapse(3) lastprivate(k) private(i,j) + do i = 1, n + do j = 1, m, 2 ! { dg-message "sorry, unimplemented: non-rectangular loop nest with step other than constant 1 or -1 for 'j'" } + do k = j - 41, p ! { dg-note "Used here" } + if (k < 1 - 41 .or. k > p) error stop + end do + end do + end do + if (k /= p + 1) error stop + + !$omp parallel do simd collapse(3) lastprivate(k) private(i,j) + do i = 1, n, 2 ! { dg-message "sorry, unimplemented: non-rectangular loop nest with step other than constant 1 or -1 for 'i'" } + do j = 1, m + do k = i - 41, p ! { dg-note "Used here" } + if (k < 1 - 41 .or. k > p) error stop + end do + end do + end do + if (k /= p + 1) error stop + + ! Same - but with lastprivate(i,j) + + !$omp parallel do simd collapse(3) lastprivate(k) lastprivate(i,j) + do i = 1, n + do j = 1, m, 2 ! { dg-message "sorry, unimplemented: non-rectangular loop nest with step other than constant 1 or -1 for 'j'" } + do k = j - 41, p ! { dg-note "Used here" } + if (k < 1 - 41 .or. 
k > p) error stop + end do + end do + end do + if (k /= p + 1) error stop + if (i /= n + 1 .or. j /= m + 2) error stop + + !$omp parallel do simd collapse(3) lastprivate(k) lastprivate(i,j) + do i = 1, n, 2 ! { dg-message "sorry, unimplemented: non-rectangular loop nest with step other than constant 1 or -1 for 'i'" } + do j = 1, m + do k = i - 41, p ! { dg-note "Used here" } + if (k < 1 - 41 .or. k > p) error stop + end do + end do + end do + if (k /= p + 1) error stop + if (i /= n + 2 .or. j /= m + 1) error stop + +end subroutine lastprivate_check_do_simd_1 + + + +! Same but with do +subroutine lastprivate_check_do_1 + integer :: n,m,p, i,j,k + + n = 11 + m = 23 + p = 27 + + ! Use 'i' or 'j', unit step on 'i' or on 'j' -> 4 loops + ! Then same, except use non-unit step for 'k' + + !$omp parallel do collapse(3) lastprivate(k) + do i = 1, n + do j = 1, m, 2 ! { dg-message "sorry, unimplemented: non-rectangular loop nest with step other than constant 1 or -1 for 'j'" } + do k = j - 41, p ! { dg-note "Used here" } + if (k < 1 - 41 .or. k > p) error stop + end do + end do + end do + if (k /= p + 1) error stop + + !$omp parallel do collapse(3) lastprivate(k) + do i = 1, n, 2 ! { dg-message "sorry, unimplemented: non-rectangular loop nest with step other than constant 1 or -1 for 'i'" } + do j = 1, m + do k = i - 41, p ! { dg-note "Used here" } + if (k < 1 - 41 .or. k > p) error stop + end do + end do + end do + if (k /= p + 1) error stop + + ! Same but 'private' for all (i,j) vars + + !$omp parallel do collapse(3) lastprivate(k) private(i,j) + do i = 1, n + do j = 1, m, 2 ! { dg-message "sorry, unimplemented: non-rectangular loop nest with step other than constant 1 or -1 for 'j'" } + do k = j - 41, p ! { dg-note "Used here" } + if (k < 1 - 41 .or. k > p) error stop + end do + end do + end do + if (k /= p + 1) error stop + + !$omp parallel do collapse(3) lastprivate(k) private(i,j) + do i = 1, n, 2 ! 
{ dg-message "sorry, unimplemented: non-rectangular loop nest with step other than constant 1 or -1 for 'i'" } + do j = 1, m + do k = i - 41, p ! { dg-note "Used here" } + if (k < 1 - 41 .or. k > p) error stop + end do + end do + end do + if (k /= p + 1) error stop + + ! Same - but with lastprivate(i,j) + + !$omp parallel do collapse(3) lastprivate(k) lastprivate(i,j) + do i = 1, n + do j = 1, m, 2 ! { dg-message "sorry, unimplemented: non-rectangular loop nest with step other than constant 1 or -1 for 'j'" } + do k = j - 41, p ! { dg-note "Used here" } + if (k < 1 - 41 .or. k > p) error stop + end do + end do + end do + if (k /= p + 1) error stop + if (i /= n + 1 .or. j /= m + 2) error stop + + !$omp parallel do collapse(3) lastprivate(k) lastprivate(i,j) + do i = 1, n, 2 ! { dg-message "sorry, unimplemented: non-rectangular loop nest with step other than constant 1 or -1 for 'i'" } + do j = 1, m + do k = i - 41, p ! { dg-note "Used here" } + if (k < 1 - 41 .or. k > p) error stop + end do + end do + end do + if (k /= p + 1) error stop + if (i /= n + 2 .or. j /= m + 1) error stop + +end subroutine lastprivate_check_do_1 + + + +subroutine lastprivate_check_2 + integer :: n,m,p, i,j,k,ll + + n = 11 + m = 23 + p = 27 + + !$omp parallel do simd collapse(3) lastprivate(p) + do i = 1, n + do j = 1, m,2 ! { dg-message "sorry, unimplemented: non-rectangular loop nest with step other than constant 1 or -1 for 'j'" } + do k = 1, j + 41 ! { dg-note "Used here" } + do ll = 1, p, 2 + if (k > 23 + 41 .or. k < 1) error stop + end do + end do + end do + end do + if (ll /= 29) error stop + + !$omp simd collapse(3) lastprivate(p) + do i = 1, n + do j = 1, m,2 ! { dg-message "sorry, unimplemented: non-rectangular loop nest with step other than constant 1 or -1 for 'j'" } + do k = 1, j + 41 ! { dg-note "Used here" } + do ll = 1, p, 2 + if (k > 23 + 41 .or. 
k < 1) error stop + end do + end do + end do + end do + if (ll /= 29) error stop + + !$omp simd collapse(3) lastprivate(k) + do i = 1, n,2 ! { dg-message "sorry, unimplemented: non-rectangular loop nest with step other than constant 1 or -1 for 'i'" } + do j = 1, m + do k = 1, i + 41 ! { dg-note "Used here" } + if (k > 11 + 41 .or. k < 1) error stop + end do + end do + end do +if (k /= 53) then + print *, k, 53 + error stop +endif + +! - Same but without 'private': +!$omp simd collapse(3) lastprivate(k) +do i = 1, n + do j = 1, m,2 ! { dg-message "sorry, unimplemented: non-rectangular loop nest with step other than constant 1 or -1 for 'j'" } + do k = 1, j + 41 ! { dg-note "Used here" } + if (k > 23 + 41 .or. k < 1) error stop + end do + end do +end do +if (k /= 65) then + print *, k, 65 + error stop +endif + + +!$omp simd collapse(3) lastprivate(k) +do i = 1, n,2 ! { dg-message "sorry, unimplemented: non-rectangular loop nest with step other than constant 1 or -1 for 'i'" } + do j = 1, m + do k = 1, i + 41 ! { dg-note "Used here" } + if (k > 11 + 41 .or. k < 1) error stop + end do + end do +end do +if (k /= 53) then + print *, k, 53 + error stop +endif + +! - all with lastprivate +!$omp simd collapse(3) lastprivate(k) lastprivate(i, j) +do i = 1, n + do j = 1, m,2 ! { dg-message "sorry, unimplemented: non-rectangular loop nest with step other than constant 1 or -1 for 'j'" } + do k = 1, j + 41 ! { dg-note "Used here" } + if (k > 23 + 41 .or. k < 1) error stop + end do + end do +end do +if (k /= 65) then + print *, k, 65 + error stop +endif + + +!$omp simd collapse(3) lastprivate(k) lastprivate(i, j) +do i = 1, n,2 ! { dg-message "sorry, unimplemented: non-rectangular loop nest with step other than constant 1 or -1 for 'i'" } + do j = 1, m + do k = 1, i + 41 ! { dg-note "Used here" } + if (k > 11 + 41 .or. 
k < 1) error stop + end do + end do +end do +if (k /= 53) then + print *, k, 53 + error stop +endif + +end +end module m + +program main + use m + implicit none (type, external) + call lastprivate_check_simd_1 + call lastprivate_check_do_simd_1 + call lastprivate_check_do_1 + call lastprivate_check_2 +end diff --git a/libgomp/testsuite/libgomp.fortran/non-rectangular-loop-2.f90 b/libgomp/testsuite/libgomp.fortran/non-rectangular-loop-2.f90 new file mode 100644 index 0000000..0cea61e --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/non-rectangular-loop-2.f90 @@ -0,0 +1,243 @@ +! { dg-do run } +! { dg-additional-options "-fdump-tree-original -fcheck=all" } + +! PR fortran/107424 + +! Nonrectangular loop nests checks + +! Valid patterns are: +! (1) a2 - var-outer +! (2) a1 * var-outer +! (3) a1 * var-outer + a2 +! (4) a2 + a1 * var-outer +! (5) a1 * var-outer - a2 +! (6) a2 - a1 * var-outer +! (7) var-outer * a1 +! (8) var-outer * a1 + a2 +! (9) a2 + var-outer * a1 +! (10) var-outer * a1 - a2 +! (11) a2 - var-outer * a1 + +module m +contains + + +! { dg-final { scan-tree-dump-times "for \\(one_two_inner = one_two_outer \\* -1 \\+ one_a2; one_two_inner <= one_two_outer \\* two_a1 \\+ 0; one_two_inner = one_two_inner \\+ 1\\)" 1 original } } + +! (1) a2 - var-outer +! (2) a1 * var-outer +subroutine one_two() + implicit none + integer :: one_a2 + integer :: two_a1 + integer :: one_two_outer, one_two_inner + integer :: i, j + integer, allocatable :: var(:,:) + + one_a2 = 13 + two_a1 = 5 + allocate(var(1:10, one_a2 - 10:two_a1 * 10), & + source=0) + if (size(var) <= 4) error stop + + !$omp simd collapse(2) + do one_two_outer = 1, 10 + do one_two_inner = one_a2 - one_two_outer, two_a1 * one_two_outer + !$omp atomic update + var(one_two_outer,one_two_inner) = var(one_two_outer,one_two_inner) + 2 + end do + end do + + do i = 1, 10 + do j = one_a2 - i, two_a1 * i + if (var(i,j) /= 2) error stop + end do + end do +end + + +! 
{ dg-final { scan-tree-dump-times "for \\(three_four_inner = three_four_outer \\* three_a1 \\+ three_a2; three_four_inner <= three_four_outer \\* four_a1 \\+ four_a2; three_four_inner = three_four_inner \\+ 1\\)" 1 original } } + +! (3) a1 * var-outer + a2 +! (4) a2 + a1 * var-outer +subroutine three_four() + implicit none + integer :: three_a1, three_a2 + integer :: four_a1, four_a2 + integer :: three_four_outer, three_four_inner + integer :: i, j + integer, allocatable :: var(:,:) + + three_a1 = 2 + three_a2 = 3 + four_a1 = 3 + four_a2 = 5 + allocate(var(1:10, three_a1 * 1 + three_a2:four_a2 + four_a1 * 10), & + source=0) + if (size(var) <= 4) error stop + + !$omp simd collapse(2) + do three_four_outer = 1, 10 + do three_four_inner = three_a1 * three_four_outer + three_a2, four_a2 + four_a1 * three_four_outer + !$omp atomic update + var(three_four_outer, three_four_inner) = var(three_four_outer, three_four_inner) + 2 + end do + end do + do i = 1, 10 + do j = three_a1 * i + three_a2, four_a2 + four_a1 * i + if (var(i,j) /= 2) error stop + end do + end do +end + + +! { dg-final { scan-tree-dump-times "for \\(five_six_inner = five_six_outer \\* five_a1 \\+ D\\.\[0-9\]+; five_six_inner <= five_six_outer \\* D\\.\[0-9\]+ \\+ six_a2; five_six_inner = five_six_inner \\+ 1\\)" 1 original } } + +! (5) a1 * var-outer - a2 +! 
(6) a2 - a1 * var-outer +subroutine five_six() + implicit none + integer :: five_a1, five_a2 + integer :: six_a1, six_a2 + integer :: five_six_outer, five_six_inner + integer :: i, j + integer, allocatable :: var(:,:) + + five_a1 = 2 + five_a2 = -3 + six_a1 = 3 + six_a2 = 20 + allocate(var(1:10, five_a1 * 1 - five_a2:six_a2 - six_a1 * 1), & + source=0) + if (size(var) <= 4) error stop + + !$omp simd collapse(2) + do five_six_outer = 1, 10 + do five_six_inner = five_a1 * five_six_outer - five_a2, six_a2 - six_a1 * five_six_outer + !$omp atomic update + var(five_six_outer, five_six_inner) = var(five_six_outer, five_six_inner) + 2 + end do + end do + + do i = 1, 10 + do j = five_a1 * i - five_a2, six_a2 - six_a1 * i + if (var(i,j) /= 2) error stop + end do + end do +end + + +! { dg-final { scan-tree-dump-times "for \\(seven_eight_inner = seven_eight_outer \\* seven_a1 \\+ 0; seven_eight_inner <= seven_eight_outer \\* eight_a1 \\+ eight_a2; seven_eight_inner = seven_eight_inner \\+ 1\\)" 1 original } } + +! (7) var-outer * a1 +! (8) var-outer * a1 + a2 +subroutine seven_eight() + implicit none + integer :: seven_a1 + integer :: eight_a1, eight_a2 + integer :: seven_eight_outer, seven_eight_inner + integer :: i, j + integer, allocatable :: var(:,:) + + seven_a1 = 3 + eight_a1 = 2 + eight_a2 = -4 + allocate(var(1:10, 1 * seven_a1 : 10 * eight_a1 + eight_a2), & + source=0) + if (size(var) <= 4) error stop + + !$omp simd collapse(2) + do seven_eight_outer = 1, 10 + do seven_eight_inner = seven_eight_outer * seven_a1, seven_eight_outer * eight_a1 + eight_a2 + !$omp atomic update + var(seven_eight_outer, seven_eight_inner) = var(seven_eight_outer, seven_eight_inner) + 2 + end do + end do + + do i = 1, 10 + do j = i * seven_a1, i * eight_a1 + eight_a2 + if (var(i,j) /= 2) error stop + end do + end do +end + + +! 
{ dg-final { scan-tree-dump-times "for \\(nine_ten_inner = nine_ten_outer \\* nine_a1 \\+ nine_a2; nine_ten_inner <= nine_ten_outer \\* ten_a1 \\+ D\\.\[0-9\]+; nine_ten_inner = nine_ten_inner \\+ 1\\)" 1 original } } + +! (9) a2 + var-outer * a1 +! (10) var-outer * a1 - a2 +subroutine nine_ten() + implicit none + integer :: nine_a1, nine_a2 + integer :: ten_a1, ten_a2 + integer :: nine_ten_outer, nine_ten_inner + integer :: i, j + integer, allocatable :: var(:,:) + + nine_a1 = 3 + nine_a2 = 5 + ten_a1 = 2 + ten_a2 = 3 + allocate(var(1:10, nine_a2 + 1 * nine_a1:10 * ten_a1 - ten_a2), & + source=0) + if (size(var) <= 4) error stop + + !$omp simd collapse(2) + do nine_ten_outer = 1, 10 + do nine_ten_inner = nine_a2 + nine_ten_outer * nine_a1, nine_ten_outer * ten_a1 - ten_a2 + !$omp atomic update + var(nine_ten_outer, nine_ten_inner) = var(nine_ten_outer, nine_ten_inner) + 2 + end do + end do + + do i = 1, 10 + do j = nine_a2 + i * nine_a1, i * ten_a1 - ten_a2 + if (var(i,j) /= 2) error stop + end do + end do +end + + +! { dg-final { scan-tree-dump-times "for \\(eleven_inner = eleven_outer \\* D\\.\[0-9\]+ \\+ eleven_a2; eleven_inner <= 10; eleven_inner = eleven_inner \\+ 1\\)" 1 original } } + +! 
(11) a2 - var-outer * a1 + +subroutine eleven() + implicit none + integer :: eleven_a1, eleven_a2 + integer :: eleven_outer, eleven_inner + integer :: i, j + integer, allocatable :: var(:,:) + + eleven_a1 = 2 + eleven_a2 = 3 + allocate(var(1:10, eleven_a2 - 10 * eleven_a1 : 10), & + source=0) + if (size(var) <= 4) error stop + + !$omp simd collapse(2) + do eleven_outer = 1, 10 + do eleven_inner = eleven_a2 - eleven_outer * eleven_a1, 10 + !$omp atomic update + var(eleven_outer, eleven_inner) = var(eleven_outer, eleven_inner) + 2 + end do + end do + + do i = 1, 10 + do j = eleven_a2 - i * eleven_a1, 10 + if (var(i,j) /= 2) error stop + end do + end do +end +end module m + +program main +use m +implicit none +call one_two() +call three_four() +call five_six() +call seven_eight() +call nine_ten() +call eleven() +end diff --git a/libgomp/testsuite/libgomp.fortran/non-rectangular-loop-3.f90 b/libgomp/testsuite/libgomp.fortran/non-rectangular-loop-3.f90 new file mode 100644 index 0000000..c97cd99 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/non-rectangular-loop-3.f90 @@ -0,0 +1,212 @@ +! { dg-additional-options "-fdump-tree-original" } +! PR fortran/107424 + +module m +contains +subroutine foo (av, avo, a0, a0o, a1, a2, a3, a4) +implicit none + +integer, value :: av +integer, value, optional :: avo +integer :: a0 +integer, optional :: a0o +integer, pointer :: a1 +integer, pointer, optional :: a2 +integer, allocatable :: a3 +integer, allocatable, optional :: a4 +integer :: a5 +integer, pointer :: a6 +integer, allocatable :: a7 +integer :: arr(20,10), ref(20,10) + +integer :: j, i + +allocate(a6, a7) + +ref = 44 +do i = 1, 10 + do j = i, 20 + ref(j, i) = j + 100 * i + end do +end do + +! { dg-final { scan-tree-dump-times "for \\(av = 1; av <= 10; av = av \\+ 1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "for \\(j = av \\* 1 \\+ 0; j <= 20; j = j \\+ 1\\)" 1 "original" } } +! 
-> no temp var +arr = 44 +av = 99; j = 99 +!$omp simd collapse(2) lastprivate(av,j) +do av = 1, 10 + do j = av, 20 + arr(j, av) = j + 100 * av + end do +end do +if (any (ref /= arr)) error stop +if (av /= 11 .or. j /= 21) error stop + +! { dg-final { scan-tree-dump-times "for \\(avo = 1; avo <= 10; avo = avo \\+ 1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "for \\(j = avo \\* 1 \\+ 0; j <= 20; j = j \\+ 1\\)" 1 "original" } } +! -> no temp var +arr = 44 +avo = 99; j = 99 +!$omp simd collapse(2) lastprivate(avo, j) +do avo = 1, 10 + do j = avo, 20 + arr(j, avo) = j + 100 * avo + end do +end do +if (any (ref /= arr)) error stop +if (avo /= 11 .or. j /= 21) error stop + +! { dg-final { scan-tree-dump-times "for \\(a0\\.\[0-9\]+ = 1; a0\\.\[0-9\]+ <= 10; a0\\.\[0-9\]+ = a0\\.\[0-9\]+ \\+ 1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "for \\(j = a0\\.\[0-9\]+ \\* 1 \\+ 0; j <= 20; j = j \\+ 1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "\\*a0 = a0\\.\[0-9\]+;" 1 "original" } } +arr = 44 +a0 = 99; j = 99 +!$omp simd collapse(2) lastprivate(a0,j) +do a0 = 1, 10 + do j = a0, 20 + arr(j, a0) = j + 100 * a0 + end do +end do +if (any (ref /= arr)) error stop +if (a0 /= 11 .or. j /= 21) error stop + +! { dg-final { scan-tree-dump-times "for \\(a0o\\.\[0-9\]+ = 1; a0o\\.\[0-9\]+ <= 10; a0o\\.\[0-9\]+ = a0o\\.\[0-9\]+ \\+ 1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "for \\(j = a0o\\.\[0-9\]+ \\* 1 \\+ 0; j <= 20; j = j \\+ 1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "\\*a0o = a0o\\.\[0-9\]+;" 1 "original" } } +arr = 44 +a0o = 99; j = 99 +!$omp simd collapse(2) lastprivate(a0o,j) +do a0o = 1, 10 + do j = a0o, 20 + arr(j, a0o) = j + 100 * a0o + end do +end do +if (any (ref /= arr)) error stop +if (a0o /= 11 .or. j /= 21) error stop + +! { dg-final { scan-tree-dump-times "for \\(a1\\.\[0-9\]+ = 1; a1\\.\[0-9\]+ <= 10; a1\\.\[0-9\]+ = a1\\.\[0-9\]+ \\+ 1\\)" 1 "original" } } +! 
{ dg-final { scan-tree-dump-times "for \\(j = a1\\.\[0-9\]+ \\* 1 \\+ 0; j <= 20; j = j \\+ 1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "\\*\\*a1 = a1\\.\[0-9\]+;" 1 "original" } } +arr = 44 +a1 = 99; j = 99 +! no last private for 'a1' as "The initial status of a private pointer is undefined." +!$omp simd collapse(2) lastprivate(j) +do a1 = 1, 10 + do j = a1, 20 + arr(j, a1) = j + 100 * a1 + end do +end do +if (any (ref /= arr)) error stop +if (j /= 21) error stop + +! { dg-final { scan-tree-dump-times "for \\(a2\\.\[0-9\]+ = 1; a2\\.\[0-9\]+ <= 10; a2\\.\[0-9\]+ = a2\\.\[0-9\]+ \\+ 1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "for \\(j = a2\\.\[0-9\]+ \\* 1 \\+ 0; j <= 20; j = j \\+ 1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "\\*\\*a2 = a2\\.\[0-9\]+;" 1 "original" } } +arr = 44 +a2 = 99; j = 99 +! no last private for 'a2' as "The initial status of a private pointer is undefined." +!$omp simd collapse(2) lastprivate(j) +do a2 = 1, 10 + do j = a2, 20 + arr(j, a2) = j + 100 * a2 + end do +end do +if (any (ref /= arr)) error stop +if (j /= 21) error stop + +! { dg-final { scan-tree-dump-times "for \\(a3\\.\[0-9\]+ = 1; a3\\.\[0-9\]+ <= 10; a3\\.\[0-9\]+ = a3\\.\[0-9\]+ \\+ 1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "for \\(j = a3\\.\[0-9\]+ \\* 1 \\+ 0; j <= 20; j = j \\+ 1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "\\*\\*a3 = a3\\.\[0-9\]+;" 1 "original" } } +arr = 44 +a3 = 99; j = 99 +!$omp simd collapse(2) lastprivate(a3,j) +do a3 = 1, 10 + do j = a3, 20 + arr(j, a3) = j + 100 * a3 + end do +end do +if (any (ref /= arr)) error stop +if (a3 /= 11 .or. j /= 21) error stop + +! { dg-final { scan-tree-dump-times "for \\(a4\\.\[0-9\]+ = 1; a4\\.\[0-9\]+ <= 10; a4\\.\[0-9\]+ = a4\\.\[0-9\]+ \\+ 1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "for \\(j = a4\\.\[0-9\]+ \\* 1 \\+ 0; j <= 20; j = j \\+ 1\\)" 1 "original" } } +! 
{ dg-final { scan-tree-dump-times "\\*\\*a4 = a4\\.\[0-9\]+;" 1 "original" } } +arr = 44 +a4 = 99; j = 99 +!$omp simd collapse(2) lastprivate(a4,j) +do a4 = 1, 10 + do j = a4, 20 + arr(j, a4) = j + 100 * a4 + end do +end do +if (any (ref /= arr)) error stop +if (a4 /= 11 .or. j /= 21) error stop + +! { dg-final { scan-tree-dump-times "for \\(a5 = 1; a5 <= 10; a5 = a5 \\+ 1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "for \\(j = a5 \\* 1 \\+ 0; j <= 20; j = j \\+ 1\\)" 1 "original" } } +! -> no temp var +arr = 44 +a5 = 99; j = 99 +!$omp simd collapse(2) lastprivate(a5,j) +do a5 = 1, 10 + do j = a5, 20 + arr(j, a5) = j + 100 * a5 + end do +end do +if (any (ref /= arr)) error stop +if (a5 /= 11 .or. j /= 21) error stop + +! { dg-final { scan-tree-dump-times "for \\(a6\\.\[0-9\]+ = 1; a6\\.\[0-9\]+ <= 10; a6\\.\[0-9\]+ = a6\\.\[0-9\]+ \\+ 1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "for \\(j = a6\\.\[0-9\]+ \\* 1 \\+ 0; j <= 20; j = j \\+ 1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "\\*a6 = a6\\.\[0-9\]+;" 1 "original" } } +arr = 44 +a6 = 99; j = 99 +! no last private for 'a6' as "The initial status of a private pointer is undefined." +!$omp simd collapse(2) lastprivate(j) +do a6 = 1, 10 + do j = a6, 20 + arr(j, a6) = j + 100 * a6 + end do +end do +if (any (ref /= arr)) error stop +if (j /= 21) error stop + +! { dg-final { scan-tree-dump-times "for \\(a7\\.\[0-9\]+ = 1; a7\\.\[0-9\]+ <= 10; a7\\.\[0-9\]+ = a7\\.\[0-9\]+ \\+ 1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "for \\(j = a7\\.\[0-9\]+ \\* 1 \\+ 0; j <= 20; j = j \\+ 1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "\\*a7 = a7\\.\[0-9\]+;" 1 "original" } } +arr = 44 +a7 = 99; j = 99 +!$omp simd collapse(2) lastprivate(a7,j) +do a7 = 1, 10 + do j = a7, 20 + arr(j, a7) = j + 100 * a7 + end do +end do +if (any (ref /= arr)) error stop +if (a7 /= 11 .or. 
j /= 21) error stop + +deallocate(a6, a7) +end + +end module m + + +use m +implicit none + +integer :: av +integer :: avo +integer :: a0 +integer :: a0o +integer, pointer :: a1 +integer, pointer :: a2 +integer, allocatable :: a3 +integer, allocatable :: a4 + +av = -99; avo = -99 +allocate(a1,a2,a3,a4) +call foo (av, avo, a0, a0o, a1, a2, a3, a4) +deallocate(a1,a2,a3,a4) +end diff --git a/libgomp/testsuite/libgomp.fortran/non-rectangular-loop-4.f90 b/libgomp/testsuite/libgomp.fortran/non-rectangular-loop-4.f90 new file mode 100644 index 0000000..ef2bd61 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/non-rectangular-loop-4.f90 @@ -0,0 +1,215 @@ +! { dg-additional-options "-fdump-tree-original" } +! PR fortran/107424 + +! Same as non-rectangular-loop-3.f90 but expr in upper bound + +module m +contains +subroutine foo (av, avo, a0, a0o, a1, a2, a3, a4) +implicit none + +integer, value :: av +integer, value, optional :: avo +integer :: a0 +integer, optional :: a0o +integer, pointer :: a1 +integer, pointer, optional :: a2 +integer, allocatable :: a3 +integer, allocatable, optional :: a4 +integer :: a5 +integer, pointer :: a6 +integer, allocatable :: a7 +integer :: arr(20,10), ref(20,10) + +integer :: j, i, lp_i, lp_j + +allocate(a6, a7) + +ref = 44 +do i = 1, 10 + do j = 1, i*2-1 + ref(j, i) = j + 100 * i + end do +end do +lp_i = i; lp_j = j + +! { dg-final { scan-tree-dump-times "for \\(av = 1; av <= 10; av = av \\+ 1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "for \\(j = 1; j <= av \\* 2 \\+ -1; j = j \\+ 1\\)" 1 "original" } } +! -> no temp var +arr = 44 +av = 99; j = 99 +!$omp simd collapse(2) lastprivate(av,j) +do av = 1, 10 + do j = 1, av*2-1 + arr(j, av) = j + 100 * av + end do +end do +if (any (ref /= arr)) error stop +if (av /= lp_i .or. j /= lp_j) error stop + +! { dg-final { scan-tree-dump-times "for \\(avo = 1; avo <= 10; avo = avo \\+ 1\\)" 1 "original" } } +! 
{ dg-final { scan-tree-dump-times "for \\(j = 1; j <= avo \\* 2 \\+ -1; j = j \\+ 1\\)" 1 "original" } } +! -> no temp var +arr = 44 +avo = 99; j = 99 +!$omp simd collapse(2) lastprivate(avo, j) +do avo = 1, 10 + do j = 1, avo*2-1 + arr(j, avo) = j + 100 * avo + end do +end do +if (any (ref /= arr)) error stop +if (avo /= lp_i .or. j /= lp_j) error stop + +! { dg-final { scan-tree-dump-times "for \\(a0\\.\[0-9\]+ = 1; a0\\.\[0-9\]+ <= 10; a0\\.\[0-9\]+ = a0\\.\[0-9\]+ \\+ 1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "for \\(j = 1; j <= a0\\.\[0-9\]+ \\* 2 \\+ -1; j = j \\+ 1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "\\*a0 = a0\\.\[0-9\]+;" 1 "original" } } +arr = 44 +a0 = 99; j = 99 +!$omp simd collapse(2) lastprivate(a0,j) +do a0 = 1, 10 + do j = 1, a0*2-1 + arr(j, a0) = j + 100 * a0 + end do +end do +if (any (ref /= arr)) error stop +if (a0 /= lp_i .or. j /= lp_j) error stop + +! { dg-final { scan-tree-dump-times "for \\(a0o\\.\[0-9\]+ = 1; a0o\\.\[0-9\]+ <= 10; a0o\\.\[0-9\]+ = a0o\\.\[0-9\]+ \\+ 1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "for \\(j = 1; j <= a0o\\.\[0-9\]+ \\* 2 \\+ -1; j = j \\+ 1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "\\*a0o = a0o\\.\[0-9\]+;" 1 "original" } } +arr = 44 +a0o = 99; j = 99 +!$omp simd collapse(2) lastprivate(a0o,j) +do a0o = 1, 10 + do j = 1, a0o*2-1 + arr(j, a0o) = j + 100 * a0o + end do +end do +if (any (ref /= arr)) error stop +if (a0o /= lp_i .or. j /= lp_j) error stop + +! { dg-final { scan-tree-dump-times "for \\(a1\\.\[0-9\]+ = 1; a1\\.\[0-9\]+ <= 10; a1\\.\[0-9\]+ = a1\\.\[0-9\]+ \\+ 1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "for \\(j = 1; j <= a1\\.\[0-9\]+ \\* 2 \\+ -1; j = j \\+ 1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "\\*\\*a1 = a1\\.\[0-9\]+;" 1 "original" } } +arr = 44 +a1 = 99; j = 99 +! no last private for 'a1' as "The initial status of a private pointer is undefined." 
+!$omp simd collapse(2) lastprivate(j) +do a1 = 1, 10 + do j = 1, a1*2-1 + arr(j, a1) = j + 100 * a1 + end do +end do +if (any (ref /= arr)) error stop +if (j /= lp_j) error stop + +! { dg-final { scan-tree-dump-times "for \\(a2\\.\[0-9\]+ = 1; a2\\.\[0-9\]+ <= 10; a2\\.\[0-9\]+ = a2\\.\[0-9\]+ \\+ 1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "for \\(j = 1; j <= a2\\.\[0-9\]+ \\* 2 \\+ -1; j = j \\+ 1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "\\*\\*a2 = a2\\.\[0-9\]+;" 1 "original" } } +arr = 44 +a2 = 99; j = 99 +! no last private for 'a2' as "The initial status of a private pointer is undefined." +!$omp simd collapse(2) lastprivate(j) +do a2 = 1, 10 + do j = 1, a2*2-1 + arr(j, a2) = j + 100 * a2 + end do +end do +if (any (ref /= arr)) error stop +if (j /= lp_j) error stop + +! { dg-final { scan-tree-dump-times "for \\(a3\\.\[0-9\]+ = 1; a3\\.\[0-9\]+ <= 10; a3\\.\[0-9\]+ = a3\\.\[0-9\]+ \\+ 1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "for \\(j = 1; j <= a3\\.\[0-9\]+ \\* 2 \\+ -1; j = j \\+ 1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "\\*\\*a3 = a3\\.\[0-9\]+;" 1 "original" } } +arr = 44 +a3 = 99; j = 99 +!$omp simd collapse(2) lastprivate(a3,j) +do a3 = 1, 10 + do j = 1, a3*2-1 + arr(j, a3) = j + 100 * a3 + end do +end do +if (any (ref /= arr)) error stop +if (a3 /= lp_i .or. j /= lp_j) error stop + +! { dg-final { scan-tree-dump-times "for \\(a4\\.\[0-9\]+ = 1; a4\\.\[0-9\]+ <= 10; a4\\.\[0-9\]+ = a4\\.\[0-9\]+ \\+ 1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "for \\(j = 1; j <= a4\\.\[0-9\]+ \\* 2 \\+ -1; j = j \\+ 1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "\\*\\*a4 = a4\\.\[0-9\]+;" 1 "original" } } +arr = 44 +a4 = 99; j = 99 +!$omp simd collapse(2) lastprivate(a4,j) +do a4 = 1, 10 + do j = 1, a4*2-1 + arr(j, a4) = j + 100 * a4 + end do +end do +if (any (ref /= arr)) error stop +if (a4 /= lp_i .or. j /= lp_j) error stop + +! 
{ dg-final { scan-tree-dump-times "for \\(a5 = 1; a5 <= 10; a5 = a5 \\+ 1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "for \\(j = 1; j <= a5 \\* 2 \\+ -1; j = j \\+ 1\\)" 1 "original" } } +! -> no temp var +arr = 44 +a5 = 99; j = 99 +!$omp simd collapse(2) lastprivate(a5,j) +do a5 = 1, 10 + do j = 1, a5*2-1 + arr(j, a5) = j + 100 * a5 + end do +end do +if (any (ref /= arr)) error stop +if (a5 /= lp_i .or. j /= lp_j) error stop + +! { dg-final { scan-tree-dump-times "for \\(a6\\.\[0-9\]+ = 1; a6\\.\[0-9\]+ <= 10; a6\\.\[0-9\]+ = a6\\.\[0-9\]+ \\+ 1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "for \\(j = 1; j <= a6\\.\[0-9\]+ \\* 2 \\+ -1; j = j \\+ 1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "\\*a6 = a6\\.\[0-9\]+;" 1 "original" } } +arr = 44 +a6 = 99; j = 99 +! no last private for 'a6' as "The initial status of a private pointer is undefined." +!$omp simd collapse(2) lastprivate(j) +do a6 = 1, 10 + do j = 1, a6*2-1 + arr(j, a6) = j + 100 * a6 + end do +end do +if (any (ref /= arr)) error stop +if (j /= lp_j) error stop + +! { dg-final { scan-tree-dump-times "for \\(a7\\.\[0-9\]+ = 1; a7\\.\[0-9\]+ <= 10; a7\\.\[0-9\]+ = a7\\.\[0-9\]+ \\+ 1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "for \\(j = 1; j <= a7\\.\[0-9\]+ \\* 2 \\+ -1; j = j \\+ 1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "\\*a7 = a7\\.\[0-9\]+;" 1 "original" } } +arr = 44 +a7 = 99; j = 99 +!$omp simd collapse(2) lastprivate(a7,j) +do a7 = 1, 10 + do j = 1, a7*2-1 + arr(j, a7) = j + 100 * a7 + end do +end do +if (any (ref /= arr)) error stop +if (a7 /= lp_i .or. 
j /= lp_j) error stop + +deallocate(a6, a7) +end + +end module m + + +use m +implicit none + +integer :: av +integer :: avo +integer :: a0 +integer :: a0o +integer, pointer :: a1 +integer, pointer :: a2 +integer, allocatable :: a3 +integer, allocatable :: a4 + +av = -99; avo = -99 +allocate(a1,a2,a3,a4) +call foo (av, avo, a0, a0o, a1, a2, a3, a4) +deallocate(a1,a2,a3,a4) +end diff --git a/libgomp/testsuite/libgomp.fortran/non-rectangular-loop-5.f90 b/libgomp/testsuite/libgomp.fortran/non-rectangular-loop-5.f90 new file mode 100644 index 0000000..643ab79 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/non-rectangular-loop-5.f90 @@ -0,0 +1,28 @@ +! { dg-do compile } +! { dg-additional-options "-msse2" { target sse2_runtime } } +! { dg-additional-options "-mavx" { target avx_runtime } } + +! PR fortran/107424 + +! Nonrectangular loop nests checks + +!$omp simd collapse(2) +do i = 1, 10 + do j = i, 10, 2 ! { dg-message "sorry, unimplemented: non-rectangular loop nest with step other than constant 1 or -1 for 'j'" } + end do +end do + +!$omp do collapse(2) lastprivate(j) ! { dg-error "lastprivate variable 'j' is private in outer context" } +do i = 1, 10 + do j = i, 10, 2 ! { dg-message "sorry, unimplemented: non-rectangular loop nest with step other than constant 1 or -1 for 'j'" } + end do +end do +if (i /= 11) stop 1 + +!$omp simd collapse(2) lastprivate(j) +do i = 1, 10 + do j = i, 10, 2 ! { dg-message "sorry, unimplemented: non-rectangular loop nest with step other than constant 1 or -1 for 'j'" } + end do +end do +if (i /= 11) stop 1 +end diff --git a/libgomp/testsuite/libgomp.fortran/order-reproducible-1.f90 b/libgomp/testsuite/libgomp.fortran/order-reproducible-1.f90 index ba416b9..35a030e 100644 --- a/libgomp/testsuite/libgomp.fortran/order-reproducible-1.f90 +++ b/libgomp/testsuite/libgomp.fortran/order-reproducible-1.f90 @@ -1,5 +1,6 @@ ! { dg-additional-sources my-usleep.c } -! 
{ dg-prune-output "command-line option '-fintrinsic-modules-path=.*' is valid for Fortran but not for C" } +! { dg-additional-options -Wno-complain-wrong-lang } + program main implicit none interface diff --git a/libgomp/testsuite/libgomp.fortran/order-reproducible-2.f90 b/libgomp/testsuite/libgomp.fortran/order-reproducible-2.f90 index 9d72020..c888251 100644 --- a/libgomp/testsuite/libgomp.fortran/order-reproducible-2.f90 +++ b/libgomp/testsuite/libgomp.fortran/order-reproducible-2.f90 @@ -1,5 +1,6 @@ ! { dg-additional-sources my-usleep.c } -! { dg-prune-output "command-line option '-fintrinsic-modules-path=.*' is valid for Fortran but not for C" } +! { dg-additional-options -Wno-complain-wrong-lang } + program main implicit none interface diff --git a/libgomp/testsuite/libgomp.fortran/reverse-offload-2.f90 b/libgomp/testsuite/libgomp.fortran/reverse-offload-2.f90 new file mode 100644 index 0000000..067639bc67 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/reverse-offload-2.f90 @@ -0,0 +1,72 @@ +! { dg-additional-options "-foffload-options=nvptx-none=-misa=sm_35" { target { offload_target_nvptx } } } + +implicit none +!$omp requires reverse_offload +integer :: A(50), A2(50) +integer :: i, error +logical :: shared_mem + +error = 0 +shared_mem = .false. +A = [(i, i=1,50)] +A2 = [(i, i=1,50)] + +!$omp target map(to: shared_mem) + shared_mem = .true. 
+!$omp end target + +!$omp target map(to: A(20:40), A2(20:40)) map(from: error) +block + integer :: B(10), C(10) + B = 99 + C = 88 + A(20:40) = -A(20:40) + A2(20:40) = -A2(20:40) + + !$omp target device (ancestor:1) & + !$omp& map(to: A(25:35)) map(always, to: A2(25:35)) & + !$omp& map(from:B(4:8)) map(tofrom:C(4:8)) + if (shared_mem) then + if (any (A(25:35) /= [(-i,i=25,35)])) stop 20 + else + if (any (A(25:35) /= [( i,i=25,35)])) stop 21 + end if + if (any (A2(25:35) /= [(-i,i=25,35)])) stop 22 + if (any (C(4:8) /= 88)) stop 23 + + A(25:35) = -A(25:35)*10 + A2(25:35) = -A2(25:35)*10 + B(4:8) = [4,5,6,7,8] + C(4:8) = [-4,-5,-6,-7,-8] + !$omp end target + + if (any (B(1:3) /= 99) .or. any (B(9:10) /= 99)) then + error = 30 + elseif (any (B(4:8) /= [4,5,6,7,8])) then + error = 31 + elseif (any (C(1:3) /= 88) .or. any (C(9:10) /= 88)) then + error = 32 + elseif (any (C(4:8) /= [-4,-5,-6,-7,-8])) then + error = 33 + else + error = 0 + endif +end block + +if (error /= 0) stop error + +if (shared_mem) then + if (any (A(1:19) /= [( i, i=1,19)])) stop 1 + if (any (A(20:24) /= [(-i, i=20,24)])) stop 2 + if (any (A(36:40) /= [(-i, i=36,40)])) stop 3 + if (any (A(41:50) /= [( i, i=41,50)])) stop 4 + + if (any (A(25:35) /= [( 10*i, i=25,35)])) stop 5 +else + if (any (A(1:24) /= [( i, i=1,24)])) stop 6 + if (any (A(36:50) /= [( i, i=36,50)])) stop 7 + + if (any (A(25:35) /= [(-10*i, i=25,35)])) stop 8 +end if +if (any (A2(25:35) /= [( 10*i, i=25,35)])) stop 9 +end diff --git a/libgomp/testsuite/libgomp.fortran/reverse-offload-3.f90 b/libgomp/testsuite/libgomp.fortran/reverse-offload-3.f90 new file mode 100644 index 0000000..2fd2f5b --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/reverse-offload-3.f90 @@ -0,0 +1,68 @@ +! { dg-additional-options "-foffload-options=nvptx-none=-misa=sm_35" { target { offload_target_nvptx } } } + +implicit none +!$omp requires reverse_offload +integer :: A(50), A2(50), A3(50) +integer :: i +logical :: shared_mem + +shared_mem = .false. 
+A = [(3*i, i=1,50)] +A2 = [(7*i, i=1,50)] +A3 = [(11*i, i=1,50)] + +!$omp target map(to: shared_mem) + shared_mem = .true. +!$omp end target + +!$omp target map(to: A(20:40), A2(20:40), A3(20:40)) +block + integer :: C(10) + C = 88 + A(20:40) = -2*A(20:40) + A2(20:40) = -9*A2(20:40) + A3(20:40) = -13*A3(20:40) + + !$omp target device (ancestor:1) & + !$omp& map(from: A(25:35)) map(always, from: A2(25:35)) & + !$omp& map(alloc: A3(25:35)) map(alloc:C(4:8)) + if (shared_mem) then + if (any (A(25:35) /= [(-2*3*i, i=25,35)])) stop 1 + if (any (A2(25:35) /= [(-9*7*i, i=25,35)])) stop 2 + if (any (A3(25:35) /= [(-13*11*i, i=25,35)])) stop 3 + else + if (any (A(25:35) /= [(3*i, i=25,35)])) stop 4 + if (any (A2(25:35) /= [(7*i, i=25,35)])) stop 5 + if (any (A3(25:35) /= [(11*i, i=25,35)])) stop 6 + end if + + A(25:35) = A(25:35)*5 + A2(25:35) = A2(25:35)*8 + A3(25:35) = A3(25:35)*18 + C(4:8) = [4,5,6,7,8] + !$omp end target + + if (shared_mem) then + if (any (A(25:35) /= [(-2*3*5*i, i=25,35)])) stop 7 + if (any (A2(25:35) /= [(-9*7*8*i, i=25,35)])) stop 8 + if (any (A3(25:35) /= [(-13*11*18*i, i=25,35)])) stop 9 + if (any (C(4:8) /= [4,5,6,7,8])) stop 10 + else + if (any (A(25:35) /= [(-2*3*i, i=25,35)])) stop 11 + if (any (A2(25:35) /= [(7*8*i, i=25,35)])) stop 12 + if (any (A3(25:35) /= [(-13*11*i, i=25,35)])) stop 13 + if (any (C(4:8) /= 88)) stop 14 + end if +end block + +if (shared_mem) then + if (any (A(25:35) /= [(-2*3*5*i, i=25,35)])) stop + if (any (A2(25:35) /= [(-9*7*8*i, i=25,35)])) stop + if (any (A3(25:35) /= [(-13*11*18*i, i=25,35)])) stop +else + if (any (A(25:35) /= [(3*5*i, i=25,35)])) stop + if (any (A2(25:35) /= [(7*8*i, i=25,35)])) stop + if (any (A3(25:35) /= [(11*18*i, i=25,35)])) stop +end if + +end diff --git a/libgomp/testsuite/libgomp.fortran/reverse-offload-4.f90 b/libgomp/testsuite/libgomp.fortran/reverse-offload-4.f90 new file mode 100644 index 0000000..fb27aa7 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/reverse-offload-4.f90 @@ 
-0,0 +1,129 @@ +! { dg-additional-options "-foffload-options=nvptx-none=-misa=sm_35" { target { offload_target_nvptx } } } + +implicit none +!$omp requires reverse_offload + +type t2 + integer :: a, b, c +end type t2 + +type t + integer :: A(5), B(5), C(5) + integer, pointer :: P(:), P2 !Just some padding + type(t2) :: tt !Just some padding +end type t + +type(t) :: S1, S2 +logical :: shared_mem + +shared_mem = .false. + +!$omp target map(to: shared_mem) + shared_mem = .true. +!$omp end target + +s1%A = [1,2,3,4,5] +s1%B = [10,20,30,40,50] +s1%C = [11,22,33,44,55] +s2%A = 2*s1%A +s2%B = 2*s1%B +s2%C = 2*s1%C + +!$omp target & +!$omp& map(to: s1%b, s1%c) & +!$omp& map(to: s2%b, s2%c) +block + type(t) :: si1, si2, si3, si4 + + s1%B = -10 * s1%B + s1%C = -10 * s1%C + s2%B = -15 * s2%B + s2%C = -15 * s2%C + + si1%A = -1 * [1,2,3,4,5] + si1%B = -1 * [10,20,30,40,50] + si1%C = -1 * [11,22,33,44,55] + si2%A = -23 * [1,2,3,4,5] + si2%B = -23 * [10,20,30,40,50] + si2%C = -23 * [11,22,33,44,55] + + !$omp target device (ancestor:1) & + !$omp& map(to: si1%C, si1%B) & + !$omp& map(tofrom: si2%C, si2%B) & + !$omp& map(always, to: s1%B) & + !$omp& map( to: s2%B) + if (any (s1%A /= [1,2,3,4,5])) stop 1 + if (any (s1%B /= -10 * [10,20,30,40,50])) stop 2 + if (shared_mem) then + if (any (s1%C /= -10 * [11,22,33,44,55])) stop 4 + else + if (any (s1%C /= [11,22,33,44,55])) stop 3 + endif + if (any (s2%A /= 2 * [1,2,3,4,5])) stop 4 + if (shared_mem) then + if (any (s2%B /= -15 * 2 * [10,20,30,40,50])) stop 5 + if (any (s2%C /= -15 * 2 * [11,22,33,44,55])) stop 6 + else + if (any (s2%B /= 2 * [10,20,30,40,50])) stop 7 + if (any (s2%C /= 2 * [11,22,33,44,55])) stop 8 + endif + if (any (si1%B /= -1 * [10,20,30,40,50])) stop 9 + if (any (si1%C /= -1 * [11,22,33,44,55])) stop 10 + if (any (si2%B /= -23 * [10,20,30,40,50])) stop 10 + if (any (si2%C /= -23 * [11,22,33,44,55])) stop 11 + + s1%A = 5 * s1%A + s1%B = 7 * s1%B + s1%C = 13 * s1%C + s2%A = 9 * s2%A + s2%B = 21 * s2%B + s2%C = 31 * 
s2%C + si1%B = -11 * si1%B + si1%C = -13 * si1%C + si2%B = -27 * si2%B + si2%C = -29 * si2%C + !$omp end target + + if (shared_mem) then + if (any (s1%B /= -10 * 7 * [10,20,30,40,50])) stop 20 + if (any (s1%C /= -10 * 13 * [11,22,33,44,55])) stop 21 + else + if (any (s1%B /= -10 * [10,20,30,40,50])) stop 22 + if (any (s1%C /= -10 * [11,22,33,44,55])) stop 23 + endif + if (shared_mem) then + if (any (s2%B /= -15 * 2 * 21 * [10,20,30,40,50])) stop 24 + if (any (s2%C /= -15 * 2 * 31 * [11,22,33,44,55])) stop 25 + else + if (any (s2%B /= -15 * 2 * [10,20,30,40,50])) stop 26 + if (any (s2%C /= -15 * 2 * [11,22,33,44,55])) stop 27 + endif + if (any (si1%A /= -1 * [1,2,3,4,5])) stop 28 + if (shared_mem) then + if (any (si1%B /= -1 * (-11) * [10,20,30,40,50])) stop 29 + if (any (si1%C /= -1 * (-13) * [11,22,33,44,55])) stop 30 + else + if (any (si1%B /= -1 * [10,20,30,40,50])) stop 31 + if (any (si1%C /= -1 * [11,22,33,44,55])) stop 32 + endif + if (any (si2%A /= -23 * [1,2,3,4,5])) stop 33 + if (any (si2%B /= -23 * (-27) * [10,20,30,40,50])) stop 34 + if (any (si2%C /= -23 * (-29) * [11,22,33,44,55])) stop 35 +end block + +if (any (s1%A /= 5 * [1,2,3,4,5])) stop 40 +if (any (s1%B /= -10 * 7 * [10,20,30,40,50])) stop 41 +if (shared_mem) then + if (any (s1%C /= -10 * 13 * [11,22,33,44,55])) stop 42 +else + if (any (s1%C /= 13 * [11,22,33,44,55])) stop 43 +endif +if (any (s2%A /= 2 * 9 * [1,2,3,4,5])) stop 44 +if (shared_mem) then + if (any (s2%B /= -15 * 2 * 21 * [10,20,30,40,50])) stop 45 + if (any (s2%C /= -15 * 2 * 31 * [11,22,33,44,55])) stop 46 +else + if (any (s2%B /= 2 * 21 * [10,20,30,40,50])) stop 47 + if (any (s2%C /= 2 * 31 * [11,22,33,44,55])) stop 48 +endif +end diff --git a/libgomp/testsuite/libgomp.fortran/reverse-offload-5.f90 b/libgomp/testsuite/libgomp.fortran/reverse-offload-5.f90 new file mode 100644 index 0000000..16810eb --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/reverse-offload-5.f90 @@ -0,0 +1,102 @@ +! 
{ dg-additional-options "-foffload-options=nvptx-none=-misa=sm_35" { target { offload_target_nvptx } } } +! { dg-xfail-run-if "Copying on-device allocated memory fails with cuMemcpyDtoHAsync error: invalid argument" { offload_device_nvptx } } + +! Because of the nvptx fail, a non-device alloc version has been +! created: reverse-offload-5a.f90 + +implicit none +!$omp requires reverse_offload + +integer, allocatable :: A(:), A2(:), s1, s2 +integer :: i +logical :: shared_mem + +shared_mem = .false. + +a = [1,2,3,4] +a2 = [8,7,6,5] +s1 = 532 +s2 = 55 + +!$omp target map(to: shared_mem) + shared_mem = .true. +!$omp end target + +!$omp target map(to: A, A2, s1, s2) +block + integer, allocatable :: ai(:), ai2(:), ai3(:), si1, si2, si3 + + a = a * 2 + a2 = a2 * 3 + s1 = s1 * 4 + s2 = s2 * 5 + + ai = [23,35,86,43] + ai2 = [8,4,7,1] + si1 = 64 + si2 = 765 + + !$omp target device (ancestor:1) & + !$omp& map(to: A, s1, ai, si1) map(always, to: a2, s2) & + !$omp& map(tofrom: ai2, si2, ai3, si3) + if (shared_mem) then + if (any (a /= 2 * [1,2,3,4])) stop 1 + if (s1 /= 4 * 532) stop 2 + else + if (any (a /= [1,2,3,4])) stop 3 + if (s1 /= 532) stop 4 + endif + if (any (a2 /= 3 * [8,7,6,5])) stop 5 + if (s2 /= 5 * 55) stop 6 + if (any (ai /= [23,35,86,43])) stop 7 + if (any (ai2 /= [8,4,7,1])) stop 8 + if (si1 /= 64) stop 9 + if (si2 /= 765) stop 10 + if (allocated (ai3) .or. 
allocated(si3)) stop 26 + + a = a*3 + a2 = a2*7 + s1 = s1*11 + s2 = s2*5 + ai = ai*13 + ai2 = ai2*21 + si1 = si1*27 + si2 = si2*31 + !$omp end target + + if (shared_mem) then + if (any (a /= 3 * 2 * [1,2,3,4])) stop 11 + if (any (a2 /= 7 * 3 * [8,7,6,5])) stop 12 + if (s1 /= 11 * 4 * 532) stop 13 + if (s2 /= 5 * 5 * 55) stop 14 + if (any (ai /= 13 * [23,35,86,43])) stop 15 + if (si1 /= 27 * 64) stop 16 + else + if (any (a /= 2 * [1,2,3,4])) stop 17 + if (any (a2 /= 3 * [8,7,6,5])) stop 18 + if (s1 /= 4 * 532) stop 19 + if (s2 /= 5 * 55) stop 20 + if (any (ai /= [23,35,86,43])) stop 22 + if (si1 /= 64) stop 23 + endif + if (any (ai2 /= 21 * [8,4,7,1])) stop 24 + if (si2 /= 31 * 765) stop 25 + if (allocated (ai3) .or. allocated(si3)) stop 27 + + deallocate (ai, ai2, si1, si2) +end block + +if (shared_mem) then + if (any (a /= 3 * 2 * [1,2,3,4])) stop 30 + if (any (a2 /= 7 * 3 * [8,7,6,5])) stop 31 + if (s1 /= 11 * 4 * 532) stop 32 + if (s2 /= 5 * 5 * 55) stop 33 +else + if (any (a /= 3 * [1,2,3,4])) stop 34 + if (any (a2 /= 3 * 7 * [8,7,6,5])) stop 35 + if (s1 /= 11 * 532) stop 36 + if (s2 /= 5 * 5 * 55) stop 37 +endif + +deallocate (a, a2, s1, s2) +end diff --git a/libgomp/testsuite/libgomp.fortran/reverse-offload-5a.f90 b/libgomp/testsuite/libgomp.fortran/reverse-offload-5a.f90 new file mode 100644 index 0000000..914d10d --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/reverse-offload-5a.f90 @@ -0,0 +1,98 @@ +! { dg-additional-options "-foffload-options=nvptx-none=-misa=sm_35" { target { offload_target_nvptx } } } + +! Copying on-device allocated memory fails with cuMemcpyDtoHAsync error: invalid argument +! Hence, reverse-offload-5.f90 has been copied to *this* file, +! which uses on-host allocated vars - but only on the device side stack variables + +implicit none +!$omp requires reverse_offload + +integer, allocatable :: A(:), A2(:), s1, s2 +integer :: i,D(4) +logical :: shared_mem + +shared_mem = .false. 
+ +a = [1,2,3,4] +a2 = [8,7,6,5] +s1 = 532 +s2 = 55 + +!$omp target map(to: shared_mem) + shared_mem = .true. +!$omp end target + +!$omp target map(to: A, A2, s1, s2) +block + ! stack variables: + integer :: ai(4), ai2(4), si1, si2 + + a = a * 2 + a2 = a2 * 3 + s1 = s1 * 4 + s2 = s2 * 5 + + ai = [23,35,86,43] + ai2 = [8,4,7,1] + si1 = 64 + si2 = 765 + + !$omp target device (ancestor:1) & + !$omp& map(to: A, s1, ai, si1) map(always, to: a2, s2) & + !$omp& map(tofrom: ai2, si2) + if (shared_mem) then + if (any (a /= 2 * [1,2,3,4])) stop 1 + if (s1 /= 4 * 532) stop 2 + else + if (any (a /= [1,2,3,4])) stop 3 + if (s1 /= 532) stop 4 + endif + if (any (a2 /= 3 * [8,7,6,5])) stop 5 + if (s2 /= 5 * 55) stop 6 + if (any (ai /= [23,35,86,43])) stop 7 + if (any (ai2 /= [8,4,7,1])) stop 8 + if (si1 /= 64) stop 9 + if (si2 /= 765) stop 10 + + a = a*3 + a2 = a2*7 + s1 = s1*11 + s2 = s2*5 + ai = ai*13 + ai2 = ai2*21 + si1 = si1*27 + si2 = si2*31 + !$omp end target + + if (shared_mem) then + if (any (a /= 3 * 2 * [1,2,3,4])) stop 11 + if (any (a2 /= 7 * 3 * [8,7,6,5])) stop 12 + if (s1 /= 11 * 4 * 532) stop 13 + if (s2 /= 5 * 5 * 55) stop 14 + if (any (ai /= 13 * [23,35,86,43])) stop 15 + if (si1 /= 27 * 64) stop 16 + else + if (any (a /= 2 * [1,2,3,4])) stop 17 + if (any (a2 /= 3 * [8,7,6,5])) stop 18 + if (s1 /= 4 * 532) stop 19 + if (s2 /= 5 * 55) stop 20 + if (any (ai /= [23,35,86,43])) stop 22 + if (si1 /= 64) stop 23 + endif + if (any (ai2 /= 21 * [8,4,7,1])) stop 24 + if (si2 /= 31 * 765) stop 25 +end block +if (shared_mem) then + if (any (a /= 3 * 2 * [1,2,3,4])) stop 30 + if (any (a2 /= 7 * 3 * [8,7,6,5])) stop 31 + if (s1 /= 11 * 4 * 532) stop 32 + if (s2 /= 5 * 5 * 55) stop 33 +else + if (any (a /= 3 * [1,2,3,4])) stop 34 + if (any (a2 /= 3 * 7 * [8,7,6,5])) stop 35 + if (s1 /= 11 * 532) stop 36 + if (s2 /= 5 * 5 * 55) stop 37 +endif + +deallocate (a, a2, s1, s2) +end diff --git a/libgomp/testsuite/libgomp.fortran/reverse-offload-6.f90 
b/libgomp/testsuite/libgomp.fortran/reverse-offload-6.f90 new file mode 100644 index 0000000..ddb7008 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/reverse-offload-6.f90 @@ -0,0 +1,34 @@ +! +! Ensure that a mapping with no argument works +! + +! { dg-additional-options -foffload-options=nvptx-none=-misa=sm_35 { target offload_target_nvptx } } + +module m + implicit none (type, external) + integer :: x = 32 + integer :: dev_num2 = -1 +contains +subroutine foo() + use omp_lib, only: omp_get_device_num + x = x + 10 + dev_num2 = omp_get_device_num() +end +end module m + +use m +use omp_lib +!$omp requires reverse_offload +implicit none (type, external) +integer :: dev_num = -1 +!$omp target map(from:dev_num) + dev_num = omp_get_device_num() + ! This calls GOMP_target_ext with number of maps = 0 + !$omp target device(ancestor:1) + call foo + !$omp end target +!$omp end target + +if (omp_get_num_devices() > 0 .and. dev_num2 == dev_num) stop 1 +if (x /= 42) stop 2 +end diff --git a/libgomp/testsuite/libgomp.fortran/target-11.f90 b/libgomp/testsuite/libgomp.fortran/target-11.f90 new file mode 100644 index 0000000..b0faa2e --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/target-11.f90 @@ -0,0 +1,75 @@ +! Based on libgomp.c/target-23.c + +! { dg-additional-options "-fdump-tree-original" } +! { dg-final { scan-tree-dump "omp target update to\\(xxs\\\[3\\\] \\\[len: 2\\\]\\)" "original" } } +! { dg-final { scan-tree-dump "omp target update to\\(s\\.s \\\[len: 4\\\]\\)" "original" } } +! { dg-final { scan-tree-dump "omp target update from\\(s\\.s \\\[len: 4\\\]\\)" "original" } } + +module m + implicit none + type S_type + integer s + integer, pointer :: u(:) => null() + integer :: v(0:4) + end type S_type + integer, volatile :: z +end module m + +program main + use m + implicit none + integer, target :: u(0:9) = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + logical :: err + type (S_type) :: s + integer, pointer :: v(:) + integer(kind=2) :: xxs(5) + err = .false. 
+ s = S_type(9, v=[10, 11, 12, 13, 14]) + s%u(0:) => u(3:) + v(-4+3:) => u(3:) + xxs = [-1,-2,-3,-4,-5] + !$omp target enter data map (to: s%s, s%u, s%u(0:5)) map (alloc: s%v(1:4), xxs(3:5)) + s%s = s%s + 1 + u(3) = u(3) + 1 + s%v(1) = s%v(1) + 1 + xxs(3) = -33 + xxs(4) = -44 + xxs(5) = -55 + !$omp target update to (xxs(4)) + !$omp target update to (s%s) to (s%u(0:2), s%v(1:4)) + + !$omp target map (alloc: s%s, s%v(1:4)) map (from: err) + err = .false. + if (s%s /= 10 .or. s%v(1) /= 12 .or. s%v(2) /= 12 .or. s%v(3) /= 13) & + err = .true. + if (v(-1) /= 4 .or. v(0) /= 4 .or. v(1) /= 5 .or. v(2) /= 6 .or. v(3) /= 7) & + err = .true. + if (xxs(4) /= -44) & + err = .true. + s%s = s%s + 1 + s%v(2) = s%v(2) + 2 + v(-1) = 5 + v(3) = 9 + !$omp end target + + if (err) & + error stop + + !$omp target map (alloc: s%u(0:5)) + err = .false. + if (s%u(0) /= 5 .or. s%u(1) /= 4 .or. s%u(2) /= 5 .or. s%u(3) /= 6 .or. s%u(4) /= 9) & + err = .true. + s%u(1) = 12 + !$omp end target + + !$omp target update from (s%s, s%u(0:5)) from (s%v(1:4)) + if (err .or. s%s /= 11 .or. u(0) /= 0 .or. u(1) /= 1 .or. u(2) /= 2 .or. u(3) /= 5 & + .or. u(4) /= 12 .or. u(5) /= 5 .or. u(6) /= 6 .or. u(7) /= 9 .or. u(8) /= 8 & + .or. u(9) /= 9 .or. s%v(0) /= 10 .or. s%v(1) /= 12 .or. s%v(2) /= 14 & + .or. s%v(3) /= 13 .or. s%v(4) /= 14) & + error stop + ! !$omp target exit data map (release: s%s) + ! !$omp target exit data map (release: s%u(0:5)) + ! !$omp target exit data map (delete: s%v(1:4)) + ! 
!$omp target exit data map (release: s%s) +end diff --git a/libgomp/testsuite/libgomp.fortran/target-13.f90 b/libgomp/testsuite/libgomp.fortran/target-13.f90 new file mode 100644 index 0000000..6aacc77 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/target-13.f90 @@ -0,0 +1,159 @@ +module m + implicit none + type t + integer :: s, a(5) + end type t + + type t2 + integer :: s, a(5) + type(t) :: st, at(2:3) + end type t2 + + interface operator(/=) + procedure ne_compare_t + procedure ne_compare_t2 + end interface + +contains + + logical pure elemental function ne_compare_t (a, b) result(res) + type(t), intent(in) :: a, b + res = (a%s /= b%s) .or. any(a%a /= b%a) + end function + + logical pure elemental function ne_compare_t2 (a, b) result(res) + type(t2), intent(in) :: a, b + res = (a%s /= b%s) .or. any(a%a /= b%a) & + .or. (a%st /= b%st) .or. any(a%at /= b%at) + end function +end module m + +program p +use m +implicit none + +type(t2) :: var1, var2(5), var3(:) +type(t2) :: var1a, var2a(5), var3a(:) +allocatable :: var3, var3a +logical :: shared_memory = .false. + +!$omp target map(to: shared_memory) + shared_memory = .true. 
+!$omp end target + +var1 = T2(1, [1,2,3,4,5], T(11, [11,22,33,44,55]), & + [T(-11, [-11,-22,-33,-44,-55]), T(11, [11,22,33,44,55])]) + +var2 = [T2(101, [201,202,203,204,205], T(2011, [2011,2022,2033,2044,2055]), & + [T(-11, [-11,-22,-33,-44,-55]), T(11, [11,22,33,44,55])]), & + T2(111, [211,212,213,214,215], T(2111, [2111,2122,2133,2144,2155]), & + [T(-11, [-11,-22,-33,-44,-55]), T(11, [11,22,33,44,55])]), & + T2(121, [221,222,223,224,225], T(2211, [2211,2222,2233,2244,2255]), & + [T(-11, [-11,-22,-33,-44,-55]), T(11, [11,22,33,44,55])]), & + T2(131, [231,232,233,234,235], T(2311, [2311,2322,2333,2344,2355]), & + [T(-11, [-11,-22,-33,-44,-55]), T(11, [11,22,33,44,55])]), & + T2(141, [241,242,243,244,245], T(2411, [2411,2422,2433,2444,2455]), & + [T(-11, [-11,-22,-33,-44,-55]), T(11, [11,22,33,44,55])])] + +var3 = [T2(301, [401,402,403,404,405], T(4011, [4011,4022,4033,4044,4055]), & + [T(-11, [-11,-22,-33,-44,-55]), T(11, [11,22,33,44,55])]), & + T2(311, [411,412,413,414,415], T(4111, [4111,4122,4133,4144,4155]), & + [T(-11, [-11,-22,-33,-44,-55]), T(11, [11,22,33,44,55])]), & + T2(321, [421,422,423,424,425], T(4211, [4211,4222,4233,4244,4255]), & + [T(-11, [-11,-22,-33,-44,-55]), T(11, [11,22,33,44,55])]), & + T2(331, [431,432,433,434,435], T(4311, [4311,4322,4333,4344,4355]), & + [T(-11, [-11,-22,-33,-44,-55]), T(11, [11,22,33,44,55])]), & + T2(341, [441,442,443,444,445], T(4411, [4411,4422,4433,4444,4455]), & + [T(-11, [-11,-22,-33,-44,-55]), T(11, [11,22,33,44,55])])] + +var1a = var1 +var2a = var2 +var3a = var3 + +!$omp target enter data map(to:var1) +!$omp target enter data map(to:var2) +!$omp target enter data map(to:var3) + +! --------------- + +!$omp target update from(var1%at(2:3)) + +if (var1a /= var1) error stop +if (any (var2a /= var2)) error stop +if (any (var3a /= var3)) error stop + +! 
--------------- + +!$omp target + var1%st%s = 1243 + var2(3)%at(2) = T(123, [345,64,356,39,13]) + var2(3)%at(3) = T(48, [74,162,572,357,3]) +!$omp end target + +if (.not. shared_memory) then + if (var1 /= var1) error stop + if (any (var2a /= var2)) error stop + if (any (var3a /= var3)) error stop +endif + +!$omp target update from(var1%st) from(var2(3)%at(2:3)) + +var1a%st%s = 1243 +var2a(3)%at(2) = T(123, [345,64,356,39,13]) +var2a(3)%at(3) = T(48, [74,162,572,357,3]) +if (var1 /= var1) error stop +if (any (var2a /= var2)) error stop +if (any (var3a /= var3)) error stop + +! --------------- + +var3(1) = var2(1) +var1%at(2)%a = var2(1)%a +var1%at(3)%a = var2(2)%a + +var1a = var1 +var2a = var2 +var3a = var3 + +!$omp target update to(var3) to(var1%at(2:3)) + +!$omp target + var3(1)%s = var3(1)%s + 123 + var1%at(2)%a = var1%at(2)%a * 7 + var1%at(3)%s = var1%at(3)%s * (-3) +!$omp end target + +if (.not. shared_memory) then + if (var1 /= var1) error stop + if (any (var2a /= var2)) error stop + if (any (var3a /= var3)) error stop +endif + +var3a(1)%s = var3a(1)%s + 123 +var1a%at(2)%a = var1a%at(2)%a * 7 +var1a%at(3)%s = var1a%at(3)%s * (-3) + +block + integer, volatile :: i1,i2,i3,i4 + i1 = 1 + i2 = 2 + i3 = 1 + i4 = 2 + !$omp target update from(var3(i1:i2)) from(var1%at(i3:i4)) + i1 = 3 + i2 = 3 + i3 = 1 + i4 = 5 + !$omp target update from(var1%at(i1)%s) from(var1%at(i2)%a(i3:i4)) +end block + +if (var1 /= var1) error stop +if (any (var2a /= var2)) error stop +if (any (var3a /= var3)) error stop + +! 
--------------- + +!$omp target exit data map(from:var1) +!$omp target exit data map(from:var2) +!$omp target exit data map(from:var3) +end diff --git a/libgomp/testsuite/libgomp.fortran/target-enter-data-3.f90 b/libgomp/testsuite/libgomp.fortran/target-enter-data-3.f90 new file mode 100644 index 0000000..5d97566 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/target-enter-data-3.f90 @@ -0,0 +1,22 @@ +implicit none +type t + integer :: dummy + integer, pointer :: p1(:), p2(:) + integer :: dummy2 +end type t +type(t) :: var +integer :: i +allocate(var%p1(5),var%p2(2:4)) +var%p1 = [22,53,28,6,4] +var%p2 = [46,679,54] + +!$omp target enter data map(to:var%p1, var%p2) +!$omp target + if (.not.associated(var%p1).or.lbound(var%p1,1)/=1.or.ubound(var%p1,1)/=5) stop 1 + if (.not.associated(var%p2).or.lbound(var%p2,1)/=2.or.ubound(var%p2,1)/=4) stop 2 + if (any (var%p1 /= [22,53,28,6,4])) stop 3 + if (any (var%p2 /= [46,679,54])) stop 4 +!$omp end target +!!$omp target exit data map(from:var%p1, var%p2) +end + diff --git a/libgomp/testsuite/libgomp.fortran/target-nowait-array-section.f90 b/libgomp/testsuite/libgomp.fortran/target-nowait-array-section.f90 index 3613b73..783ad4f 100644 --- a/libgomp/testsuite/libgomp.fortran/target-nowait-array-section.f90 +++ b/libgomp/testsuite/libgomp.fortran/target-nowait-array-section.f90 @@ -1,4 +1,4 @@ -! Runs the the target region asynchrolously and checks for it +! Run the target region asynchronously and check it ! ! Note that map(alloc: work(:, i)) + nowait should be safe ! given that a nondescriptor array is used. However, it still diff --git a/libgomp/testsuite/libgomp.fortran/target10.f90 b/libgomp/testsuite/libgomp.fortran/target10.f90 index 3145255..4876cce 100644 --- a/libgomp/testsuite/libgomp.fortran/target10.f90 +++ b/libgomp/testsuite/libgomp.fortran/target10.f90 @@ -1,5 +1,4 @@ ! { dg-do run } -! 
{ dg-xfail-run-if TODO { offload_device_any_intel_mic } } program main use omp_lib diff --git a/libgomp/testsuite/libgomp.fortran/task-7.f90 b/libgomp/testsuite/libgomp.fortran/task-7.f90 new file mode 100644 index 0000000..e806bd7 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/task-7.f90 @@ -0,0 +1,22 @@ +! { dg-do run } + +program main + use omp_lib + implicit none + + !$omp task final (.true.) + if (.not. omp_in_final ()) & + error stop + !$omp task + if (.not. omp_in_final ()) & + error stop + !$omp target nowait + if (omp_in_final ()) & + error stop + !$omp end target + if (.not. omp_in_final ()) & + error stop + !$omp taskwait + !$omp end task + !$omp end task +end diff --git a/libgomp/testsuite/libgomp.fortran/task-8.f90 b/libgomp/testsuite/libgomp.fortran/task-8.f90 new file mode 100644 index 0000000..037c63b --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/task-8.f90 @@ -0,0 +1,13 @@ +! { dg-do run } + +program main + implicit none + integer :: i + i = 0 + !$omp task + !$omp target nowait private (i) + i = 1 + !$omp end target + !$omp taskwait + !$omp end task +end diff --git a/libgomp/testsuite/libgomp.fortran/task-detach-6.f90 b/libgomp/testsuite/libgomp.fortran/task-detach-6.f90 index 03a3b61..b2c476f 100644 --- a/libgomp/testsuite/libgomp.fortran/task-detach-6.f90 +++ b/libgomp/testsuite/libgomp.fortran/task-detach-6.f90 @@ -1,7 +1,5 @@ ! { dg-do run } - ! { dg-prune-output "command-line option '-fintrinsic-modules-path=.*' is valid for Fortran but not for C" } - ! Test tasks with detach clause on an offload device. Each device ! thread spawns off a chain of tasks, that can then be executed by ! any available thread. diff --git a/libgomp/testsuite/libgomp.fortran/task-in-explicit-1.f90 b/libgomp/testsuite/libgomp.fortran/task-in-explicit-1.f90 new file mode 100644 index 0000000..b6fa21b --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/task-in-explicit-1.f90 @@ -0,0 +1,113 @@ +! 
{ dg-do run } + +program main + use omp_lib + implicit none + integer :: i + + if (omp_in_explicit_task ()) & + error stop + !$omp task + if (.not. omp_in_explicit_task ()) & + error stop + !$omp end task + + !$omp task final (.true.) + if (.not. omp_in_explicit_task ()) & + error stop + !$omp task + if (.not. omp_in_explicit_task ()) & + error stop + !$omp end task + !$omp end task + + !$omp parallel + if (omp_in_explicit_task ()) & + error stop + !$omp task if (.false.) + if (.not. omp_in_explicit_task ()) & + error stop + !$omp task if (.false.) + if (.not. omp_in_explicit_task ()) & + error stop + !$omp end task + !$omp end task + !$omp task final (.true.) + if (.not. omp_in_explicit_task ()) & + error stop + !$omp end task + !$omp barrier + if (omp_in_explicit_task ()) & + error stop + !$omp taskloop num_tasks (24) + do i = 1, 32 + if (.not. omp_in_explicit_task ()) & + error stop + end do + !$omp masked + !$omp task + if (.not. omp_in_explicit_task ()) & + error stop + !$omp end task + !$omp end masked + !$omp barrier + if (omp_in_explicit_task ()) & + error stop + !$omp end parallel + + !$omp target + if (omp_in_explicit_task ()) & + error stop + !$omp task if (.false.) + if (.not. omp_in_explicit_task ()) & + error stop + !$omp end task + !$omp task + if (.not. omp_in_explicit_task ()) & + error stop + !$omp end task + !$omp end target + + !$omp target teams + !$omp distribute + do i = 1, 4 + if (omp_in_explicit_task ()) then + error stop + else + !$omp parallel + if (omp_in_explicit_task ()) & + error stop + !$omp task + if (.not. omp_in_explicit_task ()) & + error stop + !$omp end task + !$omp barrier + if (omp_in_explicit_task ()) & + error stop + !$omp end parallel + end if + end do + !$omp end target teams + + !$omp teams + !$omp distribute + do i = 1, 4 + if (omp_in_explicit_task ()) then + error stop + else + !$omp parallel + if (omp_in_explicit_task ()) & + error stop + !$omp task + if (.not. 
omp_in_explicit_task ()) & + error stop + !$omp end task + !$omp barrier + if (omp_in_explicit_task ()) & + error stop + !$omp end parallel + end if + end do + !$omp end distribute + !$omp end teams +end diff --git a/libgomp/testsuite/libgomp.fortran/task-in-explicit-2.f90 b/libgomp/testsuite/libgomp.fortran/task-in-explicit-2.f90 new file mode 100644 index 0000000..c615ff6 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/task-in-explicit-2.f90 @@ -0,0 +1,21 @@ +! { dg-do run } + +program main + use omp_lib + implicit none + !$omp task + if (.not. omp_in_explicit_task ()) & + error stop + !$omp task + if (.not. omp_in_explicit_task ()) & + error stop + !$omp target nowait + if (omp_in_explicit_task ()) & + error stop + !$omp end target + if (.not. omp_in_explicit_task ()) & + error stop + !$omp taskwait + !$omp end task + !$omp end task +end diff --git a/libgomp/testsuite/libgomp.fortran/task-in-explicit-3.f90 b/libgomp/testsuite/libgomp.fortran/task-in-explicit-3.f90 new file mode 100644 index 0000000..629c567 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/task-in-explicit-3.f90 @@ -0,0 +1,31 @@ +! { dg-do run } + +module m + integer :: a +end module m + +program main + use omp_lib + use m + implicit none + !$omp task + if (.not. omp_in_explicit_task ()) & + error stop + !$omp task + if (.not. omp_in_explicit_task ()) & + error stop + !$omp taskgroup task_reduction (+: a) + if (.not. omp_in_explicit_task ()) & + error stop + !$omp task in_reduction (+: a) + a = a + 1 + if (.not. omp_in_explicit_task ()) & + error stop + !$omp end task + !$omp end taskgroup + if (.not. omp_in_explicit_task ()) & + error stop + !$omp taskwait + !$omp end task + !$omp end task +end diff --git a/libgomp/testsuite/libgomp.fortran/task-reduction-17.f90 b/libgomp/testsuite/libgomp.fortran/task-reduction-17.f90 new file mode 100644 index 0000000..12a8962 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/task-reduction-17.f90 @@ -0,0 +1,32 @@ +! 
{ dg-do run } + +module m + integer a +end module m + +program main + use omp_lib + use m + implicit none + + !$omp task final (.true.) + if (.not. omp_in_final ()) & + error stop + !$omp task + if (.not. omp_in_final ()) & + error stop + !$omp taskgroup task_reduction (+: a) + if (.not. omp_in_final ()) & + error stop + !$omp task in_reduction (+: a) + a = a + 1 + if (.not. omp_in_final ()) & + error stop + !$omp end task + !$omp end taskgroup + if (.not. omp_in_final ()) & + error stop + !$omp taskwait + !$omp end task + !$omp end task +end diff --git a/libgomp/testsuite/libgomp.fortran/task-reduction-18.f90 b/libgomp/testsuite/libgomp.fortran/task-reduction-18.f90 new file mode 100644 index 0000000..0cbd471 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/task-reduction-18.f90 @@ -0,0 +1,15 @@ +! { dg-do run } + +module m + integer :: a = 0 +end module m + +program main + !$omp task + !$omp taskgroup task_reduction (+: a) + !$omp task in_reduction (+: a) + a = a + 1 + !$omp end task + !$omp end taskgroup + !$omp end task +end diff --git a/libgomp/testsuite/libgomp.fortran/use_device_ptr-optional-4.f90 b/libgomp/testsuite/libgomp.fortran/use_device_ptr-optional-4.f90 new file mode 100644 index 0000000..b2a5c31 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/use_device_ptr-optional-4.f90 @@ -0,0 +1,53 @@ +! PR middle-end/108546 +! +module m + use iso_c_binding + implicit none + type(c_ptr) :: p2, p3 +contains + subroutine s(x,y,z) + type(c_ptr), optional :: x + integer, pointer, optional :: y + integer, allocatable, optional, target :: z + logical is_present, is_null + is_present = present(x) + if (is_present) & + is_null = .not. c_associated(x) + + !$omp target data use_device_ptr(x) use_device_addr(y) use_device_addr(z) + if (is_present) then + if (is_null) then + if (c_associated(x)) stop 1 + if (associated(y)) stop 2 + if (allocated(z)) stop 3 + else + if (.not. c_associated(x, p2)) stop 4 + if (.not. c_associated(c_loc(y), p2)) stop 5 + if (.not. 
c_associated(c_loc(z), p3)) stop 6 + end if + end if + !$omp end target data + end +end + +use m +implicit none +type(c_ptr) :: cp +integer, pointer :: p +integer, allocatable, target :: a +call s() +p => null() +call s(c_null_ptr, p, a) +allocate(p,a) +p = 7 +a = 9 +cp = c_loc(p) +!$omp target enter data map(to: cp, p, a) +!$omp target map(from: p2, p3) + p2 = c_loc(p) + p3 = c_loc(a) +!$omp end target +call s(cp, p, a) +!$omp target exit data map(delete: cp, p, a) +deallocate(p,a) +end diff --git a/libgomp/testsuite/libgomp.graphite/graphite.exp b/libgomp/testsuite/libgomp.graphite/graphite.exp index 9a4a3e3..1260dc9 100644 --- a/libgomp/testsuite/libgomp.graphite/graphite.exp +++ b/libgomp/testsuite/libgomp.graphite/graphite.exp @@ -1,4 +1,4 @@ -# Copyright (C) 2009-2022 Free Software Foundation, Inc. +# Copyright (C) 2009-2023 Free Software Foundation, Inc. # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c index 1f50386..9b4493d 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c @@ -195,6 +195,123 @@ static void cb_device_init_end (acc_prof_info *prof_info, acc_event_info *event_ #endif } +static void cb_alloc (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info) +{ + DEBUG_printf ("%s\n", __FUNCTION__); + +#if DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT +# error TODO +#else + assert (state == 4 + || state == 104); + STATE_OP (state, ++); + + if (state == 5 + || state == 105) + { + assert (tool_info != NULL); + assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start); + assert (tool_info->nested != NULL); + assert (tool_info->nested->event_info.other_event.event_type == 
acc_ev_enter_data_start); + assert (tool_info->nested->nested == NULL); + } + else + abort (); +#endif + + assert (prof_info->event_type == acc_ev_alloc); + assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES); + assert (prof_info->version == _ACC_PROF_INFO_VERSION); + assert (prof_info->device_type == acc_device_type); + assert (prof_info->device_number == acc_device_num); + assert (prof_info->thread_id == -1); + assert (prof_info->async == acc_async); + assert (prof_info->async_queue == prof_info->async); + assert (prof_info->src_file == NULL); + assert (prof_info->func_name == NULL); + assert (prof_info->line_no == -1); + assert (prof_info->end_line_no == -1); + assert (prof_info->func_line_no == -1); + assert (prof_info->func_end_line_no == -1); + + assert (event_info->data_event.event_type == prof_info->event_type); + assert (event_info->data_event.valid_bytes == _ACC_DATA_EVENT_INFO_VALID_BYTES); + assert (event_info->data_event.parent_construct == acc_construct_parallel); + assert (event_info->data_event.implicit == 1); + assert (event_info->data_event.tool_info == NULL); + assert (event_info->data_event.var_name == NULL); + assert (event_info->data_event.bytes != 0); + assert (event_info->data_event.host_ptr == NULL); + assert (event_info->data_event.device_ptr != NULL); + + assert (api_info->valid_bytes == _ACC_API_INFO_VALID_BYTES); + assert (api_info->device_type == prof_info->device_type); + assert (api_info->vendor == -1); + assert (api_info->device_handle == NULL); + assert (api_info->context_handle == NULL); + assert (api_info->async_handle == NULL); +} + +static void cb_free (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info) +{ + DEBUG_printf ("%s\n", __FUNCTION__); + +#if DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT +# error TODO +#else + assert (state == 9); + STATE_OP (state, ++); + + if (state == 10) + { + assert (tool_info != NULL); + assert (tool_info->event_info.other_event.event_type == 
acc_ev_compute_construct_start); + assert (tool_info->nested != NULL); + assert (tool_info->nested->event_info.other_event.event_type == acc_ev_exit_data_start); + assert (tool_info->nested->nested == NULL); + } + else + abort (); +#endif + + assert (prof_info->event_type == acc_ev_free); + assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES); + assert (prof_info->version == _ACC_PROF_INFO_VERSION); + assert (prof_info->device_type == acc_device_type); + assert (prof_info->device_number == acc_device_num); + assert (prof_info->thread_id == -1); + assert (prof_info->async == acc_async); + assert (prof_info->async_queue == prof_info->async); + assert (prof_info->src_file == NULL); + assert (prof_info->func_name == NULL); + assert (prof_info->line_no == -1); + assert (prof_info->end_line_no == -1); + assert (prof_info->func_line_no == -1); + assert (prof_info->func_end_line_no == -1); + + assert (event_info->data_event.event_type == prof_info->event_type); + assert (event_info->data_event.valid_bytes == _ACC_DATA_EVENT_INFO_VALID_BYTES); + assert (event_info->data_event.parent_construct == acc_construct_parallel); + assert (event_info->data_event.implicit == 1); + assert (event_info->data_event.tool_info == NULL); + assert (event_info->data_event.var_name == NULL); + if (acc_device_type == acc_device_nvidia) + assert (event_info->data_event.bytes == (size_t) -1); + else if (acc_device_type == acc_device_radeon) + assert (event_info->data_event.bytes == 0); + else + abort (); + assert (event_info->data_event.host_ptr == NULL); + assert (event_info->data_event.device_ptr != NULL); + + assert (api_info->valid_bytes == _ACC_API_INFO_VALID_BYTES); + assert (api_info->device_type == prof_info->device_type); + assert (api_info->vendor == -1); + assert (api_info->device_handle == NULL); + assert (api_info->context_handle == NULL); + assert (api_info->async_handle == NULL); +} + static void cb_enter_data_start (acc_prof_info *prof_info, acc_event_info *event_info, 
acc_api_info *api_info) { DEBUG_printf ("%s\n", __FUNCTION__); @@ -246,8 +363,8 @@ static void cb_enter_data_end (acc_prof_info *prof_info, acc_event_info *event_i { DEBUG_printf ("%s\n", __FUNCTION__); - assert (state == 4 - || state == 104); + assert (state == 5 + || state == 105); #if defined COPYIN /* Conceptually, 'acc_ev_enter_data_end' marks the end of data copying, before 'acc_ev_enqueue_launch_start' marks invoking the compute region. @@ -316,9 +433,9 @@ static void cb_exit_data_start (acc_prof_info *prof_info, acc_event_info *event_ { DEBUG_printf ("%s\n", __FUNCTION__); - assert (state == 7 + assert (state == 8 #if ASYNC_EXIT_DATA - || state == 107 + || state == 108 #endif ); STATE_OP (state, ++); @@ -366,15 +483,25 @@ static void cb_exit_data_start (acc_prof_info *prof_info, acc_event_info *event_ tool_info->nested->event_info.other_event.event_type = event_info->other_event.event_type; event_info->other_event.tool_info = tool_info->nested; + +#if ASYNC_EXIT_DATA + if (acc_async != acc_async_sync) + { + /* Compensate for the deferred 'acc_ev_free'. */ + state += 1; + } +#else +# error TODO +#endif } static void cb_exit_data_end (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info) { DEBUG_printf ("%s\n", __FUNCTION__); - assert (state == 8 + assert (state == 10 #if ASYNC_EXIT_DATA - || state == 108 + || state == 110 #endif ); STATE_OP (state, ++); @@ -488,6 +615,8 @@ static void cb_compute_construct_start (acc_prof_info *prof_info, acc_event_info { /* Compensate for the missing 'acc_ev_enter_data_start'. */ state += 1; + /* Compensate for the missing 'acc_ev_alloc'. */ + state += 1; } } @@ -502,9 +631,12 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info * /* Compensate for the missing 'acc_ev_enqueue_launch_start' and 'acc_ev_enqueue_launch_end'. */ state += 2; - /* Compensate for the missing 'acc_ev_exit_data_start' and - 'acc_ev_exit_data_end'. 
*/ - state += 2; + /* Compensate for the missing 'acc_ev_exit_data_start'. */ + state += 1; + /* Compensate for the missing 'acc_ev_free'. */ + state += 1; + /* Compensate for the missing 'acc_ev_exit_data_end'. */ + state += 1; } #if !ASYNC_EXIT_DATA else if (acc_async != acc_async_sync) @@ -514,8 +646,8 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info * state += 2; } #endif - assert (state == 9 - || state == 109); + assert (state == 11 + || state == 111); STATE_OP (state, ++); assert (tool_info != NULL); @@ -569,8 +701,8 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e assert (acc_device_type != acc_device_host); - assert (state == 5 - || state == 105); + assert (state == 6 + || state == 106); STATE_OP (state, ++); assert (tool_info != NULL); @@ -638,8 +770,8 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve assert (acc_device_type != acc_device_host); - assert (state == 6 - || state == 106); + assert (state == 7 + || state == 107); STATE_OP (state, ++); assert (tool_info != NULL); @@ -703,6 +835,8 @@ int main() STATE_OP (state, = 0); reg (acc_ev_device_init_start, cb_device_init_start, acc_reg); reg (acc_ev_device_init_end, cb_device_init_end, acc_reg); + reg (acc_ev_alloc, cb_alloc, acc_reg); + reg (acc_ev_free, cb_free, acc_reg); reg (acc_ev_enter_data_start, cb_enter_data_start, acc_reg); reg (acc_ev_enter_data_end, cb_enter_data_end, acc_reg); reg (acc_ev_exit_data_start, cb_exit_data_start, acc_reg); @@ -725,9 +859,9 @@ int main() state_init = state; } - assert (state_init == 4); + assert (state_init == 5); } - assert (state == 10); + assert (state == 12); STATE_OP (state, = 100); @@ -742,9 +876,9 @@ int main() } acc_async = acc_async_sync; #pragma acc wait - assert (state_init == 104); + assert (state_init == 105); } - assert (state == 110); + assert (state == 112); return 0; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-3.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-3.c index 5ec50b8..c422cbc 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-3.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-3.c @@ -144,8 +144,8 @@ main (int argc, char **argv) #pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) copyout (c[0:N]) \ copyout (d[0:N]) copyout (e[0:N]) wait (1, 2, 3, 4) async (1) -#pragma acc exit data delete (N) -#pragma acc wait (1) +#pragma acc exit data delete (N) wait(1) async(2) +#pragma acc wait (2) for (i = 0; i < N; i++) { diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/no_create-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/no_create-1.c index 22e0c20..05297d3 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/no_create-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/no_create-1.c @@ -22,15 +22,10 @@ main (int argc, char *argv[]) devptr[0] = &var; devptr[1] = &arr[2]; } - if (acc_hostptr (devptr[0]) != (void *) &var) __builtin_abort (); if (acc_hostptr (devptr[1]) != (void *) &arr[2]) __builtin_abort (); - - acc_delete (&var, sizeof (var)); - acc_delete (arr, N * sizeof (*arr)); - #if ACC_MEM_SHARED if (devptr[0] != &var) __builtin_abort (); @@ -43,6 +38,31 @@ main (int argc, char *argv[]) __builtin_abort (); #endif +#pragma acc parallel no_create(var, arr[0:N]) copyout(devptr) async + { + devptr[0] = &arr[N - 2]; + devptr[1] = &var; + } +#pragma acc wait + if (acc_hostptr (devptr[0]) != (void *) &arr[N - 2]) + __builtin_abort (); + if (acc_hostptr (devptr[1]) != (void *) &var) + __builtin_abort (); +#if ACC_MEM_SHARED + if (devptr[0] != &arr[N - 2]) + __builtin_abort (); + if (devptr[1] != &var) + __builtin_abort (); +#else + if (devptr[0] == &arr[N - 2]) + __builtin_abort (); + if (devptr[1] == &var) + __builtin_abort (); +#endif + + acc_delete (&var, sizeof (var)); + acc_delete (arr, N * sizeof (*arr)); + free (arr); return 0; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/no_create-2.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/no_create-2.c index fbd01a2..202092f 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/no_create-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/no_create-2.c @@ -18,12 +18,22 @@ main (int argc, char *argv[]) devptr[0] = &var; devptr[1] = &arr[2]; } - if (devptr[0] != &var) __builtin_abort (); if (devptr[1] != &arr[2]) __builtin_abort (); +#pragma acc parallel no_create(var, arr[0:N]) copyout(devptr) async + { + devptr[0] = &arr[N - 2]; + devptr[1] = &var; + } +#pragma acc wait + if (devptr[0] != &arr[N - 2]) + __builtin_abort (); + if (devptr[1] != &var) + __builtin_abort (); + free (arr); return 0; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/nvptx-sese-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/nvptx-sese-1.c index 9583265..6507e1a 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/nvptx-sese-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/nvptx-sese-1.c @@ -22,7 +22,7 @@ int main () if (foo (r)) r *= 2; - if (r & 1) /* to here. */ + if (r & 8) /* to here. */ #pragma acc loop vector reduction (+:r) for (int i = 00; i < 40; i++) r += i; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/private-big-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-big-1.c new file mode 100644 index 0000000..c0e8db0 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-big-1.c @@ -0,0 +1,100 @@ +/* Test "big" private data. */ + +/* { dg-additional-options -fno-inline } for stable results regarding OpenACC 'routine'. */ + +/* { dg-additional-options -fopt-info-all-omp } + { dg-additional-options --param=openacc-privatization=noisy } + { dg-additional-options -foffload=-fopt-info-all-omp } + { dg-additional-options -foffload=--param=openacc-privatization=noisy } + for testing/documenting aspects of that functionality. */ + +/* { dg-additional-options -Wopenacc-parallelism } for testing/documenting + aspects of that functionality. 
*/ + +/* For GCN offloading compilation, we (expectedly) run into a + 'gang-private data-share memory exhausted' error: the default + '-mgang-private-size' is too small. Raise it so that 'uint32_t x[344]' plus + some internal-use data fits in: + { dg-additional-options -foffload-options=amdgcn-amdhsa=-mgang-private-size=1555 { target openacc_radeon_accel_selected } } */ + +/* It's only with Tcl 8.5 (released in 2007) that "the variable 'varName' + passed to 'incr' may be unset, and in that case, it will be set to [...]", + so to maintain compatibility with earlier Tcl releases, we manually + initialize counter variables: + { dg-line l_dummy[variable c_compute 0 c_loop 0] } + { dg-message dummy {} { target iN-VAl-Id } l_dummy } to avoid + "WARNING: dg-line var l_dummy defined, but not used". */ + +#include <assert.h> +#include <stdint.h> + + +/* Based on 'private-variables.c:loop_g_5'. */ + +/* To demonstrate PR105421 "GCN offloading, raised '-mgang-private-size': + 'HSA_STATUS_ERROR_MEMORY_APERTURE_VIOLATION'", a 'struct' indirection, for + example, has been necessary in combination with a separate routine. 
*/ + +struct data +{ + uint32_t *x; + uint32_t *arr; + uint32_t i; +}; + +#pragma acc routine worker +static void +loop_g_5_r(struct data *data) +{ + uint32_t *x = data->x; + uint32_t *arr = data->arr; + uint32_t i = data->i; + +#pragma acc loop /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop$c_loop } */ + /* { dg-optimized {assigned OpenACC worker vector loop parallelism} {} { target *-*-* } l_loop$c_loop } */ + for (int j = 0; j < 320; j++) + arr[i * 320 + j] += x[(i * 320 + j) % 344]; +} + +void loop_g_5() +{ + uint32_t x[344], i, arr[320 * 320]; + + for (i = 0; i < 320 * 320; i++) + arr[i] = i; + + #pragma acc parallel copy(arr) + { + #pragma acc loop gang private(x) /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'x' in 'private' clause is candidate for adjusting OpenACC privatization level} {} { target *-*-* } l_loop$c_loop } + { dg-note {variable 'x' ought to be adjusted for OpenACC privatization level: 'gang'} {} { target *-*-* } l_loop$c_loop } + { dg-note {variable 'x' adjusted for OpenACC privatization level: 'gang'} {} { target { ! openacc_host_selected } } l_loop$c_loop } */ + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'data' declared in block is candidate for adjusting OpenACC privatization level} {} { target *-*-* } l_loop$c_loop } + { dg-note {variable 'data' ought to be adjusted for OpenACC privatization level: 'gang'} {} { target *-*-* } l_loop$c_loop } + { dg-note {variable 'data' adjusted for OpenACC privatization level: 'gang'} {} { target { ! 
openacc_host_selected } } l_loop$c_loop } */ + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop$c_loop } */ + /* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_loop$c_loop } */ + for (i = 0; i < 320; i++) + { + for (int j = 0; j < 344; j++) + x[j] = j * (2 + i); + + struct data data = { x, arr, i }; + loop_g_5_r(&data); /* { dg-line l_compute[incr c_compute] } */ + /* { dg-optimized {assigned OpenACC worker vector loop parallelism} {} { target *-*-* } l_compute$c_compute } */ + } + } + + for (i = 0; i < 320 * 320; i++) + assert(arr[i] == i + (i % 344) * (2 + (i / 320))); +} + + +int main () +{ + loop_g_5(); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-nohost-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-nohost-2.c index 20c420d..525e3bf 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-nohost-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-nohost-2.c @@ -8,6 +8,7 @@ /* { dg-additional-sources routine-nohost-2_2.c } */ /* { dg-additional-options "-fno-inline" } for stable results regarding OpenACC 'routine'. */ +/* { dg-add-options weak_undefined } */ #include <assert.h> #include <openacc.h> diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-1.f90 index 51776a1..89bd4a2 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/declare-1.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-1.f90 @@ -215,7 +215,7 @@ program main ! { dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target *-*-* } .-1 } ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } ! 
{ dg-note {variable 'S\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } - ! { dg-note {variable 'desc\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "TODO" { target *-*-* } .-4 } + ! { dg-note {variable 'desc\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: artificial} "" { target *-*-* } .-4 } use vars use openacc implicit none diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1-directive.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1-directive.f90 new file mode 100644 index 0000000..759873b --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1-directive.f90 @@ -0,0 +1,278 @@ +! Test OpenACC 'declare create' with allocatable arrays. + +! { dg-do run } + +!TODO-OpenACC-declare-allocate +! Missing support for OpenACC "Changes from Version 2.0 to 2.5": +! "The 'declare create' directive with a Fortran 'allocatable' has new behavior". +! Thus, after 'allocate'/before 'deallocate', do +! '!$acc enter data create'/'!$acc exit data delete' manually. + +!TODO { dg-additional-options -fno-inline } for stable results regarding OpenACC 'routine'. + +! { dg-additional-options -fopt-info-all-omp } +! { dg-additional-options -foffload=-fopt-info-all-omp } + +! { dg-additional-options --param=openacc-privatization=noisy } +! { dg-additional-options -foffload=--param=openacc-privatization=noisy } +! Prune a few: uninteresting, and potentially varying depending on GCC configuration (data types): +! { dg-prune-output {note: variable '[Di]\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} } + +! { dg-additional-options -Wopenacc-parallelism } + +! It's only with Tcl 8.5 (released in 2007) that "the variable 'varName' +! passed to 'incr' may be unset, and in that case, it will be set to [...]", +! 
so to maintain compatibility with earlier Tcl releases, we manually +! initialize counter variables: +! { dg-line l_dummy[variable c 0] } +! { dg-message dummy {} { target iN-VAl-Id } l_dummy } to avoid +! "WARNING: dg-line var l_dummy defined, but not used". + + +module vars + implicit none + integer, parameter :: n = 100 + real*8, allocatable :: b(:) + !$acc declare create (b) +end module vars + +program test + use vars + use openacc + implicit none + real*8 :: a + integer :: i + + interface + subroutine sub1 + !$acc routine gang + end subroutine sub1 + + subroutine sub2 + end subroutine sub2 + + real*8 function fun1 (ix) + integer ix + !$acc routine seq + end function fun1 + + real*8 function fun2 (ix) + integer ix + !$acc routine seq + end function fun2 + end interface + + if (allocated (b)) error stop + + ! Test local usage of an allocated declared array. + + allocate (b(n)) + !$acc enter data create (b) + + if (.not.allocated (b)) error stop + if (.not.acc_is_present (b)) error stop + + a = 2.0 + + !$acc parallel loop ! { dg-line l[incr c] } + ! { dg-note {variable 'i' in 'private' clause is candidate for adjusting OpenACC privatization level} {} { target *-*-* } l$c } + ! { dg-note {variable 'i' ought to be adjusted for OpenACC privatization level: 'vector'} {} { target *-*-* } l$c } + ! { dg-note {variable 'i' adjusted for OpenACC privatization level: 'vector'} {} { target { ! openacc_host_selected } } l$c } + ! { dg-note {variable 'i\.[0-9]+' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l$c } + ! { dg-optimized {assigned OpenACC gang vector loop parallelism} {} { target *-*-* } l$c } + do i = 1, n + b(i) = i * a + end do + + if (.not.acc_is_present (b)) error stop + + !$acc update host(b) + + do i = 1, n + if (b(i) /= i*a) error stop + end do + + !$acc exit data delete (b) + deallocate (b) + + ! Test the usage of an allocated declared array inside an acc + ! routine subroutine. 
+ + allocate (b(n)) + !$acc enter data create (b) + + if (.not.allocated (b)) error stop + if (.not.acc_is_present (b)) error stop + + !$acc parallel + call sub1 ! { dg-line l[incr c] } + ! { dg-optimized {assigned OpenACC gang worker vector loop parallelism} {} { target *-*-* } l$c } + !$acc end parallel + + if (.not.acc_is_present (b)) error stop + + !$acc update host(b) + + do i = 1, n + if (b(i) /= i*2) error stop + end do + + !$acc exit data delete (b) + deallocate (b) + + ! Test the usage of an allocated declared array inside a host + ! subroutine. + + call sub2 + + if (.not.acc_is_present (b)) error stop + + !$acc update host(b) + + do i = 1, n + if (b(i) /= 1.0) error stop + end do + + !$acc exit data delete (b) + deallocate (b) + + if (allocated (b)) error stop + + ! Test the usage of an allocated declared array inside an acc + ! routine function. + + allocate (b(n)) + !$acc enter data create (b) + + if (.not.allocated (b)) error stop + if (.not.acc_is_present (b)) error stop + + !$acc parallel loop ! { dg-line l[incr c] } + ! { dg-note {variable 'i' in 'private' clause is candidate for adjusting OpenACC privatization level} {} { target *-*-* } l$c } + ! { dg-note {variable 'i' ought to be adjusted for OpenACC privatization level: 'vector'} {} { target *-*-* } l$c } + ! { dg-note {variable 'i' adjusted for OpenACC privatization level: 'vector'} {} { target { ! openacc_host_selected } } l$c } + ! { dg-note {variable 'i\.[0-9]+' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l$c } + ! { dg-optimized {assigned OpenACC gang vector loop parallelism} {} { target *-*-* } l$c } + do i = 1, n + b(i) = 1.0 + end do + + !$acc parallel loop ! { dg-line l[incr c] } + ! { dg-note {variable 'i' in 'private' clause is candidate for adjusting OpenACC privatization level} {} { target *-*-* } l$c } + ! 
{ dg-note {variable 'i' ought to be adjusted for OpenACC privatization level: 'vector'} {} { target *-*-* } l$c } + ! { dg-note {variable 'i' adjusted for OpenACC privatization level: 'vector'} {} { target { ! openacc_host_selected } } l$c } + ! { dg-note {variable 'i\.[0-9]+' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l$c } + ! { dg-optimized {assigned OpenACC gang vector loop parallelism} {} { target *-*-* } l$c } + do i = 1, n + b(i) = fun1 (i) ! { dg-line l[incr c] } + ! { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l$c } + end do + + if (.not.acc_is_present (b)) error stop + + !$acc update host(b) + + do i = 1, n + if (b(i) /= i) error stop + end do + + !$acc exit data delete (b) + deallocate (b) + + ! Test the usage of an allocated declared array inside a host + ! function. + + allocate (b(n)) + !$acc enter data create (b) + + if (.not.allocated (b)) error stop + if (.not.acc_is_present (b)) error stop + + !$acc parallel loop ! { dg-line l[incr c] } + ! { dg-note {variable 'i' in 'private' clause is candidate for adjusting OpenACC privatization level} {} { target *-*-* } l$c } + ! { dg-note {variable 'i' ought to be adjusted for OpenACC privatization level: 'vector'} {} { target *-*-* } l$c } + ! { dg-note {variable 'i' adjusted for OpenACC privatization level: 'vector'} {} { target { ! openacc_host_selected } } l$c } + ! { dg-note {variable 'i\.[0-9]+' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l$c } + ! { dg-optimized {assigned OpenACC gang vector loop parallelism} {} { target *-*-* } l$c } + do i = 1, n + b(i) = 1.0 + end do + + !$acc update host(b) + + do i = 1, n + b(i) = fun2 (i) + end do + + if (.not.acc_is_present (b)) error stop + + do i = 1, n + if (b(i) /= i*i) error stop + end do + + !$acc exit data delete (b) + deallocate (b) +end program test ! { dg-line l[incr c] } +! 
{ dg-bogus {note: variable 'overflow\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {TODO n/a} { xfail *-*-* } l$c } +! { dg-bogus {note: variable 'not_prev_allocated\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {TODO n/a} { xfail *-*-* } l$c } +! { dg-bogus {note: variable 'parm\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: artificial} {TODO n/a} { xfail *-*-* } l$c } + +! Set each element in array 'b' at index i to i*2. + +subroutine sub1 ! { dg-line subroutine_sub1 } + use vars + implicit none + integer i + !$acc routine gang + ! { dg-bogus {[Ww]arning: region is worker partitioned but does not contain worker partitioned code} {TODO default 'gang' 'vector'} { xfail *-*-* } subroutine_sub1 } + + !$acc loop ! { dg-line l[incr c] } + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l$c } + ! { dg-optimized {assigned OpenACC gang vector loop parallelism} {} { target *-*-* } l$c } + do i = 1, n + b(i) = i*2 + end do +end subroutine sub1 + +! Allocate array 'b', and set it to all 1.0. + +subroutine sub2 + use vars + use openacc + implicit none + integer i + + allocate (b(n)) + !$acc enter data create (b) + + if (.not.allocated (b)) error stop + if (.not.acc_is_present (b)) error stop + + !$acc parallel loop ! { dg-line l[incr c] } + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l$c } + ! { dg-optimized {assigned OpenACC gang vector loop parallelism} {} { target *-*-* } l$c } + do i = 1, n + b(i) = 1.0 + end do +end subroutine sub2 + +! Return b(i) * i; + +real*8 function fun1 (i) + use vars + implicit none + integer i + !$acc routine seq + + fun1 = b(i) * i +end function fun1 + +! 
Return b(i) * i * i; + +real*8 function fun2 (i) + use vars + implicit none + integer i + + fun2 = b(i) * i * i +end function fun2 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1-runtime.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1-runtime.f90 new file mode 100644 index 0000000..e4cb9c3 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1-runtime.f90 @@ -0,0 +1,278 @@ +! Test OpenACC 'declare create' with allocatable arrays. + +! { dg-do run } + +!TODO-OpenACC-declare-allocate +! Missing support for OpenACC "Changes from Version 2.0 to 2.5": +! "The 'declare create' directive with a Fortran 'allocatable' has new behavior". +! Thus, after 'allocate'/before 'deallocate', call 'acc_create'/'acc_delete' +! manually. + +!TODO { dg-additional-options -fno-inline } for stable results regarding OpenACC 'routine'. + +! { dg-additional-options -fopt-info-all-omp } +! { dg-additional-options -foffload=-fopt-info-all-omp } + +! { dg-additional-options --param=openacc-privatization=noisy } +! { dg-additional-options -foffload=--param=openacc-privatization=noisy } +! Prune a few: uninteresting, and potentially varying depending on GCC configuration (data types): +! { dg-prune-output {note: variable '[Di]\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} } + +! { dg-additional-options -Wopenacc-parallelism } + +! It's only with Tcl 8.5 (released in 2007) that "the variable 'varName' +! passed to 'incr' may be unset, and in that case, it will be set to [...]", +! so to maintain compatibility with earlier Tcl releases, we manually +! initialize counter variables: +! { dg-line l_dummy[variable c 0] } +! { dg-message dummy {} { target iN-VAl-Id } l_dummy } to avoid +! "WARNING: dg-line var l_dummy defined, but not used". 
+ + +module vars + implicit none + integer, parameter :: n = 100 + real*8, allocatable :: b(:) + !$acc declare create (b) +end module vars + +program test + use vars + use openacc + implicit none + real*8 :: a + integer :: i + + interface + subroutine sub1 + !$acc routine gang + end subroutine sub1 + + subroutine sub2 + end subroutine sub2 + + real*8 function fun1 (ix) + integer ix + !$acc routine seq + end function fun1 + + real*8 function fun2 (ix) + integer ix + !$acc routine seq + end function fun2 + end interface + + if (allocated (b)) error stop + + ! Test local usage of an allocated declared array. + + allocate (b(n)) + call acc_create (b) + + if (.not.allocated (b)) error stop + if (.not.acc_is_present (b)) error stop + + a = 2.0 + + !$acc parallel loop ! { dg-line l[incr c] } + ! { dg-note {variable 'i' in 'private' clause is candidate for adjusting OpenACC privatization level} {} { target *-*-* } l$c } + ! { dg-note {variable 'i' ought to be adjusted for OpenACC privatization level: 'vector'} {} { target *-*-* } l$c } + ! { dg-note {variable 'i' adjusted for OpenACC privatization level: 'vector'} {} { target { ! openacc_host_selected } } l$c } + ! { dg-note {variable 'i\.[0-9]+' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l$c } + ! { dg-optimized {assigned OpenACC gang vector loop parallelism} {} { target *-*-* } l$c } + do i = 1, n + b(i) = i * a + end do + + if (.not.acc_is_present (b)) error stop + + !$acc update host(b) + + do i = 1, n + if (b(i) /= i*a) error stop + end do + + call acc_delete (b) + deallocate (b) + + ! Test the usage of an allocated declared array inside an acc + ! routine subroutine. + + allocate (b(n)) + call acc_create (b) + + if (.not.allocated (b)) error stop + if (.not.acc_is_present (b)) error stop + + !$acc parallel + call sub1 ! { dg-line l[incr c] } + ! 
{ dg-optimized {assigned OpenACC gang worker vector loop parallelism} {} { target *-*-* } l$c } + !$acc end parallel + + if (.not.acc_is_present (b)) error stop + + !$acc update host(b) + + do i = 1, n + if (b(i) /= i*2) error stop + end do + + call acc_delete (b) + deallocate (b) + + ! Test the usage of an allocated declared array inside a host + ! subroutine. + + call sub2 + + if (.not.acc_is_present (b)) error stop + + !$acc update host(b) + + do i = 1, n + if (b(i) /= 1.0) error stop + end do + + call acc_delete (b) + deallocate (b) + + if (allocated (b)) error stop + + ! Test the usage of an allocated declared array inside an acc + ! routine function. + + allocate (b(n)) + call acc_create (b) + + if (.not.allocated (b)) error stop + if (.not.acc_is_present (b)) error stop + + !$acc parallel loop ! { dg-line l[incr c] } + ! { dg-note {variable 'i' in 'private' clause is candidate for adjusting OpenACC privatization level} {} { target *-*-* } l$c } + ! { dg-note {variable 'i' ought to be adjusted for OpenACC privatization level: 'vector'} {} { target *-*-* } l$c } + ! { dg-note {variable 'i' adjusted for OpenACC privatization level: 'vector'} {} { target { ! openacc_host_selected } } l$c } + ! { dg-note {variable 'i\.[0-9]+' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l$c } + ! { dg-optimized {assigned OpenACC gang vector loop parallelism} {} { target *-*-* } l$c } + do i = 1, n + b(i) = 1.0 + end do + + !$acc parallel loop ! { dg-line l[incr c] } + ! { dg-note {variable 'i' in 'private' clause is candidate for adjusting OpenACC privatization level} {} { target *-*-* } l$c } + ! { dg-note {variable 'i' ought to be adjusted for OpenACC privatization level: 'vector'} {} { target *-*-* } l$c } + ! { dg-note {variable 'i' adjusted for OpenACC privatization level: 'vector'} {} { target { ! openacc_host_selected } } l$c } + ! 
{ dg-note {variable 'i\.[0-9]+' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l$c } + ! { dg-optimized {assigned OpenACC gang vector loop parallelism} {} { target *-*-* } l$c } + do i = 1, n + b(i) = fun1 (i) ! { dg-line l[incr c] } + ! { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l$c } + end do + + if (.not.acc_is_present (b)) error stop + + !$acc update host(b) + + do i = 1, n + if (b(i) /= i) error stop + end do + + call acc_delete (b) + deallocate (b) + + ! Test the usage of an allocated declared array inside a host + ! function. + + allocate (b(n)) + call acc_create (b) + + if (.not.allocated (b)) error stop + if (.not.acc_is_present (b)) error stop + + !$acc parallel loop ! { dg-line l[incr c] } + ! { dg-note {variable 'i' in 'private' clause is candidate for adjusting OpenACC privatization level} {} { target *-*-* } l$c } + ! { dg-note {variable 'i' ought to be adjusted for OpenACC privatization level: 'vector'} {} { target *-*-* } l$c } + ! { dg-note {variable 'i' adjusted for OpenACC privatization level: 'vector'} {} { target { ! openacc_host_selected } } l$c } + ! { dg-note {variable 'i\.[0-9]+' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l$c } + ! { dg-optimized {assigned OpenACC gang vector loop parallelism} {} { target *-*-* } l$c } + do i = 1, n + b(i) = 1.0 + end do + + !$acc update host(b) + + do i = 1, n + b(i) = fun2 (i) + end do + + if (.not.acc_is_present (b)) error stop + + do i = 1, n + if (b(i) /= i*i) error stop + end do + + call acc_delete (b) + deallocate (b) +end program test ! { dg-line l[incr c] } +! { dg-bogus {note: variable 'overflow\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {TODO n/a} { xfail *-*-* } l$c } +! 
{ dg-bogus {note: variable 'not_prev_allocated\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {TODO n/a} { xfail *-*-* } l$c } +! { dg-bogus {note: variable 'parm\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: artificial} {TODO n/a} { xfail *-*-* } l$c } + +! Set each element in array 'b' at index i to i*2. + +subroutine sub1 ! { dg-line subroutine_sub1 } + use vars + implicit none + integer i + !$acc routine gang + ! { dg-bogus {[Ww]arning: region is worker partitioned but does not contain worker partitioned code} {TODO default 'gang' 'vector'} { xfail *-*-* } subroutine_sub1 } + + !$acc loop ! { dg-line l[incr c] } + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l$c } + ! { dg-optimized {assigned OpenACC gang vector loop parallelism} {} { target *-*-* } l$c } + do i = 1, n + b(i) = i*2 + end do +end subroutine sub1 + +! Allocate array 'b', and set it to all 1.0. + +subroutine sub2 + use vars + use openacc + implicit none + integer i + + allocate (b(n)) + call acc_create (b) + + if (.not.allocated (b)) error stop + if (.not.acc_is_present (b)) error stop + + !$acc parallel loop ! { dg-line l[incr c] } + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l$c } + ! { dg-optimized {assigned OpenACC gang vector loop parallelism} {} { target *-*-* } l$c } + do i = 1, n + b(i) = 1.0 + end do +end subroutine sub2 + +! Return b(i) * i; + +real*8 function fun1 (i) + use vars + implicit none + integer i + !$acc routine seq + + fun1 = b(i) * i +end function fun1 + +! 
Return b(i) * i * i; + +real*8 function fun2 (i) + use vars + implicit none + integer i + + fun2 = b(i) * i * i +end function fun2 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1.f90 new file mode 100644 index 0000000..1c8ccd9 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1.f90 @@ -0,0 +1,268 @@ +! Test OpenACC 'declare create' with allocatable arrays. + +! { dg-do run } + +!TODO-OpenACC-declare-allocate +! Not currently implementing correct '-DACC_MEM_SHARED=0' behavior: +! Missing support for OpenACC "Changes from Version 2.0 to 2.5": +! "The 'declare create' directive with a Fortran 'allocatable' has new behavior". +! { dg-xfail-run-if TODO { *-*-* } { -DACC_MEM_SHARED=0 } } + +!TODO { dg-additional-options -fno-inline } for stable results regarding OpenACC 'routine'. + +! { dg-additional-options -fopt-info-all-omp } +! { dg-additional-options -foffload=-fopt-info-all-omp } + +! { dg-additional-options --param=openacc-privatization=noisy } +! { dg-additional-options -foffload=--param=openacc-privatization=noisy } +! Prune a few: uninteresting, and potentially varying depending on GCC configuration (data types): +! { dg-prune-output {note: variable '[Di]\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} } + +! { dg-additional-options -Wopenacc-parallelism } + +! It's only with Tcl 8.5 (released in 2007) that "the variable 'varName' +! passed to 'incr' may be unset, and in that case, it will be set to [...]", +! so to maintain compatibility with earlier Tcl releases, we manually +! initialize counter variables: +! { dg-line l_dummy[variable c 0] } +! { dg-message dummy {} { target iN-VAl-Id } l_dummy } to avoid +! "WARNING: dg-line var l_dummy defined, but not used". 
+ + +module vars + implicit none + integer, parameter :: n = 100 + real*8, allocatable :: b(:) + !$acc declare create (b) +end module vars + +program test + use vars + use openacc + implicit none + real*8 :: a + integer :: i + + interface + subroutine sub1 + !$acc routine gang + end subroutine sub1 + + subroutine sub2 + end subroutine sub2 + + real*8 function fun1 (ix) + integer ix + !$acc routine seq + end function fun1 + + real*8 function fun2 (ix) + integer ix + !$acc routine seq + end function fun2 + end interface + + if (allocated (b)) error stop + + ! Test local usage of an allocated declared array. + + allocate (b(n)) + + if (.not.allocated (b)) error stop + if (.not.acc_is_present (b)) error stop + + a = 2.0 + + !$acc parallel loop ! { dg-line l[incr c] } + ! { dg-note {variable 'i' in 'private' clause is candidate for adjusting OpenACC privatization level} {} { target *-*-* } l$c } + ! { dg-note {variable 'i' ought to be adjusted for OpenACC privatization level: 'vector'} {} { target *-*-* } l$c } + ! { dg-note {variable 'i' adjusted for OpenACC privatization level: 'vector'} {} { target { ! openacc_host_selected } } l$c } + ! { dg-note {variable 'i\.[0-9]+' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l$c } + ! { dg-optimized {assigned OpenACC gang vector loop parallelism} {} { target *-*-* } l$c } + do i = 1, n + b(i) = i * a + end do + + if (.not.acc_is_present (b)) error stop + + !$acc update host(b) + + do i = 1, n + if (b(i) /= i*a) error stop + end do + + deallocate (b) + + ! Test the usage of an allocated declared array inside an acc + ! routine subroutine. + + allocate (b(n)) + + if (.not.allocated (b)) error stop + if (.not.acc_is_present (b)) error stop + + !$acc parallel + call sub1 ! { dg-line l[incr c] } + ! 
{ dg-optimized {assigned OpenACC gang worker vector loop parallelism} {} { target *-*-* } l$c } + !$acc end parallel + + if (.not.acc_is_present (b)) error stop + + !$acc update host(b) + + do i = 1, n + if (b(i) /= i*2) error stop + end do + + deallocate (b) + + ! Test the usage of an allocated declared array inside a host + ! subroutine. + + call sub2 + + if (.not.acc_is_present (b)) error stop + + !$acc update host(b) + + do i = 1, n + if (b(i) /= 1.0) error stop + end do + + deallocate (b) + + if (allocated (b)) error stop + + ! Test the usage of an allocated declared array inside an acc + ! routine function. + + allocate (b(n)) + + if (.not.allocated (b)) error stop + if (.not.acc_is_present (b)) error stop + + !$acc parallel loop ! { dg-line l[incr c] } + ! { dg-note {variable 'i' in 'private' clause is candidate for adjusting OpenACC privatization level} {} { target *-*-* } l$c } + ! { dg-note {variable 'i' ought to be adjusted for OpenACC privatization level: 'vector'} {} { target *-*-* } l$c } + ! { dg-note {variable 'i' adjusted for OpenACC privatization level: 'vector'} {} { target { ! openacc_host_selected } } l$c } + ! { dg-note {variable 'i\.[0-9]+' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l$c } + ! { dg-optimized {assigned OpenACC gang vector loop parallelism} {} { target *-*-* } l$c } + do i = 1, n + b(i) = 1.0 + end do + + !$acc parallel loop ! { dg-line l[incr c] } + ! { dg-note {variable 'i' in 'private' clause is candidate for adjusting OpenACC privatization level} {} { target *-*-* } l$c } + ! { dg-note {variable 'i' ought to be adjusted for OpenACC privatization level: 'vector'} {} { target *-*-* } l$c } + ! { dg-note {variable 'i' adjusted for OpenACC privatization level: 'vector'} {} { target { ! openacc_host_selected } } l$c } + ! 
{ dg-note {variable 'i\.[0-9]+' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l$c } + ! { dg-optimized {assigned OpenACC gang vector loop parallelism} {} { target *-*-* } l$c } + do i = 1, n + b(i) = fun1 (i) ! { dg-line l[incr c] } + ! { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l$c } + end do + + if (.not.acc_is_present (b)) error stop + + !$acc update host(b) + + do i = 1, n + if (b(i) /= i) error stop + end do + + deallocate (b) + + ! Test the usage of an allocated declared array inside a host + ! function. + + allocate (b(n)) + + if (.not.allocated (b)) error stop + if (.not.acc_is_present (b)) error stop + + !$acc parallel loop ! { dg-line l[incr c] } + ! { dg-note {variable 'i' in 'private' clause is candidate for adjusting OpenACC privatization level} {} { target *-*-* } l$c } + ! { dg-note {variable 'i' ought to be adjusted for OpenACC privatization level: 'vector'} {} { target *-*-* } l$c } + ! { dg-note {variable 'i' adjusted for OpenACC privatization level: 'vector'} {} { target { ! openacc_host_selected } } l$c } + ! { dg-note {variable 'i\.[0-9]+' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l$c } + ! { dg-optimized {assigned OpenACC gang vector loop parallelism} {} { target *-*-* } l$c } + do i = 1, n + b(i) = 1.0 + end do + + !$acc update host(b) + + do i = 1, n + b(i) = fun2 (i) + end do + + if (.not.acc_is_present (b)) error stop + + do i = 1, n + if (b(i) /= i*i) error stop + end do + + deallocate (b) +end program test ! { dg-line l[incr c] } +! { dg-bogus {note: variable 'overflow\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {TODO n/a} { xfail *-*-* } l$c } +! 
{ dg-bogus {note: variable 'not_prev_allocated\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {TODO n/a} { xfail *-*-* } l$c } +! { dg-bogus {note: variable 'parm\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: artificial} {TODO n/a} { xfail *-*-* } l$c } + +! Set each element in array 'b' at index i to i*2. + +subroutine sub1 ! { dg-line subroutine_sub1 } + use vars + implicit none + integer i + !$acc routine gang + ! { dg-bogus {[Ww]arning: region is worker partitioned but does not contain worker partitioned code} {TODO default 'gang' 'vector'} { xfail *-*-* } subroutine_sub1 } + + !$acc loop ! { dg-line l[incr c] } + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l$c } + ! { dg-optimized {assigned OpenACC gang vector loop parallelism} {} { target *-*-* } l$c } + do i = 1, n + b(i) = i*2 + end do +end subroutine sub1 + +! Allocate array 'b', and set it to all 1.0. + +subroutine sub2 + use vars + use openacc + implicit none + integer i + + allocate (b(n)) + + if (.not.allocated (b)) error stop + if (.not.acc_is_present (b)) error stop + + !$acc parallel loop ! { dg-line l[incr c] } + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l$c } + ! { dg-optimized {assigned OpenACC gang vector loop parallelism} {} { target *-*-* } l$c } + do i = 1, n + b(i) = 1.0 + end do +end subroutine sub2 + +! Return b(i) * i; + +real*8 function fun1 (i) + use vars + implicit none + integer i + !$acc routine seq + + fun1 = b(i) * i +end function fun1 + +! 
Return b(i) * i * i; + +real*8 function fun2 (i) + use vars + implicit none + integer i + + fun2 = b(i) * i * i +end function fun2 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-array_descriptor-1-directive.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-array_descriptor-1-directive.f90 new file mode 100644 index 0000000..6604f72 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-array_descriptor-1-directive.f90 @@ -0,0 +1,438 @@ +! Test OpenACC 'declare create' with allocatable arrays. + +! { dg-do run } + +! Note that we're not testing OpenACC semantics here, but rather documenting +! current GCC behavior, specifically, behavior concerning updating of +! host/device array descriptors. +! { dg-skip-if n/a { *-*-* } { -DACC_MEM_SHARED=1 } } + +!TODO-OpenACC-declare-allocate +! Missing support for OpenACC "Changes from Version 2.0 to 2.5": +! "The 'declare create' directive with a Fortran 'allocatable' has new behavior". +! Thus, after 'allocate'/before 'deallocate', do +! '!$acc enter data create'/'!$acc exit data delete' manually. + + +!TODO { dg-additional-options -fno-inline } for stable results regarding OpenACC 'routine'. + + +!TODO OpenACC 'serial' vs. GCC/nvptx: +!TODO { dg-prune-output {using 'vector_length \(32\)', ignoring 1} } + + +! { dg-additional-options -fdump-tree-original } +! { dg-additional-options -fdump-tree-gimple } + + +module vars + implicit none + integer, parameter :: n1_lb = -3 + integer, parameter :: n1_ub = 6 + integer, parameter :: n2_lb = -9999 + integer, parameter :: n2_ub = 22222 + + integer, allocatable :: b(:) + !$acc declare create (b) + +end module vars + +program test + use vars + use openacc + implicit none + integer :: i + + ! Identifiers for purposes of reliable '-fdump-tree-[...]' scanning. 
+ integer :: id1_1, id1_2 + + interface + + subroutine verify_initial + implicit none + !$acc routine seq + end subroutine verify_initial + + subroutine verify_n1_allocated + implicit none + !$acc routine seq + end subroutine verify_n1_allocated + + subroutine verify_n1_values (addend) + implicit none + !$acc routine gang + integer, value :: addend + end subroutine verify_n1_values + + subroutine verify_n1_deallocated (expect_allocated) + implicit none + !$acc routine seq + logical, value :: expect_allocated + end subroutine verify_n1_deallocated + + subroutine verify_n2_allocated + implicit none + !$acc routine seq + end subroutine verify_n2_allocated + + subroutine verify_n2_values (addend) + implicit none + !$acc routine gang + integer, value :: addend + end subroutine verify_n2_values + + subroutine verify_n2_deallocated (expect_allocated) + implicit none + !$acc routine seq + logical, value :: expect_allocated + end subroutine verify_n2_deallocated + + end interface + + call acc_create (id1_1) + call acc_create (id1_2) + + call verify_initial + ! It is important here (and similarly, following) that there is no data + ! clause for 'b' (explicit or implicit): no 'GOMP_MAP_TO_PSET'. + !$acc serial + call verify_initial + !$acc end serial + + allocate (b(n1_lb:n1_ub)) + call verify_n1_allocated + if (acc_is_present (b)) error stop + !$acc enter data create (b) + ! This is now OpenACC "present": + if (.not.acc_is_present (b)) error stop + ! ..., and got the actual array descriptor installed: + !$acc serial + call verify_n1_allocated + !$acc end serial + + do i = n1_lb, n1_ub + b(i) = i - 1 + end do + + ! In 'declare-allocatable-array_descriptor-1-runtime.f90', this does "verify + ! that host-to-device copy doesn't touch the device-side (still initial) + ! array descriptor (but it does copy the array data)". This is here not + ! applicable anymore, as we've already gotten the actual array descriptor + ! installed. Thus now verify that it does copy the array data. 
+ call acc_update_device (b) + !$acc serial + call verify_n1_allocated + !$acc end serial + + b = 40 + + !$acc parallel copyout (id1_1) ! No data clause for 'b' (explicit or implicit): no 'GOMP_MAP_TO_PSET'. + call verify_n1_values (-1) + id1_1 = 0 + !$acc end parallel + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc parallel map\(from:id1_1\)$} 1 original } } + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_parallel map\(from:id1_1 \[len: [0-9]+\]\)$} 1 gimple } } + + !$acc parallel copy (b) copyout (id1_2) + ! As already present, 'copy (b)' doesn't copy; addend is still '-1'. + call verify_n1_values (-1) + id1_2 = 0 + !$acc end parallel + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc parallel map\(tofrom:\*\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[pointer assign, bias: 0\]\) map\(from:id1_2\)$} 1 original } } + !TODO ..., but without an actual use of 'b', the gimplifier removes the + !TODO 'GOMP_MAP_TO_PSET': + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_parallel map\(tofrom:MEM <integer\(kind=[0-9]+\)\[0:\]> \[\(integer\(kind=[0-9]+\)\[0:\] \*\)[^\]]+\] \[len: [^\]]+\]\) map\(alloc:b\.data \[pointer assign, bias: 0\]\) map\(from:id1_2 \[len: [0-9]+\]\)$} 1 gimple } } + + ! In 'declare-allocatable-array_descriptor-1-runtime.f90', this does "verify + ! that device-to-host copy doesn't touch the host-side array descriptor, + ! doesn't copy out the device-side (still initial) array descriptor (but it + ! does copy the array data)". This is here not applicable anymore, as we've + ! already gotten the actual array descriptor installed. Thus now verify that + ! it does copy the array data. + call acc_update_self (b) + call verify_n1_allocated + + do i = n1_lb, n1_ub + if (b(i) /= i - 1) error stop + b(i) = b(i) + 2 + end do + + ! 
The same using the OpenACC 'update' directive. + + !$acc update device (b) self (id1_1) + ! We do have 'GOMP_MAP_TO_PSET' here: + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc update map\(force_to:\*\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[pointer assign, bias: 0\]\) map\(force_from:id1_1\);$} 1 original } } + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_update map\(force_to:MEM <integer\(kind=[0-9]+\)\[0:\]> \[\(integer\(kind=[0-9]+\)\[0:\] \*\)[^\]]+\] \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:b\.data \[pointer assign, bias: 0\]\) map\(force_from:id1_1 \[len: [0-9]+\]\)$} 1 gimple } } + ! ..., but it's silently skipped in 'GOACC_update'. + !$acc serial + call verify_n1_allocated + !$acc end serial + + b = 41 + + !$acc parallel + call verify_n1_values (1) + !$acc end parallel + + !$acc parallel copy (b) + call verify_n1_values (1) + !$acc end parallel + + !$acc update self (b) self (id1_2) + ! We do have 'GOMP_MAP_TO_PSET' here: + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc update map\(force_from:\*\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[pointer assign, bias: 0\]\) map\(force_from:id1_2\);$} 1 original } } + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_update map\(force_from:MEM <integer\(kind=[0-9]+\)\[0:\]> \[\(integer\(kind=[0-9]+\)\[0:\] \*\)[^\]]+\] \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:b\.data \[pointer assign, bias: 0\]\) map\(force_from:id1_2 \[len: [0-9]+\]\)$} 1 gimple } } + ! ..., but it's silently skipped in 'GOACC_update'. + call verify_n1_allocated + + do i = n1_lb, n1_ub + if (b(i) /= i + 1) error stop + b(i) = b(i) + 2 + end do + + ! 
Now test that (potentially re-)installing the actual array descriptor is a + ! no-op, via a data clause for 'b' (explicit or implicit): must get a + ! 'GOMP_MAP_TO_PSET'. + !$acc serial present (b) copyin (id1_2) + call verify_n1_allocated + !TODO Use of 'b': + id1_2 = ubound (b, 1) + !$acc end serial + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc serial map\(force_present:\*\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[pointer assign, bias: 0\]\) map\(to:id1_2\)$} 1 original } } + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_serial map\(force_present:MEM <integer\(kind=[0-9]+\)\[0:\]> \[\(integer\(kind=[0-9]+\)\[0:\] \*\)[^\]]+\] \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:b\.data \[pointer assign, bias: 0\]\) map\(to:id1_2 \[len: [0-9]+\]\)$} 1 gimple } } + + !$acc parallel copyin (id1_1) ! No data clause for 'b' (explicit or implicit): no 'GOMP_MAP_TO_PSET'. + call verify_n1_values (1) + id1_1 = 0 + !$acc end parallel + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc parallel map\(to:id1_1\)$} 1 original } } + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_parallel map\(to:id1_1 \[len: [0-9]+\]\)$} 1 gimple } } + + !$acc parallel copy (b) copyin (id1_2) + ! As already present, 'copy (b)' doesn't copy; addend is still '1'. + call verify_n1_values (1) + id1_2 = 0 + !$acc end parallel + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc parallel map\(tofrom:\*\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[pointer assign, bias: 0\]\) map\(to:id1_2\)$} 1 original } } + !TODO ..., but without an actual use of 'b', the gimplifier removes the + !TODO 'GOMP_MAP_TO_PSET': + ! 
{ dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_parallel map\(tofrom:MEM <integer\(kind=[0-9]+\)\[0:\]> \[\(integer\(kind=[0-9]+\)\[0:\] \*\)[^\]]+\] \[len: [^\]]+\]\) map\(alloc:b\.data \[pointer assign, bias: 0\]\) map\(to:id1_2 \[len: [0-9]+\]\)$} 1 gimple } } + + call verify_n1_allocated + if (.not.acc_is_present (b)) error stop + + !$acc exit data delete (b) + if (.not.allocated (b)) error stop + if (acc_is_present (b)) error stop + ! The device-side array descriptor doesn't get updated, so 'b' still appears + ! as "allocated": + !$acc serial + call verify_n1_allocated + !$acc end serial + + deallocate (b) + call verify_n1_deallocated (.false.) + ! The device-side array descriptor doesn't get updated, so 'b' still appears + ! as "allocated": + !$acc serial + call verify_n1_allocated + !$acc end serial + + ! Now try to install the actual array descriptor, via a data clause for 'b' + ! (explicit or implicit): must get a 'GOMP_MAP_TO_PSET', which then in + ! 'gomp_map_vars_internal' is handled as 'declare target', but because of + ! '*(void **) hostaddrs[i] == NULL', we've got 'has_always_ptrset == false', + ! 'always_to_cnt == 0', and therefore 'gomp_map_vars_existing' doesn't update + ! the 'GOMP_MAP_TO_PSET'. + ! The device-side array descriptor doesn't get updated, so 'b' still appears + ! as "allocated": + !TODO Why does 'present (b)' still work here? + !$acc serial present (b) copyout (id1_2) + call verify_n1_deallocated (.true.) + !TODO Use of 'b'. + id1_2 = ubound (b, 1) + !$acc end serial + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc serial map\(force_present:\*\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[pointer assign, bias: 0\]\) map\(from:id1_2\)$} 1 original } } + ! 
{ dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_serial map\(force_present:MEM <integer\(kind=[0-9]+\)\[0:\]> \[\(integer\(kind=[0-9]+\)\[0:\] \*\)[^\]]+\] \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:b\.data \[pointer assign, bias: 0\]\) map\(from:id1_2 \[len: [0-9]+\]\)$} 1 gimple } } + + + ! Restart the procedure, with different array dimensions. + + allocate (b(n2_lb:n2_ub)) + call verify_n2_allocated + if (acc_is_present (b)) error stop + !$acc enter data create (b) + if (.not.acc_is_present (b)) error stop + ! ..., and got the actual array descriptor installed: + !$acc serial + call verify_n2_allocated + !$acc end serial + + do i = n2_lb, n2_ub + b(i) = i + 20 + end do + + call acc_update_device (b) + !$acc serial + call verify_n2_allocated + !$acc end serial + + b = -40 + + !$acc parallel + call verify_n2_values (20) + !$acc end parallel + + !$acc parallel copy (b) + call verify_n2_values (20) + !$acc end parallel + + call acc_update_self (b) + call verify_n2_allocated + + do i = n2_lb, n2_ub + if (b(i) /= i + 20) error stop + b(i) = b(i) - 40 + end do + + !$acc update device (b) + !$acc serial + call verify_n2_allocated + !$acc end serial + + b = -41 + + !$acc parallel + call verify_n2_values (-20) + !$acc end parallel + + !$acc parallel copy (b) + call verify_n2_values (-20) + !$acc end parallel + + !$acc update self (b) + call verify_n2_allocated + + do i = n2_lb, n2_ub + if (b(i) /= i - 20) error stop + b(i) = b(i) + 10 + end do + + !$acc serial present (b) copy (id1_2) + call verify_n2_allocated + !TODO Use of 'b': + id1_2 = ubound (b, 1) + !$acc end serial + + !$acc parallel + call verify_n2_values (-20) + !$acc end parallel + + !$acc parallel copy (b) + call verify_n2_values (-20) + !$acc end parallel + + call verify_n2_allocated + if (.not.acc_is_present (b)) error stop + + !$acc exit data delete (b) + if (.not.allocated (b)) error stop + if (acc_is_present (b)) error stop + !$acc serial + call 
verify_n2_allocated + !$acc end serial + + deallocate (b) + call verify_n2_deallocated (.false.) + !$acc serial + call verify_n2_allocated + !$acc end serial + + !$acc serial present (b) copy (id1_2) + call verify_n2_deallocated (.true.) + !TODO Use of 'b': + id1_2 = ubound (b, 1) + !$acc end serial + +end program test + + +subroutine verify_initial + use vars + implicit none + !$acc routine seq + + if (allocated (b)) error stop "verify_initial allocated" + if (any (lbound (b) /= [0])) error stop "verify_initial lbound" + if (any (ubound (b) /= [0])) error stop "verify_initial ubound" +end subroutine verify_initial + +subroutine verify_n1_allocated + use vars + implicit none + !$acc routine seq + + if (.not.allocated (b)) error stop "verify_n1_allocated allocated" + if (any (lbound (b) /= [n1_lb])) error stop "verify_n1_allocated lbound" + if (any (ubound (b) /= [n1_ub])) error stop "verify_n1_allocated ubound" +end subroutine verify_n1_allocated + +subroutine verify_n1_values (addend) + use vars + implicit none + !$acc routine gang + integer, value :: addend + integer :: i + + !$acc loop + do i = n1_lb, n1_ub + if (b(i) /= i + addend) error stop + end do +end subroutine verify_n1_values + +subroutine verify_n1_deallocated (expect_allocated) + use vars + implicit none + !$acc routine seq + logical, value :: expect_allocated + + if (allocated(b) .neqv. expect_allocated) error stop "verify_n1_deallocated allocated" + ! Apparently 'deallocate'ing doesn't unset the bounds. 
+ if (any (lbound (b) /= [n1_lb])) error stop "verify_n1_deallocated lbound" + if (any (ubound (b) /= [n1_ub])) error stop "verify_n1_deallocated ubound" +end subroutine verify_n1_deallocated + +subroutine verify_n2_allocated + use vars + implicit none + !$acc routine seq + + if (.not.allocated(b)) error stop "verify_n2_allocated allocated" + if (any (lbound (b) /= [n2_lb])) error stop "verify_n2_allocated lbound" + if (any (ubound (b) /= [n2_ub])) error stop "verify_n2_allocated ubound" +end subroutine verify_n2_allocated + +subroutine verify_n2_values (addend) + use vars + implicit none + !$acc routine gang + integer, value :: addend + integer :: i + + !$acc loop + do i = n2_lb, n2_ub + if (b(i) /= i + addend) error stop + end do +end subroutine verify_n2_values + +subroutine verify_n2_deallocated (expect_allocated) + use vars + implicit none + !$acc routine seq + logical, value :: expect_allocated + + if (allocated(b) .neqv. expect_allocated) error stop "verify_n2_deallocated allocated" + ! Apparently 'deallocate'ing doesn't unset the bounds. + if (any (lbound (b) /= [n2_lb])) error stop "verify_n2_deallocated lbound" + if (any (ubound (b) /= [n2_ub])) error stop "verify_n2_deallocated ubound" +end subroutine verify_n2_deallocated diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-array_descriptor-1-runtime.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-array_descriptor-1-runtime.f90 new file mode 100644 index 0000000..b27f312 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-array_descriptor-1-runtime.f90 @@ -0,0 +1,402 @@ +! Test OpenACC 'declare create' with allocatable arrays. + +! { dg-do run } + +! Note that we're not testing OpenACC semantics here, but rather documenting +! current GCC behavior, specifically, behavior concerning updating of +! host/device array descriptors. +! { dg-skip-if n/a { *-*-* } { -DACC_MEM_SHARED=1 } } + +!TODO-OpenACC-declare-allocate +! 
Missing support for OpenACC "Changes from Version 2.0 to 2.5": +! "The 'declare create' directive with a Fortran 'allocatable' has new behavior". +! Thus, after 'allocate'/before 'deallocate', call 'acc_create'/'acc_delete' +! manually. + + +!TODO { dg-additional-options -fno-inline } for stable results regarding OpenACC 'routine'. + + +!TODO OpenACC 'serial' vs. GCC/nvptx: +!TODO { dg-prune-output {using 'vector_length \(32\)', ignoring 1} } + + +! { dg-additional-options -fdump-tree-original } +! { dg-additional-options -fdump-tree-gimple } + + +module vars + implicit none + integer, parameter :: n1_lb = -3 + integer, parameter :: n1_ub = 6 + integer, parameter :: n2_lb = -9999 + integer, parameter :: n2_ub = 22222 + + integer, allocatable :: b(:) + !$acc declare create (b) + +end module vars + +program test + use vars + use openacc + implicit none + integer :: i + + ! Identifiers for purposes of reliable '-fdump-tree-[...]' scanning. + integer :: id1_1, id1_2 + + interface + + subroutine verify_initial + implicit none + !$acc routine seq + end subroutine verify_initial + + subroutine verify_n1_allocated + implicit none + !$acc routine seq + end subroutine verify_n1_allocated + + subroutine verify_n1_values (addend) + implicit none + !$acc routine gang + integer, value :: addend + end subroutine verify_n1_values + + subroutine verify_n1_deallocated (expect_allocated) + implicit none + !$acc routine seq + logical, value :: expect_allocated + end subroutine verify_n1_deallocated + + subroutine verify_n2_allocated + implicit none + !$acc routine seq + end subroutine verify_n2_allocated + + subroutine verify_n2_values (addend) + implicit none + !$acc routine gang + integer, value :: addend + end subroutine verify_n2_values + + subroutine verify_n2_deallocated (expect_allocated) + implicit none + !$acc routine seq + logical, value :: expect_allocated + end subroutine verify_n2_deallocated + + end interface + + call acc_create (id1_1) + call acc_create (id1_2) + + call 
verify_initial + ! It is important here (and similarly, following) that there is no data + ! clause for 'b' (explicit or implicit): no 'GOMP_MAP_TO_PSET'. + !$acc serial + call verify_initial + !$acc end serial + + allocate (b(n1_lb:n1_ub)) + call verify_n1_allocated + if (acc_is_present (b)) error stop + call acc_create (b) + ! This is now OpenACC "present": + if (.not.acc_is_present (b)) error stop + ! This still has the initial array descriptor: + !$acc serial + call verify_initial + !$acc end serial + + do i = n1_lb, n1_ub + b(i) = i - 1 + end do + + ! Verify that host-to-device copy doesn't touch the device-side (still + ! initial) array descriptor (but it does copy the array data). + call acc_update_device (b) + !$acc serial + call verify_initial + !$acc end serial + + b = 40 + + ! Verify that device-to-host copy doesn't touch the host-side array + ! descriptor, doesn't copy out the device-side (still initial) array + ! descriptor (but it does copy the array data). + call acc_update_self (b) + call verify_n1_allocated + + do i = n1_lb, n1_ub + if (b(i) /= i - 1) error stop + b(i) = b(i) + 2 + end do + + ! The same using the OpenACC 'update' directive. + + !$acc update device (b) self (id1_1) + ! We do have 'GOMP_MAP_TO_PSET' here: + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc update map\(force_to:\*\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[pointer assign, bias: 0\]\) map\(force_from:id1_1\);$} 1 original } } + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_update map\(force_to:MEM <integer\(kind=[0-9]+\)\[0:\]> \[\(integer\(kind=[0-9]+\)\[0:\] \*\)[^\]]+\] \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:b\.data \[pointer assign, bias: 0\]\) map\(force_from:id1_1 \[len: [0-9]+\]\)$} 1 gimple } } + ! ..., but it's silently skipped in 'GOACC_update'. 
+ !$acc serial + call verify_initial + !$acc end serial + + b = 41 + + !$acc update self (b) self (id1_2) + ! We do have 'GOMP_MAP_TO_PSET' here: + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc update map\(force_from:\*\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[pointer assign, bias: 0\]\) map\(force_from:id1_2\);$} 1 original } } + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_update map\(force_from:MEM <integer\(kind=[0-9]+\)\[0:\]> \[\(integer\(kind=[0-9]+\)\[0:\] \*\)[^\]]+\] \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:b\.data \[pointer assign, bias: 0\]\) map\(force_from:id1_2 \[len: [0-9]+\]\)$} 1 gimple } } + ! ..., but it's silently skipped in 'GOACC_update'. + call verify_n1_allocated + + do i = n1_lb, n1_ub + if (b(i) /= i + 1) error stop + b(i) = b(i) + 2 + end do + + ! Now install the actual array descriptor, via a data clause for 'b' + ! (explicit or implicit): must get a 'GOMP_MAP_TO_PSET', which then in + ! 'gomp_map_vars_internal' is handled as 'declare target', and because of + ! '*(void **) hostaddrs[i] != NULL', we've got 'has_always_ptrset == true', + ! 'always_to_cnt == 1', and therefore 'gomp_map_vars_existing' does update + ! the 'GOMP_MAP_TO_PSET'. + !$acc serial present (b) copyin (id1_1) + call verify_initial + id1_1 = 0 + !$acc end serial + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc serial map\(force_present:\*\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[pointer assign, bias: 0\]\) map\(to:id1_1\)$} 1 original } } + !TODO ..., but without an actual use of 'b', the gimplifier removes the + !TODO 'GOMP_MAP_TO_PSET': + ! 
{ dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_serial map\(force_present:MEM <integer\(kind=[0-9]+\)\[0:\]> \[\(integer\(kind=[0-9]+\)\[0:\] \*\)[^\]]+\] \[len: [^\]]+\]\) map\(alloc:b\.data \[pointer assign, bias: 0\]\) map\(to:id1_1 \[len: [0-9]+\]\)$} 1 gimple } } + !$acc serial present (b) copyin (id1_2) + call verify_n1_allocated + !TODO Use of 'b': + id1_2 = ubound (b, 1) + !$acc end serial + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc serial map\(force_present:\*\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[pointer assign, bias: 0\]\) map\(to:id1_2\)$} 1 original } } + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_serial map\(force_present:MEM <integer\(kind=[0-9]+\)\[0:\]> \[\(integer\(kind=[0-9]+\)\[0:\] \*\)[^\]]+\] \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:b\.data \[pointer assign, bias: 0\]\) map\(to:id1_2 \[len: [0-9]+\]\)$} 1 gimple } } + + !$acc parallel copyin (id1_1) ! No data clause for 'b' (explicit or implicit): no 'GOMP_MAP_TO_PSET'. + call verify_n1_values (1) + id1_1 = 0 + !$acc end parallel + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc parallel map\(to:id1_1\)$} 1 original } } + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_parallel map\(to:id1_1 \[len: [0-9]+\]\)$} 1 gimple } } + + !$acc parallel copy (b) copyin (id1_2) + ! As already present, 'copy (b)' doesn't copy; addend is still '1'. + call verify_n1_values (1) + id1_2 = 0 + !$acc end parallel + ! 
{ dg-final { scan-tree-dump-times {(?n)^ *#pragma acc parallel map\(tofrom:\*\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[pointer assign, bias: 0\]\) map\(to:id1_2\)$} 1 original } } + !TODO ..., but without an actual use of 'b', the gimplifier removes the + !TODO 'GOMP_MAP_TO_PSET': + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_parallel map\(tofrom:MEM <integer\(kind=[0-9]+\)\[0:\]> \[\(integer\(kind=[0-9]+\)\[0:\] \*\)[^\]]+\] \[len: [^\]]+\]\) map\(alloc:b\.data \[pointer assign, bias: 0\]\) map\(to:id1_2 \[len: [0-9]+\]\)$} 1 gimple } } + + call verify_n1_allocated + if (.not.acc_is_present (b)) error stop + + call acc_delete (b) + if (.not.allocated (b)) error stop + if (acc_is_present (b)) error stop + ! The device-side array descriptor doesn't get updated, so 'b' still appears + ! as "allocated": + !$acc serial + call verify_n1_allocated + !$acc end serial + + deallocate (b) + call verify_n1_deallocated (.false.) + ! The device-side array descriptor doesn't get updated, so 'b' still appears + ! as "allocated": + !$acc serial + call verify_n1_allocated + !$acc end serial + + ! Now try to install the actual array descriptor, via a data clause for 'b' + ! (explicit or implicit): must get a 'GOMP_MAP_TO_PSET', which then in + ! 'gomp_map_vars_internal' is handled as 'declare target', but because of + ! '*(void **) hostaddrs[i] == NULL', we've got 'has_always_ptrset == false', + ! 'always_to_cnt == 0', and therefore 'gomp_map_vars_existing' doesn't update + ! the 'GOMP_MAP_TO_PSET'. + ! The device-side array descriptor doesn't get updated, so 'b' still appears + ! as "allocated": + !TODO Why does 'present (b)' still work here? + !$acc serial present (b) copyout (id1_2) + call verify_n1_deallocated (.true.) + !TODO Use of 'b'. + id1_2 = ubound (b, 1) + !$acc end serial + ! 
{ dg-final { scan-tree-dump-times {(?n)^ *#pragma acc serial map\(force_present:\*\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[pointer assign, bias: 0\]\) map\(from:id1_2\)$} 1 original } } + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_serial map\(force_present:MEM <integer\(kind=[0-9]+\)\[0:\]> \[\(integer\(kind=[0-9]+\)\[0:\] \*\)[^\]]+\] \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:b\.data \[pointer assign, bias: 0\]\) map\(from:id1_2 \[len: [0-9]+\]\)$} 1 gimple } } + + + ! Restart the procedure, with different array dimensions. + + allocate (b(n2_lb:n2_ub)) + call verify_n2_allocated + if (acc_is_present (b)) error stop + call acc_create (b) + if (.not.acc_is_present (b)) error stop + ! This still has the previous (n1) array descriptor: + !$acc serial + call verify_n1_deallocated (.true.) + !$acc end serial + + do i = n2_lb, n2_ub + b(i) = i + 20 + end do + + call acc_update_device (b) + !$acc serial + call verify_n1_deallocated (.true.) + !$acc end serial + + b = -40 + + call acc_update_self (b) + call verify_n2_allocated + + do i = n2_lb, n2_ub + if (b(i) /= i + 20) error stop + b(i) = b(i) - 40 + end do + + !$acc update device (b) + !$acc serial + call verify_n1_deallocated (.true.) 
+ !$acc end serial + + b = -41 + + !$acc update self (b) + call verify_n2_allocated + + do i = n2_lb, n2_ub + if (b(i) /= i - 20) error stop + b(i) = b(i) + 10 + end do + + !$acc serial present (b) copy (id1_2) + call verify_n2_allocated + !TODO Use of 'b': + id1_2 = ubound (b, 1) + !$acc end serial + + !$acc parallel + call verify_n2_values (-20) + !$acc end parallel + + !$acc parallel copy (b) + call verify_n2_values (-20) + !$acc end parallel + + call verify_n2_allocated + if (.not.acc_is_present (b)) error stop + + call acc_delete (b) + if (.not.allocated (b)) error stop + if (acc_is_present (b)) error stop + !$acc serial + call verify_n2_allocated + !$acc end serial + + deallocate (b) + call verify_n2_deallocated (.false.) + !$acc serial + call verify_n2_allocated + !$acc end serial + + !$acc serial present (b) copy (id1_2) + call verify_n2_deallocated (.true.) + !TODO Use of 'b': + id1_2 = ubound (b, 1) + !$acc end serial + +end program test + + +subroutine verify_initial + use vars + implicit none + !$acc routine seq + + if (allocated (b)) error stop "verify_initial allocated" + if (any (lbound (b) /= [0])) error stop "verify_initial lbound" + if (any (ubound (b) /= [0])) error stop "verify_initial ubound" +end subroutine verify_initial + +subroutine verify_n1_allocated + use vars + implicit none + !$acc routine seq + + if (.not.allocated (b)) error stop "verify_n1_allocated allocated" + if (any (lbound (b) /= [n1_lb])) error stop "verify_n1_allocated lbound" + if (any (ubound (b) /= [n1_ub])) error stop "verify_n1_allocated ubound" +end subroutine verify_n1_allocated + +subroutine verify_n1_values (addend) + use vars + implicit none + !$acc routine gang + integer, value :: addend + integer :: i + + !$acc loop + do i = n1_lb, n1_ub + if (b(i) /= i + addend) error stop + end do +end subroutine verify_n1_values + +subroutine verify_n1_deallocated (expect_allocated) + use vars + implicit none + !$acc routine seq + logical, value :: expect_allocated + + if 
(allocated(b) .neqv. expect_allocated) error stop "verify_n1_deallocated allocated" + ! Apparently 'deallocate'ing doesn't unset the bounds. + if (any (lbound (b) /= [n1_lb])) error stop "verify_n1_deallocated lbound" + if (any (ubound (b) /= [n1_ub])) error stop "verify_n1_deallocated ubound" +end subroutine verify_n1_deallocated + +subroutine verify_n2_allocated + use vars + implicit none + !$acc routine seq + + if (.not.allocated(b)) error stop "verify_n2_allocated allocated" + if (any (lbound (b) /= [n2_lb])) error stop "verify_n2_allocated lbound" + if (any (ubound (b) /= [n2_ub])) error stop "verify_n2_allocated ubound" +end subroutine verify_n2_allocated + +subroutine verify_n2_values (addend) + use vars + implicit none + !$acc routine gang + integer, value :: addend + integer :: i + + !$acc loop + do i = n2_lb, n2_ub + if (b(i) /= i + addend) error stop + end do +end subroutine verify_n2_values + +subroutine verify_n2_deallocated (expect_allocated) + use vars + implicit none + !$acc routine seq + logical, value :: expect_allocated + + if (allocated(b) .neqv. expect_allocated) error stop "verify_n2_deallocated allocated" + ! Apparently 'deallocate'ing doesn't unset the bounds. + if (any (lbound (b) /= [n2_lb])) error stop "verify_n2_deallocated lbound" + if (any (ubound (b) /= [n2_ub])) error stop "verify_n2_deallocated ubound" +end subroutine verify_n2_deallocated diff --git a/libgomp/testsuite/libgomp.oacc-fortran/host_data-5.F90 b/libgomp/testsuite/libgomp.oacc-fortran/host_data-5.F90 index 93e9ee0..c3453a5 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/host_data-5.F90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/host_data-5.F90 @@ -43,7 +43,7 @@ subroutine foo (p2, parr, host_p, host_parr, cond) ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target { ! openacc_host_selected } } .-2 } ! 
{ dg-note {variable 'p\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } ! { dg-note {variable 'parr\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-4 } - ! { dg-note {variable 'parm\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "TODO" { target { ! openacc_host_selected } } .-5 } + ! { dg-note {variable 'parm\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: artificial} "" { target { ! openacc_host_selected } } .-5 } #if !ACC_MEM_SHARED if (acc_is_present(p, c_sizeof(p))) stop 5 if (acc_is_present(parr, 1)) stop 6 @@ -54,8 +54,8 @@ subroutine foo (p2, parr, host_p, host_parr, cond) ! { dg-note {variable 'host_p\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } ! { dg-note {variable 'parr\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-4 } ! { dg-note {variable 'host_parr\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-5 } - ! { dg-note {variable 'D\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "TODO" { target *-*-* } .-6 } - ! { dg-note {variable 'transfer\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "TODO" { target *-*-* } .-7 } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: artificial} "" { target *-*-* } .-6 } + ! { dg-note {variable 'transfer\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: artificial} "" { target *-*-* } .-7 } ! 
{ dg-note {variable 'parm\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-8 } ! not mapped yet, so it will be equal to the host pointer. if (transfer(c_loc(p), host_p) /= host_p) stop 7 @@ -74,9 +74,9 @@ subroutine foo (p2, parr, host_p, host_parr, cond) ! { dg-note {variable 'host_p\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-5 } ! { dg-note {variable 'host_parr\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-6 } ! { dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target *-*-* } .-7 } - ! { dg-note {variable 'parm\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "TODO" { target *-*-* } .-8 } - ! { dg-note {variable 'D\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "TODO" { target *-*-* } .-9 } - ! { dg-note {variable 'transfer\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "TODO" { target *-*-* } .-10 } + ! { dg-note {variable 'parm\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: artificial} "" { target *-*-* } .-8 } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: artificial} "" { target *-*-* } .-9 } + ! { dg-note {variable 'transfer\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: artificial} "" { target *-*-* } .-10 } ! { dg-note {variable 'parm\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-11 } if (.not. acc_is_present(p, c_sizeof(p))) stop 11 if (.not. 
acc_is_present(parr, 1)) stop 12 @@ -90,8 +90,8 @@ subroutine foo (p2, parr, host_p, host_parr, cond) ! { dg-note {variable 'host_p\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } ! { dg-note {variable 'parr\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-4 } ! { dg-note {variable 'host_parr\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-5 } - ! { dg-note {variable 'D\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "TODO" { target *-*-* } .-6 } - ! { dg-note {variable 'transfer\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "TODO" { target *-*-* } .-7 } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: artificial} "" { target *-*-* } .-6 } + ! { dg-note {variable 'transfer\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: artificial} "" { target *-*-* } .-7 } ! { dg-note {variable 'parm\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-8 } #if ACC_MEM_SHARED if (transfer(c_loc(p), host_p) /= host_p) stop 15 @@ -110,8 +110,8 @@ subroutine foo (p2, parr, host_p, host_parr, cond) ! { dg-note {variable 'parr\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-4 } ! { dg-note {variable 'host_parr\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-5 } ! { dg-note {variable 'parm\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-6 } - ! 
{ dg-note {variable 'D\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "TODO" { target *-*-* } .-7 } - ! { dg-note {variable 'transfer\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "TODO" { target *-*-* } .-8 } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: artificial} "" { target *-*-* } .-7 } + ! { dg-note {variable 'transfer\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: artificial} "" { target *-*-* } .-8 } #if ACC_MEM_SHARED if (transfer(c_loc(p), host_p) /= host_p) stop 19 if (transfer(c_loc(parr), host_parr) /= host_parr) stop 20 @@ -129,8 +129,8 @@ subroutine foo (p2, parr, host_p, host_parr, cond) ! { dg-note {variable 'parr\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-4 } ! { dg-note {variable 'host_parr\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-5 } ! { dg-note {variable 'parm\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-6 } - ! { dg-note {variable 'D\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "TODO" { target *-*-* } .-7 } - ! { dg-note {variable 'transfer\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "TODO" { target *-*-* } .-8 } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: artificial} "" { target *-*-* } .-7 } + ! 
{ dg-note {variable 'transfer\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: artificial} "" { target *-*-* } .-8 } #if ACC_MEM_SHARED if (transfer(c_loc(p), host_p) /= host_p) stop 23 if (transfer(c_loc(parr), host_parr) /= host_parr) stop 24 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/if-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/if-1.f90 index c6d6764..e0cfd91 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/if-1.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/if-1.f90 @@ -382,7 +382,7 @@ program main b(:) = 1.0 !$acc data copyin (a(1:N)) copyout (b(1:N)) if (0 == 1) - ! { dg-note {variable 'parm\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "" { target { ! openacc_host_selected } } .-1 } + ! { dg-note {variable 'parm\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: artificial} "" { target { ! openacc_host_selected } } .-1 } #if !ACC_MEM_SHARED if (acc_is_present (a) .eqv. .TRUE.) STOP 21 @@ -396,7 +396,7 @@ program main !$acc data copyin (a(1:N)) if (1 == 1) ! { dg-note {variable 'parm\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } - ! { dg-note {variable 'parm\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "" { target { ! openacc_host_selected } } .-2 } + ! { dg-note {variable 'parm\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: artificial} "" { target { ! openacc_host_selected } } .-2 } #if !ACC_MEM_SHARED if (acc_is_present (a) .eqv. .FALSE.) STOP 23 @@ -404,7 +404,7 @@ program main !$acc data copyout (b(1:N)) if (0 == 1) ! { dg-note {variable 'parm\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } - ! 
{ dg-note {variable 'parm\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "" { target { ! openacc_host_selected } } .-2 } + ! { dg-note {variable 'parm\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: artificial} "" { target { ! openacc_host_selected } } .-2 } #if !ACC_MEM_SHARED if (acc_is_present (b) .eqv. .TRUE.) STOP 24 #endif @@ -877,7 +877,7 @@ program main b(:) = 1.0 !$acc data copyin (a(1:N)) copyout (b(1:N)) if (0 == 1) - ! { dg-note {variable 'parm\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "" { target { ! openacc_host_selected } } .-1 } + ! { dg-note {variable 'parm\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: artificial} "" { target { ! openacc_host_selected } } .-1 } #if !ACC_MEM_SHARED if (acc_is_present (a) .eqv. .TRUE.) STOP 56 @@ -891,7 +891,7 @@ program main !$acc data copyin (a(1:N)) if (1 == 1) ! { dg-note {variable 'parm\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } - ! { dg-note {variable 'parm\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "" { target { ! openacc_host_selected } } .-2 } + ! { dg-note {variable 'parm\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: artificial} "" { target { ! openacc_host_selected } } .-2 } #if !ACC_MEM_SHARED if (acc_is_present (a) .eqv. .FALSE.) STOP 58 @@ -899,7 +899,7 @@ program main !$acc data copyout (b(1:N)) if (0 == 1) ! { dg-note {variable 'parm\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } - ! { dg-note {variable 'parm\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "" { target { ! openacc_host_selected } } .-2 } + ! 
{ dg-note {variable 'parm\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: artificial} "" { target { ! openacc_host_selected } } .-2 } #if !ACC_MEM_SHARED if (acc_is_present (b) .eqv. .TRUE.) STOP 59 #endif diff --git a/libgomp/testsuite/libgomp.oacc-fortran/parallel-dims.f90 b/libgomp/testsuite/libgomp.oacc-fortran/parallel-dims.f90 index d2050e6..178a66d 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/parallel-dims.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/parallel-dims.f90 @@ -2,8 +2,8 @@ ! vector_length. ! { dg-additional-sources parallel-dims-aux.c } +! { dg-additional-options -Wno-complain-wrong-lang } ! { dg-do run } -! { dg-prune-output "command-line option '-fintrinsic-modules-path=.*' is valid for Fortran but not for C" } ! { dg-additional-options "-DEXPENSIVE" { target run_expensive_tests } } ! { dg-additional-options "-cpp" } diff --git a/libgomp/testsuite/libgomp.oacc-fortran/pr106643-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/pr106643-1.f90 new file mode 100644 index 0000000..a9c969e --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/pr106643-1.f90 @@ -0,0 +1,83 @@ +! { dg-do run } +! { dg-additional-options -cpp } + + +!TODO OpenACC 'serial' vs. 
GCC/nvptx: +!TODO { dg-prune-output {using 'vector_length \(32\)', ignoring 1} } + + +module m_macron + + implicit none + + real(kind(0d0)), allocatable, dimension(:) :: valls + !$acc declare create(valls) + +contains + + subroutine s_macron_compute(size) + + integer :: size + + !$acc routine seq + +#if ACC_MEM_SHARED + if (valls(size) /= 1) error stop +#else + if (valls(size) /= size - 2) error stop +#endif + + valls(size) = size + 2 + + end subroutine s_macron_compute + + subroutine s_macron_init(size) + + integer :: size + + print*, "size=", size + + print*, "allocate(valls(1:size))" + allocate(valls(1:size)) + + print*, "acc enter data create(valls(1:size))" + !$acc enter data create(valls(1:size)) + + print*, "!$acc update device(valls(1:size))" + valls(size) = size - 2 + !$acc update device(valls(1:size)) + + valls(size) = 1 + + !$acc serial + call s_macron_compute(size) + !$acc end serial + + valls(size) = -1 + + !$acc update host(valls(1:size)) +#if ACC_MEM_SHARED + if (valls(size) /= -1) error stop +#else + if (valls(size) /= size + 2) error stop +#endif + + print*, valls(1:size) + + print*, "acc exit data delete(valls)" + !$acc exit data delete(valls) + + end subroutine s_macron_init + +end module m_macron + + +program p_main + + use m_macron + + implicit none + + call s_macron_init(10) + +end program p_main diff --git a/libgomp/testsuite/libgomp.oacc-fortran/print-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/print-1.f90 index 42a8538..d2f89d9 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/print-1.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/print-1.f90 @@ -6,15 +6,6 @@ ! Separate file 'print-1-nvptx.f90' for nvptx offloading. ! { dg-skip-if "separate file" { offload_target_nvptx } } -! For GCN offloading compilation, when gang-privatizing 'dt_parm.N' -! (see below), we run into an 'gang-private data-share memory exhausted' -! error: the default '-mgang-private-size' is too small. Per -! 
'gcc/fortran/trans-io.cc'/'libgfortran/io/io.h', that one is -! 'struct st_parameter_dt', which indeed is rather big. Instead of -! working out its exact size (which may vary per GCC configuration), -! raise '-mgang-private-size' to an arbitrary high value. -! { dg-additional-options "-foffload-options=amdgcn-amdhsa=-mgang-private-size=13579" { target openacc_radeon_accel_selected } } - ! { dg-additional-options "-fopt-info-note-omp" } ! { dg-additional-options "-foffload=-fopt-info-note-omp" } @@ -36,9 +27,7 @@ program main integer :: var = 42 !$acc parallel ! { dg-line l_compute[incr c_compute] } - ! { dg-note {variable 'dt_parm\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} {} { target *-*-* } l_compute$c_compute } - ! { dg-note {variable 'dt_parm\.[0-9]+' ought to be adjusted for OpenACC privatization level: 'gang'} {} { target *-*-* } l_compute$c_compute } - ! { dg-note {variable 'dt_parm\.[0-9]+' adjusted for OpenACC privatization level: 'gang'} {} { target { ! openacc_host_selected } } l_compute$c_compute } + ! { dg-note {variable 'dt_parm\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: artificial} {} { target *-*-* } l_compute$c_compute } write (0, '("The answer is ", I2)') var !$acc end parallel diff --git a/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90 index b31f406..498ef70 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90 @@ -122,9 +122,7 @@ contains ! { dg-note {variable 'str' in 'private' clause is candidate for adjusting OpenACC privatization level} "" { target *-*-* } l_loop$c_loop } ! { dg-note {variable 'str' ought to be adjusted for OpenACC privatization level: 'gang'} "" { target *-*-* } l_loop$c_loop } ! { dg-note {variable 'str' adjusted for OpenACC privatization level: 'gang'} "" { target { ! 
{ openacc_host_selected || { openacc_nvidia_accel_selected && __OPTIMIZE__ } } } } l_loop$c_loop } - ! { dg-note {variable 'char\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "" { target *-*-* } l_loop$c_loop } - ! { dg-note {variable 'char\.[0-9]+' ought to be adjusted for OpenACC privatization level: 'gang'} "" { target *-*-* } l_loop$c_loop } - ! { dg-note {variable 'char\.[0-9]+' adjusted for OpenACC privatization level: 'gang'} "" { target { ! { openacc_host_selected || { openacc_nvidia_accel_selected && __OPTIMIZE__ } } } } l_loop$c_loop } + ! { dg-note {variable 'char\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: artificial} "" { target *-*-* } l_loop$c_loop } ! { dg-message {sorry, unimplemented: target cannot support alloca} PR65181 { target openacc_nvidia_accel_selected } l_loop$c_loop } do i = 1, 10 str(i:i) = achar(ichar('A') + i) @@ -167,9 +165,7 @@ contains ! { dg-note {variable 'scalar' in 'private' clause is candidate for adjusting OpenACC privatization level} "" { target *-*-* } l_loop$c_loop } ! { dg-note {variable 'scalar' ought to be adjusted for OpenACC privatization level: 'gang'} "" { target *-*-* } l_loop$c_loop } ! { dg-note {variable 'scalar' adjusted for OpenACC privatization level: 'gang'} "" { target { ! { openacc_host_selected || { openacc_nvidia_accel_selected && __OPTIMIZE__ } } } } l_loop$c_loop } - ! { dg-note {variable 'char\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "" { target *-*-* } l_loop$c_loop } - ! { dg-note {variable 'char\.[0-9]+' ought to be adjusted for OpenACC privatization level: 'gang'} "" { target *-*-* } l_loop$c_loop } - ! { dg-note {variable 'char\.[0-9]+' adjusted for OpenACC privatization level: 'gang'} "" { target { ! { openacc_host_selected || { openacc_nvidia_accel_selected && __OPTIMIZE__ } } } } l_loop$c_loop } + ! 
{ dg-note {variable 'char\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: artificial} "" { target *-*-* } l_loop$c_loop } do i = 1, 15 scalar(i:i) = achar(ichar('A') + i) end do diff --git a/libgomp/work.c b/libgomp/work.c index c53625a..99b79df 100644 --- a/libgomp/work.c +++ b/libgomp/work.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2005-2022 Free Software Foundation, Inc. +/* Copyright (C) 2005-2023 Free Software Foundation, Inc. Contributed by Richard Henderson <rth@redhat.com>. This file is part of the GNU Offloading and Multi Processing Library |