diff options
Diffstat (limited to 'libgomp')
304 files changed, 13823 insertions, 976 deletions
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index c7502ca..4d9e206 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,1131 @@ +2021-09-10 Jakub Jelinek <jakub@redhat.com> + + * testsuite/libgomp.c-c++-common/atomic-19.c: New test. + * testsuite/libgomp.c-c++-common/atomic-20.c: New test. + * testsuite/libgomp.c-c++-common/atomic-21.c: New test. + +2021-09-07 Tobias Burnus <tobias@codesourcery.com> + + * libgomp.texi (OpenMP Implementation Status): Extend + OpenMP 5.0 section. + (OpenACC Profiling Interface): Fix typo. + +2021-09-07 Tobias Burnus <tobias@codesourcery.com> + + * libgomp.texi (Enabling OpenMP): Refer to OMP spec in general + not to 4.5; link to new section. + (OpenMP Implementation Status): New. + +2021-09-06 Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/libgomp.c/target-43.c: '-latomic' for nvptx offloading. + +2021-09-03 Tobias Burnus <tobias@codesourcery.com> + + * testsuite/libgomp.c-c++-common/error-1.c: Use \r\n not \n\r in + dg-output. + * testsuite/libgomp.fortran/error-1.f90: Likewise. + +2021-08-23 Thomas Schwinge <thomas@codesourcery.com> + Jakub Jelinek <jakub@redhat.com> + + * testsuite/libgomp.c/address-space-1.c: New file. + +2021-08-23 Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/lib/libgomp.exp + (check_effective_target_offload_target_intelmic): Remove 'proc'. + (check_effective_target_offload_device_intel_mic): New 'proc'. + * testsuite/libgomp.c-c++-common/on_device_arch.h + (device_arch_intel_mic, on_device_arch_intel_mic): New. + * testsuite/libgomp.c-c++-common/target-45.c: Use that for + 'dg-xfail-run-if'. + * testsuite/libgomp.fortran/target10.f90: Likewise. + +2021-08-23 Tobias Burnus <tobias@codesourcery.com> + + * testsuite/libgomp.fortran/taskloop-4-a.f90: New test. + * testsuite/libgomp.fortran/taskloop-4.f90: New test. + * testsuite/libgomp.fortran/taskloop-5-a.f90: New test. + * testsuite/libgomp.fortran/taskloop-5.f90: New test. 
+ +2021-08-23 Jakub Jelinek <jakub@redhat.com> + + * taskloop.c (GOMP_taskloop): Handle GOMP_TASK_FLAG_STRICT. + * testsuite/libgomp.c-c++-common/taskloop-4.c (main): Fix up comment. + * testsuite/libgomp.c-c++-common/taskloop-5.c: New test. + +2021-08-22 Thomas Schwinge <thomas@codesourcery.com> + + * config/nvptx/error.c (fwrite, exit): Override, too. + * testsuite/libgomp.c-c++-common/error-1.c: Add a minimum amount + of offloading testing. + * testsuite/libgomp.fortran/error-1.f90: Likewise. + +2021-08-20 Tobias Burnus <tobias@codesourcery.com> + + * testsuite/libgomp.fortran/error-1.f90: New test. + +2021-08-20 Jakub Jelinek <jakub@redhat.com> + + * libgomp.map (GOMP_5.1): Add GOMP_error and GOMP_warning. + * libgomp_g.h (GOMP_warning, GOMP_error): Declare. + * error.c (GOMP_warning, GOMP_error): New functions. + * testsuite/libgomp.c-c++-common/error-1.c: New test. + +2021-08-18 Tobias Burnus <tobias@codesourcery.com> + + * omp_lib.f90.in (omp_alloc, omp_free, omp_target_alloc, + omp_target_free, omp_target_is_present, omp_target_memcpy, + omp_target_memcpy_rect, omp_target_associate_ptr, + omp_target_disassociate_ptr): Add interface. + * omp_lib.h.in (omp_alloc, omp_free, omp_target_alloc, + omp_target_free, omp_target_is_present, omp_target_memcpy, + omp_target_memcpy_rect, omp_target_associate_ptr, + omp_target_disassociate_ptr): Add interface. + * testsuite/libgomp.fortran/alloc-1.F90: Remove local + interface block for omp_alloc + omp_free. + * testsuite/libgomp.fortran/alloc-4.f90: Likewise. + * testsuite/libgomp.fortran/refcount-1.f90: New test. + * testsuite/libgomp.fortran/target-12.f90: New test. + +2021-08-18 Jakub Jelinek <jakub@redhat.com> + + * testsuite/libgomp.c-c++-common/nothing-1.c: New test. + +2021-08-17 Tobias Burnus <tobias@codesourcery.com> + + * testsuite/libgomp.fortran/scope-1.f90: New test. + * testsuite/libgomp.fortran/task-reduction-16.f90: New test. 
+ +2021-08-17 Jakub Jelinek <jakub@redhat.com> + + * Makefile.am (libgomp_la_SOURCES): Add scope.c. + * Makefile.in: Regenerated. + * libgomp_g.h (GOMP_scope_start): Declare. + * libgomp.map: Add GOMP_scope_start@@GOMP_5.1. + * scope.c: New file. + * testsuite/libgomp.c-c++-common/scope-1.c: New test. + * testsuite/libgomp.c-c++-common/task-reduction-16.c: New test. + +2021-08-16 Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/libgomp.oacc-c-c++-common/mode-transitions.c: Address + '?:' issues. + +2021-08-16 Tobias Burnus <tobias@codesourcery.com> + + * testsuite/libgomp.fortran/masked-1.f90: New test. + +2021-08-13 Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/libgomp.oacc-c-c++-common/static-variable-1.c: Adjust. + +2021-08-12 Jakub Jelinek <jakub@redhat.com> + + * testsuite/libgomp.c-c++-common/masked-1.c: New test. + +2021-08-12 Tobias Burnus <tobias@codesourcery.com> + + * env.c (parse_bind_var): Accept 'primary' as alias for + 'master'. + (omp_display_env): Add TODO comment to + change 'master' to 'primary' in proc_bind for OpenMP 5.1. + * libgomp.texi: Change 'master thread' to 'primary thread' + in line with OpenMP 5.1. + (omp_get_proc_bind): Add omp_proc_bind_primary and note that + omp_proc_bind_master is an alias of it. + (OMP_PROC_BIND): Mention 'PRIMARY'. + * omp.h.in (__GOMP_DEPRECATED_5_1): Define. + (omp_proc_bind_primary): Add. + (omp_proc_bind_master): Deprecate for OpenMP 5.1. + * omp_lib.f90.in (omp_proc_bind_primary): Add. + (omp_proc_bind_master): Deprecate for OpenMP 5.1. + * omp_lib.h.in (omp_proc_bind_primary): Add. + * testsuite/libgomp.c/affinity-1.c: Check that + 'primary' works and is identical to 'master'. + +2021-08-09 Julian Brown <julian@codesourcery.com> + Kwok Cheung Yeung <kcy@codesourcery.com> + Thomas Schwinge <thomas@codesourcery.com> + + * plugin/plugin-gcn.c (gcn_exec): Change default number of workers to + 16. + * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c + [acc_device_radeon]: Update. 
+ * testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c + [ACC_DEVICE_TYPE_radeon]: Likewise. + * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c + [acc_device_radeon]: Likewise. + * testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c + [ACC_DEVICE_TYPE_radeon]: Likewise. + * testsuite/libgomp.oacc-fortran/optional-reduction.f90: XFAIL for + 'openacc_radeon_accel_selected' and '-O0'. + * testsuite/libgomp.oacc-fortran/reduction-7.f90: Likewise. + +2021-08-05 Chung-Lin Tang <cltang@codesourcery.com> + + * icv-device.c (omp_get_device_num): New API function, host side. + * fortran.c (omp_get_device_num_): New interface function. + * libgomp-plugin.h (GOMP_DEVICE_NUM_VAR): Define macro symbol. + * libgomp.map (OMP_5.0.2): New version space with omp_get_device_num, + omp_get_device_num_. + * libgomp.texi (omp_get_device_num): Add documentation for new API + function. + * omp.h.in (omp_get_device_num): Add declaration. + * omp_lib.f90.in (omp_get_device_num): Likewise. + * omp_lib.h.in (omp_get_device_num): Likewise. + * target.c (gomp_load_image_to_device): If additional entry for device + number exists at end of returned entries from 'load_image_func' hook, + copy the assigned device number over to the device variable. + * config/gcn/icv-device.c (GOMP_DEVICE_NUM_VAR): Define static global. + (omp_get_device_num): New API function, device side. + * plugin/plugin-gcn.c ("symcat.h"): Add include. + (GOMP_OFFLOAD_load_image): Add addresses of device GOMP_DEVICE_NUM_VAR + at end of returned 'target_table' entries. + * config/nvptx/icv-device.c (GOMP_DEVICE_NUM_VAR): Define static global. + (omp_get_device_num): New API function, device side. + * plugin/plugin-nvptx.c ("symcat.h"): Add include. + (GOMP_OFFLOAD_load_image): Add addresses of device GOMP_DEVICE_NUM_VAR + at end of returned 'target_table' entries. + * testsuite/lib/libgomp.exp + (check_effective_target_offload_target_intelmic): New function for + testing for intelmic offloading. 
+ * testsuite/libgomp.c-c++-common/target-45.c: New test. + * testsuite/libgomp.fortran/target10.f90: New test. + +2021-07-30 Thomas Schwinge <thomas@codesourcery.com> + Ulrich Drepper <drepper@redhat.com> + + * fortran.c (omp_display_env_, omp_display_env_8_): Only + '#ifndef LIBGOMP_OFFLOADED_ONLY'. + +2021-07-29 Thomas Schwinge <thomas@codesourcery.com> + Julian Brown <julian@codesourcery.com> + Kwok Cheung Yeung <kcy@codesourcery.com> + + * testsuite/libgomp.oacc-c-c++-common/pr85486-2.c: + 's%oaccdevlow%oaccloops%g'. + * testsuite/libgomp.oacc-c-c++-common/pr85486-3.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/pr85486.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/routine-nohost-1.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/vector-length-128-3.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/vector-length-128-4.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/vector-length-128-6.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/vector-length-128-7.c: + Likewise. + * testsuite/libgomp.oacc-fortran/routine-nohost-1.f90: Likewise. + +2021-07-29 Aldy Hernandez <aldyh@redhat.com> + + * testsuite/libgomp.graphite/force-parallel-4.c: Adjust for + threader. + * testsuite/libgomp.graphite/force-parallel-8.c: Same. + +2021-07-27 Ulrich Drepper <drepper@gmail.com> + + * env.c (wait_policy, stacksize): New static variables, + move out of handle_omp_display_env. + (omp_display_env): New function. The meat of the old + handle_omp_display_env function. + (handle_omp_display_env): Change to not take parameters + and instead use the global variables. Only perform + parsing, defer to omp_display_env for the implementation. + (initialize_env): Remove local variables wait_policy and + stacksize. 
Don't pass parameters to handle_omp_display_env. + * fortran.c: Add ialias_redirect for omp_display_env. + (omp_display_env_, omp_display_env_8_): New functions. + * libgomp.map (OMP_5.1): New version. Add omp_display_env, + omp_display_env_, and omp_display_env_8_. + * omp.h.in: Declare omp_display_env. + * omp_lib.f90.in: Likewise. + * omp_lib.h.in: Likewise. + +2021-07-27 Thomas Schwinge <thomas@codesourcery.com> + Julian Brown <julian@codesourcery.com> + + * target.c (gomp_coalesce_buf_add): Update comment. + (gomp_copy_host2dev, gomp_map_vars_internal): Don't expect to see + 'aq && cbuf'. + (gomp_map_vars_internal): Only 'if (!aq)', do + 'gomp_coalesce_buf_add'. + * testsuite/libgomp.oacc-c-c++-common/async-data-1-2.c: Remove + XFAIL. + +2021-07-27 Julian Brown <julian@codesourcery.com> + Thomas Schwinge <thomas@codesourcery.com> + + * libgomp.h (gomp_copy_host2dev): Update prototype. + * oacc-mem.c (memcpy_tofrom_device, update_dev_host): Add new + argument to gomp_copy_host2dev (false). + * plugin/plugin-gcn.c (struct copy_data): Remove free_src field. + (copy_data): Don't free src. + (queue_push_copy): Remove free_src handling. + (GOMP_OFFLOAD_dev2dev): Update call to queue_push_copy. + (GOMP_OFFLOAD_openacc_async_host2dev): Remove source-data + snapshotting. + (GOMP_OFFLOAD_openacc_async_dev2host): Update call to + queue_push_copy. + * target.c (goacc_device_copy_async): Add SRCADDR_ORIG parameter. + (gomp_copy_host2dev): Add EPHEMERAL parameter. Snapshot source + data when true, and set up deferred freeing of temporary buffer. + (gomp_copy_dev2host): Update call to goacc_device_copy_async. + (gomp_map_vars_existing, gomp_map_pointer, gomp_attach_pointer) + (gomp_detach_pointer, gomp_map_vars_internal, gomp_update): Update + calls to gomp_copy_host2dev with appropriate ephemeral argument. + * testsuite/libgomp.oacc-c-c++-common/async-data-1-1.c: Remove + XFAIL. 
+ +2021-07-27 Thomas Schwinge <thomas@codesourcery.com> + Tom de Vries <tom@codesourcery.com> + + * testsuite/libgomp.oacc-c-c++-common/async-data-1-1.c: New file. + * testsuite/libgomp.oacc-c-c++-common/async-data-1-2.c: Likewise. + +2021-07-27 Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Clarify + sequencing of 'async' data copying vs. profiling events. + * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: + Likewise. + +2021-07-27 Thomas Schwinge <thomas@codesourcery.com> + Julian Brown <julian@codesourcery.com> + + * testsuite/libgomp.oacc-c-c++-common/lib-94.c: Fix OpenACC + 'async'/'wait' issue. + * testsuite/libgomp.oacc-c-c++-common/lib-95.c: Likewise. + * testsuite/libgomp.oacc-fortran/lib-16-2.f90: Likewise. + * testsuite/libgomp.oacc-fortran/lib-16.f90: Likewise. + +2021-07-21 Thomas Schwinge <thomas@codesourcery.com> + Joseph Myers <joseph@codesourcery.com> + Cesar Philippidis <cesar@codesourcery.com> + + * testsuite/libgomp.oacc-c-c++-common/routine-nohost-1.c: New + file. + * testsuite/libgomp.oacc-c-c++-common/routine-nohost-2.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/routine-nohost-2_2.c: + Likewise. + * testsuite/libgomp.oacc-fortran/routine-nohost-1.f90: Likewise. + +2021-07-20 Thomas Schwinge <thomas@codesourcery.com> + + PR target/101484 + * configure.tgt [amdgcn*-*-*] (XCFLAGS): Add + '-Wno-error=array-bounds'. + * config/gcn/team.c: Remove '-Werror=array-bounds' work-around. + * libgomp.h [__AMDGCN__]: Likewise. + +2021-07-19 Thomas Schwinge <thomas@codesourcery.com> + + PR target/101484 + * config/gcn/team.c: Apply '-Werror=array-bounds' work-around. + * libgomp.h [__AMDGCN__]: Likewise. + +2021-07-13 Jakub Jelinek <jakub@redhat.com> + Florian Weimer <fweimer@redhat.com> + + * config/linux/sem.h: Don't include limits.h. + (SEM_WAIT): Define to -__INT_MAX__ - 1 instead of INT_MIN. + * config/linux/affinity.c: Include limits.h. 
+ +2021-07-01 Jakub Jelinek <jakub@redhat.com> + + PR middle-end/94366 + * testsuite/libgomp.c-c++-common/pr94366.c: New test. + +2021-06-29 Tobias Burnus <tobias@codesourcery.com> + + PR other/67300 + * testsuite/libgomp.c-c++-common/reduction-16.c: Replace + -foffload=nvptx-none= by -foffload-options=nvptx-none= to + avoid disabling other offload targets. + * testsuite/libgomp.c-c++-common/reduction-5.c: Likewise. + * testsuite/libgomp.c-c++-common/reduction-6.c: Likewise. + * testsuite/libgomp.c/target-44.c: Likewise. + +2021-06-29 Tobias Burnus <tobias@codesourcery.com> + + * testsuite/libgomp.fortran/defaultmap-8.f90 (bar): Determine whether + target has shared memory and disable some scalar pointer/allocatable + checks if not as firstprivate does not work. + +2021-06-25 Chung-Lin Tang <cltang@codesourcery.com> + + PR testsuite/101114 + * testsuite/libgomp.c-c++-common/struct-elem-5.c: + Add "target offload_device_nonshared_as" condition for enabling test. + +2021-06-24 Jakub Jelinek <jakub@redhat.com> + + * testsuite/libgomp.c-c++-common/target-in-reduction-1.c: New test. + * testsuite/libgomp.c-c++-common/target-in-reduction-2.c: New test. + * testsuite/libgomp.c++/target-in-reduction-1.C: New test. + * testsuite/libgomp.c++/target-in-reduction-2.C: New test. + +2021-06-23 Jakub Jelinek <jakub@redhat.com> + + PR middle-end/101167 + * testsuite/libgomp.c-c++-common/task-reduction-15.c: New test. + +2021-06-17 Chung-Lin Tang <cltang@codesourcery.com> + + * hashtab.h (htab_clear): New function with initialization code + factored out from... + (htab_create): ...here, adjust to use htab_clear function. + * libgomp.h (REFCOUNT_SPECIAL): New symbol to denote range of + special refcount values, add comments. + (REFCOUNT_INFINITY): Adjust definition to use REFCOUNT_SPECIAL. + (REFCOUNT_LINK): Likewise. + (REFCOUNT_STRUCTELEM): New special refcount range for structure + element siblings. 
+ (REFCOUNT_STRUCTELEM_P): Macro for testing for structure element + sibling maps. + (REFCOUNT_STRUCTELEM_FLAG_FIRST): Flag to indicate first sibling. + (REFCOUNT_STRUCTELEM_FLAG_LAST): Flag to indicate last sibling. + (REFCOUNT_STRUCTELEM_FIRST_P): Macro to test _FIRST flag. + (REFCOUNT_STRUCTELEM_LAST_P): Macro to test _LAST flag. + (struct splay_tree_key_s): Add structelem_refcount and + structelem_refcount_ptr fields into a union with dynamic_refcount. + Add comments. + (gomp_map_vars): Delete declaration. + (gomp_map_vars_async): Likewise. + (gomp_unmap_vars): Likewise. + (gomp_unmap_vars_async): Likewise. + (goacc_map_vars): New declaration. + (goacc_unmap_vars): Likewise. + * oacc-mem.c (acc_map_data): Adjust to use goacc_map_vars. + (goacc_enter_datum): Likewise. + (goacc_enter_data_internal): Likewise. + * oacc-parallel.c (GOACC_parallel_keyed): Adjust to use goacc_map_vars + and goacc_unmap_vars. + (GOACC_data_start): Adjust to use goacc_map_vars. + (GOACC_data_end): Adjust to use goacc_unmap_vars. + * target.c (hash_entry_type): New typedef. + (htab_alloc): New function hook for hashtab.h. + (htab_free): Likewise. + (htab_hash): Likewise. + (htab_eq): Likewise. + (hashtab.h): Add file include. + (gomp_increment_refcount): New function. + (gomp_decrement_refcount): Likewise. + (gomp_map_vars_existing): Add refcount_set parameter, adjust to use + gomp_increment_refcount. + (gomp_map_fields_existing): Add refcount_set parameter, adjust calls + to gomp_map_vars_existing. + (gomp_map_vars_internal): Add refcount_set parameter, add local openmp_p + variable to guard OpenMP specific paths, adjust calls to + gomp_map_vars_existing, add structure element sibling splay_tree_key + sequence creation code, adjust Fortran map case to avoid increment + under OpenMP. + (gomp_map_vars): Adjust to static, add refcount_set parameter, manage + local refcount_set if caller passed in NULL, adjust call to + gomp_map_vars_internal. 
+ (gomp_map_vars_async): Adjust and rename into... + (goacc_map_vars): ...this new function, adjust call to + gomp_map_vars_internal. + (gomp_remove_splay_tree_key): New function with code factored out from + gomp_remove_var_internal. + (gomp_remove_var_internal): Add code to handle removing multiple + splay_tree_key sequence for structure elements, adjust code to use + gomp_remove_splay_tree_key for splay-tree key removal. + (gomp_unmap_vars_internal): Add refcount_set parameter, adjust to use + gomp_decrement_refcount. + (gomp_unmap_vars): Adjust to static, add refcount_set parameter, manage + local refcount_set if caller passed in NULL, adjust call to + gomp_unmap_vars_internal. + (gomp_unmap_vars_async): Adjust and rename into... + (goacc_unmap_vars): ...this new function, adjust call to + gomp_unmap_vars_internal. + (GOMP_target): Manage refcount_set and adjust calls to gomp_map_vars and + gomp_unmap_vars. + (GOMP_target_ext): Likewise. + (gomp_target_data_fallback): Adjust call to gomp_map_vars. + (GOMP_target_data): Likewise. + (GOMP_target_data_ext): Likewise. + (GOMP_target_end_data): Adjust call to gomp_unmap_vars. + (gomp_exit_data): Add refcount_set parameter, adjust to use + gomp_decrement_refcount, adjust to queue splay-tree keys for removal + after main loop. + (GOMP_target_enter_exit_data): Manage refcount_set and adjust calls to + gomp_map_vars and gomp_exit_data. + (gomp_target_task_fn): Likewise. + * testsuite/libgomp.c-c++-common/refcount-1.c: New testcase. + * testsuite/libgomp.c-c++-common/struct-elem-1.c: New testcase. + * testsuite/libgomp.c-c++-common/struct-elem-2.c: New testcase. + * testsuite/libgomp.c-c++-common/struct-elem-3.c: New testcase. + * testsuite/libgomp.c-c++-common/struct-elem-4.c: New testcase. + * testsuite/libgomp.c-c++-common/struct-elem-5.c: New testcase. + +2021-06-15 Tobias Burnus <tobias@codesourcery.com> + + PR fortran/92568 + * testsuite/libgomp.fortran/defaultmap-8.f90: New test. 
+ +2021-06-10 Andrew Stubbs <ams@codesourcery.com> + Thomas Schwinge <thomas@codesourcery.com> + + * libgomp.map (GOACC_2.0.2): New symbol version. + * libgomp_g.h (GOACC_enter_data, GOACC_exit_data): New prototypes. + * oacc-mem.c (GOACC_enter_data, GOACC_exit_data): New functions. + +2021-06-10 Thomas Schwinge <thomas@codesourcery.com> + Andrew Stubbs <ams@codesourcery.com> + + * oacc-mem.c (goacc_enter_exit_data_internal): New function, + extracted from... + (GOACC_enter_exit_data): ... here. + (GOACC_declare): Use it. + +2021-06-10 Thomas Schwinge <thomas@codesourcery.com> + + * oacc-parallel.c (GOACC_declare): Move... + * oacc-mem.c: ... here. + * libgomp_g.h: Adjust. + +2021-06-10 Andrew Stubbs <ams@codesourcery.com> + Thomas Schwinge <thomas@codesourcery.com> + + * oacc-parallel.c (GOACC_declare): Clean up 'GOMP_MAP_POINTER' + handling. + +2021-06-10 Jakub Jelinek <jakub@redhat.com> + + PR tree-optimization/100981 + * testsuite/libgomp.fortran/pr100981-2.f90 (cdcdot): Initialize + dsdotr and dsdoti to 0. + +2021-06-09 H.J. Lu <hjl.tools@gmail.com> + + * testsuite/lib/libgomp.exp (libgomp_init): Don't add -march=i486 + if atomic compare-and-swap is supported on 'int'. + +2021-06-09 Richard Biener <rguenther@suse.de> + + PR tree-optimization/100981 + * testsuite/libgomp.fortran/pr100981-2.f90: New testcase. + +2021-06-08 Thomas Schwinge <thomas@codesourcery.com> + + * plugin/plugin-gcn.c (gcn_exec): Force 'num_workers (1)' + unconditionally. + * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: + Update. + * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c: Likewise. + +2021-06-08 Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/libgomp.oacc-c-c++-common/lib-11.c: Enable for all but + '-DACC_MEM_SHARED=0'. + * testsuite/libgomp.oacc-c-c++-common/lib-13.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-14.c: Likewise. 
+ * testsuite/libgomp.oacc-c-c++-common/lib-15.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-20.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-23.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-24.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-34.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-42.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-44.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-48.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-88.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-89.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-92.c: Likewise. + * testsuite/libgomp.oacc-fortran/lib-14.f90: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-5.c: Add + 'acc_device_radeon' testing. + * testsuite/libgomp.oacc-c-c++-common/lib-6.c: Likewise. + * testsuite/libgomp.oacc-fortran/lib-5.f90: Likewise. + * testsuite/libgomp.oacc-fortran/lib-7.f90: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-52.c: Enable for all. + * testsuite/libgomp.oacc-c-c++-common/lib-53.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-54.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-86.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-87.c: Likewise. + * testsuite/libgomp.oacc-fortran/lib-10.f90: Likewise. + * testsuite/libgomp.oacc-fortran/lib-8.f90: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-57.c: Improve checking + for non-'openacc_nvidia_accel_selected'. + * testsuite/libgomp.oacc-c-c++-common/lib-58.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-62.c: Clarify that "Not + all implement this checking". + * testsuite/libgomp.oacc-c-c++-common/lib-63.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-64.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-65.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-67.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-68.c: Likewise. 
+ +2021-06-08 Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Simplify. + * testsuite/libgomp.oacc-fortran/parallel-dims-aux.c: Update. + +2021-06-08 Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Fix + for 'acc_device_radeon'. + +2021-06-08 Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/libgomp.oacc-c-c++-common/firstprivate-1.c: Enhance + for non-'acc_device_nvidia'. + +2021-06-08 Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c: Add + 'acc_device_radeon' testing. + * testsuite/libgomp.oacc-fortran/acc_on_device-1-1.f90: Likewise. + * testsuite/libgomp.oacc-fortran/acc_on_device-1-2.f: Likewise. + * testsuite/libgomp.oacc-fortran/acc_on_device-1-3.f: Likewise. + +2021-06-08 Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/libgomp.oacc-c-c++-common/async_queue-1.c: Don't + require 'openacc_nvidia_accel_selected'. Fix up for + 'ACC_DEVICE_TYPE_radeon'. + +2021-06-08 Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/libgomp.oacc-c++/declare-1.C: Don't require + 'openacc_nvidia_accel_selected'. + * testsuite/libgomp.oacc-c-c++-common/declare-3.c: Likewise. + +2021-06-08 Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/lib/libgomp.exp + (check_effective_target_openacc_radeon_accel_selected): + Streamline. + +2021-06-08 Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Revert + PR80547 workaround. + +2021-06-08 Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c + <acc_device_nvidia>: Update comment. + +2021-05-28 Tobias Burnus <tobias@codesourcery.com> + + * testsuite/libgomp.fortran/depend-iterator-2.f90: New test. 
+ +2021-05-27 Jakub Jelinek <jakub@redhat.com> + + * testsuite/lib/libgomp.exp (check_effective_target_openacc_cuda, + check_effective_target_openacc_cublas, + check_effective_target_openacc_cudart): New. + * testsuite/libgomp.oacc-fortran/host_data-4.f90: Require effective + target openacc_cublas. + * testsuite/libgomp.oacc-fortran/host_data-2.f90: Likewise. + * testsuite/libgomp.oacc-fortran/host_data-3.f: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-91.c: Require effective + target openacc_cuda. + * testsuite/libgomp.oacc-c-c++-common/lib-70.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-90.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-75.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-69.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-74.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-81.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-72.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-85.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/pr87835.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-82.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-73.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-83.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-78.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-76.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-84.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-79.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/host_data-1.c: Require effective + targets openacc_cublas and openacc_cudart. + * testsuite/libgomp.oacc-c-c++-common/context-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/context-2.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/context-3.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/context-4.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/acc_get_property-nvptx.c: + Require effective target openacc_cudart. 
+ * testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c: Add -DUSE_CUDA_H + for effective target openacc_cuda and add && defined USE_CUDA_H to + preprocessor conditionals. Guard -lcuda also on openacc_cuda + effective target. + +2021-05-26 Jakub Jelinek <jakub@redhat.com> + + PR libgomp/100573 + * config/nvptx/target.c (GOMP_target_ext, GOMP_target_data_ext, + GOMP_target_end_data, GOMP_target_update_ext, + GOMP_target_enter_exit_data): New dummy entrypoints. + * config/gcn/target.c (GOMP_target_ext, GOMP_target_data_ext, + GOMP_target_end_data, GOMP_target_update_ext, + GOMP_target_enter_exit_data): Likewise. + * testsuite/libgomp.c-c++-common/for-3.c (DO_PRAGMA, OMPTEAMS, + OMPFROM, OMPTO): Define. + (main): Remove #pragma omp target teams around all the tests. + * testsuite/libgomp.c-c++-common/target-41.c: New test. + * testsuite/libgomp.c-c++-common/target-42.c: New test. + +2021-05-25 Jakub Jelinek <jakub@redhat.com> + + PR middle-end/99928 + * testsuite/libgomp.c-c++-common/reduction-17.c: New test. + +2021-05-24 Tobias Burnus <tobias@codesourcery.com> + + PR fortran/86470 + * testsuite/libgomp.fortran/class-firstprivate-1.f90: New test. + * testsuite/libgomp.fortran/class-firstprivate-2.f90: New test. + * testsuite/libgomp.fortran/class-firstprivate-3.f90: New test. + +2021-05-22 Thomas Schwinge <thomas@codesourcery.com> + + PR testsuite/90115 + * testsuite/libgomp.oacc-fortran/privatized-ref-2.f90: Prune + uninteresting/varying diagnostics. + +2021-05-21 Thomas Schwinge <thomas@codesourcery.com> + + PR middle-end/90115 + * testsuite/libgomp.oacc-c-c++-common/static-variable-1.c: Update. + * testsuite/libgomp.oacc-fortran/privatized-ref-2.f90: Likewise. + +2021-05-21 Thomas Schwinge <thomas@codesourcery.com> + + PR middle-end/90115 + * testsuite/libgomp.oacc-fortran/private-atomic-1-vector.f90: New + file. + * testsuite/libgomp.oacc-c-c++-common/firstprivate-1.c: Update. + * testsuite/libgomp.oacc-c-c++-common/host_data-7.c: Likewise. 
+ * testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-1.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-2.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-3.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-4.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-5.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-1.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-2.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-3.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-4.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-5.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-6.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-vector-1.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-vector-2.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-1.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-2.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-3.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-4.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-5.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-6.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-7.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-g-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-g-2.c: Likewise. 
+ * testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-v-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-w-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/parallel-reduction.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/private-atomic-1-gang.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/private-atomic-1.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/private-variables.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/routine-4.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/static-variable-1.c: + Likewise. + * testsuite/libgomp.oacc-fortran/acc_on_device-1-1.f90: Likewise. + * testsuite/libgomp.oacc-fortran/acc_on_device-1-2.f: Likewise. + * testsuite/libgomp.oacc-fortran/acc_on_device-1-3.f: Likewise. + * testsuite/libgomp.oacc-fortran/declare-1.f90: Likewise. + * testsuite/libgomp.oacc-fortran/host_data-5.F90: Likewise. + * testsuite/libgomp.oacc-fortran/if-1.f90: Likewise. + * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-1.f90: + Likewise. + * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-2.f90: + Likewise. + * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-3.f90: + Likewise. + * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-6.f90: + Likewise. 
+ * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-vector-1.f90: + Likewise. + * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-vector-2.f90: + Likewise. + * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-1.f90: + Likewise. + * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-2.f90: + Likewise. + * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-3.f90: + Likewise. + * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-4.f90: + Likewise. + * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-5.f90: + Likewise. + * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-6.f90: + Likewise. + * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-7.f90: + Likewise. + * testsuite/libgomp.oacc-fortran/optional-private.f90: Likewise. + * testsuite/libgomp.oacc-fortran/parallel-dims.f90: Likewise. + * testsuite/libgomp.oacc-fortran/private-atomic-1-gang.f90: + Likewise. + * testsuite/libgomp.oacc-fortran/private-atomic-1-worker.f90: + Likewise. + * testsuite/libgomp.oacc-fortran/private-variables.f90: Likewise. + * testsuite/libgomp.oacc-fortran/privatized-ref-2.f90: Likewise. + * testsuite/libgomp.oacc-fortran/routine-7.f90: Likewise. + +2021-05-21 Julian Brown <julian@codesourcery.com> + Chung-Lin Tang <cltang@codesourcery.com> + Thomas Schwinge <thomas@codesourcery.com> + + PR middle-end/90115 + * testsuite/libgomp.oacc-c-c++-common/private-atomic-1-gang.c: New + test. + * testsuite/libgomp.oacc-fortran/private-atomic-1-gang.f90: + Likewise. + * testsuite/libgomp.oacc-fortran/private-atomic-1-worker.f90: + Likewise. + +2021-05-21 Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/libgomp.oacc-fortran/privatized-ref-2.f90: Don't skip + for nvptx offloading. + +2021-05-21 Tobias Burnus <tobias@codesourcery.com> + + * testsuite/libgomp.oacc-fortran/privatized-ref-2.f90: New. 
+ +2021-05-19 Thomas Schwinge <thomas@codesourcery.com> + + PR target/83812 + * testsuite/libgomp.oacc-c-c++-common/private-atomic-1.c: New. + +2021-05-19 Julian Brown <julian@codesourcery.com> + + * testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c: New. + +2021-05-18 Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/lib/libgomp.exp + (check_effective_target_offload_target_nvptx): Don't shadow global + 'offload_targets' variable. + +2021-05-18 Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/libgomp.c-c++-common/reduction-5.c: Restrict + '-latomic' to nvptx offloading compilation. + * testsuite/libgomp.c-c++-common/reduction-6.c: Likewise. + +2021-05-18 Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/libgomp.c/target-44.c: Restrict '-latomic' to nvptx + offloading compilation. + +2021-05-17 Kwok Cheung Yeung <kcy@codesourcery.com> + + * task.c (omp_fulfill_event): Call gomp_team_barrier_set_task_pending + if new tasks generated. + * testsuite/libgomp.c-c++-common/task-detach-13.c: New. + +2021-05-14 Tobias Burnus <tobias@codesourcery.com> + + * testsuite/libgomp.fortran/parallel-master.f90: New test. + +2021-05-13 Martin Liska <mliska@suse.cz> + + PR testsuite/100569 + * testsuite/libgomp.c/omp-nested-3.c: Prune new LTO warning. + * testsuite/libgomp.c/pr46032-2.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/data-clauses-kernels-ipa-pta.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/data-clauses-parallel-ipa-pta.c: Likewise. + +2021-05-12 Tobias Burnus <tobias@codesourcery.com> + + * testsuite/libgomp.c-c++-common/task-detach-12.c: New test. + * testsuite/libgomp.fortran/task-detach-12.f90: New test. + +2021-05-11 Jakub Jelinek <jakub@redhat.com> + + PR middle-end/100471 + * taskloop.c (GOMP_taskloop): If GOMP_TASK_FLAG_REDUCTION and not + GOMP_TASK_FLAG_NOGROUP, when doing early return clear the task + reduction pointer. + * testsuite/libgomp.c/task-reduction-4.c: New test. 
+ +2021-05-07 Tobias Burnus <tobias@codesourcery.com> + Tom de Vries <tdevries@suse.de> + + * testsuite/libgomp.c-c++-common/reduction-5.c: New test, testing + complex/floating-point || + && reduction with 'omp target'. + * testsuite/libgomp.c-c++-common/reduction-6.c: Likewise. + +2021-05-04 Tobias Burnus <tobias@codesourcery.com> + + * testsuite/libgomp.c-c++-common/reduction-1.c: New test. + * testsuite/libgomp.c-c++-common/reduction-2.c: New test. + * testsuite/libgomp.c-c++-common/reduction-3.c: New test. + * testsuite/libgomp.c-c++-common/reduction-4.c: New file. + +2021-05-04 Tobias Burnus <tobias@codesourcery.com> + + PR testsuite/100397 + * testsuite/libgomp.fortran/depobj-1.f90 (dep2, dep3): Move var + declaration to scope of non-'depend'-guarded assignment to avoid races. + +2021-05-03 Tom de Vries <tdevries@suse.de> + + PR target/100321 + * testsuite/libgomp.c/target-44.c: New test. + +2021-04-30 Roman Zhuykov <zhroma@ispras.ru> + + * testsuite/libgomp.oacc-c-c++-common/atomic_capture-3.c: New test. + +2021-04-29 Tom de Vries <tdevries@suse.de> + + * testsuite/libgomp.c/pr81778.c: New test. + +2021-04-29 Tom de Vries <tdevries@suse.de> + + PR target/100232 + * testsuite/libgomp.c/target-43.c: New file. + +2021-04-28 Jakub Jelinek <jakub@redhat.com> + Tobias Burnus <tobias@codesourcery.com> + + * configure.ac (OFFLOAD_DEFAULTED): AC_DEFINE if offload-defaulted. + * target.c (gomp_load_plugin_for_device): If set and if a plugin + can't be dlopened, silently assume it has no devices. + * Makefile.in: Regenerate. + * config.h.in: Regenerate. + * configure: Regenerate. + +2021-04-26 Tobias Burnus <tobias@codesourcery.com> + + * testsuite/libgomp.oacc-fortran/par-reduction-2-1.f: + Use [Ww]arning in dg-bogus as FE diagnostic and default + diagnostic differ and the result depends on ENABLE_OFFLOAD. + * testsuite/libgomp.oacc-fortran/par-reduction-2-2.f: Likewise. + * testsuite/libgomp.oacc-fortran/parallel-dims.f90: Likewise. 
+ * testsuite/libgomp.oacc-fortran/parallel-reduction.f90: Likewise. + +2021-04-26 Tobias Burnus <tobias@codesourcery.com> + + * testsuite/libgomp.oacc-fortran/par-reduction-2-1.f: + Correct spelling in dg-bogus to match -Wopenacc-parallelism. + * testsuite/libgomp.oacc-fortran/par-reduction-2-2.f: Likewise. + * testsuite/libgomp.oacc-fortran/parallel-dims.f90: Likewise. + * testsuite/libgomp.oacc-fortran/parallel-reduction.f90: Likewise. + +2021-04-26 Thomas Schwinge <thomas@codesourcery.com> + Nathan Sidwell <nathan@codesourcery.com> + Tom de Vries <vries@codesourcery.com> + Julian Brown <julian@codesourcery.com> + Kwok Cheung Yeung <kcy@codesourcery.com> + + * testsuite/libgomp.oacc-c-c++-common/firstprivate-1.c: Specify + '-Wopenacc-parallelism', and match diagnostics, as appropriate. + * testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-w-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/mode-transitions.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/parallel-reduction.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/pr85381-3.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/private-variables.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/reduction-5.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/reduction-7.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/routine-g-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/routine-w-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/static-variable-1.c: + Likewise. 
+ * testsuite/libgomp.oacc-fortran/optional-private.f90: Likewise. + * testsuite/libgomp.oacc-fortran/par-reduction-2-1.f: Likewise. + * testsuite/libgomp.oacc-fortran/par-reduction-2-2.f: Likewise. + * testsuite/libgomp.oacc-fortran/parallel-dims.f90: Likewise. + * testsuite/libgomp.oacc-fortran/parallel-reduction.f90: Likewise. + * testsuite/libgomp.oacc-fortran/pr84028.f90: Likewise. + * testsuite/libgomp.oacc-fortran/private-variables.f90: Likewise. + * testsuite/libgomp.oacc-fortran/reduction-1.f90: Likewise. + * testsuite/libgomp.oacc-fortran/reduction-5.f90: Likewise. + * testsuite/libgomp.oacc-fortran/reduction-6.f90: Likewise. + * testsuite/libgomp.oacc-fortran/routine-7.f90: Likewise. + +2021-04-26 Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c: Don't + compile with '-w'. + * testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/parallel-reduction.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/reduction-5.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/reduction-6.c: Likewise. + * testsuite/libgomp.oacc-fortran/parallel-reduction.f90: Likewise. + * testsuite/libgomp.oacc-fortran/reduction-1.f90: Likewise. + * testsuite/libgomp.oacc-fortran/reduction-5.f90: Likewise. + * testsuite/libgomp.oacc-fortran/reduction-6.f90: Likewise. + * testsuite/libgomp.oacc-fortran/reduction-7.f90: Likewise. + +2021-04-22 Richard Biener <rguenther@suse.de> + + * testsuite/libgomp.c-c++-common/reduction-16.c: Use -latomic + only on nvptx-none. + +2021-04-21 Tobias Burnus <tobias@codesourcery.com> + + * testsuite/libgomp.fortran/depobj-1.f90: Use omp_lib's + omp_depend_kind instead of defining it as 16. + +2021-04-21 Tobias Burnus <tobias@codesourcery.com> + + * testsuite/lib/libgomp.exp (offload_target_to_openacc_device_type): + New, based on check_effective_target_offload_target_nvptx. + (check_effective_target_offload_target_nvptx): Call it. 
+ (check_effective_target_offload_target_amdgcn): New. + * testsuite/libgomp.c-c++-common/function-not-offloaded.c: + Require target offload_target_nvptx || offload_target_amdgcn. + * testsuite/libgomp.c-c++-common/variable-not-offloaded.c: Likewise. + * testsuite/libgomp.c/pr86416-1.c: Likewise. + * testsuite/libgomp.c/pr86416-2.c: Likewise. + +2021-04-21 Tobias Burnus <tobias@codesourcery.com> + + * testsuite/libgomp.fortran/depobj-1.f90: New test. + +2021-04-19 Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose-ice-1.c: + '-fopenacc-kernels=[...]' -> '--param=openacc-kernels=[...]'. + * testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c: + Likewise. + * testsuite/libgomp.oacc-fortran/pr94358-1.f90: Likewise. + +2021-04-15 Thomas Schwinge <thomas@codesourcery.com> + + PR target/99555 + * testsuite/lib/libgomp.exp + (check_effective_target_offload_device_nvptx): New. + * testsuite/libgomp.c/pr99555-1.c <nvptx offload device>: Until + resolved, make sure that we exit quickly, with error status, + XFAILed. + * testsuite/libgomp.c-c++-common/task-detach-6.c: Likewise. + * testsuite/libgomp.fortran/task-detach-6.f90: Likewise. + +2021-04-14 Jakub Jelinek <jakub@redhat.com> + + PR testsuite/100071 + * testsuite/libgomp.fortran/alloc-1.F90: Call c_f_pointer after last + cp = omp_alloc with cp, p arguments instead of cq, q and call + c_f_pointer after last cq = omp_alloc with cq, q. + +2021-04-11 Hafiz Abid Qadeer <abidh@codesourcery.com> + + PR middle-end/98088 + * testsuite/libgomp.oacc-c-c++-common/collapse-2.c: Add check + for loop with GT/GE condition. + * testsuite/libgomp.oacc-c-c++-common/collapse-3.c: Likewise. + +2021-04-09 Thomas Schwinge <thomas@codesourcery.com> + + PR middle-end/84991 + PR middle-end/84992 + PR middle-end/90779 + * testsuite/libgomp.oacc-c-c++-common/static-variable-1.c: New. 
+ +2021-04-09 Jakub Jelinek <jakub@redhat.com> + + PR libgomp/99984 + * team.c (gomp_thread_start): Call pthread_setspecific for + !(defined HAVE_TLS || defined USE_EMUTLS) only after local_thr + has been initialized to avoid false positive warning. + +2021-03-29 Tobias Burnus <tobias@codesourcery.com> + + PR target/99555 + * testsuite/lib/on_device_arch.c: Move to ... + * testsuite/libgomp.c-c++-common/on_device_arch.h: ... here. + * testsuite/libgomp.fortran/on_device_arch.c: New file; + #include on_device_arch.h. + * testsuite/libgomp.c-c++-common/task-detach-6.c: #include + on_device_arch.h instead of using dg-additional-source. + * testsuite/libgomp.c/pr99555-1.c: Likewise. + * testsuite/libgomp.fortran/task-detach-6.f90: Update to use + on_device_arch.c without relative paths. + +2021-03-25 Thomas Schwinge <thomas@codesourcery.com> + + * plugin/plugin-gcn.c (init_environment_variables): Don't prepend + the 'HSA_RUNTIME_LIB' path to 'libhsa-runtime64.so'. + * plugin/configfrag.ac (HSA_RUNTIME_LIB): Clean up. + * config.h.in: Regenerate. + * configure: Likewise. + +2021-03-25 Thomas Schwinge <thomas@codesourcery.com> + + PR target/99555 + * testsuite/lib/on_device_arch.c: New file. + * testsuite/libgomp.c/pr99555-1.c: Likewise. + * testsuite/libgomp.c-c++-common/task-detach-6.c: Until resolved, + skip for nvptx offloading, with error status. + * testsuite/libgomp.fortran/task-detach-6.f90: Likewise. + +2021-03-25 Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/libgomp.oacc-fortran/derivedtypes-arrays-1.f90: + OpenACC 'serial' construct diagnostic for nvptx offloading. 
+ 2021-03-15 Tobias Burnus <tobias@codesourcery.com> PR c++/99509 diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am index 4cf1f58..f8b2a06 100644 --- a/libgomp/Makefile.am +++ b/libgomp/Makefile.am @@ -61,11 +61,12 @@ libgomp_la_LINK = $(LINK) $(libgomp_la_LDFLAGS) libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \ icv.c icv-device.c iter.c iter_ull.c loop.c loop_ull.c ordered.c \ - parallel.c sections.c single.c task.c team.c work.c lock.c mutex.c \ - proc.c sem.c bar.c ptrlock.c time.c fortran.c affinity.c target.c \ - splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c oacc-init.c \ - oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \ - affinity-fmt.c teams.c allocator.c oacc-profiling.c oacc-target.c + parallel.c scope.c sections.c single.c task.c team.c work.c lock.c \ + mutex.c proc.c sem.c bar.c ptrlock.c time.c fortran.c affinity.c \ + target.c splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \ + oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \ + priority_queue.c affinity-fmt.c teams.c allocator.c oacc-profiling.c \ + oacc-target.c include $(top_srcdir)/plugin/Makefrag.am diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in index eb868b3..22cb213 100644 --- a/libgomp/Makefile.in +++ b/libgomp/Makefile.in @@ -16,7 +16,7 @@ # Plugins for offload execution, Makefile.am fragment. # -# Copyright (C) 2014-2020 Free Software Foundation, Inc. +# Copyright (C) 2014-2021 Free Software Foundation, Inc. # # Contributed by Mentor Embedded. 
# @@ -213,14 +213,14 @@ libgomp_la_LIBADD = @USE_FORTRAN_TRUE@am__objects_1 = openacc.lo am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \ env.lo error.lo icv.lo icv-device.lo iter.lo iter_ull.lo \ - loop.lo loop_ull.lo ordered.lo parallel.lo sections.lo \ - single.lo task.lo team.lo work.lo lock.lo mutex.lo proc.lo \ - sem.lo bar.lo ptrlock.lo time.lo fortran.lo affinity.lo \ - target.lo splay-tree.lo libgomp-plugin.lo oacc-parallel.lo \ - oacc-host.lo oacc-init.lo oacc-mem.lo oacc-async.lo \ - oacc-plugin.lo oacc-cuda.lo priority_queue.lo affinity-fmt.lo \ - teams.lo allocator.lo oacc-profiling.lo oacc-target.lo \ - $(am__objects_1) + loop.lo loop_ull.lo ordered.lo parallel.lo scope.lo \ + sections.lo single.lo task.lo team.lo work.lo lock.lo mutex.lo \ + proc.lo sem.lo bar.lo ptrlock.lo time.lo fortran.lo \ + affinity.lo target.lo splay-tree.lo libgomp-plugin.lo \ + oacc-parallel.lo oacc-host.lo oacc-init.lo oacc-mem.lo \ + oacc-async.lo oacc-plugin.lo oacc-cuda.lo priority_queue.lo \ + affinity-fmt.lo teams.lo allocator.lo oacc-profiling.lo \ + oacc-target.lo $(am__objects_1) libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) @@ -553,9 +553,9 @@ libgomp_la_DEPENDENCIES = $(libgomp_version_dep) libgomp_la_LINK = $(LINK) $(libgomp_la_LDFLAGS) libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \ error.c icv.c icv-device.c iter.c iter_ull.c loop.c loop_ull.c \ - ordered.c parallel.c sections.c single.c task.c team.c work.c \ - lock.c mutex.c proc.c sem.c bar.c ptrlock.c time.c fortran.c \ - affinity.c target.c splay-tree.c libgomp-plugin.c \ + ordered.c parallel.c scope.c sections.c single.c task.c team.c \ + work.c lock.c mutex.c proc.c sem.c bar.c ptrlock.c time.c \ + fortran.c affinity.c target.c splay-tree.c libgomp-plugin.c \ oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \ oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \ affinity-fmt.c teams.c allocator.c 
oacc-profiling.c \ @@ -769,6 +769,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/priority_queue.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/proc.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ptrlock.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scope.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sections.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@ diff --git a/libgomp/config.h.in b/libgomp/config.h.in index 390e548..e702625 100644 --- a/libgomp/config.h.in +++ b/libgomp/config.h.in @@ -130,9 +130,6 @@ /* Define to 1 if you have the `__secure_getenv' function. */ #undef HAVE___SECURE_GETENV -/* Define path to HSA runtime. */ -#undef HSA_RUNTIME_LIB - /* Define to 1 if GNU symbol versioning is used for libgomp. */ #undef LIBGOMP_GNU_SYMBOL_VERSIONING @@ -146,6 +143,9 @@ */ #undef LT_OBJDIR +/* Define to 1 to if -foffload is defaulted */ +#undef OFFLOAD_DEFAULTED + /* Define to offload plugins, separated by commas. */ #undef OFFLOAD_PLUGINS diff --git a/libgomp/config/gcn/icv-device.c b/libgomp/config/gcn/icv-device.c index 72d4f7c..34e0f83 100644 --- a/libgomp/config/gcn/icv-device.c +++ b/libgomp/config/gcn/icv-device.c @@ -70,6 +70,16 @@ omp_is_initial_device (void) return 0; } +/* This is set to the device number of current GPU during device initialization, + when the offload image containing this libgomp portion is loaded. 
*/ +static volatile int GOMP_DEVICE_NUM_VAR; + +int +omp_get_device_num (void) +{ + return GOMP_DEVICE_NUM_VAR; +} + ialias (omp_set_default_device) ialias (omp_get_default_device) ialias (omp_get_initial_device) @@ -77,3 +87,4 @@ ialias (omp_get_num_devices) ialias (omp_get_num_teams) ialias (omp_get_team_num) ialias (omp_is_initial_device) +ialias (omp_get_device_num) diff --git a/libgomp/config/gcn/target.c b/libgomp/config/gcn/target.c index 4016b7b..a93ecc9 100644 --- a/libgomp/config/gcn/target.c +++ b/libgomp/config/gcn/target.c @@ -65,3 +65,68 @@ omp_pause_resource_all (omp_pause_resource_t kind) ialias (omp_pause_resource) ialias (omp_pause_resource_all) + +void +GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, + void **hostaddrs, size_t *sizes, unsigned short *kinds, + unsigned int flags, void **depend, void **args) +{ + (void) device; + (void) fn; + (void) mapnum; + (void) hostaddrs; + (void) sizes; + (void) kinds; + (void) flags; + (void) depend; + (void) args; + __builtin_unreachable (); +} + +void +GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs, + size_t *sizes, unsigned short *kinds) +{ + (void) device; + (void) mapnum; + (void) hostaddrs; + (void) sizes; + (void) kinds; + __builtin_unreachable (); +} + +void +GOMP_target_end_data (void) +{ + __builtin_unreachable (); +} + +void +GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs, + size_t *sizes, unsigned short *kinds, + unsigned int flags, void **depend) +{ + (void) device; + (void) mapnum; + (void) hostaddrs; + (void) sizes; + (void) kinds; + (void) flags; + (void) depend; + __builtin_unreachable (); +} + +void +GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, + size_t *sizes, unsigned short *kinds, + unsigned int flags, void **depend) +{ + (void) device; + (void) mapnum; + (void) hostaddrs; + (void) sizes; + (void) kinds; + (void) flags; + (void) depend; + __builtin_unreachable (); +} diff --git 
a/libgomp/config/linux/affinity.c b/libgomp/config/linux/affinity.c index c5abdce..1b636c6 100644 --- a/libgomp/config/linux/affinity.c +++ b/libgomp/config/linux/affinity.c @@ -35,6 +35,7 @@ #include <stdio.h> #include <string.h> #include <unistd.h> +#include <limits.h> #ifdef HAVE_PTHREAD_AFFINITY_NP diff --git a/libgomp/config/linux/sem.h b/libgomp/config/linux/sem.h index bc0627d..a3b0e26 100644 --- a/libgomp/config/linux/sem.h +++ b/libgomp/config/linux/sem.h @@ -33,10 +33,8 @@ #ifndef GOMP_SEM_H #define GOMP_SEM_H 1 -#include <limits.h> /* For INT_MIN */ - typedef int gomp_sem_t; -#define SEM_WAIT INT_MIN +#define SEM_WAIT (-__INT_MAX__ - 1) #define SEM_INC 1 extern void gomp_sem_wait_slow (gomp_sem_t *, int); diff --git a/libgomp/config/nvptx/error.c b/libgomp/config/nvptx/error.c index dfa75da..c55791e 100644 --- a/libgomp/config/nvptx/error.c +++ b/libgomp/config/nvptx/error.c @@ -31,12 +31,38 @@ #include <stdio.h> #include <stdlib.h> -#undef vfprintf -#undef fputs -#undef fputc +/* No 'FILE *stream's, just basic 'vprintf' etc. */ + +#undef vfprintf #define vfprintf(stream, fmt, list) vprintf (fmt, list) + +#undef fputs #define fputs(s, stream) printf ("%s", s) + +#undef fputc #define fputc(c, stream) printf ("%c", c) +#undef fwrite +#if 0 +# define fwrite(ptr, size, nmemb, stream) \ + printf ("%.*s", (int) (size * nmemb), (int) (size * nmemb), ptr) +/* ... prints literal '%.*s'. */ +#else +# define fwrite(ptr, size, nmemb, stream) \ + do { \ + /* Yuck! */ \ + for (size_t i = 0; i < size * nmemb; ++i) \ + printf ("%c", ptr[i]); \ + } while (0) +#endif + + +/* The 'exit (EXIT_FAILURE);' of a Fortran (only, huh?) OpenMP 'error' + directive with 'severity (fatal)' causes a hang, so 'abort' instead of + 'exit'. 
*/ +#undef exit +#define exit(status) abort () + + #include "../../error.c" diff --git a/libgomp/config/nvptx/icv-device.c b/libgomp/config/nvptx/icv-device.c index 3b96890..b63149d 100644 --- a/libgomp/config/nvptx/icv-device.c +++ b/libgomp/config/nvptx/icv-device.c @@ -58,8 +58,19 @@ omp_is_initial_device (void) return 0; } +/* This is set to the device number of current GPU during device initialization, + when the offload image containing this libgomp portion is loaded. */ +static volatile int GOMP_DEVICE_NUM_VAR; + +int +omp_get_device_num (void) +{ + return GOMP_DEVICE_NUM_VAR; +} + ialias (omp_set_default_device) ialias (omp_get_default_device) ialias (omp_get_initial_device) ialias (omp_get_num_devices) ialias (omp_is_initial_device) +ialias (omp_get_device_num) diff --git a/libgomp/config/nvptx/target.c b/libgomp/config/nvptx/target.c index 1410577..e4140e4 100644 --- a/libgomp/config/nvptx/target.c +++ b/libgomp/config/nvptx/target.c @@ -65,3 +65,68 @@ omp_pause_resource_all (omp_pause_resource_t kind) ialias (omp_pause_resource) ialias (omp_pause_resource_all) + +void +GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, + void **hostaddrs, size_t *sizes, unsigned short *kinds, + unsigned int flags, void **depend, void **args) +{ + (void) device; + (void) fn; + (void) mapnum; + (void) hostaddrs; + (void) sizes; + (void) kinds; + (void) flags; + (void) depend; + (void) args; + __builtin_unreachable (); +} + +void +GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs, + size_t *sizes, unsigned short *kinds) +{ + (void) device; + (void) mapnum; + (void) hostaddrs; + (void) sizes; + (void) kinds; + __builtin_unreachable (); +} + +void +GOMP_target_end_data (void) +{ + __builtin_unreachable (); +} + +void +GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs, + size_t *sizes, unsigned short *kinds, + unsigned int flags, void **depend) +{ + (void) device; + (void) mapnum; + (void) hostaddrs; + (void) sizes; + (void) 
kinds; + (void) flags; + (void) depend; + __builtin_unreachable (); +} + +void +GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, + size_t *sizes, unsigned short *kinds, + unsigned int flags, void **depend) +{ + (void) device; + (void) mapnum; + (void) hostaddrs; + (void) sizes; + (void) kinds; + (void) flags; + (void) depend; + __builtin_unreachable (); +} diff --git a/libgomp/configure b/libgomp/configure index 22123f9..6161da5 100755 --- a/libgomp/configure +++ b/libgomp/configure @@ -14995,6 +14995,12 @@ $as_echo "#define LIBGOMP_OFFLOADED_ONLY 1" >>confdefs.h fi +if test "x$enable_offload_defaulted" = xyes; then + +$as_echo "#define OFFLOAD_DEFAULTED 1" >>confdefs.h + +fi + # The cast to long int works around a bug in the HP C Compiler # version HP92453-01 B.11.11.23709.GP, which incorrectly rejects # declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. @@ -15448,16 +15454,6 @@ cat >>confdefs.h <<_ACEOF _ACEOF -if test "$HSA_RUNTIME_LIB" != ""; then - HSA_RUNTIME_LIB="$HSA_RUNTIME_LIB/" -fi - - -cat >>confdefs.h <<_ACEOF -#define HSA_RUNTIME_LIB "$HSA_RUNTIME_LIB" -_ACEOF - - # Check for functions needed. 
for ac_func in getloadavg clock_gettime strtoull diff --git a/libgomp/configure.ac b/libgomp/configure.ac index c23fe00..7df80a3 100644 --- a/libgomp/configure.ac +++ b/libgomp/configure.ac @@ -221,6 +221,11 @@ if test x$libgomp_offloaded_only = xyes; then [Define to 1 if building libgomp for an accelerator-only target.]) fi +if test "x$enable_offload_defaulted" = xyes; then + AC_DEFINE(OFFLOAD_DEFAULTED, 1, + [Define to 1 to if -foffload is defaulted]) +fi + AC_CHECK_SIZEOF([void *]) m4_include([plugin/configfrag.ac]) diff --git a/libgomp/configure.tgt b/libgomp/configure.tgt index fe2bf1d..d4f1e74 100644 --- a/libgomp/configure.tgt +++ b/libgomp/configure.tgt @@ -173,6 +173,9 @@ case "${target}" in amdgcn*-*-*) config_path="gcn accel" + + #TODO PR101484 + XCFLAGS="$XCFLAGS -Wno-error=array-bounds" ;; *) diff --git a/libgomp/env.c b/libgomp/env.c index a24deab..9216998 100644 --- a/libgomp/env.c +++ b/libgomp/env.c @@ -99,6 +99,9 @@ int goacc_default_dims[GOMP_DIM_MAX]; #ifndef LIBGOMP_OFFLOADED_ONLY +static int wait_policy; +static unsigned long stacksize = GOMP_DEFAULT_STACKSIZE; + /* Parse the OMP_SCHEDULE environment variable. 
*/ static void @@ -434,6 +437,7 @@ parse_bind_var (const char *name, char *p1stvalue, { "false", 5, omp_proc_bind_false }, { "true", 4, omp_proc_bind_true }, { "master", 6, omp_proc_bind_master }, + { "primary", 7, omp_proc_bind_primary }, { "close", 5, omp_proc_bind_close }, { "spread", 6, omp_proc_bind_spread } }; @@ -447,14 +451,14 @@ parse_bind_var (const char *name, char *p1stvalue, if (*env == '\0') goto invalid; - for (i = 0; i < 5; i++) + for (i = 0; i < 6; i++) if (strncasecmp (env, kinds[i].name, kinds[i].len) == 0) { value = kinds[i].kind; env += kinds[i].len; break; } - if (i == 5) + if (i == 6) goto invalid; while (isspace ((unsigned char) *env)) @@ -494,14 +498,14 @@ parse_bind_var (const char *name, char *p1stvalue, if (*env == '\0') goto invalid; - for (i = 2; i < 5; i++) + for (i = 2; i < 6; i++) if (strncasecmp (env, kinds[i].name, kinds[i].len) == 0) { value = kinds[i].kind; env += kinds[i].len; break; } - if (i == 5) + if (i == 6) goto invalid; values[nvalues++] = value; @@ -1210,46 +1214,11 @@ parse_gomp_openacc_dim (void) } } -static void -handle_omp_display_env (unsigned long stacksize, int wait_policy) +void +omp_display_env (int verbose) { - const char *env; - bool display = false; - bool verbose = false; int i; - env = getenv ("OMP_DISPLAY_ENV"); - if (env == NULL) - return; - - while (isspace ((unsigned char) *env)) - ++env; - if (strncasecmp (env, "true", 4) == 0) - { - display = true; - env += 4; - } - else if (strncasecmp (env, "false", 5) == 0) - { - display = false; - env += 5; - } - else if (strncasecmp (env, "verbose", 7) == 0) - { - display = true; - verbose = true; - env += 7; - } - else - env = "X"; - while (isspace ((unsigned char) *env)) - ++env; - if (*env != '\0') - gomp_error ("Invalid value for environment variable OMP_DISPLAY_ENV"); - - if (!display) - return; - fputs ("\nOPENMP DISPLAY ENVIRONMENT BEGIN\n", stderr); fputs (" _OPENMP = '201511'\n", stderr); @@ -1309,7 +1278,7 @@ handle_omp_display_env (unsigned long 
stacksize, int wait_policy) fputs ("TRUE", stderr); break; case omp_proc_bind_master: - fputs ("MASTER", stderr); + fputs ("MASTER", stderr); /* TODO: Change to PRIMARY for OpenMP 5.1. */ break; case omp_proc_bind_close: fputs ("CLOSE", stderr); @@ -1322,7 +1291,7 @@ handle_omp_display_env (unsigned long stacksize, int wait_policy) switch (gomp_bind_var_list[i]) { case omp_proc_bind_master: - fputs (",MASTER", stderr); + fputs (",MASTER", stderr); /* TODO: Change to PRIMARY for OpenMP 5.1. */ break; case omp_proc_bind_close: fputs (",CLOSE", stderr); @@ -1408,14 +1377,54 @@ handle_omp_display_env (unsigned long stacksize, int wait_policy) fputs ("OPENMP DISPLAY ENVIRONMENT END\n", stderr); } +ialias (omp_display_env) + +static void +handle_omp_display_env (void) +{ + const char *env; + bool display = false; + bool verbose = false; + + env = getenv ("OMP_DISPLAY_ENV"); + if (env == NULL) + return; + + while (isspace ((unsigned char) *env)) + ++env; + if (strncasecmp (env, "true", 4) == 0) + { + display = true; + env += 4; + } + else if (strncasecmp (env, "false", 5) == 0) + { + display = false; + env += 5; + } + else if (strncasecmp (env, "verbose", 7) == 0) + { + display = true; + verbose = true; + env += 7; + } + else + env = "X"; + while (isspace ((unsigned char) *env)) + ++env; + if (*env != '\0') + gomp_error ("Invalid value for environment variable OMP_DISPLAY_ENV"); + + if (display) + omp_display_env (verbose); +} static void __attribute__((constructor)) initialize_env (void) { - unsigned long thread_limit_var, stacksize = GOMP_DEFAULT_STACKSIZE; + unsigned long thread_limit_var; unsigned long max_active_levels_var; - int wait_policy; /* Do a compile time check that mkomp_h.pl did good job. */ omp_check_defines (); @@ -1546,7 +1555,7 @@ initialize_env (void) gomp_error ("Stack size change failed: %s", strerror (err)); } - handle_omp_display_env (stacksize, wait_policy); + handle_omp_display_env (); /* OpenACC. 
*/ diff --git a/libgomp/error.c b/libgomp/error.c index 3ddf3aa..9b69a4b 100644 --- a/libgomp/error.c +++ b/libgomp/error.c @@ -89,3 +89,34 @@ gomp_fatal (const char *fmt, ...) gomp_vfatal (fmt, list); va_end (list); } + +void +GOMP_warning (const char *msg, size_t msglen) +{ + if (msg && msglen == (size_t) -1) + gomp_error ("error directive encountered: %s", msg); + else if (msg) + { + fputs ("\nlibgomp: error directive encountered: ", stderr); + fwrite (msg, 1, msglen, stderr); + fputc ('\n', stderr); + } + else + gomp_error ("error directive encountered"); +} + +void +GOMP_error (const char *msg, size_t msglen) +{ + if (msg && msglen == (size_t) -1) + gomp_fatal ("fatal error: error directive encountered: %s", msg); + else if (msg) + { + fputs ("\nlibgomp: fatal error: error directive encountered: ", stderr); + fwrite (msg, 1, msglen, stderr); + fputc ('\n', stderr); + exit (EXIT_FAILURE); + } + else + gomp_fatal ("fatal error: error directive encountered"); +} diff --git a/libgomp/fortran.c b/libgomp/fortran.c index 4ec39c4..07f9765 100644 --- a/libgomp/fortran.c +++ b/libgomp/fortran.c @@ -83,6 +83,7 @@ ialias_redirect (omp_get_partition_place_nums) ialias_redirect (omp_set_default_device) ialias_redirect (omp_get_default_device) ialias_redirect (omp_get_num_devices) +ialias_redirect (omp_get_device_num) ialias_redirect (omp_get_num_teams) ialias_redirect (omp_get_team_num) ialias_redirect (omp_is_initial_device) @@ -94,6 +95,7 @@ ialias_redirect (omp_init_allocator) ialias_redirect (omp_destroy_allocator) ialias_redirect (omp_set_default_allocator) ialias_redirect (omp_get_default_allocator) +ialias_redirect (omp_display_env) #endif #ifndef LIBGOMP_GNU_SYMBOL_VERSIONING @@ -599,6 +601,12 @@ omp_get_initial_device_ (void) } int32_t +omp_get_device_num_ (void) +{ + return omp_get_device_num (); +} + +int32_t omp_get_max_task_priority_ (void) { return omp_get_max_task_priority (); @@ -736,3 +744,19 @@ omp_get_default_allocator_ () { return (intptr_t) 
omp_get_default_allocator (); } + +#ifndef LIBGOMP_OFFLOADED_ONLY + +void +omp_display_env_ (const int32_t *verbose) +{ + omp_display_env (*verbose); +} + +void +omp_display_env_8_ (const int64_t *verbose) +{ + omp_display_env (!!*verbose); +} + +#endif /* LIBGOMP_OFFLOADED_ONLY */ diff --git a/libgomp/hashtab.h b/libgomp/hashtab.h index 25d6d94..26d8081 100644 --- a/libgomp/hashtab.h +++ b/libgomp/hashtab.h @@ -224,6 +224,15 @@ htab_mod_m2 (hashval_t hash, htab_t htab) return 1 + htab_mod_1 (hash, p->prime - 2, p->inv_m2, p->shift); } +static inline htab_t +htab_clear (htab_t htab) +{ + htab->n_elements = 0; + htab->n_deleted = 0; + memset (htab->entries, 0, htab->size * sizeof (hash_entry_type)); + return htab; +} + /* Create hash table of size SIZE. */ static htab_t @@ -238,11 +247,8 @@ htab_create (size_t size) result = (htab_t) htab_alloc (sizeof (struct htab) + size * sizeof (hash_entry_type)); result->size = size; - result->n_elements = 0; - result->n_deleted = 0; result->size_prime_index = size_prime_index; - memset (result->entries, 0, size * sizeof (hash_entry_type)); - return result; + return htab_clear (result); } /* Similar to htab_find_slot, but without several unwanted side effects: diff --git a/libgomp/icv-device.c b/libgomp/icv-device.c index c1bedf4..f11bdfa 100644 --- a/libgomp/icv-device.c +++ b/libgomp/icv-device.c @@ -61,8 +61,17 @@ omp_is_initial_device (void) return 1; } +int +omp_get_device_num (void) +{ + /* By specification, this is equivalent to omp_get_initial_device + on the host. 
*/ + return omp_get_initial_device (); +} + ialias (omp_set_default_device) ialias (omp_get_default_device) ialias (omp_get_initial_device) ialias (omp_get_num_devices) ialias (omp_is_initial_device) +ialias (omp_get_device_num) diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h index 62645ce..cf24a2b 100644 --- a/libgomp/libgomp-plugin.h +++ b/libgomp/libgomp-plugin.h @@ -102,6 +102,12 @@ struct addr_pair uintptr_t end; }; +/* This symbol is to name a target side variable that holds the designated + 'device number' of the target device. The symbol needs to be available to + libgomp code and the offload plugin (which in the latter case must be + stringified). */ +#define GOMP_DEVICE_NUM_VAR __gomp_device_num + /* Miscellaneous functions. */ extern void *GOMP_PLUGIN_malloc (size_t) __attribute__ ((malloc)); extern void *GOMP_PLUGIN_malloc_cleared (size_t) __attribute__ ((malloc)); diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index ef1bb49..e8901da 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -1012,11 +1012,35 @@ struct target_mem_desc { struct target_var_desc list[]; }; +/* Special value for refcount - mask to indicate existence of special + values. Right now we allocate 3 bits. */ +#define REFCOUNT_SPECIAL (~(uintptr_t) 0x7) + /* Special value for refcount - infinity. */ -#define REFCOUNT_INFINITY (~(uintptr_t) 0) +#define REFCOUNT_INFINITY (REFCOUNT_SPECIAL | 0) /* Special value for refcount - tgt_offset contains target address of the artificial pointer to "omp declare target link" object. */ -#define REFCOUNT_LINK (~(uintptr_t) 1) +#define REFCOUNT_LINK (REFCOUNT_SPECIAL | 1) + +/* Special value for refcount - structure element sibling list items. + All such key refounts have REFCOUNT_STRUCTELEM bits set, with _FLAG_FIRST + and _FLAG_LAST indicating first and last in the created sibling sequence. 
*/ +#define REFCOUNT_STRUCTELEM (REFCOUNT_SPECIAL | 4) +#define REFCOUNT_STRUCTELEM_P(V) \ + (((V) & REFCOUNT_STRUCTELEM) == REFCOUNT_STRUCTELEM) +/* The first leading key with _FLAG_FIRST set houses the actual reference count + in the structelem_refcount field. Other siblings point to this counter value + through its structelem_refcount_ptr field. */ +#define REFCOUNT_STRUCTELEM_FLAG_FIRST (1) +/* The last key in the sibling sequence has this set. This is required to + indicate the sequence boundary, when we remove the structure sibling list + from the map. */ +#define REFCOUNT_STRUCTELEM_FLAG_LAST (2) + +#define REFCOUNT_STRUCTELEM_FIRST_P(V) \ + (REFCOUNT_STRUCTELEM_P (V) && ((V) & REFCOUNT_STRUCTELEM_FLAG_FIRST)) +#define REFCOUNT_STRUCTELEM_LAST_P(V) \ + (REFCOUNT_STRUCTELEM_P (V) && ((V) & REFCOUNT_STRUCTELEM_FLAG_LAST)) /* Special offset values. */ #define OFFSET_INLINED (~(uintptr_t) 0) @@ -1044,8 +1068,22 @@ struct splay_tree_key_s { uintptr_t tgt_offset; /* Reference count. */ uintptr_t refcount; - /* Dynamic reference count. */ - uintptr_t dynamic_refcount; + union { + /* Dynamic reference count. */ + uintptr_t dynamic_refcount; + + /* Unified reference count for structure element siblings, this is used + when REFCOUNT_STRUCTELEM_FIRST_P(k->refcount) == true, the first sibling + in a structure element sibling list item sequence. */ + uintptr_t structelem_refcount; + + /* When REFCOUNT_STRUCTELEM_P (k->refcount) == true, this field points + into the (above) structelem_refcount field of the _FIRST splay_tree_key, + the first key in the created sequence. All structure element siblings + share a single refcount in this manner. Since these two fields won't be + used at the same time, they are stashed in a union. 
*/ + uintptr_t *structelem_refcount_ptr; + }; struct splay_tree_aux *aux; }; @@ -1188,7 +1226,7 @@ extern void gomp_acc_declare_allocate (bool, size_t, void **, size_t *, struct gomp_coalesce_buf; extern void gomp_copy_host2dev (struct gomp_device_descr *, struct goacc_asyncqueue *, void *, const void *, - size_t, struct gomp_coalesce_buf *); + size_t, bool, struct gomp_coalesce_buf *); extern void gomp_copy_dev2host (struct gomp_device_descr *, struct goacc_asyncqueue *, void *, const void *, size_t); @@ -1200,19 +1238,13 @@ extern void gomp_attach_pointer (struct gomp_device_descr *, extern void gomp_detach_pointer (struct gomp_device_descr *, struct goacc_asyncqueue *, splay_tree_key, uintptr_t, bool, struct gomp_coalesce_buf *); - -extern struct target_mem_desc *gomp_map_vars (struct gomp_device_descr *, - size_t, void **, void **, - size_t *, void *, bool, - enum gomp_map_vars_kind); -extern struct target_mem_desc *gomp_map_vars_async (struct gomp_device_descr *, - struct goacc_asyncqueue *, - size_t, void **, void **, - size_t *, void *, bool, - enum gomp_map_vars_kind); -extern void gomp_unmap_vars (struct target_mem_desc *, bool); -extern void gomp_unmap_vars_async (struct target_mem_desc *, bool, - struct goacc_asyncqueue *); +extern struct target_mem_desc *goacc_map_vars (struct gomp_device_descr *, + struct goacc_asyncqueue *, + size_t, void **, void **, + size_t *, void *, bool, + enum gomp_map_vars_kind); +extern void goacc_unmap_vars (struct target_mem_desc *, bool, + struct goacc_asyncqueue *); extern void gomp_init_device (struct gomp_device_descr *); extern bool gomp_fini_device (struct gomp_device_descr *); extern void gomp_unload_device (struct gomp_device_descr *); diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map index 4ad190a..e0c813c 100644 --- a/libgomp/libgomp.map +++ b/libgomp/libgomp.map @@ -199,6 +199,19 @@ OMP_5.0.1 { omp_fulfill_event_; } OMP_5.0; +OMP_5.0.2 { + global: + omp_get_device_num; + omp_get_device_num_; +} OMP_5.0.1; + 
+OMP_5.1 { + global: + omp_display_env; + omp_display_env_; + omp_display_env_8_; +} OMP_5.0.2; + GOMP_1.0 { global: GOMP_atomic_end; @@ -367,6 +380,13 @@ GOMP_5.0.1 { GOMP_free; } GOMP_5.0; +GOMP_5.1 { + global: + GOMP_error; + GOMP_scope_start; + GOMP_warning; +} GOMP_5.0.1; + OACC_2.0 { global: acc_get_num_devices; @@ -541,6 +561,12 @@ GOACC_2.0.1 { GOACC_parallel_keyed; } GOACC_2.0; +GOACC_2.0.2 { + global: + GOACC_enter_data; + GOACC_exit_data; +} GOACC_2.0.1; + GOMP_PLUGIN_1.0 { global: GOMP_PLUGIN_malloc; diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi index 2c1f1b5..6408518 100644 --- a/libgomp/libgomp.texi +++ b/libgomp/libgomp.texi @@ -95,6 +95,7 @@ changed to GNU Offloading and Multi Processing Runtime Library. @comment @menu * Enabling OpenMP:: How to enable OpenMP for your applications. +* OpenMP Implementation Status:: List of implemented features by OpenMP version * OpenMP Runtime Library Routines: Runtime Library Routines. The OpenMP runtime application programming interface. @@ -141,9 +142,203 @@ flag @command{-fopenmp} must be specified. This enables the OpenMP directive arranges for automatic linking of the OpenMP runtime library (@ref{Runtime Library Routines}). -A complete description of all OpenMP directives accepted may be found in -the @uref{https://www.openmp.org, OpenMP Application Program Interface} manual, -version 4.5. +A complete description of all OpenMP directives may be found in the +@uref{https://www.openmp.org, OpenMP Application Program Interface} manuals. +See also @ref{OpenMP Implementation Status}. 
+ + +@c --------------------------------------------------------------------- +@c OpenMP Implementation Status +@c --------------------------------------------------------------------- + +@node OpenMP Implementation Status +@chapter OpenMP Implementation Status + +@menu +* OpenMP 4.5:: Feature completion status to 4.5 specification +* OpenMP 5.0:: Feature completion status to 5.0 specification +* OpenMP 5.1:: Feature completion status to 5.1 specification +@end menu + +The @code{_OPENMP} preprocessor macro and Fortran's @code{openmp_version} +parameter, provided by @code{omp_lib.h} and the @code{omp_lib} module, have +the value @code{201511} (i.e. OpenMP 4.5). + +@node OpenMP 4.5 +@section OpenMP 4.5 + +The OpenMP 4.5 specification is fully supported. + +@node OpenMP 5.0 +@section OpenMP 5.0 + +@unnumberedsubsec New features listed in Appendix B of the OpenMP specification +@c This list is sorted as in OpenMP 5.1's B.3 not as in OpenMP 5.0's B.2 + +@multitable @columnfractions .60 .10 .25 +@headitem Description @tab Status @tab Comments +@item Array shaping @tab N @tab +@item Array sections with non-unit strides in C and C++ @tab N @tab +@item Iterators @tab Y @tab +@item @code{metadirective} directive @tab N @tab +@item @code{declare variant} directive + @tab P @tab Only C and C++, simd traits not handled correctly +@item @emph{target-offload-var} ICV and @code{OMP_TARGET_OFFLOAD} + env variable @tab Y @tab +@item Nested-parallel changes to @emph{max-active-levels-var} ICV @tab Y @tab +@item @code{requires} directive @tab P + @tab Only fulfillable requirement is @code{atomic_default_mem_order} +@item @code{teams} construct outside an enclosing target region @tab Y @tab +@item Non-rectangular loop nests @tab Y @tab +@item @code{!=} as relational-op in canonical loop form for C/C++ @tab Y @tab +@item @code{nonmonotonic} as default loop schedule modifier for worksharing-loop + constructs @tab Y @tab +@item Collapse of associated loops that are imperfectly nested 
loops @tab N @tab +@item Clauses @code{if}, @code{nontemporal} and @code{order(concurrent)} in + @code{simd} construct @tab Y @tab +@item @code{atomic} constructs in @code{simd} @tab Y @tab +@item @code{loop} construct @tab Y @tab +@item @code{order(concurrent)} clause @tab Y @tab +@item @code{scan} directive and @code{in_scan} modifier for the + @code{reduction} clause @tab Y @tab +@item @code{in_reduction} clause on @code{task} constructs @tab Y @tab +@item @code{in_reduction} clause on @code{target} constructs @tab P + @tab Only C/C++, @code{nowait} only stub +@item @code{task_reduction} clause with @code{taskgroup} @tab Y @tab +@item @code{task} modifier to @code{reduction} clause @tab Y @tab +@item @code{affinity} clause to @code{task} construct @tab Y @tab Stub only +@item @code{detach} clause to @code{task} construct @tab Y @tab +@item @code{omp_fulfill_event} runtime routine @tab Y @tab +@item @code{reduction} and @code{in_reduction} clauses on @code{taskloop} + and @code{taskloop simd} constructs @tab Y @tab +@item @code{taskloop} construct cancelable by @code{cancel} construct + @tab Y @tab +@item @code{mutexinoutset} @emph{dependence-type} for @code{depend} clause + @tab Y @tab +@item Predefined memory spaces, memory allocators, allocator traits + @tab Y @tab Some are only stubs +@item Memory management routines @tab Y @tab +@item @code{allocate} directive @tab N @tab +@item @code{allocate} clause @tab P @tab initial support in C/C++ only +@item @code{use_device_addr} clause on @code{target data} @tab Y @tab +@item @code{ancestor} modifier on @code{device} clause + @tab P @tab Reverse offload unsupported +@item Implicit declare target directive @tab Y @tab +@item Discontiguous array section with @code{target update} construct + @tab N @tab +@item C/C++'s lvalue expressions in @code{to}, @code{from} + and @code{map} clauses @tab N @tab +@item C/C++'s lvalue expressions in @code{depend} clauses @tab Y @tab +@item Nested @code{declare target} directive @tab 
Y @tab +@item Combined @code{master} constructs @tab Y @tab +@item @code{depend} clause on @code{taskwait} @tab Y @tab +@item Weak memory ordering clauses on @code{atomic} and @code{flush} construct + @tab Y @tab +@item @code{hint} clause on the @code{atomic} construct @tab Y @tab Stub only +@item @code{depobj} construct and depend objects @tab Y @tab +@item Lock hints were renamed to synchronization hints @tab Y @tab +@item @code{conditional} modifier to @code{lastprivate} clause @tab Y @tab +@item Map-order clarifications @tab P @tab +@item @code{close} @emph{map-type-modifier} @tab Y @tab +@item Mapping C/C++ pointer variables and to assign the address of + device memory mapped by an array section @tab P @tab +@item Mapping of Fortran pointer and allocatable variables, including pointer + and allocatable components of variables + @tab P @tab Mapping of vars with allocatable components unsupported +@item @code{defaultmap} extensions @tab Y @tab +@item @code{declare mapper} directive @tab N @tab +@item @code{omp_get_supported_active_levels} routine @tab Y @tab +@item Runtime routines and environment variables to display runtime thread + affinity information @tab Y @tab +@item @code{omp_pause_resource} and @code{omp_pause_resource_all} runtime + routines @tab Y @tab +@item @code{omp_get_device_num} runtime routine @tab Y @tab +@item OMPT interface @tab N @tab +@item OMPD interface @tab N @tab +@end multitable + +@unnumberedsubsec Other new OpenMP 5.0 features + +@multitable @columnfractions .60 .10 .25 +@headitem Description @tab Status @tab Comments +@item Supporting C++'s range-based for loop @tab Y @tab +@end multitable + + +@node OpenMP 5.1 +@section OpenMP 5.1 + +@unnumberedsubsec New features listed in Appendix B of the OpenMP specification + +@multitable @columnfractions .60 .10 .25 +@headitem Description @tab Status @tab Comments +@item OpenMP directive as C++ attribute specifiers @tab Y @tab +@item @code{omp_all_memory} reserved locator @tab N @tab +@item 
@emph{target_device trait} in OpenMP Context @tab N @tab +@item @code{target_device} selector set in context selectors @tab N @tab +@item C/C++'s @code{declare variant} directive: elision support of + preprocessed code @tab N @tab +@item @code{declare variant}: new clauses @code{adjust_args} and + @code{append_args} @tab N @tab +@item @code{dispatch} construct @tab N @tab +@item device-specific ICV settings with environment variables @tab N @tab +@item assume directive @tab N @tab +@item @code{nothing} directive @tab Y @tab +@item @code{error} directive @tab Y @tab +@item @code{masked} construct @tab Y @tab +@item @code{scope} directive @tab Y @tab +@item Loop transformation constructs @tab N @tab +@item @code{strict} modifier in the @code{grainsize} and @code{num_tasks} + clauses of the taskloop construct @tab Y @tab +@item @code{align} clause/modifier in @code{allocate} directive/clause + and @code{allocator} directive @tab N @tab +@item @code{thread_limit} clause to @code{target} construct @tab N @tab +@item @code{has_device_addr} clause to @code{target} construct @tab N @tab +@item iterators in @code{target update} motion clauses and @code{map} + clauses @tab N @tab +@item indirect calls to the device version of a procedure or function in + @code{target} regions @tab N @tab +@item @code{interop} directive @tab N @tab +@item @code{omp_interop_t} object support in runtime routines @tab N @tab +@item @code{nowait} clause in @code{taskwait} directive @tab N @tab +@item Extensions to the @code{atomic} directive @tab N @tab +@item @code{seq_cst} clause on a @code{flush} construct @tab Y @tab +@item @code{inoutset} argument to the @code{depend} clause @tab N @tab +@item @code{private} and @code{firstprivate} argument to @code{default} + clause in C and C++ @tab N @tab +@item @code{present} argument to @code{defaultmap} clause @tab N @tab +@item @code{omp_set_num_teams}, @code{omp_set_teams_thread_limit}, + @code{omp_get_max_teams}, @code{omp_get_teams_thread_limit} 
runtime + routines @tab N @tab +@item @code{omp_target_is_accessible} runtime routine @tab N @tab +@item @code{omp_target_memcpy_async} and @code{omp_target_memcpy_rect_async} + runtime routines @tab N @tab +@item @code{omp_get_mapped_ptr} runtime routine @tab N @tab +@item @code{omp_calloc}, @code{omp_realloc}, @code{omp_aligned_alloc} and + @code{omp_aligned_calloc} runtime routines @tab N @tab +@item @code{omp_alloctrait_key_t} enum: @code{omp_atv_serialized} added, + @code{omp_atv_default} changed @tab Y @tab +@item @code{omp_display_env} runtime routine @tab P + @tab Not inside @code{target} regions +@item @code{ompt_scope_endpoint_t} enum: @code{ompt_scope_beginend} @tab N @tab +@item @code{ompt_sync_region_t} enum additions @tab N @tab +@item @code{ompt_state_t} enum: @code{ompt_state_wait_barrier_implementation} + and @code{ompt_state_wait_barrier_teams} @tab N @tab +@item @code{ompt_callback_target_data_op_emi_t}, + @code{ompt_callback_target_emi_t}, @code{ompt_callback_target_map_emi_t} + and @code{ompt_callback_target_submit_emi_t} @tab N @tab +@item @code{ompt_callback_error_t} type @tab N @tab +@item @code{OMP_PLACES} syntax extensions @tab N @tab +@item @code{OMP_NUM_TEAMS} and @code{OMP_TEAMS_THREAD_LIMIT} environment + variables @tab N @tab +@end multitable + +@unnumberedsubsec Other new OpenMP 5.1 features + +@multitable @columnfractions .60 .10 .25 +@headitem Description @tab Status @tab Comments +@item Support of strictly structured blocks in Fortran @tab N @tab +@end multitable @c --------------------------------------------------------------------- @@ -165,6 +360,7 @@ linkage, and do not throw exceptions. 
* omp_get_ancestor_thread_num:: Ancestor thread ID * omp_get_cancellation:: Whether cancellation support is enabled * omp_get_default_device:: Get the default device for target regions +* omp_get_device_num:: Get device that current thread is running on * omp_get_dynamic:: Dynamic teams setting * omp_get_initial_device:: Device number of host device * omp_get_level:: Number of parallel regions @@ -385,6 +581,34 @@ For OpenMP 5.1, this must be equal to the value returned by the +@node omp_get_device_num +@section @code{omp_get_device_num} -- Return device number of current device +@table @asis +@item @emph{Description}: +This function returns a device number that represents the device that the +current thread is executing on. For OpenMP 5.0, this must be equal to the +value returned by the @code{omp_get_initial_device} function when called +from the host. + +@item @emph{C/C++} +@multitable @columnfractions .20 .80 +@item @emph{Prototype}: @tab @code{int omp_get_device_num(void);} +@end multitable + +@item @emph{Fortran}: +@multitable @columnfractions .20 .80 +@item @emph{Interface}: @tab @code{integer function omp_get_device_num()} +@end multitable + +@item @emph{See also}: +@ref{omp_get_initial_device} + +@item @emph{Reference}: +@uref{https://www.openmp.org, OpenMP specification v5.0}, Section 3.2.37. +@end table + + + @node omp_get_level @section @code{omp_get_level} -- Obtain the current nesting level @table @asis @@ -631,8 +855,9 @@ one thread per CPU online is used. @item @emph{Description}: This functions returns the currently active thread affinity policy, which is set via @env{OMP_PROC_BIND}. Possible values are @code{omp_proc_bind_false}, -@code{omp_proc_bind_true}, @code{omp_proc_bind_master}, -@code{omp_proc_bind_close} and @code{omp_proc_bind_spread}. 
+@code{omp_proc_bind_true}, @code{omp_proc_bind_primary}, +@code{omp_proc_bind_master}, @code{omp_proc_bind_close} and @code{omp_proc_bind_spread}, +where @code{omp_proc_bind_master} is an alias for @code{omp_proc_bind_primary}. @item @emph{C/C++}: @multitable @columnfractions .20 .80 @@ -793,7 +1018,7 @@ Returns a unique thread identification number within the current team. In a sequential parts of the program, @code{omp_get_thread_num} always returns 0. In parallel regions the return value varies from 0 to @code{omp_get_num_threads}-1 inclusive. The return -value of the master thread of a team is always 0. +value of the primary thread of a team is always 0. @item @emph{C/C++}: @multitable @columnfractions .20 .80 @@ -1641,11 +1866,12 @@ nesting by default. If undefined one thread per CPU is used. Specifies whether threads may be moved between processors. If set to @code{TRUE}, OpenMP theads should not be moved; if set to @code{FALSE} they may be moved. Alternatively, a comma separated list with the -values @code{MASTER}, @code{CLOSE} and @code{SPREAD} can be used to specify -the thread affinity policy for the corresponding nesting level. With -@code{MASTER} the worker threads are in the same place partition as the -master thread. With @code{CLOSE} those are kept close to the master thread -in contiguous place partitions. And with @code{SPREAD} a sparse distribution +values @code{PRIMARY}, @code{MASTER}, @code{CLOSE} and @code{SPREAD} can +be used to specify the thread affinity policy for the corresponding nesting +level. With @code{PRIMARY} and @code{MASTER} the worker threads are in the +same place partition as the primary thread. With @code{CLOSE} those are +kept close to the primary thread in contiguous place partitions. And +with @code{SPREAD} a sparse distribution across the place partitions is used. Specifying more than one item in the list will automatically enable nesting by default. @@ -1922,23 +2148,23 @@ instance. 
@item @code{$<priority>} is an optional priority for the worker threads of a thread pool according to @code{pthread_setschedparam}. In case a priority value is omitted, then a worker thread will inherit the priority of the OpenMP -master thread that created it. The priority of the worker thread is not -changed after creation, even if a new OpenMP master thread using the worker has +primary thread that created it. The priority of the worker thread is not +changed after creation, even if a new OpenMP primary thread using the worker has a different priority. @item @code{@@<scheduler-name>} is the scheduler instance name according to the RTEMS application configuration. @end itemize In case no thread pool configuration is specified for a scheduler instance, -then each OpenMP master thread of this scheduler instance will use its own +then each OpenMP primary thread of this scheduler instance will use its own dynamically allocated thread pool. To limit the worker thread count of the -thread pools, each OpenMP master thread must call @code{omp_set_num_threads}. +thread pools, each OpenMP primary thread must call @code{omp_set_num_threads}. @item @emph{Example}: Lets suppose we have three scheduler instances @code{IO}, @code{WRK0}, and @code{WRK1} with @env{GOMP_RTEMS_THREAD_POOLS} set to @code{"1@@WRK0:3$4@@WRK1"}. Then there are no thread pool restrictions for scheduler instance @code{IO}. In the scheduler instance @code{WRK0} there is one thread pool available. Since no priority is specified for this scheduler -instance, the worker thread inherits the priority of the OpenMP master thread +instance, the worker thread inherits the priority of the OpenMP primary thread that created it. In the scheduler instance @code{WRK1} there are three thread pools available and their worker threads run at priority four. 
@end table @@ -3717,7 +3943,7 @@ Remarks about certain event types: @c See 'DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT' in @c 'libgomp.oacc-c-c++-common/acc_prof-kernels-1.c', @c 'libgomp.oacc-c-c++-common/acc_prof-parallel-1.c'. -Whan a compute construct triggers implicit +When a compute construct triggers implicit @code{acc_ev_device_init_start} and @code{acc_ev_device_init_end} events, they currently aren't @emph{nested within} the corresponding @code{acc_ev_compute_construct_start} and @@ -3852,7 +4078,7 @@ if (omp_get_thread_num () == 0) @end smallexample Alternately, we generate two copies of the parallel subfunction -and only include this in the version run by the master thread. +and only include this in the version run by the primary thread. Surely this is not worthwhile though... @@ -3989,7 +4215,7 @@ broadcast would have to happen via SINGLE machinery instead. The private struct mentioned in the previous section should have a pointer to an array of the type of the variable, indexed by the thread's @var{team_id}. The thread stores its final value into the -array, and after the barrier, the master thread iterates over the +array, and after the barrier, the primary thread iterates over the array to collect the values. 
diff --git a/libgomp/libgomp_g.h b/libgomp/libgomp_g.h index 3cbe0a4..40e5cf0 100644 --- a/libgomp/libgomp_g.h +++ b/libgomp/libgomp_g.h @@ -332,6 +332,10 @@ extern bool GOMP_single_start (void); extern void *GOMP_single_copy_start (void); extern void GOMP_single_copy_end (void *); +/* scope.c */ + +extern void GOMP_scope_start (uintptr_t *); + /* target.c */ extern void GOMP_target (int, void (*) (void *), const void *, @@ -362,6 +366,11 @@ extern void GOMP_teams_reg (void (*) (void *), void *, unsigned, unsigned, extern void *GOMP_alloc (size_t, size_t, uintptr_t); extern void GOMP_free (void *, uintptr_t); +/* error.c */ + +extern void GOMP_warning (const char *, size_t); +extern void GOMP_error (const char *, size_t); + /* oacc-async.c */ extern void GOACC_wait (int, int, ...); @@ -370,6 +379,11 @@ extern void GOACC_wait (int, int, ...); extern void GOACC_enter_exit_data (int, size_t, void **, size_t *, unsigned short *, int, int, ...); +extern void GOACC_enter_data (int, size_t, void **, size_t *, + unsigned short *, int, int, ...); +extern void GOACC_exit_data (int, size_t, void **, size_t *, + unsigned short *, int, int, ...); +extern void GOACC_declare (int, size_t, void **, size_t *, unsigned short *); /* oacc-parallel.c */ @@ -384,6 +398,5 @@ extern void GOACC_update (int, size_t, void **, size_t *, unsigned short *, int, int, ...); extern int GOACC_get_num_threads (void); extern int GOACC_get_thread_num (void); -extern void GOACC_declare (int, size_t, void **, size_t *, unsigned short *); #endif /* LIBGOMP_G_H */ diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index 405574d..5988db0 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -202,7 +202,7 @@ memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async, if (from) gomp_copy_dev2host (thr->dev, aq, h, d, s); else - gomp_copy_host2dev (thr->dev, aq, d, h, s, /* TODO: cbuf? */ NULL); + gomp_copy_host2dev (thr->dev, aq, d, h, s, false, /* TODO: cbuf? 
*/ NULL); if (profiling_p) { @@ -402,9 +402,8 @@ acc_map_data (void *h, void *d, size_t s) gomp_mutex_unlock (&acc_dev->lock); struct target_mem_desc *tgt - = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes, - &kinds, true, - GOMP_MAP_VARS_OPENACC | GOMP_MAP_VARS_ENTER_DATA); + = goacc_map_vars (acc_dev, NULL, mapnum, &hostaddrs, &devaddrs, &sizes, + &kinds, true, GOMP_MAP_VARS_ENTER_DATA); assert (tgt); assert (tgt->list_count == 1); splay_tree_key n = tgt->list[0].key; @@ -572,9 +571,8 @@ goacc_enter_datum (void **hostaddrs, size_t *sizes, void *kinds, int async) goacc_aq aq = get_goacc_asyncqueue (async); struct target_mem_desc *tgt - = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, - kinds, true, (GOMP_MAP_VARS_OPENACC - | GOMP_MAP_VARS_ENTER_DATA)); + = goacc_map_vars (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, + kinds, true, GOMP_MAP_VARS_ENTER_DATA); assert (tgt); assert (tgt->list_count == 1); n = tgt->list[0].key; @@ -876,7 +874,7 @@ update_dev_host (int is_dev, void *h, size_t s, int async) goacc_aq aq = get_goacc_asyncqueue (async); if (is_dev) - gomp_copy_host2dev (acc_dev, aq, d, h, s, /* TODO: cbuf? */ NULL); + gomp_copy_host2dev (acc_dev, aq, d, h, s, false, /* TODO: cbuf? */ NULL); else gomp_copy_dev2host (acc_dev, aq, h, d, s); @@ -1070,7 +1068,7 @@ find_group_last (int pos, size_t mapnum, size_t *sizes, unsigned short *kinds) } /* Map variables for OpenACC "enter data". We can't just call - gomp_map_vars_async once, because individual mapped variables might have + goacc_map_vars once, because individual mapped variables might have "exit data" called for them at different times. 
*/ static void @@ -1202,10 +1200,9 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, gomp_mutex_unlock (&acc_dev->lock); struct target_mem_desc *tgt - = gomp_map_vars_async (acc_dev, aq, groupnum, &hostaddrs[i], NULL, - &sizes[i], &kinds[i], true, - (GOMP_MAP_VARS_OPENACC - | GOMP_MAP_VARS_ENTER_DATA)); + = goacc_map_vars (acc_dev, aq, groupnum, &hostaddrs[i], NULL, + &sizes[i], &kinds[i], true, + GOMP_MAP_VARS_ENTER_DATA); assert (tgt); gomp_mutex_lock (&acc_dev->lock); @@ -1320,56 +1317,22 @@ goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, gomp_mutex_unlock (&acc_dev->lock); } -void -GOACC_enter_exit_data (int flags_m, size_t mapnum, void **hostaddrs, - size_t *sizes, unsigned short *kinds, int async, - int num_waits, ...) +static void +goacc_enter_exit_data_internal (int flags_m, size_t mapnum, void **hostaddrs, + size_t *sizes, unsigned short *kinds, + bool data_enter, int async, int num_waits, + va_list *ap) { int flags = GOACC_FLAGS_UNMARSHAL (flags_m); struct goacc_thread *thr; struct gomp_device_descr *acc_dev; - bool data_enter = false; - size_t i; goacc_lazy_initialize (); thr = goacc_thread (); acc_dev = thr->dev; - /* Determine if this is an "acc enter data". 
*/ - for (i = 0; i < mapnum; ++i) - { - unsigned char kind = kinds[i] & 0xff; - - if (kind == GOMP_MAP_POINTER - || kind == GOMP_MAP_TO_PSET - || kind == GOMP_MAP_STRUCT) - continue; - - if (kind == GOMP_MAP_FORCE_ALLOC - || kind == GOMP_MAP_FORCE_PRESENT - || kind == GOMP_MAP_ATTACH - || kind == GOMP_MAP_FORCE_TO - || kind == GOMP_MAP_TO - || kind == GOMP_MAP_ALLOC) - { - data_enter = true; - break; - } - - if (kind == GOMP_MAP_RELEASE - || kind == GOMP_MAP_DELETE - || kind == GOMP_MAP_DETACH - || kind == GOMP_MAP_FORCE_DETACH - || kind == GOMP_MAP_FROM - || kind == GOMP_MAP_FORCE_FROM) - break; - - gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", - kind); - } - bool profiling_p = GOACC_PROFILING_DISPATCH_P (true); acc_prof_info prof_info; @@ -1433,13 +1396,7 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum, void **hostaddrs, } if (num_waits) - { - va_list ap; - - va_start (ap, num_waits); - goacc_wait (async, num_waits, &ap); - va_end (ap); - } + goacc_wait (async, num_waits, ap); goacc_aq aq = get_goacc_asyncqueue (async); @@ -1461,3 +1418,123 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum, void **hostaddrs, thr->api_info = NULL; } } + +/* Legacy entry point (GCC 11 and earlier). */ + +void +GOACC_enter_exit_data (int flags_m, size_t mapnum, void **hostaddrs, + size_t *sizes, unsigned short *kinds, int async, + int num_waits, ...) +{ + /* Determine if this is an OpenACC "enter data". 
*/ + bool data_enter = false; + for (size_t i = 0; i < mapnum; ++i) + { + unsigned char kind = kinds[i] & 0xff; + + if (kind == GOMP_MAP_POINTER + || kind == GOMP_MAP_TO_PSET + || kind == GOMP_MAP_STRUCT) + continue; + + if (kind == GOMP_MAP_FORCE_ALLOC + || kind == GOMP_MAP_FORCE_PRESENT + || kind == GOMP_MAP_ATTACH + || kind == GOMP_MAP_FORCE_TO + || kind == GOMP_MAP_TO + || kind == GOMP_MAP_ALLOC) + { + data_enter = true; + break; + } + + if (kind == GOMP_MAP_RELEASE + || kind == GOMP_MAP_DELETE + || kind == GOMP_MAP_DETACH + || kind == GOMP_MAP_FORCE_DETACH + || kind == GOMP_MAP_FROM + || kind == GOMP_MAP_FORCE_FROM) + break; + + gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", + kind); + } + + va_list ap; + va_start (ap, num_waits); + goacc_enter_exit_data_internal (flags_m, mapnum, hostaddrs, sizes, kinds, + data_enter, async, num_waits, &ap); + va_end (ap); +} + +void +GOACC_enter_data (int flags_m, size_t mapnum, void **hostaddrs, + size_t *sizes, unsigned short *kinds, int async, + int num_waits, ...) +{ + va_list ap; + va_start (ap, num_waits); + goacc_enter_exit_data_internal (flags_m, mapnum, hostaddrs, sizes, kinds, + true, async, num_waits, &ap); + va_end (ap); +} + +void +GOACC_exit_data (int flags_m, size_t mapnum, void **hostaddrs, + size_t *sizes, unsigned short *kinds, int async, + int num_waits, ...) 
+{ + va_list ap; + va_start (ap, num_waits); + goacc_enter_exit_data_internal (flags_m, mapnum, hostaddrs, sizes, kinds, + false, async, num_waits, &ap); + va_end (ap); +} + +void +GOACC_declare (int flags_m, size_t mapnum, + void **hostaddrs, size_t *sizes, unsigned short *kinds) +{ + for (size_t i = 0; i < mapnum; i++) + { + unsigned char kind = kinds[i] & 0xff; + + if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET) + continue; + + switch (kind) + { + case GOMP_MAP_ALLOC: + if (acc_is_present (hostaddrs[i], sizes[i])) + continue; + /* FALLTHRU */ + case GOMP_MAP_FORCE_ALLOC: + case GOMP_MAP_TO: + case GOMP_MAP_FORCE_TO: + goacc_enter_exit_data_internal (flags_m, 1, &hostaddrs[i], &sizes[i], + &kinds[i], true, GOMP_ASYNC_SYNC, 0, NULL); + break; + + case GOMP_MAP_FROM: + case GOMP_MAP_FORCE_FROM: + case GOMP_MAP_RELEASE: + case GOMP_MAP_DELETE: + goacc_enter_exit_data_internal (flags_m, 1, &hostaddrs[i], &sizes[i], + &kinds[i], false, GOMP_ASYNC_SYNC, 0, NULL); + break; + + case GOMP_MAP_FORCE_DEVICEPTR: + break; + + case GOMP_MAP_FORCE_PRESENT: + if (!acc_is_present (hostaddrs[i], sizes[i])) + gomp_fatal ("[%p,%ld] is not mapped", hostaddrs[i], + (unsigned long) sizes[i]); + break; + + default: + assert (0); + break; + } + } +} diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c index cf1baf6..83625ba 100644 --- a/libgomp/oacc-parallel.c +++ b/libgomp/oacc-parallel.c @@ -290,8 +290,8 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), goacc_aq aq = get_goacc_asyncqueue (async); - tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds, - true, GOMP_MAP_VARS_OPENACC); + tgt = goacc_map_vars (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds, + true, 0); if (profiling_p) { prof_info.event_type = acc_ev_enter_data_end; @@ -300,7 +300,7 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, &api_info); } - + devaddrs = gomp_alloca (sizeof (void *) * 
mapnum); for (i = 0; i < mapnum; i++) devaddrs[i] = (void *) gomp_map_val (tgt, hostaddrs, i); @@ -321,11 +321,8 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), &api_info); } - /* If running synchronously, unmap immediately. */ - if (aq == NULL) - gomp_unmap_vars (tgt, true); - else - gomp_unmap_vars_async (tgt, true, aq); + /* If running synchronously (aq == NULL), this will unmap immediately. */ + goacc_unmap_vars (tgt, true, aq); if (profiling_p) { @@ -456,8 +453,7 @@ GOACC_data_start (int flags_m, size_t mapnum, { prof_info.device_type = acc_device_host; api_info.device_type = prof_info.device_type; - tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, - GOMP_MAP_VARS_OPENACC); + tgt = goacc_map_vars (NULL, NULL, 0, NULL, NULL, NULL, NULL, true, 0); tgt->prev = thr->mapped_data; thr->mapped_data = tgt; @@ -465,8 +461,8 @@ GOACC_data_start (int flags_m, size_t mapnum, } gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); - tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true, - GOMP_MAP_VARS_OPENACC); + tgt = goacc_map_vars (acc_dev, NULL, mapnum, hostaddrs, NULL, sizes, kinds, + true, 0); gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); tgt->prev = thr->mapped_data; thr->mapped_data = tgt; @@ -542,7 +538,7 @@ GOACC_data_end (void) gomp_debug (0, " %s: restore mappings\n", __FUNCTION__); thr->mapped_data = tgt->prev; - gomp_unmap_vars (tgt, true); + goacc_unmap_vars (tgt, true, NULL); gomp_debug (0, " %s: mappings restored\n", __FUNCTION__); if (profiling_p) @@ -728,61 +724,3 @@ GOACC_get_thread_num (void) { return 0; } - -void -GOACC_declare (int flags_m, size_t mapnum, - void **hostaddrs, size_t *sizes, unsigned short *kinds) -{ - int i; - - for (i = 0; i < mapnum; i++) - { - unsigned char kind = kinds[i] & 0xff; - - if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET) - continue; - - switch (kind) - { - case GOMP_MAP_FORCE_ALLOC: - case GOMP_MAP_FORCE_FROM: - case GOMP_MAP_FORCE_TO: - case 
GOMP_MAP_POINTER: - case GOMP_MAP_RELEASE: - case GOMP_MAP_DELETE: - GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i], - &kinds[i], GOMP_ASYNC_SYNC, 0); - break; - - case GOMP_MAP_FORCE_DEVICEPTR: - break; - - case GOMP_MAP_ALLOC: - if (!acc_is_present (hostaddrs[i], sizes[i])) - GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i], - &kinds[i], GOMP_ASYNC_SYNC, 0); - break; - - case GOMP_MAP_TO: - GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i], - &kinds[i], GOMP_ASYNC_SYNC, 0); - - break; - - case GOMP_MAP_FROM: - GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i], - &kinds[i], GOMP_ASYNC_SYNC, 0); - break; - - case GOMP_MAP_FORCE_PRESENT: - if (!acc_is_present (hostaddrs[i], sizes[i])) - gomp_fatal ("[%p,%ld] is not mapped", hostaddrs[i], - (unsigned long) sizes[i]); - break; - - default: - assert (0); - break; - } - } -} diff --git a/libgomp/omp.h.in b/libgomp/omp.h.in index 69f96f0..314f964 100644 --- a/libgomp/omp.h.in +++ b/libgomp/omp.h.in @@ -32,6 +32,12 @@ # define __GOMP_DEPRECATED_5_0 #endif +#if defined(__GNUC__) && _OPENMP >= 202011 +# define __GOMP_DEPRECATED_5_1 __attribute__((__deprecated__)) +#else +# define __GOMP_DEPRECATED_5_1 +#endif + #ifndef _LIBGOMP_OMP_LOCK_DEFINED #define _LIBGOMP_OMP_LOCK_DEFINED 1 /* These two structures get edited by the libgomp build process to @@ -64,7 +70,9 @@ typedef enum omp_proc_bind_t { omp_proc_bind_false = 0, omp_proc_bind_true = 1, - omp_proc_bind_master = 2, + omp_proc_bind_primary = 2, + omp_proc_bind_master __GOMP_DEPRECATED_5_1 + = omp_proc_bind_primary, omp_proc_bind_close = 3, omp_proc_bind_spread = 4 } omp_proc_bind_t; @@ -243,6 +251,7 @@ extern void omp_get_partition_place_nums (int *) __GOMP_NOTHROW; extern void omp_set_default_device (int) __GOMP_NOTHROW; extern int omp_get_default_device (void) __GOMP_NOTHROW; extern int omp_get_num_devices (void) __GOMP_NOTHROW; +extern int omp_get_device_num (void) __GOMP_NOTHROW; extern int omp_get_num_teams (void) 
__GOMP_NOTHROW; extern int omp_get_team_num (void) __GOMP_NOTHROW; @@ -293,6 +302,8 @@ extern void omp_free (void *, omp_allocator_handle_t __GOMP_DEFAULT_NULL_ALLOCATOR) __GOMP_NOTHROW; +extern void omp_display_env (int) __GOMP_NOTHROW; + #ifdef __cplusplus } #endif diff --git a/libgomp/omp_lib.f90.in b/libgomp/omp_lib.f90.in index 851f85f..a36a562 100644 --- a/libgomp/omp_lib.f90.in +++ b/libgomp/omp_lib.f90.in @@ -49,6 +49,8 @@ integer (omp_proc_bind_kind), & parameter :: omp_proc_bind_true = 1 integer (omp_proc_bind_kind), & + parameter :: omp_proc_bind_primary = 2 + integer (omp_proc_bind_kind), & parameter :: omp_proc_bind_master = 2 integer (omp_proc_bind_kind), & parameter :: omp_proc_bind_close = 3 @@ -551,6 +553,12 @@ end interface interface + function omp_get_device_num () + integer (4) :: omp_get_device_num + end function omp_get_device_num + end interface + + interface function omp_get_max_task_priority () integer (4) :: omp_get_max_task_priority end function omp_get_max_task_priority @@ -653,8 +661,115 @@ end function end interface + interface omp_display_env + subroutine omp_display_env (verbose) + logical (4),intent (in) :: verbose + end subroutine omp_display_env + subroutine omp_display_env_8 (verbose) + logical (8),intent (in) :: verbose + end subroutine omp_display_env_8 + end interface + + interface + function omp_alloc (size, allocator) bind(c) + use, intrinsic :: iso_c_binding, only : c_ptr, c_size_t + import :: omp_allocator_handle_kind + type(c_ptr) :: omp_alloc + integer(c_size_t), value :: size + integer(omp_allocator_handle_kind), value :: allocator + end function omp_alloc + end interface + + interface + subroutine omp_free(ptr, allocator) bind(c) + use, intrinsic :: iso_c_binding, only : c_ptr + import :: omp_allocator_handle_kind + type(c_ptr), value :: ptr + integer(omp_allocator_handle_kind), value :: allocator + end subroutine + end interface + + interface + function omp_target_alloc (size, device_num) bind(c) + use, intrinsic :: 
iso_c_binding, only : c_ptr, c_size_t, c_int + type(c_ptr) :: omp_target_alloc + integer(c_size_t), value :: size + integer(c_int), value :: device_num + end function omp_target_alloc + end interface + + interface + subroutine omp_target_free (device_ptr, device_num) bind(c) + use, intrinsic :: iso_c_binding, only : c_ptr, c_int + type(c_ptr), value :: device_ptr + integer(c_int), value :: device_num + end subroutine omp_target_free + end interface + + interface + function omp_target_is_present (ptr, device_num) bind(c) + use, intrinsic :: iso_c_binding, only : c_ptr, c_int + integer(c_int) :: omp_target_is_present + type(c_ptr), value :: ptr + integer(c_int), value :: device_num + end function omp_target_is_present + end interface + + interface + function omp_target_memcpy (dst, src, length, dst_offset, & + src_offset, dst_device_num, & + src_device_num) bind(c) + use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_size_t + integer(c_int) :: omp_target_memcpy + type(c_ptr), value :: dst, src + integer(c_size_t), value :: length, dst_offset, src_offset + integer(c_int), value :: dst_device_num, src_device_num + end function omp_target_memcpy + end interface + + interface + function omp_target_memcpy_rect (dst,src,element_size, num_dims, & + volume, dst_offsets, src_offsets, & + dst_dimensions, src_dimensions, & + dst_device_num, src_device_num) & + bind(c) + use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_size_t + integer(c_int) :: omp_target_memcpy_rect + type(c_ptr), value :: dst, src + integer(c_size_t), value :: element_size + integer(c_int), value :: num_dims, dst_device_num, src_device_num + integer(c_size_t), intent(in) :: volume(*), dst_offsets(*), & + src_offsets(*), dst_dimensions(*), & + src_dimensions(*) + end function omp_target_memcpy_rect + end interface + + interface + function omp_target_associate_ptr (host_ptr, device_ptr, size, & + device_offset, device_num) bind(c) + use, intrinsic :: iso_c_binding, only : c_ptr, c_size_t, c_int + 
integer(c_int) :: omp_target_associate_ptr + type(c_ptr), value :: host_ptr, device_ptr + integer(c_size_t), value :: size, device_offset + integer(c_int), value :: device_num + end function omp_target_associate_ptr + end interface + + interface + function omp_target_disassociate_ptr (ptr, device_num) bind(c) + use, intrinsic :: iso_c_binding, only : c_ptr, c_int + integer(c_int) :: omp_target_disassociate_ptr + type(c_ptr), value :: ptr + integer(c_int), value :: device_num + end function omp_target_disassociate_ptr + end interface + #if _OPENMP >= 201811 !GCC$ ATTRIBUTES DEPRECATED :: omp_get_nested, omp_set_nested #endif +#if _OPENMP >= 202011 +!GCC$ ATTRIBUTES DEPRECATED :: omp_proc_bind_master +#endif + end module omp_lib diff --git a/libgomp/omp_lib.h.in b/libgomp/omp_lib.h.in index 06d17b5..1c2eacb 100644 --- a/libgomp/omp_lib.h.in +++ b/libgomp/omp_lib.h.in @@ -40,11 +40,13 @@ parameter (omp_proc_bind_kind = 4) integer (omp_proc_bind_kind) omp_proc_bind_false integer (omp_proc_bind_kind) omp_proc_bind_true + integer (omp_proc_bind_kind) omp_proc_bind_primary integer (omp_proc_bind_kind) omp_proc_bind_master integer (omp_proc_bind_kind) omp_proc_bind_close integer (omp_proc_bind_kind) omp_proc_bind_spread parameter (omp_proc_bind_false = 0) parameter (omp_proc_bind_true = 1) + parameter (omp_proc_bind_primary = 2) parameter (omp_proc_bind_master = 2) parameter (omp_proc_bind_close = 3) parameter (omp_proc_bind_spread = 4) @@ -244,6 +246,9 @@ external omp_get_initial_device integer(4) omp_get_initial_device + external omp_get_device_num + integer(4) omp_get_device_num + external omp_get_max_task_priority integer(4) omp_get_max_task_priority @@ -264,3 +269,102 @@ external omp_set_default_allocator external omp_get_default_allocator integer (omp_allocator_handle_kind) omp_get_default_allocator + + external omp_display_env + + interface + function omp_alloc (size, allocator) bind(c) + use, intrinsic :: iso_c_binding, only : c_ptr, c_size_t + use, intrinsic :: 
omp_lib_kinds + type(c_ptr) :: omp_alloc + integer(c_size_t), value :: size + integer(omp_allocator_handle_kind), value :: allocator + end function omp_alloc + end interface + + interface + subroutine omp_free(ptr, allocator) bind(c) + use, intrinsic :: iso_c_binding, only : c_ptr + use, intrinsic :: omp_lib_kinds + type(c_ptr), value :: ptr + integer(omp_allocator_handle_kind), value :: allocator + end subroutine + end interface + + interface + function omp_target_alloc (size, device_num) bind(c) + use, intrinsic :: iso_c_binding, only : c_ptr, c_size_t, c_int + type(c_ptr) :: omp_target_alloc + integer(c_size_t), value :: size + integer(c_int), value :: device_num + end function omp_target_alloc + end interface + + interface + subroutine omp_target_free (device_ptr, device_num) bind(c) + use, intrinsic :: iso_c_binding, only : c_ptr, c_int + type(c_ptr), value :: device_ptr + integer(c_int), value :: device_num + end subroutine omp_target_free + end interface + + interface + function omp_target_is_present (ptr, device_num) bind(c) + use, intrinsic :: iso_c_binding, only : c_ptr, c_int + integer(c_int) :: omp_target_is_present + type(c_ptr), value :: ptr + integer(c_int), value :: device_num + end function omp_target_is_present + end interface + + interface + function omp_target_memcpy (dst, src, length, dst_offset, & + & src_offset, dst_device_num, & + & src_device_num) bind(c) + use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_size_t + integer(c_int) :: omp_target_memcpy + type(c_ptr), value :: dst, src + integer(c_size_t), value :: length, dst_offset, src_offset + integer(c_int), value :: dst_device_num, src_device_num + end function omp_target_memcpy + end interface + + interface + function omp_target_memcpy_rect (dst,src,element_size, num_dims, & + & volume, dst_offsets, & + & src_offsets, dst_dimensions, & + & src_dimensions, dst_device_num, & + & src_device_num) bind(c) + use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_size_t + integer(c_int) 
:: omp_target_memcpy_rect + type(c_ptr), value :: dst, src + integer(c_size_t), value :: element_size + integer(c_int), value :: num_dims + integer(c_int), value :: dst_device_num, src_device_num + integer(c_size_t), intent(in) :: volume(*), dst_offsets(*) + integer(c_size_t), intent(in) :: src_offsets(*) + integer(c_size_t), intent(in) :: dst_dimensions(*) + integer(c_size_t), intent(in) :: src_dimensions(*) + end function omp_target_memcpy_rect + end interface + + interface + function omp_target_associate_ptr (host_ptr, device_ptr, size, & + & device_offset, device_num) & + & bind(c) + use, intrinsic :: iso_c_binding, only : c_ptr, c_size_t, c_int + integer(c_int) :: omp_target_associate_ptr + type(c_ptr), value :: host_ptr, device_ptr + integer(c_size_t), value :: size, device_offset + integer(c_int), value :: device_num + end function omp_target_associate_ptr + end interface + + interface + function omp_target_disassociate_ptr (ptr, device_num) bind(c) + use, intrinsic :: iso_c_binding, only : c_ptr, c_int + integer(c_int) :: omp_target_disassociate_ptr + type(c_ptr), value :: ptr + integer(c_int), value :: device_num + end function omp_target_disassociate_ptr + end interface diff --git a/libgomp/plugin/configfrag.ac b/libgomp/plugin/configfrag.ac index 1ab1777..f447def 100644 --- a/libgomp/plugin/configfrag.ac +++ b/libgomp/plugin/configfrag.ac @@ -272,10 +272,3 @@ AC_DEFINE_UNQUOTED([PLUGIN_NVPTX_DYNAMIC], [$PLUGIN_NVPTX_DYNAMIC], AM_CONDITIONAL([PLUGIN_GCN], [test $PLUGIN_GCN = 1]) AC_DEFINE_UNQUOTED([PLUGIN_GCN], [$PLUGIN_GCN], [Define to 1 if the GCN plugin is built, 0 if not.]) - -if test "$HSA_RUNTIME_LIB" != ""; then - HSA_RUNTIME_LIB="$HSA_RUNTIME_LIB/" -fi - -AC_DEFINE_UNQUOTED([HSA_RUNTIME_LIB], ["$HSA_RUNTIME_LIB"], - [Define path to HSA runtime.]) diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c index 8e6af69..9e7377c 100644 --- a/libgomp/plugin/plugin-gcn.c +++ b/libgomp/plugin/plugin-gcn.c @@ -29,6 +29,7 @@ /* {{{ Includes 
and defines */ #include "config.h" +#include "symcat.h" #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -292,7 +293,6 @@ struct copy_data void *dst; const void *src; size_t len; - bool free_src; struct goacc_asyncqueue *aq; }; @@ -1072,7 +1072,7 @@ init_environment_variables (void) hsa_runtime_lib = secure_getenv ("HSA_RUNTIME_LIB"); if (hsa_runtime_lib == NULL) - hsa_runtime_lib = HSA_RUNTIME_LIB "libhsa-runtime64.so.1"; + hsa_runtime_lib = "libhsa-runtime64.so.1"; support_cpu_devices = secure_getenv ("GCN_SUPPORT_CPU_DEVICES"); @@ -2914,8 +2914,6 @@ copy_data (void *data_) data->aq->agent->device_id, data->aq->id, data->len, data->src, data->dst); hsa_memory_copy_wrapper (data->dst, data->src, data->len); - if (data->free_src) - free ((void *) data->src); free (data); } @@ -2929,12 +2927,11 @@ gomp_offload_free (void *ptr) } /* Request an asynchronous data copy, to or from a device, on a given queue. - The event will be registered as a callback. If FREE_SRC is true - then the source data will be freed following the copy. */ + The event will be registered as a callback. */ static void queue_push_copy (struct goacc_asyncqueue *aq, void *dst, const void *src, - size_t len, bool free_src) + size_t len) { if (DEBUG_QUEUES) GCN_DEBUG ("queue_push_copy %d:%d: %zu bytes from (%p) to (%p)\n", @@ -2944,7 +2941,6 @@ queue_push_copy (struct goacc_asyncqueue *aq, void *dst, const void *src, data->dst = dst; data->src = src; data->len = len; - data->free_src = free_src; data->aq = aq; queue_push_callback (aq, copy_data, data); } @@ -3041,10 +3037,8 @@ gcn_exec (struct kernel_info *kernel, size_t mapnum, void **hostaddrs, problem size, so let's do a reasonable number of single-worker gangs. 64 gangs matches a typical Fiji device. */ - /* NOTE: Until support for middle-end worker partitioning is merged, use 1 - for the default number of workers. */ if (dims[0] == 0) dims[0] = get_cu_count (kernel->agent); /* Gangs. */ - if (dims[1] == 0) dims[1] = 1; /* Workers. 
*/ + if (dims[1] == 0) dims[1] = 16; /* Workers. */ /* The incoming dimensions are expressed in terms of gangs, workers, and vectors. The HSA dimensions are expressed in terms of "work-items", @@ -3311,6 +3305,7 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, struct kernel_info *kernel; int kernel_count = image_desc->kernel_count; unsigned var_count = image_desc->global_variable_count; + int other_count = 1; agent = get_agent_info (ord); if (!agent) @@ -3327,7 +3322,8 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, GCN_DEBUG ("Encountered %d kernels in an image\n", kernel_count); GCN_DEBUG ("Encountered %u global variables in an image\n", var_count); - pair = GOMP_PLUGIN_malloc ((kernel_count + var_count - 2) + GCN_DEBUG ("Expect %d other variables in an image\n", other_count); + pair = GOMP_PLUGIN_malloc ((kernel_count + var_count + other_count - 2) * sizeof (struct addr_pair)); *target_table = pair; module = (struct module_info *) @@ -3402,6 +3398,37 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, pair++; } + GCN_DEBUG ("Looking for variable %s\n", STRINGX (GOMP_DEVICE_NUM_VAR)); + + hsa_status_t status; + hsa_executable_symbol_t var_symbol; + status = hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL, + STRINGX (GOMP_DEVICE_NUM_VAR), + agent->id, 0, &var_symbol); + if (status == HSA_STATUS_SUCCESS) + { + uint64_t device_num_varptr; + uint32_t device_num_varsize; + + status = hsa_fns.hsa_executable_symbol_get_info_fn + (var_symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, + &device_num_varptr); + if (status != HSA_STATUS_SUCCESS) + hsa_fatal ("Could not extract a variable from its symbol", status); + status = hsa_fns.hsa_executable_symbol_get_info_fn + (var_symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE, + &device_num_varsize); + if (status != HSA_STATUS_SUCCESS) + hsa_fatal ("Could not extract a variable size from its symbol", status); + + 
pair->start = device_num_varptr; + pair->end = device_num_varptr + device_num_varsize; + } + else + /* The 'GOMP_DEVICE_NUM_VAR' variable was not in this image. */ + pair->start = pair->end = 0; + pair++; + /* Ensure that constructors are run first. */ struct GOMP_kernel_launch_attributes kla = { 3, @@ -3424,7 +3451,7 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, if (module->fini_array_func) kernel_count--; - return kernel_count + var_count; + return kernel_count + var_count + other_count; } /* Unload GCN object-code module described by struct gcn_image_desc in @@ -3647,7 +3674,7 @@ GOMP_OFFLOAD_dev2dev (int device, void *dst, const void *src, size_t n) { struct agent_info *agent = get_agent_info (device); maybe_init_omp_async (agent); - queue_push_copy (agent->omp_async_queue, dst, src, n, false); + queue_push_copy (agent->omp_async_queue, dst, src, n); return true; } @@ -3917,15 +3944,7 @@ GOMP_OFFLOAD_openacc_async_host2dev (int device, void *dst, const void *src, { struct agent_info *agent = get_agent_info (device); assert (agent == aq->agent); - /* The source data does not necessarily remain live until the deferred - copy happens. Taking a snapshot of the data here avoids reading - uninitialised data later, but means that (a) data is copied twice and - (b) modifications to the copied data between the "spawning" point of - the asynchronous kernel and when it is executed will not be seen. - But, that is probably correct. 
*/ - void *src_copy = GOMP_PLUGIN_malloc (n); - memcpy (src_copy, src, n); - queue_push_copy (aq, dst, src_copy, n, true); + queue_push_copy (aq, dst, src, n); return true; } @@ -3937,7 +3956,7 @@ GOMP_OFFLOAD_openacc_async_dev2host (int device, void *dst, const void *src, { struct agent_info *agent = get_agent_info (device); assert (agent == aq->agent); - queue_push_copy (aq, dst, src, n, false); + queue_push_copy (aq, dst, src, n); return true; } diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index 1215212..0f16e1c 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -34,6 +34,7 @@ #define _GNU_SOURCE #include "openacc.h" #include "config.h" +#include "symcat.h" #include "libgomp-plugin.h" #include "oacc-plugin.h" #include "gomp-constants.h" @@ -1265,7 +1266,7 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, CUmodule module; const char *const *var_names; const struct targ_fn_launch *fn_descs; - unsigned int fn_entries, var_entries, i, j; + unsigned int fn_entries, var_entries, other_entries, i, j; struct targ_fn_descriptor *targ_fns; struct addr_pair *targ_tbl; const nvptx_tdata_t *img_header = (const nvptx_tdata_t *) target_data; @@ -1295,8 +1296,11 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, fn_entries = img_header->fn_num; fn_descs = img_header->fn_descs; + /* Currently, the only other entry kind is 'device number'. 
*/ + other_entries = 1; + targ_tbl = GOMP_PLUGIN_malloc (sizeof (struct addr_pair) - * (fn_entries + var_entries)); + * (fn_entries + var_entries + other_entries)); targ_fns = GOMP_PLUGIN_malloc (sizeof (struct targ_fn_descriptor) * fn_entries); @@ -1345,9 +1349,24 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, targ_tbl->end = targ_tbl->start + bytes; } + CUdeviceptr device_num_varptr; + size_t device_num_varsize; + CUresult r = CUDA_CALL_NOCHECK (cuModuleGetGlobal, &device_num_varptr, + &device_num_varsize, module, + STRINGX (GOMP_DEVICE_NUM_VAR)); + if (r == CUDA_SUCCESS) + { + targ_tbl->start = (uintptr_t) device_num_varptr; + targ_tbl->end = (uintptr_t) (device_num_varptr + device_num_varsize); + } + else + /* The 'GOMP_DEVICE_NUM_VAR' variable was not in this image. */ + targ_tbl->start = targ_tbl->end = 0; + targ_tbl++; + nvptx_set_clocktick (module, dev); - return fn_entries + var_entries; + return fn_entries + var_entries + other_entries; } /* Unload the program described by TARGET_DATA. DEV_DATA is the diff --git a/libgomp/scope.c b/libgomp/scope.c new file mode 100644 index 0000000..8a4691c --- /dev/null +++ b/libgomp/scope.c @@ -0,0 +1,62 @@ +/* Copyright (C) 2021 Free Software Foundation, Inc. + Contributed by Jakub Jelinek <jakub@redhat.com>. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. 
+ + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This file handles the SCOPE construct with task reductions. */ + +#include "libgomp.h" +#include <string.h> + + +ialias_redirect (GOMP_taskgroup_reduction_register) + +/* This routine is called when first encountering a scope construct + with task reductions. While scope is not a work-sharing construct, + if it has task reductions on it, we treat it as one, but as if it is + nowait, so the work-sharing behavior is done solely to choose which + thread does the initial initialization of task reductions and which + threads follow. scope with task reductions must not be nowait, + but the barrier and GOMP_workshare_task_reduction_unregister are emitted + by the lowered code later. 
*/ + +void +GOMP_scope_start (uintptr_t *reductions) +{ + struct gomp_thread *thr = gomp_thread (); + + gomp_workshare_taskgroup_start (); + if (gomp_work_share_start (0)) + { + GOMP_taskgroup_reduction_register (reductions); + thr->task->taskgroup->workshare = true; + thr->ts.work_share->task_reductions = reductions; + gomp_work_share_init_done (); + } + else + { + uintptr_t *first_reductions = thr->ts.work_share->task_reductions; + gomp_workshare_task_reduction_register (reductions, + first_reductions); + } +} diff --git a/libgomp/target.c b/libgomp/target.c index 4a4e1f8..67fcf41 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -44,6 +44,23 @@ #include "plugin-suffix.h" #endif +typedef uintptr_t *hash_entry_type; +static inline void * htab_alloc (size_t size) { return gomp_malloc (size); } +static inline void htab_free (void *ptr) { free (ptr); } +#include "hashtab.h" + +static inline hashval_t +htab_hash (hash_entry_type element) +{ + return hash_pointer ((void *) element); +} + +static inline bool +htab_eq (hash_entry_type x, hash_entry_type y) +{ + return x == y; +} + #define FIELD_TGT_EMPTY (~(size_t) 0) static void gomp_target_init (void); @@ -197,13 +214,24 @@ goacc_device_copy_async (struct gomp_device_descr *devicep, struct goacc_asyncqueue *), const char *dst, void *dstaddr, const char *src, const void *srcaddr, + const void *srcaddr_orig, size_t size, struct goacc_asyncqueue *aq) { if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq)) { gomp_mutex_unlock (&devicep->lock); - gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed", - src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size); + if (srcaddr_orig && srcaddr_orig != srcaddr) + gomp_fatal ("Copying of %s object [%p..%p)" + " via buffer %s object [%p..%p)" + " to %s object [%p..%p) failed", + src, srcaddr_orig, srcaddr_orig + size, + src, srcaddr, srcaddr + size, + dst, dstaddr, dstaddr + size); + else + gomp_fatal ("Copying of %s object [%p..%p)" + " to %s 
object [%p..%p) failed", + src, srcaddr, srcaddr + size, + dst, dstaddr, dstaddr + size); } } @@ -247,7 +275,14 @@ struct gomp_coalesce_buf host to device (e.g. map(alloc:), map(from:) etc.). */ #define MAX_COALESCE_BUF_GAP (4 * 1024) -/* Add region with device tgt_start relative offset and length to CBUF. */ +/* Add region with device tgt_start relative offset and length to CBUF. + + This must not be used for asynchronous copies, because the host data might + not be computed yet (by an earlier asynchronous compute region, for + example). + TODO ... but we could allow CBUF usage for EPHEMERAL data? (Open question: + is it more performant to use libgomp CBUF buffering or individual device + asynchronous copying?) */ static inline void gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len) @@ -300,12 +335,41 @@ gomp_to_device_kind_p (int kind) } } +/* Copy host memory to an offload device. In asynchronous mode (if AQ is + non-NULL), when the source data is stack or may otherwise be deallocated + before the asynchronous copy takes place, EPHEMERAL must be passed as + TRUE. */ + attribute_hidden void gomp_copy_host2dev (struct gomp_device_descr *devicep, struct goacc_asyncqueue *aq, void *d, const void *h, size_t sz, - struct gomp_coalesce_buf *cbuf) + bool ephemeral, struct gomp_coalesce_buf *cbuf) { + if (__builtin_expect (aq != NULL, 0)) + { + /* See 'gomp_coalesce_buf_add'. */ + assert (!cbuf); + + void *h_buf = (void *) h; + if (ephemeral) + { + /* We're queueing up an asynchronous copy from data that may + disappear before the transfer takes place (i.e. because it is a + stack local in a function that is no longer executing). Make a + copy of the data into a temporary buffer in those cases. */ + h_buf = gomp_malloc (sz); + memcpy (h_buf, h, sz); + } + goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func, + "dev", d, "host", h_buf, h, sz, aq); + if (ephemeral) + /* Free temporary buffer once the transfer has completed. 
*/ + devicep->openacc.async.queue_callback_func (aq, free, h_buf); + + return; + } + if (cbuf) { uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start; @@ -331,11 +395,8 @@ gomp_copy_host2dev (struct gomp_device_descr *devicep, } } } - if (__builtin_expect (aq != NULL, 0)) - goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func, - "dev", d, "host", h, sz, aq); - else - gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz); + + gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz); } attribute_hidden void @@ -345,7 +406,7 @@ gomp_copy_dev2host (struct gomp_device_descr *devicep, { if (__builtin_expect (aq != NULL, 0)) goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func, - "host", h, "dev", d, sz, aq); + "host", h, "dev", d, NULL, sz, aq); else gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz); } @@ -360,6 +421,113 @@ gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr) } } +/* Increment reference count of a splay_tree_key region K by 1. + If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only + increment the value if refcount is not yet contained in the set (used for + OpenMP 5.0, which specifies that a region's refcount is adjusted at most + once for each construct). 
*/ + +static inline void +gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set) +{ + if (k == NULL || k->refcount == REFCOUNT_INFINITY) + return; + + uintptr_t *refcount_ptr = &k->refcount; + + if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount)) + refcount_ptr = &k->structelem_refcount; + else if (REFCOUNT_STRUCTELEM_P (k->refcount)) + refcount_ptr = k->structelem_refcount_ptr; + + if (refcount_set) + { + if (htab_find (*refcount_set, refcount_ptr)) + return; + uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT); + *slot = refcount_ptr; + } + + *refcount_ptr += 1; + return; +} + +/* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P + is true, set reference count to zero. If REFCOUNT_SET != NULL, use it to + track already seen refcounts, and only adjust the value if refcount is not + yet contained in the set (like gomp_increment_refcount). + + Return out-values: set *DO_COPY to true if we set the refcount to zero, or + it is already zero and we know we decremented it earlier. This signals that + associated maps should be copied back to host. + + *DO_REMOVE is set to true when this is the first handling of this refcount + and we are setting it to zero. This signals a removal of this key from the + splay-tree map. + + Copy and removal are separated due to cases like handling of structure + elements, e.g. each map of a structure element representing a possible copy + out of a structure field has to be handled individually, but we only signal + removal for one (the first encountered) sibling map. 
*/ + +static inline void +gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p, + bool *do_copy, bool *do_remove) +{ + if (k == NULL || k->refcount == REFCOUNT_INFINITY) + { + *do_copy = *do_remove = false; + return; + } + + uintptr_t *refcount_ptr = &k->refcount; + + if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount)) + refcount_ptr = &k->structelem_refcount; + else if (REFCOUNT_STRUCTELEM_P (k->refcount)) + refcount_ptr = k->structelem_refcount_ptr; + + bool new_encountered_refcount; + bool set_to_zero = false; + bool is_zero = false; + + uintptr_t orig_refcount = *refcount_ptr; + + if (refcount_set) + { + if (htab_find (*refcount_set, refcount_ptr)) + { + new_encountered_refcount = false; + goto end; + } + + uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT); + *slot = refcount_ptr; + new_encountered_refcount = true; + } + else + /* If no refcount_set being used, assume all keys are being decremented + for the first time. */ + new_encountered_refcount = true; + + if (delete_p) + *refcount_ptr = 0; + else if (*refcount_ptr > 0) + *refcount_ptr -= 1; + + end: + if (*refcount_ptr == 0) + { + if (orig_refcount > 0) + set_to_zero = true; + + is_zero = true; + } + + *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero)); + *do_remove = (new_encountered_refcount && set_to_zero); +} + /* Handle the case where gomp_map_lookup, splay_tree_lookup or gomp_map_0len_lookup found oldn for newn. Helper function of gomp_map_vars. 
*/ @@ -369,7 +537,8 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep, struct goacc_asyncqueue *aq, splay_tree_key oldn, splay_tree_key newn, struct target_var_desc *tgt_var, unsigned char kind, bool always_to_flag, - struct gomp_coalesce_buf *cbuf) + struct gomp_coalesce_buf *cbuf, + htab_t *refcount_set) { assert (kind != GOMP_MAP_ATTACH); @@ -396,10 +565,9 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep, (void *) (oldn->tgt->tgt_start + oldn->tgt_offset + newn->host_start - oldn->host_start), (void *) newn->host_start, - newn->host_end - newn->host_start, cbuf); + newn->host_end - newn->host_start, false, cbuf); - if (oldn->refcount != REFCOUNT_INFINITY) - oldn->refcount++; + gomp_increment_refcount (oldn, refcount_set); } static int @@ -424,8 +592,8 @@ gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq, cur_node.tgt_offset = (uintptr_t) NULL; gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset), - (void *) &cur_node.tgt_offset, - sizeof (void *), cbuf); + (void *) &cur_node.tgt_offset, sizeof (void *), + true, cbuf); return; } /* Add bias to the pointer value. */ @@ -445,7 +613,8 @@ gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq, to initialize the pointer with. 
*/ cur_node.tgt_offset -= bias; gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset), - (void *) &cur_node.tgt_offset, sizeof (void *), cbuf); + (void *) &cur_node.tgt_offset, sizeof (void *), + true, cbuf); } static void @@ -453,7 +622,7 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq, splay_tree_key n, size_t first, size_t i, void **hostaddrs, size_t *sizes, void *kinds, - struct gomp_coalesce_buf *cbuf) + struct gomp_coalesce_buf *cbuf, htab_t *refcount_set) { struct gomp_device_descr *devicep = tgt->device_descr; struct splay_tree_s *mem_map = &devicep->mem_map; @@ -471,7 +640,7 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset) { gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i], - kind & typemask, false, cbuf); + kind & typemask, false, cbuf, refcount_set); return; } if (sizes[i] == 0) @@ -487,7 +656,7 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, == n2->tgt_offset - n->tgt_offset) { gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i], - kind & typemask, false, cbuf); + kind & typemask, false, cbuf, refcount_set); return; } } @@ -499,7 +668,7 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset) { gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i], - kind & typemask, false, cbuf); + kind & typemask, false, cbuf, refcount_set); return; } } @@ -578,7 +747,7 @@ gomp_attach_pointer (struct gomp_device_descr *devicep, (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data); gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data, - sizeof (void *), cbufp); + sizeof (void *), true, cbufp); } else gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__, @@ -631,7 +800,7 @@ gomp_detach_pointer (struct gomp_device_descr *devicep, (void *) target); gomp_copy_host2dev (devicep, aq, (void 
*) devptr, (void *) &target, - sizeof (void *), cbufp); + sizeof (void *), true, cbufp); } else gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__, @@ -671,11 +840,13 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, struct goacc_asyncqueue *aq, size_t mapnum, void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds, bool short_mapkind, + htab_t *refcount_set, enum gomp_map_vars_kind pragma_kind) { size_t i, tgt_align, tgt_size, not_found_cnt = 0; bool has_firstprivate = false; bool has_always_ptrset = false; + bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0; const int rshift = short_mapkind ? 8 : 3; const int typemask = short_mapkind ? 0xff : 0x7; struct splay_tree_s *mem_map = &devicep->mem_map; @@ -801,8 +972,9 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, for (i = first; i <= last; i++) { tgt->list[i].key = NULL; - if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i) - & typemask)) + if (!aq + && gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i) + & typemask)) gomp_coalesce_buf_add (&cbuf, tgt_size - cur_node.host_end + (uintptr_t) hostaddrs[i], @@ -813,7 +985,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, } for (i = first; i <= last; i++) gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs, - sizes, kinds, NULL); + sizes, kinds, NULL, refcount_set); i--; continue; } @@ -843,8 +1015,9 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, if (tgt_align < align) tgt_align = align; tgt_size = (tgt_size + align - 1) & ~(align - 1); - gomp_coalesce_buf_add (&cbuf, tgt_size, - cur_node.host_end - cur_node.host_start); + if (!aq) + gomp_coalesce_buf_add (&cbuf, tgt_size, + cur_node.host_end - cur_node.host_start); tgt_size += cur_node.host_end - cur_node.host_start; has_firstprivate = true; continue; @@ -909,7 +1082,8 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, } } gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i], - kind & typemask, 
always_to_cnt > 0, NULL); + kind & typemask, always_to_cnt > 0, NULL, + refcount_set); i += always_to_cnt; } else @@ -936,7 +1110,8 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, if (tgt_align < align) tgt_align = align; tgt_size = (tgt_size + align - 1) & ~(align - 1); - if (gomp_to_device_kind_p (kind & typemask)) + if (!aq + && gomp_to_device_kind_p (kind & typemask)) gomp_coalesce_buf_add (&cbuf, tgt_size, cur_node.host_end - cur_node.host_start); tgt_size += cur_node.host_end - cur_node.host_start; @@ -1022,6 +1197,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, splay_tree_node array = tgt->array; size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY; uintptr_t field_tgt_base = 0; + splay_tree_key field_tgt_structelem_first = NULL; for (i = 0; i < mapnum; i++) if (has_always_ptrset @@ -1064,8 +1240,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, tgt->list[j].copy_from = false; tgt->list[j].always_copy_from = false; tgt->list[j].is_attach = false; - if (k->refcount != REFCOUNT_INFINITY) - k->refcount++; + gomp_increment_refcount (k, refcount_set); gomp_map_pointer (k->tgt, aq, (uintptr_t) *(void **) hostaddrs[j], k->tgt_offset + ((uintptr_t) hostaddrs[j] @@ -1091,7 +1266,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, len = sizes[i]; gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + tgt_size), - (void *) hostaddrs[i], len, cbufp); + (void *) hostaddrs[i], len, false, cbufp); tgt_size += len; continue; case GOMP_MAP_FIRSTPRIVATE_INT: @@ -1153,13 +1328,14 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, field_tgt_base = (uintptr_t) hostaddrs[first]; field_tgt_offset = tgt_size; field_tgt_clear = last; + field_tgt_structelem_first = NULL; tgt_size += cur_node.host_end - (uintptr_t) hostaddrs[first]; continue; } for (i = first; i <= last; i++) gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs, - sizes, kinds, cbufp); + sizes, kinds, cbufp, refcount_set); i--; 
continue; case GOMP_MAP_ALWAYS_POINTER: @@ -1184,7 +1360,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, + cur_node.host_start - n->host_start), (void *) &cur_node.tgt_offset, - sizeof (void *), cbufp); + sizeof (void *), true, cbufp); cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start - n->host_start; continue; @@ -1236,7 +1412,8 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, splay_tree_key n = splay_tree_lookup (mem_map, k); if (n && n->refcount != REFCOUNT_LINK) gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i], - kind & typemask, false, cbufp); + kind & typemask, false, cbufp, + refcount_set); else { k->aux = NULL; @@ -1252,10 +1429,33 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, size_t align = (size_t) 1 << (kind >> rshift); tgt->list[i].key = k; k->tgt = tgt; + k->refcount = 0; + k->dynamic_refcount = 0; if (field_tgt_clear != FIELD_TGT_EMPTY) { k->tgt_offset = k->host_start - field_tgt_base + field_tgt_offset; + if (openmp_p) + { + k->refcount = REFCOUNT_STRUCTELEM; + if (field_tgt_structelem_first == NULL) + { + /* Set to first structure element of sequence. */ + k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST; + field_tgt_structelem_first = k; + } + else + /* Point to refcount of leading element, but do not + increment again. */ + k->structelem_refcount_ptr + = &field_tgt_structelem_first->structelem_refcount; + + if (i == field_tgt_clear) + { + k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST; + field_tgt_structelem_first = NULL; + } + } if (i == field_tgt_clear) field_tgt_clear = FIELD_TGT_EMPTY; } @@ -1265,14 +1465,17 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, k->tgt_offset = tgt_size; tgt_size += k->host_end - k->host_start; } + /* First increment, from 0 to 1. gomp_increment_refcount + encapsulates the different increment cases, so use this + instead of directly setting 1 during initialization. 
*/ + gomp_increment_refcount (k, refcount_set); + tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask); tgt->list[i].always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind & typemask); tgt->list[i].is_attach = false; tgt->list[i].offset = 0; tgt->list[i].length = k->host_end - k->host_start; - k->refcount = 1; - k->dynamic_refcount = 0; tgt->refcount++; array->left = NULL; array->right = NULL; @@ -1295,7 +1498,8 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, (void *) (tgt->tgt_start + k->tgt_offset), (void *) k->host_start, - k->host_end - k->host_start, cbufp); + k->host_end - k->host_start, + false, cbufp); break; case GOMP_MAP_POINTER: gomp_map_pointer (tgt, aq, @@ -1307,7 +1511,8 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, (void *) (tgt->tgt_start + k->tgt_offset), (void *) k->host_start, - k->host_end - k->host_start, cbufp); + k->host_end - k->host_start, + false, cbufp); tgt->list[i].has_null_ptr_assoc = false; for (j = i + 1; j < mapnum; j++) @@ -1328,8 +1533,14 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, tgt->list[j].always_copy_from = false; tgt->list[j].is_attach = false; tgt->list[i].has_null_ptr_assoc |= !(*(void **) hostaddrs[j]); - if (k->refcount != REFCOUNT_INFINITY) - k->refcount++; + /* For OpenMP, the use of refcount_sets causes + errors if we set k->refcount = 1 above but also + increment it again here, for decrementing will + not properly match, since we decrement only once + for each key's refcount. Therefore avoid this + increment for OpenMP constructs. 
*/ + if (!openmp_p) + gomp_increment_refcount (k, refcount_set); gomp_map_pointer (tgt, aq, (uintptr_t) *(void **) hostaddrs[j], k->tgt_offset @@ -1364,7 +1575,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, (void *) (tgt->tgt_start + k->tgt_offset), (void *) k->host_start, - sizeof (void *), cbufp); + sizeof (void *), false, cbufp); break; default: gomp_mutex_unlock (&devicep->lock); @@ -1380,7 +1591,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, /* We intentionally do not use coalescing here, as it's not data allocated by the current call to this function. */ gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset, - &tgt_addr, sizeof (void *), NULL); + &tgt_addr, sizeof (void *), true, NULL); } array++; } @@ -1395,19 +1606,23 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + i * sizeof (void *)), (void *) &cur_node.tgt_offset, sizeof (void *), - cbufp); + true, cbufp); } } if (cbufp) { + /* See 'gomp_coalesce_buf_add'. 
*/ + assert (!aq); + long c = 0; for (c = 0; c < cbuf.chunk_cnt; ++c) gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + cbuf.chunks[c].start), (char *) cbuf.buf + (cbuf.chunks[c].start - cbuf.chunks[0].start), - cbuf.chunks[c].end - cbuf.chunks[c].start, NULL); + cbuf.chunks[c].end - cbuf.chunks[c].start, + true, NULL); free (cbuf.buf); cbuf.buf = NULL; cbufp = NULL; @@ -1426,24 +1641,41 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, return tgt; } -attribute_hidden struct target_mem_desc * +static struct target_mem_desc * gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds, - bool short_mapkind, enum gomp_map_vars_kind pragma_kind) + bool short_mapkind, htab_t *refcount_set, + enum gomp_map_vars_kind pragma_kind) { - return gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs, - sizes, kinds, short_mapkind, pragma_kind); + /* This management of a local refcount_set is for convenience of callers + who do not share a refcount_set over multiple map/unmap uses. 
*/ + htab_t local_refcount_set = NULL; + if (refcount_set == NULL) + { + local_refcount_set = htab_create (mapnum); + refcount_set = &local_refcount_set; + } + + struct target_mem_desc *tgt; + tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs, + sizes, kinds, short_mapkind, refcount_set, + pragma_kind); + if (local_refcount_set) + htab_free (local_refcount_set); + + return tgt; } attribute_hidden struct target_mem_desc * -gomp_map_vars_async (struct gomp_device_descr *devicep, - struct goacc_asyncqueue *aq, size_t mapnum, - void **hostaddrs, void **devaddrs, size_t *sizes, - void *kinds, bool short_mapkind, - enum gomp_map_vars_kind pragma_kind) +goacc_map_vars (struct gomp_device_descr *devicep, + struct goacc_asyncqueue *aq, size_t mapnum, + void **hostaddrs, void **devaddrs, size_t *sizes, + void *kinds, bool short_mapkind, + enum gomp_map_vars_kind pragma_kind) { return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs, - sizes, kinds, short_mapkind, pragma_kind); + sizes, kinds, short_mapkind, NULL, + GOMP_MAP_VARS_OPENACC | pragma_kind); } static void @@ -1481,22 +1713,56 @@ gomp_unref_tgt_void (void *ptr) (void) gomp_unref_tgt (ptr); } -static inline __attribute__((always_inline)) bool -gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k, - struct goacc_asyncqueue *aq) +static void +gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k) { - bool is_tgt_unmapped = false; - splay_tree_remove (&devicep->mem_map, k); + splay_tree_remove (sp, k); if (k->aux) { if (k->aux->link_key) - splay_tree_insert (&devicep->mem_map, - (splay_tree_node) k->aux->link_key); + splay_tree_insert (sp, (splay_tree_node) k->aux->link_key); if (k->aux->attach_count) free (k->aux->attach_count); free (k->aux); k->aux = NULL; } +} + +static inline __attribute__((always_inline)) bool +gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k, + struct goacc_asyncqueue *aq) +{ + bool is_tgt_unmapped 
= false; + + if (REFCOUNT_STRUCTELEM_P (k->refcount)) + { + if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false) + /* Infer the splay_tree_key of the first structelem key using the + pointer to the first structelem_refcount. */ + k = (splay_tree_key) ((char *) k->structelem_refcount_ptr + - offsetof (struct splay_tree_key_s, + structelem_refcount)); + assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount)); + + /* The array created by gomp_map_vars is an array of splay_tree_nodes, + with the splay_tree_keys embedded inside. */ + splay_tree_node node = + (splay_tree_node) ((char *) k + - offsetof (struct splay_tree_node_s, key)); + while (true) + { + /* Starting from the _FIRST key, and continue for all following + sibling keys. */ + gomp_remove_splay_tree_key (&devicep->mem_map, k); + if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount)) + break; + else + k = &(++node)->key; + } + } + else + gomp_remove_splay_tree_key (&devicep->mem_map, k); + if (aq) devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void, (void *) k->tgt); @@ -1530,7 +1796,7 @@ gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k, static inline __attribute__((always_inline)) void gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom, - struct goacc_asyncqueue *aq) + htab_t *refcount_set, struct goacc_asyncqueue *aq) { struct gomp_device_descr *devicep = tgt->device_descr; @@ -1573,23 +1839,17 @@ gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom, if (tgt->list[i].is_attach) continue; - bool do_unmap = false; - if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY) - k->refcount--; - else if (k->refcount == 1) - { - k->refcount--; - do_unmap = true; - } + bool do_copy, do_remove; + gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove); - if ((do_unmap && do_copyfrom && tgt->list[i].copy_from) + if ((do_copy && do_copyfrom && tgt->list[i].copy_from) || tgt->list[i].always_copy_from) gomp_copy_dev2host (devicep, aq, 
(void *) (k->host_start + tgt->list[i].offset), (void *) (k->tgt->tgt_start + k->tgt_offset + tgt->list[i].offset), tgt->list[i].length); - if (do_unmap) + if (do_remove) { struct target_mem_desc *k_tgt = k->tgt; bool is_tgt_unmapped = gomp_remove_var (devicep, k); @@ -1610,17 +1870,30 @@ gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom, gomp_mutex_unlock (&devicep->lock); } -attribute_hidden void -gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom) +static void +gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom, + htab_t *refcount_set) { - gomp_unmap_vars_internal (tgt, do_copyfrom, NULL); + /* This management of a local refcount_set is for convenience of callers + who do not share a refcount_set over multiple map/unmap uses. */ + htab_t local_refcount_set = NULL; + if (refcount_set == NULL) + { + local_refcount_set = htab_create (tgt->list_count); + refcount_set = &local_refcount_set; + } + + gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL); + + if (local_refcount_set) + htab_free (local_refcount_set); } attribute_hidden void -gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom, - struct goacc_asyncqueue *aq) +goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom, + struct goacc_asyncqueue *aq) { - gomp_unmap_vars_internal (tgt, do_copyfrom, aq); + gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq); } static void @@ -1673,7 +1946,7 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs, if (GOMP_MAP_COPY_TO_P (kind & typemask)) gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size, - NULL); + false, NULL); if (GOMP_MAP_COPY_FROM_P (kind & typemask)) gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size); } @@ -1701,6 +1974,9 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, int num_funcs = host_funcs_end - host_func_table; int num_vars = (host_vars_end - host_var_table) / 2; + /* Others currently is 
only 'device_num' */ + int num_others = 1; + /* Load image to device and get target addresses for the image. */ struct addr_pair *target_table = NULL; int i, num_target_entries; @@ -1709,7 +1985,9 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, = devicep->load_image_func (devicep->target_id, version, target_data, &target_table); - if (num_target_entries != num_funcs + num_vars) + if (num_target_entries != num_funcs + num_vars + /* Others (device_num) are included as trailing entries in pair list. */ + && num_target_entries != num_funcs + num_vars + num_others) { gomp_mutex_unlock (&devicep->lock); if (is_register_lock) @@ -1781,6 +2059,35 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, array++; } + /* Last entry is for the on-device 'device_num' variable. Tolerate case + where plugin does not return this entry. */ + if (num_funcs + num_vars < num_target_entries) + { + struct addr_pair *device_num_var = &target_table[num_funcs + num_vars]; + /* Start address will be non-zero for last entry if GOMP_DEVICE_NUM_VAR + was found in this image. */ + if (device_num_var->start != 0) + { + /* The index of the devicep within devices[] is regarded as its + 'device number', which is different from the per-device type + devicep->target_id. */ + int device_num_val = (int) (devicep - &devices[0]); + if (device_num_var->end - device_num_var->start != sizeof (int)) + { + gomp_mutex_unlock (&devicep->lock); + if (is_register_lock) + gomp_mutex_unlock (&register_lock); + gomp_fatal ("offload plugin managed 'device_num' not of expected " + "format"); + } + + /* Copy device_num value to place on device memory, hereby actually + designating its device number into effect. 
*/ + gomp_copy_host2dev (devicep, NULL, (void *) device_num_var->start, + &device_num_val, sizeof (int), false, NULL); + } + } + free (target_table); } @@ -2130,12 +2437,15 @@ GOMP_target (int device, void (*fn) (void *), const void *unused, || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))) return gomp_target_fallback (fn, hostaddrs, devicep); + htab_t refcount_set = htab_create (mapnum); struct target_mem_desc *tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, - GOMP_MAP_VARS_TARGET); + &refcount_set, GOMP_MAP_VARS_TARGET); devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start, NULL); - gomp_unmap_vars (tgt_vars, true); + htab_clear (refcount_set); + gomp_unmap_vars (tgt_vars, true, &refcount_set); + htab_free (refcount_set); } static inline unsigned int @@ -2269,6 +2579,8 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, } struct target_mem_desc *tgt_vars; + htab_t refcount_set = NULL; + if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) { if (!fpc_done) @@ -2285,13 +2597,21 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, tgt_vars = NULL; } else - tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, - true, GOMP_MAP_VARS_TARGET); + { + refcount_set = htab_create (mapnum); + tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, + true, &refcount_set, GOMP_MAP_VARS_TARGET); + } devicep->run_func (devicep->target_id, fn_addr, tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs, args); if (tgt_vars) - gomp_unmap_vars (tgt_vars, true); + { + htab_clear (refcount_set); + gomp_unmap_vars (tgt_vars, true, &refcount_set); + } + if (refcount_set) + htab_free (refcount_set); } /* Host fallback for GOMP_target_data{,_ext} routines. */ @@ -2314,7 +2634,7 @@ gomp_target_data_fallback (struct gomp_device_descr *devicep) would get out of sync. 
*/ struct target_mem_desc *tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, - GOMP_MAP_VARS_DATA); + NULL, GOMP_MAP_VARS_DATA); tgt->prev = icv->target_data; icv->target_data = tgt; } @@ -2333,7 +2653,7 @@ GOMP_target_data (int device, const void *unused, size_t mapnum, struct target_mem_desc *tgt = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, - GOMP_MAP_VARS_DATA); + NULL, GOMP_MAP_VARS_DATA); struct gomp_task_icv *icv = gomp_icv (true); tgt->prev = icv->target_data; icv->target_data = tgt; @@ -2352,7 +2672,7 @@ GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs, struct target_mem_desc *tgt = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true, - GOMP_MAP_VARS_DATA); + NULL, GOMP_MAP_VARS_DATA); struct gomp_task_icv *icv = gomp_icv (true); tgt->prev = icv->target_data; icv->target_data = tgt; @@ -2366,7 +2686,7 @@ GOMP_target_end_data (void) { struct target_mem_desc *tgt = icv->target_data; icv->target_data = tgt->prev; - gomp_unmap_vars (tgt, true); + gomp_unmap_vars (tgt, true, NULL); } } @@ -2465,7 +2785,8 @@ GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs, static void gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum, - void **hostaddrs, size_t *sizes, unsigned short *kinds) + void **hostaddrs, size_t *sizes, unsigned short *kinds, + htab_t *refcount_set) { const int typemask = 0xff; size_t i; @@ -2489,6 +2810,9 @@ gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum, false, NULL); } + int nrmvars = 0; + splay_tree_key remove_vars[mapnum]; + for (i = 0; i < mapnum; i++) { struct splay_tree_key_s cur_node; @@ -2510,22 +2834,32 @@ gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum, if (!k) continue; - if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY) - k->refcount--; - if ((kind == GOMP_MAP_DELETE - || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION) - && k->refcount != REFCOUNT_INFINITY) - k->refcount = 0; + bool delete_p = 
(kind == GOMP_MAP_DELETE + || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION); + bool do_copy, do_remove; + gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy, + &do_remove); - if ((kind == GOMP_MAP_FROM && k->refcount == 0) + if ((kind == GOMP_MAP_FROM && do_copy) || kind == GOMP_MAP_ALWAYS_FROM) gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start, (void *) (k->tgt->tgt_start + k->tgt_offset + cur_node.host_start - k->host_start), cur_node.host_end - cur_node.host_start); - if (k->refcount == 0) - gomp_remove_var (devicep, k); + + /* Structure elements lists are removed altogether at once, which + may cause immediate deallocation of the target_mem_desc, causing + errors if we still have following element siblings to copy back. + While we're at it, it also seems more disciplined to simply + queue all removals together for processing below. + + Structured block unmapping (i.e. gomp_unmap_vars_internal) should + not have this problem, since they maintain an additional + tgt->refcount = 1 reference to the target_mem_desc to start with. + */ + if (do_remove) + remove_vars[nrmvars++] = k; break; case GOMP_MAP_DETACH: @@ -2537,6 +2871,9 @@ gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum, } } + for (int i = 0; i < nrmvars; i++) + gomp_remove_var (devicep, remove_vars[i]); + gomp_mutex_unlock (&devicep->lock); } @@ -2616,6 +2953,8 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, } } + htab_t refcount_set = htab_create (mapnum); + /* The variables are mapped separately such that they can be released independently. 
*/ size_t i, j; @@ -2624,7 +2963,8 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT) { gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i], - &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA); + &kinds[i], true, &refcount_set, + GOMP_MAP_VARS_ENTER_DATA); i += sizes[i]; } else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET) @@ -2634,7 +2974,8 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff)) break; gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i], - &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA); + &kinds[i], true, &refcount_set, + GOMP_MAP_VARS_ENTER_DATA); i += j - i - 1; } else if (i + 1 < mapnum && (kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH) @@ -2642,14 +2983,15 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, /* An attach operation must be processed together with the mapped base-pointer list item. 
*/ gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i], - true, GOMP_MAP_VARS_ENTER_DATA); + true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA); i += 1; } else gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i], - true, GOMP_MAP_VARS_ENTER_DATA); + true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA); else - gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds); + gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set); + htab_free (refcount_set); } bool @@ -2674,7 +3016,7 @@ gomp_target_task_fn (void *data) if (ttask->state == GOMP_TARGET_TASK_FINISHED) { if (ttask->tgt) - gomp_unmap_vars (ttask->tgt, true); + gomp_unmap_vars (ttask->tgt, true, NULL); return false; } @@ -2688,7 +3030,7 @@ gomp_target_task_fn (void *data) { ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs, NULL, ttask->sizes, ttask->kinds, true, - GOMP_MAP_VARS_TARGET); + NULL, GOMP_MAP_VARS_TARGET); actual_arguments = (void *) ttask->tgt->tgt_start; } ttask->state = GOMP_TARGET_TASK_READY_TO_RUN; @@ -2707,21 +3049,27 @@ gomp_target_task_fn (void *data) if (ttask->flags & GOMP_TARGET_FLAG_UPDATE) gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes, ttask->kinds, true); - else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0) - for (i = 0; i < ttask->mapnum; i++) - if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT) - { - gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i], - NULL, &ttask->sizes[i], &ttask->kinds[i], true, - GOMP_MAP_VARS_ENTER_DATA); - i += ttask->sizes[i]; - } - else - gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i], - &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA); else - gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes, - ttask->kinds); + { + htab_t refcount_set = htab_create (ttask->mapnum); + if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0) + for (i = 0; i < ttask->mapnum; i++) + if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT) 
+ { + gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i], + NULL, &ttask->sizes[i], &ttask->kinds[i], true, + &refcount_set, GOMP_MAP_VARS_ENTER_DATA); + i += ttask->sizes[i]; + } + else + gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i], + &ttask->kinds[i], true, &refcount_set, + GOMP_MAP_VARS_ENTER_DATA); + else + gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes, + ttask->kinds, &refcount_set); + htab_free (refcount_set); + } return false; } @@ -3173,7 +3521,11 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device, void *plugin_handle = dlopen (plugin_name, RTLD_LAZY); if (!plugin_handle) +#if OFFLOAD_DEFAULTED + return 0; +#else goto dl_fail; +#endif /* Check if all required functions are available in the plugin and store their handlers. None of the symbols can legitimately be NULL, diff --git a/libgomp/task.c b/libgomp/task.c index 1c73c75..feb4796 100644 --- a/libgomp/task.c +++ b/libgomp/task.c @@ -2460,6 +2460,7 @@ omp_fulfill_event (omp_event_handle_t event) if (new_tasks > 0) { /* Wake up threads to run new tasks. */ + gomp_team_barrier_set_task_pending (&team->barrier); do_wake = team->nthreads - team->task_running_count; if (do_wake > new_tasks) do_wake = new_tasks; diff --git a/libgomp/taskloop.c b/libgomp/taskloop.c index 75697fe..9d27dd0 100644 --- a/libgomp/taskloop.c +++ b/libgomp/taskloop.c @@ -51,20 +51,32 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), /* If parallel or taskgroup has been cancelled, don't start new tasks. */ if (team && gomp_team_barrier_cancelled (&team->barrier)) - return; + { + early_return: + if ((flags & (GOMP_TASK_FLAG_NOGROUP | GOMP_TASK_FLAG_REDUCTION)) + == GOMP_TASK_FLAG_REDUCTION) + { + struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; }; + uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr; + /* Tell callers GOMP_taskgroup_reduction_register has not been + called. 
*/ + ptr[2] = 0; + } + return; + } #ifdef TYPE_is_long TYPE s = step; if (step > 0) { if (start >= end) - return; + goto early_return; s--; } else { if (start <= end) - return; + goto early_return; s++; } UTYPE n = (end - start + s) / step; @@ -73,18 +85,19 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), if (flags & GOMP_TASK_FLAG_UP) { if (start >= end) - return; + goto early_return; n = (end - start + step - 1) / step; } else { if (start <= end) - return; + goto early_return; n = (start - end - step - 1) / -step; } #endif TYPE task_step = step; + TYPE nfirst_task_step = step; unsigned long nfirst = n; if (flags & GOMP_TASK_FLAG_GRAINSIZE) { @@ -97,7 +110,22 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), if (num_tasks != ndiv) num_tasks = ~0UL; #endif - if (num_tasks <= 1) + if ((flags & GOMP_TASK_FLAG_STRICT) + && num_tasks != ~0ULL) + { + UTYPE mod = n % grainsize; + task_step = (TYPE) grainsize * step; + if (mod) + { + num_tasks++; + nfirst_task_step = (TYPE) mod * step; + if (num_tasks == 1) + task_step = nfirst_task_step; + else + nfirst = num_tasks - 2; + } + } + else if (num_tasks <= 1) { num_tasks = 1; task_step = end - start; @@ -112,6 +140,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), task_step = (TYPE) grainsize * step; if (mul != n) { + nfirst_task_step = task_step; task_step += step; nfirst = n - mul - 1; } @@ -123,6 +152,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), task_step = (TYPE) div * step; if (mod) { + nfirst_task_step = task_step; task_step += step; nfirst = mod - 1; } @@ -141,6 +171,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), task_step = (TYPE) div * step; if (mod) { + nfirst_task_step = task_step; task_step += step; nfirst = mod - 1; } @@ -213,7 +244,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), start += task_step; 
((TYPE *)arg)[1] = start; if (i == nfirst) - task_step -= step; + task_step = nfirst_task_step; fn (arg); arg += arg_size; if (!priority_queue_empty_p (&task[i].children_queue, @@ -246,7 +277,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), start += task_step; ((TYPE *)data)[1] = start; if (i == nfirst) - task_step -= step; + task_step = nfirst_task_step; fn (data); if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED)) @@ -291,7 +322,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), start += task_step; ((TYPE *)arg)[1] = start; if (i == nfirst) - task_step -= step; + task_step = nfirst_task_step; thr->task = parent; task->kind = GOMP_TASK_WAITING; task->fn = fn; diff --git a/libgomp/team.c b/libgomp/team.c index 9662234..ba57152 100644 --- a/libgomp/team.c +++ b/libgomp/team.c @@ -78,7 +78,6 @@ gomp_thread_start (void *xdata) #else struct gomp_thread local_thr; thr = &local_thr; - pthread_setspecific (gomp_tls_key, thr); #endif gomp_sem_init (&thr->release, 0); @@ -92,6 +91,9 @@ gomp_thread_start (void *xdata) #ifdef GOMP_NEEDS_THREAD_HANDLE thr->handle = data->handle; #endif +#if !(defined HAVE_TLS || defined USE_EMUTLS) + pthread_setspecific (gomp_tls_key, thr); +#endif thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release; diff --git a/libgomp/testsuite/lib/libgomp.exp b/libgomp/testsuite/lib/libgomp.exp index 72d0011..57fb6b0 100644 --- a/libgomp/testsuite/lib/libgomp.exp +++ b/libgomp/testsuite/lib/libgomp.exp @@ -210,7 +210,8 @@ proc libgomp_init { args } { # We use atomic operations in the testcases to validate results. 
if { ([istarget i?86-*-*] || [istarget x86_64-*-*]) - && [check_effective_target_ia32] } { + && [check_effective_target_ia32] + && ![check_effective_target_cas_char] } { lappend ALWAYS_CFLAGS "additional_flags=-march=i486" } @@ -340,8 +341,10 @@ proc offload_target_to_openacc_device_type { offload_target } { } } -# Return 1 if compiling for offload target nvptx. -proc check_effective_target_offload_target_nvptx { } { +# Return 1 if compiling for the specified offload target +# Takes -foffload=... into account by checking OFFLOAD_TARGET_NAMES= +# in the -v compiler output. +proc libgomp_check_effective_target_offload_target { target_name } { # Consider all actual options, including the flags passed to # 'gcc-dg-runtest', or 'gfortran-dg-runtest' (see the 'libgomp.*/*.exp' # files; in particular, '-foffload', 'libgomp.oacc-*/*.exp'), which don't @@ -352,15 +355,25 @@ proc check_effective_target_offload_target_nvptx { } { # decides. This is somewhat modelled after # 'gcc/testsuite/lib/target-supports.exp:check_configured_with'. set gcc_output [libgomp_target_compile "" "" "none" $options] - if [regexp "(?n)^OFFLOAD_TARGET_NAMES=(.*)" $gcc_output dummy offload_targets] { - verbose "compiling for offload targets: $offload_targets" - return [string match "*:nvptx*:*" ":$offload_targets:"] + if [regexp "(?n)^OFFLOAD_TARGET_NAMES=(.*)" $gcc_output dummy gcc_offload_targets] { + verbose "compiling for offload targets: $gcc_offload_targets" + return [string match "*:$target_name*:*" ":$gcc_offload_targets:"] } - verbose "not compiling for any offload targets" + verbose "not compiling for $target_name offload target" return 0 } +# Return 1 if compiling for offload target nvptx. 
+proc check_effective_target_offload_target_nvptx { } { + return [libgomp_check_effective_target_offload_target "nvptx"] +} + +# Return 1 if compiling for offload target amdgcn +proc check_effective_target_offload_target_amdgcn { } { + return [libgomp_check_effective_target_offload_target "amdgcn"] +} + # Return 1 if offload device is available. proc check_effective_target_offload_device { } { return [check_runtime_nocache offload_device_available_ { @@ -401,6 +414,18 @@ proc check_effective_target_offload_device_shared_as { } { } ] } +# Return 1 if using nvptx offload device. +proc check_effective_target_offload_device_nvptx { } { + return [check_runtime_nocache offload_device_nvptx { + #include <omp.h> + #include "testsuite/libgomp.c-c++-common/on_device_arch.h" + int main () + { + return !on_device_arch_nvptx (); + } + } ] +} + # Return 1 if at least one Nvidia GPU is accessible. proc check_effective_target_openacc_nvidia_accel_present { } { @@ -423,6 +448,18 @@ proc check_effective_target_openacc_nvidia_accel_selected { } { return [string match "nvidia" $openacc_device_type] } +# Return 1 if using Intel MIC offload device. +proc check_effective_target_offload_device_intel_mic { } { + return [check_runtime_nocache offload_device_intel_mic { + #include <omp.h> + #include "testsuite/libgomp.c-c++-common/on_device_arch.h" + int main () + { + return !on_device_arch_intel_mic (); + } + } ] +} + # Return 1 if the OpenACC 'host' device type is selected. proc check_effective_target_openacc_host_selected { } { @@ -448,10 +485,60 @@ proc check_effective_target_openacc_radeon_accel_selected { } { if { ![check_effective_target_openacc_radeon_accel_present] } { return 0; } - global offload_target - if { [string match "amdgcn*" $offload_target] } { - return 1; - } + global openacc_device_type + return [string match "radeon" $openacc_device_type] +} + +# Return 1 if cuda.h and -lcuda are available. 
+ +proc check_effective_target_openacc_cuda { } { + return [check_no_compiler_messages openacc_cuda executable { +#include <cuda.h> +int main() { + CUdevice dev; + CUresult r = cuDeviceGet (&dev, 0); + if (r != CUDA_SUCCESS) + return 1; + return 0; +} } "-lcuda" ] +} + +# Return 1 if cublas_v2.h and -lcublas are available. + +proc check_effective_target_openacc_cublas { } { + return [check_no_compiler_messages openacc_cublas executable { +#include <cuda.h> +#include <cublas_v2.h> +int main() { + cublasStatus_t s; + cublasHandle_t h; + CUdevice dev; + CUresult r = cuDeviceGet (&dev, 0); + if (r != CUDA_SUCCESS) + return 1; + s = cublasCreate (&h); + if (s != CUBLAS_STATUS_SUCCESS) + return 1; return 0; +} } "-lcuda -lcublas" ] } +# Return 1 if cuda_runtime_api.h and -lcudart are available. + +proc check_effective_target_openacc_cudart { } { + return [check_no_compiler_messages openacc_cudart executable { +#include <cuda.h> +#include <cuda_runtime_api.h> +int main() { + cudaError_t e; + int devn; + CUdevice dev; + CUresult r = cuDeviceGet (&dev, 0); + if (r != CUDA_SUCCESS) + return 1; + e = cudaGetDevice (&devn); + if (e != cudaSuccess) + return 1; + return 0; +} } "-lcuda -lcudart" ] +} diff --git a/libgomp/testsuite/libgomp.c++/target-in-reduction-1.C b/libgomp/testsuite/libgomp.c++/target-in-reduction-1.C new file mode 100644 index 0000000..21130f5 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-in-reduction-1.C @@ -0,0 +1,113 @@ +void +foo (int &x, int *&y, int n, int v) +{ + int zu[3] = { 45, 46, 47 }; + int uu[n], wu[n], i; + int (&z)[3] = zu; + int (&u)[n] = uu; + int (&w)[n] = wu; + for (i = 0; i < n; i++) + w[i] = u[i] = n + i; + #pragma omp taskgroup task_reduction (+: x, y[:2], z[1:2], u, w[1:v]) + { + #pragma omp task in_reduction (+: x, y[:2], z[1:2], u, w[1:v]) + { + x++; + y[0] += 2; + y[1] += 3; + z[1] += 4; + u[0] += 5; + w[1] += 6; + } + #pragma omp target in_reduction (+: x, y[:2], z[1:2], u, w[1:v]) + { + x += 4; + y[0] += 5; + y[1] += 
6; + z[2] += 7; + u[1] += 8; + w[2] += 7; + } + #pragma omp target in_reduction (+: x, y[:v], z[1:v], u, w[1:2]) + { + x += 9; + y[0] += 10; + y[1] += 11; + z[1] += 12; + u[2] += 13; + w[1] += 14; + } + } + if (x != 56 || y[0] != 60 || y[1] != 64) + __builtin_abort (); + if (z[0] != 45 || z[1] != 62 || z[2] != 54) + __builtin_abort (); + if (u[0] != 8 || u[1] != 12 || u[2] != 18) + __builtin_abort (); + if (w[0] != 3 || w[1] != 24 || w[2] != 12) + __builtin_abort (); +} + +void +bar (int &x, int *&y, int n, int v) +{ + int zu[3] = { 45, 46, 47 }; + int uu[n], wu[n], i; + int (&z)[3] = zu; + int (&u)[n] = uu; + int (&w)[n] = wu; + for (i = 0; i < n; i++) + w[i] = u[i] = n + i; + #pragma omp parallel master + #pragma omp taskgroup task_reduction (+: x, y[:2], z[1:2], u, w[1:v]) + { + #pragma omp task in_reduction (+: x, y[:2], z[1:2], u, w[1:v]) + { + x++; + y[0] += 2; + y[1] += 3; + z[1] += 4; + u[0] += 5; + w[1] += 6; + } + #pragma omp target in_reduction (+: x, y[:2], z[1:2], u, w[1:v]) + { + x += 4; + y[0] += 5; + y[1] += 6; + z[2] += 7; + u[1] += 8; + w[2] += 7; + } + #pragma omp target in_reduction (+: x, y[:v], z[1:v], u, w[1:2]) + { + x += 9; + y[0] += 10; + y[1] += 11; + z[1] += 12; + u[2] += 13; + w[1] += 14; + } + } + if (x != 56 || y[0] != 77 || y[1] != 84) + __builtin_abort (); + if (z[0] != 45 || z[1] != 62 || z[2] != 54) + __builtin_abort (); + if (u[0] != 8 || u[1] != 12 || u[2] != 18) + __builtin_abort (); + if (w[0] != 3 || w[1] != 24 || w[2] != 12) + __builtin_abort (); +} + +int +main () +{ + int x = 42; + int yu[2] = { 43, 44 }; + int *y = yu; + #pragma omp parallel master + foo (x, y, 3, 2); + x = 42; + bar (x, y, 3, 2); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/target-in-reduction-2.C b/libgomp/testsuite/libgomp.c++/target-in-reduction-2.C new file mode 100644 index 0000000..5da0e90 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-in-reduction-2.C @@ -0,0 +1,182 @@ +struct S { int a, b, c[2]; }; +#pragma omp declare 
reduction (+: S : (omp_out.a += omp_in.a, omp_out.b += omp_in.b)) \ + initializer (omp_priv = { 0, 0, { 0, 0 } }) + +void +foo (S &x, S *&y, int n, int v) +{ + S zu[3] = { { 45, 47, {} }, { 46, 48, {} }, { 47, 49, {} } }; + S uu[n], wu[n]; + S (&z)[3] = zu; + S (&u)[n] = uu; + S (&w)[n] = wu; + int i; + for (i = 0; i < n; i++) + { + w[i].a = u[i].a = n + i; + w[i].b = u[i].b = n - i; + w[i].c[0] = u[i].c[0] = 0; + w[i].c[1] = u[i].c[1] = 0; + } + #pragma omp taskgroup task_reduction (+: x, y[:2], z[1:2], u, w[1:v]) + { + #pragma omp task in_reduction (+: x, y[:2], z[1:2], u, w[1:v]) + { + x.a++; + x.b++; + y[0].a += 2; + y[0].b += 12; + y[1].a += 3; + y[1].b += 13; + z[1].a += 4; + z[1].b += 14; + u[0].a += 5; + u[0].b += 15; + w[1].a += 6; + w[1].b += 16; + } + #pragma omp target in_reduction (+: x, y[:2], z[1:2], u, w[1:v]) + { + x.a += 4; + x.b += 14; + y[0].a += 5; + y[0].b += 15; + y[1].a += 6; + y[1].b += 16; + z[2].a += 7; + z[2].b += 17; + u[1].a += 8; + u[1].b += 18; + w[2].a += 7; + w[2].b += 17; + } + #pragma omp target in_reduction (+: x, y[:v], z[1:v], u, w[1:2]) + { + x.a += 9; + x.b += 19; + y[0].a += 10; + y[0].b += 20; + y[1].a += 11; + y[1].b += 21; + z[1].a += 12; + z[1].b += 22; + u[2].a += 13; + u[2].b += 23; + w[1].a += 14; + w[1].b += 24; + } + } + if (x.a != 56 || y[0].a != 60 || y[1].a != 64) + __builtin_abort (); + if (x.b != 86 || y[0].b != 100 || y[1].b != 104) + __builtin_abort (); + if (z[0].a != 45 || z[1].a != 62 || z[2].a != 54) + __builtin_abort (); + if (z[0].b != 47 || z[1].b != 84 || z[2].b != 66) + __builtin_abort (); + if (u[0].a != 8 || u[1].a != 12 || u[2].a != 18) + __builtin_abort (); + if (u[0].b != 18 || u[1].b != 20 || u[2].b != 24) + __builtin_abort (); + if (w[0].a != 3 || w[1].a != 24 || w[2].a != 12) + __builtin_abort (); + if (w[0].b != 3 || w[1].b != 42 || w[2].b != 18) + __builtin_abort (); +} + +void +bar (S &x, S *&y, int n, int v) +{ + S zu[3] = { { 45, 47, {} }, { 46, 48, {} }, { 47, 49, {} } }; + S uu[n], 
wu[n]; + S (&z)[3] = zu; + S (&u)[n] = uu; + S (&w)[n] = wu; + int i; + for (i = 0; i < n; i++) + { + w[i].a = u[i].a = n + i; + w[i].b = u[i].b = n - i; + w[i].c[0] = u[i].c[0] = 0; + w[i].c[1] = u[i].c[1] = 0; + } + #pragma omp parallel master + #pragma omp taskgroup task_reduction (+: x, y[:2], z[1:2], u, w[1:v]) + { + #pragma omp task in_reduction (+: x, y[:2], z[1:2], u, w[1:v]) + { + x.a++; + x.b++; + y[0].a += 2; + y[0].b += 12; + y[1].a += 3; + y[1].b += 13; + z[1].a += 4; + z[1].b += 14; + u[0].a += 5; + u[0].b += 15; + w[1].a += 6; + w[1].b += 16; + } + #pragma omp target in_reduction (+: x, y[:2], z[1:2], u, w[1:v]) + { + x.a += 4; + x.b += 14; + y[0].a += 5; + y[0].b += 15; + y[1].a += 6; + y[1].b += 16; + z[2].a += 7; + z[2].b += 17; + u[1].a += 8; + u[1].b += 18; + w[2].a += 7; + w[2].b += 17; + } + #pragma omp target in_reduction (+: x, y[:v], z[1:v], u, w[1:2]) + { + x.a += 9; + x.b += 19; + y[0].a += 10; + y[0].b += 20; + y[1].a += 11; + y[1].b += 21; + z[1].a += 12; + z[1].b += 22; + u[2].a += 13; + u[2].b += 23; + w[1].a += 14; + w[1].b += 24; + } + } + if (x.a != 56 || y[0].a != 77 || y[1].a != 84) + __builtin_abort (); + if (x.b != 86 || y[0].b != 147 || y[1].b != 154) + __builtin_abort (); + if (z[0].a != 45 || z[1].a != 62 || z[2].a != 54) + __builtin_abort (); + if (z[0].b != 47 || z[1].b != 84 || z[2].b != 66) + __builtin_abort (); + if (u[0].a != 8 || u[1].a != 12 || u[2].a != 18) + __builtin_abort (); + if (u[0].b != 18 || u[1].b != 20 || u[2].b != 24) + __builtin_abort (); + if (w[0].a != 3 || w[1].a != 24 || w[2].a != 12) + __builtin_abort (); + if (w[0].b != 3 || w[1].b != 42 || w[2].b != 18) + __builtin_abort (); +} + +int +main () +{ + S x = { 42, 52 }; + S yu[2] = { { 43, 53 }, { 44, 54 } }; + S *y = yu; + #pragma omp parallel master + foo (x, y, 3, 2); + x.a = 42; + x.b = 52; + bar (x, y, 3, 2); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/atomic-19.c b/libgomp/testsuite/libgomp.c-c++-common/atomic-19.c new 
file mode 100644 index 0000000..3aee228 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/atomic-19.c @@ -0,0 +1,274 @@ +// { dg-do run { target c } } + +extern +#ifdef __cplusplus +"C" +#endif +void abort (void); +int x = 6; +int w, y; + +int * +foo (void) +{ + if (w) + abort (); + return &y; +} + +int +main () +{ + int v, r; + #pragma omp atomic compare + x = x > 8 ? 8 : x; + #pragma omp atomic read + v = x; + if (v != 6) + abort (); + #pragma omp atomic compare + x = x > 4 ? 4 : x; + #pragma omp atomic read + v = x; + if (v != 4) + abort (); + #pragma omp atomic compare capture + v = x = x < 8 ? 8 : x; + if (v != 8) + abort (); + #pragma omp atomic read + v = x; + if (v != 8) + abort (); + #pragma omp atomic capture compare + { v = x; x = x < 12 ? 12 : x; } + if (v != 8) + abort (); + #pragma omp atomic read + v = x; + if (v != 12) + abort (); + #pragma omp atomic capture compare + { v = x; x = x < 4 ? 4 : x; } + if (v != 12) + abort (); + #pragma omp atomic read + v = x; + if (v != 12) + abort (); + #pragma omp atomic write + x = -32; + #pragma omp atomic capture compare seq_cst fail(relaxed) + { x = 12U < x ? 12U : x; v = x; } + if (v != 12) + abort (); + #pragma omp atomic read + v = x; + if (v != 12) + abort (); + #pragma omp atomic compare + x = x == 12 ? 16 : x; + #pragma omp atomic read + v = x; + if (v != 16) + abort (); + r = 57; + #pragma omp atomic compare capture + v = x = x == 15 ? r + 7 : x; + if (v != 16) + abort (); + #pragma omp atomic read + v = x; + if (v != 16) + abort (); + #pragma omp atomic capture, update, compare seq_cst fail(acquire) + { v = x; x = x == 73ULL - r ? 12LL : x; } + if (v != 16) + abort (); + #pragma omp atomic read + v = x; + if (v != 12) + abort (); + #pragma omp atomic update, compare, capture + { x = x == 69LL - r ? 
(unsigned char) 6 : x; v = x; } + if (v != 6) + abort (); + #pragma omp atomic read + v = x; + if (v != 6) + abort (); + #pragma omp atomic compare + if (x > 8) { x = 8; } + #pragma omp atomic read + v = x; + if (v != 6) + abort (); + #pragma omp atomic compare + if (x > 4) { x = 4; } + #pragma omp atomic read + v = x; + if (v != 4) + abort (); + #pragma omp atomic compare capture + { if (x < 8) { x = 8; } v = x; } + if (v != 8) + abort (); + #pragma omp atomic read + v = x; + if (v != 8) + abort (); + #pragma omp atomic capture compare + { v = x; if (x < 12) { x = 12; } } + if (v != 8) + abort (); + #pragma omp atomic read + v = x; + if (v != 12) + abort (); + #pragma omp atomic capture compare + { v = x; if (x < 4) { x = 4; } } + if (v != 12) + abort (); + #pragma omp atomic read + v = x; + if (v != 12) + abort (); + #pragma omp atomic write + x = -32; + #pragma omp atomic capture compare seq_cst fail(relaxed) + { if (12U < x) { x = 12U; } v = x; } + if (v != 12) + abort (); + #pragma omp atomic read + v = x; + if (v != 12) + abort (); + #pragma omp atomic compare + if (x == 12) { x = 16; } + #pragma omp atomic read + v = x; + if (v != 16) + abort (); + r = 57; + #pragma omp atomic compare capture + { if (x == 15) { x = r + 7; } v = x; } + if (v != 16) + abort (); + #pragma omp atomic read + v = x; + if (v != 16) + abort (); + #pragma omp atomic capture, update, compare seq_cst fail(acquire) + { v = x; if (x == 73ULL - r) { x = 12LL; } } + if (v != 16) + abort (); + #pragma omp atomic read + v = x; + if (v != 12) + abort (); + #pragma omp atomic update, compare, capture + { if (x == 69LL - r) { x = (unsigned char) 6; } v = x; } + if (v != 6) + abort (); + #pragma omp atomic read + v = x; + if (v != 6) + abort (); + v = 24; + #pragma omp atomic compare capture + if (x == 12) { x = 16; } else { v = x; } + if (v != 6) + abort (); + v = 32; + #pragma omp atomic read + v = x; + if (v != 6) + abort (); + v = 147; + #pragma omp atomic capture compare + if (x == 6) { x = 
57; } else { v = x; } + if (v != 147) + abort (); + #pragma omp atomic read + v = x; + if (v != 57) + abort (); + #pragma omp atomic update, capture, compare, weak, seq_cst, fail (relaxed) + { r = x == 137; if (r) { x = 174; } } + if (r) + abort (); + #pragma omp atomic read + v = x; + if (v != 57) + abort (); + #pragma omp atomic compare capture fail (relaxed) + { r = x == 57; if (r) { x = 6; } } + if (r != 1) + abort (); + #pragma omp atomic read + v = x; + if (v != 6) + abort (); + v = -5; + #pragma omp atomic capture compare + { r = x == 17; if (r) { x = 25; } else { v = x; } } + if (r || v != 6) + abort (); + #pragma omp atomic read + v = x; + if (v != 6) + abort (); + v = 15; + #pragma omp atomic capture compare + { r = x == 6; if (r) { x = 23; } else { v = x; } } + if (r != 1 || v != 15) + abort (); + #pragma omp atomic read + v = x; + if (v != 23) + abort (); + w = 1; + #pragma omp atomic compare capture + if (x == 23) { x = 57; } else { foo ()[0] = x; } + #pragma omp atomic read + v = x; + if (v != 57) + abort (); + #pragma omp atomic capture update compare + { r = x == 57; if (r) { x = 23; } else { foo ()[0] = x; } } + if (r != 1) + abort (); + #pragma omp atomic read + v = x; + if (v != 23) + abort (); + w = 0; + #pragma omp atomic compare capture + if (x == 24) { x = 57; } else { foo ()[0] = x; } + if (y != 23) + abort (); + #pragma omp atomic read + v = x; + if (v != 23) + abort (); + y = -5; + #pragma omp atomic capture update compare + { + r = x == 57; + if (r) + { + x = 27; + } + else + { + foo ()[0] = x; + } + } + if (r || y != 23) + abort (); + #pragma omp atomic read + v = x; + if (v != 23) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/atomic-20.c b/libgomp/testsuite/libgomp.c-c++-common/atomic-20.c new file mode 100644 index 0000000..571a714 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/atomic-20.c @@ -0,0 +1,203 @@ +// { dg-do run { target c } } + +extern +#ifdef __cplusplus +"C" +#endif +void abort 
(void); +float x = 6.0f; + +int +main () +{ + float v; + int r; + #pragma omp atomic compare + x = x > 8.0f ? 8.0f : x; + #pragma omp atomic read + v = x; + if (v != 6.0f) + abort (); + #pragma omp atomic compare + x = x > 4.0f ? 4.0f : x; + #pragma omp atomic read + v = x; + if (v != 4.0f) + abort (); + #pragma omp atomic compare capture + v = x = x < 8.0f ? 8.0f : x; + if (v != 8.0f) + abort (); + #pragma omp atomic read + v = x; + if (v != 8) + abort (); + #pragma omp atomic capture compare + { v = x; x = x < 12.0f ? 12.0f : x; } + if (v != 8.0f) + abort (); + #pragma omp atomic read + v = x; + if (v != 12.0f) + abort (); + #pragma omp atomic capture compare + { v = x; x = x < 4.0f ? 4.0f : x; } + if (v != 12.0f) + abort (); + #pragma omp atomic read + v = x; + if (v != 12.0f) + abort (); + #pragma omp atomic compare + x = x == 12.0 ? 16.0L : x; + #pragma omp atomic read + v = x; + if (v != 16.0) + abort (); + r = 57; + #pragma omp atomic compare capture + v = x = x == 15.0f ? r + 7.0f : x; + if (v != 16.0f) + abort (); + #pragma omp atomic read + v = x; + if (v != 16.0f) + abort (); + #pragma omp atomic capture, update, compare seq_cst fail(acquire) + { v = x; x = x == 73.0L - r ? 12.0f : x; } + if (v != 16.0f) + abort (); + #pragma omp atomic read + v = x; + if (v != 12.0f) + abort (); + #pragma omp atomic update, compare, capture + { x = x == 69.0 - r ? 
6.0f : x; v = x; } + if (v != 6.0f) + abort (); + #pragma omp atomic read + v = x; + if (v != 6.0f) + abort (); + #pragma omp atomic compare + if (x > 8.0f) { x = 8.0f; } + #pragma omp atomic read + v = x; + if (v != 6.0f) + abort (); + #pragma omp atomic compare + if (x > 4.0) { x = 4.0; } + #pragma omp atomic read + v = x; + if (v != 4.0f) + abort (); + #pragma omp atomic compare capture + { if (x < 8.0f) { x = 8.0f; } v = x; } + if (v != 8.0f) + abort (); + #pragma omp atomic read + v = x; + if (v != 8.0f) + abort (); + #pragma omp atomic capture compare + { v = x; if (x < 12.0f) { x = 12.0f; } } + if (v != 8.0f) + abort (); + #pragma omp atomic read + v = x; + if (v != 12.0f) + abort (); + #pragma omp atomic capture compare + { v = x; if (x < 4.0L) { x = 4.0L; } } + if (v != 12.0f) + abort (); + #pragma omp atomic read + v = x; + if (v != 12.0f) + abort (); + #pragma omp atomic compare + if (x == 12.0f) { x = 16.0L; } + #pragma omp atomic read + v = x; + if (v != 16.0f) + abort (); + r = 57.0; + #pragma omp atomic compare capture + { if (x == 15.0f) { x = r + 7.0f; } v = x; } + if (v != 16.0f) + abort (); + #pragma omp atomic read + v = x; + if (v != 16.0f) + abort (); + #pragma omp atomic capture, update, compare seq_cst fail(acquire) + { v = x; if (x == 73.0L - r) { x = 12.0L; } } + if (v != 16.0f) + abort (); + #pragma omp atomic read + v = x; + if (v != 12.0f) + abort (); + #pragma omp atomic update, compare, capture + { if (x == 69.0L - r) { x = 6.0; } v = x; } + if (v != 6.0f) + abort (); + #pragma omp atomic read + v = x; + if (v != 6.0f) + abort (); + v = 24; + #pragma omp atomic compare capture + if (x == 12.0f) { x = 16.0f; } else { v = x; } + if (v != 6.0f) + abort (); + v = 32.0f; + #pragma omp atomic read + v = x; + if (v != 6.0f) + abort (); + v = 147.0f; + #pragma omp atomic capture compare + if (x == 6.0f) { x = 57.0f; } else { v = x; } + if (v != 147.0f) + abort (); + #pragma omp atomic read + v = x; + if (v != 57.0f) + abort (); + #pragma omp 
atomic update, capture, compare, weak, seq_cst, fail (relaxed) + { r = x == 137.0f; if (r) { x = 174.0f; } } + if (r) + abort (); + #pragma omp atomic read + v = x; + if (v != 57.0f) + abort (); + #pragma omp atomic compare capture fail (relaxed) + { r = x == 57.0f; if (r) { x = 6.0f; } } + if (r != 1) + abort (); + #pragma omp atomic read + v = x; + if (v != 6.0f) + abort (); + v = -5.0f; + #pragma omp atomic capture compare + { r = x == 17.0L; if (r) { x = 25.0; } else { v = x; } } + if (r || v != 6.0f) + abort (); + #pragma omp atomic read + v = x; + if (v != 6.0f) + abort (); + v = 15.0f; + #pragma omp atomic capture compare + { r = x == 6.0f; if (r) { x = 23.0f; } else { v = x; } } + if (r != 1 || v != 15.0f) + abort (); + #pragma omp atomic read + v = x; + if (v != 23.0f) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/atomic-21.c b/libgomp/testsuite/libgomp.c-c++-common/atomic-21.c new file mode 100644 index 0000000..87216e2 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/atomic-21.c @@ -0,0 +1,49 @@ +// { dg-do run { target c } } + +double d; +long double ld; + +int +main () +{ + double e = __builtin_copysign (0.0, -1.0), v; + long double le = __builtin_copysignl (0.0L, -1.0L), lv; + if (__builtin_memcmp (&d, &e, sizeof (d)) != 0) + { + /* Verify == comparison for atomics is done as with memcmp. */ + #pragma omp atomic compare + d = d == e ? 5.0 : d; + #pragma omp atomic read + v = d; + if (v != 0.0) + __builtin_abort (); + #pragma omp atomic compare + d = d == 0.0 ? 5.0 : d; + #pragma omp atomic read + v = d; + if (v != 5.0) + __builtin_abort (); + } + if (__builtin_memcmp (&ld, &le, sizeof (ld)) != 0) + { + __builtin_memset (&ld, 0xff, sizeof (ld)); + #pragma omp atomic write + ld = 0.0L; + __asm volatile ("" : : "g" (&ld) : "memory"); + /* Verify == comparison for atomics is done as with memcmp + with __builtin_clear_padding if needed. */ + #pragma omp atomic compare + ld = ld == le ? 
5.0L : ld; + #pragma omp atomic read + lv = ld; + if (lv != 0.0L) + __builtin_abort (); + #pragma omp atomic compare + ld = ld == 0.0L ? 5.0L : ld; + #pragma omp atomic read + lv = ld; + if (lv != 5.0L) + __builtin_abort (); + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/error-1.c b/libgomp/testsuite/libgomp.c-c++-common/error-1.c new file mode 100644 index 0000000..e7f550a --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/error-1.c @@ -0,0 +1,59 @@ +/* { dg-shouldfail "error directive" } */ + +#ifdef __cplusplus +extern "C" +#endif +void abort (); + +int +foo (int i, int x) +{ + if (x) + #pragma omp error severity(warning) /* { dg-warning "'pragma omp error' encountered" } */ + i++; + if (!x) + ; + else + #pragma omp error severity(warning) /* { dg-warning "'pragma omp error' encountered" } */ + i += 2; + switch (0) + #pragma omp error severity(warning) /* { dg-warning "'pragma omp error' encountered" } */ + { + default: + break; + } + while (0) + #pragma omp error message("42 - 1") severity (warning) /* { dg-warning "'pragma omp error' encountered: 42 - 1" } */ + i += 4; + lab: + #pragma omp error severity(warning) message("bar") at(compilation) /* { dg-warning "'pragma omp error' encountered: bar" } */ + i += 8; + return i; +} + +int +main () +{ + /* Initialize offloading early, so that any output this may produce doesn't + disturb the 'dg-output' scanning below. 
*/ + #pragma omp target + ; + + if (foo (5, 0) != 13 || foo (6, 1) != 17) + abort (); + #pragma omp error at (execution) severity (warning) + const char *msg = "my message" + 2; + #pragma omp error at (execution) severity (warning) message (msg + 1) + #pragma omp target + { + #pragma omp error at (execution) severity (warning) message ("hello from a distance") + } + #pragma omp error at (execution) severity (fatal) message (msg - 2) + #pragma omp error at (execution) severity (warning) message ("foobar") + return 0; +} + +/* { dg-output "libgomp: error directive encountered(\n|\r|\r\n)(\n|\r|\r\n)" } */ +/* { dg-output "libgomp: error directive encountered: message(\n|\r|\r\n)(\n|\r|\r\n)" } */ +/* { dg-output "libgomp: error directive encountered: hello from a distance(\n|\r|\r\n)(\n|\r|\r\n)" } */ +/* { dg-output "libgomp: fatal error: error directive encountered: my message" } */ diff --git a/libgomp/testsuite/libgomp.c-c++-common/for-3.c b/libgomp/testsuite/libgomp.c-c++-common/for-3.c index 173ce8e..285f8e9 100644 --- a/libgomp/testsuite/libgomp.c-c++-common/for-3.c +++ b/libgomp/testsuite/libgomp.c-c++-common/for-3.c @@ -9,6 +9,11 @@ void abort (); #define M(x, y, z) O(x, y, z) #define O(x, y, z) x ## _ ## y ## _ ## z +#define DO_PRAGMA(x) _Pragma (#x) +#define OMPTEAMS DO_PRAGMA (omp target teams) +#define OMPFROM(v) DO_PRAGMA (omp target update from(v)) +#define OMPTO(v) DO_PRAGMA (omp target update to(v)) + #pragma omp declare target #define F distribute @@ -81,33 +86,30 @@ int main () { int err = 0; - #pragma omp target teams reduction(|:err) - { - err |= test_d_normal (); - err |= test_d_ds128_normal (); - err |= test_ds_normal (); - err |= test_ds_ds128_normal (); - err |= test_dpf_static (); - err |= test_dpf_static32 (); - err |= test_dpf_auto (); - err |= test_dpf_guided32 (); - err |= test_dpf_runtime (); - err |= test_dpf_ds128_static (); - err |= test_dpf_ds128_static32 (); - err |= test_dpf_ds128_auto (); - err |= test_dpf_ds128_guided32 (); - 
err |= test_dpf_ds128_runtime (); - err |= test_dpfs_static (); - err |= test_dpfs_static32 (); - err |= test_dpfs_auto (); - err |= test_dpfs_guided32 (); - err |= test_dpfs_runtime (); - err |= test_dpfs_ds128_static (); - err |= test_dpfs_ds128_static32 (); - err |= test_dpfs_ds128_auto (); - err |= test_dpfs_ds128_guided32 (); - err |= test_dpfs_ds128_runtime (); - } + err |= test_d_normal (); + err |= test_d_ds128_normal (); + err |= test_ds_normal (); + err |= test_ds_ds128_normal (); + err |= test_dpf_static (); + err |= test_dpf_static32 (); + err |= test_dpf_auto (); + err |= test_dpf_guided32 (); + err |= test_dpf_runtime (); + err |= test_dpf_ds128_static (); + err |= test_dpf_ds128_static32 (); + err |= test_dpf_ds128_auto (); + err |= test_dpf_ds128_guided32 (); + err |= test_dpf_ds128_runtime (); + err |= test_dpfs_static (); + err |= test_dpfs_static32 (); + err |= test_dpfs_auto (); + err |= test_dpfs_guided32 (); + err |= test_dpfs_runtime (); + err |= test_dpfs_ds128_static (); + err |= test_dpfs_ds128_static32 (); + err |= test_dpfs_ds128_auto (); + err |= test_dpfs_ds128_guided32 (); + err |= test_dpfs_ds128_runtime (); if (err) abort (); return 0; diff --git a/libgomp/testsuite/libgomp.c-c++-common/function-not-offloaded.c b/libgomp/testsuite/libgomp.c-c++-common/function-not-offloaded.c index 9e59ef8..bc1ae68 100644 --- a/libgomp/testsuite/libgomp.c-c++-common/function-not-offloaded.c +++ b/libgomp/testsuite/libgomp.c-c++-common/function-not-offloaded.c @@ -1,5 +1,5 @@ /* { dg-do link } */ -/* { dg-excess-errors "unresolved symbol foo, lto1, mkoffload and lto-wrapper fatal errors" { target offload_device_nonshared_as } } */ +/* { dg-excess-errors "unresolved symbol foo, lto1, mkoffload and lto-wrapper fatal errors" { target { offload_target_nvptx || offload_target_amdgcn } } } */ /* { dg-additional-sources "function-not-offloaded-aux.c" } */ #pragma omp declare target diff --git a/libgomp/testsuite/libgomp.c-c++-common/masked-1.c 
b/libgomp/testsuite/libgomp.c-c++-common/masked-1.c new file mode 100644 index 0000000..4ba259c --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/masked-1.c @@ -0,0 +1,83 @@ +#include <omp.h> +#include <stdlib.h> + +void +foo (int x, int *a) +{ + #pragma omp masked + { + if (omp_get_thread_num () != 0) + abort (); + a[128]++; + } + #pragma omp masked filter (0) + { + if (omp_get_thread_num () != 0) + abort (); + a[129]++; + } + #pragma omp masked filter (7) + { + if (omp_get_thread_num () != 7) + abort (); + a[130]++; + } + #pragma omp masked filter (x) + { + if (omp_get_thread_num () != x) + abort (); + a[131]++; + } + #pragma omp masked taskloop simd filter (x) grainsize (12) simdlen (4) + for (int i = 0; i < 128; i++) + a[i] += i; +} + +int +main () +{ + int a[136] = {}; + #pragma omp parallel num_threads (4) + foo (4, a); + for (int i = 0; i < 128; i++) + if (a[i]) + abort (); + if (a[128] != 1 || a[129] != 1 || a[130] || a[131]) + abort (); + #pragma omp parallel num_threads (4) + foo (3, a); + for (int i = 0; i < 128; i++) + if (a[i] != i) + abort (); + if (a[128] != 2 || a[129] != 2 || a[130] || a[131] != 1) + abort (); + #pragma omp parallel num_threads (8) + foo (8, a); + for (int i = 0; i < 128; i++) + if (a[i] != i) + abort (); + if (a[128] != 3 || a[129] != 3 || a[130] != 1 || a[131] != 1) + abort (); + #pragma omp parallel num_threads (8) + foo (6, a); + for (int i = 0; i < 128; i++) + if (a[i] != 2 * i) + abort (); + if (a[128] != 4 || a[129] != 4 || a[130] != 2 || a[131] != 2) + abort (); + for (int i = 0; i < 8; i++) + a[i] = 0; + /* The filter expression can evaluate to different values in different threads. */ + #pragma omp parallel masked num_threads (8) filter (omp_get_thread_num () + 1) + a[omp_get_thread_num ()]++; + for (int i = 0; i < 8; i++) + if (a[i]) + abort (); + /* And multiple threads can be filtered. 
*/ + #pragma omp parallel masked num_threads (8) filter (omp_get_thread_num () & ~1) + a[omp_get_thread_num ()]++; + for (int i = 0; i < 8; i++) + if (a[i] != !(i & 1)) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/nothing-1.c b/libgomp/testsuite/libgomp.c-c++-common/nothing-1.c new file mode 100644 index 0000000..69716b1 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/nothing-1.c @@ -0,0 +1,47 @@ +#include <stdlib.h> + +#pragma omp nothing + +struct S +{ + #pragma omp nothing + int s; +}; + +int +foo (int i) +{ + #pragma omp nothing + if (0) + #pragma omp nothing + i++; + if (1) + ; + else + #pragma omp nothing + i++; + switch (0) + #pragma omp nothing + { + default: + break; + } + while (0) + #pragma omp nothing + i++; + for (; 0;) + #pragma omp nothing + i++; + lab: + #pragma omp nothing + i++; + return i; +} + +int +main () +{ + if (foo (5) != 6 || foo (-2) != -1) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/on_device_arch.h b/libgomp/testsuite/libgomp.c-c++-common/on_device_arch.h new file mode 100644 index 0000000..ee541dd --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/on_device_arch.h @@ -0,0 +1,43 @@ +#include <gomp-constants.h> + +/* static */ int +device_arch_nvptx (void) +{ + return GOMP_DEVICE_NVIDIA_PTX; +} + +/* static */ int +device_arch_intel_mic (void) +{ + return GOMP_DEVICE_INTEL_MIC; +} + +#pragma omp declare variant (device_arch_nvptx) match(construct={target},device={arch(nvptx)}) +#pragma omp declare variant (device_arch_intel_mic) match(construct={target},device={arch(intel_mic)}) +/* static */ int +device_arch (void) +{ + return GOMP_DEVICE_DEFAULT; +} + +static int +on_device_arch (int d) +{ + int d_cur; + #pragma omp target map(from:d_cur) + d_cur = device_arch (); + + return d_cur == d; +} + +int +on_device_arch_nvptx () +{ + return on_device_arch (GOMP_DEVICE_NVIDIA_PTX); +} + +int +on_device_arch_intel_mic () +{ + return on_device_arch 
(GOMP_DEVICE_INTEL_MIC); +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/pr94366.c b/libgomp/testsuite/libgomp.c-c++-common/pr94366.c new file mode 100644 index 0000000..5837cd0 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/pr94366.c @@ -0,0 +1,17 @@ +/* PR middle-end/94366 */ + +int +main () +{ + int a = 2; + #pragma omp parallel reduction(&& : a) + a = a && 1; + if (!a) + __builtin_abort (); + a = 4; + #pragma omp parallel reduction(|| : a) + a = a || 0; + if (!a) + __builtin_abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/reduction-1.c b/libgomp/testsuite/libgomp.c-c++-common/reduction-1.c new file mode 100644 index 0000000..89a4153 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/reduction-1.c @@ -0,0 +1,192 @@ +/* C / C++'s logical AND and OR operators take any scalar argument + which compares (un)equal to 0 - the result 1 or 0 and of type int. + + In this testcase, the int result is again converted to a floating-point + or complex type. + + While having a floating-point/complex array element with || and && can make + sense, having a non-integer/non-bool reduction variable is odd but valid. + + Test: FP reduction variable + FP array. 
*/ + +#define N 1024 +_Complex float rcf[N]; +_Complex double rcd[N]; +float rf[N]; +double rd[N]; + +int +reduction_or () +{ + float orf = 0; + double ord = 0; + _Complex float orfc = 0; + _Complex double ordc = 0; + + #pragma omp parallel reduction(||: orf) + for (int i=0; i < N; ++i) + orf = orf || rf[i]; + + #pragma omp parallel for reduction(||: ord) + for (int i=0; i < N; ++i) + ord = ord || rcd[i]; + + #pragma omp parallel for simd reduction(||: orfc) + for (int i=0; i < N; ++i) + orfc = orfc || rcf[i]; + + #pragma omp parallel loop reduction(||: ordc) + for (int i=0; i < N; ++i) + ordc = ordc || rcd[i]; + + return orf + ord + __real__ orfc + __real__ ordc; +} + +int +reduction_or_teams () +{ + float orf = 0; + double ord = 0; + _Complex float orfc = 0; + _Complex double ordc = 0; + + #pragma omp teams distribute parallel for reduction(||: orf) + for (int i=0; i < N; ++i) + orf = orf || rf[i]; + + #pragma omp teams distribute parallel for simd reduction(||: ord) + for (int i=0; i < N; ++i) + ord = ord || rcd[i]; + + #pragma omp teams distribute parallel for reduction(||: orfc) + for (int i=0; i < N; ++i) + orfc = orfc || rcf[i]; + + #pragma omp teams distribute parallel for simd reduction(||: ordc) + for (int i=0; i < N; ++i) + ordc = ordc || rcd[i]; + + return orf + ord + __real__ orfc + __real__ ordc; +} + +int +reduction_and () +{ + float andf = 1; + double andd = 1; + _Complex float andfc = 1; + _Complex double anddc = 1; + + #pragma omp parallel reduction(&&: andf) + for (int i=0; i < N; ++i) + andf = andf && rf[i]; + + #pragma omp parallel for reduction(&&: andd) + for (int i=0; i < N; ++i) + andd = andd && rcd[i]; + + #pragma omp parallel for simd reduction(&&: andfc) + for (int i=0; i < N; ++i) + andfc = andfc && rcf[i]; + + #pragma omp parallel loop reduction(&&: anddc) + for (int i=0; i < N; ++i) + anddc = anddc && rcd[i]; + + return andf + andd + __real__ andfc + __real__ anddc; +} + +int +reduction_and_teams () +{ + float andf = 1; + double andd 
= 1; + _Complex float andfc = 1; + _Complex double anddc = 1; + + #pragma omp teams distribute parallel for reduction(&&: andf) + for (int i=0; i < N; ++i) + andf = andf && rf[i]; + + #pragma omp teams distribute parallel for simd reduction(&&: andd) + for (int i=0; i < N; ++i) + andd = andd && rcd[i]; + + #pragma omp teams distribute parallel for reduction(&&: andfc) + for (int i=0; i < N; ++i) + andfc = andfc && rcf[i]; + + #pragma omp teams distribute parallel for simd reduction(&&: anddc) + for (int i=0; i < N; ++i) + anddc = anddc && rcd[i]; + + return andf + andd + __real__ andfc + __real__ anddc; +} + +int +main () +{ + for (int i = 0; i < N; ++i) + { + rf[i] = 0; + rd[i] = 0; + rcf[i] = 0; + rcd[i] = 0; + } + + if (reduction_or () != 0) + __builtin_abort (); + if (reduction_or_teams () != 0) + __builtin_abort (); + if (reduction_and () != 0) + __builtin_abort (); + if (reduction_and_teams () != 0) + __builtin_abort (); + + rf[10] = 1.0; + rd[15] = 1.0; + rcf[10] = 1.0; + rcd[15] = 1.0i; + + if (reduction_or () != 4) + __builtin_abort (); + if (reduction_or_teams () != 4) + __builtin_abort (); + if (reduction_and () != 0) + __builtin_abort (); + if (reduction_and_teams () != 0) + __builtin_abort (); + + for (int i = 0; i < N; ++i) + { + rf[i] = 1; + rd[i] = 1; + rcf[i] = 1; + rcd[i] = 1; + } + + if (reduction_or () != 4) + __builtin_abort (); + if (reduction_or_teams () != 4) + __builtin_abort (); + if (reduction_and () != 4) + __builtin_abort (); + if (reduction_and_teams () != 4) + __builtin_abort (); + + rf[10] = 0.0; + rd[15] = 0.0; + rcf[10] = 0.0; + rcd[15] = 0.0; + + if (reduction_or () != 4) + __builtin_abort (); + if (reduction_or_teams () != 4) + __builtin_abort (); + if (reduction_and () != 0) + __builtin_abort (); + if (reduction_and_teams () != 0) + __builtin_abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/reduction-16.c b/libgomp/testsuite/libgomp.c-c++-common/reduction-16.c index e60fe36..4bf62c3 100644 --- 
a/libgomp/testsuite/libgomp.c-c++-common/reduction-16.c +++ b/libgomp/testsuite/libgomp.c-c++-common/reduction-16.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-additional-options "-foffload=-latomic" { target offload_target_nvptx } } */ +/* { dg-additional-options "-foffload-options=nvptx-none=-latomic" { target offload_target_nvptx } } */ #include <stdlib.h> diff --git a/libgomp/testsuite/libgomp.c-c++-common/reduction-17.c b/libgomp/testsuite/libgomp.c-c++-common/reduction-17.c new file mode 100644 index 0000000..5e680d6 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/reduction-17.c @@ -0,0 +1,16 @@ +/* PR middle-end/99928 */ +/* { dg-do run } */ + +#define N 64 + +int +main () +{ + int r = 0, i; + #pragma omp teams distribute simd reduction(+:r) + for (i = 0; i < N; i++) + r += i; + if (r != N * (N - 1) / 2) + __builtin_abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/reduction-2.c b/libgomp/testsuite/libgomp.c-c++-common/reduction-2.c new file mode 100644 index 0000000..bdcba86 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/reduction-2.c @@ -0,0 +1,192 @@ +/* C / C++'s logical AND and OR operators take any scalar argument + which compares (un)equal to 0 - the result 1 or 0 and of type int. + + In this testcase, the int result is again converted to a floating-point + or complex type. + + While having a floating-point/complex array element with || and && can make + sense, having a non-integer/non-bool reduction variable is odd but valid. + + Test: FP reduction variable + integer array. 
*/ + +#define N 1024 +char rcf[N]; +short rcd[N]; +int rf[N]; +long rd[N]; + +int +reduction_or () +{ + float orf = 0; + double ord = 0; + _Complex float orfc = 0; + _Complex double ordc = 0; + + #pragma omp parallel reduction(||: orf) + for (int i=0; i < N; ++i) + orf = orf || rf[i]; + + #pragma omp parallel for reduction(||: ord) + for (int i=0; i < N; ++i) + ord = ord || rcd[i]; + + #pragma omp parallel for simd reduction(||: orfc) + for (int i=0; i < N; ++i) + orfc = orfc || rcf[i]; + + #pragma omp parallel loop reduction(||: ordc) + for (int i=0; i < N; ++i) + ordc = ordc || rcd[i]; + + return orf + ord + __real__ orfc + __real__ ordc; +} + +int +reduction_or_teams () +{ + float orf = 0; + double ord = 0; + _Complex float orfc = 0; + _Complex double ordc = 0; + + #pragma omp teams distribute parallel for reduction(||: orf) + for (int i=0; i < N; ++i) + orf = orf || rf[i]; + + #pragma omp teams distribute parallel for simd reduction(||: ord) + for (int i=0; i < N; ++i) + ord = ord || rcd[i]; + + #pragma omp teams distribute parallel for reduction(||: orfc) + for (int i=0; i < N; ++i) + orfc = orfc || rcf[i]; + + #pragma omp teams distribute parallel for simd reduction(||: ordc) + for (int i=0; i < N; ++i) + ordc = ordc || rcd[i]; + + return orf + ord + __real__ orfc + __real__ ordc; +} + +int +reduction_and () +{ + float andf = 1; + double andd = 1; + _Complex float andfc = 1; + _Complex double anddc = 1; + + #pragma omp parallel reduction(&&: andf) + for (int i=0; i < N; ++i) + andf = andf && rf[i]; + + #pragma omp parallel for reduction(&&: andd) + for (int i=0; i < N; ++i) + andd = andd && rcd[i]; + + #pragma omp parallel for simd reduction(&&: andfc) + for (int i=0; i < N; ++i) + andfc = andfc && rcf[i]; + + #pragma omp parallel loop reduction(&&: anddc) + for (int i=0; i < N; ++i) + anddc = anddc && rcd[i]; + + return andf + andd + __real__ andfc + __real__ anddc; +} + +int +reduction_and_teams () +{ + float andf = 1; + double andd = 1; + _Complex float 
andfc = 1; + _Complex double anddc = 1; + + #pragma omp teams distribute parallel for reduction(&&: andf) + for (int i=0; i < N; ++i) + andf = andf && rf[i]; + + #pragma omp teams distribute parallel for simd reduction(&&: andd) + for (int i=0; i < N; ++i) + andd = andd && rcd[i]; + + #pragma omp teams distribute parallel for reduction(&&: andfc) + for (int i=0; i < N; ++i) + andfc = andfc && rcf[i]; + + #pragma omp teams distribute parallel for simd reduction(&&: anddc) + for (int i=0; i < N; ++i) + anddc = anddc && rcd[i]; + + return andf + andd + __real__ andfc + __real__ anddc; +} + +int +main () +{ + for (int i = 0; i < N; ++i) + { + rf[i] = 0; + rd[i] = 0; + rcf[i] = 0; + rcd[i] = 0; + } + + if (reduction_or () != 0) + __builtin_abort (); + if (reduction_or_teams () != 0) + __builtin_abort (); + if (reduction_and () != 0) + __builtin_abort (); + if (reduction_and_teams () != 0) + __builtin_abort (); + + rf[10] = 1; + rd[15] = 1; + rcf[10] = 1; + rcd[15] = 1; + + if (reduction_or () != 4) + __builtin_abort (); + if (reduction_or_teams () != 4) + __builtin_abort (); + if (reduction_and () != 0) + __builtin_abort (); + if (reduction_and_teams () != 0) + __builtin_abort (); + + for (int i = 0; i < N; ++i) + { + rf[i] = 1; + rd[i] = 1; + rcf[i] = 1; + rcd[i] = 1; + } + + if (reduction_or () != 4) + __builtin_abort (); + if (reduction_or_teams () != 4) + __builtin_abort (); + if (reduction_and () != 4) + __builtin_abort (); + if (reduction_and_teams () != 4) + __builtin_abort (); + + rf[10] = 0; + rd[15] = 0; + rcf[10] = 0; + rcd[15] = 0; + + if (reduction_or () != 4) + __builtin_abort (); + if (reduction_or_teams () != 4) + __builtin_abort (); + if (reduction_and () != 0) + __builtin_abort (); + if (reduction_and_teams () != 0) + __builtin_abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/reduction-3.c b/libgomp/testsuite/libgomp.c-c++-common/reduction-3.c new file mode 100644 index 0000000..0f09aab --- /dev/null +++ 
b/libgomp/testsuite/libgomp.c-c++-common/reduction-3.c @@ -0,0 +1,192 @@ +/* C / C++'s logical AND and OR operators take any scalar argument + which compares (un)equal to 0 - the result 1 or 0 and of type int. + + In this testcase, the int result is again converted to a floating-point + or complex type. + + While having a floating-point/complex array element with || and && can make + sense, having a non-integer/non-bool reduction variable is odd but valid. + + Test: integer reduction variable + FP array. */ + +#define N 1024 +_Complex float rcf[N]; +_Complex double rcd[N]; +float rf[N]; +double rd[N]; + +int +reduction_or () +{ + char orf = 0; + short ord = 0; + int orfc = 0; + long ordc = 0; + + #pragma omp parallel reduction(||: orf) + for (int i=0; i < N; ++i) + orf = orf || rf[i]; + + #pragma omp parallel for reduction(||: ord) + for (int i=0; i < N; ++i) + ord = ord || rcd[i]; + + #pragma omp parallel for simd reduction(||: orfc) + for (int i=0; i < N; ++i) + orfc = orfc || rcf[i]; + + #pragma omp parallel loop reduction(||: ordc) + for (int i=0; i < N; ++i) + ordc = ordc || rcd[i]; + + return orf + ord + __real__ orfc + __real__ ordc; +} + +int +reduction_or_teams () +{ + char orf = 0; + short ord = 0; + int orfc = 0; + long ordc = 0; + + #pragma omp teams distribute parallel for reduction(||: orf) + for (int i=0; i < N; ++i) + orf = orf || rf[i]; + + #pragma omp teams distribute parallel for simd reduction(||: ord) + for (int i=0; i < N; ++i) + ord = ord || rcd[i]; + + #pragma omp teams distribute parallel for reduction(||: orfc) + for (int i=0; i < N; ++i) + orfc = orfc || rcf[i]; + + #pragma omp teams distribute parallel for simd reduction(||: ordc) + for (int i=0; i < N; ++i) + ordc = ordc || rcd[i]; + + return orf + ord + __real__ orfc + __real__ ordc; +} + +int +reduction_and () +{ + unsigned char andf = 1; + unsigned short andd = 1; + unsigned int andfc = 1; + unsigned long anddc = 1; + + #pragma omp parallel reduction(&&: andf) + for (int i=0; i < N; 
++i) + andf = andf && rf[i]; + + #pragma omp parallel for reduction(&&: andd) + for (int i=0; i < N; ++i) + andd = andd && rcd[i]; + + #pragma omp parallel for simd reduction(&&: andfc) + for (int i=0; i < N; ++i) + andfc = andfc && rcf[i]; + + #pragma omp parallel loop reduction(&&: anddc) + for (int i=0; i < N; ++i) + anddc = anddc && rcd[i]; + + return andf + andd + __real__ andfc + __real__ anddc; +} + +int +reduction_and_teams () +{ + unsigned char andf = 1; + unsigned short andd = 1; + unsigned int andfc = 1; + unsigned long anddc = 1; + + #pragma omp teams distribute parallel for reduction(&&: andf) + for (int i=0; i < N; ++i) + andf = andf && rf[i]; + + #pragma omp teams distribute parallel for simd reduction(&&: andd) + for (int i=0; i < N; ++i) + andd = andd && rcd[i]; + + #pragma omp teams distribute parallel for reduction(&&: andfc) + for (int i=0; i < N; ++i) + andfc = andfc && rcf[i]; + + #pragma omp teams distribute parallel for simd reduction(&&: anddc) + for (int i=0; i < N; ++i) + anddc = anddc && rcd[i]; + + return andf + andd + __real__ andfc + __real__ anddc; +} + +int +main () +{ + for (int i = 0; i < N; ++i) + { + rf[i] = 0; + rd[i] = 0; + rcf[i] = 0; + rcd[i] = 0; + } + + if (reduction_or () != 0) + __builtin_abort (); + if (reduction_or_teams () != 0) + __builtin_abort (); + if (reduction_and () != 0) + __builtin_abort (); + if (reduction_and_teams () != 0) + __builtin_abort (); + + rf[10] = 1.0; + rd[15] = 1.0; + rcf[10] = 1.0; + rcd[15] = 1.0i; + + if (reduction_or () != 4) + __builtin_abort (); + if (reduction_or_teams () != 4) + __builtin_abort (); + if (reduction_and () != 0) + __builtin_abort (); + if (reduction_and_teams () != 0) + __builtin_abort (); + + for (int i = 0; i < N; ++i) + { + rf[i] = 1; + rd[i] = 1; + rcf[i] = 1; + rcd[i] = 1; + } + + if (reduction_or () != 4) + __builtin_abort (); + if (reduction_or_teams () != 4) + __builtin_abort (); + if (reduction_and () != 4) + __builtin_abort (); + if (reduction_and_teams () != 4) 
+ __builtin_abort (); + + rf[10] = 0.0; + rd[15] = 0.0; + rcf[10] = 0.0; + rcd[15] = 0.0; + + if (reduction_or () != 4) + __builtin_abort (); + if (reduction_or_teams () != 4) + __builtin_abort (); + if (reduction_and () != 0) + __builtin_abort (); + if (reduction_and_teams () != 0) + __builtin_abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/reduction-4.c b/libgomp/testsuite/libgomp.c-c++-common/reduction-4.c new file mode 100644 index 0000000..a465e10 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/reduction-4.c @@ -0,0 +1,194 @@ +/* C / C++'s logical AND and OR operators take any scalar argument + which compares (un)equal to 0 - the result 1 or 0 and of type int. + + In this testcase, the int result is again converted to an integer complex + type. + + While having a floating-point/complex array element with || and && can make + sense, having a complex reduction variable is odd but valid. + + Test: int complex reduction variable + int complex array. 
*/ + +#define N 1024 +_Complex char rcc[N]; +_Complex short rcs[N]; +_Complex int rci[N]; +_Complex long long rcl[N]; + +int +reduction_or () +{ + _Complex char orc = 0; + _Complex short ors = 0; + _Complex int ori = 0; + _Complex long orl = 0; + + #pragma omp parallel reduction(||: orc) + for (int i=0; i < N; ++i) + orc = orc || rcl[i]; + + #pragma omp parallel for reduction(||: ors) + for (int i=0; i < N; ++i) + ors = ors || rci[i]; + + #pragma omp parallel for simd reduction(||: ori) + for (int i=0; i < N; ++i) + ori = ori || rcs[i]; + + #pragma omp parallel loop reduction(||: orl) + for (int i=0; i < N; ++i) + orl = orl || rcc[i]; + + return __real__ (orc + ors + ori + orl) + __imag__ (orc + ors + ori + orl); +} + +int +reduction_or_teams () +{ + _Complex char orc = 0; + _Complex short ors = 0; + _Complex int ori = 0; + _Complex long orl = 0; + + #pragma omp teams distribute parallel for reduction(||: orc) + for (int i=0; i < N; ++i) + orc = orc || rcc[i]; + + #pragma omp teams distribute parallel for simd reduction(||: ors) + for (int i=0; i < N; ++i) + ors = ors || rcs[i]; + + #pragma omp teams distribute parallel for reduction(||: ori) + for (int i=0; i < N; ++i) + ori = ori || rci[i]; + + #pragma omp teams distribute parallel for simd reduction(||: orl) + for (int i=0; i < N; ++i) + orl = orl || rcl[i]; + + return __real__ (orc + ors + ori + orl) + __imag__ (orc + ors + ori + orl); +} + +int +reduction_and () +{ + _Complex char andc = 1; + _Complex short ands = 1; + _Complex int andi = 1; + _Complex long andl = 1; + + #pragma omp parallel reduction(&&: andc) + for (int i=0; i < N; ++i) + andc = andc && rcc[i]; + + #pragma omp parallel for reduction(&&: ands) + for (int i=0; i < N; ++i) + ands = ands && rcs[i]; + + #pragma omp parallel for simd reduction(&&: andi) + for (int i=0; i < N; ++i) + andi = andi && rci[i]; + + #pragma omp parallel loop reduction(&&: andl) + for (int i=0; i < N; ++i) + andl = andl && rcl[i]; + + return __real__ (andc + ands + andi + 
andl) + + __imag__ (andc + ands + andi + andl); +} + +int +reduction_and_teams () +{ + _Complex char andc = 1; + _Complex short ands = 1; + _Complex int andi = 1; + _Complex long andl = 1; + + #pragma omp teams distribute parallel for reduction(&&: andc) + for (int i=0; i < N; ++i) + andc = andc && rcl[i]; + + #pragma omp teams distribute parallel for simd reduction(&&: ands) + for (int i=0; i < N; ++i) + ands = ands && rci[i]; + + #pragma omp teams distribute parallel for reduction(&&: andi) + for (int i=0; i < N; ++i) + andi = andi && rcs[i]; + + #pragma omp teams distribute parallel for simd reduction(&&: andl) + for (int i=0; i < N; ++i) + andl = andl && rcc[i]; + + return __real__ (andc + ands + andi + andl) + + __imag__ (andc + ands + andi + andl); +} + +int +main () +{ + for (int i = 0; i < N; ++i) + { + rcc[i] = 0; + rcs[i] = 0; + rci[i] = 0; + rcl[i] = 0; + } + + if (reduction_or () != 0) + __builtin_abort (); + if (reduction_or_teams () != 0) + __builtin_abort (); + if (reduction_and () != 0) + __builtin_abort (); + if (reduction_and_teams () != 0) + __builtin_abort (); + + rcc[10] = 1.0; + rcs[15] = 1.0i; + rci[10] = 1.0; + rcl[15] = 1.0i; + + if (reduction_or () != 4) + __builtin_abort (); + if (reduction_or_teams () != 4) + __builtin_abort (); + if (reduction_and () != 0) + __builtin_abort (); + if (reduction_and_teams () != 0) + __builtin_abort (); + + for (int i = 0; i < N; ++i) + { + rcc[i] = 1; + rcs[i] = 1i; + rci[i] = 1; + rcl[i] = 1 + 1i; + } + + if (reduction_or () != 4) + __builtin_abort (); + if (reduction_or_teams () != 4) + __builtin_abort (); + if (reduction_and () != 4) + __builtin_abort (); + if (reduction_and_teams () != 4) + __builtin_abort (); + + rcc[10] = 0.0; + rcs[15] = 0.0; + rci[10] = 0.0; + rcl[15] = 0.0; + + if (reduction_or () != 4) + __builtin_abort (); + if (reduction_or_teams () != 4) + __builtin_abort (); + if (reduction_and () != 0) + __builtin_abort (); + if (reduction_and_teams () != 0) + __builtin_abort (); + + return 
0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/reduction-5.c b/libgomp/testsuite/libgomp.c-c++-common/reduction-5.c new file mode 100644 index 0000000..52f23e3 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/reduction-5.c @@ -0,0 +1,193 @@ +/* { dg-additional-options "-foffload-options=nvptx-none=-latomic" { target { offload_target_nvptx } } } */ +/* C / C++'s logical AND and OR operators take any scalar argument + which compares (un)equal to 0 - the result 1 or 0 and of type int. + + In this testcase, the int result is again converted to a floating-point + or complex type. + + While having a floating-point/complex array element with || and && can make + sense, having a non-integer/non-bool reduction variable is odd but valid. + + Test: FP reduction variable + FP array - as reduction-1.c but with target */ + +#define N 1024 +_Complex float rcf[N]; +_Complex double rcd[N]; +float rf[N]; +double rd[N]; + +int +reduction_or () +{ + float orf = 0; + double ord = 0; + _Complex float orfc = 0; + _Complex double ordc = 0; + + #pragma omp target parallel reduction(||: orf) map(orf) + for (int i=0; i < N; ++i) + orf = orf || rf[i]; + + #pragma omp target parallel for reduction(||: ord) map(ord) + for (int i=0; i < N; ++i) + ord = ord || rcd[i]; + + #pragma omp target parallel for simd reduction(||: orfc) map(orfc) + for (int i=0; i < N; ++i) + orfc = orfc || rcf[i]; + + #pragma omp target parallel loop reduction(||: ordc) map(ordc) + for (int i=0; i < N; ++i) + ordc = ordc || rcd[i]; + + return orf + ord + __real__ orfc + __real__ ordc; +} + +int +reduction_or_teams () +{ + float orf = 0; + double ord = 0; + _Complex float orfc = 0; + _Complex double ordc = 0; + + #pragma omp target teams distribute parallel for reduction(||: orf) map(orf) + for (int i=0; i < N; ++i) + orf = orf || rf[i]; + + #pragma omp target teams distribute parallel for simd reduction(||: ord) map(ord) + for (int i=0; i < N; ++i) + ord = ord || rcd[i]; + + #pragma omp target teams 
distribute parallel for reduction(||: orfc) map(orfc) + for (int i=0; i < N; ++i) + orfc = orfc || rcf[i]; + + #pragma omp target teams distribute parallel for simd reduction(||: ordc) map(ordc) + for (int i=0; i < N; ++i) + ordc = ordc || rcd[i]; + + return orf + ord + __real__ orfc + __real__ ordc; +} + +int +reduction_and () +{ + float andf = 1; + double andd = 1; + _Complex float andfc = 1; + _Complex double anddc = 1; + + #pragma omp target parallel reduction(&&: andf) map(andf) + for (int i=0; i < N; ++i) + andf = andf && rf[i]; + + #pragma omp target parallel for reduction(&&: andd) map(andd) + for (int i=0; i < N; ++i) + andd = andd && rcd[i]; + + #pragma omp target parallel for simd reduction(&&: andfc) map(andfc) + for (int i=0; i < N; ++i) + andfc = andfc && rcf[i]; + + #pragma omp target parallel loop reduction(&&: anddc) map(anddc) + for (int i=0; i < N; ++i) + anddc = anddc && rcd[i]; + + return andf + andd + __real__ andfc + __real__ anddc; +} + +int +reduction_and_teams () +{ + float andf = 1; + double andd = 1; + _Complex float andfc = 1; + _Complex double anddc = 1; + + #pragma omp target teams distribute parallel for reduction(&&: andf) map(andf) + for (int i=0; i < N; ++i) + andf = andf && rf[i]; + + #pragma omp target teams distribute parallel for simd reduction(&&: andd) map(andd) + for (int i=0; i < N; ++i) + andd = andd && rcd[i]; + + #pragma omp target teams distribute parallel for reduction(&&: andfc) map(andfc) + for (int i=0; i < N; ++i) + andfc = andfc && rcf[i]; + + #pragma omp target teams distribute parallel for simd reduction(&&: anddc) map(anddc) + for (int i=0; i < N; ++i) + anddc = anddc && rcd[i]; + + return andf + andd + __real__ andfc + __real__ anddc; +} + +int +main () +{ + for (int i = 0; i < N; ++i) + { + rf[i] = 0; + rd[i] = 0; + rcf[i] = 0; + rcd[i] = 0; + } + + if (reduction_or () != 0) + __builtin_abort (); + if (reduction_or_teams () != 0) + __builtin_abort (); + if (reduction_and () != 0) + __builtin_abort (); + if 
(reduction_and_teams () != 0) + __builtin_abort (); + + rf[10] = 1.0; + rd[15] = 1.0; + rcf[10] = 1.0; + rcd[15] = 1.0i; + + if (reduction_or () != 4) + __builtin_abort (); + if (reduction_or_teams () != 4) + __builtin_abort (); + if (reduction_and () != 0) + __builtin_abort (); + if (reduction_and_teams () != 0) + __builtin_abort (); + + for (int i = 0; i < N; ++i) + { + rf[i] = 1; + rd[i] = 1; + rcf[i] = 1; + rcd[i] = 1; + } + + if (reduction_or () != 4) + __builtin_abort (); + if (reduction_or_teams () != 4) + __builtin_abort (); + if (reduction_and () != 4) + __builtin_abort (); + if (reduction_and_teams () != 4) + __builtin_abort (); + + rf[10] = 0.0; + rd[15] = 0.0; + rcf[10] = 0.0; + rcd[15] = 0.0; + + if (reduction_or () != 4) + __builtin_abort (); + if (reduction_or_teams () != 4) + __builtin_abort (); + if (reduction_and () != 0) + __builtin_abort (); + if (reduction_and_teams () != 0) + __builtin_abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/reduction-6.c b/libgomp/testsuite/libgomp.c-c++-common/reduction-6.c new file mode 100644 index 0000000..62e8150 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/reduction-6.c @@ -0,0 +1,196 @@ +/* { dg-additional-options "-foffload-options=nvptx-none=-latomic" { target { offload_target_nvptx } } } */ +/* C / C++'s logical AND and OR operators take any scalar argument + which compares (un)equal to 0 - the result 1 or 0 and of type int. + + In this testcase, the int result is again converted to an integer complex + type. + + While having a floating-point/complex array element with || and && can make + sense, having a complex reduction variable is odd but valid. + + Test: int complex reduction variable + int complex array. + as reduction-4.c but with target. 
*/ + +#define N 1024 +_Complex char rcc[N]; +_Complex short rcs[N]; +_Complex int rci[N]; +_Complex long long rcl[N]; + +int +reduction_or () +{ + _Complex char orc = 0; + _Complex short ors = 0; + _Complex int ori = 0; + _Complex long orl = 0; + + #pragma omp target parallel reduction(||: orc) map(orc) + for (int i=0; i < N; ++i) + orc = orc || rcl[i]; + + #pragma omp target parallel for reduction(||: ors) map(ors) + for (int i=0; i < N; ++i) + ors = ors || rci[i]; + + #pragma omp target parallel for simd reduction(||: ori) map(ori) + for (int i=0; i < N; ++i) + ori = ori || rcs[i]; + + #pragma omp target parallel loop reduction(||: orl) map(orl) + for (int i=0; i < N; ++i) + orl = orl || rcc[i]; + + return __real__ (orc + ors + ori + orl) + __imag__ (orc + ors + ori + orl); +} + +int +reduction_or_teams () +{ + _Complex char orc = 0; + _Complex short ors = 0; + _Complex int ori = 0; + _Complex long orl = 0; + + #pragma omp target teams distribute parallel for reduction(||: orc) map(orc) + for (int i=0; i < N; ++i) + orc = orc || rcc[i]; + + #pragma omp target teams distribute parallel for simd reduction(||: ors) map(ors) + for (int i=0; i < N; ++i) + ors = ors || rcs[i]; + + #pragma omp target teams distribute parallel for reduction(||: ori) map(ori) + for (int i=0; i < N; ++i) + ori = ori || rci[i]; + + #pragma omp target teams distribute parallel for simd reduction(||: orl) map(orl) + for (int i=0; i < N; ++i) + orl = orl || rcl[i]; + + return __real__ (orc + ors + ori + orl) + __imag__ (orc + ors + ori + orl); +} + +int +reduction_and () +{ + _Complex char andc = 1; + _Complex short ands = 1; + _Complex int andi = 1; + _Complex long andl = 1; + + #pragma omp target parallel reduction(&&: andc) map(andc) + for (int i=0; i < N; ++i) + andc = andc && rcc[i]; + + #pragma omp target parallel for reduction(&&: ands) map(ands) + for (int i=0; i < N; ++i) + ands = ands && rcs[i]; + + #pragma omp target parallel for simd reduction(&&: andi) map(andi) + for (int i=0; i 
< N; ++i) + andi = andi && rci[i]; + + #pragma omp target parallel loop reduction(&&: andl) map(andl) + for (int i=0; i < N; ++i) + andl = andl && rcl[i]; + + return __real__ (andc + ands + andi + andl) + + __imag__ (andc + ands + andi + andl); +} + +int +reduction_and_teams () +{ + _Complex char andc = 1; + _Complex short ands = 1; + _Complex int andi = 1; + _Complex long andl = 1; + + #pragma omp target teams distribute parallel for reduction(&&: andc) map(andc) + for (int i=0; i < N; ++i) + andc = andc && rcl[i]; + + #pragma omp target teams distribute parallel for simd reduction(&&: ands) map(ands) + for (int i=0; i < N; ++i) + ands = ands && rci[i]; + + #pragma omp target teams distribute parallel for reduction(&&: andi) map(andi) + for (int i=0; i < N; ++i) + andi = andi && rcs[i]; + + #pragma omp target teams distribute parallel for simd reduction(&&: andl) map(andl) + for (int i=0; i < N; ++i) + andl = andl && rcc[i]; + + return __real__ (andc + ands + andi + andl) + + __imag__ (andc + ands + andi + andl); +} + +int +main () +{ + for (int i = 0; i < N; ++i) + { + rcc[i] = 0; + rcs[i] = 0; + rci[i] = 0; + rcl[i] = 0; + } + + if (reduction_or () != 0) + __builtin_abort (); + if (reduction_or_teams () != 0) + __builtin_abort (); + if (reduction_and () != 0) + __builtin_abort (); + if (reduction_and_teams () != 0) + __builtin_abort (); + + rcc[10] = 1.0; + rcs[15] = 1.0i; + rci[10] = 1.0; + rcl[15] = 1.0i; + + if (reduction_or () != 4) + __builtin_abort (); + if (reduction_or_teams () != 4) + __builtin_abort (); + if (reduction_and () != 0) + __builtin_abort (); + if (reduction_and_teams () != 0) + __builtin_abort (); + + for (int i = 0; i < N; ++i) + { + rcc[i] = 1; + rcs[i] = 1i; + rci[i] = 1; + rcl[i] = 1 + 1i; + } + + if (reduction_or () != 4) + __builtin_abort (); + if (reduction_or_teams () != 4) + __builtin_abort (); + if (reduction_and () != 4) + __builtin_abort (); + if (reduction_and_teams () != 4) + __builtin_abort (); + + rcc[10] = 0.0; + rcs[15] = 
0.0; + rci[10] = 0.0; + rcl[15] = 0.0; + + if (reduction_or () != 4) + __builtin_abort (); + if (reduction_or_teams () != 4) + __builtin_abort (); + if (reduction_and () != 0) + __builtin_abort (); + if (reduction_and_teams () != 0) + __builtin_abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/refcount-1.c b/libgomp/testsuite/libgomp.c-c++-common/refcount-1.c new file mode 100644 index 0000000..5ccd908 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/refcount-1.c @@ -0,0 +1,61 @@ +#include <omp.h> +#include <stdlib.h> + +int main (void) +{ + int d = omp_get_default_device (); + int id = omp_get_initial_device (); + + if (d < 0 || d >= omp_get_num_devices ()) + d = id; + + unsigned int a = 0xcdcdcdcd; + #pragma omp target enter data map (to:a) + + a = 0xabababab; + unsigned char *p = (unsigned char *) &a; + unsigned char *q = p + 2; + + #pragma omp target enter data map (alloc:p[:1], q[:1]) + + if (d != id) + { + if (!omp_target_is_present (&a, d)) + abort (); + if (!omp_target_is_present (&p[0], d)) + abort (); + if (!omp_target_is_present (&q[0], d)) + abort (); + } + + #pragma omp target exit data map (release:a) + + if (d != id) + { + if (!omp_target_is_present (&a, d)) + abort (); + if (!omp_target_is_present (&p[0], d)) + abort (); + if (!omp_target_is_present (&q[0], d)) + abort (); + } + + #pragma omp target exit data map (from:q[:1]) + + if (d != id) + { + if (omp_target_is_present (&a, d)) + abort (); + if (omp_target_is_present (&p[0], d)) + abort (); + if (omp_target_is_present (&q[0], d)) + abort (); + + if (q[0] != 0xcd) + abort (); + if (p[0] != 0xab) + abort (); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/scope-1.c b/libgomp/testsuite/libgomp.c-c++-common/scope-1.c new file mode 100644 index 0000000..d262312 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/scope-1.c @@ -0,0 +1,50 @@ +#ifdef __cplusplus +extern "C" +#endif +void abort (); + +int +main () +{ + int a[64] = {}; 
+ int r = 0, r2 = 0, i; + #pragma omp parallel + { + #pragma omp scope nowait + #pragma omp scope nowait + #pragma omp for + for (i = 0; i < 64; i++) + a[i] += 1; + #pragma omp scope reduction(+: r) nowait + { + #pragma omp for nowait + for (i = 0; i < 64; i++) + { + r += i; + if (a[i] != 1) + abort (); + } + #pragma omp barrier + } + #pragma omp barrier + if (r != 64 * 63 / 2) + abort (); + #pragma omp scope nowait private (i) + #pragma omp scope reduction(+: r2) + { + #pragma omp for nowait + for (i = 0; i < 64; i++) + { + r2 += 2 * i; + a[i] += i; + } + } + if (r2 != 64 * 63) + abort (); + #pragma omp for nowait + for (i = 0; i < 64; i++) + if (a[i] != i + 1) + abort (); + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/struct-elem-1.c b/libgomp/testsuite/libgomp.c-c++-common/struct-elem-1.c new file mode 100644 index 0000000..5f40fd7 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/struct-elem-1.c @@ -0,0 +1,29 @@ +#include <omp.h> +#include <stdlib.h> + +struct S +{ + int a, b; +}; +typedef struct S S; + +int main (void) +{ + int d = omp_get_default_device (); + int id = omp_get_initial_device (); + + if (d < 0 || d >= omp_get_num_devices ()) + d = id; + + S s; + #pragma omp target enter data map (alloc: s.a, s.b) + #pragma omp target exit data map (release: s.b) + + /* OpenMP 5.0 structure element mapping rules describe that elements of same + structure variable should allocate/deallocate in a uniform fashion, so + "s.a" should be removed together by above 'exit data'. 
*/ + if (d != id && omp_target_is_present (&s.a, d)) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/struct-elem-2.c b/libgomp/testsuite/libgomp.c-c++-common/struct-elem-2.c new file mode 100644 index 0000000..c50b299 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/struct-elem-2.c @@ -0,0 +1,47 @@ +#include <omp.h> +#include <stdlib.h> + +struct S +{ + int a, b, c, d; +}; +typedef struct S S; + +int main (void) +{ + int d = omp_get_default_device (); + int id = omp_get_initial_device (); + + if (d < 0 || d >= omp_get_num_devices ()) + d = id; + + S s; + #pragma omp target enter data map (alloc: s.a, s.b, s.c, s.d) + #pragma omp target enter data map (alloc: s.c) + #pragma omp target enter data map (alloc: s.b, s.d) + #pragma omp target enter data map (alloc: s.a, s.c, s.b) + + #pragma omp target exit data map (release: s.a) + #pragma omp target exit data map (release: s.d) + #pragma omp target exit data map (release: s.c) + #pragma omp target exit data map (release: s.b) + + /* OpenMP 5.0 structure element mapping rules describe that elements of same + structure variable should allocate/deallocate in a uniform fashion, so + all elements of 's' should be removed together by above 'exit data's. 
*/ + if (d != id) + { + if (omp_target_is_present (&s, d)) + abort (); + if (omp_target_is_present (&s.a, d)) + abort (); + if (omp_target_is_present (&s.b, d)) + abort (); + if (omp_target_is_present (&s.c, d)) + abort (); + if (omp_target_is_present (&s.d, d)) + abort (); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/struct-elem-3.c b/libgomp/testsuite/libgomp.c-c++-common/struct-elem-3.c new file mode 100644 index 0000000..e2b6a6a --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/struct-elem-3.c @@ -0,0 +1,69 @@ +#include <omp.h> +#include <stdlib.h> + +struct S +{ + int a, b, c, d; +}; +typedef struct S S; + +int main (void) +{ + int d = omp_get_default_device (); + int id = omp_get_initial_device (); + + if (d < 0 || d >= omp_get_num_devices ()) + d = id; + + S s; + + #pragma omp target enter data map (alloc: s) + #pragma omp target enter data map (alloc: s) + + #pragma omp target exit data map (release: s.a) + #pragma omp target exit data map (release: s.b) + + /* OpenMP 5.0 structure element mapping rules describe that elements of same + structure variable should allocate/deallocate in a uniform fashion, so + all elements of 's' should be removed together by above 'exit data's. 
*/ + if (d != id) + { + if (omp_target_is_present (&s, d)) + abort (); + if (omp_target_is_present (&s.a, d)) + abort (); + if (omp_target_is_present (&s.b, d)) + abort (); + if (omp_target_is_present (&s.c, d)) + abort (); + if (omp_target_is_present (&s.d, d)) + abort (); + } + + #pragma omp target enter data map (alloc: s.a, s.b) + #pragma omp target enter data map (alloc: s.a) + #pragma omp target enter data map (alloc: s.b) + + #pragma omp target exit data map (release: s) + #pragma omp target exit data map (release: s) + #pragma omp target exit data map (release: s) + + /* OpenMP 5.0 structure element mapping rules describe that elements of same + structure variable should allocate/deallocate in a uniform fashion, so + all elements of 's' should be removed together by above 'exit data's. */ + if (d != id) + { + if (omp_target_is_present (&s, d)) + abort (); + if (omp_target_is_present (&s.a, d)) + abort (); + if (omp_target_is_present (&s.b, d)) + abort (); + if (omp_target_is_present (&s.c, d)) + abort (); + if (omp_target_is_present (&s.d, d)) + abort (); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/struct-elem-4.c b/libgomp/testsuite/libgomp.c-c++-common/struct-elem-4.c new file mode 100644 index 0000000..9a23b4f --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/struct-elem-4.c @@ -0,0 +1,56 @@ +#include <omp.h> +#include <stdlib.h> + +struct S +{ + int a, b, c, d, e; +}; +typedef struct S S; + +int main (void) +{ + int d = omp_get_default_device (); + int id = omp_get_initial_device (); + + if (d < 0 || d >= omp_get_num_devices ()) + d = id; + + S s = { 1, 2, 3, 4, 5 }; + #pragma omp target enter data map (to:s) + + int *p = &s.b; + int *q = &s.d; + #pragma omp target enter data map (alloc: p[:1], q[:1]) + + s.b = 88; + s.d = 99; + + #pragma omp target exit data map (release: s) + if (d != id) + { + if (!omp_target_is_present (&s, d)) + abort (); + if (!omp_target_is_present (&p[0], d)) + abort (); + if 
(!omp_target_is_present (&q[0], d)) + abort (); + } + + #pragma omp target exit data map (from: q[:1]) + if (d != id) + { + if (omp_target_is_present (&s, d)) + abort (); + if (omp_target_is_present (&p[0], d)) + abort (); + if (omp_target_is_present (&q[0], d)) + abort (); + + if (q[0] != 4) + abort (); + if (p[0] != 88) + abort (); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/struct-elem-5.c b/libgomp/testsuite/libgomp.c-c++-common/struct-elem-5.c new file mode 100644 index 0000000..31a2fa5 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/struct-elem-5.c @@ -0,0 +1,20 @@ +/* { dg-do run { target offload_device_nonshared_as } } */ + +struct S +{ + int a, b, c; +}; +typedef struct S S; + +int main (void) +{ + S s; + #pragma omp target data map (alloc: s.a, s.c) + { + #pragma omp target enter data map (alloc: s.b) + } + + return 0; +} +/* { dg-output "Trying to map into device \\\[\[0-9a-fA-FxX\]+..\[0-9a-fA-FxX\]+\\\) structure element when other mapped elements from the same structure weren't mapped together with it" } */ +/* { dg-shouldfail "" } */ diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-41.c b/libgomp/testsuite/libgomp.c-c++-common/target-41.c new file mode 100644 index 0000000..3aca19a --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/target-41.c @@ -0,0 +1,28 @@ +/* PR libgomp/100573 */ + +int +foo (int a) +{ + if (a == 0) + { + int c; + a++; + #pragma omp target map(tofrom:a) + a = foo (a); + #pragma omp target data map(tofrom:a) + c = a != 2; + if (c) + return -1; + #pragma omp target enter data map(to:a) + #pragma omp target exit data map(from:a) + } + return a + 1; +} + +int +main () +{ + if (foo (0) != 3) + __builtin_abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-42.c b/libgomp/testsuite/libgomp.c-c++-common/target-42.c new file mode 100644 index 0000000..a334f47 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/target-42.c @@ -0,0 +1,26 @@ +/* 
PR libgomp/100573 */ + +int +foo (int a) +{ + #pragma omp target firstprivate(a) + if (a == 0) + { + a++; + #pragma omp target map(tofrom:a) /* { dg-warning "'target' construct inside of 'target' region" } */ + a = foo (a); + #pragma omp target data map(tofrom:a) /* { dg-warning "'target data' construct inside of 'target' region" } */ + a++; + #pragma omp target enter data map(to:a) /* { dg-warning "'target enter data' construct inside of 'target' region" } */ + #pragma omp target exit data map(from:a) /* { dg-warning "'target exit data' construct inside of 'target' region" } */ + } + return a + 1; +} + +int +main () +{ + if (foo (1) != 2) + __builtin_abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-45.c b/libgomp/testsuite/libgomp.c-c++-common/target-45.c new file mode 100644 index 0000000..81acee8 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/target-45.c @@ -0,0 +1,30 @@ +/* { dg-xfail-run-if TODO { offload_device_intel_mic } } */ + +#include <omp.h> +#include <stdlib.h> + +int main (void) +{ + + int host_device_num = omp_get_device_num (); + + if (host_device_num != omp_get_initial_device ()) + abort (); + + int device_num; + int initial_device; + + #pragma omp target map(from: device_num, initial_device) + { + initial_device = omp_is_initial_device (); + device_num = omp_get_device_num (); + } + + if (initial_device && host_device_num != device_num) + abort (); + + if (!initial_device && host_device_num == device_num) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-in-reduction-1.c b/libgomp/testsuite/libgomp.c-c++-common/target-in-reduction-1.c new file mode 100644 index 0000000..813b5d9 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/target-in-reduction-1.c @@ -0,0 +1,104 @@ +void +foo (int x, int *y, int n, int v) +{ + int z[3] = { 45, 46, 47 }; + int u[n], w[n], i; + for (i = 0; i < n; i++) + w[i] = u[i] = n + i; + #pragma omp taskgroup task_reduction (+: x, 
y[:2], z[1:2], u, w[1:v]) + { + #pragma omp task in_reduction (+: x, y[:2], z[1:2], u, w[1:v]) + { + x++; + y[0] += 2; + y[1] += 3; + z[1] += 4; + u[0] += 5; + w[1] += 6; + } + #pragma omp target in_reduction (+: x, y[:2], z[1:2], u, w[1:v]) + { + x += 4; + y[0] += 5; + y[1] += 6; + z[2] += 7; + u[1] += 8; + w[2] += 7; + } + #pragma omp target in_reduction (+: x, y[:v], z[1:v], u, w[1:2]) + { + x += 9; + y[0] += 10; + y[1] += 11; + z[1] += 12; + u[2] += 13; + w[1] += 14; + } + } + if (x != 56 || y[0] != 60 || y[1] != 64) + __builtin_abort (); + if (z[0] != 45 || z[1] != 62 || z[2] != 54) + __builtin_abort (); + if (u[0] != 8 || u[1] != 12 || u[2] != 18) + __builtin_abort (); + if (w[0] != 3 || w[1] != 24 || w[2] != 12) + __builtin_abort (); +} + +void +bar (int x, int *y, int n, int v) +{ + int z[3] = { 45, 46, 47 }; + int u[n], w[n], i; + for (i = 0; i < n; i++) + w[i] = u[i] = n + i; + #pragma omp parallel master + #pragma omp taskgroup task_reduction (+: x, y[:2], z[1:2], u, w[1:v]) + { + #pragma omp task in_reduction (+: x, y[:2], z[1:2], u, w[1:v]) + { + x++; + y[0] += 2; + y[1] += 3; + z[1] += 4; + u[0] += 5; + w[1] += 6; + } + #pragma omp target in_reduction (+: x, y[:2], z[1:2], u, w[1:v]) + { + x += 4; + y[0] += 5; + y[1] += 6; + z[2] += 7; + u[1] += 8; + w[2] += 7; + } + #pragma omp target in_reduction (+: x, y[:v], z[1:v], u, w[1:2]) + { + x += 9; + y[0] += 10; + y[1] += 11; + z[1] += 12; + u[2] += 13; + w[1] += 14; + } + } + if (x != 56 || y[0] != 77 || y[1] != 84) + __builtin_abort (); + if (z[0] != 45 || z[1] != 62 || z[2] != 54) + __builtin_abort (); + if (u[0] != 8 || u[1] != 12 || u[2] != 18) + __builtin_abort (); + if (w[0] != 3 || w[1] != 24 || w[2] != 12) + __builtin_abort (); +} + +int +main () +{ + int y[2] = { 43, 44 }; + #pragma omp parallel master + foo (42, y, 3, 2); + bar (42, y, 3, 2); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-in-reduction-2.c 
b/libgomp/testsuite/libgomp.c-c++-common/target-in-reduction-2.c new file mode 100644 index 0000000..dd56965 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/target-in-reduction-2.c @@ -0,0 +1,173 @@ +struct S { int a, b, c[2]; }; +#pragma omp declare reduction (+: struct S : (omp_out.a += omp_in.a, omp_out.b += omp_in.b)) \ + initializer (omp_priv = { 0, 0, { 0, 0 } }) + +void +foo (struct S x, struct S *y, int n, int v) +{ + struct S z[3] = { { 45, 47, {} }, { 46, 48, {} }, { 47, 49, {} } }; + struct S u[n], w[n]; + int i; + for (i = 0; i < n; i++) + { + w[i].a = u[i].a = n + i; + w[i].b = u[i].b = n - i; + w[i].c[0] = u[i].c[0] = 0; + w[i].c[1] = u[i].c[1] = 0; + } + #pragma omp taskgroup task_reduction (+: x, y[:2], z[1:2], u, w[1:v]) + { + #pragma omp task in_reduction (+: x, y[:2], z[1:2], u, w[1:v]) + { + x.a++; + x.b++; + y[0].a += 2; + y[0].b += 12; + y[1].a += 3; + y[1].b += 13; + z[1].a += 4; + z[1].b += 14; + u[0].a += 5; + u[0].b += 15; + w[1].a += 6; + w[1].b += 16; + } + #pragma omp target in_reduction (+: x, y[:2], z[1:2], u, w[1:v]) map(tofrom: x.a, x.b, x.c[:2]) + { + x.a += 4; + x.b += 14; + y[0].a += 5; + y[0].b += 15; + y[1].a += 6; + y[1].b += 16; + z[2].a += 7; + z[2].b += 17; + u[1].a += 8; + u[1].b += 18; + w[2].a += 7; + w[2].b += 17; + } + #pragma omp target in_reduction (+: x, y[:v], z[1:v], u, w[1:2]) + { + x.a += 9; + x.b += 19; + y[0].a += 10; + y[0].b += 20; + y[1].a += 11; + y[1].b += 21; + z[1].a += 12; + z[1].b += 22; + u[2].a += 13; + u[2].b += 23; + w[1].a += 14; + w[1].b += 24; + } + } + if (x.a != 56 || y[0].a != 60 || y[1].a != 64) + __builtin_abort (); + if (x.b != 86 || y[0].b != 100 || y[1].b != 104) + __builtin_abort (); + if (z[0].a != 45 || z[1].a != 62 || z[2].a != 54) + __builtin_abort (); + if (z[0].b != 47 || z[1].b != 84 || z[2].b != 66) + __builtin_abort (); + if (u[0].a != 8 || u[1].a != 12 || u[2].a != 18) + __builtin_abort (); + if (u[0].b != 18 || u[1].b != 20 || u[2].b != 24) + __builtin_abort (); + 
if (w[0].a != 3 || w[1].a != 24 || w[2].a != 12) + __builtin_abort (); + if (w[0].b != 3 || w[1].b != 42 || w[2].b != 18) + __builtin_abort (); +} + +void +bar (struct S x, struct S *y, int n, int v) +{ + struct S z[3] = { { 45, 47, {} }, { 46, 48, {} }, { 47, 49, {} } }; + struct S u[n], w[n]; + int i; + for (i = 0; i < n; i++) + { + w[i].a = u[i].a = n + i; + w[i].b = u[i].b = n - i; + w[i].c[0] = u[i].c[0] = 0; + w[i].c[1] = u[i].c[1] = 0; + } + #pragma omp parallel master + #pragma omp taskgroup task_reduction (+: x, y[:2], z[1:2], u, w[1:v]) + { + #pragma omp task in_reduction (+: x, y[:2], z[1:2], u, w[1:v]) + { + x.a++; + x.b++; + y[0].a += 2; + y[0].b += 12; + y[1].a += 3; + y[1].b += 13; + z[1].a += 4; + z[1].b += 14; + u[0].a += 5; + u[0].b += 15; + w[1].a += 6; + w[1].b += 16; + } + #pragma omp target in_reduction (+: x, y[:2], z[1:2], u, w[1:v]) map(tofrom: x.a, x.b, x.c[:2]) + { + x.a += 4; + x.b += 14; + y[0].a += 5; + y[0].b += 15; + y[1].a += 6; + y[1].b += 16; + z[2].a += 7; + z[2].b += 17; + u[1].a += 8; + u[1].b += 18; + w[2].a += 7; + w[2].b += 17; + } + #pragma omp target in_reduction (+: x, y[:v], z[1:v], u, w[1:2]) + { + x.a += 9; + x.b += 19; + y[0].a += 10; + y[0].b += 20; + y[1].a += 11; + y[1].b += 21; + z[1].a += 12; + z[1].b += 22; + u[2].a += 13; + u[2].b += 23; + w[1].a += 14; + w[1].b += 24; + } + } + if (x.a != 56 || y[0].a != 77 || y[1].a != 84) + __builtin_abort (); + if (x.b != 86 || y[0].b != 147 || y[1].b != 154) + __builtin_abort (); + if (z[0].a != 45 || z[1].a != 62 || z[2].a != 54) + __builtin_abort (); + if (z[0].b != 47 || z[1].b != 84 || z[2].b != 66) + __builtin_abort (); + if (u[0].a != 8 || u[1].a != 12 || u[2].a != 18) + __builtin_abort (); + if (u[0].b != 18 || u[1].b != 20 || u[2].b != 24) + __builtin_abort (); + if (w[0].a != 3 || w[1].a != 24 || w[2].a != 12) + __builtin_abort (); + if (w[0].b != 3 || w[1].b != 42 || w[2].b != 18) + __builtin_abort (); +} + +int +main () +{ + struct S x = { 42, 52 }; + struct S 
y[2] = { { 43, 53 }, { 44, 54 } }; + #pragma omp parallel master + foo (x, y, 3, 2); + bar (x, y, 3, 2); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-detach-12.c b/libgomp/testsuite/libgomp.c-c++-common/task-detach-12.c new file mode 100644 index 0000000..6583318 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/task-detach-12.c @@ -0,0 +1,19 @@ +/* { dg-do run } */ +/* { dg-options "-fopenmp" } */ + +#include <omp.h> + +int +main () +{ + struct S { int a[7]; } s = { { 1, 2, 3, 4, 5, 6, 7 } }; + omp_event_handle_t x; + #pragma omp parallel master + #pragma omp task firstprivate (s) detach (x) + { + if (s.a[3] != 4) + __builtin_abort (); + omp_fulfill_event (x); + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-detach-13.c b/libgomp/testsuite/libgomp.c-c++-common/task-detach-13.c new file mode 100644 index 0000000..9622fd8 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/task-detach-13.c @@ -0,0 +1,59 @@ +/* { dg-do run { target *-*-linux* *-*-gnu* *-*-freebsd* } } */ +/* { dg-timeout 10 } */ + +/* Test that omp_fulfill_event works when called from an external + non-OpenMP thread. 
*/ + +#include <omp.h> +#include <unistd.h> +#include <pthread.h> +#include <stdio.h> + +int finished = 0; +int event_pending = 0; +omp_event_handle_t detach_event; + +void * +fulfill_thread (void *) +{ + while (!__atomic_load_n (&finished, __ATOMIC_RELAXED)) + { + if (__atomic_load_n (&event_pending, __ATOMIC_ACQUIRE)) + { + omp_fulfill_event (detach_event); + __atomic_store_n (&event_pending, 0, __ATOMIC_RELEASE); + } + + sleep(1); + } + + return 0; +} + +int +main (void) +{ + pthread_t thr; + int dep; + pthread_create (&thr, NULL, fulfill_thread, 0); + + #pragma omp parallel + #pragma omp single + { + omp_event_handle_t ev; + + #pragma omp task depend (out: dep) detach (ev) + { + detach_event = ev; + __atomic_store_n (&event_pending, 1, __ATOMIC_RELEASE); + } + + #pragma omp task depend (in: dep) + { + __atomic_store_n (&finished, 1, __ATOMIC_RELAXED); + } + } + + pthread_join (thr, 0); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-detach-6.c b/libgomp/testsuite/libgomp.c-c++-common/task-detach-6.c index e5c2291..f18b57b 100644 --- a/libgomp/testsuite/libgomp.c-c++-common/task-detach-6.c +++ b/libgomp/testsuite/libgomp.c-c++-common/task-detach-6.c @@ -2,6 +2,9 @@ #include <omp.h> #include <assert.h> +#include <unistd.h> // For 'alarm'. + +#include "on_device_arch.h" /* Test tasks with detach clause on an offload device. Each device thread spawns off a chain of tasks, that can then be executed by @@ -9,6 +12,11 @@ int main (void) { + //TODO See '../libgomp.c/pr99555-1.c'. + if (on_device_arch_nvptx ()) + alarm (4); /*TODO Until resolved, make sure that we exit quickly, with error status. 
+ { dg-xfail-run-if "PR99555" { offload_device_nvptx } } */ + int x = 0, y = 0, z = 0; int thread_count; omp_event_handle_t detach_event1, detach_event2; diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-15.c b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-15.c new file mode 100644 index 0000000..5e87139 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-15.c @@ -0,0 +1,61 @@ +/* PR middle-end/101167 */ + +extern +#ifdef __cplusplus +"C" +#endif +void abort (void); + +struct S { int a, b, c[2]; }; + +void +init (struct S *x) +{ + x->a = 0; + x->b = 0; + x->c[0] = 0; + x->c[1] = 0; +} + +void +merge (struct S *x, struct S *y) +{ + x->a += y->a; + x->b += y->b; +} + +#pragma omp declare reduction (+: struct S : merge (&omp_out, &omp_in)) initializer (init (&omp_priv)) + +void +foo (struct S x) +{ + #pragma omp taskgroup task_reduction (+: x) + { + #pragma omp task in_reduction (+: x) + { + x.a++; + x.b++; + } + #pragma omp task in_reduction (+: x) + { + x.a += 4; + x.b += 14; + } + #pragma omp task in_reduction (+: x) + { + x.a += 9; + x.b += 19; + } + } + if (x.a != 56 || x.b != 86) + abort (); +} + +int +main () +{ + struct S x = { 42, 52 }; + #pragma omp parallel master num_threads(3) + foo (x); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-16.c b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-16.c new file mode 100644 index 0000000..44d32c7 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-16.c @@ -0,0 +1,76 @@ +extern +#ifdef __cplusplus +"C" +#endif +void abort (void); +int a, b[3] = { 1, 1, 1 }; +unsigned long int c[2] = { ~0UL, ~0UL }; + +void +bar (int i) +{ + #pragma omp task in_reduction (*: b[:3]) in_reduction (&: c[1:]) \ + in_reduction (+: a) + { + a += 4; + b[1] *= 4; + c[1] &= ~(1UL << (i + 16)); + } +} + +void +foo (int x) +{ + #pragma omp scope reduction (task, +: a) + { + #pragma omp scope reduction (task, *: b) + { + #pragma omp scope 
reduction (task, &: c[1:1]) + { + #pragma omp barrier + #pragma omp sections + { + { + a++; b[0] *= 2; bar (2); b[2] *= 3; c[1] &= ~(1UL << 2); + } + #pragma omp section + { b[0] *= 2; bar (4); b[2] *= 3; c[1] &= ~(1UL << 4); a++; } + #pragma omp section + { bar (6); b[2] *= 3; c[1] &= ~(1UL << 6); a++; b[0] *= 2; } + #pragma omp section + { b[2] *= 3; c[1] &= ~(1UL << 8); a++; b[0] *= 2; bar (8); } + #pragma omp section + { c[1] &= ~(1UL << 10); a++; b[0] *= 2; bar (10); b[2] *= 3; } + #pragma omp section + { a++; b[0] *= 2; b[2] *= 3; c[1] &= ~(1UL << 12); bar (12); } + #pragma omp section + if (x) + { + a++; b[0] *= 2; b[2] *= 3; bar (14); c[1] &= ~(1UL << 14); + } + } + } + } + } +} + +int +main () +{ + volatile int one = 1; + foo (!one); + if (a != 30 || b[0] != 64 || b[1] != (1 << 12) || b[2] != 3 * 3 * 3 * 3 * 3 * 3 + || c[0] != ~0UL || c[1] != ~0x15541554UL) + abort (); + a = 0; + b[0] = 1; + b[1] = 1; + b[2] = 1; + c[1] = ~0UL; + #pragma omp parallel + foo (one); + if (a != 35 || b[0] != 128 || b[1] != (1 << 14) || b[2] != 3 * 3 * 3 * 3 * 3 * 3 * 3 + || c[0] != ~0UL || c[1] != ~0x55545554UL) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/taskloop-4.c b/libgomp/testsuite/libgomp.c-c++-common/taskloop-4.c index 4ac1b5a..b949938 100644 --- a/libgomp/testsuite/libgomp.c-c++-common/taskloop-4.c +++ b/libgomp/testsuite/libgomp.c-c++-common/taskloop-4.c @@ -85,7 +85,8 @@ main () if (test (7, 21, 2, 15, grainsize, &ntasks, &min_iters, &max_iters) != 7 || ntasks != 1 || min_iters != 7 || max_iters != 7) __builtin_abort (); - /* If num_tasks is present, # of task loop iters is min (# of loop iters, num_tasks). */ + /* If num_tasks is present, # of tasks is min (# of loop iters, num_tasks) + and each task has at least one iteration. 
*/ if (test (-51, 2500, 48, 9, num_tasks, &ntasks, &min_iters, &max_iters) != 54 || ntasks != 9) __builtin_abort (); diff --git a/libgomp/testsuite/libgomp.c-c++-common/taskloop-5.c b/libgomp/testsuite/libgomp.c-c++-common/taskloop-5.c new file mode 100644 index 0000000..1b64a6d --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/taskloop-5.c @@ -0,0 +1,135 @@ +/* { dg-do run } */ +/* { dg-options "-O2" } */ + +int u[64], v, w[64]; + +__attribute__((noinline, noclone)) int +test (int a, int b, int c, int d, void (*fn) (int, int, int, int), + int *num_tasks, int *min_iters, int *max_iters, int *sep) +{ + int i, j, t = 0; + __builtin_memset (u, 0, sizeof u); + v = 0; + fn (a, b, c, d); + *min_iters = 0; + *max_iters = 0; + *num_tasks = v; + *sep = v; + if (v) + { + *min_iters = u[0]; + *max_iters = u[0]; + t = u[0]; + for (i = 1; i < v; i++) + { + if (*min_iters > u[i]) + *min_iters = u[i]; + if (*max_iters < u[i]) + *max_iters = u[i]; + t += u[i]; + } + if (*min_iters != *max_iters) + { + for (i = 0; i < v - 1; i++) + { + int min_idx = i; + for (j = i + 1; j < v; j++) + if (w[min_idx] > w[j]) + min_idx = j; + if (min_idx != i) + { + int tem = u[i]; + u[i] = u[min_idx]; + u[min_idx] = tem; + tem = w[i]; + w[i] = w[min_idx]; + w[min_idx] = tem; + } + } + if (u[0] != *max_iters) + __builtin_abort (); + for (i = 1; i < v; i++) + if (u[i] != u[i - 1]) + { + if (*sep != v || u[i] != *min_iters) + __builtin_abort (); + *sep = i; + } + } + } + return t; +} + +void +grainsize (int a, int b, int c, int d) +{ + int i, j = 0, k = 0; + #pragma omp taskloop firstprivate (j, k) grainsize(strict:d) + for (i = a; i < b; i += c) + { + if (j == 0) + { + #pragma omp atomic capture + k = v++; + if (k >= 64) + __builtin_abort (); + w[k] = i; + } + u[k] = ++j; + } +} + +void +num_tasks (int a, int b, int c, int d) +{ + int i, j = 0, k = 0; + #pragma omp taskloop firstprivate (j, k) num_tasks(strict:d) + for (i = a; i < b; i += c) + { + if (j == 0) + { + #pragma omp atomic capture + 
k = v++; + if (k >= 64) + __builtin_abort (); + w[k] = i; + } + u[k] = ++j; + } +} + +int +main () +{ + #pragma omp parallel + #pragma omp single + { + int min_iters, max_iters, ntasks, sep; + /* If grainsize is present and has strict modifier, # of task loop iters is == grainsize, + except that it can be smaller on the last task. */ + if (test (0, 79, 1, 17, grainsize, &ntasks, &min_iters, &max_iters, &sep) != 79 + || ntasks != 5 || min_iters != 11 || max_iters != 17 || sep != 4) + __builtin_abort (); + if (test (-49, 2541, 7, 28, grainsize, &ntasks, &min_iters, &max_iters, &sep) != 370 + || ntasks != 14 || min_iters != 6 || max_iters != 28 || sep != 13) + __builtin_abort (); + if (test (7, 21, 2, 15, grainsize, &ntasks, &min_iters, &max_iters, &sep) != 7 + || ntasks != 1 || min_iters != 7 || max_iters != 7 || sep != 1) + __builtin_abort (); + /* If num_tasks is present, # of tasks is min (# of loop iters, num_tasks) + and each task has at least one iteration. If strict modifier is present, + first set of tasks has ceil (# of loop iters / num_tasks) iterations, + followed by possibly empty set of tasks with floor (# of loop iters / num_tasks) + iterations. 
*/ + if (test (-51, 2500, 48, 9, num_tasks, &ntasks, &min_iters, &max_iters, &sep) != 54 + || ntasks != 9 || min_iters != 6 || max_iters != 6 || sep != 9) + __builtin_abort (); + if (test (0, 57, 1, 9, num_tasks, &ntasks, &min_iters, &max_iters, &sep) != 57 + || ntasks != 9 || min_iters != 6 || max_iters != 7 || sep != 3) + __builtin_abort (); + if (test (0, 25, 2, 17, num_tasks, &ntasks, &min_iters, &max_iters, &sep) != 13 + || ntasks != 13 || min_iters != 1 || max_iters != 1 || sep != 13) + __builtin_abort (); + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/variable-not-offloaded.c b/libgomp/testsuite/libgomp.c-c++-common/variable-not-offloaded.c index bc4b916..fe2a8b2 100644 --- a/libgomp/testsuite/libgomp.c-c++-common/variable-not-offloaded.c +++ b/libgomp/testsuite/libgomp.c-c++-common/variable-not-offloaded.c @@ -1,7 +1,7 @@ /* { dg-do link } */ -/* { dg-excess-errors "lto1, mkoffload and lto-wrapper fatal errors" { target offload_device_nonshared_as } } */ +/* { dg-excess-errors "lto1, mkoffload and lto-wrapper fatal errors" { target { offload_target_nvptx || offload_target_amdgcn } } } */ -int var; /* { dg-error "variable 'var' has been referenced in offloaded code but hasn't been marked to be included in the offloaded code" "" { target offload_device_nonshared_as } } */ +int var; /* { dg-error "variable 'var' has been referenced in offloaded code but hasn't been marked to be included in the offloaded code" "" { target { offload_target_nvptx || offload_target_amdgcn } } } */ #pragma omp declare target void __attribute__((noinline, noclone)) diff --git a/libgomp/testsuite/libgomp.c/address-space-1.c b/libgomp/testsuite/libgomp.c/address-space-1.c new file mode 100644 index 0000000..6ad57de --- /dev/null +++ b/libgomp/testsuite/libgomp.c/address-space-1.c @@ -0,0 +1,28 @@ +/* Verify OMP instances of variables with address space. 
*/ + +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target offload_device_nonshared_as } */ + +/* With Intel MIC (emulated) offloading: + offload error: process on the device 0 unexpectedly exited with code 0 + { dg-xfail-run-if TODO { offload_device_intel_mic } } */ + +#include <assert.h> + +int __seg_fs a; + +int +main (void) +{ + // a = 123; // SIGSEGV + int b; +#pragma omp target map(alloc: a) map(from: b) + { + a = 321; // no SIGSEGV (given 'offload_device_nonshared_as') + asm volatile ("" : : "g" (&a) : "memory"); + b = a; + } + assert (b == 321); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/affinity-1.c b/libgomp/testsuite/libgomp.c/affinity-1.c index 13a743e..574a9f7 100644 --- a/libgomp/testsuite/libgomp.c/affinity-1.c +++ b/libgomp/testsuite/libgomp.c/affinity-1.c @@ -183,20 +183,26 @@ main () int test_false = env_proc_bind && strcmp (env_proc_bind, "false") == 0; int test_true = env_proc_bind && strcmp (env_proc_bind, "true") == 0; int test_spread_master_close - = env_proc_bind && strcmp (env_proc_bind, "spread,master,close") == 0; + = (env_proc_bind + && (strcmp (env_proc_bind, "spread,master,close") == 0 + || strcmp (env_proc_bind, "spread,primary,close") == 0)); char *env_places = getenv ("OMP_PLACES"); int test_places = 0; + if (omp_proc_bind_master != omp_proc_bind_primary) + abort (); + #ifdef DO_FORK if (env_places == NULL && contig_cpucount >= 8 && test_false && getenv ("GOMP_AFFINITY") == NULL) { int i, j, status; pid_t pid; - for (j = 0; j < 2; j++) + for (j = 0; j < 3; j++) { - if (setenv ("OMP_PROC_BIND", j ? "spread,master,close" : "true", 1) - < 0) + if (setenv ("OMP_PROC_BIND", + j > 1 ? "spread,primary,close" + : (j ? 
"spread,master,close" : "true"), 1) < 0) break; for (i = sizeof (places_array) / sizeof (places_array[0]) - 1; i; --i) diff --git a/libgomp/testsuite/libgomp.c/omp-nested-3.c b/libgomp/testsuite/libgomp.c/omp-nested-3.c index 7790c58..446e6bd 100644 --- a/libgomp/testsuite/libgomp.c/omp-nested-3.c +++ b/libgomp/testsuite/libgomp.c/omp-nested-3.c @@ -1,4 +1,5 @@ // { dg-do run { target lto } } // { dg-additional-options "-fipa-pta -flto -flto-partition=max" } +// { dg-prune-output "warning: using serial compilation" } #include "omp-nested-1.c" diff --git a/libgomp/testsuite/libgomp.c/pr46032-2.c b/libgomp/testsuite/libgomp.c/pr46032-2.c index 1125f6e..36f3730 100644 --- a/libgomp/testsuite/libgomp.c/pr46032-2.c +++ b/libgomp/testsuite/libgomp.c/pr46032-2.c @@ -1,4 +1,5 @@ /* { dg-do run { target lto } } */ /* { dg-options "-O2 -ftree-vectorize -std=c99 -fipa-pta -flto -flto-partition=max" } */ +/* { dg-prune-output "warning: using serial compilation" } */ #include "pr46032.c" diff --git a/libgomp/testsuite/libgomp.c/pr81778.c b/libgomp/testsuite/libgomp.c/pr81778.c new file mode 100644 index 0000000..571668e --- /dev/null +++ b/libgomp/testsuite/libgomp.c/pr81778.c @@ -0,0 +1,48 @@ +/* Minimized from for-5.c. */ + +#include <stdio.h> +#include <stdlib.h> + +/* Size of array we want to write. */ +#define N 32 + +/* Size of extra space before and after. */ +#define CANARY_SIZE (N * 32) + +/* Start of array we want to write. */ +#define BASE (CANARY_SIZE) + +// Total size to be allocated. +#define ALLOC_SIZE (CANARY_SIZE + N + CANARY_SIZE) + +#pragma omp declare target +int a[ALLOC_SIZE]; +#pragma omp end declare target + +int +main (void) +{ + /* Use variable step in for loop. */ + int s = 1; + +#pragma omp target update to(a) + + /* Write a[BASE] .. a[BASE + N - 1]. 
*/ +#pragma omp target simd + for (int i = N - 1; i > -1; i -= s) + a[BASE + i] = 1; + +#pragma omp target update from(a) + + for (int i = 0; i < ALLOC_SIZE; i++) + { + int expected = (BASE <= i && i < BASE + N) ? 1 : 0; + if (a[i] == expected) + continue; + + printf ("Expected %d, got %d at base[%d]\n", expected, a[i], i - BASE); + abort (); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/pr86416-1.c b/libgomp/testsuite/libgomp.c/pr86416-1.c index ad9370f..6d38692 100644 --- a/libgomp/testsuite/libgomp.c/pr86416-1.c +++ b/libgomp/testsuite/libgomp.c/pr86416-1.c @@ -2,8 +2,8 @@ /* { dg-require-effective-target large_long_double } */ /* PR middle-end/86416 */ -/* { dg-error "bit-precision floating-point numbers unsupported .mode '.F'." "" { target offload_device } 0 } */ -/* { dg-excess-errors "Follow-up errors from mkoffload and lto-wrapper" { target offload_device } } */ +/* { dg-error "bit-precision floating-point numbers unsupported .mode '.F'." "" { target { offload_target_nvptx || offload_target_amdgcn } } 0 } */ +/* { dg-excess-errors "Follow-up errors from mkoffload and lto-wrapper" { target { offload_target_nvptx || offload_target_amdgcn } } } */ #include <stdlib.h> /* For abort. */ diff --git a/libgomp/testsuite/libgomp.c/pr86416-2.c b/libgomp/testsuite/libgomp.c/pr86416-2.c index ec45e40..cffeb3f 100644 --- a/libgomp/testsuite/libgomp.c/pr86416-2.c +++ b/libgomp/testsuite/libgomp.c/pr86416-2.c @@ -2,8 +2,8 @@ /* { dg-add-options __float128 } */ /* PR middle-end/86416 */ -/* { dg-error "bit-precision floating-point numbers unsupported .mode '.F'." "" { target offload_device } 0 } */ -/* { dg-excess-errors "Follow-up errors from mkoffload and lto-wrapper" { target offload_device } } */ +/* { dg-error "bit-precision floating-point numbers unsupported .mode '.F'." 
"" { target { offload_target_nvptx || offload_target_amdgcn } } 0 } */ +/* { dg-excess-errors "Follow-up errors from mkoffload and lto-wrapper" { target { offload_target_nvptx || offload_target_amdgcn } } } */ #include <stdlib.h> /* For abort. */ diff --git a/libgomp/testsuite/libgomp.c/pr99555-1.c b/libgomp/testsuite/libgomp.c/pr99555-1.c new file mode 100644 index 0000000..bd33b93 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/pr99555-1.c @@ -0,0 +1,21 @@ +// PR99555 "[OpenMP/nvptx] Execution-time hang for simple nested OpenMP 'target'/'parallel'/'task' constructs" + +// { dg-additional-options "-O0" } + +#include <unistd.h> // For 'alarm'. + +#include "../libgomp.c-c++-common/on_device_arch.h" + +int main (void) +{ + if (on_device_arch_nvptx ()) + alarm (4); /*TODO Until resolved, make sure that we exit quickly, with error status. + { dg-xfail-run-if "PR99555" { offload_device_nvptx } } */ + +#pragma omp target +#pragma omp parallel // num_threads(1) +#pragma omp task + ; + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/target-43.c b/libgomp/testsuite/libgomp.c/target-43.c new file mode 100644 index 0000000..028e912 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-43.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ +/* { dg-additional-options "-foffload-options=nvptx-none=-latomic" { target { offload_target_nvptx } } } */ + +#include <stdlib.h> + +#define N 32 +#define TYPE char + +int +main (void) +{ + TYPE result = 1; + TYPE a[N]; + for (int x = 0; x < N; ++x) + a[x] = 1; + +#pragma omp target map(tofrom: result) map(to:a) +#pragma omp for simd reduction(&&:result) + for (int x = 0; x < N; ++x) + result = result && a[x]; + + if (result != 1) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/target-44.c b/libgomp/testsuite/libgomp.c/target-44.c new file mode 100644 index 0000000..a5da81d --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-44.c @@ -0,0 +1,27 @@ +/* { dg-additional-options "-foffload-options=nvptx-none=-latomic" { 
target { offload_target_nvptx } } } */ + +#include <stdlib.h> + +struct s +{ + int i; +}; + +#pragma omp declare reduction(+: struct s: omp_out.i += omp_in.i) + +int +main (void) +{ + const int N0 = 32768; + + struct s counter_N0 = { 0 }; +#pragma omp target +#pragma omp for simd reduction(+: counter_N0) + for (int i0 = 0 ; i0 < N0 ; i0++ ) + counter_N0.i += 1; + + if (counter_N0.i != N0) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/task-reduction-4.c b/libgomp/testsuite/libgomp.c/task-reduction-4.c new file mode 100644 index 0000000..7ca1d02 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/task-reduction-4.c @@ -0,0 +1,21 @@ +/* PR middle-end/100471 */ + +extern void abort (void); + +int c; + +int +main () +{ +#pragma omp parallel +#pragma omp single + { + int r = 0, i; + #pragma omp taskloop reduction(+:r) + for (i = 0; i < c; i++) + r++; + if (r != 0) + abort (); + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.fortran/alloc-1.F90 b/libgomp/testsuite/libgomp.fortran/alloc-1.F90 index 8ecb4c4..e636583 100644 --- a/libgomp/testsuite/libgomp.fortran/alloc-1.F90 +++ b/libgomp/testsuite/libgomp.fortran/alloc-1.F90 @@ -36,22 +36,6 @@ type (omp_alloctrait), allocatable :: traits(:), traits5(:) - interface - ! omp_alloc + omp_free part of OpenMP for C/C++ - ! 
but not (yet) in the OpenMP spec for Fortran - type(c_ptr) function omp_alloc (size, handle) bind(C) - import - integer (c_size_t), value :: size - integer (omp_allocator_handle_kind), value :: handle - end function - - subroutine omp_free (ptr, handle) bind(C) - import - type (c_ptr), value :: ptr - integer (omp_allocator_handle_kind), value :: handle - end subroutine - end interface - type(c_ptr), volatile :: cp, cq, cr integer :: i integer(c_intptr_t) :: intptr @@ -155,12 +139,13 @@ cp = omp_alloc (ONEoFIVE, & & omp_null_allocator) if (mod (transfer (cp, intptr), 32_c_intptr_t) /= 0) stop 17 - call c_f_pointer (cq, q, [ONEoFIVE & + call c_f_pointer (cp, p, [ONEoFIVE & & / c_sizeof (i)]) p(1) = 5 p(ONEoFIVE / c_sizeof (i)) = 6 cq = omp_alloc (768_c_size_t, omp_null_allocator) if (mod (transfer (cq, intptr), 128_c_intptr_t) /= 0) stop 18 + call c_f_pointer (cq, q, [768 / c_sizeof (i)]) q(1) = 7 q(768 / c_sizeof (i)) = 8 if (c_associated (omp_alloc (768_c_size_t, omp_null_allocator))) & diff --git a/libgomp/testsuite/libgomp.fortran/alloc-4.f90 b/libgomp/testsuite/libgomp.fortran/alloc-4.f90 index ce353b5..87b6add 100644 --- a/libgomp/testsuite/libgomp.fortran/alloc-4.f90 +++ b/libgomp/testsuite/libgomp.fortran/alloc-4.f90 @@ -3,22 +3,6 @@ program main use ISO_C_Binding implicit none (external, type) - interface - ! omp_alloc + omp_free part of OpenMP for C/C++ - ! 
but not (yet) in the OpenMP spec for Fortran - type(c_ptr) function omp_alloc (size, handle) bind(C) - import - integer (c_size_t), value :: size - integer (omp_allocator_handle_kind), value :: handle - end function - - subroutine omp_free (ptr, handle) bind(C) - import - type (c_ptr), value :: ptr - integer (omp_allocator_handle_kind), value :: handle - end subroutine - end interface - type (omp_alloctrait) :: traits(3) integer (omp_allocator_handle_kind) :: a diff --git a/libgomp/testsuite/libgomp.fortran/class-firstprivate-1.f90 b/libgomp/testsuite/libgomp.fortran/class-firstprivate-1.f90 new file mode 100644 index 0000000..b77117e --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/class-firstprivate-1.f90 @@ -0,0 +1,323 @@ +! FIRSTPRIVATE: CLASS(*) + intrinsic types +program select_type_openmp + implicit none + class(*), allocatable :: val1, val1a, val2, val3 + + call sub() ! local var + + call sub2(val1, val1a, val2, val3) ! allocatable args + + allocate(val1, source=7) + allocate(val1a, source=7) + allocate(val2, source="abcdef") + allocate(val3, source=4_"zyx4") + call sub3(val1, val1a, val2, val3) ! 
nonallocatable vars + deallocate(val1, val1a, val2, val3) +contains +subroutine sub() + class(*), allocatable :: val1, val1a, val2, val3 + allocate(val1a, source=7) + allocate(val2, source="abcdef") + allocate(val3, source=4_"zyx4") + + if (allocated(val1)) stop 1 + + !$OMP PARALLEL firstprivate(val1, val1a, val2, val3) + if (allocated(val1)) stop 2 + if (.not.allocated(val1a)) stop 3 + if (.not.allocated(val2)) stop 4 + if (.not.allocated(val3)) stop 5 + + allocate(val1, source=7) + + select type (val1) + type is (integer) + if (val1 /= 7) stop 6 + val1 = 8 + class default + stop 7 + end select + + select type (val1a) + type is (integer) + if (val1a /= 7) stop 8 + val1a = 8 + class default + stop 9 + end select + + select type (val2) + type is (character(len=*)) + if (len(val2) /= 6) stop 10 + if (val2 /= "abcdef") stop 11 + val2 = "123456" + class default + stop 12 + end select + + select type (val3) + type is (character(len=*, kind=4)) + if (len(val3) /= 4) stop 13 + if (val3 /= 4_"zyx4") stop 14 + val3 = 4_"AbCd" + class default + stop 15 + end select + + select type (val3) + type is (character(len=*, kind=4)) + if (len(val3) /= 4) stop 16 + if (val3 /= 4_"AbCd") stop 17 + val3 = 4_"1ab2" + class default + stop 18 + end select + + select type (val2) + type is (character(len=*)) + if (len(val2) /= 6) stop 19 + if (val2 /= "123456") stop 20 + val2 = "A2C4E6" + class default + stop 21 + end select + + select type (val1) + type is (integer) + if (val1 /= 8) stop 22 + val1 = 9 + class default + stop 23 + end select + + select type (val1a) + type is (integer) + if (val1a /= 8) stop 24 + val1a = 9 + class default + stop 25 + end select + !$OMP END PARALLEL + + if (allocated(val1)) stop 26 + if (.not. allocated(val1a)) stop 27 + if (.not. 
allocated(val2)) stop 28 + + select type (val2) + type is (character(len=*)) + if (len(val2) /= 6) stop 29 + if (val2 /= "abcdef") stop 30 + class default + stop 31 + end select + select type (val3) + type is (character(len=*,kind=4)) + if (len(val3) /= 4) stop 32 + if (val3 /= 4_"zyx4") stop 33 + class default + stop 34 + end select + deallocate(val1a, val2, val3) +end subroutine sub + +subroutine sub2(val1, val1a, val2, val3) + class(*), allocatable :: val1, val1a, val2, val3 + optional :: val1a + allocate(val1a, source=7) + allocate(val2, source="abcdef") + allocate(val3, source=4_"zyx4") + + if (allocated(val1)) stop 35 + + !$OMP PARALLEL firstprivate(val1, val1a, val2, val3) + if (allocated(val1)) stop 36 + if (.not.allocated(val1a)) stop 37 + if (.not.allocated(val2)) stop 38 + if (.not.allocated(val3)) stop 39 + + allocate(val1, source=7) + + select type (val1) + type is (integer) + if (val1 /= 7) stop 40 + val1 = 8 + class default + stop 41 + end select + + select type (val1a) + type is (integer) + if (val1a /= 7) stop 42 + val1a = 8 + class default + stop 43 + end select + + select type (val2) + type is (character(len=*)) + if (len(val2) /= 6) stop 44 + if (val2 /= "abcdef") stop 45 + val2 = "123456" + class default + stop 46 + end select + + select type (val3) + type is (character(len=*, kind=4)) + if (len(val3) /= 4) stop 47 + if (val3 /= 4_"zyx4") stop 48 + val3 = "AbCd" + class default + stop 49 + end select + + select type (val3) + type is (character(len=*, kind=4)) + if (len(val3) /= 4) stop 50 + if (val3 /= 4_"AbCd") stop 51 + val3 = 4_"1ab2" + class default + stop 52 + end select + + select type (val2) + type is (character(len=*)) + if (len(val2) /= 6) stop 53 + if (val2 /= "123456") stop 54 + val2 = "A2C4E6" + class default + stop 55 + end select + + select type (val1) + type is (integer) + if (val1 /= 8) stop 56 + val1 = 9 + class default + stop 57 + end select + + select type (val1a) + type is (integer) + if (val1a /= 8) stop 58 + val1a = 9 + 
class default + stop 59 + end select + !$OMP END PARALLEL + + if (allocated(val1)) stop 60 + if (.not. allocated(val1a)) stop 61 + if (.not. allocated(val2)) stop 62 + + select type (val2) + type is (character(len=*)) + if (len(val2) /= 6) stop 63 + if (val2 /= "abcdef") stop 64 + class default + stop 65 + end select + + select type (val3) + type is (character(len=*, kind=4)) + if (len(val3) /= 4) stop 66 + if (val3 /= 4_"zyx4") stop 67 + val3 = 4_"AbCd" + class default + stop 68 + end select + deallocate(val1a, val2, val3) +end subroutine sub2 + +subroutine sub3(val1, val1a, val2, val3) + class(*) :: val1, val1a, val2, val3 + optional :: val1a + + !$OMP PARALLEL firstprivate(val1, val1a, val2, val3) + select type (val1) + type is (integer) + if (val1 /= 7) stop 69 + val1 = 8 + class default + stop 70 + end select + + select type (val1a) + type is (integer) + if (val1a /= 7) stop 71 + val1a = 8 + class default + stop 72 + end select + + select type (val2) + type is (character(len=*)) + if (len(val2) /= 6) stop 73 + if (val2 /= "abcdef") stop 74 + val2 = "123456" + class default + stop 75 + end select + + select type (val3) + type is (character(len=*, kind=4)) + if (len(val3) /= 4) stop 76 + if (val3 /= 4_"zyx4") stop 77 + val3 = 4_"AbCd" + class default + stop 78 + end select + + select type (val3) + type is (character(len=*, kind=4)) + if (len(val3) /= 4) stop 79 + if (val3 /= 4_"AbCd") stop 80 + val3 = 4_"1ab2" + class default + stop 81 + end select + + select type (val2) + type is (character(len=*)) + if (len(val2) /= 6) stop 82 + if (val2 /= "123456") stop 83 + val2 = "A2C4E6" + class default + stop 84 + end select + + select type (val1) + type is (integer) + if (val1 /= 8) stop 85 + val1 = 9 + class default + stop 86 + end select + + select type (val1a) + type is (integer) + if (val1a /= 8) stop 87 + val1a = 9 + class default + stop 88 + end select + !$OMP END PARALLEL + + select type (val2) + type is (character(len=*)) + if (len(val2) /= 6) stop 89 + if (val2 
/= "abcdef") stop 90 + class default + stop 91 + end select + + select type (val3) + type is (character(len=*, kind=4)) + if (len(val3) /= 4) stop 92 + if (val3 /= 4_"zyx4") stop 93 + val3 = 4_"AbCd" + class default + stop 94 + end select +end subroutine sub3 +end program select_type_openmp diff --git a/libgomp/testsuite/libgomp.fortran/class-firstprivate-2.f90 b/libgomp/testsuite/libgomp.fortran/class-firstprivate-2.f90 new file mode 100644 index 0000000..7528d32 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/class-firstprivate-2.f90 @@ -0,0 +1,334 @@ +! FIRSTPRIVATE: CLASS(t) + derived types +program select_type_openmp + implicit none + type t + end type t + type, extends(t) :: t_int + integer :: i + end type + type, extends(t) :: t_char1 + character(len=:, kind=1), allocatable :: str + end type + type, extends(t) :: t_char4 + character(len=:, kind=4), allocatable :: str + end type + class(t), allocatable :: val1, val1a, val2, val3 + + call sub() ! local var + + call sub2(val1, val1a, val2, val3) ! allocatable args + + allocate(val1, source=t_int(7)) + allocate(val1a, source=t_int(7)) + allocate(val2, source=t_char1("abcdef")) + allocate(val3, source=t_char4(4_"zyx4")) + call sub3(val1, val1a, val2, val3) ! 
nonallocatable vars + deallocate(val1, val1a, val2, val3) +contains +subroutine sub() + class(t), allocatable :: val1, val1a, val2, val3 + allocate(val1a, source=t_int(7)) + allocate(val2, source=t_char1("abcdef")) + allocate(val3, source=t_char4(4_"zyx4")) + + if (allocated(val1)) stop 1 + + !$OMP PARALLEL firstprivate(val1, val1a, val2, val3) + if (allocated(val1)) stop 2 + if (.not.allocated(val1a)) stop 3 + if (.not.allocated(val2)) stop 4 + if (.not.allocated(val3)) stop 5 + + allocate(val1, source=t_int(7)) + + select type (val1) + type is (t_int) + if (val1%i /= 7) stop 6 + val1%i = 8 + class default + stop 7 + end select + + select type (val1a) + type is (t_int) + if (val1a%i /= 7) stop 8 + val1a%i = 8 + class default + stop 9 + end select + + select type (val2) + type is (t_char1) + if (len(val2%str) /= 6) stop 10 + if (val2%str /= "abcdef") stop 11 + val2%str = "123456" + class default + stop 12 + end select + + select type (val3) + type is (t_char4) + if (len(val3%str) /= 4) stop 13 + if (val3%str /= 4_"zyx4") stop 14 + val3%str = 4_"AbCd" + class default + stop 15 + end select + + select type (val3) + type is (t_char4) + if (len(val3%str) /= 4) stop 16 + if (val3%str /= 4_"AbCd") stop 17 + val3%str = 4_"1ab2" + class default + stop 18 + end select + + select type (val2) + type is (t_char1) + if (len(val2%str) /= 6) stop 19 + if (val2%str /= "123456") stop 20 + val2%str = "A2C4E6" + class default + stop 21 + end select + + select type (val1) + type is (t_int) + if (val1%i /= 8) stop 22 + val1%i = 9 + class default + stop 23 + end select + + select type (val1a) + type is (t_int) + if (val1a%i /= 8) stop 24 + val1a%i = 9 + class default + stop 25 + end select + !$OMP END PARALLEL + + if (allocated(val1)) stop 26 + if (.not. allocated(val1a)) stop 27 + if (.not. 
allocated(val2)) stop 28 + + select type (val2) + type is (t_char1) + if (len(val2%str) /= 6) stop 29 + if (val2%str /= "abcdef") stop 30 + class default + stop 31 + end select + select type (val3) + type is (t_char4) + if (len(val3%str) /= 4) stop 32 + if (val3%str /= 4_"zyx4") stop 33 + class default + stop 34 + end select + deallocate(val1a,val2, val3) +end subroutine sub + +subroutine sub2(val1, val1a, val2, val3) + class(t), allocatable :: val1, val1a, val2, val3 + optional :: val1a + allocate(val1a, source=t_int(7)) + allocate(val2, source=t_char1("abcdef")) + allocate(val3, source=t_char4(4_"zyx4")) + + if (allocated(val1)) stop 35 + + !$OMP PARALLEL firstprivate(val1, val1a, val2, val3) + if (allocated(val1)) stop 36 + if (.not.allocated(val1a)) stop 37 + if (.not.allocated(val2)) stop 38 + if (.not.allocated(val3)) stop 39 + + allocate(val1, source=t_int(7)) + + select type (val1) + type is (t_int) + if (val1%i /= 7) stop 40 + val1%i = 8 + class default + stop 41 + end select + + select type (val1a) + type is (t_int) + if (val1a%i /= 7) stop 42 + val1a%i = 8 + class default + stop 43 + end select + + select type (val2) + type is (t_char1) + if (len(val2%str) /= 6) stop 44 + if (val2%str /= "abcdef") stop 45 + val2%str = "123456" + class default + stop 46 + end select + + select type (val3) + type is (t_char4) + if (len(val3%str) /= 4) stop 47 + if (val3%str /= 4_"zyx4") stop 48 + val3%str = "AbCd" + class default + stop 49 + end select + + select type (val3) + type is (t_char4) + if (len(val3%str) /= 4) stop 50 + if (val3%str /= 4_"AbCd") stop 51 + val3%str = 4_"1ab2" + class default + stop 52 + end select + + select type (val2) + type is (t_char1) + if (len(val2%str) /= 6) stop 53 + if (val2%str /= "123456") stop 54 + val2%str = "A2C4E6" + class default + stop 55 + end select + + select type (val1) + type is (t_int) + if (val1%i /= 8) stop 56 + val1%i = 9 + class default + stop 57 + end select + + select type (val1a) + type is (t_int) + if (val1a%i /= 8) 
stop 58 + val1a%i = 9 + class default + stop 59 + end select + !$OMP END PARALLEL + + if (allocated(val1)) stop 60 + if (.not. allocated(val1a)) stop 61 + if (.not. allocated(val2)) stop 62 + + select type (val2) + type is (t_char1) + if (len(val2%str) /= 6) stop 63 + if (val2%str /= "abcdef") stop 64 + class default + stop 65 + end select + + select type (val3) + type is (t_char4) + if (len(val3%str) /= 4) stop 66 + if (val3%str /= 4_"zyx4") stop 67 + val3%str = 4_"AbCd" + class default + stop 68 + end select + deallocate(val1a, val2, val3) +end subroutine sub2 + +subroutine sub3(val1, val1a, val2, val3) + class(t) :: val1, val1a, val2, val3 + optional :: val1a + + !$OMP PARALLEL firstprivate(val1, val1a, val2, val3) + select type (val1) + type is (t_int) + if (val1%i /= 7) stop 69 + val1%i = 8 + class default + stop 70 + end select + + select type (val1a) + type is (t_int) + if (val1a%i /= 7) stop 71 + val1a%i = 8 + class default + stop 72 + end select + + select type (val2) + type is (t_char1) + if (len(val2%str) /= 6) stop 73 + if (val2%str /= "abcdef") stop 74 + val2%str = "123456" + class default + stop 75 + end select + + select type (val3) + type is (t_char4) + if (len(val3%str) /= 4) stop 76 + if (val3%str /= 4_"zyx4") stop 77 + val3%str = 4_"AbCd" + class default + stop 78 + end select + + select type (val3) + type is (t_char4) + if (len(val3%str) /= 4) stop 79 + if (val3%str /= 4_"AbCd") stop 80 + val3%str = 4_"1ab2" + class default + stop 81 + end select + + select type (val2) + type is (t_char1) + if (len(val2%str) /= 6) stop 82 + if (val2%str /= "123456") stop 83 + val2%str = "A2C4E6" + class default + stop 84 + end select + + select type (val1) + type is (t_int) + if (val1%i /= 8) stop 85 + val1%i = 9 + class default + stop 86 + end select + + select type (val1a) + type is (t_int) + if (val1a%i /= 8) stop 87 + val1a%i = 9 + class default + stop 88 + end select + !$OMP END PARALLEL + + select type (val2) + type is (t_char1) + if (len(val2%str) /= 6) 
stop 89 + if (val2%str /= "abcdef") stop 90 + class default + stop 91 + end select + + select type (val3) + type is (t_char4) + if (len(val3%str) /= 4) stop 92 + if (val3%str /= 4_"zyx4") stop 93 + val3%str = 4_"AbCd" + class default + stop 94 + end select +end subroutine sub3 +end program select_type_openmp diff --git a/libgomp/testsuite/libgomp.fortran/class-firstprivate-3.f90 b/libgomp/testsuite/libgomp.fortran/class-firstprivate-3.f90 new file mode 100644 index 0000000..a450fde --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/class-firstprivate-3.f90 @@ -0,0 +1,334 @@ +! FIRSTPRIVATE: CLASS(*) + derived types +program select_type_openmp + implicit none + type t + end type t + type, extends(t) :: t_int + integer :: i + end type + type, extends(t) :: t_char1 + character(len=:, kind=1), allocatable :: str + end type + type, extends(t) :: t_char4 + character(len=:, kind=4), allocatable :: str + end type + class(*), allocatable :: val1, val1a, val2, val3 + + call sub() ! local var + + call sub2(val1, val1a, val2, val3) ! allocatable args + + allocate(val1, source=t_int(7)) + allocate(val1a, source=t_int(7)) + allocate(val2, source=t_char1("abcdef")) + allocate(val3, source=t_char4(4_"zyx4")) + call sub3(val1, val1a, val2, val3) ! 
nonallocatable vars + deallocate(val1, val1a, val2, val3) +contains +subroutine sub() + class(*), allocatable :: val1, val1a, val2, val3 + allocate(val1a, source=t_int(7)) + allocate(val2, source=t_char1("abcdef")) + allocate(val3, source=t_char4(4_"zyx4")) + + if (allocated(val1)) stop 1 + + !$OMP PARALLEL firstprivate(val1, val1a, val2, val3) + if (allocated(val1)) stop 2 + if (.not.allocated(val1a)) stop 3 + if (.not.allocated(val2)) stop 4 + if (.not.allocated(val3)) stop 5 + + allocate(val1, source=t_int(7)) + + select type (val1) + type is (t_int) + if (val1%i /= 7) stop 6 + val1%i = 8 + class default + stop 7 + end select + + select type (val1a) + type is (t_int) + if (val1a%i /= 7) stop 8 + val1a%i = 8 + class default + stop 9 + end select + + select type (val2) + type is (t_char1) + if (len(val2%str) /= 6) stop 10 + if (val2%str /= "abcdef") stop 11 + val2%str = "123456" + class default + stop 12 + end select + + select type (val3) + type is (t_char4) + if (len(val3%str) /= 4) stop 13 + if (val3%str /= 4_"zyx4") stop 14 + val3%str = 4_"AbCd" + class default + stop 15 + end select + + select type (val3) + type is (t_char4) + if (len(val3%str) /= 4) stop 16 + if (val3%str /= 4_"AbCd") stop 17 + val3%str = 4_"1ab2" + class default + stop 18 + end select + + select type (val2) + type is (t_char1) + if (len(val2%str) /= 6) stop 19 + if (val2%str /= "123456") stop 20 + val2%str = "A2C4E6" + class default + stop 21 + end select + + select type (val1) + type is (t_int) + if (val1%i /= 8) stop 22 + val1%i = 9 + class default + stop 23 + end select + + select type (val1a) + type is (t_int) + if (val1a%i /= 8) stop 24 + val1a%i = 9 + class default + stop 25 + end select + !$OMP END PARALLEL + + if (allocated(val1)) stop 26 + if (.not. allocated(val1a)) stop 27 + if (.not. 
allocated(val2)) stop 28 + + select type (val2) + type is (t_char1) + if (len(val2%str) /= 6) stop 29 + if (val2%str /= "abcdef") stop 30 + class default + stop 31 + end select + select type (val3) + type is (t_char4) + if (len(val3%str) /= 4) stop 32 + if (val3%str /= 4_"zyx4") stop 33 + class default + stop 34 + end select + deallocate(val1a,val2, val3) +end subroutine sub + +subroutine sub2(val1, val1a, val2, val3) + class(*), allocatable :: val1, val1a, val2, val3 + optional :: val1a + allocate(val1a, source=t_int(7)) + allocate(val2, source=t_char1("abcdef")) + allocate(val3, source=t_char4(4_"zyx4")) + + if (allocated(val1)) stop 35 + + !$OMP PARALLEL firstprivate(val1, val1a, val2, val3) + if (allocated(val1)) stop 36 + if (.not.allocated(val1a)) stop 37 + if (.not.allocated(val2)) stop 38 + if (.not.allocated(val3)) stop 39 + + allocate(val1, source=t_int(7)) + + select type (val1) + type is (t_int) + if (val1%i /= 7) stop 40 + val1%i = 8 + class default + stop 41 + end select + + select type (val1a) + type is (t_int) + if (val1a%i /= 7) stop 42 + val1a%i = 8 + class default + stop 43 + end select + + select type (val2) + type is (t_char1) + if (len(val2%str) /= 6) stop 44 + if (val2%str /= "abcdef") stop 45 + val2%str = "123456" + class default + stop 46 + end select + + select type (val3) + type is (t_char4) + if (len(val3%str) /= 4) stop 47 + if (val3%str /= 4_"zyx4") stop 48 + val3%str = "AbCd" + class default + stop 49 + end select + + select type (val3) + type is (t_char4) + if (len(val3%str) /= 4) stop 50 + if (val3%str /= 4_"AbCd") stop 51 + val3%str = 4_"1ab2" + class default + stop 52 + end select + + select type (val2) + type is (t_char1) + if (len(val2%str) /= 6) stop 53 + if (val2%str /= "123456") stop 54 + val2%str = "A2C4E6" + class default + stop 55 + end select + + select type (val1) + type is (t_int) + if (val1%i /= 8) stop 56 + val1%i = 9 + class default + stop 57 + end select + + select type (val1a) + type is (t_int) + if (val1a%i /= 8) 
stop 58 + val1a%i = 9 + class default + stop 59 + end select + !$OMP END PARALLEL + + if (allocated(val1)) stop 60 + if (.not. allocated(val1a)) stop 61 + if (.not. allocated(val2)) stop 62 + + select type (val2) + type is (t_char1) + if (len(val2%str) /= 6) stop 63 + if (val2%str /= "abcdef") stop 64 + class default + stop 65 + end select + + select type (val3) + type is (t_char4) + if (len(val3%str) /= 4) stop 66 + if (val3%str /= 4_"zyx4") stop 67 + val3%str = 4_"AbCd" + class default + stop 68 + end select + deallocate(val1a, val2, val3) +end subroutine sub2 + +subroutine sub3(val1, val1a, val2, val3) + class(*) :: val1, val1a, val2, val3 + optional :: val1a + + !$OMP PARALLEL firstprivate(val1, val1a, val2, val3) + select type (val1) + type is (t_int) + if (val1%i /= 7) stop 69 + val1%i = 8 + class default + stop 70 + end select + + select type (val1a) + type is (t_int) + if (val1a%i /= 7) stop 71 + val1a%i = 8 + class default + stop 72 + end select + + select type (val2) + type is (t_char1) + if (len(val2%str) /= 6) stop 73 + if (val2%str /= "abcdef") stop 74 + val2%str = "123456" + class default + stop 75 + end select + + select type (val3) + type is (t_char4) + if (len(val3%str) /= 4) stop 76 + if (val3%str /= 4_"zyx4") stop 77 + val3%str = 4_"AbCd" + class default + stop 78 + end select + + select type (val3) + type is (t_char4) + if (len(val3%str) /= 4) stop 79 + if (val3%str /= 4_"AbCd") stop 80 + val3%str = 4_"1ab2" + class default + stop 81 + end select + + select type (val2) + type is (t_char1) + if (len(val2%str) /= 6) stop 82 + if (val2%str /= "123456") stop 83 + val2%str = "A2C4E6" + class default + stop 84 + end select + + select type (val1) + type is (t_int) + if (val1%i /= 8) stop 85 + val1%i = 9 + class default + stop 86 + end select + + select type (val1a) + type is (t_int) + if (val1a%i /= 8) stop 87 + val1a%i = 9 + class default + stop 88 + end select + !$OMP END PARALLEL + + select type (val2) + type is (t_char1) + if (len(val2%str) /= 6) 
stop 89 + if (val2%str /= "abcdef") stop 90 + class default + stop 91 + end select + + select type (val3) + type is (t_char4) + if (len(val3%str) /= 4) stop 92 + if (val3%str /= 4_"zyx4") stop 93 + val3%str = 4_"AbCd" + class default + stop 94 + end select +end subroutine sub3 +end program select_type_openmp diff --git a/libgomp/testsuite/libgomp.fortran/defaultmap-8.f90 b/libgomp/testsuite/libgomp.fortran/defaultmap-8.f90 new file mode 100644 index 0000000..54f4b2e --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/defaultmap-8.f90 @@ -0,0 +1,292 @@ +! { dg-do run } +! { dg-additional-options "-fdump-tree-gimple" } +! +! PR fortran/92568 +! +program main + implicit none + integer :: xa1, xa2, xp1, xp2, xat1, xat2, xt1, xt2, xi1, xi2 + allocatable :: xa1, xa2, xat1, xat2 + pointer :: xp1, xp2 + + allocate (xa1, xa2, xat1, xat2, xp1, xp2) + + call foo (xa1, xa2, xp1, xp2, xat1, xat2, xt1, xt2, xi1, xi2) + call foo2 (xa1, xa2, xp1, xp2, xat1, xat2, xt1, xt2, xi1, xi2) + call foo3 (xa1, xa2, xp1, xp2, xat1, xat2, xt1, xt2, xi1, xi2) + call bar (xa1, xa2, xp1, xp2, xat1, xat2, xt1, xt2, xi1, xi2) + + deallocate (xa1, xa2, xat1, xat2, xp1, xp2) +contains +! Implicit mapping +subroutine foo (ia1, ia2, ip1, ip2, iat1, iat2, it1, it2, ii1, ii2) + implicit none + integer :: ia1, ia2, ia3, ip1, ip2, ip3, iat1, iat2, iat3, it1, it2, it3, ii1, ii2, ii3 + allocatable :: ia1, ia2, ia3, iat1, iat2, iat3 + pointer :: ip1, ip2, ip3 + target :: iat1, iat2, iat3, it1, it2, it3 + optional :: ia1, ip1, iat1, it1, ii1 + + allocate(ia3, iat3, ip3) + + ia1 = 2; ia2 = 2; ia3 = 2; ip1 = 2; ip2 = 2; ip3 = 2; + iat1 = 2; iat2 = 2; iat3 = 2; it1 = 2; it2 = 2; it3 = 2 + ii1 = 2; ii2 = 2; ii3 = 2 + + ! Implicitly, scalars are 'firstprivate' except + ! if target, allocatable, pointer they are always tofrom. 
+ !$omp target + if (ia1 /= 2) stop 1 + if (ia2 /= 2) stop 2 + if (ia3 /= 2) stop 3 + if (ip1 /= 2) stop 4 + if (ip2 /= 2) stop 5 + if (ip3 /= 2) stop 6 + if (iat1 /= 2) stop 7 + if (iat2 /= 2) stop 8 + if (iat3 /= 2) stop 9 + if (it1 /= 2) stop 10 + if (it2 /= 2) stop 11 + if (it3 /= 2) stop 12 + if (ii1 /= 2) stop 13 + if (ii2 /= 2) stop 14 + if (ii3 /= 2) stop 15 + + ia1 = 1; ia2 = 1; ia3 = 1; ip1 = 1; ip2 = 1; ip3 = 1; + iat1 = 1; iat2 = 1; iat3 = 1; it1 = 1; it2 = 1; it3 = 1 + ii1 = 1; ii2 = 1; ii3 = 1 + !$omp end target + + ! (target,allocatable,pointer) -> tofrom + if (ia1 /= 1) stop 16 + if (ia2 /= 1) stop 17 + if (ia3 /= 1) stop 18 + if (ip1 /= 1) stop 19 + if (ip2 /= 1) stop 20 + if (ip3 /= 1) stop 21 + if (iat1 /= 1) stop 22 + if (iat2 /= 1) stop 23 + if (iat3 /= 1) stop 24 + if (it1 /= 1) stop 25 + if (it2 /= 1) stop 26 + if (it3 /= 1) stop 27 + ! non-(target,allocatable,pointer) -> firstprivate + !if (ii1 /= 2) stop 28 ! FIXME: optional scalar wrongly mapped as tofrom, PR fortran/100991 + if (ii2 /= 2) stop 29 + if (ii3 /= 2) stop 30 + + deallocate(ia3, iat3, ip3) +end + +! Implicit mapping likewise even though there is defaultmap +subroutine foo2 (ia1, ia2, ip1, ip2, iat1, iat2, it1, it2, ii1, ii2) + implicit none + integer :: ia1, ia2, ia3, ip1, ip2, ip3, iat1, iat2, iat3, it1, it2, it3, ii1, ii2, ii3 + allocatable :: ia1, ia2, ia3, iat1, iat2, iat3 + pointer :: ip1, ip2, ip3 + target :: iat1, iat2, iat3, it1, it2, it3 + optional :: ia1, ip1, iat1, it1, ii1 + + allocate(ia3, iat3, ip3) + + ia1 = 2; ia2 = 2; ia3 = 2; ip1 = 2; ip2 = 2; ip3 = 2; + iat1 = 2; iat2 = 2; iat3 = 2; it1 = 2; it2 = 2; it3 = 2 + ii1 = 2; ii2 = 2; ii3 = 2 + + ! Implicitly, scalars are 'firstprivate' except + ! if target, allocatable, pointer they are always tofrom. 
+ !$omp target defaultmap(default) + if (ia1 /= 2) stop 31 + if (ia2 /= 2) stop 32 + if (ia3 /= 2) stop 33 + if (ip1 /= 2) stop 34 + if (ip2 /= 2) stop 35 + if (ip3 /= 2) stop 36 + if (iat1 /= 2) stop 37 + if (iat2 /= 2) stop 38 + if (iat3 /= 2) stop 39 + if (it1 /= 2) stop 40 + if (it2 /= 2) stop 41 + if (it3 /= 2) stop 42 + if (ii1 /= 2) stop 43 + if (ii2 /= 2) stop 44 + if (ii3 /= 2) stop 45 + + ia1 = 1; ia2 = 1; ia3 = 1; ip1 = 1; ip2 = 1; ip3 = 1; + iat1 = 1; iat2 = 1; iat3 = 1; it1 = 1; it2 = 1; it3 = 1 + ii1 = 1; ii2 = 1; ii3 = 1 + !$omp end target + + ! (target,allocatable,pointer) -> tofrom + if (ia1 /= 1) stop 46 + if (ia2 /= 1) stop 47 + if (ia3 /= 1) stop 48 + if (ip1 /= 1) stop 49 + if (ip2 /= 1) stop 50 + if (ip3 /= 1) stop 51 + if (iat1 /= 1) stop 52 + if (iat2 /= 1) stop 53 + if (iat3 /= 1) stop 54 + if (it1 /= 1) stop 55 + if (it2 /= 1) stop 56 + if (it3 /= 1) stop 57 + ! non-(target,allocatable,pointer) -> firstprivate + !if (ii1 /= 2) stop 58 ! FIXME: optional scalar wrongly mapped as tofrom, PR fortran/100991 + if (ii2 /= 2) stop 59 + if (ii3 /= 2) stop 60 + + deallocate(ia3, iat3, ip3) +end + +! Implicit mapping likewise even though there is defaultmap +subroutine foo3 (ia1, ia2, ip1, ip2, iat1, iat2, it1, it2, ii1, ii2) + implicit none + integer :: ia1, ia2, ia3, ip1, ip2, ip3, iat1, iat2, iat3, it1, it2, it3, ii1, ii2, ii3 + allocatable :: ia1, ia2, ia3, iat1, iat2, iat3 + pointer :: ip1, ip2, ip3 + target :: iat1, iat2, iat3, it1, it2, it3 + optional :: ia1, ip1, iat1, it1, ii1 + + allocate(ia3, iat3, ip3) + + ia1 = 2; ia2 = 2; ia3 = 2; ip1 = 2; ip2 = 2; ip3 = 2; + iat1 = 2; iat2 = 2; iat3 = 2; it1 = 2; it2 = 2; it3 = 2 + ii1 = 2; ii2 = 2; ii3 = 2 + + ! Implicitly, scalars are 'firstprivate' except + ! if target, allocatable, pointer they are always tofrom. 
+ !$omp target defaultmap(none:aggregate) + if (ia1 /= 2) stop 61 + if (ia2 /= 2) stop 62 + if (ia3 /= 2) stop 63 + if (ip1 /= 2) stop 64 + if (ip2 /= 2) stop 65 + if (ip3 /= 2) stop 66 + if (iat1 /= 2) stop 67 + if (iat2 /= 2) stop 68 + if (iat3 /= 2) stop 69 + if (it1 /= 2) stop 70 + if (it2 /= 2) stop 71 + if (it3 /= 2) stop 72 + if (ii1 /= 2) stop 73 + if (ii2 /= 2) stop 74 + if (ii3 /= 2) stop 75 + + ia1 = 1; ia2 = 1; ia3 = 1; ip1 = 1; ip2 = 1; ip3 = 1; + iat1 = 1; iat2 = 1; iat3 = 1; it1 = 1; it2 = 1; it3 = 1 + ii1 = 1; ii2 = 1; ii3 = 1 + !$omp end target + + ! (target,allocatable,pointer) -> tofrom + if (ia1 /= 1) stop 76 + if (ia2 /= 1) stop 77 + if (ia3 /= 1) stop 78 + if (ip1 /= 1) stop 79 + if (ip2 /= 1) stop 80 + if (ip3 /= 1) stop 81 + if (iat1 /= 1) stop 82 + if (iat2 /= 1) stop 83 + if (iat3 /= 1) stop 84 + if (it1 /= 1) stop 85 + if (it2 /= 1) stop 86 + if (it3 /= 1) stop 87 + ! non-(target,allocatable,pointer) -> firstprivate + !if (ii1 /= 2) stop 88 ! FIXME: optional scalar wrongly mapped as tofrom, PR fortran/100991 + if (ii2 /= 2) stop 89 + if (ii3 /= 2) stop 90 + + deallocate(ia3, iat3, ip3) +end + +subroutine bar (ea1, ea2, ep1, ep2, eat1, eat2, et1, et2, ei1, ei2) + implicit none + integer :: ea1, ea2, ea3, ep1, ep2, ep3, eat1, eat2, eat3, et1, et2, et3, ei1, ei2, ei3 + allocatable :: ea1, ea2, ea3, eat1, eat2, eat3 + pointer :: ep1, ep2, ep3 + target :: eat1, eat2, eat3, et1, et2, et3 + optional :: ea1, ep1, eat1, et1, ei1 + logical :: shared_memory + + allocate(ea3, eat3, ep3) + + ea1 = 2; ea2 = 2; ea3 = 2; ep1 = 2; ep2 = 2; ep3 = 2; + eat1 = 2; eat2 = 2; eat3 = 2; et1 = 2; et2 = 2; et3 = 2 + ei1 = 2; ei2 = 2; ei3 = 2 + + shared_memory = .false. + !$omp target map(to: shared_memory) + shared_memory = .true. + !$omp end target + + ! While here 'scalar' implies nonallocatable/nonpointer and + ! the target attribute plays no role. 
+ !$omp target defaultmap(tofrom:scalar) defaultmap(firstprivate:allocatable) & + !$omp& defaultmap(none:aggregate) defaultmap(firstprivate:pointer) & + !$omp& map(always, to: shared_memory) + if (shared_memory) then + ! Due to fortran/90742 this fails when doing non-shared memory offloading + if (ea1 /= 2) stop 91 + if (ea2 /= 2) stop 92 + if (ea3 /= 2) stop 93 + if (ep1 /= 2) stop 94 + if (ep2 /= 2) stop 95 + if (ep3 /= 2) stop 96 + if (eat1 /= 2) stop 97 + if (eat2 /= 2) stop 98 + if (eat3 /= 2) stop 99 + end if + if (et1 /= 2) stop 100 + if (et2 /= 2) stop 101 + if (et3 /= 2) stop 102 + if (ei1 /= 2) stop 103 + if (ei2 /= 2) stop 104 + if (ei3 /= 2) stop 105 + ep1 => null(); ep2 => null(); ep3 => null() + if (shared_memory) then + ! Due to fortran/90742 this fails when doing non-shared memory offloading + ea1 = 1; ea2 = 1; ea3 = 1 + eat1 = 1; eat2 = 1; eat3 = 1 + end if + et1 = 1; et2 = 1; et3 = 1 + ei1 = 1; ei2 = 1; ei3 = 1 + !$omp end target + ! (allocatable,pointer) -> firstprivate + +! FIXME: allocatables not properly privatized, cf. PR fortran/90742 + +! if (ea1 /= 2) stop 106 +! if (ea2 /= 2) stop 107 +! if (ea3 /= 2) stop 108 +! if (eat1 /= 2) stop 112 +! if (eat2 /= 2) stop 113 +! if (eat3 /= 2) stop 114 + if (ep1 /= 2) stop 109 + if (ep2 /= 2) stop 110 + if (ep3 /= 2) stop 111 + ! (scalar) -> tofrom + !if (et1 /= 1) stop 115 ! FIXME: optional scalar wrongly mapped as 'firstprivate', PR fortran/100991 + if (et2 /= 1) stop 116 + if (et3 /= 1) stop 117 + !if (ei1 /= 1) stop 118 ! FIXME: optional scalar wrongly mapped as 'firstprivate', PR fortran/100991 + if (ei2 /= 1) stop 119 + if (ei3 /= 1) stop 120 + + deallocate(ea3, eat3, ep3) +end + +end + +! FIXME/xfail: Optional scalars wrongly classified, PR fortran/100991 +! { dg-final { scan-tree-dump-times "firstprivate\\(ii1\\)" 3 "gimple" { xfail *-*-* } } } +! { dg-final { scan-tree-dump-not "firstprivate\\(et1\\)" "gimple" { xfail *-*-* } } } +! 
{ dg-final { scan-tree-dump-not "firstprivate\\(ei1\\)" "gimple" { xfail *-*-* } } } + +! { dg-final { scan-tree-dump-times "firstprivate\\(ea1\\)" 1 "gimple" } } +! { dg-final { scan-tree-dump-times "firstprivate\\(ea2\\)" 1 "gimple" } } +! { dg-final { scan-tree-dump-times "firstprivate\\(ea3\\)" 1 "gimple" } } +! { dg-final { scan-tree-dump-times "firstprivate\\(eat1\\)" 1 "gimple" } } +! { dg-final { scan-tree-dump-times "firstprivate\\(eat2\\)" 1 "gimple" } } +! { dg-final { scan-tree-dump-times "firstprivate\\(eat3\\)" 1 "gimple" } } +! { dg-final { scan-tree-dump-times "firstprivate\\(ep1\\)" 1 "gimple" } } +! { dg-final { scan-tree-dump-times "firstprivate\\(ep2\\)" 1 "gimple" } } +! { dg-final { scan-tree-dump-times "firstprivate\\(ep3\\)" 1 "gimple" } } diff --git a/libgomp/testsuite/libgomp.fortran/depend-iterator-2.f90 b/libgomp/testsuite/libgomp.fortran/depend-iterator-2.f90 new file mode 100644 index 0000000..05090d3 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/depend-iterator-2.f90 @@ -0,0 +1,89 @@ +module m + implicit none (type, external) + integer, volatile :: v +contains +subroutine foo (p, i) + integer :: p(0:*) + integer :: i + !$omp task depend (out: p(0)) + v = v + 1 + !$omp end task + !$omp task depend (in: p(0)) + v = v + 1 + !$omp end task + !$omp task depend (inout: p(0)) + v = v + 1 + !$omp end task + !$omp task depend (mutexinoutset: p(0)) + v = v + 1 + !$omp end task + !$omp task depend (out: p(0)) depend (in: p(1)) + v = v + 1 + !$omp end task + !$omp task depend (in: p(0)) depend (inout: p(1)) + v = v + 1 + !$omp end task + !$omp task depend (inout: p(0)) depend (mutexinoutset: p(1)) + v = v + 1 + !$omp end task + !$omp task depend (mutexinoutset: p(0)) depend (out: p(1)) + v = v + 1 + !$omp end task + !$omp task depend (iterator (j=0:2) , out : p(j)) + v = v + 1 + !$omp end task + !$omp task depend (iterator (j=0:2) , in : p(j)) + v = v + 1 + !$omp end task + !$omp task depend (iterator (j=0:2) , inout : p(j)) + v = v + 1 + 
!$omp end task + !$omp task depend (iterator (j=0:2) , mutexinoutset : p(j)) + v = v + 1 + !$omp end task + !$omp task depend (iterator (j=0:2) , out : p(j)) depend (iterator (j=0:2) , in : p(j + 2)) + v = v + 1 + !$omp end task + !$omp task depend (iterator (j=0:2) , in : p(j)) depend (iterator (j=0:2) , inout : p(j + 2)) + v = v + 1 + !$omp end task + !$omp task depend (iterator (j=0:2) , inout : p(j)) depend (iterator (j=0:2) , mutexinoutset : p(j + 2)) + v = v + 1 + !$omp end task + !$omp task depend (iterator (j=0:2) , mutexinoutset : p(j)) depend (iterator (j=0:2) , out : p(j + 2)) + v = v + 1 + !$omp end task + !$omp task depend (iterator (j=0:i) , out : p(j)) + v = v + 1 + !$omp end task + !$omp task depend (iterator (j=0:i) , in : p(j)) + v = v + 1 + !$omp end task + !$omp task depend (iterator (j=0:i) , inout : p(j)) + v = v + 1 + !$omp end task + !$omp task depend (iterator (j=0:i) , mutexinoutset : p(j)) + v = v + 1 + !$omp end task + !$omp task depend (iterator (j=0:i) , out : p(j)) depend (iterator (j=0:i) , in : p(j + 2)) + v = v + 1 + !$omp end task + !$omp task depend (iterator (j=0:i) , in : p(j)) depend (iterator (j=0:i) , inout : p(j + 2)) + v = v + 1 + !$omp end task + !$omp task depend (iterator (j=0:i) , inout : p(j)) depend (iterator (j=0:i) , mutexinoutset : p(j + 2)) + v = v + 1 + !$omp end task + !$omp task depend (iterator (j=0:i) , mutexinoutset : p(j)) depend (iterator (j=0:i) , out : p(j + 2)) + v = v + 1 + !$omp end task +end +end module + +program main + use m + implicit none (external, type) + integer p(4) + call foo (p, 2) + call foo (p, -1) +end diff --git a/libgomp/testsuite/libgomp.fortran/depobj-1.f90 b/libgomp/testsuite/libgomp.fortran/depobj-1.f90 new file mode 100644 index 0000000..273b10c --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/depobj-1.f90 @@ -0,0 +1,115 @@ +module m + use omp_lib, only: omp_depend_kind + implicit none (type, external) + integer :: xx + integer(omp_depend_kind) :: dd1, dd2 +contains + 
subroutine dep + integer :: x + integer(omp_depend_kind) :: d1, d2 + x = 1 + + !$omp depobj (d1) depend(in: x) + !$omp depobj (d2) depend(in: x) + !$omp depobj (d2) update(out) + !$omp parallel + !$omp single + !$omp task shared (x) depend(depobj: d2) + x = 2 + !$omp end task + !$omp task shared (x) depend(depobj: d1) + if (x /= 2) & + stop 1 + !$omp end task + !$omp end single + !$omp end parallel + !$omp depobj (d2) destroy + !$omp depobj (d1) destroy + end + + subroutine dep2 + integer(omp_depend_kind) :: d1, d2 + pointer :: d1 + allocate(d1) + call dep2i(d1, d2) + deallocate(d1) + contains + subroutine dep2i(d1, d2) + integer(omp_depend_kind) :: d1 + integer(omp_depend_kind), optional :: d2 + pointer :: d1 + !$omp parallel + !$omp single + block + integer :: x + x = 1 + !$omp depobj (d1) depend(out: x) + !$omp depobj (d2) depend (in:x) + !$omp depobj(d2)update(in) + !$omp task shared (x) depend(depobj:d1) + x = 2 + !$omp end task + !$omp task shared (x) depend(depobj : d2) + if (x /= 2) & + stop 2 + !$omp end task + !$omp taskwait + !$omp depobj(d1)destroy + !$omp depobj(d2) destroy + end block + !$omp end single + !$omp end parallel + end + end + + subroutine dep3 + integer(omp_depend_kind) :: d(2) + !$omp parallel + block + integer :: x + x = 1 + !$omp single + !$omp depobj(d(1)) depend(out:x) + !$omp depobj(d(2)) depend(in: x) + !$omp task shared (x) depend(depobj: d(1)) + x = 2 + !$omp end task + !$omp task shared (x) depend(depobj: d(2)) + if (x /= 2) & + stop 3 + !$omp end task + !$omp end single + end block + !$omp end parallel + !$omp depobj(d(1)) destroy + !$omp depobj(d(2)) destroy + end + + subroutine antidep + xx = 1 + !$omp parallel + !$omp single + !$omp task shared(xx) depend(depobj:dd2) + if (xx /= 1) & + stop 4 + !$omp end task + !$omp task shared(xx) depend(depobj:dd1) + xx = 2 + !$omp end task + !$omp end single + !$omp end parallel + end +end module m + +program main + use m + implicit none (type, external) + call dep () + call dep2 () + 
call dep3 () + !$omp depobj (dd1) depend (inout: xx) + !$omp depobj (dd2) depend (in : xx) + call antidep () + !$omp depobj (dd2) destroy + !$omp depobj (dd1) destroy +end program main diff --git a/libgomp/testsuite/libgomp.fortran/error-1.f90 b/libgomp/testsuite/libgomp.fortran/error-1.f90 new file mode 100644 index 0000000..ee3222d --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/error-1.f90 @@ -0,0 +1,87 @@ +! { dg-shouldfail "error directive" } + +module m + implicit none (external, type) +contains +integer function foo (i, x) + integer, value :: i, x + if (x /= 0) then + !$omp error severity(warning) ! { dg-warning ".OMP ERROR encountered at .1." } + i = i + 1 + end if + if (x /= 0) then + ! ... + else + !$omp error severity(warning) ! { dg-warning ".OMP ERROR encountered at .1." } + i = i + 2 + end if + select case(0) + !$omp error severity(warning) ! { dg-warning ".OMP ERROR encountered at .1." } + case default + ! + end select + do while (.false.) + !$omp error message("42 - 1") severity (warning) ! { dg-warning ".OMP ERROR encountered at .1.: 42 - 1" } + i = i + 4 + end do +99 continue + !$omp error severity(warning) message("bar") at(compilation) ! { dg-warning ".OMP ERROR encountered at .1.: bar" } + i = i + 8 + foo = i +end function +end module + +program main + use m + implicit none (external, type) + character(len=13) :: msg + character(len=:), allocatable :: msg2, msg3 + + ! Initialize offloading early, so that any output this may produce doesn't + ! disturb the 'dg-output' scanning below. + !$omp target + !$omp end target + + msg = "my message" + if (foo (5, 0) /= 15 .or. 
foo (7, 1) /= 16) & + stop 1 + msg2 = "Paris" + msg3 = "To thine own self be true" + call bar ("Polonius", "Laertes", msg2, msg3) + msg2 = "Hello World" + !$omp error at (execution) severity (warning) + !$omp error at (execution) severity (warning) message(trim(msg(4:))) + !$omp error at (execution) severity (warning) message ("Farewell") + !$omp target + !$omp error at (execution) severity (warning) message ("ffrom a distanceee"(2:16)) + !$omp end target + !$omp error at (execution) severity (warning) message (msg2) + !$omp error at (execution) severity (warning) message (msg(4:6)) + !$omp error at (execution) severity (fatal) message (msg) + ! unreachable due to 'fatal'---------^ + !$omp error at (execution) severity (warning) message ("foobar") +contains + subroutine bar(x, y, a, b) + character(len=*) :: x, y + character(len=:), allocatable :: a, b + optional :: y, b + intent(in) :: x, y, a, b + !$omp error at (execution) severity (warning) message (x) + !$omp error at (execution) severity (warning) message (y) + !$omp error at (execution) severity (warning) message (a) + !$omp error at (execution) severity (warning) message (b) + end subroutine +end + +! { dg-output "(\n|\r|\r\n)" } +! { dg-output "libgomp: error directive encountered: Polonius(\n|\r|\r\n)(\n|\r|\r\n)" } +! { dg-output "libgomp: error directive encountered: Laertes(\n|\r|\r\n)(\n|\r|\r\n)" } +! { dg-output "libgomp: error directive encountered: Paris(\n|\r|\r\n)(\n|\r|\r\n)" } +! { dg-output "libgomp: error directive encountered: To thine own self be true(\n|\r|\r\n)(\n|\r|\r\n)" } +! { dg-output "libgomp: error directive encountered(\n|\r|\r\n)(\n|\r|\r\n)" } +! { dg-output "libgomp: error directive encountered: message(\n|\r|\r\n)(\n|\r|\r\n)" } +! { dg-output "libgomp: error directive encountered: Farewell(\n|\r|\r\n)(\n|\r|\r\n)" } +! { dg-output "libgomp: error directive encountered: from a distance(\n|\r|\r\n)(\n|\r|\r\n)" } +! 
{ dg-output "libgomp: error directive encountered: Hello World(\n|\r|\r\n)(\n|\r|\r\n)" } +! { dg-output "libgomp: error directive encountered: mes(\n|\r|\r\n)(\n|\r|\r\n)" } +! { dg-output "libgomp: fatal error: error directive encountered: my message (\n|\r|\r\n)" } diff --git a/libgomp/testsuite/libgomp.fortran/masked-1.f90 b/libgomp/testsuite/libgomp.fortran/masked-1.f90 new file mode 100644 index 0000000..6b7ebc7 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/masked-1.f90 @@ -0,0 +1,119 @@ +module m + use omp_lib + implicit none (type, external) +contains + subroutine foo (x, a) + integer, value :: x + integer, contiguous :: a(0:) + integer :: i + + !$omp masked + if (omp_get_thread_num () /= 0) & + stop 1 + a(128) = a(128) + 1 + !$omp end masked + + !$omp masked filter (0) + if (omp_get_thread_num () /= 0) & + stop 2 + a(129) = a(129) + 1 + !$omp end masked + + !$omp masked filter (7) + if (omp_get_thread_num () /= 7) & + stop 3 + a(130) = a(130) + 1 + !$omp end masked + + !$omp masked filter (x) + if (omp_get_thread_num () /= x) & + stop 4 + a(131) = a(131) + 1 + !$omp end masked + + !$omp masked taskloop simd filter (x) shared(a) grainsize (12) simdlen (4) + do i = 0, 127 + a(i) = a(i) + i + end do + !$omp end masked taskloop simd + end +end + +program main + use m + implicit none (type, external) + integer :: i + integer :: a(0:135) + + a = 0 + + !$omp parallel num_threads (4) + call foo (4, a) + !$omp end parallel + do i = 0, 127 + if (a(i) /= 0) & + stop 5 + end do + if (a(128) /= 1 .or. a(129) /= 1 .or. a(130) /= 0 .or. a(131) /= 0) & + stop 6 + + !$omp parallel num_threads (4) + call foo (3, a) + !$omp end parallel + do i = 0, 127 + if (a(i) /= i) & + stop 7 + end do + if (a(128) /= 2 .or. a(129) /= 2 .or. a(130) /= 0 .or. a(131) /= 1) & + stop 8 + + !$omp parallel num_threads (8) + call foo (8, a) + !$omp end parallel + do i = 0, 127 + if (a(i) /= i) & + stop 9 + end do + if (a(128) /= 3 .or. a(129) /= 3 .or. a(130) /= 1 .or. 
a(131) /= 1) & + stop 10 + + !$omp parallel num_threads (8) + call foo (6, a) + !$omp end parallel + do i = 0, 127 + if (a(i) /= 2 * i) & + stop 11 + end do + if (a(128) /= 4 .or. a(129) /= 4 .or. a(130) /= 2 .or. a(131) /= 2) & + stop 12 + + do i = 0, 7 + a(i) = 0 + end do + ! The filter expression can evaluate to different values in different threads. + !$omp parallel masked num_threads (8) filter (omp_get_thread_num () + 1) + a(omp_get_thread_num ()) = a(omp_get_thread_num ()) + 1 + !$omp end parallel masked + do i = 0, 7 + if (a(i) /= 0) & + stop 13 + end do + + ! And multiple threads can be filtered. + !$omp parallel masked num_threads (8) filter (iand (omp_get_thread_num (), not(1))) + a(omp_get_thread_num ()) = a(omp_get_thread_num ()) + 1 + !$omp end parallel masked + do i = 0, 7 + block + integer :: j + j = iand (i, 1) + if (j /= 0) then + j = 0 + else + j = 1 + end if + if (a(i) /= j) & + stop 14 + end block + end do +end program main diff --git a/libgomp/testsuite/libgomp.fortran/on_device_arch.c b/libgomp/testsuite/libgomp.fortran/on_device_arch.c new file mode 100644 index 0000000..98822c4 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/on_device_arch.c @@ -0,0 +1,3 @@ +/* Auxiliary file. */ +/* { dg-do compile { target skip-all-targets } } */ +#include "../libgomp.c-c++-common/on_device_arch.h" diff --git a/libgomp/testsuite/libgomp.fortran/parallel-master.f90 b/libgomp/testsuite/libgomp.fortran/parallel-master.f90 new file mode 100644 index 0000000..1e30b48 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/parallel-master.f90 @@ -0,0 +1,14 @@ +! { dg-additional-options "-fdump-tree-original" } +program main + use omp_lib + implicit none (type, external) + integer :: p, a(20) + !$omp parallel master num_threads(4) private (p) shared(a) + p = omp_get_thread_num (); + if (p /= 0) stop 1 + a = 0 + !$omp end parallel master +end + +! 
{ dg-final { scan-tree-dump "#pragma omp parallel private\\(p\\) shared\\(a\\) num_threads\\(4\\)" "original"} } +! { dg-final { scan-tree-dump "#pragma omp master" "original"} } diff --git a/libgomp/testsuite/libgomp.fortran/pr100981-2.f90 b/libgomp/testsuite/libgomp.fortran/pr100981-2.f90 new file mode 100644 index 0000000..9814224 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/pr100981-2.f90 @@ -0,0 +1,33 @@ +! { dg-do run } +! { dg-additional-options "-O3 -ftree-parallelize-loops=2 -fno-signed-zeros -fno-trapping-math" } + +complex function cdcdot(n, cx) + implicit none + + integer :: n, i, kx + complex :: cx(*) + double precision :: dsdotr, dsdoti, dt1, dt3 + + kx = 1 + dsdotr = 0 + dsdoti = 0 + do i = 1, n + dt1 = real(cx(kx)) + dt3 = aimag(cx(kx)) + dsdotr = dsdotr + dt1 * 2 - dt3 * 2 + dsdoti = dsdoti + dt1 * 2 + dt3 * 2 + kx = kx + 1 + end do + cdcdot = cmplx(real(dsdotr), real(dsdoti)) + return +end function cdcdot +program test + implicit none + complex :: cx(100), ct, cdcdot + integer :: i + do i = 1, 100 + cx(i) = cmplx(2*i, i) + end do + ct = cdcdot (100, cx) + if (ct.ne.cmplx(10100.0000,30300.0000)) call abort +end diff --git a/libgomp/testsuite/libgomp.fortran/refcount-1.f90 b/libgomp/testsuite/libgomp.fortran/refcount-1.f90 new file mode 100644 index 0000000..e3b9d04 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/refcount-1.f90 @@ -0,0 +1,61 @@ +program main + use omp_lib + use iso_c_binding + implicit none (type, external) + + integer :: d, id + integer(kind=1), target :: a(4) + integer(kind=1), pointer :: p, q + + d = omp_get_default_device () + id = omp_get_initial_device () + + if (d < 0 .or. 
d >= omp_get_num_devices ()) & + d = id + + a = transfer (int(z'cdcdcdcd'), mold=a) + + !$omp target enter data map (to:a) + + a = transfer (int(z'abababab'), mold=a) + p => a(1) + q => a(3) + + !$omp target enter data map (alloc:p, q) + + if (d /= id) then + if (omp_target_is_present (c_loc(a), d) == 0) & + stop 1 + if (omp_target_is_present (c_loc(p), d) == 0) & + stop 2 + if (omp_target_is_present (c_loc(q), d) == 0) & + stop 3 + end if + + !$omp target exit data map (release:a) + + if (d /= id) then + if (omp_target_is_present (c_loc(a), d) == 0) & + stop 4 + if (omp_target_is_present (c_loc(p), d) == 0) & + stop 5 + if (omp_target_is_present (c_loc(q), d) == 0) & + stop 6 + end if + + !$omp target exit data map (from:q) + + if (d /= id) then + if (omp_target_is_present (c_loc(a), d) /= 0) & + stop 7 + if (omp_target_is_present (c_loc(p), d) /= 0) & + stop 8 + if (omp_target_is_present (c_loc(q), d) /= 0) & + stop 9 + + if (q /= int(z'cd', kind=1)) & + stop 10 + if (p /= int(z'ab', kind=1)) & + stop 11 + end if +end program main diff --git a/libgomp/testsuite/libgomp.fortran/scope-1.f90 b/libgomp/testsuite/libgomp.fortran/scope-1.f90 new file mode 100644 index 0000000..3f41e89 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/scope-1.f90 @@ -0,0 +1,55 @@ +program main + implicit none (type, external) + integer :: r, r2, i + integer a(0:63) + a = 0 + r = 0; r2 = 0 + !$omp parallel + !$omp scope + !$omp scope + !$omp do + do i = 0, 63 + a(i) = a(i) + 1 + end do + !$omp end do + !$omp end scope nowait + !$omp end scope nowait + + !$omp scope reduction(+: r) + !$omp do + do i = 0, 63 + r = r + i + if (a(i) /= 1) & + stop 1 + end do + !$omp end do nowait + !$omp barrier + !$omp end scope nowait + + !$omp barrier + + if (r /= 64 * 63 / 2) & + stop 2 + + !$omp scope private (i) + !$omp scope reduction(+: r2) + !$omp do + do i = 0, 63 + r2 = r2 + 2 * i + a(i) = a(i) + i + end do + !$omp end do nowait + !$omp end scope + !$omp end scope nowait + + if (r2 /= 64 * 63) 
& + stop 3 + + !$omp do + do i = 0, 63 + if (a(i) /= i + 1) & + stop 4 + end do + !$omp end do nowait + !$omp end parallel +end diff --git a/libgomp/testsuite/libgomp.fortran/target-12.f90 b/libgomp/testsuite/libgomp.fortran/target-12.f90 new file mode 100644 index 0000000..17c78f1 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/target-12.f90 @@ -0,0 +1,147 @@ +program main + use omp_lib + use iso_c_binding + implicit none (external, type) + integer :: d, id, i, j, k, l + logical :: err + integer, target :: q(0:127) + type(c_ptr) :: p + + integer(kind=c_size_t) :: volume(0:2) + integer(kind=c_size_t) :: dst_offsets(0:2) + integer(kind=c_size_t) :: src_offsets(0:2) + integer(kind=c_size_t) :: dst_dimensions(0:2) + integer(kind=c_size_t) :: src_dimensions(0:2) + integer(kind=c_size_t) :: empty(1:0) + + err = .false. + d = omp_get_default_device () + id = omp_get_initial_device () + + if (d < 0 .or. d >= omp_get_num_devices ()) & + d = id + + q = [(i, i = 0, 127)] + p = omp_target_alloc (130 * c_sizeof (q), d) + if (.not. c_associated (p)) & + stop 0 ! okay + + if (omp_target_memcpy_rect (C_NULL_PTR, C_NULL_PTR, 0_c_size_t, 0, & + empty, empty, empty, empty, empty, d, id) < 3 & + .or. omp_target_memcpy_rect (C_NULL_PTR, C_NULL_PTR, 0_c_size_t, 0, & + empty, empty, empty, empty, empty, & + id, d) < 3 & + .or. omp_target_memcpy_rect (C_NULL_PTR, C_NULL_PTR, 0_c_size_t, 0, & + empty, empty, empty, empty, empty, & + id, id) < 3) & + stop 1 + + if (omp_target_associate_ptr (c_loc (q), p, 128 * c_sizeof (q(0)), & + c_sizeof (q(0)), d) == 0) then + volume = [ 128, 0, 0 ] + dst_offsets = [ 0, 0, 0 ] + src_offsets = [ 1, 0, 0 ] + dst_dimensions = [ 128, 0, 0 ] + src_dimensions = [ 128, 0, 0 ] + + + if (omp_target_associate_ptr (c_loc (q), p, 128 * sizeof (q(0)), & + sizeof (q(0)), d) /= 0) & + stop 2 + + if (omp_target_is_present (c_loc (q), d) /= 1 & + .or. omp_target_is_present (c_loc (q(32)), d) /= 1 & + .or. 
omp_target_is_present (c_loc (q(127)), d) /= 1) & + stop 3 + + if (omp_target_memcpy (p, c_loc (q), 128 * sizeof (q(0)), sizeof (q(0)), & + 0_c_size_t, d, id) /= 0) & + stop 4 + + i = 0 + if (d >= 0) i = d + !$omp target if (d >= 0) device (i) map(alloc:q(0:31)) map(from:err) + err = .false. + do j = 0, 127 + if (q(j) /= j) then + err = .true. + else + q(j) = q(j) + 4 + end if + end do + !$omp end target + + if (err) & + stop 5 + + if (omp_target_memcpy_rect (c_loc (q), p, sizeof (q(0)), 1, volume, & + dst_offsets, src_offsets, dst_dimensions, & + src_dimensions, id, d) /= 0) & + stop 6 + + do i = 0, 127 + if (q(i) /= i + 4) & + stop 7 + end do + + volume(2) = 2 + volume(1) = 3 + volume(0) = 6 + dst_offsets(2) = 1 + dst_offsets(1) = 0 + dst_offsets(0) = 0 + src_offsets(2) = 1 + src_offsets(1) = 0 + src_offsets(0) = 3 + dst_dimensions(2) = 2 + dst_dimensions(1) = 3 + dst_dimensions(0) = 6 + src_dimensions(2) = 3 + src_dimensions(1) = 4 + src_dimensions(0) = 6 + + if (omp_target_memcpy_rect (p, c_loc (q), sizeof (q(0)), 3, volume, & + dst_offsets, src_offsets, dst_dimensions, & + src_dimensions, d, id) /= 0) & + stop 8 + + i = 0 + if (d >= 0) i = d + !$omp target if (d >= 0) device (i) map(alloc:q(1:32)) map(from:err) + err = .false. + do j = 0, 5 + do k = 0, 2 + do l = 0, 1 + if (q(j * 6 + k * 2 + l) /= 3 * 12 + 4 + 1 + l + k * 3 + j * 12) & + err = .true. + end do + end do + end do + !$omp end target + + if (err) & + stop 9 + + if (omp_target_memcpy (p, p, 10 * sizeof (q(1)), 51 * sizeof (q(1)), & + 111 * sizeof (q(1)), d, d) /= 0) & + stop 10 + + i = 0 + if (d >= 0) i = d + !$omp target if (d >= 0) device (i) map(alloc:q(0:31)) map(from:err) + err = .false. + do j = 1, 9 + if (q(50+j) /= q(110 + j)) & + err = .true. 
+ end do + !$omp end target + + if (err) & + stop 11 + + if (omp_target_disassociate_ptr (c_loc (q), d) /= 0) & + stop 12 + end if + + call omp_target_free (p, d) +end program main diff --git a/libgomp/testsuite/libgomp.fortran/target10.f90 b/libgomp/testsuite/libgomp.fortran/target10.f90 new file mode 100644 index 0000000..f41a726 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/target10.f90 @@ -0,0 +1,21 @@ +! { dg-do run } +! { dg-xfail-run-if TODO { offload_device_intel_mic } } + +program main + use omp_lib + implicit none + integer :: device_num, host_device_num + logical :: initial_device + + host_device_num = omp_get_device_num () + if (host_device_num .ne. omp_get_initial_device ()) stop 1 + + !$omp target map(from: device_num, initial_device) + initial_device = omp_is_initial_device () + device_num = omp_get_device_num () + !$omp end target + + if (initial_device .and. (host_device_num .ne. device_num)) stop 2 + if ((.not. initial_device) .and. (host_device_num .eq. device_num)) stop 3 + +end program main diff --git a/libgomp/testsuite/libgomp.fortran/task-detach-12.f90 b/libgomp/testsuite/libgomp.fortran/task-detach-12.f90 new file mode 100644 index 0000000..88546fe --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/task-detach-12.f90 @@ -0,0 +1,22 @@ +program test + use omp_lib + implicit none + integer(omp_event_handle_kind) :: oevent, ievent + integer :: i + integer, allocatable :: temp(:) + ALLOCATE(temp(5)) + + !$omp parallel num_threads(3) + !$omp single + DO i=1,5 + !$omp task firstprivate(i) firstprivate(temp) detach(oevent) + temp(:) = 0; + temp(1) = -1; + !print *,temp + call omp_fulfill_event(oevent) + !$omp end task + ENDDO + !$omp taskwait + !$omp end single + !$omp end parallel +end program diff --git a/libgomp/testsuite/libgomp.fortran/task-detach-6.f90 b/libgomp/testsuite/libgomp.fortran/task-detach-6.f90 index b2c476f..e4373b4 100644 --- a/libgomp/testsuite/libgomp.fortran/task-detach-6.f90 +++ 
b/libgomp/testsuite/libgomp.fortran/task-detach-6.f90 @@ -1,5 +1,8 @@ ! { dg-do run } +! { dg-additional-sources on_device_arch.c } + ! { dg-prune-output "command-line option '-fintrinsic-modules-path=.*' is valid for Fortran but not for C" } + ! Test tasks with detach clause on an offload device. Each device ! thread spawns off a chain of tasks, that can then be executed by ! any available thread. @@ -11,6 +14,17 @@ program task_detach_6 integer :: x = 0, y = 0, z = 0 integer :: thread_count + interface + integer function on_device_arch_nvptx() bind(C) + end function on_device_arch_nvptx + end interface + + !TODO See '../libgomp.c/pr99555-1.c'. + if (on_device_arch_nvptx () /= 0) then + call alarm (4, 0); !TODO Until resolved, make sure that we exit quickly, with error status. + ! { dg-xfail-run-if "PR99555" { offload_device_nvptx } } + end if + !$omp target map (tofrom: x, y, z) map (from: thread_count) !$omp parallel private (detach_event1, detach_event2) !$omp single diff --git a/libgomp/testsuite/libgomp.fortran/task-reduction-16.f90 b/libgomp/testsuite/libgomp.fortran/task-reduction-16.f90 new file mode 100644 index 0000000..c6b39e0 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/task-reduction-16.f90 @@ -0,0 +1,82 @@ +module m + implicit none (external, type) + integer :: a, b(0:2) = [1, 1, 1] + integer(8) :: c(0:1) = [not(0_8), not(0_8)] +contains + subroutine bar (i) + integer :: i + !$omp task in_reduction (*: b) in_reduction (iand: c) & + !$omp& in_reduction (+: a) + a = a + 4 + b(1) = b(1) * 4 + c(1) = iand (c(1), not(ishft(1_8, i + 16))) + !$omp end task + end subroutine bar + + subroutine foo (x) + integer :: x + !$omp scope reduction (task, +: a) + !$omp scope reduction (task, *: b) + !$omp scope reduction (task, iand: c) + !$omp barrier + !$omp sections + block + a = a + 1; b(0) = b(0) * 2; call bar (2); b(2) = b(2) * 3 + c(1) = iand(c(1), not(ishft(1_8, 2))) + end block + !$omp section + block + b(0) = b(0) * 2; call bar (4); b(2) = b(2) * 3 
+ c(1) = iand(c(1), not(ishft(1_8, 4))); a = a + 1 + end block + !$omp section + block + call bar (6); b(2) = b(2) * 3; c(1) = iand(c(1), not(ishft(1_8, 6))) + a = a + 1; b(0) = b(0) * 2 + end block + !$omp section + block + b(2) = b(2) * 3; c(1) = iand(c(1), not(ishft(1_8, 8))) + a = a + 1; b(0) = b(0) * 2; call bar (8) + end block + !$omp section + block + c(1) = iand(c(1), not(ishft(1_8, 10))); a = a + 1 + b(0) = b(0) * 2; call bar (10); b(2) = b(2) * 3 + end block + !$omp section + block + a = a + 1; b(0) = b(0) * 2; b(2) = b(2) * 3 + c(1) = iand(c(1), not(ishft(1_8, 12))); call bar (12) + end block + !$omp section + if (x /= 0) then + a = a + 1; b(0) = b(0) * 2; b(2) = b(2) * 3 + call bar (14); c(1) = iand (c(1), not(ishft(1_8, 14))) + end if + !$omp end sections + !$omp end scope + !$omp end scope + !$omp end scope + end subroutine foo +end module m + +program main + use m + implicit none (type, external) + integer, volatile :: one + one = 1 + call foo (0) + if (a /= 30 .or. b(0) /= 64 .or. b(1) /= ishft (1, 12) .or. b(2) /= 3 * 3 * 3 * 3 * 3 * 3 & + .or. c(0) /= not(0_8) .or. c(1) /= not(int(z'15541554', kind=8))) & + stop 1 + a = 0 + b(:) = [1, 1, 1] + c(1) = not(0_8) + !$omp parallel + call foo (one) + !$omp end parallel + if (a /= 35 .or. b(0) /= 128 .or. b(1) /= ishft(1, 14) .or. b(2) /= 3 * 3 * 3 * 3 * 3 * 3 * 3 & + .or. c(0) /= not(0_8) .or. c(1) /= not(int(z'55545554', kind=8))) & + stop 2 +end program main diff --git a/libgomp/testsuite/libgomp.fortran/taskloop-4-a.f90 b/libgomp/testsuite/libgomp.fortran/taskloop-4-a.f90 new file mode 100644 index 0000000..2049f5c --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/taskloop-4-a.f90 @@ -0,0 +1,86 @@ +! { dg-do compile { target skip-all-targets } } +! Only used by taskloop-4.f90 +! 
To avoid inlining + +module m2 + use m_taskloop4 + implicit none (external, type) +contains + +subroutine grainsize (a, b, c, d) + integer, value :: a, b, c, d + integer :: i, j, k + j = 0 + k = 0 + !$omp taskloop firstprivate (j, k) grainsize(d) + do i = a, b - 1, c + if (j == 0) then + !$omp atomic capture + k = v + v = v + 1 + !$omp end atomic + if (k >= 64) & + stop 1 + end if + j = j + 1 + u(k) = j + end do +end + +subroutine num_tasks (a, b, c, d) + integer, value :: a, b, c, d + integer :: i, j, k + j = 0 + k = 0 + !$omp taskloop firstprivate (j, k) num_tasks(d) + do i = a, b - 1, c + if (j == 0) then + !$omp atomic capture + k = v + v = v + 1 + !$omp end atomic + if (k >= 64) & + stop 2 + end if + j = j + 1 + u(k) = j + end do + end +end module + +program main + use m2 + implicit none (external, type) + !$omp parallel + !$omp single + block + integer :: min_iters, max_iters, ntasks + + ! If grainsize is present, # of task loop iters is >= grainsize && < 2 * grainsize, + ! unless # of loop iterations is smaller than grainsize. + if (test (0, 79, 1, 17, grainsize, ntasks, min_iters, max_iters) /= 79) & + stop 3 + if (min_iters < 17 .or. max_iters >= 17 * 2) & + stop 4 + if (test (-49, 2541, 7, 28, grainsize, ntasks, min_iters, max_iters) /= 370) & + stop 5 + if (min_iters < 28 .or. max_iters >= 28 * 2) & + stop 6 + if (test (7, 21, 2, 15, grainsize, ntasks, min_iters, max_iters) /= 7) & + stop 7 + if (ntasks /= 1 .or. min_iters /= 7 .or. max_iters /= 7) & + stop 8 + ! If num_tasks is present, # of tasks is min (# of loop iters, num_tasks) + ! and each task has at least one iteration. 
+ if (test (-51, 2500, 48, 9, num_tasks, ntasks, min_iters, max_iters) /= 54) & + stop 9 + if (ntasks /= 9) & + stop 10 + if (test (0, 25, 2, 17, num_tasks, ntasks, min_iters, max_iters) /= 13) & + stop 11 + if (ntasks /= 13) & + stop 12 + end block + !$omp end single + !$omp end parallel +end program diff --git a/libgomp/testsuite/libgomp.fortran/taskloop-4.f90 b/libgomp/testsuite/libgomp.fortran/taskloop-4.f90 new file mode 100644 index 0000000..910e197 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/taskloop-4.f90 @@ -0,0 +1,41 @@ +! { dg-do run } +! { dg-options "-O2" } +! { dg-additional-sources taskloop-4-a.f90 } + +module m_taskloop4 + implicit none (type, external) + integer :: v, u(0:63) + +contains +integer function test (a, b, c, d, fn, num_tasks, min_iters, max_iters) + integer, value :: a, b, c, d + interface + subroutine fn (n1, n2, n3, n4) + integer, value :: n1, n2, n3, n4 + end + end interface + integer :: num_tasks, min_iters, max_iters + integer :: i, t + + t = 0 + u = 0 + v = 0 + call fn (a, b, c, d) + min_iters = 0 + max_iters = 0 + num_tasks = v + if (v /= 0) then + min_iters = u(0) + max_iters = u(0) + t = u(0) + do i = 1, v - 1 + if (min_iters > u(i)) & + min_iters = u(i) + if (max_iters < u(i)) & + max_iters = u(i) + t = t + u(i) + end do + end if + test = t +end +end module diff --git a/libgomp/testsuite/libgomp.fortran/taskloop-5-a.f90 b/libgomp/testsuite/libgomp.fortran/taskloop-5-a.f90 new file mode 100644 index 0000000..f12681b --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/taskloop-5-a.f90 @@ -0,0 +1,95 @@ +! { dg-do compile { target skip-all-targets } } +! Only used by taskloop-5.f90 +! 
To avoid inlining + +module m2 + use m_taskloop5 + implicit none (external, type) +contains + +subroutine grainsize (a, b, c, d) + integer, value :: a, b, c, d + integer :: i, j, k + j = 0 + k = 0 + !$omp taskloop firstprivate (j, k) grainsize(strict:d) + do i = a, b - 1, c + if (j == 0) then + !$omp atomic capture + k = v + v = v + 1 + !$omp end atomic + if (k >= 64) & + stop 3 + w(k) = i + end if + j = j + 1 + u(k) = j + end do +end + +subroutine num_tasks (a, b, c, d) + integer, value :: a, b, c, d + integer :: i, j, k + j = 0 + k = 0 + !$omp taskloop firstprivate (j, k) num_tasks(strict:d) + do i = a, b - 1, c + if (j == 0) then + !$omp atomic capture + k = v + v = v + 1 + !$omp end atomic + if (k >= 64) & + stop 4 + w(k) = i + end if + j = j + 1 + u(k) = j + end do +end +end module + +program main + use m2 + implicit none (external, type) + !$omp parallel + !$omp single + block + integer :: min_iters, max_iters, ntasks, sep + + ! If grainsize is present and has strict modifier, # of task loop iters is == grainsize, + ! except that it can be smaller on the last task. + if (test (0, 79, 1, 17, grainsize, ntasks, min_iters, max_iters, sep) /= 79) & + stop 5 + if (ntasks /= 5 .or. min_iters /= 11 .or. max_iters /= 17 .or. sep /= 4) & + stop 15 + if (test (-49, 2541, 7, 28, grainsize, ntasks, min_iters, max_iters, sep) /= 370) & + stop 6 + if (ntasks /= 14 .or. min_iters /= 6 .or. max_iters /= 28 .or. sep /= 13) & + stop 16 + if (test (7, 21, 2, 15, grainsize, ntasks, min_iters, max_iters, sep) /= 7) & + stop 7 + if (ntasks /= 1 .or. min_iters /= 7 .or. max_iters /= 7 .or. sep /= 1) & + stop 8 + ! If num_tasks is present, # of tasks is min (# of loop iters, num_tasks) + ! and each task has at least one iteration. If strict modifier is present, + ! first set of tasks has ceil (# of loop iters / num_tasks) iterations, + ! followed by possibly empty set of tasks with floor (# of loop iters / num_tasks) + ! iterations. 
+ if (test (-51, 2500, 48, 9, num_tasks, ntasks, min_iters, max_iters, sep) /= 54) & + stop 9 + if (ntasks /= 9 .or. min_iters /= 6 .or. max_iters /= 6 .or. sep /= 9) & + stop 10 + if (test (0, 57, 1, 9, num_tasks, ntasks, min_iters, max_iters, sep) /= 57) & + stop 11 + if (ntasks /= 9 .or. min_iters /= 6 .or. max_iters /= 7 .or. sep /= 3) & + stop 12 + if (test (0, 25, 2, 17, num_tasks, ntasks, min_iters, max_iters, sep) /= 13) & + stop 13 + if (ntasks /= 13 .or. min_iters /= 1 .or. max_iters /= 1 .or. sep /= 13) & + stop 14 + end block + !$omp end single + !$omp end parallel +end program diff --git a/libgomp/testsuite/libgomp.fortran/taskloop-5.f90 b/libgomp/testsuite/libgomp.fortran/taskloop-5.f90 new file mode 100644 index 0000000..247f93b --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/taskloop-5.f90 @@ -0,0 +1,75 @@ +! { dg-do run } +! { dg-options "-O2" } +! { dg-additional-sources taskloop-5-a.f90 } + +module m_taskloop5 + implicit none (type, external) + integer :: u(0:63), v, w(0:63) + +contains +integer function test (a, b, c, d, fn, num_tasks, min_iters, max_iters, sep) + integer, value :: a, b, c, d + interface + subroutine fn (n1, n2, n3, n4) + integer, value :: n1, n2, n3, n4 + end + end interface + integer :: num_tasks, min_iters, max_iters, sep + integer :: i, j, t + + t = 0 + u = 0 + v = 0 + call fn (a, b, c, d) + min_iters = 0 + max_iters = 0 + num_tasks = v + sep = v + if (v /= 0) then + min_iters = u(0) + max_iters = u(0) + t = u(0) + do i = 1, v - 1 + if (min_iters > u(i)) & + min_iters = u(i) + if (max_iters < u(i)) & + max_iters = u(i) + t = t + u(i) + end do + + if (min_iters /= max_iters) then + do i = 0, v - 2 + block + integer :: min_idx + min_idx = i + do j = i + 1, v - 1 + if (w(min_idx) > w(j)) & + min_idx = j + end do + if (min_idx /= i) then + block + integer tem + tem = u(i) + u(i) = u(min_idx) + u(min_idx) = tem + tem = w(i) + w(i) = w(min_idx) + w(min_idx) = tem + end block + end if + end block + end do + if (u(0) /= 
max_iters) & + stop 1 + do i = 1, v - 1 + if (u(i) /= u(i - 1)) then + if (sep /= v .or. u(i) /= min_iters) & + stop 2 + sep = i; + end if + end do + end if + end if + test = t +end +end module diff --git a/libgomp/testsuite/libgomp.graphite/force-parallel-4.c b/libgomp/testsuite/libgomp.graphite/force-parallel-4.c index d2af142..ef6f64d 100644 --- a/libgomp/testsuite/libgomp.graphite/force-parallel-4.c +++ b/libgomp/testsuite/libgomp.graphite/force-parallel-4.c @@ -1,4 +1,5 @@ /* Autopar with IF conditions. */ +/* { dg-additional-options "-fdisable-tree-thread1" } */ void abort(); diff --git a/libgomp/testsuite/libgomp.graphite/force-parallel-8.c b/libgomp/testsuite/libgomp.graphite/force-parallel-8.c index 32ba5ab..a97eb97 100644 --- a/libgomp/testsuite/libgomp.graphite/force-parallel-8.c +++ b/libgomp/testsuite/libgomp.graphite/force-parallel-8.c @@ -1,3 +1,5 @@ +/* { dg-additional-options "-fdisable-tree-thread1" } */ + #define N 1500 int x[N][N], y[N]; diff --git a/libgomp/testsuite/libgomp.oacc-c++/declare-1.C b/libgomp/testsuite/libgomp.oacc-c++/declare-1.C index 0286955..461b778 100644 --- a/libgomp/testsuite/libgomp.oacc-c++/declare-1.C +++ b/libgomp/testsuite/libgomp.oacc-c++/declare-1.C @@ -1,5 +1,3 @@ -/* { dg-do run { target openacc_nvidia_accel_selected } } */ - #include <stdlib.h> template<class T> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-nvptx.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-nvptx.c index 6334cfd..8223901 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-nvptx.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-nvptx.c @@ -4,6 +4,7 @@ /* { dg-additional-sources acc_get_property-aux.c } */ /* { dg-additional-options "-lcuda -lcudart" } */ /* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-require-effective-target openacc_cudart } */ #include <openacc.h> #include <cuda.h> diff --git 
a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c index 8112745..064c6f5 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c @@ -19,6 +19,8 @@ main (int argc, char *argv[]) abort (); if (acc_on_device (acc_device_nvidia)) abort (); + if (acc_on_device (acc_device_radeon)) + abort (); } @@ -34,6 +36,8 @@ main (int argc, char *argv[]) abort (); if (acc_on_device (acc_device_nvidia)) abort (); + if (acc_on_device (acc_device_radeon)) + abort (); } @@ -56,6 +60,13 @@ main (int argc, char *argv[]) if (acc_on_device (acc_device_nvidia)) abort (); #endif +#if ACC_DEVICE_TYPE_radeon + if (!acc_on_device (acc_device_radeon)) + abort (); +#else + if (acc_on_device (acc_device_radeon)) + abort (); +#endif } #endif diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c index 7d05f48..b5e7715 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c @@ -19,6 +19,19 @@ #define DEBUG_printf(...) 
//__builtin_printf (__VA_ARGS__) +static acc_prof_reg reg; +static acc_prof_reg unreg; +static acc_prof_lookup_func lookup; +void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_lookup_func lookup_) +{ + DEBUG_printf ("%s\n", __FUNCTION__); + + reg = reg_; + unreg = unreg_; + lookup = lookup_; +} + + static int state = -1; #define STATE_OP(state, op) \ @@ -34,7 +47,7 @@ static int state = -1; static acc_device_t acc_device_type; static int acc_device_num; -static int acc_async; +static int acc_async = acc_async_sync; struct tool_info @@ -192,6 +205,21 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info * assert (state == 11 || state == 111); +#if defined COPYIN + /* In an 'async' setting, this event may be triggered before actual 'async' + data copying has completed. Given that 'state' appears in 'COPYIN', we + first have to synchronize (that is, let the 'async' 'COPYIN' read the + current 'state' value)... */ + if (acc_async != acc_async_sync) + { + /* "We're not yet accounting for the fact that _OpenACC events may occur + during event processing_"; temporarily disable to avoid deadlock. */ + unreg (acc_ev_none, NULL, acc_toggle_per_thread); + acc_wait (acc_async); + reg (acc_ev_none, NULL, acc_toggle_per_thread); + } + /* ... before modifying it in the following. 
*/ +#endif STATE_OP (state, ++); assert (tool_info != NULL); @@ -240,19 +268,6 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info * } -static acc_prof_reg reg; -static acc_prof_reg unreg; -static acc_prof_lookup_func lookup; -void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_lookup_func lookup_) -{ - DEBUG_printf ("%s\n", __FUNCTION__); - - reg = reg_; - unreg = unreg_; - lookup = lookup_; -} - - int main() { acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup); @@ -271,14 +286,15 @@ int main() acc_device_type = acc_get_device_type (); acc_device_num = acc_get_device_num (acc_device_type); - acc_async = 12; { int state_init; + acc_async = 12; #pragma acc parallel async(acc_async) COPYIN(state) copyout(state_init) { state_init = state; } + acc_async = acc_async_sync; #pragma acc wait assert (state_init == 11); } @@ -297,14 +313,15 @@ int main() acc_device_type = acc_get_device_type (); acc_device_num = acc_get_device_num (acc_device_type); - acc_async = 12; { int state_init; + acc_async = 12; #pragma acc parallel async(acc_async) COPYIN(state) copyout(state_init) { state_init = state; } + acc_async = acc_async_sync; #pragma acc wait assert (state_init == 111); } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c index a5e9ab3..1f50386 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c @@ -29,6 +29,19 @@ #define DEBUG_printf(...) 
//__builtin_printf (__VA_ARGS__) +static acc_prof_reg reg; +static acc_prof_reg unreg; +static acc_prof_lookup_func lookup; +void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_lookup_func lookup_) +{ + DEBUG_printf ("%s\n", __FUNCTION__); + + reg = reg_; + unreg = unreg_; + lookup = lookup_; +} + + static int state = -1; #define STATE_OP(state, op) \ @@ -44,7 +57,7 @@ static int state = -1; static acc_device_t acc_device_type; static int acc_device_num; -static int acc_async; +static int acc_async = acc_async_sync; struct tool_info @@ -235,6 +248,25 @@ static void cb_enter_data_end (acc_prof_info *prof_info, acc_event_info *event_i assert (state == 4 || state == 104); +#if defined COPYIN + /* Conceptually, 'acc_ev_enter_data_end' marks the end of data copying, + before 'acc_ev_enqueue_launch_start' marks invoking the compute region. + That's the 'state_init = state;' intended to be captured in the compute + regions. */ + /* In an 'async' setting, this event may be triggered before actual 'async' + data copying has completed. Given that 'state' appears in 'COPYIN', we + first have to synchronize (that is, let the 'async' 'COPYIN' read the + current 'state' value)... */ + if (acc_async != acc_async_sync) + { + /* "We're not yet accounting for the fact that _OpenACC events may occur + during event processing_"; temporarily disable to avoid deadlock. */ + unreg (acc_ev_none, NULL, acc_toggle_per_thread); + acc_wait (acc_async); + reg (acc_ev_none, NULL, acc_toggle_per_thread); + } + /* ... before modifying it in the following. 
*/ +#endif STATE_OP (state, ++); assert (tool_info != NULL); @@ -664,19 +696,6 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve } -static acc_prof_reg reg; -static acc_prof_reg unreg; -static acc_prof_lookup_func lookup; -void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_lookup_func lookup_) -{ - DEBUG_printf ("%s\n", __FUNCTION__); - - reg = reg_; - unreg = unreg_; - lookup = lookup_; -} - - int main() { acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup); @@ -696,7 +715,6 @@ int main() acc_device_type = acc_get_device_type (); acc_device_num = acc_get_device_num (acc_device_type); - acc_async = acc_async_sync; assert (state == 0); { @@ -713,15 +731,16 @@ int main() STATE_OP (state, = 100); - acc_async = 12; { int state_init; + acc_async = 12; #pragma acc parallel async(acc_async) COPYIN(state) copyout(state_init) { asm volatile ("" : : : "memory"); // TODO PR90488 state_init = state; } + acc_async = acc_async_sync; #pragma acc wait assert (state_init == 104); } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/async-data-1-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/async-data-1-1.c new file mode 100644 index 0000000..9f2bed8 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/async-data-1-1.c @@ -0,0 +1,88 @@ +/* Verify back to back 'async' operations, one data mapping. + + Due to one data mapping, this isn't using the libgomp 'cbuf' buffering. 
+*/ + + +#include <stdlib.h> + + +#define N 128 + + +static void +t1 (void) +{ + unsigned int *a; + int i; + int nbytes; + + nbytes = N * sizeof (unsigned int); + + a = (unsigned int *) malloc (nbytes); + + for (i = 0; i < N; i++) + a[i] = 3; + +#pragma acc parallel async copy (a[0:N]) + for (int ii = 0; ii < N; ii++) + a[ii] += 1; + +#pragma acc parallel async copy (a[0:N]) + for (int ii = 0; ii < N; ii++) + a[ii] += 1; + +#pragma acc wait + + for (i = 0; i < N; i++) + if (a[i] != 5) + abort (); +} + + +static void +t2 (void) +{ + unsigned int *a; + int i; + int nbytes; + + nbytes = N * sizeof (unsigned int); + + a = (unsigned int *) malloc (nbytes); + +#pragma acc data copyin (a[0:N]) + { + for (i = 0; i < N; i++) + a[i] = 3; + +#pragma acc update async device (a[0:N]) +#pragma acc parallel async present (a[0:N]) + for (int ii = 0; ii < N; ii++) + a[ii] += 1; +#pragma acc update async host (a[0:N]) + +#pragma acc update async device (a[0:N]) +#pragma acc parallel async present (a[0:N]) + for (int ii = 0; ii < N; ii++) + a[ii] += 1; +#pragma acc update async host (a[0:N]) + +#pragma acc wait + } + + for (i = 0; i < N; i++) + if (a[i] != 5) + abort (); +} + + +int +main (void) +{ + t1 (); + + t2 (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/async-data-1-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/async-data-1-2.c new file mode 100644 index 0000000..3299499 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/async-data-1-2.c @@ -0,0 +1,99 @@ +/* Verify back to back 'async' operations, two data mappings. + + Make sure that despite two data mappings, this isn't using the libgomp + 'cbuf' buffering. 
+*/ + + +#include <stdlib.h> + + +#define N 128 + + +static void +t1 (void) +{ + unsigned int *a, *b; + int i; + int nbytes; + + nbytes = N * sizeof (unsigned int); + + a = (unsigned int *) malloc (nbytes); + b = (unsigned int *) malloc (nbytes); + + for (i = 0; i < N; i++) + b[i] = a[i] = 3; + +#pragma acc parallel async copy (a[0:N], b[0:N]) + for (int ii = 0; ii < N; ii++) + b[ii] += (a[ii] += 1); + +#pragma acc parallel async copy (a[0:N], b[0:N]) + for (int ii = 0; ii < N; ii++) + b[ii] += (a[ii] += 1); + +#pragma acc wait + + for (i = 0; i < N; i++) + { + if (a[i] != 5) + abort (); + if (b[i] != 12) + abort (); + } +} + + +static void +t2 (void) +{ + unsigned int *a, *b; + int i; + int nbytes; + + nbytes = N * sizeof (unsigned int); + + a = (unsigned int *) malloc (nbytes); + b = (unsigned int *) malloc (nbytes); + +#pragma acc data copyin (a[0:N], b[0:N]) + { + for (i = 0; i < N; i++) + b[i] = a[i] = 3; + +#pragma acc update async device (a[0:N], b[0:N]) +#pragma acc parallel async present (a[0:N], b[0:N]) + for (int ii = 0; ii < N; ii++) + b[ii] += (a[ii] += 1); +#pragma acc update async host (a[0:N], b[0:N]) + +#pragma acc update async device (a[0:N], b[0:N]) +#pragma acc parallel async present (a[0:N], b[0:N]) + for (int ii = 0; ii < N; ii++) + b[ii] += (a[ii] += 1); +#pragma acc update async host (a[0:N], b[0:N]) + +#pragma acc wait + } + + for (i = 0; i < N; i++) + { + if (a[i] != 5) + abort (); + if (b[i] != 12) + abort (); + } +} + + +int +main (void) +{ + t1 (); + + t2 (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/async_queue-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/async_queue-1.c index 4f9e53d..533d498 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/async_queue-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/async_queue-1.c @@ -1,5 +1,3 @@ -/* { dg-do run { target openacc_nvidia_accel_selected } } */ - /* Test mapping of async values to specific underlying queues. 
*/ #undef NDEBUG @@ -29,6 +27,8 @@ int main(void) acc_device_t d; #if defined ACC_DEVICE_TYPE_nvidia d = acc_device_nvidia; +#elif defined ACC_DEVICE_TYPE_radeon + d = acc_device_radeon; #elif defined ACC_DEVICE_TYPE_host d = acc_device_host; #else @@ -88,6 +88,9 @@ int main(void) assert (queues[i].cuda_stream == NULL); else assert (queues[i].cuda_stream != NULL); +#elif defined ACC_DEVICE_TYPE_radeon + /* For "acc_device_radeon" there are no CUDA streams. */ + assert (queues[i].cuda_stream == NULL); #elif defined ACC_DEVICE_TYPE_host /* For "acc_device_host" there are no CUDA streams. */ assert (queues[i].cuda_stream == NULL); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c index e780845..e91642c 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c @@ -1,9 +1,10 @@ /* { dg-do run } */ -/* { dg-additional-options "-lcuda" { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-DUSE_CUDA_H" { target openacc_cuda } } */ +/* { dg-additional-options "-lcuda" { target { openacc_nvidia_accel_selected && openacc_cuda } } } */ #include <openacc.h> #include <stdlib.h> -#if defined ACC_DEVICE_TYPE_nvidia +#if defined ACC_DEVICE_TYPE_nvidia && defined USE_CUDA_H #include "cuda.h" #endif @@ -13,7 +14,7 @@ int main (int argc, char **argv) { -#if defined ACC_DEVICE_TYPE_nvidia +#if defined ACC_DEVICE_TYPE_nvidia && defined USE_CUDA_H CUresult r; CUstream stream1; #endif @@ -22,7 +23,7 @@ main (int argc, char **argv) int i; int nbytes; -#if defined ACC_DEVICE_TYPE_nvidia +#if defined ACC_DEVICE_TYPE_nvidia && defined USE_CUDA_H acc_init (acc_device_nvidia); #endif @@ -216,7 +217,7 @@ main (int argc, char **argv) } -#if defined ACC_DEVICE_TYPE_nvidia +#if defined ACC_DEVICE_TYPE_nvidia && defined USE_CUDA_H r = cuStreamCreate (&stream1, CU_STREAM_NON_BLOCKING); if (r != CUDA_SUCCESS) { @@ -650,7 
+651,7 @@ main (int argc, char **argv) } -#if defined ACC_DEVICE_TYPE_nvidia +#if defined ACC_DEVICE_TYPE_nvidia && defined USE_CUDA_H r = cuStreamCreate (&stream1, CU_STREAM_NON_BLOCKING); if (r != CUDA_SUCCESS) { @@ -902,7 +903,7 @@ main (int argc, char **argv) abort (); } -#if defined ACC_DEVICE_TYPE_nvidia +#if defined ACC_DEVICE_TYPE_nvidia && defined USE_CUDA_H acc_shutdown (acc_device_nvidia); #endif diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/atomic_capture-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/atomic_capture-3.c new file mode 100644 index 0000000..b976094 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/atomic_capture-3.c @@ -0,0 +1,1627 @@ +/* { dg-do run } */ +/* { dg-additional-options "-fmodulo-sched -fmodulo-sched-allow-regmoves" } */ + +#include <stdlib.h> + +int +main(int argc, char **argv) +{ + int iexp, igot, imax, imin; + long long lexp, lgot; + int N = 32; + int i; + int idata[N]; + long long ldata[N]; + float fexp, fgot; + float fdata[N]; + + igot = 1234; + iexp = 31; + + for (i = 0; i < N; i++) + idata[i] = i; + +#pragma acc data copy (igot, idata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) +#pragma acc atomic capture + { idata[i] = igot; igot = i; } + } + + imax = 0; + imin = N; + + for (i = 0; i < N; i++) + { + imax = idata[i] > imax ? idata[i] : imax; + imin = idata[i] < imin ? 
idata[i] : imin; + } + + if (imax != 1234 || imin != 0) + abort (); + + return 0; + + igot = 0; + iexp = 32; + +#pragma acc data copy (igot, idata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) +#pragma acc atomic capture + { idata[i] = igot; igot++; } + } + + if (iexp != igot) + abort (); + + igot = 0; + iexp = 32; + +#pragma acc data copy (igot, idata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) +#pragma acc atomic capture + { idata[i] = igot; ++igot; } + } + + if (iexp != igot) + abort (); + + igot = 0; + iexp = 32; + +#pragma acc data copy (igot, idata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) +#pragma acc atomic capture + { ++igot; idata[i] = igot; } + } + + if (iexp != igot) + abort (); + + igot = 0; + iexp = 32; + +#pragma acc data copy (igot, idata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) +#pragma acc atomic capture + { igot++; idata[i] = igot; } + } + + if (iexp != igot) + abort (); + + igot = 32; + iexp = 0; + +#pragma acc data copy (igot, idata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) +#pragma acc atomic capture + { idata[i] = igot; igot--; } + } + + if (iexp != igot) + abort (); + + igot = 32; + iexp = 0; + +#pragma acc data copy (igot, idata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) +#pragma acc atomic capture + { idata[i] = igot; --igot; } + } + + if (iexp != igot) + abort (); + + igot = 32; + iexp = 0; + +#pragma acc data copy (igot, idata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) +#pragma acc atomic capture + { --igot; idata[i] = igot; } + } + + if (iexp != igot) + abort (); + + igot = 32; + iexp = 0; + +#pragma acc data copy (igot, idata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) +#pragma acc atomic capture + { igot--; idata[i] = igot; } + } + + if (iexp != igot) + abort (); + + /* BINOP = + */ + igot = 0; + iexp = 32; + +#pragma acc data copy (igot, idata[0:N]) + { +#pragma acc parallel 
loop + for (i = 0; i < N; i++) + { + int expr = 1; + +#pragma acc atomic capture + { idata[i] = igot; igot += expr; } + } + } + + if (iexp != igot) + abort (); + + igot = 0; + iexp = 32; + +#pragma acc data copy (igot, idata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + int expr = 1; + +#pragma acc atomic capture + { igot += expr; idata[i] = igot; } + } + } + + if (iexp != igot) + abort (); + + igot = 0; + iexp = 32; + +#pragma acc data copy (igot, idata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + int expr = 1; + +#pragma acc atomic capture + { idata[i] = igot; igot = igot + expr; } + } + } + + if (iexp != igot) + abort (); + + igot = 0; + iexp = 32; + +#pragma acc data copy (igot, idata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + int expr = 1; + +#pragma acc atomic capture + { idata[i] = igot; igot = expr + igot; } + } + } + + if (iexp != igot) + abort (); + + igot = 0; + iexp = 32; + +#pragma acc data copy (igot, idata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + int expr = 1; + +#pragma acc atomic capture + { igot = igot + expr; idata[i] = igot; } + } + } + + if (iexp != igot) + abort (); + + + igot = 0; + iexp = 32; + +#pragma acc data copy (igot, idata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + int expr = 1; + +#pragma acc atomic capture + { igot = expr + igot; idata[i] = igot; } + } + } + + if (iexp != igot) + abort (); + + /* BINOP = * */ + lgot = 1LL; + lexp = 1LL << 32; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = 2LL; + +#pragma acc atomic capture + { ldata[i] = lgot; lgot *= expr; } + } + } + + if (lexp != lgot) + abort (); + + lgot = 1LL; + lexp = 1LL << 32; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = 2LL; + +#pragma acc atomic capture + { lgot *= expr; ldata[i] = lgot; } + } 
+ } + + if (lexp != lgot) + abort (); + + lgot = 1LL; + lexp = 1LL << 32; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = 2LL; + +#pragma acc atomic capture + { ldata[i] = lgot; lgot = lgot * expr; } + } + } + + if (lexp != lgot) + abort (); + + lgot = 1LL; + lexp = 1LL << 32; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = 2LL; + +#pragma acc atomic capture + { ldata[i] = lgot; lgot = expr * lgot; } + } + } + + if (lexp != lgot) + abort (); + + lgot = 1LL; + lexp = 1LL << 32; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = 2LL; + +#pragma acc atomic capture + { lgot = lgot * expr; ldata[i] = lgot; } + } + } + + if (lexp != lgot) + abort (); + + lgot = 1LL; + lexp = 1LL << 32; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = 2; + +#pragma acc atomic capture + { lgot = expr * lgot; ldata[i] = lgot; } + } + } + + if (lexp != lgot) + abort (); + + /* BINOP = - */ + igot = 32; + iexp = 0; + +#pragma acc data copy (igot, idata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + int expr = 1; + +#pragma acc atomic capture + { idata[i] = igot; igot -= expr; } + } + } + + if (iexp != igot) + abort (); + + igot = 32; + iexp = 0; + +#pragma acc data copy (igot, idata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + int expr = 1; + +#pragma acc atomic capture + { igot -= expr; idata[i] = igot; } + } + } + + if (iexp != igot) + abort (); + + igot = 32; + iexp = 0; + +#pragma acc data copy (igot, idata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + int expr = 1; + +#pragma acc atomic capture + { idata[i] = igot; igot = igot - expr; } + } + } + + if (iexp != igot) + abort (); + + igot = 1; + iexp = 1; + +#pragma acc data 
copy (igot, idata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + int expr = 1; + +#pragma acc atomic capture + { idata[i] = igot; igot = expr - igot; } + } + } + + for (i = 0; i < N; i++) + if (i % 2 == 0) + { + if (idata[i] != 1) + abort (); + } + else + { + if (idata[i] != 0) + abort (); + } + + if (iexp != igot) + abort (); + + igot = 1; + iexp = -31; + +#pragma acc data copy (igot, idata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + int expr = 1; + +#pragma acc atomic capture + { igot = igot - expr; idata[i] = igot; } + } + } + + if (iexp != igot) + abort (); + + igot = 1; + iexp = 1; + +#pragma acc data copy (igot, idata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + int expr = 1; + +#pragma acc atomic capture + { igot = expr - igot; idata[i] = igot; } + } + } + + for (i = 0; i < N; i++) + if (i % 2 == 0) + { + if (idata[i] != 0) + abort (); + } + else + { + if (idata[i] != 1) + abort (); + } + + if (iexp != igot) + abort (); + + /* BINOP = / */ + lgot = 1LL << 32; + lexp = 1LL; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = 2LL; + +#pragma acc atomic capture + { ldata[i] = lgot; lgot /= expr; } + } + } + + if (lexp != lgot) + abort (); + + lgot = 1LL << 32; + lexp = 1LL; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = 2LL; + +#pragma acc atomic capture + { lgot /= expr; ldata[i] = lgot; } + } + } + + if (lexp != lgot) + abort (); + + lgot = 1LL << 32; + lexp = 1LL; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = 2LL; + +#pragma acc atomic capture + { ldata[i] = lgot; lgot = lgot / expr; } + } + } + + if (lexp != lgot) + abort (); + + lgot = 2LL; + lexp = 2LL; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long 
long expr = 1LL << N; + +#pragma acc atomic capture + { ldata[i] = lgot; lgot = expr / lgot; } + } + } + + if (lexp != lgot) + abort (); + + lgot = 2LL; + lexp = 2LL; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = 1LL << N; + +#pragma acc atomic capture + { lgot = lgot / expr; ldata[i] = lgot; } + } + } + + if (lexp != lgot) + abort (); + + lgot = 2LL; + lexp = 2LL; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = 1LL << N; + +#pragma acc atomic capture + { lgot = expr / lgot; ldata[i] = lgot; } + } + } + + if (lexp != lgot) + abort (); + + /* BINOP = & */ + lgot = ~0LL; + lexp = 0LL; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = ~(1 << i); + +#pragma acc atomic capture + { ldata[i] = lgot; lgot &= expr; } + } + } + + if (lexp != lgot) + abort (); + + lgot = ~0LL; + iexp = 0LL; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = ~(1 << i); + +#pragma acc atomic capture + { lgot &= expr; ldata[i] = lgot; } + } + } + + if (lexp != lgot) + abort (); + + lgot = ~0LL; + lexp = 0LL; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = ~(1 << i); + +#pragma acc atomic capture + { ldata[i] = lgot; lgot = lgot & expr; } + } + } + + if (lexp != lgot) + abort (); + + lgot = ~0LL; + lexp = 0LL; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = ~(1 << i); + +#pragma acc atomic capture + { ldata[i] = lgot; lgot = expr & lgot; } + } + } + + if (lexp != lgot) + abort (); + + lgot = ~0LL; + iexp = 0LL; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = ~(1 << i); + 
+#pragma acc atomic capture + { lgot = lgot & expr; ldata[i] = lgot; } + } + } + + if (lexp != lgot) + abort (); + + lgot = ~0LL; + lexp = 0LL; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = ~(1 << i); + +#pragma acc atomic capture + { lgot = expr & lgot; ldata[i] = lgot; } + } + } + + if (lexp != lgot) + abort (); + + /* BINOP = ^ */ + lgot = ~0LL; + lexp = 0LL; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = 1 << i; + +#pragma acc atomic capture + { ldata[i] = lgot; lgot ^= expr; } + } + } + + if (lexp != lgot) + abort (); + + lgot = ~0LL; + iexp = 0LL; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = ~(1 << i); + +#pragma acc atomic capture + { lgot ^= expr; ldata[i] = lgot; } + } + } + + if (lexp != lgot) + abort (); + + lgot = ~0LL; + lexp = 0LL; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = ~(1 << i); + +#pragma acc atomic capture + { ldata[i] = lgot; lgot = lgot ^ expr; } + } + } + + if (lexp != lgot) + abort (); + + lgot = ~0LL; + lexp = 0LL; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = ~(1 << i); + +#pragma acc atomic capture + { ldata[i] = lgot; lgot = expr ^ lgot; } + } + } + + if (lexp != lgot) + abort (); + + lgot = ~0LL; + iexp = 0LL; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = ~(1 << i); + +#pragma acc atomic capture + { lgot = lgot ^ expr; ldata[i] = lgot; } + } + } + + if (lexp != lgot) + abort (); + + lgot = ~0LL; + lexp = 0LL; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = ~(1 << i); + +#pragma acc atomic capture + 
{ lgot = expr ^ lgot; ldata[i] = lgot; } + } + } + + if (lexp != lgot) + abort (); + + /* BINOP = | */ + lgot = 0LL; + lexp = ~0LL; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = 1 << i; + +#pragma acc atomic capture + { ldata[i] = lgot; lgot |= expr; } + } + } + + if (lexp != lgot) + abort (); + + lgot = 0LL; + iexp = ~0LL; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = ~(1 << i); + +#pragma acc atomic capture + { lgot |= expr; ldata[i] = lgot; } + } + } + + if (lexp != lgot) + abort (); + + lgot = 0LL; + lexp = ~0LL; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = ~(1 << i); + +#pragma acc atomic capture + { ldata[i] = lgot; lgot = lgot | expr; } + } + } + + if (lexp != lgot) + abort (); + + lgot = 0LL; + lexp = ~0LL; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = ~(1 << i); + +#pragma acc atomic capture + { ldata[i] = lgot; lgot = expr | lgot; } + } + } + + if (lexp != lgot) + abort (); + + lgot = 0LL; + iexp = ~0LL; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = ~(1 << i); + +#pragma acc atomic capture + { lgot = lgot | expr; ldata[i] = lgot; } + } + } + + if (lexp != lgot) + abort (); + + lgot = 0LL; + lexp = ~0LL; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = ~(1 << i); + +#pragma acc atomic capture + { lgot = expr | lgot; ldata[i] = lgot; } + } + } + + if (lexp != lgot) + abort (); + + /* BINOP = << */ + lgot = 1LL; + lexp = 1LL << N; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = 1LL; + +#pragma acc atomic capture + { ldata[i] = 
lgot; lgot <<= expr; } + } + } + + if (lexp != lgot) + abort (); + + lgot = 1LL; + iexp = 1LL << N; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = 1LL; + +#pragma acc atomic capture + { lgot <<= expr; ldata[i] = lgot; } + } + } + + if (lexp != lgot) + abort (); + + lgot = 1LL; + lexp = 1LL << N; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = 1LL; + +#pragma acc atomic capture + { ldata[i] = lgot; lgot = lgot << expr; } + } + } + + if (lexp != lgot) + abort (); + + lgot = 1LL; + lexp = 2LL; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < 1; i++) + { + long long expr = 1LL; + +#pragma acc atomic capture + { ldata[i] = lgot; lgot = expr << lgot; } + } + } + + if (lexp != lgot) + abort (); + + lgot = 1LL; + lexp = 2LL; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < 1; i++) + { + long long expr = 1LL; + +#pragma acc atomic capture + { lgot = lgot << expr; ldata[i] = lgot; } + } + } + + if (lexp != lgot) + abort (); + + lgot = 1LL; + lexp = 2LL; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < 1; i++) + { + long long expr = 1LL; + +#pragma acc atomic capture + { lgot = expr << lgot; ldata[i] = lgot; } + } + } + + if (lexp != lgot) + abort (); + + /* BINOP = >> */ + lgot = 1LL << N; + lexp = 1LL; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = 1LL; + +#pragma acc atomic capture + { ldata[i] = lgot; lgot >>= expr; } + } + } + + if (lexp != lgot) + abort (); + + lgot = 1LL << N; + iexp = 1LL; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = 1LL; + +#pragma acc atomic capture + { lgot >>= expr; ldata[i] = lgot; } + } + } + + if (lexp != lgot) 
+ abort (); + + lgot = 1LL << N; + lexp = 1LL; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = 1LL; + +#pragma acc atomic capture + { ldata[i] = lgot; lgot = lgot >> expr; } + } + } + + if (lexp != lgot) + abort (); + + lgot = 1LL; + lexp = 1LL << (N - 1); + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < 1; i++) + { + long long expr = 1LL << N; + +#pragma acc atomic capture + { ldata[i] = lgot; lgot = expr >> lgot; } + } + } + + if (lexp != lgot) + abort (); + + lgot = 1LL << N; + lexp = 1LL; + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = 1LL; + +#pragma acc atomic capture + { lgot = lgot >> expr; ldata[i] = lgot; } + } + } + + if (lexp != lgot) + abort (); + + lgot = 1LL; + lexp = 1LL << (N - 1); + +#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < 1; i++) + { + long long expr = 1LL << N; + +#pragma acc atomic capture + { lgot = expr >> lgot; ldata[i] = lgot; } + } + } + + if (lexp != lgot) + abort (); + + // FLOAT FLOAT FLOAT + + /* BINOP = + */ + fgot = 0.0; + fexp = 32.0; + +#pragma acc data copy (fgot, fdata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + float expr = 1.0; + +#pragma acc atomic capture + { fdata[i] = fgot; fgot += expr; } + } + } + + if (fexp != fgot) + abort (); + + fgot = 0.0; + fexp = 32.0; + +#pragma acc data copy (fgot, fdata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + float expr = 1.0; + +#pragma acc atomic capture + { fgot += expr; fdata[i] = fgot; } + } + } + + if (fexp != fgot) + abort (); + + fgot = 0.0; + fexp = 32.0; + +#pragma acc data copy (fgot, fdata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + float expr = 1.0; + +#pragma acc atomic capture + { idata[i] = fgot; fgot = fgot + expr; } + } + } + + if (fexp != fgot) + abort (); + 
+ fgot = 0.0; + fexp = 32.0; + +#pragma acc data copy (fgot, fdata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + float expr = 1.0; + +#pragma acc atomic capture + { fdata[i] = fgot; fgot = expr + fgot; } + } + } + + if (fexp != fgot) + abort (); + + fgot = 0.0; + fexp = 32.0; + +#pragma acc data copy (fgot, fdata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + float expr = 1.0; + +#pragma acc atomic capture + { fgot = fgot + expr; fdata[i] = fgot; } + } + } + + if (fexp != fgot) + abort (); + + fgot = 0.0; + fexp = 32.0; + +#pragma acc data copy (fgot, fdata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + float expr = 1.0; + +#pragma acc atomic capture + { fgot = expr + fgot; fdata[i] = fgot; } + } + } + + if (fexp != fgot) + abort (); + + /* BINOP = * */ + fgot = 1.0; + fexp = 8192.0*8192.0*64.0; + +#pragma acc data copy (fgot, fdata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + float expr = 2.0; + +#pragma acc atomic capture + { fdata[i] = fgot; fgot *= expr; } + } + } + + if (fexp != fgot) + abort (); + + fgot = 1.0; + fexp = 8192.0*8192.0*64.0; + +#pragma acc data copy (fgot, fdata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + float expr = 2.0; + +#pragma acc atomic capture + { fgot *= expr; fdata[i] = fgot; } + } + } + + if (fexp != fgot) + abort (); + + fgot = 1.0; + fexp = 8192.0*8192.0*64.0; + +#pragma acc data copy (fgot, fdata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + float expr = 2.0; + +#pragma acc atomic capture + { fdata[i] = fgot; fgot = fgot * expr; } + } + } + + if (fexp != fgot) + abort (); + + fgot = 1.0; + fexp = 8192.0*8192.0*64.0; + +#pragma acc data copy (fgot, fdata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + float expr = 2.0; + +#pragma acc atomic capture + { fdata[i] = fgot; fgot = expr * fgot; } + } + } + + if (fexp != fgot) + abort (); + + lgot = 1LL; + lexp = 1LL << 32; + 
+#pragma acc data copy (lgot, ldata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = 2LL; + +#pragma acc atomic capture + { lgot = lgot * expr; ldata[i] = lgot; } + } + } + + if (lexp != lgot) + abort (); + + fgot = 1.0; + fexp = 8192.0*8192.0*64.0; + +#pragma acc data copy (fgot, fdata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = 2; + +#pragma acc atomic capture + { fgot = expr * fgot; fdata[i] = fgot; } + } + } + + if (fexp != fgot) + abort (); + + /* BINOP = - */ + fgot = 32.0; + fexp = 0.0; + +#pragma acc data copy (fgot, fdata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + float expr = 1.0; + +#pragma acc atomic capture + { fdata[i] = fgot; fgot -= expr; } + } + } + + if (fexp != fgot) + abort (); + + fgot = 32.0; + fexp = 0.0; + +#pragma acc data copy (fgot, fdata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + float expr = 1.0; + +#pragma acc atomic capture + { fgot -= expr; fdata[i] = fgot; } + } + } + + if (fexp != fgot) + abort (); + + fgot = 32.0; + fexp = 0.0; + +#pragma acc data copy (fgot, fdata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + float expr = 1.0; + +#pragma acc atomic capture + { fdata[i] = fgot; fgot = fgot - expr; } + } + } + + if (fexp != fgot) + abort (); + + fgot = 1.0; + fexp = 1.0; + +#pragma acc data copy (fgot, fdata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + float expr = 1.0; + +#pragma acc atomic capture + { fdata[i] = fgot; fgot = expr - fgot; } + } + } + + for (i = 0; i < N; i++) + if (i % 2 == 0) + { + if (fdata[i] != 1.0) + abort (); + } + else + { + if (fdata[i] != 0.0) + abort (); + } + + if (fexp != fgot) + abort (); + + fgot = 1.0; + fexp = -31.0; + +#pragma acc data copy (fgot, fdata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + float expr = 1.0; + +#pragma acc atomic capture + { fgot = fgot - expr; fdata[i] = fgot; } + } + } + + 
if (fexp != fgot) + abort (); + + fgot = 1.0; + fexp = 1.0; + +#pragma acc data copy (fgot, fdata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + float expr = 1.0; + +#pragma acc atomic capture + { fgot = expr - fgot; fdata[i] = fgot; } + } + } + + for (i = 0; i < N; i++) + if (i % 2 == 0) + { + if (fdata[i] != 0.0) + abort (); + } + else + { + if (fdata[i] != 1.0) + abort (); + } + + if (fexp != fgot) + abort (); + + /* BINOP = / */ + fgot = 8192.0*8192.0*64.0; + fexp = 1.0; + +#pragma acc data copy (fgot, fdata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + float expr = 2.0; + +#pragma acc atomic capture + { fdata[i] = fgot; fgot /= expr; } + } + } + + if (fexp != fgot) + abort (); + + fgot = 8192.0*8192.0*64.0; + fexp = 1.0; + +#pragma acc data copy (fgot, fdata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + float expr = 2.0; + +#pragma acc atomic capture + { fgot /= expr; fdata[i] = fgot; } + } + } + + if (fexp != fgot) + abort (); + + fgot = 8192.0*8192.0*64.0; + fexp = 1.0; + +#pragma acc data copy (fgot, fdata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + float expr = 2.0; + +#pragma acc atomic capture + { fdata[i] = fgot; fgot = fgot / expr; } + } + } + + if (fexp != fgot) + abort (); + + fgot = 8192.0*8192.0*64.0; + fexp = 1.0; + +#pragma acc data copy (fgot, fdata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + float expr = 1.0; + +#pragma acc atomic capture + { fdata[i] = fgot; fgot = expr / fgot; } + } + } + + if (fexp != fgot) + abort (); + + fgot = 4.0; + fexp = 4.0; + +#pragma acc data copy (fgot, fdata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { + long long expr = 1LL << N; + +#pragma acc atomic capture + { fgot = fgot / expr; fdata[i] = fgot; } + } + } + + if (fexp != fgot) + abort (); + + fgot = 4.0; + fexp = 4.0; + +#pragma acc data copy (fgot, fdata[0:N]) + { +#pragma acc parallel loop + for (i = 0; i < N; i++) + { 
+ float expr = 2.0; + +#pragma acc atomic capture + { fgot = expr / fgot; fdata[i] = fgot; } + } + } + + if (fexp != fgot) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-2.c index 1ea0a6b..7a8cfd2 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-2.c @@ -5,7 +5,7 @@ int main (void) { - int i, j, k, l = 0, f = 0, x = 0; + int i, j, k, l = 0, f = 0, x = 0, l2 = 0; int m1 = 4, m2 = -5, m3 = 17; #pragma acc parallel @@ -20,6 +20,19 @@ main (void) } } + /* Test loop with > condition. */ +#pragma acc parallel + #pragma acc loop seq collapse(3) reduction(+:l2) + for (i = -2; i < m1; i++) + for (j = -3; j > (m2 - 1); j--) + { + for (k = 13; k < m3; k++) + { + if ((i + 2) * 12 + (j + 5) * 4 + (k - 13) != 9 + f++) + l2++; + } + } + for (i = -2; i < m1; i++) for (j = m2; j < -2; j++) { @@ -30,7 +43,7 @@ main (void) } } - if (l != x) + if (l != x || l2 != x) abort (); return 0; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-3.c index 6800428..50f538d 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-3.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-3.c @@ -7,7 +7,7 @@ int main (void) { - int i2, l = 0, r = 0; + int i2, l = 0, r = 0, l2 = 0; int a[3][3][3]; memset (a, '\0', sizeof (a)); @@ -27,13 +27,24 @@ main (void) l += 1; } + /* Test loop with >= condition. 
*/ +#pragma acc parallel + { + #pragma acc loop collapse(2) reduction(|:l2) + for (i2 = 0; i2 < 2; i2++) + for (int j = 1; j >= 0; j--) + for (int k = 0; k < 2; k++) + if (a[i2][j][k] != i2 + j * 4 + k * 16) + l2 += 1; + } + for (i2 = 0; i2 < 2; i2++) for (int j = 0; j < 2; j++) for (int k = 0; k < 2; k++) if (a[i2][j][k] != i2 + j * 4 + k * 16) r += 1; - if (l != r) + if (l != r || l2 != r) abort (); return 0; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/context-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-1.c index 2e3b62e..3479fc7 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/context-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-1.c @@ -1,5 +1,7 @@ /* { dg-do run { target openacc_nvidia_accel_selected } } */ /* { dg-additional-options "-lcuda -lcublas -lcudart" } */ +/* { dg-require-effective-target openacc_cublas } */ +/* { dg-require-effective-target openacc_cudart } */ #include <stdio.h> #include <stdlib.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/context-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-2.c index 6bdcfe7..db82b90 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/context-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-2.c @@ -1,5 +1,7 @@ /* { dg-do run { target openacc_nvidia_accel_selected } } */ /* { dg-additional-options "-lcuda -lcublas -lcudart" } */ +/* { dg-require-effective-target openacc_cublas } */ +/* { dg-require-effective-target openacc_cudart } */ #include <stdio.h> #include <stdlib.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/context-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-3.c index 8f14560..b96f661 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/context-3.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-3.c @@ -1,5 +1,7 @@ /* { dg-do run { target openacc_nvidia_accel_selected } } */ /* { dg-additional-options "-lcuda -lcublas -lcudart" } */ +/* { dg-require-effective-target 
openacc_cublas } */ +/* { dg-require-effective-target openacc_cudart } */ #include <stdio.h> #include <stdlib.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/context-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-4.c index b403a5c..7bfd216 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/context-4.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-4.c @@ -1,5 +1,7 @@ /* { dg-do run { target openacc_nvidia_accel_selected } } */ /* { dg-additional-options "-lcuda -lcublas -lcudart" } */ +/* { dg-require-effective-target openacc_cublas } */ +/* { dg-require-effective-target openacc_cudart } */ #include <stdio.h> #include <stdlib.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-clauses-kernels-ipa-pta.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-clauses-kernels-ipa-pta.c index 2cd98bd..49c11ac 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-clauses-kernels-ipa-pta.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-clauses-kernels-ipa-pta.c @@ -1,4 +1,5 @@ /* { dg-do run { target lto } } */ /* { dg-additional-options "-fipa-pta -flto -flto-partition=max" } */ +/* { dg-prune-output "warning: using serial compilation" } */ #include "data-clauses-kernels.c" diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-clauses-parallel-ipa-pta.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-clauses-parallel-ipa-pta.c index ddcf4e3..4d61d84 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-clauses-parallel-ipa-pta.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-clauses-parallel-ipa-pta.c @@ -1,4 +1,5 @@ /* { dg-do run { target lto } } */ /* { dg-additional-options "-fipa-pta -flto -flto-partition=max" } */ +/* { dg-prune-output "warning: using serial compilation" } */ #include "data-clauses-parallel.c" diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-3.c index c3a2187..dc6c7f3 100644 --- 
a/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-3.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-3.c @@ -1,5 +1,3 @@ -/* { dg-do run { target openacc_nvidia_accel_selected } } */ - #include <stdlib.h> #include <openacc.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose-ice-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose-ice-1.c index c7eae12..0777b61 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose-ice-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose-ice-1.c @@ -1,4 +1,4 @@ -/* { dg-additional-options "-fopenacc-kernels=decompose" } */ +/* { dg-additional-options "--param=openacc-kernels=decompose" } */ /* Hopefully, this is the same issue as '../../../gcc/testsuite/c-c++-common/goacc/kernels-decompose-ice-1.c'. { dg-ice "TODO" } TODO { dg-prune-output "during GIMPLE pass: omplower" } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose.c index dd8a1c1..0369ae9 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose.c @@ -1,4 +1,4 @@ -/* { dg-additional-options "-fopenacc-kernels=decompose" } */ +/* { dg-additional-options "--param=openacc-kernels=decompose" } */ /* See also 'declare-vla-kernels-decompose-ice-1.c'. 
*/ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/firstprivate-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/firstprivate-1.c index 689a443..27da765 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/firstprivate-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/firstprivate-1.c @@ -1,3 +1,12 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. */ + +/* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting + aspects of that functionality. */ + #include <openacc.h> @@ -12,9 +21,11 @@ void t1 () ary[i] = ~0; #pragma acc parallel num_gangs (32) copy (ok) firstprivate (val) copy(ary, ondev) + /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ { ondev = acc_on_device (acc_device_not_host); #pragma acc loop gang(static:1) + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (unsigned i = 0; i < 32; i++) { if (val != 2) @@ -68,7 +79,7 @@ void t2 () void t3 () { int a, b[N], c, d, i; - int n = acc_get_device_type () == acc_device_nvidia ? N : 1; + int n = acc_get_device_type () != acc_device_host ? 
N : 1; a = 5; for (i = 0; i < n; i++) @@ -76,6 +87,7 @@ void t3 () #pragma acc parallel num_gangs (n) firstprivate (a) #pragma acc loop gang + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (i = 0; i < n; i++) { a = a + i; @@ -117,8 +129,11 @@ void t4 () arr[i] = 3; #pragma acc parallel firstprivate(x) copy(arr) num_gangs(32) num_workers(8) vector_length(32) + /* { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .-1 } */ + /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-2 } */ { #pragma acc loop gang + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (i = 0; i < 32; i++) arr[i] += x; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c index 21d2139..20f7f04 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c @@ -1,5 +1,7 @@ /* { dg-do run { target openacc_nvidia_accel_selected } } */ /* { dg-additional-options "-lm -lcuda -lcublas -lcudart -Wall -Wextra" } */ +/* { dg-require-effective-target openacc_cublas } */ +/* { dg-require-effective-target openacc_cudart } */ #include <stdlib.h> #include <math.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-7.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-7.c index 6830ef1..66501e6 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-7.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-7.c @@ -1,6 +1,11 @@ -/* { dg-do run } */ - /* Test if, if_present clauses on host_data construct. 
*/ + +/* { dg-additional-options "-fopt-info-all-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-all-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. */ + /* C/C++ variant of 'libgomp.oacc-fortran/host_data-5.F90' */ #include <assert.h> @@ -14,15 +19,19 @@ foo (float *p, intptr_t host_p, int cond) #pragma acc data copyin(host_p) { #pragma acc host_data use_device(p) if_present + /* { dg-note {variable 'host_p\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ /* p not mapped yet, so it will be equal to the host pointer. */ assert (p == (float *) host_p); #pragma acc data copy(p[0:100]) + /* { dg-note {variable 'host_p\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ { /* Not inside a host_data construct, so p is still the host pointer. 
*/ assert (p == (float *) host_p); #pragma acc host_data use_device(p) + /* { dg-note {variable 'host_p\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ { #if ACC_MEM_SHARED assert (p == (float *) host_p); @@ -33,6 +42,7 @@ foo (float *p, intptr_t host_p, int cond) } #pragma acc host_data use_device(p) if_present + /* { dg-note {variable 'host_p\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ { #if ACC_MEM_SHARED assert (p == (float *) host_p); @@ -43,6 +53,8 @@ foo (float *p, intptr_t host_p, int cond) } #pragma acc host_data use_device(p) if(cond) + /* { dg-note {variable 'host_p\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target { ! openacc_host_selected } } .-2 } */ { #if ACC_MEM_SHARED assert (p == (float *) host_p); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c index e76e409..e08cfa5 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c @@ -1,13 +1,19 @@ /* Test OpenACC 'kernels' construct decomposition. 
*/ /* { dg-additional-options "-fopt-info-omp-all" } */ -/* { dg-additional-options "-fopenacc-kernels=decompose" } */ +/* { dg-additional-options "--param=openacc-kernels=decompose" } */ + +/* { dg-additional-options "-fopt-info-all-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-all-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. */ /* It's only with Tcl 8.5 (released in 2007) that "the variable 'varName' passed to 'incr' may be unset, and in that case, it will be set to [...]", so to maintain compatibility with earlier Tcl releases, we manually initialize counter variables: - { dg-line l_dummy[variable c_loop_i 0] } + { dg-line l_dummy[variable c_compute 0 c_loop_i 0] } { dg-message "dummy" "" { target iN-VAl-Id } l_dummy } to avoid "WARNING: dg-line var l_dummy defined, but not used". */ @@ -22,15 +28,19 @@ int main() #define N 123 int b[N] = { 0 }; -#pragma acc kernels +#pragma acc kernels /* { dg-line l_compute[incr c_compute] } */ { int c = 234; /* { dg-message "note: beginning 'gang-single' part in OpenACC 'kernels' region" } */ + /* { dg-note {variable 'c' declared in block is candidate for adjusting OpenACC privatization level} "" { target *-*-* } l_compute$c_compute } + { dg-note {variable 'c\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */ /*TODO Hopefully, this is the same issue as '../../../gcc/testsuite/c-c++-common/goacc/kernels-decompose-ice-1.c'. 
*/ (volatile int *) &c; #pragma acc loop independent gang /* { dg-line l_loop_i[incr c_loop_i] } */ /* { dg-message "note: parallelized loop nest in OpenACC 'kernels' region" "" { target *-*-* } l_loop_i$c_loop_i } */ + /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */ + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */ /* { dg-optimized "assigned OpenACC gang loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } */ for (int i = 0; i < N; ++i) b[i] = c; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-1.c index bcbe28a..f28513d 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-1.c @@ -1,3 +1,9 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. 
*/ + #include <assert.h> /* Test of worker-private variables declared in a local scope, broadcasting @@ -12,30 +18,40 @@ main (int argc, char* argv[]) arr[i] = i; #pragma acc kernels copy(arr) + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ { int j; #pragma acc loop gang(num:32) + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (i = 0; i < 32; i++) { #pragma acc loop worker(num:32) + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ + /* { dg-note {variable 'x' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } */ for (j = 0; j < 32; j++) { int k; int x = i ^ j * 3; #pragma acc loop vector(length:32) + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += x * k; } #pragma acc loop worker(num:32) + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ + /* { dg-note {variable 'x' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } */ for (j = 0; j < 32; j++) { int k; int x = i | j * 5; #pragma acc loop vector(length:32) + /* { dg-note {variable 'k' in 'private' clause isn't 
candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += x * k; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-2.c index a944486..21f2511 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-2.c @@ -1,3 +1,9 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. */ + #include <assert.h> /* Test of worker-private variables declared in a local scope, broadcasting @@ -12,25 +18,32 @@ main (int argc, char* argv[]) arr[i] = i; #pragma acc kernels copy(arr) + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ { int j; #pragma acc loop gang(num:32) + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (i = 0; i < 32; i++) { #pragma acc loop worker(num:32) + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ + /* { dg-note {variable 'x' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } */ for (j = 0; j < 32; j++) { int k; int x = i ^ j * 3; #pragma acc loop 
vector(length:32) + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += x * k; x = i | j * 5; #pragma acc loop vector(length:32) + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += x * k; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-3.c index ba0b44d..8b4cde8 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-3.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-3.c @@ -1,3 +1,9 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. 
*/ + #include <assert.h> /* Test of worker-private variables declared in a local scope, broadcasting @@ -17,13 +23,18 @@ main (int argc, char* argv[]) arr[i] = i; #pragma acc kernels copy(arr) + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ { int j; #pragma acc loop gang(num:32) + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (i = 0; i < 32; i++) { #pragma acc loop worker(num:32) + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ + /* { dg-note {variable 'pt' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } */ for (j = 0; j < 32; j++) { int k; @@ -33,10 +44,12 @@ main (int argc, char* argv[]) pt.y = i | j * 5; #pragma acc loop vector(length:32) + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += pt.x * k; #pragma acc loop vector(length:32) + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += pt.y * k; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-4.c index 7189d2a..a658d16 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-4.c +++ 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-4.c @@ -1,3 +1,9 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. */ + #include <assert.h> /* Test of worker-private variables declared in a local scope, broadcasting @@ -17,13 +23,19 @@ main (int argc, char* argv[]) arr[i] = i; #pragma acc kernels copy(arr) + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ { int j; #pragma acc loop gang(num:32) + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (i = 0; i < 32; i++) { #pragma acc loop worker(num:32) + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ + /* { dg-note {variable 'pt' declared in block is candidate for adjusting OpenACC privatization level} "" { target *-*-* } .-3 } */ + /* { dg-note {variable 'ptp' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-4 } */ for (j = 0; j < 32; j++) { int k; @@ -34,12 +46,14 @@ main (int argc, char* argv[]) pt.x = i ^ j * 3; #pragma acc loop vector(length:32) + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += ptp->x * k; ptp->y = i | j * 5; #pragma acc loop 
vector(length:32) + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += pt.y * k; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-5.c index 854ad7e..b82b9bf 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-5.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-5.c @@ -1,3 +1,9 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. */ + #include <assert.h> /* Test of worker-private variables declared in a local scope, broadcasting @@ -12,13 +18,18 @@ main (int argc, char* argv[]) arr[i] = i; #pragma acc kernels copy(arr) + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ { int j; #pragma acc loop gang(num:32) + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (i = 0; i < 32; i++) { #pragma acc loop worker(num:32) + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ + /* { dg-note {variable 'pt' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } 
.-3 } */ for (j = 0; j < 32; j++) { int k; @@ -27,12 +38,14 @@ main (int argc, char* argv[]) pt[0] = i ^ j * 3; #pragma acc loop vector(length:32) + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += pt[0] * k; pt[1] = i | j * 5; #pragma acc loop vector(length:32) + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += pt[1] * k; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-1.c index 5bc90c2..38d89c7 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-1.c @@ -1,3 +1,9 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. */ + #include <assert.h> /* Test of gang-private variables declared on loop directive. 
*/ @@ -13,6 +19,8 @@ main (int argc, char* argv[]) #pragma acc kernels copy(arr) { #pragma acc loop gang(num:32) private(x) + /* { dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ for (i = 0; i < 32; i++) { x = i * 2; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-2.c index 3eb1167..62dd12f 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-2.c @@ -1,3 +1,9 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. 
*/ + #include <assert.h> /* Test of gang-private variables declared on loop directive, with broadcasting @@ -14,11 +20,15 @@ main (int argc, char* argv[]) #pragma acc kernels copy(arr) { #pragma acc loop gang(num:32) private(x) + /* { dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } */ for (i = 0; i < 32; i++) { x = i * 2; #pragma acc loop worker(num:32) + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (int j = 0; j < 32; j++) arr[i * 32 + j] += x; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-3.c index 86b9a71..c22c3b4 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-3.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-3.c @@ -1,3 +1,9 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. 
*/ + #include <assert.h> /* Test of gang-private variables declared on loop directive, with broadcasting @@ -14,11 +20,15 @@ main (int argc, char* argv[]) #pragma acc kernels copy(arr) { #pragma acc loop gang(num:32) private(x) + /* { dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } */ for (i = 0; i < 32; i++) { x = i * 2; #pragma acc loop vector(length:32) + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (int j = 0; j < 32; j++) arr[i * 32 + j] += x; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-4.c index 4174248..27a8e80 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-4.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-4.c @@ -1,3 +1,9 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. 
*/ + #include <assert.h> /* Test of gang-private addressable variable declared on loop directive, with @@ -14,6 +20,10 @@ main (int argc, char* argv[]) #pragma acc kernels copy(arr) { #pragma acc loop gang(num:32) private(x) + /* { dg-note {variable 'x' in 'private' clause is candidate for adjusting OpenACC privatization level} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ + /* { dg-note {variable 'p' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } */ + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-4 } */ for (i = 0; i < 32; i++) { int *p = &x; @@ -21,6 +31,7 @@ main (int argc, char* argv[]) x = i * 2; #pragma acc loop worker(num:32) + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (int j = 0; j < 32; j++) arr[i * 32 + j] += x; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-5.c index b160eaa..f570c22 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-5.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-5.c @@ -1,3 +1,9 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. 
*/ + #include <assert.h> /* Test of gang-private array variable declared on loop directive, with @@ -14,12 +20,16 @@ main (int argc, char* argv[]) #pragma acc kernels copy(arr) { #pragma acc loop gang(num:32) private(x) + /* { dg-note {variable 'x' in 'private' clause is candidate for adjusting OpenACC privatization level} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } */ for (i = 0; i < 32; i++) { for (int j = 0; j < 8; j++) x[j] = j * 2; #pragma acc loop worker(num:32) + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (int j = 0; j < 32; j++) arr[i * 32 + j] += x[j % 8]; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-6.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-6.c index 88ab245..5b776f1 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-6.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-6.c @@ -1,3 +1,9 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. 
*/ + #include <assert.h> /* Test of gang-private aggregate variable declared on loop directive, with @@ -20,6 +26,9 @@ main (int argc, char* argv[]) #pragma acc kernels copy(arr) { #pragma acc loop gang private(pt) + /* { dg-note {variable 'pt' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } */ for (i = 0; i < 32; i++) { pt.x = i; @@ -28,6 +37,7 @@ main (int argc, char* argv[]) pt.attr[5] = i * 6; #pragma acc loop worker + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (int j = 0; j < 32; j++) arr[i * 32 + j] += pt.x + pt.y + pt.z + pt.attr[5]; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-vector-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-vector-1.c index df4add1..696da0f 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-vector-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-vector-1.c @@ -1,3 +1,9 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. */ + #include <assert.h> /* Test of vector-private variables declared on loop directive. 
*/ @@ -11,18 +17,24 @@ main (int argc, char* argv[]) arr[i] = i; #pragma acc kernels copy(arr) + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ { int j; #pragma acc loop gang(num:32) + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (i = 0; i < 32; i++) { #pragma acc loop worker(num:32) + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ for (j = 0; j < 32; j++) { int k; #pragma acc loop vector(length:32) private(x) + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ for (k = 0; k < 32; k++) { x = i ^ j * 3; @@ -30,6 +42,8 @@ main (int argc, char* argv[]) } #pragma acc loop vector(length:32) private(x) + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ for (k = 0; k < 32; k++) { x = i | j * 5; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-vector-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-vector-2.c index 53c56b2..2e3b635 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-vector-2.c +++ 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-vector-2.c @@ -1,3 +1,9 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. */ + #include <assert.h> /* Test of vector-private variables declared on loop directive. Array type. */ @@ -11,18 +17,24 @@ main (int argc, char* argv[]) arr[i] = i; #pragma acc kernels copy(arr) + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ { int j; #pragma acc loop gang(num:32) + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (i = 0; i < 32; i++) { #pragma acc loop worker(num:32) + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ for (j = 0; j < 32; j++) { int k; #pragma acc loop vector(length:32) private(pt) + /* { dg-note {variable 'pt' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ for (k = 0; k < 32; k++) { pt[0] = i ^ j * 3; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-1.c index 95db2f8..1aedc79 100644 --- 
a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-1.c @@ -1,3 +1,9 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. */ + #include <assert.h> /* Test of worker-private variables declared on a loop directive. */ @@ -11,13 +17,17 @@ main (int argc, char* argv[]) arr[i] = i; #pragma acc kernels copy(arr) + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ { int j; #pragma acc loop gang(num:32) + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (i = 0; i < 32; i++) { #pragma acc loop worker(num:32) private(x) + /* { dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ for (j = 0; j < 32; j++) { x = i ^ j * 3; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-2.c index ceaa3ee..3bf62aa 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-2.c @@ -1,3 +1,9 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options 
"-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. */ + #include <assert.h> /* Test of worker-private variables declared on a loop directive, broadcasting @@ -12,19 +18,25 @@ main (int argc, char* argv[]) arr[i] = i; #pragma acc kernels copy(arr) + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ { int j; #pragma acc loop gang(num:32) + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (i = 0; i < 32; i++) { #pragma acc loop worker(num:32) private(x) + /* { dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ + /* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } */ for (j = 0; j < 32; j++) { int k; x = i ^ j * 3; #pragma acc loop vector(length:32) + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += x * k; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-3.c index 193a1d1..8de5516 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-3.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-3.c @@ -1,3 +1,9 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { 
dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. */ + #include <assert.h> /* Test of worker-private variables declared on a loop directive, broadcasting @@ -12,30 +18,40 @@ main (int argc, char* argv[]) arr[i] = i; #pragma acc kernels copy(arr) + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ { int j; #pragma acc loop gang(num:32) + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (i = 0; i < 32; i++) { #pragma acc loop worker(num:32) private(x) + /* { dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ + /* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } */ for (j = 0; j < 32; j++) { int k; x = i ^ j * 3; #pragma acc loop vector(length:32) + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += x * k; } #pragma acc loop worker(num:32) private(x) + /* { dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ + /* { dg-note 
{variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } */ for (j = 0; j < 32; j++) { int k; x = i | j * 5; #pragma acc loop vector(length:32) + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += x * k; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-4.c index 4320cd8..425fe63 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-4.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-4.c @@ -1,3 +1,9 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. 
*/ + #include <assert.h> /* Test of worker-private variables declared on a loop directive, broadcasting @@ -12,25 +18,32 @@ main (int argc, char* argv[]) arr[i] = i; #pragma acc kernels copy(arr) + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ { int j; #pragma acc loop gang(num:32) + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (i = 0; i < 32; i++) { #pragma acc loop worker(num:32) private(x) + /* { dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ + /* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } */ for (j = 0; j < 32; j++) { int k; x = i ^ j * 3; #pragma acc loop vector(length:32) + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += x * k; x = i | j * 5; #pragma acc loop vector(length:32) + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += x * k; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-5.c index 80992ee..c027c02 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-5.c +++ 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-5.c @@ -1,3 +1,9 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. */ + #include <assert.h> /* Test of worker-private variables declared on a loop directive, broadcasting @@ -12,13 +18,19 @@ main (int argc, char* argv[]) arr[i] = i; #pragma acc kernels copy(arr) + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ { int j; #pragma acc loop gang(num:32) + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (i = 0; i < 32; i++) { #pragma acc loop worker(num:32) private(x) + /* { dg-note {variable 'x' in 'private' clause is candidate for adjusting OpenACC privatization level} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ + /* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } */ + /* { dg-note {variable 'p' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-4 } */ for (j = 0; j < 32; j++) { int k; @@ -27,12 +39,14 @@ main (int argc, char* argv[]) x = i ^ j * 3; #pragma acc loop vector(length:32) + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += x * k; *p = i | j * 5; #pragma acc loop 
vector(length:32) + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += x * k; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-6.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-6.c index 005ba60..4f17566 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-6.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-6.c @@ -1,3 +1,9 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. */ + #include <assert.h> /* Test of worker-private variables declared on a loop directive, broadcasting @@ -18,13 +24,18 @@ main (int argc, char* argv[]) arr[i] = i; #pragma acc kernels copy(arr) + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ { int j; #pragma acc loop gang(num:32) + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (i = 0; i < 32; i++) { #pragma acc loop worker(num:32) private(pt) + /* { dg-note {variable 'pt' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ + /* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target 
*-*-* } .-3 } */ for (j = 0; j < 32; j++) { int k; @@ -33,10 +44,12 @@ main (int argc, char* argv[]) pt.y = i | j * 5; #pragma acc loop vector(length:32) + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += pt.x * k; #pragma acc loop vector(length:32) + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += pt.y * k; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-7.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-7.c index 8d367fb..12b4c54 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-7.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-7.c @@ -1,3 +1,9 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. */ + #include <assert.h> /* Test of worker-private variables declared on loop directive, broadcasting @@ -15,14 +21,19 @@ main (int argc, char* argv[]) /* "pt" is treated as "present_or_copy" on the kernels directive because it is an array variable. 
*/ #pragma acc kernels copy(arr) + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ { int j; #pragma acc loop gang(num:32) + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (i = 0; i < 32; i++) { /* But here, it is made private per-worker. */ #pragma acc loop worker(num:32) private(pt) + /* { dg-note {variable 'pt' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ + /* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } */ for (j = 0; j < 32; j++) { int k; @@ -30,12 +41,14 @@ main (int argc, char* argv[]) pt[0] = i ^ j * 3; #pragma acc loop vector(length:32) + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += pt[0] * k; pt[1] = i | j * 5; #pragma acc loop vector(length:32) + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += pt[1] * k; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-11.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-11.c index 86cfeb6..1f05161 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-11.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-11.c @@ -1,5 +1,4 @@ -/* Only nvptx plugin does the required error checking. 
- { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ #include <stdio.h> #include <stdlib.h> @@ -22,6 +21,9 @@ main (int argc, char **argv) return 0; } -/* { dg-output "CheCKpOInT(\n|\r\n|\r).*" } */ -/* { dg-output "invalid device address" } */ +/* { dg-output "CheCKpOInT(\n|\r\n|\r)+" } */ +/* { dg-output "libgomp: invalid device address(\n|\r\n|\r)+" { target openacc_nvidia_accel_selected } } */ +/* { dg-output "libgomp: GCN fatal error: Could not free device memory(\n|\r\n|\r)+" { target openacc_radeon_accel_selected } } + { dg-output "Runtime message: HSA_STATUS_ERROR_INVALID_ALLOCATION: The requested allocation is not valid\.(\n|\r\n|\r)+" { target openacc_radeon_accel_selected } } */ +/* { dg-output "libgomp: error in freeing device memory in acc_free(\n|\r\n|\r)+$" } */ /* { dg-shouldfail "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-13.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-13.c index aca4c25..90b137f 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-13.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-13.c @@ -1,6 +1,6 @@ /* Check acc_is_present and acc_delete. */ -/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ #include <stdlib.h> #include <openacc.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-14.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-14.c index de6d38b..892f97c 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-14.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-14.c @@ -1,6 +1,6 @@ /* Check acc_is_present. 
*/ -/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ #include <stdlib.h> #include <openacc.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-15.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-15.c index 50c1701..335b26f 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-15.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-15.c @@ -1,6 +1,6 @@ /* Check acc_is_present and acc_copyout. */ -/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ #include <stdlib.h> #include <openacc.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-20.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-20.c index 10d3cbc..f1d9a21 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-20.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-20.c @@ -1,6 +1,6 @@ -/* Exercise acc_copyin and acc_copyout on nvidia targets. */ +/* Exercise acc_copyin and acc_copyout. */ -/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ #include <stdio.h> #include <stdlib.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-23.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-23.c index b1f3e71..d39f31e 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-23.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-23.c @@ -1,6 +1,6 @@ -/* Exercise acc_copyin and acc_copyout on nvidia targets. */ +/* Exercise acc_copyin and acc_copyout. 
*/ -/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ #include <stdio.h> #include <stdlib.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-24.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-24.c index 09e2817..96e3129 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-24.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-24.c @@ -1,6 +1,6 @@ /* Exercise acc_create, acc_is_present and acc_delete. */ -/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ #include <stdlib.h> #include <openacc.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-34.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-34.c index a24916d..8ddd897 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-34.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-34.c @@ -1,6 +1,6 @@ -/* Exercise an invalid acc_present_or_create on nvidia targets. */ +/* Exercise an invalid acc_present_or_create. */ -/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ #include <stdio.h> #include <stdlib.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-42.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-42.c index 30b90d4..adab109 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-42.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-42.c @@ -1,6 +1,6 @@ -/* Exercise acc_update_device on unmapped data on nvidia targets. */ +/* Exercise acc_update_device on unmapped data. 
*/ -/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ #include <stdio.h> #include <stdlib.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-44.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-44.c index 8bbf016..f02fe21 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-44.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-44.c @@ -1,6 +1,6 @@ -/* Exercise acc_update_device with size zero data on nvidia targets. */ +/* Exercise acc_update_device with size zero data. */ -/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ #include <stdio.h> #include <stdlib.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-48.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-48.c index afa137f..9975c9e 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-48.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-48.c @@ -1,6 +1,6 @@ -/* Exercise acc_update_self with a size zero data mapping on nvidia targets. */ +/* Exercise acc_update_self with a size zero data mapping. 
*/ -/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ #include <stdio.h> #include <string.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-5.c index 961a62c..1e0ab9c 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-5.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-5.c @@ -35,6 +35,24 @@ main (int argc, char **argv) acc_shutdown (acc_device_nvidia); } - return 0; + if (acc_get_num_devices (acc_device_radeon) != 0) + { + acc_init (acc_device_radeon); + + if (acc_get_device_type () != acc_device_radeon) + abort (); + + acc_shutdown (acc_device_radeon); + + acc_init (acc_device_default); + acc_set_device_type (acc_device_radeon); + + if (acc_get_device_type () != acc_device_radeon) + abort (); + + acc_shutdown (acc_device_radeon); + } + + return 0; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-52.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-52.c index 25c70c2..9a562b3 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-52.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-52.c @@ -1,6 +1,4 @@ -/* Exercise acc_map_data with a NULL data mapping on nvidia targets. */ - -/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* Exercise acc_map_data with a NULL data mapping. */ #include <stdio.h> #include <stdlib.h> @@ -30,6 +28,6 @@ main (int argc, char **argv) } /* { dg-output "CheCKpOInT(\n|\r\n|\r).*" } */ -/* { dg-output "\\\[\[^\n\r]*,\\\+256\]->\[\[0-9a-fA-FxX\]+,\\\+256\\\] is a bad map" { target openacc_nvidia_accel_selected } } */ +/* { dg-output "\\\[\[^\n\r]*,\\\+256\]->\[\[0-9a-fA-FxX\]+,\\\+256\\\] is a bad map" { target { ! 
openacc_host_selected } } } */ /* { dg-output "cannot map data on shared-memory system" { target openacc_host_selected } } */ /* { dg-shouldfail "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-53.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-53.c index a8ee7df..d452a69 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-53.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-53.c @@ -1,6 +1,4 @@ -/* Exercise acc_map_data with a NULL data mapping on nvidia targets. */ - -/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* Exercise acc_map_data with a NULL data mapping. */ #include <stdio.h> #include <stdlib.h> @@ -30,6 +28,6 @@ main (int argc, char **argv) } /* { dg-output "CheCKpOInT(\n|\r\n|\r).*" } */ -/* { dg-output "\\\[\[0-9a-fA-FxX\]+,\\\+256\]->\\\[\[^\n\r]*,\\\+256\\\] is a bad map" { target openacc_nvidia_accel_selected } } */ +/* { dg-output "\\\[\[0-9a-fA-FxX\]+,\\\+256\]->\\\[\[^\n\r]*,\\\+256\\\] is a bad map" { target { ! openacc_host_selected } } } */ /* { dg-output "cannot map data on shared-memory system" { target openacc_host_selected } } */ /* { dg-shouldfail "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-54.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-54.c index fc221f4..1922754 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-54.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-54.c @@ -1,6 +1,4 @@ -/* Exercise acc_map_data with data size of zero on nvidia targets. */ - -/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* Exercise acc_map_data with data size of zero. 
*/ #include <stdio.h> #include <stdlib.h> @@ -30,6 +28,6 @@ main (int argc, char **argv) } /* { dg-output "CheCKpOInT(\n|\r\n|\r).*" } */ -/* { dg-output "\\\[\[0-9a-fA-FxX\]+,\\\+0\\\]->\\\[\[0-9a-fA-FxX\]+,\\\+0\\\] is a bad map" { target openacc_nvidia_accel_selected } } */ +/* { dg-output "\\\[\[0-9a-fA-FxX\]+,\\\+0\\\]->\\\[\[0-9a-fA-FxX\]+,\\\+0\\\] is a bad map" { target { ! openacc_host_selected } } } */ /* { dg-output "cannot map data on shared-memory system" { target openacc_host_selected } } */ /* { dg-shouldfail "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-57.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-57.c index 971a014..81653c6 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-57.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-57.c @@ -33,6 +33,6 @@ main (int argc, char **argv) } /* { dg-output "CheCKpOInT(\n|\r\n|\r).*" } */ -/* { dg-output "\[0-9a-fA-FxX\]+ is not a mapped block" { target openacc_nvidia_accel_selected } } */ +/* { dg-output "\[0-9a-fA-FxX\]+ is not a mapped block" { target { ! openacc_host_selected } } } */ /* { dg-output "cannot map data on shared-memory system" { target openacc_host_selected } } */ /* { dg-shouldfail "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-58.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-58.c index fedda77..c6bc261 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-58.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-58.c @@ -33,6 +33,6 @@ main (int argc, char **argv) } /* { dg-output "CheCKpOInT(\n|\r\n|\r).*" } */ -/* { dg-output "\[^\n\r]* is not a mapped block" { target openacc_nvidia_accel_selected } } */ +/* { dg-output "\[^\n\r]* is not a mapped block" { target { ! 
openacc_host_selected } } } */ /* { dg-output "cannot map data on shared-memory system" { target openacc_host_selected } } */ /* { dg-shouldfail "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-6.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-6.c index afdd480..a3affc0 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-6.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-6.c @@ -11,26 +11,47 @@ main (int argc, char **argv) if (acc_get_device_type () == acc_device_default) abort (); - if (acc_get_num_devices (acc_device_nvidia) == 0) - return 0; + if (acc_get_num_devices (acc_device_nvidia)) + { + acc_set_device_type (acc_device_nvidia); - acc_set_device_type (acc_device_nvidia); + if (acc_get_device_type () != acc_device_nvidia) + abort (); - if (acc_get_device_type () != acc_device_nvidia) - abort (); + acc_shutdown (acc_device_nvidia); - acc_shutdown (acc_device_nvidia); + acc_set_device_type (acc_device_nvidia); - acc_set_device_type (acc_device_nvidia); + if (acc_get_device_type () != acc_device_nvidia) + abort (); - if (acc_get_device_type () != acc_device_nvidia) - abort (); + devnum = acc_get_num_devices (acc_device_host); + if (devnum != 1) + abort (); - devnum = acc_get_num_devices (acc_device_host); - if (devnum != 1) - abort (); + acc_shutdown (acc_device_nvidia); + } + + if (acc_get_num_devices (acc_device_radeon)) + { + acc_set_device_type (acc_device_radeon); + + if (acc_get_device_type () != acc_device_radeon) + abort (); + + acc_shutdown (acc_device_radeon); + + acc_set_device_type (acc_device_radeon); + + if (acc_get_device_type () != acc_device_radeon) + abort (); + + devnum = acc_get_num_devices (acc_device_host); + if (devnum != 1) + abort (); - acc_shutdown (acc_device_nvidia); + acc_shutdown (acc_device_radeon); + } if (acc_get_device_type () == acc_device_default) abort (); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-62.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-62.c index 
ace4b05..2e7184a 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-62.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-62.c @@ -1,4 +1,5 @@ -/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* Not all implement this checking. + { dg-skip-if "" { openacc_radeon_accel_selected || openacc_host_selected } } */ #include <stdio.h> #include <string.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-63.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-63.c index a3fa728..84bbccb 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-63.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-63.c @@ -1,4 +1,5 @@ -/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* Not all implement this checking. + { dg-skip-if "" { openacc_radeon_accel_selected || openacc_host_selected } } */ #include <stdio.h> #include <string.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-64.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-64.c index b57f67a..e26681a 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-64.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-64.c @@ -1,4 +1,5 @@ -/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* Not all implement this checking. + { dg-skip-if "" { openacc_radeon_accel_selected || openacc_host_selected } } */ #include <stdio.h> #include <string.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-65.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-65.c index 0fca821..69add3f 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-65.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-65.c @@ -1,4 +1,5 @@ -/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* Not all implement this checking. 
+ { dg-skip-if "" { openacc_radeon_accel_selected || openacc_host_selected } } */ #include <stdio.h> #include <string.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-67.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-67.c index ec3c2a5..c13333b 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-67.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-67.c @@ -1,4 +1,5 @@ -/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* Not all implement this checking. + { dg-skip-if "" { openacc_radeon_accel_selected || openacc_host_selected } } */ #include <stdio.h> #include <string.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-68.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-68.c index f109034..7fffd0b 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-68.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-68.c @@ -1,4 +1,5 @@ -/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* Not all implement this checking. 
+ { dg-skip-if "" { openacc_radeon_accel_selected || openacc_host_selected } } */ #include <stdio.h> #include <string.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-69.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-69.c index c10beba..00e0ca8 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-69.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-69.c @@ -1,5 +1,6 @@ /* { dg-do run { target openacc_nvidia_accel_selected } } */ /* { dg-additional-options "-lcuda" } */ +/* { dg-require-effective-target openacc_cuda } */ #include <stdio.h> #include <unistd.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-70.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-70.c index 912b266..a2918c0 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-70.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-70.c @@ -1,5 +1,6 @@ /* { dg-do run { target openacc_nvidia_accel_selected } } */ /* { dg-additional-options "-lcuda" } */ +/* { dg-require-effective-target openacc_cuda } */ #include <stdio.h> #include <stdlib.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-72.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-72.c index e383ba0..99b62f1 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-72.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-72.c @@ -1,5 +1,6 @@ /* { dg-do run { target openacc_nvidia_accel_selected } } */ /* { dg-additional-options "-lcuda" } */ +/* { dg-require-effective-target openacc_cuda } */ #include <stdio.h> #include <unistd.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-73.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-73.c index 43a8b7e..5b4b3fd 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-73.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-73.c @@ -1,5 +1,6 @@ /* { dg-do run { target openacc_nvidia_accel_selected } } */ /* { dg-additional-options "-lcuda" } */ +/* { dg-require-effective-target openacc_cuda } */ 
#include <stdio.h> #include <unistd.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-74.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-74.c index 0efcf0d..939f255 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-74.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-74.c @@ -1,5 +1,6 @@ /* { dg-do run { target openacc_nvidia_accel_selected } } */ /* { dg-additional-options "-lcuda" } */ +/* { dg-require-effective-target openacc_cuda } */ #include <stdio.h> #include <stdlib.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-75.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-75.c index 1942211..804ee39 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-75.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-75.c @@ -1,5 +1,6 @@ /* { dg-do run { target openacc_nvidia_accel_selected } } */ /* { dg-additional-options "-lcuda" } */ +/* { dg-require-effective-target openacc_cuda } */ #include <stdio.h> #include <unistd.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-76.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-76.c index 11d9d62..f904526 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-76.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-76.c @@ -1,5 +1,6 @@ /* { dg-do run { target openacc_nvidia_accel_selected } } */ /* { dg-additional-options "-lcuda" } */ +/* { dg-require-effective-target openacc_cuda } */ #include <stdio.h> #include <stdlib.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-78.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-78.c index 4f58fb2..d8cba4d 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-78.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-78.c @@ -1,5 +1,6 @@ /* { dg-do run { target openacc_nvidia_accel_selected } } */ /* { dg-additional-options "-lcuda" } */ +/* { dg-require-effective-target openacc_cuda } */ #include <stdio.h> #include <stdlib.h> diff --git 
a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-79.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-79.c index b2e2687..b805d5f 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-79.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-79.c @@ -1,5 +1,6 @@ /* { dg-do run { target openacc_nvidia_accel_selected } } */ /* { dg-additional-options "-lcuda" } */ +/* { dg-require-effective-target openacc_cuda } */ #include <stdio.h> #include <stdlib.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-81.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-81.c index d5f18f0..958672c 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-81.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-81.c @@ -1,5 +1,6 @@ /* { dg-do run { target openacc_nvidia_accel_selected } } */ /* { dg-additional-options "-lcuda" } */ +/* { dg-require-effective-target openacc_cuda } */ #include <stdio.h> #include <stdlib.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-82.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-82.c index 9cf73b3..a36f8e6 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-82.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-82.c @@ -1,5 +1,6 @@ /* { dg-do run { target openacc_nvidia_accel_selected } } */ /* { dg-additional-options "-lcuda" } */ +/* { dg-require-effective-target openacc_cuda } */ #include <stdio.h> #include <stdlib.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-83.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-83.c index 51b7ee7..44ef1db 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-83.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-83.c @@ -1,5 +1,6 @@ /* { dg-do run { target openacc_nvidia_accel_selected } } */ /* { dg-additional-options "-lcuda" } */ +/* { dg-require-effective-target openacc_cuda } */ #include <stdio.h> #include <stdlib.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-84.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-84.c index d793c74..c1ff763 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-84.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-84.c @@ -1,5 +1,6 @@ /* { dg-do run { target openacc_nvidia_accel_selected } } */ /* { dg-additional-options "-lcuda" } */ +/* { dg-require-effective-target openacc_cuda } */ #include <stdlib.h> #include <unistd.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-85.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-85.c index 141c83b..db25065 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-85.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-85.c @@ -1,5 +1,6 @@ /* { dg-do run { target openacc_nvidia_accel_selected } } */ /* { dg-additional-options "-lcuda" } */ +/* { dg-require-effective-target openacc_cuda } */ #include <stdlib.h> #include <unistd.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-86.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-86.c index b8a8ee9..7e8a7e2 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-86.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-86.c @@ -7,9 +7,6 @@ int main (int argc, char **argv) { - if (acc_get_num_devices (acc_device_nvidia) == 0) - return 0; - if (acc_get_current_cuda_device () != 0) abort (); @@ -20,18 +17,28 @@ main (int argc, char **argv) acc_shutdown (acc_device_host); - if (acc_get_num_devices (acc_device_nvidia) == 0) - return 0; - if (acc_get_current_cuda_device () != 0) abort (); - acc_init (acc_device_nvidia); + if (acc_get_num_devices (acc_device_nvidia)) + { + acc_init (acc_device_nvidia); - if (acc_get_current_cuda_device () == 0) - abort (); + if (acc_get_current_cuda_device () == 0) + abort (); + + acc_shutdown (acc_device_nvidia); + } + + if (acc_get_num_devices (acc_device_radeon)) + { + acc_init (acc_device_radeon); + + if (acc_get_current_cuda_device () != 0) + abort (); - acc_shutdown (acc_device_nvidia); + acc_shutdown 
(acc_device_radeon); + } if (acc_get_current_cuda_device () != 0) abort (); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-87.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-87.c index 147d443..cdc87ed 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-87.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-87.c @@ -7,9 +7,6 @@ int main (int argc, char **argv) { - if (acc_get_num_devices (acc_device_nvidia) == 0) - return 0; - if (acc_get_current_cuda_context () != 0) abort (); @@ -20,18 +17,28 @@ main (int argc, char **argv) acc_shutdown (acc_device_host); - if (acc_get_num_devices (acc_device_nvidia) == 0) - return 0; - if (acc_get_current_cuda_context () != 0) abort (); - acc_init (acc_device_nvidia); + if (acc_get_num_devices (acc_device_nvidia)) + { + acc_init (acc_device_nvidia); - if (acc_get_current_cuda_context () == 0) - abort (); + if (acc_get_current_cuda_context () == 0) + abort (); + + acc_shutdown (acc_device_nvidia); + } + + if (acc_get_num_devices (acc_device_radeon)) + { + acc_init (acc_device_radeon); + + if (acc_get_current_cuda_context () != 0) + abort (); - acc_shutdown (acc_device_nvidia); + acc_shutdown (acc_device_radeon); + } if (acc_get_current_cuda_context () != 0) abort (); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-88.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-88.c index 10f4ad8..c1cccd9 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-88.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-88.c @@ -1,4 +1,4 @@ -/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ #include <stdio.h> #include <pthread.h> @@ -47,10 +47,7 @@ main (int argc, char **argv) pthread_attr_t attr; pthread_t *tid; - if (acc_get_num_devices (acc_device_nvidia) == 0) - return 0; - - acc_init (acc_device_nvidia); + acc_init (acc_device_default); x = (unsigned char *) malloc (N); @@ -103,8 +100,6 @@ main (int argc, char **argv) if (acc_is_present (x, N) != 0) 
abort (); - acc_shutdown (acc_device_nvidia); - return 0; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-89.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-89.c index 061c409..6b4e3ac 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-89.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-89.c @@ -1,4 +1,4 @@ -/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ #include <stdio.h> #include <pthread.h> @@ -23,11 +23,16 @@ test (void *arg) tid = (int) (long) arg; - devnum = acc_get_device_num (acc_device_nvidia); - acc_set_device_num (devnum, acc_device_nvidia); + devnum = acc_get_device_num (acc_device_default); + acc_set_device_num (devnum, acc_device_default); +#if ACC_DEVICE_TYPE_nvidia if (acc_get_current_cuda_context () == NULL) abort (); +#else + if (acc_get_current_cuda_context () != NULL) + abort (); +#endif p = (unsigned char *) malloc (N); @@ -50,10 +55,7 @@ main (int argc, char **argv) pthread_attr_t attr; pthread_t *tid; - if (acc_get_num_devices (acc_device_nvidia) == 0) - return 0; - - acc_init (acc_device_nvidia); + acc_init (acc_device_default); x = (unsigned char **) malloc (NTHREADS * N); d_x = (void **) malloc (NTHREADS * N); @@ -110,8 +112,6 @@ main (int argc, char **argv) abort (); } - acc_shutdown (acc_device_nvidia); - return 0; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-90.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-90.c index d17755b..5ca5505 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-90.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-90.c @@ -1,5 +1,6 @@ /* { dg-do run { target openacc_nvidia_accel_selected } } */ /* { dg-additional-options "-lcuda" } */ +/* { dg-require-effective-target openacc_cuda } */ #include <pthread.h> #include <stdio.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-91.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-91.c index 36fff08..d14226c 100644 --- 
a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-91.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-91.c @@ -1,5 +1,6 @@ /* { dg-do run { target openacc_nvidia_accel_selected } } */ /* { dg-additional-options "-lcuda" } */ +/* { dg-require-effective-target openacc_cuda } */ #include <stdlib.h> #include <unistd.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-92.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-92.c index 18193e0..0043fb3 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-92.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-92.c @@ -1,4 +1,4 @@ -/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ #include <pthread.h> #include <stdio.h> @@ -22,11 +22,16 @@ test (void *arg) tid = (int) (long) arg; - devnum = acc_get_device_num (acc_device_nvidia); - acc_set_device_num (devnum, acc_device_nvidia); + devnum = acc_get_device_num (acc_device_default); + acc_set_device_num (devnum, acc_device_default); +#if ACC_DEVICE_TYPE_nvidia if (acc_get_current_cuda_context () == NULL) abort (); +#else + if (acc_get_current_cuda_context () != NULL) + abort (); +#endif acc_copyout (x[tid], N); @@ -49,10 +54,7 @@ main (int argc, char **argv) pthread_t *tid; unsigned char *p; - if (acc_get_num_devices (acc_device_nvidia) == 0) - return 0; - - acc_init (acc_device_nvidia); + acc_init (acc_device_default); x = (unsigned char **) malloc (NTHREADS * N); d_x = (void **) malloc (NTHREADS * N); @@ -104,8 +106,6 @@ main (int argc, char **argv) abort (); } - acc_shutdown (acc_device_nvidia); - return 0; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-94.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-94.c index 5449723..baa3ac8 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-94.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-94.c @@ -22,10 +22,10 @@ main (int argc, char **argv) acc_copyin_async (h, N, async); - memset (h, 0, N); - acc_wait (async); + memset (h, 0, 
N); + acc_copyout_async (h, N, async + 1); acc_wait (async + 1); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-95.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-95.c index 85b238d..842fb84 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-95.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-95.c @@ -23,10 +23,11 @@ main (int argc, char **argv) acc_create_async (h, N, q); acc_memcpy_to_device_async (acc_deviceptr (h), g, N, q); - memset (&h[0], 0, N); acc_wait (q); + memset (h, 0, N); + acc_update_self_async (h, N, q + 1); acc_delete_async (h, N, q + 1); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c index 0273c2b..c13cab7 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c @@ -3,6 +3,9 @@ /* { dg-additional-options "-fopenacc-dim=32" } */ +/* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting + aspects of that functionality. */ + #include <stdio.h> #include <openacc.h> #include <gomp-constants.h> @@ -151,6 +154,7 @@ int gang_1 (int *ary, int size) clear (ary, size); #pragma acc parallel num_gangs (32) num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size) + /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-1 } */ { #pragma acc loop auto for (int jx = 0; jx < size / 64; jx++) diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c index ca77164..419bc33 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c @@ -132,9 +132,7 @@ int main () /* AMD GCN uses the autovectorizer for the vector dimension: the use of a function call in vector-partitioned code in this test is not currently supported. 
*/ - /* AMD GCN does not currently support multiple workers. This should be - set to 16 when that changes. */ - return test_1 (16, 1, 1); + return test_1 (16, 16, 1); #else return test_1 (16, 16, 32); #endif diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-1.c index 98f02e9..12272ad 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-1.c @@ -1,3 +1,9 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. */ + #include <stdio.h> #include <openacc.h> #include <gomp-constants.h> @@ -14,8 +20,13 @@ int main () ary[ix] = -1; #pragma acc parallel num_gangs(32) copy(ary) copy(ondev) + /* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ { #pragma acc loop gang + /* { dg-note {variable 'ix' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'g' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ + /* { dg-note {variable 'w' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } */ + /* { dg-note {variable 'v' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-4 } */ for (unsigned ix = 0; ix < N; ix++) { if (acc_on_device (acc_device_not_host)) diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-2.c index 
4152a4e..683bd12 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-2.c @@ -1,3 +1,9 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. */ + #include <stdio.h> #include <openacc.h> #include <gomp-constants.h> @@ -14,8 +20,13 @@ int main () ary[ix] = -1; #pragma acc parallel num_gangs(32) copy(ary) copy(ondev) + /* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ { #pragma acc loop gang (static:1) + /* { dg-note {variable 'ix' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'g' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ + /* { dg-note {variable 'w' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } */ + /* { dg-note {variable 'v' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-4 } */ for (unsigned ix = 0; ix < N; ix++) { if (acc_on_device (acc_device_not_host)) diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c index 5c84301..e5ed2ab 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c @@ -1,3 +1,9 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options 
"-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. */ + #include <stdio.h> #include <openacc.h> #include <gomp-constants.h> @@ -16,8 +22,13 @@ int main () #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ copy(ary) copy(ondev) copyout(gangsize, workersize, vectorsize) + /* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ { #pragma acc loop gang worker vector + /* { dg-note {variable 'ix' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'g' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ + /* { dg-note {variable 'w' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } */ + /* { dg-note {variable 'v' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-4 } */ for (unsigned ix = 0; ix < N; ix++) { if (acc_on_device (acc_device_not_host)) diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c new file mode 100644 index 0000000..cb3878b --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c @@ -0,0 +1,106 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. 
*/ + +#include <stdio.h> +#include <openacc.h> +#include <alloca.h> +#include <string.h> +#include <gomp-constants.h> +#include <stdlib.h> + +#if 0 +#define DEBUG(DIM, IDX, VAL) \ + fprintf (stderr, "%sdist[%d] = %d\n", (DIM), (IDX), (VAL)) +#else +#define DEBUG(DIM, IDX, VAL) +#endif + +#define N (32*32*32) + +int +check (const char *dim, int *dist, int dimsize) +{ + int ix; + int exit = 0; + + for (ix = 0; ix < dimsize; ix++) + { + DEBUG(dim, ix, dist[ix]); + if (dist[ix] < (N) / (dimsize + 0.5) + || dist[ix] > (N) / (dimsize - 0.5)) + { + fprintf (stderr, "did not distribute to %ss (%d not between %d " + "and %d)\n", dim, dist[ix], (int) ((N) / (dimsize + 0.5)), + (int) ((N) / (dimsize - 0.5))); + exit |= 1; + } + } + + return exit; +} + +int main () +{ + int ary[N]; + int ix; + int exit = 0; + int gangsize = 0, workersize = 0, vectorsize = 0; + int *gangdist, *workerdist, *vectordist; + + for (ix = 0; ix < N;ix++) + ary[ix] = -1; + +#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ + copy(ary) copyout(gangsize, workersize, vectorsize) + /* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ + { +#pragma acc loop gang worker vector + /* { dg-note {variable 'ix' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'g' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ + /* { dg-note {variable 'w' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } */ + /* { dg-note {variable 'v' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-4 } */ + for (unsigned ix = 0; ix < N; ix++) + { + int g, w, v; + + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = 
__builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); + + ary[ix] = (g << 16) | (w << 8) | v; + } + + gangsize = __builtin_goacc_parlevel_size (GOMP_DIM_GANG); + workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); + vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); + } + + gangdist = (int *) alloca (gangsize * sizeof (int)); + workerdist = (int *) alloca (workersize * sizeof (int)); + vectordist = (int *) alloca (vectorsize * sizeof (int)); + memset (gangdist, 0, gangsize * sizeof (int)); + memset (workerdist, 0, workersize * sizeof (int)); + memset (vectordist, 0, vectorsize * sizeof (int)); + + /* Test that work is shared approximately equally amongst each active + gang/worker/vector. */ + for (ix = 0; ix < N; ix++) + { + int g = (ary[ix] >> 16) & 255; + int w = (ary[ix] >> 8) & 255; + int v = ary[ix] & 255; + + gangdist[g]++; + workerdist[w]++; + vectordist[v]++; + } + + exit = check ("gang", gangdist, gangsize); + exit |= check ("worker", workerdist, workersize); + exit |= check ("vector", vectordist, vectorsize); + + return exit; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c index 7107502..0c84027 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c @@ -1,3 +1,9 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. 
*/ + #include <stdio.h> #include <openacc.h> #include <gomp-constants.h> @@ -10,8 +16,14 @@ int main () int t = 0, h = 0; #pragma acc parallel num_gangs(32) copy(ondev) + /* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ { #pragma acc loop gang reduction (+:t) + /* { dg-note {variable 'ix' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'val' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ + /* { dg-note {variable 'g' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } */ + /* { dg-note {variable 'w' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-4 } */ + /* { dg-note {variable 'v' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-5 } */ for (unsigned ix = 0; ix < N; ix++) { int val = ix; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c index 9c4a85f..c1a2d0c 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c @@ -1,3 +1,9 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. 
*/ + #include <stdio.h> #include <openacc.h> #include <gomp-constants.h> @@ -12,8 +18,14 @@ int main () #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ copy(ondev) copyout(gangsize, workersize, vectorsize) + /* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ { #pragma acc loop gang worker vector reduction(+:t) + /* { dg-note {variable 'ix' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'val' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ + /* { dg-note {variable 'g' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } */ + /* { dg-note {variable 'w' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-4 } */ + /* { dg-note {variable 'v' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-5 } */ for (unsigned ix = 0; ix < N; ix++) { int val = ix; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c index 1173c1f..58c7b6a 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c @@ -1,3 +1,9 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. 
*/ + #include <stdio.h> #include <openacc.h> #include <gomp-constants.h> @@ -12,8 +18,14 @@ int main () int vectorsize; #pragma acc parallel vector_length(32) copy(ondev) copyout(vectorsize) + /* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ { #pragma acc loop vector reduction (+:t) + /* { dg-note {variable 'ix' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'val' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ + /* { dg-note {variable 'g' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } */ + /* { dg-note {variable 'w' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-4 } */ + /* { dg-note {variable 'v' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-5 } */ for (unsigned ix = 0; ix < N; ix++) { int val = ix; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c index 84c2296..85931f5 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c @@ -1,3 +1,9 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. 
*/ + #include <stdio.h> #include <openacc.h> #include <gomp-constants.h> @@ -12,10 +18,17 @@ int main () int vectorsize; #pragma acc parallel vector_length(32) copy(q) copy(ondev) copyout(vectorsize) + /* { dg-note {variable 't' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ { int t = q; #pragma acc loop vector reduction (+:t) + /* { dg-note {variable 'ix' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'val' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ + /* { dg-note {variable 'g' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } */ + /* { dg-note {variable 'w' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-4 } */ + /* { dg-note {variable 'v' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-5 } */ for (unsigned ix = 0; ix < N; ix++) { int val = ix; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c index 648f89e..b9ceec9 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c @@ -1,3 +1,12 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. 
*/ + +/* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting + aspects of that functionality. */ + #include <stdio.h> #include <openacc.h> #include <gomp-constants.h> @@ -12,8 +21,15 @@ int main () #pragma acc parallel num_workers(32) vector_length(32) copy(ondev) \ copyout(workersize) + /* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ + /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-3 } */ { #pragma acc loop worker reduction(+:t) + /* { dg-note {variable 'ix' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'val' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ + /* { dg-note {variable 'g' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } */ + /* { dg-note {variable 'w' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-4 } */ + /* { dg-note {variable 'v' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-5 } */ for (unsigned ix = 0; ix < N; ix++) { int val = ix; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c index f9fcf37..ff5e4a1 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c @@ -1,3 +1,12 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options 
"-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. */ + +/* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting + aspects of that functionality. */ + #include <stdio.h> #include <openacc.h> #include <gomp-constants.h> @@ -12,10 +21,18 @@ int main () #pragma acc parallel num_workers(32) vector_length(32) copy(q) copy(ondev) \ copyout(workersize) + /* { dg-note {variable 't' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ + /* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } */ + /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-4 } */ { int t = q; #pragma acc loop worker reduction(+:t) + /* { dg-note {variable 'ix' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'val' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ + /* { dg-note {variable 'g' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } */ + /* { dg-note {variable 'w' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-4 } */ + /* { dg-note {variable 'v' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-5 } */ for (unsigned ix = 0; ix < N; ix++) { int val = ix; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c index c360ad1..5d60899 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c +++ 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c @@ -1,3 +1,9 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. */ + #include <stdio.h> #include <openacc.h> #include <gomp-constants.h> @@ -12,8 +18,14 @@ int main () #pragma acc parallel num_workers(32) vector_length(32) copy(ondev) \ copyout(workersize, vectorsize) + /* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ { #pragma acc loop worker vector reduction (+:t) + /* { dg-note {variable 'ix' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'val' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ + /* { dg-note {variable 'g' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } */ + /* { dg-note {variable 'w' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-4 } */ + /* { dg-note {variable 'v' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-5 } */ for (unsigned ix = 0; ix < N; ix++) { int val = ix; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c index 8c858f3..9ccc1a8 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c @@ -1,3 +1,9 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options 
"--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. */ + #include <stdio.h> #include <openacc.h> #include <gomp-constants.h> @@ -16,8 +22,13 @@ int main () #pragma acc parallel vector_length(32) copy(ary) copy(ondev) \ copyout(vectorsize) + /* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ { #pragma acc loop vector + /* { dg-note {variable 'ix' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'g' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ + /* { dg-note {variable 'w' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } */ + /* { dg-note {variable 'v' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-4 } */ for (unsigned ix = 0; ix < N; ix++) { if (acc_on_device (acc_device_not_host)) diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c index 5fe486f..0e99ec6 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c @@ -1,3 +1,12 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. 
*/ + +/* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting + aspects of that functionality. */ + #include <stdio.h> #include <openacc.h> #include <gomp-constants.h> @@ -16,8 +25,14 @@ int main () #pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) \ copyout(workersize) + /* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ + /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "vector" { target *-*-* } .-3 } */ { #pragma acc loop worker + /* { dg-note {variable 'ix' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'g' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ + /* { dg-note {variable 'w' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } */ + /* { dg-note {variable 'v' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-4 } */ for (unsigned ix = 0; ix < N; ix++) { if (acc_on_device (acc_device_not_host)) diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c index fd4e4cf..f4707d1 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c @@ -1,3 +1,9 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. 
*/ + #include <stdio.h> #include <openacc.h> #include <gomp-constants.h> @@ -16,8 +22,13 @@ int main () #pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) \ copyout(workersize, vectorsize) + /* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ { #pragma acc loop worker vector + /* { dg-note {variable 'ix' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'g' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */ + /* { dg-note {variable 'w' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } */ + /* { dg-note {variable 'v' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-4 } */ for (unsigned ix = 0; ix < N; ix++) { if (acc_on_device (acc_device_not_host)) diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/mode-transitions.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/mode-transitions.c index 4474c12..94dc9d0 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/mode-transitions.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/mode-transitions.c @@ -1,3 +1,6 @@ +/* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting + aspects of that functionality. */ + /* Miscellaneous test cases for gang/worker/vector mode transitions. */ #include <assert.h> @@ -121,7 +124,7 @@ void t3() assert (n[i] == 2); for (i = 0; i < 1024; i++) - assert (arr[i] == ((i % 64) < 32) ? 1 : -1); + assert (arr[i] == (((i % 64) < 32) ? 
1 : -1)); } @@ -287,6 +290,7 @@ void t7() int n = 0; #pragma acc parallel copy(n) \ num_gangs(1) num_workers(1) vector_length(32) + /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-2 } */ { n++; } @@ -310,6 +314,7 @@ void t8() #pragma acc parallel copy(arr) \ num_gangs(gangs) num_workers(1) vector_length(32) + /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-2 } */ { int j; #pragma acc loop gang @@ -339,6 +344,7 @@ void t9() #pragma acc parallel copy(arr) \ num_gangs(gangs) num_workers(1) vector_length(32) + /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-2 } */ { int j; #pragma acc loop gang @@ -350,7 +356,7 @@ void t9() } for (i = 0; i < 1024; i++) - assert (arr[i] == ((i % 3) == 0) ? 1 : 2); + assert (arr[i] == ((i % 3) == 0 ? 1 : 2)); } } @@ -371,6 +377,7 @@ void t10() #pragma acc parallel copy(arr) \ num_gangs(gangs) num_workers(1) vector_length(32) + /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-2 } */ { int j; #pragma acc loop gang @@ -404,6 +411,7 @@ void t11() #pragma acc parallel copy(arr) \ num_gangs(1024) num_workers(1) vector_length(32) + /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-2 } */ { int j; @@ -442,6 +450,7 @@ void t12() #pragma acc parallel copyout(fizz, buzz, fizzbuzz) \ num_gangs(NUM_GANGS) num_workers(1) vector_length(32) + /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-2 } */ { int j; @@ -488,6 +497,7 @@ void t13() #pragma acc parallel copy(arr) \ num_gangs(8) num_workers(8) vector_length(32) + /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-2 } */ { int j; #pragma acc loop gang @@ 
-613,6 +623,7 @@ void t16() #pragma acc parallel copy(n, arr) \ num_gangs(8) num_workers(16) vector_length(32) + /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-2 } */ { int j; #pragma acc loop gang @@ -665,6 +676,7 @@ void t17() #pragma acc parallel copyin(arr_a) copyout(arr_b) \ num_gangs(num_gangs) num_workers(num_workers) vector_length(32) + /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-2 } */ { int j; #pragma acc loop gang @@ -882,6 +894,8 @@ void t21() #pragma acc parallel copy(arr) \ num_gangs(8) num_workers(8) vector_length(32) + /* { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .-2 } */ + /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-3 } */ { int j; #pragma acc loop gang @@ -905,6 +919,8 @@ void t22() #pragma acc parallel copy(arr) \ num_gangs(8) num_workers(8) vector_length(32) + /* { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .-2 } */ + /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-3 } */ { int j; #pragma acc loop gang @@ -931,6 +947,8 @@ void t23() #pragma acc parallel copy(arr) \ num_gangs(8) num_workers(8) vector_length(32) + /* { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .-2 } */ + /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-3 } */ { int j; #pragma acc loop gang @@ -942,7 +960,7 @@ void t23() } for (i = 0; i < 32; i++) - assert (arr[i] == ((i % 2) != 0) ? i + 1 : i + 2); + assert (arr[i] == (((i % 2) != 0) ? 
i + 1 : i + 2)); } @@ -957,6 +975,8 @@ void t24() #pragma acc parallel copy(arr) \ num_gangs(8) num_workers(8) vector_length(32) + /* { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .-2 } */ + /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-3 } */ { int j; #pragma acc loop gang @@ -988,6 +1008,7 @@ void t25() #pragma acc parallel copy(arr) \ num_gangs(8) num_workers(8) vector_length(32) + /* { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .-2 } */ { int j; #pragma acc loop gang @@ -1020,6 +1041,7 @@ void t26() #pragma acc parallel copy(arr) \ num_gangs(8) num_workers(8) vector_length(32) + /* { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .-2 } */ { int j; #pragma acc loop gang @@ -1070,6 +1092,8 @@ void t27() #pragma acc parallel copy(n, arr) copyout(ondev) \ num_gangs(ACTUAL_GANGS) num_workers(8) vector_length(32) + /* { dg-bogus "warning: region is gang partitioned but does not contain gang partitioned code" "TODO 'atomic'" { xfail *-*-* } .-2 } */ + /* { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .-3 } */ { int j; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c index a88b60f..8157cff 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c @@ -1,4 +1,5 @@ -/* { dg-additional-options "-w" } */ +/* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting + aspects of that functionality. 
*/ #include <assert.h> @@ -18,6 +19,9 @@ main (int argc, char *argv[]) #endif #pragma acc parallel num_gangs(GANGS) num_workers(32) vector_length(32) \ reduction(+:res1) copy(res2, res1) + /* { dg-bogus "warning: region is gang partitioned but does not contain gang partitioned code" "TODO 'reduction', 'atomic'" { xfail { ! openacc_host_selected } } .-2 } */ + /* { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .-3 } */ + /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-4 } */ { res1 += 5; @@ -39,6 +43,9 @@ main (int argc, char *argv[]) #endif #pragma acc parallel num_gangs(GANGS) num_workers(32) vector_length(32) \ reduction(*:res1) copy(res1, res2) + /* { dg-bogus "warning: region is gang partitioned but does not contain gang partitioned code" "TODO 'reduction', 'atomic'" { xfail { ! openacc_host_selected } } .-2 } */ + /* { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .-3 } */ + /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-4 } */ { res1 *= 5; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c index 5ab52e9..8b7900e 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c @@ -1,7 +1,8 @@ /* Test of reduction on parallel directive (with async). */ /* See also Fortran variants in "../libgomp.oacc-fortran/par-reduction-2*". */ -/* { dg-additional-options "-w" } */ +/* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting + aspects of that functionality. 
*/ #include <assert.h> #include <openacc.h> @@ -18,6 +19,9 @@ main (int argc, char *argv[]) #endif #pragma acc parallel num_gangs(GANGS) num_workers(32) vector_length(32) \ reduction(+:res1) copy(res1, res2) async(1) + /* { dg-bogus "warning: region is gang partitioned but does not contain gang partitioned code" "TODO 'reduction', 'atomic'" { xfail { ! openacc_host_selected } } .-2 } */ + /* { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .-3 } */ + /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-4 } */ { res1 += 5; @@ -41,6 +45,9 @@ main (int argc, char *argv[]) #endif #pragma acc parallel num_gangs(GANGS) num_workers(32) vector_length(32) \ reduction(*:res1) copy(res1, res2) async(1) + /* { dg-bogus "warning: region is gang partitioned but does not contain gang partitioned code" "TODO 'reduction', 'atomic'" { xfail { ! openacc_host_selected } } .-2 } */ + /* { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .-3 } */ + /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-4 } */ { res1 *= 5; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c index cc4c738..9392e1d 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c @@ -1,48 +1,31 @@ /* OpenACC parallelism dimensions clauses: num_gangs, num_workers, vector_length. */ +/* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting + aspects of that functionality. */ + /* See also '../libgomp.oacc-fortran/parallel-dims.f90'. 
*/ #include <limits.h> #include <openacc.h> #include <gomp-constants.h> -/* TODO: "(int) acc_device_*" casts because of the C++ acc_on_device wrapper - not behaving as expected for -O0. */ #pragma acc routine seq -static unsigned int __attribute__ ((optimize ("O2"))) acc_gang () +static int acc_gang () { - if (acc_on_device ((int) acc_device_host)) - return 0; - else if (acc_on_device ((int) acc_device_nvidia) - || acc_on_device ((int) acc_device_radeon)) - return __builtin_goacc_parlevel_id (GOMP_DIM_GANG); - else - __builtin_abort (); + return __builtin_goacc_parlevel_id (GOMP_DIM_GANG); } #pragma acc routine seq -static unsigned int __attribute__ ((optimize ("O2"))) acc_worker () +static int acc_worker () { - if (acc_on_device ((int) acc_device_host)) - return 0; - else if (acc_on_device ((int) acc_device_nvidia) - || acc_on_device ((int) acc_device_radeon)) - return __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); - else - __builtin_abort (); + return __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); } #pragma acc routine seq -static unsigned int __attribute__ ((optimize ("O2"))) acc_vector () +static int acc_vector () { - if (acc_on_device ((int) acc_device_host)) - return 0; - else if (acc_on_device ((int) acc_device_nvidia) - || acc_on_device ((int) acc_device_radeon)) - return __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); - else - __builtin_abort (); + return __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); } @@ -69,22 +52,9 @@ int main () gangs_actual = 1; for (int i = 100 * gangs_actual; i > -100 * gangs_actual; --i) { - /* <https://gcc.gnu.org/PR80547>. */ -#if 0 gangs_min = gangs_max = acc_gang (); workers_min = workers_max = acc_worker (); vectors_min = vectors_max = acc_vector (); -#else - int gangs = acc_gang (); - gangs_min = (gangs_min < gangs) ? gangs_min : gangs; - gangs_max = (gangs_max > gangs) ? gangs_max : gangs; - int workers = acc_worker (); - workers_min = (workers_min < workers) ? workers_min : workers; - workers_max = (workers_max > workers) ? 
workers_max : workers; - int vectors = acc_vector (); - vectors_min = (vectors_min < vectors) ? vectors_min : vectors; - vectors_max = (vectors_max > vectors) ? vectors_max : vectors; -#endif } } if (gangs_actual != 1) @@ -105,6 +75,7 @@ int main () gangs_max = workers_max = vectors_max = INT_MIN; #pragma acc parallel copy (gangs_actual) \ num_gangs (GANGS) /* { dg-warning "'num_gangs' value must be positive" "" { target c++ } } */ + /* { dg-warning "region contains gang partitioned code but is not gang partitioned" "" { target *-*-* } .-2 } */ { /* We're actually executing with num_gangs (1). */ gangs_actual = 1; @@ -134,6 +105,7 @@ int main () gangs_max = workers_max = vectors_max = INT_MIN; #pragma acc parallel copy (workers_actual) \ num_workers (WORKERS) /* { dg-warning "'num_workers' value must be positive" "" { target c++ } } */ + /* { dg-warning "region contains worker partitioned code but is not worker partitioned" "" { target *-*-* } .-2 } */ { /* We're actually executing with num_workers (1). */ workers_actual = 1; @@ -163,6 +135,7 @@ int main () gangs_max = workers_max = vectors_max = INT_MIN; #pragma acc parallel copy (vectors_actual) /* { dg-warning "using vector_length \\(32\\), ignoring 1" "" { target openacc_nvidia_accel_selected } } */ \ vector_length (VECTORS) /* { dg-warning "'vector_length' value must be positive" "" { target c++ } } */ + /* { dg-warning "region contains vector partitioned code but is not vector partitioned" "" { target *-*-* } .-2 } */ { /* We're actually executing with vector_length (1), just the GCC nvptx back end enforces vector_length (32). 
*/ @@ -208,6 +181,7 @@ int main () #pragma acc parallel copy (gangs_actual) \ reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) \ num_gangs (gangs) + /* { dg-bogus "warning: region is gang partitioned but does not contain gang partitioned code" "TODO 'reduction'" { xfail *-*-* } .-3 } */ { if (acc_on_device (acc_device_host)) { @@ -287,9 +261,8 @@ int main () } else if (acc_on_device (acc_device_radeon)) { - /* The GCC GCN back end is limited to num_workers (16). - Temporarily set this to 1 until multiple workers are permitted. */ - workers_actual = 1; // 16; + /* The GCC GCN back end is limited to num_workers (16). */ + workers_actual = 16; } else __builtin_abort (); @@ -378,7 +351,7 @@ int main () } else if (acc_on_device (acc_device_nvidia)) { - /* The GCC nvptx back end enforces vector_length (32). */ + /* The GCC nvptx back end reduces to vector_length (1024). */ vectors_actual = 1024; } else if (acc_on_device (acc_device_radeon)) @@ -490,8 +463,6 @@ int main () } else if (acc_on_device (acc_device_radeon)) { - /* Temporary setting, until multiple workers are permitted. */ - workers_actual = 1; /* See above comments about GCN vectors_actual. 
*/ vectors_actual = 1; } @@ -617,6 +588,9 @@ int main () gangs_max = workers_max = vectors_max = INT_MIN; #pragma acc serial copy (vectors_actual) /* { dg-warning "using vector_length \\(32\\), ignoring 1" "" { target openacc_nvidia_accel_selected } } */ \ copy (gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max) + /* { dg-bogus "warning: region contains gang partitioned code but is not gang partitioned" "TODO 'serial'" { xfail *-*-* } .-2 } + { dg-bogus "warning: region contains worker partitioned code but is not worker partitioned" "TODO 'serial'" { xfail *-*-* } .-3 } + { dg-bogus "warning: region contains vector partitioned code but is not vector partitioned" "TODO 'serial'" { xfail *-*-* } .-4 } */ { if (acc_on_device (acc_device_nvidia)) { diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-reduction.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-reduction.c index 077571f..f88babc 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-reduction.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-reduction.c @@ -1,5 +1,11 @@ -/* { dg-do run } */ -/* { dg-additional-options "-w" } */ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. */ + +/* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting + aspects of that functionality. 
*/ #include <stdlib.h> #include <openacc.h> @@ -16,6 +22,7 @@ main () #pragma acc data copy (dummy) { #pragma acc parallel num_gangs (N) reduction (+:s1) copy(s1) + /* { dg-bogus "warning: region is gang partitioned but does not contain gang partitioned code" "TODO 'reduction'" { xfail *-*-* } .-1 } */ { s1++; } @@ -36,6 +43,7 @@ main () s2 = 0; #pragma acc parallel num_gangs (10) reduction (+:s1, s2) copy(s1, s2) + /* { dg-bogus "warning: region is gang partitioned but does not contain gang partitioned code" "TODO 'reduction'" { xfail *-*-* } .-1 } */ { s1++; s2 += N; @@ -61,6 +69,7 @@ main () #pragma acc parallel num_gangs (10) reduction (+:s1) copy(s1) { #pragma acc loop gang reduction (+:s1) + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (i = 0; i < 10; i++) s1++; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-3.c index cddbf27..2a7e481 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-3.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-3.c @@ -2,10 +2,14 @@ { dg-skip-if "" { *-*-* } { "*" } { "-O2" } } */ /* { dg-additional-options "-foffload=-fdump-rtl-mach" } */ +/* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting + aspects of that functionality. 
*/ + int a; #pragma acc declare create(a) #pragma acc routine vector +/* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .+2 } */ void __attribute__((noinline, noclone)) foo_v (void) { @@ -13,6 +17,8 @@ foo_v (void) } #pragma acc routine worker +/* { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .+3 } + { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .+2 } */ void __attribute__((noinline, noclone)) foo_w (void) { diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c index d453264..17cc9bd 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c @@ -2,10 +2,10 @@ /* { dg-additional-options "-DVECTOR_LENGTH=" } */ /* { dg-additional-options "-fopenacc-dim=::128" } */ -/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */ +/* { dg-additional-options "-foffload=-fdump-tree-oaccloops" } */ /* { dg-set-target-env-var "GOMP_DEBUG" "1" } */ #include "pr85486.c" -/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 32\\)" "oaccdevlow" } } */ +/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 32\\)" "oaccloops" } } */ /* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=32" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c index 33480a4..5d05540 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c @@ -2,10 +2,10 @@ /* { dg-additional-options "-DVECTOR_LENGTH=" } */ /* { dg-set-target-env-var "GOMP_OPENACC_DIM" "::128" } */ -/* { dg-additional-options 
"-foffload=-fdump-tree-oaccdevlow" } */ +/* { dg-additional-options "-foffload=-fdump-tree-oaccloops" } */ /* { dg-set-target-env-var "GOMP_DEBUG" "1" } */ #include "pr85486.c" -/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 32\\)" "oaccdevlow" } } */ +/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 32\\)" "oaccloops" } } */ /* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=32" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c index 0d98b82..f95f2ee 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c @@ -1,7 +1,7 @@ /* { dg-do run { target openacc_nvidia_accel_selected } } */ /* { dg-additional-options "-DVECTOR_LENGTH=vector_length(128)" } */ -/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */ +/* { dg-additional-options "-foffload=-fdump-tree-oaccloops" } */ /* { dg-set-target-env-var "GOMP_DEBUG" "1" } */ /* Minimized from ref-1.C. 
*/ @@ -54,5 +54,5 @@ main (void) return 0; } -/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 32\\)" "oaccdevlow" } } */ +/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 32\\)" "oaccloops" } } */ /* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=32" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr87835.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr87835.c index 88c2c77..e48f307 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr87835.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr87835.c @@ -1,5 +1,6 @@ /* { dg-do run { target openacc_nvidia_accel_selected } } */ /* { dg-additional-options "-lcuda" } */ +/* { dg-require-effective-target openacc_cuda } */ #include <openacc.h> #include <stdlib.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/private-atomic-1-gang.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-atomic-1-gang.c new file mode 100644 index 0000000..2c1ffb1 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-atomic-1-gang.c @@ -0,0 +1,99 @@ +/* Tests for gang-private variables, 'atomic' access */ + +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. */ + +/* It's only with Tcl 8.5 (released in 2007) that "the variable 'varName' + passed to 'incr' may be unset, and in that case, it will be set to [...]", + so to maintain compatibility with earlier Tcl releases, we manually + initialize counter variables: + { dg-line l_dummy[variable c_compute 0 c_loop 0] } + { dg-message "dummy" "" { target iN-VAl-Id } l_dummy } to avoid + "WARNING: dg-line var l_dummy defined, but not used". 
*/ + +#include <assert.h> +#include <openacc.h> + +int main (void) +{ + int ret; + + + ret = 0; + #pragma acc parallel num_gangs(1444) num_workers(32) reduction(+: ret) /* { dg-line l_compute[incr c_compute] } */ + /* { dg-note {variable 'w' declared in block is candidate for adjusting OpenACC privatization level} "" { target *-*-* } l_compute$c_compute } + { dg-note {variable 'w' ought to be adjusted for OpenACC privatization level: 'gang'} "" { target *-*-* } l_compute$c_compute } + { dg-note {variable 'w' adjusted for OpenACC privatization level: 'gang'} "" { target { ! openacc_host_selected } } l_compute$c_compute } */ + /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */ + { + int w = -22; + + #pragma acc loop worker /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + for (int i = 0; i < 2232; i++) + { + #pragma acc atomic update + w++; + } + + ret = (w == -22 + 2232); + } + if (acc_get_device_type () == acc_device_host) + assert (ret == 1); + else + assert (ret == 1444); + + + ret = 0; + #pragma acc parallel num_gangs(1414) vector_length(32) reduction(+: ret) /* { dg-line l_compute[incr c_compute] } */ + /* { dg-note {variable 'v' declared in block is candidate for adjusting OpenACC privatization level} "" { target *-*-* } l_compute$c_compute } + { dg-note {variable 'v' ought to be adjusted for OpenACC privatization level: 'gang'} "" { target *-*-* } l_compute$c_compute } + { dg-note {variable 'v' adjusted for OpenACC privatization level: 'gang'} "" { target { ! 
openacc_host_selected } } l_compute$c_compute } */ + /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */ + { + int v = 10; + + #pragma acc loop vector /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + for (int i = 0; i < 3201; i++) + { + #pragma acc atomic update + v++; + } + + ret = (v == 10 + 3201); + } + if (acc_get_device_type () == acc_device_host) + assert (ret == 1); + else + assert (ret == 1414); + + + ret = 0; +#pragma acc parallel num_gangs(314) reduction(+: ret) /* { dg-line l_compute[incr c_compute] } */ + /* { dg-note {variable 'v' declared in block is candidate for adjusting OpenACC privatization level} "" { target *-*-* } l_compute$c_compute } + { dg-note {variable 'v' ought to be adjusted for OpenACC privatization level: 'gang'} "" { target *-*-* } l_compute$c_compute } + { dg-note {variable 'v' adjusted for OpenACC privatization level: 'gang'} "" { target { ! 
openacc_host_selected } } l_compute$c_compute } */ + { + int v = -222; + +#pragma acc atomic update + ++v; +#pragma acc atomic update + ++v; +#pragma acc atomic update + ++v; + + ret += (v == -222 + 3); + } + if (acc_get_device_type () == acc_device_host) + assert (ret == 1); + else + assert (ret == 314); + + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/private-atomic-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-atomic-1.c new file mode 100644 index 0000000..e651012 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-atomic-1.c @@ -0,0 +1,50 @@ +// 'atomic' access of thread-private variable + +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. */ + +#include <assert.h> + +int main (void) +{ + int res; + + res = 0; +#pragma acc parallel reduction(+: res) + /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + { +#pragma acc loop vector reduction(+: res) + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + /* { dg-note {variable 'v' declared in block is candidate for adjusting OpenACC privatization level} "" { target *-*-* } .-2 } + { dg-note {variable 'v' ought to be adjusted for OpenACC privatization level: 'vector'} "" { target *-*-* } .-3 } + { dg-note {variable 'v' adjusted for OpenACC privatization level: 'vector'} "" { target { ! 
openacc_host_selected } } .-4 } */ + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-5 } */ + for (int i = 0; i < 2322; i++) + { + int v = -222; + +#pragma acc loop seq + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ + for (int j = 0; j < 121; ++j) + { +#pragma acc atomic update + ++v; + /* nvptx offloading: PR83812 "operation not supported on global/shared address space". + { dg-output "(\n|\r\n|\r)libgomp: cuStreamSynchronize error: operation not supported on global/shared address space(\n|\r\n|\r)$" { target openacc_nvidia_accel_selected } } + Scan for what we expect in the "XFAILed" case (without actually XFAILing). + { dg-shouldfail "XFAILed" { openacc_nvidia_accel_selected } } + ... instead of 'dg-xfail-run-if' so that 'dg-output' is evaluated at all. + { dg-final { if { [dg-process-target { xfail openacc_nvidia_accel_selected }] == "F" } { xfail "[testname-for-summary] really is XFAILed" } } } + ... so that we still get an XFAIL visible in the log. */ + } + + res += (v == -222 + 121); + } + } + assert (res == 2322); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/private-variables.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-variables.c index 53f03d1..366f818 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/private-variables.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-variables.c @@ -1,3 +1,20 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. 
*/ + +/* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting + aspects of that functionality. */ + +/* It's only with Tcl 8.5 (released in 2007) that "the variable 'varName' + passed to 'incr' may be unset, and in that case, it will be set to [...]", + so to maintain compatibility with earlier Tcl releases, we manually + initialize counter variables: + { dg-line l_dummy[variable c_compute 0 c_loop 0] } + { dg-message "dummy" "" { target iN-VAl-Id } l_dummy } to avoid + "WARNING: dg-line var l_dummy defined, but not used". */ + #include <assert.h> #include <openacc.h> @@ -21,15 +38,20 @@ void local_g_1() for (i = 0; i < 32; i++) arr[i] = 3; - #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32) + #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32) /* { dg-line l_compute[incr c_compute] } */ + /* { dg-note {variable 'x' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */ + /* { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } l_compute$c_compute } */ + /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } l_compute$c_compute } */ { int x; - #pragma acc loop gang(static:1) + #pragma acc loop gang(static:1) /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (i = 0; i < 32; i++) x = i * 2; - #pragma acc loop gang(static:1) + #pragma acc loop gang(static:1) /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (i = 0; i < 32; i++) { if (acc_on_device (acc_device_host)) @@ -53,31 +75,41 @@ void 
local_w_1() for (i = 0; i < 32 * 32 * 32; i++) arr[i] = i; - #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32) + #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32) /* { dg-line l_compute[incr c_compute] } */ + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */ { int j; - #pragma acc loop gang + #pragma acc loop gang /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (i = 0; i < 32; i++) { - #pragma acc loop worker + #pragma acc loop worker /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'x' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (j = 0; j < 32; j++) { int k; int x = i ^ j * 3; - #pragma acc loop vector + #pragma acc loop vector /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += x * k; } - #pragma acc loop worker + #pragma acc loop worker /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'k' declared in block isn't candidate for adjusting 
OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'x' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (j = 0; j < 32; j++) { int k; int x = i | j * 5; - #pragma acc loop vector + #pragma acc loop vector /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += x * k; } @@ -104,26 +136,33 @@ void local_w_2() for (i = 0; i < 32 * 32 * 32; i++) arr[i] = i; - #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32) + #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32) /* { dg-line l_compute[incr c_compute] } */ + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */ { int j; - #pragma acc loop gang + #pragma acc loop gang /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (i = 0; i < 32; i++) { - #pragma acc loop worker + #pragma acc loop worker /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'x' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (j = 0; j < 32; j++) { int k; int x = i ^ j * 3; - 
#pragma acc loop vector + #pragma acc loop vector /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += x * k; x = i | j * 5; - #pragma acc loop vector + #pragma acc loop vector /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += x * k; } @@ -150,14 +189,19 @@ void local_w_3() for (i = 0; i < 32 * 32 * 32; i++) arr[i] = i; - #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32) + #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32) /* { dg-line l_compute[incr c_compute] } */ + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */ { int j; - #pragma acc loop gang + #pragma acc loop gang /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (i = 0; i < 32; i++) { - #pragma acc loop worker + #pragma acc loop worker /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'pt' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (j = 0; j < 32; j++) { 
int k; @@ -166,11 +210,13 @@ void local_w_3() pt.x = i ^ j * 3; pt.y = i | j * 5; - #pragma acc loop vector + #pragma acc loop vector /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += pt.x * k; - #pragma acc loop vector + #pragma acc loop vector /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += pt.y * k; } @@ -197,14 +243,22 @@ void local_w_4() for (i = 0; i < 32 * 32 * 32; i++) arr[i] = i; - #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32) + #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32) /* { dg-line l_compute[incr c_compute] } */ + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */ { int j; - #pragma acc loop gang + #pragma acc loop gang /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (i = 0; i < 32; i++) { - #pragma acc loop worker + #pragma acc loop worker /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'pt' declared in block is candidate for adjusting OpenACC privatization level} "" { target 
*-*-* } l_loop$c_loop } + { dg-note {variable 'pt' ought to be adjusted for OpenACC privatization level: 'worker'} "" { target *-*-* } l_loop$c_loop } + { dg-note {variable 'pt' adjusted for OpenACC privatization level: 'worker'} "TODO" { target { ! openacc_host_selected } xfail *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'ptp' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (j = 0; j < 32; j++) { int k; @@ -214,13 +268,15 @@ void local_w_4() pt.x = i ^ j * 3; - #pragma acc loop vector + #pragma acc loop vector /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += ptp->x * k; ptp->y = i | j * 5; - #pragma acc loop vector + #pragma acc loop vector /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += pt.y * k; } @@ -247,14 +303,19 @@ void local_w_5() for (i = 0; i < 32 * 32 * 32; i++) arr[i] = i; - #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32) + #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32) /* { dg-line l_compute[incr c_compute] } */ + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */ { int j; - #pragma acc loop gang + #pragma acc loop gang /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (i = 0; i < 32; i++) { - #pragma acc loop worker + 
#pragma acc loop worker /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'pt' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (j = 0; j < 32; j++) { int k; @@ -262,13 +323,15 @@ void local_w_5() pt[0] = i ^ j * 3; - #pragma acc loop vector + #pragma acc loop vector /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += pt[0] * k; pt[1] = i | j * 5; - #pragma acc loop vector + #pragma acc loop vector /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += pt[1] * k; } @@ -294,9 +357,13 @@ void loop_g_1() for (i = 0; i < 32; i++) arr[i] = i; - #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32) + #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32) /* { dg-line l_compute[incr c_compute] } */ + /* { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } l_compute$c_compute } */ + /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } l_compute$c_compute } */ { - #pragma acc loop gang private(x) + #pragma acc loop gang private(x) /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'i' 
in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (i = 0; i < 32; i++) { x = i * 2; @@ -319,14 +386,19 @@ void loop_g_2() for (i = 0; i < 32 * 32; i++) arr[i] = i; - #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32) + #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32) /* { dg-line l_compute[incr c_compute] } */ + /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } l_compute$c_compute } */ { - #pragma acc loop gang private(x) + #pragma acc loop gang private(x) /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (i = 0; i < 32; i++) { x = i * 2; - #pragma acc loop worker + #pragma acc loop worker /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (int j = 0; j < 32; j++) arr[i * 32 + j] += x; } @@ -347,14 +419,19 @@ void loop_g_3() for (i = 0; i < 32 * 32; i++) arr[i] = i; - #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32) + #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32) /* { dg-line l_compute[incr c_compute] } */ + /* { 
dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } l_compute$c_compute } */ { - #pragma acc loop gang private(x) + #pragma acc loop gang private(x) /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (i = 0; i < 32; i++) { x = i * 2; - #pragma acc loop vector + #pragma acc loop vector /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (int j = 0; j < 32; j++) arr[i * 32 + j] += x; } @@ -375,16 +452,33 @@ void loop_g_4() for (i = 0; i < 32 * 32; i++) arr[i] = i; - #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32) + #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32) /* { dg-line l_compute[incr c_compute] } */ + /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } l_compute$c_compute } */ { - #pragma acc loop gang private(x) + #pragma acc loop gang private(x) /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'x' in 'private' clause is candidate for adjusting OpenACC privatization level} "" { target *-*-* } l_loop$c_loop } + But, with optimizations enabled, per the '*.ssa' dump ('gcc/tree-ssa.c:execute_update_addresses_taken'): + No longer having address taken: x + Now a gimple register: x + However, 'x' remains in the candidate set: + { dg-note {variable 'x' ought 
to be adjusted for OpenACC privatization level: 'gang'} "" { target *-*-* } l_loop$c_loop } + Now, for GCN offloading, 'adjust_private_decl' does the privatization change right away: + { dg-note {variable 'x' adjusted for OpenACC privatization level: 'gang'} "" { target openacc_radeon_accel_selected } l_loop$c_loop } + For nvptx offloading however, we first mark up 'x', and then later apply the privatization change -- or, with optimizations enabled, don't, because we then don't actually call 'expand_var_decl'. + { dg-note {variable 'x' adjusted for OpenACC privatization level: 'gang'} "" { target { openacc_nvidia_accel_selected && { ! __OPTIMIZE__ } } } l_loop$c_loop } + { dg-bogus {note: variable 'x' adjusted for OpenACC privatization level: 'gang'} "" { target { openacc_nvidia_accel_selected && __OPTIMIZE__ } } l_loop$c_loop } + */ + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'p' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (i = 0; i < 32; i++) { int *p = &x; x = i * 2; - #pragma acc loop worker + #pragma acc loop worker /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (int j = 0; j < 32; j++) arr[i * 32 + j] += x; @@ -407,15 +501,22 @@ void loop_g_5() for (i = 0; i < 32 * 32; i++) arr[i] = i; - #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32) + #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32) /* { dg-line l_compute[incr c_compute] } */ + /* { dg-warning "region 
is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } l_compute$c_compute } */ { - #pragma acc loop gang private(x) + #pragma acc loop gang private(x) /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'x' in 'private' clause is candidate for adjusting OpenACC privatization level} "" { target *-*-* } l_loop$c_loop } + { dg-note {variable 'x' ought to be adjusted for OpenACC privatization level: 'gang'} "" { target *-*-* } l_loop$c_loop } + { dg-note {variable 'x' adjusted for OpenACC privatization level: 'gang'} "" { target { ! openacc_host_selected } } l_loop$c_loop } */ + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (i = 0; i < 32; i++) { for (int j = 0; j < 8; j++) x[j] = j * 2; - #pragma acc loop worker + #pragma acc loop worker /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (int j = 0; j < 32; j++) arr[i * 32 + j] += x[j % 8]; } @@ -437,9 +538,13 @@ void loop_g_6() for (i = 0; i < 32 * 32; i++) arr[i] = i; - #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32) + #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32) /* { dg-line l_compute[incr c_compute] } */ + /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } l_compute$c_compute } */ { - #pragma acc loop gang private(pt) + #pragma acc loop gang private(pt) /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { 
target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'pt' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (i = 0; i < 32; i++) { pt.x = i; @@ -447,7 +552,8 @@ void loop_g_6() pt.z = i * 4; pt.attr[5] = i * 6; - #pragma acc loop worker + #pragma acc loop worker /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (int j = 0; j < 32; j++) arr[i * 32 + j] += pt.x + pt.y + pt.z + pt.attr[5]; } @@ -467,26 +573,34 @@ void loop_v_1() for (i = 0; i < 32 * 32 * 32; i++) arr[i] = i; - #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32) + #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32) /* { dg-line l_compute[incr c_compute] } */ + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */ { int j; - #pragma acc loop gang + #pragma acc loop gang /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (i = 0; i < 32; i++) { - #pragma acc loop worker + #pragma acc loop worker /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (j = 0; j < 32; j++) { int k; 
- #pragma acc loop vector private(x) + #pragma acc loop vector private(x) /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (k = 0; k < 32; k++) { x = i ^ j * 3; arr[i * 1024 + j * 32 + k] += x * k; } - #pragma acc loop vector private(x) + #pragma acc loop vector private(x) /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (k = 0; k < 32; k++) { x = i | j * 5; @@ -515,19 +629,25 @@ void loop_v_2() for (i = 0; i < 32 * 32 * 32; i++) arr[i] = i; - #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32) + #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32) /* { dg-line l_compute[incr c_compute] } */ + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */ { int j; - #pragma acc loop gang + #pragma acc loop gang /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (i = 0; i < 32; i++) { - #pragma acc loop worker + #pragma acc loop worker /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } 
*/ + /* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (j = 0; j < 32; j++) { int k; - #pragma acc loop vector private(pt) + #pragma acc loop vector private(pt) /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'pt' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (k = 0; k < 32; k++) { pt[0] = i ^ j * 3; @@ -558,14 +678,19 @@ void loop_w_1() for (i = 0; i < 32 * 32; i++) arr[i] = i; - #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32) + #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32) /* { dg-line l_compute[incr c_compute] } */ + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */ + /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } l_compute$c_compute } */ { int j; - #pragma acc loop gang + #pragma acc loop gang /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (i = 0; i < 32; i++) { - #pragma acc loop worker private(x) + #pragma acc loop worker private(x) /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop 
} */ for (j = 0; j < 32; j++) { x = i ^ j * 3; @@ -592,20 +717,26 @@ void loop_w_2() for (i = 0; i < 32 * 32 * 32; i++) arr[i] = i; - #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32) + #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32) /* { dg-line l_compute[incr c_compute] } */ + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */ { int j; - #pragma acc loop gang + #pragma acc loop gang /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (i = 0; i < 32; i++) { - #pragma acc loop worker private(x) + #pragma acc loop worker private(x) /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (j = 0; j < 32; j++) { int k; x = i ^ j * 3; - #pragma acc loop vector + #pragma acc loop vector /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += x * k; } @@ -632,31 +763,41 @@ void loop_w_3() for (i = 0; i < 32 * 32 * 32; i++) arr[i] = i; - #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32) + #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) 
vector_length(32) /* { dg-line l_compute[incr c_compute] } */ + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */ { int j; - #pragma acc loop gang + #pragma acc loop gang /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (i = 0; i < 32; i++) { - #pragma acc loop worker private(x) + #pragma acc loop worker private(x) /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (j = 0; j < 32; j++) { int k; x = i ^ j * 3; - #pragma acc loop vector + #pragma acc loop vector /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += x * k; } - #pragma acc loop worker private(x) + #pragma acc loop worker private(x) /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'k' declared in block isn't candidate for 
adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (j = 0; j < 32; j++) { int k; x = i | j * 5; - #pragma acc loop vector + #pragma acc loop vector /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += x * k; } @@ -683,26 +824,33 @@ void loop_w_4() for (i = 0; i < 32 * 32 * 32; i++) arr[i] = i; - #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32) + #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32) /* { dg-line l_compute[incr c_compute] } */ + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */ { int j; - #pragma acc loop gang + #pragma acc loop gang /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (i = 0; i < 32; i++) { - #pragma acc loop worker private(x) + #pragma acc loop worker private(x) /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (j = 0; j < 32; j++) { int k; x = i ^ j * 3; - #pragma acc loop vector + #pragma acc loop vector /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'k' in 'private' clause 
isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += x * k; x = i | j * 5; - #pragma acc loop vector + #pragma acc loop vector /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += x * k; } @@ -729,14 +877,22 @@ void loop_w_5() for (i = 0; i < 32 * 32 * 32; i++) arr[i] = i; - #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32) + #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32) /* { dg-line l_compute[incr c_compute] } */ + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */ { int j; - #pragma acc loop gang + #pragma acc loop gang /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (i = 0; i < 32; i++) { - #pragma acc loop worker private(x) + #pragma acc loop worker private(x) /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'x' in 'private' clause is candidate for adjusting OpenACC privatization level} "" { target *-*-* } l_loop$c_loop } + { dg-note {variable 'x' ought to be adjusted for OpenACC privatization level: 'worker'} "" { target *-*-* } l_loop$c_loop } + { dg-note {variable 'x' adjusted for OpenACC privatization level: 'worker'} "TODO" { target { ! 
openacc_host_selected } xfail *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'p' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (j = 0; j < 32; j++) { int k; @@ -744,13 +900,15 @@ void loop_w_5() x = i ^ j * 3; - #pragma acc loop vector + #pragma acc loop vector /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += x * k; *p = i | j * 5; - #pragma acc loop vector + #pragma acc loop vector /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += x * k; } @@ -778,14 +936,19 @@ void loop_w_6() for (i = 0; i < 32 * 32 * 32; i++) arr[i] = i; - #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32) + #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32) /* { dg-line l_compute[incr c_compute] } */ + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */ { int j; - #pragma acc loop gang + #pragma acc loop gang /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ 
for (i = 0; i < 32; i++) { - #pragma acc loop worker private(pt) + #pragma acc loop worker private(pt) /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'pt' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (j = 0; j < 32; j++) { int k; @@ -793,11 +956,13 @@ void loop_w_6() pt.x = i ^ j * 3; pt.y = i | j * 5; - #pragma acc loop vector + #pragma acc loop vector /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += pt.x * k; - #pragma acc loop vector + #pragma acc loop vector /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += pt.y * k; } @@ -827,28 +992,35 @@ void loop_w_7() /* "pt" is treated as "present_or_copy" on the parallel directive because it is an array variable. 
*/ - #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32) + #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32) /* { dg-line l_compute[incr c_compute] } */ + /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */ { int j; - #pragma acc loop gang + #pragma acc loop gang /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (i = 0; i < 32; i++) { /* But here, it is made private per-worker. */ - #pragma acc loop worker private(pt) + #pragma acc loop worker private(pt) /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'pt' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (j = 0; j < 32; j++) { int k; pt[0] = i ^ j * 3; - #pragma acc loop vector + #pragma acc loop vector /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (k = 0; k < 32; k++) arr[i * 1024 + j * 32 + k] += pt[0] * k; pt[1] = i | j * 5; - #pragma acc loop vector + #pragma acc loop vector /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (k = 0; k < 32; k++) arr[i * 1024 
+ j * 32 + k] += pt[1] * k; } @@ -874,13 +1046,17 @@ void parallel_g_1() for (i = 0; i < 32; i++) arr[i] = 3; - #pragma acc parallel private(x) copy(arr) num_gangs(32) num_workers(8) vector_length(32) + #pragma acc parallel private(x) copy(arr) num_gangs(32) num_workers(8) vector_length(32) /* { dg-line l_compute[incr c_compute] } */ + /* { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } l_compute$c_compute } */ + /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } l_compute$c_compute } */ { - #pragma acc loop gang(static:1) + #pragma acc loop gang(static:1) /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (i = 0; i < 32; i++) x = i * 2; - #pragma acc loop gang(static:1) + #pragma acc loop gang(static:1) /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (i = 0; i < 32; i++) { if (acc_on_device (acc_device_host)) @@ -903,16 +1079,20 @@ void parallel_g_2() for (i = 0; i < 32 * 32; i++) arr[i] = i; - #pragma acc parallel private(x) copy(arr) num_gangs(32) num_workers(2) vector_length(32) + #pragma acc parallel private(x) copy(arr) num_gangs(32) num_workers(2) vector_length(32) /* { dg-line l_compute[incr c_compute] } */ + /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } l_compute$c_compute } */ { - #pragma acc loop gang + #pragma acc loop gang /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ + /* { dg-note {variable 'j' declared in 
block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (i = 0; i < 32; i++) { int j; for (j = 0; j < 32; j++) x[j] = j * 2; - #pragma acc loop worker + #pragma acc loop worker /* { dg-line l_loop[incr c_loop] } */ + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } */ for (j = 0; j < 32; j++) arr[i * 32 + j] += x[31 - j]; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c index 6f5d293..bae1dee 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c @@ -1,5 +1,5 @@ -/* { dg-do run } */ -/* { dg-additional-options "-w" } */ +/* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting + aspects of that functionality. */ /* Multiple reductions. */ @@ -46,6 +46,7 @@ main (void) /* Nvptx targets require a vector_length or 32 in to allow spinlocks with gangs. */ check_reduction (num_workers (nw) vector_length (vl), worker); + /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-1 } */ check_reduction (vector_length (vl), vector); check_reduction (num_gangs (ng) num_workers (nw) vector_length (vl), gang worker vector); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-6.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-6.c index af30b31..82a6ade 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-6.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-6.c @@ -1,5 +1,4 @@ /* { dg-do run } */ -/* { dg-additional-options "-w" } */ /* Test reductions on explicitly private variables. 
*/ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-7.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-7.c index c4940b8..c2fb922 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-7.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-7.c @@ -1,5 +1,8 @@ /* Tests of reduction on loop directive. */ +/* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting + aspects of that functionality. */ + #include <assert.h> @@ -14,6 +17,8 @@ void g_np_1() arr[i] = i; #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) + /* { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .-1 } */ + /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-2 } */ { #pragma acc loop gang reduction(+:res) for (i = 0; i < 1024; i++) @@ -28,6 +33,8 @@ void g_np_1() res = hres = 1; #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) + /* { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .-1 } */ + /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-2 } */ { #pragma acc loop gang reduction(*:res) for (i = 0; i < 12; i++) @@ -52,6 +59,7 @@ void gv_np_1() arr[i] = i; #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) + /* { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .-1 } */ { #pragma acc loop gang vector reduction(+:res) for (i = 0; i < 1024; i++) @@ -76,6 +84,7 @@ void gw_np_1() arr[i] = i; #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) + /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-1 } */ { #pragma acc loop gang worker reduction(+:res) for (i = 0; i < 1024; i++) @@ -239,6 +248,7 @@ void 
v_p_1() #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ private(res) copyout(out) + /* { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .-2 } */ { #pragma acc loop gang for (j = 0; j < 32; j++) @@ -315,6 +325,7 @@ void w_p_1() #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ private(res) copyout(out) + /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-2 } */ { #pragma acc loop gang for (j = 0; j < 32; j++) diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-4.c index d6ff44d..0402e44 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-4.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-4.c @@ -1,3 +1,9 @@ +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. 
*/ + #include <stdlib.h> #include <stdio.h> @@ -11,6 +17,7 @@ vector (int *a) int i; #pragma acc loop vector + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (i = 0; i < N; i++) a[i] -= a[i]; } @@ -22,9 +29,11 @@ worker (int *b) int i, j; #pragma acc loop worker + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (i = 0; i < N; i++) { #pragma acc loop vector + /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (j = 0; j < M; j++) b[i * M + j] += b[i * M + j]; } @@ -37,6 +46,7 @@ gang (int *a) int i; #pragma acc loop gang worker vector + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (i = 0; i < N; i++) a[i] -= i; } @@ -66,6 +76,7 @@ main(int argc, char **argv) #pragma acc parallel copy (a[0:N]) { #pragma acc loop seq + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (i = 0; i < N; i++) seq (&a[0]); } @@ -79,6 +90,7 @@ main(int argc, char **argv) #pragma acc parallel copy (a[0:N]) { #pragma acc loop seq + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (i = 0; i < N; i++) gang (&a[0]); } @@ -109,6 +121,7 @@ main(int argc, char **argv) #pragma acc parallel copy (a[0:N]) { #pragma acc loop + /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */ for (i = 0; i < N; i++) vector (&a[0]); } diff --git 
a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c index a164f57..1536ce0 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c @@ -1,3 +1,6 @@ +/* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting + aspects of that functionality. */ + #include <stdio.h> #include <openacc.h> #include <gomp-constants.h> @@ -6,6 +9,8 @@ #pragma acc routine gang void __attribute__ ((noinline)) gang (int ary[N]) +/* { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .-1 } */ +/* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-2 } */ { #pragma acc loop gang for (unsigned ix = 0; ix < N; ix++) diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-nohost-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-nohost-1.c new file mode 100644 index 0000000..7dc7459e --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-nohost-1.c @@ -0,0 +1,63 @@ +/* Test 'nohost' clause via 'acc_on_device'. + + With optimizations disabled, we currently don't expect that 'acc_on_device' "evaluates at compile time to a constant". + { dg-skip-if "TODO PR82391" { *-*-* } { "-O0" } } +*/ + +/* { dg-additional-options "-fdump-tree-oaccloops" } */ + +/* { dg-additional-options "-fno-inline" } for stable results regarding OpenACC 'routine'. */ + +#include <assert.h> +#include <openacc.h> + +#pragma acc routine +static int fact(int n) +{ + if (n == 0 || n == 1) + return 1; + else + return n * fact(n - 1); +} + +#pragma acc routine nohost +static int fact_nohost(int n) +{ + /* Make sure this fails host compilation. 
*/ +#if defined ACC_DEVICE_TYPE_host + asm ("IT'S A TRAP"); +#elif defined ACC_DEVICE_TYPE_nvidia + asm ("{\n\t .reg .u32 %tid_x;\n\t mov.u32 %tid_x, %tid.x;\n\t}"); +#elif defined ACC_DEVICE_TYPE_radeon + asm ("s_nop 0"); +#else +# error Not ported to this ACC_DEVICE_TYPE +#endif + + return fact(n); +} +/* { dg-final { scan-tree-dump-times {(?n)^OpenACC routine 'fact_nohost' has 'nohost' clause\.$} 1 oaccloops { target c } } } + { dg-final { scan-tree-dump-times {(?n)^OpenACC routine 'int fact_nohost\(int\)' has 'nohost' clause\.$} 1 oaccloops { target { c++ && { ! offloading_enabled } } } } } + { dg-final { scan-tree-dump-times {(?n)^OpenACC routine 'fact_nohost\(int\)' has 'nohost' clause\.$} 1 oaccloops { target { c++ && offloading_enabled } } } } + TODO See PR101551 for 'offloading_enabled' differences. */ + +int main() +{ +#define N 10 + int x[N]; + +#pragma acc parallel loop copyout(x) + for (int i = 0; i < N; ++i) + /*TODO PR82391: '(int) acc_device_*' cast to avoid the C++ 'acc_on_device' wrapper. */ + x[i] = acc_on_device((int) acc_device_not_host) ? fact_nohost(i) : 0; + + for (int i = 0; i < N; ++i) + { + if (acc_get_device_type() == acc_device_host) + assert(x[i] == 0); + else + assert(x[i] == fact(i)); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-nohost-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-nohost-2.c new file mode 100644 index 0000000..4d081f2 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-nohost-2.c @@ -0,0 +1,39 @@ +/* Test 'nohost' clause via 'weak'. + + { dg-require-effective-target weak_undefined } + + When the OpenACC 'routine' with 'nohost' clauses gets discarded, the weak symbol then resolves to 'NULL'. +*/ + +/* { dg-additional-sources routine-nohost-2_2.c } */ + +/* { dg-additional-options "-fno-inline" } for stable results regarding OpenACC 'routine'. 
*/ + +#include <assert.h> +#include <openacc.h> + +#pragma acc routine //nohost +__attribute__((weak)) +extern int f1(int); + +int main() +{ + int x = -10; + +#pragma acc serial copy(x) + /* { dg-warning {using vector_length \(32\), ignoring 1} "" { target openacc_nvidia_accel_selected } .-1 } */ + { + if (f1) + x = f1(x); + else + x = 0; + + } + + if (acc_get_device_type() == acc_device_host) + assert(x == 0); + else + assert(x == -20); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-nohost-2_2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-nohost-2_2.c new file mode 100644 index 0000000..6029545 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-nohost-2_2.c @@ -0,0 +1,18 @@ +/* { dg-skip-if "" { *-*-* } } */ + +#pragma acc routine nohost +int f1(int x) +{ + /* Make sure this fails host compilation. */ +#if defined ACC_DEVICE_TYPE_host + asm ("IT'S A TRAP"); +#elif defined ACC_DEVICE_TYPE_nvidia + asm ("{\n\t .reg .u32 %tid_x;\n\t mov.u32 %tid_x, %tid.x;\n\t}"); +#elif defined ACC_DEVICE_TYPE_radeon + asm ("s_nop 0"); +#else +# error Not ported to this ACC_DEVICE_TYPE +#endif + + return 2 * x; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c index acd9884..b9137d8 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c @@ -1,3 +1,6 @@ +/* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting + aspects of that functionality. 
*/ + #include <stdio.h> #include <openacc.h> #include <gomp-constants.h> @@ -6,6 +9,7 @@ #pragma acc routine worker void __attribute__ ((noinline)) worker (int ary[N]) +/* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-1 } */ { #pragma acc loop worker for (unsigned ix = 0; ix < N; ix++) diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c index 9769ee7..4f88b1c 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c @@ -1,12 +1,17 @@ +/* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting + aspects of that functionality. */ + #include <stdio.h> #include <openacc.h> #include <gomp-constants.h> -#ifdef ACC_DEVICE_TYPE_radeon #define NUM_WORKERS 16 +#ifdef ACC_DEVICE_TYPE_radeon +/* AMD GCN uses the autovectorizer for the vector dimension: the use + of a function call in vector-partitioned code in this test is not + currently supported. */ #define NUM_VECTORS 1 #else -#define NUM_WORKERS 16 #define NUM_VECTORS 32 #endif #define WIDTH 64 @@ -44,6 +49,7 @@ int DoWorkVec (int nw) printf ("spawning %d ...", nw); fflush (stdout); #pragma acc parallel num_workers(nw) vector_length (NUM_VECTORS) copy (ary) + /* { dg-warning "region contains vector partitioned code but is not vector partitioned" "" { target openacc_radeon_accel_selected } .-1 } */ { WorkVec ((int *)ary, WIDTH, HEIGHT, nw, NUM_VECTORS); } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/static-variable-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/static-variable-1.c new file mode 100644 index 0000000..69df0a6 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/static-variable-1.c @@ -0,0 +1,492 @@ +/* "Function scope" (top-level block scope) 'static' variables + + ... inside OpenACC compute construct regions as well as OpenACC 'routine'. 
+ + This is to document/verify aspects of GCC's observed behavior, not + necessarily as it's (intended to be?) restricted by the OpenACC + specification. See also PR84991, PR84992, PR90779 etc., and + <https://github.com/OpenACC/openacc-spec/issues/372> "C/C++ 'static' + variables" (only visible to members of the GitHub OpenACC organization). +*/ + +/* { dg-additional-options "-fopt-info-note-omp" } + { dg-additional-options "--param=openacc-privatization=noisy" } + { dg-additional-options "-foffload=-fopt-info-note-omp" } + { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } + for testing/documenting aspects of that functionality. */ + +/* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting + aspects of that functionality. */ + + +#undef NDEBUG +#include <assert.h> +#include <string.h> +#include <openacc.h> +#include <gomp-constants.h> + + +#define IF_DEBUG if (0) + + +/* Without explicit 'num_gangs'. */ + +static void t0_c(void) +{ + IF_DEBUG + __builtin_printf ("%s\n", __FUNCTION__); + + const int i_limit = 11; + const int var_init = 16; + + for (int i = 0; i < i_limit; ++i) + { + int result = 0; + int num_gangs_actual = -1; +#pragma acc parallel \ + reduction(max:num_gangs_actual) \ + reduction(max:result) + /* { dg-note {variable 'var' declared in block isn't candidate for adjusting OpenACC privatization level: static} "" { target *-*-* } .-3 } */ + { + num_gangs_actual = 1 + __builtin_goacc_parlevel_id(GOMP_DIM_GANG); + + static int var = var_init; + +#pragma acc atomic capture + result = ++var; + + /* Irrespective of the order in which the gang-redundant threads + execute, 'var' has now been incremented 'num_gangs_actual' times, and + the final value captured as 'result'. */ + } + /* Without an explicit 'num_gangs' clause GCC assigns 'num_gangs(1)' + because it doesn't see any use of gang-level parallelism inside the + region. 
*/ + assert(num_gangs_actual == 1); + assert(result == var_init + num_gangs_actual * (1 + i)); + } +} + + +/* Call a gang-level routine. */ + +static const int t0_r_var_init = 61; + +#pragma acc routine gang +/* { dg-bogus "warning: region is gang partitioned but does not contain gang partitioned code" "TODO 'atomic'" { xfail *-*-* } .+4 } */ +/* { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .+3 } */ +/* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .+2 } */ +__attribute__((noinline)) +static int t0_r_r(void) +{ + static int var = t0_r_var_init; + + int tmp; +#pragma acc atomic capture + tmp = ++var; + + return tmp; +} + +static void t0_r(void) +{ + IF_DEBUG + __builtin_printf ("%s\n", __FUNCTION__); + + const int i_limit = 11; + + for (int i = 0; i < i_limit; ++i) + { + int result = 0; + int num_gangs_actual = -1; +#pragma acc parallel \ + reduction(max:num_gangs_actual) \ + reduction(max:result) + { + num_gangs_actual = 1 + __builtin_goacc_parlevel_id(GOMP_DIM_GANG); + + result = t0_r_r(); + + /* Irrespective of the order in which the gang-redundant threads + execute, 'var' has now been incremented 'num_gangs_actual' times, and + the final value captured as 'result'. */ + } + /* The number of gangs selected by the implemention ought to but must not + be bigger than one. */ + IF_DEBUG + __builtin_printf ("%d: num_gangs_actual: %d\n", i, num_gangs_actual); + assert(num_gangs_actual >= 1); + assert(result == t0_r_var_init + num_gangs_actual * (1 + i)); + } +} + + +/* Explicit 'num_gangs'. 
*/ + +static void t1_c(void) +{ + IF_DEBUG + __builtin_printf ("%s\n", __FUNCTION__); + + const int i_limit = 22; + const int num_gangs_request = 444; + const int var_init = 5; + + for (int i = 0; i < i_limit; ++i) + { + int result = 0; + int num_gangs_actual = -1; + /* { dg-bogus "warning: region is gang partitioned but does not contain gang partitioned code" "TODO 'atomic'" { xfail *-*-* } .+1 } */ +#pragma acc parallel \ + num_gangs(num_gangs_request) \ + reduction(max:num_gangs_actual) \ + reduction(max:result) + /* { dg-note {variable 'var' declared in block isn't candidate for adjusting OpenACC privatization level: static} "" { target *-*-* } .-4 } */ + { + num_gangs_actual = 1 + __builtin_goacc_parlevel_id(GOMP_DIM_GANG); + + static int var = var_init; + +#pragma acc atomic capture + result = ++var; + + /* Irrespective of the order in which the gang-redundant threads + execute, 'var' has now been incremented 'num_gangs_actual' times, and + the final value captured as 'result'. */ + } + if (acc_get_device_type() == acc_device_host) + assert(num_gangs_actual == 1); + else + assert(num_gangs_actual == num_gangs_request); + assert(result == var_init + num_gangs_actual * (1 + i)); + } +} + + +/* Check the same routine called from two compute constructs. 
*/ + +static const int t1_r2_var_init = 166; + +#pragma acc routine gang +/* { dg-bogus "warning: region is gang partitioned but does not contain gang partitioned code" "TODO 'atomic'" { xfail *-*-* } .+4 } */ +/* { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .+3 } */ +/* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .+2 } */ +__attribute__((noinline)) +static int t1_r2_r(void) +{ + static int var = t1_r2_var_init; + + int tmp; +#pragma acc atomic capture + tmp = ++var; + + return tmp; +} + +static void t1_r2(void) +{ + IF_DEBUG + __builtin_printf ("%s\n", __FUNCTION__); + + const int i_limit = 71; + /* The checking assumes the same 'num_gangs' for all compute constructs. */ + const int num_gangs_request = 333; + int num_gangs_actual = -1; + if (acc_get_device_type() == acc_device_host) + num_gangs_actual = 1; + else + { + /* We're assuming that the implementation is able to accomodate the + 'num_gangs' requested (which really ought to be true for + 'num_gangs'). */ + num_gangs_actual = num_gangs_request; + } + + for (int i = 0; i < i_limit; ++i) + { + int result_1 = 0; +#pragma acc parallel \ + num_gangs(num_gangs_request) \ + reduction(max:result_1) + { + result_1 = t1_r2_r(); + + /* Irrespective of the order in which the gang-redundant threads + execute, 'var' has now been incremented 'num_gangs_actual' times, and + the final value captured as 'result_1'. */ + } + IF_DEBUG + __builtin_printf ("%d: result_1: %d\n", i, result_1); + assert(result_1 == t1_r2_var_init + num_gangs_actual * (1 + (i * 3 + 0))); + + int result_2 = 0; +#pragma acc parallel \ + num_gangs(num_gangs_request) \ + reduction(max:result_2) + { + result_2 = t1_r2_r() + t1_r2_r(); + + /* Irrespective of the order in which the gang-redundant threads + execute, 'var' has now been incremented '2 * num_gangs_actual' times. 
+ However, the order of the two 't1_r2_r' function calls is not + synchronized (between different gang-redundant threads). We thus + cannot verify the actual 'result_2' values in this case. */ + } + IF_DEBUG + __builtin_printf ("%d: result_2: %d\n", i, result_2); + if (num_gangs_actual == 1) + /* Per the rationale above, only in this case we can check the actual + result. */ + assert(result_2 == (t1_r2_var_init + num_gangs_actual * (1 + (i * 3 + 1)) + + t1_r2_var_init + num_gangs_actual * (1 + (i * 3 + 2)))); + /* But we can generally check low and high limits. */ + { + /* Must be bigger than '2 * result_1'. */ + int c = 2 * result_1; + IF_DEBUG + __builtin_printf (" > %d\n", c); + assert(result_2 > c); + } + { + /* ..., but limited by the base value for next 'i'. */ + int c = 2 * (t1_r2_var_init + num_gangs_actual * (0 + ((i + 1) * 3 + 0))); + IF_DEBUG + __builtin_printf (" < %d\n", c); + assert(result_2 < c); + } + } +} + + +/* Asynchronous execution. */ + +static const int t2_var_init_2 = -55; + +#pragma acc routine gang +/* { dg-bogus "warning: region is gang partitioned but does not contain gang partitioned code" "TODO 'atomic'" { xfail *-*-* } .+4 } */ +/* { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .+3 } */ +/* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .+2 } */ +__attribute__((noinline)) +static int t2_r(void) +{ + static int var = t2_var_init_2; + + int tmp; +#pragma acc atomic capture + tmp = ++var; + + return tmp; +} + +static void t2(void) +{ + IF_DEBUG + __builtin_printf ("%s\n", __FUNCTION__); + + const int i_limit = 12; + const int num_gangs_request_1 = 14; + const int var_init_1 = 5; + int results_1[i_limit][num_gangs_request_1]; + memset (results_1, 0, sizeof results_1); + const int num_gangs_request_2 = 5; + int results_2[i_limit][num_gangs_request_2]; + memset (results_2, 0, sizeof results_2); + const int 
num_gangs_request_3 = 34; + const int var_init_3 = 1250; + int results_3[i_limit][num_gangs_request_3]; + memset (results_3, 0, sizeof results_3); + +#pragma acc data \ + copy(results_1, results_2, results_3) + /* { dg-note {variable 'num_gangs_request_1\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target { c && { ! __OPTIMIZE__ } } } .-2 } */ + /* { dg-note {variable 'num_gangs_request_2\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target { c && { ! __OPTIMIZE__ } } } .-3 } */ + /* { dg-note {variable 'num_gangs_request_3\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target { c && { ! __OPTIMIZE__ } } } .-4 } */ + /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-5 } */ + { + for (int i = 0; i < i_limit; ++i) + { + /* The following 'async' clauses effect asynchronous execution, but + using the same async-argument for each compute construct implies that + the respective compute constructs' execution is synchronized with + itself, meaning that all 'i = 0' execution has finished (on the + device) before 'i = 1' is started (on the device), etc. 
*/ + + /* { dg-bogus "warning: region is gang partitioned but does not contain gang partitioned code" "TODO 'atomic'" { xfail *-*-* } .+1 } */ +#pragma acc parallel \ + present(results_1) \ + num_gangs(num_gangs_request_1) \ + async(1) + /* { dg-note {variable 'var' declared in block isn't candidate for adjusting OpenACC privatization level: static} "" { target *-*-* } .-4 } */ + /* { dg-note {variable 'tmp' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-5 } */ + { + static int var = var_init_1; + + int tmp; +#pragma acc atomic capture + tmp = ++var; + + results_1[i][__builtin_goacc_parlevel_id(GOMP_DIM_GANG)] += tmp; + } + +#pragma acc parallel \ + present(results_2) \ + num_gangs(num_gangs_request_2) \ + async(2) + { + results_2[i][__builtin_goacc_parlevel_id(GOMP_DIM_GANG)] += t2_r(); + } + + /* { dg-bogus "warning: region is gang partitioned but does not contain gang partitioned code" "TODO 'atomic'" { xfail *-*-* } .+1 } */ +#pragma acc parallel \ + present(results_3) \ + num_gangs(num_gangs_request_3) \ + async(3) + /* { dg-note {variable 'var' declared in block isn't candidate for adjusting OpenACC privatization level: static} "" { target *-*-* } .-4 } */ + /* { dg-note {variable 'tmp' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-5 } */ + { + static int var = var_init_3; + + int tmp; +#pragma acc atomic capture + tmp = ++var; + + results_3[i][__builtin_goacc_parlevel_id(GOMP_DIM_GANG)] += tmp; + } + } +#pragma acc wait + } + int num_gangs_actual_1; + int num_gangs_actual_2; + int num_gangs_actual_3; + if (acc_get_device_type() == acc_device_host) + { + num_gangs_actual_1 = 1; + num_gangs_actual_2 = 1; + num_gangs_actual_3 = 1; + } + else + { + /* We're assuming that the implementation is able to accomodate the + 'num_gangs' requested (which really ought to be true for + 'num_gangs'). 
*/ + num_gangs_actual_1 = num_gangs_request_1; + num_gangs_actual_2 = num_gangs_request_2; + num_gangs_actual_3 = num_gangs_request_3; + } + + /* For 'i = 0', 'results_*[i][0..num_gangs_actual_*]' are expected to each + contain one value of '(1 + var_init_*)..(var_init_* + num_gangs_actual_*)', + and so on for increasing 'i'. Their order however is unspecified due to + the gang-redundant execution. (Thus checking that their sums match.) */ + + int result_1 = 0; + int result_2 = 0; + int result_3 = 0; + for (int i = 0; i < i_limit; ++i) + { + int result_1_ = 0; + for (int g = 0; g < num_gangs_actual_1; ++g) + { + IF_DEBUG + __builtin_printf ("results_1[%d][%d]: %d\n", i, g, results_1[i][g]); + result_1_ += results_1[i][g]; + } + IF_DEBUG + __builtin_printf ("%d result_1_: %d\n", i, result_1_); + assert (result_1_ == (((var_init_1 + num_gangs_actual_1 * (1 + i)) * (1 + var_init_1 + num_gangs_actual_1 * (1 + i)) / 2) + - ((var_init_1 + num_gangs_actual_1 * (0 + i)) * (1 + var_init_1 + num_gangs_actual_1 * (0 + i)) / 2))); + result_1 += result_1_; + + int result_2_ = 0; + for (int g = 0; g < num_gangs_actual_2; ++g) + { + IF_DEBUG + __builtin_printf ("results_2[%d][%d]: %d\n", i, g, results_2[i][g]); + result_2_ += results_2[i][g]; + } + IF_DEBUG + __builtin_printf ("%d result_2_: %d\n", i, result_2_); + assert (result_2_ == (((t2_var_init_2 + num_gangs_actual_2 * (1 + i)) * (1 + t2_var_init_2 + num_gangs_actual_2 * (1 + i)) / 2) + - ((t2_var_init_2 + num_gangs_actual_2 * (0 + i)) * (1 + t2_var_init_2 + num_gangs_actual_2 * (0 + i)) / 2))); + result_2 += result_2_; + + int result_3_ = 0; + for (int g = 0; g < num_gangs_actual_3; ++g) + { + IF_DEBUG + __builtin_printf ("results_3[%d][%d]: %d\n", i, g, results_3[i][g]); + result_3_ += results_3[i][g]; + } + IF_DEBUG + __builtin_printf ("%d result_3_: %d\n", i, result_3_); + assert (result_3_ == (((var_init_3 + num_gangs_actual_3 * (1 + i)) * (1 + var_init_3 + num_gangs_actual_3 * (1 + i)) / 2) + - ((var_init_3 + 
num_gangs_actual_3 * (0 + i)) * (1 + var_init_3 + num_gangs_actual_3 * (0 + i)) / 2))); + result_3 += result_3_; + } + IF_DEBUG + __builtin_printf ("result_1: %d\n", result_1); + assert (result_1 == (((var_init_1 + num_gangs_actual_1 * i_limit) * (1 + var_init_1 + num_gangs_actual_1 * i_limit) / 2) + - (var_init_1 * (var_init_1 + 1) / 2))); + IF_DEBUG + __builtin_printf ("result_2: %d\n", result_2); + assert (result_2 == (((t2_var_init_2 + num_gangs_actual_2 * i_limit) * (1 + t2_var_init_2 + num_gangs_actual_2 * i_limit) / 2) + - (t2_var_init_2 * (t2_var_init_2 + 1) / 2))); + IF_DEBUG + __builtin_printf ("result_3: %d\n", result_3); + assert (result_3 == (((var_init_3 + num_gangs_actual_3 * i_limit) * (1 + var_init_3 + num_gangs_actual_3 * i_limit) / 2) + - (var_init_3 * (var_init_3 + 1) / 2))); +} + + +#pragma acc routine seq +__attribute__((noinline)) +static int pr84991_1_r_s(int n) +{ + static const int test[] = {1,2,3,4}; + return test[n]; +} + +static void pr84991_1(void) +{ + int n[1]; + n[0] = 3; +#pragma acc parallel copy(n) + { + n[0] = pr84991_1_r_s(n[0]); + } + assert(n[0] == 4); +} + + +static void pr84992_1(void) +{ + int n[1]; + n[0] = 3; +#pragma acc parallel copy(n) + /* { dg-note {variable 'test' declared in block isn't candidate for adjusting OpenACC privatization level: static} "" { target *-*-* } .-1 } */ + { + static const int test[] = {1,2,3,4}; + n[0] = test[n[0]]; + } + assert(n[0] == 4); +} + + +int main(void) +{ + t0_c(); + + t0_r(); + + t1_c(); + + t1_r2(); + + t2(); + + pr84991_1(); + + pr84992_1(); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c index 18d77cc..5158bb5 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c @@ -1,5 +1,5 @@ /* { dg-do run { target openacc_nvidia_accel_selected } } */ -/* { dg-additional-options 
"-foffload=-fdump-tree-oaccdevlow" } */ +/* { dg-additional-options "-foffload=-fdump-tree-oaccloops" } */ /* { dg-set-target-env-var "GOMP_DEBUG" "1" } */ #include <stdlib.h> @@ -34,5 +34,5 @@ main (void) return 0; } -/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 128\\)" "oaccdevlow" } } */ +/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 128\\)" "oaccloops" } } */ /* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=128" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c index 8b5b2a4..a3e44eb 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c @@ -1,6 +1,6 @@ /* { dg-do run { target openacc_nvidia_accel_selected } } */ /* { dg-additional-options "-fopenacc-dim=::128" } */ -/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */ +/* { dg-additional-options "-foffload=-fdump-tree-oaccloops" } */ /* { dg-set-target-env-var "GOMP_DEBUG" "1" } */ #include <stdlib.h> @@ -35,5 +35,5 @@ main (void) return 0; } -/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 128\\)" "oaccdevlow" } } */ +/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 128\\)" "oaccloops" } } */ /* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=128" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-3.c index 59be37a..a85400d 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-3.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-3.c @@ -1,5 +1,5 @@ /* { dg-do run { target openacc_nvidia_accel_selected } } */ -/* { dg-additional-options 
"-foffload=-fdump-tree-oaccdevlow" } */ +/* { dg-additional-options "-foffload=-fdump-tree-oaccloops" } */ /* We default to warp size 32 for the vector length, so the GOMP_OPENACC_DIM has no effect. */ /* { dg-set-target-env-var "GOMP_OPENACC_DIM" "::128" } */ @@ -38,5 +38,5 @@ main (void) return 0; } -/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 32\\)" "oaccdevlow" } } */ +/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 32\\)" "oaccloops" } } */ /* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=32" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-4.c index e5d1df0..24c078f 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-4.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-4.c @@ -1,5 +1,5 @@ /* { dg-do run { target openacc_nvidia_accel_selected } } */ -/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */ +/* { dg-additional-options "-foffload=-fdump-tree-oaccloops" } */ /* { dg-set-target-env-var "GOMP_DEBUG" "1" } */ #include <stdlib.h> @@ -36,5 +36,5 @@ main (void) return 0; } -/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 2, 128\\)" "oaccdevlow" } } */ +/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 2, 128\\)" "oaccloops" } } */ /* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=2, vectors=128" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c index e60f1c2..fcca9f5 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c @@ -1,6 +1,6 @@ /* { dg-do run { target openacc_nvidia_accel_selected } } */ /* { 
dg-additional-options "-fopenacc-dim=:2:128" } */ -/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */ +/* { dg-additional-options "-foffload=-fdump-tree-oaccloops" } */ /* { dg-set-target-env-var "GOMP_DEBUG" "1" } */ #include <stdlib.h> @@ -37,5 +37,5 @@ main (void) return 0; } -/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 2, 128\\)" "oaccdevlow" } } */ +/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 2, 128\\)" "oaccloops" } } */ /* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=2, vectors=128" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-6.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-6.c index a1f6762..0807eab 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-6.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-6.c @@ -1,6 +1,6 @@ /* { dg-do run { target openacc_nvidia_accel_selected } } */ /* { dg-set-target-env-var "GOMP_OPENACC_DIM" ":2:" } */ -/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */ +/* { dg-additional-options "-foffload=-fdump-tree-oaccloops" } */ /* { dg-set-target-env-var "GOMP_DEBUG" "1" } */ #include <stdlib.h> @@ -37,5 +37,5 @@ main (void) return 0; } -/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 0, 128\\)" "oaccdevlow" } } */ +/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 0, 128\\)" "oaccloops" } } */ /* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=2, vectors=128" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-7.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-7.c index c419f64..4a8c1bf 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-7.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-7.c @@ -1,5 +1,5 @@ /* { dg-do run { 
target openacc_nvidia_accel_selected } } */ -/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */ +/* { dg-additional-options "-foffload=-fdump-tree-oaccloops" } */ /* { dg-set-target-env-var "GOMP_DEBUG" "1" } */ #include <stdlib.h> @@ -36,5 +36,5 @@ main (void) return 0; } -/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 0, 128\\)" "oaccdevlow" } } */ +/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 0, 128\\)" "oaccloops" } } */ /* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=8, vectors=128" } */ diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-1.f90 index 1a8432c..cd599e5 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-1.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-1.f90 @@ -1,6 +1,12 @@ ! { dg-do run } ! { dg-additional-options "-cpp" } -! + +! { dg-additional-options "-fopt-info-all-omp" } +! { dg-additional-options "--param=openacc-privatization=noisy" } +! { dg-additional-options "-foffload=-fopt-info-all-omp" } +! { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } +! for testing/documenting aspects of that functionality. + ! TODO: Have to disable the acc_on_device builtin for we want to test the ! libgomp library function? The command line option ! '-fno-builtin-acc_on_device' is valid for C/C++/ObjC/ObjC++ but not for @@ -15,15 +21,19 @@ if (.not. acc_on_device (acc_device_none)) STOP 1 if (.not. acc_on_device (acc_device_host)) STOP 2 if (acc_on_device (acc_device_not_host)) STOP 3 if (acc_on_device (acc_device_nvidia)) STOP 4 +if (acc_on_device (acc_device_radeon)) STOP 4 ! Host via offloading fallback mode. !$acc parallel if(.false.) +! 
{ dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target *-*-* } .-1 } +!TODO Unhandled 'CONST_DECL' instances for constant arguments in 'acc_on_device' calls. if (.not. acc_on_device (acc_device_none)) STOP 5 if (.not. acc_on_device (acc_device_host)) STOP 6 if (acc_on_device (acc_device_not_host)) STOP 7 if (acc_on_device (acc_device_nvidia)) STOP 8 +if (acc_on_device (acc_device_radeon)) STOP 8 !$acc end parallel @@ -32,6 +42,7 @@ if (acc_on_device (acc_device_nvidia)) STOP 8 ! Offloaded. !$acc parallel +! { dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target { ! openacc_host_selected } } .-1 } if (acc_on_device (acc_device_none)) STOP 9 if (acc_on_device (acc_device_host)) STOP 10 if (.not. acc_on_device (acc_device_not_host)) STOP 11 @@ -40,6 +51,11 @@ if (.not. acc_on_device (acc_device_nvidia)) STOP 12 #else if (acc_on_device (acc_device_nvidia)) STOP 13 #endif +#if ACC_DEVICE_TYPE_radeon +if (.not. acc_on_device (acc_device_radeon)) STOP 14 +#else +if (acc_on_device (acc_device_radeon)) STOP 15 +#endif !$acc end parallel #endif diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-2.f b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-2.f index 56f99d4..eb3daba 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-2.f +++ b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-2.f @@ -1,6 +1,12 @@ ! { dg-do run } ! { dg-additional-options "-cpp" } -! + +! { dg-additional-options "-fopt-info-all-omp" } +! { dg-additional-options "--param=openacc-privatization=noisy" } +! { dg-additional-options "-foffload=-fopt-info-all-omp" } +! { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } +! for testing/documenting aspects of that functionality. + ! TODO: Have to disable the acc_on_device builtin for we want to test ! the libgomp library function? 
The command line option ! '-fno-builtin-acc_on_device' is valid for C/C++/ObjC/ObjC++ but not @@ -15,15 +21,19 @@ IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST)) STOP 2 IF (ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) STOP 3 IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) STOP 4 + IF (ACC_ON_DEVICE (ACC_DEVICE_RADEON)) STOP 4 !Host via offloading fallback mode. !$ACC PARALLEL IF(.FALSE.) +! { dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target *-*-* } .-1 } +!TODO Unhandled 'CONST_DECL' instances for constant arguments in 'acc_on_device' calls. IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NONE)) STOP 5 IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST)) STOP 6 IF (ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) STOP 7 IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) STOP 8 + IF (ACC_ON_DEVICE (ACC_DEVICE_RADEON)) STOP 8 !$ACC END PARALLEL @@ -32,6 +42,7 @@ ! Offloaded. !$ACC PARALLEL +! { dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target { ! openacc_host_selected } } .-1 } IF (ACC_ON_DEVICE (ACC_DEVICE_NONE)) STOP 9 IF (ACC_ON_DEVICE (ACC_DEVICE_HOST)) STOP 10 IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) STOP 11 @@ -40,6 +51,11 @@ #else IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) STOP 13 #endif +#if ACC_DEVICE_TYPE_radeon + IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_RADEON)) STOP 14 +#else + IF (ACC_ON_DEVICE (ACC_DEVICE_RADEON)) STOP 15 +#endif !$ACC END PARALLEL #endif diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-3.f b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-3.f index 5657238..5f500c1 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-3.f +++ b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-3.f @@ -1,6 +1,12 @@ ! { dg-do run } ! { dg-additional-options "-cpp" } -! + +! { dg-additional-options "-fopt-info-all-omp" } +! { dg-additional-options "--param=openacc-privatization=noisy" } +! 
{ dg-additional-options "-foffload=-fopt-info-all-omp" } +! { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } +! for testing/documenting aspects of that functionality. + ! TODO: Have to disable the acc_on_device builtin for we want to test ! the libgomp library function? The command line option ! '-fno-builtin-acc_on_device' is valid for C/C++/ObjC/ObjC++ but not @@ -15,15 +21,19 @@ IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST)) STOP 2 IF (ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) STOP 3 IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) STOP 4 + IF (ACC_ON_DEVICE (ACC_DEVICE_RADEON)) STOP 4 !Host via offloading fallback mode. !$ACC PARALLEL IF(.FALSE.) +! { dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target *-*-* } .-1 } +!TODO Unhandled 'CONST_DECL' instances for constant arguments in 'acc_on_device' calls. IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NONE)) STOP 5 IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST)) STOP 6 IF (ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) STOP 7 IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) STOP 8 + IF (ACC_ON_DEVICE (ACC_DEVICE_RADEON)) STOP 8 !$ACC END PARALLEL @@ -32,6 +42,7 @@ ! Offloaded. !$ACC PARALLEL +! { dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target { ! openacc_host_selected } } .-1 } IF (ACC_ON_DEVICE (ACC_DEVICE_NONE)) STOP 9 IF (ACC_ON_DEVICE (ACC_DEVICE_HOST)) STOP 10 IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) STOP 11 @@ -40,6 +51,11 @@ #else IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) STOP 13 #endif +#if ACC_DEVICE_TYPE_radeon + IF (.NOT. 
ACC_ON_DEVICE (ACC_DEVICE_RADEON)) STOP 14 +#else + IF (ACC_ON_DEVICE (ACC_DEVICE_RADEON)) STOP 15 +#endif !$ACC END PARALLEL #endif diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-1.f90 index 084f336..51776a1 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/declare-1.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-1.f90 @@ -1,6 +1,12 @@ ! { dg-do run } ! { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } +! { dg-additional-options "-fopt-info-all-omp" } +! { dg-additional-options "--param=openacc-privatization=noisy" } +! { dg-additional-options "-foffload=-fopt-info-all-omp" } +! { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } +! for testing/documenting aspects of that functionality. + ! Tests to exercise the declare directive along with ! the clauses: copy ! copyin @@ -34,6 +40,7 @@ subroutine subr5 (a, b, c, d) i = 0 !$acc parallel + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do i = 1, N b(i) = a(i) c(i) = b(i) @@ -55,6 +62,7 @@ subroutine subr4 (a, b) i = 0 !$acc parallel + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do i = 1, N b(i) = a(i) end do @@ -74,6 +82,7 @@ subroutine subr3 (a, c) i = 0 !$acc parallel + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do i = 1, N a(i) = c(i) c(i) = 0 @@ -96,6 +105,7 @@ subroutine subr2 (a, b, c) i = 0 !$acc parallel + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do i = 1, N b(i) = a(i) c(i) = b(i) + c(i) + 1 @@ -114,6 +124,7 @@ subroutine subr1 (a) i = 0 !$acc parallel + ! 
{ dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do i = 1, N a(i) = a(i) + 1 end do @@ -133,6 +144,9 @@ subroutine test (a, e) end subroutine subroutine subr0 (a, b, c, d) + ! { dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target *-*-* } .-1 } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } + ! { dg-note {variable 'a\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } implicit none integer, parameter :: N = 8 integer :: a(N) @@ -198,6 +212,10 @@ subroutine subr0 (a, b, c, d) end subroutine program main + ! { dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target *-*-* } .-1 } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } + ! { dg-note {variable 'S\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } + ! { dg-note {variable 'desc\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "TODO" { target *-*-* } .-4 } use vars use openacc implicit none diff --git a/libgomp/testsuite/libgomp.oacc-fortran/derivedtypes-arrays-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/derivedtypes-arrays-1.f90 index 644ad1f..7bca2df 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/derivedtypes-arrays-1.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/derivedtypes-arrays-1.f90 @@ -88,6 +88,7 @@ end do !$acc data copyin(var3%t2(5)%t1%arr1) !$acc serial present(var3%t2(5)%t1%arr1) +! 
{ dg-warning "using vector_length \\(32\\), ignoring 1" "" { target openacc_nvidia_accel_selected } .-1 } var3%t2(5)%t1%arr1(:,:) = 6 !$acc end serial diff --git a/libgomp/testsuite/libgomp.oacc-fortran/host_data-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/host_data-2.f90 index ab70e4e..fe1ae8a 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/host_data-2.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/host_data-2.f90 @@ -3,6 +3,7 @@ ! { dg-do run { target openacc_nvidia_accel_selected } } ! { dg-additional-options "-lcublas -Wall -Wextra" } +! { dg-require-effective-target openacc_cublas } program test implicit none diff --git a/libgomp/testsuite/libgomp.oacc-fortran/host_data-3.f b/libgomp/testsuite/libgomp.oacc-fortran/host_data-3.f index 434c18c..912bac6 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/host_data-3.f +++ b/libgomp/testsuite/libgomp.oacc-fortran/host_data-3.f @@ -2,6 +2,7 @@ ! { dg-do run { target openacc_nvidia_accel_selected } } ! { dg-additional-options "-lcublas -Wall -Wextra" } +! { dg-require-effective-target openacc_cublas } include "cublas-fixed.h" diff --git a/libgomp/testsuite/libgomp.oacc-fortran/host_data-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/host_data-4.f90 index e81a8b2..0daba8b 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/host_data-4.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/host_data-4.f90 @@ -2,6 +2,7 @@ ! { dg-do run { target openacc_nvidia_accel_selected } } ! { dg-additional-options "-lcublas -Wall -Wextra" } +! { dg-require-effective-target openacc_cublas } module cublas interface diff --git a/libgomp/testsuite/libgomp.oacc-fortran/host_data-5.F90 b/libgomp/testsuite/libgomp.oacc-fortran/host_data-5.F90 index 483ac3f..93e9ee0 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/host_data-5.F90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/host_data-5.F90 @@ -1,7 +1,13 @@ ! { dg-do run } ! ! Test if, if_present clauses on host_data construct. -! + +! 
{ dg-additional-options "-fopt-info-all-omp" } +! { dg-additional-options "--param=openacc-privatization=noisy" } +! { dg-additional-options "-foffload=-fopt-info-all-omp" } +! { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } +! for testing/documenting aspects of that functionality. + ! Fortran variant of 'libgomp.oacc-c-c++-common/host_data-7.c'. ! program main @@ -33,11 +39,24 @@ subroutine foo (p2, parr, host_p, host_parr, cond) #endif !$acc data copyin(host_p, host_parr) + ! { dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target { ! openacc_host_selected } } .-1 } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target { ! openacc_host_selected } } .-2 } + ! { dg-note {variable 'p\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } + ! { dg-note {variable 'parr\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-4 } + ! { dg-note {variable 'parm\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "TODO" { target { ! openacc_host_selected } } .-5 } #if !ACC_MEM_SHARED if (acc_is_present(p, c_sizeof(p))) stop 5 if (acc_is_present(parr, 1)) stop 6 #endif !$acc host_data use_device(p, parr) if_present + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + ! { dg-note {variable 'transfer\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } + ! { dg-note {variable 'host_p\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } + ! 
{ dg-note {variable 'parr\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-4 } + ! { dg-note {variable 'host_parr\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-5 } + ! { dg-note {variable 'D\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "TODO" { target *-*-* } .-6 } + ! { dg-note {variable 'transfer\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "TODO" { target *-*-* } .-7 } + ! { dg-note {variable 'parm\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-8 } ! not mapped yet, so it will be equal to the host pointer. if (transfer(c_loc(p), host_p) /= host_p) stop 7 if (transfer(c_loc(parr), host_parr) /= host_parr) stop 8 @@ -48,6 +67,17 @@ subroutine foo (p2, parr, host_p, host_parr, cond) #endif !$acc data copy(p, parr) + ! { dg-note {variable 'p\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } + ! { dg-note {variable 'parr\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } + ! { dg-note {variable 'transfer\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-4 } + ! { dg-note {variable 'host_p\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-5 } + ! { dg-note {variable 'host_parr\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-6 } + ! 
{ dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target *-*-* } .-7 } + ! { dg-note {variable 'parm\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "TODO" { target *-*-* } .-8 } + ! { dg-note {variable 'D\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "TODO" { target *-*-* } .-9 } + ! { dg-note {variable 'transfer\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "TODO" { target *-*-* } .-10 } + ! { dg-note {variable 'parm\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-11 } if (.not. acc_is_present(p, c_sizeof(p))) stop 11 if (.not. acc_is_present(parr, 1)) stop 12 ! Not inside a host_data construct, so still the host pointer. @@ -55,6 +85,14 @@ subroutine foo (p2, parr, host_p, host_parr, cond) if (transfer(c_loc(parr), host_parr) /= host_parr) stop 14 !$acc host_data use_device(p, parr) + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + ! { dg-note {variable 'transfer\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } + ! { dg-note {variable 'host_p\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } + ! { dg-note {variable 'parr\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-4 } + ! { dg-note {variable 'host_parr\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-5 } + ! 
{ dg-note {variable 'D\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "TODO" { target *-*-* } .-6 } + ! { dg-note {variable 'transfer\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "TODO" { target *-*-* } .-7 } + ! { dg-note {variable 'parm\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-8 } #if ACC_MEM_SHARED if (transfer(c_loc(p), host_p) /= host_p) stop 15 if (transfer(c_loc(parr), host_parr) /= host_parr) stop 16 @@ -66,6 +104,14 @@ subroutine foo (p2, parr, host_p, host_parr, cond) !$acc end host_data !$acc host_data use_device(p, parr) if_present + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + ! { dg-note {variable 'transfer\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } + ! { dg-note {variable 'host_p\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } + ! { dg-note {variable 'parr\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-4 } + ! { dg-note {variable 'host_parr\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-5 } + ! { dg-note {variable 'parm\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-6 } + ! { dg-note {variable 'D\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "TODO" { target *-*-* } .-7 } + ! 
{ dg-note {variable 'transfer\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "TODO" { target *-*-* } .-8 } #if ACC_MEM_SHARED if (transfer(c_loc(p), host_p) /= host_p) stop 19 if (transfer(c_loc(parr), host_parr) /= host_parr) stop 20 @@ -77,6 +123,14 @@ subroutine foo (p2, parr, host_p, host_parr, cond) !$acc end host_data !$acc host_data use_device(p, parr) if(cond) + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + ! { dg-note {variable 'transfer\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } + ! { dg-note {variable 'host_p\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } + ! { dg-note {variable 'parr\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-4 } + ! { dg-note {variable 'host_parr\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-5 } + ! { dg-note {variable 'parm\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-6 } + ! { dg-note {variable 'D\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "TODO" { target *-*-* } .-7 } + ! 
{ dg-note {variable 'transfer\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "TODO" { target *-*-* } .-8 } #if ACC_MEM_SHARED if (transfer(c_loc(p), host_p) /= host_p) stop 23 if (transfer(c_loc(parr), host_parr) /= host_parr) stop 24 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/if-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/if-1.f90 index f3bf1ee..3089d6a 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/if-1.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/if-1.f90 @@ -1,6 +1,20 @@ ! { dg-do run } ! { dg-additional-options "-cpp" } +! { dg-additional-options "-fopt-info-note-omp" } +! { dg-additional-options "--param=openacc-privatization=noisy" } +! { dg-additional-options "-foffload=-fopt-info-note-omp" } +! { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } +! for testing/documenting aspects of that functionality. + +! It's only with Tcl 8.5 (released in 2007) that "the variable 'varName' +! passed to 'incr' may be unset, and in that case, it will be set to [...]", +! so to maintain compatibility with earlier Tcl releases, we manually +! initialize counter variables: +! { dg-line l_dummy[variable c_compute 0] } +! { dg-message "dummy" "" { target iN-VAl-Id } l_dummy } to avoid +! "WARNING: dg-line var l_dummy defined, but not used". */ + program main use openacc implicit none @@ -19,8 +33,11 @@ program main a(:) = 4.0 - !$acc parallel copyin (a(1:N)) copyout (b(1:N)) if (1 == 1) + !$acc parallel copyin (a(1:N)) copyout (b(1:N)) if (1 == 1) ! { dg-line l_compute[incr c_compute] } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } do i = 1, N + ! 
{ dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target *-*-* } l_compute$c_compute } + !TODO Unhandled 'CONST_DECL' instances for constant argument in 'acc_on_device' call. if (acc_on_device (acc_device_host) .eqv. .TRUE.) then b(i) = a(i) + 1 else @@ -41,8 +58,10 @@ program main a(:) = 16.0 - !$acc parallel if (0 == 1) + !$acc parallel if (0 == 1) ! { dg-line l_compute[incr c_compute] } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } do i = 1, N + ! { dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target *-*-* } l_compute$c_compute } if (acc_on_device (acc_device_host) .eqv. .TRUE.) then b(i) = a(i) + 1 else @@ -57,8 +76,10 @@ program main a(:) = 8.0 - !$acc parallel copyin (a(1:N)) copyout (b(1:N)) if (one == 1) + !$acc parallel copyin (a(1:N)) copyout (b(1:N)) if (one == 1) ! { dg-line l_compute[incr c_compute] } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } do i = 1, N + ! { dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target *-*-* } l_compute$c_compute } if (acc_on_device (acc_device_host) .eqv. .TRUE.) then b(i) = a(i) + 1 else @@ -79,8 +100,10 @@ program main a(:) = 22.0 - !$acc parallel if (zero == 1) + !$acc parallel if (zero == 1) ! { dg-line l_compute[incr c_compute] } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } do i = 1, N + ! 
{ dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target *-*-* } l_compute$c_compute } if (acc_on_device (acc_device_host) .eqv. .TRUE.) then b(i) = a(i) + 1 else @@ -95,8 +118,10 @@ program main a(:) = 16.0 - !$acc parallel copyin (a(1:N)) copyout (b(1:N)) if (.TRUE.) + !$acc parallel copyin (a(1:N)) copyout (b(1:N)) if (.TRUE.) ! { dg-line l_compute[incr c_compute] } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } do i = 1, N + ! { dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target *-*-* } l_compute$c_compute } if (acc_on_device (acc_device_host) .eqv. .TRUE.) then b(i) = a(i) + 1 else @@ -117,8 +142,10 @@ program main a(:) = 76.0 - !$acc parallel if (.FALSE.) + !$acc parallel if (.FALSE.) ! { dg-line l_compute[incr c_compute] } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } do i = 1, N + ! { dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target *-*-* } l_compute$c_compute } if (acc_on_device (acc_device_host) .eqv. .TRUE.) then b(i) = a(i) + 1 else @@ -135,8 +162,10 @@ program main nn = 1 - !$acc parallel copyin (a(1:N)) copyout (b(1:N)) if (nn == 1) + !$acc parallel copyin (a(1:N)) copyout (b(1:N)) if (nn == 1) ! { dg-line l_compute[incr c_compute] } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } do i = 1, N + ! 
{ dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target *-*-* } l_compute$c_compute } if (acc_on_device (acc_device_host) .eqv. .TRUE.) then b(i) = a(i) + 1 else @@ -159,8 +188,10 @@ program main nn = 0 - !$acc parallel if (nn == 1) + !$acc parallel if (nn == 1) ! { dg-line l_compute[incr c_compute] } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } do i = 1, N + ! { dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target *-*-* } l_compute$c_compute } if (acc_on_device (acc_device_host) .eqv. .TRUE.) then b(i) = a(i) + 1 else @@ -177,8 +208,10 @@ program main nn = 1 - !$acc parallel copyin (a(1:N)) copyout (b(1:N)) if ((nn + nn) > 0) + !$acc parallel copyin (a(1:N)) copyout (b(1:N)) if ((nn + nn) > 0) ! { dg-line l_compute[incr c_compute] } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } do i = 1, N + ! { dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target *-*-* } l_compute$c_compute } if (acc_on_device (acc_device_host) .eqv. .TRUE.) then b(i) = a(i) + 1 else @@ -201,8 +234,10 @@ program main nn = 0; - !$acc parallel copyin (a(1:N)) copyout (b(1:N)) if ((nn + nn) > 0) + !$acc parallel copyin (a(1:N)) copyout (b(1:N)) if ((nn + nn) > 0) ! { dg-line l_compute[incr c_compute] } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } do i = 1, N + ! 
{ dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target *-*-* } l_compute$c_compute } if (acc_on_device (acc_device_host) .eqv. .TRUE.) then b(i) = a(i) + 1 else @@ -217,8 +252,10 @@ program main a(:) = 91.0 - !$acc parallel copyin (a(1:N)) copyout (b(1:N)) if (-2 > 0) + !$acc parallel copyin (a(1:N)) copyout (b(1:N)) if (-2 > 0) ! { dg-line l_compute[incr c_compute] } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } do i = 1, N + ! { dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target *-*-* } l_compute$c_compute } if (acc_on_device (acc_device_host) .eqv. .TRUE.) then b(i) = a(i) + 1 else @@ -233,8 +270,10 @@ program main a(:) = 43.0 - !$acc parallel copyin (a(1:N)) copyout (b(1:N)) if (one == 1) + !$acc parallel copyin (a(1:N)) copyout (b(1:N)) if (one == 1) ! { dg-line l_compute[incr c_compute] } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } do i = 1, N + ! { dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target *-*-* } l_compute$c_compute } if (acc_on_device (acc_device_host) .eqv. .TRUE.) then b(i) = a(i) + 1 else @@ -255,8 +294,10 @@ program main a(:) = 87.0 - !$acc parallel if (one == 0) + !$acc parallel if (one == 0) ! { dg-line l_compute[incr c_compute] } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } do i = 1, N + ! 
{ dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target *-*-* } l_compute$c_compute } if (acc_on_device (acc_device_host) .eqv. .TRUE.) then b(i) = a(i) + 1 else @@ -333,8 +374,11 @@ program main b(:) = 0.0 !$acc data copyin (a(1:N)) copyout (b(1:N)) if (1 == 1) + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + ! { dg-note {variable 'parm\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } - !$acc parallel present (a(1:N)) + !$acc parallel present (a(1:N)) ! { dg-line l_compute[incr c_compute] } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } do i = 1, N b(i) = a(i) end do @@ -349,6 +393,7 @@ program main b(:) = 1.0 !$acc data copyin (a(1:N)) copyout (b(1:N)) if (0 == 1) + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target { ! openacc_host_selected } } .-1 } #if !ACC_MEM_SHARED if (acc_is_present (a) .eqv. .TRUE.) STOP 21 @@ -361,18 +406,25 @@ program main b(:) = 21.0 !$acc data copyin (a(1:N)) if (1 == 1) + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + ! { dg-note {variable 'parm\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } #if !ACC_MEM_SHARED if (acc_is_present (a) .eqv. .FALSE.) STOP 23 #endif !$acc data copyout (b(1:N)) if (0 == 1) + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + ! 
{ dg-note {variable 'parm\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } #if !ACC_MEM_SHARED if (acc_is_present (b) .eqv. .TRUE.) STOP 24 #endif !$acc data copyout (b(1:N)) if (1 == 1) + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + ! { dg-note {variable 'parm\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } - !$acc parallel present (a(1:N)) present (b(1:N)) + !$acc parallel present (a(1:N)) present (b(1:N)) ! { dg-line l_compute[incr c_compute] } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } do i = 1, N b(i) = a(i) end do @@ -452,8 +504,10 @@ program main a(:) = 4.0 - !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (1 == 1) + !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (1 == 1) ! { dg-line l_compute[incr c_compute] } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } do i = 1, N + ! { dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target *-*-* } l_compute$c_compute } if (acc_on_device (acc_device_host) .eqv. .TRUE.) then b(i) = a(i) + 1 else @@ -474,8 +528,10 @@ program main a(:) = 16.0 - !$acc kernels if (0 == 1) + !$acc kernels if (0 == 1) ! { dg-line l_compute[incr c_compute] } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } do i = 1, N + ! 
{ dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target *-*-* } l_compute$c_compute } if (acc_on_device (acc_device_host) .eqv. .TRUE.) then b(i) = a(i) + 1 else @@ -490,8 +546,10 @@ program main a(:) = 8.0 - !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (one == 1) + !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (one == 1) ! { dg-line l_compute[incr c_compute] } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } do i = 1, N + ! { dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target *-*-* } l_compute$c_compute } if (acc_on_device (acc_device_host) .eqv. .TRUE.) then b(i) = a(i) + 1 else @@ -512,8 +570,10 @@ program main a(:) = 22.0 - !$acc kernels if (zero == 1) + !$acc kernels if (zero == 1) ! { dg-line l_compute[incr c_compute] } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } do i = 1, N + ! { dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target *-*-* } l_compute$c_compute } if (acc_on_device (acc_device_host) .eqv. .TRUE.) then b(i) = a(i) + 1 else @@ -528,8 +588,10 @@ program main a(:) = 16.0 - !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (.TRUE.) + !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (.TRUE.) ! { dg-line l_compute[incr c_compute] } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } do i = 1, N + ! 
{ dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target *-*-* } l_compute$c_compute } if (acc_on_device (acc_device_host) .eqv. .TRUE.) then b(i) = a(i) + 1 else @@ -550,8 +612,10 @@ program main a(:) = 76.0 - !$acc kernels if (.FALSE.) + !$acc kernels if (.FALSE.) ! { dg-line l_compute[incr c_compute] } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } do i = 1, N + ! { dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target *-*-* } l_compute$c_compute } if (acc_on_device (acc_device_host) .eqv. .TRUE.) then b(i) = a(i) + 1 else @@ -568,8 +632,10 @@ program main nn = 1 - !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (nn == 1) + !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (nn == 1) ! { dg-line l_compute[incr c_compute] } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } do i = 1, N + ! { dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target *-*-* } l_compute$c_compute } if (acc_on_device (acc_device_host) .eqv. .TRUE.) then b(i) = a(i) + 1 else @@ -592,8 +658,10 @@ program main nn = 0 - !$acc kernels if (nn == 1) + !$acc kernels if (nn == 1) ! { dg-line l_compute[incr c_compute] } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } do i = 1, N + ! { dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target *-*-* } l_compute$c_compute } if (acc_on_device (acc_device_host) .eqv. .TRUE.) 
then b(i) = a(i) + 1 else @@ -610,8 +678,10 @@ program main nn = 1 - !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if ((nn + nn) > 0) + !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if ((nn + nn) > 0) ! { dg-line l_compute[incr c_compute] } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } do i = 1, N + ! { dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target *-*-* } l_compute$c_compute } if (acc_on_device (acc_device_host) .eqv. .TRUE.) then b(i) = a(i) + 1 else @@ -634,8 +704,10 @@ program main nn = 0; - !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if ((nn + nn) > 0) + !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if ((nn + nn) > 0) ! { dg-line l_compute[incr c_compute] } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } do i = 1, N + ! { dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target *-*-* } l_compute$c_compute } if (acc_on_device (acc_device_host) .eqv. .TRUE.) then b(i) = a(i) + 1 else @@ -650,8 +722,10 @@ program main a(:) = 91.0 - !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (-2 > 0) + !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (-2 > 0) ! { dg-line l_compute[incr c_compute] } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } do i = 1, N + ! { dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target *-*-* } l_compute$c_compute } if (acc_on_device (acc_device_host) .eqv. .TRUE.) 
then b(i) = a(i) + 1 else @@ -666,8 +740,10 @@ program main a(:) = 43.0 - !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (one == 1) + !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (one == 1) ! { dg-line l_compute[incr c_compute] } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } do i = 1, N + ! { dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target *-*-* } l_compute$c_compute } if (acc_on_device (acc_device_host) .eqv. .TRUE.) then b(i) = a(i) + 1 else @@ -688,8 +764,10 @@ program main a(:) = 87.0 - !$acc kernels if (one == 0) + !$acc kernels if (one == 0) ! { dg-line l_compute[incr c_compute] } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } do i = 1, N + ! { dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target *-*-* } l_compute$c_compute } if (acc_on_device (acc_device_host) .eqv. .TRUE.) then b(i) = a(i) + 1 else @@ -766,8 +844,11 @@ program main b(:) = 0.0 !$acc data copyin (a(1:N)) copyout (b(1:N)) if (1 == 1) + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + ! { dg-note {variable 'parm\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } - !$acc kernels present (a(1:N)) + !$acc kernels present (a(1:N)) ! { dg-line l_compute[incr c_compute] } + ! 
{ dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } do i = 1, N b(i) = a(i) end do @@ -782,6 +863,7 @@ program main b(:) = 1.0 !$acc data copyin (a(1:N)) copyout (b(1:N)) if (0 == 1) + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target { ! openacc_host_selected } } .-1 } #if !ACC_MEM_SHARED if (acc_is_present (a) .eqv. .TRUE.) STOP 56 @@ -794,18 +876,25 @@ program main b(:) = 21.0 !$acc data copyin (a(1:N)) if (1 == 1) + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + ! { dg-note {variable 'parm\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } #if !ACC_MEM_SHARED if (acc_is_present (a) .eqv. .FALSE.) STOP 58 #endif !$acc data copyout (b(1:N)) if (0 == 1) + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + ! { dg-note {variable 'parm\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } #if !ACC_MEM_SHARED if (acc_is_present (b) .eqv. .TRUE.) STOP 59 #endif !$acc data copyout (b(1:N)) if (1 == 1) + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + ! { dg-note {variable 'parm\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } - !$acc kernels present (a(1:N)) present (b(1:N)) + !$acc kernels present (a(1:N)) present (b(1:N)) ! { dg-line l_compute[incr c_compute] } + ! 
{ dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } do i = 1, N b(i) = a(i) end do diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-1.f90 index bcc0476..0ae7c4b 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-1.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-1.f90 @@ -2,6 +2,12 @@ ! { dg-do run } +! { dg-additional-options "-fopt-info-note-omp" } +! { dg-additional-options "--param=openacc-privatization=noisy" } +! { dg-additional-options "-foffload=-fopt-info-note-omp" } +! { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } +! for testing/documenting aspects of that functionality. + program main integer :: x, i, arr(32) @@ -11,6 +17,8 @@ program main !$acc kernels copy(arr) !$acc loop gang(num:32) private(x) + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + ! { dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } do i = 1, 32 x = i * 2; arr(i) = arr(i) + x; diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-2.f90 index 5571059..e3ff248 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-2.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-2.f90 @@ -3,6 +3,12 @@ ! { dg-do run } +! { dg-additional-options "-fopt-info-note-omp" } +! { dg-additional-options "--param=openacc-privatization=noisy" } +! { dg-additional-options "-foffload=-fopt-info-note-omp" } +! 
{ dg-additional-options "-foffload=--param=openacc-privatization=noisy" } +! for testing/documenting aspects of that functionality. + program main integer :: x, i, j, arr(0:32*32) @@ -12,10 +18,13 @@ program main !$acc kernels copy(arr) !$acc loop gang(num:32) private(x) + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + ! { dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } do i = 0, 31 x = i * 2; !$acc loop worker(num:32) + ! { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do j = 0, 31 arr(i * 32 + j) = arr(i * 32 + j) + x; end do diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-3.f90 index 6abbed7..370a25a 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-3.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-3.f90 @@ -3,6 +3,12 @@ ! { dg-do run } +! { dg-additional-options "-fopt-info-note-omp" } +! { dg-additional-options "--param=openacc-privatization=noisy" } +! { dg-additional-options "-foffload=-fopt-info-note-omp" } +! { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } +! for testing/documenting aspects of that functionality. + program main integer :: x, i, j, arr(0:32*32) @@ -12,10 +18,13 @@ program main !$acc kernels copy(arr) !$acc loop gang(num:32) private(x) + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + ! 
{ dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } do i = 0, 31 x = i * 2; !$acc loop vector(length:32) + ! { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do j = 0, 31 arr(i * 32 + j) = arr(i * 32 + j) + x; end do diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-6.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-6.f90 index d92be2d..abb86d0 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-6.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-6.f90 @@ -3,6 +3,12 @@ ! { dg-do run } +! { dg-additional-options "-fopt-info-note-omp" } +! { dg-additional-options "--param=openacc-privatization=noisy" } +! { dg-additional-options "-foffload=-fopt-info-note-omp" } +! { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } +! for testing/documenting aspects of that functionality. + program main type vec3 integer x, y, z, attr(13) @@ -17,6 +23,8 @@ program main !$acc kernels copy(arr) !$acc loop gang(num:32) private(pt) + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + ! { dg-note {variable 'pt' in 'private' clause is candidate for adjusting OpenACC privatization level} "" { target *-*-* } .-2 } do i = 0, 31 pt%x = i pt%y = i * 2 @@ -24,6 +32,7 @@ program main pt%attr(5) = i * 6 !$acc loop vector(length:32) + ! 
{ dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do j = 0, 31 arr(i * 32 + j) = arr(i * 32 + j) + pt%x + pt%y + pt%z + pt%attr(5); end do diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-vector-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-vector-1.f90 index e9c0fb3..fe796f3 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-vector-1.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-vector-1.f90 @@ -2,6 +2,12 @@ ! { dg-do run } +! { dg-additional-options "-fopt-info-note-omp" } +! { dg-additional-options "--param=openacc-privatization=noisy" } +! { dg-additional-options "-foffload=-fopt-info-note-omp" } +! { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } +! for testing/documenting aspects of that functionality. + program main integer :: x, i, j, k, idx, arr(0:32*32*32) @@ -11,15 +17,21 @@ program main !$acc kernels copy(arr) !$acc loop gang(num:32) + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do i = 0, 31 !$acc loop worker(num:8) + ! { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do j = 0, 31 !$acc loop vector(length:32) private(x) + ! { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + ! { dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } do k = 0, 31 x = ieor(i, j * 3) arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k end do !$acc loop vector(length:32) private(x) + ! 
{ dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + ! { dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } do k = 0, 31 x = ior(i, j * 5) arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-vector-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-vector-2.f90 index 13badb5..b5cefec 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-vector-2.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-vector-2.f90 @@ -2,6 +2,12 @@ ! { dg-do run } +! { dg-additional-options "-fopt-info-note-omp" } +! { dg-additional-options "--param=openacc-privatization=noisy" } +! { dg-additional-options "-foffload=-fopt-info-note-omp" } +! { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } +! for testing/documenting aspects of that functionality. + program main integer :: i, j, k, idx, arr(0:32*32*32), pt(2) @@ -11,10 +17,14 @@ program main !$acc kernels copy(arr) !$acc loop gang(num:32) + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do i = 0, 31 !$acc loop worker(num:8) + ! { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do j = 0, 31 !$acc loop vector(length:32) private(x, pt) + ! { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + ! 
{ dg-note {variable 'pt' in 'private' clause is candidate for adjusting OpenACC privatization level} "" { target *-*-* } .-2 } do k = 0, 31 pt(1) = ieor(i, j * 3) pt(2) = ior(i, j * 5) diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-1.f90 index 04d732e..3fd1239 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-1.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-1.f90 @@ -2,6 +2,12 @@ ! { dg-do run } +! { dg-additional-options "-fopt-info-note-omp" } +! { dg-additional-options "--param=openacc-privatization=noisy" } +! { dg-additional-options "-foffload=-fopt-info-note-omp" } +! { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } +! for testing/documenting aspects of that functionality. + program main integer :: x, i, j, arr(0:32*32) common x @@ -12,8 +18,11 @@ program main !$acc kernels copy(arr) !$acc loop gang(num:32) private(x) + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do i = 0, 31 !$acc loop worker(num:8) private(x) + ! { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + ! 
{ dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } do j = 0, 31 x = ieor(i, j * 3) arr(i * 32 + j) = arr(i * 32 + j) + x diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-2.f90 index 6c9a6b8..1dc5d9e 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-2.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-2.f90 @@ -3,6 +3,12 @@ ! { dg-do run } +! { dg-additional-options "-fopt-info-note-omp" } +! { dg-additional-options "--param=openacc-privatization=noisy" } +! { dg-additional-options "-foffload=-fopt-info-note-omp" } +! { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } +! for testing/documenting aspects of that functionality. + program main integer :: x, i, j, k, idx, arr(0:32*32*32) @@ -12,12 +18,16 @@ program main !$acc kernels copy(arr) !$acc loop gang(num:32) + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do i = 0, 31 !$acc loop worker(num:8) private(x) + ! { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + ! { dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } do j = 0, 31 x = ieor(i, j * 3) !$acc loop vector(length:32) + ! 
{ dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do k = 0, 31 arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k end do diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-3.f90 index fab14c3..25bc67a 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-3.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-3.f90 @@ -3,6 +3,12 @@ ! { dg-do run } +! { dg-additional-options "-fopt-info-note-omp" } +! { dg-additional-options "--param=openacc-privatization=noisy" } +! { dg-additional-options "-foffload=-fopt-info-note-omp" } +! { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } +! for testing/documenting aspects of that functionality. + program main integer :: x, i, j, k, idx, arr(0:32*32*32) @@ -12,22 +18,29 @@ program main !$acc kernels copy(arr) !$acc loop gang(num:32) + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do i = 0, 31 !$acc loop worker(num:8) private(x) + ! { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + ! { dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } do j = 0, 31 x = ieor(i, j * 3) !$acc loop vector(length:32) + ! { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do k = 0, 31 arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k end do end do !$acc loop worker(num:8) private(x) + ! 
{ dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + ! { dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } do j = 0, 31 x = ior(i, j * 5) !$acc loop vector(length:32) + ! { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do k = 0, 31 arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k end do diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-4.f90 index 71f4a11..b3f66ea 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-4.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-4.f90 @@ -3,6 +3,12 @@ ! { dg-do run } +! { dg-additional-options "-fopt-info-note-omp" } +! { dg-additional-options "--param=openacc-privatization=noisy" } +! { dg-additional-options "-foffload=-fopt-info-note-omp" } +! { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } +! for testing/documenting aspects of that functionality. + program main integer :: x, i, j, k, idx, arr(0:32*32*32) @@ -12,12 +18,16 @@ program main !$acc kernels copy(arr) !$acc loop gang(num:32) + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do i = 0, 31 !$acc loop worker(num:8) private(x) + ! { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + ! 
{ dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } do j = 0, 31 x = ieor(i, j * 3) !$acc loop vector(length:32) + ! { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do k = 0, 31 arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k end do @@ -25,6 +35,7 @@ program main x = ior(i, j * 5) !$acc loop vector(length:32) + ! { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do k = 0, 31 arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k end do diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-5.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-5.f90 index bb45755..d9dbb07 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-5.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-5.f90 @@ -3,6 +3,12 @@ ! { dg-do run } +! { dg-additional-options "-fopt-info-note-omp" } +! { dg-additional-options "--param=openacc-privatization=noisy" } +! { dg-additional-options "-foffload=-fopt-info-note-omp" } +! { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } +! for testing/documenting aspects of that functionality. + program main integer :: i, j, k, idx, arr(0:32*32*32) integer, target :: x @@ -14,13 +20,18 @@ program main !$acc kernels copy(arr) !$acc loop gang(num:32) + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do i = 0, 31 !$acc loop worker(num:8) private(x, p) + ! { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + ! 
{ dg-note {variable 'x' in 'private' clause is candidate for adjusting OpenACC privatization level} "" { target *-*-* } .-2 } + ! { dg-note {variable 'p' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } do j = 0, 31 p => x x = ieor(i, j * 3) !$acc loop vector(length:32) + ! { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do k = 0, 31 arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k end do @@ -28,6 +39,7 @@ program main p = ior(i, j * 5) !$acc loop vector(length:32) + ! { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do k = 0, 31 arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k end do diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-6.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-6.f90 index e169714..b4225c2 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-6.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-6.f90 @@ -3,6 +3,12 @@ ! { dg-do run } +! { dg-additional-options "-fopt-info-note-omp" } +! { dg-additional-options "--param=openacc-privatization=noisy" } +! { dg-additional-options "-foffload=-fopt-info-note-omp" } +! { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } +! for testing/documenting aspects of that functionality. + program main type vec2 integer x, y @@ -17,18 +23,23 @@ program main !$acc kernels copy(arr) !$acc loop gang(num:32) + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do i = 0, 31 !$acc loop worker(num:8) private(pt) + ! 
{ dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + ! { dg-note {variable 'pt' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } do j = 0, 31 pt%x = ieor(i, j * 3) pt%y = ior(i, j * 5) !$acc loop vector(length:32) + ! { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do k = 0, 31 arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt%x * k end do !$acc loop vector(length:32) + ! { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do k = 0, 31 arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt%y * k end do diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-7.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-7.f90 index e262c02..76bbda7 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-7.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-7.f90 @@ -3,6 +3,12 @@ ! { dg-do run } +! { dg-additional-options "-fopt-info-note-omp" } +! { dg-additional-options "--param=openacc-privatization=noisy" } +! { dg-additional-options "-foffload=-fopt-info-note-omp" } +! { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } +! for testing/documenting aspects of that functionality. + program main integer :: i, j, k, idx, arr(0:32*32*32), pt(2) @@ -12,18 +18,23 @@ program main !$acc kernels copy(arr) !$acc loop gang(num:32) + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do i = 0, 31 !$acc loop worker(num:8) private(pt) + ! 
{ dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + ! { dg-note {variable 'pt' in 'private' clause is candidate for adjusting OpenACC privatization level} "" { target *-*-* } .-2 } do j = 0, 31 pt(1) = ieor(i, j * 3) pt(2) = ior(i, j * 5) !$acc loop vector(length:32) + ! { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do k = 0, 31 arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt(1) * k end do !$acc loop vector(length:32) + ! { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do k = 0, 31 arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt(2) * k end do diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-10.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-10.f90 index 2875f16..2b2f8fe 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/lib-10.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-10.f90 @@ -15,9 +15,7 @@ program main integer, parameter :: c_size = sizeof (c) integer, parameter :: r_size = sizeof (r) - if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit - - call acc_init (acc_device_nvidia) + call acc_init (acc_device_default) call set3d (.FALSE., a_3d_i, a_3d_c, a_3d_r) @@ -39,8 +37,6 @@ program main end do end do - call acc_shutdown (acc_device_nvidia) - contains subroutine set3d (clear, a_i, a_c, a_r) diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-14.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-14.f90 index bf35631..90c2868 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/lib-14.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-14.f90 @@ -1,7 +1,8 @@ ! Exercise the data movement runtime library functions on non-shared memory ! targets. -! { dg-do run { target openacc_nvidia_accel_selected } } +! { dg-do run } +! 
{ dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } program main use openacc diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-16-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-16-2.f90 index ddd557d3..2be75dc 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/lib-16-2.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-16-2.f90 @@ -27,6 +27,8 @@ program main if (acc_is_present (h) .neqv. .TRUE.) stop 1 + call acc_wait (async) + h(:) = 0 call acc_copyout_async (h, sizeof (h), async) @@ -45,6 +47,8 @@ program main if (acc_is_present (h) .neqv. .TRUE.) stop 3 + call acc_wait (async) + do i = 1, N if (h(i) /= i + i) stop 4 end do diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-16.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-16.f90 index ccd1ce6..fae0d10 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/lib-16.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-16.f90 @@ -27,6 +27,8 @@ program main if (acc_is_present (h) .neqv. .TRUE.) stop 1 + call acc_wait (async) + h(:) = 0 call acc_copyout_async (h, sizeof (h), async) @@ -45,6 +47,8 @@ program main if (acc_is_present (h) .neqv. .TRUE.) stop 3 + call acc_wait (async) + do i = 1, N if (h(i) /= i + i) stop 4 end do diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-5.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-5.f90 index 505b2c6..08808a4 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/lib-5.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-5.f90 @@ -6,26 +6,52 @@ program main integer n - if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit + if (acc_get_num_devices (acc_device_nvidia) .ne. 0) then - call acc_init (acc_device_nvidia) + call acc_init (acc_device_nvidia) - n = 0 + n = 0 - call acc_set_device_num (n, acc_device_nvidia) + call acc_set_device_num (n, acc_device_nvidia) - if (acc_get_device_num (acc_device_nvidia) .ne. 0) STOP 1 + if (acc_get_device_num (acc_device_nvidia) .ne. 0) stop 11 - if (acc_get_num_devices (acc_device_nvidia) .gt. 
1) then + if (acc_get_num_devices (acc_device_nvidia) .gt. 1) then - n = 1 + n = 1 - call acc_set_device_num (n, acc_device_nvidia) + call acc_set_device_num (n, acc_device_nvidia) - if (acc_get_device_num (acc_device_nvidia) .ne. 1) STOP 2 + if (acc_get_device_num (acc_device_nvidia) .ne. 1) stop 12 + + end if + + call acc_shutdown (acc_device_nvidia) end if - call acc_shutdown (acc_device_nvidia) + if (acc_get_num_devices (acc_device_radeon) .ne. 0) then + + call acc_init (acc_device_radeon) + + n = 0 + + call acc_set_device_num (n, acc_device_radeon) + + if (acc_get_device_num (acc_device_radeon) .ne. 0) stop 21 + + if (acc_get_num_devices (acc_device_radeon) .gt. 1) then + + n = 1 + + call acc_set_device_num (n, acc_device_radeon) + + if (acc_get_device_num (acc_device_radeon) .ne. 1) stop 22 + + end if + + call acc_shutdown (acc_device_radeon) + + end if end program diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-7.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-7.f90 index 2ce93c3..fa610b1 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/lib-7.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-7.f90 @@ -6,26 +6,52 @@ program main integer n - if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit + if (acc_get_num_devices (acc_device_nvidia) .ne. 0) then - call acc_init (acc_device_nvidia) + call acc_init (acc_device_nvidia) - n = 0 + n = 0 - call acc_set_device_num (n, acc_device_nvidia) + call acc_set_device_num (n, acc_device_nvidia) - if (acc_get_device_num (acc_device_nvidia) .ne. 0) STOP 1 + if (acc_get_device_num (acc_device_nvidia) .ne. 0) STOP 1 - if (acc_get_num_devices (acc_device_nvidia) .gt. 1) then + if (acc_get_num_devices (acc_device_nvidia) .gt. 1) then - n = 1 + n = 1 - call acc_set_device_num (n, acc_device_nvidia) + call acc_set_device_num (n, acc_device_nvidia) - if (acc_get_device_num (acc_device_nvidia) .ne. 1) STOP 2 + if (acc_get_device_num (acc_device_nvidia) .ne. 
1) STOP 2 + + end if + + call acc_shutdown (acc_device_nvidia) end if - call acc_shutdown (acc_device_nvidia) + if (acc_get_num_devices (acc_device_radeon) .ne. 0) then + + call acc_init (acc_device_radeon) + + n = 0 + + call acc_set_device_num (n, acc_device_radeon) + + if (acc_get_device_num (acc_device_radeon) .ne. 0) STOP 1 + + if (acc_get_num_devices (acc_device_radeon) .gt. 1) then + + n = 1 + + call acc_set_device_num (n, acc_device_radeon) + + if (acc_get_device_num (acc_device_radeon) .ne. 1) STOP 2 + + end if + + call acc_shutdown (acc_device_radeon) + + end if end program diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-8.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-8.f90 index 263cedb..2b36b40 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/lib-8.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-8.f90 @@ -16,9 +16,7 @@ program main integer, parameter :: c_size = sizeof (c) integer, parameter :: r_size = sizeof (r) - if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit - - call acc_init (acc_device_nvidia) + call acc_init (acc_device_default) call set3d (.FALSE., a_3d_i, a_3d_c, a_3d_r) @@ -40,8 +38,6 @@ program main end do end do - call acc_shutdown (acc_device_nvidia) - contains subroutine set3d (clear, a_i, a_c, a_r) diff --git a/libgomp/testsuite/libgomp.oacc-fortran/optional-private.f90 b/libgomp/testsuite/libgomp.oacc-fortran/optional-private.f90 index 0320bbb..4e67809 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/optional-private.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/optional-private.f90 @@ -4,6 +4,16 @@ ! { dg-do run } +! { dg-additional-options "-fopt-info-note-omp" } +! { dg-additional-options "--param=openacc-privatization=noisy" } +! { dg-additional-options "-foffload=-fopt-info-note-omp" } +! { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } +! for testing/documenting aspects of that functionality. + +! 
{ dg-additional-options "-Wopenacc-parallelism" } for testing/documenting +! aspects of that functionality. + + program main implicit none @@ -30,7 +40,11 @@ contains end do !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32) + ! { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .-1 } + ! { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-2 } !$acc loop gang private(x) + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + ! { dg-note {variable 'x' in 'private' clause potentially has improper OpenACC privatization level: 'parm_decl'} "TODO" { target *-*-* } .-2 } do i = 1, 32 x = i * 2; arr(i) = arr(i) + x @@ -55,7 +69,10 @@ contains end do !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32) + ! { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .-1 } !$acc loop gang private(pt) + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + ! { dg-note {variable 'pt' in 'private' clause potentially has improper OpenACC privatization level: 'parm_decl'} "TODO" { target *-*-* } .-2 } do i = 0, 31 pt%x = i pt%y = i * 2 @@ -63,6 +80,7 @@ contains pt%attr(5) = i * 6 !$acc loop vector + ! { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do j = 0, 31 arr(i * 32 + j) = arr(i * 32 + j) + pt%x + pt%y + pt%z + pt%attr(5); end do @@ -86,10 +104,14 @@ contains !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32) !$acc loop gang + ! 
{ dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do i = 0, 31 !$acc loop worker + ! { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do j = 0, 31 !$acc loop vector private(pt) + ! { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + ! { dg-note {variable 'pt' in 'private' clause potentially has improper OpenACC privatization level: 'parm_decl'} "TODO" { target *-*-* } .-2 } do k = 0, 31 pt(1) = ieor(i, j * 3) pt(2) = ior(i, j * 5) diff --git a/libgomp/testsuite/libgomp.oacc-fortran/optional-reduction.f90 b/libgomp/testsuite/libgomp.oacc-fortran/optional-reduction.f90 index 29f92c0..69b69b6 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/optional-reduction.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/optional-reduction.f90 @@ -4,6 +4,9 @@ ! { dg-do run } +!TODO +! { dg-xfail-run-if TODO { openacc_radeon_accel_selected && { ! __OPTIMIZE__ } } } + program optional_reduction implicit none diff --git a/libgomp/testsuite/libgomp.oacc-fortran/par-reduction-2-1.f b/libgomp/testsuite/libgomp.oacc-fortran/par-reduction-2-1.f index aa1bb63..1fe1b47 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/par-reduction-2-1.f +++ b/libgomp/testsuite/libgomp.oacc-fortran/par-reduction-2-1.f @@ -4,6 +4,9 @@ ! { dg-do run } +! { dg-additional-options "-Wopenacc-parallelism" } for +! testing/documenting aspects of that functionality. + PROGRAM MAIN IMPLICIT NONE INCLUDE "openacc_lib.h" @@ -15,6 +18,9 @@ !$ACC PARALLEL NUM_GANGS(256) NUM_WORKERS(32) VECTOR_LENGTH(32) !$ACC& REDUCTION(+:RES1) COPY(RES1, RES2) ASYNC(1) +! { dg-bogus "\[Ww\]arning: region is gang partitioned but does not contain gang partitioned code" "TODO 'reduction', 'atomic'" { xfail *-*-* } .-1 } +! 
{ dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .-2 } +! { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-3 } res1 = res1 + 5 !$ACC ATOMIC @@ -37,6 +43,9 @@ !$ACC PARALLEL NUM_GANGS(8) NUM_WORKERS(32) VECTOR_LENGTH(32) !$ACC& REDUCTION(*:RES1) COPY(RES1, RES2) ASYNC(1) +! { dg-bogus "\[Ww\]arning: region is gang partitioned but does not contain gang partitioned code" "TODO 'reduction', 'atomic'" { xfail *-*-* } .-1 } +! { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .-2 } +! { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-3 } res1 = res1 * 5 !$ACC ATOMIC diff --git a/libgomp/testsuite/libgomp.oacc-fortran/par-reduction-2-2.f b/libgomp/testsuite/libgomp.oacc-fortran/par-reduction-2-2.f index 5694de1..0221ab8 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/par-reduction-2-2.f +++ b/libgomp/testsuite/libgomp.oacc-fortran/par-reduction-2-2.f @@ -4,6 +4,9 @@ ! { dg-do run } +! { dg-additional-options "-Wopenacc-parallelism" } for +! testing/documenting aspects of that functionality. + PROGRAM MAIN USE OPENACC IMPLICIT NONE @@ -15,6 +18,9 @@ !$ACC PARALLEL NUM_GANGS(256) NUM_WORKERS(32) VECTOR_LENGTH(32) !$ACC& REDUCTION(+:RES1) COPY(RES1, RES2) ASYNC(1) +! { dg-bogus "\[Ww\]arning: region is gang partitioned but does not contain gang partitioned code" "TODO 'reduction', 'atomic'" { xfail *-*-* } .-1 } +! { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .-2 } +! { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-3 } res1 = res1 + 5 !$ACC ATOMIC @@ -37,6 +43,9 @@ !$ACC PARALLEL NUM_GANGS(8) NUM_WORKERS(32) VECTOR_LENGTH(32) !$ACC& REDUCTION(*:RES1) COPY(RES1, RES2) ASYNC(1) +! 
{ dg-bogus "\[Ww\]arning: region is gang partitioned but does not contain gang partitioned code" "TODO 'reduction', 'atomic'" { xfail *-*-* } .-1 } +! { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .-2 } +! { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-3 } res1 = res1 * 5 !$ACC ATOMIC diff --git a/libgomp/testsuite/libgomp.oacc-fortran/parallel-dims-aux.c b/libgomp/testsuite/libgomp.oacc-fortran/parallel-dims-aux.c index b5986f4..cdece32 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/parallel-dims-aux.c +++ b/libgomp/testsuite/libgomp.oacc-fortran/parallel-dims-aux.c @@ -5,41 +5,22 @@ /* Used by 'parallel-dims.f90'. */ -#include <limits.h> -#include <openacc.h> #include <gomp-constants.h> -/* TODO: "(int) acc_device_*" casts because of the C++ acc_on_device wrapper - not behaving as expected for -O0. */ #pragma acc routine seq -/* static */ unsigned int __attribute__ ((optimize ("O2"))) acc_gang () +/* static */ int acc_gang () { - if (acc_on_device ((int) acc_device_host)) - return 0; - else if (acc_on_device ((int) acc_device_nvidia)) - return __builtin_goacc_parlevel_id (GOMP_DIM_GANG); - else - __builtin_abort (); + return __builtin_goacc_parlevel_id (GOMP_DIM_GANG); } #pragma acc routine seq -/* static */ unsigned int __attribute__ ((optimize ("O2"))) acc_worker () +/* static */ int acc_worker () { - if (acc_on_device ((int) acc_device_host)) - return 0; - else if (acc_on_device ((int) acc_device_nvidia)) - return __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); - else - __builtin_abort (); + return __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); } #pragma acc routine seq -/* static */ unsigned int __attribute__ ((optimize ("O2"))) acc_vector () +/* static */ int acc_vector () { - if (acc_on_device ((int) acc_device_host)) - return 0; - else if (acc_on_device ((int) acc_device_nvidia)) - return __builtin_goacc_parlevel_id 
(GOMP_DIM_VECTOR); - else - __builtin_abort (); + return __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); } diff --git a/libgomp/testsuite/libgomp.oacc-fortran/parallel-dims.f90 b/libgomp/testsuite/libgomp.oacc-fortran/parallel-dims.f90 index 1bfcd6c..fad3d9d 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/parallel-dims.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/parallel-dims.f90 @@ -5,6 +5,15 @@ ! { dg-do run } ! { dg-prune-output "command-line option '-fintrinsic-modules-path=.*' is valid for Fortran but not for C" } +! { dg-additional-options "-fopt-info-note-omp" } +! { dg-additional-options "--param=openacc-privatization=noisy" } +! { dg-additional-options "-foffload=-fopt-info-note-omp" } +! { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } +! for testing/documenting aspects of that functionality. + +! { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting +! aspects of that functionality. + ! See also '../libgomp.oacc-c-c++-common/parallel-dims.c'. module acc_routines @@ -59,6 +68,7 @@ program main vectors_max = -huge(gangs_max) - 1 ! INT_MIN !$acc serial & !$acc reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) ! { dg-warning "using vector_length \\(32\\), ignoring 1" "" { target openacc_nvidia_accel_selected } } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do i = 100, -99, -1 gangs_min = acc_gang (); gangs_max = acc_gang (); @@ -84,6 +94,11 @@ program main vectors_max = -huge(gangs_max) - 1 ! INT_MIN !$acc serial copy (vectors_actual) & !$acc copy (gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max) ! { dg-warning "using vector_length \\(32\\), ignoring 1" "" { target openacc_nvidia_accel_selected } } + ! 
{ dg-bogus "\[Ww\]arning: region contains gang partitioned code but is not gang partitioned" "TODO 'serial'" { xfail *-*-* } .-1 } + ! { dg-bogus "\[Ww\]arning: region contains worker partitioned code but is not worker partitioned" "TODO 'serial'" { xfail *-*-* } .-2 } + ! { dg-bogus "\[Ww\]arning: region contains vector partitioned code but is not vector partitioned" "TODO 'serial'" { xfail *-*-* } .-3 } + ! { dg-note {variable 'C.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target *-*-* } .-4 } + !TODO Unhandled 'CONST_DECL' instance for constant argument in 'acc_on_device' call. if (acc_on_device (acc_device_nvidia)) then ! The GCC nvptx back end enforces vector_length (32). ! It's unclear if that's actually permissible here; @@ -92,10 +107,14 @@ program main vectors_actual = 32 end if !$acc loop gang reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do i = 100, -99, -1 !$acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) + ! { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } do j = 100, -99, -1 !$acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) + ! 
{ dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do k = 100 * vectors_actual, -99 * vectors_actual, -1 gangs_min = acc_gang (); gangs_max = acc_gang (); diff --git a/libgomp/testsuite/libgomp.oacc-fortran/parallel-reduction.f90 b/libgomp/testsuite/libgomp.oacc-fortran/parallel-reduction.f90 index 487cfc4..99b1441 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/parallel-reduction.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/parallel-reduction.f90 @@ -1,5 +1,7 @@ ! { dg-do run } -! { dg-additional-options "-w" } + +! { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting +! aspects of that functionality. program reduction implicit none @@ -11,6 +13,7 @@ program reduction s2 = 0 !$acc parallel reduction(+:s1,s2) num_gangs (n) copy(s1) + ! { dg-bogus "\[Ww\]arning: region is gang partitioned but does not contain gang partitioned code" "TODO 'reduction'" { xfail *-*-* } .-1 } s1 = s1 + 1 s2 = s2 + 1 !$acc end parallel @@ -41,6 +44,7 @@ subroutine redsub(s1, s2, n) integer :: s1, s2, n !$acc parallel reduction(+:s1,s2) num_gangs (10) copy(s1) + ! { dg-bogus "\[Ww\]arning: region is gang partitioned but does not contain gang partitioned code" "TODO 'reduction'" { xfail *-*-* } .-1 } s1 = s1 + 1 s2 = s2 + 1 !$acc end parallel diff --git a/libgomp/testsuite/libgomp.oacc-fortran/pr84028.f90 b/libgomp/testsuite/libgomp.oacc-fortran/pr84028.f90 index 2b36122..e603221 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/pr84028.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/pr84028.f90 @@ -1,11 +1,15 @@ ! { dg-do run } +! { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting +! aspects of that functionality. + program foo integer :: a(3,3,3), ll, lll a = 1 !$acc parallel num_gangs(1) num_workers(2) + ! 
{ dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .-1 } if (any(a(1:3,1:3,1:3).ne.1)) STOP 1 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/pr94358-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/pr94358-1.f90 index 99a7041..cf1d0e5 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/pr94358-1.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/pr94358-1.f90 @@ -1,6 +1,6 @@ ! { dg-do run } ! { dg-additional-options "-fopt-info-omp-all" } -! { dg-additional-options "-fopenacc-kernels=decompose" } +! { dg-additional-options "--param=openacc-kernels=decompose" } ! It's only with Tcl 8.5 (released in 2007) that "the variable 'varName' ! passed to 'incr' may be unset, and in that case, it will be set to [...]", diff --git a/libgomp/testsuite/libgomp.oacc-fortran/private-atomic-1-gang.f90 b/libgomp/testsuite/libgomp.oacc-fortran/private-atomic-1-gang.f90 new file mode 100644 index 0000000..4be7507 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/private-atomic-1-gang.f90 @@ -0,0 +1,35 @@ +! 'atomic' access of gang-private variable + +! { dg-do run } + +! { dg-additional-options "-fopt-info-note-omp" } +! { dg-additional-options "--param=openacc-privatization=noisy" } +! { dg-additional-options "-foffload=-fopt-info-note-omp" } +! { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } +! for testing/documenting aspects of that functionality. + + +program main + integer :: w, arr(0:31) + + !$acc parallel num_gangs(32) num_workers(32) copyout(arr) + !$acc loop gang private(w) + ! { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + ! { dg-note {variable 'w' in 'private' clause is candidate for adjusting OpenACC privatization level} "" { target *-*-* } .-2 } + ! { dg-note {variable 'w' ought to be adjusted for OpenACC privatization level: 'gang'} "" { target *-*-* } .-3 } + ! 
{ dg-note {variable 'w' adjusted for OpenACC privatization level: 'gang'} "" { target { ! openacc_host_selected } } .-4 } + do j = 0, 31 + w = 0 + !$acc loop seq + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + do i = 0, 31 + !$acc atomic update + w = w + 1 + !$acc end atomic + end do + arr(j) = w + end do + !$acc end parallel + + if (any (arr .ne. 32)) stop 1 +end program main diff --git a/libgomp/testsuite/libgomp.oacc-fortran/private-atomic-1-vector.f90 b/libgomp/testsuite/libgomp.oacc-fortran/private-atomic-1-vector.f90 new file mode 100644 index 0000000..e916837 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/private-atomic-1-vector.f90 @@ -0,0 +1,42 @@ +! 'atomic' access of vector-private variable + +! { dg-do run } + +! { dg-additional-options "-fopt-info-note-omp" } +! { dg-additional-options "--param=openacc-privatization=noisy" } +! { dg-additional-options "-foffload=-fopt-info-note-omp" } +! { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } +! for testing/documenting aspects of that functionality. + + +program main + integer :: w, arr(0:31) + + !$acc parallel num_gangs(32) num_workers(32) copyout(arr) + !$acc loop gang worker vector private(w) + ! { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + ! { dg-note {variable 'w' in 'private' clause is candidate for adjusting OpenACC privatization level} "" { target *-*-* } .-2 } + ! { dg-note {variable 'w' ought to be adjusted for OpenACC privatization level: 'vector'} "" { target *-*-* } .-3 } + ! { dg-note {variable 'w' adjusted for OpenACC privatization level: 'vector'} "" { target { ! openacc_host_selected } } .-4 } + do j = 0, 31 + w = 0 + !$acc loop seq + ! 
{ dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + do i = 0, 31 + !$acc atomic update + w = w + 1 + ! nvptx offloading: PR83812 "operation not supported on global/shared address space". + ! { dg-output "(\n|\r\n|\r)libgomp: cuStreamSynchronize error: operation not supported on global/shared address space(\n|\r\n|\r)$" { target openacc_nvidia_accel_selected } } + ! Scan for what we expect in the "XFAILed" case (without actually XFAILing). + ! { dg-shouldfail "XFAILed" { openacc_nvidia_accel_selected } } + ! ... instead of 'dg-xfail-run-if' so that 'dg-output' is evaluated at all. + ! { dg-final { if { [dg-process-target { xfail openacc_nvidia_accel_selected }] == "F" } { xfail "[testname-for-summary] really is XFAILed" } } } + ! ... so that we still get an XFAIL visible in the log. + !$acc end atomic + end do + arr(j) = w + end do + !$acc end parallel + + if (any (arr .ne. 32)) stop 1 +end program main diff --git a/libgomp/testsuite/libgomp.oacc-fortran/private-atomic-1-worker.f90 b/libgomp/testsuite/libgomp.oacc-fortran/private-atomic-1-worker.f90 new file mode 100644 index 0000000..5fa157b --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/private-atomic-1-worker.f90 @@ -0,0 +1,42 @@ +! 'atomic' access of worker-private variable + +! { dg-do run } + +! { dg-additional-options "-fopt-info-note-omp" } +! { dg-additional-options "--param=openacc-privatization=noisy" } +! { dg-additional-options "-foffload=-fopt-info-note-omp" } +! { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } +! for testing/documenting aspects of that functionality. + + +program main + integer :: w, arr(0:31) + + !$acc parallel num_gangs(32) num_workers(32) copyout(arr) + !$acc loop gang worker private(w) + ! { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + ! 
{ dg-note {variable 'w' in 'private' clause is candidate for adjusting OpenACC privatization level} "" { target *-*-* } .-2 } + ! { dg-note {variable 'w' ought to be adjusted for OpenACC privatization level: 'worker'} "" { target *-*-* } .-3 } + ! { dg-note {variable 'w' adjusted for OpenACC privatization level: 'worker'} "TODO" { target { ! openacc_host_selected } xfail *-*-* } .-4 } + do j = 0, 31 + w = 0 + !$acc loop seq + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } + do i = 0, 31 + !$acc atomic update + w = w + 1 + ! nvptx offloading: PR83812 "operation not supported on global/shared address space". + ! { dg-output "(\n|\r\n|\r)libgomp: cuStreamSynchronize error: operation not supported on global/shared address space(\n|\r\n|\r)$" { target openacc_nvidia_accel_selected } } + ! Scan for what we expect in the "XFAILed" case (without actually XFAILing). + ! { dg-shouldfail "XFAILed" { openacc_nvidia_accel_selected } } + ! ... instead of 'dg-xfail-run-if' so that 'dg-output' is evaluated at all. + ! { dg-final { if { [dg-process-target { xfail openacc_nvidia_accel_selected }] == "F" } { xfail "[testname-for-summary] really is XFAILed" } } } + ! ... so that we still get an XFAIL visible in the log. + !$acc end atomic + end do + arr(j) = w + end do + !$acc end parallel + + if (any (arr .ne. 32)) stop 1 +end program main diff --git a/libgomp/testsuite/libgomp.oacc-fortran/private-variables.f90 b/libgomp/testsuite/libgomp.oacc-fortran/private-variables.f90 index 472a6a1..e40a82f 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/private-variables.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/private-variables.f90 @@ -2,6 +2,23 @@ ! { dg-do run } +! { dg-additional-options "-fopt-info-note-omp" } +! { dg-additional-options "--param=openacc-privatization=noisy" } +! { dg-additional-options "-foffload=-fopt-info-note-omp" } +! 
{ dg-additional-options "-foffload=--param=openacc-privatization=noisy" } +! for testing/documenting aspects of that functionality. + +! { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting +! aspects of that functionality. + +! It's only with Tcl 8.5 (released in 2007) that "the variable 'varName' +! passed to 'incr' may be unset, and in that case, it will be set to [...]", +! so to maintain compatibility with earlier Tcl releases, we manually +! initialize counter variables: +! { dg-line l_dummy[variable c_loop 0] } +! { dg-message "dummy" "" { target iN-VAl-Id } l_dummy } to avoid +! "WARNING: dg-line var l_dummy defined, but not used". + ! Test of gang-private variables declared on loop directive. @@ -13,7 +30,11 @@ subroutine t1() end do !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32) - !$acc loop gang private(x) + ! { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .-1 } + ! { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-2 } + !$acc loop gang private(x) ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } + ! { dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do i = 1, 32 x = i * 2; arr(i) = arr(i) + x @@ -37,11 +58,15 @@ subroutine t2() end do !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32) - !$acc loop gang private(x) + ! { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-1 } + !$acc loop gang private(x) ! { dg-line l_loop[incr c_loop] } + ! 
{ dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } + ! { dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do i = 0, 31 x = i * 2; - !$acc loop worker + !$acc loop worker ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do j = 0, 31 arr(i * 32 + j) = arr(i * 32 + j) + x end do @@ -65,11 +90,15 @@ subroutine t3() end do !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32) - !$acc loop gang private(x) + ! { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .-1 } + !$acc loop gang private(x) ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } + ! { dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do i = 0, 31 x = i * 2; - !$acc loop vector + !$acc loop vector ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do j = 0, 31 arr(i * 32 + j) = arr(i * 32 + j) + x end do @@ -98,14 +127,27 @@ subroutine t4() end do !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32) - !$acc loop gang private(pt) + ! { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .-1 } + !$acc loop gang private(pt) ! { dg-line l_loop[incr c_loop] } + ! 
{ dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } + ! { dg-note {variable 'pt' in 'private' clause is candidate for adjusting OpenACC privatization level} "" { target *-*-* } l_loop$c_loop } + ! But, with optimizations enabled, per the '*.ssa' dump ('gcc/tree-ssa.c:execute_update_addresses_taken'): + ! No longer having address taken: pt + ! However, 'pt' remains in the candidate set: + ! { dg-note {variable 'pt' ought to be adjusted for OpenACC privatization level: 'gang'} "" { target *-*-* } l_loop$c_loop } + ! Now, for GCN offloading, 'adjust_private_decl' does the privatization change right away: + ! { dg-note {variable 'pt' adjusted for OpenACC privatization level: 'gang'} "" { target openacc_radeon_accel_selected } l_loop$c_loop } + ! For nvptx offloading however, we first mark up 'pt', and then later apply the privatization change -- or, with optimizations enabled, don't, because we then don't actually call 'expand_var_decl'. + ! { dg-note {variable 'pt' adjusted for OpenACC privatization level: 'gang'} "" { target { openacc_nvidia_accel_selected && { ! __OPTIMIZE__ } } } l_loop$c_loop } + ! { dg-bogus {note: variable 'pt' adjusted for OpenACC privatization level: 'gang'} "" { target { openacc_nvidia_accel_selected && __OPTIMIZE__ } } l_loop$c_loop } do i = 0, 31 pt%x = i pt%y = i * 2 pt%z = i * 4 pt%attr(5) = i * 6 - !$acc loop vector + !$acc loop vector ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do j = 0, 31 arr(i * 32 + j) = arr(i * 32 + j) + pt%x + pt%y + pt%z + pt%attr(5); end do @@ -128,16 +170,22 @@ subroutine t5() end do !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32) - !$acc loop gang + !$acc loop gang ! { dg-line l_loop[incr c_loop] } + ! 
{ dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do i = 0, 31 - !$acc loop worker + !$acc loop worker ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do j = 0, 31 - !$acc loop vector private(x) + !$acc loop vector private(x) ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } + ! { dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do k = 0, 31 x = ieor(i, j * 3) arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k end do - !$acc loop vector private(x) + !$acc loop vector private(x) ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } + ! { dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do k = 0, 31 x = ior(i, j * 5) arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k @@ -169,11 +217,18 @@ subroutine t6() end do !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32) - !$acc loop gang + !$acc loop gang ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do i = 0, 31 - !$acc loop worker + !$acc loop worker ! { dg-line l_loop[incr c_loop] } + ! 
{ dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do j = 0, 31 - !$acc loop vector private(x, pt) + !$acc loop vector private(x, pt) ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } + ! { dg-bogus {note: variable 'x' in 'private' clause} "" { target *-*-* } l_loop$c_loop } + ! { dg-note {variable 'pt' in 'private' clause is candidate for adjusting OpenACC privatization level} "" { target *-*-* } l_loop$c_loop } + ! { dg-note {variable 'pt' ought to be adjusted for OpenACC privatization level: 'vector'} "" { target *-*-* } l_loop$c_loop } + ! { dg-note {variable 'pt' adjusted for OpenACC privatization level: 'vector'} "" { target { ! openacc_host_selected } } l_loop$c_loop } do k = 0, 31 pt(1) = ieor(i, j * 3) pt(2) = ior(i, j * 5) @@ -208,9 +263,14 @@ subroutine t7() end do !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32) - !$acc loop gang private(x) + ! { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-1 } + !$acc loop gang private(x) ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } + ! { dg-bogus {note: variable 'x' in 'private' clause} "" { target *-*-* } l_loop$c_loop } do i = 0, 31 - !$acc loop worker private(x) + !$acc loop worker private(x) ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } + ! 
{ dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do j = 0, 31 x = ieor(i, j * 3) arr(i * 32 + j) = arr(i * 32 + j) + x @@ -235,13 +295,17 @@ subroutine t8() end do !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32) - !$acc loop gang + !$acc loop gang ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do i = 0, 31 - !$acc loop worker private(x) + !$acc loop worker private(x) ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } + ! { dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do j = 0, 31 x = ieor(i, j * 3) - !$acc loop vector + !$acc loop vector ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do k = 0, 31 arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k end do @@ -271,23 +335,30 @@ subroutine t9() end do !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32) - !$acc loop gang + !$acc loop gang ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do i = 0, 31 - !$acc loop worker private(x) + !$acc loop worker private(x) ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } + ! 
{ dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do j = 0, 31 x = ieor(i, j * 3) - !$acc loop vector + !$acc loop vector ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do k = 0, 31 arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k end do end do - !$acc loop worker private(x) + !$acc loop worker private(x) ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } + ! { dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do j = 0, 31 x = ior(i, j * 5) - !$acc loop vector + !$acc loop vector ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do k = 0, 31 arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k end do @@ -319,20 +390,25 @@ subroutine t10() end do !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32) - !$acc loop gang + !$acc loop gang ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do i = 0, 31 - !$acc loop worker private(x) + !$acc loop worker private(x) ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } + ! 
{ dg-note {variable 'x' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do j = 0, 31 x = ieor(i, j * 3) - !$acc loop vector + !$acc loop vector ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do k = 0, 31 arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k end do x = ior(i, j * 5) - !$acc loop vector + !$acc loop vector ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do k = 0, 31 arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k end do @@ -366,21 +442,29 @@ subroutine t11() end do !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32) - !$acc loop gang + !$acc loop gang ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do i = 0, 31 - !$acc loop worker private(x, p) + !$acc loop worker private(x, p) ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } + ! { dg-note {variable 'x' in 'private' clause is candidate for adjusting OpenACC privatization level} "" { target *-*-* } l_loop$c_loop } + ! { dg-note {variable 'x' ought to be adjusted for OpenACC privatization level: 'worker'} "" { target *-*-* } l_loop$c_loop } + ! { dg-note {variable 'x' adjusted for OpenACC privatization level: 'worker'} "TODO" { target { ! openacc_host_selected } xfail *-*-* } l_loop$c_loop } + ! 
{ dg-note {variable 'p' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do j = 0, 31 p => x x = ieor(i, j * 3) - !$acc loop vector + !$acc loop vector ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do k = 0, 31 arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k end do p = ior(i, j * 5) - !$acc loop vector + !$acc loop vector ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do k = 0, 31 arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k end do @@ -417,19 +501,24 @@ subroutine t12() end do !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32) - !$acc loop gang + !$acc loop gang ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do i = 0, 31 - !$acc loop worker private(pt) + !$acc loop worker private(pt) ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } + ! { dg-note {variable 'pt' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do j = 0, 31 pt%x = ieor(i, j * 3) pt%y = ior(i, j * 5) - !$acc loop vector + !$acc loop vector ! { dg-line l_loop[incr c_loop] } + ! 
{ dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do k = 0, 31 arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt%x * k end do - !$acc loop vector + !$acc loop vector ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do k = 0, 31 arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt%y * k end do @@ -461,19 +550,26 @@ subroutine t13() end do !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32) - !$acc loop gang + !$acc loop gang ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do i = 0, 31 - !$acc loop worker private(pt) + !$acc loop worker private(pt) ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } + ! { dg-note {variable 'pt' in 'private' clause is candidate for adjusting OpenACC privatization level} "" { target *-*-* } l_loop$c_loop } + ! { dg-note {variable 'pt' ought to be adjusted for OpenACC privatization level: 'worker'} "" { target *-*-* } l_loop$c_loop } + ! { dg-note {variable 'pt' adjusted for OpenACC privatization level: 'worker'} "TODO" { target { ! openacc_host_selected } xfail *-*-* } l_loop$c_loop } do j = 0, 31 pt(1) = ieor(i, j * 3) pt(2) = ior(i, j * 5) - !$acc loop vector + !$acc loop vector ! { dg-line l_loop[incr c_loop] } + ! 
{ dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do k = 0, 31 arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt(1) * k end do - !$acc loop vector + !$acc loop vector ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do k = 0, 31 arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt(2) * k end do @@ -507,13 +603,19 @@ subroutine t14() end do !$acc parallel private(x) copy(arr) num_gangs(n) num_workers(8) vector_length(32) - !$acc loop gang(static:1) + ! { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .-1 } + ! { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-2 } + !$acc loop gang(static:1) ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do i = 1, n x = i * 2; end do - !$acc loop gang(static:1) + !$acc loop gang(static:1) ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } do i = 1, n + ! { dg-note {variable 'C\.[0-9]+' declared in block potentially has improper OpenACC privatization level: 'const_decl'} "TODO" { target *-*-* } l_loop$c_loop } + !TODO Unhandled 'CONST_DECL' instance for constant argument in 'acc_on_device' call. if (acc_on_device (acc_device_host) .eqv. .TRUE.) 
x = i * 2 arr(i) = arr(i) + x end do diff --git a/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90 new file mode 100644 index 0000000..baaee02 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90 @@ -0,0 +1,157 @@ +! { dg-do run } + +! PR65181 "Support for alloca in nvptx" +! { dg-excess-errors "lto1, mkoffload and lto-wrapper fatal errors" { target openacc_nvidia_accel_selected } } +! Aside from restricting this testcase to non-nvptx offloading, and duplicating +! it with 'dg-do link' for nvptx offloading, there doesn't seem to be a way to +! XFAIL the "UNRESOLVED: [...] compilation failed to produce executable", or +! get rid of it, unfortunately. + +! { dg-additional-options "-fopt-info-note-omp" } +! { dg-additional-options "--param=openacc-privatization=noisy" } +! { dg-additional-options "-foffload=-fopt-info-note-omp" } +! { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } +! for testing/documenting aspects of that functionality. +! Prune a few: uninteresting, and varying depending on GCC configuration (data types): +! { dg-prune-output {note: variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} } + +! It's only with Tcl 8.5 (released in 2007) that "the variable 'varName' +! passed to 'incr' may be unset, and in that case, it will be set to [...]", +! so to maintain compatibility with earlier Tcl releases, we manually +! initialize counter variables: +! { dg-line l_dummy[variable c_compute 0 c_loop 0] } +! { dg-message "dummy" "" { target iN-VAl-Id } l_dummy } to avoid +! "WARNING: dg-line var l_dummy defined, but not used". 
+ +program main + implicit none (type, external) + integer :: j + integer, allocatable :: A(:) + character(len=:), allocatable :: my_str + character(len=15), allocatable :: my_str15 + + A = [(3*j, j=1, 10)] + call foo (A, size(A)) + call bar (A) + my_str = "1234567890" + call foo_str(my_str) + call bar_str(my_str) + my_str15 = "123456789012345" + call foobar (my_str15) + deallocate (A, my_str, my_str15) +contains + subroutine foo (array, nn) + integer :: i, nn + integer :: array(nn) + + !$acc parallel copyout(array) ! { dg-line l_compute[incr c_compute] } + ! { dg-note {variable 'atmp\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } + ! { dg-note {variable 'shadow_loopvar\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } + ! { dg-note {variable 'offset\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } + ! { dg-note {variable 'S\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } + ! { dg-note {variable 'test\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } + array = [(-i, i = 1, nn)] + !$acc loop gang private(array) ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } + ! { dg-note {variable 'array' in 'private' clause potentially has improper OpenACC privatization level: 'parm_decl'} "" { target *-*-* } l_loop$c_loop } + ! 
{ dg-message {sorry, unimplemented: target cannot support alloca} PR65181 { target openacc_nvidia_accel_selected } l_loop$c_loop } + do i = 1, 10 + array(i) = i + end do + if (any (array /= [(-i, i = 1, nn)])) error stop 1 + !$acc end parallel + end subroutine foo + subroutine bar (array) + integer :: i + integer :: array(:) + + !$acc parallel copyout(array) ! { dg-line l_compute[incr c_compute] } + ! { dg-note {variable 'atmp\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } + ! { dg-note {variable 'shadow_loopvar\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } + ! { dg-note {variable 'offset\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } + ! { dg-note {variable 'S\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } + ! { dg-note {variable 'test\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } + ! { dg-note {variable 'parm\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "" { target *-*-* } l_compute$c_compute } + ! { dg-note {variable 'parm\.[0-9]+' ought to be adjusted for OpenACC privatization level: 'gang'} "" { target *-*-* } l_compute$c_compute } + ! { dg-note {variable 'parm\.[0-9]+' adjusted for OpenACC privatization level: 'gang'} "" { target { ! { openacc_host_selected || openacc_nvidia_accel_selected } } } l_compute$c_compute } + ! 
{ dg-note {variable 'A\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: static} "" { target *-*-* } l_compute$c_compute } + array = [(-2*i, i = 1, size(array))] + !$acc loop gang private(array) ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } + ! { dg-note {variable 'array\.[0-9]+' in 'private' clause is candidate for adjusting OpenACC privatization level} "" { target *-*-* } l_loop$c_loop } + ! { dg-note {variable 'array\.[0-9]+' ought to be adjusted for OpenACC privatization level: 'gang'} "" { target *-*-* } l_loop$c_loop } + ! { dg-note {variable 'array\.[0-9]+' adjusted for OpenACC privatization level: 'gang'} "" { target { ! { openacc_host_selected || openacc_nvidia_accel_selected } } } l_loop$c_loop } + ! { dg-message {sorry, unimplemented: target cannot support alloca} PR65181 { target openacc_nvidia_accel_selected } l_loop$c_loop } + do i = 1, 10 + array(i) = 9*i + end do + if (any (array /= [(-2*i, i = 1, 10)])) error stop 2 + !$acc end parallel + end subroutine bar + subroutine foo_str(str) + integer :: i + character(len=*) :: str + + !$acc parallel copyout(str) + str = "abcdefghij" + !$acc loop gang private(str) ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } + ! { dg-note {variable 'str' in 'private' clause potentially has improper OpenACC privatization level: 'parm_decl'} "" { target *-*-* } l_loop$c_loop } + ! { dg-note {variable 'char\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "" { target *-*-* } l_loop$c_loop } + ! { dg-note {variable 'char\.[0-9]+' ought to be adjusted for OpenACC privatization level: 'gang'} "" { target *-*-* } l_loop$c_loop } + ! 
{ dg-note {variable 'char\.[0-9]+' adjusted for OpenACC privatization level: 'gang'} "" { target { ! { openacc_host_selected || openacc_nvidia_accel_selected } } } l_loop$c_loop } + ! { dg-message {sorry, unimplemented: target cannot support alloca} PR65181 { target openacc_nvidia_accel_selected } l_loop$c_loop } + do i = 1, 10 + str(i:i) = achar(ichar('A') + i) + end do + if (str /= "abcdefghij") error stop 3 + !$acc end parallel + end + subroutine bar_str(str) + integer :: i + character(len=:), allocatable :: str + +! *************************************** +! FIXME: Fails due to PR middle-end/95499 +! *************************************** + !!$acc parallel copyout(str) + str = "abcdefghij" + !!$acc loop gang private(str) + !do i = 1, 10 + ! str(i:i) = achar(ichar('A') + i) + !end do + if (str /= "abcdefghij") error stop 5 + !!$acc end parallel + end + subroutine foobar (scalar) + integer :: i + character(len=15), optional :: scalar + + !$acc parallel copyout(scalar) + scalar = "abcdefghi-12345" + !$acc loop gang private(scalar) ! { dg-line l_loop[incr c_loop] } + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop$c_loop } + ! { dg-note {variable 'scalar' in 'private' clause potentially has improper OpenACC privatization level: 'parm_decl'} "" { target *-*-* } l_loop$c_loop } + ! { dg-note {variable 'char\.[0-9]+' declared in block is candidate for adjusting OpenACC privatization level} "" { target *-*-* } l_loop$c_loop } + ! { dg-note {variable 'char\.[0-9]+' ought to be adjusted for OpenACC privatization level: 'gang'} "" { target *-*-* } l_loop$c_loop } + ! { dg-note {variable 'char\.[0-9]+' adjusted for OpenACC privatization level: 'gang'} "" { target { ! 
{ openacc_host_selected || openacc_nvidia_accel_selected } } } l_loop$c_loop } + do i = 1, 15 + scalar(i:i) = achar(ichar('A') + i) + end do + !$acc end parallel + if (scalar /= "abcdefghi-12345") error stop 6 + end subroutine foobar + subroutine foobar15 (scalar) + integer :: i + character(len=15), optional, allocatable :: scalar + + !$acc parallel copyout(scalar) + scalar = "abcdefghi-12345" + !$acc loop gang private(scalar) + do i = 1, 15 + scalar(i:i) = achar(ichar('A') + i) + end do + !$acc end parallel + if (scalar /= "abcdefghi-12345") error stop 1 + end subroutine foobar15 +end diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90 index 764affd..95c3ed7 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90 @@ -1,5 +1,7 @@ ! { dg-do run } -! { dg-additional-options "-w" } + +! { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting +! aspects of that functionality. ! Integer reductions @@ -280,6 +282,7 @@ program reduction_1 !$acc end parallel !$acc parallel vector_length(vl) copy(rv) + ! { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-1 } !$acc loop reduction(ior:rv) gang do i = 1, n rv = ior (rv, array(i)) diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90 index 833d0e4..5d1c1d9 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90 @@ -1,5 +1,7 @@ ! { dg-do run } -! { dg-additional-options "-w" } + +! { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting +! aspects of that functionality. ! subroutine reduction @@ -46,6 +48,7 @@ subroutine redsub_worker(sum, n, c) sum = 0 !$acc parallel copyin (n, c) num_workers(4) vector_length (32) copy(sum) + ! 
{ dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-1 } !$acc loop reduction(+:sum) worker do i = 1, n sum = sum + c diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-6.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-6.f90 index e76867a..6908d16 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/reduction-6.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-6.f90 @@ -1,5 +1,8 @@ ! { dg-do run } -! { dg-additional-options "-cpp -w" } +! { dg-additional-options "-cpp" } + +! { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting +! aspects of that functionality. program reduction implicit none @@ -28,6 +31,7 @@ program reduction !$acc end parallel !$acc parallel num_workers (4) vector_length (32) + ! { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-1 } !$acc loop reduction(+:ws1, ws2) worker do i = 1, n ws1 = ws1 + 1 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-7.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-7.f90 index a7d6dd8..a8b0c60 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/reduction-7.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-7.f90 @@ -1,5 +1,7 @@ ! { dg-do run } -! { dg-additional-options "-w" } + +!TODO +! { dg-xfail-run-if TODO { openacc_radeon_accel_selected && { ! __OPTIMIZE__ } } } ! subroutine reduction with private and firstprivate variables diff --git a/libgomp/testsuite/libgomp.oacc-fortran/routine-7.f90 b/libgomp/testsuite/libgomp.oacc-fortran/routine-7.f90 index 1009f4a..75660bb 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/routine-7.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/routine-7.f90 @@ -2,6 +2,16 @@ ! { dg-do run } ! { dg-additional-options "-cpp" } +! { dg-additional-options "-fopt-info-note-omp" } +! { dg-additional-options "--param=openacc-privatization=noisy" } +! 
{ dg-additional-options "-foffload=-fopt-info-note-omp" } +! { dg-additional-options "-foffload=--param=openacc-privatization=noisy" } +! for testing/documenting aspects of that functionality. + +! { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting +! aspects of that functionality. +!TODO { dg-additional-options "-fno-inline" } for stable results regarding OpenACC 'routine'. + #define M 8 #define N 32 @@ -16,6 +26,7 @@ program main !$acc parallel copy (a) !$acc loop seq + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do i = 1, N call seq (a) end do @@ -27,6 +38,7 @@ program main !$acc parallel copy (a) !$acc loop seq + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do i = 1, N call gang (a) end do @@ -42,6 +54,7 @@ program main !$acc parallel copy (b) !$acc loop seq + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do i = 1, N call worker (b) end do @@ -57,6 +70,7 @@ program main !$acc parallel copy (a) !$acc loop seq + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do i = 1, N call vector (a) end do @@ -74,6 +88,7 @@ subroutine vector (a) integer :: i !$acc loop vector + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do i = 1, N a(i) = a(i) - a(i) end do @@ -86,8 +101,10 @@ subroutine worker (b) integer :: i, j !$acc loop worker + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do i = 1, N !$acc loop vector + ! 
{ dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do j = 1, M b(j + ((i - 1) * M)) = b(j + ((i - 1) * M)) + 1 end do @@ -97,10 +114,13 @@ end subroutine worker subroutine gang (a) !$acc routine gang + ! { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .-2 } + ! { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-3 } integer, intent (inout) :: a(N) integer :: i !$acc loop gang + ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } do i = 1, N a(i) = a(i) - i end do diff --git a/libgomp/testsuite/libgomp.oacc-fortran/routine-nohost-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/routine-nohost-1.f90 new file mode 100644 index 0000000..b0537b8 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/routine-nohost-1.f90 @@ -0,0 +1,63 @@ +! Test 'nohost' clause via 'acc_on_device'. + +! { dg-do run } + +! With optimizations disabled, we currently don't expect that 'acc_on_device' "evaluates at compile time to a constant". +! { dg-skip-if "TODO PR82391" { *-*-* } { "-O0" } } + +! { dg-additional-options "-fdump-tree-oaccloops" } + +program main + use openacc + implicit none + integer, parameter :: n = 10 + integer :: a(n), i + integer, external :: fact_nohost + !$acc routine (fact_nohost) + integer, external :: fact + + !$acc parallel loop + do i = 1, n + if (acc_on_device(acc_device_not_host)) then + a(i) = fact_nohost(i) + else + a(i) = 0 + end if + end do + !$acc end parallel loop + + do i = 1, n + if (acc_get_device_type() .eq. acc_device_host) then + if (a(i) .ne. 0) stop 10 + i + else + if (a(i) .ne. 
fact(i)) stop 20 + i + end if + end do +end program main + +recursive function fact(x) result(res) + implicit none + !$acc routine (fact) + integer, intent(in) :: x + integer :: res + + if (x < 1) then + res = 1 + else + res = x * fact(x - 1) + end if +end function fact + +function fact_nohost(x) result(res) + use openacc + implicit none + !$acc routine (fact_nohost) nohost + integer, intent(in) :: x + integer :: res + integer, external :: fact + + res = fact(x) +end function fact_nohost +! { dg-final { scan-tree-dump-times {(?n)^OpenACC routine 'fact_nohost' has 'nohost' clause\.$} 1 oaccloops { target { ! offloading_enabled } } } } +! { dg-final { scan-tree-dump-times {(?n)^OpenACC routine 'fact_nohost_' has 'nohost' clause\.$} 1 oaccloops { target offloading_enabled } } } +!TODO See PR101551 for 'offloading_enabled' differences. |