diff options
author | Aldy Hernandez <aldyh@redhat.com> | 2020-06-17 07:50:57 -0400 |
---|---|---|
committer | Aldy Hernandez <aldyh@redhat.com> | 2020-06-17 07:50:57 -0400 |
commit | b9e67f2840ce0d8859d96e7f8df8fe9584af5eba (patch) | |
tree | ed3b7284ff15c802583f6409b9c71b3739642d15 /libgomp | |
parent | 1957047ed1c94bf17cf993a2b1866965f493ba87 (diff) | |
parent | 56638b9b1853666f575928f8baf17f70e4ed3517 (diff) | |
download | gcc-b9e67f2840ce0d8859d96e7f8df8fe9584af5eba.zip gcc-b9e67f2840ce0d8859d96e7f8df8fe9584af5eba.tar.gz gcc-b9e67f2840ce0d8859d96e7f8df8fe9584af5eba.tar.bz2 |
Merge from trunk at:
commit 56638b9b1853666f575928f8baf17f70e4ed3517
Author: GCC Administrator <gccadmin@gcc.gnu.org>
Date: Wed Jun 17 00:16:36 2020 +0000
Daily bump.
Diffstat (limited to 'libgomp')
144 files changed, 5749 insertions, 816 deletions
diff --git a/libgomp/.gitattributes b/libgomp/.gitattributes new file mode 100644 index 0000000..47e74eb --- /dev/null +++ b/libgomp/.gitattributes @@ -0,0 +1,2 @@ +# For the Fortran file, complain about tabs +openacc_lib.h whitespace=tab-in-indent,space-before-tab,trailing-space diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index a204585..ae72976 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,691 @@ +2020-06-16 Tobias Burnus <tobias@codesourcery.com> + + * testsuite/libgomp.oacc-fortran/routine-10.f90: New test. + +2020-06-08 Tobias Burnus <tobias@codesourcery.com> + + PR lto/94848 + PR middle-end/95551 + * testsuite/libgomp.fortran/target-var.f90: New test. + +2020-06-05 Thomas Schwinge <thomas@codesourcery.com> + Julian Brown <julian@codesourcery.com> + + * oacc-mem.c (goacc_exit_data_internal) <GOMP_MAP_STRUCT>: Explain + special handling. + +2020-06-05 Thomas Schwinge <thomas@codesourcery.com> + Julian Brown <julian@codesourcery.com> + + * oacc-mem.c (goacc_exit_data_internal) <GOMP_MAP_STRUCT>: + Simplify. + +2020-06-05 Julian Brown <julian@codesourcery.com> + + * testsuite/libgomp.oacc-c-c++-common/struct-copyout-1.c: New test. + * testsuite/libgomp.oacc-c-c++-common/struct-copyout-2.c: New test. + +2020-06-04 Thomas Schwinge <thomas@codesourcery.com> + + * oacc-mem.c (goacc_exit_data_internal) <GOMP_MAP_STRUCT>: + Evaluate 'copyfrom' individually for each entry. + * testsuite/libgomp.oacc-c-c++-common/struct-1.c: Update. + +2020-06-04 Thomas Schwinge <thomas@codesourcery.com> + + * oacc-mem.c (goacc_exit_data_internal) <GOMP_MAP_STRUCT>: + Evaluate 'finalize' individually for each entry. + * testsuite/libgomp.oacc-c-c++-common/struct-1.c: New file. + * testsuite/libgomp.oacc-c-c++-common/struct-refcount-1.c: Remove + file. + +2020-06-04 Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c: Fix 'sizeof' + usage. + * testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c: Likewise. 
+ +2020-06-04 Thomas Schwinge <thomas@codesourcery.com> + Julian Brown <julian@codesourcery.com> + + * oacc-mem.c (goacc_exit_datum): Repair 'is_tgt_unmapped' + checking. + (acc_unmap_data, goacc_exit_data_internal): Restore + 'is_tgt_unmapped' checking. + * testsuite/libgomp.oacc-c-c++-common/struct-refcount-1.c: New + file. + * testsuite/libgomp.oacc-fortran/deep-copy-6.f90: Adjust. + * testsuite/libgomp.oacc-fortran/mdc-refcount-1-1-1.f90: Likewise. + * testsuite/libgomp.oacc-fortran/mdc-refcount-1-2-1.f90: Likewise. + * testsuite/libgomp.oacc-fortran/mdc-refcount-1-2-2.f90: Likewise. + * testsuite/libgomp.oacc-fortran/mdc-refcount-1-3-1.f90: Likewise. + * testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-1.f90: Likewise. + +2020-06-04 Thomas Schwinge <thomas@codesourcery.com> + Julian Brown <julian@codesourcery.com> + + * oacc-mem.c (acc_unmap_data): Don't open-code 'gomp_remove_var'. + +2020-06-04 Thomas Schwinge <thomas@codesourcery.com> + + PR libgomp/92854 + * oacc-mem.c (acc_unmap_data): Remove 'tgt' reference counting. + +2020-06-04 Thomas Schwinge <thomas@codesourcery.com> + + PR libgomp/92854 + * testsuite/libgomp.oacc-c-c++-common/pr92854-1.c: Extend some + more. + +2020-06-04 Thomas Schwinge <thomas@codesourcery.com> + Julian Brown <julian@codesourcery.com> + + * oacc-mem.c (goacc_enter_datum): Use 'tgt' returned from + 'gomp_map_vars'. + (acc_map_data): Clean up accordingly. + +2020-06-04 Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/libgomp.oacc-fortran/deep-copy-6.f90: XFAIL behavior + of over-eager 'finalize' clause. + * testsuite/libgomp.oacc-fortran/deep-copy-6-no_finalize.F90: New + file. + * testsuite/libgomp.oacc-fortran/mdc-refcount-1-1-1.f90: Likewise. + * testsuite/libgomp.oacc-fortran/mdc-refcount-1-1-2.F90: Likewise. + * testsuite/libgomp.oacc-fortran/mdc-refcount-1-2-1.f90: Likewise. + * testsuite/libgomp.oacc-fortran/mdc-refcount-1-2-2.f90: Likewise. + * testsuite/libgomp.oacc-fortran/mdc-refcount-1-3-1.f90: Likewise. 
+ * testsuite/libgomp.oacc-fortran/mdc-refcount-1-3-2.f90: Likewise. + * testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-1.f90: Likewise. + * testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-2.f90: Likewise. + +2020-06-04 Thomas Schwinge <thomas@codesourcery.com> + + * oacc-mem.c (goacc_exit_data_internal): Unlock on error path. + +2020-06-04 Julian Brown <julian@codesourcery.com> + + * oacc-mem.c (acc_attach_async): Add missing gomp_mutex_unlock on + error path. + (goacc_detach_internal): Likewise. + +2020-06-04 Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/libgomp.oacc-fortran/error_stop-1.f: Initialize before + the checkpoint. + * testsuite/libgomp.oacc-fortran/error_stop-2.f: Likewise. + * testsuite/libgomp.oacc-fortran/error_stop-3.f: Likewise. + * testsuite/libgomp.oacc-fortran/stop-1.f: Likewise. + * testsuite/libgomp.oacc-fortran/stop-2.f: Likewise. + * testsuite/libgomp.oacc-fortran/stop-3.f: Likewise. + +2020-06-02 Jakub Jelinek <jakub@redhat.com> + + * allocator.c (omp_free): Fix up build if HAVE_SYNC_BUILTINS is not + defined. + +2020-05-30 Jakub Jelinek <jakub@redhat.com> + + * testsuite/libgomp.c-c++-common/alloc-4.c: New test. + +2020-05-30 Jakub Jelinek <jakub@redhat.com> + + * allocator.c (omp_alloc): For size == 0, return NULL early. + +2020-05-29 H.J. Lu <hjl.tools@gmail.com> + + PR bootstrap/95413 + * configure: Regenerated. + +2020-05-23 Thomas Koenig <tkoenig@gcc.gnu.org> + + PR libfortran/95191 + * testsuite/libgomp.fortran/async_io_9.f90: New test. + +2020-05-19 Jakub Jelinek <jakub@redhat.com> + + * omp.h.in (omp_uintptr_t): New typedef. + (__GOMP_UINTPTR_T_ENUM): Define. + (omp_memspace_handle_t, omp_allocator_handle_t, omp_alloctrait_key_t, + omp_alloctrait_value_t, omp_alloctrait_t): New typedefs. + (__GOMP_DEFAULT_NULL_ALLOCATOR): Define. + (omp_init_allocator, omp_destroy_allocator, omp_set_default_allocator, + omp_get_default_allocator, omp_alloc, omp_free): Declare. 
+ * libgomp.h (struct gomp_team_state): Add def_allocator field. + (gomp_def_allocator): Declare. + * libgomp.map (OMP_5.0.1): Export omp_set_default_allocator, + omp_get_default_allocator, omp_init_allocator, omp_destroy_allocator, + omp_alloc and omp_free. + * team.c (gomp_team_start): Copy over ts.def_allocator. + * env.c (gomp_def_allocator): New variable. + (parse_wait_policy): Adjust function comment. + (parse_allocator): New function. + (handle_omp_display_env): Print OMP_ALLOCATOR. + (initialize_env): Call parse_allocator. + * Makefile.am (libgomp_la_SOURCES): Add allocator.c. + * allocator.c: New file. + * icv.c (omp_set_default_allocator, omp_get_default_allocator): New + functions. + * testsuite/libgomp.c-c++-common/alloc-1.c: New test. + * testsuite/libgomp.c-c++-common/alloc-2.c: New test. + * testsuite/libgomp.c-c++-common/alloc-3.c: New test. + * Makefile.in: Regenerated. + +2020-05-15 H.J. Lu <hongjiu.lu@intel.com> + + PR bootstrap/95147 + * configure: Regenerated. + +2020-05-14 Thomas Koenig <tkoenig@gcc.gnu.org> + + PR libfortran/95119 + * testsuite/libgomp.fortran/close_errors_1.f90: New test. + +2020-05-14 H.J. Lu <hongjiu.lu@intel.com> + + * configure: Regenerated. + +2020-05-14 Jakub Jelinek <jakub@redhat.com> + + * testsuite/libgomp.c-c++-common/target-40.c: New test. + +2020-05-13 Tobias Burnus <tobias@codesourcery.com> + + PR fortran/94690 + * testsuite/libgomp.fortran/pr66199-3.f90: New. + * testsuite/libgomp.fortran/pr66199-4.f90: New. + * testsuite/libgomp.fortran/pr66199-5.f90: New. + * testsuite/libgomp.fortran/pr66199-6.f90: New. + * testsuite/libgomp.fortran/pr66199-7.f90: New. + * testsuite/libgomp.fortran/pr66199-8.f90: New. + * testsuite/libgomp.fortran/pr66199-9.f90: New. + +2020-05-12 Jakub Jelinek <jakub@redhat.com> + + * testsuite/libgomp.c/target-39.c: New test. + +2020-04-29 Thomas Schwinge <thomas@codesourcery.com> + + * config/accel/openacc.f90 (acc_device_current): Set to '-1'. 
+ * openacc.f90 (acc_device_current): Likewise. + * openacc.h (acc_device_current): Likewise. + * openacc_lib.h (acc_device_current): Likewise. + + PR target/94282 + * testsuite/libgomp.c-c++-common/function-not-offloaded.c: Remove + 'dg-allow-blank-lines-in-output'. + + * oacc-init.c (get_openacc_name): Handle 'gcn'. + * testsuite/lib/libgomp.exp + (offload_target_to_openacc_device_type) [amdgcn*]: Return + 'radeon'. Adjust all users. + (check_effective_target_openacc_amdgcn_accel_present): Rename + to... + (check_effective_target_openacc_radeon_accel_present): ... this. + Adjust all users. + (check_effective_target_openacc_amdgcn_accel_selected): Rename to... + (check_effective_target_openacc_radeon_accel_selected): ... this. + Adjust all users. + + * testsuite/libgomp.fortran/use_device_ptr-optional-2.f90: Add + 'dg-do run'. + +2020-04-23 Andrew Stubbs <ams@codesourcery.com> + + PR other/94629 + + * plugin/plugin-gcn.c (init_hsa_context): Check return value from + hsa_iterate_agents. + (GOMP_OFFLOAD_init_device): Check return values from both calls to + hsa_agent_iterate_regions. + +2020-04-20 Thomas Schwinge <thomas@codesourcery.com> + + PR middle-end/94635 + * testsuite/libgomp.fortran/target-enter-data-2.F90: Add 'dg-do + run'. + +2020-04-20 Tobias Burnus <tobias@codesourcery.com> + + PR middle-end/94120 + * testsuite/libgomp.oacc-c++/declare-pr94120.C: Fix 'declare copy(out)' + test case. + +2020-04-17 Tobias Burnus <tobias@codesourcery.com> + + PR middle-end/94635 + * testsuite/libgomp.fortran/target-enter-data-2.F90: New. + +2020-04-13 Thomas Schwinge <thomas@codesourcery.com> + + PR libgomp/92843 + * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-1-lib.c: + Rename to... + * testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-1-lib.c: + ... this. + * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-1.c: + Rename to... + * testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-1.c: + ... this. 
+ * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-2-lib.c: + Rename to... + * testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-2-lib.c: + ... this. + * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-2.c: + Rename to... + * testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-2.c: + ... this. + * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-3-lib.c: + Rename to... + * testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-3-lib.c: + ... this. + * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-3.c: + Rename to... + * testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-3.c: + ... this. + * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-4-lib.c: + Rename to... + * testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-4-lib.c: + ... this. + * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-4.c: + Rename to... + * testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-4.c: + ... this. + * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-5-lib.c: + Rename to... + * testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-5-lib.c: + ... this. + * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-5.c: + Rename to... + * testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-5.c: + ... this. + * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-6-lib.c: + Rename to... + * testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-6-lib.c: + ... this. + * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-6.c: + Rename to... + * testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-6.c: + ... this. + * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-7-lib.c: + Rename to... + * testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-7-lib.c: + ... this. 
+ * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-7.c: + Rename to... + * testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-7.c: + ... this. + * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-8-lib.c: + Rename to... + * testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-8-lib.c: + ... this. + * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-8.c: + Rename to... + * testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-8.c: + ... this. + +2020-04-10 Julian Brown <julian@codesourcery.com> + Thomas Schwinge <thomas@codesourcery.com> + + PR libgomp/92843 + * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-1-lib.c: + New file. + * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-1.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-2-lib.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-2.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-3-lib.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-3.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-4-lib.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-4.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-5-lib.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-5.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-6-lib.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-6.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-7-lib.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-7.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-8-lib.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-8.c: + Likewise. 
+ +2020-04-10 Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/libgomp.fortran/target-enter-data-1.f90: Add 'dg-do + run'. + +2020-04-08 Tobias Burnus <tobias@codesourcery.com> + + PR middle-end/94120 + * libgomp.oacc-c++/declare-pr94120.C: New. + +2020-04-06 Maciej W. Rozycki <macro@wdc.com> + + * configure.ac: Add testsuite/libgomp-site-extra.exp to output + files. + * configure: Regenerate. + * testsuite/libgomp-site-extra.exp.in: New file. + * testsuite/libgomp-test-support.exp.in (GCC_UNDER_TEST): Remove + variable. + * testsuite/Makefile.am (EXTRA_DEJAGNU_SITE_CONFIG): New + variable. + * testsuite/Makefile.in: Regenerate. + +2020-04-03 Thomas Schwinge <thomas@codesourcery.com> + + PR tree-optimization/89713 + PR c/94392 + * testsuite/libgomp.oacc-c-c++-common/pr85381-2.c: Again expect + 'bar.sync'. + * testsuite/libgomp.oacc-c-c++-common/pr85381-4.c: Likewise. + +2020-03-31 Tobias Burnus <tobias@codesourcery.com> + + * target.c (GOMP_target_enter_exit_data): Handle PSET/MAP_POINTER. + * testsuite/libgomp.fortran/target-enter-data-1.f90: New. + +2020-03-24 Tobias Burnus <tobias@codesourcery.com> + + PR libgomp/81689 + * testsuite/libgomp.c/target-link-1.c: Remove xfail. + +2020-03-20 Tobias Burnus <tobias@codesourcery.com> + + PR libgomp/94251 + * target.c (gomp_load_image_to_device): Fix link + variable handling. + +2020-03-19 Jakub Jelinek <jakub@redhat.com> + + PR c++/93931 + * testsuite/libgomp.c++/pr93931.C: New test. + +2020-03-19 Tobias Burnus <tobias@codesourcery.com> + + * testsuite/libgomp.c-c++-common/function-not-offloaded.c: Add + dg-allow-blank-lines-in-output. + +2020-03-18 Julian Brown <julian@codesourcery.com> + Tobias Burnus <tobias@codesourcery.com> + + * testsuite/libgomp.oacc-fortran/atomic_capture-1.f90: Really make + it work concurrently. + +2020-03-18 Tobias Burnus <tobias@codesourcery.com> + + * testsuite/libgomp.oacc-c++/firstprivate-mappings-1.C: Add + #define DO_LONG_DOUBLE; set to 1, except for nvidia + gcn. 
+ * libgomp.oacc-c-c++-common/firstprivate-mappings-1.c: Likewise. + +2020-03-14 Jakub Jelinek <jakub@redhat.com> + + PR middle-end/93566 + * testsuite/libgomp.c/pr93566.c: New test. + +2020-02-21 Frederik Harwath <frederik@codesourcery.com> + + * testsuite/libgomp.oacc-fortran/acc_get_property.f90: Adapt to + changes from 2020-02-19, i.e. use integer(c_size_t) instead of + integer(acc_device_property) for the type of the return value of + acc_get_property. + +2020-02-19 Tobias Burnus <tobias@codesourcery.com> + + * .gitattributes: New; whitespace handling for Fortran's openacc_lib.h. + * config/accel/openacc.f90 (openacc_kinds): Add acc_device_current. + (openacc_internal, acc_on_device_h): Fix argument name; minor cleanup. + * libgomp.texi (Enabling OpenACC): No longer mark as experimental. + (acc_set_device_num): Fix Fortran argument name, use same name for C. + (acc_get_property): Update Fortran interface to post-OpenACC 3.0 + corrections; add note about the previous interface and named constant. + (OpenACC library and environment variables): Fix two typos. + * openacc.f90: Use for all procedures the argument names from the spec + as for …_h they are user visible. + (openacc_kinds): Rename acc_device_property to + acc_device_property_kinds and change value to int32; and update users. + Re-add acc_device_property for backward compatibility. + (acc_get_property_string_h): Clean up as acc_device_property_kind + changed. + (acc_get_property_h): Likewise and return c_size_t instead of + acc_device_property. + (openacc): Also export acc_device_property_kinds. + (acc_async_test_h, acc_async_test_all_h, acc_on_device_h, + acc_is_present_32_h, acc_is_present_64_h): Simplify logical-return-value + handling; check against /= 0 instead of == 1 to match C. + * openacc_lib.h: Use for all procedures the argument names from the spec + as for …_h they are user visible. Place !GCC$ into the first column to + be active also for fixed-form source form. 
+ (acc_device_current, acc_device_property_kind, acc_device_property, + acc_property_memory, acc_property_free_memory, acc_property_name, + acc_property_vendor, acc_property_driver): New named constants. + (acc_get_property, acc_get_property_string): New generic interface. + +2020-02-13 Frederik Harwath <frederik@codesourcery.com> + + PR libgomp/93481 + * plugin/plugin-nvptx.c: Remove GOMP_OFFLOAD_async_run stub. + * target.c (gomp_load_plugin_for_device): Make "async_run" loading + optional. + (gomp_target_task_fn): Assert "devicep->async_run_func". + (clear_unsupported_flags): New function to remove unsupported flags + (right now only GOMP_TARGET_FLAG_NOWAIT) that can be ignored. + (GOMP_target_ext): Apply clear_unsupported_flags to flags. + * testsuite/libgomp.c/target-33.c: + Remove xfail for offload_target_nvptx. + * testsuite/libgomp.c/target-34.c: Likewise. + +2020-02-10 Frederik Harwath <frederik@codesourcery.com> + + * testsuite/libgomp.c/target-33.c: Add xfail for execution on + offload_target_nvptx, cf. https://gcc.gnu.org/PR81688. + * testsuite/libgomp.c/target-34.c: Likewise. + * testsuite/libgomp.c/target-link-1.c: Add xfail for + offload_target_nvptx, cf. https://gcc.gnu.org/PR81689. + +2020-02-09 Jakub Jelinek <jakub@redhat.com> + + * testsuite/libgomp.c/target-38.c: New test. + +2020-02-06 Jakub Jelinek <jakub@redhat.com> + + PR libgomp/93515 + * testsuite/libgomp.c-c++-common/pr93515.c: New test. + +2020-02-05 Tobias Burnus <tobias@codesourcery.com> + + * testsuite/lib/libgomp.exp + (check_effective_target_offload_target_nvptx): Pass flags as 'options' + and not as 'source' argument to libgomp_target_compile. + +2020-02-03 Andrew Stubbs <ams@codesourcery.com> + + * plugin/plugin-gcn.c (EF_AMDGPU_MACH_AMDGCN_GFX801): Remove. + (gcn_gfx801_s): Remove. + (isa_hsa_name): Remove gfx801. + (isa_gcc_name): Remove gfx801/carrizo. + (isa_code): Remove gfx801. 
+ +2020-02-03 Julian Brown <julian@codesourcery.com> + Tobias Burnus <tobias@codesourcery.com> + + * libgomp.texi (OpenACC Runtime Library Routines): Document *_async + and *_finalize variants; document acc_attach and acc_detach; update + references from OpenACC 2.0 to 2.6. + * openacc.f90 (openacc_version): Update to 201711. + * openacc_lib.h (openacc_version): Update to 201711. + * testsuite/libgomp.oacc-fortran/openacc_version-1.f: Update expected + openacc_version to 201711. + * testsuite/libgomp.oacc-fortran/openacc_version-2.f90: Likewise. + +2020-01-31 Kwok Cheung Yeung <kcy@codesourcery.com> + + * plugin/plugin-gcn.c (struct hsa_kernel_description): Add sgpr_count + and vgpr_count fields. + (struct kernel_info): Add a field for a hsa_kernel_description. + (run_kernel): Reduce the number of threads/workers if the requested + number would require too many VGPRs. + (init_basic_kernel_info): Initialize description field with + the hsa_kernel_description entry for the kernel. + +2020-01-29 Tobias Burnus <tobias@codesourcery.com> + + PR bootstrap/93409 + * plugin/configfrag.ac (enable_offload_targets): Skip + HSA and GCN plugin besides -m32 also for -mx32. + * configure: Regenerate. + +2020-01-29 Frederik Harwath <frederik@codesourcery.com> + + * oacc-init.c (name_of_acc_device_t): Handle acc_device_radeon. + +2020-01-29 Frederik Harwath <frederik@codesourcery.com> + + * plugin-gcn.c (struct agent_info): Add fields "name" and + "vendor_name" ... + (GOMP_OFFLOAD_init_device): ... and init from here. + (struct hsa_context_info): Add field "driver_version_s" ... + (init_hsa_context): ... and init from here. + (GOMP_OFFLOAD_openacc_get_property): Replace stub with a proper + implementation. + * testsuite/libgomp.oacc-c-c++-common/acc_get_property.c: + Enable test execution for amdgcn and host offloading targets. + * testsuite/libgomp.oacc-fortran/acc_get_property.f90: Likewise. 
+ * testsuite/libgomp.oacc-c-c++-common/acc_get_property-aux.c + (expect_device_properties): Split function into ... + (expect_device_string_properties): ... this new function ... + (expect_device_memory): ... and this new function. + * testsuite/libgomp.oacc-c-c++-common/acc_get_property-gcn.c: + Add test. + +2020-01-28 Julian Brown <julian@codesourcery.com> + + * testsuite/libgomp.oacc-fortran/deep-copy-2.f90: Remove test from here. + * testsuite/libgomp.oacc-fortran/deep-copy-3.f90: Don't use mixed + component/non-component variable refs in a single directive. + * testsuite/libgomp.oacc-fortran/classtypes-1.f95: Likewise. + +2020-01-24 Maciej W. Rozycki <macro@wdc.com> + + * configure.ac: Handle `--with-toolexeclibdir='. + * Makefile.in: Regenerate. + * aclocal.m4: Regenerate. + * configure: Regenerate. + * testsuite/Makefile.in: Regenerate. + +2020-01-24 Frederik Harwath <frederik@codesourcery.com> + + * testsuite/libgomp.oacc-c-c++-common/acc_get_property-aux.c + (expect_device_properties): Remove "expected_free_mem" argument, + change "expected_total_mem" argument type to size_t; + change types of acc_get_property results to size_t, + adapt format strings. + * testsuite/libgomp.oacc-c-c++-common/acc_get_property.c: + Use %zu instead of %zd to print size_t values. + * testsuite/libgomp.oacc-c-c++-common/acc_get_property-2.c: Adapt and + rename to ... + * testsuite/libgomp.oacc-c-c++-common/acc_get_property-nvptx.c: ... this. + * testsuite/libgomp.oacc-c-c++-common/acc_get_property-3.c: Adapt and + rename to ... + * testsuite/libgomp.oacc-c-c++-common/acc_get_property-host.c: ... this. + +2020-01-23 Andrew Stubbs <ams@codesourcery.com> + + * plugin/plugin-gcn.c (parse_target_attributes): Use correct mask for + the device id. + +2020-01-20 Andrew Stubbs <ams@codesourcery.com> + + * testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c: Skip test on gcn. + * testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c (main): + Adjust test dimensions for amdgcn. 
+ * testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c (main): Adjust + gang/worker/vector expectations dynamically. + * testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c + (main): Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c (main): Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c (main): Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c (main): Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c (main): Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c (main): Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-v-1.c (main): Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-w-1.c (main): Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c (main): Likewise. + * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c + (acc_gang): Recognise acc_device_radeon. + (acc_worker): Likewise. + (acc_vector): Likewise. + (main): Set expectations for amdgcn. + * testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c + (main): Adjust gang/worker/vector expectations dynamically. + * testsuite/libgomp.oacc-c-c++-common/routine-v-1.c (main): Likewise. + * testsuite/libgomp.oacc-c-c++-common/routine-w-1.c (main): Likewise. + * testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c (main): Likewise. + * testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c: Set expectations + for amdgcn. + +2020-01-17 Andrew Stubbs <ams@codesourcery.com> + + * config/accel/openacc.f90 (openacc_kinds): Rename acc_device_gcn to + acc_device_radeon. + (openacc): Likewise. + * openacc.f90 (openacc_kinds): Likewise. + (openacc): Likewise. + * openacc.h (acc_device_t): Likewise. + * openacc_lib.h: Likewise. + * testsuite/lib/libgomp.exp + (check_effective_target_openacc_amdgcn_accel_present): Likewise. + * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c + (cb_compute_construct_end): Likewise. + * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c + (cb_enqueue_launch_start): Likewise. 
+ * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c + (cb_enter_data_end): Likewise. + (cb_exit_data_start): Likewise. + (cb_exit_data_end): Likewise. + (cb_compute_construct_end): Likewise. + (cb_enqueue_launch_start): Likewise. + (cb_enqueue_launch_end): Likewise. + * testsuite/libgomp.oacc-c-c++-common/asyncwait-nop-1.c + (main): Likewise. + +2020-01-10 Thomas Schwinge <thomas@codesourcery.com> + + * libgomp-plugin.h (enum goacc_property): New. Adjust all users + to use this instead of 'enum gomp_device_property'. + (GOMP_OFFLOAD_get_property): Rename to... + (GOMP_OFFLOAD_openacc_get_property): ... this. Adjust all users. + * libgomp.h (struct gomp_device_descr): Move + 'GOMP_OFFLOAD_openacc_get_property'... + (struct acc_dispatch_t): ... here. Adjust all users. + * plugin/plugin-hsa.c (GOMP_OFFLOAD_get_property): Remove. + + * target.c (gomp_map_vars_internal) + <GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT>: Clean up/elaborate code + paths. + +2020-01-10 Jakub Jelinek <jakub@redhat.com> + + PR libgomp/93219 + * libgomp.h (gomp_print_string): Change return type from void to int. + * affinity-fmt.c (gomp_print_string): Likewise. Return true if + not all characters have been written. + +2020-01-08 Tobias Burnus <tobias@codesourcery.com> + + * libgomp.texi: Fix typos, use https. 
+ 2020-01-03 Tobias Burnus <tobias@codesourcery.com> * testsuite/libgomp.fortran/optional-map.f90: Add test for diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am index 669b9e4..b841562 100644 --- a/libgomp/Makefile.am +++ b/libgomp/Makefile.am @@ -65,7 +65,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \ proc.c sem.c bar.c ptrlock.c time.c fortran.c affinity.c target.c \ splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c oacc-init.c \ oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \ - affinity-fmt.c teams.c oacc-profiling.c oacc-target.c + affinity-fmt.c teams.c allocator.c oacc-profiling.c oacc-target.c include $(top_srcdir)/plugin/Makefrag.am diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in index 3d772ee..5ff2ac1 100644 --- a/libgomp/Makefile.in +++ b/libgomp/Makefile.in @@ -133,10 +133,12 @@ am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \ $(top_srcdir)/../config/lthostflags.m4 \ $(top_srcdir)/../config/multi.m4 \ $(top_srcdir)/../config/override.m4 \ - $(top_srcdir)/../config/tls.m4 $(top_srcdir)/../ltoptions.m4 \ - $(top_srcdir)/../ltsugar.m4 $(top_srcdir)/../ltversion.m4 \ - $(top_srcdir)/../lt~obsolete.m4 $(top_srcdir)/acinclude.m4 \ - $(top_srcdir)/../libtool.m4 $(top_srcdir)/../config/cet.m4 \ + $(top_srcdir)/../config/tls.m4 \ + $(top_srcdir)/../config/toolexeclibdir.m4 \ + $(top_srcdir)/../ltoptions.m4 $(top_srcdir)/../ltsugar.m4 \ + $(top_srcdir)/../ltversion.m4 $(top_srcdir)/../lt~obsolete.m4 \ + $(top_srcdir)/acinclude.m4 $(top_srcdir)/../libtool.m4 \ + $(top_srcdir)/../config/cet.m4 \ $(top_srcdir)/plugin/configfrag.ac $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) @@ -229,7 +231,8 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \ target.lo splay-tree.lo libgomp-plugin.lo oacc-parallel.lo \ oacc-host.lo oacc-init.lo oacc-mem.lo oacc-async.lo \ oacc-plugin.lo oacc-cuda.lo priority_queue.lo 
affinity-fmt.lo \ - teams.lo oacc-profiling.lo oacc-target.lo $(am__objects_1) + teams.lo allocator.lo oacc-profiling.lo oacc-target.lo \ + $(am__objects_1) libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) @@ -570,7 +573,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \ affinity.c target.c splay-tree.c libgomp-plugin.c \ oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \ oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \ - affinity-fmt.c teams.c oacc-profiling.c oacc-target.c \ + affinity-fmt.c teams.c allocator.c oacc-profiling.c oacc-target.c \ $(am__append_4) # Nvidia PTX OpenACC plugin. @@ -763,6 +766,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/affinity-fmt.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/affinity.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/alloc.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/allocator.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/atomic.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bar.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/barrier.Plo@am__quote@ diff --git a/libgomp/aclocal.m4 b/libgomp/aclocal.m4 index 1212599..55d9d71 100644 --- a/libgomp/aclocal.m4 +++ b/libgomp/aclocal.m4 @@ -1177,6 +1177,7 @@ m4_include([../config/lthostflags.m4]) m4_include([../config/multi.m4]) m4_include([../config/override.m4]) m4_include([../config/tls.m4]) +m4_include([../config/toolexeclibdir.m4]) m4_include([../ltoptions.m4]) m4_include([../ltsugar.m4]) m4_include([../ltversion.m4]) diff --git a/libgomp/affinity-fmt.c b/libgomp/affinity-fmt.c index c423e35..9a5334d 100644 --- a/libgomp/affinity-fmt.c +++ b/libgomp/affinity-fmt.c @@ -37,10 +37,10 @@ #include <sys/utsname.h> #endif -void +bool gomp_print_string (const char *str, size_t len) { - fwrite (str, 1, len, stderr); + return fwrite (str, 1, len, stderr) != 
len; } void diff --git a/libgomp/allocator.c b/libgomp/allocator.c new file mode 100644 index 0000000..4e29399 --- /dev/null +++ b/libgomp/allocator.c @@ -0,0 +1,357 @@ +/* Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Jakub Jelinek <jakub@redhat.com>. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This file contains wrappers for the system allocation routines. Most + places in the OpenMP API do not make any provision for failure, so in + general we cannot allow memory allocation to fail. 
*/ + +#define _GNU_SOURCE +#include "libgomp.h" +#include <stdlib.h> + +#define omp_max_predefined_alloc omp_thread_mem_alloc + +struct omp_allocator_data +{ + omp_memspace_handle_t memspace; + omp_uintptr_t alignment; + omp_uintptr_t pool_size; + omp_uintptr_t used_pool_size; + omp_allocator_handle_t fb_data; + unsigned int sync_hint : 8; + unsigned int access : 8; + unsigned int fallback : 8; + unsigned int pinned : 1; + unsigned int partition : 7; +#ifndef HAVE_SYNC_BUILTINS + gomp_mutex_t lock; +#endif +}; + +struct omp_mem_header +{ + void *ptr; + size_t size; + omp_allocator_handle_t allocator; + void *pad; +}; + +omp_allocator_handle_t +omp_init_allocator (omp_memspace_handle_t memspace, int ntraits, + const omp_alloctrait_t traits[]) +{ + struct omp_allocator_data data + = { memspace, 1, ~(uintptr_t) 0, 0, 0, omp_atv_contended, omp_atv_all, + omp_atv_default_mem_fb, omp_atv_false, omp_atv_environment }; + struct omp_allocator_data *ret; + int i; + + if (memspace > omp_low_lat_mem_space) + return omp_null_allocator; + for (i = 0; i < ntraits; i++) + switch (traits[i].key) + { + case omp_atk_sync_hint: + switch (traits[i].value) + { + case omp_atv_default: + data.sync_hint = omp_atv_contended; + break; + case omp_atv_contended: + case omp_atv_uncontended: + case omp_atv_sequential: + case omp_atv_private: + data.sync_hint = traits[i].value; + break; + default: + return omp_null_allocator; + } + break; + case omp_atk_alignment: + if ((traits[i].value & (traits[i].value - 1)) != 0 + || !traits[i].value) + return omp_null_allocator; + data.alignment = traits[i].value; + break; + case omp_atk_access: + switch (traits[i].value) + { + case omp_atv_default: + data.access = omp_atv_all; + break; + case omp_atv_all: + case omp_atv_cgroup: + case omp_atv_pteam: + case omp_atv_thread: + data.access = traits[i].value; + break; + default: + return omp_null_allocator; + } + break; + case omp_atk_pool_size: + data.pool_size = traits[i].value; + break; + case 
omp_atk_fallback: + switch (traits[i].value) + { + case omp_atv_default: + data.fallback = omp_atv_default_mem_fb; + break; + case omp_atv_default_mem_fb: + case omp_atv_null_fb: + case omp_atv_abort_fb: + case omp_atv_allocator_fb: + data.fallback = traits[i].value; + break; + default: + return omp_null_allocator; + } + break; + case omp_atk_fb_data: + data.fb_data = traits[i].value; + break; + case omp_atk_pinned: + switch (traits[i].value) + { + case omp_atv_default: + case omp_atv_false: + data.pinned = omp_atv_false; + break; + case omp_atv_true: + data.pinned = omp_atv_true; + break; + default: + return omp_null_allocator; + } + break; + case omp_atk_partition: + switch (traits[i].value) + { + case omp_atv_default: + data.partition = omp_atv_environment; + break; + case omp_atv_environment: + case omp_atv_nearest: + case omp_atv_blocked: + case omp_atv_interleaved: + data.partition = traits[i].value; + break; + default: + return omp_null_allocator; + } + break; + default: + return omp_null_allocator; + } + + if (data.alignment < sizeof (void *)) + data.alignment = sizeof (void *); + + /* No support for these so far (for hbw will use memkind). 
*/ + if (data.pinned || data.memspace == omp_high_bw_mem_space) + return omp_null_allocator; + + ret = gomp_malloc (sizeof (struct omp_allocator_data)); + *ret = data; +#ifndef HAVE_SYNC_BUILTINS + gomp_mutex_init (&ret->lock); +#endif + return (omp_allocator_handle_t) ret; +} + +void +omp_destroy_allocator (omp_allocator_handle_t allocator) +{ + if (allocator != omp_null_allocator) + { +#ifndef HAVE_SYNC_BUILTINS + gomp_mutex_destroy (&((struct omp_allocator_data *) allocator)->lock); +#endif + free ((void *) allocator); + } +} + +void * +omp_alloc (size_t size, omp_allocator_handle_t allocator) +{ + struct omp_allocator_data *allocator_data; + size_t alignment, new_size; + void *ptr, *ret; + + if (__builtin_expect (size == 0, 0)) + return NULL; + +retry: + if (allocator == omp_null_allocator) + { + struct gomp_thread *thr = gomp_thread (); + if (thr->ts.def_allocator == omp_null_allocator) + thr->ts.def_allocator = gomp_def_allocator; + allocator = (omp_allocator_handle_t) thr->ts.def_allocator; + } + + if (allocator > omp_max_predefined_alloc) + { + allocator_data = (struct omp_allocator_data *) allocator; + alignment = allocator_data->alignment; + } + else + { + allocator_data = NULL; + alignment = sizeof (void *); + } + + new_size = sizeof (struct omp_mem_header); + if (alignment > sizeof (void *)) + new_size += alignment - sizeof (void *); + if (__builtin_add_overflow (size, new_size, &new_size)) + goto fail; + + if (__builtin_expect (allocator_data + && allocator_data->pool_size < ~(uintptr_t) 0, 0)) + { + uintptr_t used_pool_size; + if (new_size > allocator_data->pool_size) + goto fail; +#ifdef HAVE_SYNC_BUILTINS + used_pool_size = __atomic_load_n (&allocator_data->used_pool_size, + MEMMODEL_RELAXED); + do + { + uintptr_t new_pool_size; + if (__builtin_add_overflow (used_pool_size, new_size, + &new_pool_size) + || new_pool_size > allocator_data->pool_size) + goto fail; + if (__atomic_compare_exchange_n (&allocator_data->used_pool_size, + &used_pool_size, 
new_pool_size, + true, MEMMODEL_RELAXED, + MEMMODEL_RELAXED)) + break; + } + while (1); +#else + gomp_mutex_lock (&allocator_data->lock); + if (__builtin_add_overflow (allocator_data->used_pool_size, new_size, + &used_pool_size) + || used_pool_size > allocator_data->pool_size) + { + gomp_mutex_unlock (&allocator_data->lock); + goto fail; + } + allocator_data->used_pool_size = used_pool_size; + gomp_mutex_unlock (&allocator_data->lock); +#endif + ptr = malloc (new_size); + if (ptr == NULL) + { +#ifdef HAVE_SYNC_BUILTINS + __atomic_add_fetch (&allocator_data->used_pool_size, -new_size, + MEMMODEL_RELAXED); +#else + gomp_mutex_lock (&allocator_data->lock); + allocator_data->used_pool_size -= new_size; + gomp_mutex_unlock (&allocator_data->lock); +#endif + goto fail; + } + } + else + { + ptr = malloc (new_size); + if (ptr == NULL) + goto fail; + } + + if (alignment > sizeof (void *)) + ret = (void *) (((uintptr_t) ptr + + sizeof (struct omp_mem_header) + + alignment - sizeof (void *)) & ~(alignment - 1)); + else + ret = (char *) ptr + sizeof (struct omp_mem_header); + ((struct omp_mem_header *) ret)[-1].ptr = ptr; + ((struct omp_mem_header *) ret)[-1].size = new_size; + ((struct omp_mem_header *) ret)[-1].allocator = allocator; + return ret; + +fail: + if (allocator_data) + { + switch (allocator_data->fallback) + { + case omp_atv_default_mem_fb: + if (alignment > sizeof (void *) + || (allocator_data + && allocator_data->pool_size < ~(uintptr_t) 0)) + { + allocator = omp_default_mem_alloc; + goto retry; + } + /* Otherwise, we've already performed default mem allocation + and if that failed, it won't succeed again (unless it was + intermittent). Return NULL then, as that is the fallback.
*/ + break; + case omp_atv_null_fb: + break; + default: + case omp_atv_abort_fb: + gomp_fatal ("Out of memory allocating %lu bytes", + (unsigned long) size); + case omp_atv_allocator_fb: + allocator = allocator_data->fb_data; + goto retry; + } + } + return NULL; +} + +void +omp_free (void *ptr, omp_allocator_handle_t allocator) +{ + struct omp_mem_header *data; + + if (ptr == NULL) + return; + (void) allocator; + data = &((struct omp_mem_header *) ptr)[-1]; + if (data->allocator > omp_max_predefined_alloc) + { + struct omp_allocator_data *allocator_data + = (struct omp_allocator_data *) (data->allocator); + if (allocator_data->pool_size < ~(uintptr_t) 0) + { +#ifdef HAVE_SYNC_BUILTINS + __atomic_add_fetch (&allocator_data->used_pool_size, -data->size, + MEMMODEL_RELAXED); +#else + gomp_mutex_lock (&allocator_data->lock); + allocator_data->used_pool_size -= data->size; + gomp_mutex_unlock (&allocator_data->lock); +#endif + } + } + free (data->ptr); +} diff --git a/libgomp/config/accel/openacc.f90 b/libgomp/config/accel/openacc.f90 index b4d4036..9933073 100644 --- a/libgomp/config/accel/openacc.f90 +++ b/libgomp/config/accel/openacc.f90 @@ -44,13 +44,14 @@ module openacc_kinds integer, parameter :: acc_device_kind = int32 ! Keep in sync with include/gomp-constants.h. + integer (acc_device_kind), parameter :: acc_device_current = -1 integer (acc_device_kind), parameter :: acc_device_none = 0 integer (acc_device_kind), parameter :: acc_device_default = 1 integer (acc_device_kind), parameter :: acc_device_host = 2 ! integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 removed. 
integer (acc_device_kind), parameter :: acc_device_not_host = 4 integer (acc_device_kind), parameter :: acc_device_nvidia = 5 - integer (acc_device_kind), parameter :: acc_device_gcn = 8 + integer (acc_device_kind), parameter :: acc_device_radeon = 8 end module openacc_kinds @@ -59,19 +60,19 @@ module openacc_internal implicit none interface - function acc_on_device_h (d) + function acc_on_device_h (devicetype) import - integer (acc_device_kind) d + integer (acc_device_kind) devicetype logical acc_on_device_h end function end interface interface - function acc_on_device_l (d) & + function acc_on_device_l (devicetype) & bind (C, name = "acc_on_device") use iso_c_binding, only: c_int integer (c_int) :: acc_on_device_l - integer (c_int), value :: d + integer (c_int), value :: devicetype end function end interface end module openacc_internal @@ -86,7 +87,7 @@ module openacc ! From openacc_kinds public :: acc_device_kind public :: acc_device_none, acc_device_default, acc_device_host - public :: acc_device_not_host, acc_device_nvidia, acc_device_gcn + public :: acc_device_not_host, acc_device_nvidia, acc_device_radeon public :: acc_on_device @@ -96,14 +97,10 @@ module openacc end module openacc -function acc_on_device_h (d) +function acc_on_device_h (devicetype) use openacc_internal, only: acc_on_device_l use openacc_kinds - integer (acc_device_kind) d + integer (acc_device_kind) devicetype logical acc_on_device_h - if (acc_on_device_l (d) .eq. 1) then - acc_on_device_h = .TRUE. - else - acc_on_device_h = .FALSE. 
- end if + acc_on_device_h = acc_on_device_l (devicetype) /= 0 end function diff --git a/libgomp/configure b/libgomp/configure index 04a6fd9..9ffa66c 100755 --- a/libgomp/configure +++ b/libgomp/configure @@ -826,6 +826,7 @@ enable_version_specific_runtime_libs enable_generated_files_in_srcdir enable_silent_rules enable_multilib +with_toolexeclibdir enable_dependency_tracking enable_shared enable_static @@ -1500,11 +1501,14 @@ Optional Features: --enable-tls Use thread-local storage [default=yes] --enable-symvers=STYLE enables symbol versioning of the shared library [default=yes] - --enable-cet enable Intel CET in target libraries [default=no] + --enable-cet enable Intel CET in target libraries [default=auto] Optional Packages: --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) + --with-toolexeclibdir=DIR + install libraries built with a cross compiler within + DIR --with-pic try to use only PIC/non-PIC objects [default=use both] --with-gnu-ld assume the C compiler uses GNU ld [default=no] @@ -3501,6 +3505,22 @@ fi ac_config_commands="$ac_config_commands default-1" + +# Check whether --with-toolexeclibdir was given. +if test "${with_toolexeclibdir+set}" = set; then : + withval=$with_toolexeclibdir; case ${with_toolexeclibdir} in + /) + ;; + */) + with_toolexeclibdir=`echo $with_toolexeclibdir | sed 's,/$,,'` + ;; +esac +else + with_toolexeclibdir=no +fi + + + # Calculate toolexeclibdir # Also toolexecdir, though it's only used in toolexeclibdir case ${enable_version_specific_runtime_libs} in @@ -3516,7 +3536,14 @@ case ${enable_version_specific_runtime_libs} in test x"$with_cross_host" != x"no"; then # Install a library built with a cross compiler in tooldir, not libdir. 
toolexecdir='$(exec_prefix)/$(target_alias)' - toolexeclibdir='$(toolexecdir)/lib' + case ${with_toolexeclibdir} in + no) + toolexeclibdir='$(toolexecdir)/lib' + ;; + *) + toolexeclibdir=${with_toolexeclibdir} + ;; + esac else toolexecdir='$(libdir)/gcc-lib/$(target_alias)' toolexeclibdir='$(libdir)' @@ -11405,7 +11432,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 11408 "configure" +#line 11435 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -11511,7 +11538,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 11514 "configure" +#line 11541 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -14991,7 +15018,7 @@ fi # Plugins for offload execution, configure.ac fragment. -*- mode: autoconf -*- # -# Copyright (C) 2014-2019 Free Software Foundation, Inc. +# Copyright (C) 2014-2020 Free Software Foundation, Inc. # # Contributed by Mentor Embedded. # @@ -15320,7 +15347,7 @@ rm -f core conftest.err conftest.$ac_objext \ case "${target}" in x86_64-*-*) case " ${CC} ${CFLAGS} " in - *" -m32 "*) + *" -m32 "*|*" -mx32 "*) PLUGIN_HSA=0 ;; *) @@ -15360,7 +15387,7 @@ rm -f core conftest.err conftest.$ac_objext \ case "${target}" in x86_64-*-*) case " ${CC} ${CFLAGS} " in - *" -m32 "*) + *" -m32 "*|*" -mx32 "*) PLUGIN_GCN=0 ;; *) @@ -16713,7 +16740,7 @@ if test "${enable_cet+set}" = set; then : esac else - enable_cet=no + enable_cet=auto fi @@ -16726,6 +16753,8 @@ case "$host" in auto) # Check if target supports multi-byte NOPs # and if assembler supports CET insn. + cet_save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -fcf-protection" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ @@ -16749,6 +16778,7 @@ else enable_cet=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS="$cet_save_CFLAGS" ;; yes) # Check if assembler supports CET. 
@@ -17020,6 +17050,8 @@ ac_config_files="$ac_config_files Makefile testsuite/Makefile libgomp.spec" ac_config_files="$ac_config_files testsuite/libgomp-test-support.pt.exp:testsuite/libgomp-test-support.exp.in" +ac_config_files="$ac_config_files testsuite/libgomp-site-extra.exp" + cat >confcache <<\_ACEOF # This file is a shell script that caches the results of configure # tests run on this system so they can be shared between configure @@ -18173,6 +18205,7 @@ do "testsuite/Makefile") CONFIG_FILES="$CONFIG_FILES testsuite/Makefile" ;; "libgomp.spec") CONFIG_FILES="$CONFIG_FILES libgomp.spec" ;; "testsuite/libgomp-test-support.pt.exp") CONFIG_FILES="$CONFIG_FILES testsuite/libgomp-test-support.pt.exp:testsuite/libgomp-test-support.exp.in" ;; + "testsuite/libgomp-site-extra.exp") CONFIG_FILES="$CONFIG_FILES testsuite/libgomp-site-extra.exp" ;; *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;; esac diff --git a/libgomp/configure.ac b/libgomp/configure.ac index 725f3bf..ef5d293 100644 --- a/libgomp/configure.ac +++ b/libgomp/configure.ac @@ -64,6 +64,8 @@ target_alias=${target_alias-$host_alias} AM_INIT_AUTOMAKE([1.9.0 foreign no-dist -Wall -Wno-portability -Wno-override]) AM_ENABLE_MULTILIB(, ..) +GCC_WITH_TOOLEXECLIBDIR + # Calculate toolexeclibdir # Also toolexecdir, though it's only used in toolexeclibdir case ${enable_version_specific_runtime_libs} in @@ -79,7 +81,14 @@ case ${enable_version_specific_runtime_libs} in test x"$with_cross_host" != x"no"; then # Install a library built with a cross compiler in tooldir, not libdir. 
toolexecdir='$(exec_prefix)/$(target_alias)' - toolexeclibdir='$(toolexecdir)/lib' + case ${with_toolexeclibdir} in + no) + toolexeclibdir='$(toolexecdir)/lib' + ;; + *) + toolexeclibdir=${with_toolexeclibdir} + ;; + esac else toolexecdir='$(libdir)/gcc-lib/$(target_alias)' toolexeclibdir='$(libdir)' @@ -427,4 +436,5 @@ GCC_BASE_VER AC_CONFIG_FILES(omp.h omp_lib.h omp_lib.f90 libgomp_f.h) AC_CONFIG_FILES(Makefile testsuite/Makefile libgomp.spec) AC_CONFIG_FILES([testsuite/libgomp-test-support.pt.exp:testsuite/libgomp-test-support.exp.in]) +AC_CONFIG_FILES([testsuite/libgomp-site-extra.exp]) AC_OUTPUT diff --git a/libgomp/env.c b/libgomp/env.c index dbec3ae..c0c4730 100644 --- a/libgomp/env.c +++ b/libgomp/env.c @@ -86,6 +86,7 @@ char *gomp_bind_var_list; unsigned long gomp_bind_var_list_len; void **gomp_places_list; unsigned long gomp_places_list_len; +uintptr_t gomp_def_allocator = omp_default_mem_alloc; int gomp_debug_var; unsigned int gomp_num_teams_var; bool gomp_display_affinity_var; @@ -949,8 +950,7 @@ parse_boolean (const char *name, bool *value) gomp_error ("Invalid value for environment variable %s", name); } -/* Parse the OMP_WAIT_POLICY environment variable and store the - result in gomp_active_wait_policy. */ +/* Parse the OMP_WAIT_POLICY environment variable and return the value. */ static int parse_wait_policy (void) @@ -1084,6 +1084,47 @@ parse_affinity (bool ignore) return false; } +/* Parse the OMP_ALLOCATOR environment variable and return the value. 
*/ + +static uintptr_t +parse_allocator (void) +{ + const char *env; + uintptr_t ret = omp_default_mem_alloc; + + env = getenv ("OMP_ALLOCATOR"); + if (env == NULL) + return ret; + + while (isspace ((unsigned char) *env)) + ++env; + if (0) + ; +#define C(v) \ + else if (strncasecmp (env, #v, sizeof (#v) - 1) == 0) \ + { \ + ret = v; \ + env += sizeof (#v) - 1; \ + } + C (omp_default_mem_alloc) + C (omp_large_cap_mem_alloc) + C (omp_const_mem_alloc) + C (omp_high_bw_mem_alloc) + C (omp_low_lat_mem_alloc) + C (omp_cgroup_mem_alloc) + C (omp_pteam_mem_alloc) + C (omp_thread_mem_alloc) +#undef C + else + env = "X"; + while (isspace ((unsigned char) *env)) + ++env; + if (*env == '\0') + return ret; + gomp_error ("Invalid value for environment variable OMP_ALLOCATOR"); + return omp_default_mem_alloc; +} + static void parse_acc_device_type (void) { @@ -1276,6 +1317,22 @@ handle_omp_display_env (unsigned long stacksize, int wait_policy) gomp_display_affinity_var ? "TRUE" : "FALSE"); fprintf (stderr, " OMP_AFFINITY_FORMAT = '%s'\n", gomp_affinity_format_var); + fprintf (stderr, " OMP_ALLOCATOR = '"); + switch (gomp_def_allocator) + { +#define C(v) case v: fputs (#v, stderr); break; + C (omp_default_mem_alloc) + C (omp_large_cap_mem_alloc) + C (omp_const_mem_alloc) + C (omp_high_bw_mem_alloc) + C (omp_low_lat_mem_alloc) + C (omp_cgroup_mem_alloc) + C (omp_pteam_mem_alloc) + C (omp_thread_mem_alloc) +#undef C + default: break; + } + fputs ("'\n", stderr); if (verbose) { @@ -1312,6 +1369,7 @@ initialize_env (void) parse_int ("OMP_MAX_TASK_PRIORITY", &gomp_max_task_priority_var, true); parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", &gomp_max_active_levels_var, true); + gomp_def_allocator = parse_allocator (); if (parse_unsigned_long ("OMP_THREAD_LIMIT", &thread_limit_var, false)) { gomp_global_icv.thread_limit_var diff --git a/libgomp/icv.c b/libgomp/icv.c index ff4430e..b13289b 100644 --- a/libgomp/icv.c +++ b/libgomp/icv.c @@ -197,6 +197,25 @@ omp_get_partition_place_nums (int 
*place_nums) *place_nums++ = thr->ts.place_partition_off + i; } +void +omp_set_default_allocator (omp_allocator_handle_t allocator) +{ + struct gomp_thread *thr = gomp_thread (); + if (allocator == omp_null_allocator) + allocator = omp_default_mem_alloc; + thr->ts.def_allocator = (uintptr_t) allocator; +} + +omp_allocator_handle_t +omp_get_default_allocator (void) +{ + struct gomp_thread *thr = gomp_thread (); + if (thr->ts.def_allocator == omp_null_allocator) + return (omp_allocator_handle_t) gomp_def_allocator; + else + return (omp_allocator_handle_t) thr->ts.def_allocator; +} + ialias (omp_set_dynamic) ialias (omp_set_nested) ialias (omp_set_num_threads) diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h index 2559ce0..64f138d 100644 --- a/libgomp/libgomp-plugin.h +++ b/libgomp/libgomp-plugin.h @@ -54,13 +54,6 @@ enum offload_target_type OFFLOAD_TARGET_TYPE_GCN = 8 }; -/* Container type for passing device properties. */ -union gomp_device_property_value -{ - const char *ptr; - size_t val; -}; - /* Opaque type to represent plugin-dependent implementation of an OpenACC asynchronous queue. */ struct goacc_asyncqueue; @@ -75,6 +68,32 @@ struct goacc_asyncqueue_list typedef struct goacc_asyncqueue *goacc_aq; typedef struct goacc_asyncqueue_list *goacc_aq_list; + +/* OpenACC 'acc_get_property' support. */ + +/* Device property values. Keep in sync with + 'libgomp/{openacc.h,openacc.f90}:acc_device_property_t'. */ +enum goacc_property + { + /* Mask to tell numeric and string values apart. */ +#define GOACC_PROPERTY_STRING_MASK 0x10000 + + /* Start from 1 to catch uninitialized use. */ + GOACC_PROPERTY_MEMORY = 1, + GOACC_PROPERTY_FREE_MEMORY = 2, + GOACC_PROPERTY_NAME = GOACC_PROPERTY_STRING_MASK | 1, + GOACC_PROPERTY_VENDOR = GOACC_PROPERTY_STRING_MASK | 2, + GOACC_PROPERTY_DRIVER = GOACC_PROPERTY_STRING_MASK | 3 + }; + +/* Container type for passing device properties. 
*/ +union goacc_property_value +{ + const char *ptr; + size_t val; +}; + + /* Auxiliary struct, used for transferring pairs of addresses from plugin to libgomp. */ struct addr_pair @@ -101,7 +120,6 @@ extern const char *GOMP_OFFLOAD_get_name (void); extern unsigned int GOMP_OFFLOAD_get_caps (void); extern int GOMP_OFFLOAD_get_type (void); extern int GOMP_OFFLOAD_get_num_devices (void); -extern union gomp_device_property_value GOMP_OFFLOAD_get_property (int, int); extern bool GOMP_OFFLOAD_init_device (int); extern bool GOMP_OFFLOAD_fini_device (int); extern unsigned GOMP_OFFLOAD_version (void); @@ -141,6 +159,8 @@ extern void *GOMP_OFFLOAD_openacc_cuda_get_current_context (void); extern void *GOMP_OFFLOAD_openacc_cuda_get_stream (struct goacc_asyncqueue *); extern int GOMP_OFFLOAD_openacc_cuda_set_stream (struct goacc_asyncqueue *, void *); +extern union goacc_property_value + GOMP_OFFLOAD_openacc_get_property (int, enum goacc_property); #ifdef __cplusplus } diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 01eb1fb..ca42e0d 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -397,6 +397,9 @@ struct gomp_team_state unsigned place_partition_off; unsigned place_partition_len; + /* Def-allocator-var ICV. */ + uintptr_t def_allocator; + #ifdef HAVE_SYNC_BUILTINS /* Number of single stmts encountered. 
*/ unsigned long single_count; @@ -450,6 +453,7 @@ extern int gomp_debug_var; extern bool gomp_display_affinity_var; extern char *gomp_affinity_format_var; extern size_t gomp_affinity_format_len; +extern uintptr_t gomp_def_allocator; extern int goacc_device_num; extern char *goacc_device_type; extern int goacc_default_dims[GOMP_DIM_MAX]; @@ -832,7 +836,7 @@ extern void gomp_display_affinity_place (char *, size_t, size_t *, int); /* affinity-fmt.c */ -extern void gomp_print_string (const char *str, size_t len); +extern bool gomp_print_string (const char *str, size_t len); extern void gomp_set_affinity_format (const char *, size_t); extern void gomp_display_string (char *, size_t, size_t *, const char *, size_t); @@ -1068,6 +1072,8 @@ typedef struct acc_dispatch_t __typeof (GOMP_OFFLOAD_openacc_async_host2dev) *host2dev_func; } async; + __typeof (GOMP_OFFLOAD_openacc_get_property) *get_property_func; + /* NVIDIA target specific routines. */ struct { __typeof (GOMP_OFFLOAD_openacc_cuda_get_current_device) @@ -1113,7 +1119,6 @@ struct gomp_device_descr __typeof (GOMP_OFFLOAD_get_caps) *get_caps_func; __typeof (GOMP_OFFLOAD_get_type) *get_type_func; __typeof (GOMP_OFFLOAD_get_num_devices) *get_num_devices_func; - __typeof (GOMP_OFFLOAD_get_property) *get_property_func; __typeof (GOMP_OFFLOAD_init_device) *init_device_func; __typeof (GOMP_OFFLOAD_fini_device) *fini_device_func; __typeof (GOMP_OFFLOAD_version) *version_func; diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map index c7268bf..012e3d6 100644 --- a/libgomp/libgomp.map +++ b/libgomp/libgomp.map @@ -180,6 +180,16 @@ OMP_5.0 { omp_pause_resource_all_; } OMP_4.5; +OMP_5.0.1 { + global: + omp_set_default_allocator; + omp_get_default_allocator; + omp_init_allocator; + omp_destroy_allocator; + omp_alloc; + omp_free; +} OMP_5.0; + GOMP_1.0 { global: GOMP_atomic_end; diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi index d3a5b31..b946743 100644 --- a/libgomp/libgomp.texi +++ b/libgomp/libgomp.texi @@ 
-1727,9 +1727,9 @@ the stack size is system dependent. @ref{OMP_STACKSIZE} @item @emph{Reference}: -@uref{http://gcc.gnu.org/ml/gcc-patches/2006-06/msg00493.html, +@uref{https://gcc.gnu.org/ml/gcc-patches/2006-06/msg00493.html, GCC Patches Mailinglist}, -@uref{http://gcc.gnu.org/ml/gcc-patches/2006-06/msg00496.html, +@uref{https://gcc.gnu.org/ml/gcc-patches/2006-06/msg00496.html, GCC Patches Mailinglist} @end table @@ -1811,20 +1811,18 @@ pools available and their worker threads run at priority four. To activate the OpenACC extensions for C/C++ and Fortran, the compile-time flag @option{-fopenacc} must be specified. This enables the OpenACC directive -@code{#pragma acc} in C/C++ and @code{!$accp} directives in free form, +@code{#pragma acc} in C/C++ and @code{!$acc} directives in free form, @code{c$acc}, @code{*$acc} and @code{!$acc} directives in fixed form, @code{!$} conditional compilation sentinels in free form and @code{c$}, @code{*$} and @code{!$} sentinels in fixed form, for Fortran. The flag also arranges for automatic linking of the OpenACC runtime library (@ref{OpenACC Runtime Library Routines}). +See @uref{https://gcc.gnu.org/wiki/OpenACC} for more information. + A complete description of all OpenACC directives accepted may be found in the @uref{https://www.openacc.org, OpenACC} Application Programming -Interface manual, version 2.0. - -Note that this is an experimental feature and subject to -change in future versions of GCC. See -@uref{https://gcc.gnu.org/wiki/OpenACC} for more information. +Interface manual, version 2.6. @@ -1836,7 +1834,7 @@ change in future versions of GCC. See @chapter OpenACC Runtime Library Routines The runtime routines described here are defined by section 3 of the OpenACC -specifications in version 2.0. +specifications in version 2.6. They have C linkage, and do not throw exceptions. 
Generally, they are available only for the host, with the exception of @code{acc_on_device}, which is available for both the host and the @@ -1852,11 +1850,11 @@ acceleration device. * acc_get_property:: Get device property. * acc_async_test:: Tests for completion of a specific asynchronous operation. -* acc_async_test_all:: Tests for completion of all asychronous +* acc_async_test_all:: Tests for completion of all asynchronous operations. * acc_wait:: Wait for completion of a specific asynchronous operation. -* acc_wait_all:: Waits for completion of all asyncrhonous +* acc_wait_all:: Waits for completion of all asynchronous operations. * acc_wait_all_async:: Wait for completion of all asynchronous operations. @@ -1892,6 +1890,8 @@ acceleration device. present on device. * acc_memcpy_to_device:: Copy host memory to device memory. * acc_memcpy_from_device:: Copy device memory to host memory. +* acc_attach:: Let device pointer point to device-pointer target. +* acc_detach:: Let device pointer point to host-pointer target. API routines for target platforms. @@ -1929,7 +1929,7 @@ for the device type specified in @var{devicetype}. @end multitable @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section +@uref{https://www.openacc.org, OpenACC specification v2.6}, section 3.2.1. @end table @@ -1939,7 +1939,7 @@ for the device type specified in @var{devicetype}. @section @code{acc_set_device_type} -- Set type of device accelerator to use. @table @asis @item @emph{Description} -This function indicates to the runtime library which device typr, specified +This function indicates to the runtime library which device type, specified in @var{devicetype}, to use when executing a parallel or kernels region. @item @emph{C/C++}: @@ -1954,7 +1954,7 @@ in @var{devicetype}, to use when executing a parallel or kernels region. 
@end multitable @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section +@uref{https://www.openacc.org, OpenACC specification v2.6}, section 3.2.2. @end table @@ -1979,7 +1979,7 @@ parallel or kernels region. @end multitable @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section +@uref{https://www.openacc.org, OpenACC specification v2.6}, section 3.2.3. @end table @@ -1990,12 +1990,12 @@ parallel or kernels region. @table @asis @item @emph{Description} This function will indicate to the runtime which device number, -specified by @var{num}, associated with the specifed device +specified by @var{devicenum}, associated with the specified device type @var{devicetype}. @item @emph{C/C++}: @multitable @columnfractions .20 .80 -@item @emph{Prototype}: @tab @code{acc_set_device_num(int num, acc_device_t devicetype);} +@item @emph{Prototype}: @tab @code{acc_set_device_num(int devicenum, acc_device_t devicetype);} @end multitable @item @emph{Fortran}: @@ -2006,7 +2006,7 @@ type @var{devicetype}. @end multitable @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section +@uref{https://www.openacc.org, OpenACC specification v2.6}, section 3.2.4. @end table @@ -2033,7 +2033,7 @@ region. @end multitable @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section +@uref{https://www.openacc.org, OpenACC specification v2.6}, section 3.2.5. @end table @@ -2053,6 +2053,14 @@ The Fortran @code{acc_get_property_string} subroutine returns the string retrieved in its fourth argument while the remaining entry points are functions, which pass the return value as their result. +Note for Fortran, only: the OpenACC technical committee corrected and, hence, +modified the interface introduced in OpenACC 2.6. 
The kind-value parameter +@code{acc_device_property} has been renamed to @code{acc_device_property_kind} +for consistency and the return type of the @code{acc_get_property} function is +now a @code{c_size_t} integer instead of a @code{acc_device_property} integer. +The parameter @code{acc_device_property} will continue to be provided, +but might be removed in a future version of GCC. + @item @emph{C/C++}: @multitable @columnfractions .20 .80 @item @emph{Prototype}: @tab @code{size_t acc_get_property(int devicenum, acc_device_t devicetype, acc_device_property_t property);} @@ -2063,10 +2071,11 @@ functions, which pass the return value as their result. @multitable @columnfractions .20 .80 @item @emph{Interface}: @tab @code{function acc_get_property(devicenum, devicetype, property)} @item @emph{Interface}: @tab @code{subroutine acc_get_property_string(devicenum, devicetype, property, string)} +@item @tab @code{use ISO_C_Binding, only: c_size_t} @item @tab @code{integer devicenum} @item @tab @code{integer(kind=acc_device_kind) devicetype} -@item @tab @code{integer(kind=acc_device_property) property} -@item @tab @code{integer(kind=acc_device_property) acc_get_property} +@item @tab @code{integer(kind=acc_device_property_kind) property} +@item @tab @code{integer(kind=c_size_t) acc_get_property} @item @tab @code{character(*) string} @end multitable @@ -2100,8 +2109,8 @@ a zero and Fortran returns a @code{false}. @end multitable @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section -3.2.6. +@uref{https://www.openacc.org, OpenACC specification v2.6}, section +3.2.9. @end table @@ -2128,8 +2137,8 @@ Fortran returns a @code{false}. @end multitable @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section -3.2.7. +@uref{https://www.openacc.org, OpenACC specification v2.6}, section +3.2.10. @end table @@ -2156,8 +2165,8 @@ specified in @var{arg}. 
@end multitable @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section -3.2.8. +@uref{https://www.openacc.org, OpenACC specification v2.6}, section +3.2.11. @end table @@ -2181,8 +2190,8 @@ This function waits for the completion of all asynchronous operations. @end multitable @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section -3.2.10. +@uref{https://www.openacc.org, OpenACC specification v2.6}, section +3.2.13. @end table @@ -2207,8 +2216,8 @@ any queue. @end multitable @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section -3.2.11. +@uref{https://www.openacc.org, OpenACC specification v2.6}, section +3.2.14. @end table @@ -2232,8 +2241,8 @@ asynchronous operations enqueued on queue @var{arg}. @end multitable @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section -3.2.9. +@uref{https://www.openacc.org, OpenACC specification v2.6}, section +3.2.12. @end table @@ -2257,8 +2266,8 @@ This function initializes the runtime for the device type specified in @end multitable @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section -3.2.12. +@uref{https://www.openacc.org, OpenACC specification v2.6}, section +3.2.7. @end table @@ -2282,8 +2291,8 @@ This function shuts down the runtime for the device type specified in @end multitable @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section -3.2.13. +@uref{https://www.openacc.org, OpenACC specification v2.6}, section +3.2.8. @end table @@ -2313,8 +2322,8 @@ return @code{false}. @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section -3.2.14. +@uref{https://www.openacc.org, OpenACC specification v2.6}, section +3.2.17. @end table @@ -2332,8 +2341,8 @@ the device address of the allocated memory. 
@end multitable @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section -3.2.15. +@uref{https://www.openacc.org, OpenACC specification v2.6}, section +3.2.18. @end table @@ -2350,8 +2359,8 @@ Free previously allocated device memory at the device address @code{a}. @end multitable @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section -3.2.16. +@uref{https://www.openacc.org, OpenACC specification v2.6}, section +3.2.19. @end table @@ -2371,6 +2380,7 @@ variable or array element and @var{len} specifies the length in bytes. @item @emph{C/C++}: @multitable @columnfractions .20 .80 @item @emph{Prototype}: @tab @code{void *acc_copyin(h_void *a, size_t len);} +@item @emph{Prototype}: @tab @code{void *acc_copyin_async(h_void *a, size_t len, int async);} @end multitable @item @emph{Fortran}: @@ -2380,11 +2390,18 @@ variable or array element and @var{len} specifies the length in bytes. @item @emph{Interface}: @tab @code{subroutine acc_copyin(a, len)} @item @tab @code{type, dimension(:[,:]...) :: a} @item @tab @code{integer len} +@item @emph{Interface}: @tab @code{subroutine acc_copyin_async(a, async)} +@item @tab @code{type, dimension(:[,:]...) :: a} +@item @tab @code{integer(acc_handle_kind) :: async} +@item @emph{Interface}: @tab @code{subroutine acc_copyin_async(a, len, async)} +@item @tab @code{type, dimension(:[,:]...) :: a} +@item @tab @code{integer len} +@item @tab @code{integer(acc_handle_kind) :: async} @end multitable @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section -3.2.17. +@uref{https://www.openacc.org, OpenACC specification v2.6}, section +3.2.20. @end table @@ -2393,7 +2410,7 @@ variable or array element and @var{len} specifies the length in bytes. @section @code{acc_present_or_copyin} -- If the data is not present on the device, allocate device memory and copy from host memory. 
@table @asis @item @emph{Description} -This function tests if the host data specifed by @var{a} and of length +This function tests if the host data specified by @var{a} and of length @var{len} is present or not. If it is not present, then device memory will be allocated and the host memory copied. The device address of the newly allocated device memory is returned. @@ -2402,6 +2419,9 @@ In Fortran, two (2) forms are supported. In the first form, @var{a} specifies a contiguous array section. The second form @var{a} specifies a variable or array element and @var{len} specifies the length in bytes. +Note that @code{acc_present_or_copyin} and @code{acc_pcopyin} exist for +backward compatibility with OpenACC 2.0; use @ref{acc_copyin} instead. + @item @emph{C/C++}: @multitable @columnfractions .20 .80 @item @emph{Prototype}: @tab @code{void *acc_present_or_copyin(h_void *a, size_t len);} @@ -2423,8 +2443,8 @@ array element and @var{len} specifies the length in bytes. @end multitable @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section -3.2.18. +@uref{https://www.openacc.org, OpenACC specification v2.6}, section +3.2.20. @end table @@ -2444,6 +2464,7 @@ array element and @var{len} specifies the length in bytes. @item @emph{C/C++}: @multitable @columnfractions .20 .80 @item @emph{Prototype}: @tab @code{void *acc_create(h_void *a, size_t len);} +@item @emph{Prototype}: @tab @code{void *acc_create_async(h_void *a, size_t len, int async);} @end multitable @item @emph{Fortran}: @@ -2453,11 +2474,18 @@ array element and @var{len} specifies the length in bytes. @item @emph{Interface}: @tab @code{subroutine acc_create(a, len)} @item @tab @code{type, dimension(:[,:]...) :: a} @item @tab @code{integer len} +@item @emph{Interface}: @tab @code{subroutine acc_create_async(a, async)} +@item @tab @code{type, dimension(:[,:]...) 
:: a} +@item @tab @code{integer(acc_handle_kind) :: async} +@item @emph{Interface}: @tab @code{subroutine acc_create_async(a, len, async)} +@item @tab @code{type, dimension(:[,:]...) :: a} +@item @tab @code{integer len} +@item @tab @code{integer(acc_handle_kind) :: async} @end multitable @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section -3.2.19. +@uref{https://www.openacc.org, OpenACC specification v2.6}, section +3.2.21. @end table @@ -2466,7 +2494,7 @@ array element and @var{len} specifies the length in bytes. @section @code{acc_present_or_create} -- If the data is not present on the device, allocate device memory and map it to host memory. @table @asis @item @emph{Description} -This function tests if the host data specifed by @var{a} and of length +This function tests if the host data specified by @var{a} and of length @var{len} is present or not. If it is not present, then device memory will be allocated and mapped to host memory. In C/C++, the device address of the newly allocated device memory is returned. @@ -2475,6 +2503,8 @@ In Fortran, two (2) forms are supported. In the first form, @var{a} specifies a contiguous array section. The second form @var{a} specifies a variable or array element and @var{len} specifies the length in bytes. +Note that @code{acc_present_or_create} and @code{acc_pcreate} exist for +backward compatibility with OpenACC 2.0; use @ref{acc_create} instead. @item @emph{C/C++}: @multitable @columnfractions .20 .80 @@ -2497,8 +2527,8 @@ array element and @var{len} specifies the length in bytes. @end multitable @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section -3.2.20. +@uref{https://www.openacc.org, OpenACC specification v2.6}, section +3.2.21. @end table @@ -2517,6 +2547,9 @@ array element and @var{len} specifies the length in bytes. 
@item @emph{C/C++}: @multitable @columnfractions .20 .80 @item @emph{Prototype}: @tab @code{acc_copyout(h_void *a, size_t len);} +@item @emph{Prototype}: @tab @code{acc_copyout_async(h_void *a, size_t len, int async);} +@item @emph{Prototype}: @tab @code{acc_copyout_finalize(h_void *a, size_t len);} +@item @emph{Prototype}: @tab @code{acc_copyout_finalize_async(h_void *a, size_t len, int async);} @end multitable @item @emph{Fortran}: @@ -2526,11 +2559,30 @@ array element and @var{len} specifies the length in bytes. @item @emph{Interface}: @tab @code{subroutine acc_copyout(a, len)} @item @tab @code{type, dimension(:[,:]...) :: a} @item @tab @code{integer len} +@item @emph{Interface}: @tab @code{subroutine acc_copyout_async(a, async)} +@item @tab @code{type, dimension(:[,:]...) :: a} +@item @tab @code{integer(acc_handle_kind) :: async} +@item @emph{Interface}: @tab @code{subroutine acc_copyout_async(a, len, async)} +@item @tab @code{type, dimension(:[,:]...) :: a} +@item @tab @code{integer len} +@item @tab @code{integer(acc_handle_kind) :: async} +@item @emph{Interface}: @tab @code{subroutine acc_copyout_finalize(a)} +@item @tab @code{type, dimension(:[,:]...) :: a} +@item @emph{Interface}: @tab @code{subroutine acc_copyout_finalize(a, len)} +@item @tab @code{type, dimension(:[,:]...) :: a} +@item @tab @code{integer len} +@item @emph{Interface}: @tab @code{subroutine acc_copyout_finalize_async(a, async)} +@item @tab @code{type, dimension(:[,:]...) :: a} +@item @tab @code{integer(acc_handle_kind) :: async} +@item @emph{Interface}: @tab @code{subroutine acc_copyout_finalize_async(a, len, async)} +@item @tab @code{type, dimension(:[,:]...) :: a} +@item @tab @code{integer len} +@item @tab @code{integer(acc_handle_kind) :: async} @end multitable @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section -3.2.21. +@uref{https://www.openacc.org, OpenACC specification v2.6}, section +3.2.22. 
@end table @@ -2549,6 +2601,9 @@ array element and @var{len} specifies the length in bytes. @item @emph{C/C++}: @multitable @columnfractions .20 .80 @item @emph{Prototype}: @tab @code{acc_delete(h_void *a, size_t len);} +@item @emph{Prototype}: @tab @code{acc_delete_async(h_void *a, size_t len, int async);} +@item @emph{Prototype}: @tab @code{acc_delete_finalize(h_void *a, size_t len);} +@item @emph{Prototype}: @tab @code{acc_delete_finalize_async(h_void *a, size_t len, int async);} @end multitable @item @emph{Fortran}: @@ -2558,11 +2613,30 @@ array element and @var{len} specifies the length in bytes. @item @emph{Interface}: @tab @code{subroutine acc_delete(a, len)} @item @tab @code{type, dimension(:[,:]...) :: a} @item @tab @code{integer len} +@item @emph{Interface}: @tab @code{subroutine acc_delete_async(a, async)} +@item @tab @code{type, dimension(:[,:]...) :: a} +@item @tab @code{integer(acc_handle_kind) :: async} +@item @emph{Interface}: @tab @code{subroutine acc_delete_async(a, len, async)} +@item @tab @code{type, dimension(:[,:]...) :: a} +@item @tab @code{integer len} +@item @tab @code{integer(acc_handle_kind) :: async} +@item @emph{Interface}: @tab @code{subroutine acc_delete_finalize(a)} +@item @tab @code{type, dimension(:[,:]...) :: a} +@item @emph{Interface}: @tab @code{subroutine acc_delete_finalize(a, len)} +@item @tab @code{type, dimension(:[,:]...) :: a} +@item @tab @code{integer len} +@item @emph{Interface}: @tab @code{subroutine acc_delete_finalize_async(a, async)} +@item @tab @code{type, dimension(:[,:]...) :: a} +@item @tab @code{integer(acc_handle_kind) :: async} +@item @emph{Interface}: @tab @code{subroutine acc_delete_finalize_async(a, len, async)} +@item @tab @code{type, dimension(:[,:]...) :: a} +@item @tab @code{integer len} +@item @tab @code{integer(acc_handle_kind) :: async} @end multitable @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section -3.2.22.
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section +3.2.23. @end table @@ -2582,6 +2656,7 @@ array element and @var{len} specifies the length in bytes. @item @emph{C/C++}: @multitable @columnfractions .20 .80 @item @emph{Prototype}: @tab @code{acc_update_device(h_void *a, size_t len);} +@item @emph{Prototype}: @tab @code{acc_update_device_async(h_void *a, size_t len, int async);} @end multitable @item @emph{Fortran}: @@ -2591,11 +2666,18 @@ array element and @var{len} specifies the length in bytes. @item @emph{Interface}: @tab @code{subroutine acc_update_device(a, len)} @item @tab @code{type, dimension(:[,:]...) :: a} @item @tab @code{integer len} +@item @emph{Interface}: @tab @code{subroutine acc_update_device_async(a, async)} +@item @tab @code{type, dimension(:[,:]...) :: a} +@item @tab @code{integer(acc_handle_kind) :: async} +@item @emph{Interface}: @tab @code{subroutine acc_update_device_async(a, len, async)} +@item @tab @code{type, dimension(:[,:]...) :: a} +@item @tab @code{integer len} +@item @tab @code{integer(acc_handle_kind) :: async} @end multitable @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section -3.2.23. +@uref{https://www.openacc.org, OpenACC specification v2.6}, section +3.2.24. @end table @@ -2615,6 +2697,7 @@ array element and @var{len} specifies the length in bytes. @item @emph{C/C++}: @multitable @columnfractions .20 .80 @item @emph{Prototype}: @tab @code{acc_update_self(h_void *a, size_t len);} +@item @emph{Prototype}: @tab @code{acc_update_self_async(h_void *a, size_t len, int async);} @end multitable @item @emph{Fortran}: @@ -2624,11 +2707,18 @@ array element and @var{len} specifies the length in bytes. @item @emph{Interface}: @tab @code{subroutine acc_update_self(a, len)} @item @tab @code{type, dimension(:[,:]...) :: a} @item @tab @code{integer len} +@item @emph{Interface}: @tab @code{subroutine acc_update_self_async(a, async)} +@item @tab @code{type, dimension(:[,:]...)
:: a} +@item @tab @code{integer(acc_handle_kind) :: async} +@item @emph{Interface}: @tab @code{subroutine acc_update_self_async(a, len, async)} +@item @tab @code{type, dimension(:[,:]...) :: a} +@item @tab @code{integer len} +@item @tab @code{integer(acc_handle_kind) :: async} @end multitable @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section -3.2.24. +@uref{https://www.openacc.org, OpenACC specification v2.6}, section +3.2.25. @end table @@ -2647,8 +2737,8 @@ specified with the host address @var{h} and a length of @var{len}. @end multitable @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section -3.2.25. +@uref{https://www.openacc.org, OpenACC specification v2.6}, section +3.2.26. @end table @@ -2666,8 +2756,8 @@ specified by @var{h}. @end multitable @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section -3.2.26. +@uref{https://www.openacc.org, OpenACC specification v2.6}, section +3.2.27. @end table @@ -2685,8 +2775,8 @@ host address specified by @var{h}. @end multitable @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section -3.2.27. +@uref{https://www.openacc.org, OpenACC specification v2.6}, section +3.2.28. @end table @@ -2704,8 +2794,8 @@ device address specified by @var{d}. @end multitable @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section -3.2.28. +@uref{https://www.openacc.org, OpenACC specification v2.6}, section +3.2.29. @end table @@ -2743,8 +2833,8 @@ a @code{false} is return to indicate the mapped memory is not present. @end multitable @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section -3.2.29. +@uref{https://www.openacc.org, OpenACC specification v2.6}, section +3.2.30. 
@end table @@ -2763,8 +2853,8 @@ device memory specified by the device address @var{dest} for a length of @end multitable @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section -3.2.30. +@uref{https://www.openacc.org, OpenACC specification v2.6}, section +3.2.31. @end table @@ -2783,8 +2873,50 @@ device memory specified by the device address @var{dest} for a length of @end multitable @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section -3.2.31. +@uref{https://www.openacc.org, OpenACC specification v2.6}, section +3.2.32. +@end table + + + +@node acc_attach +@section @code{acc_attach} -- Let device pointer point to device-pointer target. +@table @asis +@item @emph{Description} +This function updates a pointer on the device from pointing to a host-pointer +address to pointing to the corresponding device data. + +@item @emph{C/C++}: +@multitable @columnfractions .20 .80 +@item @emph{Prototype}: @tab @code{acc_attach(h_void **ptr);} +@item @emph{Prototype}: @tab @code{acc_attach_async(h_void **ptr, int async);} +@end multitable + +@item @emph{Reference}: +@uref{https://www.openacc.org, OpenACC specification v2.6}, section +3.2.34. +@end table + + + +@node acc_detach +@section @code{acc_detach} -- Let device pointer point to host-pointer target. +@table @asis +@item @emph{Description} +This function updates a pointer on the device from pointing to a device-pointer +address to pointing to the corresponding host data. + +@item @emph{C/C++}: +@multitable @columnfractions .20 .80 +@item @emph{Prototype}: @tab @code{acc_detach(h_void **ptr);} +@item @emph{Prototype}: @tab @code{acc_detach_async(h_void **ptr, int async);} +@item @emph{Prototype}: @tab @code{acc_detach_finalize(h_void **ptr);} +@item @emph{Prototype}: @tab @code{acc_detach_finalize_async(h_void **ptr, int async);} +@end multitable + +@item @emph{Reference}: +@uref{https://www.openacc.org, OpenACC specification v2.6}, section +3.2.35. 
@end table @@ -2802,7 +2934,7 @@ as used by the CUDA Runtime or Driver API's. @end multitable @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section +@uref{https://www.openacc.org, OpenACC specification v2.6}, section A.2.1.1. @end table @@ -2821,7 +2953,7 @@ as used by the CUDA Runtime or Driver API's. @end multitable @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section +@uref{https://www.openacc.org, OpenACC specification v2.6}, section A.2.1.2. @end table @@ -2840,7 +2972,7 @@ This handle is the same as used by the CUDA Runtime or Driver API's. @end multitable @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section +@uref{https://www.openacc.org, OpenACC specification v2.6}, section A.2.1.3. @end table @@ -2864,7 +2996,7 @@ The return value is not specified. @end multitable @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section +@uref{https://www.openacc.org, OpenACC specification v2.6}, section A.2.1.4. @end table @@ -2980,7 +3112,7 @@ The variable @env{GCC_ACC_NOTIFY} is used for diagnostic purposes. @section @code{ACC_DEVICE_TYPE} @table @asis @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section +@uref{https://www.openacc.org, OpenACC specification v2.6}, section 4.1. @end table @@ -2990,7 +3122,7 @@ The variable @env{GCC_ACC_NOTIFY} is used for diagnostic purposes. @section @code{ACC_DEVICE_NUM} @table @asis @item @emph{Reference}: -@uref{https://www.openacc.org, OpenACC specification v2.0}, section +@uref{https://www.openacc.org, OpenACC specification v2.6}, section 4.2. @end table @@ -3033,7 +3165,7 @@ asynchronous functionality is implemented by making use of CUDA streams@footnote{See "Stream Management" in "CUDA Driver API", TRM-06703-001, Version 5.5, for additional information}. 
-The primary means by that the asychronous functionality is accessed +The primary means by which the asynchronous functionality is accessed is through the use of those OpenACC directives which make use of the @code{async} and @code{wait} clauses. When the @code{async} clause is first used with a directive, it creates a CUDA stream. If an @@ -3206,8 +3338,8 @@ similarly to the first use case. There are two environment variables associated with the OpenACC library that may be used to control the device type and device number: -@env{ACC_DEVICE_TYPE} and @env{ACC_DEVICE_NUM}, respecively. These two -environement variables can be used as an alternative to calling +@env{ACC_DEVICE_TYPE} and @env{ACC_DEVICE_NUM}, respectively. These two +environment variables can be used as an alternative to calling @code{acc_set_device_num()}. As seen in the second use case, the device type and device number were specified using @code{acc_set_device_num()}. If however, the aforementioned environment variables were set, then the @@ -3220,7 +3352,7 @@ is called prior to a call to an OpenACC function, then you must call @code{acc_set_device_num()}@footnote{More complete information about @env{ACC_DEVICE_TYPE} and @env{ACC_DEVICE_NUM} can be found in sections 4.1 and 4.2 of the @uref{https://www.openacc.org, OpenACC} -Application Programming Interface”, Version 2.0.} +Application Programming Interface”, Version 2.6.} @@ -3935,7 +4067,7 @@ becomes @chapter Reporting Bugs Bugs in the GNU Offloading and Multi Processing Runtime Library should -be reported via @uref{http://gcc.gnu.org/bugzilla/, Bugzilla}. Please add +be reported via @uref{https://gcc.gnu.org/bugzilla/, Bugzilla}. Please add "openacc", or "openmp", or both to the keywords field in the bug report, as appropriate.
diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c index 22c1894..4638789 100644 --- a/libgomp/oacc-host.c +++ b/libgomp/oacc-host.c @@ -59,27 +59,6 @@ host_get_num_devices (void) return 1; } -static union gomp_device_property_value -host_get_property (int n, int prop) -{ - union gomp_device_property_value nullval = { .val = 0 }; - - if (n >= host_get_num_devices ()) - return nullval; - - switch (prop) - { - case GOMP_DEVICE_PROPERTY_NAME: - return (union gomp_device_property_value) { .ptr = "GOMP" }; - case GOMP_DEVICE_PROPERTY_VENDOR: - return (union gomp_device_property_value) { .ptr = "GNU" }; - case GOMP_DEVICE_PROPERTY_DRIVER: - return (union gomp_device_property_value) { .ptr = VERSION }; - default: - return nullval; - } -} - static bool host_init_device (int n __attribute__ ((unused))) { @@ -245,6 +224,29 @@ host_openacc_async_destruct (struct goacc_asyncqueue *aq return true; } +static union goacc_property_value +host_openacc_get_property (int n, enum goacc_property prop) +{ + union goacc_property_value nullval = { .val = 0 }; + + if (n >= host_get_num_devices ()) + return nullval; + + switch (prop) + { + case GOACC_PROPERTY_NAME: + return (union goacc_property_value) { .ptr = "GOMP" }; + case GOACC_PROPERTY_VENDOR: + return (union goacc_property_value) { .ptr = "GNU" }; + case GOACC_PROPERTY_DRIVER: + return (union goacc_property_value) { .ptr = VERSION }; + case GOACC_PROPERTY_MEMORY: + case GOACC_PROPERTY_FREE_MEMORY: + default: + return nullval; + } +} + static void * host_openacc_create_thread_data (int ord __attribute__ ((unused))) { @@ -269,7 +271,6 @@ static struct gomp_device_descr host_dispatch = .get_caps_func = host_get_caps, .get_type_func = host_get_type, .get_num_devices_func = host_get_num_devices, - .get_property_func = host_get_property, .init_device_func = host_init_device, .fini_device_func = host_fini_device, .version_func = host_version, @@ -303,6 +304,8 @@ static struct gomp_device_descr host_dispatch = .host2dev_func = 
host_openacc_async_host2dev, }, + .get_property_func = host_openacc_get_property, + .cuda = { .get_current_device_func = NULL, .get_current_context_func = NULL, diff --git a/libgomp/oacc-init.c b/libgomp/oacc-init.c index d15f08f..5d786a5 100644 --- a/libgomp/oacc-init.c +++ b/libgomp/oacc-init.c @@ -99,7 +99,9 @@ unknown_device_type_error (acc_device_t invalid_type) static const char * get_openacc_name (const char *name) { - if (strcmp (name, "nvptx") == 0) + if (strcmp (name, "gcn") == 0) + return "radeon"; + else if (strcmp (name, "nvptx") == 0) return "nvidia"; else return name; @@ -115,6 +117,7 @@ name_of_acc_device_t (enum acc_device_t type) case acc_device_host: return "host"; case acc_device_not_host: return "not_host"; case acc_device_nvidia: return "nvidia"; + case acc_device_radeon: return "radeon"; default: unknown_device_type_error (type); } __builtin_unreachable (); @@ -760,14 +763,14 @@ acc_set_device_num (int ord, acc_device_t d) ialias (acc_set_device_num) -static union gomp_device_property_value +static union goacc_property_value get_property_any (int ord, acc_device_t d, acc_device_property_t prop) { goacc_lazy_initialize (); struct goacc_thread *thr = goacc_thread (); if (d == acc_device_current && thr && thr->dev) - return thr->dev->get_property_func (thr->dev->target_id, prop); + return thr->dev->openacc.get_property_func (thr->dev->target_id, prop); gomp_mutex_lock (&acc_device_lock); @@ -789,7 +792,7 @@ get_property_any (int ord, acc_device_t d, acc_device_property_t prop) assert (dev); - return dev->get_property_func (dev->target_id, prop); + return dev->openacc.get_property_func (dev->target_id, prop); } size_t @@ -798,7 +801,7 @@ acc_get_property (int ord, acc_device_t d, acc_device_property_t prop) if (!known_device_type_p (d)) unknown_device_type_error(d); - if (prop & GOMP_DEVICE_PROPERTY_STRING_MASK) + if (prop & GOACC_PROPERTY_STRING_MASK) return 0; else return get_property_any (ord, d, prop).val; @@ -812,7 +815,7 @@ 
acc_get_property_string (int ord, acc_device_t d, acc_device_property_t prop) if (!known_device_type_p (d)) unknown_device_type_error(d); - if (prop & GOMP_DEVICE_PROPERTY_STRING_MASK) + if (prop & GOACC_PROPERTY_STRING_MASK) return get_property_any (ord, d, prop).ptr; else return NULL; diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index 2d4bba7..936ae649 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -355,7 +355,6 @@ acc_is_present (void *h, size_t s) void acc_map_data (void *h, void *d, size_t s) { - struct target_mem_desc *tgt = NULL; size_t mapnum = 1; void *hostaddrs = h; void *devaddrs = d; @@ -402,10 +401,13 @@ acc_map_data (void *h, void *d, size_t s) gomp_mutex_unlock (&acc_dev->lock); - tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes, - &kinds, true, GOMP_MAP_VARS_ENTER_DATA); + struct target_mem_desc *tgt + = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes, + &kinds, true, GOMP_MAP_VARS_ENTER_DATA); assert (tgt); + assert (tgt->list_count == 1); splay_tree_key n = tgt->list[0].key; + assert (n); assert (n->refcount == 1); assert (n->virtual_refcount == 0); /* Special reference counting behavior. */ @@ -466,8 +468,6 @@ acc_unmap_data (void *h) (void *) h, (int) host_size); } - splay_tree_remove (&acc_dev->mem_map, n); - struct target_mem_desc *tgt = n->tgt; if (tgt->refcount == REFCOUNT_INFINITY) @@ -475,13 +475,18 @@ acc_unmap_data (void *h) gomp_mutex_unlock (&acc_dev->lock); gomp_fatal ("cannot unmap target block"); } - else if (tgt->refcount > 1) - tgt->refcount--; - else - { - free (tgt->array); - free (tgt); - } + + /* Above, we've verified that the mapping must have been set up by + 'acc_map_data'. */ + assert (tgt->refcount == 1); + + /* Nullifying these fields prevents 'gomp_unmap_tgt' via 'gomp_remove_var' + from freeing the target memory. 
*/ + tgt->tgt_end = 0; + tgt->to_free = NULL; + + bool is_tgt_unmapped = gomp_remove_var (acc_dev, n); + assert (is_tgt_unmapped); gomp_mutex_unlock (&acc_dev->lock); @@ -555,16 +560,17 @@ goacc_enter_datum (void **hostaddrs, size_t *sizes, void *kinds, int async) goacc_aq aq = get_goacc_asyncqueue (async); - gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds, - true, GOMP_MAP_VARS_OPENACC_ENTER_DATA); + struct target_mem_desc *tgt + = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, + kinds, true, GOMP_MAP_VARS_OPENACC_ENTER_DATA); + assert (tgt); + assert (tgt->list_count == 1); + n = tgt->list[0].key; + assert (n); + assert (n->refcount == 1); + assert (n->virtual_refcount == 0); - gomp_mutex_lock (&acc_dev->lock); - n = lookup_host (acc_dev, hostaddrs[0], sizes[0]); - assert (n != NULL); - assert (n->tgt_offset == 0); - assert ((uintptr_t) hostaddrs[0] == n->host_start); - d = (void *) n->tgt->tgt_start; - gomp_mutex_unlock (&acc_dev->lock); + d = (void *) tgt->tgt_start; } if (profiling_p) @@ -722,8 +728,16 @@ goacc_exit_datum (void *h, size_t s, unsigned short kind, int async) gomp_remove_var_async (acc_dev, n, aq); else { + size_t num_mappings = 0; + /* If the target_mem_desc represents a single data mapping, we can + check that it is freed when this splay tree key's refcount reaches + zero. Otherwise (e.g. for a 'GOMP_MAP_STRUCT' mapping with + multiple members), fall back to skipping the test. 
*/ + for (size_t l_i = 0; l_i < n->tgt->list_count; ++l_i) + if (n->tgt->list[l_i].key) + ++num_mappings; bool is_tgt_unmapped = gomp_remove_var (acc_dev, n); - assert (is_tgt_unmapped); + assert (is_tgt_unmapped || num_mappings > 1); } } @@ -887,7 +901,10 @@ acc_attach_async (void **hostaddr, int async) n = splay_tree_lookup (&acc_dev->mem_map, &cur_node); if (n == NULL) - gomp_fatal ("struct not mapped for acc_attach"); + { + gomp_mutex_unlock (&acc_dev->lock); + gomp_fatal ("struct not mapped for acc_attach"); + } gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, (uintptr_t) hostaddr, 0, NULL); @@ -920,7 +937,10 @@ goacc_detach_internal (void **hostaddr, int async, bool finalize) n = splay_tree_lookup (&acc_dev->mem_map, &cur_node); if (n == NULL) - gomp_fatal ("struct not mapped for acc_detach"); + { + gomp_mutex_unlock (&acc_dev->lock); + gomp_fatal ("struct not mapped for acc_detach"); + } gomp_detach_pointer (acc_dev, aq, n, (uintptr_t) hostaddr, finalize, NULL); @@ -1054,7 +1074,10 @@ goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, = splay_tree_lookup (&acc_dev->mem_map, &cur_node); if (n == NULL) - gomp_fatal ("struct not mapped for detach operation"); + { + gomp_mutex_unlock (&acc_dev->lock); + gomp_fatal ("struct not mapped for detach operation"); + } gomp_detach_pointer (acc_dev, aq, n, hostaddr, finalize, NULL); } @@ -1131,45 +1154,38 @@ goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, cur_node.host_end - cur_node.host_start); if (n->refcount == 0) - gomp_remove_var_async (acc_dev, n, aq); - } - break; - - case GOMP_MAP_STRUCT: - { - int elems = sizes[i]; - for (int j = 1; j <= elems; j++) { - struct splay_tree_key_s k; - k.host_start = (uintptr_t) hostaddrs[i + j]; - k.host_end = k.host_start + sizes[i + j]; - splay_tree_key str; - str = splay_tree_lookup (&acc_dev->mem_map, &k); - if (str) + if (aq) + /* TODO We can't do the 'is_tgt_unmapped' checking -- see the + 'gomp_unref_tgt' comment 
in + <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>; + PR92881. */ + gomp_remove_var_async (acc_dev, n, aq); + else { - if (finalize) - { - if (str->refcount != REFCOUNT_INFINITY) - str->refcount -= str->virtual_refcount; - str->virtual_refcount = 0; - } - if (str->virtual_refcount > 0) - { - if (str->refcount != REFCOUNT_INFINITY) - str->refcount--; - str->virtual_refcount--; - } - else if (str->refcount > 0 - && str->refcount != REFCOUNT_INFINITY) - str->refcount--; - if (str->refcount == 0) - gomp_remove_var_async (acc_dev, str, aq); + size_t num_mappings = 0; + /* If the target_mem_desc represents a single data mapping, + we can check that it is freed when this splay tree key's + refcount reaches zero. Otherwise (e.g. for a + 'GOMP_MAP_STRUCT' mapping with multiple members), fall + back to skipping the test. */ + for (size_t l_i = 0; l_i < n->tgt->list_count; ++l_i) + if (n->tgt->list[l_i].key) + ++num_mappings; + bool is_tgt_unmapped = gomp_remove_var (acc_dev, n); + assert (is_tgt_unmapped || num_mappings > 1); } } - i += elems; } break; + case GOMP_MAP_STRUCT: + /* Skip the 'GOMP_MAP_STRUCT' itself, and use the regular processing + for all its entries. This special handling exists for GCC 10.1 + compatibility; afterwards, we're not generating these no-op + 'GOMP_MAP_STRUCT's anymore. 
*/ + break; + default: gomp_fatal (">>>> goacc_exit_data_internal UNHANDLED kind 0x%.2x", kind); diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c index edfc606..c7e46e3 100644 --- a/libgomp/oacc-parallel.c +++ b/libgomp/oacc-parallel.c @@ -415,7 +415,8 @@ GOACC_data_start (int flags_m, size_t mapnum, = _ACC_OTHER_EVENT_INFO_VALID_BYTES; enter_data_event_info.other_event.parent_construct = acc_construct_data; for (int i = 0; i < mapnum; ++i) - if ((kinds[i] & 0xff) == GOMP_MAP_USE_DEVICE_PTR) + if ((kinds[i] & 0xff) == GOMP_MAP_USE_DEVICE_PTR + || (kinds[i] & 0xff) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT) { /* If there is one such data mapping kind, then this is actually an OpenACC 'host_data' construct. (GCC maps the OpenACC diff --git a/libgomp/omp.h.in b/libgomp/omp.h.in index 06a96c5..e2db33e 100644 --- a/libgomp/omp.h.in +++ b/libgomp/omp.h.in @@ -90,11 +90,87 @@ typedef enum omp_pause_resource_t omp_pause_hard = 2 } omp_pause_resource_t; +typedef __UINTPTR_TYPE__ omp_uintptr_t; + +#if __cplusplus >= 201103L +# define __GOMP_UINTPTR_T_ENUM : omp_uintptr_t +#else +# define __GOMP_UINTPTR_T_ENUM +#endif + +typedef enum omp_memspace_handle_t __GOMP_UINTPTR_T_ENUM +{ + omp_default_mem_space = 0, + omp_large_cap_mem_space = 1, + omp_const_mem_space = 2, + omp_high_bw_mem_space = 3, + omp_low_lat_mem_space = 4, + __omp_memspace_handle_t_max__ = __UINTPTR_MAX__ +} omp_memspace_handle_t; + +typedef enum omp_allocator_handle_t __GOMP_UINTPTR_T_ENUM +{ + omp_null_allocator = 0, + omp_default_mem_alloc = 1, + omp_large_cap_mem_alloc = 2, + omp_const_mem_alloc = 3, + omp_high_bw_mem_alloc = 4, + omp_low_lat_mem_alloc = 5, + omp_cgroup_mem_alloc = 6, + omp_pteam_mem_alloc = 7, + omp_thread_mem_alloc = 8, + __omp_allocator_handle_t_max__ = __UINTPTR_MAX__ +} omp_allocator_handle_t; + +typedef enum omp_alloctrait_key_t +{ + omp_atk_sync_hint = 1, + omp_atk_alignment = 2, + omp_atk_access = 3, + omp_atk_pool_size = 4, + omp_atk_fallback = 5, + omp_atk_fb_data = 6, 
+ omp_atk_pinned = 7, + omp_atk_partition = 8 +} omp_alloctrait_key_t; + +typedef enum omp_alloctrait_value_t +{ + omp_atv_false = 0, + omp_atv_true = 1, + omp_atv_default = 2, + omp_atv_contended = 3, + omp_atv_uncontended = 4, + omp_atv_sequential = 5, + omp_atv_private = 6, + omp_atv_all = 7, + omp_atv_thread = 8, + omp_atv_pteam = 9, + omp_atv_cgroup = 10, + omp_atv_default_mem_fb = 11, + omp_atv_null_fb = 12, + omp_atv_abort_fb = 13, + omp_atv_allocator_fb = 14, + omp_atv_environment = 15, + omp_atv_nearest = 16, + omp_atv_blocked = 17, + omp_atv_interleaved = 18, + __omp_alloctrait_value_max__ = __UINTPTR_MAX__ +} omp_alloctrait_value_t; + +typedef struct omp_alloctrait_t +{ + omp_alloctrait_key_t key; + omp_uintptr_t value; +} omp_alloctrait_t; + #ifdef __cplusplus extern "C" { # define __GOMP_NOTHROW throw () +# define __GOMP_DEFAULT_NULL_ALLOCATOR = omp_null_allocator #else # define __GOMP_NOTHROW __attribute__((__nothrow__)) +# define __GOMP_DEFAULT_NULL_ALLOCATOR #endif extern void omp_set_num_threads (int) __GOMP_NOTHROW; @@ -188,6 +264,20 @@ extern __SIZE_TYPE__ omp_capture_affinity (char *, __SIZE_TYPE__, const char *) extern int omp_pause_resource (omp_pause_resource_t, int) __GOMP_NOTHROW; extern int omp_pause_resource_all (omp_pause_resource_t) __GOMP_NOTHROW; +extern omp_allocator_handle_t omp_init_allocator (omp_memspace_handle_t, + int, + const omp_alloctrait_t []) + __GOMP_NOTHROW; +extern void omp_destroy_allocator (omp_allocator_handle_t) __GOMP_NOTHROW; +extern void omp_set_default_allocator (omp_allocator_handle_t) __GOMP_NOTHROW; +extern omp_allocator_handle_t omp_get_default_allocator (void) __GOMP_NOTHROW; +extern void *omp_alloc (__SIZE_TYPE__, + omp_allocator_handle_t __GOMP_DEFAULT_NULL_ALLOCATOR) + __GOMP_NOTHROW; +extern void omp_free (void *, + omp_allocator_handle_t __GOMP_DEFAULT_NULL_ALLOCATOR) + __GOMP_NOTHROW; + #ifdef __cplusplus } #endif diff --git a/libgomp/openacc.f90 b/libgomp/openacc.f90 index a308316..111705d 100644 --- 
a/libgomp/openacc.f90 +++ b/libgomp/openacc.f90 @@ -31,34 +31,36 @@ module openacc_kinds use iso_fortran_env, only: int32 - use iso_c_binding, only: c_size_t implicit none public - private :: int32, c_size_t + private :: int32 ! When adding items, also update 'public' setting in 'module openacc' below. integer, parameter :: acc_device_kind = int32 ! Keep in sync with include/gomp-constants.h. - integer (acc_device_kind), parameter :: acc_device_current = -3 + integer (acc_device_kind), parameter :: acc_device_current = -1 integer (acc_device_kind), parameter :: acc_device_none = 0 integer (acc_device_kind), parameter :: acc_device_default = 1 integer (acc_device_kind), parameter :: acc_device_host = 2 ! integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 removed. integer (acc_device_kind), parameter :: acc_device_not_host = 4 integer (acc_device_kind), parameter :: acc_device_nvidia = 5 - integer (acc_device_kind), parameter :: acc_device_gcn = 8 + integer (acc_device_kind), parameter :: acc_device_radeon = 8 - integer, parameter :: acc_device_property = c_size_t + integer, parameter :: acc_device_property_kind = int32 + ! OpenACC 2.6/2.7/3.0 used acc_device_property; in a spec update the + ! missing '_kind' was added for consistency. For backward compatibility, keep: + integer, parameter :: acc_device_property = acc_device_property_kind - ! Keep in sync with include/gomp-constants.h. - integer (acc_device_property), parameter :: acc_property_memory = 1 - integer (acc_device_property), parameter :: acc_property_free_memory = 2 - integer (acc_device_property), parameter :: acc_property_name = int(Z'10001') - integer (acc_device_property), parameter :: acc_property_vendor = int(Z'10002') - integer (acc_device_property), parameter :: acc_property_driver = int(Z'10003') + ! Keep in sync with 'libgomp/libgomp-plugin.h:goacc_property'. 
+ integer (acc_device_property_kind), parameter :: acc_property_memory = 1 + integer (acc_device_property_kind), parameter :: acc_property_free_memory = 2 + integer (acc_device_property_kind), parameter :: acc_property_name = int(Z'10001') + integer (acc_device_property_kind), parameter :: acc_property_vendor = int(Z'10002') + integer (acc_device_property_kind), parameter :: acc_property_driver = int(Z'10003') integer, parameter :: acc_handle_kind = int32 @@ -72,15 +74,15 @@ module openacc_internal implicit none interface - function acc_get_num_devices_h (d) + function acc_get_num_devices_h (devicetype) import integer acc_get_num_devices_h - integer (acc_device_kind) d + integer (acc_device_kind) devicetype end function - subroutine acc_set_device_type_h (d) + subroutine acc_set_device_type_h (devicetype) import - integer (acc_device_kind) d + integer (acc_device_kind) devicetype end subroutine function acc_get_device_type_h () @@ -88,73 +90,74 @@ module openacc_internal integer (acc_device_kind) acc_get_device_type_h end function - subroutine acc_set_device_num_h (n, d) + subroutine acc_set_device_num_h (devicenum, devicetype) import - integer n - integer (acc_device_kind) d + integer devicenum + integer (acc_device_kind) devicetype end subroutine - function acc_get_device_num_h (d) + function acc_get_device_num_h (devicetype) import integer acc_get_device_num_h - integer (acc_device_kind) d + integer (acc_device_kind) devicetype end function - function acc_get_property_h (n, d, p) + function acc_get_property_h (devicenum, devicetype, property) + use iso_c_binding, only: c_size_t import implicit none (type, external) - integer (acc_device_property) :: acc_get_property_h - integer, value :: n - integer (acc_device_kind), value :: d - integer (acc_device_property), value :: p + integer (c_size_t) :: acc_get_property_h + integer, value :: devicenum + integer (acc_device_kind), value :: devicetype + integer (acc_device_property_kind), value :: property end function - 
subroutine acc_get_property_string_h (n, d, p, s) + subroutine acc_get_property_string_h (devicenum, devicetype, property, string) import implicit none (type, external) - integer, value :: n - integer (acc_device_kind), value :: d - integer (acc_device_property), value :: p - character (*) :: s + integer, value :: devicenum + integer (acc_device_kind), value :: devicetype + integer (acc_device_property_kind), value :: property + character (*) :: string end subroutine - function acc_async_test_h (a) + function acc_async_test_h (arg) logical acc_async_test_h - integer a + integer arg end function function acc_async_test_all_h () logical acc_async_test_all_h end function - subroutine acc_wait_h (a) - integer a + subroutine acc_wait_h (arg) + integer arg end subroutine - subroutine acc_wait_async_h (a1, a2) - integer a1, a2 + subroutine acc_wait_async_h (arg, async) + integer arg, async end subroutine subroutine acc_wait_all_h () end subroutine - subroutine acc_wait_all_async_h (a) - integer a + subroutine acc_wait_all_async_h (async) + integer async end subroutine - subroutine acc_init_h (d) + subroutine acc_init_h (devicetype) import - integer (acc_device_kind) d + integer (acc_device_kind) devicetype end subroutine - subroutine acc_shutdown_h (d) + subroutine acc_shutdown_h (devicetype) import - integer (acc_device_kind) d + integer (acc_device_kind) devicetype end subroutine - function acc_on_device_h (d) + function acc_on_device_h (devicetype) import - integer (acc_device_kind) d + integer (acc_device_kind) devicetype logical acc_on_device_h end function @@ -505,17 +508,17 @@ module openacc_internal end interface interface - function acc_get_num_devices_l (d) & + function acc_get_num_devices_l (devicetype) & bind (C, name = "acc_get_num_devices") use iso_c_binding, only: c_int integer (c_int) :: acc_get_num_devices_l - integer (c_int), value :: d + integer (c_int), value :: devicetype end function - subroutine acc_set_device_type_l (d) & + subroutine 
acc_set_device_type_l (devicetype) & bind (C, name = "acc_set_device_type") use iso_c_binding, only: c_int - integer (c_int), value :: d + integer (c_int), value :: devicetype end subroutine function acc_get_device_type_l () & @@ -524,37 +527,37 @@ module openacc_internal integer (c_int) :: acc_get_device_type_l end function - subroutine acc_set_device_num_l (n, d) & + subroutine acc_set_device_num_l (devicenum, devicetype) & bind (C, name = "acc_set_device_num") use iso_c_binding, only: c_int - integer (c_int), value :: n, d + integer (c_int), value :: devicenum, devicetype end subroutine - function acc_get_device_num_l (d) & + function acc_get_device_num_l (devicetype) & bind (C, name = "acc_get_device_num") use iso_c_binding, only: c_int integer (c_int) :: acc_get_device_num_l - integer (c_int), value :: d + integer (c_int), value :: devicetype end function - function acc_get_property_l (n, d, p) & + function acc_get_property_l (devicenum, devicetype, property) & bind (C, name = "acc_get_property") use iso_c_binding, only: c_int, c_size_t implicit none (type, external) integer (c_size_t) :: acc_get_property_l - integer (c_int), value :: n - integer (c_int), value :: d - integer (c_int), value :: p + integer (c_int), value :: devicenum + integer (c_int), value :: devicetype + integer (c_int), value :: property end function - function acc_get_property_string_l (n, d, p) & + function acc_get_property_string_l (devicenum, devicetype, property) & bind (C, name = "acc_get_property_string") use iso_c_binding, only: c_int, c_ptr implicit none (type, external) type (c_ptr) :: acc_get_property_string_l - integer (c_int), value :: n - integer (c_int), value :: d - integer (c_int), value :: p + integer (c_int), value :: devicenum + integer (c_int), value :: devicetype + integer (c_int), value :: property end function function acc_async_test_l (a) & @@ -576,10 +579,10 @@ module openacc_internal integer (c_int), value :: a end subroutine - subroutine acc_wait_async_l (a1, a2) 
& + subroutine acc_wait_async_l (arg, async) & bind (C, name = "acc_wait_async") use iso_c_binding, only: c_int - integer (c_int), value :: a1, a2 + integer (c_int), value :: arg, async end subroutine subroutine acc_wait_all_l () & @@ -587,29 +590,29 @@ module openacc_internal use iso_c_binding, only: c_int end subroutine - subroutine acc_wait_all_async_l (a) & + subroutine acc_wait_all_async_l (async) & bind (C, name = "acc_wait_all_async") use iso_c_binding, only: c_int - integer (c_int), value :: a + integer (c_int), value :: async end subroutine - subroutine acc_init_l (d) & + subroutine acc_init_l (devicetype) & bind (C, name = "acc_init") use iso_c_binding, only: c_int - integer (c_int), value :: d + integer (c_int), value :: devicetype end subroutine - subroutine acc_shutdown_l (d) & + subroutine acc_shutdown_l (devicetype) & bind (C, name = "acc_shutdown") use iso_c_binding, only: c_int - integer (c_int), value :: d + integer (c_int), value :: devicetype end subroutine - function acc_on_device_l (d) & + function acc_on_device_l (devicetype) & bind (C, name = "acc_on_device") use iso_c_binding, only: c_int integer (c_int) :: acc_on_device_l - integer (c_int), value :: d + integer (c_int), value :: devicetype end function subroutine acc_copyin_l (a, len) & @@ -767,9 +770,9 @@ module openacc ! 
From openacc_kinds public :: acc_device_kind public :: acc_device_none, acc_device_default, acc_device_host - public :: acc_device_not_host, acc_device_nvidia, acc_device_gcn + public :: acc_device_not_host, acc_device_nvidia, acc_device_radeon - public :: acc_device_property + public :: acc_device_property_kind, acc_device_property public :: acc_property_memory, acc_property_free_memory public :: acc_property_name, acc_property_vendor, acc_property_driver @@ -792,7 +795,7 @@ module openacc public :: acc_delete_async, acc_update_device_async, acc_update_self_async public :: acc_copyout_finalize, acc_delete_finalize - integer, parameter :: openacc_version = 201306 + integer, parameter :: openacc_version = 201711 interface acc_get_num_devices procedure :: acc_get_num_devices_h @@ -1002,19 +1005,19 @@ module openacc end module openacc -function acc_get_num_devices_h (d) +function acc_get_num_devices_h (devicetype) use openacc_internal, only: acc_get_num_devices_l use openacc_kinds integer acc_get_num_devices_h - integer (acc_device_kind) d - acc_get_num_devices_h = acc_get_num_devices_l (d) + integer (acc_device_kind) devicetype + acc_get_num_devices_h = acc_get_num_devices_l (devicetype) end function -subroutine acc_set_device_type_h (d) +subroutine acc_set_device_type_h (devicetype) use openacc_internal, only: acc_set_device_type_l use openacc_kinds - integer (acc_device_kind) d - call acc_set_device_type_l (d) + integer (acc_device_kind) devicetype + call acc_set_device_type_l (devicetype) end subroutine function acc_get_device_type_h () @@ -1024,54 +1027,47 @@ function acc_get_device_type_h () acc_get_device_type_h = acc_get_device_type_l () end function -subroutine acc_set_device_num_h (n, d) +subroutine acc_set_device_num_h (devicenum, devicetype) use openacc_internal, only: acc_set_device_num_l use openacc_kinds - integer n - integer (acc_device_kind) d - call acc_set_device_num_l (n, d) + integer devicenum + integer (acc_device_kind) devicetype + call 
acc_set_device_num_l (devicenum, devicetype) end subroutine -function acc_get_device_num_h (d) +function acc_get_device_num_h (devicetype) use openacc_internal, only: acc_get_device_num_l use openacc_kinds integer acc_get_device_num_h - integer (acc_device_kind) d - acc_get_device_num_h = acc_get_device_num_l (d) + integer (acc_device_kind) devicetype + acc_get_device_num_h = acc_get_device_num_l (devicetype) end function -function acc_get_property_h (n, d, p) - use iso_c_binding, only: c_int, c_size_t +function acc_get_property_h (devicenum, devicetype, property) + use iso_c_binding, only: c_size_t use openacc_internal, only: acc_get_property_l use openacc_kinds implicit none (type, external) - integer (acc_device_property) :: acc_get_property_h - integer, value :: n - integer (acc_device_kind), value :: d - integer (acc_device_property), value :: p - - integer (c_int) :: pint - - pint = int (p, c_int) - acc_get_property_h = acc_get_property_l (n, d, pint) + integer (c_size_t) :: acc_get_property_h + integer, value :: devicenum + integer (acc_device_kind), value :: devicetype + integer (acc_device_property_kind), value :: property + acc_get_property_h = acc_get_property_l (devicenum, devicetype, property) end function -subroutine acc_get_property_string_h (n, d, p, s) - use iso_c_binding, only: c_char, c_int, c_ptr, c_f_pointer, c_associated +subroutine acc_get_property_string_h (devicenum, devicetype, property, string) + use iso_c_binding, only: c_char, c_size_t, c_ptr, c_f_pointer, c_associated use openacc_internal, only: acc_get_property_string_l use openacc_kinds implicit none (type, external) - integer, value :: n - integer (acc_device_kind), value :: d - integer (acc_device_property), value :: p - character (*) :: s + integer, value :: devicenum + integer (acc_device_kind), value :: devicetype + integer (acc_device_property_kind), value :: property + character (*) :: string - integer (c_int) :: pint type (c_ptr) :: cptr - integer :: clen + integer(c_size_t) 
:: clen, slen, i character (kind=c_char, len=1), pointer, contiguous :: sptr (:) - integer :: slen - integer :: i interface function strlen (s) bind (C, name = "strlen") @@ -1081,53 +1077,44 @@ subroutine acc_get_property_string_h (n, d, p, s) end function strlen end interface - pint = int (p, c_int) - cptr = acc_get_property_string_l (n, d, pint) - s = "" + cptr = acc_get_property_string_l (devicenum, devicetype, property) + string = "" if (.not. c_associated (cptr)) then return end if - clen = int (strlen (cptr)) + clen = strlen (cptr) call c_f_pointer (cptr, sptr, [clen]) - slen = min (clen, len (s)) + slen = min (clen, len (string, kind=c_size_t)) do i = 1, slen - s (i:i) = sptr (i) + string (i:i) = sptr (i) end do end subroutine -function acc_async_test_h (a) +function acc_async_test_h (arg) use openacc_internal, only: acc_async_test_l logical acc_async_test_h - integer a - if (acc_async_test_l (a) .eq. 1) then - acc_async_test_h = .TRUE. - else - acc_async_test_h = .FALSE. - end if + integer arg + acc_async_test_h = acc_async_test_l (arg) /= 0 end function function acc_async_test_all_h () use openacc_internal, only: acc_async_test_all_l logical acc_async_test_all_h - if (acc_async_test_all_l () .eq. 1) then - acc_async_test_all_h = .TRUE. - else - acc_async_test_all_h = .FALSE. 
- end if + acc_async_test_all_h = acc_async_test_all_l () /= 0 end function -subroutine acc_wait_h (a) +subroutine acc_wait_h (arg) use openacc_internal, only: acc_wait_l - integer a - call acc_wait_l (a) + integer arg + call acc_wait_l (arg) end subroutine -subroutine acc_wait_async_h (a1, a2) +subroutine acc_wait_async_h (arg, async) use openacc_internal, only: acc_wait_async_l - integer a1, a2 - call acc_wait_async_l (a1, a2) + integer arg, async + call acc_wait_async_l (arg, async) end subroutine subroutine acc_wait_all_h () @@ -1135,36 +1122,32 @@ subroutine acc_wait_all_h () call acc_wait_all_l () end subroutine -subroutine acc_wait_all_async_h (a) +subroutine acc_wait_all_async_h (async) use openacc_internal, only: acc_wait_all_async_l - integer a - call acc_wait_all_async_l (a) + integer async + call acc_wait_all_async_l (async) end subroutine -subroutine acc_init_h (d) +subroutine acc_init_h (devicetype) use openacc_internal, only: acc_init_l use openacc_kinds - integer (acc_device_kind) d - call acc_init_l (d) + integer (acc_device_kind) devicetype + call acc_init_l (devicetype) end subroutine -subroutine acc_shutdown_h (d) +subroutine acc_shutdown_h (devicetype) use openacc_internal, only: acc_shutdown_l use openacc_kinds - integer (acc_device_kind) d - call acc_shutdown_l (d) + integer (acc_device_kind) devicetype + call acc_shutdown_l (devicetype) end subroutine -function acc_on_device_h (d) +function acc_on_device_h (devicetype) use openacc_internal, only: acc_on_device_l use openacc_kinds - integer (acc_device_kind) d + integer (acc_device_kind) devicetype logical acc_on_device_h - if (acc_on_device_l (d) .eq. 1) then - acc_on_device_h = .TRUE. - else - acc_on_device_h = .FALSE. 
- end if + acc_on_device_h = acc_on_device_l (devicetype) /= 0 end function subroutine acc_copyin_32_h (a, len) @@ -1414,11 +1397,7 @@ function acc_is_present_32_h (a, len) !GCC$ ATTRIBUTES NO_ARG_CHECK :: a type (*), dimension (*) :: a integer (c_int32_t) len - if (acc_is_present_l (a, int (len, kind = c_size_t)) .eq. 1) then - acc_is_present_32_h = .TRUE. - else - acc_is_present_32_h = .FALSE. - end if + acc_is_present_32_h = acc_is_present_l (a, int (len, kind = c_size_t)) /= 0 end function function acc_is_present_64_h (a, len) @@ -1428,18 +1407,14 @@ function acc_is_present_64_h (a, len) !GCC$ ATTRIBUTES NO_ARG_CHECK :: a type (*), dimension (*) :: a integer (c_int64_t) len - if (acc_is_present_l (a, int (len, kind = c_size_t)) .eq. 1) then - acc_is_present_64_h = .TRUE. - else - acc_is_present_64_h = .FALSE. - end if + acc_is_present_64_h = acc_is_present_l (a, int (len, kind = c_size_t)) /= 0 end function function acc_is_present_array_h (a) use openacc_internal, only: acc_is_present_l logical acc_is_present_array_h type (*), dimension (..), contiguous :: a - acc_is_present_array_h = acc_is_present_l (a, sizeof (a)) == 1 + acc_is_present_array_h = acc_is_present_l (a, sizeof (a)) /= 0 end function subroutine acc_copyin_async_32_h (a, len, async) diff --git a/libgomp/openacc.h b/libgomp/openacc.h index 66786d7..1dc471f 100644 --- a/libgomp/openacc.h +++ b/libgomp/openacc.h @@ -49,14 +49,14 @@ extern "C" { /* Types */ typedef enum acc_device_t { /* Keep in sync with include/gomp-constants.h. */ - acc_device_current = -3, + acc_device_current = -1, acc_device_none = 0, acc_device_default = 1, acc_device_host = 2, /* acc_device_host_nonshm = 3 removed. */ acc_device_not_host = 4, acc_device_nvidia = 5, - acc_device_gcn = 8, + acc_device_radeon = 8, _ACC_device_hwm, /* Ensure enumeration is layout compatible with int. 
*/ _ACC_highest = __INT_MAX__, @@ -64,8 +64,7 @@ typedef enum acc_device_t { } acc_device_t; typedef enum acc_device_property_t { - /* Keep in sync with include/gomp-constants.h. */ - /* Start from 1 to catch uninitialized use. */ + /* Keep in sync with 'libgomp/libgomp-plugin.h:goacc_property'. */ acc_property_memory = 1, acc_property_free_memory = 2, acc_property_name = 0x10001, diff --git a/libgomp/openacc_lib.h b/libgomp/openacc_lib.h index b327ba7..82a3735 100644 --- a/libgomp/openacc_lib.h +++ b/libgomp/openacc_lib.h @@ -37,6 +37,7 @@ integer, parameter :: acc_device_kind = 4 ! Keep in sync with include/gomp-constants.h. + integer (acc_device_kind), parameter :: acc_device_current = -1 integer (acc_device_kind), parameter :: acc_device_none = 0 integer (acc_device_kind), parameter :: acc_device_default = 1 integer (acc_device_kind), parameter :: acc_device_host = 2 @@ -44,7 +45,24 @@ ! removed. integer (acc_device_kind), parameter :: acc_device_not_host = 4 integer (acc_device_kind), parameter :: acc_device_nvidia = 5 - integer (acc_device_kind), parameter :: acc_device_gcn = 8 + integer (acc_device_kind), parameter :: acc_device_radeon = 8 + + integer, parameter :: acc_device_property_kind = 4 +! OpenACC 2.6/2.7/3.0 used acc_device_property; in a spec update the +! missing '_kind' was added for consistency. 
For backward compatibility, keep: + integer, parameter :: acc_device_property & + & = acc_device_property_kind + + integer (acc_device_property_kind), parameter :: & + & acc_property_memory = 1 + integer (acc_device_property_kind), parameter :: & + & acc_property_free_memory = 2 + integer (acc_device_property_kind), parameter :: & + & acc_property_name = int(Z'10001') + integer (acc_device_property_kind), parameter :: & + & acc_property_vendor = int(Z'10002') + integer (acc_device_property_kind), parameter :: & + & acc_property_driver = int(Z'10003') integer, parameter :: acc_handle_kind = 4 @@ -52,20 +70,20 @@ integer (acc_handle_kind), parameter :: acc_async_noval = -1 integer (acc_handle_kind), parameter :: acc_async_sync = -2 - integer, parameter :: openacc_version = 201306 + integer, parameter :: openacc_version = 201711 interface acc_get_num_devices - function acc_get_num_devices_h (d) + function acc_get_num_devices_h (devicetype) import acc_device_kind integer acc_get_num_devices_h - integer (acc_device_kind) d + integer (acc_device_kind) devicetype end function end interface interface acc_set_device_type - subroutine acc_set_device_type_h (d) + subroutine acc_set_device_type_h (devicetype) import acc_device_kind - integer (acc_device_kind) d + integer (acc_device_kind) devicetype end subroutine end interface @@ -77,25 +95,50 @@ end interface interface acc_set_device_num - subroutine acc_set_device_num_h (n, d) + subroutine acc_set_device_num_h (devicenum, devicetype) import acc_device_kind - integer n - integer (acc_device_kind) d + integer devicenum + integer (acc_device_kind) devicetype end subroutine end interface interface acc_get_device_num - function acc_get_device_num_h (d) + function acc_get_device_num_h (devicetype) import acc_device_kind integer acc_get_device_num_h - integer (acc_device_kind) d + integer (acc_device_kind) devicetype + end function + end interface + + interface acc_get_property + function acc_get_property_h (devicenum, devicetype, 
& + & property) + use iso_c_binding, only: c_size_t + import acc_device_kind, acc_device_property_kind + implicit none (type, external) + integer (c_size_t) :: acc_get_property_h + integer, value :: devicenum + integer (acc_device_kind), value :: devicetype + integer (acc_device_property_kind), value :: property end function end interface + interface acc_get_property_string + subroutine acc_get_property_string_h (devicenum, devicetype, & + & property, string) + import acc_device_kind, acc_device_property_kind + implicit none (type, external) + integer, value :: devicenum + integer (acc_device_kind), value :: devicetype + integer (acc_device_property_kind), value :: property + character (*) :: string + end subroutine + end interface + interface acc_async_test - function acc_async_test_h (a) + function acc_async_test_h (arg) logical acc_async_test_h - integer a + integer arg end function end interface @@ -106,8 +149,8 @@ end interface interface acc_wait - subroutine acc_wait_h (a) - integer a + subroutine acc_wait_h (arg) + integer arg end subroutine end interface @@ -117,8 +160,8 @@ end interface interface acc_wait_async - subroutine acc_wait_async_h (a1, a2) - integer a1, a2 + subroutine acc_wait_async_h (arg, async) + integer arg, async end subroutine end interface @@ -134,8 +177,8 @@ end interface interface acc_wait_all_async - subroutine acc_wait_all_async_h (a) - integer a + subroutine acc_wait_all_async_h (async) + integer async end subroutine end interface @@ -167,14 +210,14 @@ interface acc_copyin subroutine acc_copyin_32_h (a, len) use iso_c_binding, only: c_int32_t - !GCC$ ATTRIBUTES NO_ARG_CHECK :: a +!GCC$ ATTRIBUTES NO_ARG_CHECK :: a type (*), dimension (*) :: a integer (c_int32_t) len end subroutine subroutine acc_copyin_64_h (a, len) use iso_c_binding, only: c_int64_t - !GCC$ ATTRIBUTES NO_ARG_CHECK :: a +!GCC$ ATTRIBUTES NO_ARG_CHECK :: a type (*), dimension (*) :: a integer (c_int64_t) len end subroutine @@ -187,14 +230,14 @@ interface 
acc_present_or_copyin subroutine acc_present_or_copyin_32_h (a, len) use iso_c_binding, only: c_int32_t - !GCC$ ATTRIBUTES NO_ARG_CHECK :: a +!GCC$ ATTRIBUTES NO_ARG_CHECK :: a type (*), dimension (*) :: a integer (c_int32_t) len end subroutine subroutine acc_present_or_copyin_64_h (a, len) use iso_c_binding, only: c_int64_t - !GCC$ ATTRIBUTES NO_ARG_CHECK :: a +!GCC$ ATTRIBUTES NO_ARG_CHECK :: a type (*), dimension (*) :: a integer (c_int64_t) len end subroutine @@ -213,14 +256,14 @@ interface acc_create subroutine acc_create_32_h (a, len) use iso_c_binding, only: c_int32_t - !GCC$ ATTRIBUTES NO_ARG_CHECK :: a +!GCC$ ATTRIBUTES NO_ARG_CHECK :: a type (*), dimension (*) :: a integer (c_int32_t) len end subroutine subroutine acc_create_64_h (a, len) use iso_c_binding, only: c_int64_t - !GCC$ ATTRIBUTES NO_ARG_CHECK :: a +!GCC$ ATTRIBUTES NO_ARG_CHECK :: a type (*), dimension (*) :: a integer (c_int64_t) len end subroutine @@ -233,14 +276,14 @@ interface acc_present_or_create subroutine acc_present_or_create_32_h (a, len) use iso_c_binding, only: c_int32_t - !GCC$ ATTRIBUTES NO_ARG_CHECK :: a +!GCC$ ATTRIBUTES NO_ARG_CHECK :: a type (*), dimension (*) :: a integer (c_int32_t) len end subroutine subroutine acc_present_or_create_64_h (a, len) use iso_c_binding, only: c_int64_t - !GCC$ ATTRIBUTES NO_ARG_CHECK :: a +!GCC$ ATTRIBUTES NO_ARG_CHECK :: a type (*), dimension (*) :: a integer (c_int64_t) len end subroutine @@ -259,14 +302,14 @@ interface acc_copyout subroutine acc_copyout_32_h (a, len) use iso_c_binding, only: c_int32_t - !GCC$ ATTRIBUTES NO_ARG_CHECK :: a +!GCC$ ATTRIBUTES NO_ARG_CHECK :: a type (*), dimension (*) :: a integer (c_int32_t) len end subroutine subroutine acc_copyout_64_h (a, len) use iso_c_binding, only: c_int64_t - !GCC$ ATTRIBUTES NO_ARG_CHECK :: a +!GCC$ ATTRIBUTES NO_ARG_CHECK :: a type (*), dimension (*) :: a integer (c_int64_t) len end subroutine @@ -279,14 +322,14 @@ interface acc_copyout_finalize subroutine acc_copyout_finalize_32_h (a, 
len) use iso_c_binding, only: c_int32_t - !GCC$ ATTRIBUTES NO_ARG_CHECK :: a +!GCC$ ATTRIBUTES NO_ARG_CHECK :: a type (*), dimension (*) :: a integer (c_int32_t) len end subroutine subroutine acc_copyout_finalize_64_h (a, len) use iso_c_binding, only: c_int64_t - !GCC$ ATTRIBUTES NO_ARG_CHECK :: a +!GCC$ ATTRIBUTES NO_ARG_CHECK :: a type (*), dimension (*) :: a integer (c_int64_t) len end subroutine @@ -299,14 +342,14 @@ interface acc_delete subroutine acc_delete_32_h (a, len) use iso_c_binding, only: c_int32_t - !GCC$ ATTRIBUTES NO_ARG_CHECK :: a +!GCC$ ATTRIBUTES NO_ARG_CHECK :: a type (*), dimension (*) :: a integer (c_int32_t) len end subroutine subroutine acc_delete_64_h (a, len) use iso_c_binding, only: c_int64_t - !GCC$ ATTRIBUTES NO_ARG_CHECK :: a +!GCC$ ATTRIBUTES NO_ARG_CHECK :: a type (*), dimension (*) :: a integer (c_int64_t) len end subroutine @@ -319,14 +362,14 @@ interface acc_delete_finalize subroutine acc_delete_finalize_32_h (a, len) use iso_c_binding, only: c_int32_t - !GCC$ ATTRIBUTES NO_ARG_CHECK :: a +!GCC$ ATTRIBUTES NO_ARG_CHECK :: a type (*), dimension (*) :: a integer (c_int32_t) len end subroutine subroutine acc_delete_finalize_64_h (a, len) use iso_c_binding, only: c_int64_t - !GCC$ ATTRIBUTES NO_ARG_CHECK :: a +!GCC$ ATTRIBUTES NO_ARG_CHECK :: a type (*), dimension (*) :: a integer (c_int64_t) len end subroutine @@ -339,14 +382,14 @@ interface acc_update_device subroutine acc_update_device_32_h (a, len) use iso_c_binding, only: c_int32_t - !GCC$ ATTRIBUTES NO_ARG_CHECK :: a +!GCC$ ATTRIBUTES NO_ARG_CHECK :: a type (*), dimension (*) :: a integer (c_int32_t) len end subroutine subroutine acc_update_device_64_h (a, len) use iso_c_binding, only: c_int64_t - !GCC$ ATTRIBUTES NO_ARG_CHECK :: a +!GCC$ ATTRIBUTES NO_ARG_CHECK :: a type (*), dimension (*) :: a integer (c_int64_t) len end subroutine @@ -359,14 +402,14 @@ interface acc_update_self subroutine acc_update_self_32_h (a, len) use iso_c_binding, only: c_int32_t - !GCC$ ATTRIBUTES 
NO_ARG_CHECK :: a +!GCC$ ATTRIBUTES NO_ARG_CHECK :: a type (*), dimension (*) :: a integer (c_int32_t) len end subroutine subroutine acc_update_self_64_h (a, len) use iso_c_binding, only: c_int64_t - !GCC$ ATTRIBUTES NO_ARG_CHECK :: a +!GCC$ ATTRIBUTES NO_ARG_CHECK :: a type (*), dimension (*) :: a integer (c_int64_t) len end subroutine @@ -385,7 +428,7 @@ function acc_is_present_32_h (a, len) use iso_c_binding, only: c_int32_t logical acc_is_present_32_h - !GCC$ ATTRIBUTES NO_ARG_CHECK :: a +!GCC$ ATTRIBUTES NO_ARG_CHECK :: a type (*), dimension (*) :: a integer (c_int32_t) len end function @@ -393,7 +436,7 @@ function acc_is_present_64_h (a, len) use iso_c_binding, only: c_int64_t logical acc_is_present_64_h - !GCC$ ATTRIBUTES NO_ARG_CHECK :: a +!GCC$ ATTRIBUTES NO_ARG_CHECK :: a type (*), dimension (*) :: a integer (c_int64_t) len end function @@ -411,7 +454,7 @@ subroutine acc_copyin_async_32_h (a, len, async) use iso_c_binding, only: c_int32_t import acc_handle_kind - !GCC$ ATTRIBUTES NO_ARG_CHECK :: a +!GCC$ ATTRIBUTES NO_ARG_CHECK :: a type (*), dimension (*) :: a integer (c_int32_t) len integer (acc_handle_kind) async @@ -420,7 +463,7 @@ subroutine acc_copyin_async_64_h (a, len, async) use iso_c_binding, only: c_int64_t import acc_handle_kind - !GCC$ ATTRIBUTES NO_ARG_CHECK :: a +!GCC$ ATTRIBUTES NO_ARG_CHECK :: a type (*), dimension (*) :: a integer (c_int64_t) len integer (acc_handle_kind) async @@ -437,7 +480,7 @@ subroutine acc_create_async_32_h (a, len, async) use iso_c_binding, only: c_int32_t import acc_handle_kind - !GCC$ ATTRIBUTES NO_ARG_CHECK :: a +!GCC$ ATTRIBUTES NO_ARG_CHECK :: a type (*), dimension (*) :: a integer (c_int32_t) len integer (acc_handle_kind) async @@ -446,7 +489,7 @@ subroutine acc_create_async_64_h (a, len, async) use iso_c_binding, only: c_int64_t import acc_handle_kind - !GCC$ ATTRIBUTES NO_ARG_CHECK :: a +!GCC$ ATTRIBUTES NO_ARG_CHECK :: a type (*), dimension (*) :: a integer (c_int64_t) len integer (acc_handle_kind) async 
@@ -463,7 +506,7 @@ subroutine acc_copyout_async_32_h (a, len, async) use iso_c_binding, only: c_int32_t import acc_handle_kind - !GCC$ ATTRIBUTES NO_ARG_CHECK :: a +!GCC$ ATTRIBUTES NO_ARG_CHECK :: a type (*), dimension (*) :: a integer (c_int32_t) len integer (acc_handle_kind) async @@ -472,7 +515,7 @@ subroutine acc_copyout_async_64_h (a, len, async) use iso_c_binding, only: c_int64_t import acc_handle_kind - !GCC$ ATTRIBUTES NO_ARG_CHECK :: a +!GCC$ ATTRIBUTES NO_ARG_CHECK :: a type (*), dimension (*) :: a integer (c_int64_t) len integer (acc_handle_kind) async @@ -489,7 +532,7 @@ subroutine acc_delete_async_32_h (a, len, async) use iso_c_binding, only: c_int32_t import acc_handle_kind - !GCC$ ATTRIBUTES NO_ARG_CHECK :: a +!GCC$ ATTRIBUTES NO_ARG_CHECK :: a type (*), dimension (*) :: a integer (c_int32_t) len integer (acc_handle_kind) async @@ -498,7 +541,7 @@ subroutine acc_delete_async_64_h (a, len, async) use iso_c_binding, only: c_int64_t import acc_handle_kind - !GCC$ ATTRIBUTES NO_ARG_CHECK :: a +!GCC$ ATTRIBUTES NO_ARG_CHECK :: a type (*), dimension (*) :: a integer (c_int64_t) len integer (acc_handle_kind) async @@ -515,7 +558,7 @@ subroutine acc_update_device_async_32_h (a, len, async) use iso_c_binding, only: c_int32_t import acc_handle_kind - !GCC$ ATTRIBUTES NO_ARG_CHECK :: a +!GCC$ ATTRIBUTES NO_ARG_CHECK :: a type (*), dimension (*) :: a integer (c_int32_t) len integer (acc_handle_kind) async @@ -524,7 +567,7 @@ subroutine acc_update_device_async_64_h (a, len, async) use iso_c_binding, only: c_int64_t import acc_handle_kind - !GCC$ ATTRIBUTES NO_ARG_CHECK :: a +!GCC$ ATTRIBUTES NO_ARG_CHECK :: a type (*), dimension (*) :: a integer (c_int64_t) len integer (acc_handle_kind) async @@ -541,7 +584,7 @@ subroutine acc_update_self_async_32_h (a, len, async) use iso_c_binding, only: c_int32_t import acc_handle_kind - !GCC$ ATTRIBUTES NO_ARG_CHECK :: a +!GCC$ ATTRIBUTES NO_ARG_CHECK :: a type (*), dimension (*) :: a integer (c_int32_t) len integer 
(acc_handle_kind) async @@ -550,7 +593,7 @@ subroutine acc_update_self_async_64_h (a, len, async) use iso_c_binding, only: c_int64_t import acc_handle_kind - !GCC$ ATTRIBUTES NO_ARG_CHECK :: a +!GCC$ ATTRIBUTES NO_ARG_CHECK :: a type (*), dimension (*) :: a integer (c_int64_t) len integer (acc_handle_kind) async diff --git a/libgomp/plugin/configfrag.ac b/libgomp/plugin/configfrag.ac index 9a424aa..fc91702 100644 --- a/libgomp/plugin/configfrag.ac +++ b/libgomp/plugin/configfrag.ac @@ -211,7 +211,7 @@ if test x"$enable_offload_targets" != x; then case "${target}" in x86_64-*-*) case " ${CC} ${CFLAGS} " in - *" -m32 "*) + *" -m32 "*|*" -mx32 "*) PLUGIN_HSA=0 ;; *) @@ -251,7 +251,7 @@ if test x"$enable_offload_targets" != x; then case "${target}" in x86_64-*-*) case " ${CC} ${CFLAGS} " in - *" -m32 "*) + *" -m32 "*|*" -mx32 "*) PLUGIN_GCN=0 ;; *) diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c index b5ca7c1..4c6a4c0 100644 --- a/libgomp/plugin/plugin-gcn.c +++ b/libgomp/plugin/plugin-gcn.c @@ -371,6 +371,8 @@ struct hsa_kernel_description { const char *name; int oacc_dims[3]; /* Only present for GCN kernels. */ + int sgpr_count; + int vpgr_count; }; /* Mkoffload uses this structure to describe an offload variable. */ @@ -396,6 +398,19 @@ struct gcn_image_desc struct global_var_info *global_variables; }; +/* This enum mirrors the corresponding LLVM enum's values for all ISAs that we + support. + See https://llvm.org/docs/AMDGPUUsage.html#amdgpu-ef-amdgpu-mach-table */ + +typedef enum { + EF_AMDGPU_MACH_AMDGCN_GFX803 = 0x02a, + EF_AMDGPU_MACH_AMDGCN_GFX900 = 0x02c, + EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f, +} EF_AMDGPU_MACH; + +const static int EF_AMDGPU_MACH_MASK = 0x000000ff; +typedef EF_AMDGPU_MACH gcn_isa; + /* Description of an HSA GPU agent (device) and the program associated with it. */ @@ -408,9 +423,13 @@ struct agent_info /* Whether the agent has been initialized. The fields below are usable only if it has been. 
*/ bool initialized; - /* Precomputed check for problem architectures. */ - bool gfx900_p; + /* The instruction set architecture of the device. */ + gcn_isa device_isa; + /* Name of the agent. */ + char name[64]; + /* Name of the vendor of the agent. */ + char vendor_name[64]; /* Command queues of the agent. */ hsa_queue_t *sync_queue; struct goacc_asyncqueue *async_queues, *omp_async_queue; @@ -460,6 +479,8 @@ struct kernel_info struct agent_info *agent; /* The specific module where the kernel takes place. */ struct module_info *module; + /* Information provided by mkoffload associated with the kernel. */ + struct hsa_kernel_description *description; /* Mutex enforcing that at most once thread ever initializes a kernel for use. A thread should have locked agent->module_rwlock for reading before acquiring it. */ @@ -529,6 +550,8 @@ struct hsa_context_info int agent_count; /* Array of agent_info structures describing the individual HSA agents. */ struct agent_info *agents; + /* Driver version string. */ + char driver_version_s[30]; }; /* Format of the on-device heap. 
@@ -1213,7 +1236,8 @@ parse_target_attributes (void **input, grid_attrs_found = true; break; } - else if ((id & GOMP_TARGET_ARG_DEVICE_ALL) == GOMP_TARGET_ARG_DEVICE_ALL) + else if ((id & GOMP_TARGET_ARG_DEVICE_MASK) + == GOMP_TARGET_ARG_DEVICE_ALL) { gcn_dims_found = true; switch (id & GOMP_TARGET_ARG_ID_MASK) @@ -1232,7 +1256,8 @@ parse_target_attributes (void **input, if (gcn_dims_found) { - if (agent->gfx900_p && gcn_threads == 0 && override_z_dim == 0) + if (agent->device_isa == EF_AMDGPU_MACH_AMDGCN_GFX900 + && gcn_threads == 0 && override_z_dim == 0) { gcn_threads = 4; GCN_WARNING ("VEGA BUG WORKAROUND: reducing default number of " @@ -1483,6 +1508,8 @@ init_hsa_context (void) = GOMP_PLUGIN_malloc_cleared (hsa_context.agent_count * sizeof (struct agent_info)); status = hsa_fns.hsa_iterate_agents_fn (assign_agent_ids, &agent_index); + if (status != HSA_STATUS_SUCCESS) + return hsa_error ("Scanning compute agents failed", status); if (agent_index != hsa_context.agent_count) { GOMP_PLUGIN_error ("Failed to assign IDs to all GCN agents"); @@ -1496,6 +1523,25 @@ init_hsa_context (void) GOMP_PLUGIN_error ("Failed to list all HSA runtime agents"); } + uint16_t minor, major; + status = hsa_fns.hsa_system_get_info_fn (HSA_SYSTEM_INFO_VERSION_MINOR, + &minor); + if (status != HSA_STATUS_SUCCESS) + GOMP_PLUGIN_error ("Failed to obtain HSA runtime minor version"); + status = hsa_fns.hsa_system_get_info_fn (HSA_SYSTEM_INFO_VERSION_MAJOR, + &major); + if (status != HSA_STATUS_SUCCESS) + GOMP_PLUGIN_error ("Failed to obtain HSA runtime major version"); + + size_t len = sizeof hsa_context.driver_version_s; + int printed = snprintf (hsa_context.driver_version_s, len, + "HSA Runtime %hu.%hu", (unsigned short int)major, + (unsigned short int)minor); + if (printed >= len) + GCN_WARNING ("HSA runtime version string was truncated." 
+ "Version %hu.%hu is too long.", (unsigned short int)major, + (unsigned short int)minor); + hsa_context.initialized = true; return true; } @@ -1578,6 +1624,66 @@ get_data_memory_region (hsa_region_t region, void *data) HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED); } +static int +elf_gcn_isa_field (Elf64_Ehdr *image) +{ + return image->e_flags & EF_AMDGPU_MACH_MASK; +} + +const static char *gcn_gfx803_s = "gfx803"; +const static char *gcn_gfx900_s = "gfx900"; +const static char *gcn_gfx906_s = "gfx906"; +const static int gcn_isa_name_len = 6; + +/* Returns the name that the HSA runtime uses for the ISA or NULL if we do not + support the ISA. */ + +static const char* +isa_hsa_name (int isa) { + switch(isa) + { + case EF_AMDGPU_MACH_AMDGCN_GFX803: + return gcn_gfx803_s; + case EF_AMDGPU_MACH_AMDGCN_GFX900: + return gcn_gfx900_s; + case EF_AMDGPU_MACH_AMDGCN_GFX906: + return gcn_gfx906_s; + } + return NULL; +} + +/* Returns the user-facing name that GCC uses to identify the architecture (e.g. + with -march) or NULL if we do not support the ISA. + Keep in sync with /gcc/config/gcn/gcn.{c,opt}. */ + +static const char* +isa_gcc_name (int isa) { + switch(isa) + { + case EF_AMDGPU_MACH_AMDGCN_GFX803: + return "fiji"; + default: + return isa_hsa_name (isa); + } +} + +/* Returns the code which is used in the GCN object code to identify the ISA with + the given name (as used by the HSA runtime). 
*/ + +static gcn_isa +isa_code(const char *isa) { + if (!strncmp (isa, gcn_gfx803_s, gcn_isa_name_len)) + return EF_AMDGPU_MACH_AMDGCN_GFX803; + + if (!strncmp (isa, gcn_gfx900_s, gcn_isa_name_len)) + return EF_AMDGPU_MACH_AMDGCN_GFX900; + + if (!strncmp (isa, gcn_gfx906_s, gcn_isa_name_len)) + return EF_AMDGPU_MACH_AMDGCN_GFX906; + + return -1; +} + /* }}} */ /* {{{ Run */ @@ -1993,6 +2099,24 @@ run_kernel (struct kernel_info *kernel, void *vars, struct GOMP_kernel_launch_attributes *kla, struct goacc_asyncqueue *aq, bool module_locked) { + GCN_DEBUG ("SGPRs: %d, VGPRs: %d\n", kernel->description->sgpr_count, + kernel->description->vpgr_count); + + /* Reduce the number of threads/workers if there are insufficient + VGPRs available to run the kernels together. */ + if (kla->ndim == 3 && kernel->description->vpgr_count > 0) + { + int granulated_vgprs = (kernel->description->vpgr_count + 3) & ~3; + int max_threads = (256 / granulated_vgprs) * 4; + if (kla->gdims[2] > max_threads) + { + GCN_WARNING ("Too many VGPRs required to support %d threads/workers" + " per team/gang - reducing to %d threads/workers.\n", + kla->gdims[2], max_threads); + kla->gdims[2] = max_threads; + } + } + GCN_DEBUG ("GCN launch on queue: %d:%d\n", kernel->agent->device_id, (aq ? aq->id : 0)); GCN_DEBUG ("GCN launch attribs: gdims:["); @@ -2194,6 +2318,7 @@ init_basic_kernel_info (struct kernel_info *kernel, kernel->agent = agent; kernel->module = module; kernel->name = d->name; + kernel->description = d; if (pthread_mutex_init (&kernel->init_mutex, NULL)) { GOMP_PLUGIN_error ("Failed to initialize a GCN kernel mutex"); @@ -2257,6 +2382,39 @@ find_load_offset (Elf64_Addr *load_offset, struct agent_info *agent, return res; } +/* Check that the GCN ISA of the given image matches the ISA of the agent. 
*/ + +static bool +isa_matches_agent (struct agent_info *agent, Elf64_Ehdr *image) +{ + int isa_field = elf_gcn_isa_field (image); + const char* isa_s = isa_hsa_name (isa_field); + if (!isa_s) + { + hsa_error ("Unsupported ISA in GCN code object.", HSA_STATUS_ERROR); + return false; + } + + if (isa_field != agent->device_isa) + { + char msg[120]; + const char *agent_isa_s = isa_hsa_name (agent->device_isa); + const char *agent_isa_gcc_s = isa_gcc_name (agent->device_isa); + assert (agent_isa_s); + assert (agent_isa_gcc_s); + + snprintf (msg, sizeof msg, + "GCN code object ISA '%s' does not match GPU ISA '%s'.\n" + "Try to recompile with '-foffload=-march=%s'.\n", + isa_s, agent_isa_s, agent_isa_gcc_s); + + hsa_error (msg, HSA_STATUS_ERROR); + return false; + } + + return true; +} + /* Create and finalize the program consisting of all loaded modules. */ static bool @@ -2289,6 +2447,9 @@ create_and_finalize_hsa_program (struct agent_info *agent) { Elf64_Ehdr *image = (Elf64_Ehdr *)module->image_desc->gcn_image->image; + if (!isa_matches_agent (agent, image)) + goto fail; + /* Hide relocations from the HSA runtime loader. Keep a copy of the unmodified section headers to use later. */ Elf64_Shdr *image_sections = (Elf64_Shdr *)((char *)image @@ -3236,17 +3397,6 @@ GOMP_OFFLOAD_get_num_devices (void) return hsa_context.agent_count; } -union gomp_device_property_value -GOMP_OFFLOAD_get_property (int device, int prop) -{ - /* Stub. Check device and return default value for unsupported properties. */ - /* TODO: Implement this function. */ - get_agent_info (device); - - union gomp_device_property_value nullval = { .val = 0 }; - return nullval; -} - /* Initialize device (agent) number N so that it can be used for computation. Return TRUE on success. 
*/ @@ -3300,12 +3450,19 @@ GOMP_OFFLOAD_init_device (int n) return hsa_error ("Error requesting maximum queue size of the GCN agent", status); - char buf[64]; status = hsa_fns.hsa_agent_get_info_fn (agent->id, HSA_AGENT_INFO_NAME, - &buf); + &agent->name); if (status != HSA_STATUS_SUCCESS) return hsa_error ("Error querying the name of the agent", status); - agent->gfx900_p = (strncmp (buf, "gfx900", 6) == 0); + + agent->device_isa = isa_code (agent->name); + if (agent->device_isa < 0) + return hsa_error ("Unknown GCN agent architecture", HSA_STATUS_ERROR); + + status = hsa_fns.hsa_agent_get_info_fn (agent->id, HSA_AGENT_INFO_VENDOR_NAME, + &agent->vendor_name); + if (status != HSA_STATUS_SUCCESS) + return hsa_error ("Error querying the vendor name of the agent", status); status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size, HSA_QUEUE_TYPE_MULTI, @@ -3318,6 +3475,9 @@ GOMP_OFFLOAD_init_device (int n) status = hsa_fns.hsa_agent_iterate_regions_fn (agent->id, get_kernarg_memory_region, &agent->kernarg_region); + if (status != HSA_STATUS_SUCCESS + && status != HSA_STATUS_INFO_BREAK) + hsa_error ("Scanning memory regions failed", status); if (agent->kernarg_region.handle == (uint64_t) -1) { GOMP_PLUGIN_error ("Could not find suitable memory region for kernel " @@ -3331,6 +3491,9 @@ GOMP_OFFLOAD_init_device (int n) status = hsa_fns.hsa_agent_iterate_regions_fn (agent->id, get_data_memory_region, &agent->data_region); + if (status != HSA_STATUS_SUCCESS + && status != HSA_STATUS_INFO_BREAK) + hsa_error ("Scanning memory regions failed", status); if (agent->data_region.handle == (uint64_t) -1) { GOMP_PLUGIN_error ("Could not find suitable memory region for device " @@ -3999,6 +4162,42 @@ GOMP_OFFLOAD_openacc_async_dev2host (int device, void *dst, const void *src, return true; } +union goacc_property_value +GOMP_OFFLOAD_openacc_get_property (int device, enum goacc_property prop) +{ + struct agent_info *agent = get_agent_info (device); + + union goacc_property_value 
propval = { .val = 0 }; + + switch (prop) + { + case GOACC_PROPERTY_FREE_MEMORY: + /* Not supported. */ + break; + case GOACC_PROPERTY_MEMORY: + { + size_t size; + hsa_region_t region = agent->data_region; + hsa_status_t status = + hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_SIZE, &size); + if (status == HSA_STATUS_SUCCESS) + propval.val = size; + break; + } + case GOACC_PROPERTY_NAME: + propval.ptr = agent->name; + break; + case GOACC_PROPERTY_VENDOR: + propval.ptr = agent->vendor_name; + break; + case GOACC_PROPERTY_DRIVER: + propval.ptr = hsa_context.driver_version_s; + break; + } + + return propval; +} + /* Set up plugin-specific thread-local-data (host-side). */ void * diff --git a/libgomp/plugin/plugin-hsa.c b/libgomp/plugin/plugin-hsa.c index b04923b..abd3bc6 100644 --- a/libgomp/plugin/plugin-hsa.c +++ b/libgomp/plugin/plugin-hsa.c @@ -699,32 +699,6 @@ GOMP_OFFLOAD_get_num_devices (void) return hsa_context.agent_count; } -/* Part of the libgomp plugin interface. Return the value of property - PROP of agent number N. */ - -union gomp_device_property_value -GOMP_OFFLOAD_get_property (int n, int prop) -{ - union gomp_device_property_value nullval = { .val = 0 }; - - if (!init_hsa_context ()) - return nullval; - if (n >= hsa_context.agent_count) - { - GOMP_PLUGIN_error - ("Request for a property of a non-existing HSA device %i", n); - return nullval; - } - - switch (prop) - { - case GOMP_DEVICE_PROPERTY_VENDOR: - return (union gomp_device_property_value) { .ptr = "HSA" }; - default: - return nullval; - } -} - /* Part of the libgomp plugin interface. Initialize agent number N so that it can be used for computation. Return TRUE on success. 
*/ diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index e867b4c..ec103a2 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -1121,74 +1121,6 @@ GOMP_OFFLOAD_get_num_devices (void) return nvptx_get_num_devices (); } -union gomp_device_property_value -GOMP_OFFLOAD_get_property (int n, int prop) -{ - union gomp_device_property_value propval = { .val = 0 }; - - pthread_mutex_lock (&ptx_dev_lock); - - if (n >= nvptx_get_num_devices () || n < 0 || ptx_devices[n] == NULL) - { - pthread_mutex_unlock (&ptx_dev_lock); - return propval; - } - - struct ptx_device *ptx_dev = ptx_devices[n]; - switch (prop) - { - case GOMP_DEVICE_PROPERTY_MEMORY: - { - size_t total_mem; - - CUDA_CALL_ERET (propval, cuDeviceTotalMem, &total_mem, ptx_dev->dev); - propval.val = total_mem; - } - break; - case GOMP_DEVICE_PROPERTY_FREE_MEMORY: - { - size_t total_mem; - size_t free_mem; - CUdevice ctxdev; - - CUDA_CALL_ERET (propval, cuCtxGetDevice, &ctxdev); - if (ptx_dev->dev == ctxdev) - CUDA_CALL_ERET (propval, cuMemGetInfo, &free_mem, &total_mem); - else if (ptx_dev->ctx) - { - CUcontext old_ctx; - - CUDA_CALL_ERET (propval, cuCtxPushCurrent, ptx_dev->ctx); - CUDA_CALL_ERET (propval, cuMemGetInfo, &free_mem, &total_mem); - CUDA_CALL_ASSERT (cuCtxPopCurrent, &old_ctx); - } - else - { - CUcontext new_ctx; - - CUDA_CALL_ERET (propval, cuCtxCreate, &new_ctx, CU_CTX_SCHED_AUTO, - ptx_dev->dev); - CUDA_CALL_ERET (propval, cuMemGetInfo, &free_mem, &total_mem); - CUDA_CALL_ASSERT (cuCtxDestroy, new_ctx); - } - propval.val = free_mem; - } - break; - case GOMP_DEVICE_PROPERTY_NAME: - propval.ptr = ptx_dev->name; - break; - case GOMP_DEVICE_PROPERTY_VENDOR: - propval.ptr = "Nvidia"; - break; - case GOMP_DEVICE_PROPERTY_DRIVER: - propval.ptr = cuda_driver_version_s; - break; - } - - pthread_mutex_unlock (&ptx_dev_lock); - return propval; -} - bool GOMP_OFFLOAD_init_device (int n) { @@ -1818,6 +1750,76 @@ GOMP_OFFLOAD_openacc_async_dev2host (int ord, 
void *dst, const void *src, return true; } +union goacc_property_value +GOMP_OFFLOAD_openacc_get_property (int n, enum goacc_property prop) +{ + union goacc_property_value propval = { .val = 0 }; + + pthread_mutex_lock (&ptx_dev_lock); + + if (n >= nvptx_get_num_devices () || n < 0 || ptx_devices[n] == NULL) + { + pthread_mutex_unlock (&ptx_dev_lock); + return propval; + } + + struct ptx_device *ptx_dev = ptx_devices[n]; + switch (prop) + { + case GOACC_PROPERTY_MEMORY: + { + size_t total_mem; + + CUDA_CALL_ERET (propval, cuDeviceTotalMem, &total_mem, ptx_dev->dev); + propval.val = total_mem; + } + break; + case GOACC_PROPERTY_FREE_MEMORY: + { + size_t total_mem; + size_t free_mem; + CUdevice ctxdev; + + CUDA_CALL_ERET (propval, cuCtxGetDevice, &ctxdev); + if (ptx_dev->dev == ctxdev) + CUDA_CALL_ERET (propval, cuMemGetInfo, &free_mem, &total_mem); + else if (ptx_dev->ctx) + { + CUcontext old_ctx; + + CUDA_CALL_ERET (propval, cuCtxPushCurrent, ptx_dev->ctx); + CUDA_CALL_ERET (propval, cuMemGetInfo, &free_mem, &total_mem); + CUDA_CALL_ASSERT (cuCtxPopCurrent, &old_ctx); + } + else + { + CUcontext new_ctx; + + CUDA_CALL_ERET (propval, cuCtxCreate, &new_ctx, CU_CTX_SCHED_AUTO, + ptx_dev->dev); + CUDA_CALL_ERET (propval, cuMemGetInfo, &free_mem, &total_mem); + CUDA_CALL_ASSERT (cuCtxDestroy, new_ctx); + } + propval.val = free_mem; + } + break; + case GOACC_PROPERTY_NAME: + propval.ptr = ptx_dev->name; + break; + case GOACC_PROPERTY_VENDOR: + propval.ptr = "Nvidia"; + break; + case GOACC_PROPERTY_DRIVER: + propval.ptr = cuda_driver_version_s; + break; + default: + break; + } + + pthread_mutex_unlock (&ptx_dev_lock); + return propval; +} + /* Adjust launch dimensions: pick good values for number of blocks and warps and ensure that number of warps does not exceed CUDA limits as well as GCC's own limits. 
*/ @@ -1929,9 +1931,4 @@ GOMP_OFFLOAD_run (int ord, void *tgt_fn, void *tgt_vars, void **args) nvptx_stacks_free (stacks, teams * threads); } -void -GOMP_OFFLOAD_async_run (int ord, void *tgt_fn, void *tgt_vars, void **args, - void *async_data) -{ - GOMP_PLUGIN_fatal ("GOMP_OFFLOAD_async_run unimplemented"); -} +/* TODO: Implement GOMP_OFFLOAD_async_run. */ diff --git a/libgomp/target.c b/libgomp/target.c index 617baec..3642547 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -720,7 +720,8 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, tgt->list[i].offset = OFFSET_INLINED; continue; } - else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR) + else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR + || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT) { tgt->list[i].key = NULL; if (!not_found_cnt) @@ -739,16 +740,24 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, cur_node.host_start = (uintptr_t) hostaddrs[i]; cur_node.host_end = cur_node.host_start; splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); - if (n == NULL) + if (n != NULL) + { + cur_node.host_start -= n->host_start; + hostaddrs[i] + = (void *) (n->tgt->tgt_start + n->tgt_offset + + cur_node.host_start); + } + else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR) { gomp_mutex_unlock (&devicep->lock); gomp_fatal ("use_device_ptr pointer wasn't mapped"); } - cur_node.host_start -= n->host_start; - hostaddrs[i] - = (void *) (n->tgt->tgt_start + n->tgt_offset - + cur_node.host_start); - tgt->list[i].offset = ~(uintptr_t) 0; + else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT) + /* If not present, continue using the host address. 
*/ + ; + else + __builtin_unreachable (); + tgt->list[i].offset = OFFSET_INLINED; } else tgt->list[i].offset = 0; @@ -973,22 +982,40 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, case GOMP_MAP_FIRSTPRIVATE_INT: case GOMP_MAP_ZERO_LEN_ARRAY_SECTION: continue; + case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT: + /* The OpenACC 'host_data' construct only allows 'use_device' + "mapping" clauses, so in the first loop, 'not_found_cnt' + must always have been zero, so all OpenACC 'use_device' + clauses have already been handled. (We can only easily test + 'use_device' with 'if_present' clause here.) */ + assert (tgt->list[i].offset == OFFSET_INLINED); + /* Nevertheless, FALLTHRU to the normal handling, to keep the + code conceptually simple, similar to the first loop. */ case GOMP_MAP_USE_DEVICE_PTR: if (tgt->list[i].offset == 0) { cur_node.host_start = (uintptr_t) hostaddrs[i]; cur_node.host_end = cur_node.host_start; n = gomp_map_lookup (mem_map, &cur_node); - if (n == NULL) + if (n != NULL) + { + cur_node.host_start -= n->host_start; + hostaddrs[i] + = (void *) (n->tgt->tgt_start + n->tgt_offset + + cur_node.host_start); + } + else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR) { gomp_mutex_unlock (&devicep->lock); gomp_fatal ("use_device_ptr pointer wasn't mapped"); } - cur_node.host_start -= n->host_start; - hostaddrs[i] - = (void *) (n->tgt->tgt_start + n->tgt_offset - + cur_node.host_start); - tgt->list[i].offset = ~(uintptr_t) 0; + else if ((kind & typemask) + == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT) + /* If not present, continue using the host address. 
*/ + ; + else + __builtin_unreachable (); + tgt->list[i].offset = OFFSET_INLINED; } continue; case GOMP_MAP_STRUCT: @@ -1621,8 +1648,9 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, { struct addr_pair *target_var = &target_table[num_funcs + i]; uintptr_t target_size = target_var->end - target_var->start; + bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1]; - if ((uintptr_t) host_var_table[i * 2 + 1] != target_size) + if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size) { gomp_mutex_unlock (&devicep->lock); if (is_register_lock) @@ -1636,7 +1664,7 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]); k->tgt = tgt; k->tgt_offset = target_var->start; - k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY; + k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY; k->virtual_refcount = 0; k->aux = NULL; array->left = NULL; @@ -1995,6 +2023,16 @@ GOMP_target (int device, void (*fn) (void *), const void *unused, gomp_unmap_vars (tgt_vars, true); } +static inline unsigned int +clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags) +{ + /* If we cannot run asynchronously, simply ignore nowait. */ + if (devicep != NULL && devicep->async_run_func == NULL) + flags &= ~GOMP_TARGET_FLAG_NOWAIT; + + return flags; +} + /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present, and several arguments have been added: FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h. 
@@ -2027,6 +2065,8 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, size_t tgt_align = 0, tgt_size = 0; bool fpc_done = false; + flags = clear_unsupported_flags (devicep, flags); + if (flags & GOMP_TARGET_FLAG_NOWAIT) { struct gomp_thread *thr = gomp_thread (); @@ -2440,7 +2480,9 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, } } - size_t i; + /* The variables are mapped separately such that they can be released + independently. */ + size_t i, j; if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0) for (i = 0; i < mapnum; i++) if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT) @@ -2449,6 +2491,15 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA); i += sizes[i]; } + else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET) + { + for (j = i + 1; j < mapnum; j++) + if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)) + break; + gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i], + &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA); + i += j - i - 1; + } else gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA); @@ -2497,6 +2548,7 @@ gomp_target_task_fn (void *data) } ttask->state = GOMP_TARGET_TASK_READY_TO_RUN; + assert (devicep->async_run_func); devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments, ttask->args, (void *) ttask); return true; @@ -3001,7 +3053,6 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device, DLSYM (get_caps); DLSYM (get_type); DLSYM (get_num_devices); - DLSYM (get_property); DLSYM (init_device); DLSYM (fini_device); DLSYM (load_image); @@ -3014,7 +3065,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device, if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) { DLSYM (run); - DLSYM (async_run); + DLSYM_OPT (async_run, async_run); DLSYM_OPT (can_run, can_run); DLSYM (dev2dev); } @@ -3034,7 +3085,8 @@ gomp_load_plugin_for_device (struct gomp_device_descr 
*device, openacc_async_queue_callback) || !DLSYM_OPT (openacc.async.exec, openacc_async_exec) || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host) - || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)) + || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev) + || !DLSYM_OPT (openacc.get_property, openacc_get_property)) { /* Require all the OpenACC handlers if we have GOMP_OFFLOAD_CAP_OPENACC_200. */ diff --git a/libgomp/team.c b/libgomp/team.c index 82f26a0..cbc3aec 100644 --- a/libgomp/team.c +++ b/libgomp/team.c @@ -636,6 +636,7 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, nthr->ts.active_level = thr->ts.active_level; nthr->ts.place_partition_off = place_partition_off; nthr->ts.place_partition_len = place_partition_len; + nthr->ts.def_allocator = thr->ts.def_allocator; #ifdef HAVE_SYNC_BUILTINS nthr->ts.single_count = 0; #endif @@ -823,6 +824,7 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, start_data->ts.team_id = i; start_data->ts.level = team->prev_ts.level + 1; start_data->ts.active_level = thr->ts.active_level; + start_data->ts.def_allocator = thr->ts.def_allocator; #ifdef HAVE_SYNC_BUILTINS start_data->ts.single_count = 0; #endif diff --git a/libgomp/testsuite/Makefile.am b/libgomp/testsuite/Makefile.am index 62b1855..655a413 100644 --- a/libgomp/testsuite/Makefile.am +++ b/libgomp/testsuite/Makefile.am @@ -12,6 +12,8 @@ _RUNTEST = $(shell if test -f $(top_srcdir)/../dejagnu/runtest; then \ echo $(top_srcdir)/../dejagnu/runtest; else echo runtest; fi) RUNTESTDEFAULTFLAGS = --tool $$tool --srcdir $$srcdir +EXTRA_DEJAGNU_SITE_CONFIG = libgomp-site-extra.exp + # Instead of directly in ../testsuite/libgomp-test-support.exp.in, the # following variables have to be "routed through" this Makefile, for expansion # of the several (Makefile) variables used therein. 
diff --git a/libgomp/testsuite/Makefile.in b/libgomp/testsuite/Makefile.in index f0da16d..52aa6c5 100644 --- a/libgomp/testsuite/Makefile.in +++ b/libgomp/testsuite/Makefile.in @@ -99,17 +99,20 @@ am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \ $(top_srcdir)/../config/lthostflags.m4 \ $(top_srcdir)/../config/multi.m4 \ $(top_srcdir)/../config/override.m4 \ - $(top_srcdir)/../config/tls.m4 $(top_srcdir)/../ltoptions.m4 \ - $(top_srcdir)/../ltsugar.m4 $(top_srcdir)/../ltversion.m4 \ - $(top_srcdir)/../lt~obsolete.m4 $(top_srcdir)/acinclude.m4 \ - $(top_srcdir)/../libtool.m4 $(top_srcdir)/../config/cet.m4 \ + $(top_srcdir)/../config/tls.m4 \ + $(top_srcdir)/../config/toolexeclibdir.m4 \ + $(top_srcdir)/../ltoptions.m4 $(top_srcdir)/../ltsugar.m4 \ + $(top_srcdir)/../ltversion.m4 $(top_srcdir)/../lt~obsolete.m4 \ + $(top_srcdir)/acinclude.m4 $(top_srcdir)/../libtool.m4 \ + $(top_srcdir)/../config/cet.m4 \ $(top_srcdir)/plugin/configfrag.ac $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am mkinstalldirs = $(SHELL) $(top_srcdir)/../mkinstalldirs CONFIG_HEADER = $(top_builddir)/config.h -CONFIG_CLEAN_FILES = libgomp-test-support.pt.exp +CONFIG_CLEAN_FILES = libgomp-test-support.pt.exp \ + libgomp-site-extra.exp CONFIG_CLEAN_VPATH_FILES = AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) @@ -308,6 +311,7 @@ _RUNTEST = $(shell if test -f $(top_srcdir)/../dejagnu/runtest; then \ echo $(top_srcdir)/../dejagnu/runtest; else echo runtest; fi) RUNTESTDEFAULTFLAGS = --tool $$tool --srcdir $$srcdir +EXTRA_DEJAGNU_SITE_CONFIG = libgomp-site-extra.exp all: all-am .SUFFIXES: @@ -342,6 +346,8 @@ $(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) $(am__aclocal_m4_deps): libgomp-test-support.pt.exp: $(top_builddir)/config.status $(srcdir)/libgomp-test-support.exp.in cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ +libgomp-site-extra.exp: 
$(top_builddir)/config.status $(srcdir)/libgomp-site-extra.exp.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ mostlyclean-libtool: -rm -f *.lo diff --git a/libgomp/testsuite/lib/libgomp.exp b/libgomp/testsuite/lib/libgomp.exp index f52ed71..ee5f0e5 100644 --- a/libgomp/testsuite/lib/libgomp.exp +++ b/libgomp/testsuite/lib/libgomp.exp @@ -319,7 +319,7 @@ proc libgomp_option_proc { option } { proc offload_target_to_openacc_device_type { offload_target } { switch -glob $offload_target { amdgcn* { - return "gcn" + return "radeon" } disable { return "host" @@ -346,11 +346,11 @@ proc check_effective_target_offload_target_nvptx { } { # files; in particular, '-foffload', 'libgomp.oacc-*/*.exp'), which don't # get passed on to 'check_effective_target_*' functions. (Not caching the # result due to that.) - set options [current_compiler_flags] + set options [list "additional_flags=[concat "-v" [current_compiler_flags]]"] # Instead of inspecting command-line options, look what the compiler driver # decides. This is somewhat modelled after # 'gcc/testsuite/lib/target-supports.exp:check_configured_with'. - set gcc_output [libgomp_target_compile "-v $options" "" "none" ""] + set gcc_output [libgomp_target_compile "" "" "none" $options] if [regexp "(?n)^OFFLOAD_TARGET_NAMES=(.*)" $gcc_output dummy offload_targets] { verbose "compiling for offload targets: $offload_targets" return [string match "*:nvptx*:*" ":$offload_targets:"] @@ -483,22 +483,22 @@ proc check_effective_target_hsa_offloading_selected {} { }] } -# Return 1 if at least one AMD GCN board is present. +# Return 1 if at least one AMD GPU is accessible. 
-proc check_effective_target_openacc_amdgcn_accel_present { } { - return [check_runtime openacc_amdgcn_accel_present { +proc check_effective_target_openacc_radeon_accel_present { } { + return [check_runtime openacc_radeon_accel_present { #include <openacc.h> int main () { - return !(acc_get_num_devices (acc_device_gcn) > 0); + return !(acc_get_num_devices (acc_device_radeon) > 0); } } "" ] } -# Return 1 if at least one AMD GCN board is present, and the AMD GCN device -# type is selected by default. +# Return 1 if at least one AMD GPU is accessible, and the OpenACC 'radeon' +# device type is selected. -proc check_effective_target_openacc_amdgcn_accel_selected { } { - if { ![check_effective_target_openacc_amdgcn_accel_present] } { +proc check_effective_target_openacc_radeon_accel_selected { } { + if { ![check_effective_target_openacc_radeon_accel_present] } { return 0; } global offload_target diff --git a/libgomp/testsuite/libgomp-site-extra.exp.in b/libgomp/testsuite/libgomp-site-extra.exp.in new file mode 100644 index 0000000..c0d2666 --- /dev/null +++ b/libgomp/testsuite/libgomp-site-extra.exp.in @@ -0,0 +1 @@ +set GCC_UNDER_TEST {@CC@} diff --git a/libgomp/testsuite/libgomp-test-support.exp.in b/libgomp/testsuite/libgomp-test-support.exp.in index 6ec10c7..98fb442 100644 --- a/libgomp/testsuite/libgomp-test-support.exp.in +++ b/libgomp/testsuite/libgomp-test-support.exp.in @@ -1,5 +1,3 @@ -set GCC_UNDER_TEST {@CC@} - set cuda_driver_include "@CUDA_DRIVER_INCLUDE@" set cuda_driver_lib "@CUDA_DRIVER_LIB@" set hsa_runtime_lib "@HSA_RUNTIME_LIB@" diff --git a/libgomp/testsuite/libgomp.c++/pr93931.C b/libgomp/testsuite/libgomp.c++/pr93931.C new file mode 100644 index 0000000..4d4232e --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/pr93931.C @@ -0,0 +1,120 @@ +// PR c++/93931 +// { dg-do run } +// { dg-options "-O2 -std=c++14" } + +extern "C" void abort (); + +void +sink (int &x) +{ + int *volatile p; + p = &x; + (*p)++; +} + +int +foo () +{ + int r = 0; + [&r] () { 
+#pragma omp parallel for reduction(+ : r) + for (int i = 0; i < 1024; ++i) + r += i; + } (); + return r; +} + +int +bar () +{ + int l = 0; + [&l] () { +#pragma omp parallel for lastprivate (l) + for (int i = 0; i < 1024; ++i) + l = i; + } (); + return l; +} + +void +baz () +{ + int f = 18; + [&f] () { +#pragma omp parallel for firstprivate (f) + for (int i = 0; i < 1024; ++i) + { + sink (f); + f += 3; + sink (f); + if (f != 23) + abort (); + sink (f); + f -= 7; + sink (f); + } + } (); + if (f != 18) + abort (); +} + +int +qux () +{ + int r = 0; + [&] () { +#pragma omp parallel for reduction(+ : r) + for (int i = 0; i < 1024; ++i) + r += i; + } (); + return r; +} + +int +corge () +{ + int l = 0; + [&] () { +#pragma omp parallel for lastprivate (l) + for (int i = 0; i < 1024; ++i) + l = i; + } (); + return l; +} + +void +garply () +{ + int f = 18; + [&] () { +#pragma omp parallel for firstprivate (f) + for (int i = 0; i < 1024; ++i) + { + sink (f); + f += 3; + sink (f); + if (f != 23) + abort (); + sink (f); + f -= 7; + sink (f); + } + } (); + if (f != 18) + abort (); +} + +int +main () +{ + if (foo () != 1024 * 1023 / 2) + abort (); + if (bar () != 1023) + abort (); + baz (); + if (qux () != 1024 * 1023 / 2) + abort (); + if (corge () != 1023) + abort (); + garply (); +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/alloc-1.c b/libgomp/testsuite/libgomp.c-c++-common/alloc-1.c new file mode 100644 index 0000000..9259a9c --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/alloc-1.c @@ -0,0 +1,157 @@ +#include <omp.h> +#include <stdint.h> +#include <stdlib.h> + +const omp_alloctrait_t traits2[] += { { omp_atk_alignment, 16 }, + { omp_atk_sync_hint, omp_atv_default }, + { omp_atk_access, omp_atv_default }, + { omp_atk_pool_size, 1024 }, + { omp_atk_fallback, omp_atv_default_mem_fb }, + { omp_atk_partition, omp_atv_environment } }; +omp_alloctrait_t traits3[] += { { omp_atk_sync_hint, omp_atv_uncontended }, + { omp_atk_alignment, 32 }, + { omp_atk_access, 
omp_atv_all }, + { omp_atk_pool_size, 512 }, + { omp_atk_fallback, omp_atv_allocator_fb }, + { omp_atk_fb_data, 0 }, + { omp_atk_partition, omp_atv_default } }; +const omp_alloctrait_t traits4[] += { { omp_atk_alignment, 128 }, + { omp_atk_pool_size, 1024 }, + { omp_atk_fallback, omp_atv_null_fb } }; + +int +main () +{ + int *volatile p = (int *) omp_alloc (3 * sizeof (int), omp_default_mem_alloc); + int *volatile q; + int *volatile r; + omp_alloctrait_t traits[3] + = { { omp_atk_alignment, 64 }, + { omp_atk_fallback, omp_atv_null_fb }, + { omp_atk_pool_size, 4096 } }; + omp_allocator_handle_t a, a2; + + if ((((uintptr_t) p) % __alignof (int)) != 0) + abort (); + p[0] = 1; + p[1] = 2; + p[2] = 3; + omp_free (p, omp_default_mem_alloc); + p = (int *) omp_alloc (2 * sizeof (int), omp_default_mem_alloc); + if ((((uintptr_t) p) % __alignof (int)) != 0) + abort (); + p[0] = 1; + p[1] = 2; + omp_free (p, omp_null_allocator); + omp_set_default_allocator (omp_default_mem_alloc); + p = (int *) omp_alloc (sizeof (int), omp_null_allocator); + if ((((uintptr_t) p) % __alignof (int)) != 0) + abort (); + p[0] = 3; + omp_free (p, omp_get_default_allocator ()); + + a = omp_init_allocator (omp_default_mem_space, 3, traits); + if (a == omp_null_allocator) + abort (); + p = (int *) omp_alloc (3072, a); + if ((((uintptr_t) p) % 64) != 0) + abort (); + p[0] = 1; + p[3071 / sizeof (int)] = 2; + if (omp_alloc (3072, a) != NULL) + abort (); + omp_free (p, a); + p = (int *) omp_alloc (3072, a); + p[0] = 3; + p[3071 / sizeof (int)] = 4; + omp_free (p, omp_null_allocator); + omp_set_default_allocator (a); + if (omp_get_default_allocator () != a) + abort (); + p = (int *) omp_alloc (3072, omp_null_allocator); + if (omp_alloc (3072, omp_null_allocator) != NULL) + abort (); + omp_free (p, a); + omp_destroy_allocator (a); + + a = omp_init_allocator (omp_default_mem_space, + sizeof (traits2) / sizeof (traits2[0]), + traits2); + if (a == omp_null_allocator) + abort (); + if (traits3[5].key != 
omp_atk_fb_data) + abort (); + traits3[5].value = (uintptr_t) a; + a2 = omp_init_allocator (omp_default_mem_space, + sizeof (traits3) / sizeof (traits3[0]), + traits3); + if (a2 == omp_null_allocator) + abort (); + p = (int *) omp_alloc (420, a2); + if ((((uintptr_t) p) % 32) != 0) + abort (); + p[0] = 5; + p[419 / sizeof (int)] = 6; + q = (int *) omp_alloc (768, a2); + if ((((uintptr_t) q) % 16) != 0) + abort (); + q[0] = 7; + q[767 / sizeof (int)] = 8; + r = (int *) omp_alloc (512, a2); + if ((((uintptr_t) r) % __alignof (int)) != 0) + abort (); + r[0] = 9; + r[511 / sizeof (int)] = 10; + omp_free (p, omp_null_allocator); + omp_free (q, a2); + omp_free (r, omp_null_allocator); + omp_destroy_allocator (a2); + omp_destroy_allocator (a); + + a = omp_init_allocator (omp_default_mem_space, + sizeof (traits4) / sizeof (traits4[0]), + traits4); + if (a == omp_null_allocator) + abort (); + if (traits3[5].key != omp_atk_fb_data) + abort (); + traits3[5].value = (uintptr_t) a; + a2 = omp_init_allocator (omp_default_mem_space, + sizeof (traits3) / sizeof (traits3[0]), + traits3); + if (a2 == omp_null_allocator) + abort (); + omp_set_default_allocator (a2); +#ifdef __cplusplus + p = static_cast <int *> (omp_alloc (420)); +#else + p = (int *) omp_alloc (420, omp_null_allocator); +#endif + if ((((uintptr_t) p) % 32) != 0) + abort (); + p[0] = 5; + p[419 / sizeof (int)] = 6; + q = (int *) omp_alloc (768, omp_null_allocator); + if ((((uintptr_t) q) % 128) != 0) + abort (); + q[0] = 7; + q[767 / sizeof (int)] = 8; + if (omp_alloc (768, omp_null_allocator) != NULL) + abort (); +#ifdef __cplusplus + omp_free (p); + omp_free (q); + omp_free (NULL); +#else + omp_free (p, omp_null_allocator); + omp_free (q, omp_null_allocator); + omp_free (NULL, omp_null_allocator); +#endif + omp_free (NULL, omp_null_allocator); + omp_destroy_allocator (a2); + omp_destroy_allocator (a); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/alloc-2.c 
b/libgomp/testsuite/libgomp.c-c++-common/alloc-2.c new file mode 100644 index 0000000..ee53958 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/alloc-2.c @@ -0,0 +1,46 @@ +#include <omp.h> +#include <stdint.h> +#include <stdlib.h> + +int +main () +{ + omp_alloctrait_t traits[3] + = { { omp_atk_alignment, 64 }, + { omp_atk_fallback, omp_atv_null_fb }, + { omp_atk_pool_size, 4096 } }; + omp_allocator_handle_t a + = omp_init_allocator (omp_default_mem_space, 3, traits); + if (a == omp_null_allocator) + abort (); + + #pragma omp parallel num_threads(4) + { + int n = omp_get_thread_num (); + double *volatile p, *volatile q; + omp_set_default_allocator ((n & 1) ? a : omp_default_mem_alloc); + p = (double *) omp_alloc (1696, omp_null_allocator); + if (p == NULL) + abort (); + p[0] = 1.0; + p[1695 / sizeof (double *)] = 2.0; + #pragma omp barrier + omp_set_default_allocator ((n & 1) ? omp_default_mem_alloc : a); + q = (double *) omp_alloc (1696, omp_null_allocator); + if (n & 1) + { + if (q == NULL) + abort (); + q[0] = 3.0; + q[1695 / sizeof (double *)] = 4.0; + } + else if (q != NULL) + abort (); + #pragma omp barrier + omp_free (p, omp_null_allocator); + omp_free (q, omp_null_allocator); + omp_set_default_allocator (omp_default_mem_alloc); + } + omp_destroy_allocator (a); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/alloc-3.c b/libgomp/testsuite/libgomp.c-c++-common/alloc-3.c new file mode 100644 index 0000000..a30cdc0 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/alloc-3.c @@ -0,0 +1,28 @@ +/* { dg-set-target-env-var OMP_ALLOCATOR "omp_cgroup_mem_alloc" } */ +/* { dg-set-target-env-var OMP_DISPLAY_ENV "true" } */ + +#include <string.h> +#include <stdlib.h> +#include <omp.h> + +int +main () +{ + const char *p = getenv ("OMP_ALLOCATOR"); + if (p && strcmp (p, "omp_cgroup_mem_alloc") == 0) + { + if (omp_get_default_allocator () != omp_cgroup_mem_alloc) + abort (); + #pragma omp parallel num_threads (2) + { + if 
(omp_get_default_allocator () != omp_cgroup_mem_alloc) + abort (); + #pragma omp parallel num_threads (2) + { + if (omp_get_default_allocator () != omp_cgroup_mem_alloc) + abort (); + } + } + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/alloc-4.c b/libgomp/testsuite/libgomp.c-c++-common/alloc-4.c new file mode 100644 index 0000000..841e1bc --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/alloc-4.c @@ -0,0 +1,25 @@ +#include <omp.h> +#include <stdlib.h> + +const omp_alloctrait_t traits[] += { { omp_atk_pool_size, 1 }, + { omp_atk_fallback, omp_atv_abort_fb } }; + +int +main () +{ + omp_allocator_handle_t a; + + if (omp_alloc (0, omp_null_allocator) != NULL) + abort (); + a = omp_init_allocator (omp_default_mem_space, 2, traits); + if (a != omp_null_allocator) + { + if (omp_alloc (0, a) != NULL + || omp_alloc (0, a) != NULL + || omp_alloc (0, a) != NULL) + abort (); + omp_destroy_allocator (a); + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/pr93515.c b/libgomp/testsuite/libgomp.c-c++-common/pr93515.c new file mode 100644 index 0000000..8a69088 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/pr93515.c @@ -0,0 +1,36 @@ +/* PR libgomp/93515 */ + +#include <omp.h> +#include <stdlib.h> + +int +main () +{ + int i; + int a = 42; +#pragma omp target teams distribute parallel for defaultmap(tofrom: scalar) + for (i = 0; i < 64; ++i) + if (omp_get_team_num () == 0) + if (omp_get_thread_num () == 0) + a = 142; + if (a != 142) + __builtin_abort (); + a = 42; +#pragma omp target parallel for defaultmap(tofrom: scalar) + for (i = 0; i < 64; ++i) + if (omp_get_thread_num () == 0) + a = 143; + if (a != 143) + __builtin_abort (); + a = 42; +#pragma omp target firstprivate(a) + { + #pragma omp parallel for + for (i = 0; i < 64; ++i) + if (omp_get_thread_num () == 0) + a = 144; + if (a != 144) + abort (); + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-40.c 
b/libgomp/testsuite/libgomp.c-c++-common/target-40.c new file mode 100644 index 0000000..22bbdd9 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/target-40.c @@ -0,0 +1,51 @@ +/* { dg-do run } */ +/* { dg-options "-O0" } */ + +extern +#ifdef __cplusplus +"C" +#endif +void abort (void); +volatile int v; +#pragma omp declare target to (v) +typedef void (*fnp1) (void); +typedef fnp1 (*fnp2) (void); +void f1 (void) { v++; } +void f2 (void) { v += 4; } +void f3 (void) { v += 16; f1 (); } +fnp1 f4 (void) { v += 64; return f2; } +int a = 1; +int *b = &a; +int **c = &b; +fnp2 f5 (void) { f3 (); return f4; } +#pragma omp declare target to (c) + +int +main () +{ + int err = 0; + #pragma omp target map(from:err) + { + volatile int xa; + int *volatile xb; + int **volatile xc; + fnp2 xd; + fnp1 xe; + err = 0; + xa = a; + err |= xa != 1; + xb = b; + err |= xb != &a; + xc = c; + err |= xc != &b; + xd = f5 (); + err |= v != 17; + xe = xd (); + err |= v != 81; + xe (); + err |= v != 85; + } + if (err) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/pr93566.c b/libgomp/testsuite/libgomp.c/pr93566.c new file mode 100644 index 0000000..3334bd57 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/pr93566.c @@ -0,0 +1,113 @@ +/* PR middle-end/93566 */ +/* { dg-additional-options "-std=c99" } */ + +extern void abort (void); + +void +foo (int *x) +{ + void nest (void) { + #pragma omp parallel for reduction(+:x[:10]) + for (int i = 0; i < 1024; i++) + for (int j = 0; j < 10; j++) + x[j] += j * i; + } + nest (); + for (int i = 0; i < 10; i++) + if (x[i] != 1023 * 1024 / 2 * i) + abort (); +} + +void +bar (void) +{ + int x[10] = {}; + void nest (void) { + #pragma omp parallel for reduction(+:x[:10]) + for (int i = 0; i < 1024; i++) + for (int j = 0; j < 10; j++) + x[j] += j * i; + } + nest (); + for (int i = 0; i < 10; i++) + if (x[i] != 1023 * 1024 / 2 * i) + abort (); +} + +void +baz (void) +{ + int x[10] = {}; + void nest (void) { + #pragma omp parallel for 
reduction(+:x[2:5]) + for (int i = 0; i < 1024; i++) + for (int j = 2; j < 7; j++) + x[j] += j * i; + } + nest (); + for (int i = 2; i < 7; i++) + if (x[i] != 1023 * 1024 / 2 * i) + abort (); +} + +void +qux (int *x) +{ + void nest (void) { x++; } + nest (); + #pragma omp parallel for reduction(+:x[:9]) + for (int i = 0; i < 1024; i++) + for (int j = 0; j < 9; j++) + x[j] += j * i; + nest (); + for (int i = 0; i < 9; i++) + if (x[i - 1] != 1023 * 1024 / 2 * i) + abort (); +} + +void +quux (void) +{ + int x[10]; + void nest (void) { for (int i = 0; i < 10; i++) x[i] = 0; } + int nest2 (int i) { return x[i]; } + nest (); + #pragma omp parallel for reduction(+:x[:7]) + for (int i = 0; i < 1024; i++) + for (int j = 0; j < 7; j++) + x[j] += j * i; + for (int i = 0; i < 7; i++) + if (nest2 (i) != 1023 * 1024 / 2 * i) + abort (); +} + +void +corge (void) +{ + int x[10]; + void nest (void) { for (int i = 0; i < 10; i++) x[i] = 0; } + int nest2 (int i) { return x[i]; } + nest (); + #pragma omp parallel for reduction(+:x[2:4]) + for (int i = 0; i < 1024; i++) + for (int j = 2; j < 6; j++) + x[j] += j * i; + for (int i = 2; i < 6; i++) + if (nest2 (i) != 1023 * 1024 / 2 * i) + abort (); +} + +int +main () +{ + int a[10] = {}; + foo (a); + bar (); + baz (); + for (int i = 0; i < 10; i++) + a[i] = 0; + qux (a); + quux (); + corge (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/target-38.c b/libgomp/testsuite/libgomp.c/target-38.c new file mode 100644 index 0000000..8169972 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-38.c @@ -0,0 +1,28 @@ +#define A(n) n##0, n##1, n##2, n##3, n##4, n##5, n##6, n##7, n##8, n##9 +#define B(n) A(n##0), A(n##1), A(n##2), A(n##3), A(n##4), A(n##5), A(n##6), A(n##7), A(n##8), A(n##9) + +int +foo (int x) +{ + int b[] = { B(4), B(5), B(6) }; + return b[x]; +} + +int v[] = { 1, 2, 3, 4, 5, 6 }; +#pragma omp declare target to (foo, v) + +int +main () +{ + int i = 5; + asm ("" : "+g" (i)); + #pragma omp target map(tofrom:i) + { + int 
a[] = { B(1), B(2), B(3) }; + asm ("" : : "m" (a) : "memory"); + i = a[i] + foo (i) + v[i & 63]; + } + if (i != 105 + 405 + 6) + __builtin_abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/target-39.c b/libgomp/testsuite/libgomp.c/target-39.c new file mode 100644 index 0000000..4442f43 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-39.c @@ -0,0 +1,47 @@ +/* { dg-do run } */ +/* { dg-options "-O0" } */ + +extern void abort (void); +volatile int v; +#pragma omp declare target to (v) +typedef void (*fnp1) (void); +typedef fnp1 (*fnp2) (void); +void f1 (void) { v++; } +void f2 (void) { v += 4; } +void f3 (void) { v += 16; f1 (); } +fnp1 f4 (void) { v += 64; return f2; } +int a = 1; +int *b = &a; +int **c = &b; +fnp2 f5 (void) { f3 (); return f4; } +#pragma omp declare target to (c, f5) + +int +main () +{ + int err = 0; + #pragma omp target map(from:err) + { + volatile int xa; + int *volatile xb; + int **volatile xc; + fnp2 xd; + fnp1 xe; + err = 0; + xa = a; + err |= xa != 1; + xb = b; + err |= xb != &a; + xc = c; + err |= xc != &b; + xd = f5 (); + err |= v != 17; + xe = xd (); + err |= v != 81; + xe (); + err |= v != 85; + } + if (err) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.fortran/async_io_9.f90 b/libgomp/testsuite/libgomp.fortran/async_io_9.f90 new file mode 100644 index 0000000..2dc111c --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/async_io_9.f90 @@ -0,0 +1,20 @@ +! { dg-do run } +! PR 95191 - this used to hang. +! Original test case by Bill Long. +program test + real a(10000) + integer my_id + integer bad_id + integer :: iostat + character (len=100) :: iomsg + data my_id /1/ + data bad_id /2/ + a = 1. + open (unit=10, file='test.dat', form='unformatted', & + & asynchronous='yes') + write (unit=10, asynchronous='yes', id=my_id) a + iomsg = "" + wait (unit=10, id=bad_id, iostat=iostat, iomsg=iomsg) + if (iostat == 0 .or. 
iomsg /= "Bad ID in WAIT statement") stop 1 + close (unit=10, status='delete') +end program test diff --git a/libgomp/testsuite/libgomp.fortran/close_errors_1.f90 b/libgomp/testsuite/libgomp.fortran/close_errors_1.f90 new file mode 100644 index 0000000..6edb7da --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/close_errors_1.f90 @@ -0,0 +1,19 @@ +! { dg-do run } +! PR 95115 - this used to hang with -pthread. Original test case by +! Bill Long. + +program test + character(len=16) my_status + character(len=1000) :: iomsg + open (unit=10, file='test.dat') + print *,42 + write (10, *) 'weird' + rewind (10) + read (10, *) my_status + close (10) + open (unit=10, file='test.dat') + close (unit=10, status=my_status, iostat=ios, iomsg=iomsg) + if (ios == 0) stop 1 + if (iomsg /= "Bad STATUS parameter in CLOSE statement") stop 2 + close (10, status='delete') +end program test diff --git a/libgomp/testsuite/libgomp.fortran/pr66199-3.f90 b/libgomp/testsuite/libgomp.fortran/pr66199-3.f90 new file mode 100644 index 0000000..7c596dc --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/pr66199-3.f90 @@ -0,0 +1,53 @@ +! { dg-do run } +! +! PR fortran/94690 +! 
PR middle-end/66199 + +module m + integer u(0:1024-1), v(0:1024-1), w(0:1024-1) +contains + +integer(8) function f1 (a, b) + implicit none + integer, value :: a, b + integer(8) :: d + !$omp parallel do lastprivate (d) default(none) firstprivate (a, b) shared(u, v, w) + do d = a, b-1 + u(d) = v(d) + w(d) + end do + f1 = d +end + +integer(8) function f2 (a, b, c) + implicit none + integer, value :: a, b, c + integer(8) :: d, e + !$omp parallel do lastprivate (d) default(none) firstprivate (a, b) shared(u, v, w) linear(c:5) lastprivate(e) + do d = a, b-1 + u(d) = v(d) + w(d) + c = c + 5 + e = c + end do + f2 = d + c + e +end + +integer(8) function f3 (a1, b1, a2, b2) + implicit none + integer, value :: a1, b1, a2, b2 + integer(8) d1, d2 + !$omp parallel do default(none) firstprivate (a1, b1, a2, b2) shared(u, v, w) lastprivate(d1, d2) collapse(2) + do d1 = a1, b1-1 + do d2 = a2, b2-1 + u(d1 * 32 + d2) = v(d1 * 32 + d2) + w(d1 * 32 + d2) + end do + end do + f3 = d1 + d2 +end +end module m + +program main + use m + if (f1 (0, 1024) /= 1024) stop 1 + if (f2 (0, 1024, 17) /= 1024 + 2 * (17 + 5 * 1024)) stop 2 + if (f3 (0, 32, 0, 32) /= 64) stop 3 +end program main diff --git a/libgomp/testsuite/libgomp.fortran/pr66199-4.f90 b/libgomp/testsuite/libgomp.fortran/pr66199-4.f90 new file mode 100644 index 0000000..17b62a6 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/pr66199-4.f90 @@ -0,0 +1,60 @@ +! { dg-do run } +! +! PR fortran/94690 +! 
PR middle-end/66199 + +module m + implicit none + integer u(0:1023), v(0:1023), w(0:1023) + !$omp declare target (u, v, w) + +contains + +subroutine f1 (a, b) + integer a, b, d + !$omp target teams distribute parallel do default(none) firstprivate (a, b) shared(u, v, w) + do d = a, b-1 + u(d) = v(d) + w(d) + end do +end + +subroutine f2 (a, b, c) + integer a, b, c, d, e + !$omp target teams distribute parallel do default(none) firstprivate (a, b, c) shared(u, v, w) lastprivate(d, e) + do d = a, b-1 + u(d) = v(d) + w(d) + e = c + d * 5 + end do +end + +subroutine f3 (a1, b1, a2, b2) + integer :: a1, b1, a2, b2, d1, d2 + !$omp target teams distribute parallel do default(none) firstprivate (a1, b1, a2, b2) shared(u, v, w) & + !$omp& lastprivate(d1, d2) collapse(2) + do d1 = a1, b1-1 + do d2 = a2, b2-1 + u(d1 * 32 + d2) = v(d1 * 32 + d2) + w(d1 * 32 + d2) + end do + end do +end + +subroutine f4 (a1, b1, a2, b2) + integer :: a1, b1, a2, b2, d1, d2 + !$omp target teams distribute parallel do default(none) firstprivate (a1, b1, a2, b2) shared(u, v, w) & + !$omp& collapse(2) + do d1 = a1, b1-1 + do d2 = a2, b2-1 + u(d1 * 32 + d2) = v(d1 * 32 + d2) + w(d1 * 32 + d2) + end do + end do +end +end module m + +program main + use m + implicit none + call f1 (0, 1024) + call f2 (0, 1024, 17) + call f3 (0, 32, 0, 32) + call f4 (0, 32, 0, 32) +end diff --git a/libgomp/testsuite/libgomp.fortran/pr66199-5.f90 b/libgomp/testsuite/libgomp.fortran/pr66199-5.f90 new file mode 100644 index 0000000..9482f08 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/pr66199-5.f90 @@ -0,0 +1,71 @@ +! { dg-do run } +! +! PR fortran/94690 +! 
PR middle-end/66199 + +module m + implicit none + integer u(0:1023), v(0:1023), w(0:1023) + !$omp declare target (u, v, w) + +contains + +integer function f1 (a, b) + integer :: a, b, d + !$omp target map(from: d) + !$omp teams distribute parallel do simd default(none) firstprivate (a, b) shared(u, v, w) + do d = a, b-1 + u(d) = v(d) + w(d) + end do + !$omp end target + f1 = d +end + +integer function f2 (a, b, c) + integer :: a, b, c, d, e + !$omp target map(from: d, e) + !$omp teams distribute parallel do simd default(none) firstprivate (a, b, c) shared(u, v, w) linear(d) lastprivate(e) + do d = a, b-1 + u(d) = v(d) + w(d) + e = c + d * 5 + end do + !$omp end target + f2 = d + e +end + +integer function f3 (a1, b1, a2, b2) + integer :: a1, b1, a2, b2, d1, d2 + !$omp target map(from: d1, d2) + !$omp teams distribute parallel do simd default(none) firstprivate (a1, b1, a2, b2) shared(u, v, w) lastprivate(d1, d2) & + !$omp& collapse(2) + do d1 = a1, b1-1 + do d2 = a2, b2-1 + u(d1 * 32 + d2) = v(d1 * 32 + d2) + w(d1 * 32 + d2) + end do + end do + !$omp end target + f3 = d1 + d2 +end + +integer function f4 (a1, b1, a2, b2) + integer :: a1, b1, a2, b2, d1, d2 + !$omp target map(from: d1, d2) + !$omp teams distribute parallel do simd default(none) firstprivate (a1, b1, a2, b2) shared(u, v, w) collapse(2) + do d1 = a1, b1-1 + do d2 = a2, b2-1 + u(d1 * 32 + d2) = v(d1 * 32 + d2) + w(d1 * 32 + d2) + end do + end do + !$omp end target + f4 = d1 + d2 +end +end module + +program main + use m + implicit none + if (f1 (0, 1024) /= 1024) stop 1 + if (f2 (0, 1024, 17) /= 1024 + (17 + 5 * 1023)) stop 2 + if (f3 (0, 32, 0, 32) /= 64) stop 3 + if (f4 (0, 32, 0, 32) /= 64) stop 3 +end diff --git a/libgomp/testsuite/libgomp.fortran/pr66199-6.f90 b/libgomp/testsuite/libgomp.fortran/pr66199-6.f90 new file mode 100644 index 0000000..f73f683 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/pr66199-6.f90 @@ -0,0 +1,42 @@ +! { dg-do run } +! +! PR fortran/94690 +! 
PR middle-end/66199 + +module m + implicit none + integer :: u(0:1023), v(0:1023), w(0:1023) + !$omp declare target (u, v, w) + +contains + +integer function f2 (a, b, c) + integer :: a, b, c, d, e + !$omp target map(from: d, e) + !$omp teams distribute parallel do default(none) firstprivate (a, b, c) shared(u, v, w) lastprivate(d, e) + do d = a, b-1 + u(d) = v(d) + w(d) + e = c + d * 5 + end do + !$omp end target + f2 = d + e +end + +integer function f3 (a1, b1, a2, b2) + integer :: a1, b1, a2, b2, d1, d2 + !$omp target map(from: d1, d2) + !$omp teams distribute parallel do default(none) firstprivate (a1, b1, a2, b2) shared(u, v, w) lastprivate(d1, d2) collapse(2) + do d1 = a1, b1-1 + do d2 = a2, b2-1 + u(d1 * 32 + d2) = v(d1 * 32 + d2) + w(d1 * 32 + d2) + end do + end do + !$omp end target + f3 = d1 + d2 +end +end module m + +use m + if (f2 (0, 1024, 17) /= 1024 + (17 + 5 * 1023)) stop 1 + if (f3 (0, 32, 0, 32) /= 64) stop 2 +end diff --git a/libgomp/testsuite/libgomp.fortran/pr66199-7.f90 b/libgomp/testsuite/libgomp.fortran/pr66199-7.f90 new file mode 100644 index 0000000..2bd9468 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/pr66199-7.f90 @@ -0,0 +1,72 @@ +! { dg-do run } +! +! PR fortran/94690 +! 
PR middle-end/66199 + +module m + implicit none + integer u(1024), v(1024), w(1024) + !$omp declare target (v, u, w) + +contains + +integer function f1 (a, b) + integer :: a, b, d + !$omp target map(from: d) + !$omp teams distribute simd default(none) firstprivate (a, b) shared(u, v, w) + do d = a, b-1 + u(d) = v(d) + w(d) + end do + !$omp end teams distribute simd + !$omp end target + f1 = d +end + +integer function f2 (a, b, c) + integer a, b, c, d, e + !$omp target map(from: d, e) + !$omp teams distribute simd default(none) firstprivate (a, b, c) shared(u, v, w) linear(d) lastprivate(e) + do d = a, b-1 + u(d) = v(d) + w(d) + e = c + d * 5 + end do + !$omp end teams distribute simd + !$omp end target + f2 = d + e +end + +integer function f3 (a1, b1, a2, b2) + integer :: a1, b1, a2, b2, d1, d2 + !$omp target map(from: d1, d2) + !$omp teams distribute simd default(none) firstprivate (a1, b1, a2, b2) shared(u, v, w) lastprivate(d1, d2) collapse(2) + do d1 = a1, b1-1 + do d2 = a2, b2-1 + u(d1 * 32 + d2) = v(d1 * 32 + d2) + w(d1 * 32 + d2) + end do + end do + !$omp end teams distribute simd + !$omp end target + f3 = d1 + d2 +end + +integer function f4 (a1, b1, a2, b2) + integer :: a1, b1, a2, b2, d1, d2 + !$omp target map(from: d1, d2) + !$omp teams distribute simd default(none) firstprivate (a1, b1, a2, b2) shared(u, v, w) collapse(2) + do d1 = a1, b1-1 + do d2 = a2, b2-1 + u(d1 * 32 + d2) = v(d1 * 32 + d2) + w(d1 * 32 + d2) + end do + end do + !$omp end teams distribute simd + !$omp end target + f4 = d1 + d2 +end +end module + +use m + if (f1 (0, 1024) /= 1024) stop 1 + if (f2 (0, 1024, 17) /= 1024 + (17 + 5 * 1023)) stop 2 + if (f3 (0, 32, 0, 32) /= 64) stop 3 + if (f4 (0, 32, 0, 32) /= 64) stop 4 +end diff --git a/libgomp/testsuite/libgomp.fortran/pr66199-8.f90 b/libgomp/testsuite/libgomp.fortran/pr66199-8.f90 new file mode 100644 index 0000000..8a21c6f --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/pr66199-8.f90 @@ -0,0 +1,76 @@ +! { dg-do run } +! +! 
PR fortran/94690 +! PR middle-end/66199 + +module m + implicit none + integer u(0:1023), v(0:1023), w(0:1023) + !$omp declare target (u, v, w) + +contains + +integer function f1 (a, b) + integer :: a, b, d + !$omp target map(from: d) + !$omp teams default(none) shared(a, b, d, u, v, w) + !$omp distribute simd firstprivate (a, b) + do d = a, b-1 + u(d) = v(d) + w(d) + end do + !$omp end teams + !$omp end target + f1 = d +end + +integer function f2 (a, b, c) + integer a, b, c, d, e + !$omp target map(from: d, e) + !$omp teams default(none) firstprivate (a, b, c) shared(d, e, u, v, w) + !$omp distribute simd linear(d) lastprivate(e) + do d = a, b-1 + u(d) = v(d) + w(d) + e = c + d * 5 + end do + !$omp end teams + !$omp end target + f2 = d + e +end + +integer function f3 (a1, b1, a2, b2) + integer a1, b1, a2, b2, d1, d2 + !$omp target map(from: d1, d2) + !$omp teams default(none) shared(a1, b1, a2, b2, d1, d2, u, v, w) + !$omp distribute simd firstprivate (a1, b1, a2, b2) lastprivate(d1, d2) collapse(2) + do d1 = a1, b1-1 + do d2 = a2, b2-1 + u(d1 * 32 + d2) = v(d1 * 32 + d2) + w(d1 * 32 + d2) + end do + end do + !$omp end teams + !$omp end target + f3 = d1 + d2 +end + +integer function f4 (a1, b1, a2, b2) + integer a1, b1, a2, b2, d1, d2 + !$omp target map(from: d1, d2) + !$omp teams default(none) firstprivate (a1, b1, a2, b2) shared(d1, d2, u, v, w) + !$omp distribute simd collapse(2) + do d1 = a1, b1-1 + do d2 = a2, b2-1 + u(d1 * 32 + d2) = v(d1 * 32 + d2) + w(d1 * 32 + d2) + end do + end do + !$omp end teams + !$omp end target + f4 = d1 + d2 +end +end module m + +use m + if (f1 (0, 1024) /= 1024) stop 1 + if (f2 (0, 1024, 17) /= 1024 + (17 + 5 * 1023)) stop 2 + if (f3 (0, 32, 0, 32) /= 64) stop 3 + if (f4 (0, 32, 0, 32) /= 64) stop 4 +end diff --git a/libgomp/testsuite/libgomp.fortran/pr66199-9.f90 b/libgomp/testsuite/libgomp.fortran/pr66199-9.f90 new file mode 100644 index 0000000..5dde7f8 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/pr66199-9.f90 @@ -0,0 
+1,46 @@ +! { dg-do run } +! +! PR fortran/94690 +! PR middle-end/66199 + +module m + implicit none + integer u(1024), v(1024), w(1024) + !$omp declare target (u, v, w) + +contains + +integer function f2 (a, b, c) + integer :: a, b, c, d, e + !$omp target map(from: d, e) + !$omp teams default(none) firstprivate (a, b, c) shared(d, e, u, v, w) + !$omp distribute lastprivate(d, e) + do d = a, b-1 + u(d) = v(d) + w(d) + e = c + d * 5 + end do + !$omp end teams + !$omp end target + f2 = d + e +end + +integer function f3 (a1, b1, a2, b2) + integer :: a1, b1, a2, b2, d1, d2 + !$omp target map(from: d1, d2) + !$omp teams default(none) shared(a1, b1, a2, b2, d1, d2, u, v, w) + !$omp distribute firstprivate (a1, b1, a2, b2) lastprivate(d1, d2) collapse(2) + do d1 = a1, b1-1 + do d2 = a2, b2-1 + u(d1 * 32 + d2) = v(d1 * 32 + d2) + w(d1 * 32 + d2) + end do + end do + !$omp end teams + !$omp end target + f3 = d1 + d2 +end +end module + +use m + if (f2 (0, 1024, 17) /= 1024 + (17 + 5 * 1023)) stop 1 + if (f3 (0, 32, 0, 32) /= 64) stop 2 +end diff --git a/libgomp/testsuite/libgomp.fortran/target-enter-data-1.f90 b/libgomp/testsuite/libgomp.fortran/target-enter-data-1.f90 new file mode 100644 index 0000000..39faffd --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/target-enter-data-1.f90 @@ -0,0 +1,38 @@ +! 
{ dg-do run } + +program main + implicit none + integer, allocatable, dimension(:) :: AA, BB, CC, DD + integer :: i, N = 20 + + allocate(BB(N)) + AA = [(i, i=1,N)] + + !$omp target enter data map(alloc: BB) + !$omp target enter data map(to: AA) + + !$omp target + BB = 3 * AA + !$omp end target + + !$omp target exit data map(delete: AA) + !$omp target exit data map(from: BB) + + if (any (BB /= [(3*i, i=1,N)])) stop 1 + if (any (AA /= [(i, i=1,N)])) stop 2 + + + CC = 31 * BB + DD = [(-i, i=1,N)] + + !$omp target enter data map(to: CC) map(alloc: DD) + + !$omp target + DD = 5 * CC + !$omp end target + + !$omp target exit data map(delete: CC) map(from: DD) + + if (any (CC /= [(31*3*i, i=1,N)])) stop 3 + if (any (DD /= [(31*3*5*i, i=1,N)])) stop 4 +end diff --git a/libgomp/testsuite/libgomp.fortran/target-enter-data-2.F90 b/libgomp/testsuite/libgomp.fortran/target-enter-data-2.F90 new file mode 100644 index 0000000..36a2ed5 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/target-enter-data-2.F90 @@ -0,0 +1,41 @@ +! { dg-additional-options "-DMEM_SHARED" { target offload_device_shared_as } } +! { dg-do run } +! +! 
PR middle-end/94635 + implicit none + integer, parameter :: N = 20 + integer, allocatable, dimension(:) :: my1DPtr + integer, dimension(N) :: my1DArr + integer :: i + + allocate(my1DPtr(N)) + my1DPtr = 43 + + !$omp target enter data map(alloc: my1DPtr) + !$omp target + my1DPtr = [(i , i = 1, N)] + !$omp end target + + !$omp target map(from: my1DArr) + my1DArr = my1DPtr + !$omp end target + !$omp target exit data map(delete: my1DPtr) + + if (any (my1DArr /= [(i, i = 1, N)])) stop 1 +#if MEM_SHARED + if (any (my1DArr /= my1DPtr)) stop 2 +#else + if (any (43 /= my1DPtr)) stop 3 +#endif + + my1DPtr = [(2*N-i, i = 1, N)] + my1DArr = 42 + + !$omp target map(tofrom: my1DArr) map(tofrom: my1DPtr(:)) + my1DArr = my1DPtr + my1DPtr = 20 + !$omp end target + + if (any (my1DArr /= [(2*N-i, i = 1, N)])) stop 4 + if (any (20 /= my1DPtr)) stop 6 +end diff --git a/libgomp/testsuite/libgomp.fortran/target-var.f90 b/libgomp/testsuite/libgomp.fortran/target-var.f90 new file mode 100644 index 0000000..5e5ccd4 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/target-var.f90 @@ -0,0 +1,32 @@ +! { dg-additional-options "-O3" } +! +! With -O3 the static local variable A.10 generated for +! the array constructor [-2, -4, ..., -20] is optimized +! away - which has to be handled in the offload_vars table. +! 
+program main + implicit none (type, external) + integer :: j + integer, allocatable :: A(:) + + A = [(3*j, j=1, 10)] + call bar (A) + deallocate (A) +contains + subroutine bar (array) + integer :: i + integer :: array(:) + + !$omp target map(from:array) + !$acc parallel copyout(array) + array = [(-2*i, i = 1, size(array))] + !$omp do private(array) + !$acc loop gang private(array) + do i = 1, 10 + array(i) = 9*i + end do + if (any (array /= [(-2*i, i = 1, 10)])) error stop 2 + !$omp end target + !$acc end parallel + end subroutine bar +end diff --git a/libgomp/testsuite/libgomp.fortran/use_device_ptr-optional-2.f90 b/libgomp/testsuite/libgomp.fortran/use_device_ptr-optional-2.f90 index 641ebd9..7a4aaae 100644 --- a/libgomp/testsuite/libgomp.fortran/use_device_ptr-optional-2.f90 +++ b/libgomp/testsuite/libgomp.fortran/use_device_ptr-optional-2.f90 @@ -1,3 +1,4 @@ +! { dg-do run } ! Check whether absent optional arguments are properly ! handled with use_device_{addr,ptr}. program main diff --git a/libgomp/testsuite/libgomp.oacc-c++/c++.exp b/libgomp/testsuite/libgomp.oacc-c++/c++.exp index c06c2a0..7200ec1 100644 --- a/libgomp/testsuite/libgomp.oacc-c++/c++.exp +++ b/libgomp/testsuite/libgomp.oacc-c++/c++.exp @@ -88,15 +88,6 @@ if { $lang_test_file_found } { unsupported "$subdir $offload_target offloading" continue } - gcn { - if { ![check_effective_target_openacc_amdgcn_accel_present] } { - # Don't bother; execution testing is going to FAIL. - untested "$subdir $offload_target offloading: supported, but hardware not accessible" - continue - } - - set acc_mem_shared 0 - } host { set acc_mem_shared 1 } @@ -115,6 +106,15 @@ if { $lang_test_file_found } { set acc_mem_shared 0 } + radeon { + if { ![check_effective_target_openacc_radeon_accel_present] } { + # Don't bother; execution testing is going to FAIL. 
+ untested "$subdir $offload_target offloading: supported, but hardware not accessible" + continue + } + + set acc_mem_shared 0 + } default { error "Unknown OpenACC device type: $openacc_device_type (offload target: $offload_target)" } diff --git a/libgomp/testsuite/libgomp.oacc-c++/declare-pr94120.C b/libgomp/testsuite/libgomp.oacc-c++/declare-pr94120.C new file mode 100644 index 0000000..ed69359 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/declare-pr94120.C @@ -0,0 +1,58 @@ +#include <openacc.h> +#include <stdlib.h> + +#define N 8 + +namespace one { + int A[N] = { 1, 2, 3, 4, 5, 6, 7, 8 }; + #pragma acc declare copyin (A) +}; + +namespace outer { + namespace inner { + int B[N]; + #pragma acc declare create (B) + }; +}; + +static void +f (void) +{ + int i; + int C[N]; + #pragma acc declare copyout (C) + + if (!acc_is_present (&one::A, sizeof (one::A))) + abort (); + + if (!acc_is_present (&outer::inner::B, sizeof (outer::inner::B))) + abort (); + +#pragma acc parallel + for (i = 0; i < N; i++) + { + outer::inner::B[i] = one::A[i]; + C[i] = outer::inner::B[i]; + } + +#pragma acc parallel + for (i = 0; i < N; i++) + { + if (C[i] != i + 1) + abort (); + } + +#pragma acc parallel + for (i = 0; i < N; i++) + if (outer::inner::B[i] != i + 1) + abort (); +} + + +int +main (int argc, char **argv) +{ + f (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c++/firstprivate-mappings-1.C b/libgomp/testsuite/libgomp.oacc-c++/firstprivate-mappings-1.C index c8dba9e..b046bf2 100644 --- a/libgomp/testsuite/libgomp.oacc-c++/firstprivate-mappings-1.C +++ b/libgomp/testsuite/libgomp.oacc-c++/firstprivate-mappings-1.C @@ -1,3 +1,12 @@ /* Verify OpenACC 'firstprivate' mappings for C++ reference types. */ +/* PR middle-end/48591 */ +/* PR other/71064 */ +/* Set to 0 for offloading targets not supporting long double. 
*/ +#if defined(ACC_DEVICE_TYPE_nvidia) || defined(ACC_DEVICE_TYPE_radeon) +# define DO_LONG_DOUBLE 0 +#else +# define DO_LONG_DOUBLE 1 +#endif + #include "../../../gcc/testsuite/g++.dg/goacc/firstprivate-mappings-1.C" diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-3.c deleted file mode 100644 index 9256500..0000000 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-3.c +++ /dev/null @@ -1,19 +0,0 @@ -/* Test the `acc_get_property' and '`acc_get_property_string' library - functions for the host device. */ -/* { dg-additional-sources acc_get_property-aux.c } */ -/* { dg-do run } */ - -#include <openacc.h> -#include <stdio.h> - -void expect_device_properties -(acc_device_t dev_type, int dev_num, - int expected_total_mem, int expected_free_mem, - const char* expected_vendor, const char* expected_name, - const char* expected_driver); - -int main() -{ - printf ("Checking acc_device_host device properties\n"); - expect_device_properties (acc_device_host, 0, 0, 0, "GNU", "GOMP", "1.0"); -} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-aux.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-aux.c index 952bdbf..47285fc 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-aux.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-aux.c @@ -6,11 +6,12 @@ #include <stdio.h> #include <string.h> -void expect_device_properties -(acc_device_t dev_type, int dev_num, - int expected_total_mem, int expected_free_mem, - const char* expected_vendor, const char* expected_name, - const char* expected_driver) + +void +expect_device_string_properties (acc_device_t dev_type, int dev_num, + const char* expected_vendor, + const char* expected_name, + const char* expected_driver) { const char *vendor = acc_get_property_string (dev_num, dev_type, acc_property_vendor); @@ -21,25 +22,6 @@ void 
expect_device_properties abort (); } - int total_mem = acc_get_property (dev_num, dev_type, - acc_property_memory); - if (total_mem != expected_total_mem) - { - fprintf (stderr, "Expected acc_property_memory to equal %d, " - "but was %d.\n", expected_total_mem, total_mem); - abort (); - - } - - int free_mem = acc_get_property (dev_num, dev_type, - acc_property_free_memory); - if (free_mem != expected_free_mem) - { - fprintf (stderr, "Expected acc_property_free_memory to equal %d, " - "but was %d.\n", expected_free_mem, free_mem); - abort (); - } - const char *name = acc_get_property_string (dev_num, dev_type, acc_property_name); if (strcmp (name, expected_name)) @@ -59,11 +41,11 @@ void expect_device_properties } int unknown_property = 16058; - int v = acc_get_property (dev_num, dev_type, (acc_device_property_t)unknown_property); + size_t v = acc_get_property (dev_num, dev_type, (acc_device_property_t)unknown_property); if (v != 0) { fprintf (stderr, "Expected value of unknown numeric property to equal 0, " - "but was %d.\n", v); + "but was %zu.\n", v); abort (); } @@ -72,9 +54,45 @@ void expect_device_properties if (s != NULL) { fprintf (stderr, "Expected value of unknown string property to be NULL, " - "but was %d.\n", s); + "but was %s.\n", s); abort (); } +} +void +expect_device_memory (acc_device_t dev_type, int dev_num, + size_t expected_total_memory) +{ + size_t total_mem = acc_get_property (dev_num, dev_type, + acc_property_memory); + + if (total_mem != expected_total_memory) + { + fprintf (stderr, "Expected acc_property_memory to equal %zu, " + "but was %zu.\n", expected_total_memory, total_mem); + abort (); + } + + size_t free_mem = acc_get_property (dev_num, dev_type, + acc_property_free_memory); + if (free_mem > total_mem) + { + fprintf (stderr, "Expected acc_property_free_memory <= acc_property_memory" + ", but free memory was %zu and total memory was %zu.\n", + free_mem, total_mem); + abort (); + } +} + +void +expect_device_properties (acc_device_t 
dev_type, int dev_num, + size_t expected_total_memory, + const char* expected_vendor, + const char* expected_name, + const char* expected_driver) +{ + expect_device_string_properties (dev_type, dev_num, expected_vendor, + expected_name, expected_driver); + expect_device_memory (dev_type, dev_num, expected_total_memory); } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-gcn.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-gcn.c new file mode 100644 index 0000000..4b1fb5e --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-gcn.c @@ -0,0 +1,135 @@ +/* Test the `acc_get_property' and `acc_get_property_string' library + functions on amdgcn devices by comparing property values with + those obtained through the HSA API. */ +/* { dg-additional-sources acc_get_property-aux.c } */ +/* { dg-additional-options "-ldl" } */ +/* { dg-do run { target openacc_radeon_accel_selected } } */ + +#include <dlfcn.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <openacc.h> + +#ifndef __cplusplus +typedef int bool; +#endif +#include <hsa.h> + + +void expect_device_string_properties (acc_device_t dev_type, int dev_num, + const char* expected_vendor, + const char* expected_name, + const char* expected_driver); + +hsa_status_t (*hsa_agent_get_info_fn) (hsa_agent_t agent, + hsa_agent_info_t attribute, + void *value); +hsa_status_t (*hsa_system_get_info_fn) (hsa_system_info_t attribute, + void *value); +hsa_status_t (*hsa_iterate_agents_fn) +(hsa_status_t (*callback)(hsa_agent_t agent, void *data), void *data); +hsa_status_t (*hsa_init_fn) (void); + +char* support_cpu_devices; + +void +test_setup () +{ + char* env_runtime; + char* hsa_runtime_lib; + void *handle; + +#define DLSYM_FN(function) \ + function##_fn = (typeof(function##_fn))dlsym (handle, #function); \ + if (function##_fn == NULL) \ + { \ + fprintf (stderr, "Could not get symbol " #function ".\n"); \ + abort (); \ + } 
+ + env_runtime = getenv ("HSA_RUNTIME_LIB"); + hsa_runtime_lib = env_runtime ? env_runtime : (char*)"libhsa-runtime64.so"; + + handle = dlopen (hsa_runtime_lib, RTLD_LAZY); + if (!handle) + { + fprintf (stderr, "Could not load %s.\n", hsa_runtime_lib); + abort (); + } + + DLSYM_FN (hsa_agent_get_info) + DLSYM_FN (hsa_system_get_info) + DLSYM_FN (hsa_iterate_agents) + DLSYM_FN (hsa_init) + + hsa_init_fn (); + + support_cpu_devices = getenv ("GCN_SUPPORT_CPU_DEVICES"); +} + +static hsa_status_t +check_agent_properties (hsa_agent_t agent, void *dev_num_arg) +{ + + char name[64]; + char vendor_name[64]; + uint16_t minor; + uint16_t major; + char driver[60]; + + hsa_status_t status; + hsa_device_type_t device_type; + int* dev_num = (int*)dev_num_arg; + +#define AGENT_GET_INFO(info_type, val) \ + status = hsa_agent_get_info_fn (agent, info_type, &val); \ + if (status != HSA_STATUS_SUCCESS) \ + { \ + fprintf (stderr, "Failed to obtain " #info_type ".\n"); \ + abort (); \ + } +#define SYSTEM_GET_INFO(info_type, val) \ + status = hsa_system_get_info_fn (info_type, &val); \ + if (status != HSA_STATUS_SUCCESS) \ + { \ + fprintf (stderr, "Failed to obtain " #info_type ".\n"); \ + abort (); \ + } + + AGENT_GET_INFO (HSA_AGENT_INFO_DEVICE, device_type) + + /* Skip unsupported device types. Mimic the GCN plugin's behavior. 
*/ + if (!(device_type == HSA_DEVICE_TYPE_GPU + || (support_cpu_devices && device_type == HSA_DEVICE_TYPE_CPU))) + return HSA_STATUS_SUCCESS; + + AGENT_GET_INFO (HSA_AGENT_INFO_NAME, name) + AGENT_GET_INFO (HSA_AGENT_INFO_VENDOR_NAME, vendor_name) + + SYSTEM_GET_INFO (HSA_SYSTEM_INFO_VERSION_MINOR, minor) + SYSTEM_GET_INFO (HSA_SYSTEM_INFO_VERSION_MAJOR, major) + + snprintf (driver, sizeof driver, "HSA Runtime %hu.%hu", + (unsigned short int)major, (unsigned short int)minor); + + expect_device_string_properties(acc_device_radeon, *dev_num, + vendor_name, name, driver); + + (*dev_num)++; + + return status; +} + +int +main () +{ + int dev_num = 0; + test_setup (); + + hsa_status_t status = + hsa_iterate_agents_fn (&check_agent_properties, &dev_num); + + return status; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-host.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-host.c new file mode 100644 index 0000000..4ed0dfa --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-host.c @@ -0,0 +1,20 @@ +/* Test the `acc_get_property' and '`acc_get_property_string' library + functions for the host device. 
*/ +/* { dg-additional-sources acc_get_property-aux.c } */ +/* { dg-do run } */ + +#include <openacc.h> +#include <stdio.h> + +void expect_device_properties (acc_device_t dev_type, int dev_num, + size_t expected_memory, + const char* expected_vendor, + const char* expected_name, + const char* expected_driver); + +int +main () +{ + printf ("Checking acc_device_host device properties\n"); + expect_device_properties (acc_device_host, 0, 0, "GNU", "GOMP", "1.0"); +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-nvptx.c index 4dd13c4..6334cfd 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-nvptx.c @@ -11,13 +11,14 @@ #include <string.h> #include <stdio.h> -void expect_device_properties -(acc_device_t dev_type, int dev_num, - int expected_total_mem, int expected_free_mem, - const char* expected_vendor, const char* expected_name, - const char* expected_driver); +void expect_device_properties (acc_device_t dev_type, int dev_num, + size_t expected_memory, + const char* expected_vendor, + const char* expected_name, + const char* expected_driver); -int main () +int +main () { int dev_count; cudaGetDeviceCount (&dev_count); @@ -30,26 +31,26 @@ int main () abort (); } - printf("Checking device %d\n", dev_num); + printf ("Checking device %d\n", dev_num); const char *vendor = "Nvidia"; size_t free_mem; size_t total_mem; - if (cudaMemGetInfo(&free_mem, &total_mem) != cudaSuccess) + if (cudaMemGetInfo (&free_mem, &total_mem) != cudaSuccess) { fprintf (stderr, "cudaMemGetInfo failed.\n"); abort (); } struct cudaDeviceProp p; - if (cudaGetDeviceProperties(&p, dev_num) != cudaSuccess) + if (cudaGetDeviceProperties (&p, dev_num) != cudaSuccess) { fprintf (stderr, "cudaGetDeviceProperties failed.\n"); abort (); } int driver_version; - if (cudaDriverGetVersion(&driver_version) != cudaSuccess) + if 
(cudaDriverGetVersion (&driver_version) != cudaSuccess) { fprintf (stderr, "cudaDriverGetVersion failed.\n"); abort (); @@ -62,7 +63,9 @@ int main () snprintf (driver, sizeof driver, "CUDA Driver %u.%u", driver_version / 1000, driver_version % 1000 / 10); - expect_device_properties(acc_device_nvidia, dev_num, - total_mem, free_mem, vendor, p.name, driver); + /* Note that this check relies on the fact that the device numbering + used by the nvptx plugin agrees with the CUDA device ordering. */ + expect_device_properties (acc_device_nvidia, dev_num, + total_mem, vendor, p.name, driver); } } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property.c index 289d1ba..3460035 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property.c @@ -3,8 +3,7 @@ of all device types mentioned in the OpenACC standard. See also acc_get_property.f90. */ -/* { dg-do run { target { { ! { openacc_host_selected } } && { ! { openacc_amdgcn_accel_selected } } } } } */ -/* FIXME: This test does not work with the GCN implementation stub yet. */ +/* { dg-do run } */ #include <openacc.h> #include <stdlib.h> @@ -15,16 +14,16 @@ and do basic device independent validation. 
*/ void -print_device_properties(acc_device_t type) +print_device_properties (acc_device_t type) { const char *s; size_t v; - int dev_count = acc_get_num_devices(type); + int dev_count = acc_get_num_devices (type); for (int i = 0; i < dev_count; ++i) { - printf(" Device %d:\n", i+1); + printf (" Device %d:\n", i+1); s = acc_get_property_string (i, type, acc_property_vendor); printf (" Vendor: %s\n", s); @@ -35,10 +34,10 @@ print_device_properties(acc_device_t type) } v = acc_get_property (i, type, acc_property_memory); - printf (" Total memory: %zd\n", v); + printf (" Total memory: %zu\n", v); v = acc_get_property (i, type, acc_property_free_memory); - printf (" Free memory: %zd\n", v); + printf (" Free memory: %zu\n", v); s = acc_get_property_string (i, type, acc_property_name); printf (" Name: %s\n", s); @@ -58,19 +57,20 @@ print_device_properties(acc_device_t type) } } -int main () +int +main () { - printf("acc_device_none:\n"); + printf ("acc_device_none:\n"); /* For completness; not expected to print anything since there should be no devices of this type. 
*/ - print_device_properties(acc_device_none); + print_device_properties (acc_device_none); - printf("acc_device_default:\n"); - print_device_properties(acc_device_default); + printf ("acc_device_default:\n"); + print_device_properties (acc_device_default); - printf("acc_device_host:\n"); - print_device_properties(acc_device_host); + printf ("acc_device_host:\n"); + print_device_properties (acc_device_host); - printf("acc_device_not_host:\n"); - print_device_properties(acc_device_not_host); + printf ("acc_device_not_host:\n"); + print_device_properties (acc_device_not_host); } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c index e82a03e..7d05f48 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c @@ -224,7 +224,7 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info * if (acc_device_type == acc_device_host) assert (api_info->device_api == acc_device_api_none); - else if (acc_device_type == acc_device_gcn) + else if (acc_device_type == acc_device_radeon) assert (api_info->device_api == acc_device_api_other); else assert (api_info->device_api == acc_device_api_cuda); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c index ddf647c..ad33f72 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c @@ -106,7 +106,7 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e assert (event_info->launch_event.vector_length >= 1); else if (acc_device_type == acc_device_nvidia) /* ... is special. */ assert (event_info->launch_event.vector_length == 32); - else if (acc_device_type == acc_device_gcn) /* ...and so is this. 
*/ + else if (acc_device_type == acc_device_radeon) /* ...and so is this. */ assert (event_info->launch_event.vector_length == 64); else { @@ -120,7 +120,7 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e if (acc_device_type == acc_device_host) assert (api_info->device_api == acc_device_api_none); - else if (acc_device_type == acc_device_gcn) + else if (acc_device_type == acc_device_radeon) assert (api_info->device_api == acc_device_api_other); else assert (api_info->device_api == acc_device_api_cuda); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c index dc7c758..a5e9ab3 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c @@ -265,7 +265,7 @@ static void cb_enter_data_end (acc_prof_info *prof_info, acc_event_info *event_i if (acc_device_type == acc_device_host) assert (api_info->device_api == acc_device_api_none); - else if (acc_device_type == acc_device_gcn) + else if (acc_device_type == acc_device_radeon) assert (api_info->device_api == acc_device_api_other); else assert (api_info->device_api == acc_device_api_cuda); @@ -321,7 +321,7 @@ static void cb_exit_data_start (acc_prof_info *prof_info, acc_event_info *event_ if (acc_device_type == acc_device_host) assert (api_info->device_api == acc_device_api_none); - else if (acc_device_type == acc_device_gcn) + else if (acc_device_type == acc_device_radeon) assert (api_info->device_api == acc_device_api_other); else assert (api_info->device_api == acc_device_api_cuda); @@ -375,7 +375,7 @@ static void cb_exit_data_end (acc_prof_info *prof_info, acc_event_info *event_in if (acc_device_type == acc_device_host) assert (api_info->device_api == acc_device_api_none); - else if (acc_device_type == acc_device_gcn) + else if (acc_device_type == acc_device_radeon) assert (api_info->device_api == 
acc_device_api_other); else assert (api_info->device_api == acc_device_api_cuda); @@ -516,7 +516,7 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info * if (acc_device_type == acc_device_host) assert (api_info->device_api == acc_device_api_none); - else if (acc_device_type == acc_device_gcn) + else if (acc_device_type == acc_device_radeon) assert (api_info->device_api == acc_device_api_other); else assert (api_info->device_api == acc_device_api_cuda); @@ -581,7 +581,7 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e if (acc_device_type == acc_device_host) assert (api_info->device_api == acc_device_api_none); - else if (acc_device_type == acc_device_gcn) + else if (acc_device_type == acc_device_radeon) assert (api_info->device_api == acc_device_api_other); else assert (api_info->device_api == acc_device_api_cuda); @@ -647,7 +647,7 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve if (acc_device_type == acc_device_host) assert (api_info->device_api == acc_device_api_none); - else if (acc_device_type == acc_device_gcn) + else if (acc_device_type == acc_device_radeon) assert (api_info->device_api == acc_device_api_other); else assert (api_info->device_api == acc_device_api_cuda); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-nop-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-nop-1.c index 840052f..7496426 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-nop-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-nop-1.c @@ -26,8 +26,8 @@ main () acc_device_t d; #if defined ACC_DEVICE_TYPE_nvidia d = acc_device_nvidia; -#elif defined ACC_DEVICE_TYPE_gcn - d = acc_device_gcn; +#elif defined ACC_DEVICE_TYPE_radeon + d = acc_device_radeon; #elif defined ACC_DEVICE_TYPE_host d = acc_device_host; #else diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c index a59047a..13e5ca2 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c @@ -38,7 +38,7 @@ main () assert (v.b[i] == v.a + i); assert (!acc_is_present (&v, sizeof (v))); - assert (!acc_is_present (v.b, sizeof (int *) * n)); + assert (!acc_is_present (v.b, sizeof (int) * n)); } return 0; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c index 0ca5990..1b4cf2f 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c @@ -41,9 +41,9 @@ main () assert (v.b[i] == v.a + i); assert (acc_is_present (&v, sizeof (v))); - assert (!acc_is_present (v.b, sizeof (int *) * n)); - assert (!acc_is_present (v.c, sizeof (int *) * n)); - assert (!acc_is_present (v.d, sizeof (int *) * n)); + assert (!acc_is_present (v.b, sizeof (int) * n)); + assert (!acc_is_present (v.c, sizeof (int) * n)); + assert (!acc_is_present (v.d, sizeof (int) * n)); } #pragma acc exit data copyout(v) diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/firstprivate-mappings-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/firstprivate-mappings-1.c index 4a8b310..2cdd2d1 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/firstprivate-mappings-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/firstprivate-mappings-1.c @@ -3,4 +3,13 @@ /* { dg-additional-options "-Wno-psabi" } as apparently we're doing funny things with vector arguments. */ +/* PR middle-end/48591 */ +/* PR other/71064 */ +/* Set to 0 for offloading targets not supporting long double. 
*/ +#if defined(ACC_DEVICE_TYPE_nvidia) || defined(ACC_DEVICE_TYPE_radeon) +# define DO_LONG_DOUBLE 0 +#else +# define DO_LONG_DOUBLE 1 +#endif + #include "../../../gcc/testsuite/c-c++-common/goacc/firstprivate-mappings-1.c" diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/function-not-offloaded.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/function-not-offloaded.c index 517004a..64f8ab8 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/function-not-offloaded.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/function-not-offloaded.c @@ -1,11 +1,11 @@ /* { dg-do link } */ -/* { dg-excess-errors "lto1, mkoffload and lto-wrapper fatal errors" { target { openacc_nvidia_accel_selected || openacc_amdgcn_accel_selected } } } */ +/* { dg-excess-errors "lto1, mkoffload and lto-wrapper fatal errors" { target { openacc_nvidia_accel_selected || openacc_radeon_accel_selected } } } */ int var; #pragma acc declare create (var) void __attribute__((noinline, noclone)) -foo () /* { dg-error "function 'foo' has been referenced in offloaded code but hasn't been marked to be included in the offloaded code" "" { target { openacc_nvidia_accel_selected || openacc_amdgcn_accel_selected } } } */ +foo () /* { dg-error "function 'foo' has been referenced in offloaded code but hasn't been marked to be included in the offloaded code" "" { target { openacc_nvidia_accel_selected || openacc_radeon_accel_selected } } } */ { var++; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-7.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-7.c new file mode 100644 index 0000000..6830ef1 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-7.c @@ -0,0 +1,66 @@ +/* { dg-do run } */ + +/* Test if, if_present clauses on host_data construct. 
*/ +/* C/C++ variant of 'libgomp.oacc-fortran/host_data-5.F90' */ + +#include <assert.h> +#include <stdint.h> + +void +foo (float *p, intptr_t host_p, int cond) +{ + assert (p == (float *) host_p); + +#pragma acc data copyin(host_p) + { +#pragma acc host_data use_device(p) if_present + /* p not mapped yet, so it will be equal to the host pointer. */ + assert (p == (float *) host_p); + +#pragma acc data copy(p[0:100]) + { + /* Not inside a host_data construct, so p is still the host pointer. */ + assert (p == (float *) host_p); + +#pragma acc host_data use_device(p) + { +#if ACC_MEM_SHARED + assert (p == (float *) host_p); +#else + /* The device address is different from host address. */ + assert (p != (float *) host_p); +#endif + } + +#pragma acc host_data use_device(p) if_present + { +#if ACC_MEM_SHARED + assert (p == (float *) host_p); +#else + /* p is present now, so this is the same as above. */ + assert (p != (float *) host_p); +#endif + } + +#pragma acc host_data use_device(p) if(cond) + { +#if ACC_MEM_SHARED + assert (p == (float *) host_p); +#else + /* p is the device pointer iff cond is true. */ + assert ((p != (float *) host_p) == cond); +#endif + } + } + } +} + +int +main (void) +{ + float arr[100]; + foo (arr, (intptr_t) arr, 0); + foo (arr, (intptr_t) arr, 1); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c index 34bc57e..0273c2b 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c @@ -1,3 +1,6 @@ +/* AMD GCN does not use 32-lane vectors. 
+ { dg-skip-if "unsuitable dimensions" { openacc_radeon_accel_selected } { "*" } { "" } } */ + /* { dg-additional-options "-fopenacc-dim=32" } */ #include <stdio.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c index 04387d3..ca77164 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c @@ -128,5 +128,14 @@ int test_1 (int gp, int wp, int vp) int main () { +#ifdef ACC_DEVICE_TYPE_radeon + /* AMD GCN uses the autovectorizer for the vector dimension: the use + of a function call in vector-partitioned code in this test is not + currently supported. */ + /* AMD GCN does not currently support multiple workers. This should be + set to 16 when that changes. */ + return test_1 (16, 1, 1); +#else return test_1 (16, 16, 32); +#endif } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c index 766e578..5c84301 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c @@ -9,11 +9,13 @@ int main () int ix; int exit = 0; int ondev = 0; + int gangsize, workersize, vectorsize; for (ix = 0; ix < N;ix++) ary[ix] = -1; -#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ary) copy(ondev) +#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ + copy(ary) copy(ondev) copyout(gangsize, workersize, vectorsize) { #pragma acc loop gang worker vector for (unsigned ix = 0; ix < N; ix++) @@ -32,6 +34,10 @@ int main () else ary[ix] = ix; } + + gangsize = __builtin_goacc_parlevel_size (GOMP_DIM_GANG); + workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); + vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); } for (ix = 0; ix < N; ix++) @@ -39,11 +45,12 @@ int main () int expected = ix; if(ondev) { - int 
chunk_size = (N + 32*32*32 - 1) / (32*32*32); + int chunk_size = (N + gangsize * workersize * vectorsize - 1) + / (gangsize * workersize * vectorsize); - int g = ix / (chunk_size * 32 * 32); - int w = ix / 32 % 32; - int v = ix % 32; + int g = ix / (chunk_size * workersize * vectorsize); + int w = (ix / vectorsize) % workersize; + int v = ix % vectorsize; expected = (g << 16) | (w << 8) | v; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c index 0bec6e1..9c4a85f 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c @@ -8,8 +8,10 @@ int main () int ix; int ondev = 0; int t = 0, h = 0; - -#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ondev) + int gangsize, workersize, vectorsize; + +#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ + copy(ondev) copyout(gangsize, workersize, vectorsize) { #pragma acc loop gang worker vector reduction(+:t) for (unsigned ix = 0; ix < N; ix++) @@ -28,18 +30,22 @@ int main () } t += val; } + gangsize = __builtin_goacc_parlevel_size (GOMP_DIM_GANG); + workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); + vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); } for (ix = 0; ix < N; ix++) { int val = ix; - if(ondev) + if (ondev) { - int chunk_size = (N + 32*32*32 - 1) / (32*32*32); + int chunk_size = (N + gangsize * workersize * vectorsize - 1) + / (gangsize * workersize * vectorsize); - int g = ix / (chunk_size * 32 * 32); - int w = ix / 32 % 32; - int v = ix % 32; + int g = ix / (chunk_size * vectorsize * workersize); + int w = ix / vectorsize % workersize; + int v = ix % vectorsize; val = (g << 16) | (w << 8) | v; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c index da4921d..1173c1f 100644 --- 
a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c @@ -9,8 +9,9 @@ int main () int ix; int ondev = 0; int t = 0, h = 0; + int vectorsize; -#pragma acc parallel vector_length(32) copy(ondev) +#pragma acc parallel vector_length(32) copy(ondev) copyout(vectorsize) { #pragma acc loop vector reduction (+:t) for (unsigned ix = 0; ix < N; ix++) @@ -29,6 +30,7 @@ int main () } t += val; } + vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); } for (ix = 0; ix < N; ix++) @@ -38,7 +40,7 @@ int main () { int g = 0; int w = 0; - int v = ix % 32; + int v = ix % vectorsize; val = (g << 16) | (w << 8) | v; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c index 15e2bc2..84c2296 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c @@ -9,8 +9,9 @@ int main () int ix; int ondev = 0; int q = 0, h = 0; + int vectorsize; -#pragma acc parallel vector_length(32) copy(q) copy(ondev) +#pragma acc parallel vector_length(32) copy(q) copy(ondev) copyout(vectorsize) { int t = q; @@ -32,6 +33,7 @@ int main () t += val; } q = t; + vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); } for (ix = 0; ix < N; ix++) @@ -41,7 +43,7 @@ int main () { int g = 0; int w = 0; - int v = ix % 32; + int v = ix % vectorsize; val = (g << 16) | (w << 8) | v; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c index 6bbd04f..648f89e 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c @@ -8,8 +8,10 @@ int main () int ix; int ondev = 0; int t = 0, h = 0; + int workersize; -#pragma acc parallel num_workers(32) vector_length(32) copy(ondev) +#pragma acc parallel num_workers(32) vector_length(32) 
copy(ondev) \ + copyout(workersize) { #pragma acc loop worker reduction(+:t) for (unsigned ix = 0; ix < N; ix++) @@ -28,6 +30,7 @@ int main () } t += val; } + workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); } for (ix = 0; ix < N; ix++) @@ -36,7 +39,7 @@ int main () if(ondev) { int g = 0; - int w = ix % 32; + int w = ix % workersize; int v = 0; val = (g << 16) | (w << 8) | v; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c index c63a5d4..f9fcf37 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c @@ -8,8 +8,10 @@ int main () int ix; int ondev = 0; int q = 0, h = 0; + int workersize; -#pragma acc parallel num_workers(32) vector_length(32) copy(q) copy(ondev) +#pragma acc parallel num_workers(32) vector_length(32) copy(q) copy(ondev) \ + copyout(workersize) { int t = q; @@ -31,6 +33,7 @@ int main () t += val; } q = t; + workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); } for (ix = 0; ix < N; ix++) @@ -39,7 +42,7 @@ int main () if(ondev) { int g = 0; - int w = ix % 32; + int w = ix % workersize; int v = 0; val = (g << 16) | (w << 8) | v; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c index 71d3969..c360ad1 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c @@ -8,8 +8,10 @@ int main () int ix; int ondev = 0; int t = 0, h = 0; + int workersize, vectorsize; -#pragma acc parallel num_workers(32) vector_length(32) copy(ondev) +#pragma acc parallel num_workers(32) vector_length(32) copy(ondev) \ + copyout(workersize, vectorsize) { #pragma acc loop worker vector reduction (+:t) for (unsigned ix = 0; ix < N; ix++) @@ -28,6 +30,8 @@ int main () } t += val; } + workersize = __builtin_goacc_parlevel_size 
(GOMP_DIM_WORKER); + vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); } for (ix = 0; ix < N; ix++) @@ -36,8 +40,8 @@ int main () if(ondev) { int g = 0; - int w = (ix / 32) % 32; - int v = ix % 32; + int w = (ix / vectorsize) % workersize; + int v = ix % vectorsize; val = (g << 16) | (w << 8) | v; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c index 6010cd2..8c858f3 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c @@ -9,11 +9,13 @@ int main () int ix; int exit = 0; int ondev = 0; + int vectorsize; for (ix = 0; ix < N;ix++) ary[ix] = -1; -#pragma acc parallel vector_length(32) copy(ary) copy(ondev) +#pragma acc parallel vector_length(32) copy(ary) copy(ondev) \ + copyout(vectorsize) { #pragma acc loop vector for (unsigned ix = 0; ix < N; ix++) @@ -31,6 +33,7 @@ int main () else ary[ix] = ix; } + vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); } for (ix = 0; ix < N; ix++) @@ -40,7 +43,7 @@ int main () { int g = 0; int w = 0; - int v = ix % 32; + int v = ix % vectorsize; expected = (g << 16) | (w << 8) | v; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c index fa6fb91..5fe486f 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c @@ -9,11 +9,13 @@ int main () int ix; int exit = 0; int ondev = 0; + int workersize; for (ix = 0; ix < N;ix++) ary[ix] = -1; -#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) +#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) \ + copyout(workersize) { #pragma acc loop worker for (unsigned ix = 0; ix < N; ix++) @@ -31,6 +33,7 @@ int main () else ary[ix] = ix; } + workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); } for (ix = 0; ix < N; ix++) @@ 
-39,7 +42,7 @@ int main () if(ondev) { int g = 0; - int w = ix % 32; + int w = ix % workersize; int v = 0; expected = (g << 16) | (w << 8) | v; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c index cd4cc99..fd4e4cf 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c @@ -9,11 +9,13 @@ int main () int ix; int exit = 0; int ondev = 0; + int workersize, vectorsize; for (ix = 0; ix < N;ix++) ary[ix] = -1; -#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) +#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) \ + copyout(workersize, vectorsize) { #pragma acc loop worker vector for (unsigned ix = 0; ix < N; ix++) @@ -31,6 +33,8 @@ int main () else ary[ix] = ix; } + workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); + vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); } for (ix = 0; ix < N; ix++) @@ -39,8 +43,8 @@ int main () if(ondev) { int g = 0; - int w = (ix / 32) % 32; - int v = ix % 32; + int w = (ix / vectorsize) % workersize; + int v = ix % vectorsize; expected = (g << 16) | (w << 8) | v; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c index a5edfc6..cc4c738 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c @@ -14,7 +14,8 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_gang () { if (acc_on_device ((int) acc_device_host)) return 0; - else if (acc_on_device ((int) acc_device_nvidia)) + else if (acc_on_device ((int) acc_device_nvidia) + || acc_on_device ((int) acc_device_radeon)) return __builtin_goacc_parlevel_id (GOMP_DIM_GANG); else __builtin_abort (); @@ -25,7 +26,8 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_worker () { if 
(acc_on_device ((int) acc_device_host)) return 0; - else if (acc_on_device ((int) acc_device_nvidia)) + else if (acc_on_device ((int) acc_device_nvidia) + || acc_on_device ((int) acc_device_radeon)) return __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); else __builtin_abort (); @@ -36,7 +38,8 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_vector () { if (acc_on_device ((int) acc_device_host)) return 0; - else if (acc_on_device ((int) acc_device_nvidia)) + else if (acc_on_device ((int) acc_device_nvidia) + || acc_on_device ((int) acc_device_radeon)) return __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); else __builtin_abort (); @@ -282,6 +285,12 @@ int main () /* The GCC nvptx back end enforces num_workers (32). */ workers_actual = 32; } + else if (acc_on_device (acc_device_radeon)) + { + /* The GCC GCN back end is limited to num_workers (16). + Temporarily set this to 1 until multiple workers are permitted. */ + workers_actual = 1; // 16; + } else __builtin_abort (); #pragma acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) @@ -328,6 +337,11 @@ int main () /* We're actually executing with num_workers (32). */ /* workers_actual = 32; */ } + else if (acc_on_device (acc_device_radeon)) + { + /* The GCC GCN back end is limited to num_workers (16). */ + workers_actual = 16; + } else __builtin_abort (); #pragma acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) @@ -367,6 +381,11 @@ int main () /* The GCC nvptx back end enforces vector_length (32). */ vectors_actual = 1024; } + else if (acc_on_device (acc_device_radeon)) + { + /* The GCC GCN back end enforces vector_length (1): autovectorize. 
*/ + vectors_actual = 1; + } else __builtin_abort (); #pragma acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) @@ -407,6 +426,13 @@ int main () /* The GCC nvptx back end enforces vector_length (32). */ vectors_actual = 32; } + else if (acc_on_device (acc_device_radeon)) + { + /* Because of the way vectors are implemented for GCN, a vector loop + containing a seq routine call will not vectorize calls to that + routine. Hence, we'll only get one "vector". */ + vectors_actual = 1; + } else __builtin_abort (); #pragma acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) @@ -433,6 +459,9 @@ int main () in the following case. So, limit ourselves here. */ if (acc_get_device_type () == acc_device_nvidia) gangs = 3; + /* Similar appears to be true for GCN. */ + if (acc_get_device_type () == acc_device_radeon) + gangs = 3; int gangs_actual = gangs; #define WORKERS 3 int workers_actual = WORKERS; @@ -459,6 +488,13 @@ int main () /* The GCC nvptx back end enforces vector_length (32). */ vectors_actual = 32; } + else if (acc_on_device (acc_device_radeon)) + { + /* Temporary setting, until multiple workers are permitted. */ + workers_actual = 1; + /* See above comments about GCN vectors_actual. */ + vectors_actual = 1; + } else __builtin_abort (); #pragma acc loop gang reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-2.c index 2cb5b95..6570c64 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-2.c @@ -15,4 +15,22 @@ main (void) return 0; } -/* { dg-final { scan-assembler-times "bar.sync" 0 } } */ +/* Todo: Boths bar.syncs can be removed. 
+ Atm we generate this dead code inbetween forked and joining: + + mov.u32 %r28, %ntid.y; + mov.u32 %r29, %tid.y; + add.u32 %r30, %r29, %r29; + setp.gt.s32 %r31, %r30, 19; + @%r31 bra $L2; + add.u32 %r25, %r28, %r28; + mov.u32 %r24, %r30; + $L3: + add.u32 %r24, %r24, %r25; + setp.le.s32 %r33, %r24, 19; + @%r33 bra $L3; + $L2: + + so the loop is not recognized as empty loop (which we detect by seeing if + joining immediately follows forked). */ +/* { dg-final { scan-assembler-times "bar.sync" 2 } } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-4.c index e8a433f..d955d79 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-4.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-4.c @@ -21,4 +21,7 @@ main (void) return 0; } -/* { dg-final { scan-assembler-times "bar.sync" 0 } } */ +/* Atm, %ntid.y is broadcast from one loop to the next, so there are 2 bar.syncs + for that (the other two are there for the same reason as in pr85381-2.c). + Todo: Recompute %ntid.y instead of broadcasting it. */ +/* { dg-final { scan-assembler-times "bar.sync" 4 } } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92854-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92854-1.c index 6ba96b6..79cebf6 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92854-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92854-1.c @@ -1,31 +1,61 @@ -/* Verify that 'acc_unmap_data' unmaps even in presence of dynamic reference - counts. */ +/* Verify that 'acc_unmap_data' unmaps even in presence of structured and + dynamic reference counts, but the device memory remains allocated. 
*/ /* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ #include <assert.h> #include <stdlib.h> +#include <string.h> #include <openacc.h> int main () { const int N = 180; - - char *h = (char *) malloc (N); - char *d = (char *) acc_malloc (N); - if (!d) - abort (); - acc_map_data (h, d, N); - - char *d_ = (char *) acc_create (h + 3, N - 77); - assert (d_ == d + 3); - - d_ = (char *) acc_create (h, N); - assert (d_ == d); - - acc_unmap_data (h); - assert (!acc_is_present (h, N)); + const int N_i = 537; + const int C = 37; + + unsigned char *h = (unsigned char *) malloc (N); + assert (h); + unsigned char *d = (unsigned char *) acc_malloc (N); + assert (d); + + for (int i = 0; i < N_i; ++i) + { + acc_map_data (h, d, N); + assert (acc_is_present (h, N)); +#pragma acc parallel present(h[0:N]) + { + if (i == 0) + memset (h, C, N); + } + + unsigned char *d_ = (unsigned char *) acc_create (h + 3, N - 77); + assert (d_ == d + 3); + +#pragma acc data create(h[6:N - 44]) + { + d_ = (unsigned char *) acc_create (h, N); + assert (d_ == d); + +#pragma acc enter data create(h[0:N]) + + assert (acc_is_present (h, N)); + acc_unmap_data (h); + assert (!acc_is_present (h, N)); + } + + /* We can however still access the device memory. 
*/ +#pragma acc parallel loop deviceptr(d) + for (int j = 0; j < N; ++j) + d[j] += i * j; + } + + acc_memcpy_from_device(h, d, N); + for (int j = 0; j < N; ++j) + assert (h[j] == ((C + N_i * (N_i - 1) / 2 * j) % 256)); + + acc_free (d); return 0; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c index a97e046..da13d84 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c @@ -30,14 +30,18 @@ int main () int ix; int exit = 0; int ondev = 0; + int gangsize, workersize, vectorsize; for (ix = 0; ix < N;ix++) ary[ix] = -1; -#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ary) copy(ondev) +#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ary) copy(ondev) copyout(gangsize, workersize, vectorsize) { ondev = acc_on_device (acc_device_not_host); gang (ary); + gangsize = __builtin_goacc_parlevel_size (GOMP_DIM_GANG); + workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); + vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); } for (ix = 0; ix < N; ix++) @@ -45,11 +49,12 @@ int main () int expected = ix; if(ondev) { - int chunk_size = (N + 32*32*32 - 1) / (32*32*32); + int chunk_size = (N + gangsize * workersize * vectorsize - 1) + / (gangsize * workersize * vectorsize); - int g = ix / (chunk_size * 32 * 32); - int w = ix / 32 % 32; - int v = ix % 32; + int g = ix / (chunk_size * vectorsize * workersize); + int w = (ix / vectorsize) % workersize; + int v = ix % vectorsize; expected = (g << 16) | (w << 8) | v; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c index b1e3e3a..dd7bb6c 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c @@ -30,14 +30,17 @@ int main () int ix; int 
exit = 0; int ondev = 0; + int vectorsize; for (ix = 0; ix < N;ix++) ary[ix] = -1; -#pragma acc parallel vector_length(32) copy(ary) copy(ondev) +#pragma acc parallel vector_length(32) copy(ary) copy(ondev) \ + copyout(vectorsize) { ondev = acc_on_device (acc_device_not_host); vector (ary); + vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); } for (ix = 0; ix < N; ix++) @@ -47,7 +50,7 @@ int main () { int g = 0; int w = 0; - int v = ix % 32; + int v = ix % vectorsize; expected = (g << 16) | (w << 8) | v; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c index 81f1e03..acd9884 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c @@ -30,14 +30,17 @@ int main () int ix; int exit = 0; int ondev = 0; + int workersize; for (ix = 0; ix < N;ix++) ary[ix] = -1; -#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) +#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) \ + copyout(workersize) { ondev = acc_on_device (acc_device_not_host); worker (ary); + workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); } for (ix = 0; ix < N; ix++) @@ -46,7 +49,7 @@ int main () if(ondev) { int g = 0; - int w = ix % 32; + int w = ix % workersize; int v = 0; expected = (g << 16) | (w << 8) | v; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c index 23dbc1a..73696e4 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c @@ -30,14 +30,18 @@ int main () int ix; int exit = 0; int ondev = 0; + int workersize, vectorsize; for (ix = 0; ix < N;ix++) ary[ix] = -1; -#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) +#pragma acc parallel num_workers(32) vector_length(32) copy(ary) 
copy(ondev) \ + copyout(workersize, vectorsize) { ondev = acc_on_device (acc_device_not_host); worker (ary); + workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); + vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); } for (ix = 0; ix < N; ix++) @@ -46,8 +50,8 @@ int main () if(ondev) { int g = 0; - int w = (ix / 32) % 32; - int v = ix % 32; + int w = (ix / vectorsize) % workersize; + int v = ix % vectorsize; expected = (g << 16) | (w << 8) | v; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c index 8862148..9769ee7 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c @@ -2,8 +2,13 @@ #include <openacc.h> #include <gomp-constants.h> +#ifdef ACC_DEVICE_TYPE_radeon +#define NUM_WORKERS 16 +#define NUM_VECTORS 1 +#else #define NUM_WORKERS 16 #define NUM_VECTORS 32 +#endif #define WIDTH 64 #define HEIGHT 32 diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/struct-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/struct-1.c new file mode 100644 index 0000000..543aaa15 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/struct-1.c @@ -0,0 +1,187 @@ +/* Test dynamic refcount and copy behavior of separate structure members. */ + +#include <assert.h> +#include <stdbool.h> +#include <openacc.h> + +struct s +{ + signed char a; + float b; +}; + +static void test(unsigned variant) +{ + struct s s = { .a = 73, .b = -22 }; + +#pragma acc enter data copyin(s.a, s.b) + assert(acc_is_present(&s.a, sizeof s.a)); + assert(acc_is_present(&s.b, sizeof s.b)); + + /* To verify that any following 'copyin' doesn't 'copyin' again. 
*/ + s.a = -s.a; + s.b = -s.b; + + if (variant & 4) + { + if (variant & 8) + { +#pragma acc enter data copyin(s.b) + } + else + acc_copyin(&s.b, sizeof s.b); + assert(acc_is_present(&s.a, sizeof s.a)); + assert(acc_is_present(&s.b, sizeof s.b)); + + if (variant & 16) + { +#pragma acc enter data copyin(s.a) + } + else + acc_copyin(&s.a, sizeof s.a); + assert(acc_is_present(&s.a, sizeof s.a)); + assert(acc_is_present(&s.b, sizeof s.b)); + + if (variant & 32) + { +#pragma acc enter data copyin(s.a) + acc_copyin(&s.b, sizeof s.b); +#pragma acc enter data copyin(s.b) +#pragma acc enter data copyin(s.b) + acc_copyin(&s.a, sizeof s.a); + acc_copyin(&s.a, sizeof s.a); + acc_copyin(&s.a, sizeof s.a); + } + assert(acc_is_present(&s.a, sizeof s.a)); + assert(acc_is_present(&s.b, sizeof s.b)); + } + +#pragma acc parallel \ + copy(s.a, s.b) + { +#if ACC_MEM_SHARED + if (s.a++ != -73) + __builtin_abort(); + if (s.b-- != 22) + __builtin_abort(); +#else + if (s.a++ != 73) + __builtin_abort(); + if (s.b-- != -22) + __builtin_abort(); +#endif + } +#if ACC_MEM_SHARED + assert(s.a == -72); + assert(s.b == 21); +#else + assert(s.a == -73); + assert(s.b == 22); +#endif + + if (variant & 32) + { + if (variant & 1) + { +#pragma acc exit data copyout(s.a) finalize + } + else + acc_copyout_finalize(&s.a, sizeof s.a); + } + else + { + if (variant & 1) + { +#pragma acc exit data copyout(s.a) + } + else + acc_copyout(&s.a, sizeof s.a); + if (variant & 4) + { + assert(acc_is_present(&s.a, sizeof s.a)); + assert(acc_is_present(&s.b, sizeof s.b)); +#if ACC_MEM_SHARED + assert(s.a == -72); + assert(s.b == 21); +#else + assert(s.a == -73); + assert(s.b == 22); +#endif + if (variant & 1) + { +#pragma acc exit data copyout(s.a) + } + else + acc_copyout(&s.a, sizeof s.a); + } + } +#if ACC_MEM_SHARED + assert(acc_is_present(&s.a, sizeof s.a)); + assert(acc_is_present(&s.b, sizeof s.b)); + assert(s.a == -72); + assert(s.b == 21); +#else + assert(!acc_is_present(&s.a, sizeof s.a)); + 
assert(acc_is_present(&s.b, sizeof s.b)); + assert(s.a == 74); + assert(s.b == 22); +#endif + + if (variant & 32) + { + if (variant & 2) + { +#pragma acc exit data copyout(s.b) finalize + } + else + acc_copyout_finalize(&s.b, sizeof s.b); + } + else + { + if (variant & 2) + { +#pragma acc exit data copyout(s.b) + } + else + acc_copyout(&s.b, sizeof s.b); + if (variant & 4) + { +#if ACC_MEM_SHARED + assert(acc_is_present(&s.a, sizeof s.a)); + assert(acc_is_present(&s.b, sizeof s.b)); + assert(s.a == -72); + assert(s.b == 21); +#else + assert(!acc_is_present(&s.a, sizeof s.a)); + assert(acc_is_present(&s.b, sizeof s.b)); + assert(s.a == 74); + assert(s.b == 22); +#endif + if (variant & 2) + { +#pragma acc exit data copyout(s.b) + } + else + acc_copyout(&s.b, sizeof s.b); + } + } +#if ACC_MEM_SHARED + assert(acc_is_present(&s.a, sizeof s.a)); + assert(acc_is_present(&s.b, sizeof s.b)); + assert(s.a == -72); + assert(s.b == 21); +#else + assert(!acc_is_present(&s.a, sizeof s.a)); + assert(!acc_is_present(&s.b, sizeof s.b)); + assert(s.a == 74); + assert(s.b == -23); +#endif +} + +int main() +{ + for (unsigned variant = 0; variant < 64; ++variant) + test(variant); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/struct-copyout-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/struct-copyout-1.c new file mode 100644 index 0000000..b86f1c9 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/struct-copyout-1.c @@ -0,0 +1,38 @@ +#include <assert.h> + +struct str1 { + int a; + int b; +}; + +struct str2 { + int c; + int d; + struct str1 s; +}; + +int +main (int argc, char *argv[]) +{ + struct str2 t; + + t.c = 1; + t.d = 2; + t.s.a = 3; + t.s.b = 4; + + #pragma acc enter data copyin(t.s) + + #pragma acc serial present(t.s) /* { dg-warning "using vector_length \\(32\\), ignoring 1" "" { target openacc_nvidia_accel_selected } } */ + { + t.s.a = 5; + t.s.b = 6; + } + + #pragma acc exit data copyout(t.s) + + assert (t.s.a == 5); + assert 
(t.s.b == 6); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/struct-copyout-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/struct-copyout-2.c new file mode 100644 index 0000000..4dd8a3a --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/struct-copyout-2.c @@ -0,0 +1,44 @@ +#include <assert.h> +#include <stdlib.h> + +struct str1 { + int a; + int b; + int *c; +}; + +#define N 1024 + +int +main (int argc, char *argv[]) +{ + struct str1 s; + + s.a = 1; + s.b = 2; + s.c = (int *) malloc (sizeof (int) * N); + + for (int i = 0; i < N; i++) + s.c[i] = i + 10; + + #pragma acc enter data copyin(s.a, s.b, s.c[0:N]) + + #pragma acc serial present(s.a, s.b, s.c[0:N]) /* { dg-warning "using vector_length \\(32\\), ignoring 1" "" { target openacc_nvidia_accel_selected } } */ + { + s.a = 3; + s.b = 4; + for (int i = 0; i < N; i++) + s.c[i] = i + 20; + } + + #pragma acc exit data copyout(s.a, s.b, s.c[0:N]) + + assert (s.a == 3); + assert (s.b == 4); + for (int i = 0; i < N; i++) + assert (s.c[i] == i + 20); + + free (s.c); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-1-lib.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-1-lib.c new file mode 100644 index 0000000..8fa87777 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-1-lib.c @@ -0,0 +1,3 @@ +/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */ +/* { dg-additional-options "-DOPENACC_API" } */ +#include "structured-dynamic-lifetimes-1.c" diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-1.c new file mode 100644 index 0000000..0d6b415 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-1.c @@ -0,0 +1,161 @@ +/* Test transitioning of data lifetimes between structured and dynamic. 
*/ + +/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */ + +#include <openacc.h> +#include <assert.h> +#include <stdlib.h> + +#define SIZE 1024 + +void +f1 (void) +{ + char *block1 = (char *) malloc (SIZE); + +#ifdef OPENACC_API + acc_copyin (block1, SIZE); + acc_copyin (block1, SIZE); +#else +#pragma acc enter data copyin(block1[0:SIZE]) +#pragma acc enter data copyin(block1[0:SIZE]) +#endif + +#pragma acc data copy(block1[0:SIZE]) + { +#ifdef OPENACC_API + acc_copyin (block1, SIZE); +#else +#pragma acc enter data copyin(block1[0:SIZE]) +#endif + } + + assert (acc_is_present (block1, SIZE)); + +#ifdef OPENACC_API + acc_copyout (block1, SIZE); + assert (acc_is_present (block1, SIZE)); + acc_copyout (block1, SIZE); + assert (acc_is_present (block1, SIZE)); + acc_copyout (block1, SIZE); + assert (!acc_is_present (block1, SIZE)); +#else +#pragma acc exit data copyout(block1[0:SIZE]) + assert (acc_is_present (block1, SIZE)); +#pragma acc exit data copyout(block1[0:SIZE]) + assert (acc_is_present (block1, SIZE)); +#pragma acc exit data copyout(block1[0:SIZE]) + assert (!acc_is_present (block1, SIZE)); +#endif + + free (block1); +} + +void +f2 (void) +{ + char *block1 = (char *) malloc (SIZE); + +#ifdef OPENACC_API + acc_copyin (block1, SIZE); +#else +#pragma acc enter data copyin(block1[0:SIZE]) +#endif + +#pragma acc data copy(block1[0:SIZE]) + { +#ifdef OPENACC_API + acc_copyout (block1, SIZE); +#else +#pragma acc exit data copyout(block1[0:SIZE]) +#endif + /* This should stay present until the end of the structured data + lifetime. 
*/ + assert (acc_is_present (block1, SIZE)); + } + + assert (!acc_is_present (block1, SIZE)); + + free (block1); +} + +void +f3 (void) +{ + char *block1 = (char *) malloc (SIZE); + +#ifdef OPENACC_API + acc_copyin (block1, SIZE); +#else +#pragma acc enter data copyin(block1[0:SIZE]) +#endif + +#pragma acc data copy(block1[0:SIZE]) + { +#ifdef OPENACC_API + acc_copyout (block1, SIZE); + acc_copyin (block1, SIZE); +#else +#pragma acc exit data copyout(block1[0:SIZE]) +#pragma acc enter data copyin(block1[0:SIZE]) +#endif + assert (acc_is_present (block1, SIZE)); + } + + assert (acc_is_present (block1, SIZE)); +#ifdef OPENACC_API + acc_copyout (block1, SIZE); +#else +#pragma acc exit data copyout(block1[0:SIZE]) +#endif + assert (!acc_is_present (block1, SIZE)); + + free (block1); +} + +void +f4 (void) +{ + char *block1 = (char *) malloc (SIZE); + char *block2 = (char *) malloc (SIZE); + char *block3 = (char *) malloc (SIZE); + +#pragma acc data copy(block1[0:SIZE], block2[0:SIZE], block3[0:SIZE]) + { + /* The first copyin of block2 is the enclosing data region. This + "enter data" should make it live beyond the end of this region. + This works, though the on-target copies of block1, block2 and block3 + will stay allocated until block2 is unmapped because they are bound + together in a single target_mem_desc. 
*/ +#ifdef OPENACC_API + acc_copyin (block2, SIZE); +#else +#pragma acc enter data copyin(block2[0:SIZE]) +#endif + } + + assert (!acc_is_present (block1, SIZE)); + assert (acc_is_present (block2, SIZE)); + assert (!acc_is_present (block3, SIZE)); + +#ifdef OPENACC_API + acc_copyout (block2, SIZE); +#else +#pragma acc exit data copyout(block2[0:SIZE]) +#endif + assert (!acc_is_present (block2, SIZE)); + + free (block1); + free (block2); + free (block3); +} + +int +main (int argc, char *argv[]) +{ + f1 (); + f2 (); + f3 (); + f4 (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-2-lib.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-2-lib.c new file mode 100644 index 0000000..365df8d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-2-lib.c @@ -0,0 +1,3 @@ +/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */ +/* { dg-additional-options "-DOPENACC_API" } */ +#include "structured-dynamic-lifetimes-2.c" diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-2.c new file mode 100644 index 0000000..726942c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-2.c @@ -0,0 +1,166 @@ +/* Test nested dynamic/structured data mappings. 
*/ + +/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */ + +#include <openacc.h> +#include <assert.h> +#include <stdlib.h> + +#define SIZE 1024 + +void +f1 (void) +{ + char *block1 = (char *) malloc (SIZE); + +#pragma acc data copy(block1[0:SIZE]) + { +#ifdef OPENACC_API + acc_copyin (block1, SIZE); + acc_copyout (block1, SIZE); +#else +#pragma acc enter data copyin(block1[0:SIZE]) +#pragma acc exit data copyout(block1[0:SIZE]) +#endif + } + + assert (!acc_is_present (block1, SIZE)); + + free (block1); +} + +void +f2 (void) +{ + char *block1 = (char *) malloc (SIZE); + +#ifdef OPENACC_API + acc_copyin (block1, SIZE); +#else +#pragma acc enter data copyin(block1[0:SIZE]) +#endif + +#pragma acc data copy(block1[0:SIZE]) + { + } + +#ifdef OPENACC_API + acc_copyout (block1, SIZE); +#else +#pragma acc exit data copyout(block1[0:SIZE]) +#endif + + assert (!acc_is_present (block1, SIZE)); + + free (block1); +} + +void +f3 (void) +{ + char *block1 = (char *) malloc (SIZE); + +#pragma acc data copy(block1[0:SIZE]) + { +#ifdef OPENACC_API + acc_copyin (block1, SIZE); + acc_copyin (block1, SIZE); + acc_copyout (block1, SIZE); + acc_copyout (block1, SIZE); +#else +#pragma acc enter data copyin(block1[0:SIZE]) +#pragma acc enter data copyin(block1[0:SIZE]) +#pragma acc exit data copyout(block1[0:SIZE]) +#pragma acc exit data copyout(block1[0:SIZE]) +#endif + } + + assert (!acc_is_present (block1, SIZE)); + + free (block1); +} + +void +f4 (void) +{ + char *block1 = (char *) malloc (SIZE); + +#pragma acc data copy(block1[0:SIZE]) + { +#ifdef OPENACC_API + acc_copyin (block1, SIZE); +#else +#pragma acc enter data copyin(block1[0:SIZE]) +#endif + +#pragma acc data copy(block1[0:SIZE]) + { +#ifdef OPENACC_API + acc_copyin (block1, SIZE); + acc_copyout (block1, SIZE); +#else +#pragma acc enter data copyin(block1[0:SIZE]) +#pragma acc exit data copyout(block1[0:SIZE]) +#endif + } + +#ifdef OPENACC_API + acc_copyout (block1, SIZE); +#else +#pragma acc exit data 
copyout(block1[0:SIZE]) +#endif + } + + assert (!acc_is_present (block1, SIZE)); + + free (block1); +} + +void +f5 (void) +{ + char *block1 = (char *) malloc (SIZE); + +#ifdef OPENACC_API + acc_copyin (block1, SIZE); +#else +#pragma acc enter data copyin(block1[0:SIZE]) +#endif + +#pragma acc data copy(block1[0:SIZE]) + { +#ifdef OPENACC_API + acc_copyin (block1, SIZE); +#else +#pragma acc enter data copyin(block1[0:SIZE]) +#endif +#pragma acc data copy(block1[0:SIZE]) + { + } +#ifdef OPENACC_API + acc_copyout (block1, SIZE); +#else +#pragma acc exit data copyout(block1[0:SIZE]) +#endif + } +#ifdef OPENACC_API + acc_copyout (block1, SIZE); +#else +#pragma acc exit data copyout(block1[0:SIZE]) +#endif + + assert (!acc_is_present (block1, SIZE)); + + free (block1); +} + +int +main (int argc, char *argv[]) +{ + f1 (); + f2 (); + f3 (); + f4 (); + f5 (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-3-lib.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-3-lib.c new file mode 100644 index 0000000..469b35b --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-3-lib.c @@ -0,0 +1,3 @@ +/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */ +/* { dg-additional-options "-DOPENACC_API" } */ +#include "structured-dynamic-lifetimes-3.c" diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-3.c new file mode 100644 index 0000000..c13f3c5 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-3.c @@ -0,0 +1,183 @@ +/* Test nested dynamic/structured data mappings (multiple blocks on data + regions). 
*/ + +/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */ + +#include <openacc.h> +#include <assert.h> +#include <stdlib.h> + +#define SIZE 1024 + +void +f1 (void) +{ + char *block1 = (char *) malloc (SIZE); + char *block2 = (char *) malloc (SIZE); + +#pragma acc data copy(block1[0:SIZE], block2[0:SIZE]) + { +#ifdef OPENACC_API + acc_copyin (block1, SIZE); + acc_copyout (block1, SIZE); +#else +#pragma acc enter data copyin(block1[0:SIZE]) +#pragma acc exit data copyout(block1[0:SIZE]) +#endif + } + + assert (!acc_is_present (block1, SIZE)); + assert (!acc_is_present (block2, SIZE)); + + free (block1); + free (block2); +} + +void +f2 (void) +{ + char *block1 = (char *) malloc (SIZE); + char *block2 = (char *) malloc (SIZE); + +#ifdef OPENACC_API + acc_copyin (block1, SIZE); +#else +#pragma acc enter data copyin(block1[0:SIZE]) +#endif + +#pragma acc data copy(block1[0:SIZE], block2[0:SIZE]) + { + } + +#ifdef OPENACC_API + acc_copyout (block1, SIZE); +#else +#pragma acc exit data copyout(block1[0:SIZE]) +#endif + + assert (!acc_is_present (block1, SIZE)); + assert (!acc_is_present (block2, SIZE)); + + free (block1); + free (block2); +} + +void +f3 (void) +{ + char *block1 = (char *) malloc (SIZE); + char *block2 = (char *) malloc (SIZE); + +#pragma acc data copy(block1[0:SIZE], block2[0:SIZE]) + { +#ifdef OPENACC_API + acc_copyin (block1, SIZE); + acc_copyin (block2, SIZE); + acc_copyout (block2, SIZE); + acc_copyout (block1, SIZE); +#else +#pragma acc enter data copyin(block1[0:SIZE]) +#pragma acc enter data copyin(block2[0:SIZE]) +#pragma acc exit data copyout(block2[0:SIZE]) +#pragma acc exit data copyout(block1[0:SIZE]) +#endif + } + + assert (!acc_is_present (block1, SIZE)); + assert (!acc_is_present (block2, SIZE)); + + free (block1); + free (block2); +} + +void +f4 (void) +{ + char *block1 = (char *) malloc (SIZE); + char *block2 = (char *) malloc (SIZE); + +#pragma acc data copy(block1[0:SIZE], block2[0:SIZE]) + { +#ifdef OPENACC_API + acc_copyin 
(block1, SIZE); +#else +#pragma acc enter data copyin(block1[0:SIZE]) +#endif + +#pragma acc data copy(block1[0:SIZE], block2[0:SIZE]) + { +#ifdef OPENACC_API + acc_copyin (block2, SIZE); + acc_copyout (block2, SIZE); +#else +#pragma acc enter data copyin(block2[0:SIZE]) +#pragma acc exit data copyout(block2[0:SIZE]) +#endif + } +#ifdef OPENACC_API + acc_copyout (block1, SIZE); +#else +#pragma acc exit data copyout(block1[0:SIZE]) +#endif + } + + assert (!acc_is_present (block1, SIZE)); + assert (!acc_is_present (block2, SIZE)); + + free (block1); + free (block2); +} + +void +f5 (void) +{ + char *block1 = (char *) malloc (SIZE); + char *block2 = (char *) malloc (SIZE); + +#ifdef OPENACC_API + acc_copyin (block1, SIZE); +#else +#pragma acc enter data copyin(block1[0:SIZE]) +#endif + +#pragma acc data copy(block1[0:SIZE], block2[0:SIZE]) + { +#ifdef OPENACC_API + acc_copyin (block2, SIZE); +#else +#pragma acc enter data copyin(block2[0:SIZE]) +#endif + +#pragma acc data copy(block1[0:SIZE], block2[0:SIZE]) + { + } +#ifdef OPENACC_API + acc_copyout (block2, SIZE); +#else +#pragma acc exit data copyout(block2[0:SIZE]) +#endif + } + +#ifdef OPENACC_API + acc_copyout (block1, SIZE); +#else +#pragma acc exit data copyout(block1[0:SIZE]) +#endif + + assert (!acc_is_present (block1, SIZE)); + assert (!acc_is_present (block2, SIZE)); + + free (block1); + free (block2); +} + +int +main (int argc, char *argv[]) +{ + f1 (); + f2 (); + f3 (); + f4 (); + f5 (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-4-lib.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-4-lib.c new file mode 100644 index 0000000..8e88b97 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-4-lib.c @@ -0,0 +1,3 @@ +/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */ +/* { dg-additional-options "-DOPENACC_API" } */ +#include "structured-dynamic-lifetimes-4.c" diff --git 
a/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-4.c new file mode 100644 index 0000000..e9a6510 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-4.c @@ -0,0 +1,64 @@ +/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */ + +#include <openacc.h> +#include <assert.h> +#include <stdlib.h> + +#define SIZE 1024 + +int +main (int argc, char *argv[]) +{ + char *block1 = (char *) malloc (SIZE); + char *block2 = (char *) malloc (SIZE); + char *block3 = (char *) malloc (SIZE); + + /* Doing this twice ensures that we have a non-zero virtual refcount. Make + sure that works too. */ +#ifdef OPENACC_API + acc_copyin (block1, SIZE); + acc_copyin (block1, SIZE); +#else +#pragma acc enter data copyin(block1[0:SIZE]) +#pragma acc enter data copyin(block1[0:SIZE]) +#endif + +#pragma acc data copy(block1[0:SIZE], block2[0:SIZE], block3[0:SIZE]) + { + /* The first copyin of block2 is the enclosing data region. This + "enter data" should make it live beyond the end of this region. 
*/ +#ifdef OPENACC_API + acc_copyin (block2, SIZE); +#else +#pragma acc enter data copyin(block2[0:SIZE]) +#endif + } + + assert (acc_is_present (block1, SIZE)); + assert (acc_is_present (block2, SIZE)); + assert (!acc_is_present (block3, SIZE)); + +#ifdef OPENACC_API + acc_copyout (block1, SIZE); + assert (acc_is_present (block1, SIZE)); + acc_copyout (block1, SIZE); + assert (!acc_is_present (block1, SIZE)); + + acc_copyout (block2, SIZE); + assert (!acc_is_present (block2, SIZE)); +#else +#pragma acc exit data copyout(block1[0:SIZE]) + assert (acc_is_present (block1, SIZE)); +#pragma acc exit data copyout(block1[0:SIZE]) + assert (!acc_is_present (block1, SIZE)); + +#pragma acc exit data copyout(block2[0:SIZE]) + assert (!acc_is_present (block2, SIZE)); +#endif + + free (block1); + free (block2); + free (block3); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-5-lib.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-5-lib.c new file mode 100644 index 0000000..59ef562 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-5-lib.c @@ -0,0 +1,3 @@ +/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */ +/* { dg-additional-options "-DOPENACC_API" } */ +#include "structured-dynamic-lifetimes-5.c" diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-5.c new file mode 100644 index 0000000..9807076 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-5.c @@ -0,0 +1,56 @@ +/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */ + +#include <openacc.h> +#include <assert.h> +#include <stdlib.h> + +#define SIZE 1024 + +int +main (int argc, char *argv[]) +{ + char *block1 = (char *) malloc (SIZE); + char *block2 = (char *) malloc (SIZE); + char *block3 = (char *) malloc (SIZE); + +#ifdef OPENACC_API + 
acc_copyin (block1, SIZE); +#else +#pragma acc enter data copyin(block1[0:SIZE]) +#endif + +#pragma acc data copy(block1[0:SIZE], block2[0:SIZE], block3[0:SIZE]) + { + /* The first copyin of block2 is the enclosing data region. This + "enter data" should make it live beyond the end of this region. */ +#ifdef OPENACC_API + acc_copyin (block2, SIZE); +#else +#pragma acc enter data copyin(block2[0:SIZE]) +#endif + } + + assert (acc_is_present (block1, SIZE)); + assert (acc_is_present (block2, SIZE)); + assert (!acc_is_present (block3, SIZE)); + +#ifdef OPENACC_API + acc_copyout (block1, SIZE); + assert (!acc_is_present (block1, SIZE)); + + acc_copyout (block2, SIZE); + assert (!acc_is_present (block2, SIZE)); +#else +#pragma acc exit data copyout(block1[0:SIZE]) + assert (!acc_is_present (block1, SIZE)); + +#pragma acc exit data copyout(block2[0:SIZE]) + assert (!acc_is_present (block2, SIZE)); +#endif + + free (block1); + free (block2); + free (block3); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-6-lib.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-6-lib.c new file mode 100644 index 0000000..0401f73 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-6-lib.c @@ -0,0 +1,3 @@ +/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */ +/* { dg-additional-options "-DOPENACC_API" } */ +#include "structured-dynamic-lifetimes-6.c" diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-6.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-6.c new file mode 100644 index 0000000..9250b4a --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-6.c @@ -0,0 +1,43 @@ +/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */ + +#include <openacc.h> +#include <assert.h> +#include <stdlib.h> + +#define SIZE 1024 + +int +main (int argc, char *argv[]) +{ + char 
*block1 = (char *) malloc (SIZE); + char *block2 = (char *) malloc (SIZE); + +#ifdef OPENACC_API + acc_copyin (block1, SIZE); + acc_copyin (block2, SIZE); +#else +#pragma acc enter data copyin(block1[0:SIZE], block2[0:SIZE]) +#endif + +#pragma acc data copy(block1[0:SIZE], block2[0:SIZE]) + { +#ifdef OPENACC_API + acc_copyout (block1, SIZE); + acc_copyout (block2, SIZE); +#else +#pragma acc exit data copyout(block1[0:SIZE], block2[0:SIZE]) +#endif + /* These should stay present until the end of the structured data + lifetime. */ + assert (acc_is_present (block1, SIZE)); + assert (acc_is_present (block2, SIZE)); + } + + assert (!acc_is_present (block1, SIZE)); + assert (!acc_is_present (block2, SIZE)); + + free (block1); + free (block2); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-7-lib.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-7-lib.c new file mode 100644 index 0000000..07caefb --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-7-lib.c @@ -0,0 +1,3 @@ +/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */ +/* { dg-additional-options "-DOPENACC_API" } */ +#include "structured-dynamic-lifetimes-7.c" diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-7.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-7.c new file mode 100644 index 0000000..52e8d4c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-7.c @@ -0,0 +1,44 @@ +/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */ + +#include <openacc.h> +#include <assert.h> +#include <stdlib.h> + +#define SIZE 1024 + +int +main (int argc, char *argv[]) +{ + char *block1 = (char *) malloc (SIZE); + char *block2 = (char *) malloc (SIZE); + +#ifdef OPENACC_API + acc_copyin (block1, SIZE); +#else +#pragma acc enter data copyin(block1[0:SIZE]) +#endif + +#pragma acc data 
copy(block1[0:SIZE], block2[0:SIZE]) + { +/* We can't attach the dynamic data mapping's (block1) target_mem_desc to the + enclosing structured data region here, because that region maps block2 + also. */ +#ifdef OPENACC_API + acc_copyout (block1, SIZE); +#else +#pragma acc exit data copyout(block1[0:SIZE]) +#endif + /* These should stay present until the end of the structured data + lifetime. */ + assert (acc_is_present (block1, SIZE)); + assert (acc_is_present (block2, SIZE)); + } + + assert (!acc_is_present (block1, SIZE)); + assert (!acc_is_present (block2, SIZE)); + + free (block1); + free (block2); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-8-lib.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-8-lib.c new file mode 100644 index 0000000..1c2479a --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-8-lib.c @@ -0,0 +1,3 @@ +/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */ +/* { dg-additional-options "-DOPENACC_API" } */ +#include "structured-dynamic-lifetimes-8.c" diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-8.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-8.c new file mode 100644 index 0000000..919ee02 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-8.c @@ -0,0 +1,47 @@ +/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */ + +#include <openacc.h> +#include <assert.h> +#include <stdlib.h> + +#define SIZE 1024 + +int +main (int argc, char *argv[]) +{ + char *block1 = (char *) malloc (SIZE); + char *block2 = (char *) malloc (SIZE); + +#ifdef OPENACC_API + acc_copyin (block1, SIZE); +#else +#pragma acc enter data copyin(block1[0:SIZE]) +#endif + +#pragma acc data copy(block1[0:SIZE], block2[0:SIZE]) + { +#ifdef OPENACC_API + acc_copyout (block1, SIZE); + acc_copyin (block2, SIZE); +#else +#pragma acc exit data 
copyout(block1[0:SIZE]) +#pragma acc enter data copyin(block2[0:SIZE]) +#endif + assert (acc_is_present (block1, SIZE)); + assert (acc_is_present (block2, SIZE)); + } + + assert (!acc_is_present (block1, SIZE)); + assert (acc_is_present (block2, SIZE)); +#ifdef OPENACC_API + acc_copyout (block2, SIZE); +#else +#pragma acc exit data copyout(block2[0:SIZE]) +#endif + assert (!acc_is_present (block2, SIZE)); + + free (block1); + free (block2); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/tile-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/tile-1.c index c019fe5..57579171 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/tile-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/tile-1.c @@ -1,5 +1,5 @@ /* AMD GCN does not use 32-lane vectors, so the expected use counts mismatch. - { dg-skip-if "unsuitable dimensions" { openacc_amdgcn_accel_selected } { "*" } { "" } } */ + { dg-skip-if "unsuitable dimensions" { openacc_radeon_accel_selected } { "*" } { "" } } */ /* { dg-additional-options "-fopenacc-dim=32" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/c.exp b/libgomp/testsuite/libgomp.oacc-c/c.exp index 7f13242..48cbc98 100644 --- a/libgomp/testsuite/libgomp.oacc-c/c.exp +++ b/libgomp/testsuite/libgomp.oacc-c/c.exp @@ -51,15 +51,6 @@ foreach offload_target [concat [split $offload_targets ","] "disable"] { unsupported "$subdir $offload_target offloading" continue } - gcn { - if { ![check_effective_target_openacc_amdgcn_accel_present] } { - # Don't bother; execution testing is going to FAIL. - untested "$subdir $offload_target offloading: supported, but hardware not accessible" - continue - } - - set acc_mem_shared 0 - } host { set acc_mem_shared 1 } @@ -78,6 +69,15 @@ foreach offload_target [concat [split $offload_targets ","] "disable"] { set acc_mem_shared 0 } + radeon { + if { ![check_effective_target_openacc_radeon_accel_present] } { + # Don't bother; execution testing is going to FAIL. 
+ untested "$subdir $offload_target offloading: supported, but hardware not accessible" + continue + } + + set acc_mem_shared 0 + } default { error "Unknown OpenACC device type: $openacc_device_type (offload target: $offload_target)" } diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc_get_property.f90 b/libgomp/testsuite/libgomp.oacc-fortran/acc_get_property.f90 index ce69547..1af7cc3 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/acc_get_property.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/acc_get_property.f90 @@ -3,8 +3,6 @@ ! of all device types mentioned in the OpenACC standard. ! ! See also acc_get_property.c -! { dg-do run { target { { ! { openacc_host_selected } } && { ! { openacc_amdgcn_accel_selected } } } } } -! FIXME: This test does not work with the GCN implementation stub yet. program test use openacc @@ -28,13 +26,14 @@ end program test ! and do basic device independent validation. subroutine print_device_properties (device_type) use openacc + use iso_c_binding, only: c_size_t implicit none integer, intent(in) :: device_type integer :: device_count integer :: device - integer(acc_device_property) :: v + integer(c_size_t) :: v character*256 :: s device_count = acc_get_num_devices(device_type) diff --git a/libgomp/testsuite/libgomp.oacc-fortran/atomic_capture-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/atomic_capture-1.f90 index 5a4a1e0..536b3f0 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/atomic_capture-1.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/atomic_capture-1.f90 @@ -275,8 +275,9 @@ program main if (ltmp .neqv. .not. lexp) STOP 33 if (lgot .neqv. lexp) STOP 34 - igot = 1 + igot = 0 iexp = N + iarr = -42 !$acc parallel loop copy (igot, itmp) do i = 1, N @@ -287,13 +288,24 @@ program main end do !$acc end parallel loop + if (igot /= N) stop 107 + itmp = 0 + do i = 1, N + if (iarr(i) == 0) then + itmp = i + exit + end if + end do + ! At most one iarr element can be 0. do i = 1, N - if (.not. (1 <= iarr(i) .and. 
iarr(i) < iexp)) STOP 35 + if ((iarr(i) == 0 .and. i /= itmp) & + .or. iarr(i) < 0 .or. iarr(i) >= N) STOP 35 end do if (igot /= iexp) STOP 36 - igot = N + igot = N + 1 iexp = 1 + iarr = -42 !$acc parallel loop copy (igot, itmp) do i = 1, N @@ -304,8 +316,18 @@ program main end do !$acc end parallel loop + if (igot /= 1) stop 108 + itmp = N + 1 + ! At most one iarr element can be N+1. + do i = 1, N + if (iarr(i) == N + 1) then + itmp = i + exit + end if + end do do i = 1, N - if (.not. (iarr(i) == 1 .or. iarr(i) == N)) STOP 37 + if ((iarr(i) == N + 1 .and. i /= itmp) & + .or. iarr(i) <= 0 .or. iarr(i) > N + 1) STOP 37 end do if (igot /= iexp) STOP 38 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/classtypes-1.f95 b/libgomp/testsuite/libgomp.oacc-fortran/classtypes-1.f95 index f16f42f..c5f0fff 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/classtypes-1.f95 +++ b/libgomp/testsuite/libgomp.oacc-fortran/classtypes-1.f95 @@ -31,7 +31,8 @@ program main myvar%p%p(i) = -1.0 end do -!$acc enter data copyin(myvar, myvar%p) create(myvar%p%p) +!$acc enter data copyin(myvar) +!$acc enter data copyin(myvar%p) create(myvar%p%p) !$acc parallel loop present(myvar%p%p) do i=1,100 @@ -39,7 +40,8 @@ program main end do !$acc end parallel loop -!$acc exit data copyout(myvar%p%p) delete(myvar, myvar%p) +!$acc exit data copyout(myvar%p%p) delete(myvar%p) +!$acc exit data delete(myvar) do i=1,100 if (myvar%p%p(i) .ne. i * 2) stop 1 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-2.f90 deleted file mode 100644 index 3593661..0000000 --- a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-2.f90 +++ /dev/null @@ -1,33 +0,0 @@ -! { dg-do run } - -! Test of attach/detach with "acc data", two clauses at once. 
- -program dtype - implicit none - integer, parameter :: n = 512 - type mytype - integer, allocatable :: a(:) - end type mytype - integer i - - type(mytype) :: var - - allocate(var%a(1:n)) - -!$acc data copy(var) copy(var%a) - -!$acc parallel loop - do i = 1,n - var%a(i) = i - end do -!$acc end parallel loop - -!$acc end data - - do i = 1,n - if (i .ne. var%a(i)) stop 1 - end do - - deallocate(var%a) - -end program dtype diff --git a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-3.f90 index 667d944..edb6b8d 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-3.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-3.f90 @@ -16,12 +16,14 @@ program dtype allocate(var%a(1:n)) allocate(var%b(1:n)) -!$acc parallel loop copy(var) copy(var%a(1:n)) copy(var%b(1:n)) +!$acc data copy(var) +!$acc parallel loop copy(var%a(1:n)) copy(var%b(1:n)) do i = 1,n var%a(i) = i var%b(i) = i end do !$acc end parallel loop +!$acc end data do i = 1,n if (i .ne. var%a(i)) stop 1 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6-no_finalize.F90 b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6-no_finalize.F90 new file mode 100644 index 0000000..ed4f10e --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6-no_finalize.F90 @@ -0,0 +1,8 @@ +! { dg-do run } + +/* Nullify the 'finalize' clause, which disturbs reference counting. */ +#define finalize +#include "deep-copy-6.f90" + +! { dg-output ".*CheCKpOInT1(\n|\r\n|\r)" } +! { dg-output ".CheCKpOInT2(\n|\r\n|\r)" } diff --git a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6.f90 b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6.f90 index 12910d0..5837a40 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6.f90 @@ -3,6 +3,7 @@ ! Test of attachment counters and finalize. 
program dtype + use openacc implicit none integer, parameter :: n = 512 type mytype @@ -36,7 +37,23 @@ program dtype end do !$acc end parallel loop + if (.not. acc_is_present(var%a(5:n - 5))) stop 11 + if (.not. acc_is_present(var%b(5:n - 5))) stop 12 + if (.not. acc_is_present(var)) stop 13 + print *, "CheCKpOInT1" + ! { dg-output ".*CheCKpOInT1(\n|\r\n|\r)" } !$acc exit data copyout(var%a(5:n - 5), var%b(5:n - 5)) finalize + !TODO goacc_exit_data_internal: Assertion `is_tgt_unmapped || num_mappings > 1' failed. + !TODO { dg-output ".*\[Aa\]ssert.*is_tgt_unmapped" { target { ! openacc_host_selected } } } ! Scan for what we expect in the "XFAILed" case (without actually XFAILing). + !TODO { dg-shouldfail "XFAILed" { ! openacc_host_selected } } ! ... instead of 'dg-xfail-run-if' so that 'dg-output' is evaluated at all. + !TODO { dg-final { if { [dg-process-target { xfail { ! openacc_host_selected } }] == "F" } { xfail "[testname-for-summary] really is XFAILed" } } } ! ... so that we still get an XFAIL visible in the log. + print *, "CheCKpOInT2" + ! { dg-output ".CheCKpOInT2(\n|\r\n|\r)" { target { openacc_host_selected } } } + if (acc_get_device_type() .ne. acc_device_host) then + if (acc_is_present(var%a(5:n - 5))) stop 21 + if (acc_is_present(var%b(5:n - 5))) stop 22 + end if + if (.not. acc_is_present(var)) stop 23 !$acc end data diff --git a/libgomp/testsuite/libgomp.oacc-fortran/error_stop-1.f b/libgomp/testsuite/libgomp.oacc-fortran/error_stop-1.f index e7358f4..de72774 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/error_stop-1.f +++ b/libgomp/testsuite/libgomp.oacc-fortran/error_stop-1.f @@ -3,6 +3,10 @@ PROGRAM MAIN IMPLICIT NONE +! Initialize before the checkpoint, in case this produces any output. +!$ACC PARALLEL +!$ACC END PARALLEL + PRINT *, "CheCKpOInT" !$ACC PARALLEL ERROR STOP @@ -17,7 +21,7 @@ ! In gfortran's main program, libfortran's set_options is called - which sets ! compiler_options.backtrace = 1 by default. 
For an offload libgfortran, this ! is never called and, hence, "Error termination." is never printed. Thus: -! { dg-output "Error termination.*" { target { ! { openacc_nvidia_accel_selected || openacc_amdgcn_accel_selected } } } } +! { dg-output "Error termination.*" { target { ! { openacc_nvidia_accel_selected || openacc_radeon_accel_selected } } } } ! ! PR85463: ! { dg-output "libgomp: cuStreamSynchronize error.*" { target openacc_nvidia_accel_selected } } diff --git a/libgomp/testsuite/libgomp.oacc-fortran/error_stop-2.f b/libgomp/testsuite/libgomp.oacc-fortran/error_stop-2.f index fca1d96..475c9cb 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/error_stop-2.f +++ b/libgomp/testsuite/libgomp.oacc-fortran/error_stop-2.f @@ -3,6 +3,10 @@ PROGRAM MAIN IMPLICIT NONE +! Initialize before the checkpoint, in case this produces any output. +!$ACC PARALLEL +!$ACC END PARALLEL + PRINT *, "CheCKpOInT" !$ACC PARALLEL ERROR STOP 35 @@ -17,7 +21,7 @@ ! In gfortran's main program, libfortran's set_options is called - which sets ! compiler_options.backtrace = 1 by default. For an offload libgfortran, this ! is never called and, hence, "Error termination." is never printed. Thus: -! { dg-output "Error termination.*" { target { ! { openacc_nvidia_accel_selected || openacc_amdgcn_accel_selected } } } } +! { dg-output "Error termination.*" { target { ! { openacc_nvidia_accel_selected || openacc_radeon_accel_selected } } } } ! ! PR85463: ! { dg-output "libgomp: cuStreamSynchronize error.*" { target openacc_nvidia_accel_selected } } diff --git a/libgomp/testsuite/libgomp.oacc-fortran/error_stop-3.f b/libgomp/testsuite/libgomp.oacc-fortran/error_stop-3.f index 2ae0b0d..ab63444 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/error_stop-3.f +++ b/libgomp/testsuite/libgomp.oacc-fortran/error_stop-3.f @@ -3,6 +3,10 @@ PROGRAM MAIN IMPLICIT NONE +! Initialize before the checkpoint, in case this produces any output. 
+!$ACC PARALLEL +!$ACC END PARALLEL + PRINT *, "CheCKpOInT" !$ACC PARALLEL ERROR STOP "SiGN" @@ -17,7 +21,7 @@ ! In gfortran's main program, libfortran's set_options is called - which sets ! compiler_options.backtrace = 1 by default. For an offload libgfortran, this ! is never called and, hence, "Error termination." is never printed. Thus: -! { dg-output "Error termination.*" { target { ! { openacc_nvidia_accel_selected || openacc_amdgcn_accel_selected } } } } +! { dg-output "Error termination.*" { target { ! { openacc_nvidia_accel_selected || openacc_radeon_accel_selected } } } } ! ! PR85463: ! { dg-output "libgomp: cuStreamSynchronize error.*" { target openacc_nvidia_accel_selected } } diff --git a/libgomp/testsuite/libgomp.oacc-fortran/fortran.exp b/libgomp/testsuite/libgomp.oacc-fortran/fortran.exp index 60f0889..d607903 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/fortran.exp +++ b/libgomp/testsuite/libgomp.oacc-fortran/fortran.exp @@ -82,8 +82,11 @@ if { $lang_test_file_found } { unsupported "$subdir $offload_target offloading" continue } - gcn { - if { ![check_effective_target_openacc_amdgcn_accel_present] } { + host { + set acc_mem_shared 1 + } + nvidia { + if { ![check_effective_target_openacc_nvidia_accel_present] } { # Don't bother; execution testing is going to FAIL. untested "$subdir $offload_target offloading: supported, but hardware not accessible" continue @@ -91,11 +94,8 @@ if { $lang_test_file_found } { set acc_mem_shared 0 } - host { - set acc_mem_shared 1 - } - nvidia { - if { ![check_effective_target_openacc_nvidia_accel_present] } { + radeon { + if { ![check_effective_target_openacc_radeon_accel_present] } { # Don't bother; execution testing is going to FAIL. 
untested "$subdir $offload_target offloading: supported, but hardware not accessible" continue diff --git a/libgomp/testsuite/libgomp.oacc-fortran/host_data-5.F90 b/libgomp/testsuite/libgomp.oacc-fortran/host_data-5.F90 new file mode 100644 index 0000000..483ac3f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/host_data-5.F90 @@ -0,0 +1,92 @@ +! { dg-do run } +! +! Test if, if_present clauses on host_data construct. +! +! Fortran variant of 'libgomp.oacc-c-c++-common/host_data-7.c'. +! +program main + use iso_c_binding + implicit none + real, target :: var, arr(100) + integer(c_intptr_t) :: host_p, host_parr + host_p = transfer(c_loc(var), host_p) + host_parr = transfer(c_loc(arr), host_parr) + call foo (var, arr, host_p, host_parr, .false.) + call foo (var, arr, host_p, host_parr, .true.) + +contains + +subroutine foo (p2, parr, host_p, host_parr, cond) + use openacc + implicit none + real, target, intent(in) :: parr(:), p2 + integer(c_intptr_t), value, intent(in) :: host_p, host_parr + logical, value, intent(in) :: cond + real, pointer :: p + p => p2 + + if (host_p /= transfer(c_loc(p), host_p)) stop 1 + if (host_parr /= transfer(c_loc(parr), host_parr)) stop 2 +#if !ACC_MEM_SHARED + if (acc_is_present(p, c_sizeof(p))) stop 3 + if (acc_is_present(parr, 1)) stop 4 +#endif + + !$acc data copyin(host_p, host_parr) +#if !ACC_MEM_SHARED + if (acc_is_present(p, c_sizeof(p))) stop 5 + if (acc_is_present(parr, 1)) stop 6 +#endif + !$acc host_data use_device(p, parr) if_present + ! not mapped yet, so it will be equal to the host pointer. + if (transfer(c_loc(p), host_p) /= host_p) stop 7 + if (transfer(c_loc(parr), host_parr) /= host_parr) stop 8 + !$acc end host_data +#if !ACC_MEM_SHARED + if (acc_is_present(p, c_sizeof(p))) stop 9 + if (acc_is_present(parr, 1)) stop 10 +#endif + + !$acc data copy(p, parr) + if (.not. acc_is_present(p, c_sizeof(p))) stop 11 + if (.not. acc_is_present(parr, 1)) stop 12 + ! 
Not inside a host_data construct, so still the host pointer. + if (transfer(c_loc(p), host_p) /= host_p) stop 13 + if (transfer(c_loc(parr), host_parr) /= host_parr) stop 14 + + !$acc host_data use_device(p, parr) +#if ACC_MEM_SHARED + if (transfer(c_loc(p), host_p) /= host_p) stop 15 + if (transfer(c_loc(parr), host_parr) /= host_parr) stop 16 +#else + ! The device address is different from host address. + if (transfer(c_loc(p), host_p) == host_p) stop 17 + if (transfer(c_loc(parr), host_parr) == host_parr) stop 18 +#endif + !$acc end host_data + + !$acc host_data use_device(p, parr) if_present +#if ACC_MEM_SHARED + if (transfer(c_loc(p), host_p) /= host_p) stop 19 + if (transfer(c_loc(parr), host_parr) /= host_parr) stop 20 +#else + ! is present now, so this is the same as above. + if (transfer(c_loc(p), host_p) == host_p) stop 21 + if (transfer(c_loc(parr), host_parr) == host_parr) stop 22 +#endif + !$acc end host_data + + !$acc host_data use_device(p, parr) if(cond) +#if ACC_MEM_SHARED + if (transfer(c_loc(p), host_p) /= host_p) stop 23 + if (transfer(c_loc(parr), host_parr) /= host_parr) stop 24 +#else + ! is the device pointer iff cond is true. + if ((transfer(c_loc(p), host_p) /= host_p) .neqv. cond) stop 25 + if ((transfer(c_loc(parr), host_parr) /= host_parr) .neqv. cond) stop 26 +#endif + !$acc end host_data + !$acc end data + !$acc end data +end subroutine foo +end diff --git a/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-1-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-1-1.f90 new file mode 100644 index 0000000..445cbab --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-1-1.f90 @@ -0,0 +1,42 @@ +! { dg-do run } +! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } + +! Adapted from 'libgomp.oacc-fortran/deep-copy-6.f90'. 
+ +program main + use openacc + implicit none + integer, parameter :: n = 512 + type mytype + integer, allocatable :: a(:) + end type mytype + type(mytype) :: var + + allocate(var%a(1:n)) + + !$acc data create(var) + + !$acc enter data create(var%a) + + if (.not. acc_is_present(var%a)) stop 1 + if (.not. acc_is_present(var)) stop 2 + + print *, "CheCKpOInT1" + ! { dg-output ".*CheCKpOInT1(\n|\r\n|\r)" } + !$acc exit data delete(var%a) finalize + !TODO goacc_exit_data_internal: Assertion `is_tgt_unmapped || num_mappings > 1' failed. + !TODO { dg-output ".*\[Aa\]ssert.*is_tgt_unmapped" { target { ! openacc_host_selected } } } ! Scan for what we expect in the "XFAILed" case (without actually XFAILing). + !TODO { dg-shouldfail "XFAILed" { ! openacc_host_selected } } ! ... instead of 'dg-xfail-run-if' so that 'dg-output' is evaluated at all. + !TODO { dg-final { if { [dg-process-target { xfail { ! openacc_host_selected } }] == "F" } { xfail "[testname-for-summary] really is XFAILed" } } } ! ... so that we still get an XFAIL visible in the log. + print *, "CheCKpOInT2" + ! { dg-output ".CheCKpOInT2(\n|\r\n|\r)" { target { openacc_host_selected } } } + if (acc_is_present(var%a)) stop 3 + if (.not. acc_is_present(var)) stop 4 + + !$acc end data + if (acc_is_present(var%a)) stop 5 + if (acc_is_present(var)) stop 6 + + deallocate(var%a) + +end program main diff --git a/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-1-2.F90 b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-1-2.F90 new file mode 100644 index 0000000..7b206ac --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-1-2.F90 @@ -0,0 +1,9 @@ +! { dg-do run } +! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } + +/* Nullify the 'finalize' clause, which disturbs reference counting. */ +#define finalize +#include "mdc-refcount-1-1-1.f90" + +! { dg-output ".*CheCKpOInT1(\n|\r\n|\r)" } +! 
{ dg-output ".CheCKpOInT2(\n|\r\n|\r)" } diff --git a/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-2-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-2-1.f90 new file mode 100644 index 0000000..8554534 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-2-1.f90 @@ -0,0 +1,44 @@ +! { dg-do run } +! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } + +! Adapted from 'libgomp.oacc-fortran/mdc-refcount-1-1-1.f90'. + +program main + use openacc + implicit none + integer, parameter :: n = 512 + type mytype + integer, allocatable :: a(:) + end type mytype + type(mytype) :: var + + allocate(var%a(1:n)) + + !$acc data create(var) + + call acc_create(var%a) + ! After mapping via runtime API call, separately trigger attach action; see <https://github.com/OpenACC/openacc-spec/issues/301>. + !$acc enter data attach(var%a) + + if (.not. acc_is_present(var%a)) stop 1 + if (.not. acc_is_present(var)) stop 2 + + print *, "CheCKpOInT1" + ! { dg-output ".*CheCKpOInT1(\n|\r\n|\r)" } + !$acc exit data delete(var%a) finalize + !TODO goacc_exit_data_internal: Assertion `is_tgt_unmapped || num_mappings > 1' failed. + !TODO { dg-output ".*\[Aa\]ssert.*is_tgt_unmapped" { target { ! openacc_host_selected } } } ! Scan for what we expect in the "XFAILed" case (without actually XFAILing). + !TODO { dg-shouldfail "XFAILed" { ! openacc_host_selected } } ! ... instead of 'dg-xfail-run-if' so that 'dg-output' is evaluated at all. + !TODO { dg-final { if { [dg-process-target { xfail { ! openacc_host_selected } }] == "F" } { xfail "[testname-for-summary] really is XFAILed" } } } ! ... so that we still get an XFAIL visible in the log. + print *, "CheCKpOInT2" + ! { dg-output ".CheCKpOInT2(\n|\r\n|\r)" { target { openacc_host_selected } } } + if (acc_is_present(var%a)) stop 3 + if (.not. 
acc_is_present(var)) stop 4 + + !$acc end data + if (acc_is_present(var%a)) stop 5 + if (acc_is_present(var)) stop 6 + + deallocate(var%a) + +end program main diff --git a/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-2-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-2-2.f90 new file mode 100644 index 0000000..8e696cc --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-2-2.f90 @@ -0,0 +1,44 @@ +! { dg-do run } +! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } + +! Copy of 'libgomp.oacc-fortran/mdc-refcount-1-2-1.f90', without 'finalize' clause. + +program main + use openacc + implicit none + integer, parameter :: n = 512 + type mytype + integer, allocatable :: a(:) + end type mytype + type(mytype) :: var + + allocate(var%a(1:n)) + + !$acc data create(var) + + call acc_create(var%a) + ! After mapping via runtime API call, separately trigger attach action; see <https://github.com/OpenACC/openacc-spec/issues/301>. + !$acc enter data attach(var%a) + + if (.not. acc_is_present(var%a)) stop 1 + if (.not. acc_is_present(var)) stop 2 + + print *, "CheCKpOInT1" + ! { dg-output ".*CheCKpOInT1(\n|\r\n|\r)" } + !$acc exit data delete(var%a) + !TODO goacc_exit_data_internal: Assertion `is_tgt_unmapped || num_mappings > 1' failed. + !TODO { dg-output ".*\[Aa\]ssert.*is_tgt_unmapped" { target { ! openacc_host_selected } } } ! Scan for what we expect in the "XFAILed" case (without actually XFAILing). + !TODO { dg-shouldfail "XFAILed" { ! openacc_host_selected } } ! ... instead of 'dg-xfail-run-if' so that 'dg-output' is evaluated at all. + !TODO { dg-final { if { [dg-process-target { xfail { ! openacc_host_selected } }] == "F" } { xfail "[testname-for-summary] really is XFAILed" } } } ! ... so that we still get an XFAIL visible in the log. + print *, "CheCKpOInT2" + ! { dg-output ".CheCKpOInT2(\n|\r\n|\r)" { target { openacc_host_selected } } } + if (acc_is_present(var%a)) stop 3 + if (.not. 
acc_is_present(var)) stop 4 + + !$acc end data + if (acc_is_present(var%a)) stop 5 + if (acc_is_present(var)) stop 6 + + deallocate(var%a) + +end program main diff --git a/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-3-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-3-1.f90 new file mode 100644 index 0000000..070a6f8 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-3-1.f90 @@ -0,0 +1,45 @@ +! { dg-do run } +! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } + +! Adapted from 'libgomp.oacc-fortran/mdc-refcount-1-2-1.f90'. + +program main + use openacc + implicit none + integer, parameter :: n = 512 + type mytype + integer, allocatable :: a(:) + end type mytype + type(mytype) :: var + + allocate(var%a(1:n)) + + !$acc data create(var) + + call acc_create(var%a) + ! After mapping via runtime API call, separately trigger attach action; see <https://github.com/OpenACC/openacc-spec/issues/301>. + !$acc enter data attach(var%a) + + if (.not. acc_is_present(var%a)) stop 1 + if (.not. acc_is_present(var)) stop 2 + + !$acc exit data detach(var%a) + print *, "CheCKpOInT1" + ! { dg-output ".*CheCKpOInT1(\n|\r\n|\r)" } + !$acc exit data delete(var%a) finalize + !TODO goacc_exit_data_internal: Assertion `is_tgt_unmapped || num_mappings > 1' failed. + !TODO { dg-output ".*\[Aa\]ssert.*is_tgt_unmapped" { target { ! openacc_host_selected } } } ! Scan for what we expect in the "XFAILed" case (without actually XFAILing). + !TODO { dg-shouldfail "XFAILed" { ! openacc_host_selected } } ! ... instead of 'dg-xfail-run-if' so that 'dg-output' is evaluated at all. + !TODO { dg-final { if { [dg-process-target { xfail { ! openacc_host_selected } }] == "F" } { xfail "[testname-for-summary] really is XFAILed" } } } ! ... so that we still get an XFAIL visible in the log. + print *, "CheCKpOInT2" + ! { dg-output ".CheCKpOInT2(\n|\r\n|\r)" { target { openacc_host_selected } } } + if (acc_is_present(var%a)) stop 3 + if (.not. 
acc_is_present(var)) stop 4 + + !$acc end data + if (acc_is_present(var%a)) stop 5 + if (acc_is_present(var)) stop 6 + + deallocate(var%a) + +end program main diff --git a/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-3-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-3-2.f90 new file mode 100644 index 0000000..3c4bbda --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-3-2.f90 @@ -0,0 +1,44 @@ +! { dg-do run } +! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } + +! Copy of 'libgomp.oacc-fortran/mdc-refcount-1-3-1.f90', without 'finalize' clause. + +program main + use openacc + implicit none + integer, parameter :: n = 512 + type mytype + integer, allocatable :: a(:) + end type mytype + type(mytype) :: var + + allocate(var%a(1:n)) + + !$acc data create(var) + + call acc_create(var%a) + ! After mapping via runtime API call, separately trigger attach action; see <https://github.com/OpenACC/openacc-spec/issues/301>. + !$acc enter data attach(var%a) + + if (.not. acc_is_present(var%a)) stop 1 + if (.not. acc_is_present(var)) stop 2 + + !$acc exit data detach(var%a) + print *, "CheCKpOInT1" + ! { dg-output ".*CheCKpOInT1(\n|\r\n|\r)" } + !$acc exit data delete(var%a) + !TODO { dg-output "(\n|\r\n|\r)libgomp: attach count underflow(\n|\r\n|\r)$" { target { ! openacc_host_selected } } } ! Scan for what we expect in the "XFAILed" case (without actually XFAILing). + !TODO { dg-shouldfail "XFAILed" { ! openacc_host_selected } } ! ... instead of 'dg-xfail-run-if' so that 'dg-output' is evaluated at all. + !TODO { dg-final { if { [dg-process-target { xfail { ! openacc_host_selected } }] == "F" } { xfail "[testname-for-summary] really is XFAILed" } } } ! ... so that we still get an XFAIL visible in the log. + print *, "CheCKpOInT2" + ! { dg-output ".CheCKpOInT2(\n|\r\n|\r)" { target { openacc_host_selected } } } + if (acc_is_present(var%a)) stop 3 + if (.not. 
acc_is_present(var)) stop 4 + + !$acc end data + if (acc_is_present(var%a)) stop 5 + if (acc_is_present(var)) stop 6 + + deallocate(var%a) + +end program main diff --git a/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-1.f90 new file mode 100644 index 0000000..b22e411 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-1.f90 @@ -0,0 +1,45 @@ +! { dg-do run } +! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } + +! Adapted from 'libgomp.oacc-fortran/mdc-refcount-1-3-1.f90'. + +program main + use openacc + implicit none + integer, parameter :: n = 512 + type mytype + integer, allocatable :: a(:) + end type mytype + type(mytype) :: var + + allocate(var%a(1:n)) + + !$acc data create(var) + + call acc_create(var%a) + ! After mapping via runtime API call, separately trigger attach action; see <https://github.com/OpenACC/openacc-spec/issues/301>. + !$acc enter data attach(var%a) + + if (.not. acc_is_present(var%a)) stop 1 + if (.not. acc_is_present(var)) stop 2 + + print *, "CheCKpOInT1" + ! { dg-output ".*CheCKpOInT1(\n|\r\n|\r)" } + !$acc exit data detach(var%a) finalize + !TODO goacc_exit_data_internal: Assertion `is_tgt_unmapped || num_mappings > 1' failed. + !TODO { dg-output ".*\[Aa\]ssert.*is_tgt_unmapped" { target { ! openacc_host_selected } } } ! Scan for what we expect in the "XFAILed" case (without actually XFAILing). + !TODO { dg-shouldfail "XFAILed" { ! openacc_host_selected } } ! ... instead of 'dg-xfail-run-if' so that 'dg-output' is evaluated at all. + !TODO { dg-final { if { [dg-process-target { xfail { ! openacc_host_selected } }] == "F" } { xfail "[testname-for-summary] really is XFAILed" } } } ! ... so that we still get an XFAIL visible in the log. + print *, "CheCKpOInT2" + ! { dg-output ".CheCKpOInT2(\n|\r\n|\r)" { target { openacc_host_selected } } } + !$acc exit data delete(var%a) + if (acc_is_present(var%a)) stop 3 + if (.not. 
acc_is_present(var)) stop 4 + + !$acc end data + if (acc_is_present(var%a)) stop 5 + if (acc_is_present(var)) stop 6 + + deallocate(var%a) + +end program main diff --git a/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-2.f90 new file mode 100644 index 0000000..476cd5c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-2.f90 @@ -0,0 +1,44 @@ +! { dg-do run } +! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } + +! Copy of 'libgomp.oacc-fortran/mdc-refcount-1-4-1.f90', without 'finalize' clause. + +program main + use openacc + implicit none + integer, parameter :: n = 512 + type mytype + integer, allocatable :: a(:) + end type mytype + type(mytype) :: var + + allocate(var%a(1:n)) + + !$acc data create(var) + + call acc_create(var%a) + ! After mapping via runtime API call, separately trigger attach action; see <https://github.com/OpenACC/openacc-spec/issues/301>. + !$acc enter data attach(var%a) + + if (.not. acc_is_present(var%a)) stop 1 + if (.not. acc_is_present(var)) stop 2 + + !$acc exit data detach(var%a) + print *, "CheCKpOInT1" + ! { dg-output ".*CheCKpOInT1(\n|\r\n|\r)" } + !$acc exit data delete(var%a) + !TODO { dg-output "(\n|\r\n|\r)libgomp: attach count underflow(\n|\r\n|\r)$" { target { ! openacc_host_selected } } } ! Scan for what we expect in the "XFAILed" case (without actually XFAILing). + !TODO { dg-shouldfail "XFAILed" { ! openacc_host_selected } } ! ... instead of 'dg-xfail-run-if' so that 'dg-output' is evaluated at all. + !TODO { dg-final { if { [dg-process-target { xfail { ! openacc_host_selected } }] == "F" } { xfail "[testname-for-summary] really is XFAILed" } } } ! ... so that we still get an XFAIL visible in the log. + print *, "CheCKpOInT2" + ! { dg-output ".CheCKpOInT2(\n|\r\n|\r)" { target { openacc_host_selected } } } + if (acc_is_present(var%a)) stop 3 + if (.not. 
acc_is_present(var)) stop 4 + + !$acc end data + if (acc_is_present(var%a)) stop 5 + if (acc_is_present(var)) stop 6 + + deallocate(var%a) + +end program main diff --git a/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-1.f b/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-1.f index 537212e..36e9844 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-1.f +++ b/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-1.f @@ -4,6 +4,6 @@ implicit none include "openacc_lib.h" - if (openacc_version .ne. 201306) STOP 1 + if (openacc_version .ne. 201711) STOP 1 end program main diff --git a/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-2.f90 index 54f301b..e815bc1 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-2.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-2.f90 @@ -4,6 +4,6 @@ program main use openacc implicit none - if (openacc_version .ne. 201306) STOP 1 + if (openacc_version .ne. 201711) STOP 1 end program main diff --git a/libgomp/testsuite/libgomp.oacc-fortran/routine-10.f90 b/libgomp/testsuite/libgomp.oacc-fortran/routine-10.f90 new file mode 100644 index 0000000..90cca7c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/routine-10.f90 @@ -0,0 +1,52 @@ +! { dg-do run } +! +module m + implicit none +contains + pure subroutine add_ps_routine(a, b, c) + implicit none + !$acc routine seq + integer, intent(in) :: a, b + integer, intent(out) :: c + integer, parameter :: n = 10 + integer :: i + + do i = 1, n + if (i .eq. 
5) then + c = a + b + end if + end do + end subroutine add_ps_routine + + elemental impure function add_ef(a, b) result(c) + implicit none + !$acc routine + integer, intent(in) :: a, b + integer :: c + + call add_ps_routine(a, b, c) + end function add_ef +end module m + +program main + use m + implicit none + integer, parameter :: n = 10 + integer, dimension(n) :: a_a + integer, dimension(n) :: b_a + integer, dimension(n) :: c_a + integer :: i + + a_a = [(3 * i, i = 1, n)] + b_a = [(-2 * i, i = 1, n)] + !$acc parallel copyin(a_a, b_a) copyout(c_a) + !$acc loop gang + do i = 1, n + if (i .eq. 4) then + c_a = add_ef(a_a, b_a) + end if + end do + !$acc end parallel + if (any (c_a /= [(i, i=1, 10)])) stop 1 + !print *, a +end program main diff --git a/libgomp/testsuite/libgomp.oacc-fortran/stop-1.f b/libgomp/testsuite/libgomp.oacc-fortran/stop-1.f index af267fc..2c00d2e 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/stop-1.f +++ b/libgomp/testsuite/libgomp.oacc-fortran/stop-1.f @@ -3,6 +3,10 @@ PROGRAM MAIN IMPLICIT NONE +! Initialize before the checkpoint, in case this produces any output. +!$ACC PARALLEL +!$ACC END PARALLEL + PRINT *, "CheCKpOInT" !$ACC PARALLEL STOP diff --git a/libgomp/testsuite/libgomp.oacc-fortran/stop-2.f b/libgomp/testsuite/libgomp.oacc-fortran/stop-2.f index 13c0684..adade54 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/stop-2.f +++ b/libgomp/testsuite/libgomp.oacc-fortran/stop-2.f @@ -3,6 +3,10 @@ PROGRAM MAIN IMPLICIT NONE +! Initialize before the checkpoint, in case this produces any output. +!$ACC PARALLEL +!$ACC END PARALLEL + PRINT *, "CheCKpOInT" !$ACC PARALLEL STOP 35 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/stop-3.f b/libgomp/testsuite/libgomp.oacc-fortran/stop-3.f index 3bd7446..157e369 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/stop-3.f +++ b/libgomp/testsuite/libgomp.oacc-fortran/stop-3.f @@ -3,6 +3,10 @@ PROGRAM MAIN IMPLICIT NONE +! Initialize before the checkpoint, in case this produces any output. 
+!$ACC PARALLEL +!$ACC END PARALLEL + PRINT *, "CheCKpOInT" !$ACC PARALLEL STOP "SiGN" |