aboutsummaryrefslogtreecommitdiff
path: root/libgomp
diff options
context:
space:
mode:
authorAldy Hernandez <aldyh@redhat.com>2020-06-17 07:50:57 -0400
committerAldy Hernandez <aldyh@redhat.com>2020-06-17 07:50:57 -0400
commitb9e67f2840ce0d8859d96e7f8df8fe9584af5eba (patch)
treeed3b7284ff15c802583f6409b9c71b3739642d15 /libgomp
parent1957047ed1c94bf17cf993a2b1866965f493ba87 (diff)
parent56638b9b1853666f575928f8baf17f70e4ed3517 (diff)
downloadgcc-b9e67f2840ce0d8859d96e7f8df8fe9584af5eba.zip
gcc-b9e67f2840ce0d8859d96e7f8df8fe9584af5eba.tar.gz
gcc-b9e67f2840ce0d8859d96e7f8df8fe9584af5eba.tar.bz2
Merge from trunk at:
commit 56638b9b1853666f575928f8baf17f70e4ed3517 Author: GCC Administrator <gccadmin@gcc.gnu.org> Date: Wed Jun 17 00:16:36 2020 +0000 Daily bump.
Diffstat (limited to 'libgomp')
-rw-r--r--libgomp/.gitattributes2
-rw-r--r--libgomp/ChangeLog688
-rw-r--r--libgomp/Makefile.am2
-rw-r--r--libgomp/Makefile.in16
-rw-r--r--libgomp/aclocal.m41
-rw-r--r--libgomp/affinity-fmt.c4
-rw-r--r--libgomp/allocator.c357
-rw-r--r--libgomp/config/accel/openacc.f9023
-rwxr-xr-xlibgomp/configure49
-rw-r--r--libgomp/configure.ac12
-rw-r--r--libgomp/env.c62
-rw-r--r--libgomp/icv.c19
-rw-r--r--libgomp/libgomp-plugin.h36
-rw-r--r--libgomp/libgomp.h9
-rw-r--r--libgomp/libgomp.map10
-rw-r--r--libgomp/libgomp.texi304
-rw-r--r--libgomp/oacc-host.c47
-rw-r--r--libgomp/oacc-init.c15
-rw-r--r--libgomp/oacc-mem.c130
-rw-r--r--libgomp/oacc-parallel.c3
-rw-r--r--libgomp/omp.h.in90
-rw-r--r--libgomp/openacc.f90291
-rw-r--r--libgomp/openacc.h7
-rw-r--r--libgomp/openacc_lib.h149
-rw-r--r--libgomp/plugin/configfrag.ac4
-rw-r--r--libgomp/plugin/plugin-gcn.c235
-rw-r--r--libgomp/plugin/plugin-hsa.c26
-rw-r--r--libgomp/plugin/plugin-nvptx.c145
-rw-r--r--libgomp/target.c90
-rw-r--r--libgomp/team.c2
-rw-r--r--libgomp/testsuite/Makefile.am2
-rw-r--r--libgomp/testsuite/Makefile.in16
-rw-r--r--libgomp/testsuite/lib/libgomp.exp22
-rw-r--r--libgomp/testsuite/libgomp-site-extra.exp.in1
-rw-r--r--libgomp/testsuite/libgomp-test-support.exp.in2
-rw-r--r--libgomp/testsuite/libgomp.c++/pr93931.C120
-rw-r--r--libgomp/testsuite/libgomp.c-c++-common/alloc-1.c157
-rw-r--r--libgomp/testsuite/libgomp.c-c++-common/alloc-2.c46
-rw-r--r--libgomp/testsuite/libgomp.c-c++-common/alloc-3.c28
-rw-r--r--libgomp/testsuite/libgomp.c-c++-common/alloc-4.c25
-rw-r--r--libgomp/testsuite/libgomp.c-c++-common/pr93515.c36
-rw-r--r--libgomp/testsuite/libgomp.c-c++-common/target-40.c51
-rw-r--r--libgomp/testsuite/libgomp.c/pr93566.c113
-rw-r--r--libgomp/testsuite/libgomp.c/target-38.c28
-rw-r--r--libgomp/testsuite/libgomp.c/target-39.c47
-rw-r--r--libgomp/testsuite/libgomp.fortran/async_io_9.f9020
-rw-r--r--libgomp/testsuite/libgomp.fortran/close_errors_1.f9019
-rw-r--r--libgomp/testsuite/libgomp.fortran/pr66199-3.f9053
-rw-r--r--libgomp/testsuite/libgomp.fortran/pr66199-4.f9060
-rw-r--r--libgomp/testsuite/libgomp.fortran/pr66199-5.f9071
-rw-r--r--libgomp/testsuite/libgomp.fortran/pr66199-6.f9042
-rw-r--r--libgomp/testsuite/libgomp.fortran/pr66199-7.f9072
-rw-r--r--libgomp/testsuite/libgomp.fortran/pr66199-8.f9076
-rw-r--r--libgomp/testsuite/libgomp.fortran/pr66199-9.f9046
-rw-r--r--libgomp/testsuite/libgomp.fortran/target-enter-data-1.f9038
-rw-r--r--libgomp/testsuite/libgomp.fortran/target-enter-data-2.F9041
-rw-r--r--libgomp/testsuite/libgomp.fortran/target-var.f9032
-rw-r--r--libgomp/testsuite/libgomp.fortran/use_device_ptr-optional-2.f901
-rw-r--r--libgomp/testsuite/libgomp.oacc-c++/c++.exp18
-rw-r--r--libgomp/testsuite/libgomp.oacc-c++/declare-pr94120.C58
-rw-r--r--libgomp/testsuite/libgomp.oacc-c++/firstprivate-mappings-1.C9
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-3.c19
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-aux.c72
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-gcn.c135
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-host.c20
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-nvptx.c (renamed from libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-2.c)27
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property.c32
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c2
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c4
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c12
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-nop-1.c4
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c2
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c6
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/firstprivate-mappings-1.c9
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/function-not-offloaded.c4
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-7.c66
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c3
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c9
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c17
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c20
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c6
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c6
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c7
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c7
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c10
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c7
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c7
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c10
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c42
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-2.c20
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-4.c5
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/pr92854-1.c64
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c15
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c7
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c7
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c10
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c5
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/struct-1.c187
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/struct-copyout-1.c38
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/struct-copyout-2.c44
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-1-lib.c3
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-1.c161
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-2-lib.c3
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-2.c166
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-3-lib.c3
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-3.c183
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-4-lib.c3
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-4.c64
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-5-lib.c3
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-5.c56
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-6-lib.c3
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-6.c43
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-7-lib.c3
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-7.c44
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-8-lib.c3
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-8.c47
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/tile-1.c2
-rw-r--r--libgomp/testsuite/libgomp.oacc-c/c.exp18
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/acc_get_property.f905
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/atomic_capture-1.f9030
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/classtypes-1.f956
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/deep-copy-2.f9033
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/deep-copy-3.f904
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6-no_finalize.F908
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6.f9017
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/error_stop-1.f6
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/error_stop-2.f6
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/error_stop-3.f6
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/fortran.exp14
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/host_data-5.F9092
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-1-1.f9042
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-1-2.F909
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-2-1.f9044
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-2-2.f9044
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-3-1.f9045
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-3-2.f9044
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-1.f9045
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-2.f9044
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/openacc_version-1.f2
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/openacc_version-2.f902
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/routine-10.f9052
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/stop-1.f4
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/stop-2.f4
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/stop-3.f4
144 files changed, 5749 insertions, 816 deletions
diff --git a/libgomp/.gitattributes b/libgomp/.gitattributes
new file mode 100644
index 0000000..47e74eb
--- /dev/null
+++ b/libgomp/.gitattributes
@@ -0,0 +1,2 @@
+# For the Fortran file, complain about tabs
+openacc_lib.h whitespace=tab-in-indent,space-before-tab,trailing-space
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog
index a204585..ae72976 100644
--- a/libgomp/ChangeLog
+++ b/libgomp/ChangeLog
@@ -1,3 +1,691 @@
+2020-06-16 Tobias Burnus <tobias@codesourcery.com>
+
+ * testsuite/libgomp.oacc-fortran/routine-10.f90: New test.
+
+2020-06-08 Tobias Burnus <tobias@codesourcery.com>
+
+ PR lto/94848
+ PR middle-end/95551
+ * testsuite/libgomp.fortran/target-var.f90: New test.
+
+2020-06-05 Thomas Schwinge <thomas@codesourcery.com>
+ Julian Brown <julian@codesourcery.com>
+
+ * oacc-mem.c (goacc_exit_data_internal) <GOMP_MAP_STRUCT>: Explain
+ special handling.
+
+2020-06-05 Thomas Schwinge <thomas@codesourcery.com>
+ Julian Brown <julian@codesourcery.com>
+
+ * oacc-mem.c (goacc_exit_data_internal) <GOMP_MAP_STRUCT>:
+ Simplify.
+
+2020-06-05 Julian Brown <julian@codesourcery.com>
+
+ * testsuite/libgomp.oacc-c-c++-common/struct-copyout-1.c: New test.
+ * testsuite/libgomp.oacc-c-c++-common/struct-copyout-2.c: New test.
+
+2020-06-04 Thomas Schwinge <thomas@codesourcery.com>
+
+ * oacc-mem.c (goacc_exit_data_internal) <GOMP_MAP_STRUCT>:
+ Evaluate 'copyfrom' individually for each entry.
+ * testsuite/libgomp.oacc-c-c++-common/struct-1.c: Update.
+
+2020-06-04 Thomas Schwinge <thomas@codesourcery.com>
+
+ * oacc-mem.c (goacc_exit_data_internal) <GOMP_MAP_STRUCT>:
+ Evaluate 'finalize' individually for each entry.
+ * testsuite/libgomp.oacc-c-c++-common/struct-1.c: New file.
+ * testsuite/libgomp.oacc-c-c++-common/struct-refcount-1.c: Remove
+ file.
+
+2020-06-04 Thomas Schwinge <thomas@codesourcery.com>
+
+ * testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c: Fix 'sizeof'
+ usage.
+ * testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c: Likewise.
+
+2020-06-04 Thomas Schwinge <thomas@codesourcery.com>
+ Julian Brown <julian@codesourcery.com>
+
+ * oacc-mem.c (goacc_exit_datum): Repair 'is_tgt_unmapped'
+ checking.
+ (acc_unmap_data, goacc_exit_data_internal): Restore
+ 'is_tgt_unmapped' checking.
+ * testsuite/libgomp.oacc-c-c++-common/struct-refcount-1.c: New
+ file.
+ * testsuite/libgomp.oacc-fortran/deep-copy-6.f90: Adjust.
+ * testsuite/libgomp.oacc-fortran/mdc-refcount-1-1-1.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/mdc-refcount-1-2-1.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/mdc-refcount-1-2-2.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/mdc-refcount-1-3-1.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-1.f90: Likewise.
+
+2020-06-04 Thomas Schwinge <thomas@codesourcery.com>
+ Julian Brown <julian@codesourcery.com>
+
+ * oacc-mem.c (acc_unmap_data): Don't open-code 'gomp_remove_var'.
+
+2020-06-04 Thomas Schwinge <thomas@codesourcery.com>
+
+ PR libgomp/92854
+ * oacc-mem.c (acc_unmap_data): Remove 'tgt' reference counting.
+
+2020-06-04 Thomas Schwinge <thomas@codesourcery.com>
+
+ PR libgomp/92854
+ * testsuite/libgomp.oacc-c-c++-common/pr92854-1.c: Extend some
+ more.
+
+2020-06-04 Thomas Schwinge <thomas@codesourcery.com>
+ Julian Brown <julian@codesourcery.com>
+
+ * oacc-mem.c (goacc_enter_datum): Use 'tgt' returned from
+ 'gomp_map_vars'.
+ (acc_map_data): Clean up accordingly.
+
+2020-06-04 Thomas Schwinge <thomas@codesourcery.com>
+
+ * testsuite/libgomp.oacc-fortran/deep-copy-6.f90: XFAIL behavior
+ of over-eager 'finalize' clause.
+ * testsuite/libgomp.oacc-fortran/deep-copy-6-no_finalize.F90: New
+ file.
+ * testsuite/libgomp.oacc-fortran/mdc-refcount-1-1-1.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/mdc-refcount-1-1-2.F90: Likewise.
+ * testsuite/libgomp.oacc-fortran/mdc-refcount-1-2-1.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/mdc-refcount-1-2-2.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/mdc-refcount-1-3-1.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/mdc-refcount-1-3-2.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-1.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-2.f90: Likewise.
+
+2020-06-04 Thomas Schwinge <thomas@codesourcery.com>
+
+ * oacc-mem.c (goacc_exit_data_internal): Unlock on error path.
+
+2020-06-04 Julian Brown <julian@codesourcery.com>
+
+ * oacc-mem.c (acc_attach_async): Add missing gomp_mutex_unlock on
+ error path.
+ (goacc_detach_internal): Likewise.
+
+2020-06-04 Thomas Schwinge <thomas@codesourcery.com>
+
+ * testsuite/libgomp.oacc-fortran/error_stop-1.f: Initialize before
+ the checkpoint.
+ * testsuite/libgomp.oacc-fortran/error_stop-2.f: Likewise.
+ * testsuite/libgomp.oacc-fortran/error_stop-3.f: Likewise.
+ * testsuite/libgomp.oacc-fortran/stop-1.f: Likewise.
+ * testsuite/libgomp.oacc-fortran/stop-2.f: Likewise.
+ * testsuite/libgomp.oacc-fortran/stop-3.f: Likewise.
+
+2020-06-02 Jakub Jelinek <jakub@redhat.com>
+
+ * allocator.c (omp_free): Fix up build if HAVE_SYNC_BUILTINS is not
+ defined.
+
+2020-05-30 Jakub Jelinek <jakub@redhat.com>
+
+ * testsuite/libgomp.c-c++-common/alloc-4.c: New test.
+
+2020-05-30 Jakub Jelinek <jakub@redhat.com>
+
+ * allocator.c (omp_alloc): For size == 0, return NULL early.
+
+2020-05-29 H.J. Lu <hjl.tools@gmail.com>
+
+ PR bootstrap/95413
+ * configure: Regenerated.
+
+2020-05-23 Thomas Koenig <tkoenig@gcc.gnu.org>
+
+ PR libfortran/95191
+ * testsuite/libgomp.fortran/async_io_9.f90: New test.
+
+2020-05-19 Jakub Jelinek <jakub@redhat.com>
+
+ * omp.h.in (omp_uintptr_t): New typedef.
+ (__GOMP_UINTPTR_T_ENUM): Define.
+ (omp_memspace_handle_t, omp_allocator_handle_t, omp_alloctrait_key_t,
+ omp_alloctrait_value_t, omp_alloctrait_t): New typedefs.
+ (__GOMP_DEFAULT_NULL_ALLOCATOR): Define.
+ (omp_init_allocator, omp_destroy_allocator, omp_set_default_allocator,
+ omp_get_default_allocator, omp_alloc, omp_free): Declare.
+ * libgomp.h (struct gomp_team_state): Add def_allocator field.
+ (gomp_def_allocator): Declare.
+ * libgomp.map (OMP_5.0.1): Export omp_set_default_allocator,
+ omp_get_default_allocator, omp_init_allocator, omp_destroy_allocator,
+ omp_alloc and omp_free.
+ * team.c (gomp_team_start): Copy over ts.def_allocator.
+ * env.c (gomp_def_allocator): New variable.
+ (parse_wait_policy): Adjust function comment.
+ (parse_allocator): New function.
+ (handle_omp_display_env): Print OMP_ALLOCATOR.
+ (initialize_env): Call parse_allocator.
+ * Makefile.am (libgomp_la_SOURCES): Add allocator.c.
+ * allocator.c: New file.
+ * icv.c (omp_set_default_allocator, omp_get_default_allocator): New
+ functions.
+ * testsuite/libgomp.c-c++-common/alloc-1.c: New test.
+ * testsuite/libgomp.c-c++-common/alloc-2.c: New test.
+ * testsuite/libgomp.c-c++-common/alloc-3.c: New test.
+ * Makefile.in: Regenerated.
+
+2020-05-15 H.J. Lu <hongjiu.lu@intel.com>
+
+ PR bootstrap/95147
+ * configure: Regenerated.
+
+2020-05-14 Thomas Koenig <tkoenig@gcc.gnu.org>
+
+ PR libfortran/95119
+ * testsuite/libgomp.fortran/close_errors_1.f90: New test.
+
+2020-05-14 H.J. Lu <hongjiu.lu@intel.com>
+
+ * configure: Regenerated.
+
+2020-05-14 Jakub Jelinek <jakub@redhat.com>
+
+ * testsuite/libgomp.c-c++-common/target-40.c: New test.
+
+2020-05-13 Tobias Burnus <tobias@codesourcery.com>
+
+ PR fortran/94690
+ * testsuite/libgomp.fortran/pr66199-3.f90: New.
+ * testsuite/libgomp.fortran/pr66199-4.f90: New.
+ * testsuite/libgomp.fortran/pr66199-5.f90: New.
+ * testsuite/libgomp.fortran/pr66199-6.f90: New.
+ * testsuite/libgomp.fortran/pr66199-7.f90: New.
+ * testsuite/libgomp.fortran/pr66199-8.f90: New.
+ * testsuite/libgomp.fortran/pr66199-9.f90: New.
+
+2020-05-12 Jakub Jelinek <jakub@redhat.com>
+
+ * testsuite/libgomp.c/target-39.c: New test.
+
+2020-04-29 Thomas Schwinge <thomas@codesourcery.com>
+
+ * config/accel/openacc.f90 (acc_device_current): Set to '-1'.
+ * openacc.f90 (acc_device_current): Likewise.
+ * openacc.h (acc_device_current): Likewise.
+ * openacc_lib.h (acc_device_current): Likewise.
+
+ PR target/94282
+ * testsuite/libgomp.oacc-c-c++-common/function-not-offloaded.c: Remove
+ 'dg-allow-blank-lines-in-output'.
+
+ * oacc-init.c (get_openacc_name): Handle 'gcn'.
+ * testsuite/lib/libgomp.exp
+ (offload_target_to_openacc_device_type) [amdgcn*]: Return
+ 'radeon'. Adjust all users.
+ (check_effective_target_openacc_amdgcn_accel_present): Rename
+ to...
+ (check_effective_target_openacc_radeon_accel_present): ... this.
+ Adjust all users.
+ (check_effective_target_openacc_amdgcn_accel_selected): Rename to...
+ (check_effective_target_openacc_radeon_accel_selected): ... this.
+ Adjust all users.
+
+ * testsuite/libgomp.fortran/use_device_ptr-optional-2.f90: Add
+ 'dg-do run'.
+
+2020-04-23 Andrew Stubbs <ams@codesourcery.com>
+
+ PR other/94629
+
+ * plugin/plugin-gcn.c (init_hsa_context): Check return value from
+ hsa_iterate_agents.
+ (GOMP_OFFLOAD_init_device): Check return values from both calls to
+ hsa_agent_iterate_regions.
+
+2020-04-20 Thomas Schwinge <thomas@codesourcery.com>
+
+ PR middle-end/94635
+ * testsuite/libgomp.fortran/target-enter-data-2.F90: Add 'dg-do
+ run'.
+
+2020-04-20 Tobias Burnus <tobias@codesourcery.com>
+
+ PR middle-end/94120
+ * testsuite/libgomp.oacc-c++/declare-pr94120.C: Fix 'declare copy(out)'
+ test case.
+
+2020-04-17 Tobias Burnus <tobias@codesourcery.com>
+
+ PR middle-end/94635
+ * testsuite/libgomp.fortran/target-enter-data-2.F90: New.
+
+2020-04-13 Thomas Schwinge <thomas@codesourcery.com>
+
+ PR libgomp/92843
+ * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-1-lib.c:
+ Rename to...
+ * testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-1-lib.c:
+ ... this.
+ * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-1.c:
+ Rename to...
+ * testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-1.c:
+ ... this.
+ * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-2-lib.c:
+ Rename to...
+ * testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-2-lib.c:
+ ... this.
+ * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-2.c:
+ Rename to...
+ * testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-2.c:
+ ... this.
+ * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-3-lib.c:
+ Rename to...
+ * testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-3-lib.c:
+ ... this.
+ * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-3.c:
+ Rename to...
+ * testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-3.c:
+ ... this.
+ * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-4-lib.c:
+ Rename to...
+ * testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-4-lib.c:
+ ... this.
+ * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-4.c:
+ Rename to...
+ * testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-4.c:
+ ... this.
+ * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-5-lib.c:
+ Rename to...
+ * testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-5-lib.c:
+ ... this.
+ * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-5.c:
+ Rename to...
+ * testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-5.c:
+ ... this.
+ * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-6-lib.c:
+ Rename to...
+ * testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-6-lib.c:
+ ... this.
+ * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-6.c:
+ Rename to...
+ * testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-6.c:
+ ... this.
+ * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-7-lib.c:
+ Rename to...
+ * testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-7-lib.c:
+ ... this.
+ * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-7.c:
+ Rename to...
+ * testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-7.c:
+ ... this.
+ * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-8-lib.c:
+ Rename to...
+ * testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-8-lib.c:
+ ... this.
+ * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-8.c:
+ Rename to...
+ * testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-8.c:
+ ... this.
+
+2020-04-10 Julian Brown <julian@codesourcery.com>
+ Thomas Schwinge <thomas@codesourcery.com>
+
+ PR libgomp/92843
+ * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-1-lib.c:
+ New file.
+ * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-1.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-2-lib.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-2.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-3-lib.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-3.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-4-lib.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-4.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-5-lib.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-5.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-6-lib.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-6.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-7-lib.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-7.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-8-lib.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/static-dynamic-lifetimes-8.c:
+ Likewise.
+
+2020-04-10 Thomas Schwinge <thomas@codesourcery.com>
+
+ * testsuite/libgomp.fortran/target-enter-data-1.f90: Add 'dg-do
+ run'.
+
+2020-04-08 Tobias Burnus <tobias@codesourcery.com>
+
+ PR middle-end/94120
+ * testsuite/libgomp.oacc-c++/declare-pr94120.C: New.
+
+2020-04-06 Maciej W. Rozycki <macro@wdc.com>
+
+ * configure.ac: Add testsuite/libgomp-site-extra.exp to output
+ files.
+ * configure: Regenerate.
+ * testsuite/libgomp-site-extra.exp.in: New file.
+ * testsuite/libgomp-test-support.exp.in (GCC_UNDER_TEST): Remove
+ variable.
+ * testsuite/Makefile.am (EXTRA_DEJAGNU_SITE_CONFIG): New
+ variable.
+ * testsuite/Makefile.in: Regenerate.
+
+2020-04-03 Thomas Schwinge <thomas@codesourcery.com>
+
+ PR tree-optimization/89713
+ PR c/94392
+ * testsuite/libgomp.oacc-c-c++-common/pr85381-2.c: Again expect
+ 'bar.sync'.
+ * testsuite/libgomp.oacc-c-c++-common/pr85381-4.c: Likewise.
+
+2020-03-31 Tobias Burnus <tobias@codesourcery.com>
+
+ * target.c (GOMP_target_enter_exit_data): Handle PSET/MAP_POINTER.
+ * testsuite/libgomp.fortran/target-enter-data-1.f90: New.
+
+2020-03-24 Tobias Burnus <tobias@codesourcery.com>
+
+ PR libgomp/81689
+ * testsuite/libgomp.c/target-link-1.c: Remove xfail.
+
+2020-03-20 Tobias Burnus <tobias@codesourcery.com>
+
+ PR libgomp/94251
+ * target.c (gomp_load_image_to_device): Fix link
+ variable handling.
+
+2020-03-19 Jakub Jelinek <jakub@redhat.com>
+
+ PR c++/93931
+ * testsuite/libgomp.c++/pr93931.C: New test.
+
+2020-03-19 Tobias Burnus <tobias@codesourcery.com>
+
+ * testsuite/libgomp.oacc-c-c++-common/function-not-offloaded.c: Add
+ dg-allow-blank-lines-in-output.
+
+2020-03-18 Julian Brown <julian@codesourcery.com>
+ Tobias Burnus <tobias@codesourcery.com>
+
+ * testsuite/libgomp.oacc-fortran/atomic_capture-1.f90: Really make
+ it work concurrently.
+
+2020-03-18 Tobias Burnus <tobias@codesourcery.com>
+
+ * testsuite/libgomp.oacc-c++/firstprivate-mappings-1.C: Add
+ #define DO_LONG_DOUBLE; set to 1, except for nvidia + gcn.
+ * testsuite/libgomp.oacc-c-c++-common/firstprivate-mappings-1.c: Likewise.
+
+2020-03-14 Jakub Jelinek <jakub@redhat.com>
+
+ PR middle-end/93566
+ * testsuite/libgomp.c/pr93566.c: New test.
+
+2020-02-21 Frederik Harwath <frederik@codesourcery.com>
+
+ * testsuite/libgomp.oacc-fortran/acc_get_property.f90: Adapt to
+ changes from 2020-02-19, i.e. use integer(c_size_t) instead of
+ integer(acc_device_property) for the type of the return value of
+ acc_get_property.
+
+2020-02-19 Tobias Burnus <tobias@codesourcery.com>
+
+ * .gitattributes: New; whitespace handling for Fortran's openacc_lib.h.
+ * config/accel/openacc.f90 (openacc_kinds): Add acc_device_current.
+ (openacc_internal, acc_on_device_h): Fix argument name; minor cleanup.
+ * libgomp.texi (Enabling OpenACC): No longer mark as experimental.
+ (acc_set_device_num): Fix Fortran argument name, use same name for C.
+ (acc_get_property): Update Fortran interface to post-OpenACC 3.0
+ corrections; add note about the previous interface and named constant.
+ (OpenACC library and environment variables): Fix two typos.
+ * openacc.f90: Use for all procedures the argument names from the spec
+ as for …_h they are user visible.
+ (openacc_kinds): Rename acc_device_property to
+ acc_device_property_kind and change value to int32; and update users.
+ Re-add acc_device_property for backward compatibility.
+ (acc_get_property_string_h): Clean up as acc_device_property_kind
+ changed.
+ (acc_get_property_h): Likewise and return c_size_t instead of
+ acc_device_property.
+ (openacc): Also export acc_device_property_kind.
+ (acc_async_test_h, acc_async_test_all_h, acc_on_device_h,
+ acc_is_present_32_h, acc_is_present_64_h): Simplify logical-return-value
+ handling; check against /= 0 instead of == 1 to match C.
+ * openacc_lib.h: Use for all procedures the argument names from the spec
+ as for …_h they are user visible. Place !GCC$ into the first column to
+ be active also for fixed-form source form.
+ (acc_device_current, acc_device_property_kind, acc_device_property,
+ acc_property_memory, acc_property_free_memory, acc_property_name,
+ acc_property_vendor, acc_property_driver): New named constants.
+ (acc_get_property, acc_get_property_string): New generic interface.
+
+2020-02-13 Frederik Harwath <frederik@codesourcery.com>
+
+ PR libgomp/93481
+ * plugin/plugin-nvptx.c: Remove GOMP_OFFLOAD_async_run stub.
+ * target.c (gomp_load_plugin_for_device): Make "async_run" loading
+ optional.
+ (gomp_target_task_fn): Assert "devicep->async_run_func".
+ (clear_unsupported_flags): New function to remove unsupported flags
+ (right now only GOMP_TARGET_FLAG_NOWAIT) that can be ignored.
+ (GOMP_target_ext): Apply clear_unsupported_flags to flags.
+ * testsuite/libgomp.c/target-33.c:
+ Remove xfail for offload_target_nvptx.
+ * testsuite/libgomp.c/target-34.c: Likewise.
+
+2020-02-10 Frederik Harwath <frederik@codesourcery.com>
+
+ * testsuite/libgomp.c/target-33.c: Add xfail for execution on
+ offload_target_nvptx, cf. https://gcc.gnu.org/PR81688.
+ * testsuite/libgomp.c/target-34.c: Likewise.
+ * testsuite/libgomp.c/target-link-1.c: Add xfail for
+ offload_target_nvptx, cf. https://gcc.gnu.org/PR81689.
+
+2020-02-09 Jakub Jelinek <jakub@redhat.com>
+
+ * testsuite/libgomp.c/target-38.c: New test.
+
+2020-02-06 Jakub Jelinek <jakub@redhat.com>
+
+ PR libgomp/93515
+ * testsuite/libgomp.c-c++-common/pr93515.c: New test.
+
+2020-02-05 Tobias Burnus <tobias@codesourcery.com>
+
+ * testsuite/lib/libgomp.exp
+ (check_effective_target_offload_target_nvptx): Pass flags as 'options'
+ and not as 'source' argument to libgomp_target_compile.
+
+2020-02-03 Andrew Stubbs <ams@codesourcery.com>
+
+ * plugin/plugin-gcn.c (EF_AMDGPU_MACH_AMDGCN_GFX801): Remove.
+ (gcn_gfx801_s): Remove.
+ (isa_hsa_name): Remove gfx801.
+ (isa_gcc_name): Remove gfx801/carrizo.
+ (isa_code): Remove gfx801.
+
+2020-02-03 Julian Brown <julian@codesourcery.com>
+ Tobias Burnus <tobias@codesourcery.com>
+
+ * libgomp.texi (OpenACC Runtime Library Routines): Document *_async
+ and *_finalize variants; document acc_attach and acc_detach; update
+ references from OpenACC 2.0 to 2.6.
+ * openacc.f90 (openacc_version): Update to 201711.
+ * openacc_lib.h (openacc_version): Update to 201711.
+ * testsuite/libgomp.oacc-fortran/openacc_version-1.f: Update expected
+ openacc_version to 201711.
+ * testsuite/libgomp.oacc-fortran/openacc_version-2.f90: Likewise.
+
+2020-01-31 Kwok Cheung Yeung <kcy@codesourcery.com>
+
+ * plugin/plugin-gcn.c (struct hsa_kernel_description): Add sgpr_count
+ and vgpr_count fields.
+ (struct kernel_info): Add a field for a hsa_kernel_description.
+ (run_kernel): Reduce the number of threads/workers if the requested
+ number would require too many VGPRs.
+ (init_basic_kernel_info): Initialize description field with
+ the hsa_kernel_description entry for the kernel.
+
+2020-01-29 Tobias Burnus <tobias@codesourcery.com>
+
+ PR bootstrap/93409
+ * plugin/configfrag.ac (enable_offload_targets): Skip
+ HSA and GCN plugin besides -m32 also for -mx32.
+ * configure: Regenerate.
+
+2020-01-29 Frederik Harwath <frederik@codesourcery.com>
+
+ * oacc-init.c (name_of_acc_device_t): Handle acc_device_radeon.
+
+2020-01-29 Frederik Harwath <frederik@codesourcery.com>
+
+ * plugin/plugin-gcn.c (struct agent_info): Add fields "name" and
+ "vendor_name" ...
+ (GOMP_OFFLOAD_init_device): ... and init from here.
+ (struct hsa_context_info): Add field "driver_version_s" ...
+ (init_hsa_context): ... and init from here.
+ (GOMP_OFFLOAD_openacc_get_property): Replace stub with a proper
+ implementation.
+ * testsuite/libgomp.oacc-c-c++-common/acc_get_property.c:
+ Enable test execution for amdgcn and host offloading targets.
+ * testsuite/libgomp.oacc-fortran/acc_get_property.f90: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/acc_get_property-aux.c
+ (expect_device_properties): Split function into ...
+ (expect_device_string_properties): ... this new function ...
+ (expect_device_memory): ... and this new function.
+ * testsuite/libgomp.oacc-c-c++-common/acc_get_property-gcn.c:
+ Add test.
+
+2020-01-28 Julian Brown <julian@codesourcery.com>
+
+ * testsuite/libgomp.oacc-fortran/deep-copy-2.f90: Remove test from here.
+ * testsuite/libgomp.oacc-fortran/deep-copy-3.f90: Don't use mixed
+ component/non-component variable refs in a single directive.
+ * testsuite/libgomp.oacc-fortran/classtypes-1.f95: Likewise.
+
+2020-01-24 Maciej W. Rozycki <macro@wdc.com>
+
+ * configure.ac: Handle `--with-toolexeclibdir='.
+ * Makefile.in: Regenerate.
+ * aclocal.m4: Regenerate.
+ * configure: Regenerate.
+ * testsuite/Makefile.in: Regenerate.
+
+2020-01-24 Frederik Harwath <frederik@codesourcery.com>
+
+ * testsuite/libgomp.oacc-c-c++-common/acc_get_property-aux.c
+ (expect_device_properties): Remove "expected_free_mem" argument,
+ change "expected_total_mem" argument type to size_t;
+ change types of acc_get_property results to size_t,
+ adapt format strings.
+ * testsuite/libgomp.oacc-c-c++-common/acc_get_property.c:
+ Use %zu instead of %zd to print size_t values.
+ * testsuite/libgomp.oacc-c-c++-common/acc_get_property-2.c: Adapt and
+ rename to ...
+ * testsuite/libgomp.oacc-c-c++-common/acc_get_property-nvptx.c: ... this.
+ * testsuite/libgomp.oacc-c-c++-common/acc_get_property-3.c: Adapt and
+ rename to ...
+ * testsuite/libgomp.oacc-c-c++-common/acc_get_property-host.c: ... this.
+
+2020-01-23 Andrew Stubbs <ams@codesourcery.com>
+
+ * plugin/plugin-gcn.c (parse_target_attributes): Use correct mask for
+ the device id.
+
+2020-01-20 Andrew Stubbs <ams@codesourcery.com>
+
+ * testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c: Skip test on gcn.
+ * testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c (main):
+ Adjust test dimensions for amdgcn.
+ * testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c (main): Adjust
+ gang/worker/vector expectations dynamically.
+ * testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c
+ (main): Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c (main): Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c (main): Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c (main): Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c (main): Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c (main): Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/loop-v-1.c (main): Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/loop-w-1.c (main): Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c (main): Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
+ (acc_gang): Recognise acc_device_radeon.
+ (acc_worker): Likewise.
+ (acc_vector): Likewise.
+ (main): Set expectations for amdgcn.
+ * testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c
+ (main): Adjust gang/worker/vector expectations dynamically.
+ * testsuite/libgomp.oacc-c-c++-common/routine-v-1.c (main): Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/routine-w-1.c (main): Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c (main): Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c: Set expectations
+ for amdgcn.
+
+2020-01-17 Andrew Stubbs <ams@codesourcery.com>
+
+ * config/accel/openacc.f90 (openacc_kinds): Rename acc_device_gcn to
+ acc_device_radeon.
+ (openacc): Likewise.
+ * openacc.f90 (openacc_kinds): Likewise.
+ (openacc): Likewise.
+ * openacc.h (acc_device_t): Likewise.
+ * openacc_lib.h: Likewise.
+ * testsuite/lib/libgomp.exp
+ (check_effective_target_openacc_amdgcn_accel_present): Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c
+ (cb_compute_construct_end): Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c
+ (cb_enqueue_launch_start): Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c
+ (cb_enter_data_end): Likewise.
+ (cb_exit_data_start): Likewise.
+ (cb_exit_data_end): Likewise.
+ (cb_compute_construct_end): Likewise.
+ (cb_enqueue_launch_start): Likewise.
+ (cb_enqueue_launch_end): Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/asyncwait-nop-1.c
+ (main): Likewise.
+
+2020-01-10 Thomas Schwinge <thomas@codesourcery.com>
+
+ * libgomp-plugin.h (enum goacc_property): New. Adjust all users
+ to use this instead of 'enum gomp_device_property'.
+ (GOMP_OFFLOAD_get_property): Rename to...
+ (GOMP_OFFLOAD_openacc_get_property): ... this. Adjust all users.
+ * libgomp.h (struct gomp_device_descr): Move
+ 'GOMP_OFFLOAD_openacc_get_property'...
+ (struct acc_dispatch_t): ... here. Adjust all users.
+ * plugin/plugin-hsa.c (GOMP_OFFLOAD_get_property): Remove.
+
+ * target.c (gomp_map_vars_internal)
+ <GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT>: Clean up/elaborate code
+ paths.
+
+2020-01-10 Jakub Jelinek <jakub@redhat.com>
+
+ PR libgomp/93219
+ * libgomp.h (gomp_print_string): Change return type from void to bool.
+ * affinity-fmt.c (gomp_print_string): Likewise. Return true if
+ not all characters have been written.
+
+2020-01-08 Tobias Burnus <tobias@codesourcery.com>
+
+ * libgomp.texi: Fix typos, use https.
+
2020-01-03 Tobias Burnus <tobias@codesourcery.com>
* testsuite/libgomp.fortran/optional-map.f90: Add test for
diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am
index 669b9e4..b841562 100644
--- a/libgomp/Makefile.am
+++ b/libgomp/Makefile.am
@@ -65,7 +65,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \
proc.c sem.c bar.c ptrlock.c time.c fortran.c affinity.c target.c \
splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c oacc-init.c \
oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
- affinity-fmt.c teams.c oacc-profiling.c oacc-target.c
+ affinity-fmt.c teams.c allocator.c oacc-profiling.c oacc-target.c
include $(top_srcdir)/plugin/Makefrag.am
diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in
index 3d772ee..5ff2ac1 100644
--- a/libgomp/Makefile.in
+++ b/libgomp/Makefile.in
@@ -133,10 +133,12 @@ am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
$(top_srcdir)/../config/lthostflags.m4 \
$(top_srcdir)/../config/multi.m4 \
$(top_srcdir)/../config/override.m4 \
- $(top_srcdir)/../config/tls.m4 $(top_srcdir)/../ltoptions.m4 \
- $(top_srcdir)/../ltsugar.m4 $(top_srcdir)/../ltversion.m4 \
- $(top_srcdir)/../lt~obsolete.m4 $(top_srcdir)/acinclude.m4 \
- $(top_srcdir)/../libtool.m4 $(top_srcdir)/../config/cet.m4 \
+ $(top_srcdir)/../config/tls.m4 \
+ $(top_srcdir)/../config/toolexeclibdir.m4 \
+ $(top_srcdir)/../ltoptions.m4 $(top_srcdir)/../ltsugar.m4 \
+ $(top_srcdir)/../ltversion.m4 $(top_srcdir)/../lt~obsolete.m4 \
+ $(top_srcdir)/acinclude.m4 $(top_srcdir)/../libtool.m4 \
+ $(top_srcdir)/../config/cet.m4 \
$(top_srcdir)/plugin/configfrag.ac $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
@@ -229,7 +231,8 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \
target.lo splay-tree.lo libgomp-plugin.lo oacc-parallel.lo \
oacc-host.lo oacc-init.lo oacc-mem.lo oacc-async.lo \
oacc-plugin.lo oacc-cuda.lo priority_queue.lo affinity-fmt.lo \
- teams.lo oacc-profiling.lo oacc-target.lo $(am__objects_1)
+ teams.lo allocator.lo oacc-profiling.lo oacc-target.lo \
+ $(am__objects_1)
libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
@@ -570,7 +573,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \
affinity.c target.c splay-tree.c libgomp-plugin.c \
oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \
oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
- affinity-fmt.c teams.c oacc-profiling.c oacc-target.c \
+ affinity-fmt.c teams.c allocator.c oacc-profiling.c oacc-target.c \
$(am__append_4)
# Nvidia PTX OpenACC plugin.
@@ -763,6 +766,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/affinity-fmt.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/affinity.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/alloc.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/allocator.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/atomic.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bar.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/barrier.Plo@am__quote@
diff --git a/libgomp/aclocal.m4 b/libgomp/aclocal.m4
index 1212599..55d9d71 100644
--- a/libgomp/aclocal.m4
+++ b/libgomp/aclocal.m4
@@ -1177,6 +1177,7 @@ m4_include([../config/lthostflags.m4])
m4_include([../config/multi.m4])
m4_include([../config/override.m4])
m4_include([../config/tls.m4])
+m4_include([../config/toolexeclibdir.m4])
m4_include([../ltoptions.m4])
m4_include([../ltsugar.m4])
m4_include([../ltversion.m4])
diff --git a/libgomp/affinity-fmt.c b/libgomp/affinity-fmt.c
index c423e35..9a5334d 100644
--- a/libgomp/affinity-fmt.c
+++ b/libgomp/affinity-fmt.c
@@ -37,10 +37,10 @@
#include <sys/utsname.h>
#endif
-void
+bool
gomp_print_string (const char *str, size_t len)
{
- fwrite (str, 1, len, stderr);
+ return fwrite (str, 1, len, stderr) != len;
}
void
diff --git a/libgomp/allocator.c b/libgomp/allocator.c
new file mode 100644
index 0000000..4e29399
--- /dev/null
+++ b/libgomp/allocator.c
@@ -0,0 +1,357 @@
+/* Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Jakub Jelinek <jakub@redhat.com>.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file contains wrappers for the system allocation routines. Most
+ places in the OpenMP API do not make any provision for failure, so in
+ general we cannot allow memory allocation to fail. */
+
+#define _GNU_SOURCE
+#include "libgomp.h"
+#include <stdlib.h>
+
+#define omp_max_predefined_alloc omp_thread_mem_alloc
+
+/* State of an allocator created by omp_init_allocator. A handle above
+ omp_max_predefined_alloc is a pointer to one of these. */
+struct omp_allocator_data
+{
+ /* Memory space the allocator was created for. */
+ omp_memspace_handle_t memspace;
+ /* Alignment of returned pointers; omp_init_allocator only accepts
+ powers of two and raises the value to at least sizeof (void *). */
+ omp_uintptr_t alignment;
+ /* Pool limit in bytes; all-ones means no limit is enforced. */
+ omp_uintptr_t pool_size;
+ /* Bytes currently charged against pool_size (header and alignment
+ slack included). Updated atomically or under LOCK. */
+ omp_uintptr_t used_pool_size;
+ /* Allocator tried next when FALLBACK is omp_atv_allocator_fb. */
+ omp_allocator_handle_t fb_data;
+ /* Stored omp_atv_* trait values. Of these, only FALLBACK is consulted
+ by omp_alloc; PINNED is always false for a successfully created
+ allocator because omp_init_allocator rejects pinned requests. */
+ unsigned int sync_hint : 8;
+ unsigned int access : 8;
+ unsigned int fallback : 8;
+ unsigned int pinned : 1;
+ unsigned int partition : 7;
+#ifndef HAVE_SYNC_BUILTINS
+ /* Protects used_pool_size when atomic builtins are not available. */
+ gomp_mutex_t lock;
+#endif
+};
+
+/* Bookkeeping record that omp_alloc stores immediately before every
+ pointer it returns and that omp_free reads back. */
+struct omp_mem_header
+{
+ /* Address returned by malloc; this is what omp_free passes to free. */
+ void *ptr;
+ /* Total size requested from malloc (user size plus header and
+ alignment slack); the amount charged to the allocator's pool. */
+ size_t size;
+ /* Handle of the allocator that satisfied the request. */
+ omp_allocator_handle_t allocator;
+ /* Never written by omp_alloc; presumably padding that keeps the header
+ size a multiple of the default alignment -- TODO confirm. */
+ void *pad;
+};
+
+/* Create an allocator for MEMSPACE configured by the NTRAITS entries of
+ TRAITS. The returned handle is a pointer to a heap-allocated
+ struct omp_allocator_data. Returns omp_null_allocator if MEMSPACE is
+ above omp_low_lat_mem_space, if a trait key or value is not recognized,
+ or if the request needs something unsupported here (pinned memory or
+ omp_high_bw_mem_space). When a key appears more than once, the last
+ entry wins. */
+omp_allocator_handle_t
+omp_init_allocator (omp_memspace_handle_t memspace, int ntraits,
+ const omp_alloctrait_t traits[])
+{
+ /* Defaults, in field order: alignment 1, unlimited pool (all-ones),
+ nothing used, no fallback allocator, contended sync hint, access by
+ all, default-memory fallback, not pinned, environment partition. */
+ struct omp_allocator_data data
+ = { memspace, 1, ~(uintptr_t) 0, 0, 0, omp_atv_contended, omp_atv_all,
+ omp_atv_default_mem_fb, omp_atv_false, omp_atv_environment };
+ struct omp_allocator_data *ret;
+ int i;
+
+ if (memspace > omp_low_lat_mem_space)
+ return omp_null_allocator;
+ /* Validate each trait; omp_atv_default selects the built-in default
+ for that key. */
+ for (i = 0; i < ntraits; i++)
+ switch (traits[i].key)
+ {
+ case omp_atk_sync_hint:
+ switch (traits[i].value)
+ {
+ case omp_atv_default:
+ data.sync_hint = omp_atv_contended;
+ break;
+ case omp_atv_contended:
+ case omp_atv_uncontended:
+ case omp_atv_sequential:
+ case omp_atv_private:
+ data.sync_hint = traits[i].value;
+ break;
+ default:
+ return omp_null_allocator;
+ }
+ break;
+ case omp_atk_alignment:
+ /* Reject zero and anything that is not a power of two. */
+ if ((traits[i].value & (traits[i].value - 1)) != 0
+ || !traits[i].value)
+ return omp_null_allocator;
+ data.alignment = traits[i].value;
+ break;
+ case omp_atk_access:
+ switch (traits[i].value)
+ {
+ case omp_atv_default:
+ data.access = omp_atv_all;
+ break;
+ case omp_atv_all:
+ case omp_atv_cgroup:
+ case omp_atv_pteam:
+ case omp_atv_thread:
+ data.access = traits[i].value;
+ break;
+ default:
+ return omp_null_allocator;
+ }
+ break;
+ case omp_atk_pool_size:
+ /* Stored unchecked; an all-ones value is indistinguishable from the
+ "no limit" default. */
+ data.pool_size = traits[i].value;
+ break;
+ case omp_atk_fallback:
+ switch (traits[i].value)
+ {
+ case omp_atv_default:
+ data.fallback = omp_atv_default_mem_fb;
+ break;
+ case omp_atv_default_mem_fb:
+ case omp_atv_null_fb:
+ case omp_atv_abort_fb:
+ case omp_atv_allocator_fb:
+ data.fallback = traits[i].value;
+ break;
+ default:
+ return omp_null_allocator;
+ }
+ break;
+ case omp_atk_fb_data:
+ /* The fallback allocator handle is stored without validation. */
+ data.fb_data = traits[i].value;
+ break;
+ case omp_atk_pinned:
+ switch (traits[i].value)
+ {
+ case omp_atv_default:
+ case omp_atv_false:
+ data.pinned = omp_atv_false;
+ break;
+ case omp_atv_true:
+ data.pinned = omp_atv_true;
+ break;
+ default:
+ return omp_null_allocator;
+ }
+ break;
+ case omp_atk_partition:
+ switch (traits[i].value)
+ {
+ case omp_atv_default:
+ data.partition = omp_atv_environment;
+ break;
+ case omp_atv_environment:
+ case omp_atv_nearest:
+ case omp_atv_blocked:
+ case omp_atv_interleaved:
+ data.partition = traits[i].value;
+ break;
+ default:
+ return omp_null_allocator;
+ }
+ break;
+ default:
+ return omp_null_allocator;
+ }
+
+ /* Never use less than pointer alignment. */
+ if (data.alignment < sizeof (void *))
+ data.alignment = sizeof (void *);
+
+ /* No support for these so far (for hbw will use memkind). */
+ if (data.pinned || data.memspace == omp_high_bw_mem_space)
+ return omp_null_allocator;
+
+ /* The result of gomp_malloc is used unchecked; presumably it does not
+ return on failure -- TODO confirm. */
+ ret = gomp_malloc (sizeof (struct omp_allocator_data));
+ *ret = data;
+#ifndef HAVE_SYNC_BUILTINS
+ gomp_mutex_init (&ret->lock);
+#endif
+ return (omp_allocator_handle_t) ret;
+}
+
+/* Release the resources of ALLOCATOR, a handle returned by
+ omp_init_allocator. omp_null_allocator and the predefined allocators
+ (handles up to omp_max_predefined_alloc) are ignored: they are not
+ pointers to a heap-allocated struct omp_allocator_data, so casting one
+ to a pointer and freeing it would be invalid. Memory still allocated
+ from ALLOCATOR is not released here. */
+void
+omp_destroy_allocator (omp_allocator_handle_t allocator)
+{
+ if (allocator != omp_null_allocator
+ && allocator > omp_max_predefined_alloc)
+ {
+#ifndef HAVE_SYNC_BUILTINS
+ gomp_mutex_destroy (&((struct omp_allocator_data *) allocator)->lock);
+#endif
+ free ((void *) allocator);
+ }
+}
+
+/* Allocate SIZE bytes using ALLOCATOR. Returns NULL when SIZE is zero,
+ or when the allocation fails and the allocator's fallback trait does
+ not resolve it. A struct omp_mem_header describing the underlying
+ malloc block is stored immediately before the returned pointer so that
+ omp_free can undo the allocation. */
+void *
+omp_alloc (size_t size, omp_allocator_handle_t allocator)
+{
+ struct omp_allocator_data *allocator_data;
+ size_t alignment, new_size;
+ void *ptr, *ret;
+
+ if (__builtin_expect (size == 0, 0))
+ return NULL;
+
+retry:
+ /* omp_null_allocator selects the calling thread's default allocator,
+ which is seeded from gomp_def_allocator when it is unset. */
+ if (allocator == omp_null_allocator)
+ {
+ struct gomp_thread *thr = gomp_thread ();
+ if (thr->ts.def_allocator == omp_null_allocator)
+ thr->ts.def_allocator = gomp_def_allocator;
+ allocator = (omp_allocator_handle_t) thr->ts.def_allocator;
+ }
+
+ /* Handles above the predefined range are pointers to allocator data;
+ predefined allocators carry no data and use pointer alignment. */
+ if (allocator > omp_max_predefined_alloc)
+ {
+ allocator_data = (struct omp_allocator_data *) allocator;
+ alignment = allocator_data->alignment;
+ }
+ else
+ {
+ allocator_data = NULL;
+ alignment = sizeof (void *);
+ }
+
+ /* Total request: header, slack for rounding up to ALIGNMENT, and the
+ user's SIZE; give up on arithmetic overflow. */
+ new_size = sizeof (struct omp_mem_header);
+ if (alignment > sizeof (void *))
+ new_size += alignment - sizeof (void *);
+ if (__builtin_add_overflow (size, new_size, &new_size))
+ goto fail;
+
+ /* Allocator with a limited pool: reserve NEW_SIZE bytes from the pool
+ before calling malloc. */
+ if (__builtin_expect (allocator_data
+ && allocator_data->pool_size < ~(uintptr_t) 0, 0))
+ {
+ uintptr_t used_pool_size;
+ if (new_size > allocator_data->pool_size)
+ goto fail;
+#ifdef HAVE_SYNC_BUILTINS
+ used_pool_size = __atomic_load_n (&allocator_data->used_pool_size,
+ MEMMODEL_RELAXED);
+ /* Compare-and-swap loop: a failed exchange refreshes USED_POOL_SIZE
+ with the current value, and the limit is then re-checked. */
+ do
+ {
+ uintptr_t new_pool_size;
+ if (__builtin_add_overflow (used_pool_size, new_size,
+ &new_pool_size)
+ || new_pool_size > allocator_data->pool_size)
+ goto fail;
+ if (__atomic_compare_exchange_n (&allocator_data->used_pool_size,
+ &used_pool_size, new_pool_size,
+ true, MEMMODEL_RELAXED,
+ MEMMODEL_RELAXED))
+ break;
+ }
+ while (1);
+#else
+ gomp_mutex_lock (&allocator_data->lock);
+ if (__builtin_add_overflow (allocator_data->used_pool_size, new_size,
+ &used_pool_size)
+ || used_pool_size > allocator_data->pool_size)
+ {
+ gomp_mutex_unlock (&allocator_data->lock);
+ goto fail;
+ }
+ allocator_data->used_pool_size = used_pool_size;
+ gomp_mutex_unlock (&allocator_data->lock);
+#endif
+ ptr = malloc (new_size);
+ if (ptr == NULL)
+ {
+ /* malloc failed: give the reservation back to the pool. */
+#ifdef HAVE_SYNC_BUILTINS
+ __atomic_add_fetch (&allocator_data->used_pool_size, -new_size,
+ MEMMODEL_RELAXED);
+#else
+ gomp_mutex_lock (&allocator_data->lock);
+ allocator_data->used_pool_size -= new_size;
+ gomp_mutex_unlock (&allocator_data->lock);
+#endif
+ goto fail;
+ }
+ }
+ else
+ {
+ ptr = malloc (new_size);
+ if (ptr == NULL)
+ goto fail;
+ }
+
+ /* Place the user pointer after the header, rounded to ALIGNMENT when
+ that is stricter than pointer alignment. */
+ if (alignment > sizeof (void *))
+ ret = (void *) (((uintptr_t) ptr
+ + sizeof (struct omp_mem_header)
+ + alignment - sizeof (void *)) & ~(alignment - 1));
+ else
+ ret = (char *) ptr + sizeof (struct omp_mem_header);
+ /* Fill in the header that sits just before the returned pointer. */
+ ((struct omp_mem_header *) ret)[-1].ptr = ptr;
+ ((struct omp_mem_header *) ret)[-1].size = new_size;
+ ((struct omp_mem_header *) ret)[-1].allocator = allocator;
+ return ret;
+
+fail:
+ /* Predefined allocators have no fallback trait: they just return NULL. */
+ if (allocator_data)
+ {
+ switch (allocator_data->fallback)
+ {
+ case omp_atv_default_mem_fb:
+ if (alignment > sizeof (void *)
+ || (allocator_data
+ && allocator_data->pool_size < ~(uintptr_t) 0))
+ {
+ allocator = omp_default_mem_alloc;
+ goto retry;
+ }
+ /* Otherwise, we've already performed default mem allocation
+ and if that failed, it won't succeed again (unless it was
+ intermittent). Return NULL then, as that is the fallback. */
+ break;
+ case omp_atv_null_fb:
+ break;
+ default:
+ case omp_atv_abort_fb:
+ /* NOTE(review): there is no break after gomp_fatal; presumably it
+ does not return -- confirm. */
+ gomp_fatal ("Out of memory allocating %lu bytes",
+ (unsigned long) size);
+ case omp_atv_allocator_fb:
+ /* Retry with the user-supplied fallback allocator. */
+ allocator = allocator_data->fb_data;
+ goto retry;
+ }
+ }
+ return NULL;
+}
+
+/* Release PTR, which must have been returned by omp_alloc; a null PTR is
+ ignored. ALLOCATOR is not used: the allocator recorded in the header
+ stored just before PTR is used instead. For an allocator with a
+ limited pool, the recorded block size is returned to the pool before
+ the underlying malloc block is freed. */
+void
+omp_free (void *ptr, omp_allocator_handle_t allocator)
+{
+ struct omp_mem_header *data;
+
+ if (ptr == NULL)
+ return;
+ (void) allocator;
+ /* The header immediately precedes the user pointer. */
+ data = &((struct omp_mem_header *) ptr)[-1];
+ if (data->allocator > omp_max_predefined_alloc)
+ {
+ struct omp_allocator_data *allocator_data
+ = (struct omp_allocator_data *) (data->allocator);
+ if (allocator_data->pool_size < ~(uintptr_t) 0)
+ {
+#ifdef HAVE_SYNC_BUILTINS
+ /* Adding the negated unsigned size subtracts it (modular
+ arithmetic). */
+ __atomic_add_fetch (&allocator_data->used_pool_size, -data->size,
+ MEMMODEL_RELAXED);
+#else
+ gomp_mutex_lock (&allocator_data->lock);
+ allocator_data->used_pool_size -= data->size;
+ gomp_mutex_unlock (&allocator_data->lock);
+#endif
+ }
+ }
+ free (data->ptr);
+}
diff --git a/libgomp/config/accel/openacc.f90 b/libgomp/config/accel/openacc.f90
index b4d4036..9933073 100644
--- a/libgomp/config/accel/openacc.f90
+++ b/libgomp/config/accel/openacc.f90
@@ -44,13 +44,14 @@ module openacc_kinds
integer, parameter :: acc_device_kind = int32
! Keep in sync with include/gomp-constants.h.
+ integer (acc_device_kind), parameter :: acc_device_current = -1
integer (acc_device_kind), parameter :: acc_device_none = 0
integer (acc_device_kind), parameter :: acc_device_default = 1
integer (acc_device_kind), parameter :: acc_device_host = 2
! integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 removed.
integer (acc_device_kind), parameter :: acc_device_not_host = 4
integer (acc_device_kind), parameter :: acc_device_nvidia = 5
- integer (acc_device_kind), parameter :: acc_device_gcn = 8
+ integer (acc_device_kind), parameter :: acc_device_radeon = 8
end module openacc_kinds
@@ -59,19 +60,19 @@ module openacc_internal
implicit none
interface
- function acc_on_device_h (d)
+ function acc_on_device_h (devicetype)
import
- integer (acc_device_kind) d
+ integer (acc_device_kind) devicetype
logical acc_on_device_h
end function
end interface
interface
- function acc_on_device_l (d) &
+ function acc_on_device_l (devicetype) &
bind (C, name = "acc_on_device")
use iso_c_binding, only: c_int
integer (c_int) :: acc_on_device_l
- integer (c_int), value :: d
+ integer (c_int), value :: devicetype
end function
end interface
end module openacc_internal
@@ -86,7 +87,7 @@ module openacc
! From openacc_kinds
public :: acc_device_kind
public :: acc_device_none, acc_device_default, acc_device_host
- public :: acc_device_not_host, acc_device_nvidia, acc_device_gcn
+ public :: acc_device_not_host, acc_device_nvidia, acc_device_radeon
public :: acc_on_device
@@ -96,14 +97,10 @@ module openacc
end module openacc
-function acc_on_device_h (d)
+function acc_on_device_h (devicetype)
use openacc_internal, only: acc_on_device_l
use openacc_kinds
- integer (acc_device_kind) d
+ integer (acc_device_kind) devicetype
logical acc_on_device_h
- if (acc_on_device_l (d) .eq. 1) then
- acc_on_device_h = .TRUE.
- else
- acc_on_device_h = .FALSE.
- end if
+ acc_on_device_h = acc_on_device_l (devicetype) /= 0
end function
diff --git a/libgomp/configure b/libgomp/configure
index 04a6fd9..9ffa66c 100755
--- a/libgomp/configure
+++ b/libgomp/configure
@@ -826,6 +826,7 @@ enable_version_specific_runtime_libs
enable_generated_files_in_srcdir
enable_silent_rules
enable_multilib
+with_toolexeclibdir
enable_dependency_tracking
enable_shared
enable_static
@@ -1500,11 +1501,14 @@ Optional Features:
--enable-tls Use thread-local storage [default=yes]
--enable-symvers=STYLE enables symbol versioning of the shared library
[default=yes]
- --enable-cet enable Intel CET in target libraries [default=no]
+ --enable-cet enable Intel CET in target libraries [default=auto]
Optional Packages:
--with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
--without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no)
+ --with-toolexeclibdir=DIR
+ install libraries built with a cross compiler within
+ DIR
--with-pic try to use only PIC/non-PIC objects [default=use
both]
--with-gnu-ld assume the C compiler uses GNU ld [default=no]
@@ -3501,6 +3505,22 @@ fi
ac_config_commands="$ac_config_commands default-1"
+
+# Check whether --with-toolexeclibdir was given.
+if test "${with_toolexeclibdir+set}" = set; then :
+ withval=$with_toolexeclibdir; case ${with_toolexeclibdir} in
+ /)
+ ;;
+ */)
+ with_toolexeclibdir=`echo $with_toolexeclibdir | sed 's,/$,,'`
+ ;;
+esac
+else
+ with_toolexeclibdir=no
+fi
+
+
+
# Calculate toolexeclibdir
# Also toolexecdir, though it's only used in toolexeclibdir
case ${enable_version_specific_runtime_libs} in
@@ -3516,7 +3536,14 @@ case ${enable_version_specific_runtime_libs} in
test x"$with_cross_host" != x"no"; then
# Install a library built with a cross compiler in tooldir, not libdir.
toolexecdir='$(exec_prefix)/$(target_alias)'
- toolexeclibdir='$(toolexecdir)/lib'
+ case ${with_toolexeclibdir} in
+ no)
+ toolexeclibdir='$(toolexecdir)/lib'
+ ;;
+ *)
+ toolexeclibdir=${with_toolexeclibdir}
+ ;;
+ esac
else
toolexecdir='$(libdir)/gcc-lib/$(target_alias)'
toolexeclibdir='$(libdir)'
@@ -11405,7 +11432,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
-#line 11408 "configure"
+#line 11435 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -11511,7 +11538,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
-#line 11514 "configure"
+#line 11541 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -14991,7 +15018,7 @@ fi
# Plugins for offload execution, configure.ac fragment. -*- mode: autoconf -*-
#
-# Copyright (C) 2014-2019 Free Software Foundation, Inc.
+# Copyright (C) 2014-2020 Free Software Foundation, Inc.
#
# Contributed by Mentor Embedded.
#
@@ -15320,7 +15347,7 @@ rm -f core conftest.err conftest.$ac_objext \
case "${target}" in
x86_64-*-*)
case " ${CC} ${CFLAGS} " in
- *" -m32 "*)
+ *" -m32 "*|*" -mx32 "*)
PLUGIN_HSA=0
;;
*)
@@ -15360,7 +15387,7 @@ rm -f core conftest.err conftest.$ac_objext \
case "${target}" in
x86_64-*-*)
case " ${CC} ${CFLAGS} " in
- *" -m32 "*)
+ *" -m32 "*|*" -mx32 "*)
PLUGIN_GCN=0
;;
*)
@@ -16713,7 +16740,7 @@ if test "${enable_cet+set}" = set; then :
esac
else
- enable_cet=no
+ enable_cet=auto
fi
@@ -16726,6 +16753,8 @@ case "$host" in
auto)
# Check if target supports multi-byte NOPs
# and if assembler supports CET insn.
+ cet_save_CFLAGS="$CFLAGS"
+ CFLAGS="$CFLAGS -fcf-protection"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
@@ -16749,6 +16778,7 @@ else
enable_cet=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ CFLAGS="$cet_save_CFLAGS"
;;
yes)
# Check if assembler supports CET.
@@ -17020,6 +17050,8 @@ ac_config_files="$ac_config_files Makefile testsuite/Makefile libgomp.spec"
ac_config_files="$ac_config_files testsuite/libgomp-test-support.pt.exp:testsuite/libgomp-test-support.exp.in"
+ac_config_files="$ac_config_files testsuite/libgomp-site-extra.exp"
+
cat >confcache <<\_ACEOF
# This file is a shell script that caches the results of configure
# tests run on this system so they can be shared between configure
@@ -18173,6 +18205,7 @@ do
"testsuite/Makefile") CONFIG_FILES="$CONFIG_FILES testsuite/Makefile" ;;
"libgomp.spec") CONFIG_FILES="$CONFIG_FILES libgomp.spec" ;;
"testsuite/libgomp-test-support.pt.exp") CONFIG_FILES="$CONFIG_FILES testsuite/libgomp-test-support.pt.exp:testsuite/libgomp-test-support.exp.in" ;;
+ "testsuite/libgomp-site-extra.exp") CONFIG_FILES="$CONFIG_FILES testsuite/libgomp-site-extra.exp" ;;
*) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;;
esac
diff --git a/libgomp/configure.ac b/libgomp/configure.ac
index 725f3bf..ef5d293 100644
--- a/libgomp/configure.ac
+++ b/libgomp/configure.ac
@@ -64,6 +64,8 @@ target_alias=${target_alias-$host_alias}
AM_INIT_AUTOMAKE([1.9.0 foreign no-dist -Wall -Wno-portability -Wno-override])
AM_ENABLE_MULTILIB(, ..)
+GCC_WITH_TOOLEXECLIBDIR
+
# Calculate toolexeclibdir
# Also toolexecdir, though it's only used in toolexeclibdir
case ${enable_version_specific_runtime_libs} in
@@ -79,7 +81,14 @@ case ${enable_version_specific_runtime_libs} in
test x"$with_cross_host" != x"no"; then
# Install a library built with a cross compiler in tooldir, not libdir.
toolexecdir='$(exec_prefix)/$(target_alias)'
- toolexeclibdir='$(toolexecdir)/lib'
+ case ${with_toolexeclibdir} in
+ no)
+ toolexeclibdir='$(toolexecdir)/lib'
+ ;;
+ *)
+ toolexeclibdir=${with_toolexeclibdir}
+ ;;
+ esac
else
toolexecdir='$(libdir)/gcc-lib/$(target_alias)'
toolexeclibdir='$(libdir)'
@@ -427,4 +436,5 @@ GCC_BASE_VER
AC_CONFIG_FILES(omp.h omp_lib.h omp_lib.f90 libgomp_f.h)
AC_CONFIG_FILES(Makefile testsuite/Makefile libgomp.spec)
AC_CONFIG_FILES([testsuite/libgomp-test-support.pt.exp:testsuite/libgomp-test-support.exp.in])
+AC_CONFIG_FILES([testsuite/libgomp-site-extra.exp])
AC_OUTPUT
diff --git a/libgomp/env.c b/libgomp/env.c
index dbec3ae..c0c4730 100644
--- a/libgomp/env.c
+++ b/libgomp/env.c
@@ -86,6 +86,7 @@ char *gomp_bind_var_list;
unsigned long gomp_bind_var_list_len;
void **gomp_places_list;
unsigned long gomp_places_list_len;
+uintptr_t gomp_def_allocator = omp_default_mem_alloc;
int gomp_debug_var;
unsigned int gomp_num_teams_var;
bool gomp_display_affinity_var;
@@ -949,8 +950,7 @@ parse_boolean (const char *name, bool *value)
gomp_error ("Invalid value for environment variable %s", name);
}
-/* Parse the OMP_WAIT_POLICY environment variable and store the
- result in gomp_active_wait_policy. */
+/* Parse the OMP_WAIT_POLICY environment variable and return the value. */
static int
parse_wait_policy (void)
@@ -1084,6 +1084,47 @@ parse_affinity (bool ignore)
return false;
}
+/* Parse the OMP_ALLOCATOR environment variable and return the value.
+ The value must be the name of one predefined allocator, matched
+ case-insensitively, with optional surrounding whitespace. If the
+ variable is unset, or the value is invalid (an error is then reported
+ with gomp_error), omp_default_mem_alloc is returned. */
+
+static uintptr_t
+parse_allocator (void)
+{
+ const char *env;
+ uintptr_t ret = omp_default_mem_alloc;
+
+ env = getenv ("OMP_ALLOCATOR");
+ if (env == NULL)
+ return ret;
+
+ while (isspace ((unsigned char) *env))
+ ++env;
+ /* The empty "if (0)" lets every C (...) expansion below begin with
+ "else if", forming one chain. A match records the allocator and
+ advances ENV past the matched name. */
+ if (0)
+ ;
+#define C(v) \
+ else if (strncasecmp (env, #v, sizeof (#v) - 1) == 0) \
+ { \
+ ret = v; \
+ env += sizeof (#v) - 1; \
+ }
+ C (omp_default_mem_alloc)
+ C (omp_large_cap_mem_alloc)
+ C (omp_const_mem_alloc)
+ C (omp_high_bw_mem_alloc)
+ C (omp_low_lat_mem_alloc)
+ C (omp_cgroup_mem_alloc)
+ C (omp_pteam_mem_alloc)
+ C (omp_thread_mem_alloc)
+#undef C
+ else
+ /* No name matched: point ENV at a non-empty string so the check
+ below fails and the error is reported. */
+ env = "X";
+ /* Only whitespace may follow the allocator name. */
+ while (isspace ((unsigned char) *env))
+ ++env;
+ if (*env == '\0')
+ return ret;
+ gomp_error ("Invalid value for environment variable OMP_ALLOCATOR");
+ return omp_default_mem_alloc;
+}
+
static void
parse_acc_device_type (void)
{
@@ -1276,6 +1317,22 @@ handle_omp_display_env (unsigned long stacksize, int wait_policy)
gomp_display_affinity_var ? "TRUE" : "FALSE");
fprintf (stderr, " OMP_AFFINITY_FORMAT = '%s'\n",
gomp_affinity_format_var);
+ fprintf (stderr, " OMP_ALLOCATOR = '");
+ switch (gomp_def_allocator)
+ {
+#define C(v) case v: fputs (#v, stderr); break;
+ C (omp_default_mem_alloc)
+ C (omp_large_cap_mem_alloc)
+ C (omp_const_mem_alloc)
+ C (omp_high_bw_mem_alloc)
+ C (omp_low_lat_mem_alloc)
+ C (omp_cgroup_mem_alloc)
+ C (omp_pteam_mem_alloc)
+ C (omp_thread_mem_alloc)
+#undef C
+ default: break;
+ }
+ fputs ("'\n", stderr);
if (verbose)
{
@@ -1312,6 +1369,7 @@ initialize_env (void)
parse_int ("OMP_MAX_TASK_PRIORITY", &gomp_max_task_priority_var, true);
parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", &gomp_max_active_levels_var,
true);
+ gomp_def_allocator = parse_allocator ();
if (parse_unsigned_long ("OMP_THREAD_LIMIT", &thread_limit_var, false))
{
gomp_global_icv.thread_limit_var
diff --git a/libgomp/icv.c b/libgomp/icv.c
index ff4430e..b13289b 100644
--- a/libgomp/icv.c
+++ b/libgomp/icv.c
@@ -197,6 +197,25 @@ omp_get_partition_place_nums (int *place_nums)
*place_nums++ = thr->ts.place_partition_off + i;
}
+/* Set the calling thread's default allocator (thr->ts.def_allocator) to
+ ALLOCATOR. omp_null_allocator is stored as omp_default_mem_alloc. */
+void
+omp_set_default_allocator (omp_allocator_handle_t allocator)
+{
+ struct gomp_thread *thr = gomp_thread ();
+ if (allocator == omp_null_allocator)
+ allocator = omp_default_mem_alloc;
+ thr->ts.def_allocator = (uintptr_t) allocator;
+}
+
+/* Return the calling thread's default allocator. If the per-thread value
+ is omp_null_allocator, the global gomp_def_allocator is returned
+ instead; the per-thread value itself is not modified. */
+omp_allocator_handle_t
+omp_get_default_allocator (void)
+{
+ struct gomp_thread *thr = gomp_thread ();
+ if (thr->ts.def_allocator == omp_null_allocator)
+ return (omp_allocator_handle_t) gomp_def_allocator;
+ else
+ return (omp_allocator_handle_t) thr->ts.def_allocator;
+}
+
ialias (omp_set_dynamic)
ialias (omp_set_nested)
ialias (omp_set_num_threads)
diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h
index 2559ce0..64f138d 100644
--- a/libgomp/libgomp-plugin.h
+++ b/libgomp/libgomp-plugin.h
@@ -54,13 +54,6 @@ enum offload_target_type
OFFLOAD_TARGET_TYPE_GCN = 8
};
-/* Container type for passing device properties. */
-union gomp_device_property_value
-{
- const char *ptr;
- size_t val;
-};
-
/* Opaque type to represent plugin-dependent implementation of an
OpenACC asynchronous queue. */
struct goacc_asyncqueue;
@@ -75,6 +68,32 @@ struct goacc_asyncqueue_list
typedef struct goacc_asyncqueue *goacc_aq;
typedef struct goacc_asyncqueue_list *goacc_aq_list;
+
+/* OpenACC 'acc_get_property' support. */
+
+/* Device property values. Keep in sync with
+ 'libgomp/{openacc.h,openacc.f90}:acc_device_property_t'.
+ Enumerators that include GOACC_PROPERTY_STRING_MASK (NAME, VENDOR,
+ DRIVER) are the string-valued properties; MEMORY and FREE_MEMORY do
+ not have that bit set. */
+enum goacc_property
+ {
+ /* Mask to tell numeric and string values apart. */
+#define GOACC_PROPERTY_STRING_MASK 0x10000
+
+ /* Start from 1 to catch uninitialized use. */
+ GOACC_PROPERTY_MEMORY = 1,
+ GOACC_PROPERTY_FREE_MEMORY = 2,
+ GOACC_PROPERTY_NAME = GOACC_PROPERTY_STRING_MASK | 1,
+ GOACC_PROPERTY_VENDOR = GOACC_PROPERTY_STRING_MASK | 2,
+ GOACC_PROPERTY_DRIVER = GOACC_PROPERTY_STRING_MASK | 3
+ };
+
+/* Container type for passing device properties. This is the return
+ type of GOMP_OFFLOAD_openacc_get_property. */
+union goacc_property_value
+{
+ /* Presumably used for string-valued properties -- TODO confirm against
+ the plugins. */
+ const char *ptr;
+ /* Presumably used for numeric properties -- TODO confirm. */
+ size_t val;
+};
+
+
/* Auxiliary struct, used for transferring pairs of addresses from plugin
to libgomp. */
struct addr_pair
@@ -101,7 +120,6 @@ extern const char *GOMP_OFFLOAD_get_name (void);
extern unsigned int GOMP_OFFLOAD_get_caps (void);
extern int GOMP_OFFLOAD_get_type (void);
extern int GOMP_OFFLOAD_get_num_devices (void);
-extern union gomp_device_property_value GOMP_OFFLOAD_get_property (int, int);
extern bool GOMP_OFFLOAD_init_device (int);
extern bool GOMP_OFFLOAD_fini_device (int);
extern unsigned GOMP_OFFLOAD_version (void);
@@ -141,6 +159,8 @@ extern void *GOMP_OFFLOAD_openacc_cuda_get_current_context (void);
extern void *GOMP_OFFLOAD_openacc_cuda_get_stream (struct goacc_asyncqueue *);
extern int GOMP_OFFLOAD_openacc_cuda_set_stream (struct goacc_asyncqueue *,
void *);
+extern union goacc_property_value
+ GOMP_OFFLOAD_openacc_get_property (int, enum goacc_property);
#ifdef __cplusplus
}
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 01eb1fb..ca42e0d 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -397,6 +397,9 @@ struct gomp_team_state
unsigned place_partition_off;
unsigned place_partition_len;
+ /* Def-allocator-var ICV. */
+ uintptr_t def_allocator;
+
#ifdef HAVE_SYNC_BUILTINS
/* Number of single stmts encountered. */
unsigned long single_count;
@@ -450,6 +453,7 @@ extern int gomp_debug_var;
extern bool gomp_display_affinity_var;
extern char *gomp_affinity_format_var;
extern size_t gomp_affinity_format_len;
+extern uintptr_t gomp_def_allocator;
extern int goacc_device_num;
extern char *goacc_device_type;
extern int goacc_default_dims[GOMP_DIM_MAX];
@@ -832,7 +836,7 @@ extern void gomp_display_affinity_place (char *, size_t, size_t *, int);
/* affinity-fmt.c */
-extern void gomp_print_string (const char *str, size_t len);
+extern bool gomp_print_string (const char *str, size_t len);
extern void gomp_set_affinity_format (const char *, size_t);
extern void gomp_display_string (char *, size_t, size_t *, const char *,
size_t);
@@ -1068,6 +1072,8 @@ typedef struct acc_dispatch_t
__typeof (GOMP_OFFLOAD_openacc_async_host2dev) *host2dev_func;
} async;
+ __typeof (GOMP_OFFLOAD_openacc_get_property) *get_property_func;
+
/* NVIDIA target specific routines. */
struct {
__typeof (GOMP_OFFLOAD_openacc_cuda_get_current_device)
@@ -1113,7 +1119,6 @@ struct gomp_device_descr
__typeof (GOMP_OFFLOAD_get_caps) *get_caps_func;
__typeof (GOMP_OFFLOAD_get_type) *get_type_func;
__typeof (GOMP_OFFLOAD_get_num_devices) *get_num_devices_func;
- __typeof (GOMP_OFFLOAD_get_property) *get_property_func;
__typeof (GOMP_OFFLOAD_init_device) *init_device_func;
__typeof (GOMP_OFFLOAD_fini_device) *fini_device_func;
__typeof (GOMP_OFFLOAD_version) *version_func;
diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map
index c7268bf..012e3d6 100644
--- a/libgomp/libgomp.map
+++ b/libgomp/libgomp.map
@@ -180,6 +180,16 @@ OMP_5.0 {
omp_pause_resource_all_;
} OMP_4.5;
+OMP_5.0.1 {
+ global:
+ omp_set_default_allocator;
+ omp_get_default_allocator;
+ omp_init_allocator;
+ omp_destroy_allocator;
+ omp_alloc;
+ omp_free;
+} OMP_5.0;
+
GOMP_1.0 {
global:
GOMP_atomic_end;
diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index d3a5b31..b946743 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -1727,9 +1727,9 @@ the stack size is system dependent.
@ref{OMP_STACKSIZE}
@item @emph{Reference}:
-@uref{http://gcc.gnu.org/ml/gcc-patches/2006-06/msg00493.html,
+@uref{https://gcc.gnu.org/ml/gcc-patches/2006-06/msg00493.html,
GCC Patches Mailinglist},
-@uref{http://gcc.gnu.org/ml/gcc-patches/2006-06/msg00496.html,
+@uref{https://gcc.gnu.org/ml/gcc-patches/2006-06/msg00496.html,
GCC Patches Mailinglist}
@end table
@@ -1811,20 +1811,18 @@ pools available and their worker threads run at priority four.
To activate the OpenACC extensions for C/C++ and Fortran, the compile-time
flag @option{-fopenacc} must be specified. This enables the OpenACC directive
-@code{#pragma acc} in C/C++ and @code{!$accp} directives in free form,
+@code{#pragma acc} in C/C++ and @code{!$acc} directives in free form,
@code{c$acc}, @code{*$acc} and @code{!$acc} directives in fixed form,
@code{!$} conditional compilation sentinels in free form and @code{c$},
@code{*$} and @code{!$} sentinels in fixed form, for Fortran. The flag also
arranges for automatic linking of the OpenACC runtime library
(@ref{OpenACC Runtime Library Routines}).
+See @uref{https://gcc.gnu.org/wiki/OpenACC} for more information.
+
A complete description of all OpenACC directives accepted may be found in
the @uref{https://www.openacc.org, OpenACC} Application Programming
-Interface manual, version 2.0.
-
-Note that this is an experimental feature and subject to
-change in future versions of GCC. See
-@uref{https://gcc.gnu.org/wiki/OpenACC} for more information.
+Interface manual, version 2.6.
@@ -1836,7 +1834,7 @@ change in future versions of GCC. See
@chapter OpenACC Runtime Library Routines
The runtime routines described here are defined by section 3 of the OpenACC
-specifications in version 2.0.
+specifications in version 2.6.
They have C linkage, and do not throw exceptions.
Generally, they are available only for the host, with the exception of
@code{acc_on_device}, which is available for both the host and the
@@ -1852,11 +1850,11 @@ acceleration device.
* acc_get_property:: Get device property.
* acc_async_test:: Tests for completion of a specific asynchronous
operation.
-* acc_async_test_all:: Tests for completion of all asychronous
+* acc_async_test_all:: Tests for completion of all asynchronous
operations.
* acc_wait:: Wait for completion of a specific asynchronous
operation.
-* acc_wait_all:: Waits for completion of all asyncrhonous
+* acc_wait_all:: Waits for completion of all asynchronous
operations.
* acc_wait_all_async:: Wait for completion of all asynchronous
operations.
@@ -1892,6 +1890,8 @@ acceleration device.
present on device.
* acc_memcpy_to_device:: Copy host memory to device memory.
* acc_memcpy_from_device:: Copy device memory to host memory.
+* acc_attach:: Let device pointer point to device-pointer target.
+* acc_detach:: Let device pointer point to host-pointer target.
API routines for target platforms.
@@ -1929,7 +1929,7 @@ for the device type specified in @var{devicetype}.
@end multitable
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
3.2.1.
@end table
@@ -1939,7 +1939,7 @@ for the device type specified in @var{devicetype}.
@section @code{acc_set_device_type} -- Set type of device accelerator to use.
@table @asis
@item @emph{Description}
-This function indicates to the runtime library which device typr, specified
+This function indicates to the runtime library which device type, specified
in @var{devicetype}, to use when executing a parallel or kernels region.
@item @emph{C/C++}:
@@ -1954,7 +1954,7 @@ in @var{devicetype}, to use when executing a parallel or kernels region.
@end multitable
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
3.2.2.
@end table
@@ -1979,7 +1979,7 @@ parallel or kernels region.
@end multitable
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
3.2.3.
@end table
@@ -1990,12 +1990,12 @@ parallel or kernels region.
@table @asis
@item @emph{Description}
This function will indicate to the runtime which device number,
-specified by @var{num}, associated with the specifed device
+specified by @var{devicenum}, associated with the specified device
type @var{devicetype}.
@item @emph{C/C++}:
@multitable @columnfractions .20 .80
-@item @emph{Prototype}: @tab @code{acc_set_device_num(int num, acc_device_t devicetype);}
+@item @emph{Prototype}: @tab @code{acc_set_device_num(int devicenum, acc_device_t devicetype);}
@end multitable
@item @emph{Fortran}:
@@ -2006,7 +2006,7 @@ type @var{devicetype}.
@end multitable
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
3.2.4.
@end table
@@ -2033,7 +2033,7 @@ region.
@end multitable
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
3.2.5.
@end table
@@ -2053,6 +2053,14 @@ The Fortran @code{acc_get_property_string} subroutine returns the string
retrieved in its fourth argument while the remaining entry points are
functions, which pass the return value as their result.
+Note for Fortran only: the OpenACC technical committee corrected and, hence,
+modified the interface introduced in OpenACC 2.6. The kind-value parameter
+@code{acc_device_property} has been renamed to @code{acc_device_property_kind}
+for consistency and the return type of the @code{acc_get_property} function is
+now a @code{c_size_t} integer instead of a @code{acc_device_property} integer.
+The parameter @code{acc_device_property} will continue to be provided,
+but might be removed in a future version of GCC.
+
@item @emph{C/C++}:
@multitable @columnfractions .20 .80
@item @emph{Prototype}: @tab @code{size_t acc_get_property(int devicenum, acc_device_t devicetype, acc_device_property_t property);}
@@ -2063,10 +2071,11 @@ functions, which pass the return value as their result.
@multitable @columnfractions .20 .80
@item @emph{Interface}: @tab @code{function acc_get_property(devicenum, devicetype, property)}
@item @emph{Interface}: @tab @code{subroutine acc_get_property_string(devicenum, devicetype, property, string)}
+@item @tab @code{use ISO_C_Binding, only: c_size_t}
@item @tab @code{integer devicenum}
@item @tab @code{integer(kind=acc_device_kind) devicetype}
-@item @tab @code{integer(kind=acc_device_property) property}
-@item @tab @code{integer(kind=acc_device_property) acc_get_property}
+@item @tab @code{integer(kind=acc_device_property_kind) property}
+@item @tab @code{integer(kind=c_size_t) acc_get_property}
@item @tab @code{character(*) string}
@end multitable
@@ -2100,8 +2109,8 @@ a zero and Fortran returns a @code{false}.
@end multitable
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
-3.2.6.
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
+3.2.9.
@end table
@@ -2128,8 +2137,8 @@ Fortran returns a @code{false}.
@end multitable
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
-3.2.7.
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
+3.2.10.
@end table
@@ -2156,8 +2165,8 @@ specified in @var{arg}.
@end multitable
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
-3.2.8.
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
+3.2.11.
@end table
@@ -2181,8 +2190,8 @@ This function waits for the completion of all asynchronous operations.
@end multitable
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
-3.2.10.
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
+3.2.13.
@end table
@@ -2207,8 +2216,8 @@ any queue.
@end multitable
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
-3.2.11.
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
+3.2.14.
@end table
@@ -2232,8 +2241,8 @@ asynchronous operations enqueued on queue @var{arg}.
@end multitable
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
-3.2.9.
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
+3.2.12.
@end table
@@ -2257,8 +2266,8 @@ This function initializes the runtime for the device type specified in
@end multitable
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
-3.2.12.
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
+3.2.7.
@end table
@@ -2282,8 +2291,8 @@ This function shuts down the runtime for the device type specified in
@end multitable
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
-3.2.13.
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
+3.2.8.
@end table
@@ -2313,8 +2322,8 @@ return @code{false}.
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
-3.2.14.
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
+3.2.17.
@end table
@@ -2332,8 +2341,8 @@ the device address of the allocated memory.
@end multitable
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
-3.2.15.
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
+3.2.18.
@end table
@@ -2350,8 +2359,8 @@ Free previously allocated device memory at the device address @code{a}.
@end multitable
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
-3.2.16.
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
+3.2.19.
@end table
@@ -2371,6 +2380,7 @@ variable or array element and @var{len} specifies the length in bytes.
@item @emph{C/C++}:
@multitable @columnfractions .20 .80
@item @emph{Prototype}: @tab @code{void *acc_copyin(h_void *a, size_t len);}
+@item @emph{Prototype}: @tab @code{void *acc_copyin_async(h_void *a, size_t len, int async);}
@end multitable
@item @emph{Fortran}:
@@ -2380,11 +2390,18 @@ variable or array element and @var{len} specifies the length in bytes.
@item @emph{Interface}: @tab @code{subroutine acc_copyin(a, len)}
@item @tab @code{type, dimension(:[,:]...) :: a}
@item @tab @code{integer len}
+@item @emph{Interface}: @tab @code{subroutine acc_copyin_async(a, async)}
+@item @tab @code{type, dimension(:[,:]...) :: a}
+@item @tab @code{integer(acc_handle_kind) :: async}
+@item @emph{Interface}: @tab @code{subroutine acc_copyin_async(a, len, async)}
+@item @tab @code{type, dimension(:[,:]...) :: a}
+@item @tab @code{integer len}
+@item @tab @code{integer(acc_handle_kind) :: async}
@end multitable
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
-3.2.17.
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
+3.2.20.
@end table
@@ -2393,7 +2410,7 @@ variable or array element and @var{len} specifies the length in bytes.
@section @code{acc_present_or_copyin} -- If the data is not present on the device, allocate device memory and copy from host memory.
@table @asis
@item @emph{Description}
-This function tests if the host data specifed by @var{a} and of length
+This function tests if the host data specified by @var{a} and of length
@var{len} is present or not. If it is not present, then device memory
will be allocated and the host memory copied. The device address of
the newly allocated device memory is returned.
@@ -2402,6 +2419,9 @@ In Fortran, two (2) forms are supported. In the first form, @var{a} specifies
a contiguous array section. The second form @var{a} specifies a variable or
array element and @var{len} specifies the length in bytes.
+Note that @code{acc_present_or_copyin} and @code{acc_pcopyin} exist for
+backward compatibility with OpenACC 2.0; use @ref{acc_copyin} instead.
+
@item @emph{C/C++}:
@multitable @columnfractions .20 .80
@item @emph{Prototype}: @tab @code{void *acc_present_or_copyin(h_void *a, size_t len);}
@@ -2423,8 +2443,8 @@ array element and @var{len} specifies the length in bytes.
@end multitable
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
-3.2.18.
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
+3.2.20.
@end table
@@ -2444,6 +2464,7 @@ array element and @var{len} specifies the length in bytes.
@item @emph{C/C++}:
@multitable @columnfractions .20 .80
@item @emph{Prototype}: @tab @code{void *acc_create(h_void *a, size_t len);}
+@item @emph{Prototype}: @tab @code{void *acc_create_async(h_void *a, size_t len, int async);}
@end multitable
@item @emph{Fortran}:
@@ -2453,11 +2474,18 @@ array element and @var{len} specifies the length in bytes.
@item @emph{Interface}: @tab @code{subroutine acc_create(a, len)}
@item @tab @code{type, dimension(:[,:]...) :: a}
@item @tab @code{integer len}
+@item @emph{Interface}: @tab @code{subroutine acc_create_async(a, async)}
+@item @tab @code{type, dimension(:[,:]...) :: a}
+@item @tab @code{integer(acc_handle_kind) :: async}
+@item @emph{Interface}: @tab @code{subroutine acc_create_async(a, len, async)}
+@item @tab @code{type, dimension(:[,:]...) :: a}
+@item @tab @code{integer len}
+@item @tab @code{integer(acc_handle_kind) :: async}
@end multitable
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
-3.2.19.
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
+3.2.21.
@end table
@@ -2466,7 +2494,7 @@ array element and @var{len} specifies the length in bytes.
@section @code{acc_present_or_create} -- If the data is not present on the device, allocate device memory and map it to host memory.
@table @asis
@item @emph{Description}
-This function tests if the host data specifed by @var{a} and of length
+This function tests if the host data specified by @var{a} and of length
@var{len} is present or not. If it is not present, then device memory
will be allocated and mapped to host memory. In C/C++, the device address
of the newly allocated device memory is returned.
@@ -2475,6 +2503,8 @@ In Fortran, two (2) forms are supported. In the first form, @var{a} specifies
a contiguous array section. The second form @var{a} specifies a variable or
array element and @var{len} specifies the length in bytes.
+Note that @code{acc_present_or_create} and @code{acc_pcreate} exist for
+backward compatibility with OpenACC 2.0; use @ref{acc_create} instead.
@item @emph{C/C++}:
@multitable @columnfractions .20 .80
@@ -2497,8 +2527,8 @@ array element and @var{len} specifies the length in bytes.
@end multitable
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
-3.2.20.
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
+3.2.21.
@end table
@@ -2517,6 +2547,9 @@ array element and @var{len} specifies the length in bytes.
@item @emph{C/C++}:
@multitable @columnfractions .20 .80
@item @emph{Prototype}: @tab @code{acc_copyout(h_void *a, size_t len);}
+@item @emph{Prototype}: @tab @code{acc_copyout_async(h_void *a, size_t len, int async);}
+@item @emph{Prototype}: @tab @code{acc_copyout_finalize(h_void *a, size_t len);}
+@item @emph{Prototype}: @tab @code{acc_copyout_finalize_async(h_void *a, size_t len, int async);}
@end multitable
@item @emph{Fortran}:
@@ -2526,11 +2559,30 @@ array element and @var{len} specifies the length in bytes.
@item @emph{Interface}: @tab @code{subroutine acc_copyout(a, len)}
@item @tab @code{type, dimension(:[,:]...) :: a}
@item @tab @code{integer len}
+@item @emph{Interface}: @tab @code{subroutine acc_copyout_async(a, async)}
+@item @tab @code{type, dimension(:[,:]...) :: a}
+@item @tab @code{integer(acc_handle_kind) :: async}
+@item @emph{Interface}: @tab @code{subroutine acc_copyout_async(a, len, async)}
+@item @tab @code{type, dimension(:[,:]...) :: a}
+@item @tab @code{integer len}
+@item @tab @code{integer(acc_handle_kind) :: async}
+@item @emph{Interface}: @tab @code{subroutine acc_copyout_finalize(a)}
+@item @tab @code{type, dimension(:[,:]...) :: a}
+@item @emph{Interface}: @tab @code{subroutine acc_copyout_finalize(a, len)}
+@item @tab @code{type, dimension(:[,:]...) :: a}
+@item @tab @code{integer len}
+@item @emph{Interface}: @tab @code{subroutine acc_copyout_finalize_async(a, async)}
+@item @tab @code{type, dimension(:[,:]...) :: a}
+@item @tab @code{integer(acc_handle_kind) :: async}
+@item @emph{Interface}: @tab @code{subroutine acc_copyout_finalize_async(a, len, async)}
+@item @tab @code{type, dimension(:[,:]...) :: a}
+@item @tab @code{integer len}
+@item @tab @code{integer(acc_handle_kind) :: async}
@end multitable
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
-3.2.21.
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
+3.2.22.
@end table
@@ -2549,6 +2601,9 @@ array element and @var{len} specifies the length in bytes.
@item @emph{C/C++}:
@multitable @columnfractions .20 .80
@item @emph{Prototype}: @tab @code{acc_delete(h_void *a, size_t len);}
+@item @emph{Prototype}: @tab @code{acc_delete_async(h_void *a, size_t len, int async);}
+@item @emph{Prototype}: @tab @code{acc_delete_finalize(h_void *a, size_t len);}
+@item @emph{Prototype}: @tab @code{acc_delete_finalize_async(h_void *a, size_t len, int async);}
@end multitable
@item @emph{Fortran}:
@@ -2558,11 +2613,30 @@ array element and @var{len} specifies the length in bytes.
@item @emph{Interface}: @tab @code{subroutine acc_delete(a, len)}
@item @tab @code{type, dimension(:[,:]...) :: a}
@item @tab @code{integer len}
+@item @emph{Interface}: @tab @code{subroutine acc_delete_async(a, async)}
+@item @tab @code{type, dimension(:[,:]...) :: a}
+@item @tab @code{integer(acc_handle_kind) :: async}
+@item @emph{Interface}: @tab @code{subroutine acc_delete_async(a, len, async)}
+@item @tab @code{type, dimension(:[,:]...) :: a}
+@item @tab @code{integer len}
+@item @tab @code{integer(acc_handle_kind) :: async}
+@item @emph{Interface}: @tab @code{subroutine acc_delete_finalize(a)}
+@item @tab @code{type, dimension(:[,:]...) :: a}
+@item @emph{Interface}: @tab @code{subroutine acc_delete_finalize(a, len)}
+@item @tab @code{type, dimension(:[,:]...) :: a}
+@item @tab @code{integer len}
+@item @emph{Interface}: @tab @code{subroutine acc_delete_finalize_async(a, async)}
+@item @tab @code{type, dimension(:[,:]...) :: a}
+@item @tab @code{integer(acc_handle_kind) :: async}
+@item @emph{Interface}: @tab @code{subroutine acc_delete_finalize_async(a, len, async)}
+@item @tab @code{type, dimension(:[,:]...) :: a}
+@item @tab @code{integer len}
+@item @tab @code{integer(acc_handle_kind) :: async}
@end multitable
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
-3.2.22.
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
+3.2.23.
@end table
@@ -2582,6 +2656,7 @@ array element and @var{len} specifies the length in bytes.
@item @emph{C/C++}:
@multitable @columnfractions .20 .80
@item @emph{Prototype}: @tab @code{acc_update_device(h_void *a, size_t len);}
+@item @emph{Prototype}: @tab @code{acc_update_device_async(h_void *a, size_t len, int async);}
@end multitable
@item @emph{Fortran}:
@@ -2591,11 +2666,18 @@ array element and @var{len} specifies the length in bytes.
@item @emph{Interface}: @tab @code{subroutine acc_update_device(a, len)}
@item @tab @code{type, dimension(:[,:]...) :: a}
@item @tab @code{integer len}
+@item @emph{Interface}: @tab @code{subroutine acc_update_device_async(a, async)}
+@item @tab @code{type, dimension(:[,:]...) :: a}
+@item @tab @code{integer(acc_handle_kind) :: async}
+@item @emph{Interface}: @tab @code{subroutine acc_update_device_async(a, len, async)}
+@item @tab @code{type, dimension(:[,:]...) :: a}
+@item @tab @code{integer len}
+@item @tab @code{integer(acc_handle_kind) :: async}
@end multitable
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
-3.2.23.
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
+3.2.24.
@end table
@@ -2615,6 +2697,7 @@ array element and @var{len} specifies the length in bytes.
@item @emph{C/C++}:
@multitable @columnfractions .20 .80
@item @emph{Prototype}: @tab @code{acc_update_self(h_void *a, size_t len);}
+@item @emph{Prototype}: @tab @code{acc_update_self_async(h_void *a, size_t len, int async);}
@end multitable
@item @emph{Fortran}:
@@ -2624,11 +2707,18 @@ array element and @var{len} specifies the length in bytes.
@item @emph{Interface}: @tab @code{subroutine acc_update_self(a, len)}
@item @tab @code{type, dimension(:[,:]...) :: a}
@item @tab @code{integer len}
+@item @emph{Interface}: @tab @code{subroutine acc_update_self_async(a, async)}
+@item @tab @code{type, dimension(:[,:]...) :: a}
+@item @tab @code{integer(acc_handle_kind) :: async}
+@item @emph{Interface}: @tab @code{subroutine acc_update_self_async(a, len, async)}
+@item @tab @code{type, dimension(:[,:]...) :: a}
+@item @tab @code{integer len}
+@item @tab @code{integer(acc_handle_kind) :: async}
@end multitable
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
-3.2.24.
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
+3.2.25.
@end table
@@ -2647,8 +2737,8 @@ specified with the host address @var{h} and a length of @var{len}.
@end multitable
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
-3.2.25.
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
+3.2.26.
@end table
@@ -2666,8 +2756,8 @@ specified by @var{h}.
@end multitable
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
-3.2.26.
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
+3.2.27.
@end table
@@ -2685,8 +2775,8 @@ host address specified by @var{h}.
@end multitable
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
-3.2.27.
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
+3.2.28.
@end table
@@ -2704,8 +2794,8 @@ device address specified by @var{d}.
@end multitable
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
-3.2.28.
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
+3.2.29.
@end table
@@ -2743,8 +2833,8 @@ a @code{false} is return to indicate the mapped memory is not present.
@end multitable
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
-3.2.29.
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
+3.2.30.
@end table
@@ -2763,8 +2853,8 @@ device memory specified by the device address @var{dest} for a length of
@end multitable
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
-3.2.30.
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
+3.2.31.
@end table
@@ -2783,8 +2873,50 @@ device memory specified by the device address @var{dest} for a length of
@end multitable
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
-3.2.31.
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
+3.2.32.
+@end table
+
+
+
+@node acc_attach
+@section @code{acc_attach} -- Let device pointer point to device-pointer target.
+@table @asis
+@item @emph{Description}
+This function updates a pointer on the device from pointing to a host-pointer
+address to pointing to the corresponding device data.
+
+@item @emph{C/C++}:
+@multitable @columnfractions .20 .80
+@item @emph{Prototype}: @tab @code{acc_attach(h_void **ptr);}
+@item @emph{Prototype}: @tab @code{acc_attach_async(h_void **ptr, int async);}
+@end multitable
+
+@item @emph{Reference}:
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
+3.2.34.
+@end table
+
+
+
+@node acc_detach
+@section @code{acc_detach} -- Let device pointer point to host-pointer target.
+@table @asis
+@item @emph{Description}
+This function updates a pointer on the device from pointing to a device-pointer
+address to pointing to the corresponding host data.
+
+@item @emph{C/C++}:
+@multitable @columnfractions .20 .80
+@item @emph{Prototype}: @tab @code{acc_detach(h_void **ptr);}
+@item @emph{Prototype}: @tab @code{acc_detach_async(h_void **ptr, int async);}
+@item @emph{Prototype}: @tab @code{acc_detach_finalize(h_void **ptr);}
+@item @emph{Prototype}: @tab @code{acc_detach_finalize_async(h_void **ptr, int async);}
+@end multitable
+
+@item @emph{Reference}:
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
+3.2.35.
@end table
@@ -2802,7 +2934,7 @@ as used by the CUDA Runtime or Driver API's.
@end multitable
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
A.2.1.1.
@end table
@@ -2821,7 +2953,7 @@ as used by the CUDA Runtime or Driver API's.
@end multitable
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
A.2.1.2.
@end table
@@ -2840,7 +2972,7 @@ This handle is the same as used by the CUDA Runtime or Driver API's.
@end multitable
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
A.2.1.3.
@end table
@@ -2864,7 +2996,7 @@ The return value is not specified.
@end multitable
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
A.2.1.4.
@end table
@@ -2980,7 +3112,7 @@ The variable @env{GCC_ACC_NOTIFY} is used for diagnostic purposes.
@section @code{ACC_DEVICE_TYPE}
@table @asis
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
4.1.
@end table
@@ -2990,7 +3122,7 @@ The variable @env{GCC_ACC_NOTIFY} is used for diagnostic purposes.
@section @code{ACC_DEVICE_NUM}
@table @asis
@item @emph{Reference}:
-@uref{https://www.openacc.org, OpenACC specification v2.0}, section
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
4.2.
@end table
@@ -3033,7 +3165,7 @@ asynchronous functionality is implemented by making use of CUDA
streams@footnote{See "Stream Management" in "CUDA Driver API",
TRM-06703-001, Version 5.5, for additional information}.
-The primary means by that the asychronous functionality is accessed
+The primary means by which the asynchronous functionality is accessed
is through the use of those OpenACC directives which make use of the
@code{async} and @code{wait} clauses. When the @code{async} clause is
first used with a directive, it creates a CUDA stream. If an
@@ -3206,8 +3338,8 @@ similarly to the first use case.
There are two environment variables associated with the OpenACC library
that may be used to control the device type and device number:
-@env{ACC_DEVICE_TYPE} and @env{ACC_DEVICE_NUM}, respecively. These two
-environement variables can be used as an alternative to calling
+@env{ACC_DEVICE_TYPE} and @env{ACC_DEVICE_NUM}, respectively. These two
+environment variables can be used as an alternative to calling
@code{acc_set_device_num()}. As seen in the second use case, the device
type and device number were specified using @code{acc_set_device_num()}.
If however, the aforementioned environment variables were set, then the
@@ -3220,7 +3352,7 @@ is called prior to a call to an OpenACC function, then you must call
@code{acc_set_device_num()}@footnote{More complete information
about @env{ACC_DEVICE_TYPE} and @env{ACC_DEVICE_NUM} can be found in
sections 4.1 and 4.2 of the @uref{https://www.openacc.org, OpenACC}
-Application Programming Interface”, Version 2.0.}
+Application Programming Interface, Version 2.6.}
@@ -3935,7 +4067,7 @@ becomes
@chapter Reporting Bugs
Bugs in the GNU Offloading and Multi Processing Runtime Library should
-be reported via @uref{http://gcc.gnu.org/bugzilla/, Bugzilla}. Please add
+be reported via @uref{https://gcc.gnu.org/bugzilla/, Bugzilla}. Please add
"openacc", or "openmp", or both to the keywords field in the bug
report, as appropriate.
diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c
index 22c1894..4638789 100644
--- a/libgomp/oacc-host.c
+++ b/libgomp/oacc-host.c
@@ -59,27 +59,6 @@ host_get_num_devices (void)
return 1;
}
-static union gomp_device_property_value
-host_get_property (int n, int prop)
-{
- union gomp_device_property_value nullval = { .val = 0 };
-
- if (n >= host_get_num_devices ())
- return nullval;
-
- switch (prop)
- {
- case GOMP_DEVICE_PROPERTY_NAME:
- return (union gomp_device_property_value) { .ptr = "GOMP" };
- case GOMP_DEVICE_PROPERTY_VENDOR:
- return (union gomp_device_property_value) { .ptr = "GNU" };
- case GOMP_DEVICE_PROPERTY_DRIVER:
- return (union gomp_device_property_value) { .ptr = VERSION };
- default:
- return nullval;
- }
-}
-
static bool
host_init_device (int n __attribute__ ((unused)))
{
@@ -245,6 +224,29 @@ host_openacc_async_destruct (struct goacc_asyncqueue *aq
return true;
}
+static union goacc_property_value
+host_openacc_get_property (int n, enum goacc_property prop)
+{
+ union goacc_property_value nullval = { .val = 0 };
+
+ if (n >= host_get_num_devices ())
+ return nullval;
+
+ switch (prop)
+ {
+ case GOACC_PROPERTY_NAME:
+ return (union goacc_property_value) { .ptr = "GOMP" };
+ case GOACC_PROPERTY_VENDOR:
+ return (union goacc_property_value) { .ptr = "GNU" };
+ case GOACC_PROPERTY_DRIVER:
+ return (union goacc_property_value) { .ptr = VERSION };
+ case GOACC_PROPERTY_MEMORY:
+ case GOACC_PROPERTY_FREE_MEMORY:
+ default:
+ return nullval;
+ }
+}
+
static void *
host_openacc_create_thread_data (int ord __attribute__ ((unused)))
{
@@ -269,7 +271,6 @@ static struct gomp_device_descr host_dispatch =
.get_caps_func = host_get_caps,
.get_type_func = host_get_type,
.get_num_devices_func = host_get_num_devices,
- .get_property_func = host_get_property,
.init_device_func = host_init_device,
.fini_device_func = host_fini_device,
.version_func = host_version,
@@ -303,6 +304,8 @@ static struct gomp_device_descr host_dispatch =
.host2dev_func = host_openacc_async_host2dev,
},
+ .get_property_func = host_openacc_get_property,
+
.cuda = {
.get_current_device_func = NULL,
.get_current_context_func = NULL,
diff --git a/libgomp/oacc-init.c b/libgomp/oacc-init.c
index d15f08f..5d786a5 100644
--- a/libgomp/oacc-init.c
+++ b/libgomp/oacc-init.c
@@ -99,7 +99,9 @@ unknown_device_type_error (acc_device_t invalid_type)
static const char *
get_openacc_name (const char *name)
{
- if (strcmp (name, "nvptx") == 0)
+ if (strcmp (name, "gcn") == 0)
+ return "radeon";
+ else if (strcmp (name, "nvptx") == 0)
return "nvidia";
else
return name;
@@ -115,6 +117,7 @@ name_of_acc_device_t (enum acc_device_t type)
case acc_device_host: return "host";
case acc_device_not_host: return "not_host";
case acc_device_nvidia: return "nvidia";
+ case acc_device_radeon: return "radeon";
default: unknown_device_type_error (type);
}
__builtin_unreachable ();
@@ -760,14 +763,14 @@ acc_set_device_num (int ord, acc_device_t d)
ialias (acc_set_device_num)
-static union gomp_device_property_value
+static union goacc_property_value
get_property_any (int ord, acc_device_t d, acc_device_property_t prop)
{
goacc_lazy_initialize ();
struct goacc_thread *thr = goacc_thread ();
if (d == acc_device_current && thr && thr->dev)
- return thr->dev->get_property_func (thr->dev->target_id, prop);
+ return thr->dev->openacc.get_property_func (thr->dev->target_id, prop);
gomp_mutex_lock (&acc_device_lock);
@@ -789,7 +792,7 @@ get_property_any (int ord, acc_device_t d, acc_device_property_t prop)
assert (dev);
- return dev->get_property_func (dev->target_id, prop);
+ return dev->openacc.get_property_func (dev->target_id, prop);
}
size_t
@@ -798,7 +801,7 @@ acc_get_property (int ord, acc_device_t d, acc_device_property_t prop)
if (!known_device_type_p (d))
unknown_device_type_error(d);
- if (prop & GOMP_DEVICE_PROPERTY_STRING_MASK)
+ if (prop & GOACC_PROPERTY_STRING_MASK)
return 0;
else
return get_property_any (ord, d, prop).val;
@@ -812,7 +815,7 @@ acc_get_property_string (int ord, acc_device_t d, acc_device_property_t prop)
if (!known_device_type_p (d))
unknown_device_type_error(d);
- if (prop & GOMP_DEVICE_PROPERTY_STRING_MASK)
+ if (prop & GOACC_PROPERTY_STRING_MASK)
return get_property_any (ord, d, prop).ptr;
else
return NULL;
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index 2d4bba7..936ae649 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -355,7 +355,6 @@ acc_is_present (void *h, size_t s)
void
acc_map_data (void *h, void *d, size_t s)
{
- struct target_mem_desc *tgt = NULL;
size_t mapnum = 1;
void *hostaddrs = h;
void *devaddrs = d;
@@ -402,10 +401,13 @@ acc_map_data (void *h, void *d, size_t s)
gomp_mutex_unlock (&acc_dev->lock);
- tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
- &kinds, true, GOMP_MAP_VARS_ENTER_DATA);
+ struct target_mem_desc *tgt
+ = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
+ &kinds, true, GOMP_MAP_VARS_ENTER_DATA);
assert (tgt);
+ assert (tgt->list_count == 1);
splay_tree_key n = tgt->list[0].key;
+ assert (n);
assert (n->refcount == 1);
assert (n->virtual_refcount == 0);
/* Special reference counting behavior. */
@@ -466,8 +468,6 @@ acc_unmap_data (void *h)
(void *) h, (int) host_size);
}
- splay_tree_remove (&acc_dev->mem_map, n);
-
struct target_mem_desc *tgt = n->tgt;
if (tgt->refcount == REFCOUNT_INFINITY)
@@ -475,13 +475,18 @@ acc_unmap_data (void *h)
gomp_mutex_unlock (&acc_dev->lock);
gomp_fatal ("cannot unmap target block");
}
- else if (tgt->refcount > 1)
- tgt->refcount--;
- else
- {
- free (tgt->array);
- free (tgt);
- }
+
+ /* Above, we've verified that the mapping must have been set up by
+ 'acc_map_data'. */
+ assert (tgt->refcount == 1);
+
+ /* Nullifying these fields prevents 'gomp_unmap_tgt' via 'gomp_remove_var'
+ from freeing the target memory. */
+ tgt->tgt_end = 0;
+ tgt->to_free = NULL;
+
+ bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
+ assert (is_tgt_unmapped);
gomp_mutex_unlock (&acc_dev->lock);
@@ -555,16 +560,17 @@ goacc_enter_datum (void **hostaddrs, size_t *sizes, void *kinds, int async)
goacc_aq aq = get_goacc_asyncqueue (async);
- gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds,
- true, GOMP_MAP_VARS_OPENACC_ENTER_DATA);
+ struct target_mem_desc *tgt
+ = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes,
+ kinds, true, GOMP_MAP_VARS_OPENACC_ENTER_DATA);
+ assert (tgt);
+ assert (tgt->list_count == 1);
+ n = tgt->list[0].key;
+ assert (n);
+ assert (n->refcount == 1);
+ assert (n->virtual_refcount == 0);
- gomp_mutex_lock (&acc_dev->lock);
- n = lookup_host (acc_dev, hostaddrs[0], sizes[0]);
- assert (n != NULL);
- assert (n->tgt_offset == 0);
- assert ((uintptr_t) hostaddrs[0] == n->host_start);
- d = (void *) n->tgt->tgt_start;
- gomp_mutex_unlock (&acc_dev->lock);
+ d = (void *) tgt->tgt_start;
}
if (profiling_p)
@@ -722,8 +728,16 @@ goacc_exit_datum (void *h, size_t s, unsigned short kind, int async)
gomp_remove_var_async (acc_dev, n, aq);
else
{
+ size_t num_mappings = 0;
+ /* If the target_mem_desc represents a single data mapping, we can
+ check that it is freed when this splay tree key's refcount reaches
+ zero. Otherwise (e.g. for a 'GOMP_MAP_STRUCT' mapping with
+ multiple members), fall back to skipping the test. */
+ for (size_t l_i = 0; l_i < n->tgt->list_count; ++l_i)
+ if (n->tgt->list[l_i].key)
+ ++num_mappings;
bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
- assert (is_tgt_unmapped);
+ assert (is_tgt_unmapped || num_mappings > 1);
}
}
@@ -887,7 +901,10 @@ acc_attach_async (void **hostaddr, int async)
n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
if (n == NULL)
- gomp_fatal ("struct not mapped for acc_attach");
+ {
+ gomp_mutex_unlock (&acc_dev->lock);
+ gomp_fatal ("struct not mapped for acc_attach");
+ }
gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, (uintptr_t) hostaddr,
0, NULL);
@@ -920,7 +937,10 @@ goacc_detach_internal (void **hostaddr, int async, bool finalize)
n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
if (n == NULL)
- gomp_fatal ("struct not mapped for acc_detach");
+ {
+ gomp_mutex_unlock (&acc_dev->lock);
+ gomp_fatal ("struct not mapped for acc_detach");
+ }
gomp_detach_pointer (acc_dev, aq, n, (uintptr_t) hostaddr, finalize, NULL);
@@ -1054,7 +1074,10 @@ goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
= splay_tree_lookup (&acc_dev->mem_map, &cur_node);
if (n == NULL)
- gomp_fatal ("struct not mapped for detach operation");
+ {
+ gomp_mutex_unlock (&acc_dev->lock);
+ gomp_fatal ("struct not mapped for detach operation");
+ }
gomp_detach_pointer (acc_dev, aq, n, hostaddr, finalize, NULL);
}
@@ -1131,45 +1154,38 @@ goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
cur_node.host_end - cur_node.host_start);
if (n->refcount == 0)
- gomp_remove_var_async (acc_dev, n, aq);
- }
- break;
-
- case GOMP_MAP_STRUCT:
- {
- int elems = sizes[i];
- for (int j = 1; j <= elems; j++)
{
- struct splay_tree_key_s k;
- k.host_start = (uintptr_t) hostaddrs[i + j];
- k.host_end = k.host_start + sizes[i + j];
- splay_tree_key str;
- str = splay_tree_lookup (&acc_dev->mem_map, &k);
- if (str)
+ if (aq)
+ /* TODO We can't do the 'is_tgt_unmapped' checking -- see the
+ 'gomp_unref_tgt' comment in
+ <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>;
+ PR92881. */
+ gomp_remove_var_async (acc_dev, n, aq);
+ else
{
- if (finalize)
- {
- if (str->refcount != REFCOUNT_INFINITY)
- str->refcount -= str->virtual_refcount;
- str->virtual_refcount = 0;
- }
- if (str->virtual_refcount > 0)
- {
- if (str->refcount != REFCOUNT_INFINITY)
- str->refcount--;
- str->virtual_refcount--;
- }
- else if (str->refcount > 0
- && str->refcount != REFCOUNT_INFINITY)
- str->refcount--;
- if (str->refcount == 0)
- gomp_remove_var_async (acc_dev, str, aq);
+ size_t num_mappings = 0;
+ /* If the target_mem_desc represents a single data mapping,
+ we can check that it is freed when this splay tree key's
+ refcount reaches zero. Otherwise (e.g. for a
+ 'GOMP_MAP_STRUCT' mapping with multiple members), fall
+ back to skipping the test. */
+ for (size_t l_i = 0; l_i < n->tgt->list_count; ++l_i)
+ if (n->tgt->list[l_i].key)
+ ++num_mappings;
+ bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
+ assert (is_tgt_unmapped || num_mappings > 1);
}
}
- i += elems;
}
break;
+ case GOMP_MAP_STRUCT:
+ /* Skip the 'GOMP_MAP_STRUCT' itself, and use the regular processing
+ for all its entries. This special handling exists for GCC 10.1
+ compatibility; afterwards, we're not generating these no-op
+ 'GOMP_MAP_STRUCT's anymore. */
+ break;
+
default:
gomp_fatal (">>>> goacc_exit_data_internal UNHANDLED kind 0x%.2x",
kind);
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
index edfc606..c7e46e3 100644
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -415,7 +415,8 @@ GOACC_data_start (int flags_m, size_t mapnum,
= _ACC_OTHER_EVENT_INFO_VALID_BYTES;
enter_data_event_info.other_event.parent_construct = acc_construct_data;
for (int i = 0; i < mapnum; ++i)
- if ((kinds[i] & 0xff) == GOMP_MAP_USE_DEVICE_PTR)
+ if ((kinds[i] & 0xff) == GOMP_MAP_USE_DEVICE_PTR
+ || (kinds[i] & 0xff) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
{
/* If there is one such data mapping kind, then this is actually an
OpenACC 'host_data' construct. (GCC maps the OpenACC
diff --git a/libgomp/omp.h.in b/libgomp/omp.h.in
index 06a96c5..e2db33e 100644
--- a/libgomp/omp.h.in
+++ b/libgomp/omp.h.in
@@ -90,11 +90,87 @@ typedef enum omp_pause_resource_t
omp_pause_hard = 2
} omp_pause_resource_t;
+typedef __UINTPTR_TYPE__ omp_uintptr_t;
+
+#if __cplusplus >= 201103L
+# define __GOMP_UINTPTR_T_ENUM : omp_uintptr_t
+#else
+# define __GOMP_UINTPTR_T_ENUM
+#endif
+
+typedef enum omp_memspace_handle_t __GOMP_UINTPTR_T_ENUM
+{
+ omp_default_mem_space = 0,
+ omp_large_cap_mem_space = 1,
+ omp_const_mem_space = 2,
+ omp_high_bw_mem_space = 3,
+ omp_low_lat_mem_space = 4,
+ __omp_memspace_handle_t_max__ = __UINTPTR_MAX__
+} omp_memspace_handle_t;
+
+typedef enum omp_allocator_handle_t __GOMP_UINTPTR_T_ENUM
+{
+ omp_null_allocator = 0,
+ omp_default_mem_alloc = 1,
+ omp_large_cap_mem_alloc = 2,
+ omp_const_mem_alloc = 3,
+ omp_high_bw_mem_alloc = 4,
+ omp_low_lat_mem_alloc = 5,
+ omp_cgroup_mem_alloc = 6,
+ omp_pteam_mem_alloc = 7,
+ omp_thread_mem_alloc = 8,
+ __omp_allocator_handle_t_max__ = __UINTPTR_MAX__
+} omp_allocator_handle_t;
+
+typedef enum omp_alloctrait_key_t
+{
+ omp_atk_sync_hint = 1,
+ omp_atk_alignment = 2,
+ omp_atk_access = 3,
+ omp_atk_pool_size = 4,
+ omp_atk_fallback = 5,
+ omp_atk_fb_data = 6,
+ omp_atk_pinned = 7,
+ omp_atk_partition = 8
+} omp_alloctrait_key_t;
+
+typedef enum omp_alloctrait_value_t
+{
+ omp_atv_false = 0,
+ omp_atv_true = 1,
+ omp_atv_default = 2,
+ omp_atv_contended = 3,
+ omp_atv_uncontended = 4,
+ omp_atv_sequential = 5,
+ omp_atv_private = 6,
+ omp_atv_all = 7,
+ omp_atv_thread = 8,
+ omp_atv_pteam = 9,
+ omp_atv_cgroup = 10,
+ omp_atv_default_mem_fb = 11,
+ omp_atv_null_fb = 12,
+ omp_atv_abort_fb = 13,
+ omp_atv_allocator_fb = 14,
+ omp_atv_environment = 15,
+ omp_atv_nearest = 16,
+ omp_atv_blocked = 17,
+ omp_atv_interleaved = 18,
+ __omp_alloctrait_value_max__ = __UINTPTR_MAX__
+} omp_alloctrait_value_t;
+
+typedef struct omp_alloctrait_t
+{
+ omp_alloctrait_key_t key;
+ omp_uintptr_t value;
+} omp_alloctrait_t;
+
#ifdef __cplusplus
extern "C" {
# define __GOMP_NOTHROW throw ()
+# define __GOMP_DEFAULT_NULL_ALLOCATOR = omp_null_allocator
#else
# define __GOMP_NOTHROW __attribute__((__nothrow__))
+# define __GOMP_DEFAULT_NULL_ALLOCATOR
#endif
extern void omp_set_num_threads (int) __GOMP_NOTHROW;
@@ -188,6 +264,20 @@ extern __SIZE_TYPE__ omp_capture_affinity (char *, __SIZE_TYPE__, const char *)
extern int omp_pause_resource (omp_pause_resource_t, int) __GOMP_NOTHROW;
extern int omp_pause_resource_all (omp_pause_resource_t) __GOMP_NOTHROW;
+extern omp_allocator_handle_t omp_init_allocator (omp_memspace_handle_t,
+ int,
+ const omp_alloctrait_t [])
+ __GOMP_NOTHROW;
+extern void omp_destroy_allocator (omp_allocator_handle_t) __GOMP_NOTHROW;
+extern void omp_set_default_allocator (omp_allocator_handle_t) __GOMP_NOTHROW;
+extern omp_allocator_handle_t omp_get_default_allocator (void) __GOMP_NOTHROW;
+extern void *omp_alloc (__SIZE_TYPE__,
+ omp_allocator_handle_t __GOMP_DEFAULT_NULL_ALLOCATOR)
+ __GOMP_NOTHROW;
+extern void omp_free (void *,
+ omp_allocator_handle_t __GOMP_DEFAULT_NULL_ALLOCATOR)
+ __GOMP_NOTHROW;
+
#ifdef __cplusplus
}
#endif
diff --git a/libgomp/openacc.f90 b/libgomp/openacc.f90
index a308316..111705d 100644
--- a/libgomp/openacc.f90
+++ b/libgomp/openacc.f90
@@ -31,34 +31,36 @@
module openacc_kinds
use iso_fortran_env, only: int32
- use iso_c_binding, only: c_size_t
implicit none
public
- private :: int32, c_size_t
+ private :: int32
! When adding items, also update 'public' setting in 'module openacc' below.
integer, parameter :: acc_device_kind = int32
! Keep in sync with include/gomp-constants.h.
- integer (acc_device_kind), parameter :: acc_device_current = -3
+ integer (acc_device_kind), parameter :: acc_device_current = -1
integer (acc_device_kind), parameter :: acc_device_none = 0
integer (acc_device_kind), parameter :: acc_device_default = 1
integer (acc_device_kind), parameter :: acc_device_host = 2
! integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 removed.
integer (acc_device_kind), parameter :: acc_device_not_host = 4
integer (acc_device_kind), parameter :: acc_device_nvidia = 5
- integer (acc_device_kind), parameter :: acc_device_gcn = 8
+ integer (acc_device_kind), parameter :: acc_device_radeon = 8
- integer, parameter :: acc_device_property = c_size_t
+ integer, parameter :: acc_device_property_kind = int32
+ ! OpenACC 2.6/2.7/3.0 used acc_device_property; in a spec update the
+ ! missing '_kind' was added for consistency. For backward compatibility, keep:
+ integer, parameter :: acc_device_property = acc_device_property_kind
- ! Keep in sync with include/gomp-constants.h.
- integer (acc_device_property), parameter :: acc_property_memory = 1
- integer (acc_device_property), parameter :: acc_property_free_memory = 2
- integer (acc_device_property), parameter :: acc_property_name = int(Z'10001')
- integer (acc_device_property), parameter :: acc_property_vendor = int(Z'10002')
- integer (acc_device_property), parameter :: acc_property_driver = int(Z'10003')
+ ! Keep in sync with 'libgomp/libgomp-plugin.h:goacc_property'.
+ integer (acc_device_property_kind), parameter :: acc_property_memory = 1
+ integer (acc_device_property_kind), parameter :: acc_property_free_memory = 2
+ integer (acc_device_property_kind), parameter :: acc_property_name = int(Z'10001')
+ integer (acc_device_property_kind), parameter :: acc_property_vendor = int(Z'10002')
+ integer (acc_device_property_kind), parameter :: acc_property_driver = int(Z'10003')
integer, parameter :: acc_handle_kind = int32
@@ -72,15 +74,15 @@ module openacc_internal
implicit none
interface
- function acc_get_num_devices_h (d)
+ function acc_get_num_devices_h (devicetype)
import
integer acc_get_num_devices_h
- integer (acc_device_kind) d
+ integer (acc_device_kind) devicetype
end function
- subroutine acc_set_device_type_h (d)
+ subroutine acc_set_device_type_h (devicetype)
import
- integer (acc_device_kind) d
+ integer (acc_device_kind) devicetype
end subroutine
function acc_get_device_type_h ()
@@ -88,73 +90,74 @@ module openacc_internal
integer (acc_device_kind) acc_get_device_type_h
end function
- subroutine acc_set_device_num_h (n, d)
+ subroutine acc_set_device_num_h (devicenum, devicetype)
import
- integer n
- integer (acc_device_kind) d
+ integer devicenum
+ integer (acc_device_kind) devicetype
end subroutine
- function acc_get_device_num_h (d)
+ function acc_get_device_num_h (devicetype)
import
integer acc_get_device_num_h
- integer (acc_device_kind) d
+ integer (acc_device_kind) devicetype
end function
- function acc_get_property_h (n, d, p)
+ function acc_get_property_h (devicenum, devicetype, property)
+ use iso_c_binding, only: c_size_t
import
implicit none (type, external)
- integer (acc_device_property) :: acc_get_property_h
- integer, value :: n
- integer (acc_device_kind), value :: d
- integer (acc_device_property), value :: p
+ integer (c_size_t) :: acc_get_property_h
+ integer, value :: devicenum
+ integer (acc_device_kind), value :: devicetype
+ integer (acc_device_property_kind), value :: property
end function
- subroutine acc_get_property_string_h (n, d, p, s)
+ subroutine acc_get_property_string_h (devicenum, devicetype, property, string)
import
implicit none (type, external)
- integer, value :: n
- integer (acc_device_kind), value :: d
- integer (acc_device_property), value :: p
- character (*) :: s
+ integer, value :: devicenum
+ integer (acc_device_kind), value :: devicetype
+ integer (acc_device_property_kind), value :: property
+ character (*) :: string
end subroutine
- function acc_async_test_h (a)
+ function acc_async_test_h (arg)
logical acc_async_test_h
- integer a
+ integer arg
end function
function acc_async_test_all_h ()
logical acc_async_test_all_h
end function
- subroutine acc_wait_h (a)
- integer a
+ subroutine acc_wait_h (arg)
+ integer arg
end subroutine
- subroutine acc_wait_async_h (a1, a2)
- integer a1, a2
+ subroutine acc_wait_async_h (arg, async)
+ integer arg, async
end subroutine
subroutine acc_wait_all_h ()
end subroutine
- subroutine acc_wait_all_async_h (a)
- integer a
+ subroutine acc_wait_all_async_h (async)
+ integer async
end subroutine
- subroutine acc_init_h (d)
+ subroutine acc_init_h (devicetype)
import
- integer (acc_device_kind) d
+ integer (acc_device_kind) devicetype
end subroutine
- subroutine acc_shutdown_h (d)
+ subroutine acc_shutdown_h (devicetype)
import
- integer (acc_device_kind) d
+ integer (acc_device_kind) devicetype
end subroutine
- function acc_on_device_h (d)
+ function acc_on_device_h (devicetype)
import
- integer (acc_device_kind) d
+ integer (acc_device_kind) devicetype
logical acc_on_device_h
end function
@@ -505,17 +508,17 @@ module openacc_internal
end interface
interface
- function acc_get_num_devices_l (d) &
+ function acc_get_num_devices_l (devicetype) &
bind (C, name = "acc_get_num_devices")
use iso_c_binding, only: c_int
integer (c_int) :: acc_get_num_devices_l
- integer (c_int), value :: d
+ integer (c_int), value :: devicetype
end function
- subroutine acc_set_device_type_l (d) &
+ subroutine acc_set_device_type_l (devicetype) &
bind (C, name = "acc_set_device_type")
use iso_c_binding, only: c_int
- integer (c_int), value :: d
+ integer (c_int), value :: devicetype
end subroutine
function acc_get_device_type_l () &
@@ -524,37 +527,37 @@ module openacc_internal
integer (c_int) :: acc_get_device_type_l
end function
- subroutine acc_set_device_num_l (n, d) &
+ subroutine acc_set_device_num_l (devicenum, devicetype) &
bind (C, name = "acc_set_device_num")
use iso_c_binding, only: c_int
- integer (c_int), value :: n, d
+ integer (c_int), value :: devicenum, devicetype
end subroutine
- function acc_get_device_num_l (d) &
+ function acc_get_device_num_l (devicetype) &
bind (C, name = "acc_get_device_num")
use iso_c_binding, only: c_int
integer (c_int) :: acc_get_device_num_l
- integer (c_int), value :: d
+ integer (c_int), value :: devicetype
end function
- function acc_get_property_l (n, d, p) &
+ function acc_get_property_l (devicenum, devicetype, property) &
bind (C, name = "acc_get_property")
use iso_c_binding, only: c_int, c_size_t
implicit none (type, external)
integer (c_size_t) :: acc_get_property_l
- integer (c_int), value :: n
- integer (c_int), value :: d
- integer (c_int), value :: p
+ integer (c_int), value :: devicenum
+ integer (c_int), value :: devicetype
+ integer (c_int), value :: property
end function
- function acc_get_property_string_l (n, d, p) &
+ function acc_get_property_string_l (devicenum, devicetype, property) &
bind (C, name = "acc_get_property_string")
use iso_c_binding, only: c_int, c_ptr
implicit none (type, external)
type (c_ptr) :: acc_get_property_string_l
- integer (c_int), value :: n
- integer (c_int), value :: d
- integer (c_int), value :: p
+ integer (c_int), value :: devicenum
+ integer (c_int), value :: devicetype
+ integer (c_int), value :: property
end function
function acc_async_test_l (a) &
@@ -576,10 +579,10 @@ module openacc_internal
integer (c_int), value :: a
end subroutine
- subroutine acc_wait_async_l (a1, a2) &
+ subroutine acc_wait_async_l (arg, async) &
bind (C, name = "acc_wait_async")
use iso_c_binding, only: c_int
- integer (c_int), value :: a1, a2
+ integer (c_int), value :: arg, async
end subroutine
subroutine acc_wait_all_l () &
@@ -587,29 +590,29 @@ module openacc_internal
use iso_c_binding, only: c_int
end subroutine
- subroutine acc_wait_all_async_l (a) &
+ subroutine acc_wait_all_async_l (async) &
bind (C, name = "acc_wait_all_async")
use iso_c_binding, only: c_int
- integer (c_int), value :: a
+ integer (c_int), value :: async
end subroutine
- subroutine acc_init_l (d) &
+ subroutine acc_init_l (devicetype) &
bind (C, name = "acc_init")
use iso_c_binding, only: c_int
- integer (c_int), value :: d
+ integer (c_int), value :: devicetype
end subroutine
- subroutine acc_shutdown_l (d) &
+ subroutine acc_shutdown_l (devicetype) &
bind (C, name = "acc_shutdown")
use iso_c_binding, only: c_int
- integer (c_int), value :: d
+ integer (c_int), value :: devicetype
end subroutine
- function acc_on_device_l (d) &
+ function acc_on_device_l (devicetype) &
bind (C, name = "acc_on_device")
use iso_c_binding, only: c_int
integer (c_int) :: acc_on_device_l
- integer (c_int), value :: d
+ integer (c_int), value :: devicetype
end function
subroutine acc_copyin_l (a, len) &
@@ -767,9 +770,9 @@ module openacc
! From openacc_kinds
public :: acc_device_kind
public :: acc_device_none, acc_device_default, acc_device_host
- public :: acc_device_not_host, acc_device_nvidia, acc_device_gcn
+ public :: acc_device_not_host, acc_device_nvidia, acc_device_radeon
- public :: acc_device_property
+ public :: acc_device_property_kind, acc_device_property
public :: acc_property_memory, acc_property_free_memory
public :: acc_property_name, acc_property_vendor, acc_property_driver
@@ -792,7 +795,7 @@ module openacc
public :: acc_delete_async, acc_update_device_async, acc_update_self_async
public :: acc_copyout_finalize, acc_delete_finalize
- integer, parameter :: openacc_version = 201306
+ integer, parameter :: openacc_version = 201711
interface acc_get_num_devices
procedure :: acc_get_num_devices_h
@@ -1002,19 +1005,19 @@ module openacc
end module openacc
-function acc_get_num_devices_h (d)
+function acc_get_num_devices_h (devicetype)
use openacc_internal, only: acc_get_num_devices_l
use openacc_kinds
integer acc_get_num_devices_h
- integer (acc_device_kind) d
- acc_get_num_devices_h = acc_get_num_devices_l (d)
+ integer (acc_device_kind) devicetype
+ acc_get_num_devices_h = acc_get_num_devices_l (devicetype)
end function
-subroutine acc_set_device_type_h (d)
+subroutine acc_set_device_type_h (devicetype)
use openacc_internal, only: acc_set_device_type_l
use openacc_kinds
- integer (acc_device_kind) d
- call acc_set_device_type_l (d)
+ integer (acc_device_kind) devicetype
+ call acc_set_device_type_l (devicetype)
end subroutine
function acc_get_device_type_h ()
@@ -1024,54 +1027,47 @@ function acc_get_device_type_h ()
acc_get_device_type_h = acc_get_device_type_l ()
end function
-subroutine acc_set_device_num_h (n, d)
+subroutine acc_set_device_num_h (devicenum, devicetype)
use openacc_internal, only: acc_set_device_num_l
use openacc_kinds
- integer n
- integer (acc_device_kind) d
- call acc_set_device_num_l (n, d)
+ integer devicenum
+ integer (acc_device_kind) devicetype
+ call acc_set_device_num_l (devicenum, devicetype)
end subroutine
-function acc_get_device_num_h (d)
+function acc_get_device_num_h (devicetype)
use openacc_internal, only: acc_get_device_num_l
use openacc_kinds
integer acc_get_device_num_h
- integer (acc_device_kind) d
- acc_get_device_num_h = acc_get_device_num_l (d)
+ integer (acc_device_kind) devicetype
+ acc_get_device_num_h = acc_get_device_num_l (devicetype)
end function
-function acc_get_property_h (n, d, p)
- use iso_c_binding, only: c_int, c_size_t
+function acc_get_property_h (devicenum, devicetype, property)
+ use iso_c_binding, only: c_size_t
use openacc_internal, only: acc_get_property_l
use openacc_kinds
implicit none (type, external)
- integer (acc_device_property) :: acc_get_property_h
- integer, value :: n
- integer (acc_device_kind), value :: d
- integer (acc_device_property), value :: p
-
- integer (c_int) :: pint
-
- pint = int (p, c_int)
- acc_get_property_h = acc_get_property_l (n, d, pint)
+ integer (c_size_t) :: acc_get_property_h
+ integer, value :: devicenum
+ integer (acc_device_kind), value :: devicetype
+ integer (acc_device_property_kind), value :: property
+ acc_get_property_h = acc_get_property_l (devicenum, devicetype, property)
end function
-subroutine acc_get_property_string_h (n, d, p, s)
- use iso_c_binding, only: c_char, c_int, c_ptr, c_f_pointer, c_associated
+subroutine acc_get_property_string_h (devicenum, devicetype, property, string)
+ use iso_c_binding, only: c_char, c_size_t, c_ptr, c_f_pointer, c_associated
use openacc_internal, only: acc_get_property_string_l
use openacc_kinds
implicit none (type, external)
- integer, value :: n
- integer (acc_device_kind), value :: d
- integer (acc_device_property), value :: p
- character (*) :: s
+ integer, value :: devicenum
+ integer (acc_device_kind), value :: devicetype
+ integer (acc_device_property_kind), value :: property
+ character (*) :: string
- integer (c_int) :: pint
type (c_ptr) :: cptr
- integer :: clen
+ integer(c_size_t) :: clen, slen, i
character (kind=c_char, len=1), pointer, contiguous :: sptr (:)
- integer :: slen
- integer :: i
interface
function strlen (s) bind (C, name = "strlen")
@@ -1081,53 +1077,44 @@ subroutine acc_get_property_string_h (n, d, p, s)
end function strlen
end interface
- pint = int (p, c_int)
- cptr = acc_get_property_string_l (n, d, pint)
- s = ""
+ cptr = acc_get_property_string_l (devicenum, devicetype, property)
+ string = ""
if (.not. c_associated (cptr)) then
return
end if
- clen = int (strlen (cptr))
+ clen = strlen (cptr)
call c_f_pointer (cptr, sptr, [clen])
- slen = min (clen, len (s))
+ slen = min (clen, len (string, kind=c_size_t))
do i = 1, slen
- s (i:i) = sptr (i)
+ string (i:i) = sptr (i)
end do
end subroutine
-function acc_async_test_h (a)
+function acc_async_test_h (arg)
use openacc_internal, only: acc_async_test_l
logical acc_async_test_h
- integer a
- if (acc_async_test_l (a) .eq. 1) then
- acc_async_test_h = .TRUE.
- else
- acc_async_test_h = .FALSE.
- end if
+ integer arg
+ acc_async_test_h = acc_async_test_l (arg) /= 0
end function
function acc_async_test_all_h ()
use openacc_internal, only: acc_async_test_all_l
logical acc_async_test_all_h
- if (acc_async_test_all_l () .eq. 1) then
- acc_async_test_all_h = .TRUE.
- else
- acc_async_test_all_h = .FALSE.
- end if
+ acc_async_test_all_h = acc_async_test_all_l () /= 0
end function
-subroutine acc_wait_h (a)
+subroutine acc_wait_h (arg)
use openacc_internal, only: acc_wait_l
- integer a
- call acc_wait_l (a)
+ integer arg
+ call acc_wait_l (arg)
end subroutine
-subroutine acc_wait_async_h (a1, a2)
+subroutine acc_wait_async_h (arg, async)
use openacc_internal, only: acc_wait_async_l
- integer a1, a2
- call acc_wait_async_l (a1, a2)
+ integer arg, async
+ call acc_wait_async_l (arg, async)
end subroutine
subroutine acc_wait_all_h ()
@@ -1135,36 +1122,32 @@ subroutine acc_wait_all_h ()
call acc_wait_all_l ()
end subroutine
-subroutine acc_wait_all_async_h (a)
+subroutine acc_wait_all_async_h (async)
use openacc_internal, only: acc_wait_all_async_l
- integer a
- call acc_wait_all_async_l (a)
+ integer async
+ call acc_wait_all_async_l (async)
end subroutine
-subroutine acc_init_h (d)
+subroutine acc_init_h (devicetype)
use openacc_internal, only: acc_init_l
use openacc_kinds
- integer (acc_device_kind) d
- call acc_init_l (d)
+ integer (acc_device_kind) devicetype
+ call acc_init_l (devicetype)
end subroutine
-subroutine acc_shutdown_h (d)
+subroutine acc_shutdown_h (devicetype)
use openacc_internal, only: acc_shutdown_l
use openacc_kinds
- integer (acc_device_kind) d
- call acc_shutdown_l (d)
+ integer (acc_device_kind) devicetype
+ call acc_shutdown_l (devicetype)
end subroutine
-function acc_on_device_h (d)
+function acc_on_device_h (devicetype)
use openacc_internal, only: acc_on_device_l
use openacc_kinds
- integer (acc_device_kind) d
+ integer (acc_device_kind) devicetype
logical acc_on_device_h
- if (acc_on_device_l (d) .eq. 1) then
- acc_on_device_h = .TRUE.
- else
- acc_on_device_h = .FALSE.
- end if
+ acc_on_device_h = acc_on_device_l (devicetype) /= 0
end function
subroutine acc_copyin_32_h (a, len)
@@ -1414,11 +1397,7 @@ function acc_is_present_32_h (a, len)
!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
type (*), dimension (*) :: a
integer (c_int32_t) len
- if (acc_is_present_l (a, int (len, kind = c_size_t)) .eq. 1) then
- acc_is_present_32_h = .TRUE.
- else
- acc_is_present_32_h = .FALSE.
- end if
+ acc_is_present_32_h = acc_is_present_l (a, int (len, kind = c_size_t)) /= 0
end function
function acc_is_present_64_h (a, len)
@@ -1428,18 +1407,14 @@ function acc_is_present_64_h (a, len)
!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
type (*), dimension (*) :: a
integer (c_int64_t) len
- if (acc_is_present_l (a, int (len, kind = c_size_t)) .eq. 1) then
- acc_is_present_64_h = .TRUE.
- else
- acc_is_present_64_h = .FALSE.
- end if
+ acc_is_present_64_h = acc_is_present_l (a, int (len, kind = c_size_t)) /= 0
end function
function acc_is_present_array_h (a)
use openacc_internal, only: acc_is_present_l
logical acc_is_present_array_h
type (*), dimension (..), contiguous :: a
- acc_is_present_array_h = acc_is_present_l (a, sizeof (a)) == 1
+ acc_is_present_array_h = acc_is_present_l (a, sizeof (a)) /= 0
end function
subroutine acc_copyin_async_32_h (a, len, async)
diff --git a/libgomp/openacc.h b/libgomp/openacc.h
index 66786d7..1dc471f 100644
--- a/libgomp/openacc.h
+++ b/libgomp/openacc.h
@@ -49,14 +49,14 @@ extern "C" {
/* Types */
typedef enum acc_device_t {
/* Keep in sync with include/gomp-constants.h. */
- acc_device_current = -3,
+ acc_device_current = -1,
acc_device_none = 0,
acc_device_default = 1,
acc_device_host = 2,
/* acc_device_host_nonshm = 3 removed. */
acc_device_not_host = 4,
acc_device_nvidia = 5,
- acc_device_gcn = 8,
+ acc_device_radeon = 8,
_ACC_device_hwm,
/* Ensure enumeration is layout compatible with int. */
_ACC_highest = __INT_MAX__,
@@ -64,8 +64,7 @@ typedef enum acc_device_t {
} acc_device_t;
typedef enum acc_device_property_t {
- /* Keep in sync with include/gomp-constants.h. */
- /* Start from 1 to catch uninitialized use. */
+ /* Keep in sync with 'libgomp/libgomp-plugin.h:goacc_property'. */
acc_property_memory = 1,
acc_property_free_memory = 2,
acc_property_name = 0x10001,
diff --git a/libgomp/openacc_lib.h b/libgomp/openacc_lib.h
index b327ba7..82a3735 100644
--- a/libgomp/openacc_lib.h
+++ b/libgomp/openacc_lib.h
@@ -37,6 +37,7 @@
integer, parameter :: acc_device_kind = 4
! Keep in sync with include/gomp-constants.h.
+ integer (acc_device_kind), parameter :: acc_device_current = -1
integer (acc_device_kind), parameter :: acc_device_none = 0
integer (acc_device_kind), parameter :: acc_device_default = 1
integer (acc_device_kind), parameter :: acc_device_host = 2
@@ -44,7 +45,24 @@
! removed.
integer (acc_device_kind), parameter :: acc_device_not_host = 4
integer (acc_device_kind), parameter :: acc_device_nvidia = 5
- integer (acc_device_kind), parameter :: acc_device_gcn = 8
+ integer (acc_device_kind), parameter :: acc_device_radeon = 8
+
+ integer, parameter :: acc_device_property_kind = 4
+! OpenACC 2.6/2.7/3.0 used acc_device_property; in a spec update the
+! missing '_kind' was added for consistency. For backward compatibility, keep:
+ integer, parameter :: acc_device_property &
+ & = acc_device_property_kind
+
+ integer (acc_device_property_kind), parameter :: &
+ & acc_property_memory = 1
+ integer (acc_device_property_kind), parameter :: &
+ & acc_property_free_memory = 2
+ integer (acc_device_property_kind), parameter :: &
+ & acc_property_name = int(Z'10001')
+ integer (acc_device_property_kind), parameter :: &
+ & acc_property_vendor = int(Z'10002')
+ integer (acc_device_property_kind), parameter :: &
+ & acc_property_driver = int(Z'10003')
integer, parameter :: acc_handle_kind = 4
@@ -52,20 +70,20 @@
integer (acc_handle_kind), parameter :: acc_async_noval = -1
integer (acc_handle_kind), parameter :: acc_async_sync = -2
- integer, parameter :: openacc_version = 201306
+ integer, parameter :: openacc_version = 201711
interface acc_get_num_devices
- function acc_get_num_devices_h (d)
+ function acc_get_num_devices_h (devicetype)
import acc_device_kind
integer acc_get_num_devices_h
- integer (acc_device_kind) d
+ integer (acc_device_kind) devicetype
end function
end interface
interface acc_set_device_type
- subroutine acc_set_device_type_h (d)
+ subroutine acc_set_device_type_h (devicetype)
import acc_device_kind
- integer (acc_device_kind) d
+ integer (acc_device_kind) devicetype
end subroutine
end interface
@@ -77,25 +95,50 @@
end interface
interface acc_set_device_num
- subroutine acc_set_device_num_h (n, d)
+ subroutine acc_set_device_num_h (devicenum, devicetype)
import acc_device_kind
- integer n
- integer (acc_device_kind) d
+ integer devicenum
+ integer (acc_device_kind) devicetype
end subroutine
end interface
interface acc_get_device_num
- function acc_get_device_num_h (d)
+ function acc_get_device_num_h (devicetype)
import acc_device_kind
integer acc_get_device_num_h
- integer (acc_device_kind) d
+ integer (acc_device_kind) devicetype
+ end function
+ end interface
+
+! Generic acc_get_property: resolves to acc_get_property_h, which takes a
+! device number, a device type and a property selector (all passed by
+! value) and returns an integer of kind c_size_t.
+! NOTE(review): presumably meant for the numeric properties
+! (acc_property_memory, acc_property_free_memory) -- confirm.
+ interface acc_get_property
+ function acc_get_property_h (devicenum, devicetype, &
+ & property)
+ use iso_c_binding, only: c_size_t
+ import acc_device_kind, acc_device_property_kind
+ implicit none (type, external)
+ integer (c_size_t) :: acc_get_property_h
+ integer, value :: devicenum
+ integer (acc_device_kind), value :: devicetype
+ integer (acc_device_property_kind), value :: property
 end function
 end interface
+! Generic acc_get_property_string: resolves to acc_get_property_string_h,
+! which takes a device number, a device type and a property selector (all
+! passed by value) plus an assumed-length character argument 'string'.
+! NOTE(review): 'string' is presumably the output buffer that receives the
+! property text (name, vendor, driver) -- confirm against the
+! implementation; no intent is declared here.
+ interface acc_get_property_string
+ subroutine acc_get_property_string_h (devicenum, devicetype, &
+ & property, string)
+ import acc_device_kind, acc_device_property_kind
+ implicit none (type, external)
+ integer, value :: devicenum
+ integer (acc_device_kind), value :: devicetype
+ integer (acc_device_property_kind), value :: property
+ character (*) :: string
+ end subroutine
+ end interface
+
interface acc_async_test
- function acc_async_test_h (a)
+ function acc_async_test_h (arg)
logical acc_async_test_h
- integer a
+ integer arg
end function
end interface
@@ -106,8 +149,8 @@
end interface
interface acc_wait
- subroutine acc_wait_h (a)
- integer a
+ subroutine acc_wait_h (arg)
+ integer arg
end subroutine
end interface
@@ -117,8 +160,8 @@
end interface
interface acc_wait_async
- subroutine acc_wait_async_h (a1, a2)
- integer a1, a2
+ subroutine acc_wait_async_h (arg, async)
+ integer arg, async
end subroutine
end interface
@@ -134,8 +177,8 @@
end interface
interface acc_wait_all_async
- subroutine acc_wait_all_async_h (a)
- integer a
+ subroutine acc_wait_all_async_h (async)
+ integer async
end subroutine
end interface
@@ -167,14 +210,14 @@
interface acc_copyin
subroutine acc_copyin_32_h (a, len)
use iso_c_binding, only: c_int32_t
- !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
type (*), dimension (*) :: a
integer (c_int32_t) len
end subroutine
subroutine acc_copyin_64_h (a, len)
use iso_c_binding, only: c_int64_t
- !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
type (*), dimension (*) :: a
integer (c_int64_t) len
end subroutine
@@ -187,14 +230,14 @@
interface acc_present_or_copyin
subroutine acc_present_or_copyin_32_h (a, len)
use iso_c_binding, only: c_int32_t
- !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
type (*), dimension (*) :: a
integer (c_int32_t) len
end subroutine
subroutine acc_present_or_copyin_64_h (a, len)
use iso_c_binding, only: c_int64_t
- !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
type (*), dimension (*) :: a
integer (c_int64_t) len
end subroutine
@@ -213,14 +256,14 @@
interface acc_create
subroutine acc_create_32_h (a, len)
use iso_c_binding, only: c_int32_t
- !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
type (*), dimension (*) :: a
integer (c_int32_t) len
end subroutine
subroutine acc_create_64_h (a, len)
use iso_c_binding, only: c_int64_t
- !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
type (*), dimension (*) :: a
integer (c_int64_t) len
end subroutine
@@ -233,14 +276,14 @@
interface acc_present_or_create
subroutine acc_present_or_create_32_h (a, len)
use iso_c_binding, only: c_int32_t
- !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
type (*), dimension (*) :: a
integer (c_int32_t) len
end subroutine
subroutine acc_present_or_create_64_h (a, len)
use iso_c_binding, only: c_int64_t
- !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
type (*), dimension (*) :: a
integer (c_int64_t) len
end subroutine
@@ -259,14 +302,14 @@
interface acc_copyout
subroutine acc_copyout_32_h (a, len)
use iso_c_binding, only: c_int32_t
- !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
type (*), dimension (*) :: a
integer (c_int32_t) len
end subroutine
subroutine acc_copyout_64_h (a, len)
use iso_c_binding, only: c_int64_t
- !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
type (*), dimension (*) :: a
integer (c_int64_t) len
end subroutine
@@ -279,14 +322,14 @@
interface acc_copyout_finalize
subroutine acc_copyout_finalize_32_h (a, len)
use iso_c_binding, only: c_int32_t
- !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
type (*), dimension (*) :: a
integer (c_int32_t) len
end subroutine
subroutine acc_copyout_finalize_64_h (a, len)
use iso_c_binding, only: c_int64_t
- !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
type (*), dimension (*) :: a
integer (c_int64_t) len
end subroutine
@@ -299,14 +342,14 @@
interface acc_delete
subroutine acc_delete_32_h (a, len)
use iso_c_binding, only: c_int32_t
- !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
type (*), dimension (*) :: a
integer (c_int32_t) len
end subroutine
subroutine acc_delete_64_h (a, len)
use iso_c_binding, only: c_int64_t
- !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
type (*), dimension (*) :: a
integer (c_int64_t) len
end subroutine
@@ -319,14 +362,14 @@
interface acc_delete_finalize
subroutine acc_delete_finalize_32_h (a, len)
use iso_c_binding, only: c_int32_t
- !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
type (*), dimension (*) :: a
integer (c_int32_t) len
end subroutine
subroutine acc_delete_finalize_64_h (a, len)
use iso_c_binding, only: c_int64_t
- !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
type (*), dimension (*) :: a
integer (c_int64_t) len
end subroutine
@@ -339,14 +382,14 @@
interface acc_update_device
subroutine acc_update_device_32_h (a, len)
use iso_c_binding, only: c_int32_t
- !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
type (*), dimension (*) :: a
integer (c_int32_t) len
end subroutine
subroutine acc_update_device_64_h (a, len)
use iso_c_binding, only: c_int64_t
- !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
type (*), dimension (*) :: a
integer (c_int64_t) len
end subroutine
@@ -359,14 +402,14 @@
interface acc_update_self
subroutine acc_update_self_32_h (a, len)
use iso_c_binding, only: c_int32_t
- !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
type (*), dimension (*) :: a
integer (c_int32_t) len
end subroutine
subroutine acc_update_self_64_h (a, len)
use iso_c_binding, only: c_int64_t
- !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
type (*), dimension (*) :: a
integer (c_int64_t) len
end subroutine
@@ -385,7 +428,7 @@
function acc_is_present_32_h (a, len)
use iso_c_binding, only: c_int32_t
logical acc_is_present_32_h
- !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
type (*), dimension (*) :: a
integer (c_int32_t) len
end function
@@ -393,7 +436,7 @@
function acc_is_present_64_h (a, len)
use iso_c_binding, only: c_int64_t
logical acc_is_present_64_h
- !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
type (*), dimension (*) :: a
integer (c_int64_t) len
end function
@@ -411,7 +454,7 @@
subroutine acc_copyin_async_32_h (a, len, async)
use iso_c_binding, only: c_int32_t
import acc_handle_kind
- !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
type (*), dimension (*) :: a
integer (c_int32_t) len
integer (acc_handle_kind) async
@@ -420,7 +463,7 @@
subroutine acc_copyin_async_64_h (a, len, async)
use iso_c_binding, only: c_int64_t
import acc_handle_kind
- !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
type (*), dimension (*) :: a
integer (c_int64_t) len
integer (acc_handle_kind) async
@@ -437,7 +480,7 @@
subroutine acc_create_async_32_h (a, len, async)
use iso_c_binding, only: c_int32_t
import acc_handle_kind
- !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
type (*), dimension (*) :: a
integer (c_int32_t) len
integer (acc_handle_kind) async
@@ -446,7 +489,7 @@
subroutine acc_create_async_64_h (a, len, async)
use iso_c_binding, only: c_int64_t
import acc_handle_kind
- !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
type (*), dimension (*) :: a
integer (c_int64_t) len
integer (acc_handle_kind) async
@@ -463,7 +506,7 @@
subroutine acc_copyout_async_32_h (a, len, async)
use iso_c_binding, only: c_int32_t
import acc_handle_kind
- !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
type (*), dimension (*) :: a
integer (c_int32_t) len
integer (acc_handle_kind) async
@@ -472,7 +515,7 @@
subroutine acc_copyout_async_64_h (a, len, async)
use iso_c_binding, only: c_int64_t
import acc_handle_kind
- !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
type (*), dimension (*) :: a
integer (c_int64_t) len
integer (acc_handle_kind) async
@@ -489,7 +532,7 @@
subroutine acc_delete_async_32_h (a, len, async)
use iso_c_binding, only: c_int32_t
import acc_handle_kind
- !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
type (*), dimension (*) :: a
integer (c_int32_t) len
integer (acc_handle_kind) async
@@ -498,7 +541,7 @@
subroutine acc_delete_async_64_h (a, len, async)
use iso_c_binding, only: c_int64_t
import acc_handle_kind
- !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
type (*), dimension (*) :: a
integer (c_int64_t) len
integer (acc_handle_kind) async
@@ -515,7 +558,7 @@
subroutine acc_update_device_async_32_h (a, len, async)
use iso_c_binding, only: c_int32_t
import acc_handle_kind
- !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
type (*), dimension (*) :: a
integer (c_int32_t) len
integer (acc_handle_kind) async
@@ -524,7 +567,7 @@
subroutine acc_update_device_async_64_h (a, len, async)
use iso_c_binding, only: c_int64_t
import acc_handle_kind
- !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
type (*), dimension (*) :: a
integer (c_int64_t) len
integer (acc_handle_kind) async
@@ -541,7 +584,7 @@
subroutine acc_update_self_async_32_h (a, len, async)
use iso_c_binding, only: c_int32_t
import acc_handle_kind
- !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
type (*), dimension (*) :: a
integer (c_int32_t) len
integer (acc_handle_kind) async
@@ -550,7 +593,7 @@
subroutine acc_update_self_async_64_h (a, len, async)
use iso_c_binding, only: c_int64_t
import acc_handle_kind
- !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
type (*), dimension (*) :: a
integer (c_int64_t) len
integer (acc_handle_kind) async
diff --git a/libgomp/plugin/configfrag.ac b/libgomp/plugin/configfrag.ac
index 9a424aa..fc91702 100644
--- a/libgomp/plugin/configfrag.ac
+++ b/libgomp/plugin/configfrag.ac
@@ -211,7 +211,7 @@ if test x"$enable_offload_targets" != x; then
case "${target}" in
x86_64-*-*)
case " ${CC} ${CFLAGS} " in
- *" -m32 "*)
+ *" -m32 "*|*" -mx32 "*)
PLUGIN_HSA=0
;;
*)
@@ -251,7 +251,7 @@ if test x"$enable_offload_targets" != x; then
case "${target}" in
x86_64-*-*)
case " ${CC} ${CFLAGS} " in
- *" -m32 "*)
+ *" -m32 "*|*" -mx32 "*)
PLUGIN_GCN=0
;;
*)
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index b5ca7c1..4c6a4c0 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -371,6 +371,8 @@ struct hsa_kernel_description
{
const char *name;
int oacc_dims[3]; /* Only present for GCN kernels. */
+ int sgpr_count;
+ int vpgr_count;
};
/* Mkoffload uses this structure to describe an offload variable. */
@@ -396,6 +398,19 @@ struct gcn_image_desc
struct global_var_info *global_variables;
};
+/* This enum mirrors the corresponding LLVM enum's values for all ISAs that we
+   support.
+   See https://llvm.org/docs/AMDGPUUsage.html#amdgpu-ef-amdgpu-mach-table
+
+   EF_AMDGPU_MACH_UNSUPPORTED is not an LLVM value.  It names the -1 sentinel
+   that isa_code returns for an agent name it does not recognize.  Having a
+   negative enumerator also forces the enum to use a signed underlying type:
+   with only non-negative enumerators GCC chooses 'unsigned int', in which
+   case the -1 sentinel becomes a large positive value and the
+   'agent->device_isa < 0' check in GOMP_OFFLOAD_init_device can never be
+   true.  */
+
+typedef enum {
+  EF_AMDGPU_MACH_UNSUPPORTED = -1,
+  EF_AMDGPU_MACH_AMDGCN_GFX803 = 0x02a,
+  EF_AMDGPU_MACH_AMDGCN_GFX900 = 0x02c,
+  EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f,
+} EF_AMDGPU_MACH;
+
+const static int EF_AMDGPU_MACH_MASK = 0x000000ff;
+typedef EF_AMDGPU_MACH gcn_isa;
+
/* Description of an HSA GPU agent (device) and the program associated with
it. */
@@ -408,9 +423,13 @@ struct agent_info
/* Whether the agent has been initialized. The fields below are usable only
if it has been. */
bool initialized;
- /* Precomputed check for problem architectures. */
- bool gfx900_p;
+ /* The instruction set architecture of the device. */
+ gcn_isa device_isa;
+ /* Name of the agent. */
+ char name[64];
+ /* Name of the vendor of the agent. */
+ char vendor_name[64];
/* Command queues of the agent. */
hsa_queue_t *sync_queue;
struct goacc_asyncqueue *async_queues, *omp_async_queue;
@@ -460,6 +479,8 @@ struct kernel_info
struct agent_info *agent;
/* The specific module where the kernel takes place. */
struct module_info *module;
+ /* Information provided by mkoffload associated with the kernel. */
+ struct hsa_kernel_description *description;
/* Mutex enforcing that at most one thread ever initializes a kernel for
use. A thread should have locked agent->module_rwlock for reading before
acquiring it. */
@@ -529,6 +550,8 @@ struct hsa_context_info
int agent_count;
/* Array of agent_info structures describing the individual HSA agents. */
struct agent_info *agents;
+ /* Driver version string. */
+ char driver_version_s[30];
};
/* Format of the on-device heap.
@@ -1213,7 +1236,8 @@ parse_target_attributes (void **input,
grid_attrs_found = true;
break;
}
- else if ((id & GOMP_TARGET_ARG_DEVICE_ALL) == GOMP_TARGET_ARG_DEVICE_ALL)
+ else if ((id & GOMP_TARGET_ARG_DEVICE_MASK)
+ == GOMP_TARGET_ARG_DEVICE_ALL)
{
gcn_dims_found = true;
switch (id & GOMP_TARGET_ARG_ID_MASK)
@@ -1232,7 +1256,8 @@ parse_target_attributes (void **input,
if (gcn_dims_found)
{
- if (agent->gfx900_p && gcn_threads == 0 && override_z_dim == 0)
+ if (agent->device_isa == EF_AMDGPU_MACH_AMDGCN_GFX900
+ && gcn_threads == 0 && override_z_dim == 0)
{
gcn_threads = 4;
GCN_WARNING ("VEGA BUG WORKAROUND: reducing default number of "
@@ -1483,6 +1508,8 @@ init_hsa_context (void)
= GOMP_PLUGIN_malloc_cleared (hsa_context.agent_count
* sizeof (struct agent_info));
status = hsa_fns.hsa_iterate_agents_fn (assign_agent_ids, &agent_index);
+ if (status != HSA_STATUS_SUCCESS)
+ return hsa_error ("Scanning compute agents failed", status);
if (agent_index != hsa_context.agent_count)
{
GOMP_PLUGIN_error ("Failed to assign IDs to all GCN agents");
@@ -1496,6 +1523,25 @@ init_hsa_context (void)
GOMP_PLUGIN_error ("Failed to list all HSA runtime agents");
}
+ uint16_t minor, major;
+ status = hsa_fns.hsa_system_get_info_fn (HSA_SYSTEM_INFO_VERSION_MINOR,
+ &minor);
+ if (status != HSA_STATUS_SUCCESS)
+ GOMP_PLUGIN_error ("Failed to obtain HSA runtime minor version");
+ status = hsa_fns.hsa_system_get_info_fn (HSA_SYSTEM_INFO_VERSION_MAJOR,
+ &major);
+ if (status != HSA_STATUS_SUCCESS)
+ GOMP_PLUGIN_error ("Failed to obtain HSA runtime major version");
+
+ size_t len = sizeof hsa_context.driver_version_s;
+ int printed = snprintf (hsa_context.driver_version_s, len,
+ "HSA Runtime %hu.%hu", (unsigned short int)major,
+ (unsigned short int)minor);
+ if (printed >= len)
+ GCN_WARNING ("HSA runtime version string was truncated."
+ "Version %hu.%hu is too long.", (unsigned short int)major,
+ (unsigned short int)minor);
+
hsa_context.initialized = true;
return true;
}
@@ -1578,6 +1624,66 @@ get_data_memory_region (hsa_region_t region, void *data)
HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED);
}
+/* Extract the AMDGPU machine (ISA) bits from the e_flags word of the ELF
+   header IMAGE by masking it with EF_AMDGPU_MACH_MASK.  */
+
+static int
+elf_gcn_isa_field (Elf64_Ehdr *image)
+{
+  int mach_bits = image->e_flags & EF_AMDGPU_MACH_MASK;
+
+  return mach_bits;
+}
+
+const static char *gcn_gfx803_s = "gfx803";
+const static char *gcn_gfx900_s = "gfx900";
+const static char *gcn_gfx906_s = "gfx906";
+const static int gcn_isa_name_len = 6;
+
+/* Map the ISA code ISA to the name the HSA runtime uses for it.  Yields NULL
+   for any ISA that we do not support.  */
+
+static const char*
+isa_hsa_name (int isa) {
+  if (isa == EF_AMDGPU_MACH_AMDGCN_GFX803)
+    return gcn_gfx803_s;
+
+  if (isa == EF_AMDGPU_MACH_AMDGCN_GFX900)
+    return gcn_gfx900_s;
+
+  if (isa == EF_AMDGPU_MACH_AMDGCN_GFX906)
+    return gcn_gfx906_s;
+
+  return NULL;
+}
+
+/* Map the ISA code ISA to the user-facing architecture name that GCC accepts
+   (e.g. with -march).  Yields NULL if we do not support the ISA.
+   Keep in sync with /gcc/config/gcn/gcn.{c,opt}.  */
+
+static const char*
+isa_gcc_name (int isa) {
+  /* GFX803 is the one ISA handled here whose GCC name differs from its HSA
+     name; everything else is delegated to isa_hsa_name.  */
+  if (isa == EF_AMDGPU_MACH_AMDGCN_GFX803)
+    return "fiji";
+
+  return isa_hsa_name (isa);
+}
+
+/* Returns the code which is used in the GCN object code to identify the ISA with
+ the given name (as used by the HSA runtime).
+
+ Only the first gcn_isa_name_len characters of ISA are compared against each
+ known name, so any text following a recognized prefix is ignored. A name
+ that matches none of them yields -1.
+
+ NOTE(review): the -1 is returned as a bare literal through the gcn_isa
+ return type, and GOMP_OFFLOAD_init_device tests the stored result with
+ '< 0'. That test can only succeed if gcn_isa is a signed type -- confirm. */
+
+static gcn_isa
+isa_code(const char *isa) {
+ if (!strncmp (isa, gcn_gfx803_s, gcn_isa_name_len))
+ return EF_AMDGPU_MACH_AMDGCN_GFX803;
+
+ if (!strncmp (isa, gcn_gfx900_s, gcn_isa_name_len))
+ return EF_AMDGPU_MACH_AMDGCN_GFX900;
+
+ if (!strncmp (isa, gcn_gfx906_s, gcn_isa_name_len))
+ return EF_AMDGPU_MACH_AMDGCN_GFX906;
+
+ /* No known ISA name matched. */
+ return -1;
+}
+
/* }}} */
/* {{{ Run */
@@ -1993,6 +2099,24 @@ run_kernel (struct kernel_info *kernel, void *vars,
struct GOMP_kernel_launch_attributes *kla,
struct goacc_asyncqueue *aq, bool module_locked)
{
+ GCN_DEBUG ("SGPRs: %d, VGPRs: %d\n", kernel->description->sgpr_count,
+ kernel->description->vpgr_count);
+
+ /* Reduce the number of threads/workers if there are insufficient
+ VGPRs available to run the kernels together. */
+ if (kla->ndim == 3 && kernel->description->vpgr_count > 0)
+ {
+ int granulated_vgprs = (kernel->description->vpgr_count + 3) & ~3;
+ int max_threads = (256 / granulated_vgprs) * 4;
+ if (kla->gdims[2] > max_threads)
+ {
+ GCN_WARNING ("Too many VGPRs required to support %d threads/workers"
+ " per team/gang - reducing to %d threads/workers.\n",
+ kla->gdims[2], max_threads);
+ kla->gdims[2] = max_threads;
+ }
+ }
+
GCN_DEBUG ("GCN launch on queue: %d:%d\n", kernel->agent->device_id,
(aq ? aq->id : 0));
GCN_DEBUG ("GCN launch attribs: gdims:[");
@@ -2194,6 +2318,7 @@ init_basic_kernel_info (struct kernel_info *kernel,
kernel->agent = agent;
kernel->module = module;
kernel->name = d->name;
+ kernel->description = d;
if (pthread_mutex_init (&kernel->init_mutex, NULL))
{
GOMP_PLUGIN_error ("Failed to initialize a GCN kernel mutex");
@@ -2257,6 +2382,39 @@ find_load_offset (Elf64_Addr *load_offset, struct agent_info *agent,
return res;
}
+/* Check that the GCN ISA of the given image matches the ISA of the agent.
+
+ AGENT is the device whose 'device_isa' field is compared against; IMAGE is
+ the ELF header of the code object. Returns true when the two ISAs are
+ equal. Returns false, after reporting through hsa_error, when the image's
+ ISA is not one isa_hsa_name knows or when it differs from the agent's. */
+
+static bool
+isa_matches_agent (struct agent_info *agent, Elf64_Ehdr *image)
+{
+ int isa_field = elf_gcn_isa_field (image);
+ const char* isa_s = isa_hsa_name (isa_field);
+ /* A NULL name means the code object was built for an ISA we do not
+ support at all. */
+ if (!isa_s)
+ {
+ hsa_error ("Unsupported ISA in GCN code object.", HSA_STATUS_ERROR);
+ return false;
+ }
+
+ if (isa_field != agent->device_isa)
+ {
+ char msg[120];
+ const char *agent_isa_s = isa_hsa_name (agent->device_isa);
+ const char *agent_isa_gcc_s = isa_gcc_name (agent->device_isa);
+ /* Both lookups are asserted to succeed for the agent's own ISA; the
+ names are passed to '%s' conversions below. */
+ assert (agent_isa_s);
+ assert (agent_isa_gcc_s);
+
+ /* Build the diagnostic, including the -march value that would match
+ this GPU. snprintf bounds the write to the size of MSG. */
+ snprintf (msg, sizeof msg,
+ "GCN code object ISA '%s' does not match GPU ISA '%s'.\n"
+ "Try to recompile with '-foffload=-march=%s'.\n",
+ isa_s, agent_isa_s, agent_isa_gcc_s);
+
+ hsa_error (msg, HSA_STATUS_ERROR);
+ return false;
+ }
+
+ return true;
+}
+
/* Create and finalize the program consisting of all loaded modules. */
static bool
@@ -2289,6 +2447,9 @@ create_and_finalize_hsa_program (struct agent_info *agent)
{
Elf64_Ehdr *image = (Elf64_Ehdr *)module->image_desc->gcn_image->image;
+ if (!isa_matches_agent (agent, image))
+ goto fail;
+
/* Hide relocations from the HSA runtime loader.
Keep a copy of the unmodified section headers to use later. */
Elf64_Shdr *image_sections = (Elf64_Shdr *)((char *)image
@@ -3236,17 +3397,6 @@ GOMP_OFFLOAD_get_num_devices (void)
return hsa_context.agent_count;
}
-union gomp_device_property_value
-GOMP_OFFLOAD_get_property (int device, int prop)
-{
- /* Stub. Check device and return default value for unsupported properties. */
- /* TODO: Implement this function. */
- get_agent_info (device);
-
- union gomp_device_property_value nullval = { .val = 0 };
- return nullval;
-}
-
/* Initialize device (agent) number N so that it can be used for computation.
Return TRUE on success. */
@@ -3300,12 +3450,19 @@ GOMP_OFFLOAD_init_device (int n)
return hsa_error ("Error requesting maximum queue size of the GCN agent",
status);
- char buf[64];
status = hsa_fns.hsa_agent_get_info_fn (agent->id, HSA_AGENT_INFO_NAME,
- &buf);
+ &agent->name);
if (status != HSA_STATUS_SUCCESS)
return hsa_error ("Error querying the name of the agent", status);
- agent->gfx900_p = (strncmp (buf, "gfx900", 6) == 0);
+
+ agent->device_isa = isa_code (agent->name);
+ if (agent->device_isa < 0)
+ return hsa_error ("Unknown GCN agent architecture", HSA_STATUS_ERROR);
+
+ status = hsa_fns.hsa_agent_get_info_fn (agent->id, HSA_AGENT_INFO_VENDOR_NAME,
+ &agent->vendor_name);
+ if (status != HSA_STATUS_SUCCESS)
+ return hsa_error ("Error querying the vendor name of the agent", status);
status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size,
HSA_QUEUE_TYPE_MULTI,
@@ -3318,6 +3475,9 @@ GOMP_OFFLOAD_init_device (int n)
status = hsa_fns.hsa_agent_iterate_regions_fn (agent->id,
get_kernarg_memory_region,
&agent->kernarg_region);
+ if (status != HSA_STATUS_SUCCESS
+ && status != HSA_STATUS_INFO_BREAK)
+ hsa_error ("Scanning memory regions failed", status);
if (agent->kernarg_region.handle == (uint64_t) -1)
{
GOMP_PLUGIN_error ("Could not find suitable memory region for kernel "
@@ -3331,6 +3491,9 @@ GOMP_OFFLOAD_init_device (int n)
status = hsa_fns.hsa_agent_iterate_regions_fn (agent->id,
get_data_memory_region,
&agent->data_region);
+ if (status != HSA_STATUS_SUCCESS
+ && status != HSA_STATUS_INFO_BREAK)
+ hsa_error ("Scanning memory regions failed", status);
if (agent->data_region.handle == (uint64_t) -1)
{
GOMP_PLUGIN_error ("Could not find suitable memory region for device "
@@ -3999,6 +4162,42 @@ GOMP_OFFLOAD_openacc_async_dev2host (int device, void *dst, const void *src,
return true;
}
+/* Return the value of OpenACC property PROP for device number DEVICE.
+
+ The result starts out as { .val = 0 } and is returned unchanged for
+ GOACC_PROPERTY_FREE_MEMORY (not supported), for a failed memory-size query,
+ and for any PROP value not listed in the switch. For the string
+ properties the returned pointer refers to storage held in the agent_info
+ structure or in hsa_context, not to a fresh copy. */
+
+union goacc_property_value
+GOMP_OFFLOAD_openacc_get_property (int device, enum goacc_property prop)
+{
+ struct agent_info *agent = get_agent_info (device);
+
+ union goacc_property_value propval = { .val = 0 };
+
+ switch (prop)
+ {
+ case GOACC_PROPERTY_FREE_MEMORY:
+ /* Not supported. */
+ break;
+ case GOACC_PROPERTY_MEMORY:
+ {
+ /* Report the size of the agent's data memory region; leave the
+ result at zero if the HSA query does not succeed. */
+ size_t size;
+ hsa_region_t region = agent->data_region;
+ hsa_status_t status =
+ hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_SIZE, &size);
+ if (status == HSA_STATUS_SUCCESS)
+ propval.val = size;
+ break;
+ }
+ case GOACC_PROPERTY_NAME:
+ propval.ptr = agent->name;
+ break;
+ case GOACC_PROPERTY_VENDOR:
+ propval.ptr = agent->vendor_name;
+ break;
+ case GOACC_PROPERTY_DRIVER:
+ propval.ptr = hsa_context.driver_version_s;
+ break;
+ }
+
+ return propval;
+}
+
/* Set up plugin-specific thread-local-data (host-side). */
void *
diff --git a/libgomp/plugin/plugin-hsa.c b/libgomp/plugin/plugin-hsa.c
index b04923b..abd3bc6 100644
--- a/libgomp/plugin/plugin-hsa.c
+++ b/libgomp/plugin/plugin-hsa.c
@@ -699,32 +699,6 @@ GOMP_OFFLOAD_get_num_devices (void)
return hsa_context.agent_count;
}
-/* Part of the libgomp plugin interface. Return the value of property
- PROP of agent number N. */
-
-union gomp_device_property_value
-GOMP_OFFLOAD_get_property (int n, int prop)
-{
- union gomp_device_property_value nullval = { .val = 0 };
-
- if (!init_hsa_context ())
- return nullval;
- if (n >= hsa_context.agent_count)
- {
- GOMP_PLUGIN_error
- ("Request for a property of a non-existing HSA device %i", n);
- return nullval;
- }
-
- switch (prop)
- {
- case GOMP_DEVICE_PROPERTY_VENDOR:
- return (union gomp_device_property_value) { .ptr = "HSA" };
- default:
- return nullval;
- }
-}
-
/* Part of the libgomp plugin interface. Initialize agent number N so that it
can be used for computation. Return TRUE on success. */
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index e867b4c..ec103a2 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -1121,74 +1121,6 @@ GOMP_OFFLOAD_get_num_devices (void)
return nvptx_get_num_devices ();
}
-union gomp_device_property_value
-GOMP_OFFLOAD_get_property (int n, int prop)
-{
- union gomp_device_property_value propval = { .val = 0 };
-
- pthread_mutex_lock (&ptx_dev_lock);
-
- if (n >= nvptx_get_num_devices () || n < 0 || ptx_devices[n] == NULL)
- {
- pthread_mutex_unlock (&ptx_dev_lock);
- return propval;
- }
-
- struct ptx_device *ptx_dev = ptx_devices[n];
- switch (prop)
- {
- case GOMP_DEVICE_PROPERTY_MEMORY:
- {
- size_t total_mem;
-
- CUDA_CALL_ERET (propval, cuDeviceTotalMem, &total_mem, ptx_dev->dev);
- propval.val = total_mem;
- }
- break;
- case GOMP_DEVICE_PROPERTY_FREE_MEMORY:
- {
- size_t total_mem;
- size_t free_mem;
- CUdevice ctxdev;
-
- CUDA_CALL_ERET (propval, cuCtxGetDevice, &ctxdev);
- if (ptx_dev->dev == ctxdev)
- CUDA_CALL_ERET (propval, cuMemGetInfo, &free_mem, &total_mem);
- else if (ptx_dev->ctx)
- {
- CUcontext old_ctx;
-
- CUDA_CALL_ERET (propval, cuCtxPushCurrent, ptx_dev->ctx);
- CUDA_CALL_ERET (propval, cuMemGetInfo, &free_mem, &total_mem);
- CUDA_CALL_ASSERT (cuCtxPopCurrent, &old_ctx);
- }
- else
- {
- CUcontext new_ctx;
-
- CUDA_CALL_ERET (propval, cuCtxCreate, &new_ctx, CU_CTX_SCHED_AUTO,
- ptx_dev->dev);
- CUDA_CALL_ERET (propval, cuMemGetInfo, &free_mem, &total_mem);
- CUDA_CALL_ASSERT (cuCtxDestroy, new_ctx);
- }
- propval.val = free_mem;
- }
- break;
- case GOMP_DEVICE_PROPERTY_NAME:
- propval.ptr = ptx_dev->name;
- break;
- case GOMP_DEVICE_PROPERTY_VENDOR:
- propval.ptr = "Nvidia";
- break;
- case GOMP_DEVICE_PROPERTY_DRIVER:
- propval.ptr = cuda_driver_version_s;
- break;
- }
-
- pthread_mutex_unlock (&ptx_dev_lock);
- return propval;
-}
-
bool
GOMP_OFFLOAD_init_device (int n)
{
@@ -1818,6 +1750,76 @@ GOMP_OFFLOAD_openacc_async_dev2host (int ord, void *dst, const void *src,
return true;
}
+/* Return the value of OpenACC property PROP for device number N.
+
+ The result starts out as { .val = 0 } and is returned unchanged when N is
+ out of range, when ptx_devices[N] is NULL, or when PROP is not one of the
+ listed properties. ptx_dev_lock is taken on entry and released before
+ each of the returns written out in this function.
+
+ NOTE(review): CUDA_CALL_ERET presumably returns its first argument when
+ the CUDA call fails. If so, those error paths leave ptx_dev_lock held
+ and, in the FREE_MEMORY case, may leave a pushed or newly created context
+ behind -- confirm against the macro definition. */
+
+union goacc_property_value
+GOMP_OFFLOAD_openacc_get_property (int n, enum goacc_property prop)
+{
+ union goacc_property_value propval = { .val = 0 };
+
+ pthread_mutex_lock (&ptx_dev_lock);
+
+ if (n >= nvptx_get_num_devices () || n < 0 || ptx_devices[n] == NULL)
+ {
+ pthread_mutex_unlock (&ptx_dev_lock);
+ return propval;
+ }
+
+ struct ptx_device *ptx_dev = ptx_devices[n];
+ switch (prop)
+ {
+ case GOACC_PROPERTY_MEMORY:
+ {
+ size_t total_mem;
+
+ CUDA_CALL_ERET (propval, cuDeviceTotalMem, &total_mem, ptx_dev->dev);
+ propval.val = total_mem;
+ }
+ break;
+ case GOACC_PROPERTY_FREE_MEMORY:
+ {
+ size_t total_mem;
+ size_t free_mem;
+ CUdevice ctxdev;
+
+ /* cuMemGetInfo is called in one of three ways: directly when the
+ current context's device is the requested one, with the device's
+ existing context pushed for the duration of the call, or with a
+ temporary context that is destroyed afterwards. */
+ CUDA_CALL_ERET (propval, cuCtxGetDevice, &ctxdev);
+ if (ptx_dev->dev == ctxdev)
+ CUDA_CALL_ERET (propval, cuMemGetInfo, &free_mem, &total_mem);
+ else if (ptx_dev->ctx)
+ {
+ CUcontext old_ctx;
+
+ CUDA_CALL_ERET (propval, cuCtxPushCurrent, ptx_dev->ctx);
+ CUDA_CALL_ERET (propval, cuMemGetInfo, &free_mem, &total_mem);
+ CUDA_CALL_ASSERT (cuCtxPopCurrent, &old_ctx);
+ }
+ else
+ {
+ CUcontext new_ctx;
+
+ CUDA_CALL_ERET (propval, cuCtxCreate, &new_ctx, CU_CTX_SCHED_AUTO,
+ ptx_dev->dev);
+ CUDA_CALL_ERET (propval, cuMemGetInfo, &free_mem, &total_mem);
+ CUDA_CALL_ASSERT (cuCtxDestroy, new_ctx);
+ }
+ propval.val = free_mem;
+ }
+ break;
+ case GOACC_PROPERTY_NAME:
+ propval.ptr = ptx_dev->name;
+ break;
+ case GOACC_PROPERTY_VENDOR:
+ propval.ptr = "Nvidia";
+ break;
+ case GOACC_PROPERTY_DRIVER:
+ propval.ptr = cuda_driver_version_s;
+ break;
+ default:
+ break;
+ }
+
+ pthread_mutex_unlock (&ptx_dev_lock);
+ return propval;
+}
+
/* Adjust launch dimensions: pick good values for number of blocks and warps
and ensure that number of warps does not exceed CUDA limits as well as GCC's
own limits. */
@@ -1929,9 +1931,4 @@ GOMP_OFFLOAD_run (int ord, void *tgt_fn, void *tgt_vars, void **args)
nvptx_stacks_free (stacks, teams * threads);
}
-void
-GOMP_OFFLOAD_async_run (int ord, void *tgt_fn, void *tgt_vars, void **args,
- void *async_data)
-{
- GOMP_PLUGIN_fatal ("GOMP_OFFLOAD_async_run unimplemented");
-}
+/* TODO: Implement GOMP_OFFLOAD_async_run. */
diff --git a/libgomp/target.c b/libgomp/target.c
index 617baec..3642547 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -720,7 +720,8 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
tgt->list[i].offset = OFFSET_INLINED;
continue;
}
- else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
+ else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
+ || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
{
tgt->list[i].key = NULL;
if (!not_found_cnt)
@@ -739,16 +740,24 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
cur_node.host_start = (uintptr_t) hostaddrs[i];
cur_node.host_end = cur_node.host_start;
splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
- if (n == NULL)
+ if (n != NULL)
+ {
+ cur_node.host_start -= n->host_start;
+ hostaddrs[i]
+ = (void *) (n->tgt->tgt_start + n->tgt_offset
+ + cur_node.host_start);
+ }
+ else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
{
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("use_device_ptr pointer wasn't mapped");
}
- cur_node.host_start -= n->host_start;
- hostaddrs[i]
- = (void *) (n->tgt->tgt_start + n->tgt_offset
- + cur_node.host_start);
- tgt->list[i].offset = ~(uintptr_t) 0;
+ else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
+ /* If not present, continue using the host address. */
+ ;
+ else
+ __builtin_unreachable ();
+ tgt->list[i].offset = OFFSET_INLINED;
}
else
tgt->list[i].offset = 0;
@@ -973,22 +982,40 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
case GOMP_MAP_FIRSTPRIVATE_INT:
case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
continue;
+ case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
+ /* The OpenACC 'host_data' construct only allows 'use_device'
+ "mapping" clauses, so in the first loop, 'not_found_cnt'
+ must always have been zero, so all OpenACC 'use_device'
+ clauses have already been handled. (We can only easily test
+ 'use_device' with 'if_present' clause here.) */
+ assert (tgt->list[i].offset == OFFSET_INLINED);
+ /* Nevertheless, FALLTHRU to the normal handling, to keep the
+ code conceptually simple, similar to the first loop. */
case GOMP_MAP_USE_DEVICE_PTR:
if (tgt->list[i].offset == 0)
{
cur_node.host_start = (uintptr_t) hostaddrs[i];
cur_node.host_end = cur_node.host_start;
n = gomp_map_lookup (mem_map, &cur_node);
- if (n == NULL)
+ if (n != NULL)
+ {
+ cur_node.host_start -= n->host_start;
+ hostaddrs[i]
+ = (void *) (n->tgt->tgt_start + n->tgt_offset
+ + cur_node.host_start);
+ }
+ else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
{
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("use_device_ptr pointer wasn't mapped");
}
- cur_node.host_start -= n->host_start;
- hostaddrs[i]
- = (void *) (n->tgt->tgt_start + n->tgt_offset
- + cur_node.host_start);
- tgt->list[i].offset = ~(uintptr_t) 0;
+ else if ((kind & typemask)
+ == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
+ /* If not present, continue using the host address. */
+ ;
+ else
+ __builtin_unreachable ();
+ tgt->list[i].offset = OFFSET_INLINED;
}
continue;
case GOMP_MAP_STRUCT:
@@ -1621,8 +1648,9 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
{
struct addr_pair *target_var = &target_table[num_funcs + i];
uintptr_t target_size = target_var->end - target_var->start;
+ bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];
- if ((uintptr_t) host_var_table[i * 2 + 1] != target_size)
+ if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
{
gomp_mutex_unlock (&devicep->lock);
if (is_register_lock)
@@ -1636,7 +1664,7 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
= k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
k->tgt = tgt;
k->tgt_offset = target_var->start;
- k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
+ k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
k->virtual_refcount = 0;
k->aux = NULL;
array->left = NULL;
@@ -1995,6 +2023,16 @@ GOMP_target (int device, void (*fn) (void *), const void *unused,
gomp_unmap_vars (tgt_vars, true);
}
+static inline unsigned int
+clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
+{
+ /* If we cannot run asynchronously, simply ignore nowait. */
+ if (devicep != NULL && devicep->async_run_func == NULL)
+ flags &= ~GOMP_TARGET_FLAG_NOWAIT;
+
+ return flags;
+}
+
/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
and several arguments have been added:
FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
@@ -2027,6 +2065,8 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
size_t tgt_align = 0, tgt_size = 0;
bool fpc_done = false;
+ flags = clear_unsupported_flags (devicep, flags);
+
if (flags & GOMP_TARGET_FLAG_NOWAIT)
{
struct gomp_thread *thr = gomp_thread ();
@@ -2440,7 +2480,9 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
}
}
- size_t i;
+ /* The variables are mapped separately such that they can be released
+ independently. */
+ size_t i, j;
if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
for (i = 0; i < mapnum; i++)
if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
@@ -2449,6 +2491,15 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
&kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
i += sizes[i];
}
+ else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
+ {
+ for (j = i + 1; j < mapnum; j++)
+ if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff))
+ break;
+ gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i],
+ &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
+ i += j - i - 1;
+ }
else
gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
true, GOMP_MAP_VARS_ENTER_DATA);
@@ -2497,6 +2548,7 @@ gomp_target_task_fn (void *data)
}
ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
+ assert (devicep->async_run_func);
devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
ttask->args, (void *) ttask);
return true;
@@ -3001,7 +3053,6 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
DLSYM (get_caps);
DLSYM (get_type);
DLSYM (get_num_devices);
- DLSYM (get_property);
DLSYM (init_device);
DLSYM (fini_device);
DLSYM (load_image);
@@ -3014,7 +3065,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
{
DLSYM (run);
- DLSYM (async_run);
+ DLSYM_OPT (async_run, async_run);
DLSYM_OPT (can_run, can_run);
DLSYM (dev2dev);
}
@@ -3034,7 +3085,8 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
openacc_async_queue_callback)
|| !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
|| !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
- || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev))
+ || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
+ || !DLSYM_OPT (openacc.get_property, openacc_get_property))
{
/* Require all the OpenACC handlers if we have
GOMP_OFFLOAD_CAP_OPENACC_200. */
diff --git a/libgomp/team.c b/libgomp/team.c
index 82f26a0..cbc3aec 100644
--- a/libgomp/team.c
+++ b/libgomp/team.c
@@ -636,6 +636,7 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
nthr->ts.active_level = thr->ts.active_level;
nthr->ts.place_partition_off = place_partition_off;
nthr->ts.place_partition_len = place_partition_len;
+ nthr->ts.def_allocator = thr->ts.def_allocator;
#ifdef HAVE_SYNC_BUILTINS
nthr->ts.single_count = 0;
#endif
@@ -823,6 +824,7 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
start_data->ts.team_id = i;
start_data->ts.level = team->prev_ts.level + 1;
start_data->ts.active_level = thr->ts.active_level;
+ start_data->ts.def_allocator = thr->ts.def_allocator;
#ifdef HAVE_SYNC_BUILTINS
start_data->ts.single_count = 0;
#endif
diff --git a/libgomp/testsuite/Makefile.am b/libgomp/testsuite/Makefile.am
index 62b1855..655a413 100644
--- a/libgomp/testsuite/Makefile.am
+++ b/libgomp/testsuite/Makefile.am
@@ -12,6 +12,8 @@ _RUNTEST = $(shell if test -f $(top_srcdir)/../dejagnu/runtest; then \
echo $(top_srcdir)/../dejagnu/runtest; else echo runtest; fi)
RUNTESTDEFAULTFLAGS = --tool $$tool --srcdir $$srcdir
+EXTRA_DEJAGNU_SITE_CONFIG = libgomp-site-extra.exp
+
# Instead of directly in ../testsuite/libgomp-test-support.exp.in, the
# following variables have to be "routed through" this Makefile, for expansion
# of the several (Makefile) variables used therein.
diff --git a/libgomp/testsuite/Makefile.in b/libgomp/testsuite/Makefile.in
index f0da16d..52aa6c5 100644
--- a/libgomp/testsuite/Makefile.in
+++ b/libgomp/testsuite/Makefile.in
@@ -99,17 +99,20 @@ am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
$(top_srcdir)/../config/lthostflags.m4 \
$(top_srcdir)/../config/multi.m4 \
$(top_srcdir)/../config/override.m4 \
- $(top_srcdir)/../config/tls.m4 $(top_srcdir)/../ltoptions.m4 \
- $(top_srcdir)/../ltsugar.m4 $(top_srcdir)/../ltversion.m4 \
- $(top_srcdir)/../lt~obsolete.m4 $(top_srcdir)/acinclude.m4 \
- $(top_srcdir)/../libtool.m4 $(top_srcdir)/../config/cet.m4 \
+ $(top_srcdir)/../config/tls.m4 \
+ $(top_srcdir)/../config/toolexeclibdir.m4 \
+ $(top_srcdir)/../ltoptions.m4 $(top_srcdir)/../ltsugar.m4 \
+ $(top_srcdir)/../ltversion.m4 $(top_srcdir)/../lt~obsolete.m4 \
+ $(top_srcdir)/acinclude.m4 $(top_srcdir)/../libtool.m4 \
+ $(top_srcdir)/../config/cet.m4 \
$(top_srcdir)/plugin/configfrag.ac $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
DIST_COMMON = $(srcdir)/Makefile.am
mkinstalldirs = $(SHELL) $(top_srcdir)/../mkinstalldirs
CONFIG_HEADER = $(top_builddir)/config.h
-CONFIG_CLEAN_FILES = libgomp-test-support.pt.exp
+CONFIG_CLEAN_FILES = libgomp-test-support.pt.exp \
+ libgomp-site-extra.exp
CONFIG_CLEAN_VPATH_FILES =
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
@@ -308,6 +311,7 @@ _RUNTEST = $(shell if test -f $(top_srcdir)/../dejagnu/runtest; then \
echo $(top_srcdir)/../dejagnu/runtest; else echo runtest; fi)
RUNTESTDEFAULTFLAGS = --tool $$tool --srcdir $$srcdir
+EXTRA_DEJAGNU_SITE_CONFIG = libgomp-site-extra.exp
all: all-am
.SUFFIXES:
@@ -342,6 +346,8 @@ $(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
$(am__aclocal_m4_deps):
libgomp-test-support.pt.exp: $(top_builddir)/config.status $(srcdir)/libgomp-test-support.exp.in
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
+libgomp-site-extra.exp: $(top_builddir)/config.status $(srcdir)/libgomp-site-extra.exp.in
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
mostlyclean-libtool:
-rm -f *.lo
diff --git a/libgomp/testsuite/lib/libgomp.exp b/libgomp/testsuite/lib/libgomp.exp
index f52ed71..ee5f0e5 100644
--- a/libgomp/testsuite/lib/libgomp.exp
+++ b/libgomp/testsuite/lib/libgomp.exp
@@ -319,7 +319,7 @@ proc libgomp_option_proc { option } {
proc offload_target_to_openacc_device_type { offload_target } {
switch -glob $offload_target {
amdgcn* {
- return "gcn"
+ return "radeon"
}
disable {
return "host"
@@ -346,11 +346,11 @@ proc check_effective_target_offload_target_nvptx { } {
# files; in particular, '-foffload', 'libgomp.oacc-*/*.exp'), which don't
# get passed on to 'check_effective_target_*' functions. (Not caching the
# result due to that.)
- set options [current_compiler_flags]
+ set options [list "additional_flags=[concat "-v" [current_compiler_flags]]"]
# Instead of inspecting command-line options, look what the compiler driver
# decides. This is somewhat modelled after
# 'gcc/testsuite/lib/target-supports.exp:check_configured_with'.
- set gcc_output [libgomp_target_compile "-v $options" "" "none" ""]
+ set gcc_output [libgomp_target_compile "" "" "none" $options]
if [regexp "(?n)^OFFLOAD_TARGET_NAMES=(.*)" $gcc_output dummy offload_targets] {
verbose "compiling for offload targets: $offload_targets"
return [string match "*:nvptx*:*" ":$offload_targets:"]
@@ -483,22 +483,22 @@ proc check_effective_target_hsa_offloading_selected {} {
}]
}
-# Return 1 if at least one AMD GCN board is present.
+# Return 1 if at least one AMD GPU is accessible.
-proc check_effective_target_openacc_amdgcn_accel_present { } {
- return [check_runtime openacc_amdgcn_accel_present {
+proc check_effective_target_openacc_radeon_accel_present { } {
+ return [check_runtime openacc_radeon_accel_present {
#include <openacc.h>
int main () {
- return !(acc_get_num_devices (acc_device_gcn) > 0);
+ return !(acc_get_num_devices (acc_device_radeon) > 0);
}
} "" ]
}
-# Return 1 if at least one AMD GCN board is present, and the AMD GCN device
-# type is selected by default.
+# Return 1 if at least one AMD GPU is accessible, and the OpenACC 'radeon'
+# device type is selected.
-proc check_effective_target_openacc_amdgcn_accel_selected { } {
- if { ![check_effective_target_openacc_amdgcn_accel_present] } {
+proc check_effective_target_openacc_radeon_accel_selected { } {
+ if { ![check_effective_target_openacc_radeon_accel_present] } {
return 0;
}
global offload_target
diff --git a/libgomp/testsuite/libgomp-site-extra.exp.in b/libgomp/testsuite/libgomp-site-extra.exp.in
new file mode 100644
index 0000000..c0d2666
--- /dev/null
+++ b/libgomp/testsuite/libgomp-site-extra.exp.in
@@ -0,0 +1 @@
+set GCC_UNDER_TEST {@CC@}
diff --git a/libgomp/testsuite/libgomp-test-support.exp.in b/libgomp/testsuite/libgomp-test-support.exp.in
index 6ec10c7..98fb442 100644
--- a/libgomp/testsuite/libgomp-test-support.exp.in
+++ b/libgomp/testsuite/libgomp-test-support.exp.in
@@ -1,5 +1,3 @@
-set GCC_UNDER_TEST {@CC@}
-
set cuda_driver_include "@CUDA_DRIVER_INCLUDE@"
set cuda_driver_lib "@CUDA_DRIVER_LIB@"
set hsa_runtime_lib "@HSA_RUNTIME_LIB@"
diff --git a/libgomp/testsuite/libgomp.c++/pr93931.C b/libgomp/testsuite/libgomp.c++/pr93931.C
new file mode 100644
index 0000000..4d4232e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/pr93931.C
@@ -0,0 +1,120 @@
+// PR c++/93931
+// { dg-do run }
+// { dg-options "-O2 -std=c++14" }
+
+extern "C" void abort ();
+
+void
+sink (int &x)
+{
+ int *volatile p;
+ p = &x;
+ (*p)++;
+}
+
+int
+foo ()
+{
+ int r = 0;
+ [&r] () {
+#pragma omp parallel for reduction(+ : r)
+ for (int i = 0; i < 1024; ++i)
+ r += i;
+ } ();
+ return r;
+}
+
+int
+bar ()
+{
+ int l = 0;
+ [&l] () {
+#pragma omp parallel for lastprivate (l)
+ for (int i = 0; i < 1024; ++i)
+ l = i;
+ } ();
+ return l;
+}
+
+void
+baz ()
+{
+ int f = 18;
+ [&f] () {
+#pragma omp parallel for firstprivate (f)
+ for (int i = 0; i < 1024; ++i)
+ {
+ sink (f);
+ f += 3;
+ sink (f);
+ if (f != 23)
+ abort ();
+ sink (f);
+ f -= 7;
+ sink (f);
+ }
+ } ();
+ if (f != 18)
+ abort ();
+}
+
+int
+qux ()
+{
+ int r = 0;
+ [&] () {
+#pragma omp parallel for reduction(+ : r)
+ for (int i = 0; i < 1024; ++i)
+ r += i;
+ } ();
+ return r;
+}
+
+int
+corge ()
+{
+ int l = 0;
+ [&] () {
+#pragma omp parallel for lastprivate (l)
+ for (int i = 0; i < 1024; ++i)
+ l = i;
+ } ();
+ return l;
+}
+
+void
+garply ()
+{
+ int f = 18;
+ [&] () {
+#pragma omp parallel for firstprivate (f)
+ for (int i = 0; i < 1024; ++i)
+ {
+ sink (f);
+ f += 3;
+ sink (f);
+ if (f != 23)
+ abort ();
+ sink (f);
+ f -= 7;
+ sink (f);
+ }
+ } ();
+ if (f != 18)
+ abort ();
+}
+
+int
+main ()
+{
+ if (foo () != 1024 * 1023 / 2)
+ abort ();
+ if (bar () != 1023)
+ abort ();
+ baz ();
+ if (qux () != 1024 * 1023 / 2)
+ abort ();
+ if (corge () != 1023)
+ abort ();
+ garply ();
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/alloc-1.c b/libgomp/testsuite/libgomp.c-c++-common/alloc-1.c
new file mode 100644
index 0000000..9259a9c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/alloc-1.c
@@ -0,0 +1,157 @@
+#include <omp.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+const omp_alloctrait_t traits2[]
+= { { omp_atk_alignment, 16 },
+ { omp_atk_sync_hint, omp_atv_default },
+ { omp_atk_access, omp_atv_default },
+ { omp_atk_pool_size, 1024 },
+ { omp_atk_fallback, omp_atv_default_mem_fb },
+ { omp_atk_partition, omp_atv_environment } };
+omp_alloctrait_t traits3[]
+= { { omp_atk_sync_hint, omp_atv_uncontended },
+ { omp_atk_alignment, 32 },
+ { omp_atk_access, omp_atv_all },
+ { omp_atk_pool_size, 512 },
+ { omp_atk_fallback, omp_atv_allocator_fb },
+ { omp_atk_fb_data, 0 },
+ { omp_atk_partition, omp_atv_default } };
+const omp_alloctrait_t traits4[]
+= { { omp_atk_alignment, 128 },
+ { omp_atk_pool_size, 1024 },
+ { omp_atk_fallback, omp_atv_null_fb } };
+
+int
+main ()
+{
+ int *volatile p = (int *) omp_alloc (3 * sizeof (int), omp_default_mem_alloc);
+ int *volatile q;
+ int *volatile r;
+ omp_alloctrait_t traits[3]
+ = { { omp_atk_alignment, 64 },
+ { omp_atk_fallback, omp_atv_null_fb },
+ { omp_atk_pool_size, 4096 } };
+ omp_allocator_handle_t a, a2;
+
+ if ((((uintptr_t) p) % __alignof (int)) != 0)
+ abort ();
+ p[0] = 1;
+ p[1] = 2;
+ p[2] = 3;
+ omp_free (p, omp_default_mem_alloc);
+ p = (int *) omp_alloc (2 * sizeof (int), omp_default_mem_alloc);
+ if ((((uintptr_t) p) % __alignof (int)) != 0)
+ abort ();
+ p[0] = 1;
+ p[1] = 2;
+ omp_free (p, omp_null_allocator);
+ omp_set_default_allocator (omp_default_mem_alloc);
+ p = (int *) omp_alloc (sizeof (int), omp_null_allocator);
+ if ((((uintptr_t) p) % __alignof (int)) != 0)
+ abort ();
+ p[0] = 3;
+ omp_free (p, omp_get_default_allocator ());
+
+ a = omp_init_allocator (omp_default_mem_space, 3, traits);
+ if (a == omp_null_allocator)
+ abort ();
+ p = (int *) omp_alloc (3072, a);
+ if ((((uintptr_t) p) % 64) != 0)
+ abort ();
+ p[0] = 1;
+ p[3071 / sizeof (int)] = 2;
+ if (omp_alloc (3072, a) != NULL)
+ abort ();
+ omp_free (p, a);
+ p = (int *) omp_alloc (3072, a);
+ p[0] = 3;
+ p[3071 / sizeof (int)] = 4;
+ omp_free (p, omp_null_allocator);
+ omp_set_default_allocator (a);
+ if (omp_get_default_allocator () != a)
+ abort ();
+ p = (int *) omp_alloc (3072, omp_null_allocator);
+ if (omp_alloc (3072, omp_null_allocator) != NULL)
+ abort ();
+ omp_free (p, a);
+ omp_destroy_allocator (a);
+
+ a = omp_init_allocator (omp_default_mem_space,
+ sizeof (traits2) / sizeof (traits2[0]),
+ traits2);
+ if (a == omp_null_allocator)
+ abort ();
+ if (traits3[5].key != omp_atk_fb_data)
+ abort ();
+ traits3[5].value = (uintptr_t) a;
+ a2 = omp_init_allocator (omp_default_mem_space,
+ sizeof (traits3) / sizeof (traits3[0]),
+ traits3);
+ if (a2 == omp_null_allocator)
+ abort ();
+ p = (int *) omp_alloc (420, a2);
+ if ((((uintptr_t) p) % 32) != 0)
+ abort ();
+ p[0] = 5;
+ p[419 / sizeof (int)] = 6;
+ q = (int *) omp_alloc (768, a2);
+ if ((((uintptr_t) q) % 16) != 0)
+ abort ();
+ q[0] = 7;
+ q[767 / sizeof (int)] = 8;
+ r = (int *) omp_alloc (512, a2);
+ if ((((uintptr_t) r) % __alignof (int)) != 0)
+ abort ();
+ r[0] = 9;
+ r[511 / sizeof (int)] = 10;
+ omp_free (p, omp_null_allocator);
+ omp_free (q, a2);
+ omp_free (r, omp_null_allocator);
+ omp_destroy_allocator (a2);
+ omp_destroy_allocator (a);
+
+ a = omp_init_allocator (omp_default_mem_space,
+ sizeof (traits4) / sizeof (traits4[0]),
+ traits4);
+ if (a == omp_null_allocator)
+ abort ();
+ if (traits3[5].key != omp_atk_fb_data)
+ abort ();
+ traits3[5].value = (uintptr_t) a;
+ a2 = omp_init_allocator (omp_default_mem_space,
+ sizeof (traits3) / sizeof (traits3[0]),
+ traits3);
+ if (a2 == omp_null_allocator)
+ abort ();
+ omp_set_default_allocator (a2);
+#ifdef __cplusplus
+ p = static_cast <int *> (omp_alloc (420));
+#else
+ p = (int *) omp_alloc (420, omp_null_allocator);
+#endif
+ if ((((uintptr_t) p) % 32) != 0)
+ abort ();
+ p[0] = 5;
+ p[419 / sizeof (int)] = 6;
+ q = (int *) omp_alloc (768, omp_null_allocator);
+ if ((((uintptr_t) q) % 128) != 0)
+ abort ();
+ q[0] = 7;
+ q[767 / sizeof (int)] = 8;
+ if (omp_alloc (768, omp_null_allocator) != NULL)
+ abort ();
+#ifdef __cplusplus
+ omp_free (p);
+ omp_free (q);
+ omp_free (NULL);
+#else
+ omp_free (p, omp_null_allocator);
+ omp_free (q, omp_null_allocator);
+ omp_free (NULL, omp_null_allocator);
+#endif
+ omp_free (NULL, omp_null_allocator);
+ omp_destroy_allocator (a2);
+ omp_destroy_allocator (a);
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/alloc-2.c b/libgomp/testsuite/libgomp.c-c++-common/alloc-2.c
new file mode 100644
index 0000000..ee53958
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/alloc-2.c
@@ -0,0 +1,46 @@
+#include <omp.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+int
+main ()
+{
+ omp_alloctrait_t traits[3]
+ = { { omp_atk_alignment, 64 },
+ { omp_atk_fallback, omp_atv_null_fb },
+ { omp_atk_pool_size, 4096 } };
+ omp_allocator_handle_t a
+ = omp_init_allocator (omp_default_mem_space, 3, traits);
+ if (a == omp_null_allocator)
+ abort ();
+
+ #pragma omp parallel num_threads(4)
+ {
+ int n = omp_get_thread_num ();
+ double *volatile p, *volatile q;
+ omp_set_default_allocator ((n & 1) ? a : omp_default_mem_alloc); /* Odd threads default to 'a'. */
+ p = (double *) omp_alloc (1696, omp_null_allocator); /* omp_null_allocator: use the default set above. */
+ if (p == NULL)
+ abort ();
+ p[0] = 1.0;
+ p[1695 / sizeof (double)] = 2.0; /* Last double of the 1696-byte block; index by element size, not pointer size. */
+ #pragma omp barrier
+ omp_set_default_allocator ((n & 1) ? omp_default_mem_alloc : a); /* Swap: even threads now default to 'a'. */
+ q = (double *) omp_alloc (1696, omp_null_allocator);
+ if (n & 1)
+ {
+ if (q == NULL)
+ abort ();
+ q[0] = 3.0;
+ q[1695 / sizeof (double)] = 4.0; /* Same last-element probe as for P. */
+ }
+ else if (q != NULL) /* Even threads allocate from 'a' (pool_size 4096, null fallback) and must get NULL. */
+ abort ();
+ #pragma omp barrier
+ omp_free (p, omp_null_allocator);
+ omp_free (q, omp_null_allocator);
+ omp_set_default_allocator (omp_default_mem_alloc);
+ }
+ omp_destroy_allocator (a);
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/alloc-3.c b/libgomp/testsuite/libgomp.c-c++-common/alloc-3.c
new file mode 100644
index 0000000..a30cdc0
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/alloc-3.c
@@ -0,0 +1,28 @@
+/* { dg-set-target-env-var OMP_ALLOCATOR "omp_cgroup_mem_alloc" } */
+/* { dg-set-target-env-var OMP_DISPLAY_ENV "true" } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <omp.h>
+
+int
+main ()
+{
+ const char *p = getenv ("OMP_ALLOCATOR");
+ if (p && strcmp (p, "omp_cgroup_mem_alloc") == 0)
+ {
+ if (omp_get_default_allocator () != omp_cgroup_mem_alloc)
+ abort ();
+ #pragma omp parallel num_threads (2)
+ {
+ if (omp_get_default_allocator () != omp_cgroup_mem_alloc)
+ abort ();
+ #pragma omp parallel num_threads (2)
+ {
+ if (omp_get_default_allocator () != omp_cgroup_mem_alloc)
+ abort ();
+ }
+ }
+ }
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/alloc-4.c b/libgomp/testsuite/libgomp.c-c++-common/alloc-4.c
new file mode 100644
index 0000000..841e1bc
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/alloc-4.c
@@ -0,0 +1,25 @@
+#include <omp.h>
+#include <stdlib.h>
+
+const omp_alloctrait_t traits[]
+= { { omp_atk_pool_size, 1 },
+ { omp_atk_fallback, omp_atv_abort_fb } };
+
+int
+main ()
+{
+ omp_allocator_handle_t a;
+
+ if (omp_alloc (0, omp_null_allocator) != NULL)
+ abort ();
+ a = omp_init_allocator (omp_default_mem_space, 2, traits);
+ if (a != omp_null_allocator)
+ {
+ if (omp_alloc (0, a) != NULL
+ || omp_alloc (0, a) != NULL
+ || omp_alloc (0, a) != NULL)
+ abort ();
+ omp_destroy_allocator (a);
+ }
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/pr93515.c b/libgomp/testsuite/libgomp.c-c++-common/pr93515.c
new file mode 100644
index 0000000..8a69088
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/pr93515.c
@@ -0,0 +1,36 @@
+/* PR libgomp/93515 */
+
+#include <omp.h>
+#include <stdlib.h>
+
+int
+main ()
+{
+ int i;
+ int a = 42;
+#pragma omp target teams distribute parallel for defaultmap(tofrom: scalar)
+ for (i = 0; i < 64; ++i)
+ if (omp_get_team_num () == 0)
+ if (omp_get_thread_num () == 0)
+ a = 142;
+ if (a != 142)
+ __builtin_abort ();
+ a = 42;
+#pragma omp target parallel for defaultmap(tofrom: scalar)
+ for (i = 0; i < 64; ++i)
+ if (omp_get_thread_num () == 0)
+ a = 143;
+ if (a != 143)
+ __builtin_abort ();
+ a = 42;
+#pragma omp target firstprivate(a)
+ {
+ #pragma omp parallel for
+ for (i = 0; i < 64; ++i)
+ if (omp_get_thread_num () == 0)
+ a = 144;
+ if (a != 144)
+ abort ();
+ }
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-40.c b/libgomp/testsuite/libgomp.c-c++-common/target-40.c
new file mode 100644
index 0000000..22bbdd9
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-40.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O0" } */
+
+extern
+#ifdef __cplusplus
+"C"
+#endif
+void abort (void);
+volatile int v;
+#pragma omp declare target to (v)
+typedef void (*fnp1) (void);
+typedef fnp1 (*fnp2) (void);
+void f1 (void) { v++; }
+void f2 (void) { v += 4; }
+void f3 (void) { v += 16; f1 (); }
+fnp1 f4 (void) { v += 64; return f2; }
+int a = 1;
+int *b = &a;
+int **c = &b;
+fnp2 f5 (void) { f3 (); return f4; }
+#pragma omp declare target to (c)
+
+int
+main ()
+{
+ int err = 0;
+ #pragma omp target map(from:err)
+ {
+ volatile int xa;
+ int *volatile xb;
+ int **volatile xc;
+ fnp2 xd;
+ fnp1 xe;
+ err = 0;
+ xa = a;
+ err |= xa != 1;
+ xb = b;
+ err |= xb != &a;
+ xc = c;
+ err |= xc != &b;
+ xd = f5 ();
+ err |= v != 17;
+ xe = xd ();
+ err |= v != 81;
+ xe ();
+ err |= v != 85;
+ }
+ if (err)
+ abort ();
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/pr93566.c b/libgomp/testsuite/libgomp.c/pr93566.c
new file mode 100644
index 0000000..3334bd57
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/pr93566.c
@@ -0,0 +1,113 @@
+/* PR middle-end/93566 */
+/* { dg-additional-options "-std=c99" } */
+
+extern void abort (void);
+
+void
+foo (int *x)
+{
+ void nest (void) {
+ #pragma omp parallel for reduction(+:x[:10])
+ for (int i = 0; i < 1024; i++)
+ for (int j = 0; j < 10; j++)
+ x[j] += j * i;
+ }
+ nest ();
+ for (int i = 0; i < 10; i++)
+ if (x[i] != 1023 * 1024 / 2 * i)
+ abort ();
+}
+
+void
+bar (void)
+{
+ int x[10] = {};
+ void nest (void) {
+ #pragma omp parallel for reduction(+:x[:10])
+ for (int i = 0; i < 1024; i++)
+ for (int j = 0; j < 10; j++)
+ x[j] += j * i;
+ }
+ nest ();
+ for (int i = 0; i < 10; i++)
+ if (x[i] != 1023 * 1024 / 2 * i)
+ abort ();
+}
+
+void
+baz (void)
+{
+ int x[10] = {};
+ void nest (void) {
+ #pragma omp parallel for reduction(+:x[2:5])
+ for (int i = 0; i < 1024; i++)
+ for (int j = 2; j < 7; j++)
+ x[j] += j * i;
+ }
+ nest ();
+ for (int i = 2; i < 7; i++)
+ if (x[i] != 1023 * 1024 / 2 * i)
+ abort ();
+}
+
+void
+qux (int *x)
+{
+ void nest (void) { x++; }
+ nest ();
+ #pragma omp parallel for reduction(+:x[:9])
+ for (int i = 0; i < 1024; i++)
+ for (int j = 0; j < 9; j++)
+ x[j] += j * i;
+ nest ();
+ for (int i = 0; i < 9; i++)
+ if (x[i - 1] != 1023 * 1024 / 2 * i)
+ abort ();
+}
+
+void
+quux (void)
+{
+ int x[10];
+ void nest (void) { for (int i = 0; i < 10; i++) x[i] = 0; }
+ int nest2 (int i) { return x[i]; }
+ nest ();
+ #pragma omp parallel for reduction(+:x[:7])
+ for (int i = 0; i < 1024; i++)
+ for (int j = 0; j < 7; j++)
+ x[j] += j * i;
+ for (int i = 0; i < 7; i++)
+ if (nest2 (i) != 1023 * 1024 / 2 * i)
+ abort ();
+}
+
+void
+corge (void)
+{
+ int x[10];
+ void nest (void) { for (int i = 0; i < 10; i++) x[i] = 0; }
+ int nest2 (int i) { return x[i]; }
+ nest ();
+ #pragma omp parallel for reduction(+:x[2:4])
+ for (int i = 0; i < 1024; i++)
+ for (int j = 2; j < 6; j++)
+ x[j] += j * i;
+ for (int i = 2; i < 6; i++)
+ if (nest2 (i) != 1023 * 1024 / 2 * i)
+ abort ();
+}
+
+int
+main ()
+{
+ int a[10] = {};
+ foo (a);
+ bar ();
+ baz ();
+ for (int i = 0; i < 10; i++)
+ a[i] = 0;
+ qux (a);
+ quux ();
+ corge ();
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/target-38.c b/libgomp/testsuite/libgomp.c/target-38.c
new file mode 100644
index 0000000..8169972
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-38.c
@@ -0,0 +1,28 @@
+#define A(n) n##0, n##1, n##2, n##3, n##4, n##5, n##6, n##7, n##8, n##9
+#define B(n) A(n##0), A(n##1), A(n##2), A(n##3), A(n##4), A(n##5), A(n##6), A(n##7), A(n##8), A(n##9)
+
+int
+foo (int x)
+{
+ int b[] = { B(4), B(5), B(6) };
+ return b[x];
+}
+
+int v[] = { 1, 2, 3, 4, 5, 6 };
+#pragma omp declare target to (foo, v)
+
+int
+main ()
+{
+ int i = 5;
+ asm ("" : "+g" (i));
+ #pragma omp target map(tofrom:i)
+ {
+ int a[] = { B(1), B(2), B(3) };
+ asm ("" : : "m" (a) : "memory");
+ i = a[i] + foo (i) + v[i & 63];
+ }
+ if (i != 105 + 405 + 6)
+ __builtin_abort ();
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/target-39.c b/libgomp/testsuite/libgomp.c/target-39.c
new file mode 100644
index 0000000..4442f43
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-39.c
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+/* { dg-options "-O0" } */
+
+extern void abort (void);
+volatile int v;
+#pragma omp declare target to (v)
+typedef void (*fnp1) (void);
+typedef fnp1 (*fnp2) (void);
+void f1 (void) { v++; }
+void f2 (void) { v += 4; }
+void f3 (void) { v += 16; f1 (); }
+fnp1 f4 (void) { v += 64; return f2; }
+int a = 1;
+int *b = &a;
+int **c = &b;
+fnp2 f5 (void) { f3 (); return f4; }
+#pragma omp declare target to (c, f5)
+
+int
+main ()
+{
+ int err = 0;
+ #pragma omp target map(from:err)
+ {
+ volatile int xa;
+ int *volatile xb;
+ int **volatile xc;
+ fnp2 xd;
+ fnp1 xe;
+ err = 0;
+ xa = a;
+ err |= xa != 1;
+ xb = b;
+ err |= xb != &a;
+ xc = c;
+ err |= xc != &b;
+ xd = f5 ();
+ err |= v != 17;
+ xe = xd ();
+ err |= v != 81;
+ xe ();
+ err |= v != 85;
+ }
+ if (err)
+ abort ();
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.fortran/async_io_9.f90 b/libgomp/testsuite/libgomp.fortran/async_io_9.f90
new file mode 100644
index 0000000..2dc111c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/async_io_9.f90
@@ -0,0 +1,20 @@
+! { dg-do run }
+! PR 95191 - this used to hang.
+! Original test case by Bill Long.
+program test
+ real a(10000)
+ integer my_id
+ integer bad_id
+ integer :: iostat
+ character (len=100) :: iomsg
+ data my_id /1/
+ data bad_id /2/
+ a = 1.
+ open (unit=10, file='test.dat', form='unformatted', &
+ & asynchronous='yes')
+ write (unit=10, asynchronous='yes', id=my_id) a
+ iomsg = ""
+ wait (unit=10, id=bad_id, iostat=iostat, iomsg=iomsg)
+ if (iostat == 0 .or. iomsg /= "Bad ID in WAIT statement") stop 1
+ close (unit=10, status='delete')
+end program test
diff --git a/libgomp/testsuite/libgomp.fortran/close_errors_1.f90 b/libgomp/testsuite/libgomp.fortran/close_errors_1.f90
new file mode 100644
index 0000000..6edb7da
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/close_errors_1.f90
@@ -0,0 +1,19 @@
+! { dg-do run }
+! PR 95115 - this used to hang with -pthread. Original test case by
+! Bill Long.
+
+program test
+ character(len=16) my_status
+ character(len=1000) :: iomsg
+ open (unit=10, file='test.dat')
+ print *,42
+ write (10, *) 'weird'
+ rewind (10)
+ read (10, *) my_status
+ close (10)
+ open (unit=10, file='test.dat')
+ close (unit=10, status=my_status, iostat=ios, iomsg=iomsg)
+ if (ios == 0) stop 1
+ if (iomsg /= "Bad STATUS parameter in CLOSE statement") stop 2
+ close (10, status='delete')
+end program test
diff --git a/libgomp/testsuite/libgomp.fortran/pr66199-3.f90 b/libgomp/testsuite/libgomp.fortran/pr66199-3.f90
new file mode 100644
index 0000000..7c596dc
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/pr66199-3.f90
@@ -0,0 +1,53 @@
+! { dg-do run }
+!
+! PR fortran/94690
+! PR middle-end/66199
+
+module m
+ integer u(0:1024-1), v(0:1024-1), w(0:1024-1)
+contains
+
+integer(8) function f1 (a, b)
+ implicit none
+ integer, value :: a, b
+ integer(8) :: d
+ !$omp parallel do lastprivate (d) default(none) firstprivate (a, b) shared(u, v, w)
+ do d = a, b-1
+ u(d) = v(d) + w(d)
+ end do
+ f1 = d
+end
+
+integer(8) function f2 (a, b, c)
+ implicit none
+ integer, value :: a, b, c
+ integer(8) :: d, e
+ !$omp parallel do lastprivate (d) default(none) firstprivate (a, b) shared(u, v, w) linear(c:5) lastprivate(e)
+ do d = a, b-1
+ u(d) = v(d) + w(d)
+ c = c + 5
+ e = c
+ end do
+ f2 = d + c + e
+end
+
+integer(8) function f3 (a1, b1, a2, b2)
+ implicit none
+ integer, value :: a1, b1, a2, b2
+ integer(8) d1, d2
+ !$omp parallel do default(none) firstprivate (a1, b1, a2, b2) shared(u, v, w) lastprivate(d1, d2) collapse(2)
+ do d1 = a1, b1-1
+ do d2 = a2, b2-1
+ u(d1 * 32 + d2) = v(d1 * 32 + d2) + w(d1 * 32 + d2)
+ end do
+ end do
+ f3 = d1 + d2
+end
+end module m
+
+program main
+ use m
+ if (f1 (0, 1024) /= 1024) stop 1
+ if (f2 (0, 1024, 17) /= 1024 + 2 * (17 + 5 * 1024)) stop 2
+ if (f3 (0, 32, 0, 32) /= 64) stop 3
+end program main
diff --git a/libgomp/testsuite/libgomp.fortran/pr66199-4.f90 b/libgomp/testsuite/libgomp.fortran/pr66199-4.f90
new file mode 100644
index 0000000..17b62a6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/pr66199-4.f90
@@ -0,0 +1,60 @@
+! { dg-do run }
+!
+! PR fortran/94690
+! PR middle-end/66199
+
+module m
+ implicit none
+ integer u(0:1023), v(0:1023), w(0:1023)
+ !$omp declare target (u, v, w)
+
+contains
+
+subroutine f1 (a, b)
+ integer a, b, d
+ !$omp target teams distribute parallel do default(none) firstprivate (a, b) shared(u, v, w)
+ do d = a, b-1
+ u(d) = v(d) + w(d)
+ end do
+end
+
+subroutine f2 (a, b, c)
+ integer a, b, c, d, e
+ !$omp target teams distribute parallel do default(none) firstprivate (a, b, c) shared(u, v, w) lastprivate(d, e)
+ do d = a, b-1
+ u(d) = v(d) + w(d)
+ e = c + d * 5
+ end do
+end
+
+subroutine f3 (a1, b1, a2, b2)
+ integer :: a1, b1, a2, b2, d1, d2
+ !$omp target teams distribute parallel do default(none) firstprivate (a1, b1, a2, b2) shared(u, v, w) &
+ !$omp& lastprivate(d1, d2) collapse(2)
+ do d1 = a1, b1-1
+ do d2 = a2, b2-1
+ u(d1 * 32 + d2) = v(d1 * 32 + d2) + w(d1 * 32 + d2)
+ end do
+ end do
+end
+
+subroutine f4 (a1, b1, a2, b2)
+ integer :: a1, b1, a2, b2, d1, d2
+ !$omp target teams distribute parallel do default(none) firstprivate (a1, b1, a2, b2) shared(u, v, w) &
+ !$omp& collapse(2)
+ do d1 = a1, b1-1
+ do d2 = a2, b2-1
+ u(d1 * 32 + d2) = v(d1 * 32 + d2) + w(d1 * 32 + d2)
+ end do
+ end do
+end
+end module m
+
+program main
+ use m
+ implicit none
+ call f1 (0, 1024)
+ call f2 (0, 1024, 17)
+ call f3 (0, 32, 0, 32)
+ call f4 (0, 32, 0, 32)
+end
diff --git a/libgomp/testsuite/libgomp.fortran/pr66199-5.f90 b/libgomp/testsuite/libgomp.fortran/pr66199-5.f90
new file mode 100644
index 0000000..9482f08
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/pr66199-5.f90
@@ -0,0 +1,71 @@
+! { dg-do run }
+!
+! PR fortran/94690
+! PR middle-end/66199
+
+module m  ! "teams distribute parallel do simd" loops inside explicit target regions.
+ implicit none
+ integer u(0:1023), v(0:1023), w(0:1023)  ! zero-based; the driver passes 0 as lower loop bound
+ !$omp declare target (u, v, w)
+
+contains
+
+integer function f1 (a, b)  ! returns loop variable d as mapped back from the target region
+ integer :: a, b, d
+ !$omp target map(from: d)
+ !$omp teams distribute parallel do simd default(none) firstprivate (a, b) shared(u, v, w)
+ do d = a, b-1
+ u(d) = v(d) + w(d)
+ end do
+ !$omp end target
+ f1 = d  ! driver expects 1024 for (a, b) = (0, 1024)
+end
+
+integer function f2 (a, b, c)  ! as f1, with d linear and e lastprivate; returns d + e
+ integer :: a, b, c, d, e
+ !$omp target map(from: d, e)
+ !$omp teams distribute parallel do simd default(none) firstprivate (a, b, c) shared(u, v, w) linear(d) lastprivate(e)
+ do d = a, b-1
+ u(d) = v(d) + w(d)
+ e = c + d * 5
+ end do
+ !$omp end target
+ f2 = d + e  ! driver expects 1024 + (17 + 5 * 1023) for (0, 1024, 17)
+end
+
+integer function f3 (a1, b1, a2, b2)  ! collapse(2) nest, d1 and d2 lastprivate; returns d1 + d2
+ integer :: a1, b1, a2, b2, d1, d2
+ !$omp target map(from: d1, d2)
+ !$omp teams distribute parallel do simd default(none) firstprivate (a1, b1, a2, b2) shared(u, v, w) lastprivate(d1, d2) &
+ !$omp& collapse(2)
+ do d1 = a1, b1-1
+ do d2 = a2, b2-1
+ u(d1 * 32 + d2) = v(d1 * 32 + d2) + w(d1 * 32 + d2)  ! (d1, d2) flattened with row length 32
+ end do
+ end do
+ !$omp end target
+ f3 = d1 + d2  ! driver expects 64 for bounds (0, 32, 0, 32)
+end
+
+integer function f4 (a1, b1, a2, b2)  ! same nest as f3 but without a lastprivate clause
+ integer :: a1, b1, a2, b2, d1, d2
+ !$omp target map(from: d1, d2)
+ !$omp teams distribute parallel do simd default(none) firstprivate (a1, b1, a2, b2) shared(u, v, w) collapse(2)
+ do d1 = a1, b1-1
+ do d2 = a2, b2-1
+ u(d1 * 32 + d2) = v(d1 * 32 + d2) + w(d1 * 32 + d2)  ! (d1, d2) flattened with row length 32
+ end do
+ end do
+ !$omp end target
+ f4 = d1 + d2  ! driver expects 64 for bounds (0, 32, 0, 32)
+end
+end module
+
+program main  ! Driver: every check has its own stop code, so the exit status names the failing function.
+ use m
+ implicit none
+ if (f1 (0, 1024) /= 1024) stop 1  ! d after the loop
+ if (f2 (0, 1024, 17) /= 1024 + (17 + 5 * 1023)) stop 2  ! d = 1024 plus e from iteration d = 1023
+ if (f3 (0, 32, 0, 32) /= 64) stop 3  ! d1 + d2 = 32 + 32
+ if (f4 (0, 32, 0, 32) /= 64) stop 4  ! own code, not shared with the f3 check
+end
diff --git a/libgomp/testsuite/libgomp.fortran/pr66199-6.f90 b/libgomp/testsuite/libgomp.fortran/pr66199-6.f90
new file mode 100644
index 0000000..f73f683
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/pr66199-6.f90
@@ -0,0 +1,42 @@
+! { dg-do run }
+!
+! PR fortran/94690
+! PR middle-end/66199
+
+module m  ! "teams distribute parallel do" loops with lastprivate loop variables inside target regions.
+ implicit none
+ integer :: u(0:1023), v(0:1023), w(0:1023)  ! zero-based; the driver passes 0 as lower loop bound
+ !$omp declare target (u, v, w)
+
+contains
+
+integer function f2 (a, b, c)  ! d and e are lastprivate and mapped back; returns d + e
+ integer :: a, b, c, d, e
+ !$omp target map(from: d, e)
+ !$omp teams distribute parallel do default(none) firstprivate (a, b, c) shared(u, v, w) lastprivate(d, e)
+ do d = a, b-1
+ u(d) = v(d) + w(d)
+ e = c + d * 5
+ end do
+ !$omp end target
+ f2 = d + e  ! driver expects 1024 + (17 + 5 * 1023) for (0, 1024, 17)
+end
+
+integer function f3 (a1, b1, a2, b2)  ! collapse(2) nest, d1 and d2 lastprivate; returns d1 + d2
+ integer :: a1, b1, a2, b2, d1, d2
+ !$omp target map(from: d1, d2)
+ !$omp teams distribute parallel do default(none) firstprivate (a1, b1, a2, b2) shared(u, v, w) lastprivate(d1, d2) collapse(2)
+ do d1 = a1, b1-1
+ do d2 = a2, b2-1
+ u(d1 * 32 + d2) = v(d1 * 32 + d2) + w(d1 * 32 + d2)  ! (d1, d2) flattened with row length 32
+ end do
+ end do
+ !$omp end target
+ f3 = d1 + d2  ! driver expects 64 for bounds (0, 32, 0, 32)
+end
+end module m
+
+use m  ! unnamed main program: checks the values the loop variables have after each target region
+ if (f2 (0, 1024, 17) /= 1024 + (17 + 5 * 1023)) stop 1  ! d = 1024 plus e from iteration d = 1023
+ if (f3 (0, 32, 0, 32) /= 64) stop 2  ! d1 + d2 = 32 + 32
+end
diff --git a/libgomp/testsuite/libgomp.fortran/pr66199-7.f90 b/libgomp/testsuite/libgomp.fortran/pr66199-7.f90
new file mode 100644
index 0000000..2bd9468
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/pr66199-7.f90
@@ -0,0 +1,72 @@
+! { dg-do run }
+!
+! PR fortran/94690
+! PR middle-end/66199
+
+module m  ! "teams distribute simd" loops inside explicit target regions.
+ implicit none
+ integer u(0:1023), v(0:1023), w(0:1023)  ! zero-based: the driver starts every loop at index 0
+ !$omp declare target (v, u, w)
+
+contains
+
+integer function f1 (a, b)  ! returns loop variable d as mapped back from the target region
+ integer :: a, b, d
+ !$omp target map(from: d)
+ !$omp teams distribute simd default(none) firstprivate (a, b) shared(u, v, w)
+ do d = a, b-1
+ u(d) = v(d) + w(d)
+ end do
+ !$omp end teams distribute simd
+ !$omp end target
+ f1 = d  ! driver expects 1024 for (a, b) = (0, 1024)
+end
+
+integer function f2 (a, b, c)  ! as f1, with d linear and e lastprivate; returns d + e
+ integer a, b, c, d, e
+ !$omp target map(from: d, e)
+ !$omp teams distribute simd default(none) firstprivate (a, b, c) shared(u, v, w) linear(d) lastprivate(e)
+ do d = a, b-1
+ u(d) = v(d) + w(d)
+ e = c + d * 5
+ end do
+ !$omp end teams distribute simd
+ !$omp end target
+ f2 = d + e  ! driver expects 1024 + (17 + 5 * 1023) for (0, 1024, 17)
+end
+
+integer function f3 (a1, b1, a2, b2)  ! collapse(2) nest, d1 and d2 lastprivate; returns d1 + d2
+ integer :: a1, b1, a2, b2, d1, d2
+ !$omp target map(from: d1, d2)
+ !$omp teams distribute simd default(none) firstprivate (a1, b1, a2, b2) shared(u, v, w) lastprivate(d1, d2) collapse(2)
+ do d1 = a1, b1-1
+ do d2 = a2, b2-1
+ u(d1 * 32 + d2) = v(d1 * 32 + d2) + w(d1 * 32 + d2)  ! flattened index starts at 0
+ end do
+ end do
+ !$omp end teams distribute simd
+ !$omp end target
+ f3 = d1 + d2  ! driver expects 64 for bounds (0, 32, 0, 32)
+end
+
+integer function f4 (a1, b1, a2, b2)  ! same nest as f3 but without a lastprivate clause
+ integer :: a1, b1, a2, b2, d1, d2
+ !$omp target map(from: d1, d2)
+ !$omp teams distribute simd default(none) firstprivate (a1, b1, a2, b2) shared(u, v, w) collapse(2)
+ do d1 = a1, b1-1
+ do d2 = a2, b2-1
+ u(d1 * 32 + d2) = v(d1 * 32 + d2) + w(d1 * 32 + d2)  ! flattened index starts at 0
+ end do
+ end do
+ !$omp end teams distribute simd
+ !$omp end target
+ f4 = d1 + d2  ! driver expects 64 for bounds (0, 32, 0, 32)
+end
+end module
+
+use m  ! unnamed main program: one distinct stop code per checked function
+ if (f1 (0, 1024) /= 1024) stop 1  ! d after the loop
+ if (f2 (0, 1024, 17) /= 1024 + (17 + 5 * 1023)) stop 2  ! d = 1024 plus e from iteration d = 1023
+ if (f3 (0, 32, 0, 32) /= 64) stop 3  ! d1 + d2 = 32 + 32
+ if (f4 (0, 32, 0, 32) /= 64) stop 4  ! same expectation without a lastprivate clause
+end
diff --git a/libgomp/testsuite/libgomp.fortran/pr66199-8.f90 b/libgomp/testsuite/libgomp.fortran/pr66199-8.f90
new file mode 100644
index 0000000..8a21c6f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/pr66199-8.f90
@@ -0,0 +1,76 @@
+! { dg-do run }
+!
+! PR fortran/94690
+! PR middle-end/66199
+
+module m  ! "distribute simd" loops nested in separate teams and target constructs.
+ implicit none
+ integer u(0:1023), v(0:1023), w(0:1023)  ! zero-based; the driver passes 0 as lower loop bound
+ !$omp declare target (u, v, w)
+
+contains
+
+integer function f1 (a, b)  ! a, b and d are shared on teams; a, b firstprivate on distribute simd
+ integer :: a, b, d
+ !$omp target map(from: d)
+ !$omp teams default(none) shared(a, b, d, u, v, w)
+ !$omp distribute simd firstprivate (a, b)
+ do d = a, b-1
+ u(d) = v(d) + w(d)
+ end do
+ !$omp end teams
+ !$omp end target
+ f1 = d  ! driver expects 1024 for (a, b) = (0, 1024)
+end
+
+integer function f2 (a, b, c)  ! d linear and e lastprivate on distribute simd; returns d + e
+ integer a, b, c, d, e
+ !$omp target map(from: d, e)
+ !$omp teams default(none) firstprivate (a, b, c) shared(d, e, u, v, w)
+ !$omp distribute simd linear(d) lastprivate(e)
+ do d = a, b-1
+ u(d) = v(d) + w(d)
+ e = c + d * 5
+ end do
+ !$omp end teams
+ !$omp end target
+ f2 = d + e  ! driver expects 1024 + (17 + 5 * 1023) for (0, 1024, 17)
+end
+
+integer function f3 (a1, b1, a2, b2)  ! collapse(2) nest, d1 and d2 lastprivate; returns d1 + d2
+ integer a1, b1, a2, b2, d1, d2
+ !$omp target map(from: d1, d2)
+ !$omp teams default(none) shared(a1, b1, a2, b2, d1, d2, u, v, w)
+ !$omp distribute simd firstprivate (a1, b1, a2, b2) lastprivate(d1, d2) collapse(2)
+ do d1 = a1, b1-1
+ do d2 = a2, b2-1
+ u(d1 * 32 + d2) = v(d1 * 32 + d2) + w(d1 * 32 + d2)  ! (d1, d2) flattened with row length 32
+ end do
+ end do
+ !$omp end teams
+ !$omp end target
+ f3 = d1 + d2  ! driver expects 64 for bounds (0, 32, 0, 32)
+end
+
+integer function f4 (a1, b1, a2, b2)  ! same nest as f3 but without a lastprivate clause
+ integer a1, b1, a2, b2, d1, d2
+ !$omp target map(from: d1, d2)
+ !$omp teams default(none) firstprivate (a1, b1, a2, b2) shared(d1, d2, u, v, w)
+ !$omp distribute simd collapse(2)
+ do d1 = a1, b1-1
+ do d2 = a2, b2-1
+ u(d1 * 32 + d2) = v(d1 * 32 + d2) + w(d1 * 32 + d2)  ! (d1, d2) flattened with row length 32
+ end do
+ end do
+ !$omp end teams
+ !$omp end target
+ f4 = d1 + d2  ! driver expects 64 for bounds (0, 32, 0, 32)
+end
+end module m
+
+use m  ! unnamed main program: one distinct stop code per checked function
+ if (f1 (0, 1024) /= 1024) stop 1  ! d after the loop
+ if (f2 (0, 1024, 17) /= 1024 + (17 + 5 * 1023)) stop 2  ! d = 1024 plus e from iteration d = 1023
+ if (f3 (0, 32, 0, 32) /= 64) stop 3  ! d1 + d2 = 32 + 32
+ if (f4 (0, 32, 0, 32) /= 64) stop 4  ! same expectation without a lastprivate clause
+end
diff --git a/libgomp/testsuite/libgomp.fortran/pr66199-9.f90 b/libgomp/testsuite/libgomp.fortran/pr66199-9.f90
new file mode 100644
index 0000000..5dde7f8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/pr66199-9.f90
@@ -0,0 +1,46 @@
+! { dg-do run }
+!
+! PR fortran/94690
+! PR middle-end/66199
+
+module m  ! "distribute" loops with lastprivate loop variables, nested in teams and target constructs.
+ implicit none
+ integer u(0:1023), v(0:1023), w(0:1023)  ! zero-based: the driver starts every loop at index 0
+ !$omp declare target (u, v, w)
+
+contains
+
+integer function f2 (a, b, c)  ! d and e lastprivate on distribute; returns d + e
+ integer :: a, b, c, d, e
+ !$omp target map(from: d, e)
+ !$omp teams default(none) firstprivate (a, b, c) shared(d, e, u, v, w)
+ !$omp distribute lastprivate(d, e)
+ do d = a, b-1
+ u(d) = v(d) + w(d)
+ e = c + d * 5
+ end do
+ !$omp end teams
+ !$omp end target
+ f2 = d + e  ! driver expects 1024 + (17 + 5 * 1023) for (0, 1024, 17)
+end
+
+integer function f3 (a1, b1, a2, b2)  ! collapse(2) nest, d1 and d2 lastprivate; returns d1 + d2
+ integer :: a1, b1, a2, b2, d1, d2
+ !$omp target map(from: d1, d2)
+ !$omp teams default(none) shared(a1, b1, a2, b2, d1, d2, u, v, w)
+ !$omp distribute firstprivate (a1, b1, a2, b2) lastprivate(d1, d2) collapse(2)
+ do d1 = a1, b1-1
+ do d2 = a2, b2-1
+ u(d1 * 32 + d2) = v(d1 * 32 + d2) + w(d1 * 32 + d2)  ! flattened index starts at 0
+ end do
+ end do
+ !$omp end teams
+ !$omp end target
+ f3 = d1 + d2  ! driver expects 64 for bounds (0, 32, 0, 32)
+end
+end module
+
+use m  ! unnamed main program: checks the values the loop variables have after each target region
+ if (f2 (0, 1024, 17) /= 1024 + (17 + 5 * 1023)) stop 1  ! d = 1024 plus e from iteration d = 1023
+ if (f3 (0, 32, 0, 32) /= 64) stop 2  ! d1 + d2 = 32 + 32
+end
diff --git a/libgomp/testsuite/libgomp.fortran/target-enter-data-1.f90 b/libgomp/testsuite/libgomp.fortran/target-enter-data-1.f90
new file mode 100644
index 0000000..39faffd
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/target-enter-data-1.f90
@@ -0,0 +1,38 @@
+! { dg-do run }
+
+program main  ! Maps allocatable arrays with target enter/exit data and checks host values afterwards.
+ implicit none
+ integer, allocatable, dimension(:) :: AA, BB, CC, DD
+ integer :: i, N = 20
+
+ allocate(BB(N))  ! BB is allocated explicitly; AA, CC and DD are allocated by assignment below
+ AA = [(i, i=1,N)]
+
+ !$omp target enter data map(alloc: BB)
+ !$omp target enter data map(to: AA)
+
+ !$omp target
+ BB = 3 * AA
+ !$omp end target
+
+ !$omp target exit data map(delete: AA)
+ !$omp target exit data map(from: BB)
+
+ if (any (BB /= [(3*i, i=1,N)])) stop 1  ! BB must hold the values computed in the target region
+ if (any (AA /= [(i, i=1,N)])) stop 2  ! host AA must still hold its initial values
+
+
+ CC = 31 * BB
+ DD = [(-i, i=1,N)]  ! placeholder values; the checks expect them to be replaced by 5 * CC
+
+ !$omp target enter data map(to: CC) map(alloc: DD)
+
+ !$omp target
+ DD = 5 * CC
+ !$omp end target
+
+ !$omp target exit data map(delete: CC) map(from: DD)
+
+ if (any (CC /= [(31*3*i, i=1,N)])) stop 3  ! host CC must still hold 31 * BB
+ if (any (DD /= [(31*3*5*i, i=1,N)])) stop 4  ! DD must hold the values computed in the target region
+end
diff --git a/libgomp/testsuite/libgomp.fortran/target-enter-data-2.F90 b/libgomp/testsuite/libgomp.fortran/target-enter-data-2.F90
new file mode 100644
index 0000000..36a2ed5
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/target-enter-data-2.F90
@@ -0,0 +1,41 @@
+! { dg-additional-options "-DMEM_SHARED" { target offload_device_shared_as } }
+! { dg-do run }
+!
+! PR middle-end/94635
+ implicit none  ! unnamed main program: alloc-mapped allocatable used across several target regions
+ integer, parameter :: N = 20
+ integer, allocatable, dimension(:) :: my1DPtr
+ integer, dimension(N) :: my1DArr
+ integer :: i
+
+ allocate(my1DPtr(N))
+ my1DPtr = 43  ! host-side sentinel, compared again after the mapping is deleted
+
+ !$omp target enter data map(alloc: my1DPtr)
+ !$omp target
+ my1DPtr = [(i , i = 1, N)]
+ !$omp end target
+
+ !$omp target map(from: my1DArr)
+ my1DArr = my1DPtr
+ !$omp end target
+ !$omp target exit data map(delete: my1DPtr)
+
+ if (any (my1DArr /= [(i, i = 1, N)])) stop 1  ! values written in the first target region
+#if MEM_SHARED
+ if (any (my1DArr /= my1DPtr)) stop 2  ! shared-memory build: host array must equal the copied values
+#else
+ if (any (43 /= my1DPtr)) stop 3  ! otherwise the host array must still hold the sentinel
+#endif
+
+ my1DPtr = [(2*N-i, i = 1, N)]
+ my1DArr = 42
+
+ !$omp target map(tofrom: my1DArr) map(tofrom: my1DPtr(:))
+ my1DArr = my1DPtr
+ my1DPtr = 20
+ !$omp end target
+
+ if (any (my1DArr /= [(2*N-i, i = 1, N)])) stop 4  ! copy of the host values taken inside the region
+ if (any (20 /= my1DPtr)) stop 6  ! value assigned inside the region, mapped back by tofrom
+end
diff --git a/libgomp/testsuite/libgomp.fortran/target-var.f90 b/libgomp/testsuite/libgomp.fortran/target-var.f90
new file mode 100644
index 0000000..5e5ccd4
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/target-var.f90
@@ -0,0 +1,32 @@
+! { dg-additional-options "-O3" }
+!
+! With -O3 the static local variable A.10 generated for
+! the array constructor [-2, -4, ..., -20] is optimized
+! away - which has to be handled in the offload_vars table.
+!
+program main  ! Builds a 10-element array and hands it to the offloaded internal procedure bar.
+ implicit none (type, external)
+ integer :: j
+ integer, allocatable :: A(:)
+
+ A = [(3*j, j=1, 10)]  ! allocated by assignment with 10 elements
+ call bar (A)
+ deallocate (A)
+contains
+ subroutine bar (array)  ! overwrites array in an offload region, then loops on a privatized copy
+ integer :: i
+ integer :: array(:)
+
+ !$omp target map(from:array)
+ !$acc parallel copyout(array)
+ array = [(-2*i, i = 1, size(array))]  ! NOTE(review): presumably only one of the omp/acc sets is active - confirm
+ !$omp do private(array)
+ !$acc loop gang private(array)
+ do i = 1, 10
+ array(i) = 9*i  ! array is private on the loop directives above
+ end do
+ if (any (array /= [(-2*i, i = 1, 10)])) error stop 2  ! expects the values assigned before the loop
+ !$omp end target
+ !$acc end parallel
+ end subroutine bar
+end
diff --git a/libgomp/testsuite/libgomp.fortran/use_device_ptr-optional-2.f90 b/libgomp/testsuite/libgomp.fortran/use_device_ptr-optional-2.f90
index 641ebd9..7a4aaae 100644
--- a/libgomp/testsuite/libgomp.fortran/use_device_ptr-optional-2.f90
+++ b/libgomp/testsuite/libgomp.fortran/use_device_ptr-optional-2.f90
@@ -1,3 +1,4 @@
+! { dg-do run }
! Check whether absent optional arguments are properly
! handled with use_device_{addr,ptr}.
program main
diff --git a/libgomp/testsuite/libgomp.oacc-c++/c++.exp b/libgomp/testsuite/libgomp.oacc-c++/c++.exp
index c06c2a0..7200ec1 100644
--- a/libgomp/testsuite/libgomp.oacc-c++/c++.exp
+++ b/libgomp/testsuite/libgomp.oacc-c++/c++.exp
@@ -88,15 +88,6 @@ if { $lang_test_file_found } {
unsupported "$subdir $offload_target offloading"
continue
}
- gcn {
- if { ![check_effective_target_openacc_amdgcn_accel_present] } {
- # Don't bother; execution testing is going to FAIL.
- untested "$subdir $offload_target offloading: supported, but hardware not accessible"
- continue
- }
-
- set acc_mem_shared 0
- }
host {
set acc_mem_shared 1
}
@@ -115,6 +106,15 @@ if { $lang_test_file_found } {
set acc_mem_shared 0
}
+ radeon {
+ if { ![check_effective_target_openacc_radeon_accel_present] } {
+ # Don't bother; execution testing is going to FAIL.
+ untested "$subdir $offload_target offloading: supported, but hardware not accessible"
+ continue
+ }
+
+ set acc_mem_shared 0
+ }
default {
error "Unknown OpenACC device type: $openacc_device_type (offload target: $offload_target)"
}
diff --git a/libgomp/testsuite/libgomp.oacc-c++/declare-pr94120.C b/libgomp/testsuite/libgomp.oacc-c++/declare-pr94120.C
new file mode 100644
index 0000000..ed69359
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c++/declare-pr94120.C
@@ -0,0 +1,58 @@
+#include <openacc.h>
+#include <stdlib.h>
+
+#define N 8
+
+namespace one { /* Holds the array declared with 'copyin'. */
+ int A[N] = { 1, 2, 3, 4, 5, 6, 7, 8 }; /* A[i] == i + 1, which the checks in f () compare against. */
+ #pragma acc declare copyin (A)
+};
+
+namespace outer { /* Nested namespaces, so the declared array has a two-level qualified name. */
+ namespace inner {
+ int B[N]; /* Declared with 'create'; filled from one::A inside f (). */
+ #pragma acc declare create (B)
+ };
+};
+
+static void
+f (void) /* Checks that the namespace-scope arrays are present, then uses them in compute regions. */
+{
+ int i;
+ int C[N]; /* Function-local array declared with 'copyout' below. */
+ #pragma acc declare copyout (C)
+
+ if (!acc_is_present (&one::A, sizeof (one::A))) /* Abort unless A is reported present. */
+ abort ();
+
+ if (!acc_is_present (&outer::inner::B, sizeof (outer::inner::B))) /* Likewise for B. */
+ abort ();
+
+#pragma acc parallel
+ for (i = 0; i < N; i++)
+ {
+ outer::inner::B[i] = one::A[i]; /* Copy A into B ... */
+ C[i] = outer::inner::B[i]; /* ... and B into C. */
+ }
+
+#pragma acc parallel
+ for (i = 0; i < N; i++)
+ {
+ if (C[i] != i + 1) /* C must now equal the initializer of one::A. */
+ abort ();
+ }
+
+#pragma acc parallel
+ for (i = 0; i < N; i++)
+ if (outer::inner::B[i] != i + 1) /* B must equal the initializer of one::A as well. */
+ abort ();
+}
+
+
+int
+main (int argc, char **argv) /* argc and argv are unused. */
+{
+ f (); /* Aborts on any failed check. */
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c++/firstprivate-mappings-1.C b/libgomp/testsuite/libgomp.oacc-c++/firstprivate-mappings-1.C
index c8dba9e..b046bf2 100644
--- a/libgomp/testsuite/libgomp.oacc-c++/firstprivate-mappings-1.C
+++ b/libgomp/testsuite/libgomp.oacc-c++/firstprivate-mappings-1.C
@@ -1,3 +1,12 @@
/* Verify OpenACC 'firstprivate' mappings for C++ reference types. */
+/* PR middle-end/48591 */
+/* PR other/71064 */
+/* Set to 0 for offloading targets not supporting long double. */
+#if defined(ACC_DEVICE_TYPE_nvidia) || defined(ACC_DEVICE_TYPE_radeon)
+# define DO_LONG_DOUBLE 0
+#else
+# define DO_LONG_DOUBLE 1
+#endif
+
#include "../../../gcc/testsuite/g++.dg/goacc/firstprivate-mappings-1.C"
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-3.c
deleted file mode 100644
index 9256500..0000000
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-3.c
+++ /dev/null
@@ -1,19 +0,0 @@
-/* Test the `acc_get_property' and '`acc_get_property_string' library
- functions for the host device. */
-/* { dg-additional-sources acc_get_property-aux.c } */
-/* { dg-do run } */
-
-#include <openacc.h>
-#include <stdio.h>
-
-void expect_device_properties
-(acc_device_t dev_type, int dev_num,
- int expected_total_mem, int expected_free_mem,
- const char* expected_vendor, const char* expected_name,
- const char* expected_driver);
-
-int main()
-{
- printf ("Checking acc_device_host device properties\n");
- expect_device_properties (acc_device_host, 0, 0, 0, "GNU", "GOMP", "1.0");
-}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-aux.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-aux.c
index 952bdbf..47285fc 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-aux.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-aux.c
@@ -6,11 +6,12 @@
#include <stdio.h>
#include <string.h>
-void expect_device_properties
-(acc_device_t dev_type, int dev_num,
- int expected_total_mem, int expected_free_mem,
- const char* expected_vendor, const char* expected_name,
- const char* expected_driver)
+
+void
+expect_device_string_properties (acc_device_t dev_type, int dev_num,
+ const char* expected_vendor,
+ const char* expected_name,
+ const char* expected_driver)
{
const char *vendor = acc_get_property_string (dev_num, dev_type,
acc_property_vendor);
@@ -21,25 +22,6 @@ void expect_device_properties
abort ();
}
- int total_mem = acc_get_property (dev_num, dev_type,
- acc_property_memory);
- if (total_mem != expected_total_mem)
- {
- fprintf (stderr, "Expected acc_property_memory to equal %d, "
- "but was %d.\n", expected_total_mem, total_mem);
- abort ();
-
- }
-
- int free_mem = acc_get_property (dev_num, dev_type,
- acc_property_free_memory);
- if (free_mem != expected_free_mem)
- {
- fprintf (stderr, "Expected acc_property_free_memory to equal %d, "
- "but was %d.\n", expected_free_mem, free_mem);
- abort ();
- }
-
const char *name = acc_get_property_string (dev_num, dev_type,
acc_property_name);
if (strcmp (name, expected_name))
@@ -59,11 +41,11 @@ void expect_device_properties
}
int unknown_property = 16058;
- int v = acc_get_property (dev_num, dev_type, (acc_device_property_t)unknown_property);
+ size_t v = acc_get_property (dev_num, dev_type, (acc_device_property_t)unknown_property);
if (v != 0)
{
fprintf (stderr, "Expected value of unknown numeric property to equal 0, "
- "but was %d.\n", v);
+ "but was %zu.\n", v);
abort ();
}
@@ -72,9 +54,45 @@ void expect_device_properties
if (s != NULL)
{
fprintf (stderr, "Expected value of unknown string property to be NULL, "
- "but was %d.\n", s);
+ "but was %s.\n", s);
abort ();
}
+}
+void /* Abort unless the device reports the expected total memory and free memory <= total. */
+expect_device_memory (acc_device_t dev_type, int dev_num,
+ size_t expected_total_memory)
+{
+ size_t total_mem = acc_get_property (dev_num, dev_type,
+ acc_property_memory);
+
+ if (total_mem != expected_total_memory) /* Total memory must match exactly. */
+ {
+ fprintf (stderr, "Expected acc_property_memory to equal %zu, "
+ "but was %zu.\n", expected_total_memory, total_mem);
+ abort ();
+ }
+
+ size_t free_mem = acc_get_property (dev_num, dev_type,
+ acc_property_free_memory);
+ if (free_mem > total_mem) /* Free memory is only bounded, not compared to a fixed value. */
+ {
+ fprintf (stderr, "Expected acc_property_free_memory <= acc_property_memory"
+ ", but free memory was %zu and total memory was %zu.\n",
+ free_mem, total_mem);
+ abort ();
+ }
+}
+
+void /* Run both the string-property and the memory-property checks for one device. */
+expect_device_properties (acc_device_t dev_type, int dev_num,
+ size_t expected_total_memory,
+ const char* expected_vendor,
+ const char* expected_name,
+ const char* expected_driver)
+{
+ expect_device_string_properties (dev_type, dev_num, expected_vendor,
+ expected_name, expected_driver);
+ expect_device_memory (dev_type, dev_num, expected_total_memory);
 }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-gcn.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-gcn.c
new file mode 100644
index 0000000..4b1fb5e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-gcn.c
@@ -0,0 +1,135 @@
+/* Test the `acc_get_property' and `acc_get_property_string' library
+ functions on amdgcn devices by comparing property values with
+ those obtained through the HSA API. */
+/* { dg-additional-sources acc_get_property-aux.c } */
+/* { dg-additional-options "-ldl" } */
+/* { dg-do run { target openacc_radeon_accel_selected } } */
+
+#include <dlfcn.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <openacc.h>
+
+#ifndef __cplusplus
+typedef int bool;
+#endif
+#include <hsa.h>
+
+
+void expect_device_string_properties (acc_device_t dev_type, int dev_num,
+ const char* expected_vendor,
+ const char* expected_name,
+ const char* expected_driver);
+
+hsa_status_t (*hsa_agent_get_info_fn) (hsa_agent_t agent,
+ hsa_agent_info_t attribute,
+ void *value);
+hsa_status_t (*hsa_system_get_info_fn) (hsa_system_info_t attribute,
+ void *value);
+hsa_status_t (*hsa_iterate_agents_fn)
+(hsa_status_t (*callback)(hsa_agent_t agent, void *data), void *data);
+hsa_status_t (*hsa_init_fn) (void);
+
+char* support_cpu_devices;
+
+void /* Load the HSA runtime library, resolve the entry points used below and initialize it. */
+test_setup ()
+{
+ char* env_runtime;
+ char* hsa_runtime_lib;
+ void *handle;
+
+#define DLSYM_FN(function) \
+ function##_fn = (typeof(function##_fn))dlsym (handle, #function); \
+ if (function##_fn == NULL) \
+ { \
+ fprintf (stderr, "Could not get symbol " #function ".\n"); \
+ abort (); \
+ }
+
+ env_runtime = getenv ("HSA_RUNTIME_LIB"); /* Optional override of the library to load. */
+ hsa_runtime_lib = env_runtime ? env_runtime : (char*)"libhsa-runtime64.so";
+
+ handle = dlopen (hsa_runtime_lib, RTLD_LAZY);
+ if (!handle)
+ {
+ fprintf (stderr, "Could not load %s.\n", hsa_runtime_lib);
+ abort ();
+ }
+
+ DLSYM_FN (hsa_agent_get_info) /* Each use aborts if the symbol cannot be resolved. */
+ DLSYM_FN (hsa_system_get_info)
+ DLSYM_FN (hsa_iterate_agents)
+ DLSYM_FN (hsa_init)
+
+ hsa_init_fn (); /* Return status is not checked here. */
+
+ support_cpu_devices = getenv ("GCN_SUPPORT_CPU_DEVICES"); /* Non-NULL makes CPU agents count as devices. */
+}
+
+static hsa_status_t /* Callback for hsa_iterate_agents: compare one agent with the matching OpenACC device. */
+check_agent_properties (hsa_agent_t agent, void *dev_num_arg)
+{
+
+ char name[64];
+ char vendor_name[64];
+ uint16_t minor;
+ uint16_t major;
+ char driver[60]; /* Receives the "HSA Runtime <major>.<minor>" string built below. */
+
+ hsa_status_t status;
+ hsa_device_type_t device_type;
+ int* dev_num = (int*)dev_num_arg; /* Running OpenACC device number, owned by the caller. */
+
+#define AGENT_GET_INFO(info_type, val) \
+ status = hsa_agent_get_info_fn (agent, info_type, &val); \
+ if (status != HSA_STATUS_SUCCESS) \
+ { \
+ fprintf (stderr, "Failed to obtain " #info_type ".\n"); \
+ abort (); \
+ }
+#define SYSTEM_GET_INFO(info_type, val) \
+ status = hsa_system_get_info_fn (info_type, &val); \
+ if (status != HSA_STATUS_SUCCESS) \
+ { \
+ fprintf (stderr, "Failed to obtain " #info_type ".\n"); \
+ abort (); \
+ }
+
+ AGENT_GET_INFO (HSA_AGENT_INFO_DEVICE, device_type)
+
+ /* Skip unsupported device types. Mimic the GCN plugin's behavior. */
+ if (!(device_type == HSA_DEVICE_TYPE_GPU
+ || (support_cpu_devices && device_type == HSA_DEVICE_TYPE_CPU)))
+ return HSA_STATUS_SUCCESS; /* Skipped agents do not advance *dev_num. */
+
+ AGENT_GET_INFO (HSA_AGENT_INFO_NAME, name)
+ AGENT_GET_INFO (HSA_AGENT_INFO_VENDOR_NAME, vendor_name)
+
+ SYSTEM_GET_INFO (HSA_SYSTEM_INFO_VERSION_MINOR, minor)
+ SYSTEM_GET_INFO (HSA_SYSTEM_INFO_VERSION_MAJOR, major)
+
+ snprintf (driver, sizeof driver, "HSA Runtime %hu.%hu",
+ (unsigned short int)major, (unsigned short int)minor);
+
+ expect_device_string_properties(acc_device_radeon, *dev_num,
+ vendor_name, name, driver);
+
+ (*dev_num)++; /* The next accepted agent is compared against the next device number. */
+
+ return status; /* Status of the last query; the macros abort on anything but success. */
+}
+
+int
+main () /* Check every agent the HSA runtime reports; the exit code is the iteration status. */
+{
+ int dev_num = 0; /* Incremented by the callback for each agent it checks. */
+ test_setup ();
+
+ hsa_status_t status =
+ hsa_iterate_agents_fn (&check_agent_properties, &dev_num);
+
+ return status;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-host.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-host.c
new file mode 100644
index 0000000..4ed0dfa
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-host.c
@@ -0,0 +1,20 @@
+/* Test the `acc_get_property' and `acc_get_property_string' library
+ functions for the host device. */
+/* { dg-additional-sources acc_get_property-aux.c } */
+/* { dg-do run } */
+
+#include <openacc.h>
+#include <stdio.h>
+
+void expect_device_properties (acc_device_t dev_type, int dev_num,
+ size_t expected_memory,
+ const char* expected_vendor,
+ const char* expected_name,
+ const char* expected_driver);
+
+int
+main () /* Check the host device against fixed expected property values. */
+{
+ printf ("Checking acc_device_host device properties\n");
+ expect_device_properties (acc_device_host, 0, 0, "GNU", "GOMP", "1.0"); /* device 0, memory 0, vendor, name, driver */
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-nvptx.c
index 4dd13c4..6334cfd 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property-nvptx.c
@@ -11,13 +11,14 @@
#include <string.h>
#include <stdio.h>
-void expect_device_properties
-(acc_device_t dev_type, int dev_num,
- int expected_total_mem, int expected_free_mem,
- const char* expected_vendor, const char* expected_name,
- const char* expected_driver);
+void expect_device_properties (acc_device_t dev_type, int dev_num,
+ size_t expected_memory,
+ const char* expected_vendor,
+ const char* expected_name,
+ const char* expected_driver);
-int main ()
+int
+main ()
{
int dev_count;
cudaGetDeviceCount (&dev_count);
@@ -30,26 +31,26 @@ int main ()
abort ();
}
- printf("Checking device %d\n", dev_num);
+ printf ("Checking device %d\n", dev_num);
const char *vendor = "Nvidia";
size_t free_mem;
size_t total_mem;
- if (cudaMemGetInfo(&free_mem, &total_mem) != cudaSuccess)
+ if (cudaMemGetInfo (&free_mem, &total_mem) != cudaSuccess)
{
fprintf (stderr, "cudaMemGetInfo failed.\n");
abort ();
}
struct cudaDeviceProp p;
- if (cudaGetDeviceProperties(&p, dev_num) != cudaSuccess)
+ if (cudaGetDeviceProperties (&p, dev_num) != cudaSuccess)
{
fprintf (stderr, "cudaGetDeviceProperties failed.\n");
abort ();
}
int driver_version;
- if (cudaDriverGetVersion(&driver_version) != cudaSuccess)
+ if (cudaDriverGetVersion (&driver_version) != cudaSuccess)
{
fprintf (stderr, "cudaDriverGetVersion failed.\n");
abort ();
@@ -62,7 +63,9 @@ int main ()
snprintf (driver, sizeof driver, "CUDA Driver %u.%u",
driver_version / 1000, driver_version % 1000 / 10);
- expect_device_properties(acc_device_nvidia, dev_num,
- total_mem, free_mem, vendor, p.name, driver);
+ /* Note that this check relies on the fact that the device numbering
+ used by the nvptx plugin agrees with the CUDA device ordering. */
+ expect_device_properties (acc_device_nvidia, dev_num,
+ total_mem, vendor, p.name, driver);
}
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property.c
index 289d1ba..3460035 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_get_property.c
@@ -3,8 +3,7 @@
of all device types mentioned in the OpenACC standard.
See also acc_get_property.f90. */
-/* { dg-do run { target { { ! { openacc_host_selected } } && { ! { openacc_amdgcn_accel_selected } } } } } */
-/* FIXME: This test does not work with the GCN implementation stub yet. */
+/* { dg-do run } */
#include <openacc.h>
#include <stdlib.h>
@@ -15,16 +14,16 @@
and do basic device independent validation. */
void
-print_device_properties(acc_device_t type)
+print_device_properties (acc_device_t type)
{
const char *s;
size_t v;
- int dev_count = acc_get_num_devices(type);
+ int dev_count = acc_get_num_devices (type);
for (int i = 0; i < dev_count; ++i)
{
- printf(" Device %d:\n", i+1);
+ printf (" Device %d:\n", i+1);
s = acc_get_property_string (i, type, acc_property_vendor);
printf (" Vendor: %s\n", s);
@@ -35,10 +34,10 @@ print_device_properties(acc_device_t type)
}
v = acc_get_property (i, type, acc_property_memory);
- printf (" Total memory: %zd\n", v);
+ printf (" Total memory: %zu\n", v);
v = acc_get_property (i, type, acc_property_free_memory);
- printf (" Free memory: %zd\n", v);
+ printf (" Free memory: %zu\n", v);
s = acc_get_property_string (i, type, acc_property_name);
printf (" Name: %s\n", s);
@@ -58,19 +57,20 @@ print_device_properties(acc_device_t type)
}
}
-int main ()
+int
+main ()
{
- printf("acc_device_none:\n");
+ printf ("acc_device_none:\n");
/* For completness; not expected to print anything since there
should be no devices of this type. */
- print_device_properties(acc_device_none);
+ print_device_properties (acc_device_none);
- printf("acc_device_default:\n");
- print_device_properties(acc_device_default);
+ printf ("acc_device_default:\n");
+ print_device_properties (acc_device_default);
- printf("acc_device_host:\n");
- print_device_properties(acc_device_host);
+ printf ("acc_device_host:\n");
+ print_device_properties (acc_device_host);
- printf("acc_device_not_host:\n");
- print_device_properties(acc_device_not_host);
+ printf ("acc_device_not_host:\n");
+ print_device_properties (acc_device_not_host);
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c
index e82a03e..7d05f48 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c
@@ -224,7 +224,7 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info *
if (acc_device_type == acc_device_host)
assert (api_info->device_api == acc_device_api_none);
- else if (acc_device_type == acc_device_gcn)
+ else if (acc_device_type == acc_device_radeon)
assert (api_info->device_api == acc_device_api_other);
else
assert (api_info->device_api == acc_device_api_cuda);
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c
index ddf647c..ad33f72 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c
@@ -106,7 +106,7 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e
assert (event_info->launch_event.vector_length >= 1);
else if (acc_device_type == acc_device_nvidia) /* ... is special. */
assert (event_info->launch_event.vector_length == 32);
- else if (acc_device_type == acc_device_gcn) /* ...and so is this. */
+ else if (acc_device_type == acc_device_radeon) /* ...and so is this. */
assert (event_info->launch_event.vector_length == 64);
else
{
@@ -120,7 +120,7 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e
if (acc_device_type == acc_device_host)
assert (api_info->device_api == acc_device_api_none);
- else if (acc_device_type == acc_device_gcn)
+ else if (acc_device_type == acc_device_radeon)
assert (api_info->device_api == acc_device_api_other);
else
assert (api_info->device_api == acc_device_api_cuda);
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c
index dc7c758..a5e9ab3 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c
@@ -265,7 +265,7 @@ static void cb_enter_data_end (acc_prof_info *prof_info, acc_event_info *event_i
if (acc_device_type == acc_device_host)
assert (api_info->device_api == acc_device_api_none);
- else if (acc_device_type == acc_device_gcn)
+ else if (acc_device_type == acc_device_radeon)
assert (api_info->device_api == acc_device_api_other);
else
assert (api_info->device_api == acc_device_api_cuda);
@@ -321,7 +321,7 @@ static void cb_exit_data_start (acc_prof_info *prof_info, acc_event_info *event_
if (acc_device_type == acc_device_host)
assert (api_info->device_api == acc_device_api_none);
- else if (acc_device_type == acc_device_gcn)
+ else if (acc_device_type == acc_device_radeon)
assert (api_info->device_api == acc_device_api_other);
else
assert (api_info->device_api == acc_device_api_cuda);
@@ -375,7 +375,7 @@ static void cb_exit_data_end (acc_prof_info *prof_info, acc_event_info *event_in
if (acc_device_type == acc_device_host)
assert (api_info->device_api == acc_device_api_none);
- else if (acc_device_type == acc_device_gcn)
+ else if (acc_device_type == acc_device_radeon)
assert (api_info->device_api == acc_device_api_other);
else
assert (api_info->device_api == acc_device_api_cuda);
@@ -516,7 +516,7 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info *
if (acc_device_type == acc_device_host)
assert (api_info->device_api == acc_device_api_none);
- else if (acc_device_type == acc_device_gcn)
+ else if (acc_device_type == acc_device_radeon)
assert (api_info->device_api == acc_device_api_other);
else
assert (api_info->device_api == acc_device_api_cuda);
@@ -581,7 +581,7 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e
if (acc_device_type == acc_device_host)
assert (api_info->device_api == acc_device_api_none);
- else if (acc_device_type == acc_device_gcn)
+ else if (acc_device_type == acc_device_radeon)
assert (api_info->device_api == acc_device_api_other);
else
assert (api_info->device_api == acc_device_api_cuda);
@@ -647,7 +647,7 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve
if (acc_device_type == acc_device_host)
assert (api_info->device_api == acc_device_api_none);
- else if (acc_device_type == acc_device_gcn)
+ else if (acc_device_type == acc_device_radeon)
assert (api_info->device_api == acc_device_api_other);
else
assert (api_info->device_api == acc_device_api_cuda);
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-nop-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-nop-1.c
index 840052f..7496426 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-nop-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-nop-1.c
@@ -26,8 +26,8 @@ main ()
acc_device_t d;
#if defined ACC_DEVICE_TYPE_nvidia
d = acc_device_nvidia;
-#elif defined ACC_DEVICE_TYPE_gcn
- d = acc_device_gcn;
+#elif defined ACC_DEVICE_TYPE_radeon
+ d = acc_device_radeon;
#elif defined ACC_DEVICE_TYPE_host
d = acc_device_host;
#else
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c
index a59047a..13e5ca2 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c
@@ -38,7 +38,7 @@ main ()
assert (v.b[i] == v.a + i);
assert (!acc_is_present (&v, sizeof (v)));
- assert (!acc_is_present (v.b, sizeof (int *) * n));
+ assert (!acc_is_present (v.b, sizeof (int) * n));
}
return 0;
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c
index 0ca5990..1b4cf2f 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c
@@ -41,9 +41,9 @@ main ()
assert (v.b[i] == v.a + i);
assert (acc_is_present (&v, sizeof (v)));
- assert (!acc_is_present (v.b, sizeof (int *) * n));
- assert (!acc_is_present (v.c, sizeof (int *) * n));
- assert (!acc_is_present (v.d, sizeof (int *) * n));
+ assert (!acc_is_present (v.b, sizeof (int) * n));
+ assert (!acc_is_present (v.c, sizeof (int) * n));
+ assert (!acc_is_present (v.d, sizeof (int) * n));
}
#pragma acc exit data copyout(v)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/firstprivate-mappings-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/firstprivate-mappings-1.c
index 4a8b310..2cdd2d1 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/firstprivate-mappings-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/firstprivate-mappings-1.c
@@ -3,4 +3,13 @@
/* { dg-additional-options "-Wno-psabi" } as apparently we're doing funny
things with vector arguments. */
+/* PR middle-end/48591 */
+/* PR other/71064 */
+/* Set to 0 for offloading targets not supporting long double. */
+#if defined(ACC_DEVICE_TYPE_nvidia) || defined(ACC_DEVICE_TYPE_radeon)
+# define DO_LONG_DOUBLE 0
+#else
+# define DO_LONG_DOUBLE 1
+#endif
+
#include "../../../gcc/testsuite/c-c++-common/goacc/firstprivate-mappings-1.c"
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/function-not-offloaded.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/function-not-offloaded.c
index 517004a..64f8ab8 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/function-not-offloaded.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/function-not-offloaded.c
@@ -1,11 +1,11 @@
/* { dg-do link } */
-/* { dg-excess-errors "lto1, mkoffload and lto-wrapper fatal errors" { target { openacc_nvidia_accel_selected || openacc_amdgcn_accel_selected } } } */
+/* { dg-excess-errors "lto1, mkoffload and lto-wrapper fatal errors" { target { openacc_nvidia_accel_selected || openacc_radeon_accel_selected } } } */
int var;
#pragma acc declare create (var)
void __attribute__((noinline, noclone))
-foo () /* { dg-error "function 'foo' has been referenced in offloaded code but hasn't been marked to be included in the offloaded code" "" { target { openacc_nvidia_accel_selected || openacc_amdgcn_accel_selected } } } */
+foo () /* { dg-error "function 'foo' has been referenced in offloaded code but hasn't been marked to be included in the offloaded code" "" { target { openacc_nvidia_accel_selected || openacc_radeon_accel_selected } } } */
{
var++;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-7.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-7.c
new file mode 100644
index 0000000..6830ef1
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-7.c
@@ -0,0 +1,66 @@
+/* { dg-do run } */
+
+/* Test the 'if' and 'if_present' clauses on the 'host_data' construct. */
+/* C/C++ variant of 'libgomp.oacc-fortran/host_data-5.F90' */
+
+#include <assert.h>
+#include <stdint.h>
+
+void
+foo (float *p, intptr_t host_p, int cond)
+{
+ assert (p == (float *) host_p);
+
+#pragma acc data copyin(host_p)
+ {
+#pragma acc host_data use_device(p) if_present
+ /* p not mapped yet, so it will be equal to the host pointer. */
+ assert (p == (float *) host_p);
+
+#pragma acc data copy(p[0:100])
+ {
+ /* Not inside a host_data construct, so p is still the host pointer. */
+ assert (p == (float *) host_p);
+
+#pragma acc host_data use_device(p)
+ {
+#if ACC_MEM_SHARED
+ assert (p == (float *) host_p);
+#else
+ /* The device address is different from host address. */
+ assert (p != (float *) host_p);
+#endif
+ }
+
+#pragma acc host_data use_device(p) if_present
+ {
+#if ACC_MEM_SHARED
+ assert (p == (float *) host_p);
+#else
+ /* p is present now, so this is the same as above. */
+ assert (p != (float *) host_p);
+#endif
+ }
+
+#pragma acc host_data use_device(p) if(cond)
+ {
+#if ACC_MEM_SHARED
+ assert (p == (float *) host_p);
+#else
+ /* p is the device pointer iff cond is true. */
+ assert ((p != (float *) host_p) == cond);
+#endif
+ }
+ }
+ }
+}
+
+int
+main (void)
+{
+ float arr[100];
+ foo (arr, (intptr_t) arr, 0);
+ foo (arr, (intptr_t) arr, 1);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c
index 34bc57e..0273c2b 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c
@@ -1,3 +1,6 @@
+/* AMD GCN does not use 32-lane vectors.
+ { dg-skip-if "unsuitable dimensions" { openacc_radeon_accel_selected } { "*" } { "" } } */
+
/* { dg-additional-options "-fopenacc-dim=32" } */
#include <stdio.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c
index 04387d3..ca77164 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c
@@ -128,5 +128,14 @@ int test_1 (int gp, int wp, int vp)
int main ()
{
+#ifdef ACC_DEVICE_TYPE_radeon
+ /* AMD GCN uses the autovectorizer for the vector dimension: the use
+ of a function call in vector-partitioned code in this test is not
+ currently supported. */
+ /* AMD GCN does not currently support multiple workers. The 'wp' argument
+ below should be set to 16 when that changes. */
+ return test_1 (16, 1, 1);
+#else
return test_1 (16, 16, 32);
+#endif
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c
index 766e578..5c84301 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c
@@ -9,11 +9,13 @@ int main ()
int ix;
int exit = 0;
int ondev = 0;
+ int gangsize, workersize, vectorsize;
for (ix = 0; ix < N;ix++)
ary[ix] = -1;
-#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ary) copy(ondev)
+#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+ copy(ary) copy(ondev) copyout(gangsize, workersize, vectorsize)
{
#pragma acc loop gang worker vector
for (unsigned ix = 0; ix < N; ix++)
@@ -32,6 +34,10 @@ int main ()
else
ary[ix] = ix;
}
+
+ gangsize = __builtin_goacc_parlevel_size (GOMP_DIM_GANG);
+ workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
+ vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
for (ix = 0; ix < N; ix++)
@@ -39,11 +45,12 @@ int main ()
int expected = ix;
if(ondev)
{
- int chunk_size = (N + 32*32*32 - 1) / (32*32*32);
+ int chunk_size = (N + gangsize * workersize * vectorsize - 1)
+ / (gangsize * workersize * vectorsize);
- int g = ix / (chunk_size * 32 * 32);
- int w = ix / 32 % 32;
- int v = ix % 32;
+ int g = ix / (chunk_size * workersize * vectorsize);
+ int w = (ix / vectorsize) % workersize;
+ int v = ix % vectorsize;
expected = (g << 16) | (w << 8) | v;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c
index 0bec6e1..9c4a85f 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c
@@ -8,8 +8,10 @@ int main ()
int ix;
int ondev = 0;
int t = 0, h = 0;
-
-#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ondev)
+ int gangsize, workersize, vectorsize;
+
+#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+ copy(ondev) copyout(gangsize, workersize, vectorsize)
{
#pragma acc loop gang worker vector reduction(+:t)
for (unsigned ix = 0; ix < N; ix++)
@@ -28,18 +30,22 @@ int main ()
}
t += val;
}
+ gangsize = __builtin_goacc_parlevel_size (GOMP_DIM_GANG);
+ workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
+ vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
for (ix = 0; ix < N; ix++)
{
int val = ix;
- if(ondev)
+ if (ondev)
{
- int chunk_size = (N + 32*32*32 - 1) / (32*32*32);
+ int chunk_size = (N + gangsize * workersize * vectorsize - 1)
+ / (gangsize * workersize * vectorsize);
- int g = ix / (chunk_size * 32 * 32);
- int w = ix / 32 % 32;
- int v = ix % 32;
+ int g = ix / (chunk_size * vectorsize * workersize);
+ int w = ix / vectorsize % workersize;
+ int v = ix % vectorsize;
val = (g << 16) | (w << 8) | v;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c
index da4921d..1173c1f 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c
@@ -9,8 +9,9 @@ int main ()
int ix;
int ondev = 0;
int t = 0, h = 0;
+ int vectorsize;
-#pragma acc parallel vector_length(32) copy(ondev)
+#pragma acc parallel vector_length(32) copy(ondev) copyout(vectorsize)
{
#pragma acc loop vector reduction (+:t)
for (unsigned ix = 0; ix < N; ix++)
@@ -29,6 +30,7 @@ int main ()
}
t += val;
}
+ vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
for (ix = 0; ix < N; ix++)
@@ -38,7 +40,7 @@ int main ()
{
int g = 0;
int w = 0;
- int v = ix % 32;
+ int v = ix % vectorsize;
val = (g << 16) | (w << 8) | v;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c
index 15e2bc2..84c2296 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c
@@ -9,8 +9,9 @@ int main ()
int ix;
int ondev = 0;
int q = 0, h = 0;
+ int vectorsize;
-#pragma acc parallel vector_length(32) copy(q) copy(ondev)
+#pragma acc parallel vector_length(32) copy(q) copy(ondev) copyout(vectorsize)
{
int t = q;
@@ -32,6 +33,7 @@ int main ()
t += val;
}
q = t;
+ vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
for (ix = 0; ix < N; ix++)
@@ -41,7 +43,7 @@ int main ()
{
int g = 0;
int w = 0;
- int v = ix % 32;
+ int v = ix % vectorsize;
val = (g << 16) | (w << 8) | v;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c
index 6bbd04f..648f89e 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c
@@ -8,8 +8,10 @@ int main ()
int ix;
int ondev = 0;
int t = 0, h = 0;
+ int workersize;
-#pragma acc parallel num_workers(32) vector_length(32) copy(ondev)
+#pragma acc parallel num_workers(32) vector_length(32) copy(ondev) \
+ copyout(workersize)
{
#pragma acc loop worker reduction(+:t)
for (unsigned ix = 0; ix < N; ix++)
@@ -28,6 +30,7 @@ int main ()
}
t += val;
}
+ workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
}
for (ix = 0; ix < N; ix++)
@@ -36,7 +39,7 @@ int main ()
if(ondev)
{
int g = 0;
- int w = ix % 32;
+ int w = ix % workersize;
int v = 0;
val = (g << 16) | (w << 8) | v;
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c
index c63a5d4..f9fcf37 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c
@@ -8,8 +8,10 @@ int main ()
int ix;
int ondev = 0;
int q = 0, h = 0;
+ int workersize;
-#pragma acc parallel num_workers(32) vector_length(32) copy(q) copy(ondev)
+#pragma acc parallel num_workers(32) vector_length(32) copy(q) copy(ondev) \
+ copyout(workersize)
{
int t = q;
@@ -31,6 +33,7 @@ int main ()
t += val;
}
q = t;
+ workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
}
for (ix = 0; ix < N; ix++)
@@ -39,7 +42,7 @@ int main ()
if(ondev)
{
int g = 0;
- int w = ix % 32;
+ int w = ix % workersize;
int v = 0;
val = (g << 16) | (w << 8) | v;
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c
index 71d3969..c360ad1 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c
@@ -8,8 +8,10 @@ int main ()
int ix;
int ondev = 0;
int t = 0, h = 0;
+ int workersize, vectorsize;
-#pragma acc parallel num_workers(32) vector_length(32) copy(ondev)
+#pragma acc parallel num_workers(32) vector_length(32) copy(ondev) \
+ copyout(workersize, vectorsize)
{
#pragma acc loop worker vector reduction (+:t)
for (unsigned ix = 0; ix < N; ix++)
@@ -28,6 +30,8 @@ int main ()
}
t += val;
}
+ workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
+ vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
for (ix = 0; ix < N; ix++)
@@ -36,8 +40,8 @@ int main ()
if(ondev)
{
int g = 0;
- int w = (ix / 32) % 32;
- int v = ix % 32;
+ int w = (ix / vectorsize) % workersize;
+ int v = ix % vectorsize;
val = (g << 16) | (w << 8) | v;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c
index 6010cd2..8c858f3 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c
@@ -9,11 +9,13 @@ int main ()
int ix;
int exit = 0;
int ondev = 0;
+ int vectorsize;
for (ix = 0; ix < N;ix++)
ary[ix] = -1;
-#pragma acc parallel vector_length(32) copy(ary) copy(ondev)
+#pragma acc parallel vector_length(32) copy(ary) copy(ondev) \
+ copyout(vectorsize)
{
#pragma acc loop vector
for (unsigned ix = 0; ix < N; ix++)
@@ -31,6 +33,7 @@ int main ()
else
ary[ix] = ix;
}
+ vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
for (ix = 0; ix < N; ix++)
@@ -40,7 +43,7 @@ int main ()
{
int g = 0;
int w = 0;
- int v = ix % 32;
+ int v = ix % vectorsize;
expected = (g << 16) | (w << 8) | v;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c
index fa6fb91..5fe486f 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c
@@ -9,11 +9,13 @@ int main ()
int ix;
int exit = 0;
int ondev = 0;
+ int workersize;
for (ix = 0; ix < N;ix++)
ary[ix] = -1;
-#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev)
+#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) \
+ copyout(workersize)
{
#pragma acc loop worker
for (unsigned ix = 0; ix < N; ix++)
@@ -31,6 +33,7 @@ int main ()
else
ary[ix] = ix;
}
+ workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
}
for (ix = 0; ix < N; ix++)
@@ -39,7 +42,7 @@ int main ()
if(ondev)
{
int g = 0;
- int w = ix % 32;
+ int w = ix % workersize;
int v = 0;
expected = (g << 16) | (w << 8) | v;
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c
index cd4cc99..fd4e4cf 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c
@@ -9,11 +9,13 @@ int main ()
int ix;
int exit = 0;
int ondev = 0;
+ int workersize, vectorsize;
for (ix = 0; ix < N;ix++)
ary[ix] = -1;
-#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev)
+#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) \
+ copyout(workersize, vectorsize)
{
#pragma acc loop worker vector
for (unsigned ix = 0; ix < N; ix++)
@@ -31,6 +33,8 @@ int main ()
else
ary[ix] = ix;
}
+ workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
+ vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
for (ix = 0; ix < N; ix++)
@@ -39,8 +43,8 @@ int main ()
if(ondev)
{
int g = 0;
- int w = (ix / 32) % 32;
- int v = ix % 32;
+ int w = (ix / vectorsize) % workersize;
+ int v = ix % vectorsize;
expected = (g << 16) | (w << 8) | v;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
index a5edfc6..cc4c738 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
@@ -14,7 +14,8 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_gang ()
{
if (acc_on_device ((int) acc_device_host))
return 0;
- else if (acc_on_device ((int) acc_device_nvidia))
+ else if (acc_on_device ((int) acc_device_nvidia)
+ || acc_on_device ((int) acc_device_radeon))
return __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
else
__builtin_abort ();
@@ -25,7 +26,8 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_worker ()
{
if (acc_on_device ((int) acc_device_host))
return 0;
- else if (acc_on_device ((int) acc_device_nvidia))
+ else if (acc_on_device ((int) acc_device_nvidia)
+ || acc_on_device ((int) acc_device_radeon))
return __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
else
__builtin_abort ();
@@ -36,7 +38,8 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_vector ()
{
if (acc_on_device ((int) acc_device_host))
return 0;
- else if (acc_on_device ((int) acc_device_nvidia))
+ else if (acc_on_device ((int) acc_device_nvidia)
+ || acc_on_device ((int) acc_device_radeon))
return __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
else
__builtin_abort ();
@@ -282,6 +285,12 @@ int main ()
/* The GCC nvptx back end enforces num_workers (32). */
workers_actual = 32;
}
+ else if (acc_on_device (acc_device_radeon))
+ {
+ /* The GCC GCN back end is limited to num_workers (16).
+ Temporarily set this to 1 until multiple workers are permitted. */
+ workers_actual = 1; // 16;
+ }
else
__builtin_abort ();
#pragma acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
@@ -328,6 +337,11 @@ int main ()
/* We're actually executing with num_workers (32). */
/* workers_actual = 32; */
}
+ else if (acc_on_device (acc_device_radeon))
+ {
+ /* The GCC GCN back end is limited to num_workers (16). */
+ workers_actual = 16;
+ }
else
__builtin_abort ();
#pragma acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
@@ -367,6 +381,11 @@ int main ()
/* The GCC nvptx back end enforces vector_length (32). */
vectors_actual = 1024;
}
+ else if (acc_on_device (acc_device_radeon))
+ {
+ /* The GCC GCN back end enforces vector_length (1): autovectorize. */
+ vectors_actual = 1;
+ }
else
__builtin_abort ();
#pragma acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
@@ -407,6 +426,13 @@ int main ()
/* The GCC nvptx back end enforces vector_length (32). */
vectors_actual = 32;
}
+ else if (acc_on_device (acc_device_radeon))
+ {
+ /* Because of the way vectors are implemented for GCN, a vector loop
+ containing a seq routine call will not vectorize calls to that
+ routine. Hence, we'll only get one "vector". */
+ vectors_actual = 1;
+ }
else
__builtin_abort ();
#pragma acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
@@ -433,6 +459,9 @@ int main ()
in the following case. So, limit ourselves here. */
if (acc_get_device_type () == acc_device_nvidia)
gangs = 3;
+ /* The same appears to be true for GCN. */
+ if (acc_get_device_type () == acc_device_radeon)
+ gangs = 3;
int gangs_actual = gangs;
#define WORKERS 3
int workers_actual = WORKERS;
@@ -459,6 +488,13 @@ int main ()
/* The GCC nvptx back end enforces vector_length (32). */
vectors_actual = 32;
}
+ else if (acc_on_device (acc_device_radeon))
+ {
+ /* Temporary setting, until multiple workers are permitted. */
+ workers_actual = 1;
+ /* See above comments about GCN vectors_actual. */
+ vectors_actual = 1;
+ }
else
__builtin_abort ();
#pragma acc loop gang reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-2.c
index 2cb5b95..6570c64 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-2.c
@@ -15,4 +15,22 @@ main (void)
return 0;
}
-/* { dg-final { scan-assembler-times "bar.sync" 0 } } */
+/* Todo: Both bar.syncs can be removed.
+ Atm we generate this dead code in between forked and joining:
+
+ mov.u32 %r28, %ntid.y;
+ mov.u32 %r29, %tid.y;
+ add.u32 %r30, %r29, %r29;
+ setp.gt.s32 %r31, %r30, 19;
+ @%r31 bra $L2;
+ add.u32 %r25, %r28, %r28;
+ mov.u32 %r24, %r30;
+ $L3:
+ add.u32 %r24, %r24, %r25;
+ setp.le.s32 %r33, %r24, 19;
+ @%r33 bra $L3;
+ $L2:
+
+ so the loop is not recognized as an empty loop (which we detect by seeing
+ if joining immediately follows forked). */
+/* { dg-final { scan-assembler-times "bar.sync" 2 } } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-4.c
index e8a433f..d955d79 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-4.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-4.c
@@ -21,4 +21,7 @@ main (void)
return 0;
}
-/* { dg-final { scan-assembler-times "bar.sync" 0 } } */
+/* Atm, %ntid.y is broadcast from one loop to the next, so there are 2 bar.syncs
+ for that (the other two are there for the same reason as in pr85381-2.c).
+ Todo: Recompute %ntid.y instead of broadcasting it. */
+/* { dg-final { scan-assembler-times "bar.sync" 4 } } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92854-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92854-1.c
index 6ba96b6..79cebf6 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92854-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92854-1.c
@@ -1,31 +1,61 @@
-/* Verify that 'acc_unmap_data' unmaps even in presence of dynamic reference
- counts. */
+/* Verify that 'acc_unmap_data' unmaps even in presence of structured and
+ dynamic reference counts, but the device memory remains allocated. */
/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
#include <assert.h>
#include <stdlib.h>
+#include <string.h>
#include <openacc.h>
int
main ()
{
const int N = 180;
-
- char *h = (char *) malloc (N);
- char *d = (char *) acc_malloc (N);
- if (!d)
- abort ();
- acc_map_data (h, d, N);
-
- char *d_ = (char *) acc_create (h + 3, N - 77);
- assert (d_ == d + 3);
-
- d_ = (char *) acc_create (h, N);
- assert (d_ == d);
-
- acc_unmap_data (h);
- assert (!acc_is_present (h, N));
+ const int N_i = 537;
+ const int C = 37;
+
+ unsigned char *h = (unsigned char *) malloc (N);
+ assert (h);
+ unsigned char *d = (unsigned char *) acc_malloc (N);
+ assert (d);
+
+ for (int i = 0; i < N_i; ++i)
+ {
+ acc_map_data (h, d, N);
+ assert (acc_is_present (h, N));
+#pragma acc parallel present(h[0:N])
+ {
+ if (i == 0)
+ memset (h, C, N);
+ }
+
+ unsigned char *d_ = (unsigned char *) acc_create (h + 3, N - 77);
+ assert (d_ == d + 3);
+
+#pragma acc data create(h[6:N - 44])
+ {
+ d_ = (unsigned char *) acc_create (h, N);
+ assert (d_ == d);
+
+#pragma acc enter data create(h[0:N])
+
+ assert (acc_is_present (h, N));
+ acc_unmap_data (h);
+ assert (!acc_is_present (h, N));
+ }
+
+ /* We can however still access the device memory. */
+#pragma acc parallel loop deviceptr(d)
+ for (int j = 0; j < N; ++j)
+ d[j] += i * j;
+ }
+
+ acc_memcpy_from_device(h, d, N);
+ for (int j = 0; j < N; ++j)
+ assert (h[j] == ((C + N_i * (N_i - 1) / 2 * j) % 256));
+
+ acc_free (d);
return 0;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c
index a97e046..da13d84 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c
@@ -30,14 +30,18 @@ int main ()
int ix;
int exit = 0;
int ondev = 0;
+ int gangsize, workersize, vectorsize;
for (ix = 0; ix < N;ix++)
ary[ix] = -1;
-#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ary) copy(ondev)
+#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ary) copy(ondev) copyout(gangsize, workersize, vectorsize)
{
ondev = acc_on_device (acc_device_not_host);
gang (ary);
+ gangsize = __builtin_goacc_parlevel_size (GOMP_DIM_GANG);
+ workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
+ vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
for (ix = 0; ix < N; ix++)
@@ -45,11 +49,12 @@ int main ()
int expected = ix;
if(ondev)
{
- int chunk_size = (N + 32*32*32 - 1) / (32*32*32);
+ int chunk_size = (N + gangsize * workersize * vectorsize - 1)
+ / (gangsize * workersize * vectorsize);
- int g = ix / (chunk_size * 32 * 32);
- int w = ix / 32 % 32;
- int v = ix % 32;
+ int g = ix / (chunk_size * vectorsize * workersize);
+ int w = (ix / vectorsize) % workersize;
+ int v = ix % vectorsize;
expected = (g << 16) | (w << 8) | v;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c
index b1e3e3a..dd7bb6c 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c
@@ -30,14 +30,17 @@ int main ()
int ix;
int exit = 0;
int ondev = 0;
+ int vectorsize;
for (ix = 0; ix < N;ix++)
ary[ix] = -1;
-#pragma acc parallel vector_length(32) copy(ary) copy(ondev)
+#pragma acc parallel vector_length(32) copy(ary) copy(ondev) \
+ copyout(vectorsize)
{
ondev = acc_on_device (acc_device_not_host);
vector (ary);
+ vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
for (ix = 0; ix < N; ix++)
@@ -47,7 +50,7 @@ int main ()
{
int g = 0;
int w = 0;
- int v = ix % 32;
+ int v = ix % vectorsize;
expected = (g << 16) | (w << 8) | v;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c
index 81f1e03..acd9884 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c
@@ -30,14 +30,17 @@ int main ()
int ix;
int exit = 0;
int ondev = 0;
+ int workersize;
for (ix = 0; ix < N;ix++)
ary[ix] = -1;
-#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev)
+#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) \
+ copyout(workersize)
{
ondev = acc_on_device (acc_device_not_host);
worker (ary);
+ workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
}
for (ix = 0; ix < N; ix++)
@@ -46,7 +49,7 @@ int main ()
if(ondev)
{
int g = 0;
- int w = ix % 32;
+ int w = ix % workersize;
int v = 0;
expected = (g << 16) | (w << 8) | v;
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c
index 23dbc1a..73696e4 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c
@@ -30,14 +30,18 @@ int main ()
int ix;
int exit = 0;
int ondev = 0;
+ int workersize, vectorsize;
for (ix = 0; ix < N;ix++)
ary[ix] = -1;
-#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev)
+#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) \
+ copyout(workersize, vectorsize)
{
ondev = acc_on_device (acc_device_not_host);
worker (ary);
+ workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
+ vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
for (ix = 0; ix < N; ix++)
@@ -46,8 +50,8 @@ int main ()
if(ondev)
{
int g = 0;
- int w = (ix / 32) % 32;
- int v = ix % 32;
+ int w = (ix / vectorsize) % workersize;
+ int v = ix % vectorsize;
expected = (g << 16) | (w << 8) | v;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c
index 8862148..9769ee7 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c
@@ -2,8 +2,13 @@
#include <openacc.h>
#include <gomp-constants.h>
+#ifdef ACC_DEVICE_TYPE_radeon
+#define NUM_WORKERS 16
+#define NUM_VECTORS 1
+#else
#define NUM_WORKERS 16
#define NUM_VECTORS 32
+#endif
#define WIDTH 64
#define HEIGHT 32
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/struct-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/struct-1.c
new file mode 100644
index 0000000..543aaa15
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/struct-1.c
@@ -0,0 +1,187 @@
+/* Test dynamic refcount and copy behavior of separate structure members. */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <openacc.h>
+
+struct s
+{
+ signed char a;
+ float b;
+};
+
+static void test(unsigned variant)
+{
+ struct s s = { .a = 73, .b = -22 };
+
+#pragma acc enter data copyin(s.a, s.b)
+ assert(acc_is_present(&s.a, sizeof s.a));
+ assert(acc_is_present(&s.b, sizeof s.b));
+
+ /* To verify that any following 'copyin' doesn't 'copyin' again. */
+ s.a = -s.a;
+ s.b = -s.b;
+
+ if (variant & 4)
+ {
+ if (variant & 8)
+ {
+#pragma acc enter data copyin(s.b)
+ }
+ else
+ acc_copyin(&s.b, sizeof s.b);
+ assert(acc_is_present(&s.a, sizeof s.a));
+ assert(acc_is_present(&s.b, sizeof s.b));
+
+ if (variant & 16)
+ {
+#pragma acc enter data copyin(s.a)
+ }
+ else
+ acc_copyin(&s.a, sizeof s.a);
+ assert(acc_is_present(&s.a, sizeof s.a));
+ assert(acc_is_present(&s.b, sizeof s.b));
+
+ if (variant & 32)
+ {
+#pragma acc enter data copyin(s.a)
+ acc_copyin(&s.b, sizeof s.b);
+#pragma acc enter data copyin(s.b)
+#pragma acc enter data copyin(s.b)
+ acc_copyin(&s.a, sizeof s.a);
+ acc_copyin(&s.a, sizeof s.a);
+ acc_copyin(&s.a, sizeof s.a);
+ }
+ assert(acc_is_present(&s.a, sizeof s.a));
+ assert(acc_is_present(&s.b, sizeof s.b));
+ }
+
+#pragma acc parallel \
+ copy(s.a, s.b)
+ {
+#if ACC_MEM_SHARED
+ if (s.a++ != -73)
+ __builtin_abort();
+ if (s.b-- != 22)
+ __builtin_abort();
+#else
+ if (s.a++ != 73)
+ __builtin_abort();
+ if (s.b-- != -22)
+ __builtin_abort();
+#endif
+ }
+#if ACC_MEM_SHARED
+ assert(s.a == -72);
+ assert(s.b == 21);
+#else
+ assert(s.a == -73);
+ assert(s.b == 22);
+#endif
+
+ if (variant & 32)
+ {
+ if (variant & 1)
+ {
+#pragma acc exit data copyout(s.a) finalize
+ }
+ else
+ acc_copyout_finalize(&s.a, sizeof s.a);
+ }
+ else
+ {
+ if (variant & 1)
+ {
+#pragma acc exit data copyout(s.a)
+ }
+ else
+ acc_copyout(&s.a, sizeof s.a);
+ if (variant & 4)
+ {
+ assert(acc_is_present(&s.a, sizeof s.a));
+ assert(acc_is_present(&s.b, sizeof s.b));
+#if ACC_MEM_SHARED
+ assert(s.a == -72);
+ assert(s.b == 21);
+#else
+ assert(s.a == -73);
+ assert(s.b == 22);
+#endif
+ if (variant & 1)
+ {
+#pragma acc exit data copyout(s.a)
+ }
+ else
+ acc_copyout(&s.a, sizeof s.a);
+ }
+ }
+#if ACC_MEM_SHARED
+ assert(acc_is_present(&s.a, sizeof s.a));
+ assert(acc_is_present(&s.b, sizeof s.b));
+ assert(s.a == -72);
+ assert(s.b == 21);
+#else
+ assert(!acc_is_present(&s.a, sizeof s.a));
+ assert(acc_is_present(&s.b, sizeof s.b));
+ assert(s.a == 74);
+ assert(s.b == 22);
+#endif
+
+ if (variant & 32)
+ {
+ if (variant & 2)
+ {
+#pragma acc exit data copyout(s.b) finalize
+ }
+ else
+ acc_copyout_finalize(&s.b, sizeof s.b);
+ }
+ else
+ {
+ if (variant & 2)
+ {
+#pragma acc exit data copyout(s.b)
+ }
+ else
+ acc_copyout(&s.b, sizeof s.b);
+ if (variant & 4)
+ {
+#if ACC_MEM_SHARED
+ assert(acc_is_present(&s.a, sizeof s.a));
+ assert(acc_is_present(&s.b, sizeof s.b));
+ assert(s.a == -72);
+ assert(s.b == 21);
+#else
+ assert(!acc_is_present(&s.a, sizeof s.a));
+ assert(acc_is_present(&s.b, sizeof s.b));
+ assert(s.a == 74);
+ assert(s.b == 22);
+#endif
+ if (variant & 2)
+ {
+#pragma acc exit data copyout(s.b)
+ }
+ else
+ acc_copyout(&s.b, sizeof s.b);
+ }
+ }
+#if ACC_MEM_SHARED
+ assert(acc_is_present(&s.a, sizeof s.a));
+ assert(acc_is_present(&s.b, sizeof s.b));
+ assert(s.a == -72);
+ assert(s.b == 21);
+#else
+ assert(!acc_is_present(&s.a, sizeof s.a));
+ assert(!acc_is_present(&s.b, sizeof s.b));
+ assert(s.a == 74);
+ assert(s.b == -23);
+#endif
+}
+
+int main()
+{
+ for (unsigned variant = 0; variant < 64; ++variant)
+ test(variant);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/struct-copyout-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/struct-copyout-1.c
new file mode 100644
index 0000000..b86f1c9
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/struct-copyout-1.c
@@ -0,0 +1,38 @@
+#include <assert.h>
+
+struct str1 {
+ int a;
+ int b;
+};
+
+struct str2 {
+ int c;
+ int d;
+ struct str1 s;
+};
+
+int
+main (int argc, char *argv[])
+{
+ struct str2 t;
+
+ t.c = 1;
+ t.d = 2;
+ t.s.a = 3;
+ t.s.b = 4;
+
+ #pragma acc enter data copyin(t.s)
+
+ #pragma acc serial present(t.s) /* { dg-warning "using vector_length \\(32\\), ignoring 1" "" { target openacc_nvidia_accel_selected } } */
+ {
+ t.s.a = 5;
+ t.s.b = 6;
+ }
+
+ #pragma acc exit data copyout(t.s)
+
+ assert (t.s.a == 5);
+ assert (t.s.b == 6);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/struct-copyout-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/struct-copyout-2.c
new file mode 100644
index 0000000..4dd8a3a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/struct-copyout-2.c
@@ -0,0 +1,44 @@
+#include <assert.h>
+#include <stdlib.h>
+
+struct str1 {
+ int a;
+ int b;
+ int *c;
+};
+
+#define N 1024
+
+int
+main (int argc, char *argv[])
+{
+ struct str1 s;
+
+ s.a = 1;
+ s.b = 2;
+ s.c = (int *) malloc (sizeof (int) * N);
+
+ for (int i = 0; i < N; i++)
+ s.c[i] = i + 10;
+
+ #pragma acc enter data copyin(s.a, s.b, s.c[0:N])
+
+ #pragma acc serial present(s.a, s.b, s.c[0:N]) /* { dg-warning "using vector_length \\(32\\), ignoring 1" "" { target openacc_nvidia_accel_selected } } */
+ {
+ s.a = 3;
+ s.b = 4;
+ for (int i = 0; i < N; i++)
+ s.c[i] = i + 20;
+ }
+
+ #pragma acc exit data copyout(s.a, s.b, s.c[0:N])
+
+ assert (s.a == 3);
+ assert (s.b == 4);
+ for (int i = 0; i < N; i++)
+ assert (s.c[i] == i + 20);
+
+ free (s.c);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-1-lib.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-1-lib.c
new file mode 100644
index 0000000..8fa87777
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-1-lib.c
@@ -0,0 +1,3 @@
+/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */
+/* { dg-additional-options "-DOPENACC_API" } */
+#include "structured-dynamic-lifetimes-1.c"
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-1.c
new file mode 100644
index 0000000..0d6b415
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-1.c
@@ -0,0 +1,161 @@
+/* Test transitioning of data lifetimes between structured and dynamic. */
+
+/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */
+
+#include <openacc.h>
+#include <assert.h>
+#include <stdlib.h>
+
+#define SIZE 1024
+
+void
+f1 (void)
+{
+ char *block1 = (char *) malloc (SIZE);
+
+#ifdef OPENACC_API
+ acc_copyin (block1, SIZE);
+ acc_copyin (block1, SIZE);
+#else
+#pragma acc enter data copyin(block1[0:SIZE])
+#pragma acc enter data copyin(block1[0:SIZE])
+#endif
+
+#pragma acc data copy(block1[0:SIZE])
+ {
+#ifdef OPENACC_API
+ acc_copyin (block1, SIZE);
+#else
+#pragma acc enter data copyin(block1[0:SIZE])
+#endif
+ }
+
+ assert (acc_is_present (block1, SIZE));
+
+#ifdef OPENACC_API
+ acc_copyout (block1, SIZE);
+ assert (acc_is_present (block1, SIZE));
+ acc_copyout (block1, SIZE);
+ assert (acc_is_present (block1, SIZE));
+ acc_copyout (block1, SIZE);
+ assert (!acc_is_present (block1, SIZE));
+#else
+#pragma acc exit data copyout(block1[0:SIZE])
+ assert (acc_is_present (block1, SIZE));
+#pragma acc exit data copyout(block1[0:SIZE])
+ assert (acc_is_present (block1, SIZE));
+#pragma acc exit data copyout(block1[0:SIZE])
+ assert (!acc_is_present (block1, SIZE));
+#endif
+
+ free (block1);
+}
+
+void
+f2 (void)
+{
+ char *block1 = (char *) malloc (SIZE);
+
+#ifdef OPENACC_API
+ acc_copyin (block1, SIZE);
+#else
+#pragma acc enter data copyin(block1[0:SIZE])
+#endif
+
+#pragma acc data copy(block1[0:SIZE])
+ {
+#ifdef OPENACC_API
+ acc_copyout (block1, SIZE);
+#else
+#pragma acc exit data copyout(block1[0:SIZE])
+#endif
+ /* This should stay present until the end of the structured data
+ lifetime. */
+ assert (acc_is_present (block1, SIZE));
+ }
+
+ assert (!acc_is_present (block1, SIZE));
+
+ free (block1);
+}
+
+void
+f3 (void)
+{
+ char *block1 = (char *) malloc (SIZE);
+
+#ifdef OPENACC_API
+ acc_copyin (block1, SIZE);
+#else
+#pragma acc enter data copyin(block1[0:SIZE])
+#endif
+
+#pragma acc data copy(block1[0:SIZE])
+ {
+#ifdef OPENACC_API
+ acc_copyout (block1, SIZE);
+ acc_copyin (block1, SIZE);
+#else
+#pragma acc exit data copyout(block1[0:SIZE])
+#pragma acc enter data copyin(block1[0:SIZE])
+#endif
+ assert (acc_is_present (block1, SIZE));
+ }
+
+ assert (acc_is_present (block1, SIZE));
+#ifdef OPENACC_API
+ acc_copyout (block1, SIZE);
+#else
+#pragma acc exit data copyout(block1[0:SIZE])
+#endif
+ assert (!acc_is_present (block1, SIZE));
+
+ free (block1);
+}
+
+void
+f4 (void)
+{
+ char *block1 = (char *) malloc (SIZE);
+ char *block2 = (char *) malloc (SIZE);
+ char *block3 = (char *) malloc (SIZE);
+
+#pragma acc data copy(block1[0:SIZE], block2[0:SIZE], block3[0:SIZE])
+ {
+ /* The first copyin of block2 is the enclosing data region. This
+ "enter data" should make it live beyond the end of this region.
+ This works, though the on-target copies of block1, block2 and block3
+ will stay allocated until block2 is unmapped because they are bound
+ together in a single target_mem_desc. */
+#ifdef OPENACC_API
+ acc_copyin (block2, SIZE);
+#else
+#pragma acc enter data copyin(block2[0:SIZE])
+#endif
+ }
+
+ assert (!acc_is_present (block1, SIZE));
+ assert (acc_is_present (block2, SIZE));
+ assert (!acc_is_present (block3, SIZE));
+
+#ifdef OPENACC_API
+ acc_copyout (block2, SIZE);
+#else
+#pragma acc exit data copyout(block2[0:SIZE])
+#endif
+ assert (!acc_is_present (block2, SIZE));
+
+ free (block1);
+ free (block2);
+ free (block3);
+}
+
+int
+main (int argc, char *argv[])
+{
+ f1 ();
+ f2 ();
+ f3 ();
+ f4 ();
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-2-lib.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-2-lib.c
new file mode 100644
index 0000000..365df8d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-2-lib.c
@@ -0,0 +1,3 @@
+/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */
+/* { dg-additional-options "-DOPENACC_API" } */
+#include "structured-dynamic-lifetimes-2.c"
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-2.c
new file mode 100644
index 0000000..726942c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-2.c
@@ -0,0 +1,166 @@
+/* Test nested dynamic/structured data mappings. */
+
+/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */
+
+#include <openacc.h>
+#include <assert.h>
+#include <stdlib.h>
+
+#define SIZE 1024
+
+void
+f1 (void)
+{
+ char *block1 = (char *) malloc (SIZE);
+
+#pragma acc data copy(block1[0:SIZE])
+ {
+#ifdef OPENACC_API
+ acc_copyin (block1, SIZE);
+ acc_copyout (block1, SIZE);
+#else
+#pragma acc enter data copyin(block1[0:SIZE])
+#pragma acc exit data copyout(block1[0:SIZE])
+#endif
+ }
+
+ assert (!acc_is_present (block1, SIZE));
+
+ free (block1);
+}
+
+void
+f2 (void)
+{
+ char *block1 = (char *) malloc (SIZE);
+
+#ifdef OPENACC_API
+ acc_copyin (block1, SIZE);
+#else
+#pragma acc enter data copyin(block1[0:SIZE])
+#endif
+
+#pragma acc data copy(block1[0:SIZE])
+ {
+ }
+
+#ifdef OPENACC_API
+ acc_copyout (block1, SIZE);
+#else
+#pragma acc exit data copyout(block1[0:SIZE])
+#endif
+
+ assert (!acc_is_present (block1, SIZE));
+
+ free (block1);
+}
+
+void
+f3 (void)
+{
+ char *block1 = (char *) malloc (SIZE);
+
+#pragma acc data copy(block1[0:SIZE])
+ {
+#ifdef OPENACC_API
+ acc_copyin (block1, SIZE);
+ acc_copyin (block1, SIZE);
+ acc_copyout (block1, SIZE);
+ acc_copyout (block1, SIZE);
+#else
+#pragma acc enter data copyin(block1[0:SIZE])
+#pragma acc enter data copyin(block1[0:SIZE])
+#pragma acc exit data copyout(block1[0:SIZE])
+#pragma acc exit data copyout(block1[0:SIZE])
+#endif
+ }
+
+ assert (!acc_is_present (block1, SIZE));
+
+ free (block1);
+}
+
+void
+f4 (void)
+{
+ char *block1 = (char *) malloc (SIZE);
+
+#pragma acc data copy(block1[0:SIZE])
+ {
+#ifdef OPENACC_API
+ acc_copyin (block1, SIZE);
+#else
+#pragma acc enter data copyin(block1[0:SIZE])
+#endif
+
+#pragma acc data copy(block1[0:SIZE])
+ {
+#ifdef OPENACC_API
+ acc_copyin (block1, SIZE);
+ acc_copyout (block1, SIZE);
+#else
+#pragma acc enter data copyin(block1[0:SIZE])
+#pragma acc exit data copyout(block1[0:SIZE])
+#endif
+ }
+
+#ifdef OPENACC_API
+ acc_copyout (block1, SIZE);
+#else
+#pragma acc exit data copyout(block1[0:SIZE])
+#endif
+ }
+
+ assert (!acc_is_present (block1, SIZE));
+
+ free (block1);
+}
+
+void
+f5 (void)
+{
+ char *block1 = (char *) malloc (SIZE);
+
+#ifdef OPENACC_API
+ acc_copyin (block1, SIZE);
+#else
+#pragma acc enter data copyin(block1[0:SIZE])
+#endif
+
+#pragma acc data copy(block1[0:SIZE])
+ {
+#ifdef OPENACC_API
+ acc_copyin (block1, SIZE);
+#else
+#pragma acc enter data copyin(block1[0:SIZE])
+#endif
+#pragma acc data copy(block1[0:SIZE])
+ {
+ }
+#ifdef OPENACC_API
+ acc_copyout (block1, SIZE);
+#else
+#pragma acc exit data copyout(block1[0:SIZE])
+#endif
+ }
+#ifdef OPENACC_API
+ acc_copyout (block1, SIZE);
+#else
+#pragma acc exit data copyout(block1[0:SIZE])
+#endif
+
+ assert (!acc_is_present (block1, SIZE));
+
+ free (block1);
+}
+
+int
+main (int argc, char *argv[])
+{
+ f1 ();
+ f2 ();
+ f3 ();
+ f4 ();
+ f5 ();
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-3-lib.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-3-lib.c
new file mode 100644
index 0000000..469b35b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-3-lib.c
@@ -0,0 +1,3 @@
+/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */
+/* { dg-additional-options "-DOPENACC_API" } */
+#include "structured-dynamic-lifetimes-3.c"
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-3.c
new file mode 100644
index 0000000..c13f3c5
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-3.c
@@ -0,0 +1,183 @@
+/* Test nested dynamic/structured data mappings (multiple blocks on data
+ regions). */
+
+/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */
+
+#include <openacc.h>
+#include <assert.h>
+#include <stdlib.h>
+
+#define SIZE 1024
+
+void
+f1 (void)
+{
+ char *block1 = (char *) malloc (SIZE);
+ char *block2 = (char *) malloc (SIZE);
+
+#pragma acc data copy(block1[0:SIZE], block2[0:SIZE])
+ {
+#ifdef OPENACC_API
+ acc_copyin (block1, SIZE);
+ acc_copyout (block1, SIZE);
+#else
+#pragma acc enter data copyin(block1[0:SIZE])
+#pragma acc exit data copyout(block1[0:SIZE])
+#endif
+ }
+
+ assert (!acc_is_present (block1, SIZE));
+ assert (!acc_is_present (block2, SIZE));
+
+ free (block1);
+ free (block2);
+}
+
+void
+f2 (void)
+{
+ char *block1 = (char *) malloc (SIZE);
+ char *block2 = (char *) malloc (SIZE);
+
+#ifdef OPENACC_API
+ acc_copyin (block1, SIZE);
+#else
+#pragma acc enter data copyin(block1[0:SIZE])
+#endif
+
+#pragma acc data copy(block1[0:SIZE], block2[0:SIZE])
+ {
+ }
+
+#ifdef OPENACC_API
+ acc_copyout (block1, SIZE);
+#else
+#pragma acc exit data copyout(block1[0:SIZE])
+#endif
+
+ assert (!acc_is_present (block1, SIZE));
+ assert (!acc_is_present (block2, SIZE));
+
+ free (block1);
+ free (block2);
+}
+
+void
+f3 (void)
+{
+ char *block1 = (char *) malloc (SIZE);
+ char *block2 = (char *) malloc (SIZE);
+
+#pragma acc data copy(block1[0:SIZE], block2[0:SIZE])
+ {
+#ifdef OPENACC_API
+ acc_copyin (block1, SIZE);
+ acc_copyin (block2, SIZE);
+ acc_copyout (block2, SIZE);
+ acc_copyout (block1, SIZE);
+#else
+#pragma acc enter data copyin(block1[0:SIZE])
+#pragma acc enter data copyin(block2[0:SIZE])
+#pragma acc exit data copyout(block2[0:SIZE])
+#pragma acc exit data copyout(block1[0:SIZE])
+#endif
+ }
+
+ assert (!acc_is_present (block1, SIZE));
+ assert (!acc_is_present (block2, SIZE));
+
+ free (block1);
+ free (block2);
+}
+
+void
+f4 (void)
+{
+ char *block1 = (char *) malloc (SIZE);
+ char *block2 = (char *) malloc (SIZE);
+
+#pragma acc data copy(block1[0:SIZE], block2[0:SIZE])
+ {
+#ifdef OPENACC_API
+ acc_copyin (block1, SIZE);
+#else
+#pragma acc enter data copyin(block1[0:SIZE])
+#endif
+
+#pragma acc data copy(block1[0:SIZE], block2[0:SIZE])
+ {
+#ifdef OPENACC_API
+ acc_copyin (block2, SIZE);
+ acc_copyout (block2, SIZE);
+#else
+#pragma acc enter data copyin(block2[0:SIZE])
+#pragma acc exit data copyout(block2[0:SIZE])
+#endif
+ }
+#ifdef OPENACC_API
+ acc_copyout (block1, SIZE);
+#else
+#pragma acc exit data copyout(block1[0:SIZE])
+#endif
+ }
+
+ assert (!acc_is_present (block1, SIZE));
+ assert (!acc_is_present (block2, SIZE));
+
+ free (block1);
+ free (block2);
+}
+
+void
+f5 (void)
+{
+ char *block1 = (char *) malloc (SIZE);
+ char *block2 = (char *) malloc (SIZE);
+
+#ifdef OPENACC_API
+ acc_copyin (block1, SIZE);
+#else
+#pragma acc enter data copyin(block1[0:SIZE])
+#endif
+
+#pragma acc data copy(block1[0:SIZE], block2[0:SIZE])
+ {
+#ifdef OPENACC_API
+ acc_copyin (block2, SIZE);
+#else
+#pragma acc enter data copyin(block2[0:SIZE])
+#endif
+
+#pragma acc data copy(block1[0:SIZE], block2[0:SIZE])
+ {
+ }
+#ifdef OPENACC_API
+ acc_copyout (block2, SIZE);
+#else
+#pragma acc exit data copyout(block2[0:SIZE])
+#endif
+ }
+
+#ifdef OPENACC_API
+ acc_copyout (block1, SIZE);
+#else
+#pragma acc exit data copyout(block1[0:SIZE])
+#endif
+
+ assert (!acc_is_present (block1, SIZE));
+ assert (!acc_is_present (block2, SIZE));
+
+ free (block1);
+ free (block2);
+}
+
+int
+main (int argc, char *argv[])
+{
+ f1 ();
+ f2 ();
+ f3 ();
+ f4 ();
+ f5 ();
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-4-lib.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-4-lib.c
new file mode 100644
index 0000000..8e88b97
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-4-lib.c
@@ -0,0 +1,3 @@
+/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */
+/* { dg-additional-options "-DOPENACC_API" } */
+#include "structured-dynamic-lifetimes-4.c"
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-4.c
new file mode 100644
index 0000000..e9a6510
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-4.c
@@ -0,0 +1,64 @@
+/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */
+
+#include <openacc.h>
+#include <assert.h>
+#include <stdlib.h>
+
+#define SIZE 1024
+
+int
+main (int argc, char *argv[])
+{
+ char *block1 = (char *) malloc (SIZE);
+ char *block2 = (char *) malloc (SIZE);
+ char *block3 = (char *) malloc (SIZE);
+
+ /* Doing this twice ensures that we have a non-zero virtual refcount. Make
+ sure that works too. */
+#ifdef OPENACC_API
+ acc_copyin (block1, SIZE);
+ acc_copyin (block1, SIZE);
+#else
+#pragma acc enter data copyin(block1[0:SIZE])
+#pragma acc enter data copyin(block1[0:SIZE])
+#endif
+
+#pragma acc data copy(block1[0:SIZE], block2[0:SIZE], block3[0:SIZE])
+ {
+ /* The first copyin of block2 is the enclosing data region. This
+ "enter data" should make it live beyond the end of this region. */
+#ifdef OPENACC_API
+ acc_copyin (block2, SIZE);
+#else
+#pragma acc enter data copyin(block2[0:SIZE])
+#endif
+ }
+
+ assert (acc_is_present (block1, SIZE));
+ assert (acc_is_present (block2, SIZE));
+ assert (!acc_is_present (block3, SIZE));
+
+#ifdef OPENACC_API
+ acc_copyout (block1, SIZE);
+ assert (acc_is_present (block1, SIZE));
+ acc_copyout (block1, SIZE);
+ assert (!acc_is_present (block1, SIZE));
+
+ acc_copyout (block2, SIZE);
+ assert (!acc_is_present (block2, SIZE));
+#else
+#pragma acc exit data copyout(block1[0:SIZE])
+ assert (acc_is_present (block1, SIZE));
+#pragma acc exit data copyout(block1[0:SIZE])
+ assert (!acc_is_present (block1, SIZE));
+
+#pragma acc exit data copyout(block2[0:SIZE])
+ assert (!acc_is_present (block2, SIZE));
+#endif
+
+ free (block1);
+ free (block2);
+ free (block3);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-5-lib.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-5-lib.c
new file mode 100644
index 0000000..59ef562
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-5-lib.c
@@ -0,0 +1,3 @@
+/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */
+/* { dg-additional-options "-DOPENACC_API" } */
+#include "structured-dynamic-lifetimes-5.c"
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-5.c
new file mode 100644
index 0000000..9807076
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-5.c
@@ -0,0 +1,56 @@
+/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */
+
+#include <openacc.h>
+#include <assert.h>
+#include <stdlib.h>
+
+#define SIZE 1024
+
+int
+main (int argc, char *argv[])
+{
+ char *block1 = (char *) malloc (SIZE);
+ char *block2 = (char *) malloc (SIZE);
+ char *block3 = (char *) malloc (SIZE);
+
+#ifdef OPENACC_API
+ acc_copyin (block1, SIZE);
+#else
+#pragma acc enter data copyin(block1[0:SIZE])
+#endif
+
+#pragma acc data copy(block1[0:SIZE], block2[0:SIZE], block3[0:SIZE])
+ {
+ /* The first copyin of block2 is the enclosing data region. This
+ "enter data" should make it live beyond the end of this region. */
+#ifdef OPENACC_API
+ acc_copyin (block2, SIZE);
+#else
+#pragma acc enter data copyin(block2[0:SIZE])
+#endif
+ }
+
+ assert (acc_is_present (block1, SIZE));
+ assert (acc_is_present (block2, SIZE));
+ assert (!acc_is_present (block3, SIZE));
+
+#ifdef OPENACC_API
+ acc_copyout (block1, SIZE);
+ assert (!acc_is_present (block1, SIZE));
+
+ acc_copyout (block2, SIZE);
+ assert (!acc_is_present (block2, SIZE));
+#else
+#pragma acc exit data copyout(block1[0:SIZE])
+ assert (!acc_is_present (block1, SIZE));
+
+#pragma acc exit data copyout(block2[0:SIZE])
+ assert (!acc_is_present (block2, SIZE));
+#endif
+
+ free (block1);
+ free (block2);
+ free (block3);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-6-lib.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-6-lib.c
new file mode 100644
index 0000000..0401f73
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-6-lib.c
@@ -0,0 +1,3 @@
+/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */
+/* { dg-additional-options "-DOPENACC_API" } */
+#include "structured-dynamic-lifetimes-6.c"
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-6.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-6.c
new file mode 100644
index 0000000..9250b4a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-6.c
@@ -0,0 +1,43 @@
+/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */
+
+#include <openacc.h>
+#include <assert.h>
+#include <stdlib.h>
+
+#define SIZE 1024
+
+int
+main (int argc, char *argv[])
+{
+ char *block1 = (char *) malloc (SIZE);
+ char *block2 = (char *) malloc (SIZE);
+
+#ifdef OPENACC_API
+ acc_copyin (block1, SIZE);
+ acc_copyin (block2, SIZE);
+#else
+#pragma acc enter data copyin(block1[0:SIZE], block2[0:SIZE])
+#endif
+
+#pragma acc data copy(block1[0:SIZE], block2[0:SIZE])
+ {
+#ifdef OPENACC_API
+ acc_copyout (block1, SIZE);
+ acc_copyout (block2, SIZE);
+#else
+#pragma acc exit data copyout(block1[0:SIZE], block2[0:SIZE])
+#endif
+ /* These should stay present until the end of the structured data
+ lifetime. */
+ assert (acc_is_present (block1, SIZE));
+ assert (acc_is_present (block2, SIZE));
+ }
+
+ assert (!acc_is_present (block1, SIZE));
+ assert (!acc_is_present (block2, SIZE));
+
+ free (block1);
+ free (block2);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-7-lib.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-7-lib.c
new file mode 100644
index 0000000..07caefb
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-7-lib.c
@@ -0,0 +1,3 @@
+/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */
+/* { dg-additional-options "-DOPENACC_API" } */
+#include "structured-dynamic-lifetimes-7.c"
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-7.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-7.c
new file mode 100644
index 0000000..52e8d4c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-7.c
@@ -0,0 +1,44 @@
+/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */
+
+#include <openacc.h>
+#include <assert.h>
+#include <stdlib.h>
+
+#define SIZE 1024
+
+int
+main (int argc, char *argv[])
+{
+ char *block1 = (char *) malloc (SIZE);
+ char *block2 = (char *) malloc (SIZE);
+
+#ifdef OPENACC_API
+ acc_copyin (block1, SIZE);
+#else
+#pragma acc enter data copyin(block1[0:SIZE])
+#endif
+
+#pragma acc data copy(block1[0:SIZE], block2[0:SIZE])
+ {
+/* We can't attach the dynamic data mapping's (block1) target_mem_desc to the
+ enclosing structured data region here, because that region maps block2
+ also. */
+#ifdef OPENACC_API
+ acc_copyout (block1, SIZE);
+#else
+#pragma acc exit data copyout(block1[0:SIZE])
+#endif
+ /* These should stay present until the end of the structured data
+ lifetime. */
+ assert (acc_is_present (block1, SIZE));
+ assert (acc_is_present (block2, SIZE));
+ }
+
+ assert (!acc_is_present (block1, SIZE));
+ assert (!acc_is_present (block2, SIZE));
+
+ free (block1);
+ free (block2);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-8-lib.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-8-lib.c
new file mode 100644
index 0000000..1c2479a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-8-lib.c
@@ -0,0 +1,3 @@
+/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */
+/* { dg-additional-options "-DOPENACC_API" } */
+#include "structured-dynamic-lifetimes-8.c"
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-8.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-8.c
new file mode 100644
index 0000000..919ee02
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/structured-dynamic-lifetimes-8.c
@@ -0,0 +1,47 @@
+/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */
+
+#include <openacc.h>
+#include <assert.h>
+#include <stdlib.h>
+
+#define SIZE 1024
+
+int
+main (int argc, char *argv[])
+{
+ char *block1 = (char *) malloc (SIZE);
+ char *block2 = (char *) malloc (SIZE);
+
+#ifdef OPENACC_API
+ acc_copyin (block1, SIZE);
+#else
+#pragma acc enter data copyin(block1[0:SIZE])
+#endif
+
+#pragma acc data copy(block1[0:SIZE], block2[0:SIZE])
+ {
+#ifdef OPENACC_API
+ acc_copyout (block1, SIZE);
+ acc_copyin (block2, SIZE);
+#else
+#pragma acc exit data copyout(block1[0:SIZE])
+#pragma acc enter data copyin(block2[0:SIZE])
+#endif
+ assert (acc_is_present (block1, SIZE));
+ assert (acc_is_present (block2, SIZE));
+ }
+
+ assert (!acc_is_present (block1, SIZE));
+ assert (acc_is_present (block2, SIZE));
+#ifdef OPENACC_API
+ acc_copyout (block2, SIZE);
+#else
+#pragma acc exit data copyout(block2[0:SIZE])
+#endif
+ assert (!acc_is_present (block2, SIZE));
+
+ free (block1);
+ free (block2);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/tile-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/tile-1.c
index c019fe5..57579171 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/tile-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/tile-1.c
@@ -1,5 +1,5 @@
/* AMD GCN does not use 32-lane vectors, so the expected use counts mismatch.
- { dg-skip-if "unsuitable dimensions" { openacc_amdgcn_accel_selected } { "*" } { "" } } */
+ { dg-skip-if "unsuitable dimensions" { openacc_radeon_accel_selected } { "*" } { "" } } */
/* { dg-additional-options "-fopenacc-dim=32" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/c.exp b/libgomp/testsuite/libgomp.oacc-c/c.exp
index 7f13242..48cbc98 100644
--- a/libgomp/testsuite/libgomp.oacc-c/c.exp
+++ b/libgomp/testsuite/libgomp.oacc-c/c.exp
@@ -51,15 +51,6 @@ foreach offload_target [concat [split $offload_targets ","] "disable"] {
unsupported "$subdir $offload_target offloading"
continue
}
- gcn {
- if { ![check_effective_target_openacc_amdgcn_accel_present] } {
- # Don't bother; execution testing is going to FAIL.
- untested "$subdir $offload_target offloading: supported, but hardware not accessible"
- continue
- }
-
- set acc_mem_shared 0
- }
host {
set acc_mem_shared 1
}
@@ -78,6 +69,15 @@ foreach offload_target [concat [split $offload_targets ","] "disable"] {
set acc_mem_shared 0
}
+ radeon {
+ if { ![check_effective_target_openacc_radeon_accel_present] } {
+ # Don't bother; execution testing is going to FAIL.
+ untested "$subdir $offload_target offloading: supported, but hardware not accessible"
+ continue
+ }
+
+ set acc_mem_shared 0
+ }
default {
error "Unknown OpenACC device type: $openacc_device_type (offload target: $offload_target)"
}
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc_get_property.f90 b/libgomp/testsuite/libgomp.oacc-fortran/acc_get_property.f90
index ce69547..1af7cc3 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/acc_get_property.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/acc_get_property.f90
@@ -3,8 +3,6 @@
! of all device types mentioned in the OpenACC standard.
!
! See also acc_get_property.c
-! { dg-do run { target { { ! { openacc_host_selected } } && { ! { openacc_amdgcn_accel_selected } } } } }
-! FIXME: This test does not work with the GCN implementation stub yet.
program test
use openacc
@@ -28,13 +26,14 @@ end program test
! and do basic device independent validation.
subroutine print_device_properties (device_type)
use openacc
+ use iso_c_binding, only: c_size_t
implicit none
integer, intent(in) :: device_type
integer :: device_count
integer :: device
- integer(acc_device_property) :: v
+ integer(c_size_t) :: v
character*256 :: s
device_count = acc_get_num_devices(device_type)
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/atomic_capture-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/atomic_capture-1.f90
index 5a4a1e0..536b3f0 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/atomic_capture-1.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/atomic_capture-1.f90
@@ -275,8 +275,9 @@ program main
if (ltmp .neqv. .not. lexp) STOP 33
if (lgot .neqv. lexp) STOP 34
- igot = 1
+ igot = 0
iexp = N
+ iarr = -42
!$acc parallel loop copy (igot, itmp)
do i = 1, N
@@ -287,13 +288,24 @@ program main
end do
!$acc end parallel loop
+ if (igot /= N) stop 107
+ itmp = 0
+ do i = 1, N
+ if (iarr(i) == 0) then
+ itmp = i
+ exit
+ end if
+ end do
+ ! At most one iarr element can be 0.
do i = 1, N
- if (.not. (1 <= iarr(i) .and. iarr(i) < iexp)) STOP 35
+ if ((iarr(i) == 0 .and. i /= itmp) &
+ .or. iarr(i) < 0 .or. iarr(i) >= N) STOP 35
end do
if (igot /= iexp) STOP 36
- igot = N
+ igot = N + 1
iexp = 1
+ iarr = -42
!$acc parallel loop copy (igot, itmp)
do i = 1, N
@@ -304,8 +316,18 @@ program main
end do
!$acc end parallel loop
+ if (igot /= 1) stop 108
+ itmp = N + 1
+ ! At most one iarr element can be N+1.
+ do i = 1, N
+ if (iarr(i) == N + 1) then
+ itmp = i
+ exit
+ end if
+ end do
do i = 1, N
- if (.not. (iarr(i) == 1 .or. iarr(i) == N)) STOP 37
+ if ((iarr(i) == N + 1 .and. i /= itmp) &
+ .or. iarr(i) <= 0 .or. iarr(i) > N + 1) STOP 37
end do
if (igot /= iexp) STOP 38
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/classtypes-1.f95 b/libgomp/testsuite/libgomp.oacc-fortran/classtypes-1.f95
index f16f42f..c5f0fff 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/classtypes-1.f95
+++ b/libgomp/testsuite/libgomp.oacc-fortran/classtypes-1.f95
@@ -31,7 +31,8 @@ program main
myvar%p%p(i) = -1.0
end do
-!$acc enter data copyin(myvar, myvar%p) create(myvar%p%p)
+!$acc enter data copyin(myvar)
+!$acc enter data copyin(myvar%p) create(myvar%p%p)
!$acc parallel loop present(myvar%p%p)
do i=1,100
@@ -39,7 +40,8 @@ program main
end do
!$acc end parallel loop
-!$acc exit data copyout(myvar%p%p) delete(myvar, myvar%p)
+!$acc exit data copyout(myvar%p%p) delete(myvar%p)
+!$acc exit data delete(myvar)
do i=1,100
if (myvar%p%p(i) .ne. i * 2) stop 1
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-2.f90
deleted file mode 100644
index 3593661..0000000
--- a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-2.f90
+++ /dev/null
@@ -1,33 +0,0 @@
-! { dg-do run }
-
-! Test of attach/detach with "acc data", two clauses at once.
-
-program dtype
- implicit none
- integer, parameter :: n = 512
- type mytype
- integer, allocatable :: a(:)
- end type mytype
- integer i
-
- type(mytype) :: var
-
- allocate(var%a(1:n))
-
-!$acc data copy(var) copy(var%a)
-
-!$acc parallel loop
- do i = 1,n
- var%a(i) = i
- end do
-!$acc end parallel loop
-
-!$acc end data
-
- do i = 1,n
- if (i .ne. var%a(i)) stop 1
- end do
-
- deallocate(var%a)
-
-end program dtype
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-3.f90
index 667d944..edb6b8d 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-3.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-3.f90
@@ -16,12 +16,14 @@ program dtype
allocate(var%a(1:n))
allocate(var%b(1:n))
-!$acc parallel loop copy(var) copy(var%a(1:n)) copy(var%b(1:n))
+!$acc data copy(var)
+!$acc parallel loop copy(var%a(1:n)) copy(var%b(1:n))
do i = 1,n
var%a(i) = i
var%b(i) = i
end do
!$acc end parallel loop
+!$acc end data
do i = 1,n
if (i .ne. var%a(i)) stop 1
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6-no_finalize.F90 b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6-no_finalize.F90
new file mode 100644
index 0000000..ed4f10e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6-no_finalize.F90
@@ -0,0 +1,8 @@
+! { dg-do run }
+
+/* Nullify the 'finalize' clause, which disturbs reference counting. */
+#define finalize
+#include "deep-copy-6.f90"
+
+! { dg-output ".*CheCKpOInT1(\n|\r\n|\r)" }
+! { dg-output ".CheCKpOInT2(\n|\r\n|\r)" }
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6.f90 b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6.f90
index 12910d0..5837a40 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6.f90
@@ -3,6 +3,7 @@
! Test of attachment counters and finalize.
program dtype
+ use openacc
implicit none
integer, parameter :: n = 512
type mytype
@@ -36,7 +37,23 @@ program dtype
end do
!$acc end parallel loop
+ if (.not. acc_is_present(var%a(5:n - 5))) stop 11
+ if (.not. acc_is_present(var%b(5:n - 5))) stop 12
+ if (.not. acc_is_present(var)) stop 13
+ print *, "CheCKpOInT1"
+ ! { dg-output ".*CheCKpOInT1(\n|\r\n|\r)" }
!$acc exit data copyout(var%a(5:n - 5), var%b(5:n - 5)) finalize
+ !TODO goacc_exit_data_internal: Assertion `is_tgt_unmapped || num_mappings > 1' failed.
+ !TODO { dg-output ".*\[Aa\]ssert.*is_tgt_unmapped" { target { ! openacc_host_selected } } } ! Scan for what we expect in the "XFAILed" case (without actually XFAILing).
+ !TODO { dg-shouldfail "XFAILed" { ! openacc_host_selected } } ! ... instead of 'dg-xfail-run-if' so that 'dg-output' is evaluated at all.
+ !TODO { dg-final { if { [dg-process-target { xfail { ! openacc_host_selected } }] == "F" } { xfail "[testname-for-summary] really is XFAILed" } } } ! ... so that we still get an XFAIL visible in the log.
+ print *, "CheCKpOInT2"
+ ! { dg-output ".CheCKpOInT2(\n|\r\n|\r)" { target { openacc_host_selected } } }
+ if (acc_get_device_type() .ne. acc_device_host) then
+ if (acc_is_present(var%a(5:n - 5))) stop 21
+ if (acc_is_present(var%b(5:n - 5))) stop 22
+ end if
+ if (.not. acc_is_present(var)) stop 23
!$acc end data
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/error_stop-1.f b/libgomp/testsuite/libgomp.oacc-fortran/error_stop-1.f
index e7358f4..de72774 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/error_stop-1.f
+++ b/libgomp/testsuite/libgomp.oacc-fortran/error_stop-1.f
@@ -3,6 +3,10 @@
PROGRAM MAIN
IMPLICIT NONE
+! Initialize before the checkpoint, in case this produces any output.
+!$ACC PARALLEL
+!$ACC END PARALLEL
+
PRINT *, "CheCKpOInT"
!$ACC PARALLEL
ERROR STOP
@@ -17,7 +21,7 @@
! In gfortran's main program, libfortran's set_options is called - which sets
! compiler_options.backtrace = 1 by default. For an offload libgfortran, this
! is never called and, hence, "Error termination." is never printed. Thus:
-! { dg-output "Error termination.*" { target { ! { openacc_nvidia_accel_selected || openacc_amdgcn_accel_selected } } } }
+! { dg-output "Error termination.*" { target { ! { openacc_nvidia_accel_selected || openacc_radeon_accel_selected } } } }
!
! PR85463:
! { dg-output "libgomp: cuStreamSynchronize error.*" { target openacc_nvidia_accel_selected } }
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/error_stop-2.f b/libgomp/testsuite/libgomp.oacc-fortran/error_stop-2.f
index fca1d96..475c9cb 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/error_stop-2.f
+++ b/libgomp/testsuite/libgomp.oacc-fortran/error_stop-2.f
@@ -3,6 +3,10 @@
PROGRAM MAIN
IMPLICIT NONE
+! Initialize before the checkpoint, in case this produces any output.
+!$ACC PARALLEL
+!$ACC END PARALLEL
+
PRINT *, "CheCKpOInT"
!$ACC PARALLEL
ERROR STOP 35
@@ -17,7 +21,7 @@
! In gfortran's main program, libfortran's set_options is called - which sets
! compiler_options.backtrace = 1 by default. For an offload libgfortran, this
! is never called and, hence, "Error termination." is never printed. Thus:
-! { dg-output "Error termination.*" { target { ! { openacc_nvidia_accel_selected || openacc_amdgcn_accel_selected } } } }
+! { dg-output "Error termination.*" { target { ! { openacc_nvidia_accel_selected || openacc_radeon_accel_selected } } } }
!
! PR85463:
! { dg-output "libgomp: cuStreamSynchronize error.*" { target openacc_nvidia_accel_selected } }
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/error_stop-3.f b/libgomp/testsuite/libgomp.oacc-fortran/error_stop-3.f
index 2ae0b0d..ab63444 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/error_stop-3.f
+++ b/libgomp/testsuite/libgomp.oacc-fortran/error_stop-3.f
@@ -3,6 +3,10 @@
PROGRAM MAIN
IMPLICIT NONE
+! Initialize before the checkpoint, in case this produces any output.
+!$ACC PARALLEL
+!$ACC END PARALLEL
+
PRINT *, "CheCKpOInT"
!$ACC PARALLEL
ERROR STOP "SiGN"
@@ -17,7 +21,7 @@
! In gfortran's main program, libfortran's set_options is called - which sets
! compiler_options.backtrace = 1 by default. For an offload libgfortran, this
! is never called and, hence, "Error termination." is never printed. Thus:
-! { dg-output "Error termination.*" { target { ! { openacc_nvidia_accel_selected || openacc_amdgcn_accel_selected } } } }
+! { dg-output "Error termination.*" { target { ! { openacc_nvidia_accel_selected || openacc_radeon_accel_selected } } } }
!
! PR85463:
! { dg-output "libgomp: cuStreamSynchronize error.*" { target openacc_nvidia_accel_selected } }
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/fortran.exp b/libgomp/testsuite/libgomp.oacc-fortran/fortran.exp
index 60f0889..d607903 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/fortran.exp
+++ b/libgomp/testsuite/libgomp.oacc-fortran/fortran.exp
@@ -82,8 +82,11 @@ if { $lang_test_file_found } {
unsupported "$subdir $offload_target offloading"
continue
}
- gcn {
- if { ![check_effective_target_openacc_amdgcn_accel_present] } {
+ host {
+ set acc_mem_shared 1
+ }
+ nvidia {
+ if { ![check_effective_target_openacc_nvidia_accel_present] } {
# Don't bother; execution testing is going to FAIL.
untested "$subdir $offload_target offloading: supported, but hardware not accessible"
continue
@@ -91,11 +94,8 @@ if { $lang_test_file_found } {
set acc_mem_shared 0
}
- host {
- set acc_mem_shared 1
- }
- nvidia {
- if { ![check_effective_target_openacc_nvidia_accel_present] } {
+ radeon {
+ if { ![check_effective_target_openacc_radeon_accel_present] } {
# Don't bother; execution testing is going to FAIL.
untested "$subdir $offload_target offloading: supported, but hardware not accessible"
continue
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/host_data-5.F90 b/libgomp/testsuite/libgomp.oacc-fortran/host_data-5.F90
new file mode 100644
index 0000000..483ac3f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/host_data-5.F90
@@ -0,0 +1,92 @@
+! { dg-do run }
+!
+! Test the 'if' and 'if_present' clauses on the 'host_data' construct.
+!
+! Fortran variant of 'libgomp.oacc-c-c++-common/host_data-7.c'.
+!
+program main
+ use iso_c_binding
+ implicit none
+ real, target :: var, arr(100)
+ integer(c_intptr_t) :: host_p, host_parr
+ host_p = transfer(c_loc(var), host_p)
+ host_parr = transfer(c_loc(arr), host_parr)
+ call foo (var, arr, host_p, host_parr, .false.)
+ call foo (var, arr, host_p, host_parr, .true.)
+
+contains
+
+subroutine foo (p2, parr, host_p, host_parr, cond)
+ use openacc
+ implicit none
+ real, target, intent(in) :: parr(:), p2
+ integer(c_intptr_t), value, intent(in) :: host_p, host_parr
+ logical, value, intent(in) :: cond
+ real, pointer :: p
+ p => p2
+
+ if (host_p /= transfer(c_loc(p), host_p)) stop 1
+ if (host_parr /= transfer(c_loc(parr), host_parr)) stop 2
+#if !ACC_MEM_SHARED
+ if (acc_is_present(p, c_sizeof(p))) stop 3
+ if (acc_is_present(parr, 1)) stop 4
+#endif
+
+ !$acc data copyin(host_p, host_parr)
+#if !ACC_MEM_SHARED
+ if (acc_is_present(p, c_sizeof(p))) stop 5
+ if (acc_is_present(parr, 1)) stop 6
+#endif
+ !$acc host_data use_device(p, parr) if_present
+ ! Not mapped yet, so the address will be equal to the host pointer.
+ if (transfer(c_loc(p), host_p) /= host_p) stop 7
+ if (transfer(c_loc(parr), host_parr) /= host_parr) stop 8
+ !$acc end host_data
+#if !ACC_MEM_SHARED
+ if (acc_is_present(p, c_sizeof(p))) stop 9
+ if (acc_is_present(parr, 1)) stop 10
+#endif
+
+ !$acc data copy(p, parr)
+ if (.not. acc_is_present(p, c_sizeof(p))) stop 11
+ if (.not. acc_is_present(parr, 1)) stop 12
+ ! Not inside a host_data construct, so still the host pointer.
+ if (transfer(c_loc(p), host_p) /= host_p) stop 13
+ if (transfer(c_loc(parr), host_parr) /= host_parr) stop 14
+
+ !$acc host_data use_device(p, parr)
+#if ACC_MEM_SHARED
+ if (transfer(c_loc(p), host_p) /= host_p) stop 15
+ if (transfer(c_loc(parr), host_parr) /= host_parr) stop 16
+#else
+ ! The device address is different from host address.
+ if (transfer(c_loc(p), host_p) == host_p) stop 17
+ if (transfer(c_loc(parr), host_parr) == host_parr) stop 18
+#endif
+ !$acc end host_data
+
+ !$acc host_data use_device(p, parr) if_present
+#if ACC_MEM_SHARED
+ if (transfer(c_loc(p), host_p) /= host_p) stop 19
+ if (transfer(c_loc(parr), host_parr) /= host_parr) stop 20
+#else
+ ! The data is present now, so this is the same as above.
+ if (transfer(c_loc(p), host_p) == host_p) stop 21
+ if (transfer(c_loc(parr), host_parr) == host_parr) stop 22
+#endif
+ !$acc end host_data
+
+ !$acc host_data use_device(p, parr) if(cond)
+#if ACC_MEM_SHARED
+ if (transfer(c_loc(p), host_p) /= host_p) stop 23
+ if (transfer(c_loc(parr), host_parr) /= host_parr) stop 24
+#else
+ ! This is the device pointer iff 'cond' is true.
+ if ((transfer(c_loc(p), host_p) /= host_p) .neqv. cond) stop 25
+ if ((transfer(c_loc(parr), host_parr) /= host_parr) .neqv. cond) stop 26
+#endif
+ !$acc end host_data
+ !$acc end data
+ !$acc end data
+end subroutine foo
+end
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-1-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-1-1.f90
new file mode 100644
index 0000000..445cbab
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-1-1.f90
@@ -0,0 +1,42 @@
+! { dg-do run }
+! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } }
+
+! Adapted from 'libgomp.oacc-fortran/deep-copy-6.f90'.
+
+program main
+ use openacc
+ implicit none
+ integer, parameter :: n = 512
+ type mytype
+ integer, allocatable :: a(:)
+ end type mytype
+ type(mytype) :: var
+
+ allocate(var%a(1:n))
+
+ !$acc data create(var)
+
+ !$acc enter data create(var%a)
+
+ if (.not. acc_is_present(var%a)) stop 1
+ if (.not. acc_is_present(var)) stop 2
+
+ print *, "CheCKpOInT1"
+ ! { dg-output ".*CheCKpOInT1(\n|\r\n|\r)" }
+ !$acc exit data delete(var%a) finalize
+ !TODO goacc_exit_data_internal: Assertion `is_tgt_unmapped || num_mappings > 1' failed.
+ !TODO { dg-output ".*\[Aa\]ssert.*is_tgt_unmapped" { target { ! openacc_host_selected } } } ! Scan for what we expect in the "XFAILed" case (without actually XFAILing).
+ !TODO { dg-shouldfail "XFAILed" { ! openacc_host_selected } } ! ... instead of 'dg-xfail-run-if' so that 'dg-output' is evaluated at all.
+ !TODO { dg-final { if { [dg-process-target { xfail { ! openacc_host_selected } }] == "F" } { xfail "[testname-for-summary] really is XFAILed" } } } ! ... so that we still get an XFAIL visible in the log.
+ print *, "CheCKpOInT2"
+ ! { dg-output ".CheCKpOInT2(\n|\r\n|\r)" { target { openacc_host_selected } } }
+ if (acc_is_present(var%a)) stop 3
+ if (.not. acc_is_present(var)) stop 4
+
+ !$acc end data
+ if (acc_is_present(var%a)) stop 5
+ if (acc_is_present(var)) stop 6
+
+ deallocate(var%a)
+
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-1-2.F90 b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-1-2.F90
new file mode 100644
index 0000000..7b206ac
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-1-2.F90
@@ -0,0 +1,9 @@
+! { dg-do run }
+! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } }
+
+/* Nullify the 'finalize' clause, which disturbs reference counting. */
+#define finalize
+#include "mdc-refcount-1-1-1.f90"
+
+! { dg-output ".*CheCKpOInT1(\n|\r\n|\r)" }
+! { dg-output ".CheCKpOInT2(\n|\r\n|\r)" }
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-2-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-2-1.f90
new file mode 100644
index 0000000..8554534
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-2-1.f90
@@ -0,0 +1,44 @@
+! { dg-do run }
+! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } }
+
+! Adapted from 'libgomp.oacc-fortran/mdc-refcount-1-1-1.f90'.
+
+program main
+ use openacc
+ implicit none
+ integer, parameter :: n = 512
+ type mytype
+ integer, allocatable :: a(:)
+ end type mytype
+ type(mytype) :: var
+
+ allocate(var%a(1:n))
+
+ !$acc data create(var)
+
+ call acc_create(var%a)
+ ! After mapping via runtime API call, separately trigger attach action; see <https://github.com/OpenACC/openacc-spec/issues/301>.
+ !$acc enter data attach(var%a)
+
+ if (.not. acc_is_present(var%a)) stop 1
+ if (.not. acc_is_present(var)) stop 2
+
+ print *, "CheCKpOInT1"
+ ! { dg-output ".*CheCKpOInT1(\n|\r\n|\r)" }
+ !$acc exit data delete(var%a) finalize
+ !TODO goacc_exit_data_internal: Assertion `is_tgt_unmapped || num_mappings > 1' failed.
+ !TODO { dg-output ".*\[Aa\]ssert.*is_tgt_unmapped" { target { ! openacc_host_selected } } } ! Scan for what we expect in the "XFAILed" case (without actually XFAILing).
+ !TODO { dg-shouldfail "XFAILed" { ! openacc_host_selected } } ! ... instead of 'dg-xfail-run-if' so that 'dg-output' is evaluated at all.
+ !TODO { dg-final { if { [dg-process-target { xfail { ! openacc_host_selected } }] == "F" } { xfail "[testname-for-summary] really is XFAILed" } } } ! ... so that we still get an XFAIL visible in the log.
+ print *, "CheCKpOInT2"
+ ! { dg-output ".CheCKpOInT2(\n|\r\n|\r)" { target { openacc_host_selected } } }
+ if (acc_is_present(var%a)) stop 3
+ if (.not. acc_is_present(var)) stop 4
+
+ !$acc end data
+ if (acc_is_present(var%a)) stop 5
+ if (acc_is_present(var)) stop 6
+
+ deallocate(var%a)
+
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-2-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-2-2.f90
new file mode 100644
index 0000000..8e696cc
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-2-2.f90
@@ -0,0 +1,44 @@
+! { dg-do run }
+! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } }
+
+! Copy of 'libgomp.oacc-fortran/mdc-refcount-1-2-1.f90', without 'finalize' clause.
+
+program main
+ use openacc
+ implicit none
+ integer, parameter :: n = 512
+ type mytype
+ integer, allocatable :: a(:)
+ end type mytype
+ type(mytype) :: var
+
+ allocate(var%a(1:n))
+
+ !$acc data create(var)
+
+ call acc_create(var%a)
+ ! After mapping via runtime API call, separately trigger attach action; see <https://github.com/OpenACC/openacc-spec/issues/301>.
+ !$acc enter data attach(var%a)
+
+ if (.not. acc_is_present(var%a)) stop 1
+ if (.not. acc_is_present(var)) stop 2
+
+ print *, "CheCKpOInT1"
+ ! { dg-output ".*CheCKpOInT1(\n|\r\n|\r)" }
+ !$acc exit data delete(var%a)
+ !TODO goacc_exit_data_internal: Assertion `is_tgt_unmapped || num_mappings > 1' failed.
+ !TODO { dg-output ".*\[Aa\]ssert.*is_tgt_unmapped" { target { ! openacc_host_selected } } } ! Scan for what we expect in the "XFAILed" case (without actually XFAILing).
+ !TODO { dg-shouldfail "XFAILed" { ! openacc_host_selected } } ! ... instead of 'dg-xfail-run-if' so that 'dg-output' is evaluated at all.
+ !TODO { dg-final { if { [dg-process-target { xfail { ! openacc_host_selected } }] == "F" } { xfail "[testname-for-summary] really is XFAILed" } } } ! ... so that we still get an XFAIL visible in the log.
+ print *, "CheCKpOInT2"
+ ! { dg-output ".CheCKpOInT2(\n|\r\n|\r)" { target { openacc_host_selected } } }
+ if (acc_is_present(var%a)) stop 3
+ if (.not. acc_is_present(var)) stop 4
+
+ !$acc end data
+ if (acc_is_present(var%a)) stop 5
+ if (acc_is_present(var)) stop 6
+
+ deallocate(var%a)
+
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-3-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-3-1.f90
new file mode 100644
index 0000000..070a6f8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-3-1.f90
@@ -0,0 +1,45 @@
+! { dg-do run }
+! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } }
+
+! Adapted from 'libgomp.oacc-fortran/mdc-refcount-1-2-1.f90'.
+
+program main
+ use openacc
+ implicit none
+ integer, parameter :: n = 512
+ type mytype
+ integer, allocatable :: a(:)
+ end type mytype
+ type(mytype) :: var
+
+ allocate(var%a(1:n))
+
+ !$acc data create(var)
+
+ call acc_create(var%a)
+ ! After mapping via runtime API call, separately trigger attach action; see <https://github.com/OpenACC/openacc-spec/issues/301>.
+ !$acc enter data attach(var%a)
+
+ if (.not. acc_is_present(var%a)) stop 1
+ if (.not. acc_is_present(var)) stop 2
+
+ !$acc exit data detach(var%a)
+ print *, "CheCKpOInT1"
+ ! { dg-output ".*CheCKpOInT1(\n|\r\n|\r)" }
+ !$acc exit data delete(var%a) finalize
+ !TODO goacc_exit_data_internal: Assertion `is_tgt_unmapped || num_mappings > 1' failed.
+ !TODO { dg-output ".*\[Aa\]ssert.*is_tgt_unmapped" { target { ! openacc_host_selected } } } ! Scan for what we expect in the "XFAILed" case (without actually XFAILing).
+ !TODO { dg-shouldfail "XFAILed" { ! openacc_host_selected } } ! ... instead of 'dg-xfail-run-if' so that 'dg-output' is evaluated at all.
+ !TODO { dg-final { if { [dg-process-target { xfail { ! openacc_host_selected } }] == "F" } { xfail "[testname-for-summary] really is XFAILed" } } } ! ... so that we still get an XFAIL visible in the log.
+ print *, "CheCKpOInT2"
+ ! { dg-output ".CheCKpOInT2(\n|\r\n|\r)" { target { openacc_host_selected } } }
+ if (acc_is_present(var%a)) stop 3
+ if (.not. acc_is_present(var)) stop 4
+
+ !$acc end data
+ if (acc_is_present(var%a)) stop 5
+ if (acc_is_present(var)) stop 6
+
+ deallocate(var%a)
+
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-3-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-3-2.f90
new file mode 100644
index 0000000..3c4bbda
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-3-2.f90
@@ -0,0 +1,44 @@
+! { dg-do run }
+! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } }
+
+! Copy of 'libgomp.oacc-fortran/mdc-refcount-1-3-1.f90', without 'finalize' clause.
+
+program main
+ use openacc
+ implicit none
+ integer, parameter :: n = 512
+ type mytype
+ integer, allocatable :: a(:)
+ end type mytype
+ type(mytype) :: var
+
+ allocate(var%a(1:n))
+
+ !$acc data create(var)
+
+ call acc_create(var%a)
+ ! After mapping via runtime API call, separately trigger attach action; see <https://github.com/OpenACC/openacc-spec/issues/301>.
+ !$acc enter data attach(var%a)
+
+ if (.not. acc_is_present(var%a)) stop 1
+ if (.not. acc_is_present(var)) stop 2
+
+ !$acc exit data detach(var%a)
+ print *, "CheCKpOInT1"
+ ! { dg-output ".*CheCKpOInT1(\n|\r\n|\r)" }
+ !$acc exit data delete(var%a)
+ !TODO { dg-output "(\n|\r\n|\r)libgomp: attach count underflow(\n|\r\n|\r)$" { target { ! openacc_host_selected } } } ! Scan for what we expect in the "XFAILed" case (without actually XFAILing).
+ !TODO { dg-shouldfail "XFAILed" { ! openacc_host_selected } } ! ... instead of 'dg-xfail-run-if' so that 'dg-output' is evaluated at all.
+ !TODO { dg-final { if { [dg-process-target { xfail { ! openacc_host_selected } }] == "F" } { xfail "[testname-for-summary] really is XFAILed" } } } ! ... so that we still get an XFAIL visible in the log.
+ print *, "CheCKpOInT2"
+ ! { dg-output ".CheCKpOInT2(\n|\r\n|\r)" { target { openacc_host_selected } } }
+ if (acc_is_present(var%a)) stop 3
+ if (.not. acc_is_present(var)) stop 4
+
+ !$acc end data
+ if (acc_is_present(var%a)) stop 5
+ if (acc_is_present(var)) stop 6
+
+ deallocate(var%a)
+
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-1.f90
new file mode 100644
index 0000000..b22e411
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-1.f90
@@ -0,0 +1,45 @@
+! { dg-do run }
+! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } }
+
+! Adapted from 'libgomp.oacc-fortran/mdc-refcount-1-3-1.f90'.
+
+program main
+ use openacc
+ implicit none
+ integer, parameter :: n = 512
+ type mytype
+ integer, allocatable :: a(:)
+ end type mytype
+ type(mytype) :: var
+
+ allocate(var%a(1:n))
+
+ !$acc data create(var)
+
+ call acc_create(var%a)
+ ! After mapping via runtime API call, separately trigger attach action; see <https://github.com/OpenACC/openacc-spec/issues/301>.
+ !$acc enter data attach(var%a)
+
+ if (.not. acc_is_present(var%a)) stop 1
+ if (.not. acc_is_present(var)) stop 2
+
+ print *, "CheCKpOInT1"
+ ! { dg-output ".*CheCKpOInT1(\n|\r\n|\r)" }
+ !$acc exit data detach(var%a) finalize
+ !TODO goacc_exit_data_internal: Assertion `is_tgt_unmapped || num_mappings > 1' failed.
+ !TODO { dg-output ".*\[Aa\]ssert.*is_tgt_unmapped" { target { ! openacc_host_selected } } } ! Scan for what we expect in the "XFAILed" case (without actually XFAILing).
+ !TODO { dg-shouldfail "XFAILed" { ! openacc_host_selected } } ! ... instead of 'dg-xfail-run-if' so that 'dg-output' is evaluated at all.
+ !TODO { dg-final { if { [dg-process-target { xfail { ! openacc_host_selected } }] == "F" } { xfail "[testname-for-summary] really is XFAILed" } } } ! ... so that we still get an XFAIL visible in the log.
+ print *, "CheCKpOInT2"
+ ! { dg-output ".CheCKpOInT2(\n|\r\n|\r)" { target { openacc_host_selected } } }
+ !$acc exit data delete(var%a)
+ if (acc_is_present(var%a)) stop 3
+ if (.not. acc_is_present(var)) stop 4
+
+ !$acc end data
+ if (acc_is_present(var%a)) stop 5
+ if (acc_is_present(var)) stop 6
+
+ deallocate(var%a)
+
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-2.f90
new file mode 100644
index 0000000..476cd5c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-2.f90
@@ -0,0 +1,44 @@
+! { dg-do run }
+! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } }
+
+! Copy of 'libgomp.oacc-fortran/mdc-refcount-1-4-1.f90', without 'finalize' clause.
+
+program main
+ use openacc
+ implicit none
+ integer, parameter :: n = 512
+ type mytype
+ integer, allocatable :: a(:)
+ end type mytype
+ type(mytype) :: var
+
+ allocate(var%a(1:n))
+
+ !$acc data create(var)
+
+ call acc_create(var%a)
+ ! After mapping via runtime API call, separately trigger attach action; see <https://github.com/OpenACC/openacc-spec/issues/301>.
+ !$acc enter data attach(var%a)
+
+ if (.not. acc_is_present(var%a)) stop 1
+ if (.not. acc_is_present(var)) stop 2
+
+ !$acc exit data detach(var%a)
+ print *, "CheCKpOInT1"
+ ! { dg-output ".*CheCKpOInT1(\n|\r\n|\r)" }
+ !$acc exit data delete(var%a)
+ !TODO { dg-output "(\n|\r\n|\r)libgomp: attach count underflow(\n|\r\n|\r)$" { target { ! openacc_host_selected } } } ! Scan for what we expect in the "XFAILed" case (without actually XFAILing).
+ !TODO { dg-shouldfail "XFAILed" { ! openacc_host_selected } } ! ... instead of 'dg-xfail-run-if' so that 'dg-output' is evaluated at all.
+ !TODO { dg-final { if { [dg-process-target { xfail { ! openacc_host_selected } }] == "F" } { xfail "[testname-for-summary] really is XFAILed" } } } ! ... so that we still get an XFAIL visible in the log.
+ print *, "CheCKpOInT2"
+ ! { dg-output ".CheCKpOInT2(\n|\r\n|\r)" { target { openacc_host_selected } } }
+ if (acc_is_present(var%a)) stop 3
+ if (.not. acc_is_present(var)) stop 4
+
+ !$acc end data
+ if (acc_is_present(var%a)) stop 5
+ if (acc_is_present(var)) stop 6
+
+ deallocate(var%a)
+
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-1.f b/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-1.f
index 537212e..36e9844 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-1.f
+++ b/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-1.f
@@ -4,6 +4,6 @@
implicit none
include "openacc_lib.h"
- if (openacc_version .ne. 201306) STOP 1
+ if (openacc_version .ne. 201711) STOP 1
end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-2.f90
index 54f301b..e815bc1 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-2.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-2.f90
@@ -4,6 +4,6 @@ program main
use openacc
implicit none
- if (openacc_version .ne. 201306) STOP 1
+ if (openacc_version .ne. 201711) STOP 1
end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/routine-10.f90 b/libgomp/testsuite/libgomp.oacc-fortran/routine-10.f90
new file mode 100644
index 0000000..90cca7c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/routine-10.f90
@@ -0,0 +1,52 @@
+! { dg-do run }
+!
+module m
+ implicit none
+contains
+ pure subroutine add_ps_routine(a, b, c)
+ implicit none
+ !$acc routine seq
+ integer, intent(in) :: a, b
+ integer, intent(out) :: c
+ integer, parameter :: n = 10
+ integer :: i
+
+ do i = 1, n
+ if (i .eq. 5) then
+ c = a + b
+ end if
+ end do
+ end subroutine add_ps_routine
+
+ elemental impure function add_ef(a, b) result(c)
+ implicit none
+ !$acc routine
+ integer, intent(in) :: a, b
+ integer :: c
+
+ call add_ps_routine(a, b, c)
+ end function add_ef
+end module m
+
+program main
+ use m
+ implicit none
+ integer, parameter :: n = 10
+ integer, dimension(n) :: a_a
+ integer, dimension(n) :: b_a
+ integer, dimension(n) :: c_a
+ integer :: i
+
+ a_a = [(3 * i, i = 1, n)]
+ b_a = [(-2 * i, i = 1, n)]
+ !$acc parallel copyin(a_a, b_a) copyout(c_a)
+ !$acc loop gang
+ do i = 1, n
+ if (i .eq. 4) then
+ c_a = add_ef(a_a, b_a)
+ end if
+ end do
+ !$acc end parallel
+ if (any (c_a /= [(i, i=1, 10)])) stop 1
+ !print *, a
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/stop-1.f b/libgomp/testsuite/libgomp.oacc-fortran/stop-1.f
index af267fc..2c00d2e 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/stop-1.f
+++ b/libgomp/testsuite/libgomp.oacc-fortran/stop-1.f
@@ -3,6 +3,10 @@
PROGRAM MAIN
IMPLICIT NONE
+! Initialize before the checkpoint, in case this produces any output.
+!$ACC PARALLEL
+!$ACC END PARALLEL
+
PRINT *, "CheCKpOInT"
!$ACC PARALLEL
STOP
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/stop-2.f b/libgomp/testsuite/libgomp.oacc-fortran/stop-2.f
index 13c0684..adade54 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/stop-2.f
+++ b/libgomp/testsuite/libgomp.oacc-fortran/stop-2.f
@@ -3,6 +3,10 @@
PROGRAM MAIN
IMPLICIT NONE
+! Initialize before the checkpoint, in case this produces any output.
+!$ACC PARALLEL
+!$ACC END PARALLEL
+
PRINT *, "CheCKpOInT"
!$ACC PARALLEL
STOP 35
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/stop-3.f b/libgomp/testsuite/libgomp.oacc-fortran/stop-3.f
index 3bd7446..157e369 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/stop-3.f
+++ b/libgomp/testsuite/libgomp.oacc-fortran/stop-3.f
@@ -3,6 +3,10 @@
PROGRAM MAIN
IMPLICIT NONE
+! Initialize before the checkpoint, in case this produces any output.
+!$ACC PARALLEL
+!$ACC END PARALLEL
+
PRINT *, "CheCKpOInT"
!$ACC PARALLEL
STOP "SiGN"