aboutsummaryrefslogtreecommitdiff
path: root/libgomp
diff options
context:
space:
mode:
Diffstat (limited to 'libgomp')
-rw-r--r--libgomp/ChangeLog50
-rw-r--r--libgomp/Makefile.am2
-rw-r--r--libgomp/Makefile.in7
-rw-r--r--libgomp/config/accel/target-cxa-dso-dtor.c62
-rw-r--r--libgomp/target-cxa-dso-dtor.c3
-rw-r--r--libgomp/testsuite/libgomp.c++/target-cdtor-1.C104
-rw-r--r--libgomp/testsuite/libgomp.c++/target-cdtor-2.C140
-rw-r--r--libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C6
-rw-r--r--libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C6
-rw-r--r--libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1.C6
-rw-r--r--libgomp/testsuite/libgomp.c++/target-exceptions-throw-1.C3
-rw-r--r--libgomp/testsuite/libgomp.c++/target-exceptions-throw-2.C3
-rw-r--r--libgomp/testsuite/libgomp.c-c++-common/target-cdtor-1.c89
-rw-r--r--libgomp/testsuite/libgomp.c/interop-hsa.c203
-rw-r--r--libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-1.C3
-rw-r--r--libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2.C3
16 files changed, 669 insertions, 21 deletions
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog
index 096e17b..49a62d4 100644
--- a/libgomp/ChangeLog
+++ b/libgomp/ChangeLog
@@ -1,3 +1,53 @@
+2025-04-24 Tobias Burnus <tburnus@baylibre.com>
+
+ * testsuite/lib/libgomp.exp
+ (check_effective_target_gomp_hip_header_nvidia): Compile with
+ "-Wno-deprecated-declarations".
+ * testsuite/libgomp.c/interop-hip-nvidia-full.c: Likewise.
+ * testsuite/libgomp.c/interop-hipblas-nvidia-full.c: Likewise.
+ * testsuite/libgomp.c/interop-hipblas.h: Add workarounds
+ when using the HIP headers with __HIP_PLATFORM_NVIDIA__.
+
+2025-04-24 Tobias Burnus <tburnus@baylibre.com>
+
+ * testsuite/lib/libgomp.exp (check_effective_target_openacc_cublas,
+ check_effective_target_openacc_cudart): Update description as
+ the check requires more.
+ (check_effective_target_openacc_libcuda,
+ check_effective_target_openacc_libcublas,
+ check_effective_target_openacc_libcudart,
+ check_effective_target_gomp_hip_header_amd,
+ check_effective_target_gomp_hip_header_nvidia,
+ check_effective_target_gomp_hipfort_module,
+ check_effective_target_gomp_libamdhip64,
+ check_effective_target_gomp_libhipblas): New.
+ * testsuite/libgomp.c-c++-common/interop-2.c: New test.
+ * testsuite/libgomp.c/interop-cublas-full.c: New test.
+ * testsuite/libgomp.c/interop-cublas-libonly.c: New test.
+ * testsuite/libgomp.c/interop-cuda-full.c: New test.
+ * testsuite/libgomp.c/interop-cuda-libonly.c: New test.
+ * testsuite/libgomp.c/interop-hip-amd-full.c: New test.
+ * testsuite/libgomp.c/interop-hip-amd-no-hip-header.c: New test.
+ * testsuite/libgomp.c/interop-hip-nvidia-full.c: New test.
+ * testsuite/libgomp.c/interop-hip-nvidia-no-headers.c: New test.
+ * testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c: New test.
+ * testsuite/libgomp.c/interop-hip.h: New test.
+ * testsuite/libgomp.c/interop-hipblas-amd-full.c: New test.
+ * testsuite/libgomp.c/interop-hipblas-amd-no-hip-header.c: New test.
+ * testsuite/libgomp.c/interop-hipblas-nvidia-full.c: New test.
+ * testsuite/libgomp.c/interop-hipblas-nvidia-no-headers.c: New test.
+ * testsuite/libgomp.c/interop-hipblas-nvidia-no-hip-header.c: New test.
+ * testsuite/libgomp.c/interop-hipblas.h: New test.
+ * testsuite/libgomp.fortran/interop-hip-amd-full.F90: New test.
+ * testsuite/libgomp.fortran/interop-hip-amd-no-module.F90: New test.
+ * testsuite/libgomp.fortran/interop-hip-nvidia-full.F90: New test.
+ * testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90: New test.
+ * testsuite/libgomp.fortran/interop-hip.h: New test.
+
+2025-04-23 Tobias Burnus <tburnus@baylibre.com>
+
+ * testsuite/libgomp.fortran/target-enter-data-8.f90: New test.
+
2025-04-17 Jakub Jelinek <jakub@redhat.com>
PR libgomp/119849
diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am
index e3202ae..19479ae 100644
--- a/libgomp/Makefile.am
+++ b/libgomp/Makefile.am
@@ -70,7 +70,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \
target.c splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \
oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \
priority_queue.c affinity-fmt.c teams.c allocator.c oacc-profiling.c \
- oacc-target.c target-indirect.c
+ oacc-target.c target-indirect.c target-cxa-dso-dtor.c
include $(top_srcdir)/plugin/Makefrag.am
diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in
index 2a0a842..6d22b3d 100644
--- a/libgomp/Makefile.in
+++ b/libgomp/Makefile.in
@@ -219,7 +219,8 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \
oacc-parallel.lo oacc-host.lo oacc-init.lo oacc-mem.lo \
oacc-async.lo oacc-plugin.lo oacc-cuda.lo priority_queue.lo \
affinity-fmt.lo teams.lo allocator.lo oacc-profiling.lo \
- oacc-target.lo target-indirect.lo $(am__objects_1)
+ oacc-target.lo target-indirect.lo target-cxa-dso-dtor.lo \
+ $(am__objects_1)
libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
@@ -552,7 +553,8 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \
oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \
oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
affinity-fmt.c teams.c allocator.c oacc-profiling.c \
- oacc-target.c target-indirect.c $(am__append_3)
+ oacc-target.c target-indirect.c target-cxa-dso-dtor.c \
+ $(am__append_3)
# Nvidia PTX OpenACC plugin.
@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
@@ -780,6 +782,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/splay-tree.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target-cxa-dso-dtor.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target-indirect.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/task.Plo@am__quote@
diff --git a/libgomp/config/accel/target-cxa-dso-dtor.c b/libgomp/config/accel/target-cxa-dso-dtor.c
new file mode 100644
index 0000000..e40a5f0
--- /dev/null
+++ b/libgomp/config/accel/target-cxa-dso-dtor.c
@@ -0,0 +1,62 @@
+/* Host/device compatibility: Itanium C++ ABI, DSO Object Destruction API
+
+ Copyright (C) 2025 Free Software Foundation, Inc.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "libgomp.h"
+
+extern void __cxa_finalize (void *);
+
+/* See <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#dso-dtor>.
+
+ Even if the device is '!DEFAULT_USE_CXA_ATEXIT', we may see '__cxa_atexit'
+ calls, referencing '__dso_handle', via a 'DEFAULT_USE_CXA_ATEXIT' host.
+ '__cxa_atexit' is provided by newlib, but use of '__dso_handle' for nvptx
+ results in 'ld' error:
+
+ unresolved symbol __dso_handle
+ collect2: error: ld returned 1 exit status
+ nvptx mkoffload: fatal error: [...]/x86_64-pc-linux-gnu-accel-nvptx-none-gcc returned 1 exit status
+
+ ..., or for GCN get an implicit definition (running with
+ '--trace-symbol=__dso_handle'):
+
+ ./a.xamdgcn-amdhsa.mkoffload.hsaco-a.xamdgcn-amdhsa.mkoffload.2.o: reference to __dso_handle
+ <internal>: definition of __dso_handle
+
+ ..., which might be fine, but let's just make it explicit. */
+
+/* There are no DSOs; this is the main program. */
+attribute_hidden void * const __dso_handle = 0;
+
+/* If this file gets linked in, that means that '__dso_handle' has been
+ referenced (for '__cxa_atexit'), and in that case, we also have to run
+ '__cxa_finalize'. Make that happen by overriding the weak libgcc dummy
+ function '__GCC_offload___cxa_finalize'. */
+
+void
+__GCC_offload___cxa_finalize (void *dso_handle)
+{
+ __cxa_finalize (dso_handle);
+}
diff --git a/libgomp/target-cxa-dso-dtor.c b/libgomp/target-cxa-dso-dtor.c
new file mode 100644
index 0000000..d1a898d
--- /dev/null
+++ b/libgomp/target-cxa-dso-dtor.c
@@ -0,0 +1,3 @@
+/* Host/device compatibility: Itanium C++ ABI, DSO Object Destruction API */
+
+/* Nothing needed here. */
diff --git a/libgomp/testsuite/libgomp.c++/target-cdtor-1.C b/libgomp/testsuite/libgomp.c++/target-cdtor-1.C
new file mode 100644
index 0000000..ecb029e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-cdtor-1.C
@@ -0,0 +1,104 @@
+/* Offloaded C++ objects construction and destruction. */
+
+/* { dg-additional-options -fdump-tree-optimized-raw-asmname }
+ { dg-additional-options -foffload-options=-fdump-tree-optimized-raw-asmname } */
+
+#include <omp.h>
+#include <vector>
+
+#pragma omp declare target
+
+struct S
+{
+ int x;
+
+ S()
+ : x(-1)
+ {
+ __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device());
+ }
+ S(int x)
+ : x(x)
+ {
+ __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device());
+ }
+ ~S()
+ {
+ __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device());
+ }
+};
+
+#pragma omp end declare target
+
+S sH1(7);
+
+#pragma omp declare target
+
+S sHD1(5);
+
+std::vector<S> svHD1(2);
+
+#pragma omp end declare target
+
+S sH2(3);
+
+int main()
+{
+ int c = 0;
+
+ __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+
+#pragma omp target map(c)
+ {
+ __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+ }
+
+#pragma omp target map(c)
+ {
+ __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+ }
+
+ __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+
+ return 0;
+}
+
+/* Verify '__cxa_atexit' calls.
+
+ For the host, there are four expected calls:
+ { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, } 4 optimized { target cxa_atexit } } }
+ { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+ { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+ { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+ { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH2, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+
+ For the device, there are two expected calls:
+ { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, } 2 optimized { target cxa_atexit } } }
+ { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+ { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+*/
+
+/* C++ objects are constructed in order of appearance (..., and destructed in reverse order).
+ { dg-output {S, 7, 1[\r\n]+} }
+ { dg-output {S, 5, 1[\r\n]+} }
+ { dg-output {S, -1, 1[\r\n]+} }
+ { dg-output {S, -1, 1[\r\n]+} }
+ { dg-output {S, 3, 1[\r\n]+} }
+ { dg-output {main:1, 1[\r\n]+} }
+ { dg-output {S, 5, 0[\r\n]+} { target offload_device } }
+ { dg-output {S, -1, 0[\r\n]+} { target offload_device } }
+ { dg-output {S, -1, 0[\r\n]+} { target offload_device } }
+ { dg-output {main:2, 1[\r\n]+} { target { ! offload_device } } }
+ { dg-output {main:2, 0[\r\n]+} { target offload_device } }
+ { dg-output {main:3, 1[\r\n]+} { target { ! offload_device } } }
+ { dg-output {main:3, 0[\r\n]+} { target offload_device } }
+ { dg-output {main:4, 1[\r\n]+} }
+ { dg-output {~S, -1, 0[\r\n]+} { target offload_device } }
+ { dg-output {~S, -1, 0[\r\n]+} { target offload_device } }
+ { dg-output {~S, 5, 0[\r\n]+} { target offload_device } }
+ { dg-output {~S, 3, 1[\r\n]+} }
+ { dg-output {~S, -1, 1[\r\n]+} }
+ { dg-output {~S, -1, 1[\r\n]+} }
+ { dg-output {~S, 5, 1[\r\n]+} }
+ { dg-output {~S, 7, 1[\r\n]+} }
+*/
diff --git a/libgomp/testsuite/libgomp.c++/target-cdtor-2.C b/libgomp/testsuite/libgomp.c++/target-cdtor-2.C
new file mode 100644
index 0000000..75e48ca
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-cdtor-2.C
@@ -0,0 +1,140 @@
+/* Offloaded 'constructor' and 'destructor' functions, and C++ objects construction and destruction. */
+
+/* { dg-require-effective-target init_priority } */
+
+/* { dg-additional-options -fdump-tree-optimized-raw-asmname }
+ { dg-additional-options -foffload-options=-fdump-tree-optimized-raw-asmname } */
+
+#include <omp.h>
+#include <vector>
+
+#pragma omp declare target
+
+struct S
+{
+ int x;
+
+ S()
+ : x(-1)
+ {
+ __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device());
+ }
+ S(int x)
+ : x(x)
+ {
+ __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device());
+ }
+ ~S()
+ {
+ __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device());
+ }
+};
+
+#pragma omp end declare target
+
+S sH1 __attribute__((init_priority(1500))) (7);
+
+#pragma omp declare target
+
+S sHD1 __attribute__((init_priority(2000))) (5);
+
+std::vector<S> svHD1 __attribute__((init_priority(1000))) (2);
+
+static void
+__attribute__((constructor(20000)))
+initDH1()
+{
+ __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+static void
+__attribute__((destructor(20000)))
+finiDH1()
+{
+ __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+#pragma omp end declare target
+
+S sH2 __attribute__((init_priority(500))) (3);
+
+static void
+__attribute__((constructor(10000)))
+initH1()
+{
+ __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+static void
+__attribute__((destructor(10000)))
+finiH1()
+{
+ __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+int main()
+{
+ int c = 0;
+
+ __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+
+#pragma omp target map(c)
+ {
+ __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+ }
+
+#pragma omp target map(c)
+ {
+ __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+ }
+
+ __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+
+ return 0;
+}
+
+/* Verify '__cxa_atexit' calls.
+
+ For the host, there are four expected calls:
+ { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, } 4 optimized { target cxa_atexit } } }
+ { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+ { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+ { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+ { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH2, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+
+ For the device, there are two expected calls:
+ { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, } 2 optimized { target cxa_atexit } } }
+ { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+ { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+*/
+
+/* Defined order in which 'constructor' functions, and 'destructor' functions are run, and C++ objects are constructed (..., and destructed in reverse order).
+ { dg-output {S, 3, 1[\r\n]+} }
+ { dg-output {S, -1, 1[\r\n]+} }
+ { dg-output {S, -1, 1[\r\n]+} }
+ { dg-output {S, 7, 1[\r\n]+} }
+ { dg-output {S, 5, 1[\r\n]+} }
+ { dg-output {initH1, 1[\r\n]+} }
+ { dg-output {initDH1, 1[\r\n]+} }
+ { dg-output {main:1, 1[\r\n]+} }
+ { dg-output {S, -1, 0[\r\n]+} { target offload_device } }
+ { dg-output {S, -1, 0[\r\n]+} { target offload_device } }
+ { dg-output {S, 5, 0[\r\n]+} { target offload_device } }
+ { dg-output {initDH1, 0[\r\n]+} { target offload_device } }
+ { dg-output {main:2, 1[\r\n]+} { target { ! offload_device } } }
+ { dg-output {main:2, 0[\r\n]+} { target offload_device } }
+ { dg-output {main:3, 1[\r\n]+} { target { ! offload_device } } }
+ { dg-output {main:3, 0[\r\n]+} { target offload_device } }
+ { dg-output {main:4, 1[\r\n]+} }
+ { dg-output {~S, 5, 0[\r\n]+} { target offload_device } }
+ { dg-output {~S, -1, 0[\r\n]+} { target offload_device } }
+ { dg-output {~S, -1, 0[\r\n]+} { target offload_device } }
+ { dg-output {finiDH1, 0[\r\n]+} { target offload_device } }
+ { dg-output {~S, 5, 1[\r\n]+} }
+ { dg-output {~S, 7, 1[\r\n]+} }
+ { dg-output {~S, -1, 1[\r\n]+} }
+ { dg-output {~S, -1, 1[\r\n]+} }
+ { dg-output {~S, 3, 1[\r\n]+} }
+ { dg-output {finiDH1, 1[\r\n]+} }
+ { dg-output {finiH1, 1[\r\n]+} }
+*/
diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C
index 3cdedf4..d4dccf1 100644
--- a/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C
+++ b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C
@@ -14,8 +14,10 @@
/* In this specific C++ arrangement, distilled from PR118794, GCC synthesizes
'__builtin_eh_pointer', '__builtin_unwind_resume' calls as dead code in 'f':
- { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } }
- { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } }
+ { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } }
+ { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } }
+ ..., just 'targetm.arm_eabi_unwinder' is different:
+ { dg-final { scan-tree-dump-times {gimple_call <__builtin_cxa_end_cleanup, } 1 optimized { target { arm_eabi || tic6x-*-* } } } }
{ dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } }
{ dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } }
Given '-O0' and '-foffload-options=-mno-fake-exceptions', offload compilation fails:
diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C
index ef996cf..724e34b 100644
--- a/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C
+++ b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C
@@ -14,8 +14,10 @@
/* In this specific C++ arrangement, distilled from PR118794, GCC synthesizes
'__builtin_eh_pointer', '__builtin_unwind_resume' calls as dead code in 'f':
- { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } }
- { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } }
+ { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } }
+ { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } }
+ ..., just 'targetm.arm_eabi_unwinder' is different:
+ { dg-final { scan-tree-dump-times {gimple_call <__builtin_cxa_end_cleanup, } 1 optimized { target { arm_eabi || tic6x-*-* } } } }
{ dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } }
{ dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } }
Given '-O0' and '-foffload-options=-mno-fake-exceptions', offload compilation fails:
diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1.C b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1.C
index 24e3d07..24eb7a5 100644
--- a/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1.C
+++ b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1.C
@@ -51,7 +51,9 @@ int main()
/* In this specific C++ arrangement, distilled from PR118794, GCC synthesizes
'__builtin_eh_pointer', '__builtin_unwind_resume' calls as dead code in 'f':
- { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } }
- { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } }
+ { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } }
+ { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } }
+ ..., just 'targetm.arm_eabi_unwinder' is different:
+ { dg-final { scan-tree-dump-times {gimple_call <__builtin_cxa_end_cleanup, } 1 optimized { target { arm_eabi || tic6x-*-* } } } }
{ dg-final { scan-offload-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } }
{ dg-final { scan-offload-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } } */
diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-1.C b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-1.C
index 2467061..a4e7a10 100644
--- a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-1.C
+++ b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-1.C
@@ -4,9 +4,6 @@
{ dg-additional-options -fexceptions } */
/* { dg-additional-options -fdump-tree-optimized-raw }
{ dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */
-/* { dg-bogus {Size expression must be absolute\.} PR119737 { target offload_target_amdgcn xfail *-*-* } 0 }
- { dg-ice PR119737 { offload_target_amdgcn } }
- { dg-excess-errors {'mkoffload' failures etc.} { xfail offload_target_amdgcn } } */
#include "../libgomp.oacc-c++/exceptions-throw-1.C"
diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2.C b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2.C
index e85e6c3..97f4845 100644
--- a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2.C
+++ b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2.C
@@ -4,9 +4,6 @@
{ dg-additional-options -fexceptions } */
/* { dg-additional-options -fdump-tree-optimized-raw }
{ dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */
-/* { dg-bogus {Size expression must be absolute\.} PR119737 { target offload_target_amdgcn xfail *-*-* } 0 }
- { dg-ice PR119737 { offload_target_amdgcn } }
- { dg-excess-errors {'mkoffload' failures etc.} { xfail offload_target_amdgcn } } */
#include "../libgomp.oacc-c++/exceptions-throw-2.C"
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-cdtor-1.c b/libgomp/testsuite/libgomp.c-c++-common/target-cdtor-1.c
new file mode 100644
index 0000000..e6099cf
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-cdtor-1.c
@@ -0,0 +1,89 @@
+/* Offloaded 'constructor' and 'destructor' functions. */
+
+#include <omp.h>
+
+#pragma omp declare target
+
+static void
+__attribute__((constructor))
+initHD1()
+{
+ __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+static void
+__attribute__((constructor))
+initHD2()
+{
+ __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+static void
+__attribute__((destructor))
+finiHD1()
+{
+ __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+static void
+__attribute__((destructor))
+finiHD2()
+{
+ __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+#pragma omp end declare target
+
+static void
+__attribute__((constructor))
+initH1()
+{
+ __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+static void
+__attribute__((destructor))
+finiH2()
+{
+ __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+int main()
+{
+ int c = 0;
+
+ __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+
+#pragma omp target map(c)
+ {
+ __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+ }
+
+#pragma omp target map(c)
+ {
+ __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+ }
+
+ __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+
+ return 0;
+}
+
+/* The order is undefined, in which same-priority 'constructor' functions, and 'destructor' functions are run.
+ { dg-output {init[^,]+, 1[\r\n]+} }
+ { dg-output {init[^,]+, 1[\r\n]+} }
+ { dg-output {init[^,]+, 1[\r\n]+} }
+ { dg-output {main:1, 1[\r\n]+} }
+ { dg-output {initHD[^,]+, 0[\r\n]+} { target offload_device } }
+ { dg-output {initHD[^,]+, 0[\r\n]+} { target offload_device } }
+ { dg-output {main:2, 1[\r\n]+} { target { ! offload_device } } }
+ { dg-output {main:2, 0[\r\n]+} { target offload_device } }
+ { dg-output {main:3, 1[\r\n]+} { target { ! offload_device } } }
+ { dg-output {main:3, 0[\r\n]+} { target offload_device } }
+ { dg-output {main:4, 1[\r\n]+} }
+ { dg-output {finiHD[^,]+, 0[\r\n]+} { target offload_device } }
+ { dg-output {finiHD[^,]+, 0[\r\n]+} { target offload_device } }
+ { dg-output {fini[^,]+, 1[\r\n]+} }
+ { dg-output {fini[^,]+, 1[\r\n]+} }
+ { dg-output {fini[^,]+, 1[\r\n]+} }
+*/
diff --git a/libgomp/testsuite/libgomp.c/interop-hsa.c b/libgomp/testsuite/libgomp.c/interop-hsa.c
new file mode 100644
index 0000000..cf8bc90
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hsa.c
@@ -0,0 +1,203 @@
+/* { dg-additional-options "-ldl" } */
+/* { dg-require-effective-target offload_device_gcn } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <omp.h>
+#include <assert.h>
+#include <dlfcn.h>
+#include "../../../include/hsa.h"
+#include "../../config/gcn/libgomp-gcn.h"
+
+#define STACKSIZE (100 * 1024)
+#define HEAPSIZE (10 * 1024 * 1024)
+#define ARENASIZE HEAPSIZE
+
+/* This code fragment must be optimized or else the host-fallback kernel has
+ * invalid ASM inserts. The rest of the file can be compiled safely at -O0. */
+#pragma omp declare target
+uintptr_t __attribute__((optimize("O1")))
+get_kernel_ptr ()
+{
+ uintptr_t val;
+ if (!omp_is_initial_device ())
+ /* "main._omp_fn.0" is the name GCC gives the first OpenMP target
+ * region in the "main" function.
+ * The ".kd" suffix is added by the LLVM assembler when it creates the
+ * kernel meta-data, and this is what we need to launch a kernel. */
+ asm ("s_getpc_b64 %0\n\t"
+ "s_add_u32 %L0, %L0, main._omp_fn.0.kd@rel32@lo+4\n\t"
+ "s_addc_u32 %H0, %H0, main._omp_fn.0.kd@rel32@hi+4"
+ : "=Sg"(val));
+ return val;
+}
+#pragma omp end declare target
+
+int
+main(int argc, char** argv)
+{
+
+ /* Load the HSA runtime DLL. */
+ void *hsalib = dlopen ("libhsa-runtime64.so.1", RTLD_LAZY);
+ assert (hsalib);
+
+ hsa_status_t (*hsa_signal_create) (hsa_signal_value_t initial_value,
+ uint32_t num_consumers,
+ const hsa_agent_t *consumers,
+ hsa_signal_t *signal)
+ = dlsym (hsalib, "hsa_signal_create");
+ assert (hsa_signal_create);
+
+ uint64_t (*hsa_queue_load_write_index_relaxed) (const hsa_queue_t *queue)
+ = dlsym (hsalib, "hsa_queue_load_write_index_relaxed");
+ assert (hsa_queue_load_write_index_relaxed);
+
+ void (*hsa_signal_store_relaxed) (hsa_signal_t signal,
+ hsa_signal_value_t value)
+ = dlsym (hsalib, "hsa_signal_store_relaxed");
+ assert (hsa_signal_store_relaxed);
+
+ hsa_signal_value_t (*hsa_signal_wait_relaxed) (hsa_signal_t signal,
+ hsa_signal_condition_t condition,
+ hsa_signal_value_t compare_value,
+ uint64_t timeout_hint,
+ hsa_wait_state_t wait_state_hint)
+ = dlsym (hsalib, "hsa_signal_wait_relaxed");
+ assert (hsa_signal_wait_relaxed);
+
+ void (*hsa_queue_store_write_index_relaxed) (const hsa_queue_t *queue,
+ uint64_t value)
+ = dlsym (hsalib, "hsa_queue_store_write_index_relaxed");
+ assert (hsa_queue_store_write_index_relaxed);
+
+ hsa_status_t (*hsa_signal_destroy) (hsa_signal_t signal)
+ = dlsym (hsalib, "hsa_signal_destroy");
+ assert (hsa_signal_destroy);
+
+ /* Set up the device data environment. */
+ int test_data_value = 0;
+#pragma omp target enter data map(test_data_value)
+
+ /* Get the interop details. */
+ int device_num = omp_get_default_device();
+ hsa_agent_t *gpu_agent;
+ hsa_queue_t *hsa_queue = NULL;
+
+ omp_interop_t interop = omp_interop_none;
+#pragma omp interop init(target, targetsync, prefer_type("hsa"): interop) device(device_num)
+ assert (interop != omp_interop_none);
+
+ omp_interop_rc_t retcode;
+ omp_interop_fr_t fr = omp_get_interop_int (interop, omp_ipr_fr_id, &retcode);
+ assert (retcode == omp_irc_success);
+ assert (fr == omp_ifr_hsa);
+
+ gpu_agent = omp_get_interop_ptr(interop, omp_ipr_device, &retcode);
+ assert (retcode == omp_irc_success);
+
+ hsa_queue = omp_get_interop_ptr(interop, omp_ipr_targetsync, &retcode);
+ assert (retcode == omp_irc_success);
+ assert (hsa_queue);
+
+ /* Call an offload kernel via OpenMP/libgomp.
+ *
+ * This kernel serves two purposes:
+ * 1) Lookup the device-side load-address of itself (thus avoiding the
+ * need to access the libgomp internals).
+ * 2) Count how many times it is called.
+ * We then call it once using OpenMP, and once manually, and check
+ * the counter reads "2". */
+ uint64_t kernel_object = 0;
+#pragma omp target map(from:kernel_object) map(present,alloc:test_data_value)
+ {
+ kernel_object = get_kernel_ptr ();
+ ++test_data_value;
+ }
+
+ assert (kernel_object != 0);
+
+ /* Configure the same kernel to run again, using HSA manually this time. */
+ hsa_status_t status;
+ hsa_signal_t signal;
+ status = hsa_signal_create(1, 0, NULL, &signal);
+ assert (status == HSA_STATUS_SUCCESS);
+
+ /* The kernel is built by GCC for OpenMP, so we need to pass the same
+ * data pointers that libgomp would pass in. */
+ struct {
+ uintptr_t test_data_value;
+ uintptr_t kernel_object;
+ } tgtaddrs;
+
+#pragma omp target data use_device_addr(test_data_value)
+ {
+ tgtaddrs.test_data_value = (uintptr_t)&test_data_value;
+ tgtaddrs.kernel_object = (uintptr_t)omp_target_alloc (8, device_num);
+ }
+
+ /* We also need to duplicate the launch ABI used by plugin-gcn.c. */
+ struct kernargs_abi args; /* From libgomp-gcn.h. */
+ args.dummy1 = (int64_t)&tgtaddrs;
+ args.out_ptr = (int64_t)malloc (sizeof (struct output)); /* Host side. */
+ args.heap_ptr = (int64_t)omp_target_alloc (HEAPSIZE, device_num);
+ args.arena_ptr = (int64_t)omp_target_alloc (ARENASIZE, device_num);
+ args.stack_ptr = (int64_t)omp_target_alloc (STACKSIZE, device_num);
+ args.arena_size_per_team = ARENASIZE;
+ args.stack_size_per_thread = STACKSIZE;
+
+ /* Build the HSA dispatch packet, and insert it into the queue. */
+ uint64_t packet_id = hsa_queue_load_write_index_relaxed (hsa_queue);
+ const uint32_t queueMask = hsa_queue->size - 1;
+ hsa_kernel_dispatch_packet_t *dispatch_packet =
+ &(((hsa_kernel_dispatch_packet_t *)
+ (hsa_queue->base_address))[packet_id & queueMask]);
+
+ dispatch_packet->setup = 3 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
+ dispatch_packet->workgroup_size_x = 1;
+ dispatch_packet->workgroup_size_y = 64;
+ dispatch_packet->workgroup_size_z = 1;
+ dispatch_packet->grid_size_x = 1;
+ dispatch_packet->grid_size_y = 64;
+ dispatch_packet->grid_size_z = 1;
+ dispatch_packet->completion_signal = signal;
+ dispatch_packet->kernel_object = kernel_object;
+ dispatch_packet->kernarg_address = &args;
+ dispatch_packet->private_segment_size = 0;
+ dispatch_packet->group_segment_size = 1536;
+
+ uint16_t header = 0;
+ header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
+ header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
+ header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
+
+ /* Finish writing the packet header with an atomic release. */
+ __atomic_store_n((uint16_t*)dispatch_packet, header, __ATOMIC_RELEASE);
+
+ hsa_queue_store_write_index_relaxed (hsa_queue, packet_id + 1);
+
+ ;/* Run the kernel and wait for it to complete. */
+ hsa_signal_store_relaxed(hsa_queue->doorbell_signal, packet_id);
+ while (hsa_signal_wait_relaxed(signal, HSA_SIGNAL_CONDITION_LT, 1,
+ UINT64_MAX, HSA_WAIT_STATE_ACTIVE) != 0)
+ ;
+
+ /* Clean up HSA. */
+ hsa_signal_destroy(signal);
+ free ((void*)args.out_ptr);
+ omp_target_free ((void*)args.heap_ptr, device_num);
+ omp_target_free ((void*)args.arena_ptr, device_num);
+ omp_target_free ((void*)args.stack_ptr, device_num);
+ omp_target_free ((void*)tgtaddrs.kernel_object, device_num);
+
+ /* Clean up OpenMP. */
+ #pragma omp interop destroy(interop)
+
+ /* Bring the data back from the device. */
+#pragma omp target exit data map(test_data_value)
+
+ /* Ensure the kernel was called twice. Once by OpenMP, once by HSA. */
+ assert (test_data_value == 2);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-1.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-1.C
index f2ef751..08c5766 100644
--- a/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-1.C
+++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-1.C
@@ -4,9 +4,6 @@
{ dg-additional-options -fexceptions } */
/* { dg-additional-options -fdump-tree-optimized-raw }
{ dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */
-/* { dg-bogus {Size expression must be absolute\.} PR119737 { target { openacc_radeon_accel_selected && __OPTIMIZE__ } xfail *-*-* } 0 }
- { dg-ice PR119737 { openacc_radeon_accel_selected && __OPTIMIZE__ } }
- { dg-excess-errors {'mkoffload' failure etc.} { xfail { openacc_radeon_accel_selected && __OPTIMIZE__ } } } */
/* See also '../libgomp.c++/target-exceptions-throw-1.C'. */
diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2.C
index f6dc970..a7408cd 100644
--- a/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2.C
+++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2.C
@@ -6,9 +6,6 @@
{ dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */
/* { dg-bogus {undefined symbol: typeinfo name for MyException} PR119806 { target { openacc_radeon_accel_selected && { ! __OPTIMIZE__ } } xfail *-*-* } 0 }
{ dg-excess-errors {'mkoffload' failure etc.} { xfail { openacc_radeon_accel_selected && { ! __OPTIMIZE__ } } } } */
-/* { dg-bogus {Size expression must be absolute\.} PR119737 { target { openacc_radeon_accel_selected && __OPTIMIZE__ } xfail *-*-* } 0 }
- { dg-ice PR119737 { openacc_radeon_accel_selected && __OPTIMIZE__ } }
- { dg-excess-errors {'mkoffload' failures etc.} { xfail { openacc_radeon_accel_selected && __OPTIMIZE__ } } } */
/* { dg-bogus {Initial value type mismatch} PR119806 { target { openacc_nvidia_accel_selected && { ! __OPTIMIZE__ } } xfail *-*-* } 0 }
{ dg-excess-errors {'mkoffload' failure etc.} { xfail { openacc_nvidia_accel_selected && { ! __OPTIMIZE__ } } } } */