diff options
Diffstat (limited to 'libgomp')
-rw-r--r-- | libgomp/ChangeLog | 50 | ||||
-rw-r--r-- | libgomp/Makefile.am | 2 | ||||
-rw-r--r-- | libgomp/Makefile.in | 7 | ||||
-rw-r--r-- | libgomp/config/accel/target-cxa-dso-dtor.c | 62 | ||||
-rw-r--r-- | libgomp/target-cxa-dso-dtor.c | 3 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.c++/target-cdtor-1.C | 104 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.c++/target-cdtor-2.C | 140 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C | 6 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C | 6 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1.C | 6 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.c++/target-exceptions-throw-1.C | 3 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.c++/target-exceptions-throw-2.C | 3 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.c-c++-common/target-cdtor-1.c | 89 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.c/interop-hsa.c | 203 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-1.C | 3 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2.C | 3 |
16 files changed, 669 insertions, 21 deletions
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index 096e17b..49a62d4 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,53 @@ +2025-04-24 Tobias Burnus <tburnus@baylibre.com> + + * testsuite/lib/libgomp.exp + (check_effective_target_gomp_hip_header_nvidia): Compile with + "-Wno-deprecated-declarations". + * testsuite/libgomp.c/interop-hip-nvidia-full.c: Likewise. + * testsuite/libgomp.c/interop-hipblas-nvidia-full.c: Likewise. + * testsuite/libgomp.c/interop-hipblas.h: Add workarounds + when using the HIP headers with __HIP_PLATFORM_NVIDIA__. + +2025-04-24 Tobias Burnus <tburnus@baylibre.com> + + * testsuite/lib/libgomp.exp (check_effective_target_openacc_cublas, + check_effective_target_openacc_cudart): Update description as + the check requires more. + (check_effective_target_openacc_libcuda, + check_effective_target_openacc_libcublas, + check_effective_target_openacc_libcudart, + check_effective_target_gomp_hip_header_amd, + check_effective_target_gomp_hip_header_nvidia, + check_effective_target_gomp_hipfort_module, + check_effective_target_gomp_libamdhip64, + check_effective_target_gomp_libhipblas): New. + * testsuite/libgomp.c-c++-common/interop-2.c: New test. + * testsuite/libgomp.c/interop-cublas-full.c: New test. + * testsuite/libgomp.c/interop-cublas-libonly.c: New test. + * testsuite/libgomp.c/interop-cuda-full.c: New test. + * testsuite/libgomp.c/interop-cuda-libonly.c: New test. + * testsuite/libgomp.c/interop-hip-amd-full.c: New test. + * testsuite/libgomp.c/interop-hip-amd-no-hip-header.c: New test. + * testsuite/libgomp.c/interop-hip-nvidia-full.c: New test. + * testsuite/libgomp.c/interop-hip-nvidia-no-headers.c: New test. + * testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c: New test. + * testsuite/libgomp.c/interop-hip.h: New test. + * testsuite/libgomp.c/interop-hipblas-amd-full.c: New test. + * testsuite/libgomp.c/interop-hipblas-amd-no-hip-header.c: New test. + * testsuite/libgomp.c/interop-hipblas-nvidia-full.c: New test. + * testsuite/libgomp.c/interop-hipblas-nvidia-no-headers.c: New test. + * testsuite/libgomp.c/interop-hipblas-nvidia-no-hip-header.c: New test. + * testsuite/libgomp.c/interop-hipblas.h: New test. + * testsuite/libgomp.fortran/interop-hip-amd-full.F90: New test. + * testsuite/libgomp.fortran/interop-hip-amd-no-module.F90: New test. + * testsuite/libgomp.fortran/interop-hip-nvidia-full.F90: New test. + * testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90: New test. + * testsuite/libgomp.fortran/interop-hip.h: New test. + +2025-04-23 Tobias Burnus <tburnus@baylibre.com> + + * testsuite/libgomp.fortran/target-enter-data-8.f90: New test. + 2025-04-17 Jakub Jelinek <jakub@redhat.com> PR libgomp/119849 diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am index e3202ae..19479ae 100644 --- a/libgomp/Makefile.am +++ b/libgomp/Makefile.am @@ -70,7 +70,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \ target.c splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \ oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \ priority_queue.c affinity-fmt.c teams.c allocator.c oacc-profiling.c \ - oacc-target.c target-indirect.c + oacc-target.c target-indirect.c target-cxa-dso-dtor.c include $(top_srcdir)/plugin/Makefrag.am diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in index 2a0a842..6d22b3d 100644 --- a/libgomp/Makefile.in +++ b/libgomp/Makefile.in @@ -219,7 +219,8 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \ oacc-parallel.lo oacc-host.lo oacc-init.lo oacc-mem.lo \ oacc-async.lo oacc-plugin.lo oacc-cuda.lo priority_queue.lo \ affinity-fmt.lo teams.lo allocator.lo oacc-profiling.lo \ - oacc-target.lo target-indirect.lo $(am__objects_1) + oacc-target.lo target-indirect.lo target-cxa-dso-dtor.lo \ + $(am__objects_1) libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) @@ -552,7 +553,8 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \ oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \ oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \ affinity-fmt.c teams.c allocator.c oacc-profiling.c \ - oacc-target.c target-indirect.c $(am__append_3) + oacc-target.c target-indirect.c target-cxa-dso-dtor.c \ + $(am__append_3) # Nvidia PTX OpenACC plugin. @PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION) @@ -780,6 +782,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/splay-tree.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target-cxa-dso-dtor.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target-indirect.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/task.Plo@am__quote@ diff --git a/libgomp/config/accel/target-cxa-dso-dtor.c b/libgomp/config/accel/target-cxa-dso-dtor.c new file mode 100644 index 0000000..e40a5f0 --- /dev/null +++ b/libgomp/config/accel/target-cxa-dso-dtor.c @@ -0,0 +1,62 @@ +/* Host/device compatibility: Itanium C++ ABI, DSO Object Destruction API + + Copyright (C) 2025 Free Software Foundation, Inc. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "libgomp.h" + +extern void __cxa_finalize (void *); + +/* See <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#dso-dtor>. + + Even if the device is '!DEFAULT_USE_CXA_ATEXIT', we may see '__cxa_atexit' + calls, referencing '__dso_handle', via a 'DEFAULT_USE_CXA_ATEXIT' host. + '__cxa_atexit' is provided by newlib, but use of '__dso_handle' for nvptx + results in 'ld' error: + + unresolved symbol __dso_handle + collect2: error: ld returned 1 exit status + nvptx mkoffload: fatal error: [...]/x86_64-pc-linux-gnu-accel-nvptx-none-gcc returned 1 exit status + + ..., or for GCN get an implicit definition (running with + '--trace-symbol=__dso_handle'): + + ./a.xamdgcn-amdhsa.mkoffload.hsaco-a.xamdgcn-amdhsa.mkoffload.2.o: reference to __dso_handle + <internal>: definition of __dso_handle + + ..., which might be fine, but let's just make it explicit. */ + +/* There are no DSOs; this is the main program. */ +attribute_hidden void * const __dso_handle = 0; + +/* If this file gets linked in, that means that '__dso_handle' has been + referenced (for '__cxa_atexit'), and in that case, we also have to run + '__cxa_finalize'. Make that happen by overriding the weak libgcc dummy + function '__GCC_offload___cxa_finalize'. */ + +void +__GCC_offload___cxa_finalize (void *dso_handle) +{ + __cxa_finalize (dso_handle); +} diff --git a/libgomp/target-cxa-dso-dtor.c b/libgomp/target-cxa-dso-dtor.c new file mode 100644 index 0000000..d1a898d --- /dev/null +++ b/libgomp/target-cxa-dso-dtor.c @@ -0,0 +1,3 @@ +/* Host/device compatibility: Itanium C++ ABI, DSO Object Destruction API */ + +/* Nothing needed here. */ diff --git a/libgomp/testsuite/libgomp.c++/target-cdtor-1.C b/libgomp/testsuite/libgomp.c++/target-cdtor-1.C new file mode 100644 index 0000000..ecb029e --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-cdtor-1.C @@ -0,0 +1,104 @@ +/* Offloaded C++ objects construction and destruction. */ + +/* { dg-additional-options -fdump-tree-optimized-raw-asmname } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw-asmname } */ + +#include <omp.h> +#include <vector> + +#pragma omp declare target + +struct S +{ + int x; + + S() + : x(-1) + { + __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device()); + } + S(int x) + : x(x) + { + __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device()); + } + ~S() + { + __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device()); + } +}; + +#pragma omp end declare target + +S sH1(7); + +#pragma omp declare target + +S sHD1(5); + +std::vector<S> svHD1(2); + +#pragma omp end declare target + +S sH2(3); + +int main() +{ + int c = 0; + + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + +#pragma omp target map(c) + { + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + } + +#pragma omp target map(c) + { + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + } + + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + + return 0; +} + +/* Verify '__cxa_atexit' calls. + + For the host, there are four expected calls: + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, } 4 optimized { target cxa_atexit } } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH2, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + + For the device, there are two expected calls: + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, } 2 optimized { target cxa_atexit } } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } +*/ + +/* C++ objects are constructed in order of appearance (..., and destructed in reverse order). + { dg-output {S, 7, 1[\r\n]+} } + { dg-output {S, 5, 1[\r\n]+} } + { dg-output {S, -1, 1[\r\n]+} } + { dg-output {S, -1, 1[\r\n]+} } + { dg-output {S, 3, 1[\r\n]+} } + { dg-output {main:1, 1[\r\n]+} } + { dg-output {S, 5, 0[\r\n]+} { target offload_device } } + { dg-output {S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {main:2, 1[\r\n]+} { target { ! offload_device } } } + { dg-output {main:2, 0[\r\n]+} { target offload_device } } + { dg-output {main:3, 1[\r\n]+} { target { ! offload_device } } } + { dg-output {main:3, 0[\r\n]+} { target offload_device } } + { dg-output {main:4, 1[\r\n]+} } + { dg-output {~S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {~S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {~S, 5, 0[\r\n]+} { target offload_device } } + { dg-output {~S, 3, 1[\r\n]+} } + { dg-output {~S, -1, 1[\r\n]+} } + { dg-output {~S, -1, 1[\r\n]+} } + { dg-output {~S, 5, 1[\r\n]+} } + { dg-output {~S, 7, 1[\r\n]+} } +*/ diff --git a/libgomp/testsuite/libgomp.c++/target-cdtor-2.C b/libgomp/testsuite/libgomp.c++/target-cdtor-2.C new file mode 100644 index 0000000..75e48ca --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-cdtor-2.C @@ -0,0 +1,140 @@ +/* Offloaded 'constructor' and 'destructor' functions, and C++ objects construction and destruction. */ + +/* { dg-require-effective-target init_priority } */ + +/* { dg-additional-options -fdump-tree-optimized-raw-asmname } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw-asmname } */ + +#include <omp.h> +#include <vector> + +#pragma omp declare target + +struct S +{ + int x; + + S() + : x(-1) + { + __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device()); + } + S(int x) + : x(x) + { + __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device()); + } + ~S() + { + __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device()); + } +}; + +#pragma omp end declare target + +S sH1 __attribute__((init_priority(1500))) (7); + +#pragma omp declare target + +S sHD1 __attribute__((init_priority(2000))) (5); + +std::vector<S> svHD1 __attribute__((init_priority(1000))) (2); + +static void +__attribute__((constructor(20000))) +initDH1() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +static void +__attribute__((destructor(20000))) +finiDH1() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +#pragma omp end declare target + +S sH2 __attribute__((init_priority(500))) (3); + +static void +__attribute__((constructor(10000))) +initH1() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +static void +__attribute__((destructor(10000))) +finiH1() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +int main() +{ + int c = 0; + + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + +#pragma omp target map(c) + { + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + } + +#pragma omp target map(c) + { + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + } + + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + + return 0; +} + +/* Verify '__cxa_atexit' calls. + + For the host, there are four expected calls: + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, } 4 optimized { target cxa_atexit } } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH2, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + + For the device, there are two expected calls: + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, } 2 optimized { target cxa_atexit } } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } +*/ + +/* Defined order in which 'constructor' functions, and 'destructor' functions are run, and C++ objects are constructed (..., and destructed in reverse order). + { dg-output {S, 3, 1[\r\n]+} } + { dg-output {S, -1, 1[\r\n]+} } + { dg-output {S, -1, 1[\r\n]+} } + { dg-output {S, 7, 1[\r\n]+} } + { dg-output {S, 5, 1[\r\n]+} } + { dg-output {initH1, 1[\r\n]+} } + { dg-output {initDH1, 1[\r\n]+} } + { dg-output {main:1, 1[\r\n]+} } + { dg-output {S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {S, 5, 0[\r\n]+} { target offload_device } } + { dg-output {initDH1, 0[\r\n]+} { target offload_device } } + { dg-output {main:2, 1[\r\n]+} { target { ! offload_device } } } + { dg-output {main:2, 0[\r\n]+} { target offload_device } } + { dg-output {main:3, 1[\r\n]+} { target { ! offload_device } } } + { dg-output {main:3, 0[\r\n]+} { target offload_device } } + { dg-output {main:4, 1[\r\n]+} } + { dg-output {~S, 5, 0[\r\n]+} { target offload_device } } + { dg-output {~S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {~S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {finiDH1, 0[\r\n]+} { target offload_device } } + { dg-output {~S, 5, 1[\r\n]+} } + { dg-output {~S, 7, 1[\r\n]+} } + { dg-output {~S, -1, 1[\r\n]+} } + { dg-output {~S, -1, 1[\r\n]+} } + { dg-output {~S, 3, 1[\r\n]+} } + { dg-output {finiDH1, 1[\r\n]+} } + { dg-output {finiH1, 1[\r\n]+} } +*/ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C index 3cdedf4..d4dccf1 100644 --- a/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C @@ -14,8 +14,10 @@ /* In this specific C++ arrangement, distilled from PR118794, GCC synthesizes '__builtin_eh_pointer', '__builtin_unwind_resume' calls as dead code in 'f': - { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } } - { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } } + { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } } + ..., just 'targetm.arm_eabi_unwinder' is different: + { dg-final { scan-tree-dump-times {gimple_call <__builtin_cxa_end_cleanup, } 1 optimized { target { arm_eabi || tic6x-*-* } } } } { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } } { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } } Given '-O0' and '-foffload-options=-mno-fake-exceptions', offload compilation fails: diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C index ef996cf..724e34b 100644 --- a/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C @@ -14,8 +14,10 @@ /* In this specific C++ arrangement, distilled from PR118794, GCC synthesizes '__builtin_eh_pointer', '__builtin_unwind_resume' calls as dead code in 'f': - { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } } - { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } } + { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } } + ..., just 'targetm.arm_eabi_unwinder' is different: + { dg-final { scan-tree-dump-times {gimple_call <__builtin_cxa_end_cleanup, } 1 optimized { target { arm_eabi || tic6x-*-* } } } } { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } } { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } } Given '-O0' and '-foffload-options=-mno-fake-exceptions', offload compilation fails: diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1.C b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1.C index 24e3d07..24eb7a5 100644 --- a/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1.C +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1.C @@ -51,7 +51,9 @@ int main() /* In this specific C++ arrangement, distilled from PR118794, GCC synthesizes '__builtin_eh_pointer', '__builtin_unwind_resume' calls as dead code in 'f': - { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } } - { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } } + { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } } + ..., just 'targetm.arm_eabi_unwinder' is different: + { dg-final { scan-tree-dump-times {gimple_call <__builtin_cxa_end_cleanup, } 1 optimized { target { arm_eabi || tic6x-*-* } } } } { dg-final { scan-offload-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } } { dg-final { scan-offload-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-1.C b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-1.C index 2467061..a4e7a10 100644 --- a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-1.C +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-1.C @@ -4,9 +4,6 @@ { dg-additional-options -fexceptions } */ /* { dg-additional-options -fdump-tree-optimized-raw } { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ -/* { dg-bogus {Size expression must be absolute\.} PR119737 { target offload_target_amdgcn xfail *-*-* } 0 } - { dg-ice PR119737 { offload_target_amdgcn } } - { dg-excess-errors {'mkoffload' failures etc.} { xfail offload_target_amdgcn } } */ #include "../libgomp.oacc-c++/exceptions-throw-1.C" diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2.C b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2.C index e85e6c3..97f4845 100644 --- a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2.C +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2.C @@ -4,9 +4,6 @@ { dg-additional-options -fexceptions } */ /* { dg-additional-options -fdump-tree-optimized-raw } { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ -/* { dg-bogus {Size expression must be absolute\.} PR119737 { target offload_target_amdgcn xfail *-*-* } 0 } - { dg-ice PR119737 { offload_target_amdgcn } } - { dg-excess-errors {'mkoffload' failures etc.} { xfail offload_target_amdgcn } } */ #include "../libgomp.oacc-c++/exceptions-throw-2.C" diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-cdtor-1.c b/libgomp/testsuite/libgomp.c-c++-common/target-cdtor-1.c new file mode 100644 index 0000000..e6099cf --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/target-cdtor-1.c @@ -0,0 +1,89 @@ +/* Offloaded 'constructor' and 'destructor' functions. */ + +#include <omp.h> + +#pragma omp declare target + +static void +__attribute__((constructor)) +initHD1() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +static void +__attribute__((constructor)) +initHD2() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +static void +__attribute__((destructor)) +finiHD1() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +static void +__attribute__((destructor)) +finiHD2() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +#pragma omp end declare target + +static void +__attribute__((constructor)) +initH1() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +static void +__attribute__((destructor)) +finiH2() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +int main() +{ + int c = 0; + + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + +#pragma omp target map(c) + { + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + } + +#pragma omp target map(c) + { + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + } + + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + + return 0; +} + +/* The order is undefined, in which same-priority 'constructor' functions, and 'destructor' functions are run. + { dg-output {init[^,]+, 1[\r\n]+} } + { dg-output {init[^,]+, 1[\r\n]+} } + { dg-output {init[^,]+, 1[\r\n]+} } + { dg-output {main:1, 1[\r\n]+} } + { dg-output {initHD[^,]+, 0[\r\n]+} { target offload_device } } + { dg-output {initHD[^,]+, 0[\r\n]+} { target offload_device } } + { dg-output {main:2, 1[\r\n]+} { target { ! offload_device } } } + { dg-output {main:2, 0[\r\n]+} { target offload_device } } + { dg-output {main:3, 1[\r\n]+} { target { ! offload_device } } } + { dg-output {main:3, 0[\r\n]+} { target offload_device } } + { dg-output {main:4, 1[\r\n]+} } + { dg-output {finiHD[^,]+, 0[\r\n]+} { target offload_device } } + { dg-output {finiHD[^,]+, 0[\r\n]+} { target offload_device } } + { dg-output {fini[^,]+, 1[\r\n]+} } + { dg-output {fini[^,]+, 1[\r\n]+} } + { dg-output {fini[^,]+, 1[\r\n]+} } +*/ diff --git a/libgomp/testsuite/libgomp.c/interop-hsa.c b/libgomp/testsuite/libgomp.c/interop-hsa.c new file mode 100644 index 0000000..cf8bc90 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hsa.c @@ -0,0 +1,203 @@ +/* { dg-additional-options "-ldl" } */ +/* { dg-require-effective-target offload_device_gcn } */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <omp.h> +#include <assert.h> +#include <dlfcn.h> +#include "../../../include/hsa.h" +#include "../../config/gcn/libgomp-gcn.h" + +#define STACKSIZE (100 * 1024) +#define HEAPSIZE (10 * 1024 * 1024) +#define ARENASIZE HEAPSIZE + +/* This code fragment must be optimized or else the host-fallback kernel has + * invalid ASM inserts. The rest of the file can be compiled safely at -O0. */ +#pragma omp declare target +uintptr_t __attribute__((optimize("O1"))) +get_kernel_ptr () +{ + uintptr_t val; + if (!omp_is_initial_device ()) + /* "main._omp_fn.0" is the name GCC gives the first OpenMP target + * region in the "main" function. + * The ".kd" suffix is added by the LLVM assembler when it creates the + * kernel meta-data, and this is what we need to launch a kernel. */ + asm ("s_getpc_b64 %0\n\t" + "s_add_u32 %L0, %L0, main._omp_fn.0.kd@rel32@lo+4\n\t" + "s_addc_u32 %H0, %H0, main._omp_fn.0.kd@rel32@hi+4" + : "=Sg"(val)); + return val; +} +#pragma omp end declare target + +int +main(int argc, char** argv) +{ + + /* Load the HSA runtime DLL. */ + void *hsalib = dlopen ("libhsa-runtime64.so.1", RTLD_LAZY); + assert (hsalib); + + hsa_status_t (*hsa_signal_create) (hsa_signal_value_t initial_value, + uint32_t num_consumers, + const hsa_agent_t *consumers, + hsa_signal_t *signal) + = dlsym (hsalib, "hsa_signal_create"); + assert (hsa_signal_create); + + uint64_t (*hsa_queue_load_write_index_relaxed) (const hsa_queue_t *queue) + = dlsym (hsalib, "hsa_queue_load_write_index_relaxed"); + assert (hsa_queue_load_write_index_relaxed); + + void (*hsa_signal_store_relaxed) (hsa_signal_t signal, + hsa_signal_value_t value) + = dlsym (hsalib, "hsa_signal_store_relaxed"); + assert (hsa_signal_store_relaxed); + + hsa_signal_value_t (*hsa_signal_wait_relaxed) (hsa_signal_t signal, + hsa_signal_condition_t condition, + hsa_signal_value_t compare_value, + uint64_t timeout_hint, + hsa_wait_state_t wait_state_hint) + = dlsym (hsalib, "hsa_signal_wait_relaxed"); + assert (hsa_signal_wait_relaxed); + + void (*hsa_queue_store_write_index_relaxed) (const hsa_queue_t *queue, + uint64_t value) + = dlsym (hsalib, "hsa_queue_store_write_index_relaxed"); + assert (hsa_queue_store_write_index_relaxed); + + hsa_status_t (*hsa_signal_destroy) (hsa_signal_t signal) + = dlsym (hsalib, "hsa_signal_destroy"); + assert (hsa_signal_destroy); + + /* Set up the device data environment. */ + int test_data_value = 0; +#pragma omp target enter data map(test_data_value) + + /* Get the interop details. */ + int device_num = omp_get_default_device(); + hsa_agent_t *gpu_agent; + hsa_queue_t *hsa_queue = NULL; + + omp_interop_t interop = omp_interop_none; +#pragma omp interop init(target, targetsync, prefer_type("hsa"): interop) device(device_num) + assert (interop != omp_interop_none); + + omp_interop_rc_t retcode; + omp_interop_fr_t fr = omp_get_interop_int (interop, omp_ipr_fr_id, &retcode); + assert (retcode == omp_irc_success); + assert (fr == omp_ifr_hsa); + + gpu_agent = omp_get_interop_ptr(interop, omp_ipr_device, &retcode); + assert (retcode == omp_irc_success); + + hsa_queue = omp_get_interop_ptr(interop, omp_ipr_targetsync, &retcode); + assert (retcode == omp_irc_success); + assert (hsa_queue); + + /* Call an offload kernel via OpenMP/libgomp. + * + * This kernel serves two purposes: + * 1) Lookup the device-side load-address of itself (thus avoiding the + * need to access the libgomp internals). + * 2) Count how many times it is called. + * We then call it once using OpenMP, and once manually, and check + * the counter reads "2". */ + uint64_t kernel_object = 0; +#pragma omp target map(from:kernel_object) map(present,alloc:test_data_value) + { + kernel_object = get_kernel_ptr (); + ++test_data_value; + } + + assert (kernel_object != 0); + + /* Configure the same kernel to run again, using HSA manually this time. */ + hsa_status_t status; + hsa_signal_t signal; + status = hsa_signal_create(1, 0, NULL, &signal); + assert (status == HSA_STATUS_SUCCESS); + + /* The kernel is built by GCC for OpenMP, so we need to pass the same + * data pointers that libgomp would pass in. */ + struct { + uintptr_t test_data_value; + uintptr_t kernel_object; + } tgtaddrs; + +#pragma omp target data use_device_addr(test_data_value) + { + tgtaddrs.test_data_value = (uintptr_t)&test_data_value; + tgtaddrs.kernel_object = (uintptr_t)omp_target_alloc (8, device_num); + } + + /* We also need to duplicate the launch ABI used by plugin-gcn.c. */ + struct kernargs_abi args; /* From libgomp-gcn.h. */ + args.dummy1 = (int64_t)&tgtaddrs; + args.out_ptr = (int64_t)malloc (sizeof (struct output)); /* Host side. */ + args.heap_ptr = (int64_t)omp_target_alloc (HEAPSIZE, device_num); + args.arena_ptr = (int64_t)omp_target_alloc (ARENASIZE, device_num); + args.stack_ptr = (int64_t)omp_target_alloc (STACKSIZE, device_num); + args.arena_size_per_team = ARENASIZE; + args.stack_size_per_thread = STACKSIZE; + + /* Build the HSA dispatch packet, and insert it into the queue. */ + uint64_t packet_id = hsa_queue_load_write_index_relaxed (hsa_queue); + const uint32_t queueMask = hsa_queue->size - 1; + hsa_kernel_dispatch_packet_t *dispatch_packet = + &(((hsa_kernel_dispatch_packet_t *) + (hsa_queue->base_address))[packet_id & queueMask]); + + dispatch_packet->setup = 3 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + dispatch_packet->workgroup_size_x = 1; + dispatch_packet->workgroup_size_y = 64; + dispatch_packet->workgroup_size_z = 1; + dispatch_packet->grid_size_x = 1; + dispatch_packet->grid_size_y = 64; + dispatch_packet->grid_size_z = 1; + dispatch_packet->completion_signal = signal; + dispatch_packet->kernel_object = kernel_object; + dispatch_packet->kernarg_address = &args; + dispatch_packet->private_segment_size = 0; + dispatch_packet->group_segment_size = 1536; + + uint16_t header = 0; + header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + + /* Finish writing the packet header with an atomic release. */ + __atomic_store_n((uint16_t*)dispatch_packet, header, __ATOMIC_RELEASE); + + hsa_queue_store_write_index_relaxed (hsa_queue, packet_id + 1); + + ;/* Run the kernel and wait for it to complete. */ + hsa_signal_store_relaxed(hsa_queue->doorbell_signal, packet_id); + while (hsa_signal_wait_relaxed(signal, HSA_SIGNAL_CONDITION_LT, 1, + UINT64_MAX, HSA_WAIT_STATE_ACTIVE) != 0) + ; + + /* Clean up HSA. */ + hsa_signal_destroy(signal); + free ((void*)args.out_ptr); + omp_target_free ((void*)args.heap_ptr, device_num); + omp_target_free ((void*)args.arena_ptr, device_num); + omp_target_free ((void*)args.stack_ptr, device_num); + omp_target_free ((void*)tgtaddrs.kernel_object, device_num); + + /* Clean up OpenMP. */ + #pragma omp interop destroy(interop) + + /* Bring the data back from the device. */ +#pragma omp target exit data map(test_data_value) + + /* Ensure the kernel was called twice. Once by OpenMP, once by HSA. */ + assert (test_data_value == 2); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-1.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-1.C index f2ef751..08c5766 100644 --- a/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-1.C +++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-1.C @@ -4,9 +4,6 @@ { dg-additional-options -fexceptions } */ /* { dg-additional-options -fdump-tree-optimized-raw } { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ -/* { dg-bogus {Size expression must be absolute\.} PR119737 { target { openacc_radeon_accel_selected && __OPTIMIZE__ } xfail *-*-* } 0 } - { dg-ice PR119737 { openacc_radeon_accel_selected && __OPTIMIZE__ } } - { dg-excess-errors {'mkoffload' failure etc.} { xfail { openacc_radeon_accel_selected && __OPTIMIZE__ } } } */ /* See also '../libgomp.c++/target-exceptions-throw-1.C'. */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2.C index f6dc970..a7408cd 100644 --- a/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2.C +++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2.C @@ -6,9 +6,6 @@ { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ /* { dg-bogus {undefined symbol: typeinfo name for MyException} PR119806 { target { openacc_radeon_accel_selected && { ! __OPTIMIZE__ } } xfail *-*-* } 0 } { dg-excess-errors {'mkoffload' failure etc.} { xfail { openacc_radeon_accel_selected && { ! __OPTIMIZE__ } } } } */ -/* { dg-bogus {Size expression must be absolute\.} PR119737 { target { openacc_radeon_accel_selected && __OPTIMIZE__ } xfail *-*-* } 0 } - { dg-ice PR119737 { openacc_radeon_accel_selected && __OPTIMIZE__ } } - { dg-excess-errors {'mkoffload' failures etc.} { xfail { openacc_radeon_accel_selected && __OPTIMIZE__ } } } */ /* { dg-bogus {Initial value type mismatch} PR119806 { target { openacc_nvidia_accel_selected && { ! __OPTIMIZE__ } } xfail *-*-* } 0 } { dg-excess-errors {'mkoffload' failure etc.} { xfail { openacc_nvidia_accel_selected && { ! __OPTIMIZE__ } } } } */ |