From d6621a2f3176dd6a593d4f5fa7f85db0234b40d2 Mon Sep 17 00:00:00 2001 From: Tobias Burnus Date: Fri, 26 Aug 2022 12:12:25 +0200 Subject: OpenMP: Support reverse offload (middle end part) gcc/ChangeLog: * internal-fn.cc (expand_GOMP_TARGET_REV): New. * internal-fn.def (GOMP_TARGET_REV): New. * lto-cgraph.cc (lto_output_node, verify_node_partition): Mark 'omp target device_ancestor_host' as in_other_partition and don't error if absent. * omp-low.cc (create_omp_child_function): Mark as 'noclone'. * omp-expand.cc (expand_omp_target): For reverse offload, remove sorry, use device = GOMP_DEVICE_HOST_FALLBACK and create empty-body nohost function. * omp-offload.cc (execute_omp_device_lower): Handle IFN_GOMP_TARGET_REV. (pass_omp_target_link::execute): For ACCEL_COMPILER, don't nullify fn argument for reverse offload libgomp/ChangeLog: * libgomp.texi (OpenMP 5.0): Mark 'ancestor' as implemented but refer to 'requires'. * testsuite/libgomp.c-c++-common/reverse-offload-1-aux.c: New test. * testsuite/libgomp.c-c++-common/reverse-offload-1.c: New test. * testsuite/libgomp.fortran/reverse-offload-1-aux.f90: New test. * testsuite/libgomp.fortran/reverse-offload-1.f90: New test. gcc/testsuite/ChangeLog: * c-c++-common/gomp/reverse-offload-1.c: Remove dg-sorry. * c-c++-common/gomp/target-device-ancestor-4.c: Likewise. * gfortran.dg/gomp/target-device-ancestor-4.f90: Likewise. * gfortran.dg/gomp/target-device-ancestor-5.f90: Likewise. * c-c++-common/goacc/classify-kernels-parloops.c: Add 'noclone' to scan-tree-dump-times. * c-c++-common/goacc/classify-kernels-unparallelized-parloops.c: Likewise. * c-c++-common/goacc/classify-kernels-unparallelized.c: Likewise. * c-c++-common/goacc/classify-kernels.c: Likewise. * c-c++-common/goacc/classify-parallel.c: Likewise. * c-c++-common/goacc/classify-serial.c: Likewise. * c-c++-common/goacc/kernels-counter-vars-function-scope.c: Likewise. * c-c++-common/goacc/kernels-loop-2.c: Likewise. * c-c++-common/goacc/kernels-loop-3.c: Likewise. * c-c++-common/goacc/kernels-loop-data-2.c: Likewise. * c-c++-common/goacc/kernels-loop-data-enter-exit-2.c: Likewise. * c-c++-common/goacc/kernels-loop-data-enter-exit.c: Likewise. * c-c++-common/goacc/kernels-loop-data-update.c: Likewise. * c-c++-common/goacc/kernels-loop-data.c: Likewise. * c-c++-common/goacc/kernels-loop-g.c: Likewise. * c-c++-common/goacc/kernels-loop-mod-not-zero.c: Likewise. * c-c++-common/goacc/kernels-loop-n.c: Likewise. * c-c++-common/goacc/kernels-loop-nest.c: Likewise. * c-c++-common/goacc/kernels-loop.c: Likewise. * c-c++-common/goacc/kernels-one-counter-var.c: Likewise. * c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c: Likewise. * gfortran.dg/goacc/classify-kernels-parloops.f95: Likewise. * gfortran.dg/goacc/classify-kernels-unparallelized-parloops.f95: Likewise. * gfortran.dg/goacc/classify-kernels-unparallelized.f95: Likewise. * gfortran.dg/goacc/classify-kernels.f95: Likewise. * gfortran.dg/goacc/classify-parallel.f95: Likewise. * gfortran.dg/goacc/classify-serial.f95: Likewise. * gfortran.dg/goacc/kernels-loop-2.f95: Likewise. * gfortran.dg/goacc/kernels-loop-data-2.f95: Likewise. * gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95: Likewise. * gfortran.dg/goacc/kernels-loop-data-enter-exit.f95: Likewise. * gfortran.dg/goacc/kernels-loop-data-update.f95: Likewise. * gfortran.dg/goacc/kernels-loop-data.f95: Likewise. * gfortran.dg/goacc/kernels-loop-n.f95: Likewise. * gfortran.dg/goacc/kernels-loop.f95: Likewise. * gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95: Likewise. --- libgomp/libgomp.texi | 2 +- .../libgomp.c-c++-common/reverse-offload-1-aux.c | 10 +++ .../libgomp.c-c++-common/reverse-offload-1.c | 83 ++++++++++++++++++++ .../libgomp.fortran/reverse-offload-1-aux.f90 | 12 +++ .../libgomp.fortran/reverse-offload-1.f90 | 88 ++++++++++++++++++++++ 5 files changed, 194 insertions(+), 1 deletion(-) create mode 100644 libgomp/testsuite/libgomp.c-c++-common/reverse-offload-1-aux.c create mode 100644 libgomp/testsuite/libgomp.c-c++-common/reverse-offload-1.c create mode 100644 libgomp/testsuite/libgomp.fortran/reverse-offload-1-aux.f90 create mode 100644 libgomp/testsuite/libgomp.fortran/reverse-offload-1.f90 (limited to 'libgomp') diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi index e88fe89..0f2998c 100644 --- a/libgomp/libgomp.texi +++ b/libgomp/libgomp.texi @@ -225,7 +225,7 @@ The OpenMP 4.5 specification is fully supported. @item @code{allocate} clause @tab P @tab Initial support @item @code{use_device_addr} clause on @code{target data} @tab Y @tab @item @code{ancestor} modifier on @code{device} clause - @tab P @tab Reverse offload unsupported + @tab Y @tab See comment for @code{requires} @item Implicit declare target directive @tab Y @tab @item Discontiguous array section with @code{target update} construct @tab N @tab diff --git a/libgomp/testsuite/libgomp.c-c++-common/reverse-offload-1-aux.c b/libgomp/testsuite/libgomp.c-c++-common/reverse-offload-1-aux.c new file mode 100644 index 0000000..b3a331d --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/reverse-offload-1-aux.c @@ -0,0 +1,10 @@ +/* { dg-do compile { target skip-all-targets } } */ + +/* Declare the following function in a separare translation unit + to ensure it won't have a device version. */ + +int +add_3 (int x) +{ + return x + 3; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/reverse-offload-1.c b/libgomp/testsuite/libgomp.c-c++-common/reverse-offload-1.c new file mode 100644 index 0000000..976e129 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/reverse-offload-1.c @@ -0,0 +1,83 @@ +/* { dg-do run } */ +/* { dg-additional-sources reverse-offload-1-aux.c } */ + +/* Check that reverse offload works in particular: + - no code is generated on the device side (i.e. no + implicit declare target of called functions and no + code gen for the target-region body) + -> would otherwise fail due to 'add_3' symbol + - Plus the usual (compiles, runs, produces correct result) + + Note: Running also the non-reverse-offload target regions + on the host (host fallback) is valid and will pass. */ + +#pragma omp requires reverse_offload + +extern int add_3 (int); + +static int global_var = 5; + +void +check_offload (int *x, int *y) +{ + *x = add_3 (*x); + *y = add_3 (*y); +} + +#pragma omp declare target +void +tg_fn (int *x, int *y) +{ + int x2 = *x, y2 = *y; + if (x2 != 2 || y2 != 3) + __builtin_abort (); + x2 = x2 + 2; + y2 = y2 + 7; + + #pragma omp target device(ancestor : 1) map(tofrom: x2) + check_offload(&x2, &y2); + + if (x2 != 2+2+3 || y2 != 3 + 7) + __builtin_abort (); + *x = x2, *y = y2; +} +#pragma omp end declare target + +void +my_func (int *x, int *y) +{ + if (global_var != 5) + __builtin_abort (); + global_var = 242; + *x = 2*add_3(*x); + *y = 3*add_3(*y); +} + +int +main () +{ + #pragma omp target + { + int x = 2, y = 3; + tg_fn (&x, &y); + } + + #pragma omp target + { + int x = -2, y = -1; + #pragma omp target device ( ancestor:1 ) firstprivate(y) map(tofrom:x) + { + if (x != -2 || y != -1) + __builtin_abort (); + my_func (&x, &y); + if (x != 2*(3-2) || y != 3*(3-1)) + __builtin_abort (); + } + if (x != 2*(3-2) || y != -1) + __builtin_abort (); + } + + if (global_var != 242) + __builtin_abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.fortran/reverse-offload-1-aux.f90 b/libgomp/testsuite/libgomp.fortran/reverse-offload-1-aux.f90 new file mode 100644 index 0000000..1807f06 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/reverse-offload-1-aux.f90 @@ -0,0 +1,12 @@ +! { dg-do compile { target skip-all-targets } } + +! Declare the following function in a separare translation unit +! to ensure it won't have a device version. + + +integer function add_3 (x) + implicit none + integer, value :: x + + add_3 = x + 3 +end function diff --git a/libgomp/testsuite/libgomp.fortran/reverse-offload-1.f90 b/libgomp/testsuite/libgomp.fortran/reverse-offload-1.f90 new file mode 100644 index 0000000..7cfb8b6 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/reverse-offload-1.f90 @@ -0,0 +1,88 @@ +! { dg-do run } +! { dg-additional-sources reverse-offload-1-aux.f90 } + +! Check that reverse offload works in particular: +! - no code is generated on the device side (i.e. no +! implicit declare target of called functions and no +! code gen for the target-region body) +! -> would otherwise fail due to 'add_3' symbol +! - Plus the usual (compiles, runs, produces correct result) + +! Note: Running also the non-reverse-offload target regions +! on the host (host fallback) is valid and will pass. + +module m + interface + integer function add_3 (x) + implicit none + integer, value :: x + end function + end interface + integer :: global_var = 5 +end module m + +module m2 + use m + !$omp requires reverse_offload + implicit none (type, external) +contains + subroutine check_offload (x, y) + integer :: x, y + x = add_3(x) + y = add_3(y) + end subroutine check_offload + subroutine m2_tg_fn(x, y) + integer :: x, y + !$omp declare target + if (x /= 2 .or. y /= 3) stop 1 + x = x + 2 + y = y + 7 + !$omp target device(ancestor : 1) map(tofrom: x) + call check_offload(x, y) + !$omp end target + if (x /= 2+2+3 .or. y /= 3 + 7) stop 2 + end subroutine +end module m2 + +program main + use m + !$omp requires reverse_offload + implicit none (type, external) + + integer :: prog_var = 99 + + !$omp target + block + use m2 + integer :: x, y + x = 2; y = 3 + call m2_tg_fn (x, y) + end block + + !$omp target + block + use m2 + integer :: x, y + x = -2; y = -1 + !$omp target device ( ancestor:1 ) firstprivate(y) map(tofrom:x) + if (x /= -2 .or. y /= -1) stop 3 + call my_func (x, y) + if (x /= 2*(3-2) .or. y /= 3*(3-1)) stop 5 + !$omp end target + if (x /= 2*(3-2) .or. y /= -1) stop 6 + end block + + if (prog_var /= 41 .or. global_var /= 242) stop 7 + +contains + + subroutine my_func(x, y) + integer :: x, y + if (prog_var /= 99) stop 8 + if (global_var /= 5) stop 9 + prog_var = 41 + global_var = 242 + x = 2*add_3(x) + y = 3*add_3(y) + end subroutine my_func +end -- cgit v1.1