aboutsummaryrefslogtreecommitdiff
path: root/libgomp
diff options
context:
space:
mode:
authorTobias Burnus <tobias@codesourcery.com>2022-08-26 12:12:25 +0200
committerTobias Burnus <tobias@codesourcery.com>2022-08-26 12:12:25 +0200
commitd6621a2f3176dd6a593d4f5fa7f85db0234b40d2 (patch)
tree422a2ee9ee9aa68b26785ec326ad3312ff5e06f9 /libgomp
parent0c2d6aa1be2ea85e751852834986ae52d58134d3 (diff)
downloadgcc-d6621a2f3176dd6a593d4f5fa7f85db0234b40d2.zip
gcc-d6621a2f3176dd6a593d4f5fa7f85db0234b40d2.tar.gz
gcc-d6621a2f3176dd6a593d4f5fa7f85db0234b40d2.tar.bz2
OpenMP: Support reverse offload (middle end part)
gcc/ChangeLog: * internal-fn.cc (expand_GOMP_TARGET_REV): New. * internal-fn.def (GOMP_TARGET_REV): New. * lto-cgraph.cc (lto_output_node, verify_node_partition): Mark 'omp target device_ancestor_host' as in_other_partition and don't error if absent. * omp-low.cc (create_omp_child_function): Mark as 'noclone'. * omp-expand.cc (expand_omp_target): For reverse offload, remove sorry, use device = GOMP_DEVICE_HOST_FALLBACK and create empty-body nohost function. * omp-offload.cc (execute_omp_device_lower): Handle IFN_GOMP_TARGET_REV. (pass_omp_target_link::execute): For ACCEL_COMPILER, don't nullify fn argument for reverse offload libgomp/ChangeLog: * libgomp.texi (OpenMP 5.0): Mark 'ancestor' as implemented but refer to 'requires'. * testsuite/libgomp.c-c++-common/reverse-offload-1-aux.c: New test. * testsuite/libgomp.c-c++-common/reverse-offload-1.c: New test. * testsuite/libgomp.fortran/reverse-offload-1-aux.f90: New test. * testsuite/libgomp.fortran/reverse-offload-1.f90: New test. gcc/testsuite/ChangeLog: * c-c++-common/gomp/reverse-offload-1.c: Remove dg-sorry. * c-c++-common/gomp/target-device-ancestor-4.c: Likewise. * gfortran.dg/gomp/target-device-ancestor-4.f90: Likewise. * gfortran.dg/gomp/target-device-ancestor-5.f90: Likewise. * c-c++-common/goacc/classify-kernels-parloops.c: Add 'noclone' to scan-tree-dump-times. * c-c++-common/goacc/classify-kernels-unparallelized-parloops.c: Likewise. * c-c++-common/goacc/classify-kernels-unparallelized.c: Likewise. * c-c++-common/goacc/classify-kernels.c: Likewise. * c-c++-common/goacc/classify-parallel.c: Likewise. * c-c++-common/goacc/classify-serial.c: Likewise. * c-c++-common/goacc/kernels-counter-vars-function-scope.c: Likewise. * c-c++-common/goacc/kernels-loop-2.c: Likewise. * c-c++-common/goacc/kernels-loop-3.c: Likewise. * c-c++-common/goacc/kernels-loop-data-2.c: Likewise. * c-c++-common/goacc/kernels-loop-data-enter-exit-2.c: Likewise. * c-c++-common/goacc/kernels-loop-data-enter-exit.c: Likewise. * c-c++-common/goacc/kernels-loop-data-update.c: Likewise. * c-c++-common/goacc/kernels-loop-data.c: Likewise. * c-c++-common/goacc/kernels-loop-g.c: Likewise. * c-c++-common/goacc/kernels-loop-mod-not-zero.c: Likewise. * c-c++-common/goacc/kernels-loop-n.c: Likewise. * c-c++-common/goacc/kernels-loop-nest.c: Likewise. * c-c++-common/goacc/kernels-loop.c: Likewise. * c-c++-common/goacc/kernels-one-counter-var.c: Likewise. * c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c: Likewise. * gfortran.dg/goacc/classify-kernels-parloops.f95: Likewise. * gfortran.dg/goacc/classify-kernels-unparallelized-parloops.f95: Likewise. * gfortran.dg/goacc/classify-kernels-unparallelized.f95: Likewise. * gfortran.dg/goacc/classify-kernels.f95: Likewise. * gfortran.dg/goacc/classify-parallel.f95: Likewise. * gfortran.dg/goacc/classify-serial.f95: Likewise. * gfortran.dg/goacc/kernels-loop-2.f95: Likewise. * gfortran.dg/goacc/kernels-loop-data-2.f95: Likewise. * gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95: Likewise. * gfortran.dg/goacc/kernels-loop-data-enter-exit.f95: Likewise. * gfortran.dg/goacc/kernels-loop-data-update.f95: Likewise. * gfortran.dg/goacc/kernels-loop-data.f95: Likewise. * gfortran.dg/goacc/kernels-loop-n.f95: Likewise. * gfortran.dg/goacc/kernels-loop.f95: Likewise. * gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95: Likewise.
Diffstat (limited to 'libgomp')
-rw-r--r--libgomp/libgomp.texi2
-rw-r--r--libgomp/testsuite/libgomp.c-c++-common/reverse-offload-1-aux.c10
-rw-r--r--libgomp/testsuite/libgomp.c-c++-common/reverse-offload-1.c83
-rw-r--r--libgomp/testsuite/libgomp.fortran/reverse-offload-1-aux.f9012
-rw-r--r--libgomp/testsuite/libgomp.fortran/reverse-offload-1.f9088
5 files changed, 194 insertions, 1 deletions
diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index e88fe89..0f2998c 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -225,7 +225,7 @@ The OpenMP 4.5 specification is fully supported.
@item @code{allocate} clause @tab P @tab Initial support
@item @code{use_device_addr} clause on @code{target data} @tab Y @tab
@item @code{ancestor} modifier on @code{device} clause
- @tab P @tab Reverse offload unsupported
+ @tab Y @tab See comment for @code{requires}
@item Implicit declare target directive @tab Y @tab
@item Discontiguous array section with @code{target update} construct
@tab N @tab
diff --git a/libgomp/testsuite/libgomp.c-c++-common/reverse-offload-1-aux.c b/libgomp/testsuite/libgomp.c-c++-common/reverse-offload-1-aux.c
new file mode 100644
index 0000000..b3a331d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/reverse-offload-1-aux.c
@@ -0,0 +1,10 @@
+/* { dg-do compile { target skip-all-targets } } */
+
+/* Declare the following function in a separare translation unit
+ to ensure it won't have a device version. */
+
+int
+add_3 (int x)
+{
+ return x + 3;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/reverse-offload-1.c b/libgomp/testsuite/libgomp.c-c++-common/reverse-offload-1.c
new file mode 100644
index 0000000..976e129
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/reverse-offload-1.c
@@ -0,0 +1,83 @@
+/* { dg-do run } */
+/* { dg-additional-sources reverse-offload-1-aux.c } */
+
+/* Check that reverse offload works in particular:
+ - no code is generated on the device side (i.e. no
+ implicit declare target of called functions and no
+ code gen for the target-region body)
+ -> would otherwise fail due to 'add_3' symbol
+ - Plus the usual (compiles, runs, produces correct result)
+
+ Note: Running also the non-reverse-offload target regions
+ on the host (host fallback) is valid and will pass. */
+
+#pragma omp requires reverse_offload
+
+extern int add_3 (int);
+
+static int global_var = 5;
+
+void
+check_offload (int *x, int *y)
+{
+ *x = add_3 (*x);
+ *y = add_3 (*y);
+}
+
+#pragma omp declare target
+void
+tg_fn (int *x, int *y)
+{
+ int x2 = *x, y2 = *y;
+ if (x2 != 2 || y2 != 3)
+ __builtin_abort ();
+ x2 = x2 + 2;
+ y2 = y2 + 7;
+
+ #pragma omp target device(ancestor : 1) map(tofrom: x2)
+ check_offload(&x2, &y2);
+
+ if (x2 != 2+2+3 || y2 != 3 + 7)
+ __builtin_abort ();
+ *x = x2, *y = y2;
+}
+#pragma omp end declare target
+
+void
+my_func (int *x, int *y)
+{
+ if (global_var != 5)
+ __builtin_abort ();
+ global_var = 242;
+ *x = 2*add_3(*x);
+ *y = 3*add_3(*y);
+}
+
+int
+main ()
+{
+ #pragma omp target
+ {
+ int x = 2, y = 3;
+ tg_fn (&x, &y);
+ }
+
+ #pragma omp target
+ {
+ int x = -2, y = -1;
+ #pragma omp target device ( ancestor:1 ) firstprivate(y) map(tofrom:x)
+ {
+ if (x != -2 || y != -1)
+ __builtin_abort ();
+ my_func (&x, &y);
+ if (x != 2*(3-2) || y != 3*(3-1))
+ __builtin_abort ();
+ }
+ if (x != 2*(3-2) || y != -1)
+ __builtin_abort ();
+ }
+
+ if (global_var != 242)
+ __builtin_abort ();
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.fortran/reverse-offload-1-aux.f90 b/libgomp/testsuite/libgomp.fortran/reverse-offload-1-aux.f90
new file mode 100644
index 0000000..1807f06
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/reverse-offload-1-aux.f90
@@ -0,0 +1,12 @@
+! { dg-do compile { target skip-all-targets } }
+
+! Declare the following function in a separare translation unit
+! to ensure it won't have a device version.
+
+
+integer function add_3 (x)
+ implicit none
+ integer, value :: x
+
+ add_3 = x + 3
+end function
diff --git a/libgomp/testsuite/libgomp.fortran/reverse-offload-1.f90 b/libgomp/testsuite/libgomp.fortran/reverse-offload-1.f90
new file mode 100644
index 0000000..7cfb8b6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/reverse-offload-1.f90
@@ -0,0 +1,88 @@
+! { dg-do run }
+! { dg-additional-sources reverse-offload-1-aux.f90 }
+
+! Check that reverse offload works in particular:
+! - no code is generated on the device side (i.e. no
+! implicit declare target of called functions and no
+! code gen for the target-region body)
+! -> would otherwise fail due to 'add_3' symbol
+! - Plus the usual (compiles, runs, produces correct result)
+
+! Note: Running also the non-reverse-offload target regions
+! on the host (host fallback) is valid and will pass.
+
+module m
+ interface
+ integer function add_3 (x)
+ implicit none
+ integer, value :: x
+ end function
+ end interface
+ integer :: global_var = 5
+end module m
+
+module m2
+ use m
+ !$omp requires reverse_offload
+ implicit none (type, external)
+contains
+ subroutine check_offload (x, y)
+ integer :: x, y
+ x = add_3(x)
+ y = add_3(y)
+ end subroutine check_offload
+ subroutine m2_tg_fn(x, y)
+ integer :: x, y
+ !$omp declare target
+ if (x /= 2 .or. y /= 3) stop 1
+ x = x + 2
+ y = y + 7
+ !$omp target device(ancestor : 1) map(tofrom: x)
+ call check_offload(x, y)
+ !$omp end target
+ if (x /= 2+2+3 .or. y /= 3 + 7) stop 2
+ end subroutine
+end module m2
+
+program main
+ use m
+ !$omp requires reverse_offload
+ implicit none (type, external)
+
+ integer :: prog_var = 99
+
+ !$omp target
+ block
+ use m2
+ integer :: x, y
+ x = 2; y = 3
+ call m2_tg_fn (x, y)
+ end block
+
+ !$omp target
+ block
+ use m2
+ integer :: x, y
+ x = -2; y = -1
+ !$omp target device ( ancestor:1 ) firstprivate(y) map(tofrom:x)
+ if (x /= -2 .or. y /= -1) stop 3
+ call my_func (x, y)
+ if (x /= 2*(3-2) .or. y /= 3*(3-1)) stop 5
+ !$omp end target
+ if (x /= 2*(3-2) .or. y /= -1) stop 6
+ end block
+
+ if (prog_var /= 41 .or. global_var /= 242) stop 7
+
+contains
+
+ subroutine my_func(x, y)
+ integer :: x, y
+ if (prog_var /= 99) stop 8
+ if (global_var /= 5) stop 9
+ prog_var = 41
+ global_var = 242
+ x = 2*add_3(x)
+ y = 3*add_3(y)
+ end subroutine my_func
+end