From e898ce7997733c29dcab9c3c62ca102c7f9fa6eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C3=B6=20Barany?= Date: Fri, 1 Feb 2019 00:59:30 +0100 Subject: Decompose OpenACC 'kernels' constructs into parts, a sequence of compute constructs Not yet enabled by default: for now, the current mode of OpenACC 'kernels' constructs handling still remains '-fopenacc-kernels=parloops', but that is to change later. gcc/ * omp-oacc-kernels-decompose.cc: New. * Makefile.in (OBJS): Add it. * passes.def: Instantiate it. * tree-pass.h (make_pass_omp_oacc_kernels_decompose): Declare. * flag-types.h (enum openacc_kernels): Add. * doc/invoke.texi (-fopenacc-kernels): Document. * gimple.h (enum gf_mask): Add 'GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED', 'GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE', 'GF_OMP_TARGET_KIND_OACC_DATA_KERNELS'. (is_gimple_omp_oacc, is_gimple_omp_offloaded): Handle these. * gimple-pretty-print.c (dump_gimple_omp_target): Likewise. * omp-expand.c (expand_omp_target, build_omp_regions_1) (omp_make_gimple_edges): Likewise. * omp-low.c (scan_sharing_clauses, scan_omp_for) (check_omp_nesting_restrictions, lower_oacc_reductions) (lower_oacc_head_mark, lower_omp_target): Likewise. * omp-offload.c (execute_oacc_device_lower): Likewise. gcc/c-family/ * c.opt (fopenacc-kernels): Add. gcc/fortran/ * lang.opt (fopenacc-kernels): Add. gcc/testsuite/ * c-c++-common/goacc/kernels-decompose-1.c: New. * c-c++-common/goacc/kernels-decompose-2.c: New. * c-c++-common/goacc/kernels-decompose-ice-1.c: New. * c-c++-common/goacc/kernels-decompose-ice-2.c: New. * gfortran.dg/goacc/kernels-decompose-1.f95: New. * gfortran.dg/goacc/kernels-decompose-2.f95: New. * c-c++-common/goacc/if-clause-2.c: Adjust. * gfortran.dg/goacc/kernels-tree.f95: Likewise. libgomp/ * testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose-ice-1.c: New. * testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose.c: Likewise. 
* testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/declare-vla.c: Adjust. * testsuite/libgomp.oacc-fortran/pr94358-1.f90: Likewise. Co-authored-by: Thomas Schwinge --- .../declare-vla-kernels-decompose-ice-1.c | 8 +++++ .../declare-vla-kernels-decompose.c | 6 ++++ .../libgomp.oacc-c-c++-common/declare-vla.c | 6 ++++ .../kernels-decompose-1.c | 38 ++++++++++++++++++++++ .../testsuite/libgomp.oacc-fortran/pr94358-1.f90 | 11 +++++-- 5 files changed, 66 insertions(+), 3 deletions(-) create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose-ice-1.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c (limited to 'libgomp') diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose-ice-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose-ice-1.c new file mode 100644 index 0000000..c7eae12 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose-ice-1.c @@ -0,0 +1,8 @@ +/* { dg-additional-options "-fopenacc-kernels=decompose" } */ +/* Hopefully, this is the same issue as '../../../gcc/testsuite/c-c++-common/goacc/kernels-decompose-ice-1.c'. + { dg-ice "TODO" } + TODO { dg-prune-output "during GIMPLE pass: omplower" } + TODO { dg-do link } */ + +#undef KERNELS_DECOMPOSE_ICE_HACK +#include "declare-vla.c" diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose.c new file mode 100644 index 0000000..dd8a1c1 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose.c @@ -0,0 +1,6 @@ +/* { dg-additional-options "-fopenacc-kernels=decompose" } */ + +/* See also 'declare-vla-kernels-decompose-ice-1.c'. 
*/ + +#define KERNELS_DECOMPOSE_ICE_HACK +#include "declare-vla.c" diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla.c index 7149357..3bd6331 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla.c @@ -38,6 +38,12 @@ f_data (void) for (i = 0; i < N; i++) A[i] = -i; + /* See 'declare-vla-kernels-decompose.c'. */ +#ifdef KERNELS_DECOMPOSE_ICE_HACK + (volatile int *) &i; + (volatile int *) &N; +#endif + # pragma acc kernels for (i = 0; i < N; i++) A[i] = i; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c new file mode 100644 index 0000000..fa8ae6c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c @@ -0,0 +1,38 @@ +/* Test OpenACC 'kernels' construct decomposition. */ + +/* { dg-additional-options "-fopt-info-omp-all" } */ +/* { dg-additional-options "-fopenacc-kernels=decompose" } */ + +#undef NDEBUG +#include <assert.h> + +int main() +{ + int a = 0; + /*TODO Without making 'a' addressable, for GCN offloading we will not see the expected value copied out. (But it does work for nvptx offloading, strange...) */ + (volatile int *) &a; +#define N 123 + int b[N] = { 0 }; + +#pragma acc kernels + { + int c = 234; /* { dg-message "note: beginning 'gang-single' part in OpenACC 'kernels' region" } */ + + /*TODO Hopefully, this is the same issue as '../../../gcc/testsuite/c-c++-common/goacc/kernels-decompose-ice-1.c'. 
*/ + (volatile int *) &c; + +#pragma acc loop independent gang /* { dg-line l_loop_i[incr c_loop_i] } */ + /* { dg-message "note: parallelized loop nest in OpenACC 'kernels' region" "" { target *-*-* } l_loop_i$c_loop_i } */ + /* { dg-optimized "assigned OpenACC gang loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } */ + for (int i = 0; i < N; ++i) + b[i] = c; + + a = c; /* { dg-message "note: beginning 'gang-single' part in OpenACC 'kernels' region" } */ + } + + for (int i = 0; i < N; ++i) + assert (b[i] == 234); + assert (a == 234); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-fortran/pr94358-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/pr94358-1.f90 index 5013c5ba..82d8351 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/pr94358-1.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/pr94358-1.f90 @@ -1,17 +1,22 @@ ! { dg-do run } ! { dg-additional-options "-fopt-info-omp-all" } +! { dg-additional-options "-fopenacc-kernels=decompose" } subroutine kernel(lo, hi, a, b, c) implicit none integer :: lo, hi, i real, dimension(lo:hi) :: a, b, c - !$acc kernels copyin(lo, hi) ! { dg-optimized "assigned OpenACC seq loop parallelism" } - !$acc loop independent + !$acc kernels copyin(lo, hi) + !$acc loop independent ! { dg-line l_loop_i[incr c_loop_i] } + ! { dg-message "note: parallelized loop nest in OpenACC 'kernels' region" "" { target *-*-* } l_loop_i$c_loop_i } + ! { dg-optimized "assigned OpenACC gang vector loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } do i = lo, hi b(i) = a(i) end do - !$acc loop independent + !$acc loop independent ! { dg-line l_loop_i[incr c_loop_i] } + ! { dg-message "note: parallelized loop nest in OpenACC 'kernels' region" "" { target *-*-* } l_loop_i$c_loop_i } + ! { dg-optimized "assigned OpenACC gang vector loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } do i = lo, hi c(i) = b(i) end do -- cgit v1.1