diff options
author | Nathan Sidwell <nathan@codesourcery.com> | 2015-11-13 21:51:32 +0000 |
---|---|---|
committer | Nathan Sidwell <nathan@gcc.gnu.org> | 2015-11-13 21:51:32 +0000 |
commit | c5a64cfec7388ddb685071d99dc7b23c918af16c (patch) | |
tree | 9bc316e20fe71f3245e275f4fa8f8192ebe85381 /gcc | |
parent | 7bcc3c8608f201d618b27dc6fbf3aaf176599033 (diff) | |
download | gcc-c5a64cfec7388ddb685071d99dc7b23c918af16c.zip gcc-c5a64cfec7388ddb685071d99dc7b23c918af16c.tar.gz gcc-c5a64cfec7388ddb685071d99dc7b23c918af16c.tar.bz2 |
omp-low.c (scan_sharing_clauses): Accept INDEPENDENT, AUTO & SEQ.
gcc/
* gcc/omp-low.c (scan_sharing_clauses): Accept INDEPENDENT, AUTO &
SEQ.
(oacc_loop_fixed_partitions): Correct return type to bool.
(oacc_loop_auto_partitions): New.
(oacc_loop_partition): Take mask argument, call
oacc_loop_auto_partitions.
(execute_oacc_device_lower): Provide mask to oacc_loop_partition.
gcc/testsuite/
* c-c++-common/goacc/loop-auto-1.c: New.
libgomp/
* testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c: New.
From-SVN: r230354
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 10 | ||||
-rw-r--r-- | gcc/omp-low.c | 80 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 2 | ||||
-rw-r--r-- | gcc/testsuite/c-c++-common/goacc/loop-auto-1.c | 230 |
4 files changed, 305 insertions, 17 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 13133b1..00d5877 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +2015-11-13 Nathan Sidwell <nathan@codesourcery.com> + + * gcc/omp-low.c (scan_sharing_clauses): Accept INDEPENDENT, AUTO & + SEQ. + (oacc_loop_fixed_partitions): Correct return type to bool. + (oacc_loop_auto_partitions): New. + (oacc_loop_partition): Take mask argument, call + oacc_loop_auto_partitions. + (execute_oacc_device_lower): Provide mask to oacc_loop_partition. + 2015-11-13 Michael Meissner <meissner@linux.vnet.ibm.com> * config/rs6000/constraints.md (we constraint): New constraint for diff --git a/gcc/omp-low.c b/gcc/omp-low.c index f7584de..4b2b477 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -2124,6 +2124,9 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) case OMP_CLAUSE_WORKER: case OMP_CLAUSE_VECTOR: case OMP_CLAUSE_TILE: + case OMP_CLAUSE_INDEPENDENT: + case OMP_CLAUSE_AUTO: + case OMP_CLAUSE_SEQ: break; case OMP_CLAUSE_ALIGNED: @@ -2136,9 +2139,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) case OMP_CLAUSE_DEVICE_RESIDENT: case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE__CACHE_: - case OMP_CLAUSE_INDEPENDENT: - case OMP_CLAUSE_AUTO: - case OMP_CLAUSE_SEQ: sorry ("Clause not supported yet"); break; @@ -2299,14 +2299,14 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) case OMP_CLAUSE_WORKER: case OMP_CLAUSE_VECTOR: case OMP_CLAUSE_TILE: + case OMP_CLAUSE_INDEPENDENT: + case OMP_CLAUSE_AUTO: + case OMP_CLAUSE_SEQ: break; case OMP_CLAUSE_DEVICE_RESIDENT: case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE__CACHE_: - case OMP_CLAUSE_INDEPENDENT: - case OMP_CLAUSE_AUTO: - case OMP_CLAUSE_SEQ: sorry ("Clause not supported yet"); break; @@ -19230,10 +19230,10 @@ oacc_loop_process (oacc_loop *loop) /* Walk the OpenACC loop heirarchy checking and assigning the programmer-specified partitionings. OUTER_MASK is the partitioning - this loop is contained within. Return partitiong mask used within - this loop nest. */ + this loop is contained within. Return true if we contain an + auto-partitionable loop. */ -static unsigned +static bool oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask) { unsigned this_mask = loop->mask; @@ -19337,18 +19337,63 @@ oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask) return has_auto; } +/* Walk the OpenACC loop heirarchy to assign auto-partitioned loops. + OUTER_MASK is the partitioning this loop is contained within. + Return the cumulative partitioning used by this loop, siblings and + children. */ + +static unsigned +oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask) +{ + unsigned inner_mask = 0; + bool noisy = true; + +#ifdef ACCEL_COMPILER + /* When device_type is supported, we want the device compiler to be + noisy, if the loop parameters are device_type-specific. */ + noisy = false; +#endif + + if (loop->child) + inner_mask |= oacc_loop_auto_partitions (loop->child, + outer_mask | loop->mask); + + if ((loop->flags & OLF_AUTO) && (loop->flags & OLF_INDEPENDENT)) + { + unsigned this_mask = 0; + + /* Determine the outermost partitioning used within this loop. */ + this_mask = inner_mask | GOMP_DIM_MASK (GOMP_DIM_MAX); + this_mask = (this_mask & -this_mask); + + /* Pick the partitioning just inside that one. */ + this_mask >>= 1; + + /* And avoid picking one use by an outer loop. */ + this_mask &= ~outer_mask; + + if (!this_mask && noisy) + warning_at (loop->loc, 0, + "insufficient partitioning available to parallelize loop"); + + loop->mask = this_mask; + } + inner_mask |= loop->mask; + + if (loop->sibling) + inner_mask |= oacc_loop_auto_partitions (loop->sibling, outer_mask); + + return inner_mask; +} + /* Walk the OpenACC loop heirarchy to check and assign partitioning axes. */ static void -oacc_loop_partition (oacc_loop *loop, int fn_level) +oacc_loop_partition (oacc_loop *loop, unsigned outer_mask) { - unsigned outer_mask = 0; - - if (fn_level >= 0) - outer_mask = GOMP_DIM_MASK (fn_level) - 1; - - oacc_loop_fixed_partitions (loop, outer_mask); + if (oacc_loop_fixed_partitions (loop, outer_mask)) + oacc_loop_auto_partitions (loop, outer_mask); } /* Default fork/join early expander. Delete the function calls if @@ -19429,7 +19474,8 @@ execute_oacc_device_lower () /* Discover, partition and process the loops. */ oacc_loop *loops = oacc_loop_discovery (); - oacc_loop_partition (loops, fn_level); + unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0; + oacc_loop_partition (loops, outer_mask); oacc_loop_process (loops); if (dump_file) { diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 68cf4e9..3301130 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,7 @@ 2015-11-13 Nathan Sidwell <nathan@codesourcery.com> + * c-c++-common/goacc/loop-auto-1.c: New. + * lib/target-supports.exp (check_effective_target_offload_nvptx): New. * gcc.dg/goacc/nvptx-merged-loop.c: New. diff --git a/gcc/testsuite/c-c++-common/goacc/loop-auto-1.c b/gcc/testsuite/c-c++-common/goacc/loop-auto-1.c new file mode 100644 index 0000000..ee6d28c --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/loop-auto-1.c @@ -0,0 +1,230 @@ + +void Foo () +{ + +#pragma acc parallel num_gangs(10) num_workers(32) vector_length(32) + { +#pragma acc loop vector + for (int ix = 0; ix < 10; ix++) + { +#pragma acc loop seq + for (int jx = 0; jx < 10; jx++) {} + +#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ + for (int jx = 0; jx < 10; jx++) {} + } + +#pragma acc loop worker + for (int ix = 0; ix < 10; ix++) + { +#pragma acc loop auto + for (int jx = 0; jx < 10; jx++) {} + +#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ + for (int jx = 0; jx < 10; jx++) + { +#pragma acc loop vector + for (int kx = 0; kx < 10; kx++) {} + } + } + +#pragma acc loop gang + for (int ix = 0; ix < 10; ix++) + { +#pragma acc loop auto + for (int jx = 0; jx < 10; jx++) {} + +#pragma acc loop auto + for (int jx = 0; jx < 10; jx++) + { +#pragma acc loop auto + for (int kx = 0; kx < 10; kx++) {} + } + +#pragma acc loop worker + for (int jx = 0; jx < 10; jx++) + { +#pragma acc loop auto + for (int kx = 0; kx < 10; kx++) {} + } + +#pragma acc loop vector + for (int jx = 0; jx < 10; jx++) + { +#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ + for (int kx = 0; kx < 10; kx++) {} + } + +#pragma acc loop auto + for (int jx = 0; jx < 10; jx++) + { +#pragma acc loop vector + for (int kx = 0; kx < 10; kx++) {} + } + + } + +#pragma acc loop auto + for (int ix = 0; ix < 10; ix++) + { +#pragma acc loop auto + for (int jx = 0; jx < 10; jx++) + { +#pragma acc loop auto + for (int kx = 0; kx < 10; kx++) {} + } + } + } +} + +#pragma acc routine gang +void Gang (void) +{ +#pragma acc loop vector + for (int ix = 0; ix < 10; ix++) + { +#pragma acc loop seq + for (int jx = 0; jx < 10; jx++) {} + +#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ + for (int jx = 0; jx < 10; jx++) {} + } + +#pragma acc loop worker + for (int ix = 0; ix < 10; ix++) + { +#pragma acc loop auto + for (int jx = 0; jx < 10; jx++) {} + +#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ + for (int jx = 0; jx < 10; jx++) + { +#pragma acc loop vector + for (int kx = 0; kx < 10; kx++) {} + } + } + +#pragma acc loop gang + for (int ix = 0; ix < 10; ix++) + { +#pragma acc loop auto + for (int jx = 0; jx < 10; jx++) {} + +#pragma acc loop auto + for (int jx = 0; jx < 10; jx++) + { +#pragma acc loop auto + for (int kx = 0; kx < 10; kx++) {} + } + +#pragma acc loop worker + for (int jx = 0; jx < 10; jx++) + { +#pragma acc loop auto + for (int kx = 0; kx < 10; kx++) {} + } + +#pragma acc loop vector + for (int jx = 0; jx < 10; jx++) + { +#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ + for (int kx = 0; kx < 10; kx++) {} + } + +#pragma acc loop auto + for (int jx = 0; jx < 10; jx++) + { +#pragma acc loop vector + for (int kx = 0; kx < 10; kx++) {} + } + + } + +#pragma acc loop auto + for (int ix = 0; ix < 10; ix++) + { +#pragma acc loop auto + for (int jx = 0; jx < 10; jx++) + { +#pragma acc loop auto + for (int kx = 0; kx < 10; kx++) {} + } + } +} + +#pragma acc routine worker +void Worker (void) +{ +#pragma acc loop vector + for (int ix = 0; ix < 10; ix++) + { +#pragma acc loop seq + for (int jx = 0; jx < 10; jx++) {} + +#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ + for (int jx = 0; jx < 10; jx++) {} + } + +#pragma acc loop worker + for (int ix = 0; ix < 10; ix++) + { +#pragma acc loop auto + for (int jx = 0; jx < 10; jx++) {} + +#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ + for (int jx = 0; jx < 10; jx++) + { +#pragma acc loop vector + for (int kx = 0; kx < 10; kx++) {} + } + } + +#pragma acc loop auto + for (int ix = 0; ix < 10; ix++) + { +#pragma acc loop auto + for (int jx = 0; jx < 10; jx++) {} + } + +#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ + for (int ix = 0; ix < 10; ix++) + { +#pragma acc loop auto + for (int jx = 0; jx < 10; jx++) + { +#pragma acc loop auto + for (int kx = 0; kx < 10; kx++) {} + } + } +} + +#pragma acc routine vector +void Vector (void) +{ +#pragma acc loop vector + for (int ix = 0; ix < 10; ix++) + { +#pragma acc loop seq + for (int jx = 0; jx < 10; jx++) {} + +#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ + for (int jx = 0; jx < 10; jx++) {} + } + +#pragma acc loop auto + for (int ix = 0; ix < 10; ix++) {} + +#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ + for (int ix = 0; ix < 10; ix++) + { +#pragma acc loop auto + for (int jx = 0; jx < 10; jx++) {} + } +} + +#pragma acc routine seq +void Seq (void) +{ +#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ + for (int ix = 0; ix < 10; ix++) {} +} |