aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog10
-rw-r--r--gcc/omp-low.c80
-rw-r--r--gcc/testsuite/ChangeLog2
-rw-r--r--gcc/testsuite/c-c++-common/goacc/loop-auto-1.c230
4 files changed, 305 insertions, 17 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 13133b1..00d5877 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@
+2015-11-13 Nathan Sidwell <nathan@codesourcery.com>
+
+ * omp-low.c (scan_sharing_clauses): Accept INDEPENDENT, AUTO &
+ SEQ.
+ (oacc_loop_fixed_partitions): Correct return type to bool.
+ (oacc_loop_auto_partitions): New.
+ (oacc_loop_partition): Take mask argument, call
+ oacc_loop_auto_partitions.
+ (execute_oacc_device_lower): Provide mask to oacc_loop_partition.
+
2015-11-13 Michael Meissner <meissner@linux.vnet.ibm.com>
* config/rs6000/constraints.md (we constraint): New constraint for
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index f7584de..4b2b477 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -2124,6 +2124,9 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
case OMP_CLAUSE_WORKER:
case OMP_CLAUSE_VECTOR:
case OMP_CLAUSE_TILE:
+ case OMP_CLAUSE_INDEPENDENT:
+ case OMP_CLAUSE_AUTO:
+ case OMP_CLAUSE_SEQ:
break;
case OMP_CLAUSE_ALIGNED:
@@ -2136,9 +2139,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
case OMP_CLAUSE_DEVICE_RESIDENT:
case OMP_CLAUSE_USE_DEVICE:
case OMP_CLAUSE__CACHE_:
- case OMP_CLAUSE_INDEPENDENT:
- case OMP_CLAUSE_AUTO:
- case OMP_CLAUSE_SEQ:
sorry ("Clause not supported yet");
break;
@@ -2299,14 +2299,14 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
case OMP_CLAUSE_WORKER:
case OMP_CLAUSE_VECTOR:
case OMP_CLAUSE_TILE:
+ case OMP_CLAUSE_INDEPENDENT:
+ case OMP_CLAUSE_AUTO:
+ case OMP_CLAUSE_SEQ:
break;
case OMP_CLAUSE_DEVICE_RESIDENT:
case OMP_CLAUSE_USE_DEVICE:
case OMP_CLAUSE__CACHE_:
- case OMP_CLAUSE_INDEPENDENT:
- case OMP_CLAUSE_AUTO:
- case OMP_CLAUSE_SEQ:
sorry ("Clause not supported yet");
break;
@@ -19230,10 +19230,10 @@ oacc_loop_process (oacc_loop *loop)
/* Walk the OpenACC loop heirarchy checking and assigning the
programmer-specified partitionings. OUTER_MASK is the partitioning
- this loop is contained within. Return partitiong mask used within
- this loop nest. */
+ this loop is contained within. Return true if we contain an
+ auto-partitionable loop. */
-static unsigned
+static bool
oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
{
unsigned this_mask = loop->mask;
@@ -19337,18 +19337,63 @@ oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
return has_auto;
}
+/* Walk the OpenACC loop hierarchy to assign auto-partitioned loops
+ (those flagged both OLF_AUTO and OLF_INDEPENDENT). OUTER_MASK is the
+ partitioning this loop is contained within. Return the cumulative
+ partitioning used by this loop, siblings and children. */
+
+static unsigned
+oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask)
+{
+ unsigned inner_mask = 0;
+ bool noisy = true;
+
+#ifdef ACCEL_COMPILER
+ /* Stay quiet in the device compiler for now. When device_type is
+ supported, it should be noisy for device_type-specific loop parameters. */
+ noisy = false;
+#endif
+
+ if (loop->child)
+ inner_mask |= oacc_loop_auto_partitions (loop->child,
+ outer_mask | loop->mask);
+
+ if ((loop->flags & OLF_AUTO) && (loop->flags & OLF_INDEPENDENT))
+ {
+ unsigned this_mask = 0;
+
+ /* Determine the outermost partitioning used within this loop (lowest set bit). */
+ this_mask = inner_mask | GOMP_DIM_MASK (GOMP_DIM_MAX);
+ this_mask = (this_mask & -this_mask);
+
+ /* Pick the partitioning one level out from that one (the next lower bit). */
+ this_mask >>= 1;
+
+ /* And avoid picking one used by an outer loop. */
+ this_mask &= ~outer_mask;
+
+ if (!this_mask && noisy)
+ warning_at (loop->loc, 0,
+ "insufficient partitioning available to parallelize loop");
+
+ loop->mask = this_mask;
+ }
+ inner_mask |= loop->mask;
+
+ if (loop->sibling)
+ inner_mask |= oacc_loop_auto_partitions (loop->sibling, outer_mask);
+
+ return inner_mask;
+}
+
/* Walk the OpenACC loop heirarchy to check and assign partitioning
axes. */
static void
-oacc_loop_partition (oacc_loop *loop, int fn_level)
+oacc_loop_partition (oacc_loop *loop, unsigned outer_mask)
{
- unsigned outer_mask = 0;
-
- if (fn_level >= 0)
- outer_mask = GOMP_DIM_MASK (fn_level) - 1;
-
- oacc_loop_fixed_partitions (loop, outer_mask);
+ if (oacc_loop_fixed_partitions (loop, outer_mask))
+ oacc_loop_auto_partitions (loop, outer_mask);
}
/* Default fork/join early expander. Delete the function calls if
@@ -19429,7 +19474,8 @@ execute_oacc_device_lower ()
/* Discover, partition and process the loops. */
oacc_loop *loops = oacc_loop_discovery ();
- oacc_loop_partition (loops, fn_level);
+ unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
+ oacc_loop_partition (loops, outer_mask);
oacc_loop_process (loops);
if (dump_file)
{
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 68cf4e9..3301130 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,7 @@
2015-11-13 Nathan Sidwell <nathan@codesourcery.com>
+ * c-c++-common/goacc/loop-auto-1.c: New.
+
* lib/target-supports.exp (check_effective_target_offload_nvptx): New.
* gcc.dg/goacc/nvptx-merged-loop.c: New.
diff --git a/gcc/testsuite/c-c++-common/goacc/loop-auto-1.c b/gcc/testsuite/c-c++-common/goacc/loop-auto-1.c
new file mode 100644
index 0000000..ee6d28c
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/loop-auto-1.c
@@ -0,0 +1,230 @@
+
+void Foo ()
+{
+
+#pragma acc parallel num_gangs(10) num_workers(32) vector_length(32)
+ {
+#pragma acc loop vector
+ for (int ix = 0; ix < 10; ix++)
+ {
+#pragma acc loop seq
+ for (int jx = 0; jx < 10; jx++) {}
+
+#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
+ for (int jx = 0; jx < 10; jx++) {}
+ }
+
+#pragma acc loop worker
+ for (int ix = 0; ix < 10; ix++)
+ {
+#pragma acc loop auto
+ for (int jx = 0; jx < 10; jx++) {}
+
+#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
+ for (int jx = 0; jx < 10; jx++)
+ {
+#pragma acc loop vector
+ for (int kx = 0; kx < 10; kx++) {}
+ }
+ }
+
+#pragma acc loop gang
+ for (int ix = 0; ix < 10; ix++)
+ {
+#pragma acc loop auto
+ for (int jx = 0; jx < 10; jx++) {}
+
+#pragma acc loop auto
+ for (int jx = 0; jx < 10; jx++)
+ {
+#pragma acc loop auto
+ for (int kx = 0; kx < 10; kx++) {}
+ }
+
+#pragma acc loop worker
+ for (int jx = 0; jx < 10; jx++)
+ {
+#pragma acc loop auto
+ for (int kx = 0; kx < 10; kx++) {}
+ }
+
+#pragma acc loop vector
+ for (int jx = 0; jx < 10; jx++)
+ {
+#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
+ for (int kx = 0; kx < 10; kx++) {}
+ }
+
+#pragma acc loop auto
+ for (int jx = 0; jx < 10; jx++)
+ {
+#pragma acc loop vector
+ for (int kx = 0; kx < 10; kx++) {}
+ }
+
+ }
+
+#pragma acc loop auto
+ for (int ix = 0; ix < 10; ix++)
+ {
+#pragma acc loop auto
+ for (int jx = 0; jx < 10; jx++)
+ {
+#pragma acc loop auto
+ for (int kx = 0; kx < 10; kx++) {}
+ }
+ }
+ }
+}
+
+#pragma acc routine gang
+void Gang (void)
+{
+#pragma acc loop vector
+ for (int ix = 0; ix < 10; ix++)
+ {
+#pragma acc loop seq
+ for (int jx = 0; jx < 10; jx++) {}
+
+#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
+ for (int jx = 0; jx < 10; jx++) {}
+ }
+
+#pragma acc loop worker
+ for (int ix = 0; ix < 10; ix++)
+ {
+#pragma acc loop auto
+ for (int jx = 0; jx < 10; jx++) {}
+
+#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
+ for (int jx = 0; jx < 10; jx++)
+ {
+#pragma acc loop vector
+ for (int kx = 0; kx < 10; kx++) {}
+ }
+ }
+
+#pragma acc loop gang
+ for (int ix = 0; ix < 10; ix++)
+ {
+#pragma acc loop auto
+ for (int jx = 0; jx < 10; jx++) {}
+
+#pragma acc loop auto
+ for (int jx = 0; jx < 10; jx++)
+ {
+#pragma acc loop auto
+ for (int kx = 0; kx < 10; kx++) {}
+ }
+
+#pragma acc loop worker
+ for (int jx = 0; jx < 10; jx++)
+ {
+#pragma acc loop auto
+ for (int kx = 0; kx < 10; kx++) {}
+ }
+
+#pragma acc loop vector
+ for (int jx = 0; jx < 10; jx++)
+ {
+#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
+ for (int kx = 0; kx < 10; kx++) {}
+ }
+
+#pragma acc loop auto
+ for (int jx = 0; jx < 10; jx++)
+ {
+#pragma acc loop vector
+ for (int kx = 0; kx < 10; kx++) {}
+ }
+
+ }
+
+#pragma acc loop auto
+ for (int ix = 0; ix < 10; ix++)
+ {
+#pragma acc loop auto
+ for (int jx = 0; jx < 10; jx++)
+ {
+#pragma acc loop auto
+ for (int kx = 0; kx < 10; kx++) {}
+ }
+ }
+}
+
+#pragma acc routine worker
+void Worker (void)
+{
+#pragma acc loop vector
+ for (int ix = 0; ix < 10; ix++)
+ {
+#pragma acc loop seq
+ for (int jx = 0; jx < 10; jx++) {}
+
+#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
+ for (int jx = 0; jx < 10; jx++) {}
+ }
+
+#pragma acc loop worker
+ for (int ix = 0; ix < 10; ix++)
+ {
+#pragma acc loop auto
+ for (int jx = 0; jx < 10; jx++) {}
+
+#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
+ for (int jx = 0; jx < 10; jx++)
+ {
+#pragma acc loop vector
+ for (int kx = 0; kx < 10; kx++) {}
+ }
+ }
+
+#pragma acc loop auto
+ for (int ix = 0; ix < 10; ix++)
+ {
+#pragma acc loop auto
+ for (int jx = 0; jx < 10; jx++) {}
+ }
+
+#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
+ for (int ix = 0; ix < 10; ix++)
+ {
+#pragma acc loop auto
+ for (int jx = 0; jx < 10; jx++)
+ {
+#pragma acc loop auto
+ for (int kx = 0; kx < 10; kx++) {}
+ }
+ }
+}
+
+#pragma acc routine vector
+void Vector (void)
+{
+#pragma acc loop vector
+ for (int ix = 0; ix < 10; ix++)
+ {
+#pragma acc loop seq
+ for (int jx = 0; jx < 10; jx++) {}
+
+#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
+ for (int jx = 0; jx < 10; jx++) {}
+ }
+
+#pragma acc loop auto
+ for (int ix = 0; ix < 10; ix++) {}
+
+#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
+ for (int ix = 0; ix < 10; ix++)
+ {
+#pragma acc loop auto
+ for (int jx = 0; jx < 10; jx++) {}
+ }
+}
+
+#pragma acc routine seq
+void Seq (void)
+{
+#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
+ for (int ix = 0; ix < 10; ix++) {}
+}