aboutsummaryrefslogtreecommitdiff
path: root/gcc/omp-low.c
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/omp-low.c')
-rw-r--r-- gcc/omp-low.c 80
1 files changed, 63 insertions, 17 deletions
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index f7584de..4b2b477 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -2124,6 +2124,9 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
case OMP_CLAUSE_WORKER:
case OMP_CLAUSE_VECTOR:
case OMP_CLAUSE_TILE:
+ case OMP_CLAUSE_INDEPENDENT:
+ case OMP_CLAUSE_AUTO:
+ case OMP_CLAUSE_SEQ:
break;
case OMP_CLAUSE_ALIGNED:
@@ -2136,9 +2139,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
case OMP_CLAUSE_DEVICE_RESIDENT:
case OMP_CLAUSE_USE_DEVICE:
case OMP_CLAUSE__CACHE_:
- case OMP_CLAUSE_INDEPENDENT:
- case OMP_CLAUSE_AUTO:
- case OMP_CLAUSE_SEQ:
sorry ("Clause not supported yet");
break;
@@ -2299,14 +2299,14 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
case OMP_CLAUSE_WORKER:
case OMP_CLAUSE_VECTOR:
case OMP_CLAUSE_TILE:
+ case OMP_CLAUSE_INDEPENDENT:
+ case OMP_CLAUSE_AUTO:
+ case OMP_CLAUSE_SEQ:
break;
case OMP_CLAUSE_DEVICE_RESIDENT:
case OMP_CLAUSE_USE_DEVICE:
case OMP_CLAUSE__CACHE_:
- case OMP_CLAUSE_INDEPENDENT:
- case OMP_CLAUSE_AUTO:
- case OMP_CLAUSE_SEQ:
sorry ("Clause not supported yet");
break;
@@ -19230,10 +19230,10 @@ oacc_loop_process (oacc_loop *loop)
/* Walk the OpenACC loop heirarchy checking and assigning the
programmer-specified partitionings. OUTER_MASK is the partitioning
- this loop is contained within. Return partitiong mask used within
- this loop nest. */
+ this loop is contained within. Return true if we contain an
+ auto-partitionable loop. */
-static unsigned
+static bool
oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
{
unsigned this_mask = loop->mask;
@@ -19337,18 +19337,63 @@ oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
return has_auto;
}
+/* Walk the OpenACC loop hierarchy to assign auto-partitioned loops.
+ OUTER_MASK is the partitioning this loop is contained within.
+ Return the cumulative partitioning used by this loop, siblings and
+ children. */
+
+static unsigned
+oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask)
+{
+ unsigned inner_mask = 0;
+ bool noisy = true;
+
+#ifdef ACCEL_COMPILER
+ /* Stay quiet in the accelerator compiler for now. When device_type is
+ supported, we want it to be noisy if the loop parameters are device_type-specific. */
+ noisy = false;
+#endif
+
+ if (loop->child)
+ inner_mask |= oacc_loop_auto_partitions (loop->child,
+ outer_mask | loop->mask);
+
+ if ((loop->flags & OLF_AUTO) && (loop->flags & OLF_INDEPENDENT))
+ {
+ unsigned this_mask = 0;
+
+ /* Determine the outermost partitioning used within this loop. */
+ this_mask = inner_mask | GOMP_DIM_MASK (GOMP_DIM_MAX);
+ this_mask = (this_mask & -this_mask); /* Isolate the lowest set bit. */
+
+ /* Pick the partitioning just inside that one. */
+ this_mask >>= 1;
+
+ /* And avoid picking one used by an outer loop. */
+ this_mask &= ~outer_mask;
+
+ if (!this_mask && noisy)
+ warning_at (loop->loc, 0,
+ "insufficient partitioning available to parallelize loop");
+
+ loop->mask = this_mask;
+ }
+ inner_mask |= loop->mask;
+
+ if (loop->sibling)
+ inner_mask |= oacc_loop_auto_partitions (loop->sibling, outer_mask);
+
+ return inner_mask;
+}
+
/* Walk the OpenACC loop heirarchy to check and assign partitioning
axes. */
static void
-oacc_loop_partition (oacc_loop *loop, int fn_level)
+oacc_loop_partition (oacc_loop *loop, unsigned outer_mask)
{
- unsigned outer_mask = 0;
-
- if (fn_level >= 0)
- outer_mask = GOMP_DIM_MASK (fn_level) - 1;
-
- oacc_loop_fixed_partitions (loop, outer_mask);
+ if (oacc_loop_fixed_partitions (loop, outer_mask))
+ oacc_loop_auto_partitions (loop, outer_mask);
}
/* Default fork/join early expander. Delete the function calls if
@@ -19429,7 +19474,8 @@ execute_oacc_device_lower ()
/* Discover, partition and process the loops. */
oacc_loop *loops = oacc_loop_discovery ();
- oacc_loop_partition (loops, fn_level);
+ unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
+ oacc_loop_partition (loops, outer_mask);
oacc_loop_process (loops);
if (dump_file)
{