about summary refs log tree commit diff
diff options
context:
space:
mode:
author Nathan Sidwell <nathan@codesourcery.com> 2016-05-02 13:16:22 +0000
committer Nathan Sidwell <nathan@gcc.gnu.org> 2016-05-02 13:16:22 +0000
commit 5b37e8664b05336df60996a2411b4d61a852c613 (patch)
tree 87acdea4347b49c250d21fc4ba6bac0ece39f7c3
parent 87cdf04b1adacaf5dc161d2c0c1f990f4a36d67f (diff)
download gcc-5b37e8664b05336df60996a2411b4d61a852c613.zip
gcc-5b37e8664b05336df60996a2411b4d61a852c613.tar.gz
gcc-5b37e8664b05336df60996a2411b4d61a852c613.tar.bz2
omp-low.c (struct oacc_loop): Add 'inner' field.
gcc/
	* omp-low.c (struct oacc_loop): Add 'inner' field.
	(new_oacc_loop_raw): Initialize it to zero.
	(oacc_loop_fixed_partitions): Initialize it.
	(oacc_loop_auto_partitions): Partition outermost loop to outermost
	available partitioning.

gcc/testsuite/
	* c-c++-common/goacc/loop-auto-1.c: Adjust expected warnings.

libgomp/
	* testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c: Adjust
	expected partitioning.

From-SVN: r235756
-rw-r--r-- gcc/ChangeLog 8
-rw-r--r-- gcc/omp-low.c 52
-rw-r--r-- gcc/testsuite/ChangeLog 4
-rw-r--r-- gcc/testsuite/c-c++-common/goacc/loop-auto-1.c 8
-rw-r--r-- libgomp/ChangeLog 5
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c 54
6 files changed, 88 insertions, 43 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index a098272..ff0b573 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2016-05-02 Nathan Sidwell <nathan@codesourcery.com>
+
+ * omp-low.c (struct oacc_loop): Add 'inner' field.
+ (new_oacc_loop_raw): Initialize it to zero.
+ (oacc_loop_fixed_partitions): Initialize it.
+ (oacc_loop_auto_partitions): Partition outermost loop to outermost
+ available partitioning.
+
2016-05-02 Claudiu Zissulescu <claziss@synopsys.com>
* config/arc/arc.md (mulsidi3): Change operand 0 predicate to
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 50ad68e..e4a1e47 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -241,6 +241,7 @@ struct oacc_loop
tree routine; /* Pseudo-loop enclosing a routine. */
unsigned mask; /* Partitioning mask. */
+ unsigned inner; /* Partitioning of inner loops. */
unsigned flags; /* Partitioning flags. */
unsigned ifns; /* Contained loop abstraction functions. */
tree chunk_size; /* Chunk size. */
@@ -18921,7 +18922,7 @@ new_oacc_loop_raw (oacc_loop *parent, location_t loc)
memset (loop->tails, 0, sizeof (loop->tails));
loop->routine = NULL_TREE;
- loop->mask = loop->flags = 0;
+ loop->mask = loop->flags = loop->inner = 0;
loop->ifns = 0;
loop->chunk_size = 0;
loop->head_end = NULL;
@@ -19449,8 +19450,11 @@ oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
mask_all |= this_mask;
if (loop->child)
- mask_all |= oacc_loop_fixed_partitions (loop->child,
- outer_mask | this_mask);
+ {
+ loop->inner = oacc_loop_fixed_partitions (loop->child,
+ outer_mask | this_mask);
+ mask_all |= loop->inner;
+ }
if (loop->sibling)
mask_all |= oacc_loop_fixed_partitions (loop->sibling, outer_mask);
@@ -19466,7 +19470,7 @@ oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
static unsigned
oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask)
{
- unsigned inner_mask = 0;
+ bool assign = (loop->flags & OLF_AUTO) && (loop->flags & OLF_INDEPENDENT);
bool noisy = true;
#ifdef ACCEL_COMPILER
@@ -19475,16 +19479,33 @@ oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask)
noisy = false;
#endif
+ if (assign && outer_mask < GOMP_DIM_MASK (GOMP_DIM_MAX - 1))
+ {
+ /* Allocate the outermost loop at the outermost available
+ level. */
+ unsigned this_mask = outer_mask + 1;
+
+ if (!(this_mask & loop->inner))
+ loop->mask = this_mask;
+ }
+
if (loop->child)
- inner_mask |= oacc_loop_auto_partitions (loop->child,
- outer_mask | loop->mask);
+ {
+ unsigned child_mask = outer_mask | loop->mask;
+
+ if (loop->mask || assign)
+ child_mask |= GOMP_DIM_MASK (GOMP_DIM_MAX);
- if ((loop->flags & OLF_AUTO) && (loop->flags & OLF_INDEPENDENT))
+ loop->inner = oacc_loop_auto_partitions (loop->child, child_mask);
+ }
+
+ if (assign && !loop->mask)
{
+ /* Allocate the loop at the innermost available level. */
unsigned this_mask = 0;
/* Determine the outermost partitioning used within this loop. */
- this_mask = inner_mask | GOMP_DIM_MASK (GOMP_DIM_MAX);
+ this_mask = loop->inner | GOMP_DIM_MASK (GOMP_DIM_MAX);
this_mask = (this_mask & -this_mask);
/* Pick the partitioning just inside that one. */
@@ -19497,17 +19518,20 @@ oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask)
warning_at (loop->loc, 0,
"insufficient partitioning available to parallelize loop");
- if (dump_file)
- fprintf (dump_file, "Auto loop %s:%d assigned %d\n",
- LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
- this_mask);
-
loop->mask = this_mask;
}
- inner_mask |= loop->mask;
+
+ if (assign && dump_file)
+ fprintf (dump_file, "Auto loop %s:%d assigned %d\n",
+ LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
+ loop->mask);
+
+ unsigned inner_mask = 0;
if (loop->sibling)
inner_mask |= oacc_loop_auto_partitions (loop->sibling, outer_mask);
+
+ inner_mask |= loop->inner | loop->mask;
return inner_mask;
}
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 3d6051c..27a5972 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2016-05-02 Nathan Sidwell <nathan@codesourcery.com>
+
+ * c-c++-common/goacc/loop-auto-1.c: Adjust expected warnings.
+
2016-05-02 Marek Polacek <polacek@redhat.com>
PR c/70851
diff --git a/gcc/testsuite/c-c++-common/goacc/loop-auto-1.c b/gcc/testsuite/c-c++-common/goacc/loop-auto-1.c
index ee6d28c..33d5340 100644
--- a/gcc/testsuite/c-c++-common/goacc/loop-auto-1.c
+++ b/gcc/testsuite/c-c++-common/goacc/loop-auto-1.c
@@ -186,10 +186,10 @@ void Worker (void)
for (int jx = 0; jx < 10; jx++) {}
}
-#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
+#pragma acc loop auto
for (int ix = 0; ix < 10; ix++)
{
-#pragma acc loop auto
+#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
for (int jx = 0; jx < 10; jx++)
{
#pragma acc loop auto
@@ -214,10 +214,10 @@ void Vector (void)
#pragma acc loop auto
for (int ix = 0; ix < 10; ix++) {}
-#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
+#pragma acc loop auto
for (int ix = 0; ix < 10; ix++)
{
-#pragma acc loop auto
+#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
for (int jx = 0; jx < 10; jx++) {}
}
}
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog
index 351c239..9de04f5 100644
--- a/libgomp/ChangeLog
+++ b/libgomp/ChangeLog
@@ -1,3 +1,8 @@
+2016-05-02 Nathan Sidwell <nathan@codesourcery.com>
+
+ * testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c: Adjust
+ expected partitioning.
+
2016-04-29 Cesar Philippidis <cesar@codesourcery.com>
PR middle-end/70626
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c
index 622bbdf..8a755b8 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c
@@ -103,9 +103,11 @@ int vector_1 (int *ary, int size)
#pragma acc parallel num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size)
{
+#pragma acc loop gang
+ for (int jx = 0; jx < 1; jx++)
#pragma acc loop auto
- for (int ix = 0; ix < size; ix++)
- ary[ix] = place ();
+ for (int ix = 0; ix < size; ix++)
+ ary[ix] = place ();
}
return check (ary, size, 0, 0, 1);
@@ -118,7 +120,7 @@ int vector_2 (int *ary, int size)
#pragma acc parallel num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size)
{
#pragma acc loop worker
- for (int jx = 0; jx < size / 64; jx++)
+ for (int jx = 0; jx < size / 64; jx++)
#pragma acc loop auto
for (int ix = 0; ix < 64; ix++)
ary[ix + jx * 64] = place ();
@@ -133,30 +135,16 @@ int worker_1 (int *ary, int size)
#pragma acc parallel num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size)
{
+#pragma acc loop gang
+ for (int kx = 0; kx < 1; kx++)
#pragma acc loop auto
- for (int jx = 0; jx < size / 64; jx++)
+ for (int jx = 0; jx < size / 64; jx++)
#pragma acc loop vector
- for (int ix = 0; ix < 64; ix++)
- ary[ix + jx * 64] = place ();
- }
-
- return check (ary, size, 0, 1, 1);
-}
-
-int worker_2 (int *ary, int size)
-{
- clear (ary, size);
-
-#pragma acc parallel num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size)
- {
-#pragma acc loop auto
- for (int jx = 0; jx < size / 64; jx++)
-#pragma acc loop auto
- for (int ix = 0; ix < 64; ix++)
- ary[ix + jx * 64] = place ();
+ for (int ix = 0; ix < 64; ix++)
+ ary[ix + jx * 64] = place ();
}
- return check (ary, size, 0, 1, 1);
+ return check (ary, size, 0, 1, 1);
}
int gang_1 (int *ary, int size)
@@ -193,6 +181,22 @@ int gang_2 (int *ary, int size)
return check (ary, size, 1, 1, 1);
}
+int gang_3 (int *ary, int size)
+{
+ clear (ary, size);
+
+#pragma acc parallel num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size)
+ {
+#pragma acc loop auto
+ for (int jx = 0; jx < size / 64; jx++)
+#pragma acc loop auto
+ for (int ix = 0; ix < 64; ix++)
+ ary[ix + jx * 64] = place ();
+ }
+
+ return check (ary, size, 1, 0, 1);
+}
+
#define N (32*32*32)
int main ()
{
@@ -214,13 +218,13 @@ int main ()
if (worker_1 (ary, N))
return 1;
- if (worker_2 (ary, N))
- return 1;
if (gang_1 (ary, N))
return 1;
if (gang_2 (ary, N))
return 1;
+ if (gang_3 (ary, N))
+ return 1;
return 0;
}