aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- libgomp/config/gcn/bar.c 15
-rw-r--r-- libgomp/config/nvptx/bar.c 18
-rw-r--r-- libgomp/testsuite/libgomp.c-c++-common/nested-parallel-unbalanced.c 31
3 files changed, 53 insertions, 11 deletions
diff --git a/libgomp/config/gcn/bar.c b/libgomp/config/gcn/bar.c
index 02fd197..a21529a 100644
--- a/libgomp/config/gcn/bar.c
+++ b/libgomp/config/gcn/bar.c
@@ -43,7 +43,8 @@ gomp_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
__atomic_store_n (&bar->generation, bar->generation + BAR_INCR,
MEMMODEL_RELAXED);
}
- asm ("s_barrier" ::: "memory");
+ if (bar->total > 1)
+ asm ("s_barrier" ::: "memory");
}
void
@@ -71,7 +72,8 @@ gomp_barrier_wait_last (gomp_barrier_t *bar)
void
gomp_team_barrier_wake (gomp_barrier_t *bar, int count)
{
- asm ("s_barrier" ::: "memory");
+ if (bar->total > 1)
+ asm ("s_barrier" ::: "memory");
}
void
@@ -97,7 +99,8 @@ gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
state &= ~BAR_CANCELLED;
state += BAR_INCR - BAR_WAS_LAST;
__atomic_store_n (&bar->generation, state, MEMMODEL_RELAXED);
- asm ("s_barrier" ::: "memory");
+ if (bar->total > 1)
+ asm ("s_barrier" ::: "memory");
return;
}
}
@@ -172,7 +175,8 @@ gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
{
state += BAR_INCR - BAR_WAS_LAST;
__atomic_store_n (&bar->generation, state, MEMMODEL_RELAXED);
- asm ("s_barrier" ::: "memory");
+ if (bar->total > 1)
+ asm ("s_barrier" ::: "memory");
return false;
}
}
@@ -195,7 +199,8 @@ gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
abort();
}
- asm ("s_barrier" ::: "memory");
+ if (bar->total > 1)
+ asm ("s_barrier" ::: "memory");
gen = __atomic_load_n (&bar->generation, MEMMODEL_RELAXED);
if (__builtin_expect (gen & BAR_CANCELLED, 0))
return true;
diff --git a/libgomp/config/nvptx/bar.c b/libgomp/config/nvptx/bar.c
index 125ca3e..1116561 100644
--- a/libgomp/config/nvptx/bar.c
+++ b/libgomp/config/nvptx/bar.c
@@ -41,7 +41,8 @@ gomp_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
__atomic_store_n (&bar->generation, bar->generation + BAR_INCR,
MEMMODEL_RELEASE);
}
- asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
+ if (bar->total > 1)
+ asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
}
void
@@ -69,7 +70,8 @@ gomp_barrier_wait_last (gomp_barrier_t *bar)
void
gomp_team_barrier_wake (gomp_barrier_t *bar, int count)
{
- asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
+ if (bar->total > 1)
+ asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
}
void
@@ -95,7 +97,8 @@ gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
state &= ~BAR_CANCELLED;
state += BAR_INCR - BAR_WAS_LAST;
__atomic_store_n (&bar->generation, state, MEMMODEL_RELEASE);
- asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
+ if (bar->total > 1)
+ asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
return;
}
}
@@ -104,7 +107,8 @@ gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
state &= ~BAR_CANCELLED;
do
{
- asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
+ if (bar->total > 1)
+ asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
if (__builtin_expect (gen & BAR_TASK_PENDING, 0))
{
@@ -158,7 +162,8 @@ gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
{
state += BAR_INCR - BAR_WAS_LAST;
__atomic_store_n (&bar->generation, state, MEMMODEL_RELEASE);
- asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
+ if (bar->total > 1)
+ asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
return false;
}
}
@@ -169,7 +174,8 @@ gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
generation = state;
do
{
- asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
+ if (bar->total > 1)
+ asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
if (__builtin_expect (gen & BAR_CANCELLED, 0))
return true;
diff --git a/libgomp/testsuite/libgomp.c-c++-common/nested-parallel-unbalanced.c b/libgomp/testsuite/libgomp.c-c++-common/nested-parallel-unbalanced.c
new file mode 100644
index 0000000..e777271
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/nested-parallel-unbalanced.c
@@ -0,0 +1,31 @@
+/* Ensure that nested parallel regions work even when the number of loop
+ iterations is not divisible by the number of threads. */
+
+#include <stdlib.h>
+
+int main() { /* Test driver: copies part of A into B inside a target region, then verifies B. */
+ int A[30][40], B[30][40];
+ size_t n = 30; /* Bound used by both offloaded loops and by the verification loops. */
+
+ for (size_t i = 0; i < 30; ++i)
+ for (size_t j = 0; j < 40; ++j)
+ A[i][j] = 42; /* Every element of A holds the value the check expects. */
+
+#pragma omp target map(A[0:30][0:40], B[0:30][0:40])
+ {
+#pragma omp parallel for num_threads(8)
+ for (size_t i = 0; i < n; ++i) /* 30 iterations with 8 threads requested: not evenly divisible. */
+ {
+#pragma omp parallel for
+ for (size_t j = 0; j < n; ++j) /* Nested parallel loop; only columns 0..n-1 of each row are written. */
+ {
+ B[i][j] = A[i][j];
+ }
+ }
+ }
+
+for (size_t i = 0; i < n; ++i)
+ for (size_t j = 0; j < n; ++j) /* Verify only the n-by-n region that the target region copied. */
+ if (B[i][j] != 42)
+ abort (); /* Any element that was not copied correctly fails the test. */
+}