aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTobias Burnus <tobias@codesourcery.com>2022-05-13 20:00:34 +0200
committerTobias Burnus <tobias@codesourcery.com>2022-05-13 20:00:34 +0200
commita46d6268371c446566f656858aada8775a0c988e (patch)
treed8a936caa2c14b43489d72edb77c5be0e1981c8e
parentbc25483c055d62f94f8c289f80843dda3c4a6ff4 (diff)
downloadgcc-a46d6268371c446566f656858aada8775a0c988e.zip
gcc-a46d6268371c446566f656858aada8775a0c988e.tar.gz
gcc-a46d6268371c446566f656858aada8775a0c988e.tar.bz2
OpenMP/Fortran: Use firstprivat not alloc for ptr attach for arrays
For a non-descriptor array, map(A(n:m)) was mapped as map(tofrom:A[n-1] [len: ...]) map(alloc:A [pointer assign, bias: ...]) with this patch, it is changed to map(tofrom:A[n-1] [len: ...]) map(firstprivate:A [pointer assign, bias: ...]) The latter avoids an alloc - and also avoids the race condition with nowait in the enclosed testcase. (Note: predantically, the testcase is invalid since OpenMP 5.1, violating the map clause restriction at [354:10-13]. gcc/fortran/ChangeLog: * trans-openmp.cc (gfc_trans_omp_clauses): When mapping nondescriptor array sections, use GOMP_MAP_FIRSTPRIVATE_POINTER instead of GOMP_MAP_POINTER for the pointer attachment. libgomp/ChangeLog: * testsuite/libgomp.fortran/target-nowait-array-section.f90: New test.
-rw-r--r--gcc/fortran/trans-openmp.cc12
-rw-r--r--libgomp/testsuite/libgomp.fortran/target-nowait-array-section.f9056
2 files changed, 65 insertions, 3 deletions
diff --git a/gcc/fortran/trans-openmp.cc b/gcc/fortran/trans-openmp.cc
index baa45f7..eb5870c 100644
--- a/gcc/fortran/trans-openmp.cc
+++ b/gcc/fortran/trans-openmp.cc
@@ -3312,9 +3312,15 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses,
/* An array element or array section which is not part of a
derived type, etc. */
bool element = n->expr->ref->u.ar.type == AR_ELEMENT;
- gfc_trans_omp_array_section (block, n, decl, element,
- GOMP_MAP_POINTER, node, node2,
- node3, node4);
+ tree type = TREE_TYPE (decl);
+ gomp_map_kind k = GOMP_MAP_POINTER;
+ if (!openacc
+ && !GFC_DESCRIPTOR_TYPE_P (type)
+ && !(POINTER_TYPE_P (type)
+ && GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (type))))
+ k = GOMP_MAP_FIRSTPRIVATE_POINTER;
+ gfc_trans_omp_array_section (block, n, decl, element, k,
+ node, node2, node3, node4);
}
else if (n->expr
&& n->expr->expr_type == EXPR_VARIABLE
diff --git a/libgomp/testsuite/libgomp.fortran/target-nowait-array-section.f90 b/libgomp/testsuite/libgomp.fortran/target-nowait-array-section.f90
new file mode 100644
index 0000000..7560cff
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/target-nowait-array-section.f90
@@ -0,0 +1,56 @@
+! Runs the the target region asynchrolously and checks for it
+!
+! Note that map(alloc: work(:, i)) + nowait should be save
+! given that a nondescriptor array is used. However, it still
+! violates a map clause restriction, added in OpenMP 5.1 [354:10-13].
+
+PROGRAM test_target_teams_distribute_nowait
+ USE ISO_Fortran_env, only: INT64
+ implicit none
+ INTEGER, parameter :: N = 1024, N_TASKS = 16
+ INTEGER :: i, j, k, my_ticket
+ INTEGER :: order(n_tasks)
+ INTEGER(INT64) :: work(n, n_tasks)
+ INTEGER :: ticket
+ logical :: async
+
+ ticket = 0
+
+ !$omp target enter data map(to: ticket, order)
+
+ !$omp parallel do num_threads(n_tasks)
+ DO i = 1, n_tasks
+ !$omp target map(alloc: work(:, i), ticket) private(my_ticket) nowait
+ !!$omp target teams distribute map(alloc: work(:, i), ticket) private(my_ticket) nowait
+ DO j = 1, n
+ ! Waste cyles
+! work(j, i) = 0
+! DO k = 1, n*(n_tasks - i)
+! work(j, i) = work(j, i) + i*j*k
+! END DO
+ my_ticket = 0
+ !$omp atomic capture
+ ticket = ticket + 1
+ my_ticket = ticket
+ !$omp end atomic
+ !$omp atomic write
+ order(i) = my_ticket
+ END DO
+ !$omp end target !teams distribute
+ END DO
+ !$omp end parallel do
+
+ !$omp target exit data map(from:ticket, order)
+
+ IF (ticket .ne. n_tasks*n) stop 1
+ if (maxval(order) /= n_tasks*n) stop 2
+ ! order(i) == n*i if synchronous and between n and n*n_tasks if run concurrently
+ do i = 1, n_tasks
+ if (order(i) < n .or. order(i) > n*n_tasks) stop 3
+ end do
+ async = .false.
+ do i = 1, n_tasks
+ if (order(i) /= n*i) async = .true.
+ end do
+ if (.not. async) stop 4 ! Did not run asynchronously
+end