aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Tom de Vries <tdevries@suse.de> 2018-12-19 10:16:51 +0000
committer Tom de Vries <vries@gcc.gnu.org> 2018-12-19 10:16:51 +0000
commit 22aa0613532c867b252ebfa681a0b74231a82efa (patch)
tree eccf2f6784d32219c737be2665f0f97cf03fbbec
parent 43be05f54315b889662bb4f8c085cce301a03862 (diff)
download gcc-22aa0613532c867b252ebfa681a0b74231a82efa.zip
gcc-22aa0613532c867b252ebfa681a0b74231a82efa.tar.gz
gcc-22aa0613532c867b252ebfa681a0b74231a82efa.tar.bz2
[nvptx] Only use one logical barrier resource
For openacc loops, we generate this style of code:
...
        @%r41   bra.uni $L5;
        @%r40   bra     $L6;
                mov.u64 %r32, %ar0;
                cvta.shared.u64 %r39, __worker_bcast;
                st.u64  [%r39], %r32;
$L6:
$L5:
                bar.sync 0;
        @%r40   bra     $L4;
                cvta.shared.u64 %r38, __worker_bcast;
                ld.u64  %r32, [%r38];
                ...
$L4:
                bar.sync 1;
...

The first barrier is there to ensure that no thread reads the broadcast
buffer before it's written.  The second barrier is there to ensure that no
thread overwrites the broadcast buffer before all threads have read it (as
well as implementing the obligatory synchronization after a worker loop).

We've been using the logical barrier resources '0' and '1' for these two
barriers, but there's no reason why we can't use the same one.

Use logical barrier resource '0' for both barriers, making the openacc
implementation claim fewer resources.

Built and reg-tested on x86_64 with nvptx accelerator.

2018-12-19  Tom de Vries  <tdevries@suse.de>

	* config/nvptx/nvptx.c (nvptx_single): Always pass false to nvptx_wsync.
	(nvptx_process_pars): Likewise.

From-SVN: r267257
-rw-r--r-- gcc/ChangeLog 6
-rw-r--r-- gcc/config/nvptx/nvptx.c 4
2 files changed, 8 insertions, 2 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 82f381a..9596b26 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,11 @@
2018-12-19 Tom de Vries <tdevries@suse.de>
+ * config/nvptx/nvptx.c (nvptx_single): Always pass false to
+ nvptx_wsync.
+ (nvptx_process_pars): Likewise.
+
+2018-12-19 Tom de Vries <tdevries@suse.de>
+
* config/nvptx/nvptx.c (nvptx_previous_fndecl): Declare.
(nvptx_set_current_function): New function.
(TARGET_SET_CURRENT_FUNCTION): Define.
diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 9f834d3..a354811 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -4351,7 +4351,7 @@ nvptx_single (unsigned mask, basic_block from, basic_block to)
/* This barrier is needed to avoid worker zero clobbering
the broadcast buffer before all the other workers have
had a chance to read this instance of it. */
- emit_insn_before (nvptx_wsync (true), tail);
+ emit_insn_before (nvptx_wsync (false), tail);
}
extract_insn (tail);
@@ -4476,7 +4476,7 @@ nvptx_process_pars (parallel *par)
{
/* Insert begin and end synchronizations. */
emit_insn_before (nvptx_wsync (false), par->forked_insn);
- emit_insn_before (nvptx_wsync (true), par->join_insn);
+ emit_insn_before (nvptx_wsync (false), par->join_insn);
}
}
else if (par->mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR))