aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author Tom de Vries <tdevries@suse.de> 2019-01-07 08:10:56 +0000
committer Tom de Vries <vries@gcc.gnu.org> 2019-01-07 08:10:56 +0000
commit d495b5ccd252fdb01c70a73f145a008555b5a253 (patch)
tree bf512a0d28ea5a6b009ba100e4b90493d4b5ea84 /gcc
parent 0024c32045a72ef45a64ab5673a2ba587978d9d1 (diff)
download gcc-d495b5ccd252fdb01c70a73f145a008555b5a253.zip
gcc-d495b5ccd252fdb01c70a73f145a008555b5a253.tar.gz
gcc-d495b5ccd252fdb01c70a73f145a008555b5a253.tar.bz2
[nvptx] Don't emit barriers for empty loops -- fix
When compiling an empty loop: ... long long v1; #pragma acc parallel num_gangs (640) num_workers(1) vector_length (128) #pragma acc loop for (v1 = 0; v1 < 20; v1 += 2) ; ... the compiler emits two consecutive bar.sync instructions. This triggers some bug on my Quadro M1200 (I'm assuming in the ptxas/JIT compiler) that hangs the testcase. This patch works around the bug by doing an optimization: we detect that this is an empty loop (a forked insn immediately followed by a joining insn), and don't emit the barriers. The patch does not include the testcase yet, since vector_length (128) is not yet supported at this point. 2019-01-07 Tom de Vries <tdevries@suse.de> PR target/85381 * config/nvptx/nvptx.c (nvptx_process_pars): Don't emit barriers for empty loops. From-SVN: r267630
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 6
-rw-r--r-- gcc/config/nvptx/nvptx.c 15
2 files changed, 17 insertions, 4 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 6216416..bb4fd52 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,11 @@
2019-01-07 Tom de Vries <tdevries@suse.de>
+ PR target/85381
+ * config/nvptx/nvptx.c (nvptx_process_pars): Don't emit barriers for
+ empty loops.
+
+2019-01-07 Tom de Vries <tdevries@suse.de>
+
* config/nvptx/nvptx.c (oacc_bcast_partition): Declare.
(nvptx_option_override): Init oacc_bcast_partition.
(nvptx_init_oacc_workers): New function.
diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 2166f37..26c8071 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -4636,9 +4636,12 @@ nvptx_process_pars (parallel *par)
{
nvptx_shared_propagate (false, is_call, par->forked_block,
par->forked_insn, !worker);
- bool empty = nvptx_shared_propagate (true, is_call,
- par->forked_block, par->fork_insn,
- !worker);
+ bool no_prop_p
+ = nvptx_shared_propagate (true, is_call, par->forked_block,
+ par->fork_insn, !worker);
+ bool empty_loop_p
+ = !is_call && (NEXT_INSN (par->forked_insn)
+ && NEXT_INSN (par->forked_insn) == par->joining_insn);
rtx barrier = GEN_INT (0);
int threads = 0;
@@ -4648,7 +4651,11 @@ nvptx_process_pars (parallel *par)
threads = nvptx_mach_vector_length ();
}
- if (!empty || !is_call)
+ if (no_prop_p && empty_loop_p)
+ ;
+ else if (no_prop_p && is_call)
+ ;
+ else
{
/* Insert begin and end synchronizations. */
emit_insn_before (nvptx_cta_sync (barrier, threads),