diff options
author | Tom de Vries <tdevries@suse.de> | 2019-01-07 08:10:56 +0000 |
---|---|---|
committer | Tom de Vries <vries@gcc.gnu.org> | 2019-01-07 08:10:56 +0000 |
commit | d495b5ccd252fdb01c70a73f145a008555b5a253 (patch) | |
tree | bf512a0d28ea5a6b009ba100e4b90493d4b5ea84 /gcc | |
parent | 0024c32045a72ef45a64ab5673a2ba587978d9d1 (diff) | |
download | gcc-d495b5ccd252fdb01c70a73f145a008555b5a253.zip gcc-d495b5ccd252fdb01c70a73f145a008555b5a253.tar.gz gcc-d495b5ccd252fdb01c70a73f145a008555b5a253.tar.bz2 |
[nvptx] Don't emit barriers for empty loops -- fix
When compiling an empty loop:
...
long long v1;
#pragma acc parallel num_gangs (640) num_workers(1) vector_length (128)
#pragma acc loop
for (v1 = 0; v1 < 20; v1 += 2)
;
...
the compiler emits two consecutive bar.sync instructions. This triggers some bug on my
Quadro M1200 (I'm assuming in the ptxas/JIT compiler) that hangs the testcase.
This patch works around the bug by doing an optimization: we detect that this is
an empty loop (a forked insn immediately followed by a joining insn), and don't
emit the barriers.
The patch does not include the test-case yet, since vector_length (128) is not
yet supported at this point.
2019-01-07 Tom de Vries <tdevries@suse.de>
PR target/85381
* config/nvptx/nvptx.c (nvptx_process_pars): Don't emit barriers for
empty loops.
From-SVN: r267630
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 6 | ||||
-rw-r--r-- | gcc/config/nvptx/nvptx.c | 15 |
2 files changed, 17 insertions, 4 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 6216416..bb4fd52 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,11 @@
 2019-01-07 Tom de Vries <tdevries@suse.de>
 
+        PR target/85381
+        * config/nvptx/nvptx.c (nvptx_process_pars): Don't emit barriers for
+        empty loops.
+
+2019-01-07 Tom de Vries <tdevries@suse.de>
+
         * config/nvptx/nvptx.c (oacc_bcast_partition): Declare.
         (nvptx_option_override): Init oacc_bcast_partition.
         (nvptx_init_oacc_workers): New function.
diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 2166f37..26c8071 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -4636,9 +4636,12 @@ nvptx_process_pars (parallel *par)
         {
           nvptx_shared_propagate (false, is_call, par->forked_block,
                                   par->forked_insn, !worker);
-          bool empty = nvptx_shared_propagate (true, is_call,
-                                               par->forked_block, par->fork_insn,
-                                               !worker);
+          bool no_prop_p
+            = nvptx_shared_propagate (true, is_call, par->forked_block,
+                                      par->fork_insn, !worker);
+          bool empty_loop_p
+            = !is_call && (NEXT_INSN (par->forked_insn)
+                           && NEXT_INSN (par->forked_insn) == par->joining_insn);
           rtx barrier = GEN_INT (0);
           int threads = 0;
 
@@ -4648,7 +4651,11 @@ nvptx_process_pars (parallel *par)
               threads = nvptx_mach_vector_length ();
             }
 
-          if (!empty || !is_call)
+          if (no_prop_p && empty_loop_p)
+            ;
+          else if (no_prop_p && is_call)
+            ;
+          else
             {
               /* Insert begin and end synchronizations.  */
               emit_insn_before (nvptx_cta_sync (barrier, threads),