diff options
author | Tom de Vries <tdevries@suse.de> | 2021-04-28 16:00:01 +0200 |
---|---|---|
committer | Tom de Vries <tdevries@suse.de> | 2021-04-29 09:55:15 +0200 |
commit | 4d7c874e2c64ebf7631049ace642d246843febae (patch) | |
tree | 182db6db888838cfaa53ae9c89d0549eb3be4288 /gcc | |
parent | b58dc0b803057c0e6032e0d9bd92cd834f72c75c (diff) | |
download | gcc-4d7c874e2c64ebf7631049ace642d246843febae.zip gcc-4d7c874e2c64ebf7631049ace642d246843febae.tar.gz gcc-4d7c874e2c64ebf7631049ace642d246843febae.tar.bz2 |
[omp, simt] Fix expand_GOMP_SIMT_*
When running the test-case included in this patch using an
nvptx accelerator, it fails in execution.
The problem is that the expansion of GOMP_SIMT_XCHG_BFLY is optimized away
during pass_jump as "trivially dead insns".
This is caused by this code in expand_GOMP_SIMT_XCHG_BFLY:
...
class expand_operand ops[3];
create_output_operand (&ops[0], target, mode);
...
expand_insn (targetm.code_for_omp_simt_xchg_bfly, 3, ops);
...
which doesn't guarantee that target is assigned to by the expanded insn.
F.i., if target is:
...
(gdb) call debug_rtx ( target )
(subreg/s/u:QI (reg:SI 40 [ _61 ]) 0)
...
then after expand_insn, we have:
...
(gdb) call debug_rtx ( ops[0].value )
(reg:QI 57)
...
See commit 3af3bec2e4d "internal-fn: Avoid dropping the lhs of some
calls [PR94941]" for a similar problem.
Fix this in the same way, by adding:
...
if (!rtx_equal_p (target, ops[0].value))
emit_move_insn (target, ops[0].value);
...
where applicable in the expand_GOMP_SIMT_* functions.
Tested libgomp on x86_64 with nvptx accelerator.
gcc/ChangeLog:
2021-04-28 Tom de Vries <tdevries@suse.de>
PR target/100232
* internal-fn.c (expand_GOMP_SIMT_ENTER_ALLOC)
(expand_GOMP_SIMT_LAST_LANE, expand_GOMP_SIMT_ORDERED_PRED)
(expand_GOMP_SIMT_VOTE_ANY, expand_GOMP_SIMT_XCHG_BFLY)
(expand_GOMP_SIMT_XCHG_IDX): Ensure target is assigned to.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/internal-fn.c | 12 |
1 files changed, 12 insertions, 0 deletions
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c index dd71731..d209a52 100644 --- a/gcc/internal-fn.c +++ b/gcc/internal-fn.c @@ -243,6 +243,8 @@ expand_GOMP_SIMT_ENTER_ALLOC (internal_fn, gcall *stmt) create_input_operand (&ops[2], align, Pmode); gcc_assert (targetm.have_omp_simt_enter ()); expand_insn (targetm.code_for_omp_simt_enter, 3, ops); + if (!rtx_equal_p (target, ops[0].value)) + emit_move_insn (target, ops[0].value); } /* Deallocate per-lane storage and leave non-uniform execution region. */ @@ -300,6 +302,8 @@ expand_GOMP_SIMT_LAST_LANE (internal_fn, gcall *stmt) create_input_operand (&ops[1], cond, mode); gcc_assert (targetm.have_omp_simt_last_lane ()); expand_insn (targetm.code_for_omp_simt_last_lane, 2, ops); + if (!rtx_equal_p (target, ops[0].value)) + emit_move_insn (target, ops[0].value); } /* Non-transparent predicate used in SIMT lowering of OpenMP "ordered". */ @@ -319,6 +323,8 @@ expand_GOMP_SIMT_ORDERED_PRED (internal_fn, gcall *stmt) create_input_operand (&ops[1], ctr, mode); gcc_assert (targetm.have_omp_simt_ordered ()); expand_insn (targetm.code_for_omp_simt_ordered, 2, ops); + if (!rtx_equal_p (target, ops[0].value)) + emit_move_insn (target, ops[0].value); } /* "Or" boolean reduction across SIMT lanes: return non-zero in all lanes if @@ -339,6 +345,8 @@ expand_GOMP_SIMT_VOTE_ANY (internal_fn, gcall *stmt) create_input_operand (&ops[1], cond, mode); gcc_assert (targetm.have_omp_simt_vote_any ()); expand_insn (targetm.code_for_omp_simt_vote_any, 2, ops); + if (!rtx_equal_p (target, ops[0].value)) + emit_move_insn (target, ops[0].value); } /* Exchange between SIMT lanes with a "butterfly" pattern: source lane index @@ -361,6 +369,8 @@ expand_GOMP_SIMT_XCHG_BFLY (internal_fn, gcall *stmt) create_input_operand (&ops[2], idx, SImode); gcc_assert (targetm.have_omp_simt_xchg_bfly ()); expand_insn (targetm.code_for_omp_simt_xchg_bfly, 3, ops); + if (!rtx_equal_p (target, ops[0].value)) + emit_move_insn (target, ops[0].value); } /* Exchange between SIMT lanes according to given source lane index. */ @@ -382,6 +392,8 @@ expand_GOMP_SIMT_XCHG_IDX (internal_fn, gcall *stmt) create_input_operand (&ops[2], idx, SImode); gcc_assert (targetm.have_omp_simt_xchg_idx ()); expand_insn (targetm.code_for_omp_simt_xchg_idx, 3, ops); + if (!rtx_equal_p (target, ops[0].value)) + emit_move_insn (target, ops[0].value); } /* This should get expanded in adjust_simduid_builtins. */ |