diff options
author | Tom de Vries <tdevries@suse.de> | 2020-08-04 09:53:08 +0200 |
---|---|---|
committer | Tom de Vries <tdevries@suse.de> | 2020-08-04 11:59:08 +0200 |
commit | 344f09a756ebd50510cc1eb3db111fd61c527702 (patch) | |
tree | a58e2ebb455ec00a2ddf6f706d6493a7dd077eb6 /gcc | |
parent | 95f5a3258dd8a9584f2b10304f79441ef2d4c64c (diff) | |
download | gcc-344f09a756ebd50510cc1eb3db111fd61c527702.zip gcc-344f09a756ebd50510cc1eb3db111fd61c527702.tar.gz gcc-344f09a756ebd50510cc1eb3db111fd61c527702.tar.bz2 |
[nvptx] Handle V2DI/V2SI mode in nvptx_gen_shuffle
With the pr96628-part1.f90 source and -ftree-slp-vectorize, we run into an
ICE due to the fact that V2DI mode is not handled in nvptx_gen_shuffle.
Fix this by adding handling of V2DI as well as V2SI mode in
nvptx_gen_shuffle.
Built and reg-tested on x86_64 with nvptx accelerator.
gcc/ChangeLog:
PR target/96428
* config/nvptx/nvptx.c (nvptx_gen_shuffle): Handle V2SI/V2DI.
libgomp/ChangeLog:
PR target/96428
* testsuite/libgomp.oacc-fortran/pr96628-part1.f90: New test.
* testsuite/libgomp.oacc-fortran/pr96628-part2.f90: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/nvptx/nvptx.c | 38 |
1 files changed, 38 insertions, 0 deletions
diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index d8a8fb2..cf53a92 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -1796,6 +1796,44 @@ nvptx_gen_shuffle (rtx dst, rtx src, rtx idx, nvptx_shuffle_kind kind) end_sequence (); } break; + case E_V2SImode: + { + rtx src0 = gen_rtx_SUBREG (SImode, src, 0); + rtx src1 = gen_rtx_SUBREG (SImode, src, 4); + rtx dst0 = gen_rtx_SUBREG (SImode, dst, 0); + rtx dst1 = gen_rtx_SUBREG (SImode, dst, 4); + rtx tmp0 = gen_reg_rtx (SImode); + rtx tmp1 = gen_reg_rtx (SImode); + start_sequence (); + emit_insn (gen_movsi (tmp0, src0)); + emit_insn (gen_movsi (tmp1, src1)); + emit_insn (nvptx_gen_shuffle (tmp0, tmp0, idx, kind)); + emit_insn (nvptx_gen_shuffle (tmp1, tmp1, idx, kind)); + emit_insn (gen_movsi (dst0, tmp0)); + emit_insn (gen_movsi (dst1, tmp1)); + res = get_insns (); + end_sequence (); + } + break; + case E_V2DImode: + { + rtx src0 = gen_rtx_SUBREG (DImode, src, 0); + rtx src1 = gen_rtx_SUBREG (DImode, src, 8); + rtx dst0 = gen_rtx_SUBREG (DImode, dst, 0); + rtx dst1 = gen_rtx_SUBREG (DImode, dst, 8); + rtx tmp0 = gen_reg_rtx (DImode); + rtx tmp1 = gen_reg_rtx (DImode); + start_sequence (); + emit_insn (gen_movdi (tmp0, src0)); + emit_insn (gen_movdi (tmp1, src1)); + emit_insn (nvptx_gen_shuffle (tmp0, tmp0, idx, kind)); + emit_insn (nvptx_gen_shuffle (tmp1, tmp1, idx, kind)); + emit_insn (gen_movdi (dst0, tmp0)); + emit_insn (gen_movdi (dst1, tmp1)); + res = get_insns (); + end_sequence (); + } + break; case E_BImode: { rtx tmp = gen_reg_rtx (SImode); |