aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author: Tom de Vries <tdevries@suse.de> 2020-08-04 09:53:08 +0200
committer: Tom de Vries <tdevries@suse.de> 2020-08-04 11:59:08 +0200
commit: 344f09a756ebd50510cc1eb3db111fd61c527702 (patch)
tree: a58e2ebb455ec00a2ddf6f706d6493a7dd077eb6 /gcc
parent: 95f5a3258dd8a9584f2b10304f79441ef2d4c64c (diff)
download: gcc-344f09a756ebd50510cc1eb3db111fd61c527702.zip
gcc-344f09a756ebd50510cc1eb3db111fd61c527702.tar.gz
gcc-344f09a756ebd50510cc1eb3db111fd61c527702.tar.bz2
[nvptx] Handle V2DI/V2SI mode in nvptx_gen_shuffle
With the pr96628-part1.f90 source and -ftree-slp-vectorize, we run into an ICE because V2DI mode is not handled in nvptx_gen_shuffle.

Fix this by adding handling of V2DI as well as V2SI mode in nvptx_gen_shuffle.

Built and reg-tested on x86_64 with nvptx accelerator.

gcc/ChangeLog:

	PR target/96428
	* config/nvptx/nvptx.c (nvptx_gen_shuffle): Handle V2SI/V2DI.

libgomp/ChangeLog:

	PR target/96428
	* testsuite/libgomp.oacc-fortran/pr96628-part1.f90: New test.
	* testsuite/libgomp.oacc-fortran/pr96628-part2.f90: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/config/nvptx/nvptx.c 38
1 file changed, 38 insertions, 0 deletions
diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index d8a8fb2..cf53a92 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -1796,6 +1796,44 @@ nvptx_gen_shuffle (rtx dst, rtx src, rtx idx, nvptx_shuffle_kind kind)
end_sequence ();
}
break;
+ case E_V2SImode:
+ {
+ rtx src0 = gen_rtx_SUBREG (SImode, src, 0);
+ rtx src1 = gen_rtx_SUBREG (SImode, src, 4);
+ rtx dst0 = gen_rtx_SUBREG (SImode, dst, 0);
+ rtx dst1 = gen_rtx_SUBREG (SImode, dst, 4);
+ rtx tmp0 = gen_reg_rtx (SImode);
+ rtx tmp1 = gen_reg_rtx (SImode);
+ start_sequence ();
+ emit_insn (gen_movsi (tmp0, src0));
+ emit_insn (gen_movsi (tmp1, src1));
+ emit_insn (nvptx_gen_shuffle (tmp0, tmp0, idx, kind));
+ emit_insn (nvptx_gen_shuffle (tmp1, tmp1, idx, kind));
+ emit_insn (gen_movsi (dst0, tmp0));
+ emit_insn (gen_movsi (dst1, tmp1));
+ res = get_insns ();
+ end_sequence ();
+ }
+ break;
+ case E_V2DImode:
+ {
+ rtx src0 = gen_rtx_SUBREG (DImode, src, 0);
+ rtx src1 = gen_rtx_SUBREG (DImode, src, 8);
+ rtx dst0 = gen_rtx_SUBREG (DImode, dst, 0);
+ rtx dst1 = gen_rtx_SUBREG (DImode, dst, 8);
+ rtx tmp0 = gen_reg_rtx (DImode);
+ rtx tmp1 = gen_reg_rtx (DImode);
+ start_sequence ();
+ emit_insn (gen_movdi (tmp0, src0));
+ emit_insn (gen_movdi (tmp1, src1));
+ emit_insn (nvptx_gen_shuffle (tmp0, tmp0, idx, kind));
+ emit_insn (nvptx_gen_shuffle (tmp1, tmp1, idx, kind));
+ emit_insn (gen_movdi (dst0, tmp0));
+ emit_insn (gen_movdi (dst1, tmp1));
+ res = get_insns ();
+ end_sequence ();
+ }
+ break;
case E_BImode:
{
rtx tmp = gen_reg_rtx (SImode);