aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorUros Bizjak <ubizjak@gmail.com>2023-05-05 14:10:18 +0200
committerUros Bizjak <ubizjak@gmail.com>2023-05-05 14:11:15 +0200
commit919642fa4b2bc4c32910336dd200d53766801c80 (patch)
treed763d913fa98edebe48cd4e4d8c4ae3e6ebc786c
parente383fc69d2a3eab37319ea41543ee09c8cdd6e57 (diff)
downloadgcc-919642fa4b2bc4c32910336dd200d53766801c80.zip
gcc-919642fa4b2bc4c32910336dd200d53766801c80.tar.gz
gcc-919642fa4b2bc4c32910336dd200d53766801c80.tar.bz2
i386: Introduce mulv2si3 instruction
For SSE2 targets, the expander unpacks the input elements into the correct
positions in a V4SI vector and emits a PMULUDQ instruction.  The output
elements are then shuffled back to their positions in the V2SI vector.
For SSE4.1 targets, a PMULLD instruction is emitted directly.

gcc/ChangeLog:

	* config/i386/mmx.md (mulv2si3): New expander.
	(*mulv2si3): New insn pattern.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/sse2-mmx-mult-vec.c: New test.
-rw-r--r--gcc/config/i386/mmx.md49
-rw-r--r--gcc/testsuite/gcc.target/i386/sse2-mmx-mult-vec.c27
2 files changed, 76 insertions, 0 deletions
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 872ddbc..6dd203f 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -2092,6 +2092,55 @@
(set_attr "type" "sseadd")
(set_attr "mode" "TI")])
+;; Expander for an element-wise V2SI multiply, enabled under
+;; TARGET_MMX_WITH_SSE.  When TARGET_SSE4_1 is not set, the C body
+;; below emits the complete instruction sequence itself and ends with
+;; DONE.  When TARGET_SSE4_1 is set, the C body emits nothing, leaving
+;; the (mult:V2SI ...) template of this expander as the generated RTL.
+(define_expand "mulv2si3"
+ [(set (match_operand:V2SI 0 "register_operand")
+ (mult:V2SI
+ (match_operand:V2SI 1 "register_operand")
+ (match_operand:V2SI 2 "register_operand")))]
+ "TARGET_MMX_WITH_SSE"
+{
+ if (!TARGET_SSE4_1)
+ {
+ /* Force each V2SI input into a register and view it as the low
+ part of a V4SI value.  */
+ rtx op1 = lowpart_subreg (V4SImode, force_reg (V2SImode, operands[1]),
+ V2SImode);
+ rtx op2 = lowpart_subreg (V4SImode, force_reg (V2SImode, operands[2]),
+ V2SImode);
+
+ /* Interleave the low half of each input with itself.  Per the commit
+ description, this unpacks the input elements into the positions
+ that the PMULUDQ-based multiply below reads.  */
+ rtx tmp1 = gen_reg_rtx (V4SImode);
+ emit_insn (gen_vec_interleave_lowv4si (tmp1, op1, op1));
+ rtx tmp2 = gen_reg_rtx (V4SImode);
+ emit_insn (gen_vec_interleave_lowv4si (tmp2, op2, op2));
+
+ /* Widening unsigned multiply of the even V4SI elements; the result
+ is a V2DI value.  */
+ rtx res = gen_reg_rtx (V2DImode);
+ emit_insn (gen_vec_widen_umult_even_v4si (res, tmp1, tmp2));
+
+ /* View the V2DI result as V4SI and select elements 0, 2, 0, 2, so
+ that the low 32 bits of each 64-bit product land in the two
+ lowest elements of op0.  */
+ rtx op0 = gen_reg_rtx (V4SImode);
+ emit_insn (gen_sse2_pshufd_1 (op0, gen_lowpart (V4SImode, res),
+ const0_rtx, const2_rtx,
+ const0_rtx, const2_rtx));
+
+ /* The low V2SI part of op0 is the final result.  */
+ emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
+ DONE;
+ }
+})
+
+;; Insn pattern for the V2SI multiply, matched only when both
+;; TARGET_SSE4_1 and TARGET_MMX_WITH_SSE hold.  The first two
+;; alternatives are tagged "noavx" and emit the two-operand pmulld,
+;; with operand 1 tied to the destination (constraint "0").  The third
+;; alternative is tagged "avx" and emits the three-operand vpmulld.
+;; The leading "%" on operand 1 marks operands 1 and 2 as commutative.
+(define_insn "*mulv2si3"
+ [(set (match_operand:V2SI 0 "register_operand" "=Yr,*x,v")
+ (mult:V2SI
+ (match_operand:V2SI 1 "register_operand" "%0,0,v")
+ (match_operand:V2SI 2 "register_operand" "Yr,*x,v")))]
+ "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
+ "@
+ pmulld\t{%2, %0|%0, %2}
+ pmulld\t{%2, %0|%0, %2}
+ vpmulld\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "noavx,noavx,avx")
+ (set_attr "type" "sseimul")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "orig,orig,vex")
+ (set_attr "btver2_decode" "vector")
+ (set_attr "mode" "TI")])
+
(define_expand "mmx_mulv4hi3"
[(set (match_operand:V4HI 0 "register_operand")
(mult:V4HI (match_operand:V4HI 1 "register_mmxmem_operand")
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-mult-vec.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-mult-vec.c
new file mode 100644
index 0000000..cdc9a7b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-mult-vec.c
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -msse2" } */
+/* { dg-require-effective-target sse2 } */
+
+#include "sse2-check.h"
+
+/* Number of elements in each test vector.  */
+#define N 2
+
+/* Multiplicands, multipliers and the computed products.  */
+int a[N] = {-287807, 604344};
+int b[N] = {474362, 874120};
+int r[N];
+
+/* Expected products.  The exact values of a[i] * b[i] do not fit in
+ 32 bits; rc[] holds their low 32 bits interpreted as signed int.  */
+int rc[N] = {914249338, -11800128};
+
+/* Test body: multiply a[] by b[] element-wise into r[], then compare
+ r[] against the expected values in rc[] and abort on any mismatch.
+ Presumably invoked by the harness in sse2-check.h, which is not
+ visible here.  */
+static void
+sse2_test (void)
+{
+ int i;
+
+ /* The multiply loop under test.  */
+ for (i = 0; i < N; i++)
+ r[i] = a[i] * b[i];
+
+ /* Check results; any mismatch aborts the test.  */
+ for (i = 0; i < N; i++)
+ if (r[i] != rc[i])
+ abort ();
+}