aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorRichard Henderson <rth@redhat.com>2005-05-16 21:33:42 -0700
committerRichard Henderson <rth@gcc.gnu.org>2005-05-16 21:33:42 -0700
commit2b5bf0e27e0b77a48f0f8a046cb6a56280137054 (patch)
treef8700af6e8db43e79f6805f9ce3ecfb5e6382a08 /gcc
parent9d9bd40fb67d65bc99b142f66128b3ad07e63330 (diff)
downloadgcc-2b5bf0e27e0b77a48f0f8a046cb6a56280137054.zip
gcc-2b5bf0e27e0b77a48f0f8a046cb6a56280137054.tar.gz
gcc-2b5bf0e27e0b77a48f0f8a046cb6a56280137054.tar.bz2
sse.md (mulv4si3): New.
* config/i386/sse.md (mulv4si3): New. * lib/target-supports.exp (check_effective_target_vect_int_mult): Add i?86 and x86_64. From-SVN: r99811
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog4
-rw-r--r--gcc/config/i386/sse.md46
-rw-r--r--gcc/testsuite/ChangeLog5
-rw-r--r--gcc/testsuite/lib/target-supports.exp4
4 files changed, 58 insertions, 1 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 28d0a13..f42032b 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,7 @@
+2005-05-16 Richard Henderson <rth@redhat.com>
+
+ * config/i386/sse.md (mulv4si3): New.
+
2005-05-17 Hans-Peter Nilsson <hp@axis.com>
* config/cris/cris.h (EXTRA_CONSTRAINT_T): Remove FIXME and
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ee2e614..76efe5f 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -2490,6 +2490,52 @@
[(set_attr "type" "sseiadd")
(set_attr "mode" "TI")])
+;; Expand a V4SI multiply into a sequence built from two widening
+;; V2SI->V2DI unsigned multiplies (one for elements 0/2, one for
+;; elements 1/3), keeping only the low 32 bits of each 64-bit product.
+;; The low 32 bits of a product do not depend on signedness, so the
+;; unsigned multiply is sufficient here.
+(define_expand "mulv4si3"
+  [(set (match_operand:V4SI 0 "register_operand" "")
+        (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "")
+                   (match_operand:V4SI 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+{
+  rtx t1, t2, t3, t4, t5, t6, thirtytwo;
+  rtx op0, op1, op2;
+
+  op0 = operands[0];
+  /* The predicates above accept memory, but this expander emits its
+     insns by hand and finishes with DONE, so nothing re-validates the
+     operands against the patterns used below.  Both inputs are used
+     twice (once by the multiply, once by the whole-register shift), so
+     load them into registers up front.
+     NOTE(review): sse2_lshrti3 is assumed to require a register
+     source -- confirm against its define_insn.  */
+  op1 = force_reg (V4SImode, operands[1]);
+  op2 = force_reg (V4SImode, operands[2]);
+  t1 = gen_reg_rtx (V4SImode);
+  t2 = gen_reg_rtx (V4SImode);
+  t3 = gen_reg_rtx (V4SImode);
+  t4 = gen_reg_rtx (V4SImode);
+  t5 = gen_reg_rtx (V4SImode);
+  t6 = gen_reg_rtx (V4SImode);
+  /* Shift count in bits: one SImode element.  */
+  thirtytwo = GEN_INT (32);
+
+  /* Multiply elements 2 and 0.  The V2DI result is written through a
+     V2DI view of t1; its low halves sit in V4SI elements 0 and 2.  */
+  emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1), op1, op2));
+
+  /* Shift both input vectors down one element, so that elements 3 and 1
+     are now in the slots for elements 2 and 0.  For K8, at least, this is
+     faster than using a shuffle.  */
+  emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
+                               gen_lowpart (TImode, op1), thirtytwo));
+  emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
+                               gen_lowpart (TImode, op2), thirtytwo));
+
+  /* Multiply elements 3 and 1 (now sitting in slots 2 and 0).  */
+  emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), t2, t3));
+
+  /* Move the results in element 2 down to element 1; we don't care what
+     goes in elements 2 and 3.  After this, t5 holds products 0 and 2 in
+     its two low elements and t6 holds products 1 and 3.  */
+  emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
+                                const0_rtx, const0_rtx));
+  emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
+                                const0_rtx, const0_rtx));
+
+  /* Merge the parts back together: interleaving the low elements of t5
+     and t6 yields products 0, 1, 2, 3 in order.  */
+  emit_insn (gen_sse2_punpckldq (op0, t5, t6));
+  DONE;
+})
+
(define_insn "ashr<mode>3"
[(set (match_operand:SSEMODE24 0 "register_operand" "=x")
(ashiftrt:SSEMODE24
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index b94841e..c9f0b74 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2005-05-16 Richard Henderson <rth@redhat.com>
+
+ * lib/target-supports.exp (check_effective_target_vect_int_mult): Add
+ i?86 and x86_64.
+
2005-05-16 Mark Mitchell <mark@codesourcery.com>
* gcc.dg/compat/generate-random.c (config.h): Do not include.
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 9306790..ac6dda5 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -955,7 +955,9 @@ proc check_effective_target_vect_int_mult { } {
verbose "check_effective_target_vect_int_mult: using cached result" 2
} else {
set et_vect_int_mult_saved 0
- if { [istarget powerpc*-*-*] } {
+ if { [istarget powerpc*-*-*]
+ || [istarget i?86-*-*]
+ || [istarget x86_64-*-*] } {
set et_vect_int_mult_saved 1
}
}