aboutsummaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
author: Richard Henderson <rth@redhat.com> 2012-06-25 13:42:11 -0700
committer: Richard Henderson <rth@gcc.gnu.org> 2012-06-25 13:42:11 -0700
commit: a2051b26af54fd2d7e459c1cd6b42e430c3f7a96 (patch)
tree: 081d90a713e2dde9c776a9fc783f3436f9defd85 /gcc/config
parent: 93703e7981920e334edf2dc48481a97019e9ae5a (diff)
downloadgcc-a2051b26af54fd2d7e459c1cd6b42e430c3f7a96.zip
gcc-a2051b26af54fd2d7e459c1cd6b42e430c3f7a96.tar.gz
gcc-a2051b26af54fd2d7e459c1cd6b42e430c3f7a96.tar.bz2
i386: Delete sse sdot_prod; add sdot_prodv4si for xop
Now that we support mult_even/odd hooks, the vectorizer can generate the exact same code for plain sse dot_prod by itself, as well as for reductions other than plus. (From-SVN: r188960)
Diffstat (limited to 'gcc/config')
-rw-r--r-- gcc/config/i386/sse.md 62
1 files changed, 8 insertions, 54 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 45d3a9c..4b51415 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -5737,64 +5737,18 @@
DONE;
})
-(define_code_attr sse2_sse4_1
- [(zero_extend "sse2") (sign_extend "sse4_1")])
-
-(define_expand "<s>dot_prodv4si"
+;; Normally we use widen_mul_even/odd, but combine can't quite get it all
+;; back together when madd is available.
+(define_expand "sdot_prodv4si"
[(match_operand:V2DI 0 "register_operand")
- (any_extend:V2DI (match_operand:V4SI 1 "register_operand"))
+ (match_operand:V4SI 1 "register_operand")
(match_operand:V4SI 2 "register_operand")
(match_operand:V2DI 3 "register_operand")]
- "<CODE> == ZERO_EXTEND ? TARGET_SSE2 : TARGET_SSE4_1"
-{
- rtx t1, t2, t3, t4;
-
- t1 = gen_reg_rtx (V2DImode);
- emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (t1, operands[1], operands[2]));
- emit_insn (gen_addv2di3 (t1, t1, operands[3]));
-
- t2 = gen_reg_rtx (V4SImode);
- t3 = gen_reg_rtx (V4SImode);
- emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
- gen_lowpart (V1TImode, operands[1]),
- GEN_INT (32)));
- emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
- gen_lowpart (V1TImode, operands[2]),
- GEN_INT (32)));
-
- t4 = gen_reg_rtx (V2DImode);
- emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (t4, t2, t3));
-
- emit_insn (gen_addv2di3 (operands[0], t1, t4));
- DONE;
-})
-
-(define_expand "<s>dot_prodv8si"
- [(match_operand:V4DI 0 "register_operand")
- (any_extend:V4DI (match_operand:V8SI 1 "register_operand"))
- (match_operand:V8SI 2 "register_operand")
- (match_operand:V4DI 3 "register_operand")]
- "TARGET_AVX2"
+ "TARGET_XOP"
{
- rtx t1, t2, t3, t4;
-
- t1 = gen_reg_rtx (V4DImode);
- emit_insn (gen_avx2_<u>mulv4siv4di3 (t1, operands[1], operands[2]));
- emit_insn (gen_addv4di3 (t1, t1, operands[3]));
-
- t2 = gen_reg_rtx (V8SImode);
- t3 = gen_reg_rtx (V8SImode);
- emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, t2),
- gen_lowpart (V2TImode, operands[1]),
- GEN_INT (32)));
- emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, t3),
- gen_lowpart (V2TImode, operands[2]),
- GEN_INT (32)));
-
- t4 = gen_reg_rtx (V4DImode);
- emit_insn (gen_avx2_<u>mulv4siv4di3 (t4, t2, t3));
-
- emit_insn (gen_addv4di3 (operands[0], t1, t4));
+ rtx t = gen_reg_rtx (V2DImode);
+ emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
+ emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
DONE;
})