aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author Uros Bizjak <ubizjak@gmail.com> 2012-05-09 22:41:08 +0200
committer Uros Bizjak <uros@gcc.gnu.org> 2012-05-09 22:41:08 +0200
commit d21a7b447a2bdf5fe11f70827327c46472ee081a (patch)
tree f0b3f9057a4fe62a59643810ea390aa9be8ffa03 /gcc
parent c54e7136615f2772e46576b83e6da1c17e37b73a (diff)
download gcc-d21a7b447a2bdf5fe11f70827327c46472ee081a.zip
gcc-d21a7b447a2bdf5fe11f70827327c46472ee081a.tar.gz
gcc-d21a7b447a2bdf5fe11f70827327c46472ee081a.tar.bz2
re PR target/52908 (xop-mul-1:f9 miscompiled on bulldozer (-mxop))
PR target/52908
* config/i386/sse.md (vec_widen_smult_hi_v4si): Expand using xop_pmacsdqh insn pattern instead of xop_mulv2div2di3_high.
(vec_widen_smult_lo_v4si): Expand using xop_pmacsdql insn pattern instead of xop_mulv2div2di3_low.
(xop_p<macs>dql): Fix vec_select selector.
(xop_p<macs>dqh): Ditto.
(xop_mulv2div2di3_low): Remove insn_and_split pattern.
(xop_mulv2div2di3_high): Ditto.

testsuite/ChangeLog:

PR target/52908
* gcc.target/i386/xop-imul32widen-vector.c: Update scan-assembler directive to Scan for vpmuldq, not vpmacsdql.

From-SVN: r187354
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 12
-rw-r--r-- gcc/config/i386/sse.md 96
-rw-r--r-- gcc/testsuite/ChangeLog 6
-rw-r--r-- gcc/testsuite/gcc.target/i386/xop-imul32widen-vector.c 2
4 files changed, 33 insertions, 83 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index e31b807..bd8b9b2 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,15 @@
+2012-05-09 Uros Bizjak <ubizjak@gmail.com>
+
+ PR target/52908
+ * config/i386/sse.md (vec_widen_smult_hi_v4si): Expand using
+ xop_pmacsdqh insn pattern instead of xop_mulv2div2di3_high.
+ (vec_widen_smult_lo_v4si): Expand using xop_pmacsdql insn pattern
+ instead of xop_mulv2div2di3_low.
+ (xop_p<macs>dql): Fix vec_select selector.
+ (xop_p<macs>dqh): Ditto.
+ (xop_mulv2div2di3_low): Remove insn_and_split pattern.
+ (xop_mulv2div2di3_high): Ditto.
+
2012-05-09 Manuel López-Ibáñez <manu@gcc.gnu.org>
* doc/extend.texi (Function Attributes): Point xref to section
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 86b2ed3..46d6a8b 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -5748,11 +5748,15 @@
if (TARGET_XOP)
{
+ rtx t3 = gen_reg_rtx (V2DImode);
+
emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
GEN_INT (1), GEN_INT (3)));
emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
GEN_INT (1), GEN_INT (3)));
- emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
+ emit_move_insn (t3, CONST0_RTX (V2DImode));
+
+ emit_insn (gen_xop_pmacsdqh (operands[0], t1, t2, t3));
DONE;
}
@@ -5777,11 +5781,15 @@
if (TARGET_XOP)
{
+ rtx t3 = gen_reg_rtx (V2DImode);
+
emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
GEN_INT (1), GEN_INT (3)));
emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
GEN_INT (1), GEN_INT (3)));
- emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
+ emit_move_insn (t3, CONST0_RTX (V2DImode));
+
+ emit_insn (gen_xop_pmacsdql (operands[0], t1, t2, t3));
DONE;
}
@@ -9792,11 +9800,11 @@
(sign_extend:V2DI
(vec_select:V2SI
(match_operand:V4SI 1 "nonimmediate_operand" "%x")
- (parallel [(const_int 1) (const_int 3)])))
+ (parallel [(const_int 0) (const_int 2)])))
(sign_extend:V2DI
(vec_select:V2SI
(match_operand:V4SI 2 "nonimmediate_operand" "xm")
- (parallel [(const_int 1) (const_int 3)]))))
+ (parallel [(const_int 0) (const_int 2)]))))
(match_operand:V2DI 3 "nonimmediate_operand" "x")))]
"TARGET_XOP"
"vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
@@ -9810,93 +9818,17 @@
(sign_extend:V2DI
(vec_select:V2SI
(match_operand:V4SI 1 "nonimmediate_operand" "%x")
- (parallel [(const_int 0) (const_int 2)])))
+ (parallel [(const_int 1) (const_int 3)])))
(sign_extend:V2DI
(vec_select:V2SI
(match_operand:V4SI 2 "nonimmediate_operand" "xm")
- (parallel [(const_int 0) (const_int 2)]))))
+ (parallel [(const_int 1) (const_int 3)]))))
(match_operand:V2DI 3 "nonimmediate_operand" "x")))]
"TARGET_XOP"
"vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "TI")])
-;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
-;; fake it with a multiply/add. In general, we expect the define_split to
-;; occur before register allocation, so we have to handle the corner case where
-;; the target is the same as operands 1/2
-(define_insn_and_split "xop_mulv2div2di3_low"
- [(set (match_operand:V2DI 0 "register_operand" "=&x")
- (mult:V2DI
- (sign_extend:V2DI
- (vec_select:V2SI
- (match_operand:V4SI 1 "register_operand" "%x")
- (parallel [(const_int 1) (const_int 3)])))
- (sign_extend:V2DI
- (vec_select:V2SI
- (match_operand:V4SI 2 "nonimmediate_operand" "xm")
- (parallel [(const_int 1) (const_int 3)])))))]
- "TARGET_XOP"
- "#"
- "&& reload_completed"
- [(set (match_dup 0)
- (match_dup 3))
- (set (match_dup 0)
- (plus:V2DI
- (mult:V2DI
- (sign_extend:V2DI
- (vec_select:V2SI
- (match_dup 1)
- (parallel [(const_int 1) (const_int 3)])))
- (sign_extend:V2DI
- (vec_select:V2SI
- (match_dup 2)
- (parallel [(const_int 1) (const_int 3)]))))
- (match_dup 0)))]
-{
- operands[3] = CONST0_RTX (V2DImode);
-}
- [(set_attr "type" "ssemul")
- (set_attr "mode" "TI")])
-
-;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
-;; fake it with a multiply/add. In general, we expect the define_split to
-;; occur before register allocation, so we have to handle the corner case where
-;; the target is the same as either operands[1] or operands[2]
-(define_insn_and_split "xop_mulv2div2di3_high"
- [(set (match_operand:V2DI 0 "register_operand" "=&x")
- (mult:V2DI
- (sign_extend:V2DI
- (vec_select:V2SI
- (match_operand:V4SI 1 "register_operand" "%x")
- (parallel [(const_int 0) (const_int 2)])))
- (sign_extend:V2DI
- (vec_select:V2SI
- (match_operand:V4SI 2 "nonimmediate_operand" "xm")
- (parallel [(const_int 0) (const_int 2)])))))]
- "TARGET_XOP"
- "#"
- "&& reload_completed"
- [(set (match_dup 0)
- (match_dup 3))
- (set (match_dup 0)
- (plus:V2DI
- (mult:V2DI
- (sign_extend:V2DI
- (vec_select:V2SI
- (match_dup 1)
- (parallel [(const_int 0) (const_int 2)])))
- (sign_extend:V2DI
- (vec_select:V2SI
- (match_dup 2)
- (parallel [(const_int 0) (const_int 2)]))))
- (match_dup 0)))]
-{
- operands[3] = CONST0_RTX (V2DImode);
-}
- [(set_attr "type" "ssemul")
- (set_attr "mode" "TI")])
-
;; XOP parallel integer multiply/add instructions for the intrinisics
(define_insn "xop_p<macs>wd"
[(set (match_operand:V4SI 0 "register_operand" "=x")
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 3f1b787..7cdad4a6 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2012-05-09 Uros Bizjak <ubizjak@gmail.com>
+
+ PR target/52908
+ * gcc.target/i386/xop-imul32widen-vector.c: Update scan-assembler
+ directive to Scan for vpmuldq, not vpmacsdql.
+
2012-05-09 Michael Matz <matz@suse.de>
PR tree-optimization/53185
diff --git a/gcc/testsuite/gcc.target/i386/xop-imul32widen-vector.c b/gcc/testsuite/gcc.target/i386/xop-imul32widen-vector.c
index 0406d02..0730987 100644
--- a/gcc/testsuite/gcc.target/i386/xop-imul32widen-vector.c
+++ b/gcc/testsuite/gcc.target/i386/xop-imul32widen-vector.c
@@ -32,5 +32,5 @@ int main ()
exit (0);
}
-/* { dg-final { scan-assembler "vpmacsdql" } } */
+/* { dg-final { scan-assembler "vpmuldq" } } */
/* { dg-final { scan-assembler "vpmacsdqh" } } */