about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Richard Biener <rguenther@suse.de> 2018-10-02 13:06:54 +0000
committer Richard Biener <rguenth@gcc.gnu.org> 2018-10-02 13:06:54 +0000
commit f512bf3ee95914b30ba67f80ca42a72ec630738f (patch)
tree 213885b05da8c042e27901d44809531e48adc5bc
parent 1c0292829543a2b0c85cfb4de6d4be3be84ede58 (diff)
download gcc-f512bf3ee95914b30ba67f80ca42a72ec630738f.zip
gcc-f512bf3ee95914b30ba67f80ca42a72ec630738f.tar.gz
gcc-f512bf3ee95914b30ba67f80ca42a72ec630738f.tar.bz2
sse.md (reduc_plus_scal_v4df): Avoid the use of haddv4df...
2018-10-02 Richard Biener <rguenther@suse.de>

* config/i386/sse.md (reduc_plus_scal_v4df): Avoid the use of haddv4df, first reduce to SSE width and exploit the fact that we only need element zero with the reduction result.
(reduc_plus_scal_v2df): Likewise.

From-SVN: r264785
-rw-r--r-- gcc/ChangeLog 7
-rw-r--r-- gcc/config/i386/sse.md 24
2 files changed, 21 insertions, 10 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c930bd2..65d5d7d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2018-10-02 Richard Biener <rguenther@suse.de>
+
+ * config/i386/sse.md (reduc_plus_scal_v4df): Avoid the use
+ of haddv4df, first reduce to SSE width and exploit the fact
+ that we only need element zero with the reduction result.
+ (reduc_plus_scal_v2df): Likewise.
+
2018-10-02 Eric Botcazou <ebotcazou@adacore.com>
* dojump.h (do_jump): Delete.
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 0b65572..ce26994 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -2473,24 +2473,28 @@
(match_operand:V4DF 1 "register_operand")]
"TARGET_AVX"
{
- rtx tmp = gen_reg_rtx (V4DFmode);
- rtx tmp2 = gen_reg_rtx (V4DFmode);
- rtx vec_res = gen_reg_rtx (V4DFmode);
- emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
- emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
- emit_insn (gen_addv4df3 (vec_res, tmp, tmp2));
- emit_insn (gen_vec_extractv4dfdf (operands[0], vec_res, const0_rtx));
+ rtx tmp = gen_reg_rtx (V2DFmode);
+ emit_insn (gen_vec_extract_hi_v4df (tmp, operands[1]));
+ rtx tmp2 = gen_reg_rtx (V2DFmode);
+ emit_insn (gen_addv2df3 (tmp2, tmp, gen_lowpart (V2DFmode, operands[1])));
+ rtx tmp3 = gen_reg_rtx (V2DFmode);
+ emit_insn (gen_vec_interleave_highv2df (tmp3, tmp2, tmp2));
+ emit_insn (gen_adddf3 (operands[0],
+ gen_lowpart (DFmode, tmp2),
+ gen_lowpart (DFmode, tmp3)));
DONE;
})
(define_expand "reduc_plus_scal_v2df"
[(match_operand:DF 0 "register_operand")
(match_operand:V2DF 1 "register_operand")]
- "TARGET_SSE3"
+ "TARGET_SSE2"
{
rtx tmp = gen_reg_rtx (V2DFmode);
- emit_insn (gen_sse3_haddv2df3 (tmp, operands[1], operands[1]));
- emit_insn (gen_vec_extractv2dfdf (operands[0], tmp, const0_rtx));
+ emit_insn (gen_vec_interleave_highv2df (tmp, operands[1], operands[1]));
+ emit_insn (gen_adddf3 (operands[0],
+ gen_lowpart (DFmode, tmp),
+ gen_lowpart (DFmode, operands[1])));
DONE;
})