diff options
author | Richard Biener <rguenther@suse.de> | 2018-10-02 13:06:54 +0000 |
---|---|---|
committer | Richard Biener <rguenth@gcc.gnu.org> | 2018-10-02 13:06:54 +0000 |
commit | f512bf3ee95914b30ba67f80ca42a72ec630738f (patch) | |
tree | 213885b05da8c042e27901d44809531e48adc5bc | |
parent | 1c0292829543a2b0c85cfb4de6d4be3be84ede58 (diff) | |
download | gcc-f512bf3ee95914b30ba67f80ca42a72ec630738f.zip gcc-f512bf3ee95914b30ba67f80ca42a72ec630738f.tar.gz gcc-f512bf3ee95914b30ba67f80ca42a72ec630738f.tar.bz2 |
sse.md (reduc_plus_scal_v4df): Avoid the use of haddv4df...
2018-10-02 Richard Biener <rguenther@suse.de>
* config/i386/sse.md (reduc_plus_scal_v4df): Avoid the use
of haddv4df, first reduce to SSE width and exploit the fact
that we only need element zero with the reduction result.
(reduc_plus_scal_v2df): Likewise.
From-SVN: r264785
-rw-r--r-- | gcc/ChangeLog | 7 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 24 |
2 files changed, 21 insertions, 10 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c930bd2..65d5d7d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2018-10-02 Richard Biener <rguenther@suse.de>
+
+	* config/i386/sse.md (reduc_plus_scal_v4df): Avoid the use
+	of haddv4df, first reduce to SSE width and exploit the fact
+	that we only need element zero with the reduction result.
+	(reduc_plus_scal_v2df): Likewise.
+
 2018-10-02 Eric Botcazou <ebotcazou@adacore.com>
 
 	* dojump.h (do_jump): Delete.
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 0b65572..ce26994 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -2473,24 +2473,28 @@
    (match_operand:V4DF 1 "register_operand")]
   "TARGET_AVX"
 {
-  rtx tmp = gen_reg_rtx (V4DFmode);
-  rtx tmp2 = gen_reg_rtx (V4DFmode);
-  rtx vec_res = gen_reg_rtx (V4DFmode);
-  emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
-  emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
-  emit_insn (gen_addv4df3 (vec_res, tmp, tmp2));
-  emit_insn (gen_vec_extractv4dfdf (operands[0], vec_res, const0_rtx));
+  rtx tmp = gen_reg_rtx (V2DFmode);
+  emit_insn (gen_vec_extract_hi_v4df (tmp, operands[1]));
+  rtx tmp2 = gen_reg_rtx (V2DFmode);
+  emit_insn (gen_addv2df3 (tmp2, tmp, gen_lowpart (V2DFmode, operands[1])));
+  rtx tmp3 = gen_reg_rtx (V2DFmode);
+  emit_insn (gen_vec_interleave_highv2df (tmp3, tmp2, tmp2));
+  emit_insn (gen_adddf3 (operands[0],
+                         gen_lowpart (DFmode, tmp2),
+                         gen_lowpart (DFmode, tmp3)));
   DONE;
 })
 
 (define_expand "reduc_plus_scal_v2df"
   [(match_operand:DF 0 "register_operand")
    (match_operand:V2DF 1 "register_operand")]
-  "TARGET_SSE3"
+  "TARGET_SSE2"
 {
   rtx tmp = gen_reg_rtx (V2DFmode);
-  emit_insn (gen_sse3_haddv2df3 (tmp, operands[1], operands[1]));
-  emit_insn (gen_vec_extractv2dfdf (operands[0], tmp, const0_rtx));
+  emit_insn (gen_vec_interleave_highv2df (tmp, operands[1], operands[1]));
+  emit_insn (gen_adddf3 (operands[0],
+                         gen_lowpart (DFmode, tmp),
+                         gen_lowpart (DFmode, operands[1])));
   DONE;
 })
 