diff options
author | Uros Bizjak <ubizjak@gmail.com> | 2011-11-16 19:28:08 +0100 |
---|---|---|
committer | Uros Bizjak <uros@gcc.gnu.org> | 2011-11-16 19:28:08 +0100 |
commit | c05e32f58d802bda7fafe9e3471e163734a79c79 (patch) | |
tree | a3d9a1dedd8e3c3cffd88cfcf531c0323eae4da5 /gcc | |
parent | a6bbb56fef2ade340c48a60c38fa21213c152401 (diff) | |
download | gcc-c05e32f58d802bda7fafe9e3471e163734a79c79.zip gcc-c05e32f58d802bda7fafe9e3471e163734a79c79.tar.gz gcc-c05e32f58d802bda7fafe9e3471e163734a79c79.tar.bz2 |
sse.md (round<mode>2_vec_pack_sfix): Optimize V2DFmode sequence for AVX.
* config/i386/sse.md (round<mode>2_vec_pack_sfix): Optimize V2DFmode
sequence for AVX.
(<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>): Ditto.
From-SVN: r181421
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 16 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 62 |
2 files changed, 58 insertions, 20 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 218681a..dd03e1a 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2011-11-16 Uros Bizjak <ubizjak@gmail.com>
+
+	* config/i386/sse.md (round<mode>2_vec_pack_sfix): Optimize V2DFmode
+	sequence for AVX.
+	(<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>): Ditto.
+
 2011-11-16 Venkataramanan Kumar <venkataramanan.kumar@amd.com>
 
 	* doc/invoke.texi: Document AMD bdver1 and btver1.
@@ -11,13 +17,15 @@
 	the base reg is stored iff compiling for Thumb1.
 
 2011-11-16 Razya Ladelsky <razya@il.ibm.com>
-	
+
 	PR tree-optimization/49960
-	* tree-data-ref.c (initialize_data_dependence_relation): Add initializations. 
+	* tree-data-ref.c (initialize_data_dependence_relation): Add
+	initializations.
 	Remove call to compute_self_dependence.
 	(compute_affine_dependence): Remove the !DDR_SELF_REFERENCE condition.
-	(compute_self_dependence): Remove old code. Add call to compute_affine_dependence.
-	(compute_all_dependences): Remove call to compute_self_dependence. 
+	(compute_self_dependence): Remove old code. Add call to
+	compute_affine_dependence.
+	(compute_all_dependences): Remove call to compute_self_dependence.
 	Add call to compute_affine_dependence.
 
 2011-11-16 Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index b8e821d..d04902b 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -9962,17 +9962,32 @@
 {
   rtx tmp0, tmp1;
 
-  tmp0 = gen_reg_rtx (<MODE>mode);
-  tmp1 = gen_reg_rtx (<MODE>mode);
+  if (<MODE>mode == V2DFmode
+      && TARGET_AVX && !TARGET_PREFER_AVX128)
+    {
+      rtx tmp2 = gen_reg_rtx (V4DFmode);
 
-  emit_insn
-    (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
-                                                       operands[3]));
-  emit_insn
-    (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
-                                                       operands[3]));
-  emit_insn
-    (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
+      tmp0 = gen_reg_rtx (V4DFmode);
+      tmp1 = force_reg (V2DFmode, operands[1]);
+
+      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
+      emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
+      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
+    }
+  else
+    {
+      tmp0 = gen_reg_rtx (<MODE>mode);
+      tmp1 = gen_reg_rtx (<MODE>mode);
+
+      emit_insn
+        (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
+                                                           operands[3]));
+      emit_insn
+        (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
+                                                           operands[3]));
+      emit_insn
+        (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
+    }
   DONE;
 })
 
@@ -10053,14 +10068,29 @@
 {
   rtx tmp0, tmp1;
 
-  tmp0 = gen_reg_rtx (<MODE>mode);
-  tmp1 = gen_reg_rtx (<MODE>mode);
+  if (<MODE>mode == V2DFmode
+      && TARGET_AVX && !TARGET_PREFER_AVX128)
+    {
+      rtx tmp2 = gen_reg_rtx (V4DFmode);
 
-  emit_insn (gen_round<mode>2 (tmp0, operands[1]));
-  emit_insn (gen_round<mode>2 (tmp1, operands[2]));
+      tmp0 = gen_reg_rtx (V4DFmode);
+      tmp1 = force_reg (V2DFmode, operands[1]);
 
-  emit_insn
-    (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
+      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
+      emit_insn (gen_roundv4df2 (tmp2, tmp0));
+      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
+    }
+  else
+    {
+      tmp0 = gen_reg_rtx (<MODE>mode);
+      tmp1 = gen_reg_rtx (<MODE>mode);
+
+      emit_insn (gen_round<mode>2 (tmp0, operands[1]));
+      emit_insn (gen_round<mode>2 (tmp1, operands[2]));
+
+      emit_insn
+        (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
+    }
   DONE;
 })
 