diff options
author | Uros Bizjak <uros@gcc.gnu.org> | 2013-07-29 13:17:51 +0200 |
---|---|---|
committer | Uros Bizjak <uros@gcc.gnu.org> | 2013-07-29 13:17:51 +0200 |
commit | 576e2f37400cecb0cdb4537c7777284b7878e7c0 (patch) | |
tree | 1de9ab1f3f338f36c3085ab0f145619a94731886 | |
parent | bb80c2eb3a2578e71de04b5aa4a7574918b13144 (diff) | |
download | gcc-576e2f37400cecb0cdb4537c7777284b7878e7c0.zip gcc-576e2f37400cecb0cdb4537c7777284b7878e7c0.tar.gz gcc-576e2f37400cecb0cdb4537c7777284b7878e7c0.tar.bz2 |
re PR target/57954 (AVX missing vxorps (zeroing) before vcvtsi2s %edx, slow down AVX code)
2013-07-29 Uros Bizjak <ubizjak@gmail.com>
* config/i386/i386.md (float post-reload splitters): Do not check
for subregs of SSE registers.
2013-07-29 Uros Bizjak <ubizjak@gmail.com>
H.J. Lu <hongjiu.lu@intel.com>
PR target/57954
PR target/57988
* config/i386/i386.md (post-reload splitter
to avoid partial SSE reg dependency stalls): New pattern.
From-SVN: r201308
-rw-r--r-- | gcc/ChangeLog | 17 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 85 |
2 files changed, 64 insertions, 38 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 26ec34a..e30f63b 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,16 @@ +2013-07-29 Uros Bizjak <ubizjak@gmail.com> + + * config/i386/i386.md (float post-reload splitters): Do not check + for subregs of SSE registers. + +2013-07-29 Uros Bizjak <ubizjak@gmail.com> + H.J. Lu <hongjiu.lu@intel.com> + + PR target/57954 + PR target/57988 + * config/i386/i386.md (post-reload splitter + to avoid partial SSE reg dependency stalls): New pattern. + 2013-07-29 Dominik Vogt <vogt@linux.vnet.ibm.com> * config/s390/s390.md ("movcc"): Swap load and store instructions. @@ -28,10 +41,10 @@ * config/aarch64/iterators.md: Add attributes rtn and vas. 2013-07-26 Kyrylo Tkachov <kyrylo.tkachov@arm.com> - Richard Earnshaw <richard.earnshaw@arm.com> + Richard Earnshaw <richard.earnshaw@arm.com> * combine.c (simplify_comparison): Re-canonicalize operands - where appropriate. + where appropriate. * config/arm/arm.md (movcond_addsi): New splitter. 2013-07-25 Sterling Augustine <saugustine@google.com> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index c67ed31..84515ae 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -4596,10 +4596,7 @@ (clobber (match_operand:SWI48 2 "memory_operand"))] "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387 && TARGET_INTER_UNIT_CONVERSIONS - && reload_completed - && (SSE_REG_P (operands[0]) - || (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (SUBREG_REG (operands[0]))))" + && reload_completed && SSE_REG_P (operands[0])" [(set (match_dup 0) (float:MODEF (match_dup 1)))]) (define_split @@ -4608,10 +4605,7 @@ (clobber (match_operand:SWI48 2 "memory_operand"))] "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387 && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun)) - && reload_completed - && (SSE_REG_P (operands[0]) - || (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (SUBREG_REG (operands[0]))))" + && reload_completed && SSE_REG_P (operands[0])" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (float:MODEF (match_dup 2)))]) @@ -4697,10 +4691,7 @@ (clobber (match_operand:SI 2 "memory_operand"))] "TARGET_SSE2 && TARGET_SSE_MATH && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun) - && reload_completed - && (SSE_REG_P (operands[0]) - || (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (SUBREG_REG (operands[0]))))" + && reload_completed && SSE_REG_P (operands[0])" [(const_int 0)] { rtx op1 = operands[1]; @@ -4740,10 +4731,7 @@ (clobber (match_operand:SI 2 "memory_operand"))] "TARGET_SSE2 && TARGET_SSE_MATH && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun) - && reload_completed - && (SSE_REG_P (operands[0]) - || (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (SUBREG_REG (operands[0]))))" + && reload_completed && SSE_REG_P (operands[0])" [(const_int 0)] { operands[3] = simplify_gen_subreg (<ssevecmode>mode, operands[0], @@ -4764,10 +4752,7 @@ (float:MODEF (match_operand:SI 1 "register_operand")))] "TARGET_SSE2 && TARGET_SSE_MATH && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun) - && reload_completed - && (SSE_REG_P (operands[0]) - || (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (SUBREG_REG (operands[0]))))" + && reload_completed && SSE_REG_P (operands[0])" [(const_int 0)] { rtx op1 = operands[1]; @@ -4810,10 +4795,7 @@ (float:MODEF (match_operand:SI 1 "memory_operand")))] "TARGET_SSE2 && TARGET_SSE_MATH && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun) - && reload_completed - && (SSE_REG_P (operands[0]) - || (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (SUBREG_REG (operands[0]))))" + && reload_completed && SSE_REG_P (operands[0])" [(const_int 0)] { operands[3] = simplify_gen_subreg (<ssevecmode>mode, operands[0], @@ -4872,10 +4854,7 @@ (clobber (match_operand:SWI48 2 "memory_operand"))] "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH && (TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun)) - && reload_completed - && (SSE_REG_P (operands[0]) - || (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (SUBREG_REG (operands[0]))))" + && reload_completed && SSE_REG_P (operands[0])" [(set (match_dup 0) (float:MODEF (match_dup 1)))]) (define_insn "*float<SWI48:mode><MODEF:mode>2_sse_nointerunit" @@ -4905,10 +4884,7 @@ (clobber (match_operand:SWI48 2 "memory_operand"))] "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun)) - && reload_completed - && (SSE_REG_P (operands[0]) - || (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (SUBREG_REG (operands[0]))))" + && reload_completed && SSE_REG_P (operands[0])" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (float:MODEF (match_dup 2)))]) @@ -4917,10 +4893,7 @@ (float:MODEF (match_operand:SWI48 1 "memory_operand"))) (clobber (match_operand:SWI48 2 "memory_operand"))] "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH - && reload_completed - && (SSE_REG_P (operands[0]) - || (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (SUBREG_REG (operands[0]))))" + && reload_completed && SSE_REG_P (operands[0])" [(set (match_dup 0) (float:MODEF (match_dup 1)))]) (define_insn "*float<SWI48x:mode><X87MODEF:mode>2_i387_with_temp" @@ -4968,6 +4941,46 @@ && reload_completed" [(set (match_dup 0) (float:X87MODEF (match_dup 1)))]) +;; Avoid partial SSE register dependency stalls + +(define_split + [(set (match_operand:MODEF 0 "register_operand") + (float:MODEF (match_operand:SI 1 "nonimmediate_operand")))] + "TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_SSE_PARTIAL_REG_DEPENDENCY + && optimize_function_for_speed_p (cfun) + && reload_completed && SSE_REG_P (operands[0])" + [(set (match_dup 0) + (vec_merge:<ssevecmode> + (vec_duplicate:<ssevecmode> + (float:MODEF (match_dup 1))) + (match_dup 0) + (const_int 1)))] +{ + operands[0] = simplify_gen_subreg (<ssevecmode>mode, operands[0], + <MODE>mode, 0); + emit_move_insn (operands[0], CONST0_RTX (<ssevecmode>mode)); +}) + +(define_split + [(set (match_operand:MODEF 0 "register_operand") + (float:MODEF (match_operand:DI 1 "nonimmediate_operand")))] + "TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_SSE_PARTIAL_REG_DEPENDENCY + && optimize_function_for_speed_p (cfun) + && reload_completed && SSE_REG_P (operands[0])" + [(set (match_dup 0) + (vec_merge:<ssevecmode> + (vec_duplicate:<ssevecmode> + (float:MODEF (match_dup 1))) + (match_dup 0) + (const_int 1)))] +{ + operands[0] = simplify_gen_subreg (<ssevecmode>mode, operands[0], + <MODE>mode, 0); + emit_move_insn (operands[0], CONST0_RTX (<ssevecmode>mode)); +}) + ;; Avoid store forwarding (partial memory) stall penalty ;; by passing DImode value through XMM registers. */ |