about | summary | refs | log | tree | commit | diff
path: root/gcc
diff options
context:
space:
mode:
author: Uros Bizjak <uros@gcc.gnu.org> 2013-07-29 13:17:51 +0200
committer: Uros Bizjak <uros@gcc.gnu.org> 2013-07-29 13:17:51 +0200
commit: 576e2f37400cecb0cdb4537c7777284b7878e7c0 (patch)
tree: 1de9ab1f3f338f36c3085ab0f145619a94731886 /gcc
parent: bb80c2eb3a2578e71de04b5aa4a7574918b13144 (diff)
download: gcc-576e2f37400cecb0cdb4537c7777284b7878e7c0.zip
download: gcc-576e2f37400cecb0cdb4537c7777284b7878e7c0.tar.gz
download: gcc-576e2f37400cecb0cdb4537c7777284b7878e7c0.tar.bz2
re PR target/57954 (AVX missing vxorps (zeroing) before vcvtsi2s %edx, slow down AVX code)
2013-07-29 Uros Bizjak <ubizjak@gmail.com>

 * config/i386/i386.md (float post-reload splitters): Do not check
 for subregs of SSE registers.

2013-07-29 Uros Bizjak <ubizjak@gmail.com>
 H.J. Lu <hongjiu.lu@intel.com>

 PR target/57954
 PR target/57988
 * config/i386/i386.md (post-reload splitter
 to avoid partial SSE reg dependency stalls): New pattern.

From-SVN: r201308
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 17
-rw-r--r-- gcc/config/i386/i386.md 85
2 files changed, 64 insertions, 38 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 26ec34a..e30f63b 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,16 @@
+2013-07-29 Uros Bizjak <ubizjak@gmail.com>
+
+ * config/i386/i386.md (float post-reload splitters): Do not check
+ for subregs of SSE registers.
+
+2013-07-29 Uros Bizjak <ubizjak@gmail.com>
+ H.J. Lu <hongjiu.lu@intel.com>
+
+ PR target/57954
+ PR target/57988
+ * config/i386/i386.md (post-reload splitter
+ to avoid partial SSE reg dependency stalls): New pattern.
+
2013-07-29 Dominik Vogt <vogt@linux.vnet.ibm.com>
* config/s390/s390.md ("movcc"): Swap load and store instructions.
@@ -28,10 +41,10 @@
* config/aarch64/iterators.md: Add attributes rtn and vas.
2013-07-26 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
- Richard Earnshaw <richard.earnshaw@arm.com>
+ Richard Earnshaw <richard.earnshaw@arm.com>
* combine.c (simplify_comparison): Re-canonicalize operands
- where appropriate.
+ where appropriate.
* config/arm/arm.md (movcond_addsi): New splitter.
2013-07-25 Sterling Augustine <saugustine@google.com>
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index c67ed31..84515ae 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -4596,10 +4596,7 @@
(clobber (match_operand:SWI48 2 "memory_operand"))]
"SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387
&& TARGET_INTER_UNIT_CONVERSIONS
- && reload_completed
- && (SSE_REG_P (operands[0])
- || (GET_CODE (operands[0]) == SUBREG
- && SSE_REG_P (SUBREG_REG (operands[0]))))"
+ && reload_completed && SSE_REG_P (operands[0])"
[(set (match_dup 0) (float:MODEF (match_dup 1)))])
(define_split
@@ -4608,10 +4605,7 @@
(clobber (match_operand:SWI48 2 "memory_operand"))]
"SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387
&& !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))
- && reload_completed
- && (SSE_REG_P (operands[0])
- || (GET_CODE (operands[0]) == SUBREG
- && SSE_REG_P (SUBREG_REG (operands[0]))))"
+ && reload_completed && SSE_REG_P (operands[0])"
[(set (match_dup 2) (match_dup 1))
(set (match_dup 0) (float:MODEF (match_dup 2)))])
@@ -4697,10 +4691,7 @@
(clobber (match_operand:SI 2 "memory_operand"))]
"TARGET_SSE2 && TARGET_SSE_MATH
&& TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)
- && reload_completed
- && (SSE_REG_P (operands[0])
- || (GET_CODE (operands[0]) == SUBREG
- && SSE_REG_P (SUBREG_REG (operands[0]))))"
+ && reload_completed && SSE_REG_P (operands[0])"
[(const_int 0)]
{
rtx op1 = operands[1];
@@ -4740,10 +4731,7 @@
(clobber (match_operand:SI 2 "memory_operand"))]
"TARGET_SSE2 && TARGET_SSE_MATH
&& TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)
- && reload_completed
- && (SSE_REG_P (operands[0])
- || (GET_CODE (operands[0]) == SUBREG
- && SSE_REG_P (SUBREG_REG (operands[0]))))"
+ && reload_completed && SSE_REG_P (operands[0])"
[(const_int 0)]
{
operands[3] = simplify_gen_subreg (<ssevecmode>mode, operands[0],
@@ -4764,10 +4752,7 @@
(float:MODEF (match_operand:SI 1 "register_operand")))]
"TARGET_SSE2 && TARGET_SSE_MATH
&& TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)
- && reload_completed
- && (SSE_REG_P (operands[0])
- || (GET_CODE (operands[0]) == SUBREG
- && SSE_REG_P (SUBREG_REG (operands[0]))))"
+ && reload_completed && SSE_REG_P (operands[0])"
[(const_int 0)]
{
rtx op1 = operands[1];
@@ -4810,10 +4795,7 @@
(float:MODEF (match_operand:SI 1 "memory_operand")))]
"TARGET_SSE2 && TARGET_SSE_MATH
&& TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)
- && reload_completed
- && (SSE_REG_P (operands[0])
- || (GET_CODE (operands[0]) == SUBREG
- && SSE_REG_P (SUBREG_REG (operands[0]))))"
+ && reload_completed && SSE_REG_P (operands[0])"
[(const_int 0)]
{
operands[3] = simplify_gen_subreg (<ssevecmode>mode, operands[0],
@@ -4872,10 +4854,7 @@
(clobber (match_operand:SWI48 2 "memory_operand"))]
"SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
&& (TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))
- && reload_completed
- && (SSE_REG_P (operands[0])
- || (GET_CODE (operands[0]) == SUBREG
- && SSE_REG_P (SUBREG_REG (operands[0]))))"
+ && reload_completed && SSE_REG_P (operands[0])"
[(set (match_dup 0) (float:MODEF (match_dup 1)))])
(define_insn "*float<SWI48:mode><MODEF:mode>2_sse_nointerunit"
@@ -4905,10 +4884,7 @@
(clobber (match_operand:SWI48 2 "memory_operand"))]
"SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
&& !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))
- && reload_completed
- && (SSE_REG_P (operands[0])
- || (GET_CODE (operands[0]) == SUBREG
- && SSE_REG_P (SUBREG_REG (operands[0]))))"
+ && reload_completed && SSE_REG_P (operands[0])"
[(set (match_dup 2) (match_dup 1))
(set (match_dup 0) (float:MODEF (match_dup 2)))])
@@ -4917,10 +4893,7 @@
(float:MODEF (match_operand:SWI48 1 "memory_operand")))
(clobber (match_operand:SWI48 2 "memory_operand"))]
"SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
- && reload_completed
- && (SSE_REG_P (operands[0])
- || (GET_CODE (operands[0]) == SUBREG
- && SSE_REG_P (SUBREG_REG (operands[0]))))"
+ && reload_completed && SSE_REG_P (operands[0])"
[(set (match_dup 0) (float:MODEF (match_dup 1)))])
(define_insn "*float<SWI48x:mode><X87MODEF:mode>2_i387_with_temp"
@@ -4968,6 +4941,46 @@
&& reload_completed"
[(set (match_dup 0) (float:X87MODEF (match_dup 1)))])
+;; Avoid partial SSE register dependency stalls
+
+(define_split
+ [(set (match_operand:MODEF 0 "register_operand")
+ (float:MODEF (match_operand:SI 1 "nonimmediate_operand")))]
+ "TARGET_SSE2 && TARGET_SSE_MATH
+ && TARGET_SSE_PARTIAL_REG_DEPENDENCY
+ && optimize_function_for_speed_p (cfun)
+ && reload_completed && SSE_REG_P (operands[0])"
+ [(set (match_dup 0)
+ (vec_merge:<ssevecmode>
+ (vec_duplicate:<ssevecmode>
+ (float:MODEF (match_dup 1)))
+ (match_dup 0)
+ (const_int 1)))]
+{
+ operands[0] = simplify_gen_subreg (<ssevecmode>mode, operands[0],
+ <MODE>mode, 0);
+ emit_move_insn (operands[0], CONST0_RTX (<ssevecmode>mode));
+})
+
+(define_split
+ [(set (match_operand:MODEF 0 "register_operand")
+ (float:MODEF (match_operand:DI 1 "nonimmediate_operand")))]
+ "TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH
+ && TARGET_SSE_PARTIAL_REG_DEPENDENCY
+ && optimize_function_for_speed_p (cfun)
+ && reload_completed && SSE_REG_P (operands[0])"
+ [(set (match_dup 0)
+ (vec_merge:<ssevecmode>
+ (vec_duplicate:<ssevecmode>
+ (float:MODEF (match_dup 1)))
+ (match_dup 0)
+ (const_int 1)))]
+{
+ operands[0] = simplify_gen_subreg (<ssevecmode>mode, operands[0],
+ <MODE>mode, 0);
+ emit_move_insn (operands[0], CONST0_RTX (<ssevecmode>mode));
+})
+
;; Avoid store forwarding (partial memory) stall penalty
;; by passing DImode value through XMM registers. */