about summary refs log tree commit diff
diff options
context:
space:
mode:
author Uros Bizjak <ubizjak@gmail.com> 2018-09-09 23:32:08 +0200
committer Uros Bizjak <uros@gcc.gnu.org> 2018-09-09 23:32:08 +0200
commit 4801cc61b13b55b988c1bd808444cccd3663f033 (patch)
tree c7d7ecf94a7567bc161a02e691123d4e05bd2405
parent 8eb7aec132e688debbb0e5ce484fd06c10dec623 (diff)
download gcc-4801cc61b13b55b988c1bd808444cccd3663f033.zip
gcc-4801cc61b13b55b988c1bd808444cccd3663f033.tar.gz
gcc-4801cc61b13b55b988c1bd808444cccd3663f033.tar.bz2
i386.md (float partial SSE register stall splitter): Move splitter near its instruction pattern.
* config/i386/i386.md (float partial SSE register stall splitter): Move
splitter near its instruction pattern.
(float_extend partial SSE register stall splitter): Ditto.
(float_truncate partial SSE register stall splitter): Ditto.

From-SVN: r264185
-rw-r--r-- gcc/ChangeLog 7
-rw-r--r-- gcc/config/i386/i386.md 216
2 files changed, 115 insertions, 108 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index a29b69c..6dbe814 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2018-09-09 Uros Bizjak <ubizjak@gmail.com>
+
+ * config/i386/i386.md (float partial SSE register stall splitter): Move
+ splitter near its instruction pattern.
+ (float_extend partial SSE register stall splitter): Ditto.
+ (float_truncate partial SSE register stall splitter): Ditto.
+
2018-09-09 Hans-Peter Nilsson <hp@bitrange.com>
PR target/86794
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 0ee2d91..059ddbd 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -4477,6 +4477,40 @@
}
})
+(define_insn "*extendsfdf2"
+ [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v")
+ (float_extend:DF
+ (match_operand:SF 1 "nonimmediate_operand" "fm,f,vm")))]
+ "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ case 1:
+ return output_387_reg_move (insn, operands);
+
+ case 2:
+ return "%vcvtss2sd\t{%1, %d0|%d0, %1}";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "fmov,fmov,ssecvt")
+ (set_attr "prefix" "orig,orig,maybe_vex")
+ (set_attr "mode" "SF,XF,DF")
+ (set (attr "enabled")
+ (if_then_else
+ (match_test ("TARGET_SSE2 && TARGET_SSE_MATH"))
+ (if_then_else
+ (eq_attr "alternative" "0,1")
+ (symbol_ref "TARGET_MIX_SSE_I387")
+ (symbol_ref "true"))
+ (if_then_else
+ (eq_attr "alternative" "0,1")
+ (symbol_ref "true")
+ (symbol_ref "false"))))])
+
/* For converting SF(xmm2) to DF(xmm1), use the following code instead of
cvtss2sd:
unpcklps xmm2,xmm2 ; packed conversion might crash on signaling NaNs
@@ -4544,39 +4578,31 @@
(set (match_dup 0) (float_extend:DF (match_dup 2)))]
"operands[2] = lowpart_subreg (SFmode, operands[0], DFmode);")
-(define_insn "*extendsfdf2"
- [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v")
+;; Break partial reg stall for cvtss2sd. This splitter should split
+;; late in the pass sequence (after register rename pass),
+;; so allocated registers won't change anymore.
+
+(define_split
+ [(set (match_operand:DF 0 "sse_reg_operand")
(float_extend:DF
- (match_operand:SF 1 "nonimmediate_operand" "fm,f,vm")))]
- "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+ (match_operand:SF 1 "nonimmediate_operand")))]
+ "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
+ && optimize_function_for_speed_p (cfun)
+ && (!REG_P (operands[1])
+ || REGNO (operands[0]) != REGNO (operands[1]))
+ && (!EXT_REX_SSE_REG_P (operands[0])
+ || TARGET_AVX512VL)"
+ [(set (match_dup 0)
+ (vec_merge:V2DF
+ (vec_duplicate:V2DF
+ (float_extend:DF
+ (match_dup 1)))
+ (match_dup 0)
+ (const_int 1)))]
{
- switch (which_alternative)
- {
- case 0:
- case 1:
- return output_387_reg_move (insn, operands);
-
- case 2:
- return "%vcvtss2sd\t{%1, %d0|%d0, %1}";
-
- default:
- gcc_unreachable ();
- }
-}
- [(set_attr "type" "fmov,fmov,ssecvt")
- (set_attr "prefix" "orig,orig,maybe_vex")
- (set_attr "mode" "SF,XF,DF")
- (set (attr "enabled")
- (if_then_else
- (match_test ("TARGET_SSE2 && TARGET_SSE_MATH"))
- (if_then_else
- (eq_attr "alternative" "0,1")
- (symbol_ref "TARGET_MIX_SSE_I387")
- (symbol_ref "true"))
- (if_then_else
- (eq_attr "alternative" "0,1")
- (symbol_ref "true")
- (symbol_ref "false"))))])
+ operands[0] = lowpart_subreg (V2DFmode, operands[0], DFmode);
+ emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
+})
(define_expand "extend<mode>xf2"
[(set (match_operand:XF 0 "nonimmediate_operand")
@@ -4710,6 +4736,32 @@
(set (match_dup 0) (float_truncate:SF (match_dup 2)))]
"operands[2] = lowpart_subreg (DFmode, operands[0], SFmode);")
+;; Break partial reg stall for cvtsd2ss. This splitter should split
+;; late in the pass sequence (after register rename pass),
+;; so allocated registers won't change anymore.
+
+(define_split
+ [(set (match_operand:SF 0 "sse_reg_operand")
+ (float_truncate:SF
+ (match_operand:DF 1 "nonimmediate_operand")))]
+ "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
+ && optimize_function_for_speed_p (cfun)
+ && (!REG_P (operands[1])
+ || REGNO (operands[0]) != REGNO (operands[1]))
+ && (!EXT_REX_SSE_REG_P (operands[0])
+ || TARGET_AVX512VL)"
+ [(set (match_dup 0)
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (float_truncate:SF
+ (match_dup 1)))
+ (match_dup 0)
+ (const_int 1)))]
+{
+ operands[0] = lowpart_subreg (V4SFmode, operands[0], SFmode);
+ emit_move_insn (operands[0], CONST0_RTX (V4SFmode));
+})
+
;; Conversion from XFmode to {SF,DF}mode
(define_insn "truncxf<mode>2"
@@ -5152,83 +5204,6 @@
DONE;
})
-;; Avoid partial SSE register dependency stalls. This splitter should split
-;; late in the pass sequence (after register rename pass), so allocated
-;; registers won't change anymore
-
-(define_split
- [(set (match_operand:MODEF 0 "sse_reg_operand")
- (float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))]
- "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
- && optimize_function_for_speed_p (cfun)
- && (!EXT_REX_SSE_REG_P (operands[0])
- || TARGET_AVX512VL)"
- [(set (match_dup 0)
- (vec_merge:<MODEF:ssevecmode>
- (vec_duplicate:<MODEF:ssevecmode>
- (float:MODEF
- (match_dup 1)))
- (match_dup 0)
- (const_int 1)))]
-{
- const machine_mode vmode = <MODEF:ssevecmode>mode;
-
- operands[0] = lowpart_subreg (vmode, operands[0], <MODEF:MODE>mode);
- emit_move_insn (operands[0], CONST0_RTX (vmode));
-})
-
-;; Break partial reg stall for cvtsd2ss. This splitter should split
-;; late in the pass sequence (after register rename pass),
-;; so allocated registers won't change anymore.
-
-(define_split
- [(set (match_operand:SF 0 "sse_reg_operand")
- (float_truncate:SF
- (match_operand:DF 1 "nonimmediate_operand")))]
- "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
- && optimize_function_for_speed_p (cfun)
- && (!REG_P (operands[1])
- || REGNO (operands[0]) != REGNO (operands[1]))
- && (!EXT_REX_SSE_REG_P (operands[0])
- || TARGET_AVX512VL)"
- [(set (match_dup 0)
- (vec_merge:V4SF
- (vec_duplicate:V4SF
- (float_truncate:SF
- (match_dup 1)))
- (match_dup 0)
- (const_int 1)))]
-{
- operands[0] = lowpart_subreg (V4SFmode, operands[0], SFmode);
- emit_move_insn (operands[0], CONST0_RTX (V4SFmode));
-})
-
-;; Break partial reg stall for cvtss2sd. This splitter should split
-;; late in the pass sequence (after register rename pass),
-;; so allocated registers won't change anymore.
-
-(define_split
- [(set (match_operand:DF 0 "sse_reg_operand")
- (float_extend:DF
- (match_operand:SF 1 "nonimmediate_operand")))]
- "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
- && optimize_function_for_speed_p (cfun)
- && (!REG_P (operands[1])
- || REGNO (operands[0]) != REGNO (operands[1]))
- && (!EXT_REX_SSE_REG_P (operands[0])
- || TARGET_AVX512VL)"
- [(set (match_dup 0)
- (vec_merge:V2DF
- (vec_duplicate:V2DF
- (float_extend:DF
- (match_dup 1)))
- (match_dup 0)
- (const_int 1)))]
-{
- operands[0] = lowpart_subreg (V2DFmode, operands[0], DFmode);
- emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
-})
-
;; Avoid store forwarding (partial memory) stall penalty
;; by passing DImode value through XMM registers. */
@@ -5279,6 +5254,31 @@
(set_attr "unit" "i387")
(set_attr "fp_int_src" "true")])
+;; Avoid partial SSE register dependency stalls. This splitter should split
+;; late in the pass sequence (after register rename pass), so allocated
+;; registers won't change anymore
+
+(define_split
+ [(set (match_operand:MODEF 0 "sse_reg_operand")
+ (float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))]
+ "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
+ && optimize_function_for_speed_p (cfun)
+ && (!EXT_REX_SSE_REG_P (operands[0])
+ || TARGET_AVX512VL)"
+ [(set (match_dup 0)
+ (vec_merge:<MODEF:ssevecmode>
+ (vec_duplicate:<MODEF:ssevecmode>
+ (float:MODEF
+ (match_dup 1)))
+ (match_dup 0)
+ (const_int 1)))]
+{
+ const machine_mode vmode = <MODEF:ssevecmode>mode;
+
+ operands[0] = lowpart_subreg (vmode, operands[0], <MODEF:MODE>mode);
+ emit_move_insn (operands[0], CONST0_RTX (vmode));
+})
+
(define_expand "floatuns<SWI12:mode><MODEF:mode>2"
[(set (match_operand:MODEF 0 "register_operand")
(unsigned_float:MODEF