aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author Uros Bizjak <ubizjak@gmail.com> 2019-01-31 21:06:42 +0100
committer Uros Bizjak <uros@gcc.gnu.org> 2019-01-31 21:06:42 +0100
commit 1d4b4f4979171ef0dacc452439e3a317795441db (patch)
tree 5064efd8a5202412d2342f7204758c9188c1408c /gcc
parent ec2be203d1d8c5cd690cc1444303a2ca9187e962 (diff)
download gcc-1d4b4f4979171ef0dacc452439e3a317795441db.zip
gcc-1d4b4f4979171ef0dacc452439e3a317795441db.tar.gz
gcc-1d4b4f4979171ef0dacc452439e3a317795441db.tar.bz2
re PR target/89071 (AVX vcvtsd2ss lets us avoid PXOR dependency breaking for scalar float<->double and other scalar xmm,xmm instructions)
PR target/89071
* config/i386/i386.md (*extendsfdf2): Split out reg->reg alternative to avoid partial SSE register stall for TARGET_AVX.
(truncdfsf2): Ditto.
(sse4_1_round<mode>2): Ditto.

From-SVN: r268427
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 8
-rw-r--r-- gcc/config/i386/i386.md 39
2 files changed, 30 insertions, 17 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 56e13e8..bf88929 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2019-01-31 Uroš Bizjak <ubizjak@gmail.com>
+
+ PR target/89071
+ * config/i386/i386.md (*extendsfdf2): Split out reg->reg
+ alternative to avoid partial SSE register stall for TARGET_AVX.
+ (truncdfsf2): Ditto.
+ (sse4_1_round<mode>2): Ditto.
+
2018-01-31 Bill Schmidt <wschmidt@linux.ibm.com>
PR tree-optimization/89008
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index d085e88b..744f155 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -4370,9 +4370,9 @@
})
(define_insn "*extendsfdf2"
- [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v")
+ [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v,v")
(float_extend:DF
- (match_operand:SF 1 "nonimmediate_operand" "fm,f,vm")))]
+ (match_operand:SF 1 "nonimmediate_operand" "fm,f,v,m")))]
"TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
{
switch (which_alternative)
@@ -4382,15 +4382,17 @@
return output_387_reg_move (insn, operands);
case 2:
+ return "%vcvtss2sd\t{%d1, %0|%0, %d1}";
+ case 3:
return "%vcvtss2sd\t{%1, %d0|%d0, %1}";
default:
gcc_unreachable ();
}
}
- [(set_attr "type" "fmov,fmov,ssecvt")
- (set_attr "prefix" "orig,orig,maybe_vex")
- (set_attr "mode" "SF,XF,DF")
+ [(set_attr "type" "fmov,fmov,ssecvt,ssecvt")
+ (set_attr "prefix" "orig,orig,maybe_vex,maybe_vex")
+ (set_attr "mode" "SF,XF,DF,DF")
(set (attr "enabled")
(if_then_else
(match_test ("TARGET_SSE2 && TARGET_SSE_MATH"))
@@ -4481,7 +4483,7 @@
"TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
&& optimize_function_for_speed_p (cfun)
&& (!REG_P (operands[1])
- || REGNO (operands[0]) != REGNO (operands[1]))
+ || (!TARGET_AVX && REGNO (operands[0]) != REGNO (operands[1])))
&& (!EXT_REX_SSE_REG_P (operands[0])
|| TARGET_AVX512VL)"
[(set (match_dup 0)
@@ -4534,9 +4536,9 @@
;; Conversion from DFmode to SFmode.
(define_insn "truncdfsf2"
- [(set (match_operand:SF 0 "nonimm_ssenomem_operand" "=m,f,v")
+ [(set (match_operand:SF 0 "nonimm_ssenomem_operand" "=m,f,v,v")
(float_truncate:SF
- (match_operand:DF 1 "register_ssemem_operand" "f,f,vm")))]
+ (match_operand:DF 1 "register_ssemem_operand" "f,f,v,m")))]
"TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
{
switch (which_alternative)
@@ -4546,13 +4548,15 @@
return output_387_reg_move (insn, operands);
case 2:
+ return "%vcvtsd2ss\t{%d1, %0|%0, %d1}";
+ case 3:
return "%vcvtsd2ss\t{%1, %d0|%d0, %1}";
default:
gcc_unreachable ();
}
}
- [(set_attr "type" "fmov,fmov,ssecvt")
+ [(set_attr "type" "fmov,fmov,ssecvt,ssecvt")
(set_attr "mode" "SF")
(set (attr "enabled")
(if_then_else
@@ -4639,7 +4643,7 @@
"TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
&& optimize_function_for_speed_p (cfun)
&& (!REG_P (operands[1])
- || REGNO (operands[0]) != REGNO (operands[1]))
+ || (!TARGET_AVX && REGNO (operands[0]) != REGNO (operands[1])))
&& (!EXT_REX_SSE_REG_P (operands[0])
|| TARGET_AVX512VL)"
[(set (match_dup 0)
@@ -16171,19 +16175,20 @@
(define_insn "sse4_1_round<mode>2"
- [(set (match_operand:MODEF 0 "register_operand" "=x,v")
- (unspec:MODEF [(match_operand:MODEF 1 "nonimmediate_operand" "xm,vm")
- (match_operand:SI 2 "const_0_to_15_operand" "n,n")]
+ [(set (match_operand:MODEF 0 "register_operand" "=x,x,v")
+ (unspec:MODEF [(match_operand:MODEF 1 "nonimmediate_operand" "x,m,vm")
+ (match_operand:SI 2 "const_0_to_15_operand" "n,n,n")]
UNSPEC_ROUND))]
"TARGET_SSE4_1"
"@
+ %vround<ssemodesuffix>\t{%2, %d1, %0|%0, %d1, %2}
%vround<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}
vrndscale<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}"
[(set_attr "type" "ssecvt")
- (set_attr "prefix_extra" "1,*")
- (set_attr "length_immediate" "*,1")
- (set_attr "prefix" "maybe_vex,evex")
- (set_attr "isa" "noavx512f,avx512f")
+ (set_attr "prefix_extra" "1,1,*")
+ (set_attr "length_immediate" "*,*,1")
+ (set_attr "prefix" "maybe_vex,maybe_vex,evex")
+ (set_attr "isa" "noavx512f,noavx512f,avx512f")
(set_attr "mode" "<MODE>")])
(define_insn "rintxf2"