aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author Uros Bizjak <ubizjak@gmail.com> 2020-05-15 10:02:00 +0200
committer Uros Bizjak <ubizjak@gmail.com> 2020-05-15 10:02:00 +0200
commit f8b0665445bee8673b62c0a40ae257fe8c75a9b6 (patch)
tree 72a557800029112ec1e34a99d1ab98944c7ae5c0 /gcc
parent f4356120ba88c083dd5987376aab7590dd1e0e13 (diff)
download gcc-f8b0665445bee8673b62c0a40ae257fe8c75a9b6.zip
gcc-f8b0665445bee8673b62c0a40ae257fe8c75a9b6.tar.gz
gcc-f8b0665445bee8673b62c0a40ae257fe8c75a9b6.tar.bz2
i386: Add V2SFmode hadd/hsub instructions [PR95046]
The PFACC/PFNACC 3dNow! instructions got their corresponding SSE alternatives (HADDPS/HSUBPS) only in SSE3, so they can't be implemented with TARGET_MMX_WITH_SSE, which implies only SSE2. These instructions are only generated via builtins, and since several 3dNow! insns have no corresponding SSE alternative, we can't avoid MMX registers with 3dNow! builtins anyway.

Add SSE3/AVX alternatives to the insn patterns, so the compiler will be able to use XMM registers when available, but keep the MMX register alternative, since it is needed when SSE3 is not active.

Add additional generic insn patterns, used by the combiner to synthesize horizontal V2SFmode add/sub instructions. These patterns are active for TARGET_MMX_WITH_SSE only, and use only XMM registers.

gcc/ChangeLog:

PR target/95046
* config/i386/i386.md (isa): Add sse3_noavx.
(enabled): Handle sse3_noavx.

* config/i386/mmx.md (mmx_haddv2sf3): New expander.
(*mmx_haddv2sf3): Rename from mmx_haddv2sf3. Add SSE/AVX alternatives. Match commutative vec_select selector operands.
(*mmx_haddv2sf3_low): New insn pattern.

(*mmx_hsubv2sf3): Add SSE/AVX alternatives.
(*mmx_hsubv2sf3_low): New insn pattern.

testsuite/ChangeLog:

PR target/95046
* gcc.target/i386/pr95046-8.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 14
-rw-r--r-- gcc/config/i386/i386.md 6
-rw-r--r-- gcc/config/i386/mmx.md 104
-rw-r--r-- gcc/testsuite/ChangeLog 5
4 files changed, 108 insertions, 21 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 7daad3c..587db39 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,17 @@
+2020-05-15 Uroš Bizjak <ubizjak@gmail.com>
+
+ PR target/95046
+ * config/i386/i386.md (isa): Add sse3_noavx.
+ (enabled): Handle sse3_noavx.
+
+ * config/i386/mmx.md (mmx_haddv2sf3): New expander.
+ (*mmx_haddv2sf3): Rename from mmx_haddv2sf3. Add SSE/AVX
+ alternatives. Match commutative vec_select selector operands.
+ (*mmx_haddv2sf3_low): New insn pattern.
+
+ (*mmx_hsubv2sf3): Add SSE/AVX alternatives.
+ (*mmx_hsubv2sf3_low): New insn pattern.
+
2020-05-15 Richard Biener <rguenther@suse.de>
PR tree-optimization/33315
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 722eb9b..b555c16 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -803,8 +803,8 @@
;; Used to control the "enabled" attribute on a per-instruction basis.
(define_attr "isa" "base,x64,x64_sse2,x64_sse4,x64_sse4_noavx,x64_avx,nox64,
- sse_noavx,sse2,sse2_noavx,sse3,sse4,sse4_noavx,avx,noavx,
- avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,
+ sse_noavx,sse2,sse2_noavx,sse3,sse3_noavx,sse4,sse4_noavx,
+ avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,
avx512bw,noavx512bw,avx512dq,noavx512dq,
avx512vl,noavx512vl,x64_avx512dq,x64_avx512bw"
(const_string "base"))
@@ -834,6 +834,8 @@
(eq_attr "isa" "sse2_noavx")
(symbol_ref "TARGET_SSE2 && !TARGET_AVX")
(eq_attr "isa" "sse3") (symbol_ref "TARGET_SSE3")
+ (eq_attr "isa" "sse3_noavx")
+ (symbol_ref "TARGET_SSE3 && !TARGET_AVX")
(eq_attr "isa" "sse4") (symbol_ref "TARGET_SSE4_1")
(eq_attr "isa" "sse4_noavx")
(symbol_ref "TARGET_SSE4_1 && !TARGET_AVX")
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 2955ca2..f73c845 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -530,43 +530,109 @@
(set_attr "prefix_extra" "1")
(set_attr "mode" "V2SF")])
-(define_insn "mmx_haddv2sf3"
- [(set (match_operand:V2SF 0 "register_operand" "=y")
+(define_expand "mmx_haddv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand")
(vec_concat:V2SF
(plus:SF
(vec_select:SF
- (match_operand:V2SF 1 "register_operand" "0")
- (parallel [(const_int 0)]))
+ (match_operand:V2SF 1 "register_operand")
+ (parallel [(const_int 0)]))
(vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
(plus:SF
- (vec_select:SF
- (match_operand:V2SF 2 "nonimmediate_operand" "ym")
- (parallel [(const_int 0)]))
+ (vec_select:SF
+ (match_operand:V2SF 2 "nonimmediate_operand")
+ (parallel [(const_int 0)]))
(vec_select:SF (match_dup 2) (parallel [(const_int 1)])))))]
- "TARGET_3DNOW"
- "pfacc\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxadd")
- (set_attr "prefix_extra" "1")
- (set_attr "mode" "V2SF")])
+ "TARGET_3DNOW")
+
+(define_insn "*mmx_haddv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y,x,x")
+ (vec_concat:V2SF
+ (plus:SF
+ (vec_select:SF
+ (match_operand:V2SF 1 "register_operand" "0,0,x")
+ (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
+ (vec_select:SF (match_dup 1)
+ (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
+ (plus:SF
+ (vec_select:SF
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym,x,x")
+ (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
+ (vec_select:SF (match_dup 2)
+ (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
+ "TARGET_3DNOW
+ && INTVAL (operands[3]) != INTVAL (operands[4])
+ && INTVAL (operands[5]) != INTVAL (operands[6])"
+ "@
+ pfacc\t{%2, %0|%0, %2}
+ haddps\t{%2, %0|%0, %2}
+ vhaddps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,sse3_noavx,avx")
+ (set_attr "type" "mmxadd,sseadd,sseadd")
+ (set_attr "prefix_extra" "1,*,*")
+ (set_attr "prefix" "*,orig,vex")
+ (set_attr "mode" "V2SF,V4SF,V4SF")])
+
+(define_insn "*mmx_haddv2sf3_low"
+ [(set (match_operand:SF 0 "register_operand" "=x,x")
+ (plus:SF
+ (vec_select:SF
+ (match_operand:V2SF 1 "register_operand" "0,x")
+ (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
+ (vec_select:SF
+ (match_dup 1)
+ (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
+ "TARGET_MMX_WITH_SSE && TARGET_SSE3
+ && INTVAL (operands[2]) != INTVAL (operands[3])"
+ "@
+ haddps\t{%0, %0|%0, %0}
+ vhaddps\t{%1, %1, %0|%0, %1, %1}"
+ [(set_attr "isa" "noavx,avx")
+ (set_attr "type" "sseadd1")
+ (set_attr "prefix" "orig,vex")
+ (set_attr "mode" "V4SF")])
(define_insn "mmx_hsubv2sf3"
- [(set (match_operand:V2SF 0 "register_operand" "=y")
+ [(set (match_operand:V2SF 0 "register_operand" "=y,x,x")
(vec_concat:V2SF
(minus:SF
(vec_select:SF
- (match_operand:V2SF 1 "register_operand" "0")
+ (match_operand:V2SF 1 "register_operand" "0,0,x")
(parallel [(const_int 0)]))
(vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
(minus:SF
(vec_select:SF
- (match_operand:V2SF 2 "nonimmediate_operand" "ym")
+ (match_operand:V2SF 2 "register_mmxmem_operand" "ym,x,x")
(parallel [(const_int 0)]))
(vec_select:SF (match_dup 2) (parallel [(const_int 1)])))))]
"TARGET_3DNOW_A"
- "pfnacc\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxadd")
- (set_attr "prefix_extra" "1")
- (set_attr "mode" "V2SF")])
+ "@
+ pfnacc\t{%2, %0|%0, %2}
+ hsubps\t{%2, %0|%0, %2}
+ vhsubps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,sse3_noavx,avx")
+ (set_attr "type" "mmxadd,sseadd,sseadd")
+ (set_attr "prefix_extra" "1,*,*")
+ (set_attr "prefix" "*,orig,vex")
+ (set_attr "mode" "V2SF,V4SF,V4SF")])
+
+(define_insn "*mmx_hsubv2sf3_low"
+ [(set (match_operand:SF 0 "register_operand" "=x,x")
+ (minus:SF
+ (vec_select:SF
+ (match_operand:V2SF 1 "register_operand" "0,x")
+ (parallel [(const_int 0)]))
+ (vec_select:SF
+ (match_dup 1)
+ (parallel [(const_int 1)]))))]
+ "TARGET_MMX_WITH_SSE && TARGET_SSE3"
+ "@
+ hsubps\t{%0, %0|%0, %0}
+ vhsubps\t{%1, %1, %0|%0, %1, %1}"
+ [(set_attr "isa" "noavx,avx")
+ (set_attr "type" "sseadd1")
+ (set_attr "prefix" "orig,vex")
+ (set_attr "mode" "V4SF")])
(define_insn "mmx_addsubv2sf3"
[(set (match_operand:V2SF 0 "register_operand" "=y")
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 0bf5dcd..4a7988f 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2020-05-15 Uroš Bizjak <ubizjak@gmail.com>
+
+ PR target/95046
+ * gcc.target/i386/pr95046-8.c: New test.
+
2020-05-15 Richard Biener <rguenther@suse.de>
PR tree-optimization/33315