about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Marc Glisse <marc.glisse@inria.fr> 2012-10-08 22:45:56 +0200
committer: Marc Glisse <glisse@gcc.gnu.org> 2012-10-08 20:45:56 +0000
commit: b790dea2eb93fa958ec9b3f62adbbb1d1e633d1e (patch)
tree: ec8cd77b0fd2b7468d9a7a4fe327fae6147c4c15
parent: 9b2770f22cfddf5a0ab54659ce696071b0857444 (diff)
download: gcc-b790dea2eb93fa958ec9b3f62adbbb1d1e633d1e.zip
gcc-b790dea2eb93fa958ec9b3f62adbbb1d1e633d1e.tar.gz
gcc-b790dea2eb93fa958ec9b3f62adbbb1d1e633d1e.tar.bz2
re PR middle-end/54400 (recognize vector reductions)
2012-10-08  Marc Glisse  <marc.glisse@inria.fr>

gcc/
    PR target/54400
    * config/i386/i386.md (type attribute): Add sseadd1.
    (unit attribute): Add support for sseadd1.
    (memory attribute): Likewise.
    * config/i386/athlon.md: Likewise.
    * config/i386/core2.md: Likewise.
    * config/i386/atom.md: Likewise.
    * config/i386/ppro.md: Likewise.
    * config/i386/bdver1.md: Likewise.
    * config/i386/sse.md (sse3_h<plusminus_insn>v2df3): split into...
    (sse3_haddv2df3): ... expander.
    (*sse3_haddv2df3): ... define_insn.  Accept permuted operands.
    (sse3_hsubv2df3): ... define_insn.
    (*sse3_haddv2df3_low): New define_insn.
    (*sse3_hsubv2df3_low): New define_insn.

gcc/testsuite/
    PR target/54400
    * gcc.target/i386/pr54400.c: New testcase.

From-SVN: r192223
-rw-r--r-- gcc/ChangeLog 18
-rw-r--r-- gcc/config/i386/athlon.md 18
-rw-r--r-- gcc/config/i386/atom.md 6
-rw-r--r-- gcc/config/i386/bdver1.md 8
-rw-r--r-- gcc/config/i386/core2.md 6
-rw-r--r-- gcc/config/i386/i386.md 8
-rw-r--r-- gcc/config/i386/ppro.md 8
-rw-r--r-- gcc/config/i386/sse.md 90
-rw-r--r-- gcc/testsuite/ChangeLog 5
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr54400.c 53
10 files changed, 188 insertions, 32 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index ba74c1b..afa36d1 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,21 @@
+2012-10-08 Marc Glisse <marc.glisse@inria.fr>
+
+ PR target/54400
+ * config/i386/i386.md (type attribute): Add sseadd1.
+ (unit attribute): Add support for sseadd1.
+ (memory attribute): Likewise.
+ * config/i386/athlon.md: Likewise.
+ * config/i386/core2.md: Likewise.
+ * config/i386/atom.md: Likewise.
+ * config/i386/ppro.md: Likewise.
+ * config/i386/bdver1.md: Likewise.
+ * config/i386/sse.md (sse3_h<plusminus_insn>v2df3): split into...
+ (sse3_haddv2df3): ... expander.
+ (*sse3_haddv2df3): ... define_insn. Accept permuted operands.
+ (sse3_hsubv2df3): ... define_insn.
+ (*sse3_haddv2df3_low): New define_insn.
+ (*sse3_hsubv2df3_low): New define_insn.
+
2012-10-08 Jan Hubicka <jh@suse.cz>
* loop-unswitch.c (unswitch_single_loop): Use
diff --git a/gcc/config/i386/athlon.md b/gcc/config/i386/athlon.md
index 401cb0d..1a2d607 100644
--- a/gcc/config/i386/athlon.md
+++ b/gcc/config/i386/athlon.md
@@ -807,47 +807,47 @@
"athlon-direct,athlon-fpsched,athlon-fadd")
(define_insn_reservation "athlon_sseadd_load" 4
(and (eq_attr "cpu" "athlon")
- (and (eq_attr "type" "sseadd")
+ (and (eq_attr "type" "sseadd,sseadd1")
(and (eq_attr "mode" "SF,DF,DI")
(eq_attr "memory" "load"))))
"athlon-direct,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_sseadd_load_k8" 6
(and (eq_attr "cpu" "k8,generic64,amdfam10")
- (and (eq_attr "type" "sseadd")
+ (and (eq_attr "type" "sseadd,sseadd1")
(and (eq_attr "mode" "SF,DF,DI")
(eq_attr "memory" "load"))))
"athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_sseadd" 4
(and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
- (and (eq_attr "type" "sseadd")
+ (and (eq_attr "type" "sseadd,sseadd1")
(eq_attr "mode" "SF,DF,DI")))
"athlon-direct,athlon-fpsched,athlon-fadd")
(define_insn_reservation "athlon_sseaddvector_load" 5
(and (eq_attr "cpu" "athlon")
- (and (eq_attr "type" "sseadd")
+ (and (eq_attr "type" "sseadd,sseadd1")
(eq_attr "memory" "load")))
"athlon-vector,athlon-fpload2,(athlon-fadd*2)")
(define_insn_reservation "athlon_sseaddvector_load_k8" 7
(and (eq_attr "cpu" "k8,generic64")
- (and (eq_attr "type" "sseadd")
+ (and (eq_attr "type" "sseadd,sseadd1")
(eq_attr "memory" "load")))
"athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
(define_insn_reservation "athlon_sseaddvector_load_amdfam10" 6
(and (eq_attr "cpu" "amdfam10")
- (and (eq_attr "type" "sseadd")
+ (and (eq_attr "type" "sseadd,sseadd1")
(eq_attr "memory" "load")))
"athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_sseaddvector" 5
(and (eq_attr "cpu" "athlon")
- (eq_attr "type" "sseadd"))
+ (eq_attr "type" "sseadd,sseadd1"))
"athlon-vector,athlon-fpsched,(athlon-fadd*2)")
(define_insn_reservation "athlon_sseaddvector_k8" 5
(and (eq_attr "cpu" "k8,generic64")
- (eq_attr "type" "sseadd"))
+ (eq_attr "type" "sseadd,sseadd1"))
"athlon-double,athlon-fpsched,(athlon-fadd*2)")
(define_insn_reservation "athlon_sseaddvector_amdfam10" 4
(and (eq_attr "cpu" "amdfam10")
- (eq_attr "type" "sseadd"))
+ (eq_attr "type" "sseadd,sseadd1"))
"athlon-direct,athlon-fpsched,athlon-fadd")
;; Conversions behaves very irregularly and the scheduling is critical here.
diff --git a/gcc/config/i386/atom.md b/gcc/config/i386/atom.md
index 3c2b957..f24fd5c 100644
--- a/gcc/config/i386/atom.md
+++ b/gcc/config/i386/atom.md
@@ -596,7 +596,7 @@
;; no memory simple
(define_insn_reservation "atom_sseadd" 5
(and (eq_attr "cpu" "atom")
- (and (eq_attr "type" "sseadd")
+ (and (eq_attr "type" "sseadd,sseadd1")
(and (eq_attr "memory" "none")
(and (eq_attr "mode" "!V2DF")
(eq_attr "atom_unit" "!complex")))))
@@ -605,7 +605,7 @@
;; memory simple
(define_insn_reservation "atom_sseadd_mem" 5
(and (eq_attr "cpu" "atom")
- (and (eq_attr "type" "sseadd")
+ (and (eq_attr "type" "sseadd,sseadd1")
(and (eq_attr "memory" "!none")
(and (eq_attr "mode" "!V2DF")
(eq_attr "atom_unit" "!complex")))))
@@ -614,7 +614,7 @@
;; maxps, minps, *pd, hadd, hsub
(define_insn_reservation "atom_sseadd_3" 8
(and (eq_attr "cpu" "atom")
- (and (eq_attr "type" "sseadd")
+ (and (eq_attr "type" "sseadd,sseadd1")
(ior (eq_attr "mode" "V2DF") (eq_attr "atom_unit" "complex"))))
"atom-complex, atom-all-eu*7")
diff --git a/gcc/config/i386/bdver1.md b/gcc/config/i386/bdver1.md
index 10f95ff..2367785 100644
--- a/gcc/config/i386/bdver1.md
+++ b/gcc/config/i386/bdver1.md
@@ -697,24 +697,24 @@
;; SSE MUL, ADD, and MULADD.
(define_insn_reservation "bdver1_ssemuladd_load_256" 11
(and (eq_attr "cpu" "bdver1,bdver2")
- (and (eq_attr "type" "ssemul,sseadd,ssemuladd")
+ (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd")
(and (eq_attr "mode" "V8SF,V4DF")
(eq_attr "memory" "load"))))
"bdver1-double,bdver1-fpload,bdver1-ffma")
(define_insn_reservation "bdver1_ssemuladd_256" 7
(and (eq_attr "cpu" "bdver1,bdver2")
- (and (eq_attr "type" "ssemul,sseadd,ssemuladd")
+ (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd")
(and (eq_attr "mode" "V8SF,V4DF")
(eq_attr "memory" "none"))))
"bdver1-double,bdver1-fpsched,bdver1-ffma")
(define_insn_reservation "bdver1_ssemuladd_load" 10
(and (eq_attr "cpu" "bdver1,bdver2")
- (and (eq_attr "type" "ssemul,sseadd,ssemuladd")
+ (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd")
(eq_attr "memory" "load")))
"bdver1-direct,bdver1-fpload,bdver1-ffma")
(define_insn_reservation "bdver1_ssemuladd" 6
(and (eq_attr "cpu" "bdver1,bdver2")
- (and (eq_attr "type" "ssemul,sseadd,ssemuladd")
+ (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd")
(eq_attr "memory" "none")))
"bdver1-direct,bdver1-fpsched,bdver1-ffma")
(define_insn_reservation "bdver1_sseimul_load" 8
diff --git a/gcc/config/i386/core2.md b/gcc/config/i386/core2.md
index d154cdc..5abc77b 100644
--- a/gcc/config/i386/core2.md
+++ b/gcc/config/i386/core2.md
@@ -36,7 +36,7 @@
(cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint")
(const_string "float")
(eq_attr "type" "sselog,sselog1,sseiadd,sseiadd1,sseishft,sseishft1,sseimul,
- sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,
+ sse,ssemov,sseadd,sseadd1,ssemul,ssecmp,ssecomi,ssecvt,
ssecvt1,sseicvt,ssediv,sseins,ssemuladd,sse4arg")
(cond [(eq_attr "mode" "V4DF,V8SF,V2DF,V4SF,SF,DF")
(const_string "float")
@@ -528,13 +528,13 @@
(define_insn_reservation "c2_sse_addcmp" 3
(and (eq_attr "cpu" "core2,corei7")
(and (eq_attr "memory" "none")
- (eq_attr "type" "sseadd,ssecmp,ssecomi")))
+ (eq_attr "type" "sseadd,sseadd1,ssecmp,ssecomi")))
"c2_decodern,c2_p1")
(define_insn_reservation "c2_sse_addcmp_load" 3
(and (eq_attr "cpu" "core2,corei7")
(and (eq_attr "memory" "load")
- (eq_attr "type" "sseadd,ssecmp,ssecomi")))
+ (eq_attr "type" "sseadd,sseadd1,ssecmp,ssecomi")))
"c2_decodern,c2_p2+c2_p1")
(define_insn_reservation "c2_sse_mul_SF" 4
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 5886478..fa10cb4 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -327,8 +327,8 @@
str,bitmanip,
fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint,
sselog,sselog1,sseiadd,sseiadd1,sseishft,sseishft1,sseimul,
- sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,ssediv,sseins,
- ssemuladd,sse4arg,lwp,
+ sse,ssemov,sseadd,sseadd1,ssemul,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,
+ ssediv,sseins,ssemuladd,sse4arg,lwp,
mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
(const_string "other"))
@@ -342,7 +342,7 @@
(cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint")
(const_string "i387")
(eq_attr "type" "sselog,sselog1,sseiadd,sseiadd1,sseishft,sseishft1,sseimul,
- sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,
+ sse,ssemov,sseadd,sseadd1,ssemul,ssecmp,ssecomi,ssecvt,
ssecvt1,sseicvt,ssediv,sseins,ssemuladd,sse4arg")
(const_string "sse")
(eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
@@ -599,7 +599,7 @@
imov,imovx,icmp,test,bitmanip,
fmov,fcmp,fsgn,
sse,ssemov,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,sselog1,
- sseiadd1,mmx,mmxmov,mmxcmp,mmxcvt")
+ sseadd1,sseiadd1,mmx,mmxmov,mmxcmp,mmxcvt")
(match_operand 2 "memory_operand"))
(const_string "load")
(and (eq_attr "type" "icmov,ssemuladd,sse4arg")
diff --git a/gcc/config/i386/ppro.md b/gcc/config/i386/ppro.md
index bc1cb59..f82b694 100644
--- a/gcc/config/i386/ppro.md
+++ b/gcc/config/i386/ppro.md
@@ -509,14 +509,14 @@
(and (eq_attr "cpu" "pentiumpro")
(and (eq_attr "memory" "none")
(and (eq_attr "mode" "SF")
- (eq_attr "type" "sseadd"))))
+ (eq_attr "type" "sseadd,sseadd1"))))
"decodern,p1")
(define_insn_reservation "ppro_sse_add_SF_load" 3
(and (eq_attr "cpu" "pentiumpro")
(and (eq_attr "memory" "load")
(and (eq_attr "mode" "SF")
- (eq_attr "type" "sseadd"))))
+ (eq_attr "type" "sseadd,sseadd1"))))
"decoder0,p2+p1")
(define_insn_reservation "ppro_sse_cmp_SF" 3
@@ -619,14 +619,14 @@
(and (eq_attr "cpu" "pentiumpro")
(and (eq_attr "memory" "none")
(and (eq_attr "mode" "V4SF")
- (eq_attr "type" "sseadd"))))
+ (eq_attr "type" "sseadd,sseadd1"))))
"decoder0,p1*2")
(define_insn_reservation "ppro_sse_add_V4SF_load" 3
(and (eq_attr "cpu" "pentiumpro")
(and (eq_attr "memory" "load")
(and (eq_attr "mode" "V4SF")
- (eq_attr "type" "sseadd"))))
+ (eq_attr "type" "sseadd,sseadd1"))))
"decoder0,(p2+p1)*2")
(define_insn_reservation "ppro_sse_cmp_V4SF" 3
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index d7fadd0..a73c815 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1216,28 +1216,108 @@
(set_attr "prefix" "vex")
(set_attr "mode" "V4DF")])
-(define_insn "sse3_h<plusminus_insn>v2df3"
+(define_expand "sse3_haddv2df3"
+ [(set (match_operand:V2DF 0 "register_operand")
+ (vec_concat:V2DF
+ (plus:DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "register_operand")
+ (parallel [(const_int 0)]))
+ (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
+ (plus:DF
+ (vec_select:DF
+ (match_operand:V2DF 2 "nonimmediate_operand")
+ (parallel [(const_int 0)]))
+ (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
+ "TARGET_SSE3")
+
+(define_insn "*sse3_haddv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x,x")
+ (vec_concat:V2DF
+ (plus:DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "register_operand" "0,x")
+ (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
+ (vec_select:DF
+ (match_dup 1)
+ (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
+ (plus:DF
+ (vec_select:DF
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
+ (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
+ (vec_select:DF
+ (match_dup 2)
+ (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
+ "TARGET_SSE3
+ && INTVAL (operands[3]) != INTVAL (operands[4])
+ && INTVAL (operands[5]) != INTVAL (operands[6])"
+ "@
+ haddpd\t{%2, %0|%0, %2}
+ vhaddpd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "noavx,avx")
+ (set_attr "type" "sseadd")
+ (set_attr "prefix" "orig,vex")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse3_hsubv2df3"
[(set (match_operand:V2DF 0 "register_operand" "=x,x")
(vec_concat:V2DF
- (plusminus:DF
+ (minus:DF
(vec_select:DF
(match_operand:V2DF 1 "register_operand" "0,x")
(parallel [(const_int 0)]))
(vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
- (plusminus:DF
+ (minus:DF
(vec_select:DF
(match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
(parallel [(const_int 0)]))
(vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
"TARGET_SSE3"
"@
- h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
- vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
+ hsubpd\t{%2, %0|%0, %2}
+ vhsubpd\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseadd")
(set_attr "prefix" "orig,vex")
(set_attr "mode" "V2DF")])
+(define_insn "*sse3_haddv2df3_low"
+ [(set (match_operand:DF 0 "register_operand" "=x,x")
+ (plus:DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "register_operand" "0,x")
+ (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
+ (vec_select:DF
+ (match_dup 1)
+ (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
+ "TARGET_SSE3
+ && INTVAL (operands[2]) != INTVAL (operands[3])"
+ "@
+ haddpd\t{%0, %0|%0, %0}
+ vhaddpd\t{%1, %1, %0|%0, %1, %1}"
+ [(set_attr "isa" "noavx,avx")
+ (set_attr "type" "sseadd1")
+ (set_attr "prefix" "orig,vex")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "*sse3_hsubv2df3_low"
+ [(set (match_operand:DF 0 "register_operand" "=x,x")
+ (minus:DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "register_operand" "0,x")
+ (parallel [(const_int 0)]))
+ (vec_select:DF
+ (match_dup 1)
+ (parallel [(const_int 1)]))))]
+ "TARGET_SSE3"
+ "@
+ hsubpd\t{%0, %0|%0, %0}
+ vhsubpd\t{%1, %1, %0|%0, %1, %1}"
+ [(set_attr "isa" "noavx,avx")
+ (set_attr "type" "sseadd1")
+ (set_attr "prefix" "orig,vex")
+ (set_attr "mode" "V2DF")])
+
(define_insn "avx_h<plusminus_insn>v8sf3"
[(set (match_operand:V8SF 0 "register_operand" "=x")
(vec_concat:V8SF
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index a2f300d..08e0a69 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2012-10-08 Marc Glisse <marc.glisse@inria.fr>
+
+ PR target/54400
+ * gcc.target/i386/pr54400.c: New testcase.
+
2012-10-08 Jakub Jelinek <jakub@redhat.com>
PR c++/54858
diff --git a/gcc/testsuite/gcc.target/i386/pr54400.c b/gcc/testsuite/gcc.target/i386/pr54400.c
new file mode 100644
index 0000000..5ed5ba0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr54400.c
@@ -0,0 +1,53 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse3 -mfpmath=sse" } */
+
+#include <x86intrin.h>
+
+double f (__m128d p)
+{
+ return p[0] - p[1];
+}
+
+double g1 (__m128d p)
+{
+ return p[0] + p[1];
+}
+
+double g2 (__m128d p)
+{
+ return p[1] + p[0];
+}
+
+__m128d h (__m128d p, __m128d q)
+{
+ __m128d r = { p[0] - p[1], q[0] - q[1] };
+ return r;
+}
+
+__m128d i1 (__m128d p, __m128d q)
+{
+ __m128d r = { p[0] + p[1], q[0] + q[1] };
+ return r;
+}
+
+__m128d i2 (__m128d p, __m128d q)
+{
+ __m128d r = { p[0] + p[1], q[1] + q[0] };
+ return r;
+}
+
+__m128d i3 (__m128d p, __m128d q)
+{
+ __m128d r = { p[1] + p[0], q[0] + q[1] };
+ return r;
+}
+
+__m128d i4 (__m128d p, __m128d q)
+{
+ __m128d r = { p[1] + p[0], q[1] + q[0] };
+ return r;
+}
+
+/* { dg-final { scan-assembler-times "hsubpd" 2 } } */
+/* { dg-final { scan-assembler-times "haddpd" 6 } } */
+/* { dg-final { scan-assembler-not "unpck" } } */