aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author Michael Meissner <meissner@linux.vnet.ibm.com> 2011-07-20 16:16:53 +0000
committer Michael Meissner <meissner@gcc.gnu.org> 2011-07-20 16:16:53 +0000
commit c36193c690369c4180ab460b3a7c9a797f87e160 (patch)
tree ea61f50fc075e518e0f6943f421bc5f9d8ee6666 /gcc
parent 242f54216097a8dfc04a9b35d9b28c3f58ad0bbe (diff)
download gcc-c36193c690369c4180ab460b3a7c9a797f87e160.zip
gcc-c36193c690369c4180ab460b3a7c9a797f87e160.tar.gz
gcc-c36193c690369c4180ab460b3a7c9a797f87e160.tar.bz2
Allow 4 operand FMAs on power7
From-SVN: r176522
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 16
-rw-r--r-- gcc/config/rs6000/rs6000.md 8
-rw-r--r-- gcc/config/rs6000/vsx.md 188
-rw-r--r-- gcc/testsuite/ChangeLog 7
-rw-r--r-- gcc/testsuite/gcc.target/powerpc/ppc-fma-1.c 8
-rw-r--r-- gcc/testsuite/gcc.target/powerpc/ppc-fma-2.c 8
-rw-r--r-- gcc/testsuite/gcc.target/powerpc/recip-3.c 4
7 files changed, 181 insertions, 58 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b2f7e19..eb02423 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,19 @@
+2011-07-20 Michael Meissner <meissner@linux.vnet.ibm.com>
+
+ * config/rs6000/vsx.md (vsx_fma*): Use 4 argument fma instructions
+ where we can use them from the standard and altivec instruction
+ sets, instead of always using the 3 operand VSX forms that require
+ the destination to overlap one of the inputs.
+ (vsx_fms*): Ditto.
+ (vsx_fnma*): Ditto.
+ (vsx_fnms*): Ditto.
+
+ * config/rs6000/rs6000.md (fmadf4_fpr): Set fp_type fp_maddsub_d
+ for DF types.
+ (fmsdf4_fpr): Ditto.
+ (nfmadf4_fpr): Ditto.
+ (nfmsdf4_fpr): Ditto.
+
2011-07-20 Sandra Loosemore <sandra@codesourcery.com>
* genrecog.c (make_insn_sequence): Correct position numbering
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index b34b70a..288f291 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -6288,7 +6288,7 @@
&& VECTOR_UNIT_NONE_P (DFmode)"
"{fma|fmadd} %0,%1,%2,%3"
[(set_attr "type" "fp")
- (set_attr "fp_type" "fp_maddsub_s")])
+ (set_attr "fp_type" "fp_maddsub_d")])
(define_insn "*fmsdf4_fpr"
[(set (match_operand:DF 0 "gpc_reg_operand" "=f")
@@ -6299,7 +6299,7 @@
&& VECTOR_UNIT_NONE_P (DFmode)"
"{fms|fmsub} %0,%1,%2,%3"
[(set_attr "type" "fp")
- (set_attr "fp_type" "fp_maddsub_s")])
+ (set_attr "fp_type" "fp_maddsub_d")])
(define_insn "*nfmadf4_fpr"
[(set (match_operand:DF 0 "gpc_reg_operand" "=f")
@@ -6310,7 +6310,7 @@
&& VECTOR_UNIT_NONE_P (DFmode)"
"{fnma|fnmadd} %0,%1,%2,%3"
[(set_attr "type" "fp")
- (set_attr "fp_type" "fp_maddsub_s")])
+ (set_attr "fp_type" "fp_maddsub_d")])
(define_insn "*nfmsdf4_fpr"
[(set (match_operand:DF 0 "gpc_reg_operand" "=f")
@@ -6321,7 +6321,7 @@
&& VECTOR_UNIT_NONE_P (DFmode)"
"{fnms|fnmsub} %0,%1,%2,%3"
[(set_attr "type" "fp")
- (set_attr "fp_type" "fp_maddsub_s")])
+ (set_attr "fp_type" "fp_maddsub_d")])
(define_expand "sqrtdf2"
[(set (match_operand:DF 0 "gpc_reg_operand" "")
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index b4d1e8b..e859af3 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -524,46 +524,112 @@
[(set_attr "type" "<VStype_simple>")
(set_attr "fp_type" "<VSfptype_simple>")])
-;; Fused vector multiply/add instructions
-
-(define_insn "*vsx_fma<mode>4"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
- (fma:VSX_B
- (match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
- (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")))]
- "VECTOR_UNIT_VSX_P (<MODE>mode)"
+;; Fused vector multiply/add instructions.  Support the classical DF versions of
+;; fma, which allows the target to be a separate register from the 3 inputs.
+;; Under VSX, the target must be either the addend or the first multiply.
+;; Where we can, also do the same for the Altivec V4SF fmas.
+
+(define_insn "*vsx_fmadf4"
+ [(set (match_operand:DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,d")
+ (fma:DF
+ (match_operand:DF 1 "vsx_register_operand" "%ws,ws,wa,wa,d")
+ (match_operand:DF 2 "vsx_register_operand" "ws,0,wa,0,d")
+ (match_operand:DF 3 "vsx_register_operand" "0,ws,0,wa,d")))]
+ "VECTOR_UNIT_VSX_P (DFmode)"
"@
- x<VSv>madda<VSs> %x0,%x1,%x2
- x<VSv>maddm<VSs> %x0,%x1,%x3
- x<VSv>madda<VSs> %x0,%x1,%x2
- x<VSv>maddm<VSs> %x0,%x1,%x3"
- [(set_attr "type" "<VStype_mul>")
- (set_attr "fp_type" "<VSfptype_mul>")])
+ xsmaddadp %x0,%x1,%x2
+ xsmaddmdp %x0,%x1,%x3
+ xsmaddadp %x0,%x1,%x2
+ xsmaddmdp %x0,%x1,%x3
+ {fma|fmadd} %0,%1,%2,%3"
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_maddsub_d")])
+
+(define_insn "*vsx_fmav4sf4"
+ [(set (match_operand:V4SF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,v")
+ (fma:V4SF
+ (match_operand:V4SF 1 "vsx_register_operand" "%ws,ws,wa,wa,v")
+ (match_operand:V4SF 2 "vsx_register_operand" "ws,0,wa,0,v")
+ (match_operand:V4SF 3 "vsx_register_operand" "0,ws,0,wa,v")))]
+ "VECTOR_UNIT_VSX_P (V4SFmode)"
+ "@
+ xvmaddasp %x0,%x1,%x2
+ xvmaddmsp %x0,%x1,%x3
+ xvmaddasp %x0,%x1,%x2
+ xvmaddmsp %x0,%x1,%x3
+ vmaddfp %0,%1,%2,%3"
+ [(set_attr "type" "vecfloat")])
+
+(define_insn "*vsx_fmav2df4"
+ [(set (match_operand:V2DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa")
+ (fma:V2DF
+ (match_operand:V2DF 1 "vsx_register_operand" "%ws,ws,wa,wa")
+ (match_operand:V2DF 2 "vsx_register_operand" "ws,0,wa,0")
+ (match_operand:V2DF 3 "vsx_register_operand" "0,ws,0,wa")))]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+ "@
+ xvmaddadp %x0,%x1,%x2
+ xvmaddmdp %x0,%x1,%x3
+ xvmaddadp %x0,%x1,%x2
+ xvmaddmdp %x0,%x1,%x3"
+ [(set_attr "type" "vecfloat")])
+
+(define_insn "*vsx_fmsdf4"
+ [(set (match_operand:DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,d")
+ (fma:DF
+ (match_operand:DF 1 "vsx_register_operand" "%ws,ws,wa,wa,d")
+ (match_operand:DF 2 "vsx_register_operand" "ws,0,wa,0,d")
+ (neg:DF
+ (match_operand:DF 3 "vsx_register_operand" "0,ws,0,wa,d"))))]
+ "VECTOR_UNIT_VSX_P (DFmode)"
+ "@
+ xsmsubadp %x0,%x1,%x2
+ xsmsubmdp %x0,%x1,%x3
+ xsmsubadp %x0,%x1,%x2
+ xsmsubmdp %x0,%x1,%x3
+ {fms|fmsub} %0,%1,%2,%3"
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_maddsub_d")])
(define_insn "*vsx_fms<mode>4"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
- (fma:VSX_B
- (match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
- (neg:VSX_B
- (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa"))))]
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
+ (fma:VSX_F
+ (match_operand:VSX_F 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,wa,0")
+ (neg:VSX_F
+ (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,wa"))))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
"@
x<VSv>msuba<VSs> %x0,%x1,%x2
x<VSv>msubm<VSs> %x0,%x1,%x3
x<VSv>msuba<VSs> %x0,%x1,%x2
x<VSv>msubm<VSs> %x0,%x1,%x3"
- [(set_attr "type" "<VStype_mul>")
- (set_attr "fp_type" "<VSfptype_mul>")])
+ [(set_attr "type" "vecfloat")])
+
+(define_insn "*vsx_nfmadf4"
+ [(set (match_operand:DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,d")
+ (neg:DF
+ (fma:DF
+ (match_operand:DF 1 "vsx_register_operand" "ws,ws,wa,wa,d")
+ (match_operand:DF 2 "vsx_register_operand" "ws,0,wa,0,d")
+ (match_operand:DF 3 "vsx_register_operand" "0,ws,0,wa,d"))))]
+ "VECTOR_UNIT_VSX_P (DFmode)"
+ "@
+ xsnmaddadp %x0,%x1,%x2
+ xsnmaddmdp %x0,%x1,%x3
+ xsnmaddadp %x0,%x1,%x2
+ xsnmaddmdp %x0,%x1,%x3
+ {fnma|fnmadd} %0,%1,%2,%3"
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_maddsub_d")])
(define_insn "*vsx_nfma<mode>4"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
- (neg:VSX_B
- (fma:VSX_B
- (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSr>,wa,wa")
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
- (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa"))))]
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
+ (neg:VSX_F
+ (fma:VSX_F
+ (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSr>,wa,wa")
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,wa,0")
+ (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,wa"))))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
"@
x<VSv>nmadda<VSs> %x0,%x1,%x2
@@ -573,22 +639,56 @@
[(set_attr "type" "<VStype_mul>")
(set_attr "fp_type" "<VSfptype_mul>")])
-(define_insn "*vsx_nfms<mode>4"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
- (neg:VSX_B
- (fma:VSX_B
- (match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
- (neg:VSX_B
- (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")))))]
- "VECTOR_UNIT_VSX_P (<MODE>mode)"
+(define_insn "*vsx_nfmsdf4"
+ [(set (match_operand:DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,d")
+ (neg:DF
+ (fma:DF
+ (match_operand:DF 1 "vsx_register_operand" "%ws,ws,wa,wa,d")
+ (match_operand:DF 2 "vsx_register_operand" "ws,0,wa,0,d")
+ (neg:DF
+ (match_operand:DF 3 "vsx_register_operand" "0,ws,0,wa,d")))))]
+ "VECTOR_UNIT_VSX_P (DFmode)"
"@
- x<VSv>nmsuba<VSs> %x0,%x1,%x2
- x<VSv>nmsubm<VSs> %x0,%x1,%x3
- x<VSv>nmsuba<VSs> %x0,%x1,%x2
- x<VSv>nmsubm<VSs> %x0,%x1,%x3"
- [(set_attr "type" "<VStype_mul>")
- (set_attr "fp_type" "<VSfptype_mul>")])
+ xsnmsubadp %x0,%x1,%x2
+ xsnmsubmdp %x0,%x1,%x3
+ xsnmsubadp %x0,%x1,%x2
+ xsnmsubmdp %x0,%x1,%x3
+ {fnms|fnmsub} %0,%1,%2,%3"
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_maddsub_d")])
+
+(define_insn "*vsx_nfmsv4sf4"
+ [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
+ (neg:V4SF
+ (fma:V4SF
+ (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
+ (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
+ (neg:V4SF
+ (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))))]
+ "VECTOR_UNIT_VSX_P (V4SFmode)"
+ "@
+ xvnmsubasp %x0,%x1,%x2
+ xvnmsubmsp %x0,%x1,%x3
+ xvnmsubasp %x0,%x1,%x2
+ xvnmsubmsp %x0,%x1,%x3
+ vnmsubfp %0,%1,%2,%3"
+ [(set_attr "type" "vecfloat")])
+
+(define_insn "*vsx_nfmsv2df4"
+ [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
+ (neg:V2DF
+ (fma:V2DF
+ (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
+ (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
+ (neg:V2DF
+ (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))))]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+ "@
+ xvnmsubadp %x0,%x1,%x2
+ xvnmsubmdp %x0,%x1,%x3
+ xvnmsubadp %x0,%x1,%x2
+ xvnmsubmdp %x0,%x1,%x3"
+ [(set_attr "type" "vecfloat")])
;; Vector conditional expressions (no scalar version for these instructions)
(define_insn "vsx_eq<mode>"
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index faa412b..5aa4d6b 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2011-07-20 Michael Meissner <meissner@linux.vnet.ibm.com>
+
+ * gcc.target/powerpc/ppc-fma-1.c: Adjust to allow non-VSX fmas to
+ be generated.
+ * gcc.target/powerpc/ppc-fma-2.c: Ditto.
+ * gcc.target/powerpc/recip-3.c: Ditto.
+
2011-07-19 Jason Merrill <jason@redhat.com>
PR c++/6709 (DR 743)
diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fma-1.c b/gcc/testsuite/gcc.target/powerpc/ppc-fma-1.c
index 674115a..a3d5324 100644
--- a/gcc/testsuite/gcc.target/powerpc/ppc-fma-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/ppc-fma-1.c
@@ -3,16 +3,16 @@
/* { dg-require-effective-target powerpc_vsx_ok } */
/* { dg-options "-O3 -ftree-vectorize -mcpu=power7 -ffast-math" } */
/* { dg-final { scan-assembler-times "xvmadd" 4 } } */
-/* { dg-final { scan-assembler-times "xsmadd" 2 } } */
+/* { dg-final { scan-assembler-times "xsmadd\|fmadd\ " 2 } } */
/* { dg-final { scan-assembler-times "fmadds" 2 } } */
/* { dg-final { scan-assembler-times "xvmsub" 2 } } */
-/* { dg-final { scan-assembler-times "xsmsub" 1 } } */
+/* { dg-final { scan-assembler-times "xsmsub\|fmsub\ " 1 } } */
/* { dg-final { scan-assembler-times "fmsubs" 1 } } */
/* { dg-final { scan-assembler-times "xvnmadd" 2 } } */
-/* { dg-final { scan-assembler-times "xsnmadd" 1 } } */
+/* { dg-final { scan-assembler-times "xsnmadd\|fnmadd " 1 } } */
/* { dg-final { scan-assembler-times "fnmadds" 1 } } */
/* { dg-final { scan-assembler-times "xvnmsub" 2 } } */
-/* { dg-final { scan-assembler-times "xsnmsub" 1 } } */
+/* { dg-final { scan-assembler-times "xsnmsub\|fnmsub " 1 } } */
/* { dg-final { scan-assembler-times "fnmsubs" 1 } } */
/* All functions should generate an appropriate (a * b) + c instruction
diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fma-2.c b/gcc/testsuite/gcc.target/powerpc/ppc-fma-2.c
index 111b9cb..f732b9f 100644
--- a/gcc/testsuite/gcc.target/powerpc/ppc-fma-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/ppc-fma-2.c
@@ -3,16 +3,16 @@
/* { dg-require-effective-target powerpc_vsx_ok } */
/* { dg-options "-O3 -ftree-vectorize -mcpu=power7 -ffast-math -ffp-contract=off" } */
/* { dg-final { scan-assembler-times "xvmadd" 2 } } */
-/* { dg-final { scan-assembler-times "xsmadd" 1 } } */
+/* { dg-final { scan-assembler-times "xsmadd\|fmadd\ " 1 } } */
/* { dg-final { scan-assembler-times "fmadds" 1 } } */
/* { dg-final { scan-assembler-times "xvmsub" 2 } } */
-/* { dg-final { scan-assembler-times "xsmsub" 1 } } */
+/* { dg-final { scan-assembler-times "xsmsub\|fmsub\ " 1 } } */
/* { dg-final { scan-assembler-times "fmsubs" 1 } } */
/* { dg-final { scan-assembler-times "xvnmadd" 2 } } */
-/* { dg-final { scan-assembler-times "xsnmadd" 1 } } */
+/* { dg-final { scan-assembler-times "xsnmadd\|fnmadd\ " 1 } } */
/* { dg-final { scan-assembler-times "fnmadds" 1 } } */
/* { dg-final { scan-assembler-times "xvnmsub" 2 } } */
-/* { dg-final { scan-assembler-times "xsnmsub" 1 } } */
+/* { dg-final { scan-assembler-times "xsnmsub\|fnmsub\ " 1 } } */
/* { dg-final { scan-assembler-times "fnmsubs" 1 } } */
/* Only the functions calling the builtin should generate an appropriate (a *
diff --git a/gcc/testsuite/gcc.target/powerpc/recip-3.c b/gcc/testsuite/gcc.target/powerpc/recip-3.c
index c5ce539..4065881 100644
--- a/gcc/testsuite/gcc.target/powerpc/recip-3.c
+++ b/gcc/testsuite/gcc.target/powerpc/recip-3.c
@@ -1,9 +1,9 @@
/* { dg-do compile { target { { powerpc*-*-* } && { ! powerpc*-apple-darwin* } } } } */
/* { dg-options "-O2 -mrecip -ffast-math -mcpu=power7" } */
/* { dg-final { scan-assembler-times "xsrsqrtedp" 1 } } */
-/* { dg-final { scan-assembler-times "xsmsub.dp" 1 } } */
+/* { dg-final { scan-assembler-times "xsmsub.dp\|fmsub\ " 1 } } */
/* { dg-final { scan-assembler-times "xsmuldp" 4 } } */
-/* { dg-final { scan-assembler-times "xsnmsub.dp" 2 } } */
+/* { dg-final { scan-assembler-times "xsnmsub.dp\|fnmsub\ " 2 } } */
/* { dg-final { scan-assembler-times "frsqrtes" 1 } } */
/* { dg-final { scan-assembler-times "fmsubs" 1 } } */
/* { dg-final { scan-assembler-times "fmuls" 4 } } */