aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Meissner <meissner@linux.ibm.com>2023-04-09 23:32:27 -0400
committerMichael Meissner <meissner@linux.ibm.com>2023-04-09 23:34:08 -0400
commit725bcdeec60771cb9ee387978716028b64ea1b7f (patch)
tree8efc92d73bcccbac46e0e8dfcbeff395ddf2d757
parentafa87bd5f7b126e20268aa959441cde2e02bba0e (diff)
downloadgcc-725bcdeec60771cb9ee387978716028b64ea1b7f.zip
gcc-725bcdeec60771cb9ee387978716028b64ea1b7f.tar.gz
gcc-725bcdeec60771cb9ee387978716028b64ea1b7f.tar.bz2
Do not generate vmaddfp and vnmsubfp
This is version 3 of the patch. This is essentially version 1 with the removal of changes to altivec.md, and cleanup of the comments. Version 2 generated the vmaddfp and vnmsubfp instructions if -Ofast was used, and those changes are deleted in this patch. The Altivec instructions vmaddfp and vnmsubfp have different rounding behaviors than the VSX xvmaddsp and xvnmsubsp instructions due to VSCR[NJ] and other corner cases. In particular, generating these instructions seems to break Eigen on big endian systems. 2023-04-09 Michael Meissner <meissner@linux.ibm.com> gcc/ PR target/70243 * config/rs6000/vsx.md (vsx_fmav4sf4): Do not generate vmaddfp. (vsx_nfmsv4sf4): Do not generate vnmsubfp. gcc/testsuite/ PR target/70243 * gcc.target/powerpc/pr70243.c: New test.
-rw-r--r--gcc/config/rs6000/vsx.md34
-rw-r--r--gcc/testsuite/gcc.target/powerpc/pr70243.c41
2 files changed, 58 insertions, 17 deletions
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 0865608..806ee43 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -2009,22 +2009,23 @@
"x<VSv>tsqrt<sd>p %0,%x1"
[(set_attr "type" "<VStype_simple>")])
-;; Fused vector multiply/add instructions. Support the classical Altivec
-;; versions of fma, which allows the target to be a separate register from the
-;; 3 inputs. Under VSX, the target must be either the addend or the first
-;; multiply.
-
+;; Fused vector multiply/add instructions. Do not generate the Altivec versions
+;; of fma (vmaddfp and vnmsubfp). These instructions allow the target to be a
+;; separate register from the 3 inputs, which can possibly save an extra move
+;; being generated (assuming all registers are AltiVec registers). However,
+;; vmaddfp and vnmsubfp can have different behaviors than the VSX instructions
+;; in some corner cases due to VSCR[NJ] being set or if the addend is +0.0
+;; instead of -0.0.
(define_insn "*vsx_fmav4sf4"
- [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,v")
+ [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa")
(fma:V4SF
- (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa,v")
- (match_operand:V4SF 2 "vsx_register_operand" "wa,0,v")
- (match_operand:V4SF 3 "vsx_register_operand" "0,wa,v")))]
+ (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa")
+ (match_operand:V4SF 2 "vsx_register_operand" "wa,0")
+ (match_operand:V4SF 3 "vsx_register_operand" "0,wa")))]
"VECTOR_UNIT_VSX_P (V4SFmode)"
"@
xvmaddasp %x0,%x1,%x2
- xvmaddmsp %x0,%x1,%x3
- vmaddfp %0,%1,%2,%3"
+ xvmaddmsp %x0,%x1,%x3"
[(set_attr "type" "vecfloat")])
(define_insn "*vsx_fmav2df4"
@@ -2066,18 +2067,17 @@
[(set_attr "type" "<VStype_mul>")])
(define_insn "*vsx_nfmsv4sf4"
- [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,v")
+ [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa")
(neg:V4SF
(fma:V4SF
- (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa,v")
- (match_operand:V4SF 2 "vsx_register_operand" "wa,0,v")
+ (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa")
+ (match_operand:V4SF 2 "vsx_register_operand" "wa,0")
(neg:V4SF
- (match_operand:V4SF 3 "vsx_register_operand" "0,wa,v")))))]
+ (match_operand:V4SF 3 "vsx_register_operand" "0,wa")))))]
"VECTOR_UNIT_VSX_P (V4SFmode)"
"@
xvnmsubasp %x0,%x1,%x2
- xvnmsubmsp %x0,%x1,%x3
- vnmsubfp %0,%1,%2,%3"
+ xvnmsubmsp %x0,%x1,%x3"
[(set_attr "type" "vecfloat")])
(define_insn "*vsx_nfmsv2df4"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr70243.c b/gcc/testsuite/gcc.target/powerpc/pr70243.c
new file mode 100644
index 0000000..18a5ce7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr70243.c
@@ -0,0 +1,41 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx" } */
+
+/* PR 70243, Make sure we don't generate vmaddfp or vnmsubfp. These
+ instructions have different rounding behaviors than the VSX instructions
+ xvmaddsp and xvnmsubsp. These tests are written where the 3 inputs and
+ target are all separate registers. Because vmaddfp and vnmsubfp are no
+ longer generated the compiler will have to generate an xvmaddsp or xvnmsubsp
+ instruction followed by a move operation. */
+
+#include <altivec.h>
+
+vector float
+do_add1 (vector float dummy, vector float a, vector float b, vector float c)
+{
+ return (a * b) + c;
+}
+
+vector float
+do_nsub1 (vector float dummy, vector float a, vector float b, vector float c)
+{
+ return -((a * b) - c);
+}
+
+vector float
+do_add2 (vector float dummy, vector float a, vector float b, vector float c)
+{
+ return vec_madd (a, b, c);
+}
+
+vector float
+do_nsub2 (vector float dummy, vector float a, vector float b, vector float c)
+{
+ return vec_nmsub (a, b, c);
+}
+
+/* { dg-final { scan-assembler {\mxvmadd[am]sp\M} } } */
+/* { dg-final { scan-assembler {\mxvnmsub[am]sp\M} } } */
+/* { dg-final { scan-assembler-not {\mvmaddfp\M} } } */
+/* { dg-final { scan-assembler-not {\mvnmsubfp\M} } } */