about summary refs log tree commit diff
diff options
context:
space:
mode:
author Tamar Christina <tamar.christina@arm.com> 2023-11-09 13:59:39 +0000
committer Tamar Christina <tamar.christina@arm.com> 2023-11-09 14:06:06 +0000
commit 3f176e1adc6bc9cc2c21222d776b51d9f43cb66b (patch)
tree b29f87125d258d93421a09757f8c4802e4bc603f
parent 2d44ab221f64f01fc676be0da1a6774740d713c6 (diff)
download gcc-3f176e1adc6bc9cc2c21222d776b51d9f43cb66b.zip
gcc-3f176e1adc6bc9cc2c21222d776b51d9f43cb66b.tar.gz
gcc-3f176e1adc6bc9cc2c21222d776b51d9f43cb66b.tar.bz2
middle-end: optimize fneg (fabs (x)) to copysign (x, -1) [PR109154]
This patch transforms fneg (fabs (x)) into copysign (x, -1), which is more
canonical and allows a target to expand this sequence efficiently. Such
sequences are common in scientific code working with gradients.

There is an existing canonicalization of copysign (x, -1) to fneg (fabs (x)),
which I remove since fneg (fabs (x)) is the less efficient form. The testsuite
is also updated in light of this.

gcc/ChangeLog:

PR tree-optimization/109154
* match.pd: Add new neg+abs rule, remove inverse copysign rule.

gcc/testsuite/ChangeLog:

PR tree-optimization/109154
* gcc.dg/fold-copysign-1.c: Updated.
* gcc.dg/pr55152-2.c: Updated.
* gcc.dg/tree-ssa/abs-4.c: Updated.
* gcc.dg/tree-ssa/backprop-6.c: Updated.
* gcc.dg/tree-ssa/copy-sign-2.c: Updated.
* gcc.dg/tree-ssa/mult-abs-2.c: Updated.
* gcc.target/aarch64/fneg-abs_1.c: New test.
* gcc.target/aarch64/fneg-abs_2.c: New test.
* gcc.target/aarch64/fneg-abs_3.c: New test.
* gcc.target/aarch64/fneg-abs_4.c: New test.
* gcc.target/aarch64/sve/fneg-abs_1.c: New test.
* gcc.target/aarch64/sve/fneg-abs_2.c: New test.
* gcc.target/aarch64/sve/fneg-abs_3.c: New test.
* gcc.target/aarch64/sve/fneg-abs_4.c: New test.
-rw-r--r-- gcc/match.pd 10
-rw-r--r-- gcc/testsuite/gcc.dg/fold-copysign-1.c 4
-rw-r--r-- gcc/testsuite/gcc.dg/pr55152-2.c 3
-rw-r--r-- gcc/testsuite/gcc.dg/tree-ssa/abs-4.c 5
-rw-r--r-- gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c 5
-rw-r--r-- gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c 3
-rw-r--r-- gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c 3
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c 39
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c 31
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c 36
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c 39
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c 37
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c 29
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c 34
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c 37
15 files changed, 303 insertions, 12 deletions
diff --git a/gcc/match.pd b/gcc/match.pd
index 68a1587..5928acb 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1118,14 +1118,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(hypots @0 (copysigns @1 @2))
(hypots @0 @1))))
-/* copysign(x, CST) -> [-]abs (x). */
+/* copysign(x, CST) -> abs (x). */
(for copysigns (COPYSIGN_ALL)
(simplify
(copysigns @0 REAL_CST@1)
- (if (REAL_VALUE_NEGATIVE (TREE_REAL_CST (@1)))
- (negate (abs @0))
+ (if (!REAL_VALUE_NEGATIVE (TREE_REAL_CST (@1)))
(abs @0))))
+/* Transform fneg (fabs (X)) -> copysign (X, -1). */
+(simplify
+ (negate (abs @0))
+ (IFN_COPYSIGN @0 { build_minus_one_cst (type); }))
+
/* copysign(copysign(x, y), z) -> copysign(x, z). */
(for copysigns (COPYSIGN_ALL)
(simplify
diff --git a/gcc/testsuite/gcc.dg/fold-copysign-1.c b/gcc/testsuite/gcc.dg/fold-copysign-1.c
index f17d65c..f9cafd1 100644
--- a/gcc/testsuite/gcc.dg/fold-copysign-1.c
+++ b/gcc/testsuite/gcc.dg/fold-copysign-1.c
@@ -12,5 +12,5 @@ double bar (double x)
return __builtin_copysign (x, minuszero);
}
-/* { dg-final { scan-tree-dump-times "= -" 1 "cddce1" } } */
-/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 2 "cddce1" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_copysign" 1 "cddce1" } } */
+/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "cddce1" } } */
diff --git a/gcc/testsuite/gcc.dg/pr55152-2.c b/gcc/testsuite/gcc.dg/pr55152-2.c
index 54db0f2..605f202 100644
--- a/gcc/testsuite/gcc.dg/pr55152-2.c
+++ b/gcc/testsuite/gcc.dg/pr55152-2.c
@@ -10,4 +10,5 @@ int f(int a)
return (a<-a)?a:-a;
}
-/* { dg-final { scan-tree-dump-times "ABS_EXPR" 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "ABS_EXPR" 1 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
index 6197519..e1b825f 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
@@ -9,5 +9,6 @@ long double abs_ld(long double x) { return __builtin_signbit(x) ? x : -x; }
/* __builtin_signbit(x) ? x : -x. Should be convert into - ABS_EXP<x> */
/* { dg-final { scan-tree-dump-not "signbit" "optimized"} } */
-/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 3 "optimized"} } */
-/* { dg-final { scan-tree-dump-times "= -" 3 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "= -" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "= \.COPYSIGN" 2 "optimized"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
index 31f0571..c3a1386 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
@@ -26,5 +26,6 @@ TEST_FUNCTION (float, f)
TEST_FUNCTION (double, )
TEST_FUNCTION (long double, l)
-/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 6 "backprop" } } */
-/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 3 "backprop" } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 4 "backprop" } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = \.COPYSIGN} 2 "backprop" } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 1 "backprop" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
index de52c5f..e5d565c 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
@@ -10,4 +10,5 @@ float f1(float x)
float t = __builtin_copysignf (1.0f, -x);
return x * t;
}
-/* { dg-final { scan-tree-dump-times "ABS" 2 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "ABS" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-times ".COPYSIGN" 1 "optimized"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
index a41f1ba..a22896b 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
@@ -34,4 +34,5 @@ float i1(float x)
{
return x * (x <= 0.f ? 1.f : -1.f);
}
-/* { dg-final { scan-tree-dump-times "ABS" 8 "gimple"} } */
+/* { dg-final { scan-tree-dump-times "ABS" 4 "gimple"} } */
+/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 4 "gimple"} } */
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c
new file mode 100644
index 0000000..f823013
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#include <arm_neon.h>
+
+/*
+** t1:
+** orr v[0-9]+.2s, #128, lsl #24
+** ret
+*/
+float32x2_t t1 (float32x2_t a)
+{
+ return vneg_f32 (vabs_f32 (a));
+}
+
+/*
+** t2:
+** orr v[0-9]+.4s, #128, lsl #24
+** ret
+*/
+float32x4_t t2 (float32x4_t a)
+{
+ return vnegq_f32 (vabsq_f32 (a));
+}
+
+/*
+** t3:
+** adrp x0, .LC[0-9]+
+** ldr q[0-9]+, \[x0, #:lo12:.LC0\]
+** orr v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+** ret
+*/
+float64x2_t t3 (float64x2_t a)
+{
+ return vnegq_f64 (vabsq_f64 (a));
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
new file mode 100644
index 0000000..1411211
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#include <arm_neon.h>
+#include <math.h>
+
+/*
+** f1:
+** movi v[0-9]+.2s, 0x80, lsl 24
+** orr v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+** ret
+*/
+float32_t f1 (float32_t a)
+{
+ return -fabsf (a);
+}
+
+/*
+** f2:
+** mov x0, -9223372036854775808
+** fmov d[0-9]+, x0
+** orr v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+** ret
+*/
+float64_t f2 (float64_t a)
+{
+ return -fabs (a);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c
new file mode 100644
index 0000000..b465217
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#include <arm_neon.h>
+#include <math.h>
+
+/*
+** f1:
+** ...
+** ldr q[0-9]+, \[x0\]
+** orr v[0-9]+.4s, #128, lsl #24
+** str q[0-9]+, \[x0\], 16
+** ...
+*/
+void f1 (float32_t *a, int n)
+{
+ for (int i = 0; i < (n & -8); i++)
+ a[i] = -fabsf (a[i]);
+}
+
+/*
+** f2:
+** ...
+** ldr q[0-9]+, \[x0\]
+** orr v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+** str q[0-9]+, \[x0\], 16
+** ...
+*/
+void f2 (float64_t *a, int n)
+{
+ for (int i = 0; i < (n & -8); i++)
+ a[i] = -fabs (a[i]);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c
new file mode 100644
index 0000000..10879de
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#include <string.h>
+
+/*
+** negabs:
+** mov x0, -9223372036854775808
+** fmov d[0-9]+, x0
+** orr v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+** ret
+*/
+double negabs (double x)
+{
+ unsigned long long y;
+ memcpy (&y, &x, sizeof(double));
+ y = y | (1UL << 63);
+ memcpy (&x, &y, sizeof(double));
+ return x;
+}
+
+/*
+** negabsf:
+** movi v[0-9]+.2s, 0x80, lsl 24
+** orr v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+** ret
+*/
+float negabsf (float x)
+{
+ unsigned int y;
+ memcpy (&y, &x, sizeof(float));
+ y = y | (1U << 31);
+ memcpy (&x, &y, sizeof(float));
+ return x;
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
new file mode 100644
index 0000000..0c7664e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <arm_neon.h>
+
+/*
+** t1:
+** orr v[0-9]+.2s, #128, lsl #24
+** ret
+*/
+float32x2_t t1 (float32x2_t a)
+{
+ return vneg_f32 (vabs_f32 (a));
+}
+
+/*
+** t2:
+** orr v[0-9]+.4s, #128, lsl #24
+** ret
+*/
+float32x4_t t2 (float32x4_t a)
+{
+ return vnegq_f32 (vabsq_f32 (a));
+}
+
+/*
+** t3:
+** adrp x0, .LC[0-9]+
+** ldr q[0-9]+, \[x0, #:lo12:.LC0\]
+** orr v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+** ret
+*/
+float64x2_t t3 (float64x2_t a)
+{
+ return vnegq_f64 (vabsq_f64 (a));
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
new file mode 100644
index 0000000..a60cd31
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <arm_neon.h>
+#include <math.h>
+
+/*
+** f1:
+** movi v[0-9]+.2s, 0x80, lsl 24
+** orr v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+** ret
+*/
+float32_t f1 (float32_t a)
+{
+ return -fabsf (a);
+}
+
+/*
+** f2:
+** mov x0, -9223372036854775808
+** fmov d[0-9]+, x0
+** orr v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+** ret
+*/
+float64_t f2 (float64_t a)
+{
+ return -fabs (a);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c
new file mode 100644
index 0000000..1bf3432
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <arm_neon.h>
+#include <math.h>
+
+/*
+** f1:
+** ...
+** ld1w z[0-9]+.s, p[0-9]+/z, \[x0, x2, lsl 2\]
+** orr z[0-9]+.s, z[0-9]+.s, #0x80000000
+** st1w z[0-9]+.s, p[0-9]+, \[x0, x2, lsl 2\]
+** ...
+*/
+void f1 (float32_t *a, int n)
+{
+ for (int i = 0; i < (n & -8); i++)
+ a[i] = -fabsf (a[i]);
+}
+
+/*
+** f2:
+** ...
+** ld1d z[0-9]+.d, p[0-9]+/z, \[x0, x2, lsl 3\]
+** orr z[0-9]+.d, z[0-9]+.d, #0x8000000000000000
+** st1d z[0-9]+.d, p[0-9]+, \[x0, x2, lsl 3\]
+** ...
+*/
+void f2 (float64_t *a, int n)
+{
+ for (int i = 0; i < (n & -8); i++)
+ a[i] = -fabs (a[i]);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
new file mode 100644
index 0000000..21f2a8d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <string.h>
+
+/*
+** negabs:
+** mov x0, -9223372036854775808
+** fmov d[0-9]+, x0
+** orr v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+** ret
+*/
+double negabs (double x)
+{
+ unsigned long long y;
+ memcpy (&y, &x, sizeof(double));
+ y = y | (1UL << 63);
+ memcpy (&x, &y, sizeof(double));
+ return x;
+}
+
+/*
+** negabsf:
+** movi v[0-9]+.2s, 0x80, lsl 24
+** orr v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+** ret
+*/
+float negabsf (float x)
+{
+ unsigned int y;
+ memcpy (&y, &x, sizeof(float));
+ y = y | (1U << 31);
+ memcpy (&x, &y, sizeof(float));
+ return x;
+}
+