aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorSoumya AR <soumyaa@nvidia.com>2024-11-13 15:41:15 +0530
committerSoumya AR <soumyaa@nvidia.com>2024-11-13 15:42:16 +0530
commit5a674367c6da870184f3bdb7ec110b96aa91bb2b (patch)
tree3ee3e8897e1258031017435899025a44ed9237f7 /gcc
parentf42f8dcf495e0a17df95a71c6a91093532cb9f3b (diff)
downloadgcc-5a674367c6da870184f3bdb7ec110b96aa91bb2b.zip
gcc-5a674367c6da870184f3bdb7ec110b96aa91bb2b.tar.gz
gcc-5a674367c6da870184f3bdb7ec110b96aa91bb2b.tar.bz2
Match: Fold pow calls to ldexp when possible [PR57492]
This patch transforms the following POW calls to equivalent LDEXP calls, as
discussed in PR57492:

  powi (powof2, i) -> ldexp (1.0, i * log2 (powof2))
  powof2 * ldexp (x, i) -> ldexp (x, i + log2 (powof2))
  a * ldexp(1., i) -> ldexp (a, i)

This is especially helpful for SVE architectures as LDEXP calls can be
implemented using the FSCALE instruction, as seen in the following patch:
https://gcc.gnu.org/g:9b2915d95d855333d4d8f66b71a75f653ee0d076

SPEC2017 was run with this patch; while there are no noticeable improvements,
there are no non-noise regressions either.

The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression.

Signed-off-by: Soumya AR <soumyaa@nvidia.com>

gcc/ChangeLog:

	PR target/57492
	* match.pd: Added patterns to fold calls to pow to ldexp and optimize
	specific ldexp calls.

gcc/testsuite/ChangeLog:

	PR target/57492
	* gcc.dg/tree-ssa/ldexp.c: New test.
	* gcc.dg/tree-ssa/pow-to-ldexp.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/match.pd25
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/ldexp.c32
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/pow-to-ldexp.c44
3 files changed, 101 insertions, 0 deletions
diff --git a/gcc/match.pd b/gcc/match.pd
index 9d86d0f..6fa1b59 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -8485,6 +8485,31 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
&& real_isfinite (TREE_REAL_CST_PTR (@0)))
(exps (plus (mult (logs @0) @1) @2)))))
+ /* Simplify powi (powof2, i) to ldexp (1, i * log2 (powof2)). Only applies when the base @0 is a constant whose value is a positive integer power of two; the new exponent is @1 multiplied by log2 of that constant. */
+ (simplify
+ (POWI REAL_CST@0 @1)
+ (with { HOST_WIDE_INT tmp = 0; }
+ (if (real_isinteger (&TREE_REAL_CST (@0), &tmp)
+ && tmp > 0 && pow2p_hwi (tmp))
+ (LDEXP { build_one_cst (type); }
+ (mult @1 {build_int_cst (integer_type_node,
+ exact_log2 (tmp)); })))))
+
+ /* Simplify powof2 * ldexp (x, i) to ldexp (x, i + log2 (powof2)). Only applies when the constant factor @0 is a positive integer power of two; its log2 is added to the exponent @2. */
+ (simplify
+ (mult:c REAL_CST@0 (LDEXP @1 @2))
+ (with { HOST_WIDE_INT tmp = 0; }
+ (if (real_isinteger (&TREE_REAL_CST (@0), &tmp)
+ && tmp > 0 && pow2p_hwi (tmp))
+ (LDEXP @1 (plus {build_int_cst (integer_type_node,
+ exact_log2 (tmp)); } @2)))))
+
+ /* Simplify a * ldexp (1., i) to ldexp (a, i). Only applies when the constant first argument @1 of ldexp compares equal to dconst1; the other multiplicand @0 then becomes the value being scaled. */
+ (simplify
+ (mult:c @0 (LDEXP REAL_CST@1 @2))
+ (if (real_equal (TREE_REAL_CST_PTR (@1), &dconst1))
+ (LDEXP @0 @2)))
+
(for sqrts (SQRT)
cbrts (CBRT)
pows (POW)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldexp.c b/gcc/testsuite/gcc.dg/tree-ssa/ldexp.c
new file mode 100644
index 0000000..63b36f9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldexp.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-Ofast" } */
+
+/* { dg-final { scan-assembler-not "\tfmul\t" } } */
+
+#define TEST(TYPE, BUILTIN, CONST, NAME) /* Defines test_NAME_1 .. test_NAME_5; each returns a constant or variable multiplied by a call to __builtin_BUILTIN whose first argument is a, 1.0 or CONST. */ \
+ TYPE test_##NAME##_1(TYPE a, int i) \
+ { \
+ return CONST * __builtin_##BUILTIN(a, i); \
+ } \
+ TYPE test_##NAME##_2(int i) \
+ { \
+ return 45 * __builtin_##BUILTIN(1.0, i); \
+ } \
+ TYPE test_##NAME##_3(TYPE a, int i) \
+ { \
+ return a * __builtin_##BUILTIN(1.0, i); \
+ } \
+ TYPE test_##NAME##_4(int i) \
+ { \
+ TYPE a = CONST; \
+ return a * __builtin_##BUILTIN(a, i); \
+ } \
+ TYPE test_##NAME##_5(TYPE a, int i) \
+ { \
+ TYPE t1 = a; \
+ return t1 * __builtin_##BUILTIN(1.0, i); \
+ }
+
+TEST(double, ldexp, 8.0, double_ldexp)
+TEST(float, ldexpf, 8.0f, float_ldexp)
+TEST(long double, ldexpl, 8.0L, long_ldexp)
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pow-to-ldexp.c b/gcc/testsuite/gcc.dg/tree-ssa/pow-to-ldexp.c
new file mode 100644
index 0000000..007949d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pow-to-ldexp.c
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-Ofast -fdump-tree-optimized" } */
+
+/* { dg-final { scan-tree-dump-times "__builtin_ldexp\ " 7 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_ldexpf\ " 7 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_ldexpl\ " 7 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_powi" 0 "optimized" } } */
+
+#define TEST(TYPE, BUILTIN, CONST, NAME) /* Defines test_NAME_1 .. test_NAME_7; each calls __builtin_BUILTIN with base CONST or 32 and exponent i, either alone or multiplied by a variable or constant. */ \
+ TYPE test_##NAME##_1(TYPE a, int i) \
+ { \
+ return a * __builtin_##BUILTIN(CONST, i); \
+ } \
+ TYPE test_##NAME##_2(int i) \
+ { \
+ return __builtin_##BUILTIN(CONST, i); \
+ } \
+ TYPE test_##NAME##_3(int i) \
+ { \
+ return CONST * __builtin_##BUILTIN(CONST, i); \
+ } \
+ TYPE test_##NAME##_4(TYPE a, int i) \
+ { \
+ TYPE t1 = a; \
+ return t1 * __builtin_##BUILTIN(CONST, i); \
+ } \
+ TYPE test_##NAME##_5(int i) \
+ { \
+ TYPE powof2 = 8; \
+ return powof2 * __builtin_##BUILTIN(CONST, i); \
+ } \
+ TYPE test_##NAME##_6(int i) \
+ { \
+ TYPE powof2 = 32; \
+ return __builtin_##BUILTIN(powof2, i); \
+ } \
+ TYPE test_##NAME##_7(int i) \
+ { \
+ return 2 * __builtin_##BUILTIN(CONST, i); \
+ }
+
+TEST(double, powi, 2.0, double_powi)
+TEST(float, powif, 2.0f, float_powif)
+TEST(long double, powil, 2.0, long_powil)