Match: Fold pow calls to ldexp when possible [PR57492]

This patch transforms the following POWI calls and products involving LDEXP into equivalent LDEXP calls, as discussed in PR57492: powi (powof2, i) -> ldexp (1.0, i * log2 (powof2)); powof2 * ldexp (x, i) -> ldexp (x, i + log2 (powof2)); a * ldexp (1., i) -> ldexp (a, i). This is especially helpful for SVE architectures, as LDEXP calls can be implemented using the FSCALE instruction, as seen in the following patch: https://gcc.gnu.org/g:9b2915d95d855333d4d8f66b71a75f653ee0d076 SPEC2017 was run with this patch; while there are no noticeable improvements, there are no non-noise regressions either. The patch was bootstrapped and regtested on aarch64-linux-gnu with no regressions. Signed-off-by: Soumya AR <soumyaa@nvidia.com> gcc/ChangeLog: PR target/57492 * match.pd: Added patterns to fold calls to pow to ldexp and optimize specific ldexp calls. gcc/testsuite/ChangeLog: PR target/57492 * gcc.dg/tree-ssa/ldexp.c: New test. * gcc.dg/tree-ssa/pow-to-ldexp.c: New test.
author: Soumya AR <soumyaa@nvidia.com> 2024-11-13 15:41:15 +0530
committer: Soumya AR <soumyaa@nvidia.com> 2024-11-13 15:42:16 +0530
commit: 5a674367c6da870184f3bdb7ec110b96aa91bb2b (patch)
tree: 3ee3e8897e1258031017435899025a44ed9237f7 /gcc
parent: f42f8dcf495e0a17df95a71c6a91093532cb9f3b (diff)
download: gcc-5a674367c6da870184f3bdb7ec110b96aa91bb2b.zip
gcc-5a674367c6da870184f3bdb7ec110b96aa91bb2b.tar.gz
gcc-5a674367c6da870184f3bdb7ec110b96aa91bb2b.tar.bz2
3 files changed, 101 insertions, 0 deletions
diff --git a/gcc/match.pd b/gcc/match.pd
index 9d86d0f..6fa1b59 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -8485,6 +8485,31 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 	&& real_isfinite (TREE_REAL_CST_PTR (@0)))
     (exps (plus (mult (logs @0) @1) @2)))))
 
+ /* Simplify powi (powof2, i) to ldexp (1, i * log2 (powof2)). */
+ (simplify
+  (POWI REAL_CST@0 @1)
+  (with { HOST_WIDE_INT tmp = 0; }
+   (if (real_isinteger (&TREE_REAL_CST (@0), &tmp)
+	&& tmp > 0 && pow2p_hwi (tmp))
+    (LDEXP { build_one_cst (type); }
+       (mult @1 {build_int_cst (integer_type_node,
+	     exact_log2 (tmp)); })))))
+
+ /* Simplify powof2 * ldexp (x, i) to ldexp (x, i + log2 (powof2)). */
+ (simplify
+  (mult:c REAL_CST@0 (LDEXP @1 @2))
+  (with { HOST_WIDE_INT tmp = 0; }
+   (if (real_isinteger (&TREE_REAL_CST (@0), &tmp)
+	&& tmp > 0 && pow2p_hwi (tmp))
+    (LDEXP @1 (plus {build_int_cst (integer_type_node,
+	     exact_log2 (tmp)); } @2)))))
+
+ /* Simplify a * ldexp (1., i) to ldexp (a, i). */
+ (simplify
+  (mult:c @0 (LDEXP REAL_CST@1 @2))
+  (if (real_equal (TREE_REAL_CST_PTR (@1), &dconst1))
+   (LDEXP @0 @2)))
+
  (for sqrts (SQRT)
       cbrts (CBRT)
       pows (POW)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldexp.c b/gcc/testsuite/gcc.dg/tree-ssa/ldexp.c
new file mode 100644
index 0000000..63b36f9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldexp.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-Ofast" } */
+
+/* { dg-final { scan-assembler-not "\tfmul\t" } } */
+
+#define TEST(TYPE, BUILTIN, CONST, NAME)                       \
+  TYPE test_##NAME##_1(TYPE a, int i)                          \
+  {                                                            \
+    return CONST * __builtin_##BUILTIN(a, i);                  \
+  }                                                            \
+  TYPE test_##NAME##_2(int i)                                  \
+  {                                                            \
+    return 45 * __builtin_##BUILTIN(1.0, i);                   \
+  }                                                            \
+  TYPE test_##NAME##_3(TYPE a, int i)                          \
+  {                                                            \
+    return a * __builtin_##BUILTIN(1.0, i);                    \
+  }                                                            \
+  TYPE test_##NAME##_4(int i)                                  \
+  {                                                            \
+    TYPE a = CONST;                                            \
+    return a * __builtin_##BUILTIN(a, i);                      \
+  }                                                            \
+  TYPE test_##NAME##_5(TYPE a, int i)                          \
+  {                                                            \
+    TYPE t1 = a;                                               \
+    return t1 * __builtin_##BUILTIN(1.0, i);                   \
+  }
+
+TEST(double, ldexp, 8.0, double_ldexp)
+TEST(float, ldexpf, 8.0f, float_ldexp)
+TEST(long double, ldexpl, 8.0L, long_ldexp)
+\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pow-to-ldexp.c b/gcc/testsuite/gcc.dg/tree-ssa/pow-to-ldexp.c
new file mode 100644
index 0000000..007949d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pow-to-ldexp.c
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-Ofast -fdump-tree-optimized" } */
+
+/* { dg-final { scan-tree-dump-times "__builtin_ldexp\ " 7 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_ldexpf\ " 7 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_ldexpl\ " 7 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_powi" 0 "optimized" } } */
+
+#define TEST(TYPE, BUILTIN, CONST, NAME)                        \
+  TYPE test_##NAME##_1(TYPE a, int i)                           \
+  {                                                             \
+    return a * __builtin_##BUILTIN(CONST, i);                   \
+  }                                                             \
+  TYPE test_##NAME##_2(int i)                                   \
+  {                                                             \
+    return __builtin_##BUILTIN(CONST, i);                       \
+  }                                                             \
+  TYPE test_##NAME##_3(int i)                                   \
+  {                                                             \
+    return CONST * __builtin_##BUILTIN(CONST, i);               \
+  }                                                             \
+  TYPE test_##NAME##_4(TYPE a, int i)                           \
+  {                                                             \
+    TYPE t1 = a;                                                \
+    return t1 * __builtin_##BUILTIN(CONST, i);                  \
+  }                                                             \
+  TYPE test_##NAME##_5(int i)                                   \
+  {                                                             \
+    TYPE powof2 = 8;                                            \
+    return powof2 * __builtin_##BUILTIN(CONST, i);              \
+  }                                                             \
+  TYPE test_##NAME##_6(int i)                                   \
+  {                                                             \
+    TYPE powof2 = 32;                                           \
+    return __builtin_##BUILTIN(powof2, i);                      \
+  }                                                             \
+  TYPE test_##NAME##_7(int i)                                   \
+  {                                                             \
+    return 2 * __builtin_##BUILTIN(CONST, i);                   \
+  }
+
+TEST(double, powi, 2.0, double_powi)
+TEST(float, powif, 2.0f, float_powif)
+TEST(long double, powil, 2.0, long_powil)
author	Soumya AR <soumyaa@nvidia.com>	2024-11-13 15:41:15 +0530
committer	Soumya AR <soumyaa@nvidia.com>	2024-11-13 15:42:16 +0530
commit	5a674367c6da870184f3bdb7ec110b96aa91bb2b (patch)
tree	3ee3e8897e1258031017435899025a44ed9237f7 /gcc
parent	f42f8dcf495e0a17df95a71c6a91093532cb9f3b (diff)
download	gcc-5a674367c6da870184f3bdb7ec110b96aa91bb2b.zip gcc-5a674367c6da870184f3bdb7ec110b96aa91bb2b.tar.gz gcc-5a674367c6da870184f3bdb7ec110b96aa91bb2b.tar.bz2