diff options
author | Jennifer Schmitz <jschmitz@nvidia.com> | 2024-10-17 08:40:34 -0700 |
---|---|---|
committer | Jennifer Schmitz <jschmitz@nvidia.com> | 2024-10-25 15:56:41 +0200 |
commit | 07a8538d90763f0ae640dea822bdeb63ea17ec44 (patch) | |
tree | d145b1a67d709f6d2935e6b4eb808444929223b1 | |
parent | df4af89bc3eabbeaccb16539aa1082cb9863e187 (diff) | |
download | gcc-07a8538d90763f0ae640dea822bdeb63ea17ec44.zip gcc-07a8538d90763f0ae640dea822bdeb63ea17ec44.tar.gz gcc-07a8538d90763f0ae640dea822bdeb63ea17ec44.tar.bz2 |
match.pd: Add std::pow folding optimizations.
This patch adds the following two simplifications in match.pd for
POW_ALL and POWI:
- pow (1.0/x, y) to pow (x, -y), avoiding the division
- pow (0.0, x) to 0.0, avoiding the call to pow.
The patterns are guarded by flag_unsafe_math_optimizations,
!flag_trapping_math, and !HONOR_INFINITIES.
The POW_ALL patterns are also gated under !flag_errno_math.
The second pattern, pow (0.0, x) to 0.0, is additionally guarded by
!HONOR_NANS and !HONOR_SIGNED_ZEROS.
Tests were added to confirm the application of the transform for
builtins pow, powf, powl, powi, powif, powil, and powf16.
The patch was bootstrapped and regtested on aarch64-linux-gnu and
x86_64-linux-gnu with no regressions.
OK for mainline?
Signed-off-by: Jennifer Schmitz <jschmitz@nvidia.com>
gcc/
* match.pd: Fold pow (1.0/x, y) -> pow (x, -y) and
pow (0.0, x) -> 0.0.
gcc/testsuite/
* gcc.dg/tree-ssa/pow_fold_1.c: New test.
-rw-r--r-- | gcc/match.pd | 28 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/pow_fold_1.c | 42 |
2 files changed, 70 insertions, 0 deletions
diff --git a/gcc/match.pd b/gcc/match.pd
index f16b733..809c717 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -8285,6 +8285,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
    (rdiv @0 (exps:s @1))
     (mult @0 (exps (negate @1)))))
 
+ (for pow (POW_ALL)
+  (if (! HONOR_INFINITIES (type)
+       && ! flag_trapping_math
+       && ! flag_errno_math)
+   /* Simplify pow(1.0/x, y) into pow(x, -y). */
+   (simplify
+    (pow (rdiv:s real_onep@0 @1) @2)
+     (pow @1 (negate @2)))
+
+   /* Simplify pow(0.0, x) into 0.0. */
+   (if (! HONOR_NANS (type) && ! HONOR_SIGNED_ZEROS (type))
+    (simplify
+     (pow real_zerop@0 @1)
+      @0))))
+
  (if (! HONOR_SIGN_DEPENDENT_ROUNDING (type)
       && ! HONOR_NANS (type) && ! HONOR_INFINITIES (type)
       && ! flag_trapping_math
@@ -8643,6 +8658,19 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (mult (POW:s @0 @1) (POW:s @2 @1))
    (POW (mult @0 @2) @1))
 
+ (if (! HONOR_INFINITIES (type) && ! flag_trapping_math)
+  /* Simplify powi(1.0/x, y) into powi(x, -y). */
+  (simplify
+   (POWI (rdiv@3 real_onep@0 @1) @2)
+   (if (single_use (@3))
+    (POWI @1 (negate @2))))
+
+  /* Simplify powi(0.0, x) into 0.0. */
+  (if (! HONOR_NANS (type) && ! HONOR_SIGNED_ZEROS (type))
+   (simplify
+    (POWI real_zerop@0 @1)
+     @0)))
+
  /* Simplify powi(x,y) * powi(z,y) -> powi(x*z,y). */
  (simplify
   (mult (POWI:s @0 @1) (POWI:s @2 @1))
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pow_fold_1.c b/gcc/testsuite/gcc.dg/tree-ssa/pow_fold_1.c
new file mode 100644
index 0000000..d98bcb0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pow_fold_1.c
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-optimized -fexcess-precision=16" } */
+/* { dg-add-options float16 } */
+/* { dg-require-effective-target float16_runtime } */
+/* { dg-require-effective-target c99_runtime } */
+
+extern void link_error (void);
+
+#define POW1OVER(TYPE1, TYPE2, CTY, TY) \
+  void \
+  pow1over_##TY (TYPE1 x, TYPE2 y) \
+  { \
+    TYPE1 t1 = 1.0##CTY / x; \
+    TYPE1 t2 = __builtin_pow##TY (t1, y); \
+    TYPE2 t3 = -y; \
+    TYPE1 t4 = __builtin_pow##TY (x, t3); \
+    if (t2 != t4) \
+      link_error (); \
+  } \
+
+#define POW0(TYPE1, TYPE2, CTY, TY) \
+  void \
+  pow0_##TY (TYPE2 x) \
+  { \
+    TYPE1 t1 = __builtin_pow##TY (0.0##CTY, x); \
+    if (t1 != 0.0##CTY) \
+      link_error (); \
+  } \
+
+#define TEST_ALL(TYPE1, TYPE2, CTY, TY) \
+  POW1OVER (TYPE1, TYPE2, CTY, TY) \
+  POW0 (TYPE1, TYPE2, CTY, TY)
+
+TEST_ALL (double, double, , )
+TEST_ALL (float, float, f, f)
+TEST_ALL (_Float16, _Float16, f16, f16)
+TEST_ALL (long double, long double, L, l)
+TEST_ALL (double, int, , i)
+TEST_ALL (float, int, f, if)
+TEST_ALL (long double, int, L, il)
+
+/* { dg-final { scan-tree-dump-not "link_error" "optimized" } } */