diff options
author | Andrew Pinski <apinski@marvell.com> | 2022-01-09 23:39:31 -0800 |
---|---|---|
committer | Andrew Pinski <apinski@marvell.com> | 2022-01-23 22:17:59 +0000 |
commit | cbcf4a50fa21abd7a4a50a7ce47ada80b115febc (patch) | |
tree | b12e35dc952601d083fbb9ece21f2ad46802fd85 /gcc | |
parent | 603a9ab41f4fd2748407fecf8b4ce2e5f9f71c23 (diff) | |
download | gcc-cbcf4a50fa21abd7a4a50a7ce47ada80b115febc.zip gcc-cbcf4a50fa21abd7a4a50a7ce47ada80b115febc.tar.gz gcc-cbcf4a50fa21abd7a4a50a7ce47ada80b115febc.tar.bz2 |
[aarch64/64821]: Simplify __builtin_aarch64_sqrt* into internal function .SQRT.
This is a simple patch which simplifies the __builtin_aarch64_sqrt* builtins
into the internal function SQRT which allows for constant folding and other
optimizations at the gimple level. It was originally suggested we lower this
to __builtin_sqrt, but only for __builtin_aarch64_sqrtdf when -fno-math-errno is in effect,
but since r6-4969-g686ee9719a4 we have the internal function SQRT, which does
the same thing, so we no longer need to check -fno-math-errno either.
Applied as approved after bootstrapping and testing on aarch64-linux-gnu with no regressions.
PR target/64821
gcc/ChangeLog:
* config/aarch64/aarch64-builtins.cc
(aarch64_general_gimple_fold_builtin): Handle
__builtin_aarch64_sqrt* and simplify into SQRT internal
function.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/vsqrt-1.c: New test.
* gcc.target/aarch64/vsqrt-2.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/aarch64/aarch64-builtins.cc | 7 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/vsqrt-1.c | 17 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/vsqrt-2.c | 28 |
3 files changed, 52 insertions, 0 deletions
diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc index b7f338d..5217dbd 100644 --- a/gcc/config/aarch64/aarch64-builtins.cc +++ b/gcc/config/aarch64/aarch64-builtins.cc @@ -2820,6 +2820,13 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt, gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); break; + /* Lower sqrt builtins to gimple/internal function sqrt. */ + BUILTIN_VHSDF_DF (UNOP, sqrt, 2, FP) + new_stmt = gimple_build_call_internal (IFN_SQRT, + 1, args[0]); + gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); + break; + /*lower store and load neon builtins to gimple. */ BUILTIN_VALL_F16 (LOAD1, ld1, 0, LOAD) BUILTIN_VDQ_I (LOAD1_U, ld1, 0, LOAD) diff --git a/gcc/testsuite/gcc.target/aarch64/vsqrt-1.c b/gcc/testsuite/gcc.target/aarch64/vsqrt-1.c new file mode 100644 index 0000000..e614c7d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vsqrt-1.c @@ -0,0 +1,17 @@ +/* PR target/64821 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ +/* Check that we constant fold sqrt(4.0) into 2.0. */ +/* { dg-final { scan-tree-dump-not " \\\.SQRT" "optimized" } } */ +/* { dg-final { scan-tree-dump " 2\\\.0e\\\+0" "optimized" } } */ +/* { dg-final { scan-assembler-not "fsqrt" } } */ +/* We should produce a fmov to d0 with 2.0 but currently don't, see PR 103959. 
*/ +/* { dg-final { scan-assembler-times "\n\tfmov\td0, 2.0e.0" 1 { xfail *-*-* } } } */ + +#include <arm_neon.h> + +float64x1_t f64(void) +{ + float64x1_t a = (float64x1_t){4.0}; + return vsqrt_f64 (a); +} diff --git a/gcc/testsuite/gcc.target/aarch64/vsqrt-2.c b/gcc/testsuite/gcc.target/aarch64/vsqrt-2.c new file mode 100644 index 0000000..4dea4da --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vsqrt-2.c @@ -0,0 +1,28 @@ +/* PR target/64821 */ +/* { dg-do compile } */ +/* { dg-options "-fdump-tree-optimized" } */ +#include <arm_neon.h> + +/* Check that we lower __builtin_aarch64_sqrt* into the internal function SQRT. */ +/* { dg-final { scan-tree-dump-times " __builtin_aarch64_sqrt" 0 "optimized" } } */ +/* { dg-final { scan-tree-dump-times " \\\.SQRT " 4 "optimized" } } */ + +float64x1_t f64(float64x1_t a) +{ + return vsqrt_f64 (a); +} + +float64x2_t f64q(float64x2_t a) +{ + return vsqrtq_f64 (a); +} + +float32x2_t f32(float32x2_t a) +{ + return vsqrt_f32 (a); +} + +float32x4_t f32q(float32x4_t a) +{ + return vsqrtq_f32 (a); +} |