From a1d3c9b64266d84736ba8eac96aa864165905b79 Mon Sep 17 00:00:00 2001 From: Adhemerval Zanella Date: Tue, 6 Apr 2021 12:32:06 -0300 Subject: i386: Move hypot implementation to C The generic hypotf is slight slower, mostly due the tricks the assembly does to optimize the isinf/isnan/issignaling. The generic hypot is way slower, since the optimized implementation uses the i386 default excessive precision to issue the operation directly. A similar implementation is provided instead of using the generic implementation: Checked on i686-linux-gnu. --- sysdeps/i386/fpu/e_hypot.S | 75 --------------------------------------------- sysdeps/i386/fpu/e_hypot.c | 48 +++++++++++++++++++++++++++++ sysdeps/i386/fpu/e_hypotf.S | 64 -------------------------------------- 3 files changed, 48 insertions(+), 139 deletions(-) delete mode 100644 sysdeps/i386/fpu/e_hypot.S create mode 100644 sysdeps/i386/fpu/e_hypot.c delete mode 100644 sysdeps/i386/fpu/e_hypotf.S (limited to 'sysdeps') diff --git a/sysdeps/i386/fpu/e_hypot.S b/sysdeps/i386/fpu/e_hypot.S deleted file mode 100644 index f2c956b..0000000 --- a/sysdeps/i386/fpu/e_hypot.S +++ /dev/null @@ -1,75 +0,0 @@ -/* Compute the hypothenuse of X and Y. - Copyright (C) 1998-2021 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#include -#include -#include - -DEFINE_DBL_MIN - -#ifdef PIC -# define MO(op) op##@GOTOFF(%edx) -#else -# define MO(op) op -#endif - - .text -ENTRY(__ieee754_hypot) -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - fldl 4(%esp) // x - fxam - fnstsw - fldl 12(%esp) // y : x - movb %ah, %ch - fxam - fnstsw - movb %ah, %al - orb %ch, %ah - sahf - jc 1f - fmul %st(0) // y * y : x - fxch // x : y * y - fmul %st(0) // x * x : y * y - faddp // x * x + y * y - fsqrt - DBL_NARROW_EVAL_UFLOW_NONNEG -2: ret - - // We have to test whether any of the parameters is Inf. - // In this case the result is infinity. -1: andb $0x45, %al - cmpb $5, %al - je 3f // jump if y is Inf - andb $0x45, %ch - cmpb $5, %ch - jne 4f // jump if x is not Inf - fxch -3: fstp %st(1) - fabs - jmp 2b - -4: testb $1, %al - jnz 5f // y is NaN - fxch -5: fstp %st(1) - jmp 2b - -END(__ieee754_hypot) -libm_alias_finite (__ieee754_hypot, __hypot) diff --git a/sysdeps/i386/fpu/e_hypot.c b/sysdeps/i386/fpu/e_hypot.c new file mode 100644 index 0000000..aa49d42 --- /dev/null +++ b/sysdeps/i386/fpu/e_hypot.c @@ -0,0 +1,48 @@ +/* Euclidean distance function. Double/Binary64 i386 version. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* The i386 allows to use the default excess of precision to optimize the + hypot implementation, since internal multiplication and sqrt is carried + with 80-bit FP type. */ +double +__ieee754_hypot (double x, double y) +{ + if (!isfinite (x) || !isfinite (y)) + { + if ((isinf (x) || isinf (y)) + && !issignaling (x) && !issignaling (y)) + return INFINITY; + return x + y; + } + + long double lx = x; + long double ly = y; + double r = math_narrow_eval ((double) sqrtl (lx * lx + ly * ly)); + math_check_force_underflow_nonneg (r); + return r; +} +libm_alias_finite (__ieee754_hypot, __hypot) diff --git a/sysdeps/i386/fpu/e_hypotf.S b/sysdeps/i386/fpu/e_hypotf.S deleted file mode 100644 index cec5d15..0000000 --- a/sysdeps/i386/fpu/e_hypotf.S +++ /dev/null @@ -1,64 +0,0 @@ -/* Compute the hypothenuse of X and Y. - Copyright (C) 1998-2021 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#include -#include -#include - - .text -ENTRY(__ieee754_hypotf) - flds 4(%esp) // x - fxam - fnstsw - flds 8(%esp) // y : x - movb %ah, %ch - fxam - fnstsw - movb %ah, %al - orb %ch, %ah - sahf - jc 1f - fmul %st(0) // y * y : x - fxch // x : y * y - fmul %st(0) // x * x : y * y - faddp // x * x + y * y - fsqrt - FLT_NARROW_EVAL -2: ret - - // We have to test whether any of the parameters is Inf. - // In this case the result is infinity. -1: andb $0x45, %al - cmpb $5, %al - je 3f // jump if y is Inf - andb $0x45, %ch - cmpb $5, %ch - jne 4f // jump if x is not Inf - fxch -3: fstp %st(1) - fabs - jmp 2b - -4: testb $1, %al - jnz 5f // y is NaN - fxch -5: fstp %st(1) - jmp 2b - -END(__ieee754_hypotf) -libm_alias_finite (__ieee754_hypotf, __hypotf) -- cgit v1.1