about | summary | refs | log | tree | commit | diff
path: root/sysdeps/ieee754
diff options
context:
space:
mode:
author: Wilco Dijkstra <wilco.dijkstra@arm.com> 2021-03-10 12:39:56 +0000
committer: Wilco Dijkstra <wdijkstr@arm.com> 2021-03-11 14:26:36 +0000
commit: db3f7bb5586392d9809fc6397c7184983aed6008 (patch)
tree: 59384a45317a88bceae8b4155975b798a80d09a6 /sysdeps/ieee754
parent: 94caafa040e4b4289c968cd70d53041b1463ac4d (diff)
download: glibc-db3f7bb5586392d9809fc6397c7184983aed6008.zip
glibc-db3f7bb5586392d9809fc6397c7184983aed6008.tar.gz
glibc-db3f7bb5586392d9809fc6397c7184983aed6008.tar.bz2
math: Remove slow paths from asin and acos [BZ #15267]
This patch series removes all remaining slow paths and related code. First, the asin/acos, tan, atan and atan2 implementations are updated; the final patch then removes the unused mpa files, headers and probes. Passes build-many-glibcs.py. This patch removes the slow paths from asin/acos, adds ULP annotations based on the previous slow path checks (which are approximate), and updates the AArch64 and x86_64 libm-test-ulps. Reviewed-By: Paul Zimmermann <Paul.Zimmermann@inria.fr>
Diffstat (limited to 'sysdeps/ieee754')
-rw-r--r-- sysdeps/ieee754/dbl-64/e_asin.c | 341
1 file changed, 43 insertions, 298 deletions
diff --git a/sysdeps/ieee754/dbl-64/e_asin.c b/sysdeps/ieee754/dbl-64/e_asin.c
index ae5344f..4473ae5 100644
--- a/sysdeps/ieee754/dbl-64/e_asin.c
+++ b/sysdeps/ieee754/dbl-64/e_asin.c
@@ -21,8 +21,7 @@
/* */
/* FUNCTIONS: uasin */
/* uacos */
-/* FILES NEEDED: dla.h endian.h mpa.h mydefs.h usncs.h */
-/* doasin.c sincos32.c dosincos.c mpa.c */
+/* FILES NEEDED: dla.h endian.h mydefs.h usncs.h */
/* sincos.tbl asincos.tbl powtwo.tbl root.tbl */
/* */
/******************************************************************/
@@ -31,7 +30,6 @@
#include "asincos.tbl"
#include "root.tbl"
#include "powtwo.tbl"
-#include "MathLib.h"
#include "uasncs.h"
#include <float.h>
#include <math.h>
@@ -43,15 +41,11 @@
# define SECTION
#endif
-void __doasin(double x, double dx, double w[]);
-void __dubsin(double x, double dx, double v[]);
-void __dubcos(double x, double dx, double v[]);
-void __docos(double x, double dx, double v[]);
-
+/* asin with max ULP of ~0.516 based on random sampling. */
double
SECTION
__ieee754_asin(double x){
- double x1,x2,xx,s1,s2,res1,p,t,res,r,cor,cc,y,c,z,w[2];
+ double x2,xx,res1,p,t,res,r,cor,cc,y,c,z;
mynumber u,v;
int4 k,m,n;
@@ -70,27 +64,8 @@ __ieee754_asin(double x){
x2 = x*x;
t = (((((f6*x2 + f5)*x2 + f4)*x2 + f3)*x2 + f2)*x2 + f1)*(x2*x);
res = x+t; /* res=arcsin(x) according to Taylor series */
- cor = (x-res)+t;
- if (res == res+1.025*cor) return res;
- else {
- x1 = x+big;
- xx = x*x;
- x1 -= big;
- x2 = x - x1;
- p = x1*x1*x1;
- s1 = a1.x*p;
- s2 = ((((((c7*xx + c6)*xx + c5)*xx + c4)*xx + c3)*xx + c2)*xx*xx*x +
- ((a1.x+a2.x)*x2*x2+ 0.5*x1*x)*x2) + a2.x*p;
- res1 = x+s1;
- s2 = ((x-res1)+s1)+s2;
- res = res1+s2;
- cor = (res1-res)+s2;
- if (res == res+1.00014*cor) return res;
- else {
- __doasin(x,0,w);
- return w[0];
- }
- }
+ /* Max ULP is 0.513. */
+ return res;
}
/*---------------------0.125 <= |x| < 0.5 -----------------------------*/
else if (k < 0x3fe00000) {
@@ -103,26 +78,8 @@ __ieee754_asin(double x){
+xx*asncs.x[n+6]))))+asncs.x[n+7];
t+=p;
res =asncs.x[n+8] +t;
- cor = (asncs.x[n+8]-res)+t;
- if (res == res+1.05*cor) return (m>0)?res:-res;
- else {
- r=asncs.x[n+8]+xx*asncs.x[n+9];
- t=((asncs.x[n+8]-r)+xx*asncs.x[n+9])+(p+xx*asncs.x[n+10]);
- res = r+t;
- cor = (r-res)+t;
- if (res == res+1.0005*cor) return (m>0)?res:-res;
- else {
- res1=res+1.1*cor;
- z=0.5*(res1-res);
- __dubsin(res,z,w);
- z=(w[0]-fabs(x))+w[1];
- if (z>1.0e-27) return (m>0)?min(res,res1):-min(res,res1);
- else if (z<-1.0e-27) return (m>0)?max(res,res1):-max(res,res1);
- else {
- return (m>0)?res:-res;
- }
- }
- }
+ /* Max ULP is 0.524. */
+ return (m>0)?res:-res;
} /* else if (k < 0x3fe00000) */
/*-------------------- 0.5 <= |x| < 0.75 -----------------------------*/
else
@@ -135,26 +92,8 @@ __ieee754_asin(double x){
+xx*(asncs.x[n+6]+xx*asncs.x[n+7])))))+asncs.x[n+8];
t+=p;
res =asncs.x[n+9] +t;
- cor = (asncs.x[n+9]-res)+t;
- if (res == res+1.01*cor) return (m>0)?res:-res;
- else {
- r=asncs.x[n+9]+xx*asncs.x[n+10];
- t=((asncs.x[n+9]-r)+xx*asncs.x[n+10])+(p+xx*asncs.x[n+11]);
- res = r+t;
- cor = (r-res)+t;
- if (res == res+1.0005*cor) return (m>0)?res:-res;
- else {
- res1=res+1.1*cor;
- z=0.5*(res1-res);
- __dubsin(res,z,w);
- z=(w[0]-fabs(x))+w[1];
- if (z>1.0e-27) return (m>0)?min(res,res1):-min(res,res1);
- else if (z<-1.0e-27) return (m>0)?max(res,res1):-max(res,res1);
- else {
- return (m>0)?res:-res;
- }
- }
- }
+ /* Max ULP is 0.505. */
+ return (m>0)?res:-res;
} /* else if (k < 0x3fe80000) */
/*--------------------- 0.75 <= |x|< 0.921875 ----------------------*/
else
@@ -167,28 +106,8 @@ __ieee754_asin(double x){
+xx*(asncs.x[n+6]+xx*(asncs.x[n+7]+xx*asncs.x[n+8]))))))+asncs.x[n+9];
t+=p;
res =asncs.x[n+10] +t;
- cor = (asncs.x[n+10]-res)+t;
- if (res == res+1.01*cor) return (m>0)?res:-res;
- else {
- r=asncs.x[n+10]+xx*asncs.x[n+11];
- t=((asncs.x[n+10]-r)+xx*asncs.x[n+11])+(p+xx*asncs.x[n+12]);
- res = r+t;
- cor = (r-res)+t;
- if (res == res+1.0008*cor) return (m>0)?res:-res;
- else {
- res1=res+1.1*cor;
- z=0.5*(res1-res);
- y=hp0.x-res;
- z=((hp0.x-y)-res)+(hp1.x-z);
- __dubcos(y,z,w);
- z=(w[0]-fabs(x))+w[1];
- if (z>1.0e-27) return (m>0)?min(res,res1):-min(res,res1);
- else if (z<-1.0e-27) return (m>0)?max(res,res1):-max(res,res1);
- else {
- return (m>0)?res:-res;
- }
- }
- }
+ /* Max ULP is 0.505. */
+ return (m>0)?res:-res;
} /* else if (k < 0x3fed8000) */
/*-------------------0.921875 <= |x| < 0.953125 ------------------------*/
else
@@ -203,29 +122,8 @@ __ieee754_asin(double x){
xx*asncs.x[n+9])))))))+asncs.x[n+10];
t+=p;
res =asncs.x[n+11] +t;
- cor = (asncs.x[n+11]-res)+t;
- if (res == res+1.01*cor) return (m>0)?res:-res;
- else {
- r=asncs.x[n+11]+xx*asncs.x[n+12];
- t=((asncs.x[n+11]-r)+xx*asncs.x[n+12])+(p+xx*asncs.x[n+13]);
- res = r+t;
- cor = (r-res)+t;
- if (res == res+1.0007*cor) return (m>0)?res:-res;
- else {
- res1=res+1.1*cor;
- z=0.5*(res1-res);
- y=(hp0.x-res)-z;
- z=y+hp1.x;
- y=(y-z)+hp1.x;
- __dubcos(z,y,w);
- z=(w[0]-fabs(x))+w[1];
- if (z>1.0e-27) return (m>0)?min(res,res1):-min(res,res1);
- else if (z<-1.0e-27) return (m>0)?max(res,res1):-max(res,res1);
- else {
- return (m>0)?res:-res;
- }
- }
- }
+ /* Max ULP is 0.505. */
+ return (m>0)?res:-res;
} /* else if (k < 0x3fee8000) */
/*--------------------0.953125 <= |x| < 0.96875 ------------------------*/
@@ -241,29 +139,8 @@ __ieee754_asin(double x){
xx*(asncs.x[n+9]+xx*asncs.x[n+10]))))))))+asncs.x[n+11];
t+=p;
res =asncs.x[n+12] +t;
- cor = (asncs.x[n+12]-res)+t;
- if (res == res+1.01*cor) return (m>0)?res:-res;
- else {
- r=asncs.x[n+12]+xx*asncs.x[n+13];
- t=((asncs.x[n+12]-r)+xx*asncs.x[n+13])+(p+xx*asncs.x[n+14]);
- res = r+t;
- cor = (r-res)+t;
- if (res == res+1.0007*cor) return (m>0)?res:-res;
- else {
- res1=res+1.1*cor;
- z=0.5*(res1-res);
- y=(hp0.x-res)-z;
- z=y+hp1.x;
- y=(y-z)+hp1.x;
- __dubcos(z,y,w);
- z=(w[0]-fabs(x))+w[1];
- if (z>1.0e-27) return (m>0)?min(res,res1):-min(res,res1);
- else if (z<-1.0e-27) return (m>0)?max(res,res1):-max(res,res1);
- else {
- return (m>0)?res:-res;
- }
- }
- }
+ /* Max ULP is 0.505. */
+ return (m>0)?res:-res;
} /* else if (k < 0x3fef0000) */
/*--------------------0.96875 <= |x| < 1 --------------------------------*/
else
@@ -282,16 +159,8 @@ __ieee754_asin(double x){
cor = (hp1.x - 2.0*cc)-2.0*(y+cc)*p;
res1 = hp0.x - 2.0*y;
res =res1 + cor;
- if (res == res+1.003*((res1-res)+cor)) return (m>0)?res:-res;
- else {
- c=y+cc;
- cc=(y-c)+cc;
- __doasin(c,cc,w);
- res1=hp0.x-2.0*w[0];
- cor=((hp0.x-res1)-2.0*w[0])+(hp1.x-2.0*w[1]);
- res = res1+cor;
- return (m>0)?res:-res;
- }
+ /* Max ULP is 0.5015. */
+ return (m>0)?res:-res;
} /* else if (k < 0x3ff00000) */
/*---------------------------- |x|>=1 -------------------------------*/
else if (k==0x3ff00000 && u.i[LOW_HALF]==0) return (m>0)?hp0.x:-hp0.x;
@@ -315,11 +184,12 @@ libm_alias_finite (__ieee754_asin, __asin)
/* */
/*******************************************************************/
+/* acos with max ULP of ~0.523 based on random sampling. */
double
SECTION
__ieee754_acos(double x)
{
- double x1,x2,xx,s1,s2,res1,p,t,res,r,cor,cc,y,c,z,w[2],eps;
+ double x2,xx,res1,p,t,res,r,cor,cc,y,c,z;
mynumber u,v;
int4 k,m,n;
u.x = x;
@@ -336,32 +206,8 @@ __ieee754_acos(double x)
r=hp0.x-x;
cor=(((hp0.x-r)-x)+hp1.x)-t;
res = r+cor;
- cor = (r-res)+cor;
- if (res == res+1.004*cor) return res;
- else {
- x1 = x+big;
- xx = x*x;
- x1 -= big;
- x2 = x - x1;
- p = x1*x1*x1;
- s1 = a1.x*p;
- s2 = ((((((c7*xx + c6)*xx + c5)*xx + c4)*xx + c3)*xx + c2)*xx*xx*x +
- ((a1.x+a2.x)*x2*x2+ 0.5*x1*x)*x2) + a2.x*p;
- res1 = x+s1;
- s2 = ((x-res1)+s1)+s2;
- r=hp0.x-res1;
- cor=(((hp0.x-r)-res1)+hp1.x)-s2;
- res = r+cor;
- cor = (r-res)+cor;
- if (res == res+1.00004*cor) return res;
- else {
- __doasin(x,0,w);
- r=hp0.x-w[0];
- cor=((hp0.x-r)-w[0])+(hp1.x-w[1]);
- res=r+cor;
- return res;
- }
- }
+ /* Max ULP is 0.502. */
+ return res;
} /* else if (k < 0x3fc00000) */
/*---------------------- 0.125 <= |x| < 0.5 --------------------*/
else
@@ -377,35 +223,16 @@ __ieee754_acos(double x)
y = (m>0)?(hp0.x-asncs.x[n+8]):(hp0.x+asncs.x[n+8]);
t = (m>0)?(hp1.x-t):(hp1.x+t);
res = y+t;
- if (res == res+1.02*((y-res)+t)) return res;
- else {
- r=asncs.x[n+8]+xx*asncs.x[n+9];
- t=((asncs.x[n+8]-r)+xx*asncs.x[n+9])+(p+xx*asncs.x[n+10]);
- if (m>0)
- {p = hp0.x-r; t = (((hp0.x-p)-r)-t)+hp1.x; }
- else
- {p = hp0.x+r; t = ((hp0.x-p)+r)+(hp1.x+t); }
- res = p+t;
- cor = (p-res)+t;
- if (res == (res+1.0002*cor)) return res;
- else {
- res1=res+1.1*cor;
- z=0.5*(res1-res);
- __docos(res,z,w);
- z=(w[0]-x)+w[1];
- if (z>1.0e-27) return max(res,res1);
- else if (z<-1.0e-27) return min(res,res1);
- else return res;
- }
- }
+ /* Max ULP is 0.51. */
+ return res;
} /* else if (k < 0x3fe00000) */
/*--------------------------- 0.5 <= |x| < 0.75 ---------------------*/
else
if (k < 0x3fe80000) {
n = 1056+((k&0x000fe000)>>11)*3;
- if (m>0) {xx = x - asncs.x[n]; eps=1.04; }
- else {xx = -x - asncs.x[n]; eps=1.02; }
+ if (m>0) {xx = x - asncs.x[n]; }
+ else {xx = -x - asncs.x[n]; }
t = asncs.x[n+1]*xx;
p=xx*xx*(asncs.x[n+2]+xx*(asncs.x[n+3]+xx*(asncs.x[n+4]+
xx*(asncs.x[n+5]+xx*(asncs.x[n+6]+
@@ -414,33 +241,16 @@ __ieee754_acos(double x)
y = (m>0)?(hp0.x-asncs.x[n+9]):(hp0.x+asncs.x[n+9]);
t = (m>0)?(hp1.x-t):(hp1.x+t);
res = y+t;
- if (res == res+eps*((y-res)+t)) return res;
- else {
- r=asncs.x[n+9]+xx*asncs.x[n+10];
- t=((asncs.x[n+9]-r)+xx*asncs.x[n+10])+(p+xx*asncs.x[n+11]);
- if (m>0) {p = hp0.x-r; t = (((hp0.x-p)-r)-t)+hp1.x; eps=1.0004; }
- else {p = hp0.x+r; t = ((hp0.x-p)+r)+(hp1.x+t); eps=1.0002; }
- res = p+t;
- cor = (p-res)+t;
- if (res == (res+eps*cor)) return res;
- else {
- res1=res+1.1*cor;
- z=0.5*(res1-res);
- __docos(res,z,w);
- z=(w[0]-x)+w[1];
- if (z>1.0e-27) return max(res,res1);
- else if (z<-1.0e-27) return min(res,res1);
- else return res;
- }
- }
+ /* Max ULP is 0.523 based on random sampling. */
+ return res;
} /* else if (k < 0x3fe80000) */
/*------------------------- 0.75 <= |x| < 0.921875 -------------*/
else
if (k < 0x3fed8000) {
n = 992+((k&0x000fe000)>>13)*13;
- if (m>0) {xx = x - asncs.x[n]; eps = 1.04; }
- else {xx = -x - asncs.x[n]; eps = 1.01; }
+ if (m>0) {xx = x - asncs.x[n]; }
+ else {xx = -x - asncs.x[n]; }
t = asncs.x[n+1]*xx;
p=xx*xx*(asncs.x[n+2]+xx*(asncs.x[n+3]+xx*(asncs.x[n+4]+
xx*(asncs.x[n+5]+xx*(asncs.x[n+6]+xx*(asncs.x[n+7]+
@@ -449,33 +259,16 @@ __ieee754_acos(double x)
y = (m>0)?(hp0.x-asncs.x[n+10]):(hp0.x+asncs.x[n+10]);
t = (m>0)?(hp1.x-t):(hp1.x+t);
res = y+t;
- if (res == res+eps*((y-res)+t)) return res;
- else {
- r=asncs.x[n+10]+xx*asncs.x[n+11];
- t=((asncs.x[n+10]-r)+xx*asncs.x[n+11])+(p+xx*asncs.x[n+12]);
- if (m>0) {p = hp0.x-r; t = (((hp0.x-p)-r)-t)+hp1.x; eps=1.0032; }
- else {p = hp0.x+r; t = ((hp0.x-p)+r)+(hp1.x+t); eps=1.0008; }
- res = p+t;
- cor = (p-res)+t;
- if (res == (res+eps*cor)) return res;
- else {
- res1=res+1.1*cor;
- z=0.5*(res1-res);
- __docos(res,z,w);
- z=(w[0]-x)+w[1];
- if (z>1.0e-27) return max(res,res1);
- else if (z<-1.0e-27) return min(res,res1);
- else return res;
- }
- }
+ /* Max ULP is 0.523 based on random sampling. */
+ return res;
} /* else if (k < 0x3fed8000) */
/*-------------------0.921875 <= |x| < 0.953125 ------------------*/
else
if (k < 0x3fee8000) {
n = 884+((k&0x000fe000)>>13)*14;
- if (m>0) {xx = x - asncs.x[n]; eps=1.04; }
- else {xx = -x - asncs.x[n]; eps =1.005; }
+ if (m>0) {xx = x - asncs.x[n]; }
+ else {xx = -x - asncs.x[n]; }
t = asncs.x[n+1]*xx;
p=xx*xx*(asncs.x[n+2]+xx*(asncs.x[n+3]+xx*(asncs.x[n+4]+
xx*(asncs.x[n+5]+xx*(asncs.x[n+6]
@@ -485,33 +278,16 @@ __ieee754_acos(double x)
y = (m>0)?(hp0.x-asncs.x[n+11]):(hp0.x+asncs.x[n+11]);
t = (m>0)?(hp1.x-t):(hp1.x+t);
res = y+t;
- if (res == res+eps*((y-res)+t)) return res;
- else {
- r=asncs.x[n+11]+xx*asncs.x[n+12];
- t=((asncs.x[n+11]-r)+xx*asncs.x[n+12])+(p+xx*asncs.x[n+13]);
- if (m>0) {p = hp0.x-r; t = (((hp0.x-p)-r)-t)+hp1.x; eps=1.0030; }
- else {p = hp0.x+r; t = ((hp0.x-p)+r)+(hp1.x+t); eps=1.0005; }
- res = p+t;
- cor = (p-res)+t;
- if (res == (res+eps*cor)) return res;
- else {
- res1=res+1.1*cor;
- z=0.5*(res1-res);
- __docos(res,z,w);
- z=(w[0]-x)+w[1];
- if (z>1.0e-27) return max(res,res1);
- else if (z<-1.0e-27) return min(res,res1);
- else return res;
- }
- }
+ /* Max ULP is 0.523 based on random sampling. */
+ return res;
} /* else if (k < 0x3fee8000) */
/*--------------------0.953125 <= |x| < 0.96875 ----------------*/
else
if (k < 0x3fef0000) {
n = 768+((k&0x000fe000)>>13)*15;
- if (m>0) {xx = x - asncs.x[n]; eps=1.04; }
- else {xx = -x - asncs.x[n]; eps=1.005;}
+ if (m>0) {xx = x - asncs.x[n]; }
+ else {xx = -x - asncs.x[n]; }
t = asncs.x[n+1]*xx;
p=xx*xx*(asncs.x[n+2]+xx*(asncs.x[n+3]+xx*(asncs.x[n+4]+
xx*(asncs.x[n+5]+xx*(asncs.x[n+6]
@@ -521,25 +297,8 @@ __ieee754_acos(double x)
y = (m>0)?(hp0.x-asncs.x[n+12]):(hp0.x+asncs.x[n+12]);
t = (m>0)?(hp1.x-t):(hp1.x+t);
res = y+t;
- if (res == res+eps*((y-res)+t)) return res;
- else {
- r=asncs.x[n+12]+xx*asncs.x[n+13];
- t=((asncs.x[n+12]-r)+xx*asncs.x[n+13])+(p+xx*asncs.x[n+14]);
- if (m>0) {p = hp0.x-r; t = (((hp0.x-p)-r)-t)+hp1.x; eps=1.0030; }
- else {p = hp0.x+r; t = ((hp0.x-p)+r)+(hp1.x+t); eps=1.0005; }
- res = p+t;
- cor = (p-res)+t;
- if (res == (res+eps*cor)) return res;
- else {
- res1=res+1.1*cor;
- z=0.5*(res1-res);
- __docos(res,z,w);
- z=(w[0]-x)+w[1];
- if (z>1.0e-27) return max(res,res1);
- else if (z<-1.0e-27) return min(res,res1);
- else return res;
- }
- }
+ /* Max ULP is 0.523 based on random sampling. */
+ return res;
} /* else if (k < 0x3fef0000) */
/*-----------------0.96875 <= |x| < 1 ---------------------------*/
@@ -560,28 +319,14 @@ __ieee754_acos(double x)
cor = (hp1.x - cc)-(y+cc)*p;
res1 = hp0.x - y;
res =res1 + cor;
- if (res == res+1.002*((res1-res)+cor)) return (res+res);
- else {
- c=y+cc;
- cc=(y-c)+cc;
- __doasin(c,cc,w);
- res1=hp0.x-w[0];
- cor=((hp0.x-res1)-w[0])+(hp1.x-w[1]);
- res = res1+cor;
- return (res+res);
- }
+ /* Max ULP is 0.501. */
+ return (res+res);
}
else {
cor = cc+p*(y+cc);
res = y + cor;
- if (res == res+1.03*((y-res)+cor)) return (res+res);
- else {
- c=y+cc;
- cc=(y-c)+cc;
- __doasin(c,cc,w);
- res = w[0];
- return (res+res);
- }
+ /* Max ULP is 0.515. */
+ return (res+res);
}
} /* else if (k < 0x3ff00000) */