1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
|
//===-- Half-precision asinpif16(x) function ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "src/math/asinpif16.h"
#include "hdr/errno_macros.h"
#include "hdr/fenv_macros.h"
#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/PolyEval.h"
#include "src/__support/FPUtil/cast.h"
#include "src/__support/FPUtil/except_value_utils.h"
#include "src/__support/FPUtil/multiply_add.h"
#include "src/__support/FPUtil/sqrt.h"
#include "src/__support/macros/optimization.h"
namespace LIBC_NAMESPACE_DECL {
// Computes asin(x) / pi for a half-precision input.
//
// Special inputs handled explicitly:
//   - x is NaN: a quiet NaN is returned unchanged; a signaling NaN reports
//     FE_INVALID (via raise_except_if_required) and yields a quiet NaN.
//   - |x| > 1, including +/-inf: reports FE_INVALID, reports EDOM (via
//     set_errno_if_required) and yields a quiet NaN.
//
// All intermediate arithmetic is carried out in double and converted to
// float16 once, on return.
LLVM_LIBC_FUNCTION(float16, asinpif16, (float16 x)) {
  using FPBits = fputil::FPBits<float16>;

  FPBits xbits(x);

  // asinpif16(NaN) = NaN
  //
  // This has to be tested before any ordered comparison on the value: every
  // ordered comparison involving a NaN is false, so a NaN would never enter a
  // `|x| > 1` branch and would instead fall through to the arithmetic below.
  if (LIBC_UNLIKELY(xbits.is_nan())) {
    if (xbits.is_signaling_nan()) {
      fputil::raise_except_if_required(FE_INVALID);
      return FPBits::quiet_nan().get_val();
    }
    return x;
  }

  bool is_neg = xbits.is_neg();
  double x_abs = fputil::cast<double>(xbits.abs().get_val());

  // Re-applies the sign of the input to a result computed from |x|.
  auto signed_result = [is_neg](auto r) -> auto { return is_neg ? -r : r; };

  // 1 < |x| <= +inf: outside the domain of asin.
  if (LIBC_UNLIKELY(x_abs > 1.0)) {
    fputil::raise_except_if_required(FE_INVALID);
    fputil::set_errno_if_required(EDOM);
    return FPBits::quiet_nan().get_val();
  }

  // The coefficients for the polynomial approximation of asin(x)/pi in the
  // range [0, 0.5], extracted using python-sympy.
  //
  // Python code to generate the coefficients:
  //  > from sympy import *
  //  > import math
  //  > x = symbols('x')
  //  > print(series(asin(x)/math.pi, x, 0, 21))
  //
  // OUTPUT:
  //
  // 0.318309886183791*x + 0.0530516476972984*x**3 + 0.0238732414637843*x**5 +
  // 0.0142102627760621*x**7 + 0.00967087327815336*x**9 +
  // 0.00712127941391293*x**11 + 0.00552355646848375*x**13 +
  // 0.00444514782463692*x**15 + 0.00367705242846804*x**17 +
  // 0.00310721681820837*x**19 + O(x**21)
  //
  // Only the first eight terms (up to x**15) are used here. The first omitted
  // term, 0.00367705242846804*x**17, is about 2.8e-8 at x = 0.5 and smaller
  // everywhere else in [0, 0.5].
  constexpr double POLY_COEFFS[] = {
      0x1.45f306dc9c889p-2, // x^1
      0x1.b2995e7b7b5fdp-5, // x^3
      0x1.8723a1d588a36p-6, // x^5
      0x1.d1a452f20430dp-7, // x^7
      0x1.3ce52a3a09f61p-7, // x^9
      0x1.d2b33e303d375p-8, // x^11
      0x1.69fde663c674fp-8, // x^13
      0x1.235134885f19bp-8, // x^15
  };

  // Polynomial evaluation using Horner's method, as an odd polynomial:
  //   v * P(v^2)
  // Only meant for |v| in [0, 0.5]. Being odd in v, it carries the sign of its
  // argument through to the result.
  auto asinpi_polyeval = [](double v) -> double {
    return v * fputil::polyeval(v * v, POLY_COEFFS[0], POLY_COEFFS[1],
                                POLY_COEFFS[2], POLY_COEFFS[3], POLY_COEFFS[4],
                                POLY_COEFFS[5], POLY_COEFFS[6], POLY_COEFFS[7]);
  };

  // if |x| <= 0.5:
  if (x_abs <= 0.5) {
    // Use the polynomial approximation of asin(x)/pi directly on the signed
    // input; no separate sign fix-up is needed because the polynomial is odd.
    double result = asinpi_polyeval(fputil::cast<double>(x));
    return fputil::cast<float16>(result);
  }

  // If |x| > 0.5, we need to use the range reduction method:
  //    y = asin(x) => x = sin(y)
  //      because: sin(a) = cos(pi/2 - a)
  //      therefore:
  //    x = cos(pi/2 - y)
  //      let z = pi/2 - y,
  //    x = cos(z)
  //      because: cos(2a) = 1 - 2 * sin^2(a), z = 2a, a = z/2
  //      therefore:
  //    cos(z) = 1 - 2 * sin^2(z/2)
  //    sin(z/2) = sqrt((1 - cos(z))/2)
  //    sin(z/2) = sqrt((1 - x)/2)
  //      let u = (1 - x)/2
  //      then:
  //    sin(z/2) = sqrt(u)
  //    z/2 = asin(sqrt(u))
  //    z = 2 * asin(sqrt(u))
  //    pi/2 - y = 2 * asin(sqrt(u))
  //    y = pi/2 - 2 * asin(sqrt(u))
  //    y/pi = 1/2 - 2 * asin(sqrt(u))/pi
  //
  // Finally, we can write:
  //   asinpi(x) = 1/2 - 2 * asinpi(sqrt(u))
  //     where u = (1 - x) / 2
  //             = 0.5 - 0.5 * x
  //             = multiply_add(-0.5, x, 0.5)
  //
  // The reduction is applied to |x| (so 0 <= u < 0.25 and sqrt(u) < 0.5, back
  // inside the polynomial's range) and the sign is restored at the end.
  double u = fputil::multiply_add(-0.5, x_abs, 0.5);
  double asinpi_sqrt_u = asinpi_polyeval(fputil::sqrt<double>(u));
  double result = fputil::multiply_add(-2.0, asinpi_sqrt_u, 0.5);

  return fputil::cast<float16>(signed_result(result));
}
} // namespace LIBC_NAMESPACE_DECL
|