aboutsummaryrefslogtreecommitdiff
path: root/libc/src/math/generic/asinf.cpp
blob: 77d6de910962c289d88ad552e42788ee8cead5f3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
//===-- Single-precision asin function ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/math/asinf.h"
#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/PolyEval.h"
#include "src/__support/FPUtil/except_value_utils.h"
#include "src/__support/FPUtil/multiply_add.h"
#include "src/__support/FPUtil/sqrt.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/optimization.h"            // LIBC_UNLIKELY
#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA

#include "src/__support/math/inv_trigf_utils.h"

namespace LIBC_NAMESPACE_DECL {

#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
static constexpr size_t N_EXCEPTS = 2;

// Exceptional values when |x| <= 0.5
static constexpr fputil::ExceptValues<float, N_EXCEPTS> ASINF_EXCEPTS_LO = {{
    // (inputs, RZ output, RU offset, RD offset, RN offset)
    // x = 0x1.137f0cp-5, asinf(x) = 0x1.138c58p-5 (RZ)
    {0x3d09bf86, 0x3d09c62c, 1, 0, 1},
    // x = 0x1.cbf43cp-4, asinf(x) = 0x1.cced1cp-4 (RZ)
    {0x3de5fa1e, 0x3de6768e, 1, 0, 0},
}};

// Exceptional values when 0.5 < |x| <= 1
static constexpr fputil::ExceptValues<float, N_EXCEPTS> ASINF_EXCEPTS_HI = {{
    // (inputs, RZ output, RU offset, RD offset, RN offset)
    // x = 0x1.107434p-1, asinf(x) = 0x1.1f4b64p-1 (RZ)
    {0x3f083a1a, 0x3f0fa5b2, 1, 0, 0},
    // x = 0x1.ee836cp-1, asinf(x) = 0x1.4f0654p0 (RZ)
    {0x3f7741b6, 0x3fa7832a, 1, 0, 0},
}};
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS

LLVM_LIBC_FUNCTION(float, asinf, (float x)) {
  using namespace inv_trigf_utils_internal;
  using FPBits = typename fputil::FPBits<float>;

  FPBits xbits(x);
  uint32_t x_uint = xbits.uintval();
  uint32_t x_abs = xbits.uintval() & 0x7fff'ffffU;
  constexpr double SIGN[2] = {1.0, -1.0};
  uint32_t x_sign = x_uint >> 31;

  // |x| <= 0.5-ish
  if (x_abs < 0x3f04'471dU) {
    // |x| < 0x1.d12edp-12
    if (LIBC_UNLIKELY(x_abs < 0x39e8'9768U)) {
      // When |x| < 2^-12, the relative error of the approximation asin(x) ~ x
      // is:
      //   |asin(x) - x| / |asin(x)| < |x^3| / (6|x|)
      //                             = x^2 / 6
      //                             < 2^-25
      //                             < epsilon(1)/2.
      // So the correctly rounded values of asin(x) are:
      //   = x + sign(x)*eps(x) if rounding mode = FE_TOWARDZERO,
      //                        or (rounding mode = FE_UPWARD and x is
      //                        negative),
      //   = x otherwise.
      // To simplify the rounding decision and make it more efficient, we use
      //   fma(x, 2^-25, x) instead.
      // An exhaustive test shows that this formula work correctly for all
      // rounding modes up to |x| < 0x1.d12edp-12.
      // Note: to use the formula x + 2^-25*x to decide the correct rounding, we
      // do need fma(x, 2^-25, x) to prevent underflow caused by 2^-25*x when
      // |x| < 2^-125. For targets without FMA instructions, we simply use
      // double for intermediate results as it is more efficient than using an
      // emulated version of FMA.
#if defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT)
      return fputil::multiply_add(x, 0x1.0p-25f, x);
#else
      double xd = static_cast<double>(x);
      return static_cast<float>(fputil::multiply_add(xd, 0x1.0p-25, xd));
#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT
    }

#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
    // Check for exceptional values
    if (auto r = ASINF_EXCEPTS_LO.lookup_odd(x_abs, x_sign);
        LIBC_UNLIKELY(r.has_value()))
      return r.value();
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS

    // For |x| <= 0.5, we approximate asinf(x) by:
    //   asin(x) = x * P(x^2)
    // Where P(X^2) = Q(X) is a degree-20 minimax even polynomial approximating
    // asin(x)/x on [0, 0.5] generated by Sollya with:
    // > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20|],
    //                 [|1, D...|], [0, 0.5]);
    // An exhaustive test shows that this approximation works well up to a
    // little more than 0.5.
    double xd = static_cast<double>(x);
    double xsq = xd * xd;
    double x3 = xd * xsq;
    double r = asin_eval(xsq);
    return static_cast<float>(fputil::multiply_add(x3, r, xd));
  }

  // |x| > 1, return NaNs.
  if (LIBC_UNLIKELY(x_abs > 0x3f80'0000U)) {
    if (xbits.is_signaling_nan()) {
      fputil::raise_except_if_required(FE_INVALID);
      return FPBits::quiet_nan().get_val();
    }

    if (x_abs <= 0x7f80'0000U) {
      fputil::set_errno_if_required(EDOM);
      fputil::raise_except_if_required(FE_INVALID);
    }

    return FPBits::quiet_nan().get_val();
  }

#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
  // Check for exceptional values
  if (auto r = ASINF_EXCEPTS_HI.lookup_odd(x_abs, x_sign);
      LIBC_UNLIKELY(r.has_value()))
    return r.value();
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS

  // When |x| > 0.5, we perform range reduction as follow:
  //
  // Assume further that 0.5 < x <= 1, and let:
  //   y = asin(x)
  // We will use the double angle formula:
  //   cos(2y) = 1 - 2 sin^2(y)
  // and the complement angle identity:
  //   x = sin(y) = cos(pi/2 - y)
  //              = 1 - 2 sin^2 (pi/4 - y/2)
  // So:
  //   sin(pi/4 - y/2) = sqrt( (1 - x)/2 )
  // And hence:
  //   pi/4 - y/2 = asin( sqrt( (1 - x)/2 ) )
  // Equivalently:
  //   asin(x) = y = pi/2 - 2 * asin( sqrt( (1 - x)/2 ) )
  // Let u = (1 - x)/2, then:
  //   asin(x) = pi/2 - 2 * asin( sqrt(u) )
  // Moreover, since 0.5 < x <= 1:
  //   0 <= u < 1/4, and 0 <= sqrt(u) < 0.5,
  // And hence we can reuse the same polynomial approximation of asin(x) when
  // |x| <= 0.5:
  //   asin(x) ~ pi/2 - 2 * sqrt(u) * P(u),

  xbits.set_sign(Sign::POS);
  double sign = SIGN[x_sign];
  double xd = static_cast<double>(xbits.get_val());
  double u = fputil::multiply_add(-0.5, xd, 0.5);
  double c1 = sign * (-2 * fputil::sqrt<double>(u));
  double c2 = fputil::multiply_add(sign, M_MATH_PI_2, c1);
  double c3 = c1 * u;

  double r = asin_eval(u);
  return static_cast<float>(fputil::multiply_add(c3, r, c2));
}

} // namespace LIBC_NAMESPACE_DECL