libc/src/__support/math/atanf_float.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168

//===-- Single-precision atanf float function -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LIBC_SRC___SUPPORT_MATH_ATANF_FLOAT_H
#define LIBC_SRC___SUPPORT_MATH_ATANF_FLOAT_H

#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/double_double.h"
#include "src/__support/FPUtil/multiply_add.h"
#include "src/__support/FPUtil/nearest_integer.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY

namespace LIBC_NAMESPACE_DECL {

namespace math {

namespace atanf_internal {

using fputil::FloatFloat;
// atan(i/64) with i = 0..16, generated by Sollya with:
// > for i from 0 to 16 do {
//     a = round(atan(i/16), SG, RN);
//     b = round(atan(i/16) - a, SG, RN);
//     print("{", b, ",", a, "},");
//   };
static constexpr FloatFloat ATAN_I[17] = {
    {0.0f, 0.0f},
    {-0x1.1a6042p-30f, 0x1.ff55bcp-5f},
    {-0x1.54f424p-30f, 0x1.fd5baap-4f},
    {0x1.79cb6p-28f, 0x1.7b97b4p-3f},
    {-0x1.b4dfc8p-29f, 0x1.f5b76p-3f},
    {-0x1.1f0286p-27f, 0x1.362774p-2f},
    {0x1.e4defp-30f, 0x1.6f6194p-2f},
    {0x1.e611fep-29f, 0x1.a64eecp-2f},
    {0x1.586ed4p-28f, 0x1.dac67p-2f},
    {-0x1.6499e6p-26f, 0x1.0657eap-1f},
    {0x1.7bdfd6p-26f, 0x1.1e00bap-1f},
    {-0x1.98e422p-28f, 0x1.345f02p-1f},
    {0x1.934f7p-28f, 0x1.4978fap-1f},
    {0x1.c5a6c6p-27f, 0x1.5d5898p-1f},
    {0x1.5e118cp-27f, 0x1.700a7cp-1f},
    {-0x1.1d4eb6p-26f, 0x1.819d0cp-1f},
    {-0x1.777a5cp-26f, 0x1.921fb6p-1f},
};

// 1 / (1 + (i/16)^2)  with i = 0..16, generated by Sollya with:
// > for i from 0 to 16 do {
//     a = round(1 / (1 + (i/16)^2), SG, RN);
//     print(a, ",");
//   };
static constexpr float ATANF_REDUCED_ARG[17] = {
    0x1.0p0f,       0x1.fe01fep-1f, 0x1.f81f82p-1f, 0x1.ee9c8p-1f,
    0x1.e1e1e2p-1f, 0x1.d272cap-1f, 0x1.c0e07p-1f,  0x1.adbe88p-1f,
    0x1.99999ap-1f, 0x1.84f00cp-1f, 0x1.702e06p-1f, 0x1.5babccp-1f,
    0x1.47ae14p-1f, 0x1.34679ap-1f, 0x1.21fb78p-1f, 0x1.107fbcp-1f,
    0x1p-1f,
};

// Approximating atan( u / (1 + u * k/16) )
//   atan( u / (1 + u * k/16) ) / u ~ 1 - k/16 * u + (k^2/256 - 1/3) * u^2 +
//                                    + (k/16 - (k/16)^3) * u^3 + O(u^4)
LIBC_INLINE static float atanf_eval(float u, float k_over_16) {
  // (k/16)^2
  float c2 = k_over_16 * k_over_16;
  // -(k/16)^3
  float c3 = fputil::multiply_add(-k_over_16, c2, k_over_16);
  float u2 = u * u;
  // (k^2/256 - 1/3) + u * (k/16 - (k/16)^3)
  float a0 = fputil::multiply_add(c3, u, c2 - 0x1.555556p-2f);
  // -k/16 + u*(k^2/256 - 1/3) + u^2 * (k/16 - (k/16)^3)
  float a1 = fputil::multiply_add(u, a0, -k_over_16);
  // u - u^2 * k/16 + u^3 * ((k^2/256 - 1/3) + u^4 * (k/16 - (k/16)^3))
  return fputil::multiply_add(u2, a1, u);
}

} // namespace atanf_internal

// There are several range reduction steps we can take for atan2(y, x) as
// follow:

LIBC_INLINE static float atanf(float x) {
  using namespace atanf_internal;
  using FPBits = typename fputil::FPBits<float>;
  using FPBits = typename fputil::FPBits<float>;

  constexpr float SIGN[2] = {1.0f, -1.0f};
  constexpr FloatFloat PI_OVER_2 = {-0x1.777a5cp-25f, 0x1.921fb6p0f};

  FPBits x_bits(x);
  Sign s = x_bits.sign();
  float sign = SIGN[s.is_neg()];
  uint32_t x_abs = x_bits.uintval() & 0x7fff'ffffU;

  // x is inf or nan, |x| <= 2^-11 or |x|= > 2^11.
  if (LIBC_UNLIKELY(x_abs <= 0x3a00'0000U || x_abs >= 0x4500'0000U)) {
    if (LIBC_UNLIKELY(x_bits.is_inf()))
      return sign * PI_OVER_2.hi;
    // atan(NaN) = NaN
    if (LIBC_UNLIKELY(x_bits.is_nan())) {
      if (x_bits.is_signaling_nan()) {
        fputil::raise_except_if_required(FE_INVALID);
        return FPBits::quiet_nan().get_val();
      }

      return x;
    }
    // |x| >= 2^11:
    // atan(x) = sign(x) * pi/2 - atan(1/x)
    //         ~ sign(x) * pi/2 - 1/x
    if (LIBC_UNLIKELY(x_abs >= 0x4200'0000))
      return fputil::multiply_add(sign, PI_OVER_2.hi, -1.0f / x);
    // x <= 2^-11:
    // atan(x) ~ x
    return x;
  }

  // Range reduction steps:
  // 1)  atan(x) = sign(x) * atan(|x|)
  // 2)  If |x| > 1, atan(|x|) = pi/2 - atan(1/|x|)
  // 3)  For 1/16 < |x| < 1 + 1/32, we find k such that: | |x| - k/16 | <= 1/32.
  //     Let y = |x| - k/16, then using the angle summation formula, we have:
  //   atan(|x|) = atan(k/16) + atan( (|x| - k/16) / (1 + |x| * k/16) )
  //             = atan(k/16) + atan( y / (1 + (y + k/16) * k/16 )
  //             = atan(k/16) + atan( y / ((1 + k^2/256) + y * k/16) )
  // 4)  Let u = y / (1 + k^2/256), then we can rewritten the above as:
  //   atan(|x|) = atan(k/16) + atan( u / (1 + u * k/16) )
  //             ~ atan(k/16) + (u - k/16 * u^2 + (k^2/256 - 1/3) * u^3 +
  //                             + (k/16 - (k/16)^3) * u^4) + O(u^5)
  float x_a = cpp::bit_cast<float>(x_abs);
  // |x| > 1 + 1/32, we need to invert x, so we will perform the division in
  // float-float.
  if (x_abs > 0x3f84'0000U)
    x_a = 1.0f / x_a;
  // Perform range reduction.
  //   k = nearestint(x * 16)
  float k_f = fputil::nearest_integer(x_a * 0x1.0p4f);
  unsigned idx = static_cast<unsigned>(k_f);
  float k_over_16 = k_f * 0x1.0p-4f;
  float y = x_a - k_over_16;
  // u = (x - k/16) / (1 + (k/16)^2)
  float u = y * ATANF_REDUCED_ARG[idx];

  // atan(x) = sign(x) * atan(|x|)
  //         = sign(x) * (atan(k/16) + atan(|))
  // p ~ atan(u)
  float p = atanf_eval(u, k_over_16);
  // |x|  > 1 + 1/32: q ~ (pi/2 - atan(1/|x|))
  // |x| <= 1 + 1/32: q ~ atan(|x|)
  float q = (p + ATAN_I[idx].lo) + ATAN_I[idx].hi;
  if (x_abs > 0x3f84'0000U)
    q = PI_OVER_2.hi + (PI_OVER_2.lo - q);
  // |x|  > 1 + 1/32: sign(x) * (pi/2 - atan(1/|x|))
  // |x| <= 1 + 1/32: sign(x) * atan(|x|)
  return sign * q;
}

} // namespace math

} // namespace LIBC_NAMESPACE_DECL

#endif // LIBC_SRC___SUPPORT_MATH_ATANF_FLOAT_H