libc/src/math/generic/atan.cpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179

//===-- Double-precision atan function ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/math/atan.h"
#include "atan_utils.h"
#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/double_double.h"
#include "src/__support/FPUtil/multiply_add.h"
#include "src/__support/FPUtil/nearest_integer.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY

namespace LIBC_NAMESPACE_DECL {

// To compute atan(x), we divided it into the following cases:
// * |x| < 2^-26:
//      Since |x| > atan(|x|) > |x| - |x|^3/3, and |x|^3/3 < ulp(x)/2, we simply
//      return atan(x) = x - sign(x) * epsilon.
// * 2^-26 <= |x| < 1:
//      We perform range reduction mod 2^-6 = 1/64 as follow:
//      Let k = 2^(-6) * round(|x| * 2^6), then
//        atan(x) = sign(x) * atan(|x|)
//                = sign(x) * (atan(k) + atan((|x| - k) / (1 + |x|*k)).
//      We store atan(k) in a look up table, and perform intermediate steps in
//      double-double.
// * 1 < |x| < 2^53:
//      First we perform the transformation y = 1/|x|:
//        atan(x) = sign(x) * (pi/2 - atan(1/|x|))
//                = sign(x) * (pi/2 - atan(y)).
//      Then we compute atan(y) using range reduction mod 2^-6 = 1/64 as the
//      previous case:
//      Let k = 2^(-6) * round(y * 2^6), then
//        atan(y) = atan(k) + atan((y - k) / (1 + y*k))
//                = atan(k) + atan((1/|x| - k) / (1 + k/|x|)
//                = atan(k) + atan((1 - k*|x|) / (|x| + k)).
// * |x| >= 2^53:
//      Using the reciprocal transformation:
//        atan(x) = sign(x) * (pi/2 - atan(1/|x|)).
//      We have that:
//        atan(1/|x|) <= 1/|x| <= 2^-53,
//      which is smaller than ulp(pi/2) / 2.
//      So we can return:
//        atan(x) = sign(x) * (pi/2 - epsilon)

LLVM_LIBC_FUNCTION(double, atan, (double x)) {
  using FPBits = fputil::FPBits<double>;

  constexpr double IS_NEG[2] = {1.0, -1.0};
  constexpr DoubleDouble PI_OVER_2 = {0x1.1a62633145c07p-54,
                                      0x1.921fb54442d18p0};
  constexpr DoubleDouble MPI_OVER_2 = {-0x1.1a62633145c07p-54,
                                       -0x1.921fb54442d18p0};

  FPBits xbits(x);
  bool x_sign = xbits.is_neg();
  xbits = xbits.abs();
  uint64_t x_abs = xbits.uintval();
  int x_exp =
      static_cast<int>(x_abs >> FPBits::FRACTION_LEN) - FPBits::EXP_BIAS;

  // |x| < 1.
  if (x_exp < 0) {
    if (LIBC_UNLIKELY(x_exp < -26)) {
#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
      return x;
#else
      if (x == 0.0)
        return x;
      // |x| < 2^-26
      return fputil::multiply_add(-0x1.0p-54, x, x);
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
    }

    double x_d = xbits.get_val();
    // k = 2^-6 * round(2^6 * |x|)
    double k = fputil::nearest_integer(0x1.0p6 * x_d);
    unsigned idx = static_cast<unsigned>(k);
    k *= 0x1.0p-6;

    // numerator = |x| - k
    DoubleDouble num, den;
    num.lo = 0.0;
    num.hi = x_d - k;

    // denominator = 1 - k * |x|
    den.hi = fputil::multiply_add(x_d, k, 1.0);
    DoubleDouble prod = fputil::exact_mult(x_d, k);
    // Using Dekker's 2SUM algorithm to compute the lower part.
    den.lo = ((1.0 - den.hi) + prod.hi) + prod.lo;

    // x_r = (|x| - k) / (1 + k * |x|)
    DoubleDouble x_r = fputil::div(num, den);

    // Approximating atan(x_r) using Taylor polynomial.
    DoubleDouble p = atan_eval(x_r);

    // atan(x) = sign(x) * (atan(k) + atan(x_r))
    //         = sign(x) * (atan(k) + atan( (|x| - k) / (1 + k * |x|) ))
#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
    return IS_NEG[x_sign] * (ATAN_I[idx].hi + (p.hi + (p.lo + ATAN_I[idx].lo)));
#else

    DoubleDouble c0 = fputil::exact_add(ATAN_I[idx].hi, p.hi);
    double c1 = c0.lo + (ATAN_I[idx].lo + p.lo);
    double r = IS_NEG[x_sign] * (c0.hi + c1);

    return r;
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
  }

  // |x| >= 2^53 or x is NaN.
  if (LIBC_UNLIKELY(x_exp >= 53)) {
    // x is nan
    if (xbits.is_nan()) {
      if (xbits.is_signaling_nan()) {
        fputil::raise_except_if_required(FE_INVALID);
        return FPBits::quiet_nan().get_val();
      }
      return x;
    }
    // |x| >= 2^53
    // atan(x) ~ sign(x) * pi/2.
    if (x_exp >= 53)
#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
      return IS_NEG[x_sign] * PI_OVER_2.hi;
#else
      return fputil::multiply_add(IS_NEG[x_sign], PI_OVER_2.hi,
                                  IS_NEG[x_sign] * PI_OVER_2.lo);
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
  }

  double x_d = xbits.get_val();
  double y = 1.0 / x_d;

  // k = 2^-6 * round(2^6 / |x|)
  double k = fputil::nearest_integer(0x1.0p6 * y);
  unsigned idx = static_cast<unsigned>(k);
  k *= 0x1.0p-6;

  // denominator = |x| + k
  DoubleDouble den = fputil::exact_add(x_d, k);
  // numerator = 1 - k * |x|
  DoubleDouble num;
  num.hi = fputil::multiply_add(-x_d, k, 1.0);
  DoubleDouble prod = fputil::exact_mult(x_d, k);
  // Using Dekker's 2SUM algorithm to compute the lower part.
  num.lo = ((1.0 - num.hi) - prod.hi) - prod.lo;

  // x_r = (1/|x| - k) / (1 - k/|x|)
  //     = (1 - k * |x|) / (|x| - k)
  DoubleDouble x_r = fputil::div(num, den);

  // Approximating atan(x_r) using Taylor polynomial.
  DoubleDouble p = atan_eval(x_r);

  // atan(x) = sign(x) * (pi/2 - atan(1/|x|))
  //         = sign(x) * (pi/2 - atan(k) - atan(x_r))
  //         = (-sign(x)) * (-pi/2 + atan(k) + atan((1 - k*|x|)/(|x| - k)))
#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
  double lo_part = p.lo + ATAN_I[idx].lo + MPI_OVER_2.lo;
  return IS_NEG[!x_sign] * (MPI_OVER_2.hi + ATAN_I[idx].hi + (p.hi + lo_part));
#else
  DoubleDouble c0 = fputil::exact_add(MPI_OVER_2.hi, ATAN_I[idx].hi);
  DoubleDouble c1 = fputil::exact_add(c0.hi, p.hi);
  double c2 = c1.lo + (c0.lo + p.lo) + (ATAN_I[idx].lo + MPI_OVER_2.lo);

  double r = IS_NEG[!x_sign] * (c1.hi + c2);

  return r;
#endif
}

} // namespace LIBC_NAMESPACE_DECL