libc/src/math/generic/tanf.cpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152

//===-- Single-precision tan function -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/math/tanf.h"
#include "sincosf_utils.h"
#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/PolyEval.h"
#include "src/__support/FPUtil/except_value_utils.h"
#include "src/__support/FPUtil/multiply_add.h"
#include "src/__support/FPUtil/nearest_integer.h"
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/optimization.h"            // LIBC_UNLIKELY
#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA

namespace LIBC_NAMESPACE_DECL {

#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
// Exceptional cases for tanf.
constexpr size_t N_EXCEPTS = 6;

constexpr fputil::ExceptValues<float, N_EXCEPTS> TANF_EXCEPTS{{
    // (inputs, RZ output, RU offset, RD offset, RN offset)
    // x = 0x1.ada6aap27, tan(x) = 0x1.e80304p-3 (RZ)
    {0x4d56d355, 0x3e740182, 1, 0, 0},
    // x = 0x1.862064p33, tan(x) = -0x1.8dee56p-3 (RZ)
    {0x50431032, 0xbe46f72b, 0, 1, 1},
    // x = 0x1.af61dap48, tan(x) = 0x1.60d1c6p-2 (RZ)
    {0x57d7b0ed, 0x3eb068e3, 1, 0, 1},
    // x = 0x1.0088bcp52, tan(x) = 0x1.ca1edp0 (RZ)
    {0x5980445e, 0x3fe50f68, 1, 0, 0},
    // x = 0x1.f90dfcp72, tan(x) = 0x1.597f9cp-1 (RZ)
    {0x63fc86fe, 0x3f2cbfce, 1, 0, 0},
    // x = 0x1.a6ce12p86, tan(x) = -0x1.c5612ep-1 (RZ)
    {0x6ad36709, 0xbf62b097, 0, 1, 0},
}};
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS

LLVM_LIBC_FUNCTION(float, tanf, (float x)) {
  using FPBits = typename fputil::FPBits<float>;
  FPBits xbits(x);
  uint32_t x_abs = xbits.uintval() & 0x7fff'ffffU;

  // |x| < pi/32
  if (LIBC_UNLIKELY(x_abs <= 0x3dc9'0fdbU)) {
    double xd = static_cast<double>(x);

    // |x| < 0x1.0p-12f
    if (LIBC_UNLIKELY(x_abs < 0x3980'0000U)) {
      if (LIBC_UNLIKELY(x_abs == 0U)) {
        // For signed zeros.
        return x;
      }
      // When |x| < 2^-12, the relative error of the approximation tan(x) ~ x
      // is:
      //   |tan(x) - x| / |tan(x)| < |x^3| / (3|x|)
      //                           = x^2 / 3
      //                           < 2^-25
      //                           < epsilon(1)/2.
      // So the correctly rounded values of tan(x) are:
      //   = x + sign(x)*eps(x) if rounding mode = FE_UPWARD and x is positive,
      //                        or (rounding mode = FE_DOWNWARD and x is
      //                        negative),
      //   = x otherwise.
      // To simplify the rounding decision and make it more efficient, we use
      //   fma(x, 2^-25, x) instead.
      // Note: to use the formula x + 2^-25*x to decide the correct rounding, we
      // do need fma(x, 2^-25, x) to prevent underflow caused by 2^-25*x when
      // |x| < 2^-125. For targets without FMA instructions, we simply use
      // double for intermediate results as it is more efficient than using an
      // emulated version of FMA.
#if defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT)
      return fputil::multiply_add(x, 0x1.0p-25f, x);
#else
      return static_cast<float>(fputil::multiply_add(xd, 0x1.0p-25, xd));
#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT
    }

    // |x| < pi/32
    double xsq = xd * xd;

    // Degree-9 minimax odd polynomial of tan(x) generated by Sollya with:
    // > P = fpminimax(tan(x)/x, [|0, 2, 4, 6, 8|], [|1, D...|], [0, pi/32]);
    double result =
        fputil::polyeval(xsq, 1.0, 0x1.555555553d022p-2, 0x1.111111ce442c1p-3,
                         0x1.ba180a6bbdecdp-5, 0x1.69c0a88a0b71fp-6);
    return static_cast<float>(xd * result);
  }

#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
  bool x_sign = xbits.uintval() >> 31;
  // Check for exceptional values
  if (LIBC_UNLIKELY(x_abs == 0x3f8a1f62U)) {
    // |x| = 0x1.143ec4p0
    float sign = x_sign ? -1.0f : 1.0f;

    // volatile is used to prevent compiler (gcc) from optimizing the
    // computation, making the results incorrect in different rounding modes.
    volatile float tmp = 0x1.ddf9f4p0f;
    tmp = fputil::multiply_add(sign, tmp, sign * 0x1.1p-24f);

    return tmp;
  }
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS

  // |x| > 0x1.ada6a8p+27f
  if (LIBC_UNLIKELY(x_abs > 0x4d56'd354U)) {
    // Inf or NaN
    if (LIBC_UNLIKELY(x_abs >= 0x7f80'0000U)) {
      if (xbits.is_signaling_nan()) {
        fputil::raise_except_if_required(FE_INVALID);
        return FPBits::quiet_nan().get_val();
      }

      if (x_abs == 0x7f80'0000U) {
        fputil::set_errno_if_required(EDOM);
        fputil::raise_except_if_required(FE_INVALID);
      }
      return x + FPBits::quiet_nan().get_val();
    }
#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
    // Other large exceptional values
    if (auto r = TANF_EXCEPTS.lookup_odd(x_abs, x_sign);
        LIBC_UNLIKELY(r.has_value()))
      return r.value();
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
  }

  // For |x| >= pi/32, we use the definition of tan(x) function:
  //   tan(x) = sin(x) / cos(x)
  // The we follow the same computations of sin(x) and cos(x) as sinf, cosf,
  // and sincosf.

  double xd = static_cast<double>(x);
  double sin_k, cos_k, sin_y, cosm1_y;

  sincosf_eval(xd, x_abs, sin_k, cos_k, sin_y, cosm1_y);
  // tan(x) = sin(x) / cos(x)
  //        = (sin_y * cos_k + cos_y * sin_k) / (cos_y * cos_k - sin_y * sin_k)
  using fputil::multiply_add;
  return static_cast<float>(
      multiply_add(sin_y, cos_k, multiply_add(cosm1_y, sin_k, sin_k)) /
      multiply_add(sin_y, -sin_k, multiply_add(cosm1_y, cos_k, cos_k)));
}

} // namespace LIBC_NAMESPACE_DECL