libc/src/math/generic/asinpif16.cpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127

//===-- Half-precision asinpif16(x) function ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception.
//
//===----------------------------------------------------------------------===//

#include "src/math/asinpif16.h"
#include "hdr/errno_macros.h"
#include "hdr/fenv_macros.h"
#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/PolyEval.h"
#include "src/__support/FPUtil/cast.h"
#include "src/__support/FPUtil/except_value_utils.h"
#include "src/__support/FPUtil/multiply_add.h"
#include "src/__support/FPUtil/sqrt.h"
#include "src/__support/macros/optimization.h"

namespace LIBC_NAMESPACE_DECL {

// Computes asin(x)/pi for a half-precision input.
//
// Special inputs:
//   * NaN               -> NaN (a signaling NaN additionally requests
//                          FE_INVALID and is returned as a quiet NaN).
//   * |x| > 1, +/-inf   -> quiet NaN, FE_INVALID requested, errno set to EDOM
//                          (both through the "*_if_required" helpers).
//
// For in-domain inputs all intermediate arithmetic is done in double and the
// result is rounded to float16 once, at the very end.
LLVM_LIBC_FUNCTION(float16, asinpif16, (float16 x)) {
  using FPBits = fputil::FPBits<float16>;

  FPBits xbits(x);

  // asinpif16(NaN) = NaN
  //
  // This is decided from the bit pattern, before any floating-point
  // conversion or comparison: every ordered comparison against NaN is false,
  // so a NaN would never be caught by the `|x| > 1` test below.
  if (LIBC_UNLIKELY(xbits.is_nan())) {
    if (xbits.is_signaling_nan()) {
      fputil::raise_except_if_required(FE_INVALID);
      return FPBits::quiet_nan().get_val();
    }
    return x;
  }

  bool is_neg = xbits.is_neg();
  double x_abs = fputil::cast<double>(xbits.abs().get_val());

  // asinpi is odd, so the |x| > 0.5 path works on |x| and restores the sign
  // with this helper.
  auto signed_result = [is_neg](auto r) -> auto { return is_neg ? -r : r; };

  // 1 < |x| <= +inf: outside the domain of asin.
  if (LIBC_UNLIKELY(x_abs > 1.0)) {
    fputil::raise_except_if_required(FE_INVALID);
    fputil::set_errno_if_required(EDOM);

    return FPBits::quiet_nan().get_val();
  }

  // The coefficients for the polynomial approximation of asin(x)/pi in the
  // range [0, 0.5], extracted using python-sympy.
  //
  // Python code to generate the coefficients:
  //  > from sympy import *
  //  > import math
  //  > x = symbols('x')
  //  > print(series(asin(x)/math.pi, x, 0, 21))
  //
  // OUTPUT:
  //
  // 0.318309886183791*x + 0.0530516476972984*x**3 + 0.0238732414637843*x**5 +
  // 0.0142102627760621*x**7 + 0.00967087327815336*x**9 +
  // 0.00712127941391293*x**11 + 0.00552355646848375*x**13 +
  // 0.00444514782463692*x**15 + 0.00367705242846804*x**17 +
  // 0.00310721681820837*x**19 + O(x**21)
  //
  // Only the first eight terms (up to x^15) are used below. The series has
  // positive coefficients, so the truncation error is largest at |x| = 0.5,
  // where the dropped terms add up to less than 1e-7; that is far smaller
  // than the spacing between float16 values of the result.
  constexpr double POLY_COEFFS[] = {
      0x1.45f306dc9c889p-2, // x^1
      0x1.b2995e7b7b5fdp-5, // x^3
      0x1.8723a1d588a36p-6, // x^5
      0x1.d1a452f20430dp-7, // x^7
      0x1.3ce52a3a09f61p-7, // x^9
      0x1.d2b33e303d375p-8, // x^11
      0x1.69fde663c674fp-8, // x^13
      0x1.235134885f19bp-8, // x^15
  };
  // Evaluates the odd polynomial as x * P(x^2) via fputil::polyeval.
  // Only meant for |x| in [0, 0.5].
  auto asinpi_polyeval = [](double x) -> double {
    return x * fputil::polyeval(x * x, POLY_COEFFS[0], POLY_COEFFS[1],
                                POLY_COEFFS[2], POLY_COEFFS[3], POLY_COEFFS[4],
                                POLY_COEFFS[5], POLY_COEFFS[6], POLY_COEFFS[7]);
  };

  // if |x| <= 0.5:
  // No branch hint on this test: it is the direct polynomial path, not a
  // rare special case.
  if (x_abs <= 0.5) {
    // Use polynomial approximation of asin(x)/pi in the range [0, 0.5].
    // The signed input is used directly; the polynomial is odd, so the sign
    // (including that of a zero input) carries through the leading factor x.
    double result = asinpi_polyeval(fputil::cast<double>(x));
    return fputil::cast<float16>(result);
  }

  // If |x| > 0.5, we need to use the range reduction method (written for
  // x > 0; the sign is re-applied at the end):
  //    y = asin(x) => x = sin(y)
  //      because: sin(a) = cos(pi/2 - a)
  //      therefore:
  //    x = cos(pi/2 - y)
  //      let z = pi/2 - y,
  //    x = cos(z)
  //      because: cos(2a) = 1 - 2 * sin^2(a), z = 2a, a = z/2
  //      therefore:
  //    cos(z) = 1 - 2 * sin^2(z/2)
  //    sin(z/2) = sqrt((1 - cos(z))/2)
  //    sin(z/2) = sqrt((1 - x)/2)
  //      let u = (1 - x)/2
  //      then:
  //    sin(z/2) = sqrt(u)
  //    z/2 = asin(sqrt(u))
  //    z = 2 * asin(sqrt(u))
  //    pi/2 - y = 2 * asin(sqrt(u))
  //    y = pi/2 - 2 * asin(sqrt(u))
  //    y/pi = 1/2 - 2 * asin(sqrt(u))/pi
  //
  // Finally, we can write:
  //   asinpi(x) = 1/2 - 2 * asinpi(sqrt(u))
  //     where u = (1 - x) /2
  //             = 0.5 - 0.5 * x
  //             = multiply_add(-0.5, x, 0.5)
  //
  // For 0.5 < |x| <= 1 we get u in [0, 0.25), hence sqrt(u) in [0, 0.5),
  // which is inside the range the polynomial is meant for.

  double u = fputil::multiply_add(-0.5, x_abs, 0.5);
  double asinpi_sqrt_u = asinpi_polyeval(fputil::sqrt<double>(u));
  double result = fputil::multiply_add(-2.0, asinpi_sqrt_u, 0.5);

  return fputil::cast<float16>(signed_result(result));
}

} // namespace LIBC_NAMESPACE_DECL