//===-- Collection of utils for atan/atan2 ----------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LLVM_LIBC_SRC_MATH_GENERIC_ATAN_UTILS_H #define LLVM_LIBC_SRC_MATH_GENERIC_ATAN_UTILS_H #include "src/__support/FPUtil/PolyEval.h" #include "src/__support/FPUtil/double_double.h" #include "src/__support/FPUtil/dyadic_float.h" #include "src/__support/FPUtil/multiply_add.h" #include "src/__support/integer_literals.h" #include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { namespace { using DoubleDouble = fputil::DoubleDouble; using Float128 = fputil::DyadicFloat<128>; // atan(i/64) with i = 0..64, generated by Sollya with: // > for i from 0 to 64 do { // a = round(atan(i/64), D, RN); // b = round(atan(i/64) - a, D, RN); // print("{", b, ",", a, "},"); // }; constexpr DoubleDouble ATAN_I[65] = { {0.0, 0.0}, {-0x1.220c39d4dff5p-61, 0x1.fff555bbb729bp-7}, {-0x1.5ec431444912cp-60, 0x1.ffd55bba97625p-6}, {-0x1.86ef8f794f105p-63, 0x1.7fb818430da2ap-5}, {-0x1.c934d86d23f1dp-60, 0x1.ff55bb72cfdeap-5}, {0x1.ac4ce285df847p-58, 0x1.3f59f0e7c559dp-4}, {-0x1.cfb654c0c3d98p-58, 0x1.7ee182602f10fp-4}, {0x1.f7b8f29a05987p-58, 0x1.be39ebe6f07c3p-4}, {-0x1.cd37686760c17p-59, 0x1.fd5ba9aac2f6ep-4}, {-0x1.b485914dacf8cp-59, 0x1.1e1fafb043727p-3}, {0x1.61a3b0ce9281bp-57, 0x1.3d6eee8c6626cp-3}, {-0x1.054ab2c010f3dp-58, 0x1.5c9811e3ec26ap-3}, {0x1.347b0b4f881cap-58, 0x1.7b97b4bce5b02p-3}, {0x1.cf601e7b4348ep-59, 0x1.9a6a8e96c8626p-3}, {0x1.17b10d2e0e5abp-61, 0x1.b90d7529260a2p-3}, {0x1.c648d1534597ep-57, 0x1.d77d5df205736p-3}, {0x1.8ab6e3cf7afbdp-57, 0x1.f5b75f92c80ddp-3}, {0x1.62e47390cb865p-56, 0x1.09dc597d86362p-2}, {0x1.30ca4748b1bf9p-57, 0x1.18bf5a30bf178p-2}, {-0x1.077cdd36dfc81p-56, 0x1.278372057ef46p-2}, {-0x1.963a544b672d8p-57, 0x1.362773707ebccp-2}, {-0x1.5d5e43c55b3bap-56, 0x1.44aa436c2af0ap-2}, {-0x1.2566480884082p-57, 0x1.530ad9951cd4ap-2}, {-0x1.a725715711fp-56, 0x1.614840309cfe2p-2}, {-0x1.c63aae6f6e918p-56, 0x1.6f61941e4def1p-2}, {0x1.69c885c2b249ap-56, 0x1.7d5604b63b3f7p-2}, {0x1.b6d0ba3748fa8p-56, 0x1.8b24d394a1b25p-2}, {0x1.9e6c988fd0a77p-56, 0x1.98cd5454d6b18p-2}, {-0x1.24dec1b50b7ffp-56, 0x1.a64eec3cc23fdp-2}, {0x1.ae187b1ca504p-56, 0x1.b3a911da65c6cp-2}, {-0x1.cc1ce70934c34p-56, 0x1.c0db4c94ec9fp-2}, {-0x1.a2cfa4418f1adp-56, 0x1.cde53432c1351p-2}, {0x1.a2b7f222f65e2p-56, 0x1.dac670561bb4fp-2}, {0x1.0e53dc1bf3435p-56, 0x1.e77eb7f175a34p-2}, {-0x1.a3992dc382a23p-57, 0x1.f40dd0b541418p-2}, {-0x1.b32c949c9d593p-55, 0x1.0039c73c1a40cp-1}, {-0x1.d5b495f6349e6p-56, 0x1.0657e94db30dp-1}, {0x1.974fa13b5404fp-58, 0x1.0c6145b5b43dap-1}, {-0x1.2bdaee1c0ee35p-58, 0x1.1255d9bfbd2a9p-1}, {0x1.c621cec00c301p-55, 0x1.1835a88be7c13p-1}, {-0x1.928df287a668fp-58, 0x1.1e00babdefeb4p-1}, {0x1.c421c9f38224ep-57, 0x1.23b71e2cc9e6ap-1}, {-0x1.09e73b0c6c087p-56, 0x1.2958e59308e31p-1}, {0x1.c5d5e9ff0cf8dp-55, 0x1.2ee628406cbcap-1}, {0x1.1021137c71102p-55, 0x1.345f01cce37bbp-1}, {-0x1.2304331d8bf46p-55, 0x1.39c391cd4171ap-1}, {0x1.ecf8b492644fp-56, 0x1.3f13fb89e96f4p-1}, {-0x1.f76d0163f79c8p-56, 0x1.445065b795b56p-1}, {0x1.2419a87f2a458p-56, 0x1.4978fa3269ee1p-1}, {0x1.4a33dbeb3796cp-55, 0x1.4e8de5bb6ec04p-1}, {-0x1.1bb74abda520cp-55, 0x1.538f57b89061fp-1}, {-0x1.5e5c9d8c5a95p-56, 0x1.587d81f732fbbp-1}, {0x1.0028e4bc5e7cap-57, 0x1.5d58987169b18p-1}, {-0x1.2b785350ee8c1p-57, 0x1.6220d115d7b8ep-1}, {-0x1.6ea6febe8bbbap-56, 0x1.66d663923e087p-1}, {-0x1.a80386188c50ep-55, 0x1.6b798920b3d99p-1}, {-0x1.8c34d25aadef6p-56, 0x1.700a7c5784634p-1}, {0x1.7b2a6165884a1p-59, 0x1.748978fba8e0fp-1}, {0x1.406a08980374p-55, 0x1.78f6bbd5d315ep-1}, {0x1.560821e2f3aa9p-55, 0x1.7d528289fa093p-1}, {-0x1.bf76229d3b917p-56, 0x1.819d0b7158a4dp-1}, {0x1.6b66e7fc8b8c3p-57, 0x1.85d69576cc2c5p-1}, {-0x1.55b9a5e177a1bp-55, 0x1.89ff5ff57f1f8p-1}, {-0x1.ec182ab042f61p-56, 0x1.8e17aa99cc05ep-1}, {0x1.1a62633145c07p-55, 0x1.921fb54442d18p-1}, }; // Approximate atan(x) for |x| <= 2^-7. // Using degree-9 Taylor polynomial: // P = x - x^3/3 + x^5/5 -x^7/7 + x^9/9; // Then the absolute error is bounded by: // |atan(x) - P(x)| < |x|^11/11 < 2^(-7*11) / 11 < 2^-80. // And the relative error is bounded by: // |(atan(x) - P(x))/atan(x)| < |x|^10 / 10 < 2^-73. // For x = x_hi + x_lo, fully expand the polynomial and drop any terms less than // ulp(x_hi^3 / 3) gives us: // P(x) ~ x_hi - x_hi^3/3 + x_hi^5/5 - x_hi^7/7 + x_hi^9/9 + // + x_lo * (1 - x_hi^2 + x_hi^4) // Since p.lo is ~ x^3/3, the relative error from rounding is bounded by: // |(atan(x) - P(x))/atan(x)| < ulp(x^2) <= 2^(-14-52) = 2^-66. [[maybe_unused]] DoubleDouble atan_eval(const DoubleDouble &x) { DoubleDouble p; p.hi = x.hi; double x_hi_sq = x.hi * x.hi; // c0 ~ x_hi^2 * 1/5 - 1/3 double c0 = fputil::multiply_add(x_hi_sq, 0x1.999999999999ap-3, -0x1.5555555555555p-2); // c1 ~ x_hi^2 * 1/9 - 1/7 double c1 = fputil::multiply_add(x_hi_sq, 0x1.c71c71c71c71cp-4, -0x1.2492492492492p-3); // x_hi^3 double x_hi_3 = x_hi_sq * x.hi; // x_hi^4 double x_hi_4 = x_hi_sq * x_hi_sq; // d0 ~ 1/3 - x_hi^2 / 5 + x_hi^4 / 7 - x_hi^6 / 9 double d0 = fputil::multiply_add(x_hi_4, c1, c0); // x_lo - x_lo * x_hi^2 + x_lo * x_hi^4 double d1 = fputil::multiply_add(x_hi_4 - x_hi_sq, x.lo, x.lo); // p.lo ~ -x_hi^3/3 + x_hi^5/5 - x_hi^7/7 + x_hi^9/9 + // + x_lo * (1 - x_hi^2 + x_hi^4) p.lo = fputil::multiply_add(x_hi_3, d0, d1); return p; } // Float128 versions. // atan(i/64) with i = 0..64, generated by Sollya with: // > for i from 1 to 64 do { // a = round(atan(i/64), 128, RN); // ll = ceil(log2(a)); // b = 2^ll + a; // print("{Sign::POS, ", 2^(ll - 128), ",", b, "},"); // }; constexpr Float128 ATAN_I_F128[65] = { {Sign::POS, 0, 0_u128}, {Sign::POS, -134, 0xfffaaadd'db94d5bb'e78c5640'15f76048_u128}, {Sign::POS, -133, 0xffeaaddd'4bb12542'779d776d'da8c6214_u128}, {Sign::POS, -132, 0xbfdc0c21'86d14fcf'220e10d6'1df56ec7_u128}, {Sign::POS, -132, 0xffaaddb9'67ef4e36'cb2792dc'0e2e0d51_u128}, {Sign::POS, -131, 0x9facf873'e2aceb58'99c50bbf'08e6cdf6_u128}, {Sign::POS, -131, 0xbf70c130'17887460'93567e78'4cf83676_u128}, {Sign::POS, -131, 0xdf1cf5f3'783e1bef'71e5340b'30e5d9ef_u128}, {Sign::POS, -131, 0xfeadd4d5'617b6e32'c897989f'3e888ef8_u128}, {Sign::POS, -130, 0x8f0fd7d8'21b93725'bd375929'83a0af9a_u128}, {Sign::POS, -130, 0x9eb77746'331362c3'47619d25'0360fe85_u128}, {Sign::POS, -130, 0xae4c08f1'f6134efa'b54d3fef'0c2de994_u128}, {Sign::POS, -130, 0xbdcbda5e'72d81134'7b0b4f88'1c9c7488_u128}, {Sign::POS, -130, 0xcd35474b'643130e7'b00f3da1'a46eeb3b_u128}, {Sign::POS, -130, 0xdc86ba94'93051022'f621a5c1'cb552f03_u128}, {Sign::POS, -130, 0xebbeaef9'02b9b38c'91a2a68b'2fbd78e8_u128}, {Sign::POS, -130, 0xfadbafc9'6406eb15'6dc79ef5'f7a217e6_u128}, {Sign::POS, -129, 0x84ee2cbe'c31b12c5'c8e72197'0cabd3a3_u128}, {Sign::POS, -129, 0x8c5fad18'5f8bc130'ca4748b1'bf88298d_u128}, {Sign::POS, -129, 0x93c1b902'bf7a2df1'06459240'6fe1447a_u128}, {Sign::POS, -129, 0x9b13b9b8'3f5e5e69'c5abb498'd27af328_u128}, {Sign::POS, -129, 0xa25521b6'15784d45'43787549'88b8d9e3_u128}, {Sign::POS, -129, 0xa9856cca'8e6a4eda'99b7f77b'f7d9e8c1_u128}, {Sign::POS, -129, 0xb0a42018'4e7f0cb1'b51d51dc'200a0fc3_u128}, {Sign::POS, -129, 0xb7b0ca0f'26f78473'8aa32122'dcfe4483_u128}, {Sign::POS, -129, 0xbeab025b'1d9fbad3'910b8564'93411026_u128}, {Sign::POS, -129, 0xc59269ca'50d92b6d'a1746e91'f50a28de_u128}, {Sign::POS, -129, 0xcc66aa2a'6b58c33c'd9311fa1'4ed9b7c4_u128}, {Sign::POS, -129, 0xd327761e'611fe5b6'427c95e9'001e7136_u128}, {Sign::POS, -129, 0xd9d488ed'32e3635c'30f6394a'0806345d_u128}, {Sign::POS, -129, 0xe06da64a'764f7c67'c631ed96'798cb804_u128}, {Sign::POS, -129, 0xe6f29a19'609a84ba'60b77ce1'ca6dc2c8_u128}, {Sign::POS, -129, 0xed63382b'0dda7b45'6fe445ec'bc3a8d03_u128}, {Sign::POS, -129, 0xf3bf5bf8'bad1a21c'a7b837e6'86adf3fa_u128}, {Sign::POS, -129, 0xfa06e85a'a0a0be5c'66d23c7d'5dc8ecc2_u128}, {Sign::POS, -128, 0x801ce39e'0d205c99'a6d6c6c5'4d938596_u128}, {Sign::POS, -128, 0x832bf4a6'd9867e2a'4b6a09cb'61a515c1_u128}, {Sign::POS, -128, 0x8630a2da'da1ed065'd3e84ed5'013ca37e_u128}, {Sign::POS, -128, 0x892aecdf'de9547b5'094478fc'472b4afc_u128}, {Sign::POS, -128, 0x8c1ad445'f3e09b8c'439d8018'60205921_u128}, {Sign::POS, -128, 0x8f005d5e'f7f59f9b'5c835e16'65c43748_u128}, {Sign::POS, -128, 0x91db8f16'64f350e2'10e4f9c1'126e0220_u128}, {Sign::POS, -128, 0x94ac72c9'847186f6'18c4f393'f78a32f9_u128}, {Sign::POS, -128, 0x97731420'365e538b'abd3fe19'f1aeb6b3_u128}, {Sign::POS, -128, 0x9a2f80e6'71bdda20'4226f8e2'204ff3bd_u128}, {Sign::POS, -128, 0x9ce1c8e6'a0b8cdb9'f799c4e8'174cf11c_u128}, {Sign::POS, -128, 0x9f89fdc4'f4b7a1ec'f8b49264'4f0701e0_u128}, {Sign::POS, -128, 0xa22832db'cadaae08'92fe9c08'637af0e6_u128}, {Sign::POS, -128, 0xa4bc7d19'34f70924'19a87f2a'457dac9f_u128}, {Sign::POS, -128, 0xa746f2dd'b7602294'67b7d66f'2d74e019_u128}, {Sign::POS, -128, 0xa9c7abdc'4830f5c8'916a84b5'be7933f6_u128}, {Sign::POS, -128, 0xac3ec0fb'997dd6a1'a36273a5'6afa8ef4_u128}, {Sign::POS, -128, 0xaeac4c38'b4d8c080'14725e2f'3e52070a_u128}, {Sign::POS, -128, 0xb110688a'ebdc6f6a'43d65788'b9f6a7b5_u128}, {Sign::POS, -128, 0xb36b31c9'1f043691'59014174'4462f93a_u128}, {Sign::POS, -128, 0xb5bcc490'59ecc4af'f8f3cee7'5e3907d5_u128}, {Sign::POS, -128, 0xb8053e2b'c2319e73'cb2da552'10a4443d_u128}, {Sign::POS, -128, 0xba44bc7d'd470782f'654c2cb1'0942e386_u128}, {Sign::POS, -128, 0xbc7b5dea'e98af280'd4113006'e80fb290_u128}, {Sign::POS, -128, 0xbea94144'fd049aac'1043c5e7'55282e7d_u128}, {Sign::POS, -128, 0xc0ce85b8'ac526640'89dd62c4'6e92fa25_u128}, {Sign::POS, -128, 0xc2eb4abb'661628b5'b373fe45'c61bb9fb_u128}, {Sign::POS, -128, 0xc4ffaffa'bf8fbd54'8cb43d10'bc9e0221_u128}, {Sign::POS, -128, 0xc70bd54c'e602ee13'e7d54fbd'09f2be38_u128}, {Sign::POS, -128, 0xc90fdaa2'2168c234'c4c6628b'80dc1cd1_u128}, }; // Degree-13 minimax polynomial generated by Sollya with: // > P = fpminimax(atan(x), [|1, 3, 5, 7, 9, 11, 13|], [|1, 128...|], // [0, 2^-7]); // > dirtyinfnorm(atan(x) - P, [0, 2^-7]); // 0x1.26016ad97f323875760f869684c0898d7b7bb8bep-122 constexpr Float128 ATAN_POLY_F128[] = { {Sign::NEG, -129, 0xaaaaaaaa'aaaaaaaa'aaaaaaa6'003c5d1d_u128}, {Sign::POS, -130, 0xcccccccc'cccccccc'cca00232'8776b063_u128}, {Sign::NEG, -130, 0x92492492'49249201'27f5268a'cb24aec0_u128}, {Sign::POS, -131, 0xe38e38e3'8dce3d96'626a1643'f8eb68f3_u128}, {Sign::NEG, -131, 0xba2e8b7a'ea4ad00f'005a35c7'6ef609b1_u128}, {Sign::POS, -131, 0x9d82765e'd22a7d92'ac09c405'c0a69214_u128}, }; // Approximate atan for |x| <= 2^-7. [[maybe_unused]] Float128 atan_eval(const Float128 &x) { Float128 x_sq = fputil::quick_mul(x, x); Float128 x3 = fputil::quick_mul(x, x_sq); Float128 p = fputil::polyeval(x_sq, ATAN_POLY_F128[0], ATAN_POLY_F128[1], ATAN_POLY_F128[2], ATAN_POLY_F128[3], ATAN_POLY_F128[4], ATAN_POLY_F128[5]); return fputil::multiply_add(x3, p, x); } } // anonymous namespace } // namespace LIBC_NAMESPACE_DECL #endif // LLVM_LIBC_SRC_MATH_GENERIC_ATAN_UTILS_H