aboutsummaryrefslogtreecommitdiff
path: root/libc
diff options
context:
space:
mode:
Diffstat (limited to 'libc')
-rw-r--r--libc/config/linux/x86_64/entrypoints.txt1
-rw-r--r--libc/fuzzing/math/CMakeLists.txt27
-rw-r--r--libc/fuzzing/math/f16sqrt_fuzz.cpp56
-rw-r--r--libc/fuzzing/math/fsqrt_fuzz.cpp53
-rw-r--r--libc/fuzzing/math/hypot_fuzz.cpp64
-rw-r--r--libc/include/wchar.yaml6
-rw-r--r--libc/shared/math.h5
-rw-r--r--libc/shared/math/acospif16.h29
-rw-r--r--libc/shared/math/asin.h23
-rw-r--r--libc/shared/math/asinf.h23
-rw-r--r--libc/shared/math/asinf16.h28
-rw-r--r--libc/shared/math/asinhf.h23
-rw-r--r--libc/shared/math/exp10f16.h6
-rw-r--r--libc/src/__support/FPUtil/CMakeLists.txt1
-rw-r--r--libc/src/__support/FPUtil/FEnvImpl.h51
-rw-r--r--libc/src/__support/GPU/allocator.cpp134
-rw-r--r--libc/src/__support/math/CMakeLists.txt90
-rw-r--r--libc/src/__support/math/acos.h2
-rw-r--r--libc/src/__support/math/acosf.h2
-rw-r--r--libc/src/__support/math/acosf16.h2
-rw-r--r--libc/src/__support/math/acoshf.h2
-rw-r--r--libc/src/__support/math/acoshf16.h8
-rw-r--r--libc/src/__support/math/acospif16.h147
-rw-r--r--libc/src/__support/math/asin.h297
-rw-r--r--libc/src/__support/math/asinf.h175
-rw-r--r--libc/src/__support/math/asinf16.h146
-rw-r--r--libc/src/__support/math/asinhf.h125
-rw-r--r--libc/src/__support/math/erff.h2
-rw-r--r--libc/src/__support/math/exp.h16
-rw-r--r--libc/src/__support/math/exp10.h19
-rw-r--r--libc/src/__support/math/exp10f.h2
-rw-r--r--libc/src/__support/math/exp10f16.h2
-rw-r--r--libc/src/__support/math/exp10f16_utils.h3
-rw-r--r--libc/src/__support/math/exp10f_utils.h2
-rw-r--r--libc/src/__support/math/exp_utils.h4
-rw-r--r--libc/src/__support/math/expf.h2
-rw-r--r--libc/src/__support/math/expf16.h2
-rw-r--r--libc/src/__support/math/expf16_utils.h3
-rw-r--r--libc/src/__support/math/frexpf.h2
-rw-r--r--libc/src/__support/math/frexpf128.h2
-rw-r--r--libc/src/__support/math/frexpf16.h2
-rw-r--r--libc/src/__support/math/ldexpf.h2
-rw-r--r--libc/src/__support/math/ldexpf128.h2
-rw-r--r--libc/src/__support/math/ldexpf16.h2
-rw-r--r--libc/src/math/generic/CMakeLists.txt49
-rw-r--r--libc/src/math/generic/acospif16.cpp122
-rw-r--r--libc/src/math/generic/asin.cpp15
-rw-r--r--libc/src/math/generic/asinf.cpp155
-rw-r--r--libc/src/math/generic/asinf16.cpp121
-rw-r--r--libc/src/math/generic/asinhf.cpp106
-rw-r--r--libc/src/math/generic/range_reduction_double_common.h2
-rw-r--r--libc/src/stdio/baremetal/CMakeLists.txt1
-rw-r--r--libc/src/stdio/scanf_core/CMakeLists.txt2
-rw-r--r--libc/src/wchar/CMakeLists.txt15
-rw-r--r--libc/src/wchar/mbsinit.cpp26
-rw-r--r--libc/src/wchar/mbsinit.h22
-rw-r--r--libc/src/wchar/wchar_utils.h13
-rw-r--r--libc/src/wchar/wcschr.cpp9
-rw-r--r--libc/src/wchar/wcspbrk.cpp11
-rw-r--r--libc/src/wchar/wcstok.cpp23
-rw-r--r--libc/test/integration/src/stdlib/gpu/malloc_stress.cpp28
-rw-r--r--libc/test/shared/CMakeLists.txt5
-rw-r--r--libc/test/shared/shared_math_test.cpp39
-rw-r--r--libc/test/src/wchar/CMakeLists.txt15
-rw-r--r--libc/test/src/wchar/mbsinit_test.cpp33
65 files changed, 1682 insertions, 725 deletions
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index e8f59c9..5e8278e 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -1264,6 +1264,7 @@ if(LLVM_LIBC_FULL_BUILD)
# wchar.h entrypoints
libc.src.wchar.mblen
libc.src.wchar.mbrlen
+ libc.src.wchar.mbsinit
libc.src.wchar.mbrtowc
libc.src.wchar.mbtowc
libc.src.wchar.wcrtomb
diff --git a/libc/fuzzing/math/CMakeLists.txt b/libc/fuzzing/math/CMakeLists.txt
index be63fe4..19416fc 100644
--- a/libc/fuzzing/math/CMakeLists.txt
+++ b/libc/fuzzing/math/CMakeLists.txt
@@ -205,3 +205,30 @@ add_libc_fuzzer(
DEPENDS
libc.src.math.cbrt
)
+
+add_libc_fuzzer(
+ fsqrt_fuzz
+ NEED_MPFR
+ SRCS
+ fsqrt_fuzz.cpp
+ DEPENDS
+ libc.src.math.fsqrt
+)
+
+add_libc_fuzzer(
+ f16sqrt_fuzz
+ NEED_MPFR
+ SRCS
+ f16sqrt_fuzz.cpp
+ DEPENDS
+ libc.src.math.f16sqrt
+)
+
+add_libc_fuzzer(
+ hypot_fuzz
+ NEED_MPFR
+ SRCS
+ hypot_fuzz.cpp
+ DEPENDS
+ libc.src.math.hypot
+)
diff --git a/libc/fuzzing/math/f16sqrt_fuzz.cpp b/libc/fuzzing/math/f16sqrt_fuzz.cpp
new file mode 100644
index 0000000..9a097a5
--- /dev/null
+++ b/libc/fuzzing/math/f16sqrt_fuzz.cpp
@@ -0,0 +1,56 @@
+//===-- f16sqrt_fuzz.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// Fuzzing test for llvm-libc f16sqrt implementation.
+///
+//===----------------------------------------------------------------------===//
+
+#include "src/math/f16sqrt.h"
+#include "utils/MPFRWrapper/mpfr_inc.h"
+#include <cstdint>
+#include <cstring>
+#include <iostream>
+#include <math.h>
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ mpfr_t input;
+ mpfr_t out;
+ mpfr_init2(input, 53);
+ mpfr_init2(out, 128);
+ for (size_t i = 0; i < size / sizeof(double); ++i) {
+ double x;
+ std::memcpy(&x, data, sizeof(double));
+ data += sizeof(double);
+
+ // remove NaN, inf, and values outside the accepted range
+ if (isnan(x) || isinf(x) || x < 0)
+ continue;
+ // signed zeros already tested in unit tests
+ if (signbit(x) && x == 0.0)
+ continue;
+
+ mpfr_set_d(input, x, MPFR_RNDN);
+ mpfr_sqrt(out, input, MPFR_RNDN);
+ float16 to_compare = mpfr_get_d(out, MPFR_RNDN);
+
+ float16 result = LIBC_NAMESPACE::f16sqrt(x);
+
+ if (result != to_compare) {
+ std::cout << std::hexfloat << "Failing input: " << x << std::endl;
+ std::cout << std::hexfloat
+ << "Failing output: " << static_cast<float>(result)
+ << std::endl;
+ std::cout << std::hexfloat
+ << "Expected: " << static_cast<float>(to_compare) << std::endl;
+ __builtin_trap();
+ }
+ }
+ mpfr_clear(input);
+ mpfr_clear(out);
+ return 0;
+}
diff --git a/libc/fuzzing/math/fsqrt_fuzz.cpp b/libc/fuzzing/math/fsqrt_fuzz.cpp
new file mode 100644
index 0000000..06bb231
--- /dev/null
+++ b/libc/fuzzing/math/fsqrt_fuzz.cpp
@@ -0,0 +1,53 @@
+//===-- fsqrt_fuzz.cpp ----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// Fuzzing test for llvm-libc fsqrt implementation.
+///
+//===----------------------------------------------------------------------===//
+
+#include "src/math/fsqrt.h"
+#include "utils/MPFRWrapper/mpfr_inc.h"
+#include <cstdint>
+#include <cstring>
+#include <iostream>
+#include <math.h>
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ mpfr_t input;
+ mpfr_t out;
+ mpfr_init2(input, 53);
+ mpfr_init2(out, 128);
+ for (size_t i = 0; i < size / sizeof(double); ++i) {
+ double x;
+ std::memcpy(&x, data, sizeof(double));
+ data += sizeof(double);
+
+ // remove NaN, inf, and values outside the accepted range
+ if (isnan(x) || isinf(x) || x < 0)
+ continue;
+ // signed zeros already tested in unit tests
+ if (signbit(x) && x == 0.0)
+ continue;
+
+ mpfr_set_d(input, x, MPFR_RNDN);
+ mpfr_sqrt(out, input, MPFR_RNDN);
+ float to_compare = mpfr_get_flt(out, MPFR_RNDN);
+
+ float result = LIBC_NAMESPACE::fsqrt(x);
+
+ if (result != to_compare) {
+ std::cout << std::hexfloat << "Failing input: " << x << std::endl;
+ std::cout << std::hexfloat << "Failing output: " << result << std::endl;
+ std::cout << std::hexfloat << "Expected: " << to_compare << std::endl;
+ __builtin_trap();
+ }
+ }
+ mpfr_clear(input);
+ mpfr_clear(out);
+ return 0;
+}
diff --git a/libc/fuzzing/math/hypot_fuzz.cpp b/libc/fuzzing/math/hypot_fuzz.cpp
new file mode 100644
index 0000000..6129e41
--- /dev/null
+++ b/libc/fuzzing/math/hypot_fuzz.cpp
@@ -0,0 +1,64 @@
+//===-- hypot_fuzz.cpp ----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// Fuzzing test for llvm-libc hypot implementation.
+///
+//===----------------------------------------------------------------------===//
+
+#include "src/math/hypot.h"
+#include "utils/MPFRWrapper/mpfr_inc.h"
+#include <cstdint>
+#include <cstring>
+#include <iostream>
+#include <math.h>
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ mpfr_t in_x;
+ mpfr_t in_y;
+ mpfr_t out;
+ mpfr_init2(in_x, 53);
+ mpfr_init2(in_y, 53);
+ mpfr_init2(out, 128);
+
+ for (size_t i = 0; i < size / (2 * sizeof(double)); ++i) {
+ double x;
+ double y;
+
+ std::memcpy(&x, data, sizeof(double));
+ data += sizeof(double);
+ std::memcpy(&y, data, sizeof(double));
+ data += sizeof(double);
+
+ // remove NaN, inf, and signed zeros
+ if (isnan(x) || isinf(x) || (signbit(x) && x == 0.0))
+ return 0;
+ if (isnan(y) || isinf(y) || (signbit(y) && y == 0.0))
+ return 0;
+
+ mpfr_set_d(in_x, x, MPFR_RNDN);
+ mpfr_set_d(in_y, y, MPFR_RNDN);
+
+ int output = mpfr_hypot(out, in_x, in_y, MPFR_RNDN);
+ mpfr_subnormalize(out, output, MPFR_RNDN);
+ double to_compare = mpfr_get_d(out, MPFR_RNDN);
+
+ double result = LIBC_NAMESPACE::hypot(x, y);
+
+ if (result != to_compare) {
+ std::cout << std::hexfloat << "Failing x: " << x << std::endl;
+ std::cout << std::hexfloat << "Failing y: " << y << std::endl;
+ std::cout << std::hexfloat << "Failing output: " << result << std::endl;
+ std::cout << std::hexfloat << "Expected: " << to_compare << std::endl;
+ __builtin_trap();
+ }
+ }
+ mpfr_clear(in_x);
+ mpfr_clear(in_y);
+ mpfr_clear(out);
+ return 0;
+}
diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index 0285f19..6e1f595 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -53,6 +53,12 @@ functions:
- type: wchar_t *__restrict
- type: const char *__restrict
- type: size_t
+ - name: mbsinit
+ standards:
+ - stdc
+ return_type: int
+ arguments:
+ - type: mbstate_t *
- name: mblen
standards:
- stdc
diff --git a/libc/shared/math.h b/libc/shared/math.h
index e3c674c..e0f00f5 100644
--- a/libc/shared/math.h
+++ b/libc/shared/math.h
@@ -16,6 +16,11 @@
#include "math/acosf16.h"
#include "math/acoshf.h"
#include "math/acoshf16.h"
+#include "math/acospif16.h"
+#include "math/asin.h"
+#include "math/asinf.h"
+#include "math/asinf16.h"
+#include "math/asinhf.h"
#include "math/erff.h"
#include "math/exp.h"
#include "math/exp10.h"
diff --git a/libc/shared/math/acospif16.h b/libc/shared/math/acospif16.h
new file mode 100644
index 0000000..38225f2
--- /dev/null
+++ b/libc/shared/math/acospif16.h
@@ -0,0 +1,29 @@
+//===-- Shared acospif16 function -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_ACOSPIF16_H
+#define LLVM_LIBC_SHARED_MATH_ACOSPIF16_H
+
+#include "include/llvm-libc-macros/float16-macros.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT16
+
+#include "shared/libc_common.h"
+#include "src/__support/math/acospif16.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::acospif16;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT16
+
+#endif // LLVM_LIBC_SHARED_MATH_ACOSPIF16_H
diff --git a/libc/shared/math/asin.h b/libc/shared/math/asin.h
new file mode 100644
index 0000000..0b2c8ea6
--- /dev/null
+++ b/libc/shared/math/asin.h
@@ -0,0 +1,23 @@
+//===-- Shared asin function ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_ASIN_H
+#define LLVM_LIBC_SHARED_MATH_ASIN_H
+
+#include "shared/libc_common.h"
+#include "src/__support/math/asin.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::asin;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SHARED_MATH_ASIN_H
diff --git a/libc/shared/math/asinf.h b/libc/shared/math/asinf.h
new file mode 100644
index 0000000..ac051bd
--- /dev/null
+++ b/libc/shared/math/asinf.h
@@ -0,0 +1,23 @@
+//===-- Shared asinf function -----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_ASINF_H
+#define LLVM_LIBC_SHARED_MATH_ASINF_H
+
+#include "shared/libc_common.h"
+#include "src/__support/math/asinf.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::asinf;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SHARED_MATH_ASINF_H
diff --git a/libc/shared/math/asinf16.h b/libc/shared/math/asinf16.h
new file mode 100644
index 0000000..d545e26
--- /dev/null
+++ b/libc/shared/math/asinf16.h
@@ -0,0 +1,28 @@
+//===-- Shared asinf16 function ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_ASINF16_H
+#define LLVM_LIBC_SHARED_MATH_ASINF16_H
+
+#include "shared/libc_common.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT16
+
+#include "src/__support/math/asinf16.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::asinf16;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT16
+
+#endif // LLVM_LIBC_SHARED_MATH_ASINF_H
diff --git a/libc/shared/math/asinhf.h b/libc/shared/math/asinhf.h
new file mode 100644
index 0000000..c4a5509
--- /dev/null
+++ b/libc/shared/math/asinhf.h
@@ -0,0 +1,23 @@
+//===-- Shared asinhf function ----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_ASINHF_H
+#define LLVM_LIBC_SHARED_MATH_ASINHF_H
+
+#include "shared/libc_common.h"
+#include "src/__support/math/asinhf.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::asinhf;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SHARED_MATH_ASINHF_H
diff --git a/libc/shared/math/exp10f16.h b/libc/shared/math/exp10f16.h
index af00787..d6ba067 100644
--- a/libc/shared/math/exp10f16.h
+++ b/libc/shared/math/exp10f16.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIBC_SHARED_MATH_EXP10F_H
-#define LLVM_LIBC_SHARED_MATH_EXP10F_H
+#ifndef LLVM_LIBC_SHARED_MATH_EXP10F16_H
+#define LLVM_LIBC_SHARED_MATH_EXP10F16_H
#include "include/llvm-libc-macros/float16-macros.h"
#include "shared/libc_common.h"
@@ -26,4 +26,4 @@ using math::exp10f16;
#endif // LIBC_TYPES_HAS_FLOAT16
-#endif // LLVM_LIBC_SHARED_MATH_EXP10F_H
+#endif // LLVM_LIBC_SHARED_MATH_EXP10F16_H
diff --git a/libc/src/__support/FPUtil/CMakeLists.txt b/libc/src/__support/FPUtil/CMakeLists.txt
index 94f8b95..6e447fc 100644
--- a/libc/src/__support/FPUtil/CMakeLists.txt
+++ b/libc/src/__support/FPUtil/CMakeLists.txt
@@ -7,6 +7,7 @@ add_header_library(
libc.hdr.fenv_macros
libc.hdr.math_macros
libc.hdr.stdint_proxy
+ libc.src.__support.CPP.type_traits
libc.src.__support.macros.attributes
libc.src.errno.errno
)
diff --git a/libc/src/__support/FPUtil/FEnvImpl.h b/libc/src/__support/FPUtil/FEnvImpl.h
index 7691088..7bd5643 100644
--- a/libc/src/__support/FPUtil/FEnvImpl.h
+++ b/libc/src/__support/FPUtil/FEnvImpl.h
@@ -12,6 +12,7 @@
#include "hdr/fenv_macros.h"
#include "hdr/math_macros.h"
#include "hdr/types/fenv_t.h"
+#include "src/__support/CPP/type_traits.h"
#include "src/__support/libc_errno.h"
#include "src/__support/macros/attributes.h" // LIBC_INLINE
#include "src/__support/macros/config.h"
@@ -72,40 +73,58 @@ LIBC_INLINE int set_env(const fenv_t *) { return 0; }
namespace LIBC_NAMESPACE_DECL {
namespace fputil {
-LIBC_INLINE static int clear_except_if_required([[maybe_unused]] int excepts) {
+LIBC_INLINE static constexpr int
+clear_except_if_required([[maybe_unused]] int excepts) {
+ if (cpp::is_constant_evaluated()) {
+ return 0;
+ } else {
#ifndef LIBC_MATH_HAS_NO_EXCEPT
- if (math_errhandling & MATH_ERREXCEPT)
- return clear_except(excepts);
+ if (math_errhandling & MATH_ERREXCEPT)
+ return clear_except(excepts);
#endif // LIBC_MATH_HAS_NO_EXCEPT
- return 0;
+ return 0;
+ }
}
-LIBC_INLINE static int set_except_if_required([[maybe_unused]] int excepts) {
+LIBC_INLINE static constexpr int
+set_except_if_required([[maybe_unused]] int excepts) {
+ if (cpp::is_constant_evaluated()) {
+ return 0;
+ } else {
#ifndef LIBC_MATH_HAS_NO_EXCEPT
- if (math_errhandling & MATH_ERREXCEPT)
- return set_except(excepts);
+ if (math_errhandling & MATH_ERREXCEPT)
+ return set_except(excepts);
#endif // LIBC_MATH_HAS_NO_EXCEPT
- return 0;
+ return 0;
+ }
}
-LIBC_INLINE static int raise_except_if_required([[maybe_unused]] int excepts) {
+LIBC_INLINE static constexpr int
+raise_except_if_required([[maybe_unused]] int excepts) {
+ if (cpp::is_constant_evaluated()) {
+ return 0;
+ } else {
#ifndef LIBC_MATH_HAS_NO_EXCEPT
- if (math_errhandling & MATH_ERREXCEPT)
+ if (math_errhandling & MATH_ERREXCEPT)
#ifdef LIBC_TARGET_ARCH_IS_X86_64
- return raise_except</*SKIP_X87_FPU*/ true>(excepts);
+ return raise_except</*SKIP_X87_FPU*/ true>(excepts);
#else // !LIBC_TARGET_ARCH_IS_X86
- return raise_except(excepts);
+ return raise_except(excepts);
#endif // LIBC_TARGET_ARCH_IS_X86
#endif // LIBC_MATH_HAS_NO_EXCEPT
- return 0;
+ return 0;
+ }
}
-LIBC_INLINE static void set_errno_if_required([[maybe_unused]] int err) {
+LIBC_INLINE static constexpr void
+set_errno_if_required([[maybe_unused]] int err) {
+ if (!cpp::is_constant_evaluated()) {
#ifndef LIBC_MATH_HAS_NO_ERRNO
- if (math_errhandling & MATH_ERRNO)
- libc_errno = err;
+ if (math_errhandling & MATH_ERRNO)
+ libc_errno = err;
#endif // LIBC_MATH_HAS_NO_ERRNO
+ }
}
} // namespace fputil
diff --git a/libc/src/__support/GPU/allocator.cpp b/libc/src/__support/GPU/allocator.cpp
index 866aea7..2b78c4d 100644
--- a/libc/src/__support/GPU/allocator.cpp
+++ b/libc/src/__support/GPU/allocator.cpp
@@ -16,6 +16,7 @@
#include "allocator.h"
+#include "src/__support/CPP/algorithm.h"
#include "src/__support/CPP/atomic.h"
#include "src/__support/CPP/bit.h"
#include "src/__support/CPP/new.h"
@@ -31,14 +32,12 @@ constexpr static uint64_t SLAB_SIZE = /* 2 MiB */ 2ull * 1024 * 1024;
constexpr static uint64_t ARRAY_SIZE = MAX_SIZE / SLAB_SIZE;
constexpr static uint64_t SLAB_ALIGNMENT = SLAB_SIZE - 1;
constexpr static uint32_t BITS_IN_WORD = sizeof(uint32_t) * 8;
+constexpr static uint32_t BITS_IN_DWORD = sizeof(uint64_t) * 8;
constexpr static uint32_t MIN_SIZE = 16;
constexpr static uint32_t MIN_ALIGNMENT = MIN_SIZE - 1;
// The number of times to attempt claiming an in-progress slab allocation.
-constexpr static uint32_t MAX_TRIES = 128;
-
-// A sentinel used to indicate an invalid but non-null pointer value.
-constexpr static uint64_t SENTINEL = cpp::numeric_limits<uint64_t>::max();
+constexpr static uint32_t MAX_TRIES = 1024;
static_assert(!(ARRAY_SIZE & (ARRAY_SIZE - 1)), "Must be a power of two");
@@ -70,8 +69,8 @@ static void rpc_free(void *ptr) {
// Convert a potentially disjoint bitmask into an increasing integer per-lane
// for use with indexing between gpu lanes.
-static inline uint32_t lane_count(uint64_t lane_mask) {
- return cpp::popcount(lane_mask & ((uint64_t(1) << gpu::get_lane_id()) - 1));
+static inline uint32_t lane_count(uint64_t lane_mask, uint32_t id) {
+ return cpp::popcount(lane_mask & ((uint64_t(1) << id) - 1));
}
// Obtain an initial value to seed a random number generator. We use the rounded
@@ -133,7 +132,8 @@ static inline constexpr T round_up(const T x) {
void uniform_memset(uint32_t *s, uint32_t c, uint32_t n, uint64_t uniform) {
uint64_t mask = gpu::get_lane_mask();
uint32_t workers = cpp::popcount(uniform);
- for (uint32_t i = impl::lane_count(mask & uniform); i < n; i += workers)
+ for (uint32_t i = impl::lane_count(mask & uniform, gpu::get_lane_id()); i < n;
+ i += workers)
s[i] = c;
}
@@ -142,10 +142,27 @@ static inline constexpr bool is_pow2(uint64_t x) {
return x && (x & (x - 1)) == 0;
}
-// Where this chunk size should start looking in the global array.
-static inline constexpr uint32_t start_index(uint32_t chunk_index) {
- return (ARRAY_SIZE * impl::get_chunk_id(chunk_index)) /
- impl::get_chunk_id(SLAB_SIZE / 2);
+// Where this chunk size should start looking in the global array. Small
+// allocations are much more likely than large ones, so we give them the most
+// space. We use a cubic easing function normalized on the possible chunks.
+static inline constexpr uint32_t get_start_index(uint32_t chunk_size) {
+ constexpr uint32_t max_chunk = impl::get_chunk_id(SLAB_SIZE / 2);
+ uint64_t norm =
+ (1 << 16) - (impl::get_chunk_id(chunk_size) << 16) / max_chunk;
+ uint64_t bias = (norm * norm * norm) >> 32;
+ uint64_t inv = (1 << 16) - bias;
+ return static_cast<uint32_t>(((ARRAY_SIZE - 1) * inv) >> 16);
+}
+
+// Returns the id of the lane below this one that acts as its leader.
+static inline uint32_t get_leader_id(uint64_t ballot, uint32_t id) {
+ uint64_t mask = id < BITS_IN_DWORD ? ~0ull << (id + 1) : 0;
+ return BITS_IN_DWORD - cpp::countl_zero(ballot & ~mask) - 1;
+}
+
+// We use a sentinal value to indicate a failed or in-progress allocation.
+template <typename T> bool is_sentinel(const T &x) {
+ return x == cpp::numeric_limits<T>::max();
}
} // namespace impl
@@ -264,28 +281,33 @@ struct Slab {
continue;
// We try using any known empty bits from the previous attempt first.
- uint32_t start = gpu::shuffle(mask, cpp::countr_zero(uniform & mask),
- ~after ? (old_index & ~(BITS_IN_WORD - 1)) +
- cpp::countr_zero(~after)
- : impl::xorshift32(state));
+ uint32_t start = gpu::shuffle(
+ mask, cpp::countr_zero(uniform & mask),
+ ~after ? (old_index & ~(BITS_IN_WORD - 1)) + cpp::countr_zero(~after)
+ : __builtin_align_down(impl::xorshift32(state), BITS_IN_WORD));
- uint32_t id = impl::lane_count(uniform & mask);
+ // Each lane tries to claim one bit in a single contiguous mask.
+ uint32_t id = impl::lane_count(uniform & mask, gpu::get_lane_id());
uint32_t index = (start + id) % usable_bits(chunk_size);
uint32_t slot = index / BITS_IN_WORD;
uint32_t bit = index % BITS_IN_WORD;
// Get the mask of bits destined for the same slot and coalesce it.
- uint64_t match = uniform & gpu::match_any(mask, slot);
- uint32_t length = cpp::popcount(match);
- uint32_t bitmask = gpu::shuffle(
- mask, cpp::countr_zero(match),
- static_cast<uint32_t>((uint64_t(1) << length) - 1) << bit);
+ uint32_t leader = impl::get_leader_id(
+ uniform & gpu::ballot(mask, !id || index % BITS_IN_WORD == 0),
+ gpu::get_lane_id());
+ uint32_t length = cpp::popcount(uniform & mask) -
+ impl::lane_count(uniform & mask, leader);
+ uint32_t bitmask =
+ static_cast<uint32_t>(
+ (uint64_t(1) << cpp::min(length, BITS_IN_WORD)) - 1)
+ << bit;
uint32_t before = 0;
- if (gpu::get_lane_id() == static_cast<uint32_t>(cpp::countr_zero(match)))
+ if (gpu::get_lane_id() == leader)
before = cpp::AtomicRef(get_bitfield()[slot])
.fetch_or(bitmask, cpp::MemoryOrder::RELAXED);
- before = gpu::shuffle(mask, cpp::countr_zero(match), before);
+ before = gpu::shuffle(mask, leader, before);
if (~before & (1 << bit))
result = ptr_from_index(index, chunk_size);
else
@@ -323,20 +345,20 @@ struct GuardPtr {
private:
struct RefCounter {
// Indicates that the object is in its deallocation phase and thus invalid.
- static constexpr uint64_t INVALID = uint64_t(1) << 63;
+ static constexpr uint32_t INVALID = uint32_t(1) << 31;
// If a read preempts an unlock call we indicate this so the following
// unlock call can swap out the helped bit and maintain exclusive ownership.
- static constexpr uint64_t HELPED = uint64_t(1) << 62;
+ static constexpr uint32_t HELPED = uint32_t(1) << 30;
// Resets the reference counter, cannot be reset to zero safely.
- void reset(uint32_t n, uint64_t &count) {
+ void reset(uint32_t n, uint32_t &count) {
counter.store(n, cpp::MemoryOrder::RELAXED);
count = n;
}
// Acquire a slot in the reference counter if it is not invalid.
- bool acquire(uint32_t n, uint64_t &count) {
+ bool acquire(uint32_t n, uint32_t &count) {
count = counter.fetch_add(n, cpp::MemoryOrder::RELAXED) + n;
return (count & INVALID) == 0;
}
@@ -349,7 +371,7 @@ private:
// another thread resurrected the counter and we quit, or a parallel read
// helped us invalidating it. For the latter, claim that flag and return.
if (counter.fetch_sub(n, cpp::MemoryOrder::RELAXED) == n) {
- uint64_t expected = 0;
+ uint32_t expected = 0;
if (counter.compare_exchange_strong(expected, INVALID,
cpp::MemoryOrder::RELAXED,
cpp::MemoryOrder::RELAXED))
@@ -372,28 +394,29 @@ private:
return (val & INVALID) ? 0 : val;
}
- cpp::Atomic<uint64_t> counter{0};
+ cpp::Atomic<uint32_t> counter{0};
};
- cpp::Atomic<Slab *> ptr{nullptr};
- RefCounter ref{};
+ cpp::Atomic<Slab *> ptr;
+ RefCounter ref;
// Should be called be a single lane for each different pointer.
template <typename... Args>
- Slab *try_lock_impl(uint32_t n, uint64_t &count, Args &&...args) {
+ Slab *try_lock_impl(uint32_t n, uint32_t &count, Args &&...args) {
Slab *expected = ptr.load(cpp::MemoryOrder::RELAXED);
if (!expected &&
ptr.compare_exchange_strong(
- expected, reinterpret_cast<Slab *>(SENTINEL),
+ expected,
+ reinterpret_cast<Slab *>(cpp::numeric_limits<uintptr_t>::max()),
cpp::MemoryOrder::RELAXED, cpp::MemoryOrder::RELAXED)) {
- count = cpp::numeric_limits<uint64_t>::max();
+ count = cpp::numeric_limits<uint32_t>::max();
void *raw = impl::rpc_allocate(sizeof(Slab));
if (!raw)
return nullptr;
return new (raw) Slab(cpp::forward<Args>(args)...);
}
- if (!expected || expected == reinterpret_cast<Slab *>(SENTINEL))
+ if (!expected || impl::is_sentinel(reinterpret_cast<uintptr_t>(expected)))
return nullptr;
if (!ref.acquire(n, count))
@@ -405,7 +428,7 @@ private:
// Finalize the associated memory and signal that it is ready to use by
// resetting the counter.
- void finalize(Slab *mem, uint32_t n, uint64_t &count) {
+ void finalize(Slab *mem, uint32_t n, uint32_t &count) {
cpp::atomic_thread_fence(cpp::MemoryOrder::RELEASE);
ptr.store(mem, cpp::MemoryOrder::RELAXED);
cpp::atomic_thread_fence(cpp::MemoryOrder::ACQUIRE);
@@ -418,7 +441,7 @@ public:
// The uniform mask represents which lanes share the same pointer. For each
// uniform value we elect a leader to handle it on behalf of the other lanes.
template <typename... Args>
- Slab *try_lock(uint64_t lane_mask, uint64_t uniform, uint64_t &count,
+ Slab *try_lock(uint64_t lane_mask, uint64_t uniform, uint32_t &count,
Args &&...args) {
count = 0;
Slab *result = nullptr;
@@ -433,14 +456,15 @@ public:
// We defer storing the newly allocated slab until now so that we can use
// multiple lanes to initialize it and release it for use.
- if (count == cpp::numeric_limits<uint64_t>::max()) {
+ if (impl::is_sentinel(count)) {
result->initialize(uniform);
if (gpu::get_lane_id() == uint32_t(cpp::countr_zero(uniform)))
finalize(result, cpp::popcount(uniform), count);
}
- if (count != cpp::numeric_limits<uint64_t>::max())
- count = count - cpp::popcount(uniform) + impl::lane_count(uniform) + 1;
+ if (!impl::is_sentinel(count))
+ count = count - cpp::popcount(uniform) +
+ impl::lane_count(uniform, gpu::get_lane_id()) + 1;
return result;
}
@@ -469,7 +493,7 @@ static GuardPtr slots[ARRAY_SIZE] = {};
// Keep a cache of the last successful slot for each chunk size. Initialize it
// to an even spread of the total size. Must be updated if the chunking scheme
// changes.
-#define S(X) (impl::start_index(X))
+#define S(X) (impl::get_start_index(X))
static cpp::Atomic<uint32_t> indices[] = {
S(16), S(32), S(48), S(64), S(96), S(112), S(128),
S(192), S(224), S(256), S(384), S(448), S(512), S(768),
@@ -481,26 +505,28 @@ static cpp::Atomic<uint32_t> indices[] = {
#undef S
// Tries to find a slab in the table that can support the given chunk size.
-static Slab *find_slab(uint32_t chunk_size) {
+static Slab *find_slab(uint32_t chunk_size, uint64_t &uniform) {
// We start at the index of the last successful allocation for this kind.
uint32_t chunk_id = impl::get_chunk_id(chunk_size);
uint32_t start = indices[chunk_id].load(cpp::MemoryOrder::RELAXED);
- uint64_t uniform = gpu::match_any(gpu::get_lane_mask(), chunk_size);
- for (uint32_t offset = 0; offset < ARRAY_SIZE; ++offset) {
+ for (uint32_t offset = 0; offset <= ARRAY_SIZE; ++offset) {
uint32_t index =
- !offset ? start : (impl::start_index(chunk_size) + offset) % ARRAY_SIZE;
+ !offset ? start
+ : (impl::get_start_index(chunk_size) + offset - 1) % ARRAY_SIZE;
- if (slots[index].use_count() < Slab::available_chunks(chunk_size)) {
+ if (!offset ||
+ slots[index].use_count() < Slab::available_chunks(chunk_size)) {
uint64_t lane_mask = gpu::get_lane_mask();
- uint64_t reserved = 0;
+ uint32_t reserved = 0;
Slab *slab = slots[index].try_lock(lane_mask, uniform & lane_mask,
reserved, chunk_size, index);
// If there is a slab allocation in progress we retry a few times.
for (uint32_t retries = 0;
- retries < MAX_TRIES && !slab && reserved != SENTINEL; retries++) {
+ !slab && !impl::is_sentinel(reserved) && retries < MAX_TRIES;
+ retries++) {
uint64_t lane_mask = gpu::get_lane_mask();
slab = slots[index].try_lock(lane_mask, uniform & lane_mask, reserved,
chunk_size, index);
@@ -514,13 +540,17 @@ static Slab *find_slab(uint32_t chunk_size) {
slab->get_chunk_size() == chunk_size) {
if (index != start)
indices[chunk_id].store(index, cpp::MemoryOrder::RELAXED);
+ uniform = uniform & gpu::get_lane_mask();
return slab;
} else if (slab && (reserved > Slab::available_chunks(chunk_size) ||
slab->get_chunk_size() != chunk_size)) {
slots[index].unlock(gpu::get_lane_mask(),
gpu::get_lane_mask() & uniform);
- } else if (!slab && reserved == SENTINEL) {
+ } else if (!slab && impl::is_sentinel(reserved)) {
+ uniform = uniform & gpu::get_lane_mask();
return nullptr;
+ } else {
+ sleep_briefly();
}
}
}
@@ -547,12 +577,12 @@ void *allocate(uint64_t size) {
// Try to find a slab for the rounded up chunk size and allocate from it.
uint32_t chunk_size = impl::get_chunk_size(static_cast<uint32_t>(size));
- Slab *slab = find_slab(chunk_size);
- if (!slab || slab == reinterpret_cast<Slab *>(SENTINEL))
+ uint64_t uniform = gpu::match_any(gpu::get_lane_mask(), chunk_size);
+ Slab *slab = find_slab(chunk_size, uniform);
+ if (!slab || impl::is_sentinel(reinterpret_cast<uintptr_t>(slab)))
return nullptr;
uint64_t lane_mask = gpu::get_lane_mask();
- uint64_t uniform = gpu::match_any(lane_mask, slab->get_global_index());
void *ptr = slab->allocate(lane_mask, uniform);
return ptr;
}
diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt
index 926bbd5..13f46a1 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -4,7 +4,6 @@ add_header_library(
acos.h
DEPENDS
.asin_utils
- libc.src.__support.math.asin_utils
libc.src.__support.FPUtil.double_double
libc.src.__support.FPUtil.dyadic_float
libc.src.__support.FPUtil.fenv_impl
@@ -96,6 +95,21 @@ add_header_library(
)
add_header_library(
+ acospif16
+ HDRS
+ acospif16.h
+ DEPENDS
+ libc.src.__support.FPUtil.cast
+ libc.src.__support.FPUtil.fenv_impl
+ libc.src.__support.FPUtil.fp_bits
+ libc.src.__support.FPUtil.multiply_add
+ libc.src.__support.FPUtil.polyeval
+ libc.src.__support.FPUtil.sqrt
+ libc.src.__support.macros.optimization
+ libc.src.__support.macros.properties.types
+)
+
+add_header_library(
asin_utils
HDRS
asin_utils.h
@@ -110,6 +124,67 @@ add_header_library(
)
add_header_library(
+ asin
+ HDRS
+ asin.h
+ DEPENDS
+ .asin_utils
+ libc.src.__support.FPUtil.double_double
+ libc.src.__support.FPUtil.dyadic_float
+ libc.src.__support.FPUtil.fenv_impl
+ libc.src.__support.FPUtil.fp_bits
+ libc.src.__support.FPUtil.multiply_add
+ libc.src.__support.FPUtil.polyeval
+ libc.src.__support.FPUtil.sqrt
+ libc.src.__support.macros.optimization
+ libc.src.__support.macros.properties.cpu_features
+)
+
+add_header_library(
+ asinhf
+ HDRS
+ asinhf.h
+ DEPENDS
+ .acoshf_utils
+ libc.src.__support.FPUtil.fp_bits
+ libc.src.__support.FPUtil.polyeval
+ libc.src.__support.FPUtil.multiply_add
+ libc.src.__support.FPUtil.sqrt
+ libc.src.__support.macros.config
+ libc.src.__support.macros.optimization
+)
+
+add_header_library(
+ asinf
+ HDRS
+ asinf.h
+ DEPENDS
+ .inv_trigf_utils
+ libc.src.__support.FPUtil.fenv_impl
+ libc.src.__support.FPUtil.fp_bits
+ libc.src.__support.FPUtil.except_value_utils
+ libc.src.__support.FPUtil.multiply_add
+ libc.src.__support.FPUtil.sqrt
+ libc.src.__support.macros.config
+ libc.src.__support.macros.optimization
+ libc.src.__support.macros.properties.cpu_features
+)
+
+add_header_library(
+ asinf16
+ HDRS
+ asinf16.h
+ DEPENDS
+ libc.src.__support.FPUtil.fenv_impl
+ libc.src.__support.FPUtil.fp_bits
+ libc.src.__support.FPUtil.polyeval
+ libc.src.__support.FPUtil.cast
+ libc.src.__support.FPUtil.multiply_add
+ libc.src.__support.FPUtil.sqrt
+ libc.src.__support.macros.optimization
+)
+
+add_header_library(
erff
HDRS
erff.h
@@ -358,11 +433,10 @@ add_header_library(
DEPENDS
.exp10f16_utils
libc.src.__support.FPUtil.fp_bits
- src.__support.FPUtil.FEnvImpl
- src.__support.FPUtil.FPBits
- src.__support.FPUtil.cast
- src.__support.FPUtil.rounding_mode
- src.__support.FPUtil.except_value_utils
- src.__support.macros.optimization
- src.__support.macros.properties.cpu_features
+ libc.src.__support.FPUtil.fenv_impl
+ libc.src.__support.FPUtil.cast
+ libc.src.__support.FPUtil.rounding_mode
+ libc.src.__support.FPUtil.except_value_utils
+ libc.src.__support.macros.optimization
+ libc.src.__support.macros.properties.cpu_features
)
diff --git a/libc/src/__support/math/acos.h b/libc/src/__support/math/acos.h
index a52ead7..0e1e413 100644
--- a/libc/src/__support/math/acos.h
+++ b/libc/src/__support/math/acos.h
@@ -24,7 +24,7 @@ namespace LIBC_NAMESPACE_DECL {
namespace math {
-static constexpr double acos(double x) {
+LIBC_INLINE static constexpr double acos(double x) {
using DoubleDouble = fputil::DoubleDouble;
using namespace asin_internal;
using FPBits = fputil::FPBits<double>;
diff --git a/libc/src/__support/math/acosf.h b/libc/src/__support/math/acosf.h
index 153087e..7a0c0e5 100644
--- a/libc/src/__support/math/acosf.h
+++ b/libc/src/__support/math/acosf.h
@@ -45,7 +45,7 @@ static constexpr fputil::ExceptValues<float, N_EXCEPTS> ACOSF_EXCEPTS = {{
} // namespace acosf_internal
-static constexpr float acosf(float x) {
+LIBC_INLINE static constexpr float acosf(float x) {
using namespace acosf_internal;
using namespace inv_trigf_utils_internal;
using FPBits = typename fputil::FPBits<float>;
diff --git a/libc/src/__support/math/acosf16.h b/libc/src/__support/math/acosf16.h
index 58d3761..3f0e002 100644
--- a/libc/src/__support/math/acosf16.h
+++ b/libc/src/__support/math/acosf16.h
@@ -26,7 +26,7 @@ namespace LIBC_NAMESPACE_DECL {
namespace math {
-static constexpr float16 acosf16(float16 x) {
+LIBC_INLINE static constexpr float16 acosf16(float16 x) {
// Generated by Sollya using the following command:
// > round(pi/2, SG, RN);
diff --git a/libc/src/__support/math/acoshf.h b/libc/src/__support/math/acoshf.h
index f18f169..4e00311 100644
--- a/libc/src/__support/math/acoshf.h
+++ b/libc/src/__support/math/acoshf.h
@@ -21,7 +21,7 @@ namespace LIBC_NAMESPACE_DECL {
namespace math {
-static constexpr float acoshf(float x) {
+LIBC_INLINE static constexpr float acoshf(float x) {
using namespace acoshf_internal;
using FPBits_t = typename fputil::FPBits<float>;
FPBits_t xbits(x);
diff --git a/libc/src/__support/math/acoshf16.h b/libc/src/__support/math/acoshf16.h
index 15e7f6a..e5be2a8 100644
--- a/libc/src/__support/math/acoshf16.h
+++ b/libc/src/__support/math/acoshf16.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ACOSHF_H
-#define LLVM_LIBC_SRC___SUPPORT_MATH_ACOSHF_H
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ACOSHF16_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_ACOSHF16_H
#include "include/llvm-libc-macros/float16-macros.h"
@@ -28,7 +28,7 @@ namespace LIBC_NAMESPACE_DECL {
namespace math {
-static constexpr float16 acoshf16(float16 x) {
+LIBC_INLINE static constexpr float16 acoshf16(float16 x) {
using namespace acoshf_internal;
constexpr size_t N_EXCEPTS = 2;
@@ -120,4 +120,4 @@ static constexpr float16 acoshf16(float16 x) {
#endif // LIBC_TYPES_HAS_FLOAT16
-#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ACOSHF_H
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ACOSHF16_H
diff --git a/libc/src/__support/math/acospif16.h b/libc/src/__support/math/acospif16.h
new file mode 100644
index 0000000..cf29c76
--- /dev/null
+++ b/libc/src/__support/math/acospif16.h
@@ -0,0 +1,147 @@
+//===-- Implementation header for acospif16 ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ACOSPIF16_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_ACOSPIF16_H
+
+#include "include/llvm-libc-macros/float16-macros.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT16
+
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/cast.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/sqrt.h"
+#include "src/__support/macros/optimization.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+LIBC_INLINE static constexpr float16 acospif16(float16 x) {
+ using FPBits = fputil::FPBits<float16>;
+ FPBits xbits(x);
+
+ uint16_t x_u = xbits.uintval();
+ uint16_t x_abs = x_u & 0x7fff;
+ uint16_t x_sign = x_u >> 15;
+
+ // |x| > 0x1p0, |x| > 1, or x is NaN.
+ if (LIBC_UNLIKELY(x_abs > 0x3c00)) {
+ // acospif16(NaN) = NaN
+ if (xbits.is_nan()) {
+ if (xbits.is_signaling_nan()) {
+ fputil::raise_except_if_required(FE_INVALID);
+ return FPBits::quiet_nan().get_val();
+ }
+
+ return x;
+ }
+
+ // 1 < |x| <= +inf
+ fputil::raise_except_if_required(FE_INVALID);
+ fputil::set_errno_if_required(EDOM);
+
+ return FPBits::quiet_nan().get_val();
+ }
+
+ // |x| == 0x1p0, x is 1 or -1
+ // if x is (-)1, return 1
+ // if x is (+)1, return 0
+ if (LIBC_UNLIKELY(x_abs == 0x3c00))
+ return fputil::cast<float16>(x_sign ? 1.0f : 0.0f);
+
+ float xf = x;
+ float xsq = xf * xf;
+
+ // Degree-6 minimax polynomial coefficients of asin(x) generated by Sollya
+ // with: > P = fpminimax(asin(x)/(pi * x), [|0, 2, 4, 6, 8|], [|SG...|], [0,
+ // 0.5]);
+ constexpr float POLY_COEFFS[5] = {0x1.45f308p-2f, 0x1.b2900cp-5f,
+ 0x1.897e36p-6f, 0x1.9efafcp-7f,
+ 0x1.06d884p-6f};
+ // |x| <= 0x1p-1, |x| <= 0.5
+ if (x_abs <= 0x3800) {
+ // if x is 0, return 0.5
+ if (LIBC_UNLIKELY(x_abs == 0))
+ return fputil::cast<float16>(0.5f);
+
+ // Note that: acos(x) = pi/2 + asin(-x) = pi/2 - asin(x), then
+ // acospi(x) = 0.5 - asin(x)/pi
+ float interm =
+ fputil::polyeval(xsq, POLY_COEFFS[0], POLY_COEFFS[1], POLY_COEFFS[2],
+ POLY_COEFFS[3], POLY_COEFFS[4]);
+
+ return fputil::cast<float16>(fputil::multiply_add(-xf, interm, 0.5f));
+ }
+
+ // When |x| > 0.5, assume that 0.5 < |x| <= 1
+ //
+ // Step-by-step range-reduction proof:
+ // 1: Let y = asin(x), such that, x = sin(y)
+ // 2: From complimentary angle identity:
+ // x = sin(y) = cos(pi/2 - y)
+ // 3: Let z = pi/2 - y, such that x = cos(z)
+ // 4: From double angle formula; cos(2A) = 1 - 2 * sin^2(A):
+ // z = 2A, z/2 = A
+ // cos(z) = 1 - 2 * sin^2(z/2)
+ // 5: Make sin(z/2) subject of the formula:
+ // sin(z/2) = sqrt((1 - cos(z))/2)
+ // 6: Recall [3]; x = cos(z). Therefore:
+ // sin(z/2) = sqrt((1 - x)/2)
+ // 7: Let u = (1 - x)/2
+ // 8: Therefore:
+ // asin(sqrt(u)) = z/2
+ // 2 * asin(sqrt(u)) = z
+ // 9: Recall [3]; z = pi/2 - y. Therefore:
+ // y = pi/2 - z
+ // y = pi/2 - 2 * asin(sqrt(u))
+ // 10: Recall [1], y = asin(x). Therefore:
+ // asin(x) = pi/2 - 2 * asin(sqrt(u))
+ // 11: Recall that: acos(x) = pi/2 + asin(-x) = pi/2 - asin(x)
+ // Therefore:
+ // acos(x) = pi/2 - (pi/2 - 2 * asin(sqrt(u)))
+ // acos(x) = 2 * asin(sqrt(u))
+ // acospi(x) = 2 * (asin(sqrt(u)) / pi)
+ //
+ // THE RANGE REDUCTION, HOW?
+ // 12: Recall [7], u = (1 - x)/2
+ // 13: Since 0.5 < x <= 1, therefore:
+ // 0 <= u <= 0.25 and 0 <= sqrt(u) <= 0.5
+ //
+ // Hence, we can reuse the same [0, 0.5] domain polynomial approximation for
+ // Step [11] as `sqrt(u)` is in range.
+ // When -1 < x <= -0.5, the identity:
+ // acos(x) = pi - acos(-x)
+ // acospi(x) = 1 - acos(-x)/pi
+ // allows us to compute for the negative x value (lhs)
+ // with a positive x value instead (rhs).
+
+ float xf_abs = (xf < 0 ? -xf : xf);
+ float u = fputil::multiply_add(-0.5f, xf_abs, 0.5f);
+ float sqrt_u = fputil::sqrt<float>(u);
+
+ float asin_sqrt_u =
+ sqrt_u * fputil::polyeval(u, POLY_COEFFS[0], POLY_COEFFS[1],
+ POLY_COEFFS[2], POLY_COEFFS[3], POLY_COEFFS[4]);
+
+ // Same as acos(x), but devided the expression with pi
+ return fputil::cast<float16>(
+ x_sign ? fputil::multiply_add(-2.0f, asin_sqrt_u, 1.0f)
+ : 2.0f * asin_sqrt_u);
+}
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT16
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ACOSPIF16_H
diff --git a/libc/src/__support/math/asin.h b/libc/src/__support/math/asin.h
new file mode 100644
index 0000000..5e06d04
--- /dev/null
+++ b/libc/src/__support/math/asin.h
@@ -0,0 +1,297 @@
+//===-- Implementation header for asin --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ASIN_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_ASIN_H
+
+#include "asin_utils.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/double_double.h"
+#include "src/__support/FPUtil/dyadic_float.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/sqrt.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
+#include "src/__support/math/asin_utils.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+LIBC_INLINE static constexpr double asin(double x) {
+ using namespace asin_internal;
+ using FPBits = fputil::FPBits<double>;
+
+ FPBits xbits(x);
+ int x_exp = xbits.get_biased_exponent();
+
+ // |x| < 0.5.
+ if (x_exp < FPBits::EXP_BIAS - 1) {
+ // |x| < 2^-26.
+ if (LIBC_UNLIKELY(x_exp < FPBits::EXP_BIAS - 26)) {
+ // When |x| < 2^-26, the relative error of the approximation asin(x) ~ x
+ // is:
+ // |asin(x) - x| / |asin(x)| < |x^3| / (6|x|)
+ // = x^2 / 6
+ // < 2^-54
+ // < epsilon(1)/2.
+ // So the correctly rounded values of asin(x) are:
+ // = x + sign(x)*eps(x) if rounding mode = FE_TOWARDZERO,
+ // or (rounding mode = FE_UPWARD and x is
+ // negative),
+ // = x otherwise.
+ // To simplify the rounding decision and make it more efficient, we use
+ // fma(x, 2^-54, x) instead.
+ // Note: to use the formula x + 2^-54*x to decide the correct rounding, we
+ // do need fma(x, 2^-54, x) to prevent underflow caused by 2^-54*x when
+ // |x| < 2^-1022. For targets without FMA instructions, when x is close to
+ // denormal range, we normalize x,
+#if defined(LIBC_MATH_HAS_SKIP_ACCURATE_PASS)
+ return x;
+#elif defined(LIBC_TARGET_CPU_HAS_FMA_DOUBLE)
+ return fputil::multiply_add(x, 0x1.0p-54, x);
+#else
+ if (xbits.abs().uintval() == 0)
+ return x;
+ // Get sign(x) * min_normal.
+ FPBits eps_bits = FPBits::min_normal();
+ eps_bits.set_sign(xbits.sign());
+ double eps = eps_bits.get_val();
+ double normalize_const = (x_exp == 0) ? eps : 0.0;
+ double scaled_normal =
+ fputil::multiply_add(x + normalize_const, 0x1.0p54, eps);
+ return fputil::multiply_add(scaled_normal, 0x1.0p-54, -normalize_const);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ }
+
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ return x * asin_eval(x * x);
+#else
+ using Float128 = fputil::DyadicFloat<128>;
+ using DoubleDouble = fputil::DoubleDouble;
+
+ unsigned idx = 0;
+ DoubleDouble x_sq = fputil::exact_mult(x, x);
+ double err = xbits.abs().get_val() * 0x1.0p-51;
+ // Polynomial approximation:
+ // p ~ asin(x)/x
+
+ DoubleDouble p = asin_eval(x_sq, idx, err);
+ // asin(x) ~ x * (ASIN_COEFFS[idx][0] + p)
+ DoubleDouble r0 = fputil::exact_mult(x, p.hi);
+ double r_lo = fputil::multiply_add(x, p.lo, r0.lo);
+
+ // Ziv's accuracy test.
+
+ double r_upper = r0.hi + (r_lo + err);
+ double r_lower = r0.hi + (r_lo - err);
+
+ if (LIBC_LIKELY(r_upper == r_lower))
+ return r_upper;
+
+ // Ziv's accuracy test failed, perform 128-bit calculation.
+
+ // Recalculate mod 1/64.
+ idx = static_cast<unsigned>(fputil::nearest_integer(x_sq.hi * 0x1.0p6));
+
+ // Get x^2 - idx/64 exactly. When FMA is available, double-double
+ // multiplication will be correct for all rounding modes. Otherwise we use
+ // Float128 directly.
+ Float128 x_f128(x);
+
+#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+ // u = x^2 - idx/64
+ Float128 u_hi(
+ fputil::multiply_add(static_cast<double>(idx), -0x1.0p-6, x_sq.hi));
+ Float128 u = fputil::quick_add(u_hi, Float128(x_sq.lo));
+#else
+ Float128 x_sq_f128 = fputil::quick_mul(x_f128, x_f128);
+ Float128 u = fputil::quick_add(
+ x_sq_f128, Float128(static_cast<double>(idx) * (-0x1.0p-6)));
+#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+
+ Float128 p_f128 = asin_eval(u, idx);
+ Float128 r = fputil::quick_mul(x_f128, p_f128);
+
+ return static_cast<double>(r);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ }
+ // |x| >= 0.5
+
+ double x_abs = xbits.abs().get_val();
+
+ // Maintaining the sign:
+ constexpr double SIGN[2] = {1.0, -1.0};
+ double x_sign = SIGN[xbits.is_neg()];
+
+ // |x| >= 1
+ if (LIBC_UNLIKELY(x_exp >= FPBits::EXP_BIAS)) {
+ // x = +-1, asin(x) = +- pi/2
+ if (x_abs == 1.0) {
+ // return +- pi/2
+ return fputil::multiply_add(x_sign, PI_OVER_TWO.hi,
+ x_sign * PI_OVER_TWO.lo);
+ }
+ // |x| > 1, return NaN.
+ if (xbits.is_quiet_nan())
+ return x;
+
+ // Set domain error for non-NaN input.
+ if (!xbits.is_nan())
+ fputil::set_errno_if_required(EDOM);
+
+ fputil::raise_except_if_required(FE_INVALID);
+ return FPBits::quiet_nan().get_val();
+ }
+
+ // When |x| >= 0.5, we perform range reduction as follow:
+ //
+ // Assume further that 0.5 <= x < 1, and let:
+ // y = asin(x)
+ // We will use the double angle formula:
+ // cos(2y) = 1 - 2 sin^2(y)
+ // and the complement angle identity:
+ // x = sin(y) = cos(pi/2 - y)
+ // = 1 - 2 sin^2 (pi/4 - y/2)
+ // So:
+ // sin(pi/4 - y/2) = sqrt( (1 - x)/2 )
+ // And hence:
+ // pi/4 - y/2 = asin( sqrt( (1 - x)/2 ) )
+ // Equivalently:
+ // asin(x) = y = pi/2 - 2 * asin( sqrt( (1 - x)/2 ) )
+ // Let u = (1 - x)/2, then:
+ // asin(x) = pi/2 - 2 * asin( sqrt(u) )
+ // Moreover, since 0.5 <= x < 1:
+ // 0 < u <= 1/4, and 0 < sqrt(u) <= 0.5,
+ // And hence we can reuse the same polynomial approximation of asin(x) when
+ // |x| <= 0.5:
+ // asin(x) ~ pi/2 - 2 * sqrt(u) * P(u),
+
+ // u = (1 - |x|)/2
+ double u = fputil::multiply_add(x_abs, -0.5, 0.5);
+ // v_hi + v_lo ~ sqrt(u).
+ // Let:
+ // h = u - v_hi^2 = (sqrt(u) - v_hi) * (sqrt(u) + v_hi)
+ // Then:
+ // sqrt(u) = v_hi + h / (sqrt(u) + v_hi)
+ // ~ v_hi + h / (2 * v_hi)
+ // So we can use:
+ // v_lo = h / (2 * v_hi).
+ // Then,
+ // asin(x) ~ pi/2 - 2*(v_hi + v_lo) * P(u)
+ double v_hi = fputil::sqrt<double>(u);
+
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ double p = asin_eval(u);
+ double r = x_sign * fputil::multiply_add(-2.0 * v_hi, p, PI_OVER_TWO.hi);
+ return r;
+#else
+
+#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+ double h = fputil::multiply_add(v_hi, -v_hi, u);
+#else
+ DoubleDouble v_hi_sq = fputil::exact_mult(v_hi, v_hi);
+ double h = (u - v_hi_sq.hi) - v_hi_sq.lo;
+#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+
+ // Scale v_lo and v_hi by 2 from the formula:
+ // vh = v_hi * 2
+ // vl = 2*v_lo = h / v_hi.
+ double vh = v_hi * 2.0;
+ double vl = h / v_hi;
+
+ // Polynomial approximation:
+ // p ~ asin(sqrt(u))/sqrt(u)
+ unsigned idx = 0;
+ double err = vh * 0x1.0p-51;
+
+ DoubleDouble p = asin_eval(DoubleDouble{0.0, u}, idx, err);
+
+ // Perform computations in double-double arithmetic:
+ // asin(x) = pi/2 - (v_hi + v_lo) * (ASIN_COEFFS[idx][0] + p)
+ DoubleDouble r0 = fputil::quick_mult(DoubleDouble{vl, vh}, p);
+ DoubleDouble r = fputil::exact_add(PI_OVER_TWO.hi, -r0.hi);
+
+ double r_lo = PI_OVER_TWO.lo - r0.lo + r.lo;
+
+ // Ziv's accuracy test.
+
+#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+ double r_upper = fputil::multiply_add(
+ r.hi, x_sign, fputil::multiply_add(r_lo, x_sign, err));
+ double r_lower = fputil::multiply_add(
+ r.hi, x_sign, fputil::multiply_add(r_lo, x_sign, -err));
+#else
+ r_lo *= x_sign;
+ r.hi *= x_sign;
+ double r_upper = r.hi + (r_lo + err);
+ double r_lower = r.hi + (r_lo - err);
+#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+
+ if (LIBC_LIKELY(r_upper == r_lower))
+ return r_upper;
+
+ // Ziv's accuracy test failed, we redo the computations in Float128.
+ // Recalculate mod 1/64.
+ idx = static_cast<unsigned>(fputil::nearest_integer(u * 0x1.0p6));
+
+ // After the first step of Newton-Raphson approximating v = sqrt(u), we have
+ // that:
+ // sqrt(u) = v_hi + h / (sqrt(u) + v_hi)
+ // v_lo = h / (2 * v_hi)
+ // With error:
+ // sqrt(u) - (v_hi + v_lo) = h * ( 1/(sqrt(u) + v_hi) - 1/(2*v_hi) )
+ // = -h^2 / (2*v * (sqrt(u) + v)^2).
+ // Since:
+ // (sqrt(u) + v_hi)^2 ~ (2sqrt(u))^2 = 4u,
+ // we can add another correction term to (v_hi + v_lo) that is:
+ // v_ll = -h^2 / (2*v_hi * 4u)
+ // = -v_lo * (h / 4u)
+ // = -vl * (h / 8u),
+ // making the errors:
+ // sqrt(u) - (v_hi + v_lo + v_ll) = O(h^3)
+ // well beyond 128-bit precision needed.
+
+ // Get the rounding error of vl = 2 * v_lo ~ h / vh
+ // Get full product of vh * vl
+#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+ double vl_lo = fputil::multiply_add(-v_hi, vl, h) / v_hi;
+#else
+ DoubleDouble vh_vl = fputil::exact_mult(v_hi, vl);
+ double vl_lo = ((h - vh_vl.hi) - vh_vl.lo) / v_hi;
+#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+ // vll = 2*v_ll = -vl * (h / (4u)).
+ double t = h * (-0.25) / u;
+ double vll = fputil::multiply_add(vl, t, vl_lo);
+ // m_v = -(v_hi + v_lo + v_ll).
+ Float128 m_v = fputil::quick_add(
+ Float128(vh), fputil::quick_add(Float128(vl), Float128(vll)));
+ m_v.sign = Sign::NEG;
+
+ // Perform computations in Float128:
+ // asin(x) = pi/2 - (v_hi + v_lo + vll) * P(u).
+ Float128 y_f128(fputil::multiply_add(static_cast<double>(idx), -0x1.0p-6, u));
+
+ Float128 p_f128 = asin_eval(y_f128, idx);
+ Float128 r0_f128 = fputil::quick_mul(m_v, p_f128);
+ Float128 r_f128 = fputil::quick_add(PI_OVER_TWO_F128, r0_f128);
+
+ if (xbits.is_neg())
+ r_f128.sign = Sign::NEG;
+
+ return static_cast<double>(r_f128);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+}
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ASIN_H
diff --git a/libc/src/__support/math/asinf.h b/libc/src/__support/math/asinf.h
new file mode 100644
index 0000000..bfa0dc3
--- /dev/null
+++ b/libc/src/__support/math/asinf.h
@@ -0,0 +1,175 @@
+//===-- Implementation header for asinf -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ASINF_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_ASINF_H
+
+#include "inv_trigf_utils.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/except_value_utils.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/sqrt.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+LIBC_INLINE static constexpr float asinf(float x) {
+ using namespace inv_trigf_utils_internal;
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ constexpr size_t N_EXCEPTS = 2;
+
+ // Exceptional values when |x| <= 0.5
+ constexpr fputil::ExceptValues<float, N_EXCEPTS> ASINF_EXCEPTS_LO = {{
+ // (inputs, RZ output, RU offset, RD offset, RN offset)
+ // x = 0x1.137f0cp-5, asinf(x) = 0x1.138c58p-5 (RZ)
+ {0x3d09bf86, 0x3d09c62c, 1, 0, 1},
+ // x = 0x1.cbf43cp-4, asinf(x) = 0x1.cced1cp-4 (RZ)
+ {0x3de5fa1e, 0x3de6768e, 1, 0, 0},
+ }};
+
+ // Exceptional values when 0.5 < |x| <= 1
+ constexpr fputil::ExceptValues<float, N_EXCEPTS> ASINF_EXCEPTS_HI = {{
+ // (inputs, RZ output, RU offset, RD offset, RN offset)
+ // x = 0x1.107434p-1, asinf(x) = 0x1.1f4b64p-1 (RZ)
+ {0x3f083a1a, 0x3f0fa5b2, 1, 0, 0},
+ // x = 0x1.ee836cp-1, asinf(x) = 0x1.4f0654p0 (RZ)
+ {0x3f7741b6, 0x3fa7832a, 1, 0, 0},
+ }};
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+ using namespace inv_trigf_utils_internal;
+ using FPBits = typename fputil::FPBits<float>;
+
+ FPBits xbits(x);
+ uint32_t x_uint = xbits.uintval();
+ uint32_t x_abs = xbits.uintval() & 0x7fff'ffffU;
+ constexpr double SIGN[2] = {1.0, -1.0};
+ uint32_t x_sign = x_uint >> 31;
+
+ // |x| <= 0.5-ish
+ if (x_abs < 0x3f04'471dU) {
+ // |x| < 0x1.d12edp-12
+ if (LIBC_UNLIKELY(x_abs < 0x39e8'9768U)) {
+ // When |x| < 2^-12, the relative error of the approximation asin(x) ~ x
+ // is:
+ // |asin(x) - x| / |asin(x)| < |x^3| / (6|x|)
+ // = x^2 / 6
+ // < 2^-25
+ // < epsilon(1)/2.
+ // So the correctly rounded values of asin(x) are:
+ // = x + sign(x)*eps(x) if rounding mode = FE_TOWARDZERO,
+ // or (rounding mode = FE_UPWARD and x is
+ // negative),
+ // = x otherwise.
+ // To simplify the rounding decision and make it more efficient, we use
+ // fma(x, 2^-25, x) instead.
+ // An exhaustive test shows that this formula work correctly for all
+ // rounding modes up to |x| < 0x1.d12edp-12.
+ // Note: to use the formula x + 2^-25*x to decide the correct rounding, we
+ // do need fma(x, 2^-25, x) to prevent underflow caused by 2^-25*x when
+ // |x| < 2^-125. For targets without FMA instructions, we simply use
+ // double for intermediate results as it is more efficient than using an
+ // emulated version of FMA.
+#if defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT)
+ return fputil::multiply_add(x, 0x1.0p-25f, x);
+#else
+ double xd = static_cast<double>(x);
+ return static_cast<float>(fputil::multiply_add(xd, 0x1.0p-25, xd));
+#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT
+ }
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ // Check for exceptional values
+ if (auto r = ASINF_EXCEPTS_LO.lookup_odd(x_abs, x_sign);
+ LIBC_UNLIKELY(r.has_value()))
+ return r.value();
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+ // For |x| <= 0.5, we approximate asinf(x) by:
+ // asin(x) = x * P(x^2)
+ // Where P(X^2) = Q(X) is a degree-20 minimax even polynomial approximating
+ // asin(x)/x on [0, 0.5] generated by Sollya with:
+ // > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20|],
+ // [|1, D...|], [0, 0.5]);
+ // An exhaustive test shows that this approximation works well up to a
+ // little more than 0.5.
+ double xd = static_cast<double>(x);
+ double xsq = xd * xd;
+ double x3 = xd * xsq;
+ double r = asin_eval(xsq);
+ return static_cast<float>(fputil::multiply_add(x3, r, xd));
+ }
+
+ // |x| > 1, return NaNs.
+ if (LIBC_UNLIKELY(x_abs > 0x3f80'0000U)) {
+ if (xbits.is_signaling_nan()) {
+ fputil::raise_except_if_required(FE_INVALID);
+ return FPBits::quiet_nan().get_val();
+ }
+
+ if (x_abs <= 0x7f80'0000U) {
+ fputil::set_errno_if_required(EDOM);
+ fputil::raise_except_if_required(FE_INVALID);
+ }
+
+ return FPBits::quiet_nan().get_val();
+ }
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ // Check for exceptional values
+ if (auto r = ASINF_EXCEPTS_HI.lookup_odd(x_abs, x_sign);
+ LIBC_UNLIKELY(r.has_value()))
+ return r.value();
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+ // When |x| > 0.5, we perform range reduction as follow:
+ //
+ // Assume further that 0.5 < x <= 1, and let:
+ // y = asin(x)
+ // We will use the double angle formula:
+ // cos(2y) = 1 - 2 sin^2(y)
+ // and the complement angle identity:
+ // x = sin(y) = cos(pi/2 - y)
+ // = 1 - 2 sin^2 (pi/4 - y/2)
+ // So:
+ // sin(pi/4 - y/2) = sqrt( (1 - x)/2 )
+ // And hence:
+ // pi/4 - y/2 = asin( sqrt( (1 - x)/2 ) )
+ // Equivalently:
+ // asin(x) = y = pi/2 - 2 * asin( sqrt( (1 - x)/2 ) )
+ // Let u = (1 - x)/2, then:
+ // asin(x) = pi/2 - 2 * asin( sqrt(u) )
+ // Moreover, since 0.5 < x <= 1:
+ // 0 <= u < 1/4, and 0 <= sqrt(u) < 0.5,
+ // And hence we can reuse the same polynomial approximation of asin(x) when
+ // |x| <= 0.5:
+ // asin(x) ~ pi/2 - 2 * sqrt(u) * P(u),
+
+ xbits.set_sign(Sign::POS);
+ double sign = SIGN[x_sign];
+ double xd = static_cast<double>(xbits.get_val());
+ double u = fputil::multiply_add(-0.5, xd, 0.5);
+ double c1 = sign * (-2 * fputil::sqrt<double>(u));
+ double c2 = fputil::multiply_add(sign, M_MATH_PI_2, c1);
+ double c3 = c1 * u;
+
+ double r = asin_eval(u);
+ return static_cast<float>(fputil::multiply_add(c3, r, c2));
+}
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ASINF_H
diff --git a/libc/src/__support/math/asinf16.h b/libc/src/__support/math/asinf16.h
new file mode 100644
index 0000000..3d032a4
--- /dev/null
+++ b/libc/src/__support/math/asinf16.h
@@ -0,0 +1,146 @@
+//===-- Implementation header for asinf16 -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ASINF16_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_ASINF16_H
+
+#include "include/llvm-libc-macros/float16-macros.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT16
+
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/cast.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/sqrt.h"
+#include "src/__support/macros/optimization.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+LIBC_INLINE static constexpr float16 asinf16(float16 x) {
+
+ // Generated by Sollya using the following command:
+ // > round(pi/2, D, RN);
+ constexpr float PI_2 = 0x1.921fb54442d18p0f;
+
+ using FPBits = fputil::FPBits<float16>;
+ FPBits xbits(x);
+
+ uint16_t x_u = xbits.uintval();
+ uint16_t x_abs = x_u & 0x7fff;
+ float xf = x;
+
+ // |x| > 0x1p0, |x| > 1, or x is NaN.
+ if (LIBC_UNLIKELY(x_abs > 0x3c00)) {
+ // asinf16(NaN) = NaN
+ if (xbits.is_nan()) {
+ if (xbits.is_signaling_nan()) {
+ fputil::raise_except_if_required(FE_INVALID);
+ return FPBits::quiet_nan().get_val();
+ }
+
+ return x;
+ }
+
+ // 1 < |x| <= +/-inf
+ fputil::raise_except_if_required(FE_INVALID);
+ fputil::set_errno_if_required(EDOM);
+
+ return FPBits::quiet_nan().get_val();
+ }
+
+ float xsq = xf * xf;
+
+ // |x| <= 0x1p-1, |x| <= 0.5
+ if (x_abs <= 0x3800) {
+ // asinf16(+/-0) = +/-0
+ if (LIBC_UNLIKELY(x_abs == 0))
+ return x;
+
+ // Exhaustive tests show that,
+ // for |x| <= 0x1.878p-9, when:
+ // x > 0, and rounding upward, or
+ // x < 0, and rounding downward, then,
+ // asin(x) = x * 2^-11 + x
+ // else, in other rounding modes,
+ // asin(x) = x
+ if (LIBC_UNLIKELY(x_abs <= 0x1a1e)) {
+ int rounding = fputil::quick_get_round();
+
+ if ((xbits.is_pos() && rounding == FE_UPWARD) ||
+ (xbits.is_neg() && rounding == FE_DOWNWARD))
+ return fputil::cast<float16>(fputil::multiply_add(xf, 0x1.0p-11f, xf));
+ return x;
+ }
+
+ // Degree-6 minimax odd polynomial of asin(x) generated by Sollya with:
+ // > P = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8|], [|SG...|], [0, 0.5]);
+ float result =
+ fputil::polyeval(xsq, 0x1.000002p0f, 0x1.554c2ap-3f, 0x1.3541ccp-4f,
+ 0x1.43b2d6p-5f, 0x1.a0d73ep-5f);
+ return fputil::cast<float16>(xf * result);
+ }
+
+ // When |x| > 0.5, assume that 0.5 < |x| <= 1,
+ //
+ // Step-by-step range-reduction proof:
+ // 1: Let y = asin(x), such that, x = sin(y)
+ // 2: From complimentary angle identity:
+ // x = sin(y) = cos(pi/2 - y)
+ // 3: Let z = pi/2 - y, such that x = cos(z)
+ // 4: From double angle formula; cos(2A) = 1 - sin^2(A):
+ // z = 2A, z/2 = A
+ // cos(z) = 1 - 2 * sin^2(z/2)
+ // 5: Make sin(z/2) subject of the formula:
+ // sin(z/2) = sqrt((1 - cos(z))/2)
+ // 6: Recall [3]; x = cos(z). Therefore:
+ // sin(z/2) = sqrt((1 - x)/2)
+ // 7: Let u = (1 - x)/2
+ // 8: Therefore:
+ // asin(sqrt(u)) = z/2
+ // 2 * asin(sqrt(u)) = z
+ // 9: Recall [3], z = pi/2 - y. Therefore:
+ // y = pi/2 - z
+ // y = pi/2 - 2 * asin(sqrt(u))
+ // 10: Recall [1], y = asin(x). Therefore:
+ // asin(x) = pi/2 - 2 * asin(sqrt(u))
+ //
+ // WHY?
+ // 11: Recall [7], u = (1 - x)/2
+ // 12: Since 0.5 < x <= 1, therefore:
+ // 0 <= u <= 0.25 and 0 <= sqrt(u) <= 0.5
+ //
+ // Hence, we can reuse the same [0, 0.5] domain polynomial approximation for
+ // Step [10] as `sqrt(u)` is in range.
+
+ // 0x1p-1 < |x| <= 0x1p0, 0.5 < |x| <= 1.0
+ float xf_abs = (xf < 0 ? -xf : xf);
+ float sign = (xbits.uintval() >> 15 == 1 ? -1.0 : 1.0);
+ float u = fputil::multiply_add(-0.5f, xf_abs, 0.5f);
+ float u_sqrt = fputil::sqrt<float>(u);
+
+ // Degree-6 minimax odd polynomial of asin(x) generated by Sollya with:
+ // > P = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8|], [|SG...|], [0, 0.5]);
+ float asin_sqrt_u =
+ u_sqrt * fputil::polyeval(u, 0x1.000002p0f, 0x1.554c2ap-3f,
+ 0x1.3541ccp-4f, 0x1.43b2d6p-5f, 0x1.a0d73ep-5f);
+
+ return fputil::cast<float16>(sign *
+ fputil::multiply_add(-2.0f, asin_sqrt_u, PI_2));
+}
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT16
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ASINF16_H
diff --git a/libc/src/__support/math/asinhf.h b/libc/src/__support/math/asinhf.h
new file mode 100644
index 0000000..1c08a6e
--- /dev/null
+++ b/libc/src/__support/math/asinhf.h
@@ -0,0 +1,125 @@
+//===-- Implementation header for asinf -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ASINHF_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_ASINHF_H
+
+#include "acoshf_utils.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/sqrt.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+LIBC_INLINE static constexpr float asinhf(float x) {
+ using namespace acoshf_internal;
+ using FPBits_t = typename fputil::FPBits<float>;
+ FPBits_t xbits(x);
+ uint32_t x_u = xbits.uintval();
+ uint32_t x_abs = xbits.abs().uintval();
+
+ // |x| <= 2^-3
+ if (LIBC_UNLIKELY(x_abs <= 0x3e80'0000U)) {
+ // |x| <= 2^-26
+ if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) {
+ return static_cast<float>(LIBC_UNLIKELY(x_abs == 0)
+ ? x
+ : (x - 0x1.5555555555555p-3 * x * x * x));
+ }
+
+ double x_d = x;
+ double x_sq = x_d * x_d;
+ // Generated by Sollya with:
+ // > P = fpminimax(asinh(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16|], [|D...|],
+ // [0, 2^-2]);
+ double p = fputil::polyeval(
+ x_sq, 0.0, -0x1.555555555551ep-3, 0x1.3333333325495p-4,
+ -0x1.6db6db5a7622bp-5, 0x1.f1c70f82928c6p-6, -0x1.6e893934266b7p-6,
+ 0x1.1c0b41d3fbe78p-6, -0x1.c0f47810b3c4fp-7, 0x1.2c8602690143dp-7);
+ return static_cast<float>(fputil::multiply_add(x_d, p, x_d));
+ }
+
+ const double SIGN[2] = {1.0, -1.0};
+ double x_sign = SIGN[x_u >> 31];
+ double x_d = x;
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ // Helper functions to set results for exceptional cases.
+ auto round_result_slightly_down = [x_sign](float r) -> float {
+ return fputil::multiply_add(static_cast<float>(x_sign), r,
+ static_cast<float>(x_sign) * (-0x1.0p-24f));
+ };
+ auto round_result_slightly_up = [x_sign](float r) -> float {
+ return fputil::multiply_add(static_cast<float>(x_sign), r,
+ static_cast<float>(x_sign) * 0x1.0p-24f);
+ };
+
+ if (LIBC_UNLIKELY(x_abs >= 0x4bdd'65a5U)) {
+ if (LIBC_UNLIKELY(xbits.is_inf_or_nan())) {
+ if (xbits.is_signaling_nan()) {
+ fputil::raise_except_if_required(FE_INVALID);
+ return FPBits_t::quiet_nan().get_val();
+ }
+
+ return x;
+ }
+
+ // Exceptional cases when x > 2^24.
+ switch (x_abs) {
+ case 0x4bdd65a5: // |x| = 0x1.bacb4ap24f
+ return round_result_slightly_down(0x1.1e0696p4f);
+ case 0x4c803f2c: // |x| = 0x1.007e58p26f
+ return round_result_slightly_down(0x1.2b786cp4f);
+ case 0x4f8ffb03: // |x| = 0x1.1ff606p32f
+ return round_result_slightly_up(0x1.6fdd34p4f);
+ case 0x5c569e88: // |x| = 0x1.ad3d1p57f
+ return round_result_slightly_up(0x1.45c146p5f);
+ case 0x5e68984e: // |x| = 0x1.d1309cp61f
+ return round_result_slightly_up(0x1.5c9442p5f);
+ case 0x655890d3: // |x| = 0x1.b121a6p75f
+ return round_result_slightly_down(0x1.a9a3f2p5f);
+ case 0x65de7ca6: // |x| = 0x1.bcf94cp76f
+ return round_result_slightly_up(0x1.af66cp5f);
+ case 0x6eb1a8ec: // |x| = 0x1.6351d8p94f
+ return round_result_slightly_down(0x1.08b512p6f);
+ case 0x7997f30a: // |x| = 0x1.2fe614p116f
+ return round_result_slightly_up(0x1.451436p6f);
+ }
+ } else {
+ // Exceptional cases when x < 2^24.
+ if (LIBC_UNLIKELY(x_abs == 0x45abaf26)) {
+ // |x| = 0x1.575e4cp12f
+ return round_result_slightly_down(0x1.29becap3f);
+ }
+ if (LIBC_UNLIKELY(x_abs == 0x49d29048)) {
+ // |x| = 0x1.a5209p20f
+ return round_result_slightly_down(0x1.e1b92p3f);
+ }
+ }
+#else
+ if (LIBC_UNLIKELY(xbits.is_inf_or_nan()))
+ return x;
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+ // asinh(x) = log(x + sqrt(x^2 + 1))
+ return static_cast<float>(
+ x_sign * log_eval(fputil::multiply_add(
+ x_d, x_sign,
+ fputil::sqrt<double>(fputil::multiply_add(x_d, x_d, 1.0)))));
+}
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ASINHF_H
diff --git a/libc/src/__support/math/erff.h b/libc/src/__support/math/erff.h
index e54ec77..b81be30 100644
--- a/libc/src/__support/math/erff.h
+++ b/libc/src/__support/math/erff.h
@@ -19,7 +19,7 @@ namespace LIBC_NAMESPACE_DECL {
namespace math {
-static constexpr float erff(float x) {
+LIBC_INLINE static constexpr float erff(float x) {
// Polynomials approximating erf(x)/x on ( k/8, (k + 1)/8 ) generated by
// Sollya with: > P = fpminimax(erf(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14|],
diff --git a/libc/src/__support/math/exp.h b/libc/src/__support/math/exp.h
index ff59ff7..83638e8 100644
--- a/libc/src/__support/math/exp.h
+++ b/libc/src/__support/math/exp.h
@@ -67,7 +67,7 @@ namespace {
// Return expm1(dx) / x ~ 1 + dx / 2 + dx^2 / 6 + dx^3 / 24.
// For |dx| < 2^-13 + 2^-30:
// | output - expm1(dx) / dx | < 2^-51.
-static double poly_approx_d(double dx) {
+LIBC_INLINE static double poly_approx_d(double dx) {
// dx^2
double dx2 = dx * dx;
// c0 = 1 + dx / 2
@@ -85,7 +85,7 @@ static double poly_approx_d(double dx) {
// Return exp(dx) ~ 1 + dx + dx^2 / 2 + ... + dx^6 / 720
// For |dx| < 2^-13 + 2^-30:
// | output - exp(dx) | < 2^-101
-static DoubleDouble poly_approx_dd(const DoubleDouble &dx) {
+LIBC_INLINE static DoubleDouble poly_approx_dd(const DoubleDouble &dx) {
// Taylor polynomial.
constexpr DoubleDouble COEFFS[] = {
{0, 0x1p0}, // 1
@@ -106,7 +106,7 @@ static DoubleDouble poly_approx_dd(const DoubleDouble &dx) {
// Return exp(dx) ~ 1 + dx + dx^2 / 2 + ... + dx^7 / 5040
// For |dx| < 2^-13 + 2^-30:
// | output - exp(dx) | < 2^-126.
-static Float128 poly_approx_f128(const Float128 &dx) {
+LIBC_INLINE static Float128 poly_approx_f128(const Float128 &dx) {
constexpr Float128 COEFFS_128[]{
{Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, // 1.0
{Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, // 1.0
@@ -127,7 +127,7 @@ static Float128 poly_approx_f128(const Float128 &dx) {
// Compute exp(x) using 128-bit precision.
// TODO(lntue): investigate triple-double precision implementation for this
// step.
-static Float128 exp_f128(double x, double kd, int idx1, int idx2) {
+LIBC_INLINE static Float128 exp_f128(double x, double kd, int idx1, int idx2) {
// Recalculate dx:
double t1 = fputil::multiply_add(kd, MLOG_2_EXP2_M12_HI, x); // exact
@@ -160,8 +160,8 @@ static Float128 exp_f128(double x, double kd, int idx1, int idx2) {
}
// Compute exp(x) with double-double precision.
-static DoubleDouble exp_double_double(double x, double kd,
- const DoubleDouble &exp_mid) {
+LIBC_INLINE static DoubleDouble exp_double_double(double x, double kd,
+ const DoubleDouble &exp_mid) {
// Recalculate dx:
// dx = x - k * 2^-12 * log(2)
double t1 = fputil::multiply_add(kd, MLOG_2_EXP2_M12_HI, x); // exact
@@ -184,7 +184,7 @@ static DoubleDouble exp_double_double(double x, double kd,
// Check for exceptional cases when
// |x| <= 2^-53 or x < log(2^-1075) or x >= 0x1.6232bdd7abcd3p+9
-static double set_exceptional(double x) {
+LIBC_INLINE static double set_exceptional(double x) {
using FPBits = typename fputil::FPBits<double>;
FPBits xbits(x);
@@ -234,7 +234,7 @@ static double set_exceptional(double x) {
namespace math {
-static double exp(double x) {
+LIBC_INLINE static double exp(double x) {
using FPBits = typename fputil::FPBits<double>;
FPBits xbits(x);
diff --git a/libc/src/__support/math/exp10.h b/libc/src/__support/math/exp10.h
index fa60e40c..12a09d7 100644
--- a/libc/src/__support/math/exp10.h
+++ b/libc/src/__support/math/exp10.h
@@ -83,7 +83,8 @@ LIBC_INLINE static double exp10_poly_approx_d(double dx) {
// > P = fpminimax((10^x - 1)/x, 5, [|DD...|], [-2^-14, 2^-14]);
// Error bounds:
// | output - 10^(dx) | < 2^-101
-static constexpr DoubleDouble exp10_poly_approx_dd(const DoubleDouble &dx) {
+LIBC_INLINE static constexpr DoubleDouble
+exp10_poly_approx_dd(const DoubleDouble &dx) {
// Taylor polynomial.
constexpr DoubleDouble COEFFS[] = {
{0, 0x1p0},
@@ -105,7 +106,8 @@ static constexpr DoubleDouble exp10_poly_approx_dd(const DoubleDouble &dx) {
// Return exp(dx) ~ 1 + a0 * dx + a1 * dx^2 + ... + a6 * dx^7
// For |dx| < 2^-14:
// | output - 10^dx | < 1.5 * 2^-124.
-static constexpr Float128 exp10_poly_approx_f128(const Float128 &dx) {
+LIBC_INLINE static constexpr Float128
+exp10_poly_approx_f128(const Float128 &dx) {
constexpr Float128 COEFFS_128[]{
{Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, // 1.0
{Sign::POS, -126, 0x935d8ddd'aaa8ac16'ea56d62b'82d30a2d_u128},
@@ -126,7 +128,8 @@ static constexpr Float128 exp10_poly_approx_f128(const Float128 &dx) {
// Compute 10^(x) using 128-bit precision.
// TODO(lntue): investigate triple-double precision implementation for this
// step.
-static Float128 exp10_f128(double x, double kd, int idx1, int idx2) {
+LIBC_INLINE static Float128 exp10_f128(double x, double kd, int idx1,
+ int idx2) {
double t1 = fputil::multiply_add(kd, MLOG10_2_EXP2_M12_HI, x); // exact
double t2 = kd * MLOG10_2_EXP2_M12_MID_32; // exact
double t3 = kd * MLOG10_2_EXP2_M12_LO; // Error < 2^-144
@@ -157,8 +160,8 @@ static Float128 exp10_f128(double x, double kd, int idx1, int idx2) {
}
// Compute 10^x with double-double precision.
-static DoubleDouble exp10_double_double(double x, double kd,
- const DoubleDouble &exp_mid) {
+LIBC_INLINE static DoubleDouble
+exp10_double_double(double x, double kd, const DoubleDouble &exp_mid) {
// Recalculate dx:
// dx = x - k * 2^-12 * log10(2)
double t1 = fputil::multiply_add(kd, MLOG10_2_EXP2_M12_HI, x); // exact
@@ -180,7 +183,7 @@ static DoubleDouble exp10_double_double(double x, double kd,
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
// When output is denormal.
-static double exp10_denorm(double x) {
+LIBC_INLINE static double exp10_denorm(double x) {
// Range reduction.
double tmp = fputil::multiply_add(x, LOG2_10, 0x1.8000'0000'4p21);
int k = static_cast<int>(cpp::bit_cast<uint64_t>(tmp) >> 19);
@@ -234,7 +237,7 @@ static double exp10_denorm(double x) {
// * x >= log10(2^1024)
// * x <= log10(2^-1022)
// * x is inf or nan
-static constexpr double exp10_set_exceptional(double x) {
+LIBC_INLINE static constexpr double exp10_set_exceptional(double x) {
using FPBits = typename fputil::FPBits<double>;
FPBits xbits(x);
@@ -285,7 +288,7 @@ static constexpr double exp10_set_exceptional(double x) {
namespace math {
-static constexpr double exp10(double x) {
+LIBC_INLINE static constexpr double exp10(double x) {
using FPBits = typename fputil::FPBits<double>;
FPBits xbits(x);
diff --git a/libc/src/__support/math/exp10f.h b/libc/src/__support/math/exp10f.h
index 807b4f0..76ae197 100644
--- a/libc/src/__support/math/exp10f.h
+++ b/libc/src/__support/math/exp10f.h
@@ -20,7 +20,7 @@
namespace LIBC_NAMESPACE_DECL {
namespace math {
-static constexpr float exp10f(float x) {
+LIBC_INLINE static constexpr float exp10f(float x) {
using FPBits = typename fputil::FPBits<float>;
FPBits xbits(x);
diff --git a/libc/src/__support/math/exp10f16.h b/libc/src/__support/math/exp10f16.h
index 0d8b125..3eca867 100644
--- a/libc/src/__support/math/exp10f16.h
+++ b/libc/src/__support/math/exp10f16.h
@@ -57,7 +57,7 @@ static constexpr fputil::ExceptValues<float16, N_EXP10F16_EXCEPTS>
}};
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-static constexpr float16 exp10f16(float16 x) {
+LIBC_INLINE static constexpr float16 exp10f16(float16 x) {
using FPBits = fputil::FPBits<float16>;
FPBits x_bits(x);
diff --git a/libc/src/__support/math/exp10f16_utils.h b/libc/src/__support/math/exp10f16_utils.h
index bffb81b..5952a41 100644
--- a/libc/src/__support/math/exp10f16_utils.h
+++ b/libc/src/__support/math/exp10f16_utils.h
@@ -19,8 +19,7 @@
namespace LIBC_NAMESPACE_DECL {
-LIBC_INLINE static constexpr ExpRangeReduction
-exp10_range_reduction(float16 x) {
+LIBC_INLINE static ExpRangeReduction exp10_range_reduction(float16 x) {
// For -8 < x < 5, to compute 10^x, we perform the following range reduction:
// find hi, mid, lo, such that:
// x = (hi + mid) * log2(10) + lo, in which
diff --git a/libc/src/__support/math/exp10f_utils.h b/libc/src/__support/math/exp10f_utils.h
index c30def9..010a2f1 100644
--- a/libc/src/__support/math/exp10f_utils.h
+++ b/libc/src/__support/math/exp10f_utils.h
@@ -89,7 +89,7 @@ struct Exp10Base : public ExpBase {
0x1.0470591dff149p1, 0x1.2bd7c0a9fbc4dp0,
0x1.1429e74a98f43p-1};
- static double powb_lo(double dx) {
+ LIBC_INLINE static double powb_lo(double dx) {
using fputil::multiply_add;
double dx2 = dx * dx;
// c0 = 1 + COEFFS[0] * dx
diff --git a/libc/src/__support/math/exp_utils.h b/libc/src/__support/math/exp_utils.h
index fc9ab10..ef408ed 100644
--- a/libc/src/__support/math/exp_utils.h
+++ b/libc/src/__support/math/exp_utils.h
@@ -22,8 +22,8 @@ namespace LIBC_NAMESPACE_DECL {
// So if we scale x up by 2^1022, we can use
// double(1.0 + 2^1022 * x) - 1.0 to test how x is rounded in denormal range.
template <bool SKIP_ZIV_TEST = false>
-static constexpr cpp::optional<double> ziv_test_denorm(int hi, double mid,
- double lo, double err) {
+LIBC_INLINE static constexpr cpp::optional<double>
+ziv_test_denorm(int hi, double mid, double lo, double err) {
using FPBits = typename fputil::FPBits<double>;
// Scaling factor = 1/(min normal number) = 2^1022
diff --git a/libc/src/__support/math/expf.h b/libc/src/__support/math/expf.h
index 88c1514..f7e11be 100644
--- a/libc/src/__support/math/expf.h
+++ b/libc/src/__support/math/expf.h
@@ -24,7 +24,7 @@ namespace LIBC_NAMESPACE_DECL {
namespace math {
-static constexpr float expf(float x) {
+LIBC_INLINE static constexpr float expf(float x) {
using FPBits = typename fputil::FPBits<float>;
FPBits xbits(x);
diff --git a/libc/src/__support/math/expf16.h b/libc/src/__support/math/expf16.h
index ded28c7..14302a7 100644
--- a/libc/src/__support/math/expf16.h
+++ b/libc/src/__support/math/expf16.h
@@ -31,7 +31,7 @@ namespace LIBC_NAMESPACE_DECL {
namespace math {
-static constexpr float16 expf16(float16 x) {
+LIBC_INLINE static constexpr float16 expf16(float16 x) {
#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
constexpr fputil::ExceptValues<float16, 2> EXPF16_EXCEPTS_LO = {{
// (input, RZ output, RU offset, RD offset, RN offset)
diff --git a/libc/src/__support/math/expf16_utils.h b/libc/src/__support/math/expf16_utils.h
index 8a2fc94..4204dab7 100644
--- a/libc/src/__support/math/expf16_utils.h
+++ b/libc/src/__support/math/expf16_utils.h
@@ -47,7 +47,8 @@ struct ExpRangeReduction {
float exp_lo;
};
-[[maybe_unused]] static ExpRangeReduction exp_range_reduction(float16 x) {
+[[maybe_unused]] LIBC_INLINE static ExpRangeReduction
+exp_range_reduction(float16 x) {
// For -18 < x < 12, to compute exp(x), we perform the following range
// reduction: find hi, mid, lo, such that:
// x = hi + mid + lo, in which
diff --git a/libc/src/__support/math/frexpf.h b/libc/src/__support/math/frexpf.h
index 4d2f494..7834a12 100644
--- a/libc/src/__support/math/frexpf.h
+++ b/libc/src/__support/math/frexpf.h
@@ -17,7 +17,7 @@ namespace LIBC_NAMESPACE_DECL {
namespace math {
-static constexpr float frexpf(float x, int *exp) {
+LIBC_INLINE static constexpr float frexpf(float x, int *exp) {
return fputil::frexp(x, *exp);
}
diff --git a/libc/src/__support/math/frexpf128.h b/libc/src/__support/math/frexpf128.h
index 2fd5bc4..5218b26 100644
--- a/libc/src/__support/math/frexpf128.h
+++ b/libc/src/__support/math/frexpf128.h
@@ -21,7 +21,7 @@ namespace LIBC_NAMESPACE_DECL {
namespace math {
-static constexpr float128 frexpf128(float128 x, int *exp) {
+LIBC_INLINE static constexpr float128 frexpf128(float128 x, int *exp) {
return fputil::frexp(x, *exp);
}
diff --git a/libc/src/__support/math/frexpf16.h b/libc/src/__support/math/frexpf16.h
index 8deeba0..530b61a 100644
--- a/libc/src/__support/math/frexpf16.h
+++ b/libc/src/__support/math/frexpf16.h
@@ -21,7 +21,7 @@ namespace LIBC_NAMESPACE_DECL {
namespace math {
-static constexpr float16 frexpf16(float16 x, int *exp) {
+LIBC_INLINE static constexpr float16 frexpf16(float16 x, int *exp) {
return fputil::frexp(x, *exp);
}
diff --git a/libc/src/__support/math/ldexpf.h b/libc/src/__support/math/ldexpf.h
index 3a5ec1d..9ef5d96 100644
--- a/libc/src/__support/math/ldexpf.h
+++ b/libc/src/__support/math/ldexpf.h
@@ -17,7 +17,7 @@ namespace LIBC_NAMESPACE_DECL {
namespace math {
-static constexpr float ldexpf(float x, int exp) {
+LIBC_INLINE static constexpr float ldexpf(float x, int exp) {
return fputil::ldexp(x, exp);
}
diff --git a/libc/src/__support/math/ldexpf128.h b/libc/src/__support/math/ldexpf128.h
index 3625830..4fba20c 100644
--- a/libc/src/__support/math/ldexpf128.h
+++ b/libc/src/__support/math/ldexpf128.h
@@ -21,7 +21,7 @@ namespace LIBC_NAMESPACE_DECL {
namespace math {
-static constexpr float128 ldexpf128(float128 x, int exp) {
+LIBC_INLINE static constexpr float128 ldexpf128(float128 x, int exp) {
return fputil::ldexp(x, exp);
}
diff --git a/libc/src/__support/math/ldexpf16.h b/libc/src/__support/math/ldexpf16.h
index fbead87..d978d22 100644
--- a/libc/src/__support/math/ldexpf16.h
+++ b/libc/src/__support/math/ldexpf16.h
@@ -21,7 +21,7 @@ namespace LIBC_NAMESPACE_DECL {
namespace math {
-static constexpr float16 ldexpf16(float16 x, int exp) {
+LIBC_INLINE static constexpr float16 ldexpf16(float16 x, int exp) {
return fputil::ldexp(x, exp);
}
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index a001d99..f91feacb 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -3889,12 +3889,7 @@ add_entrypoint_object(
HDRS
../asinhf.h
DEPENDS
- .explogxf
- libc.src.__support.FPUtil.fp_bits
- libc.src.__support.FPUtil.multiply_add
- libc.src.__support.FPUtil.polyeval
- libc.src.__support.FPUtil.sqrt
- libc.src.__support.macros.optimization
+ libc.src.__support.math.asinhf
)
add_entrypoint_object(
@@ -3958,13 +3953,7 @@ add_entrypoint_object(
HDRS
../asinf.h
DEPENDS
- libc.src.__support.FPUtil.except_value_utils
- libc.src.__support.FPUtil.fp_bits
- libc.src.__support.FPUtil.multiply_add
- libc.src.__support.FPUtil.polyeval
- libc.src.__support.FPUtil.sqrt
- libc.src.__support.macros.optimization
- libc.src.__support.math.inv_trigf_utils
+ libc.src.__support.math.asinf
)
add_entrypoint_object(
@@ -3974,16 +3963,7 @@ add_entrypoint_object(
HDRS
../asinf16.h
DEPENDS
- libc.hdr.errno_macros
- libc.hdr.fenv_macros
- libc.src.__support.FPUtil.cast
- libc.src.__support.FPUtil.fenv_impl
- libc.src.__support.FPUtil.fp_bits
- libc.src.__support.FPUtil.multiply_add
- libc.src.__support.FPUtil.polyeval
- libc.src.__support.FPUtil.sqrt
- libc.src.__support.macros.optimization
- libc.src.__support.macros.properties.types
+ libc.src.__support.math.asinf16
)
add_entrypoint_object(
@@ -3993,16 +3973,7 @@ add_entrypoint_object(
HDRS
../asin.h
DEPENDS
- libc.src.__support.math.asin_utils
- libc.src.__support.FPUtil.double_double
- libc.src.__support.FPUtil.dyadic_float
- libc.src.__support.FPUtil.fenv_impl
- libc.src.__support.FPUtil.fp_bits
- libc.src.__support.FPUtil.multiply_add
- libc.src.__support.FPUtil.polyeval
- libc.src.__support.FPUtil.sqrt
- libc.src.__support.macros.optimization
- libc.src.__support.macros.properties.cpu_features
+ libc.src.__support.math.asin
)
add_entrypoint_object(
@@ -4043,16 +4014,8 @@ add_entrypoint_object(
HDRS
../acospif16.h
DEPENDS
- libc.hdr.errno_macros
- libc.hdr.fenv_macros
- libc.src.__support.FPUtil.cast
- libc.src.__support.FPUtil.fenv_impl
- libc.src.__support.FPUtil.fp_bits
- libc.src.__support.FPUtil.multiply_add
- libc.src.__support.FPUtil.polyeval
- libc.src.__support.FPUtil.sqrt
- libc.src.__support.macros.optimization
- libc.src.__support.macros.properties.types
+ libc.src.__support.math.acospif16
+ libc.src.errno.errno
)
add_header_library(
diff --git a/libc/src/math/generic/acospif16.cpp b/libc/src/math/generic/acospif16.cpp
index bfdf169..09cbd99 100644
--- a/libc/src/math/generic/acospif16.cpp
+++ b/libc/src/math/generic/acospif16.cpp
@@ -7,128 +7,12 @@
//===----------------------------------------------------------------------===//
#include "src/math/acospif16.h"
-#include "hdr/errno_macros.h"
-#include "hdr/fenv_macros.h"
-#include "src/__support/FPUtil/FEnvImpl.h"
-#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/PolyEval.h"
-#include "src/__support/FPUtil/cast.h"
-#include "src/__support/FPUtil/multiply_add.h"
-#include "src/__support/FPUtil/sqrt.h"
-#include "src/__support/macros/optimization.h"
+#include "src/__support/math/acospif16.h"
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(float16, acospif16, (float16 x)) {
- using FPBits = fputil::FPBits<float16>;
- FPBits xbits(x);
-
- uint16_t x_u = xbits.uintval();
- uint16_t x_abs = x_u & 0x7fff;
- uint16_t x_sign = x_u >> 15;
-
- // |x| > 0x1p0, |x| > 1, or x is NaN.
- if (LIBC_UNLIKELY(x_abs > 0x3c00)) {
- // acospif16(NaN) = NaN
- if (xbits.is_nan()) {
- if (xbits.is_signaling_nan()) {
- fputil::raise_except_if_required(FE_INVALID);
- return FPBits::quiet_nan().get_val();
- }
-
- return x;
- }
-
- // 1 < |x| <= +inf
- fputil::raise_except_if_required(FE_INVALID);
- fputil::set_errno_if_required(EDOM);
-
- return FPBits::quiet_nan().get_val();
- }
-
- // |x| == 0x1p0, x is 1 or -1
- // if x is (-)1, return 1
- // if x is (+)1, return 0
- if (LIBC_UNLIKELY(x_abs == 0x3c00))
- return fputil::cast<float16>(x_sign ? 1.0f : 0.0f);
-
- float xf = x;
- float xsq = xf * xf;
-
- // Degree-6 minimax polynomial coefficients of asin(x) generated by Sollya
- // with: > P = fpminimax(asin(x)/(pi * x), [|0, 2, 4, 6, 8|], [|SG...|], [0,
- // 0.5]);
- constexpr float POLY_COEFFS[5] = {0x1.45f308p-2f, 0x1.b2900cp-5f,
- 0x1.897e36p-6f, 0x1.9efafcp-7f,
- 0x1.06d884p-6f};
- // |x| <= 0x1p-1, |x| <= 0.5
- if (x_abs <= 0x3800) {
- // if x is 0, return 0.5
- if (LIBC_UNLIKELY(x_abs == 0))
- return fputil::cast<float16>(0.5f);
-
- // Note that: acos(x) = pi/2 + asin(-x) = pi/2 - asin(x), then
- // acospi(x) = 0.5 - asin(x)/pi
- float interm =
- fputil::polyeval(xsq, POLY_COEFFS[0], POLY_COEFFS[1], POLY_COEFFS[2],
- POLY_COEFFS[3], POLY_COEFFS[4]);
-
- return fputil::cast<float16>(fputil::multiply_add(-xf, interm, 0.5f));
- }
-
- // When |x| > 0.5, assume that 0.5 < |x| <= 1
- //
- // Step-by-step range-reduction proof:
- // 1: Let y = asin(x), such that, x = sin(y)
- // 2: From complimentary angle identity:
- // x = sin(y) = cos(pi/2 - y)
- // 3: Let z = pi/2 - y, such that x = cos(z)
- // 4: From double angle formula; cos(2A) = 1 - 2 * sin^2(A):
- // z = 2A, z/2 = A
- // cos(z) = 1 - 2 * sin^2(z/2)
- // 5: Make sin(z/2) subject of the formula:
- // sin(z/2) = sqrt((1 - cos(z))/2)
- // 6: Recall [3]; x = cos(z). Therefore:
- // sin(z/2) = sqrt((1 - x)/2)
- // 7: Let u = (1 - x)/2
- // 8: Therefore:
- // asin(sqrt(u)) = z/2
- // 2 * asin(sqrt(u)) = z
- // 9: Recall [3]; z = pi/2 - y. Therefore:
- // y = pi/2 - z
- // y = pi/2 - 2 * asin(sqrt(u))
- // 10: Recall [1], y = asin(x). Therefore:
- // asin(x) = pi/2 - 2 * asin(sqrt(u))
- // 11: Recall that: acos(x) = pi/2 + asin(-x) = pi/2 - asin(x)
- // Therefore:
- // acos(x) = pi/2 - (pi/2 - 2 * asin(sqrt(u)))
- // acos(x) = 2 * asin(sqrt(u))
- // acospi(x) = 2 * (asin(sqrt(u)) / pi)
- //
- // THE RANGE REDUCTION, HOW?
- // 12: Recall [7], u = (1 - x)/2
- // 13: Since 0.5 < x <= 1, therefore:
- // 0 <= u <= 0.25 and 0 <= sqrt(u) <= 0.5
- //
- // Hence, we can reuse the same [0, 0.5] domain polynomial approximation for
- // Step [11] as `sqrt(u)` is in range.
- // When -1 < x <= -0.5, the identity:
- // acos(x) = pi - acos(-x)
- // acospi(x) = 1 - acos(-x)/pi
- // allows us to compute for the negative x value (lhs)
- // with a positive x value instead (rhs).
-
- float xf_abs = (xf < 0 ? -xf : xf);
- float u = fputil::multiply_add(-0.5f, xf_abs, 0.5f);
- float sqrt_u = fputil::sqrt<float>(u);
-
- float asin_sqrt_u =
- sqrt_u * fputil::polyeval(u, POLY_COEFFS[0], POLY_COEFFS[1],
- POLY_COEFFS[2], POLY_COEFFS[3], POLY_COEFFS[4]);
-
- // Same as acos(x), but devided the expression with pi
- return fputil::cast<float16>(
- x_sign ? fputil::multiply_add(-2.0f, asin_sqrt_u, 1.0f)
- : 2.0f * asin_sqrt_u);
+ return math::acospif16(x);
}
+
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/asin.cpp b/libc/src/math/generic/asin.cpp
index d286fce..b5ba9ea 100644
--- a/libc/src/math/generic/asin.cpp
+++ b/libc/src/math/generic/asin.cpp
@@ -7,23 +7,10 @@
//===----------------------------------------------------------------------===//
#include "src/math/asin.h"
-#include "src/__support/FPUtil/FEnvImpl.h"
-#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/PolyEval.h"
-#include "src/__support/FPUtil/double_double.h"
-#include "src/__support/FPUtil/dyadic_float.h"
-#include "src/__support/FPUtil/multiply_add.h"
-#include "src/__support/FPUtil/sqrt.h"
-#include "src/__support/macros/config.h"
-#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
-#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
-#include "src/__support/math/asin_utils.h"
+#include "src/__support/math/asin.h"
namespace LIBC_NAMESPACE_DECL {
-using DoubleDouble = fputil::DoubleDouble;
-using Float128 = fputil::DyadicFloat<128>;
-
LLVM_LIBC_FUNCTION(double, asin, (double x)) {
using namespace asin_internal;
using FPBits = fputil::FPBits<double>;
diff --git a/libc/src/math/generic/asinf.cpp b/libc/src/math/generic/asinf.cpp
index 77d6de9..9c6766f 100644
--- a/libc/src/math/generic/asinf.cpp
+++ b/libc/src/math/generic/asinf.cpp
@@ -7,161 +7,10 @@
//===----------------------------------------------------------------------===//
#include "src/math/asinf.h"
-#include "src/__support/FPUtil/FEnvImpl.h"
-#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/PolyEval.h"
-#include "src/__support/FPUtil/except_value_utils.h"
-#include "src/__support/FPUtil/multiply_add.h"
-#include "src/__support/FPUtil/sqrt.h"
-#include "src/__support/macros/config.h"
-#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
-#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
-
-#include "src/__support/math/inv_trigf_utils.h"
+#include "src/__support/math/asinf.h"
namespace LIBC_NAMESPACE_DECL {
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-static constexpr size_t N_EXCEPTS = 2;
-
-// Exceptional values when |x| <= 0.5
-static constexpr fputil::ExceptValues<float, N_EXCEPTS> ASINF_EXCEPTS_LO = {{
- // (inputs, RZ output, RU offset, RD offset, RN offset)
- // x = 0x1.137f0cp-5, asinf(x) = 0x1.138c58p-5 (RZ)
- {0x3d09bf86, 0x3d09c62c, 1, 0, 1},
- // x = 0x1.cbf43cp-4, asinf(x) = 0x1.cced1cp-4 (RZ)
- {0x3de5fa1e, 0x3de6768e, 1, 0, 0},
-}};
-
-// Exceptional values when 0.5 < |x| <= 1
-static constexpr fputil::ExceptValues<float, N_EXCEPTS> ASINF_EXCEPTS_HI = {{
- // (inputs, RZ output, RU offset, RD offset, RN offset)
- // x = 0x1.107434p-1, asinf(x) = 0x1.1f4b64p-1 (RZ)
- {0x3f083a1a, 0x3f0fa5b2, 1, 0, 0},
- // x = 0x1.ee836cp-1, asinf(x) = 0x1.4f0654p0 (RZ)
- {0x3f7741b6, 0x3fa7832a, 1, 0, 0},
-}};
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
-LLVM_LIBC_FUNCTION(float, asinf, (float x)) {
- using namespace inv_trigf_utils_internal;
- using FPBits = typename fputil::FPBits<float>;
-
- FPBits xbits(x);
- uint32_t x_uint = xbits.uintval();
- uint32_t x_abs = xbits.uintval() & 0x7fff'ffffU;
- constexpr double SIGN[2] = {1.0, -1.0};
- uint32_t x_sign = x_uint >> 31;
-
- // |x| <= 0.5-ish
- if (x_abs < 0x3f04'471dU) {
- // |x| < 0x1.d12edp-12
- if (LIBC_UNLIKELY(x_abs < 0x39e8'9768U)) {
- // When |x| < 2^-12, the relative error of the approximation asin(x) ~ x
- // is:
- // |asin(x) - x| / |asin(x)| < |x^3| / (6|x|)
- // = x^2 / 6
- // < 2^-25
- // < epsilon(1)/2.
- // So the correctly rounded values of asin(x) are:
- // = x + sign(x)*eps(x) if rounding mode = FE_TOWARDZERO,
- // or (rounding mode = FE_UPWARD and x is
- // negative),
- // = x otherwise.
- // To simplify the rounding decision and make it more efficient, we use
- // fma(x, 2^-25, x) instead.
- // An exhaustive test shows that this formula work correctly for all
- // rounding modes up to |x| < 0x1.d12edp-12.
- // Note: to use the formula x + 2^-25*x to decide the correct rounding, we
- // do need fma(x, 2^-25, x) to prevent underflow caused by 2^-25*x when
- // |x| < 2^-125. For targets without FMA instructions, we simply use
- // double for intermediate results as it is more efficient than using an
- // emulated version of FMA.
-#if defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT)
- return fputil::multiply_add(x, 0x1.0p-25f, x);
-#else
- double xd = static_cast<double>(x);
- return static_cast<float>(fputil::multiply_add(xd, 0x1.0p-25, xd));
-#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT
- }
-
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
- // Check for exceptional values
- if (auto r = ASINF_EXCEPTS_LO.lookup_odd(x_abs, x_sign);
- LIBC_UNLIKELY(r.has_value()))
- return r.value();
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
- // For |x| <= 0.5, we approximate asinf(x) by:
- // asin(x) = x * P(x^2)
- // Where P(X^2) = Q(X) is a degree-20 minimax even polynomial approximating
- // asin(x)/x on [0, 0.5] generated by Sollya with:
- // > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20|],
- // [|1, D...|], [0, 0.5]);
- // An exhaustive test shows that this approximation works well up to a
- // little more than 0.5.
- double xd = static_cast<double>(x);
- double xsq = xd * xd;
- double x3 = xd * xsq;
- double r = asin_eval(xsq);
- return static_cast<float>(fputil::multiply_add(x3, r, xd));
- }
-
- // |x| > 1, return NaNs.
- if (LIBC_UNLIKELY(x_abs > 0x3f80'0000U)) {
- if (xbits.is_signaling_nan()) {
- fputil::raise_except_if_required(FE_INVALID);
- return FPBits::quiet_nan().get_val();
- }
-
- if (x_abs <= 0x7f80'0000U) {
- fputil::set_errno_if_required(EDOM);
- fputil::raise_except_if_required(FE_INVALID);
- }
-
- return FPBits::quiet_nan().get_val();
- }
-
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
- // Check for exceptional values
- if (auto r = ASINF_EXCEPTS_HI.lookup_odd(x_abs, x_sign);
- LIBC_UNLIKELY(r.has_value()))
- return r.value();
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
- // When |x| > 0.5, we perform range reduction as follow:
- //
- // Assume further that 0.5 < x <= 1, and let:
- // y = asin(x)
- // We will use the double angle formula:
- // cos(2y) = 1 - 2 sin^2(y)
- // and the complement angle identity:
- // x = sin(y) = cos(pi/2 - y)
- // = 1 - 2 sin^2 (pi/4 - y/2)
- // So:
- // sin(pi/4 - y/2) = sqrt( (1 - x)/2 )
- // And hence:
- // pi/4 - y/2 = asin( sqrt( (1 - x)/2 ) )
- // Equivalently:
- // asin(x) = y = pi/2 - 2 * asin( sqrt( (1 - x)/2 ) )
- // Let u = (1 - x)/2, then:
- // asin(x) = pi/2 - 2 * asin( sqrt(u) )
- // Moreover, since 0.5 < x <= 1:
- // 0 <= u < 1/4, and 0 <= sqrt(u) < 0.5,
- // And hence we can reuse the same polynomial approximation of asin(x) when
- // |x| <= 0.5:
- // asin(x) ~ pi/2 - 2 * sqrt(u) * P(u),
-
- xbits.set_sign(Sign::POS);
- double sign = SIGN[x_sign];
- double xd = static_cast<double>(xbits.get_val());
- double u = fputil::multiply_add(-0.5, xd, 0.5);
- double c1 = sign * (-2 * fputil::sqrt<double>(u));
- double c2 = fputil::multiply_add(sign, M_MATH_PI_2, c1);
- double c3 = c1 * u;
-
- double r = asin_eval(u);
- return static_cast<float>(fputil::multiply_add(c3, r, c2));
-}
+LLVM_LIBC_FUNCTION(float, asinf, (float x)) { return math::asinf(x); }
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/asinf16.cpp b/libc/src/math/generic/asinf16.cpp
index 518c384..af8dbfe 100644
--- a/libc/src/math/generic/asinf16.cpp
+++ b/libc/src/math/generic/asinf16.cpp
@@ -7,127 +7,10 @@
//===----------------------------------------------------------------------===//
#include "src/math/asinf16.h"
-#include "hdr/errno_macros.h"
-#include "hdr/fenv_macros.h"
-#include "src/__support/FPUtil/FEnvImpl.h"
-#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/PolyEval.h"
-#include "src/__support/FPUtil/cast.h"
-#include "src/__support/FPUtil/multiply_add.h"
-#include "src/__support/FPUtil/sqrt.h"
-#include "src/__support/macros/optimization.h"
+#include "src/__support/math/asinf16.h"
namespace LIBC_NAMESPACE_DECL {
-// Generated by Sollya using the following command:
-// > round(pi/2, D, RN);
-static constexpr float PI_2 = 0x1.921fb54442d18p0f;
-
-LLVM_LIBC_FUNCTION(float16, asinf16, (float16 x)) {
- using FPBits = fputil::FPBits<float16>;
- FPBits xbits(x);
-
- uint16_t x_u = xbits.uintval();
- uint16_t x_abs = x_u & 0x7fff;
- float xf = x;
-
- // |x| > 0x1p0, |x| > 1, or x is NaN.
- if (LIBC_UNLIKELY(x_abs > 0x3c00)) {
- // asinf16(NaN) = NaN
- if (xbits.is_nan()) {
- if (xbits.is_signaling_nan()) {
- fputil::raise_except_if_required(FE_INVALID);
- return FPBits::quiet_nan().get_val();
- }
-
- return x;
- }
-
- // 1 < |x| <= +/-inf
- fputil::raise_except_if_required(FE_INVALID);
- fputil::set_errno_if_required(EDOM);
-
- return FPBits::quiet_nan().get_val();
- }
-
- float xsq = xf * xf;
-
- // |x| <= 0x1p-1, |x| <= 0.5
- if (x_abs <= 0x3800) {
- // asinf16(+/-0) = +/-0
- if (LIBC_UNLIKELY(x_abs == 0))
- return x;
-
- // Exhaustive tests show that,
- // for |x| <= 0x1.878p-9, when:
- // x > 0, and rounding upward, or
- // x < 0, and rounding downward, then,
- // asin(x) = x * 2^-11 + x
- // else, in other rounding modes,
- // asin(x) = x
- if (LIBC_UNLIKELY(x_abs <= 0x1a1e)) {
- int rounding = fputil::quick_get_round();
-
- if ((xbits.is_pos() && rounding == FE_UPWARD) ||
- (xbits.is_neg() && rounding == FE_DOWNWARD))
- return fputil::cast<float16>(fputil::multiply_add(xf, 0x1.0p-11f, xf));
- return x;
- }
-
- // Degree-6 minimax odd polynomial of asin(x) generated by Sollya with:
- // > P = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8|], [|SG...|], [0, 0.5]);
- float result =
- fputil::polyeval(xsq, 0x1.000002p0f, 0x1.554c2ap-3f, 0x1.3541ccp-4f,
- 0x1.43b2d6p-5f, 0x1.a0d73ep-5f);
- return fputil::cast<float16>(xf * result);
- }
-
- // When |x| > 0.5, assume that 0.5 < |x| <= 1,
- //
- // Step-by-step range-reduction proof:
- // 1: Let y = asin(x), such that, x = sin(y)
- // 2: From complimentary angle identity:
- // x = sin(y) = cos(pi/2 - y)
- // 3: Let z = pi/2 - y, such that x = cos(z)
- // 4: From double angle formula; cos(2A) = 1 - sin^2(A):
- // z = 2A, z/2 = A
- // cos(z) = 1 - 2 * sin^2(z/2)
- // 5: Make sin(z/2) subject of the formula:
- // sin(z/2) = sqrt((1 - cos(z))/2)
- // 6: Recall [3]; x = cos(z). Therefore:
- // sin(z/2) = sqrt((1 - x)/2)
- // 7: Let u = (1 - x)/2
- // 8: Therefore:
- // asin(sqrt(u)) = z/2
- // 2 * asin(sqrt(u)) = z
- // 9: Recall [3], z = pi/2 - y. Therefore:
- // y = pi/2 - z
- // y = pi/2 - 2 * asin(sqrt(u))
- // 10: Recall [1], y = asin(x). Therefore:
- // asin(x) = pi/2 - 2 * asin(sqrt(u))
- //
- // WHY?
- // 11: Recall [7], u = (1 - x)/2
- // 12: Since 0.5 < x <= 1, therefore:
- // 0 <= u <= 0.25 and 0 <= sqrt(u) <= 0.5
- //
- // Hence, we can reuse the same [0, 0.5] domain polynomial approximation for
- // Step [10] as `sqrt(u)` is in range.
-
- // 0x1p-1 < |x| <= 0x1p0, 0.5 < |x| <= 1.0
- float xf_abs = (xf < 0 ? -xf : xf);
- float sign = (xbits.uintval() >> 15 == 1 ? -1.0 : 1.0);
- float u = fputil::multiply_add(-0.5f, xf_abs, 0.5f);
- float u_sqrt = fputil::sqrt<float>(u);
-
- // Degree-6 minimax odd polynomial of asin(x) generated by Sollya with:
- // > P = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8|], [|SG...|], [0, 0.5]);
- float asin_sqrt_u =
- u_sqrt * fputil::polyeval(u, 0x1.000002p0f, 0x1.554c2ap-3f,
- 0x1.3541ccp-4f, 0x1.43b2d6p-5f, 0x1.a0d73ep-5f);
-
- return fputil::cast<float16>(sign *
- fputil::multiply_add(-2.0f, asin_sqrt_u, PI_2));
-}
+LLVM_LIBC_FUNCTION(float16, asinf16, (float16 x)) { return math::asinf16(x); }
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/asinhf.cpp b/libc/src/math/generic/asinhf.cpp
index 3aed3bc..45023c8 100644
--- a/libc/src/math/generic/asinhf.cpp
+++ b/libc/src/math/generic/asinhf.cpp
@@ -7,112 +7,10 @@
//===----------------------------------------------------------------------===//
#include "src/math/asinhf.h"
-#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/PolyEval.h"
-#include "src/__support/FPUtil/multiply_add.h"
-#include "src/__support/FPUtil/sqrt.h"
-#include "src/__support/macros/config.h"
-#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
-#include "src/math/generic/common_constants.h"
-#include "src/math/generic/explogxf.h"
+#include "src/__support/math/asinhf.h"
namespace LIBC_NAMESPACE_DECL {
-LLVM_LIBC_FUNCTION(float, asinhf, (float x)) {
- using namespace acoshf_internal;
- using FPBits_t = typename fputil::FPBits<float>;
- FPBits_t xbits(x);
- uint32_t x_u = xbits.uintval();
- uint32_t x_abs = xbits.abs().uintval();
-
- // |x| <= 2^-3
- if (LIBC_UNLIKELY(x_abs <= 0x3e80'0000U)) {
- // |x| <= 2^-26
- if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) {
- return static_cast<float>(LIBC_UNLIKELY(x_abs == 0)
- ? x
- : (x - 0x1.5555555555555p-3 * x * x * x));
- }
-
- double x_d = x;
- double x_sq = x_d * x_d;
- // Generated by Sollya with:
- // > P = fpminimax(asinh(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16|], [|D...|],
- // [0, 2^-2]);
- double p = fputil::polyeval(
- x_sq, 0.0, -0x1.555555555551ep-3, 0x1.3333333325495p-4,
- -0x1.6db6db5a7622bp-5, 0x1.f1c70f82928c6p-6, -0x1.6e893934266b7p-6,
- 0x1.1c0b41d3fbe78p-6, -0x1.c0f47810b3c4fp-7, 0x1.2c8602690143dp-7);
- return static_cast<float>(fputil::multiply_add(x_d, p, x_d));
- }
-
- const double SIGN[2] = {1.0, -1.0};
- double x_sign = SIGN[x_u >> 31];
- double x_d = x;
-
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
- // Helper functions to set results for exceptional cases.
- auto round_result_slightly_down = [x_sign](float r) -> float {
- return fputil::multiply_add(static_cast<float>(x_sign), r,
- static_cast<float>(x_sign) * (-0x1.0p-24f));
- };
- auto round_result_slightly_up = [x_sign](float r) -> float {
- return fputil::multiply_add(static_cast<float>(x_sign), r,
- static_cast<float>(x_sign) * 0x1.0p-24f);
- };
-
- if (LIBC_UNLIKELY(x_abs >= 0x4bdd'65a5U)) {
- if (LIBC_UNLIKELY(xbits.is_inf_or_nan())) {
- if (xbits.is_signaling_nan()) {
- fputil::raise_except_if_required(FE_INVALID);
- return FPBits_t::quiet_nan().get_val();
- }
-
- return x;
- }
-
- // Exceptional cases when x > 2^24.
- switch (x_abs) {
- case 0x4bdd65a5: // |x| = 0x1.bacb4ap24f
- return round_result_slightly_down(0x1.1e0696p4f);
- case 0x4c803f2c: // |x| = 0x1.007e58p26f
- return round_result_slightly_down(0x1.2b786cp4f);
- case 0x4f8ffb03: // |x| = 0x1.1ff606p32f
- return round_result_slightly_up(0x1.6fdd34p4f);
- case 0x5c569e88: // |x| = 0x1.ad3d1p57f
- return round_result_slightly_up(0x1.45c146p5f);
- case 0x5e68984e: // |x| = 0x1.d1309cp61f
- return round_result_slightly_up(0x1.5c9442p5f);
- case 0x655890d3: // |x| = 0x1.b121a6p75f
- return round_result_slightly_down(0x1.a9a3f2p5f);
- case 0x65de7ca6: // |x| = 0x1.bcf94cp76f
- return round_result_slightly_up(0x1.af66cp5f);
- case 0x6eb1a8ec: // |x| = 0x1.6351d8p94f
- return round_result_slightly_down(0x1.08b512p6f);
- case 0x7997f30a: // |x| = 0x1.2fe614p116f
- return round_result_slightly_up(0x1.451436p6f);
- }
- } else {
- // Exceptional cases when x < 2^24.
- if (LIBC_UNLIKELY(x_abs == 0x45abaf26)) {
- // |x| = 0x1.575e4cp12f
- return round_result_slightly_down(0x1.29becap3f);
- }
- if (LIBC_UNLIKELY(x_abs == 0x49d29048)) {
- // |x| = 0x1.a5209p20f
- return round_result_slightly_down(0x1.e1b92p3f);
- }
- }
-#else
- if (LIBC_UNLIKELY(xbits.is_inf_or_nan()))
- return x;
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
- // asinh(x) = log(x + sqrt(x^2 + 1))
- return static_cast<float>(
- x_sign * log_eval(fputil::multiply_add(
- x_d, x_sign,
- fputil::sqrt<double>(fputil::multiply_add(x_d, x_d, 1.0)))));
-}
+LLVM_LIBC_FUNCTION(float, asinhf, (float x)) { return math::asinhf(x); }
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/range_reduction_double_common.h b/libc/src/math/generic/range_reduction_double_common.h
index f3dcdb9..a93ee25 100644
--- a/libc/src/math/generic/range_reduction_double_common.h
+++ b/libc/src/math/generic/range_reduction_double_common.h
@@ -278,7 +278,7 @@ private:
};
#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-static Float128 range_reduction_small_f128(double x) {
+LIBC_INLINE static Float128 range_reduction_small_f128(double x) {
constexpr Float128 PI_OVER_128_F128 = {
Sign::POS, -133, 0xc90f'daa2'2168'c234'c4c6'628b'80dc'1cd1_u128};
constexpr double ONE_TWENTY_EIGHT_OVER_PI_D = 0x1.45f306dc9c883p5;
diff --git a/libc/src/stdio/baremetal/CMakeLists.txt b/libc/src/stdio/baremetal/CMakeLists.txt
index e879230..548938f 100644
--- a/libc/src/stdio/baremetal/CMakeLists.txt
+++ b/libc/src/stdio/baremetal/CMakeLists.txt
@@ -72,6 +72,7 @@ add_entrypoint_object(
../scanf.h
DEPENDS
.scanf_internal
+ libc.include.inttypes
libc.src.stdio.scanf_core.scanf_main
libc.src.__support.arg_list
libc.src.__support.OSUtil.osutil
diff --git a/libc/src/stdio/scanf_core/CMakeLists.txt b/libc/src/stdio/scanf_core/CMakeLists.txt
index dee125c..561180c 100644
--- a/libc/src/stdio/scanf_core/CMakeLists.txt
+++ b/libc/src/stdio/scanf_core/CMakeLists.txt
@@ -35,6 +35,7 @@ add_header_library(
core_structs.h
DEPENDS
.scanf_config
+ libc.include.inttypes
libc.src.__support.CPP.string_view
libc.src.__support.CPP.bitset
libc.src.__support.FPUtil.fp_bits
@@ -97,6 +98,7 @@ add_header_library(
DEPENDS
.reader
.core_structs
+ libc.include.inttypes
libc.src.__support.common
libc.src.__support.ctype_utils
libc.src.__support.CPP.bitset
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index 43f44a9..49f4a1b 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -137,6 +137,21 @@ add_entrypoint_object(
)
add_entrypoint_object(
+ mbsinit
+ SRCS
+ mbsinit.cpp
+ HDRS
+ mbsinit.h
+ DEPENDS
+ libc.hdr.types.wchar_t
+ libc.hdr.types.mbstate_t
+ libc.src.__support.common
+ libc.src.__support.macros.config
+ libc.src.__support.wchar.character_converter
+ libc.src.__support.wchar.mbstate
+)
+
+add_entrypoint_object(
mbrtowc
SRCS
mbrtowc.cpp
diff --git a/libc/src/wchar/mbsinit.cpp b/libc/src/wchar/mbsinit.cpp
new file mode 100644
index 0000000..23ba542
--- /dev/null
+++ b/libc/src/wchar/mbsinit.cpp
@@ -0,0 +1,26 @@
+//===-- Implementation of mbsinit -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/mbsinit.h"
+
+#include "hdr/types/mbstate_t.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/wchar/character_converter.h"
+#include "src/__support/wchar/mbstate.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(int, mbsinit, (mbstate_t * ps)) {
+ if (ps == nullptr)
+ return true;
+ internal::CharacterConverter cr(reinterpret_cast<internal::mbstate *>(ps));
+ return cr.isValidState() && cr.isEmpty();
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/mbsinit.h b/libc/src/wchar/mbsinit.h
new file mode 100644
index 0000000..fa6be0f
--- /dev/null
+++ b/libc/src/wchar/mbsinit.h
@@ -0,0 +1,22 @@
+//===-- Implementation header for mbsinit ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_MBSINIT_H
+#define LLVM_LIBC_SRC_WCHAR_MBSINIT_H
+
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/size_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+int mbsinit(mbstate_t *ps);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_MBSINIT_H
diff --git a/libc/src/wchar/wchar_utils.h b/libc/src/wchar/wchar_utils.h
index e0218c7..55a3cee 100644
--- a/libc/src/wchar/wchar_utils.h
+++ b/libc/src/wchar/wchar_utils.h
@@ -17,13 +17,10 @@
namespace LIBC_NAMESPACE_DECL {
namespace internal {
-// returns true if the character exists in the string
-LIBC_INLINE static bool wcschr(wchar_t c, const wchar_t *str) {
- for (int n = 0; str[n]; ++n) {
- if (str[n] == c)
- return true;
- }
- return false;
+LIBC_INLINE static const wchar_t *wcschr(const wchar_t *s, wchar_t c) {
+ for (; *s && *s != c; ++s)
+ ;
+ return (*s == c) ? s : nullptr;
}
// bool should be true for wcscspn for complimentary span
@@ -32,7 +29,7 @@ LIBC_INLINE static size_t wcsspn(const wchar_t *s1, const wchar_t *s2,
bool not_match_set) {
size_t i = 0;
for (; s1[i]; ++i) {
- bool in_set = wcschr(s1[i], s2);
+ bool in_set = internal::wcschr(s2, s1[i]);
if (in_set == not_match_set)
return i;
}
diff --git a/libc/src/wchar/wcschr.cpp b/libc/src/wchar/wcschr.cpp
index defc2ce..8ac4916 100644
--- a/libc/src/wchar/wcschr.cpp
+++ b/libc/src/wchar/wcschr.cpp
@@ -11,15 +11,14 @@
#include "hdr/types/wchar_t.h"
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
+#include "src/__support/macros/null_check.h"
+#include "wchar_utils.h"
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(const wchar_t *, wcschr, (const wchar_t *s, wchar_t c)) {
- for (; *s && *s != c; ++s)
- ;
- if (*s == c)
- return s;
- return nullptr;
+ LIBC_CRASH_ON_NULLPTR(s);
+ return internal::wcschr(s, c);
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcspbrk.cpp b/libc/src/wchar/wcspbrk.cpp
index a00ba99..5d86a49 100644
--- a/libc/src/wchar/wcspbrk.cpp
+++ b/libc/src/wchar/wcspbrk.cpp
@@ -11,17 +11,10 @@
#include "hdr/types/wchar_t.h"
#include "src/__support/common.h"
#include "src/__support/macros/null_check.h"
+#include "wchar_utils.h"
namespace LIBC_NAMESPACE_DECL {
-bool contains_char(const wchar_t *str, wchar_t target) {
- for (; *str != L'\0'; str++)
- if (*str == target)
- return true;
-
- return false;
-}
-
LLVM_LIBC_FUNCTION(const wchar_t *, wcspbrk,
(const wchar_t *src, const wchar_t *breakset)) {
LIBC_CRASH_ON_NULLPTR(src);
@@ -29,7 +22,7 @@ LLVM_LIBC_FUNCTION(const wchar_t *, wcspbrk,
// currently O(n * m), can be further optimized to O(n + m) with a hash set
for (int src_idx = 0; src[src_idx] != 0; src_idx++)
- if (contains_char(breakset, src[src_idx]))
+ if (internal::wcschr(breakset, src[src_idx]))
return src + src_idx;
return nullptr;
diff --git a/libc/src/wchar/wcstok.cpp b/libc/src/wchar/wcstok.cpp
index 291efc1..ed4f0aa 100644
--- a/libc/src/wchar/wcstok.cpp
+++ b/libc/src/wchar/wcstok.cpp
@@ -10,18 +10,12 @@
#include "hdr/types/wchar_t.h"
#include "src/__support/common.h"
+#include "wchar_utils.h"
namespace LIBC_NAMESPACE_DECL {
-bool isADelimeter(wchar_t wc, const wchar_t *delimiters) {
- for (const wchar_t *delim_ptr = delimiters; *delim_ptr != L'\0'; ++delim_ptr)
- if (wc == *delim_ptr)
- return true;
- return false;
-}
-
LLVM_LIBC_FUNCTION(wchar_t *, wcstok,
- (wchar_t *__restrict str, const wchar_t *__restrict delim,
+ (wchar_t *__restrict str, const wchar_t *__restrict delims,
wchar_t **__restrict context)) {
if (str == nullptr) {
if (*context == nullptr)
@@ -30,14 +24,13 @@ LLVM_LIBC_FUNCTION(wchar_t *, wcstok,
str = *context;
}
- wchar_t *tok_start, *tok_end;
- for (tok_start = str; *tok_start != L'\0' && isADelimeter(*tok_start, delim);
- ++tok_start)
- ;
+ wchar_t *tok_start = str;
+ while (*tok_start != L'\0' && internal::wcschr(delims, *tok_start))
+ ++tok_start;
- for (tok_end = tok_start; *tok_end != L'\0' && !isADelimeter(*tok_end, delim);
- ++tok_end)
- ;
+ wchar_t *tok_end = tok_start;
+ while (*tok_end != L'\0' && !internal::wcschr(delims, *tok_end))
+ ++tok_end;
if (*tok_end != L'\0') {
*tok_end = L'\0';
diff --git a/libc/test/integration/src/stdlib/gpu/malloc_stress.cpp b/libc/test/integration/src/stdlib/gpu/malloc_stress.cpp
index 77479f8..4c540a8 100644
--- a/libc/test/integration/src/stdlib/gpu/malloc_stress.cpp
+++ b/libc/test/integration/src/stdlib/gpu/malloc_stress.cpp
@@ -14,6 +14,20 @@
using namespace LIBC_NAMESPACE;
+static inline uint32_t entropy() {
+ return (static_cast<uint32_t>(gpu::processor_clock()) ^
+ (gpu::get_thread_id_x() * 0x632be59b) ^
+ (gpu::get_block_id_x() * 0x85157af5)) *
+ 0x9e3779bb;
+}
+
+static inline uint32_t xorshift32(uint32_t &state) {
+ state ^= state << 13;
+ state ^= state >> 17;
+ state ^= state << 5;
+ return state * 0x9e3779bb;
+}
+
static inline void use(uint8_t *ptr, uint32_t size) {
EXPECT_NE(ptr, nullptr);
for (int i = 0; i < size; ++i)
@@ -34,5 +48,19 @@ TEST_MAIN(int, char **, char **) {
for (int i = 0; i < 256; ++i)
free(ptrs[i]);
+
+ uint32_t state = entropy();
+ for (int i = 0; i < 1024; ++i) {
+ if (xorshift32(state) % 2) {
+ uint64_t size = xorshift32(state) % 256 + 16;
+ uint64_t *ptr = reinterpret_cast<uint64_t *>(malloc(size));
+ *ptr = gpu::get_thread_id();
+
+ EXPECT_EQ(*ptr, gpu::get_thread_id());
+ ASSERT_TRUE(ptr);
+ ASSERT_TRUE(__builtin_is_aligned(ptr, 16));
+ free(ptr);
+ }
+ }
return 0;
}
diff --git a/libc/test/shared/CMakeLists.txt b/libc/test/shared/CMakeLists.txt
index 89b607d..77f3617 100644
--- a/libc/test/shared/CMakeLists.txt
+++ b/libc/test/shared/CMakeLists.txt
@@ -12,6 +12,11 @@ add_fp_unittest(
libc.src.__support.math.acosf16
libc.src.__support.math.acoshf
libc.src.__support.math.acoshf16
+ libc.src.__support.math.acospif16
+ libc.src.__support.math.asin
+ libc.src.__support.math.asinf
+ libc.src.__support.math.asinf16
+ libc.src.__support.math.asinhf
libc.src.__support.math.erff
libc.src.__support.math.exp
libc.src.__support.math.exp10
diff --git a/libc/test/shared/shared_math_test.cpp b/libc/test/shared/shared_math_test.cpp
index 8d3cebd..2e4450a 100644
--- a/libc/test/shared/shared_math_test.cpp
+++ b/libc/test/shared/shared_math_test.cpp
@@ -8,64 +8,73 @@
#include "shared/math.h"
#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
#ifdef LIBC_TYPES_HAS_FLOAT16
TEST(LlvmLibcSharedMathTest, AllFloat16) {
int exponent;
- EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::acoshf16(1.0f));
+ EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::acoshf16(1.0f16));
+ EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::acospif16(1.0f16));
+ EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::asinf16(0.0f16));
EXPECT_FP_EQ(0x1p+0f16, LIBC_NAMESPACE::shared::exp10f16(0.0f16));
EXPECT_FP_EQ(0x1p+0f16, LIBC_NAMESPACE::shared::expf16(0.0f16));
- ASSERT_FP_EQ(float16(8 << 5), LIBC_NAMESPACE::shared::ldexpf16(float(8), 5));
+ ASSERT_FP_EQ(float16(8 << 5), LIBC_NAMESPACE::shared::ldexpf16(8.0f16, 5));
ASSERT_FP_EQ(float16(-1 * (8 << 5)),
- LIBC_NAMESPACE::shared::ldexpf16(float(-8), 5));
+ LIBC_NAMESPACE::shared::ldexpf16(-8.0f16, 5));
- EXPECT_FP_EQ_ALL_ROUNDING(0.75f16,
- LIBC_NAMESPACE::shared::frexpf16(24.0f, &exponent));
+ EXPECT_FP_EQ_ALL_ROUNDING(
+ 0.75f16, LIBC_NAMESPACE::shared::frexpf16(24.0f16, &exponent));
EXPECT_EQ(exponent, 5);
EXPECT_FP_EQ(0x1.921fb6p+0f16, LIBC_NAMESPACE::shared::acosf16(0.0f16));
}
-#endif
+#endif // LIBC_TYPES_HAS_FLOAT16
TEST(LlvmLibcSharedMathTest, AllFloat) {
int exponent;
EXPECT_FP_EQ(0x1.921fb6p+0, LIBC_NAMESPACE::shared::acosf(0.0f));
+ EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::acoshf(1.0f));
+ EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::asinf(0.0f));
+ EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::asinhf(0.0f));
+ EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::erff(0.0f));
EXPECT_FP_EQ(0x1p+0f, LIBC_NAMESPACE::shared::exp10f(0.0f));
EXPECT_FP_EQ(0x1p+0f, LIBC_NAMESPACE::shared::expf(0.0f));
- EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::erff(0.0f));
- EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::acoshf(1.0f));
EXPECT_FP_EQ_ALL_ROUNDING(0.75f,
LIBC_NAMESPACE::shared::frexpf(24.0f, &exponent));
EXPECT_EQ(exponent, 5);
- ASSERT_FP_EQ(float(8 << 5), LIBC_NAMESPACE::shared::ldexpf(float(8), 5));
- ASSERT_FP_EQ(float(-1 * (8 << 5)),
- LIBC_NAMESPACE::shared::ldexpf(float(-8), 5));
+ ASSERT_FP_EQ(float(8 << 5), LIBC_NAMESPACE::shared::ldexpf(8.0f, 5));
+ ASSERT_FP_EQ(float(-1 * (8 << 5)), LIBC_NAMESPACE::shared::ldexpf(-8.0f, 5));
}
TEST(LlvmLibcSharedMathTest, AllDouble) {
EXPECT_FP_EQ(0x1.921fb54442d18p+0, LIBC_NAMESPACE::shared::acos(0.0));
+ EXPECT_FP_EQ(0x0p+0, LIBC_NAMESPACE::shared::asin(0.0));
EXPECT_FP_EQ(0x1p+0, LIBC_NAMESPACE::shared::exp(0.0));
EXPECT_FP_EQ(0x1p+0, LIBC_NAMESPACE::shared::exp10(0.0));
}
+#ifdef LIBC_TYPES_HAS_FLOAT128
+
TEST(LlvmLibcSharedMathTest, AllFloat128) {
int exponent;
- EXPECT_FP_EQ_ALL_ROUNDING(
- float128(0.75), LIBC_NAMESPACE::shared::frexpf128(24.0f, &exponent));
+ EXPECT_FP_EQ_ALL_ROUNDING(float128(0.75), LIBC_NAMESPACE::shared::frexpf128(
+ float128(24), &exponent));
EXPECT_EQ(exponent, 5);
ASSERT_FP_EQ(float128(8 << 5),
- LIBC_NAMESPACE::shared::ldexpf128(float(8), 5));
+ LIBC_NAMESPACE::shared::ldexpf128(float128(8), 5));
ASSERT_FP_EQ(float128(-1 * (8 << 5)),
- LIBC_NAMESPACE::shared::ldexpf128(float(-8), 5));
+ LIBC_NAMESPACE::shared::ldexpf128(float128(-8), 5));
}
+
+#endif // LIBC_TYPES_HAS_FLOAT128
diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index f420ecc..fad89dc 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -99,7 +99,22 @@ add_libc_test(
libc.src.string.memset
libc.src.wchar.mbrlen
libc.hdr.types.mbstate_t
+ libc.hdr.types.wchar_t
libc.test.UnitTest.ErrnoCheckingTest
+)
+
+add_libc_test(
+ mbsinit_test
+ SUITE
+ libc_wchar_unittests
+ SRCS
+ mbsinit_test.cpp
+ DEPENDS
+ libc.src.string.memset
+ libc.src.wchar.mbsinit
+ libc.src.wchar.mbrtowc
+ libc.hdr.types.mbstate_t
+ libc.hdr.types.wchar_t
)
add_libc_test(
diff --git a/libc/test/src/wchar/mbsinit_test.cpp b/libc/test/src/wchar/mbsinit_test.cpp
new file mode 100644
index 0000000..ecb48aa
--- /dev/null
+++ b/libc/test/src/wchar/mbsinit_test.cpp
@@ -0,0 +1,33 @@
+//===-- Unittests for mbsinit ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/types/wchar_t.h"
+#include "src/string/memset.h"
+#include "src/wchar/mbrtowc.h"
+#include "src/wchar/mbsinit.h"
+#include "test/UnitTest/Test.h"
+
+TEST(LlvmLibcMBSInitTest, EmptyState) {
+ mbstate_t ps;
+ LIBC_NAMESPACE::memset(&ps, 0, sizeof(mbstate_t));
+ ASSERT_NE(LIBC_NAMESPACE::mbsinit(&ps), 0);
+ ASSERT_NE(LIBC_NAMESPACE::mbsinit(nullptr), 0);
+}
+
+TEST(LlvmLibcMBSInitTest, ConversionTest) {
+ const char *src = "\xf0\x9f\xa4\xa3"; // 4 byte emoji
+ wchar_t dest[2];
+ mbstate_t ps;
+ LIBC_NAMESPACE::memset(&ps, 0, sizeof(mbstate_t));
+
+ ASSERT_NE(LIBC_NAMESPACE::mbsinit(&ps), 0);
+ LIBC_NAMESPACE::mbrtowc(dest, src, 2, &ps); // partial conversion
+ ASSERT_EQ(LIBC_NAMESPACE::mbsinit(&ps), 0);
+ LIBC_NAMESPACE::mbrtowc(dest, src + 2, 2, &ps); // complete conversion
+ ASSERT_NE(LIBC_NAMESPACE::mbsinit(&ps), 0); // state should be reset now
+}