diff options
Diffstat (limited to 'libc/src/math')
118 files changed, 2235 insertions, 2848 deletions
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt index c3840d3..766c60d 100644 --- a/libc/src/math/CMakeLists.txt +++ b/libc/src/math/CMakeLists.txt @@ -58,6 +58,8 @@ add_math_entrypoint_object(asinh) add_math_entrypoint_object(asinhf) add_math_entrypoint_object(asinhf16) +add_math_entrypoint_object(asinpif16) + add_math_entrypoint_object(atan) add_math_entrypoint_object(atanf) add_math_entrypoint_object(atanf16) @@ -226,60 +228,70 @@ add_math_entrypoint_object(fmaxf) add_math_entrypoint_object(fmaxl) add_math_entrypoint_object(fmaxf128) add_math_entrypoint_object(fmaxf16) +add_math_entrypoint_object(fmaxbf16) add_math_entrypoint_object(fmin) add_math_entrypoint_object(fminf) add_math_entrypoint_object(fminl) add_math_entrypoint_object(fminf128) add_math_entrypoint_object(fminf16) +add_math_entrypoint_object(fminbf16) add_math_entrypoint_object(fmaximum) add_math_entrypoint_object(fmaximumf) add_math_entrypoint_object(fmaximuml) add_math_entrypoint_object(fmaximumf16) add_math_entrypoint_object(fmaximumf128) +add_math_entrypoint_object(fmaximumbf16) add_math_entrypoint_object(fmaximum_num) add_math_entrypoint_object(fmaximum_numf) add_math_entrypoint_object(fmaximum_numl) add_math_entrypoint_object(fmaximum_numf16) add_math_entrypoint_object(fmaximum_numf128) +add_math_entrypoint_object(fmaximum_numbf16) add_math_entrypoint_object(fmaximum_mag) add_math_entrypoint_object(fmaximum_magf) add_math_entrypoint_object(fmaximum_magl) add_math_entrypoint_object(fmaximum_magf16) add_math_entrypoint_object(fmaximum_magf128) +add_math_entrypoint_object(fmaximum_magbf16) add_math_entrypoint_object(fmaximum_mag_num) add_math_entrypoint_object(fmaximum_mag_numf) add_math_entrypoint_object(fmaximum_mag_numl) add_math_entrypoint_object(fmaximum_mag_numf16) add_math_entrypoint_object(fmaximum_mag_numf128) +add_math_entrypoint_object(fmaximum_mag_numbf16) add_math_entrypoint_object(fminimum) add_math_entrypoint_object(fminimumf) 
add_math_entrypoint_object(fminimuml) add_math_entrypoint_object(fminimumf16) add_math_entrypoint_object(fminimumf128) +add_math_entrypoint_object(fminimumbf16) add_math_entrypoint_object(fminimum_num) add_math_entrypoint_object(fminimum_numf) add_math_entrypoint_object(fminimum_numl) add_math_entrypoint_object(fminimum_numf16) add_math_entrypoint_object(fminimum_numf128) +add_math_entrypoint_object(fminimum_numbf16) add_math_entrypoint_object(fminimum_mag) add_math_entrypoint_object(fminimum_magf) add_math_entrypoint_object(fminimum_magl) add_math_entrypoint_object(fminimum_magf16) add_math_entrypoint_object(fminimum_magf128) +add_math_entrypoint_object(fminimum_magbf16) add_math_entrypoint_object(fminimum_mag_num) add_math_entrypoint_object(fminimum_mag_numf) add_math_entrypoint_object(fminimum_mag_numl) add_math_entrypoint_object(fminimum_mag_numf16) add_math_entrypoint_object(fminimum_mag_numf128) +add_math_entrypoint_object(fminimum_mag_numbf16) add_math_entrypoint_object(fmul) add_math_entrypoint_object(fmull) @@ -302,12 +314,14 @@ add_math_entrypoint_object(fromfpf) add_math_entrypoint_object(fromfpl) add_math_entrypoint_object(fromfpf16) add_math_entrypoint_object(fromfpf128) +add_math_entrypoint_object(fromfpbf16) add_math_entrypoint_object(fromfpx) add_math_entrypoint_object(fromfpxf) add_math_entrypoint_object(fromfpxl) add_math_entrypoint_object(fromfpxf16) add_math_entrypoint_object(fromfpxf128) +add_math_entrypoint_object(fromfpxbf16) add_math_entrypoint_object(fsub) add_math_entrypoint_object(fsubl) @@ -557,9 +571,36 @@ add_math_entrypoint_object(ufromfpf) add_math_entrypoint_object(ufromfpl) add_math_entrypoint_object(ufromfpf16) add_math_entrypoint_object(ufromfpf128) +add_math_entrypoint_object(ufromfpbf16) add_math_entrypoint_object(ufromfpx) add_math_entrypoint_object(ufromfpxf) add_math_entrypoint_object(ufromfpxl) add_math_entrypoint_object(ufromfpxf16) add_math_entrypoint_object(ufromfpxf128) +add_math_entrypoint_object(ufromfpxbf16) + 
+add_math_entrypoint_object(bf16add) +add_math_entrypoint_object(bf16addf) +add_math_entrypoint_object(bf16addl) +add_math_entrypoint_object(bf16addf128) + +add_math_entrypoint_object(bf16div) +add_math_entrypoint_object(bf16divf) +add_math_entrypoint_object(bf16divl) +add_math_entrypoint_object(bf16divf128) + +add_math_entrypoint_object(bf16fma) +add_math_entrypoint_object(bf16fmaf) +add_math_entrypoint_object(bf16fmal) +add_math_entrypoint_object(bf16fmaf128) + +add_math_entrypoint_object(bf16mul) +add_math_entrypoint_object(bf16mulf) +add_math_entrypoint_object(bf16mull) +add_math_entrypoint_object(bf16mulf128) + +add_math_entrypoint_object(bf16sub) +add_math_entrypoint_object(bf16subf) +add_math_entrypoint_object(bf16subl) +add_math_entrypoint_object(bf16subf128) diff --git a/libc/src/math/asinpif16.h b/libc/src/math/asinpif16.h new file mode 100644 index 0000000..b97166a --- /dev/null +++ b/libc/src/math/asinpif16.h @@ -0,0 +1,21 @@ +//===-- Implementation header for asinpif16 ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_ASINPIF16_H +#define LLVM_LIBC_SRC_MATH_ASINPIF16_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +float16 asinpif16(float16 x); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_ASINPIF16_H diff --git a/libc/src/math/bf16add.h b/libc/src/math/bf16add.h new file mode 100644 index 0000000..a29970e --- /dev/null +++ b/libc/src/math/bf16add.h @@ -0,0 +1,21 @@ +//===-- Implementation header for bf16add -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_BF16ADD_H +#define LLVM_LIBC_SRC_MATH_BF16ADD_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 bf16add(double x, double y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_BF16ADD_H diff --git a/libc/src/math/bf16addf.h b/libc/src/math/bf16addf.h new file mode 100644 index 0000000..80a5e2a --- /dev/null +++ b/libc/src/math/bf16addf.h @@ -0,0 +1,21 @@ +//===-- Implementation header for bf16addf ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_BF16ADDF_H +#define LLVM_LIBC_SRC_MATH_BF16ADDF_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 bf16addf(float x, float y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_BF16ADDF_H diff --git a/libc/src/math/bf16addf128.h b/libc/src/math/bf16addf128.h new file mode 100644 index 0000000..3c2f3a1 --- /dev/null +++ b/libc/src/math/bf16addf128.h @@ -0,0 +1,21 @@ +//===-- Implementation header for bf16addf128 -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_BF16ADDF128_H +#define LLVM_LIBC_SRC_MATH_BF16ADDF128_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 bf16addf128(float128 x, float128 y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_BF16ADDF128_H diff --git a/libc/src/math/bf16addl.h b/libc/src/math/bf16addl.h new file mode 100644 index 0000000..a9e7d68 --- /dev/null +++ b/libc/src/math/bf16addl.h @@ -0,0 +1,21 @@ +//===-- Implementation header for bf16addl ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_BF16ADDL_H +#define LLVM_LIBC_SRC_MATH_BF16ADDL_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 bf16addl(long double x, long double y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_BF16ADDL_H diff --git a/libc/src/math/bf16div.h b/libc/src/math/bf16div.h new file mode 100644 index 0000000..ade9c06 --- /dev/null +++ b/libc/src/math/bf16div.h @@ -0,0 +1,21 @@ +//===-- Implementation header for bf16div -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_BF16DIV_H +#define LLVM_LIBC_SRC_MATH_BF16DIV_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 bf16div(double x, double y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_BF16DIV_H diff --git a/libc/src/math/bf16divf.h b/libc/src/math/bf16divf.h new file mode 100644 index 0000000..481b176 --- /dev/null +++ b/libc/src/math/bf16divf.h @@ -0,0 +1,21 @@ +//===-- Implementation header for bf16divf ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_BF16DIVF_H +#define LLVM_LIBC_SRC_MATH_BF16DIVF_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 bf16divf(float x, float y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_BF16DIVF_H diff --git a/libc/src/math/bf16divf128.h b/libc/src/math/bf16divf128.h new file mode 100644 index 0000000..d990066 --- /dev/null +++ b/libc/src/math/bf16divf128.h @@ -0,0 +1,21 @@ +//===-- Implementation header for bf16divf128 -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_BF16DIVF128_H +#define LLVM_LIBC_SRC_MATH_BF16DIVF128_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 bf16divf128(float128 x, float128 y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_BF16DIVF128_H diff --git a/libc/src/math/bf16divl.h b/libc/src/math/bf16divl.h new file mode 100644 index 0000000..b19ac873 --- /dev/null +++ b/libc/src/math/bf16divl.h @@ -0,0 +1,21 @@ +//===-- Implementation header for bf16divl ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_BF16DIVL_H +#define LLVM_LIBC_SRC_MATH_BF16DIVL_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 bf16divl(long double x, long double y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_BF16DIVL_H diff --git a/libc/src/math/bf16fma.h b/libc/src/math/bf16fma.h new file mode 100644 index 0000000..aa54956 --- /dev/null +++ b/libc/src/math/bf16fma.h @@ -0,0 +1,21 @@ +//===-- Implementation header for bf16fma -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_BF16FMA_H +#define LLVM_LIBC_SRC_MATH_BF16FMA_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 bf16fma(double x, double y, double z); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_BF16FMA_H diff --git a/libc/src/math/bf16fmaf.h b/libc/src/math/bf16fmaf.h new file mode 100644 index 0000000..e8582bd --- /dev/null +++ b/libc/src/math/bf16fmaf.h @@ -0,0 +1,21 @@ +//===-- Implementation header for bf16fmaf ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_BF16FMAF_H +#define LLVM_LIBC_SRC_MATH_BF16FMAF_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 bf16fmaf(float x, float y, float z); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_BF16FMAF_H diff --git a/libc/src/math/bf16fmaf128.h b/libc/src/math/bf16fmaf128.h new file mode 100644 index 0000000..4215e54 --- /dev/null +++ b/libc/src/math/bf16fmaf128.h @@ -0,0 +1,21 @@ +//===-- Implementation header for bf16fmaf128 -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_BF16FMAF128_H +#define LLVM_LIBC_SRC_MATH_BF16FMAF128_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 bf16fmaf128(float128 x, float128 y, float128 z); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_BF16FMAF128_H diff --git a/libc/src/math/bf16fmal.h b/libc/src/math/bf16fmal.h new file mode 100644 index 0000000..b92f17b --- /dev/null +++ b/libc/src/math/bf16fmal.h @@ -0,0 +1,21 @@ +//===-- Implementation header for bf16fmal ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_BF16FMAL_H +#define LLVM_LIBC_SRC_MATH_BF16FMAL_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 bf16fmal(long double x, long double y, long double z); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_BF16FMAL_H diff --git a/libc/src/math/bf16mul.h b/libc/src/math/bf16mul.h new file mode 100644 index 0000000..14e8a30 --- /dev/null +++ b/libc/src/math/bf16mul.h @@ -0,0 +1,21 @@ +//===-- Implementation header for bf16mul -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_BF16MUL_H +#define LLVM_LIBC_SRC_MATH_BF16MUL_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 bf16mul(double x, double y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_BF16MUL_H diff --git a/libc/src/math/bf16mulf.h b/libc/src/math/bf16mulf.h new file mode 100644 index 0000000..1d02c8e --- /dev/null +++ b/libc/src/math/bf16mulf.h @@ -0,0 +1,21 @@ +//===-- Implementation header for bf16mulf ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_BF16MULF_H +#define LLVM_LIBC_SRC_MATH_BF16MULF_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 bf16mulf(float x, float y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_BF16MULF_H diff --git a/libc/src/math/bf16mulf128.h b/libc/src/math/bf16mulf128.h new file mode 100644 index 0000000..6ba7cef --- /dev/null +++ b/libc/src/math/bf16mulf128.h @@ -0,0 +1,21 @@ +//===-- Implementation header for bf16mulf128 -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_BF16MULF128_H +#define LLVM_LIBC_SRC_MATH_BF16MULF128_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 bf16mulf128(float128 x, float128 y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_BF16MULF128_H diff --git a/libc/src/math/bf16mull.h b/libc/src/math/bf16mull.h new file mode 100644 index 0000000..dad6523 --- /dev/null +++ b/libc/src/math/bf16mull.h @@ -0,0 +1,21 @@ +//===-- Implementation header for bf16mull ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_BF16MULL_H +#define LLVM_LIBC_SRC_MATH_BF16MULL_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 bf16mull(long double x, long double y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_BF16MULL_H diff --git a/libc/src/math/bf16sub.h b/libc/src/math/bf16sub.h new file mode 100644 index 0000000..8108e914 --- /dev/null +++ b/libc/src/math/bf16sub.h @@ -0,0 +1,21 @@ +//===-- Implementation header for bf16sub -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_BF16SUB_H +#define LLVM_LIBC_SRC_MATH_BF16SUB_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 bf16sub(double x, double y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_BF16SUB_H diff --git a/libc/src/math/bf16subf.h b/libc/src/math/bf16subf.h new file mode 100644 index 0000000..1bd79bf --- /dev/null +++ b/libc/src/math/bf16subf.h @@ -0,0 +1,21 @@ +//===-- Implementation header for bf16subf ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_BF16SUBF_H +#define LLVM_LIBC_SRC_MATH_BF16SUBF_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 bf16subf(float x, float y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_BF16SUBF_H diff --git a/libc/src/math/bf16subf128.h b/libc/src/math/bf16subf128.h new file mode 100644 index 0000000..19590e8 --- /dev/null +++ b/libc/src/math/bf16subf128.h @@ -0,0 +1,21 @@ +//===-- Implementation header for bf16subf128 -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_BF16SUBF128_H +#define LLVM_LIBC_SRC_MATH_BF16SUBF128_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 bf16subf128(float128 x, float128 y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_BF16SUBF128_H diff --git a/libc/src/math/bf16subl.h b/libc/src/math/bf16subl.h new file mode 100644 index 0000000..13b2093 --- /dev/null +++ b/libc/src/math/bf16subl.h @@ -0,0 +1,21 @@ +//===-- Implementation header for bf16subl ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_BF16SUBL_H +#define LLVM_LIBC_SRC_MATH_BF16SUBL_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 bf16subl(long double x, long double y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_BF16SUBL_H diff --git a/libc/src/math/fmaxbf16.h b/libc/src/math/fmaxbf16.h new file mode 100644 index 0000000..bdbd14c --- /dev/null +++ b/libc/src/math/fmaxbf16.h @@ -0,0 +1,21 @@ +//===-- Implementation header for fmaxbf16 ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_FMAXBF16_H +#define LLVM_LIBC_SRC_MATH_FMAXBF16_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 fmaxbf16(bfloat16 x, bfloat16 y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_FMAXBF16_H diff --git a/libc/src/math/fmaximum_mag_numbf16.h b/libc/src/math/fmaximum_mag_numbf16.h new file mode 100644 index 0000000..7663525 --- /dev/null +++ b/libc/src/math/fmaximum_mag_numbf16.h @@ -0,0 +1,21 @@ +//===-- Implementation header for fmaximum_mag_numbf16 ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_FMAXIMUM_MAG_NUMBF16_H +#define LLVM_LIBC_SRC_MATH_FMAXIMUM_MAG_NUMBF16_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 fmaximum_mag_numbf16(bfloat16 x, bfloat16 y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_FMAXIMUM_MAG_NUMBF16_H diff --git a/libc/src/math/fmaximum_magbf16.h b/libc/src/math/fmaximum_magbf16.h new file mode 100644 index 0000000..ff0ff1a --- /dev/null +++ b/libc/src/math/fmaximum_magbf16.h @@ -0,0 +1,21 @@ +//===-- Implementation header for fmaximum_magbf16 --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_FMAXIMUM_MAGBF16_H +#define LLVM_LIBC_SRC_MATH_FMAXIMUM_MAGBF16_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 fmaximum_magbf16(bfloat16 x, bfloat16 y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_FMAXIMUM_MAGBF16_H diff --git a/libc/src/math/fmaximum_numbf16.h b/libc/src/math/fmaximum_numbf16.h new file mode 100644 index 0000000..f23bc52 --- /dev/null +++ b/libc/src/math/fmaximum_numbf16.h @@ -0,0 +1,21 @@ +//===-- Implementation header for fmaximum_numbf16 --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_FMAXIMUM_NUMBF16_H +#define LLVM_LIBC_SRC_MATH_FMAXIMUM_NUMBF16_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 fmaximum_numbf16(bfloat16 x, bfloat16 y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_FMAXIMUM_NUMBF16_H diff --git a/libc/src/math/fmaximumbf16.h b/libc/src/math/fmaximumbf16.h new file mode 100644 index 0000000..9842e99 --- /dev/null +++ b/libc/src/math/fmaximumbf16.h @@ -0,0 +1,21 @@ +//===-- Implementation header for fmaximumbf16 ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_FMAXIMUMBF16_H +#define LLVM_LIBC_SRC_MATH_FMAXIMUMBF16_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 fmaximumbf16(bfloat16 x, bfloat16 y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_FMAXIMUMBF16_H diff --git a/libc/src/math/fminbf16.h b/libc/src/math/fminbf16.h new file mode 100644 index 0000000..4c1ada9 --- /dev/null +++ b/libc/src/math/fminbf16.h @@ -0,0 +1,21 @@ +//===-- Implementation header for fminbf16 ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_FMINBF16_H +#define LLVM_LIBC_SRC_MATH_FMINBF16_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 fminbf16(bfloat16 x, bfloat16 y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_FMINBF16_H diff --git a/libc/src/math/fminimum_mag_numbf16.h b/libc/src/math/fminimum_mag_numbf16.h new file mode 100644 index 0000000..2773381 --- /dev/null +++ b/libc/src/math/fminimum_mag_numbf16.h @@ -0,0 +1,21 @@ +//===-- Implementation header for fminimum_mag_numbf16 ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_FMINIMUM_MAG_NUMBF16_H +#define LLVM_LIBC_SRC_MATH_FMINIMUM_MAG_NUMBF16_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 fminimum_mag_numbf16(bfloat16 x, bfloat16 y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_FMINIMUM_MAG_NUMBF16_H diff --git a/libc/src/math/fminimum_magbf16.h b/libc/src/math/fminimum_magbf16.h new file mode 100644 index 0000000..fee5c4c8 --- /dev/null +++ b/libc/src/math/fminimum_magbf16.h @@ -0,0 +1,21 @@ +//===-- Implementation header for fminimum_magbf16 --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_FMINIMUM_MAGBF16_H +#define LLVM_LIBC_SRC_MATH_FMINIMUM_MAGBF16_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 fminimum_magbf16(bfloat16 x, bfloat16 y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_FMINIMUM_MAGBF16_H diff --git a/libc/src/math/fminimum_numbf16.h b/libc/src/math/fminimum_numbf16.h new file mode 100644 index 0000000..a3fd474 --- /dev/null +++ b/libc/src/math/fminimum_numbf16.h @@ -0,0 +1,21 @@ +//===-- Implementation header for fminimum_numbf16 --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_FMINIMUM_NUMBF16_H +#define LLVM_LIBC_SRC_MATH_FMINIMUM_NUMBF16_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 fminimum_numbf16(bfloat16 x, bfloat16 y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_FMINIMUM_NUMBF16_H diff --git a/libc/src/math/fminimumbf16.h b/libc/src/math/fminimumbf16.h new file mode 100644 index 0000000..07f1ada --- /dev/null +++ b/libc/src/math/fminimumbf16.h @@ -0,0 +1,21 @@ +//===-- Implementation header for fminimumbf16 ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_FMINIMUMBF16_H +#define LLVM_LIBC_SRC_MATH_FMINIMUMBF16_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 fminimumbf16(bfloat16 x, bfloat16 y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_FMINIMUMBF16_H diff --git a/libc/src/math/fromfpbf16.h b/libc/src/math/fromfpbf16.h new file mode 100644 index 0000000..bff991c --- /dev/null +++ b/libc/src/math/fromfpbf16.h @@ -0,0 +1,21 @@ +//===-- Implementation header for fromfpbf16 --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_FROMFPBF16_H +#define LLVM_LIBC_SRC_MATH_FROMFPBF16_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 fromfpbf16(bfloat16 x, int rnd, unsigned int width); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_FROMFPBF16_H diff --git a/libc/src/math/fromfpxbf16.h b/libc/src/math/fromfpxbf16.h new file mode 100644 index 0000000..e40d975 --- /dev/null +++ b/libc/src/math/fromfpxbf16.h @@ -0,0 +1,21 @@ +//===-- Implementation header for fromfpxbf16 -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_FROMFPXBF16_H +#define LLVM_LIBC_SRC_MATH_FROMFPXBF16_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 fromfpxbf16(bfloat16 x, int rnd, unsigned int width); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_FROMFPXBF16_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index 0bec7dd..2bb4d44 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -278,69 +278,6 @@ add_entrypoint_object( libc.src.__support.FPUtil.generic.add_sub ) -add_header_library( - range_reduction - HDRS - range_reduction.h - range_reduction_fma.h - DEPENDS - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.fma - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.nearest_integer - 
libc.src.__support.common -) - -add_header_library( - range_reduction_double - HDRS - range_reduction_double_common.h - range_reduction_double_fma.h - range_reduction_double_nofma.h - DEPENDS - libc.src.__support.FPUtil.double_double - libc.src.__support.FPUtil.dyadic_float - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.fma - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.nearest_integer - libc.src.__support.common - libc.src.__support.integer_literals -) - -add_header_library( - sincosf_utils - HDRS - sincosf_utils.h - DEPENDS - .range_reduction - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.polyeval - libc.src.__support.common -) - -add_header_library( - sincosf16_utils - HDRS - sincosf16_utils.h - DEPENDS - libc.src.__support.FPUtil.polyeval - libc.src.__support.FPUtil.nearest_integer - libc.src.__support.common -) - -add_header_library( - sincos_eval - HDRS - sincos_eval.h - DEPENDS - libc.src.__support.FPUtil.double_double - libc.src.__support.FPUtil.dyadic_float - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.polyeval - libc.src.__support.integer_literals -) - add_entrypoint_object( cos SRCS @@ -348,16 +285,7 @@ add_entrypoint_object( HDRS ../cos.h DEPENDS - .range_reduction_double - .sincos_eval - libc.hdr.errno_macros - libc.src.errno.errno - libc.src.__support.FPUtil.double_double - libc.src.__support.FPUtil.dyadic_float - libc.src.__support.FPUtil.except_value_utils - libc.src.__support.FPUtil.fenv_impl - libc.src.__support.FPUtil.fp_bits - libc.src.__support.macros.optimization + libc.src.__support.math.cos ) add_entrypoint_object( @@ -367,15 +295,7 @@ add_entrypoint_object( HDRS ../cosf.h DEPENDS - .sincosf_utils - libc.src.errno.errno - libc.src.__support.FPUtil.basic_operations - libc.src.__support.FPUtil.fenv_impl - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.except_value_utils - libc.src.__support.FPUtil.fma - libc.src.__support.FPUtil.polyeval - 
libc.src.__support.macros.optimization + libc.src.__support.math.cosf ) add_entrypoint_object( @@ -385,16 +305,7 @@ add_entrypoint_object( HDRS ../cosf16.h DEPENDS - .sincosf16_utils - libc.hdr.errno_macros - libc.hdr.fenv_macros - libc.src.__support.FPUtil.cast - libc.src.__support.FPUtil.fenv_impl - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.except_value_utils - libc.src.__support.FPUtil.multiply_add - libc.src.__support.macros.optimization - libc.src.__support.macros.properties.types + libc.src.__support.math.cosf16 ) add_entrypoint_object( @@ -404,7 +315,7 @@ add_entrypoint_object( HDRS ../cospif.h DEPENDS - .sincosf_utils + libc.src.__support.math.sincosf_utils libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.fma @@ -419,7 +330,6 @@ add_entrypoint_object( HDRS ../cospif16.h DEPENDS - .sincosf16_utils libc.hdr.errno_macros libc.hdr.fenv_macros libc.src.__support.FPUtil.cast @@ -427,6 +337,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.multiply_add libc.src.__support.macros.optimization + libc.src.__support.math.sincosf16_utils ) add_entrypoint_object( @@ -436,8 +347,8 @@ add_entrypoint_object( HDRS ../sin.h DEPENDS - .range_reduction_double - .sincos_eval + libc.src.__support.math.range_reduction_double + libc.src.__support.math.sincos_eval libc.hdr.errno_macros libc.src.errno.errno libc.src.__support.FPUtil.double_double @@ -456,8 +367,8 @@ add_entrypoint_object( HDRS ../sinf.h DEPENDS - .range_reduction - .sincosf_utils + libc.src.__support.math.range_reduction + libc.src.__support.math.sincosf_utils libc.src.errno.errno libc.src.__support.FPUtil.basic_operations libc.src.__support.FPUtil.fenv_impl @@ -475,7 +386,6 @@ add_entrypoint_object( HDRS ../sinf16.h DEPENDS - .sincosf16_utils libc.hdr.errno_macros libc.hdr.fenv_macros libc.src.__support.FPUtil.cast @@ -485,6 +395,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.multiply_add 
libc.src.__support.macros.optimization libc.src.__support.macros.properties.types + libc.src.__support.math.sincosf16_utils COMPILE_OPTIONS ${libc_opt_high_flag} ) @@ -496,8 +407,8 @@ add_entrypoint_object( HDRS ../sincos.h DEPENDS - .range_reduction_double - .sincos_eval + libc.src.__support.math.range_reduction_double + libc.src.__support.math.sincos_eval libc.hdr.errno_macros libc.src.errno.errno libc.src.__support.FPUtil.double_double @@ -516,7 +427,7 @@ add_entrypoint_object( HDRS ../sinpif.h DEPENDS - .sincosf_utils + libc.src.__support.math.sincosf_utils libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.fma @@ -533,8 +444,8 @@ add_entrypoint_object( HDRS ../sincosf.h DEPENDS - .range_reduction - .sincosf_utils + libc.src.__support.math.range_reduction + libc.src.__support.math.sincosf_utils libc.src.errno.errno libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits @@ -552,7 +463,6 @@ add_entrypoint_object( HDRS ../sinpif16.h DEPENDS - .sincosf16_utils libc.hdr.errno_macros libc.hdr.fenv_macros libc.src.__support.FPUtil.cast @@ -560,6 +470,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.multiply_add libc.src.__support.macros.optimization + libc.src.__support.math.sincosf16_utils ) add_entrypoint_object( @@ -569,7 +480,7 @@ add_entrypoint_object( HDRS ../tan.h DEPENDS - .range_reduction_double + libc.src.__support.math.range_reduction_double libc.hdr.errno_macros libc.src.errno.errno libc.src.__support.FPUtil.double_double @@ -588,8 +499,8 @@ add_entrypoint_object( HDRS ../tanf.h DEPENDS - .range_reduction - .sincosf_utils + libc.src.__support.math.range_reduction + libc.src.__support.math.sincosf_utils libc.src.errno.errno libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fenv_impl @@ -608,7 +519,6 @@ add_entrypoint_object( HDRS ../tanf16.h DEPENDS - .sincosf16_utils libc.hdr.errno_macros libc.hdr.fenv_macros libc.src.__support.FPUtil.cast @@ 
-618,6 +528,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.multiply_add libc.src.__support.macros.optimization libc.src.__support.macros.properties.types + libc.src.__support.math.sincosf16_utils ) add_entrypoint_object( @@ -627,7 +538,7 @@ add_entrypoint_object( HDRS ../tanpif.h DEPENDS - .sincosf_utils + libc.src.__support.math.sincosf_utils libc.src.__support.FPUtil.except_value_utils libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits @@ -642,7 +553,6 @@ add_entrypoint_object( HDRS ../tanpif16.h DEPENDS - .sincosf16_utils libc.hdr.errno_macros libc.hdr.fenv_macros libc.src.__support.FPUtil.cast @@ -651,6 +561,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.except_value_utils libc.src.__support.FPUtil.multiply_add libc.src.__support.macros.optimization + libc.src.__support.math.sincosf16_utils ) add_entrypoint_object( @@ -1433,7 +1344,6 @@ add_entrypoint_object( ../exp2.h DEPENDS .common_constants - .explogxf libc.src.__support.CPP.bit libc.src.__support.CPP.optional libc.src.__support.FPUtil.dyadic_float @@ -1446,6 +1356,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.triple_double libc.src.__support.integer_literals libc.src.__support.macros.optimization + libc.src.__support.math.exp_utils libc.src.errno.errno ) @@ -1454,7 +1365,6 @@ add_header_library( HDRS exp2f_impl.h DEPENDS - .explogxf libc.src.__support.FPUtil.except_value_utils libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits @@ -1463,6 +1373,7 @@ add_header_library( libc.src.__support.FPUtil.polyeval libc.src.__support.FPUtil.rounding_mode libc.src.__support.macros.optimization + libc.src.__support.math.exp10f_utils libc.src.__support.common libc.src.errno.errno ) @@ -1484,7 +1395,6 @@ add_entrypoint_object( HDRS ../exp2f16.h DEPENDS - .expxf16 libc.hdr.errno_macros libc.hdr.fenv_macros libc.src.__support.FPUtil.cast @@ -1493,6 +1403,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.rounding_mode 
libc.src.__support.macros.optimization + libc.src.__support.math.expxf16_utils ) add_entrypoint_object( @@ -1502,7 +1413,6 @@ add_entrypoint_object( HDRS ../exp2m1f.h DEPENDS - .explogxf libc.src.errno.errno libc.src.__support.common libc.src.__support.FPUtil.except_value_utils @@ -1513,6 +1423,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.rounding_mode libc.src.__support.macros.optimization libc.src.__support.macros.properties.cpu_features + libc.src.__support.math.exp10f_utils ) add_entrypoint_object( @@ -1522,7 +1433,6 @@ add_entrypoint_object( HDRS ../exp2m1f16.h DEPENDS - .expxf16 libc.hdr.errno_macros libc.hdr.fenv_macros libc.src.__support.common @@ -1535,6 +1445,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.rounding_mode libc.src.__support.macros.optimization libc.src.__support.macros.properties.cpu_features + libc.src.__support.math.expxf16_utils ) add_entrypoint_object( @@ -1577,7 +1488,6 @@ add_entrypoint_object( HDRS ../exp10m1f.h DEPENDS - .explogxf libc.src.errno.errno libc.src.__support.common libc.src.__support.FPUtil.except_value_utils @@ -1587,6 +1497,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.polyeval libc.src.__support.FPUtil.rounding_mode libc.src.__support.macros.optimization + libc.src.__support.math.exp10f_utils ) add_entrypoint_object( @@ -1618,14 +1529,11 @@ add_entrypoint_object( ../expm1.h DEPENDS .common_constants - .explogxf libc.src.__support.CPP.bit - libc.src.__support.CPP.optional libc.src.__support.FPUtil.dyadic_float libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.nearest_integer libc.src.__support.FPUtil.polyeval libc.src.__support.FPUtil.rounding_mode libc.src.__support.FPUtil.triple_double @@ -1660,7 +1568,6 @@ add_entrypoint_object( HDRS ../expm1f16.h DEPENDS - .expxf16 libc.hdr.errno_macros libc.hdr.fenv_macros libc.src.__support.FPUtil.cast @@ -1671,6 +1578,7 @@ add_entrypoint_object( 
libc.src.__support.FPUtil.polyeval libc.src.__support.FPUtil.rounding_mode libc.src.__support.macros.optimization + libc.src.__support.math.expxf16_utils ) add_entrypoint_object( @@ -1682,7 +1590,6 @@ add_entrypoint_object( DEPENDS .common_constants .exp2f_impl - .explogxf libc.src.__support.math.exp10f libc.src.__support.CPP.bit libc.src.__support.FPUtil.fenv_impl @@ -2042,7 +1949,6 @@ add_entrypoint_object( HDRS ../log10f16.h DEPENDS - .expxf16 libc.hdr.errno_macros libc.hdr.fenv_macros libc.src.__support.FPUtil.cast @@ -2053,6 +1959,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.polyeval libc.src.__support.macros.optimization libc.src.__support.macros.properties.cpu_features + libc.src.__support.math.expxf16_utils ) add_entrypoint_object( @@ -2131,7 +2038,6 @@ add_entrypoint_object( HDRS ../log2f16.h DEPENDS - .expxf16 libc.hdr.errno_macros libc.hdr.fenv_macros libc.src.__support.FPUtil.cast @@ -2142,6 +2048,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.polyeval libc.src.__support.macros.optimization libc.src.__support.macros.properties.cpu_features + libc.src.__support.math.expxf16_utils ) add_entrypoint_object( @@ -2186,7 +2093,6 @@ add_entrypoint_object( HDRS ../logf16.h DEPENDS - .expxf16 libc.hdr.errno_macros libc.hdr.fenv_macros libc.src.__support.FPUtil.cast @@ -2197,6 +2103,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.polyeval libc.src.__support.macros.optimization libc.src.__support.macros.properties.cpu_features + libc.src.__support.math.expxf16_utils ) add_entrypoint_object( @@ -2361,6 +2268,21 @@ add_entrypoint_object( MISC_MATH_BASIC_OPS_OPT ) +add_entrypoint_object( + fminbf16 + SRCS + fminbf16.cpp + HDRS + ../fminbf16.h + DEPENDS + libc.src.__support.common + libc.src.__support.FPUtil.basic_operations + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.macros.config + libc.src.__support.macros.properties.types + FLAGS + MISC_MATH_BASIC_OPS_OPT +) add_entrypoint_object( fmax @@ -2421,6 +2343,22 @@ 
add_entrypoint_object( ) add_entrypoint_object( + fmaxbf16 + SRCS + fmaxbf16.cpp + HDRS + ../fmaxbf16.h + DEPENDS + libc.src.__support.common + libc.src.__support.FPUtil.basic_operations + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.macros.config + libc.src.__support.macros.properties.types + FLAGS + MISC_MATH_BASIC_OPS_OPT +) + +add_entrypoint_object( fmaximum SRCS fmaximum.cpp @@ -2479,6 +2417,21 @@ add_entrypoint_object( ) add_entrypoint_object( + fmaximumbf16 + SRCS + fmaximumbf16.cpp + HDRS + ../fmaximumbf16.h + DEPENDS + libc.src.__support.FPUtil.basic_operations + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.macros.config + libc.src.__support.macros.properties.types + FLAGS + MISC_MATH_BASIC_OPS_OPT +) + +add_entrypoint_object( fmaximum_num SRCS fmaximum_num.cpp @@ -2537,6 +2490,21 @@ add_entrypoint_object( ) add_entrypoint_object( + fmaximum_numbf16 + SRCS + fmaximum_numbf16.cpp + HDRS + ../fmaximum_numbf16.h + DEPENDS + libc.src.__support.FPUtil.basic_operations + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.macros.config + libc.src.__support.macros.properties.types + FLAGS + MISC_MATH_BASIC_OPS_OPT +) + +add_entrypoint_object( fmaximum_mag SRCS fmaximum_mag.cpp @@ -2595,6 +2563,21 @@ add_entrypoint_object( ) add_entrypoint_object( + fmaximum_magbf16 + SRCS + fmaximum_magbf16.cpp + HDRS + ../fmaximum_magbf16.h + DEPENDS + libc.src.__support.FPUtil.basic_operations + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.macros.config + libc.src.__support.macros.properties.types + FLAGS + MISC_MATH_BASIC_OPS_OPT +) + +add_entrypoint_object( fmaximum_mag_num SRCS fmaximum_mag_num.cpp @@ -2653,6 +2636,21 @@ add_entrypoint_object( ) add_entrypoint_object( + fmaximum_mag_numbf16 + SRCS + fmaximum_mag_numbf16.cpp + HDRS + ../fmaximum_mag_numbf16.h + DEPENDS + libc.src.__support.FPUtil.basic_operations + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.macros.config + libc.src.__support.macros.properties.types + FLAGS + 
MISC_MATH_BASIC_OPS_OPT +) + +add_entrypoint_object( fminimum SRCS fminimum.cpp @@ -2711,6 +2709,21 @@ add_entrypoint_object( ) add_entrypoint_object( + fminimumbf16 + SRCS + fminimumbf16.cpp + HDRS + ../fminimumbf16.h + DEPENDS + libc.src.__support.FPUtil.basic_operations + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.macros.config + libc.src.__support.macros.properties.types + FLAGS + MISC_MATH_BASIC_OPS_OPT +) + +add_entrypoint_object( fminimum_num SRCS fminimum_num.cpp @@ -2769,6 +2782,21 @@ add_entrypoint_object( ) add_entrypoint_object( + fminimum_numbf16 + SRCS + fminimum_numbf16.cpp + HDRS + ../fminimum_numbf16.h + DEPENDS + libc.src.__support.FPUtil.basic_operations + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.macros.config + libc.src.__support.macros.properties.types + FLAGS + MISC_MATH_BASIC_OPS_OPT +) + +add_entrypoint_object( fminimum_mag SRCS fminimum_mag.cpp @@ -2827,6 +2855,21 @@ add_entrypoint_object( ) add_entrypoint_object( + fminimum_magbf16 + SRCS + fminimum_magbf16.cpp + HDRS + ../fminimum_magbf16.h + DEPENDS + libc.src.__support.FPUtil.basic_operations + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.macros.config + libc.src.__support.macros.properties.types + FLAGS + MISC_MATH_BASIC_OPS_OPT +) + +add_entrypoint_object( fminimum_mag_num SRCS fminimum_mag_num.cpp @@ -2885,6 +2928,21 @@ add_entrypoint_object( ) add_entrypoint_object( + fminimum_mag_numbf16 + SRCS + fminimum_mag_numbf16.cpp + HDRS + ../fminimum_mag_numbf16.h + DEPENDS + libc.src.__support.FPUtil.basic_operations + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.macros.config + libc.src.__support.macros.properties.types + FLAGS + MISC_MATH_BASIC_OPS_OPT +) + +add_entrypoint_object( fmul SRCS fmul.cpp @@ -3688,6 +3746,20 @@ add_entrypoint_object( ) add_entrypoint_object( + fromfpbf16 + SRCS + fromfpbf16.cpp + HDRS + ../fromfpbf16.h + DEPENDS + libc.src.__support.common + libc.src.__support.FPUtil.bfloat16 + 
libc.src.__support.FPUtil.nearest_integer_operations + libc.src.__support.macros.config + libc.src.__support.macros.properties.types +) + +add_entrypoint_object( fromfpx SRCS fromfpx.cpp @@ -3740,6 +3812,20 @@ add_entrypoint_object( ) add_entrypoint_object( + fromfpxbf16 + SRCS + fromfpxbf16.cpp + HDRS + ../fromfpxbf16.h + DEPENDS + libc.src.__support.common + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.FPUtil.nearest_integer_operations + libc.src.__support.macros.config + libc.src.__support.macros.properties.types +) + +add_entrypoint_object( ufromfp SRCS ufromfp.cpp @@ -3792,6 +3878,20 @@ add_entrypoint_object( ) add_entrypoint_object( + ufromfpbf16 + SRCS + ufromfpbf16.cpp + HDRS + ../ufromfpbf16.h + DEPENDS + libc.src.__support.common + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.FPUtil.nearest_integer_operations + libc.src.__support.macros.config + libc.src.__support.macros.properties.types +) + +add_entrypoint_object( ufromfpx SRCS ufromfpx.cpp @@ -3843,17 +3943,18 @@ add_entrypoint_object( libc.src.__support.FPUtil.nearest_integer_operations ) -#TODO: Add errno include to the hyperbolic functions. 
-add_header_library( - explogxf +add_entrypoint_object( + ufromfpxbf16 + SRCS + ufromfpxbf16.cpp HDRS - explogxf.h + ../ufromfpxbf16.h DEPENDS - .common_constants - libc.src.__support.math.exp_utils - libc.src.__support.math.acoshf_utils - libc.src.__support.macros.properties.cpu_features - libc.src.errno.errno + libc.src.__support.common + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.FPUtil.nearest_integer_operations + libc.src.__support.macros.config + libc.src.__support.macros.properties.types ) add_entrypoint_object( @@ -3863,11 +3964,7 @@ add_entrypoint_object( HDRS ../coshf.h DEPENDS - .explogxf - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.rounding_mode - libc.src.__support.macros.optimization + libc.src.__support.math.coshf ) add_entrypoint_object( @@ -3877,14 +3974,7 @@ add_entrypoint_object( HDRS ../coshf16.h DEPENDS - .expxf16 - libc.hdr.errno_macros - libc.hdr.fenv_macros - libc.src.__support.FPUtil.except_value_utils - libc.src.__support.FPUtil.fenv_impl - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.rounding_mode - libc.src.__support.macros.optimization + libc.src.__support.math.coshf16 ) add_entrypoint_object( @@ -3894,10 +3984,10 @@ add_entrypoint_object( HDRS ../sinhf.h DEPENDS - .explogxf libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.rounding_mode libc.src.__support.macros.optimization + libc.src.__support.math.sinhfcoshf_utils ) add_entrypoint_object( @@ -3907,14 +3997,14 @@ add_entrypoint_object( HDRS ../sinhf16.h DEPENDS - .expxf16 libc.hdr.errno_macros libc.hdr.fenv_macros libc.src.__support.FPUtil.except_value_utils - libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.rounding_mode libc.src.__support.macros.optimization + libc.src.__support.math.expxf16_utils ) add_entrypoint_object( @@ -3924,12 +4014,12 @@ add_entrypoint_object( HDRS ../tanhf.h DEPENDS - 
.explogxf libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.rounding_mode libc.src.__support.FPUtil.multiply_add libc.src.__support.FPUtil.polyeval libc.src.__support.macros.optimization + libc.src.__support.math.exp10f_utils ) add_entrypoint_object( @@ -3939,7 +4029,6 @@ add_entrypoint_object( HDRS ../tanhf16.h DEPENDS - .expxf16 libc.hdr.fenv_macros libc.src.__support.CPP.array libc.src.__support.FPUtil.cast @@ -3951,6 +4040,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.polyeval libc.src.__support.FPUtil.rounding_mode libc.src.__support.macros.optimization + libc.src.__support.math.expxf16_utils ) add_entrypoint_object( @@ -3960,7 +4050,6 @@ add_entrypoint_object( HDRS ../acoshf.h DEPENDS - .explogxf libc.src.__support.math.acoshf ) @@ -3996,6 +4085,25 @@ add_entrypoint_object( ) add_entrypoint_object( + asinpif16 + SRCS + asinpif16.cpp + HDRS + ../asinpif16.h + DEPENDS + libc.hdr.errno_macros + libc.hdr.fenv_macros + libc.src.__support.FPUtil.cast + libc.src.__support.FPUtil.except_value_utils + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.polyeval + libc.src.__support.FPUtil.sqrt + libc.src.__support.macros.optimization +) + +add_entrypoint_object( atanhf SRCS atanhf.cpp @@ -4819,11 +4927,7 @@ add_entrypoint_object( HDRS ../cbrtf.h DEPENDS - libc.hdr.fenv_macros - libc.src.__support.FPUtil.fenv_impl - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.multiply_add - libc.src.__support.macros.optimization + libc.src.__support.math.cbrtf ) add_entrypoint_object( @@ -4901,17 +5005,282 @@ add_entrypoint_object( libc.src.__support.FPUtil.generic.mul ) -add_header_library( - expxf16 +add_entrypoint_object( + bf16add + SRCS + bf16add.cpp HDRS - expxf16.h + ../bf16add.h DEPENDS - libc.hdr.stdint_proxy - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.cast - libc.src.__support.FPUtil.multiply_add - 
libc.src.__support.FPUtil.nearest_integer - libc.src.__support.macros.attributes - libc.src.__support.math.expf16_utils - libc.src.__support.math.exp10_float16_constants + libc.src.__support.common + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.FPUtil.generic.add_sub + libc.src.__support.macros.config + libc.src.__support.macros.properties.types +) + +add_entrypoint_object( + bf16addf + SRCS + bf16addf.cpp + HDRS + ../bf16addf.h + DEPENDS + libc.src.__support.common + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.FPUtil.generic.add_sub + libc.src.__support.macros.config + libc.src.__support.macros.properties.types +) + +add_entrypoint_object( + bf16addl + SRCS + bf16addl.cpp + HDRS + ../bf16addl.h + DEPENDS + libc.src.__support.common + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.FPUtil.generic.add_sub + libc.src.__support.macros.config + libc.src.__support.macros.properties.types +) + +add_entrypoint_object( + bf16addf128 + SRCS + bf16addf128.cpp + HDRS + ../bf16addf128.h + DEPENDS + libc.src.__support.common + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.FPUtil.generic.add_sub + libc.src.__support.macros.config + libc.src.__support.macros.properties.types +) + +add_entrypoint_object( + bf16div + SRCS + bf16div.cpp + HDRS + ../bf16div.h + DEPENDS + libc.src.__support.common + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.FPUtil.generic.div + libc.src.__support.macros.config + libc.src.__support.macros.properties.types +) + +add_entrypoint_object( + bf16divf + SRCS + bf16divf.cpp + HDRS + ../bf16divf.h + DEPENDS + libc.src.__support.common + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.FPUtil.generic.div + libc.src.__support.macros.config + libc.src.__support.macros.properties.types +) + +add_entrypoint_object( + bf16divl + SRCS + bf16divl.cpp + HDRS + ../bf16divl.h + DEPENDS + libc.src.__support.common + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.FPUtil.generic.div + 
libc.src.__support.macros.config + libc.src.__support.macros.properties.types +) + +add_entrypoint_object( + bf16divf128 + SRCS + bf16divf128.cpp + HDRS + ../bf16divf128.h + DEPENDS + libc.src.__support.common + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.FPUtil.generic.div + libc.src.__support.macros.config + libc.src.__support.macros.properties.types +) + +add_entrypoint_object( + bf16fma + SRCS + bf16fma.cpp + HDRS + ../bf16fma.h + DEPENDS + libc.src.__support.common + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.FPUtil.fma + libc.src.__support.macros.config + libc.src.__support.macros.properties.types +) + +add_entrypoint_object( + bf16fmaf + SRCS + bf16fmaf.cpp + HDRS + ../bf16fmaf.h + DEPENDS + libc.src.__support.common + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.FPUtil.fma + libc.src.__support.macros.config + libc.src.__support.macros.properties.types +) + +add_entrypoint_object( + bf16fmal + SRCS + bf16fmal.cpp + HDRS + ../bf16fmal.h + DEPENDS + libc.src.__support.common + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.FPUtil.fma + libc.src.__support.macros.config + libc.src.__support.macros.properties.types +) + +add_entrypoint_object( + bf16fmaf128 + SRCS + bf16fmaf128.cpp + HDRS + ../bf16fmaf128.h + DEPENDS + libc.src.__support.common + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.FPUtil.fma + libc.src.__support.macros.config + libc.src.__support.macros.properties.types +) + +add_entrypoint_object( + bf16mul + SRCS + bf16mul.cpp + HDRS + ../bf16mul.h + DEPENDS + libc.src.__support.common + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.FPUtil.generic.mul + libc.src.__support.macros.config + libc.src.__support.macros.properties.types +) + +add_entrypoint_object( + bf16mulf + SRCS + bf16mulf.cpp + HDRS + ../bf16mulf.h + DEPENDS + libc.src.__support.common + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.FPUtil.generic.mul + libc.src.__support.macros.config + 
libc.src.__support.macros.properties.types +) + +add_entrypoint_object( + bf16mull + SRCS + bf16mull.cpp + HDRS + ../bf16mull.h + DEPENDS + libc.src.__support.common + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.FPUtil.generic.mul + libc.src.__support.macros.config + libc.src.__support.macros.properties.types +) + +add_entrypoint_object( + bf16mulf128 + SRCS + bf16mulf128.cpp + HDRS + ../bf16mulf128.h + DEPENDS + libc.src.__support.common + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.FPUtil.generic.mul + libc.src.__support.macros.config + libc.src.__support.macros.properties.types +) + +add_entrypoint_object( + bf16sub + SRCS + bf16sub.cpp + HDRS + ../bf16sub.h + DEPENDS + libc.src.__support.common + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.FPUtil.generic.add_sub + libc.src.__support.macros.config + libc.src.__support.macros.properties.types +) + +add_entrypoint_object( + bf16subf + SRCS + bf16subf.cpp + HDRS + ../bf16subf.h + DEPENDS + libc.src.__support.common + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.FPUtil.generic.add_sub + libc.src.__support.macros.config + libc.src.__support.macros.properties.types +) + +add_entrypoint_object( + bf16subl + SRCS + bf16subl.cpp + HDRS + ../bf16subl.h + DEPENDS + libc.src.__support.common + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.FPUtil.generic.add_sub + libc.src.__support.macros.config + libc.src.__support.macros.properties.types +) + +add_entrypoint_object( + bf16subf128 + SRCS + bf16subf128.cpp + HDRS + ../bf16subf128.h + DEPENDS + libc.src.__support.common + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.FPUtil.generic.add_sub + libc.src.__support.macros.config + libc.src.__support.macros.properties.types ) diff --git a/libc/src/math/generic/acoshf.cpp b/libc/src/math/generic/acoshf.cpp index 5c04583..c964632 100644 --- a/libc/src/math/generic/acoshf.cpp +++ b/libc/src/math/generic/acoshf.cpp @@ -7,7 +7,6 @@ 
//===----------------------------------------------------------------------===// #include "src/math/acoshf.h" - #include "src/__support/math/acoshf.h" namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/math/generic/asinpif16.cpp b/libc/src/math/generic/asinpif16.cpp new file mode 100644 index 0000000..aabc086 --- /dev/null +++ b/libc/src/math/generic/asinpif16.cpp @@ -0,0 +1,127 @@ +//===-- Half-precision asinpif16(x) function ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/asinpif16.h" +#include "hdr/errno_macros.h" +#include "hdr/fenv_macros.h" +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/cast.h" +#include "src/__support/FPUtil/except_value_utils.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/sqrt.h" +#include "src/__support/macros/optimization.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(float16, asinpif16, (float16 x)) { + using FPBits = fputil::FPBits<float16>; + + FPBits xbits(x); + bool is_neg = xbits.is_neg(); + double x_abs = fputil::cast<double>(xbits.abs().get_val()); + + auto signed_result = [is_neg](auto r) -> auto { return is_neg ? 
-r : r; }; + + if (LIBC_UNLIKELY(x_abs > 1.0)) { + // asinpif16(NaN) = NaN. NOTE(review): for a NaN input x_abs is NaN and `x_abs > 1.0` is false, so this NaN handling looks unreachable -- confirm. + if (xbits.is_nan()) { + if (xbits.is_signaling_nan()) { + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + return x; + } + + // 1 < |x| <= +/-inf + fputil::raise_except_if_required(FE_INVALID); + fputil::set_errno_if_required(EDOM); + + return FPBits::quiet_nan().get_val(); + } + + // the coefficients for the polynomial approximation of asin(x)/pi in the + // range [0, 0.5] extracted using python-sympy + // + // Python code to generate the coefficients: + // > from sympy import * + // > import math + // > x = symbols('x') + // > print(series(asin(x)/math.pi, x, 0, 21)) + // + // OUTPUT: + // + // 0.318309886183791*x + 0.0530516476972984*x**3 + 0.0238732414637843*x**5 + + // 0.0142102627760621*x**7 + 0.00967087327815336*x**9 + + // 0.00712127941391293*x**11 + 0.00552355646848375*x**13 + + // 0.00444514782463692*x**15 + 0.00367705242846804*x**17 + + // 0.00310721681820837*x**19 + O(x**21) + // NOTE(review): POLY_COEFFS below stops at x**15; the first dropped term is ~0.00368 * 0.5**17 ~= 2.8e-8 at x = 0.5, so the error figure that follows looks optimistic -- confirm. + // it's very accurate in the range [0, 0.5] and has a maximum error of + // 0.0000000000000001 in the range [0, 0.5]. 
+ constexpr double POLY_COEFFS[] = { + 0x1.45f306dc9c889p-2, // x^1 + 0x1.b2995e7b7b5fdp-5, // x^3 + 0x1.8723a1d588a36p-6, // x^5 + 0x1.d1a452f20430dp-7, // x^7 + 0x1.3ce52a3a09f61p-7, // x^9 + 0x1.d2b33e303d375p-8, // x^11 + 0x1.69fde663c674fp-8, // x^13 + 0x1.235134885f19bp-8, // x^15 + }; + // polynomial evaluation using Horner's method + // works only for |x| in [0, 0.5] + auto asinpi_polyeval = [](double x) -> double { + return x * fputil::polyeval(x * x, POLY_COEFFS[0], POLY_COEFFS[1], + POLY_COEFFS[2], POLY_COEFFS[3], POLY_COEFFS[4], + POLY_COEFFS[5], POLY_COEFFS[6], POLY_COEFFS[7]); + }; + + // if |x| <= 0.5: + if (LIBC_UNLIKELY(x_abs <= 0.5)) { + // Use polynomial approximation of asin(x)/pi in the range [0, 0.5] + double result = asinpi_polyeval(fputil::cast<double>(x)); + return fputil::cast<float16>(result); + } + + // If |x| > 0.5, we need to use the range reduction method: + // y = asin(x) => x = sin(y) + // because: sin(a) = cos(pi/2 - a) + // therefore: + // x = cos(pi/2 - y) + // let z = pi/2 - y, + // x = cos(z) + // because: cos(2a) = 1 - 2 * sin^2(a), z = 2a, a = z/2 + // therefore: + // cos(z) = 1 - 2 * sin^2(z/2) + // sin(z/2) = sqrt((1 - cos(z))/2) + // sin(z/2) = sqrt((1 - x)/2) + // let u = (1 - x)/2 + // then: + // sin(z/2) = sqrt(u) + // z/2 = asin(sqrt(u)) + // z = 2 * asin(sqrt(u)) + // pi/2 - y = 2 * asin(sqrt(u)) + // y = pi/2 - 2 * asin(sqrt(u)) + // y/pi = 1/2 - 2 * asin(sqrt(u))/pi + // + // Finally, we can write: + // asinpi(x) = 1/2 - 2 * asinpi(sqrt(u)) + // where u = (1 - x) /2 + // = 0.5 - 0.5 * x + // = multiply_add(-0.5, x, 0.5) + + double u = fputil::multiply_add(-0.5, x_abs, 0.5); + double asinpi_sqrt_u = asinpi_polyeval(fputil::sqrt<double>(u)); + double result = fputil::multiply_add(-2.0, asinpi_sqrt_u, 0.5); + + return fputil::cast<float16>(signed_result(result)); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/bf16add.cpp b/libc/src/math/generic/bf16add.cpp new file mode 100644 index 
0000000..257596a --- /dev/null +++ b/libc/src/math/generic/bf16add.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of bf16add function --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/bf16add.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/FPUtil/generic/add_sub.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, bf16add, (double x, double y)) { + return fputil::generic::add<bfloat16>(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/bf16addf.cpp b/libc/src/math/generic/bf16addf.cpp new file mode 100644 index 0000000..65e6cbf --- /dev/null +++ b/libc/src/math/generic/bf16addf.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of bf16addf function -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/bf16addf.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/FPUtil/generic/add_sub.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, bf16addf, (float x, float y)) { + return fputil::generic::add<bfloat16>(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/bf16addf128.cpp b/libc/src/math/generic/bf16addf128.cpp new file mode 100644 index 0000000..03f70af --- /dev/null +++ b/libc/src/math/generic/bf16addf128.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of bf16addf128 function ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/bf16addf128.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/FPUtil/generic/add_sub.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, bf16addf128, (float128 x, float128 y)) { + return fputil::generic::add<bfloat16>(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/bf16addl.cpp b/libc/src/math/generic/bf16addl.cpp new file mode 100644 index 0000000..c212195 --- /dev/null +++ b/libc/src/math/generic/bf16addl.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of bf16addl function -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/bf16addl.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/FPUtil/generic/add_sub.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, bf16addl, (long double x, long double y)) { + return fputil::generic::add<bfloat16>(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/bf16div.cpp b/libc/src/math/generic/bf16div.cpp new file mode 100644 index 0000000..5e9b1b4 --- /dev/null +++ b/libc/src/math/generic/bf16div.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of bf16div function --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/bf16div.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/FPUtil/generic/div.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, bf16div, (double x, double y)) { + return fputil::generic::div<bfloat16>(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/bf16divf.cpp b/libc/src/math/generic/bf16divf.cpp new file mode 100644 index 0000000..2054a64 --- /dev/null +++ b/libc/src/math/generic/bf16divf.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of bf16divf function -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/bf16divf.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/FPUtil/generic/div.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, bf16divf, (float x, float y)) { + return fputil::generic::div<bfloat16>(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/bf16divf128.cpp b/libc/src/math/generic/bf16divf128.cpp new file mode 100644 index 0000000..fbe9775 --- /dev/null +++ b/libc/src/math/generic/bf16divf128.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of bf16divf128 function ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/bf16divf128.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/FPUtil/generic/div.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, bf16divf128, (float128 x, float128 y)) { + return fputil::generic::div<bfloat16>(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/bf16divl.cpp b/libc/src/math/generic/bf16divl.cpp new file mode 100644 index 0000000..21dd6b1 --- /dev/null +++ b/libc/src/math/generic/bf16divl.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of bf16divl function -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/bf16divl.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/FPUtil/generic/div.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, bf16divl, (long double x, long double y)) { + return fputil::generic::div<bfloat16>(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/bf16fma.cpp b/libc/src/math/generic/bf16fma.cpp new file mode 100644 index 0000000..0f0fe86 --- /dev/null +++ b/libc/src/math/generic/bf16fma.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of bf16fma function --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/bf16fma.h" +#include "src/__support/FPUtil/FMA.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, bf16fma, (double x, double y, double z)) { + return fputil::fma<bfloat16>(x, y, z); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/bf16fmaf.cpp b/libc/src/math/generic/bf16fmaf.cpp new file mode 100644 index 0000000..739691c --- /dev/null +++ b/libc/src/math/generic/bf16fmaf.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of bf16fmaf function -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/bf16fmaf.h" +#include "src/__support/FPUtil/FMA.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, bf16fmaf, (float x, float y, float z)) { + return fputil::fma<bfloat16>(x, y, z); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/bf16fmaf128.cpp b/libc/src/math/generic/bf16fmaf128.cpp new file mode 100644 index 0000000..a29a0b0 --- /dev/null +++ b/libc/src/math/generic/bf16fmaf128.cpp @@ -0,0 +1,22 @@ +//===-- Implementation of bf16fmaf128 function ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/bf16fmaf128.h" +#include "src/__support/FPUtil/FMA.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, bf16fmaf128, + (float128 x, float128 y, float128 z)) { + return fputil::fma<bfloat16>(x, y, z); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/bf16fmal.cpp b/libc/src/math/generic/bf16fmal.cpp new file mode 100644 index 0000000..f31ec69 --- /dev/null +++ b/libc/src/math/generic/bf16fmal.cpp @@ -0,0 +1,22 @@ +//===-- Implementation of bf16fmal function -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/bf16fmal.h" + +#include "src/__support/FPUtil/FMA.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, bf16fmal, + (long double x, long double y, long double z)) { + return fputil::fma<bfloat16>(x, y, z); +} +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/bf16mul.cpp b/libc/src/math/generic/bf16mul.cpp new file mode 100644 index 0000000..c50eec2 --- /dev/null +++ b/libc/src/math/generic/bf16mul.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of bf16mul function --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/bf16mul.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/FPUtil/generic/mul.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, bf16mul, (double x, double y)) { + return fputil::generic::mul<bfloat16>(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/bf16mulf.cpp b/libc/src/math/generic/bf16mulf.cpp new file mode 100644 index 0000000..117fcd1 --- /dev/null +++ b/libc/src/math/generic/bf16mulf.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of bf16mulf function -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/bf16mulf.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/FPUtil/generic/mul.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, bf16mulf, (float x, float y)) { + return fputil::generic::mul<bfloat16>(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/bf16mulf128.cpp b/libc/src/math/generic/bf16mulf128.cpp new file mode 100644 index 0000000..ff2a081 --- /dev/null +++ b/libc/src/math/generic/bf16mulf128.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of bf16mulf128 function ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/bf16mulf128.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/FPUtil/generic/mul.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, bf16mulf128, (float128 x, float128 y)) { + return fputil::generic::mul<bfloat16>(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/bf16mull.cpp b/libc/src/math/generic/bf16mull.cpp new file mode 100644 index 0000000..e7c4fc0 --- /dev/null +++ b/libc/src/math/generic/bf16mull.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of bf16mull function -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/bf16mull.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/FPUtil/generic/mul.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, bf16mull, (long double x, long double y)) { + return fputil::generic::mul<bfloat16>(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/bf16sub.cpp b/libc/src/math/generic/bf16sub.cpp new file mode 100644 index 0000000..65eb209 --- /dev/null +++ b/libc/src/math/generic/bf16sub.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of bf16sub function --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/bf16sub.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/FPUtil/generic/add_sub.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, bf16sub, (double x, double y)) { + return fputil::generic::sub<bfloat16>(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/bf16subf.cpp b/libc/src/math/generic/bf16subf.cpp new file mode 100644 index 0000000..6bba4be --- /dev/null +++ b/libc/src/math/generic/bf16subf.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of bf16subf function -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/bf16subf.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/FPUtil/generic/add_sub.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, bf16subf, (float x, float y)) { + return fputil::generic::sub<bfloat16>(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/bf16subf128.cpp b/libc/src/math/generic/bf16subf128.cpp new file mode 100644 index 0000000..e5fe107 --- /dev/null +++ b/libc/src/math/generic/bf16subf128.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of bf16subf128 function ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/bf16subf128.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/FPUtil/generic/add_sub.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, bf16subf128, (float128 x, float128 y)) { + return fputil::generic::sub<bfloat16>(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/bf16subl.cpp b/libc/src/math/generic/bf16subl.cpp new file mode 100644 index 0000000..d3a970c --- /dev/null +++ b/libc/src/math/generic/bf16subl.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of bf16subl function -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/bf16subl.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/FPUtil/generic/add_sub.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, bf16subl, (long double x, long double y)) { + return fputil::generic::sub<bfloat16>(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/cbrtf.cpp b/libc/src/math/generic/cbrtf.cpp index 71b23c4..0bd8f71 100644 --- a/libc/src/math/generic/cbrtf.cpp +++ b/libc/src/math/generic/cbrtf.cpp @@ -7,153 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/math/cbrtf.h" -#include "hdr/fenv_macros.h" -#include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/common.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY +#include "src/__support/math/cbrtf.h" namespace LIBC_NAMESPACE_DECL { -namespace { - -// Look up table for 2^(i/3) for i = 0, 1, 2. -constexpr double CBRT2[3] = {1.0, 0x1.428a2f98d728bp0, 0x1.965fea53d6e3dp0}; - -// Degree-7 polynomials approximation of ((1 + x)^(1/3) - 1)/x for 0 <= x <= 1 -// generated by Sollya with: -// > for i from 0 to 15 do { -// P = fpminimax(((1 + x)^(1/3) - 1)/x, 6, [|D...|], [i/16, (i + 1)/16]); -// print("{", coeff(P, 0), ",", coeff(P, 1), ",", coeff(P, 2), ",", -// coeff(P, 3), ",", coeff(P, 4), ",", coeff(P, 5), ",", -// coeff(P, 6), "},"); -// }; -// Then (1 + x)^(1/3) ~ 1 + x * P(x). 
-constexpr double COEFFS[16][7] = { - {0x1.55555555554ebp-2, -0x1.c71c71c678c0cp-4, 0x1.f9add2776de81p-5, - -0x1.511e10aa964a7p-5, 0x1.ee44165937fa2p-6, -0x1.7c5c9e059345dp-6, - 0x1.047f75e0aff14p-6}, - {0x1.5555554d1149ap-2, -0x1.c71c676fcb5bp-4, 0x1.f9ab127dc57ebp-5, - -0x1.50ea8fd1d4c15p-5, 0x1.e9d68f28ced43p-6, -0x1.60e0e1e661311p-6, - 0x1.716eca1d6e3bcp-7}, - {0x1.5555546377d45p-2, -0x1.c71bc1c6d49d2p-4, 0x1.f9924cc0ed24dp-5, - -0x1.4fea3beb53b3bp-5, 0x1.de028a9a07b1bp-6, -0x1.3b090d2233524p-6, - 0x1.0aeca34893785p-7}, - {0x1.55554dce9f649p-2, -0x1.c7188b34b98f8p-4, 0x1.f93e1af34af49p-5, - -0x1.4d9a06be75c63p-5, 0x1.cb943f4f68992p-6, -0x1.139a685a5e3c4p-6, - 0x1.88410674c6a5dp-8}, - {0x1.5555347d211c3p-2, -0x1.c70f2a4b1a5fap-4, 0x1.f88420e8602c3p-5, - -0x1.49becfa4ed3ep-5, 0x1.b475cd9013162p-6, -0x1.dcfee1dd2f8efp-7, - 0x1.249bb51a1c498p-8}, - {0x1.5554f01b33dbap-2, -0x1.c6facb929dbf1p-4, 0x1.f73fb7861252ep-5, - -0x1.4459a4a0071fap-5, 0x1.9a8df2b504fc2p-6, -0x1.9a7ce3006d06ep-7, - 0x1.ba9230918fa2ep-9}, - {0x1.55545c695db5fp-2, -0x1.c6d6089f20275p-4, 0x1.f556e0ea80efp-5, - -0x1.3d91372d083f4p-5, 0x1.7f66cff331f4p-6, -0x1.606a562491737p-7, - 0x1.52e3e17c71069p-9}, - {0x1.55534a879232ap-2, -0x1.c69b836998b84p-4, 0x1.f2bb26dac0e4cp-5, - -0x1.359eed43716d7p-5, 0x1.64218cd824fbcp-6, -0x1.2e703e2e091e8p-7, - 0x1.0677d9af6aad4p-9}, - {0x1.5551836bb5494p-2, -0x1.c64658c15353bp-4, 0x1.ef68517451a6ep-5, - -0x1.2cc20a980dceep-5, 0x1.49843e0fad93ap-6, -0x1.03c59ccb68e54p-7, - 0x1.9ad325dc7adcbp-10}, - {0x1.554ecacb0d035p-2, -0x1.c5d2664026ffcp-4, 0x1.eb624796ba809p-5, - -0x1.233803d19a535p-5, 0x1.300decb1c3c28p-6, -0x1.befe18031ec3dp-8, - 0x1.449f5ee175c69p-10}, - {0x1.554ae1f5ae815p-2, -0x1.c53c6b14ff6b2p-4, 0x1.e6b2d5127bb5bp-5, - -0x1.19387336788a3p-5, 0x1.180955a6ab255p-6, -0x1.81696703ba369p-8, - 0x1.02cb36389bd79p-10}, - {0x1.55458a59f356ep-2, -0x1.c4820dd631ae9p-4, 0x1.e167af818bd15p-5, - -0x1.0ef35f6f72e52p-5, 0x1.019c33b65e4ebp-6, -0x1.4d25bdd52d3a5p-8, - 
0x1.a008ae91f5936p-11}, - {0x1.553e878eafee1p-2, -0x1.c3a1d0b2a3db2p-4, 0x1.db90d8ed9f89bp-5, - -0x1.0490e20f1ae91p-5, 0x1.d9a5d1fc42fe3p-7, -0x1.20bf8227c2abfp-8, - 0x1.50f8174cdb6e9p-11}, - {0x1.5535a0dedf1b1p-2, -0x1.c29afb8bd01a1p-4, 0x1.d53f6371c1e27p-5, - -0x1.f463209b433e2p-6, 0x1.b35222a17e44p-7, -0x1.f5efbf505e133p-9, - 0x1.12e0e94e8586dp-11}, - {0x1.552aa25e57bfdp-2, -0x1.c16d811e4acadp-4, 0x1.ce8489b47aa51p-5, - -0x1.dfde7ff758ea8p-6, 0x1.901f43aac38c8p-7, -0x1.b581d07df5ad5p-9, - 0x1.c3726535f1fc6p-12}, - {0x1.551d5d9b204d3p-2, -0x1.c019e328f8db1p-4, 0x1.c7710f44fc3cep-5, - -0x1.cbbbe25ea8ba4p-6, 0x1.6fe270088623dp-7, -0x1.7e6fc79733761p-9, - 0x1.75077abf18d84p-12}, -}; - -} // anonymous namespace - -LLVM_LIBC_FUNCTION(float, cbrtf, (float x)) { - using FloatBits = typename fputil::FPBits<float>; - using DoubleBits = typename fputil::FPBits<double>; - - FloatBits x_bits(x); - - uint32_t x_abs = x_bits.uintval() & 0x7fff'ffff; - uint32_t sign_bit = (x_bits.uintval() >> 31) << DoubleBits::EXP_LEN; - - if (LIBC_UNLIKELY(x == 0.0f || x_abs >= 0x7f80'0000)) { - // x is 0, Inf, or NaN. - // Make sure it works for FTZ/DAZ modes. - return x + x; - } - - double xd = static_cast<double>(x); - DoubleBits xd_bits(xd); - - // When using biased exponent of x in double precision, - // x_e = real_exponent_of_x + 1023 - // Then: - // x_e / 3 = real_exponent_of_x / 3 + 1023/3 - // = real_exponent_of_x / 3 + 341 - // So to make it the correct biased exponent of x^(1/3), we add - // 1023 - 341 = 682 - // to the quotient x_e / 3. 
- unsigned x_e = static_cast<unsigned>(xd_bits.get_biased_exponent()); - unsigned out_e = (x_e / 3 + 682) | sign_bit; - unsigned shift_e = x_e % 3; - - // Set x_m = 2^(x_e % 3) * (1.mantissa) - uint64_t x_m = xd_bits.get_mantissa(); - // Use the leading 4 bits for look up table - unsigned idx = static_cast<unsigned>(x_m >> (DoubleBits::FRACTION_LEN - 4)); - - x_m |= static_cast<uint64_t>(DoubleBits::EXP_BIAS) - << DoubleBits::FRACTION_LEN; - - double x_reduced = DoubleBits(x_m).get_val(); - double dx = x_reduced - 1.0; - - double dx_sq = dx * dx; - double c0 = fputil::multiply_add(dx, COEFFS[idx][0], 1.0); - double c1 = fputil::multiply_add(dx, COEFFS[idx][2], COEFFS[idx][1]); - double c2 = fputil::multiply_add(dx, COEFFS[idx][4], COEFFS[idx][3]); - double c3 = fputil::multiply_add(dx, COEFFS[idx][6], COEFFS[idx][5]); - - double dx_4 = dx_sq * dx_sq; - double p0 = fputil::multiply_add(dx_sq, c1, c0); - double p1 = fputil::multiply_add(dx_sq, c3, c2); - - double r = fputil::multiply_add(dx_4, p1, p0) * CBRT2[shift_e]; - - uint64_t r_m = DoubleBits(r).get_mantissa(); - // Check if the output is exact. To be exact, the smallest 1-bit of the - // output has to be at least 2^-7 or higher. So we check the lowest 44 bits - // to see if they are within 2^(-52 + 3) errors from all zeros, then the - // result cube root is exact. - if (LIBC_UNLIKELY(((r_m + 8) & 0xfffffffffff) <= 16)) { - if ((r_m & 0xfffffffffff) <= 8) - r_m &= 0xffff'ffff'ffff'ffe0; - else - r_m = (r_m & 0xffff'ffff'ffff'ffe0) + 0x20; - fputil::clear_except_if_required(FE_INEXACT); - } - // Adjust exponent and sign. 
- uint64_t r_bits = - r_m | (static_cast<uint64_t>(out_e) << DoubleBits::FRACTION_LEN); - - return static_cast<float>(DoubleBits(r_bits).get_val()); -} +LLVM_LIBC_FUNCTION(float, cbrtf, (float x)) { return math::cbrtf(x); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/cos.cpp b/libc/src/math/generic/cos.cpp index 5da0f868..aabf3bc 100644 --- a/libc/src/math/generic/cos.cpp +++ b/libc/src/math/generic/cos.cpp @@ -7,161 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/math/cos.h" -#include "hdr/errno_macros.h" -#include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/double_double.h" -#include "src/__support/FPUtil/dyadic_float.h" -#include "src/__support/FPUtil/except_value_utils.h" -#include "src/__support/common.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY -#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA -#include "src/math/generic/range_reduction_double_common.h" -#include "src/math/generic/sincos_eval.h" - -#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE -#include "range_reduction_double_fma.h" -#else -#include "range_reduction_double_nofma.h" -#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE +#include "src/__support/math/cos.h" namespace LIBC_NAMESPACE_DECL { -using DoubleDouble = fputil::DoubleDouble; -using Float128 = typename fputil::DyadicFloat<128>; - -LLVM_LIBC_FUNCTION(double, cos, (double x)) { - using FPBits = typename fputil::FPBits<double>; - FPBits xbits(x); - - uint16_t x_e = xbits.get_biased_exponent(); - - DoubleDouble y; - unsigned k; - LargeRangeReduction range_reduction_large{}; - - // |x| < 2^16. - if (LIBC_LIKELY(x_e < FPBits::EXP_BIAS + FAST_PASS_EXPONENT)) { - // |x| < 2^-7 - if (LIBC_UNLIKELY(x_e < FPBits::EXP_BIAS - 7)) { - // |x| < 2^-27 - if (LIBC_UNLIKELY(x_e < FPBits::EXP_BIAS - 27)) { - // Signed zeros. 
- if (LIBC_UNLIKELY(x == 0.0)) - return 1.0; - - // For |x| < 2^-27, |cos(x) - 1| < |x|^2/2 < 2^-54 = ulp(1 - 2^-53)/2. - return fputil::round_result_slightly_down(1.0); - } - // No range reduction needed. - k = 0; - y.lo = 0.0; - y.hi = x; - } else { - // Small range reduction. - k = range_reduction_small(x, y); - } - } else { - // Inf or NaN - if (LIBC_UNLIKELY(x_e > 2 * FPBits::EXP_BIAS)) { - if (xbits.is_signaling_nan()) { - fputil::raise_except_if_required(FE_INVALID); - return FPBits::quiet_nan().get_val(); - } - // cos(+-Inf) = NaN - if (xbits.get_mantissa() == 0) { - fputil::set_errno_if_required(EDOM); - fputil::raise_except_if_required(FE_INVALID); - } - return x + FPBits::quiet_nan().get_val(); - } - - // Large range reduction. - k = range_reduction_large.fast(x, y); - } - - DoubleDouble sin_y, cos_y; - - [[maybe_unused]] double err = generic::sincos_eval(y, sin_y, cos_y); - - // Look up sin(k * pi/128) and cos(k * pi/128) -#ifdef LIBC_MATH_HAS_SMALL_TABLES - // Memory saving versions. Use 65-entry table. - auto get_idx_dd = [](unsigned kk) -> DoubleDouble { - unsigned idx = (kk & 64) ? 64 - (kk & 63) : (kk & 63); - DoubleDouble ans = SIN_K_PI_OVER_128[idx]; - if (kk & 128) { - ans.hi = -ans.hi; - ans.lo = -ans.lo; - } - return ans; - }; - DoubleDouble msin_k = get_idx_dd(k + 128); - DoubleDouble cos_k = get_idx_dd(k + 64); -#else - // Fast look up version, but needs 256-entry table. - // -sin(k * pi/128) = sin((k + 128) * pi/128) - // cos(k * pi/128) = sin(k * pi/128 + pi/2) = sin((k + 64) * pi/128). - DoubleDouble msin_k = SIN_K_PI_OVER_128[(k + 128) & 255]; - DoubleDouble cos_k = SIN_K_PI_OVER_128[(k + 64) & 255]; -#endif // LIBC_MATH_HAS_SMALL_TABLES - - // After range reduction, k = round(x * 128 / pi) and y = x - k * (pi / 128). - // So k is an integer and -pi / 256 <= y <= pi / 256. 
- // Then cos(x) = cos((k * pi/128 + y) - // = cos(y) * cos(k*pi/128) - sin(y) * sin(k*pi/128) - DoubleDouble cos_k_cos_y = fputil::quick_mult(cos_y, cos_k); - DoubleDouble msin_k_sin_y = fputil::quick_mult(sin_y, msin_k); - - DoubleDouble rr = fputil::exact_add<false>(cos_k_cos_y.hi, msin_k_sin_y.hi); - rr.lo += msin_k_sin_y.lo + cos_k_cos_y.lo; - -#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - return rr.hi + rr.lo; -#else - - double rlp = rr.lo + err; - double rlm = rr.lo - err; - - double r_upper = rr.hi + rlp; // (rr.lo + ERR); - double r_lower = rr.hi + rlm; // (rr.lo - ERR); - - // Ziv's rounding test. - if (LIBC_LIKELY(r_upper == r_lower)) - return r_upper; - - Float128 u_f128, sin_u, cos_u; - if (LIBC_LIKELY(x_e < FPBits::EXP_BIAS + FAST_PASS_EXPONENT)) - u_f128 = range_reduction_small_f128(x); - else - u_f128 = range_reduction_large.accurate(); - - generic::sincos_eval(u_f128, sin_u, cos_u); - - auto get_sin_k = [](unsigned kk) -> Float128 { - unsigned idx = (kk & 64) ? 64 - (kk & 63) : (kk & 63); - Float128 ans = SIN_K_PI_OVER_128_F128[idx]; - if (kk & 128) - ans.sign = Sign::NEG; - return ans; - }; - - // -sin(k * pi/128) = sin((k + 128) * pi/128) - // cos(k * pi/128) = sin(k * pi/128 + pi/2) = sin((k + 64) * pi/128). - Float128 msin_k_f128 = get_sin_k(k + 128); - Float128 cos_k_f128 = get_sin_k(k + 64); - - // cos(x) = cos((k * pi/128 + u) - // = cos(u) * cos(k*pi/128) - sin(u) * sin(k*pi/128) - Float128 r = fputil::quick_add(fputil::quick_mul(cos_k_f128, cos_u), - fputil::quick_mul(msin_k_f128, sin_u)); - - // TODO: Add assertion if Ziv's accuracy tests fail in debug mode. - // https://github.com/llvm/llvm-project/issues/96452. 
- - return static_cast<double>(r); -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS -} +LLVM_LIBC_FUNCTION(double, cos, (double x)) { return math::cos(x); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/cosf.cpp b/libc/src/math/generic/cosf.cpp index 7cdae09..5c23d99 100644 --- a/libc/src/math/generic/cosf.cpp +++ b/libc/src/math/generic/cosf.cpp @@ -7,139 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/math/cosf.h" -#include "sincosf_utils.h" -#include "src/__support/FPUtil/BasicOperations.h" -#include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/except_value_utils.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/common.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY -#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA +#include "src/__support/math/cosf.h" namespace LIBC_NAMESPACE_DECL { -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS -// Exceptional cases for cosf. 
-static constexpr size_t N_EXCEPTS = 6; - -static constexpr fputil::ExceptValues<float, N_EXCEPTS> COSF_EXCEPTS{{ - // (inputs, RZ output, RU offset, RD offset, RN offset) - // x = 0x1.64a032p43, cos(x) = 0x1.9d4ba4p-1 (RZ) - {0x55325019, 0x3f4ea5d2, 1, 0, 0}, - // x = 0x1.4555p51, cos(x) = 0x1.115d7cp-1 (RZ) - {0x5922aa80, 0x3f08aebe, 1, 0, 1}, - // x = 0x1.48a858p54, cos(x) = 0x1.f48148p-2 (RZ) - {0x5aa4542c, 0x3efa40a4, 1, 0, 0}, - // x = 0x1.3170fp63, cos(x) = 0x1.fe2976p-1 (RZ) - {0x5f18b878, 0x3f7f14bb, 1, 0, 0}, - // x = 0x1.2b9622p67, cos(x) = 0x1.f0285cp-1 (RZ) - {0x6115cb11, 0x3f78142e, 1, 0, 1}, - // x = 0x1.ddebdep120, cos(x) = 0x1.114438p-1 (RZ) - {0x7beef5ef, 0x3f08a21c, 1, 0, 0}, -}}; -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - -LLVM_LIBC_FUNCTION(float, cosf, (float x)) { - using FPBits = typename fputil::FPBits<float>; - - FPBits xbits(x); - xbits.set_sign(Sign::POS); - - uint32_t x_abs = xbits.uintval(); - double xd = static_cast<double>(xbits.get_val()); - - // Range reduction: - // For |x| > pi/16, we perform range reduction as follows: - // Find k and y such that: - // x = (k + y) * pi/32 - // k is an integer - // |y| < 0.5 - // For small range (|x| < 2^45 when FMA instructions are available, 2^22 - // otherwise), this is done by performing: - // k = round(x * 32/pi) - // y = x * 32/pi - k - // For large range, we will omit all the higher parts of 16/pi such that the - // least significant bits of their full products with x are larger than 63, - // since cos((k + y + 64*i) * pi/32) = cos(x + i * 2pi) = cos(x). - // - // When FMA instructions are not available, we store the digits of 32/pi in - // chunks of 28-bit precision. This will make sure that the products: - // x * THIRTYTWO_OVER_PI_28[i] are all exact. - // When FMA instructions are available, we simply store the digits of 32/pi in - // chunks of doubles (53-bit of precision). 
- // So when multiplying by the largest values of single precision, the - // resulting output should be correct up to 2^(-208 + 128) ~ 2^-80. By the - // worst-case analysis of range reduction, |y| >= 2^-38, so this should give - // us more than 40 bits of accuracy. For the worst-case estimation of range - // reduction, see for instances: - // Elementary Functions by J-M. Muller, Chapter 11, - // Handbook of Floating-Point Arithmetic by J-M. Muller et. al., - // Chapter 10.2. - // - // Once k and y are computed, we then deduce the answer by the cosine of sum - // formula: - // cos(x) = cos((k + y)*pi/32) - // = cos(y*pi/32) * cos(k*pi/32) - sin(y*pi/32) * sin(k*pi/32) - // The values of sin(k*pi/32) and cos(k*pi/32) for k = 0..63 are precomputed - // and stored using a vector of 32 doubles. Sin(y*pi/32) and cos(y*pi/32) are - // computed using degree-7 and degree-6 minimax polynomials generated by - // Sollya respectively. - - // |x| < 0x1.0p-12f - if (LIBC_UNLIKELY(x_abs < 0x3980'0000U)) { - // When |x| < 2^-12, the relative error of the approximation cos(x) ~ 1 - // is: - // |cos(x) - 1| < |x^2 / 2| = 2^-25 < epsilon(1)/2. - // So the correctly rounded values of cos(x) are: - // = 1 - eps(x) if rounding mode = FE_TOWARDZERO or FE_DOWWARD, - // = 1 otherwise. - // To simplify the rounding decision and make it more efficient and to - // prevent compiler to perform constant folding, we use - // fma(x, -2^-25, 1) instead. - // Note: to use the formula 1 - 2^-25*x to decide the correct rounding, we - // do need fma(x, -2^-25, 1) to prevent underflow caused by -2^-25*x when - // |x| < 2^-125. For targets without FMA instructions, we simply use - // double for intermediate results as it is more efficient than using an - // emulated version of FMA. 
-#if defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT) - return fputil::multiply_add(xbits.get_val(), -0x1.0p-25f, 1.0f); -#else - return static_cast<float>(fputil::multiply_add(xd, -0x1.0p-25, 1.0)); -#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT - } - -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - if (auto r = COSF_EXCEPTS.lookup(x_abs); LIBC_UNLIKELY(r.has_value())) - return r.value(); -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - - // x is inf or nan. - if (LIBC_UNLIKELY(x_abs >= 0x7f80'0000U)) { - if (xbits.is_signaling_nan()) { - fputil::raise_except_if_required(FE_INVALID); - return FPBits::quiet_nan().get_val(); - } - - if (x_abs == 0x7f80'0000U) { - fputil::set_errno_if_required(EDOM); - fputil::raise_except_if_required(FE_INVALID); - } - return x + FPBits::quiet_nan().get_val(); - } - - // Combine the results with the sine of sum formula: - // cos(x) = cos((k + y)*pi/32) - // = cos(y*pi/32) * cos(k*pi/32) - sin(y*pi/32) * sin(k*pi/32) - // = cosm1_y * cos_k + sin_y * sin_k - // = (cosm1_y * cos_k + cos_k) + sin_y * sin_k - double sin_k, cos_k, sin_y, cosm1_y; - - sincosf_eval(xd, x_abs, sin_k, cos_k, sin_y, cosm1_y); - - return static_cast<float>(fputil::multiply_add( - sin_y, -sin_k, fputil::multiply_add(cosm1_y, cos_k, cos_k))); -} +LLVM_LIBC_FUNCTION(float, cosf, (float x)) { return math::cosf(x); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/cosf16.cpp b/libc/src/math/generic/cosf16.cpp index 99bb03e..031c3e1 100644 --- a/libc/src/math/generic/cosf16.cpp +++ b/libc/src/math/generic/cosf16.cpp @@ -7,87 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/math/cosf16.h" -#include "hdr/errno_macros.h" -#include "hdr/fenv_macros.h" -#include "sincosf16_utils.h" -#include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/cast.h" -#include "src/__support/FPUtil/except_value_utils.h" -#include "src/__support/FPUtil/multiply_add.h" -#include 
"src/__support/macros/optimization.h" +#include "src/__support/math/cosf16.h" namespace LIBC_NAMESPACE_DECL { -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS -constexpr size_t N_EXCEPTS = 4; - -constexpr fputil::ExceptValues<float16, N_EXCEPTS> COSF16_EXCEPTS{{ - // (input, RZ output, RU offset, RD offset, RN offset) - {0x2b7c, 0x3bfc, 1, 0, 1}, - {0x4ac1, 0x38b5, 1, 0, 0}, - {0x5c49, 0xb8c6, 0, 1, 0}, - {0x7acc, 0xa474, 0, 1, 0}, -}}; -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - -LLVM_LIBC_FUNCTION(float16, cosf16, (float16 x)) { - using FPBits = fputil::FPBits<float16>; - FPBits xbits(x); - - uint16_t x_u = xbits.uintval(); - uint16_t x_abs = x_u & 0x7fff; - float xf = x; - - // Range reduction: - // For |x| > pi/32, we perform range reduction as follows: - // Find k and y such that: - // x = (k + y) * pi/32 - // k is an integer, |y| < 0.5 - // - // This is done by performing: - // k = round(x * 32/pi) - // y = x * 32/pi - k - // - // Once k and y are computed, we then deduce the answer by the cosine of sum - // formula: - // cos(x) = cos((k + y) * pi/32) - // = cos(k * pi/32) * cos(y * pi/32) - - // sin(k * pi/32) * sin(y * pi/32) - -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - // Handle exceptional values - if (auto r = COSF16_EXCEPTS.lookup(x_abs); LIBC_UNLIKELY(r.has_value())) - return r.value(); -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - - // cos(+/-0) = 1 - if (LIBC_UNLIKELY(x_abs == 0U)) - return fputil::cast<float16>(1.0f); - - // cos(+/-inf) = NaN, and cos(NaN) = NaN - if (xbits.is_inf_or_nan()) { - if (xbits.is_signaling_nan()) { - fputil::raise_except_if_required(FE_INVALID); - return FPBits::quiet_nan().get_val(); - } - - if (xbits.is_inf()) { - fputil::set_errno_if_required(EDOM); - fputil::raise_except_if_required(FE_INVALID); - } - - return x + FPBits::quiet_nan().get_val(); - } - - float sin_k, cos_k, sin_y, cosm1_y; - sincosf16_eval(xf, sin_k, cos_k, sin_y, cosm1_y); - // Since, cosm1_y = cos_y - 1, therefore: - // cos(x) = cos_k * cos_y - sin_k * 
sin_y - // = cos_k * (cos_y - 1 + 1) - sin_k * sin_y - // = cos_k * cosm1_y - sin_k * sin_y + cos_k - return fputil::cast<float16>(fputil::multiply_add( - cos_k, cosm1_y, fputil::multiply_add(-sin_k, sin_y, cos_k))); -} +LLVM_LIBC_FUNCTION(float16, cosf16, (float16 x)) { return math::cosf16(x); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/coshf.cpp b/libc/src/math/generic/coshf.cpp index 9f87564..368c0fd 100644 --- a/libc/src/math/generic/coshf.cpp +++ b/libc/src/math/generic/coshf.cpp @@ -7,50 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/math/coshf.h" -#include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/rounding_mode.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY -#include "src/math/generic/explogxf.h" +#include "src/__support/math/coshf.h" namespace LIBC_NAMESPACE_DECL { -LLVM_LIBC_FUNCTION(float, coshf, (float x)) { - using FPBits = typename fputil::FPBits<float>; - - FPBits xbits(x); - xbits.set_sign(Sign::POS); - x = xbits.get_val(); - - uint32_t x_u = xbits.uintval(); - - // When |x| >= 90, or x is inf or nan - if (LIBC_UNLIKELY(x_u >= 0x42b4'0000U || x_u <= 0x3280'0000U)) { - // |x| <= 2^-26 - if (x_u <= 0x3280'0000U) { - return 1.0f + x; - } - - if (xbits.is_inf_or_nan()) - return x + FPBits::inf().get_val(); - - int rounding = fputil::quick_get_round(); - if (LIBC_UNLIKELY(rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO)) - return FPBits::max_normal().get_val(); - - fputil::set_errno_if_required(ERANGE); - fputil::raise_except_if_required(FE_OVERFLOW); - - return x + FPBits::inf().get_val(); - } - - // TODO: We should be able to reduce the latency and reciprocal throughput - // further by using a low degree (maybe 3-7 ?) minimax polynomial for small - // but not too small inputs, such as |x| < 2^-2, or |x| < 2^-3. 
- - // cosh(x) = (e^x + e^(-x)) / 2. - return static_cast<float>(exp_pm_eval</*is_sinh*/ false>(x)); -} +LLVM_LIBC_FUNCTION(float, coshf, (float x)) { return math::coshf(x); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/coshf16.cpp b/libc/src/math/generic/coshf16.cpp index 689d16a..d86edd9 100644 --- a/libc/src/math/generic/coshf16.cpp +++ b/libc/src/math/generic/coshf16.cpp @@ -7,105 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/math/coshf16.h" -#include "expxf16.h" -#include "hdr/errno_macros.h" -#include "hdr/fenv_macros.h" -#include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/except_value_utils.h" -#include "src/__support/FPUtil/rounding_mode.h" -#include "src/__support/common.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" +#include "src/__support/math/coshf16.h" namespace LIBC_NAMESPACE_DECL { -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS -static constexpr fputil::ExceptValues<float16, 9> COSHF16_EXCEPTS_POS = {{ - // x = 0x1.6ap-5, coshf16(x) = 0x1p+0 (RZ) - {0x29a8U, 0x3c00U, 1U, 0U, 1U}, - // x = 0x1.8c4p+0, coshf16(x) = 0x1.3a8p+1 (RZ) - {0x3e31U, 0x40eaU, 1U, 0U, 0U}, - // x = 0x1.994p+0, coshf16(x) = 0x1.498p+1 (RZ) - {0x3e65U, 0x4126U, 1U, 0U, 0U}, - // x = 0x1.b6p+0, coshf16(x) = 0x1.6d8p+1 (RZ) - {0x3ed8U, 0x41b6U, 1U, 0U, 1U}, - // x = 0x1.aap+1, coshf16(x) = 0x1.be8p+3 (RZ) - {0x42a8U, 0x4afaU, 1U, 0U, 1U}, - // x = 0x1.cc4p+1, coshf16(x) = 0x1.23cp+4 (RZ) - {0x4331U, 0x4c8fU, 1U, 0U, 0U}, - // x = 0x1.288p+2, coshf16(x) = 0x1.9b4p+5 (RZ) - {0x44a2U, 0x526dU, 1U, 0U, 0U}, - // x = 0x1.958p+2, coshf16(x) = 0x1.1a4p+8 (RZ) - {0x4656U, 0x5c69U, 1U, 0U, 0U}, - // x = 0x1.5fp+3, coshf16(x) = 0x1.c54p+14 (RZ) - {0x497cU, 0x7715U, 1U, 0U, 1U}, -}}; - -static constexpr fputil::ExceptValues<float16, 6> COSHF16_EXCEPTS_NEG = {{ - // x = -0x1.6ap-5, coshf16(x) = 0x1p+0 (RZ) 
- {0xa9a8U, 0x3c00U, 1U, 0U, 1U}, - // x = -0x1.b6p+0, coshf16(x) = 0x1.6d8p+1 (RZ) - {0xbed8U, 0x41b6U, 1U, 0U, 1U}, - // x = -0x1.288p+2, coshf16(x) = 0x1.9b4p+5 (RZ) - {0xc4a2U, 0x526dU, 1U, 0U, 0U}, - // x = -0x1.5fp+3, coshf16(x) = 0x1.c54p+14 (RZ) - {0xc97cU, 0x7715U, 1U, 0U, 1U}, - // x = -0x1.8c4p+0, coshf16(x) = 0x1.3a8p+1 (RZ) - {0xbe31U, 0x40eaU, 1U, 0U, 0U}, - // x = -0x1.994p+0, coshf16(x) = 0x1.498p+1 (RZ) - {0xbe65U, 0x4126U, 1U, 0U, 0U}, -}}; -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - -LLVM_LIBC_FUNCTION(float16, coshf16, (float16 x)) { - using FPBits = fputil::FPBits<float16>; - FPBits x_bits(x); - - uint16_t x_u = x_bits.uintval(); - uint16_t x_abs = x_u & 0x7fffU; - - // When |x| >= acosh(2^16), or x is NaN. - if (LIBC_UNLIKELY(x_abs >= 0x49e5U)) { - // cosh(NaN) = NaN - if (x_bits.is_nan()) { - if (x_bits.is_signaling_nan()) { - fputil::raise_except_if_required(FE_INVALID); - return FPBits::quiet_nan().get_val(); - } - - return x; - } - - // When |x| >= acosh(2^16). 
- if (x_abs >= 0x49e5U) { - // cosh(+/-inf) = +inf - if (x_bits.is_inf()) - return FPBits::inf().get_val(); - - switch (fputil::quick_get_round()) { - case FE_TONEAREST: - case FE_UPWARD: - fputil::set_errno_if_required(ERANGE); - fputil::raise_except_if_required(FE_OVERFLOW | FE_INEXACT); - return FPBits::inf().get_val(); - default: - return FPBits::max_normal().get_val(); - } - } - } - -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - if (x_bits.is_pos()) { - if (auto r = COSHF16_EXCEPTS_POS.lookup(x_u); LIBC_UNLIKELY(r.has_value())) - return r.value(); - } else { - if (auto r = COSHF16_EXCEPTS_NEG.lookup(x_u); LIBC_UNLIKELY(r.has_value())) - return r.value(); - } -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - - return eval_sinh_or_cosh</*IsSinh=*/false>(x); -} +LLVM_LIBC_FUNCTION(float16, coshf16, (float16 x)) { return math::coshf16(x); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/cospif.cpp b/libc/src/math/generic/cospif.cpp index 5b6880f..6b524a2 100644 --- a/libc/src/math/generic/cospif.cpp +++ b/libc/src/math/generic/cospif.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/math/cospif.h" -#include "sincosf_utils.h" #include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/multiply_add.h" @@ -15,6 +14,7 @@ #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA +#include "src/__support/math/sincosf_utils.h" namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/math/generic/cospif16.cpp b/libc/src/math/generic/cospif16.cpp index 9dc2592..c99285b 100644 --- a/libc/src/math/generic/cospif16.cpp +++ b/libc/src/math/generic/cospif16.cpp @@ -9,16 +9,17 @@ #include "src/math/cospif16.h" #include "hdr/errno_macros.h" #include "hdr/fenv_macros.h" -#include "sincosf16_utils.h" #include 
"src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/cast.h" #include "src/__support/FPUtil/multiply_add.h" #include "src/__support/macros/optimization.h" +#include "src/__support/math/sincosf16_utils.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(float16, cospif16, (float16 x)) { + using namespace sincosf16_internal; using FPBits = typename fputil::FPBits<float16>; FPBits xbits(x); diff --git a/libc/src/math/generic/exp10m1f.cpp b/libc/src/math/generic/exp10m1f.cpp index 2772910..8589e3f 100644 --- a/libc/src/math/generic/exp10m1f.cpp +++ b/libc/src/math/generic/exp10m1f.cpp @@ -17,8 +17,7 @@ #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" - -#include "explogxf.h" +#include "src/__support/math/exp10f_utils.h" namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/math/generic/exp2.cpp b/libc/src/math/generic/exp2.cpp index 726f88b..154154f 100644 --- a/libc/src/math/generic/exp2.cpp +++ b/libc/src/math/generic/exp2.cpp @@ -8,7 +8,6 @@ #include "src/math/exp2.h" #include "common_constants.h" // Lookup tables EXP2_MID1 and EXP_M2. -#include "explogxf.h" // ziv_test_denorm. #include "src/__support/CPP/bit.h" #include "src/__support/CPP/optional.h" #include "src/__support/FPUtil/FEnvImpl.h" @@ -24,6 +23,7 @@ #include "src/__support/integer_literals.h" #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY +#include "src/__support/math/exp_utils.h" // ziv_test_denorm. 
namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/math/generic/exp2f16.cpp b/libc/src/math/generic/exp2f16.cpp index 5c039c5..5db0c3a 100644 --- a/libc/src/math/generic/exp2f16.cpp +++ b/libc/src/math/generic/exp2f16.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/math/exp2f16.h" -#include "expxf16.h" #include "hdr/errno_macros.h" #include "hdr/fenv_macros.h" #include "src/__support/FPUtil/FEnvImpl.h" @@ -18,6 +17,7 @@ #include "src/__support/common.h" #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" +#include "src/__support/math/expxf16_utils.h" namespace LIBC_NAMESPACE_DECL { @@ -34,6 +34,7 @@ static constexpr fputil::ExceptValues<float16, 3> EXP2F16_EXCEPTS = {{ #endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS LLVM_LIBC_FUNCTION(float16, exp2f16, (float16 x)) { + using namespace math::expxf16_internal; using FPBits = fputil::FPBits<float16>; FPBits x_bits(x); diff --git a/libc/src/math/generic/exp2f_impl.h b/libc/src/math/generic/exp2f_impl.h index 5c6c2bd..b85bb15 100644 --- a/libc/src/math/generic/exp2f_impl.h +++ b/libc/src/math/generic/exp2f_impl.h @@ -20,8 +20,7 @@ #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include "src/__support/macros/properties/cpu_features.h" - -#include "explogxf.h" +#include "src/__support/math/exp10f_utils.h" namespace LIBC_NAMESPACE_DECL { namespace generic { diff --git a/libc/src/math/generic/exp2m1f.cpp b/libc/src/math/generic/exp2m1f.cpp index 127c6ea..16244ed 100644 --- a/libc/src/math/generic/exp2m1f.cpp +++ b/libc/src/math/generic/exp2m1f.cpp @@ -18,8 +18,7 @@ #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" #include "src/__support/macros/properties/cpu_features.h" - -#include "explogxf.h" +#include "src/__support/math/exp10f_utils.h" namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/math/generic/exp2m1f16.cpp 
b/libc/src/math/generic/exp2m1f16.cpp index 61633cd..ce0cc60 100644 --- a/libc/src/math/generic/exp2m1f16.cpp +++ b/libc/src/math/generic/exp2m1f16.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/math/exp2m1f16.h" -#include "expxf16.h" #include "hdr/errno_macros.h" #include "hdr/fenv_macros.h" #include "src/__support/FPUtil/FEnvImpl.h" @@ -21,6 +20,7 @@ #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" #include "src/__support/macros/properties/cpu_features.h" +#include "src/__support/math/expxf16_utils.h" namespace LIBC_NAMESPACE_DECL { @@ -76,6 +76,7 @@ static constexpr fputil::ExceptValues<float16, N_EXP2M1F16_EXCEPTS_HI> #endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS LLVM_LIBC_FUNCTION(float16, exp2m1f16, (float16 x)) { + using namespace math::expxf16_internal; using FPBits = fputil::FPBits<float16>; FPBits x_bits(x); diff --git a/libc/src/math/generic/explogxf.h b/libc/src/math/generic/explogxf.h deleted file mode 100644 index 72f8da8..0000000 --- a/libc/src/math/generic/explogxf.h +++ /dev/null @@ -1,126 +0,0 @@ -//===-- Single-precision general exp/log functions ------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H -#define LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H - -#include "common_constants.h" - -#include "src/__support/common.h" -#include "src/__support/macros/properties/cpu_features.h" -#include "src/__support/math/acoshf_utils.h" -#include "src/__support/math/exp10f_utils.h" -#include "src/__support/math/exp_utils.h" - -namespace LIBC_NAMESPACE_DECL { - -constexpr int LOG_P1_BITS = 6; -constexpr int LOG_P1_SIZE = 1 << LOG_P1_BITS; - -// The function correctly calculates sinh(x) and cosh(x) by calculating exp(x) -// and exp(-x) simultaneously. -// To compute e^x, we perform the following range -// reduction: find hi, mid, lo such that: -// x = (hi + mid) * log(2) + lo, in which -// hi is an integer, -// 0 <= mid * 2^5 < 32 is an integer -// -2^(-6) <= lo * log2(e) <= 2^-6. -// In particular, -// hi + mid = round(x * log2(e) * 2^5) * 2^(-5). -// Then, -// e^x = 2^(hi + mid) * e^lo = 2^hi * 2^mid * e^lo. -// 2^mid is stored in the lookup table of 32 elements. -// e^lo is computed using a degree-5 minimax polynomial -// generated by Sollya: -// e^lo ~ P(lo) = 1 + lo + c2 * lo^2 + ... + c5 * lo^5 -// = (1 + c2*lo^2 + c4*lo^4) + lo * (1 + c3*lo^2 + c5*lo^4) -// = P_even + lo * P_odd -// We perform 2^hi * 2^mid by simply add hi to the exponent field -// of 2^mid. 
-// To compute e^(-x), notice that: -// e^(-x) = 2^(-(hi + mid)) * e^(-lo) -// ~ 2^(-(hi + mid)) * P(-lo) -// = 2^(-(hi + mid)) * (P_even - lo * P_odd) -// So: -// sinh(x) = (e^x - e^(-x)) / 2 -// ~ 0.5 * (2^(hi + mid) * (P_even + lo * P_odd) - -// 2^(-(hi + mid)) * (P_even - lo * P_odd)) -// = 0.5 * (P_even * (2^(hi + mid) - 2^(-(hi + mid))) + -// lo * P_odd * (2^(hi + mid) + 2^(-(hi + mid)))) -// And similarly: -// cosh(x) = (e^x + e^(-x)) / 2 -// ~ 0.5 * (P_even * (2^(hi + mid) + 2^(-(hi + mid))) + -// lo * P_odd * (2^(hi + mid) - 2^(-(hi + mid)))) -// The main point of these formulas is that the expensive part of calculating -// the polynomials approximating lower parts of e^(x) and e^(-x) are shared -// and only done once. -template <bool is_sinh> LIBC_INLINE double exp_pm_eval(float x) { - double xd = static_cast<double>(x); - - // kd = round(x * log2(e) * 2^5) - // k_p = round(x * log2(e) * 2^5) - // k_m = round(-x * log2(e) * 2^5) - double kd; - int k_p, k_m; - -#ifdef LIBC_TARGET_CPU_HAS_NEAREST_INT - kd = fputil::nearest_integer(ExpBase::LOG2_B * xd); - k_p = static_cast<int>(kd); - k_m = -k_p; -#else - constexpr double HALF_WAY[2] = {0.5, -0.5}; - - k_p = static_cast<int>( - fputil::multiply_add(xd, ExpBase::LOG2_B, HALF_WAY[x < 0.0f])); - k_m = -k_p; - kd = static_cast<double>(k_p); -#endif // LIBC_TARGET_CPU_HAS_NEAREST_INT - - // hi = floor(kf * 2^(-5)) - // exp_hi = shift hi to the exponent field of double precision. 
- int64_t exp_hi_p = static_cast<int64_t>((k_p >> ExpBase::MID_BITS)) - << fputil::FPBits<double>::FRACTION_LEN; - int64_t exp_hi_m = static_cast<int64_t>((k_m >> ExpBase::MID_BITS)) - << fputil::FPBits<double>::FRACTION_LEN; - // mh_p = 2^(hi + mid) - // mh_m = 2^(-(hi + mid)) - // mh_bits_* = bit field of mh_* - int64_t mh_bits_p = ExpBase::EXP_2_MID[k_p & ExpBase::MID_MASK] + exp_hi_p; - int64_t mh_bits_m = ExpBase::EXP_2_MID[k_m & ExpBase::MID_MASK] + exp_hi_m; - double mh_p = fputil::FPBits<double>(uint64_t(mh_bits_p)).get_val(); - double mh_m = fputil::FPBits<double>(uint64_t(mh_bits_m)).get_val(); - // mh_sum = 2^(hi + mid) + 2^(-(hi + mid)) - double mh_sum = mh_p + mh_m; - // mh_diff = 2^(hi + mid) - 2^(-(hi + mid)) - double mh_diff = mh_p - mh_m; - - // dx = lo = x - (hi + mid) * log(2) - double dx = - fputil::multiply_add(kd, ExpBase::M_LOGB_2_LO, - fputil::multiply_add(kd, ExpBase::M_LOGB_2_HI, xd)); - double dx2 = dx * dx; - - // c0 = 1 + COEFFS[0] * lo^2 - // P_even = (1 + COEFFS[0] * lo^2 + COEFFS[2] * lo^4) / 2 - double p_even = fputil::polyeval(dx2, 0.5, ExpBase::COEFFS[0] * 0.5, - ExpBase::COEFFS[2] * 0.5); - // P_odd = (1 + COEFFS[1] * lo^2 + COEFFS[3] * lo^4) / 2 - double p_odd = fputil::polyeval(dx2, 0.5, ExpBase::COEFFS[1] * 0.5, - ExpBase::COEFFS[3] * 0.5); - - double r; - if constexpr (is_sinh) - r = fputil::multiply_add(dx * mh_sum, p_odd, p_even * mh_diff); - else - r = fputil::multiply_add(dx * mh_diff, p_odd, p_even * mh_sum); - return r; -} - -} // namespace LIBC_NAMESPACE_DECL - -#endif // LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H diff --git a/libc/src/math/generic/expm1.cpp b/libc/src/math/generic/expm1.cpp index a4dbf38..c360554 100644 --- a/libc/src/math/generic/expm1.cpp +++ b/libc/src/math/generic/expm1.cpp @@ -8,9 +8,7 @@ #include "src/math/expm1.h" #include "common_constants.h" // Lookup tables EXP_M1 and EXP_M2. -#include "explogxf.h" // ziv_test_denorm. 
#include "src/__support/CPP/bit.h" -#include "src/__support/CPP/optional.h" #include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/PolyEval.h" @@ -18,7 +16,6 @@ #include "src/__support/FPUtil/dyadic_float.h" #include "src/__support/FPUtil/except_value_utils.h" #include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/FPUtil/nearest_integer.h" #include "src/__support/FPUtil/rounding_mode.h" #include "src/__support/FPUtil/triple_double.h" #include "src/__support/common.h" diff --git a/libc/src/math/generic/expm1f16.cpp b/libc/src/math/generic/expm1f16.cpp index 2188dfb..c2231f0 100644 --- a/libc/src/math/generic/expm1f16.cpp +++ b/libc/src/math/generic/expm1f16.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/math/expm1f16.h" -#include "expxf16.h" #include "hdr/errno_macros.h" #include "hdr/fenv_macros.h" #include "src/__support/FPUtil/FEnvImpl.h" @@ -20,6 +19,7 @@ #include "src/__support/common.h" #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" +#include "src/__support/math/expxf16_utils.h" namespace LIBC_NAMESPACE_DECL { @@ -51,6 +51,7 @@ static constexpr fputil::ExceptValues<float16, N_EXPM1F16_EXCEPTS_HI> #endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS LLVM_LIBC_FUNCTION(float16, expm1f16, (float16 x)) { + using namespace math::expxf16_internal; using FPBits = fputil::FPBits<float16>; FPBits x_bits(x); diff --git a/libc/src/math/generic/expxf16.h b/libc/src/math/generic/expxf16.h deleted file mode 100644 index 562a427..0000000 --- a/libc/src/math/generic/expxf16.h +++ /dev/null @@ -1,232 +0,0 @@ -//===-- Common utilities for half-precision exponential functions ---------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC_MATH_GENERIC_EXPXF16_H -#define LLVM_LIBC_SRC_MATH_GENERIC_EXPXF16_H - -#include "hdr/stdint_proxy.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/cast.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/FPUtil/nearest_integer.h" -#include "src/__support/macros/attributes.h" -#include "src/__support/macros/config.h" -#include "src/__support/math/exp10_float16_constants.h" -#include "src/__support/math/expf16_utils.h" - -namespace LIBC_NAMESPACE_DECL { - -LIBC_INLINE ExpRangeReduction exp2_range_reduction(float16 x) { - // For -25 < x < 16, to compute 2^x, we perform the following range reduction: - // find hi, mid, lo, such that: - // x = hi + mid + lo, in which - // hi is an integer, - // mid * 2^3 is an integer, - // -2^(-4) <= lo < 2^(-4). - // In particular, - // hi + mid = round(x * 2^3) * 2^(-3). - // Then, - // 2^x = 2^(hi + mid + lo) = 2^hi * 2^mid * 2^lo. - // We store 2^mid in the lookup table EXP2_MID_BITS, and compute 2^hi * 2^mid - // by adding hi to the exponent field of 2^mid. 2^lo is computed using a - // degree-3 minimax polynomial generated by Sollya. 
- - float xf = x; - float kf = fputil::nearest_integer(xf * 0x1.0p+3f); - int x_hi_mid = static_cast<int>(kf); - unsigned x_hi = static_cast<unsigned>(x_hi_mid) >> 3; - unsigned x_mid = static_cast<unsigned>(x_hi_mid) & 0x7; - // lo = x - (hi + mid) = round(x * 2^3) * (-2^(-3)) + x - float lo = fputil::multiply_add(kf, -0x1.0p-3f, xf); - - uint32_t exp2_hi_mid_bits = - EXP2_MID_BITS[x_mid] + - static_cast<uint32_t>(x_hi << fputil::FPBits<float>::FRACTION_LEN); - float exp2_hi_mid = fputil::FPBits<float>(exp2_hi_mid_bits).get_val(); - // Degree-3 minimax polynomial generated by Sollya with the following - // commands: - // > display = hexadecimal; - // > P = fpminimax((2^x - 1)/x, 2, [|SG...|], [-2^-4, 2^-4]); - // > 1 + x * P; - float exp2_lo = fputil::polyeval(lo, 0x1p+0f, 0x1.62e43p-1f, 0x1.ec0aa6p-3f, - 0x1.c6b4a6p-5f); - return {exp2_hi_mid, exp2_lo}; -} - -// Generated by Sollya with the following commands: -// > display = hexadecimal; -// > round(log2(exp(1)), SG, RN); -static constexpr float LOG2F_E = 0x1.715476p+0f; - -// Generated by Sollya with the following commands: -// > display = hexadecimal; -// > round(log(2), SG, RN); -static constexpr float LOGF_2 = 0x1.62e43p-1f; - -// Generated by Sollya with the following commands: -// > display = hexadecimal; -// > for i from 0 to 31 do printsingle(round(2^(i * 2^-5), SG, RN)); -static constexpr cpp::array<uint32_t, 32> EXP2_MID_5_BITS = { - 0x3f80'0000U, 0x3f82'cd87U, 0x3f85'aac3U, 0x3f88'980fU, 0x3f8b'95c2U, - 0x3f8e'a43aU, 0x3f91'c3d3U, 0x3f94'f4f0U, 0x3f98'37f0U, 0x3f9b'8d3aU, - 0x3f9e'f532U, 0x3fa2'7043U, 0x3fa5'fed7U, 0x3fa9'a15bU, 0x3fad'583fU, - 0x3fb1'23f6U, 0x3fb5'04f3U, 0x3fb8'fbafU, 0x3fbd'08a4U, 0x3fc1'2c4dU, - 0x3fc5'672aU, 0x3fc9'b9beU, 0x3fce'248cU, 0x3fd2'a81eU, 0x3fd7'44fdU, - 0x3fdb'fbb8U, 0x3fe0'ccdfU, 0x3fe5'b907U, 0x3fea'c0c7U, 0x3fef'e4baU, - 0x3ff5'257dU, 0x3ffa'83b3U, -}; - -// This function correctly calculates sinh(x) and cosh(x) by calculating exp(x) -// and exp(-x) simultaneously. 
-// To compute e^x, we perform the following range reduction: -// find hi, mid, lo such that: -// x = (hi + mid) * log(2) + lo, in which -// hi is an integer, -// 0 <= mid * 2^5 < 32 is an integer -// -2^(-5) <= lo * log2(e) <= 2^-5. -// In particular, -// hi + mid = round(x * log2(e) * 2^5) * 2^(-5). -// Then, -// e^x = 2^(hi + mid) * e^lo = 2^hi * 2^mid * e^lo. -// We store 2^mid in the lookup table EXP2_MID_5_BITS, and compute 2^hi * 2^mid -// by adding hi to the exponent field of 2^mid. -// e^lo is computed using a degree-3 minimax polynomial generated by Sollya: -// e^lo ~ P(lo) -// = 1 + lo + c2 * lo^2 + ... + c5 * lo^5 -// = (1 + c2*lo^2 + c4*lo^4) + lo * (1 + c3*lo^2 + c5*lo^4) -// = P_even + lo * P_odd -// To compute e^(-x), notice that: -// e^(-x) = 2^(-(hi + mid)) * e^(-lo) -// ~ 2^(-(hi + mid)) * P(-lo) -// = 2^(-(hi + mid)) * (P_even - lo * P_odd) -// So: -// sinh(x) = (e^x - e^(-x)) / 2 -// ~ 0.5 * (2^(hi + mid) * (P_even + lo * P_odd) - -// 2^(-(hi + mid)) * (P_even - lo * P_odd)) -// = 0.5 * (P_even * (2^(hi + mid) - 2^(-(hi + mid))) + -// lo * P_odd * (2^(hi + mid) + 2^(-(hi + mid)))) -// And similarly: -// cosh(x) = (e^x + e^(-x)) / 2 -// ~ 0.5 * (P_even * (2^(hi + mid) + 2^(-(hi + mid))) + -// lo * P_odd * (2^(hi + mid) - 2^(-(hi + mid)))) -// The main point of these formulas is that the expensive part of calculating -// the polynomials approximating lower parts of e^x and e^(-x) is shared and -// only done once. 
-template <bool IsSinh> LIBC_INLINE float16 eval_sinh_or_cosh(float16 x) { - float xf = x; - float kf = fputil::nearest_integer(xf * (LOG2F_E * 0x1.0p+5f)); - int x_hi_mid_p = static_cast<int>(kf); - int x_hi_mid_m = -x_hi_mid_p; - - unsigned x_hi_p = static_cast<unsigned>(x_hi_mid_p) >> 5; - unsigned x_hi_m = static_cast<unsigned>(x_hi_mid_m) >> 5; - unsigned x_mid_p = static_cast<unsigned>(x_hi_mid_p) & 0x1f; - unsigned x_mid_m = static_cast<unsigned>(x_hi_mid_m) & 0x1f; - - uint32_t exp2_hi_mid_bits_p = - EXP2_MID_5_BITS[x_mid_p] + - static_cast<uint32_t>(x_hi_p << fputil::FPBits<float>::FRACTION_LEN); - uint32_t exp2_hi_mid_bits_m = - EXP2_MID_5_BITS[x_mid_m] + - static_cast<uint32_t>(x_hi_m << fputil::FPBits<float>::FRACTION_LEN); - // exp2_hi_mid_p = 2^(hi + mid) - float exp2_hi_mid_p = fputil::FPBits<float>(exp2_hi_mid_bits_p).get_val(); - // exp2_hi_mid_m = 2^(-(hi + mid)) - float exp2_hi_mid_m = fputil::FPBits<float>(exp2_hi_mid_bits_m).get_val(); - - // exp2_hi_mid_sum = 2^(hi + mid) + 2^(-(hi + mid)) - float exp2_hi_mid_sum = exp2_hi_mid_p + exp2_hi_mid_m; - // exp2_hi_mid_diff = 2^(hi + mid) - 2^(-(hi + mid)) - float exp2_hi_mid_diff = exp2_hi_mid_p - exp2_hi_mid_m; - - // lo = x - (hi + mid) = round(x * log2(e) * 2^5) * log(2) * (-2^(-5)) + x - float lo = fputil::multiply_add(kf, LOGF_2 * -0x1.0p-5f, xf); - float lo_sq = lo * lo; - - // Degree-3 minimax polynomial generated by Sollya with the following - // commands: - // > display = hexadecimal; - // > P = fpminimax(expm1(x)/x, 2, [|SG...|], [-2^-5, 2^-5]); - // > 1 + x * P; - constexpr cpp::array<float, 4> COEFFS = {0x1p+0f, 0x1p+0f, 0x1.0004p-1f, - 0x1.555778p-3f}; - float half_p_odd = - fputil::polyeval(lo_sq, COEFFS[1] * 0.5f, COEFFS[3] * 0.5f); - float half_p_even = - fputil::polyeval(lo_sq, COEFFS[0] * 0.5f, COEFFS[2] * 0.5f); - - // sinh(x) = lo * (0.5 * P_odd * (2^(hi + mid) + 2^(-(hi + mid)))) + - // (0.5 * P_even * (2^(hi + mid) - 2^(-(hi + mid)))) - if constexpr (IsSinh) - return 
fputil::cast<float16>(fputil::multiply_add( - lo, half_p_odd * exp2_hi_mid_sum, half_p_even * exp2_hi_mid_diff)); - // cosh(x) = lo * (0.5 * P_odd * (2^(hi + mid) - 2^(-(hi + mid)))) + - // (0.5 * P_even * (2^(hi + mid) + 2^(-(hi + mid)))) - return fputil::cast<float16>(fputil::multiply_add( - lo, half_p_odd * exp2_hi_mid_diff, half_p_even * exp2_hi_mid_sum)); -} - -// Generated by Sollya with the following commands: -// > display = hexadecimal; -// > for i from 0 to 31 do print(round(log(1 + i * 2^-5), SG, RN)); -constexpr cpp::array<float, 32> LOGF_F = { - 0x0p+0f, 0x1.f829bp-6f, 0x1.f0a30cp-5f, 0x1.6f0d28p-4f, - 0x1.e27076p-4f, 0x1.29553p-3f, 0x1.5ff308p-3f, 0x1.9525aap-3f, - 0x1.c8ff7cp-3f, 0x1.fb9186p-3f, 0x1.1675cap-2f, 0x1.2e8e2cp-2f, - 0x1.4618bcp-2f, 0x1.5d1bdcp-2f, 0x1.739d8p-2f, 0x1.89a338p-2f, - 0x1.9f323ep-2f, 0x1.b44f78p-2f, 0x1.c8ff7cp-2f, 0x1.dd46ap-2f, - 0x1.f128f6p-2f, 0x1.02552ap-1f, 0x1.0be72ep-1f, 0x1.154c3ep-1f, - 0x1.1e85f6p-1f, 0x1.2795e2p-1f, 0x1.307d74p-1f, 0x1.393e0ep-1f, - 0x1.41d8fep-1f, 0x1.4a4f86p-1f, 0x1.52a2d2p-1f, 0x1.5ad404p-1f, -}; - -// Generated by Sollya with the following commands: -// > display = hexadecimal; -// > for i from 0 to 31 do print(round(log2(1 + i * 2^-5), SG, RN)); -constexpr cpp::array<float, 32> LOG2F_F = { - 0x0p+0f, 0x1.6bad38p-5f, 0x1.663f7p-4f, 0x1.08c588p-3f, - 0x1.5c01a4p-3f, 0x1.acf5e2p-3f, 0x1.fbc16cp-3f, 0x1.24407ap-2f, - 0x1.49a784p-2f, 0x1.6e221cp-2f, 0x1.91bba8p-2f, 0x1.b47ecp-2f, - 0x1.d6753ep-2f, 0x1.f7a856p-2f, 0x1.0c105p-1f, 0x1.1bf312p-1f, - 0x1.2b8034p-1f, 0x1.3abb4p-1f, 0x1.49a784p-1f, 0x1.584822p-1f, - 0x1.66a008p-1f, 0x1.74b1fep-1f, 0x1.82809ep-1f, 0x1.900e62p-1f, - 0x1.9d5dap-1f, 0x1.aa709p-1f, 0x1.b74948p-1f, 0x1.c3e9cap-1f, - 0x1.d053f6p-1f, 0x1.dc899ap-1f, 0x1.e88c6cp-1f, 0x1.f45e08p-1f, -}; - -// Generated by Sollya with the following commands: -// > display = hexadecimal; -// > for i from 0 to 31 do print(round(log10(1 + i * 2^-5), SG, RN)); -constexpr cpp::array<float, 32> LOG10F_F 
= { - 0x0p+0f, 0x1.b5e908p-7f, 0x1.af5f92p-6f, 0x1.3ed11ap-5f, - 0x1.a30a9ep-5f, 0x1.02428cp-4f, 0x1.31b306p-4f, 0x1.5fe804p-4f, - 0x1.8cf184p-4f, 0x1.b8de4ep-4f, 0x1.e3bc1ap-4f, 0x1.06cbd6p-3f, - 0x1.1b3e72p-3f, 0x1.2f3b6ap-3f, 0x1.42c7e8p-3f, 0x1.55e8c6p-3f, - 0x1.68a288p-3f, 0x1.7af974p-3f, 0x1.8cf184p-3f, 0x1.9e8e7cp-3f, - 0x1.afd3e4p-3f, 0x1.c0c514p-3f, 0x1.d1653p-3f, 0x1.e1b734p-3f, - 0x1.f1bdeep-3f, 0x1.00be06p-2f, 0x1.087a08p-2f, 0x1.101432p-2f, - 0x1.178da6p-2f, 0x1.1ee778p-2f, 0x1.2622bp-2f, 0x1.2d404cp-2f, -}; - -// Generated by Sollya with the following commands: -// > display = hexadecimal; -// > for i from 0 to 31 do print(round(1 / (1 + i * 2^-5), SG, RN)); -constexpr cpp::array<float, 32> ONE_OVER_F_F = { - 0x1p+0f, 0x1.f07c2p-1f, 0x1.e1e1e2p-1f, 0x1.d41d42p-1f, - 0x1.c71c72p-1f, 0x1.bacf92p-1f, 0x1.af286cp-1f, 0x1.a41a42p-1f, - 0x1.99999ap-1f, 0x1.8f9c18p-1f, 0x1.861862p-1f, 0x1.7d05f4p-1f, - 0x1.745d18p-1f, 0x1.6c16c2p-1f, 0x1.642c86p-1f, 0x1.5c9882p-1f, - 0x1.555556p-1f, 0x1.4e5e0ap-1f, 0x1.47ae14p-1f, 0x1.414142p-1f, - 0x1.3b13b2p-1f, 0x1.3521dp-1f, 0x1.2f684cp-1f, 0x1.29e412p-1f, - 0x1.24924ap-1f, 0x1.1f7048p-1f, 0x1.1a7b96p-1f, 0x1.15b1e6p-1f, - 0x1.111112p-1f, 0x1.0c9714p-1f, 0x1.08421p-1f, 0x1.041042p-1f, -}; - -} // namespace LIBC_NAMESPACE_DECL - -#endif // LLVM_LIBC_SRC_MATH_GENERIC_EXPXF16_H diff --git a/libc/src/math/generic/fmaxbf16.cpp b/libc/src/math/generic/fmaxbf16.cpp new file mode 100644 index 0000000..01d395b --- /dev/null +++ b/libc/src/math/generic/fmaxbf16.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of fmaxbf16 function -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/fmaxbf16.h" +#include "src/__support/FPUtil/BasicOperations.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, fmaxbf16, (bfloat16 x, bfloat16 y)) { + return fputil::fmax(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/fmaximum_mag_numbf16.cpp b/libc/src/math/generic/fmaximum_mag_numbf16.cpp new file mode 100644 index 0000000..485e3295 --- /dev/null +++ b/libc/src/math/generic/fmaximum_mag_numbf16.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of fmaximum_mag_numbf16 function -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/fmaximum_mag_numbf16.h" +#include "src/__support/FPUtil/BasicOperations.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, fmaximum_mag_numbf16, (bfloat16 x, bfloat16 y)) { + return fputil::fmaximum_mag_num(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/fmaximum_magbf16.cpp b/libc/src/math/generic/fmaximum_magbf16.cpp new file mode 100644 index 0000000..0654ed9 --- /dev/null +++ b/libc/src/math/generic/fmaximum_magbf16.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of fmaximum_magbf16 function -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/fmaximum_magbf16.h" +#include "src/__support/FPUtil/BasicOperations.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, fmaximum_magbf16, (bfloat16 x, bfloat16 y)) { + return fputil::fmaximum_mag(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/fmaximum_numbf16.cpp b/libc/src/math/generic/fmaximum_numbf16.cpp new file mode 100644 index 0000000..b058d50 --- /dev/null +++ b/libc/src/math/generic/fmaximum_numbf16.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of fmaximum_numbf16 function -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/fmaximum_numbf16.h" +#include "src/__support/FPUtil/BasicOperations.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, fmaximum_numbf16, (bfloat16 x, bfloat16 y)) { + return fputil::fmaximum_num(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/fmaximumbf16.cpp b/libc/src/math/generic/fmaximumbf16.cpp new file mode 100644 index 0000000..e10830b --- /dev/null +++ b/libc/src/math/generic/fmaximumbf16.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of fmaximumbf16 function ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/fmaximumbf16.h" +#include "src/__support/FPUtil/BasicOperations.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, fmaximumbf16, (bfloat16 x, bfloat16 y)) { + return fputil::fmaximum(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/fminbf16.cpp b/libc/src/math/generic/fminbf16.cpp new file mode 100644 index 0000000..c3e29ee --- /dev/null +++ b/libc/src/math/generic/fminbf16.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of fminbf16 function -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/fminbf16.h" +#include "src/__support/FPUtil/BasicOperations.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, fminbf16, (bfloat16 x, bfloat16 y)) { + return fputil::fmin(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/fminimum_mag_numbf16.cpp b/libc/src/math/generic/fminimum_mag_numbf16.cpp new file mode 100644 index 0000000..5056fc7 --- /dev/null +++ b/libc/src/math/generic/fminimum_mag_numbf16.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of fminimum_mag_numbf16 function -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/fminimum_mag_numbf16.h" +#include "src/__support/FPUtil/BasicOperations.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, fminimum_mag_numbf16, (bfloat16 x, bfloat16 y)) { + return fputil::fminimum_mag_num(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/fminimum_magbf16.cpp b/libc/src/math/generic/fminimum_magbf16.cpp new file mode 100644 index 0000000..f61d2d2 --- /dev/null +++ b/libc/src/math/generic/fminimum_magbf16.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of fminimum_magbf16 function -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/fminimum_magbf16.h" +#include "src/__support/FPUtil/BasicOperations.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, fminimum_magbf16, (bfloat16 x, bfloat16 y)) { + return fputil::fminimum_mag(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/fminimum_numbf16.cpp b/libc/src/math/generic/fminimum_numbf16.cpp new file mode 100644 index 0000000..079a830 --- /dev/null +++ b/libc/src/math/generic/fminimum_numbf16.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of fminimum_numbf16 function -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/fminimum_numbf16.h" +#include "src/__support/FPUtil/BasicOperations.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, fminimum_numbf16, (bfloat16 x, bfloat16 y)) { + return fputil::fminimum_num(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/fminimumbf16.cpp b/libc/src/math/generic/fminimumbf16.cpp new file mode 100644 index 0000000..da976b9 --- /dev/null +++ b/libc/src/math/generic/fminimumbf16.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of fminimumbf16 function ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/fminimumbf16.h" +#include "src/__support/FPUtil/BasicOperations.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, fminimumbf16, (bfloat16 x, bfloat16 y)) { + return fputil::fminimum(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/fromfpbf16.cpp b/libc/src/math/generic/fromfpbf16.cpp new file mode 100644 index 0000000..db1b8f1 --- /dev/null +++ b/libc/src/math/generic/fromfpbf16.cpp @@ -0,0 +1,22 @@ +//===-- Implementation of fromfpbf16 function -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/fromfpbf16.h" +#include "src/__support/FPUtil/NearestIntegerOperations.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, fromfpbf16, + (bfloat16 x, int rnd, unsigned int width)) { + return fputil::fromfp</*IsSigned=*/true>(x, rnd, width); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/fromfpxbf16.cpp b/libc/src/math/generic/fromfpxbf16.cpp new file mode 100644 index 0000000..8c16c41 --- /dev/null +++ b/libc/src/math/generic/fromfpxbf16.cpp @@ -0,0 +1,22 @@ +//===-- Implementation of fromfpxbf16 function ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/fromfpxbf16.h" +#include "src/__support/FPUtil/NearestIntegerOperations.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, fromfpxbf16, + (bfloat16 x, int rnd, unsigned int width)) { + return fputil::fromfpx</*IsSigned=*/true>(x, rnd, width); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/log10f16.cpp b/libc/src/math/generic/log10f16.cpp index 2626af4..4bb684a 100644 --- a/libc/src/math/generic/log10f16.cpp +++ b/libc/src/math/generic/log10f16.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/math/log10f16.h" -#include "expxf16.h" #include "hdr/errno_macros.h" #include "hdr/fenv_macros.h" #include "src/__support/FPUtil/FEnvImpl.h" @@ -20,6 +19,7 @@ #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" #include "src/__support/macros/properties/cpu_features.h" +#include "src/__support/math/expxf16_utils.h" namespace LIBC_NAMESPACE_DECL { @@ -75,6 +75,7 @@ static constexpr fputil::ExceptValues<float16, N_LOG10F16_EXCEPTS> #endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS LLVM_LIBC_FUNCTION(float16, log10f16, (float16 x)) { + using namespace math::expxf16_internal; using FPBits = fputil::FPBits<float16>; FPBits x_bits(x); diff --git a/libc/src/math/generic/log2f16.cpp b/libc/src/math/generic/log2f16.cpp index 34be780..5b60323 100644 --- a/libc/src/math/generic/log2f16.cpp +++ b/libc/src/math/generic/log2f16.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/math/log2f16.h" -#include "expxf16.h" #include "hdr/errno_macros.h" 
#include "hdr/fenv_macros.h" #include "src/__support/FPUtil/FEnvImpl.h" @@ -20,6 +19,7 @@ #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" #include "src/__support/macros/properties/cpu_features.h" +#include "src/__support/math/expxf16_utils.h" namespace LIBC_NAMESPACE_DECL { @@ -61,6 +61,7 @@ static constexpr fputil::ExceptValues<float16, N_LOG2F16_EXCEPTS> #endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS LLVM_LIBC_FUNCTION(float16, log2f16, (float16 x)) { + using namespace math::expxf16_internal; using FPBits = fputil::FPBits<float16>; FPBits x_bits(x); diff --git a/libc/src/math/generic/logf16.cpp b/libc/src/math/generic/logf16.cpp index 8e0d7d8..22e0dc8 100644 --- a/libc/src/math/generic/logf16.cpp +++ b/libc/src/math/generic/logf16.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/math/logf16.h" -#include "expxf16.h" #include "hdr/errno_macros.h" #include "hdr/fenv_macros.h" #include "src/__support/FPUtil/FEnvImpl.h" @@ -20,6 +19,7 @@ #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" #include "src/__support/macros/properties/cpu_features.h" +#include "src/__support/math/expxf16_utils.h" namespace LIBC_NAMESPACE_DECL { @@ -68,6 +68,7 @@ static constexpr fputil::ExceptValues<float16, N_LOGF16_EXCEPTS> #endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS LLVM_LIBC_FUNCTION(float16, logf16, (float16 x)) { + using namespace math::expxf16_internal; using FPBits = fputil::FPBits<float16>; FPBits x_bits(x); diff --git a/libc/src/math/generic/range_reduction.h b/libc/src/math/generic/range_reduction.h deleted file mode 100644 index 9ea446d..0000000 --- a/libc/src/math/generic/range_reduction.h +++ /dev/null @@ -1,90 +0,0 @@ -//===-- Utilities for trigonometric functions -------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_H -#define LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_H - -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/FPUtil/nearest_integer.h" -#include "src/__support/common.h" -#include "src/__support/macros/config.h" - -namespace LIBC_NAMESPACE_DECL { - -namespace generic { - -static constexpr uint32_t FAST_PASS_BOUND = 0x4a80'0000U; // 2^22 - -static constexpr int N_ENTRIES = 8; - -// We choose to split bits of 32/pi into 28-bit precision pieces, so that the -// product of x * THIRTYTWO_OVER_PI_28[i] is exact. -// These are generated by Sollya with: -// > a1 = D(round(32/pi, 28, RN)); a1; -// > a2 = D(round(32/pi - a1, 28, RN)); a2; -// > a3 = D(round(32/pi - a1 - a2, 28, RN)); a3; -// > a4 = D(round(32/pi - a1 - a2 - a3, 28, RN)); a4; -// ... -static constexpr double THIRTYTWO_OVER_PI_28[N_ENTRIES] = { - 0x1.45f306ep+3, -0x1.b1bbeaep-28, 0x1.3f84ebp-57, -0x1.7056592p-87, - 0x1.c0db62ap-116, -0x1.4cd8778p-145, -0x1.bef806cp-174, 0x1.63abdecp-204}; - -// Exponents of the least significant bits of the corresponding entries in -// THIRTYTWO_OVER_PI_28. -static constexpr int THIRTYTWO_OVER_PI_28_LSB_EXP[N_ENTRIES] = { - -24, -55, -81, -114, -143, -170, -200, -230}; - -// Return k and y, where -// k = round(x * 16 / pi) and y = (x * 16 / pi) - k. -LIBC_INLINE int64_t small_range_reduction(double x, double &y) { - double prod = x * THIRTYTWO_OVER_PI_28[0]; - double kd = fputil::nearest_integer(prod); - y = prod - kd; - y = fputil::multiply_add(x, THIRTYTWO_OVER_PI_28[1], y); - y = fputil::multiply_add(x, THIRTYTWO_OVER_PI_28[2], y); - return static_cast<int64_t>(kd); -} - -// Return k and y, where -// k = round(x * 32 / pi) and y = (x * 32 / pi) - k. 
-// For large range, there are at most 2 parts of THIRTYTWO_OVER_PI_28 -// contributing to the lowest 6 binary digits (k & 63). If the least -// significant bit of x * the least significant bit of THIRTYTWO_OVER_PI_28[i] -// >= 64, we can completely ignore THIRTYTWO_OVER_PI_28[i]. -LIBC_INLINE int64_t large_range_reduction(double x, int x_exp, double &y) { - int idx = 0; - y = 0; - int x_lsb_exp_m4 = x_exp - fputil::FPBits<float>::FRACTION_LEN; - - // Skipping the first parts of 32/pi such that: - // LSB of x * LSB of THIRTYTWO_OVER_PI_28[i] >= 32. - while (x_lsb_exp_m4 + THIRTYTWO_OVER_PI_28_LSB_EXP[idx] > 5) - ++idx; - - double prod_hi = x * THIRTYTWO_OVER_PI_28[idx]; - // Get the integral part of x * THIRTYTWO_OVER_PI_28[idx] - double k_hi = fputil::nearest_integer(prod_hi); - // Get the fractional part of x * THIRTYTWO_OVER_PI_28[idx] - double frac = prod_hi - k_hi; - double prod_lo = fputil::multiply_add(x, THIRTYTWO_OVER_PI_28[idx + 1], frac); - double k_lo = fputil::nearest_integer(prod_lo); - - // Now y is the fractional parts. - y = prod_lo - k_lo; - y = fputil::multiply_add(x, THIRTYTWO_OVER_PI_28[idx + 2], y); - y = fputil::multiply_add(x, THIRTYTWO_OVER_PI_28[idx + 3], y); - - return static_cast<int64_t>(k_hi) + static_cast<int64_t>(k_lo); -} - -} // namespace generic - -} // namespace LIBC_NAMESPACE_DECL - -#endif // LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_H diff --git a/libc/src/math/generic/range_reduction_double_common.h b/libc/src/math/generic/range_reduction_double_common.h deleted file mode 100644 index a93ee25..0000000 --- a/libc/src/math/generic/range_reduction_double_common.h +++ /dev/null @@ -1,374 +0,0 @@ -//===-- Range reduction for double precision sin/cos/tan -*- C++ --------*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_COMMON_H -#define LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_COMMON_H - -#include "src/__support/FPUtil/double_double.h" -#include "src/__support/FPUtil/dyadic_float.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/FPUtil/nearest_integer.h" -#include "src/__support/common.h" -#include "src/__support/integer_literals.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" - -namespace LIBC_NAMESPACE_DECL { - -#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE -static constexpr unsigned SPLIT = fputil::DefaultSplit<double>::VALUE; -#else -// When there is no-FMA instructions, in order to have exact product of 2 double -// precision with directional roundings, we need to lower the precision of the -// constants by at least 1 bit, and use a different splitting constant. -static constexpr unsigned SPLIT = 28; -#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE - -using LIBC_NAMESPACE::fputil::DoubleDouble; -using Float128 = LIBC_NAMESPACE::fputil::DyadicFloat<128>; - -#define FAST_PASS_EXPONENT 16 - -// For 2^-7 < |x| < 2^16, return k and u such that: -// k = round(x * 128/pi) -// x mod pi/128 = x - k * pi/128 ~ u.hi + u.lo -// Error bound: -// |(x - k * pi/128) - (u_hi + u_lo)| <= max(ulp(ulp(u_hi)), 2^-119) -// <= 2^-111. -LIBC_INLINE unsigned range_reduction_small(double x, DoubleDouble &u) { - // Values of -pi/128 used for inputs with absolute value <= 2^16. - // The first 3 parts are generated with (53 - 21 = 32)-bit precision, so that - // the product k * MPI_OVER_128[i] is exact. 
- // Generated by Sollya with: - // > display = hexadecimal!; - // > a = round(pi/128, 32, RN); - // > b = round(pi/128 - a, 32, RN); - // > c = round(pi/128 - a - b, D, RN); - // > print(-a, ",", -b, ",", -c); - constexpr double MPI_OVER_128[3] = {-0x1.921fb544p-6, -0x1.0b4611a6p-40, - -0x1.3198a2e037073p-75}; - constexpr double ONE_TWENTY_EIGHT_OVER_PI_D = 0x1.45f306dc9c883p5; - double prod_hi = x * ONE_TWENTY_EIGHT_OVER_PI_D; - double kd = fputil::nearest_integer(prod_hi); - - // Let y = x - k * (pi/128) - // Then |y| < pi / 256 - // With extra rounding errors, we can bound |y| < 1.6 * 2^-7. - double y_hi = fputil::multiply_add(kd, MPI_OVER_128[0], x); // Exact - // |u.hi| < 1.6*2^-7 - u.hi = fputil::multiply_add(kd, MPI_OVER_128[1], y_hi); - double u0 = y_hi - u.hi; // Exact - // |u.lo| <= max(ulp(u.hi), |kd * MPI_OVER_128[2]|) - double u1 = fputil::multiply_add(kd, MPI_OVER_128[1], u0); // Exact - u.lo = fputil::multiply_add(kd, MPI_OVER_128[2], u1); - // Error bound: - // |x - k * pi/128| - (u.hi + u.lo) <= ulp(u.lo) - // <= ulp(max(ulp(u.hi), kd*MPI_OVER_128[2])) - // <= 2^(-7 - 104) = 2^-111. - - return static_cast<unsigned>(static_cast<int64_t>(kd)); -} - -// Digits of 2^(16*i) / pi, generated by Sollya with: -// > procedure ulp(x, n) { return 2^(floor(log2(abs(x))) - n); }; -// > for i from 0 to 63 do { -// if i < 3 then { pi_inv = 0.25 + 2^(16*(i - 3)) / pi; } -// else { pi_inv = 2^(16*(i-3)) / pi; }; -// pn = nearestint(pi_inv); -// pi_frac = pi_inv - pn; -// a = round(pi_frac, 51, RN); -// b = round(pi_frac - a, 51, RN); -// c = round(pi_frac - a - b, 51, RN); -// d = round(pi_frac - a - b - c, D, RN); -// print("{", 2^7 * a, ",", 2^7 * b, ",", 2^7 * c, ",", 2^7 * d, "},"); -// }; -// -// Notice that for [0..2] the leading bit of 2^(16*(i - 3)) / pi is very small, -// so we add 0.25 so that the conditions for the algorithms are still satisfied, -// and one of those conditions guarantees that ulp(0.25 * x_reduced) >= 2, and -// will safely be discarded. 
- -static constexpr double ONE_TWENTY_EIGHT_OVER_PI[64][4] = { - {0x1.0000000000014p5, 0x1.7cc1b727220a8p-49, 0x1.4fe13abe8fa9cp-101, - -0x1.911f924eb5336p-153}, - {0x1.0000000145f3p5, 0x1.b727220a94fep-49, 0x1.3abe8fa9a6eep-101, - 0x1.b6c52b3278872p-155}, - {0x1.000145f306dc8p5, 0x1.c882a53f84ebp-47, -0x1.70565911f925p-101, - 0x1.4acc9e21c821p-153}, - {0x1.45f306dc9c884p5, -0x1.5ac07b1505c14p-47, -0x1.96447e493ad4cp-99, - -0x1.b0ef1bef806bap-152}, - {-0x1.f246c6efab58p4, -0x1.ec5417056591p-49, -0x1.f924eb53361ep-101, - 0x1.c820ff28b1d5fp-153}, - {0x1.391054a7f09d4p4, 0x1.f47d4d377036cp-48, 0x1.8a5664f10e41p-100, - 0x1.fe5163abdebbcp-154}, - {0x1.529fc2757d1f4p2, 0x1.34ddc0db62958p-50, 0x1.93c439041fe5p-102, - 0x1.63abdebbc561bp-154}, - {-0x1.ec5417056591p-1, -0x1.f924eb53361ep-53, 0x1.c820ff28b1d6p-105, - -0x1.0a21d4f246dc9p-157}, - {-0x1.505c1596447e4p5, -0x1.275a99b0ef1cp-48, 0x1.07f9458eaf7bp-100, - -0x1.0ea79236e4717p-152}, - {-0x1.596447e493ad4p1, -0x1.9b0ef1bef806cp-52, 0x1.63abdebbc561cp-106, - -0x1.1b7238b7b645ap-159}, - {0x1.bb81b6c52b328p5, -0x1.de37df00d74e4p-49, 0x1.5ef5de2b0db94p-101, - -0x1.c8e2ded9169p-153}, - {0x1.b6c52b3278874p5, -0x1.f7c035d38a844p-47, 0x1.778ac36e48dc8p-99, - -0x1.6f6c8b47fe6dbp-152}, - {0x1.2b3278872084p5, -0x1.ae9c5421443a8p-50, -0x1.e48db91c5bdb4p-102, - 0x1.d2e006492eea1p-154}, - {-0x1.8778df7c035d4p5, 0x1.d5ef5de2b0db8p-49, 0x1.2371d2126e97p-101, - 0x1.924bba8274648p-160}, - {-0x1.bef806ba71508p4, -0x1.443a9e48db91cp-50, -0x1.6f6c8b47fe6dcp-104, - 0x1.77504e8c90e7fp-157}, - {-0x1.ae9c5421443a8p-2, -0x1.e48db91c5bdb4p-54, 0x1.d2e006492eeap-106, - 0x1.3a32439fc3bd6p-159}, - {-0x1.38a84288753c8p5, -0x1.1b7238b7b645cp-47, 0x1.c00c925dd413cp-99, - -0x1.cdbc603c429c7p-151}, - {-0x1.0a21d4f246dc8p3, -0x1.c5bdb22d1ff9cp-50, 0x1.25dd413a32438p-103, - 0x1.fc3bd63962535p-155}, - {-0x1.d4f246dc8e2ep3, 0x1.26e9700324978p-49, -0x1.5f62e6de301e4p-102, - 0x1.eb1cb129a73efp-154}, - {-0x1.236e4716f6c8cp4, 0x1.700324977505p-49, 
-0x1.736f180f10a7p-101, - -0x1.a76b2c608bbeep-153}, - {0x1.b8e909374b8p4, 0x1.924bba8274648p-48, 0x1.cfe1deb1cb128p-102, - 0x1.a73ee88235f53p-154}, - {0x1.09374b801924cp4, -0x1.15f62e6de302p-50, 0x1.deb1cb129a74p-102, - -0x1.177dca0ad144cp-154}, - {-0x1.68ffcdb688afcp3, 0x1.d1921cfe1debp-50, 0x1.cb129a73ee884p-102, - -0x1.ca0ad144bb7b1p-154}, - {0x1.924bba8274648p0, 0x1.cfe1deb1cb128p-54, 0x1.a73ee88235f54p-106, - -0x1.144bb7b16639p-158}, - {-0x1.a22bec5cdbc6p5, -0x1.e214e34ed658cp-50, -0x1.177dca0ad144cp-106, - 0x1.213a671c09ad1p-160}, - {0x1.3a32439fc3bd8p1, -0x1.c69dacb1822fp-51, 0x1.1afa975da2428p-105, - -0x1.6638fd94ba082p-158}, - {-0x1.b78c0788538d4p4, 0x1.29a73ee88236p-50, -0x1.5a28976f62cc8p-103, - 0x1.c09ad17df904ep-156}, - {0x1.fc3bd63962534p5, 0x1.cfba208d7d4bcp-48, -0x1.12edec598e3f8p-100, - 0x1.ad17df904e647p-152}, - {-0x1.4e34ed658c118p2, 0x1.046bea5d7689p-51, 0x1.3a671c09ad17cp-104, - 0x1.f904e64758e61p-156}, - {0x1.62534e7dd1048p5, -0x1.415a28976f62cp-47, -0x1.8e3f652e8207p-100, - 0x1.3991d63983534p-154}, - {-0x1.63045df7282b4p4, -0x1.44bb7b16638fcp-50, -0x1.94ba081bec67p-102, - 0x1.d639835339f4ap-154}, - {0x1.d1046bea5d768p5, 0x1.213a671c09adp-48, 0x1.7df904e64759p-100, - -0x1.9f2b3182d8defp-152}, - {0x1.afa975da24274p3, 0x1.9c7026b45f7e4p-50, 0x1.3991d63983534p-106, - -0x1.82d8dee81d108p-160}, - {-0x1.a28976f62cc7p5, -0x1.fb29741037d8cp-47, -0x1.b8a719f2b3184p-100, - 0x1.272117e2ef7e5p-152}, - {-0x1.76f62cc71fb28p5, -0x1.741037d8cdc54p-47, 0x1.cc1a99cfa4e44p-101, - -0x1.d03a21036be27p-153}, - {0x1.d338e04d68bfp5, -0x1.bec66e29c67ccp-50, 0x1.339f49c845f8cp-102, - -0x1.081b5f13801dap-156}, - {0x1.c09ad17df905p4, -0x1.9b8a719f2b318p-48, -0x1.6c6f740e8840cp-103, - -0x1.af89c00ed0004p-155}, - {0x1.68befc827323cp5, -0x1.38cf9598c16c8p-47, 0x1.08bf177bf2508p-99, - -0x1.3801da00087eap-152}, - {-0x1.037d8cdc538dp5, 0x1.a99cfa4e422fcp-49, 0x1.77bf250763ffp-103, - 0x1.2fffbc0b301fep-155}, - {-0x1.8cdc538cf9598p5, -0x1.82d8dee81d108p-48, -0x1.b5f13801dap-104, 
- -0x1.0fd33f8086877p-157}, - {-0x1.4e33e566305bp3, -0x1.bdd03a21036cp-49, 0x1.d8ffc4bffef04p-101, - -0x1.33f80868773a5p-153}, - {-0x1.f2b3182d8dee8p4, -0x1.d1081b5f138p-52, -0x1.da00087e99fcp-104, - -0x1.0d0ee74a5f593p-158}, - {-0x1.8c16c6f740e88p5, -0x1.036be27003b4p-49, -0x1.0fd33f8086878p-109, - 0x1.8b5a0a6d1f6d3p-162}, - {0x1.3908bf177bf24p5, 0x1.0763ff12fffbcp-47, 0x1.6603fbcbc462cp-104, - 0x1.6829b47db4dap-156}, - {0x1.7e2ef7e4a0ec8p4, -0x1.da00087e99fcp-56, -0x1.0d0ee74a5f594p-110, - 0x1.1f6d367ecf27dp-162}, - {-0x1.081b5f13801dcp4, 0x1.fff7816603fbcp-48, 0x1.788c5ad05369p-101, - -0x1.25930261b069fp-155}, - {-0x1.af89c00ed0004p5, -0x1.fa67f010d0ee8p-50, 0x1.6b414da3eda6cp-103, - 0x1.fb3c9f2c26dd4p-156}, - {-0x1.c00ed00043f4cp5, -0x1.fc04343b9d298p-48, 0x1.4da3eda6cfdap-103, - -0x1.b069ec9161738p-155}, - {0x1.2fffbc0b301fcp5, 0x1.e5e2316b414dcp-47, -0x1.c125930261b08p-99, - 0x1.6136e9e8c7ecdp-151}, - {-0x1.0fd33f8086878p3, 0x1.8b5a0a6d1f6d4p-50, -0x1.30261b069ec9p-103, - -0x1.61738132c3403p-155}, - {-0x1.9fc04343b9d28p4, -0x1.7d64b824b2604p-48, -0x1.86c1a7b24585cp-101, - -0x1.c09961a015d29p-154}, - {-0x1.0d0ee74a5f594p2, 0x1.1f6d367ecf27cp-50, 0x1.6136e9e8c7eccp-103, - 0x1.3cbfd45aea4f7p-155}, - {-0x1.dce94beb25c14p5, 0x1.a6cfd9e4f9614p-47, -0x1.22c2e70265868p-100, - -0x1.5d28ad8453814p-158}, - {-0x1.4beb25c12593p5, -0x1.30d834f648b0cp-50, 0x1.8fd9a797fa8b4p-104, - 0x1.d49eeb1faf97cp-156}, - {0x1.b47db4d9fb3c8p4, 0x1.f2c26dd3d18fcp-48, 0x1.9a797fa8b5d48p-100, - 0x1.eeb1faf97c5edp-152}, - {-0x1.25930261b06ap5, 0x1.36e9e8c7ecd3cp-47, 0x1.7fa8b5d49eebp-100, - 0x1.faf97c5ecf41dp-152}, - {0x1.fb3c9f2c26dd4p4, -0x1.738132c3402bcp-51, 0x1.aea4f758fd7ccp-103, - -0x1.d0985f18c10ebp-159}, - {-0x1.b069ec9161738p5, -0x1.32c3402ba515cp-51, 0x1.eeb1faf97c5ecp-104, - 0x1.e839cfbc52949p-157}, - {-0x1.ec9161738132cp5, -0x1.a015d28ad8454p-50, 0x1.faf97c5ecf41cp-104, - 0x1.cfbc529497536p-157}, - {-0x1.61738132c3404p5, 0x1.45aea4f758fd8p-47, -0x1.a0e84c2f8c608p-102, - 
-0x1.d6b5b45650128p-156}, - {0x1.fb34f2ff516bcp3, -0x1.6c229c0a0d074p-49, -0x1.30be31821d6b4p-104, - -0x1.b4565012813b8p-156}, - {0x1.3cbfd45aea4f8p5, -0x1.4e050683a130cp-48, 0x1.ce7de294a4ba8p-104, - 0x1.afed7ec47e357p-156}, - {-0x1.5d28ad8453814p2, -0x1.a0e84c2f8c608p-54, -0x1.d6b5b45650128p-108, - -0x1.3b81ca8bdea7fp-164}, - {-0x1.15b08a702834p5, -0x1.d0985f18c10ecp-47, 0x1.4a4ba9afed7ecp-100, - 0x1.1f8d5d0856033p-154}, -}; - -// For large range |x| >= 2^16, we perform the range reduction computations as: -// u = x - k * pi/128 = (pi/128) * (x * (128/pi) - k). -// We use the exponent of x to find 4 double-chunks of 128/pi: -// c_hi, c_mid, c_lo, c_lo_2 such that: -// 1) ulp(round(x * c_hi, D, RN)) >= 2^8 = 256, -// 2) If x * c_hi = ph_hi + ph_lo and x * c_mid = pm_hi + pm_lo, then -// min(ulp(ph_lo), ulp(pm_hi)) >= 2^-53. -// This will allow us to drop the high part ph_hi and the addition: -// (ph_lo + pm_hi) mod 1 -// can be exactly representable in a double precision. -// This will allow us to do split the computations as: -// (x * 256/pi) ~ x * (c_hi + c_mid + c_lo + c_lo_2) (mod 256) -// ~ (ph_lo + pm_hi) + (pm_lo + x * c_lo) + x * c_lo_2. -// Then, -// round(x * 128/pi) = round(ph_lo + pm_hi) (mod 256) -// And the high part of fractional part of (x * 128/pi) can simply be: -// {x * 128/pi}_hi = {ph_lo + pm_hi}. -// To prevent overflow when x is very large, we simply scale up -// (c_hi, c_mid, c_lo, c_lo_2) by a fixed power of 2 (based on the index) and -// scale down x by the same amount. - -struct LargeRangeReduction { - - // To be implemented in range_reduction_double_fma.h and - // range_reduction_double_nofma.h. 
- unsigned fast(double x, DoubleDouble &u); - -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - LIBC_INLINE Float128 accurate() const { - constexpr Float128 PI_OVER_128_F128 = { - Sign::POS, -133, 0xc90f'daa2'2168'c234'c4c6'628b'80dc'1cd1_u128}; - - // y_lo = x * c_lo + pm.lo - Float128 y_lo_0(x_reduced * ONE_TWENTY_EIGHT_OVER_PI[idx][3]); - Float128 y_lo_1 = fputil::quick_add(Float128(y_lo), y_lo_0); - Float128 y_mid_f128 = fputil::quick_add(Float128(y_mid.lo), y_lo_1); - Float128 y_hi_f128 = fputil::quick_add(Float128(y_hi), Float128(y_mid.hi)); - Float128 y = fputil::quick_add(y_hi_f128, y_mid_f128); - - return fputil::quick_mul(y, PI_OVER_128_F128); - } -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - -private: - // Index of x in the look-up table ONE_TWENTY_EIGHT_OVER_PI. - unsigned idx; - // x scaled down by 2^(-16 *(idx - 3))). - double x_reduced; - // Parts of (x * 128/pi) mod 1. - double y_hi, y_lo; - DoubleDouble y_mid; -}; - -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS -LIBC_INLINE static Float128 range_reduction_small_f128(double x) { - constexpr Float128 PI_OVER_128_F128 = { - Sign::POS, -133, 0xc90f'daa2'2168'c234'c4c6'628b'80dc'1cd1_u128}; - constexpr double ONE_TWENTY_EIGHT_OVER_PI_D = 0x1.45f306dc9c883p5; - double prod_hi = x * ONE_TWENTY_EIGHT_OVER_PI_D; - double kd = fputil::nearest_integer(prod_hi); - - Float128 mk_f128(-kd); - Float128 x_f128(x); - Float128 p_hi = - fputil::quick_mul(x_f128, Float128(ONE_TWENTY_EIGHT_OVER_PI[3][0])); - Float128 p_mid = - fputil::quick_mul(x_f128, Float128(ONE_TWENTY_EIGHT_OVER_PI[3][1])); - Float128 p_lo = - fputil::quick_mul(x_f128, Float128(ONE_TWENTY_EIGHT_OVER_PI[3][2])); - Float128 s_hi = fputil::quick_add(p_hi, mk_f128); - Float128 s_lo = fputil::quick_add(p_mid, p_lo); - Float128 y = fputil::quick_add(s_hi, s_lo); - - return fputil::quick_mul(y, PI_OVER_128_F128); -} - -static constexpr Float128 SIN_K_PI_OVER_128_F128[65] = { - {Sign::POS, 0, 0}, - {Sign::POS, -133, 0xc90a'afbd'1b33'efc9'c539'edcb'fda0'cf2c_u128}, 
- {Sign::POS, -132, 0xc8fb'2f88'6ec0'9f37'6a17'954b'2b7c'5171_u128}, - {Sign::POS, -131, 0x96a9'0496'70cf'ae65'f775'7409'4d3c'35c4_u128}, - {Sign::POS, -131, 0xc8bd'35e1'4da1'5f0e'c739'6c89'4bbf'7389_u128}, - {Sign::POS, -131, 0xfab2'72b5'4b98'71a2'7047'29ae'56d7'8a37_u128}, - {Sign::POS, -130, 0x9640'8374'7309'd113'000a'89a1'1e07'c1fe_u128}, - {Sign::POS, -130, 0xaf10'a224'59fe'32a6'3fee'f3bb'58b1'f10d_u128}, - {Sign::POS, -130, 0xc7c5'c1e3'4d30'55b2'5cc8'c00e'4fcc'd850_u128}, - {Sign::POS, -130, 0xe05c'1353'f27b'17e5'0ebc'61ad'e6ca'83cd_u128}, - {Sign::POS, -130, 0xf8cf'cbd9'0af8'd57a'4221'dc4b'a772'598d_u128}, - {Sign::POS, -129, 0x888e'9315'8fb3'bb04'9841'56f5'5334'4306_u128}, - {Sign::POS, -129, 0x94a0'3176'acf8'2d45'ae4b'a773'da6b'f754_u128}, - {Sign::POS, -129, 0xa09a'e4a0'bb30'0a19'2f89'5f44'a303'cc0b_u128}, - {Sign::POS, -129, 0xac7c'd3ad'58fe'e7f0'811f'9539'84ef'f83e_u128}, - {Sign::POS, -129, 0xb844'2987'd22c'f576'9cc3'ef36'746d'e3b8_u128}, - {Sign::POS, -129, 0xc3ef'1535'754b'168d'3122'c2a5'9efd'dc37_u128}, - {Sign::POS, -129, 0xcf7b'ca1d'476c'516d'a812'90bd'baad'62e4_u128}, - {Sign::POS, -129, 0xdae8'804f'0ae6'015b'362c'b974'182e'3030_u128}, - {Sign::POS, -129, 0xe633'74c9'8e22'f0b4'2872'ce1b'fc7a'd1cd_u128}, - {Sign::POS, -129, 0xf15a'e9c0'37b1'd8f0'6c48'e9e3'420b'0f1e_u128}, - {Sign::POS, -129, 0xfc5d'26df'c4d5'cfda'27c0'7c91'1290'b8d1_u128}, - {Sign::POS, -128, 0x839c'3cc9'17ff'6cb4'bfd7'9717'f288'0abf_u128}, - {Sign::POS, -128, 0x88f5'9aa0'da59'1421'b892'ca83'61d8'c84c_u128}, - {Sign::POS, -128, 0x8e39'd9cd'7346'4364'bba4'cfec'bff5'4867_u128}, - {Sign::POS, -128, 0x9368'2a66'e896'f544'b178'2191'1e71'c16e_u128}, - {Sign::POS, -128, 0x987f'bfe7'0b81'a708'19ce'c845'ac87'a5c6_u128}, - {Sign::POS, -128, 0x9d7f'd149'0285'c9e3'e25e'3954'9638'ae68_u128}, - {Sign::POS, -128, 0xa267'9928'48ee'b0c0'3b51'67ee'359a'234e_u128}, - {Sign::POS, -128, 0xa736'55df'1f2f'489e'149f'6e75'9934'68a3_u128}, - {Sign::POS, -128, 
0xabeb'49a4'6764'fd15'1bec'da80'89c1'a94c_u128}, - {Sign::POS, -128, 0xb085'baa8'e966'f6da'e4ca'd00d'5c94'bcd2_u128}, - {Sign::POS, -128, 0xb504'f333'f9de'6484'597d'89b3'754a'be9f_u128}, - {Sign::POS, -128, 0xb968'41bf'7ffc'b21a'9de1'e3b2'2b8b'f4db_u128}, - {Sign::POS, -128, 0xbdae'f913'557d'76f0'ac85'320f'528d'6d5d_u128}, - {Sign::POS, -128, 0xc1d8'705f'fcbb'6e90'bdf0'715c'b8b2'0bd7_u128}, - {Sign::POS, -128, 0xc5e4'0358'a8ba'05a7'43da'25d9'9267'326b_u128}, - {Sign::POS, -128, 0xc9d1'124c'931f'da7a'8335'241b'e169'3225_u128}, - {Sign::POS, -128, 0xcd9f'023f'9c3a'059e'23af'31db'7179'a4aa_u128}, - {Sign::POS, -128, 0xd14d'3d02'313c'0eed'744f'ea20'e8ab'ef92_u128}, - {Sign::POS, -128, 0xd4db'3148'750d'1819'f630'e8b6'dac8'3e69_u128}, - {Sign::POS, -128, 0xd848'52c0'a80f'fcdb'24b9'fe00'6635'74a4_u128}, - {Sign::POS, -128, 0xdb94'1a28'cb71'ec87'2c19'b632'53da'43fc_u128}, - {Sign::POS, -128, 0xdebe'0563'7ca9'4cfb'4b19'aa71'fec3'ae6d_u128}, - {Sign::POS, -128, 0xe1c5'978c'05ed'8691'f4e8'a837'2f8c'5810_u128}, - {Sign::POS, -128, 0xe4aa'5909'a08f'a7b4'1227'85ae'67f5'515d_u128}, - {Sign::POS, -128, 0xe76b'd7a1'e63b'9786'1251'2952'9d48'a92f_u128}, - {Sign::POS, -128, 0xea09'a68a'6e49'cd62'15ad'45b4'a1b5'e823_u128}, - {Sign::POS, -128, 0xec83'5e79'946a'3145'7e61'0231'ac1d'6181_u128}, - {Sign::POS, -128, 0xeed8'9db6'6611'e307'86f8'c20f'b664'b01b_u128}, - {Sign::POS, -128, 0xf109'0827'b437'25fd'6712'7db3'5b28'7316_u128}, - {Sign::POS, -128, 0xf314'4762'4708'8f74'a548'6bdc'455d'56a2_u128}, - {Sign::POS, -128, 0xf4fa'0ab6'316e'd2ec'163c'5c7f'03b7'18c5_u128}, - {Sign::POS, -128, 0xf6ba'073b'424b'19e8'2c79'1f59'cc1f'fc23_u128}, - {Sign::POS, -128, 0xf853'f7dc'9186'b952'c7ad'c6b4'9888'91bb_u128}, - {Sign::POS, -128, 0xf9c7'9d63'272c'4628'4504'ae08'd19b'2980_u128}, - {Sign::POS, -128, 0xfb14'be7f'bae5'8156'2172'a361'fd2a'722f_u128}, - {Sign::POS, -128, 0xfc3b'27d3'8a5d'49ab'2567'78ff'cb5c'1769_u128}, - {Sign::POS, -128, 0xfd3a'abf8'4528'b50b'eae6'bd95'1c1d'abbe_u128}, - {Sign::POS, 
-128, 0xfe13'2387'0cfe'9a3d'90cd'1d95'9db6'74ef_u128}, - {Sign::POS, -128, 0xfec4'6d1e'8929'2cf0'4139'0efd'c726'e9ef_u128}, - {Sign::POS, -128, 0xff4e'6d68'0c41'd0a9'0f66'8633'f1ab'858a_u128}, - {Sign::POS, -128, 0xffb1'0f1b'cb6b'ef1d'421e'8eda'af59'453e_u128}, - {Sign::POS, -128, 0xffec'4304'2668'65d9'5657'5523'6696'1732_u128}, - {Sign::POS, 0, 1}, -}; -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - -} // namespace LIBC_NAMESPACE_DECL - -#endif // LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_COMMON_H diff --git a/libc/src/math/generic/range_reduction_double_fma.h b/libc/src/math/generic/range_reduction_double_fma.h deleted file mode 100644 index 160fb24..0000000 --- a/libc/src/math/generic/range_reduction_double_fma.h +++ /dev/null @@ -1,346 +0,0 @@ -//===-- Range reduction for double precision sin/cos/tan w/ FMA -*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_FMA_H -#define LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_FMA_H - -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/double_double.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/FPUtil/nearest_integer.h" -#include "src/__support/common.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" -#include "src/math/generic/range_reduction_double_common.h" - -namespace LIBC_NAMESPACE_DECL { - -using LIBC_NAMESPACE::fputil::DoubleDouble; - -LIBC_INLINE unsigned LargeRangeReduction::fast(double x, DoubleDouble &u) { - using FPBits = typename fputil::FPBits<double>; - FPBits xbits(x); - - int x_e_m62 = xbits.get_biased_exponent() - (FPBits::EXP_BIAS + 62); - idx = static_cast<unsigned>((x_e_m62 >> 4) 
+ 3); - // Scale x down by 2^(-(16 * (idx - 3)) - xbits.set_biased_exponent((x_e_m62 & 15) + FPBits::EXP_BIAS + 62); - // 2^62 <= |x_reduced| < 2^(62 + 16) = 2^78 - x_reduced = xbits.get_val(); - // x * c_hi = ph.hi + ph.lo exactly. - DoubleDouble ph = fputil::exact_mult<double, SPLIT>( - x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][0]); - // x * c_mid = pm.hi + pm.lo exactly. - DoubleDouble pm = fputil::exact_mult<double, SPLIT>( - x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][1]); - // x * c_lo = pl.hi + pl.lo exactly. - DoubleDouble pl = fputil::exact_mult<double, SPLIT>( - x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][2]); - // Extract integral parts and fractional parts of (ph.lo + pm.hi). - double sum_hi = ph.lo + pm.hi; - double kd = fputil::nearest_integer(sum_hi); - - // x * 128/pi mod 1 ~ y_hi + y_mid + y_lo - y_hi = (ph.lo - kd) + pm.hi; // Exact - y_mid = fputil::exact_add(pm.lo, pl.hi); - y_lo = pl.lo; - - // y_l = x * c_lo_2 + pl.lo - double y_l = - fputil::multiply_add(x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][3], y_lo); - DoubleDouble y = fputil::exact_add(y_hi, y_mid.hi); - y.lo += (y_mid.lo + y_l); - - // Digits of pi/128, generated by Sollya with: - // > a = round(pi/128, D, RN); - // > b = round(pi/128 - a, D, RN); - constexpr DoubleDouble PI_OVER_128_DD = {0x1.1a62633145c07p-60, - 0x1.921fb54442d18p-6}; - - // Error bound: with {a} denote the fractional part of a, i.e.: - // {a} = a - round(a) - // Then, - // | {x * 128/pi} - (y_hi + y_lo) | <= ulp(ulp(y_hi)) <= 2^-105 - // | {x mod pi/128} - (u.hi + u.lo) | < 2 * 2^-6 * 2^-105 = 2^-110 - u = fputil::quick_mult<SPLIT>(y, PI_OVER_128_DD); - - return static_cast<unsigned>(static_cast<int64_t>(kd)); -} - -// Lookup table for sin(k * pi / 128) with k = 0, ..., 255. 
-// Table is generated with Sollya as follow: -// > display = hexadecimal; -// > for k from 0 to 255 do { -// a = D(sin(k * pi/128)); }; -// b = D(sin(k * pi/128) - a); -// print("{", b, ",", a, "},"); -// }; -LIBC_INLINE constexpr DoubleDouble SIN_K_PI_OVER_128[] = { - {0, 0}, - {-0x1.b1d63091a013p-64, 0x1.92155f7a3667ep-6}, - {-0x1.912bd0d569a9p-61, 0x1.91f65f10dd814p-5}, - {-0x1.9a088a8bf6b2cp-59, 0x1.2d52092ce19f6p-4}, - {-0x1.e2718d26ed688p-60, 0x1.917a6bc29b42cp-4}, - {0x1.a2704729ae56dp-59, 0x1.f564e56a9730ep-4}, - {0x1.13000a89a11ep-58, 0x1.2c8106e8e613ap-3}, - {0x1.531ff779ddac6p-57, 0x1.5e214448b3fc6p-3}, - {-0x1.26d19b9ff8d82p-57, 0x1.8f8b83c69a60bp-3}, - {-0x1.af1439e521935p-62, 0x1.c0b826a7e4f63p-3}, - {-0x1.42deef11da2c4p-57, 0x1.f19f97b215f1bp-3}, - {0x1.824c20ab7aa9ap-56, 0x1.111d262b1f677p-2}, - {-0x1.5d28da2c4612dp-56, 0x1.294062ed59f06p-2}, - {0x1.0c97c4afa2518p-56, 0x1.4135c94176601p-2}, - {-0x1.efdc0d58cf62p-62, 0x1.58f9a75ab1fddp-2}, - {-0x1.44b19e0864c5dp-56, 0x1.7088530fa459fp-2}, - {-0x1.72cedd3d5a61p-57, 0x1.87de2a6aea963p-2}, - {0x1.6da81290bdbabp-57, 0x1.9ef7943a8ed8ap-2}, - {0x1.5b362cb974183p-57, 0x1.b5d1009e15ccp-2}, - {0x1.6850e59c37f8fp-58, 0x1.cc66e9931c45ep-2}, - {0x1.e0d891d3c6841p-58, 0x1.e2b5d3806f63bp-2}, - {-0x1.2ec1fc1b776b8p-60, 0x1.f8ba4dbf89abap-2}, - {-0x1.a5a014347406cp-55, 0x1.073879922ffeep-1}, - {-0x1.ef23b69abe4f1p-55, 0x1.11eb3541b4b23p-1}, - {0x1.b25dd267f66p-55, 0x1.1c73b39ae68c8p-1}, - {-0x1.5da743ef3770cp-55, 0x1.26d054cdd12dfp-1}, - {-0x1.efcc626f74a6fp-57, 0x1.30ff7fce17035p-1}, - {0x1.e3e25e3954964p-56, 0x1.3affa292050b9p-1}, - {0x1.8076a2cfdc6b3p-57, 0x1.44cf325091dd6p-1}, - {0x1.3c293edceb327p-57, 0x1.4e6cabbe3e5e9p-1}, - {-0x1.75720992bfbb2p-55, 0x1.57d69348cecap-1}, - {-0x1.251b352ff2a37p-56, 0x1.610b7551d2cdfp-1}, - {-0x1.bdd3413b26456p-55, 0x1.6a09e667f3bcdp-1}, - {0x1.0d4ef0f1d915cp-55, 0x1.72d0837efff96p-1}, - {-0x1.0f537acdf0ad7p-56, 0x1.7b5df226aafafp-1}, - {-0x1.6f420f8ea3475p-56, 
0x1.83b0e0bff976ep-1}, - {-0x1.2c5e12ed1336dp-55, 0x1.8bc806b151741p-1}, - {0x1.3d419a920df0bp-55, 0x1.93a22499263fbp-1}, - {-0x1.30ee286712474p-55, 0x1.9b3e047f38741p-1}, - {-0x1.128bb015df175p-56, 0x1.a29a7a0462782p-1}, - {0x1.9f630e8b6dac8p-60, 0x1.a9b66290ea1a3p-1}, - {-0x1.926da300ffccep-55, 0x1.b090a581502p-1}, - {-0x1.bc69f324e6d61p-55, 0x1.b728345196e3ep-1}, - {-0x1.825a732ac700ap-55, 0x1.bd7c0ac6f952ap-1}, - {-0x1.6e0b1757c8d07p-56, 0x1.c38b2f180bdb1p-1}, - {-0x1.2fb761e946603p-58, 0x1.c954b213411f5p-1}, - {-0x1.e7b6bb5ab58aep-58, 0x1.ced7af43cc773p-1}, - {-0x1.4ef5295d25af2p-55, 0x1.d4134d14dc93ap-1}, - {0x1.457e610231ac2p-56, 0x1.d906bcf328d46p-1}, - {0x1.83c37c6107db3p-55, 0x1.ddb13b6ccc23cp-1}, - {-0x1.014c76c126527p-55, 0x1.e212104f686e5p-1}, - {-0x1.16b56f2847754p-57, 0x1.e6288ec48e112p-1}, - {0x1.760b1e2e3f81ep-55, 0x1.e9f4156c62ddap-1}, - {0x1.e82c791f59cc2p-56, 0x1.ed740e7684963p-1}, - {0x1.52c7adc6b4989p-56, 0x1.f0a7efb9230d7p-1}, - {-0x1.d7bafb51f72e6p-56, 0x1.f38f3ac64e589p-1}, - {0x1.562172a361fd3p-56, 0x1.f6297cff75cbp-1}, - {0x1.ab256778ffcb6p-56, 0x1.f8764fa714ba9p-1}, - {-0x1.7a0a8ca13571fp-55, 0x1.fa7557f08a517p-1}, - {0x1.1ec8668ecaceep-55, 0x1.fc26470e19fd3p-1}, - {-0x1.87df6378811c7p-55, 0x1.fd88da3d12526p-1}, - {0x1.521ecd0c67e35p-57, 0x1.fe9cdad01883ap-1}, - {-0x1.c57bc2e24aa15p-57, 0x1.ff621e3796d7ep-1}, - {-0x1.1354d4556e4cbp-55, 0x1.ffd886084cd0dp-1}, - {0, 1}, -#ifndef LIBC_MATH_HAS_SMALL_TABLES - {-0x1.1354d4556e4cbp-55, 0x1.ffd886084cd0dp-1}, - {-0x1.c57bc2e24aa15p-57, 0x1.ff621e3796d7ep-1}, - {0x1.521ecd0c67e35p-57, 0x1.fe9cdad01883ap-1}, - {-0x1.87df6378811c7p-55, 0x1.fd88da3d12526p-1}, - {0x1.1ec8668ecaceep-55, 0x1.fc26470e19fd3p-1}, - {-0x1.7a0a8ca13571fp-55, 0x1.fa7557f08a517p-1}, - {0x1.ab256778ffcb6p-56, 0x1.f8764fa714ba9p-1}, - {0x1.562172a361fd3p-56, 0x1.f6297cff75cbp-1}, - {-0x1.d7bafb51f72e6p-56, 0x1.f38f3ac64e589p-1}, - {0x1.52c7adc6b4989p-56, 0x1.f0a7efb9230d7p-1}, - {0x1.e82c791f59cc2p-56, 0x1.ed740e7684963p-1}, - 
{0x1.760b1e2e3f81ep-55, 0x1.e9f4156c62ddap-1}, - {-0x1.16b56f2847754p-57, 0x1.e6288ec48e112p-1}, - {-0x1.014c76c126527p-55, 0x1.e212104f686e5p-1}, - {0x1.83c37c6107db3p-55, 0x1.ddb13b6ccc23cp-1}, - {0x1.457e610231ac2p-56, 0x1.d906bcf328d46p-1}, - {-0x1.4ef5295d25af2p-55, 0x1.d4134d14dc93ap-1}, - {-0x1.e7b6bb5ab58aep-58, 0x1.ced7af43cc773p-1}, - {-0x1.2fb761e946603p-58, 0x1.c954b213411f5p-1}, - {-0x1.6e0b1757c8d07p-56, 0x1.c38b2f180bdb1p-1}, - {-0x1.825a732ac700ap-55, 0x1.bd7c0ac6f952ap-1}, - {-0x1.bc69f324e6d61p-55, 0x1.b728345196e3ep-1}, - {-0x1.926da300ffccep-55, 0x1.b090a581502p-1}, - {0x1.9f630e8b6dac8p-60, 0x1.a9b66290ea1a3p-1}, - {-0x1.128bb015df175p-56, 0x1.a29a7a0462782p-1}, - {-0x1.30ee286712474p-55, 0x1.9b3e047f38741p-1}, - {0x1.3d419a920df0bp-55, 0x1.93a22499263fbp-1}, - {-0x1.2c5e12ed1336dp-55, 0x1.8bc806b151741p-1}, - {-0x1.6f420f8ea3475p-56, 0x1.83b0e0bff976ep-1}, - {-0x1.0f537acdf0ad7p-56, 0x1.7b5df226aafafp-1}, - {0x1.0d4ef0f1d915cp-55, 0x1.72d0837efff96p-1}, - {-0x1.bdd3413b26456p-55, 0x1.6a09e667f3bcdp-1}, - {-0x1.251b352ff2a37p-56, 0x1.610b7551d2cdfp-1}, - {-0x1.75720992bfbb2p-55, 0x1.57d69348cecap-1}, - {0x1.3c293edceb327p-57, 0x1.4e6cabbe3e5e9p-1}, - {0x1.8076a2cfdc6b3p-57, 0x1.44cf325091dd6p-1}, - {0x1.e3e25e3954964p-56, 0x1.3affa292050b9p-1}, - {-0x1.efcc626f74a6fp-57, 0x1.30ff7fce17035p-1}, - {-0x1.5da743ef3770cp-55, 0x1.26d054cdd12dfp-1}, - {0x1.b25dd267f66p-55, 0x1.1c73b39ae68c8p-1}, - {-0x1.ef23b69abe4f1p-55, 0x1.11eb3541b4b23p-1}, - {-0x1.a5a014347406cp-55, 0x1.073879922ffeep-1}, - {-0x1.2ec1fc1b776b8p-60, 0x1.f8ba4dbf89abap-2}, - {0x1.e0d891d3c6841p-58, 0x1.e2b5d3806f63bp-2}, - {0x1.6850e59c37f8fp-58, 0x1.cc66e9931c45ep-2}, - {0x1.5b362cb974183p-57, 0x1.b5d1009e15ccp-2}, - {0x1.6da81290bdbabp-57, 0x1.9ef7943a8ed8ap-2}, - {-0x1.72cedd3d5a61p-57, 0x1.87de2a6aea963p-2}, - {-0x1.44b19e0864c5dp-56, 0x1.7088530fa459fp-2}, - {-0x1.efdc0d58cf62p-62, 0x1.58f9a75ab1fddp-2}, - {0x1.0c97c4afa2518p-56, 0x1.4135c94176601p-2}, - 
{-0x1.5d28da2c4612dp-56, 0x1.294062ed59f06p-2}, - {0x1.824c20ab7aa9ap-56, 0x1.111d262b1f677p-2}, - {-0x1.42deef11da2c4p-57, 0x1.f19f97b215f1bp-3}, - {-0x1.af1439e521935p-62, 0x1.c0b826a7e4f63p-3}, - {-0x1.26d19b9ff8d82p-57, 0x1.8f8b83c69a60bp-3}, - {0x1.531ff779ddac6p-57, 0x1.5e214448b3fc6p-3}, - {0x1.13000a89a11ep-58, 0x1.2c8106e8e613ap-3}, - {0x1.a2704729ae56dp-59, 0x1.f564e56a9730ep-4}, - {-0x1.e2718d26ed688p-60, 0x1.917a6bc29b42cp-4}, - {-0x1.9a088a8bf6b2cp-59, 0x1.2d52092ce19f6p-4}, - {-0x1.912bd0d569a9p-61, 0x1.91f65f10dd814p-5}, - {-0x1.b1d63091a013p-64, 0x1.92155f7a3667ep-6}, - {0, 0}, - {0x1.b1d63091a013p-64, -0x1.92155f7a3667ep-6}, - {0x1.912bd0d569a9p-61, -0x1.91f65f10dd814p-5}, - {0x1.9a088a8bf6b2cp-59, -0x1.2d52092ce19f6p-4}, - {0x1.e2718d26ed688p-60, -0x1.917a6bc29b42cp-4}, - {-0x1.a2704729ae56dp-59, -0x1.f564e56a9730ep-4}, - {-0x1.13000a89a11ep-58, -0x1.2c8106e8e613ap-3}, - {-0x1.531ff779ddac6p-57, -0x1.5e214448b3fc6p-3}, - {0x1.26d19b9ff8d82p-57, -0x1.8f8b83c69a60bp-3}, - {0x1.af1439e521935p-62, -0x1.c0b826a7e4f63p-3}, - {0x1.42deef11da2c4p-57, -0x1.f19f97b215f1bp-3}, - {-0x1.824c20ab7aa9ap-56, -0x1.111d262b1f677p-2}, - {0x1.5d28da2c4612dp-56, -0x1.294062ed59f06p-2}, - {-0x1.0c97c4afa2518p-56, -0x1.4135c94176601p-2}, - {0x1.efdc0d58cf62p-62, -0x1.58f9a75ab1fddp-2}, - {0x1.44b19e0864c5dp-56, -0x1.7088530fa459fp-2}, - {0x1.72cedd3d5a61p-57, -0x1.87de2a6aea963p-2}, - {-0x1.6da81290bdbabp-57, -0x1.9ef7943a8ed8ap-2}, - {-0x1.5b362cb974183p-57, -0x1.b5d1009e15ccp-2}, - {-0x1.6850e59c37f8fp-58, -0x1.cc66e9931c45ep-2}, - {-0x1.e0d891d3c6841p-58, -0x1.e2b5d3806f63bp-2}, - {0x1.2ec1fc1b776b8p-60, -0x1.f8ba4dbf89abap-2}, - {0x1.a5a014347406cp-55, -0x1.073879922ffeep-1}, - {0x1.ef23b69abe4f1p-55, -0x1.11eb3541b4b23p-1}, - {-0x1.b25dd267f66p-55, -0x1.1c73b39ae68c8p-1}, - {0x1.5da743ef3770cp-55, -0x1.26d054cdd12dfp-1}, - {0x1.efcc626f74a6fp-57, -0x1.30ff7fce17035p-1}, - {-0x1.e3e25e3954964p-56, -0x1.3affa292050b9p-1}, - {-0x1.8076a2cfdc6b3p-57, 
-0x1.44cf325091dd6p-1}, - {-0x1.3c293edceb327p-57, -0x1.4e6cabbe3e5e9p-1}, - {0x1.75720992bfbb2p-55, -0x1.57d69348cecap-1}, - {0x1.251b352ff2a37p-56, -0x1.610b7551d2cdfp-1}, - {0x1.bdd3413b26456p-55, -0x1.6a09e667f3bcdp-1}, - {-0x1.0d4ef0f1d915cp-55, -0x1.72d0837efff96p-1}, - {0x1.0f537acdf0ad7p-56, -0x1.7b5df226aafafp-1}, - {0x1.6f420f8ea3475p-56, -0x1.83b0e0bff976ep-1}, - {0x1.2c5e12ed1336dp-55, -0x1.8bc806b151741p-1}, - {-0x1.3d419a920df0bp-55, -0x1.93a22499263fbp-1}, - {0x1.30ee286712474p-55, -0x1.9b3e047f38741p-1}, - {0x1.128bb015df175p-56, -0x1.a29a7a0462782p-1}, - {-0x1.9f630e8b6dac8p-60, -0x1.a9b66290ea1a3p-1}, - {0x1.926da300ffccep-55, -0x1.b090a581502p-1}, - {0x1.bc69f324e6d61p-55, -0x1.b728345196e3ep-1}, - {0x1.825a732ac700ap-55, -0x1.bd7c0ac6f952ap-1}, - {0x1.6e0b1757c8d07p-56, -0x1.c38b2f180bdb1p-1}, - {0x1.2fb761e946603p-58, -0x1.c954b213411f5p-1}, - {0x1.e7b6bb5ab58aep-58, -0x1.ced7af43cc773p-1}, - {0x1.4ef5295d25af2p-55, -0x1.d4134d14dc93ap-1}, - {-0x1.457e610231ac2p-56, -0x1.d906bcf328d46p-1}, - {-0x1.83c37c6107db3p-55, -0x1.ddb13b6ccc23cp-1}, - {0x1.014c76c126527p-55, -0x1.e212104f686e5p-1}, - {0x1.16b56f2847754p-57, -0x1.e6288ec48e112p-1}, - {-0x1.760b1e2e3f81ep-55, -0x1.e9f4156c62ddap-1}, - {-0x1.e82c791f59cc2p-56, -0x1.ed740e7684963p-1}, - {-0x1.52c7adc6b4989p-56, -0x1.f0a7efb9230d7p-1}, - {0x1.d7bafb51f72e6p-56, -0x1.f38f3ac64e589p-1}, - {-0x1.562172a361fd3p-56, -0x1.f6297cff75cbp-1}, - {-0x1.ab256778ffcb6p-56, -0x1.f8764fa714ba9p-1}, - {0x1.7a0a8ca13571fp-55, -0x1.fa7557f08a517p-1}, - {-0x1.1ec8668ecaceep-55, -0x1.fc26470e19fd3p-1}, - {0x1.87df6378811c7p-55, -0x1.fd88da3d12526p-1}, - {-0x1.521ecd0c67e35p-57, -0x1.fe9cdad01883ap-1}, - {0x1.c57bc2e24aa15p-57, -0x1.ff621e3796d7ep-1}, - {0x1.1354d4556e4cbp-55, -0x1.ffd886084cd0dp-1}, - {0, -1}, - {0x1.1354d4556e4cbp-55, -0x1.ffd886084cd0dp-1}, - {0x1.c57bc2e24aa15p-57, -0x1.ff621e3796d7ep-1}, - {-0x1.521ecd0c67e35p-57, -0x1.fe9cdad01883ap-1}, - {0x1.87df6378811c7p-55, -0x1.fd88da3d12526p-1}, - 
{-0x1.1ec8668ecaceep-55, -0x1.fc26470e19fd3p-1}, - {0x1.7a0a8ca13571fp-55, -0x1.fa7557f08a517p-1}, - {-0x1.ab256778ffcb6p-56, -0x1.f8764fa714ba9p-1}, - {-0x1.562172a361fd3p-56, -0x1.f6297cff75cbp-1}, - {0x1.d7bafb51f72e6p-56, -0x1.f38f3ac64e589p-1}, - {-0x1.52c7adc6b4989p-56, -0x1.f0a7efb9230d7p-1}, - {-0x1.e82c791f59cc2p-56, -0x1.ed740e7684963p-1}, - {-0x1.760b1e2e3f81ep-55, -0x1.e9f4156c62ddap-1}, - {0x1.16b56f2847754p-57, -0x1.e6288ec48e112p-1}, - {0x1.014c76c126527p-55, -0x1.e212104f686e5p-1}, - {-0x1.83c37c6107db3p-55, -0x1.ddb13b6ccc23cp-1}, - {-0x1.457e610231ac2p-56, -0x1.d906bcf328d46p-1}, - {0x1.4ef5295d25af2p-55, -0x1.d4134d14dc93ap-1}, - {0x1.e7b6bb5ab58aep-58, -0x1.ced7af43cc773p-1}, - {0x1.2fb761e946603p-58, -0x1.c954b213411f5p-1}, - {0x1.6e0b1757c8d07p-56, -0x1.c38b2f180bdb1p-1}, - {0x1.825a732ac700ap-55, -0x1.bd7c0ac6f952ap-1}, - {0x1.bc69f324e6d61p-55, -0x1.b728345196e3ep-1}, - {0x1.926da300ffccep-55, -0x1.b090a581502p-1}, - {-0x1.9f630e8b6dac8p-60, -0x1.a9b66290ea1a3p-1}, - {0x1.128bb015df175p-56, -0x1.a29a7a0462782p-1}, - {0x1.30ee286712474p-55, -0x1.9b3e047f38741p-1}, - {-0x1.3d419a920df0bp-55, -0x1.93a22499263fbp-1}, - {0x1.2c5e12ed1336dp-55, -0x1.8bc806b151741p-1}, - {0x1.6f420f8ea3475p-56, -0x1.83b0e0bff976ep-1}, - {0x1.0f537acdf0ad7p-56, -0x1.7b5df226aafafp-1}, - {-0x1.0d4ef0f1d915cp-55, -0x1.72d0837efff96p-1}, - {0x1.bdd3413b26456p-55, -0x1.6a09e667f3bcdp-1}, - {0x1.251b352ff2a37p-56, -0x1.610b7551d2cdfp-1}, - {0x1.75720992bfbb2p-55, -0x1.57d69348cecap-1}, - {-0x1.3c293edceb327p-57, -0x1.4e6cabbe3e5e9p-1}, - {-0x1.8076a2cfdc6b3p-57, -0x1.44cf325091dd6p-1}, - {-0x1.e3e25e3954964p-56, -0x1.3affa292050b9p-1}, - {0x1.efcc626f74a6fp-57, -0x1.30ff7fce17035p-1}, - {0x1.5da743ef3770cp-55, -0x1.26d054cdd12dfp-1}, - {-0x1.b25dd267f66p-55, -0x1.1c73b39ae68c8p-1}, - {0x1.ef23b69abe4f1p-55, -0x1.11eb3541b4b23p-1}, - {0x1.a5a014347406cp-55, -0x1.073879922ffeep-1}, - {0x1.2ec1fc1b776b8p-60, -0x1.f8ba4dbf89abap-2}, - {-0x1.e0d891d3c6841p-58, 
-0x1.e2b5d3806f63bp-2}, - {-0x1.6850e59c37f8fp-58, -0x1.cc66e9931c45ep-2}, - {-0x1.5b362cb974183p-57, -0x1.b5d1009e15ccp-2}, - {-0x1.6da81290bdbabp-57, -0x1.9ef7943a8ed8ap-2}, - {0x1.72cedd3d5a61p-57, -0x1.87de2a6aea963p-2}, - {0x1.44b19e0864c5dp-56, -0x1.7088530fa459fp-2}, - {0x1.efdc0d58cf62p-62, -0x1.58f9a75ab1fddp-2}, - {-0x1.0c97c4afa2518p-56, -0x1.4135c94176601p-2}, - {0x1.5d28da2c4612dp-56, -0x1.294062ed59f06p-2}, - {-0x1.824c20ab7aa9ap-56, -0x1.111d262b1f677p-2}, - {0x1.42deef11da2c4p-57, -0x1.f19f97b215f1bp-3}, - {0x1.af1439e521935p-62, -0x1.c0b826a7e4f63p-3}, - {0x1.26d19b9ff8d82p-57, -0x1.8f8b83c69a60bp-3}, - {-0x1.531ff779ddac6p-57, -0x1.5e214448b3fc6p-3}, - {-0x1.13000a89a11ep-58, -0x1.2c8106e8e613ap-3}, - {-0x1.a2704729ae56dp-59, -0x1.f564e56a9730ep-4}, - {0x1.e2718d26ed688p-60, -0x1.917a6bc29b42cp-4}, - {0x1.9a088a8bf6b2cp-59, -0x1.2d52092ce19f6p-4}, - {0x1.912bd0d569a9p-61, -0x1.91f65f10dd814p-5}, - {0x1.b1d63091a013p-64, -0x1.92155f7a3667ep-6}, -#endif // !LIBC_MATH_HAS_SMALL_TABLES -}; - -} // namespace LIBC_NAMESPACE_DECL - -#endif // LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_FMA_H diff --git a/libc/src/math/generic/range_reduction_double_nofma.h b/libc/src/math/generic/range_reduction_double_nofma.h deleted file mode 100644 index 9d13d24..0000000 --- a/libc/src/math/generic/range_reduction_double_nofma.h +++ /dev/null @@ -1,347 +0,0 @@ -//===-- Range reduction for double precision sin/cos/tan --------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_NOFMA_H -#define LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_NOFMA_H - -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/double_double.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/FPUtil/nearest_integer.h" -#include "src/__support/common.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" -#include "src/math/generic/range_reduction_double_common.h" - -namespace LIBC_NAMESPACE_DECL { - -using fputil::DoubleDouble; - -LIBC_INLINE unsigned LargeRangeReduction::fast(double x, DoubleDouble &u) { - using FPBits = typename fputil::FPBits<double>; - FPBits xbits(x); - - int x_e_m62 = xbits.get_biased_exponent() - (FPBits::EXP_BIAS + 62); - idx = static_cast<unsigned>((x_e_m62 >> 4) + 3); - // Scale x down by 2^(-(16 * (idx - 3))) - xbits.set_biased_exponent((x_e_m62 & 15) + FPBits::EXP_BIAS + 62); - // 2^62 <= |x_reduced| < 2^(62 + 16) = 2^78 - x_reduced = xbits.get_val(); - // x * c_hi = ph.hi + ph.lo exactly. - DoubleDouble x_split = fputil::split(x_reduced); - DoubleDouble ph = fputil::exact_mult<double, SPLIT>( - x_split, x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][0]); - // x * c_mid = pm.hi + pm.lo exactly. - DoubleDouble pm = fputil::exact_mult<double, SPLIT>( - x_split, x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][1]); - // x * c_lo = pl.hi + pl.lo exactly. - DoubleDouble pl = fputil::exact_mult<double, SPLIT>( - x_split, x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][2]); - // Extract integral parts and fractional parts of (ph.lo + pm.hi). 
- double sum_hi = ph.lo + pm.hi; - double kd = fputil::nearest_integer(sum_hi); - - // x * 128/pi mod 1 ~ y_hi + y_mid + y_lo - y_hi = (ph.lo - kd) + pm.hi; // Exact - y_mid = fputil::exact_add(pm.lo, pl.hi); - y_lo = pl.lo; - - // y_l = x * c_lo_2 + pl.lo - double y_l = - fputil::multiply_add(x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][3], y_lo); - DoubleDouble y = fputil::exact_add(y_hi, y_mid.hi); - y.lo += (y_mid.lo + y_l); - - // Digits of pi/128, generated by Sollya with: - // > a = round(pi/128, D, RN); - // > b = round(pi/128 - a, D, RN); - constexpr DoubleDouble PI_OVER_128_DD = {0x1.1a62633145c07p-60, - 0x1.921fb54442d18p-6}; - - // Error bound: with {a} denote the fractional part of a, i.e.: - // {a} = a - round(a) - // Then, - // | {x * 128/pi} - (y_hi + y_lo) | <= ulp(ulp(y_hi)) <= 2^-105 - // | {x mod pi/128} - (u.hi + u.lo) | < 2 * 2^-6 * 2^-105 = 2^-110 - u = fputil::quick_mult<SPLIT>(y, PI_OVER_128_DD); - - return static_cast<unsigned>(static_cast<int64_t>(kd)); -} - -// Lookup table for sin(k * pi / 128) with k = 0, ..., 255. 
-// Table is generated with Sollya as follow: -// > display = hexadecimal; -// > for k from 0 to 255 do { -// a = round(sin(k * pi/128), 51, RN); -// b = round(sin(k * pi/128) - a, D, RN); -// print("{", b, ",", a, "},"); -// }; -LIBC_INLINE constexpr DoubleDouble SIN_K_PI_OVER_128[] = { - {0, 0}, - {0x1.f938a73db97fbp-58, 0x1.92155f7a3667cp-6}, - {-0x1.912bd0d569a9p-61, 0x1.91f65f10dd814p-5}, - {0x1.ccbeeeae8129ap-56, 0x1.2d52092ce19f4p-4}, - {-0x1.e2718d26ed688p-60, 0x1.917a6bc29b42cp-4}, - {-0x1.cbb1f71aca352p-56, 0x1.f564e56a9731p-4}, - {-0x1.dd9ffeaecbdc4p-55, 0x1.2c8106e8e613cp-3}, - {-0x1.ab3802218894fp-55, 0x1.5e214448b3fc8p-3}, - {-0x1.49b466e7fe36p-55, 0x1.8f8b83c69a60cp-3}, - {-0x1.035e2873ca432p-55, 0x1.c0b826a7e4f64p-3}, - {-0x1.50b7bbc4768b1p-55, 0x1.f19f97b215f1cp-3}, - {-0x1.3ed9efaa42ab3p-55, 0x1.111d262b1f678p-2}, - {0x1.a8b5c974ee7b5p-54, 0x1.294062ed59f04p-2}, - {0x1.4325f12be8946p-54, 0x1.4135c941766p-2}, - {0x1.fc2047e54e614p-55, 0x1.58f9a75ab1fdcp-2}, - {-0x1.512c678219317p-54, 0x1.7088530fa45ap-2}, - {-0x1.2e59dba7ab4c2p-54, 0x1.87de2a6aea964p-2}, - {-0x1.d24afdade848bp-54, 0x1.9ef7943a8ed8cp-2}, - {0x1.5b362cb974183p-57, 0x1.b5d1009e15ccp-2}, - {-0x1.e97af1a63c807p-54, 0x1.cc66e9931c46p-2}, - {-0x1.c3e4edc5872f8p-55, 0x1.e2b5d3806f63cp-2}, - {0x1.fb44f80f92225p-54, 0x1.f8ba4dbf89ab8p-2}, - {0x1.9697faf2e2fe5p-53, 0x1.073879922ffecp-1}, - {-0x1.7bc8eda6af93cp-53, 0x1.11eb3541b4b24p-1}, - {0x1.b25dd267f66p-55, 0x1.1c73b39ae68c8p-1}, - {-0x1.5769d0fbcddc3p-53, 0x1.26d054cdd12ep-1}, - {0x1.c20673b2116b2p-54, 0x1.30ff7fce17034p-1}, - {0x1.3c7c4bc72a92cp-53, 0x1.3affa292050b8p-1}, - {-0x1.e7f895d302395p-53, 0x1.44cf325091dd8p-1}, - {0x1.13c293edceb32p-53, 0x1.4e6cabbe3e5e8p-1}, - {-0x1.75720992bfbb2p-55, 0x1.57d69348cecap-1}, - {-0x1.24a366a5fe547p-53, 0x1.610b7551d2cep-1}, - {0x1.21165f626cdd5p-54, 0x1.6a09e667f3bccp-1}, - {-0x1.bcac43c389ba9p-53, 0x1.72d0837efff98p-1}, - {-0x1.21ea6f59be15bp-53, 0x1.7b5df226aafbp-1}, - {0x1.d217be0e2b971p-53, 
0x1.83b0e0bff976cp-1}, - {0x1.69d0f6897664ap-54, 0x1.8bc806b15174p-1}, - {-0x1.615f32b6f907ap-54, 0x1.93a22499263fcp-1}, - {0x1.6788ebcc76dc6p-54, 0x1.9b3e047f3874p-1}, - {0x1.ddae89fd441d1p-53, 0x1.a29a7a046278p-1}, - {-0x1.f98273c5d2495p-54, 0x1.a9b66290ea1a4p-1}, - {-0x1.926da300ffccep-55, 0x1.b090a581502p-1}, - {0x1.90e58336c64a8p-53, 0x1.b728345196e3cp-1}, - {0x1.9f6963354e3fep-53, 0x1.bd7c0ac6f9528p-1}, - {0x1.a47d3a2a0dcbep-54, 0x1.c38b2f180bdbp-1}, - {0x1.ed0489e16b9ap-54, 0x1.c954b213411f4p-1}, - {-0x1.0f3db5dad5ac5p-53, 0x1.ced7af43cc774p-1}, - {0x1.ac42b5a8b6943p-53, 0x1.d4134d14dc938p-1}, - {-0x1.d75033dfb9ca8p-53, 0x1.d906bcf328d48p-1}, - {0x1.83c37c6107db3p-55, 0x1.ddb13b6ccc23cp-1}, - {0x1.7f59c49f6cd6dp-54, 0x1.e212104f686e4p-1}, - {0x1.ee94a90d7b88bp-53, 0x1.e6288ec48e11p-1}, - {-0x1.a27d3874701f9p-53, 0x1.e9f4156c62ddcp-1}, - {-0x1.85f4e1b8298dp-54, 0x1.ed740e7684964p-1}, - {-0x1.ab4e148e52d9ep-54, 0x1.f0a7efb9230d8p-1}, - {0x1.8a11412b82346p-54, 0x1.f38f3ac64e588p-1}, - {0x1.562172a361fd3p-56, 0x1.f6297cff75cbp-1}, - {0x1.3564acef1ff97p-53, 0x1.f8764fa714ba8p-1}, - {-0x1.5e82a3284d5c8p-53, 0x1.fa7557f08a518p-1}, - {-0x1.709bccb89a989p-54, 0x1.fc26470e19fd4p-1}, - {0x1.9e082721dfb8ep-53, 0x1.fd88da3d12524p-1}, - {-0x1.eade132f3981dp-53, 0x1.fe9cdad01883cp-1}, - {0x1.e3a843d1db55fp-53, 0x1.ff621e3796d7cp-1}, - {0x1.765595d548d9ap-54, 0x1.ffd886084cd0cp-1}, - {0, 1}, -#ifndef LIBC_MATH_HAS_SMALL_TABLES - {0x1.765595d548d9ap-54, 0x1.ffd886084cd0cp-1}, - {0x1.e3a843d1db55fp-53, 0x1.ff621e3796d7cp-1}, - {-0x1.eade132f3981dp-53, 0x1.fe9cdad01883cp-1}, - {0x1.9e082721dfb8ep-53, 0x1.fd88da3d12524p-1}, - {-0x1.709bccb89a989p-54, 0x1.fc26470e19fd4p-1}, - {-0x1.5e82a3284d5c8p-53, 0x1.fa7557f08a518p-1}, - {0x1.3564acef1ff97p-53, 0x1.f8764fa714ba8p-1}, - {0x1.562172a361fd3p-56, 0x1.f6297cff75cbp-1}, - {0x1.8a11412b82346p-54, 0x1.f38f3ac64e588p-1}, - {-0x1.ab4e148e52d9ep-54, 0x1.f0a7efb9230d8p-1}, - {-0x1.85f4e1b8298dp-54, 0x1.ed740e7684964p-1}, - 
{-0x1.a27d3874701f9p-53, 0x1.e9f4156c62ddcp-1}, - {0x1.ee94a90d7b88bp-53, 0x1.e6288ec48e11p-1}, - {0x1.7f59c49f6cd6dp-54, 0x1.e212104f686e4p-1}, - {0x1.83c37c6107db3p-55, 0x1.ddb13b6ccc23cp-1}, - {-0x1.d75033dfb9ca8p-53, 0x1.d906bcf328d48p-1}, - {0x1.ac42b5a8b6943p-53, 0x1.d4134d14dc938p-1}, - {-0x1.0f3db5dad5ac5p-53, 0x1.ced7af43cc774p-1}, - {0x1.ed0489e16b9ap-54, 0x1.c954b213411f4p-1}, - {0x1.a47d3a2a0dcbep-54, 0x1.c38b2f180bdbp-1}, - {0x1.9f6963354e3fep-53, 0x1.bd7c0ac6f9528p-1}, - {0x1.90e58336c64a8p-53, 0x1.b728345196e3cp-1}, - {-0x1.926da300ffccep-55, 0x1.b090a581502p-1}, - {-0x1.f98273c5d2495p-54, 0x1.a9b66290ea1a4p-1}, - {0x1.ddae89fd441d1p-53, 0x1.a29a7a046278p-1}, - {0x1.6788ebcc76dc6p-54, 0x1.9b3e047f3874p-1}, - {-0x1.615f32b6f907ap-54, 0x1.93a22499263fcp-1}, - {0x1.69d0f6897664ap-54, 0x1.8bc806b15174p-1}, - {0x1.d217be0e2b971p-53, 0x1.83b0e0bff976cp-1}, - {-0x1.21ea6f59be15bp-53, 0x1.7b5df226aafbp-1}, - {-0x1.bcac43c389ba9p-53, 0x1.72d0837efff98p-1}, - {0x1.21165f626cdd5p-54, 0x1.6a09e667f3bccp-1}, - {-0x1.24a366a5fe547p-53, 0x1.610b7551d2cep-1}, - {-0x1.75720992bfbb2p-55, 0x1.57d69348cecap-1}, - {0x1.13c293edceb32p-53, 0x1.4e6cabbe3e5e8p-1}, - {-0x1.e7f895d302395p-53, 0x1.44cf325091dd8p-1}, - {0x1.3c7c4bc72a92cp-53, 0x1.3affa292050b8p-1}, - {0x1.c20673b2116b2p-54, 0x1.30ff7fce17034p-1}, - {-0x1.5769d0fbcddc3p-53, 0x1.26d054cdd12ep-1}, - {0x1.b25dd267f66p-55, 0x1.1c73b39ae68c8p-1}, - {-0x1.7bc8eda6af93cp-53, 0x1.11eb3541b4b24p-1}, - {0x1.9697faf2e2fe5p-53, 0x1.073879922ffecp-1}, - {0x1.fb44f80f92225p-54, 0x1.f8ba4dbf89ab8p-2}, - {-0x1.c3e4edc5872f8p-55, 0x1.e2b5d3806f63cp-2}, - {-0x1.e97af1a63c807p-54, 0x1.cc66e9931c46p-2}, - {0x1.5b362cb974183p-57, 0x1.b5d1009e15ccp-2}, - {-0x1.d24afdade848bp-54, 0x1.9ef7943a8ed8cp-2}, - {-0x1.2e59dba7ab4c2p-54, 0x1.87de2a6aea964p-2}, - {-0x1.512c678219317p-54, 0x1.7088530fa45ap-2}, - {0x1.fc2047e54e614p-55, 0x1.58f9a75ab1fdcp-2}, - {0x1.4325f12be8946p-54, 0x1.4135c941766p-2}, - {0x1.a8b5c974ee7b5p-54, 
0x1.294062ed59f04p-2}, - {-0x1.3ed9efaa42ab3p-55, 0x1.111d262b1f678p-2}, - {-0x1.50b7bbc4768b1p-55, 0x1.f19f97b215f1cp-3}, - {-0x1.035e2873ca432p-55, 0x1.c0b826a7e4f64p-3}, - {-0x1.49b466e7fe36p-55, 0x1.8f8b83c69a60cp-3}, - {-0x1.ab3802218894fp-55, 0x1.5e214448b3fc8p-3}, - {-0x1.dd9ffeaecbdc4p-55, 0x1.2c8106e8e613cp-3}, - {-0x1.cbb1f71aca352p-56, 0x1.f564e56a9731p-4}, - {-0x1.e2718d26ed688p-60, 0x1.917a6bc29b42cp-4}, - {0x1.ccbeeeae8129ap-56, 0x1.2d52092ce19f4p-4}, - {-0x1.912bd0d569a9p-61, 0x1.91f65f10dd814p-5}, - {0x1.f938a73db97fbp-58, 0x1.92155f7a3667cp-6}, - {0, 0}, - {-0x1.f938a73db97fbp-58, -0x1.92155f7a3667cp-6}, - {0x1.912bd0d569a9p-61, -0x1.91f65f10dd814p-5}, - {-0x1.ccbeeeae8129ap-56, -0x1.2d52092ce19f4p-4}, - {0x1.e2718d26ed688p-60, -0x1.917a6bc29b42cp-4}, - {0x1.cbb1f71aca352p-56, -0x1.f564e56a9731p-4}, - {0x1.dd9ffeaecbdc4p-55, -0x1.2c8106e8e613cp-3}, - {0x1.ab3802218894fp-55, -0x1.5e214448b3fc8p-3}, - {0x1.49b466e7fe36p-55, -0x1.8f8b83c69a60cp-3}, - {0x1.035e2873ca432p-55, -0x1.c0b826a7e4f64p-3}, - {0x1.50b7bbc4768b1p-55, -0x1.f19f97b215f1cp-3}, - {0x1.3ed9efaa42ab3p-55, -0x1.111d262b1f678p-2}, - {-0x1.a8b5c974ee7b5p-54, -0x1.294062ed59f04p-2}, - {-0x1.4325f12be8946p-54, -0x1.4135c941766p-2}, - {-0x1.fc2047e54e614p-55, -0x1.58f9a75ab1fdcp-2}, - {0x1.512c678219317p-54, -0x1.7088530fa45ap-2}, - {0x1.2e59dba7ab4c2p-54, -0x1.87de2a6aea964p-2}, - {0x1.d24afdade848bp-54, -0x1.9ef7943a8ed8cp-2}, - {-0x1.5b362cb974183p-57, -0x1.b5d1009e15ccp-2}, - {0x1.e97af1a63c807p-54, -0x1.cc66e9931c46p-2}, - {0x1.c3e4edc5872f8p-55, -0x1.e2b5d3806f63cp-2}, - {-0x1.fb44f80f92225p-54, -0x1.f8ba4dbf89ab8p-2}, - {-0x1.9697faf2e2fe5p-53, -0x1.073879922ffecp-1}, - {0x1.7bc8eda6af93cp-53, -0x1.11eb3541b4b24p-1}, - {-0x1.b25dd267f66p-55, -0x1.1c73b39ae68c8p-1}, - {0x1.5769d0fbcddc3p-53, -0x1.26d054cdd12ep-1}, - {-0x1.c20673b2116b2p-54, -0x1.30ff7fce17034p-1}, - {-0x1.3c7c4bc72a92cp-53, -0x1.3affa292050b8p-1}, - {0x1.e7f895d302395p-53, -0x1.44cf325091dd8p-1}, - 
{-0x1.13c293edceb32p-53, -0x1.4e6cabbe3e5e8p-1}, - {0x1.75720992bfbb2p-55, -0x1.57d69348cecap-1}, - {0x1.24a366a5fe547p-53, -0x1.610b7551d2cep-1}, - {-0x1.21165f626cdd5p-54, -0x1.6a09e667f3bccp-1}, - {0x1.bcac43c389ba9p-53, -0x1.72d0837efff98p-1}, - {0x1.21ea6f59be15bp-53, -0x1.7b5df226aafbp-1}, - {-0x1.d217be0e2b971p-53, -0x1.83b0e0bff976cp-1}, - {-0x1.69d0f6897664ap-54, -0x1.8bc806b15174p-1}, - {0x1.615f32b6f907ap-54, -0x1.93a22499263fcp-1}, - {-0x1.6788ebcc76dc6p-54, -0x1.9b3e047f3874p-1}, - {-0x1.ddae89fd441d1p-53, -0x1.a29a7a046278p-1}, - {0x1.f98273c5d2495p-54, -0x1.a9b66290ea1a4p-1}, - {0x1.926da300ffccep-55, -0x1.b090a581502p-1}, - {-0x1.90e58336c64a8p-53, -0x1.b728345196e3cp-1}, - {-0x1.9f6963354e3fep-53, -0x1.bd7c0ac6f9528p-1}, - {-0x1.a47d3a2a0dcbep-54, -0x1.c38b2f180bdbp-1}, - {-0x1.ed0489e16b9ap-54, -0x1.c954b213411f4p-1}, - {0x1.0f3db5dad5ac5p-53, -0x1.ced7af43cc774p-1}, - {-0x1.ac42b5a8b6943p-53, -0x1.d4134d14dc938p-1}, - {0x1.d75033dfb9ca8p-53, -0x1.d906bcf328d48p-1}, - {-0x1.83c37c6107db3p-55, -0x1.ddb13b6ccc23cp-1}, - {-0x1.7f59c49f6cd6dp-54, -0x1.e212104f686e4p-1}, - {-0x1.ee94a90d7b88bp-53, -0x1.e6288ec48e11p-1}, - {0x1.a27d3874701f9p-53, -0x1.e9f4156c62ddcp-1}, - {0x1.85f4e1b8298dp-54, -0x1.ed740e7684964p-1}, - {0x1.ab4e148e52d9ep-54, -0x1.f0a7efb9230d8p-1}, - {-0x1.8a11412b82346p-54, -0x1.f38f3ac64e588p-1}, - {-0x1.562172a361fd3p-56, -0x1.f6297cff75cbp-1}, - {-0x1.3564acef1ff97p-53, -0x1.f8764fa714ba8p-1}, - {0x1.5e82a3284d5c8p-53, -0x1.fa7557f08a518p-1}, - {0x1.709bccb89a989p-54, -0x1.fc26470e19fd4p-1}, - {-0x1.9e082721dfb8ep-53, -0x1.fd88da3d12524p-1}, - {0x1.eade132f3981dp-53, -0x1.fe9cdad01883cp-1}, - {-0x1.e3a843d1db55fp-53, -0x1.ff621e3796d7cp-1}, - {-0x1.765595d548d9ap-54, -0x1.ffd886084cd0cp-1}, - {0, -1}, - {-0x1.765595d548d9ap-54, -0x1.ffd886084cd0cp-1}, - {-0x1.e3a843d1db55fp-53, -0x1.ff621e3796d7cp-1}, - {0x1.eade132f3981dp-53, -0x1.fe9cdad01883cp-1}, - {-0x1.9e082721dfb8ep-53, -0x1.fd88da3d12524p-1}, - {0x1.709bccb89a989p-54, 
-0x1.fc26470e19fd4p-1}, - {0x1.5e82a3284d5c8p-53, -0x1.fa7557f08a518p-1}, - {-0x1.3564acef1ff97p-53, -0x1.f8764fa714ba8p-1}, - {-0x1.562172a361fd3p-56, -0x1.f6297cff75cbp-1}, - {-0x1.8a11412b82346p-54, -0x1.f38f3ac64e588p-1}, - {0x1.ab4e148e52d9ep-54, -0x1.f0a7efb9230d8p-1}, - {0x1.85f4e1b8298dp-54, -0x1.ed740e7684964p-1}, - {0x1.a27d3874701f9p-53, -0x1.e9f4156c62ddcp-1}, - {-0x1.ee94a90d7b88bp-53, -0x1.e6288ec48e11p-1}, - {-0x1.7f59c49f6cd6dp-54, -0x1.e212104f686e4p-1}, - {-0x1.83c37c6107db3p-55, -0x1.ddb13b6ccc23cp-1}, - {0x1.d75033dfb9ca8p-53, -0x1.d906bcf328d48p-1}, - {-0x1.ac42b5a8b6943p-53, -0x1.d4134d14dc938p-1}, - {0x1.0f3db5dad5ac5p-53, -0x1.ced7af43cc774p-1}, - {-0x1.ed0489e16b9ap-54, -0x1.c954b213411f4p-1}, - {-0x1.a47d3a2a0dcbep-54, -0x1.c38b2f180bdbp-1}, - {-0x1.9f6963354e3fep-53, -0x1.bd7c0ac6f9528p-1}, - {-0x1.90e58336c64a8p-53, -0x1.b728345196e3cp-1}, - {0x1.926da300ffccep-55, -0x1.b090a581502p-1}, - {0x1.f98273c5d2495p-54, -0x1.a9b66290ea1a4p-1}, - {-0x1.ddae89fd441d1p-53, -0x1.a29a7a046278p-1}, - {-0x1.6788ebcc76dc6p-54, -0x1.9b3e047f3874p-1}, - {0x1.615f32b6f907ap-54, -0x1.93a22499263fcp-1}, - {-0x1.69d0f6897664ap-54, -0x1.8bc806b15174p-1}, - {-0x1.d217be0e2b971p-53, -0x1.83b0e0bff976cp-1}, - {0x1.21ea6f59be15bp-53, -0x1.7b5df226aafbp-1}, - {0x1.bcac43c389ba9p-53, -0x1.72d0837efff98p-1}, - {-0x1.21165f626cdd5p-54, -0x1.6a09e667f3bccp-1}, - {0x1.24a366a5fe547p-53, -0x1.610b7551d2cep-1}, - {0x1.75720992bfbb2p-55, -0x1.57d69348cecap-1}, - {-0x1.13c293edceb32p-53, -0x1.4e6cabbe3e5e8p-1}, - {0x1.e7f895d302395p-53, -0x1.44cf325091dd8p-1}, - {-0x1.3c7c4bc72a92cp-53, -0x1.3affa292050b8p-1}, - {-0x1.c20673b2116b2p-54, -0x1.30ff7fce17034p-1}, - {0x1.5769d0fbcddc3p-53, -0x1.26d054cdd12ep-1}, - {-0x1.b25dd267f66p-55, -0x1.1c73b39ae68c8p-1}, - {0x1.7bc8eda6af93cp-53, -0x1.11eb3541b4b24p-1}, - {-0x1.9697faf2e2fe5p-53, -0x1.073879922ffecp-1}, - {-0x1.fb44f80f92225p-54, -0x1.f8ba4dbf89ab8p-2}, - {0x1.c3e4edc5872f8p-55, -0x1.e2b5d3806f63cp-2}, - 
{0x1.e97af1a63c807p-54, -0x1.cc66e9931c46p-2}, - {-0x1.5b362cb974183p-57, -0x1.b5d1009e15ccp-2}, - {0x1.d24afdade848bp-54, -0x1.9ef7943a8ed8cp-2}, - {0x1.2e59dba7ab4c2p-54, -0x1.87de2a6aea964p-2}, - {0x1.512c678219317p-54, -0x1.7088530fa45ap-2}, - {-0x1.fc2047e54e614p-55, -0x1.58f9a75ab1fdcp-2}, - {-0x1.4325f12be8946p-54, -0x1.4135c941766p-2}, - {-0x1.a8b5c974ee7b5p-54, -0x1.294062ed59f04p-2}, - {0x1.3ed9efaa42ab3p-55, -0x1.111d262b1f678p-2}, - {0x1.50b7bbc4768b1p-55, -0x1.f19f97b215f1cp-3}, - {0x1.035e2873ca432p-55, -0x1.c0b826a7e4f64p-3}, - {0x1.49b466e7fe36p-55, -0x1.8f8b83c69a60cp-3}, - {0x1.ab3802218894fp-55, -0x1.5e214448b3fc8p-3}, - {0x1.dd9ffeaecbdc4p-55, -0x1.2c8106e8e613cp-3}, - {0x1.cbb1f71aca352p-56, -0x1.f564e56a9731p-4}, - {0x1.e2718d26ed688p-60, -0x1.917a6bc29b42cp-4}, - {-0x1.ccbeeeae8129ap-56, -0x1.2d52092ce19f4p-4}, - {0x1.912bd0d569a9p-61, -0x1.91f65f10dd814p-5}, - {-0x1.f938a73db97fbp-58, -0x1.92155f7a3667cp-6}, -#endif // !LIBC_MATH_HAS_SMALL_TABLES -}; - -} // namespace LIBC_NAMESPACE_DECL - -#endif // LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_NOFMA_H diff --git a/libc/src/math/generic/range_reduction_fma.h b/libc/src/math/generic/range_reduction_fma.h deleted file mode 100644 index 537d572..0000000 --- a/libc/src/math/generic/range_reduction_fma.h +++ /dev/null @@ -1,92 +0,0 @@ -//===-- Utilities for trigonometric functions with FMA ----------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_FMA_H -#define LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_FMA_H - -#include "src/__support/FPUtil/FMA.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/nearest_integer.h" -#include "src/__support/macros/config.h" - -namespace LIBC_NAMESPACE_DECL { - -namespace fma { - -static constexpr uint32_t FAST_PASS_BOUND = 0x5600'0000U; // 2^45 - -// Digits of 32/pi, generated by Sollya with: -// > a0 = D(32/pi); -// > a1 = D(32/pi - a0); -// > a2 = D(32/pi - a0 - a1); -// > a3 = D(32/pi - a0 - a1 - a2); -static constexpr double THIRTYTWO_OVER_PI[5] = { - 0x1.45f306dc9c883p+3, -0x1.6b01ec5417056p-51, -0x1.6447e493ad4cep-105, - 0x1.e21c820ff28b2p-159, -0x1.508510ea79237p-214}; - -// Return k and y, where -// k = round(x * 32 / pi) and y = (x * 32 / pi) - k. -LIBC_INLINE int64_t small_range_reduction(double x, double &y) { - double kd = fputil::nearest_integer(x * THIRTYTWO_OVER_PI[0]); - y = fputil::fma<double>(x, THIRTYTWO_OVER_PI[0], -kd); - y = fputil::fma<double>(x, THIRTYTWO_OVER_PI[1], y); - return static_cast<int64_t>(kd); -} - -// Return k and y, where -// k = round(x * 32 / pi) and y = (x * 32 / pi) - k. -// This is used for sinf, cosf, sincosf. -LIBC_INLINE int64_t large_range_reduction(double x, int x_exp, double &y) { - // 2^45 <= |x| < 2^99 - if (x_exp < 99) { - // - When x < 2^99, the full exact product of x * THIRTYTWO_OVER_PI[0] - // contains at least one integral bit <= 2^5. - // - When 2^45 <= |x| < 2^55, the lowest 6 unit bits are contained - // in the last 12 bits of double(x * THIRTYTWO_OVER_PI[0]). - // - When |x| >= 2^55, the LSB of double(x * THIRTYTWO_OVER_PI[0]) is at - // least 2^6. - fputil::FPBits<double> prod_hi(x * THIRTYTWO_OVER_PI[0]); - prod_hi.set_uintval(prod_hi.uintval() & - ((x_exp < 55) ? 
(~0xfffULL) : (~0ULL))); // |x| < 2^55 - double k_hi = fputil::nearest_integer(prod_hi.get_val()); - double truncated_prod = fputil::fma<double>(x, THIRTYTWO_OVER_PI[0], -k_hi); - double prod_lo = - fputil::fma<double>(x, THIRTYTWO_OVER_PI[1], truncated_prod); - double k_lo = fputil::nearest_integer(prod_lo); - y = fputil::fma<double>(x, THIRTYTWO_OVER_PI[1], truncated_prod - k_lo); - y = fputil::fma<double>(x, THIRTYTWO_OVER_PI[2], y); - y = fputil::fma<double>(x, THIRTYTWO_OVER_PI[3], y); - - return static_cast<int64_t>(k_lo); - } - - // - When x >= 2^110, the full exact product of x * THIRTYTWO_OVER_PI[0] does - // not contain any of the lowest 6 unit bits, so we can ignore it completely. - // - When 2^99 <= |x| < 2^110, the lowest 6 unit bits are contained - // in the last 12 bits of double(x * THIRTYTWO_OVER_PI[1]). - // - When |x| >= 2^110, the LSB of double(x * THIRTYTWO_OVER_PI[1]) is at - // least 64. - fputil::FPBits<double> prod_hi(x * THIRTYTWO_OVER_PI[1]); - prod_hi.set_uintval(prod_hi.uintval() & - ((x_exp < 110) ? 
(~0xfffULL) : (~0ULL))); // |x| < 2^110 - double k_hi = fputil::nearest_integer(prod_hi.get_val()); - double truncated_prod = fputil::fma<double>(x, THIRTYTWO_OVER_PI[1], -k_hi); - double prod_lo = fputil::fma<double>(x, THIRTYTWO_OVER_PI[2], truncated_prod); - double k_lo = fputil::nearest_integer(prod_lo); - y = fputil::fma<double>(x, THIRTYTWO_OVER_PI[2], truncated_prod - k_lo); - y = fputil::fma<double>(x, THIRTYTWO_OVER_PI[3], y); - y = fputil::fma<double>(x, THIRTYTWO_OVER_PI[4], y); - - return static_cast<int64_t>(k_lo); -} - -} // namespace fma - -} // namespace LIBC_NAMESPACE_DECL - -#endif // LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_FMA_H diff --git a/libc/src/math/generic/sin.cpp b/libc/src/math/generic/sin.cpp index a614427b..1b6310f 100644 --- a/libc/src/math/generic/sin.cpp +++ b/libc/src/math/generic/sin.cpp @@ -18,13 +18,13 @@ #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA -#include "src/math/generic/range_reduction_double_common.h" -#include "src/math/generic/sincos_eval.h" +#include "src/__support/math/range_reduction_double_common.h" +#include "src/__support/math/sincos_eval.h" #ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE -#include "range_reduction_double_fma.h" +#include "src/__support/math/range_reduction_double_fma.h" #else -#include "range_reduction_double_nofma.h" +#include "src/__support/math/range_reduction_double_nofma.h" #endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE namespace LIBC_NAMESPACE_DECL { @@ -33,6 +33,7 @@ using DoubleDouble = fputil::DoubleDouble; using Float128 = typename fputil::DyadicFloat<128>; LLVM_LIBC_FUNCTION(double, sin, (double x)) { + using namespace math::range_reduction_double_internal; using FPBits = typename fputil::FPBits<double>; FPBits xbits(x); @@ -95,7 +96,8 @@ LLVM_LIBC_FUNCTION(double, sin, (double x)) { DoubleDouble sin_y, cos_y; - [[maybe_unused]] double err = 
generic::sincos_eval(y, sin_y, cos_y); + [[maybe_unused]] double err = + math::sincos_eval_internal::sincos_eval(y, sin_y, cos_y); // Look up sin(k * pi/128) and cos(k * pi/128) #ifdef LIBC_MATH_HAS_SMALL_TABLES @@ -149,7 +151,7 @@ LLVM_LIBC_FUNCTION(double, sin, (double x)) { else u_f128 = range_reduction_large.accurate(); - generic::sincos_eval(u_f128, sin_u, cos_u); + math::sincos_eval_internal::sincos_eval(u_f128, sin_u, cos_u); auto get_sin_k = [](unsigned kk) -> Float128 { unsigned idx = (kk & 64) ? 64 - (kk & 63) : (kk & 63); diff --git a/libc/src/math/generic/sincos.cpp b/libc/src/math/generic/sincos.cpp index 08c8a82..38661de 100644 --- a/libc/src/math/generic/sincos.cpp +++ b/libc/src/math/generic/sincos.cpp @@ -19,13 +19,13 @@ #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA -#include "src/math/generic/range_reduction_double_common.h" -#include "src/math/generic/sincos_eval.h" +#include "src/__support/math/range_reduction_double_common.h" +#include "src/__support/math/sincos_eval.h" #ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE -#include "range_reduction_double_fma.h" +#include "src/__support/math/range_reduction_double_fma.h" #else -#include "range_reduction_double_nofma.h" +#include "src/__support/math/range_reduction_double_nofma.h" #endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE namespace LIBC_NAMESPACE_DECL { @@ -34,6 +34,7 @@ using DoubleDouble = fputil::DoubleDouble; using Float128 = typename fputil::DyadicFloat<128>; LLVM_LIBC_FUNCTION(void, sincos, (double x, double *sin_x, double *cos_x)) { + using namespace math::range_reduction_double_internal; using FPBits = typename fputil::FPBits<double>; FPBits xbits(x); @@ -106,7 +107,8 @@ LLVM_LIBC_FUNCTION(void, sincos, (double x, double *sin_x, double *cos_x)) { DoubleDouble sin_y, cos_y; - [[maybe_unused]] double err = generic::sincos_eval(y, sin_y, cos_y); + [[maybe_unused]] 
double err = + math::sincos_eval_internal::sincos_eval(y, sin_y, cos_y); // Look up sin(k * pi/128) and cos(k * pi/128) #ifdef LIBC_MATH_HAS_SMALL_TABLES @@ -179,7 +181,7 @@ LLVM_LIBC_FUNCTION(void, sincos, (double x, double *sin_x, double *cos_x)) { else u_f128 = range_reduction_large.accurate(); - generic::sincos_eval(u_f128, sin_u, cos_u); + math::sincos_eval_internal::sincos_eval(u_f128, sin_u, cos_u); auto get_sin_k = [](unsigned kk) -> Float128 { unsigned idx = (kk & 64) ? 64 - (kk & 63) : (kk & 63); diff --git a/libc/src/math/generic/sincos_eval.h b/libc/src/math/generic/sincos_eval.h deleted file mode 100644 index 41a4c75..0000000 --- a/libc/src/math/generic/sincos_eval.h +++ /dev/null @@ -1,138 +0,0 @@ -//===-- Compute sin + cos for small angles ----------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC_MATH_GENERIC_SINCOS_EVAL_H -#define LLVM_LIBC_SRC_MATH_GENERIC_SINCOS_EVAL_H - -#include "src/__support/FPUtil/PolyEval.h" -#include "src/__support/FPUtil/double_double.h" -#include "src/__support/FPUtil/dyadic_float.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/integer_literals.h" -#include "src/__support/macros/config.h" - -namespace LIBC_NAMESPACE_DECL { - -namespace generic { - -using fputil::DoubleDouble; -using Float128 = fputil::DyadicFloat<128>; - -LIBC_INLINE double sincos_eval(const DoubleDouble &u, DoubleDouble &sin_u, - DoubleDouble &cos_u) { - // Evaluate sin(y) = sin(x - k * (pi/128)) - // We use the degree-7 Taylor approximation: - // sin(y) ~ y - y^3/3! + y^5/5! - y^7/7! - // Then the error is bounded by: - // |sin(y) - (y - y^3/3! + y^5/5! - y^7/7!)| < |y|^9/9! < 2^-54/9! < 2^-72. 
- // For y ~ u_hi + u_lo, fully expanding the polynomial and dropping any terms - // < ulp(u_hi^3) gives us: - // y - y^3/3! + y^5/5! - y^7/7! = ... - // ~ u_hi + u_hi^3 * (-1/6 + u_hi^2 * (1/120 - u_hi^2 * 1/5040)) + - // + u_lo (1 + u_hi^2 * (-1/2 + u_hi^2 / 24)) - double u_hi_sq = u.hi * u.hi; // Error < ulp(u_hi^2) < 2^(-6 - 52) = 2^-58. - // p1 ~ 1/120 - u_hi^2 / 5040. - double p1 = fputil::multiply_add(u_hi_sq, -0x1.a01a01a01a01ap-13, - 0x1.1111111111111p-7); - // q1 ~ -1/2 + u_hi^2 / 24. - double q1 = fputil::multiply_add(u_hi_sq, 0x1.5555555555555p-5, -0x1.0p-1); - double u_hi_3 = u_hi_sq * u.hi; - // p2 ~ -1/6 + u_hi^2 (1/120 - u_hi^2 * 1/5040) - double p2 = fputil::multiply_add(u_hi_sq, p1, -0x1.5555555555555p-3); - // q2 ~ 1 + u_hi^2 (-1/2 + u_hi^2 / 24) - double q2 = fputil::multiply_add(u_hi_sq, q1, 1.0); - double sin_lo = fputil::multiply_add(u_hi_3, p2, u.lo * q2); - // Overall, |sin(y) - (u_hi + sin_lo)| < 2*ulp(u_hi^3) < 2^-69. - - // Evaluate cos(y) = cos(x - k * (pi/128)) - // We use the degree-8 Taylor approximation: - // cos(y) ~ 1 - y^2/2 + y^4/4! - y^6/6! + y^8/8! - // Then the error is bounded by: - // |cos(y) - (...)| < |y|^10/10! < 2^-81 - // For y ~ u_hi + u_lo, fully expanding the polynomial and dropping any terms - // < ulp(u_hi^3) gives us: - // 1 - y^2/2 + y^4/4! - y^6/6! + y^8/8! = ... - // ~ 1 - u_hi^2/2 + u_hi^4(1/24 + u_hi^2 (-1/720 + u_hi^2/40320)) + - // + u_hi u_lo (-1 + u_hi^2/6) - // We compute 1 - u_hi^2/2 accurately: - // v_hi + v_lo ~ 1 - u_hi^2/2 - // with error <= 2^-105. 
- double u_hi_neg_half = (-0.5) * u.hi; - DoubleDouble v; - -#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE - v.hi = fputil::multiply_add(u.hi, u_hi_neg_half, 1.0); - v.lo = 1.0 - v.hi; // Exact - v.lo = fputil::multiply_add(u.hi, u_hi_neg_half, v.lo); -#else - DoubleDouble u_hi_sq_neg_half = fputil::exact_mult(u.hi, u_hi_neg_half); - v = fputil::exact_add(1.0, u_hi_sq_neg_half.hi); - v.lo += u_hi_sq_neg_half.lo; -#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE - - // r1 ~ -1/720 + u_hi^2 / 40320 - double r1 = fputil::multiply_add(u_hi_sq, 0x1.a01a01a01a01ap-16, - -0x1.6c16c16c16c17p-10); - // s1 ~ -1 + u_hi^2 / 6 - double s1 = fputil::multiply_add(u_hi_sq, 0x1.5555555555555p-3, -1.0); - double u_hi_4 = u_hi_sq * u_hi_sq; - double u_hi_u_lo = u.hi * u.lo; - // r2 ~ 1/24 + u_hi^2 (-1/720 + u_hi^2 / 40320) - double r2 = fputil::multiply_add(u_hi_sq, r1, 0x1.5555555555555p-5); - // s2 ~ v_lo + u_hi * u_lo * (-1 + u_hi^2 / 6) - double s2 = fputil::multiply_add(u_hi_u_lo, s1, v.lo); - double cos_lo = fputil::multiply_add(u_hi_4, r2, s2); - // Overall, |cos(y) - (v_hi + cos_lo)| < 2*ulp(u_hi^4) < 2^-75. - - sin_u = fputil::exact_add(u.hi, sin_lo); - cos_u = fputil::exact_add(v.hi, cos_lo); - - return fputil::multiply_add(fputil::FPBits<double>(u_hi_3).abs().get_val(), - 0x1.0p-51, 0x1.0p-105); -} - -LIBC_INLINE void sincos_eval(const Float128 &u, Float128 &sin_u, - Float128 &cos_u) { - Float128 u_sq = fputil::quick_mul(u, u); - - // sin(u) ~ x - x^3/3! + x^5/5! - x^7/7! + x^9/9! - x^11/11! + x^13/13! - constexpr Float128 SIN_COEFFS[] = { - {Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, // 1 - {Sign::NEG, -130, 0xaaaaaaaa'aaaaaaaa'aaaaaaaa'aaaaaaab_u128}, // -1/3! - {Sign::POS, -134, 0x88888888'88888888'88888888'88888889_u128}, // 1/5! - {Sign::NEG, -140, 0xd00d00d0'0d00d00d'00d00d00'd00d00d0_u128}, // -1/7! - {Sign::POS, -146, 0xb8ef1d2a'b6399c7d'560e4472'800b8ef2_u128}, // 1/9! - {Sign::NEG, -153, 0xd7322b3f'aa271c7f'3a3f25c1'bee38f10_u128}, // -1/11! 
- {Sign::POS, -160, 0xb092309d'43684be5'1c198e91'd7b4269e_u128}, // 1/13! - }; - - // cos(u) ~ 1 - x^2/2 + x^4/4! - x^6/6! + x^8/8! - x^10/10! + x^12/12! - constexpr Float128 COS_COEFFS[] = { - {Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, // 1.0 - {Sign::NEG, -128, 0x80000000'00000000'00000000'00000000_u128}, // -1/2 - {Sign::POS, -132, 0xaaaaaaaa'aaaaaaaa'aaaaaaaa'aaaaaaab_u128}, // 1/4! - {Sign::NEG, -137, 0xb60b60b6'0b60b60b'60b60b60'b60b60b6_u128}, // -1/6! - {Sign::POS, -143, 0xd00d00d0'0d00d00d'00d00d00'd00d00d0_u128}, // 1/8! - {Sign::NEG, -149, 0x93f27dbb'c4fae397'780b69f5'333c725b_u128}, // -1/10! - {Sign::POS, -156, 0x8f76c77f'c6c4bdaa'26d4c3d6'7f425f60_u128}, // 1/12! - }; - - sin_u = fputil::quick_mul(u, fputil::polyeval(u_sq, SIN_COEFFS[0], - SIN_COEFFS[1], SIN_COEFFS[2], - SIN_COEFFS[3], SIN_COEFFS[4], - SIN_COEFFS[5], SIN_COEFFS[6])); - cos_u = fputil::polyeval(u_sq, COS_COEFFS[0], COS_COEFFS[1], COS_COEFFS[2], - COS_COEFFS[3], COS_COEFFS[4], COS_COEFFS[5], - COS_COEFFS[6]); -} - -} // namespace generic - -} // namespace LIBC_NAMESPACE_DECL - -#endif // LLVM_LIBC_SRC_MATH_GENERIC_SINCOS_EVAL_H diff --git a/libc/src/math/generic/sincosf.cpp b/libc/src/math/generic/sincosf.cpp index 9c7bf18..5179c98 100644 --- a/libc/src/math/generic/sincosf.cpp +++ b/libc/src/math/generic/sincosf.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/math/sincosf.h" -#include "sincosf_utils.h" #include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/multiply_add.h" @@ -16,6 +15,7 @@ #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA +#include "src/__support/math/sincosf_utils.h" namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/math/generic/sincosf16_utils.h 
b/libc/src/math/generic/sincosf16_utils.h deleted file mode 100644 index 05cab09d..0000000 --- a/libc/src/math/generic/sincosf16_utils.h +++ /dev/null @@ -1,112 +0,0 @@ -//===-- Collection of utils for sinf16/cosf16 -------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC_MATH_GENERIC_SINCOSF16_UTILS_H -#define LLVM_LIBC_SRC_MATH_GENERIC_SINCOSF16_UTILS_H - -#include "src/__support/FPUtil/PolyEval.h" -#include "src/__support/FPUtil/nearest_integer.h" -#include "src/__support/common.h" -#include "src/__support/macros/config.h" - -namespace LIBC_NAMESPACE_DECL { - -// Lookup table for sin(k * pi / 32) with k = 0, ..., 63. -// Table is generated with Sollya as follows: -// > display = hexadecimal; -// > for k from 0 to 63 do { round(sin(k * pi/32), SG, RN); }; -constexpr float SIN_K_PI_OVER_32[64] = { - 0x0.0p0, 0x1.917a6cp-4, 0x1.8f8b84p-3, 0x1.294062p-2, - 0x1.87de2ap-2, 0x1.e2b5d4p-2, 0x1.1c73b4p-1, 0x1.44cf32p-1, - 0x1.6a09e6p-1, 0x1.8bc806p-1, 0x1.a9b662p-1, 0x1.c38b3p-1, - 0x1.d906bcp-1, 0x1.e9f416p-1, 0x1.f6297cp-1, 0x1.fd88dap-1, - 0x1p0, 0x1.fd88dap-1, 0x1.f6297cp-1, 0x1.e9f416p-1, - 0x1.d906bcp-1, 0x1.c38b3p-1, 0x1.a9b662p-1, 0x1.8bc806p-1, - 0x1.6a09e6p-1, 0x1.44cf32p-1, 0x1.1c73b4p-1, 0x1.e2b5d4p-2, - 0x1.87de2ap-2, 0x1.294062p-2, 0x1.8f8b84p-3, 0x1.917a6cp-4, - 0x0.0p0, -0x1.917a6cp-4, -0x1.8f8b84p-3, -0x1.294062p-2, - -0x1.87de2ap-2, -0x1.e2b5d4p-2, -0x1.1c73b4p-1, -0x1.44cf32p-1, - -0x1.6a09e6p-1, -0x1.8bc806p-1, -0x1.a9b662p-1, -0x1.c38b3p-1, - -0x1.d906bcp-1, -0x1.e9f416p-1, -0x1.f6297ep-1, -0x1.fd88dap-1, - -0x1p0, -0x1.fd88dap-1, -0x1.f6297cp-1, -0x1.e9f416p-1, - -0x1.d906bcp-1, -0x1.c38b3p-1, -0x1.a9b662p-1, -0x1.8bc806p-1, - -0x1.6a09e6p-1, 
-0x1.44cf32p-1, -0x1.1c73b4p-1, -0x1.e2b5d4p-2, - -0x1.87de2ap-2, -0x1.294062p-2, -0x1.8f8b84p-3, -0x1.917a6cp-4}; - -LIBC_INLINE int32_t range_reduction_sincospif16(float x, float &y) { - float kf = fputil::nearest_integer(x * 32); - y = fputil::multiply_add(x, 32.0f, -kf); - - return static_cast<int32_t>(kf); -} - -// Recall, range reduction: -// k = round(x * 32/pi) -// -// The precision choice of 'double' in the following function is to minimize -// rounding errors in this initial scaling step, -// preserving enough bits so errors accumulated while computing the subtraction: -// y = x * 32/pi - round(x * 32/pi) -// are beyond the least-significant bit of single-precision used during -// further intermediate computation. -LIBC_INLINE int32_t range_reduction_sincosf16(float x, float &y) { - // Generated by Sollya with: - // > D(32/pi); - constexpr double THIRTYTWO_OVER_PI = 0x1.45f306dc9c883p3; - - double prod = x * THIRTYTWO_OVER_PI; - double kd = fputil::nearest_integer(prod); - y = static_cast<float>(prod - kd); - - return static_cast<int32_t>(kd); -} - -static LIBC_INLINE void sincosf16_poly_eval(int32_t k, float y, float &sin_k, - float &cos_k, float &sin_y, - float &cosm1_y) { - - sin_k = SIN_K_PI_OVER_32[k & 63]; - cos_k = SIN_K_PI_OVER_32[(k + 16) & 63]; - - // Recall, after range reduction, -0.5 <= y <= 0.5. For very small values of - // y, calculating sin(y * pi/32) can be inaccurate. Generating a polynomial for - // sin(y * pi/32)/y instead significantly reduces the relative errors. 
- float ysq = y * y; - - // Degree-6 minimax even polynomial for sin(y*pi/32)/y generated by Sollya - // with: - // > Q = fpminimax(sin(y * pi/32)/y, [|0, 2, 4, 6|], [|SG...|], [0, 0.5]); - sin_y = y * fputil::polyeval(ysq, 0x1.921fb6p-4f, -0x1.4aeabcp-13f, - 0x1.a03354p-21f, -0x1.ad02d2p-20f); - - // Degree-6 minimax even polynomial for cos(y*pi/32) generated by Sollya - // with: - // > P = fpminimax(cos(y * pi/32), [|0, 2, 4, 6|],[|1, SG...|], [0, 0.5]); - cosm1_y = ysq * fputil::polyeval(ysq, -0x1.3bd3ccp-8f, 0x1.03a61ap-18f, - 0x1.a6f7a2p-29f); -} - -LIBC_INLINE void sincosf16_eval(float xf, float &sin_k, float &cos_k, - float &sin_y, float &cosm1_y) { - float y; - int32_t k = range_reduction_sincosf16(xf, y); - - sincosf16_poly_eval(k, y, sin_k, cos_k, sin_y, cosm1_y); -} - -LIBC_INLINE void sincospif16_eval(float xf, float &sin_k, float &cos_k, - float &sin_y, float &cosm1_y) { - float y; - int32_t k = range_reduction_sincospif16(xf, y); - - sincosf16_poly_eval(k, y, sin_k, cos_k, sin_y, cosm1_y); -} - -} // namespace LIBC_NAMESPACE_DECL - -#endif // LLVM_LIBC_SRC_MATH_GENERIC_SINCOSF16_UTILS_H diff --git a/libc/src/math/generic/sincosf_utils.h b/libc/src/math/generic/sincosf_utils.h deleted file mode 100644 index 6eaf820..0000000 --- a/libc/src/math/generic/sincosf_utils.h +++ /dev/null @@ -1,125 +0,0 @@ -//===-- Collection of utils for sinf/cosf/sincosf ---------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC_MATH_GENERIC_SINCOSF_UTILS_H -#define LLVM_LIBC_SRC_MATH_GENERIC_SINCOSF_UTILS_H - -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/PolyEval.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA - -#if defined(LIBC_TARGET_CPU_HAS_FMA_DOUBLE) -#include "range_reduction_fma.h" -// using namespace LIBC_NAMESPACE::fma; -using LIBC_NAMESPACE::fma::FAST_PASS_BOUND; -using LIBC_NAMESPACE::fma::large_range_reduction; -using LIBC_NAMESPACE::fma::small_range_reduction; - -#else -#include "range_reduction.h" -// using namespace LIBC_NAMESPACE::generic; -using LIBC_NAMESPACE::generic::FAST_PASS_BOUND; -using LIBC_NAMESPACE::generic::large_range_reduction; -using LIBC_NAMESPACE::generic::small_range_reduction; -#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE - -namespace LIBC_NAMESPACE_DECL { - -// Lookup table for sin(k * pi / 32) with k = 0, ..., 63. 
-// Table is generated with Sollya as follows: -// > display = hexadecimal; -// > for k from 0 to 63 do { D(sin(k * pi/32)); }; -const double SIN_K_PI_OVER_32[64] = { - 0x0.0000000000000p+0, 0x1.917a6bc29b42cp-4, 0x1.8f8b83c69a60bp-3, - 0x1.294062ed59f06p-2, 0x1.87de2a6aea963p-2, 0x1.e2b5d3806f63bp-2, - 0x1.1c73b39ae68c8p-1, 0x1.44cf325091dd6p-1, 0x1.6a09e667f3bcdp-1, - 0x1.8bc806b151741p-1, 0x1.a9b66290ea1a3p-1, 0x1.c38b2f180bdb1p-1, - 0x1.d906bcf328d46p-1, 0x1.e9f4156c62ddap-1, 0x1.f6297cff75cbp-1, - 0x1.fd88da3d12526p-1, 0x1.0000000000000p+0, 0x1.fd88da3d12526p-1, - 0x1.f6297cff75cbp-1, 0x1.e9f4156c62ddap-1, 0x1.d906bcf328d46p-1, - 0x1.c38b2f180bdb1p-1, 0x1.a9b66290ea1a3p-1, 0x1.8bc806b151741p-1, - 0x1.6a09e667f3bcdp-1, 0x1.44cf325091dd6p-1, 0x1.1c73b39ae68c8p-1, - 0x1.e2b5d3806f63bp-2, 0x1.87de2a6aea963p-2, 0x1.294062ed59f06p-2, - 0x1.8f8b83c69a60bp-3, 0x1.917a6bc29b42cp-4, 0x0.0000000000000p+0, - -0x1.917a6bc29b42cp-4, -0x1.8f8b83c69a60bp-3, -0x1.294062ed59f06p-2, - -0x1.87de2a6aea963p-2, -0x1.e2b5d3806f63bp-2, -0x1.1c73b39ae68c8p-1, - -0x1.44cf325091dd6p-1, -0x1.6a09e667f3bcdp-1, -0x1.8bc806b151741p-1, - -0x1.a9b66290ea1a3p-1, -0x1.c38b2f180bdb1p-1, -0x1.d906bcf328d46p-1, - -0x1.e9f4156c62ddap-1, -0x1.f6297cff75cbp-1, -0x1.fd88da3d12526p-1, - -0x1.0000000000000p+0, -0x1.fd88da3d12526p-1, -0x1.f6297cff75cbp-1, - -0x1.e9f4156c62ddap-1, -0x1.d906bcf328d46p-1, -0x1.c38b2f180bdb1p-1, - -0x1.a9b66290ea1a3p-1, -0x1.8bc806b151741p-1, -0x1.6a09e667f3bcdp-1, - -0x1.44cf325091dd6p-1, -0x1.1c73b39ae68c8p-1, -0x1.e2b5d3806f63bp-2, - -0x1.87de2a6aea963p-2, -0x1.294062ed59f06p-2, -0x1.8f8b83c69a60bp-3, - -0x1.917a6bc29b42cp-4, -}; - -static LIBC_INLINE void sincosf_poly_eval(int64_t k, double y, double &sin_k, - double &cos_k, double &sin_y, - double &cosm1_y) { - // After range reduction, k = round(x * 32 / pi) and y = (x * 32 / pi) - k. - // So k is an integer and -0.5 <= y <= 0.5. 
- // Then sin(x) = sin((k + y)*pi/32) - // = sin(y*pi/32) * cos(k*pi/32) + cos(y*pi/32) * sin(k*pi/32) - - sin_k = SIN_K_PI_OVER_32[k & 63]; - // cos(k * pi/32) = sin(k * pi/32 + pi/2) = sin((k + 16) * pi/32). - // cos_k = cos(k * pi/32) - cos_k = SIN_K_PI_OVER_32[(k + 16) & 63]; - - double ysq = y * y; - - // Degree-6 minimax even polynomial for sin(y*pi/32)/y generated by Sollya - // with: - // > Q = fpminimax(sin(y*pi/32)/y, [|0, 2, 4, 6|], [|D...|], [0, 0.5]); - sin_y = - y * fputil::polyeval(ysq, 0x1.921fb54442d18p-4, -0x1.4abbce625abb1p-13, - 0x1.466bc624f2776p-24, -0x1.32c3a619d4a7ep-36); - // Degree-6 minimax even polynomial for cos(y*pi/32) generated by Sollya with: - // > P = fpminimax(cos(x*pi/32), [|0, 2, 4, 6|], [|1, D...|], [0, 0.5]); - // Note that cosm1_y = cos(y*pi/32) - 1. - cosm1_y = ysq * fputil::polyeval(ysq, -0x1.3bd3cc9be430bp-8, - 0x1.03c1f070c2e27p-18, -0x1.55cc84bd942p-30); -} - -LIBC_INLINE void sincosf_eval(double xd, uint32_t x_abs, double &sin_k, - double &cos_k, double &sin_y, double &cosm1_y) { - int64_t k; - double y; - - if (LIBC_LIKELY(x_abs < FAST_PASS_BOUND)) { - k = small_range_reduction(xd, y); - } else { - fputil::FPBits<float> x_bits(x_abs); - k = large_range_reduction(xd, x_bits.get_exponent(), y); - } - - sincosf_poly_eval(k, y, sin_k, cos_k, sin_y, cosm1_y); -} - -// Return k and y, where -// k = round(x * 32) and y = (x * 32) - k. 
-// => pi * x = (k + y) * pi / 32 -static LIBC_INLINE int64_t range_reduction_sincospi(double x, double &y) { - double kd = fputil::nearest_integer(x * 32); - y = fputil::multiply_add(x, 32.0, -kd); - - return static_cast<int64_t>(kd); -} - -LIBC_INLINE void sincospif_eval(double xd, double &sin_k, double &cos_k, - double &sin_y, double &cosm1_y) { - double y; - int64_t k = range_reduction_sincospi(xd, y); - sincosf_poly_eval(k, y, sin_k, cos_k, sin_y, cosm1_y); -} - -} // namespace LIBC_NAMESPACE_DECL - -#endif // LLVM_LIBC_SRC_MATH_GENERIC_SINCOSF_UTILS_H diff --git a/libc/src/math/generic/sinf.cpp b/libc/src/math/generic/sinf.cpp index 38ea56f..a8e634c 100644 --- a/libc/src/math/generic/sinf.cpp +++ b/libc/src/math/generic/sinf.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/math/sinf.h" -#include "sincosf_utils.h" #include "src/__support/FPUtil/BasicOperations.h" #include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" @@ -18,11 +17,12 @@ #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA +#include "src/__support/math/sincosf_utils.h" #if defined(LIBC_TARGET_CPU_HAS_FMA_DOUBLE) -#include "range_reduction_fma.h" +#include "src/__support/math/range_reduction_fma.h" #else -#include "range_reduction.h" +#include "src/__support/math/range_reduction.h" #endif namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/math/generic/sinf16.cpp b/libc/src/math/generic/sinf16.cpp index 28debbd..2b57920 100644 --- a/libc/src/math/generic/sinf16.cpp +++ b/libc/src/math/generic/sinf16.cpp @@ -9,13 +9,13 @@ #include "src/math/sinf16.h" #include "hdr/errno_macros.h" #include "hdr/fenv_macros.h" -#include "sincosf16_utils.h" #include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/cast.h" #include 
"src/__support/FPUtil/except_value_utils.h" #include "src/__support/FPUtil/multiply_add.h" #include "src/__support/macros/optimization.h" +#include "src/__support/math/sincosf16_utils.h" namespace LIBC_NAMESPACE_DECL { @@ -32,6 +32,7 @@ constexpr fputil::ExceptValues<float16, N_EXCEPTS> SINF16_EXCEPTS{{ #endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS LLVM_LIBC_FUNCTION(float16, sinf16, (float16 x)) { + using namespace sincosf16_internal; using FPBits = fputil::FPBits<float16>; FPBits xbits(x); diff --git a/libc/src/math/generic/sinhf.cpp b/libc/src/math/generic/sinhf.cpp index 63111f8..5f2d0b5 100644 --- a/libc/src/math/generic/sinhf.cpp +++ b/libc/src/math/generic/sinhf.cpp @@ -12,7 +12,7 @@ #include "src/__support/FPUtil/rounding_mode.h" #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY -#include "src/math/generic/explogxf.h" +#include "src/__support/math/sinhfcoshf_utils.h" namespace LIBC_NAMESPACE_DECL { @@ -73,7 +73,8 @@ LLVM_LIBC_FUNCTION(float, sinhf, (float x)) { } // sinh(x) = (e^x - e^(-x)) / 2. 
- return static_cast<float>(exp_pm_eval</*is_sinh*/ true>(x)); + return static_cast<float>( + math::sinhfcoshf_internal::exp_pm_eval</*is_sinh*/ true>(x)); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/sinhf16.cpp b/libc/src/math/generic/sinhf16.cpp index b426ea7..f6b5c9b 100644 --- a/libc/src/math/generic/sinhf16.cpp +++ b/libc/src/math/generic/sinhf16.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/math/sinhf16.h" -#include "expxf16.h" #include "hdr/errno_macros.h" #include "hdr/fenv_macros.h" #include "src/__support/FPUtil/FEnvImpl.h" @@ -17,6 +16,7 @@ #include "src/__support/common.h" #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" +#include "src/__support/math/expxf16_utils.h" namespace LIBC_NAMESPACE_DECL { @@ -89,6 +89,7 @@ static constexpr fputil::ExceptValues<float16, 13> SINHF16_EXCEPTS_NEG = {{ #endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS LLVM_LIBC_FUNCTION(float16, sinhf16, (float16 x)) { + using namespace math::expxf16_internal; using FPBits = fputil::FPBits<float16>; FPBits x_bits(x); diff --git a/libc/src/math/generic/sinpif.cpp b/libc/src/math/generic/sinpif.cpp index 492689d..f3383f1 100644 --- a/libc/src/math/generic/sinpif.cpp +++ b/libc/src/math/generic/sinpif.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/math/sinpif.h" -#include "sincosf_utils.h" #include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/PolyEval.h" @@ -15,6 +14,7 @@ #include "src/__support/common.h" #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY +#include "src/__support/math/sincosf_utils.h" namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/math/generic/sinpif16.cpp b/libc/src/math/generic/sinpif16.cpp index 68af484..311e6f9 100644 --- 
a/libc/src/math/generic/sinpif16.cpp +++ b/libc/src/math/generic/sinpif16.cpp @@ -9,15 +9,16 @@ #include "src/math/sinpif16.h" #include "hdr/errno_macros.h" #include "hdr/fenv_macros.h" -#include "sincosf16_utils.h" #include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/cast.h" #include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/math/sincosf16_utils.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(float16, sinpif16, (float16 x)) { + using namespace sincosf16_internal; using FPBits = typename fputil::FPBits<float16>; FPBits xbits(x); diff --git a/libc/src/math/generic/tan.cpp b/libc/src/math/generic/tan.cpp index 89b812c..7ea40c9 100644 --- a/libc/src/math/generic/tan.cpp +++ b/libc/src/math/generic/tan.cpp @@ -20,12 +20,12 @@ #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA -#include "src/math/generic/range_reduction_double_common.h" +#include "src/__support/math/range_reduction_double_common.h" #ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE -#include "range_reduction_double_fma.h" +#include "src/__support/math/range_reduction_double_fma.h" #else -#include "range_reduction_double_nofma.h" +#include "src/__support/math/range_reduction_double_nofma.h" #endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE namespace LIBC_NAMESPACE_DECL { @@ -121,6 +121,7 @@ LIBC_INLINE double tan_eval(const DoubleDouble &u, DoubleDouble &result) { } // anonymous namespace LLVM_LIBC_FUNCTION(double, tan, (double x)) { + using namespace math::range_reduction_double_internal; using FPBits = typename fputil::FPBits<double>; FPBits xbits(x); diff --git a/libc/src/math/generic/tanf.cpp b/libc/src/math/generic/tanf.cpp index ca5e35d..a8c557b 100644 --- a/libc/src/math/generic/tanf.cpp +++ b/libc/src/math/generic/tanf.cpp @@ -7,7 +7,6 @@ 
//===----------------------------------------------------------------------===// #include "src/math/tanf.h" -#include "sincosf_utils.h" #include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/PolyEval.h" @@ -18,6 +17,7 @@ #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA +#include "src/__support/math/sincosf_utils.h" namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/math/generic/tanf16.cpp b/libc/src/math/generic/tanf16.cpp index 229f4a3..20323a8 100644 --- a/libc/src/math/generic/tanf16.cpp +++ b/libc/src/math/generic/tanf16.cpp @@ -9,13 +9,13 @@ #include "src/math/tanf16.h" #include "hdr/errno_macros.h" #include "hdr/fenv_macros.h" -#include "sincosf16_utils.h" #include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/cast.h" #include "src/__support/FPUtil/except_value_utils.h" #include "src/__support/FPUtil/multiply_add.h" #include "src/__support/macros/optimization.h" +#include "src/__support/math/sincosf16_utils.h" namespace LIBC_NAMESPACE_DECL { @@ -37,6 +37,7 @@ constexpr fputil::ExceptValues<float16, N_EXCEPTS> TANF16_EXCEPTS{{ #endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS LLVM_LIBC_FUNCTION(float16, tanf16, (float16 x)) { + using namespace sincosf16_internal; using FPBits = fputil::FPBits<float16>; FPBits xbits(x); diff --git a/libc/src/math/generic/tanhf.cpp b/libc/src/math/generic/tanhf.cpp index 32153c3..0c55047 100644 --- a/libc/src/math/generic/tanhf.cpp +++ b/libc/src/math/generic/tanhf.cpp @@ -14,7 +14,7 @@ #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include "src/__support/macros/properties/cpu_features.h" -#include "src/math/generic/explogxf.h" +#include "src/__support/math/exp10f_utils.h" namespace LIBC_NAMESPACE_DECL { diff --git 
a/libc/src/math/generic/tanhf16.cpp b/libc/src/math/generic/tanhf16.cpp index 4c43cfd..fc0e28b 100644 --- a/libc/src/math/generic/tanhf16.cpp +++ b/libc/src/math/generic/tanhf16.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/math/tanhf16.h" -#include "expxf16.h" #include "hdr/fenv_macros.h" #include "src/__support/CPP/array.h" #include "src/__support/FPUtil/FEnvImpl.h" @@ -21,6 +20,7 @@ #include "src/__support/common.h" #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" +#include "src/__support/math/expxf16_utils.h" namespace LIBC_NAMESPACE_DECL { @@ -34,6 +34,7 @@ static constexpr fputil::ExceptValues<float16, 2> TANHF16_EXCEPTS = {{ #endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS LLVM_LIBC_FUNCTION(float16, tanhf16, (float16 x)) { + using namespace math::expxf16_internal; using FPBits = fputil::FPBits<float16>; FPBits x_bits(x); diff --git a/libc/src/math/generic/tanpif.cpp b/libc/src/math/generic/tanpif.cpp index 58d46c9..b49f3ce 100644 --- a/libc/src/math/generic/tanpif.cpp +++ b/libc/src/math/generic/tanpif.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/math/tanpif.h" -#include "sincosf_utils.h" #include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/cast.h" @@ -16,6 +15,7 @@ #include "src/__support/common.h" #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY +#include "src/__support/math/sincosf_utils.h" namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/math/generic/tanpif16.cpp b/libc/src/math/generic/tanpif16.cpp index 792d405..b137b09 100644 --- a/libc/src/math/generic/tanpif16.cpp +++ b/libc/src/math/generic/tanpif16.cpp @@ -9,13 +9,13 @@ #include "src/math/tanpif16.h" #include "hdr/errno_macros.h" #include "hdr/fenv_macros.h" -#include "sincosf16_utils.h" #include 
"src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/cast.h" #include "src/__support/FPUtil/except_value_utils.h" #include "src/__support/FPUtil/multiply_add.h" #include "src/__support/macros/optimization.h" +#include "src/__support/math/sincosf16_utils.h" namespace LIBC_NAMESPACE_DECL { @@ -39,6 +39,7 @@ constexpr fputil::ExceptValues<float16, N_EXCEPTS> TANPIF16_EXCEPTS{{ #endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS LLVM_LIBC_FUNCTION(float16, tanpif16, (float16 x)) { + using namespace sincosf16_internal; using FPBits = typename fputil::FPBits<float16>; FPBits xbits(x); diff --git a/libc/src/math/generic/ufromfpbf16.cpp b/libc/src/math/generic/ufromfpbf16.cpp new file mode 100644 index 0000000..336771b --- /dev/null +++ b/libc/src/math/generic/ufromfpbf16.cpp @@ -0,0 +1,22 @@ +//===-- Implementation of ufromfpbf16 function ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/ufromfpbf16.h" +#include "src/__support/FPUtil/NearestIntegerOperations.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, ufromfpbf16, + (bfloat16 x, int rnd, unsigned int width)) { + return fputil::fromfp</*IsSigned=*/false>(x, rnd, width); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/ufromfpxbf16.cpp b/libc/src/math/generic/ufromfpxbf16.cpp new file mode 100644 index 0000000..ac9cf44 --- /dev/null +++ b/libc/src/math/generic/ufromfpxbf16.cpp @@ -0,0 +1,22 @@ +//===-- Implementation of ufromfpxbf16 function ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/ufromfpxbf16.h" +#include "src/__support/FPUtil/NearestIntegerOperations.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, ufromfpxbf16, + (bfloat16 x, int rnd, unsigned int width)) { + return fputil::fromfpx</*IsSigned=*/false>(x, rnd, width); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/ufromfpbf16.h b/libc/src/math/ufromfpbf16.h new file mode 100644 index 0000000..1fd876a --- /dev/null +++ b/libc/src/math/ufromfpbf16.h @@ -0,0 +1,21 @@ +//===-- Implementation header for ufromfpbf16 -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_UFROMFPBF16_H +#define LLVM_LIBC_SRC_MATH_UFROMFPBF16_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 ufromfpbf16(bfloat16 x, int rnd, unsigned int width); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_UFROMFPBF16_H diff --git a/libc/src/math/ufromfpxbf16.h b/libc/src/math/ufromfpxbf16.h new file mode 100644 index 0000000..ec63744 --- /dev/null +++ b/libc/src/math/ufromfpxbf16.h @@ -0,0 +1,21 @@ +//===-- Implementation header for ufromfpxbf16 ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_UFROMFPXBF16_H +#define LLVM_LIBC_SRC_MATH_UFROMFPXBF16_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 ufromfpxbf16(bfloat16 x, int rnd, unsigned int width); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_UFROMFPXBF16_H |