diff options
author | Guillaume Chatelet <gchatelet@google.com> | 2024-05-30 14:24:05 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-30 14:24:05 +0200 |
commit | 662b1305292d69507a09164c911cfc782e0e4e73 (patch) | |
tree | 6266bbe028df17c25e84ab60f99179c59090460d /libc | |
parent | b49c0b8abc460f8fec707a6ffccf2129fd6e1772 (diff) | |
download | llvm-662b1305292d69507a09164c911cfc782e0e4e73.zip llvm-662b1305292d69507a09164c911cfc782e0e4e73.tar.gz llvm-662b1305292d69507a09164c911cfc782e0e4e73.tar.bz2 |
[libc][NFC] Tighten up guard conditions for sqrt and polyeval (#93791)
Found while investigating #93709
Diffstat (limited to 'libc')
-rw-r--r-- | libc/src/__support/FPUtil/sqrt.h | 3 | ||||
-rw-r--r-- | libc/src/__support/FPUtil/x86_64/PolyEval.h | 85 | ||||
-rw-r--r-- | libc/src/__support/FPUtil/x86_64/sqrt.h | 5 |
3 files changed, 5 insertions, 88 deletions
```diff
diff --git a/libc/src/__support/FPUtil/sqrt.h b/libc/src/__support/FPUtil/sqrt.h
index 3ba1bdf..eb86ddf 100644
--- a/libc/src/__support/FPUtil/sqrt.h
+++ b/libc/src/__support/FPUtil/sqrt.h
@@ -10,8 +10,9 @@
 #define LLVM_LIBC_SRC___SUPPORT_FPUTIL_SQRT_H
 
 #include "src/__support/macros/properties/architectures.h"
+#include "src/__support/macros/properties/cpu_features.h"
 
-#if defined(LIBC_TARGET_ARCH_IS_X86_64)
+#if defined(LIBC_TARGET_ARCH_IS_X86_64) && defined(LIBC_TARGET_CPU_HAS_SSE2)
 #include "x86_64/sqrt.h"
 #elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
 #include "aarch64/sqrt.h"
diff --git a/libc/src/__support/FPUtil/x86_64/PolyEval.h b/libc/src/__support/FPUtil/x86_64/PolyEval.h
deleted file mode 100644
index 69fd776..0000000
--- a/libc/src/__support/FPUtil/x86_64/PolyEval.h
+++ /dev/null
@@ -1,85 +0,0 @@
-//===-- Optimized PolyEval implementations for x86_64 --------- C++ -----*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_X86_64_POLYEVAL_H
-#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_X86_64_POLYEVAL_H
-
-#include "src/__support/common.h"
-#include "src/__support/macros/properties/architectures.h"
-
-#if !defined(LIBC_TARGET_ARCH_IS_X86_64)
-#error "Invalid include"
-#endif
-
-#include <immintrin.h>
-
-namespace LIBC_NAMESPACE {
-namespace fputil {
-
-// Cubic polynomials:
-// polyeval(x, a0, a1, a2, a3) = a3*x^3 + a2*x^2 + a1*x + a0
-template <>
-LIBC_INLINE float polyeval(float x, float a0, float a1, float a2, float a3) {
-  __m128 xmm = _mm_set1_ps(x); // NOLINT
-  __m128 a13 = _mm_set_ps(0.0f, x, a3, a1); // NOLINT
-  __m128 a02 = _mm_set_ps(0.0f, 0.0f, a2, a0); // NOLINT
-  // r = (0, x^2, a3*x + a2, a1*x + a0)
-  __m128 r = _mm_fmadd_ps(a13, xmm, a02); // NOLINT
-  // result = (a3*x + a2) * x^2 + (a1*x + a0)
-  return fma(r[2], r[1], r[0]);
-}
-
-template <>
-LIBC_INLINE double polyeval(double x, double a0, double a1, double a2,
-                            double a3) {
-  __m256d xmm = _mm256_set1_pd(x); // NOLINT
-  __m256d a13 = _mm256_set_pd(0.0, x, a3, a1); // NOLINT
-  __m256d a02 = _mm256_set_pd(0.0, 0.0, a2, a0); // NOLINT
-  // r = (0, x^2, a3*x + a2, a1*x + a0)
-  __m256d r = _mm256_fmadd_pd(a13, xmm, a02); // NOLINT
-  // result = (a3*x + a2) * x^2 + (a1*x + a0)
-  return fma(r[2], r[1], r[0]);
-}
-
-// Quintic polynomials:
-// polyeval(x, a0, a1, a2, a3, a4, a5) = a5*x^5 + a4*x^4 + a3*x^3 + a2*x^2 +
-// + a1*x + a0
-template <>
-LIBC_INLINE float polyeval(float x, float a0, float a1, float a2, float a3,
-                           float a4, float a5) {
-  __m128 xmm = _mm_set1_ps(x); // NOLINT
-  __m128 a25 = _mm_set_ps(0.0f, x, a5, a2); // NOLINT
-  __m128 a14 = _mm_set_ps(0.0f, 0.0f, a4, a1); // NOLINT
-  __m128 a03 = _mm_set_ps(0.0f, 0.0f, a3, a0); // NOLINT
-  // r1 = (0, x^2, a5*x + a4, a2*x + a1)
-  __m128 r1 = _mm_fmadd_ps(a25, xmm, a14); // NOLINT
-  // r2 = (0, x^3, (a5*x + a4)*x + a3, (a2*x + a1)*x + a0
-  __m128 r2 = _mm_fmadd_ps(r1, xmm, a03); // NOLINT
-  // result = ((a5*x + a4)*x + a3) * x^3 + ((a2*x + a1)*x + a0)
-  return fma(r2[2], r2[1], r2[0]);
-}
-
-template <>
-LIBC_INLINE double polyeval(double x, double a0, double a1, double a2,
-                            double a3, double a4, double a5) {
-  __m256d xmm = _mm256_set1_pd(x); // NOLINT
-  __m256d a25 = _mm256_set_pd(0.0, x, a5, a2); // NOLINT
-  __m256d a14 = _mm256_set_pd(0.0, 0.0, a4, a1); // NOLINT
-  __m256d a03 = _mm256_set_pd(0.0, 0.0, a3, a0); // NOLINT
-  // r1 = (0, x^2, a5*x + a4, a2*x + a1)
-  __m256d r1 = _mm256_fmadd_pd(a25, xmm, a14); // NOLINT
-  // r2 = (0, x^3, (a5*x + a4)*x + a3, (a2*x + a1)*x + a0
-  __m256d r2 = _mm256_fmadd_pd(r1, xmm, a03); // NOLINT
-  // result = ((a5*x + a4)*x + a3) * x^3 + ((a2*x + a1)*x + a0)
-  return fma(r2[2], r2[1], r2[0]);
-}
-
-} // namespace fputil
-} // namespace LIBC_NAMESPACE
-
-#endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_X86_64_POLYEVAL_H
diff --git a/libc/src/__support/FPUtil/x86_64/sqrt.h b/libc/src/__support/FPUtil/x86_64/sqrt.h
index 93ba8c0..bfcc5e9 100644
--- a/libc/src/__support/FPUtil/x86_64/sqrt.h
+++ b/libc/src/__support/FPUtil/x86_64/sqrt.h
@@ -11,9 +11,10 @@
 
 #include "src/__support/common.h"
 #include "src/__support/macros/properties/architectures.h"
+#include "src/__support/macros/properties/cpu_features.h"
 
-#if !defined(LIBC_TARGET_ARCH_IS_X86)
-#error "Invalid include"
+#if !(defined(LIBC_TARGET_ARCH_IS_X86_64) && defined(LIBC_TARGET_CPU_HAS_SSE2))
+#error "sqrtss / sqrtsd need SSE2"
 #endif
 
 #include "src/__support/FPUtil/generic/sqrt.h"
```