//===-- Portable SIMD library similar to stdx::simd -------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file provides a generic interface into fixed-size SIMD instructions // using the clang vector type. The API shares some similarities with the // stdx::simd proposal, but instead chooses to use vectors as primitive types // with several extra helper functions. // //===----------------------------------------------------------------------===// #include "hdr/stdint_proxy.h" #include "src/__support/CPP/algorithm.h" #include "src/__support/CPP/limits.h" #include "src/__support/CPP/tuple.h" #include "src/__support/CPP/type_traits.h" #include "src/__support/CPP/utility/integer_sequence.h" #include "src/__support/macros/attributes.h" #include "src/__support/macros/config.h" #include #ifndef LLVM_LIBC_SRC___SUPPORT_CPP_SIMD_H #define LLVM_LIBC_SRC___SUPPORT_CPP_SIMD_H #if LIBC_HAS_VECTOR_TYPE namespace LIBC_NAMESPACE_DECL { namespace cpp { namespace internal { #if defined(LIBC_TARGET_CPU_HAS_AVX512F) template LIBC_INLINE_VAR constexpr size_t native_vector_size = 64 / sizeof(T); #elif defined(LIBC_TARGET_CPU_HAS_AVX2) template LIBC_INLINE_VAR constexpr size_t native_vector_size = 32 / sizeof(T); #elif defined(LIBC_TARGET_CPU_HAS_SSE2) || defined(LIBC_TARGET_CPU_HAS_ARM_NEON) template LIBC_INLINE_VAR constexpr size_t native_vector_size = 16 / sizeof(T); #else template LIBC_INLINE constexpr size_t native_vector_size = 1; #endif } // namespace internal // Type aliases. template using fixed_size_simd = T [[clang::ext_vector_type(N)]]; template > using simd = T [[clang::ext_vector_type(N)]]; template using simd_mask = simd>; // Type trait helpers. template struct simd_size : cpp::integral_constant { }; template constexpr size_t simd_size_v = simd_size::value; template struct is_simd : cpp::integral_constant {}; template struct is_simd> : cpp::integral_constant {}; template constexpr bool is_simd_v = is_simd::value; template struct is_simd_mask : cpp::integral_constant {}; template struct is_simd_mask> : cpp::integral_constant {}; template constexpr bool is_simd_mask_v = is_simd_mask::value; template struct simd_element_type; template struct simd_element_type> { using type = T; }; template using simd_element_type_t = typename simd_element_type::type; namespace internal { template using get_as_integer_type_t = unsigned _BitInt(sizeof(T) * CHAR_BIT); template LIBC_INLINE constexpr T poison() { return __builtin_nondeterministic_value(T()); } template LIBC_INLINE constexpr static cpp::simd extend(cpp::simd x, cpp::index_sequence) { return __builtin_shufflevector( x, x, (Indices < OriginalSize ? static_cast(Indices) : -1)...); } template LIBC_INLINE constexpr static auto extend(cpp::simd x) { // Recursively resize an input vector to the target size, increasing its size // by at most double the input size each step due to shufflevector limitation. if constexpr (N == TargetSize) return x; else if constexpr (TargetSize <= 2 * N) return extend(x, cpp::make_index_sequence{}); else return extend( extend(x, cpp::make_index_sequence<2 * N>{})); } template LIBC_INLINE constexpr static cpp::simd concat(cpp::simd x, cpp::simd y, cpp::index_sequence) { constexpr size_t Size = cpp::max(N, M); auto remap = [](size_t idx) -> int { if (idx < N) return static_cast(idx); if (idx < N + M) return static_cast((idx - N) + Size); return -1; }; // Extend the input vectors until they are the same size, then use the indices // to shuffle in only the indices that correspond to the original values. auto x_ext = extend(x); auto y_ext = extend(y); return __builtin_shufflevector(x_ext, y_ext, remap(Indices)...); } template LIBC_INLINE constexpr static cpp::simd slice(cpp::simd x, cpp::index_sequence) { return __builtin_shufflevector(x, x, (Offset + Indices)...); } template LIBC_INLINE constexpr static auto split(cpp::simd x) { // Recursively splits the input vector by walking the variadic template list, // increasing our current head each call. auto result = cpp::make_tuple( slice(x, cpp::make_index_sequence{})); if constexpr (sizeof...(Tail) > 0) return cpp::tuple_cat(result, split(x)); else return result; } // Helper trait template using enable_if_integral_t = cpp::enable_if_t, T>; template using enable_if_simd_t = cpp::enable_if_t, bool>; } // namespace internal // Casting. template LIBC_INLINE constexpr static simd simd_cast(simd v) { return __builtin_convertvector(v, simd); } // SIMD mask operations. template = 0> LIBC_INLINE constexpr static bool all_of(simd v) { return __builtin_reduce_and(simd_cast(v)); } template = 0> LIBC_INLINE constexpr static bool any_of(simd v) { return __builtin_reduce_or(simd_cast(v)); } template = 0> LIBC_INLINE constexpr static bool none_of(simd v) { return !any_of(v); } template = 0> LIBC_INLINE constexpr static bool some_of(simd v) { return any_of(v) && !all_of(v); } template = 0> LIBC_INLINE constexpr static int popcount(simd v) { return __builtin_popcountg(v); } template = 0> LIBC_INLINE constexpr static int find_first_set(simd v) { return __builtin_ctzg(simd_cast(v)); } template = 0> LIBC_INLINE constexpr static int find_last_set(simd v) { constexpr size_t size = simd_size_v>; return size - 1 - __builtin_clzg(simd_cast(v)); } // Elementwise operations. template LIBC_INLINE constexpr static simd min(simd x, simd y) { return __builtin_elementwise_min(x, y); } template LIBC_INLINE constexpr static simd max(simd x, simd y) { return __builtin_elementwise_max(x, y); } template LIBC_INLINE constexpr static simd abs(simd x) { return __builtin_elementwise_abs(x); } template LIBC_INLINE constexpr static simd fma(simd x, simd y, simd z) { return __builtin_elementwise_fma(x, y, z); } template LIBC_INLINE constexpr static simd ceil(simd x) { return __builtin_elementwise_ceil(x); } template LIBC_INLINE constexpr static simd floor(simd x) { return __builtin_elementwise_floor(x); } template LIBC_INLINE constexpr static simd roundeven(simd x) { return __builtin_elementwise_roundeven(x); } template LIBC_INLINE constexpr static simd round(simd x) { return __builtin_elementwise_round(x); } template LIBC_INLINE constexpr static simd trunc(simd x) { return __builtin_elementwise_trunc(x); } template LIBC_INLINE constexpr static simd nearbyint(simd x) { return __builtin_elementwise_nearbyint(x); } template LIBC_INLINE constexpr static simd rint(simd x) { return __builtin_elementwise_rint(x); } template LIBC_INLINE constexpr static simd canonicalize(simd x) { return __builtin_elementwise_canonicalize(x); } template LIBC_INLINE constexpr static simd copysign(simd x, simd y) { return __builtin_elementwise_copysign(x, y); } template LIBC_INLINE constexpr static simd fmod(simd x, simd y) { return __builtin_elementwise_fmod(x, y); } // Reduction operations. template > LIBC_INLINE constexpr static T reduce(simd v, Op op = {}) { return reduce(v, op); } template LIBC_INLINE constexpr static T reduce(simd v, cpp::plus<>) { return __builtin_reduce_add(v); } template LIBC_INLINE constexpr static T reduce(simd v, cpp::multiplies<>) { return __builtin_reduce_mul(v); } template LIBC_INLINE constexpr static T reduce(simd v, cpp::bit_and<>) { return __builtin_reduce_and(v); } template LIBC_INLINE constexpr static T reduce(simd v, cpp::bit_or<>) { return __builtin_reduce_or(v); } template LIBC_INLINE constexpr static T reduce(simd v, cpp::bit_xor<>) { return __builtin_reduce_xor(v); } template LIBC_INLINE constexpr static T hmin(simd v) { return __builtin_reduce_min(v); } template LIBC_INLINE constexpr static T hmax(simd v) { return __builtin_reduce_max(v); } // Accessor helpers. template LIBC_INLINE T constexpr static load(const void *ptr, bool aligned = false) { if (aligned) ptr = __builtin_assume_aligned(ptr, alignof(T)); T tmp; __builtin_memcpy_inline( &tmp, reinterpret_cast *>(ptr), sizeof(T)); return tmp; } template = 0> LIBC_INLINE constexpr static void store(T v, void *ptr, bool aligned = false) { if (aligned) ptr = __builtin_assume_aligned(ptr, alignof(T)); __builtin_memcpy_inline(ptr, &v, sizeof(T)); } template = 0> LIBC_INLINE constexpr static T load_masked(simd> mask, const void *ptr, T passthru = internal::poison(), bool aligned = false) { if (aligned) ptr = __builtin_assume_aligned(ptr, alignof(T)); return __builtin_masked_load( mask, reinterpret_cast *>(ptr), passthru); } template = 0> LIBC_INLINE constexpr static void store_masked(simd> mask, T v, void *ptr, bool aligned = false) { if (aligned) ptr = __builtin_assume_aligned(ptr, alignof(T)); __builtin_masked_store(mask, v, reinterpret_cast *>(ptr)); } template = 0> LIBC_INLINE constexpr static T gather(simd> mask, Idx idx, const void *base, bool aligned = false) { if (aligned) base = __builtin_assume_aligned(base, alignof(T)); return __builtin_masked_gather( mask, idx, reinterpret_cast *>(base)); } template = 0> LIBC_INLINE constexpr static void scatter(simd> mask, Idx idx, T v, void *base, bool aligned = false) { if (aligned) base = __builtin_assume_aligned(base, alignof(T)); __builtin_masked_scatter(mask, idx, v, reinterpret_cast *>(base)); } template = 0> LIBC_INLINE constexpr static T expand(simd> mask, const void *ptr, T passthru = internal::poison(), bool aligned = false) { if (aligned) ptr = __builtin_assume_aligned(ptr, alignof(T)); return __builtin_masked_expand_load( mask, reinterpret_cast *>(ptr), passthru); } template = 0> LIBC_INLINE constexpr static void compress(simd> mask, T v, void *ptr, bool aligned = false) { if (aligned) ptr = __builtin_assume_aligned(ptr, alignof(T)); __builtin_masked_compress_store( mask, v, reinterpret_cast *>(ptr)); } // Construction helpers. template LIBC_INLINE constexpr static simd splat(T v) { return simd(v); } template LIBC_INLINE constexpr static simd splat(T v) { return splat>>(v); } template LIBC_INLINE constexpr static simd iota(T base = T(0), T step = T(1)) { simd v{}; for (unsigned i = 0; i < N; ++i) v[i] = base + T(i) * step; return v; } template LIBC_INLINE constexpr static simd iota(T base = T(0), T step = T(1)) { return iota>>(base, step); } // Conditional helpers. template LIBC_INLINE constexpr static simd select(simd m, simd x, simd y) { return m ? x : y; } // Shuffling helpers. template LIBC_INLINE constexpr static auto concat(cpp::simd x, cpp::simd y) { return internal::concat(x, y, make_index_sequence{}); } template LIBC_INLINE constexpr static auto concat(cpp::simd x, cpp::simd y, Rest... rest) { auto xy = concat(x, y); if constexpr (sizeof...(Rest)) return concat(xy, rest...); else return xy; } template auto split(cpp::simd x) { static_assert((... + Sizes) == N, "split sizes must sum to vector size"); return internal::split(x); } // TODO: where expressions, scalar overloads, ABI types. } // namespace cpp } // namespace LIBC_NAMESPACE_DECL #endif // LIBC_HAS_VECTOR_TYPE #endif