From bfd94882f2648e2a5ed651bca6cfeb4fb7788b86 Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet <gchatelet@google.com>
Date: Sun, 9 Jul 2023 21:34:50 +0000
Subject: [libc][NFC] Move aligned access implementations to separate header

Follow up on https://reviews.llvm.org/D154770

Differential Revision: https://reviews.llvm.org/D154800
---
 libc/src/string/memory_utils/CMakeLists.txt        |   1 +
 .../src/string/memory_utils/bcmp_implementations.h |  54 +-----
 .../string/memory_utils/generic/aligned_access.h   | 205 +++++++++++++++++++++
 .../string/memory_utils/memcmp_implementations.h   |  55 +-----
 .../string/memory_utils/memcpy_implementations.h   |  53 +----
 .../string/memory_utils/memset_implementations.h   |  27 +--
 6 files changed, 210 insertions(+), 185 deletions(-)
 create mode 100644 libc/src/string/memory_utils/generic/aligned_access.h

diff --git a/libc/src/string/memory_utils/CMakeLists.txt b/libc/src/string/memory_utils/CMakeLists.txt
index c9fc998..22e3249 100644
--- a/libc/src/string/memory_utils/CMakeLists.txt
+++ b/libc/src/string/memory_utils/CMakeLists.txt
@@ -6,6 +6,7 @@ add_header_library(
     aarch64/memcpy_implementations.h
     bcmp_implementations.h
     bzero_implementations.h
+    generic/aligned_access.h
    generic/byte_per_byte.h
     memcmp_implementations.h
     memcpy_implementations.h
diff --git a/libc/src/string/memory_utils/bcmp_implementations.h b/libc/src/string/memory_utils/bcmp_implementations.h
index cb4b293..7bfc173 100644
--- a/libc/src/string/memory_utils/bcmp_implementations.h
+++ b/libc/src/string/memory_utils/bcmp_implementations.h
@@ -12,6 +12,7 @@
 #include "src/__support/common.h"
 #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY LIBC_LOOP_NOUNROLL
 #include "src/__support/macros/properties/architectures.h"
+#include "src/string/memory_utils/generic/aligned_access.h"
 #include "src/string/memory_utils/generic/byte_per_byte.h"
 #include "src/string/memory_utils/op_aarch64.h"
 #include "src/string/memory_utils/op_builtin.h"
@@ -23,59 +24,6 @@
 
 namespace __llvm_libc {
 
-[[maybe_unused]] LIBC_INLINE BcmpReturnType
-inline_bcmp_aligned_access_64bit(CPtr p1, CPtr p2, size_t count) {
-  constexpr size_t kAlign = sizeof(uint64_t);
-  if (count <= 2 * kAlign)
-    return inline_bcmp_byte_per_byte(p1, p2, count);
-  size_t bytes_to_p1_align = distance_to_align_up<kAlign>(p1);
-  if (auto value = inline_bcmp_byte_per_byte(p1, p2, bytes_to_p1_align))
-    return value;
-  size_t offset = bytes_to_p1_align;
-  size_t p2_alignment = distance_to_align_down<kAlign>(p2 + offset);
-  for (; offset < count - kAlign; offset += kAlign) {
-    uint64_t a;
-    if (p2_alignment == 0)
-      a = load64_aligned<uint64_t>(p2, offset);
-    else if (p2_alignment == 4)
-      a = load64_aligned<uint32_t, uint32_t>(p2, offset);
-    else if (p2_alignment == 2)
-      a = load64_aligned<uint16_t, uint16_t, uint16_t, uint16_t>(p2, offset);
-    else
-      a = load64_aligned<uint8_t, uint16_t, uint16_t, uint16_t, uint8_t>(
-          p2, offset);
-    uint64_t b = load64_aligned<uint64_t>(p1, offset);
-    if (a != b)
-      return BcmpReturnType::NONZERO();
-  }
-  return inline_bcmp_byte_per_byte(p1, p2, count, offset);
-}
-
-[[maybe_unused]] LIBC_INLINE BcmpReturnType
-inline_bcmp_aligned_access_32bit(CPtr p1, CPtr p2, size_t count) {
-  constexpr size_t kAlign = sizeof(uint32_t);
-  if (count <= 2 * kAlign)
-    return inline_bcmp_byte_per_byte(p1, p2, count);
-  size_t bytes_to_p1_align = distance_to_align_up<kAlign>(p1);
-  if (auto value = inline_bcmp_byte_per_byte(p1, p2, bytes_to_p1_align))
-    return value;
-  size_t offset = bytes_to_p1_align;
-  size_t p2_alignment = distance_to_align_down<kAlign>(p2 + offset);
-  for (; offset < count - kAlign; offset += kAlign) {
-    uint32_t a;
-    if (p2_alignment == 0)
-      a = load32_aligned<uint32_t>(p2, offset);
-    else if (p2_alignment == 2)
-      a = load32_aligned<uint16_t, uint16_t>(p2, offset);
-    else
-      a = load32_aligned<uint8_t, uint16_t, uint8_t>(p2, offset);
-    uint32_t b = load32_aligned<uint32_t>(p1, offset);
-    if (a != b)
-      return BcmpReturnType::NONZERO();
-  }
-  return inline_bcmp_byte_per_byte(p1, p2, count, offset);
-}
-
 #if defined(LIBC_TARGET_ARCH_IS_X86) || defined(LIBC_TARGET_ARCH_IS_AARCH64)
 [[maybe_unused]] LIBC_INLINE BcmpReturnType
 inline_bcmp_generic_gt16(CPtr p1, CPtr p2, size_t count) {
diff --git a/libc/src/string/memory_utils/generic/aligned_access.h b/libc/src/string/memory_utils/generic/aligned_access.h
new file mode 100644
index 0000000..0a333d1
--- /dev/null
+++ b/libc/src/string/memory_utils/generic/aligned_access.h
@@ -0,0 +1,205 @@
+//===-- Implementations for platform with mandatory aligned memory access -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// For some platforms, unaligned loads and stores are either illegal or very
+// slow. The implementations in this file make sure all loads and stores are
+// always aligned.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_ALIGNED_ACCESS_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_ALIGNED_ACCESS_H
+
+#include "src/__support/macros/config.h" // LIBC_INLINE
+#include "src/string/memory_utils/generic/byte_per_byte.h"
+#include "src/string/memory_utils/op_generic.h" // generic::splat
+#include "src/string/memory_utils/utils.h"      // Ptr, CPtr
+
+#include <stddef.h> // size_t
+
+namespace __llvm_libc {
+
+[[maybe_unused]] LIBC_INLINE uint32_t load32_aligned(CPtr ptr, size_t offset,
+                                                     size_t alignment) {
+  if (alignment == 0)
+    return load32_aligned<uint32_t>(ptr, offset);
+  else if (alignment == 2)
+    return load32_aligned<uint16_t, uint16_t>(ptr, offset);
+  else
+    return load32_aligned<uint8_t, uint16_t, uint8_t>(ptr, offset);
+}
+
+[[maybe_unused]] LIBC_INLINE uint64_t load64_aligned(CPtr ptr, size_t offset,
+                                                     size_t alignment) {
+  if (alignment == 0)
+    return load64_aligned<uint64_t>(ptr, offset);
+  else if (alignment == 4)
+    return load64_aligned<uint32_t, uint32_t>(ptr, offset);
+  else if (alignment == 2)
+    return load64_aligned<uint16_t, uint16_t, uint16_t, uint16_t>(ptr, offset);
+  else
+    return load64_aligned<uint8_t, uint16_t, uint16_t, uint16_t, uint8_t>(
+        ptr, offset);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// memcpy
+///////////////////////////////////////////////////////////////////////////////
+
+[[maybe_unused]] LIBC_INLINE void
+inline_memcpy_aligned_access_32bit(Ptr __restrict dst, CPtr __restrict src,
+                                   size_t count) {
+  constexpr size_t kAlign = sizeof(uint32_t);
+  if (count <= 2 * kAlign)
+    return inline_memcpy_byte_per_byte(dst, src, count);
+  size_t bytes_to_dst_align = distance_to_align_up<kAlign>(dst);
+  inline_memcpy_byte_per_byte(dst, src, bytes_to_dst_align);
+  size_t offset = bytes_to_dst_align;
+  size_t src_alignment = distance_to_align_down<kAlign>(src + offset);
+  for (; offset < count - kAlign; offset += kAlign) {
+    uint32_t value = load32_aligned(src, offset, src_alignment);
+    store32_aligned(value, dst, offset);
+  }
+  // remainder
+  inline_memcpy_byte_per_byte(dst, src, count, offset);
+}
+
+[[maybe_unused]] LIBC_INLINE void
+inline_memcpy_aligned_access_64bit(Ptr __restrict dst, CPtr __restrict src,
+                                   size_t count) {
+  constexpr size_t kAlign = sizeof(uint64_t);
+  if (count <= 2 * kAlign)
+    return inline_memcpy_byte_per_byte(dst, src, count);
+  size_t bytes_to_dst_align = distance_to_align_up<kAlign>(dst);
+  inline_memcpy_byte_per_byte(dst, src, bytes_to_dst_align);
+  size_t offset = bytes_to_dst_align;
+  size_t src_alignment = distance_to_align_down<kAlign>(src + offset);
+  for (; offset < count - kAlign; offset += kAlign) {
+    uint64_t value = load64_aligned(src, offset, src_alignment);
+    store64_aligned(value, dst, offset);
+  }
+  // remainder
+  inline_memcpy_byte_per_byte(dst, src, count, offset);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// memset
+///////////////////////////////////////////////////////////////////////////////
+
+[[maybe_unused]] LIBC_INLINE static void
+inline_memset_aligned_access_32bit(Ptr dst, uint8_t value, size_t count) {
+  constexpr size_t kAlign = sizeof(uint32_t);
+  if (count <= 2 * kAlign)
+    return inline_memset_byte_per_byte(dst, value, count);
+  size_t bytes_to_dst_align = distance_to_align_up<kAlign>(dst);
+  inline_memset_byte_per_byte(dst, value, bytes_to_dst_align);
+  size_t offset = bytes_to_dst_align;
+  for (; offset < count - kAlign; offset += kAlign)
+    store32_aligned(generic::splat<uint32_t>(value), dst, offset);
+  inline_memset_byte_per_byte(dst, value, count, offset);
+}
+
+[[maybe_unused]] LIBC_INLINE static void
+inline_memset_aligned_access_64bit(Ptr dst, uint8_t value, size_t count) {
+  constexpr size_t kAlign = sizeof(uint64_t);
+  if (count <= 2 * kAlign)
+    return inline_memset_byte_per_byte(dst, value, count);
+  size_t bytes_to_dst_align = distance_to_align_up<kAlign>(dst);
+  inline_memset_byte_per_byte(dst, value, bytes_to_dst_align);
+  size_t offset = bytes_to_dst_align;
+  for (; offset < count - kAlign; offset += kAlign)
+    store64_aligned(generic::splat<uint64_t>(value), dst, offset);
+  inline_memset_byte_per_byte(dst, value, count, offset);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// bcmp
+///////////////////////////////////////////////////////////////////////////////
+
+[[maybe_unused]] LIBC_INLINE BcmpReturnType
+inline_bcmp_aligned_access_32bit(CPtr p1, CPtr p2, size_t count) {
+  constexpr size_t kAlign = sizeof(uint32_t);
+  if (count <= 2 * kAlign)
+    return inline_bcmp_byte_per_byte(p1, p2, count);
+  size_t bytes_to_p1_align = distance_to_align_up<kAlign>(p1);
+  if (auto value = inline_bcmp_byte_per_byte(p1, p2, bytes_to_p1_align))
+    return value;
+  size_t offset = bytes_to_p1_align;
+  size_t p2_alignment = distance_to_align_down<kAlign>(p2 + offset);
+  for (; offset < count - kAlign; offset += kAlign) {
+    uint32_t a = load32_aligned<uint32_t>(p1, offset);
+    uint32_t b = load32_aligned(p2, offset, p2_alignment);
+    if (a != b)
+      return BcmpReturnType::NONZERO();
+  }
+  return inline_bcmp_byte_per_byte(p1, p2, count, offset);
+}
+
+[[maybe_unused]] LIBC_INLINE BcmpReturnType
+inline_bcmp_aligned_access_64bit(CPtr p1, CPtr p2, size_t count) {
+  constexpr size_t kAlign = sizeof(uint64_t);
+  if (count <= 2 * kAlign)
+    return inline_bcmp_byte_per_byte(p1, p2, count);
+  size_t bytes_to_p1_align = distance_to_align_up<kAlign>(p1);
+  if (auto value = inline_bcmp_byte_per_byte(p1, p2, bytes_to_p1_align))
+    return value;
+  size_t offset = bytes_to_p1_align;
+  size_t p2_alignment = distance_to_align_down<kAlign>(p2 + offset);
+  for (; offset < count - kAlign; offset += kAlign) {
+    uint64_t a = load64_aligned<uint64_t>(p1, offset);
+    uint64_t b = load64_aligned(p2, offset, p2_alignment);
+    if (a != b)
+      return BcmpReturnType::NONZERO();
+  }
+  return inline_bcmp_byte_per_byte(p1, p2, count, offset);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// memcmp
+///////////////////////////////////////////////////////////////////////////////
+
+[[maybe_unused]] LIBC_INLINE MemcmpReturnType
+inline_memcmp_aligned_access_32bit(CPtr p1, CPtr p2, size_t count) {
+  constexpr size_t kAlign = sizeof(uint32_t);
+  if (count <= 2 * kAlign)
+    return inline_memcmp_byte_per_byte(p1, p2, count);
+  size_t bytes_to_p1_align = distance_to_align_up<kAlign>(p1);
+  if (auto value = inline_memcmp_byte_per_byte(p1, p2, bytes_to_p1_align))
+    return value;
+  size_t offset = bytes_to_p1_align;
+  size_t p2_alignment = distance_to_align_down<kAlign>(p2 + offset);
+  for (; offset < count - kAlign; offset += kAlign) {
+    uint32_t a = load32_aligned<uint32_t>(p1, offset);
+    uint32_t b = load32_aligned(p2, offset, p2_alignment);
+    if (a != b)
+      return cmp_uint32_t(Endian::to_big_endian(a), Endian::to_big_endian(b));
+  }
+  return inline_memcmp_byte_per_byte(p1, p2, count, offset);
+}
+
+[[maybe_unused]] LIBC_INLINE MemcmpReturnType
+inline_memcmp_aligned_access_64bit(CPtr p1, CPtr p2, size_t count) {
+  constexpr size_t kAlign = sizeof(uint64_t);
+  if (count <= 2 * kAlign)
+    return inline_memcmp_byte_per_byte(p1, p2, count);
+  size_t bytes_to_p1_align = distance_to_align_up<kAlign>(p1);
+  if (auto value = inline_memcmp_byte_per_byte(p1, p2, bytes_to_p1_align))
+    return value;
+  size_t offset = bytes_to_p1_align;
+  size_t p2_alignment = distance_to_align_down<kAlign>(p2 + offset);
+  for (; offset < count - kAlign; offset += kAlign) {
+    uint64_t a = load64_aligned<uint64_t>(p1, offset);
+    uint64_t b = load64_aligned(p2, offset, p2_alignment);
+    if (a != b)
+      return cmp_neq_uint64_t(Endian::to_big_endian(a),
+                              Endian::to_big_endian(b));
+  }
+  return inline_memcmp_byte_per_byte(p1, p2, count, offset);
+}
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_ALIGNED_ACCESS_H
diff --git a/libc/src/string/memory_utils/memcmp_implementations.h b/libc/src/string/memory_utils/memcmp_implementations.h
index eb04136..87729a7 100644
--- a/libc/src/string/memory_utils/memcmp_implementations.h
+++ b/libc/src/string/memory_utils/memcmp_implementations.h
@@ -12,6 +12,7 @@
 #include "src/__support/common.h"
 #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY LIBC_LOOP_NOUNROLL
 #include "src/__support/macros/properties/architectures.h"
+#include "src/string/memory_utils/generic/aligned_access.h"
 #include "src/string/memory_utils/generic/byte_per_byte.h"
 #include "src/string/memory_utils/op_generic.h"
 #include "src/string/memory_utils/op_riscv.h"
@@ -27,60 +28,6 @@
 
 namespace __llvm_libc {
 
-[[maybe_unused]] LIBC_INLINE MemcmpReturnType
-inline_memcmp_aligned_access_64bit(CPtr p1, CPtr p2, size_t count) {
-  constexpr size_t kAlign = sizeof(uint64_t);
-  if (count <= 2 * kAlign)
-    return inline_memcmp_byte_per_byte(p1, p2, count);
-  size_t bytes_to_p1_align = distance_to_align_up<kAlign>(p1);
-  if (auto value = inline_memcmp_byte_per_byte(p1, p2, bytes_to_p1_align))
-    return value;
-  size_t offset = bytes_to_p1_align;
-  size_t p2_alignment = distance_to_align_down<kAlign>(p2 + offset);
-  for (; offset < count - kAlign; offset += kAlign) {
-    uint64_t b;
-    if (p2_alignment == 0)
-      b = load64_aligned<uint64_t>(p2, offset);
-    else if (p2_alignment == 4)
-      b = load64_aligned<uint32_t, uint32_t>(p2, offset);
-    else if (p2_alignment == 2)
-      b = load64_aligned<uint16_t, uint16_t, uint16_t, uint16_t>(p2, offset);
-    else
-      b = load64_aligned<uint8_t, uint16_t, uint16_t, uint16_t, uint8_t>(
-          p2, offset);
-    uint64_t a = load64_aligned<uint64_t>(p1, offset);
-    if (a != b)
-      return cmp_neq_uint64_t(Endian::to_big_endian(a),
-                              Endian::to_big_endian(b));
-  }
-  return inline_memcmp_byte_per_byte(p1, p2, count, offset);
-}
-
-[[maybe_unused]] LIBC_INLINE MemcmpReturnType
-inline_memcmp_aligned_access_32bit(CPtr p1, CPtr p2, size_t count) {
-  constexpr size_t kAlign = sizeof(uint32_t);
-  if (count <= 2 * kAlign)
-    return inline_memcmp_byte_per_byte(p1, p2, count);
-  size_t bytes_to_p1_align = distance_to_align_up<kAlign>(p1);
-  if (auto value = inline_memcmp_byte_per_byte(p1, p2, bytes_to_p1_align))
-    return value;
-  size_t offset = bytes_to_p1_align;
-  size_t p2_alignment = distance_to_align_down<kAlign>(p2 + offset);
-  for (; offset < count - kAlign; offset += kAlign) {
-    uint32_t b;
-    if (p2_alignment == 0)
-      b = load32_aligned<uint32_t>(p2, offset);
-    else if (p2_alignment == 2)
-      b = load32_aligned<uint16_t, uint16_t>(p2, offset);
-    else
-      b = load32_aligned<uint8_t, uint16_t, uint8_t>(p2, offset);
-    uint32_t a = load32_aligned<uint32_t>(p1, offset);
-    if (a != b)
-      return cmp_uint32_t(Endian::to_big_endian(a), Endian::to_big_endian(b));
-  }
-  return inline_memcmp_byte_per_byte(p1, p2, count, offset);
-}
-
 LIBC_INLINE MemcmpReturnType inline_memcmp(CPtr p1, CPtr p2, size_t count) {
 #if defined(LIBC_TARGET_ARCH_IS_X86)
   return inline_memcmp_x86(p1, p2, count);
diff --git a/libc/src/string/memory_utils/memcpy_implementations.h b/libc/src/string/memory_utils/memcpy_implementations.h
index 32b0cff..a7bfc9e 100644
--- a/libc/src/string/memory_utils/memcpy_implementations.h
+++ b/libc/src/string/memory_utils/memcpy_implementations.h
@@ -12,6 +12,7 @@
 #include "src/__support/macros/config.h"       // LIBC_INLINE
 #include "src/__support/macros/optimization.h" // LIBC_LOOP_NOUNROLL
 #include "src/__support/macros/properties/architectures.h"
+#include "src/string/memory_utils/generic/aligned_access.h"
 #include "src/string/memory_utils/generic/byte_per_byte.h"
 #include "src/string/memory_utils/op_builtin.h"
 #include "src/string/memory_utils/utils.h"
@@ -26,58 +27,6 @@
 
 namespace __llvm_libc {
 
-[[maybe_unused]] LIBC_INLINE void
-inline_memcpy_aligned_access_32bit(Ptr __restrict dst, CPtr __restrict src,
-                                   size_t count) {
-  constexpr size_t kAlign = sizeof(uint32_t);
-  if (count <= 2 * kAlign)
-    return inline_memcpy_byte_per_byte(dst, src, count);
-  size_t bytes_to_dst_align = distance_to_align_up<kAlign>(dst);
-  inline_memcpy_byte_per_byte(dst, src, bytes_to_dst_align);
-  size_t offset = bytes_to_dst_align;
-  size_t src_alignment = distance_to_align_down<kAlign>(src + offset);
-  for (; offset < count - kAlign; offset += kAlign) {
-    uint32_t value;
-    if (src_alignment == 0)
-      value = load32_aligned<uint32_t>(src, offset);
-    else if (src_alignment == 2)
-      value = load32_aligned<uint16_t, uint16_t>(src, offset);
-    else
-      value = load32_aligned<uint8_t, uint16_t, uint8_t>(src, offset);
-    store32_aligned(value, dst, offset);
-  }
-  // remainder
-  inline_memcpy_byte_per_byte(dst, src, count, offset);
-}
-
-[[maybe_unused]] LIBC_INLINE void
-inline_memcpy_aligned_access_64bit(Ptr __restrict dst, CPtr __restrict src,
-                                   size_t count) {
-  constexpr size_t kAlign = sizeof(uint64_t);
-  if (count <= 2 * kAlign)
-    return inline_memcpy_byte_per_byte(dst, src, count);
-  size_t bytes_to_dst_align = distance_to_align_up<kAlign>(dst);
-  inline_memcpy_byte_per_byte(dst, src, bytes_to_dst_align);
-  size_t offset = bytes_to_dst_align;
-  size_t src_alignment = distance_to_align_down<kAlign>(src + offset);
-  for (; offset < count - kAlign; offset += kAlign) {
-    uint64_t value;
-    if (src_alignment == 0)
-      value = load64_aligned<uint64_t>(src, offset);
-    else if (src_alignment == 4)
-      value = load64_aligned<uint32_t, uint32_t>(src, offset);
-    else if (src_alignment == 2)
-      value =
-          load64_aligned<uint16_t, uint16_t, uint16_t, uint16_t>(src, offset);
-    else
-      value = load64_aligned<uint8_t, uint16_t, uint16_t, uint16_t, uint8_t>(
-          src, offset);
-    store64_aligned(value, dst, offset);
-  }
-  // remainder
-  inline_memcpy_byte_per_byte(dst, src, count, offset);
-}
-
 LIBC_INLINE void
 inline_memcpy(Ptr __restrict dst, CPtr __restrict src, size_t count) {
   using namespace __llvm_libc::builtin;
diff --git a/libc/src/string/memory_utils/memset_implementations.h b/libc/src/string/memory_utils/memset_implementations.h
index 800af1f..691d7f2 100644
--- a/libc/src/string/memory_utils/memset_implementations.h
+++ b/libc/src/string/memory_utils/memset_implementations.h
@@ -12,6 +12,7 @@
 #include "src/__support/common.h"
 #include "src/__support/macros/optimization.h"
 #include "src/__support/macros/properties/architectures.h"
+#include "src/string/memory_utils/generic/aligned_access.h"
 #include "src/string/memory_utils/generic/byte_per_byte.h"
 #include "src/string/memory_utils/op_aarch64.h"
 #include "src/string/memory_utils/op_builtin.h"
@@ -23,32 +24,6 @@
 
 namespace __llvm_libc {
 
-[[maybe_unused]] LIBC_INLINE static void
-inline_memset_aligned_access_32bit(Ptr dst, uint8_t value, size_t count) {
-  constexpr size_t kAlign = sizeof(uint32_t);
-  if (count <= 2 * kAlign)
-    return inline_memset_byte_per_byte(dst, value, count);
-  size_t bytes_to_dst_align = distance_to_align_up<kAlign>(dst);
-  inline_memset_byte_per_byte(dst, value, bytes_to_dst_align);
-  size_t offset = bytes_to_dst_align;
-  for (; offset < count - kAlign; offset += kAlign)
-    store32_aligned(generic::splat<uint32_t>(value), dst, offset);
-  inline_memset_byte_per_byte(dst, value, count, offset);
-}
-
-[[maybe_unused]] LIBC_INLINE static void
-inline_memset_aligned_access_64bit(Ptr dst, uint8_t value, size_t count) {
-  constexpr size_t kAlign = sizeof(uint64_t);
-  if (count <= 2 * kAlign)
-    return inline_memset_byte_per_byte(dst, value, count);
-  size_t bytes_to_dst_align = distance_to_align_up<kAlign>(dst);
-  inline_memset_byte_per_byte(dst, value, bytes_to_dst_align);
-  size_t offset = bytes_to_dst_align;
-  for (; offset < count - kAlign; offset += kAlign)
-    store64_aligned(generic::splat<uint64_t>(value), dst, offset);
-  inline_memset_byte_per_byte(dst, value, count, offset);
-}
-
 #if defined(LIBC_TARGET_ARCH_IS_X86)
 [[maybe_unused]] LIBC_INLINE static void
 inline_memset_x86(Ptr dst, uint8_t value, size_t count) {
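
The implementations gathered in the new header never issue a memory operation
the hardware could fault on: they run byte per byte until the primary pointer
is word aligned, then dispatch on the residual alignment of the secondary
pointer and compose each word from naturally aligned sub-loads. Below is a
minimal standalone sketch of what a dispatch like
load32_aligned<uint8_t, uint16_t, uint8_t>(ptr, offset) boils down to,
assuming a little-endian target; the function name and the memcpy-based
sub-loads are illustrative stand-ins, not the library's actual load<T>
machinery.

#include <cassert>
#include <cstddef> // size_t
#include <cstdint> // uint8_t, uint16_t, uint32_t
#include <cstring> // std::memcpy

// Hypothetical helper: reads a 32-bit value at ptr + offset using only
// naturally aligned sub-loads. `alignment` is (ptr + offset) % 4, which the
// patch computes once via distance_to_align_down<4> and which stays invariant
// as offset advances in 4-byte steps.
static uint32_t load32_via_aligned_subloads(const char *ptr, size_t offset,
                                            size_t alignment) {
  const char *p = ptr + offset;
  if (alignment == 0) { // 4-byte aligned: a single 32-bit load.
    uint32_t v;
    std::memcpy(&v, p, sizeof(v));
    return v;
  }
  if (alignment == 2) { // 2-byte aligned: two 16-bit loads.
    uint16_t lo, hi;
    std::memcpy(&lo, p, sizeof(lo));
    std::memcpy(&hi, p + 2, sizeof(hi));
    return uint32_t(lo) | uint32_t(hi) << 16;
  }
  // Odd address: byte, 16-bit, byte. Since p is odd, p + 1 is even and the
  // middle 16-bit load is naturally aligned.
  uint8_t b0, b3;
  uint16_t mid;
  std::memcpy(&b0, p, sizeof(b0));
  std::memcpy(&mid, p + 1, sizeof(mid));
  std::memcpy(&b3, p + 3, sizeof(b3));
  return uint32_t(b0) | uint32_t(mid) << 8 | uint32_t(b3) << 24;
}

int main() {
  // Check all four residual alignments against a plain unaligned read.
  // Note: the byte composition above assumes a little-endian target.
  alignas(4) const char buf[] = {1, 2, 3, 4, 5, 6, 7, 8};
  for (size_t offset = 0; offset < 4; ++offset) {
    uint32_t expected;
    std::memcpy(&expected, buf + offset, sizeof(expected));
    assert(load32_via_aligned_subloads(buf, offset, offset % 4) == expected);
  }
  return 0;
}

Because each std::memcpy above copies a sub-word whose source address is
naturally aligned, a strict-alignment target can lower it to a single aligned
load instruction, which is the point of the dispatch: the three-argument
load32_aligned/load64_aligned overloads added by this patch centralize that
alignment switch instead of duplicating it in every memcpy, bcmp, and memcmp
implementation.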