aboutsummaryrefslogtreecommitdiff
path: root/libc
diff options
context:
space:
mode:
authorGuillaume Chatelet <gchatelet@google.com>2023-07-12 16:16:28 +0000
committerGuillaume Chatelet <gchatelet@google.com>2023-07-13 10:19:00 +0000
commit1c4e4e03bdf6e64670947600ec864a73eafafda0 (patch)
tree3dcf931446c1ec2d98a881f7b0081d1c3b731d0b /libc
parente8a5df7beb18db1ec1b1cdc42efa1b38c641d073 (diff)
downloadllvm-1c4e4e03bdf6e64670947600ec864a73eafafda0.zip
llvm-1c4e4e03bdf6e64670947600ec864a73eafafda0.tar.gz
llvm-1c4e4e03bdf6e64670947600ec864a73eafafda0.tar.bz2
[libc][NFC] Split bcmp implementations per platform
This is a follow up on D154800 and D154770 to make the code structure more principled and avoid too many nested #ifdef/#endif. Reviewed By: courbet Differential Revision: https://reviews.llvm.org/D155076
Diffstat (limited to 'libc')
-rw-r--r--libc/src/string/memory_utils/CMakeLists.txt5
-rw-r--r--libc/src/string/memory_utils/aarch64/bcmp_implementations.h70
-rw-r--r--libc/src/string/memory_utils/bcmp_implementations.h166
-rw-r--r--libc/src/string/memory_utils/riscv/bcmp_implementations.h33
-rw-r--r--libc/src/string/memory_utils/x86_64/bcmp_implementations.h93
5 files changed, 218 insertions, 149 deletions
diff --git a/libc/src/string/memory_utils/CMakeLists.txt b/libc/src/string/memory_utils/CMakeLists.txt
index 22e3249..d6fa1d9 100644
--- a/libc/src/string/memory_utils/CMakeLists.txt
+++ b/libc/src/string/memory_utils/CMakeLists.txt
@@ -1,7 +1,8 @@
-#TODO(michaelrj): split out the implementations from memory_utils
+# TODO(michaelrj): split out the implementations from memory_utils
add_header_library(
memory_utils
HDRS
+ aarch64/bcmp_implementations.h
aarch64/memcmp_implementations.h
aarch64/memcpy_implementations.h
bcmp_implementations.h
@@ -16,7 +17,9 @@ add_header_library(
op_builtin.h
op_generic.h
op_x86.h
+ riscv/bcmp_implementations.h
utils.h
+ x86_64/bcmp_implementations.h
x86_64/memcmp_implementations.h
x86_64/memcpy_implementations.h
DEPS
diff --git a/libc/src/string/memory_utils/aarch64/bcmp_implementations.h b/libc/src/string/memory_utils/aarch64/bcmp_implementations.h
new file mode 100644
index 0000000..61c4c4c6
--- /dev/null
+++ b/libc/src/string/memory_utils/aarch64/bcmp_implementations.h
@@ -0,0 +1,70 @@
+//===-- Bcmp implementation for aarch64 -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_BCMP_IMPLEMENTATIONS_H
+#define LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_BCMP_IMPLEMENTATIONS_H
+
+#include "src/__support/macros/attributes.h" // LIBC_INLINE
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+#include "src/string/memory_utils/op_aarch64.h"
+#include "src/string/memory_utils/op_generic.h"
+#include "src/string/memory_utils/utils.h" // Ptr, CPtr
+
+#include <stddef.h> // size_t
+
+namespace __llvm_libc {
+
+[[maybe_unused]] LIBC_INLINE BcmpReturnType inline_bcmp_aarch64(CPtr p1,
+ CPtr p2,
+ size_t count) {
+ if (LIBC_LIKELY(count <= 32)) {
+ if (LIBC_UNLIKELY(count >= 16)) {
+ return aarch64::Bcmp<16>::head_tail(p1, p2, count);
+ }
+ switch (count) {
+ case 0:
+ return BcmpReturnType::ZERO();
+ case 1:
+ return generic::Bcmp<uint8_t>::block(p1, p2);
+ case 2:
+ return generic::Bcmp<uint16_t>::block(p1, p2);
+ case 3:
+ return generic::Bcmp<uint16_t>::head_tail(p1, p2, count);
+ case 4:
+ return generic::Bcmp<uint32_t>::block(p1, p2);
+ case 5:
+ case 6:
+ case 7:
+ return generic::Bcmp<uint32_t>::head_tail(p1, p2, count);
+ case 8:
+ return generic::Bcmp<uint64_t>::block(p1, p2);
+ case 9:
+ case 10:
+ case 11:
+ case 12:
+ case 13:
+ case 14:
+ case 15:
+ return generic::Bcmp<uint64_t>::head_tail(p1, p2, count);
+ }
+ }
+
+ if (count <= 64)
+ return aarch64::Bcmp<32>::head_tail(p1, p2, count);
+
+ // Aligned loop if > 256, otherwise normal loop
+ if (LIBC_UNLIKELY(count > 256)) {
+ if (auto value = aarch64::Bcmp<32>::block(p1, p2))
+ return value;
+ align_to_next_boundary<16, Arg::P1>(p1, p2, count);
+ }
+ return aarch64::Bcmp<32>::loop_and_tail(p1, p2, count);
+}
+
+} // namespace __llvm_libc
+
+#endif // LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_BCMP_IMPLEMENTATIONS_H
diff --git a/libc/src/string/memory_utils/bcmp_implementations.h b/libc/src/string/memory_utils/bcmp_implementations.h
index 7bfc173..bfd5201 100644
--- a/libc/src/string/memory_utils/bcmp_implementations.h
+++ b/libc/src/string/memory_utils/bcmp_implementations.h
@@ -1,4 +1,4 @@
-//===-- Implementation of bcmp --------------------------------------------===//
+//===-- Dispatch logic for bcmp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -10,164 +10,34 @@
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BCMP_IMPLEMENTATIONS_H
#include "src/__support/common.h"
-#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY LIBC_LOOP_NOUNROLL
-#include "src/__support/macros/properties/architectures.h"
-#include "src/string/memory_utils/generic/aligned_access.h"
-#include "src/string/memory_utils/generic/byte_per_byte.h"
-#include "src/string/memory_utils/op_aarch64.h"
-#include "src/string/memory_utils/op_builtin.h"
-#include "src/string/memory_utils/op_generic.h"
-#include "src/string/memory_utils/op_riscv.h"
-#include "src/string/memory_utils/op_x86.h"
+#include "src/__support/macros/properties/architectures.h" // LIBC_TARGET_ARCH_IS_
#include <stddef.h> // size_t
-namespace __llvm_libc {
-
-#if defined(LIBC_TARGET_ARCH_IS_X86) || defined(LIBC_TARGET_ARCH_IS_AARCH64)
-[[maybe_unused]] LIBC_INLINE BcmpReturnType
-inline_bcmp_generic_gt16(CPtr p1, CPtr p2, size_t count) {
- return generic::Bcmp<uint64_t>::loop_and_tail_align_above(256, p1, p2, count);
-}
-#endif // defined(LIBC_TARGET_ARCH_IS_X86) ||
- // defined(LIBC_TARGET_ARCH_IS_AARCH64)
-
#if defined(LIBC_TARGET_ARCH_IS_X86)
-#if defined(__SSE4_1__)
-[[maybe_unused]] LIBC_INLINE BcmpReturnType
-inline_bcmp_x86_sse41_gt16(CPtr p1, CPtr p2, size_t count) {
- if (count <= 32)
- return generic::Bcmp<__m128i>::head_tail(p1, p2, count);
- return generic::Bcmp<__m128i>::loop_and_tail_align_above(256, p1, p2, count);
-}
-#endif // __SSE4_1__
-
-#if defined(__AVX__)
-[[maybe_unused]] LIBC_INLINE BcmpReturnType
-inline_bcmp_x86_avx_gt16(CPtr p1, CPtr p2, size_t count) {
- if (count <= 32)
- return generic::Bcmp<__m128i>::head_tail(p1, p2, count);
- if (count <= 64)
- return generic::Bcmp<__m256i>::head_tail(p1, p2, count);
- return generic::Bcmp<__m256i>::loop_and_tail_align_above(256, p1, p2, count);
-}
-#endif // __AVX__
-
-#if defined(__AVX512BW__)
-[[maybe_unused]] LIBC_INLINE BcmpReturnType
-inline_bcmp_x86_avx512bw_gt16(CPtr p1, CPtr p2, size_t count) {
- if (count <= 32)
- return generic::Bcmp<__m128i>::head_tail(p1, p2, count);
- if (count <= 64)
- return generic::Bcmp<__m256i>::head_tail(p1, p2, count);
- if (count <= 128)
- return generic::Bcmp<__m512i>::head_tail(p1, p2, count);
- return generic::Bcmp<__m512i>::loop_and_tail_align_above(256, p1, p2, count);
-}
-#endif // __AVX512BW__
-
-[[maybe_unused]] LIBC_INLINE BcmpReturnType inline_bcmp_x86(CPtr p1, CPtr p2,
- size_t count) {
- if (count == 0)
- return BcmpReturnType::ZERO();
- if (count == 1)
- return generic::Bcmp<uint8_t>::block(p1, p2);
- if (count == 2)
- return generic::Bcmp<uint16_t>::block(p1, p2);
- if (count == 3)
- return generic::BcmpSequence<uint16_t, uint8_t>::block(p1, p2);
- if (count == 4)
- return generic::Bcmp<uint32_t>::block(p1, p2);
- if (count == 5)
- return generic::BcmpSequence<uint32_t, uint8_t>::block(p1, p2);
- if (count == 6)
- return generic::BcmpSequence<uint32_t, uint16_t>::block(p1, p2);
- if (count == 7)
- return generic::BcmpSequence<uint32_t, uint16_t, uint8_t>::block(p1, p2);
- if (count == 8)
- return generic::Bcmp<uint64_t>::block(p1, p2);
- if (count <= 16)
- return generic::Bcmp<uint64_t>::head_tail(p1, p2, count);
-#if defined(__AVX512BW__)
- return inline_bcmp_x86_avx512bw_gt16(p1, p2, count);
-#elif defined(__AVX__)
- return inline_bcmp_x86_avx_gt16(p1, p2, count);
-#elif defined(__SSE4_1__)
- return inline_bcmp_x86_sse41_gt16(p1, p2, count);
-#else
- return inline_bcmp_generic_gt16(p1, p2, count);
-#endif
-}
-#endif // defined(LIBC_TARGET_ARCH_IS_X86)
-
-#if defined(LIBC_TARGET_ARCH_IS_AARCH64)
-[[maybe_unused]] LIBC_INLINE BcmpReturnType inline_bcmp_aarch64(CPtr p1,
- CPtr p2,
- size_t count) {
- if (LIBC_LIKELY(count <= 32)) {
- if (LIBC_UNLIKELY(count >= 16)) {
- return aarch64::Bcmp<16>::head_tail(p1, p2, count);
- }
- switch (count) {
- case 0:
- return BcmpReturnType::ZERO();
- case 1:
- return generic::Bcmp<uint8_t>::block(p1, p2);
- case 2:
- return generic::Bcmp<uint16_t>::block(p1, p2);
- case 3:
- return generic::Bcmp<uint16_t>::head_tail(p1, p2, count);
- case 4:
- return generic::Bcmp<uint32_t>::block(p1, p2);
- case 5:
- case 6:
- case 7:
- return generic::Bcmp<uint32_t>::head_tail(p1, p2, count);
- case 8:
- return generic::Bcmp<uint64_t>::block(p1, p2);
- case 9:
- case 10:
- case 11:
- case 12:
- case 13:
- case 14:
- case 15:
- return generic::Bcmp<uint64_t>::head_tail(p1, p2, count);
- }
- }
-
- if (count <= 64)
- return aarch64::Bcmp<32>::head_tail(p1, p2, count);
-
- // Aligned loop if > 256, otherwise normal loop
- if (LIBC_UNLIKELY(count > 256)) {
- if (auto value = aarch64::Bcmp<32>::block(p1, p2))
- return value;
- align_to_next_boundary<16, Arg::P1>(p1, p2, count);
- }
- return aarch64::Bcmp<32>::loop_and_tail(p1, p2, count);
-}
-#endif // defined(LIBC_TARGET_ARCH_IS_AARCH64)
-
-LIBC_INLINE BcmpReturnType inline_bcmp(CPtr p1, CPtr p2, size_t count) {
-#if defined(LIBC_TARGET_ARCH_IS_X86)
- return inline_bcmp_x86(p1, p2, count);
+#include "src/string/memory_utils/x86_64/bcmp_implementations.h"
+#define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_x86
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
- return inline_bcmp_aarch64(p1, p2, count);
-#elif defined(LIBC_TARGET_ARCH_IS_RISCV64)
- return inline_bcmp_aligned_access_64bit(p1, p2, count);
-#elif defined(LIBC_TARGET_ARCH_IS_RISCV32)
- return inline_bcmp_aligned_access_32bit(p1, p2, count);
+#include "src/string/memory_utils/aarch64/bcmp_implementations.h"
+#define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_aarch64
+#elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
+#include "src/string/memory_utils/riscv/bcmp_implementations.h"
+#define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_riscv
#else
- return inline_bcmp_byte_per_byte(p1, p2, count);
+// We may want to error instead of defaulting to suboptimal implementation.
+#include "src/string/memory_utils/generic/byte_per_byte.h"
+#define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_byte_per_byte
#endif
-}
+
+namespace __llvm_libc {
LIBC_INLINE int inline_bcmp(const void *p1, const void *p2, size_t count) {
- return static_cast<int>(inline_bcmp(reinterpret_cast<CPtr>(p1),
- reinterpret_cast<CPtr>(p2), count));
+ return static_cast<int>(LIBC_SRC_STRING_MEMORY_UTILS_BCMP(
+ reinterpret_cast<CPtr>(p1), reinterpret_cast<CPtr>(p2), count));
}
} // namespace __llvm_libc
+#undef LIBC_SRC_STRING_MEMORY_UTILS_BCMP
+
#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BCMP_IMPLEMENTATIONS_H
diff --git a/libc/src/string/memory_utils/riscv/bcmp_implementations.h b/libc/src/string/memory_utils/riscv/bcmp_implementations.h
new file mode 100644
index 0000000..b7a29fe
--- /dev/null
+++ b/libc/src/string/memory_utils/riscv/bcmp_implementations.h
@@ -0,0 +1,33 @@
+//===-- Bcmp implementation for riscv ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LIBC_SRC_STRING_MEMORY_UTILS_RISCV_BCMP_IMPLEMENTATIONS_H
+#define LIBC_SRC_STRING_MEMORY_UTILS_RISCV_BCMP_IMPLEMENTATIONS_H
+
+#include "src/__support/macros/attributes.h" // LIBC_INLINE
+#include "src/__support/macros/properties/architectures.h" // LIBC_TARGET_ARCH_IS_RISCV64
+#include "src/string/memory_utils/generic/aligned_access.h"
+#include "src/string/memory_utils/utils.h" // Ptr, CPtr
+
+#include <stddef.h> // size_t
+
+namespace __llvm_libc {
+
+[[maybe_unused]] LIBC_INLINE BcmpReturnType inline_bcmp_riscv(CPtr p1, CPtr p2,
+ size_t count) {
+#if defined(LIBC_TARGET_ARCH_IS_RISCV64)
+ return inline_bcmp_aligned_access_64bit(p1, p2, count);
+#elif defined(LIBC_TARGET_ARCH_IS_RISCV32)
+ return inline_bcmp_aligned_access_32bit(p1, p2, count);
+#else
+#error "Unimplemented"
+#endif
+}
+
+} // namespace __llvm_libc
+
+#endif // LIBC_SRC_STRING_MEMORY_UTILS_RISCV_BCMP_IMPLEMENTATIONS_H
diff --git a/libc/src/string/memory_utils/x86_64/bcmp_implementations.h b/libc/src/string/memory_utils/x86_64/bcmp_implementations.h
new file mode 100644
index 0000000..4c610fc
--- /dev/null
+++ b/libc/src/string/memory_utils/x86_64/bcmp_implementations.h
@@ -0,0 +1,93 @@
+//===-- Bcmp implementation for x86_64 --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LIBC_SRC_STRING_MEMORY_UTILS_X86_64_BCMP_IMPLEMENTATIONS_H
+#define LIBC_SRC_STRING_MEMORY_UTILS_X86_64_BCMP_IMPLEMENTATIONS_H
+
+#include "src/__support/macros/attributes.h" // LIBC_INLINE
+#include "src/string/memory_utils/op_generic.h"
+#include "src/string/memory_utils/op_x86.h"
+#include "src/string/memory_utils/utils.h" // Ptr, CPtr
+
+#include <stddef.h> // size_t
+
+namespace __llvm_libc {
+
+[[maybe_unused]] LIBC_INLINE BcmpReturnType
+inline_bcmp_generic_gt16(CPtr p1, CPtr p2, size_t count) {
+ return generic::Bcmp<uint64_t>::loop_and_tail_align_above(256, p1, p2, count);
+}
+
+#if defined(__SSE4_1__)
+[[maybe_unused]] LIBC_INLINE BcmpReturnType
+inline_bcmp_x86_sse41_gt16(CPtr p1, CPtr p2, size_t count) {
+ if (count <= 32)
+ return generic::Bcmp<__m128i>::head_tail(p1, p2, count);
+ return generic::Bcmp<__m128i>::loop_and_tail_align_above(256, p1, p2, count);
+}
+#endif // __SSE4_1__
+
+#if defined(__AVX__)
+[[maybe_unused]] LIBC_INLINE BcmpReturnType
+inline_bcmp_x86_avx_gt16(CPtr p1, CPtr p2, size_t count) {
+ if (count <= 32)
+ return generic::Bcmp<__m128i>::head_tail(p1, p2, count);
+ if (count <= 64)
+ return generic::Bcmp<__m256i>::head_tail(p1, p2, count);
+ return generic::Bcmp<__m256i>::loop_and_tail_align_above(256, p1, p2, count);
+}
+#endif // __AVX__
+
+#if defined(__AVX512BW__)
+[[maybe_unused]] LIBC_INLINE BcmpReturnType
+inline_bcmp_x86_avx512bw_gt16(CPtr p1, CPtr p2, size_t count) {
+ if (count <= 32)
+ return generic::Bcmp<__m128i>::head_tail(p1, p2, count);
+ if (count <= 64)
+ return generic::Bcmp<__m256i>::head_tail(p1, p2, count);
+ if (count <= 128)
+ return generic::Bcmp<__m512i>::head_tail(p1, p2, count);
+ return generic::Bcmp<__m512i>::loop_and_tail_align_above(256, p1, p2, count);
+}
+#endif // __AVX512BW__
+
+[[maybe_unused]] LIBC_INLINE BcmpReturnType inline_bcmp_x86(CPtr p1, CPtr p2,
+ size_t count) {
+ if (count == 0)
+ return BcmpReturnType::ZERO();
+ if (count == 1)
+ return generic::Bcmp<uint8_t>::block(p1, p2);
+ if (count == 2)
+ return generic::Bcmp<uint16_t>::block(p1, p2);
+ if (count == 3)
+ return generic::BcmpSequence<uint16_t, uint8_t>::block(p1, p2);
+ if (count == 4)
+ return generic::Bcmp<uint32_t>::block(p1, p2);
+ if (count == 5)
+ return generic::BcmpSequence<uint32_t, uint8_t>::block(p1, p2);
+ if (count == 6)
+ return generic::BcmpSequence<uint32_t, uint16_t>::block(p1, p2);
+ if (count == 7)
+ return generic::BcmpSequence<uint32_t, uint16_t, uint8_t>::block(p1, p2);
+ if (count == 8)
+ return generic::Bcmp<uint64_t>::block(p1, p2);
+ if (count <= 16)
+ return generic::Bcmp<uint64_t>::head_tail(p1, p2, count);
+#if defined(__AVX512BW__)
+ return inline_bcmp_x86_avx512bw_gt16(p1, p2, count);
+#elif defined(__AVX__)
+ return inline_bcmp_x86_avx_gt16(p1, p2, count);
+#elif defined(__SSE4_1__)
+ return inline_bcmp_x86_sse41_gt16(p1, p2, count);
+#else
+ return inline_bcmp_generic_gt16(p1, p2, count);
+#endif
+}
+
+} // namespace __llvm_libc
+
+#endif // LIBC_SRC_STRING_MEMORY_UTILS_X86_64_BCMP_IMPLEMENTATIONS_H