diff options
author | Caslyn Tonelli <caslyn@google.com> | 2023-04-07 22:16:01 +0000 |
---|---|---|
committer | Caslyn Tonelli <caslyn@google.com> | 2023-04-11 20:49:25 +0000 |
commit | 718729e9971d4ad12d8c7cb088c661202564cf34 (patch) | |
tree | dc38f3b53485679fde47eca928020e017ed48914 /libc | |
parent | 1fa26e64fd87c848ff54d08e9a14ea03e01ae645 (diff) | |
download | llvm-718729e9971d4ad12d8c7cb088c661202564cf34.zip llvm-718729e9971d4ad12d8c7cb088c661202564cf34.tar.gz llvm-718729e9971d4ad12d8c7cb088c661202564cf34.tar.bz2 |
[libc] Add memmem implementation
Introduce the `memmem` libc string function.
`memmem_implementation` performs shared logic for `strstr`,
`strcasestr`, and `memmem`; essentially reconfiguring what was the
`strstr_implementation` to support length parameters.
Differential Revision: https://reviews.llvm.org/D147822
Diffstat (limited to 'libc')
-rw-r--r-- | libc/config/baremetal/arm/entrypoints.txt | 1 | ||||
-rw-r--r-- | libc/config/darwin/arm/entrypoints.txt | 1 | ||||
-rw-r--r-- | libc/config/darwin/x86_64/entrypoints.txt | 1 | ||||
-rw-r--r-- | libc/config/gpu/entrypoints.txt | 1 | ||||
-rw-r--r-- | libc/config/linux/aarch64/entrypoints.txt | 1 | ||||
-rw-r--r-- | libc/config/linux/arm/entrypoints.txt | 1 | ||||
-rw-r--r-- | libc/config/linux/riscv64/entrypoints.txt | 1 | ||||
-rw-r--r-- | libc/config/linux/x86_64/entrypoints.txt | 1 | ||||
-rw-r--r-- | libc/config/windows/entrypoints.txt | 1 | ||||
-rw-r--r-- | libc/spec/gnu_ext.td | 7 | ||||
-rw-r--r-- | libc/src/string/CMakeLists.txt | 10 | ||||
-rw-r--r-- | libc/src/string/memmem.cpp | 25 | ||||
-rw-r--r-- | libc/src/string/memmem.h | 21 | ||||
-rw-r--r-- | libc/src/string/memory_utils/CMakeLists.txt | 8 | ||||
-rw-r--r-- | libc/src/string/memory_utils/memmem_implementations.h | 42 | ||||
-rw-r--r-- | libc/src/string/memory_utils/strstr_implementations.h | 16 | ||||
-rw-r--r-- | libc/test/src/string/CMakeLists.txt | 10 | ||||
-rw-r--r-- | libc/test/src/string/memmem_test.cpp | 129 |
18 files changed, 265 insertions, 12 deletions
diff --git a/libc/config/baremetal/arm/entrypoints.txt b/libc/config/baremetal/arm/entrypoints.txt index dbc6b00..56daccc 100644 --- a/libc/config/baremetal/arm/entrypoints.txt +++ b/libc/config/baremetal/arm/entrypoints.txt @@ -28,6 +28,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.string.memchr libc.src.string.memcmp libc.src.string.memcpy + libc.src.string.memmem libc.src.string.memmove libc.src.string.mempcpy libc.src.string.memrchr diff --git a/libc/config/darwin/arm/entrypoints.txt b/libc/config/darwin/arm/entrypoints.txt index 62996c1..e669525 100644 --- a/libc/config/darwin/arm/entrypoints.txt +++ b/libc/config/darwin/arm/entrypoints.txt @@ -28,6 +28,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.string.memchr libc.src.string.memcmp libc.src.string.memcpy + libc.src.string.memmem libc.src.string.memmove libc.src.string.mempcpy libc.src.string.memrchr diff --git a/libc/config/darwin/x86_64/entrypoints.txt b/libc/config/darwin/x86_64/entrypoints.txt index 122e7fc..57d21a2 100644 --- a/libc/config/darwin/x86_64/entrypoints.txt +++ b/libc/config/darwin/x86_64/entrypoints.txt @@ -24,6 +24,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.string.memchr libc.src.string.memcmp libc.src.string.memcpy + libc.src.string.memmem libc.src.string.memmove libc.src.string.mempcpy libc.src.string.memrchr diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt index 4841ed1..4c78b2f 100644 --- a/libc/config/gpu/entrypoints.txt +++ b/libc/config/gpu/entrypoints.txt @@ -24,6 +24,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.string.memchr libc.src.string.memcmp libc.src.string.memcpy + libc.src.string.memmem libc.src.string.memmove libc.src.string.mempcpy libc.src.string.memrchr diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index ef9a44f..4ebe5fd 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -38,6 +38,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.string.memchr libc.src.string.memcmp libc.src.string.memcpy + libc.src.string.memmem libc.src.string.memmove libc.src.string.mempcpy libc.src.string.memrchr diff --git a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt index 5ed49d0..73d11100 100644 --- a/libc/config/linux/arm/entrypoints.txt +++ b/libc/config/linux/arm/entrypoints.txt @@ -29,6 +29,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.string.memchr libc.src.string.memcmp libc.src.string.memcpy + libc.src.string.memmem libc.src.string.memmove libc.src.string.mempcpy libc.src.string.memrchr diff --git a/libc/config/linux/riscv64/entrypoints.txt b/libc/config/linux/riscv64/entrypoints.txt index ff741ed..5902b50 100644 --- a/libc/config/linux/riscv64/entrypoints.txt +++ b/libc/config/linux/riscv64/entrypoints.txt @@ -38,6 +38,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.string.memchr libc.src.string.memcmp libc.src.string.memcpy + libc.src.string.memmem libc.src.string.memmove libc.src.string.mempcpy libc.src.string.memrchr diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index bc98b47..deff7db 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -38,6 +38,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.string.memchr libc.src.string.memcmp libc.src.string.memcpy + libc.src.string.memmem libc.src.string.memmove libc.src.string.mempcpy libc.src.string.memrchr diff --git a/libc/config/windows/entrypoints.txt b/libc/config/windows/entrypoints.txt index 8f5c4b6..d89ef04 100644 --- a/libc/config/windows/entrypoints.txt +++ b/libc/config/windows/entrypoints.txt @@ -25,6 +25,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.string.memchr libc.src.string.memcmp libc.src.string.memcpy + libc.src.string.memmem libc.src.string.memmove libc.src.string.mempcpy libc.src.string.memrchr diff --git a/libc/spec/gnu_ext.td b/libc/spec/gnu_ext.td index 52fc061..bd31897 100644 --- a/libc/spec/gnu_ext.td +++ b/libc/spec/gnu_ext.td @@ -57,7 +57,12 @@ def GnuExtensions : StandardSpec<"GNUExtensions"> { [], // Macros [], // Types [], // Enumerations - [ + [ + FunctionSpec< + "memmem", + RetValSpec<VoidPtr>, + [ArgSpec<ConstVoidPtr>, ArgSpec<SizeTType>, ArgSpec<ConstVoidPtr>, ArgSpec<SizeTType] + >, FunctionSpec< "memrchr", RetValSpec<VoidPtr>, diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt index f51e68c..a514938 100644 --- a/libc/src/string/CMakeLists.txt +++ b/libc/src/string/CMakeLists.txt @@ -60,6 +60,16 @@ add_entrypoint_object( ) add_entrypoint_object( + memmem + SRCS + memmem.cpp + HDRS + memmem.h + DEPENDS + .memory_utils.memmem_implementation +) + +add_entrypoint_object( memchr SRCS memchr.cpp diff --git a/libc/src/string/memmem.cpp b/libc/src/string/memmem.cpp new file mode 100644 index 0000000..eed1d38 --- /dev/null +++ b/libc/src/string/memmem.cpp @@ -0,0 +1,25 @@ +//===-- Implementation of memmem ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/string/memmem.h" +#include "src/__support/common.h" +#include "src/string/memory_utils/memmem_implementations.h" + +namespace __llvm_libc { + +LLVM_LIBC_FUNCTION(void *, memmem, + (const void *haystack, size_t haystack_len, + const void *needle, size_t needle_len)) { + constexpr auto comp = [](unsigned char l, unsigned char r) -> int { + return l - r; + }; + return memmem_implementation(haystack, haystack_len, needle, needle_len, + comp); +} + +} // namespace __llvm_libc diff --git a/libc/src/string/memmem.h b/libc/src/string/memmem.h new file mode 100644 index 0000000..2809fd5 --- /dev/null +++ b/libc/src/string/memmem.h @@ -0,0 +1,21 @@ +//===-- Implementation header for memmem ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STRING_MEMMEM_H +#define LLVM_LIBC_SRC_STRING_MEMMEM_H + +#include <stddef.h> // For size_t + +namespace __llvm_libc { + +void *memmem(const void *haystack, size_t haystack_len, const void *needle, + size_t needle_len); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STRING_MEMMEM_H diff --git a/libc/src/string/memory_utils/CMakeLists.txt b/libc/src/string/memory_utils/CMakeLists.txt index 30123b0..3133522 100644 --- a/libc/src/string/memory_utils/CMakeLists.txt +++ b/libc/src/string/memory_utils/CMakeLists.txt @@ -72,7 +72,13 @@ add_header_library( ) add_header_library( - strstr_implementation + strstr_implementation HDRS strstr_implementations.h ) + +add_header_library( + memmem_implementation + HDRS + memmem_implementations.h +) diff --git a/libc/src/string/memory_utils/memmem_implementations.h b/libc/src/string/memory_utils/memmem_implementations.h new file mode 100644 index 0000000..da0be0f --- /dev/null +++ b/libc/src/string/memory_utils/memmem_implementations.h @@ -0,0 +1,42 @@ +//===-- memmem implementation -----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMMEM_IMPLEMENTATIONS_H +#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMMEM_IMPLEMENTATIONS_H + +#include <stddef.h> + +namespace __llvm_libc { + +template <typename Comp> +constexpr static void * +memmem_implementation(const void *haystack, size_t haystack_len, + const void *needle, size_t needle_len, Comp &&comp) { + // TODO: simple brute force implementation. This can be + // improved upon using well known string matching algorithms. + if (!needle_len) + return const_cast<void *>(haystack); + + if (needle_len > haystack_len) + return nullptr; + + const unsigned char *h = static_cast<const unsigned char *>(haystack); + const unsigned char *n = static_cast<const unsigned char *>(needle); + for (size_t i = 0; i <= (haystack_len - needle_len); ++i) { + size_t j = 0; + for (; j < needle_len && !comp(h[i + j], n[j]); ++j) + ; + if (j == needle_len) + return const_cast<unsigned char *>(h + i); + } + return nullptr; +} + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMMEM_IMPLEMENTATIONS_H diff --git a/libc/src/string/memory_utils/strstr_implementations.h b/libc/src/string/memory_utils/strstr_implementations.h index 2c81ca7..d24aedd 100644 --- a/libc/src/string/memory_utils/strstr_implementations.h +++ b/libc/src/string/memory_utils/strstr_implementations.h @@ -9,6 +9,8 @@ #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_STRSTR_IMPLEMENTATIONS_H #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_STRSTR_IMPLEMENTATIONS_H +#include "src/string/memory_utils/memmem_implementations.h" +#include "src/string/string_utils.h" #include <stddef.h> namespace __llvm_libc { @@ -16,16 +18,10 @@ namespace __llvm_libc { template <typename Comp> constexpr static char *strstr_implementation(const char *haystack, const char *needle, Comp &&comp) { - // TODO: This is a simple brute force implementation. This can be - // improved upon using well known string matching algorithms. - for (size_t i = 0; comp(haystack[i], 0); ++i) { - size_t j = 0; - for (; comp(haystack[i + j], 0) && !comp(haystack[i + j], needle[j]); ++j) - ; - if (!comp(needle[j], 0)) - return const_cast<char *>(haystack + i); - } - return nullptr; + void *result = memmem_implementation( + static_cast<const void *>(haystack), internal::string_length(haystack), + static_cast<const void *>(needle), internal::string_length(needle), comp); + return static_cast<char *>(result); } } // namespace __llvm_libc diff --git a/libc/test/src/string/CMakeLists.txt b/libc/test/src/string/CMakeLists.txt index 6bb4c56..af5ddfe 100644 --- a/libc/test/src/string/CMakeLists.txt +++ b/libc/test/src/string/CMakeLists.txt @@ -52,6 +52,16 @@ add_libc_unittest( ) add_libc_unittest( + memmem_test + SUITE + libc_string_unittests + SRCS + memmem_test.cpp + DEPENDS + libc.src.string.memmem +) + +add_libc_unittest( memchr_test SUITE libc_string_unittests diff --git a/libc/test/src/string/memmem_test.cpp b/libc/test/src/string/memmem_test.cpp new file mode 100644 index 0000000..000fd49 --- /dev/null +++ b/libc/test/src/string/memmem_test.cpp @@ -0,0 +1,129 @@ +//===-- Unittests for memmem ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/string/memmem.h" +#include "test/UnitTest/Test.h" + +#include "src/string/string_utils.h" + +namespace __llvm_libc { + +TEST(LlvmLibcMemmemTest, EmptyHaystackEmptyNeedleReturnsHaystck) { + char *h = nullptr; + char *n = nullptr; + void *result = __llvm_libc::memmem(h, 0, n, 0); + ASSERT_EQ(static_cast<char *>(result), h); +} + +TEST(LlvmLibcMemmemTest, EmptyHaystackNonEmptyNeedleReturnsNull) { + char *h = nullptr; + char n[] = {'a', 'b', 'c'}; + void *result = __llvm_libc::memmem(h, 0, n, sizeof(n)); + ASSERT_EQ(result, static_cast<void *>(nullptr)); +} + +TEST(LlvmLibcMemmemTest, EmptyNeedleReturnsHaystack) { + char h[] = {'a', 'b', 'c'}; + char *n = nullptr; + void *result = __llvm_libc::memmem(h, sizeof(h), n, 0); + ASSERT_EQ(static_cast<char *>(result), h); +} + +TEST(LlvmLibcMemmemTest, ExactMatchReturnsHaystack) { + char h[] = {'a', 'b', 'c'}; + char n[] = {'a', 'b', 'c'}; + void *result = __llvm_libc::memmem(h, sizeof(h), n, sizeof(n)); + ASSERT_EQ(static_cast<char *>(result), h); +} + +TEST(LlvmLibcMemmemTest, ReturnFirstMatchOfNeedle) { + char h[] = {'a', 'a', 'b', 'c'}; + char n[] = {'a'}; + void *result = __llvm_libc::memmem(h, sizeof(h), n, sizeof(n)); + ASSERT_EQ(static_cast<char *>(result), h); +} + +TEST(LlvmLibcMemmemTest, ReturnFirstExactMatchOfNeedle) { + { + char h[] = {'a', 'b', 'a', 'c', 'a', 'a'}; + char n[] = {'a', 'a'}; + void *result = __llvm_libc::memmem(h, sizeof(h), n, sizeof(n)); + ASSERT_EQ(static_cast<char *>(result), h + 4); + } + { + char h[] = {'a', 'a', 'b', 'a', 'b', 'a'}; + char n[] = {'a', 'b', 'a'}; + void *result = __llvm_libc::memmem(h, sizeof(h), n, sizeof(n)); + ASSERT_EQ(static_cast<char *>(result), h + 1); + } +} + +TEST(LlvmLibcMemmemTest, NullTerminatorDoesNotInterruptMatch) { + char h[] = {'\0', 'a', 'b'}; + char n[] = {'a', 'b'}; + void *result = __llvm_libc::memmem(h, sizeof(h), n, sizeof(n)); + ASSERT_EQ(static_cast<char *>(result), h + 1); +} + +TEST(LlvmLibcMemmemTest, ReturnNullIfNoExactMatch) { + { + char h[] = {'a'}; + char n[] = {'a', 'a'}; + void *result = __llvm_libc::memmem(h, sizeof(h), n, sizeof(n)); + ASSERT_EQ(result, static_cast<void *>(nullptr)); + } + { + char h[] = {'a', 'A'}; + char n[] = {'a', 'a'}; + void *result = __llvm_libc::memmem(h, sizeof(h), n, sizeof(n)); + ASSERT_EQ(result, static_cast<void *>(nullptr)); + } + { + char h[] = {'a'}; + char n[] = {'a', '\0'}; + void *result = __llvm_libc::memmem(h, sizeof(h), n, sizeof(n)); + ASSERT_EQ(result, static_cast<void *>(nullptr)); + } + { + char h[] = {'\0'}; + char n[] = {'\0', '\0'}; + void *result = __llvm_libc::memmem(h, sizeof(h), n, sizeof(n)); + ASSERT_EQ(result, static_cast<void *>(nullptr)); + } +} + +TEST(LlvmLibcMemmemTest, ReturnMatchOfSpecifiedNeedleLength) { + { + char h[] = {'a', 'b', 'c'}; + char n[] = {'x', 'y', 'z'}; + void *result = __llvm_libc::memmem(h, sizeof(h), n, 0); + ASSERT_EQ(static_cast<char *>(result), h); + } + { + char h[] = {'a', 'b', 'c'}; + char n[] = {'b', 'c', 'a'}; + void *result = __llvm_libc::memmem(h, sizeof(h), n, 2); + ASSERT_EQ(static_cast<char *>(result), h + 1); + } +} + +TEST(LlvmLibcMemmemTest, ReturnNullIfInadequateHaystackLength) { + { + char h[] = {'a', 'b', 'c'}; + char n[] = {'c'}; + void *result = __llvm_libc::memmem(h, 2, n, sizeof(n)); + ASSERT_EQ(result, static_cast<void *>(nullptr)); + } + { + char h[] = {'a', 'b', 'c'}; + char n[] = {'a', 'b', 'c'}; + void *result = __llvm_libc::memmem(h, 2, n, sizeof(n)); + ASSERT_EQ(result, static_cast<void *>(nullptr)); + } +} +} // namespace __llvm_libc |