aboutsummaryrefslogtreecommitdiff
path: root/libc
diff options
context:
space:
mode:
authorCaslyn Tonelli <caslyn@google.com>2023-04-07 22:16:01 +0000
committerCaslyn Tonelli <caslyn@google.com>2023-04-11 20:49:25 +0000
commit718729e9971d4ad12d8c7cb088c661202564cf34 (patch)
treedc38f3b53485679fde47eca928020e017ed48914 /libc
parent1fa26e64fd87c848ff54d08e9a14ea03e01ae645 (diff)
downloadllvm-718729e9971d4ad12d8c7cb088c661202564cf34.zip
llvm-718729e9971d4ad12d8c7cb088c661202564cf34.tar.gz
llvm-718729e9971d4ad12d8c7cb088c661202564cf34.tar.bz2
[libc] Add memmem implementation
Introduce the `memmem` libc string function. `memmem_implementation` performs shared logic for `strstr`, `strcasestr`, and `memmem`; essentially reconfiguring what was the `strstr_implementation` to support length parameters. Differential Revision: https://reviews.llvm.org/D147822
Diffstat (limited to 'libc')
-rw-r--r--libc/config/baremetal/arm/entrypoints.txt1
-rw-r--r--libc/config/darwin/arm/entrypoints.txt1
-rw-r--r--libc/config/darwin/x86_64/entrypoints.txt1
-rw-r--r--libc/config/gpu/entrypoints.txt1
-rw-r--r--libc/config/linux/aarch64/entrypoints.txt1
-rw-r--r--libc/config/linux/arm/entrypoints.txt1
-rw-r--r--libc/config/linux/riscv64/entrypoints.txt1
-rw-r--r--libc/config/linux/x86_64/entrypoints.txt1
-rw-r--r--libc/config/windows/entrypoints.txt1
-rw-r--r--libc/spec/gnu_ext.td7
-rw-r--r--libc/src/string/CMakeLists.txt10
-rw-r--r--libc/src/string/memmem.cpp25
-rw-r--r--libc/src/string/memmem.h21
-rw-r--r--libc/src/string/memory_utils/CMakeLists.txt8
-rw-r--r--libc/src/string/memory_utils/memmem_implementations.h42
-rw-r--r--libc/src/string/memory_utils/strstr_implementations.h16
-rw-r--r--libc/test/src/string/CMakeLists.txt10
-rw-r--r--libc/test/src/string/memmem_test.cpp129
18 files changed, 265 insertions, 12 deletions
diff --git a/libc/config/baremetal/arm/entrypoints.txt b/libc/config/baremetal/arm/entrypoints.txt
index dbc6b00..56daccc 100644
--- a/libc/config/baremetal/arm/entrypoints.txt
+++ b/libc/config/baremetal/arm/entrypoints.txt
@@ -28,6 +28,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.string.memchr
libc.src.string.memcmp
libc.src.string.memcpy
+ libc.src.string.memmem
libc.src.string.memmove
libc.src.string.mempcpy
libc.src.string.memrchr
diff --git a/libc/config/darwin/arm/entrypoints.txt b/libc/config/darwin/arm/entrypoints.txt
index 62996c1..e669525 100644
--- a/libc/config/darwin/arm/entrypoints.txt
+++ b/libc/config/darwin/arm/entrypoints.txt
@@ -28,6 +28,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.string.memchr
libc.src.string.memcmp
libc.src.string.memcpy
+ libc.src.string.memmem
libc.src.string.memmove
libc.src.string.mempcpy
libc.src.string.memrchr
diff --git a/libc/config/darwin/x86_64/entrypoints.txt b/libc/config/darwin/x86_64/entrypoints.txt
index 122e7fc..57d21a2 100644
--- a/libc/config/darwin/x86_64/entrypoints.txt
+++ b/libc/config/darwin/x86_64/entrypoints.txt
@@ -24,6 +24,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.string.memchr
libc.src.string.memcmp
libc.src.string.memcpy
+ libc.src.string.memmem
libc.src.string.memmove
libc.src.string.mempcpy
libc.src.string.memrchr
diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt
index 4841ed1..4c78b2f 100644
--- a/libc/config/gpu/entrypoints.txt
+++ b/libc/config/gpu/entrypoints.txt
@@ -24,6 +24,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.string.memchr
libc.src.string.memcmp
libc.src.string.memcpy
+ libc.src.string.memmem
libc.src.string.memmove
libc.src.string.mempcpy
libc.src.string.memrchr
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index ef9a44f..4ebe5fd 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -38,6 +38,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.string.memchr
libc.src.string.memcmp
libc.src.string.memcpy
+ libc.src.string.memmem
libc.src.string.memmove
libc.src.string.mempcpy
libc.src.string.memrchr
diff --git a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt
index 5ed49d0..73d11100 100644
--- a/libc/config/linux/arm/entrypoints.txt
+++ b/libc/config/linux/arm/entrypoints.txt
@@ -29,6 +29,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.string.memchr
libc.src.string.memcmp
libc.src.string.memcpy
+ libc.src.string.memmem
libc.src.string.memmove
libc.src.string.mempcpy
libc.src.string.memrchr
diff --git a/libc/config/linux/riscv64/entrypoints.txt b/libc/config/linux/riscv64/entrypoints.txt
index ff741ed..5902b50 100644
--- a/libc/config/linux/riscv64/entrypoints.txt
+++ b/libc/config/linux/riscv64/entrypoints.txt
@@ -38,6 +38,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.string.memchr
libc.src.string.memcmp
libc.src.string.memcpy
+ libc.src.string.memmem
libc.src.string.memmove
libc.src.string.mempcpy
libc.src.string.memrchr
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index bc98b47..deff7db 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -38,6 +38,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.string.memchr
libc.src.string.memcmp
libc.src.string.memcpy
+ libc.src.string.memmem
libc.src.string.memmove
libc.src.string.mempcpy
libc.src.string.memrchr
diff --git a/libc/config/windows/entrypoints.txt b/libc/config/windows/entrypoints.txt
index 8f5c4b6..d89ef04 100644
--- a/libc/config/windows/entrypoints.txt
+++ b/libc/config/windows/entrypoints.txt
@@ -25,6 +25,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.string.memchr
libc.src.string.memcmp
libc.src.string.memcpy
+ libc.src.string.memmem
libc.src.string.memmove
libc.src.string.mempcpy
libc.src.string.memrchr
diff --git a/libc/spec/gnu_ext.td b/libc/spec/gnu_ext.td
index 52fc061..bd31897 100644
--- a/libc/spec/gnu_ext.td
+++ b/libc/spec/gnu_ext.td
@@ -57,7 +57,12 @@ def GnuExtensions : StandardSpec<"GNUExtensions"> {
[], // Macros
[], // Types
[], // Enumerations
- [
+ [
+ FunctionSpec<
+ "memmem",
+ RetValSpec<VoidPtr>,
+ [ArgSpec<ConstVoidPtr>, ArgSpec<SizeTType>, ArgSpec<ConstVoidPtr>, ArgSpec<SizeTType]
+ >,
FunctionSpec<
"memrchr",
RetValSpec<VoidPtr>,
diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt
index f51e68c..a514938 100644
--- a/libc/src/string/CMakeLists.txt
+++ b/libc/src/string/CMakeLists.txt
@@ -60,6 +60,16 @@ add_entrypoint_object(
)
add_entrypoint_object(
+ memmem
+ SRCS
+ memmem.cpp
+ HDRS
+ memmem.h
+ DEPENDS
+ .memory_utils.memmem_implementation
+)
+
+add_entrypoint_object(
memchr
SRCS
memchr.cpp
diff --git a/libc/src/string/memmem.cpp b/libc/src/string/memmem.cpp
new file mode 100644
index 0000000..eed1d38
--- /dev/null
+++ b/libc/src/string/memmem.cpp
@@ -0,0 +1,25 @@
+//===-- Implementation of memmem ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/string/memmem.h"
+#include "src/__support/common.h"
+#include "src/string/memory_utils/memmem_implementations.h"
+
+namespace __llvm_libc {
+
+LLVM_LIBC_FUNCTION(void *, memmem,
+ (const void *haystack, size_t haystack_len,
+ const void *needle, size_t needle_len)) {
+ constexpr auto comp = [](unsigned char l, unsigned char r) -> int {
+ return l - r;
+ };
+ return memmem_implementation(haystack, haystack_len, needle, needle_len,
+ comp);
+}
+
+} // namespace __llvm_libc
diff --git a/libc/src/string/memmem.h b/libc/src/string/memmem.h
new file mode 100644
index 0000000..2809fd5
--- /dev/null
+++ b/libc/src/string/memmem.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for memmem ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_MEMMEM_H
+#define LLVM_LIBC_SRC_STRING_MEMMEM_H
+
+#include <stddef.h> // For size_t
+
+namespace __llvm_libc {
+
+void *memmem(const void *haystack, size_t haystack_len, const void *needle,
+ size_t needle_len);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STRING_MEMMEM_H
diff --git a/libc/src/string/memory_utils/CMakeLists.txt b/libc/src/string/memory_utils/CMakeLists.txt
index 30123b0..3133522 100644
--- a/libc/src/string/memory_utils/CMakeLists.txt
+++ b/libc/src/string/memory_utils/CMakeLists.txt
@@ -72,7 +72,13 @@ add_header_library(
)
add_header_library(
- strstr_implementation
+ strstr_implementation
HDRS
strstr_implementations.h
)
+
+add_header_library(
+ memmem_implementation
+ HDRS
+ memmem_implementations.h
+)
diff --git a/libc/src/string/memory_utils/memmem_implementations.h b/libc/src/string/memory_utils/memmem_implementations.h
new file mode 100644
index 0000000..da0be0f
--- /dev/null
+++ b/libc/src/string/memory_utils/memmem_implementations.h
@@ -0,0 +1,42 @@
+//===-- memmem implementation -----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMMEM_IMPLEMENTATIONS_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMMEM_IMPLEMENTATIONS_H
+
+#include <stddef.h>
+
+namespace __llvm_libc {
+
+template <typename Comp>
+constexpr static void *
+memmem_implementation(const void *haystack, size_t haystack_len,
+ const void *needle, size_t needle_len, Comp &&comp) {
+ // TODO: simple brute force implementation. This can be
+ // improved upon using well known string matching algorithms.
+ if (!needle_len)
+ return const_cast<void *>(haystack);
+
+ if (needle_len > haystack_len)
+ return nullptr;
+
+ const unsigned char *h = static_cast<const unsigned char *>(haystack);
+ const unsigned char *n = static_cast<const unsigned char *>(needle);
+ for (size_t i = 0; i <= (haystack_len - needle_len); ++i) {
+ size_t j = 0;
+ for (; j < needle_len && !comp(h[i + j], n[j]); ++j)
+ ;
+ if (j == needle_len)
+ return const_cast<unsigned char *>(h + i);
+ }
+ return nullptr;
+}
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMMEM_IMPLEMENTATIONS_H
diff --git a/libc/src/string/memory_utils/strstr_implementations.h b/libc/src/string/memory_utils/strstr_implementations.h
index 2c81ca7..d24aedd 100644
--- a/libc/src/string/memory_utils/strstr_implementations.h
+++ b/libc/src/string/memory_utils/strstr_implementations.h
@@ -9,6 +9,8 @@
#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_STRSTR_IMPLEMENTATIONS_H
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_STRSTR_IMPLEMENTATIONS_H
+#include "src/string/memory_utils/memmem_implementations.h"
+#include "src/string/string_utils.h"
#include <stddef.h>
namespace __llvm_libc {
@@ -16,16 +18,10 @@ namespace __llvm_libc {
template <typename Comp>
constexpr static char *strstr_implementation(const char *haystack,
const char *needle, Comp &&comp) {
- // TODO: This is a simple brute force implementation. This can be
- // improved upon using well known string matching algorithms.
- for (size_t i = 0; comp(haystack[i], 0); ++i) {
- size_t j = 0;
- for (; comp(haystack[i + j], 0) && !comp(haystack[i + j], needle[j]); ++j)
- ;
- if (!comp(needle[j], 0))
- return const_cast<char *>(haystack + i);
- }
- return nullptr;
+ void *result = memmem_implementation(
+ static_cast<const void *>(haystack), internal::string_length(haystack),
+ static_cast<const void *>(needle), internal::string_length(needle), comp);
+ return static_cast<char *>(result);
}
} // namespace __llvm_libc
diff --git a/libc/test/src/string/CMakeLists.txt b/libc/test/src/string/CMakeLists.txt
index 6bb4c56..af5ddfe 100644
--- a/libc/test/src/string/CMakeLists.txt
+++ b/libc/test/src/string/CMakeLists.txt
@@ -52,6 +52,16 @@ add_libc_unittest(
)
add_libc_unittest(
+ memmem_test
+ SUITE
+ libc_string_unittests
+ SRCS
+ memmem_test.cpp
+ DEPENDS
+ libc.src.string.memmem
+)
+
+add_libc_unittest(
memchr_test
SUITE
libc_string_unittests
diff --git a/libc/test/src/string/memmem_test.cpp b/libc/test/src/string/memmem_test.cpp
new file mode 100644
index 0000000..000fd49
--- /dev/null
+++ b/libc/test/src/string/memmem_test.cpp
@@ -0,0 +1,129 @@
+//===-- Unittests for memmem ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/string/memmem.h"
+#include "test/UnitTest/Test.h"
+
+#include "src/string/string_utils.h"
+
+namespace __llvm_libc {
+
+TEST(LlvmLibcMemmemTest, EmptyHaystackEmptyNeedleReturnsHaystck) {
+ char *h = nullptr;
+ char *n = nullptr;
+ void *result = __llvm_libc::memmem(h, 0, n, 0);
+ ASSERT_EQ(static_cast<char *>(result), h);
+}
+
+TEST(LlvmLibcMemmemTest, EmptyHaystackNonEmptyNeedleReturnsNull) {
+ char *h = nullptr;
+ char n[] = {'a', 'b', 'c'};
+ void *result = __llvm_libc::memmem(h, 0, n, sizeof(n));
+ ASSERT_EQ(result, static_cast<void *>(nullptr));
+}
+
+TEST(LlvmLibcMemmemTest, EmptyNeedleReturnsHaystack) {
+ char h[] = {'a', 'b', 'c'};
+ char *n = nullptr;
+ void *result = __llvm_libc::memmem(h, sizeof(h), n, 0);
+ ASSERT_EQ(static_cast<char *>(result), h);
+}
+
+TEST(LlvmLibcMemmemTest, ExactMatchReturnsHaystack) {
+ char h[] = {'a', 'b', 'c'};
+ char n[] = {'a', 'b', 'c'};
+ void *result = __llvm_libc::memmem(h, sizeof(h), n, sizeof(n));
+ ASSERT_EQ(static_cast<char *>(result), h);
+}
+
+TEST(LlvmLibcMemmemTest, ReturnFirstMatchOfNeedle) {
+ char h[] = {'a', 'a', 'b', 'c'};
+ char n[] = {'a'};
+ void *result = __llvm_libc::memmem(h, sizeof(h), n, sizeof(n));
+ ASSERT_EQ(static_cast<char *>(result), h);
+}
+
+TEST(LlvmLibcMemmemTest, ReturnFirstExactMatchOfNeedle) {
+ {
+ char h[] = {'a', 'b', 'a', 'c', 'a', 'a'};
+ char n[] = {'a', 'a'};
+ void *result = __llvm_libc::memmem(h, sizeof(h), n, sizeof(n));
+ ASSERT_EQ(static_cast<char *>(result), h + 4);
+ }
+ {
+ char h[] = {'a', 'a', 'b', 'a', 'b', 'a'};
+ char n[] = {'a', 'b', 'a'};
+ void *result = __llvm_libc::memmem(h, sizeof(h), n, sizeof(n));
+ ASSERT_EQ(static_cast<char *>(result), h + 1);
+ }
+}
+
+TEST(LlvmLibcMemmemTest, NullTerminatorDoesNotInterruptMatch) {
+ char h[] = {'\0', 'a', 'b'};
+ char n[] = {'a', 'b'};
+ void *result = __llvm_libc::memmem(h, sizeof(h), n, sizeof(n));
+ ASSERT_EQ(static_cast<char *>(result), h + 1);
+}
+
+TEST(LlvmLibcMemmemTest, ReturnNullIfNoExactMatch) {
+ {
+ char h[] = {'a'};
+ char n[] = {'a', 'a'};
+ void *result = __llvm_libc::memmem(h, sizeof(h), n, sizeof(n));
+ ASSERT_EQ(result, static_cast<void *>(nullptr));
+ }
+ {
+ char h[] = {'a', 'A'};
+ char n[] = {'a', 'a'};
+ void *result = __llvm_libc::memmem(h, sizeof(h), n, sizeof(n));
+ ASSERT_EQ(result, static_cast<void *>(nullptr));
+ }
+ {
+ char h[] = {'a'};
+ char n[] = {'a', '\0'};
+ void *result = __llvm_libc::memmem(h, sizeof(h), n, sizeof(n));
+ ASSERT_EQ(result, static_cast<void *>(nullptr));
+ }
+ {
+ char h[] = {'\0'};
+ char n[] = {'\0', '\0'};
+ void *result = __llvm_libc::memmem(h, sizeof(h), n, sizeof(n));
+ ASSERT_EQ(result, static_cast<void *>(nullptr));
+ }
+}
+
+TEST(LlvmLibcMemmemTest, ReturnMatchOfSpecifiedNeedleLength) {
+ {
+ char h[] = {'a', 'b', 'c'};
+ char n[] = {'x', 'y', 'z'};
+ void *result = __llvm_libc::memmem(h, sizeof(h), n, 0);
+ ASSERT_EQ(static_cast<char *>(result), h);
+ }
+ {
+ char h[] = {'a', 'b', 'c'};
+ char n[] = {'b', 'c', 'a'};
+ void *result = __llvm_libc::memmem(h, sizeof(h), n, 2);
+ ASSERT_EQ(static_cast<char *>(result), h + 1);
+ }
+}
+
+TEST(LlvmLibcMemmemTest, ReturnNullIfInadequateHaystackLength) {
+ {
+ char h[] = {'a', 'b', 'c'};
+ char n[] = {'c'};
+ void *result = __llvm_libc::memmem(h, 2, n, sizeof(n));
+ ASSERT_EQ(result, static_cast<void *>(nullptr));
+ }
+ {
+ char h[] = {'a', 'b', 'c'};
+ char n[] = {'a', 'b', 'c'};
+ void *result = __llvm_libc::memmem(h, 2, n, sizeof(n));
+ ASSERT_EQ(result, static_cast<void *>(nullptr));
+ }
+}
+} // namespace __llvm_libc