diff options
author | Schrodinger ZHU Yifan <yifanzhu@rochester.edu> | 2024-05-02 15:36:10 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-02 15:36:10 -0400 |
commit | 0e5ff6251fa215e74bfa570e88bb6c7bf12c46d8 (patch) | |
tree | 11c57569922f7450d12d852fe77c38cd9235086a /libc/fuzzing | |
parent | 38f9c013a090b666328aecceac03dda84d2b14ca (diff) | |
download | llvm-0e5ff6251fa215e74bfa570e88bb6c7bf12c46d8.zip llvm-0e5ff6251fa215e74bfa570e88bb6c7bf12c46d8.tar.gz llvm-0e5ff6251fa215e74bfa570e88bb6c7bf12c46d8.tar.bz2 |
[libc] add hashtable fuzzing (#87949)
Diffstat (limited to 'libc/fuzzing')
-rw-r--r-- | libc/fuzzing/__support/CMakeLists.txt | 18 | ||||
-rw-r--r-- | libc/fuzzing/__support/hashtable_fuzz.cpp | 182 | ||||
-rw-r--r-- | libc/fuzzing/__support/uint_fuzz.cpp | 11 |
3 files changed, 211 insertions, 0 deletions
diff --git a/libc/fuzzing/__support/CMakeLists.txt b/libc/fuzzing/__support/CMakeLists.txt index d4f6db7..b088761 100644 --- a/libc/fuzzing/__support/CMakeLists.txt +++ b/libc/fuzzing/__support/CMakeLists.txt @@ -5,3 +5,21 @@ add_libc_fuzzer( DEPENDS libc.src.__support.big_int ) + +add_libc_fuzzer( + hashtable_fuzz + SRCS + hashtable_fuzz.cpp + DEPENDS + libc.src.__support.HashTable.table +) + +add_libc_fuzzer( + hashtable_opt_fuzz + SRCS + hashtable_fuzz.cpp + DEPENDS + libc.src.__support.HashTable.table + COMPILE_OPTIONS + -D__LIBC_EXPLICIT_SIMD_OPT +) diff --git a/libc/fuzzing/__support/hashtable_fuzz.cpp b/libc/fuzzing/__support/hashtable_fuzz.cpp new file mode 100644 index 0000000..07f1057 --- /dev/null +++ b/libc/fuzzing/__support/hashtable_fuzz.cpp @@ -0,0 +1,182 @@ +//===-- hashtable_fuzz.cpp ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// Fuzzing test for llvm-libc hashtable implementations. +/// +//===----------------------------------------------------------------------===// +#include "include/llvm-libc-types/ENTRY.h" +#include "src/__support/CPP/string_view.h" +#include "src/__support/HashTable/table.h" + +namespace LIBC_NAMESPACE { + +// A fuzzing payload starts with +// - uint16_t: initial capacity for table A +// - uint64_t: seed for table A +// - uint16_t: initial capacity for table B +// - uint64_t: seed for table B +// Followed by a sequence of actions: +// - CrossCheck: only a single byte valued (4 mod 5) +// - Find: a single byte valued (3 mod 5) followed by a null-terminated string +// - Insert: a single byte valued (0,1,2 mod 5) followed by a null-terminated +// string +static constexpr size_t INITIAL_HEADER_SIZE = + 2 * (sizeof(uint16_t) + sizeof(uint64_t)); +extern "C" size_t LLVMFuzzerMutate(uint8_t *data, size_t size, size_t max_size); +extern "C" size_t LLVMFuzzerCustomMutator(uint8_t *data, size_t size, + size_t max_size, unsigned int seed) { + size = LLVMFuzzerMutate(data, size, max_size); + // not enough to read the initial capacities and seeds + if (size < INITIAL_HEADER_SIZE) + return 0; + + // skip the initial capacities and seeds + size_t i = INITIAL_HEADER_SIZE; + while (i < size) { + // cross check + if (static_cast<uint8_t>(data[i]) % 5 == 4) { + // skip the cross check byte + ++i; + continue; + } + + // find or insert + // check if there is enough space for the action byte and the + // null-terminator + if (i + 2 >= max_size) + return i; + // skip the action byte + ++i; + // skip the null-terminated string + while (i < max_size && data[i] != 0) + ++i; + // in the case the string is not null-terminated, null-terminate it + if (i == max_size && data[i - 1] != 0) { + data[i - 1] = 0; + return max_size; + } + + // move to the next action + ++i; + } + // return the new size + return i; +} + +// a tagged union +struct Action { + enum class Tag { Find, Insert, CrossCheck } tag; + cpp::string_view key; +}; + +static struct { + size_t remaining; + const char *buffer; + + template <typename T> T next() { + static_assert(cpp::is_integral<T>::value, "T must be an integral type"); + union { + T result; + char data[sizeof(T)]; + }; + for (size_t i = 0; i < sizeof(result); i++) + data[i] = buffer[i]; + buffer += sizeof(result); + remaining -= sizeof(result); + return result; + } + + cpp::string_view next_string() { + cpp::string_view result(buffer); + buffer = result.end() + 1; + remaining -= result.size() + 1; + return result; + } + + Action next_action() { + uint8_t byte = next<uint8_t>(); + switch (byte % 5) { + case 4: + return {Action::Tag::CrossCheck, {}}; + case 3: + return {Action::Tag::Find, next_string()}; + default: + return {Action::Tag::Insert, next_string()}; + } + } +} global_status; + +class HashTable { + internal::HashTable *table; + +public: + HashTable(uint64_t size, uint64_t seed) + : table(internal::HashTable::allocate(size, seed)) {} + HashTable(internal::HashTable *table) : table(table) {} + ~HashTable() { internal::HashTable::deallocate(table); } + HashTable(HashTable &&other) : table(other.table) { other.table = nullptr; } + bool is_valid() const { return table != nullptr; } + ENTRY *find(const char *key) { return table->find(key); } + ENTRY *insert(const ENTRY &entry) { + return internal::HashTable::insert(this->table, entry); + } + using iterator = internal::HashTable::iterator; + iterator begin() const { return table->begin(); } + iterator end() const { return table->end(); } +}; + +HashTable next_hashtable() { + size_t size = global_status.next<uint16_t>(); + uint64_t seed = global_status.next<uint64_t>(); + return HashTable(size, seed); +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + global_status.buffer = reinterpret_cast<const char *>(data); + global_status.remaining = size; + if (global_status.remaining < INITIAL_HEADER_SIZE) + return 0; + + HashTable table_a = next_hashtable(); + HashTable table_b = next_hashtable(); + for (;;) { + if (global_status.remaining == 0) + break; + Action action = global_status.next_action(); + switch (action.tag) { + case Action::Tag::Find: { + if (static_cast<bool>(table_a.find(action.key.data())) != + static_cast<bool>(table_b.find(action.key.data()))) + __builtin_trap(); + break; + } + case Action::Tag::Insert: { + char *ptr = const_cast<char *>(action.key.data()); + ENTRY *a = table_a.insert(ENTRY{ptr, ptr}); + ENTRY *b = table_b.insert(ENTRY{ptr, ptr}); + if (a->data != b->data) + __builtin_trap(); + break; + } + case Action::Tag::CrossCheck: { + for (ENTRY a : table_a) + if (const ENTRY *b = table_b.find(a.key); a.data != b->data) + __builtin_trap(); + + for (ENTRY b : table_b) + if (const ENTRY *a = table_a.find(b.key); a->data != b.data) + __builtin_trap(); + + break; + } + } + } + return 0; +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/fuzzing/__support/uint_fuzz.cpp b/libc/fuzzing/__support/uint_fuzz.cpp index 07149f5..109375f 100644 --- a/libc/fuzzing/__support/uint_fuzz.cpp +++ b/libc/fuzzing/__support/uint_fuzz.cpp @@ -1,3 +1,14 @@ +//===-- uint_fuzz.cpp -----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// Fuzzing test for llvm-libc unsigned integer utilities. +/// +//===----------------------------------------------------------------------===// #include "src/__support/CPP/bit.h" #include "src/__support/big_int.h" #include "src/string/memory_utils/inline_memcpy.h" |