//===-- Benchmark memory specific tools -------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // This file complements the `benchmark` header with memory specific tools and // benchmarking facilities. #ifndef LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_H #define LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_H #include "LibcBenchmark.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Alignment.h" #include #include namespace llvm { namespace libc_benchmarks { //-------------- // Configuration //-------------- // Specifies a range of sizes to explore. struct SizeRange { uint32_t From = 0; // Inclusive uint32_t To = 1024; // Inclusive uint32_t Step = 1; }; // An object to define how to test a memory function. struct StudyConfiguration { // The number of run for the study. uint32_t Runs = 1; // The size of the buffers (1 buffer for memset but 2 for memcpy or memcmp). // When testing small sizes, it's important to keep the total allocated // size under the size of the L1 cache (usually 16 or 32KiB). The framework // will also use 2KiB of additional L1 memory to store the function // parameters. uint32_t BufferSize = 8192; // The range of sizes to exercise. SizeRange Size; MaybeAlign AddressAlignment; // Unset : Use start of buffer which is at // least cache line aligned) // 1 : Use random address, // >1 : Use random address aligned to value. // The value to use for memset. uint8_t MemsetValue = 0; // The mismatch position for memcmp. uint32_t MemcmpMismatchAt = 0; // 0 : Buffer compare equal, // >0 : Buffer compare different at byte N-1. }; //-------- // Results //-------- // The time to run one iteration of the function under test for the specified // Size. struct Measurement { uint32_t Size = 0; Duration Runtime = {}; }; // The measurements for a specific function. struct FunctionMeasurements { std::string Name; std::vector Measurements; }; // The root object containing all the data (configuration and measurements). struct Study { HostState Host; BenchmarkOptions Options; StudyConfiguration Configuration; SmallVector Functions; }; // Provides an aligned, dynamically allocated buffer. class AlignedBuffer { char *const Buffer = nullptr; size_t Size = 0; public: static constexpr size_t Alignment = 1024; explicit AlignedBuffer(size_t Size) : Buffer(static_cast(aligned_alloc(1024, Size))), Size(Size) {} ~AlignedBuffer() { free(Buffer); } inline char *operator+(size_t Index) { return Buffer + Index; } inline const char *operator+(size_t Index) const { return Buffer + Index; } inline char &operator[](size_t Index) { return Buffer[Index]; } inline const char &operator[](size_t Index) const { return Buffer[Index]; } inline char *begin() { return Buffer; } inline char *end() { return Buffer + Size; } }; // Implements the ParameterProvider abstraction needed by the `benchmark` // function. This implementation makes sure that all parameters will fit into // `StorageSize` bytes. The total memory accessed during benchmark should be // less than the data L1 cache, that is the storage for the ParameterProvider // and the memory buffers. template class SmallParameterProvider { using ParameterType = typename Context::ParameterType; ByteConstrainedArray Parameters; size_t LastIterations; Context &Ctx; public: explicit SmallParameterProvider(Context &C) : Ctx(C) {} SmallParameterProvider(const SmallParameterProvider &) = delete; SmallParameterProvider &operator=(const SmallParameterProvider &) = delete; // Useful to compute the histogram of the size parameter. CircularArrayRef getLastBatch() const { return cycle(Parameters, LastIterations); } // Implements the interface needed by the `benchmark` function. CircularArrayRef generateBatch(size_t Iterations) { LastIterations = Iterations; Ctx.Randomize(Parameters); return getLastBatch(); } }; // Helper to generate random buffer offsets that satisfy the configuration // constraints. class OffsetDistribution { std::uniform_int_distribution Distribution; uint32_t Factor; public: explicit OffsetDistribution(const StudyConfiguration &Conf); template uint32_t operator()(Generator &G) { return Distribution(G) * Factor; } }; // Helper to generate random buffer offsets that satisfy the configuration // constraints. It is specifically designed to benchmark `memcmp` functions // where we may want the Nth byte to differ. class MismatchOffsetDistribution { std::uniform_int_distribution MismatchIndexSelector; llvm::SmallVector MismatchIndices; const uint32_t MismatchAt; public: explicit MismatchOffsetDistribution(const StudyConfiguration &Conf); explicit operator bool() const { return !MismatchIndices.empty(); } const llvm::SmallVectorImpl &getMismatchIndices() const { return MismatchIndices; } template uint32_t operator()(Generator &G, uint32_t Size) { const uint32_t MismatchIndex = MismatchIndices[MismatchIndexSelector(G)]; // We need to position the offset so that a mismatch occurs at MismatchAt. if (Size >= MismatchAt) return MismatchIndex - MismatchAt; // Size is too small to trigger the mismatch. return MismatchIndex - Size - 1; } }; } // namespace libc_benchmarks } // namespace llvm #endif // LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_H