aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/CAS
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/CAS')
-rw-r--r--llvm/lib/CAS/CMakeLists.txt5
-rw-r--r--llvm/lib/CAS/DatabaseFile.cpp123
-rw-r--r--llvm/lib/CAS/DatabaseFile.h153
-rw-r--r--llvm/lib/CAS/InMemoryCAS.cpp3
-rw-r--r--llvm/lib/CAS/OnDiskTrieRawHashMap.cpp1178
5 files changed, 1462 insertions, 0 deletions
diff --git a/llvm/lib/CAS/CMakeLists.txt b/llvm/lib/CAS/CMakeLists.txt
index 6ed724b..7ae5f7e 100644
--- a/llvm/lib/CAS/CMakeLists.txt
+++ b/llvm/lib/CAS/CMakeLists.txt
@@ -2,14 +2,19 @@ add_llvm_component_library(LLVMCAS
ActionCache.cpp
ActionCaches.cpp
BuiltinCAS.cpp
+ DatabaseFile.cpp
InMemoryCAS.cpp
MappedFileRegionArena.cpp
ObjectStore.cpp
OnDiskCommon.cpp
+ OnDiskTrieRawHashMap.cpp
ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/CAS
+ LINK_LIBS
+ ${LLVM_PTHREAD_LIB}
+
LINK_COMPONENTS
Support
)
diff --git a/llvm/lib/CAS/DatabaseFile.cpp b/llvm/lib/CAS/DatabaseFile.cpp
new file mode 100644
index 0000000..db8ce1d
--- /dev/null
+++ b/llvm/lib/CAS/DatabaseFile.cpp
@@ -0,0 +1,123 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file This file implements the common abstractions for CAS database file.
+///
+//===----------------------------------------------------------------------===//
+
+#include "DatabaseFile.h"
+
+using namespace llvm;
+using namespace llvm::cas;
+using namespace llvm::cas::ondisk;
+
+Error ondisk::createTableConfigError(std::errc ErrC, StringRef Path,
+ StringRef TableName, const Twine &Msg) {
+ return createStringError(make_error_code(ErrC),
+ Path + "[" + TableName + "]: " + Msg);
+}
+
+Error ondisk::checkTable(StringRef Label, size_t Expected, size_t Observed,
+ StringRef Path, StringRef TrieName) {
+ if (Expected == Observed)
+ return Error::success();
+ return createTableConfigError(std::errc::invalid_argument, Path, TrieName,
+ "mismatched " + Label +
+ " (expected: " + Twine(Expected) +
+ ", observed: " + Twine(Observed) + ")");
+}
+
+Expected<DatabaseFile>
+DatabaseFile::create(const Twine &Path, uint64_t Capacity,
+ function_ref<Error(DatabaseFile &)> NewDBConstructor) {
+ // Constructor for if the file doesn't exist.
+ auto NewFileConstructor = [&](MappedFileRegionArena &Alloc) -> Error {
+ if (Alloc.capacity() <
+ sizeof(Header) + sizeof(MappedFileRegionArena::Header))
+ return createTableConfigError(std::errc::argument_out_of_domain,
+ Path.str(), "datafile",
+ "Allocator too small for header");
+ (void)new (Alloc.data()) Header{getMagic(), getVersion(), {0}};
+ DatabaseFile DB(Alloc);
+ return NewDBConstructor(DB);
+ };
+
+ // Get or create the file.
+ MappedFileRegionArena Alloc;
+ if (Error E = MappedFileRegionArena::create(Path, Capacity, sizeof(Header),
+ NewFileConstructor)
+ .moveInto(Alloc))
+ return std::move(E);
+
+ return DatabaseFile::get(
+ std::make_unique<MappedFileRegionArena>(std::move(Alloc)));
+}
+
+Error DatabaseFile::addTable(TableHandle Table) {
+ assert(Table);
+ assert(&Table.getRegion() == &getRegion());
+ int64_t ExistingRootOffset = 0;
+ const int64_t NewOffset =
+ reinterpret_cast<const char *>(&Table.getHeader()) - getRegion().data();
+ if (H->RootTableOffset.compare_exchange_strong(ExistingRootOffset, NewOffset))
+ return Error::success();
+
+ // Silently ignore attempts to set the root to itself.
+ if (ExistingRootOffset == NewOffset)
+ return Error::success();
+
+ // Return an proper error message.
+ TableHandle Root(getRegion(), ExistingRootOffset);
+ if (Root.getName() == Table.getName())
+ return createStringError(
+ make_error_code(std::errc::not_supported),
+ "collision with existing table of the same name '" + Table.getName() +
+ "'");
+
+ return createStringError(make_error_code(std::errc::not_supported),
+ "cannot add new table '" + Table.getName() +
+ "'"
+ " to existing root '" +
+ Root.getName() + "'");
+}
+
+std::optional<TableHandle> DatabaseFile::findTable(StringRef Name) {
+ int64_t RootTableOffset = H->RootTableOffset.load();
+ if (!RootTableOffset)
+ return std::nullopt;
+
+ TableHandle Root(getRegion(), RootTableOffset);
+ if (Root.getName() == Name)
+ return Root;
+
+ return std::nullopt;
+}
+
+Error DatabaseFile::validate(MappedFileRegion &Region) {
+ if (Region.size() < sizeof(Header))
+ return createStringError(std::errc::invalid_argument,
+ "database: missing header");
+
+ // Check the magic and version.
+ auto *H = reinterpret_cast<Header *>(Region.data());
+ if (H->Magic != getMagic())
+ return createStringError(std::errc::invalid_argument,
+ "database: bad magic");
+ if (H->Version != getVersion())
+ return createStringError(std::errc::invalid_argument,
+ "database: wrong version");
+
+ auto *MFH = reinterpret_cast<MappedFileRegionArena::Header *>(Region.data() +
+ sizeof(Header));
+ // Check the bump-ptr, which should point past the header.
+ if (MFH->BumpPtr.load() < (int64_t)sizeof(Header))
+ return createStringError(std::errc::invalid_argument,
+ "database: corrupt bump-ptr");
+
+ return Error::success();
+}
diff --git a/llvm/lib/CAS/DatabaseFile.h b/llvm/lib/CAS/DatabaseFile.h
new file mode 100644
index 0000000..609e5f13
--- /dev/null
+++ b/llvm/lib/CAS/DatabaseFile.h
@@ -0,0 +1,153 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file declares the common interface for a DatabaseFile that is used to
+/// implement OnDiskCAS.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CAS_DATABASEFILE_H
+#define LLVM_LIB_CAS_DATABASEFILE_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CAS/MappedFileRegionArena.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm::cas::ondisk {
+
+using MappedFileRegion = MappedFileRegionArena::RegionT;
+
+/// Generic handle for a table.
+///
+/// Generic table header layout:
+/// - 2-bytes: TableKind
+/// - 2-bytes: TableNameSize
+/// - 4-bytes: TableNameRelOffset (relative to header)
+class TableHandle {
+public:
+ enum class TableKind : uint16_t {
+ TrieRawHashMap = 1,
+ DataAllocator = 2,
+ };
+ struct Header {
+ TableKind Kind;
+ uint16_t NameSize;
+ int32_t NameRelOffset; ///< Relative to Header.
+ };
+
+ explicit operator bool() const { return H; }
+ const Header &getHeader() const { return *H; }
+ MappedFileRegion &getRegion() const { return *Region; }
+
+ template <class T> static void check() {
+ static_assert(
+ std::is_same<decltype(T::Header::GenericHeader), Header>::value,
+ "T::GenericHeader should be of type TableHandle::Header");
+ static_assert(offsetof(typename T::Header, GenericHeader) == 0,
+ "T::GenericHeader must be the head of T::Header");
+ }
+ template <class T> bool is() const { return T::Kind == H->Kind; }
+ template <class T> T dyn_cast() const {
+ check<T>();
+ if (is<T>())
+ return T(*Region, *reinterpret_cast<typename T::Header *>(H));
+ return T();
+ }
+ template <class T> T cast() const {
+ assert(is<T>());
+ return dyn_cast<T>();
+ }
+
+ StringRef getName() const {
+ auto *Begin = reinterpret_cast<const char *>(H) + H->NameRelOffset;
+ return StringRef(Begin, H->NameSize);
+ }
+
+ TableHandle() = default;
+ TableHandle(MappedFileRegion &Region, Header &H) : Region(&Region), H(&H) {}
+ TableHandle(MappedFileRegion &Region, intptr_t HeaderOffset)
+ : TableHandle(Region,
+ *reinterpret_cast<Header *>(Region.data() + HeaderOffset)) {
+ }
+
+private:
+ MappedFileRegion *Region = nullptr;
+ Header *H = nullptr;
+};
+
+/// Encapsulate a database file, which:
+/// - Sets/checks magic.
+/// - Sets/checks version.
+/// - Points at an arbitrary root table.
+/// - Sets up a MappedFileRegionArena for allocation.
+///
+/// Top-level layout:
+/// - 4-bytes: Magic
+/// - 4-bytes: Version
+/// - 8-bytes: RootTableOffset (16-bits: Kind; 48-bits: Offset)
+/// - 8-bytes: BumpPtr from MappedFileRegionArena
+class DatabaseFile {
+public:
+ static constexpr uint32_t getMagic() { return 0xDA7ABA53UL; }
+ static constexpr uint32_t getVersion() { return 1UL; }
+ struct Header {
+ uint32_t Magic;
+ uint32_t Version;
+ std::atomic<int64_t> RootTableOffset;
+ };
+
+ const Header &getHeader() { return *H; }
+ MappedFileRegionArena &getAlloc() { return Alloc; }
+ MappedFileRegion &getRegion() { return Alloc.getRegion(); }
+
+ /// Add a table. This is currently not thread safe and should be called inside
+ /// NewDBConstructor.
+ Error addTable(TableHandle Table);
+
+ /// Find a table. May return null.
+ std::optional<TableHandle> findTable(StringRef Name);
+
+ /// Create the DatabaseFile at Path with Capacity.
+ static Expected<DatabaseFile>
+ create(const Twine &Path, uint64_t Capacity,
+ function_ref<Error(DatabaseFile &)> NewDBConstructor);
+
+ size_t size() const { return Alloc.size(); }
+
+private:
+ static Expected<DatabaseFile>
+ get(std::unique_ptr<MappedFileRegionArena> Alloc) {
+ if (Error E = validate(Alloc->getRegion()))
+ return std::move(E);
+ return DatabaseFile(std::move(Alloc));
+ }
+
+ static Error validate(MappedFileRegion &Region);
+
+ DatabaseFile(MappedFileRegionArena &Alloc)
+ : H(reinterpret_cast<Header *>(Alloc.data())), Alloc(Alloc) {}
+ DatabaseFile(std::unique_ptr<MappedFileRegionArena> Alloc)
+ : DatabaseFile(*Alloc) {
+ OwnedAlloc = std::move(Alloc);
+ }
+
+ Header *H = nullptr;
+ MappedFileRegionArena &Alloc;
+ std::unique_ptr<MappedFileRegionArena> OwnedAlloc;
+};
+
+Error createTableConfigError(std::errc ErrC, StringRef Path,
+ StringRef TableName, const Twine &Msg);
+
+Error checkTable(StringRef Label, size_t Expected, size_t Observed,
+ StringRef Path, StringRef TrieName);
+
+} // namespace llvm::cas::ondisk
+
+#endif
diff --git a/llvm/lib/CAS/InMemoryCAS.cpp b/llvm/lib/CAS/InMemoryCAS.cpp
index 255b89c..c63ee70d 100644
--- a/llvm/lib/CAS/InMemoryCAS.cpp
+++ b/llvm/lib/CAS/InMemoryCAS.cpp
@@ -57,6 +57,9 @@ public:
InMemoryObject() = delete;
InMemoryObject(InMemoryObject &&) = delete;
InMemoryObject(const InMemoryObject &) = delete;
+ InMemoryObject &operator=(const InMemoryObject &) = delete;
+ InMemoryObject &operator=(InMemoryObject &&) = delete;
+ virtual ~InMemoryObject() = default;
protected:
InMemoryObject(Kind K, const InMemoryIndexValueT &I) : IndexAndKind(&I, K) {}
diff --git a/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp b/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp
new file mode 100644
index 0000000..9b382dd7
--- /dev/null
+++ b/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp
@@ -0,0 +1,1178 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file Implements OnDiskTrieRawHashMap.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CAS/OnDiskTrieRawHashMap.h"
+#include "DatabaseFile.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/TrieHashIndexGenerator.h"
+#include "llvm/CAS/MappedFileRegionArena.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/ThreadPool.h"
+#include "llvm/Support/Threading.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::cas;
+using namespace llvm::cas::ondisk;
+
+#if LLVM_ENABLE_ONDISK_CAS
+
+//===----------------------------------------------------------------------===//
+// TrieRawHashMap data structures.
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+class SubtrieHandle;
+class TrieRawHashMapHandle;
+class TrieVisitor;
+
+/// A value stored in the slots inside a SubTrie. A stored value can either be a
+/// subtrie (encoded after negation) which is the file offset to another
+/// subtrie, or it can be a fileset to a DataRecord.
+class SubtrieSlotValue {
+public:
+ explicit operator bool() const { return !isEmpty(); }
+ bool isEmpty() const { return !Offset; }
+ bool isData() const { return Offset > 0; }
+ bool isSubtrie() const { return Offset < 0; }
+ uint64_t asData() const {
+ assert(isData());
+ return Offset;
+ }
+ uint64_t asSubtrie() const {
+ assert(isSubtrie());
+ return -Offset;
+ }
+
+ FileOffset asSubtrieFileOffset() const { return FileOffset(asSubtrie()); }
+
+ FileOffset asDataFileOffset() const { return FileOffset(asData()); }
+
+ int64_t getRawOffset() const { return Offset; }
+
+ static SubtrieSlotValue getDataOffset(int64_t Offset) {
+ return SubtrieSlotValue(Offset);
+ }
+
+ static SubtrieSlotValue getSubtrieOffset(int64_t Offset) {
+ return SubtrieSlotValue(-Offset);
+ }
+
+ static SubtrieSlotValue getDataOffset(FileOffset Offset) {
+ return getDataOffset(Offset.get());
+ }
+
+ static SubtrieSlotValue getSubtrieOffset(FileOffset Offset) {
+ return getDataOffset(Offset.get());
+ }
+
+ static SubtrieSlotValue getFromSlot(std::atomic<int64_t> &Slot) {
+ return SubtrieSlotValue(Slot.load());
+ }
+
+ SubtrieSlotValue() = default;
+
+private:
+ friend class SubtrieHandle;
+ explicit SubtrieSlotValue(int64_t Offset) : Offset(Offset) {}
+ int64_t Offset = 0;
+};
+
+/// Subtrie layout:
+/// - 2-bytes: StartBit
+/// - 1-bytes: NumBits=lg(num-slots)
+/// - 5-bytes: 0-pad
+/// - <slots>
+class SubtrieHandle {
+public:
+ struct Header {
+ /// The bit this subtrie starts on.
+ uint16_t StartBit;
+
+ /// The number of bits this subtrie handles. It has 2^NumBits slots.
+ uint8_t NumBits;
+
+ /// 0-pad to 8B.
+ uint8_t ZeroPad1B;
+ uint32_t ZeroPad4B;
+ };
+
+ /// Slot storage:
+ /// - zero: Empty
+ /// - positive: RecordOffset
+ /// - negative: SubtrieOffset
+ using SlotT = std::atomic<int64_t>;
+
+ static int64_t getSlotsSize(uint32_t NumBits) {
+ return sizeof(int64_t) * (1u << NumBits);
+ }
+
+ static int64_t getSize(uint32_t NumBits) {
+ return sizeof(SubtrieHandle::Header) + getSlotsSize(NumBits);
+ }
+
+ int64_t getSize() const { return getSize(H->NumBits); }
+ size_t getNumSlots() const { return Slots.size(); }
+
+ SubtrieSlotValue load(size_t I) const {
+ return SubtrieSlotValue(Slots[I].load());
+ }
+ void store(size_t I, SubtrieSlotValue V) {
+ return Slots[I].store(V.getRawOffset());
+ }
+
+ void printHash(raw_ostream &OS, ArrayRef<uint8_t> Bytes) const;
+
+ /// Return None on success, or the existing offset on failure.
+ bool compare_exchange_strong(size_t I, SubtrieSlotValue &Expected,
+ SubtrieSlotValue New) {
+ return Slots[I].compare_exchange_strong(Expected.Offset, New.Offset);
+ }
+
+ /// Sink \p V from \p I in this subtrie down to \p NewI in a new subtrie with
+ /// \p NumSubtrieBits.
+ ///
+ /// \p UnusedSubtrie maintains a 1-item "free" list of unused subtries. If a
+ /// new subtrie is created that isn't used because of a lost race, then it If
+ /// it's already valid, it should be used instead of allocating a new one.
+ /// should be returned as an out parameter to be passed back in the future.
+ /// If it's already valid, it should be used instead of allocating a new one.
+ ///
+ /// Returns the subtrie that now lives at \p I.
+ Expected<SubtrieHandle> sink(size_t I, SubtrieSlotValue V,
+ MappedFileRegionArena &Alloc,
+ size_t NumSubtrieBits,
+ SubtrieHandle &UnusedSubtrie, size_t NewI);
+
+ /// Only safe if the subtrie is empty.
+ void reinitialize(uint32_t StartBit, uint32_t NumBits);
+
+ SubtrieSlotValue getOffset() const {
+ return SubtrieSlotValue::getSubtrieOffset(
+ reinterpret_cast<const char *>(H) - Region->data());
+ }
+
+ FileOffset getFileOffset() const { return getOffset().asSubtrieFileOffset(); }
+
+ explicit operator bool() const { return H; }
+
+ Header &getHeader() const { return *H; }
+ uint32_t getStartBit() const { return H->StartBit; }
+ uint32_t getNumBits() const { return H->NumBits; }
+
+ static Expected<SubtrieHandle> create(MappedFileRegionArena &Alloc,
+ uint32_t StartBit, uint32_t NumBits);
+
+ static SubtrieHandle getFromFileOffset(MappedFileRegion &Region,
+ FileOffset Offset) {
+ return SubtrieHandle(Region, SubtrieSlotValue::getSubtrieOffset(Offset));
+ }
+
+ SubtrieHandle() = default;
+ SubtrieHandle(MappedFileRegion &Region, Header &H)
+ : Region(&Region), H(&H), Slots(getSlots(H)) {}
+ SubtrieHandle(MappedFileRegion &Region, SubtrieSlotValue Offset)
+ : SubtrieHandle(Region, *reinterpret_cast<Header *>(
+ Region.data() + Offset.asSubtrie())) {}
+
+private:
+ MappedFileRegion *Region = nullptr;
+ Header *H = nullptr;
+ MutableArrayRef<SlotT> Slots;
+
+ static MutableArrayRef<SlotT> getSlots(Header &H) {
+ return MutableArrayRef(reinterpret_cast<SlotT *>(&H + 1), 1u << H.NumBits);
+ }
+};
+
+/// Handle for a TrieRawHashMap table.
+///
+/// TrieRawHashMap table layout:
+/// - [8-bytes: Generic table header]
+/// - 1-byte: NumSubtrieBits
+/// - 1-byte: Flags (not used yet)
+/// - 2-bytes: NumHashBits
+/// - 4-bytes: RecordDataSize (in bytes)
+/// - 8-bytes: RootTrieOffset
+/// - 8-bytes: AllocatorOffset (reserved for implementing free lists)
+/// - <name> '\0'
+///
+/// Record layout:
+/// - <hash>
+/// - <data>
+class TrieRawHashMapHandle {
+public:
+ static constexpr TableHandle::TableKind Kind =
+ TableHandle::TableKind::TrieRawHashMap;
+
+ struct Header {
+ TableHandle::Header GenericHeader;
+ uint8_t NumSubtrieBits;
+ uint8_t Flags; ///< None used yet.
+ uint16_t NumHashBits;
+ uint32_t RecordDataSize;
+ std::atomic<int64_t> RootTrieOffset;
+ std::atomic<int64_t> AllocatorOffset;
+ };
+
+ operator TableHandle() const {
+ if (!H)
+ return TableHandle();
+ return TableHandle(*Region, H->GenericHeader);
+ }
+
+ struct RecordData {
+ OnDiskTrieRawHashMap::ValueProxy Proxy;
+ SubtrieSlotValue Offset;
+ FileOffset getFileOffset() const { return Offset.asDataFileOffset(); }
+ };
+
+ enum Limits : size_t {
+ /// Seems like 65528 hash bits ought to be enough.
+ MaxNumHashBytes = UINT16_MAX >> 3,
+ MaxNumHashBits = MaxNumHashBytes << 3,
+
+ /// 2^16 bits in a trie is 65536 slots. This restricts us to a 16-bit
+ /// index. This many slots is suspicously large anyway.
+ MaxNumRootBits = 16,
+
+ /// 2^10 bits in a trie is 1024 slots. This many slots seems suspiciously
+ /// large for subtries.
+ MaxNumSubtrieBits = 10,
+ };
+
+ static constexpr size_t getNumHashBytes(size_t NumHashBits) {
+ assert(NumHashBits % 8 == 0);
+ return NumHashBits / 8;
+ }
+ static constexpr size_t getRecordSize(size_t RecordDataSize,
+ size_t NumHashBits) {
+ return RecordDataSize + getNumHashBytes(NumHashBits);
+ }
+
+ RecordData getRecord(SubtrieSlotValue Offset);
+ Expected<RecordData> createRecord(MappedFileRegionArena &Alloc,
+ ArrayRef<uint8_t> Hash);
+
+ explicit operator bool() const { return H; }
+ const Header &getHeader() const { return *H; }
+ SubtrieHandle getRoot() const;
+ Expected<SubtrieHandle> getOrCreateRoot(MappedFileRegionArena &Alloc);
+ MappedFileRegion &getRegion() const { return *Region; }
+
+ size_t getFlags() const { return H->Flags; }
+ size_t getNumSubtrieBits() const { return H->NumSubtrieBits; }
+ size_t getNumHashBits() const { return H->NumHashBits; }
+ size_t getNumHashBytes() const { return getNumHashBytes(H->NumHashBits); }
+ size_t getRecordDataSize() const { return H->RecordDataSize; }
+ size_t getRecordSize() const {
+ return getRecordSize(H->RecordDataSize, H->NumHashBits);
+ }
+
+ TrieHashIndexGenerator getIndexGen(SubtrieHandle Root,
+ ArrayRef<uint8_t> Hash) {
+ assert(Root.getStartBit() == 0);
+ assert(getNumHashBytes() == Hash.size());
+ assert(getNumHashBits() == Hash.size() * 8);
+ return TrieHashIndexGenerator{Root.getNumBits(), getNumSubtrieBits(), Hash};
+ }
+
+ static Expected<TrieRawHashMapHandle>
+ create(MappedFileRegionArena &Alloc, StringRef Name,
+ std::optional<uint64_t> NumRootBits, uint64_t NumSubtrieBits,
+ uint64_t NumHashBits, uint64_t RecordDataSize);
+
+ void
+ print(raw_ostream &OS,
+ function_ref<void(ArrayRef<char>)> PrintRecordData = nullptr) const;
+
+ Error validate(
+ function_ref<Error(FileOffset, OnDiskTrieRawHashMap::ConstValueProxy)>
+ RecordVerifier) const;
+ TrieRawHashMapHandle() = default;
+ TrieRawHashMapHandle(MappedFileRegion &Region, Header &H)
+ : Region(&Region), H(&H) {}
+ TrieRawHashMapHandle(MappedFileRegion &Region, intptr_t HeaderOffset)
+ : TrieRawHashMapHandle(
+ Region, *reinterpret_cast<Header *>(Region.data() + HeaderOffset)) {
+ }
+
+private:
+ MappedFileRegion *Region = nullptr;
+ Header *H = nullptr;
+};
+
+} // end anonymous namespace
+
+struct OnDiskTrieRawHashMap::ImplType {
+ DatabaseFile File;
+ TrieRawHashMapHandle Trie;
+};
+
+Expected<SubtrieHandle> SubtrieHandle::create(MappedFileRegionArena &Alloc,
+ uint32_t StartBit,
+ uint32_t NumBits) {
+ assert(StartBit <= TrieRawHashMapHandle::MaxNumHashBits);
+ assert(NumBits <= UINT8_MAX);
+ assert(NumBits <= TrieRawHashMapHandle::MaxNumRootBits);
+
+ auto Mem = Alloc.allocate(getSize(NumBits));
+ if (LLVM_UNLIKELY(!Mem))
+ return Mem.takeError();
+ auto *H =
+ new (*Mem) SubtrieHandle::Header{(uint16_t)StartBit, (uint8_t)NumBits,
+ /*ZeroPad1B=*/0, /*ZeroPad4B=*/0};
+ SubtrieHandle S(Alloc.getRegion(), *H);
+ for (auto I = S.Slots.begin(), E = S.Slots.end(); I != E; ++I)
+ new (I) SlotT(0);
+ return S;
+}
+
+SubtrieHandle TrieRawHashMapHandle::getRoot() const {
+ if (int64_t Root = H->RootTrieOffset)
+ return SubtrieHandle(getRegion(), SubtrieSlotValue::getSubtrieOffset(Root));
+ return SubtrieHandle();
+}
+
+Expected<SubtrieHandle>
+TrieRawHashMapHandle::getOrCreateRoot(MappedFileRegionArena &Alloc) {
+ assert(&Alloc.getRegion() == &getRegion());
+ if (SubtrieHandle Root = getRoot())
+ return Root;
+
+ int64_t Race = 0;
+ auto LazyRoot = SubtrieHandle::create(Alloc, 0, H->NumSubtrieBits);
+ if (LLVM_UNLIKELY(!LazyRoot))
+ return LazyRoot.takeError();
+ if (H->RootTrieOffset.compare_exchange_strong(
+ Race, LazyRoot->getOffset().asSubtrie()))
+ return *LazyRoot;
+
+ // There was a race. Return the other root.
+ //
+ // TODO: Avoid leaking the lazy root by storing it in an allocator.
+ return SubtrieHandle(getRegion(), SubtrieSlotValue::getSubtrieOffset(Race));
+}
+
+Expected<TrieRawHashMapHandle>
+TrieRawHashMapHandle::create(MappedFileRegionArena &Alloc, StringRef Name,
+ std::optional<uint64_t> NumRootBits,
+ uint64_t NumSubtrieBits, uint64_t NumHashBits,
+ uint64_t RecordDataSize) {
+ // Allocate.
+ auto Offset = Alloc.allocateOffset(sizeof(Header) + Name.size() + 1);
+ if (LLVM_UNLIKELY(!Offset))
+ return Offset.takeError();
+
+ // Construct the header and the name.
+ assert(Name.size() <= UINT16_MAX && "Expected smaller table name");
+ assert(NumSubtrieBits <= UINT8_MAX && "Expected valid subtrie bits");
+ assert(NumHashBits <= UINT16_MAX && "Expected valid hash size");
+ assert(RecordDataSize <= UINT32_MAX && "Expected smaller table name");
+ auto *H = new (Alloc.getRegion().data() + *Offset)
+ Header{{TableHandle::TableKind::TrieRawHashMap, (uint16_t)Name.size(),
+ (uint32_t)sizeof(Header)},
+ (uint8_t)NumSubtrieBits,
+ /*Flags=*/0,
+ (uint16_t)NumHashBits,
+ (uint32_t)RecordDataSize,
+ /*RootTrieOffset=*/{0},
+ /*AllocatorOffset=*/{0}};
+ char *NameStorage = reinterpret_cast<char *>(H + 1);
+ llvm::copy(Name, NameStorage);
+ NameStorage[Name.size()] = 0;
+
+ // Construct a root trie, if requested.
+ TrieRawHashMapHandle Trie(Alloc.getRegion(), *H);
+ auto Sub = SubtrieHandle::create(Alloc, 0, *NumRootBits);
+ if (LLVM_UNLIKELY(!Sub))
+ return Sub.takeError();
+ if (NumRootBits)
+ H->RootTrieOffset = Sub->getOffset().asSubtrie();
+ return Trie;
+}
+
+TrieRawHashMapHandle::RecordData
+TrieRawHashMapHandle::getRecord(SubtrieSlotValue Offset) {
+ char *Begin = Region->data() + Offset.asData();
+ OnDiskTrieRawHashMap::ValueProxy Proxy;
+ Proxy.Data = MutableArrayRef(Begin, getRecordDataSize());
+ Proxy.Hash = ArrayRef(reinterpret_cast<const uint8_t *>(Proxy.Data.end()),
+ getNumHashBytes());
+ return RecordData{Proxy, Offset};
+}
+
+Expected<TrieRawHashMapHandle::RecordData>
+TrieRawHashMapHandle::createRecord(MappedFileRegionArena &Alloc,
+ ArrayRef<uint8_t> Hash) {
+ assert(&Alloc.getRegion() == Region);
+ assert(Hash.size() == getNumHashBytes());
+ auto Offset = Alloc.allocateOffset(getRecordSize());
+ if (LLVM_UNLIKELY(!Offset))
+ return Offset.takeError();
+
+ RecordData Record = getRecord(SubtrieSlotValue::getDataOffset(*Offset));
+ llvm::copy(Hash, const_cast<uint8_t *>(Record.Proxy.Hash.begin()));
+ return Record;
+}
+
+Expected<OnDiskTrieRawHashMap::const_pointer>
+OnDiskTrieRawHashMap::recoverFromFileOffset(FileOffset Offset) const {
+ // Check alignment.
+ if (!isAligned(MappedFileRegionArena::getAlign(), Offset.get()))
+ return createStringError(make_error_code(std::errc::protocol_error),
+ "unaligned file offset at 0x" +
+ utohexstr(Offset.get(), /*LowerCase=*/true));
+
+ // Check bounds.
+ //
+ // Note: There's no potential overflow when using \c uint64_t because Offset
+ // is in valid offset range and the record size is in \c [0,UINT32_MAX].
+ if (!validOffset(Offset) ||
+ Offset.get() + Impl->Trie.getRecordSize() > Impl->File.getAlloc().size())
+ return createStringError(make_error_code(std::errc::protocol_error),
+ "file offset too large: 0x" +
+ utohexstr(Offset.get(), /*LowerCase=*/true));
+
+ // Looks okay...
+ TrieRawHashMapHandle::RecordData D =
+ Impl->Trie.getRecord(SubtrieSlotValue::getDataOffset(Offset));
+ return const_pointer(D.Proxy, D.getFileOffset());
+}
+
+OnDiskTrieRawHashMap::const_pointer
+OnDiskTrieRawHashMap::find(ArrayRef<uint8_t> Hash) const {
+ TrieRawHashMapHandle Trie = Impl->Trie;
+ assert(Hash.size() == Trie.getNumHashBytes() && "Invalid hash");
+
+ SubtrieHandle S = Trie.getRoot();
+ if (!S)
+ return const_pointer();
+
+ TrieHashIndexGenerator IndexGen = Trie.getIndexGen(S, Hash);
+ size_t Index = IndexGen.next();
+ for (;;) {
+ // Try to set the content.
+ SubtrieSlotValue V = S.load(Index);
+ if (!V)
+ return const_pointer();
+
+ // Check for an exact match.
+ if (V.isData()) {
+ TrieRawHashMapHandle::RecordData D = Trie.getRecord(V);
+ return D.Proxy.Hash == Hash ? const_pointer(D.Proxy, D.getFileOffset())
+ : const_pointer();
+ }
+
+ Index = IndexGen.next();
+ S = SubtrieHandle(Trie.getRegion(), V);
+ }
+}
+
+/// Only safe if the subtrie is empty.
+void SubtrieHandle::reinitialize(uint32_t StartBit, uint32_t NumBits) {
+ assert(StartBit > H->StartBit);
+ assert(NumBits <= H->NumBits);
+ // Ideally would also assert that all slots are empty, but that's expensive.
+
+ H->StartBit = StartBit;
+ H->NumBits = NumBits;
+}
+
+Expected<OnDiskTrieRawHashMap::pointer>
+OnDiskTrieRawHashMap::insertLazy(ArrayRef<uint8_t> Hash,
+ LazyInsertOnConstructCB OnConstruct,
+ LazyInsertOnLeakCB OnLeak) {
+ TrieRawHashMapHandle Trie = Impl->Trie;
+ assert(Hash.size() == Trie.getNumHashBytes() && "Invalid hash");
+
+ MappedFileRegionArena &Alloc = Impl->File.getAlloc();
+ std::optional<SubtrieHandle> S;
+ auto Err = Trie.getOrCreateRoot(Alloc).moveInto(S);
+ if (LLVM_UNLIKELY(Err))
+ return std::move(Err);
+
+ TrieHashIndexGenerator IndexGen = Trie.getIndexGen(*S, Hash);
+ size_t Index = IndexGen.next();
+
+ // Walk through the hash bytes and insert into correct trie position.
+ std::optional<TrieRawHashMapHandle::RecordData> NewRecord;
+ SubtrieHandle UnusedSubtrie;
+ for (;;) {
+ SubtrieSlotValue Existing = S->load(Index);
+
+ // Try to set it, if it's empty.
+ if (!Existing) {
+ if (!NewRecord) {
+ auto Err = Trie.createRecord(Alloc, Hash).moveInto(NewRecord);
+ if (LLVM_UNLIKELY(Err))
+ return std::move(Err);
+ if (OnConstruct)
+ OnConstruct(NewRecord->Offset.asDataFileOffset(), NewRecord->Proxy);
+ }
+
+ if (S->compare_exchange_strong(Index, Existing, NewRecord->Offset))
+ return pointer(NewRecord->Proxy, NewRecord->Offset.asDataFileOffset());
+
+ // Race means that Existing is no longer empty; fall through...
+ }
+
+ if (Existing.isSubtrie()) {
+ S = SubtrieHandle(Trie.getRegion(), Existing);
+ Index = IndexGen.next();
+ continue;
+ }
+
+ // Check for an exact match.
+ TrieRawHashMapHandle::RecordData ExistingRecord = Trie.getRecord(Existing);
+ if (ExistingRecord.Proxy.Hash == Hash) {
+ if (NewRecord && OnLeak)
+ OnLeak(NewRecord->Offset.asDataFileOffset(), NewRecord->Proxy,
+ ExistingRecord.Offset.asDataFileOffset(), ExistingRecord.Proxy);
+ return pointer(ExistingRecord.Proxy,
+ ExistingRecord.Offset.asDataFileOffset());
+ }
+
+ // Sink the existing content as long as the indexes match.
+ for (;;) {
+ size_t NextIndex = IndexGen.next();
+ size_t NewIndexForExistingContent =
+ IndexGen.getCollidingBits(ExistingRecord.Proxy.Hash);
+
+ auto Err = S->sink(Index, Existing, Alloc, IndexGen.getNumBits(),
+ UnusedSubtrie, NewIndexForExistingContent)
+ .moveInto(S);
+ if (LLVM_UNLIKELY(Err))
+ return std::move(Err);
+ Index = NextIndex;
+
+ // Found the difference.
+ if (NextIndex != NewIndexForExistingContent)
+ break;
+ }
+ }
+}
+
+Expected<SubtrieHandle> SubtrieHandle::sink(size_t I, SubtrieSlotValue V,
+ MappedFileRegionArena &Alloc,
+ size_t NumSubtrieBits,
+ SubtrieHandle &UnusedSubtrie,
+ size_t NewI) {
+ std::optional<SubtrieHandle> NewS;
+ if (UnusedSubtrie) {
+ // Steal UnusedSubtrie and initialize it.
+ NewS.emplace();
+ std::swap(*NewS, UnusedSubtrie);
+ NewS->reinitialize(getStartBit() + getNumBits(), NumSubtrieBits);
+ } else {
+ // Allocate a new, empty subtrie.
+ auto Err = SubtrieHandle::create(Alloc, getStartBit() + getNumBits(),
+ NumSubtrieBits)
+ .moveInto(NewS);
+ if (LLVM_UNLIKELY(Err))
+ return std::move(Err);
+ }
+
+ NewS->store(NewI, V);
+ if (compare_exchange_strong(I, V, NewS->getOffset()))
+ return *NewS; // Success!
+
+ // Raced.
+ assert(V.isSubtrie() && "Expected racing sink() to add a subtrie");
+
+ // Wipe out the new slot so NewS can be reused and set the out parameter.
+ NewS->store(NewI, SubtrieSlotValue());
+ UnusedSubtrie = *NewS;
+
+ // Return the subtrie added by the concurrent sink() call.
+ return SubtrieHandle(Alloc.getRegion(), V);
+}
+
+void OnDiskTrieRawHashMap::print(
+ raw_ostream &OS, function_ref<void(ArrayRef<char>)> PrintRecordData) const {
+ Impl->Trie.print(OS, PrintRecordData);
+}
+
+Error OnDiskTrieRawHashMap::validate(
+ function_ref<Error(FileOffset, ConstValueProxy)> RecordVerifier) const {
+ return Impl->Trie.validate(RecordVerifier);
+}
+
+// Helper function that prints hexdigit and have a sub-byte starting position.
+static void printHexDigits(raw_ostream &OS, ArrayRef<uint8_t> Bytes,
+ size_t StartBit, size_t NumBits) {
+ assert(StartBit % 4 == 0);
+ assert(NumBits % 4 == 0);
+ for (size_t I = StartBit, E = StartBit + NumBits; I != E; I += 4) {
+ uint8_t HexPair = Bytes[I / 8];
+ uint8_t HexDigit = I % 8 == 0 ? HexPair >> 4 : HexPair & 0xf;
+ OS << hexdigit(HexDigit, /*LowerCase=*/true);
+ }
+}
+
+static void printBits(raw_ostream &OS, ArrayRef<uint8_t> Bytes, size_t StartBit,
+ size_t NumBits) {
+ assert(StartBit + NumBits <= Bytes.size() * 8u);
+ for (size_t I = StartBit, E = StartBit + NumBits; I != E; ++I) {
+ uint8_t Byte = Bytes[I / 8];
+ size_t ByteOffset = I % 8;
+ if (size_t ByteShift = 8 - ByteOffset - 1)
+ Byte >>= ByteShift;
+ OS << (Byte & 0x1 ? '1' : '0');
+ }
+}
+
+void SubtrieHandle::printHash(raw_ostream &OS, ArrayRef<uint8_t> Bytes) const {
+ // afb[1c:00*01110*0]def
+ size_t EndBit = getStartBit() + getNumBits();
+ size_t HashEndBit = Bytes.size() * 8u;
+
+ size_t FirstBinaryBit = getStartBit() & ~0x3u;
+ printHexDigits(OS, Bytes, 0, FirstBinaryBit);
+
+ size_t LastBinaryBit = (EndBit + 3u) & ~0x3u;
+ OS << "[";
+ printBits(OS, Bytes, FirstBinaryBit, LastBinaryBit - FirstBinaryBit);
+ OS << "]";
+
+ printHexDigits(OS, Bytes, LastBinaryBit, HashEndBit - LastBinaryBit);
+}
+
+static void appendIndexBits(std::string &Prefix, size_t Index,
+ size_t NumSlots) {
+ std::string Bits;
+ for (size_t NumBits = 1u; NumBits < NumSlots; NumBits <<= 1) {
+ Bits.push_back('0' + (Index & 0x1));
+ Index >>= 1;
+ }
+ for (char Ch : llvm::reverse(Bits))
+ Prefix += Ch;
+}
+
+static void printPrefix(raw_ostream &OS, StringRef Prefix) {
+ while (Prefix.size() >= 4) {
+ uint8_t Digit;
+ bool ErrorParsingBinary = Prefix.take_front(4).getAsInteger(2, Digit);
+ assert(!ErrorParsingBinary);
+ (void)ErrorParsingBinary;
+ OS << hexdigit(Digit, /*LowerCase=*/true);
+ Prefix = Prefix.drop_front(4);
+ }
+ if (!Prefix.empty())
+ OS << "[" << Prefix << "]";
+}
+
+LLVM_DUMP_METHOD void OnDiskTrieRawHashMap::dump() const { print(dbgs()); }
+
+static Expected<size_t> checkParameter(StringRef Label, size_t Max,
+ std::optional<size_t> Value,
+ std::optional<size_t> Default,
+ StringRef Path, StringRef TableName) {
+ assert(Value || Default);
+ assert(!Default || *Default <= Max);
+ if (!Value)
+ return *Default;
+
+ if (*Value <= Max)
+ return *Value;
+ return createTableConfigError(
+ std::errc::argument_out_of_domain, Path, TableName,
+ "invalid " + Label + ": " + Twine(*Value) + " (max: " + Twine(Max) + ")");
+}
+
+size_t OnDiskTrieRawHashMap::size() const { return Impl->File.size(); }
+size_t OnDiskTrieRawHashMap::capacity() const {
+ return Impl->File.getRegion().size();
+}
+
+Expected<OnDiskTrieRawHashMap>
+OnDiskTrieRawHashMap::create(const Twine &PathTwine, const Twine &TrieNameTwine,
+ size_t NumHashBits, uint64_t DataSize,
+ uint64_t MaxFileSize,
+ std::optional<uint64_t> NewFileInitialSize,
+ std::optional<size_t> NewTableNumRootBits,
+ std::optional<size_t> NewTableNumSubtrieBits) {
+ SmallString<128> PathStorage;
+ StringRef Path = PathTwine.toStringRef(PathStorage);
+ SmallString<128> TrieNameStorage;
+ StringRef TrieName = TrieNameTwine.toStringRef(TrieNameStorage);
+
+ constexpr size_t DefaultNumRootBits = 10;
+ constexpr size_t DefaultNumSubtrieBits = 6;
+
+ size_t NumRootBits;
+ if (Error E = checkParameter(
+ "root bits", TrieRawHashMapHandle::MaxNumRootBits,
+ NewTableNumRootBits, DefaultNumRootBits, Path, TrieName)
+ .moveInto(NumRootBits))
+ return std::move(E);
+
+ size_t NumSubtrieBits;
+ if (Error E = checkParameter("subtrie bits",
+ TrieRawHashMapHandle::MaxNumSubtrieBits,
+ NewTableNumSubtrieBits, DefaultNumSubtrieBits,
+ Path, TrieName)
+ .moveInto(NumSubtrieBits))
+ return std::move(E);
+
+ size_t NumHashBytes = NumHashBits >> 3;
+ if (Error E =
+ checkParameter("hash size", TrieRawHashMapHandle::MaxNumHashBits,
+ NumHashBits, std::nullopt, Path, TrieName)
+ .takeError())
+ return std::move(E);
+ assert(NumHashBits == NumHashBytes << 3 &&
+ "Expected hash size to be byte-aligned");
+ if (NumHashBits != NumHashBytes << 3)
+ return createTableConfigError(
+ std::errc::argument_out_of_domain, Path, TrieName,
+ "invalid hash size: " + Twine(NumHashBits) + " (not byte-aligned)");
+
+ // Constructor for if the file doesn't exist.
+ auto NewDBConstructor = [&](DatabaseFile &DB) -> Error {
+ auto Trie =
+ TrieRawHashMapHandle::create(DB.getAlloc(), TrieName, NumRootBits,
+ NumSubtrieBits, NumHashBits, DataSize);
+ if (LLVM_UNLIKELY(!Trie))
+ return Trie.takeError();
+
+ return DB.addTable(*Trie);
+ };
+
+ // Get or create the file.
+ Expected<DatabaseFile> File =
+ DatabaseFile::create(Path, MaxFileSize, NewDBConstructor);
+ if (!File)
+ return File.takeError();
+
+ // Find the trie and validate it.
+ std::optional<TableHandle> Table = File->findTable(TrieName);
+ if (!Table)
+ return createTableConfigError(std::errc::argument_out_of_domain, Path,
+ TrieName, "table not found");
+ if (Error E = checkTable("table kind", (size_t)TrieRawHashMapHandle::Kind,
+ (size_t)Table->getHeader().Kind, Path, TrieName))
+ return std::move(E);
+ auto Trie = Table->cast<TrieRawHashMapHandle>();
+ assert(Trie && "Already checked the kind");
+
+ // Check the hash and data size.
+ if (Error E = checkTable("hash size", NumHashBits, Trie.getNumHashBits(),
+ Path, TrieName))
+ return std::move(E);
+ if (Error E = checkTable("data size", DataSize, Trie.getRecordDataSize(),
+ Path, TrieName))
+ return std::move(E);
+
+ // No flags supported right now. Either corrupt, or coming from a future
+ // writer.
+ if (size_t Flags = Trie.getFlags())
+ return createTableConfigError(std::errc::invalid_argument, Path, TrieName,
+ "unsupported flags: " + Twine(Flags));
+
+ // Success.
+ OnDiskTrieRawHashMap::ImplType Impl{DatabaseFile(std::move(*File)), Trie};
+ return OnDiskTrieRawHashMap(std::make_unique<ImplType>(std::move(Impl)));
+}
+
+static Error createInvalidTrieError(uint64_t Offset, const Twine &Msg) {
+ return createStringError(make_error_code(std::errc::protocol_error),
+ "invalid trie at 0x" +
+ utohexstr(Offset, /*LowerCase=*/true) + ": " +
+ Msg);
+}
+
+//===----------------------------------------------------------------------===//
+// TrieVisitor data structures.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// A multi-threaded vistior to traverse the Trie.
+///
+/// TODO: add more sanity checks that isn't just plain data corruption. For
+/// example, some ill-formed data can be constructed to form a cycle using
+/// Sub-Tries and it can lead to inifinite loop when visiting (or inserting
+/// data).
+class TrieVisitor {
+public:
+ TrieVisitor(TrieRawHashMapHandle Trie, unsigned ThreadCount = 0,
+ unsigned ErrorLimit = 50)
+ : Trie(Trie), ErrorLimit(ErrorLimit),
+ Threads(hardware_concurrency(ThreadCount)) {}
+ virtual ~TrieVisitor() = default;
+ Error visit();
+
+private:
+ // Virtual method to implement the action when visiting a sub-trie.
+ virtual Error visitSubTrie(StringRef Prefix, SubtrieHandle SubTrie) {
+ return Error::success();
+ }
+
+ // Virtual method to implement the action when visiting a slot in a trie node.
+ virtual Error visitSlot(unsigned I, SubtrieHandle Subtrie, StringRef Prefix,
+ SubtrieSlotValue Slot) {
+ return Error::success();
+ }
+
+protected:
+ TrieRawHashMapHandle Trie;
+
+private:
+ Error traverseTrieNode(SubtrieHandle Node, StringRef Prefix);
+
+ Error validateSubTrie(SubtrieHandle Node, bool IsRoot);
+
+ // Helper function to capture errors when visiting the trie nodes.
+ void addError(Error NewError) {
+ assert(NewError && "not an error");
+ std::lock_guard<std::mutex> ErrorLock(Lock);
+ if (NumError >= ErrorLimit) {
+ // Too many errors.
+ consumeError(std::move(NewError));
+ return;
+ }
+
+ if (Err)
+ Err = joinErrors(std::move(*Err), std::move(NewError));
+ else
+ Err = std::move(NewError);
+ NumError++;
+ }
+
+ bool tooManyErrors() {
+ std::lock_guard<std::mutex> ErrorLock(Lock);
+ return (bool)Err && NumError >= ErrorLimit;
+ }
+
+ const unsigned ErrorLimit;
+ std::optional<Error> Err;
+ unsigned NumError = 0;
+ std::mutex Lock;
+ DefaultThreadPool Threads;
+};
+
+/// A visitor that traverse and print the Trie.
+class TriePrinter : public TrieVisitor {
+public:
+ TriePrinter(TrieRawHashMapHandle Trie, raw_ostream &OS,
+ function_ref<void(ArrayRef<char>)> PrintRecordData)
+ : TrieVisitor(Trie, /*ThreadCount=*/1), OS(OS),
+ PrintRecordData(PrintRecordData) {}
+
+ Error printRecords() {
+ if (Records.empty())
+ return Error::success();
+
+ OS << "records\n";
+ llvm::sort(Records);
+ for (int64_t Offset : Records) {
+ TrieRawHashMapHandle::RecordData Record =
+ Trie.getRecord(SubtrieSlotValue::getDataOffset(Offset));
+ if (auto Err = printRecord(Record))
+ return Err;
+ }
+ return Error::success();
+ }
+
+ Error printRecord(TrieRawHashMapHandle::RecordData &Record) {
+ OS << "- addr=" << (void *)Record.getFileOffset().get() << " ";
+ if (PrintRecordData) {
+ PrintRecordData(Record.Proxy.Data);
+ } else {
+ OS << "bytes=";
+ ArrayRef<uint8_t> Data(
+ reinterpret_cast<const uint8_t *>(Record.Proxy.Data.data()),
+ Record.Proxy.Data.size());
+ printHexDigits(OS, Data, 0, Data.size() * 8);
+ }
+ OS << "\n";
+ return Error::success();
+ }
+
+ Error visitSubTrie(StringRef Prefix, SubtrieHandle SubTrie) override {
+ if (Prefix.empty()) {
+ OS << "root";
+ } else {
+ OS << "subtrie=";
+ printPrefix(OS, Prefix);
+ }
+
+ OS << " addr="
+ << (void *)(reinterpret_cast<const char *>(&SubTrie.getHeader()) -
+ Trie.getRegion().data());
+ OS << " num-slots=" << SubTrie.getNumSlots() << "\n";
+ return Error::success();
+ }
+
+ Error visitSlot(unsigned I, SubtrieHandle Subtrie, StringRef Prefix,
+ SubtrieSlotValue Slot) override {
+ OS << "- index=";
+ for (size_t Pad : {10, 100, 1000})
+ if (I < Pad && Subtrie.getNumSlots() >= Pad)
+ OS << "0";
+ OS << I << " ";
+ if (Slot.isSubtrie()) {
+ OS << "addr=" << (void *)Slot.asSubtrie();
+ OS << " subtrie=";
+ printPrefix(OS, Prefix);
+ OS << "\n";
+ return Error::success();
+ }
+ TrieRawHashMapHandle::RecordData Record = Trie.getRecord(Slot);
+ OS << "addr=" << (void *)Record.getFileOffset().get();
+ OS << " content=";
+ Subtrie.printHash(OS, Record.Proxy.Hash);
+ OS << "\n";
+ Records.push_back(Slot.asData());
+ return Error::success();
+ }
+
+private:
+ raw_ostream &OS;
+ function_ref<void(ArrayRef<char>)> PrintRecordData;
+ SmallVector<int64_t> Records;
+};
+
+/// TrieVerifier that adds additional verification on top of the basic visitor.
+class TrieVerifier : public TrieVisitor {
+public:
+ TrieVerifier(
+ TrieRawHashMapHandle Trie,
+ function_ref<Error(FileOffset, OnDiskTrieRawHashMap::ConstValueProxy)>
+ RecordVerifier)
+ : TrieVisitor(Trie), RecordVerifier(RecordVerifier) {}
+
+private:
+ Error visitSubTrie(StringRef Prefix, SubtrieHandle SubTrie) final {
+ return Error::success();
+ }
+
+ Error visitSlot(unsigned I, SubtrieHandle Subtrie, StringRef Prefix,
+ SubtrieSlotValue Slot) final {
+ if (RecordVerifier && Slot.isData()) {
+ if (!isAligned(MappedFileRegionArena::getAlign(), Slot.asData()))
+ return createInvalidTrieError(Slot.asData(), "mis-aligned data entry");
+
+ TrieRawHashMapHandle::RecordData Record =
+ Trie.getRecord(SubtrieSlotValue::getDataOffset(Slot.asData()));
+ return RecordVerifier(Slot.asDataFileOffset(),
+ OnDiskTrieRawHashMap::ConstValueProxy{
+ Record.Proxy.Hash, Record.Proxy.Data});
+ }
+ return Error::success();
+ }
+
+ function_ref<Error(FileOffset, OnDiskTrieRawHashMap::ConstValueProxy)>
+ RecordVerifier;
+};
+} // namespace
+
+Error TrieVisitor::visit() {
+ auto Root = Trie.getRoot();
+ if (!Root)
+ return Error::success();
+
+ if (auto Err = validateSubTrie(Root, /*IsRoot=*/true))
+ return Err;
+
+ if (auto Err = visitSubTrie("", Root))
+ return Err;
+
+ SmallVector<SubtrieHandle> Subs;
+ SmallVector<std::string> Prefixes;
+ const size_t NumSlots = Root.getNumSlots();
+ for (size_t I = 0, E = NumSlots; I != E; ++I) {
+ SubtrieSlotValue Slot = Root.load(I);
+ if (!Slot)
+ continue;
+ uint64_t Offset = Slot.isSubtrie() ? Slot.asSubtrie() : Slot.asData();
+ if (Offset >= (uint64_t)Trie.getRegion().size())
+ return createInvalidTrieError(Offset, "slot points out of bound");
+ std::string SubtriePrefix;
+ appendIndexBits(SubtriePrefix, I, NumSlots);
+ if (Slot.isSubtrie()) {
+ SubtrieHandle S(Trie.getRegion(), Slot);
+ Subs.push_back(S);
+ Prefixes.push_back(SubtriePrefix);
+ }
+ if (auto Err = visitSlot(I, Root, SubtriePrefix, Slot))
+ return Err;
+ }
+
+ for (size_t I = 0, E = Subs.size(); I != E; ++I) {
+ Threads.async(
+ [&](unsigned Idx) {
+ // Don't run if there is an error already.
+ if (tooManyErrors())
+ return;
+ if (auto Err = traverseTrieNode(Subs[Idx], Prefixes[Idx]))
+ addError(std::move(Err));
+ },
+ I);
+ }
+
+ Threads.wait();
+ if (Err)
+ return std::move(*Err);
+ return Error::success();
+}
+
+Error TrieVisitor::validateSubTrie(SubtrieHandle Node, bool IsRoot) {
+ char *Addr = reinterpret_cast<char *>(&Node.getHeader());
+ const int64_t Offset = Node.getFileOffset().get();
+ if (Addr + Node.getSize() >=
+ Trie.getRegion().data() + Trie.getRegion().size())
+ return createInvalidTrieError(Offset, "subtrie node spans out of bound");
+
+ if (!IsRoot &&
+ Node.getStartBit() + Node.getNumBits() > Trie.getNumHashBits()) {
+ return createInvalidTrieError(Offset,
+ "subtrie represents too many hash bits");
+ }
+
+ if (IsRoot) {
+ if (Node.getStartBit() != 0)
+ return createInvalidTrieError(Offset,
+ "root node doesn't start at 0 index");
+
+ return Error::success();
+ }
+
+ if (Node.getNumBits() > Trie.getNumSubtrieBits())
+ return createInvalidTrieError(Offset, "subtrie has wrong number of slots");
+
+ return Error::success();
+}
+
+Error TrieVisitor::traverseTrieNode(SubtrieHandle Node, StringRef Prefix) {
+ if (auto Err = validateSubTrie(Node, /*IsRoot=*/false))
+ return Err;
+
+ if (auto Err = visitSubTrie(Prefix, Node))
+ return Err;
+
+ SmallVector<SubtrieHandle> Subs;
+ SmallVector<std::string> Prefixes;
+ const size_t NumSlots = Node.getNumSlots();
+ for (size_t I = 0, E = NumSlots; I != E; ++I) {
+ SubtrieSlotValue Slot = Node.load(I);
+ if (!Slot)
+ continue;
+ uint64_t Offset = Slot.isSubtrie() ? Slot.asSubtrie() : Slot.asData();
+ if (Offset >= (uint64_t)Trie.getRegion().size())
+ return createInvalidTrieError(Offset, "slot points out of bound");
+ std::string SubtriePrefix = Prefix.str();
+ appendIndexBits(SubtriePrefix, I, NumSlots);
+ if (Slot.isSubtrie()) {
+ SubtrieHandle S(Trie.getRegion(), Slot);
+ Subs.push_back(S);
+ Prefixes.push_back(SubtriePrefix);
+ }
+ if (auto Err = visitSlot(I, Node, SubtriePrefix, Slot))
+ return Err;
+ }
+ for (size_t I = 0, E = Subs.size(); I != E; ++I)
+ if (auto Err = traverseTrieNode(Subs[I], Prefixes[I]))
+ return Err;
+
+ return Error::success();
+}
+
+void TrieRawHashMapHandle::print(
+ raw_ostream &OS, function_ref<void(ArrayRef<char>)> PrintRecordData) const {
+ OS << "hash-num-bits=" << getNumHashBits()
+ << " hash-size=" << getNumHashBytes()
+ << " record-data-size=" << getRecordDataSize() << "\n";
+
+ TriePrinter Printer(*this, OS, PrintRecordData);
+ if (auto Err = Printer.visit())
+ OS << "error: " << toString(std::move(Err)) << "\n";
+
+ if (auto Err = Printer.printRecords())
+ OS << "error: " << toString(std::move(Err)) << "\n";
+
+ return;
+}
+
+Error TrieRawHashMapHandle::validate(
+ function_ref<Error(FileOffset, OnDiskTrieRawHashMap::ConstValueProxy)>
+ RecordVerifier) const {
+ // Use the base TrieVisitor to identify the errors inside trie first.
+ TrieVisitor BasicVerifier(*this);
+ if (auto Err = BasicVerifier.visit())
+ return Err;
+
+ // If the trie data structure is sound, do a second pass to verify data and
+ // verifier function can assume the index is correct. However, there can be
+ // newly added bad entries that can still produce error.
+ TrieVerifier Verifier(*this, RecordVerifier);
+ return Verifier.visit();
+}
+
+#else // !LLVM_ENABLE_ONDISK_CAS
+
+struct OnDiskTrieRawHashMap::ImplType {};
+
+Expected<OnDiskTrieRawHashMap>
+OnDiskTrieRawHashMap::create(const Twine &PathTwine, const Twine &TrieNameTwine,
+ size_t NumHashBits, uint64_t DataSize,
+ uint64_t MaxFileSize,
+ std::optional<uint64_t> NewFileInitialSize,
+ std::optional<size_t> NewTableNumRootBits,
+ std::optional<size_t> NewTableNumSubtrieBits) {
+ return createStringError(make_error_code(std::errc::not_supported),
+ "OnDiskTrieRawHashMap is not supported");
+}
+
+Expected<OnDiskTrieRawHashMap::pointer>
+OnDiskTrieRawHashMap::insertLazy(ArrayRef<uint8_t> Hash,
+ LazyInsertOnConstructCB OnConstruct,
+ LazyInsertOnLeakCB OnLeak) {
+ return createStringError(make_error_code(std::errc::not_supported),
+ "OnDiskTrieRawHashMap is not supported");
+}
+
+Expected<OnDiskTrieRawHashMap::const_pointer>
+OnDiskTrieRawHashMap::recoverFromFileOffset(FileOffset Offset) const {
+ return createStringError(make_error_code(std::errc::not_supported),
+ "OnDiskTrieRawHashMap is not supported");
+}
+
+OnDiskTrieRawHashMap::const_pointer
+OnDiskTrieRawHashMap::find(ArrayRef<uint8_t> Hash) const {
+ return const_pointer();
+}
+
+void OnDiskTrieRawHashMap::print(
+ raw_ostream &OS, function_ref<void(ArrayRef<char>)> PrintRecordData) const {
+}
+
+Error OnDiskTrieRawHashMap::validate(
+ function_ref<Error(FileOffset, OnDiskTrieRawHashMap::ConstValueProxy)>
+ RecordVerifier) const {
+ return createStringError(make_error_code(std::errc::not_supported),
+ "OnDiskTrieRawHashMap is not supported");
+}
+
+size_t OnDiskTrieRawHashMap::size() const { return 0; }
+size_t OnDiskTrieRawHashMap::capacity() const { return 0; }
+
+#endif // LLVM_ENABLE_ONDISK_CAS
+
+OnDiskTrieRawHashMap::OnDiskTrieRawHashMap(std::unique_ptr<ImplType> Impl)
+ : Impl(std::move(Impl)) {}
+OnDiskTrieRawHashMap::OnDiskTrieRawHashMap(OnDiskTrieRawHashMap &&RHS) =
+ default;
+OnDiskTrieRawHashMap &
+OnDiskTrieRawHashMap::operator=(OnDiskTrieRawHashMap &&RHS) = default;
+OnDiskTrieRawHashMap::~OnDiskTrieRawHashMap() = default;