//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file implements OnDiskGraphDB, an on-disk CAS nodes database,
/// independent of a particular hashing algorithm. It only needs to be
/// configured for the hash size and controls the schema of the storage.
///
/// OnDiskGraphDB defines:
///
/// - How the data is stored inside the database, either as a standalone file
///   or allocated inside a datapool.
/// - How references to other objects inside the same database are stored. They
///   are stored as internal references, instead of the full hash value, to
///   save space.
/// - How to chain databases together and import objects from upstream
///   databases.
///
/// Here's a top-level description of the current layout:
///
/// - db/index.<version>: a file for the "index" table, named by \a
///   IndexTableName and managed by \a TrieRawHashMap. The contents are 8B
///   that are accessed atomically, describing the object kind and where/how
///   it's stored (including an optional file offset). See \a TrieRecord for
///   more details.
/// - db/data.<version>: a file for the "data" table, named by \a
///   DataPoolTableName and managed by \a DataStore. New objects within
///   TrieRecord::MaxEmbeddedSize are inserted here as \a
///   TrieRecord::StorageKind::DataPool.
/// - db/obj.<offset>.<version>: a file storing an object outside the main
///   "data" table, named by its offset into the "index" table, with the
///   format of \a TrieRecord::StorageKind::Standalone.
/// - db/leaf.<offset>.<version>: a file storing a leaf node outside the
///   main "data" table, named by its offset into the "index" table, with
///   the format of \a TrieRecord::StorageKind::StandaloneLeaf.
/// - db/leaf+0.<offset>.<version>: a file storing a null-terminated leaf
///   object outside the main "data" table, named by its offset into the
///   "index" table, with the format of \a
///   TrieRecord::StorageKind::StandaloneLeaf0.
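///
/// For illustration only (the exact names depend on the index offsets and on
/// the value of \a CASFormatVersion, assumed here to be "v1"), a populated
/// database directory might contain:
///
///   index.v1        the index table
///   data.v1         the data pool
///   obj.104.v1      standalone object whose index record is at offset 104
///   leaf.232.v1     standalone leaf node
///   leaf+0.360.v1   standalone leaf node with a trailing null byte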
//
//===----------------------------------------------------------------------===//

#include "llvm/CAS/OnDiskGraphDB.h"
#include "OnDiskCommon.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CAS/OnDiskDataAllocator.h"
#include "llvm/CAS/OnDiskTrieRawHashMap.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include <atomic>
#include <mutex>
#include <optional>

#define DEBUG_TYPE "on-disk-cas"

using namespace llvm;
using namespace llvm::cas;
using namespace llvm::cas::ondisk;

static constexpr StringLiteral IndexTableName = "llvm.cas.index";
static constexpr StringLiteral DataPoolTableName = "llvm.cas.data";

static constexpr StringLiteral IndexFilePrefix = "index.";
static constexpr StringLiteral DataPoolFilePrefix = "data.";

static constexpr StringLiteral FilePrefixObject = "obj.";
static constexpr StringLiteral FilePrefixLeaf = "leaf.";
static constexpr StringLiteral FilePrefixLeaf0 = "leaf+0.";

static Error createCorruptObjectError(Expected<ArrayRef<uint8_t>> ID) {
  if (!ID)
    return ID.takeError();

  return createStringError(llvm::errc::invalid_argument,
                           "corrupt object '" + toHex(*ID) + "'");
}

namespace {

/// Trie record data: 8 bytes, atomic<uint64_t>
/// - 1-byte: StorageKind
/// - 7-bytes: DataStoreOffset (offset into referenced file)
class TrieRecord {
public:
  enum class StorageKind : uint8_t {
    /// Unknown object.
    Unknown = 0,

    /// data.vX: main pool, full DataStore record.
    DataPool = 1,

    /// obj.<offset>.vX: standalone, with a full DataStore record.
    Standalone = 10,

    /// leaf.<offset>.vX: standalone, just the data. File contents are exactly
    /// the data content and the file size matches the data size. No refs.
    StandaloneLeaf = 11,

    /// leaf+0.<offset>.vX: standalone, just the data plus an extra null
    /// character ('\0'). File size is 1 bigger than the data size. No refs.
    StandaloneLeaf0 = 12,
  };

  static StringRef getStandaloneFilePrefix(StorageKind SK) {
    switch (SK) {
    default:
      llvm_unreachable("Expected standalone storage kind");
    case TrieRecord::StorageKind::Standalone:
      return FilePrefixObject;
    case TrieRecord::StorageKind::StandaloneLeaf:
      return FilePrefixLeaf;
    case TrieRecord::StorageKind::StandaloneLeaf0:
      return FilePrefixLeaf0;
    }
  }

  enum Limits : int64_t {
    /// Saves files bigger than 64KB standalone instead of embedding them.
    MaxEmbeddedSize = 64LL * 1024LL - 1,
  };

  struct Data {
    StorageKind SK = StorageKind::Unknown;
    FileOffset Offset;
  };

  /// Pack StorageKind and Offset from Data into an 8-byte TrieRecord.
  static uint64_t pack(Data D) {
    assert(D.Offset.get() < (int64_t)(1ULL << 56));
    uint64_t Packed = uint64_t(D.SK) << 56 | D.Offset.get();
    assert(D.SK != StorageKind::Unknown || Packed == 0);
#ifndef NDEBUG
    Data RoundTrip = unpack(Packed);
    assert(D.SK == RoundTrip.SK);
    assert(D.Offset.get() == RoundTrip.Offset.get());
#endif
    return Packed;
  }

  // Unpack TrieRecord into Data.
  static Data unpack(uint64_t Packed) {
    Data D;
    if (!Packed)
      return D;
    D.SK = (StorageKind)(Packed >> 56);
    D.Offset = FileOffset(Packed & (UINT64_MAX >> 8));
    return D;
  }

  TrieRecord() : Storage(0) {}

  Data load() const { return unpack(Storage); }
  bool compare_exchange_strong(Data &Existing, Data New);

private:
  std::atomic<uint64_t> Storage;
};
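
// Example of the packing above (illustrative values, not a real record): a
// DataPool object whose DataStore record lives at file offset 0x1234 packs as
//
//   uint64_t Packed = uint64_t(StorageKind::DataPool) << 56 | 0x1234;
//   // Packed == 0x0100000000001234: kind in the top byte, offset in the low
//   // 56 bits, which is why pack() asserts Offset < 2^56.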
/// DataStore record data: 4B + size? + refs? + data + 0
/// - 4-bytes: Header
/// - {0,4,8}-bytes: DataSize     (may be packed in Header)
/// - {0,4,8}-bytes: NumRefs      (may be packed in Header)
/// - NumRefs*{4,8}-bytes: Refs[] (end-ptr is 8-byte aligned)
/// - <data>
/// - 1-byte: 0-term
struct DataRecordHandle {
  /// NumRefs storage: 4B, 2B, 1B, or 0B (no refs). Or, 8B, for alignment
  /// convenience to avoid computing padding later.
  enum class NumRefsFlags : uint8_t {
    Uses0B = 0U,
    Uses1B = 1U,
    Uses2B = 2U,
    Uses4B = 3U,
    Uses8B = 4U,
    Max = Uses8B,
  };

  /// DataSize storage: 8B, 4B, 2B, or 1B.
  enum class DataSizeFlags {
    Uses1B = 0U,
    Uses2B = 1U,
    Uses4B = 2U,
    Uses8B = 3U,
    Max = Uses8B,
  };

  /// Kind of ref stored in Refs[]: InternalRef or InternalRef4B.
  enum class RefKindFlags {
    InternalRef = 0U,
    InternalRef4B = 1U,
    Max = InternalRef4B,
  };

  enum Counts : int {
    NumRefsShift = 0,
    NumRefsBits = 3,
    DataSizeShift = NumRefsShift + NumRefsBits,
    DataSizeBits = 2,
    RefKindShift = DataSizeShift + DataSizeBits,
    RefKindBits = 1,
  };
  static_assert(((UINT32_MAX << NumRefsBits) & (uint32_t)NumRefsFlags::Max) ==
                    0,
                "Not enough bits");
  static_assert(((UINT32_MAX << DataSizeBits) & (uint32_t)DataSizeFlags::Max) ==
                    0,
                "Not enough bits");
  static_assert(((UINT32_MAX << RefKindBits) & (uint32_t)RefKindFlags::Max) ==
                    0,
                "Not enough bits");

  /// Layout of the DataRecordHandle and how to decode it.
  struct LayoutFlags {
    NumRefsFlags NumRefs;
    DataSizeFlags DataSize;
    RefKindFlags RefKind;

    static uint64_t pack(LayoutFlags LF) {
      unsigned Packed = ((unsigned)LF.NumRefs << NumRefsShift) |
                        ((unsigned)LF.DataSize << DataSizeShift) |
                        ((unsigned)LF.RefKind << RefKindShift);
#ifndef NDEBUG
      LayoutFlags RoundTrip = unpack(Packed);
      assert(LF.NumRefs == RoundTrip.NumRefs);
      assert(LF.DataSize == RoundTrip.DataSize);
      assert(LF.RefKind == RoundTrip.RefKind);
#endif
      return Packed;
    }
    static LayoutFlags unpack(uint64_t Storage) {
      assert(Storage <= UINT8_MAX && "Expect storage to fit in a byte");
      LayoutFlags LF;
      LF.NumRefs =
          (NumRefsFlags)((Storage >> NumRefsShift) & ((1U << NumRefsBits) - 1));
      LF.DataSize = (DataSizeFlags)((Storage >> DataSizeShift) &
                                    ((1U << DataSizeBits) - 1));
      LF.RefKind =
          (RefKindFlags)((Storage >> RefKindShift) & ((1U << RefKindBits) - 1));
      return LF;
    }
  };

  /// Header layout:
  /// - 1-byte:      LayoutFlags
  /// - 1-byte:      1B size field
  /// - {0,2}-bytes: 2B size field
  struct Header {
    using PackTy = uint32_t;
    PackTy Packed;

    static constexpr unsigned LayoutFlagsShift =
        (sizeof(PackTy) - 1) * CHAR_BIT;
  };

  struct Input {
    InternalRefArrayRef Refs;
    ArrayRef<char> Data;
  };

  LayoutFlags getLayoutFlags() const {
    return LayoutFlags::unpack(H->Packed >> Header::LayoutFlagsShift);
  }

  uint64_t getDataSize() const;
  void skipDataSize(LayoutFlags LF, int64_t &RelOffset) const;
  uint32_t getNumRefs() const;
  void skipNumRefs(LayoutFlags LF, int64_t &RelOffset) const;
  int64_t getRefsRelOffset() const;
  int64_t getDataRelOffset() const;

  static uint64_t getTotalSize(uint64_t DataRelOffset, uint64_t DataSize) {
    return DataRelOffset + DataSize + 1;
  }
  uint64_t getTotalSize() const {
    return getDataRelOffset() + getDataSize() + 1;
  }
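
  // Worked example of the layout above (illustrative; see Layout::Layout
  // below for the exact rules): a record with two 8B refs and 100 bytes of
  // data is laid out as
  //
  //   [0,4)    Header   (LayoutFlags; NumRefs=2 packed in the 2B size field)
  //   [4,8)    DataSize (grown from a packed 1B field to a real 4B field so
  //            the refs start 8B-aligned; see GrowSizeFieldsBy4B())
  //   [8,24)   Refs[2]  (8B each)
  //   [24,124) Data
  //   [124]    0-term, so getTotalSize() == 125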
  /// Describe the layout of data stored and how to decode from
  /// DataRecordHandle.
  struct Layout {
    explicit Layout(const Input &I);

    LayoutFlags Flags;
    uint64_t DataSize = 0;
    uint32_t NumRefs = 0;
    int64_t RefsRelOffset = 0;
    int64_t DataRelOffset = 0;
    uint64_t getTotalSize() const {
      return DataRecordHandle::getTotalSize(DataRelOffset, DataSize);
    }
  };

  InternalRefArrayRef getRefs() const {
    assert(H && "Expected valid handle");
    auto *BeginByte = reinterpret_cast<const char *>(H) + getRefsRelOffset();
    size_t Size = getNumRefs();
    if (!Size)
      return InternalRefArrayRef();
    if (getLayoutFlags().RefKind == RefKindFlags::InternalRef4B)
      return ArrayRef(reinterpret_cast<const InternalRef4B *>(BeginByte),
                      Size);
    return ArrayRef(reinterpret_cast<const InternalRef *>(BeginByte), Size);
  }

  ArrayRef<char> getData() const {
    assert(H && "Expected valid handle");
    return ArrayRef(reinterpret_cast<const char *>(H) + getDataRelOffset(),
                    getDataSize());
  }

  static DataRecordHandle create(function_ref<char *(size_t Size)> Alloc,
                                 const Input &I);
  static Expected<DataRecordHandle>
  createWithError(function_ref<Expected<char *>(size_t Size)> Alloc,
                  const Input &I);
  static DataRecordHandle construct(char *Mem, const Input &I);

  static DataRecordHandle get(const char *Mem) {
    return DataRecordHandle(
        *reinterpret_cast<const DataRecordHandle::Header *>(Mem));
  }

  static Expected<DataRecordHandle>
  getFromDataPool(const OnDiskDataAllocator &Pool, FileOffset Offset);

  explicit operator bool() const { return H; }
  const Header &getHeader() const { return *H; }

  DataRecordHandle() = default;
  explicit DataRecordHandle(const Header &H) : H(&H) {}

private:
  static DataRecordHandle constructImpl(char *Mem, const Input &I,
                                        const Layout &L);
  const Header *H = nullptr;
};

/// Proxy for any on-disk object or raw data.
struct OnDiskContent {
  std::optional<DataRecordHandle> Record;
  std::optional<ArrayRef<char>> Bytes;
};

/// Data loaded in memory from a standalone file.
class StandaloneDataInMemory {
public:
  OnDiskContent getContent() const;

  StandaloneDataInMemory(std::unique_ptr<sys::fs::mapped_file_region> Region,
                         TrieRecord::StorageKind SK)
      : Region(std::move(Region)), SK(SK) {
#ifndef NDEBUG
    bool IsStandalone = false;
    switch (SK) {
    case TrieRecord::StorageKind::Standalone:
    case TrieRecord::StorageKind::StandaloneLeaf:
    case TrieRecord::StorageKind::StandaloneLeaf0:
      IsStandalone = true;
      break;
    default:
      break;
    }
    assert(IsStandalone);
#endif
  }

private:
  std::unique_ptr<sys::fs::mapped_file_region> Region;
  TrieRecord::StorageKind SK;
};

/// Container for looking up loaded standalone objects.
template <size_t NumShards> class StandaloneDataMap {
  static_assert(isPowerOf2_64(NumShards), "Expected power of 2");

public:
  uintptr_t insert(ArrayRef<uint8_t> Hash, TrieRecord::StorageKind SK,
                   std::unique_ptr<sys::fs::mapped_file_region> Region);

  const StandaloneDataInMemory *lookup(ArrayRef<uint8_t> Hash) const;
  bool count(ArrayRef<uint8_t> Hash) const { return bool(lookup(Hash)); }

private:
  struct Shard {
    /// Needs to store a std::unique_ptr for a stable address identity.
    DenseMap<const uint8_t *, std::unique_ptr<StandaloneDataInMemory>> Map;
    mutable std::mutex Mutex;
  };
  Shard &getShard(ArrayRef<uint8_t> Hash) {
    return const_cast<Shard &>(
        const_cast<const StandaloneDataMap *>(this)->getShard(Hash));
  }
  const Shard &getShard(ArrayRef<uint8_t> Hash) const {
    static_assert(NumShards <= 256, "Expected only 8 bits of shard");
    return Shards[Hash[0] % NumShards];
  }

  Shard Shards[NumShards];
};

using StandaloneDataMapTy = StandaloneDataMap<16>;

/// A vector of internal node references.
class InternalRefVector {
public:
  void push_back(InternalRef Ref) {
    if (NeedsFull)
      return FullRefs.push_back(Ref);
    if (std::optional<InternalRef4B> Small = InternalRef4B::tryToShrink(Ref))
      return SmallRefs.push_back(*Small);
    NeedsFull = true;
    assert(FullRefs.empty());
    FullRefs.reserve(SmallRefs.size() + 1);
    for (InternalRef4B Small : SmallRefs)
      FullRefs.push_back(Small);
    FullRefs.push_back(Ref);
    SmallRefs.clear();
  }

  operator InternalRefArrayRef() const {
    assert(SmallRefs.empty() || FullRefs.empty());
    return NeedsFull ? InternalRefArrayRef(FullRefs)
                     : InternalRefArrayRef(SmallRefs);
  }
private:
  bool NeedsFull = false;
  SmallVector<InternalRef4B> SmallRefs;
  SmallVector<InternalRef> FullRefs;
};

} // namespace

Expected<DataRecordHandle> DataRecordHandle::createWithError(
    function_ref<Expected<char *>(size_t Size)> Alloc, const Input &I) {
  Layout L(I);
  if (Expected<char *> Mem = Alloc(L.getTotalSize()))
    return constructImpl(*Mem, I, L);
  else
    return Mem.takeError();
}

DataRecordHandle
DataRecordHandle::create(function_ref<char *(size_t Size)> Alloc,
                         const Input &I) {
  Layout L(I);
  return constructImpl(Alloc(L.getTotalSize()), I, L);
}

ObjectHandle ObjectHandle::fromFileOffset(FileOffset Offset) {
  // Store the file offset as it is.
  assert(!(Offset.get() & 0x1));
  return ObjectHandle(Offset.get());
}

ObjectHandle ObjectHandle::fromMemory(uintptr_t Ptr) {
  // Store the pointer from memory with the lowest bit set.
  assert(!(Ptr & 0x1));
  return ObjectHandle(Ptr | 1);
}

/// Proxy for an on-disk index record.
struct OnDiskGraphDB::IndexProxy {
  FileOffset Offset;
  ArrayRef<uint8_t> Hash;
  TrieRecord &Ref;
};

template <size_t NumShards>
uintptr_t StandaloneDataMap<NumShards>::insert(
    ArrayRef<uint8_t> Hash, TrieRecord::StorageKind SK,
    std::unique_ptr<sys::fs::mapped_file_region> Region) {
  auto &S = getShard(Hash);
  std::lock_guard<std::mutex> Lock(S.Mutex);
  auto &V = S.Map[Hash.data()];
  if (!V)
    V = std::make_unique<StandaloneDataInMemory>(std::move(Region), SK);
  return reinterpret_cast<uintptr_t>(V.get());
}

template <size_t NumShards>
const StandaloneDataInMemory *
StandaloneDataMap<NumShards>::lookup(ArrayRef<uint8_t> Hash) const {
  auto &S = getShard(Hash);
  std::lock_guard<std::mutex> Lock(S.Mutex);
  auto I = S.Map.find(Hash.data());
  if (I == S.Map.end())
    return nullptr;
  return &*I->second;
}

namespace {
/// Copy of \a sys::fs::TempFile that skips RemoveOnSignal, which is too
/// expensive to register/unregister at this rate.
///
/// FIXME: Add a TempFileManager that maintains a thread-safe list of open temp
/// files and has a signal handler registered that removes them all.
class TempFile {
  bool Done = false;
  TempFile(StringRef Name, int FD) : TmpName(std::string(Name)), FD(FD) {}

public:
  /// This creates a temporary file with createUniqueFile.
  static Expected<TempFile> create(const Twine &Model);
  TempFile(TempFile &&Other) { *this = std::move(Other); }
  TempFile &operator=(TempFile &&Other) {
    TmpName = std::move(Other.TmpName);
    FD = Other.FD;
    Other.Done = true;
    Other.FD = -1;
    return *this;
  }

  // Name of the temporary file.
  std::string TmpName;

  // The open file descriptor.
  int FD = -1;

  // Keep this with the given name.
  Error keep(const Twine &Name);
  Error discard();

  // This checks that keep or delete was called.
  ~TempFile() { consumeError(discard()); }
};

class MappedTempFile {
public:
  char *data() const { return Map.data(); }
  size_t size() const { return Map.size(); }

  Error discard() {
    assert(Map && "Map already destroyed");
    Map.unmap();
    return Temp.discard();
  }

  Error keep(const Twine &Name) {
    assert(Map && "Map already destroyed");
    Map.unmap();
    return Temp.keep(Name);
  }

  MappedTempFile(TempFile Temp, sys::fs::mapped_file_region Map)
      : Temp(std::move(Temp)), Map(std::move(Map)) {}

private:
  TempFile Temp;
  sys::fs::mapped_file_region Map;
};
} // namespace

Error TempFile::discard() {
  Done = true;
  if (FD != -1) {
    sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD);
    if (std::error_code EC = sys::fs::closeFile(File))
      return errorCodeToError(EC);
  }
  FD = -1;

  // Always try to close and remove.
  if (!TmpName.empty()) {
    if (std::error_code EC = sys::fs::remove(TmpName))
      return errorCodeToError(EC);
  }
  TmpName = "";

  return Error::success();
}

Error TempFile::keep(const Twine &Name) {
  assert(!Done);
  Done = true;
  // Always try to close and rename.
  std::error_code RenameEC = sys::fs::rename(TmpName, Name);
  if (!RenameEC)
    TmpName = "";

  sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD);
  if (std::error_code EC = sys::fs::closeFile(File))
    return errorCodeToError(EC);
  FD = -1;

  return errorCodeToError(RenameEC);
}

Expected<TempFile> TempFile::create(const Twine &Model) {
  int FD;
  SmallString<128> ResultPath;
  if (std::error_code EC = sys::fs::createUniqueFile(Model, FD, ResultPath))
    return errorCodeToError(EC);

  TempFile Ret(ResultPath, FD);
  return std::move(Ret);
}

bool TrieRecord::compare_exchange_strong(Data &Existing, Data New) {
  uint64_t ExistingPacked = pack(Existing);
  uint64_t NewPacked = pack(New);
  if (Storage.compare_exchange_strong(ExistingPacked, NewPacked))
    return true;
  Existing = unpack(ExistingPacked);
  return false;
}

DataRecordHandle DataRecordHandle::construct(char *Mem, const Input &I) {
  return constructImpl(Mem, I, Layout(I));
}

Expected<DataRecordHandle>
DataRecordHandle::getFromDataPool(const OnDiskDataAllocator &Pool,
                                  FileOffset Offset) {
  auto HeaderData = Pool.get(Offset, sizeof(DataRecordHandle::Header));
  if (!HeaderData)
    return HeaderData.takeError();

  auto Record = DataRecordHandle::get(HeaderData->data());
  if (Record.getTotalSize() + Offset.get() > Pool.size())
    return createStringError(
        make_error_code(std::errc::illegal_byte_sequence),
        "data record spans past the end of the data pool");

  return Record;
}

DataRecordHandle DataRecordHandle::constructImpl(char *Mem, const Input &I,
                                                 const Layout &L) {
  char *Next = Mem + sizeof(Header);

  // Fill in Packed and set other data, then come back to construct the
  // header.
  Header::PackTy Packed = 0;
  Packed |= LayoutFlags::pack(L.Flags) << Header::LayoutFlagsShift;

  // Construct DataSize.
  switch (L.Flags.DataSize) {
  case DataSizeFlags::Uses1B:
    assert(I.Data.size() <= UINT8_MAX);
    Packed |= (Header::PackTy)I.Data.size()
              << ((sizeof(Packed) - 2) * CHAR_BIT);
    break;
  case DataSizeFlags::Uses2B:
    assert(I.Data.size() <= UINT16_MAX);
    Packed |= (Header::PackTy)I.Data.size()
              << ((sizeof(Packed) - 4) * CHAR_BIT);
    break;
  case DataSizeFlags::Uses4B:
    support::endian::write32le(Next, I.Data.size());
    Next += 4;
    break;
  case DataSizeFlags::Uses8B:
    support::endian::write64le(Next, I.Data.size());
    Next += 8;
    break;
  }

  // Construct NumRefs.
  //
  // NOTE: May be writing NumRefs even if there are zero refs in order to fix
  // alignment.
  switch (L.Flags.NumRefs) {
  case NumRefsFlags::Uses0B:
    break;
  case NumRefsFlags::Uses1B:
    assert(I.Refs.size() <= UINT8_MAX);
    Packed |= (Header::PackTy)I.Refs.size()
              << ((sizeof(Packed) - 2) * CHAR_BIT);
    break;
  case NumRefsFlags::Uses2B:
    assert(I.Refs.size() <= UINT16_MAX);
    Packed |= (Header::PackTy)I.Refs.size()
              << ((sizeof(Packed) - 4) * CHAR_BIT);
    break;
  case NumRefsFlags::Uses4B:
    support::endian::write32le(Next, I.Refs.size());
    Next += 4;
    break;
  case NumRefsFlags::Uses8B:
    support::endian::write64le(Next, I.Refs.size());
    Next += 8;
    break;
  }

  // Construct Refs[].
  if (!I.Refs.empty()) {
    assert((L.Flags.RefKind == RefKindFlags::InternalRef4B) == I.Refs.is4B());
    ArrayRef<uint8_t> RefsBuffer = I.Refs.getBuffer();
    llvm::copy(RefsBuffer, Next);
    Next += RefsBuffer.size();
  }

  // Construct Data and the trailing null.
  assert(isAddrAligned(Align(8), Next));
  llvm::copy(I.Data, Next);
  Next[I.Data.size()] = 0;

  // Construct the header itself and return.
  Header *H = new (Mem) Header{Packed};
  DataRecordHandle Record(*H);
  assert(Record.getData() == I.Data);
  assert(Record.getNumRefs() == I.Refs.size());
  assert(Record.getRefs() == I.Refs);
  assert(Record.getLayoutFlags().DataSize == L.Flags.DataSize);
  assert(Record.getLayoutFlags().NumRefs == L.Flags.NumRefs);
  assert(Record.getLayoutFlags().RefKind == L.Flags.RefKind);
  return Record;
}

DataRecordHandle::Layout::Layout(const Input &I) {
  // Start initial relative offsets right after the Header.
  uint64_t RelOffset = sizeof(Header);

  // Initialize the easy stuff.
  DataSize = I.Data.size();
  NumRefs = I.Refs.size();

  // Check refs size.
  Flags.RefKind =
      I.Refs.is4B() ? RefKindFlags::InternalRef4B : RefKindFlags::InternalRef;

  // Find the smallest slot available for DataSize.
  bool Has1B = true;
  bool Has2B = true;
  if (DataSize <= UINT8_MAX && Has1B) {
    Flags.DataSize = DataSizeFlags::Uses1B;
    Has1B = false;
  } else if (DataSize <= UINT16_MAX && Has2B) {
    Flags.DataSize = DataSizeFlags::Uses2B;
    Has2B = false;
  } else if (DataSize <= UINT32_MAX) {
    Flags.DataSize = DataSizeFlags::Uses4B;
    RelOffset += 4;
  } else {
    Flags.DataSize = DataSizeFlags::Uses8B;
    RelOffset += 8;
  }

  // Find the smallest slot available for NumRefs. Never sets NumRefs8B here.
  if (!NumRefs) {
    Flags.NumRefs = NumRefsFlags::Uses0B;
  } else if (NumRefs <= UINT8_MAX && Has1B) {
    Flags.NumRefs = NumRefsFlags::Uses1B;
    Has1B = false;
  } else if (NumRefs <= UINT16_MAX && Has2B) {
    Flags.NumRefs = NumRefsFlags::Uses2B;
    Has2B = false;
  } else {
    Flags.NumRefs = NumRefsFlags::Uses4B;
    RelOffset += 4;
  }

  // Helper to "upgrade" either DataSize or NumRefs by 4B to avoid complicated
  // padding rules when reading and writing. This also bumps RelOffset.
  //
  // The value for NumRefs is strictly limited to UINT32_MAX, but it can be
  // stored as 8B. This means we can *always* find a size to grow.
  //
  // NOTE: Only call this once.
  auto GrowSizeFieldsBy4B = [&]() {
    assert(isAligned(Align(4), RelOffset));
    RelOffset += 4;

    assert(Flags.NumRefs != NumRefsFlags::Uses8B &&
           "Expected to be able to grow NumRefs8B");

    // First try to grow DataSize. NumRefs will not (yet) be 8B, and if
    // DataSize is upgraded to 8B it'll already be aligned.
    //
    // Failing that, grow NumRefs.
    if (Flags.DataSize < DataSizeFlags::Uses4B)
      Flags.DataSize = DataSizeFlags::Uses4B; // DataSize: Packed => 4B.
    else if (Flags.DataSize < DataSizeFlags::Uses8B)
      Flags.DataSize = DataSizeFlags::Uses8B; // DataSize: 4B => 8B.
    else if (Flags.NumRefs < NumRefsFlags::Uses4B)
      Flags.NumRefs = NumRefsFlags::Uses4B; // NumRefs: Packed => 4B.
    else
      Flags.NumRefs = NumRefsFlags::Uses8B; // NumRefs: 4B => 8B.
  };

  assert(isAligned(Align(4), RelOffset));
  if (Flags.RefKind == RefKindFlags::InternalRef) {
    // List of 8B refs should be 8B-aligned. Grow one of the sizes to get this
    // without padding.
    if (!isAligned(Align(8), RelOffset))
      GrowSizeFieldsBy4B();
    assert(isAligned(Align(8), RelOffset));
    RefsRelOffset = RelOffset;
    RelOffset += 8 * NumRefs;
  } else {
    // The array of 4B refs doesn't need 8B alignment, but the data will need
    // to be 8B-aligned. Detect this now, and, if necessary, shift everything
    // by 4B by growing one of the sizes.
    //
    // If we remove the need for 8B-alignment for data there is <1% savings in
    // disk storage for a clang build using MCCAS, but the 8B-alignment may be
    // useful in the future so keep it for now.
    uint64_t RefListSize = 4 * NumRefs;
    if (!isAligned(Align(8), RelOffset + RefListSize))
      GrowSizeFieldsBy4B();
    RefsRelOffset = RelOffset;
    RelOffset += RefListSize;
  }

  assert(isAligned(Align(8), RelOffset));
  DataRelOffset = RelOffset;
}

uint64_t DataRecordHandle::getDataSize() const {
  int64_t RelOffset = sizeof(Header);
  auto *DataSizePtr = reinterpret_cast<const char *>(H) + RelOffset;
  switch (getLayoutFlags().DataSize) {
  case DataSizeFlags::Uses1B:
    return (H->Packed >> ((sizeof(Header::PackTy) - 2) * CHAR_BIT)) &
           UINT8_MAX;
  case DataSizeFlags::Uses2B:
    return (H->Packed >> ((sizeof(Header::PackTy) - 4) * CHAR_BIT)) &
           UINT16_MAX;
  case DataSizeFlags::Uses4B:
    return support::endian::read32le(DataSizePtr);
  case DataSizeFlags::Uses8B:
    return support::endian::read64le(DataSizePtr);
  }
  llvm_unreachable("Unknown DataSizeFlags enum");
}

void DataRecordHandle::skipDataSize(LayoutFlags LF, int64_t &RelOffset) const {
  if (LF.DataSize >= DataSizeFlags::Uses4B)
    RelOffset += 4;
  if (LF.DataSize >= DataSizeFlags::Uses8B)
    RelOffset += 4;
}

uint32_t DataRecordHandle::getNumRefs() const {
  LayoutFlags LF = getLayoutFlags();
  int64_t RelOffset = sizeof(Header);
  skipDataSize(LF, RelOffset);
  auto *NumRefsPtr = reinterpret_cast<const char *>(H) + RelOffset;
  switch (LF.NumRefs) {
  case NumRefsFlags::Uses0B:
    return 0;
  case NumRefsFlags::Uses1B:
    return (H->Packed >> ((sizeof(Header::PackTy) - 2) * CHAR_BIT)) &
           UINT8_MAX;
  case NumRefsFlags::Uses2B:
    return (H->Packed >> ((sizeof(Header::PackTy) - 4) * CHAR_BIT)) &
           UINT16_MAX;
  case NumRefsFlags::Uses4B:
    return support::endian::read32le(NumRefsPtr);
  case NumRefsFlags::Uses8B:
    return support::endian::read64le(NumRefsPtr);
  }
  llvm_unreachable("Unknown NumRefsFlags enum");
}

void DataRecordHandle::skipNumRefs(LayoutFlags LF, int64_t &RelOffset) const {
  if (LF.NumRefs >= NumRefsFlags::Uses4B)
    RelOffset += 4;
  if (LF.NumRefs >= NumRefsFlags::Uses8B)
    RelOffset += 4;
}

int64_t DataRecordHandle::getRefsRelOffset() const {
  LayoutFlags LF = getLayoutFlags();
  int64_t RelOffset = sizeof(Header);
  skipDataSize(LF, RelOffset);
  skipNumRefs(LF, RelOffset);
  return RelOffset;
}

int64_t DataRecordHandle::getDataRelOffset() const {
  LayoutFlags LF = getLayoutFlags();
  int64_t RelOffset = sizeof(Header);
  skipDataSize(LF, RelOffset);
  skipNumRefs(LF, RelOffset);
  uint32_t RefSize = LF.RefKind == RefKindFlags::InternalRef4B ? 4 : 8;
  RelOffset += RefSize * getNumRefs();
  return RelOffset;
}
Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const {
  return Index.validate([&](FileOffset Offset,
                            OnDiskTrieRawHashMap::ConstValueProxy Record)
                            -> Error {
    auto formatError = [&](Twine Msg) {
      return createStringError(
          llvm::errc::illegal_byte_sequence,
          "bad record at 0x" +
              utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) + ": " +
              Msg.str());
    };

    if (Record.Data.size() != sizeof(TrieRecord))
      return formatError("wrong data record size");
    if (!isAligned(Align::Of<TrieRecord>(), Record.Data.size()))
      return formatError("wrong data record alignment");

    auto *R = reinterpret_cast<const TrieRecord *>(Record.Data.data());
    TrieRecord::Data D = R->load();
    std::unique_ptr<MemoryBuffer> FileBuffer;
    if ((uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::Unknown &&
        (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::DataPool &&
        (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::Standalone &&
        (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::StandaloneLeaf &&
        (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::StandaloneLeaf0)
      return formatError("invalid record kind value");

    auto Ref = InternalRef::getFromOffset(Offset);
    auto I = getIndexProxyFromRef(Ref);
    if (!I)
      return I.takeError();

    switch (D.SK) {
    case TrieRecord::StorageKind::Unknown:
      // This could be an abandoned entry due to a termination before updating
      // the record. It can be reused by a later insertion, so just skip this
      // entry for now.
      return Error::success();
    case TrieRecord::StorageKind::DataPool:
      // Check that the offset is a positive value and large enough to hold
      // the header for the data record.
      if (D.Offset.get() <= 0 ||
          (uint64_t)D.Offset.get() + sizeof(DataRecordHandle::Header) >=
              DataPool.size())
        return formatError("datapool record out of bounds");
      break;
    case TrieRecord::StorageKind::Standalone:
    case TrieRecord::StorageKind::StandaloneLeaf:
    case TrieRecord::StorageKind::StandaloneLeaf0:
      SmallString<256> Path;
      getStandalonePath(TrieRecord::getStandaloneFilePrefix(D.SK), *I, Path);
      // If we need to validate the content of the file later, just load the
      // buffer here. Otherwise, just check the existence of the file.
      if (Deep) {
        auto File = MemoryBuffer::getFile(Path, /*IsText=*/false,
                                          /*RequiresNullTerminator=*/false);
        if (!File || !*File)
          return formatError("record file '" + Path + "' does not exist");

        FileBuffer = std::move(*File);
      } else if (!llvm::sys::fs::exists(Path))
        return formatError("record file '" + Path + "' does not exist");
    }

    if (!Deep)
      return Error::success();

    auto dataError = [&](Twine Msg) {
      return createStringError(llvm::errc::illegal_byte_sequence,
                               "bad data for digest '" + toHex(I->Hash) +
                                   "': " + Msg.str());
    };
    SmallVector<ArrayRef<uint8_t>> Refs;
    ArrayRef<char> StoredData;

    switch (D.SK) {
    case TrieRecord::StorageKind::Unknown:
      llvm_unreachable("already handled");
    case TrieRecord::StorageKind::DataPool: {
      auto DataRecord = DataRecordHandle::getFromDataPool(DataPool, D.Offset);
      if (!DataRecord)
        return dataError(toString(DataRecord.takeError()));

      for (auto InternRef : DataRecord->getRefs()) {
        auto Index = getIndexProxyFromRef(InternRef);
        if (!Index)
          return Index.takeError();
        Refs.push_back(Index->Hash);
      }
      StoredData = DataRecord->getData();
      break;
    }
    case TrieRecord::StorageKind::Standalone: {
      if (FileBuffer->getBufferSize() < sizeof(DataRecordHandle::Header))
        return dataError("data record is not big enough to read the header");

      auto DataRecord = DataRecordHandle::get(FileBuffer->getBufferStart());
      if (DataRecord.getTotalSize() > FileBuffer->getBufferSize())
        return dataError(
            "data record spans past the end of the standalone file");

      for (auto InternRef : DataRecord.getRefs()) {
        auto Index = getIndexProxyFromRef(InternRef);
        if (!Index)
          return Index.takeError();
        Refs.push_back(Index->Hash);
      }
      StoredData = DataRecord.getData();
      break;
    }
    case TrieRecord::StorageKind::StandaloneLeaf:
    case TrieRecord::StorageKind::StandaloneLeaf0: {
      StoredData = arrayRefFromStringRef<char>(FileBuffer->getBuffer());
      if (D.SK == TrieRecord::StorageKind::StandaloneLeaf0) {
        if (!FileBuffer->getBuffer().ends_with('\0'))
          return dataError("standalone file is not zero terminated");
        StoredData = StoredData.drop_back(1);
      }
      break;
    }
    }

    SmallVector<uint8_t> ComputedHash;
    Hasher(Refs, StoredData, ComputedHash);
    if (I->Hash != ArrayRef(ComputedHash))
      return dataError("hash mismatch, got '" + toHex(ComputedHash) +
                       "' instead");

    return Error::success();
  });
}

void OnDiskGraphDB::print(raw_ostream &OS) const {
  OS << "on-disk-root-path: " << RootPath << "\n";

  struct PoolInfo {
    uint64_t Offset;
  };
  SmallVector<PoolInfo> Pool;

  OS << "\n";
  OS << "index:\n";
  Index.print(OS, [&](ArrayRef<char> Data) {
    assert(Data.size() == sizeof(TrieRecord));
    assert(isAligned(Align::Of<TrieRecord>(), Data.size()));
    auto *R = reinterpret_cast<const TrieRecord *>(Data.data());
    TrieRecord::Data D = R->load();
    OS << " SK=";
    switch (D.SK) {
    case TrieRecord::StorageKind::Unknown:
      OS << "unknown          ";
      break;
    case TrieRecord::StorageKind::DataPool:
      OS << "datapool         ";
      Pool.push_back({(uint64_t)D.Offset.get()});
      break;
    case TrieRecord::StorageKind::Standalone:
      OS << "standalone-data  ";
      break;
    case TrieRecord::StorageKind::StandaloneLeaf:
      OS << "standalone-leaf  ";
      break;
    case TrieRecord::StorageKind::StandaloneLeaf0:
      OS << "standalone-leaf+0";
      break;
    }
    OS << " Offset=" << (void *)D.Offset.get();
  });
  if (Pool.empty())
    return;

  OS << "\n";
  OS << "pool:\n";
  llvm::sort(
      Pool, [](PoolInfo LHS, PoolInfo RHS) { return LHS.Offset < RHS.Offset; });
  for (PoolInfo PI : Pool) {
    OS << "- addr=" << (void *)PI.Offset << " ";
    auto D = DataRecordHandle::getFromDataPool(DataPool, FileOffset(PI.Offset));
    if (!D) {
      OS << "error: " << toString(D.takeError());
      return;
    }

    OS << "record refs=" << D->getNumRefs() << " data=" << D->getDataSize()
       << " size=" << D->getTotalSize()
       << " end=" << (void *)(PI.Offset + D->getTotalSize()) << "\n";
  }
}
Expected<OnDiskGraphDB::IndexProxy>
OnDiskGraphDB::indexHash(ArrayRef<uint8_t> Hash) {
  auto P = Index.insertLazy(
      Hash, [](FileOffset TentativeOffset,
               OnDiskTrieRawHashMap::ValueProxy TentativeValue) {
        assert(TentativeValue.Data.size() == sizeof(TrieRecord));
        assert(isAddrAligned(Align::Of<TrieRecord>(),
                             TentativeValue.Data.data()));
        new (TentativeValue.Data.data()) TrieRecord();
      });
  if (LLVM_UNLIKELY(!P))
    return P.takeError();

  assert(*P && "Expected insertion");
  return getIndexProxyFromPointer(*P);
}

OnDiskGraphDB::IndexProxy OnDiskGraphDB::getIndexProxyFromPointer(
    OnDiskTrieRawHashMap::ConstOnDiskPtr P) const {
  assert(P);
  assert(P.getOffset());
  return IndexProxy{P.getOffset(), P->Hash,
                    *const_cast<TrieRecord *>(
                        reinterpret_cast<const TrieRecord *>(P->Data.data()))};
}

Expected<ObjectID> OnDiskGraphDB::getReference(ArrayRef<uint8_t> Hash) {
  auto I = indexHash(Hash);
  if (LLVM_UNLIKELY(!I))
    return I.takeError();
  return getExternalReference(*I);
}

ObjectID OnDiskGraphDB::getExternalReference(const IndexProxy &I) {
  return getExternalReference(makeInternalRef(I.Offset));
}

std::optional<ObjectID>
OnDiskGraphDB::getExistingReference(ArrayRef<uint8_t> Digest) {
  auto tryUpstream =
      [&](std::optional<IndexProxy> I) -> std::optional<ObjectID> {
    if (!UpstreamDB)
      return std::nullopt;
    std::optional<ObjectID> UpstreamID =
        UpstreamDB->getExistingReference(Digest);
    if (LLVM_UNLIKELY(!UpstreamID))
      return std::nullopt;
    auto Ref = expectedToOptional(indexHash(Digest));
    if (!Ref)
      return std::nullopt;
    if (!I)
      I.emplace(*Ref);
    return getExternalReference(*I);
  };

  OnDiskTrieRawHashMap::ConstOnDiskPtr P = Index.find(Digest);
  if (!P)
    return tryUpstream(std::nullopt);
  IndexProxy I = getIndexProxyFromPointer(P);
  TrieRecord::Data Obj = I.Ref.load();
  if (Obj.SK == TrieRecord::StorageKind::Unknown)
    return tryUpstream(I);
  return getExternalReference(makeInternalRef(I.Offset));
}

Expected<OnDiskGraphDB::IndexProxy>
OnDiskGraphDB::getIndexProxyFromRef(InternalRef Ref) const {
  auto P = Index.recoverFromFileOffset(Ref.getFileOffset());
  if (LLVM_UNLIKELY(!P))
    return P.takeError();
  return getIndexProxyFromPointer(*P);
}

Expected<ArrayRef<uint8_t>> OnDiskGraphDB::getDigest(InternalRef Ref) const {
  auto I = getIndexProxyFromRef(Ref);
  if (!I)
    return I.takeError();
  return I->Hash;
}

ArrayRef<uint8_t> OnDiskGraphDB::getDigest(const IndexProxy &I) const {
  return I.Hash;
}
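
// Illustrative example of the ObjectHandle encoding decoded below (see
// ObjectHandle::fromFileOffset() and ObjectHandle::fromMemory() above; the
// addresses are made up): both file offsets and heap pointers are at least
// 2-byte aligned, so bit 0 is free to act as a tag:
//
//   OpaqueData = 0x2468         -> bit 0 clear: DataPool record at
//                                  FileOffset(0x2468)
//   OpaqueData = 0x7f8a2c004001 -> bit 0 set: StandaloneDataInMemory * at
//                                  0x7f8a2c004000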
static OnDiskContent getContentFromHandle(const OnDiskDataAllocator &DataPool,
                                          ObjectHandle OH) {
  // Decode ObjectHandle to locate the stored content.
  uint64_t Data = OH.getOpaqueData();
  if (Data & 1) {
    const auto *SDIM =
        reinterpret_cast<const StandaloneDataInMemory *>(Data & (-1ULL << 1));
    return SDIM->getContent();
  }

  auto DataHandle =
      cantFail(DataRecordHandle::getFromDataPool(DataPool, FileOffset(Data)));
  assert(DataHandle.getData().end()[0] == 0 && "Null termination");
  return OnDiskContent{DataHandle, std::nullopt};
}

ArrayRef<char> OnDiskGraphDB::getObjectData(ObjectHandle Node) const {
  OnDiskContent Content = getContentFromHandle(DataPool, Node);
  if (Content.Bytes)
    return *Content.Bytes;
  assert(Content.Record && "Expected record or bytes");
  return Content.Record->getData();
}

InternalRefArrayRef OnDiskGraphDB::getInternalRefs(ObjectHandle Node) const {
  if (std::optional<DataRecordHandle> Record =
          getContentFromHandle(DataPool, Node).Record)
    return Record->getRefs();
  return std::nullopt;
}

Expected<std::optional<ObjectHandle>>
OnDiskGraphDB::load(ObjectID ExternalRef) {
  InternalRef Ref = getInternalRef(ExternalRef);
  auto I = getIndexProxyFromRef(Ref);
  if (!I)
    return I.takeError();
  TrieRecord::Data Object = I->Ref.load();

  if (Object.SK == TrieRecord::StorageKind::Unknown) {
    if (!UpstreamDB)
      return std::nullopt;
    return faultInFromUpstream(ExternalRef);
  }

  if (Object.SK == TrieRecord::StorageKind::DataPool)
    return ObjectHandle::fromFileOffset(Object.Offset);

  // Only TrieRecord::StorageKind::Standalone (and variants) need to be
  // explicitly loaded.
  //
  // There's corruption if standalone objects have offsets, or if we get here
  // for something that isn't standalone.
  if (Object.Offset)
    return createCorruptObjectError(getDigest(*I));
  switch (Object.SK) {
  case TrieRecord::StorageKind::Unknown:
  case TrieRecord::StorageKind::DataPool:
    llvm_unreachable("unexpected storage kind");
  case TrieRecord::StorageKind::Standalone:
  case TrieRecord::StorageKind::StandaloneLeaf0:
  case TrieRecord::StorageKind::StandaloneLeaf:
    break;
  }

  // Load it from disk.
  //
  // Note: Creation logic guarantees that data that needs null-termination is
  // suitably 0-padded. Requiring null-termination here would be too expensive
  // for extremely large objects that happen to be page-aligned.
  SmallString<256> Path;
  getStandalonePath(TrieRecord::getStandaloneFilePrefix(Object.SK), *I, Path);

  auto File = sys::fs::openNativeFileForRead(Path);
  if (!File)
    return createFileError(Path, File.takeError());
  auto CloseFile = make_scope_exit([&]() { sys::fs::closeFile(*File); });

  sys::fs::file_status Status;
  if (std::error_code EC = sys::fs::status(*File, Status))
    return createCorruptObjectError(getDigest(*I));

  std::error_code EC;
  auto Region = std::make_unique<sys::fs::mapped_file_region>(
      *File, sys::fs::mapped_file_region::readonly, Status.getSize(), 0, EC);
  if (EC)
    return createCorruptObjectError(getDigest(*I));

  return ObjectHandle::fromMemory(
      static_cast<StandaloneDataMapTy *>(StandaloneData)
          ->insert(I->Hash, Object.SK, std::move(Region)));
}

Expected<bool> OnDiskGraphDB::isMaterialized(ObjectID Ref) {
  auto Presence = getObjectPresence(Ref, /*CheckUpstream=*/true);
  if (!Presence)
    return Presence.takeError();

  switch (*Presence) {
  case ObjectPresence::Missing:
    return false;
  case ObjectPresence::InPrimaryDB:
    return true;
  case ObjectPresence::OnlyInUpstreamDB:
    if (auto FaultInResult = faultInFromUpstream(Ref); !FaultInResult)
      return FaultInResult.takeError();
    return true;
  }
  llvm_unreachable("Unknown ObjectPresence enum");
}

Expected<OnDiskGraphDB::ObjectPresence>
OnDiskGraphDB::getObjectPresence(ObjectID ExternalRef,
                                 bool CheckUpstream) const {
  InternalRef Ref = getInternalRef(ExternalRef);
  auto I = getIndexProxyFromRef(Ref);
  if (!I)
    return I.takeError();

  TrieRecord::Data Object = I->Ref.load();
  if (Object.SK != TrieRecord::StorageKind::Unknown)
    return ObjectPresence::InPrimaryDB;
  if (!CheckUpstream || !UpstreamDB)
    return ObjectPresence::Missing;
  std::optional<ObjectID> UpstreamID =
      UpstreamDB->getExistingReference(getDigest(*I));
  return UpstreamID.has_value() ? ObjectPresence::OnlyInUpstreamDB
                                : ObjectPresence::Missing;
}

InternalRef OnDiskGraphDB::makeInternalRef(FileOffset IndexOffset) {
  return InternalRef::getFromOffset(IndexOffset);
}

void OnDiskGraphDB::getStandalonePath(StringRef Prefix, const IndexProxy &I,
                                      SmallVectorImpl<char> &Path) const {
  Path.assign(RootPath.begin(), RootPath.end());
  sys::path::append(Path,
                    Prefix + Twine(I.Offset.get()) + "." + CASFormatVersion);
}
OnDiskContent StandaloneDataInMemory::getContent() const {
  bool Leaf0 = false;
  bool Leaf = false;
  switch (SK) {
  default:
    llvm_unreachable("Storage kind must be standalone");
  case TrieRecord::StorageKind::Standalone:
    break;
  case TrieRecord::StorageKind::StandaloneLeaf0:
    Leaf = Leaf0 = true;
    break;
  case TrieRecord::StorageKind::StandaloneLeaf:
    Leaf = true;
    break;
  }

  if (Leaf) {
    StringRef Data(Region->data(), Region->size());
    assert(Data.drop_back(Leaf0).end()[0] == 0 &&
           "Standalone node data missing null termination");
    return OnDiskContent{std::nullopt,
                         arrayRefFromStringRef<char>(Data.drop_back(Leaf0))};
  }

  DataRecordHandle Record = DataRecordHandle::get(Region->data());
  assert(Record.getData().end()[0] == 0 &&
         "Standalone object record missing null termination for data");
  return OnDiskContent{Record, std::nullopt};
}

static Expected<MappedTempFile> createTempFile(StringRef FinalPath,
                                               uint64_t Size) {
  assert(Size && "Unexpected request for an empty temp file");
  Expected<TempFile> File = TempFile::create(FinalPath + ".%%%%%%");
  if (!File)
    return File.takeError();

  if (Error E = preallocateFileTail(File->FD, 0, Size).takeError())
    return createFileError(File->TmpName, std::move(E));

  if (auto EC = sys::fs::resize_file_before_mapping_readwrite(File->FD, Size))
    return createFileError(File->TmpName, EC);

  std::error_code EC;
  sys::fs::mapped_file_region Map(sys::fs::convertFDToNativeFile(File->FD),
                                  sys::fs::mapped_file_region::readwrite,
                                  Size, 0, EC);
  if (EC)
    return createFileError(File->TmpName, EC);
  return MappedTempFile(std::move(*File), std::move(Map));
}

static size_t getPageSize() {
  static int PageSize = sys::Process::getPageSizeEstimate();
  return PageSize;
}

Error OnDiskGraphDB::createStandaloneLeaf(IndexProxy &I, ArrayRef<char> Data) {
  assert(Data.size() > TrieRecord::MaxEmbeddedSize &&
         "Expected a bigger file for external content...");

  bool Leaf0 = isAligned(Align(getPageSize()), Data.size());
  TrieRecord::StorageKind SK = Leaf0
                                   ? TrieRecord::StorageKind::StandaloneLeaf0
                                   : TrieRecord::StorageKind::StandaloneLeaf;

  SmallString<256> Path;
  int64_t FileSize = Data.size() + Leaf0;
  getStandalonePath(TrieRecord::getStandaloneFilePrefix(SK), I, Path);

  // Write the file. Don't reuse this mapped_file_region, which is read/write.
  // Let load() pull up one that's read-only.
  Expected<MappedTempFile> File = createTempFile(Path, FileSize);
  if (!File)
    return File.takeError();
  assert(File->size() == (uint64_t)FileSize);
  llvm::copy(Data, File->data());
  if (Leaf0)
    File->data()[Data.size()] = 0;
  assert(File->data()[Data.size()] == 0);
  if (Error E = File->keep(Path))
    return E;

  // Store the object reference.
  TrieRecord::Data Existing;
  {
    TrieRecord::Data Leaf{SK, FileOffset()};
    if (I.Ref.compare_exchange_strong(Existing, Leaf)) {
      recordStandaloneSizeIncrease(FileSize);
      return Error::success();
    }
  }

  // If there was a race, confirm that the new value has valid storage.
  if (Existing.SK == TrieRecord::StorageKind::Unknown)
    return createCorruptObjectError(getDigest(I));

  return Error::success();
}
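
// Illustrative note on the Leaf0 choice above (assuming a 4096-byte page
// size): an 8000-byte leaf is mapped with kernel zero-fill up to the 8192-byte
// page boundary, so the '\0' after the data comes for free and the object is
// stored as StandaloneLeaf. An 8192-byte leaf has no spare mapped bytes past
// the end, so it is stored as StandaloneLeaf0 with an explicit trailing '\0'
// (file size 8193).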
Error OnDiskGraphDB::store(ObjectID ID, ArrayRef<ObjectID> Refs,
                           ArrayRef<char> Data) {
  auto I = getIndexProxyFromRef(getInternalRef(ID));
  if (LLVM_UNLIKELY(!I))
    return I.takeError();

  // Early return in case the node exists.
  {
    TrieRecord::Data Existing = I->Ref.load();
    if (Existing.SK != TrieRecord::StorageKind::Unknown)
      return Error::success();
  }

  // Big leaf nodes.
  if (Refs.empty() && Data.size() > TrieRecord::MaxEmbeddedSize)
    return createStandaloneLeaf(*I, Data);

  // TODO: Check whether it's worth checking the index for an already existing
  // object (like storeTreeImpl() does) before building up the
  // InternalRefVector.
  InternalRefVector InternalRefs;
  for (ObjectID Ref : Refs)
    InternalRefs.push_back(getInternalRef(Ref));

  // Create the object.
  DataRecordHandle::Input Input{InternalRefs, Data};

  // Compute the storage kind, allocate it, and create the record.
  TrieRecord::StorageKind SK = TrieRecord::StorageKind::Unknown;
  FileOffset PoolOffset;
  SmallString<256> Path;
  std::optional<MappedTempFile> File;
  std::optional<uint64_t> FileSize;
  auto AllocStandaloneFile = [&](size_t Size) -> Expected<char *> {
    getStandalonePath(TrieRecord::getStandaloneFilePrefix(
                          TrieRecord::StorageKind::Standalone),
                      *I, Path);
    if (Error E = createTempFile(Path, Size).moveInto(File))
      return std::move(E);
    assert(File->size() == Size);
    FileSize = Size;
    SK = TrieRecord::StorageKind::Standalone;
    return File->data();
  };
  auto Alloc = [&](size_t Size) -> Expected<char *> {
    if (Size <= TrieRecord::MaxEmbeddedSize) {
      SK = TrieRecord::StorageKind::DataPool;
      auto P = DataPool.allocate(Size);
      if (LLVM_UNLIKELY(!P)) {
        char *NewAlloc = nullptr;
        auto NewE = handleErrors(
            P.takeError(), [&](std::unique_ptr<StringError> E) -> Error {
              if (E->convertToErrorCode() == std::errc::not_enough_memory)
                return AllocStandaloneFile(Size).moveInto(NewAlloc);
              return Error(std::move(E));
            });
        if (!NewE)
          return NewAlloc;
        return std::move(NewE);
      }
      PoolOffset = P->getOffset();
      LLVM_DEBUG({
        dbgs() << "pool-alloc addr=" << (void *)PoolOffset.get()
               << " size=" << Size
               << " end=" << (void *)(PoolOffset.get() + Size) << "\n";
      });
      return (*P)->data();
    }
    return AllocStandaloneFile(Size);
  };

  DataRecordHandle Record;
  if (Error E =
          DataRecordHandle::createWithError(Alloc, Input).moveInto(Record))
    return E;
  assert(Record.getData().end()[0] == 0 && "Expected null-termination");
  assert(Record.getData() == Input.Data && "Expected initialization");
  assert(SK != TrieRecord::StorageKind::Unknown);
  assert(bool(File) != bool(PoolOffset) &&
         "Expected either a mapped file or a pooled offset");

  // Check for a race before calling MappedTempFile::keep().
  //
  // Then decide what to do with the file. Better to discard than overwrite if
  // another thread/process has already added this.
  TrieRecord::Data Existing = I->Ref.load();
  {
    TrieRecord::Data NewObject{SK, PoolOffset};
    if (File) {
      if (Existing.SK == TrieRecord::StorageKind::Unknown) {
        // Keep the file!
        if (Error E = File->keep(Path))
          return E;
      } else {
        File.reset();
      }
    }

    // If we didn't already see a racing/existing write, then try storing the
    // new object. If that races, confirm that the new value has valid storage.
    //
    // TODO: Find a way to reuse the storage from the new-but-abandoned record
    // handle.
    if (Existing.SK == TrieRecord::StorageKind::Unknown) {
      if (I->Ref.compare_exchange_strong(Existing, NewObject)) {
        if (FileSize)
          recordStandaloneSizeIncrease(*FileSize);
        return Error::success();
      }
    }
  }

  if (Existing.SK == TrieRecord::StorageKind::Unknown)
    return createCorruptObjectError(getDigest(*I));

  // Load existing object.
  return Error::success();
}

void OnDiskGraphDB::recordStandaloneSizeIncrease(size_t SizeIncrease) {
  standaloneStorageSize().fetch_add(SizeIncrease, std::memory_order_relaxed);
}

std::atomic<uint64_t> &OnDiskGraphDB::standaloneStorageSize() const {
  MutableArrayRef<uint8_t> UserHeader = DataPool.getUserHeader();
  assert(UserHeader.size() == sizeof(std::atomic<uint64_t>));
  assert(isAddrAligned(Align(8), UserHeader.data()));
  return *reinterpret_cast<std::atomic<uint64_t> *>(UserHeader.data());
}

uint64_t OnDiskGraphDB::getStandaloneStorageSize() const {
  return standaloneStorageSize().load(std::memory_order_relaxed);
}

size_t OnDiskGraphDB::getStorageSize() const {
  return Index.size() + DataPool.size() + getStandaloneStorageSize();
}

unsigned OnDiskGraphDB::getHardStorageLimitUtilization() const {
  unsigned IndexPercent = Index.size() * 100ULL / Index.capacity();
  unsigned DataPercent = DataPool.size() * 100ULL / DataPool.capacity();
  return std::max(IndexPercent, DataPercent);
}

Expected<std::unique_ptr<OnDiskGraphDB>> OnDiskGraphDB::open(
    StringRef AbsPath, StringRef HashName, unsigned HashByteSize,
    std::unique_ptr<OnDiskGraphDB> UpstreamDB, FaultInPolicy Policy) {
  if (std::error_code EC = sys::fs::create_directories(AbsPath))
    return createFileError(AbsPath, EC);

  constexpr uint64_t MB = 1024ull * 1024ull;
  constexpr uint64_t GB = 1024ull * 1024ull * 1024ull;

  uint64_t MaxIndexSize = 12 * GB;
  uint64_t MaxDataPoolSize = 24 * GB;

  if (useSmallMappingSize(AbsPath)) {
    MaxIndexSize = 1 * GB;
    MaxDataPoolSize = 2 * GB;
  }

  auto CustomSize = getOverriddenMaxMappingSize();
  if (!CustomSize)
    return CustomSize.takeError();
  if (*CustomSize)
    MaxIndexSize = MaxDataPoolSize = **CustomSize;

  SmallString<256> IndexPath(AbsPath);
  sys::path::append(IndexPath, IndexFilePrefix + CASFormatVersion);
  std::optional<OnDiskTrieRawHashMap> Index;
  if (Error E = OnDiskTrieRawHashMap::create(
                    IndexPath, IndexTableName + "[" + HashName + "]",
                    HashByteSize * CHAR_BIT,
                    /*DataSize=*/sizeof(TrieRecord), MaxIndexSize,
                    /*MinFileSize=*/MB)
                    .moveInto(Index))
    return std::move(E);

  uint32_t UserHeaderSize = sizeof(std::atomic<uint64_t>);

  SmallString<256> DataPoolPath(AbsPath);
  sys::path::append(DataPoolPath, DataPoolFilePrefix + CASFormatVersion);
  std::optional<OnDiskDataAllocator> DataPool;
  StringRef PolicyName =
      Policy == FaultInPolicy::SingleNode ? "single" : "full";
  if (Error E = OnDiskDataAllocator::create(
                    DataPoolPath,
                    DataPoolTableName + "[" + HashName + "]" + PolicyName,
                    MaxDataPoolSize, /*MinFileSize=*/MB, UserHeaderSize,
                    [](void *UserHeaderPtr) {
                      new (UserHeaderPtr) std::atomic<uint64_t>(0);
                    })
                    .moveInto(DataPool))
    return std::move(E);
  if (DataPool->getUserHeader().size() != UserHeaderSize)
    return createStringError(llvm::errc::argument_out_of_domain,
                             "unexpected user header in '" + DataPoolPath +
                                 "'");

  return std::unique_ptr<OnDiskGraphDB>(
      new OnDiskGraphDB(AbsPath, std::move(*Index), std::move(*DataPool),
                        std::move(UpstreamDB), Policy));
}
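
// Minimal usage sketch of open() above (not part of this file; the path and
// the 32-byte "blake3" hash configuration are placeholder values, and
// FaultInPolicy::FullTree is the assumed spelling of the non-single-node
// policy):
//
//   Expected<std::unique_ptr<OnDiskGraphDB>> DB =
//       OnDiskGraphDB::open("/path/to/cas", "blake3", /*HashByteSize=*/32,
//                           /*UpstreamDB=*/nullptr,
//                           OnDiskGraphDB::FaultInPolicy::FullTree);
//   if (!DB)
//     return DB.takeError();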
OnDiskGraphDB::OnDiskGraphDB(StringRef RootPath, OnDiskTrieRawHashMap Index,
                             OnDiskDataAllocator DataPool,
                             std::unique_ptr<OnDiskGraphDB> UpstreamDB,
                             FaultInPolicy Policy)
    : Index(std::move(Index)), DataPool(std::move(DataPool)),
      RootPath(RootPath.str()), UpstreamDB(std::move(UpstreamDB)),
      FIPolicy(Policy) {
  /// Lifetime for "big" objects not in DataPool.
  ///
  /// NOTE: Could use ThreadSafeTrieRawHashMap here. For now, doing something
  /// simpler on the assumption there won't be much contention since most data
  /// is not big. If there is contention, and we've already fixed ObjectProxy
  /// object handles to be cheap enough to use consistently, the fix might be
  /// to make better use of them rather than optimizing this map.
  ///
  /// FIXME: Figure out the right number of shards, if any.
  StandaloneData = new StandaloneDataMapTy();
}

OnDiskGraphDB::~OnDiskGraphDB() {
  delete static_cast<StandaloneDataMapTy *>(StandaloneData);
}

Error OnDiskGraphDB::importFullTree(ObjectID PrimaryID,
                                    ObjectHandle UpstreamNode) {
  // Copies the full CAS tree from upstream. Uses depth-first copying to
  // protect against the process dying during importing and leaving the
  // database with an incomplete tree. Note that if the upstream has missing
  // nodes then the tree will be copied with missing nodes as well; it won't
  // be considered an error.

  struct UpstreamCursor {
    ObjectHandle Node;
    size_t RefsCount;
    object_refs_iterator RefI;
    object_refs_iterator RefE;
  };
  /// Keeps track of the state of visitation for the current node and all of
  /// its parents.
  SmallVector<UpstreamCursor> CursorStack;
  /// Keeps track of the currently visited nodes as they are imported into the
  /// primary database, from the current node and its parents. When a node is
  /// entered for visitation it appends its own ID, then appends referenced IDs
  /// as they get imported. When a node is fully imported it removes the
  /// referenced IDs from the bottom of the stack, which leaves its own ID at
  /// the bottom, adding to the list of referenced IDs for the parent node.
  SmallVector<ObjectID> PrimaryNodesStack;

  auto enqueueNode = [&](ObjectID PrimaryID,
                         std::optional<ObjectHandle> Node) {
    PrimaryNodesStack.push_back(PrimaryID);
    if (!Node)
      return;
    auto Refs = UpstreamDB->getObjectRefs(*Node);
    CursorStack.push_back({*Node,
                           (size_t)std::distance(Refs.begin(), Refs.end()),
                           Refs.begin(), Refs.end()});
  };

  enqueueNode(PrimaryID, UpstreamNode);

  while (!CursorStack.empty()) {
    UpstreamCursor &Cur = CursorStack.back();
    if (Cur.RefI == Cur.RefE) {
      // Copy the node data into the primary store.
      // FIXME: Use hard-link or cloning if the file-system supports it and
      // data is stored into a separate file.

      // The bottom of \p PrimaryNodesStack contains the primary ID for the
      // current node plus the list of imported referenced IDs.
      assert(PrimaryNodesStack.size() >= Cur.RefsCount + 1);
      ObjectID PrimaryID = *(PrimaryNodesStack.end() - Cur.RefsCount - 1);
      auto PrimaryRefs = ArrayRef(PrimaryNodesStack)
                             .slice(PrimaryNodesStack.size() - Cur.RefsCount);
      auto Data = UpstreamDB->getObjectData(Cur.Node);
      if (Error E = store(PrimaryID, PrimaryRefs, Data))
        return E;
      // Remove the current node and its IDs from the stack.
      PrimaryNodesStack.truncate(PrimaryNodesStack.size() - Cur.RefsCount);
      CursorStack.pop_back();
      continue;
    }

    ObjectID UpstreamID = *(Cur.RefI++);
    auto PrimaryID = getReference(UpstreamDB->getDigest(UpstreamID));
    if (LLVM_UNLIKELY(!PrimaryID))
      return PrimaryID.takeError();
    if (containsObject(*PrimaryID, /*CheckUpstream=*/false)) {
      // This \p ObjectID already exists in the primary. Either it was
      // imported via \p importFullTree or the client created it, in which
      // case the client takes responsibility for how it was formed.
      enqueueNode(*PrimaryID, std::nullopt);
      continue;
    }
    Expected<std::optional<ObjectHandle>> UpstreamNode =
        UpstreamDB->load(UpstreamID);
    if (!UpstreamNode)
      return UpstreamNode.takeError();
    enqueueNode(*PrimaryID, *UpstreamNode);
  }

  assert(PrimaryNodesStack.size() == 1);
  assert(PrimaryNodesStack.front() == PrimaryID);
  return Error::success();
}
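
// Illustrative walk-through of importFullTree() above on an assumed tree
// A -> {B, C} where B and C are leaves. PrimaryNodesStack evolves as:
//
//   [A]        enqueue A
//   [A, B]     visit B: no refs, stored immediately; its ID stays on the
//              stack as a pending ref of A
//   [A, B, C]  visit C: same
//   [A]        A's cursor is exhausted: store(A, {B, C}), pop the two ref IDs
//
// leaving only A's ID on the stack, which matches the asserts at the end.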
Error OnDiskGraphDB::importSingleNode(ObjectID PrimaryID,
                                      ObjectHandle UpstreamNode) {
  // Copies only a single node; it doesn't copy the referenced nodes.

  // Copy the node data into the primary store.
  // FIXME: Use hard-link or cloning if the file-system supports it and data
  // is stored into a separate file.
  auto Data = UpstreamDB->getObjectData(UpstreamNode);
  auto UpstreamRefs = UpstreamDB->getObjectRefs(UpstreamNode);
  SmallVector<ObjectID> Refs;
  Refs.reserve(std::distance(UpstreamRefs.begin(), UpstreamRefs.end()));
  for (ObjectID UpstreamRef : UpstreamRefs) {
    auto Ref = getReference(UpstreamDB->getDigest(UpstreamRef));
    if (LLVM_UNLIKELY(!Ref))
      return Ref.takeError();
    Refs.push_back(*Ref);
  }

  return store(PrimaryID, Refs, Data);
}

Expected<std::optional<ObjectHandle>>
OnDiskGraphDB::faultInFromUpstream(ObjectID PrimaryID) {
  assert(UpstreamDB);

  auto UpstreamID = UpstreamDB->getReference(getDigest(PrimaryID));
  if (LLVM_UNLIKELY(!UpstreamID))
    return UpstreamID.takeError();

  Expected<std::optional<ObjectHandle>> UpstreamNode =
      UpstreamDB->load(*UpstreamID);
  if (!UpstreamNode)
    return UpstreamNode.takeError();
  if (!*UpstreamNode)
    return std::nullopt;

  if (Error E = FIPolicy == FaultInPolicy::SingleNode
                    ? importSingleNode(PrimaryID, **UpstreamNode)
                    : importFullTree(PrimaryID, **UpstreamNode))
    return std::move(E);
  return load(PrimaryID);
}