aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/CAS
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/CAS')
-rw-r--r--llvm/lib/CAS/ActionCaches.cpp166
-rw-r--r--llvm/lib/CAS/BuiltinCAS.cpp14
-rw-r--r--llvm/lib/CAS/BuiltinCAS.h25
-rw-r--r--llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp38
-rw-r--r--llvm/lib/CAS/CMakeLists.txt3
-rw-r--r--llvm/lib/CAS/InMemoryCAS.cpp8
-rw-r--r--llvm/lib/CAS/ObjectStore.cpp93
-rw-r--r--llvm/lib/CAS/OnDiskCAS.cpp211
-rw-r--r--llvm/lib/CAS/OnDiskGraphDB.cpp34
-rw-r--r--llvm/lib/CAS/OnDiskKeyValueDB.cpp21
-rw-r--r--llvm/lib/CAS/UnifiedOnDiskCache.cpp613
11 files changed, 1198 insertions, 28 deletions
diff --git a/llvm/lib/CAS/ActionCaches.cpp b/llvm/lib/CAS/ActionCaches.cpp
index 571c5b3..003c850 100644
--- a/llvm/lib/CAS/ActionCaches.cpp
+++ b/llvm/lib/CAS/ActionCaches.cpp
@@ -13,7 +13,11 @@
#include "BuiltinCAS.h"
#include "llvm/ADT/TrieRawHashMap.h"
#include "llvm/CAS/ActionCache.h"
+#include "llvm/CAS/OnDiskKeyValueDB.h"
+#include "llvm/CAS/UnifiedOnDiskCache.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/Support/BLAKE3.h"
+#include "llvm/Support/Errc.h"
#define DEBUG_TYPE "cas-action-caches"
@@ -47,12 +51,54 @@ public:
Expected<std::optional<CASID>> getImpl(ArrayRef<uint8_t> ActionKey,
bool CanBeDistributed) const final;
+ Error validate() const final {
+ return createStringError("InMemoryActionCache doesn't support validate()");
+ }
+
private:
using DataT = CacheEntry<sizeof(HashType)>;
using InMemoryCacheT = ThreadSafeTrieRawHashMap<DataT, sizeof(HashType)>;
InMemoryCacheT Cache;
};
+
+/// Builtin basic OnDiskActionCache that uses one underlying OnDiskKeyValueDB.
+class OnDiskActionCache final : public ActionCache {
+public:
+ Error putImpl(ArrayRef<uint8_t> ActionKey, const CASID &Result,
+ bool CanBeDistributed) final;
+ Expected<std::optional<CASID>> getImpl(ArrayRef<uint8_t> ActionKey,
+ bool CanBeDistributed) const final;
+
+ static Expected<std::unique_ptr<OnDiskActionCache>> create(StringRef Path);
+
+ Error validate() const final;
+
+private:
+ static StringRef getHashName() { return "BLAKE3"; }
+
+ OnDiskActionCache(std::unique_ptr<ondisk::OnDiskKeyValueDB> DB);
+
+ std::unique_ptr<ondisk::OnDiskKeyValueDB> DB;
+ using DataT = CacheEntry<sizeof(HashType)>;
+};
+
+/// Builtin unified ActionCache that wraps around UnifiedOnDiskCache to provide
+/// access to its ActionCache.
+class UnifiedOnDiskActionCache final : public ActionCache {
+public:
+ Error putImpl(ArrayRef<uint8_t> ActionKey, const CASID &Result,
+ bool CanBeDistributed) final;
+ Expected<std::optional<CASID>> getImpl(ArrayRef<uint8_t> ActionKey,
+ bool CanBeDistributed) const final;
+
+ UnifiedOnDiskActionCache(std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB);
+
+ Error validate() const final;
+
+private:
+ std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB;
+};
} // end namespace
static Error createResultCachePoisonedError(ArrayRef<uint8_t> KeyHash,
@@ -99,3 +145,123 @@ std::unique_ptr<ActionCache> createInMemoryActionCache() {
}
} // namespace llvm::cas
+
+OnDiskActionCache::OnDiskActionCache(
+ std::unique_ptr<ondisk::OnDiskKeyValueDB> DB)
+ : ActionCache(builtin::BuiltinCASContext::getDefaultContext()),
+ DB(std::move(DB)) {}
+
+Expected<std::unique_ptr<OnDiskActionCache>>
+OnDiskActionCache::create(StringRef AbsPath) {
+ std::unique_ptr<ondisk::OnDiskKeyValueDB> DB;
+ if (Error E = ondisk::OnDiskKeyValueDB::open(AbsPath, getHashName(),
+ sizeof(HashType), getHashName(),
+ sizeof(DataT))
+ .moveInto(DB))
+ return std::move(E);
+ return std::unique_ptr<OnDiskActionCache>(
+ new OnDiskActionCache(std::move(DB)));
+}
+
+Expected<std::optional<CASID>>
+OnDiskActionCache::getImpl(ArrayRef<uint8_t> Key,
+ bool /*CanBeDistributed*/) const {
+ std::optional<ArrayRef<char>> Val;
+ if (Error E = DB->get(Key).moveInto(Val))
+ return std::move(E);
+ if (!Val)
+ return std::nullopt;
+ return CASID::create(&getContext(), toStringRef(*Val));
+}
+
+Error OnDiskActionCache::putImpl(ArrayRef<uint8_t> Key, const CASID &Result,
+ bool /*CanBeDistributed*/) {
+ auto ResultHash = Result.getHash();
+ ArrayRef Expected((const char *)ResultHash.data(), ResultHash.size());
+ ArrayRef<char> Observed;
+ if (Error E = DB->put(Key, Expected).moveInto(Observed))
+ return E;
+
+ if (Expected == Observed)
+ return Error::success();
+
+ return createResultCachePoisonedError(
+ Key, getContext(), Result,
+ ArrayRef((const uint8_t *)Observed.data(), Observed.size()));
+}
+
+Error OnDiskActionCache::validate() const {
+ // FIXME: without the matching CAS there is nothing we can check about the
+ // cached values. The hash size is already validated by the DB validator.
+ return DB->validate(nullptr);
+}
+
+UnifiedOnDiskActionCache::UnifiedOnDiskActionCache(
+ std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB)
+ : ActionCache(builtin::BuiltinCASContext::getDefaultContext()),
+ UniDB(std::move(UniDB)) {}
+
+Expected<std::optional<CASID>>
+UnifiedOnDiskActionCache::getImpl(ArrayRef<uint8_t> Key,
+ bool /*CanBeDistributed*/) const {
+ std::optional<ArrayRef<char>> Val;
+ if (Error E = UniDB->getKeyValueDB().get(Key).moveInto(Val))
+ return std::move(E);
+ if (!Val)
+ return std::nullopt;
+ auto ID = ondisk::UnifiedOnDiskCache::getObjectIDFromValue(*Val);
+ return CASID::create(&getContext(),
+ toStringRef(UniDB->getGraphDB().getDigest(ID)));
+}
+
+Error UnifiedOnDiskActionCache::putImpl(ArrayRef<uint8_t> Key,
+ const CASID &Result,
+ bool /*CanBeDistributed*/) {
+ auto Expected = UniDB->getGraphDB().getReference(Result.getHash());
+ if (LLVM_UNLIKELY(!Expected))
+ return Expected.takeError();
+
+ auto Value = ondisk::UnifiedOnDiskCache::getValueFromObjectID(*Expected);
+ std::optional<ArrayRef<char>> Observed;
+ if (Error E = UniDB->getKeyValueDB().put(Key, Value).moveInto(Observed))
+ return E;
+
+ auto ObservedID = ondisk::UnifiedOnDiskCache::getObjectIDFromValue(*Observed);
+ if (*Expected == ObservedID)
+ return Error::success();
+
+ return createResultCachePoisonedError(
+ Key, getContext(), Result, UniDB->getGraphDB().getDigest(ObservedID));
+}
+
+Error UnifiedOnDiskActionCache::validate() const {
+ auto ValidateRef = [](FileOffset Offset, ArrayRef<char> Value) -> Error {
+ auto ID = ondisk::UnifiedOnDiskCache::getObjectIDFromValue(Value);
+ auto formatError = [&](Twine Msg) {
+ return createStringError(
+ llvm::errc::illegal_byte_sequence,
+ "bad record at 0x" +
+ utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) + ": " +
+ Msg.str());
+ };
+ if (ID.getOpaqueData() == 0)
+ return formatError("zero is not a valid ref");
+ return Error::success();
+ };
+ return UniDB->getKeyValueDB().validate(ValidateRef);
+}
+
+Expected<std::unique_ptr<ActionCache>>
+cas::createOnDiskActionCache(StringRef Path) {
+#if LLVM_ENABLE_ONDISK_CAS
+ return OnDiskActionCache::create(Path);
+#else
+ return createStringError(inconvertibleErrorCode(), "OnDiskCache is disabled");
+#endif
+}
+
+std::unique_ptr<ActionCache>
+cas::builtin::createActionCacheFromUnifiedOnDiskCache(
+ std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB) {
+ return std::make_unique<UnifiedOnDiskActionCache>(std::move(UniDB));
+}
diff --git a/llvm/lib/CAS/BuiltinCAS.cpp b/llvm/lib/CAS/BuiltinCAS.cpp
index 73646ad..e9bc6d8 100644
--- a/llvm/lib/CAS/BuiltinCAS.cpp
+++ b/llvm/lib/CAS/BuiltinCAS.cpp
@@ -9,6 +9,7 @@
#include "BuiltinCAS.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CAS/BuiltinObjectHasher.h"
+#include "llvm/CAS/UnifiedOnDiskCache.h"
#include "llvm/Support/Process.h"
using namespace llvm;
@@ -68,7 +69,7 @@ Expected<ObjectRef> BuiltinCAS::store(ArrayRef<ObjectRef> Refs,
Refs, Data);
}
-Error BuiltinCAS::validate(const CASID &ID) {
+Error BuiltinCAS::validateObject(const CASID &ID) {
auto Ref = getReference(ID);
if (!Ref)
return createUnknownObjectError(ID);
@@ -92,3 +93,14 @@ Error BuiltinCAS::validate(const CASID &ID) {
return Error::success();
}
+
+Expected<std::unique_ptr<ondisk::UnifiedOnDiskCache>>
+cas::builtin::createBuiltinUnifiedOnDiskCache(StringRef Path) {
+#if LLVM_ENABLE_ONDISK_CAS
+ return ondisk::UnifiedOnDiskCache::open(Path, /*SizeLimit=*/std::nullopt,
+ BuiltinCASContext::getHashName(),
+ sizeof(HashType));
+#else
+ return createStringError(inconvertibleErrorCode(), "OnDiskCache is disabled");
+#endif
+}
diff --git a/llvm/lib/CAS/BuiltinCAS.h b/llvm/lib/CAS/BuiltinCAS.h
index 3b5374d..4d2de66 100644
--- a/llvm/lib/CAS/BuiltinCAS.h
+++ b/llvm/lib/CAS/BuiltinCAS.h
@@ -1,4 +1,4 @@
-//===- BuiltinCAS.h ---------------------------------------------*- C++ -*-===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -15,6 +15,9 @@
namespace llvm::cas {
class ActionCache;
+namespace ondisk {
+class UnifiedOnDiskCache;
+} // namespace ondisk
namespace builtin {
/// Common base class for builtin CAS implementations using the same CASContext.
@@ -65,9 +68,27 @@ public:
"corrupt storage");
}
- Error validate(const CASID &ID) final;
+ Error validateObject(const CASID &ID) final;
};
+/// Create a \p UnifiedOnDiskCache instance that uses \p BLAKE3 hashing.
+Expected<std::unique_ptr<ondisk::UnifiedOnDiskCache>>
+createBuiltinUnifiedOnDiskCache(StringRef Path);
+
+/// \param UniDB A \p UnifiedOnDiskCache instance from \p
+/// createBuiltinUnifiedOnDiskCache.
+std::unique_ptr<ObjectStore> createObjectStoreFromUnifiedOnDiskCache(
+ std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB);
+
+/// \param UniDB A \p UnifiedOnDiskCache instance from \p
+/// createBuiltinUnifiedOnDiskCache.
+std::unique_ptr<ActionCache> createActionCacheFromUnifiedOnDiskCache(
+ std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB);
+
+// FIXME: Proxy not portable. Maybe also error-prone?
+constexpr StringLiteral DefaultDirProxy = "/^llvm::cas::builtin::default";
+constexpr StringLiteral DefaultDir = "llvm.cas.builtin.default";
+
} // end namespace builtin
} // end namespace llvm::cas
diff --git a/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp b/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp
new file mode 100644
index 0000000..f3f6fa0
--- /dev/null
+++ b/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp
@@ -0,0 +1,38 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CAS/BuiltinUnifiedCASDatabases.h"
+#include "BuiltinCAS.h"
+#include "llvm/CAS/ActionCache.h"
+#include "llvm/CAS/UnifiedOnDiskCache.h"
+
+using namespace llvm;
+using namespace llvm::cas;
+
+Expected<std::pair<std::unique_ptr<ObjectStore>, std::unique_ptr<ActionCache>>>
+cas::createOnDiskUnifiedCASDatabases(StringRef Path) {
+ std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB;
+ if (Error E = builtin::createBuiltinUnifiedOnDiskCache(Path).moveInto(UniDB))
+ return std::move(E);
+ auto CAS = builtin::createObjectStoreFromUnifiedOnDiskCache(UniDB);
+ auto AC = builtin::createActionCacheFromUnifiedOnDiskCache(std::move(UniDB));
+ return std::make_pair(std::move(CAS), std::move(AC));
+}
+
+Expected<ValidationResult> cas::validateOnDiskUnifiedCASDatabasesIfNeeded(
+ StringRef Path, bool CheckHash, bool AllowRecovery, bool ForceValidation,
+ std::optional<StringRef> LLVMCasBinary) {
+#if LLVM_ENABLE_ONDISK_CAS
+ return ondisk::UnifiedOnDiskCache::validateIfNeeded(
+ Path, builtin::BuiltinCASContext::getHashName(),
+ sizeof(builtin::HashType), CheckHash, AllowRecovery, ForceValidation,
+ LLVMCasBinary);
+#else
+ return createStringError(inconvertibleErrorCode(), "OnDiskCache is disabled");
+#endif
+}
diff --git a/llvm/lib/CAS/CMakeLists.txt b/llvm/lib/CAS/CMakeLists.txt
index a2f8c49..aad77dc 100644
--- a/llvm/lib/CAS/CMakeLists.txt
+++ b/llvm/lib/CAS/CMakeLists.txt
@@ -2,15 +2,18 @@ add_llvm_component_library(LLVMCAS
ActionCache.cpp
ActionCaches.cpp
BuiltinCAS.cpp
+ BuiltinUnifiedCASDatabases.cpp
DatabaseFile.cpp
InMemoryCAS.cpp
MappedFileRegionArena.cpp
ObjectStore.cpp
+ OnDiskCAS.cpp
OnDiskCommon.cpp
OnDiskDataAllocator.cpp
OnDiskGraphDB.cpp
OnDiskKeyValueDB.cpp
OnDiskTrieRawHashMap.cpp
+ UnifiedOnDiskCache.cpp
ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/CAS
diff --git a/llvm/lib/CAS/InMemoryCAS.cpp b/llvm/lib/CAS/InMemoryCAS.cpp
index c63ee70d..2d4eedd 100644
--- a/llvm/lib/CAS/InMemoryCAS.cpp
+++ b/llvm/lib/CAS/InMemoryCAS.cpp
@@ -233,6 +233,12 @@ public:
return cast<InMemoryObject>(asInMemoryObject(Node)).getData();
}
+ void print(raw_ostream &OS) const final;
+
+ Error validate(bool CheckHash) const final {
+ return createStringError("InMemoryCAS doesn't support validate()");
+ }
+
InMemoryCAS() = default;
private:
@@ -271,6 +277,8 @@ ArrayRef<const InMemoryObject *> InMemoryObject::getRefs() const {
return cast<InMemoryInlineObject>(this)->getRefsImpl();
}
+void InMemoryCAS::print(raw_ostream &OS) const {}
+
Expected<ObjectRef>
InMemoryCAS::storeFromNullTerminatedRegion(ArrayRef<uint8_t> ComputedHash,
sys::fs::mapped_file_region Map) {
diff --git a/llvm/lib/CAS/ObjectStore.cpp b/llvm/lib/CAS/ObjectStore.cpp
index e0be50b..3110577 100644
--- a/llvm/lib/CAS/ObjectStore.cpp
+++ b/llvm/lib/CAS/ObjectStore.cpp
@@ -1,4 +1,4 @@
-//===- ObjectStore.cpp ------------------------------------------*- C++ -*-===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -12,7 +12,7 @@
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
-#include <optional>
+#include <deque>
using namespace llvm;
using namespace llvm::cas;
@@ -21,6 +21,7 @@ void CASContext::anchor() {}
void ObjectStore::anchor() {}
LLVM_DUMP_METHOD void CASID::dump() const { print(dbgs()); }
+LLVM_DUMP_METHOD void ObjectStore::dump() const { print(dbgs()); }
LLVM_DUMP_METHOD void ObjectRef::dump() const { print(dbgs()); }
LLVM_DUMP_METHOD void ObjectHandle::dump() const { print(dbgs()); }
@@ -141,7 +142,7 @@ Error ObjectStore::validateTree(ObjectRef Root) {
auto [I, Inserted] = ValidatedRefs.insert(Ref);
if (!Inserted)
continue; // already validated.
- if (Error E = validate(getID(Ref)))
+ if (Error E = validateObject(getID(Ref)))
return E;
Expected<ObjectHandle> Obj = load(Ref);
if (!Obj)
@@ -155,6 +156,92 @@ Error ObjectStore::validateTree(ObjectRef Root) {
return Error::success();
}
+Expected<ObjectRef> ObjectStore::importObject(ObjectStore &Upstream,
+ ObjectRef Other) {
+ // Copy the full CAS tree from upstream with depth-first ordering to ensure
+ // all the child nodes are available in downstream CAS before inserting
+ // current object. This uses a similar algorithm as
+ // `OnDiskGraphDB::importFullTree` but doesn't assume the upstream CAS schema
+ // so it can be used to import from any other ObjectStore reguardless of the
+ // CAS schema.
+
+ // There is no work to do if importing from self.
+ if (this == &Upstream)
+ return Other;
+
+ /// Keeps track of the state of visitation for current node and all of its
+ /// parents. Upstream Cursor holds information only from upstream CAS.
+ struct UpstreamCursor {
+ ObjectRef Ref;
+ ObjectHandle Node;
+ size_t RefsCount;
+ std::deque<ObjectRef> Refs;
+ };
+ SmallVector<UpstreamCursor, 16> CursorStack;
+ /// PrimaryNodeStack holds the ObjectRef of the current CAS, with nodes either
+ /// just stored in the CAS or nodes already exists in the current CAS.
+ SmallVector<ObjectRef, 128> PrimaryRefStack;
+ /// A map from upstream ObjectRef to current ObjectRef.
+ llvm::DenseMap<ObjectRef, ObjectRef> CreatedObjects;
+
+ auto enqueueNode = [&](ObjectRef Ref, ObjectHandle Node) {
+ unsigned NumRefs = Upstream.getNumRefs(Node);
+ std::deque<ObjectRef> Refs;
+ for (unsigned I = 0; I < NumRefs; ++I)
+ Refs.push_back(Upstream.readRef(Node, I));
+
+ CursorStack.push_back({Ref, Node, NumRefs, std::move(Refs)});
+ };
+
+ auto UpstreamHandle = Upstream.load(Other);
+ if (!UpstreamHandle)
+ return UpstreamHandle.takeError();
+ enqueueNode(Other, *UpstreamHandle);
+
+ while (!CursorStack.empty()) {
+ UpstreamCursor &Cur = CursorStack.back();
+ if (Cur.Refs.empty()) {
+ // Copy the node data into the primary store.
+ // The bottom of \p PrimaryRefStack contains the ObjectRef for the
+ // current node.
+ assert(PrimaryRefStack.size() >= Cur.RefsCount);
+ auto Refs = ArrayRef(PrimaryRefStack)
+ .slice(PrimaryRefStack.size() - Cur.RefsCount);
+ auto NewNode = store(Refs, Upstream.getData(Cur.Node));
+ if (!NewNode)
+ return NewNode.takeError();
+
+ // Remove the current node and its IDs from the stack.
+ PrimaryRefStack.truncate(PrimaryRefStack.size() - Cur.RefsCount);
+ CursorStack.pop_back();
+
+ PrimaryRefStack.push_back(*NewNode);
+ CreatedObjects.try_emplace(Cur.Ref, *NewNode);
+ continue;
+ }
+
+ // Check if the node exists already.
+ auto CurrentID = Cur.Refs.front();
+ Cur.Refs.pop_front();
+ auto Ref = CreatedObjects.find(CurrentID);
+ if (Ref != CreatedObjects.end()) {
+ // If exists already, just need to enqueue the primary node.
+ PrimaryRefStack.push_back(Ref->second);
+ continue;
+ }
+
+ // Load child.
+ auto PrimaryID = Upstream.load(CurrentID);
+ if (LLVM_UNLIKELY(!PrimaryID))
+ return PrimaryID.takeError();
+
+ enqueueNode(CurrentID, *PrimaryID);
+ }
+
+ assert(PrimaryRefStack.size() == 1);
+ return PrimaryRefStack.front();
+}
+
std::unique_ptr<MemoryBuffer>
ObjectProxy::getMemoryBuffer(StringRef Name,
bool RequiresNullTerminator) const {
diff --git a/llvm/lib/CAS/OnDiskCAS.cpp b/llvm/lib/CAS/OnDiskCAS.cpp
new file mode 100644
index 0000000..7d29f44
--- /dev/null
+++ b/llvm/lib/CAS/OnDiskCAS.cpp
@@ -0,0 +1,211 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "BuiltinCAS.h"
+#include "llvm/CAS/BuiltinCASContext.h"
+#include "llvm/CAS/BuiltinObjectHasher.h"
+#include "llvm/CAS/OnDiskGraphDB.h"
+#include "llvm/CAS/UnifiedOnDiskCache.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Error.h"
+
+using namespace llvm;
+using namespace llvm::cas;
+using namespace llvm::cas::builtin;
+
+namespace {
+
+class OnDiskCAS : public BuiltinCAS {
+public:
+ Expected<ObjectRef> storeImpl(ArrayRef<uint8_t> ComputedHash,
+ ArrayRef<ObjectRef> Refs,
+ ArrayRef<char> Data) final;
+
+ Expected<std::optional<ObjectHandle>> loadIfExists(ObjectRef Ref) final;
+
+ CASID getID(ObjectRef Ref) const final;
+
+ std::optional<ObjectRef> getReference(const CASID &ID) const final;
+
+ Expected<bool> isMaterialized(ObjectRef Ref) const final;
+
+ ArrayRef<char> getDataConst(ObjectHandle Node) const final;
+
+ void print(raw_ostream &OS) const final;
+ Error validate(bool CheckHash) const final;
+
+ static Expected<std::unique_ptr<OnDiskCAS>> open(StringRef Path);
+
+ OnDiskCAS(std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB)
+ : UnifiedDB(std::move(UniDB)), DB(&UnifiedDB->getGraphDB()) {}
+
+private:
+ ObjectHandle convertHandle(ondisk::ObjectHandle Node) const {
+ return makeObjectHandle(Node.getOpaqueData());
+ }
+
+ ondisk::ObjectHandle convertHandle(ObjectHandle Node) const {
+ return ondisk::ObjectHandle(Node.getInternalRef(*this));
+ }
+
+ ObjectRef convertRef(ondisk::ObjectID Ref) const {
+ return makeObjectRef(Ref.getOpaqueData());
+ }
+
+ ondisk::ObjectID convertRef(ObjectRef Ref) const {
+ return ondisk::ObjectID::fromOpaqueData(Ref.getInternalRef(*this));
+ }
+
+ size_t getNumRefs(ObjectHandle Node) const final {
+ auto RefsRange = DB->getObjectRefs(convertHandle(Node));
+ return std::distance(RefsRange.begin(), RefsRange.end());
+ }
+
+ ObjectRef readRef(ObjectHandle Node, size_t I) const final {
+ auto RefsRange = DB->getObjectRefs(convertHandle(Node));
+ return convertRef(RefsRange.begin()[I]);
+ }
+
+ Error forEachRef(ObjectHandle Node,
+ function_ref<Error(ObjectRef)> Callback) const final;
+
+ Error setSizeLimit(std::optional<uint64_t> SizeLimit) final;
+ Expected<std::optional<uint64_t>> getStorageSize() const final;
+ Error pruneStorageData() final;
+
+ OnDiskCAS(std::unique_ptr<ondisk::OnDiskGraphDB> GraphDB)
+ : OwnedDB(std::move(GraphDB)), DB(OwnedDB.get()) {}
+
+ std::unique_ptr<ondisk::OnDiskGraphDB> OwnedDB;
+ std::shared_ptr<ondisk::UnifiedOnDiskCache> UnifiedDB;
+ ondisk::OnDiskGraphDB *DB;
+};
+
+} // end anonymous namespace
+
+void OnDiskCAS::print(raw_ostream &OS) const { DB->print(OS); }
+Error OnDiskCAS::validate(bool CheckHash) const {
+ auto Hasher = [](ArrayRef<ArrayRef<uint8_t>> Refs, ArrayRef<char> Data,
+ SmallVectorImpl<uint8_t> &Result) {
+ auto Hash = BuiltinObjectHasher<llvm::cas::builtin::HasherT>::hashObject(
+ Refs, Data);
+ Result.assign(Hash.begin(), Hash.end());
+ };
+
+ if (auto E = DB->validate(CheckHash, Hasher))
+ return E;
+
+ return Error::success();
+}
+
+CASID OnDiskCAS::getID(ObjectRef Ref) const {
+ ArrayRef<uint8_t> Hash = DB->getDigest(convertRef(Ref));
+ return CASID::create(&getContext(), toStringRef(Hash));
+}
+
+std::optional<ObjectRef> OnDiskCAS::getReference(const CASID &ID) const {
+ std::optional<ondisk::ObjectID> ObjID =
+ DB->getExistingReference(ID.getHash());
+ if (!ObjID)
+ return std::nullopt;
+ return convertRef(*ObjID);
+}
+
+Expected<bool> OnDiskCAS::isMaterialized(ObjectRef ExternalRef) const {
+ return DB->isMaterialized(convertRef(ExternalRef));
+}
+
+ArrayRef<char> OnDiskCAS::getDataConst(ObjectHandle Node) const {
+ return DB->getObjectData(convertHandle(Node));
+}
+
+Expected<std::optional<ObjectHandle>>
+OnDiskCAS::loadIfExists(ObjectRef ExternalRef) {
+ Expected<std::optional<ondisk::ObjectHandle>> ObjHnd =
+ DB->load(convertRef(ExternalRef));
+ if (!ObjHnd)
+ return ObjHnd.takeError();
+ if (!*ObjHnd)
+ return std::nullopt;
+ return convertHandle(**ObjHnd);
+}
+
+Expected<ObjectRef> OnDiskCAS::storeImpl(ArrayRef<uint8_t> ComputedHash,
+ ArrayRef<ObjectRef> Refs,
+ ArrayRef<char> Data) {
+ SmallVector<ondisk::ObjectID, 64> IDs;
+ IDs.reserve(Refs.size());
+ for (ObjectRef Ref : Refs) {
+ IDs.push_back(convertRef(Ref));
+ }
+
+ auto StoredID = DB->getReference(ComputedHash);
+ if (LLVM_UNLIKELY(!StoredID))
+ return StoredID.takeError();
+ if (Error E = DB->store(*StoredID, IDs, Data))
+ return std::move(E);
+ return convertRef(*StoredID);
+}
+
+Error OnDiskCAS::forEachRef(ObjectHandle Node,
+ function_ref<Error(ObjectRef)> Callback) const {
+ auto RefsRange = DB->getObjectRefs(convertHandle(Node));
+ for (ondisk::ObjectID Ref : RefsRange) {
+ if (Error E = Callback(convertRef(Ref)))
+ return E;
+ }
+ return Error::success();
+}
+
+Error OnDiskCAS::setSizeLimit(std::optional<uint64_t> SizeLimit) {
+ UnifiedDB->setSizeLimit(SizeLimit);
+ return Error::success();
+}
+
+Expected<std::optional<uint64_t>> OnDiskCAS::getStorageSize() const {
+ return UnifiedDB->getStorageSize();
+}
+
+Error OnDiskCAS::pruneStorageData() { return UnifiedDB->collectGarbage(); }
+
+Expected<std::unique_ptr<OnDiskCAS>> OnDiskCAS::open(StringRef AbsPath) {
+ Expected<std::unique_ptr<ondisk::OnDiskGraphDB>> DB =
+ ondisk::OnDiskGraphDB::open(AbsPath, BuiltinCASContext::getHashName(),
+ sizeof(HashType));
+ if (!DB)
+ return DB.takeError();
+ return std::unique_ptr<OnDiskCAS>(new OnDiskCAS(std::move(*DB)));
+}
+
+bool cas::isOnDiskCASEnabled() {
+#if LLVM_ENABLE_ONDISK_CAS
+ return true;
+#else
+ return false;
+#endif
+}
+
+Expected<std::unique_ptr<ObjectStore>> cas::createOnDiskCAS(const Twine &Path) {
+#if LLVM_ENABLE_ONDISK_CAS
+ // FIXME: An absolute path isn't really good enough. Should open a directory
+ // and use openat() for files underneath.
+ SmallString<256> AbsPath;
+ Path.toVector(AbsPath);
+ sys::fs::make_absolute(AbsPath);
+
+ return OnDiskCAS::open(AbsPath);
+#else
+ return createStringError(inconvertibleErrorCode(), "OnDiskCAS is disabled");
+#endif /* LLVM_ENABLE_ONDISK_CAS */
+}
+
+std::unique_ptr<ObjectStore>
+cas::builtin::createObjectStoreFromUnifiedOnDiskCache(
+ std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB) {
+ return std::make_unique<OnDiskCAS>(std::move(UniDB));
+}
diff --git a/llvm/lib/CAS/OnDiskGraphDB.cpp b/llvm/lib/CAS/OnDiskGraphDB.cpp
index 64cbe9d..245b6fb 100644
--- a/llvm/lib/CAS/OnDiskGraphDB.cpp
+++ b/llvm/lib/CAS/OnDiskGraphDB.cpp
@@ -893,6 +893,10 @@ int64_t DataRecordHandle::getDataRelOffset() const {
}
Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const {
+ if (UpstreamDB) {
+ if (auto E = UpstreamDB->validate(Deep, Hasher))
+ return E;
+ }
return Index.validate([&](FileOffset Offset,
OnDiskTrieRawHashMap::ConstValueProxy Record)
-> Error {
@@ -1202,11 +1206,8 @@ OnDiskGraphDB::load(ObjectID ExternalRef) {
return I.takeError();
TrieRecord::Data Object = I->Ref.load();
- if (Object.SK == TrieRecord::StorageKind::Unknown) {
- if (!UpstreamDB)
- return std::nullopt;
+ if (Object.SK == TrieRecord::StorageKind::Unknown)
return faultInFromUpstream(ExternalRef);
- }
if (Object.SK == TrieRecord::StorageKind::DataPool)
return ObjectHandle::fromFileOffset(Object.Offset);
@@ -1286,8 +1287,10 @@ OnDiskGraphDB::getObjectPresence(ObjectID ExternalRef,
TrieRecord::Data Object = I->Ref.load();
if (Object.SK != TrieRecord::StorageKind::Unknown)
return ObjectPresence::InPrimaryDB;
+
if (!CheckUpstream || !UpstreamDB)
return ObjectPresence::Missing;
+
std::optional<ObjectID> UpstreamID =
UpstreamDB->getExistingReference(getDigest(*I));
return UpstreamID.has_value() ? ObjectPresence::OnlyInUpstreamDB
@@ -1549,9 +1552,10 @@ unsigned OnDiskGraphDB::getHardStorageLimitUtilization() const {
return std::max(IndexPercent, DataPercent);
}
-Expected<std::unique_ptr<OnDiskGraphDB>> OnDiskGraphDB::open(
- StringRef AbsPath, StringRef HashName, unsigned HashByteSize,
- std::unique_ptr<OnDiskGraphDB> UpstreamDB, FaultInPolicy Policy) {
+Expected<std::unique_ptr<OnDiskGraphDB>>
+OnDiskGraphDB::open(StringRef AbsPath, StringRef HashName,
+ unsigned HashByteSize, OnDiskGraphDB *UpstreamDB,
+ FaultInPolicy Policy) {
if (std::error_code EC = sys::fs::create_directories(AbsPath))
return createFileError(AbsPath, EC);
@@ -1604,18 +1608,15 @@ Expected<std::unique_ptr<OnDiskGraphDB>> OnDiskGraphDB::open(
"unexpected user header in '" + DataPoolPath +
"'");
- return std::unique_ptr<OnDiskGraphDB>(
- new OnDiskGraphDB(AbsPath, std::move(*Index), std::move(*DataPool),
- std::move(UpstreamDB), Policy));
+ return std::unique_ptr<OnDiskGraphDB>(new OnDiskGraphDB(
+ AbsPath, std::move(*Index), std::move(*DataPool), UpstreamDB, Policy));
}
OnDiskGraphDB::OnDiskGraphDB(StringRef RootPath, OnDiskTrieRawHashMap Index,
OnDiskDataAllocator DataPool,
- std::unique_ptr<OnDiskGraphDB> UpstreamDB,
- FaultInPolicy Policy)
+ OnDiskGraphDB *UpstreamDB, FaultInPolicy Policy)
: Index(std::move(Index)), DataPool(std::move(DataPool)),
- RootPath(RootPath.str()), UpstreamDB(std::move(UpstreamDB)),
- FIPolicy(Policy) {
+ RootPath(RootPath.str()), UpstreamDB(UpstreamDB), FIPolicy(Policy) {
/// Lifetime for "big" objects not in DataPool.
///
/// NOTE: Could use ThreadSafeTrieRawHashMap here. For now, doing something
@@ -1638,7 +1639,6 @@ Error OnDiskGraphDB::importFullTree(ObjectID PrimaryID,
// against the process dying during importing and leaving the database with an
// incomplete tree. Note that if the upstream has missing nodes then the tree
// will be copied with missing nodes as well, it won't be considered an error.
-
struct UpstreamCursor {
ObjectHandle Node;
size_t RefsCount;
@@ -1720,7 +1720,6 @@ Error OnDiskGraphDB::importSingleNode(ObjectID PrimaryID,
// Copy the node data into the primary store.
// FIXME: Use hard-link or cloning if the file-system supports it and data is
// stored into a separate file.
-
auto Data = UpstreamDB->getObjectData(UpstreamNode);
auto UpstreamRefs = UpstreamDB->getObjectRefs(UpstreamNode);
SmallVector<ObjectID, 64> Refs;
@@ -1737,7 +1736,8 @@ Error OnDiskGraphDB::importSingleNode(ObjectID PrimaryID,
Expected<std::optional<ObjectHandle>>
OnDiskGraphDB::faultInFromUpstream(ObjectID PrimaryID) {
- assert(UpstreamDB);
+ if (!UpstreamDB)
+ return std::nullopt;
auto UpstreamID = UpstreamDB->getReference(getDigest(PrimaryID));
if (LLVM_UNLIKELY(!UpstreamID))
diff --git a/llvm/lib/CAS/OnDiskKeyValueDB.cpp b/llvm/lib/CAS/OnDiskKeyValueDB.cpp
index 2186071..15656cb 100644
--- a/llvm/lib/CAS/OnDiskKeyValueDB.cpp
+++ b/llvm/lib/CAS/OnDiskKeyValueDB.cpp
@@ -20,6 +20,7 @@
#include "llvm/CAS/OnDiskKeyValueDB.h"
#include "OnDiskCommon.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/CAS/UnifiedOnDiskCache.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Errc.h"
@@ -53,15 +54,21 @@ Expected<std::optional<ArrayRef<char>>>
OnDiskKeyValueDB::get(ArrayRef<uint8_t> Key) {
// Check the result cache.
OnDiskTrieRawHashMap::ConstOnDiskPtr ActionP = Cache.find(Key);
- if (!ActionP)
+ if (ActionP) {
+ assert(isAddrAligned(Align(8), ActionP->Data.data()));
+ return ActionP->Data;
+ }
+ if (!UnifiedCache || !UnifiedCache->UpstreamKVDB)
return std::nullopt;
- assert(isAddrAligned(Align(8), ActionP->Data.data()));
- return ActionP->Data;
+
+ // Try to fault in from upstream.
+ return UnifiedCache->faultInFromUpstreamKV(Key);
}
Expected<std::unique_ptr<OnDiskKeyValueDB>>
OnDiskKeyValueDB::open(StringRef Path, StringRef HashName, unsigned KeySize,
- StringRef ValueName, size_t ValueSize) {
+ StringRef ValueName, size_t ValueSize,
+ UnifiedOnDiskCache *Cache) {
if (std::error_code EC = sys::fs::create_directories(Path))
return createFileError(Path, EC);
@@ -87,10 +94,14 @@ OnDiskKeyValueDB::open(StringRef Path, StringRef HashName, unsigned KeySize,
return std::move(E);
return std::unique_ptr<OnDiskKeyValueDB>(
- new OnDiskKeyValueDB(ValueSize, std::move(*ActionCache)));
+ new OnDiskKeyValueDB(ValueSize, std::move(*ActionCache), Cache));
}
Error OnDiskKeyValueDB::validate(CheckValueT CheckValue) const {
+ if (UnifiedCache && UnifiedCache->UpstreamKVDB) {
+ if (auto E = UnifiedCache->UpstreamKVDB->validate(CheckValue))
+ return E;
+ }
return Cache.validate(
[&](FileOffset Offset,
OnDiskTrieRawHashMap::ConstValueProxy Record) -> Error {
diff --git a/llvm/lib/CAS/UnifiedOnDiskCache.cpp b/llvm/lib/CAS/UnifiedOnDiskCache.cpp
new file mode 100644
index 0000000..ae9d818
--- /dev/null
+++ b/llvm/lib/CAS/UnifiedOnDiskCache.cpp
@@ -0,0 +1,613 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Encapsulates \p OnDiskGraphDB and \p OnDiskKeyValueDB instances within one
+/// directory while also restricting storage growth with a scheme of chaining
+/// the two most recent directories (primary & upstream), where the primary
+/// "faults-in" data from the upstream one. When the primary (most recent)
+/// directory exceeds its intended limit a new empty directory becomes the
+/// primary one.
+///
+/// Within the top-level directory (the path that \p UnifiedOnDiskCache::open
+/// receives) there are directories named like this:
+///
+/// 'v<version>.<x>'
+/// 'v<version>.<x+1>'
+/// 'v<version>.<x+2>'
+/// ...
+///
+/// 'version' is the version integer for this \p UnifiedOnDiskCache's scheme and
+/// the part after the dot is an increasing integer. The primary directory is
+/// the one with the highest integer and the upstream one is the directory
+/// before it. For example, if the sub-directories contained are:
+///
+/// 'v1.5', 'v1.6', 'v1.7', 'v1.8'
+///
+/// Then the primary one is 'v1.8', the upstream one is 'v1.7', and the rest are
+/// unused directories that can be safely deleted at any time and by any
+/// process.
+///
+/// Contained within the top-level directory is a file named "lock" which is
+/// used for processes to take shared or exclusive locks for the contents of the
+/// top directory. While a \p UnifiedOnDiskCache is open it keeps a shared lock
+/// for the top-level directory; when it closes, if the primary sub-directory
+/// exceeded its limit, it attempts to get an exclusive lock in order to create
+/// a new empty primary directory; if it can't get the exclusive lock it gives
+/// up and lets the next \p UnifiedOnDiskCache instance that closes to attempt
+/// again.
+///
+/// The downside of this scheme is that while \p UnifiedOnDiskCache is open on a
+/// directory, by any process, the storage size in that directory will keep
+/// growing unrestricted. But the major benefit is that garbage-collection can
+/// be triggered on a directory concurrently, at any time and by any process,
+/// without affecting any active readers/writers in the same process or other
+/// processes.
+///
+/// The \c UnifiedOnDiskCache also provides validation and recovery on top of
+/// the underlying on-disk storage. The low-level storage is designed to remain
+/// coherent across regular process crashes, but may be invalid after power loss
+/// or similar system failures. \c UnifiedOnDiskCache::validateIfNeeded allows
+/// validating the contents once per boot and can recover by marking invalid
+/// data for garbage collection.
+///
+/// The data recovery described above requires exclusive access to the CAS, and
+/// it is an error to attempt recovery if the CAS is open in any process/thread.
+/// In order to maximize backwards compatibility with tools that do not perform
+/// validation before opening the CAS, we do not attempt to get exclusive access
+/// until recovery is actually performed, meaning as long as the data is valid
+/// it will not conflict with concurrent use.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CAS/UnifiedOnDiskCache.h"
+#include "BuiltinCAS.h"
+#include "OnDiskCommon.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CAS/ActionCache.h"
+#include "llvm/CAS/OnDiskGraphDB.h"
+#include "llvm/CAS/OnDiskKeyValueDB.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/raw_ostream.h"
+#include <optional>
+
+#if __has_include(<sys/sysctl.h>)
+#include <sys/sysctl.h>
+#endif
+
+using namespace llvm;
+using namespace llvm::cas;
+using namespace llvm::cas::ondisk;
+
+/// FIXME: When the version of \p DBDirPrefix is bumped up we need to figure out
+/// how to handle the leftover sub-directories of the previous version, within
+/// the \p UnifiedOnDiskCache::collectGarbage function.
+static constexpr StringLiteral DBDirPrefix = "v1.";
+
+static constexpr StringLiteral ValidationFilename = "v1.validation";
+static constexpr StringLiteral CorruptPrefix = "corrupt.";
+
+ObjectID UnifiedOnDiskCache::getObjectIDFromValue(ArrayRef<char> Value) {
+ // little endian encoded.
+ assert(Value.size() == sizeof(uint64_t));
+ return ObjectID::fromOpaqueData(support::endian::read64le(Value.data()));
+}
+
+UnifiedOnDiskCache::ValueBytes
+UnifiedOnDiskCache::getValueFromObjectID(ObjectID ID) {
+ // little endian encoded.
+ UnifiedOnDiskCache::ValueBytes ValBytes;
+ static_assert(ValBytes.size() == sizeof(ID.getOpaqueData()));
+ support::endian::write64le(ValBytes.data(), ID.getOpaqueData());
+ return ValBytes;
+}
+
+Expected<std::optional<ArrayRef<char>>>
+UnifiedOnDiskCache::faultInFromUpstreamKV(ArrayRef<uint8_t> Key) {
+ assert(UpstreamGraphDB);
+ assert(UpstreamKVDB);
+
+ std::optional<ArrayRef<char>> UpstreamValue;
+ if (Error E = UpstreamKVDB->get(Key).moveInto(UpstreamValue))
+ return std::move(E);
+ if (!UpstreamValue)
+ return std::nullopt;
+
+ // The value is the \p ObjectID in the context of the upstream
+ // \p OnDiskGraphDB instance. Translate it to the context of the primary
+ // \p OnDiskGraphDB instance.
+ ObjectID UpstreamID = getObjectIDFromValue(*UpstreamValue);
+ auto PrimaryID =
+ PrimaryGraphDB->getReference(UpstreamGraphDB->getDigest(UpstreamID));
+ if (LLVM_UNLIKELY(!PrimaryID))
+ return PrimaryID.takeError();
+ return PrimaryKVDB->put(Key, getValueFromObjectID(*PrimaryID));
+}
+
+/// \returns all the 'v<version>.<x>' names of sub-directories, sorted with
+/// ascending order of the integer after the dot. Corrupt directories, if
+/// included, will come first.
+static Expected<SmallVector<std::string, 4>>
+getAllDBDirs(StringRef Path, bool IncludeCorrupt = false) {
+ struct DBDir {
+ uint64_t Order;
+ std::string Name;
+ };
+ SmallVector<DBDir> FoundDBDirs;
+
+ std::error_code EC;
+ for (sys::fs::directory_iterator DirI(Path, EC), DirE; !EC && DirI != DirE;
+ DirI.increment(EC)) {
+ if (DirI->type() != sys::fs::file_type::directory_file)
+ continue;
+ StringRef SubDir = sys::path::filename(DirI->path());
+ if (IncludeCorrupt && SubDir.starts_with(CorruptPrefix)) {
+ FoundDBDirs.push_back({0, std::string(SubDir)});
+ continue;
+ }
+ if (!SubDir.starts_with(DBDirPrefix))
+ continue;
+ uint64_t Order;
+ if (SubDir.substr(DBDirPrefix.size()).getAsInteger(10, Order))
+ return createStringError(inconvertibleErrorCode(),
+ "unexpected directory " + DirI->path());
+ FoundDBDirs.push_back({Order, std::string(SubDir)});
+ }
+ if (EC)
+ return createFileError(Path, EC);
+
+ llvm::sort(FoundDBDirs, [](const DBDir &LHS, const DBDir &RHS) -> bool {
+ return LHS.Order <= RHS.Order;
+ });
+
+ SmallVector<std::string, 4> DBDirs;
+ for (DBDir &Dir : FoundDBDirs)
+ DBDirs.push_back(std::move(Dir.Name));
+ return DBDirs;
+}
+
+static Expected<SmallVector<std::string, 4>> getAllGarbageDirs(StringRef Path) {
+ auto DBDirs = getAllDBDirs(Path, /*IncludeCorrupt=*/true);
+ if (!DBDirs)
+ return DBDirs.takeError();
+
+ // FIXME: When the version of \p DBDirPrefix is bumped up we need to figure
+ // out how to handle the leftover sub-directories of the previous version.
+
+ for (unsigned Keep = 2; Keep > 0 && !DBDirs->empty(); --Keep) {
+ StringRef Back(DBDirs->back());
+ if (Back.starts_with(CorruptPrefix))
+ break;
+ DBDirs->pop_back();
+ }
+ return *DBDirs;
+}
+
+/// \returns Given a sub-directory named 'v<version>.<x>', it outputs the
+/// 'v<version>.<x+1>' name.
+static void getNextDBDirName(StringRef DBDir, llvm::raw_ostream &OS) {
+ assert(DBDir.starts_with(DBDirPrefix));
+ uint64_t Count;
+ bool Failed = DBDir.substr(DBDirPrefix.size()).getAsInteger(10, Count);
+ assert(!Failed);
+ (void)Failed;
+ OS << DBDirPrefix << Count + 1;
+}
+
+static Error validateOutOfProcess(StringRef LLVMCasBinary, StringRef RootPath,
+ bool CheckHash) {
+ SmallVector<StringRef> Args{LLVMCasBinary, "-cas", RootPath, "-validate"};
+ if (CheckHash)
+ Args.push_back("-check-hash");
+
+ llvm::SmallString<128> StdErrPath;
+ int StdErrFD = -1;
+ if (std::error_code EC = sys::fs::createTemporaryFile(
+ "llvm-cas-validate-stderr", "txt", StdErrFD, StdErrPath,
+ llvm::sys::fs::OF_Text))
+ return createStringError(EC, "failed to create temporary file");
+ FileRemover OutputRemover(StdErrPath.c_str());
+
+ std::optional<llvm::StringRef> Redirects[] = {
+ {""}, // stdin = /dev/null
+ {""}, // stdout = /dev/null
+ StdErrPath.str(),
+ };
+
+ std::string ErrMsg;
+ int Result =
+ sys::ExecuteAndWait(LLVMCasBinary, Args, /*Env=*/std::nullopt, Redirects,
+ /*SecondsToWait=*/120, /*MemoryLimit=*/0, &ErrMsg);
+
+ if (Result == -1)
+ return createStringError("failed to exec " + join(Args, " ") + ": " +
+ ErrMsg);
+ if (Result != 0) {
+ llvm::SmallString<64> Err("cas contents invalid");
+ if (!ErrMsg.empty()) {
+ Err += ": ";
+ Err += ErrMsg;
+ }
+ auto StdErrBuf = MemoryBuffer::getFile(StdErrPath.c_str());
+ if (StdErrBuf && !(*StdErrBuf)->getBuffer().empty()) {
+ Err += ": ";
+ Err += (*StdErrBuf)->getBuffer();
+ }
+ return createStringError(Err);
+ }
+ return Error::success();
+}
+
+static Error validateInProcess(StringRef RootPath, StringRef HashName,
+ unsigned HashByteSize, bool CheckHash) {
+ std::shared_ptr<UnifiedOnDiskCache> UniDB;
+ if (Error E = UnifiedOnDiskCache::open(RootPath, std::nullopt, HashName,
+ HashByteSize)
+ .moveInto(UniDB))
+ return E;
+ auto CAS = builtin::createObjectStoreFromUnifiedOnDiskCache(UniDB);
+ if (Error E = CAS->validate(CheckHash))
+ return E;
+ auto Cache = builtin::createActionCacheFromUnifiedOnDiskCache(UniDB);
+ if (Error E = Cache->validate())
+ return E;
+ return Error::success();
+}
+
+static Expected<uint64_t> getBootTime() {
+#if __has_include(<sys/sysctl.h>) && defined(KERN_BOOTTIME)
+ struct timeval TV;
+ size_t TVLen = sizeof(TV);
+ int KernBoot[2] = {CTL_KERN, KERN_BOOTTIME};
+ if (sysctl(KernBoot, 2, &TV, &TVLen, nullptr, 0) < 0)
+ return createStringError(llvm::errnoAsErrorCode(),
+ "failed to get boottime");
+ if (TVLen != sizeof(TV))
+ return createStringError("sysctl kern.boottime unexpected format");
+ return TV.tv_sec;
+#elif defined(__linux__)
+ // Use the mtime for /proc, which is recreated during system boot.
+ // We could also read /proc/stat and search for 'btime'.
+ sys::fs::file_status Status;
+ if (std::error_code EC = sys::fs::status("/proc", Status))
+ return createFileError("/proc", EC);
+ return Status.getLastModificationTime().time_since_epoch().count();
+#else
+ llvm::report_fatal_error("getBootTime unimplemented");
+#endif
+}
+
+Expected<ValidationResult> UnifiedOnDiskCache::validateIfNeeded(
+ StringRef RootPath, StringRef HashName, unsigned HashByteSize,
+ bool CheckHash, bool AllowRecovery, bool ForceValidation,
+ std::optional<StringRef> LLVMCasBinaryPath) {
+ if (std::error_code EC = sys::fs::create_directories(RootPath))
+ return createFileError(RootPath, EC);
+
+ SmallString<256> PathBuf(RootPath);
+ sys::path::append(PathBuf, ValidationFilename);
+ int FD = -1;
+ if (std::error_code EC = sys::fs::openFileForReadWrite(
+ PathBuf, FD, sys::fs::CD_OpenAlways, sys::fs::OF_None))
+ return createFileError(PathBuf, EC);
+ assert(FD != -1);
+
+ sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD);
+ auto CloseFile = make_scope_exit([&]() { sys::fs::closeFile(File); });
+
+ if (std::error_code EC = lockFileThreadSafe(FD, sys::fs::LockKind::Exclusive))
+ return createFileError(PathBuf, EC);
+ auto UnlockFD = make_scope_exit([&]() { unlockFileThreadSafe(FD); });
+
+ SmallString<8> Bytes;
+ if (Error E = sys::fs::readNativeFileToEOF(File, Bytes))
+ return createFileError(PathBuf, std::move(E));
+
+ uint64_t ValidationBootTime = 0;
+ if (!Bytes.empty() &&
+ StringRef(Bytes).trim().getAsInteger(10, ValidationBootTime))
+ return createFileError(PathBuf, errc::illegal_byte_sequence,
+ "expected integer");
+
+ static uint64_t BootTime = 0;
+ if (BootTime == 0)
+ if (Error E = getBootTime().moveInto(BootTime))
+ return std::move(E);
+
+ std::string LogValidationError;
+
+ if (ValidationBootTime == BootTime && !ForceValidation)
+ return ValidationResult::Skipped;
+
+ // Validate!
+ bool NeedsRecovery = false;
+ if (Error E =
+ LLVMCasBinaryPath
+ ? validateOutOfProcess(*LLVMCasBinaryPath, RootPath, CheckHash)
+ : validateInProcess(RootPath, HashName, HashByteSize,
+ CheckHash)) {
+ if (AllowRecovery) {
+ consumeError(std::move(E));
+ NeedsRecovery = true;
+ } else {
+ return std::move(E);
+ }
+ }
+
+ if (NeedsRecovery) {
+ sys::path::remove_filename(PathBuf);
+ sys::path::append(PathBuf, "lock");
+
+ int LockFD = -1;
+ if (std::error_code EC = sys::fs::openFileForReadWrite(
+ PathBuf, LockFD, sys::fs::CD_OpenAlways, sys::fs::OF_None))
+ return createFileError(PathBuf, EC);
+ sys::fs::file_t LockFile = sys::fs::convertFDToNativeFile(LockFD);
+ auto CloseLock = make_scope_exit([&]() { sys::fs::closeFile(LockFile); });
+ if (std::error_code EC = tryLockFileThreadSafe(LockFD)) {
+ if (EC == std::errc::no_lock_available)
+ return createFileError(
+ PathBuf, EC,
+ "CAS validation requires exclusive access but CAS was in use");
+ return createFileError(PathBuf, EC);
+ }
+ auto UnlockFD = make_scope_exit([&]() { unlockFileThreadSafe(LockFD); });
+
+ auto DBDirs = getAllDBDirs(RootPath);
+ if (!DBDirs)
+ return DBDirs.takeError();
+
+ for (StringRef DBDir : *DBDirs) {
+ sys::path::remove_filename(PathBuf);
+ sys::path::append(PathBuf, DBDir);
+ std::error_code EC;
+ int Attempt = 0, MaxAttempts = 100;
+ SmallString<128> GCPath;
+ for (; Attempt < MaxAttempts; ++Attempt) {
+ GCPath.assign(RootPath);
+ sys::path::append(GCPath, CorruptPrefix + std::to_string(Attempt) +
+ "." + DBDir);
+ EC = sys::fs::rename(PathBuf, GCPath);
+ // Darwin uses ENOTEMPTY. Linux may return either ENOTEMPTY or EEXIST.
+ if (EC != errc::directory_not_empty && EC != errc::file_exists)
+ break;
+ }
+ if (Attempt == MaxAttempts)
+ return createStringError(
+ EC, "rename " + PathBuf +
+ " failed: too many CAS directories awaiting pruning");
+ if (EC)
+ return createStringError(EC, "rename " + PathBuf + " to " + GCPath +
+ " failed: " + EC.message());
+ }
+ }
+
+ if (ValidationBootTime != BootTime) {
+ // Fix filename in case we have error to report.
+ sys::path::remove_filename(PathBuf);
+ sys::path::append(PathBuf, ValidationFilename);
+ if (std::error_code EC = sys::fs::resize_file(FD, 0))
+ return createFileError(PathBuf, EC);
+ raw_fd_ostream OS(FD, /*shouldClose=*/false);
+ OS.seek(0); // resize does not reset position
+ OS << BootTime << '\n';
+ if (OS.has_error())
+ return createFileError(PathBuf, OS.error());
+ }
+
+ return NeedsRecovery ? ValidationResult::Recovered : ValidationResult::Valid;
+}
+
+Expected<std::unique_ptr<UnifiedOnDiskCache>>
+UnifiedOnDiskCache::open(StringRef RootPath, std::optional<uint64_t> SizeLimit,
+ StringRef HashName, unsigned HashByteSize,
+ OnDiskGraphDB::FaultInPolicy FaultInPolicy) {
+ if (std::error_code EC = sys::fs::create_directories(RootPath))
+ return createFileError(RootPath, EC);
+
+ SmallString<256> PathBuf(RootPath);
+ sys::path::append(PathBuf, "lock");
+ int LockFD = -1;
+ if (std::error_code EC = sys::fs::openFileForReadWrite(
+ PathBuf, LockFD, sys::fs::CD_OpenAlways, sys::fs::OF_None))
+ return createFileError(PathBuf, EC);
+ assert(LockFD != -1);
+ // Locking the directory using shared lock, which will prevent other processes
+ // from creating a new chain (essentially while a \p UnifiedOnDiskCache
+ // instance holds a shared lock the storage for the primary directory will
+ // grow unrestricted).
+ if (std::error_code EC =
+ lockFileThreadSafe(LockFD, sys::fs::LockKind::Shared))
+ return createFileError(PathBuf, EC);
+
+ auto DBDirs = getAllDBDirs(RootPath);
+ if (!DBDirs)
+ return DBDirs.takeError();
+ if (DBDirs->empty())
+ DBDirs->push_back((Twine(DBDirPrefix) + "1").str());
+
+ assert(!DBDirs->empty());
+
+ /// If there is only one directory open databases on it. If there are 2 or
+ /// more directories, get the most recent directories and chain them, with the
+ /// most recent being the primary one. The remaining directories are unused
+ /// data than can be garbage-collected.
+ auto UniDB = std::unique_ptr<UnifiedOnDiskCache>(new UnifiedOnDiskCache());
+ std::unique_ptr<OnDiskGraphDB> UpstreamGraphDB;
+ std::unique_ptr<OnDiskKeyValueDB> UpstreamKVDB;
+ if (DBDirs->size() > 1) {
+ StringRef UpstreamDir = *(DBDirs->end() - 2);
+ PathBuf = RootPath;
+ sys::path::append(PathBuf, UpstreamDir);
+ if (Error E = OnDiskGraphDB::open(PathBuf, HashName, HashByteSize,
+ /*UpstreamDB=*/nullptr, FaultInPolicy)
+ .moveInto(UpstreamGraphDB))
+ return std::move(E);
+ if (Error E = OnDiskKeyValueDB::open(PathBuf, HashName, HashByteSize,
+ /*ValueName=*/"objectid",
+ /*ValueSize=*/sizeof(uint64_t))
+ .moveInto(UpstreamKVDB))
+ return std::move(E);
+ }
+
+ StringRef PrimaryDir = *(DBDirs->end() - 1);
+ PathBuf = RootPath;
+ sys::path::append(PathBuf, PrimaryDir);
+ std::unique_ptr<OnDiskGraphDB> PrimaryGraphDB;
+ if (Error E = OnDiskGraphDB::open(PathBuf, HashName, HashByteSize,
+ UpstreamGraphDB.get(), FaultInPolicy)
+ .moveInto(PrimaryGraphDB))
+ return std::move(E);
+ std::unique_ptr<OnDiskKeyValueDB> PrimaryKVDB;
+ // \p UnifiedOnDiskCache does manual chaining for key-value requests,
+ // including an extra translation step of the value during fault-in.
+ if (Error E =
+ OnDiskKeyValueDB::open(PathBuf, HashName, HashByteSize,
+ /*ValueName=*/"objectid",
+ /*ValueSize=*/sizeof(uint64_t), UniDB.get())
+ .moveInto(PrimaryKVDB))
+ return std::move(E);
+
+ UniDB->RootPath = RootPath;
+ UniDB->SizeLimit = SizeLimit.value_or(0);
+ UniDB->LockFD = LockFD;
+ UniDB->NeedsGarbageCollection = DBDirs->size() > 2;
+ UniDB->PrimaryDBDir = PrimaryDir;
+ UniDB->UpstreamGraphDB = std::move(UpstreamGraphDB);
+ UniDB->PrimaryGraphDB = std::move(PrimaryGraphDB);
+ UniDB->UpstreamKVDB = std::move(UpstreamKVDB);
+ UniDB->PrimaryKVDB = std::move(PrimaryKVDB);
+
+ return std::move(UniDB);
+}
+
+void UnifiedOnDiskCache::setSizeLimit(std::optional<uint64_t> SizeLimit) {
+ this->SizeLimit = SizeLimit.value_or(0);
+}
+
+uint64_t UnifiedOnDiskCache::getStorageSize() const {
+ uint64_t TotalSize = getPrimaryStorageSize();
+ if (UpstreamGraphDB)
+ TotalSize += UpstreamGraphDB->getStorageSize();
+ if (UpstreamKVDB)
+ TotalSize += UpstreamKVDB->getStorageSize();
+ return TotalSize;
+}
+
+uint64_t UnifiedOnDiskCache::getPrimaryStorageSize() const {
+ return PrimaryGraphDB->getStorageSize() + PrimaryKVDB->getStorageSize();
+}
+
+bool UnifiedOnDiskCache::hasExceededSizeLimit() const {
+ uint64_t CurSizeLimit = SizeLimit;
+ if (!CurSizeLimit)
+ return false;
+
+ // If the hard limit is beyond 85%, declare above limit and request clean up.
+ unsigned CurrentPercent =
+ std::max(PrimaryGraphDB->getHardStorageLimitUtilization(),
+ PrimaryKVDB->getHardStorageLimitUtilization());
+ if (CurrentPercent > 85)
+ return true;
+
+ // We allow each of the directories in the chain to reach up to half the
+ // intended size limit. Check whether the primary directory has exceeded half
+ // the limit or not, in order to decide whether we need to start a new chain.
+ //
+ // We could check the size limit against the sum of sizes of both the primary
+ // and upstream directories but then if the upstream is significantly larger
+ // than the intended limit, it would trigger a new chain to be created before
+ // the primary has reached its own limit. Essentially in such situation we
+ // prefer reclaiming the storage later in order to have more consistent cache
+ // hits behavior.
+ return (CurSizeLimit / 2) < getPrimaryStorageSize();
+}
+
+Error UnifiedOnDiskCache::close(bool CheckSizeLimit) {
+ if (LockFD == -1)
+ return Error::success(); // already closed.
+ auto CloseLock = make_scope_exit([&]() {
+ assert(LockFD >= 0);
+ sys::fs::file_t LockFile = sys::fs::convertFDToNativeFile(LockFD);
+ sys::fs::closeFile(LockFile);
+ LockFD = -1;
+ });
+
+ bool ExceededSizeLimit = CheckSizeLimit ? hasExceededSizeLimit() : false;
+ UpstreamKVDB.reset();
+ PrimaryKVDB.reset();
+ UpstreamGraphDB.reset();
+ PrimaryGraphDB.reset();
+ if (std::error_code EC = unlockFileThreadSafe(LockFD))
+ return createFileError(RootPath, EC);
+
+ if (!ExceededSizeLimit)
+ return Error::success();
+
+ // The primary directory exceeded its intended size limit. Try to get an
+ // exclusive lock in order to create a new primary directory for next time
+ // this \p UnifiedOnDiskCache path is opened.
+
+ if (std::error_code EC = tryLockFileThreadSafe(
+ LockFD, std::chrono::milliseconds(0), sys::fs::LockKind::Exclusive)) {
+ if (EC == errc::no_lock_available)
+ return Error::success(); // couldn't get exclusive lock, give up.
+ return createFileError(RootPath, EC);
+ }
+ auto UnlockFile = make_scope_exit([&]() { unlockFileThreadSafe(LockFD); });
+
+ // Managed to get an exclusive lock which means there are no other open
+ // \p UnifiedOnDiskCache instances for the same path, so we can safely start a
+ // new primary directory. To start a new primary directory we just have to
+ // create a new empty directory with the next consecutive index; since this is
+ // an atomic operation we will leave the top-level directory in a consistent
+ // state even if the process dies during this code-path.
+
+ SmallString<256> PathBuf(RootPath);
+ raw_svector_ostream OS(PathBuf);
+ OS << sys::path::get_separator();
+ getNextDBDirName(PrimaryDBDir, OS);
+ if (std::error_code EC = sys::fs::create_directory(PathBuf))
+ return createFileError(PathBuf, EC);
+
+ NeedsGarbageCollection = true;
+ return Error::success();
+}
+
+UnifiedOnDiskCache::UnifiedOnDiskCache() = default;
+
+UnifiedOnDiskCache::~UnifiedOnDiskCache() { consumeError(close()); }
+
+Error UnifiedOnDiskCache::collectGarbage(StringRef Path) {
+ auto DBDirs = getAllGarbageDirs(Path);
+ if (!DBDirs)
+ return DBDirs.takeError();
+
+ SmallString<256> PathBuf(Path);
+ for (StringRef UnusedSubDir : *DBDirs) {
+ sys::path::append(PathBuf, UnusedSubDir);
+ if (std::error_code EC = sys::fs::remove_directories(PathBuf))
+ return createFileError(PathBuf, EC);
+ sys::path::remove_filename(PathBuf);
+ }
+ return Error::success();
+}
+
+Error UnifiedOnDiskCache::collectGarbage() { return collectGarbage(RootPath); }