1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
|
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/CAS/ObjectStore.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include <deque>
using namespace llvm;
using namespace llvm::cas;
void CASContext::anchor() {}
void ObjectStore::anchor() {}
LLVM_DUMP_METHOD void CASID::dump() const { print(dbgs()); }
LLVM_DUMP_METHOD void ObjectStore::dump() const { print(dbgs()); }
LLVM_DUMP_METHOD void ObjectRef::dump() const { print(dbgs()); }
LLVM_DUMP_METHOD void ObjectHandle::dump() const { print(dbgs()); }
std::string CASID::toString() const {
std::string S;
raw_string_ostream(S) << *this;
return S;
}
static void printReferenceBase(raw_ostream &OS, StringRef Kind,
uint64_t InternalRef, std::optional<CASID> ID) {
OS << Kind << "=" << InternalRef;
if (ID)
OS << "[" << *ID << "]";
}
void ReferenceBase::print(raw_ostream &OS, const ObjectHandle &This) const {
assert(this == &This);
printReferenceBase(OS, "object-handle", InternalRef, std::nullopt);
}
void ReferenceBase::print(raw_ostream &OS, const ObjectRef &This) const {
assert(this == &This);
std::optional<CASID> ID;
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
if (CAS)
ID = CAS->getID(This);
#endif
printReferenceBase(OS, "object-ref", InternalRef, ID);
}
Expected<ObjectHandle> ObjectStore::load(ObjectRef Ref) {
std::optional<ObjectHandle> Handle;
if (Error E = loadIfExists(Ref).moveInto(Handle))
return std::move(E);
if (!Handle)
return createStringError(errc::invalid_argument,
"missing object '" + getID(Ref).toString() + "'");
return *Handle;
}
std::unique_ptr<MemoryBuffer>
ObjectStore::getMemoryBuffer(ObjectHandle Node, StringRef Name,
bool RequiresNullTerminator) {
return MemoryBuffer::getMemBuffer(
toStringRef(getData(Node, RequiresNullTerminator)), Name,
RequiresNullTerminator);
}
void ObjectStore::readRefs(ObjectHandle Node,
SmallVectorImpl<ObjectRef> &Refs) const {
consumeError(forEachRef(Node, [&Refs](ObjectRef Ref) -> Error {
Refs.push_back(Ref);
return Error::success();
}));
}
Expected<ObjectProxy> ObjectStore::getProxy(const CASID &ID) {
std::optional<ObjectRef> Ref = getReference(ID);
if (!Ref)
return createUnknownObjectError(ID);
return getProxy(*Ref);
}
Expected<ObjectProxy> ObjectStore::getProxy(ObjectRef Ref) {
std::optional<ObjectHandle> H;
if (Error E = load(Ref).moveInto(H))
return std::move(E);
return ObjectProxy::load(*this, Ref, *H);
}
Expected<std::optional<ObjectProxy>>
ObjectStore::getProxyIfExists(ObjectRef Ref) {
std::optional<ObjectHandle> H;
if (Error E = loadIfExists(Ref).moveInto(H))
return std::move(E);
if (!H)
return std::nullopt;
return ObjectProxy::load(*this, Ref, *H);
}
Error ObjectStore::createUnknownObjectError(const CASID &ID) {
return createStringError(std::make_error_code(std::errc::invalid_argument),
"unknown object '" + ID.toString() + "'");
}
Expected<ObjectProxy> ObjectStore::createProxy(ArrayRef<ObjectRef> Refs,
StringRef Data) {
Expected<ObjectRef> Ref = store(Refs, arrayRefFromStringRef<char>(Data));
if (!Ref)
return Ref.takeError();
return getProxy(*Ref);
}
Expected<ObjectRef>
ObjectStore::storeFromOpenFileImpl(sys::fs::file_t FD,
std::optional<sys::fs::file_status> Status) {
// TODO: For the on-disk CAS implementation use cloning to store it as a
// standalone file if the file-system supports it and the file is large.
uint64_t Size = Status ? Status->getSize() : -1;
auto Buffer = MemoryBuffer::getOpenFile(FD, /*Filename=*/"", Size);
if (!Buffer)
return errorCodeToError(Buffer.getError());
return store({}, arrayRefFromStringRef<char>((*Buffer)->getBuffer()));
}
Error ObjectStore::validateTree(ObjectRef Root) {
SmallDenseSet<ObjectRef> ValidatedRefs;
SmallVector<ObjectRef, 16> RefsToValidate;
RefsToValidate.push_back(Root);
while (!RefsToValidate.empty()) {
ObjectRef Ref = RefsToValidate.pop_back_val();
auto [I, Inserted] = ValidatedRefs.insert(Ref);
if (!Inserted)
continue; // already validated.
if (Error E = validateObject(getID(Ref)))
return E;
Expected<ObjectHandle> Obj = load(Ref);
if (!Obj)
return Obj.takeError();
if (Error E = forEachRef(*Obj, [&RefsToValidate](ObjectRef R) -> Error {
RefsToValidate.push_back(R);
return Error::success();
}))
return E;
}
return Error::success();
}
Expected<ObjectRef> ObjectStore::importObject(ObjectStore &Upstream,
ObjectRef Other) {
// Copy the full CAS tree from upstream with depth-first ordering to ensure
// all the child nodes are available in downstream CAS before inserting
// current object. This uses a similar algorithm as
// `OnDiskGraphDB::importFullTree` but doesn't assume the upstream CAS schema
// so it can be used to import from any other ObjectStore reguardless of the
// CAS schema.
// There is no work to do if importing from self.
if (this == &Upstream)
return Other;
/// Keeps track of the state of visitation for current node and all of its
/// parents. Upstream Cursor holds information only from upstream CAS.
struct UpstreamCursor {
ObjectRef Ref;
ObjectHandle Node;
size_t RefsCount;
std::deque<ObjectRef> Refs;
};
SmallVector<UpstreamCursor, 16> CursorStack;
/// PrimaryNodeStack holds the ObjectRef of the current CAS, with nodes either
/// just stored in the CAS or nodes already exists in the current CAS.
SmallVector<ObjectRef, 128> PrimaryRefStack;
/// A map from upstream ObjectRef to current ObjectRef.
llvm::DenseMap<ObjectRef, ObjectRef> CreatedObjects;
auto enqueueNode = [&](ObjectRef Ref, ObjectHandle Node) {
unsigned NumRefs = Upstream.getNumRefs(Node);
std::deque<ObjectRef> Refs;
for (unsigned I = 0; I < NumRefs; ++I)
Refs.push_back(Upstream.readRef(Node, I));
CursorStack.push_back({Ref, Node, NumRefs, std::move(Refs)});
};
auto UpstreamHandle = Upstream.load(Other);
if (!UpstreamHandle)
return UpstreamHandle.takeError();
enqueueNode(Other, *UpstreamHandle);
while (!CursorStack.empty()) {
UpstreamCursor &Cur = CursorStack.back();
if (Cur.Refs.empty()) {
// Copy the node data into the primary store.
// The bottom of \p PrimaryRefStack contains the ObjectRef for the
// current node.
assert(PrimaryRefStack.size() >= Cur.RefsCount);
auto Refs = ArrayRef(PrimaryRefStack)
.slice(PrimaryRefStack.size() - Cur.RefsCount);
auto NewNode = store(Refs, Upstream.getData(Cur.Node));
if (!NewNode)
return NewNode.takeError();
// Remove the current node and its IDs from the stack.
PrimaryRefStack.truncate(PrimaryRefStack.size() - Cur.RefsCount);
// Push new node into created objects.
PrimaryRefStack.push_back(*NewNode);
CreatedObjects.try_emplace(Cur.Ref, *NewNode);
// Pop the cursor in the end after all uses.
CursorStack.pop_back();
continue;
}
// Check if the node exists already.
auto CurrentID = Cur.Refs.front();
Cur.Refs.pop_front();
auto Ref = CreatedObjects.find(CurrentID);
if (Ref != CreatedObjects.end()) {
// If exists already, just need to enqueue the primary node.
PrimaryRefStack.push_back(Ref->second);
continue;
}
// Load child.
auto PrimaryID = Upstream.load(CurrentID);
if (LLVM_UNLIKELY(!PrimaryID))
return PrimaryID.takeError();
enqueueNode(CurrentID, *PrimaryID);
}
assert(PrimaryRefStack.size() == 1);
return PrimaryRefStack.front();
}
std::unique_ptr<MemoryBuffer>
ObjectProxy::getMemoryBuffer(StringRef Name,
bool RequiresNullTerminator) const {
return CAS->getMemoryBuffer(H, Name, RequiresNullTerminator);
}
|