aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/CAS/MappedFileRegionBumpPtr.cpp
diff options
context:
space:
mode:
authorSteven Wu <stevenwu@apple.com>2024-10-29 10:37:37 -0700
committerSteven Wu <stevenwu@apple.com>2024-10-29 10:37:37 -0700
commit0acb07c50c259b2022ef4bf60d414e4616e171d5 (patch)
tree071f604b046c8e84b725c3125c1235c3ae373fb5 /llvm/lib/CAS/MappedFileRegionBumpPtr.cpp
parentb510cdb895b9188e5819c4c85a6dab22a4d14385 (diff)
parentbe2c38e110a7e595bd27f0bf92ad5762108c96a8 (diff)
downloadllvm-users/cachemeifyoucan/spr/cas-add-ondiskcas.zip
llvm-users/cachemeifyoucan/spr/cas-add-ondiskcas.tar.gz
llvm-users/cachemeifyoucan/spr/cas-add-ondiskcas.tar.bz2
[𝘀𝗽𝗿] initial versionusers/cachemeifyoucan/spr/cas-add-ondiskcas
Created using spr 1.3.5
Diffstat (limited to 'llvm/lib/CAS/MappedFileRegionBumpPtr.cpp')
-rw-r--r--llvm/lib/CAS/MappedFileRegionBumpPtr.cpp233
1 files changed, 233 insertions, 0 deletions
diff --git a/llvm/lib/CAS/MappedFileRegionBumpPtr.cpp b/llvm/lib/CAS/MappedFileRegionBumpPtr.cpp
new file mode 100644
index 0000000..2222644
--- /dev/null
+++ b/llvm/lib/CAS/MappedFileRegionBumpPtr.cpp
@@ -0,0 +1,233 @@
+//===- MappedFileRegionBumpPtr.cpp ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// A bump pointer allocator, backed by a memory-mapped file.
+///
+/// The effect we want is:
+///
+/// 1. If it doesn't exist, create the file with an initial size.
+/// 2. Reserve virtual memory large enough for the max file size.
+/// 3. Map the file into memory in the reserved region.
+/// 4. Increase the file size and update the mapping when necessary.
+///
+/// However, updating the mapping is challenging when it needs to work portably,
+/// and across multiple processes without locking for every read. Our current
+/// implementation strategy is:
+///
+/// 1. Use \c ftruncate (\c sys::fs::resize_file) to grow the file to its max
+/// size (typically several GB). Many modern filesystems will create a sparse
+/// file, so that the trailing unused pages do not take space on disk.
+/// 2. Call \c mmap (\c sys::fs::mapped_file_region)
+/// 3. [Automatic as part of 2.]
+/// 4. [Automatic as part of 2.]
+///
+/// Additionally, we attempt to resize the file to its actual data size when
+/// closing the mapping, if this is the only concurrent instance. This is done
+/// using file locks. Shrinking the file mitigates problems with having large
+/// files: on filesystems without sparse files it avoids unnecessary space use;
+/// it also avoids allocating the full size if another process copies the file,
+/// which typically loses sparseness. These mitigations only work while the file
+/// is not in use.
+///
+/// FIXME: we assume that all concurrent users of the file will use the same
+/// value for Capacity. Otherwise a process with a larger capacity can write
+/// data that is "out of bounds" for processes with smaller capacity. Currently
+/// this is true in the CAS.
+///
+/// To support resizing, we use two separate file locks:
+/// 1. We use a shared reader lock on a ".shared" file until destruction.
+/// 2. We use a lock on the main file during initialization - shared to check
+/// the status, upgraded to exclusive to resize/initialize the file.
+///
+/// Then during destruction we attempt to get exclusive access on (1), which
+/// requires no concurrent readers. If so, we shrink the file. Using two
+/// separate locks simplifies the implementation and enables it to work on
+/// platforms (e.g. Windows) where a shared/reader lock prevents writing.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CAS/MappedFileRegionBumpPtr.h"
+#include "OnDiskCommon.h"
+
+using namespace llvm;
+using namespace llvm::cas;
+using namespace llvm::cas::ondisk;
+
+namespace {
+struct FileLockRAII {
+ std::string Path;
+ int FD;
+ enum LockKind { Shared, Exclusive };
+ std::optional<LockKind> Locked;
+
+ FileLockRAII(std::string Path, int FD) : Path(std::move(Path)), FD(FD) {}
+ ~FileLockRAII() { consumeError(unlock()); }
+
+ Error lock(LockKind LK) {
+ if (std::error_code EC = lockFileThreadSafe(FD, LK == Exclusive))
+ return createFileError(Path, EC);
+ Locked = LK;
+ return Error::success();
+ }
+
+ Error unlock() {
+ if (Locked) {
+ Locked = std::nullopt;
+ if (std::error_code EC = unlockFileThreadSafe(FD))
+ return createFileError(Path, EC);
+ }
+ return Error::success();
+ }
+};
+} // end anonymous namespace
+
/// Open or create the file at \p Path, map it at \p Capacity bytes, and set up
/// the bump pointer stored at \p BumpPtrOffset inside the mapping.
///
/// \param Path main file path; a sibling "<Path>.shared" lock file is created.
/// \param Capacity maximum mapped size; the file is grown (sparsely, where
///        supported) to this size. See the file comment's FIXME: all users are
///        assumed to pass the same value.
/// \param BumpPtrOffset offset of the atomic bump-pointer word in the file.
/// \param NewFileConstructor run exactly once, under the exclusive init lock,
///        when the file is brand new (size 0); responsible for initializing
///        the region contents (including calling initializeBumpPtr).
/// \returns the mapped region, or the first file error encountered.
Expected<MappedFileRegionBumpPtr> MappedFileRegionBumpPtr::create(
    const Twine &Path, uint64_t Capacity, int64_t BumpPtrOffset,
    function_ref<Error(MappedFileRegionBumpPtr &)> NewFileConstructor) {
  MappedFileRegionBumpPtr Result;
  Result.Path = Path.str();
  // Open the main file.
  int FD;
  if (std::error_code EC = sys::fs::openFileForReadWrite(
          Result.Path, FD, sys::fs::CD_OpenAlways, sys::fs::OF_None))
    return createFileError(Path, EC);
  Result.FD = FD;

  // Open the shared lock file. See file comment for details of locking scheme.
  SmallString<128> SharedLockPath(Result.Path);
  SharedLockPath.append(".shared");
  int SharedLockFD;
  if (std::error_code EC = sys::fs::openFileForReadWrite(
          SharedLockPath, SharedLockFD, sys::fs::CD_OpenAlways,
          sys::fs::OF_None))
    return createFileError(SharedLockPath, EC);
  Result.SharedLockFD = SharedLockFD;

  // Take shared/reader lock that will be held until we close the file; unlocked
  // by destroyImpl.
  if (std::error_code EC =
          lockFileThreadSafe(SharedLockFD, /*Exclusive=*/false))
    return createFileError(Path, EC);

  // Take shared/reader lock for initialization.
  FileLockRAII InitLock(Result.Path, FD);
  if (Error E = InitLock.lock(FileLockRAII::Shared))
    return std::move(E);

  sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD);
  sys::fs::file_status Status;
  if (std::error_code EC = sys::fs::status(File, Status))
    return createFileError(Result.Path, EC);

  if (Status.getSize() < Capacity) {
    // Lock the file exclusively so only one process will do the initialization.
    if (Error E = InitLock.unlock())
      return std::move(E);
    if (Error E = InitLock.lock(FileLockRAII::Exclusive))
      return std::move(E);
    // Retrieve the current size now that we have exclusive access.
    // NOTE: another process may have resized the file in the window between
    // unlock() and lock(), so the earlier Status is stale and must be
    // refreshed before deciding who initializes.
    if (std::error_code EC = sys::fs::status(File, Status))
      return createFileError(Result.Path, EC);
  }

  // At this point either the file is still under-sized, or we have the size for
  // the completely initialized file.

  if (Status.getSize() < Capacity) {
    // We are initializing the file; it may be empty, or may have been shrunk
    // during a previous close.
    // FIXME: Detect a case where someone opened it with a smaller capacity.
    // FIXME: On Windows we should use FSCTL_SET_SPARSE and FSCTL_SET_ZERO_DATA
    // to make this a sparse region, if supported.
    if (std::error_code EC = sys::fs::resize_file(FD, Capacity))
      return createFileError(Result.Path, EC);
  } else {
    // Someone else initialized it.
    // Adopt the existing size as our capacity so the mapping below matches
    // what the initializer created.
    Capacity = Status.getSize();
  }

  // Create the mapped region.
  {
    std::error_code EC;
    sys::fs::mapped_file_region Map(
        File, sys::fs::mapped_file_region::readwrite, Capacity, 0, EC);
    if (EC)
      return createFileError(Result.Path, EC);
    Result.Region = std::move(Map);
  }

  if (Status.getSize() == 0) {
    // We are creating a new file; run the constructor.
    // Still under the exclusive InitLock here, so the constructor runs in
    // exactly one process.
    if (Error E = NewFileConstructor(Result))
      return std::move(E);
  } else {
    Result.initializeBumpPtr(BumpPtrOffset);
  }

  // InitLock is released here by its destructor; the shared lock on the
  // ".shared" file remains held until destroyImpl.
  return Result;
}
+
/// Tear down the mapping: release the long-lived shared lock, opportunistically
/// shrink the file to its used size if no other process has it open, and close
/// both file descriptors. Safe to call on a moved-from/empty object (no FD).
void MappedFileRegionBumpPtr::destroyImpl() {
  if (!FD)
    return;

  // Drop the shared lock indicating we are no longer accessing the file.
  if (SharedLockFD)
    (void)unlockFileThreadSafe(*SharedLockFD);

  // Attempt to truncate the file if we can get exclusive access. Ignore any
  // errors.
  // The exclusive try-lock on the ".shared" file only succeeds when no other
  // instance still holds its reader lock, so shrinking cannot race with a
  // concurrent reader/writer of the tail pages.
  if (BumpPtr) {
    assert(SharedLockFD && "Must have shared lock file open");
    if (tryLockFileThreadSafe(*SharedLockFD) == std::error_code()) {
      assert(size() <= capacity());
      (void)sys::fs::resize_file(*FD, size());
      (void)unlockFileThreadSafe(*SharedLockFD);
    }
  }

  // Helper: close an optional FD and mark it closed so this is idempotent.
  auto Close = [](std::optional<int> &FD) {
    if (FD) {
      sys::fs::file_t File = sys::fs::convertFDToNativeFile(*FD);
      sys::fs::closeFile(File);
      FD = std::nullopt;
    }
  };

  // Close the file and shared lock.
  Close(FD);
  Close(SharedLockFD);
}
+
+void MappedFileRegionBumpPtr::initializeBumpPtr(int64_t BumpPtrOffset) {
+ assert(capacity() < (uint64_t)INT64_MAX && "capacity must fit in int64_t");
+ int64_t BumpPtrEndOffset = BumpPtrOffset + sizeof(decltype(*BumpPtr));
+ assert(BumpPtrEndOffset <= (int64_t)capacity() &&
+ "Expected end offset to be pre-allocated");
+ assert(isAligned(Align::Of<decltype(*BumpPtr)>(), BumpPtrOffset) &&
+ "Expected end offset to be aligned");
+ BumpPtr = reinterpret_cast<decltype(BumpPtr)>(data() + BumpPtrOffset);
+
+ int64_t ExistingValue = 0;
+ if (!BumpPtr->compare_exchange_strong(ExistingValue, BumpPtrEndOffset))
+ assert(ExistingValue >= BumpPtrEndOffset &&
+ "Expected 0, or past the end of the BumpPtr itself");
+}
+
+int64_t MappedFileRegionBumpPtr::allocateOffset(uint64_t AllocSize) {
+ AllocSize = alignTo(AllocSize, getAlign());
+ int64_t OldEnd = BumpPtr->fetch_add(AllocSize);
+ int64_t NewEnd = OldEnd + AllocSize;
+ if (LLVM_UNLIKELY(NewEnd > (int64_t)capacity())) {
+ // Try to return the allocation.
+ (void)BumpPtr->compare_exchange_strong(OldEnd, NewEnd);
+ report_fatal_error(
+ errorCodeToError(std::make_error_code(std::errc::not_enough_memory)));
+ }
+ return OldEnd;
+}