diff options
author | Fred Riss <friss@apple.com> | 2022-10-19 17:29:36 -0700 |
---|---|---|
committer | Fred Riss <friss@apple.com> | 2023-01-18 14:31:27 -0800 |
commit | a033dbbe5c43247b60869b008e67ed86ed230eaa (patch) | |
tree | a6fa9fdc5a204dac20cf6db45d4be06448e5720d /clang | |
parent | 005c15812195c1de6b14b27bebcc8c48892a92a2 (diff) | |
download | llvm-a033dbbe5c43247b60869b008e67ed86ed230eaa.zip llvm-a033dbbe5c43247b60869b008e67ed86ed230eaa.tar.gz llvm-a033dbbe5c43247b60869b008e67ed86ed230eaa.tar.bz2 |
[Clang] Give Clang the ability to use a shared stat cache
Every Clang instance uses an internal FileSystemStatCache to avoid
stating the same content multiple times. However, different instances
of Clang will contend for filesystem access for their initial stats
during HeaderSearch or module validation.
On some workloads, the time spent in the kernel in these concurrent
stat calls has been measured to be over 20% of the overall compilation
time. This is extremely wasteful when most of the stat calls target
mostly immutable content like an SDK.
This commit introduces a new tool `clang-stat-cache` able to generate
an OnDiskHashmap containing the stat data for a given filesystem
hierarchy.
The driver part of this has been modeled after -ivfsoverlay given
the similarities with what it influences. It introduces a new
-ivfsstatcache driver option to instruct Clang to use a stat cache
generated by `clang-stat-cache`. These stat caches are inserted at
the bottom of the VFS stack (right above the real filesystem).
Differential Revision: https://reviews.llvm.org/D136651
Diffstat (limited to 'clang')
-rw-r--r-- | clang/include/clang/Basic/DiagnosticFrontendKinds.td | 5 | ||||
-rw-r--r-- | clang/include/clang/Driver/Options.td | 2 | ||||
-rw-r--r-- | clang/include/clang/Frontend/CompilerInvocation.h | 1 | ||||
-rw-r--r-- | clang/include/clang/Lex/HeaderSearchOptions.h | 7 | ||||
-rw-r--r-- | clang/lib/Frontend/ASTUnit.cpp | 2 | ||||
-rw-r--r-- | clang/lib/Frontend/CompilerInvocation.cpp | 28 | ||||
-rw-r--r-- | clang/test/CMakeLists.txt | 1 | ||||
-rw-r--r-- | clang/test/Driver/vfsstatcache.c | 5 | ||||
-rw-r--r-- | clang/test/clang-stat-cache/cache-effects.c | 63 | ||||
-rw-r--r-- | clang/test/clang-stat-cache/errors.test | 42 | ||||
-rw-r--r-- | clang/tools/CMakeLists.txt | 1 | ||||
-rw-r--r-- | clang/tools/clang-stat-cache/CMakeLists.txt | 19 | ||||
-rw-r--r-- | clang/tools/clang-stat-cache/clang-stat-cache.cpp | 318 |
13 files changed, 492 insertions, 2 deletions
diff --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td b/clang/include/clang/Basic/DiagnosticFrontendKinds.td index d0f672a..e106858 100644 --- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td +++ b/clang/include/clang/Basic/DiagnosticFrontendKinds.td @@ -256,6 +256,11 @@ def err_test_module_file_extension_version : Error< "test module file extension '%0' has different version (%1.%2) than expected " "(%3.%4)">; +def err_missing_vfs_stat_cache_file : Error< + "stat cache file '%0' not found">, DefaultFatal; +def err_invalid_vfs_stat_cache : Error< + "invalid stat cache file '%0'">, DefaultFatal; + def err_missing_vfs_overlay_file : Error< "virtual filesystem overlay file '%0' not found">, DefaultFatal; def err_invalid_vfs_overlay : Error< diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index ba49b33..9334e63 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3357,6 +3357,8 @@ def iwithsysroot : JoinedOrSeparate<["-"], "iwithsysroot">, Group<clang_i_Group> HelpText<"Add directory to SYSTEM include search path, " "absolute paths are relative to -isysroot">, MetaVarName<"<directory>">, Flags<[CC1Option]>; +def ivfsstatcache : JoinedOrSeparate<["-"], "ivfsstatcache">, Group<clang_i_Group>, Flags<[CC1Option]>, + HelpText<"Use the stat data cached in file instead of doing filesystem syscalls. 
See clang-stat-cache utility.">; def ivfsoverlay : JoinedOrSeparate<["-"], "ivfsoverlay">, Group<clang_i_Group>, Flags<[CC1Option]>, HelpText<"Overlay the virtual filesystem described by file over the real file system">; def imultilib : Separate<["-"], "imultilib">, Group<gfortran_Group>; diff --git a/clang/include/clang/Frontend/CompilerInvocation.h b/clang/include/clang/Frontend/CompilerInvocation.h index 254f048..9cc6aa5 100644 --- a/clang/include/clang/Frontend/CompilerInvocation.h +++ b/clang/include/clang/Frontend/CompilerInvocation.h @@ -296,6 +296,7 @@ IntrusiveRefCntPtr<llvm::vfs::FileSystem> createVFSFromCompilerInvocation( IntrusiveRefCntPtr<llvm::vfs::FileSystem> createVFSFromOverlayFiles(ArrayRef<std::string> VFSOverlayFiles, + ArrayRef<std::string> VFSStatCacheFiles, DiagnosticsEngine &Diags, IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS); diff --git a/clang/include/clang/Lex/HeaderSearchOptions.h b/clang/include/clang/Lex/HeaderSearchOptions.h index 6436a9b..548f7d4 100644 --- a/clang/include/clang/Lex/HeaderSearchOptions.h +++ b/clang/include/clang/Lex/HeaderSearchOptions.h @@ -181,6 +181,9 @@ public: /// of computing the module hash. llvm::SmallSetVector<llvm::CachedHashString, 16> ModulesIgnoreMacros; + /// The set of user-provided stat cache files. + std::vector<std::string> VFSStatCacheFiles; + /// The set of user-provided virtual filesystem overlay files. 
std::vector<std::string> VFSOverlayFiles; @@ -250,6 +253,10 @@ public: SystemHeaderPrefixes.emplace_back(Prefix, IsSystemHeader); } + void AddVFSStatCacheFile(StringRef Name) { + VFSStatCacheFiles.push_back(std::string(Name)); + } + void AddVFSOverlayFile(StringRef Name) { VFSOverlayFiles.push_back(std::string(Name)); } diff --git a/clang/lib/Frontend/ASTUnit.cpp b/clang/lib/Frontend/ASTUnit.cpp index 3b4f251..dbf55d9 100644 --- a/clang/lib/Frontend/ASTUnit.cpp +++ b/clang/lib/Frontend/ASTUnit.cpp @@ -574,7 +574,7 @@ public: // performs the initialization too late (once both target and language // options are read). PP.getFileManager().setVirtualFileSystem(createVFSFromOverlayFiles( - HSOpts.VFSOverlayFiles, PP.getDiagnostics(), + HSOpts.VFSOverlayFiles, HSOpts.VFSStatCacheFiles, PP.getDiagnostics(), PP.getFileManager().getVirtualFileSystemPtr())); InitializedHeaderSearchPaths = true; diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 0bb9c8c..b0ef37fa 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -83,6 +83,7 @@ #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" #include "llvm/Support/Regex.h" +#include "llvm/Support/StatCacheFileSystem.h" #include "llvm/Support/VersionTuple.h" #include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" @@ -3084,6 +3085,9 @@ static void GenerateHeaderSearchArgs(HeaderSearchOptions &Opts, GenerateArg(Args, Opt, P.Prefix, SA); } + for (const std::string &F : Opts.VFSStatCacheFiles) + GenerateArg(Args, OPT_ivfsstatcache, F, SA); + for (const std::string &F : Opts.VFSOverlayFiles) GenerateArg(Args, OPT_ivfsoverlay, F, SA); } @@ -3217,6 +3221,9 @@ static bool ParseHeaderSearchArgs(HeaderSearchOptions &Opts, ArgList &Args, Opts.AddSystemHeaderPrefix( A->getValue(), A->getOption().matches(OPT_system_header_prefix)); + for (const auto *A : Args.filtered(OPT_ivfsstatcache)) + 
Opts.AddVFSStatCacheFile(A->getValue()); + for (const auto *A : Args.filtered(OPT_ivfsoverlay)) Opts.AddVFSOverlayFile(A->getValue()); @@ -4747,12 +4754,31 @@ clang::createVFSFromCompilerInvocation( const CompilerInvocation &CI, DiagnosticsEngine &Diags, IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS) { return createVFSFromOverlayFiles(CI.getHeaderSearchOpts().VFSOverlayFiles, + CI.getHeaderSearchOpts().VFSStatCacheFiles, Diags, std::move(BaseFS)); } IntrusiveRefCntPtr<llvm::vfs::FileSystem> clang::createVFSFromOverlayFiles( - ArrayRef<std::string> VFSOverlayFiles, DiagnosticsEngine &Diags, + ArrayRef<std::string> VFSOverlayFiles, + ArrayRef<std::string> VFSStatCacheFiles, DiagnosticsEngine &Diags, IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS) { + for (const auto &File : VFSStatCacheFiles) { + llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Buffer = + BaseFS->getBufferForFile(File); + if (!Buffer) { + Diags.Report(diag::err_missing_vfs_stat_cache_file) << File; + continue; + } + + auto StatCache = + llvm::vfs::StatCacheFileSystem::create(std::move(*Buffer), BaseFS); + + if (errorToBool(StatCache.takeError())) + Diags.Report(diag::err_invalid_vfs_stat_cache) << File; + else + BaseFS = std::move(*StatCache); + } + if (VFSOverlayFiles.empty()) return BaseFS; diff --git a/clang/test/CMakeLists.txt b/clang/test/CMakeLists.txt index 1d6377b..cd3775b 100644 --- a/clang/test/CMakeLists.txt +++ b/clang/test/CMakeLists.txt @@ -71,6 +71,7 @@ list(APPEND CLANG_TEST_DEPS clang-refactor clang-diff clang-scan-deps + clang-stat-cache diagtool hmaptool ) diff --git a/clang/test/Driver/vfsstatcache.c b/clang/test/Driver/vfsstatcache.c new file mode 100644 index 0000000..ec3c279 --- /dev/null +++ b/clang/test/Driver/vfsstatcache.c @@ -0,0 +1,5 @@ +// RUN: %clang -ivfsstatcache foo.h -### %s 2>&1 | FileCheck %s +// CHECK: "-ivfsstatcache" "foo.h" + +// RUN: not %clang -ivfsstatcache foo.h %s 2>&1 | FileCheck -check-prefix=CHECK-MISSING %s +// CHECK-MISSING: stat cache file 
'foo.h' not found diff --git a/clang/test/clang-stat-cache/cache-effects.c b/clang/test/clang-stat-cache/cache-effects.c new file mode 100644 index 0000000..bf2e2db --- /dev/null +++ b/clang/test/clang-stat-cache/cache-effects.c @@ -0,0 +1,63 @@ +#include "foo.h" + +// Testing the effects of a cache is tricky, because it's just supposed to speed +// things up, not change the behavior. In this test, we are using an outdated +// cache to trick HeaderSearch into finding the wrong module and show that it is +// being used. + +// Clear the module cache. +// RUN: rm -rf %t +// RUN: mkdir -p %t/Inputs +// RUN: mkdir -p %t/Inputs/Foo1 +// RUN: mkdir -p %t/Inputs/Foo2 +// RUN: mkdir -p %t/modules-to-compare + +// === +// Create a Foo module in the Foo1 direcotry. +// RUN: echo 'void meow(void);' > %t/Inputs/Foo1/foo.h +// RUN: echo 'module Foo { header "foo.h" }' > %t/Inputs/Foo1/module.map + +// === +// Compile the module. Note that the compiler has 2 header search paths: +// Foo2 and Foo1 in that order. The module has been created in Foo1, and +// it is the only version available now. +// RUN: %clang_cc1 -cc1 -fmodules -fimplicit-module-maps -fdisable-module-hash -fmodules-cache-path=%t/modules-cache -fsyntax-only -I %t/Inputs/Foo2 -I %t/Inputs/Foo1 -Rmodule-build %s 2>&1 +// RUN: cp %t/modules-cache/Foo.pcm %t/modules-to-compare/Foo-before.pcm + +// === +// Create a stat cache for our inputs directory +// RUN: clang-stat-cache %t/Inputs -o %t/stat.cache + +// === +// As a sanity check, re-run the same compilation with the cache and check that +// the module does not change. 
+// RUN: %clang_cc1 -cc1 -fmodules -fimplicit-module-maps -fdisable-module-hash -fmodules-cache-path=%t/modules-cache -fsyntax-only -I %t/Inputs/Foo2 -I %t/Inputs/Foo1 -ivfsstatcache %t/stat.cache %s -Rmodule-build 2>&1 +// RUN: cp %t/modules-cache/Foo.pcm %t/modules-to-compare/Foo-after.pcm + +// RUN: diff %t/modules-to-compare/Foo-before.pcm %t/modules-to-compare/Foo-after.pcm + +// === +// Now introduce a different Foo module in the Foo2 directory which is before +// Foo1 in the search paths. +// RUN: echo 'void meow2(void);' > %t/Inputs/Foo2/foo.h +// RUN: echo 'module Foo { header "foo.h" }' > %t/Inputs/Foo2/module.map + +// === +// Because we're using the (now-outdated) stat cache, this compilation +// should still be using the first module. It will not see the new one +// which is earlier in the search paths. +// RUN: %clang_cc1 -cc1 -fmodules -fimplicit-module-maps -fdisable-module-hash -fmodules-cache-path=%t/modules-cache -fsyntax-only -I %t/Inputs/Foo2 -I %t/Inputs/Foo1 -ivfsstatcache %t/stat.cache -Rmodule-build -Rmodule-import %s 2>&1 +// RUN: cp %t/modules-cache/Foo.pcm %t/modules-to-compare/Foo-after.pcm + +// RUN: diff %t/modules-to-compare/Foo-before.pcm %t/modules-to-compare/Foo-after.pcm + +// === +// Regenerate the stat cache for our Inputs directory +// RUN: clang-stat-cache -f %t/Inputs -o %t/stat.cache 2>&1 + +// === +// Use the module and now see that we are recompiling the new one. 
+// RUN: %clang_cc1 -cc1 -fmodules -fimplicit-module-maps -fdisable-module-hash -fmodules-cache-path=%t/modules-cache -fsyntax-only -I %t/Inputs/Foo2 -I %t/Inputs/Foo1 -ivfsstatcache %t/stat.cache -Rmodule-build %s 2>&1 +// RUN: cp %t/modules-cache/Foo.pcm %t/modules-to-compare/Foo-after.pcm + +// RUN: not diff %t/modules-to-compare/Foo-before.pcm %t/modules-to-compare/Foo-after.pcm diff --git a/clang/test/clang-stat-cache/errors.test b/clang/test/clang-stat-cache/errors.test new file mode 100644 index 0000000..ab73a134 --- /dev/null +++ b/clang/test/clang-stat-cache/errors.test @@ -0,0 +1,42 @@ +RUN: rm -rf %t +RUN: mkdir -p %t + +RUN: not clang-stat-cache %t/not-there -o %t/stat.cache 2>&1 | FileCheck --check-prefix=NO-SUCH-DIR %s +NO-SUCH-DIR: Failed to stat the target directory: {{[Nn]}}o such file or directory + +RUN: not clang-stat-cache %t -o %t/not-there/stat.cache 2>&1 | FileCheck --check-prefix=NO-SUCH-FILE %s +NO-SUCH-FILE: Failed to open cache file: '{{.*}}': {{[Nn]}}o such file or directory + +# Use mixed-case directories to exercise the case insensitive implementation. +RUN: mkdir -p %t/Dir +RUN: mkdir -p %t/Dir2 + +# Try to overwrite a few invalid caches +RUN: echo "Not a stat cache" > %t/stat.cache +RUN: not clang-stat-cache %t/Dir -o %t/stat.cache 2>&1 | FileCheck --check-prefix=INVALID-CACHE %s +RUN: echo "Not a stat cache, but bigger than the stat cache header" > %t/stat.cache +RUN: not clang-stat-cache %t/Dir -o %t/stat.cache 2>&1 | FileCheck --check-prefix=INVALID-CACHE %s +RUN: echo "STAT. This has the correct MAGIC and is bigger than the header." > %t/stat.cache +RUN: not clang-stat-cache %t/Dir -o %t/stat.cache 2>&1 | FileCheck --check-prefix=INVALID-CACHE %s + +INVALID-CACHE: The output cache file exists and is not a valid stat cache. Aborting. + +# Test the force flag +RUN: echo "STAT. This has the correct MAGIC and is bigger than the header." 
> %t/stat.cache +RUN: clang-stat-cache %t/Dir -f -o %t/stat.cache 2>&1 | FileCheck --check-prefix=INVALID-CACHE-FORCE %s +INVALID-CACHE-FORCE: The output cache file exists and is not a valid stat cache. Forced update. + +# Generate a valid cache for dir +RUN: rm %t/stat.cache +RUN: clang-stat-cache %t/Dir -o %t/stat.cache +RUN: cp %t/stat.cache %t/stat.cache.save + +# Try with same base direcotry but with extraneous separators +RUN: clang-stat-cache %t/Dir/// -v -o %t/stat.cache | FileCheck --check-prefix=EXTRA-SEP %s +EXTRA-SEP-NOT: Existing cache has different directory. Regenerating... +EXTRA-SEP: Cache up-to-date, exiting + +# Rewrite the cache with a different base directory +RUN: clang-stat-cache %t/Dir2 -o %t/stat.cache 2>&1 | FileCheck --check-prefix=OTHER-DIR %s +OTHER-DIR: Existing cache has different directory. Regenerating... + diff --git a/clang/tools/CMakeLists.txt b/clang/tools/CMakeLists.txt index f60db6e..147555d 100644 --- a/clang/tools/CMakeLists.txt +++ b/clang/tools/CMakeLists.txt @@ -15,6 +15,7 @@ add_clang_subdirectory(clang-scan-deps) if(HAVE_CLANG_REPL_SUPPORT) add_clang_subdirectory(clang-repl) endif() +add_clang_subdirectory(clang-stat-cache) add_clang_subdirectory(c-index-test) diff --git a/clang/tools/clang-stat-cache/CMakeLists.txt b/clang/tools/clang-stat-cache/CMakeLists.txt new file mode 100644 index 0000000..ab93d8b --- /dev/null +++ b/clang/tools/clang-stat-cache/CMakeLists.txt @@ -0,0 +1,19 @@ +set(LLVM_LINK_COMPONENTS + Core + Support + ) + +add_clang_tool(clang-stat-cache + clang-stat-cache.cpp + ) + +if(APPLE) +set(CLANG_STAT_CACHE_LIB_DEPS + "-framework CoreServices" + ) +endif() + +clang_target_link_libraries(clang-stat-cache + PRIVATE + ${CLANG_STAT_CACHE_LIB_DEPS} + ) diff --git a/clang/tools/clang-stat-cache/clang-stat-cache.cpp b/clang/tools/clang-stat-cache/clang-stat-cache.cpp new file mode 100644 index 0000000..183d0d7 --- /dev/null +++ b/clang/tools/clang-stat-cache/clang-stat-cache.cpp @@ -0,0 +1,318 @@ +//===- 
clang-stat-cache.cpp -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/StatCacheFileSystem.h" +#include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" + +#include <assert.h> + +#ifdef __APPLE__ +#include <CoreServices/CoreServices.h> + +#include <sys/mount.h> +#include <sys/param.h> +#endif // __APPLE__ + +// The clang-stat-cache utility creates an on-disk cache for the stat data +// of a file-system tree which is expected to be immutable during a build. + +using namespace llvm; +using llvm::vfs::StatCacheFileSystem; + +cl::OptionCategory StatCacheCategory("clang-stat-cache options"); + +cl::opt<std::string> OutputFilename("o", cl::Required, + cl::desc("Specify output filename"), + cl::value_desc("filename"), + cl::cat(StatCacheCategory)); + +cl::opt<std::string> TargetDirectory(cl::Positional, cl::Required, + cl::value_desc("dirname"), + cl::cat(StatCacheCategory)); + +cl::opt<bool> Verbose("v", cl::desc("More verbose output")); +cl::opt<bool> Force("f", cl::desc("Force cache generation")); + +#if __APPLE__ +// Used by checkContentsValidity. See below. +struct CallbackInfo { + bool SeenChanges = false; +}; + +// Used by checkContentsValidity. See below. 
+static void FSEventsCallback(ConstFSEventStreamRef streamRef, void *CtxInfo, + size_t numEvents, void *eventPaths, + const FSEventStreamEventFlags *eventFlags, + const FSEventStreamEventId *eventIds) { + CallbackInfo *Info = static_cast<CallbackInfo *>(CtxInfo); + for (size_t i = 0; i < numEvents; ++i) { + // The kFSEventStreamEventFlagHistoryDone is set on the last 'historical' + // event passed to the callback. This means it is passed after the callback + // all the relevant activity between the StartEvent of the stream and the + // point the stream was created. + // If the callback didn't see any other event, it means there haven't been + // any alterations to the target directory hierarchy and the cache contents + // is still up-to-date. + if (eventFlags[i] & kFSEventStreamEventFlagHistoryDone) { + // Let's stop the main queue and go back to our non-queue code. + CFRunLoopStop(CFRunLoopGetCurrent()); + break; + } + + // If we see any event outisde of the kFSEventStreamEventFlagHistoryDone + // one, there have been changes to the target directory. + Info->SeenChanges = true; + } +} + +// FSEvents-based check for cache contents validity. We store the latest +// FSEventStreamEventId in the cache as a ValidityToken and check if any +// file system events affected the base directory since the cache was +// generated. +static bool checkContentsValidity(uint64_t &ValidityToken) { + CFStringRef TargetDir = CFStringCreateWithCStringNoCopy( + kCFAllocatorDefault, TargetDirectory.c_str(), kCFStringEncodingASCII, + kCFAllocatorNull); + CFArrayRef PathsToWatch = + CFArrayCreate(nullptr, (const void **)&TargetDir, 1, nullptr); + CallbackInfo Info; + FSEventStreamContext Ctx = {0, &Info, nullptr, nullptr, nullptr}; + FSEventStreamRef Stream; + CFAbsoluteTime Latency = 0; // Latency in seconds. Do not wait. + + // Start at the latest event stored in the cache. + FSEventStreamEventId StartEvent = ValidityToken; + // Update the Validity token with the current latest event. 
+ ValidityToken = FSEventsGetCurrentEventId(); + + // Create the stream + Stream = + FSEventStreamCreate(NULL, &FSEventsCallback, &Ctx, PathsToWatch, + StartEvent, Latency, kFSEventStreamCreateFlagNone); + + // Associate the stream with the main queue. + FSEventStreamSetDispatchQueue(Stream, dispatch_get_main_queue()); + // Start the stream (needs the queue to run to do anything). + if (!FSEventStreamStart(Stream)) { + errs() << "Failed to create FS event stream. " + << "Considering the cache up-to-date.\n"; + return true; + } + + // Start the main queue. It will be exited by our callback when it got + // confirmed it processed all events. + CFRunLoopRun(); + + return !Info.SeenChanges; +} + +#else // __APPLE__ + +// There is no cross-platform way to implement a validity check. If this +// platform doesn't support it, just consider the cache contents always +// valid. When that's the case, the tool running cache generation needs +// to have the knowledge to do it only when needed. +static bool checkContentsValidity(uint64_t &ValidityToken) { return true; } + +#endif // __APPLE__ + +// Populate Generator with the stat cache data for the filesystem tree +// rooted at BasePath. +static std::error_code +populateHashTable(StringRef BasePath, + StatCacheFileSystem::StatCacheWriter &Generator) { + using namespace llvm; + using namespace sys::fs; + + std::error_code ErrorCode; + + // Just loop over the target directory using a recursive iterator. + // This invocation follows symlinks, so we are going to potentially + // store the status of the same file multiple times with different + // names. + for (recursive_directory_iterator I(BasePath, ErrorCode), E; + I != E && !ErrorCode; I.increment(ErrorCode)) { + StringRef Path = I->path(); + sys::fs::file_status s; + // This can fail (broken symlink) and leave the file_status with + // its default values. The reader knows this. 
+ status(Path, s); + + Generator.addEntry(Path, s); + } + + return ErrorCode; +} + +static bool checkCacheValid(int FD, raw_fd_ostream &Out, + uint64_t &ValidityToken) { + sys::fs::file_status Status; + auto EC = sys::fs::status(FD, Status); + if (EC) { + llvm::errs() << "fstat failed: " + << llvm::toString(llvm::errorCodeToError(EC)) << "\n"; + return false; + } + + auto Size = Status.getSize(); + if (Size == 0) { + // New file. +#ifdef __APPLE__ + // Get the current (global) FSEvent id and use this as ValidityToken. + ValidityToken = FSEventsGetCurrentEventId(); +#endif + return false; + } + + auto ErrorOrBuffer = MemoryBuffer::getOpenFile( + sys::fs::convertFDToNativeFile(FD), OutputFilename, Status.getSize()); + + // Refuse to write to this cache file if it exists but its contents do + // not look like a valid cache file. + StringRef BaseDir; + bool IsCaseSensitive; + bool VersionMatch; + if (auto E = StatCacheFileSystem::validateCacheFile( + (*ErrorOrBuffer)->getMemBufferRef(), BaseDir, IsCaseSensitive, + VersionMatch, ValidityToken)) { + llvm::errs() << "The output cache file exists and is not a valid stat " + "cache."; + if (!Force) { + llvm::errs() << " Aborting.\n"; + exit(1); + } + + consumeError(std::move(E)); + llvm::errs() << " Forced update.\n"; + return false; + } + + if (BaseDir != TargetDirectory && + (IsCaseSensitive || !BaseDir.equals_insensitive(TargetDirectory))) { + llvm::errs() << "Existing cache has different directory. Regenerating...\n"; + return false; + } + + if (!VersionMatch) { + llvm::errs() + << "Exisitng cache has different version number. Regenerating...\n"; + return false; + } + + // Basic structure checks have passed. Lets see if we can prove that the cache + // contents are still valid. + bool IsValid = checkContentsValidity(ValidityToken); + if (IsValid) { + // The cache is valid, but we might have gotten an updated ValidityToken. 
+ // Update the cache with it as clang-stat-cache is just going to exit after + // returning from this function. + StatCacheFileSystem::updateValidityToken(Out, ValidityToken); + } + return IsValid && !Force; +} + +int main(int argc, char *argv[]) { + cl::ParseCommandLineOptions(argc, argv); + + llvm::SmallString<128> CanonicalDirectory = StringRef(TargetDirectory); + + // Remove extraneous separators from the end of the basename. + while (!CanonicalDirectory.empty() && + sys::path::is_separator(CanonicalDirectory.back())) + CanonicalDirectory.pop_back(); + // Canonicalize separators on Windows + llvm::sys::path::make_preferred(CanonicalDirectory); + TargetDirectory = std::string(CanonicalDirectory); + + StringRef Dirname(TargetDirectory); + + std::error_code EC; + int FD; + EC = sys::fs::openFileForReadWrite( + OutputFilename, FD, llvm::sys::fs::CD_OpenAlways, llvm::sys::fs::OF_None); + if (EC) { + llvm::errs() << "Failed to open cache file: " + << toString(llvm::createFileError(OutputFilename, EC)) << "\n"; + return 1; + } + + raw_fd_ostream Out(FD, /* ShouldClose=*/true); + + uint64_t ValidityToken = 0; + // Check if the cache is valid and up-to-date. + if (checkCacheValid(FD, Out, ValidityToken)) { + if (Verbose) + outs() << "Cache up-to-date, exiting\n"; + return 0; + } + + if (Verbose) + outs() << "Building a stat cache for '" << TargetDirectory << "' into '" + << OutputFilename << "'\n"; + + // Do not generate a cache for NFS. Iterating huge directory hierarchies + // over NFS will be very slow. Better to let the compiler search only the + // pieces that it needs than use a cache that takes ages to populate. + bool IsLocal; + EC = sys::fs::is_local(Dirname, IsLocal); + if (EC) { + errs() << "Failed to stat the target directory: " + << llvm::toString(llvm::errorCodeToError(EC)) << "\n"; + return 1; + } + + if (!IsLocal && !Force) { + errs() << "Target directory is not a local filesystem. 
" + << "Not populating the cache.\n"; + return 0; + } + + sys::fs::file_status BaseDirStatus; + if (std::error_code EC = status(Dirname, BaseDirStatus)) { + errs() << "Failed to stat the target directory: " + << llvm::toString(llvm::errorCodeToError(EC)) << "\n"; + return 1; + } + + // Check if the filesystem hosting the target directory is case sensitive. + bool IsCaseSensitive = true; +#ifdef _PC_CASE_SENSITIVE + IsCaseSensitive = + ::pathconf(TargetDirectory.c_str(), _PC_CASE_SENSITIVE) == 1; +#endif + StatCacheFileSystem::StatCacheWriter Generator( + Dirname, BaseDirStatus, IsCaseSensitive, ValidityToken); + + // Populate the cache. + auto startTime = llvm::TimeRecord::getCurrentTime(); + populateHashTable(Dirname, Generator); + auto duration = llvm::TimeRecord::getCurrentTime(); + duration -= startTime; + + if (Verbose) + errs() << "populateHashTable took: " << duration.getWallTime() << "s\n"; + + // Write the cache to disk. + startTime = llvm::TimeRecord::getCurrentTime(); + int Size = Generator.writeStatCache(Out); + duration = llvm::TimeRecord::getCurrentTime(); + duration -= startTime; + + if (Verbose) + errs() << "writeStatCache took: " << duration.getWallTime() << "s\n"; + + // We might have opened a pre-exising cache which was bigger. + llvm::sys::fs::resize_file(FD, Size); + + return 0; +} |