diff options
author | Nuri Amari <nuri.amari99@gmail.com> | 2024-10-07 08:16:46 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-10-07 08:16:46 -0700 |
commit | 2edd897a4227e481af33e8e43090ab088cd9d953 (patch) | |
tree | be46eb6fc640146ca168504586db2a142ccb0659 /llvm/lib/LTO/LTO.cpp | |
parent | 2fe1f84db379bccbf0a3ac136d063a94b5dc59cb (diff) | |
download | llvm-2edd897a4227e481af33e8e43090ab088cd9d953.zip llvm-2edd897a4227e481af33e8e43090ab088cd9d953.tar.gz llvm-2edd897a4227e481af33e8e43090ab088cd9d953.tar.bz2 |
Make WriteIndexesThinBackend multi-threaded (#109847)
We've noticed that, for large builds, executing the thin-link can take on
the order of tens of minutes. We currently use only a single thread to write
the sharded indices and import files for each input bitcode file. While we
need to ensure that the index file produced lists modules in a deterministic
order, that doesn't prevent us from executing the rest of the work in
parallel.
In this change we use a thread pool to execute as much of the backend's
work as possible in parallel. In local testing on a machine with 80
cores, this change makes a thin-link for ~100,000 input files run in ~2
minutes. Without this change it takes upwards of 10 minutes.
---------
Co-authored-by: Nuri Amari <nuriamari@fb.com>
Diffstat (limited to 'llvm/lib/LTO/LTO.cpp')
-rw-r--r-- | llvm/lib/LTO/LTO.cpp | 106 |
1 files changed, 61 insertions, 45 deletions
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index b5eb795..ccf1139 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1376,15 +1376,20 @@ protected: const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries; lto::IndexWriteCallback OnWrite; bool ShouldEmitImportsFiles; + DefaultThreadPool BackendThreadPool; + std::optional<Error> Err; + std::mutex ErrMu; public: ThinBackendProc( const Config &Conf, ModuleSummaryIndex &CombinedIndex, const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries, - lto::IndexWriteCallback OnWrite, bool ShouldEmitImportsFiles) + lto::IndexWriteCallback OnWrite, bool ShouldEmitImportsFiles, + ThreadPoolStrategy ThinLTOParallelism) : Conf(Conf), CombinedIndex(CombinedIndex), ModuleToDefinedGVSummaries(ModuleToDefinedGVSummaries), - OnWrite(OnWrite), ShouldEmitImportsFiles(ShouldEmitImportsFiles) {} + OnWrite(OnWrite), ShouldEmitImportsFiles(ShouldEmitImportsFiles), + BackendThreadPool(ThinLTOParallelism) {} virtual ~ThinBackendProc() = default; virtual Error start( @@ -1393,13 +1398,19 @@ public: const FunctionImporter::ExportSetTy &ExportList, const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR, MapVector<StringRef, BitcodeModule> &ModuleMap) = 0; - virtual Error wait() = 0; - virtual unsigned getThreadCount() = 0; + Error wait() { + BackendThreadPool.wait(); + if (Err) + return std::move(*Err); + return Error::success(); + } + unsigned getThreadCount() { return BackendThreadPool.getMaxConcurrency(); } + virtual bool isSensitiveToInputOrder() { return false; } // Write sharded indices and (optionally) imports to disk Error emitFiles(const FunctionImporter::ImportMapTy &ImportList, llvm::StringRef ModulePath, - const std::string &NewModulePath) { + const std::string &NewModulePath) const { ModuleToSummariesForIndexTy ModuleToSummariesForIndex; GVSummaryPtrSet DeclarationSummaries; @@ -1411,16 +1422,17 @@ public: raw_fd_ostream OS(NewModulePath + ".thinlto.bc", EC, 
sys::fs::OpenFlags::OF_None); if (EC) - return errorCodeToError(EC); + return createFileError("cannot open " + NewModulePath + ".thinlto.bc", + EC); writeIndexToFile(CombinedIndex, OS, &ModuleToSummariesForIndex, &DeclarationSummaries); if (ShouldEmitImportsFiles) { - EC = EmitImportsFiles(ModulePath, NewModulePath + ".imports", - ModuleToSummariesForIndex); - if (EC) - return errorCodeToError(EC); + Error ImportFilesError = EmitImportsFiles( + ModulePath, NewModulePath + ".imports", ModuleToSummariesForIndex); + if (ImportFilesError) + return ImportFilesError; } return Error::success(); } @@ -1428,15 +1440,11 @@ public: namespace { class InProcessThinBackend : public ThinBackendProc { - DefaultThreadPool BackendThreadPool; AddStreamFn AddStream; FileCache Cache; DenseSet<GlobalValue::GUID> CfiFunctionDefs; DenseSet<GlobalValue::GUID> CfiFunctionDecls; - std::optional<Error> Err; - std::mutex ErrMu; - bool ShouldEmitIndexFiles; public: @@ -1447,9 +1455,9 @@ public: AddStreamFn AddStream, FileCache Cache, lto::IndexWriteCallback OnWrite, bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles) : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries, - OnWrite, ShouldEmitImportsFiles), - BackendThreadPool(ThinLTOParallelism), AddStream(std::move(AddStream)), - Cache(std::move(Cache)), ShouldEmitIndexFiles(ShouldEmitIndexFiles) { + OnWrite, ShouldEmitImportsFiles, ThinLTOParallelism), + AddStream(std::move(AddStream)), Cache(std::move(Cache)), + ShouldEmitIndexFiles(ShouldEmitIndexFiles) { for (auto &Name : CombinedIndex.cfiFunctionDefs()) CfiFunctionDefs.insert( GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(Name))); @@ -1546,18 +1554,6 @@ public: OnWrite(std::string(ModulePath)); return Error::success(); } - - Error wait() override { - BackendThreadPool.wait(); - if (Err) - return std::move(*Err); - else - return Error::success(); - } - - unsigned getThreadCount() override { - return BackendThreadPool.getMaxConcurrency(); - } }; } // end anonymous 
namespace @@ -1618,12 +1614,13 @@ class WriteIndexesThinBackend : public ThinBackendProc { public: WriteIndexesThinBackend( const Config &Conf, ModuleSummaryIndex &CombinedIndex, + ThreadPoolStrategy ThinLTOParallelism, const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries, std::string OldPrefix, std::string NewPrefix, std::string NativeObjectPrefix, bool ShouldEmitImportsFiles, raw_fd_ostream *LinkedObjectsFile, lto::IndexWriteCallback OnWrite) : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries, - OnWrite, ShouldEmitImportsFiles), + OnWrite, ShouldEmitImportsFiles, ThinLTOParallelism), OldPrefix(OldPrefix), NewPrefix(NewPrefix), NativeObjectPrefix(NativeObjectPrefix), LinkedObjectsFile(LinkedObjectsFile) {} @@ -1635,9 +1632,11 @@ public: const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR, MapVector<StringRef, BitcodeModule> &ModuleMap) override { StringRef ModulePath = BM.getModuleIdentifier(); - std::string NewModulePath = - getThinLTOOutputFile(ModulePath, OldPrefix, NewPrefix); + // The contents of this file may be used as input to a native link, and must + // therefore contain the processed modules in a deterministic order that + // matches the order they are provided on the command line. For that reason, + // we cannot include this in the asynchronously executed lambda below. if (LinkedObjectsFile) { std::string ObjectPrefix = NativeObjectPrefix.empty() ? 
NewPrefix : NativeObjectPrefix; @@ -1646,33 +1645,49 @@ public: *LinkedObjectsFile << LinkedObjectsFilePath << '\n'; } - if (auto E = emitFiles(ImportList, ModulePath, NewModulePath)) - return E; + BackendThreadPool.async( + [this](const StringRef ModulePath, + const FunctionImporter::ImportMapTy &ImportList, + const std::string &OldPrefix, const std::string &NewPrefix) { + std::string NewModulePath = + getThinLTOOutputFile(ModulePath, OldPrefix, NewPrefix); + auto E = emitFiles(ImportList, ModulePath, NewModulePath); + if (E) { + std::unique_lock<std::mutex> L(ErrMu); + if (Err) + Err = joinErrors(std::move(*Err), std::move(E)); + else + Err = std::move(E); + return; + } + }, + ModulePath, ImportList, OldPrefix, NewPrefix); if (OnWrite) OnWrite(std::string(ModulePath)); return Error::success(); } - Error wait() override { return Error::success(); } - - // WriteIndexesThinBackend should always return 1 to prevent module - // re-ordering and avoid non-determinism in the final link. - unsigned getThreadCount() override { return 1; } + bool isSensitiveToInputOrder() override { + // The order in which modules are written to LinkedObjectsFile should be + // deterministic and match the order they are passed on the command line. 
+ return true; + } }; } // end anonymous namespace ThinBackend lto::createWriteIndexesThinBackend( - std::string OldPrefix, std::string NewPrefix, - std::string NativeObjectPrefix, bool ShouldEmitImportsFiles, - raw_fd_ostream *LinkedObjectsFile, IndexWriteCallback OnWrite) { + ThreadPoolStrategy Parallelism, std::string OldPrefix, + std::string NewPrefix, std::string NativeObjectPrefix, + bool ShouldEmitImportsFiles, raw_fd_ostream *LinkedObjectsFile, + IndexWriteCallback OnWrite) { return [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex, const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries, AddStreamFn AddStream, FileCache Cache) { return std::make_unique<WriteIndexesThinBackend>( - Conf, CombinedIndex, ModuleToDefinedGVSummaries, OldPrefix, - NewPrefix, NativeObjectPrefix, ShouldEmitImportsFiles, + Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries, + OldPrefix, NewPrefix, NativeObjectPrefix, ShouldEmitImportsFiles, LinkedObjectsFile, OnWrite); }; } @@ -1854,7 +1869,8 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, ResolvedODR[Mod.first], ThinLTO.ModuleMap); }; - if (BackendProcess->getThreadCount() == 1) { + if (BackendProcess->getThreadCount() == 1 || + BackendProcess->isSensitiveToInputOrder()) { // Process the modules in the order they were provided on the // command-line. It is important for this codepath to be used for // WriteIndexesThinBackend, to ensure the emitted LinkedObjectsFile lists |