Make WriteIndexesThinBackend multi-threaded (#109847)

We've noticed that for large builds, executing the thin-link step can take on the order of tens of minutes. We are only using a single thread to write the sharded indices and import files for each input bitcode file. While we need to ensure the index file produced lists modules in a deterministic order, that doesn't prevent us from executing the rest of the work in parallel. In this change we use a thread pool to execute as much of the backend's work as possible in parallel. In local testing on a machine with 80 cores, this change makes a thin-link for ~100,000 input files run in ~2 minutes. Without this change it takes upwards of 10 minutes. --------- Co-authored-by: Nuri Amari <nuriamari@fb.com>
author: Nuri Amari <nuri.amari99@gmail.com> 2024-10-07 08:16:46 -0700
committer: GitHub <noreply@github.com> 2024-10-07 08:16:46 -0700
commit: 2edd897a4227e481af33e8e43090ab088cd9d953 (patch)
tree: be46eb6fc640146ca168504586db2a142ccb0659 /llvm/lib/LTO/LTO.cpp
parent: 2fe1f84db379bccbf0a3ac136d063a94b5dc59cb (diff)
download: llvm-2edd897a4227e481af33e8e43090ab088cd9d953.zip
llvm-2edd897a4227e481af33e8e43090ab088cd9d953.tar.gz
llvm-2edd897a4227e481af33e8e43090ab088cd9d953.tar.bz2
1 files changed, 61 insertions, 45 deletions
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index b5eb795..ccf1139 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -1376,15 +1376,20 @@ protected:
   const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries;
   lto::IndexWriteCallback OnWrite;
   bool ShouldEmitImportsFiles;
+  DefaultThreadPool BackendThreadPool;
+  std::optional<Error> Err;
+  std::mutex ErrMu;
 
 public:
   ThinBackendProc(
       const Config &Conf, ModuleSummaryIndex &CombinedIndex,
       const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
-      lto::IndexWriteCallback OnWrite, bool ShouldEmitImportsFiles)
+      lto::IndexWriteCallback OnWrite, bool ShouldEmitImportsFiles,
+      ThreadPoolStrategy ThinLTOParallelism)
       : Conf(Conf), CombinedIndex(CombinedIndex),
         ModuleToDefinedGVSummaries(ModuleToDefinedGVSummaries),
-        OnWrite(OnWrite), ShouldEmitImportsFiles(ShouldEmitImportsFiles) {}
+        OnWrite(OnWrite), ShouldEmitImportsFiles(ShouldEmitImportsFiles),
+        BackendThreadPool(ThinLTOParallelism) {}
 
   virtual ~ThinBackendProc() = default;
   virtual Error start(
@@ -1393,13 +1398,19 @@ public:
       const FunctionImporter::ExportSetTy &ExportList,
       const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
       MapVector<StringRef, BitcodeModule> &ModuleMap) = 0;
-  virtual Error wait() = 0;
-  virtual unsigned getThreadCount() = 0;
+  Error wait() {
+    BackendThreadPool.wait();
+    if (Err)
+      return std::move(*Err);
+    return Error::success();
+  }
+  unsigned getThreadCount() { return BackendThreadPool.getMaxConcurrency(); }
+  virtual bool isSensitiveToInputOrder() { return false; }
 
   // Write sharded indices and (optionally) imports to disk
   Error emitFiles(const FunctionImporter::ImportMapTy &ImportList,
                   llvm::StringRef ModulePath,
-                  const std::string &NewModulePath) {
+                  const std::string &NewModulePath) const {
     ModuleToSummariesForIndexTy ModuleToSummariesForIndex;
     GVSummaryPtrSet DeclarationSummaries;
 
@@ -1411,16 +1422,17 @@ public:
     raw_fd_ostream OS(NewModulePath + ".thinlto.bc", EC,
                       sys::fs::OpenFlags::OF_None);
     if (EC)
-      return errorCodeToError(EC);
+      return createFileError("cannot open " + NewModulePath + ".thinlto.bc",
+                             EC);
 
     writeIndexToFile(CombinedIndex, OS, &ModuleToSummariesForIndex,
                      &DeclarationSummaries);
 
     if (ShouldEmitImportsFiles) {
-      EC = EmitImportsFiles(ModulePath, NewModulePath + ".imports",
-                            ModuleToSummariesForIndex);
-      if (EC)
-        return errorCodeToError(EC);
+      Error ImportFilesError = EmitImportsFiles(
+          ModulePath, NewModulePath + ".imports", ModuleToSummariesForIndex);
+      if (ImportFilesError)
+        return ImportFilesError;
     }
     return Error::success();
   }
@@ -1428,15 +1440,11 @@ public:
 
 namespace {
 class InProcessThinBackend : public ThinBackendProc {
-  DefaultThreadPool BackendThreadPool;
   AddStreamFn AddStream;
   FileCache Cache;
   DenseSet<GlobalValue::GUID> CfiFunctionDefs;
   DenseSet<GlobalValue::GUID> CfiFunctionDecls;
 
-  std::optional<Error> Err;
-  std::mutex ErrMu;
-
   bool ShouldEmitIndexFiles;
 
 public:
@@ -1447,9 +1455,9 @@ public:
       AddStreamFn AddStream, FileCache Cache, lto::IndexWriteCallback OnWrite,
       bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles)
       : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
-                        OnWrite, ShouldEmitImportsFiles),
-        BackendThreadPool(ThinLTOParallelism), AddStream(std::move(AddStream)),
-        Cache(std::move(Cache)), ShouldEmitIndexFiles(ShouldEmitIndexFiles) {
+                        OnWrite, ShouldEmitImportsFiles, ThinLTOParallelism),
+        AddStream(std::move(AddStream)), Cache(std::move(Cache)),
+        ShouldEmitIndexFiles(ShouldEmitIndexFiles) {
     for (auto &Name : CombinedIndex.cfiFunctionDefs())
       CfiFunctionDefs.insert(
           GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(Name)));
@@ -1546,18 +1554,6 @@ public:
       OnWrite(std::string(ModulePath));
     return Error::success();
   }
-
-  Error wait() override {
-    BackendThreadPool.wait();
-    if (Err)
-      return std::move(*Err);
-    else
-      return Error::success();
-  }
-
-  unsigned getThreadCount() override {
-    return BackendThreadPool.getMaxConcurrency();
-  }
 };
 } // end anonymous namespace
 
@@ -1618,12 +1614,13 @@ class WriteIndexesThinBackend : public ThinBackendProc {
 public:
   WriteIndexesThinBackend(
       const Config &Conf, ModuleSummaryIndex &CombinedIndex,
+      ThreadPoolStrategy ThinLTOParallelism,
       const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
       std::string OldPrefix, std::string NewPrefix,
       std::string NativeObjectPrefix, bool ShouldEmitImportsFiles,
       raw_fd_ostream *LinkedObjectsFile, lto::IndexWriteCallback OnWrite)
       : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
-                        OnWrite, ShouldEmitImportsFiles),
+                        OnWrite, ShouldEmitImportsFiles, ThinLTOParallelism),
         OldPrefix(OldPrefix), NewPrefix(NewPrefix),
         NativeObjectPrefix(NativeObjectPrefix),
         LinkedObjectsFile(LinkedObjectsFile) {}
@@ -1635,9 +1632,11 @@ public:
       const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
       MapVector<StringRef, BitcodeModule> &ModuleMap) override {
     StringRef ModulePath = BM.getModuleIdentifier();
-    std::string NewModulePath =
-        getThinLTOOutputFile(ModulePath, OldPrefix, NewPrefix);
 
+    // The contents of this file may be used as input to a native link, and must
+    // therefore contain the processed modules in a deterministic order that
+    // matches the order they are provided on the command line. For that reason,
+    // we cannot include this in the asynchronously executed lambda below.
     if (LinkedObjectsFile) {
       std::string ObjectPrefix =
           NativeObjectPrefix.empty() ? NewPrefix : NativeObjectPrefix;
@@ -1646,33 +1645,49 @@ public:
       *LinkedObjectsFile << LinkedObjectsFilePath << '\n';
     }
 
-    if (auto E = emitFiles(ImportList, ModulePath, NewModulePath))
-      return E;
+    BackendThreadPool.async(
+        [this](const StringRef ModulePath,
+               const FunctionImporter::ImportMapTy &ImportList,
+               const std::string &OldPrefix, const std::string &NewPrefix) {
+          std::string NewModulePath =
+              getThinLTOOutputFile(ModulePath, OldPrefix, NewPrefix);
+          auto E = emitFiles(ImportList, ModulePath, NewModulePath);
+          if (E) {
+            std::unique_lock<std::mutex> L(ErrMu);
+            if (Err)
+              Err = joinErrors(std::move(*Err), std::move(E));
+            else
+              Err = std::move(E);
+            return;
+          }
+        },
+        ModulePath, ImportList, OldPrefix, NewPrefix);
 
     if (OnWrite)
       OnWrite(std::string(ModulePath));
     return Error::success();
   }
 
-  Error wait() override { return Error::success(); }
-
-  // WriteIndexesThinBackend should always return 1 to prevent module
-  // re-ordering and avoid non-determinism in the final link.
-  unsigned getThreadCount() override { return 1; }
+  bool isSensitiveToInputOrder() override {
+    // The order in which modules are written to LinkedObjectsFile should be
+    // deterministic and match the order they are passed on the command line.
+    return true;
+  }
 };
 } // end anonymous namespace
 
 ThinBackend lto::createWriteIndexesThinBackend(
-    std::string OldPrefix, std::string NewPrefix,
-    std::string NativeObjectPrefix, bool ShouldEmitImportsFiles,
-    raw_fd_ostream *LinkedObjectsFile, IndexWriteCallback OnWrite) {
+    ThreadPoolStrategy Parallelism, std::string OldPrefix,
+    std::string NewPrefix, std::string NativeObjectPrefix,
+    bool ShouldEmitImportsFiles, raw_fd_ostream *LinkedObjectsFile,
+    IndexWriteCallback OnWrite) {
   return
       [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
           const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
           AddStreamFn AddStream, FileCache Cache) {
         return std::make_unique<WriteIndexesThinBackend>(
-            Conf, CombinedIndex, ModuleToDefinedGVSummaries, OldPrefix,
-            NewPrefix, NativeObjectPrefix, ShouldEmitImportsFiles,
+            Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries,
+            OldPrefix, NewPrefix, NativeObjectPrefix, ShouldEmitImportsFiles,
             LinkedObjectsFile, OnWrite);
       };
 }
@@ -1854,7 +1869,8 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
           ResolvedODR[Mod.first], ThinLTO.ModuleMap);
     };
 
-    if (BackendProcess->getThreadCount() == 1) {
+    if (BackendProcess->getThreadCount() == 1 ||
+        BackendProcess->isSensitiveToInputOrder()) {
       // Process the modules in the order they were provided on the
       // command-line. It is important for this codepath to be used for
       // WriteIndexesThinBackend, to ensure the emitted LinkedObjectsFile lists
author	Nuri Amari <nuri.amari99@gmail.com>	2024-10-07 08:16:46 -0700
committer	GitHub <noreply@github.com>	2024-10-07 08:16:46 -0700
commit	2edd897a4227e481af33e8e43090ab088cd9d953 (patch)
tree	be46eb6fc640146ca168504586db2a142ccb0659 /llvm/lib/LTO/LTO.cpp
parent	2fe1f84db379bccbf0a3ac136d063a94b5dc59cb (diff)
download	llvm-2edd897a4227e481af33e8e43090ab088cd9d953.zip llvm-2edd897a4227e481af33e8e43090ab088cd9d953.tar.gz llvm-2edd897a4227e481af33e8e43090ab088cd9d953.tar.bz2