diff options
author | Matthew Voss <matthew.voss@sony.com> | 2023-07-05 14:17:20 -0700 |
---|---|---|
committer | Matthew Voss <matthew.voss@sony.com> | 2023-07-05 14:53:14 -0700 |
commit | a1ca3af31eeec61cfb9d619f55b655b0eb0b9494 (patch) | |
tree | f7127bd5940108b5fec1690ab872425afcb244a2 /llvm/lib | |
parent | 156913cb776438f87bd1580de862eac7be79ca2a (diff) | |
download | llvm-a1ca3af31eeec61cfb9d619f55b655b0eb0b9494.zip llvm-a1ca3af31eeec61cfb9d619f55b655b0eb0b9494.tar.gz llvm-a1ca3af31eeec61cfb9d619f55b655b0eb0b9494.tar.bz2 |
[llvm] A Unified LTO Bitcode Frontend
Here's a high-level summary of the changes in this patch. For more
information on the rationale, see the RFC
(https://discourse.llvm.org/t/rfc-a-unified-lto-bitcode-frontend/61774).
- Add config parameter to LTO backend, specifying which LTO mode is
desired when using unified LTO.
- Add unified LTO flag to the summary index for efficiency. Unified
LTO modules can be detected without parsing the module.
- Make sure that the ModuleID is generated by incorporating more types
of symbols.
Differential Revision: https://reviews.llvm.org/D123803
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Analysis/ModuleSummaryAnalysis.cpp | 6 | ||||
-rw-r--r-- | llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 55 | ||||
-rw-r--r-- | llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 5 | ||||
-rw-r--r-- | llvm/lib/IR/ModuleSummaryIndex.cpp | 8 | ||||
-rw-r--r-- | llvm/lib/LTO/LTO.cpp | 51 | ||||
-rw-r--r-- | llvm/lib/LTO/LTOBackend.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Passes/PassBuilder.cpp | 8 | ||||
-rw-r--r-- | llvm/lib/Passes/PassBuilderPipelines.cpp | 1 | ||||
-rw-r--r-- | llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp | 15 |
9 files changed, 118 insertions, 33 deletions
diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp index fd125ac..165b8f1 100644 --- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -786,10 +786,14 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( std::function<const StackSafetyInfo *(const Function &F)> GetSSICallback) { assert(PSI); bool EnableSplitLTOUnit = false; + bool UnifiedLTO = false; if (auto *MD = mdconst::extract_or_null<ConstantInt>( M.getModuleFlag("EnableSplitLTOUnit"))) EnableSplitLTOUnit = MD->getZExtValue(); - ModuleSummaryIndex Index(/*HaveGVs=*/true, EnableSplitLTOUnit); + if (auto *MD = + mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("UnifiedLTO"))) + UnifiedLTO = MD->getZExtValue(); + ModuleSummaryIndex Index(/*HaveGVs=*/true, EnableSplitLTOUnit, UnifiedLTO); // Identify the local values in the llvm.used and llvm.compiler.used sets, // which should not be exported as they would then require renaming and diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index ebc88c8..4f6c5f0 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -8033,14 +8033,17 @@ Expected<std::unique_ptr<ModuleSummaryIndex>> BitcodeModule::getSummary() { return std::move(Index); } -static Expected<bool> getEnableSplitLTOUnitFlag(BitstreamCursor &Stream, - unsigned ID) { +static Expected<std::pair<bool, bool>> +getEnableSplitLTOUnitAndUnifiedFlag(BitstreamCursor &Stream, + unsigned ID, + BitcodeLTOInfo <OInfo) { if (Error Err = Stream.EnterSubBlock(ID)) return std::move(Err); SmallVector<uint64_t, 64> Record; while (true) { BitstreamEntry Entry; + std::pair<bool, bool> Result = {false,false}; if (Error E = Stream.advanceSkippingSubblocks().moveInto(Entry)) return std::move(E); @@ -8048,10 +8051,10 @@ static Expected<bool> getEnableSplitLTOUnitFlag(BitstreamCursor &Stream, case BitstreamEntry::SubBlock: // Handled 
for us already. case BitstreamEntry::Error: return error("Malformed block"); - case BitstreamEntry::EndBlock: - // If no flags record found, conservatively return true to mimic - // behavior before this flag was added. - return true; + case BitstreamEntry::EndBlock: { + // If no flags record found, set both flags to false. + return Result; + } case BitstreamEntry::Record: // The interesting case. break; @@ -8068,9 +8071,13 @@ static Expected<bool> getEnableSplitLTOUnitFlag(BitstreamCursor &Stream, case bitc::FS_FLAGS: { // [flags] uint64_t Flags = Record[0]; // Scan flags. - assert(Flags <= 0x1ff && "Unexpected bits in flag"); + assert(Flags <= 0x2ff && "Unexpected bits in flag"); + + bool EnableSplitLTOUnit = Flags & 0x8; + bool UnifiedLTO = Flags & 0x200; + Result = {EnableSplitLTOUnit, UnifiedLTO}; - return Flags & 0x8; + return Result; } } } @@ -8096,25 +8103,31 @@ Expected<BitcodeLTOInfo> BitcodeModule::getLTOInfo() { return error("Malformed block"); case BitstreamEntry::EndBlock: return BitcodeLTOInfo{/*IsThinLTO=*/false, /*HasSummary=*/false, - /*EnableSplitLTOUnit=*/false}; + /*EnableSplitLTOUnit=*/false, /*UnifiedLTO=*/false}; case BitstreamEntry::SubBlock: if (Entry.ID == bitc::GLOBALVAL_SUMMARY_BLOCK_ID) { - Expected<bool> EnableSplitLTOUnit = - getEnableSplitLTOUnitFlag(Stream, Entry.ID); - if (!EnableSplitLTOUnit) - return EnableSplitLTOUnit.takeError(); - return BitcodeLTOInfo{/*IsThinLTO=*/true, /*HasSummary=*/true, - *EnableSplitLTOUnit}; + BitcodeLTOInfo LTOInfo; + Expected<std::pair<bool, bool>> Flags = + getEnableSplitLTOUnitAndUnifiedFlag(Stream, Entry.ID, LTOInfo); + if (!Flags) + return Flags.takeError(); + std::tie(LTOInfo.EnableSplitLTOUnit, LTOInfo.UnifiedLTO) = Flags.get(); + LTOInfo.IsThinLTO = true; + LTOInfo.HasSummary = true; + return LTOInfo; } if (Entry.ID == bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID) { - Expected<bool> EnableSplitLTOUnit = - getEnableSplitLTOUnitFlag(Stream, Entry.ID); - if (!EnableSplitLTOUnit) - return 
EnableSplitLTOUnit.takeError(); - return BitcodeLTOInfo{/*IsThinLTO=*/false, /*HasSummary=*/true, - *EnableSplitLTOUnit}; + BitcodeLTOInfo LTOInfo; + Expected<std::pair<bool, bool>> Flags = + getEnableSplitLTOUnitAndUnifiedFlag(Stream, Entry.ID, LTOInfo); + if (!Flags) + return Flags.takeError(); + std::tie(LTOInfo.EnableSplitLTOUnit, LTOInfo.UnifiedLTO) = Flags.get(); + LTOInfo.IsThinLTO = false; + LTOInfo.HasSummary = true; + return LTOInfo; } // Ignore other sub-blocks. diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index bedae66..f2bfb4e 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -4086,6 +4086,9 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() { // Bits 1-3 are set only in the combined index, skip them. if (Index->enableSplitLTOUnit()) Flags |= 0x8; + if (Index->hasUnifiedLTO()) + Flags |= 0x200; + Stream.EmitRecord(bitc::FS_FLAGS, ArrayRef<uint64_t>{Flags}); if (Index->begin() == Index->end()) { @@ -4112,7 +4115,7 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() { auto Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_PROFILE)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // flags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs diff --git a/llvm/lib/IR/ModuleSummaryIndex.cpp b/llvm/lib/IR/ModuleSummaryIndex.cpp index 7cba03e..15fe342 100644 --- a/llvm/lib/IR/ModuleSummaryIndex.cpp +++ b/llvm/lib/IR/ModuleSummaryIndex.cpp @@ -109,11 +109,13 @@ uint64_t ModuleSummaryIndex::getFlags() const { Flags |= 0x80; if (withSupportsHotColdNew()) Flags |= 0x100; + if (hasUnifiedLTO()) + Flags |= 0x200; 
return Flags; } void ModuleSummaryIndex::setFlags(uint64_t Flags) { - assert(Flags <= 0x1ff && "Unexpected bits in flag"); + assert(Flags <= 0x2ff && "Unexpected bits in flag"); // 1 bit: WithGlobalValueDeadStripping flag. // Set on combined index only. if (Flags & 0x1) @@ -151,6 +153,10 @@ void ModuleSummaryIndex::setFlags(uint64_t Flags) { // Set on combined index only. if (Flags & 0x100) setWithSupportsHotColdNew(); + // 1 bit: WithUnifiedLTO flag. + // Set on combined index only. + if (Flags & 0x200) + setUnifiedLTO(); } // Collect for the given module the list of function it defines diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 65ba3cb..6803d6a 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -603,10 +603,10 @@ LTO::ThinLTOState::ThinLTOState(ThinBackend Backend) } LTO::LTO(Config Conf, ThinBackend Backend, - unsigned ParallelCodeGenParallelismLevel) + unsigned ParallelCodeGenParallelismLevel, LTOKind LTOMode) : Conf(std::move(Conf)), RegularLTO(ParallelCodeGenParallelismLevel, this->Conf), - ThinLTO(std::move(Backend)) {} + ThinLTO(std::move(Backend)), LTOMode(LTOMode) {} // Requires a destructor for MapVector<BitcodeModule>. LTO::~LTO() = default; @@ -747,12 +747,25 @@ Error LTO::addModule(InputFile &Input, unsigned ModI, EnableSplitLTOUnit = LTOInfo->EnableSplitLTOUnit; BitcodeModule BM = Input.Mods[ModI]; + + if ((LTOMode == LTOK_UnifiedRegular || LTOMode == LTOK_UnifiedThin) && + !LTOInfo->UnifiedLTO) + return make_error<StringError>( + "unified LTO compilation must use " + "compatible bitcode modules (use -funified-lto)", + inconvertibleErrorCode()); + + if (LTOInfo->UnifiedLTO && LTOMode == LTOK_Default) + LTOMode = LTOK_UnifiedThin; + + bool IsThinLTO = LTOInfo->IsThinLTO && (LTOMode != LTOK_UnifiedRegular); + auto ModSyms = Input.module_symbols(ModI); addModuleToGlobalRes(ModSyms, {ResI, ResE}, - LTOInfo->IsThinLTO ? ThinLTO.ModuleMap.size() + 1 : 0, + IsThinLTO ? 
ThinLTO.ModuleMap.size() + 1 : 0, LTOInfo->HasSummary); - if (LTOInfo->IsThinLTO) + if (IsThinLTO) return addThinLTO(BM, ModSyms, ResI, ResE); RegularLTO.EmptyCombinedModule = false; @@ -820,6 +833,15 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms, if (Error Err = M.materializeMetadata()) return std::move(Err); + + // If cfi.functions is present and we are in regular LTO mode, LowerTypeTests + // will rename local functions in the merged module as "<function name>.1". + // This causes linking errors, since other parts of the module expect the + // original function name. + if (LTOMode == LTOK_UnifiedRegular) + if (NamedMDNode *CfiFunctionsMD = M.getNamedMetadata("cfi.functions")) + M.eraseNamedMetadata(CfiFunctionsMD); + UpgradeDebugInfo(M); ModuleSymbolTable SymTab; @@ -1214,6 +1236,7 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) { RegularLTO.CombinedModule->getContext(), Conf.RemarksFilename, Conf.RemarksPasses, Conf.RemarksFormat, Conf.RemarksWithHotness, Conf.RemarksHotnessThreshold); + LLVM_DEBUG(dbgs() << "Running regular LTO\n"); if (!DiagFileOrErr) return DiagFileOrErr.takeError(); DiagnosticOutputFile = std::move(*DiagFileOrErr); @@ -1277,18 +1300,33 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) { if (!Conf.CodeGenOnly) { for (const auto &R : GlobalResolutions) { + GlobalValue *GV = + RegularLTO.CombinedModule->getNamedValue(R.second.IRName); if (!R.second.isPrevailingIRSymbol()) continue; if (R.second.Partition != 0 && R.second.Partition != GlobalResolution::External) continue; - GlobalValue *GV = - RegularLTO.CombinedModule->getNamedValue(R.second.IRName); // Ignore symbols defined in other partitions. // Also skip declarations, which are not allowed to have internal linkage. if (!GV || GV->hasLocalLinkage() || GV->isDeclaration()) continue; + + // Symbols that are marked DLLImport or DLLExport should not be + // internalized, as they are either externally visible or referencing + // external symbols. 
Symbols that have AvailableExternally or Appending + // linkage might be used by future passes and should be kept as is. + // These linkages are seen in Unified regular LTO, because the process + // of creating split LTO units introduces symbols with that linkage into + // one of the created modules. Normally, only the ThinLTO backend would + // compile this module, but Unified Regular LTO processes both + // modules created by the splitting process as regular LTO modules. + if ((LTOMode == LTOKind::LTOK_UnifiedRegular) && + ((GV->getDLLStorageClass() != GlobalValue::DefaultStorageClass) || + GV->hasAvailableExternallyLinkage() || GV->hasAppendingLinkage())) + continue; + GV->setUnnamedAddr(R.second.UnnamedAddr ? GlobalValue::UnnamedAddr::Global : GlobalValue::UnnamedAddr::None); if (EnableLTOInternalization && R.second.Partition == 0) @@ -1606,6 +1644,7 @@ ThinBackend lto::createWriteIndexesThinBackend( Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) { + LLVM_DEBUG(dbgs() << "Running ThinLTO\n"); ThinLTO.CombinedIndex.releaseTemporaryMemory(); timeTraceProfilerBegin("ThinLink", StringRef("")); auto TimeTraceScopeExit = llvm::make_scope_exit([]() { diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp index 6eee789..897380c 100644 --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -504,6 +504,7 @@ Error lto::backend(const Config &C, AddStreamFn AddStream, std::unique_ptr<TargetMachine> TM = createTargetMachine(C, *TOrErr, Mod); + LLVM_DEBUG(dbgs() << "Running regular LTO\n"); if (!C.CodeGenOnly) { if (!opt(C, TM.get(), 0, Mod, /*IsThinLTO=*/false, /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr, @@ -566,6 +567,7 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream, // the module, if applicable. 
Mod.setPartialSampleProfileRatio(CombinedIndex); + LLVM_DEBUG(dbgs() << "Running ThinLTO\n"); if (Conf.CodeGenOnly) { codegen(Conf, TM.get(), AddStream, Task, Mod, CombinedIndex); return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index bbab55c..d86ec21 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1379,7 +1379,13 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM, } else if (Matches[1] == "thinlto") { MPM.addPass(buildThinLTODefaultPipeline(L, nullptr)); } else if (Matches[1] == "lto-pre-link") { - MPM.addPass(buildLTOPreLinkDefaultPipeline(L)); + if (PTO.UnifiedLTO) + // When UnifiedLTO is enabled, use the ThinLTO pre-link pipeline. This + // avoids compile-time performance regressions and keeps the pre-link + // LTO pipeline "unified" for both LTO modes. + MPM.addPass(buildThinLTOPreLinkDefaultPipeline(L)); + else + MPM.addPass(buildLTOPreLinkDefaultPipeline(L)); } else { assert(Matches[1] == "lto" && "Not one of the matched options!"); MPM.addPass(buildLTODefaultPipeline(L, nullptr)); diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 58a68c0..d3e625e 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -283,6 +283,7 @@ PipelineTuningOptions::PipelineTuningOptions() { LicmMssaOptCap = SetLicmMssaOptCap; LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap; CallGraphProfile = true; + UnifiedLTO = false; MergeFunctions = EnableMergeFunctions; InlinerThreshold = -1; EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses; diff --git a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp index a7adb74..574874b 100644 --- a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -259,26 +259,37 @@ static void 
cloneUsedGlobalVariables(const Module &SrcM, Module &DestM, appendToUsed(DestM, NewUsed); } +bool enableUnifiedLTO(Module &M) { + bool UnifiedLTO = false; + if (auto *MD = + mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("UnifiedLTO"))) + UnifiedLTO = MD->getZExtValue(); + return UnifiedLTO; +} + // If it's possible to split M into regular and thin LTO parts, do so and write // a multi-module bitcode file with the two parts to OS. Otherwise, write only a // regular LTO bitcode file to OS. void splitAndWriteThinLTOBitcode( raw_ostream &OS, raw_ostream *ThinLinkOS, function_ref<AAResults &(Function &)> AARGetter, Module &M) { + bool UnifiedLTO = enableUnifiedLTO(M); std::string ModuleId = getUniqueModuleId(&M); if (ModuleId.empty()) { + assert(!UnifiedLTO); // We couldn't generate a module ID for this module, write it out as a // regular LTO module with an index for summary-based dead stripping. ProfileSummaryInfo PSI(M); M.addModuleFlag(Module::Error, "ThinLTO", uint32_t(0)); ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, &PSI); - WriteBitcodeToFile(M, OS, /*ShouldPreserveUseListOrder=*/false, &Index); + WriteBitcodeToFile(M, OS, /*ShouldPreserveUseListOrder=*/false, &Index, + /*UnifiedLTO=*/false); if (ThinLinkOS) // We don't have a ThinLTO part, but still write the module to the // ThinLinkOS if requested so that the expected output file is produced. WriteBitcodeToFile(M, *ThinLinkOS, /*ShouldPreserveUseListOrder=*/false, - &Index); + &Index, /*UnifiedLTO=*/false); return; } |