aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib
diff options
context:
space:
mode:
authorTeresa Johnson <tejohnson@google.com>2023-07-31 11:32:11 -0700
committerTeresa Johnson <tejohnson@google.com>2023-09-01 13:43:08 -0700
commitbbe8cd13335300958b04db5318c31ff52714f96f (patch)
tree52d3291f070d9f991fd61a73645978be27cef97e /llvm/lib
parentb0b3f82dd3c00cdba891f1ff6ba63abd419d0f18 (diff)
downloadllvm-bbe8cd13335300958b04db5318c31ff52714f96f.zip
llvm-bbe8cd13335300958b04db5318c31ff52714f96f.tar.gz
llvm-bbe8cd13335300958b04db5318c31ff52714f96f.tar.bz2
[LTO] Remove module id from summary index
The module paths string table mapped to both an id sequentially assigned during LTO linking, and the module hash. The former is left over from before the module hash was added for caching and subsequently replaced use of the module id when renaming promoted symbols (to avoid effects due to link order changes). The sequentially assigned module id was not removed, however, as it was still a convenience when serializing to/from bitcode and assembly. This patch removes the module id from this table, since it isn't strictly needed and can lead to confusion about when it is appropriate to use (e.g. see fix in D156525). It also adds a (likely not significant) amount of overhead. Where an integer module id is needed (e.g. bitcode writing), one is assigned on the fly. There are a couple of test changes since the paths are now sorted alphanumerically when assigning ids on the fly during assembly writing, in order to ensure deterministic behavior. Differential Revision: https://reviews.llvm.org/D156730
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/AsmParser/LLParser.cpp2
-rw-r--r--llvm/lib/Bitcode/Reader/BitcodeReader.cpp28
-rw-r--r--llvm/lib/Bitcode/Writer/BitcodeWriter.cpp77
-rw-r--r--llvm/lib/IR/AsmWriter.cpp22
-rw-r--r--llvm/lib/IR/ModuleSummaryIndex.cpp16
-rw-r--r--llvm/lib/IRPrinter/IRPrintingPasses.cpp2
-rw-r--r--llvm/lib/LTO/LTO.cpp10
-rw-r--r--llvm/lib/LTO/ThinLTOCodeGenerator.cpp4
8 files changed, 90 insertions, 71 deletions
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 69895c0..f1f0cdf 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -8181,7 +8181,7 @@ bool LLParser::parseModuleEntry(unsigned ID) {
parseToken(lltok::rparen, "expected ')' here"))
return true;
- auto ModuleEntry = Index->addModule(Path, ID, Hash);
+ auto ModuleEntry = Index->addModule(Path, Hash);
ModuleIdMap[ID] = ModuleEntry->first();
return false;
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 02afc73..1d1ec98 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -904,10 +904,6 @@ class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase {
/// path to the bitcode file.
StringRef ModulePath;
- /// For per-module summary indexes, the unique numerical identifier given to
- /// this module by the client.
- unsigned ModuleId;
-
/// Callback to ask whether a symbol is the prevailing copy when invoked
/// during combined index building.
std::function<bool(GlobalValue::GUID)> IsPrevailing;
@@ -919,7 +915,7 @@ class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase {
public:
ModuleSummaryIndexBitcodeReader(
BitstreamCursor Stream, StringRef Strtab, ModuleSummaryIndex &TheIndex,
- StringRef ModulePath, unsigned ModuleId,
+ StringRef ModulePath,
std::function<bool(GlobalValue::GUID)> IsPrevailing = nullptr);
Error parseModule();
@@ -6699,13 +6695,12 @@ std::vector<StructType *> BitcodeReader::getIdentifiedStructTypes() const {
ModuleSummaryIndexBitcodeReader::ModuleSummaryIndexBitcodeReader(
BitstreamCursor Cursor, StringRef Strtab, ModuleSummaryIndex &TheIndex,
- StringRef ModulePath, unsigned ModuleId,
- std::function<bool(GlobalValue::GUID)> IsPrevailing)
+ StringRef ModulePath, std::function<bool(GlobalValue::GUID)> IsPrevailing)
: BitcodeReaderBase(std::move(Cursor), Strtab), TheIndex(TheIndex),
- ModulePath(ModulePath), ModuleId(ModuleId), IsPrevailing(IsPrevailing) {}
+ ModulePath(ModulePath), IsPrevailing(IsPrevailing) {}
void ModuleSummaryIndexBitcodeReader::addThisModule() {
- TheIndex.addModule(ModulePath, ModuleId);
+ TheIndex.addModule(ModulePath);
}
ModuleSummaryIndex::ModuleInfo *
@@ -6936,7 +6931,7 @@ Error ModuleSummaryIndexBitcodeReader::parseModule() {
case bitc::MODULE_CODE_HASH: {
if (Record.size() != 5)
return error("Invalid hash length " + Twine(Record.size()).str());
- auto &Hash = getThisModule()->second.second;
+ auto &Hash = getThisModule()->second;
int Pos = 0;
for (auto &Val : Record) {
assert(!(Val >> 32) && "Unexpected high bits set");
@@ -7697,7 +7692,7 @@ Error ModuleSummaryIndexBitcodeReader::parseModuleStringTable() {
if (convertToString(Record, 1, ModulePath))
return error("Invalid record");
- LastSeenModule = TheIndex.addModule(ModulePath, ModuleId);
+ LastSeenModule = TheIndex.addModule(ModulePath);
ModuleIdMap[ModuleId] = LastSeenModule->first();
ModulePath.clear();
@@ -7712,7 +7707,7 @@ Error ModuleSummaryIndexBitcodeReader::parseModuleStringTable() {
int Pos = 0;
for (auto &Val : Record) {
assert(!(Val >> 32) && "Unexpected high bits set");
- LastSeenModule->second.second[Pos++] = Val;
+ LastSeenModule->second[Pos++] = Val;
}
// Reset LastSeenModule to avoid overriding the hash unexpectedly.
LastSeenModule = nullptr;
@@ -7970,14 +7965,14 @@ BitcodeModule::getLazyModule(LLVMContext &Context, bool ShouldLazyLoadMetadata,
// module path used in the combined summary (e.g. when reading summaries for
// regular LTO modules).
Error BitcodeModule::readSummary(
- ModuleSummaryIndex &CombinedIndex, StringRef ModulePath, uint64_t ModuleId,
+ ModuleSummaryIndex &CombinedIndex, StringRef ModulePath,
std::function<bool(GlobalValue::GUID)> IsPrevailing) {
BitstreamCursor Stream(Buffer);
if (Error JumpFailed = Stream.JumpToBit(ModuleBit))
return JumpFailed;
ModuleSummaryIndexBitcodeReader R(std::move(Stream), Strtab, CombinedIndex,
- ModulePath, ModuleId, IsPrevailing);
+ ModulePath, IsPrevailing);
return R.parseModule();
}
@@ -8183,13 +8178,12 @@ Expected<std::string> llvm::getBitcodeProducerString(MemoryBufferRef Buffer) {
}
Error llvm::readModuleSummaryIndex(MemoryBufferRef Buffer,
- ModuleSummaryIndex &CombinedIndex,
- uint64_t ModuleId) {
+ ModuleSummaryIndex &CombinedIndex) {
Expected<BitcodeModule> BM = getSingleModule(Buffer);
if (!BM)
return BM.takeError();
- return BM->readSummary(CombinedIndex, BM->getModuleIdentifier(), ModuleId);
+ return BM->readSummary(CombinedIndex, BM->getModuleIdentifier());
}
Expected<std::unique_ptr<ModuleSummaryIndex>>
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 9416c7f..f53fbd7 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -431,6 +431,10 @@ class IndexBitcodeWriter : public BitcodeWriterBase {
/// Tracks the last value id recorded in the GUIDToValueMap.
unsigned GlobalValueId = 0;
+ /// Tracks the assignment of module paths in the module path string table to
+ /// an id assigned for use in summary references to the module path.
+ DenseMap<StringRef, uint64_t> ModuleIdMap;
+
public:
/// Constructs a IndexBitcodeWriter object for the given combined index,
/// writing to the provided \p Buffer. When writing a subset of the index
@@ -512,8 +516,16 @@ public:
Callback(*MPI);
}
} else {
- for (const auto &MPSE : Index.modulePaths())
- Callback(MPSE);
+ // Since StringMap iteration order isn't guaranteed, order by path string
+ // first.
+ // FIXME: Make this a vector of StringMapEntry instead to avoid the later
+ // map lookup.
+ std::vector<StringRef> ModulePaths;
+ for (auto &[ModPath, _] : Index.modulePaths())
+ ModulePaths.push_back(ModPath);
+ llvm::sort(ModulePaths.begin(), ModulePaths.end());
+ for (auto &ModPath : ModulePaths)
+ Callback(*Index.modulePaths().find(ModPath));
}
}
@@ -3715,33 +3727,33 @@ void IndexBitcodeWriter::writeModStrings() {
unsigned AbbrevHash = Stream.EmitAbbrev(std::move(Abbv));
SmallVector<unsigned, 64> Vals;
- forEachModule(
- [&](const StringMapEntry<std::pair<uint64_t, ModuleHash>> &MPSE) {
- StringRef Key = MPSE.getKey();
- const auto &Value = MPSE.getValue();
- StringEncoding Bits = getStringEncoding(Key);
- unsigned AbbrevToUse = Abbrev8Bit;
- if (Bits == SE_Char6)
- AbbrevToUse = Abbrev6Bit;
- else if (Bits == SE_Fixed7)
- AbbrevToUse = Abbrev7Bit;
-
- Vals.push_back(Value.first);
- Vals.append(Key.begin(), Key.end());
-
- // Emit the finished record.
- Stream.EmitRecord(bitc::MST_CODE_ENTRY, Vals, AbbrevToUse);
-
- // Emit an optional hash for the module now
- const auto &Hash = Value.second;
- if (llvm::any_of(Hash, [](uint32_t H) { return H; })) {
- Vals.assign(Hash.begin(), Hash.end());
- // Emit the hash record.
- Stream.EmitRecord(bitc::MST_CODE_HASH, Vals, AbbrevHash);
- }
+ forEachModule([&](const StringMapEntry<ModuleHash> &MPSE) {
+ StringRef Key = MPSE.getKey();
+ const auto &Hash = MPSE.getValue();
+ StringEncoding Bits = getStringEncoding(Key);
+ unsigned AbbrevToUse = Abbrev8Bit;
+ if (Bits == SE_Char6)
+ AbbrevToUse = Abbrev6Bit;
+ else if (Bits == SE_Fixed7)
+ AbbrevToUse = Abbrev7Bit;
- Vals.clear();
- });
+ auto ModuleId = ModuleIdMap.size();
+ ModuleIdMap[Key] = ModuleId;
+ Vals.push_back(ModuleId);
+ Vals.append(Key.begin(), Key.end());
+
+ // Emit the finished record.
+ Stream.EmitRecord(bitc::MST_CODE_ENTRY, Vals, AbbrevToUse);
+
+ // Emit an optional hash for the module now
+ if (llvm::any_of(Hash, [](uint32_t H) { return H; })) {
+ Vals.assign(Hash.begin(), Hash.end());
+ // Emit the hash record.
+ Stream.EmitRecord(bitc::MST_CODE_HASH, Vals, AbbrevHash);
+ }
+
+ Vals.clear();
+ });
Stream.ExitBlock();
}
@@ -4410,7 +4422,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
if (auto *VS = dyn_cast<GlobalVarSummary>(S)) {
NameVals.push_back(*ValueId);
- NameVals.push_back(Index.getModuleId(VS->modulePath()));
+ assert(ModuleIdMap.count(VS->modulePath()));
+ NameVals.push_back(ModuleIdMap[VS->modulePath()]);
NameVals.push_back(getEncodedGVSummaryFlags(VS->flags()));
NameVals.push_back(getEncodedGVarFlags(VS->varflags()));
for (auto &RI : VS->refs()) {
@@ -4460,7 +4473,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
});
NameVals.push_back(*ValueId);
- NameVals.push_back(Index.getModuleId(FS->modulePath()));
+ assert(ModuleIdMap.count(FS->modulePath()));
+ NameVals.push_back(ModuleIdMap[FS->modulePath()]);
NameVals.push_back(getEncodedGVSummaryFlags(FS->flags()));
NameVals.push_back(FS->instCount());
NameVals.push_back(getEncodedFFlags(FS->fflags()));
@@ -4520,7 +4534,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
auto AliasValueId = SummaryToValueIdMap[AS];
assert(AliasValueId);
NameVals.push_back(AliasValueId);
- NameVals.push_back(Index.getModuleId(AS->modulePath()));
+ assert(ModuleIdMap.count(AS->modulePath()));
+ NameVals.push_back(ModuleIdMap[AS->modulePath()]);
NameVals.push_back(getEncodedGVSummaryFlags(AS->flags()));
auto AliaseeValueId = SummaryToValueIdMap[&AS->getAliasee()];
assert(AliaseeValueId);
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index be4a3ed..e190d82 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -1069,12 +1069,13 @@ int SlotTracker::processIndex() {
// The first block of slots are just the module ids, which start at 0 and are
// assigned consecutively. Since the StringMap iteration order isn't
- // guaranteed, use a std::map to order by module ID before assigning slots.
- std::map<uint64_t, StringRef> ModuleIdToPathMap;
- for (auto &[ModPath, ModId] : TheIndex->modulePaths())
- ModuleIdToPathMap[ModId.first] = ModPath;
- for (auto &ModPair : ModuleIdToPathMap)
- CreateModulePathSlot(ModPair.second);
+ // guaranteed, order by path string before assigning slots.
+ std::vector<StringRef> ModulePaths;
+ for (auto &[ModPath, _] : TheIndex->modulePaths())
+ ModulePaths.push_back(ModPath);
+ llvm::sort(ModulePaths.begin(), ModulePaths.end());
+ for (auto &ModPath : ModulePaths)
+ CreateModulePathSlot(ModPath);
// Start numbering the GUIDs after the module ids.
GUIDNext = ModulePathNext;
@@ -2890,12 +2891,11 @@ void AssemblyWriter::printModuleSummaryIndex() {
std::string RegularLTOModuleName =
ModuleSummaryIndex::getRegularLTOModuleName();
moduleVec.resize(TheIndex->modulePaths().size());
- for (auto &[ModPath, ModId] : TheIndex->modulePaths())
+ for (auto &[ModPath, ModHash] : TheIndex->modulePaths())
moduleVec[Machine.getModulePathSlot(ModPath)] = std::make_pair(
- // A module id of -1 is a special entry for a regular LTO module created
- // during the thin link.
- ModId.first == -1u ? RegularLTOModuleName : std::string(ModPath),
- ModId.second);
+ // An empty module path is a special entry for a regular LTO module
+ // created during the thin link.
+ ModPath.empty() ? RegularLTOModuleName : std::string(ModPath), ModHash);
unsigned i = 0;
for (auto &ModPair : moduleVec) {
diff --git a/llvm/lib/IR/ModuleSummaryIndex.cpp b/llvm/lib/IR/ModuleSummaryIndex.cpp
index 15fe342..198c730 100644
--- a/llvm/lib/IR/ModuleSummaryIndex.cpp
+++ b/llvm/lib/IR/ModuleSummaryIndex.cpp
@@ -554,6 +554,17 @@ void ModuleSummaryIndex::exportToDot(
std::map<StringRef, GVSOrderedMapTy> ModuleToDefinedGVS;
collectDefinedGVSummariesPerModule(ModuleToDefinedGVS);
+ // Assign an id to each module path for use in graph labels. Since the
+ // StringMap iteration order isn't guaranteed, order by path string before
+ // assigning ids.
+ std::vector<StringRef> ModulePaths;
+ for (auto &[ModPath, _] : modulePaths())
+ ModulePaths.push_back(ModPath);
+ llvm::sort(ModulePaths);
+ DenseMap<StringRef, uint64_t> ModuleIdMap;
+ for (auto &ModPath : ModulePaths)
+ ModuleIdMap.try_emplace(ModPath, ModuleIdMap.size());
+
// Get node identifier in form MXXX_<GUID>. The MXXX prefix is required,
// because we may have multiple linkonce functions summaries.
auto NodeId = [](uint64_t ModId, GlobalValue::GUID Id) {
@@ -589,7 +600,10 @@ void ModuleSummaryIndex::exportToDot(
OS << "digraph Summary {\n";
for (auto &ModIt : ModuleToDefinedGVS) {
- auto ModId = getModuleId(ModIt.first);
+ // Will be empty for a just built per-module index, which doesn't setup a
+ // module paths table. In that case use 0 as the module id.
+ assert(ModuleIdMap.count(ModIt.first) || ModuleIdMap.empty());
+ auto ModId = ModuleIdMap.empty() ? 0 : ModuleIdMap[ModIt.first];
OS << " // Module: " << ModIt.first << "\n";
OS << " subgraph cluster_" << std::to_string(ModId) << " {\n";
OS << " style = filled;\n";
diff --git a/llvm/lib/IRPrinter/IRPrintingPasses.cpp b/llvm/lib/IRPrinter/IRPrintingPasses.cpp
index 9552ce3..b7da432 100644
--- a/llvm/lib/IRPrinter/IRPrintingPasses.cpp
+++ b/llvm/lib/IRPrinter/IRPrintingPasses.cpp
@@ -53,7 +53,7 @@ PreservedAnalyses PrintModulePass::run(Module &M, ModuleAnalysisManager &AM) {
: nullptr;
if (Index) {
if (Index->modulePaths().empty())
- Index->addModule("", 0);
+ Index->addModule("");
Index->print(OS);
}
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index f851037..2a3f44d 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -183,7 +183,7 @@ void llvm::computeLTOCacheKey(
return ModIt->second;
}
- const ModuleHash &getHash() const { return ModInfo->second.second; }
+ const ModuleHash &getHash() const { return ModInfo->second; }
};
std::vector<ImportModule> ImportModulesVector;
@@ -765,7 +765,7 @@ Error LTO::addModule(InputFile &Input, unsigned ModI,
// Regular LTO module summaries are added to a dummy module that represents
// the combined regular LTO module.
- if (Error Err = BM.readSummary(ThinLTO.CombinedIndex, "", -1ull))
+ if (Error Err = BM.readSummary(ThinLTO.CombinedIndex, ""))
return Err;
RegularLTO.ModsWithSummaries.push_back(std::move(*ModOrErr));
return Error::success();
@@ -1013,16 +1013,14 @@ Error LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
}
}
- uint64_t ModuleId = ThinLTO.ModuleMap.size();
if (Error Err =
BM.readSummary(ThinLTO.CombinedIndex, BM.getModuleIdentifier(),
- ModuleId, [&](GlobalValue::GUID GUID) {
+ [&](GlobalValue::GUID GUID) {
return ThinLTO.PrevailingModuleForGUID[GUID] ==
BM.getModuleIdentifier();
}))
return Err;
- LLVM_DEBUG(dbgs() << "Module " << ModuleId << ": " << BM.getModuleIdentifier()
- << "\n");
+ LLVM_DEBUG(dbgs() << "Module " << BM.getModuleIdentifier() << "\n");
for (const InputFile::Symbol &Sym : Syms) {
assert(ResI != ResE);
diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
index 942f79d..f207b27 100644
--- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -617,11 +617,9 @@ std::unique_ptr<TargetMachine> TargetMachineBuilder::create() const {
std::unique_ptr<ModuleSummaryIndex> ThinLTOCodeGenerator::linkCombinedIndex() {
std::unique_ptr<ModuleSummaryIndex> CombinedIndex =
std::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/false);
- uint64_t NextModuleId = 0;
for (auto &Mod : Modules) {
auto &M = Mod->getSingleBitcodeModule();
- if (Error Err =
- M.readSummary(*CombinedIndex, Mod->getName(), NextModuleId++)) {
+ if (Error Err = M.readSummary(*CombinedIndex, Mod->getName())) {
// FIXME diagnose
logAllUnhandledErrors(
std::move(Err), errs(),