diff options
author | Teresa Johnson <tejohnson@google.com> | 2023-07-31 11:32:11 -0700 |
---|---|---|
committer | Teresa Johnson <tejohnson@google.com> | 2023-09-01 13:43:08 -0700 |
commit | bbe8cd13335300958b04db5318c31ff52714f96f (patch) | |
tree | 52d3291f070d9f991fd61a73645978be27cef97e /llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | |
parent | b0b3f82dd3c00cdba891f1ff6ba63abd419d0f18 (diff) | |
download | llvm-bbe8cd13335300958b04db5318c31ff52714f96f.zip llvm-bbe8cd13335300958b04db5318c31ff52714f96f.tar.gz llvm-bbe8cd13335300958b04db5318c31ff52714f96f.tar.bz2 |
[LTO] Remove module id from summary index
The module paths string table mapped to both an id sequentially assigned
during LTO linking, and the module hash. The former is leftover from
before the module hash was added for caching and subsequently replaced
use of the module id when renaming promoted symbols (to avoid effects
due to link order changes). The sequentially assigned module id was not
removed, however, as it was still a convenience when serializing to/from
bitcode and assembly.
This patch removes the module id from this table, since it isn't
strictly needed and can lead to confusion on when it is appropriate to
use (e.g. see fix in D156525). It also incurs a (likely insignificant)
amount of overhead. Where an integer module id is needed (e.g. bitcode
writing), one is assigned on the fly.
There are a couple of test changes since the paths are now sorted
alphanumerically when assigning ids on the fly during assembly writing,
in order to ensure deterministic behavior.
Differential Revision: https://reviews.llvm.org/D156730
Diffstat (limited to 'llvm/lib/Bitcode/Writer/BitcodeWriter.cpp')
-rw-r--r-- | llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 77 |
1 files changed, 46 insertions, 31 deletions
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 9416c7f..f53fbd7 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -431,6 +431,10 @@ class IndexBitcodeWriter : public BitcodeWriterBase { /// Tracks the last value id recorded in the GUIDToValueMap. unsigned GlobalValueId = 0; + /// Tracks the assignment of module paths in the module path string table to + /// an id assigned for use in summary references to the module path. + DenseMap<StringRef, uint64_t> ModuleIdMap; + public: /// Constructs a IndexBitcodeWriter object for the given combined index, /// writing to the provided \p Buffer. When writing a subset of the index @@ -512,8 +516,16 @@ public: Callback(*MPI); } } else { - for (const auto &MPSE : Index.modulePaths()) - Callback(MPSE); + // Since StringMap iteration order isn't guaranteed, order by path string + // first. + // FIXME: Make this a vector of StringMapEntry instead to avoid the later + // map lookup. + std::vector<StringRef> ModulePaths; + for (auto &[ModPath, _] : Index.modulePaths()) + ModulePaths.push_back(ModPath); + llvm::sort(ModulePaths.begin(), ModulePaths.end()); + for (auto &ModPath : ModulePaths) + Callback(*Index.modulePaths().find(ModPath)); } } @@ -3715,33 +3727,33 @@ void IndexBitcodeWriter::writeModStrings() { unsigned AbbrevHash = Stream.EmitAbbrev(std::move(Abbv)); SmallVector<unsigned, 64> Vals; - forEachModule( - [&](const StringMapEntry<std::pair<uint64_t, ModuleHash>> &MPSE) { - StringRef Key = MPSE.getKey(); - const auto &Value = MPSE.getValue(); - StringEncoding Bits = getStringEncoding(Key); - unsigned AbbrevToUse = Abbrev8Bit; - if (Bits == SE_Char6) - AbbrevToUse = Abbrev6Bit; - else if (Bits == SE_Fixed7) - AbbrevToUse = Abbrev7Bit; - - Vals.push_back(Value.first); - Vals.append(Key.begin(), Key.end()); - - // Emit the finished record. 
- Stream.EmitRecord(bitc::MST_CODE_ENTRY, Vals, AbbrevToUse); - - // Emit an optional hash for the module now - const auto &Hash = Value.second; - if (llvm::any_of(Hash, [](uint32_t H) { return H; })) { - Vals.assign(Hash.begin(), Hash.end()); - // Emit the hash record. - Stream.EmitRecord(bitc::MST_CODE_HASH, Vals, AbbrevHash); - } + forEachModule([&](const StringMapEntry<ModuleHash> &MPSE) { + StringRef Key = MPSE.getKey(); + const auto &Hash = MPSE.getValue(); + StringEncoding Bits = getStringEncoding(Key); + unsigned AbbrevToUse = Abbrev8Bit; + if (Bits == SE_Char6) + AbbrevToUse = Abbrev6Bit; + else if (Bits == SE_Fixed7) + AbbrevToUse = Abbrev7Bit; - Vals.clear(); - }); + auto ModuleId = ModuleIdMap.size(); + ModuleIdMap[Key] = ModuleId; + Vals.push_back(ModuleId); + Vals.append(Key.begin(), Key.end()); + + // Emit the finished record. + Stream.EmitRecord(bitc::MST_CODE_ENTRY, Vals, AbbrevToUse); + + // Emit an optional hash for the module now + if (llvm::any_of(Hash, [](uint32_t H) { return H; })) { + Vals.assign(Hash.begin(), Hash.end()); + // Emit the hash record. 
+ Stream.EmitRecord(bitc::MST_CODE_HASH, Vals, AbbrevHash); + } + + Vals.clear(); + }); Stream.ExitBlock(); } @@ -4410,7 +4422,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { if (auto *VS = dyn_cast<GlobalVarSummary>(S)) { NameVals.push_back(*ValueId); - NameVals.push_back(Index.getModuleId(VS->modulePath())); + assert(ModuleIdMap.count(VS->modulePath())); + NameVals.push_back(ModuleIdMap[VS->modulePath()]); NameVals.push_back(getEncodedGVSummaryFlags(VS->flags())); NameVals.push_back(getEncodedGVarFlags(VS->varflags())); for (auto &RI : VS->refs()) { @@ -4460,7 +4473,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { }); NameVals.push_back(*ValueId); - NameVals.push_back(Index.getModuleId(FS->modulePath())); + assert(ModuleIdMap.count(FS->modulePath())); + NameVals.push_back(ModuleIdMap[FS->modulePath()]); NameVals.push_back(getEncodedGVSummaryFlags(FS->flags())); NameVals.push_back(FS->instCount()); NameVals.push_back(getEncodedFFlags(FS->fflags())); @@ -4520,7 +4534,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { auto AliasValueId = SummaryToValueIdMap[AS]; assert(AliasValueId); NameVals.push_back(AliasValueId); - NameVals.push_back(Index.getModuleId(AS->modulePath())); + assert(ModuleIdMap.count(AS->modulePath())); + NameVals.push_back(ModuleIdMap[AS->modulePath()]); NameVals.push_back(getEncodedGVSummaryFlags(AS->flags())); auto AliaseeValueId = SummaryToValueIdMap[&AS->getAliasee()]; assert(AliaseeValueId); |