diff options
author | Chuanqi Xu <yedeng.yd@linux.alibaba.com> | 2024-12-06 10:31:37 +0800 |
---|---|---|
committer | Chuanqi Xu <yedeng.yd@linux.alibaba.com> | 2024-12-06 10:52:35 +0800 |
commit | b5bd19211118c6d43bc525a4e3fb65d2c750d61e (patch) | |
tree | 120533b9cd7eb8912acdbdfeefe09fccdb3485ba /clang/lib/Serialization/ASTWriter.cpp | |
parent | d88a0c7322a42227e45ebc09b98132490d6a67b5 (diff) | |
download | llvm-b5bd19211118c6d43bc525a4e3fb65d2c750d61e.zip llvm-b5bd19211118c6d43bc525a4e3fb65d2c750d61e.tar.gz llvm-b5bd19211118c6d43bc525a4e3fb65d2c750d61e.tar.bz2 |
[Serialization] Support loading lazy specializations lazily
Currently all the specializations of a template (including
instantiation, specialization and partial specializations) will be
loaded at once if we want to instantiate another instance for the
template, or find instantiation for the template, or just want to
complete the redecl chain.
This basically means we need to load every specialization of the
template once the template declaration gets loaded. This is bad since
when we load a specialization, we need to load all of its template
arguments. Then we have to deserialize a lot of unnecessary
declarations.
For example,
```
// M.cppm
export module M;
export template <class T>
class A {};
export class ShouldNotBeLoaded {};
export class Temp {
A<ShouldNotBeLoaded> AS;
};
// use.cpp
import M;
A<int> a;
```
We have a specialization `A<ShouldNotBeLoaded>` in `M.cppm` and we
instantiate the template `A` in `use.cpp`. Then, surprisingly, we will
deserialize `ShouldNotBeLoaded` when compiling `use.cpp`. This
patch tries to avoid that.
Given that the templates are heavily used in C++, this is a pain point
for the performance.
This patch adds a MultiOnDiskHashTable for specializations in the
ASTReader. Then we will only deserialize the specializations with the
same template arguments. We achieve that by using the ODRHash of the
template arguments as the key of the hash table.
To review this patch, I think `ASTReaderDecl::AddLazySpecializations`
may be a good entry point.
The patch was reviewed in
https://github.com/llvm/llvm-project/pull/83237, but that PR is a stacked
PR, and I feel the intention of the stacked PRs got lost during the
review process. So I think it is better to merge the commits into a
single commit instead of merging them in the PR page. That makes it
easier for us to cherry-pick and revert.
Diffstat (limited to 'clang/lib/Serialization/ASTWriter.cpp')
-rw-r--r-- | clang/lib/Serialization/ASTWriter.cpp | 209 |
1 files changed, 206 insertions, 3 deletions
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index f8158a6..83fbb70 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -13,6 +13,7 @@ #include "ASTCommon.h" #include "ASTReaderInternals.h" #include "MultiOnDiskHashTable.h" +#include "TemplateArgumentHasher.h" #include "clang/AST/ASTContext.h" #include "clang/AST/ASTUnresolvedSet.h" #include "clang/AST/AbstractTypeWriter.h" @@ -4167,6 +4168,175 @@ public: } // namespace +namespace { +class LazySpecializationInfoLookupTrait { + ASTWriter &Writer; + llvm::SmallVector<serialization::reader::LazySpecializationInfo, 64> Specs; + +public: + using key_type = unsigned; + using key_type_ref = key_type; + + /// A start and end index into Specs, representing a sequence of decls. + using data_type = std::pair<unsigned, unsigned>; + using data_type_ref = const data_type &; + + using hash_value_type = unsigned; + using offset_type = unsigned; + + explicit LazySpecializationInfoLookupTrait(ASTWriter &Writer) + : Writer(Writer) {} + + template <typename Col, typename Col2> + data_type getData(Col &&C, Col2 &ExistingInfo) { + unsigned Start = Specs.size(); + for (auto *D : C) { + NamedDecl *ND = getDeclForLocalLookup(Writer.getLangOpts(), + const_cast<NamedDecl *>(D)); + Specs.push_back(GlobalDeclID(Writer.GetDeclRef(ND).getRawValue())); + } + for (const serialization::reader::LazySpecializationInfo &Info : + ExistingInfo) + Specs.push_back(Info); + return std::make_pair(Start, Specs.size()); + } + + data_type ImportData( + const reader::LazySpecializationInfoLookupTrait::data_type &FromReader) { + unsigned Start = Specs.size(); + for (auto ID : FromReader) + Specs.push_back(ID); + return std::make_pair(Start, Specs.size()); + } + + static bool EqualKey(key_type_ref a, key_type_ref b) { return a == b; } + + hash_value_type ComputeHash(key_type Name) { return Name; } + + void EmitFileRef(raw_ostream &Out, ModuleFile *F) const { + 
assert(Writer.hasChain() && + "have reference to loaded module file but no chain?"); + + using namespace llvm::support; + endian::write<uint32_t>(Out, Writer.getChain()->getModuleFileID(F), + llvm::endianness::little); + } + + std::pair<unsigned, unsigned> EmitKeyDataLength(raw_ostream &Out, + key_type HashValue, + data_type_ref Lookup) { + // 4 bytes for each slot. + unsigned KeyLen = 4; + unsigned DataLen = sizeof(serialization::reader::LazySpecializationInfo) * + (Lookup.second - Lookup.first); + + return emitULEBKeyDataLength(KeyLen, DataLen, Out); + } + + void EmitKey(raw_ostream &Out, key_type HashValue, unsigned) { + using namespace llvm::support; + + endian::Writer LE(Out, llvm::endianness::little); + LE.write<uint32_t>(HashValue); + } + + void EmitData(raw_ostream &Out, key_type_ref, data_type Lookup, + unsigned DataLen) { + using namespace llvm::support; + + endian::Writer LE(Out, llvm::endianness::little); + uint64_t Start = Out.tell(); + (void)Start; + for (unsigned I = Lookup.first, N = Lookup.second; I != N; ++I) { + LE.write<DeclID>(Specs[I].getRawValue()); + } + assert(Out.tell() - Start == DataLen && "Data length is wrong"); + } +}; + +unsigned CalculateODRHashForSpecs(const Decl *Spec) { + ArrayRef<TemplateArgument> Args; + if (auto *CTSD = dyn_cast<ClassTemplateSpecializationDecl>(Spec)) + Args = CTSD->getTemplateArgs().asArray(); + else if (auto *VTSD = dyn_cast<VarTemplateSpecializationDecl>(Spec)) + Args = VTSD->getTemplateArgs().asArray(); + else if (auto *FD = dyn_cast<FunctionDecl>(Spec)) + Args = FD->getTemplateSpecializationArgs()->asArray(); + else + llvm_unreachable("New Specialization Kind?"); + + return StableHashForTemplateArguments(Args); +} +} // namespace + +void ASTWriter::GenerateSpecializationInfoLookupTable( + const NamedDecl *D, llvm::SmallVectorImpl<const Decl *> &Specializations, + llvm::SmallVectorImpl<char> &LookupTable, bool IsPartial) { + assert(D->isFirstDecl()); + + // Create the on-disk hash table representation. 
+ MultiOnDiskHashTableGenerator<reader::LazySpecializationInfoLookupTrait, + LazySpecializationInfoLookupTrait> + Generator; + LazySpecializationInfoLookupTrait Trait(*this); + + llvm::DenseMap<unsigned, llvm::SmallVector<const NamedDecl *, 4>> + SpecializationMaps; + + for (auto *Specialization : Specializations) { + unsigned HashedValue = CalculateODRHashForSpecs(Specialization); + + auto Iter = SpecializationMaps.find(HashedValue); + if (Iter == SpecializationMaps.end()) + Iter = SpecializationMaps + .try_emplace(HashedValue, + llvm::SmallVector<const NamedDecl *, 4>()) + .first; + + Iter->second.push_back(cast<NamedDecl>(Specialization)); + } + + auto *Lookups = + Chain ? Chain->getLoadedSpecializationsLookupTables(D, IsPartial) + : nullptr; + + for (auto &[HashValue, Specs] : SpecializationMaps) { + SmallVector<serialization::reader::LazySpecializationInfo, 16> + ExisitingSpecs; + // We have to merge the lookup table manually here. We can't depend on the + // merge mechanism offered by + // clang::serialization::MultiOnDiskHashTableGenerator since that generator + // assumes the we'll get the same value with the same key. + // And also underlying llvm::OnDiskChainedHashTableGenerator assumes that we + // won't insert the values with the same key twice. So we have to merge the + // lookup table here manually. + if (Lookups) + ExisitingSpecs = Lookups->Table.find(HashValue); + + Generator.insert(HashValue, Trait.getData(Specs, ExisitingSpecs), Trait); + } + + Generator.emit(LookupTable, Trait, Lookups ? &Lookups->Table : nullptr); +} + +uint64_t ASTWriter::WriteSpecializationInfoLookupTable( + const NamedDecl *D, llvm::SmallVectorImpl<const Decl *> &Specializations, + bool IsPartial) { + + llvm::SmallString<4096> LookupTable; + GenerateSpecializationInfoLookupTable(D, Specializations, LookupTable, + IsPartial); + + uint64_t Offset = Stream.GetCurrentBitNo(); + RecordData::value_type Record[] = {IsPartial ? 
DECL_PARTIAL_SPECIALIZATIONS + : DECL_SPECIALIZATIONS}; + Stream.EmitRecordWithBlob(IsPartial ? DeclPartialSpecializationsAbbrev + : DeclSpecializationsAbbrev, + Record, LookupTable); + + return Offset; +} + bool ASTWriter::isLookupResultExternal(StoredDeclsList &Result, DeclContext *DC) { return Result.hasExternalDecls() && @@ -5748,7 +5918,7 @@ void ASTWriter::WriteDeclAndTypes(ASTContext &Context) { // Keep writing types, declarations, and declaration update records // until we've emitted all of them. RecordData DeclUpdatesOffsetsRecord; - Stream.EnterSubblock(DECLTYPES_BLOCK_ID, /*bits for abbreviations*/5); + Stream.EnterSubblock(DECLTYPES_BLOCK_ID, /*bits for abbreviations*/ 6); DeclTypesBlockStartOffset = Stream.GetCurrentBitNo(); WriteTypeAbbrevs(); WriteDeclAbbrevs(); @@ -5822,6 +5992,16 @@ void ASTWriter::WriteDeclAndTypes(ASTContext &Context) { FunctionToLambdaMapAbbrev); } + if (!SpecializationsUpdates.empty()) { + WriteSpecializationsUpdates(/*IsPartial=*/false); + SpecializationsUpdates.clear(); + } + + if (!PartialSpecializationsUpdates.empty()) { + WriteSpecializationsUpdates(/*IsPartial=*/true); + PartialSpecializationsUpdates.clear(); + } + const TranslationUnitDecl *TU = Context.getTranslationUnitDecl(); // Create a lexical update block containing all of the declarations in the // translation unit that do not come from other AST files. @@ -5865,6 +6045,31 @@ void ASTWriter::WriteDeclAndTypes(ASTContext &Context) { WriteDeclContextVisibleUpdate(Context, DC); } +void ASTWriter::WriteSpecializationsUpdates(bool IsPartial) { + auto RecordType = IsPartial ? 
CXX_ADDED_TEMPLATE_PARTIAL_SPECIALIZATION + : CXX_ADDED_TEMPLATE_SPECIALIZATION; + + auto Abv = std::make_shared<llvm::BitCodeAbbrev>(); + Abv->Add(llvm::BitCodeAbbrevOp(RecordType)); + Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6)); + Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Blob)); + auto UpdateSpecializationAbbrev = Stream.EmitAbbrev(std::move(Abv)); + + auto &SpecUpdates = + IsPartial ? PartialSpecializationsUpdates : SpecializationsUpdates; + for (auto &SpecializationUpdate : SpecUpdates) { + const NamedDecl *D = SpecializationUpdate.first; + + llvm::SmallString<4096> LookupTable; + GenerateSpecializationInfoLookupTable(D, SpecializationUpdate.second, + LookupTable, IsPartial); + + // Write the lookup table + RecordData::value_type Record[] = {RecordType, getDeclID(D).getRawValue()}; + Stream.EmitRecordWithBlob(UpdateSpecializationAbbrev, Record, LookupTable); + } +} + void ASTWriter::WriteDeclUpdatesBlocks(ASTContext &Context, RecordDataImpl &OffsetsRecord) { if (DeclUpdates.empty()) @@ -5894,12 +6099,10 @@ void ASTWriter::WriteDeclUpdatesBlocks(ASTContext &Context, switch (Kind) { case UPD_CXX_ADDED_IMPLICIT_MEMBER: - case UPD_CXX_ADDED_TEMPLATE_SPECIALIZATION: case UPD_CXX_ADDED_ANONYMOUS_NAMESPACE: assert(Update.getDecl() && "no decl to add?"); Record.AddDeclRef(Update.getDecl()); break; - case UPD_CXX_ADDED_FUNCTION_DEFINITION: case UPD_CXX_ADDED_VAR_DEFINITION: break; |