diff options
author | Chuanqi Xu <yedeng.yd@linux.alibaba.com> | 2024-06-21 09:21:40 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-21 09:21:40 +0800 |
commit | 03921b979d67657bfc9cf8240add2484cc4df6a7 (patch) | |
tree | 10bce423e388f085b559ea4fe2542fe00d3a608d /clang/lib | |
parent | e1e5ed5893c50918dc9b6b56acfe6242f03354dc (diff) | |
download | llvm-03921b979d67657bfc9cf8240add2484cc4df6a7.zip llvm-03921b979d67657bfc9cf8240add2484cc4df6a7.tar.gz llvm-03921b979d67657bfc9cf8240add2484cc4df6a7.tar.bz2 |
[serialization] No transitive type change (#92511)
Follow-up to https://github.com/llvm/llvm-project/pull/92085.
#### motivation
The motivation is still to cut off unnecessary changes in the
dependency chain. See the above link (recursively) for details.
This will be the last patch of the `no-transitive-*-change` series.
If there are any follow-up patches, they will likely be specific to C++20
named modules, handling special cases like `ADL` (see the reply in
https://discourse.llvm.org/t/rfc-c-20-modules-introduce-thin-bmi-and-decls-hash/74755/53
for example). So they won't affect the whole serialization part the way
the patches in this series did.
#### example
After this patch, we are finally able to cut off unnecessary changes of
types. For example,
```
//--- m-partA.cppm
export module m:partA;
//--- m-partA.v1.cppm
export module m:partA;
namespace NS {
class A {
public:
int getValue() {
return 43;
}
};
}
//--- m-partB.cppm
export module m:partB;
export inline int getB() {
return 430;
}
//--- m.cppm
export module m;
export import :partA;
export import :partB;
//--- useBOnly.cppm
export module useBOnly;
import m;
export inline int get() {
return getB();
}
```
The BMI of `useBOnly.cppm` is expected not to change if we only add a
new class to `m:partA` (as `m-partA.v1.cppm` does compared to
`m-partA.cppm`). This will be pretty useful in practice.
#### implementation details
The key idea of this patch is similar to that of the previous patches: extend
the 32-bit type ID to 64 bits so that we can store the module file index
in the higher bits. The encoding of the type ID is then independent of
the imported modules.
But there are two differences from the previous patches:
- A TypeID is not purely an index of serialized types: the lower 3 bits
are used to store the qualifiers.
- TypeID doesn't take part in any lookup process, so there are far fewer
uses of TypeID than of the IDs handled in the previous patches.
The first difference requires some slightly more complex bit
operations, while the second difference makes the patch much simpler than
the previous ones.
Diffstat (limited to 'clang/lib')
-rw-r--r-- | clang/lib/Serialization/ASTCommon.cpp | 2 | ||||
-rw-r--r-- | clang/lib/Serialization/ASTReader.cpp | 112 | ||||
-rw-r--r-- | clang/lib/Serialization/ASTWriter.cpp | 41 | ||||
-rw-r--r-- | clang/lib/Serialization/ModuleFile.cpp | 1 |
4 files changed, 88 insertions, 68 deletions
diff --git a/clang/lib/Serialization/ASTCommon.cpp b/clang/lib/Serialization/ASTCommon.cpp index 3385cb8..444a8a3 100644 --- a/clang/lib/Serialization/ASTCommon.cpp +++ b/clang/lib/Serialization/ASTCommon.cpp @@ -283,7 +283,7 @@ serialization::TypeIdxFromBuiltin(const BuiltinType *BT) { break; } - return TypeIdx(ID); + return TypeIdx(0, ID); } unsigned serialization::ComputeHash(Selector Sel) { diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 43013ab..552a3af 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -3395,20 +3395,11 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, "duplicate TYPE_OFFSET record in AST file"); F.TypeOffsets = reinterpret_cast<const UnalignedUInt64 *>(Blob.data()); F.LocalNumTypes = Record[0]; - unsigned LocalBaseTypeIndex = Record[1]; F.BaseTypeIndex = getTotalNumTypes(); - if (F.LocalNumTypes > 0) { - // Introduce the global -> local mapping for types within this module. - GlobalTypeMap.insert(std::make_pair(getTotalNumTypes(), &F)); - - // Introduce the local -> global mapping for types within this module. 
- F.TypeRemap.insertOrReplace( - std::make_pair(LocalBaseTypeIndex, - F.BaseTypeIndex - LocalBaseTypeIndex)); - + if (F.LocalNumTypes > 0) TypesLoaded.resize(TypesLoaded.size() + F.LocalNumTypes); - } + break; } @@ -4084,7 +4075,6 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const { RemapBuilder PreprocessedEntityRemap(F.PreprocessedEntityRemap); RemapBuilder SubmoduleRemap(F.SubmoduleRemap); RemapBuilder SelectorRemap(F.SelectorRemap); - RemapBuilder TypeRemap(F.TypeRemap); auto &ImportedModuleVector = F.TransitiveImports; assert(ImportedModuleVector.empty()); @@ -4120,8 +4110,6 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const { endian::readNext<uint32_t, llvm::endianness::little>(Data); uint32_t SelectorIDOffset = endian::readNext<uint32_t, llvm::endianness::little>(Data); - uint32_t TypeIndexOffset = - endian::readNext<uint32_t, llvm::endianness::little>(Data); auto mapOffset = [&](uint32_t Offset, uint32_t BaseOffset, RemapBuilder &Remap) { @@ -4136,7 +4124,6 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const { PreprocessedEntityRemap); mapOffset(SubmoduleIDOffset, OM->BaseSubmoduleID, SubmoduleRemap); mapOffset(SelectorIDOffset, OM->BaseSelectorID, SelectorRemap); - mapOffset(TypeIndexOffset, OM->BaseTypeIndex, TypeRemap); } } @@ -5115,12 +5102,12 @@ void ASTReader::InitializeContext() { // Load the special types. 
if (SpecialTypes.size() >= NumSpecialTypeIDs) { - if (unsigned String = SpecialTypes[SPECIAL_TYPE_CF_CONSTANT_STRING]) { + if (TypeID String = SpecialTypes[SPECIAL_TYPE_CF_CONSTANT_STRING]) { if (!Context.CFConstantStringTypeDecl) Context.setCFConstantStringType(GetType(String)); } - if (unsigned File = SpecialTypes[SPECIAL_TYPE_FILE]) { + if (TypeID File = SpecialTypes[SPECIAL_TYPE_FILE]) { QualType FileType = GetType(File); if (FileType.isNull()) { Error("FILE type is NULL"); @@ -5141,7 +5128,7 @@ void ASTReader::InitializeContext() { } } - if (unsigned Jmp_buf = SpecialTypes[SPECIAL_TYPE_JMP_BUF]) { + if (TypeID Jmp_buf = SpecialTypes[SPECIAL_TYPE_JMP_BUF]) { QualType Jmp_bufType = GetType(Jmp_buf); if (Jmp_bufType.isNull()) { Error("jmp_buf type is NULL"); @@ -5162,7 +5149,7 @@ void ASTReader::InitializeContext() { } } - if (unsigned Sigjmp_buf = SpecialTypes[SPECIAL_TYPE_SIGJMP_BUF]) { + if (TypeID Sigjmp_buf = SpecialTypes[SPECIAL_TYPE_SIGJMP_BUF]) { QualType Sigjmp_bufType = GetType(Sigjmp_buf); if (Sigjmp_bufType.isNull()) { Error("sigjmp_buf type is NULL"); @@ -5180,25 +5167,24 @@ void ASTReader::InitializeContext() { } } - if (unsigned ObjCIdRedef - = SpecialTypes[SPECIAL_TYPE_OBJC_ID_REDEFINITION]) { + if (TypeID ObjCIdRedef = SpecialTypes[SPECIAL_TYPE_OBJC_ID_REDEFINITION]) { if (Context.ObjCIdRedefinitionType.isNull()) Context.ObjCIdRedefinitionType = GetType(ObjCIdRedef); } - if (unsigned ObjCClassRedef - = SpecialTypes[SPECIAL_TYPE_OBJC_CLASS_REDEFINITION]) { + if (TypeID ObjCClassRedef = + SpecialTypes[SPECIAL_TYPE_OBJC_CLASS_REDEFINITION]) { if (Context.ObjCClassRedefinitionType.isNull()) Context.ObjCClassRedefinitionType = GetType(ObjCClassRedef); } - if (unsigned ObjCSelRedef - = SpecialTypes[SPECIAL_TYPE_OBJC_SEL_REDEFINITION]) { + if (TypeID ObjCSelRedef = + SpecialTypes[SPECIAL_TYPE_OBJC_SEL_REDEFINITION]) { if (Context.ObjCSelRedefinitionType.isNull()) Context.ObjCSelRedefinitionType = GetType(ObjCSelRedef); } - if (unsigned Ucontext_t = 
SpecialTypes[SPECIAL_TYPE_UCONTEXT_T]) { + if (TypeID Ucontext_t = SpecialTypes[SPECIAL_TYPE_UCONTEXT_T]) { QualType Ucontext_tType = GetType(Ucontext_t); if (Ucontext_tType.isNull()) { Error("ucontext_t type is NULL"); @@ -6683,10 +6669,8 @@ void ASTReader::ReadPragmaDiagnosticMappings(DiagnosticsEngine &Diag) { } /// Get the correct cursor and offset for loading a type. -ASTReader::RecordLocation ASTReader::TypeCursorForIndex(unsigned Index) { - GlobalTypeMapType::iterator I = GlobalTypeMap.find(Index); - assert(I != GlobalTypeMap.end() && "Corrupted global type map"); - ModuleFile *M = I->second; +ASTReader::RecordLocation ASTReader::TypeCursorForIndex(TypeID ID) { + auto [M, Index] = translateTypeIDToIndex(ID); return RecordLocation(M, M->TypeOffsets[Index - M->BaseTypeIndex].get() + M->DeclsBlockStartOffset); } @@ -6707,10 +6691,10 @@ static std::optional<Type::TypeClass> getTypeClassForCode(TypeCode code) { /// routine actually reads the record corresponding to the type at the given /// location. It is a helper routine for GetType, which deals with reading type /// IDs. 
-QualType ASTReader::readTypeRecord(unsigned Index) { +QualType ASTReader::readTypeRecord(TypeID ID) { assert(ContextObj && "reading type with no AST context"); ASTContext &Context = *ContextObj; - RecordLocation Loc = TypeCursorForIndex(Index); + RecordLocation Loc = TypeCursorForIndex(ID); BitstreamCursor &DeclsCursor = Loc.F->DeclsCursor; // Keep track of where we are in the stream, then jump back there @@ -7151,15 +7135,44 @@ TypeSourceInfo *ASTRecordReader::readTypeSourceInfo() { return TInfo; } +static unsigned getIndexForTypeID(serialization::TypeID ID) { + return (ID & llvm::maskTrailingOnes<TypeID>(32)) >> Qualifiers::FastWidth; +} + +static unsigned getModuleFileIndexForTypeID(serialization::TypeID ID) { + return ID >> 32; +} + +static bool isPredefinedType(serialization::TypeID ID) { + // We don't need to erase the higher bits since if these bits are not 0, + // it must be larger than NUM_PREDEF_TYPE_IDS. + return (ID >> Qualifiers::FastWidth) < NUM_PREDEF_TYPE_IDS; +} + +std::pair<ModuleFile *, unsigned> +ASTReader::translateTypeIDToIndex(serialization::TypeID ID) const { + assert(!isPredefinedType(ID) && + "Predefined type shouldn't be in TypesLoaded"); + unsigned ModuleFileIndex = getModuleFileIndexForTypeID(ID); + assert(ModuleFileIndex && "Untranslated Local Decl?"); + + ModuleFile *OwningModuleFile = &getModuleManager()[ModuleFileIndex - 1]; + assert(OwningModuleFile && + "untranslated type ID or local type ID shouldn't be in TypesLoaded"); + + return {OwningModuleFile, + OwningModuleFile->BaseTypeIndex + getIndexForTypeID(ID)}; +} + QualType ASTReader::GetType(TypeID ID) { assert(ContextObj && "reading type with no AST context"); ASTContext &Context = *ContextObj; unsigned FastQuals = ID & Qualifiers::FastMask; - unsigned Index = ID >> Qualifiers::FastWidth; - if (Index < NUM_PREDEF_TYPE_IDS) { + if (isPredefinedType(ID)) { QualType T; + unsigned Index = getIndexForTypeID(ID); switch ((PredefinedTypeIDs)Index) { case PREDEF_TYPE_LAST_ID: // We 
should never use this one. @@ -7432,10 +7445,11 @@ QualType ASTReader::GetType(TypeID ID) { return T.withFastQualifiers(FastQuals); } - Index -= NUM_PREDEF_TYPE_IDS; + unsigned Index = translateTypeIDToIndex(ID).second; + assert(Index < TypesLoaded.size() && "Type index out-of-range"); if (TypesLoaded[Index].isNull()) { - TypesLoaded[Index] = readTypeRecord(Index); + TypesLoaded[Index] = readTypeRecord(ID); if (TypesLoaded[Index].isNull()) return QualType(); @@ -7448,27 +7462,28 @@ QualType ASTReader::GetType(TypeID ID) { return TypesLoaded[Index].withFastQualifiers(FastQuals); } -QualType ASTReader::getLocalType(ModuleFile &F, unsigned LocalID) { +QualType ASTReader::getLocalType(ModuleFile &F, LocalTypeID LocalID) { return GetType(getGlobalTypeID(F, LocalID)); } -serialization::TypeID -ASTReader::getGlobalTypeID(ModuleFile &F, unsigned LocalID) const { - unsigned FastQuals = LocalID & Qualifiers::FastMask; - unsigned LocalIndex = LocalID >> Qualifiers::FastWidth; - - if (LocalIndex < NUM_PREDEF_TYPE_IDS) +serialization::TypeID ASTReader::getGlobalTypeID(ModuleFile &F, + LocalTypeID LocalID) const { + if (isPredefinedType(LocalID)) return LocalID; if (!F.ModuleOffsetMap.empty()) ReadModuleOffsetMap(F); - ContinuousRangeMap<uint32_t, int, 2>::iterator I - = F.TypeRemap.find(LocalIndex - NUM_PREDEF_TYPE_IDS); - assert(I != F.TypeRemap.end() && "Invalid index into type index remap"); + unsigned ModuleFileIndex = getModuleFileIndexForTypeID(LocalID); + LocalID &= llvm::maskTrailingOnes<TypeID>(32); + + if (ModuleFileIndex == 0) + LocalID -= NUM_PREDEF_TYPE_IDS << Qualifiers::FastWidth; - unsigned GlobalIndex = LocalIndex + I->second; - return (GlobalIndex << Qualifiers::FastWidth) | FastQuals; + ModuleFile &MF = + ModuleFileIndex ? 
*F.TransitiveImports[ModuleFileIndex - 1] : F; + ModuleFileIndex = MF.Index + 1; + return ((uint64_t)ModuleFileIndex << 32) | LocalID; } TemplateArgumentLocInfo @@ -8224,7 +8239,6 @@ LLVM_DUMP_METHOD void ASTReader::dump() { llvm::errs() << "*** PCH/ModuleFile Remappings:\n"; dumpModuleIDMap("Global bit offset map", GlobalBitOffsetsMap); dumpModuleIDMap("Global source location entry map", GlobalSLocEntryMap); - dumpModuleIDMap("Global type map", GlobalTypeMap); dumpModuleIDMap("Global macro map", GlobalMacroMap); dumpModuleIDMap("Global submodule map", GlobalSubmoduleMap); dumpModuleIDMap("Global selector map", GlobalSelectorMap); diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 346bab3..0297e20 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -3270,17 +3270,18 @@ void ASTWriter::WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag, /// Write the representation of a type to the AST stream. void ASTWriter::WriteType(QualType T) { TypeIdx &IdxRef = TypeIdxs[T]; - if (IdxRef.getIndex() == 0) // we haven't seen this type before. - IdxRef = TypeIdx(NextTypeID++); + if (IdxRef.getValue() == 0) // we haven't seen this type before. + IdxRef = TypeIdx(0, NextTypeID++); TypeIdx Idx = IdxRef; - assert(Idx.getIndex() >= FirstTypeID && "Re-writing a type from a prior AST"); + assert(Idx.getModuleFileIndex() == 0 && "Re-writing a type from a prior AST"); + assert(Idx.getValue() >= FirstTypeID && "Writing predefined type"); // Emit the type's representation. uint64_t Offset = ASTTypeWriter(*this).write(T) - DeclTypesBlockStartOffset; // Record the offset for this type. 
- unsigned Index = Idx.getIndex() - FirstTypeID; + uint64_t Index = Idx.getValue() - FirstTypeID; if (TypeOffsets.size() == Index) TypeOffsets.emplace_back(Offset); else if (TypeOffsets.size() < Index) { @@ -3353,12 +3354,10 @@ void ASTWriter::WriteTypeDeclOffsets() { auto Abbrev = std::make_shared<BitCodeAbbrev>(); Abbrev->Add(BitCodeAbbrevOp(TYPE_OFFSET)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // # of types - Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // base type index Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // types block unsigned TypeOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); { - RecordData::value_type Record[] = {TYPE_OFFSET, TypeOffsets.size(), - FirstTypeID - NUM_PREDEF_TYPE_IDS}; + RecordData::value_type Record[] = {TYPE_OFFSET, TypeOffsets.size()}; Stream.EmitRecordWithBlob(TypeOffsetAbbrev, Record, bytes(TypeOffsets)); } @@ -5464,7 +5463,6 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot, M.NumPreprocessedEntities); writeBaseIDOrNone(M.BaseSubmoduleID, M.LocalNumSubmodules); writeBaseIDOrNone(M.BaseSelectorID, M.LocalNumSelectors); - writeBaseIDOrNone(M.BaseTypeIndex, M.LocalNumTypes); } } RecordData::value_type Record[] = {MODULE_OFFSET_MAP}; @@ -6138,9 +6136,9 @@ static TypeID MakeTypeID(ASTContext &Context, QualType T, return TypeIdxFromBuiltin(BT).asTypeID(FastQuals); if (T == Context.AutoDeductTy) - return TypeIdx(PREDEF_TYPE_AUTO_DEDUCT).asTypeID(FastQuals); + return TypeIdx(0, PREDEF_TYPE_AUTO_DEDUCT).asTypeID(FastQuals); if (T == Context.AutoRRefDeductTy) - return TypeIdx(PREDEF_TYPE_AUTO_RREF_DEDUCT).asTypeID(FastQuals); + return TypeIdx(0, PREDEF_TYPE_AUTO_RREF_DEDUCT).asTypeID(FastQuals); return IdxForType(T).asTypeID(FastQuals); } @@ -6153,7 +6151,7 @@ TypeID ASTWriter::GetOrCreateTypeID(QualType T) { assert(!T.getLocalFastQualifiers()); TypeIdx &Idx = TypeIdxs[T]; - if (Idx.getIndex() == 0) { + if (Idx.getValue() == 0) { if (DoneWritingDeclsAndTypes) { 
assert(0 && "New type seen after serializing all the types to emit!"); return TypeIdx(); @@ -6161,7 +6159,7 @@ TypeID ASTWriter::GetOrCreateTypeID(QualType T) { // We haven't seen this type before. Assign it a new ID and put it // into the queue of types to emit. - Idx = TypeIdx(NextTypeID++); + Idx = TypeIdx(0, NextTypeID++); DeclTypesToEmit.push(T); } return Idx; @@ -6658,11 +6656,9 @@ void ASTWriter::ReaderInitialized(ASTReader *Reader) { // Note, this will get called multiple times, once one the reader starts up // and again each time it's done reading a PCH or module. - FirstTypeID = NUM_PREDEF_TYPE_IDS + Chain->getTotalNumTypes(); FirstMacroID = NUM_PREDEF_MACRO_IDS + Chain->getTotalNumMacros(); FirstSubmoduleID = NUM_PREDEF_SUBMODULE_IDS + Chain->getTotalNumSubmodules(); FirstSelectorID = NUM_PREDEF_SELECTOR_IDS + Chain->getTotalNumSelectors(); - NextTypeID = FirstTypeID; NextMacroID = FirstMacroID; NextSelectorID = FirstSelectorID; NextSubmoduleID = FirstSubmoduleID; @@ -6691,13 +6687,24 @@ void ASTWriter::MacroRead(serialization::MacroID ID, MacroInfo *MI) { } void ASTWriter::TypeRead(TypeIdx Idx, QualType T) { - // Always take the highest-numbered type index. This copes with an interesting + // Always take the type index that comes in later module files. + // This copes with an interesting // case for chained AST writing where we schedule writing the type and then, // later, deserialize the type from another AST. In this case, we want to - // keep the higher-numbered entry so that we can properly write it out to + // keep the entry from a later module so that we can properly write it out to // the AST file. TypeIdx &StoredIdx = TypeIdxs[T]; - if (Idx.getIndex() >= StoredIdx.getIndex()) + + // Ignore it if the type comes from the current being written module file. + // Since the current module file being written logically has the highest + // index. 
+ unsigned ModuleFileIndex = StoredIdx.getModuleFileIndex(); + if (ModuleFileIndex == 0 && StoredIdx.getValue()) + return; + + // Otherwise, keep the highest ID since the module file comes later has + // higher module file indexes. + if (Idx.getModuleFileIndex() >= StoredIdx.getModuleFileIndex()) StoredIdx = Idx; } diff --git a/clang/lib/Serialization/ModuleFile.cpp b/clang/lib/Serialization/ModuleFile.cpp index 7976c28..4858cdb 100644 --- a/clang/lib/Serialization/ModuleFile.cpp +++ b/clang/lib/Serialization/ModuleFile.cpp @@ -84,7 +84,6 @@ LLVM_DUMP_METHOD void ModuleFile::dump() { llvm::errs() << " Base type index: " << BaseTypeIndex << '\n' << " Number of types: " << LocalNumTypes << '\n'; - dumpLocalRemap("Type index local -> global map", TypeRemap); llvm::errs() << " Base decl index: " << BaseDeclIndex << '\n' << " Number of decls: " << LocalNumDecls << '\n'; |