aboutsummaryrefslogtreecommitdiff
path: root/clang
diff options
context:
space:
mode:
authorChuanqi Xu <yedeng.yd@linux.alibaba.com>2024-06-21 17:05:42 +0800
committerChuanqi Xu <yedeng.yd@linux.alibaba.com>2024-06-21 17:50:30 +0800
commitd4d95ee65159db1ea1a8c4159cfdaf8b81097897 (patch)
tree9708f59f6a6de9d4053e85e6b7b595b1a55c94bf /clang
parent2b5d1fb889fca7287858db0791bfecc1465f23e1 (diff)
downloadllvm-d4d95ee65159db1ea1a8c4159cfdaf8b81097897.zip
llvm-d4d95ee65159db1ea1a8c4159cfdaf8b81097897.tar.gz
llvm-d4d95ee65159db1ea1a8c4159cfdaf8b81097897.tar.bz2
[Serialization] Register identifiers in ahead and don't emit predefined decls
See the added test for the motivation example. In that example, we add a new function declaration in `a.cppm` and this is not used in the reduced BMI of `b.cppm`. We expect that the change won't affect the BMI of `b.cppm`. But it is the not the case. There are 2 reason for unexpected result: 1. We would register the interesting identifiers in a pretty late phase. This may cause some some predefined identifier ID change due to we insert other identifiers during emitting decls and types. 2. In `GenerateNameLookup`, we would generate information for predefined decls. This may not be intended. Since every predefined decl doesn't belong to any module. And this patch solves the first issue by registering the identifiers in the very early posititon to make sure the ID won't get affected by the process to emit decls and types. And we solve the second question by filtering predefined decls simply.
Diffstat (limited to 'clang')
-rw-r--r--clang/include/clang/Serialization/ASTWriter.h13
-rw-r--r--clang/lib/Serialization/ASTWriter.cpp94
-rw-r--r--clang/test/Modules/decl-params-determinisim.m16
-rw-r--r--clang/test/Modules/no-transitive-identifier-change-2.cppm28
4 files changed, 105 insertions, 46 deletions
diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h
index ce79c75..71a7c28 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -228,6 +228,11 @@ private:
/// unit, while 0 is reserved for NULL.
llvm::DenseMap<const Decl *, LocalDeclID> DeclIDs;
+ /// Set of predefined decls. This is a helper data to determine if a decl
+ /// is predefined. It should be more clear and safer to query the set
+ /// instead of comparing the result of `getDeclID()` or `GetDeclRef()`.
+ llvm::SmallPtrSet<const Decl *, 32> PredefinedDecls;
+
/// Offset of each declaration in the bitstream, indexed by
/// the declaration's ID.
std::vector<serialization::DeclOffset> DeclOffsets;
@@ -563,8 +568,6 @@ private:
void WriteType(QualType T);
bool isLookupResultExternal(StoredDeclsList &Result, DeclContext *DC);
- bool isLookupResultEntirelyExternalOrUnreachable(StoredDeclsList &Result,
- DeclContext *DC);
void GenerateNameLookupTable(const DeclContext *DC,
llvm::SmallVectorImpl<char> &LookupTable);
@@ -844,6 +847,8 @@ public:
bool hasChain() const { return Chain; }
ASTReader *getChain() const { return Chain; }
+ bool isWritingModule() const { return WritingModule; }
+
bool isWritingStdCXXNamedModules() const {
return WritingModule && WritingModule->isNamedModule();
}
@@ -852,6 +857,10 @@ public:
bool getDoneWritingDeclsAndTypes() const { return DoneWritingDeclsAndTypes; }
+ bool isDeclPredefined(const Decl *D) const {
+ return PredefinedDecls.count(D);
+ }
+
void handleVTable(CXXRecordDecl *RD);
private:
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index e00bacf..e6a58dc 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -3728,6 +3728,29 @@ static NamedDecl *getDeclForLocalLookup(const LangOptions &LangOpts,
namespace {
+bool IsInterestingIdentifier(const IdentifierInfo *II, uint64_t MacroOffset,
+ bool IsModule, bool IsCPlusPlus) {
+ bool NeedDecls = !IsModule || !IsCPlusPlus;
+
+ bool IsInteresting =
+ II->getNotableIdentifierID() != tok::NotableIdentifierKind::not_notable ||
+ II->getBuiltinID() != Builtin::ID::NotBuiltin ||
+ II->getObjCKeywordID() != tok::ObjCKeywordKind::objc_not_keyword;
+ if (MacroOffset || II->isPoisoned() || (!IsModule && IsInteresting) ||
+ II->hasRevertedTokenIDToIdentifier() ||
+ (NeedDecls && II->getFETokenInfo()))
+ return true;
+
+ return false;
+}
+
+bool IsInterestingNonMacroIdentifier(const IdentifierInfo *II,
+ ASTWriter &Writer) {
+ bool IsModule = Writer.isWritingModule();
+ bool IsCPlusPlus = Writer.getLangOpts().CPlusPlus;
+ return IsInterestingIdentifier(II, /*MacroOffset=*/0, IsModule, IsCPlusPlus);
+}
+
class ASTIdentifierTableTrait {
ASTWriter &Writer;
Preprocessor &PP;
@@ -3741,17 +3764,8 @@ class ASTIdentifierTableTrait {
/// doesn't check whether the name has macros defined; use PublicMacroIterator
/// to check that.
bool isInterestingIdentifier(const IdentifierInfo *II, uint64_t MacroOffset) {
- bool IsInteresting =
- II->getNotableIdentifierID() !=
- tok::NotableIdentifierKind::not_notable ||
- II->getBuiltinID() != Builtin::ID::NotBuiltin ||
- II->getObjCKeywordID() != tok::ObjCKeywordKind::objc_not_keyword;
- if (MacroOffset || II->isPoisoned() || (!IsModule && IsInteresting) ||
- II->hasRevertedTokenIDToIdentifier() ||
- (NeedDecls && II->getFETokenInfo()))
- return true;
-
- return false;
+ return IsInterestingIdentifier(II, MacroOffset, IsModule,
+ Writer.getLangOpts().CPlusPlus);
}
public:
@@ -3782,10 +3796,6 @@ public:
return isInterestingIdentifier(II, MacroOffset);
}
- bool isInterestingNonMacroIdentifier(const IdentifierInfo *II) {
- return isInterestingIdentifier(II, 0);
- }
-
std::pair<unsigned, unsigned>
EmitKeyDataLength(raw_ostream &Out, const IdentifierInfo *II, IdentifierID ID) {
// Record the location of the identifier data. This is used when generating
@@ -3887,21 +3897,6 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP,
ASTIdentifierTableTrait Trait(*this, PP, IdResolver, IsModule,
IsModule ? &InterestingIdents : nullptr);
- // Look for any identifiers that were named while processing the
- // headers, but are otherwise not needed. We add these to the hash
- // table to enable checking of the predefines buffer in the case
- // where the user adds new macro definitions when building the AST
- // file.
- SmallVector<const IdentifierInfo *, 128> IIs;
- for (const auto &ID : PP.getIdentifierTable())
- if (Trait.isInterestingNonMacroIdentifier(ID.second))
- IIs.push_back(ID.second);
- // Sort the identifiers lexicographically before getting the references so
- // that their order is stable.
- llvm::sort(IIs, llvm::deref<std::less<>>());
- for (const IdentifierInfo *II : IIs)
- getIdentifierRef(II);
-
// Create the on-disk hash table representation. We only store offsets
// for identifiers that appear here for the first time.
IdentifierOffsets.resize(NextIdentID - FirstIdentID);
@@ -4125,16 +4120,24 @@ bool ASTWriter::isLookupResultExternal(StoredDeclsList &Result,
DC->hasNeedToReconcileExternalVisibleStorage();
}
-bool ASTWriter::isLookupResultEntirelyExternalOrUnreachable(
- StoredDeclsList &Result, DeclContext *DC) {
+/// Returns ture if all of the lookup result are either external, not emitted or
+/// predefined. In such cases, the lookup result is not interesting and we don't
+/// need to record the result in the current being written module. Return false
+/// otherwise.
+static bool isLookupResultNotInteresting(ASTWriter &Writer,
+ StoredDeclsList &Result) {
for (auto *D : Result.getLookupResult()) {
- auto *LocalD = getDeclForLocalLookup(getLangOpts(), D);
+ auto *LocalD = getDeclForLocalLookup(Writer.getLangOpts(), D);
if (LocalD->isFromASTFile())
continue;
// We can only be sure whether the local declaration is reachable
// after we done writing the declarations and types.
- if (DoneWritingDeclsAndTypes && !wasDeclEmitted(LocalD))
+ if (Writer.getDoneWritingDeclsAndTypes() && !Writer.wasDeclEmitted(LocalD))
+ continue;
+
+ // We don't need to emit the predefined decls.
+ if (Writer.isDeclPredefined(LocalD))
continue;
return false;
@@ -4182,11 +4185,11 @@ ASTWriter::GenerateNameLookupTable(const DeclContext *ConstDC,
// that entirely external or unreachable.
//
// FIMXE: It looks sufficient to test
- // isLookupResultEntirelyExternalOrUnreachable here. But due to bug we have
+ // isLookupResultNotInteresting here. But due to bug we have
// to test isLookupResultExternal here. See
// https://github.com/llvm/llvm-project/issues/61065 for details.
if ((GeneratingReducedBMI || isLookupResultExternal(Result, DC)) &&
- isLookupResultEntirelyExternalOrUnreachable(Result, DC))
+ isLookupResultNotInteresting(*this, Result))
continue;
// We also skip empty results. If any of the results could be external and
@@ -5007,6 +5010,7 @@ void ASTWriter::PrepareWritingSpecialDecls(Sema &SemaRef) {
if (D) {
assert(D->isCanonicalDecl() && "predefined decl is not canonical");
DeclIDs[D] = ID;
+ PredefinedDecls.insert(D);
}
};
RegisterPredefDecl(Context.getTranslationUnitDecl(),
@@ -5355,6 +5359,24 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
writeUnhashedControlBlock(PP, Context);
+ // Look for any identifiers that were named while processing the
+ // headers, but are otherwise not needed. We add these to the hash
+ // table to enable checking of the predefines buffer in the case
+ // where the user adds new macro definitions when building the AST
+ // file.
+ //
+ // We do this before emitting any Decl and Types to make sure the
+ // Identifier ID is stable.
+ SmallVector<const IdentifierInfo *, 128> IIs;
+ for (const auto &ID : PP.getIdentifierTable())
+ if (IsInterestingNonMacroIdentifier(ID.second, *this))
+ IIs.push_back(ID.second);
+ // Sort the identifiers lexicographically before getting the references so
+ // that their order is stable.
+ llvm::sort(IIs, llvm::deref<std::less<>>());
+ for (const IdentifierInfo *II : IIs)
+ getIdentifierRef(II);
+
// Write the set of weak, undeclared identifiers. We always write the
// entire table, since later PCH files in a PCH chain are only interested in
// the results at the end of the chain.
diff --git a/clang/test/Modules/decl-params-determinisim.m b/clang/test/Modules/decl-params-determinisim.m
index 9cf37ac..db4ed33 100644
--- a/clang/test/Modules/decl-params-determinisim.m
+++ b/clang/test/Modules/decl-params-determinisim.m
@@ -28,23 +28,23 @@
// CHECK: <TYPE_FUNCTION_PROTO
// CHECK-NEXT: <DECL_PARM_VAR
-// CHECK-SAME: op5=2
+// CHECK-SAME: op5=13
// CHECK-NEXT: <DECL_PARM_VAR
-// CHECK-SAME: op5=3
+// CHECK-SAME: op5=14
// CHECK-NEXT: <DECL_PARM_VAR
-// CHECK-SAME: op5=4
+// CHECK-SAME: op5=15
// CHECK-NEXT: <DECL_PARM_VAR
-// CHECK-SAME: op5=5
+// CHECK-SAME: op5=16
/// Decl records start at 43
// CHECK: <DECL_RECORD
-// CHECK-SAME: op5=43
+// CHECK-SAME: op5=54
// CHECK-NEXT: <DECL_RECORD
-// CHECK-SAME: op5=44
+// CHECK-SAME: op5=55
// CHECK-NEXT: <DECL_RECORD
-// CHECK-SAME: op5=45
+// CHECK-SAME: op5=56
// CHECK-NEXT: <DECL_RECORD
-// CHECK-SAME: op5=46
+// CHECK-SAME: op5=57
//--- headers/a.h
void f(struct A0 *a0,
diff --git a/clang/test/Modules/no-transitive-identifier-change-2.cppm b/clang/test/Modules/no-transitive-identifier-change-2.cppm
new file mode 100644
index 0000000..50a54c6
--- /dev/null
+++ b/clang/test/Modules/no-transitive-identifier-change-2.cppm
@@ -0,0 +1,28 @@
+// Test that adding a new identifier within reduced BMI may not produce a transitive change.
+//
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+//
+// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/A.cppm -o %t/A.pcm
+// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/B.cppm -o %t/B.pcm \
+// RUN: -fmodule-file=A=%t/A.pcm
+//
+// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/A.v1.cppm -o %t/A.v1.pcm
+// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/B.cppm -o %t/B.v1.pcm \
+// RUN: -fmodule-file=A=%t/A.v1.pcm
+//
+// RUN: diff %t/B.pcm %t/B.v1.pcm &> /dev/null
+
+//--- A.cppm
+export module A;
+export int a();
+
+//--- A.v1.cppm
+export module A;
+export int a();
+export int a2();
+
+//--- B.cppm
+export module B;
+import A;
+export int b() { return a(); }