diff options
author | yronglin <yronglin777@gmail.com> | 2025-06-21 18:58:56 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-06-21 18:58:56 +0800 |
commit | ea321392ebc487c1000e43576f44af99edf28a5f (patch) | |
tree | e259fa69b0eec9757771f96085e12337c94b9b65 /clang/lib | |
parent | 1b5d6ec6855369d109fcb740ecd3812231b7a279 (diff) | |
download | llvm-ea321392ebc487c1000e43576f44af99edf28a5f.zip llvm-ea321392ebc487c1000e43576f44af99edf28a5f.tar.gz llvm-ea321392ebc487c1000e43576f44af99edf28a5f.tar.bz2 |
[C++][Modules] A module directive may only appear as the first preprocessing tokens in a file (#144233)
This PR is the second part of the
[P1857R3](https://github.com/llvm/llvm-project/pull/107168)
implementation. It mainly implements the restriction `A module directive
may only appear as the first preprocessing tokens in a file (excluding
the global module fragment.)`:
[cpp.pre](https://eel.is/c++draft/cpp.pre):
```
module-file:
pp-global-module-fragment[opt] pp-module group[opt] pp-private-module-fragment[opt]
```
We also refine the tests to use `split-file` instead of conditional macros.
Signed-off-by: yronglin <yronglin777@gmail.com>
Diffstat (limited to 'clang/lib')
-rw-r--r-- | clang/lib/Lex/Lexer.cpp | 13 | ||||
-rw-r--r-- | clang/lib/Lex/PPDirectives.cpp | 3 | ||||
-rw-r--r-- | clang/lib/Lex/PPMacroExpansion.cpp | 3 | ||||
-rw-r--r-- | clang/lib/Lex/Preprocessor.cpp | 2 | ||||
-rw-r--r-- | clang/lib/Parse/Parser.cpp | 7 | ||||
-rw-r--r-- | clang/lib/Sema/SemaModule.cpp | 15 |
6 files changed, 31 insertions, 12 deletions
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 9320045..b61ea3b 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -174,6 +174,8 @@ void Lexer::InitLexer(const char *BufStart, const char *BufPtr, ExtendedTokenMode = 0; NewLinePtr = nullptr; + + IsFirstPPToken = true; } /// Lexer constructor - Create a new lexer object for the specified buffer @@ -3725,6 +3727,11 @@ bool Lexer::Lex(Token &Result) { HasLeadingEmptyMacro = false; } + if (IsFirstPPToken) { + Result.setFlag(Token::FirstPPToken); + IsFirstPPToken = false; + } + bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine; IsAtPhysicalStartOfLine = false; bool isRawLex = isLexingRawMode(); @@ -3732,6 +3739,10 @@ bool Lexer::Lex(Token &Result) { bool returnedToken = LexTokenInternal(Result, atPhysicalStartOfLine); // (After the LexTokenInternal call, the lexer might be destroyed.) assert((returnedToken || !isRawLex) && "Raw lex must succeed"); + + if (returnedToken && Result.isFirstPPToken() && PP && + !PP->hasSeenMainFileFirstPPToken()) + PP->HandleMainFileFirstPPToken(Result); return returnedToken; } @@ -4535,6 +4546,8 @@ const char *Lexer::convertDependencyDirectiveToken( Result.setFlag((Token::TokenFlags)DDTok.Flags); Result.setLength(DDTok.Length); BufferPtr = TokPtr + DDTok.Length; + if (PP && !PP->hasSeenMainFileFirstPPToken() && Result.isFirstPPToken()) + PP->HandleMainFileFirstPPToken(Result); return TokPtr; } diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index 04a30f6..70934b9 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -1242,6 +1242,9 @@ void Preprocessor::HandleDirective(Token &Result) { // pp-directive. bool ReadAnyTokensBeforeDirective =CurPPLexer->MIOpt.getHasReadAnyTokensVal(); + if (!hasSeenMainFileFirstPPToken()) + HandleMainFileFirstPPToken(Result); + // Save the '#' token in case we need to return it later. 
Token SavedHash = Result; diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp index 37ac1bf..97bdeb8 100644 --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -469,6 +469,9 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier, // to disable the optimization in this case. if (CurPPLexer) CurPPLexer->MIOpt.ExpandedMacro(); + if (!hasSeenMainFileFirstPPToken()) + HandleMainFileFirstPPToken(Identifier); + // If this is a builtin macro, like __LINE__ or _Pragma, handle it specially. if (MI->isBuiltinMacro()) { if (Callbacks) diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 21fc7a2..18b2f5f 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -247,6 +247,8 @@ void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const { llvm::errs() << " [LeadingSpace]"; if (Tok.isExpandDisabled()) llvm::errs() << " [ExpandDisabled]"; + if (Tok.isFirstPPToken()) + llvm::errs() << " [First pp-token]"; if (Tok.needsCleaning()) { const char *Start = SourceMgr.getCharacterData(Tok.getLocation()); llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength()) diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp index 788ed79..18f399a 100644 --- a/clang/lib/Parse/Parser.cpp +++ b/clang/lib/Parse/Parser.cpp @@ -2340,7 +2340,8 @@ void Parser::ParseMicrosoftIfExistsExternalDeclaration() { Parser::DeclGroupPtrTy Parser::ParseModuleDecl(Sema::ModuleImportState &ImportState) { - SourceLocation StartLoc = Tok.getLocation(); + Token Introducer = Tok; + SourceLocation StartLoc = Introducer.getLocation(); Sema::ModuleDeclKind MDK = TryConsumeToken(tok::kw_export) ? Sema::ModuleDeclKind::Interface @@ -2359,7 +2360,7 @@ Parser::ParseModuleDecl(Sema::ModuleImportState &ImportState) { // Parse a global-module-fragment, if present. 
if (getLangOpts().CPlusPlusModules && Tok.is(tok::semi)) { SourceLocation SemiLoc = ConsumeToken(); - if (ImportState != Sema::ModuleImportState::FirstDecl) { + if (!Introducer.isFirstPPToken()) { Diag(StartLoc, diag::err_global_module_introducer_not_at_start) << SourceRange(StartLoc, SemiLoc); return nullptr; @@ -2416,7 +2417,7 @@ Parser::ParseModuleDecl(Sema::ModuleImportState &ImportState) { ExpectAndConsumeSemi(diag::err_module_expected_semi); return Actions.ActOnModuleDecl(StartLoc, ModuleLoc, MDK, Path, Partition, - ImportState); + ImportState, Introducer.isFirstPPToken()); } Decl *Parser::ParseModuleImport(SourceLocation AtLoc, diff --git a/clang/lib/Sema/SemaModule.cpp b/clang/lib/Sema/SemaModule.cpp index 54ee048..fe70ce3 100644 --- a/clang/lib/Sema/SemaModule.cpp +++ b/clang/lib/Sema/SemaModule.cpp @@ -263,11 +263,11 @@ static bool DiagReservedModuleName(Sema &S, const IdentifierInfo *II, Sema::DeclGroupPtrTy Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc, ModuleDeclKind MDK, ModuleIdPath Path, - ModuleIdPath Partition, ModuleImportState &ImportState) { + ModuleIdPath Partition, ModuleImportState &ImportState, + bool IntroducerIsFirstPPToken) { assert(getLangOpts().CPlusPlusModules && "should only have module decl in standard C++ modules"); - bool IsFirstDecl = ImportState == ModuleImportState::FirstDecl; bool SeenGMF = ImportState == ModuleImportState::GlobalFragment; // If any of the steps here fail, we count that as invalidating C++20 // module state; @@ -333,14 +333,11 @@ Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc, SeenGMF == (bool)this->TheGlobalModuleFragment) && "mismatched global module state"); - // In C++20, the module-declaration must be the first declaration if there - // is no global module fragment. 
- if (getLangOpts().CPlusPlusModules && !IsFirstDecl && !SeenGMF) { + // In C++20, A module directive may only appear as the first preprocessing + // tokens in a file (excluding the global module fragment.). + if (getLangOpts().CPlusPlusModules && !IntroducerIsFirstPPToken && !SeenGMF) { Diag(ModuleLoc, diag::err_module_decl_not_at_start); - SourceLocation BeginLoc = - ModuleScopes.empty() - ? SourceMgr.getLocForStartOfFile(SourceMgr.getMainFileID()) - : ModuleScopes.back().BeginLoc; + SourceLocation BeginLoc = PP.getMainFileFirstPPToken().getLocation(); if (BeginLoc.isValid()) { Diag(BeginLoc, diag::note_global_module_introducer_missing) << FixItHint::CreateInsertion(BeginLoc, "module;\n"); |