From 44af53b22aaa1fe382b22329bbc7e4610ecbacc8 Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Thu, 28 Mar 2024 13:02:48 -0700 Subject: [clang][modules] Avoid calling expensive `SourceManager::translateFile()` (#86216) The `ASTWriter` algorithm for computing affecting module maps uses `SourceManager::translateFile()` to get a `FileID` from a `FileEntry`. This is slow (O(n)) since the function performs a linear walk over `SLocEntries` until it finds one with a matching `FileEntry`. This patch removes this use of `SourceManager::translateFile()` by tracking `FileID` instead of `FileEntry` in couple of places in `ModuleMap`, giving `ASTWriter` the desired `FileID` directly. There are no changes required for clients that still want a `FileEntry` from `ModuleMap`: the existing APIs internally use `SourceManager` to perform the reverse `FileID` to `FileEntry` conversion in O(1). --- clang/include/clang/Lex/ModuleMap.h | 15 ++--- clang/lib/Frontend/CompilerInstance.cpp | 19 ++++++- clang/lib/Frontend/FrontendAction.cpp | 10 +++- clang/lib/Lex/ModuleMap.cpp | 66 ++++++++++++---------- clang/lib/Serialization/ASTWriter.cpp | 17 +++--- .../test/ClangScanDeps/modules-extern-unrelated.m | 1 + 6 files changed, 78 insertions(+), 50 deletions(-) diff --git a/clang/include/clang/Lex/ModuleMap.h b/clang/include/clang/Lex/ModuleMap.h index 867cb6e..2e28ff6 100644 --- a/clang/include/clang/Lex/ModuleMap.h +++ b/clang/include/clang/Lex/ModuleMap.h @@ -263,8 +263,8 @@ private: Attributes Attrs; /// If \c InferModules is non-zero, the module map file that allowed - /// inferred modules. Otherwise, nullopt. - OptionalFileEntryRef ModuleMapFile; + /// inferred modules. Otherwise, invalid. + FileID ModuleMapFID; /// The names of modules that cannot be inferred within this /// directory. @@ -279,8 +279,7 @@ private: /// A mapping from an inferred module to the module map that allowed the /// inference. - // FIXME: Consider making the values non-optional. - llvm::DenseMap InferredModuleAllowedBy; + llvm::DenseMap InferredModuleAllowedBy; llvm::DenseMap AdditionalModMaps; @@ -618,8 +617,9 @@ public: /// /// \param Module The module whose module map file will be returned, if known. /// - /// \returns The file entry for the module map file containing the given - /// module, or nullptr if the module definition was inferred. + /// \returns The FileID for the module map file containing the given module, + /// invalid if the module definition was inferred. + FileID getContainingModuleMapFileID(const Module *Module) const; OptionalFileEntryRef getContainingModuleMapFile(const Module *Module) const; /// Get the module map file that (along with the module name) uniquely @@ -631,9 +631,10 @@ public: /// of inferred modules, returns the module map that allowed the inference /// (e.g. contained 'module *'). Otherwise, returns /// getContainingModuleMapFile(). + FileID getModuleMapFileIDForUniquing(const Module *M) const; OptionalFileEntryRef getModuleMapFileForUniquing(const Module *M) const; - void setInferredModuleAllowedBy(Module *M, OptionalFileEntryRef ModMap); + void setInferredModuleAllowedBy(Module *M, FileID ModMapFID); /// Canonicalize \p Path in a manner suitable for a module map file. In /// particular, this canonicalizes the parent directory separately from the diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp index 019f847..79ebb0a 100644 --- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -1337,9 +1337,24 @@ static bool compileModule(CompilerInstance &ImportingInstance, // Get or create the module map that we'll use to build this module. ModuleMap &ModMap = ImportingInstance.getPreprocessor().getHeaderSearchInfo().getModuleMap(); + SourceManager &SourceMgr = ImportingInstance.getSourceManager(); bool Result; - if (OptionalFileEntryRef ModuleMapFile = - ModMap.getContainingModuleMapFile(Module)) { + if (FileID ModuleMapFID = ModMap.getContainingModuleMapFileID(Module); + ModuleMapFID.isValid()) { + // We want to use the top-level module map. If we don't, the compiling + // instance may think the containing module map is a top-level one, while + // the importing instance knows it's included from a parent module map via + // the extern directive. This mismatch could bite us later. + SourceLocation Loc = SourceMgr.getIncludeLoc(ModuleMapFID); + while (Loc.isValid() && isModuleMap(SourceMgr.getFileCharacteristic(Loc))) { + ModuleMapFID = SourceMgr.getFileID(Loc); + Loc = SourceMgr.getIncludeLoc(ModuleMapFID); + } + + OptionalFileEntryRef ModuleMapFile = + SourceMgr.getFileEntryRefForID(ModuleMapFID); + assert(ModuleMapFile && "Top-level module map with no FileID"); + // Canonicalize compilation to start with the public module map. This is // vital for submodules declarations in the private module maps to be // correctly parsed when depending on a top level module in the public one. diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp index b9fd9b8..b7c9967 100644 --- a/clang/lib/Frontend/FrontendAction.cpp +++ b/clang/lib/Frontend/FrontendAction.cpp @@ -535,8 +535,14 @@ static Module *prepareToBuildModule(CompilerInstance &CI, if (*OriginalModuleMap != CI.getSourceManager().getFileEntryRefForID( CI.getSourceManager().getMainFileID())) { M->IsInferred = true; - CI.getPreprocessor().getHeaderSearchInfo().getModuleMap() - .setInferredModuleAllowedBy(M, *OriginalModuleMap); + auto FileCharacter = + M->IsSystem ? SrcMgr::C_System_ModuleMap : SrcMgr::C_User_ModuleMap; + FileID OriginalModuleMapFID = CI.getSourceManager().getOrCreateFileID( + *OriginalModuleMap, FileCharacter); + CI.getPreprocessor() + .getHeaderSearchInfo() + .getModuleMap() + .setInferredModuleAllowedBy(M, OriginalModuleMapFID); } } diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp index 10c475f..eed7eca 100644 --- a/clang/lib/Lex/ModuleMap.cpp +++ b/clang/lib/Lex/ModuleMap.cpp @@ -648,8 +648,7 @@ ModuleMap::findOrCreateModuleForHeaderInUmbrellaDir(FileEntryRef File) { UmbrellaModule = UmbrellaModule->Parent; if (UmbrellaModule->InferSubmodules) { - OptionalFileEntryRef UmbrellaModuleMap = - getModuleMapFileForUniquing(UmbrellaModule); + FileID UmbrellaModuleMap = getModuleMapFileIDForUniquing(UmbrellaModule); // Infer submodules for each of the directories we found between // the directory of the umbrella header and the directory where @@ -1021,7 +1020,7 @@ Module *ModuleMap::inferFrameworkModule(DirectoryEntryRef FrameworkDir, // If the framework has a parent path from which we're allowed to infer // a framework module, do so. - OptionalFileEntryRef ModuleMapFile; + FileID ModuleMapFID; if (!Parent) { // Determine whether we're allowed to infer a module map. bool canInfer = false; @@ -1060,7 +1059,7 @@ Module *ModuleMap::inferFrameworkModule(DirectoryEntryRef FrameworkDir, Attrs.IsExhaustive |= inferred->second.Attrs.IsExhaustive; Attrs.NoUndeclaredIncludes |= inferred->second.Attrs.NoUndeclaredIncludes; - ModuleMapFile = inferred->second.ModuleMapFile; + ModuleMapFID = inferred->second.ModuleMapFID; } } } @@ -1069,7 +1068,7 @@ Module *ModuleMap::inferFrameworkModule(DirectoryEntryRef FrameworkDir, if (!canInfer) return nullptr; } else { - ModuleMapFile = getModuleMapFileForUniquing(Parent); + ModuleMapFID = getModuleMapFileIDForUniquing(Parent); } // Look for an umbrella header. @@ -1086,7 +1085,7 @@ Module *ModuleMap::inferFrameworkModule(DirectoryEntryRef FrameworkDir, Module *Result = new Module(ModuleName, SourceLocation(), Parent, /*IsFramework=*/true, /*IsExplicit=*/false, NumCreatedModules++); - InferredModuleAllowedBy[Result] = ModuleMapFile; + InferredModuleAllowedBy[Result] = ModuleMapFID; Result->IsInferred = true; if (!Parent) { if (LangOpts.CurrentModule == ModuleName) @@ -1307,28 +1306,34 @@ void ModuleMap::addHeader(Module *Mod, Module::Header Header, Cb->moduleMapAddHeader(Header.Entry.getName()); } -OptionalFileEntryRef -ModuleMap::getContainingModuleMapFile(const Module *Module) const { +FileID ModuleMap::getContainingModuleMapFileID(const Module *Module) const { if (Module->DefinitionLoc.isInvalid()) - return std::nullopt; + return {}; - return SourceMgr.getFileEntryRefForID( - SourceMgr.getFileID(Module->DefinitionLoc)); + return SourceMgr.getFileID(Module->DefinitionLoc); } OptionalFileEntryRef -ModuleMap::getModuleMapFileForUniquing(const Module *M) const { +ModuleMap::getContainingModuleMapFile(const Module *Module) const { + return SourceMgr.getFileEntryRefForID(getContainingModuleMapFileID(Module)); +} + +FileID ModuleMap::getModuleMapFileIDForUniquing(const Module *M) const { if (M->IsInferred) { assert(InferredModuleAllowedBy.count(M) && "missing inferred module map"); return InferredModuleAllowedBy.find(M)->second; } - return getContainingModuleMapFile(M); + return getContainingModuleMapFileID(M); +} + +OptionalFileEntryRef +ModuleMap::getModuleMapFileForUniquing(const Module *M) const { + return SourceMgr.getFileEntryRefForID(getModuleMapFileIDForUniquing(M)); } -void ModuleMap::setInferredModuleAllowedBy(Module *M, - OptionalFileEntryRef ModMap) { +void ModuleMap::setInferredModuleAllowedBy(Module *M, FileID ModMapFID) { assert(M->IsInferred && "module not inferred"); - InferredModuleAllowedBy[M] = ModMap; + InferredModuleAllowedBy[M] = ModMapFID; } std::error_code @@ -1517,7 +1522,7 @@ namespace clang { ModuleMap ⤅ /// The current module map file. - FileEntryRef ModuleMapFile; + FileID ModuleMapFID; /// Source location of most recent parsed module declaration SourceLocation CurrModuleDeclLoc; @@ -1585,13 +1590,12 @@ namespace clang { bool parseOptionalAttributes(Attributes &Attrs); public: - explicit ModuleMapParser(Lexer &L, SourceManager &SourceMgr, - const TargetInfo *Target, DiagnosticsEngine &Diags, - ModuleMap &Map, FileEntryRef ModuleMapFile, - DirectoryEntryRef Directory, bool IsSystem) + ModuleMapParser(Lexer &L, SourceManager &SourceMgr, + const TargetInfo *Target, DiagnosticsEngine &Diags, + ModuleMap &Map, FileID ModuleMapFID, + DirectoryEntryRef Directory, bool IsSystem) : L(L), SourceMgr(SourceMgr), Target(Target), Diags(Diags), Map(Map), - ModuleMapFile(ModuleMapFile), Directory(Directory), - IsSystem(IsSystem) { + ModuleMapFID(ModuleMapFID), Directory(Directory), IsSystem(IsSystem) { Tok.clear(); consumeToken(); } @@ -2011,11 +2015,13 @@ void ModuleMapParser::parseModuleDecl() { } if (TopLevelModule && - ModuleMapFile != Map.getContainingModuleMapFile(TopLevelModule)) { - assert(ModuleMapFile != Map.getModuleMapFileForUniquing(TopLevelModule) && + ModuleMapFID != Map.getContainingModuleMapFileID(TopLevelModule)) { + assert(ModuleMapFID != + Map.getModuleMapFileIDForUniquing(TopLevelModule) && "submodule defined in same file as 'module *' that allowed its " "top-level module"); - Map.addAdditionalModuleMapFile(TopLevelModule, ModuleMapFile); + Map.addAdditionalModuleMapFile( + TopLevelModule, *SourceMgr.getFileEntryRefForID(ModuleMapFID)); } } @@ -2120,7 +2126,8 @@ void ModuleMapParser::parseModuleDecl() { ActiveModule->NoUndeclaredIncludes = true; ActiveModule->Directory = Directory; - StringRef MapFileName(ModuleMapFile.getName()); + StringRef MapFileName( + SourceMgr.getFileEntryRefForID(ModuleMapFID)->getName()); if (MapFileName.ends_with("module.private.modulemap") || MapFileName.ends_with("module_private.map")) { ActiveModule->ModuleMapIsPrivate = true; @@ -2906,7 +2913,7 @@ void ModuleMapParser::parseInferredModuleDecl(bool Framework, bool Explicit) { // We'll be inferring framework modules for this directory. Map.InferredDirectories[Directory].InferModules = true; Map.InferredDirectories[Directory].Attrs = Attrs; - Map.InferredDirectories[Directory].ModuleMapFile = ModuleMapFile; + Map.InferredDirectories[Directory].ModuleMapFID = ModuleMapFID; // FIXME: Handle the 'framework' keyword. } @@ -3139,8 +3146,7 @@ bool ModuleMap::parseModuleMapFile(FileEntryRef File, bool IsSystem, Buffer->getBufferStart() + (Offset ? *Offset : 0), Buffer->getBufferEnd()); SourceLocation Start = L.getSourceLocation(); - ModuleMapParser Parser(L, SourceMgr, Target, Diags, *this, File, Dir, - IsSystem); + ModuleMapParser Parser(L, SourceMgr, Target, Diags, *this, ID, Dir, IsSystem); bool Result = Parser.parseModuleMapFile(); ParsedModuleMap[File] = Result; diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 221409d..2cc7f21 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -193,17 +193,17 @@ std::set GetAffectingModuleMaps(const Preprocessor &PP, const ModuleMap &MM = HS.getModuleMap(); SourceManager &SourceMgr = PP.getSourceManager(); - std::set ModuleMaps{}; - auto CollectIncludingModuleMaps = [&](FileEntryRef F) { + std::set ModuleMaps; + auto CollectIncludingModuleMaps = [&](FileID FID, FileEntryRef F) { if (!ModuleMaps.insert(F).second) return; - FileID FID = SourceMgr.translateFile(F); SourceLocation Loc = SourceMgr.getIncludeLoc(FID); // The include location of inferred module maps can point into the header // file that triggered the inferring. Cut off the walk if that's the case. while (Loc.isValid() && isModuleMap(SourceMgr.getFileCharacteristic(Loc))) { FID = SourceMgr.getFileID(Loc); - if (!ModuleMaps.insert(*SourceMgr.getFileEntryRefForID(FID)).second) + F = *SourceMgr.getFileEntryRefForID(FID); + if (!ModuleMaps.insert(F).second) break; Loc = SourceMgr.getIncludeLoc(FID); } @@ -216,13 +216,13 @@ std::set GetAffectingModuleMaps(const Preprocessor &PP, break; // The containing module map is affecting, because it's being pointed // into by Module::DefinitionLoc. - if (auto ModuleMapFile = MM.getContainingModuleMapFile(Mod)) - CollectIncludingModuleMaps(*ModuleMapFile); + if (FileID FID = MM.getContainingModuleMapFileID(Mod); FID.isValid()) + CollectIncludingModuleMaps(FID, *SourceMgr.getFileEntryRefForID(FID)); // For inferred modules, the module map that allowed inferring is not in // the include chain of the virtual containing module map file. It did // affect the compilation, though. - if (auto ModuleMapFile = MM.getModuleMapFileForUniquing(Mod)) - CollectIncludingModuleMaps(*ModuleMapFile); + if (FileID FID = MM.getModuleMapFileIDForUniquing(Mod); FID.isValid()) + CollectIncludingModuleMaps(FID, *SourceMgr.getFileEntryRefForID(FID)); } }; @@ -4728,7 +4728,6 @@ void ASTWriter::computeNonAffectingInputFiles() { continue; if (!isModuleMap(File.getFileCharacteristic()) || - AffectingModuleMaps.empty() || llvm::is_contained(AffectingModuleMaps, *Cache->OrigEntry)) continue; diff --git a/clang/test/ClangScanDeps/modules-extern-unrelated.m b/clang/test/ClangScanDeps/modules-extern-unrelated.m index 442ee90..76611c5 100644 --- a/clang/test/ClangScanDeps/modules-extern-unrelated.m +++ b/clang/test/ClangScanDeps/modules-extern-unrelated.m @@ -71,6 +71,7 @@ module second { header "second.h" } // CHECK-NEXT: "context-hash": "{{.*}}", // CHECK-NEXT: "file-deps": [ // CHECK-NEXT: "[[PREFIX]]/first/module.modulemap", +// CHECK-NEXT: "[[PREFIX]]/second/module.modulemap", // CHECK-NEXT: "[[PREFIX]]/second/second.h", // CHECK-NEXT: "[[PREFIX]]/second/second.modulemap" // CHECK-NEXT: ], -- cgit v1.1