aboutsummaryrefslogtreecommitdiff
path: root/clang/lib/Lex/TokenLexer.cpp
diff options
context:
space:
mode:
authorHaojian Wu <hokein.wu@gmail.com>2022-10-26 11:47:44 +0200
committerHaojian Wu <hokein.wu@gmail.com>2022-10-26 12:03:21 +0200
commit11c1d8b7fd82b32b37db47bcd8eac813b9667b5c (patch)
tree37ed2ab0adbd62bd2a116c3a228e94c67bf9f885 /clang/lib/Lex/TokenLexer.cpp
parent1a726cfa83667585dd87a9955ed5e331cad45d18 (diff)
downloadllvm-11c1d8b7fd82b32b37db47bcd8eac813b9667b5c.zip
llvm-11c1d8b7fd82b32b37db47bcd8eac813b9667b5c.tar.gz
llvm-11c1d8b7fd82b32b37db47bcd8eac813b9667b5c.tar.bz2
[Lex] Bring back the magic number 50 in updateConsecutiveMacroArgTokens.
This patch is a reland of 74e4f778cf16cbf7163b5c6de6027a43f5e9169f and f83347b0bedb22ea676861c8e4e2ed9c31371ade with the removed 50 trick back. The magic number 50 was removed in D134942, as a behavior change for performance reason. While it reduces the number of SLocEntry, it increases the usage of SourceLocation address space usage, which is critical for compiling large TU. This fixes a regression caused in D134942 -- clang failed to compile one of our internal files, complaining the file is too large to process because clang runs out of source location space (we spend 40% more address space!) Differential Revision: https://reviews.llvm.org/D136539
Diffstat (limited to 'clang/lib/Lex/TokenLexer.cpp')
-rw-r--r--clang/lib/Lex/TokenLexer.cpp107
1 files changed, 57 insertions, 50 deletions
diff --git a/clang/lib/Lex/TokenLexer.cpp b/clang/lib/Lex/TokenLexer.cpp
index efda6d0..0d141c6 100644
--- a/clang/lib/Lex/TokenLexer.cpp
+++ b/clang/lib/Lex/TokenLexer.cpp
@@ -25,6 +25,7 @@
#include "clang/Lex/Token.h"
#include "clang/Lex/VariadicMacroSupport.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
@@ -984,65 +985,71 @@ TokenLexer::getExpansionLocForMacroDefLoc(SourceLocation loc) const {
/// \arg begin_tokens will be updated to a position past all the found
/// consecutive tokens.
static void updateConsecutiveMacroArgTokens(SourceManager &SM,
- SourceLocation InstLoc,
+ SourceLocation ExpandLoc,
Token *&begin_tokens,
Token * end_tokens) {
- assert(begin_tokens < end_tokens);
-
- SourceLocation FirstLoc = begin_tokens->getLocation();
- SourceLocation CurLoc = FirstLoc;
-
- // Compare the source location offset of tokens and group together tokens that
- // are close, even if their locations point to different FileIDs. e.g.
- //
- // |bar | foo | cake | (3 tokens from 3 consecutive FileIDs)
- // ^ ^
- // |bar foo cake| (one SLocEntry chunk for all tokens)
- //
- // we can perform this "merge" since the token's spelling location depends
- // on the relative offset.
-
- Token *NextTok = begin_tokens + 1;
- for (; NextTok < end_tokens; ++NextTok) {
- SourceLocation NextLoc = NextTok->getLocation();
- if (CurLoc.isFileID() != NextLoc.isFileID())
- break; // Token from different kind of FileID.
-
- SourceLocation::IntTy RelOffs;
- if (!SM.isInSameSLocAddrSpace(CurLoc, NextLoc, &RelOffs))
- break; // Token from different local/loaded location.
- // Check that token is not before the previous token or more than 50
- // "characters" away.
- if (RelOffs < 0 || RelOffs > 50)
- break;
-
- if (CurLoc.isMacroID() && !SM.isWrittenInSameFile(CurLoc, NextLoc))
- break; // Token from a different macro.
-
- CurLoc = NextLoc;
+ assert(begin_tokens + 1 < end_tokens);
+ SourceLocation BeginLoc = begin_tokens->getLocation();
+ llvm::MutableArrayRef<Token> All(begin_tokens, end_tokens);
+ llvm::MutableArrayRef<Token> Partition;
+
+ auto NearLast = [&, Last = BeginLoc](SourceLocation Loc) mutable {
+ // The maximum distance between two consecutive tokens in a partition.
+ // This is an important trick to avoid using too much SourceLocation address
+ // space!
+ static constexpr SourceLocation::IntTy MaxDistance = 50;
+ auto Distance = Loc.getRawEncoding() - Last.getRawEncoding();
+ Last = Loc;
+ return Distance <= MaxDistance;
+ };
+
+ // Partition the tokens by their FileID.
+ // This is a hot function, and calling getFileID can be expensive, the
+ // implementation is optimized by reducing the number of getFileID.
+ if (BeginLoc.isFileID()) {
+ // Consecutive tokens not written in macros must be from the same file.
+ // (Neither #include nor eof can occur inside a macro argument.)
+ Partition = All.take_while([&](const Token &T) {
+ return T.getLocation().isFileID() && NearLast(T.getLocation());
+ });
+ } else {
+ // Call getFileID once to calculate the bounds, and use the cheaper
+ // sourcelocation-against-bounds comparison.
+ FileID BeginFID = SM.getFileID(BeginLoc);
+ SourceLocation Limit =
+ SM.getComposedLoc(BeginFID, SM.getFileIDSize(BeginFID));
+ Partition = All.take_while([&](const Token &T) {
+ return T.getLocation() >= BeginLoc && T.getLocation() < Limit &&
+ NearLast(T.getLocation());
+ });
}
+ assert(!Partition.empty());
// For the consecutive tokens, find the length of the SLocEntry to contain
// all of them.
- Token &LastConsecutiveTok = *(NextTok-1);
- SourceLocation::IntTy LastRelOffs = 0;
- SM.isInSameSLocAddrSpace(FirstLoc, LastConsecutiveTok.getLocation(),
- &LastRelOffs);
SourceLocation::UIntTy FullLength =
- LastRelOffs + LastConsecutiveTok.getLength();
-
+ Partition.back().getEndLoc().getRawEncoding() -
+ Partition.front().getLocation().getRawEncoding();
// Create a macro expansion SLocEntry that will "contain" all of the tokens.
SourceLocation Expansion =
- SM.createMacroArgExpansionLoc(FirstLoc, InstLoc,FullLength);
-
+ SM.createMacroArgExpansionLoc(BeginLoc, ExpandLoc, FullLength);
+
+#ifdef EXPENSIVE_CHECKS
+ assert(llvm::all_of(Partition.drop_front(),
+ [&SM, ID = SM.getFileID(Partition.front().getLocation())](
+ const Token &T) {
+ return ID == SM.getFileID(T.getLocation());
+ }) &&
+ "Must have the same FIleID!");
+#endif
// Change the location of the tokens from the spelling location to the new
// expanded location.
- for (; begin_tokens < NextTok; ++begin_tokens) {
- Token &Tok = *begin_tokens;
- SourceLocation::IntTy RelOffs = 0;
- SM.isInSameSLocAddrSpace(FirstLoc, Tok.getLocation(), &RelOffs);
- Tok.setLocation(Expansion.getLocWithOffset(RelOffs));
+ for (Token& T : Partition) {
+ SourceLocation::IntTy RelativeOffset =
+ T.getLocation().getRawEncoding() - BeginLoc.getRawEncoding();
+ T.setLocation(Expansion.getLocWithOffset(RelativeOffset));
}
+ begin_tokens = &Partition.back() + 1;
}
/// Creates SLocEntries and updates the locations of macro argument
@@ -1055,7 +1062,7 @@ void TokenLexer::updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc,
Token *end_tokens) {
SourceManager &SM = PP.getSourceManager();
- SourceLocation InstLoc =
+ SourceLocation ExpandLoc =
getExpansionLocForMacroDefLoc(ArgIdSpellLoc);
while (begin_tokens < end_tokens) {
@@ -1063,12 +1070,12 @@ void TokenLexer::updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc,
if (end_tokens - begin_tokens == 1) {
Token &Tok = *begin_tokens;
Tok.setLocation(SM.createMacroArgExpansionLoc(Tok.getLocation(),
- InstLoc,
+ ExpandLoc,
Tok.getLength()));
return;
}
- updateConsecutiveMacroArgTokens(SM, InstLoc, begin_tokens, end_tokens);
+ updateConsecutiveMacroArgTokens(SM, ExpandLoc, begin_tokens, end_tokens);
}
}