diff options
author | Mariya Podchishchaeva <mariya.podchishchaeva@intel.com> | 2024-06-20 14:38:46 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-20 14:38:46 +0200 |
commit | 41c6e4379204ffc00948edd33d59ba5ebbceaba2 (patch) | |
tree | b8508b8f0e7f108d1f6759922f49f367bd24fb00 /clang/lib/Frontend/PrintPreprocessedOutput.cpp | |
parent | af82e63c28f67bf61a9b2b0e64bc55be4acf520e (diff) | |
download | llvm-41c6e4379204ffc00948edd33d59ba5ebbceaba2.zip llvm-41c6e4379204ffc00948edd33d59ba5ebbceaba2.tar.gz llvm-41c6e4379204ffc00948edd33d59ba5ebbceaba2.tar.bz2 |
Reland [clang][Sema, Lex, Parse] Preprocessor embed in C and C++ (#95802)
This commit implements the entirety of the now-accepted
[N3017 - Preprocessor Embed](https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3017.htm)
and its sister C++ paper [P1967](https://wg21.link/p1967). It implements
everything in the specification, and includes an implementation that
drastically improves the time it takes to embed data in specific
scenarios (the initialization of character type arrays). The mechanisms
used to do this are applied under the "as-if" rule. In general, when the
system cannot detect that it is initializing an array object in a variable
declaration, it either generates an EmbedExpr AST node, which is expanded by
AST consumers (CodeGen or constant expression evaluators), or expands
the embed directive as a comma expression.
This reverts commit
https://github.com/llvm/llvm-project/commit/682d461d5a231cee54d65910e6341769419a67d7.
---------
Co-authored-by: The Phantom Derpstorm <phdofthehouse@gmail.com>
Co-authored-by: Aaron Ballman <aaron@aaronballman.com>
Co-authored-by: cor3ntin <corentinjabot@gmail.com>
Co-authored-by: H. Vetinari <h.vetinari@gmx.com>
Diffstat (limited to 'clang/lib/Frontend/PrintPreprocessedOutput.cpp')
-rw-r--r-- | clang/lib/Frontend/PrintPreprocessedOutput.cpp | 122 |
1 files changed, 115 insertions, 7 deletions
diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp index a26d2c3..0592423 100644 --- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp +++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// -#include "clang/Frontend/Utils.h" #include "clang/Basic/CharInfo.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/SourceManager.h" #include "clang/Frontend/PreprocessorOutputOptions.h" +#include "clang/Frontend/Utils.h" #include "clang/Lex/MacroInfo.h" #include "clang/Lex/PPCallbacks.h" #include "clang/Lex/Pragma.h" @@ -93,6 +93,7 @@ private: bool DisableLineMarkers; bool DumpDefines; bool DumpIncludeDirectives; + bool DumpEmbedDirectives; bool UseLineDirectives; bool IsFirstFileEntered; bool MinimizeWhitespace; @@ -100,6 +101,7 @@ private: bool KeepSystemIncludes; raw_ostream *OrigOS; std::unique_ptr<llvm::raw_null_ostream> NullOS; + unsigned NumToksToSkip; Token PrevTok; Token PrevPrevTok; @@ -107,14 +109,16 @@ private: public: PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream *os, bool lineMarkers, bool defines, bool DumpIncludeDirectives, - bool UseLineDirectives, bool MinimizeWhitespace, - bool DirectivesOnly, bool KeepSystemIncludes) + bool DumpEmbedDirectives, bool UseLineDirectives, + bool MinimizeWhitespace, bool DirectivesOnly, + bool KeepSystemIncludes) : PP(pp), SM(PP.getSourceManager()), ConcatInfo(PP), OS(os), DisableLineMarkers(lineMarkers), DumpDefines(defines), DumpIncludeDirectives(DumpIncludeDirectives), + DumpEmbedDirectives(DumpEmbedDirectives), UseLineDirectives(UseLineDirectives), MinimizeWhitespace(MinimizeWhitespace), DirectivesOnly(DirectivesOnly), - KeepSystemIncludes(KeepSystemIncludes), OrigOS(os) { + KeepSystemIncludes(KeepSystemIncludes), OrigOS(os), NumToksToSkip(0) { CurLine = 0; CurFilename += "<uninit>"; EmittedTokensOnThisLine = false; @@ -129,6 +133,10 @@ public: 
PrevPrevTok.startToken(); } + /// Returns true if #embed directives should be expanded into a comma- + /// delimited list of integer constants or not. + bool expandEmbedContents() const { return !DumpEmbedDirectives; } + bool isMinimizeWhitespace() const { return MinimizeWhitespace; } void setEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; } @@ -149,6 +157,9 @@ public: void FileChanged(SourceLocation Loc, FileChangeReason Reason, SrcMgr::CharacteristicKind FileType, FileID PrevFID) override; + void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled, + OptionalFileEntryRef File, + const LexEmbedParametersResult &Params) override; void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, bool IsAngled, CharSourceRange FilenameRange, @@ -232,6 +243,9 @@ public: void BeginModule(const Module *M); void EndModule(const Module *M); + + unsigned GetNumToksToSkip() const { return NumToksToSkip; } + void ResetSkipToks() { NumToksToSkip = 0; } }; } // end anonymous namespace @@ -399,6 +413,74 @@ void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc, } } +void PrintPPOutputPPCallbacks::EmbedDirective( + SourceLocation HashLoc, StringRef FileName, bool IsAngled, + OptionalFileEntryRef File, const LexEmbedParametersResult &Params) { + if (!DumpEmbedDirectives) + return; + + // The EmbedDirective() callback is called before we produce the annotation + // token stream for the directive. We skip printing the annotation tokens + // within PrintPreprocessedTokens(), but we also need to skip the prefix, + // suffix, and if_empty tokens as those are inserted directly into the token + // stream and would otherwise be printed immediately after printing the + // #embed directive. + // + // FIXME: counting tokens to skip is a kludge but we have no way to know + // which tokens were inserted as part of the embed and which ones were + // explicitly written by the user. 
+ MoveToLine(HashLoc, /*RequireStartOfLine=*/true); + *OS << "#embed " << (IsAngled ? '<' : '"') << FileName + << (IsAngled ? '>' : '"'); + + auto PrintToks = [&](llvm::ArrayRef<Token> Toks) { + SmallString<128> SpellingBuffer; + for (const Token &T : Toks) { + if (T.hasLeadingSpace()) + *OS << " "; + *OS << PP.getSpelling(T, SpellingBuffer); + } + }; + bool SkipAnnotToks = true; + if (Params.MaybeIfEmptyParam) { + *OS << " if_empty("; + PrintToks(Params.MaybeIfEmptyParam->Tokens); + *OS << ")"; + // If the file is empty, we can skip those tokens. If the file is not + // empty, we skip the annotation tokens. + if (File && !File->getSize()) { + NumToksToSkip += Params.MaybeIfEmptyParam->Tokens.size(); + SkipAnnotToks = false; + } + } + + if (Params.MaybeLimitParam) { + *OS << " limit(" << Params.MaybeLimitParam->Limit << ")"; + } + if (Params.MaybeOffsetParam) { + *OS << " clang::offset(" << Params.MaybeOffsetParam->Offset << ")"; + } + if (Params.MaybePrefixParam) { + *OS << " prefix("; + PrintToks(Params.MaybePrefixParam->Tokens); + *OS << ")"; + NumToksToSkip += Params.MaybePrefixParam->Tokens.size(); + } + if (Params.MaybeSuffixParam) { + *OS << " suffix("; + PrintToks(Params.MaybeSuffixParam->Tokens); + *OS << ")"; + NumToksToSkip += Params.MaybeSuffixParam->Tokens.size(); + } + + // We may need to skip the annotation token. 
+ if (SkipAnnotToks) + NumToksToSkip++; + + *OS << " /* clang -E -dE */"; + setEmittedDirectiveOnThisLine(); +} + void PrintPPOutputPPCallbacks::InclusionDirective( SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File, @@ -678,7 +760,7 @@ void PrintPPOutputPPCallbacks::HandleWhitespaceBeforeTok(const Token &Tok, if (Tok.is(tok::eof) || (Tok.isAnnotation() && !Tok.is(tok::annot_header_unit) && !Tok.is(tok::annot_module_begin) && !Tok.is(tok::annot_module_end) && - !Tok.is(tok::annot_repl_input_end))) + !Tok.is(tok::annot_repl_input_end) && !Tok.is(tok::annot_embed))) return; // EmittedDirectiveOnThisLine takes priority over RequireSameLine. @@ -878,6 +960,27 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok, std::string Name = M->getFullModuleName(); Callbacks->OS->write(Name.data(), Name.size()); Callbacks->HandleNewlinesInToken(Name.data(), Name.size()); + } else if (Tok.is(tok::annot_embed)) { + // Manually explode the binary data out to a stream of comma-delimited + // integer values. If the user passed -dE, that is handled by the + // EmbedDirective() callback. We should only get here if the user did not + // pass -dE. + assert(Callbacks->expandEmbedContents() && + "did not expect an embed annotation"); + auto *Data = + reinterpret_cast<EmbedAnnotationData *>(Tok.getAnnotationValue()); + + // Loop over the contents and print them as a comma-delimited list of + // values. + bool PrintComma = false; + for (auto Iter = Data->BinaryData.begin(), End = Data->BinaryData.end(); + Iter != End; ++Iter) { + if (PrintComma) + *Callbacks->OS << ", "; + *Callbacks->OS << static_cast<unsigned>(*Iter); + PrintComma = true; + } + IsStartOfLine = true; } else if (Tok.isAnnotation()) { // Ignore annotation tokens created by pragmas - the pragmas themselves // will be reproduced in the preprocessed output. 
@@ -926,6 +1029,10 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok, if (Tok.is(tok::eof)) break; PP.Lex(Tok); + // If lexing that token causes us to need to skip future tokens, do so now. + for (unsigned I = 0, Skip = Callbacks->GetNumToksToSkip(); I < Skip; ++I) + PP.Lex(Tok); + Callbacks->ResetSkipToks(); } } @@ -982,8 +1089,9 @@ void clang::DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS, PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks( PP, OS, !Opts.ShowLineMarkers, Opts.ShowMacros, - Opts.ShowIncludeDirectives, Opts.UseLineDirectives, - Opts.MinimizeWhitespace, Opts.DirectivesOnly, Opts.KeepSystemIncludes); + Opts.ShowIncludeDirectives, Opts.ShowEmbedDirectives, + Opts.UseLineDirectives, Opts.MinimizeWhitespace, Opts.DirectivesOnly, + Opts.KeepSystemIncludes); // Expand macros in pragmas with -fms-extensions. The assumption is that // the majority of pragmas in such a file will be Microsoft pragmas. |