aboutsummaryrefslogtreecommitdiff
path: root/clang/lib/Frontend
diff options
context:
space:
mode:
authorThe Phantom Derpstorm <phdofthehouse@gmail.com>2024-06-12 03:16:02 -0400
committerGitHub <noreply@github.com>2024-06-12 09:16:02 +0200
commit5989450e0061dce8cff89d8acfdd5225c14cd065 (patch)
treef27a31bdd90497b029718f283d0702fec58e8477 /clang/lib/Frontend
parentb83f8c75e4cccf25abbe4ad76406ba0c382bf336 (diff)
downloadllvm-5989450e0061dce8cff89d8acfdd5225c14cd065.zip
llvm-5989450e0061dce8cff89d8acfdd5225c14cd065.tar.gz
llvm-5989450e0061dce8cff89d8acfdd5225c14cd065.tar.bz2
[clang][Sema, Lex, Parse] Preprocessor embed in C and C++ (and Obj-C and Obj-C++ by-proxy) (#68620)
This commit implements the entirety of the now-accepted [N3017 - Preprocessor Embed](https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3017.htm) and its sister C++ paper [p1967](https://wg21.link/p1967). It implements everything in the specification, and includes an implementation that drastically improves the time it takes to embed data in specific scenarios (the initialization of character type arrays). The mechanisms used to do this are used under the "as-if" rule, and in general when the system cannot detect it is initializing an array object in a variable declaration, will generate EmbedExpr AST node which will be expanded by AST consumers (CodeGen or constant expression evaluators) or expand embed directive as a comma expression. --------- Co-authored-by: Aaron Ballman <aaron@aaronballman.com> Co-authored-by: cor3ntin <corentinjabot@gmail.com> Co-authored-by: H. Vetinari <h.vetinari@gmx.com> Co-authored-by: Podchishchaeva, Mariya <mariya.podchishchaeva@intel.com>
Diffstat (limited to 'clang/lib/Frontend')
-rw-r--r--clang/lib/Frontend/CompilerInvocation.cpp8
-rw-r--r--clang/lib/Frontend/DependencyFile.cpp25
-rw-r--r--clang/lib/Frontend/DependencyGraph.cpp24
-rw-r--r--clang/lib/Frontend/InitPreprocessor.cpp8
-rw-r--r--clang/lib/Frontend/PrintPreprocessedOutput.cpp122
5 files changed, 179 insertions, 8 deletions
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 58694e5..cde4a84 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -4492,6 +4492,9 @@ static void GeneratePreprocessorArgs(const PreprocessorOptions &Opts,
if (Opts.DefineTargetOSMacros)
GenerateArg(Consumer, OPT_fdefine_target_os_macros);
+ for (const auto &EmbedEntry : Opts.EmbedEntries)
+ GenerateArg(Consumer, OPT_embed_dir_EQ, EmbedEntry);
+
// Don't handle LexEditorPlaceholders. It is implied by the action that is
// generated elsewhere.
}
@@ -4584,6 +4587,11 @@ static bool ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args,
}
}
+ for (const auto *A : Args.filtered(OPT_embed_dir_EQ)) {
+ StringRef Val = A->getValue();
+ Opts.EmbedEntries.push_back(std::string(Val));
+ }
+
// Always avoid lexing editor placeholders when we're just running the
// preprocessor as we never want to emit the
// "editor placeholder in source file" error in PP only mode.
diff --git a/clang/lib/Frontend/DependencyFile.cpp b/clang/lib/Frontend/DependencyFile.cpp
index 369816e..528eae2 100644
--- a/clang/lib/Frontend/DependencyFile.cpp
+++ b/clang/lib/Frontend/DependencyFile.cpp
@@ -62,6 +62,19 @@ struct DepCollectorPPCallbacks : public PPCallbacks {
/*IsMissing=*/false);
}
+ void EmbedDirective(SourceLocation, StringRef, bool,
+ OptionalFileEntryRef File,
+ const LexEmbedParametersResult &) override {
+ assert(File && "expected to only be called when the file is found");
+ StringRef FileName =
+ llvm::sys::path::remove_leading_dotslash(File->getName());
+ DepCollector.maybeAddDependency(FileName,
+ /*FromModule*/ false,
+ /*IsSystem*/ false,
+ /*IsModuleFile*/ false,
+ /*IsMissing*/ false);
+ }
+
void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
StringRef FileName, bool IsAngled,
CharSourceRange FilenameRange,
@@ -77,6 +90,18 @@ struct DepCollectorPPCallbacks : public PPCallbacks {
// Files that actually exist are handled by FileChanged.
}
+ void HasEmbed(SourceLocation, StringRef, bool,
+ OptionalFileEntryRef File) override {
+ if (!File)
+ return;
+ StringRef Filename =
+ llvm::sys::path::remove_leading_dotslash(File->getName());
+ DepCollector.maybeAddDependency(Filename,
+ /*FromModule=*/false, false,
+ /*IsModuleFile=*/false,
+ /*IsMissing=*/false);
+ }
+
void HasInclude(SourceLocation Loc, StringRef SpelledFilename, bool IsAngled,
OptionalFileEntryRef File,
SrcMgr::CharacteristicKind FileType) override {
diff --git a/clang/lib/Frontend/DependencyGraph.cpp b/clang/lib/Frontend/DependencyGraph.cpp
index 20e5f23..c23ce66 100644
--- a/clang/lib/Frontend/DependencyGraph.cpp
+++ b/clang/lib/Frontend/DependencyGraph.cpp
@@ -43,7 +43,7 @@ private:
public:
DependencyGraphCallback(const Preprocessor *_PP, StringRef OutputFile,
StringRef SysRoot)
- : PP(_PP), OutputFile(OutputFile.str()), SysRoot(SysRoot.str()) { }
+ : PP(_PP), OutputFile(OutputFile.str()), SysRoot(SysRoot.str()) {}
void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
StringRef FileName, bool IsAngled,
@@ -53,6 +53,10 @@ public:
bool ModuleImported,
SrcMgr::CharacteristicKind FileType) override;
+ void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+ OptionalFileEntryRef File,
+ const LexEmbedParametersResult &Params) override;
+
void EndOfMainFile() override {
OutputGraphFile();
}
@@ -86,6 +90,24 @@ void DependencyGraphCallback::InclusionDirective(
AllFiles.insert(*FromFile);
}
+void DependencyGraphCallback::EmbedDirective(SourceLocation HashLoc, StringRef,
+ bool, OptionalFileEntryRef File,
+ const LexEmbedParametersResult &) {
+ if (!File)
+ return;
+
+ SourceManager &SM = PP->getSourceManager();
+ OptionalFileEntryRef FromFile =
+ SM.getFileEntryRefForID(SM.getFileID(SM.getExpansionLoc(HashLoc)));
+ if (!FromFile)
+ return;
+
+ Dependencies[*FromFile].push_back(*File);
+
+ AllFiles.insert(*File);
+ AllFiles.insert(*FromFile);
+}
+
raw_ostream &
DependencyGraphCallback::writeNodeReference(raw_ostream &OS,
const FileEntry *Node) {
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index e8c8a51..2d5c94c 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -508,6 +508,14 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,
Builder.defineMacro("__STDC_UTF_16__", "1");
Builder.defineMacro("__STDC_UTF_32__", "1");
+ // __has_embed definitions
+ Builder.defineMacro("__STDC_EMBED_NOT_FOUND__",
+ llvm::itostr(static_cast<int>(EmbedResult::NotFound)));
+ Builder.defineMacro("__STDC_EMBED_FOUND__",
+ llvm::itostr(static_cast<int>(EmbedResult::Found)));
+ Builder.defineMacro("__STDC_EMBED_EMPTY__",
+ llvm::itostr(static_cast<int>(EmbedResult::Empty)));
+
if (LangOpts.ObjC)
Builder.defineMacro("__OBJC__");
diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
index a26d2c3..0592423 100644
--- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp
+++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
@@ -11,11 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#include "clang/Frontend/Utils.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Frontend/PreprocessorOutputOptions.h"
+#include "clang/Frontend/Utils.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/PPCallbacks.h"
#include "clang/Lex/Pragma.h"
@@ -93,6 +93,7 @@ private:
bool DisableLineMarkers;
bool DumpDefines;
bool DumpIncludeDirectives;
+ bool DumpEmbedDirectives;
bool UseLineDirectives;
bool IsFirstFileEntered;
bool MinimizeWhitespace;
@@ -100,6 +101,7 @@ private:
bool KeepSystemIncludes;
raw_ostream *OrigOS;
std::unique_ptr<llvm::raw_null_ostream> NullOS;
+ unsigned NumToksToSkip;
Token PrevTok;
Token PrevPrevTok;
@@ -107,14 +109,16 @@ private:
public:
PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream *os, bool lineMarkers,
bool defines, bool DumpIncludeDirectives,
- bool UseLineDirectives, bool MinimizeWhitespace,
- bool DirectivesOnly, bool KeepSystemIncludes)
+ bool DumpEmbedDirectives, bool UseLineDirectives,
+ bool MinimizeWhitespace, bool DirectivesOnly,
+ bool KeepSystemIncludes)
: PP(pp), SM(PP.getSourceManager()), ConcatInfo(PP), OS(os),
DisableLineMarkers(lineMarkers), DumpDefines(defines),
DumpIncludeDirectives(DumpIncludeDirectives),
+ DumpEmbedDirectives(DumpEmbedDirectives),
UseLineDirectives(UseLineDirectives),
MinimizeWhitespace(MinimizeWhitespace), DirectivesOnly(DirectivesOnly),
- KeepSystemIncludes(KeepSystemIncludes), OrigOS(os) {
+ KeepSystemIncludes(KeepSystemIncludes), OrigOS(os), NumToksToSkip(0) {
CurLine = 0;
CurFilename += "<uninit>";
EmittedTokensOnThisLine = false;
@@ -129,6 +133,10 @@ public:
PrevPrevTok.startToken();
}
+ /// Returns true if #embed directives should be expanded into a comma-
+ /// delimited list of integer constants or not.
+ bool expandEmbedContents() const { return !DumpEmbedDirectives; }
+
bool isMinimizeWhitespace() const { return MinimizeWhitespace; }
void setEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; }
@@ -149,6 +157,9 @@ public:
void FileChanged(SourceLocation Loc, FileChangeReason Reason,
SrcMgr::CharacteristicKind FileType,
FileID PrevFID) override;
+ void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+ OptionalFileEntryRef File,
+ const LexEmbedParametersResult &Params) override;
void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
StringRef FileName, bool IsAngled,
CharSourceRange FilenameRange,
@@ -232,6 +243,9 @@ public:
void BeginModule(const Module *M);
void EndModule(const Module *M);
+
+ unsigned GetNumToksToSkip() const { return NumToksToSkip; }
+ void ResetSkipToks() { NumToksToSkip = 0; }
};
} // end anonymous namespace
@@ -399,6 +413,74 @@ void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
}
}
+void PrintPPOutputPPCallbacks::EmbedDirective(
+ SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+ OptionalFileEntryRef File, const LexEmbedParametersResult &Params) {
+ if (!DumpEmbedDirectives)
+ return;
+
+ // The EmbedDirective() callback is called before we produce the annotation
+ // token stream for the directive. We skip printing the annotation tokens
+ // within PrintPreprocessedTokens(), but we also need to skip the prefix,
+ // suffix, and if_empty tokens as those are inserted directly into the token
+ // stream and would otherwise be printed immediately after printing the
+ // #embed directive.
+ //
+ // FIXME: counting tokens to skip is a kludge but we have no way to know
+ // which tokens were inserted as part of the embed and which ones were
+ // explicitly written by the user.
+ MoveToLine(HashLoc, /*RequireStartOfLine=*/true);
+ *OS << "#embed " << (IsAngled ? '<' : '"') << FileName
+ << (IsAngled ? '>' : '"');
+
+ auto PrintToks = [&](llvm::ArrayRef<Token> Toks) {
+ SmallString<128> SpellingBuffer;
+ for (const Token &T : Toks) {
+ if (T.hasLeadingSpace())
+ *OS << " ";
+ *OS << PP.getSpelling(T, SpellingBuffer);
+ }
+ };
+ bool SkipAnnotToks = true;
+ if (Params.MaybeIfEmptyParam) {
+ *OS << " if_empty(";
+ PrintToks(Params.MaybeIfEmptyParam->Tokens);
+ *OS << ")";
+ // If the file is empty, we can skip those tokens. If the file is not
+ // empty, we skip the annotation tokens.
+ if (File && !File->getSize()) {
+ NumToksToSkip += Params.MaybeIfEmptyParam->Tokens.size();
+ SkipAnnotToks = false;
+ }
+ }
+
+ if (Params.MaybeLimitParam) {
+ *OS << " limit(" << Params.MaybeLimitParam->Limit << ")";
+ }
+ if (Params.MaybeOffsetParam) {
+ *OS << " clang::offset(" << Params.MaybeOffsetParam->Offset << ")";
+ }
+ if (Params.MaybePrefixParam) {
+ *OS << " prefix(";
+ PrintToks(Params.MaybePrefixParam->Tokens);
+ *OS << ")";
+ NumToksToSkip += Params.MaybePrefixParam->Tokens.size();
+ }
+ if (Params.MaybeSuffixParam) {
+ *OS << " suffix(";
+ PrintToks(Params.MaybeSuffixParam->Tokens);
+ *OS << ")";
+ NumToksToSkip += Params.MaybeSuffixParam->Tokens.size();
+ }
+
+ // We may need to skip the annotation token.
+ if (SkipAnnotToks)
+ NumToksToSkip++;
+
+ *OS << " /* clang -E -dE */";
+ setEmittedDirectiveOnThisLine();
+}
+
void PrintPPOutputPPCallbacks::InclusionDirective(
SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName,
bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File,
@@ -678,7 +760,7 @@ void PrintPPOutputPPCallbacks::HandleWhitespaceBeforeTok(const Token &Tok,
if (Tok.is(tok::eof) ||
(Tok.isAnnotation() && !Tok.is(tok::annot_header_unit) &&
!Tok.is(tok::annot_module_begin) && !Tok.is(tok::annot_module_end) &&
- !Tok.is(tok::annot_repl_input_end)))
+ !Tok.is(tok::annot_repl_input_end) && !Tok.is(tok::annot_embed)))
return;
// EmittedDirectiveOnThisLine takes priority over RequireSameLine.
@@ -878,6 +960,27 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
std::string Name = M->getFullModuleName();
Callbacks->OS->write(Name.data(), Name.size());
Callbacks->HandleNewlinesInToken(Name.data(), Name.size());
+ } else if (Tok.is(tok::annot_embed)) {
+ // Manually explode the binary data out to a stream of comma-delimited
+ // integer values. If the user passed -dE, that is handled by the
+ // EmbedDirective() callback. We should only get here if the user did not
+ // pass -dE.
+ assert(Callbacks->expandEmbedContents() &&
+ "did not expect an embed annotation");
+ auto *Data =
+ reinterpret_cast<EmbedAnnotationData *>(Tok.getAnnotationValue());
+
+ // Loop over the contents and print them as a comma-delimited list of
+ // values.
+ bool PrintComma = false;
+ for (auto Iter = Data->BinaryData.begin(), End = Data->BinaryData.end();
+ Iter != End; ++Iter) {
+ if (PrintComma)
+ *Callbacks->OS << ", ";
+ *Callbacks->OS << static_cast<unsigned>(*Iter);
+ PrintComma = true;
+ }
+ IsStartOfLine = true;
} else if (Tok.isAnnotation()) {
// Ignore annotation tokens created by pragmas - the pragmas themselves
// will be reproduced in the preprocessed output.
@@ -926,6 +1029,10 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
if (Tok.is(tok::eof)) break;
PP.Lex(Tok);
+ // If lexing that token causes us to need to skip future tokens, do so now.
+ for (unsigned I = 0, Skip = Callbacks->GetNumToksToSkip(); I < Skip; ++I)
+ PP.Lex(Tok);
+ Callbacks->ResetSkipToks();
}
}
@@ -982,8 +1089,9 @@ void clang::DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS,
PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(
PP, OS, !Opts.ShowLineMarkers, Opts.ShowMacros,
- Opts.ShowIncludeDirectives, Opts.UseLineDirectives,
- Opts.MinimizeWhitespace, Opts.DirectivesOnly, Opts.KeepSystemIncludes);
+ Opts.ShowIncludeDirectives, Opts.ShowEmbedDirectives,
+ Opts.UseLineDirectives, Opts.MinimizeWhitespace, Opts.DirectivesOnly,
+ Opts.KeepSystemIncludes);
// Expand macros in pragmas with -fms-extensions. The assumption is that
// the majority of pragmas in such a file will be Microsoft pragmas.