aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThe Phantom Derpstorm <phdofthehouse@gmail.com>2024-06-12 03:16:02 -0400
committerGitHub <noreply@github.com>2024-06-12 09:16:02 +0200
commit5989450e0061dce8cff89d8acfdd5225c14cd065 (patch)
treef27a31bdd90497b029718f283d0702fec58e8477
parentb83f8c75e4cccf25abbe4ad76406ba0c382bf336 (diff)
downloadllvm-5989450e0061dce8cff89d8acfdd5225c14cd065.zip
llvm-5989450e0061dce8cff89d8acfdd5225c14cd065.tar.gz
llvm-5989450e0061dce8cff89d8acfdd5225c14cd065.tar.bz2
[clang][Sema, Lex, Parse] Preprocessor embed in C and C++ (and Obj-C and Obj-C++ by-proxy) (#68620)
This commit implements the entirety of the now-accepted [N3017 - Preprocessor Embed](https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3017.htm) and its sister C++ paper [p1967](https://wg21.link/p1967). It implements everything in the specification, and includes an implementation that drastically improves the time it takes to embed data in specific scenarios (the initialization of character type arrays). The mechanisms used to do this are used under the "as-if" rule, and in general when the system cannot detect it is initializing an array object in a variable declaration, will generate EmbedExpr AST node which will be expanded by AST consumers (CodeGen or constant expression evaluators) or expand embed directive as a comma expression. --------- Co-authored-by: Aaron Ballman <aaron@aaronballman.com> Co-authored-by: cor3ntin <corentinjabot@gmail.com> Co-authored-by: H. Vetinari <h.vetinari@gmx.com> Co-authored-by: Podchishchaeva, Mariya <mariya.podchishchaeva@intel.com>
-rw-r--r--clang-tools-extra/test/pp-trace/pp-trace-macro.cpp9
-rw-r--r--clang/docs/LanguageExtensions.rst24
-rw-r--r--clang/include/clang/AST/Expr.h160
-rw-r--r--clang/include/clang/AST/RecursiveASTVisitor.h5
-rw-r--r--clang/include/clang/AST/TextNodeDumper.h1
-rw-r--r--clang/include/clang/Basic/DiagnosticCommonKinds.td3
-rw-r--r--clang/include/clang/Basic/DiagnosticLexKinds.td12
-rw-r--r--clang/include/clang/Basic/DiagnosticSemaKinds.td2
-rw-r--r--clang/include/clang/Basic/FileManager.h11
-rw-r--r--clang/include/clang/Basic/StmtNodes.td1
-rw-r--r--clang/include/clang/Basic/TokenKinds.def6
-rw-r--r--clang/include/clang/Driver/Options.td6
-rw-r--r--clang/include/clang/Frontend/PreprocessorOutputOptions.h3
-rw-r--r--clang/include/clang/Lex/PPCallbacks.h54
-rw-r--r--clang/include/clang/Lex/PPDirectiveParameter.h33
-rw-r--r--clang/include/clang/Lex/PPEmbedParameters.h94
-rw-r--r--clang/include/clang/Lex/Preprocessor.h71
-rw-r--r--clang/include/clang/Lex/PreprocessorOptions.h3
-rw-r--r--clang/include/clang/Parse/Parser.h3
-rw-r--r--clang/include/clang/Sema/Sema.h4
-rw-r--r--clang/include/clang/Serialization/ASTBitCodes.h3
-rw-r--r--clang/lib/AST/Expr.cpp12
-rw-r--r--clang/lib/AST/ExprClassification.cpp5
-rw-r--r--clang/lib/AST/ExprConstant.cpp63
-rw-r--r--clang/lib/AST/Interp/ByteCodeExprGen.cpp20
-rw-r--r--clang/lib/AST/Interp/ByteCodeExprGen.h1
-rw-r--r--clang/lib/AST/ItaniumMangle.cpp1
-rw-r--r--clang/lib/AST/StmtPrinter.cpp4
-rw-r--r--clang/lib/AST/StmtProfile.cpp2
-rw-r--r--clang/lib/AST/TextNodeDumper.cpp5
-rw-r--r--clang/lib/Basic/FileManager.cpp7
-rw-r--r--clang/lib/Basic/IdentifierTable.cpp5
-rw-r--r--clang/lib/CodeGen/CGExprAgg.cpp40
-rw-r--r--clang/lib/CodeGen/CGExprConstant.cpp118
-rw-r--r--clang/lib/CodeGen/CGExprScalar.cpp7
-rw-r--r--clang/lib/Driver/ToolChains/Clang.cpp6
-rw-r--r--clang/lib/Frontend/CompilerInvocation.cpp8
-rw-r--r--clang/lib/Frontend/DependencyFile.cpp25
-rw-r--r--clang/lib/Frontend/DependencyGraph.cpp24
-rw-r--r--clang/lib/Frontend/InitPreprocessor.cpp8
-rw-r--r--clang/lib/Frontend/PrintPreprocessedOutput.cpp122
-rw-r--r--clang/lib/Lex/PPDirectives.cpp477
-rw-r--r--clang/lib/Lex/PPExpressions.cpp49
-rw-r--r--clang/lib/Lex/PPMacroExpansion.cpp111
-rw-r--r--clang/lib/Lex/TokenConcatenation.cpp5
-rw-r--r--clang/lib/Parse/ParseExpr.cpp37
-rw-r--r--clang/lib/Parse/ParseInit.cpp32
-rw-r--r--clang/lib/Parse/ParseTemplate.cpp41
-rw-r--r--clang/lib/Sema/SemaExceptionSpec.cpp1
-rw-r--r--clang/lib/Sema/SemaExpr.cpp17
-rw-r--r--clang/lib/Sema/SemaInit.cpp113
-rw-r--r--clang/lib/Sema/TreeTransform.h5
-rw-r--r--clang/lib/Serialization/ASTReaderStmt.cpp15
-rw-r--r--clang/lib/Serialization/ASTWriterStmt.cpp11
-rw-r--r--clang/lib/StaticAnalyzer/Core/ExprEngine.cpp4
-rw-r--r--clang/test/C/C2x/Inputs/bits.bin1
-rw-r--r--clang/test/C/C2x/Inputs/boop.h1
-rw-r--r--clang/test/C/C2x/Inputs/i.dat1
-rw-r--r--clang/test/C/C2x/Inputs/jump.wav1
-rw-r--r--clang/test/C/C2x/Inputs/s.dat1
-rw-r--r--clang/test/C/C2x/n3017.c216
-rw-r--r--clang/test/Preprocessor/Inputs/jk.txt1
-rw-r--r--clang/test/Preprocessor/Inputs/media/art.txt9
-rw-r--r--clang/test/Preprocessor/Inputs/media/empty0
-rw-r--r--clang/test/Preprocessor/Inputs/null_byte.binbin0 -> 1 bytes
-rw-r--r--clang/test/Preprocessor/Inputs/numbers.txt1
-rw-r--r--clang/test/Preprocessor/Inputs/single_byte.txt1
-rw-r--r--clang/test/Preprocessor/embed___has_embed.c60
-rw-r--r--clang/test/Preprocessor/embed___has_embed_parsing_errors.c240
-rw-r--r--clang/test/Preprocessor/embed___has_embed_supported.c24
-rw-r--r--clang/test/Preprocessor/embed_art.c104
-rw-r--r--clang/test/Preprocessor/embed_codegen.cpp84
-rw-r--r--clang/test/Preprocessor/embed_constexpr.cpp97
-rw-r--r--clang/test/Preprocessor/embed_dependencies.c20
-rw-r--r--clang/test/Preprocessor/embed_ext_compat_diags.c16
-rw-r--r--clang/test/Preprocessor/embed_feature_test.cpp7
-rw-r--r--clang/test/Preprocessor/embed_file_not_found_chevron.c4
-rw-r--r--clang/test/Preprocessor/embed_file_not_found_quote.c4
-rw-r--r--clang/test/Preprocessor/embed_init.c29
-rw-r--r--clang/test/Preprocessor/embed_parameter_if_empty.c24
-rw-r--r--clang/test/Preprocessor/embed_parameter_limit.c94
-rw-r--r--clang/test/Preprocessor/embed_parameter_offset.c89
-rw-r--r--clang/test/Preprocessor/embed_parameter_prefix.c38
-rw-r--r--clang/test/Preprocessor/embed_parameter_suffix.c39
-rw-r--r--clang/test/Preprocessor/embed_parameter_unrecognized.c9
-rw-r--r--clang/test/Preprocessor/embed_parsing_errors.c130
-rw-r--r--clang/test/Preprocessor/embed_path_chevron.c8
-rw-r--r--clang/test/Preprocessor/embed_path_quote.c8
-rw-r--r--clang/test/Preprocessor/embed_preprocess_to_file.c39
-rw-r--r--clang/test/Preprocessor/embed_single_entity.c7
-rw-r--r--clang/test/Preprocessor/embed_weird.cpp98
-rw-r--r--clang/test/Preprocessor/init-aarch64.c3
-rw-r--r--clang/test/Preprocessor/init.c3
-rw-r--r--clang/test/Preprocessor/single_byte.txt1
-rw-r--r--clang/tools/libclang/CXCursor.cpp1
-rw-r--r--clang/www/c_status.html2
96 files changed, 3317 insertions, 107 deletions
diff --git a/clang-tools-extra/test/pp-trace/pp-trace-macro.cpp b/clang-tools-extra/test/pp-trace/pp-trace-macro.cpp
index 1d85607..7c2a231 100644
--- a/clang-tools-extra/test/pp-trace/pp-trace-macro.cpp
+++ b/clang-tools-extra/test/pp-trace/pp-trace-macro.cpp
@@ -31,6 +31,15 @@ X
// CHECK: MacroNameTok: __STDC_UTF_32__
// CHECK-NEXT: MacroDirective: MD_Define
// CHECK: - Callback: MacroDefined
+// CHECK-NEXT: MacroNameTok: __STDC_EMBED_NOT_FOUND__
+// CHECK-NEXT: MacroDirective: MD_Define
+// CHECK: - Callback: MacroDefined
+// CHECK-NEXT: MacroNameTok: __STDC_EMBED_FOUND__
+// CHECK-NEXT: MacroDirective: MD_Define
+// CHECK: - Callback: MacroDefined
+// CHECK-NEXT: MacroNameTok: __STDC_EMBED_EMPTY__
+// CHECK-NEXT: MacroDirective: MD_Define
+// CHECK: - Callback: MacroDefined
// CHECK: - Callback: MacroDefined
// CHECK-NEXT: MacroNameTok: MACRO
// CHECK-NEXT: MacroDirective: MD_Define
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index a49e412..1b5db38 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -1502,6 +1502,7 @@ Attributes on Structured Bindings __cpp_structured_bindings C+
Designated initializers (N494) C99 C89
Array & element qualification (N2607) C23 C89
Attributes (N2335) C23 C89
+``#embed`` (N3017) C23 C89, C++
============================================ ================================ ============= =============
Type Trait Primitives
@@ -5664,3 +5665,26 @@ Compiling different TUs depending on these flags (including use of
``std::hardware_destructive_interference``) with different compilers, macro
definitions, or architecture flags will lead to ODR violations and should be
avoided.
+
+``#embed`` Parameters
+=====================
+
+``clang::offset``
+-----------------
+The ``clang::offset`` embed parameter may appear zero or one time in the
+embed parameter sequence. Its preprocessor argument clause shall be present and
+have the form:
+
+.. code-block:: text
+
+ ( constant-expression )
+
+and shall be an integer constant expression. The integer constant expression
+shall not evaluate to a value less than 0. The token ``defined`` shall not
+appear within the constant expression.
+
+The offset will be used when reading the contents of the embedded resource to
+specify the starting offset to begin embedding from. The resource is treated
+as being empty if the specified offset is larger than the number of bytes in
+the resource. The offset will be applied *before* any ``limit`` parameters are
+applied.
diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index f2bf667..352e446 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -4799,6 +4799,166 @@ private:
friend class ASTStmtReader;
};
+/// Stores data related to a single #embed directive.
+struct EmbedDataStorage {
+ StringLiteral *Filename;
+ StringLiteral *BinaryData;
+ size_t getDataElementCount() const { return BinaryData->getByteLength(); }
+};
+
+/// Represents a reference to #embed data. By default, this references the whole
+/// range. Otherwise it represents a subrange of data imported by #embed
+/// directive. Needed to handle nested initializer lists with #embed directives.
+/// Example:
+/// struct S {
+/// int x, y;
+/// };
+///
+/// struct T {
+/// int x[2];
+/// struct S s;
+/// };
+///
+/// struct T t[] = {
+/// #embed "data" // data contains 10 elements;
+/// };
+///
+/// The resulting semantic form of initializer list will contain (EE stands
+/// for EmbedExpr):
+/// { {EE(first two data elements), {EE(3rd element), EE(4th element) }},
+/// { {EE(5th and 6th element), {EE(7th element), EE(8th element) }},
+/// { {EE(9th and 10th element), { zeroinitializer }}}
+///
+/// EmbedExpr inside of a semantic initializer list and referencing more than
+/// one element can only appear for arrays of scalars.
+class EmbedExpr final : public Expr {
+ SourceLocation EmbedKeywordLoc;
+ IntegerLiteral *FakeChildNode = nullptr;
+ const ASTContext *Ctx = nullptr;
+ EmbedDataStorage *Data;
+ unsigned Begin = 0;
+ unsigned NumOfElements;
+
+public:
+ EmbedExpr(const ASTContext &Ctx, SourceLocation Loc, EmbedDataStorage *Data,
+ unsigned Begin, unsigned NumOfElements);
+  /// Builds an empty shell whose fields are filled in later (ASTStmtReader is
+  /// a friend of this class). The statement class must be EmbedExprClass,
+  /// otherwise classof() would not recognize the node as an EmbedExpr.
+  explicit EmbedExpr(EmptyShell Empty) : Expr(EmbedExprClass, Empty) {}
+
+ SourceLocation getLocation() const { return EmbedKeywordLoc; }
+ SourceLocation getBeginLoc() const { return EmbedKeywordLoc; }
+ SourceLocation getEndLoc() const { return EmbedKeywordLoc; }
+
+ StringLiteral *getFilenameStringLiteral() const { return Data->Filename; }
+ StringLiteral *getDataStringLiteral() const { return Data->BinaryData; }
+ EmbedDataStorage *getData() const { return Data; }
+
+ unsigned getStartingElementPos() const { return Begin; }
+ size_t getDataElementCount() const { return NumOfElements; }
+
+ // Allows accessing every byte of EmbedExpr data and iterating over it.
+ // An Iterator knows the EmbedExpr that it refers to, and an offset value
+ // within the data.
+ // Dereferencing an Iterator results in construction of IntegerLiteral AST
+ // node filled with byte of data of the corresponding EmbedExpr within offset
+ // that the Iterator currently has.
+ template <bool Const>
+ class ChildElementIter
+ : public llvm::iterator_facade_base<
+ ChildElementIter<Const>, std::random_access_iterator_tag,
+ std::conditional_t<Const, const IntegerLiteral *,
+ IntegerLiteral *>> {
+ friend class EmbedExpr;
+
+ EmbedExpr *EExpr = nullptr;
+ unsigned long long CurOffset = ULLONG_MAX;
+ using BaseTy = typename ChildElementIter::iterator_facade_base;
+
+ ChildElementIter(EmbedExpr *E) : EExpr(E) {
+ if (E)
+ CurOffset = E->getStartingElementPos();
+ }
+
+ public:
+ ChildElementIter() : CurOffset(ULLONG_MAX) {}
+    /// Writes the byte at the current offset into the EmbedExpr's fake child
+    /// node and returns that node. Every dereference overwrites the same
+    /// IntegerLiteral, so the value must be consumed before any iterator of
+    /// this EmbedExpr is dereferenced again.
+    typename BaseTy::reference operator*() const {
+      assert(EExpr && CurOffset != ULLONG_MAX &&
+             "trying to dereference an invalid iterator");
+      IntegerLiteral *N = EExpr->FakeChildNode;
+      StringRef DataRef = EExpr->Data->BinaryData->getBytes();
+      // Embedded bytes are unsigned values. Convert through unsigned char so
+      // bytes >= 0x80 are not sign-extended when plain char is signed, which
+      // would otherwise produce a negative value for a signed literal type.
+      const auto Byte = static_cast<unsigned char>(DataRef[CurOffset]);
+      N->setValue(*EExpr->Ctx,
+                  llvm::APInt(N->getValue().getBitWidth(), Byte,
+                              N->getType()->isSignedIntegerType()));
+      // We want to return a reference to the fake child node in the
+      // EmbedExpr, not the local variable N.
+      return const_cast<typename BaseTy::reference>(EExpr->FakeChildNode);
+    }
+ typename BaseTy::pointer operator->() const { return **this; }
+ using BaseTy::operator++;
+ ChildElementIter &operator++() {
+ assert(EExpr && "trying to increment an invalid iterator");
+ assert(CurOffset != ULLONG_MAX &&
+ "Already at the end of what we can iterate over");
+ if (++CurOffset >=
+ EExpr->getDataElementCount() + EExpr->getStartingElementPos()) {
+ CurOffset = ULLONG_MAX;
+ EExpr = nullptr;
+ }
+ return *this;
+ }
+ bool operator==(ChildElementIter Other) const {
+ return (EExpr == Other.EExpr && CurOffset == Other.CurOffset);
+ }
+ }; // class ChildElementIter
+
+public:
+ using fake_child_range = llvm::iterator_range<ChildElementIter<false>>;
+ using const_fake_child_range = llvm::iterator_range<ChildElementIter<true>>;
+
+ fake_child_range underlying_data_elements() {
+ return fake_child_range(ChildElementIter<false>(this),
+ ChildElementIter<false>());
+ }
+
+ const_fake_child_range underlying_data_elements() const {
+ return const_fake_child_range(
+ ChildElementIter<true>(const_cast<EmbedExpr *>(this)),
+ ChildElementIter<true>());
+ }
+
+ child_range children() {
+ return child_range(child_iterator(), child_iterator());
+ }
+
+ const_child_range children() const {
+ return const_child_range(const_child_iterator(), const_child_iterator());
+ }
+
+ static bool classof(const Stmt *T) {
+ return T->getStmtClass() == EmbedExprClass;
+ }
+
+ ChildElementIter<false> begin() { return ChildElementIter<false>(this); }
+
+ ChildElementIter<true> begin() const {
+ return ChildElementIter<true>(const_cast<EmbedExpr *>(this));
+ }
+
+ template <typename Call, typename... Targs>
+ bool doForEachDataElement(Call &&C, unsigned &StartingIndexInArray,
+ Targs &&...Fargs) const {
+ for (auto It : underlying_data_elements()) {
+ if (!std::invoke(std::forward<Call>(C), const_cast<IntegerLiteral *>(It),
+ StartingIndexInArray, std::forward<Targs>(Fargs)...))
+ return false;
+ StartingIndexInArray++;
+ }
+ return true;
+ }
+
+private:
+ friend class ASTStmtReader;
+};
+
/// Describes an C or C++ initializer list.
///
/// InitListExpr describes an initializer list, which can be used to
diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index aa55e2e..2785afd 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -2864,6 +2864,11 @@ DEF_TRAVERSE_STMT(ShuffleVectorExpr, {})
DEF_TRAVERSE_STMT(ConvertVectorExpr, {})
DEF_TRAVERSE_STMT(StmtExpr, {})
DEF_TRAVERSE_STMT(SourceLocExpr, {})
+DEF_TRAVERSE_STMT(EmbedExpr, {
+ for (IntegerLiteral *IL : S->underlying_data_elements()) {
+ TRY_TO_TRAVERSE_OR_ENQUEUE_STMT(IL);
+ }
+})
DEF_TRAVERSE_STMT(UnresolvedLookupExpr, {
TRY_TO(TraverseNestedNameSpecifierLoc(S->getQualifierLoc()));
diff --git a/clang/include/clang/AST/TextNodeDumper.h b/clang/include/clang/AST/TextNodeDumper.h
index abfafca..39dd1f5 100644
--- a/clang/include/clang/AST/TextNodeDumper.h
+++ b/clang/include/clang/AST/TextNodeDumper.h
@@ -409,6 +409,7 @@ public:
void VisitHLSLBufferDecl(const HLSLBufferDecl *D);
void VisitOpenACCConstructStmt(const OpenACCConstructStmt *S);
void VisitOpenACCLoopConstruct(const OpenACCLoopConstruct *S);
+ void VisitEmbedExpr(const EmbedExpr *S);
};
} // namespace clang
diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td
index 1e44bc4..de758cb 100644
--- a/clang/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -275,6 +275,9 @@ def err_too_large_for_fixed_point : Error<
def err_unimplemented_conversion_with_fixed_point_type : Error<
"conversion between fixed point and %0 is not yet supported">;
+def err_requires_positive_value : Error<
+ "%select{invalid value '%0'; must be positive|value '%0' is too large}1">;
+
// SEH
def err_seh_expected_handler : Error<
"expected '__except' or '__finally' block">;
diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td
index 25fbfe8..12d7b8c 100644
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -436,6 +436,14 @@ def warn_cxx23_compat_warning_directive : Warning<
def warn_c23_compat_warning_directive : Warning<
"#warning is incompatible with C standards before C23">,
InGroup<CPre23Compat>, DefaultIgnore;
+def ext_pp_embed_directive : ExtWarn<
+ "#embed is a %select{C23|Clang}0 extension">,
+ InGroup<C23>;
+def warn_compat_pp_embed_directive : Warning<
+ "#embed is incompatible with C standards before C23">,
+ InGroup<CPre23Compat>, DefaultIgnore;
+def err_pp_embed_dup_params : Error<
+ "cannot specify parameter '%0' twice in the same '#embed' directive">;
def ext_pp_extra_tokens_at_eol : ExtWarn<
"extra tokens at end of #%0 directive">, InGroup<ExtraTokens>;
@@ -505,6 +513,8 @@ def err_pp_invalid_directive : Error<
"invalid preprocessing directive%select{|, did you mean '#%1'?}0">;
def warn_pp_invalid_directive : Warning<
err_pp_invalid_directive.Summary>, InGroup<DiagGroup<"unknown-directives">>;
+// %0 selects the wording: 0 = generic preprocessor parameter, 1 = embed
+// parameter. The first alternative is empty so that selector 0 does not
+// produce two consecutive spaces in the rendered message.
+def err_pp_unknown_parameter : Error<
+  "unknown%select{| embed}0 preprocessor parameter '%1'">;
def err_pp_directive_required : Error<
"%0 must be used within a preprocessing directive">;
def err_pp_file_not_found : Error<"'%0' file not found">, DefaultFatal;
@@ -719,6 +729,8 @@ def err_pp_module_build_missing_end : Error<
"no matching '#pragma clang module endbuild' for this '#pragma clang module build'">;
def err_defined_macro_name : Error<"'defined' cannot be used as a macro name">;
+def err_defined_in_pp_embed : Error<
+ "'defined' cannot appear within this context">;
def err_paste_at_start : Error<
"'##' cannot appear at start of macro expansion">;
def err_paste_at_end : Error<"'##' cannot appear at end of macro expansion">;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 193eae3..a104dfb 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -1097,8 +1097,6 @@ def note_surrounding_namespace_starts_here : Note<
"surrounding namespace with visibility attribute starts here">;
def err_pragma_loop_invalid_argument_type : Error<
"invalid argument of type %0; expected an integer type">;
-def err_pragma_loop_invalid_argument_value : Error<
- "%select{invalid value '%0'; must be positive|value '%0' is too large}1">;
def err_pragma_loop_compatibility : Error<
"%select{incompatible|duplicate}0 directives '%1' and '%2'">;
def err_pragma_loop_precedes_nonloop : Error<
diff --git a/clang/include/clang/Basic/FileManager.h b/clang/include/clang/Basic/FileManager.h
index e1f33d5..527bbef 100644
--- a/clang/include/clang/Basic/FileManager.h
+++ b/clang/include/clang/Basic/FileManager.h
@@ -286,12 +286,15 @@ public:
/// MemoryBuffer if successful, otherwise returning null.
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
getBufferForFile(FileEntryRef Entry, bool isVolatile = false,
- bool RequiresNullTerminator = true);
+ bool RequiresNullTerminator = true,
+ std::optional<int64_t> MaybeLimit = std::nullopt);
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
getBufferForFile(StringRef Filename, bool isVolatile = false,
- bool RequiresNullTerminator = true) const {
- return getBufferForFileImpl(Filename, /*FileSize=*/-1, isVolatile,
- RequiresNullTerminator);
+ bool RequiresNullTerminator = true,
+ std::optional<int64_t> MaybeLimit = std::nullopt) const {
+ return getBufferForFileImpl(Filename,
+ /*FileSize=*/(MaybeLimit ? *MaybeLimit : -1),
+ isVolatile, RequiresNullTerminator);
}
private:
diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td
index 6ca08ab..c59a17b 100644
--- a/clang/include/clang/Basic/StmtNodes.td
+++ b/clang/include/clang/Basic/StmtNodes.td
@@ -204,6 +204,7 @@ def OpaqueValueExpr : StmtNode<Expr>;
def TypoExpr : StmtNode<Expr>;
def RecoveryExpr : StmtNode<Expr>;
def BuiltinBitCastExpr : StmtNode<ExplicitCastExpr>;
+def EmbedExpr : StmtNode<Expr>;
// Microsoft Extensions.
def MSPropertyRefExpr : StmtNode<Expr>;
diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
index 9c4b174..37d570c 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -126,6 +126,9 @@ PPKEYWORD(error)
// C99 6.10.6 - Pragma Directive.
PPKEYWORD(pragma)
+// C23 & C++26 #embed
+PPKEYWORD(embed)
+
// GNU Extensions.
PPKEYWORD(import)
PPKEYWORD(include_next)
@@ -999,6 +1002,9 @@ ANNOTATION(header_unit)
// Annotation for end of input in clang-repl.
ANNOTATION(repl_input_end)
+// Annotation for #embed
+ANNOTATION(embed)
+
#undef PRAGMA_ANNOTATION
#undef ANNOTATION
#undef TESTING_KEYWORD
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index d44faa5..9f7904d 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -880,6 +880,9 @@ will be ignored}]>;
def L : JoinedOrSeparate<["-"], "L">, Flags<[RenderJoined]>, Group<Link_Group>,
Visibility<[ClangOption, FlangOption]>,
MetaVarName<"<dir>">, HelpText<"Add directory to library search path">;
+def embed_dir_EQ : Joined<["--"], "embed-dir=">, Group<Preprocessor_Group>,
+ Visibility<[ClangOption, CC1Option]>, MetaVarName<"<dir>">,
+ HelpText<"Add directory to embed search path">;
def MD : Flag<["-"], "MD">, Group<M_Group>,
HelpText<"Write a depfile containing user and system headers">;
def MMD : Flag<["-"], "MMD">, Group<M_Group>,
@@ -1473,6 +1476,9 @@ def dD : Flag<["-"], "dD">, Group<d_Group>, Visibility<[ClangOption, CC1Option]>
def dI : Flag<["-"], "dI">, Group<d_Group>, Visibility<[ClangOption, CC1Option]>,
HelpText<"Print include directives in -E mode in addition to normal output">,
MarshallingInfoFlag<PreprocessorOutputOpts<"ShowIncludeDirectives">>;
+def dE : Flag<["-"], "dE">, Group<d_Group>, Visibility<[CC1Option]>,
+ HelpText<"Print embed directives in -E mode in addition to normal output">,
+ MarshallingInfoFlag<PreprocessorOutputOpts<"ShowEmbedDirectives">>;
def dM : Flag<["-"], "dM">, Group<d_Group>, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>,
HelpText<"Print macro definitions in -E mode instead of normal output">;
def dead__strip : Flag<["-"], "dead_strip">;
diff --git a/clang/include/clang/Frontend/PreprocessorOutputOptions.h b/clang/include/clang/Frontend/PreprocessorOutputOptions.h
index 6e19cae..654cf22 100644
--- a/clang/include/clang/Frontend/PreprocessorOutputOptions.h
+++ b/clang/include/clang/Frontend/PreprocessorOutputOptions.h
@@ -32,6 +32,8 @@ public:
LLVM_PREFERRED_TYPE(bool)
unsigned ShowIncludeDirectives : 1; ///< Print includes, imports etc. within preprocessed output.
LLVM_PREFERRED_TYPE(bool)
+ unsigned ShowEmbedDirectives : 1; ///< Print embeds, etc. within preprocessed output.
+ LLVM_PREFERRED_TYPE(bool)
unsigned RewriteIncludes : 1; ///< Preprocess include directives only.
LLVM_PREFERRED_TYPE(bool)
unsigned RewriteImports : 1; ///< Include contents of transitively-imported modules.
@@ -51,6 +53,7 @@ public:
ShowMacroComments = 0;
ShowMacros = 0;
ShowIncludeDirectives = 0;
+ ShowEmbedDirectives = 0;
RewriteIncludes = 0;
RewriteImports = 0;
MinimizeWhitespace = 0;
diff --git a/clang/include/clang/Lex/PPCallbacks.h b/clang/include/clang/Lex/PPCallbacks.h
index dfc74b5..46cc564 100644
--- a/clang/include/clang/Lex/PPCallbacks.h
+++ b/clang/include/clang/Lex/PPCallbacks.h
@@ -27,6 +27,7 @@ class IdentifierInfo;
class MacroDefinition;
class MacroDirective;
class MacroArgs;
+struct LexEmbedParametersResult;
/// This interface provides a way to observe the actions of the
/// preprocessor as it does its thing.
@@ -84,6 +85,34 @@ public:
SrcMgr::CharacteristicKind FileType) {}
/// Callback invoked whenever the preprocessor cannot find a file for an
+ /// embed directive.
+ ///
+ /// \param FileName The name of the file being embedded, as written in the
+ /// source code.
+ ///
+ /// \returns true to indicate that the preprocessor should skip this file
+ /// and not issue any diagnostic.
+ virtual bool EmbedFileNotFound(StringRef FileName) { return false; }
+
+ /// Callback invoked whenever an embed directive has been processed,
+ /// regardless of whether the embed will actually find a file.
+ ///
+ /// \param HashLoc The location of the '#' that starts the embed directive.
+ ///
+ /// \param FileName The name of the file being embedded, as written in the
+ /// source code.
+ ///
+ /// \param IsAngled Whether the file name was enclosed in angle brackets;
+ /// otherwise, it was enclosed in quotes.
+ ///
+ /// \param File The actual file that may be included by this embed directive.
+ ///
+ /// \param Params The parameters used by the directive.
+ virtual void EmbedDirective(SourceLocation HashLoc, StringRef FileName,
+ bool IsAngled, OptionalFileEntryRef File,
+ const LexEmbedParametersResult &Params) {}
+
+ /// Callback invoked whenever the preprocessor cannot find a file for an
/// inclusion directive.
///
/// \param FileName The name of the file being included, as written in the
@@ -333,6 +362,10 @@ public:
SourceRange Range) {
}
+ /// Hook called when a '__has_embed' directive is read.
+ virtual void HasEmbed(SourceLocation Loc, StringRef FileName, bool IsAngled,
+ OptionalFileEntryRef File) {}
+
/// Hook called when a '__has_include' or '__has_include_next' directive is
/// read.
virtual void HasInclude(SourceLocation Loc, StringRef FileName, bool IsAngled,
@@ -464,6 +497,21 @@ public:
Second->FileSkipped(SkippedFile, FilenameTok, FileType);
}
+  /// Forwards the embed-file-not-found notification to both chained
+  /// callbacks.
+  ///
+  /// \returns true if either callback asked to skip the file.
+  bool EmbedFileNotFound(StringRef FileName) override {
+    // Dispatch to the embed-specific hook; calling FileNotFound here would
+    // bypass clients that only override EmbedFileNotFound.
+    bool Skip = First->EmbedFileNotFound(FileName);
+    // Make sure to invoke the second callback, no matter if the first already
+    // returned true to skip the file.
+    Skip |= Second->EmbedFileNotFound(FileName);
+    return Skip;
+  }
+
+ void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+ OptionalFileEntryRef File,
+ const LexEmbedParametersResult &Params) override {
+ First->EmbedDirective(HashLoc, FileName, IsAngled, File, Params);
+ Second->EmbedDirective(HashLoc, FileName, IsAngled, File, Params);
+ }
+
bool FileNotFound(StringRef FileName) override {
bool Skip = First->FileNotFound(FileName);
// Make sure to invoke the second callback, no matter if the first already
@@ -565,6 +613,12 @@ public:
Second->PragmaDiagnostic(Loc, Namespace, mapping, Str);
}
+ void HasEmbed(SourceLocation Loc, StringRef FileName, bool IsAngled,
+ OptionalFileEntryRef File) override {
+ First->HasEmbed(Loc, FileName, IsAngled, File);
+ Second->HasEmbed(Loc, FileName, IsAngled, File);
+ }
+
void HasInclude(SourceLocation Loc, StringRef FileName, bool IsAngled,
OptionalFileEntryRef File,
SrcMgr::CharacteristicKind FileType) override;
diff --git a/clang/include/clang/Lex/PPDirectiveParameter.h b/clang/include/clang/Lex/PPDirectiveParameter.h
new file mode 100644
index 0000000..83f0566d
--- /dev/null
+++ b/clang/include/clang/Lex/PPDirectiveParameter.h
@@ -0,0 +1,33 @@
+//===--- PPDirectiveParameter.h ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the base class for preprocessor directive parameters, such
+// as limit(1) or suffix(x) for #embed.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LEX_PPDIRECTIVEPARAMETER_H
+#define LLVM_CLANG_LEX_PPDIRECTIVEPARAMETER_H
+
+#include "clang/Basic/SourceLocation.h"
+
+namespace clang {
+
+/// Captures basic information about a preprocessor directive parameter.
+class PPDirectiveParameter {
+ SourceRange R;
+
+public:
+ PPDirectiveParameter(SourceRange R) : R(R) {}
+
+ SourceRange getParameterRange() const { return R; }
+};
+
+} // end namespace clang
+
+#endif
diff --git a/clang/include/clang/Lex/PPEmbedParameters.h b/clang/include/clang/Lex/PPEmbedParameters.h
new file mode 100644
index 0000000..51bf908
--- /dev/null
+++ b/clang/include/clang/Lex/PPEmbedParameters.h
@@ -0,0 +1,94 @@
+//===--- PPEmbedParameters.h ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines all of the preprocessor directive parameters for #embed
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LEX_PPEMBEDPARAMETERS_H
+#define LLVM_CLANG_LEX_PPEMBEDPARAMETERS_H
+
+#include "clang/Lex/PPDirectiveParameter.h"
+#include "clang/Lex/Token.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace clang {
+
+/// Preprocessor extension embed parameter "clang::offset"
+/// `clang::offset( constant-expression )`
+class PPEmbedParameterOffset : public PPDirectiveParameter {
+public:
+ size_t Offset;
+
+ PPEmbedParameterOffset(size_t Offset, SourceRange R)
+ : PPDirectiveParameter(R), Offset(Offset) {}
+};
+
+/// Preprocessor standard embed parameter "limit"
+/// `limit( constant-expression )`
+class PPEmbedParameterLimit : public PPDirectiveParameter {
+public:
+ size_t Limit;
+
+ PPEmbedParameterLimit(size_t Limit, SourceRange R)
+ : PPDirectiveParameter(R), Limit(Limit) {}
+};
+
+/// Preprocessor standard embed parameter "prefix"
+/// `prefix( balanced-token-seq )`
+class PPEmbedParameterPrefix : public PPDirectiveParameter {
+public:
+ SmallVector<Token, 2> Tokens;
+
+ PPEmbedParameterPrefix(SmallVectorImpl<Token> &&Tokens, SourceRange R)
+ : PPDirectiveParameter(R), Tokens(std::move(Tokens)) {}
+};
+
+/// Preprocessor standard embed parameter "suffix"
+/// `suffix( balanced-token-seq )`
+class PPEmbedParameterSuffix : public PPDirectiveParameter {
+public:
+ SmallVector<Token, 2> Tokens;
+
+ PPEmbedParameterSuffix(SmallVectorImpl<Token> &&Tokens, SourceRange R)
+ : PPDirectiveParameter(R), Tokens(std::move(Tokens)) {}
+};
+
+/// Preprocessor standard embed parameter "if_empty"
+/// `if_empty( balanced-token-seq )`
+class PPEmbedParameterIfEmpty : public PPDirectiveParameter {
+public:
+ SmallVector<Token, 2> Tokens;
+
+ PPEmbedParameterIfEmpty(SmallVectorImpl<Token> &&Tokens, SourceRange R)
+ : PPDirectiveParameter(R), Tokens(std::move(Tokens)) {}
+};
+
+/// Bundles the parameters lexed from a single #embed directive: one optional
+/// slot per known parameter, plus the source range of the parameter list.
+struct LexEmbedParametersResult {
+  std::optional<PPEmbedParameterLimit> MaybeLimitParam;
+  std::optional<PPEmbedParameterOffset> MaybeOffsetParam;
+  std::optional<PPEmbedParameterIfEmpty> MaybeIfEmptyParam;
+  std::optional<PPEmbedParameterPrefix> MaybePrefixParam;
+  std::optional<PPEmbedParameterSuffix> MaybeSuffixParam;
+  SourceRange ParamRange;
+  // Presumably the number of parameters that were not recognized; confirm
+  // against the lexer. Initialized so that a default-initialized result
+  // never carries an indeterminate value.
+  int UnrecognizedParams = 0;
+
+  /// Returns the number of tokens in the prefix parameter, or 0 if there is
+  /// no prefix parameter.
+  size_t PrefixTokenCount() const {
+    if (MaybePrefixParam)
+      return MaybePrefixParam->Tokens.size();
+    return 0;
+  }
+  /// Returns the number of tokens in the suffix parameter, or 0 if there is
+  /// no suffix parameter.
+  size_t SuffixTokenCount() const {
+    if (MaybeSuffixParam)
+      return MaybeSuffixParam->Tokens.size();
+    return 0;
+  }
+};
+} // end namespace clang
+
+#endif
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 9b1628d..a47df42 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -29,8 +29,10 @@
#include "clang/Lex/ModuleLoader.h"
#include "clang/Lex/ModuleMap.h"
#include "clang/Lex/PPCallbacks.h"
+#include "clang/Lex/PPEmbedParameters.h"
#include "clang/Lex/Token.h"
#include "clang/Lex/TokenLexer.h"
+#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/FoldingSet.h"
@@ -119,6 +121,13 @@ enum MacroUse {
MU_Undef = 2
};
+enum class EmbedResult { // Result type of EvaluateHasEmbed; non-error values feed the __STDC_EMBED_*__ macros.
+ Invalid = -1, // Parsing error occurred.
+ NotFound = 0, // Corresponds to __STDC_EMBED_NOT_FOUND__
+ Found = 1, // Corresponds to __STDC_EMBED_FOUND__
+ Empty = 2, // Corresponds to __STDC_EMBED_EMPTY__
+};
+
/// Engages in a tight little dance with the lexer to efficiently
/// preprocess tokens.
///
@@ -165,6 +174,7 @@ class Preprocessor {
IdentifierInfo *Ident__has_builtin; // __has_builtin
IdentifierInfo *Ident__has_constexpr_builtin; // __has_constexpr_builtin
IdentifierInfo *Ident__has_attribute; // __has_attribute
+ IdentifierInfo *Ident__has_embed; // __has_embed
IdentifierInfo *Ident__has_include; // __has_include
IdentifierInfo *Ident__has_include_next; // __has_include_next
IdentifierInfo *Ident__has_warning; // __has_warning
@@ -1734,6 +1744,10 @@ public:
/// Lex a token, forming a header-name token if possible.
bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true);
+ /// Lex the parameters for an #embed directive, returns nullopt on error.
+ std::optional<LexEmbedParametersResult> LexEmbedParameters(Token &Current,
+ bool ForHasEmbed);
+
bool LexAfterModuleImport(Token &Result);
void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks);
@@ -2314,7 +2328,13 @@ public:
/// Read and discard all tokens remaining on the current line until
/// the tok::eod token is found. Returns the range of the skipped tokens.
- SourceRange DiscardUntilEndOfDirective();
+ SourceRange DiscardUntilEndOfDirective() {
+ Token Tmp; // Throwaway token: this overload does not expose the token that was found.
+ return DiscardUntilEndOfDirective(Tmp);
+ }
+
+ /// Same as above, except that the token that was found is retained in \p Tok.
+ SourceRange DiscardUntilEndOfDirective(Token &Tok);
/// Returns true if the preprocessor has seen a use of
/// __DATE__ or __TIME__ in the file so far.
@@ -2419,6 +2439,18 @@ public:
bool *IsFrameworkFound, bool SkipCache = false,
bool OpenFile = true, bool CacheFailures = true);
+ /// Given a "Filename" or \<Filename> reference, look up the indicated embed
+ /// resource. \p isAngled indicates whether the file reference was written
+ /// with angle brackets (i.e. using <> instead of ""). If \p OpenFile
+ /// is true, the file looked up is opened for reading, otherwise it only
+ /// validates that the file exists. Quoted filenames are looked up relative
+ /// to \p LookupFromFile if it is nonnull.
+ ///
+ /// Returns std::nullopt on failure.
+ OptionalFileEntryRef
+ LookupEmbedFile(StringRef Filename, bool isAngled, bool OpenFile,
+ const FileEntry *LookupFromFile = nullptr);
+
/// Return true if we're in the top-level file, not in a \#include.
bool isInPrimaryFile() const;
@@ -2524,6 +2556,9 @@ private:
/// Information about the result for evaluating an expression for a
/// preprocessor directive.
struct DirectiveEvalResult {
+ /// The integral value of the expression.
+ std::optional<llvm::APSInt> Value;
+
/// Whether the expression was evaluated as true or not.
bool Conditional;
@@ -2538,7 +2573,25 @@ private:
/// \#if or \#elif directive and return a \p DirectiveEvalResult object.
///
/// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
- DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro);
+ DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
+ bool CheckForEoD = true);
+
+ /// Evaluate an integer constant expression that may occur after a
+ /// \#if or \#elif directive and return a \p DirectiveEvalResult object.
+ ///
+ /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
+ /// \p EvaluatedDefined will contain the result of whether "defined" appeared
+ /// in the evaluated expression or not.
+ DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
+ Token &Tok,
+ bool &EvaluatedDefined,
+ bool CheckForEoD = true);
+
+ /// Process a '__has_embed("path" [, ...])' expression.
+ ///
+ /// Returns predefined `__STDC_EMBED_*` macro values if
+ /// successful.
+ EmbedResult EvaluateHasEmbed(Token &Tok, IdentifierInfo *II);
/// Process a '__has_include("path")' expression.
///
@@ -2686,6 +2739,13 @@ private:
const FileEntry *LookupFromFile, StringRef &LookupFilename,
SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
ModuleMap::KnownHeader &SuggestedModule, bool isAngled);
+ // Binary data inclusion
+ void HandleEmbedDirective(SourceLocation HashLoc, Token &Tok,
+ const FileEntry *LookupFromFile = nullptr);
+ void HandleEmbedDirectiveImpl(SourceLocation HashLoc,
+ StringRef ResolvedFilename,
+ const LexEmbedParametersResult &Params,
+ StringRef BinaryContents);
// File inclusion.
void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok,
@@ -2960,6 +3020,13 @@ public:
virtual void HandleEmptyline(SourceRange Range) = 0;
};
+/// Helper class to shuttle information about #embed directives from the
+/// preprocessor to the parser through an annotation token.
+struct EmbedAnnotationData {
+ llvm::SmallString<32> FileName; // Stored by value inside this struct.
+ StringRef BinaryData; // Non-owning view; the referenced bytes must be kept alive elsewhere.
+};
+
/// Registry of pragma handlers added by plugins
using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>;
diff --git a/clang/include/clang/Lex/PreprocessorOptions.h b/clang/include/clang/Lex/PreprocessorOptions.h
index 635971d..c2e3d68 100644
--- a/clang/include/clang/Lex/PreprocessorOptions.h
+++ b/clang/include/clang/Lex/PreprocessorOptions.h
@@ -170,6 +170,9 @@ public:
/// of the specified memory buffer (the second part of each pair).
std::vector<std::pair<std::string, llvm::MemoryBuffer *>> RemappedFileBuffers;
+ /// User specified embed entries.
+ std::vector<std::string> EmbedEntries;
+
/// Whether the compiler instance should retain (i.e., not free)
/// the buffers associated with remapped files.
///
diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
index d054b8c..95c0655 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -2122,6 +2122,8 @@ private:
QualType PreferredBaseType;
};
ExprResult ParseInitializerWithPotentialDesignator(DesignatorCompletionInfo);
+ ExprResult createEmbedExpr();
+ void ExpandEmbedDirective(SmallVectorImpl<Expr *> &Exprs);
//===--------------------------------------------------------------------===//
// clang Expressions
@@ -3813,6 +3815,7 @@ private:
AnnotateTemplateIdTokenAsType(CXXScopeSpec &SS,
ImplicitTypenameContext AllowImplicitTypename,
bool IsClassName = false);
+ void ExpandEmbedIntoTemplateArgList(TemplateArgList &TemplateArgs);
bool ParseTemplateArgumentList(TemplateArgList &TemplateArgs,
TemplateTy Template, SourceLocation OpenLoc);
ParsedTemplateArgument ParseTemplateTemplateArgument();
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 4d4579f..9bf0141 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -5729,6 +5729,10 @@ public:
SourceLocation BuiltinLoc,
SourceLocation RPLoc);
+ // #embed
+ ExprResult ActOnEmbedExpr(SourceLocation EmbedKeywordLoc,
+ StringLiteral *Filename, StringLiteral *BinaryData);
+
// Build a potentially resolved SourceLocExpr.
ExprResult BuildSourceLocExpr(SourceLocIdentKind Kind, QualType ResultTy,
SourceLocation BuiltinLoc, SourceLocation RPLoc,
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index 52a6c5e..69b71e4 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -1649,6 +1649,9 @@ enum StmtCode {
/// A SourceLocExpr record.
EXPR_SOURCE_LOC,
+ /// An EmbedExpr record.
+ EXPR_BUILTIN_PP_EMBED,
+
/// A ShuffleVectorExpr record.
EXPR_SHUFFLE_VECTOR,
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 7e55568..04b331a 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -2373,6 +2373,17 @@ APValue SourceLocExpr::EvaluateInContext(const ASTContext &Ctx,
llvm_unreachable("unhandled case");
}
+EmbedExpr::EmbedExpr(const ASTContext &Ctx, SourceLocation Loc,
+ EmbedDataStorage *Data, unsigned Begin,
+ unsigned NumOfElements)
+ : Expr(EmbedExprClass, Ctx.UnsignedCharTy, VK_PRValue, OK_Ordinary), // Typed as a prvalue of unsigned char.
+ EmbedKeywordLoc(Loc), Ctx(&Ctx), Data(Data), Begin(Begin),
+ NumOfElements(NumOfElements) {
+ setDependence(ExprDependence::None); // Marked as having no dependence.
+ FakeChildNode = IntegerLiteral::Create(
+ Ctx, llvm::APInt::getZero(Ctx.getTypeSize(getType())), getType(), Loc); // Zero-valued literal with this expression's type and location.
+}
+
InitListExpr::InitListExpr(const ASTContext &C, SourceLocation lbraceloc,
ArrayRef<Expr *> initExprs, SourceLocation rbraceloc)
: Expr(InitListExprClass, QualType(), VK_PRValue, OK_Ordinary),
@@ -3615,6 +3626,7 @@ bool Expr::HasSideEffects(const ASTContext &Ctx,
case CXXUuidofExprClass:
case OpaqueValueExprClass:
case SourceLocExprClass:
+ case EmbedExprClass:
case ConceptSpecializationExprClass:
case RequiresExprClass:
case SYCLUniqueStableNameExprClass:
diff --git a/clang/lib/AST/ExprClassification.cpp b/clang/lib/AST/ExprClassification.cpp
index 390000e3..6482cb6 100644
--- a/clang/lib/AST/ExprClassification.cpp
+++ b/clang/lib/AST/ExprClassification.cpp
@@ -204,6 +204,11 @@ static Cl::Kinds ClassifyInternal(ASTContext &Ctx, const Expr *E) {
case Expr::RequiresExprClass:
return Cl::CL_PRValue;
+ case Expr::EmbedExprClass:
+ // Nominally, this just goes through as a PRValue until we actually expand
+ // it and check it.
+ return Cl::CL_PRValue;
+
// Make HLSL this reference-like
case Expr::CXXThisExprClass:
return Lang.HLSL ? Cl::CL_LValue : Cl::CL_PRValue;
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index d505745..af1f18a 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -7727,6 +7727,11 @@ public:
return Error(E);
}
+ bool VisitEmbedExpr(const EmbedExpr *E) {
+ const auto It = E->begin(); // Iterator positioned at the start of the embed expression.
+ return StmtVisitorTy::Visit(*It); // Only the element it refers to is evaluated.
+ }
+
bool VisitPredefinedExpr(const PredefinedExpr *E) {
return StmtVisitorTy::Visit(E->getFunctionName());
}
@@ -9145,6 +9150,11 @@ public:
return true;
}
+ bool VisitEmbedExpr(const EmbedExpr *E) {
+ llvm_unreachable("Not yet implemented for ExprConstant.cpp"); // This visitor has no EmbedExpr support yet.
+ return true; // Follows llvm_unreachable, so it is never executed.
+ }
+
bool VisitSYCLUniqueStableNameExpr(const SYCLUniqueStableNameExpr *E) {
std::string ResultStr = E->ComputeName(Info.Ctx);
@@ -11249,8 +11259,17 @@ bool ArrayExprEvaluator::VisitCXXParenListOrInitListExpr(
// If the initializer might depend on the array index, run it for each
// array element.
- if (NumEltsToInit != NumElts && MaybeElementDependentArrayFiller(ArrayFiller))
+ if (NumEltsToInit != NumElts &&
+ MaybeElementDependentArrayFiller(ArrayFiller)) {
NumEltsToInit = NumElts;
+ } else {
+ for (auto *Init : Args) {
+ if (auto *EmbedS = dyn_cast<EmbedExpr>(Init->IgnoreParenImpCasts()))
+ NumEltsToInit += EmbedS->getDataElementCount() - 1;
+ }
+ if (NumEltsToInit > NumElts)
+ NumEltsToInit = NumElts;
+ }
LLVM_DEBUG(llvm::dbgs() << "The number of elements to initialize: "
<< NumEltsToInit << ".\n");
@@ -11268,16 +11287,49 @@ bool ArrayExprEvaluator::VisitCXXParenListOrInitListExpr(
LValue Subobject = This;
Subobject.addArray(Info, ExprToVisit, CAT);
- for (unsigned Index = 0; Index != NumEltsToInit; ++Index) {
- const Expr *Init = Index < Args.size() ? Args[Index] : ArrayFiller;
- if (!EvaluateInPlace(Result.getArrayInitializedElt(Index),
- Info, Subobject, Init) ||
+ auto Eval = [&](const Expr *Init, unsigned ArrayIndex) {
+ if (!EvaluateInPlace(Result.getArrayInitializedElt(ArrayIndex), Info,
+ Subobject, Init) ||
!HandleLValueArrayAdjustment(Info, Init, Subobject,
CAT->getElementType(), 1)) {
if (!Info.noteFailure())
return false;
Success = false;
}
+ return true;
+ };
+ unsigned ArrayIndex = 0;
+ QualType DestTy = CAT->getElementType();
+ APSInt Value(Info.Ctx.getTypeSize(DestTy), DestTy->isUnsignedIntegerType());
+ for (unsigned Index = 0; Index != NumEltsToInit; ++Index) {
+ const Expr *Init = Index < Args.size() ? Args[Index] : ArrayFiller;
+ if (ArrayIndex >= NumEltsToInit)
+ break;
+ if (auto *EmbedS = dyn_cast<EmbedExpr>(Init->IgnoreParenImpCasts())) {
+ StringLiteral *SL = EmbedS->getDataStringLiteral();
+ for (unsigned I = EmbedS->getStartingElementPos(),
+ N = EmbedS->getDataElementCount();
+ I != EmbedS->getStartingElementPos() + N; ++I) {
+ Value = SL->getCodeUnit(I);
+ if (DestTy->isIntegerType()) {
+ Result.getArrayInitializedElt(ArrayIndex) = APValue(Value);
+ } else {
+ assert(DestTy->isFloatingType() && "unexpected type");
+ const FPOptions FPO =
+ Init->getFPFeaturesInEffect(Info.Ctx.getLangOpts());
+ APFloat FValue(0.0);
+ if (!HandleIntToFloatCast(Info, Init, FPO, EmbedS->getType(), Value,
+ DestTy, FValue))
+ return false;
+ Result.getArrayInitializedElt(ArrayIndex) = APValue(FValue);
+ }
+ ArrayIndex++;
+ }
+ } else {
+ if (!Eval(Init, ArrayIndex))
+ return false;
+ ++ArrayIndex;
+ }
}
if (!Result.hasArrayFiller())
@@ -16363,6 +16415,7 @@ static ICEDiag CheckICE(const Expr* E, const ASTContext &Ctx) {
case Expr::SizeOfPackExprClass:
case Expr::GNUNullExprClass:
case Expr::SourceLocExprClass:
+ case Expr::EmbedExprClass:
return NoDiag();
case Expr::PackIndexingExprClass:
diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
index 1393ef1..e766558 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
@@ -1201,11 +1201,19 @@ bool ByteCodeExprGen<Emitter>::visitInitList(ArrayRef<const Expr *> Inits,
}
if (T->isArrayType()) {
+ auto Eval = [&](Expr *Init, unsigned ElemIndex) {
+ return visitArrayElemInit(ElemIndex, Init);
+ };
unsigned ElementIndex = 0;
for (const Expr *Init : Inits) {
- if (!this->visitArrayElemInit(ElementIndex, Init))
- return false;
- ++ElementIndex;
+ if (auto *EmbedS = dyn_cast<EmbedExpr>(Init->IgnoreParenImpCasts())) {
+ if (!EmbedS->doForEachDataElement(Eval, ElementIndex))
+ return false;
+ } else {
+ if (!this->visitArrayElemInit(ElementIndex, Init))
+ return false;
+ ++ElementIndex;
+ }
}
// Expand the filler expression.
@@ -1351,6 +1359,12 @@ bool ByteCodeExprGen<Emitter>::VisitConstantExpr(const ConstantExpr *E) {
return this->delegate(E->getSubExpr());
}
+template <class Emitter>
+bool ByteCodeExprGen<Emitter>::VisitEmbedExpr(const EmbedExpr *E) {
+ auto It = E->begin(); // Only the element at the start of the range is visited here.
+ return this->visit(*It);
+}
+
static CharUnits AlignOfType(QualType T, const ASTContext &ASTCtx,
UnaryExprOrTypeTrait Kind) {
bool AlignOfReturnsPreferred =
diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.h b/clang/lib/AST/Interp/ByteCodeExprGen.h
index 295cfef..f9f508e 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.h
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.h
@@ -115,6 +115,7 @@ public:
bool VisitSizeOfPackExpr(const SizeOfPackExpr *E);
bool VisitGenericSelectionExpr(const GenericSelectionExpr *E);
bool VisitChooseExpr(const ChooseExpr *E);
+ bool VisitEmbedExpr(const EmbedExpr *E);
bool VisitObjCBoolLiteralExpr(const ObjCBoolLiteralExpr *E);
bool VisitCXXInheritedCtorInitExpr(const CXXInheritedCtorInitExpr *E);
bool VisitExpressionTraitExpr(const ExpressionTraitExpr *E);
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index ed9e6eeb..eac1801 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -4760,6 +4760,7 @@ recurse:
case Expr::PseudoObjectExprClass:
case Expr::AtomicExprClass:
case Expr::SourceLocExprClass:
+ case Expr::EmbedExprClass:
case Expr::BuiltinBitCastExprClass:
{
NotPrimaryExpr();
diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index 8f51d16..2d223a9 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -1177,6 +1177,10 @@ void StmtPrinter::VisitSourceLocExpr(SourceLocExpr *Node) {
OS << Node->getBuiltinStr() << "()";
}
+void StmtPrinter::VisitEmbedExpr(EmbedExpr *Node) {
+ assert(false && "not yet implemented"); // NOTE(review): with assertions compiled out this prints nothing — confirm that is intended.
+}
+
void StmtPrinter::VisitConstantExpr(ConstantExpr *Node) {
PrintExpr(Node->getSubExpr());
}
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index d165590..1add5ca 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -2313,6 +2313,8 @@ void StmtProfiler::VisitSourceLocExpr(const SourceLocExpr *E) {
VisitExpr(E);
}
+void StmtProfiler::VisitEmbedExpr(const EmbedExpr *E) { VisitExpr(E); } // Delegates to VisitExpr; adds nothing EmbedExpr-specific.
+
void StmtProfiler::VisitRecoveryExpr(const RecoveryExpr *E) { VisitExpr(E); }
void StmtProfiler::VisitObjCStringLiteral(const ObjCStringLiteral *S) {
diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp
index 1076dcd..e1a2709 100644
--- a/clang/lib/AST/TextNodeDumper.cpp
+++ b/clang/lib/AST/TextNodeDumper.cpp
@@ -2884,3 +2884,8 @@ void TextNodeDumper::VisitOpenACCLoopConstruct(const OpenACCLoopConstruct *S) {
else
OS << " parent: " << S->getParentComputeConstruct();
}
+
+void TextNodeDumper::VisitEmbedExpr(const EmbedExpr *S) {
+ AddChild("begin", [=] { OS << S->getStartingElementPos(); }); // Child labelled "begin": the starting element position.
+ AddChild("number of elements", [=] { OS << S->getDataElementCount(); }); // Child holding the data element count.
+}
diff --git a/clang/lib/Basic/FileManager.cpp b/clang/lib/Basic/FileManager.cpp
index 1dc51de..4509cee 100644
--- a/clang/lib/Basic/FileManager.cpp
+++ b/clang/lib/Basic/FileManager.cpp
@@ -530,13 +530,18 @@ void FileManager::fillRealPathName(FileEntry *UFE, llvm::StringRef FileName) {
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
FileManager::getBufferForFile(FileEntryRef FE, bool isVolatile,
- bool RequiresNullTerminator) {
+ bool RequiresNullTerminator,
+ std::optional<int64_t> MaybeLimit) {
const FileEntry *Entry = &FE.getFileEntry();
// If the content is living on the file entry, return a reference to it.
if (Entry->Content)
return llvm::MemoryBuffer::getMemBuffer(Entry->Content->getMemBufferRef());
uint64_t FileSize = Entry->getSize();
+
+ if (MaybeLimit)
+ FileSize = *MaybeLimit;
+
// If there's a high enough chance that the file have changed since we
// got its size, force a stat before opening it.
if (isVolatile || Entry->isNamedPipe())
diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp
index feea845..04cc9c7 100644
--- a/clang/lib/Basic/IdentifierTable.cpp
+++ b/clang/lib/Basic/IdentifierTable.cpp
@@ -425,8 +425,8 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
// collisions (if there were, the switch below would complain about duplicate
// case values). Note that this depends on 'if' being null terminated.
-#define HASH(LEN, FIRST, THIRD) \
- (LEN << 5) + (((FIRST-'a') + (THIRD-'a')) & 31)
+#define HASH(LEN, FIRST, THIRD) \
+ (LEN << 6) + (((FIRST - 'a') - (THIRD - 'a')) & 63)
#define CASE(LEN, FIRST, THIRD, NAME) \
case HASH(LEN, FIRST, THIRD): \
return memcmp(Name, #NAME, LEN) ? tok::pp_not_keyword : tok::pp_ ## NAME
@@ -441,6 +441,7 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
CASE( 4, 'e', 's', else);
CASE( 4, 'l', 'n', line);
CASE( 4, 's', 'c', sccs);
+ CASE(5, 'e', 'b', embed);
CASE( 5, 'e', 'd', endif);
CASE( 5, 'e', 'r', error);
CASE( 5, 'i', 'e', ident);
diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp
index b2a5cee..a8bb254 100644
--- a/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/clang/lib/CodeGen/CGExprAgg.cpp
@@ -509,6 +509,16 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType,
uint64_t NumInitElements = Args.size();
uint64_t NumArrayElements = AType->getNumElements();
+ for (const auto *Init : Args) {
+ if (const auto *Embed = dyn_cast<EmbedExpr>(Init->IgnoreParenImpCasts())) {
+ NumInitElements += Embed->getDataElementCount() - 1;
+ if (NumInitElements > NumArrayElements) {
+ NumInitElements = NumArrayElements;
+ break;
+ }
+ }
+ }
+
assert(NumInitElements <= NumArrayElements);
QualType elementType =
@@ -577,23 +587,37 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType,
llvm::Value *one = llvm::ConstantInt::get(CGF.SizeTy, 1);
- // Emit the explicit initializers.
- for (uint64_t i = 0; i != NumInitElements; ++i) {
+ auto Emit = [&](Expr *Init, uint64_t ArrayIndex) {
llvm::Value *element = begin;
- if (i > 0) {
- element = Builder.CreateInBoundsGEP(llvmElementType, begin,
- llvm::ConstantInt::get(CGF.SizeTy, i),
- "arrayinit.element");
+ if (ArrayIndex > 0) {
+ element = Builder.CreateInBoundsGEP(
+ llvmElementType, begin,
+ llvm::ConstantInt::get(CGF.SizeTy, ArrayIndex), "arrayinit.element");
// Tell the cleanup that it needs to destroy up to this
// element. TODO: some of these stores can be trivially
// observed to be unnecessary.
- if (endOfInit.isValid()) Builder.CreateStore(element, endOfInit);
+ if (endOfInit.isValid())
+ Builder.CreateStore(element, endOfInit);
}
LValue elementLV = CGF.MakeAddrLValue(
Address(element, llvmElementType, elementAlign), elementType);
- EmitInitializationToLValue(Args[i], elementLV);
+ EmitInitializationToLValue(Init, elementLV);
+ return true;
+ };
+
+ unsigned ArrayIndex = 0;
+ // Emit the explicit initializers.
+ for (uint64_t i = 0; i != NumInitElements; ++i) {
+ if (ArrayIndex >= NumInitElements)
+ break;
+ if (auto *EmbedS = dyn_cast<EmbedExpr>(Args[i]->IgnoreParenImpCasts())) {
+ EmbedS->doForEachDataElement(Emit, ArrayIndex);
+ } else {
+ Emit(Args[i], ArrayIndex);
+ ArrayIndex++;
+ }
}
// Check whether there's a non-trivial array-fill expression.
diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp
index 0712f40..0fd3792 100644
--- a/clang/lib/CodeGen/CGExprConstant.cpp
+++ b/clang/lib/CodeGen/CGExprConstant.cpp
@@ -1061,6 +1061,24 @@ public:
return Visit(E->getInitializer(), T);
}
+ llvm::Constant *ProduceIntToIntCast(const Expr *E, QualType DestType) { // Folds E to a ConstantInt of DestType's width, or returns nullptr.
+ QualType FromType = E->getType();
+ // See also HandleIntToIntCast in ExprConstant.cpp
+ if (FromType->isIntegerType())
+ if (llvm::Constant *C = Visit(E, FromType))
+ if (auto *CI = dyn_cast<llvm::ConstantInt>(C)) {
+ unsigned SrcWidth = CGM.getContext().getIntWidth(FromType);
+ unsigned DstWidth = CGM.getContext().getIntWidth(DestType);
+ if (DstWidth == SrcWidth)
+ return CI; // Same width: reuse the constant unchanged.
+ llvm::APInt A = FromType->isSignedIntegerType()
+ ? CI->getValue().sextOrTrunc(DstWidth) // Signed source: sign-extend or truncate.
+ : CI->getValue().zextOrTrunc(DstWidth); // Otherwise: zero-extend or truncate.
+ return llvm::ConstantInt::get(CGM.getLLVMContext(), A);
+ }
+ return nullptr; // Non-integer source, Visit failed, or the result was not a ConstantInt.
+ }
+
llvm::Constant *VisitCastExpr(const CastExpr *E, QualType destType) {
if (const auto *ECE = dyn_cast<ExplicitCastExpr>(E))
CGM.EmitExplicitCastExprType(ECE, Emitter.CGF);
@@ -1142,23 +1160,8 @@ public:
case CK_IntToOCLSampler:
llvm_unreachable("global sampler variables are not generated");
- case CK_IntegralCast: {
- QualType FromType = subExpr->getType();
- // See also HandleIntToIntCast in ExprConstant.cpp
- if (FromType->isIntegerType())
- if (llvm::Constant *C = Visit(subExpr, FromType))
- if (auto *CI = dyn_cast<llvm::ConstantInt>(C)) {
- unsigned SrcWidth = CGM.getContext().getIntWidth(FromType);
- unsigned DstWidth = CGM.getContext().getIntWidth(destType);
- if (DstWidth == SrcWidth)
- return CI;
- llvm::APInt A = FromType->isSignedIntegerType()
- ? CI->getValue().sextOrTrunc(DstWidth)
- : CI->getValue().zextOrTrunc(DstWidth);
- return llvm::ConstantInt::get(CGM.getLLVMContext(), A);
- }
- return nullptr;
- }
+ case CK_IntegralCast:
+ return ProduceIntToIntCast(subExpr, destType);
case CK_Dependent: llvm_unreachable("saw dependent cast!");
@@ -1249,15 +1252,42 @@ public:
return llvm::ConstantInt::get(CGM.getLLVMContext(), I->getValue());
}
+ static APValue withDestType(ASTContext &Ctx, const Expr *E, QualType SrcType,
+ QualType DestType, const llvm::APSInt &Value) { // Wraps Value as an APValue, converting to floating point when DestType is a different, floating type.
+ if (!Ctx.hasSameType(SrcType, DestType)) {
+ if (DestType->isFloatingType()) {
+ llvm::APFloat Result =
+ llvm::APFloat(Ctx.getFloatTypeSemantics(DestType), 1);
+ llvm::RoundingMode RM =
+ E->getFPFeaturesInEffect(Ctx.getLangOpts()).getRoundingMode();
+ if (RM == llvm::RoundingMode::Dynamic)
+ RM = llvm::RoundingMode::NearestTiesToEven; // Dynamic rounding is replaced with nearest-ties-to-even.
+ Result.convertFromAPInt(Value, Value.isSigned(), RM);
+ return APValue(Result);
+ }
+ }
+ return APValue(Value); // Same type, or a non-floating destination: keep the integer value as is.
+ }
+
llvm::Constant *EmitArrayInitialization(const InitListExpr *ILE, QualType T) {
auto *CAT = CGM.getContext().getAsConstantArrayType(ILE->getType());
assert(CAT && "can't emit array init for non-constant-bound array");
+ uint64_t NumInitElements = ILE->getNumInits();
const uint64_t NumElements = CAT->getZExtSize();
+ for (const auto *Init : ILE->inits()) {
+ if (const auto *Embed =
+ dyn_cast<EmbedExpr>(Init->IgnoreParenImpCasts())) {
+ NumInitElements += Embed->getDataElementCount() - 1;
+ if (NumInitElements > NumElements) {
+ NumInitElements = NumElements;
+ break;
+ }
+ }
+ }
// Initialising an array requires us to automatically
// initialise any elements that have not been initialised explicitly
- uint64_t NumInitableElts =
- std::min<uint64_t>(ILE->getNumInits(), NumElements);
+ uint64_t NumInitableElts = std::min<uint64_t>(NumInitElements, NumElements);
QualType EltType = CAT->getElementType();
@@ -1270,23 +1300,61 @@ public:
}
// Copy initializer elements.
- SmallVector<llvm::Constant*, 16> Elts;
+ SmallVector<llvm::Constant *, 16> Elts;
if (fillC && fillC->isNullValue())
Elts.reserve(NumInitableElts + 1);
else
Elts.reserve(NumElements);
llvm::Type *CommonElementType = nullptr;
- for (unsigned i = 0; i < NumInitableElts; ++i) {
- const Expr *Init = ILE->getInit(i);
- llvm::Constant *C = Emitter.tryEmitPrivateForMemory(Init, EltType);
+ auto Emit = [&](const Expr *Init, unsigned ArrayIndex) {
+ llvm::Constant *C = nullptr;
+ C = Emitter.tryEmitPrivateForMemory(Init, EltType);
if (!C)
- return nullptr;
- if (i == 0)
+ return false;
+ if (ArrayIndex == 0)
CommonElementType = C->getType();
else if (C->getType() != CommonElementType)
CommonElementType = nullptr;
Elts.push_back(C);
+ return true;
+ };
+
+ unsigned ArrayIndex = 0;
+ QualType DestTy = CAT->getElementType();
+ for (unsigned i = 0; i < ILE->getNumInits(); ++i) {
+ const Expr *Init = ILE->getInit(i);
+ if (auto *EmbedS = dyn_cast<EmbedExpr>(Init->IgnoreParenImpCasts())) {
+ StringLiteral *SL = EmbedS->getDataStringLiteral();
+ llvm::APSInt Value(CGM.getContext().getTypeSize(DestTy),
+ DestTy->isUnsignedIntegerType());
+ llvm::Constant *C;
+ for (unsigned I = EmbedS->getStartingElementPos(),
+ N = EmbedS->getDataElementCount();
+ I != EmbedS->getStartingElementPos() + N; ++I) {
+ Value = SL->getCodeUnit(I);
+ if (DestTy->isIntegerType()) {
+ C = llvm::ConstantInt::get(CGM.getLLVMContext(), Value);
+ } else {
+ C = Emitter.tryEmitPrivateForMemory(
+ withDestType(CGM.getContext(), Init, EmbedS->getType(), DestTy,
+ Value),
+ EltType);
+ }
+ if (!C)
+ return nullptr;
+ Elts.push_back(C);
+ ArrayIndex++;
+ }
+ if ((ArrayIndex - EmbedS->getDataElementCount()) == 0)
+ CommonElementType = C->getType();
+ else if (C->getType() != CommonElementType)
+ CommonElementType = nullptr;
+ } else {
+ if (!Emit(Init, ArrayIndex))
+ return nullptr;
+ ArrayIndex++;
+ }
}
llvm::ArrayType *Desired =
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 1b144c1..cbbe9fa 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -506,6 +506,7 @@ public:
}
Value *VisitSYCLUniqueStableNameExpr(SYCLUniqueStableNameExpr *E);
+ Value *VisitEmbedExpr(EmbedExpr *E);
Value *VisitOpaqueValueExpr(OpaqueValueExpr *E) {
if (E->isGLValue())
@@ -1796,6 +1797,12 @@ ScalarExprEmitter::VisitSYCLUniqueStableNameExpr(SYCLUniqueStableNameExpr *E) {
"usn_addr_cast");
}
+Value *ScalarExprEmitter::VisitEmbedExpr(EmbedExpr *E) {
+ assert(E->getDataElementCount() == 1); // Only single-element embed expressions are expected here.
+ auto It = E->begin();
+ return Builder.getInt((*It)->getValue()); // Emit that element's value as an integer constant.
+}
+
Value *ScalarExprEmitter::VisitShuffleVectorExpr(ShuffleVectorExpr *E) {
// Vector Mask Case
if (E->getNumSubExprs() == 2) {
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index b8d8ff3..1f85915 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -1220,7 +1220,8 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
Args.addAllArgs(CmdArgs,
{options::OPT_D, options::OPT_U, options::OPT_I_Group,
- options::OPT_F, options::OPT_index_header_map});
+ options::OPT_F, options::OPT_index_header_map,
+ options::OPT_embed_dir_EQ});
// Add -Wp, and -Xpreprocessor if using the preprocessor.
@@ -8505,6 +8506,9 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA,
// Pass along any -I options so we get proper .include search paths.
Args.AddAllArgs(CmdArgs, options::OPT_I_Group);
+ // Pass along any --embed-dir or similar options so we get proper embed paths.
+ Args.AddAllArgs(CmdArgs, options::OPT_embed_dir_EQ);
+
// Determine the original source input.
auto FindSource = [](const Action *S) -> const Action * {
while (S->getKind() != Action::InputClass) {
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 58694e5..cde4a84 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -4492,6 +4492,9 @@ static void GeneratePreprocessorArgs(const PreprocessorOptions &Opts,
if (Opts.DefineTargetOSMacros)
GenerateArg(Consumer, OPT_fdefine_target_os_macros);
+ for (const auto &EmbedEntry : Opts.EmbedEntries)
+ GenerateArg(Consumer, OPT_embed_dir_EQ, EmbedEntry);
+
// Don't handle LexEditorPlaceholders. It is implied by the action that is
// generated elsewhere.
}
@@ -4584,6 +4587,11 @@ static bool ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args,
}
}
+ for (const auto *A : Args.filtered(OPT_embed_dir_EQ)) {
+ StringRef Val = A->getValue();
+ Opts.EmbedEntries.push_back(std::string(Val));
+ }
+
// Always avoid lexing editor placeholders when we're just running the
// preprocessor as we never want to emit the
// "editor placeholder in source file" error in PP only mode.
diff --git a/clang/lib/Frontend/DependencyFile.cpp b/clang/lib/Frontend/DependencyFile.cpp
index 369816e..528eae2 100644
--- a/clang/lib/Frontend/DependencyFile.cpp
+++ b/clang/lib/Frontend/DependencyFile.cpp
@@ -62,6 +62,19 @@ struct DepCollectorPPCallbacks : public PPCallbacks {
/*IsMissing=*/false);
}
+ void EmbedDirective(SourceLocation, StringRef, bool,
+ OptionalFileEntryRef File,
+ const LexEmbedParametersResult &) override {
+ assert(File && "expected to only be called when the file is found");
+ StringRef FileName =
+ llvm::sys::path::remove_leading_dotslash(File->getName()); // Drop any leading "./" before recording.
+ DepCollector.maybeAddDependency(FileName,
+ /*FromModule=*/ false,
+ /*IsSystem=*/ false,
+ /*IsModuleFile=*/ false,
+ /*IsMissing=*/ false);
+ }
+
void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
StringRef FileName, bool IsAngled,
CharSourceRange FilenameRange,
@@ -77,6 +90,18 @@ struct DepCollectorPPCallbacks : public PPCallbacks {
// Files that actually exist are handled by FileChanged.
}
+ void HasEmbed(SourceLocation, StringRef, bool,
+ OptionalFileEntryRef File) override {
+ if (!File) // Nothing to record when no file was found.
+ return;
+ StringRef Filename =
+ llvm::sys::path::remove_leading_dotslash(File->getName()); // Drop any leading "./" before recording.
+ DepCollector.maybeAddDependency(Filename,
+ /*FromModule=*/false, /*IsSystem=*/false,
+ /*IsModuleFile=*/false,
+ /*IsMissing=*/false);
+ }
+
void HasInclude(SourceLocation Loc, StringRef SpelledFilename, bool IsAngled,
OptionalFileEntryRef File,
SrcMgr::CharacteristicKind FileType) override {
diff --git a/clang/lib/Frontend/DependencyGraph.cpp b/clang/lib/Frontend/DependencyGraph.cpp
index 20e5f23..c23ce66 100644
--- a/clang/lib/Frontend/DependencyGraph.cpp
+++ b/clang/lib/Frontend/DependencyGraph.cpp
@@ -43,7 +43,7 @@ private:
public:
DependencyGraphCallback(const Preprocessor *_PP, StringRef OutputFile,
StringRef SysRoot)
- : PP(_PP), OutputFile(OutputFile.str()), SysRoot(SysRoot.str()) { }
+ : PP(_PP), OutputFile(OutputFile.str()), SysRoot(SysRoot.str()) {}
void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
StringRef FileName, bool IsAngled,
@@ -53,6 +53,10 @@ public:
bool ModuleImported,
SrcMgr::CharacteristicKind FileType) override;
+ /// Callback for an #embed directive: records the embedded file as a
+ /// dependency of the file that contains the directive.
+ void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+ OptionalFileEntryRef File,
+ const LexEmbedParametersResult &Params) override;
+
void EndOfMainFile() override {
OutputGraphFile();
}
@@ -86,6 +90,24 @@ void DependencyGraphCallback::InclusionDirective(
AllFiles.insert(*FromFile);
}
+/// Adds a dependency edge from the file containing an #embed directive to the
+/// embedded file. Nothing is recorded when \p File is empty or when the
+/// directive's location cannot be mapped to a file entry.
+void DependencyGraphCallback::EmbedDirective(SourceLocation HashLoc, StringRef,
+                                             bool, OptionalFileEntryRef File,
+                                             const LexEmbedParametersResult &) {
+  if (File) {
+    // Map the expansion location of the directive to its containing file.
+    SourceManager &SourceMgr = PP->getSourceManager();
+    const FileID DirectiveFID =
+        SourceMgr.getFileID(SourceMgr.getExpansionLoc(HashLoc));
+    if (OptionalFileEntryRef Includer =
+            SourceMgr.getFileEntryRefForID(DirectiveFID)) {
+      Dependencies[*Includer].push_back(*File);
+      AllFiles.insert(*File);
+      AllFiles.insert(*Includer);
+    }
+  }
+}
+
raw_ostream &
DependencyGraphCallback::writeNodeReference(raw_ostream &OS,
const FileEntry *Node) {
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index e8c8a51..2d5c94c 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -508,6 +508,14 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,
Builder.defineMacro("__STDC_UTF_16__", "1");
Builder.defineMacro("__STDC_UTF_32__", "1");
+ // __has_embed definitions
+ Builder.defineMacro("__STDC_EMBED_NOT_FOUND__",
+ llvm::itostr(static_cast<int>(EmbedResult::NotFound)));
+ Builder.defineMacro("__STDC_EMBED_FOUND__",
+ llvm::itostr(static_cast<int>(EmbedResult::Found)));
+ Builder.defineMacro("__STDC_EMBED_EMPTY__",
+ llvm::itostr(static_cast<int>(EmbedResult::Empty)));
+
if (LangOpts.ObjC)
Builder.defineMacro("__OBJC__");
diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
index a26d2c3..0592423 100644
--- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp
+++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
@@ -11,11 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#include "clang/Frontend/Utils.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Frontend/PreprocessorOutputOptions.h"
+#include "clang/Frontend/Utils.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/PPCallbacks.h"
#include "clang/Lex/Pragma.h"
@@ -93,6 +93,7 @@ private:
bool DisableLineMarkers;
bool DumpDefines;
bool DumpIncludeDirectives;
+ bool DumpEmbedDirectives;
bool UseLineDirectives;
bool IsFirstFileEntered;
bool MinimizeWhitespace;
@@ -100,6 +101,7 @@ private:
bool KeepSystemIncludes;
raw_ostream *OrigOS;
std::unique_ptr<llvm::raw_null_ostream> NullOS;
+ unsigned NumToksToSkip;
Token PrevTok;
Token PrevPrevTok;
@@ -107,14 +109,16 @@ private:
public:
PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream *os, bool lineMarkers,
bool defines, bool DumpIncludeDirectives,
- bool UseLineDirectives, bool MinimizeWhitespace,
- bool DirectivesOnly, bool KeepSystemIncludes)
+ bool DumpEmbedDirectives, bool UseLineDirectives,
+ bool MinimizeWhitespace, bool DirectivesOnly,
+ bool KeepSystemIncludes)
: PP(pp), SM(PP.getSourceManager()), ConcatInfo(PP), OS(os),
DisableLineMarkers(lineMarkers), DumpDefines(defines),
DumpIncludeDirectives(DumpIncludeDirectives),
+ DumpEmbedDirectives(DumpEmbedDirectives),
UseLineDirectives(UseLineDirectives),
MinimizeWhitespace(MinimizeWhitespace), DirectivesOnly(DirectivesOnly),
- KeepSystemIncludes(KeepSystemIncludes), OrigOS(os) {
+ KeepSystemIncludes(KeepSystemIncludes), OrigOS(os), NumToksToSkip(0) {
CurLine = 0;
CurFilename += "<uninit>";
EmittedTokensOnThisLine = false;
@@ -129,6 +133,10 @@ public:
PrevPrevTok.startToken();
}
+ /// Returns true if #embed directives should be expanded into a comma-
+ /// delimited list of integer constants, and false if the directives are
+ /// printed as directives instead (i.e. DumpEmbedDirectives is set).
+ bool expandEmbedContents() const { return !DumpEmbedDirectives; }
+
bool isMinimizeWhitespace() const { return MinimizeWhitespace; }
void setEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; }
@@ -149,6 +157,9 @@ public:
void FileChanged(SourceLocation Loc, FileChangeReason Reason,
SrcMgr::CharacteristicKind FileType,
FileID PrevFID) override;
+ /// Callback for an #embed directive. When DumpEmbedDirectives is set, prints
+ /// the directive and updates the count of upcoming tokens to skip.
+ void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+ OptionalFileEntryRef File,
+ const LexEmbedParametersResult &Params) override;
void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
StringRef FileName, bool IsAngled,
CharSourceRange FilenameRange,
@@ -232,6 +243,9 @@ public:
void BeginModule(const Module *M);
void EndModule(const Module *M);
+
+ /// Number of upcoming tokens that the printing loop should lex and discard.
+ unsigned GetNumToksToSkip() const { return NumToksToSkip; }
+ /// Resets the pending skip count to zero.
+ void ResetSkipToks() { NumToksToSkip = 0; }
};
} // end anonymous namespace
@@ -399,6 +413,74 @@ void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
}
}
+/// Prints an #embed directive back out in directive form and records, in
+/// NumToksToSkip, how many of the tokens that the preprocessor injects for the
+/// directive must be dropped by PrintPreprocessedTokens(). Does nothing unless
+/// DumpEmbedDirectives is set.
+///
+/// \param HashLoc  Location of the directive; output is moved to its line.
+/// \param FileName File name printed between the quote or angle delimiters.
+/// \param IsAngled True to print the name as <name>, false to print "name".
+/// \param File     The embedded file; its size is used to decide whether the
+///                 embedded data is empty. A null File is treated as non-empty.
+/// \param Params   Parsed embed parameters, re-printed after the file name.
+void PrintPPOutputPPCallbacks::EmbedDirective(
+    SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+    OptionalFileEntryRef File, const LexEmbedParametersResult &Params) {
+  if (!DumpEmbedDirectives)
+    return;
+
+  // The EmbedDirective() callback is called before we produce the annotation
+  // token stream for the directive. We skip printing the annotation tokens
+  // within PrintPreprocessedTokens(), but we also need to skip the prefix,
+  // suffix, and if_empty tokens as those are inserted directly into the token
+  // stream and would otherwise be printed immediately after printing the
+  // #embed directive.
+  //
+  // FIXME: counting tokens to skip is a kludge but we have no way to know
+  // which tokens were inserted as part of the embed and which ones were
+  // explicitly written by the user.
+  MoveToLine(HashLoc, /*RequireStartOfLine=*/true);
+  *OS << "#embed " << (IsAngled ? '<' : '"') << FileName
+      << (IsAngled ? '>' : '"');
+
+  auto PrintToks = [&](llvm::ArrayRef<Token> Toks) {
+    SmallString<128> SpellingBuffer;
+    for (const Token &T : Toks) {
+      if (T.hasLeadingSpace())
+        *OS << " ";
+      *OS << PP.getSpelling(T, SpellingBuffer);
+    }
+  };
+
+  // Decide whether the data that actually gets embedded is empty once
+  // clang::offset and limit have been applied. The preprocessor injects only
+  // the if_empty tokens (possibly none) for empty data, and
+  // prefix + annotation + suffix otherwise; the skip count has to match that
+  // exactly or tokens written by the user would be dropped.
+  bool IsEmpty = false;
+  if (File) {
+    uint64_t Remaining = File->getSize();
+    if (Params.MaybeOffsetParam) {
+      const uint64_t Offset = Params.MaybeOffsetParam->Offset;
+      Remaining = Offset < Remaining ? Remaining - Offset : 0;
+    }
+    if (Params.MaybeLimitParam && Params.MaybeLimitParam->Limit < Remaining)
+      Remaining = Params.MaybeLimitParam->Limit;
+    IsEmpty = Remaining == 0;
+  }
+
+  if (Params.MaybeIfEmptyParam) {
+    *OS << " if_empty(";
+    PrintToks(Params.MaybeIfEmptyParam->Tokens);
+    *OS << ")";
+    // The if_empty tokens are only injected when the data is empty.
+    if (IsEmpty)
+      NumToksToSkip += Params.MaybeIfEmptyParam->Tokens.size();
+  }
+
+  if (Params.MaybeLimitParam) {
+    *OS << " limit(" << Params.MaybeLimitParam->Limit << ")";
+  }
+  if (Params.MaybeOffsetParam) {
+    *OS << " clang::offset(" << Params.MaybeOffsetParam->Offset << ")";
+  }
+  if (Params.MaybePrefixParam) {
+    *OS << " prefix(";
+    PrintToks(Params.MaybePrefixParam->Tokens);
+    *OS << ")";
+    // Prefix tokens are only injected alongside non-empty data.
+    if (!IsEmpty)
+      NumToksToSkip += Params.MaybePrefixParam->Tokens.size();
+  }
+  if (Params.MaybeSuffixParam) {
+    *OS << " suffix(";
+    PrintToks(Params.MaybeSuffixParam->Tokens);
+    *OS << ")";
+    // Suffix tokens are only injected alongside non-empty data.
+    if (!IsEmpty)
+      NumToksToSkip += Params.MaybeSuffixParam->Tokens.size();
+  }
+
+  // The annotation token itself only exists for non-empty data.
+  if (!IsEmpty)
+    NumToksToSkip++;
+
+  *OS << " /* clang -E -dE */";
+  setEmittedDirectiveOnThisLine();
+}
+
void PrintPPOutputPPCallbacks::InclusionDirective(
SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName,
bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File,
@@ -678,7 +760,7 @@ void PrintPPOutputPPCallbacks::HandleWhitespaceBeforeTok(const Token &Tok,
if (Tok.is(tok::eof) ||
(Tok.isAnnotation() && !Tok.is(tok::annot_header_unit) &&
!Tok.is(tok::annot_module_begin) && !Tok.is(tok::annot_module_end) &&
- !Tok.is(tok::annot_repl_input_end)))
+ !Tok.is(tok::annot_repl_input_end) && !Tok.is(tok::annot_embed)))
return;
// EmittedDirectiveOnThisLine takes priority over RequireSameLine.
@@ -878,6 +960,27 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
std::string Name = M->getFullModuleName();
Callbacks->OS->write(Name.data(), Name.size());
Callbacks->HandleNewlinesInToken(Name.data(), Name.size());
+ } else if (Tok.is(tok::annot_embed)) {
+ // Manually explode the binary data out to a stream of comma-delimited
+ // integer values. If the user passed -dE, that is handled by the
+ // EmbedDirective() callback. We should only get here if the user did not
+ // pass -dE.
+ assert(Callbacks->expandEmbedContents() &&
+ "did not expect an embed annotation");
+ auto *Data =
+ reinterpret_cast<EmbedAnnotationData *>(Tok.getAnnotationValue());
+
+ // Loop over the contents and print them as a comma-delimited list of
+ // values.
+ bool PrintComma = false;
+ for (auto Iter = Data->BinaryData.begin(), End = Data->BinaryData.end();
+ Iter != End; ++Iter) {
+ if (PrintComma)
+ *Callbacks->OS << ", ";
+ *Callbacks->OS << static_cast<unsigned>(*Iter);
+ PrintComma = true;
+ }
+ IsStartOfLine = true;
} else if (Tok.isAnnotation()) {
// Ignore annotation tokens created by pragmas - the pragmas themselves
// will be reproduced in the preprocessed output.
@@ -926,6 +1029,10 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
if (Tok.is(tok::eof)) break;
PP.Lex(Tok);
+ // If lexing that token causes us to need to skip future tokens, do so now.
+ for (unsigned I = 0, Skip = Callbacks->GetNumToksToSkip(); I < Skip; ++I)
+ PP.Lex(Tok);
+ Callbacks->ResetSkipToks();
}
}
@@ -982,8 +1089,9 @@ void clang::DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS,
PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(
PP, OS, !Opts.ShowLineMarkers, Opts.ShowMacros,
- Opts.ShowIncludeDirectives, Opts.UseLineDirectives,
- Opts.MinimizeWhitespace, Opts.DirectivesOnly, Opts.KeepSystemIncludes);
+ Opts.ShowIncludeDirectives, Opts.ShowEmbedDirectives,
+ Opts.UseLineDirectives, Opts.MinimizeWhitespace, Opts.DirectivesOnly,
+ Opts.KeepSystemIncludes);
// Expand macros in pragmas with -fms-extensions. The assumption is that
// the majority of pragmas in such a file will be Microsoft pragmas.
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index 8e73864..b7ee0c0 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -19,6 +19,7 @@
#include "clang/Basic/Module.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Lex/CodeCompletionHandler.h"
#include "clang/Lex/HeaderSearch.h"
@@ -39,6 +40,7 @@
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/AlignOf.h"
@@ -82,8 +84,7 @@ Preprocessor::AllocateVisibilityMacroDirective(SourceLocation Loc,
/// Read and discard all tokens remaining on the current line until
/// the tok::eod token is found.
-SourceRange Preprocessor::DiscardUntilEndOfDirective() {
- Token Tmp;
+SourceRange Preprocessor::DiscardUntilEndOfDirective(Token &Tmp) {
SourceRange Res;
LexUnexpandedToken(Tmp);
@@ -1073,6 +1074,74 @@ OptionalFileEntryRef Preprocessor::LookupFile(
return std::nullopt;
}
+/// Looks up the file named by an #embed-style resource name.
+///
+/// Search order:
+///   1. An absolute \p Filename is looked up directly; if that fails the
+///      search fails immediately.
+///   2. For a non-angled name: relative to the directory of
+///      \p LookupFromFile (when it is non-null and has a real path name), then
+///      relative to the directory the FileManager reports for ".".
+///   3. Relative to each entry of PPOpts->EmbedEntries, in order.
+///
+/// \param Filename       The name to look up.
+/// \param isAngled       True if the name was written with angle brackets;
+///                       skips step 2.
+/// \param OpenFile       Forwarded to FileManager::getFileRef.
+/// \param LookupFromFile File containing the directive, or null.
+/// \returns the first file found, or std::nullopt if no candidate exists.
+OptionalFileEntryRef
+Preprocessor::LookupEmbedFile(StringRef Filename, bool isAngled, bool OpenFile,
+                              const FileEntry *LookupFromFile) {
+  FileManager &FM = this->getFileManager();
+  if (llvm::sys::path::is_absolute(Filename)) {
+    // lookup path or immediately fail
+    llvm::Expected<FileEntryRef> ShouldBeEntry =
+        FM.getFileRef(Filename, OpenFile);
+    return llvm::expectedToOptional(std::move(ShouldBeEntry));
+  }
+
+  // Builds "<StartingFrom>/<FileName>" in LookupPath, optionally dropping the
+  // last component of StartingFrom first (used when StartingFrom names a file
+  // rather than a directory).
+  auto SeparateComponents = [](SmallVectorImpl<char> &LookupPath,
+                               StringRef StartingFrom, StringRef FileName,
+                               bool RemoveInitialFileComponentFromLookupPath) {
+    llvm::sys::path::native(StartingFrom, LookupPath);
+    if (RemoveInitialFileComponentFromLookupPath)
+      llvm::sys::path::remove_filename(LookupPath);
+    if (!LookupPath.empty() &&
+        !llvm::sys::path::is_separator(LookupPath.back())) {
+      LookupPath.push_back(llvm::sys::path::get_separator().front());
+    }
+    LookupPath.append(FileName.begin(), FileName.end());
+  };
+
+  // Otherwise, it's search time!
+  SmallString<512> LookupPath;
+  // Non-angled lookup
+  if (!isAngled) {
+    if (LookupFromFile) {
+      // Use file-based lookup.
+      StringRef FullFileDir = LookupFromFile->tryGetRealPathName();
+      if (!FullFileDir.empty()) {
+        SeparateComponents(LookupPath, FullFileDir, Filename, true);
+        llvm::Expected<FileEntryRef> ShouldBeEntry =
+            FM.getFileRef(LookupPath, OpenFile);
+        if (ShouldBeEntry)
+          return llvm::expectedToOptional(std::move(ShouldBeEntry));
+        llvm::consumeError(ShouldBeEntry.takeError());
+      }
+    }
+
+    // Otherwise, do working directory lookup.
+    LookupPath.clear();
+    auto MaybeWorkingDirEntry = FM.getDirectoryRef(".");
+    if (MaybeWorkingDirEntry) {
+      DirectoryEntryRef WorkingDirEntry = *MaybeWorkingDirEntry;
+      StringRef WorkingDir = WorkingDirEntry.getName();
+      if (!WorkingDir.empty()) {
+        SeparateComponents(LookupPath, WorkingDir, Filename, false);
+        llvm::Expected<FileEntryRef> ShouldBeEntry =
+            FM.getFileRef(LookupPath, OpenFile);
+        if (ShouldBeEntry)
+          return llvm::expectedToOptional(std::move(ShouldBeEntry));
+        llvm::consumeError(ShouldBeEntry.takeError());
+      }
+    }
+  }
+
+  // Try every configured embed directory. A miss in one directory must not end
+  // the search, so only return on success and consume the error otherwise.
+  for (const auto &Entry : PPOpts->EmbedEntries) {
+    LookupPath.clear();
+    SeparateComponents(LookupPath, Entry, Filename, false);
+    llvm::Expected<FileEntryRef> ShouldBeEntry =
+        FM.getFileRef(LookupPath, OpenFile);
+    if (ShouldBeEntry)
+      return llvm::expectedToOptional(std::move(ShouldBeEntry));
+    llvm::consumeError(ShouldBeEntry.takeError());
+  }
+  return std::nullopt;
+}
+
//===----------------------------------------------------------------------===//
// Preprocessor Directive Handling.
//===----------------------------------------------------------------------===//
@@ -1168,6 +1237,7 @@ void Preprocessor::HandleDirective(Token &Result) {
case tok::pp_include_next:
case tok::pp___include_macros:
case tok::pp_pragma:
+ case tok::pp_embed:
Diag(Result, diag::err_embedded_directive) << II->getName();
Diag(*ArgMacro, diag::note_macro_expansion_here)
<< ArgMacro->getIdentifierInfo();
@@ -1282,6 +1352,11 @@ void Preprocessor::HandleDirective(Token &Result) {
return HandleIdentSCCSDirective(Result);
case tok::pp_sccs:
return HandleIdentSCCSDirective(Result);
+ case tok::pp_embed:
+ return HandleEmbedDirective(SavedHash.getLocation(), Result,
+ getCurrentFileLexer()
+ ? *getCurrentFileLexer()->getFileEntry()
+ : static_cast<FileEntry *>(nullptr));
case tok::pp_assert:
//isExtension = true; // FIXME: implement #assert
break;
@@ -3543,3 +3618,401 @@ void Preprocessor::HandleElifFamilyDirective(Token &ElifToken,
HashToken.getLocation(), CI.IfLoc, /*Foundnonskip*/ true,
/*FoundElse*/ CI.FoundElse, ElifToken.getLocation());
}
+
+/// Lexes a sequence of embed parameters, stopping at tok::eod or, when
+/// \p ForHasEmbed is true, at the closing tok::r_paren.
+///
+/// Recognized parameters are limit, clang::offset, prefix, suffix and
+/// if_empty (also in their __name__ spelling). A duplicated parameter is
+/// diagnosed and the later occurrence wins. Any other parameter is counted in
+/// UnrecognizedParams and, unless \p ForHasEmbed is true, diagnosed as an
+/// error.
+///
+/// \param CurTok      Receives every token lexed; on return it holds the token
+///                    at which lexing stopped.
+/// \param ForHasEmbed Selects the terminator and whether unknown parameters
+///                    are tolerated.
+/// \returns the parsed parameters, or std::nullopt after a diagnosed error. On
+/// every error path the remainder of the directive has been discarded, so
+/// \p CurTok is tok::eod.
+std::optional<LexEmbedParametersResult>
+Preprocessor::LexEmbedParameters(Token &CurTok, bool ForHasEmbed) {
+  LexEmbedParametersResult Result{};
+  tok::TokenKind EndTokenKind = ForHasEmbed ? tok::r_paren : tok::eod;
+  Result.ParamRange = {CurTok.getLocation(), CurTok.getLocation()};
+
+  // Every failure path must leave CurTok on tok::eod; HandleEmbedDirective
+  // asserts exactly that when no result is returned.
+  auto SkipToEOD = [&]() {
+    if (CurTok.isNot(tok::eod))
+      DiscardUntilEndOfDirective(CurTok);
+  };
+
+  auto DiagMismatchedBracesAndSkipToEOD =
+      [&](tok::TokenKind Expected,
+          std::pair<tok::TokenKind, SourceLocation> Matches) {
+        Result.ParamRange.setEnd(CurTok.getEndLoc());
+        Diag(CurTok, diag::err_expected) << Expected;
+        Diag(Matches.second, diag::note_matching) << Matches.first;
+        SkipToEOD();
+      };
+
+  auto ExpectOrDiagAndSkipToEOD = [&](tok::TokenKind Kind) {
+    if (CurTok.isNot(Kind)) {
+      Result.ParamRange.setEnd(CurTok.getEndLoc());
+      Diag(CurTok, diag::err_expected) << Kind;
+      SkipToEOD();
+      return false;
+    }
+    return true;
+  };
+
+  // C23 6.10:
+  // pp-parameter-name:
+  //   pp-standard-parameter
+  //   pp-prefixed-parameter
+  //
+  // pp-standard-parameter:
+  //   identifier
+  //
+  // pp-prefixed-parameter:
+  //   identifier :: identifier
+  auto LexPPParameterName = [&]() -> std::optional<std::string> {
+    // We expect the current token to be an identifier; if it's not, things
+    // have gone wrong.
+    if (!ExpectOrDiagAndSkipToEOD(tok::identifier))
+      return std::nullopt;
+
+    const IdentifierInfo *Prefix = CurTok.getIdentifierInfo();
+
+    // Lex another token; it is either a :: or we're done with the parameter
+    // name.
+    LexNonComment(CurTok);
+    if (CurTok.is(tok::coloncolon)) {
+      // We found a ::, so lex another identifier token.
+      LexNonComment(CurTok);
+      if (!ExpectOrDiagAndSkipToEOD(tok::identifier))
+        return std::nullopt;
+
+      const IdentifierInfo *Suffix = CurTok.getIdentifierInfo();
+
+      // Lex another token so we're past the name.
+      LexNonComment(CurTok);
+      return (llvm::Twine(Prefix->getName()) + "::" + Suffix->getName()).str();
+    }
+    return Prefix->getName().str();
+  };
+
+  // C23 6.10p5: In all aspects, a preprocessor standard parameter specified by
+  // this document as an identifier pp_param and an identifier of the form
+  // __pp_param__ shall behave the same when used as a preprocessor parameter,
+  // except for the spelling.
+  auto NormalizeParameterName = [](StringRef Name) {
+    if (Name.size() > 4 && Name.starts_with("__") && Name.ends_with("__"))
+      return Name.substr(2, Name.size() - 4);
+    return Name;
+  };
+
+  // Lexes "( constant-expression )" and returns its non-negative value. The
+  // expression is evaluated with the rules used for #if.
+  auto LexParenthesizedIntegerExpr = [&]() -> std::optional<size_t> {
+    if (!ExpectOrDiagAndSkipToEOD(tok::l_paren))
+      return std::nullopt;
+
+    // We do not consume the ( because EvaluateDirectiveExpression will lex
+    // the next token for us.
+    IdentifierInfo *ParameterIfNDef = nullptr;
+    bool EvaluatedDefined = false;
+    DirectiveEvalResult LimitEvalResult = EvaluateDirectiveExpression(
+        ParameterIfNDef, CurTok, EvaluatedDefined, /*CheckForEOD=*/false);
+
+    if (!LimitEvalResult.Value) {
+      // If there was an error evaluating the directive expression, we expect
+      // to be at the end of directive token.
+      assert(CurTok.is(tok::eod) && "expect to be at the end of directive");
+      return std::nullopt;
+    }
+
+    if (!ExpectOrDiagAndSkipToEOD(tok::r_paren))
+      return std::nullopt;
+
+    // Eat the ).
+    LexNonComment(CurTok);
+
+    // C23 6.10.3.2p2: The token defined shall not appear within the constant
+    // expression.
+    if (EvaluatedDefined) {
+      Diag(CurTok, diag::err_defined_in_pp_embed);
+      SkipToEOD();
+      return std::nullopt;
+    }
+
+    const llvm::APSInt &Value = *LimitEvalResult.Value;
+    if (Value.isNegative()) {
+      Diag(CurTok, diag::err_requires_positive_value)
+          << toString(Value, 10) << /*positive*/ 0;
+      SkipToEOD();
+      return std::nullopt;
+    }
+    return Value.getLimitedValue();
+  };
+
+  auto GetMatchingCloseBracket = [](tok::TokenKind Kind) {
+    switch (Kind) {
+    case tok::l_paren:
+      return tok::r_paren;
+    case tok::l_brace:
+      return tok::r_brace;
+    case tok::l_square:
+      return tok::r_square;
+    default:
+      llvm_unreachable("should not get here");
+    }
+  };
+
+  // Lexes "( balanced-token-sequence )" and appends the tokens between the
+  // outer parentheses to Tokens. Returns false after diagnosing a problem.
+  auto LexParenthesizedBalancedTokenSoup =
+      [&](llvm::SmallVectorImpl<Token> &Tokens) {
+        std::vector<std::pair<tok::TokenKind, SourceLocation>> BracketStack;
+
+        // We expect the current token to be a left paren.
+        if (!ExpectOrDiagAndSkipToEOD(tok::l_paren))
+          return false;
+        LexNonComment(CurTok); // Eat the (
+
+        // Keep going until the ')' that closes the argument list, i.e. a
+        // right paren seen while no inner bracket is still open. Tracking
+        // this with the bracket stack (rather than a single flag) keeps
+        // arbitrarily nested parentheses balanced.
+        while (CurTok.isNot(tok::eod) &&
+               (!BracketStack.empty() || CurTok.isNot(tok::r_paren))) {
+          switch (CurTok.getKind()) {
+          default: // Shutting up diagnostics about not fully-covered switch.
+            break;
+          case tok::l_paren:
+          case tok::l_brace:
+          case tok::l_square:
+            BracketStack.push_back({CurTok.getKind(), CurTok.getLocation()});
+            break;
+          case tok::r_paren:
+          case tok::r_brace:
+          case tok::r_square: {
+            // A closing brace or square bracket with nothing open cannot be
+            // matched; diagnose it instead of reading an empty stack.
+            if (BracketStack.empty()) {
+              ExpectOrDiagAndSkipToEOD(tok::r_paren);
+              return false;
+            }
+            tok::TokenKind Matching =
+                GetMatchingCloseBracket(BracketStack.back().first);
+            if (CurTok.getKind() != Matching) {
+              DiagMismatchedBracesAndSkipToEOD(Matching, BracketStack.back());
+              return false;
+            }
+            BracketStack.pop_back();
+          } break;
+          }
+          Tokens.push_back(CurTok);
+          LexNonComment(CurTok);
+        }
+
+        // When we're done, we want to eat the closing paren.
+        if (!ExpectOrDiagAndSkipToEOD(tok::r_paren))
+          return false;
+
+        LexNonComment(CurTok); // Eat the )
+        return true;
+      };
+
+  LexNonComment(CurTok); // Prime the pump.
+  while (!CurTok.isOneOf(EndTokenKind, tok::eod)) {
+    SourceLocation ParamStartLoc = CurTok.getLocation();
+    std::optional<std::string> ParamName = LexPPParameterName();
+    if (!ParamName)
+      return std::nullopt;
+    StringRef Parameter = NormalizeParameterName(*ParamName);
+
+    // Lex the parameters (dependent on the parameter type we want!).
+    //
+    // C23 6.10.3.Xp1: The X standard embed parameter may appear zero times or
+    // one time in the embed parameter sequence.
+    if (Parameter == "limit") {
+      if (Result.MaybeLimitParam)
+        Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
+
+      std::optional<size_t> Limit = LexParenthesizedIntegerExpr();
+      if (!Limit)
+        return std::nullopt;
+      Result.MaybeLimitParam =
+          PPEmbedParameterLimit{*Limit, {ParamStartLoc, CurTok.getLocation()}};
+    } else if (Parameter == "clang::offset") {
+      if (Result.MaybeOffsetParam)
+        Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
+
+      std::optional<size_t> Offset = LexParenthesizedIntegerExpr();
+      if (!Offset)
+        return std::nullopt;
+      Result.MaybeOffsetParam = PPEmbedParameterOffset{
+          *Offset, {ParamStartLoc, CurTok.getLocation()}};
+    } else if (Parameter == "prefix") {
+      if (Result.MaybePrefixParam)
+        Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
+
+      SmallVector<Token, 4> Soup;
+      if (!LexParenthesizedBalancedTokenSoup(Soup))
+        return std::nullopt;
+      Result.MaybePrefixParam = PPEmbedParameterPrefix{
+          std::move(Soup), {ParamStartLoc, CurTok.getLocation()}};
+    } else if (Parameter == "suffix") {
+      if (Result.MaybeSuffixParam)
+        Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
+
+      SmallVector<Token, 4> Soup;
+      if (!LexParenthesizedBalancedTokenSoup(Soup))
+        return std::nullopt;
+      Result.MaybeSuffixParam = PPEmbedParameterSuffix{
+          std::move(Soup), {ParamStartLoc, CurTok.getLocation()}};
+    } else if (Parameter == "if_empty") {
+      if (Result.MaybeIfEmptyParam)
+        Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
+
+      SmallVector<Token, 4> Soup;
+      if (!LexParenthesizedBalancedTokenSoup(Soup))
+        return std::nullopt;
+      Result.MaybeIfEmptyParam = PPEmbedParameterIfEmpty{
+          std::move(Soup), {ParamStartLoc, CurTok.getLocation()}};
+    } else {
+      ++Result.UnrecognizedParams;
+
+      // If there's a left paren, we need to parse a balanced token sequence
+      // and just eat those tokens.
+      if (CurTok.is(tok::l_paren)) {
+        SmallVector<Token, 4> Soup;
+        if (!LexParenthesizedBalancedTokenSoup(Soup))
+          return std::nullopt;
+      }
+      if (!ForHasEmbed) {
+        Diag(CurTok, diag::err_pp_unknown_parameter) << 1 << Parameter;
+        // Drop whatever follows the unknown parameter so the caller finds
+        // the directive fully consumed.
+        SkipToEOD();
+        return std::nullopt;
+      }
+    }
+  }
+  Result.ParamRange.setEnd(CurTok.getLocation());
+  return Result;
+}
+
+/// Injects the tokens that stand for an #embed directive into the token
+/// stream.
+///
+/// For empty \p BinaryContents only the if_empty tokens (if that parameter was
+/// given) are entered. Otherwise the stream consists of the prefix tokens, one
+/// tok::annot_embed token carrying the file name and data, and the suffix
+/// tokens.
+void Preprocessor::HandleEmbedDirectiveImpl(
+    SourceLocation HashLoc, StringRef ResolvedFilename,
+    const LexEmbedParametersResult &Params, StringRef BinaryContents) {
+  if (BinaryContents.empty()) {
+    // If we have no binary contents, the only thing we need to emit are the
+    // if_empty tokens, if any.
+    // FIXME: this loses AST fidelity; nothing in the compiler will see that
+    // these tokens came from #embed. We have to hack around this when printing
+    // preprocessed output. The same is true for prefix and suffix tokens.
+    if (!Params.MaybeIfEmptyParam)
+      return;
+
+    ArrayRef<Token> IfEmptyToks = Params.MaybeIfEmptyParam->Tokens;
+    const size_t IfEmptyLen = IfEmptyToks.size();
+    auto Replacement = std::make_unique<Token[]>(IfEmptyLen);
+    llvm::copy(IfEmptyToks, Replacement.get());
+    EnterTokenStream(std::move(Replacement), IfEmptyLen, true, true);
+    return;
+  }
+
+  const size_t PrefixLen = Params.PrefixTokenCount();
+  const size_t SuffixLen = Params.SuffixTokenCount();
+  const size_t StreamLen = PrefixLen + 1 + SuffixLen;
+  auto Stream = std::make_unique<Token[]>(StreamLen);
+  Token *Cursor = Stream.get();
+
+  // Prefix tokens come first.
+  if (Params.MaybePrefixParam) {
+    llvm::copy(Params.MaybePrefixParam->Tokens, Cursor);
+    Cursor += PrefixLen;
+  }
+
+  // Then the single annotation token that carries the embedded data.
+  EmbedAnnotationData *Data = new (BP) EmbedAnnotationData;
+  Data->FileName = ResolvedFilename;
+  Data->BinaryData = BinaryContents;
+
+  Token &Annot = *Cursor++;
+  Annot.startToken();
+  Annot.setKind(tok::annot_embed);
+  Annot.setAnnotationRange(HashLoc);
+  Annot.setAnnotationValue(Data);
+
+  // Suffix tokens close the stream.
+  if (Params.MaybeSuffixParam) {
+    llvm::copy(Params.MaybeSuffixParam->Tokens, Cursor);
+    Cursor += SuffixLen;
+  }
+
+  assert(Cursor == Stream.get() + StreamLen &&
+         "Calculated the incorrect number of tokens");
+  EnterTokenStream(std::move(Stream), StreamLen, true, true);
+}
+
+/// Handles an #embed directive: lexes the resource name and the embed
+/// parameters, locates and reads the file, applies clang::offset and limit to
+/// the data, notifies the callbacks and injects the resulting tokens.
+///
+/// \param HashLoc        Location passed on to the callbacks and used for the
+///                       annotation token.
+/// \param EmbedTok       The directive-name token; extension/compatibility
+///                       diagnostics are reported on it.
+/// \param LookupFromFile File the directive appears in, or null; used for
+///                       the quoted-name lookup.
+void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
+                                        const FileEntry *LookupFromFile) {
+  // Give the usual extension/compatibility warnings.
+  if (LangOpts.C23)
+    Diag(EmbedTok, diag::warn_compat_pp_embed_directive);
+  else
+    Diag(EmbedTok, diag::ext_pp_embed_directive)
+        << (LangOpts.CPlusPlus ? /*Clang*/ 1 : /*C23*/ 0);
+
+  // Parse the filename header
+  Token FilenameTok;
+  if (LexHeaderName(FilenameTok))
+    return;
+
+  if (FilenameTok.isNot(tok::header_name)) {
+    Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
+    if (FilenameTok.isNot(tok::eod))
+      DiscardUntilEndOfDirective();
+    return;
+  }
+
+  // Parse the optional sequence of
+  // directive-parameters:
+  //     identifier parameter-name-list[opt] directive-argument-list[opt]
+  // directive-argument-list:
+  //    '(' balanced-token-sequence ')'
+  // parameter-name-list:
+  //    '::' identifier parameter-name-list[opt]
+  Token CurTok;
+  std::optional<LexEmbedParametersResult> Params =
+      LexEmbedParameters(CurTok, /*ForHasEmbed=*/false);
+
+  assert((Params || CurTok.is(tok::eod)) &&
+         "expected success or to be at the end of the directive");
+  if (!Params)
+    return;
+
+  // Now, splat the data out!
+  SmallString<128> FilenameBuffer;
+  StringRef Filename = getSpelling(FilenameTok, FilenameBuffer);
+  StringRef OriginalFilename = Filename;
+  bool isAngled =
+      GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
+  // If GetIncludeFilenameSpelling set the start ptr to null, there was an
+  // error. Bail out here rather than only asserting, so that builds without
+  // assertions do not go on to look up an empty file name.
+  if (Filename.empty())
+    return;
+
+  OptionalFileEntryRef MaybeFileRef =
+      this->LookupEmbedFile(Filename, isAngled, true, LookupFromFile);
+  if (!MaybeFileRef) {
+    // could not find file
+    if (Callbacks && Callbacks->EmbedFileNotFound(OriginalFilename)) {
+      return;
+    }
+    Diag(FilenameTok, diag::err_pp_file_not_found) << Filename;
+    return;
+  }
+  std::optional<llvm::MemoryBufferRef> MaybeFile =
+      getSourceManager().getMemoryBufferForFileOrNone(*MaybeFileRef);
+  if (!MaybeFile) {
+    // could not find file
+    Diag(FilenameTok, diag::err_cannot_open_file)
+        << Filename << "a buffer to the contents could not be created";
+    return;
+  }
+  StringRef BinaryContents = MaybeFile->getBuffer();
+
+  // The order is important between 'offset' and 'limit'; we want to offset
+  // first and then limit second; otherwise we may reduce the notional resource
+  // size to something too small to offset into.
+  if (Params->MaybeOffsetParam) {
+    // FIXME: just like with the limit() and if_empty() parameters, this loses
+    // source fidelity in the AST; it has no idea that there was an offset
+    // involved.
+    // offsets all the way to the end of the file make for an empty file.
+    BinaryContents = BinaryContents.substr(Params->MaybeOffsetParam->Offset);
+  }
+
+  if (Params->MaybeLimitParam) {
+    // FIXME: just like with the clang::offset() and if_empty() parameters,
+    // this loses source fidelity in the AST; it has no idea there was a limit
+    // involved.
+    BinaryContents = BinaryContents.substr(0, Params->MaybeLimitParam->Limit);
+  }
+
+  if (Callbacks)
+    Callbacks->EmbedDirective(HashLoc, Filename, isAngled, MaybeFileRef,
+                              *Params);
+  HandleEmbedDirectiveImpl(HashLoc, Filename, *Params, BinaryContents);
+}
diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp
index f267efabd..8bb82bd 100644
--- a/clang/lib/Lex/PPExpressions.cpp
+++ b/clang/lib/Lex/PPExpressions.cpp
@@ -870,7 +870,9 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec,
/// may occur after a #if or #elif directive. If the expression is equivalent
/// to "!defined(X)" return X in IfNDefMacro.
Preprocessor::DirectiveEvalResult
-Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
+Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
+ Token &Tok, bool &EvaluatedDefined,
+ bool CheckForEoD) {
SaveAndRestore PPDir(ParsingIfOrElifDirective, true);
// Save the current state of 'DisableMacroExpansion' and reset it to false. If
// 'DisableMacroExpansion' is true, then we must be in a macro argument list
@@ -882,7 +884,6 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
DisableMacroExpansion = false;
// Peek ahead one token.
- Token Tok;
LexNonComment(Tok);
// C99 6.10.1p3 - All expressions are evaluated as intmax_t or uintmax_t.
@@ -895,7 +896,7 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
// Parse error, skip the rest of the macro line.
SourceRange ConditionRange = ExprStartLoc;
if (Tok.isNot(tok::eod))
- ConditionRange = DiscardUntilEndOfDirective();
+ ConditionRange = DiscardUntilEndOfDirective(Tok);
// Restore 'DisableMacroExpansion'.
DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
@@ -903,11 +904,14 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
// We cannot trust the source range from the value because there was a
// parse error. Track the range manually -- the end of the directive is the
// end of the condition range.
- return {false,
+ return {std::nullopt,
+ false,
DT.IncludedUndefinedIds,
{ExprStartLoc, ConditionRange.getEnd()}};
}
+ EvaluatedDefined = DT.State != DefinedTracker::Unknown;
+
// If we are at the end of the expression after just parsing a value, there
// must be no (unparenthesized) binary operators involved, so we can exit
// directly.
@@ -919,7 +923,10 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
// Restore 'DisableMacroExpansion'.
DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
- return {ResVal.Val != 0, DT.IncludedUndefinedIds, ResVal.getRange()};
+ bool IsNonZero = ResVal.Val != 0;
+ SourceRange ValRange = ResVal.getRange();
+ return {std::move(ResVal.Val), IsNonZero, DT.IncludedUndefinedIds,
+ ValRange};
}
// Otherwise, we must have a binary operator (e.g. "#if 1 < 2"), so parse the
@@ -928,21 +935,37 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
Tok, true, DT.IncludedUndefinedIds, *this)) {
// Parse error, skip the rest of the macro line.
if (Tok.isNot(tok::eod))
- DiscardUntilEndOfDirective();
+ DiscardUntilEndOfDirective(Tok);
// Restore 'DisableMacroExpansion'.
DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
- return {false, DT.IncludedUndefinedIds, ResVal.getRange()};
+ SourceRange ValRange = ResVal.getRange();
+ return {std::nullopt, false, DT.IncludedUndefinedIds, ValRange};
}
- // If we aren't at the tok::eod token, something bad happened, like an extra
- // ')' token.
- if (Tok.isNot(tok::eod)) {
- Diag(Tok, diag::err_pp_expected_eol);
- DiscardUntilEndOfDirective();
+ if (CheckForEoD) {
+ // If we aren't at the tok::eod token, something bad happened, like an extra
+ // ')' token.
+ if (Tok.isNot(tok::eod)) {
+ Diag(Tok, diag::err_pp_expected_eol);
+ DiscardUntilEndOfDirective(Tok);
+ }
}
+ EvaluatedDefined = EvaluatedDefined || DT.State != DefinedTracker::Unknown;
+
// Restore 'DisableMacroExpansion'.
DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
- return {ResVal.Val != 0, DT.IncludedUndefinedIds, ResVal.getRange()};
+ bool IsNonZero = ResVal.Val != 0;
+ SourceRange ValRange = ResVal.getRange();
+ return {std::move(ResVal.Val), IsNonZero, DT.IncludedUndefinedIds, ValRange};
+}
+
+/// Convenience overload for callers that need neither the last lexed token
+/// nor the EvaluatedDefined flag: both are routed into locals that are
+/// dropped once the full overload returns.
+Preprocessor::DirectiveEvalResult
+Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
+                                          bool CheckForEoD) {
+  Token Tok;
+  // Initialized because the full overload can return on a parse error before
+  // it ever assigns the flag.
+  bool EvaluatedDefined = false;
+  return EvaluateDirectiveExpression(IfNDefMacro, Tok, EvaluatedDefined,
+                                     CheckForEoD);
}
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index f085b94..3913ff0 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -380,6 +380,7 @@ void Preprocessor::RegisterBuiltinMacros() {
Ident__has_c_attribute = nullptr;
Ident__has_declspec = RegisterBuiltinMacro(*this, "__has_declspec_attribute");
+ Ident__has_embed = RegisterBuiltinMacro(*this, "__has_embed");
Ident__has_include = RegisterBuiltinMacro(*this, "__has_include");
Ident__has_include_next = RegisterBuiltinMacro(*this, "__has_include_next");
Ident__has_warning = RegisterBuiltinMacro(*this, "__has_warning");
@@ -1279,6 +1280,105 @@ static bool EvaluateHasIncludeCommon(Token &Tok, IdentifierInfo *II,
return File.has_value();
}
+/// EvaluateHasEmbed - Process a '__has_embed("foo" params...)' expression.
+///
+/// \param Tok the token being expanded; it is advanced as the operand is
+///        lexed.
+/// \param II  identifier info of the builtin, used in diagnostics.
+/// \returns EmbedResult::Invalid if the expression is malformed or is used
+///          where isParsingIfOrElifDirective() is false;
+///          EmbedResult::NotFound if a parameter is unrecognized or the
+///          resource cannot be located; EmbedResult::Empty if no data is left
+///          after applying the offset and limit parameters;
+///          EmbedResult::Found otherwise.
+EmbedResult Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
+  // These expressions are only allowed within a preprocessor directive.
+  if (!this->isParsingIfOrElifDirective()) {
+    Diag(Tok, diag::err_pp_directive_required) << II;
+    // Return a valid identifier token.
+    assert(Tok.is(tok::identifier));
+    Tok.setIdentifierInfo(II);
+    return EmbedResult::Invalid;
+  }
+
+  // Ensure we have a '('.
+  LexUnexpandedToken(Tok);
+  if (Tok.isNot(tok::l_paren)) {
+    Diag(Tok, diag::err_pp_expected_after) << II << tok::l_paren;
+    // No recovery is attempted without the opening paren; the expression is
+    // simply invalid.
+    return EmbedResult::Invalid;
+  }
+
+  // Save '(' location for possible missing ')' message and then lex the header
+  // name token for the embed resource.
+  SourceLocation LParenLoc = Tok.getLocation();
+  if (this->LexHeaderName(Tok))
+    return EmbedResult::Invalid;
+
+  if (Tok.isNot(tok::header_name)) {
+    Diag(Tok.getLocation(), diag::err_pp_expects_filename);
+    return EmbedResult::Invalid;
+  }
+
+  SourceLocation FilenameLoc = Tok.getLocation();
+  Token FilenameTok = Tok;
+
+  std::optional<LexEmbedParametersResult> Params =
+      this->LexEmbedParameters(Tok, /*ForHasEmbed=*/true);
+  assert((Params || Tok.is(tok::eod)) &&
+         "expected success or to be at the end of the directive");
+
+  if (!Params)
+    return EmbedResult::Invalid;
+
+  if (Params->UnrecognizedParams > 0)
+    return EmbedResult::NotFound;
+
+  if (!Tok.is(tok::r_paren)) {
+    Diag(this->getLocForEndOfToken(FilenameLoc), diag::err_pp_expected_after)
+        << II << tok::r_paren;
+    Diag(LParenLoc, diag::note_matching) << tok::l_paren;
+    if (Tok.isNot(tok::eod))
+      DiscardUntilEndOfDirective();
+    return EmbedResult::Invalid;
+  }
+
+  SmallString<128> FilenameBuffer;
+  StringRef Filename = this->getSpelling(FilenameTok, FilenameBuffer);
+  bool isAngled =
+      this->GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
+  // If GetIncludeFilenameSpelling set the start ptr to null, there was an
+  // error.
+  assert(!Filename.empty());
+  const FileEntry *LookupFromFile =
+      this->getCurrentFileLexer() ? *this->getCurrentFileLexer()->getFileEntry()
+                                  : static_cast<FileEntry *>(nullptr);
+  OptionalFileEntryRef MaybeFileEntry =
+      this->LookupEmbedFile(Filename, isAngled, false, LookupFromFile);
+  if (Callbacks) {
+    Callbacks->HasEmbed(LParenLoc, Filename, isAngled, MaybeFileEntry);
+  }
+  if (!MaybeFileEntry)
+    return EmbedResult::NotFound;
+
+  size_t FileSize = MaybeFileEntry->getSize();
+  // First, "offset" into the file (this reduces the amount of data we can read
+  // from the file).
+  if (Params->MaybeOffsetParam) {
+    if (Params->MaybeOffsetParam->Offset > FileSize)
+      FileSize = 0;
+    else
+      FileSize -= Params->MaybeOffsetParam->Offset;
+  }
+
+  // Second, limit the data from the file. The limit is only a cap: a limit at
+  // or beyond the remaining size leaves that size untouched, so a non-empty
+  // resource is not misreported as empty.
+  if (Params->MaybeLimitParam) {
+    if (Params->MaybeLimitParam->Limit < FileSize)
+      FileSize = Params->MaybeLimitParam->Limit;
+  }
+
+  // If we have no data left to read, the file is empty, otherwise we have the
+  // expected resource.
+  if (FileSize == 0)
+    return EmbedResult::Empty;
+  return EmbedResult::Found;
+}
+
bool Preprocessor::EvaluateHasInclude(Token &Tok, IdentifierInfo *II) {
return EvaluateHasIncludeCommon(Tok, II, *this, nullptr, nullptr);
}
@@ -1820,6 +1920,17 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
return;
OS << (int)Value;
Tok.setKind(tok::numeric_constant);
+ } else if (II == Ident__has_embed) {
+    // The argument to this builtin should be a parenthesized
+    // file name string literal using angle brackets (<>) or
+    // double-quotes (""), optionally followed by a series of
+    // embed parameters written in an attribute-like form.
+ EmbedResult Value = EvaluateHasEmbed(Tok, II);
+ if (Value == EmbedResult::Invalid)
+ return;
+
+ Tok.setKind(tok::numeric_constant);
+ OS << static_cast<int>(Value);
} else if (II == Ident__has_warning) {
// The argument should be a parenthesized string literal.
EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, false,
diff --git a/clang/lib/Lex/TokenConcatenation.cpp b/clang/lib/Lex/TokenConcatenation.cpp
index 1b3201b..865879d 100644
--- a/clang/lib/Lex/TokenConcatenation.cpp
+++ b/clang/lib/Lex/TokenConcatenation.cpp
@@ -193,9 +193,12 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok,
if (Tok.isAnnotation()) {
// Modules annotation can show up when generated automatically for includes.
assert(Tok.isOneOf(tok::annot_module_include, tok::annot_module_begin,
- tok::annot_module_end) &&
+ tok::annot_module_end, tok::annot_embed) &&
"unexpected annotation in AvoidConcat");
+
ConcatInfo = 0;
+ if (Tok.is(tok::annot_embed))
+ return true;
}
if (ConcatInfo == 0)
diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index eb7447f..9fc3cd7 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -1066,6 +1066,21 @@ ExprResult Parser::ParseCastExpression(CastParseKind ParseKind,
break;
}
+ case tok::annot_embed: {
+ // We've met #embed in a context where a single value is expected. Take last
+ // element from #embed data as if it were a comma expression.
+ EmbedAnnotationData *Data =
+ reinterpret_cast<EmbedAnnotationData *>(Tok.getAnnotationValue());
+ SourceLocation StartLoc = ConsumeAnnotationToken();
+ ASTContext &Context = Actions.getASTContext();
+ Res = IntegerLiteral::Create(Context,
+ llvm::APInt(CHAR_BIT, Data->BinaryData.back()),
+ Context.UnsignedCharTy, StartLoc);
+ if (Data->BinaryData.size() > 1)
+ Diag(StartLoc, diag::warn_unused_comma_left_operand);
+ break;
+ }
+
case tok::kw___super:
case tok::kw_decltype:
// Annotate the token and tail recurse.
@@ -3563,6 +3578,17 @@ ExprResult Parser::ParseFoldExpression(ExprResult LHS,
T.getCloseLocation());
}
+/// Expands the current token's EmbedAnnotationData into one unsigned char
+/// IntegerLiteral per byte and appends them to \p Exprs. Every literal uses
+/// the location returned by ConsumeAnnotationToken().
+void Parser::ExpandEmbedDirective(SmallVectorImpl<Expr *> &Exprs) {
+  EmbedAnnotationData *Data =
+      reinterpret_cast<EmbedAnnotationData *>(Tok.getAnnotationValue());
+  SourceLocation StartLoc = ConsumeAnnotationToken();
+  ASTContext &Context = Actions.getASTContext();
+  // Iterate as unsigned char so bytes >= 0x80 are not sign-extended when they
+  // are converted for the APInt constructor.
+  for (unsigned char Byte : Data->BinaryData) {
+    Exprs.push_back(IntegerLiteral::Create(Context, llvm::APInt(CHAR_BIT, Byte),
+                                           Context.UnsignedCharTy, StartLoc));
+  }
+}
+
/// ParseExpressionList - Used for C/C++ (argument-)expression-list.
///
/// \verbatim
@@ -3598,8 +3624,17 @@ bool Parser::ParseExpressionList(SmallVectorImpl<Expr *> &Exprs,
if (getLangOpts().CPlusPlus11 && Tok.is(tok::l_brace)) {
Diag(Tok, diag::warn_cxx98_compat_generalized_initializer_lists);
Expr = ParseBraceInitializer();
- } else
+ } else if (Tok.is(tok::annot_embed)) {
+ ExpandEmbedDirective(Exprs);
+ if (Tok.isNot(tok::comma))
+ break;
+ Token Comma = Tok;
+ ConsumeToken();
+ checkPotentialAngleBracketDelimiter(Comma);
+ continue;
+ } else {
Expr = ParseAssignmentExpression();
+ }
if (EarlyTypoCorrection)
Expr = Actions.CorrectDelayedTyposInExpr(Expr);
diff --git a/clang/lib/Parse/ParseInit.cpp b/clang/lib/Parse/ParseInit.cpp
index 432ddc7..cd11f90 100644
--- a/clang/lib/Parse/ParseInit.cpp
+++ b/clang/lib/Parse/ParseInit.cpp
@@ -428,6 +428,36 @@ ExprResult Parser::ParseInitializerWithPotentialDesignator(
return ExprError();
}
+/// Builds the initializer expression for the annot_embed token the parser is
+/// positioned on. Exactly one byte of data becomes an unsigned char
+/// IntegerLiteral; any other amount is wrapped in two string literals (file
+/// name and raw bytes) and handed to Actions.ActOnEmbedExpr.
+ExprResult Parser::createEmbedExpr() {
+  assert(Tok.getKind() == tok::annot_embed);
+  EmbedAnnotationData *Data =
+      reinterpret_cast<EmbedAnnotationData *>(Tok.getAnnotationValue());
+  ExprResult Res;
+  ASTContext &Context = Actions.getASTContext();
+  SourceLocation StartLoc = ConsumeAnnotationToken();
+  if (Data->BinaryData.size() == 1) {
+    // Go through unsigned char so a byte >= 0x80 is not sign-extended when it
+    // is converted for the APInt constructor.
+    unsigned char Byte = Data->BinaryData.back();
+    Res = IntegerLiteral::Create(Context, llvm::APInt(CHAR_BIT, Byte),
+                                 Context.UnsignedCharTy, StartLoc);
+  } else {
+    // Wraps Str in an ordinary StringLiteral whose type is an array of Ty
+    // with Str.size() elements.
+    auto CreateStringLiteralFromStringRef = [&](StringRef Str, QualType Ty) {
+      llvm::APSInt ArraySize =
+          Context.MakeIntValue(Str.size(), Context.getSizeType());
+      QualType ArrayTy = Context.getConstantArrayType(
+          Ty, ArraySize, nullptr, ArraySizeModifier::Normal, 0);
+      return StringLiteral::Create(Context, Str, StringLiteralKind::Ordinary,
+                                   false, ArrayTy, StartLoc);
+    };
+
+    StringLiteral *FileNameArg =
+        CreateStringLiteralFromStringRef(Data->FileName, Context.CharTy);
+    StringLiteral *BinaryDataArg = CreateStringLiteralFromStringRef(
+        Data->BinaryData, Context.UnsignedCharTy);
+    Res = Actions.ActOnEmbedExpr(StartLoc, FileNameArg, BinaryDataArg);
+  }
+  return Res;
+}
+
/// ParseBraceInitializer - Called when parsing an initializer that has a
/// leading open brace.
///
@@ -501,6 +531,8 @@ ExprResult Parser::ParseBraceInitializer() {
ExprResult SubElt;
if (MayBeDesignationStart())
SubElt = ParseInitializerWithPotentialDesignator(DesignatorCompletion);
+ else if (Tok.getKind() == tok::annot_embed)
+ SubElt = createEmbedExpr();
else
SubElt = ParseInitializer();
diff --git a/clang/lib/Parse/ParseTemplate.cpp b/clang/lib/Parse/ParseTemplate.cpp
index a5130f5..7e30afa 100644
--- a/clang/lib/Parse/ParseTemplate.cpp
+++ b/clang/lib/Parse/ParseTemplate.cpp
@@ -1523,6 +1523,19 @@ ParsedTemplateArgument Parser::ParseTemplateArgument() {
ExprArg.get(), Loc);
}
+/// Expands the current token's EmbedAnnotationData into one non-type template
+/// argument per byte, each an unsigned char IntegerLiteral, and appends them
+/// to \p TemplateArgs.
+void Parser::ExpandEmbedIntoTemplateArgList(TemplateArgList &TemplateArgs) {
+  EmbedAnnotationData *Data =
+      reinterpret_cast<EmbedAnnotationData *>(Tok.getAnnotationValue());
+  SourceLocation StartLoc = ConsumeAnnotationToken();
+  ASTContext &Context = Actions.getASTContext();
+  // Iterate as unsigned char so bytes >= 0x80 are not sign-extended when they
+  // are converted for the APInt constructor.
+  for (unsigned char Byte : Data->BinaryData) {
+    Expr *E = IntegerLiteral::Create(Context, llvm::APInt(CHAR_BIT, Byte),
+                                     Context.UnsignedCharTy, StartLoc);
+    TemplateArgs.push_back(
+        ParsedTemplateArgument(ParsedTemplateArgument::NonType, E, StartLoc));
+  }
+}
+
/// ParseTemplateArgumentList - Parse a C++ template-argument-list
/// (C++ [temp.names]). Returns true if there was an error.
///
@@ -1547,19 +1560,23 @@ bool Parser::ParseTemplateArgumentList(TemplateArgList &TemplateArgs,
do {
PreferredType.enterFunctionArgument(Tok.getLocation(), RunSignatureHelp);
- ParsedTemplateArgument Arg = ParseTemplateArgument();
- SourceLocation EllipsisLoc;
- if (TryConsumeToken(tok::ellipsis, EllipsisLoc))
- Arg = Actions.ActOnPackExpansion(Arg, EllipsisLoc);
-
- if (Arg.isInvalid()) {
- if (PP.isCodeCompletionReached() && !CalledSignatureHelp)
- RunSignatureHelp();
- return true;
- }
+ if (Tok.is(tok::annot_embed)) {
+ ExpandEmbedIntoTemplateArgList(TemplateArgs);
+ } else {
+ ParsedTemplateArgument Arg = ParseTemplateArgument();
+ SourceLocation EllipsisLoc;
+ if (TryConsumeToken(tok::ellipsis, EllipsisLoc))
+ Arg = Actions.ActOnPackExpansion(Arg, EllipsisLoc);
+
+ if (Arg.isInvalid()) {
+ if (PP.isCodeCompletionReached() && !CalledSignatureHelp)
+ RunSignatureHelp();
+ return true;
+ }
- // Save this template argument.
- TemplateArgs.push_back(Arg);
+ // Save this template argument.
+ TemplateArgs.push_back(Arg);
+ }
// If the next token is a comma, consume it and keep reading
// arguments.
diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp
index 17acfca..0febfa8 100644
--- a/clang/lib/Sema/SemaExceptionSpec.cpp
+++ b/clang/lib/Sema/SemaExceptionSpec.cpp
@@ -1414,6 +1414,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) {
case Expr::PackIndexingExprClass:
case Expr::StringLiteralClass:
case Expr::SourceLocExprClass:
+ case Expr::EmbedExprClass:
case Expr::ConceptSpecializationExprClass:
case Expr::RequiresExprClass:
// These expressions can never throw.
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 76145f2..44f886b 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -3711,7 +3711,7 @@ bool Sema::CheckLoopHintExpr(Expr *E, SourceLocation Loc, bool AllowZero) {
bool ValueIsPositive =
AllowZero ? ValueAPS.isNonNegative() : ValueAPS.isStrictlyPositive();
if (!ValueIsPositive || ValueAPS.getActiveBits() > 31) {
- Diag(E->getExprLoc(), diag::err_pragma_loop_invalid_argument_value)
+ Diag(E->getExprLoc(), diag::err_requires_positive_value)
<< toString(ValueAPS, 10) << ValueIsPositive;
return true;
}
@@ -7290,8 +7290,8 @@ Sema::BuildInitList(SourceLocation LBraceLoc, MultiExprArg InitArgList,
}
}
- InitListExpr *E = new (Context) InitListExpr(Context, LBraceLoc, InitArgList,
- RBraceLoc);
+ InitListExpr *E =
+ new (Context) InitListExpr(Context, LBraceLoc, InitArgList, RBraceLoc);
E->setType(Context.VoidTy); // FIXME: just a place holder for now.
return E;
}
@@ -16679,6 +16679,17 @@ ExprResult Sema::BuildSourceLocExpr(SourceLocIdentKind Kind, QualType ResultTy,
SourceLocExpr(Context, Kind, ResultTy, BuiltinLoc, RPLoc, ParentContext);
}
+ExprResult Sema::ActOnEmbedExpr(SourceLocation EmbedKeywordLoc,
+                                StringLiteral *Filename,
+                                StringLiteral *BinaryData) {
+  EmbedDataStorage *Data = new (Context) EmbedDataStorage;
+  Data->Filename = Filename;
+  Data->BinaryData = BinaryData;
+  return new (Context)
+      EmbedExpr(Context, EmbedKeywordLoc, Data, /*Begin=*/0,
+                /*NumOfElements=*/Data->getDataElementCount());
+}
+
static bool maybeDiagnoseAssignmentToFunction(Sema &S, QualType DstType,
const Expr *SrcExpr) {
if (!DstType->isFunctionPointerType() ||
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index 7244f3e..4f2a46d 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -313,6 +313,8 @@ class InitListChecker {
InitListExpr *FullyStructuredList = nullptr;
NoInitExpr *DummyExpr = nullptr;
SmallVectorImpl<QualType> *AggrDeductionCandidateParamTypes = nullptr;
+ EmbedExpr *CurEmbed = nullptr; // Save current embed we're processing.
+ unsigned CurEmbedIndex = 0;
NoInitExpr *getDummyInit() {
if (!DummyExpr)
@@ -501,6 +503,42 @@ class InitListChecker {
void CheckEmptyInitializable(const InitializedEntity &Entity,
SourceLocation Loc);
+  Expr *HandleEmbed(EmbedExpr *Embed, const InitializedEntity &Entity) {
+    Expr *Result = nullptr;
+    // Begin tracking this embed unless one is already being tracked.
+    if (!CurEmbed) {
+      CurEmbed = Embed;
+      CurEmbedIndex = 0;
+    }
+    // Reference just one element if we're initializing a single scalar.
+    uint64_t ElsCount = 1;
+    // Otherwise try to fill the whole array with embed data.
+    if (Entity.getKind() == InitializedEntity::EK_ArrayElement) {
+      ValueDecl *ArrDecl = Entity.getParent()->getDecl();
+      auto *AType = SemaRef.Context.getAsArrayType(ArrDecl->getType());
+      assert(AType && "expected array type when initializing array");
+      ElsCount = Embed->getDataElementCount();
+      if (const auto *CAType = dyn_cast<ConstantArrayType>(AType))
+        ElsCount = std::min(CAType->getSize().getZExtValue(),
+                            ElsCount - CurEmbedIndex);
+      if (ElsCount == Embed->getDataElementCount()) { // Nothing to slice.
+        CurEmbed = nullptr;
+        CurEmbedIndex = 0;
+        return Embed;
+      }
+    }
+
+    Result = new (SemaRef.Context)
+        EmbedExpr(SemaRef.Context, Embed->getLocation(), Embed->getData(),
+                  CurEmbedIndex, ElsCount);
+    CurEmbedIndex += ElsCount;
+    if (CurEmbedIndex >= Embed->getDataElementCount()) { // Embed exhausted.
+      CurEmbed = nullptr;
+      CurEmbedIndex = 0;
+    }
+    return Result;
+  }
+
public:
InitListChecker(
Sema &S, const InitializedEntity &Entity, InitListExpr *IL, QualType &T,
@@ -1459,6 +1497,9 @@ void InitListChecker::CheckSubElementType(const InitializedEntity &Entity,
// Brace elision is never performed if the element is not an
// assignment-expression.
if (Seq || isa<InitListExpr>(expr)) {
+ if (auto *Embed = dyn_cast<EmbedExpr>(expr)) {
+ expr = HandleEmbed(Embed, Entity);
+ }
if (!VerifyOnly) {
ExprResult Result = Seq.Perform(SemaRef, TmpEntity, Kind, expr);
if (Result.isInvalid())
@@ -1472,7 +1513,8 @@ void InitListChecker::CheckSubElementType(const InitializedEntity &Entity,
UpdateStructuredListElement(StructuredList, StructuredIndex,
getDummyInit());
}
- ++Index;
+ if (!CurEmbed)
+ ++Index;
if (AggrDeductionCandidateParamTypes)
AggrDeductionCandidateParamTypes->push_back(ElemType);
return;
@@ -1665,6 +1707,8 @@ void InitListChecker::CheckScalarType(const InitializedEntity &Entity,
++Index;
++StructuredIndex;
return;
+ } else if (auto *Embed = dyn_cast<EmbedExpr>(expr)) {
+ expr = HandleEmbed(Embed, Entity);
}
ExprResult Result;
@@ -1686,14 +1730,16 @@ void InitListChecker::CheckScalarType(const InitializedEntity &Entity,
else {
ResultExpr = Result.getAs<Expr>();
- if (ResultExpr != expr && !VerifyOnly) {
+ if (ResultExpr != expr && !VerifyOnly && !CurEmbed) {
// The type was promoted, update initializer list.
// FIXME: Why are we updating the syntactic init list?
IList->setInit(Index, ResultExpr);
}
}
+
UpdateStructuredListElement(StructuredList, StructuredIndex, ResultExpr);
- ++Index;
+ if (!CurEmbed)
+ ++Index;
if (AggrDeductionCandidateParamTypes)
AggrDeductionCandidateParamTypes->push_back(DeclType);
}
@@ -1932,6 +1978,30 @@ static bool checkDestructorReference(QualType ElementType, SourceLocation Loc,
return SemaRef.DiagnoseUseOfDecl(Destructor, Loc);
}
+static bool canInitializeArrayWithEmbedDataString(ArrayRef<Expr *> ExprList,
+                                                  QualType InitType,
+                                                  ASTContext &Context) {
+  // Requires exactly one initializer that is (modulo parens) an EmbedExpr.
+  EmbedExpr *EE =
+      ExprList.size() == 1
+          ? dyn_cast_if_present<EmbedExpr>(ExprList[0]->IgnoreParens())
+          : nullptr;
+  if (!EE)
+    return false;
+
+  if (InitType->isArrayType()) { // Non-array targets never qualify.
+    const ArrayType *InitArrayType = InitType->getAsArrayTypeUnsafe();
+    QualType InitElementTy = InitArrayType->getElementType();
+    QualType EmbedExprElementTy = EE->getType();
+    const bool TypesMatch = // Compatible types, or both character types.
+        Context.typesAreCompatible(InitElementTy, EmbedExprElementTy) ||
+        (InitElementTy->isCharType() && EmbedExprElementTy->isCharType());
+    if (TypesMatch)
+      return true;
+  }
+  return false;
+}
+
void InitListChecker::CheckArrayType(const InitializedEntity &Entity,
InitListExpr *IList, QualType &DeclType,
llvm::APSInt elementIndex,
@@ -1949,6 +2019,12 @@ void InitListChecker::CheckArrayType(const InitializedEntity &Entity,
}
}
+ if (canInitializeArrayWithEmbedDataString(IList->inits(), DeclType,
+ SemaRef.Context)) {
+ EmbedExpr *Embed = cast<EmbedExpr>(IList->inits()[0]);
+ IList->setInit(0, Embed->getDataStringLiteral());
+ }
+
// Check for the special-case of initializing an array with a string.
if (Index < IList->getNumInits()) {
if (IsStringInit(IList->getInit(Index), arrayType, SemaRef.Context) ==
@@ -2051,13 +2127,24 @@ void InitListChecker::CheckArrayType(const InitializedEntity &Entity,
if (maxElementsKnown && elementIndex == maxElements)
break;
- InitializedEntity ElementEntity =
- InitializedEntity::InitializeElement(SemaRef.Context, StructuredIndex,
- Entity);
+ InitializedEntity ElementEntity = InitializedEntity::InitializeElement(
+ SemaRef.Context, StructuredIndex, Entity);
+
+ unsigned EmbedElementIndexBeforeInit = CurEmbedIndex;
// Check this element.
CheckSubElementType(ElementEntity, IList, elementType, Index,
StructuredList, StructuredIndex);
++elementIndex;
+ if ((CurEmbed || isa<EmbedExpr>(Init)) && elementType->isScalarType()) {
+ if (CurEmbed) {
+ elementIndex =
+ elementIndex + CurEmbedIndex - EmbedElementIndexBeforeInit - 1;
+ } else {
+ auto Embed = cast<EmbedExpr>(Init);
+ elementIndex = elementIndex + Embed->getDataElementCount() -
+ EmbedElementIndexBeforeInit - 1;
+ }
+ }
// If the array is of incomplete type, keep track of the number of
// elements in the initializer.
@@ -9063,19 +9150,18 @@ ExprResult InitializationSequence::Perform(Sema &S,
}
}
}
-
+ Expr *Init = CurInit.get();
CheckedConversionKind CCK =
Kind.isCStyleCast() ? CheckedConversionKind::CStyleCast
: Kind.isFunctionalCast() ? CheckedConversionKind::FunctionalCast
: Kind.isExplicitCast() ? CheckedConversionKind::OtherCast
: CheckedConversionKind::Implicit;
- ExprResult CurInitExprRes =
- S.PerformImplicitConversion(CurInit.get(), Step->Type, *Step->ICS,
- getAssignmentAction(Entity), CCK);
+ ExprResult CurInitExprRes = S.PerformImplicitConversion(
+ Init, Step->Type, *Step->ICS, getAssignmentAction(Entity), CCK);
if (CurInitExprRes.isInvalid())
return ExprError();
- S.DiscardMisalignedMemberAddress(Step->Type.getTypePtr(), CurInit.get());
+ S.DiscardMisalignedMemberAddress(Step->Type.getTypePtr(), Init);
CurInit = CurInitExprRes;
@@ -9230,10 +9316,11 @@ ExprResult InitializationSequence::Perform(Sema &S,
case SK_CAssignment: {
QualType SourceType = CurInit.get()->getType();
+ Expr *Init = CurInit.get();
// Save off the initial CurInit in case we need to emit a diagnostic
- ExprResult InitialCurInit = CurInit;
- ExprResult Result = CurInit;
+ ExprResult InitialCurInit = Init;
+ ExprResult Result = Init;
Sema::AssignConvertType ConvTy =
S.CheckSingleAssignmentConstraints(Step->Type, Result, true,
Entity.getKind() == InitializedEntity::EK_Parameter_CF_Audited);
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 3bfda09..f117fe9 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -12939,6 +12939,11 @@ ExprResult TreeTransform<Derived>::TransformSourceLocExpr(SourceLocExpr *E) {
getSema().CurContext);
}
+template <typename Derived>
+ExprResult TreeTransform<Derived>::TransformEmbedExpr(EmbedExpr *E) {
+  return E; // Returned unchanged; nothing is rebuilt.
+}
+
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCUDAKernelCallExpr(CUDAKernelCallExpr *E) {
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index 67ef170..a0ffe24 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -1323,6 +1323,17 @@ void ASTStmtReader::VisitSourceLocExpr(SourceLocExpr *E) {
E->SourceLocExprBits.Kind = Record.readInt();
}
+void ASTStmtReader::VisitEmbedExpr(EmbedExpr *E) {
+  VisitExpr(E);
+  E->EmbedKeywordLoc = readSourceLocation(); // The only location consumed.
+  EmbedDataStorage *Data = new (Record.getContext()) EmbedDataStorage;
+  Data->Filename = cast<StringLiteral>(Record.readSubStmt());   // 1st sub-stmt
+  Data->BinaryData = cast<StringLiteral>(Record.readSubStmt()); // 2nd sub-stmt
+  E->Data = Data;
+  E->Begin = Record.readInt();         // Starting element position.
+  E->NumOfElements = Record.readInt(); // Element count.
+}
+
void ASTStmtReader::VisitAddrLabelExpr(AddrLabelExpr *E) {
VisitExpr(E);
E->setAmpAmpLoc(readSourceLocation());
@@ -3233,6 +3244,10 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
S = new (Context) SourceLocExpr(Empty);
break;
+ case EXPR_BUILTIN_PP_EMBED:
+ S = new (Context) EmbedExpr(Empty);
+ break;
+
case EXPR_ADDR_LABEL:
S = new (Context) AddrLabelExpr(Empty);
break;
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index 1a98e30..ed2145e 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -1262,6 +1262,17 @@ void ASTStmtWriter::VisitSourceLocExpr(SourceLocExpr *E) {
Code = serialization::EXPR_SOURCE_LOC;
}
+/// Serializes an EmbedExpr: one source location, the file-name and data
+/// string literals as sub-statements, then the starting element position and
+/// the element count.
+void ASTStmtWriter::VisitEmbedExpr(EmbedExpr *E) {
+  VisitExpr(E);
+  // ASTStmtReader::VisitEmbedExpr consumes exactly one source location before
+  // the two integers; emitting a second one would shift every later field.
+  Record.AddSourceLocation(E->getBeginLoc());
+  Record.AddStmt(E->getFilenameStringLiteral());
+  Record.AddStmt(E->getDataStringLiteral());
+  Record.writeUInt32(E->getStartingElementPos());
+  Record.writeUInt32(E->getDataElementCount());
+  Code = serialization::EXPR_BUILTIN_PP_EMBED;
+}
+
void ASTStmtWriter::VisitAddrLabelExpr(AddrLabelExpr *E) {
VisitExpr(E);
Record.AddSourceLocation(E->getAmpAmpLoc());
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 197d673..b331be8 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -2422,6 +2422,10 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
Bldr.addNodes(Dst);
break;
}
+
+ case Stmt::EmbedExprClass:
+ llvm_unreachable("Support for EmbedExpr is not implemented.");
+ break;
}
}
diff --git a/clang/test/C/C2x/Inputs/bits.bin b/clang/test/C/C2x/Inputs/bits.bin
new file mode 100644
index 0000000..ad47100
--- /dev/null
+++ b/clang/test/C/C2x/Inputs/bits.bin
@@ -0,0 +1 @@
+0123456789 \ No newline at end of file
diff --git a/clang/test/C/C2x/Inputs/boop.h b/clang/test/C/C2x/Inputs/boop.h
new file mode 100644
index 0000000..d3e3967
--- /dev/null
+++ b/clang/test/C/C2x/Inputs/boop.h
@@ -0,0 +1 @@
+*boop* \ No newline at end of file
diff --git a/clang/test/C/C2x/Inputs/i.dat b/clang/test/C/C2x/Inputs/i.dat
new file mode 100644
index 0000000..c227083
--- /dev/null
+++ b/clang/test/C/C2x/Inputs/i.dat
@@ -0,0 +1 @@
+0 \ No newline at end of file
diff --git a/clang/test/C/C2x/Inputs/jump.wav b/clang/test/C/C2x/Inputs/jump.wav
new file mode 100644
index 0000000..a711006
--- /dev/null
+++ b/clang/test/C/C2x/Inputs/jump.wav
@@ -0,0 +1 @@
+RIFF \ No newline at end of file
diff --git a/clang/test/C/C2x/Inputs/s.dat b/clang/test/C/C2x/Inputs/s.dat
new file mode 100644
index 0000000..3a332e6
--- /dev/null
+++ b/clang/test/C/C2x/Inputs/s.dat
@@ -0,0 +1 @@
+012345678 \ No newline at end of file
diff --git a/clang/test/C/C2x/n3017.c b/clang/test/C/C2x/n3017.c
new file mode 100644
index 0000000..0d22d31
--- /dev/null
+++ b/clang/test/C/C2x/n3017.c
@@ -0,0 +1,216 @@
+// RUN: %clang_cc1 -verify -fsyntax-only --embed-dir=%S/Inputs -std=c2x %s -Wno-constant-logical-operand
+
+/* WG14 N3017: full
+ * #embed - a scannable, tooling-friendly binary resource inclusion mechanism
+ */
+
+// C23 6.10p6
+char b1[] = {
+#embed "boop.h" limit(5)
+,
+#embed "boop.h" __limit__(5)
+};
+
+// C23 6.10.1p19
+#if __has_embed(__FILE__ ext::token(0xB055))
+#error "Supports an extension parameter Clang never claimed to support?"
+#endif
+
+#if !__has_embed(__FILE__ clang::offset(0))
+#error "Doesn't support an extension Clang claims to support?"
+#endif
+
+// C23 6.10.1p20
+void parse_into_s(short* ptr, unsigned char* ptr_bytes, unsigned long long size);
+int f() {
+#if __has_embed ("bits.bin" ds9000::element_type(short))
+ /* Implementation extension: create short integers from the */
+ /* translation environment resource into */
+ /* a sequence of integer constants */
+ short meow[] = {
+#embed "bits.bin" ds9000::element_type(short)
+ };
+#elif __has_embed ("bits.bin")
+ /* no support for implementation-specific */
+ /* ds9000::element_type(short) parameter */
+ unsigned char meow_bytes[] = {
+ #embed "bits.bin"
+ };
+ short meow[sizeof(meow_bytes) / sizeof(short)] = {};
+ /* parse meow_bytes into short values by-hand! */
+ parse_into_s(meow, meow_bytes, sizeof(meow_bytes));
+#else
+#error "cannot find bits.bin resource"
+#endif
+ return (int)(meow[0] + meow[(sizeof(meow) / sizeof(*meow)) - 1]);
+}
+
+// NOTE: we don't have a good way to test infinite resources from within lit.
+int g() {
+#if __has_embed(<infinite-resource> limit(0)) == 2
+ // if <infinite-resource> exists, this
+ // token sequence is always taken.
+ return 0;
+#else
+ // the ’infinite-resource’ resource does not exist
+ #error "The resource does not exist"
+#endif
+ // expected-error@-2 {{"The resource does not exist"}}
+}
+
+#include <stddef.h>
+void have_you_any_wool(const unsigned char*, size_t);
+int h() {
+ static const unsigned char baa_baa[] = {
+#embed __FILE__
+ };
+ have_you_any_wool(baa_baa, sizeof(baa_baa));
+ return 0;
+}
+
+// C23 6.10.3.1p17: not tested here because we do not currently support any
+// platforms where CHAR_BIT != 8.
+
+// C23 6.10.3.1p18
+int i() {
+/* Braces may be kept or elided as per normal initialization rules */
+ int i = {
+#embed "i.dat"
+ }; /* valid if i.dat produces 1 value,
+        i value is [0, 2^(embed element width)) */
+ int i2 =
+#embed "i.dat"
+ ; /* valid if i.dat produces 1 value,
+        i2 value is [0, 2^(embed element width)) */
+ struct s {
+ double a, b, c;
+ struct { double e, f, g; };
+ double h, i, j;
+ };
+ struct s x = {
+ /* initializes each element in order according to initialization
+ rules with comma-separated list of integer constant expressions
+ inside of braces */
+ #embed "s.dat"
+ };
+ return 0;
+}
+
+// C23 6.10.3.1p19: not tested here because it's a runtime test rather than one
+// which can be handled at compile time (it validates file contents via fread).
+
+// C23 6.10.3.2p5
+int j() {
+ static const char sound_signature[] = {
+#embed <jump.wav> limit(2+2)
+ };
+ static_assert((sizeof(sound_signature) / sizeof(*sound_signature)) == 4,
+ "There should only be 4 elements in this array.");
+ // verify PCM WAV resource
+ static_assert(sound_signature[0] == 'R');
+ static_assert(sound_signature[1] == 'I');
+ static_assert(sound_signature[2] == 'F');
+ static_assert(sound_signature[3] == 'F');
+ static_assert(sizeof(sound_signature) == 4);
+ return 0;
+}
+
+// C23 6.10.3p6
+int k() {
+#define TWO_PLUS_TWO 2+2
+ static const char sound_signature[] = {
+#embed <jump.wav> limit(TWO_PLUS_TWO)
+ };
+ static_assert((sizeof(sound_signature) / sizeof(*sound_signature)) == 4,
+ "There should only be 4 elements in this array.");
+ // verify PCM WAV resource
+ static_assert(sound_signature[0] == 'R');
+ static_assert(sound_signature[1] == 'I');
+ static_assert(sound_signature[2] == 'F');
+ static_assert(sound_signature[3] == 'F');
+ static_assert(sizeof(sound_signature) == 4);
+ return 0;
+}
+
+// C23 6.10.3.2p7: not tested here because we do not currently support any
+// platforms where CHAR_BIT != 8.
+
+// C23 6.10.3.2p8: not tested here because it requires access to an infinite
+// resource like /dev/urandom.
+
+// C23 6.10.3.3p4
+char *strcpy(char *, const char *);
+#ifndef SHADER_TARGET
+ #define SHADER_TARGET "bits.bin"
+#endif
+extern char* null_term_shader_data;
+void fill_in_data () {
+ const char internal_data[] = {
+#embed SHADER_TARGET \
+ suffix(,)
+ 0
+ };
+ strcpy(null_term_shader_data, internal_data);
+}
+
+// C23 6.10.3.4p4
+#ifndef SHADER_TARGET
+#define SHADER_TARGET "bits.bin"
+#endif
+extern char* merp;
+void init_data () {
+ const char whl[] = {
+#embed SHADER_TARGET \
+ prefix(0xEF, 0xBB, 0xBF, ) /* UTF-8 BOM */ \
+ suffix(,)
+ 0
+ };
+ // always null terminated,
+ // contains BOM if not-empty
+ const int is_good = (sizeof(whl) == 1 && whl[0] == '\0')
+ || (whl[0] == '\xEF' && whl[1] == '\xBB'
+ && whl[2] == '\xBF' && whl[sizeof(whl) - 1] == '\0');
+ static_assert(is_good);
+ strcpy(merp, whl);
+}
+
+// C23 6.10.3.5p3
+int l() {
+ return
+#embed <bits.bin> limit(0) prefix(1) if_empty(0)
+ ;
+ // becomes:
+ // return 0;
+
+ // Validating the assumption from the example in the standard.
+ static_assert(
+#embed <bits.bin> limit(0) prefix(1) if_empty(0)
+ == 0);
+}
+
+// C23 6.10.3.5p4
+void fill_in_data_again() {
+ const char internal_data[] = {
+#embed SHADER_TARGET \
+ suffix(, 0) \
+ if_empty(0)
+ };
+ strcpy(null_term_shader_data, internal_data);
+}
+
+// C23 6.10.3.5p5
+int m() {
+ return
+#embed __FILE__ limit(0) if_empty(45540)
+ ;
+
+ // Validating the assumption from the example in the standard.
+ static_assert(
+#embed __FILE__ limit(0) if_empty(45540)
+ == 45540);
+}
+
+// 6.10.9.1p1
+static_assert(__STDC_EMBED_NOT_FOUND__ == 0);
+static_assert(__STDC_EMBED_FOUND__ == 1);
+static_assert(__STDC_EMBED_EMPTY__ == 2);
diff --git a/clang/test/Preprocessor/Inputs/jk.txt b/clang/test/Preprocessor/Inputs/jk.txt
new file mode 100644
index 0000000..93d177a
--- /dev/null
+++ b/clang/test/Preprocessor/Inputs/jk.txt
@@ -0,0 +1 @@
+jk \ No newline at end of file
diff --git a/clang/test/Preprocessor/Inputs/media/art.txt b/clang/test/Preprocessor/Inputs/media/art.txt
new file mode 100644
index 0000000..1ce9ab9
--- /dev/null
+++ b/clang/test/Preprocessor/Inputs/media/art.txt
@@ -0,0 +1,9 @@
+ __ _
+ .-.' `; `-._ __ _
+ (_, .-:' `; `-._
+ ,'o"( (_, )
+ (__,-' ,'o"( )>
+ ( (__,-' )
+ `-'._.--._( )
+ ||| |||`-'._.--._.-'
+ ||| |||
diff --git a/clang/test/Preprocessor/Inputs/media/empty b/clang/test/Preprocessor/Inputs/media/empty
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/clang/test/Preprocessor/Inputs/media/empty
diff --git a/clang/test/Preprocessor/Inputs/null_byte.bin b/clang/test/Preprocessor/Inputs/null_byte.bin
new file mode 100644
index 0000000..f76dd23
--- /dev/null
+++ b/clang/test/Preprocessor/Inputs/null_byte.bin
Binary files differ
diff --git a/clang/test/Preprocessor/Inputs/numbers.txt b/clang/test/Preprocessor/Inputs/numbers.txt
new file mode 100644
index 0000000..11f11f9
--- /dev/null
+++ b/clang/test/Preprocessor/Inputs/numbers.txt
@@ -0,0 +1 @@
+0123456789
diff --git a/clang/test/Preprocessor/Inputs/single_byte.txt b/clang/test/Preprocessor/Inputs/single_byte.txt
new file mode 100644
index 0000000..63d8dbd
--- /dev/null
+++ b/clang/test/Preprocessor/Inputs/single_byte.txt
@@ -0,0 +1 @@
+b \ No newline at end of file
diff --git a/clang/test/Preprocessor/embed___has_embed.c b/clang/test/Preprocessor/embed___has_embed.c
new file mode 100644
index 0000000..43a3068
--- /dev/null
+++ b/clang/test/Preprocessor/embed___has_embed.c
@@ -0,0 +1,60 @@
+// RUN: %clang_cc1 -std=c23 %s -E --embed-dir=%S/Inputs -verify
+// expected-no-diagnostics
+
+#if __has_embed(__FILE__) != __STDC_EMBED_FOUND__
+#error 1
+#elif __has_embed("media/art.txt") != __STDC_EMBED_FOUND__
+#error 2
+#elif __has_embed("asdkasdjkadsjkdsfjk") != __STDC_EMBED_NOT_FOUND__
+#error 3
+#elif __has_embed("asdkasdjkadsjkdsfjk" limit(1)) != __STDC_EMBED_NOT_FOUND__
+#error 4
+#elif __has_embed("asdkasdjkadsjkdsfjk" suffix(x) limit(1)) != __STDC_EMBED_NOT_FOUND__
+#error 5
+#elif __has_embed("asdkasdjkadsjkdsfjk" suffix(x) djsakdasjd::xmeow("xD")) != __STDC_EMBED_NOT_FOUND__
+#error 6
+#elif __has_embed(__FILE__ limit(2) prefix(y)) != __STDC_EMBED_FOUND__
+#error 7
+#elif __has_embed(__FILE__ limit(2)) != __STDC_EMBED_FOUND__
+#error 8
+// 6.10.1p7, if the search fails or any of the embed parameters in the embed
+// parameter sequence specified are not supported by the implementation for the
+// #embed directive;
+// We don't support one of the embed parameters.
+#elif __has_embed(__FILE__ dajwdwdjdahwk::meow(x)) != __STDC_EMBED_NOT_FOUND__
+#error 9
+#elif __has_embed(<media/empty>) != __STDC_EMBED_EMPTY__
+#error 10
+// 6.10.1p7: if the search for the resource succeeds and all embed parameters
+// in the embed parameter sequence specified are supported by the
+// implementation for the #embed directive and the resource is empty
+// Limiting to zero characters means the resource is empty.
+#elif __has_embed(<media/empty> limit(0)) != __STDC_EMBED_EMPTY__
+#error 11
+#elif __has_embed(<media/art.txt> limit(0)) != __STDC_EMBED_EMPTY__
+#error 12
+// Test that an offset past the end of the file produces an empty file.
+#elif __has_embed(<single_byte.txt> clang::offset(1)) != __STDC_EMBED_EMPTY__
+#error 13
+// Test that we apply the offset before we apply the limit. If we did this in
+// the reverse order, this would cause the file to be empty because we would
+// have limited it to 1 byte and then offset past it.
+#elif __has_embed(<media/art.txt> limit(1) clang::offset(12)) != __STDC_EMBED_FOUND__
+#error 14
+#elif __has_embed(<media/art.txt>) != __STDC_EMBED_FOUND__
+#error 15
+#elif __has_embed(<media/art.txt> if_empty(meow)) != __STDC_EMBED_FOUND__
+#error 16
+#endif
+
+// Ensure that when __has_embed returns true, the file can actually be
+// embedded. This was previously failing because the way in which __has_embed
+// would search for files was different from how #embed would resolve them
+// when the file path included relative path markers like `./` or `../`.
+#if __has_embed("./embed___has_embed.c") == __STDC_EMBED_FOUND__
+unsigned char buffer[] = {
+#embed "./embed___has_embed.c"
+};
+#else
+#error 17
+#endif
diff --git a/clang/test/Preprocessor/embed___has_embed_parsing_errors.c b/clang/test/Preprocessor/embed___has_embed_parsing_errors.c
new file mode 100644
index 0000000..fcaf693
--- /dev/null
+++ b/clang/test/Preprocessor/embed___has_embed_parsing_errors.c
@@ -0,0 +1,240 @@
+// RUN: %clang_cc1 -std=c23 %s -E -verify
+
+// Test the parsing behavior for __has_embed and all of its parameters to ensure we
+// recover from failures gracefully.
+
+// expected-error@+2 {{missing '(' after '__has_embed'}} \
+ expected-error@+2 {{expected value in expression}}
+#if __has_embed
+#endif
+
+// expected-error@+3 {{expected '>'}} \
+ expected-note@+3 {{to match this '<'}} \
+ expected-error@+3 {{expected value in expression}}
+#if __has_embed(<)
+#endif
+
+// expected-error@+3 {{expected "FILENAME" or <FILENAME>}} \
+ expected-warning@+3 {{missing terminating '"' character}} \
+ expected-error@+3 {{invalid token at start of a preprocessor expression}}
+#if __has_embed(")
+#endif
+
+// expected-error@+2 {{missing '(' after '__has_embed'}} \
+ expected-error@+2 {{token is not a valid binary operator in a preprocessor subexpression}}
+#if __has_embed file.txt
+#endif
+
+// OK, no diagnostic for an unknown embed parameter.
+#if __has_embed("media/empty" xxx)
+#endif
+
+// expected-error@+2 {{expected identifier}} \
+ expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" xxx::)
+#endif
+
+// OK, no diagnostic for an unknown embed parameter.
+#if __has_embed("media/empty" xxx::xxx)
+#endif
+
+// expected-error@+2 {{expected identifier}} \
+ expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" xxx::42)
+#endif
+
+// expected-error@+2 {{expected '('}} \
+ expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" limit)
+#endif
+
+// We get the same diagnostic twice intentionally. The first one is because of
+// the missing value within limit() and the second one is because the #if does
+// not resolve to a value due to the earlier error.
+// expected-error@+1 2 {{expected value in expression}}
+#if __has_embed("media/empty" limit()
+#endif
+
+// expected-error@+3 {{missing ')' after '__has_embed'}} \
+ expected-error@+3 {{expected value in expression}} \
+ expected-note@+3 {{to match this '('}}
+#if __has_embed("media/empty" limit(xxx)
+#endif
+
+// expected-error@+3 {{missing ')' after '__has_embed'}} \
+ expected-error@+3 {{expected value in expression}} \
+ expected-note@+3 {{to match this '('}}
+#if __has_embed("media/empty" limit(42)
+#endif
+
+// expected-error@+2 {{invalid token at start of a preprocessor expression}} \
+ expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" limit([)
+#endif
+
+// expected-error@+2 {{invalid token at start of a preprocessor expression}} \
+ expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" limit([))
+#endif
+
+// expected-error@+2 {{division by zero in preprocessor expression}} \
+ expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" limit(1/0))
+#endif
+
+// expected-error@+2 {{expected '('}} \
+ expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" clang::offset)
+#endif
+
+// We get the same diagnostic twice intentionally. The first one is because of
+// the missing value within clang::offset() and the second one is because the
+// #if does not resolve to a value due to the earlier error.
+// expected-error@+1 2 {{expected value in expression}}
+#if __has_embed("media/empty" clang::offset()
+#endif
+
+// expected-error@+3 {{missing ')' after '__has_embed'}} \
+ expected-error@+3 {{expected value in expression}} \
+ expected-note@+3 {{to match this '('}}
+#if __has_embed("media/empty" clang::offset(xxx)
+#endif
+
+// expected-error@+3 {{missing ')' after '__has_embed'}} \
+ expected-error@+3 {{expected value in expression}} \
+ expected-note@+3 {{to match this '('}}
+#if __has_embed("media/empty" clang::offset(42)
+#endif
+
+// expected-error@+2 {{invalid token at start of a preprocessor expression}} \
+ expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" clang::offset([)
+#endif
+
+// expected-error@+2 {{invalid token at start of a preprocessor expression}} \
+ expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" clang::offset([))
+#endif
+
+// expected-error@+2 {{division by zero in preprocessor expression}} \
+ expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" clang::offset(1/0))
+#endif
+
+// expected-error@+2 {{expected '('}} \
+ expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" clang::offset 42)
+#endif
+
+// expected-error@+2 {{expected '('}} \
+ expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" prefix)
+#endif
+
+// expected-error@+3 {{missing ')' after '__has_embed'}} \
+ expected-error@+3 {{expected value in expression}} \
+ expected-note@+3 {{to match this '('}}
+#if __has_embed("media/empty" prefix()
+#endif
+
+// expected-error@+3 {{missing ')' after '__has_embed'}} \
+ expected-error@+3 {{expected value in expression}} \
+ expected-note@+3 {{to match this '('}}
+#if __has_embed("media/empty" prefix(xxx)
+#endif
+
+#if __has_embed("media/empty" prefix(1/0)) // OK: emitted as tokens, not evaluated yet.
+#endif
+#if __has_embed("media/empty" prefix(([{}]))) // OK: delimiters balanced
+#endif
+// expected-error@+3 {{expected '}'}} \
+ expected-note@+3 {{to match this '{'}} \
+ expected-error@+3 {{expected value in expression}}
+#if __has_embed("media/empty" prefix(([{)]}))
+#endif
+// expected-error@+3 {{expected ']'}} \
+ expected-note@+3 {{to match this '['}} \
+ expected-error@+3 {{expected value in expression}}
+#if __has_embed("media/empty" prefix(([{})}))
+#endif
+// expected-error@+3 {{expected ')'}} \
+ expected-note@+3 {{to match this '('}} \
+ expected-error@+3 {{expected value in expression}}
+#if __has_embed("media/empty" prefix(([{}]}))
+#endif
+#if __has_embed("media/empty" prefix()) // OK: tokens within parens are optional
+#endif
+// expected-error@+2 {{expected '('}} \
+ expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" prefix))
+#endif
+
+// expected-error@+2 {{expected '('}} \
+ expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" suffix)
+#endif
+
+// expected-error@+3 {{missing ')' after '__has_embed'}} \
+ expected-error@+3 {{expected value in expression}} \
+ expected-note@+3 {{to match this '('}}
+#if __has_embed("media/empty" suffix()
+#endif
+
+// expected-error@+3 {{missing ')' after '__has_embed'}} \
+ expected-error@+3 {{expected value in expression}} \
+ expected-note@+3 {{to match this '('}}
+#if __has_embed("media/empty" suffix(xxx)
+#endif
+
+#if __has_embed("media/empty" suffix(1/0)) // OK: emitted as tokens, not evaluated yet.
+#endif
+#if __has_embed("media/empty" suffix(([{}]))) // OK: delimiters balanced
+#endif
+// expected-error@+3 {{expected '}'}} \
+ expected-note@+3 {{to match this '{'}} \
+ expected-error@+3 {{expected value in expression}}
+#if __has_embed("media/empty" suffix(([{)]}))
+#endif
+// expected-error@+3 {{expected ']'}} \
+ expected-note@+3 {{to match this '['}} \
+ expected-error@+3 {{expected value in expression}}
+#if __has_embed("media/empty" suffix(([{})}))
+#endif
+// expected-error@+3 {{expected ')'}} \
+ expected-note@+3 {{to match this '('}} \
+ expected-error@+3 {{expected value in expression}}
+#if __has_embed("media/empty" suffix(([{}]}))
+#endif
+#if __has_embed("media/empty" suffix()) // OK: tokens within parens are optional
+#endif
+// expected-error@+2 {{expected '('}} \
+ expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" suffix))
+#endif
+
+#if __has_embed("media/art.txt" if_empty(1/0)) // OK: emitted as tokens, not evaluated yet.
+#endif
+#if __has_embed("media/art.txt" if_empty(([{}]))) // OK: delimiters balanced
+#endif
+// expected-error@+3 {{expected '}'}} \
+ expected-note@+3 {{to match this '{'}} \
+ expected-error@+3 {{expected value in expression}}
+#if __has_embed("media/art.txt" if_empty(([{)]}))
+#endif
+// expected-error@+3 {{expected ']'}} \
+ expected-note@+3 {{to match this '['}} \
+ expected-error@+3 {{expected value in expression}}
+#if __has_embed("media/art.txt" if_empty(([{})}))
+#endif
+// expected-error@+3 {{expected ')'}} \
+ expected-note@+3 {{to match this '('}} \
+ expected-error@+3 {{expected value in expression}}
+#if __has_embed("media/art.txt" if_empty(([{}]}))
+#endif
+#if __has_embed("media/art.txt" if_empty()) // OK: tokens within parens are optional
+#endif
+// expected-error@+2 {{expected '('}} \
+ expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/art.txt" if_empty))
+#endif
+
diff --git a/clang/test/Preprocessor/embed___has_embed_supported.c b/clang/test/Preprocessor/embed___has_embed_supported.c
new file mode 100644
index 0000000..e51dbb8
--- /dev/null
+++ b/clang/test/Preprocessor/embed___has_embed_supported.c
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -std=c23 %s -E -verify
+
+#if __has_embed(__FILE__) != __STDC_EMBED_FOUND__
+#error 1
+#elif __has_embed(__FILE__) != __STDC_EMBED_FOUND__
+#error 2
+#elif __has_embed(__FILE__ suffix(x)) != __STDC_EMBED_FOUND__
+#error 3
+#elif __has_embed(__FILE__ suffix(x) limit(1)) != __STDC_EMBED_FOUND__
+#error 4
+#elif __has_embed(__FILE__ suffix(x) limit(1) prefix(1)) != __STDC_EMBED_FOUND__
+#error 5
+#elif __has_embed(__FILE__ suffix(x) limit(2) prefix(1) clang::offset(1)) != __STDC_EMBED_FOUND__
+#error 6
+#elif __has_embed(__FILE__ suffix(x) limit(0) prefix(1)) != __STDC_EMBED_EMPTY__
+#error 7
+#elif __has_embed(__FILE__ suffix(x) limit(1) prefix(1) clang::offset(1)) != __STDC_EMBED_FOUND__
+#error 8
+#elif __has_embed(__FILE__ suffix(x) limit(0)) != __STDC_EMBED_EMPTY__
+#error 9
+#elif __has_embed(__FILE__ suffix(x) limit(0) if_empty(:3)) != __STDC_EMBED_EMPTY__
+#error 10
+#endif
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_art.c b/clang/test/Preprocessor/embed_art.c
new file mode 100644
index 0000000..a664715
--- /dev/null
+++ b/clang/test/Preprocessor/embed_art.c
@@ -0,0 +1,104 @@
+// RUN: %clang_cc1 -std=c23 %s -fsyntax-only --embed-dir=%S/Inputs -verify
+// expected-no-diagnostics
+
+const char data[] = {
+#embed <media/art.txt>
+};
+const char data2[] = {
+#embed <media/art.txt>
+, 0
+};
+const char data3[] = {
+#embed <media/art.txt> suffix(, 0)
+};
+const char data4[] = {
+#embed <media/art.txt> suffix(,)
+0
+};
+static_assert(sizeof(data) == 274);
+static_assert(' ' == data[0]);
+static_assert('_' == data[11]);
+static_assert('\n' == data[273]);
+static_assert(sizeof(data2) == 275);
+static_assert(' ' == data2[0]);
+static_assert('_' == data2[11]);
+static_assert('\n' == data2[273]);
+static_assert('\0' == data2[274]);
+static_assert(sizeof(data3) == 275);
+static_assert(' ' == data3[0]);
+static_assert('_' == data3[11]);
+static_assert('\n' == data3[273]);
+static_assert('\0' == data3[274]);
+static_assert(sizeof(data4) == 275);
+static_assert(' ' == data4[0]);
+static_assert('_' == data4[11]);
+static_assert('\n' == data4[273]);
+static_assert('\0' == data4[274]);
+
+const signed char data5[] = {
+#embed <media/art.txt>
+};
+const signed char data6[] = {
+#embed <media/art.txt>
+, 0
+};
+const signed char data7[] = {
+#embed <media/art.txt> suffix(, 0)
+};
+const signed char data8[] = {
+#embed <media/art.txt> suffix(,)
+0
+};
+static_assert(sizeof(data5) == 274);
+static_assert(' ' == data5[0]);
+static_assert('_' == data5[11]);
+static_assert('\n' == data5[273]);
+static_assert(sizeof(data6) == 275);
+static_assert(' ' == data6[0]);
+static_assert('_' == data6[11]);
+static_assert('\n' == data6[273]);
+static_assert('\0' == data6[274]);
+static_assert(sizeof(data7) == 275);
+static_assert(' ' == data7[0]);
+static_assert('_' == data7[11]);
+static_assert('\n' == data7[273]);
+static_assert('\0' == data7[274]);
+static_assert(sizeof(data8) == 275);
+static_assert(' ' == data8[0]);
+static_assert('_' == data8[11]);
+static_assert('\n' == data8[273]);
+static_assert('\0' == data8[274]);
+
+const unsigned char data9[] = {
+#embed <media/art.txt>
+};
+const unsigned char data10[] = {
+0,
+#embed <media/art.txt>
+};
+const unsigned char data11[] = {
+#embed <media/art.txt> prefix(0,)
+};
+const unsigned char data12[] = {
+0
+#embed <media/art.txt> prefix(,)
+};
+static_assert(sizeof(data9) == 274);
+static_assert(' ' == data9[0]);
+static_assert('_' == data9[11]);
+static_assert('\n' == data9[273]);
+static_assert(sizeof(data10) == 275);
+static_assert(' ' == data10[1]);
+static_assert('_' == data10[12]);
+static_assert('\n' == data10[274]);
+static_assert('\0' == data10[0]);
+static_assert(sizeof(data11) == 275);
+static_assert(' ' == data11[1]);
+static_assert('_' == data11[12]);
+static_assert('\n' == data11[274]);
+static_assert('\0' == data11[0]);
+static_assert(sizeof(data12) == 275);
+static_assert(' ' == data12[1]);
+static_assert('_' == data12[12]);
+static_assert('\n' == data12[274]);
+static_assert('\0' == data12[0]);
diff --git a/clang/test/Preprocessor/embed_codegen.cpp b/clang/test/Preprocessor/embed_codegen.cpp
new file mode 100644
index 0000000..64110af
--- /dev/null
+++ b/clang/test/Preprocessor/embed_codegen.cpp
@@ -0,0 +1,84 @@
+// RUN: %clang_cc1 %s -triple x86_64 --embed-dir=%S/Inputs -emit-llvm -o - | FileCheck %s
+
+// CHECK: @__const._Z3fooi.ca = private unnamed_addr constant [3 x i32] [i32 0, i32 106, i32 107], align 4
+// CHECK: @__const._Z3fooi.sc = private unnamed_addr constant %struct.S1 { i32 106, i32 107, i32 0 }, align 4
+// CHECK: @__const._Z3fooi.t = private unnamed_addr constant [3 x %struct.T] [%struct.T { [2 x i32] [i32 48, i32 49], %struct.S1 { i32 50, i32 51, i32 52 } }, %struct.T { [2 x i32] [i32 53, i32 54], %struct.S1 { i32 55, i32 56, i32 57 } }, %struct.T { [2 x i32] [i32 10, i32 0], %struct.S1 zeroinitializer }], align 16
+void foo(int a) {
+// CHECK: %a.addr = alloca i32, align 4
+// CHECK: store i32 %a, ptr %a.addr, align 4
+// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %ca, ptr align 4 @__const._Z3fooi.ca, i64 12, i1 false)
+int ca[] = {
+0
+#embed <jk.txt> prefix(,)
+};
+
+// CHECK: %arrayinit.element = getelementptr inbounds i32, ptr %notca, i64 1
+// CHECK: store i8 106, ptr %arrayinit.element, align 4
+// CHECK: %arrayinit.element1 = getelementptr inbounds i32, ptr %notca, i64 2
+// CHECK: store i8 107, ptr %arrayinit.element1, align 4
+int notca[] = {
+a
+#embed <jk.txt> prefix(,)
+};
+
+struct S1 {
+ int x, y, z;
+};
+
+// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %sc, ptr align 4 @__const._Z3fooi.sc, i64 12, i1 false)
+S1 sc = {
+#embed <jk.txt> suffix(,)
+0
+};
+
+// CHECK: %x = getelementptr inbounds %struct.S1, ptr %s, i32 0, i32 0
+// CHECK: store i32 106, ptr %x, align 4
+// CHECK: %y = getelementptr inbounds %struct.S1, ptr %s, i32 0, i32 1
+// CHECK: store i32 107, ptr %y, align 4
+// CHECK: %z = getelementptr inbounds %struct.S1, ptr %s, i32 0, i32 2
+// CHECK: %1 = load i32, ptr %a.addr, align 4
+S1 s = {
+#embed <jk.txt> suffix(,)
+a
+};
+
+// CHECK: store i32 107, ptr %b, align 4
+int b =
+#embed<jk.txt>
+;
+
+
+struct T {
+ int arr[2];
+ struct S1 s;
+};
+
+// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 %t, ptr align 16 @__const._Z3fooi.t, i64 60, i1 false)
+constexpr struct T t[] = {
+#embed <numbers.txt>
+};
+
+// CHECK: %arr = getelementptr inbounds %struct.T, ptr %tnonc, i32 0, i32 0
+// CHECK: %2 = load i32, ptr %a.addr, align 4
+// CHECK: store i32 %2, ptr %arr, align 4
+// CHECK: %arrayinit.element2 = getelementptr inbounds i32, ptr %arr, i64 1
+// CHECK: store i32 300, ptr %arrayinit.element2, align 4
+// CHECK: %s3 = getelementptr inbounds %struct.T, ptr %tnonc, i32 0, i32 1
+// CHECK: %x4 = getelementptr inbounds %struct.S1, ptr %s3, i32 0, i32 0
+// CHECK: store i32 1, ptr %x4, align 4
+// CHECK: %y5 = getelementptr inbounds %struct.S1, ptr %s3, i32 0, i32 1
+// CHECK: store i32 2, ptr %y5, align 4
+// CHECK: %z6 = getelementptr inbounds %struct.S1, ptr %s3, i32 0, i32 2
+// CHECK: store i32 3, ptr %z6, align 4
+// CHECK: %arrayinit.element7 = getelementptr inbounds %struct.T, ptr %tnonc, i64 1
+// CHECK: call void @llvm.memset.p0.i64(ptr align 4 %arrayinit.element7, i8 0, i64 20, i1 false)
+// CHECK: %arr8 = getelementptr inbounds %struct.T, ptr %arrayinit.element7, i32 0, i32 0
+// CHECK: store i8 106, ptr %arr8, align 4
+// CHECK: %arrayinit.element9 = getelementptr inbounds i32, ptr %arr8, i64 1
+// CHECK: store i8 107, ptr %arrayinit.element9, align 4
+struct T tnonc[] = {
+ a, 300, 1, 2, 3
+#embed <jk.txt> prefix(,)
+};
+
+}
diff --git a/clang/test/Preprocessor/embed_constexpr.cpp b/clang/test/Preprocessor/embed_constexpr.cpp
new file mode 100644
index 0000000..1cadff7
--- /dev/null
+++ b/clang/test/Preprocessor/embed_constexpr.cpp
@@ -0,0 +1,97 @@
+// RUN: %clang_cc1 %s -fsyntax-only --embed-dir=%S/Inputs -verify -Wno-c23-extensions
+// RUN: %clang_cc1 %s -fsyntax-only --embed-dir=%S/Inputs -verify -fexperimental-new-constant-interpreter -Wno-c23-extensions
+
+constexpr int value(int a, int b) {
+ return a + b;
+}
+
+constexpr int func_call() {
+ return value(
+#embed <jk.txt>
+ );
+}
+
+constexpr int init_list_expr() {
+ int vals[] = {
+#embed <jk.txt>
+ };
+ return value(vals[0], vals[1]);
+}
+
+template <int N, int M>
+struct Hurr {
+ static constexpr int V1 = N;
+ static constexpr int V2 = M;
+};
+
+constexpr int template_args() {
+ Hurr<
+#embed <jk.txt>
+ > H;
+ return value(H.V1, H.V2);
+}
+
+constexpr int ExpectedValue = 'j' + 'k';
+static_assert(func_call() == ExpectedValue);
+static_assert(init_list_expr() == ExpectedValue);
+static_assert(template_args() == ExpectedValue);
+
+static_assert(
+#embed <jk.txt> limit(1) suffix(== 'j')
+);
+
+int array[
+#embed <jk.txt> limit(1)
+];
+static_assert(sizeof(array) / sizeof(int) == 'j');
+
+constexpr int comma_expr = (
+#embed <jk.txt> // expected-warning {{left operand of comma operator has no effect}}
+);
+static_assert(comma_expr == 'k');
+
+constexpr int comma_expr_init_list{ (
+#embed <jk.txt> limit(1)
+) };
+static_assert(comma_expr_init_list == 'j');
+
+constexpr int paren_init(
+#embed <jk.txt> limit(1)
+);
+static_assert(paren_init == 'j');
+
+struct S {
+ const char buffer[2] = {
+#embed "jk.txt"
+ };
+};
+
+constexpr struct S s;
+static_assert(s.buffer[1] == 'k');
+
+struct S1 {
+ int x, y;
+};
+
+struct T {
+ int x, y;
+ struct S1 s;
+};
+
+constexpr struct T t[] = {
+#embed <numbers.txt>
+};
+static_assert(t[0].s.x == '2');
+
+constexpr int func(int i, int) { return i; }
+static_assert(
+ func(
+#embed <jk.txt>
+ ) == 'j');
+
+template <int N>
+struct ST {};
+
+ST<
+#embed <jk.txt> limit(1)
+> st;
diff --git a/clang/test/Preprocessor/embed_dependencies.c b/clang/test/Preprocessor/embed_dependencies.c
new file mode 100644
index 0000000..4e00dc7
--- /dev/null
+++ b/clang/test/Preprocessor/embed_dependencies.c
@@ -0,0 +1,20 @@
+// RUN: %clang %s -fsyntax-only -std=c23 -M --embed-dir=%S/Inputs -Xclang -verify | FileCheck %s
+
+// Yes this looks very strange indeed, but the goal is to test that we add
+// files referenced by both __has_embed and #embed when we generate
+// dependencies, so we're trying to see that both of these files are in the
+// output.
+#if __has_embed(<jk.txt>)
+const char data =
+#embed "Inputs/single_byte.txt"
+;
+_Static_assert('b' == data);
+#else
+#error "oops"
+#endif
+// expected-no-diagnostics
+
+// CHECK: embed_dependencies.c \
+// CHECK-NEXT: jk.txt \
+// CHECK-NEXT: Inputs{{[/\\]}}single_byte.txt
+
diff --git a/clang/test/Preprocessor/embed_ext_compat_diags.c b/clang/test/Preprocessor/embed_ext_compat_diags.c
new file mode 100644
index 0000000..74f2417
--- /dev/null
+++ b/clang/test/Preprocessor/embed_ext_compat_diags.c
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -std=c23 %s -fsyntax-only --embed-dir=%S/Inputs -verify=none -pedantic
+// RUN: %clang_cc1 -std=c23 %s -fsyntax-only --embed-dir=%S/Inputs -verify=compat -Wpre-c23-compat
+// RUN: %clang_cc1 -std=c17 %s -fsyntax-only --embed-dir=%S/Inputs -verify=ext -pedantic
+// RUN: %clang_cc1 -x c++ %s -fsyntax-only --embed-dir=%S/Inputs -verify=cxx -pedantic
+// none-no-diagnostics
+
+#if __has_embed("jk.txt")
+
+const char buffer[] = {
+#embed "jk.txt" /* compat-warning {{#embed is incompatible with C standards before C23}}
+ ext-warning {{#embed is a C23 extension}}
+ cxx-warning {{#embed is a Clang extension}}
+ */
+};
+#endif
+
diff --git a/clang/test/Preprocessor/embed_feature_test.cpp b/clang/test/Preprocessor/embed_feature_test.cpp
new file mode 100644
index 0000000..2648804
--- /dev/null
+++ b/clang/test/Preprocessor/embed_feature_test.cpp
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 %s -E -CC -verify
+// RUN: %clang_cc1 -x c %s -E -CC -verify
+// expected-no-diagnostics
+
+#if !defined(__has_embed)
+#error 1
+#endif
diff --git a/clang/test/Preprocessor/embed_file_not_found_chevron.c b/clang/test/Preprocessor/embed_file_not_found_chevron.c
new file mode 100644
index 0000000..472222a
--- /dev/null
+++ b/clang/test/Preprocessor/embed_file_not_found_chevron.c
@@ -0,0 +1,4 @@
+// RUN: %clang_cc1 -std=c23 %s -E -verify
+
+#embed <nfejfNejAKFe>
+// expected-error@-1 {{'nfejfNejAKFe' file not found}}
diff --git a/clang/test/Preprocessor/embed_file_not_found_quote.c b/clang/test/Preprocessor/embed_file_not_found_quote.c
new file mode 100644
index 0000000..bf9c62b
--- /dev/null
+++ b/clang/test/Preprocessor/embed_file_not_found_quote.c
@@ -0,0 +1,4 @@
+// RUN: %clang_cc1 -std=c23 %s -E -verify
+
+#embed "nfejfNejAKFe"
+// expected-error@-1 {{'nfejfNejAKFe' file not found}}
diff --git a/clang/test/Preprocessor/embed_init.c b/clang/test/Preprocessor/embed_init.c
new file mode 100644
index 0000000..79b1743
--- /dev/null
+++ b/clang/test/Preprocessor/embed_init.c
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -std=c23 %s -fsyntax-only --embed-dir=%S/Inputs -verify
+// RUN: %clang_cc1 -std=c23 %s -fsyntax-only --embed-dir=%S/Inputs -verify -fexperimental-new-constant-interpreter
+// expected-no-diagnostics
+
+typedef struct kitty {
+ int purr;
+} kitty;
+
+typedef struct kitty_kitty {
+ int here;
+ kitty kit;
+} kitty_kitty;
+
+const int meow =
+#embed <single_byte.txt>
+;
+
+const kitty kit = {
+#embed <single_byte.txt>
+};
+
+const kitty_kitty kit_kit = {
+#embed <jk.txt>
+};
+
+static_assert(meow == 'b');
+static_assert(kit.purr == 'b');
+static_assert(kit_kit.here == 'j');
+static_assert(kit_kit.kit.purr == 'k');
diff --git a/clang/test/Preprocessor/embed_parameter_if_empty.c b/clang/test/Preprocessor/embed_parameter_if_empty.c
new file mode 100644
index 0000000..70f1bc6
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parameter_if_empty.c
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -std=c23 %s --embed-dir=%S/Inputs -fsyntax-only -verify
+
+const char data[] = {
+#embed <media/empty> if_empty(123, 124, 125)
+};
+const char non_empty_data[] = {
+#embed <jk.txt> if_empty(123, 124, 125)
+};
+static_assert(sizeof(data) == 3);
+static_assert(123 == data[0]);
+static_assert(124 == data[1]);
+static_assert(125 == data[2]);
+static_assert(sizeof(non_empty_data) == 2);
+static_assert('j' == non_empty_data[0]);
+static_assert('k' == non_empty_data[1]);
+
+// Ensure we diagnose duplicate parameters even when their values differ.
+const unsigned char a[] = {
+#embed <jk.txt> if_empty(1) prefix() if_empty(2)
+// expected-error@-1 {{cannot specify parameter 'if_empty' twice in the same '#embed' directive}}
+,
+#embed <jk.txt> if_empty(1) suffix() if_empty(2)
+// expected-error@-1 {{cannot specify parameter 'if_empty' twice in the same '#embed' directive}}
+};
diff --git a/clang/test/Preprocessor/embed_parameter_limit.c b/clang/test/Preprocessor/embed_parameter_limit.c
new file mode 100644
index 0000000..da3e4fb
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parameter_limit.c
@@ -0,0 +1,94 @@
+// RUN: %clang_cc1 -std=c23 %s --embed-dir=%S/Inputs -fsyntax-only -verify
+
+const char data[] = {
+#embed <jk.txt>
+};
+const char offset_data[] = {
+#embed <jk.txt> limit(1)
+};
+static_assert(sizeof(data) == 2);
+static_assert('j' == data[0]);
+static_assert('k' == data[1]);
+static_assert(sizeof(offset_data) == 1);
+static_assert('j' == offset_data[0]);
+static_assert(offset_data[0] == data[0]);
+
+// Cannot have a negative limit.
+#embed <jk.txt> limit(-1)
+// expected-error@-1 {{invalid value '-1'; must be positive}}
+
+// A limit of 0 is allowed; __has_embed then reports __STDC_EMBED_EMPTY__.
+#if __has_embed(<jk.txt> limit(0)) != __STDC_EMBED_EMPTY__
+#error "__has_embed should return false when there's no data"
+#endif
+
+// When the limit is zero, the resource is empty, so if_empty kicks in.
+const unsigned char buffer[] = {
+#embed <jk.txt> limit(0) if_empty(1)
+};
+static_assert(sizeof(buffer) == 1);
+static_assert(buffer[0] == 1);
+
+// However, prefix and suffix do not kick in.
+const unsigned char other_buffer[] = {
+ 1,
+#embed <jk.txt> limit(0) prefix(2,) suffix(3)
+};
+static_assert(sizeof(other_buffer) == 1);
+static_assert(other_buffer[0] == 1);
+
+// Ensure we can limit to something larger than the file size as well.
+const unsigned char third_buffer[] = {
+#embed <jk.txt> limit(100)
+};
+static_assert(sizeof(third_buffer) == 2);
+static_assert('j' == third_buffer[0]);
+static_assert('k' == third_buffer[1]);
+
+// Test the limits of a file with more than one character in it.
+const unsigned char fourth_buffer[] = {
+#embed <media/art.txt> limit(10)
+};
+static_assert(sizeof(fourth_buffer) == 10);
+static_assert(' ' == fourth_buffer[0]);
+static_assert(' ' == fourth_buffer[1]);
+static_assert(' ' == fourth_buffer[2]);
+static_assert(' ' == fourth_buffer[3]);
+static_assert(' ' == fourth_buffer[4]);
+static_assert(' ' == fourth_buffer[5]);
+static_assert(' ' == fourth_buffer[6]);
+static_assert(' ' == fourth_buffer[7]);
+static_assert(' ' == fourth_buffer[8]);
+static_assert(' ' == fourth_buffer[9]);
+
+// Ensure that a limit larger than what can fit into a 64-bit value is
+// rejected. This limit is fine because it fits in a 64-bit value.
+const unsigned char fifth_buffer[] = {
+#embed <jk.txt> limit(0xFFFF'FFFF'FFFF'FFFF)
+};
+static_assert(sizeof(fifth_buffer) == 2);
+static_assert('j' == fifth_buffer[0]);
+static_assert('k' == fifth_buffer[1]);
+
+// But this one is not fine because it does not fit into a 64-bit value.
+const unsigned char sixth_buffer[] = {
+#embed <jk.txt> limit(0xFFFF'FFFF'FFFF'FFFF'1)
+};
+// expected-error@-2 {{integer literal is too large to be represented in any integer type}}
+// Note: the preprocessor will continue with the truncated value, so the parser
+// will treat this case and the previous one identically in terms of what
+// contents are retained from the embedded resource (which is the entire file).
+
+// Ensure we diagnose duplicate parameters even if they're the same value.
+const unsigned char a[] = {
+#embed <jk.txt> limit(1) prefix() limit(1)
+// expected-error@-1 {{cannot specify parameter 'limit' twice in the same '#embed' directive}}
+,
+#embed <jk.txt> limit(1) if_empty() limit(2)
+// expected-error@-1 {{cannot specify parameter 'limit' twice in the same '#embed' directive}}
+};
+
+// C23 6.10.3.2p2
+static_assert(
+#embed <jk.txt> limit(defined(FOO)) // expected-error {{'defined' cannot appear within this context}}
+ == 0); // expected-error {{expected expression}}
diff --git a/clang/test/Preprocessor/embed_parameter_offset.c b/clang/test/Preprocessor/embed_parameter_offset.c
new file mode 100644
index 0000000..ab1bd3f
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parameter_offset.c
@@ -0,0 +1,89 @@
+// RUN: %clang_cc1 -std=c23 %s --embed-dir=%S/Inputs -fsyntax-only -verify
+
+const char data[] = {
+#embed <jk.txt>
+};
+const char offset_data[] = {
+#embed <jk.txt> clang::offset(1)
+};
+static_assert(sizeof(data) == 2);
+static_assert('j' == data[0]);
+static_assert('k' == data[1]);
+static_assert(sizeof(offset_data) == 1);
+static_assert('k' == offset_data[0]);
+static_assert(offset_data[0] == data[1]);
+
+// Cannot have a negative offset.
+#embed <jk.txt> clang::offset(-1)
+// expected-error@-1 {{invalid value '-1'; must be positive}}
+
+// If the offset is past the end of the file, the file should be considered
+// empty.
+#if __has_embed(<jk.txt> clang::offset(3)) != __STDC_EMBED_EMPTY__
+#error "__has_embed should return false when there's no data"
+#endif
+
+// When the offset is past the end of the file, the resource is empty, so if_empty kicks in.
+const unsigned char buffer[] = {
+#embed <jk.txt> clang::offset(3) if_empty(1)
+};
+static_assert(sizeof(buffer) == 1);
+static_assert(buffer[0] == 1);
+
+// However, prefix and suffix do not kick in.
+const unsigned char other_buffer[] = {
+ 1,
+#embed <jk.txt> clang::offset(3) prefix(2,) suffix(3)
+};
+static_assert(sizeof(other_buffer) == 1);
+static_assert(other_buffer[0] == 1);
+
+// Ensure we can offset to zero (that's the default behavior)
+const unsigned char third_buffer[] = {
+#embed <jk.txt> clang::offset(0)
+};
+static_assert(sizeof(third_buffer) == 2);
+static_assert('j' == third_buffer[0]);
+static_assert('k' == third_buffer[1]);
+
+// Test the offsets of a file with more than one character in it.
+const unsigned char fourth_buffer[] = {
+#embed <media/art.txt> clang::offset(24) limit(4)
+};
+static_assert(sizeof(fourth_buffer) == 4);
+static_assert('.' == fourth_buffer[0]);
+static_assert('-' == fourth_buffer[1]);
+static_assert('.' == fourth_buffer[2]);
+static_assert('\'' == fourth_buffer[3]);
+
+// Ensure that an offset larger than what can fit into a 64-bit value is
+// rejected. This offset is fine because it fits in a 64-bit value.
+const unsigned char fifth_buffer[] = {
+ 1,
+#embed <jk.txt> clang::offset(0xFFFF'FFFF'FFFF'FFFF)
+};
+static_assert(sizeof(fifth_buffer) == 1);
+static_assert(1 == fifth_buffer[0]);
+
+// But this one is not fine because it does not fit into a 64-bit value.
+const unsigned char sixth_buffer[] = {
+#embed <jk.txt> clang::offset(0xFFFF'FFFF'FFFF'FFFF'1)
+};
+// expected-error@-2 {{integer literal is too large to be represented in any integer type}}
+
+// Ensure we diagnose duplicate parameters even if they're the same value.
+const unsigned char a[] = {
+#embed <jk.txt> clang::offset(1) prefix() clang::offset(1)
+// expected-error@-1 {{cannot specify parameter 'clang::offset' twice in the same '#embed' directive}}
+,
+#embed <jk.txt> clang::offset(1) if_empty() clang::offset(2)
+// expected-error@-1 {{cannot specify parameter 'clang::offset' twice in the same '#embed' directive}}
+};
+
+// Matches C23 6.10.3.2p2; this is documented as part of our extension.
+static_assert(
+#embed <jk.txt> clang::offset(defined(FOO))
+ == 0); // expected-error {{expected expression}}
+ /* expected-error@-2 {{'defined' cannot appear within this context}}
+ pedantic-warning@-2 {{'clang::offset' is a Clang extension}}
+ */
diff --git a/clang/test/Preprocessor/embed_parameter_prefix.c b/clang/test/Preprocessor/embed_parameter_prefix.c
new file mode 100644
index 0000000..b55c08f
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parameter_prefix.c
@@ -0,0 +1,38 @@
+// RUN: %clang_cc1 -std=c23 %s --embed-dir=%S/Inputs -fsyntax-only -verify
+
+const char data[] = {
+#embed <single_byte.txt> prefix('\xA', )
+};
+const char empty_data[] = {
+#embed <media/empty> prefix('\xA', )
+1
+};
+static_assert(sizeof(data) == 2);
+static_assert('\xA' == data[0]);
+static_assert('b' == data[1]);
+static_assert(sizeof(empty_data) == 1);
+static_assert(1 == empty_data[0]);
+
+struct S {
+ int x, y, z;
+};
+
+const struct S s = {
+#embed <single_byte.txt> prefix( .x = 100, .y = 10, )
+};
+static_assert(s.x == 100);
+static_assert(s.y == 10);
+static_assert(s.z == 'b');
+
+// Ensure that an empty file does not produce any prefix tokens. If it did,
+// there would be random tokens here that the parser would trip on.
+#embed <media/empty> prefix(0)
+
+// Ensure we diagnose duplicate parameters even if they're the same value.
+const unsigned char a[] = {
+#embed <jk.txt> prefix(1,) limit(1) prefix(1,)
+// expected-error@-1 {{cannot specify parameter 'prefix' twice in the same '#embed' directive}}
+,
+#embed <jk.txt> prefix(1,) if_empty() prefix(2,)
+// expected-error@-1 {{cannot specify parameter 'prefix' twice in the same '#embed' directive}}
+};
diff --git a/clang/test/Preprocessor/embed_parameter_suffix.c b/clang/test/Preprocessor/embed_parameter_suffix.c
new file mode 100644
index 0000000..7d768268
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parameter_suffix.c
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 -std=c23 %s --embed-dir=%S/Inputs -fsyntax-only -verify
+
+const char data[] = {
+#embed <single_byte.txt> suffix(, '\xA')
+};
+const char empty_data[] = {
+#embed <media/empty> suffix(, '\xA')
+1
+};
+static_assert(sizeof(data) == 2);
+static_assert('b' == data[0]);
+static_assert('\xA' == data[1]);
+static_assert(sizeof(empty_data) == 1);
+static_assert(1 == empty_data[0]);
+
+struct S {
+ int x, y, z;
+};
+
+const struct S s = {
+#embed <single_byte.txt> suffix( , .y = 100, .z = 10 )
+};
+
+static_assert(s.x == 'b');
+static_assert(s.y == 100);
+static_assert(s.z == 10);
+
+// Ensure that an empty file does not produce any suffix tokens. If it did,
+// there would be random tokens here that the parser would trip on.
+#embed <media/empty> suffix(0)
+
+// Ensure we diagnose duplicate parameters even if they're the same value.
+const unsigned char a[] = {
+#embed <jk.txt> suffix(,1) prefix() suffix(,1)
+// expected-error@-1 {{cannot specify parameter 'suffix' twice in the same '#embed' directive}}
+,
+#embed <jk.txt> suffix(,1) if_empty() suffix(,2)
+// expected-error@-1 {{cannot specify parameter 'suffix' twice in the same '#embed' directive}}
+};
diff --git a/clang/test/Preprocessor/embed_parameter_unrecognized.c b/clang/test/Preprocessor/embed_parameter_unrecognized.c
new file mode 100644
index 0000000..b033843
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parameter_unrecognized.c
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 %s -std=c23 -E -verify
+// Ensure unrecognized embed parameters are diagnosed, with or without arguments.
+
+#embed __FILE__ unrecognized
+// expected-error@-1 {{unknown embed preprocessor parameter 'unrecognized'}}
+#embed __FILE__ unrecognized::param
+// expected-error@-1 {{unknown embed preprocessor parameter 'unrecognized::param'}}
+#embed __FILE__ unrecognized::param(with, args)
+// expected-error@-1 {{unknown embed preprocessor parameter 'unrecognized::param'}}
diff --git a/clang/test/Preprocessor/embed_parsing_errors.c b/clang/test/Preprocessor/embed_parsing_errors.c
new file mode 100644
index 0000000..490ec6d
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parsing_errors.c
@@ -0,0 +1,130 @@
+// RUN: %clang_cc1 -std=c23 %s -E -verify
+
+// Test the parsing behavior for #embed and all of its parameters to ensure we
+// recover from failures gracefully.
+char buffer[] = {
+#embed
+// expected-error@-1 {{expected "FILENAME" or <FILENAME>}}
+
+#embed <
+// expected-error@-1 {{expected '>'}} \
+ expected-note@-1 {{to match this '<'}}
+
+#embed "
+// expected-error@-1 {{expected "FILENAME" or <FILENAME>}} \
+ expected-warning@-1 {{missing terminating '"' character}}
+
+#embed file.txt
+// expected-error@-1{{expected "FILENAME" or <FILENAME>}}
+
+#embed "embed_parsing_errors.c" xxx
+// expected-error@-1 {{unknown embed preprocessor parameter 'xxx'}}
+
+#embed "embed_parsing_errors.c" xxx::
+// expected-error@-1 {{expected identifier}}
+
+#embed "embed_parsing_errors.c" xxx::xxx
+// expected-error@-1 {{unknown embed preprocessor parameter 'xxx::xxx'}}
+
+#embed "embed_parsing_errors.c" xxx::42
+// expected-error@-1 {{expected identifier}}
+
+#embed "embed_parsing_errors.c" limit
+// expected-error@-1 {{expected '('}}
+
+#embed "embed_parsing_errors.c" limit(
+// expected-error@-1 {{expected value in expression}}
+
+#embed "embed_parsing_errors.c" limit(xxx
+// expected-error@-1 {{expected ')'}}
+
+#embed "embed_parsing_errors.c" limit(42
+// expected-error@-1 {{expected ')'}}
+
+#embed "embed_parsing_errors.c" limit([
+// expected-error@-1 {{invalid token at start of a preprocessor expression}}
+
+#embed "embed_parsing_errors.c" limit([)
+// expected-error@-1 {{invalid token at start of a preprocessor expression}}
+
+#embed "embed_parsing_errors.c" limit(1/0)
+// expected-error@-1 {{division by zero in preprocessor expression}}
+
+#embed "embed_parsing_errors.c" clang::offset
+// expected-error@-1 {{expected '('}}
+
+#embed "embed_parsing_errors.c" clang::offset(
+// expected-error@-1 {{expected value in expression}}
+
+#embed "embed_parsing_errors.c" clang::offset(xxx
+// expected-error@-1 {{expected ')'}}
+
+#embed "embed_parsing_errors.c" clang::offset(42
+// expected-error@-1 {{expected ')'}}
+
+#embed "embed_parsing_errors.c" clang::offset([
+// expected-error@-1 {{invalid token at start of a preprocessor expression}}
+
+#embed "embed_parsing_errors.c" clang::offset([)
+// expected-error@-1 {{invalid token at start of a preprocessor expression}}
+
+#embed "embed_parsing_errors.c" clang::offset(1/0)
+// expected-error@-1 {{division by zero in preprocessor expression}}
+
+#embed "embed_parsing_errors.c" clang::offset 42
+// expected-error@-1 {{expected '('}}
+
+#embed "embed_parsing_errors.c" prefix
+// expected-error@-1 {{expected '('}}
+
+#embed "embed_parsing_errors.c" prefix(
+// expected-error@-1 {{expected ')'}}
+
+#embed "embed_parsing_errors.c" prefix(xxx
+// expected-error@-1 {{expected ')'}}
+
+#embed "embed_parsing_errors.c" prefix(1/0) // OK: emitted as tokens, not evaluated yet.
+#embed "embed_parsing_errors.c" prefix(([{}])) // OK: delimiters balanced
+#embed "embed_parsing_errors.c" prefix(([{)]})
+// expected-error@-1 {{expected '}'}} expected-note@-1 {{to match this '{'}}
+#embed "embed_parsing_errors.c" prefix(([{})})
+// expected-error@-1 {{expected ']'}} expected-note@-1 {{to match this '['}}
+#embed "embed_parsing_errors.c" prefix(([{}]})
+// expected-error@-1 {{expected ')'}} expected-note@-1 {{to match this '('}}
+#embed "embed_parsing_errors.c" prefix() // OK: tokens within parens are optional
+#embed "embed_parsing_errors.c" prefix)
+// expected-error@-1 {{expected '('}}
+
+#embed "embed_parsing_errors.c" suffix
+// expected-error@-1 {{expected '('}}
+
+#embed "embed_parsing_errors.c" suffix(
+// expected-error@-1 {{expected ')'}}
+
+#embed "embed_parsing_errors.c" suffix(xxx
+// expected-error@-1 {{expected ')'}}
+
+#embed "embed_parsing_errors.c" suffix(1/0) // OK: emitted as tokens, not evaluated yet.
+#embed "embed_parsing_errors.c" suffix(([{}])) // OK: delimiters balanced
+#embed "embed_parsing_errors.c" suffix(([{)]})
+// expected-error@-1 {{expected '}'}} expected-note@-1 {{to match this '{'}}
+#embed "embed_parsing_errors.c" suffix(([{})})
+// expected-error@-1 {{expected ']'}} expected-note@-1 {{to match this '['}}
+#embed "embed_parsing_errors.c" suffix(([{}]})
+// expected-error@-1 {{expected ')'}} expected-note@-1 {{to match this '('}}
+#embed "embed_parsing_errors.c" suffix() // OK: tokens within parens are optional
+#embed "embed_parsing_errors.c" suffix)
+// expected-error@-1 {{expected '('}}
+
+#embed "embed_parsing_errors.c" if_empty(1/0) // OK: emitted as tokens, not evaluated yet.
+#embed "embed_parsing_errors.c" if_empty(([{}])) // OK: delimiters balanced
+#embed "embed_parsing_errors.c" if_empty(([{)]})
+// expected-error@-1 {{expected '}'}} expected-note@-1 {{to match this '{'}}
+#embed "embed_parsing_errors.c" if_empty(([{})})
+// expected-error@-1 {{expected ']'}} expected-note@-1 {{to match this '['}}
+#embed "embed_parsing_errors.c" if_empty(([{}]})
+// expected-error@-1 {{expected ')'}} expected-note@-1 {{to match this '('}}
+#embed "embed_parsing_errors.c" if_empty() // OK: tokens within parens are optional
+#embed "embed_parsing_errors.c" if_empty)
+// expected-error@-1 {{expected '('}}
+};
diff --git a/clang/test/Preprocessor/embed_path_chevron.c b/clang/test/Preprocessor/embed_path_chevron.c
new file mode 100644
index 0000000..b12cb9ce
--- /dev/null
+++ b/clang/test/Preprocessor/embed_path_chevron.c
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 %s -std=c23 -fsyntax-only --embed-dir=%S/Inputs -verify
+// expected-no-diagnostics
+
+const char data[] = {
+#embed <single_byte.txt>
+};
+static_assert(sizeof(data) == 1);
+static_assert('b' == data[0]);
diff --git a/clang/test/Preprocessor/embed_path_quote.c b/clang/test/Preprocessor/embed_path_quote.c
new file mode 100644
index 0000000..79ca1e5
--- /dev/null
+++ b/clang/test/Preprocessor/embed_path_quote.c
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -std=c23 %s -fsyntax-only --embed-dir=%S/Inputs -verify
+// expected-no-diagnostics
+
+const char data[] = {
+#embed "single_byte.txt"
+};
+static_assert(sizeof(data) == 1);
+static_assert('a' == data[0]);
diff --git a/clang/test/Preprocessor/embed_preprocess_to_file.c b/clang/test/Preprocessor/embed_preprocess_to_file.c
new file mode 100644
index 0000000..9895d95
--- /dev/null
+++ b/clang/test/Preprocessor/embed_preprocess_to_file.c
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 -std=c23 %s -E --embed-dir=%S/Inputs | FileCheck %s --check-prefix EXPANDED
+// RUN: %clang_cc1 -std=c23 %s -E -dE --embed-dir=%S/Inputs | FileCheck %s --check-prefix DIRECTIVE
+
+// Ensure that we correctly preprocess to a file, both with expanding embed
+// directives fully and with printing the directive instead.
+const char data[] = {
+#embed <jk.txt> if_empty('a', 'b') clang::offset(0) limit(1) suffix(, 'a', 0) prefix('h',)
+};
+
+// EXPANDED: const char data[] = {'h',106 , 'a', 0};
+// DIRECTIVE: const char data[] = {
+// DIRECTIVE-NEXT: #embed <jk.txt> if_empty('a', 'b') limit(1) clang::offset(0) prefix('h',) suffix(, 'a', 0) /* clang -E -dE */
+// DIRECTIVE-NEXT: };
+
+const char more[] = {
+#embed <media/empty> if_empty('a', 'b')
+};
+
+// EXPANDED: const char more[] = {'a', 'b'}
+// DIRECTIVE: const char more[] = {
+// DIRECTIVE-NEXT: #embed <media/empty> if_empty('a', 'b') /* clang -E -dE */
+// DIRECTIVE-NEXT: };
+
+const char even_more[] = {
+ 1, 2, 3,
+#embed <jk.txt> prefix(4, 5,) suffix(, 6, 7)
+ , 8, 9, 10
+};
+
+// EXPANDED: const char even_more[] = {
+// EXPANDED-NEXT: 1, 2, 3,4, 5,106, 107 , 6, 7 , 8, 9, 10
+// EXPANDED-EMPTY:
+// EXPANDED-EMPTY:
+// EXPANDED-NEXT: };
+// DIRECTIVE: const char even_more[] = {
+// DIRECTIVE-NEXT: 1, 2, 3,
+// DIRECTIVE-NEXT: #embed <jk.txt> prefix(4, 5,) suffix(, 6, 7) /* clang -E -dE */
+// DIRECTIVE-NEXT: , 8, 9, 10
+// DIRECTIVE-NEXT: };
diff --git a/clang/test/Preprocessor/embed_single_entity.c b/clang/test/Preprocessor/embed_single_entity.c
new file mode 100644
index 0000000..2019118
--- /dev/null
+++ b/clang/test/Preprocessor/embed_single_entity.c
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 %s -fsyntax-only -std=c23 --embed-dir=%S/Inputs -verify
+
+const char data =
+#embed <single_byte.txt>
+;
+_Static_assert('b' == data);
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_weird.cpp b/clang/test/Preprocessor/embed_weird.cpp
new file mode 100644
index 0000000..a31b083
--- /dev/null
+++ b/clang/test/Preprocessor/embed_weird.cpp
@@ -0,0 +1,98 @@
+// RUN: %clang_cc1 %s -fsyntax-only --embed-dir=%S/Inputs -verify=expected,cxx -Wno-c23-extensions
+// RUN: %clang_cc1 -x c -std=c23 %s -fsyntax-only --embed-dir=%S/Inputs -verify=expected,c
+#embed <media/empty>
+;
+
+void f (unsigned char x) { (void)x;} // One parameter: called with '#embed <single_byte.txt>' in do_stuff().
+void g () {} // No parameters: called with '#embed <media/empty>' in do_stuff().
+void h (unsigned char x, int y) {(void)x; (void)y;} // Two parameters: called with '#embed <jk.txt>' in do_stuff().
+int i () { // Uses an embed directive directly as the operand of 'return'.
+ return
+#embed <single_byte.txt>
+ ;
+}
+
+_Static_assert(
+#embed <single_byte.txt> suffix(,)
+""
+);
+_Static_assert(
+#embed <single_byte.txt>
+, ""
+);
+_Static_assert(sizeof(
+#embed <single_byte.txt>
+) ==
+sizeof(unsigned char)
+, ""
+);
+_Static_assert(sizeof
+#embed <single_byte.txt>
+, ""
+);
+_Static_assert(sizeof(
+#embed <jk.txt> // expected-warning {{left operand of comma operator has no effect}}
+) ==
+sizeof(unsigned char)
+, ""
+);
+
+#ifdef __cplusplus // Template arguments only exist in C++; the C RUN line skips this.
+template <int First, int Second> // Both arguments come from '#embed <jk.txt>' in do_stuff().
+void j() {
+ static_assert(First == 'j', ""); // First embedded element must be 'j'.
+ static_assert(Second == 'k', ""); // Second embedded element must be 'k'.
+}
+#endif
+
+void do_stuff() {
+ f(
+#embed <single_byte.txt>
+ );
+ g(
+#embed <media/empty>
+ );
+ h(
+#embed <jk.txt>
+ );
+ int r = i();
+ (void)r;
+#ifdef __cplusplus
+ j<
+#embed <jk.txt>
+ >(
+#embed <media/empty>
+ );
+#endif
+}
+
+// Ensure that we don't accidentally allow you to initialize an unsigned char *
+// from embedded data; the data is modeled as a string literal internally, but
+// is not actually a string literal.
+const unsigned char *ptr =
+#embed <jk.txt> // expected-warning {{left operand of comma operator has no effect}}
+; // c-error@-2 {{incompatible integer to pointer conversion initializing 'const unsigned char *' with an expression of type 'unsigned char'}} \
+ cxx-error@-2 {{cannot initialize a variable of type 'const unsigned char *' with an rvalue of type 'unsigned char'}}
+
+// However, there are some cases where this is fine and should work.
+const unsigned char *null_ptr_1 =
+#embed <media/empty> if_empty(0)
+;
+
+const unsigned char *null_ptr_2 =
+#embed <null_byte.bin>
+;
+
+const unsigned char *null_ptr_3 = {
+#embed <null_byte.bin>
+};
+
+#define FILE_NAME <null_byte.bin>
+#define LIMIT 1
+#define OFFSET 0
+#define EMPTY_SUFFIX suffix()
+
+constexpr unsigned char ch =
+#embed FILE_NAME limit(LIMIT) clang::offset(OFFSET) EMPTY_SUFFIX
+;
+static_assert(ch == 0);
diff --git a/clang/test/Preprocessor/init-aarch64.c b/clang/test/Preprocessor/init-aarch64.c
index f084598..9e425ac 100644
--- a/clang/test/Preprocessor/init-aarch64.c
+++ b/clang/test/Preprocessor/init-aarch64.c
@@ -272,6 +272,9 @@
// AARCH64-NEXT: #define __SIZE_WIDTH__ 64
// AARCH64_CXX: #define __STDCPP_DEFAULT_NEW_ALIGNMENT__ 16UL
// AARCH64_CXX: #define __STDCPP_THREADS__ 1
+// AARCH64-NEXT: #define __STDC_EMBED_EMPTY__ 2
+// AARCH64-NEXT: #define __STDC_EMBED_FOUND__ 1
+// AARCH64-NEXT: #define __STDC_EMBED_NOT_FOUND__ 0
// AARCH64-NEXT: #define __STDC_HOSTED__ 1
// AARCH64-NEXT: #define __STDC_UTF_16__ 1
// AARCH64-NEXT: #define __STDC_UTF_32__ 1
diff --git a/clang/test/Preprocessor/init.c b/clang/test/Preprocessor/init.c
index 2641fee..57bf671 100644
--- a/clang/test/Preprocessor/init.c
+++ b/clang/test/Preprocessor/init.c
@@ -1875,6 +1875,9 @@
// WEBASSEMBLY-NEXT:#define __SIZE_TYPE__ long unsigned int
// WEBASSEMBLY32-NEXT:#define __SIZE_WIDTH__ 32
// WEBASSEMBLY64-NEXT:#define __SIZE_WIDTH__ 64
+// WEBASSEMBLY-NEXT:#define __STDC_EMBED_EMPTY__ 2
+// WEBASSEMBLY-NEXT:#define __STDC_EMBED_FOUND__ 1
+// WEBASSEMBLY-NEXT:#define __STDC_EMBED_NOT_FOUND__ 0
// WEBASSEMBLY-NEXT:#define __STDC_HOSTED__ 0
// WEBASSEMBLY-NOT:#define __STDC_MB_MIGHT_NEQ_WC__
// WEBASSEMBLY-NOT:#define __STDC_NO_ATOMICS__
diff --git a/clang/test/Preprocessor/single_byte.txt b/clang/test/Preprocessor/single_byte.txt
new file mode 100644
index 0000000..2e65efe
--- /dev/null
+++ b/clang/test/Preprocessor/single_byte.txt
@@ -0,0 +1 @@
+a \ No newline at end of file
diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp
index 3800205..bc4b162 100644
--- a/clang/tools/libclang/CXCursor.cpp
+++ b/clang/tools/libclang/CXCursor.cpp
@@ -335,6 +335,7 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent,
case Stmt::ObjCSubscriptRefExprClass:
case Stmt::RecoveryExprClass:
case Stmt::SYCLUniqueStableNameExprClass:
+ case Stmt::EmbedExprClass:
K = CXCursor_UnexposedExpr;
break;
diff --git a/clang/www/c_status.html b/clang/www/c_status.html
index a94c606..7fe633a 100644
--- a/clang/www/c_status.html
+++ b/clang/www/c_status.html
@@ -1213,7 +1213,7 @@ conforms by not defining the <code>__STDC_IEC_559_COMPLEX__</code> macro.
<tr>
<td>#embed</td>
<td><a href="https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3017.htm">N3017</a></td>
- <td class="none" align="center">No</td>
+ <td class="unreleased" align="center">Clang 19</td>
</tr>
</table>
</details>