39 files changed, 1512 insertions, 0 deletions
diff --git a/clang/test/C/C2x/Inputs/bits.bin b/clang/test/C/C2x/Inputs/bits.bin
new file mode 100644
index 0000000..ad47100
--- /dev/null
+++ b/clang/test/C/C2x/Inputs/bits.bin
@@ -0,0 +1 @@
+0123456789
+\ No newline at end of file
diff --git a/clang/test/C/C2x/Inputs/boop.h b/clang/test/C/C2x/Inputs/boop.h
new file mode 100644
index 0000000..d3e3967
--- /dev/null
+++ b/clang/test/C/C2x/Inputs/boop.h
@@ -0,0 +1 @@
+*boop*
+\ No newline at end of file
diff --git a/clang/test/C/C2x/Inputs/i.dat b/clang/test/C/C2x/Inputs/i.dat
new file mode 100644
index 0000000..c227083
--- /dev/null
+++ b/clang/test/C/C2x/Inputs/i.dat
@@ -0,0 +1 @@
+0
+\ No newline at end of file
diff --git a/clang/test/C/C2x/Inputs/jump.wav b/clang/test/C/C2x/Inputs/jump.wav
new file mode 100644
index 0000000..a711006
--- /dev/null
+++ b/clang/test/C/C2x/Inputs/jump.wav
@@ -0,0 +1 @@
+RIFF
+\ No newline at end of file
diff --git a/clang/test/C/C2x/Inputs/s.dat b/clang/test/C/C2x/Inputs/s.dat
new file mode 100644
index 0000000..3a332e6
--- /dev/null
+++ b/clang/test/C/C2x/Inputs/s.dat
@@ -0,0 +1 @@
+012345678
+\ No newline at end of file
diff --git a/clang/test/C/C2x/n3017.c b/clang/test/C/C2x/n3017.c
new file mode 100644
index 0000000..0d22d31
--- /dev/null
+++ b/clang/test/C/C2x/n3017.c
@@ -0,0 +1,216 @@
+// RUN: %clang_cc1 -verify -fsyntax-only --embed-dir=%S/Inputs -std=c2x %s -Wno-constant-logical-operand
+
+/* WG14 N3017: full
+ * #embed - a scannable, tooling-friendly binary resource inclusion mechanism
+ */
+
+// C23 6.10p6
+char b1[] = {
+#embed "boop.h" limit(5)
+,
+#embed "boop.h" __limit__(5)
+};
+
+// C23 6.10.1p19
+#if __has_embed(__FILE__ ext::token(0xB055))
+#error "Supports an extension parameter Clang never claimed to support?"
+#endif
+
+#if !__has_embed(__FILE__ clang::offset(0))
+#error "Doesn't support an extension Clang claims to support?"
+#endif
+
+// C23 6.10.1p20
+void parse_into_s(short* ptr, unsigned char* ptr_bytes, unsigned long long size);
+int f() {
+#if __has_embed ("bits.bin" ds9000::element_type(short))
+  /* Implementation extension: create short integers from the */
+  /* translation environment resource into */
+  /* a sequence of integer constants */
+  short meow[] = {
+#embed "bits.bin" ds9000::element_type(short)
+  };
+#elif __has_embed ("bits.bin")
+  /* no support for implementation-specific */
+  /* ds9000::element_type(short) parameter */
+  unsigned char meow_bytes[] = {
+  #embed "bits.bin"
+  };
+  short meow[sizeof(meow_bytes) / sizeof(short)] = {};
+  /* parse meow_bytes into short values by-hand! */
+  parse_into_s(meow, meow_bytes, sizeof(meow_bytes));
+#else
+#error "cannot find bits.bin resource"
+#endif
+  return (int)(meow[0] + meow[(sizeof(meow) / sizeof(*meow)) - 1]);
+}
+
+// NOTE: we don't have a good way to test infinite resources from within lit.
+int g() {
+#if __has_embed(<infinite-resource> limit(0)) == 2
+  // if <infinite-resource> exists, this
+  // token sequence is always taken.
+  return 0;
+#else
+  // the <infinite-resource> resource does not exist
+  #error "The resource does not exist"
+#endif
+  // expected-error@-2 {{"The resource does not exist"}}
+}
+
+#include <stddef.h>
+void have_you_any_wool(const unsigned char*, size_t);
+int h() {
+  static const unsigned char baa_baa[] = {
+#embed __FILE__
+  };
+  have_you_any_wool(baa_baa, sizeof(baa_baa));
+  return 0;
+}
+
+// C23 6.10.3.1p17: not tested here because we do not currently support any
+// platforms where CHAR_BIT != 8.
+
+// C23 6.10.3.1p18
+int i() {
+/* Braces may be kept or elided as per normal initialization rules */
+  int i = {
+#embed "i.dat"
+  }; /* valid if i.dat produces 1 value,
+        i value is [0, 2^(embed element width)) */
+  int i2 =
+#embed "i.dat"
+  ; /* valid if i.dat produces 1 value,
+       i2 value is [0, 2^(embed element width)) */
+  struct s {
+    double a, b, c;
+    struct { double e, f, g; };
+    double h, i, j;
+  };
+  struct s x = {
+    /* initializes each element in order according to initialization
+    rules with comma-separated list of integer constant expressions
+    inside of braces */
+    #embed "s.dat"
+  };
+  return 0;
+}
+
+// C23 6.10.3.1p19: not tested here because it's a runtime test rather than one
+// which can be handled at compile time (it validates file contents via fread).
+
+// C23 6.10.3.2p5
+int j() {
+  static const char sound_signature[] = {
+#embed <jump.wav> limit(2+2)
+  };
+  static_assert((sizeof(sound_signature) / sizeof(*sound_signature)) == 4,
+    "There should only be 4 elements in this array.");
+  // verify PCM WAV resource
+  static_assert(sound_signature[0] == 'R');
+  static_assert(sound_signature[1] == 'I');
+  static_assert(sound_signature[2] == 'F');
+  static_assert(sound_signature[3] == 'F');
+  static_assert(sizeof(sound_signature) == 4);
+  return 0;
+}
+
+// C23 6.10.3p6
+int k() {
+#define TWO_PLUS_TWO 2+2
+  static const char sound_signature[] = {
+#embed <jump.wav> limit(TWO_PLUS_TWO)
+  };
+  static_assert((sizeof(sound_signature) / sizeof(*sound_signature)) == 4,
+    "There should only be 4 elements in this array.");
+  // verify PCM WAV resource
+  static_assert(sound_signature[0] == 'R');
+  static_assert(sound_signature[1] == 'I');
+  static_assert(sound_signature[2] == 'F');
+  static_assert(sound_signature[3] == 'F');
+  static_assert(sizeof(sound_signature) == 4);
+  return 0;
+}
+
+// C23 6.10.3.2p7: not tested here because we do not currently support any
+// platforms where CHAR_BIT != 8.
+
+// C23 6.10.3.2p8: not tested here because it requires access to an infinite
+// resource like /dev/urandom.
+
+// C23 6.10.3.3p4
+char *strcpy(char *, const char *);
+#ifndef SHADER_TARGET
+  #define SHADER_TARGET "bits.bin"
+#endif
+extern char* null_term_shader_data;
+void fill_in_data () {
+  const char internal_data[] = {
+#embed SHADER_TARGET \
+  suffix(,)
+  0
+  };
+  strcpy(null_term_shader_data, internal_data);
+}
+
+// C23 6.10.3.4p4
+#ifndef SHADER_TARGET
+#define SHADER_TARGET "bits.bin"
+#endif
+extern char* merp;
+void init_data () {
+  const char whl[] = {
+#embed SHADER_TARGET \
+    prefix(0xEF, 0xBB, 0xBF, ) /* UTF-8 BOM */ \
+    suffix(,)
+    0
+  };
+  // always null terminated,
+  // contains BOM if not-empty
+  const int is_good = (sizeof(whl) == 1 && whl[0] == '\0')
+    || (whl[0] == '\xEF' && whl[1] == '\xBB'
+    && whl[2] == '\xBF' && whl[sizeof(whl) - 1] == '\0');
+  static_assert(is_good);
+  strcpy(merp, whl);
+}
+
+// C23 6.10.3.5p3
+int l() {
+  return
+#embed <bits.bin> limit(0) prefix(1) if_empty(0)
+  ;
+  // becomes:
+  // return 0;
+
+  // Validating the assumption from the example in the standard.
+  static_assert(
+#embed <bits.bin> limit(0) prefix(1) if_empty(0)
+    == 0);
+}
+
+// C23 6.10.3.5p4
+void fill_in_data_again() {
+  const char internal_data[] = {
+#embed SHADER_TARGET \
+  suffix(, 0) \
+  if_empty(0)
+  };
+  strcpy(null_term_shader_data, internal_data);
+}
+
+// C23 6.10.3.5p5
+int m() {
+  return
+#embed __FILE__ limit(0) if_empty(45540)
+  ;
+
+  // Validating the assumption from the example in the standard.
+  static_assert(
+#embed __FILE__ limit(0) if_empty(45540)
+    == 45540);
+}
+
+// 6.10.9.1p1
+static_assert(__STDC_EMBED_NOT_FOUND__ == 0);
+static_assert(__STDC_EMBED_FOUND__ == 1);
+static_assert(__STDC_EMBED_EMPTY__ == 2);
diff --git a/clang/test/Preprocessor/Inputs/jk.txt b/clang/test/Preprocessor/Inputs/jk.txt
new file mode 100644
index 0000000..93d177a
--- /dev/null
+++ b/clang/test/Preprocessor/Inputs/jk.txt
@@ -0,0 +1 @@
+jk
+\ No newline at end of file
diff --git a/clang/test/Preprocessor/Inputs/media/art.txt b/clang/test/Preprocessor/Inputs/media/art.txt
new file mode 100644
index 0000000..1ce9ab9
--- /dev/null
+++ b/clang/test/Preprocessor/Inputs/media/art.txt
@@ -0,0 +1,9 @@
+           __  _
+       .-.'  `; `-._  __  _
+      (_,         .-:'  `; `-._
+    ,'o"(        (_,           )
+   (__,-'      ,'o"(            )>
+      (       (__,-'            )
+       `-'._.--._(             )
+          |||  |||`-'._.--._.-'
+                     |||  |||
diff --git a/clang/test/Preprocessor/Inputs/media/empty b/clang/test/Preprocessor/Inputs/media/empty
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/clang/test/Preprocessor/Inputs/media/empty
diff --git a/clang/test/Preprocessor/Inputs/null_byte.bin b/clang/test/Preprocessor/Inputs/null_byte.bin
new file mode 100644
index 0000000..f76dd23
--- /dev/null
+++ b/clang/test/Preprocessor/Inputs/null_byte.bin
diff --git a/clang/test/Preprocessor/Inputs/numbers.txt b/clang/test/Preprocessor/Inputs/numbers.txt
new file mode 100644
index 0000000..11f11f9
--- /dev/null
+++ b/clang/test/Preprocessor/Inputs/numbers.txt
@@ -0,0 +1 @@
+0123456789
diff --git a/clang/test/Preprocessor/Inputs/single_byte.txt b/clang/test/Preprocessor/Inputs/single_byte.txt
new file mode 100644
index 0000000..63d8dbd
--- /dev/null
+++ b/clang/test/Preprocessor/Inputs/single_byte.txt
@@ -0,0 +1 @@
+b
+\ No newline at end of file
diff --git a/clang/test/Preprocessor/embed___has_embed.c b/clang/test/Preprocessor/embed___has_embed.c
new file mode 100644
index 0000000..43a3068
--- /dev/null
+++ b/clang/test/Preprocessor/embed___has_embed.c
@@ -0,0 +1,60 @@
+// RUN: %clang_cc1 -std=c23 %s -E --embed-dir=%S/Inputs -verify
+// expected-no-diagnostics
+
+#if __has_embed(__FILE__) != __STDC_EMBED_FOUND__
+#error 1
+#elif __has_embed("media/art.txt") != __STDC_EMBED_FOUND__
+#error 2
+#elif __has_embed("asdkasdjkadsjkdsfjk") != __STDC_EMBED_NOT_FOUND__
+#error 3
+#elif __has_embed("asdkasdjkadsjkdsfjk" limit(1)) != __STDC_EMBED_NOT_FOUND__
+#error 4
+#elif __has_embed("asdkasdjkadsjkdsfjk" suffix(x) limit(1)) != __STDC_EMBED_NOT_FOUND__
+#error 5
+#elif __has_embed("asdkasdjkadsjkdsfjk" suffix(x) djsakdasjd::xmeow("xD")) != __STDC_EMBED_NOT_FOUND__
+#error 6
+#elif __has_embed(__FILE__ limit(2) prefix(y)) != __STDC_EMBED_FOUND__
+#error 7
+#elif __has_embed(__FILE__ limit(2)) != __STDC_EMBED_FOUND__
+#error 8
+// 6.10.1p7, if the search fails or any of the embed parameters in the embed
+// parameter sequence specified are not supported by the implementation for the
+// #embed directive;
+// We don't support one of the embed parameters.
+#elif __has_embed(__FILE__ dajwdwdjdahwk::meow(x)) != __STDC_EMBED_NOT_FOUND__
+#error 9
+#elif __has_embed(<media/empty>) != __STDC_EMBED_EMPTY__
+#error 10
+// 6.10.1p7: if the search for the resource succeeds and all embed parameters
+// in the embed parameter sequence specified are supported by the
+// implementation for the #embed directive and the resource is empty
+// Limiting to zero characters means the resource is empty.
+#elif __has_embed(<media/empty> limit(0)) != __STDC_EMBED_EMPTY__
+#error 11
+#elif __has_embed(<media/art.txt> limit(0)) != __STDC_EMBED_EMPTY__
+#error 12
+// Test that an offset past the end of the file produces an empty file.
+#elif __has_embed(<single_byte.txt> clang::offset(1)) != __STDC_EMBED_EMPTY__
+#error 13
+// Test that we apply the offset before we apply the limit. If we did this in
+// the reverse order, this would cause the file to be empty because we would
+// have limited it to 1 byte and then offset past it.
+#elif __has_embed(<media/art.txt> limit(1) clang::offset(12)) != __STDC_EMBED_FOUND__
+#error 14
+#elif __has_embed(<media/art.txt>) != __STDC_EMBED_FOUND__
+#error 15
+#elif __has_embed(<media/art.txt> if_empty(meow)) != __STDC_EMBED_FOUND__
+#error 16
+#endif
+
+// Ensure that when __has_embed returns true, the file can actually be
+// embedded. This was previously failing because the way in which __has_embed
+// would search for files was different from how #embed would resolve them
+// when the file path included relative path markers like `./` or `../`.
+#if __has_embed("./embed___has_embed.c") == __STDC_EMBED_FOUND__
+unsigned char buffer[] = {
+#embed "./embed___has_embed.c"
+};
+#else
+#error 17
+#endif
diff --git a/clang/test/Preprocessor/embed___has_embed_parsing_errors.c b/clang/test/Preprocessor/embed___has_embed_parsing_errors.c
new file mode 100644
index 0000000..fcaf693
--- /dev/null
+++ b/clang/test/Preprocessor/embed___has_embed_parsing_errors.c
@@ -0,0 +1,240 @@
+// RUN: %clang_cc1 -std=c23 %s -E -verify
+
+// Test the parsing behavior for __has_embed and all of its parameters to ensure we
+// recover from failures gracefully.
+
+// expected-error@+2 {{missing '(' after '__has_embed'}} \
+   expected-error@+2 {{expected value in expression}}
+#if __has_embed
+#endif
+
+// expected-error@+3 {{expected '>'}} \
+   expected-note@+3 {{to match this '<'}} \
+   expected-error@+3 {{expected value in expression}}
+#if __has_embed(<)
+#endif
+
+// expected-error@+3 {{expected "FILENAME" or <FILENAME>}} \
+   expected-warning@+3 {{missing terminating '"' character}} \
+   expected-error@+3 {{invalid token at start of a preprocessor expression}}
+#if __has_embed(")
+#endif
+
+// expected-error@+2 {{missing '(' after '__has_embed'}} \
+   expected-error@+2 {{token is not a valid binary operator in a preprocessor subexpression}}
+#if __has_embed file.txt
+#endif
+
+// OK, no diagnostic for an unknown embed parameter.
+#if __has_embed("media/empty" xxx)
+#endif
+
+// expected-error@+2 {{expected identifier}} \
+   expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" xxx::)
+#endif
+
+// OK, no diagnostic for an unknown embed parameter.
+#if __has_embed("media/empty" xxx::xxx)
+#endif
+
+// expected-error@+2 {{expected identifier}} \
+   expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" xxx::42)
+#endif
+
+// expected-error@+2 {{expected '('}} \
+   expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" limit)
+#endif
+
+// We get the same diagnostic twice intentionally. The first one is because of
+// the missing value within limit() and the second one is because the #if does
+// not resolve to a value due to the earlier error.
+// expected-error@+1 2 {{expected value in expression}}
+#if __has_embed("media/empty" limit()
+#endif
+
+// expected-error@+3 {{missing ')' after '__has_embed'}} \
+   expected-error@+3 {{expected value in expression}} \
+   expected-note@+3 {{to match this '('}}
+#if __has_embed("media/empty" limit(xxx)
+#endif
+
+// expected-error@+3 {{missing ')' after '__has_embed'}} \
+   expected-error@+3 {{expected value in expression}} \
+   expected-note@+3 {{to match this '('}}
+#if __has_embed("media/empty" limit(42)
+#endif
+
+// expected-error@+2 {{invalid token at start of a preprocessor expression}} \
+   expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" limit([)
+#endif
+
+// expected-error@+2 {{invalid token at start of a preprocessor expression}} \
+   expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" limit([))
+#endif
+
+// expected-error@+2 {{division by zero in preprocessor expression}} \
+   expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" limit(1/0))
+#endif
+
+// expected-error@+2 {{expected '('}} \
+   expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" clang::offset)
+#endif
+
+// We get the same diagnostic twice intentionally. The first one is because of
+// the missing value within clang::offset() and the second one is because the
+// #if does not resolve to a value due to the earlier error.
+// expected-error@+1 2 {{expected value in expression}}
+#if __has_embed("media/empty" clang::offset()
+#endif
+
+// expected-error@+3 {{missing ')' after '__has_embed'}} \
+   expected-error@+3 {{expected value in expression}} \
+   expected-note@+3 {{to match this '('}}
+#if __has_embed("media/empty" clang::offset(xxx)
+#endif
+
+// expected-error@+3 {{missing ')' after '__has_embed'}} \
+   expected-error@+3 {{expected value in expression}} \
+   expected-note@+3 {{to match this '('}}
+#if __has_embed("media/empty" clang::offset(42)
+#endif
+
+// expected-error@+2 {{invalid token at start of a preprocessor expression}} \
+   expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" clang::offset([)
+#endif
+
+// expected-error@+2 {{invalid token at start of a preprocessor expression}} \
+   expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" clang::offset([))
+#endif
+
+// expected-error@+2 {{division by zero in preprocessor expression}} \
+   expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" clang::offset(1/0))
+#endif
+
+// expected-error@+2 {{expected '('}} \
+   expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" clang::offset 42)
+#endif
+
+// expected-error@+2 {{expected '('}} \
+   expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" prefix)
+#endif
+
+// expected-error@+3 {{missing ')' after '__has_embed'}} \
+   expected-error@+3 {{expected value in expression}} \
+   expected-note@+3 {{to match this '('}}
+#if __has_embed("media/empty" prefix()
+#endif
+
+// expected-error@+3 {{missing ')' after '__has_embed'}} \
+   expected-error@+3 {{expected value in expression}} \
+   expected-note@+3 {{to match this '('}}
+#if __has_embed("media/empty" prefix(xxx)
+#endif
+
+#if __has_embed("media/empty" prefix(1/0)) // OK: emitted as tokens, not evaluated yet.
+#endif
+#if __has_embed("media/empty" prefix(([{}]))) // OK: delimiters balanced
+#endif
+// expected-error@+3 {{expected '}'}} \
+   expected-note@+3 {{to match this '{'}} \
+   expected-error@+3 {{expected value in expression}}
+#if __has_embed("media/empty" prefix(([{)]}))
+#endif
+// expected-error@+3 {{expected ']'}} \
+   expected-note@+3 {{to match this '['}} \
+   expected-error@+3 {{expected value in expression}}
+#if __has_embed("media/empty" prefix(([{})}))
+#endif
+// expected-error@+3 {{expected ')'}} \
+   expected-note@+3 {{to match this '('}} \
+   expected-error@+3 {{expected value in expression}}
+#if __has_embed("media/empty" prefix(([{}]}))
+#endif
+#if __has_embed("media/empty" prefix()) // OK: tokens within parens are optional
+#endif
+// expected-error@+2 {{expected '('}} \
+   expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" prefix))
+#endif
+
+// expected-error@+2 {{expected '('}} \
+   expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" suffix)
+#endif
+
+// expected-error@+3 {{missing ')' after '__has_embed'}} \
+   expected-error@+3 {{expected value in expression}} \
+   expected-note@+3 {{to match this '('}}
+#if __has_embed("media/empty" suffix()
+#endif
+
+// expected-error@+3 {{missing ')' after '__has_embed'}} \
+   expected-error@+3 {{expected value in expression}} \
+   expected-note@+3 {{to match this '('}}
+#if __has_embed("media/empty" suffix(xxx)
+#endif
+
+#if __has_embed("media/empty" suffix(1/0)) // OK: emitted as tokens, not evaluated yet.
+#endif
+#if __has_embed("media/empty" suffix(([{}]))) // OK: delimiters balanced
+#endif
+// expected-error@+3 {{expected '}'}} \
+   expected-note@+3 {{to match this '{'}} \
+   expected-error@+3 {{expected value in expression}}
+#if __has_embed("media/empty" suffix(([{)]}))
+#endif
+// expected-error@+3 {{expected ']'}} \
+   expected-note@+3 {{to match this '['}} \
+   expected-error@+3 {{expected value in expression}}
+#if __has_embed("media/empty" suffix(([{})}))
+#endif
+// expected-error@+3 {{expected ')'}} \
+   expected-note@+3 {{to match this '('}} \
+   expected-error@+3 {{expected value in expression}}
+#if __has_embed("media/empty" suffix(([{}]}))
+#endif
+#if __has_embed("media/empty" suffix()) // OK: tokens within parens are optional
+#endif
+// expected-error@+2 {{expected '('}} \
+   expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/empty" suffix))
+#endif
+
+#if __has_embed("media/art.txt" if_empty(1/0)) // OK: emitted as tokens, not evaluated yet.
+#endif
+#if __has_embed("media/art.txt" if_empty(([{}]))) // OK: delimiters balanced
+#endif
+// expected-error@+3 {{expected '}'}} \
+   expected-note@+3 {{to match this '{'}} \
+   expected-error@+3 {{expected value in expression}}
+#if __has_embed("media/art.txt" if_empty(([{)]}))
+#endif
+// expected-error@+3 {{expected ']'}} \
+   expected-note@+3 {{to match this '['}} \
+   expected-error@+3 {{expected value in expression}}
+#if __has_embed("media/art.txt" if_empty(([{})}))
+#endif
+// expected-error@+3 {{expected ')'}} \
+   expected-note@+3 {{to match this '('}} \
+   expected-error@+3 {{expected value in expression}}
+#if __has_embed("media/art.txt" if_empty(([{}]}))
+#endif
+#if __has_embed("media/art.txt" if_empty()) // OK: tokens within parens are optional
+#endif
+// expected-error@+2 {{expected '('}} \
+   expected-error@+2 {{expected value in expression}}
+#if __has_embed("media/art.txt" if_empty))
+#endif
+
diff --git a/clang/test/Preprocessor/embed___has_embed_supported.c b/clang/test/Preprocessor/embed___has_embed_supported.c
new file mode 100644
index 0000000..e51dbb8
--- /dev/null
+++ b/clang/test/Preprocessor/embed___has_embed_supported.c
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -std=c23 %s -E -verify
+
+#if __has_embed(__FILE__) != __STDC_EMBED_FOUND__
+#error 1
+#elif __has_embed(__FILE__) != __STDC_EMBED_FOUND__
+#error 2
+#elif __has_embed(__FILE__ suffix(x)) != __STDC_EMBED_FOUND__
+#error 3
+#elif __has_embed(__FILE__ suffix(x) limit(1)) != __STDC_EMBED_FOUND__
+#error 4
+#elif __has_embed(__FILE__ suffix(x) limit(1) prefix(1)) != __STDC_EMBED_FOUND__
+#error 5
+#elif __has_embed(__FILE__ suffix(x) limit(2) prefix(1) clang::offset(1)) != __STDC_EMBED_FOUND__
+#error 6
+#elif __has_embed(__FILE__ suffix(x) limit(0) prefix(1)) != __STDC_EMBED_EMPTY__
+#error 7
+#elif __has_embed(__FILE__ suffix(x) limit(1) prefix(1) clang::offset(1)) != __STDC_EMBED_FOUND__
+#error 8
+#elif __has_embed(__FILE__ suffix(x) limit(0)) != __STDC_EMBED_EMPTY__
+#error 9
+#elif __has_embed(__FILE__ suffix(x) limit(0) if_empty(:3)) != __STDC_EMBED_EMPTY__
+#error 10
+#endif
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_art.c b/clang/test/Preprocessor/embed_art.c
new file mode 100644
index 0000000..a664715
--- /dev/null
+++ b/clang/test/Preprocessor/embed_art.c
@@ -0,0 +1,104 @@
+// RUN: %clang_cc1 -std=c23 %s -fsyntax-only --embed-dir=%S/Inputs -verify
+// expected-no-diagnostics
+
+const char data[] = {
+#embed <media/art.txt>
+};
+const char data2[] = {
+#embed <media/art.txt>
+, 0
+};
+const char data3[] = {
+#embed <media/art.txt> suffix(, 0)
+};
+const char data4[] = {
+#embed <media/art.txt> suffix(,)
+0
+};
+static_assert(sizeof(data) == 274);
+static_assert(' ' == data[0]);
+static_assert('_' == data[11]);
+static_assert('\n' == data[273]);
+static_assert(sizeof(data2) == 275);
+static_assert(' ' == data2[0]);
+static_assert('_' == data2[11]);
+static_assert('\n' == data2[273]);
+static_assert('\0' == data2[274]);
+static_assert(sizeof(data3) == 275);
+static_assert(' ' == data3[0]);
+static_assert('_' == data3[11]);
+static_assert('\n' == data3[273]);
+static_assert('\0' == data3[274]);
+static_assert(sizeof(data4) == 275);
+static_assert(' ' == data4[0]);
+static_assert('_' == data4[11]);
+static_assert('\n' == data4[273]);
+static_assert('\0' == data4[274]);
+
+const signed char data5[] = {
+#embed <media/art.txt>
+};
+const signed char data6[] = {
+#embed <media/art.txt>
+, 0
+};
+const signed char data7[] = {
+#embed <media/art.txt> suffix(, 0)
+};
+const signed char data8[] = {
+#embed <media/art.txt> suffix(,)
+0
+};
+static_assert(sizeof(data5) == 274);
+static_assert(' ' == data5[0]);
+static_assert('_' == data5[11]);
+static_assert('\n' == data5[273]);
+static_assert(sizeof(data6) == 275);
+static_assert(' ' == data6[0]);
+static_assert('_' == data6[11]);
+static_assert('\n' == data6[273]);
+static_assert('\0' == data6[274]);
+static_assert(sizeof(data7) == 275);
+static_assert(' ' == data7[0]);
+static_assert('_' == data7[11]);
+static_assert('\n' == data7[273]);
+static_assert('\0' == data7[274]);
+static_assert(sizeof(data8) == 275);
+static_assert(' ' == data8[0]);
+static_assert('_' == data8[11]);
+static_assert('\n' == data8[273]);
+static_assert('\0' == data8[274]);
+
+const unsigned char data9[] = {
+#embed <media/art.txt>
+};
+const unsigned char data10[] = {
+0,
+#embed <media/art.txt>
+};
+const unsigned char data11[] = {
+#embed <media/art.txt> prefix(0,)
+};
+const unsigned char data12[] = {
+0
+#embed <media/art.txt> prefix(,)
+};
+static_assert(sizeof(data9) == 274);
+static_assert(' ' == data9[0]);
+static_assert('_' == data9[11]);
+static_assert('\n' == data9[273]);
+static_assert(sizeof(data10) == 275);
+static_assert(' ' == data10[1]);
+static_assert('_' == data10[12]);
+static_assert('\n' == data10[274]);
+static_assert('\0' == data10[0]);
+static_assert(sizeof(data11) == 275);
+static_assert(' ' == data11[1]);
+static_assert('_' == data11[12]);
+static_assert('\n' == data11[274]);
+static_assert('\0' == data11[0]);
+static_assert(sizeof(data12) == 275);
+static_assert(' ' == data12[1]);
+static_assert('_' == data12[12]);
+static_assert('\n' == data12[274]);
+static_assert('\0' == data12[0]);
diff --git a/clang/test/Preprocessor/embed_codegen.cpp b/clang/test/Preprocessor/embed_codegen.cpp
new file mode 100644
index 0000000..64110af
--- /dev/null
+++ b/clang/test/Preprocessor/embed_codegen.cpp
@@ -0,0 +1,84 @@
+// RUN: %clang_cc1 %s -triple x86_64 --embed-dir=%S/Inputs -emit-llvm -o - | FileCheck %s
+
+// CHECK: @__const._Z3fooi.ca = private unnamed_addr constant [3 x i32] [i32 0, i32 106, i32 107], align 4
+// CHECK: @__const._Z3fooi.sc = private unnamed_addr constant %struct.S1 { i32 106, i32 107, i32 0 }, align 4
+// CHECK: @__const._Z3fooi.t = private unnamed_addr constant [3 x %struct.T] [%struct.T { [2 x i32] [i32 48, i32 49], %struct.S1 { i32 50, i32 51, i32 52 } }, %struct.T { [2 x i32] [i32 53, i32 54], %struct.S1 { i32 55, i32 56, i32 57 } }, %struct.T { [2 x i32] [i32 10, i32 0], %struct.S1 zeroinitializer }], align 16
+void foo(int a) {
+// CHECK: %a.addr = alloca i32, align 4
+// CHECK: store i32 %a, ptr %a.addr, align 4
+// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %ca, ptr align 4 @__const._Z3fooi.ca, i64 12, i1 false)
+int ca[] = {
+0
+#embed <jk.txt> prefix(,)
+};
+
+// CHECK: %arrayinit.element = getelementptr inbounds i32, ptr %notca, i64 1
+// CHECK: store i8 106, ptr %arrayinit.element, align 4
+// CHECK: %arrayinit.element1 = getelementptr inbounds i32, ptr %notca, i64 2
+// CHECK: store i8 107, ptr %arrayinit.element1, align 4
+int notca[] = {
+a
+#embed <jk.txt> prefix(,)
+};
+
+struct S1 {
+  int x, y, z;
+};
+
+// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %sc, ptr align 4 @__const._Z3fooi.sc, i64 12, i1 false)
+S1 sc = {
+#embed <jk.txt> suffix(,)
+0
+};
+
+// CHECK: %x = getelementptr inbounds %struct.S1, ptr %s, i32 0, i32 0
+// CHECK: store i32 106, ptr %x, align 4
+// CHECK: %y = getelementptr inbounds %struct.S1, ptr %s, i32 0, i32 1
+// CHECK: store i32 107, ptr %y, align 4
+// CHECK: %z = getelementptr inbounds %struct.S1, ptr %s, i32 0, i32 2
+// CHECK: %1 = load i32, ptr %a.addr, align 4
+S1 s = {
+#embed <jk.txt> suffix(,)
+a
+};
+
+// CHECK: store i32 107, ptr %b, align 4
+int b =
+#embed<jk.txt>
+;
+
+
+struct T {
+  int arr[2];
+  struct S1 s;
+};
+
+// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 %t, ptr align 16 @__const._Z3fooi.t, i64 60, i1 false)
+constexpr struct T t[] = {
+#embed <numbers.txt>
+};
+
+// CHECK:  %arr = getelementptr inbounds %struct.T, ptr %tnonc, i32 0, i32 0
+// CHECK:  %2 = load i32, ptr %a.addr, align 4
+// CHECK:  store i32 %2, ptr %arr, align 4
+// CHECK:  %arrayinit.element2 = getelementptr inbounds i32, ptr %arr, i64 1
+// CHECK:  store i32 300, ptr %arrayinit.element2, align 4
+// CHECK:  %s3 = getelementptr inbounds %struct.T, ptr %tnonc, i32 0, i32 1
+// CHECK:  %x4 = getelementptr inbounds %struct.S1, ptr %s3, i32 0, i32 0
+// CHECK:  store i32 1, ptr %x4, align 4
+// CHECK:  %y5 = getelementptr inbounds %struct.S1, ptr %s3, i32 0, i32 1
+// CHECK:  store i32 2, ptr %y5, align 4
+// CHECK:  %z6 = getelementptr inbounds %struct.S1, ptr %s3, i32 0, i32 2
+// CHECK:  store i32 3, ptr %z6, align 4
+// CHECK:  %arrayinit.element7 = getelementptr inbounds %struct.T, ptr %tnonc, i64 1
+// CHECK:  call void @llvm.memset.p0.i64(ptr align 4 %arrayinit.element7, i8 0, i64 20, i1 false)
+// CHECK:  %arr8 = getelementptr inbounds %struct.T, ptr %arrayinit.element7, i32 0, i32 0
+// CHECK:  store i8 106, ptr %arr8, align 4
+// CHECK:  %arrayinit.element9 = getelementptr inbounds i32, ptr %arr8, i64 1
+// CHECK:  store i8 107, ptr %arrayinit.element9, align 4
+struct T tnonc[] = {
+  a, 300, 1, 2, 3
+#embed <jk.txt> prefix(,)
+};
+
+}
diff --git a/clang/test/Preprocessor/embed_constexpr.cpp b/clang/test/Preprocessor/embed_constexpr.cpp
new file mode 100644
index 0000000..1cadff7
--- /dev/null
+++ b/clang/test/Preprocessor/embed_constexpr.cpp
@@ -0,0 +1,97 @@
+// RUN: %clang_cc1 %s -fsyntax-only --embed-dir=%S/Inputs -verify -Wno-c23-extensions
+// RUN: %clang_cc1 %s -fsyntax-only --embed-dir=%S/Inputs -verify -fexperimental-new-constant-interpreter -Wno-c23-extensions
+
+constexpr int value(int a, int b) {
+  return a + b;
+}
+
+constexpr int func_call() {
+  return value(
+#embed <jk.txt>
+  );
+}
+
+constexpr int init_list_expr() {
+  int vals[] = {
+#embed <jk.txt>
+  };
+  return value(vals[0], vals[1]);
+}
+
+template <int N, int M>
+struct Hurr {
+  static constexpr int V1 = N;
+  static constexpr int V2 = M;
+};
+
+constexpr int template_args() {
+  Hurr<
+#embed <jk.txt>
+  > H;
+  return value(H.V1, H.V2);
+}
+
+constexpr int ExpectedValue = 'j' + 'k';
+static_assert(func_call() == ExpectedValue);
+static_assert(init_list_expr() == ExpectedValue);
+static_assert(template_args() == ExpectedValue);
+
+static_assert(
+#embed <jk.txt> limit(1) suffix(== 'j')
+);
+
+int array[
+#embed <jk.txt> limit(1)
+];
+static_assert(sizeof(array) / sizeof(int) == 'j');
+
+constexpr int comma_expr = (
+#embed <jk.txt> // expected-warning {{left operand of comma operator has no effect}}
+);
+static_assert(comma_expr == 'k');
+
+constexpr int comma_expr_init_list{ (
+#embed <jk.txt> limit(1)
+) };
+static_assert(comma_expr_init_list == 'j');
+
+constexpr int paren_init(
+#embed <jk.txt> limit(1)
+);
+static_assert(paren_init == 'j');
+
+struct S {
+  const char buffer[2] = {
+#embed "jk.txt"
+  };
+};
+
+constexpr struct S s;
+static_assert(s.buffer[1] == 'k');
+
+struct S1 {
+  int x, y;
+};
+
+struct T {
+  int x, y;
+  struct S1 s;
+};
+
+constexpr struct T t[] = {
+#embed <numbers.txt>
+};
+static_assert(t[0].s.x == '2');
+
+constexpr int func(int i, int) { return i; }
+static_assert(
+  func(
+#embed <jk.txt>
+  ) == 'j');
+
+template <int N>
+struct ST {};
+
+ST<
+#embed <jk.txt> limit(1)
+> st;
diff --git a/clang/test/Preprocessor/embed_dependencies.c b/clang/test/Preprocessor/embed_dependencies.c
new file mode 100644
index 0000000..4e00dc7
--- /dev/null
+++ b/clang/test/Preprocessor/embed_dependencies.c
@@ -0,0 +1,20 @@
+// RUN: %clang %s -fsyntax-only -std=c23 -M --embed-dir=%S/Inputs -Xclang -verify | FileCheck %s
+
+// Yes this looks very strange indeed, but the goal is to test that we add
+// files referenced by both __has_embed and #embed when we generate
+// dependencies, so we're trying to see that both of these files are in the
+// output.
+#if __has_embed(<jk.txt>)
+const char data =
+#embed "Inputs/single_byte.txt"
+;
+_Static_assert('b' == data);
+#else
+#error "oops"
+#endif
+// expected-no-diagnostics
+
+// CHECK: embed_dependencies.c \
+// CHECK-NEXT: jk.txt \
+// CHECK-NEXT: Inputs{{[/\\]}}single_byte.txt
+
diff --git a/clang/test/Preprocessor/embed_ext_compat_diags.c b/clang/test/Preprocessor/embed_ext_compat_diags.c
new file mode 100644
index 0000000..74f2417
--- /dev/null
+++ b/clang/test/Preprocessor/embed_ext_compat_diags.c
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -std=c23 %s -fsyntax-only --embed-dir=%S/Inputs -verify=none -pedantic
+// RUN: %clang_cc1 -std=c23 %s -fsyntax-only --embed-dir=%S/Inputs -verify=compat -Wpre-c23-compat
+// RUN: %clang_cc1 -std=c17 %s -fsyntax-only --embed-dir=%S/Inputs -verify=ext -pedantic
+// RUN: %clang_cc1 -x c++ %s -fsyntax-only --embed-dir=%S/Inputs -verify=cxx -pedantic
+// none-no-diagnostics
+
+#if __has_embed("jk.txt")
+
+const char buffer[] = {
+#embed "jk.txt" /* compat-warning {{#embed is incompatible with C standards before C23}}
+                   ext-warning {{#embed is a C23 extension}}
+                   cxx-warning {{#embed is a Clang extension}}
+                 */
+};
+#endif
+
diff --git a/clang/test/Preprocessor/embed_feature_test.cpp b/clang/test/Preprocessor/embed_feature_test.cpp
new file mode 100644
index 0000000..2648804
--- /dev/null
+++ b/clang/test/Preprocessor/embed_feature_test.cpp
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 %s -E -CC -verify
+// RUN: %clang_cc1 -x c %s -E -CC -verify
+// expected-no-diagnostics
+
+#if !defined(__has_embed)
+#error 1
+#endif
diff --git a/clang/test/Preprocessor/embed_file_not_found_chevron.c b/clang/test/Preprocessor/embed_file_not_found_chevron.c
new file mode 100644
index 0000000..472222a
--- /dev/null
+++ b/clang/test/Preprocessor/embed_file_not_found_chevron.c
@@ -0,0 +1,4 @@
+// RUN: %clang_cc1 -std=c23 %s -E -verify
+
+#embed <nfejfNejAKFe>
+// expected-error@-1 {{'nfejfNejAKFe' file not found}}
diff --git a/clang/test/Preprocessor/embed_file_not_found_quote.c b/clang/test/Preprocessor/embed_file_not_found_quote.c
new file mode 100644
index 0000000..bf9c62b
--- /dev/null
+++ b/clang/test/Preprocessor/embed_file_not_found_quote.c
@@ -0,0 +1,4 @@
+// RUN: %clang_cc1 -std=c23 %s -E -verify
+
+#embed "nfejfNejAKFe"
+// expected-error@-1 {{'nfejfNejAKFe' file not found}}
diff --git a/clang/test/Preprocessor/embed_init.c b/clang/test/Preprocessor/embed_init.c
new file mode 100644
index 0000000..79b1743
--- /dev/null
+++ b/clang/test/Preprocessor/embed_init.c
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -std=c23 %s -fsyntax-only --embed-dir=%S/Inputs -verify
+// RUN: %clang_cc1 -std=c23 %s -fsyntax-only --embed-dir=%S/Inputs -verify -fexperimental-new-constant-interpreter
+// expected-no-diagnostics
+
+typedef struct kitty {
+	int purr;
+} kitty;
+
+typedef struct kitty_kitty {
+	int here;
+	kitty kit;
+} kitty_kitty;
+
+const int meow =
+#embed <single_byte.txt>
+;
+
+const kitty kit = {
+#embed <single_byte.txt>
+};
+
+const kitty_kitty kit_kit = {
+#embed <jk.txt>
+};
+
+static_assert(meow == 'b');
+static_assert(kit.purr == 'b');
+static_assert(kit_kit.here == 'j');
+static_assert(kit_kit.kit.purr == 'k');
diff --git a/clang/test/Preprocessor/embed_parameter_if_empty.c b/clang/test/Preprocessor/embed_parameter_if_empty.c
new file mode 100644
index 0000000..70f1bc6
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parameter_if_empty.c
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -std=c23 %s --embed-dir=%S/Inputs -fsyntax-only -verify
+
+const char data[] = {
+#embed <media/empty> if_empty(123, 124, 125)
+};
+const char non_empty_data[] = {
+#embed <jk.txt> if_empty(123, 124, 125)
+};
+static_assert(sizeof(data) == 3);
+static_assert(123 == data[0]);
+static_assert(124 == data[1]);
+static_assert(125 == data[2]);
+static_assert(sizeof(non_empty_data) == 2);
+static_assert('j' == non_empty_data[0]);
+static_assert('k' == non_empty_data[1]);
+
+// Ensure we diagnose duplicate parameters even when their values differ.
+const unsigned char a[] = {
+#embed <jk.txt> if_empty(1) prefix() if_empty(2)
+// expected-error@-1 {{cannot specify parameter 'if_empty' twice in the same '#embed' directive}}
+,
+#embed <jk.txt> if_empty(1) suffix() if_empty(2)
+// expected-error@-1 {{cannot specify parameter 'if_empty' twice in the same '#embed' directive}}
+};
diff --git a/clang/test/Preprocessor/embed_parameter_limit.c b/clang/test/Preprocessor/embed_parameter_limit.c
new file mode 100644
index 0000000..da3e4fb
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parameter_limit.c
@@ -0,0 +1,94 @@
+// RUN: %clang_cc1 -std=c23 %s --embed-dir=%S/Inputs -fsyntax-only -verify
+
+const char data[] = {
+#embed <jk.txt>
+};
+const char offset_data[] = {
+#embed <jk.txt> limit(1)
+};
+static_assert(sizeof(data) == 2);
+static_assert('j' == data[0]);
+static_assert('k' == data[1]);
+static_assert(sizeof(offset_data) == 1);
+static_assert('j' == offset_data[0]);
+static_assert(offset_data[0] == data[0]);
+
+// Cannot have a negative limit.
+#embed <jk.txt> limit(-1)
+// expected-error@-1 {{invalid value '-1'; must be positive}}
+
+// A limit of 0 is allowed; __has_embed should then evaluate to __STDC_EMBED_EMPTY__.
+#if __has_embed(<jk.txt> limit(0)) != __STDC_EMBED_EMPTY__
+#error "__has_embed should return false when there's no data"
+#endif
+
+// When the limit is zero, the resource is empty, so if_empty kicks in.
+const unsigned char buffer[] = {
+#embed <jk.txt> limit(0) if_empty(1)
+};
+static_assert(sizeof(buffer) == 1);
+static_assert(buffer[0] == 1);
+
+// However, prefix and suffix do not kick in.
+const unsigned char other_buffer[] = {
+  1,
+#embed <jk.txt> limit(0) prefix(2,) suffix(3)
+};
+static_assert(sizeof(other_buffer) == 1);
+static_assert(other_buffer[0] == 1);
+
+// Ensure we can limit to something larger than the file size as well.
+const unsigned char third_buffer[] = {
+#embed <jk.txt> limit(100)
+};
+static_assert(sizeof(third_buffer) == 2);
+static_assert('j' == third_buffer[0]);
+static_assert('k' == third_buffer[1]);
+
+// Test the limits of a file with more than one character in it.
+const unsigned char fourth_buffer[] = {
+#embed <media/art.txt> limit(10)
+};
+static_assert(sizeof(fourth_buffer) == 10);
+static_assert(' ' == fourth_buffer[0]);
+static_assert(' ' == fourth_buffer[1]);
+static_assert(' ' == fourth_buffer[2]);
+static_assert(' ' == fourth_buffer[3]);
+static_assert(' ' == fourth_buffer[4]);
+static_assert(' ' == fourth_buffer[5]);
+static_assert(' ' == fourth_buffer[6]);
+static_assert(' ' == fourth_buffer[7]);
+static_assert(' ' == fourth_buffer[8]);
+static_assert(' ' == fourth_buffer[9]);
+
+// Ensure that a limit larger than what can fit into a 64-bit value is
+// rejected. This limit is fine because it fits in a 64-bit value.
+const unsigned char fifth_buffer[] = {
+#embed <jk.txt> limit(0xFFFF'FFFF'FFFF'FFFF)
+};
+static_assert(sizeof(fifth_buffer) == 2);
+static_assert('j' == fifth_buffer[0]);
+static_assert('k' == fifth_buffer[1]);
+
+// But this one is not fine because it does not fit into a 64-bit value.
+const unsigned char sixth_buffer[] = {
+#embed <jk.txt> limit(0xFFFF'FFFF'FFFF'FFFF'1)
+};
+// expected-error@-2 {{integer literal is too large to be represented in any integer type}}
+// Note: the preprocessor will continue with the truncated value, so the parser
+// will treat this case and the previous one identically in terms of what
+// contents are retained from the embedded resource (which is the entire file).
+
+// Ensure we diagnose duplicate parameters even if they're the same value.
+const unsigned char a[] = {
+#embed <jk.txt> limit(1) prefix() limit(1)
+// expected-error@-1 {{cannot specify parameter 'limit' twice in the same '#embed' directive}}
+,
+#embed <jk.txt> limit(1) if_empty() limit(2)
+// expected-error@-1 {{cannot specify parameter 'limit' twice in the same '#embed' directive}}
+};
+
+// C23 6.10.3.2p2
+static_assert(
+#embed <jk.txt> limit(defined(FOO)) // expected-error {{'defined' cannot appear within this context}}
+  == 0); // expected-error {{expected expression}}
diff --git a/clang/test/Preprocessor/embed_parameter_offset.c b/clang/test/Preprocessor/embed_parameter_offset.c
new file mode 100644
index 0000000..ab1bd3f
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parameter_offset.c
@@ -0,0 +1,89 @@
+// RUN: %clang_cc1 -std=c23 %s --embed-dir=%S/Inputs -fsyntax-only -verify
+
+const char data[] = {
+#embed <jk.txt>
+};
+const char offset_data[] = {
+#embed <jk.txt> clang::offset(1)
+};
+static_assert(sizeof(data) == 2);
+static_assert('j' == data[0]);
+static_assert('k' == data[1]);
+static_assert(sizeof(offset_data) == 1);
+static_assert('k' == offset_data[0]);
+static_assert(offset_data[0] == data[1]);
+
+// Cannot have a negative offset.
+#embed <jk.txt> clang::offset(-1)
+// expected-error@-1 {{invalid value '-1'; must be positive}}
+
+// If the offset is past the end of the file, the file should be considered
+// empty.
+#if __has_embed(<jk.txt> clang::offset(3)) != __STDC_EMBED_EMPTY__
+#error "__has_embed should return false when there's no data"
+#endif
+
+// When the offset is past the end of the file, the resource is empty, so if_empty kicks in.
+const unsigned char buffer[] = {
+#embed <jk.txt> clang::offset(3) if_empty(1)
+};
+static_assert(sizeof(buffer) == 1);
+static_assert(buffer[0] == 1);
+
+// However, prefix and suffix do not kick in.
+const unsigned char other_buffer[] = {
+  1,
+#embed <jk.txt> clang::offset(3) prefix(2,) suffix(3)
+};
+static_assert(sizeof(other_buffer) == 1);
+static_assert(other_buffer[0] == 1);
+
+// Ensure we can offset to zero (that's the default behavior)
+const unsigned char third_buffer[] = {
+#embed <jk.txt> clang::offset(0)
+};
+static_assert(sizeof(third_buffer) == 2);
+static_assert('j' == third_buffer[0]);
+static_assert('k' == third_buffer[1]);
+
+// Test the offsets of a file with more than one character in it.
+const unsigned char fourth_buffer[] = {
+#embed <media/art.txt> clang::offset(24) limit(4)
+};
+static_assert(sizeof(fourth_buffer) == 4);
+static_assert('.' == fourth_buffer[0]);
+static_assert('-' == fourth_buffer[1]);
+static_assert('.' == fourth_buffer[2]);
+static_assert('\'' == fourth_buffer[3]);
+
+// Ensure that an offset larger than what can fit into a 64-bit value is
+// rejected. This offset is fine because it fits in a 64-bit value.
+const unsigned char fifth_buffer[] = {
+  1,
+#embed <jk.txt> clang::offset(0xFFFF'FFFF'FFFF'FFFF)
+};
+static_assert(sizeof(fifth_buffer) == 1);
+static_assert(1 == fifth_buffer[0]);
+
+// But this one is not fine because it does not fit into a 64-bit value.
+const unsigned char sixth_buffer[] = {
+#embed <jk.txt> clang::offset(0xFFFF'FFFF'FFFF'FFFF'1)
+};
+// expected-error@-2 {{integer literal is too large to be represented in any integer type}}
+
+// Ensure we diagnose duplicate parameters even if they're the same value.
+const unsigned char a[] = {
+#embed <jk.txt> clang::offset(1) prefix() clang::offset(1)
+// expected-error@-1 {{cannot specify parameter 'clang::offset' twice in the same '#embed' directive}}
+,
+#embed <jk.txt> clang::offset(1) if_empty() clang::offset(2)
+// expected-error@-1 {{cannot specify parameter 'clang::offset' twice in the same '#embed' directive}}
+};
+
+// Matches C23 6.10.3.2p2; this is documented as part of our extension.
+static_assert(
+#embed <jk.txt> clang::offset(defined(FOO))
+  == 0); // expected-error {{expected expression}}
+ /* expected-error@-2 {{'defined' cannot appear within this context}}
+    pedantic-warning@-2 {{'clang::offset' is a Clang extension}}
+  */
diff --git a/clang/test/Preprocessor/embed_parameter_prefix.c b/clang/test/Preprocessor/embed_parameter_prefix.c
new file mode 100644
index 0000000..b55c08f
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parameter_prefix.c
@@ -0,0 +1,38 @@
+// RUN: %clang_cc1 -std=c23 %s --embed-dir=%S/Inputs -fsyntax-only -verify
+
+const char data[] = {
+#embed <single_byte.txt> prefix('\xA', )
+};
+const char empty_data[] = {
+#embed <media/empty> prefix('\xA', )
+1
+};
+static_assert(sizeof(data) == 2);
+static_assert('\xA' == data[0]);
+static_assert('b' == data[1]);
+static_assert(sizeof(empty_data) == 1);
+static_assert(1 == empty_data[0]);
+
+struct S {
+  int x, y, z;
+};
+
+const struct S s = {
+#embed <single_byte.txt> prefix( .x = 100, .y = 10, )
+};
+static_assert(s.x == 100);
+static_assert(s.y == 10);
+static_assert(s.z == 'b');
+
+// Ensure that an empty file does not produce any prefix tokens. If it did,
+// there would be random tokens here that the parser would trip on.
+#embed <media/empty> prefix(0)
+
+// Ensure we diagnose duplicate parameters even if they're the same value.
+const unsigned char a[] = {
+#embed <jk.txt> prefix(1,) limit(1) prefix(1,)
+// expected-error@-1 {{cannot specify parameter 'prefix' twice in the same '#embed' directive}}
+,
+#embed <jk.txt> prefix(1,) if_empty() prefix(2,)
+// expected-error@-1 {{cannot specify parameter 'prefix' twice in the same '#embed' directive}}
+};
diff --git a/clang/test/Preprocessor/embed_parameter_suffix.c b/clang/test/Preprocessor/embed_parameter_suffix.c
new file mode 100644
index 0000000..7d768268
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parameter_suffix.c
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 -std=c23 %s --embed-dir=%S/Inputs -fsyntax-only -verify
+
+const char data[] = {
+#embed <single_byte.txt> suffix(, '\xA')
+};
+const char empty_data[] = {
+#embed <media/empty> suffix(, '\xA')
+1
+};
+static_assert(sizeof(data) == 2);
+static_assert('b' == data[0]);
+static_assert('\xA' == data[1]);
+static_assert(sizeof(empty_data) == 1);
+static_assert(1 == empty_data[0]);
+
+struct S {
+  int x, y, z;
+};
+
+const struct S s = {
+#embed <single_byte.txt> suffix( , .y = 100, .z = 10 )
+};
+
+static_assert(s.x == 'b');
+static_assert(s.y == 100);
+static_assert(s.z == 10);
+
+// Ensure that an empty file does not produce any suffix tokens. If it did,
+// there would be random tokens here that the parser would trip on.
+#embed <media/empty> suffix(0)
+
+// Ensure we diagnose duplicate parameters even if they're the same value.
+const unsigned char a[] = {
+#embed <jk.txt> suffix(,1) prefix() suffix(,1)
+// expected-error@-1 {{cannot specify parameter 'suffix' twice in the same '#embed' directive}}
+,
+#embed <jk.txt> suffix(,1) if_empty() suffix(,2)
+// expected-error@-1 {{cannot specify parameter 'suffix' twice in the same '#embed' directive}}
+};
diff --git a/clang/test/Preprocessor/embed_parameter_unrecognized.c b/clang/test/Preprocessor/embed_parameter_unrecognized.c
new file mode 100644
index 0000000..b033843
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parameter_unrecognized.c
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 %s -std=c23 -E -verify
+// Every parameter spelling below is unknown to Clang and must be diagnosed.
+
+#embed __FILE__ unrecognized
+// expected-error@-1 {{unknown embed preprocessor parameter 'unrecognized'}}
+#embed __FILE__ unrecognized::param
+// expected-error@-1 {{unknown embed preprocessor parameter 'unrecognized::param'}}
+#embed __FILE__ unrecognized::param(with, args)
+// expected-error@-1 {{unknown embed preprocessor parameter 'unrecognized::param'}}
diff --git a/clang/test/Preprocessor/embed_parsing_errors.c b/clang/test/Preprocessor/embed_parsing_errors.c
new file mode 100644
index 0000000..490ec6d
--- /dev/null
+++ b/clang/test/Preprocessor/embed_parsing_errors.c
@@ -0,0 +1,130 @@
+// RUN: %clang_cc1 -std=c23 %s -E -verify
+
+// Test the parsing behavior for #embed and all of its parameters to ensure we
+// recover from failures gracefully.
+char buffer[] = {
+#embed
+// expected-error@-1 {{expected "FILENAME" or <FILENAME>}}
+
+#embed <
+// expected-error@-1 {{expected '>'}} \
+   expected-note@-1 {{to match this '<'}}
+
+#embed "
+// expected-error@-1 {{expected "FILENAME" or <FILENAME>}} \
+   expected-warning@-1 {{missing terminating '"' character}}
+
+#embed file.txt
+// expected-error@-1{{expected "FILENAME" or <FILENAME>}}
+
+#embed "embed_parsing_errors.c" xxx
+// expected-error@-1 {{unknown embed preprocessor parameter 'xxx'}}
+
+#embed "embed_parsing_errors.c" xxx::
+// expected-error@-1 {{expected identifier}}
+
+#embed "embed_parsing_errors.c" xxx::xxx
+// expected-error@-1 {{unknown embed preprocessor parameter 'xxx::xxx'}}
+
+#embed "embed_parsing_errors.c" xxx::42
+// expected-error@-1 {{expected identifier}}
+
+#embed "embed_parsing_errors.c" limit
+// expected-error@-1 {{expected '('}}
+
+#embed "embed_parsing_errors.c" limit(
+// expected-error@-1 {{expected value in expression}}
+
+#embed "embed_parsing_errors.c" limit(xxx
+// expected-error@-1 {{expected ')'}}
+
+#embed "embed_parsing_errors.c" limit(42
+// expected-error@-1 {{expected ')'}}
+
+#embed "embed_parsing_errors.c" limit([
+// expected-error@-1 {{invalid token at start of a preprocessor expression}}
+
+#embed "embed_parsing_errors.c" limit([)
+// expected-error@-1 {{invalid token at start of a preprocessor expression}}
+
+#embed "embed_parsing_errors.c" limit(1/0)
+// expected-error@-1 {{division by zero in preprocessor expression}}
+
+#embed "embed_parsing_errors.c" clang::offset
+// expected-error@-1 {{expected '('}}
+
+#embed "embed_parsing_errors.c" clang::offset(
+// expected-error@-1 {{expected value in expression}}
+
+#embed "embed_parsing_errors.c" clang::offset(xxx
+// expected-error@-1 {{expected ')'}}
+
+#embed "embed_parsing_errors.c" clang::offset(42
+// expected-error@-1 {{expected ')'}}
+
+#embed "embed_parsing_errors.c" clang::offset([
+// expected-error@-1 {{invalid token at start of a preprocessor expression}}
+
+#embed "embed_parsing_errors.c" clang::offset([)
+// expected-error@-1 {{invalid token at start of a preprocessor expression}}
+
+#embed "embed_parsing_errors.c" clang::offset(1/0)
+// expected-error@-1 {{division by zero in preprocessor expression}}
+
+#embed "embed_parsing_errors.c" clang::offset 42
+// expected-error@-1 {{expected '('}}
+
+#embed "embed_parsing_errors.c" prefix
+// expected-error@-1 {{expected '('}}
+
+#embed "embed_parsing_errors.c" prefix(
+// expected-error@-1 {{expected ')'}}
+
+#embed "embed_parsing_errors.c" prefix(xxx
+// expected-error@-1 {{expected ')'}}
+
+#embed "embed_parsing_errors.c" prefix(1/0) // OK: emitted as tokens, not evaluated yet.
+#embed "embed_parsing_errors.c" prefix(([{}])) // OK: delimiters balanced
+#embed "embed_parsing_errors.c" prefix(([{)]})
+// expected-error@-1 {{expected '}'}} expected-note@-1 {{to match this '{'}}
+#embed "embed_parsing_errors.c" prefix(([{})})
+// expected-error@-1 {{expected ']'}} expected-note@-1 {{to match this '['}}
+#embed "embed_parsing_errors.c" prefix(([{}]})
+// expected-error@-1 {{expected ')'}} expected-note@-1 {{to match this '('}}
+#embed "embed_parsing_errors.c" prefix() // OK: tokens within parens are optional
+#embed "embed_parsing_errors.c" prefix)
+// expected-error@-1 {{expected '('}}
+
+#embed "embed_parsing_errors.c" suffix
+// expected-error@-1 {{expected '('}}
+
+#embed "embed_parsing_errors.c" suffix(
+// expected-error@-1 {{expected ')'}}
+
+#embed "embed_parsing_errors.c" suffix(xxx
+// expected-error@-1 {{expected ')'}}
+
+#embed "embed_parsing_errors.c" suffix(1/0) // OK: emitted as tokens, not evaluated yet.
+#embed "embed_parsing_errors.c" suffix(([{}])) // OK: delimiters balanced
+#embed "embed_parsing_errors.c" suffix(([{)]})
+// expected-error@-1 {{expected '}'}} expected-note@-1 {{to match this '{'}}
+#embed "embed_parsing_errors.c" suffix(([{})})
+// expected-error@-1 {{expected ']'}} expected-note@-1 {{to match this '['}}
+#embed "embed_parsing_errors.c" suffix(([{}]})
+// expected-error@-1 {{expected ')'}} expected-note@-1 {{to match this '('}}
+#embed "embed_parsing_errors.c" suffix() // OK: tokens within parens are optional
+#embed "embed_parsing_errors.c" suffix)
+// expected-error@-1 {{expected '('}}
+
+#embed "embed_parsing_errors.c" if_empty(1/0) // OK: emitted as tokens, not evaluated yet.
+#embed "embed_parsing_errors.c" if_empty(([{}])) // OK: delimiters balanced
+#embed "embed_parsing_errors.c" if_empty(([{)]})
+// expected-error@-1 {{expected '}'}} expected-note@-1 {{to match this '{'}}
+#embed "embed_parsing_errors.c" if_empty(([{})})
+// expected-error@-1 {{expected ']'}} expected-note@-1 {{to match this '['}}
+#embed "embed_parsing_errors.c" if_empty(([{}]})
+// expected-error@-1 {{expected ')'}} expected-note@-1 {{to match this '('}}
+#embed "embed_parsing_errors.c" if_empty() // OK: tokens within parens are optional
+#embed "embed_parsing_errors.c" if_empty)
+// expected-error@-1 {{expected '('}}
+};
diff --git a/clang/test/Preprocessor/embed_path_chevron.c b/clang/test/Preprocessor/embed_path_chevron.c
new file mode 100644
index 0000000..b12cb9ce
--- /dev/null
+++ b/clang/test/Preprocessor/embed_path_chevron.c
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 %s -std=c23 -fsyntax-only --embed-dir=%S/Inputs -verify
+// expected-no-diagnostics
+
+const char data[] = {
+#embed <single_byte.txt>
+};
+static_assert(sizeof(data) == 1);
+static_assert('b' == data[0]);
diff --git a/clang/test/Preprocessor/embed_path_quote.c b/clang/test/Preprocessor/embed_path_quote.c
new file mode 100644
index 0000000..79ca1e5
--- /dev/null
+++ b/clang/test/Preprocessor/embed_path_quote.c
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -std=c23 %s -fsyntax-only --embed-dir=%S/Inputs -verify
+// expected-no-diagnostics
+
+const char data[] = {
+#embed "single_byte.txt"
+};
+static_assert(sizeof(data) == 1);
+static_assert('a' == data[0]); // 'a' is the single_byte.txt next to this test; presumably the quoted form finds it before --embed-dir.
diff --git a/clang/test/Preprocessor/embed_preprocess_to_file.c b/clang/test/Preprocessor/embed_preprocess_to_file.c
new file mode 100644
index 0000000..9895d95
--- /dev/null
+++ b/clang/test/Preprocessor/embed_preprocess_to_file.c
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 -std=c23 %s -E --embed-dir=%S/Inputs | FileCheck %s --check-prefix EXPANDED
+// RUN: %clang_cc1 -std=c23 %s -E -dE --embed-dir=%S/Inputs | FileCheck %s --check-prefix DIRECTIVE
+
+// Ensure that we correctly preprocess to a file, both with expanding embed
+// directives fully and with printing the directive instead.
+const char data[] = {
+#embed <jk.txt> if_empty('a', 'b') clang::offset(0) limit(1) suffix(, 'a', 0) prefix('h',)
+};
+
+// EXPANDED: const char data[] = {'h',106 , 'a', 0};
+// DIRECTIVE: const char data[] = {
+// DIRECTIVE-NEXT: #embed <jk.txt> if_empty('a', 'b') limit(1) clang::offset(0) prefix('h',) suffix(, 'a', 0) /* clang -E -dE */
+// DIRECTIVE-NEXT: };
+
+const char more[] = {
+#embed <media/empty> if_empty('a', 'b')
+};
+
+// EXPANDED: const char more[] = {'a', 'b'}
+// DIRECTIVE: const char more[] = {
+// DIRECTIVE-NEXT: #embed <media/empty> if_empty('a', 'b') /* clang -E -dE */
+// DIRECTIVE-NEXT: };
+
+const char even_more[] = {
+  1, 2, 3,
+#embed <jk.txt> prefix(4, 5,) suffix(, 6, 7)
+  , 8, 9, 10
+};
+
+// EXPANDED: const char even_more[] = {
+// EXPANDED-NEXT:   1, 2, 3,4, 5,106, 107 , 6, 7 , 8, 9, 10
+// EXPANDED-EMPTY:
+// EXPANDED-EMPTY:
+// EXPANDED-NEXT: };
+// DIRECTIVE: const char even_more[] = {
+// DIRECTIVE-NEXT:  1, 2, 3,
+// DIRECTIVE-NEXT: #embed <jk.txt> prefix(4, 5,) suffix(, 6, 7) /* clang -E -dE */
+// DIRECTIVE-NEXT:  , 8, 9, 10
+// DIRECTIVE-NEXT: };
diff --git a/clang/test/Preprocessor/embed_single_entity.c b/clang/test/Preprocessor/embed_single_entity.c
new file mode 100644
index 0000000..2019118
--- /dev/null
+++ b/clang/test/Preprocessor/embed_single_entity.c
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 %s -fsyntax-only -std=c23 --embed-dir=%S/Inputs -verify
+
+const char data =
+#embed <single_byte.txt>
+;
+_Static_assert('b' == data);
+// expected-no-diagnostics
diff --git a/clang/test/Preprocessor/embed_weird.cpp b/clang/test/Preprocessor/embed_weird.cpp
new file mode 100644
index 0000000..a31b083
--- /dev/null
+++ b/clang/test/Preprocessor/embed_weird.cpp
@@ -0,0 +1,98 @@
+// RUN: %clang_cc1 %s -fsyntax-only --embed-dir=%S/Inputs -verify=expected,cxx -Wno-c23-extensions
+// RUN: %clang_cc1 -x c -std=c23 %s -fsyntax-only --embed-dir=%S/Inputs -verify=expected,c
+#embed <media/empty>
+;
+
+void f (unsigned char x) { (void)x;}
+void g () {}
+void h (unsigned char x, int y) {(void)x; (void)y;}
+int i () {
+	return
+#embed <single_byte.txt>
+		;
+}
+
+_Static_assert(
+#embed <single_byte.txt> suffix(,)
+""
+);
+_Static_assert(
+#embed <single_byte.txt>
+, ""
+);
+_Static_assert(sizeof(
+#embed <single_byte.txt>
+) ==
+sizeof(unsigned char)
+, ""
+);
+_Static_assert(sizeof
+#embed <single_byte.txt>
+, ""
+);
+_Static_assert(sizeof(
+#embed <jk.txt> // expected-warning {{left operand of comma operator has no effect}}
+) ==
+sizeof(unsigned char)
+, ""
+);
+
+#ifdef __cplusplus
+template <int First, int Second>
+void j() {
+	static_assert(First == 'j', "");
+	static_assert(Second == 'k', "");
+}
+#endif
+
+void do_stuff() {
+	f(
+#embed <single_byte.txt>
+	);
+	g(
+#embed <media/empty>
+	);
+	h(
+#embed <jk.txt>
+	);
+	int r = i();
+	(void)r;
+#ifdef __cplusplus
+	j<
+#embed <jk.txt>
+	>(
+#embed <media/empty>
+	);
+#endif
+}
+
+// Ensure that we don't accidentally allow you to initialize an unsigned char *
+// from embedded data; the data is modeled as a string literal internally, but
+// is not actually a string literal.
+const unsigned char *ptr =
+#embed <jk.txt> // expected-warning {{left operand of comma operator has no effect}}
+; // c-error@-2 {{incompatible integer to pointer conversion initializing 'const unsigned char *' with an expression of type 'unsigned char'}} \
+     cxx-error@-2 {{cannot initialize a variable of type 'const unsigned char *' with an rvalue of type 'unsigned char'}}
+
+// However, there are some cases where this is fine and should work.
+const unsigned char *null_ptr_1 =
+#embed <media/empty> if_empty(0)
+;
+
+const unsigned char *null_ptr_2 =
+#embed <null_byte.bin>
+;
+
+const unsigned char *null_ptr_3 = {
+#embed <null_byte.bin>
+};
+
+#define FILE_NAME <null_byte.bin>
+#define LIMIT 1
+#define OFFSET 0
+#define EMPTY_SUFFIX suffix()
+
+constexpr unsigned char ch =
+#embed FILE_NAME limit(LIMIT) clang::offset(OFFSET) EMPTY_SUFFIX
+;
+static_assert(ch == 0);
diff --git a/clang/test/Preprocessor/init-aarch64.c b/clang/test/Preprocessor/init-aarch64.c
index f084598..9e425ac 100644
--- a/clang/test/Preprocessor/init-aarch64.c
+++ b/clang/test/Preprocessor/init-aarch64.c
@@ -272,6 +272,9 @@
 // AARCH64-NEXT: #define __SIZE_WIDTH__ 64
 // AARCH64_CXX: #define __STDCPP_DEFAULT_NEW_ALIGNMENT__ 16UL
 // AARCH64_CXX: #define __STDCPP_THREADS__ 1
+// AARCH64-NEXT: #define __STDC_EMBED_EMPTY__ 2
+// AARCH64-NEXT: #define __STDC_EMBED_FOUND__ 1
+// AARCH64-NEXT: #define __STDC_EMBED_NOT_FOUND__ 0
 // AARCH64-NEXT: #define __STDC_HOSTED__ 1
 // AARCH64-NEXT: #define __STDC_UTF_16__ 1
 // AARCH64-NEXT: #define __STDC_UTF_32__ 1
diff --git a/clang/test/Preprocessor/init.c b/clang/test/Preprocessor/init.c
index 2641fee..57bf671 100644
--- a/clang/test/Preprocessor/init.c
+++ b/clang/test/Preprocessor/init.c
@@ -1875,6 +1875,9 @@
 // WEBASSEMBLY-NEXT:#define __SIZE_TYPE__ long unsigned int
 // WEBASSEMBLY32-NEXT:#define __SIZE_WIDTH__ 32
 // WEBASSEMBLY64-NEXT:#define __SIZE_WIDTH__ 64
+// WEBASSEMBLY-NEXT:#define __STDC_EMBED_EMPTY__ 2
+// WEBASSEMBLY-NEXT:#define __STDC_EMBED_FOUND__ 1
+// WEBASSEMBLY-NEXT:#define __STDC_EMBED_NOT_FOUND__ 0
 // WEBASSEMBLY-NEXT:#define __STDC_HOSTED__ 0
 // WEBASSEMBLY-NOT:#define __STDC_MB_MIGHT_NEQ_WC__
 // WEBASSEMBLY-NOT:#define __STDC_NO_ATOMICS__
diff --git a/clang/test/Preprocessor/single_byte.txt b/clang/test/Preprocessor/single_byte.txt
new file mode 100644
index 0000000..2e65efe
--- /dev/null
+++ b/clang/test/Preprocessor/single_byte.txt
@@ -0,0 +1 @@
+a
+\ No newline at end of file