aboutsummaryrefslogtreecommitdiff
path: root/libcpp/directives.cc
diff options
context:
space:
mode:
authorJakub Jelinek <jakub@redhat.com>2024-09-12 18:17:05 +0200
committerJakub Jelinek <jakub@gcc.gnu.org>2024-09-12 18:17:05 +0200
commitce0aecc7df1ff0be24c278dff5575ec28042ee58 (patch)
tree0fbd4ae6771550607816a7ba4fb6fd391e4f3c98 /libcpp/directives.cc
parentc5009eb887910271ea35a857aa68941c7227b9c7 (diff)
downloadgcc-ce0aecc7df1ff0be24c278dff5575ec28042ee58.zip
gcc-ce0aecc7df1ff0be24c278dff5575ec28042ee58.tar.gz
gcc-ce0aecc7df1ff0be24c278dff5575ec28042ee58.tar.bz2
libcpp, v2: Add support for gnu::base64 #embed parameter
This patch adds another #embed extension, gnu::base64. As mentioned in the documentation, this extension is primarily intended for use by the preprocessor, so that for the larger (say 32+ or 64+ bytes long) embeds it doesn't have to emit tens of thousands or millions of comma-separated string literals which would be very expensive to parse again, but can emit #embed "." __gnu__::__base64__( \ "Tm9uIGVyYW0gbsOpc2NpdXMsIEJydXRlLCBjdW0sIHF1w6Ygc3VtbWlzIGluZ8OpbmlpcyBleHF1" \ "aXNpdMOhcXVlIGRvY3Ryw61uYSBwaGlsw7Nzb3BoaSBHcsOmY28gc2VybcOzbmUgdHJhY3RhdsOt" \ "c3NlbnQsIGVhIExhdMOtbmlzIGzDrXR0ZXJpcyBtYW5kYXLDqW11cywgZm9yZSB1dCBoaWMgbm9z" \ "dGVyIGxhYm9yIGluIHbDoXJpYXMgcmVwcmVoZW5zacOzbmVzIGluY8O6cnJlcmV0LiBuYW0gcXVp" \ "YsO6c2RhbSwgZXQgaWlzIHF1aWRlbSBub24gw6FkbW9kdW0gaW5kw7NjdGlzLCB0b3R1bSBob2Mg" \ "ZMOtc3BsaWNldCBwaGlsb3NvcGjDoXJpLiBxdWlkYW0gYXV0ZW0gbm9uIHRhbSBpZCByZXByZWjD" \ "qW5kdW50LCBzaSByZW3DrXNzaXVzIGFnw6F0dXIsIHNlZCB0YW50dW0gc3TDumRpdW0gdGFtcXVl" \ "IG11bHRhbSDDs3BlcmFtIHBvbsOpbmRhbSBpbiBlbyBub24gYXJiaXRyw6FudHVyLiBlcnVudCDD" \ "qXRpYW0sIGV0IGlpIHF1aWRlbSBlcnVkw610aSBHcsOmY2lzIGzDrXR0ZXJpcywgY29udGVtbsOp" \ "bnRlcyBMYXTDrW5hcywgcXVpIHNlIGRpY2FudCBpbiBHcsOmY2lzIGxlZ8OpbmRpcyDDs3BlcmFt" \ "IG1hbGxlIGNvbnPDum1lcmUuIHBvc3Ryw6ltbyDDoWxpcXVvcyBmdXTDunJvcyBzw7pzcGljb3Is" \ "IHF1aSBtZSBhZCDDoWxpYXMgbMOtdHRlcmFzIHZvY2VudCwgZ2VudXMgaG9jIHNjcmliw6luZGks" \ "IGV0c2kgc2l0IGVsw6lnYW5zLCBwZXJzw7Nuw6YgdGFtZW4gZXQgZGlnbml0w6F0aXMgZXNzZSBu" \ "ZWdlbnQu") with the meaning: don't actually load some file, instead base64 decode (RFC4648 with A-Za-z0-9+/ chars and = padding, no newlines in between) the string and use that as data. This is chosen because it should be -pedantic-errors clean, fairly cheap to decode, and then in an optimizing compiler could be handled as a binary blob similar to a normal #embed, while the data isn't left somewhere on the disk, so distcc/ccache etc. can move the preprocessed source without issues. 
It makes no sense to support limit and gnu::offset parameters together with it IMHO; why would somebody bother providing the full data and then throw some of it away? prefix/suffix/if_empty are normally supported though, but not intended to be used by the preprocessor. This patch adds just the extension side, not the actual emitting of this during -E or -E -fdirectives-only for now, that will be included in the upcoming patch. Compared to the earlier posted version of this extension, this patch allows string concatenation in the parameter argument (but still doesn't allow escapes in the string, why would anyone use them when only A-Za-z0-9+/= are valid). The patch also adds support for parsing this even in -fpreprocessed compilation. 2024-09-12 Jakub Jelinek <jakub@redhat.com> libcpp/ * internal.h (struct cpp_embed_params): Add base64 member. (_cpp_free_embed_params_tokens): Declare. * directives.cc (DIRECTIVE_TABLE): Add IN_I flag to T_EMBED. (save_token_for_embed, _cpp_free_embed_params_tokens): New functions. (EMBED_PARAMS): Add gnu::base64 entry. (_cpp_parse_embed_params): Parse gnu::base64 parameter. If -fpreprocessed without -fdirectives-only, require #embed to have gnu::base64 parameter. Diagnose conflict between gnu::base64 and limit or gnu::offset parameters. (do_embed): Use _cpp_free_embed_params_tokens. * files.cc (finish_embed, base64_dec_fn): New functions. (base64_dec): New array. (B64D0, B64D1, B64D2, B64D3): Define. (finish_base64_embed): New function. (_cpp_stack_embed): Use finish_embed. Handle params->base64 using finish_base64_embed. * macro.cc (builtin_has_embed): Call _cpp_free_embed_params_tokens. gcc/ * doc/cpp.texi (Binary Resource Inclusion): Document gnu::base64 parameter. gcc/testsuite/ * c-c++-common/cpp/embed-17.c: New test. * c-c++-common/cpp/embed-18.c: New test. * c-c++-common/cpp/embed-19.c: New test. * c-c++-common/cpp/embed-27.c: New test. * gcc.dg/cpp/embed-6.c: New test. * gcc.dg/cpp/embed-7.c: New test.
Diffstat (limited to 'libcpp/directives.cc')
-rw-r--r--libcpp/directives.cc170
1 files changed, 124 insertions, 46 deletions
diff --git a/libcpp/directives.cc b/libcpp/directives.cc
index 859cd04..866ac9a 100644
--- a/libcpp/directives.cc
+++ b/libcpp/directives.cc
@@ -159,7 +159,7 @@ static void cpp_pop_definition (cpp_reader *, struct def_pragma_macro *);
D(error, T_ERROR, STDC89, 0) \
D(pragma, T_PRAGMA, STDC89, IN_I) \
D(warning, T_WARNING, STDC23, 0) \
- D(embed, T_EMBED, STDC23, INCL | EXPAND) \
+ D(embed, T_EMBED, STDC23, IN_I | INCL | EXPAND) \
D(include_next, T_INCLUDE_NEXT, EXTENSION, INCL | EXPAND) \
D(ident, T_IDENT, EXTENSION, IN_I) \
D(import, T_IMPORT, EXTENSION, INCL | EXPAND) /* ObjC */ \
@@ -941,6 +941,50 @@ do_include_next (cpp_reader *pfile)
do_include_common (pfile, type);
}
+/* Helper function for skip_balanced_token_seq and _cpp_parse_embed_params.
+ Save one token *TOKEN into *SAVE. */
+
+static void
+save_token_for_embed (cpp_embed_params_tokens *save, const cpp_token *token)
+{
+ if (save->count == 0)
+ {
+ _cpp_init_tokenrun (&save->base_run, 4);
+ save->cur_run = &save->base_run;
+ save->cur_token = save->base_run.base;
+ }
+ else if (save->cur_token == save->cur_run->limit)
+ {
+ save->cur_run->next = XNEW (tokenrun);
+ save->cur_run->next->prev = save->cur_run;
+ _cpp_init_tokenrun (save->cur_run->next, 4);
+ save->cur_run = save->cur_run->next;
+ save->cur_token = save->cur_run->base;
+ }
+ *save->cur_token = *token;
+ save->cur_token->flags |= NO_EXPAND;
+ save->cur_token++;
+ save->count++;
+}
+
+/* Free memory associated with saved tokens in *SAVE. */
+
+void
+_cpp_free_embed_params_tokens (cpp_embed_params_tokens *save)
+{
+ if (save->count == 0)
+ return;
+ tokenrun *n;
+ for (tokenrun *t = &save->base_run; t; t = n)
+ {
+ n = t->next;
+ XDELETEVEC (t->base);
+ if (t != &save->base_run)
+ XDELETE (t);
+ }
+ save->count = 0;
+}
+
/* Skip over balanced preprocessing tokens until END is found.
If SAVE is non-NULL, remember the parsed tokens in it. NESTED is
false in the outermost invocation of the function and true
@@ -970,26 +1014,7 @@ skip_balanced_token_seq (cpp_reader *pfile, cpp_ttype end,
if (save
&& (token->type != CPP_PADDING || save->count)
&& (token->type != end || nested))
- {
- if (save->count == 0)
- {
- _cpp_init_tokenrun (&save->base_run, 4);
- save->cur_run = &save->base_run;
- save->cur_token = save->base_run.base;
- }
- else if (save->cur_token == save->cur_run->limit)
- {
- save->cur_run->next = XNEW (tokenrun);
- save->cur_run->next->prev = save->cur_run;
- _cpp_init_tokenrun (save->cur_run->next, 4);
- save->cur_run = save->cur_run->next;
- save->cur_token = save->cur_run->base;
- }
- *save->cur_token = *token;
- save->cur_token->flags |= NO_EXPAND;
- save->cur_token++;
- save->count++;
- }
+ save_token_for_embed (save, token);
if (token->type == end)
return;
switch (token->type)
@@ -1024,6 +1049,7 @@ skip_balanced_token_seq (cpp_reader *pfile, cpp_ttype end,
EMBED_PARAM (PREFIX, "prefix") \
EMBED_PARAM (SUFFIX, "suffix") \
EMBED_PARAM (IF_EMPTY, "if_empty") \
+ EMBED_PARAM (GNU_BASE64, "base64") \
EMBED_PARAM (GNU_OFFSET, "offset")
enum embed_param_kind {
@@ -1067,12 +1093,33 @@ _cpp_parse_embed_params (cpp_reader *pfile, struct cpp_embed_params *params)
cpp_error (pfile, CPP_DL_ERROR, "expected ')'");
return false;
}
- return ret;
}
- else if (token->type == CPP_CLOSE_PAREN && params->has_embed)
- return ret;
- cpp_error (pfile, CPP_DL_ERROR, "expected parameter name");
- return false;
+ else if (token->type != CPP_CLOSE_PAREN || !params->has_embed)
+ {
+ cpp_error (pfile, CPP_DL_ERROR, "expected parameter name");
+ return false;
+ }
+ if (params->base64.count
+ && (seen & ((1 << EMBED_PARAM_LIMIT)
+ | (1 << EMBED_PARAM_GNU_OFFSET))) != 0)
+ {
+ ret = false;
+ if (!params->has_embed)
+ cpp_error_with_line (pfile, CPP_DL_ERROR,
+ params->base64.base_run.base->src_loc, 0,
+ "'gnu::base64' parameter conflicts with "
+ "'limit' or 'gnu::offset' parameters");
+ }
+ else if (params->base64.count == 0
+ && CPP_OPTION (pfile, preprocessed))
+ {
+ ret = false;
+ if (!params->has_embed)
+ cpp_error_with_line (pfile, CPP_DL_ERROR, params->loc, 0,
+ "'gnu::base64' parameter required in "
+ "preprocessed source");
+ }
+ return ret;
}
param_name = NODE_NAME (token->val.node.spelling);
param_name_len = NODE_LEN (token->val.node.spelling);
@@ -1197,6 +1244,53 @@ _cpp_parse_embed_params (cpp_reader *pfile, struct cpp_embed_params *params)
}
token = _cpp_get_token_no_padding (pfile);
}
+ else if (param_kind == EMBED_PARAM_GNU_BASE64)
+ {
+ token = _cpp_get_token_no_padding (pfile);
+ while (token->type == CPP_OTHER
+ && CPP_OPTION (pfile, preprocessed)
+ && !CPP_OPTION (pfile, directives_only)
+ && token->val.str.len == 1
+ && token->val.str.text[0] == '\\')
+ {
+ /* Allow backslash newline inside of gnu::base64 argument
+ for -fpreprocessed, so that it doesn't have to be
+ megabytes long line. */
+ pfile->state.in_directive = 0;
+ token = _cpp_get_token_no_padding (pfile);
+ pfile->state.in_directive = 3;
+ }
+ if (token->type == CPP_STRING)
+ {
+ do
+ {
+ save_token_for_embed (&params->base64, token);
+ token = _cpp_get_token_no_padding (pfile);
+ while (token->type == CPP_OTHER
+ && CPP_OPTION (pfile, preprocessed)
+ && !CPP_OPTION (pfile, directives_only)
+ && token->val.str.len == 1
+ && token->val.str.text[0] == '\\')
+ {
+ pfile->state.in_directive = 0;
+ token = _cpp_get_token_no_padding (pfile);
+ pfile->state.in_directive = 3;
+ }
+ }
+ while (token->type == CPP_STRING);
+ if (token->type != CPP_CLOSE_PAREN)
+ cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
+ "expected ')'");
+ }
+ else
+ {
+ cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
+ "expected character string literal");
+ if (token->type != CPP_CLOSE_PAREN)
+ token = _cpp_get_token_no_padding (pfile);
+ }
+ token = _cpp_get_token_no_padding (pfile);
+ }
else if (token->type == CPP_OPEN_PAREN)
{
cpp_embed_params_tokens *save = NULL;
@@ -1277,26 +1371,10 @@ do_embed (cpp_reader *pfile)
if (ok)
_cpp_stack_embed (pfile, fname, angle_brackets, &params);
- for (int i = 0; i < 3; ++i)
- {
- cpp_embed_params_tokens *p;
- if (i == 0)
- p = &params.prefix;
- else if (i == 1)
- p = &params.suffix;
- else
- p = &params.if_empty;
- if (p->count == 0)
- continue;
- tokenrun *n;
- for (tokenrun *t = &p->base_run; t; t = n)
- {
- n = t->next;
- XDELETEVEC (t->base);
- if (t != &p->base_run)
- XDELETE (t);
- }
- }
+ _cpp_free_embed_params_tokens (&params.prefix);
+ _cpp_free_embed_params_tokens (&params.suffix);
+ _cpp_free_embed_params_tokens (&params.if_empty);
+ _cpp_free_embed_params_tokens (&params.base64);
done:
XDELETEVEC (fname);