aboutsummaryrefslogtreecommitdiff
path: root/libcpp/lex.c
diff options
context:
space:
mode:
authorNathan Sidwell <nathan@acm.org>2020-11-18 10:24:12 -0800
committerNathan Sidwell <nathan@acm.org>2020-11-18 10:24:12 -0800
commitc9c3d5f28a589cd00be5748010783657189e9855 (patch)
tree9a1b904ee5ea9b639bd2b43fa16050edd3321044 /libcpp/lex.c
parent7ceb899e9343493f646434f74a149395f3913d9a (diff)
downloadgcc-c9c3d5f28a589cd00be5748010783657189e9855.zip
gcc-c9c3d5f28a589cd00be5748010783657189e9855.tar.gz
gcc-c9c3d5f28a589cd00be5748010783657189e9855.tar.bz2
preprocessor: C++ module-directives
C++20 modules introduce a new kind of preprocessor directive -- a module directive. These are directives but without the leading '#'. We have to detect them by sniffing the start of a logical line. When detected we replace the initial identifiers with unspellable tokens and pass them through to the language parser the same way deferred pragmas are. There's a PRAGMA_EOL at the logical end of line too. One additional complication is that we have to do header-name lexing after the initial tokens, and that requires changes in the macro-aware piece of the preprocessor. The above sniffer sets a counter in the lexer state, and that triggers at the appropriate point. We then do the same header-name lexing that occurs on a #include directive or has_include pseudo-macro. Except that the header name ends up in the token stream. A couple of token emitters need to deal with the new token possibility. gcc/c-family/ * c-lex.c (c_lex_with_flags): CPP_HEADER_NAMEs can now be seen. libcpp/ * include/cpplib.h (struct cpp_options): Add module_directives option. (NODE_MODULE): New node flag. (struct cpp_hashnode): Make rid-code a bitfield, increase bits in flags and swap with type field. * init.c (post_options): Create module-directive identifier nodes. * internal.h (struct lexer_state): Add directive_file_token & n_modules fields. Add module node enumerator. * lex.c (cpp_maybe_module_directive): New. (_cpp_lex_token): Call it. (cpp_output_token): Add '"' around CPP_HEADER_NAME token. (do_peek_ident, do_peek_module): New. (cpp_directives_only): Detect module-directive lines. * macro.c (cpp_get_token_1): Deal with directive_file_token triggering.
Diffstat (limited to 'libcpp/lex.c')
-rw-r--r--libcpp/lex.c392
1 files changed, 392 insertions, 0 deletions
diff --git a/libcpp/lex.c b/libcpp/lex.c
index f58a882..2343ed5 100644
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -2615,6 +2615,150 @@ _cpp_temp_token (cpp_reader *pfile)
return result;
}
+/* We're at the beginning of a logical line (so not in
+ directives-mode) and RESULT is a CPP_NAME with NODE_MODULE set. See
+ if we should enter deferred_pragma mode to tokenize the rest of the
+ line as a module control-line.
+
+ Up to two further tokens are peeked with _cpp_lex_direct: the
+ keyword following a leading 'export', and the token following the
+ module/import/__import keyword. If the line qualifies, the leading
+ identifiers are flagged NO_EXPAND and remapped to the [1] entries
+ of n_modules, and state.directive_file_token is set. Otherwise
+ save_comments, in_deferred_pragma and angled_headers are reset.
+ Either way the peeked tokens are handed back with
+ _cpp_backup_tokens_direct. */
+
+static void
+cpp_maybe_module_directive (cpp_reader *pfile, cpp_token *result)
+{
+ unsigned backup = 0; /* Tokens we peeked. */
+ cpp_hashnode *node = result->val.node.node;
+ cpp_token *peek = result;
+ /* KEYWORD is the module/import token: RESULT itself, or the token
+ after RESULT when RESULT is 'export'. */
+ cpp_token *keyword = peek;
+ cpp_hashnode *(&n_modules)[spec_nodes::M_HWM][2] = pfile->spec_nodes.n_modules;
+ /* Nonzero only for the import forms; it is later stored into
+ state.directive_file_token. */
+ int header_count = 0;
+
+ /* Make sure the incoming state is as we expect it. This way we
+ can restore it using constants. */
+ gcc_checking_assert (!pfile->state.in_deferred_pragma
+ && !pfile->state.skipping
+ && !pfile->state.parsing_args
+ && !pfile->state.angled_headers
+ && (pfile->state.save_comments
+ == !CPP_OPTION (pfile, discard_comments)));
+
+ /* Enter directives mode sufficiently for peeking. We don't have
+ to actually set in_directive. */
+ pfile->state.in_deferred_pragma = true;
+
+ /* These two fields are needed to process tokenization in deferred
+ pragma mode. They are not used outside deferred pragma mode or
+ directives mode. */
+ pfile->state.pragma_allow_expansion = true;
+ pfile->directive_line = result->src_loc;
+
+ /* Saving comments is incompatible with directives mode. */
+ pfile->state.save_comments = 0;
+
+ if (node == n_modules[spec_nodes::M_EXPORT][0])
+ {
+ /* 'export' must be followed by another NODE_MODULE identifier;
+ peek it and carry on classifying that one instead. */
+ peek = _cpp_lex_direct (pfile);
+ keyword = peek;
+ backup++;
+ if (keyword->type != CPP_NAME)
+ goto not_module;
+ node = keyword->val.node.node;
+ if (!(node->flags & NODE_MODULE))
+ goto not_module;
+ }
+
+ /* NOTE(review): header_count is BACKUP + 2, plus a bias of 16 for
+ '__import' and for 'import' in already-preprocessed input. The
+ meaning of the bias is not visible in this function; presumably
+ it is decoded by whatever consumes directive_file_token --
+ confirm against macro.c. */
+ if (node == n_modules[spec_nodes::M__IMPORT][0])
+ /* __import */
+ header_count = backup + 2 + 16;
+ else if (node == n_modules[spec_nodes::M_IMPORT][0])
+ /* import */
+ header_count = backup + 2 + (CPP_OPTION (pfile, preprocessed) ? 16 : 0);
+ else if (node == n_modules[spec_nodes::M_MODULE][0])
+ ; /* module */
+ else
+ goto not_module;
+
+ /* We've seen [export] {module|import|__import}. Check the next token. */
+ if (header_count)
+ /* After '{,__}import' a header name may appear. */
+ pfile->state.angled_headers = true;
+ peek = _cpp_lex_direct (pfile);
+ backup++;
+
+ /* ... import followed by identifier, ':', '<' or
+ header-name preprocessing tokens, or module
+ followed by cpp-identifier, ':' or ';' preprocessing
+ tokens. C++ keywords are not yet relevant. */
+ if (peek->type == CPP_NAME
+ || peek->type == CPP_COLON
+ || (header_count
+ ? (peek->type == CPP_LESS
+ || (peek->type == CPP_STRING && peek->val.str.text[0] != 'R')
+ || peek->type == CPP_HEADER_NAME)
+ : peek->type == CPP_SEMICOLON))
+ {
+ /* Macro expansion on the rest of the line is permitted unless the
+ input is already preprocessed. */
+ pfile->state.pragma_allow_expansion = !CPP_OPTION (pfile, preprocessed);
+ if (!pfile->state.pragma_allow_expansion)
+ pfile->state.prevent_expansion++;
+
+ /* Only the 'module' forms (header_count == 0) are diagnosed when
+ the current line map was entered by inclusion. */
+ if (!header_count && linemap_included_from
+ (LINEMAPS_LAST_ORDINARY_MAP (pfile->line_table)))
+ cpp_error_with_line (pfile, CPP_DL_ERROR, keyword->src_loc, 0,
+ "module control-line cannot be in included file");
+
+ /* The first one or two tokens cannot be macro names. */
+ /* IX counts down from BACKUP - 1 to 0: index 0 is RESULT, any
+ nonzero index is KEYWORD. */
+ for (int ix = backup; ix--;)
+ {
+ cpp_token *tok = ix ? keyword : result;
+ cpp_hashnode *node = tok->val.node.node;
+
+ /* Don't attempt to expand the token. */
+ tok->flags |= NO_EXPAND;
+ if (_cpp_defined_macro_p (node)
+ && !cpp_fun_like_macro_p (node))
+ cpp_error_with_line (pfile, CPP_DL_ERROR, tok->src_loc, 0,
+ "module control-line \"%s\" cannot be"
+ " an object-like macro",
+ NODE_NAME (node));
+ }
+
+ /* Map to underbar variants. */
+ keyword->val.node.node = n_modules[header_count
+ ? spec_nodes::M_IMPORT
+ : spec_nodes::M_MODULE][1];
+ /* BACKUP is 2 exactly when a leading 'export' was consumed, in
+ which case RESULT is that 'export' token. */
+ if (backup != 1)
+ result->val.node.node = n_modules[spec_nodes::M_EXPORT][1];
+
+ /* Maybe tell the tokenizer we expect a header-name down the
+ road. */
+ pfile->state.directive_file_token = header_count;
+ }
+ else
+ {
+ /* The early rejections above jump straight into this block. */
+ not_module:
+ /* Drop out of directive mode. */
+ /* We asserted save_comments had this value upon entry. */
+ pfile->state.save_comments
+ = !CPP_OPTION (pfile, discard_comments);
+ pfile->state.in_deferred_pragma = false;
+ /* Do not let this remain on. */
+ pfile->state.angled_headers = false;
+ }
+
+ /* In either case we want to backup the peeked tokens. */
+ if (backup)
+ {
+ /* If we saw EOL, we should drop it, because this isn't a module
+ control-line after all. */
+ bool eol = peek->type == CPP_PRAGMA_EOL;
+ /* When the only peeked token is the EOL there is nothing to put
+ back at all. */
+ if (!eol || backup > 1)
+ {
+ /* Put the peeked tokens back. */
+ _cpp_backup_tokens_direct (pfile, backup);
+ /* But if the last one was an EOL, forget it. */
+ if (eol)
+ pfile->lookaheads--;
+ }
+ }
+}
+
/* Lex a token into RESULT (external interface). Takes care of issues
like directive handling, token lookahead, multiple include
optimization and skipping. */
@@ -2663,6 +2807,21 @@ _cpp_lex_token (cpp_reader *pfile)
}
else if (pfile->state.in_deferred_pragma)
result = &pfile->directive_result;
+ else if (result->type == CPP_NAME
+ && (result->val.node.node->flags & NODE_MODULE)
+ && !pfile->state.skipping
+ /* Unlike regular directives, we do not deal with
+ tokenizing module directives as macro arguments.
+ That's not permitted. */
+ && !pfile->state.parsing_args)
+ {
+ /* P1857. Before macro expansion, At start of logical
+ line ... */
+ /* We don't have to consider lookaheads at this point. */
+ gcc_checking_assert (!pfile->lookaheads);
+
+ cpp_maybe_module_directive (pfile, result);
+ }
if (pfile->cb.line_change && !pfile->state.skipping)
pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
@@ -3461,7 +3620,11 @@ cpp_output_token (const cpp_token *token, FILE *fp)
break;
case SPELL_LITERAL:
+ if (token->type == CPP_HEADER_NAME)
+ fputc ('"', fp);
fwrite (token->val.str.text, 1, token->val.str.len, fp);
+ if (token->type == CPP_HEADER_NAME)
+ fputc ('"', fp);
break;
case SPELL_NONE:
@@ -3947,6 +4110,188 @@ do_peek_prev (const unsigned char *peek, const unsigned char *bound)
return peek;
}
+/* If PEEK[-1] is identifier MATCH, scan past it and trailing white
+ space. Otherwise return NULL.
+
+ The first character of MATCH is not compared here (the loop starts
+ at MATCH[1]), so the caller must already have matched it at
+ PEEK[-1]. LIMIT is only passed on to do_peek_next and
+ do_peek_backslash. On success the returned pointer is at the first
+ character after the identifier that is neither a space nor a
+ tab. */
+
+static const unsigned char *
+do_peek_ident (const char *match, const unsigned char *peek,
+ const unsigned char *limit)
+{
+ /* Compare MATCH[1..] against the buffer. On a mismatch give
+ do_peek_next one chance to advance PEEK before giving up.
+ NOTE(review): do_peek_next is defined outside this view;
+ presumably it steps over an escaped newline -- confirm. */
+ for (; *++match; peek++)
+ if (*peek != *match)
+ {
+ peek = do_peek_next (peek, limit);
+ if (*peek != *match)
+ return NULL;
+ }
+
+ /* Must now not be looking at an identifier char. */
+ peek = do_peek_next (peek, limit);
+ if (ISIDNUM (*peek))
+ return NULL;
+
+ /* Skip control-line whitespace. */
+ ws:
+ while (*peek == ' ' || *peek == '\t')
+ peek++;
+ if (__builtin_expect (*peek == '\\', false))
+ {
+ /* If do_peek_backslash moved us off the backslash, resume
+ skipping whitespace; otherwise stop at the backslash. */
+ peek = do_peek_backslash (peek, limit);
+ if (*peek != '\\')
+ goto ws;
+ }
+
+ return peek;
+}
+
+/* Are we looking at a module control line starting at PEEK - 1?
+
+ C is the character at PEEK[-1], i.e. the first character of the
+ candidate keyword, and LIMIT is handed to the do_peek_* helpers.
+ PFILE is consulted only for the rliterals and digraphs options.
+ Returns true when the text spells [export] {import|module}, or
+ __import, followed by a character that could begin an acceptable
+ next token; returns false otherwise. This is a character-level
+ sniff only -- no tokens are lexed here. */
+
+static bool
+do_peek_module (cpp_reader *pfile, unsigned char c,
+ const unsigned char *peek, const unsigned char *limit)
+{
+ /* Set for the import forms, clear for module; it selects which
+ following characters are acceptable below. */
+ bool import = false;
+
+ /* In each branch the second character is pre-checked (the expected
+ letter, or a backslash that do_peek_ident gets a chance to
+ resolve) before the full identifier match is attempted. */
+ if (__builtin_expect (c == 'e', false))
+ {
+ if (!((peek[0] == 'x' || peek[0] == '\\')
+ && (peek = do_peek_ident ("export", peek, limit))))
+ return false;
+
+ /* export, peek for import or module. No need to peek __import
+ here. */
+ /* PEEK now points at the first character of the next word, so
+ do_peek_ident is given PEEK + 1 to keep its PEEK[-1]
+ convention. */
+ if (peek[0] == 'i')
+ {
+ if (!((peek[1] == 'm' || peek[1] == '\\')
+ && (peek = do_peek_ident ("import", peek + 1, limit))))
+ return false;
+ import = true;
+ }
+ else if (peek[0] == 'm')
+ {
+ if (!((peek[1] == 'o' || peek[1] == '\\')
+ && (peek = do_peek_ident ("module", peek + 1, limit))))
+ return false;
+ }
+ else
+ return false;
+ }
+ else if (__builtin_expect (c == 'i', false))
+ {
+ if (!((peek[0] == 'm' || peek[0] == '\\')
+ && (peek = do_peek_ident ("import", peek, limit))))
+ return false;
+ import = true;
+ }
+ else if (__builtin_expect (c == '_', false))
+ {
+ /* Needed for translated includes. */
+ if (!((peek[0] == '_' || peek[0] == '\\')
+ && (peek = do_peek_ident ("__import", peek, limit))))
+ return false;
+ import = true;
+ }
+ else if (__builtin_expect (c == 'm', false))
+ {
+ if (!((peek[0] == 'o' || peek[0] == '\\')
+ && (peek = do_peek_ident ("module", peek, limit))))
+ return false;
+ }
+ else
+ return false;
+
+ /* Peek the next character to see if it's good enough. We'll be at
+ the first non-whitespace char, including skipping an escaped
+ newline. */
+ /* ... import followed by identifier, ':', '<' or header-name
+ preprocessing tokens, or module followed by identifier, ':' or
+ ';' preprocessing tokens. */
+ unsigned char p = *peek++;
+
+ /* A character literal is ... single quotes, ... optionally preceded
+ by u8, u, U, or L */
+ /* A string-literal is a ... double quotes, optionally prefixed by
+ R, u8, u8R, u, uR, U, UR, L, or LR */
+ /* The literal-prefix letters are examined before the generic
+ identifier test so that a prefixed string or character literal
+ is rejected rather than mistaken for an identifier. */
+ if (p == 'u')
+ {
+ peek = do_peek_next (peek, limit);
+ if (*peek == '8')
+ {
+ /* 'u8': step over the '8' and continue as for U/L. */
+ peek++;
+ goto peek_u8;
+ }
+ goto peek_u;
+ }
+ else if (p == 'U' || p == 'L')
+ {
+ peek_u8:
+ peek = do_peek_next (peek, limit);
+ peek_u:
+ /* A quote directly after the prefix means a literal. */
+ if (*peek == '\"' || *peek == '\'')
+ return false;
+
+ /* Possibly a raw-string prefix such as uR, u8R, UR or LR. */
+ if (*peek == 'R')
+ goto peek_R;
+ /* Identifier. Ok. */
+ }
+ else if (p == 'R')
+ {
+ peek_R:
+ /* R" begins a raw string only when raw literals are enabled. */
+ if (CPP_OPTION (pfile, rliterals))
+ {
+ peek = do_peek_next (peek, limit);
+ if (*peek == '\"')
+ return false;
+ }
+ /* Identifier. Ok. */
+ }
+ /* When the execution character set has contiguous letters (the
+ constant test 'Z' - 'A' == 25) use explicit range checks;
+ otherwise fall back to ISIDST. */
+ else if ('Z' - 'A' == 25
+ ? ((p >= 'A' && p <= 'Z') || (p >= 'a' && p <= 'z') || p == '_')
+ : ISIDST (p))
+ {
+ /* Identifier. Ok. */
+ }
+ else if (p == '<')
+ {
+ /* Maybe angle header, ok for import. Reject
+ '<=', '<<' digraph:'<:'. */
+ if (!import)
+ return false;
+ peek = do_peek_next (peek, limit);
+ if (*peek == '=' || *peek == '<'
+ || (*peek == ':' && CPP_OPTION (pfile, digraphs)))
+ return false;
+ }
+ else if (p == ';')
+ {
+ /* SEMICOLON, ok for module. */
+ if (import)
+ return false;
+ }
+ else if (p == '"')
+ {
+ /* STRING, ok for import. */
+ if (!import)
+ return false;
+ }
+ else if (p == ':')
+ {
+ /* Maybe COLON, ok. Reject '::', digraph:':>'. */
+ peek = do_peek_next (peek, limit);
+ if (*peek == ':' || (*peek == '>' && CPP_OPTION (pfile, digraphs)))
+ return false;
+ }
+ else
+ /* FIXME: Detect a unicode character, excluding those not
+ permitted as the initial character. [lex.name]/1. I presume
+ we need to check the \[uU] spellings, and directly using
+ Unicode in say UTF8 form? Or perhaps we do the phase-1
+ conversion of UTF8 to universal-character-names? */
+ return false;
+
+ return true;
+}
+
/* Directives-only scanning. Somewhat more relaxed than correct
parsing -- some ill-formed programs will not be rejected. */
@@ -3955,6 +4300,8 @@ cpp_directive_only_process (cpp_reader *pfile,
void *data,
void (*cb) (cpp_reader *, CPP_DO_task, void *, ...))
{
+ bool module_p = CPP_OPTION (pfile, module_directives);
+
do
{
restart:
@@ -4347,6 +4694,51 @@ cpp_directive_only_process (cpp_reader *pfile,
}
goto dflt;
+ case '_':
+ case 'e':
+ case 'i':
+ case 'm':
+ if (bol && module_p && !pfile->state.skipping
+ && do_peek_module (pfile, c, pos, limit))
+ {
+ /* We've seen the start of a module control line.
+ Start up the tokenizer. */
+ pos--; /* Backup over the first character. */
+
+ /* Backup over whitespace to start of line. */
+ while (pos > line_start
+ && (pos[-1] == ' ' || pos[-1] == '\t'))
+ pos--;
+
+ if (pos > base)
+ cb (pfile, CPP_DO_print, data, line_count, base, pos - base);
+
+ /* Prep things for directive handling. */
+ buffer->next_line = pos;
+ buffer->need_line = true;
+
+ /* Now get tokens until the PRAGMA_EOL. */
+ do
+ {
+ location_t spelling;
+ const cpp_token *tok
+ = cpp_get_token_with_location (pfile, &spelling);
+
+ gcc_assert (pfile->state.in_deferred_pragma
+ || tok->type == CPP_PRAGMA_EOL);
+ cb (pfile, CPP_DO_token, data, tok, spelling);
+ }
+ while (pfile->state.in_deferred_pragma);
+
+ if (pfile->buffer->next_line < pfile->buffer->rlimit)
+ cb (pfile, CPP_DO_location, data,
+ pfile->line_table->highest_line);
+
+ pfile->mi_valid = false;
+ goto restart;
+ }
+ goto dflt;
+
default:
dflt:
bol = false;