// Copyright (C) 2020-2023 Free Software Foundation, Inc.
// This file is part of GCC.
// GCC is free software; you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 3, or (at your option) any later
// version.
// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
// You should have received a copy of the GNU General Public License
// along with GCC; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
#include "rust-macro-builtins.h"
#include "rust-ast.h"
#include "rust-diagnostics.h"
#include "rust-expr.h"
#include "rust-session-manager.h"
#include "rust-macro-invoc-lexer.h"
#include "rust-lex.h"
#include "rust-parse.h"
#include "rust-early-name-resolver.h"
#include "rust-attribute-visitor.h"
namespace Rust {
namespace {
/**
* Shorthand function for creating unique_ptr tokens.
*
* Wraps TOK in a newly allocated AST::Token and returns the owning pointer.
*/
static std::unique_ptr
make_token (const TokenPtr tok)
{
return std::unique_ptr (new AST::Token (tok));
}
/* Build a literal expression of kind AST::Literal::STRING and core type
CORETYPE_STR that holds VALUE and is located at LOCUS.  The literal is created
with an empty attribute list.  */
std::unique_ptr
make_string (Location locus, std::string value)
{
return std::unique_ptr (
new AST::LiteralExpr (value, AST::Literal::STRING,
PrimitiveCoreType::CORETYPE_STR, {}, locus));
}
/* Rebuild an invocation of the builtin macro KIND at LOCUS so that it can be
   expanded again once the invocations in PENDING_INVOCATIONS have been
   expanded themselves.

   ARGUMENTS is the delimited token tree of the original invocation.  It
   becomes the argument list of the new invocation, and its token stream is
   attached to the returned fragment.  */
// TODO: Is this correct?
static AST::Fragment
make_eager_builtin_invocation (
  AST::BuiltinMacro kind, Location locus, AST::DelimTokenTree arguments,
  std::vector<std::unique_ptr<AST::MacroInvocation>> &&pending_invocations)
{
  std::string path_str;

  switch (kind)
    {
    // TODO: Should this be a table lookup?
    case AST::BuiltinMacro::Assert:
      path_str = "assert";
      break;
    case AST::BuiltinMacro::File:
      path_str = "file";
      break;
    case AST::BuiltinMacro::Line:
      path_str = "line";
      break;
    case AST::BuiltinMacro::Column:
      path_str = "column";
      break;
    case AST::BuiltinMacro::IncludeBytes:
      path_str = "include_bytes";
      break;
    case AST::BuiltinMacro::IncludeStr:
      path_str = "include_str";
      break;
    case AST::BuiltinMacro::CompileError:
      path_str = "compile_error";
      break;
    case AST::BuiltinMacro::Concat:
      path_str = "concat";
      break;
    case AST::BuiltinMacro::Env:
      path_str = "env";
      break;
    case AST::BuiltinMacro::Cfg:
      path_str = "cfg";
      break;
    case AST::BuiltinMacro::Include:
      path_str = "include";
      break;
    }

  // ARGUMENTS is moved into the invocation data below, so its token stream
  // must be extracted first: reading it after the move would inspect a
  // moved-from token tree.
  auto tokens = arguments.to_token_stream ();

  std::unique_ptr<AST::Expr> node = AST::MacroInvocation::Builtin (
    kind,
    AST::MacroInvocData (AST::SimplePath (
			   {AST::SimplePathSegment (path_str, locus)}),
			 std::move (arguments)),
    {}, locus, std::move (pending_invocations));

  return AST::Fragment ({AST::SingleASTNode (std::move (node))},
			std::move (tokens));
}
/* Consume the opening delimiter of INVOC_TOKEN_TREE from PARSER and return
   the token id of the closing delimiter that matches it.

   The opening delimiter is expected to be the parser's current token; this
   is checked with rust_assert.  */
static inline TokenId
macro_end_token (AST::DelimTokenTree &invoc_token_tree,
		 Parser<MacroInvocLexer> &parser)
{
  auto last_token_id = TokenId::RIGHT_CURLY;

  // The skip is performed outside of the assertion: consuming the token is a
  // side effect that must happen even if the assertion macro does not
  // evaluate its argument.
  bool opening_skipped = true;

  switch (invoc_token_tree.get_delim_type ())
    {
    case AST::DelimType::PARENS:
      last_token_id = TokenId::RIGHT_PAREN;
      opening_skipped = parser.skip_token (LEFT_PAREN);
      break;
    case AST::DelimType::CURLY:
      opening_skipped = parser.skip_token (LEFT_CURLY);
      break;
    case AST::DelimType::SQUARE:
      last_token_id = TokenId::RIGHT_SQUARE;
      opening_skipped = parser.skip_token (LEFT_SQUARE);
      break;
    }

  rust_assert (opening_skipped);

  return last_token_id;
}
/* Extract a string literal from NODE.

If NODE is null, is not a literal, or is a literal whose kind is not
AST::Literal::STRING, report "argument must be a string literal" at
PARENT_LOCUS (followed by a note at NODE's own location when NODE is non-null)
and return nullptr.  Otherwise return a clone of NODE.  */
static std::unique_ptr
try_extract_string_literal_from_fragment (const Location &parent_locus,
std::unique_ptr &node)
{
// The cast result is only dereferenced once the null and is_literal checks
// below have passed, thanks to short-circuit evaluation.
auto maybe_lit = static_cast (node.get ());
if (!node || !node->is_literal ()
|| maybe_lit->get_lit_type () != AST::Literal::STRING)
{
rust_error_at (parent_locus, "argument must be a string literal");
if (node)
rust_inform (node->get_locus (), "expanded from here");
return nullptr;
}
// Hand back an independent copy; NODE itself is left untouched.
return std::unique_ptr (
static_cast (node->clone_expr ().release ()));
}
/* Parse a comma-separated list of expressions from PARSER until
LAST_TOKEN_ID or the end of the token stream is reached, and return the parsed
expressions.  A trailing comma before LAST_TOKEN_ID is accepted.

On a syntax error (a leading comma, or an expression followed by something
other than a comma or LAST_TOKEN_ID) an error is reported, HAS_ERROR is set to
true and an empty vector is returned.  HAS_ERROR is never reset to false here.
EXPANDER is not used by this function.  */
static std::vector>
try_expand_many_expr (Parser &parser,
const TokenId last_token_id, MacroExpander *expander,
bool &has_error)
{
auto restrictions = Rust::ParseRestrictions ();
// stop parsing when encountering a closing brace/bracket
restrictions.expr_can_be_null = true;
// we can't use std::optional, so...
auto result = std::vector> ();
auto empty_expr = std::vector> ();
// A list may not start with a comma.
auto first_token = parser.peek_current_token ()->get_id ();
if (first_token == COMMA)
{
rust_error_at (parser.peek_current_token ()->get_locus (),
"expected expression, found %<,%>");
has_error = true;
return empty_expr;
}
while (parser.peek_current_token ()->get_id () != last_token_id
&& parser.peek_current_token ()->get_id () != END_OF_FILE)
{
auto expr = parser.parse_expr (AST::AttrVec (), restrictions);
// something must be so wrong that the expression could not be parsed
rust_assert (expr);
result.push_back (std::move (expr));
// Remember the token following the expression before trying to skip a
// comma, so that it can be used for the diagnostic below.
auto next_token = parser.peek_current_token ();
if (!parser.skip_token (COMMA) && next_token->get_id () != last_token_id)
{
rust_error_at (next_token->get_locus (), "expected token: %<,%>");
// TODO: is this recoverable? to avoid crashing the parser in the next
// fragment we have to exit early here
has_error = true;
return empty_expr;
}
}
return result;
}
/* Parse a single string literal from the given delimited token tree,
and return the LiteralExpr for it. Allow for an optional trailing comma,
but otherwise enforce that these are the only tokens.

Returns nullptr after reporting an error at INVOC_LOCUS when the token tree is
empty, contains more than one argument, or does not start with a string
literal.  EXPANDER is not used by this function.  */
std::unique_ptr
parse_single_string_literal (AST::DelimTokenTree &invoc_token_tree,
Location invoc_locus, MacroExpander *expander)
{
MacroInvocLexer lex (invoc_token_tree.to_token_stream ());
Parser parser (lex);
// Skips the opening delimiter and tells us which closing one to expect.
auto last_token_id = macro_end_token (invoc_token_tree, parser);
std::unique_ptr lit_expr = nullptr;
if (parser.peek_current_token ()->get_id () == STRING_LITERAL)
{
lit_expr = parser.parse_literal_expr ();
parser.maybe_skip_token (COMMA);
// Anything other than the closing delimiter here means extra arguments.
if (parser.peek_current_token ()->get_id () != last_token_id)
{
lit_expr = nullptr;
rust_error_at (invoc_locus, "macro takes 1 argument");
}
}
// No argument at all.
else if (parser.peek_current_token ()->get_id () == last_token_id)
rust_error_at (invoc_locus, "macro takes 1 argument");
else
rust_error_at (invoc_locus, "argument must be a string literal");
parser.skip_token (last_token_id);
return lit_expr;
}
/* Interpret PATH relative to the directory of the source file that contains
   LOCUS and return the resulting path.  When that source file name has no
   directory component, the result is relative to the current directory
   ('.').  */
std::string
source_relative_path (std::string path, Location locus)
{
  const std::string including_file
    = Session::get_instance ().linemap->location_file (locus);

  // Keep everything before the last separator; a bare file name has no
  // directory part, so fall back to '.'.
  const auto last_sep = including_file.rfind (file_separator);
  std::string base_dir = (last_sep == std::string::npos)
			   ? std::string (".")
			   : including_file.substr (0, last_sep);
  base_dir += file_separator;

  return base_dir + path;
}
/* Read the full contents of the file FILENAME and return them in a vector.
FIXME: platform specific. */
std::vector
load_file_bytes (const char *filename)
{
RAIIFile file_wrap (filename);
if (file_wrap.get_raw () == nullptr)
{
rust_error_at (Location (), "cannot open filename %s: %m", filename);
return std::vector ();
}
FILE *f = file_wrap.get_raw ();
fseek (f, 0L, SEEK_END);
long fsize = ftell (f);
fseek (f, 0L, SEEK_SET);
std::vector buf (fsize);
if (fread (&buf[0], fsize, 1, f) != 1)
{
rust_error_at (Location (), "error reading file %s: %m", filename);
return std::vector ();
}
return buf;
}
} // namespace
/* Handler for the builtin macro assert!().  It currently only emits a debug
message and returns an error fragment; both arguments are ignored.  */
AST::Fragment
MacroBuiltin::assert_handler (Location, AST::MacroInvocData &)
{
rust_debug ("assert!() called");
return AST::Fragment::create_error ();
}
/* Expand builtin macro file!() into a string literal naming the source file
   that contains INVOC_LOCUS, as reported by the session's linemap.  The
   invocation data is not inspected.  */
AST::Fragment
MacroBuiltin::file_handler (Location invoc_locus, AST::MacroInvocData &)
{
  auto source_name
    = Session::get_instance ().linemap->location_file (invoc_locus);

  // The AST node is built from a copy of the name; the token then takes
  // over the original string.
  AST::SingleASTNode literal_node (make_string (invoc_locus, source_name));
  auto name_token
    = make_token (Token::make_string (invoc_locus, std::move (source_name)));

  return AST::Fragment ({literal_node}, std::move (name_token));
}
/* Expand builtin macro column!() into an integer literal (core type u32)
holding the column of INVOC_LOCUS as reported by the session's linemap.  The
invocation data is not inspected.  */
AST::Fragment
MacroBuiltin::column_handler (Location invoc_locus, AST::MacroInvocData &)
{
auto current_column
= Session::get_instance ().linemap->location_to_column (invoc_locus);
// Token form of the literal, returned alongside the AST node.
auto column_tok = make_token (
Token::make_int (invoc_locus, std::to_string (current_column)));
auto column_no = AST::SingleASTNode (std::unique_ptr (
new AST::LiteralExpr (std::to_string (current_column), AST::Literal::INT,
PrimitiveCoreType::CORETYPE_U32, {}, invoc_locus)));
return AST::Fragment ({column_no}, std::move (column_tok));
}
/* Expand builtin macro include_bytes!("filename"), which includes the contents
of the given file as reference to a byte array. Yields an expression of type
&'static [u8; N].

Returns an error fragment when the argument is not a single string literal.
Every produced node and token is located at INVOC_LOCUS.  */
AST::Fragment
MacroBuiltin::include_bytes_handler (Location invoc_locus,
AST::MacroInvocData &invoc)
{
/* Get target filename from the macro invocation, which is treated as a path
relative to the include!-ing file (currently being compiled). */
auto lit_expr
= parse_single_string_literal (invoc.get_delim_tok_tree (), invoc_locus,
invoc.get_expander ());
if (lit_expr == nullptr)
return AST::Fragment::create_error ();
std::string target_filename
= source_relative_path (lit_expr->as_string (), invoc_locus);
// load_file_bytes reports its own errors and returns an empty vector on
// failure, in which case an empty array is produced below.
std::vector bytes = load_file_bytes (target_filename.c_str ());
/* Is there a more efficient way to do this? */
std::vector> elts;
// We create the tokens for a borrow expression of a byte array, so
// & [ , , ... ]
std::vector> toks;
toks.emplace_back (make_token (Token::make (AMP, invoc_locus)));
toks.emplace_back (make_token (Token::make (LEFT_SQUARE, invoc_locus)));
// One byte literal expression, plus a byte token and a comma token, per
// byte of the file.
for (uint8_t b : bytes)
{
elts.emplace_back (
new AST::LiteralExpr (std::string (1, (char) b), AST::Literal::BYTE,
PrimitiveCoreType::CORETYPE_U8,
{} /* outer_attrs */, invoc_locus));
toks.emplace_back (make_token (Token::make_byte_char (invoc_locus, b)));
toks.emplace_back (make_token (Token::make (COMMA, invoc_locus)));
}
toks.emplace_back (make_token (Token::make (RIGHT_SQUARE, invoc_locus)));
// Assemble the AST: array elements -> array expression -> borrow of it.
auto elems = std::unique_ptr (
new AST::ArrayElemsValues (std::move (elts), invoc_locus));
auto array = std::unique_ptr (
new AST::ArrayExpr (std::move (elems), {}, {}, invoc_locus));
auto borrow = std::unique_ptr (
new AST::BorrowExpr (std::move (array), false, false, {}, invoc_locus));
auto node = AST::SingleASTNode (std::move (borrow));
return AST::Fragment ({node}, std::move (toks));
}
/* Expand builtin macro include_str!("filename"), which includes the contents
   of the given file as a string.  The file must be UTF-8 encoded.  Yields an
   expression of type &'static str.

   Returns an error fragment when the argument is not a single string
   literal.  When the file contents fail the UTF-8 check an error is reported
   at INVOC_LOCUS and an empty string literal is produced.  */
AST::Fragment
MacroBuiltin::include_str_handler (Location invoc_locus,
				   AST::MacroInvocData &invoc)
{
  /* Get target filename from the macro invocation, which is treated as a path
     relative to the include!-ing file (currently being compiled).  */
  auto lit_expr
    = parse_single_string_literal (invoc.get_delim_tok_tree (), invoc_locus,
				   invoc.get_expander ());
  if (lit_expr == nullptr)
    return AST::Fragment::create_error ();

  std::string target_filename
    = source_relative_path (lit_expr->as_string (), invoc_locus);

  std::vector<uint8_t> bytes = load_file_bytes (target_filename.c_str ());

  /* FIXME: reuse lexer */
  /* Number of continuation bytes still expected for the current character.
     A non-zero value after the loop means the input is malformed.  */
  int expect_single = 0;
  for (uint8_t b : bytes)
    {
      if (expect_single)
	{
	  if ((b & 0xC0) != 0x80)
	    /* character was truncated, exit with expect_single != 0 */
	    break;
	  expect_single--;
	}
      else if (b & 0x80)
	{
	  if (b >= 0xF8)
	    {
	      /* more than 4 leading 1s */
	      expect_single = 1;
	      break;
	    }
	  else if (b >= 0xF0)
	    {
	      /* 4 leading 1s */
	      expect_single = 3;
	    }
	  else if (b >= 0xE0)
	    {
	      /* 3 leading 1s */
	      expect_single = 2;
	    }
	  else if (b >= 0xC0)
	    {
	      /* 2 leading 1s */
	      expect_single = 1;
	    }
	  else
	    {
	      /* only 1 leading 1 */
	      expect_single = 1;
	      break;
	    }
	}
    }

  std::string str;
  if (expect_single)
    rust_error_at (invoc_locus, "%s was not a valid utf-8 file",
		   target_filename.c_str ());
  else
    // Copy from the iterator range rather than from &bytes[0]: BYTES is empty
    // for an empty or unreadable file, and indexing an empty vector is
    // undefined behaviour.
    str.assign (bytes.begin (), bytes.end ());

  auto node = AST::SingleASTNode (make_string (invoc_locus, str));
  auto str_tok = make_token (Token::make_string (invoc_locus, std::move (str)));

  return AST::Fragment ({node}, std::move (str_tok));
}
/* Expand builtin macro compile_error!("error"): report the given message as
   a compilation error at INVOC_LOCUS.  The result is always an error
   fragment, whether or not the argument could be parsed.  */
AST::Fragment
MacroBuiltin::compile_error_handler (Location invoc_locus,
				     AST::MacroInvocData &invoc)
{
  auto message_lit
    = parse_single_string_literal (invoc.get_delim_tok_tree (), invoc_locus,
				   invoc.get_expander ());

  // A malformed argument has already been diagnosed by the parser helper.
  if (!message_lit)
    return AST::Fragment::create_error ();

  const std::string message = message_lit->as_string ();
  rust_error_at (invoc_locus, "%s", message.c_str ());

  return AST::Fragment::create_error ();
}
/* Collect the macro invocations found directly in EXPRESSIONS.

Every element whose AST kind is AST::Kind::MACRO_INVOCATION is cloned and the
clone is appended to the returned vector, in order.  EXPRESSIONS itself is not
modified.  An empty result means no element is a macro invocation.  */
static std::vector>
check_for_eager_invocations (
std::vector> &expressions)
{
std::vector> pending;
for (auto &expr : expressions)
if (expr->get_ast_kind () == AST::Kind::MACRO_INVOCATION)
pending.emplace_back (std::unique_ptr (
static_cast (expr->clone_expr ().release ())));
return pending;
}
/* Expand builtin macro concat!(), which joins all the literal parameters
into a string with no delimiter. */
// This is a weird one. We want to do something where, if something cannot be
// expanded yet (i.e. macro invocation?) we return the whole MacroInvocation
// node again but expanded as much as possible.
// Is that possible? How do we do that?
//
// Let's take a few examples:
//
// 1. concat!(1, 2, true);
// 2. concat!(a!(), 2, true);
// 3. concat!(concat!(1, false), 2, true);
// 4. concat!(concat!(1, a!()), 2, true);
//
// 1. We simply want to return the new fragment: "12true"
// 2. We want to return `concat!(a_expanded, 2, true)` as a fragment
// 3. We want to return `concat!(1, false, 2, true)`
// 4. We want to return `concat!(concat!(1, a_expanded), 2, true)`
//
// How do we do that?
//
// For each (un)expanded fragment: we check if it is expanded fully
//
// 1. What is expanded fully?
// 2. How to check?
//
// If it is expanded fully and not a literal, then we error out.
// Otherwise we simply emplace it back and keep going.
//
// In the second case, we must mark that this concat invocation still has some
// expansion to do: This allows us to return a `MacroInvocation { ... }` as an
// AST fragment, instead of a completed string.
//
// This means that we must change all the `try_expand_many_*` APIs and so on to
// return some sort of index or way to signify that we might want to reuse some
// bits and pieces of the original token tree.
//
// Now, before that: How do we resolve the names used in a builtin macro
// invocation?
// Do we split the two passes of parsing the token tree and then expanding it?
// Can we do that easily?
/* Handler for concat!().  Parses the comma-separated arguments of INVOC.  If
   any argument is itself a macro invocation, returns a new eager concat!()
   invocation carrying those pending invocations.  Otherwise joins the string
   form of every literal argument into one string literal located at
   INVOC_LOCUS.

   Returns an error fragment if the arguments could not be parsed, or if any
   argument is not a literal or is a byte / byte string literal.  */
AST::Fragment
MacroBuiltin::concat_handler (Location invoc_locus, AST::MacroInvocData &invoc)
{
  auto invoc_token_tree = invoc.get_delim_tok_tree ();
  MacroInvocLexer lex (invoc_token_tree.to_token_stream ());
  Parser<MacroInvocLexer> parser (lex);

  auto str = std::string ();
  bool has_error = false;

  auto last_token_id = macro_end_token (invoc_token_tree, parser);

  /* NOTE: concat! could accept no argument, so we don't have any checks here */
  auto expanded_expr = try_expand_many_expr (parser, last_token_id,
					     invoc.get_expander (), has_error);

  // Arguments that are macro invocations must be expanded before the
  // concatenation can take place: hand the whole invocation back.
  auto pending_invocations = check_for_eager_invocations (expanded_expr);
  if (!pending_invocations.empty ())
    return make_eager_builtin_invocation (AST::BuiltinMacro::Concat,
					  invoc_locus,
					  invoc.get_delim_tok_tree (),
					  std::move (pending_invocations));

  for (auto &expr : expanded_expr)
    {
      if (!expr->is_literal ()
	  && expr->get_ast_kind () != AST::MACRO_INVOCATION)
	{
	  has_error = true;
	  rust_error_at (expr->get_locus (), "expected a literal");
	  // diagnostics copied from rustc
	  rust_inform (expr->get_locus (),
		       "only literals (like %<\"foo\"%>, %<42%> and "
		       "%<3.14%>) can be passed to %<concat!()%>");
	  continue;
	}

      // Macro invocations were all returned as pending above, so EXPR is a
      // literal at this point.
      auto *literal = static_cast<AST::LiteralExpr *> (expr.get ());
      if (literal->get_lit_type () == AST::Literal::BYTE
	  || literal->get_lit_type () == AST::Literal::BYTE_STRING)
	{
	  has_error = true;
	  rust_error_at (expr->get_locus (),
			 "cannot concatenate a byte string literal");
	  continue;
	}

      str += literal->as_string ();
    }

  parser.skip_token (last_token_id);

  if (has_error)
    return AST::Fragment::create_error ();

  auto node = AST::SingleASTNode (make_string (invoc_locus, str));
  auto str_tok = make_token (Token::make_string (invoc_locus, std::move (str)));

  return AST::Fragment ({node}, std::move (str_tok));
}
/* Expand builtin macro env!(), which inspects an environment variable at
compile time.

The first argument names the variable; the optional second argument is the
error message to report when the variable is not set.  Both must be string
literals.  Yields a string literal holding the variable's value, located at
INVOC_LOCUS.  If an argument is itself a macro invocation, a new eager env!()
invocation is returned instead.  Returns an error fragment on a parse error, a
wrong number of arguments, a non-string argument or an unset variable.  */
AST::Fragment
MacroBuiltin::env_handler (Location invoc_locus, AST::MacroInvocData &invoc)
{
auto invoc_token_tree = invoc.get_delim_tok_tree ();
MacroInvocLexer lex (invoc_token_tree.to_token_stream ());
Parser parser (lex);
auto last_token_id = macro_end_token (invoc_token_tree, parser);
std::unique_ptr error_expr = nullptr;
std::unique_ptr lit_expr = nullptr;
bool has_error = false;
// NOTE(review): start, end and tokens are not read anywhere below.
auto start = lex.get_offs ();
auto expanded_expr = try_expand_many_expr (parser, last_token_id,
invoc.get_expander (), has_error);
auto end = lex.get_offs ();
auto tokens = lex.get_token_slice (start, end);
if (has_error)
return AST::Fragment::create_error ();
// Arguments that are macro invocations must be expanded first: hand the
// whole invocation back together with the pending invocations.
auto pending = check_for_eager_invocations (expanded_expr);
if (!pending.empty ())
return make_eager_builtin_invocation (AST::BuiltinMacro::Env, invoc_locus,
invoc_token_tree,
std::move (pending));
if (expanded_expr.size () < 1 || expanded_expr.size () > 2)
{
rust_error_at (invoc_locus, "env! takes 1 or 2 arguments");
return AST::Fragment::create_error ();
}
// Always true here given the size check above.
if (expanded_expr.size () > 0)
{
if (!(lit_expr
= try_extract_string_literal_from_fragment (invoc_locus,
expanded_expr[0])))
{
return AST::Fragment::create_error ();
}
}
// Optional custom error message.
if (expanded_expr.size () > 1)
{
if (!(error_expr
= try_extract_string_literal_from_fragment (invoc_locus,
expanded_expr[1])))
{
return AST::Fragment::create_error ();
}
}
parser.skip_token (last_token_id);
auto env_value = getenv (lit_expr->as_string ().c_str ());
if (env_value == nullptr)
{
// Use the caller-provided message when there is one.
if (error_expr == nullptr)
rust_error_at (invoc_locus, "environment variable %qs not defined",
lit_expr->as_string ().c_str ());
else
rust_error_at (invoc_locus, "%s", error_expr->as_string ().c_str ());
return AST::Fragment::create_error ();
}
auto node = AST::SingleASTNode (make_string (invoc_locus, env_value));
// NOTE(review): env_value is the plain pointer returned by getenv, so the
// std::move below merely copies the pointer.
auto tok
= make_token (Token::make_string (invoc_locus, std::move (env_value)));
// FIXME: Do not return an empty token vector here
// NOTE(review): a string token is passed to the fragment below, so the FIXME
// above looks stale — confirm.
return AST::Fragment ({node}, std::move (tok));
}
/* Expand builtin macro cfg!(): check the configuration predicate given as
argument against the current session and yield the boolean literal `true` or
`false`, located at INVOC_LOCUS.

Returns an error fragment unless the invocation holds exactly one meta
item.  */
AST::Fragment
MacroBuiltin::cfg_handler (Location invoc_locus, AST::MacroInvocData &invoc)
{
// only parse if not already parsed
if (!invoc.is_parsed ())
{
std::unique_ptr converted_input (
invoc.get_delim_tok_tree ().parse_to_meta_item ());
if (converted_input == nullptr)
{
rust_debug ("DEBUG: failed to parse macro to meta item");
// TODO: do something now? is this an actual error?
}
else
{
// Transfer the parsed meta items to the invocation data.
std::vector> meta_items (
std::move (converted_input->get_items ()));
invoc.set_meta_item_output (std::move (meta_items));
}
}
/* TODO: assuming that cfg! macros can only have one meta item inner, like cfg
* attributes */
if (invoc.get_meta_items ().size () != 1)
return AST::Fragment::create_error ();
bool result = invoc.get_meta_items ()[0]->check_cfg_predicate (
Session::get_instance ());
auto literal_exp = AST::SingleASTNode (std::unique_ptr (
new AST::LiteralExpr (result ? "true" : "false", AST::Literal::BOOL,
PrimitiveCoreType::CORETYPE_BOOL, {}, invoc_locus)));
auto tok = make_token (
Token::make (result ? TRUE_LITERAL : FALSE_LITERAL, invoc_locus));
// FIXME: Do not return an empty token vector here
// NOTE(review): a boolean token is passed to the fragment below, so the FIXME
// above looks stale — confirm.
return AST::Fragment ({literal_exp}, std::move (tok));
}
/* Expand builtin macro include!(), which includes a source file at the current
scope compile time.

The file is lexed and parsed as a sequence of items, and the returned fragment
holds one node per parsed item together with an empty token vector.  Returns
an error fragment when the argument is not a single string literal, when the
file cannot be opened, or when parsing it produced errors.  */
AST::Fragment
MacroBuiltin::include_handler (Location invoc_locus, AST::MacroInvocData &invoc)
{
/* Get target filename from the macro invocation, which is treated as a path
relative to the include!-ing file (currently being compiled). */
auto lit_expr
= parse_single_string_literal (invoc.get_delim_tok_tree (), invoc_locus,
invoc.get_expander ());
if (lit_expr == nullptr)
return AST::Fragment::create_error ();
std::string filename
= source_relative_path (lit_expr->as_string (), invoc_locus);
// The session keeps track of the included file; the name it hands back is
// the one used from here on.
auto target_filename
= Rust::Session::get_instance ().include_extra_file (std::move (filename));
RAIIFile target_file (target_filename);
Linemap *linemap = Session::get_instance ().linemap;
if (!target_file.ok ())
{
rust_error_at (lit_expr->get_locus (),
"cannot open included file %qs: %m", target_filename);
return AST::Fragment::create_error ();
}
rust_debug ("Attempting to parse included file %s", target_filename);
Lexer lex (target_filename, std::move (target_file), linemap);
Parser parser (lex);
auto parsed_items = parser.parse_items ();
// Emit every error collected by the parser before deciding how to return.
bool has_error = !parser.get_errors ().empty ();
for (const auto &error : parser.get_errors ())
error.emit ();
if (has_error)
{
// inform the user that the errors above are from a included file
rust_inform (invoc_locus, "included from here");
return AST::Fragment::create_error ();
}
std::vector nodes{};
for (auto &item : parsed_items)
{
AST::SingleASTNode node (std::move (item));
// NOTE(review): node is an lvalue, so push_back copies it.
nodes.push_back (node);
}
// FIXME: This returns an empty vector of tokens and works fine, but is that
// the expected behavior? `include` macros are a bit harder to reason about
// since they include tokens. Furthermore, our lexer has no easy way to return
// a slice of tokens like the MacroInvocLexer. So it gets even harder to
// extract tokens from here. For now, let's keep it that way and see if it
// eventually breaks, but I don't expect it to cause many issues since the
// list of tokens is only used when a macro invocation mixes eager
// macro invocations and already expanded tokens. Think
// `concat!(a!(), 15, b!())`. We need to be able to expand a!(), expand b!(),
// and then insert the `15` token in between. In the case of `include!()`, we
// only have one argument. So it's either going to be a macro invocation or a
// string literal.
return AST::Fragment (nodes, std::vector> ());
}
/* Expand builtin macro line!() into an integer literal (core type u32)
holding the line of INVOC_LOCUS as reported by the session's linemap.  The
invocation data is not inspected.  */
AST::Fragment
MacroBuiltin::line_handler (Location invoc_locus, AST::MacroInvocData &)
{
auto current_line
= Session::get_instance ().linemap->location_to_line (invoc_locus);
auto line_no = AST::SingleASTNode (std::unique_ptr (
new AST::LiteralExpr (std::to_string (current_line), AST::Literal::INT,
PrimitiveCoreType::CORETYPE_U32, {}, invoc_locus)));
auto tok
= make_token (Token::make_int (invoc_locus, std::to_string (current_line)));
// FIXME: Do not return an empty token vector here
// NOTE(review): an integer token is passed to the fragment below, so the
// FIXME above looks stale — confirm.
return AST::Fragment ({line_no}, std::move (tok));
}
} // namespace Rust