aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorPhilip Herron <philip.herron@embecosm.com>2022-07-05 16:56:14 +0100
committerPhilip Herron <philip.herron@embecosm.com>2022-07-07 12:28:58 +0100
commitec5da37dbfbcc55183f7ea4658c8856b9335ad61 (patch)
treec1ae133b17d14f2c96f8d5b56774b6e09c9b79bb /gcc
parent29d594e263f0ccbcbd2babf43ff453c5188f4f2c (diff)
downloadgcc-ec5da37dbfbcc55183f7ea4658c8856b9335ad61.zip
gcc-ec5da37dbfbcc55183f7ea4658c8856b9335ad61.tar.gz
gcc-ec5da37dbfbcc55183f7ea4658c8856b9335ad61.tar.bz2
Refactor Lexer to support an abstract InputSource class
This patch allows us to remove the fmemopen lex_string hack to support parsing buffers. This will allow us to support multiple sources, such as metadata imports, etc. The patch here updates the parser to hold onto a reference to the lexer rather than 'owning' the lexer, which allows us to decouple the move semantics here. Fixes #1203 #1000
Diffstat (limited to 'gcc')
-rw-r--r--gcc/rust/ast/rust-ast-full-test.cc6
-rw-r--r--gcc/rust/expand/rust-macro-builtins.cc11
-rw-r--r--gcc/rust/expand/rust-macro-expand.cc4
-rw-r--r--gcc/rust/lex/rust-lex.cc14
-rw-r--r--gcc/rust/lex/rust-lex.h87
-rw-r--r--gcc/rust/parse/rust-cfg-parser.cc4
-rw-r--r--gcc/rust/parse/rust-parse.h4
-rw-r--r--gcc/rust/rust-buffered-queue.h6
-rw-r--r--gcc/rust/rust-session-manager.cc8
9 files changed, 75 insertions, 69 deletions
diff --git a/gcc/rust/ast/rust-ast-full-test.cc b/gcc/rust/ast/rust-ast-full-test.cc
index d98a7cf..7cccd40 100644
--- a/gcc/rust/ast/rust-ast-full-test.cc
+++ b/gcc/rust/ast/rust-ast-full-test.cc
@@ -4058,8 +4058,7 @@ Module::load_items ()
RAIIFile file_wrap (module_file.c_str ());
Linemap *linemap = Session::get_instance ().linemap;
-
- if (file_wrap.get_raw () == nullptr)
+ if (!file_wrap.ok ())
{
rust_error_at (get_locus (), "cannot open module file %s: %m",
module_file.c_str ());
@@ -4069,10 +4068,9 @@ Module::load_items ()
rust_debug ("Attempting to parse file %s", module_file.c_str ());
Lexer lex (module_file.c_str (), std::move (file_wrap), linemap);
- Parser<Lexer> parser (std::move (lex));
+ Parser<Lexer> parser (lex);
auto parsed_items = parser.parse_items ();
-
for (const auto &error : parser.get_errors ())
error.emit_error ();
diff --git a/gcc/rust/expand/rust-macro-builtins.cc b/gcc/rust/expand/rust-macro-builtins.cc
index 2b175fc..5eace13 100644
--- a/gcc/rust/expand/rust-macro-builtins.cc
+++ b/gcc/rust/expand/rust-macro-builtins.cc
@@ -70,7 +70,7 @@ parse_single_string_literal (AST::DelimTokenTree &invoc_token_tree,
Location invoc_locus)
{
MacroInvocLexer lex (invoc_token_tree.to_token_stream ());
- Parser<MacroInvocLexer> parser (std::move (lex));
+ Parser<MacroInvocLexer> parser (lex);
auto last_token_id = macro_end_token (invoc_token_tree, parser);
@@ -270,7 +270,8 @@ MacroBuiltin::concat (Location invoc_locus, AST::MacroInvocData &invoc)
{
auto invoc_token_tree = invoc.get_delim_tok_tree ();
MacroInvocLexer lex (invoc_token_tree.to_token_stream ());
- Parser<MacroInvocLexer> parser (std::move (lex));
+ Parser<MacroInvocLexer> parser (lex);
+
auto str = std::string ();
bool has_error = false;
@@ -313,7 +314,7 @@ MacroBuiltin::env (Location invoc_locus, AST::MacroInvocData &invoc)
{
auto invoc_token_tree = invoc.get_delim_tok_tree ();
MacroInvocLexer lex (invoc_token_tree.to_token_stream ());
- Parser<MacroInvocLexer> parser (std::move (lex));
+ Parser<MacroInvocLexer> parser (lex);
auto last_token_id = macro_end_token (invoc_token_tree, parser);
@@ -432,7 +433,7 @@ MacroBuiltin::include (Location invoc_locus, AST::MacroInvocData &invoc)
RAIIFile target_file (target_filename);
Linemap *linemap = Session::get_instance ().linemap;
- if (target_file.get_raw () == nullptr)
+ if (!target_file.ok ())
{
rust_error_at (lit_expr->get_locus (),
"cannot open included file %qs: %m", target_filename);
@@ -442,7 +443,7 @@ MacroBuiltin::include (Location invoc_locus, AST::MacroInvocData &invoc)
rust_debug ("Attempting to parse included file %s", target_filename);
Lexer lex (target_filename, std::move (target_file), linemap);
- Parser<Lexer> parser (std::move (lex));
+ Parser<Lexer> parser (lex);
auto parsed_items = parser.parse_items ();
bool has_error = !parser.get_errors ().empty ();
diff --git a/gcc/rust/expand/rust-macro-expand.cc b/gcc/rust/expand/rust-macro-expand.cc
index 1219e11..d8684c8 100644
--- a/gcc/rust/expand/rust-macro-expand.cc
+++ b/gcc/rust/expand/rust-macro-expand.cc
@@ -375,7 +375,7 @@ MacroExpander::try_match_rule (AST::MacroRule &match_rule,
AST::DelimTokenTree &invoc_token_tree)
{
MacroInvocLexer lex (invoc_token_tree.to_token_stream ());
- Parser<MacroInvocLexer> parser (std::move (lex));
+ Parser<MacroInvocLexer> parser (lex);
AST::MacroMatcher &matcher = match_rule.get_matcher ();
@@ -975,7 +975,7 @@ MacroExpander::transcribe_rule (
// parse it to an ASTFragment
MacroInvocLexer lex (std::move (substituted_tokens));
- Parser<MacroInvocLexer> parser (std::move (lex));
+ Parser<MacroInvocLexer> parser (lex);
auto last_token_id = TokenId::RIGHT_CURLY;
diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc
index 13921e7..3c48916 100644
--- a/gcc/rust/lex/rust-lex.cc
+++ b/gcc/rust/lex/rust-lex.cc
@@ -116,14 +116,17 @@ is_non_decimal_int_literal_separator (char character)
return character == 'x' || character == 'o' || character == 'b';
}
-// this compiles fine, so any intellisense saying otherwise is fake news
+Lexer::Lexer (const std::string &input)
+ : input (RAIIFile::create_error ()), current_line (1), current_column (1),
+ line_map (nullptr), raw_input_source (new BufferInputSource (input, 0)),
+ input_queue{*raw_input_source}, token_queue (TokenSource (this))
+{}
+
Lexer::Lexer (const char *filename, RAIIFile file_input, Linemap *linemap)
: input (std::move (file_input)), current_line (1), current_column (1),
line_map (linemap),
- /*input_source (input.get_raw ()), */
- input_queue{InputSource (input.get_raw ())},
- /*token_source (this),*/
- token_queue (TokenSource (this))
+ raw_input_source (new FileInputSource (input.get_raw ())),
+ input_queue{*raw_input_source}, token_queue (TokenSource (this))
{
// inform line_table that file is being entered and is in line 1
if (linemap)
@@ -138,6 +141,7 @@ Lexer::~Lexer ()
* mentioned in GCC docs as being useful for "just leaving an included header"
* and stuff like that, so this line mapping functionality may need fixing.
* FIXME: find out whether this occurs. */
+
// line_map->stop();
}
diff --git a/gcc/rust/lex/rust-lex.h b/gcc/rust/lex/rust-lex.h
index 429b9e1..da7dbc9 100644
--- a/gcc/rust/lex/rust-lex.h
+++ b/gcc/rust/lex/rust-lex.h
@@ -72,10 +72,14 @@ public:
return *this;
}
+ static RAIIFile create_error () { return RAIIFile (nullptr, nullptr); }
+
~RAIIFile () { close (); }
FILE *get_raw () { return file; }
const char *get_filename () { return filename; }
+
+ bool ok () const { return file; }
};
class Lexer
@@ -136,43 +140,12 @@ private:
public:
// Construct lexer with input file and filename provided
Lexer (const char *filename, RAIIFile input, Linemap *linemap);
- ~Lexer ();
- /**
- * Lex the contents of a string instead of a file
- */
- // FIXME: This is unsafe!
- // Since we are taking a reference to the string's internal buffer, we must
- // ensure that the lexer does not outlive the string, which might not always
- // be the case.
- //
- // We could have a fix, which would include using fmemopen() to allocate a
- // buffer and copy the string inside it.
- // ```
- // // There will be an extra nul-terminator byte written on fclose(), so
- // // account for that
- // auto string_file = fmemopen(NULL, input.length() + 1, "wr");
- // fwrite(input.c_str(), sizeof(char), input.length(), string_file);
- // auto wrapper = RAIIFile(string_file);
- // ```
- // But sadly our RAIIFile does not support moving really well... And the
- // destructor, which calls fclose(), gets called, triggering a lack of a
- // buffer to parse :)
- //
- // We need to look into fixing the RAIIFile so that it supports this
- // behaviour. I'm assuming this will be something like fixing one of the copy
- // or move constructors, but is outside of the scope of this fix. For now,
- // make sure your lexers don't live longer than the strings they're trying
- // to lex
- static Lexer lex_string (std::string &input)
- {
- // We can perform this ugly cast to a non-const char* since we're only
- // *reading* the string. This would not be valid if we were doing any
- // modification to it.
- auto string_file = fmemopen (&input[0], input.length (), "r");
+ // Lex the contents of a string instead of a file
+ Lexer (const std::string &input);
- return Lexer (nullptr, RAIIFile (string_file), nullptr);
- }
+ // dtor
+ ~Lexer ();
// don't allow copy semantics (for now, at least)
Lexer (const Lexer &other) = delete;
@@ -223,22 +196,54 @@ private:
static const int max_column_hint = 80;
// Input source wrapper thing.
- struct InputSource
+ class InputSource
{
+ public:
+ virtual ~InputSource () {}
+
+ // Return the next char from the input stream.
+ virtual int next () = 0;
+ };
+
+ class FileInputSource : public InputSource
+ {
+ private:
// Input source file.
FILE *input;
+ public:
// Create new input source from file.
- InputSource (FILE *input) : input (input) {}
+ FileInputSource (FILE *input) : input (input) {}
- // Overload operator () to return next char from input stream.
- int operator() () { return fgetc (input); }
+ int next () override { return fgetc (input); }
+ };
+
+ class BufferInputSource : public InputSource
+ {
+ private:
+ const std::string &buffer;
+ size_t offs;
+
+ public:
+ // Create new input source from a string buffer, starting at offset.
+ BufferInputSource (const std::string &b, size_t offset)
+ : buffer (b), offs (offset)
+ {}
+
+ int next () override
+ {
+ if (offs >= buffer.size ())
+ return EOF;
+
+ return buffer.at (offs++);
+ }
};
// The input source for the lexer.
// InputSource input_source;
// Input file queue.
- buffered_queue<int, InputSource> input_queue;
+ std::unique_ptr<InputSource> raw_input_source;
+ buffered_queue<int, InputSource &> input_queue;
// Token source wrapper thing.
struct TokenSource
@@ -250,7 +255,7 @@ private:
TokenSource (Lexer *parLexer) : lexer (parLexer) {}
// Overload operator () to build token in lexer.
- TokenPtr operator() () { return lexer->build_token (); }
+ TokenPtr next () { return lexer->build_token (); }
};
// The token source for the lexer.
diff --git a/gcc/rust/parse/rust-cfg-parser.cc b/gcc/rust/parse/rust-cfg-parser.cc
index f98419b..00693c4 100644
--- a/gcc/rust/parse/rust-cfg-parser.cc
+++ b/gcc/rust/parse/rust-cfg-parser.cc
@@ -11,8 +11,8 @@ parse_cfg_option (std::string &input, std::string &key, std::string &value)
key.clear ();
value.clear ();
- auto lexer = Lexer::lex_string (input);
- auto parser = Parser<Lexer> (std::move (lexer));
+ auto lexer = Lexer (input);
+ auto parser = Parser<Lexer> (lexer);
auto token = parser.peek_current_token ();
if (token->get_id () != IDENTIFIER)
diff --git a/gcc/rust/parse/rust-parse.h b/gcc/rust/parse/rust-parse.h
index fa88f8e..d799a56 100644
--- a/gcc/rust/parse/rust-parse.h
+++ b/gcc/rust/parse/rust-parse.h
@@ -662,7 +662,7 @@ private:
public:
// Construct parser with specified "managed" token source.
- Parser (ManagedTokenSource tokenSource) : lexer (std::move (tokenSource)) {}
+ Parser (ManagedTokenSource &tokenSource) : lexer (tokenSource) {}
// Parse items without parsing an entire crate. This function is the main
// parsing loop of AST::Crate::parse_crate().
@@ -689,7 +689,7 @@ public:
private:
// The token source (usually lexer) associated with the parser.
- ManagedTokenSource lexer;
+ ManagedTokenSource &lexer;
// The error list.
std::vector<Error> error_table;
// The names of inline modules while parsing.
diff --git a/gcc/rust/rust-buffered-queue.h b/gcc/rust/rust-buffered-queue.h
index 39f3506..afcc467 100644
--- a/gcc/rust/rust-buffered-queue.h
+++ b/gcc/rust/rust-buffered-queue.h
@@ -28,9 +28,7 @@ template <typename T, typename Source> class buffered_queue
{
public:
// Construct empty queue from Source src.
- buffered_queue (Source src)
- : source (std::move (src)), start (0), end (0), buffer ()
- {}
+ buffered_queue (Source src) : source (src), start (0), end (0), buffer () {}
/* disable copying (since source is probably non-copyable)
* TODO is this actually a good idea? If source is non-copyable, it would
@@ -104,7 +102,7 @@ public:
/* iterate through buffer and invoke operator () on source on values
* past original end */
for (int i = 0; i < num_items_to_read; i++)
- buffer[end + i] = source ();
+ buffer[end + i] = source.next ();
// move end based on additional items added
end += num_items_to_read;
diff --git a/gcc/rust/rust-session-manager.cc b/gcc/rust/rust-session-manager.cc
index 8d18f95..1107608 100644
--- a/gcc/rust/rust-session-manager.cc
+++ b/gcc/rust/rust-session-manager.cc
@@ -643,17 +643,17 @@ void
Session::parse_file (const char *filename)
{
RAIIFile file_wrap (filename);
-
- if (file_wrap.get_raw () == nullptr)
+ if (!file_wrap.ok ())
{
- rust_fatal_error (Location (), "cannot open filename %s: %m", filename);
+ rust_error_at (Location (), "cannot open filename %s: %m", filename);
+ return;
}
// parse file here
/* create lexer and parser - these are file-specific and so aren't instance
* variables */
Lexer lex (filename, std::move (file_wrap), linemap);
- Parser<Lexer> parser (std::move (lex));
+ Parser<Lexer> parser (lex);
// generate crate from parser
auto parsed_crate = parser.parse_crate ();