diff options
author | Philip Herron <philip.herron@embecosm.com> | 2022-07-05 16:56:14 +0100 |
---|---|---|
committer | Philip Herron <philip.herron@embecosm.com> | 2022-07-07 12:28:58 +0100 |
commit | ec5da37dbfbcc55183f7ea4658c8856b9335ad61 (patch) | |
tree | c1ae133b17d14f2c96f8d5b56774b6e09c9b79bb /gcc | |
parent | 29d594e263f0ccbcbd2babf43ff453c5188f4f2c (diff) | |
download | gcc-ec5da37dbfbcc55183f7ea4658c8856b9335ad61.zip gcc-ec5da37dbfbcc55183f7ea4658c8856b9335ad61.tar.gz gcc-ec5da37dbfbcc55183f7ea4658c8856b9335ad61.tar.bz2 |
Refactor Lexer to support an abstract InputSource class
This patch allows us to remove the fmemopen lex_string hack to support
parsing buffers. This will allow us to support multiple sources such as
metadata imports etc. The patch here updates the parser to hold onto a
reference to the lexer rather than 'owning' the lexer which allows us to
decouple the move semantics here.
Fixes #1203 #1000
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/rust/ast/rust-ast-full-test.cc | 6 | ||||
-rw-r--r-- | gcc/rust/expand/rust-macro-builtins.cc | 11 | ||||
-rw-r--r-- | gcc/rust/expand/rust-macro-expand.cc | 4 | ||||
-rw-r--r-- | gcc/rust/lex/rust-lex.cc | 14 | ||||
-rw-r--r-- | gcc/rust/lex/rust-lex.h | 87 | ||||
-rw-r--r-- | gcc/rust/parse/rust-cfg-parser.cc | 4 | ||||
-rw-r--r-- | gcc/rust/parse/rust-parse.h | 4 | ||||
-rw-r--r-- | gcc/rust/rust-buffered-queue.h | 6 | ||||
-rw-r--r-- | gcc/rust/rust-session-manager.cc | 8 |
9 files changed, 75 insertions, 69 deletions
diff --git a/gcc/rust/ast/rust-ast-full-test.cc b/gcc/rust/ast/rust-ast-full-test.cc index d98a7cf..7cccd40 100644 --- a/gcc/rust/ast/rust-ast-full-test.cc +++ b/gcc/rust/ast/rust-ast-full-test.cc @@ -4058,8 +4058,7 @@ Module::load_items () RAIIFile file_wrap (module_file.c_str ()); Linemap *linemap = Session::get_instance ().linemap; - - if (file_wrap.get_raw () == nullptr) + if (!file_wrap.ok ()) { rust_error_at (get_locus (), "cannot open module file %s: %m", module_file.c_str ()); @@ -4069,10 +4068,9 @@ Module::load_items () rust_debug ("Attempting to parse file %s", module_file.c_str ()); Lexer lex (module_file.c_str (), std::move (file_wrap), linemap); - Parser<Lexer> parser (std::move (lex)); + Parser<Lexer> parser (lex); auto parsed_items = parser.parse_items (); - for (const auto &error : parser.get_errors ()) error.emit_error (); diff --git a/gcc/rust/expand/rust-macro-builtins.cc b/gcc/rust/expand/rust-macro-builtins.cc index 2b175fc..5eace13 100644 --- a/gcc/rust/expand/rust-macro-builtins.cc +++ b/gcc/rust/expand/rust-macro-builtins.cc @@ -70,7 +70,7 @@ parse_single_string_literal (AST::DelimTokenTree &invoc_token_tree, Location invoc_locus) { MacroInvocLexer lex (invoc_token_tree.to_token_stream ()); - Parser<MacroInvocLexer> parser (std::move (lex)); + Parser<MacroInvocLexer> parser (lex); auto last_token_id = macro_end_token (invoc_token_tree, parser); @@ -270,7 +270,8 @@ MacroBuiltin::concat (Location invoc_locus, AST::MacroInvocData &invoc) { auto invoc_token_tree = invoc.get_delim_tok_tree (); MacroInvocLexer lex (invoc_token_tree.to_token_stream ()); - Parser<MacroInvocLexer> parser (std::move (lex)); + Parser<MacroInvocLexer> parser (lex); + auto str = std::string (); bool has_error = false; @@ -313,7 +314,7 @@ MacroBuiltin::env (Location invoc_locus, AST::MacroInvocData &invoc) { auto invoc_token_tree = invoc.get_delim_tok_tree (); MacroInvocLexer lex (invoc_token_tree.to_token_stream ()); - Parser<MacroInvocLexer> parser (std::move (lex)); + 
Parser<MacroInvocLexer> parser (lex); auto last_token_id = macro_end_token (invoc_token_tree, parser); @@ -432,7 +433,7 @@ MacroBuiltin::include (Location invoc_locus, AST::MacroInvocData &invoc) RAIIFile target_file (target_filename); Linemap *linemap = Session::get_instance ().linemap; - if (target_file.get_raw () == nullptr) + if (!target_file.ok ()) { rust_error_at (lit_expr->get_locus (), "cannot open included file %qs: %m", target_filename); @@ -442,7 +443,7 @@ MacroBuiltin::include (Location invoc_locus, AST::MacroInvocData &invoc) rust_debug ("Attempting to parse included file %s", target_filename); Lexer lex (target_filename, std::move (target_file), linemap); - Parser<Lexer> parser (std::move (lex)); + Parser<Lexer> parser (lex); auto parsed_items = parser.parse_items (); bool has_error = !parser.get_errors ().empty (); diff --git a/gcc/rust/expand/rust-macro-expand.cc b/gcc/rust/expand/rust-macro-expand.cc index 1219e11..d8684c8 100644 --- a/gcc/rust/expand/rust-macro-expand.cc +++ b/gcc/rust/expand/rust-macro-expand.cc @@ -375,7 +375,7 @@ MacroExpander::try_match_rule (AST::MacroRule &match_rule, AST::DelimTokenTree &invoc_token_tree) { MacroInvocLexer lex (invoc_token_tree.to_token_stream ()); - Parser<MacroInvocLexer> parser (std::move (lex)); + Parser<MacroInvocLexer> parser (lex); AST::MacroMatcher &matcher = match_rule.get_matcher (); @@ -975,7 +975,7 @@ MacroExpander::transcribe_rule ( // parse it to an ASTFragment MacroInvocLexer lex (std::move (substituted_tokens)); - Parser<MacroInvocLexer> parser (std::move (lex)); + Parser<MacroInvocLexer> parser (lex); auto last_token_id = TokenId::RIGHT_CURLY; diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc index 13921e7..3c48916 100644 --- a/gcc/rust/lex/rust-lex.cc +++ b/gcc/rust/lex/rust-lex.cc @@ -116,14 +116,17 @@ is_non_decimal_int_literal_separator (char character) return character == 'x' || character == 'o' || character == 'b'; } -// this compiles fine, so any intellisense saying 
otherwise is fake news +Lexer::Lexer (const std::string &input) + : input (RAIIFile::create_error ()), current_line (1), current_column (1), + line_map (nullptr), raw_input_source (new BufferInputSource (input, 0)), + input_queue{*raw_input_source}, token_queue (TokenSource (this)) +{} + Lexer::Lexer (const char *filename, RAIIFile file_input, Linemap *linemap) : input (std::move (file_input)), current_line (1), current_column (1), line_map (linemap), - /*input_source (input.get_raw ()), */ - input_queue{InputSource (input.get_raw ())}, - /*token_source (this),*/ - token_queue (TokenSource (this)) + raw_input_source (new FileInputSource (input.get_raw ())), + input_queue{*raw_input_source}, token_queue (TokenSource (this)) { // inform line_table that file is being entered and is in line 1 if (linemap) @@ -138,6 +141,7 @@ Lexer::~Lexer () * mentioned in GCC docs as being useful for "just leaving an included header" * and stuff like that, so this line mapping functionality may need fixing. * FIXME: find out whether this occurs. */ + // line_map->stop(); } diff --git a/gcc/rust/lex/rust-lex.h b/gcc/rust/lex/rust-lex.h index 429b9e1..da7dbc9 100644 --- a/gcc/rust/lex/rust-lex.h +++ b/gcc/rust/lex/rust-lex.h @@ -72,10 +72,14 @@ public: return *this; } + static RAIIFile create_error () { return RAIIFile (nullptr, nullptr); } + ~RAIIFile () { close (); } FILE *get_raw () { return file; } const char *get_filename () { return filename; } + + bool ok () const { return file; } }; class Lexer @@ -136,43 +140,12 @@ private: public: // Construct lexer with input file and filename provided Lexer (const char *filename, RAIIFile input, Linemap *linemap); - ~Lexer (); - /** - * Lex the contents of a string instead of a file - */ - // FIXME: This is unsafe! - // Since we are taking a reference to the string's internal buffer, we must - // ensure that the lexer does not outlive the string, which might not always - // be the case. 
- // - // We could have a fix, which would include using fmemopen() to allocate a - // buffer and copy the string inside it. - // ``` - // // There will be an extra nul-terminator byte written on fclose(), so - // // account for that - // auto string_file = fmemopen(NULL, input.length() + 1, "wr"); - // fwrite(input.c_str(), sizeof(char), input.length(), string_file); - // auto wrapper = RAIIFile(string_file); - // ``` - // But sadly our RAIIFile does not support moving really well... And the - // destructor, which calls fclose(), gets called, triggering a lack of a - // buffer to parse :) - // - // We need to look into fixing the RAIIFile so that it supports this - // behaviour. I'm assuming this will be something like fixing one of the copy - // or move constructors, but is outside of the scope of this fix. For now, - // make sure your lexers don't live longer than the strings they're trying - // to lex - static Lexer lex_string (std::string &input) - { - // We can perform this ugly cast to a non-const char* since we're only - // *reading* the string. This would not be valid if we were doing any - // modification to it. - auto string_file = fmemopen (&input[0], input.length (), "r"); + // Lex the contents of a string instead of a file + Lexer (const std::string &input); - return Lexer (nullptr, RAIIFile (string_file), nullptr); - } + // dtor + ~Lexer (); // don't allow copy semantics (for now, at least) Lexer (const Lexer &other) = delete; @@ -223,22 +196,54 @@ private: static const int max_column_hint = 80; // Input source wrapper thing. - struct InputSource + class InputSource { + public: + virtual ~InputSource () {} + + // Overload operator () to return next char from input stream. + virtual int next () = 0; + }; + + class FileInputSource : public InputSource + { + private: // Input source file. FILE *input; + public: // Create new input source from file. 
- InputSource (FILE *input) : input (input) {} + FileInputSource (FILE *input) : input (input) {} - // Overload operator () to return next char from input stream. - int operator() () { return fgetc (input); } + int next () override { return fgetc (input); } + }; + + class BufferInputSource : public InputSource + { + private: + const std::string &buffer; + size_t offs; + + public: + // Create new input source from file. + BufferInputSource (const std::string &b, size_t offset) + : buffer (b), offs (offset) + {} + + int next () override + { + if (offs >= buffer.size ()) + return EOF; + + return buffer.at (offs++); + } }; // The input source for the lexer. // InputSource input_source; // Input file queue. - buffered_queue<int, InputSource> input_queue; + std::unique_ptr<InputSource> raw_input_source; + buffered_queue<int, InputSource &> input_queue; // Token source wrapper thing. struct TokenSource @@ -250,7 +255,7 @@ private: TokenSource (Lexer *parLexer) : lexer (parLexer) {} // Overload operator () to build token in lexer. - TokenPtr operator() () { return lexer->build_token (); } + TokenPtr next () { return lexer->build_token (); } }; // The token source for the lexer. 
diff --git a/gcc/rust/parse/rust-cfg-parser.cc b/gcc/rust/parse/rust-cfg-parser.cc index f98419b..00693c4 100644 --- a/gcc/rust/parse/rust-cfg-parser.cc +++ b/gcc/rust/parse/rust-cfg-parser.cc @@ -11,8 +11,8 @@ parse_cfg_option (std::string &input, std::string &key, std::string &value) key.clear (); value.clear (); - auto lexer = Lexer::lex_string (input); - auto parser = Parser<Lexer> (std::move (lexer)); + auto lexer = Lexer (input); + auto parser = Parser<Lexer> (lexer); auto token = parser.peek_current_token (); if (token->get_id () != IDENTIFIER) diff --git a/gcc/rust/parse/rust-parse.h b/gcc/rust/parse/rust-parse.h index fa88f8e..d799a56 100644 --- a/gcc/rust/parse/rust-parse.h +++ b/gcc/rust/parse/rust-parse.h @@ -662,7 +662,7 @@ private: public: // Construct parser with specified "managed" token source. - Parser (ManagedTokenSource tokenSource) : lexer (std::move (tokenSource)) {} + Parser (ManagedTokenSource &tokenSource) : lexer (tokenSource) {} // Parse items without parsing an entire crate. This function is the main // parsing loop of AST::Crate::parse_crate(). @@ -689,7 +689,7 @@ public: private: // The token source (usually lexer) associated with the parser. - ManagedTokenSource lexer; + ManagedTokenSource &lexer; // The error list. std::vector<Error> error_table; // The names of inline modules while parsing. diff --git a/gcc/rust/rust-buffered-queue.h b/gcc/rust/rust-buffered-queue.h index 39f3506..afcc467 100644 --- a/gcc/rust/rust-buffered-queue.h +++ b/gcc/rust/rust-buffered-queue.h @@ -28,9 +28,7 @@ template <typename T, typename Source> class buffered_queue { public: // Construct empty queue from Source src. - buffered_queue (Source src) - : source (std::move (src)), start (0), end (0), buffer () - {} + buffered_queue (Source src) : source (src), start (0), end (0), buffer () {} /* disable copying (since source is probably non-copyable) * TODO is this actually a good idea? 
If source is non-copyable, it would @@ -104,7 +102,7 @@ public: /* iterate through buffer and invoke operator () on source on values * past original end */ for (int i = 0; i < num_items_to_read; i++) - buffer[end + i] = source (); + buffer[end + i] = source.next (); // move end based on additional items added end += num_items_to_read; diff --git a/gcc/rust/rust-session-manager.cc b/gcc/rust/rust-session-manager.cc index 8d18f95..1107608 100644 --- a/gcc/rust/rust-session-manager.cc +++ b/gcc/rust/rust-session-manager.cc @@ -643,17 +643,17 @@ void Session::parse_file (const char *filename) { RAIIFile file_wrap (filename); - - if (file_wrap.get_raw () == nullptr) + if (!file_wrap.ok ()) { - rust_fatal_error (Location (), "cannot open filename %s: %m", filename); + rust_error_at (Location (), "cannot open filename %s: %m", filename); + return; } // parse file here /* create lexer and parser - these are file-specific and so aren't instance * variables */ Lexer lex (filename, std::move (file_wrap), linemap); - Parser<Lexer> parser (std::move (lex)); + Parser<Lexer> parser (lex); // generate crate from parser auto parsed_crate = parser.parse_crate (); |