From 8dc692afc2c86cbf3b5124484dd2596514a5acf3 Mon Sep 17 00:00:00 2001 From: Arthur Cohen Date: Wed, 20 Jul 2022 15:28:23 +0200 Subject: ast: Add new AttributeChecker visitor This commit adds a new attribute checker visitor. Its role is to take care of validating builtin attributes and their inputs. In order to validate doc(alias) strings properly, as well as handle multiline (byte) strings, this also fixes the lexer to better handle EOF in bytes and codepoints. --- gcc/rust/lex/rust-lex.cc | 49 +++++++++++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 19 deletions(-) (limited to 'gcc/rust/lex/rust-lex.cc') diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc index ecf151d..70e6b50 100644 --- a/gcc/rust/lex/rust-lex.cc +++ b/gcc/rust/lex/rust-lex.cc @@ -1696,7 +1696,7 @@ Lexer::parse_byte_string (Location loc) int length = 1; current_char = peek_input (); - while (current_char != '"' && current_char != '\n') + while (current_char != '"' && current_char != EOF) { if (current_char == '\\') { @@ -1723,17 +1723,18 @@ Lexer::parse_byte_string (Location loc) current_column += length; - if (current_char == '\n') - { - rust_error_at (get_current_location (), "unended byte string literal"); - } - else if (current_char == '"') + if (current_char == '"') { current_column++; skip_input (); current_char = peek_input (); } + else if (current_char == EOF) + { + rust_error_at (get_current_location (), "unended byte string literal"); + return Token::make (END_OF_FILE, get_current_location ()); + } else { gcc_unreachable (); @@ -1917,7 +1918,8 @@ Lexer::parse_string (Location loc) int length = 1; current_char32 = peek_codepoint_input (); - while (current_char32.value != '\n' && current_char32.value != '"') + // FIXME: This fails if the input ends. How do we check for EOF? + while (current_char32.value != '"' && !current_char32.is_eof ()) { if (current_char32.value == '\\') { @@ -1949,20 +1951,18 @@ Lexer::parse_string (Location loc) current_column += length; - if (current_char32.value == '\n') - { - rust_error_at (get_current_location (), "unended string literal"); - // by this point, the parser will stuck at this position due to - // undetermined string termination. we now need to unstuck the parser - skip_broken_string_input (current_char32.value); - } - else if (current_char32.value == '"') + if (current_char32.value == '"') { current_column++; skip_input (); current_char = peek_input (); } + else if (current_char32.is_eof ()) + { + rust_error_at (get_current_location (), "unended string literal"); + return Token::make (END_OF_FILE, get_current_location ()); + } else { gcc_unreachable (); @@ -2046,7 +2046,7 @@ Lexer::parse_raw_string (Location loc, int initial_hash_count) skip_input (); Codepoint current_char32 = peek_codepoint_input (); - while (true) + while (!current_char32.is_eof ()) { if (current_char32.value == '"') { @@ -2318,6 +2318,8 @@ Lexer::parse_char_or_lifetime (Location loc) int length = 1; current_char32 = peek_codepoint_input (); + if (current_char32.is_eof ()) + return nullptr; // parse escaped char literal if (current_char32.value == '\\') @@ -2398,6 +2400,9 @@ Lexer::get_input_codepoint_length () { uint8_t input = peek_input (); + if ((int8_t) input == EOF) + return 0; + if (input < 128) { // ascii -- 1 byte @@ -2467,7 +2472,8 @@ Lexer::get_input_codepoint_length () } else { - rust_error_at (get_current_location (), "invalid UTF-8 (too long)"); + rust_error_at (get_current_location (), + "invalid UTF-8 [FIRST] (too long)"); return 0; } } @@ -2478,6 +2484,9 @@ Lexer::peek_codepoint_input () { uint8_t input = peek_input (); + if ((int8_t) input == EOF) + return Codepoint::eof (); + if (input < 128) { // ascii -- 1 byte @@ -2534,7 +2543,8 @@ Lexer::peek_codepoint_input () } else { - rust_error_at (get_current_location (), "invalid UTF-8 (too long)"); + rust_error_at (get_current_location (), + "invalid UTF-8 [SECND] (too long)"); return {0xFFFE}; } } @@ -2620,7 +2630,8 @@ Lexer::test_get_input_codepoint_n_length (int n_start_offset) } else { - rust_error_at (get_current_location (), "invalid UTF-8 (too long)"); + rust_error_at (get_current_location (), + "invalid UTF-8 [THIRD] (too long)"); return 0; } } -- cgit v1.1