diff options
author | Arthur Cohen <arthur.cohen@embecosm.com> | 2022-07-20 15:28:23 +0200 |
---|---|---|
committer | Arthur Cohen <arthur.cohen@embecosm.com> | 2022-07-21 11:16:19 +0200 |
commit | 8dc692afc2c86cbf3b5124484dd2596514a5acf3 (patch) | |
tree | 9bc01a3484c3786743ab0d4a65cfca4dae95321e /gcc/rust/lex | |
parent | 137cd3bbaa05038b01c46f7ac7472da7ca662ed7 (diff) | |
download | gcc-8dc692afc2c86cbf3b5124484dd2596514a5acf3.zip gcc-8dc692afc2c86cbf3b5124484dd2596514a5acf3.tar.gz gcc-8dc692afc2c86cbf3b5124484dd2596514a5acf3.tar.bz2 |
ast: Add new AttributeChecker visitor
This commit adds a new attribute checker visitor. Its role is to take care of validating builtin attributes and their inputs.
To validate doc(alias) strings properly and to handle multiline (byte)
strings, this commit also fixes the lexer so that it detects EOF correctly
when reading bytes and codepoints.
Diffstat (limited to 'gcc/rust/lex')
-rw-r--r-- | gcc/rust/lex/rust-codepoint.h | 4 | ||||
-rw-r--r-- | gcc/rust/lex/rust-lex.cc | 49 |
2 files changed, 33 insertions, 20 deletions
diff --git a/gcc/rust/lex/rust-codepoint.h b/gcc/rust/lex/rust-codepoint.h index cdadfcd..22da080 100644 --- a/gcc/rust/lex/rust-codepoint.h +++ b/gcc/rust/lex/rust-codepoint.h @@ -32,11 +32,13 @@ struct Codepoint // Creates a codepoint from an encoded UTF-8 value. Codepoint (uint32_t value) : value (value) {} + static Codepoint eof () { return Codepoint (UINT32_MAX); } + bool is_eof () const { return value == UINT32_MAX; } + // Returns a C++ string containing string value of codepoint. std::string as_string (); bool operator== (Codepoint other) const { return value == other.value; } - bool operator!= (Codepoint other) const { return !operator== (other); } }; } // namespace Rust diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc index ecf151d..70e6b50 100644 --- a/gcc/rust/lex/rust-lex.cc +++ b/gcc/rust/lex/rust-lex.cc @@ -1696,7 +1696,7 @@ Lexer::parse_byte_string (Location loc) int length = 1; current_char = peek_input (); - while (current_char != '"' && current_char != '\n') + while (current_char != '"' && current_char != EOF) { if (current_char == '\\') { @@ -1723,17 +1723,18 @@ Lexer::parse_byte_string (Location loc) current_column += length; - if (current_char == '\n') - { - rust_error_at (get_current_location (), "unended byte string literal"); - } - else if (current_char == '"') + if (current_char == '"') { current_column++; skip_input (); current_char = peek_input (); } + else if (current_char == EOF) + { + rust_error_at (get_current_location (), "unended byte string literal"); + return Token::make (END_OF_FILE, get_current_location ()); + } else { gcc_unreachable (); @@ -1917,7 +1918,8 @@ Lexer::parse_string (Location loc) int length = 1; current_char32 = peek_codepoint_input (); - while (current_char32.value != '\n' && current_char32.value != '"') + // FIXME: This fails if the input ends. How do we check for EOF? 
+ while (current_char32.value != '"' && !current_char32.is_eof ()) { if (current_char32.value == '\\') { @@ -1949,20 +1951,18 @@ Lexer::parse_string (Location loc) current_column += length; - if (current_char32.value == '\n') - { - rust_error_at (get_current_location (), "unended string literal"); - // by this point, the parser will stuck at this position due to - // undetermined string termination. we now need to unstuck the parser - skip_broken_string_input (current_char32.value); - } - else if (current_char32.value == '"') + if (current_char32.value == '"') { current_column++; skip_input (); current_char = peek_input (); } + else if (current_char32.is_eof ()) + { + rust_error_at (get_current_location (), "unended string literal"); + return Token::make (END_OF_FILE, get_current_location ()); + } else { gcc_unreachable (); @@ -2046,7 +2046,7 @@ Lexer::parse_raw_string (Location loc, int initial_hash_count) skip_input (); Codepoint current_char32 = peek_codepoint_input (); - while (true) + while (!current_char32.is_eof ()) { if (current_char32.value == '"') { @@ -2318,6 +2318,8 @@ Lexer::parse_char_or_lifetime (Location loc) int length = 1; current_char32 = peek_codepoint_input (); + if (current_char32.is_eof ()) + return nullptr; // parse escaped char literal if (current_char32.value == '\\') @@ -2398,6 +2400,9 @@ Lexer::get_input_codepoint_length () { uint8_t input = peek_input (); + if ((int8_t) input == EOF) + return 0; + if (input < 128) { // ascii -- 1 byte @@ -2467,7 +2472,8 @@ Lexer::get_input_codepoint_length () } else { - rust_error_at (get_current_location (), "invalid UTF-8 (too long)"); + rust_error_at (get_current_location (), + "invalid UTF-8 [FIRST] (too long)"); return 0; } } @@ -2478,6 +2484,9 @@ Lexer::peek_codepoint_input () { uint8_t input = peek_input (); + if ((int8_t) input == EOF) + return Codepoint::eof (); + if (input < 128) { // ascii -- 1 byte @@ -2534,7 +2543,8 @@ Lexer::peek_codepoint_input () } else { - rust_error_at 
(get_current_location (), "invalid UTF-8 (too long)"); + rust_error_at (get_current_location (), + "invalid UTF-8 [SECND] (too long)"); return {0xFFFE}; } } @@ -2620,7 +2630,8 @@ Lexer::test_get_input_codepoint_n_length (int n_start_offset) } else { - rust_error_at (get_current_location (), "invalid UTF-8 (too long)"); + rust_error_at (get_current_location (), + "invalid UTF-8 [THIRD] (too long)"); return 0; } } |