diff options
author | bors[bot] <26634292+bors[bot]@users.noreply.github.com> | 2022-07-22 13:40:06 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-07-22 13:40:06 +0000 |
commit | dec7ee4c5e703422ecb9bc94417645d66af7eaae (patch) | |
tree | 3aea5c56255e3cd08ff7ac851e7da4c789e40894 /gcc/rust/lex/rust-lex.cc | |
parent | 15d4bdd84a7aca62bceb9dead770fd1df4759360 (diff) | |
parent | 8dc692afc2c86cbf3b5124484dd2596514a5acf3 (diff) | |
download | gcc-dec7ee4c5e703422ecb9bc94417645d66af7eaae.zip gcc-dec7ee4c5e703422ecb9bc94417645d66af7eaae.tar.gz gcc-dec7ee4c5e703422ecb9bc94417645d66af7eaae.tar.bz2 |
Merge #1406
1406: Add Attribute checking visitor r=CohenArthur a=CohenArthur
This PR adds a new attribute checker visitor. Its role is to take care of validating builtin attributes and their inputs.
This is currently a draft as there are multiple issues:
1. The visitor is not complete
2. The lexer is broken, but was also broken before in that it didn't allow multiline string literals. There is a FIXME asking for how to check for an `EOF` character when parsing UTF-8 strings
3. The checking of attributes is very limited. Currently, only the `#[doc(alias = "...")]` is being checked. This is incomplete.
I'm looking for feedback on the implementation and advice for my issues. Thanks :)
Co-authored-by: Arthur Cohen <arthur.cohen@embecosm.com>
Diffstat (limited to 'gcc/rust/lex/rust-lex.cc')
-rw-r--r-- | gcc/rust/lex/rust-lex.cc | 49 |
1 files changed, 30 insertions, 19 deletions
diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc index ecf151d..70e6b50 100644 --- a/gcc/rust/lex/rust-lex.cc +++ b/gcc/rust/lex/rust-lex.cc @@ -1696,7 +1696,7 @@ Lexer::parse_byte_string (Location loc) int length = 1; current_char = peek_input (); - while (current_char != '"' && current_char != '\n') + while (current_char != '"' && current_char != EOF) { if (current_char == '\\') { @@ -1723,17 +1723,18 @@ Lexer::parse_byte_string (Location loc) current_column += length; - if (current_char == '\n') - { - rust_error_at (get_current_location (), "unended byte string literal"); - } - else if (current_char == '"') + if (current_char == '"') { current_column++; skip_input (); current_char = peek_input (); } + else if (current_char == EOF) + { + rust_error_at (get_current_location (), "unended byte string literal"); + return Token::make (END_OF_FILE, get_current_location ()); + } else { gcc_unreachable (); @@ -1917,7 +1918,8 @@ Lexer::parse_string (Location loc) int length = 1; current_char32 = peek_codepoint_input (); - while (current_char32.value != '\n' && current_char32.value != '"') + // FIXME: This fails if the input ends. How do we check for EOF? + while (current_char32.value != '"' && !current_char32.is_eof ()) { if (current_char32.value == '\\') { @@ -1949,20 +1951,18 @@ Lexer::parse_string (Location loc) current_column += length; - if (current_char32.value == '\n') - { - rust_error_at (get_current_location (), "unended string literal"); - // by this point, the parser will stuck at this position due to - // undetermined string termination. we now need to unstuck the parser - skip_broken_string_input (current_char32.value); - } - else if (current_char32.value == '"') + if (current_char32.value == '"') { current_column++; skip_input (); current_char = peek_input (); } + else if (current_char32.is_eof ()) + { + rust_error_at (get_current_location (), "unended string literal"); + return Token::make (END_OF_FILE, get_current_location ()); + } else { gcc_unreachable (); @@ -2046,7 +2046,7 @@ Lexer::parse_raw_string (Location loc, int initial_hash_count) skip_input (); Codepoint current_char32 = peek_codepoint_input (); - while (true) + while (!current_char32.is_eof ()) { if (current_char32.value == '"') { @@ -2318,6 +2318,8 @@ Lexer::parse_char_or_lifetime (Location loc) int length = 1; current_char32 = peek_codepoint_input (); + if (current_char32.is_eof ()) + return nullptr; // parse escaped char literal if (current_char32.value == '\\') @@ -2398,6 +2400,9 @@ Lexer::get_input_codepoint_length () { uint8_t input = peek_input (); + if ((int8_t) input == EOF) + return 0; + if (input < 128) { // ascii -- 1 byte @@ -2467,7 +2472,8 @@ Lexer::get_input_codepoint_length () } else { - rust_error_at (get_current_location (), "invalid UTF-8 (too long)"); + rust_error_at (get_current_location (), + "invalid UTF-8 [FIRST] (too long)"); return 0; } } @@ -2478,6 +2484,9 @@ Lexer::peek_codepoint_input () { uint8_t input = peek_input (); + if ((int8_t) input == EOF) + return Codepoint::eof (); + if (input < 128) { // ascii -- 1 byte @@ -2534,7 +2543,8 @@ Lexer::peek_codepoint_input () } else { - rust_error_at (get_current_location (), "invalid UTF-8 (too long)"); + rust_error_at (get_current_location (), + "invalid UTF-8 [SECND] (too long)"); return {0xFFFE}; } } @@ -2620,7 +2630,8 @@ Lexer::test_get_input_codepoint_n_length (int n_start_offset) } else { - rust_error_at (get_current_location (), "invalid UTF-8 (too long)"); + rust_error_at (get_current_location (), + "invalid UTF-8 [THIRD] (too long)"); return 0; } } |