about | summary | refs | log | tree | commit | diff
path: root/gcc/rust/lex/rust-lex.cc
diff options
context:
space:
mode:
author: bors[bot] <26634292+bors[bot]@users.noreply.github.com> 2022-07-22 13:40:06 +0000
committer: GitHub <noreply@github.com> 2022-07-22 13:40:06 +0000
commit: dec7ee4c5e703422ecb9bc94417645d66af7eaae (patch)
tree: 3aea5c56255e3cd08ff7ac851e7da4c789e40894 /gcc/rust/lex/rust-lex.cc
parent: 15d4bdd84a7aca62bceb9dead770fd1df4759360 (diff)
parent: 8dc692afc2c86cbf3b5124484dd2596514a5acf3 (diff)
download: gcc-dec7ee4c5e703422ecb9bc94417645d66af7eaae.zip
gcc-dec7ee4c5e703422ecb9bc94417645d66af7eaae.tar.gz
gcc-dec7ee4c5e703422ecb9bc94417645d66af7eaae.tar.bz2
Merge #1406
1406: Add Attribute checking visitor r=CohenArthur a=CohenArthur

This PR adds a new attribute checker visitor. Its role is to take care of validating builtin attributes and their inputs.

This is currently a draft as there are multiple issues:

1. The visitor is not complete.
2. The lexer is broken, but was also broken before in that it didn't allow multiline string literals. There is a FIXME asking for how to check for an `EOF` character when parsing UTF-8 strings.
3. The checking of attributes is very limited. Currently, only the `#[doc(alias = "...")]` attribute is being checked. This is incomplete.

I'm looking for feedback on the implementation and advice for my issues. Thanks :)

Co-authored-by: Arthur Cohen <arthur.cohen@embecosm.com>
Diffstat (limited to 'gcc/rust/lex/rust-lex.cc')
-rw-r--r-- gcc/rust/lex/rust-lex.cc 49
1 files changed, 30 insertions, 19 deletions
diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc
index ecf151d..70e6b50 100644
--- a/gcc/rust/lex/rust-lex.cc
+++ b/gcc/rust/lex/rust-lex.cc
@@ -1696,7 +1696,7 @@ Lexer::parse_byte_string (Location loc)
int length = 1;
current_char = peek_input ();
- while (current_char != '"' && current_char != '\n')
+ while (current_char != '"' && current_char != EOF)
{
if (current_char == '\\')
{
@@ -1723,17 +1723,18 @@ Lexer::parse_byte_string (Location loc)
current_column += length;
- if (current_char == '\n')
- {
- rust_error_at (get_current_location (), "unended byte string literal");
- }
- else if (current_char == '"')
+ if (current_char == '"')
{
current_column++;
skip_input ();
current_char = peek_input ();
}
+ else if (current_char == EOF)
+ {
+ rust_error_at (get_current_location (), "unended byte string literal");
+ return Token::make (END_OF_FILE, get_current_location ());
+ }
else
{
gcc_unreachable ();
@@ -1917,7 +1918,8 @@ Lexer::parse_string (Location loc)
int length = 1;
current_char32 = peek_codepoint_input ();
- while (current_char32.value != '\n' && current_char32.value != '"')
+ // FIXME: This fails if the input ends. How do we check for EOF?
+ while (current_char32.value != '"' && !current_char32.is_eof ())
{
if (current_char32.value == '\\')
{
@@ -1949,20 +1951,18 @@ Lexer::parse_string (Location loc)
current_column += length;
- if (current_char32.value == '\n')
- {
- rust_error_at (get_current_location (), "unended string literal");
- // by this point, the parser will stuck at this position due to
- // undetermined string termination. we now need to unstuck the parser
- skip_broken_string_input (current_char32.value);
- }
- else if (current_char32.value == '"')
+ if (current_char32.value == '"')
{
current_column++;
skip_input ();
current_char = peek_input ();
}
+ else if (current_char32.is_eof ())
+ {
+ rust_error_at (get_current_location (), "unended string literal");
+ return Token::make (END_OF_FILE, get_current_location ());
+ }
else
{
gcc_unreachable ();
@@ -2046,7 +2046,7 @@ Lexer::parse_raw_string (Location loc, int initial_hash_count)
skip_input ();
Codepoint current_char32 = peek_codepoint_input ();
- while (true)
+ while (!current_char32.is_eof ())
{
if (current_char32.value == '"')
{
@@ -2318,6 +2318,8 @@ Lexer::parse_char_or_lifetime (Location loc)
int length = 1;
current_char32 = peek_codepoint_input ();
+ if (current_char32.is_eof ())
+ return nullptr;
// parse escaped char literal
if (current_char32.value == '\\')
@@ -2398,6 +2400,9 @@ Lexer::get_input_codepoint_length ()
{
uint8_t input = peek_input ();
+ if ((int8_t) input == EOF)
+ return 0;
+
if (input < 128)
{
// ascii -- 1 byte
@@ -2467,7 +2472,8 @@ Lexer::get_input_codepoint_length ()
}
else
{
- rust_error_at (get_current_location (), "invalid UTF-8 (too long)");
+ rust_error_at (get_current_location (),
+ "invalid UTF-8 [FIRST] (too long)");
return 0;
}
}
@@ -2478,6 +2484,9 @@ Lexer::peek_codepoint_input ()
{
uint8_t input = peek_input ();
+ if ((int8_t) input == EOF)
+ return Codepoint::eof ();
+
if (input < 128)
{
// ascii -- 1 byte
@@ -2534,7 +2543,8 @@ Lexer::peek_codepoint_input ()
}
else
{
- rust_error_at (get_current_location (), "invalid UTF-8 (too long)");
+ rust_error_at (get_current_location (),
+ "invalid UTF-8 [SECND] (too long)");
return {0xFFFE};
}
}
@@ -2620,7 +2630,8 @@ Lexer::test_get_input_codepoint_n_length (int n_start_offset)
}
else
{
- rust_error_at (get_current_location (), "invalid UTF-8 (too long)");
+ rust_error_at (get_current_location (),
+ "invalid UTF-8 [THIRD] (too long)");
return 0;
}
}