Merge #1406

1406: Add Attribute checking visitor r=CohenArthur a=CohenArthur This PR adds a new attribute checker visitor. Its role is to take care of validating builtin attributes and their inputs. This is currently a draft as there are multiple issues: 1. The visitor is not complete. 2. The lexer is broken, but was also broken before in that it didn't allow multiline string literals. There is a FIXME asking how to check for an `EOF` character when parsing UTF-8 strings. 3. The checking of attributes is very limited. Currently, only the `#[doc(alias = "...")]` attribute is being checked. This is incomplete. I'm looking for feedback on the implementation and advice for my issues. Thanks :) Co-authored-by: Arthur Cohen <arthur.cohen@embecosm.com>
author: bors[bot] <26634292+bors[bot]@users.noreply.github.com> 2022-07-22 13:40:06 +0000
committer: GitHub <noreply@github.com> 2022-07-22 13:40:06 +0000
commit: dec7ee4c5e703422ecb9bc94417645d66af7eaae (patch)
tree: 3aea5c56255e3cd08ff7ac851e7da4c789e40894 /gcc/rust/lex/rust-lex.cc
parent: 15d4bdd84a7aca62bceb9dead770fd1df4759360 (diff)
parent: 8dc692afc2c86cbf3b5124484dd2596514a5acf3 (diff)
download: gcc-dec7ee4c5e703422ecb9bc94417645d66af7eaae.zip
gcc-dec7ee4c5e703422ecb9bc94417645d66af7eaae.tar.gz
gcc-dec7ee4c5e703422ecb9bc94417645d66af7eaae.tar.bz2
1 files changed, 30 insertions, 19 deletions
diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc
index ecf151d..70e6b50 100644
--- a/gcc/rust/lex/rust-lex.cc
+++ b/gcc/rust/lex/rust-lex.cc
@@ -1696,7 +1696,7 @@ Lexer::parse_byte_string (Location loc)
   int length = 1;
   current_char = peek_input ();
 
-  while (current_char != '"' && current_char != '\n')
+  while (current_char != '"' && current_char != EOF)
     {
       if (current_char == '\\')
 	{
@@ -1723,17 +1723,18 @@ Lexer::parse_byte_string (Location loc)
 
   current_column += length;
 
-  if (current_char == '\n')
-    {
-      rust_error_at (get_current_location (), "unended byte string literal");
-    }
-  else if (current_char == '"')
+  if (current_char == '"')
     {
       current_column++;
 
       skip_input ();
       current_char = peek_input ();
     }
+  else if (current_char == EOF)
+    {
+      rust_error_at (get_current_location (), "unended byte string literal");
+      return Token::make (END_OF_FILE, get_current_location ());
+    }
   else
     {
       gcc_unreachable ();
@@ -1917,7 +1918,8 @@ Lexer::parse_string (Location loc)
   int length = 1;
   current_char32 = peek_codepoint_input ();
 
-  while (current_char32.value != '\n' && current_char32.value != '"')
+  // FIXME: This fails if the input ends. How do we check for EOF?
+  while (current_char32.value != '"' && !current_char32.is_eof ())
     {
       if (current_char32.value == '\\')
 	{
@@ -1949,20 +1951,18 @@ Lexer::parse_string (Location loc)
 
   current_column += length;
 
-  if (current_char32.value == '\n')
-    {
-      rust_error_at (get_current_location (), "unended string literal");
-      // by this point, the parser will stuck at this position due to
-      // undetermined string termination. we now need to unstuck the parser
-      skip_broken_string_input (current_char32.value);
-    }
-  else if (current_char32.value == '"')
+  if (current_char32.value == '"')
     {
       current_column++;
 
       skip_input ();
       current_char = peek_input ();
     }
+  else if (current_char32.is_eof ())
+    {
+      rust_error_at (get_current_location (), "unended string literal");
+      return Token::make (END_OF_FILE, get_current_location ());
+    }
   else
     {
       gcc_unreachable ();
@@ -2046,7 +2046,7 @@ Lexer::parse_raw_string (Location loc, int initial_hash_count)
   skip_input ();
   Codepoint current_char32 = peek_codepoint_input ();
 
-  while (true)
+  while (!current_char32.is_eof ())
     {
       if (current_char32.value == '"')
 	{
@@ -2318,6 +2318,8 @@ Lexer::parse_char_or_lifetime (Location loc)
   int length = 1;
 
   current_char32 = peek_codepoint_input ();
+  if (current_char32.is_eof ())
+    return nullptr;
 
   // parse escaped char literal
   if (current_char32.value == '\\')
@@ -2398,6 +2400,9 @@ Lexer::get_input_codepoint_length ()
 {
   uint8_t input = peek_input ();
 
+  if ((int8_t) input == EOF)
+    return 0;
+
   if (input < 128)
     {
       // ascii -- 1 byte
@@ -2467,7 +2472,8 @@ Lexer::get_input_codepoint_length ()
     }
   else
     {
-      rust_error_at (get_current_location (), "invalid UTF-8 (too long)");
+      rust_error_at (get_current_location (),
+		     "invalid UTF-8 [FIRST] (too long)");
       return 0;
     }
 }
@@ -2478,6 +2484,9 @@ Lexer::peek_codepoint_input ()
 {
   uint8_t input = peek_input ();
 
+  if ((int8_t) input == EOF)
+    return Codepoint::eof ();
+
   if (input < 128)
     {
       // ascii -- 1 byte
@@ -2534,7 +2543,8 @@ Lexer::peek_codepoint_input ()
     }
   else
     {
-      rust_error_at (get_current_location (), "invalid UTF-8 (too long)");
+      rust_error_at (get_current_location (),
+		     "invalid UTF-8 [SECND] (too long)");
       return {0xFFFE};
     }
 }
@@ -2620,7 +2630,8 @@ Lexer::test_get_input_codepoint_n_length (int n_start_offset)
     }
   else
     {
-      rust_error_at (get_current_location (), "invalid UTF-8 (too long)");
+      rust_error_at (get_current_location (),
+		     "invalid UTF-8 [THIRD] (too long)");
       return 0;
     }
 }
author	bors[bot] <26634292+bors[bot]@users.noreply.github.com>	2022-07-22 13:40:06 +0000
committer	GitHub <noreply@github.com>	2022-07-22 13:40:06 +0000
commit	dec7ee4c5e703422ecb9bc94417645d66af7eaae (patch)
tree	3aea5c56255e3cd08ff7ac851e7da4c789e40894 /gcc/rust/lex/rust-lex.cc
parent	15d4bdd84a7aca62bceb9dead770fd1df4759360 (diff)
parent	8dc692afc2c86cbf3b5124484dd2596514a5acf3 (diff)
download	gcc-dec7ee4c5e703422ecb9bc94417645d66af7eaae.zip gcc-dec7ee4c5e703422ecb9bc94417645d66af7eaae.tar.gz gcc-dec7ee4c5e703422ecb9bc94417645d66af7eaae.tar.bz2