Improved parsing of MatchExpr (and AST structure)

Fixed compilation errors. Fixed accidental break out of the loop on an expr_with_block when parsing a match expr.
author: SimplyTheOther <simplytheother@gmail.com> 2020-10-01 14:55:15 +0800
committer: Philip Herron <philip.herron@embecosm.com> 2020-11-28 21:13:21 +0000
commit: 2ed062a0be3e2c6e7bd50d25d9ede336fad7be38 (patch)
tree: 3eb122d4ae25035a067ddaf78576541a75fdf9b7 /gcc/rust/parse
parent: 4cfd93746242dc6e73f4fa71520bb361a2d6de4a (diff)
download: gcc-2ed062a0be3e2c6e7bd50d25d9ede336fad7be38.zip
gcc-2ed062a0be3e2c6e7bd50d25d9ede336fad7be38.tar.gz
gcc-2ed062a0be3e2c6e7bd50d25d9ede336fad7be38.tar.bz2
2 files changed, 103 insertions, 28 deletions
diff --git a/gcc/rust/parse/rust-parse-impl.h b/gcc/rust/parse/rust-parse-impl.h
index 286d7a4..468312e 100644
--- a/gcc/rust/parse/rust-parse-impl.h
+++ b/gcc/rust/parse/rust-parse-impl.h
@@ -3895,7 +3895,8 @@ Parser<ManagedTokenSource>::parse_struct (
 	lexer.skip_token ();
 
 	// parse struct fields, if any
-	std::vector<AST::StructField> struct_fields = parse_struct_fields ();
+	std::vector<AST::StructField> struct_fields
+	  = parse_struct_fields ([] (TokenId id) { return id == RIGHT_CURLY; });
 
 	if (!skip_token (RIGHT_CURLY))
 	  {
@@ -3938,42 +3939,69 @@ Parser<ManagedTokenSource>::parse_struct_fields ()
 
   // Return empty field list if no field there
   if (initial_field.is_error ())
-    {
-      return fields;
-    }
+    return fields;
 
   fields.push_back (std::move (initial_field));
 
-  // maybe think of a better control structure here - do-while with an initial
-  // error state? basically, loop through field list until can't find any more
-  // params
-  while (true)
+  while (lexer.peek_token ()->get_id () == COMMA)
     {
-      if (lexer.peek_token ()->get_id () != COMMA)
+      lexer.skip_token ();
+
+      AST::StructField field = parse_struct_field ();
+
+      if (field.is_error ())
 	{
+	  // would occur with trailing comma, so allowed
 	  break;
 	}
 
-      // skip comma if applies
+      fields.push_back (std::move (field));
+    }
+
+  fields.shrink_to_fit ();
+  return fields;
+  // TODO: template if possible (parse_non_ptr_seq)
+}
+
+// Parses struct fields in struct declarations.
+template <typename ManagedTokenSource>
+template <typename EndTokenPred>
+std::vector<AST::StructField>
+Parser<ManagedTokenSource>::parse_struct_fields (EndTokenPred is_end_tok)
+{
+  std::vector<AST::StructField> fields;
+
+  AST::StructField initial_field = parse_struct_field ();
+
+  // Return empty field list if no field there
+  if (initial_field.is_error ())
+    return fields;
+
+  fields.push_back (std::move (initial_field));
+
+  while (lexer.peek_token ()->get_id () == COMMA)
+    {
       lexer.skip_token ();
 
-      AST::StructField field = parse_struct_field ();
+      if (is_end_tok (lexer.peek_token ()->get_id ()))
+	break;
 
-      if (!field.is_error ())
-	{
-	  fields.push_back (std::move (field));
-	}
-      else
+      AST::StructField field = parse_struct_field ();
+      if (field.is_error ())
 	{
-	  // this would occur with a trailing comma, which is allowed
-	  break;
+	  /* TODO: should every field be ditched just because one couldn't be
+	   * parsed? */
+	  rust_error_at (lexer.peek_token ()->get_locus (),
+			 "failed to parse struct field in struct fields");
+	  return {};
 	}
+
+      fields.push_back (std::move (field));
     }
 
+  fields.shrink_to_fit ();
   return fields;
-
-  // TODO: this shares basically all code with function params and tuple fields
-  // - templates?
+  // TODO: template if possible (parse_non_ptr_seq)
 }
 
 // Parses a single struct field (in a struct definition). Does not parse commas.
@@ -4257,7 +4285,8 @@ Parser<ManagedTokenSource>::parse_enum_item ()
 	// struct enum item
 	lexer.skip_token ();
 
-	std::vector<AST::StructField> struct_fields = parse_struct_fields ();
+	std::vector<AST::StructField> struct_fields
+	  = parse_struct_fields ([] (TokenId id) { return id == RIGHT_CURLY; });
 
 	if (!skip_token (RIGHT_CURLY))
 	  {
@@ -4326,7 +4355,8 @@ Parser<ManagedTokenSource>::parse_union (
 
   /* parse union inner items as "struct fields" because hey, syntax reuse. Spec
    * said so. */
-  std::vector<AST::StructField> union_fields = parse_struct_fields ();
+  std::vector<AST::StructField> union_fields
+    = parse_struct_fields ([] (TokenId id) { return id == RIGHT_CURLY; });
 
   if (!skip_token (RIGHT_CURLY))
     {
@@ -8017,18 +8047,20 @@ Parser<ManagedTokenSource>::parse_match_expr (
   std::vector<AST::Attribute> inner_attrs = parse_inner_attributes ();
 
   // parse match arms (if they exist)
-  std::vector<std::unique_ptr<AST::MatchCase> > match_arms;
+  // std::vector<std::unique_ptr<AST::MatchCase> > match_arms;
+  std::vector<AST::MatchCase> match_arms;
 
-  // FIXME: absolute worst control structure ever
   // parse match cases
-  while (true)
+  while (lexer.peek_token ()->get_id () != RIGHT_CURLY)
     {
       // parse match arm itself, which is required
       AST::MatchArm arm = parse_match_arm ();
       if (arm.is_error ())
 	{
-	  // not necessarily an error
-	  break;
+	  // TODO is this worth throwing everything away?
+	  rust_error_at (lexer.peek_token ()->get_locus (),
+			 "failed to parse match arm in match arms");
+	  return nullptr;
 	}
 
       if (!skip_token (MATCH_ARROW))
@@ -8038,6 +8070,45 @@ Parser<ManagedTokenSource>::parse_match_expr (
 	  return nullptr;
 	}
 
+      std::unique_ptr<AST::Expr> expr = parse_expr ();
+      if (expr == nullptr)
+	{
+	  rust_error_at (lexer.peek_token ()->get_locus (),
+			 "failed to parse expr in match arm in match expr");
+	  // skip somewhere?
+	  return nullptr;
+	}
+      bool is_expr_without_block = expr->is_expr_without_block ();
+
+      // construct match case expr and add to cases
+      match_arms.push_back (AST::MatchCase (std::move (arm), std::move (expr)));
+
+      // handle comma presence
+      if (lexer.peek_token ()->get_id () != COMMA)
+	{
+	  if (!is_expr_without_block)
+	    {
+	      // allowed even if not final case
+	      continue;
+	    }
+	  else if (is_expr_without_block
+		   && lexer.peek_token ()->get_id () != RIGHT_CURLY)
+	    {
+	      // not allowed if not final case
+	      rust_error_at (lexer.peek_token ()->get_locus (),
+			     "exprwithoutblock requires comma after match case "
+			     "expression in match arm (if not final case)");
+	      return nullptr;
+	    }
+	  else
+	    {
+	      // otherwise, must be final case, so fine
+	      break;
+	    }
+	}
+      lexer.skip_token ();
+
+#if 0
       // branch on next token - if '{', block expr, otherwise just expr
       if (lexer.peek_token ()->get_id () == LEFT_CURLY)
 	{
@@ -8089,6 +8160,7 @@ Parser<ManagedTokenSource>::parse_match_expr (
 	    }
 	  lexer.skip_token ();
 	}
+#endif
     }
 
   if (!skip_token (RIGHT_CURLY))
diff --git a/gcc/rust/parse/rust-parse.h b/gcc/rust/parse/rust-parse.h
index b2de05e..e2f3a78 100644
--- a/gcc/rust/parse/rust-parse.h
+++ b/gcc/rust/parse/rust-parse.h
@@ -189,6 +189,8 @@ private:
   std::unique_ptr<AST::Struct>
   parse_struct (AST::Visibility vis, std::vector<AST::Attribute> outer_attrs);
   std::vector<AST::StructField> parse_struct_fields ();
+  template <typename EndTokenPred>
+  std::vector<AST::StructField> parse_struct_fields (EndTokenPred is_end_token);
   AST::StructField parse_struct_field ();
   std::vector<AST::TupleField> parse_tuple_fields ();
   AST::TupleField parse_tuple_field ();
@@ -528,6 +530,7 @@ private:
 			       = std::vector<AST::Attribute> (),
 			       bool pratt_parse = false);
   std::unique_ptr<AST::StructExprField> parse_struct_expr_field ();
+  bool will_be_expr_with_block ();
 
   // Type-related
   std::unique_ptr<AST::Type> parse_type ();
author	SimplyTheOther <simplytheother@gmail.com>	2020-10-01 14:55:15 +0800
committer	Philip Herron <philip.herron@embecosm.com>	2020-11-28 21:13:21 +0000
commit	2ed062a0be3e2c6e7bd50d25d9ede336fad7be38 (patch)
tree	3eb122d4ae25035a067ddaf78576541a75fdf9b7 /gcc/rust/parse
parent	4cfd93746242dc6e73f4fa71520bb361a2d6de4a (diff)
download	gcc-2ed062a0be3e2c6e7bd50d25d9ede336fad7be38.zip gcc-2ed062a0be3e2c6e7bd50d25d9ede336fad7be38.tar.gz gcc-2ed062a0be3e2c6e7bd50d25d9ede336fad7be38.tar.bz2