/* Bison parser for Rust expressions, for GDB. Copyright (C) 2016-2019 Free Software Foundation, Inc. This file is part of GDB. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* The Bison manual says that %pure-parser is deprecated, but we use it anyway because it also works with Byacc. That is also why this uses %lex-param and %parse-param rather than the simpler %param -- Byacc does not support the latter. */ %pure-parser %lex-param {struct rust_parser *parser} %parse-param {struct rust_parser *parser} /* Removing the last conflict seems difficult. */ %expect 1 %{ #include "defs.h" #include "block.h" #include "charset.h" #include "cp-support.h" #include "gdb_obstack.h" #include "gdb_regex.h" #include "rust-lang.h" #include "parser-defs.h" #include "common/selftest.h" #include "value.h" #include "common/vec.h" #define GDB_YY_REMAP_PREFIX rust #include "yy-remap.h" #define RUSTSTYPE YYSTYPE /* Forward declaration; struct rust_op is defined after struct rust_parser. */ struct rust_op; /* A list of expressions; see the "params" member of the %union. */ typedef std::vector rust_op_vector; /* A typed integer constant. */ struct typed_val_int { LONGEST val; struct type *type; }; /* A typed floating point constant. */ struct typed_val_float { gdb_byte val[16]; struct type *type; }; /* An identifier and an expression. This is used to represent one element of a struct initializer. */ struct set_field { struct stoken name; const struct rust_op *init; }; /* A list of field initializers; see the "field_inits" member of the %union. */ typedef std::vector rust_set_vector; %} %union { /* A typed integer constant. 
*/ struct typed_val_int typed_val_int; /* A typed floating point constant. */ struct typed_val_float typed_val_float; /* An identifier or string. */ struct stoken sval; /* A token representing an opcode, like "==". */ enum exp_opcode opcode; /* A list of expressions; for example, the arguments to a function call. */ rust_op_vector *params; /* A list of field initializers. */ rust_set_vector *field_inits; /* A single field initializer. */ struct set_field one_field_init; /* An expression. */ const struct rust_op *op; /* A plain integer, for example used to count the number of "super::" prefixes on a path. */ unsigned int depth; } %{ struct rust_parser; /* Forward declarations; rustyylex and make_stoken are defined after the grammar. */ static int rustyylex (YYSTYPE *, rust_parser *); static void rustyyerror (rust_parser *parser, const char *msg); static struct stoken make_stoken (const char *); /* A regular expression for matching Rust numbers. This is split up since it is very long and this gives us a way to comment the sections. */ static const char *number_regex_text = /* subexpression 1: allows use of alternation, otherwise uninteresting */ "^(" /* First comes floating point. */ /* Recognize number after the decimal point, with optional exponent and optional type suffix. subexpression 2: allows "?", otherwise uninteresting subexpression 3: if present, type suffix */ "[0-9][0-9_]*\\.[0-9][0-9_]*([eE][-+]?[0-9][0-9_]*)?(f32|f64)?" #define FLOAT_TYPE1 3 "|" /* Recognize exponent without decimal point, with optional type suffix. subexpression 4: if present, type suffix */ #define FLOAT_TYPE2 4 "[0-9][0-9_]*[eE][-+]?[0-9][0-9_]*(f32|f64)?" "|" /* "23." is a valid floating point number, but "23.e5" and "23.f32" are not. So, handle the trailing-. case separately. */ "[0-9][0-9_]*\\." "|" /* Finally come integers. 
subexpression 5: text of integer subexpression 6: if present, type suffix subexpression 7: allows use of alternation, otherwise uninteresting */ #define INT_TEXT 5 #define INT_TYPE 6 "(0x[a-fA-F0-9_]+|0o[0-7_]+|0b[01_]+|[0-9][0-9_]*)" "([iu](size|8|16|32|64))?" ")"; /* The number of subexpressions to allocate space for, including the "0th" whole match subexpression. */ #define NUM_SUBEXPRESSIONS 8 /* The compiled number-matching regex. */ static regex_t number_regex; /* An instance of this is created before parsing, and destroyed when parsing is finished. */ struct rust_parser { /* Start with no parse result and remember the parser state STATE. */ rust_parser (struct parser_state *state) : rust_ast (nullptr), pstate (state) { } ~rust_parser () { } /* Create a new rust_set_vector. The storage for the new vector is managed by this class. */ rust_set_vector *new_set_vector () { rust_set_vector *result = new rust_set_vector; set_vectors.push_back (std::unique_ptr (result)); return result; } /* Create a new rust_op_vector. The storage for the new vector is managed by this class. */ rust_op_vector *new_op_vector () { rust_op_vector *result = new rust_op_vector; op_vectors.push_back (std::unique_ptr (result)); return result; } /* Return the parser's language. */ const struct language_defn *language () const { return pstate->language (); } /* Return the parser's gdbarch. */ struct gdbarch *arch () const { return pstate->gdbarch (); } /* A helper to look up a Rust type, or fail. This only works for types defined by rust_language_arch_info. 
*/ struct type *get_type (const char *name) { struct type *type; type = language_lookup_primitive_type (language (), arch (), name); if (type == NULL) error (_("Could not find Rust type %s"), name); return type; } /* Helpers that build names on the parser's obstack. */ const char *copy_name (const char *name, int len); struct stoken concat3 (const char *s1, const char *s2, const char *s3); const struct rust_op *crate_name (const struct rust_op *name); const struct rust_op *super_name (const struct rust_op *ident, unsigned int n_supers); /* Lexer routines. */ int lex_character (YYSTYPE *lvalp); int lex_number (YYSTYPE *lvalp); int lex_string (YYSTYPE *lvalp); int lex_identifier (YYSTYPE *lvalp); uint32_t lex_hex (int min, int max); uint32_t lex_escape (int is_byte); int lex_operator (YYSTYPE *lvalp); void push_back (char c); void update_innermost_block (struct block_symbol sym); struct block_symbol lookup_symbol (const char *name, const struct block *block, const domain_enum domain); struct type *rust_lookup_type (const char *name, const struct block *block); std::vector convert_params_to_types (rust_op_vector *params); struct type *convert_ast_to_type (const struct rust_op *operation); const char *convert_name (const struct rust_op *operation); void convert_params_to_expression (rust_op_vector *params, const struct rust_op *top); void convert_ast_to_expression (const struct rust_op *operation, const struct rust_op *top, bool want_type = false); /* Functions returning AST nodes; the grammar actions call many of these. */ struct rust_op *ast_basic_type (enum type_code typecode); const struct rust_op *ast_operation (enum exp_opcode opcode, const struct rust_op *left, const struct rust_op *right); const struct rust_op *ast_compound_assignment (enum exp_opcode opcode, const struct rust_op *left, const struct rust_op *rust_op); const struct rust_op *ast_literal (struct typed_val_int val); const struct rust_op *ast_dliteral (struct typed_val_float val); const struct rust_op *ast_structop (const struct rust_op *left, const char *name, int completing); const struct rust_op *ast_structop_anonymous (const struct rust_op *left, 
struct typed_val_int number); const struct rust_op *ast_unary (enum exp_opcode opcode, const struct rust_op *expr); const struct rust_op *ast_cast (const struct rust_op *expr, const struct rust_op *type); const struct rust_op *ast_call_ish (enum exp_opcode opcode, const struct rust_op *expr, rust_op_vector *params); const struct rust_op *ast_path (struct stoken name, rust_op_vector *params); const struct rust_op *ast_string (struct stoken str); const struct rust_op *ast_struct (const struct rust_op *name, rust_set_vector *fields); const struct rust_op *ast_range (const struct rust_op *lhs, const struct rust_op *rhs, bool inclusive); const struct rust_op *ast_array_type (const struct rust_op *lhs, struct typed_val_int val); const struct rust_op *ast_slice_type (const struct rust_op *type); const struct rust_op *ast_reference_type (const struct rust_op *type); const struct rust_op *ast_pointer_type (const struct rust_op *type, int is_mut); const struct rust_op *ast_function_type (const struct rust_op *result, rust_op_vector *params); const struct rust_op *ast_tuple_type (rust_op_vector *params); /* A pointer to this is installed globally. */ auto_obstack obstack; /* Result of parsing. Points into obstack. */ const struct rust_op *rust_ast; /* This keeps track of the various vectors we allocate. */ std::vector> set_vectors; std::vector> op_vectors; /* The parser state gdb gave us. */ struct parser_state *pstate; /* Depth of parentheses. */ int paren_depth = 0; }; /* Rust AST operations. We build a tree of these; then lower them to gdb expressions when parsing has completed. */ struct rust_op { /* The opcode. */ enum exp_opcode opcode; /* If OPCODE is OP_TYPE, then this holds information about what type is described by this node. */ enum type_code typecode; /* Indicates whether OPCODE actually represents a compound assignment. 
For example, if OPCODE is GTGT and this is false, then this rust_op represents an ordinary ">>"; but if this is true, then this rust_op represents ">>=". Unused in other cases. */ unsigned int compound_assignment : 1; /* Only used by a field expression; if set, indicates that the field name occurred at the end of the expression and is eligible for completion. */ unsigned int completing : 1; /* For OP_RANGE, indicates whether the range is inclusive or exclusive. */ unsigned int inclusive : 1; /* Operands of expression. Which one is used and how depends on the particular opcode. */ RUSTSTYPE left; RUSTSTYPE right; }; %} %token GDBVAR %token IDENT %token COMPLETE %token INTEGER %token DECIMAL_INTEGER %token STRING %token BYTESTRING %token FLOAT %token COMPOUND_ASSIGN /* Keyword tokens. */ %token KW_AS %token KW_IF %token KW_TRUE %token KW_FALSE %token KW_SUPER %token KW_SELF %token KW_MUT %token KW_EXTERN %token KW_CONST %token KW_FN %token KW_SIZEOF /* Operator tokens. */ %token DOTDOT %token DOTDOTEQ %token OROR %token ANDAND %token EQEQ %token NOTEQ %token LTEQ %token GTEQ %token LSH RSH %token COLONCOLON %token ARROW %type type %type path_for_expr %type identifier_path_for_expr %type path_for_type %type identifier_path_for_type %type just_identifiers_for_type %type maybe_type_list %type type_list %type super_path %type literal %type expr %type field_expr %type idx_expr %type unop_expr %type binop_expr %type binop_expr_expr %type type_cast_expr %type assignment_expr %type compound_assignment_expr %type paren_expr %type call_expr %type path_expr %type tuple_expr %type unit_expr %type struct_expr %type array_expr %type range_expr %type expr_list %type maybe_expr_list %type paren_expr_list %type struct_expr_list %type struct_expr_tail /* Precedence. 
*/ /* In yacc/Bison, operators declared on later lines bind more tightly than those declared earlier. */ %nonassoc DOTDOT DOTDOTEQ %right '=' COMPOUND_ASSIGN %left OROR %left ANDAND %nonassoc EQEQ NOTEQ '<' '>' LTEQ GTEQ %left '|' %left '^' %left '&' %left LSH RSH %left '@' %left '+' '-' %left '*' '/' '%' /* These could be %precedence in Bison, but that isn't a yacc feature. */ %left KW_AS %left UNARY %left '[' '.' '(' %% start: expr { /* If we are completing and see a valid parse, rust_ast will already have been set. */ if (parser->rust_ast == NULL) parser->rust_ast = $1; } ; /* Note that the Rust grammar includes a method_call_expr, but we handle this differently, to avoid a shift/reduce conflict with call_expr. */ expr: literal | path_expr | tuple_expr | unit_expr | struct_expr | field_expr | array_expr | idx_expr | range_expr | unop_expr /* Must precede call_expr because of ambiguity with sizeof. */ | binop_expr | paren_expr | call_expr ; /* Tuple expressions are recognized by the grammar but always rejected with an error. */ tuple_expr: '(' expr ',' maybe_expr_list ')' { $4->push_back ($2); error (_("Tuple expressions not supported yet")); } ; /* The unit value "()" is represented as an integer literal of value 0 whose type is the primitive type named "()". */ unit_expr: '(' ')' { struct typed_val_int val; val.type = (language_lookup_primitive_type (parser->language (), parser->arch (), "()")); val.val = 0; $$ = parser->ast_literal (val); } ; /* To avoid a shift/reduce conflict with call_expr, we don't handle tuple struct expressions here, but instead when examining the AST. 
*/ struct_expr: path_for_expr '{' struct_expr_list '}' { $$ = parser->ast_struct ($1, $3); } ; /* The final element of a struct initializer: "..EXPR" (recorded with an empty field name), "NAME: EXPR", or the shorthand "NAME". */ struct_expr_tail: DOTDOT expr { struct set_field sf; sf.name.ptr = NULL; sf.name.length = 0; sf.init = $2; $$ = sf; } | IDENT ':' expr { struct set_field sf; sf.name = $1; sf.init = $3; $$ = sf; } | IDENT { struct set_field sf; sf.name = $1; sf.init = parser->ast_path ($1, NULL); $$ = sf; } ; /* This rule is right-recursive and each leading field is appended after the rest of the list has been built, so the resulting vector holds the fields in reverse source order. */ struct_expr_list: /* %empty */ { $$ = parser->new_set_vector (); } | struct_expr_tail { rust_set_vector *result = parser->new_set_vector (); result->push_back ($1); $$ = result; } | IDENT ':' expr ',' struct_expr_list { struct set_field sf; sf.name = $1; sf.init = $3; $5->push_back (sf); $$ = $5; } | IDENT ',' struct_expr_list { struct set_field sf; sf.name = $1; sf.init = parser->ast_path ($1, NULL); $3->push_back (sf); $$ = $3; } ; /* The "mut" keyword is accepted in array expressions but does not change the resulting AST. */ array_expr: '[' KW_MUT expr_list ']' { $$ = parser->ast_call_ish (OP_ARRAY, NULL, $3); } | '[' expr_list ']' { $$ = parser->ast_call_ish (OP_ARRAY, NULL, $2); } | '[' KW_MUT expr ';' expr ']' { $$ = parser->ast_operation (OP_RUST_ARRAY, $3, $5); } | '[' expr ';' expr ']' { $$ = parser->ast_operation (OP_RUST_ARRAY, $2, $4); } ; range_expr: expr DOTDOT { $$ = parser->ast_range ($1, NULL, false); } | expr DOTDOT expr { $$ = parser->ast_range ($1, $3, false); } | expr DOTDOTEQ expr { $$ = parser->ast_range ($1, $3, true); } | DOTDOT expr { $$ = parser->ast_range (NULL, $2, false); } | DOTDOTEQ expr { $$ = parser->ast_range (NULL, $2, true); } | DOTDOT { $$ = parser->ast_range (NULL, NULL, false); } ; literal: INTEGER { $$ = parser->ast_literal ($1); } | DECIMAL_INTEGER { $$ = parser->ast_literal ($1); } | FLOAT { $$ = parser->ast_dliteral ($1); } | STRING { struct set_field field; struct typed_val_int val; struct stoken token; rust_set_vector *fields = parser->new_set_vector (); /* Wrap the raw string in the &str struct. 
*/ field.name.ptr = "data_ptr"; field.name.length = strlen (field.name.ptr); field.init = parser->ast_unary (UNOP_ADDR, parser->ast_string ($1)); fields->push_back (field); val.type = parser->get_type ("usize"); val.val = $1.length; field.name.ptr = "length"; field.name.length = strlen (field.name.ptr); field.init = parser->ast_literal (val); fields->push_back (field); token.ptr = "&str"; token.length = strlen (token.ptr); $$ = parser->ast_struct (parser->ast_path (token, NULL), fields); } | BYTESTRING { $$ = parser->ast_string ($1); } | KW_TRUE { struct typed_val_int val; val.type = language_bool_type (parser->language (), parser->arch ()); val.val = 1; $$ = parser->ast_literal (val); } | KW_FALSE { struct typed_val_int val; val.type = language_bool_type (parser->language (), parser->arch ()); val.val = 0; $$ = parser->ast_literal (val); } ; field_expr: expr '.' IDENT { $$ = parser->ast_structop ($1, $3.ptr, 0); } | expr '.' COMPLETE { $$ = parser->ast_structop ($1, $3.ptr, 1); /* Record this node as the parse result right away; the "start" rule will not overwrite it. */ parser->rust_ast = $$; } | expr '.' DECIMAL_INTEGER { $$ = parser->ast_structop_anonymous ($1, $3); } ; idx_expr: expr '[' expr ']' { $$ = parser->ast_operation (BINOP_SUBSCRIPT, $1, $3); } ; unop_expr: '+' expr %prec UNARY { $$ = parser->ast_unary (UNOP_PLUS, $2); } | '-' expr %prec UNARY { $$ = parser->ast_unary (UNOP_NEG, $2); } | '!' expr %prec UNARY { /* Note that we provide a Rust-specific evaluator override for UNOP_COMPLEMENT, so it can do the right thing for both bool and integral values. 
*/ $$ = parser->ast_unary (UNOP_COMPLEMENT, $2); } | '*' expr %prec UNARY { $$ = parser->ast_unary (UNOP_IND, $2); } | '&' expr %prec UNARY { $$ = parser->ast_unary (UNOP_ADDR, $2); } /* "&mut EXPR" produces the same node as "&EXPR". */ | '&' KW_MUT expr %prec UNARY { $$ = parser->ast_unary (UNOP_ADDR, $3); } | KW_SIZEOF '(' expr ')' %prec UNARY { $$ = parser->ast_unary (UNOP_SIZEOF, $3); } ; binop_expr: binop_expr_expr | type_cast_expr | assignment_expr | compound_assignment_expr ; binop_expr_expr: expr '*' expr { $$ = parser->ast_operation (BINOP_MUL, $1, $3); } | expr '@' expr { $$ = parser->ast_operation (BINOP_REPEAT, $1, $3); } | expr '/' expr { $$ = parser->ast_operation (BINOP_DIV, $1, $3); } | expr '%' expr { $$ = parser->ast_operation (BINOP_REM, $1, $3); } | expr '<' expr { $$ = parser->ast_operation (BINOP_LESS, $1, $3); } | expr '>' expr { $$ = parser->ast_operation (BINOP_GTR, $1, $3); } | expr '&' expr { $$ = parser->ast_operation (BINOP_BITWISE_AND, $1, $3); } | expr '|' expr { $$ = parser->ast_operation (BINOP_BITWISE_IOR, $1, $3); } | expr '^' expr { $$ = parser->ast_operation (BINOP_BITWISE_XOR, $1, $3); } | expr '+' expr { $$ = parser->ast_operation (BINOP_ADD, $1, $3); } | expr '-' expr { $$ = parser->ast_operation (BINOP_SUB, $1, $3); } | expr OROR expr { $$ = parser->ast_operation (BINOP_LOGICAL_OR, $1, $3); } | expr ANDAND expr { $$ = parser->ast_operation (BINOP_LOGICAL_AND, $1, $3); } | expr EQEQ expr { $$ = parser->ast_operation (BINOP_EQUAL, $1, $3); } | expr NOTEQ expr { $$ = parser->ast_operation (BINOP_NOTEQUAL, $1, $3); } | expr LTEQ expr { $$ = parser->ast_operation (BINOP_LEQ, $1, $3); } | expr GTEQ expr { $$ = parser->ast_operation (BINOP_GEQ, $1, $3); } | expr LSH expr { $$ = parser->ast_operation (BINOP_LSH, $1, $3); } | expr RSH expr { $$ = parser->ast_operation (BINOP_RSH, $1, $3); } ; type_cast_expr: expr KW_AS type { $$ = parser->ast_cast ($1, $3); } ; assignment_expr: expr '=' expr { $$ = parser->ast_operation (BINOP_ASSIGN, $1, $3); } ; /* $2 is the opcode that the lexer stored for the COMPOUND_ASSIGN token. */ compound_assignment_expr: expr 
COMPOUND_ASSIGN expr { $$ = parser->ast_compound_assignment ($2, $1, $3); } ; paren_expr: '(' expr ')' { $$ = $2; } ; expr_list: expr { $$ = parser->new_op_vector (); $$->push_back ($1); } | expr_list ',' expr { $1->push_back ($3); $$ = $1; } ; maybe_expr_list: /* %empty */ { /* The result can't be NULL. */ $$ = parser->new_op_vector (); } | expr_list { $$ = $1; } ; paren_expr_list: '(' maybe_expr_list ')' { $$ = $2; } ; call_expr: expr paren_expr_list { $$ = parser->ast_call_ish (OP_FUNCALL, $1, $2); } ; maybe_self_path: /* %empty */ | KW_SELF COLONCOLON ; /* The value of super_path is the number of "super::" components seen. */ super_path: KW_SUPER COLONCOLON { $$ = 1; } | super_path KW_SUPER COLONCOLON { $$ = $1 + 1; } ; path_expr: path_for_expr { $$ = $1; } | GDBVAR { $$ = parser->ast_path ($1, NULL); } | KW_SELF { $$ = parser->ast_path (make_stoken ("self"), NULL); } ; path_for_expr: identifier_path_for_expr | KW_SELF COLONCOLON identifier_path_for_expr { $$ = parser->super_name ($3, 0); } | maybe_self_path super_path identifier_path_for_expr { $$ = parser->super_name ($3, $2); } | COLONCOLON identifier_path_for_expr { $$ = parser->crate_name ($2); } | KW_EXTERN identifier_path_for_expr { /* This is a gdb extension to make it possible to refer to items in other crates. It just bypasses adding the current crate to the front of the name. 
*/ $$ = parser->ast_path (parser->concat3 ("::", $2->left.sval.ptr, NULL), $2->right.params); } ; identifier_path_for_expr: IDENT { $$ = parser->ast_path ($1, NULL); } | identifier_path_for_expr COLONCOLON IDENT { $$ = parser->ast_path (parser->concat3 ($1->left.sval.ptr, "::", $3.ptr), NULL); } | identifier_path_for_expr COLONCOLON '<' type_list '>' { $$ = parser->ast_path ($1->left.sval, $4); } | identifier_path_for_expr COLONCOLON '<' type_list RSH { $$ = parser->ast_path ($1->left.sval, $4); /* The type list was closed by an RSH (">>") token; a '>' is passed to push_back, presumably so that it is lexed again. */ parser->push_back ('>'); } ; path_for_type: identifier_path_for_type | KW_SELF COLONCOLON identifier_path_for_type { $$ = parser->super_name ($3, 0); } | maybe_self_path super_path identifier_path_for_type { $$ = parser->super_name ($3, $2); } | COLONCOLON identifier_path_for_type { $$ = parser->crate_name ($2); } | KW_EXTERN identifier_path_for_type { /* This is a gdb extension to make it possible to refer to items in other crates. It just bypasses adding the current crate to the front of the name. 
*/ $$ = parser->ast_path (parser->concat3 ("::", $2->left.sval.ptr, NULL), $2->right.params); } ; just_identifiers_for_type: IDENT { $$ = parser->ast_path ($1, NULL); } | just_identifiers_for_type COLONCOLON IDENT { $$ = parser->ast_path (parser->concat3 ($1->left.sval.ptr, "::", $3.ptr), NULL); } ; identifier_path_for_type: just_identifiers_for_type | just_identifiers_for_type '<' type_list '>' { $$ = parser->ast_path ($1->left.sval, $3); } | just_identifiers_for_type '<' type_list RSH { $$ = parser->ast_path ($1->left.sval, $3); /* The type list was closed by an RSH (">>") token; a '>' is passed to push_back, presumably so that it is lexed again. */ parser->push_back ('>'); } ; type: path_for_type | '[' type ';' INTEGER ']' { $$ = parser->ast_array_type ($2, $4); } | '[' type ';' DECIMAL_INTEGER ']' { $$ = parser->ast_array_type ($2, $4); } | '&' '[' type ']' { $$ = parser->ast_slice_type ($3); } | '&' type { $$ = parser->ast_reference_type ($2); } | '*' KW_MUT type { $$ = parser->ast_pointer_type ($3, 1); } | '*' KW_CONST type { $$ = parser->ast_pointer_type ($3, 0); } | KW_FN '(' maybe_type_list ')' ARROW type { $$ = parser->ast_function_type ($6, $3); } | '(' maybe_type_list ')' { $$ = parser->ast_tuple_type ($2); } ; /* Unlike maybe_expr_list, an empty type list is represented by NULL. */ maybe_type_list: /* %empty */ { $$ = NULL; } | type_list { $$ = $1; } ; type_list: type { rust_op_vector *result = parser->new_op_vector (); result->push_back ($1); $$ = result; } | type_list ',' type { $1->push_back ($3); $$ = $1; } ; %% /* A struct of this type is used to describe a token. */ struct token_info { const char *name; int value; enum exp_opcode opcode; }; /* Identifier tokens. An entry whose value is 0 (here "if") makes lex_identifier leave the word unconsumed and return 0. */ static const struct token_info identifier_tokens[] = { { "as", KW_AS, OP_NULL }, { "false", KW_FALSE, OP_NULL }, { "if", 0, OP_NULL }, { "mut", KW_MUT, OP_NULL }, { "const", KW_CONST, OP_NULL }, { "self", KW_SELF, OP_NULL }, { "super", KW_SUPER, OP_NULL }, { "true", KW_TRUE, OP_NULL }, { "extern", KW_EXTERN, OP_NULL }, { "fn", KW_FN, OP_NULL }, { "sizeof", KW_SIZEOF, OP_NULL }, }; /* Operator tokens, sorted longest first. 
*/ static const struct token_info operator_tokens[] = { { ">>=", COMPOUND_ASSIGN, BINOP_RSH }, { "<<=", COMPOUND_ASSIGN, BINOP_LSH }, { "<<", LSH, OP_NULL }, { ">>", RSH, OP_NULL }, { "&&", ANDAND, OP_NULL }, { "||", OROR, OP_NULL }, { "==", EQEQ, OP_NULL }, { "!=", NOTEQ, OP_NULL }, { "<=", LTEQ, OP_NULL }, { ">=", GTEQ, OP_NULL }, { "+=", COMPOUND_ASSIGN, BINOP_ADD }, { "-=", COMPOUND_ASSIGN, BINOP_SUB }, { "*=", COMPOUND_ASSIGN, BINOP_MUL }, { "/=", COMPOUND_ASSIGN, BINOP_DIV }, { "%=", COMPOUND_ASSIGN, BINOP_REM }, { "&=", COMPOUND_ASSIGN, BINOP_BITWISE_AND }, { "|=", COMPOUND_ASSIGN, BINOP_BITWISE_IOR }, { "^=", COMPOUND_ASSIGN, BINOP_BITWISE_XOR }, { "..=", DOTDOTEQ, OP_NULL }, { "::", COLONCOLON, OP_NULL }, { "..", DOTDOT, OP_NULL }, { "->", ARROW, OP_NULL } }; /* Helper function to copy to the name obstack. */ const char * rust_parser::copy_name (const char *name, int len) { return (const char *) obstack_copy0 (&obstack, name, len); } /* Helper function to make an stoken from a C string. */ static struct stoken make_stoken (const char *p) { struct stoken result; result.ptr = p; result.length = strlen (result.ptr); return result; } /* Helper function to concatenate three strings on the name obstack. */ struct stoken rust_parser::concat3 (const char *s1, const char *s2, const char *s3) { return make_stoken (obconcat (&obstack, s1, s2, s3, (char *) NULL)); } /* Return an AST node referring to NAME, but relative to the crate's name. */ const struct rust_op * rust_parser::crate_name (const struct rust_op *name) { std::string crate = rust_crate_for_block (pstate->expression_context_block); struct stoken result; gdb_assert (name->opcode == OP_VAR_VALUE); if (crate.empty ()) error (_("Could not find crate for current location")); result = make_stoken (obconcat (&obstack, "::", crate.c_str (), "::", name->left.sval.ptr, (char *) NULL)); return ast_path (result, name->right.params); } /* Create an AST node referring to a "super::" qualified name. 
IDENT is the base name and N_SUPERS is how many "super::"s were provided. N_SUPERS can be zero. */ const struct rust_op * rust_parser::super_name (const struct rust_op *ident, unsigned int n_supers) { const char *scope = block_scope (pstate->expression_context_block); int offset; gdb_assert (ident->opcode == OP_VAR_VALUE); if (scope[0] == '\0') error (_("Couldn't find namespace scope for self::")); if (n_supers > 0) { int len; std::vector offsets; unsigned int current_len; current_len = cp_find_first_component (scope); while (scope[current_len] != '\0') { offsets.push_back (current_len); gdb_assert (scope[current_len] == ':'); /* The "::". */ current_len += 2; current_len += cp_find_first_component (scope + current_len); } len = offsets.size (); if (n_supers >= len) error (_("Too many super:: uses from '%s'"), scope); offset = offsets[len - n_supers]; } else offset = strlen (scope); obstack_grow (&obstack, "::", 2); obstack_grow (&obstack, scope, offset); obstack_grow (&obstack, "::", 2); obstack_grow0 (&obstack, ident->left.sval.ptr, ident->left.sval.length); return ast_path (make_stoken ((const char *) obstack_finish (&obstack)), ident->right.params); } /* A helper that updates the innermost block as appropriate. */ void rust_parser::update_innermost_block (struct block_symbol sym) { if (symbol_read_needs_frame (sym.symbol)) pstate->block_tracker->update (sym); } /* Lex a hex number with at least MIN digits and at most MAX digits. */ uint32_t rust_parser::lex_hex (int min, int max) { uint32_t result = 0; int len = 0; /* We only want to stop at MAX if we're lexing a byte escape. */ int check_max = min == max; while ((check_max ? 
len <= max : 1) && ((pstate->lexptr[0] >= 'a' && pstate->lexptr[0] <= 'f') || (pstate->lexptr[0] >= 'A' && pstate->lexptr[0] <= 'F') || (pstate->lexptr[0] >= '0' && pstate->lexptr[0] <= '9'))) { result *= 16; if (pstate->lexptr[0] >= 'a' && pstate->lexptr[0] <= 'f') result = result + 10 + pstate->lexptr[0] - 'a'; else if (pstate->lexptr[0] >= 'A' && pstate->lexptr[0] <= 'F') result = result + 10 + pstate->lexptr[0] - 'A'; else result = result + pstate->lexptr[0] - '0'; ++pstate->lexptr; ++len; } if (len < min) error (_("Not enough hex digits seen")); if (len > max) { gdb_assert (min != max); error (_("Overlong hex escape")); } return result; } /* Lex an escape. IS_BYTE is true if we're lexing a byte escape; otherwise we're lexing a character escape. */ uint32_t rust_parser::lex_escape (int is_byte) { uint32_t result; gdb_assert (pstate->lexptr[0] == '\\'); ++pstate->lexptr; switch (pstate->lexptr[0]) { case 'x': ++pstate->lexptr; result = lex_hex (2, 2); break; case 'u': if (is_byte) error (_("Unicode escape in byte literal")); ++pstate->lexptr; if (pstate->lexptr[0] != '{') error (_("Missing '{' in Unicode escape")); ++pstate->lexptr; result = lex_hex (1, 6); /* Could do range checks here. */ if (pstate->lexptr[0] != '}') error (_("Missing '}' in Unicode escape")); ++pstate->lexptr; break; case 'n': result = '\n'; ++pstate->lexptr; break; case 'r': result = '\r'; ++pstate->lexptr; break; case 't': result = '\t'; ++pstate->lexptr; break; case '\\': result = '\\'; ++pstate->lexptr; break; case '0': result = '\0'; ++pstate->lexptr; break; case '\'': result = '\''; ++pstate->lexptr; break; case '"': result = '"'; ++pstate->lexptr; break; default: error (_("Invalid escape \\%c in literal"), pstate->lexptr[0]); } return result; } /* Lex a character constant. 
*/ int rust_parser::lex_character (YYSTYPE *lvalp) { int is_byte = 0; uint32_t value; if (pstate->lexptr[0] == 'b') { is_byte = 1; ++pstate->lexptr; } gdb_assert (pstate->lexptr[0] == '\''); ++pstate->lexptr; /* This should handle UTF-8 here. */ if (pstate->lexptr[0] == '\\') value = lex_escape (is_byte); else { value = pstate->lexptr[0] & 0xff; ++pstate->lexptr; } if (pstate->lexptr[0] != '\'') error (_("Unterminated character literal")); ++pstate->lexptr; lvalp->typed_val_int.val = value; lvalp->typed_val_int.type = get_type (is_byte ? "u8" : "char"); return INTEGER; } /* Return the offset of the double quote if STR looks like the start of a raw string, or 0 if STR does not start a raw string. */ static int starts_raw_string (const char *str) { const char *save = str; if (str[0] != 'r') return 0; ++str; while (str[0] == '#') ++str; if (str[0] == '"') return str - save; return 0; } /* Return true if STR looks like the end of a raw string that had N hashes at the start. */ static bool ends_raw_string (const char *str, int n) { int i; gdb_assert (str[0] == '"'); for (i = 0; i < n; ++i) if (str[i + 1] != '#') return false; return true; } /* Lex a string constant. */ int rust_parser::lex_string (YYSTYPE *lvalp) { int is_byte = pstate->lexptr[0] == 'b'; int raw_length; if (is_byte) ++pstate->lexptr; raw_length = starts_raw_string (pstate->lexptr); pstate->lexptr += raw_length; gdb_assert (pstate->lexptr[0] == '"'); ++pstate->lexptr; while (1) { uint32_t value; if (raw_length > 0) { if (pstate->lexptr[0] == '"' && ends_raw_string (pstate->lexptr, raw_length - 1)) { /* Exit with lexptr pointing after the final "#". */ pstate->lexptr += raw_length; break; } else if (pstate->lexptr[0] == '\0') error (_("Unexpected EOF in string")); value = pstate->lexptr[0] & 0xff; if (is_byte && value > 127) error (_("Non-ASCII value in raw byte string")); obstack_1grow (&obstack, value); ++pstate->lexptr; } else if (pstate->lexptr[0] == '"') { /* Make sure to skip the quote. 
*/ ++pstate->lexptr; break; } else if (pstate->lexptr[0] == '\\') { value = lex_escape (is_byte); if (is_byte) obstack_1grow (&obstack, value); else convert_between_encodings ("UTF-32", "UTF-8", (gdb_byte *) &value, sizeof (value), sizeof (value), &obstack, translit_none); } else if (pstate->lexptr[0] == '\0') error (_("Unexpected EOF in string")); else { value = pstate->lexptr[0] & 0xff; if (is_byte && value > 127) error (_("Non-ASCII value in byte string")); obstack_1grow (&obstack, value); ++pstate->lexptr; } } lvalp->sval.length = obstack_object_size (&obstack); lvalp->sval.ptr = (const char *) obstack_finish (&obstack); return is_byte ? BYTESTRING : STRING; } /* Return true if STRING starts with whitespace followed by a digit. */ static bool space_then_number (const char *string) { const char *p = string; while (p[0] == ' ' || p[0] == '\t') ++p; if (p == string) return false; return *p >= '0' && *p <= '9'; } /* Return true if C can start an identifier. */ static bool rust_identifier_start_p (char c) { return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == '$'); } /* Lex an identifier. */ int rust_parser::lex_identifier (YYSTYPE *lvalp) { const char *start = pstate->lexptr; unsigned int length; const struct token_info *token; int i; int is_gdb_var = pstate->lexptr[0] == '$'; gdb_assert (rust_identifier_start_p (pstate->lexptr[0])); ++pstate->lexptr; /* For the time being this doesn't handle Unicode rules. Non-ASCII identifiers are gated anyway. 
*/ while ((pstate->lexptr[0] >= 'a' && pstate->lexptr[0] <= 'z') || (pstate->lexptr[0] >= 'A' && pstate->lexptr[0] <= 'Z') || pstate->lexptr[0] == '_' || (is_gdb_var && pstate->lexptr[0] == '$') || (pstate->lexptr[0] >= '0' && pstate->lexptr[0] <= '9')) ++pstate->lexptr; length = pstate->lexptr - start; token = NULL; for (i = 0; i < ARRAY_SIZE (identifier_tokens); ++i) { if (length == strlen (identifier_tokens[i].name) && strncmp (identifier_tokens[i].name, start, length) == 0) { token = &identifier_tokens[i]; break; } } if (token != NULL) { if (token->value == 0) { /* Leave the terminating token alone. */ pstate->lexptr = start; return 0; } } else if (token == NULL && (strncmp (start, "thread", length) == 0 || strncmp (start, "task", length) == 0) && space_then_number (pstate->lexptr)) { /* "task" or "thread" followed by a number terminates the parse, per gdb rules. */ pstate->lexptr = start; return 0; } if (token == NULL || (pstate->parse_completion && pstate->lexptr[0] == '\0')) lvalp->sval = make_stoken (copy_name (start, length)); if (pstate->parse_completion && pstate->lexptr[0] == '\0') { /* Prevent rustyylex from returning two COMPLETE tokens. */ pstate->prev_lexptr = pstate->lexptr; return COMPLETE; } if (token != NULL) return token->value; if (is_gdb_var) return GDBVAR; return IDENT; } /* Lex an operator. */ int rust_parser::lex_operator (YYSTYPE *lvalp) { const struct token_info *token = NULL; int i; for (i = 0; i < ARRAY_SIZE (operator_tokens); ++i) { if (strncmp (operator_tokens[i].name, pstate->lexptr, strlen (operator_tokens[i].name)) == 0) { pstate->lexptr += strlen (operator_tokens[i].name); token = &operator_tokens[i]; break; } } if (token != NULL) { lvalp->opcode = token->opcode; return token->value; } return *pstate->lexptr++; } /* Lex a number. 
*/ int rust_parser::lex_number (YYSTYPE *lvalp) { regmatch_t subexps[NUM_SUBEXPRESSIONS]; int match; int is_integer = 0; int could_be_decimal = 1; int implicit_i32 = 0; const char *type_name = NULL; struct type *type; int end_index; int type_index = -1; int i; match = regexec (&number_regex, pstate->lexptr, ARRAY_SIZE (subexps), subexps, 0); /* Failure means the regexp is broken. */ gdb_assert (match == 0); if (subexps[INT_TEXT].rm_so != -1) { /* Integer part matched. */ is_integer = 1; end_index = subexps[INT_TEXT].rm_eo; if (subexps[INT_TYPE].rm_so == -1) { type_name = "i32"; implicit_i32 = 1; } else { type_index = INT_TYPE; could_be_decimal = 0; } } else if (subexps[FLOAT_TYPE1].rm_so != -1) { /* Found floating point type suffix. */ end_index = subexps[FLOAT_TYPE1].rm_so; type_index = FLOAT_TYPE1; } else if (subexps[FLOAT_TYPE2].rm_so != -1) { /* Found floating point type suffix. */ end_index = subexps[FLOAT_TYPE2].rm_so; type_index = FLOAT_TYPE2; } else { /* Any other floating point match. */ end_index = subexps[0].rm_eo; type_name = "f64"; } /* We need a special case if the final character is ".". In this case we might need to parse an integer. For example, "23.f()" is a request for a trait method call, not a syntax error involving the floating point number "23.". */ gdb_assert (subexps[0].rm_eo > 0); if (pstate->lexptr[subexps[0].rm_eo - 1] == '.') { const char *next = skip_spaces (&pstate->lexptr[subexps[0].rm_eo]); if (rust_identifier_start_p (*next) || *next == '.') { --subexps[0].rm_eo; is_integer = 1; end_index = subexps[0].rm_eo; type_name = "i32"; could_be_decimal = 1; implicit_i32 = 1; } } /* Compute the type name if we haven't already. */ std::string type_name_holder; if (type_name == NULL) { gdb_assert (type_index != -1); type_name_holder = std::string ((pstate->lexptr + subexps[type_index].rm_so), (subexps[type_index].rm_eo - subexps[type_index].rm_so)); type_name = type_name_holder.c_str (); } /* Look up the type. 
*/ type = get_type (type_name); /* Copy the text of the number and remove the "_"s. */ std::string number; for (i = 0; i < end_index && pstate->lexptr[i]; ++i) { if (pstate->lexptr[i] == '_') could_be_decimal = 0; else number.push_back (pstate->lexptr[i]); } /* Advance past the match. */ pstate->lexptr += subexps[0].rm_eo; /* Parse the number. */ if (is_integer) { uint64_t value; int radix = 10; int offset = 0; if (number[0] == '0') { if (number[1] == 'x') radix = 16; else if (number[1] == 'o') radix = 8; else if (number[1] == 'b') radix = 2; if (radix != 10) { offset = 2; could_be_decimal = 0; } } value = strtoul (number.c_str () + offset, NULL, radix); if (implicit_i32 && value >= ((uint64_t) 1) << 31) type = get_type ("i64"); lvalp->typed_val_int.val = value; lvalp->typed_val_int.type = type; } else { lvalp->typed_val_float.type = type; bool parsed = parse_float (number.c_str (), number.length (), lvalp->typed_val_float.type, lvalp->typed_val_float.val); gdb_assert (parsed); } return is_integer ? (could_be_decimal ? DECIMAL_INTEGER : INTEGER) : FLOAT; } /* The lexer. */ static int rustyylex (YYSTYPE *lvalp, rust_parser *parser) { struct parser_state *pstate = parser->pstate; /* Skip all leading whitespace. */ while (pstate->lexptr[0] == ' ' || pstate->lexptr[0] == '\t' || pstate->lexptr[0] == '\r' || pstate->lexptr[0] == '\n') ++pstate->lexptr; /* If we hit EOF and we're completing, then return COMPLETE -- maybe we're completing an empty string at the end of a field_expr. But, we don't want to return two COMPLETE tokens in a row. 
*/ if (pstate->lexptr[0] == '\0' && pstate->lexptr == pstate->prev_lexptr) return 0; pstate->prev_lexptr = pstate->lexptr; if (pstate->lexptr[0] == '\0') { if (pstate->parse_completion) { lvalp->sval = make_stoken (""); return COMPLETE; } return 0; } if (pstate->lexptr[0] >= '0' && pstate->lexptr[0] <= '9') return parser->lex_number (lvalp); else if (pstate->lexptr[0] == 'b' && pstate->lexptr[1] == '\'') return parser->lex_character (lvalp); else if (pstate->lexptr[0] == 'b' && pstate->lexptr[1] == '"') return parser->lex_string (lvalp); else if (pstate->lexptr[0] == 'b' && starts_raw_string (pstate->lexptr + 1)) return parser->lex_string (lvalp); else if (starts_raw_string (pstate->lexptr)) return parser->lex_string (lvalp); else if (rust_identifier_start_p (pstate->lexptr[0])) return parser->lex_identifier (lvalp); else if (pstate->lexptr[0] == '"') return parser->lex_string (lvalp); else if (pstate->lexptr[0] == '\'') return parser->lex_character (lvalp); else if (pstate->lexptr[0] == '}' || pstate->lexptr[0] == ']') { /* Falls through to lex_operator. */ --parser->paren_depth; } else if (pstate->lexptr[0] == '(' || pstate->lexptr[0] == '{') { /* Falls through to lex_operator. */ ++parser->paren_depth; } else if (pstate->lexptr[0] == ',' && pstate->comma_terminates && parser->paren_depth == 0) return 0; return parser->lex_operator (lvalp); } /* Push back a single character to be re-lexed. */ void rust_parser::push_back (char c) { /* Can't be called before any lexing. */ gdb_assert (pstate->prev_lexptr != NULL); --pstate->lexptr; gdb_assert (*pstate->lexptr == c); } /* Make an arbitrary operation and fill in the fields. */ const struct rust_op * rust_parser::ast_operation (enum exp_opcode opcode, const struct rust_op *left, const struct rust_op *right) { struct rust_op *result = OBSTACK_ZALLOC (&obstack, struct rust_op); result->opcode = opcode; result->left.op = left; result->right.op = right; return result; } /* Make a compound assignment operation. 
*/ const struct rust_op * rust_parser::ast_compound_assignment (enum exp_opcode opcode, const struct rust_op *left, const struct rust_op *right) { struct rust_op *result = OBSTACK_ZALLOC (&obstack, struct rust_op); result->opcode = opcode; result->compound_assignment = 1; result->left.op = left; result->right.op = right; return result; } /* Make a typed integer literal operation. */ const struct rust_op * rust_parser::ast_literal (struct typed_val_int val) { struct rust_op *result = OBSTACK_ZALLOC (&obstack, struct rust_op); result->opcode = OP_LONG; result->left.typed_val_int = val; return result; } /* Make a typed floating point literal operation. */ const struct rust_op * rust_parser::ast_dliteral (struct typed_val_float val) { struct rust_op *result = OBSTACK_ZALLOC (&obstack, struct rust_op); result->opcode = OP_FLOAT; result->left.typed_val_float = val; return result; } /* Make a unary operation. */ const struct rust_op * rust_parser::ast_unary (enum exp_opcode opcode, const struct rust_op *expr) { return ast_operation (opcode, expr, NULL); } /* Make a cast operation. */ const struct rust_op * rust_parser::ast_cast (const struct rust_op *expr, const struct rust_op *type) { struct rust_op *result = OBSTACK_ZALLOC (&obstack, struct rust_op); result->opcode = UNOP_CAST; result->left.op = expr; result->right.op = type; return result; } /* Make a call-like operation. This is nominally a function call, but when lowering we may discover that it actually represents the creation of a tuple struct. */ const struct rust_op * rust_parser::ast_call_ish (enum exp_opcode opcode, const struct rust_op *expr, rust_op_vector *params) { struct rust_op *result = OBSTACK_ZALLOC (&obstack, struct rust_op); result->opcode = opcode; result->left.op = expr; result->right.params = params; return result; } /* Make a structure creation operation. 
*/ const struct rust_op * rust_parser::ast_struct (const struct rust_op *name, rust_set_vector *fields) { struct rust_op *result = OBSTACK_ZALLOC (&obstack, struct rust_op); result->opcode = OP_AGGREGATE; result->left.op = name; result->right.field_inits = fields; return result; } /* Make an identifier path. */ const struct rust_op * rust_parser::ast_path (struct stoken path, rust_op_vector *params) { struct rust_op *result = OBSTACK_ZALLOC (&obstack, struct rust_op); result->opcode = OP_VAR_VALUE; result->left.sval = path; result->right.params = params; return result; } /* Make a string constant operation. */ const struct rust_op * rust_parser::ast_string (struct stoken str) { struct rust_op *result = OBSTACK_ZALLOC (&obstack, struct rust_op); result->opcode = OP_STRING; result->left.sval = str; return result; } /* Make a field expression. */ const struct rust_op * rust_parser::ast_structop (const struct rust_op *left, const char *name, int completing) { struct rust_op *result = OBSTACK_ZALLOC (&obstack, struct rust_op); result->opcode = STRUCTOP_STRUCT; result->completing = completing; result->left.op = left; result->right.sval = make_stoken (name); return result; } /* Make an anonymous struct operation, like 'x.0'. */ const struct rust_op * rust_parser::ast_structop_anonymous (const struct rust_op *left, struct typed_val_int number) { struct rust_op *result = OBSTACK_ZALLOC (&obstack, struct rust_op); result->opcode = STRUCTOP_ANONYMOUS; result->left.op = left; result->right.typed_val_int = number; return result; } /* Make a range operation. */ const struct rust_op * rust_parser::ast_range (const struct rust_op *lhs, const struct rust_op *rhs, bool inclusive) { struct rust_op *result = OBSTACK_ZALLOC (&obstack, struct rust_op); result->opcode = OP_RANGE; result->inclusive = inclusive; result->left.op = lhs; result->right.op = rhs; return result; } /* A helper function to make a type-related AST node. 
*/ struct rust_op * rust_parser::ast_basic_type (enum type_code typecode) { struct rust_op *result = OBSTACK_ZALLOC (&obstack, struct rust_op); result->opcode = OP_TYPE; result->typecode = typecode; return result; } /* Create an AST node describing an array type. */ const struct rust_op * rust_parser::ast_array_type (const struct rust_op *lhs, struct typed_val_int val) { struct rust_op *result = ast_basic_type (TYPE_CODE_ARRAY); result->left.op = lhs; result->right.typed_val_int = val; return result; } /* Create an AST node describing a reference type. */ const struct rust_op * rust_parser::ast_slice_type (const struct rust_op *type) { /* Use TYPE_CODE_COMPLEX just because it is handy. */ struct rust_op *result = ast_basic_type (TYPE_CODE_COMPLEX); result->left.op = type; return result; } /* Create an AST node describing a reference type. */ const struct rust_op * rust_parser::ast_reference_type (const struct rust_op *type) { struct rust_op *result = ast_basic_type (TYPE_CODE_REF); result->left.op = type; return result; } /* Create an AST node describing a pointer type. */ const struct rust_op * rust_parser::ast_pointer_type (const struct rust_op *type, int is_mut) { struct rust_op *result = ast_basic_type (TYPE_CODE_PTR); result->left.op = type; /* For the time being we ignore is_mut. */ return result; } /* Create an AST node describing a function type. */ const struct rust_op * rust_parser::ast_function_type (const struct rust_op *rtype, rust_op_vector *params) { struct rust_op *result = ast_basic_type (TYPE_CODE_FUNC); result->left.op = rtype; result->right.params = params; return result; } /* Create an AST node describing a tuple type. */ const struct rust_op * rust_parser::ast_tuple_type (rust_op_vector *params) { struct rust_op *result = ast_basic_type (TYPE_CODE_STRUCT); result->left.params = params; return result; } /* A helper to appropriately munge NAME and BLOCK depending on the presence of a leading "::". 
*/ static void munge_name_and_block (const char **name, const struct block **block) { /* If it is a global reference, skip the current block in favor of the static block. */ if (strncmp (*name, "::", 2) == 0) { *name += 2; *block = block_static_block (*block); } } /* Like lookup_symbol, but handles Rust namespace conventions, and doesn't require field_of_this_result. */ struct block_symbol rust_parser::lookup_symbol (const char *name, const struct block *block, const domain_enum domain) { struct block_symbol result; munge_name_and_block (&name, &block); result = ::lookup_symbol (name, block, domain, NULL); if (result.symbol != NULL) update_innermost_block (result); return result; } /* Look up a type, following Rust namespace conventions. */ struct type * rust_parser::rust_lookup_type (const char *name, const struct block *block) { struct block_symbol result; struct type *type; munge_name_and_block (&name, &block); result = ::lookup_symbol (name, block, STRUCT_DOMAIN, NULL); if (result.symbol != NULL) { update_innermost_block (result); return SYMBOL_TYPE (result.symbol); } type = lookup_typename (language (), arch (), name, NULL, 1); if (type != NULL) return type; /* Last chance, try a built-in type. */ return language_lookup_primitive_type (language (), arch (), name); } /* Convert a vector of rust_ops representing types to a vector of types. */ std::vector rust_parser::convert_params_to_types (rust_op_vector *params) { std::vector result; if (params != nullptr) { for (const rust_op *op : *params) result.push_back (convert_ast_to_type (op)); } return result; } /* Convert a rust_op representing a type to a struct type *. 
*/ struct type * rust_parser::convert_ast_to_type (const struct rust_op *operation) { struct type *type, *result = NULL; if (operation->opcode == OP_VAR_VALUE) { const char *varname = convert_name (operation); result = rust_lookup_type (varname, pstate->expression_context_block); if (result == NULL) error (_("No typed name '%s' in current context"), varname); return result; } gdb_assert (operation->opcode == OP_TYPE); switch (operation->typecode) { case TYPE_CODE_ARRAY: type = convert_ast_to_type (operation->left.op); if (operation->right.typed_val_int.val < 0) error (_("Negative array length")); result = lookup_array_range_type (type, 0, operation->right.typed_val_int.val - 1); break; case TYPE_CODE_COMPLEX: { struct type *usize = get_type ("usize"); type = convert_ast_to_type (operation->left.op); result = rust_slice_type ("&[*gdb*]", type, usize); } break; case TYPE_CODE_REF: case TYPE_CODE_PTR: /* For now we treat &x and *x identically. */ type = convert_ast_to_type (operation->left.op); result = lookup_pointer_type (type); break; case TYPE_CODE_FUNC: { std::vector args (convert_params_to_types (operation->right.params)); struct type **argtypes = NULL; type = convert_ast_to_type (operation->left.op); if (!args.empty ()) argtypes = args.data (); result = lookup_function_type_with_arguments (type, args.size (), argtypes); result = lookup_pointer_type (result); } break; case TYPE_CODE_STRUCT: { std::vector args (convert_params_to_types (operation->left.params)); int i; const char *name; obstack_1grow (&obstack, '('); for (i = 0; i < args.size (); ++i) { std::string type_name = type_to_string (args[i]); if (i > 0) obstack_1grow (&obstack, ','); obstack_grow_str (&obstack, type_name.c_str ()); } obstack_grow_str0 (&obstack, ")"); name = (const char *) obstack_finish (&obstack); /* We don't allow creating new tuple types (yet), but we do allow looking up existing tuple types. 
*/ result = rust_lookup_type (name, pstate->expression_context_block); if (result == NULL) error (_("could not find tuple type '%s'"), name); } break; default: gdb_assert_not_reached ("unhandled opcode in convert_ast_to_type"); } gdb_assert (result != NULL); return result; } /* A helper function to turn a rust_op representing a name into a full name. This applies generic arguments as needed. The returned name is allocated on the work obstack. */ const char * rust_parser::convert_name (const struct rust_op *operation) { int i; gdb_assert (operation->opcode == OP_VAR_VALUE); if (operation->right.params == NULL) return operation->left.sval.ptr; std::vector types (convert_params_to_types (operation->right.params)); obstack_grow_str (&obstack, operation->left.sval.ptr); obstack_1grow (&obstack, '<'); for (i = 0; i < types.size (); ++i) { std::string type_name = type_to_string (types[i]); if (i > 0) obstack_1grow (&obstack, ','); obstack_grow_str (&obstack, type_name.c_str ()); } obstack_grow_str0 (&obstack, ">"); return (const char *) obstack_finish (&obstack); } /* A helper function that converts a vec of rust_ops to a gdb expression. */ void rust_parser::convert_params_to_expression (rust_op_vector *params, const struct rust_op *top) { for (const rust_op *elem : *params) convert_ast_to_expression (elem, top); } /* Lower a rust_op to a gdb expression. STATE is the parser state. OPERATION is the operation to lower. TOP is a pointer to the top-most operation; it is used to handle the special case where the top-most expression is an identifier and can be optionally lowered to OP_TYPE. WANT_TYPE is a flag indicating that, if the expression is the name of a type, then emit an OP_TYPE for it (rather than erroring). If WANT_TYPE is set, then the similar TOP handling is not done. 
*/ void rust_parser::convert_ast_to_expression (const struct rust_op *operation, const struct rust_op *top, bool want_type) { switch (operation->opcode) { case OP_LONG: write_exp_elt_opcode (pstate, OP_LONG); write_exp_elt_type (pstate, operation->left.typed_val_int.type); write_exp_elt_longcst (pstate, operation->left.typed_val_int.val); write_exp_elt_opcode (pstate, OP_LONG); break; case OP_FLOAT: write_exp_elt_opcode (pstate, OP_FLOAT); write_exp_elt_type (pstate, operation->left.typed_val_float.type); write_exp_elt_floatcst (pstate, operation->left.typed_val_float.val); write_exp_elt_opcode (pstate, OP_FLOAT); break; case STRUCTOP_STRUCT: { convert_ast_to_expression (operation->left.op, top); if (operation->completing) pstate->mark_struct_expression (); write_exp_elt_opcode (pstate, STRUCTOP_STRUCT); write_exp_string (pstate, operation->right.sval); write_exp_elt_opcode (pstate, STRUCTOP_STRUCT); } break; case STRUCTOP_ANONYMOUS: { convert_ast_to_expression (operation->left.op, top); write_exp_elt_opcode (pstate, STRUCTOP_ANONYMOUS); write_exp_elt_longcst (pstate, operation->right.typed_val_int.val); write_exp_elt_opcode (pstate, STRUCTOP_ANONYMOUS); } break; case UNOP_SIZEOF: convert_ast_to_expression (operation->left.op, top, true); write_exp_elt_opcode (pstate, UNOP_SIZEOF); break; case UNOP_PLUS: case UNOP_NEG: case UNOP_COMPLEMENT: case UNOP_IND: case UNOP_ADDR: convert_ast_to_expression (operation->left.op, top); write_exp_elt_opcode (pstate, operation->opcode); break; case BINOP_SUBSCRIPT: case BINOP_MUL: case BINOP_REPEAT: case BINOP_DIV: case BINOP_REM: case BINOP_LESS: case BINOP_GTR: case BINOP_BITWISE_AND: case BINOP_BITWISE_IOR: case BINOP_BITWISE_XOR: case BINOP_ADD: case BINOP_SUB: case BINOP_LOGICAL_OR: case BINOP_LOGICAL_AND: case BINOP_EQUAL: case BINOP_NOTEQUAL: case BINOP_LEQ: case BINOP_GEQ: case BINOP_LSH: case BINOP_RSH: case BINOP_ASSIGN: case OP_RUST_ARRAY: convert_ast_to_expression (operation->left.op, top); convert_ast_to_expression 
(operation->right.op, top); if (operation->compound_assignment) { write_exp_elt_opcode (pstate, BINOP_ASSIGN_MODIFY); write_exp_elt_opcode (pstate, operation->opcode); write_exp_elt_opcode (pstate, BINOP_ASSIGN_MODIFY); } else write_exp_elt_opcode (pstate, operation->opcode); if (operation->compound_assignment || operation->opcode == BINOP_ASSIGN) { struct type *type; type = language_lookup_primitive_type (pstate->language (), pstate->gdbarch (), "()"); write_exp_elt_opcode (pstate, OP_LONG); write_exp_elt_type (pstate, type); write_exp_elt_longcst (pstate, 0); write_exp_elt_opcode (pstate, OP_LONG); write_exp_elt_opcode (pstate, BINOP_COMMA); } break; case UNOP_CAST: { struct type *type = convert_ast_to_type (operation->right.op); convert_ast_to_expression (operation->left.op, top); write_exp_elt_opcode (pstate, UNOP_CAST); write_exp_elt_type (pstate, type); write_exp_elt_opcode (pstate, UNOP_CAST); } break; case OP_FUNCALL: { if (operation->left.op->opcode == OP_VAR_VALUE) { struct type *type; const char *varname = convert_name (operation->left.op); type = rust_lookup_type (varname, pstate->expression_context_block); if (type != NULL) { /* This is actually a tuple struct expression, not a call expression. 
*/ rust_op_vector *params = operation->right.params; if (TYPE_CODE (type) != TYPE_CODE_NAMESPACE) { if (!rust_tuple_struct_type_p (type)) error (_("Type %s is not a tuple struct"), varname); for (int i = 0; i < params->size (); ++i) { char *cell = get_print_cell (); xsnprintf (cell, PRINT_CELL_SIZE, "__%d", i); write_exp_elt_opcode (pstate, OP_NAME); write_exp_string (pstate, make_stoken (cell)); write_exp_elt_opcode (pstate, OP_NAME); convert_ast_to_expression ((*params)[i], top); } write_exp_elt_opcode (pstate, OP_AGGREGATE); write_exp_elt_type (pstate, type); write_exp_elt_longcst (pstate, 2 * params->size ()); write_exp_elt_opcode (pstate, OP_AGGREGATE); break; } } } convert_ast_to_expression (operation->left.op, top); convert_params_to_expression (operation->right.params, top); write_exp_elt_opcode (pstate, OP_FUNCALL); write_exp_elt_longcst (pstate, operation->right.params->size ()); write_exp_elt_longcst (pstate, OP_FUNCALL); } break; case OP_ARRAY: gdb_assert (operation->left.op == NULL); convert_params_to_expression (operation->right.params, top); write_exp_elt_opcode (pstate, OP_ARRAY); write_exp_elt_longcst (pstate, 0); write_exp_elt_longcst (pstate, operation->right.params->size () - 1); write_exp_elt_longcst (pstate, OP_ARRAY); break; case OP_VAR_VALUE: { struct block_symbol sym; const char *varname; if (operation->left.sval.ptr[0] == '$') { write_dollar_variable (pstate, operation->left.sval); break; } varname = convert_name (operation); sym = lookup_symbol (varname, pstate->expression_context_block, VAR_DOMAIN); if (sym.symbol != NULL && SYMBOL_CLASS (sym.symbol) != LOC_TYPEDEF) { write_exp_elt_opcode (pstate, OP_VAR_VALUE); write_exp_elt_block (pstate, sym.block); write_exp_elt_sym (pstate, sym.symbol); write_exp_elt_opcode (pstate, OP_VAR_VALUE); } else { struct type *type = NULL; if (sym.symbol != NULL) { gdb_assert (SYMBOL_CLASS (sym.symbol) == LOC_TYPEDEF); type = SYMBOL_TYPE (sym.symbol); } if (type == NULL) type = rust_lookup_type (varname, 
pstate->expression_context_block); if (type == NULL) error (_("No symbol '%s' in current context"), varname); if (!want_type && TYPE_CODE (type) == TYPE_CODE_STRUCT && TYPE_NFIELDS (type) == 0) { /* A unit-like struct. */ write_exp_elt_opcode (pstate, OP_AGGREGATE); write_exp_elt_type (pstate, type); write_exp_elt_longcst (pstate, 0); write_exp_elt_opcode (pstate, OP_AGGREGATE); } else if (want_type || operation == top) { write_exp_elt_opcode (pstate, OP_TYPE); write_exp_elt_type (pstate, type); write_exp_elt_opcode (pstate, OP_TYPE); } else error (_("Found type '%s', which can't be " "evaluated in this context"), varname); } } break; case OP_AGGREGATE: { int length; rust_set_vector *fields = operation->right.field_inits; struct type *type; const char *name; length = 0; for (const set_field &init : *fields) { if (init.name.ptr != NULL) { write_exp_elt_opcode (pstate, OP_NAME); write_exp_string (pstate, init.name); write_exp_elt_opcode (pstate, OP_NAME); ++length; } convert_ast_to_expression (init.init, top); ++length; if (init.name.ptr == NULL) { /* This is handled differently from Ada in our evaluator. 
*/ write_exp_elt_opcode (pstate, OP_OTHERS); } } name = convert_name (operation->left.op); type = rust_lookup_type (name, pstate->expression_context_block); if (type == NULL) error (_("Could not find type '%s'"), operation->left.sval.ptr); if (TYPE_CODE (type) != TYPE_CODE_STRUCT || rust_tuple_type_p (type) || rust_tuple_struct_type_p (type)) error (_("Struct expression applied to non-struct type")); write_exp_elt_opcode (pstate, OP_AGGREGATE); write_exp_elt_type (pstate, type); write_exp_elt_longcst (pstate, length); write_exp_elt_opcode (pstate, OP_AGGREGATE); } break; case OP_STRING: { write_exp_elt_opcode (pstate, OP_STRING); write_exp_string (pstate, operation->left.sval); write_exp_elt_opcode (pstate, OP_STRING); } break; case OP_RANGE: { enum range_type kind = BOTH_BOUND_DEFAULT; if (operation->left.op != NULL) { convert_ast_to_expression (operation->left.op, top); kind = HIGH_BOUND_DEFAULT; } if (operation->right.op != NULL) { convert_ast_to_expression (operation->right.op, top); if (kind == BOTH_BOUND_DEFAULT) kind = (operation->inclusive ? LOW_BOUND_DEFAULT : LOW_BOUND_DEFAULT_EXCLUSIVE); else { gdb_assert (kind == HIGH_BOUND_DEFAULT); kind = (operation->inclusive ? NONE_BOUND_DEFAULT : NONE_BOUND_DEFAULT_EXCLUSIVE); } } else { /* Nothing should make an inclusive range without an upper bound. */ gdb_assert (!operation->inclusive); } write_exp_elt_opcode (pstate, OP_RANGE); write_exp_elt_longcst (pstate, kind); write_exp_elt_opcode (pstate, OP_RANGE); } break; default: gdb_assert_not_reached ("unhandled opcode in convert_ast_to_expression"); } } /* The parser as exposed to gdb. */ int rust_parse (struct parser_state *state) { int result; /* This sets various globals and also clears them on destruction. */ rust_parser parser (state); result = rustyyparse (&parser); if (!result || (state->parse_completion && parser.rust_ast != NULL)) parser.convert_ast_to_expression (parser.rust_ast, parser.rust_ast); return result; } /* The parser error handler. 
*/ static void rustyyerror (rust_parser *parser, const char *msg) { const char *where = (parser->pstate->prev_lexptr ? parser->pstate->prev_lexptr : parser->pstate->lexptr); error (_("%s in expression, near `%s'."), msg, where); } #if GDB_SELF_TEST /* Initialize the lexer for testing. */ static void rust_lex_test_init (rust_parser *parser, const char *input) { parser->pstate->prev_lexptr = NULL; parser->pstate->lexptr = input; parser->paren_depth = 0; } /* A test helper that lexes a string, expecting a single token. It returns the lexer data for this token. */ static RUSTSTYPE rust_lex_test_one (rust_parser *parser, const char *input, int expected) { int token; RUSTSTYPE result; rust_lex_test_init (parser, input); token = rustyylex (&result, parser); SELF_CHECK (token == expected); if (token) { RUSTSTYPE ignore; token = rustyylex (&ignore, parser); SELF_CHECK (token == 0); } return result; } /* Test that INPUT lexes as the integer VALUE. */ static void rust_lex_int_test (rust_parser *parser, const char *input, int value, int kind) { RUSTSTYPE result = rust_lex_test_one (parser, input, kind); SELF_CHECK (result.typed_val_int.val == value); } /* Test that INPUT throws an exception with text ERR. */ static void rust_lex_exception_test (rust_parser *parser, const char *input, const char *err) { TRY { /* The "kind" doesn't matter. */ rust_lex_test_one (parser, input, DECIMAL_INTEGER); SELF_CHECK (0); } CATCH (except, RETURN_MASK_ERROR) { SELF_CHECK (strcmp (except.message, err) == 0); } END_CATCH } /* Test that INPUT lexes as the identifier, string, or byte-string VALUE. KIND holds the expected token kind. */ static void rust_lex_stringish_test (rust_parser *parser, const char *input, const char *value, int kind) { RUSTSTYPE result = rust_lex_test_one (parser, input, kind); SELF_CHECK (result.sval.length == strlen (value)); SELF_CHECK (strncmp (result.sval.ptr, value, result.sval.length) == 0); } /* Helper to test that a string parses as a given token sequence. 
*/ static void rust_lex_test_sequence (rust_parser *parser, const char *input, int len, const int expected[]) { int i; parser->pstate->lexptr = input; parser->paren_depth = 0; for (i = 0; i < len; ++i) { RUSTSTYPE ignore; int token = rustyylex (&ignore, parser); SELF_CHECK (token == expected[i]); } } /* Tests for an integer-parsing corner case. */ static void rust_lex_test_trailing_dot (rust_parser *parser) { const int expected1[] = { DECIMAL_INTEGER, '.', IDENT, '(', ')', 0 }; const int expected2[] = { INTEGER, '.', IDENT, '(', ')', 0 }; const int expected3[] = { FLOAT, EQEQ, '(', ')', 0 }; const int expected4[] = { DECIMAL_INTEGER, DOTDOT, DECIMAL_INTEGER, 0 }; rust_lex_test_sequence (parser, "23.g()", ARRAY_SIZE (expected1), expected1); rust_lex_test_sequence (parser, "23_0.g()", ARRAY_SIZE (expected2), expected2); rust_lex_test_sequence (parser, "23.==()", ARRAY_SIZE (expected3), expected3); rust_lex_test_sequence (parser, "23..25", ARRAY_SIZE (expected4), expected4); } /* Tests of completion. */ static void rust_lex_test_completion (rust_parser *parser) { const int expected[] = { IDENT, '.', COMPLETE, 0 }; parser->pstate->parse_completion = 1; rust_lex_test_sequence (parser, "something.wha", ARRAY_SIZE (expected), expected); rust_lex_test_sequence (parser, "something.", ARRAY_SIZE (expected), expected); parser->pstate->parse_completion = 0; } /* Test pushback. */ static void rust_lex_test_push_back (rust_parser *parser) { int token; RUSTSTYPE lval; rust_lex_test_init (parser, ">>="); token = rustyylex (&lval, parser); SELF_CHECK (token == COMPOUND_ASSIGN); SELF_CHECK (lval.opcode == BINOP_RSH); parser->push_back ('='); token = rustyylex (&lval, parser); SELF_CHECK (token == '='); token = rustyylex (&lval, parser); SELF_CHECK (token == 0); } /* Unit test the lexer. */ static void rust_lex_tests (void) { int i; // Set up dummy "parser", so that rust_type works. 
struct parser_state ps (&rust_language_defn, target_gdbarch (), nullptr, 0, 0, nullptr, 0, nullptr); rust_parser parser (&ps); rust_lex_test_one (&parser, "", 0); rust_lex_test_one (&parser, " \t \n \r ", 0); rust_lex_test_one (&parser, "thread 23", 0); rust_lex_test_one (&parser, "task 23", 0); rust_lex_test_one (&parser, "th 104", 0); rust_lex_test_one (&parser, "ta 97", 0); rust_lex_int_test (&parser, "'z'", 'z', INTEGER); rust_lex_int_test (&parser, "'\\xff'", 0xff, INTEGER); rust_lex_int_test (&parser, "'\\u{1016f}'", 0x1016f, INTEGER); rust_lex_int_test (&parser, "b'z'", 'z', INTEGER); rust_lex_int_test (&parser, "b'\\xfe'", 0xfe, INTEGER); rust_lex_int_test (&parser, "b'\\xFE'", 0xfe, INTEGER); rust_lex_int_test (&parser, "b'\\xfE'", 0xfe, INTEGER); /* Test all escapes in both modes. */ rust_lex_int_test (&parser, "'\\n'", '\n', INTEGER); rust_lex_int_test (&parser, "'\\r'", '\r', INTEGER); rust_lex_int_test (&parser, "'\\t'", '\t', INTEGER); rust_lex_int_test (&parser, "'\\\\'", '\\', INTEGER); rust_lex_int_test (&parser, "'\\0'", '\0', INTEGER); rust_lex_int_test (&parser, "'\\''", '\'', INTEGER); rust_lex_int_test (&parser, "'\\\"'", '"', INTEGER); rust_lex_int_test (&parser, "b'\\n'", '\n', INTEGER); rust_lex_int_test (&parser, "b'\\r'", '\r', INTEGER); rust_lex_int_test (&parser, "b'\\t'", '\t', INTEGER); rust_lex_int_test (&parser, "b'\\\\'", '\\', INTEGER); rust_lex_int_test (&parser, "b'\\0'", '\0', INTEGER); rust_lex_int_test (&parser, "b'\\''", '\'', INTEGER); rust_lex_int_test (&parser, "b'\\\"'", '"', INTEGER); rust_lex_exception_test (&parser, "'z", "Unterminated character literal"); rust_lex_exception_test (&parser, "b'\\x0'", "Not enough hex digits seen"); rust_lex_exception_test (&parser, "b'\\u{0}'", "Unicode escape in byte literal"); rust_lex_exception_test (&parser, "'\\x0'", "Not enough hex digits seen"); rust_lex_exception_test (&parser, "'\\u0'", "Missing '{' in Unicode escape"); rust_lex_exception_test (&parser, "'\\u{0", "Missing '}' 
in Unicode escape"); rust_lex_exception_test (&parser, "'\\u{0000007}", "Overlong hex escape"); rust_lex_exception_test (&parser, "'\\u{}", "Not enough hex digits seen"); rust_lex_exception_test (&parser, "'\\Q'", "Invalid escape \\Q in literal"); rust_lex_exception_test (&parser, "b'\\Q'", "Invalid escape \\Q in literal"); rust_lex_int_test (&parser, "23", 23, DECIMAL_INTEGER); rust_lex_int_test (&parser, "2_344__29", 234429, INTEGER); rust_lex_int_test (&parser, "0x1f", 0x1f, INTEGER); rust_lex_int_test (&parser, "23usize", 23, INTEGER); rust_lex_int_test (&parser, "23i32", 23, INTEGER); rust_lex_int_test (&parser, "0x1_f", 0x1f, INTEGER); rust_lex_int_test (&parser, "0b1_101011__", 0x6b, INTEGER); rust_lex_int_test (&parser, "0o001177i64", 639, INTEGER); rust_lex_test_trailing_dot (&parser); rust_lex_test_one (&parser, "23.", FLOAT); rust_lex_test_one (&parser, "23.99f32", FLOAT); rust_lex_test_one (&parser, "23e7", FLOAT); rust_lex_test_one (&parser, "23E-7", FLOAT); rust_lex_test_one (&parser, "23e+7", FLOAT); rust_lex_test_one (&parser, "23.99e+7f64", FLOAT); rust_lex_test_one (&parser, "23.82f32", FLOAT); rust_lex_stringish_test (&parser, "hibob", "hibob", IDENT); rust_lex_stringish_test (&parser, "hibob__93", "hibob__93", IDENT); rust_lex_stringish_test (&parser, "thread", "thread", IDENT); rust_lex_stringish_test (&parser, "\"string\"", "string", STRING); rust_lex_stringish_test (&parser, "\"str\\ting\"", "str\ting", STRING); rust_lex_stringish_test (&parser, "\"str\\\"ing\"", "str\"ing", STRING); rust_lex_stringish_test (&parser, "r\"str\\ing\"", "str\\ing", STRING); rust_lex_stringish_test (&parser, "r#\"str\\ting\"#", "str\\ting", STRING); rust_lex_stringish_test (&parser, "r###\"str\\\"ing\"###", "str\\\"ing", STRING); rust_lex_stringish_test (&parser, "b\"string\"", "string", BYTESTRING); rust_lex_stringish_test (&parser, "b\"\x73tring\"", "string", BYTESTRING); rust_lex_stringish_test (&parser, "b\"str\\\"ing\"", "str\"ing", BYTESTRING); 
rust_lex_stringish_test (&parser, "br####\"\\x73tring\"####", "\\x73tring", BYTESTRING); for (i = 0; i < ARRAY_SIZE (identifier_tokens); ++i) rust_lex_test_one (&parser, identifier_tokens[i].name, identifier_tokens[i].value); for (i = 0; i < ARRAY_SIZE (operator_tokens); ++i) rust_lex_test_one (&parser, operator_tokens[i].name, operator_tokens[i].value); rust_lex_test_completion (&parser); rust_lex_test_push_back (&parser); } #endif /* GDB_SELF_TEST */ void _initialize_rust_exp (void) { int code = regcomp (&number_regex, number_regex_text, REG_EXTENDED); /* If the regular expression was incorrect, it was a programming error. */ gdb_assert (code == 0); #if GDB_SELF_TEST selftests::register_test ("rust-lex", rust_lex_tests); #endif }