diff options
author | David Malcolm <dmalcolm@redhat.com> | 2016-06-22 15:20:41 +0000 |
---|---|---|
committer | David Malcolm <dmalcolm@gcc.gnu.org> | 2016-06-22 15:20:41 +0000 |
commit | 1a4f11c88ae761d4c618e540e07e4e32e85850d1 (patch) | |
tree | 6249c6a772dc9140476eee29b52cfe04dbe0e29d /gcc/c | |
parent | 6f99ef82f1457d2f71121853ef2f006d0800bd19 (diff) | |
download | gcc-1a4f11c88ae761d4c618e540e07e4e32e85850d1.zip gcc-1a4f11c88ae761d4c618e540e07e4e32e85850d1.tar.gz gcc-1a4f11c88ae761d4c618e540e07e4e32e85850d1.tar.bz2 |
C FE: suggest corrections for misspelled identifiers and type names
gcc/c-family/ChangeLog:
PR c/70339
* c-common.h (enum lookup_name_fuzzy_kind): New enum.
(lookup_name_fuzzy): New prototype.
gcc/c/ChangeLog:
PR c/70339
* c-decl.c: Include spellcheck-tree.h and gcc-rich-location.h.
(implicit_decl_warning): When issuing warnings for implicit
declarations, attempt to provide a suggestion via
lookup_name_fuzzy.
(undeclared_variable): Likewise when issuing errors.
(lookup_name_in_scope): Likewise.
(struct edit_distance_traits<cpp_hashnode *>): New struct.
(best_macro_match): New typedef.
(find_closest_macro_cpp_cb): New function.
(lookup_name_fuzzy): New function.
* c-parser.c: Include gcc-rich-location.h.
(c_token_starts_typename): Split out case CPP_KEYWORD into...
(c_keyword_starts_typename): ...this new function.
(c_parser_declaration_or_fndef): When issuing errors about
missing "struct" etc, add a fixit. For other kinds of errors,
attempt to provide a suggestion via lookup_name_fuzzy.
(c_parser_parms_declarator): When looking ahead to detect typos in
type names, also reject CPP_KEYWORD.
(c_parser_parameter_declaration): When issuing errors about
unknown type names, attempt to provide a suggestion via
lookup_name_fuzzy.
* c-tree.h (c_keyword_starts_typename): New prototype.
gcc/ChangeLog:
PR c/70339
* diagnostic-core.h (pedwarn_at_rich_loc): New prototype.
* diagnostic.c (pedwarn_at_rich_loc): New function.
* spellcheck.h (best_match::best_match): Add a
"best_distance_so_far" optional parameter.
(best_match::set_best_so_far): New method.
(best_match::get_best_distance): New accessor.
(best_match::get_best_candidate_length): New accessor.
gcc/testsuite/ChangeLog:
PR c/70339
* c-c++-common/attributes-1.c: Update dg-prune-output to include
hint.
* gcc.dg/diagnostic-token-ranges.c (undeclared_identifier): Update
expected results due to builtin "nanl" now being suggested for
"name".
* gcc.dg/pr67580.c: Update expected messages.
* gcc.dg/spellcheck-identifiers.c: New testcase.
* gcc.dg/spellcheck-typenames.c: New testcase.
From-SVN: r237714
Diffstat (limited to 'gcc/c')
-rw-r--r-- | gcc/c/ChangeLog | 26 | ||||
-rw-r--r-- | gcc/c/c-decl.c | 190 | ||||
-rw-r--r-- | gcc/c/c-parser.c | 144 | ||||
-rw-r--r-- | gcc/c/c-tree.h | 1 |
4 files changed, 310 insertions, 51 deletions
diff --git a/gcc/c/ChangeLog b/gcc/c/ChangeLog index 35b3de4..47d1f57 100644 --- a/gcc/c/ChangeLog +++ b/gcc/c/ChangeLog @@ -1,3 +1,29 @@ +2016-06-22 David Malcolm <dmalcolm@redhat.com> + + PR c/70339 + * c-decl.c: Include spellcheck-tree.h and gcc-rich-location.h. + (implicit_decl_warning): When issuing warnings for implicit + declarations, attempt to provide a suggestion via + lookup_name_fuzzy. + (undeclared_variable): Likewise when issuing errors. + (lookup_name_in_scope): Likewise. + (struct edit_distance_traits<cpp_hashnode *>): New struct. + (best_macro_match): New typedef. + (find_closest_macro_cpp_cb): New function. + (lookup_name_fuzzy): New function. + * c-parser.c: Include gcc-rich-location.h. + (c_token_starts_typename): Split out case CPP_KEYWORD into... + (c_keyword_starts_typename): ...this new function. + (c_parser_declaration_or_fndef): When issuing errors about + missing "struct" etc, add a fixit. For other kinds of errors, + attempt to provide a suggestion via lookup_name_fuzzy. + (c_parser_parms_declarator): When looking ahead to detect typos in + type names, also reject CPP_KEYWORD. + (c_parser_parameter_declaration): When issuing errors about + unknown type names, attempt to provide a suggestion via + lookup_name_fuzzy. + * c-tree.h (c_keyword_starts_typename): New prototype. + 2016-06-20 Joseph Myers <joseph@codesourcery.com> PR c/71601 diff --git a/gcc/c/c-decl.c b/gcc/c/c-decl.c index 5c08c59..8b966fe 100644 --- a/gcc/c/c-decl.c +++ b/gcc/c/c-decl.c @@ -51,6 +51,8 @@ along with GCC; see the file COPYING3. If not see #include "c-family/c-ada-spec.h" #include "cilk.h" #include "builtins.h" +#include "spellcheck-tree.h" +#include "gcc-rich-location.h" /* In grokdeclarator, distinguish syntactic contexts of declarators. */ enum decl_context @@ -3086,13 +3088,36 @@ implicit_decl_warning (location_t loc, tree id, tree olddecl) if (warn_implicit_function_declaration) { bool warned; + tree hint = NULL_TREE; + if (!olddecl) + hint = lookup_name_fuzzy (id, FUZZY_LOOKUP_NAME); if (flag_isoc99) - warned = pedwarn (loc, OPT_Wimplicit_function_declaration, - "implicit declaration of function %qE", id); + if (hint) + { + gcc_rich_location richloc (loc); + richloc.add_fixit_misspelled_id (loc, hint); + warned = pedwarn_at_rich_loc + (&richloc, OPT_Wimplicit_function_declaration, + "implicit declaration of function %qE; did you mean %qE?", + id, hint); + } + else + warned = pedwarn (loc, OPT_Wimplicit_function_declaration, + "implicit declaration of function %qE", id); else - warned = warning_at (loc, OPT_Wimplicit_function_declaration, - G_("implicit declaration of function %qE"), id); + if (hint) + { + gcc_rich_location richloc (loc); + richloc.add_fixit_misspelled_id (loc, hint); + warned = warning_at_rich_loc + (&richloc, OPT_Wimplicit_function_declaration, + G_("implicit declaration of function %qE;did you mean %qE?"), + id, hint); + } + else + warned = warning_at (loc, OPT_Wimplicit_function_declaration, + G_("implicit declaration of function %qE"), id); if (olddecl && warned) locate_old_decl (olddecl); } @@ -3408,13 +3433,38 @@ undeclared_variable (location_t loc, tree id) if (current_function_decl == 0) { - error_at (loc, "%qE undeclared here (not in a function)", id); + tree guessed_id = lookup_name_fuzzy (id, FUZZY_LOOKUP_NAME); + if (guessed_id) + { + gcc_rich_location richloc (loc); + richloc.add_fixit_misspelled_id (loc, guessed_id); + error_at_rich_loc (&richloc, + "%qE undeclared here (not in a function);" + " did you mean %qE?", + id, guessed_id); + } + else + error_at (loc, "%qE undeclared here (not in a function)", id); scope = current_scope; } else { if (!objc_diagnose_private_ivar (id)) - error_at (loc, "%qE undeclared (first use in this function)", id); + { + tree guessed_id = lookup_name_fuzzy (id, FUZZY_LOOKUP_NAME); + if (guessed_id) + { + gcc_rich_location richloc (loc); + richloc.add_fixit_misspelled_id (loc, guessed_id); + error_at_rich_loc + (&richloc, + "%qE undeclared (first use in this function);" + " did you mean %qE?", + id, guessed_id); + } + else + error_at (loc, "%qE undeclared (first use in this function)", id); + } if (!already) { inform (loc, "each undeclared identifier is reported only" @@ -3904,6 +3954,134 @@ lookup_name_in_scope (tree name, struct c_scope *scope) return b->decl; return NULL_TREE; } + +/* Specialization of edit_distance_traits for preprocessor macros. */ + +template <> +struct edit_distance_traits<cpp_hashnode *> +{ + static size_t get_length (cpp_hashnode *hashnode) + { + return hashnode->ident.len; + } + + static const char *get_string (cpp_hashnode *hashnode) + { + return (const char *)hashnode->ident.str; + } +}; + +/* Specialization of best_match<> for finding the closest preprocessor + macro to a given identifier. */ + +typedef best_match<tree, cpp_hashnode *> best_macro_match; + +/* A callback for cpp_forall_identifiers, for use by lookup_name_fuzzy. + Process HASHNODE and update the best_macro_match instance pointed to be + USER_DATA. */ + +static int +find_closest_macro_cpp_cb (cpp_reader *, cpp_hashnode *hashnode, + void *user_data) +{ + if (hashnode->type != NT_MACRO) + return 1; + + best_macro_match *bmm = (best_macro_match *)user_data; + bmm->consider (hashnode); + + /* Keep iterating. */ + return 1; +} + +/* Look for the closest match for NAME within the currently valid + scopes. + + This finds the identifier with the lowest Levenshtein distance to + NAME. If there are multiple candidates with equal minimal distance, + the first one found is returned. Scopes are searched from innermost + outwards, and within a scope in reverse order of declaration, thus + benefiting candidates "near" to the current scope. + + The function also looks for similar macro names to NAME, since a + misspelled macro name will not be expanded, and hence looks like an + identifier to the C frontend. + + It also looks for start_typename keywords, to detect "singed" vs "signed" + typos. */ + +tree +lookup_name_fuzzy (tree name, enum lookup_name_fuzzy_kind kind) +{ + gcc_assert (TREE_CODE (name) == IDENTIFIER_NODE); + + best_match<tree, tree> bm (name); + + /* Look within currently valid scopes. */ + for (c_scope *scope = current_scope; scope; scope = scope->outer) + for (c_binding *binding = scope->bindings; binding; binding = binding->prev) + { + if (!binding->id) + continue; + /* Don't use bindings from implicitly declared functions, + as they were likely misspellings themselves. */ + if (TREE_CODE (binding->decl) == FUNCTION_DECL) + if (C_DECL_IMPLICIT (binding->decl)) + continue; + if (kind == FUZZY_LOOKUP_TYPENAME) + if (TREE_CODE (binding->decl) != TYPE_DECL) + continue; + bm.consider (binding->id); + } + + /* Consider macros: if the user misspelled a macro name e.g. "SOME_MACRO" + as: + x = SOME_OTHER_MACRO (y); + then "SOME_OTHER_MACRO" will survive to the frontend and show up + as a misspelled identifier. + + Use the best distance so far so that a candidate is only set if + a macro is better than anything so far. This allows early rejection + (without calculating the edit distance) of macro names that must have + distance >= bm.get_best_distance (), and means that we only get a + non-NULL result for best_macro_match if it's better than any of + the identifiers already checked, which avoids needless creation + of identifiers for macro hashnodes. */ + best_macro_match bmm (name, bm.get_best_distance ()); + cpp_forall_identifiers (parse_in, find_closest_macro_cpp_cb, &bmm); + cpp_hashnode *best_macro = bmm.get_best_meaningful_candidate (); + /* If a macro is the closest so far to NAME, use it, creating an + identifier tree node for it. */ + if (best_macro) + { + const char *id = (const char *)best_macro->ident.str; + tree macro_as_identifier + = get_identifier_with_length (id, best_macro->ident.len); + bm.set_best_so_far (macro_as_identifier, + bmm.get_best_distance (), + bmm.get_best_candidate_length ()); + } + + /* Try the "start_typename" keywords to detect + "singed" vs "signed" typos. */ + if (kind == FUZZY_LOOKUP_TYPENAME) + { + for (unsigned i = 0; i < num_c_common_reswords; i++) + { + const c_common_resword *resword = &c_common_reswords[i]; + if (!c_keyword_starts_typename (resword->rid)) + continue; + tree resword_identifier = ridpointers [resword->rid]; + if (!resword_identifier) + continue; + gcc_assert (TREE_CODE (resword_identifier) == IDENTIFIER_NODE); + bm.consider (resword_identifier); + } + } + + return bm.get_best_meaningful_candidate (); +} + /* Create the predefined scalar types of C, and some nodes representing standard constants (0, 1, (void *) 0). diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c index 78bf68e..7f491f1 100644 --- a/gcc/c/c-parser.c +++ b/gcc/c/c-parser.c @@ -58,6 +58,7 @@ along with GCC; see the file COPYING3. If not see #include "c-family/c-indentation.h" #include "gimple-expr.h" #include "context.h" +#include "gcc-rich-location.h" /* We need to walk over decls with incomplete struct/union/enum types after parsing the whole translation unit. @@ -518,6 +519,48 @@ c_parser_peek_nth_token (c_parser *parser, unsigned int n) return &parser->tokens[n - 1]; } +bool +c_keyword_starts_typename (enum rid keyword) +{ + switch (keyword) + { + case RID_UNSIGNED: + case RID_LONG: + case RID_SHORT: + case RID_SIGNED: + case RID_COMPLEX: + case RID_INT: + case RID_CHAR: + case RID_FLOAT: + case RID_DOUBLE: + case RID_VOID: + case RID_DFLOAT32: + case RID_DFLOAT64: + case RID_DFLOAT128: + case RID_BOOL: + case RID_ENUM: + case RID_STRUCT: + case RID_UNION: + case RID_TYPEOF: + case RID_CONST: + case RID_ATOMIC: + case RID_VOLATILE: + case RID_RESTRICT: + case RID_ATTRIBUTE: + case RID_FRACT: + case RID_ACCUM: + case RID_SAT: + case RID_AUTO_TYPE: + return true; + default: + if (keyword >= RID_FIRST_INT_N + && keyword < RID_FIRST_INT_N + NUM_INT_N_ENTS + && int_n_enabled_p[keyword - RID_FIRST_INT_N]) + return true; + return false; + } +} + /* Return true if TOKEN can start a type name, false otherwise. */ static bool @@ -541,43 +584,7 @@ c_token_starts_typename (c_token *token) gcc_unreachable (); } case CPP_KEYWORD: - switch (token->keyword) - { - case RID_UNSIGNED: - case RID_LONG: - case RID_SHORT: - case RID_SIGNED: - case RID_COMPLEX: - case RID_INT: - case RID_CHAR: - case RID_FLOAT: - case RID_DOUBLE: - case RID_VOID: - case RID_DFLOAT32: - case RID_DFLOAT64: - case RID_DFLOAT128: - case RID_BOOL: - case RID_ENUM: - case RID_STRUCT: - case RID_UNION: - case RID_TYPEOF: - case RID_CONST: - case RID_ATOMIC: - case RID_VOLATILE: - case RID_RESTRICT: - case RID_ATTRIBUTE: - case RID_FRACT: - case RID_ACCUM: - case RID_SAT: - case RID_AUTO_TYPE: - return true; - default: - if (token->keyword >= RID_FIRST_INT_N - && token->keyword < RID_FIRST_INT_N + NUM_INT_N_ENTS - && int_n_enabled_p[token->keyword - RID_FIRST_INT_N]) - return true; - return false; - } + return c_keyword_starts_typename (token->keyword); case CPP_LESS: if (c_dialect_objc ()) return true; @@ -1655,15 +1662,50 @@ c_parser_declaration_or_fndef (c_parser *parser, bool fndef_ok, && (!nested || !lookup_name (c_parser_peek_token (parser)->value))) { tree name = c_parser_peek_token (parser)->value; - error_at (here, "unknown type name %qE", name); - /* Give a hint to the user. This is not C++ with its implicit - typedef. */ + + /* Issue a warning about NAME being an unknown type name, perhaps + with some kind of hint. + If the user forgot a "struct" etc, suggest inserting + it. Otherwise, attempt to look for misspellings. */ + gcc_rich_location richloc (here); if (tag_exists_p (RECORD_TYPE, name)) - inform (here, "use %<struct%> keyword to refer to the type"); + { + /* This is not C++ with its implicit typedef. */ + richloc.add_fixit_insert (here, "struct"); + error_at_rich_loc (&richloc, + "unknown type name %qE;" + " use %<struct%> keyword to refer to the type", + name); + } else if (tag_exists_p (UNION_TYPE, name)) - inform (here, "use %<union%> keyword to refer to the type"); + { + richloc.add_fixit_insert (here, "union"); + error_at_rich_loc (&richloc, + "unknown type name %qE;" + " use %<union%> keyword to refer to the type", + name); + } else if (tag_exists_p (ENUMERAL_TYPE, name)) - inform (here, "use %<enum%> keyword to refer to the type"); + { + richloc.add_fixit_insert (here, "enum"); + error_at_rich_loc (&richloc, + "unknown type name %qE;" + " use %<enum%> keyword to refer to the type", + name); + } + else + { + tree hint = lookup_name_fuzzy (name, FUZZY_LOOKUP_TYPENAME); + if (hint) + { + richloc.add_fixit_misspelled_id (here, hint); + error_at_rich_loc (&richloc, + "unknown type name %qE; did you mean %qE?", + name, hint); + } + else + error_at (here, "unknown type name %qE", name); + } /* Parse declspecs normally to get a correct pointer type, but avoid a further "fails to be a type name" error. Refuse nested functions @@ -3632,7 +3674,8 @@ c_parser_parms_declarator (c_parser *parser, bool id_list_ok, tree attrs) && c_parser_peek_2nd_token (parser)->type != CPP_NAME && c_parser_peek_2nd_token (parser)->type != CPP_MULT && c_parser_peek_2nd_token (parser)->type != CPP_OPEN_PAREN - && c_parser_peek_2nd_token (parser)->type != CPP_OPEN_SQUARE) + && c_parser_peek_2nd_token (parser)->type != CPP_OPEN_SQUARE + && c_parser_peek_2nd_token (parser)->type != CPP_KEYWORD) { tree list = NULL_TREE, *nextp = &list; while (c_parser_next_token_is (parser, CPP_NAME) @@ -3807,7 +3850,18 @@ c_parser_parameter_declaration (c_parser *parser, tree attrs) c_parser_set_source_position_from_token (token); if (c_parser_next_tokens_start_typename (parser, cla_prefer_type)) { - error_at (token->location, "unknown type name %qE", token->value); + tree hint = lookup_name_fuzzy (token->value, FUZZY_LOOKUP_TYPENAME); + if (hint) + { + gcc_assert (TREE_CODE (hint) == IDENTIFIER_NODE); + gcc_rich_location richloc (token->location); + richloc.add_fixit_misspelled_id (token->location, hint); + error_at_rich_loc (&richloc, + "unknown type name %qE; did you mean %qE?", + token->value, hint); + } + else + error_at (token->location, "unknown type name %qE", token->value); parser->error = true; } /* ??? In some Objective-C cases '...' isn't applicable so there diff --git a/gcc/c/c-tree.h b/gcc/c/c-tree.h index 8f10a13..46be53e 100644 --- a/gcc/c/c-tree.h +++ b/gcc/c/c-tree.h @@ -482,6 +482,7 @@ enum c_inline_static_type { /* in c-parser.c */ extern void c_parse_init (void); +extern bool c_keyword_starts_typename (enum rid keyword); /* in c-aux-info.c */ extern void gen_aux_info_record (tree, int, int, int); |