aboutsummaryrefslogtreecommitdiff
path: root/gcc/c
diff options
context:
space:
mode:
authorDavid Malcolm <dmalcolm@redhat.com>2016-06-22 15:20:41 +0000
committerDavid Malcolm <dmalcolm@gcc.gnu.org>2016-06-22 15:20:41 +0000
commit1a4f11c88ae761d4c618e540e07e4e32e85850d1 (patch)
tree6249c6a772dc9140476eee29b52cfe04dbe0e29d /gcc/c
parent6f99ef82f1457d2f71121853ef2f006d0800bd19 (diff)
downloadgcc-1a4f11c88ae761d4c618e540e07e4e32e85850d1.zip
gcc-1a4f11c88ae761d4c618e540e07e4e32e85850d1.tar.gz
gcc-1a4f11c88ae761d4c618e540e07e4e32e85850d1.tar.bz2
C FE: suggest corrections for misspelled identifiers and type names
gcc/c-family/ChangeLog: PR c/70339 * c-common.h (enum lookup_name_fuzzy_kind): New enum. (lookup_name_fuzzy): New prototype. gcc/c/ChangeLog: PR c/70339 * c-decl.c: Include spellcheck-tree.h and gcc-rich-location.h. (implicit_decl_warning): When issuing warnings for implicit declarations, attempt to provide a suggestion via lookup_name_fuzzy. (undeclared_variable): Likewise when issuing errors. (lookup_name_in_scope): Likewise. (struct edit_distance_traits<cpp_hashnode *>): New struct. (best_macro_match): New typedef. (find_closest_macro_cpp_cb): New function. (lookup_name_fuzzy): New function. * c-parser.c: Include gcc-rich-location.h. (c_token_starts_typename): Split out case CPP_KEYWORD into... (c_keyword_starts_typename): ...this new function. (c_parser_declaration_or_fndef): When issuing errors about missing "struct" etc, add a fixit. For other kinds of errors, attempt to provide a suggestion via lookup_name_fuzzy. (c_parser_parms_declarator): When looking ahead to detect typos in type names, also reject CPP_KEYWORD. (c_parser_parameter_declaration): When issuing errors about unknown type names, attempt to provide a suggestion via lookup_name_fuzzy. * c-tree.h (c_keyword_starts_typename): New prototype. gcc/ChangeLog: PR c/70339 * diagnostic-core.h (pedwarn_at_rich_loc): New prototype. * diagnostic.c (pedwarn_at_rich_loc): New function. * spellcheck.h (best_match::best_match): Add a "best_distance_so_far" optional parameter. (best_match::set_best_so_far): New method. (best_match::get_best_distance): New accessor. (best_match::get_best_candidate_length): New accessor. gcc/testsuite/ChangeLog: PR c/70339 * c-c++-common/attributes-1.c: Update dg-prune-output to include hint. * gcc.dg/diagnostic-token-ranges.c (undeclared_identifier): Update expected results due to builtin "nanl" now being suggested for "name". * gcc.dg/pr67580.c: Update expected messages. * gcc.dg/spellcheck-identifiers.c: New testcase. * gcc.dg/spellcheck-typenames.c: New testcase. 
From-SVN: r237714
Diffstat (limited to 'gcc/c')
-rw-r--r--gcc/c/ChangeLog26
-rw-r--r--gcc/c/c-decl.c190
-rw-r--r--gcc/c/c-parser.c144
-rw-r--r--gcc/c/c-tree.h1
4 files changed, 310 insertions, 51 deletions
diff --git a/gcc/c/ChangeLog b/gcc/c/ChangeLog
index 35b3de4..47d1f57 100644
--- a/gcc/c/ChangeLog
+++ b/gcc/c/ChangeLog
@@ -1,3 +1,29 @@
+2016-06-22 David Malcolm <dmalcolm@redhat.com>
+
+ PR c/70339
+ * c-decl.c: Include spellcheck-tree.h and gcc-rich-location.h.
+ (implicit_decl_warning): When issuing warnings for implicit
+ declarations, attempt to provide a suggestion via
+ lookup_name_fuzzy.
+ (undeclared_variable): Likewise when issuing errors.
+ (lookup_name_in_scope): Likewise.
+ (struct edit_distance_traits<cpp_hashnode *>): New struct.
+ (best_macro_match): New typedef.
+ (find_closest_macro_cpp_cb): New function.
+ (lookup_name_fuzzy): New function.
+ * c-parser.c: Include gcc-rich-location.h.
+ (c_token_starts_typename): Split out case CPP_KEYWORD into...
+ (c_keyword_starts_typename): ...this new function.
+ (c_parser_declaration_or_fndef): When issuing errors about
+ missing "struct" etc, add a fixit. For other kinds of errors,
+ attempt to provide a suggestion via lookup_name_fuzzy.
+ (c_parser_parms_declarator): When looking ahead to detect typos in
+ type names, also reject CPP_KEYWORD.
+ (c_parser_parameter_declaration): When issuing errors about
+ unknown type names, attempt to provide a suggestion via
+ lookup_name_fuzzy.
+ * c-tree.h (c_keyword_starts_typename): New prototype.
+
2016-06-20 Joseph Myers <joseph@codesourcery.com>
PR c/71601
diff --git a/gcc/c/c-decl.c b/gcc/c/c-decl.c
index 5c08c59..8b966fe 100644
--- a/gcc/c/c-decl.c
+++ b/gcc/c/c-decl.c
@@ -51,6 +51,8 @@ along with GCC; see the file COPYING3. If not see
#include "c-family/c-ada-spec.h"
#include "cilk.h"
#include "builtins.h"
+#include "spellcheck-tree.h"
+#include "gcc-rich-location.h"
/* In grokdeclarator, distinguish syntactic contexts of declarators. */
enum decl_context
@@ -3086,13 +3088,36 @@ implicit_decl_warning (location_t loc, tree id, tree olddecl)
if (warn_implicit_function_declaration)
{
bool warned;
+ tree hint = NULL_TREE;
+ if (!olddecl)
+ hint = lookup_name_fuzzy (id, FUZZY_LOOKUP_NAME);
if (flag_isoc99)
- warned = pedwarn (loc, OPT_Wimplicit_function_declaration,
- "implicit declaration of function %qE", id);
+ if (hint)
+ {
+ gcc_rich_location richloc (loc);
+ richloc.add_fixit_misspelled_id (loc, hint);
+ warned = pedwarn_at_rich_loc
+ (&richloc, OPT_Wimplicit_function_declaration,
+ "implicit declaration of function %qE; did you mean %qE?",
+ id, hint);
+ }
+ else
+ warned = pedwarn (loc, OPT_Wimplicit_function_declaration,
+ "implicit declaration of function %qE", id);
else
- warned = warning_at (loc, OPT_Wimplicit_function_declaration,
- G_("implicit declaration of function %qE"), id);
+ if (hint)
+ {
+ gcc_rich_location richloc (loc);
+ richloc.add_fixit_misspelled_id (loc, hint);
+ warned = warning_at_rich_loc
+ (&richloc, OPT_Wimplicit_function_declaration,
+ G_("implicit declaration of function %qE; did you mean %qE?"),
+ id, hint);
+ }
+ else
+ warned = warning_at (loc, OPT_Wimplicit_function_declaration,
+ G_("implicit declaration of function %qE"), id);
if (olddecl && warned)
locate_old_decl (olddecl);
}
@@ -3408,13 +3433,38 @@ undeclared_variable (location_t loc, tree id)
if (current_function_decl == 0)
{
- error_at (loc, "%qE undeclared here (not in a function)", id);
+ tree guessed_id = lookup_name_fuzzy (id, FUZZY_LOOKUP_NAME);
+ if (guessed_id)
+ {
+ gcc_rich_location richloc (loc);
+ richloc.add_fixit_misspelled_id (loc, guessed_id);
+ error_at_rich_loc (&richloc,
+ "%qE undeclared here (not in a function);"
+ " did you mean %qE?",
+ id, guessed_id);
+ }
+ else
+ error_at (loc, "%qE undeclared here (not in a function)", id);
scope = current_scope;
}
else
{
if (!objc_diagnose_private_ivar (id))
- error_at (loc, "%qE undeclared (first use in this function)", id);
+ {
+ tree guessed_id = lookup_name_fuzzy (id, FUZZY_LOOKUP_NAME);
+ if (guessed_id)
+ {
+ gcc_rich_location richloc (loc);
+ richloc.add_fixit_misspelled_id (loc, guessed_id);
+ error_at_rich_loc
+ (&richloc,
+ "%qE undeclared (first use in this function);"
+ " did you mean %qE?",
+ id, guessed_id);
+ }
+ else
+ error_at (loc, "%qE undeclared (first use in this function)", id);
+ }
if (!already)
{
inform (loc, "each undeclared identifier is reported only"
@@ -3904,6 +3954,134 @@ lookup_name_in_scope (tree name, struct c_scope *scope)
return b->decl;
return NULL_TREE;
}
+
+/* Specialization of edit_distance_traits for preprocessor macros. */
+
+template <>
+struct edit_distance_traits<cpp_hashnode *>
+{
+ static size_t get_length (cpp_hashnode *hashnode)
+ {
+ return hashnode->ident.len;
+ }
+
+ static const char *get_string (cpp_hashnode *hashnode)
+ {
+ return (const char *)hashnode->ident.str;
+ }
+};
+
+/* Specialization of best_match<> for finding the closest preprocessor
+ macro to a given identifier. */
+
+typedef best_match<tree, cpp_hashnode *> best_macro_match;
+
+/* A callback for cpp_forall_identifiers, for use by lookup_name_fuzzy.
+ Process HASHNODE and update the best_macro_match instance pointed to by
+ USER_DATA. */
+
+static int
+find_closest_macro_cpp_cb (cpp_reader *, cpp_hashnode *hashnode,
+ void *user_data)
+{
+ if (hashnode->type != NT_MACRO)
+ return 1;
+
+ best_macro_match *bmm = (best_macro_match *)user_data;
+ bmm->consider (hashnode);
+
+ /* Keep iterating. */
+ return 1;
+}
+
+/* Look for the closest match for NAME within the currently valid
+ scopes.
+
+ This finds the identifier with the lowest Levenshtein distance to
+ NAME. If there are multiple candidates with equal minimal distance,
+ the first one found is returned. Scopes are searched from innermost
+ outwards, and within a scope in reverse order of declaration, thus
+ benefiting candidates "near" to the current scope.
+
+ The function also looks for similar macro names to NAME, since a
+ misspelled macro name will not be expanded, and hence looks like an
+ identifier to the C frontend.
+
+ It also looks for start_typename keywords, to detect "singed" vs "signed"
+ typos. */
+
+tree
+lookup_name_fuzzy (tree name, enum lookup_name_fuzzy_kind kind)
+{
+ gcc_assert (TREE_CODE (name) == IDENTIFIER_NODE);
+
+ best_match<tree, tree> bm (name);
+
+ /* Look within currently valid scopes. */
+ for (c_scope *scope = current_scope; scope; scope = scope->outer)
+ for (c_binding *binding = scope->bindings; binding; binding = binding->prev)
+ {
+ if (!binding->id)
+ continue;
+ /* Don't use bindings from implicitly declared functions,
+ as they were likely misspellings themselves. */
+ if (TREE_CODE (binding->decl) == FUNCTION_DECL)
+ if (C_DECL_IMPLICIT (binding->decl))
+ continue;
+ if (kind == FUZZY_LOOKUP_TYPENAME)
+ if (TREE_CODE (binding->decl) != TYPE_DECL)
+ continue;
+ bm.consider (binding->id);
+ }
+
+ /* Consider macros: if the user misspelled a macro name e.g. "SOME_MACRO"
+ as:
+ x = SOME_OTHER_MACRO (y);
+ then "SOME_OTHER_MACRO" will survive to the frontend and show up
+ as a misspelled identifier.
+
+ Use the best distance so far so that a candidate is only set if
+ a macro is better than anything so far. This allows early rejection
+ (without calculating the edit distance) of macro names that must have
+ distance >= bm.get_best_distance (), and means that we only get a
+ non-NULL result for best_macro_match if it's better than any of
+ the identifiers already checked, which avoids needless creation
+ of identifiers for macro hashnodes. */
+ best_macro_match bmm (name, bm.get_best_distance ());
+ cpp_forall_identifiers (parse_in, find_closest_macro_cpp_cb, &bmm);
+ cpp_hashnode *best_macro = bmm.get_best_meaningful_candidate ();
+ /* If a macro is the closest so far to NAME, use it, creating an
+ identifier tree node for it. */
+ if (best_macro)
+ {
+ const char *id = (const char *)best_macro->ident.str;
+ tree macro_as_identifier
+ = get_identifier_with_length (id, best_macro->ident.len);
+ bm.set_best_so_far (macro_as_identifier,
+ bmm.get_best_distance (),
+ bmm.get_best_candidate_length ());
+ }
+
+ /* Try the "start_typename" keywords to detect
+ "singed" vs "signed" typos. */
+ if (kind == FUZZY_LOOKUP_TYPENAME)
+ {
+ for (unsigned i = 0; i < num_c_common_reswords; i++)
+ {
+ const c_common_resword *resword = &c_common_reswords[i];
+ if (!c_keyword_starts_typename (resword->rid))
+ continue;
+ tree resword_identifier = ridpointers [resword->rid];
+ if (!resword_identifier)
+ continue;
+ gcc_assert (TREE_CODE (resword_identifier) == IDENTIFIER_NODE);
+ bm.consider (resword_identifier);
+ }
+ }
+
+ return bm.get_best_meaningful_candidate ();
+}
+
/* Create the predefined scalar types of C,
and some nodes representing standard constants (0, 1, (void *) 0).
diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index 78bf68e..7f491f1 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -58,6 +58,7 @@ along with GCC; see the file COPYING3. If not see
#include "c-family/c-indentation.h"
#include "gimple-expr.h"
#include "context.h"
+#include "gcc-rich-location.h"
/* We need to walk over decls with incomplete struct/union/enum types
after parsing the whole translation unit.
@@ -518,6 +519,48 @@ c_parser_peek_nth_token (c_parser *parser, unsigned int n)
return &parser->tokens[n - 1];
}
+bool
+c_keyword_starts_typename (enum rid keyword)
+{
+ switch (keyword)
+ {
+ case RID_UNSIGNED:
+ case RID_LONG:
+ case RID_SHORT:
+ case RID_SIGNED:
+ case RID_COMPLEX:
+ case RID_INT:
+ case RID_CHAR:
+ case RID_FLOAT:
+ case RID_DOUBLE:
+ case RID_VOID:
+ case RID_DFLOAT32:
+ case RID_DFLOAT64:
+ case RID_DFLOAT128:
+ case RID_BOOL:
+ case RID_ENUM:
+ case RID_STRUCT:
+ case RID_UNION:
+ case RID_TYPEOF:
+ case RID_CONST:
+ case RID_ATOMIC:
+ case RID_VOLATILE:
+ case RID_RESTRICT:
+ case RID_ATTRIBUTE:
+ case RID_FRACT:
+ case RID_ACCUM:
+ case RID_SAT:
+ case RID_AUTO_TYPE:
+ return true;
+ default:
+ if (keyword >= RID_FIRST_INT_N
+ && keyword < RID_FIRST_INT_N + NUM_INT_N_ENTS
+ && int_n_enabled_p[keyword - RID_FIRST_INT_N])
+ return true;
+ return false;
+ }
+}
+
/* Return true if TOKEN can start a type name,
false otherwise. */
static bool
@@ -541,43 +584,7 @@ c_token_starts_typename (c_token *token)
gcc_unreachable ();
}
case CPP_KEYWORD:
- switch (token->keyword)
- {
- case RID_UNSIGNED:
- case RID_LONG:
- case RID_SHORT:
- case RID_SIGNED:
- case RID_COMPLEX:
- case RID_INT:
- case RID_CHAR:
- case RID_FLOAT:
- case RID_DOUBLE:
- case RID_VOID:
- case RID_DFLOAT32:
- case RID_DFLOAT64:
- case RID_DFLOAT128:
- case RID_BOOL:
- case RID_ENUM:
- case RID_STRUCT:
- case RID_UNION:
- case RID_TYPEOF:
- case RID_CONST:
- case RID_ATOMIC:
- case RID_VOLATILE:
- case RID_RESTRICT:
- case RID_ATTRIBUTE:
- case RID_FRACT:
- case RID_ACCUM:
- case RID_SAT:
- case RID_AUTO_TYPE:
- return true;
- default:
- if (token->keyword >= RID_FIRST_INT_N
- && token->keyword < RID_FIRST_INT_N + NUM_INT_N_ENTS
- && int_n_enabled_p[token->keyword - RID_FIRST_INT_N])
- return true;
- return false;
- }
+ return c_keyword_starts_typename (token->keyword);
case CPP_LESS:
if (c_dialect_objc ())
return true;
@@ -1655,15 +1662,50 @@ c_parser_declaration_or_fndef (c_parser *parser, bool fndef_ok,
&& (!nested || !lookup_name (c_parser_peek_token (parser)->value)))
{
tree name = c_parser_peek_token (parser)->value;
- error_at (here, "unknown type name %qE", name);
- /* Give a hint to the user. This is not C++ with its implicit
- typedef. */
+
+ /* Issue an error about NAME being an unknown type name, perhaps
+ with some kind of hint.
+ If the user forgot a "struct" etc, suggest inserting
+ it. Otherwise, attempt to look for misspellings. */
+ gcc_rich_location richloc (here);
if (tag_exists_p (RECORD_TYPE, name))
- inform (here, "use %<struct%> keyword to refer to the type");
+ {
+ /* This is not C++ with its implicit typedef. */
+ richloc.add_fixit_insert (here, "struct");
+ error_at_rich_loc (&richloc,
+ "unknown type name %qE;"
+ " use %<struct%> keyword to refer to the type",
+ name);
+ }
else if (tag_exists_p (UNION_TYPE, name))
- inform (here, "use %<union%> keyword to refer to the type");
+ {
+ richloc.add_fixit_insert (here, "union");
+ error_at_rich_loc (&richloc,
+ "unknown type name %qE;"
+ " use %<union%> keyword to refer to the type",
+ name);
+ }
else if (tag_exists_p (ENUMERAL_TYPE, name))
- inform (here, "use %<enum%> keyword to refer to the type");
+ {
+ richloc.add_fixit_insert (here, "enum");
+ error_at_rich_loc (&richloc,
+ "unknown type name %qE;"
+ " use %<enum%> keyword to refer to the type",
+ name);
+ }
+ else
+ {
+ tree hint = lookup_name_fuzzy (name, FUZZY_LOOKUP_TYPENAME);
+ if (hint)
+ {
+ richloc.add_fixit_misspelled_id (here, hint);
+ error_at_rich_loc (&richloc,
+ "unknown type name %qE; did you mean %qE?",
+ name, hint);
+ }
+ else
+ error_at (here, "unknown type name %qE", name);
+ }
/* Parse declspecs normally to get a correct pointer type, but avoid
a further "fails to be a type name" error. Refuse nested functions
@@ -3632,7 +3674,8 @@ c_parser_parms_declarator (c_parser *parser, bool id_list_ok, tree attrs)
&& c_parser_peek_2nd_token (parser)->type != CPP_NAME
&& c_parser_peek_2nd_token (parser)->type != CPP_MULT
&& c_parser_peek_2nd_token (parser)->type != CPP_OPEN_PAREN
- && c_parser_peek_2nd_token (parser)->type != CPP_OPEN_SQUARE)
+ && c_parser_peek_2nd_token (parser)->type != CPP_OPEN_SQUARE
+ && c_parser_peek_2nd_token (parser)->type != CPP_KEYWORD)
{
tree list = NULL_TREE, *nextp = &list;
while (c_parser_next_token_is (parser, CPP_NAME)
@@ -3807,7 +3850,18 @@ c_parser_parameter_declaration (c_parser *parser, tree attrs)
c_parser_set_source_position_from_token (token);
if (c_parser_next_tokens_start_typename (parser, cla_prefer_type))
{
- error_at (token->location, "unknown type name %qE", token->value);
+ tree hint = lookup_name_fuzzy (token->value, FUZZY_LOOKUP_TYPENAME);
+ if (hint)
+ {
+ gcc_assert (TREE_CODE (hint) == IDENTIFIER_NODE);
+ gcc_rich_location richloc (token->location);
+ richloc.add_fixit_misspelled_id (token->location, hint);
+ error_at_rich_loc (&richloc,
+ "unknown type name %qE; did you mean %qE?",
+ token->value, hint);
+ }
+ else
+ error_at (token->location, "unknown type name %qE", token->value);
parser->error = true;
}
/* ??? In some Objective-C cases '...' isn't applicable so there
diff --git a/gcc/c/c-tree.h b/gcc/c/c-tree.h
index 8f10a13..46be53e 100644
--- a/gcc/c/c-tree.h
+++ b/gcc/c/c-tree.h
@@ -482,6 +482,7 @@ enum c_inline_static_type {
/* in c-parser.c */
extern void c_parse_init (void);
+extern bool c_keyword_starts_typename (enum rid keyword);
/* in c-aux-info.c */
extern void gen_aux_info_record (tree, int, int, int);