diff options
author | David Malcolm <dmalcolm@redhat.com> | 2015-11-13 01:54:41 +0000 |
---|---|---|
committer | David Malcolm <dmalcolm@gcc.gnu.org> | 2015-11-13 01:54:41 +0000 |
commit | 277fe616911ac1ce91e9f1178d648303b4a26940 (patch) | |
tree | 574b1272f86df52172c56c614532ff5032b6f73d /gcc/c | |
parent | 6d82151f8c08d123eaab63409cd8ba6f85d0539c (diff) | |
download | gcc-277fe616911ac1ce91e9f1178d648303b4a26940.zip gcc-277fe616911ac1ce91e9f1178d648303b4a26940.tar.gz gcc-277fe616911ac1ce91e9f1178d648303b4a26940.tar.bz2 |
Implement Levenshtein distance; use in C FE for misspelled field names
This is the combination of:
[PATCH 1/2] Implement Levenshtein distance
[PATCH 2/2] C FE: suggest corrections for misspelled field names
plus some nit fixes in spellcheck.c.
gcc/ChangeLog:
* Makefile.in (OBJS): Add spellcheck.o.
* spellcheck.c: New file.
* spellcheck.h: New file.
gcc/c/ChangeLog:
* c-typeck.c: Include spellcheck.h.
(lookup_field_fuzzy_find_candidates): New function.
(lookup_field_fuzzy): New function.
(build_component_ref): If the field was not found, try using
lookup_field_fuzzy and potentially offer a suggestion.
gcc/testsuite/ChangeLog:
* gcc.dg/plugin/levenshtein-test-1.c: New file.
* gcc.dg/plugin/levenshtein_plugin.c: New file.
* gcc.dg/plugin/plugin.exp (plugin_test_list): Add
levenshtein_plugin.c.
* gcc.dg/spellcheck-fields.c: New file.
From-SVN: r230284
Diffstat (limited to 'gcc/c')
-rw-r--r-- | gcc/c/ChangeLog | 8 | ||||
-rw-r--r-- | gcc/c/c-typeck.c | 74 |
2 files changed, 81 insertions, 1 deletions
diff --git a/gcc/c/ChangeLog b/gcc/c/ChangeLog index 02ae07e..9b1fdc9 100644 --- a/gcc/c/ChangeLog +++ b/gcc/c/ChangeLog @@ -1,3 +1,11 @@ +2015-11-13 David Malcolm <dmalcolm@redhat.com> + + * c-typeck.c: Include spellcheck.h. + (lookup_field_fuzzy_find_candidates): New function. + (lookup_field_fuzzy): New function. + (build_component_ref): If the field was not found, try using + lookup_field_fuzzy and potentially offer a suggestion. + 2015-11-12 James Norris <jnorris@codesourcery.com> Joseph Myers <joseph@codesourcery.com> diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c index 4335a87..eb4e1fc 100644 --- a/gcc/c/c-typeck.c +++ b/gcc/c/c-typeck.c @@ -47,6 +47,7 @@ along with GCC; see the file COPYING3. If not see #include "c-family/c-ubsan.h" #include "cilk.h" #include "gomp-constants.h" +#include "spellcheck.h" /* Possible cases of implicit bad conversions. Used to select diagnostic messages in convert_for_assignment. */ @@ -2242,6 +2243,72 @@ lookup_field (tree type, tree component) return tree_cons (NULL_TREE, field, NULL_TREE); } +/* Recursively append candidate IDENTIFIER_NODEs to CANDIDATES. */ + +static void +lookup_field_fuzzy_find_candidates (tree type, tree component, + vec<tree> *candidates) +{ + tree field; + for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) + { + if (DECL_NAME (field) == NULL_TREE + && (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE + || TREE_CODE (TREE_TYPE (field)) == UNION_TYPE)) + { + lookup_field_fuzzy_find_candidates (TREE_TYPE (field), + component, + candidates); + } + + if (DECL_NAME (field)) + candidates->safe_push (DECL_NAME (field)); + } +} + +/* Like "lookup_field", but find the closest matching IDENTIFIER_NODE, + rather than returning a TREE_LIST for an exact match. */ + +static tree +lookup_field_fuzzy (tree type, tree component) +{ + gcc_assert (TREE_CODE (component) == IDENTIFIER_NODE); + + /* First, gather a list of candidates. */ + auto_vec <tree> candidates; + + lookup_field_fuzzy_find_candidates (type, component, + &candidates); + + /* Now determine which is closest. */ + int i; + tree identifier; + tree best_identifier = NULL; + edit_distance_t best_distance = MAX_EDIT_DISTANCE; + FOR_EACH_VEC_ELT (candidates, i, identifier) + { + gcc_assert (TREE_CODE (identifier) == IDENTIFIER_NODE); + edit_distance_t dist = levenshtein_distance (component, identifier); + if (dist < best_distance) + { + best_distance = dist; + best_identifier = identifier; + } + } + + /* If more than half of the letters were misspelled, the suggestion is + likely to be meaningless. */ + if (best_identifier) + { + unsigned int cutoff = MAX (IDENTIFIER_LENGTH (component), + IDENTIFIER_LENGTH (best_identifier)) / 2; + if (best_distance > cutoff) + return NULL; + } + + return best_identifier; +} + /* Make an expression to refer to the COMPONENT field of structure or union value DATUM. COMPONENT is an IDENTIFIER_NODE. LOC is the location of the COMPONENT_REF. */ @@ -2277,7 +2344,12 @@ build_component_ref (location_t loc, tree datum, tree component) if (!field) { - error_at (loc, "%qT has no member named %qE", type, component); + tree guessed_id = lookup_field_fuzzy (type, component); + if (guessed_id) + error_at (loc, "%qT has no member named %qE; did you mean %qE?", + type, component, guessed_id); + else + error_at (loc, "%qT has no member named %qE", type, component); return error_mark_node; } |