aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorDavid Malcolm <dmalcolm@redhat.com>2016-06-15 03:29:39 +0000
committerDavid Malcolm <dmalcolm@gcc.gnu.org>2016-06-15 03:29:39 +0000
commit6a3f203c3cc8e0f0757f7ed038b3cb34063936ba (patch)
treee99b8b2be80b721ed128799b811f4eccae4a9f64 /gcc
parent9e990b5d19d930bd1769d6db455fcbda8e7eae11 (diff)
downloadgcc-6a3f203c3cc8e0f0757f7ed038b3cb34063936ba.zip
gcc-6a3f203c3cc8e0f0757f7ed038b3cb34063936ba.tar.gz
gcc-6a3f203c3cc8e0f0757f7ed038b3cb34063936ba.tar.bz2
spellcheck.h: add best_match template; implement early-reject
gcc/c/ChangeLog: * c-typeck.c: Include spellcheck-tree.h rather than spellcheck.h. gcc/cp/ChangeLog: * search.c: Include spellcheck-tree.h rather than spellcheck.h. gcc/ChangeLog: * spellcheck-tree.c: Include spellcheck-tree.h rather than spellcheck.h. (find_closest_identifier): Reimplement in terms of best_match<tree,tree>. * spellcheck-tree.h: New file. * spellcheck.c (struct edit_distance_traits<const char *>): New struct. (find_closest_string): Reimplement in terms of best_match<const char *, const char *>. * spellcheck.h (levenshtein_distance): Move prototype of tree-based overload to spellcheck-tree.h. (find_closest_identifier): Likewise. (struct edit_distance_traits<T>): New template. (class best_match): New class. From-SVN: r237471
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog17
-rw-r--r--gcc/c/ChangeLog4
-rw-r--r--gcc/c/c-typeck.c2
-rw-r--r--gcc/cp/ChangeLog4
-rw-r--r--gcc/cp/search.c2
-rw-r--r--gcc/spellcheck-tree.c24
-rw-r--r--gcc/spellcheck-tree.h51
-rw-r--r--gcc/spellcheck.c42
-rw-r--r--gcc/spellcheck.h110
9 files changed, 208 insertions, 48 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 9bf0db86..907bb06 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,22 @@
2016-06-14 David Malcolm <dmalcolm@redhat.com>
+ * spellcheck-tree.c: Include spellcheck-tree.h rather than
+ spellcheck.h.
+ (find_closest_identifier): Reimplement in terms of
+ best_match<tree,tree>.
+ * spellcheck-tree.h: New file.
+ * spellcheck.c (struct edit_distance_traits<const char *>): New
+ struct.
+ (find_closest_string): Reimplement in terms of
+ best_match<const char *, const char *>.
+ * spellcheck.h (levenshtein_distance): Move prototype of tree-based
+ overload to spellcheck-tree.h.
+ (find_closest_identifier): Likewise.
+ (struct edit_distance_traits<T>): New template.
+ (class best_match): New class.
+
+2016-06-14 David Malcolm <dmalcolm@redhat.com>
+
* selftest-run-tests.c (selftest::run_tests): Call
selftest::spellcheck_tree_c_tests.
* selftest.h (selftest::spellcheck_tree_c_tests): New decl.
diff --git a/gcc/c/ChangeLog b/gcc/c/ChangeLog
index 85fe5e3..e5d84bb 100644
--- a/gcc/c/ChangeLog
+++ b/gcc/c/ChangeLog
@@ -1,5 +1,9 @@
2016-06-14 David Malcolm <dmalcolm@redhat.com>
+ * c-typeck.c: Include spellcheck-tree.h rather than spellcheck.h.
+
+2016-06-14 David Malcolm <dmalcolm@redhat.com>
+
* c-typeck.c (build_component_ref): Simplify fixit code by
using gcc_rich_location::add_fixit_misspelled_id.
(set_init_label): Likewise.
diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c
index f987508..f03c07b 100644
--- a/gcc/c/c-typeck.c
+++ b/gcc/c/c-typeck.c
@@ -47,7 +47,7 @@ along with GCC; see the file COPYING3. If not see
#include "c-family/c-ubsan.h"
#include "cilk.h"
#include "gomp-constants.h"
-#include "spellcheck.h"
+#include "spellcheck-tree.h"
#include "gcc-rich-location.h"
/* Possible cases of implicit bad conversions. Used to select
diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog
index 4e4a662..775b614 100644
--- a/gcc/cp/ChangeLog
+++ b/gcc/cp/ChangeLog
@@ -1,5 +1,9 @@
2016-06-14 David Malcolm <dmalcolm@redhat.com>
+ * search.c: Include spellcheck-tree.h rather than spellcheck.h.
+
+2016-06-14 David Malcolm <dmalcolm@redhat.com>
+
* typeck.c: Include "gcc-rich-location.h".
(finish_class_member_access_expr): Simplify fixit code by
using gcc_rich_location::add_fixit_misspelled_id.
diff --git a/gcc/cp/search.c b/gcc/cp/search.c
index f47833f..990c3fe 100644
--- a/gcc/cp/search.c
+++ b/gcc/cp/search.c
@@ -27,7 +27,7 @@ along with GCC; see the file COPYING3. If not see
#include "cp-tree.h"
#include "intl.h"
#include "toplev.h"
-#include "spellcheck.h"
+#include "spellcheck-tree.h"
static int is_subobject_of_p (tree, tree);
static tree dfs_lookup_base (tree, void *);
diff --git a/gcc/spellcheck-tree.c b/gcc/spellcheck-tree.c
index 2d73b774..63fb1a8 100644
--- a/gcc/spellcheck-tree.c
+++ b/gcc/spellcheck-tree.c
@@ -22,7 +22,7 @@ along with GCC; see the file COPYING3. If not see
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
-#include "spellcheck.h"
+#include "spellcheck-tree.h"
#include "selftest.h"
#include "stringpool.h"
@@ -53,32 +53,16 @@ find_closest_identifier (tree target, const auto_vec<tree> *candidates)
{
gcc_assert (TREE_CODE (target) == IDENTIFIER_NODE);
+ best_match<tree, tree> bm (target);
int i;
tree identifier;
- tree best_identifier = NULL_TREE;
- edit_distance_t best_distance = MAX_EDIT_DISTANCE;
FOR_EACH_VEC_ELT (*candidates, i, identifier)
{
gcc_assert (TREE_CODE (identifier) == IDENTIFIER_NODE);
- edit_distance_t dist = levenshtein_distance (target, identifier);
- if (dist < best_distance)
- {
- best_distance = dist;
- best_identifier = identifier;
- }
+ bm.consider (identifier);
}
- /* If more than half of the letters were misspelled, the suggestion is
- likely to be meaningless. */
- if (best_identifier)
- {
- unsigned int cutoff = MAX (IDENTIFIER_LENGTH (target),
- IDENTIFIER_LENGTH (best_identifier)) / 2;
- if (best_distance > cutoff)
- return NULL_TREE;
- }
-
- return best_identifier;
+ return bm.get_best_meaningful_candidate ();
}
#if CHECKING_P
diff --git a/gcc/spellcheck-tree.h b/gcc/spellcheck-tree.h
new file mode 100644
index 0000000..0d5e253
--- /dev/null
+++ b/gcc/spellcheck-tree.h
@@ -0,0 +1,51 @@
+/* Find near-matches for identifiers.
+ Copyright (C) 2015-2016 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_SPELLCHECK_TREE_H
+#define GCC_SPELLCHECK_TREE_H
+
+#include "spellcheck.h"
+
+/* spellcheck-tree.c */
+
+extern edit_distance_t
+levenshtein_distance (tree ident_s, tree ident_t);
+
+extern tree
+find_closest_identifier (tree target, const auto_vec<tree> *candidates);
+
+/* Specialization of edit_distance_traits for identifiers. */
+
+template <>
+struct edit_distance_traits<tree>
+{
+ static size_t get_length (tree id)
+ {
+ gcc_assert (TREE_CODE (id) == IDENTIFIER_NODE);
+ return IDENTIFIER_LENGTH (id);
+ }
+
+ static const char *get_string (tree id)
+ {
+ gcc_assert (TREE_CODE (id) == IDENTIFIER_NODE);
+ return IDENTIFIER_POINTER (id);
+ }
+};
+
+#endif /* GCC_SPELLCHECK_TREE_H */
diff --git a/gcc/spellcheck.c b/gcc/spellcheck.c
index e03f484..2648f3a 100644
--- a/gcc/spellcheck.c
+++ b/gcc/spellcheck.c
@@ -121,6 +121,24 @@ levenshtein_distance (const char *s, const char *t)
return levenshtein_distance (s, strlen (s), t, strlen (t));
}
+/* Specialization of edit_distance_traits for C-style strings. */
+
+template <>
+struct edit_distance_traits<const char *>
+{
+ static size_t get_length (const char *str)
+ {
+ gcc_assert (str);
+ return strlen (str);
+ }
+
+ static const char *get_string (const char *str)
+ {
+ gcc_assert (str);
+ return str;
+ }
+};
+
/* Given TARGET, a non-NULL string, and CANDIDATES, a non-NULL ptr to
an autovec of non-NULL strings, determine which element within
CANDIDATES has the lowest edit distance to TARGET. If there are
@@ -139,32 +157,14 @@ find_closest_string (const char *target,
int i;
const char *candidate;
- const char *best_candidate = NULL;
- edit_distance_t best_distance = MAX_EDIT_DISTANCE;
- size_t len_target = strlen (target);
+ best_match<const char *, const char *> bm (target);
FOR_EACH_VEC_ELT (*candidates, i, candidate)
{
gcc_assert (candidate);
- edit_distance_t dist
- = levenshtein_distance (target, len_target,
- candidate, strlen (candidate));
- if (dist < best_distance)
- {
- best_distance = dist;
- best_candidate = candidate;
- }
- }
-
- /* If more than half of the letters were misspelled, the suggestion is
- likely to be meaningless. */
- if (best_candidate)
- {
- unsigned int cutoff = MAX (len_target, strlen (best_candidate)) / 2;
- if (best_distance > cutoff)
- return NULL;
+ bm.consider (candidate);
}
- return best_candidate;
+ return bm.get_best_meaningful_candidate ();
}
#if CHECKING_P
diff --git a/gcc/spellcheck.h b/gcc/spellcheck.h
index 040c33e..7379399 100644
--- a/gcc/spellcheck.h
+++ b/gcc/spellcheck.h
@@ -35,12 +35,112 @@ extern const char *
find_closest_string (const char *target,
const auto_vec<const char *> *candidates);
-/* spellcheck-tree.c */
+/* A traits class for describing a string-like type usable by
+ class best_match.
+ Specializations should provide the implementations of the following:
-extern edit_distance_t
-levenshtein_distance (tree ident_s, tree ident_t);
+ static size_t get_length (TYPE);
+ static const char *get_string (TYPE);
+
+ get_string should return a non-NULL ptr, which does not need to be
+ 0-terminated. */
+
+template <typename TYPE>
+struct edit_distance_traits {};
+
+/* A type for use when determining the best match against a string,
+ expressed as a template so that we can match against various
+ string-like types (const char *, frontend identifiers, and preprocessor
+ macros).
+
+ This type accumulates the best possible match against GOAL_TYPE for
+ a sequence of elements of CANDIDATE_TYPE, whilst minimizing the
+ number of calls to levenshtein_distance and to
+ edit_distance_traits<T>::get_length. */
+
+template <typename GOAL_TYPE, typename CANDIDATE_TYPE>
+class best_match
+{
+ public:
+ typedef GOAL_TYPE goal_t;
+ typedef CANDIDATE_TYPE candidate_t;
+ typedef edit_distance_traits<goal_t> goal_traits;
+ typedef edit_distance_traits<candidate_t> candidate_traits;
+
+ /* Constructor. */
+
+ best_match (goal_t goal)
+ : m_goal (goal_traits::get_string (goal)),
+ m_goal_len (goal_traits::get_length (goal)),
+ m_best_candidate (NULL),
+ m_best_distance (MAX_EDIT_DISTANCE)
+ {}
+
+ /* Compare the edit distance between CANDIDATE and m_goal,
+ and if it's the best so far, record it. */
+
+ void consider (candidate_t candidate)
+ {
+ size_t candidate_len = candidate_traits::get_length (candidate);
+
+ /* Calculate a lower bound on the candidate's distance to the goal,
+ based on the difference in lengths; it will require at least
+ this many insertions/deletions. */
+ edit_distance_t min_candidate_distance
+ = abs ((ssize_t)candidate_len - (ssize_t)m_goal_len);
+
+ /* If the candidate's length is sufficiently different to that
+ of the goal string, then the number of insertions/deletions
+ may be >= the best distance so far. If so, we can reject
+ the candidate immediately without needing to compute
+ the exact distance, since it won't be an improvement. */
+ if (min_candidate_distance >= m_best_distance)
+ return;
+
+ /* If the candidate will be unable to beat the criterion in
+ get_best_meaningful_candidate, reject it without computing
+ the exact distance. */
+ unsigned int cutoff = MAX (m_goal_len, candidate_len) / 2;
+ if (min_candidate_distance > cutoff)
+ return;
+
+ /* Otherwise, compute the distance and see if the candidate
+ has beaten the previous best value. */
+ edit_distance_t dist
+ = levenshtein_distance (m_goal, m_goal_len,
+ candidate_traits::get_string (candidate),
+ candidate_len);
+ if (dist < m_best_distance)
+ {
+ m_best_distance = dist;
+ m_best_candidate = candidate;
+ m_best_candidate_len = candidate_len;
+ }
+ }
+
+ /* Get the best candidate so far, but applying a filter to ensure
+ that we return NULL if none of the candidates are close to the goal,
+ to avoid offering nonsensical suggestions to the user. */
+
+ candidate_t get_best_meaningful_candidate () const
+ {
+ /* If more than half of the letters were misspelled, the suggestion is
+ likely to be meaningless. */
+ if (m_best_candidate)
+ {
+ unsigned int cutoff = MAX (m_goal_len, m_best_candidate_len) / 2;
+ if (m_best_distance > cutoff)
+ return NULL;
+ }
+ return m_best_candidate;
+ }
-extern tree
-find_closest_identifier (tree target, const auto_vec<tree> *candidates);
+ private:
+ const char *m_goal;
+ size_t m_goal_len;
+ candidate_t m_best_candidate;
+ edit_distance_t m_best_distance;
+ size_t m_best_candidate_len;
+};
#endif /* GCC_SPELLCHECK_H */