diff options
author | Tom de Vries <tdevries@suse.de> | 2024-10-06 07:59:48 +0200 |
---|---|---|
committer | Tom de Vries <tdevries@suse.de> | 2024-10-06 07:59:48 +0200 |
commit | 67eca1ccc1ad5237403dc151eb91f5e506dea0c4 (patch) | |
tree | fd35fce4b34cd73e1d455c76d8de29abafd75f9d /gdb/contrib | |
parent | 2e676da72d7dd98bbd9025de591e69179a613120 (diff) | |
download | binutils-67eca1ccc1ad5237403dc151eb91f5e506dea0c4.zip binutils-67eca1ccc1ad5237403dc151eb91f5e506dea0c4.tar.gz binutils-67eca1ccc1ad5237403dc151eb91f5e506dea0c4.tar.bz2 |
[gdb/contrib] Add spellcheck.sh
I came across a table containing common misspellings [1], and wrote a script to
detect and correct these misspellings.
The table also contains entries that have alternatives, like this:
...
addres->address, adders
...
and for those the script prints a TODO instead.
The script downloads the webpage containing the table, extracts the table and
caches it in .git/wikipedia-common-misspellings.txt to prevent downloading it
over and over again.
Example usage:
...
$ gdb/contrib/spellcheck.sh gdb*
...
ChangeLog files are silently skipped.
Checked with shellcheck.
Tested on x86_64-linux, by running it on the gdb* dirs and doing a build and
test run.
The results of running it are in the two following patches.
Reviewed-By: Andrew Burgess <aburgess@redhat.com>
Approved-By: Tom Tromey <tom@tromey.com>
[1] https://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines
Diffstat (limited to 'gdb/contrib')
-rwxr-xr-x | gdb/contrib/spellcheck.sh | 287 |
1 files changed, 287 insertions, 0 deletions
#!/bin/bash

# Copyright (C) 2024 Free Software Foundation, Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# Script to auto-correct common spelling mistakes.
#
# Example usage:
# $ ./gdb/contrib/spellcheck.sh gdb*
#
# The list of misspellings is downloaded once from $url and cached in
# $dictionary.  Entries that offer several alternative corrections are
# not applied; a "TODO" line is printed for them instead.

scriptdir=$(cd "$(dirname "$0")" || exit; pwd -P)

url=https://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines
cache_dir=$scriptdir/../../.git
cache_file=wikipedia-common-misspellings.txt
dictionary=$cache_dir/$cache_file

# Separators: space, slash, tab.
# The tab in the grep (ERE) variant is spelled $'\t' rather than as a
# literal tab character, so that it survives editors and copy/paste.
grep_separator=" |/|"$'\t'
sed_separator=" \|/\|\t"

# Print a one-line usage message to stdout.
usage ()
{
    echo "usage: $(basename "$0") <file|dir>+"
}

# Print $1 as an absolute path: unchanged if it already starts with "/",
# otherwise prefixed with the physical current working directory.
make_absolute ()
{
    local arg
    arg="$1"

    case "$arg" in
        /*)
            ;;
        *)
            arg=$(pwd -P)/"$arg"
            ;;
    esac

    printf '%s\n' "$arg"
}

# Resolve the command-line arguments (files and/or directories) into the
# caller-declared array "unique_files": canonical paths, sorted,
# de-duplicated, with every path containing "ChangeLog" dropped.
# Exits with status 1 when called without arguments or when an argument
# is neither a regular file nor a directory.
parse_args ()
{
    if [ $# -eq 0 ]; then
        usage >&2
        exit 1
    fi

    local files
    files=$(mktemp) || exit 1
    # Make sure the temp file is removed on any early exit below.
    trap 'rm -f "$files"' EXIT

    local arg
    for arg in "$@"; do
        if [ -f "$arg" ]; then
            arg=$(make_absolute "$arg")
            readlink -e "$arg" \
                >> "$files"
        elif [ -d "$arg" ]; then
            arg=$(make_absolute "$arg")
            find "$arg" -type f -exec readlink -e {} \; \
                >> "$files"
        else
            echo "Not a file or directory: $arg" >&2
            exit 1
        fi
    done

    mapfile -t unique_files \
        < <(sort -u "$files" \
                | grep -v ChangeLog)

    rm -f "$files"
    # Reset (rather than ignore) the EXIT trap.
    trap - EXIT
}

# Make sure $dictionary exists and is non-empty, downloading the web page
# at $url and extracting its <pre>...</pre> table if necessary.
# Exits with status 1 if the download fails or yields no table, so that
# a broken download is never cached.
get_dictionary ()
{
    # -s instead of -f: an empty cache file (e.g. left behind by an
    # earlier failed run) is treated as missing.
    if [ -s "$dictionary" ]; then
        return
    fi

    local webpage
    webpage=$(mktemp) || exit 1
    trap 'rm -f "$webpage"' EXIT

    # Download web page containing table.
    if ! wget "$url" -O "$webpage"; then
        echo "Failed to download: $url" >&2
        exit 1
    fi

    # Extract table from web page.
    awk '/<pre>/,/<\/pre>/' "$webpage" \
        | sed 's/<pre>//;s/<\/pre>//' \
        | grep -E -v "^$" \
        > "$dictionary"

    rm -f "$webpage"
    trap - EXIT

    if [ ! -s "$dictionary" ]; then
        rm -f "$dictionary"
        echo "No misspellings table found at: $url" >&2
        exit 1
    fi
}

# Split every "word->replacement" line of $dictionary into the
# caller-declared, index-aligned arrays "words" and "replacements".
parse_dictionary ()
{
    # The separator is accepted both as "->" and as "-&gt;".
    # NOTE(review): the downloaded HTML presumably escapes ">" as "&gt;";
    # confirm against the cached file.
    # Lines lacking a word or a replacement are skipped by both passes,
    # which keeps the two arrays aligned and avoids replacing a word by
    # the empty string.
    local filter
    filter='NF >= 2 && $1 != "" && $2 != ""'

    mapfile -t words \
        < <(awk -F '-(>|&gt;)' "$filter {print \$1}" "$dictionary")
    mapfile -t replacements \
        < <(awk -F '-(>|&gt;)' "$filter {print \$2}" "$dictionary")
}

# Print the names of those files among "$@" that contain at least one of
# the misspellings in the "words" array as a separator-delimited token.
# Prints nothing if there are no files or no words.
find_files_matching_words ()
{
    # Without file operands grep would read stdin; with no words the
    # pattern "()" would match every line.
    if [ $# -eq 0 ] || [ ${#words[@]} -eq 0 ]; then
        return 0
    fi

    # Join all words into one alternation: (w1|w2|...).
    local pat
    pat=$(IFS='|'; printf '%s' "${words[*]}")
    pat="($pat)"

    local sep
    sep=$grep_separator

    pat="(^|$sep)$pat($sep|$)"

    grep -E \
        -l \
        "$pat" \
        "$@"
}

# Print the names of those files among "$2..." that contain the word $1
# as a separator-delimited token.  Prints nothing if no files are given.
find_files_matching_word ()
{
    local pat
    pat="$1"
    shift

    # Without file operands grep would read stdin.
    if [ $# -eq 0 ]; then
        return 0
    fi

    local sep
    sep=$grep_separator

    pat="(^|$sep)$pat($sep|$)"

    grep -E \
        -l \
        "$pat" \
        "$@"
}

# Replace, in place, every separator-delimited occurrence of word $1 by
# replacement $2 in file $3.
# The word is used as a sed regex and the replacement as sed replacement
# text, both unescaped, with "%" as the s-command delimiter.
replace_word_in_file ()
{
    local word
    word="$1"

    local replacement
    replacement="$2"

    local file
    file="$3"

    # Capture the separator so it can be put back via \1 / \2.
    local sep
    sep="\($sed_separator\)"

    local inner at_start at_end whole_line

    # Word between two separators.
    inner="s%$sep$word$sep%\1$replacement\2%g"

    # Word at start of line.
    at_start="s%^$word$sep%$replacement\1%"

    # Word at end of line.
    at_end="s%$sep$word\$%\1$replacement%"

    # Word making up the entire line; grep's "(^|sep)word(sep|$)" matches
    # this case, so sed has to handle it as well.
    whole_line="s%^$word\$%$replacement%"

    # "inner" is applied twice: two adjacent occurrences share a single
    # separator, so one /g pass replaces only every other one.
    sed -i \
        "$inner;$inner;$at_start;$at_end;$whole_line" \
        "$file"
}

# Replace word $1 by replacement $2 in those of the files "$3..." that
# contain it.  Prints "word -> replacement" if any file changed,
# "TODO: ..." if the replacement lists several alternatives, and a
# "TODO: ...: replacement failed" line for every file that still
# contains the word afterwards.
replace_word_in_files ()
{
    local word
    word="$1"

    local replacement
    replacement="$2"

    shift 2

    local id
    id="$word -> $replacement"

    # Reduce set of files for sed to operate on.
    local -a files_matching_word
    mapfile -t files_matching_word \
        < <(find_files_matching_word "$word" "$@")

    if [ ${#files_matching_word[@]} -eq 0 ]; then
        return
    fi

    # Several comma-separated alternatives: needs a human decision.
    if [[ $replacement == *,* ]]; then
        echo "TODO: $id"
        return
    fi

    local changed f before after
    changed=false
    for f in "${files_matching_word[@]}"; do
        before=$(md5sum "$f")

        replace_word_in_file \
            "$word" \
            "$replacement" \
            "$f"

        after=$(md5sum "$f")

        if [ "$after" != "$before" ]; then
            changed=true
        fi
    done

    if $changed; then
        echo "$id"
    fi

    # Report files in which the word is still present.  $id is passed as
    # a printf argument so that its contents are never interpreted as
    # part of a format string or program text.
    local remaining
    while IFS= read -r remaining; do
        printf 'TODO: %s: replacement failed: %s\n' "$id" "$remaining"
    done < <(find_files_matching_word "$word" "${files_matching_word[@]}")
}

# Entry point: collect the files, load the dictionary, and apply every
# dictionary entry to the files that contain any misspelling.
main ()
{
    declare -a unique_files
    parse_args "$@"

    # Nothing left to check (e.g. only ChangeLog files were given).
    if [ ${#unique_files[@]} -eq 0 ]; then
        return
    fi

    get_dictionary

    declare -a words
    declare -a replacements
    parse_dictionary

    if [ ${#words[@]} -eq 0 ]; then
        echo "No entries found in dictionary: $dictionary" >&2
        exit 1
    fi

    # Reduce set of files for sed to operate on.
    local -a files_matching_words
    mapfile -t files_matching_words \
        < <(find_files_matching_words "${unique_files[@]}")

    if [ ${#files_matching_words[@]} -eq 0 ]; then
        return
    fi

    local i
    for i in "${!words[@]}"; do
        replace_word_in_files \
            "${words[$i]}" \
            "${replacements[$i]}" \
            "${files_matching_words[@]}"
    done
}

main "$@"