GHA: spellcheck curl symbols better
This now makes sure to trim off exact matches for curl symbols and long curl command line options instead of using pattern matching as before. This should catch typoed names (that still follow the pattern) better. The cleanspell.pl script is no longer used. cleancmd.pl is used for all markdown files. Closes #16504
This commit is contained in:
parent
ae72de1caa
commit
3c7c614df5
142
.github/scripts/cleancmd.pl
vendored
142
.github/scripts/cleancmd.pl
vendored
@ -3,55 +3,117 @@
|
||||
#
|
||||
# SPDX-License-Identifier: curl
|
||||
#
|
||||
# Input: a cmdline docs markdown, it gets modified *in place*
|
||||
# Input: cmdline docs markdown files, they get modified *in place*
|
||||
#
|
||||
# Strip off the leading meta-data/header part, remove all known curl symbols
|
||||
# and long command line options. Also clean up whatever else the spell checker
|
||||
# might have a problem with that we still deem is fine.
|
||||
#
|
||||
# The main purpose is to strip off the leading meta-data part, but also to
|
||||
# clean up whatever else the spell checker might have a problem with that we
|
||||
# still deem is fine.
|
||||
|
||||
my $header = 1;
|
||||
while(1) {
|
||||
# set this if the markdown has no meta-data header to skip
|
||||
if($ARGV[0] eq "--no-header") {
|
||||
shift @ARGV;
|
||||
$header = 0;
|
||||
}
|
||||
else {
|
||||
last;
|
||||
open(S, "<./docs/libcurl/symbols-in-versions")
|
||||
|| die "can't find symbols-in-versions";
|
||||
while(<S>) {
|
||||
if(/^([^ ]*) /) {
|
||||
push @asyms, $1;
|
||||
}
|
||||
}
|
||||
close(S);
|
||||
|
||||
my $f = $ARGV[0];
|
||||
# init the opts table with "special" options not easy to figure out
|
||||
my @aopts = (
|
||||
'--ftp-ssl-reqd', # old alias
|
||||
);
|
||||
|
||||
open(F, "<$f") or die;
|
||||
|
||||
my $ignore = $header;
|
||||
my $sepcount = 0;
|
||||
my @out;
|
||||
while(<F>) {
|
||||
if(/^---/ && $header) {
|
||||
if(++$sepcount == 2) {
|
||||
$ignore = 0;
|
||||
open(O, "<./docs/options-in-versions")
|
||||
|| die "can't find options-in-versions";
|
||||
while(<O>) {
|
||||
chomp;
|
||||
if(/^([^ ]+)/) {
|
||||
my $o = $1;
|
||||
push @aopts, $o;
|
||||
if($o =~ /^--no-(.*)/) {
|
||||
# for the --no options, also make one without it
|
||||
push @aopts, "--$1";
|
||||
}
|
||||
elsif($o =~ /^--disable-(.*)/) {
|
||||
# for the --disable options, also make the special ones
|
||||
push @aopts, "--$1";
|
||||
push @aopts, "--no-$1";
|
||||
}
|
||||
}
|
||||
}
|
||||
close(O);
|
||||
|
||||
open(C, "<./.github/scripts/spellcheck.curl")
|
||||
|| die "can't find spellcheck.curl";
|
||||
while(<C>) {
|
||||
if(/^\#/) {
|
||||
next;
|
||||
}
|
||||
next if($ignore);
|
||||
|
||||
# strip out backticked words
|
||||
$_ =~ s/`[^`]+`//g;
|
||||
|
||||
# strip out all long command line options
|
||||
$_ =~ s/--[a-z0-9-]+//g;
|
||||
|
||||
# strip out https URLs, we don't want them spellchecked
|
||||
$_ =~ s!https://[a-z0-9\#_/.-]+!!gi;
|
||||
|
||||
push @out, $_;
|
||||
chomp;
|
||||
if(/^([^ ]+)/) {
|
||||
push @asyms, $1;
|
||||
}
|
||||
}
|
||||
close(F);
|
||||
close(C);
|
||||
|
||||
if(!$ignore) {
|
||||
open(O, ">$f") or die;
|
||||
print O @out;
|
||||
close(O);
|
||||
# longest symbols first
|
||||
my @syms = sort { length($b) <=> length($a) } @asyms;
|
||||
|
||||
# longest cmdline options first
|
||||
my @opts = sort { length($b) <=> length($a) } @aopts;
|
||||
|
||||
# Clean one markdown file in place so that the spell checker only sees prose.
#
# Argument:
#   $f - path of the markdown file; it is read and then overwritten.
#
# What is removed:
#   - a leading block that starts with a "---" line on the very first line
#     and ends at the next line starting with "---" (both lines included)
#   - backticked words, **bold** and *italics* spans and https URLs
#   - every entry of the file-level @opts list (long command line options)
#   - every entry of the file-level @syms list (symbols), matched as whole
#     words
#
# If a leading "---" block is opened but never closed, the file is left
# untouched. Dies with a message if the file cannot be read or written.
sub process {
    my ($f) = @_;

    my $ignore = 0;
    my $out = '';
    my $line = 0;

    # three-argument open: the name comes from the command line and must
    # never be interpreted as a mode
    open(my $in, '<', $f) or die "cannot read $f: $!";

    # a lexical line variable keeps the caller's $_ intact
    while(my $l = <$in>) {
        $line++;
        if(($l =~ /^---/) && ($line == 1)) {
            # start of the leading meta-data block
            $ignore = 1;
            next;
        }
        elsif(($l =~ /^---/) && $ignore) {
            # end of the leading meta-data block
            $ignore = 0;
            next;
        }
        next if($ignore);

        # strip out backticked words
        $l =~ s/`[^`]+`//g;

        # **bold**
        $l =~ s/\*\*(\S.*?)\*\*//g;
        # *italics*
        $l =~ s/\*(\S.*?)\*//g;

        # strip out https URLs, we don't want them spellchecked
        $l =~ s!https://[a-z0-9\#_/.-]+!!gi;

        $out .= $l;
    }
    close($in);

    # unterminated meta-data block: nothing gets written, so skip the rest
    return if($ignore);

    # cut out all known curl cmdline options; \Q...\E makes the match
    # literal so that the dot in names such as --http1.1 is not a wildcard
    for my $opt (@opts) {
        $out =~ s/\Q$opt\E//g;
    }

    # cut out all known curl symbols, whole words only
    for my $sym (@syms) {
        $out =~ s/\b\Q$sym\E\b//g;
    }

    open(my $outfh, '>', $f) or die "cannot write $f: $!";
    print $outfh $out;
    # buffered write errors only show up at close time
    close($outfh) or die "cannot finish writing $f: $!";
    return;
}
|
||||
|
||||
# clean every file named on the command line, one at a time and in place
foreach my $path (@ARGV) {
    process($path);
}
|
||||
|
||||
86
.github/scripts/cleanspell.pl
vendored
86
.github/scripts/cleanspell.pl
vendored
@ -1,86 +0,0 @@
|
||||
#!/usr/bin/env perl
|
||||
# Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
|
||||
#
|
||||
# SPDX-License-Identifier: curl
|
||||
#
|
||||
# Given: a libcurl curldown man page
|
||||
# Outputs: the same file, minus the SYNOPSIS and the EXAMPLE sections
|
||||
#
|
||||
|
||||
use strict;
use warnings;

# Rewrites the file named by the first command line argument in place:
#
#  - lines from a "# SYNOPSIS" or "# EXAMPLE" heading up to and including
#    the next "# X..." heading (X being an uppercase letter) are dropped
#  - on every other line, **bold** and *italics* spans, curl symbol names
#    and URLs are removed
#
# Dies with a message if no file is given or if it cannot be read or written.

my $f = $ARGV[0];
defined($f) or die "Usage: cleanspell.pl <file>\n";

# Patterns cut out of every kept line, applied in exactly this order.
my @strip = (
    # **bold**
    qr/\*\*(\S.*?)\*\*/,
    # *italics*
    qr/\*(\S.*?)\*/,

    # return code types
    qr/CURL(M|SH|U|H)code/,

    # upper case symbol families
    qr/CURL_[A-Z0-9_]*/,
    qr/CURLALTSVC_[A-Z0-9_]*/,
    qr/CURLAUTH_[A-Z0-9_]*/,
    qr/CURLE_[A-Z0-9_]*/,
    qr/CURLFORM_[A-Z0-9_]*/,
    qr/CURLFTP_[A-Z0-9_]*/,
    qr/CURLFTPAUTH_[A-Z0-9_]*/,
    qr/CURLFTPMETHOD_[A-Z0-9_]*/,
    qr/CURLFTPSSL_[A-Z0-9_]*/,
    qr/CURLGSSAPI_[A-Z0-9_]*/,
    qr/CURLHEADER_[A-Z0-9_]*/,
    qr/CURLINFO_[A-Z0-9_]*/,
    qr/CURLM_[A-Z0-9_]*/,
    qr/CURLMIMEOPT_[A-Z0-9_]*/,
    qr/CURLMOPT_[A-Z0-9_]*/,
    qr/CURLOPT_[A-Z0-9_]*/,
    qr/CURLPIPE_[A-Z0-9_]*/,
    qr/CURLPROTO_[A-Z0-9_]*/,
    qr/CURLPROXY_[A-Z0-9_]*/,
    qr/CURLPX_[A-Z0-9_]*/,
    qr/CURLSHE_[A-Z0-9_]*/,
    qr/CURLSHOPT_[A-Z0-9_]*/,
    qr/CURLSSLOPT_[A-Z0-9_]*/,
    qr/CURLSSH_[A-Z0-9_]*/,
    qr/CURLSSLBACKEND_[A-Z0-9_]*/,
    qr/CURLU_[A-Z0-9_]*/,
    qr/CURLUPART_[A-Z0-9_]*/,
    qr/CURLUE_[A-Z0-9_]*/,
    qr/CURLHE_[A-Z0-9_]*/,
    qr/CURLWS_[A-Z0-9_]*/,
    qr/CURLKH[A-Z0-9_]*/,
    qr/CURLUSESSL_[A-Z0-9_]*/,
    qr/CURLPAUSE_[A-Z0-9_]*/,
    qr/CURLHSTS_[A-Z0-9_]*/,

    # function names
    qr/curl_global_([a-z_]*)/,
    qr/curl_(strequal|strnequal|formadd|waitfd|formget|getdate|formfree)/,
    qr/curl_easy_([a-z]*)/,
    qr/curl_multi_([a-z_]*)/,
    qr/curl_mime_(subparts|addpart|filedata|data_cb)/,
    qr/curl_ws_(send|recv|meta)/,
    qr/curl_url_(dup)/,
    qr/curl_pushheader_by(name|num)/,

    # man page names, with and without an escaped dash
    qr/libcurl-(env|ws)/,
    qr/libcurl\\-(env|ws)/,

    # URLs, matched case insensitively, including the non-word character
    # (if any) right in front of them
    qr/(^|\W)((tftp|https|http|ftp):\/\/[a-z0-9\-._~%:\/?\#\[\]\@!\$&'()*+,;=\\]+)/i,
);

# three-argument open so the file name is never interpreted as a mode
open(my $in, '<', $f) or die "cannot read $f: $!";

my @out;
my $ignore = 0;
while(my $line = <$in>) {
    if($line =~ /^# (SYNOPSIS|EXAMPLE)/) {
        # start of a section to skip
        $ignore = 1;
    }
    elsif($ignore && ($line =~ /^# [A-Z]/)) {
        # the next heading ends the skipped section; the heading line
        # itself is not kept either
        $ignore = 0;
    }
    elsif(!$ignore) {
        for my $re (@strip) {
            $line =~ s/$re//g;
        }
        push @out, $line;
    }
}
close($in);

open(my $outfh, '>', $f) or die "cannot write $f: $!";
print $outfh @out;
# buffered write errors only show up at close time
close($outfh) or die "cannot finish writing $f: $!";
|
||||
151
.github/scripts/spellcheck.curl
vendored
Normal file
151
.github/scripts/spellcheck.curl
vendored
Normal file
@ -0,0 +1,151 @@
|
||||
# Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
|
||||
#
|
||||
# SPDX-License-Identifier: curl
|
||||
#
|
||||
# common variable types + structs
|
||||
# callback typedefs
|
||||
# public functions names
|
||||
# some man page names
|
||||
curl_fileinfo
|
||||
curl_forms
|
||||
curl_hstsentry
|
||||
curl_httppost
|
||||
curl_index
|
||||
curl_khkey
|
||||
curl_pushheaders
|
||||
curl_waitfd
|
||||
CURLcode
|
||||
CURLformoption
|
||||
CURLHcode
|
||||
CURLMcode
|
||||
CURLMsg
|
||||
CURLSHcode
|
||||
CURLUcode
|
||||
curl_calloc_callback
|
||||
curl_chunk_bgn_callback
|
||||
curl_chunk_end_callback
|
||||
curl_conv_callback
|
||||
curl_debug_callback
|
||||
curl_fnmatch_callback
|
||||
curl_formget_callback
|
||||
curl_free_callback
|
||||
curl_hstsread_callback
|
||||
curl_hstswrite_callback
|
||||
curl_ioctl_callback
|
||||
curl_malloc_callback
|
||||
curl_multi_timer_callback
|
||||
curl_opensocket_callback
|
||||
curl_prereq_callback
|
||||
curl_progress_callback
|
||||
curl_push_callback
|
||||
curl_read_callback
|
||||
curl_realloc_callback
|
||||
curl_resolver_start_callback
|
||||
curl_seek_callback
|
||||
curl_socket_callback
|
||||
curl_sockopt_callback
|
||||
curl_ssl_ctx_callback
|
||||
curl_strdup_callback
|
||||
curl_trailer_callback
|
||||
curl_write_callback
|
||||
curl_xferinfo_callback
|
||||
curl_strequal
|
||||
curl_strnequal
|
||||
curl_mime_init
|
||||
curl_mime_free
|
||||
curl_mime_addpart
|
||||
curl_mime_name
|
||||
curl_mime_filename
|
||||
curl_mime_type
|
||||
curl_mime_encoder
|
||||
curl_mime_data
|
||||
curl_mime_filedata
|
||||
curl_mime_data_cb
|
||||
curl_mime_subparts
|
||||
curl_mime_headers
|
||||
curl_formadd
|
||||
curl_formget
|
||||
curl_formfree
|
||||
curl_getdate
|
||||
curl_getenv
|
||||
curl_version
|
||||
curl_easy_escape
|
||||
curl_escape
|
||||
curl_easy_unescape
|
||||
curl_unescape
|
||||
curl_free
|
||||
curl_global_init
|
||||
curl_global_init_mem
|
||||
curl_global_cleanup
|
||||
curl_global_trace
|
||||
curl_global_sslset
|
||||
curl_slist_append
|
||||
curl_slist_free_all
|
||||
curl_getdate
|
||||
curl_share_init
|
||||
curl_share_setopt
|
||||
curl_share_cleanup
|
||||
curl_version_info
|
||||
curl_easy_strerror
|
||||
curl_share_strerror
|
||||
curl_easy_pause
|
||||
curl_easy_ssls_import
|
||||
curl_easy_ssls_export
|
||||
curl_easy_init
|
||||
curl_easy_setopt
|
||||
curl_easy_perform
|
||||
curl_easy_cleanup
|
||||
curl_easy_getinfo
|
||||
curl_easy_duphandle
|
||||
curl_easy_reset
|
||||
curl_easy_recv
|
||||
curl_easy_send
|
||||
curl_easy_upkeep
|
||||
curl_easy_header
|
||||
curl_easy_nextheader
|
||||
curl_mprintf
|
||||
curl_mfprintf
|
||||
curl_msprintf
|
||||
curl_msnprintf
|
||||
curl_mvprintf
|
||||
curl_mvfprintf
|
||||
curl_mvsprintf
|
||||
curl_mvsnprintf
|
||||
curl_maprintf
|
||||
curl_mvaprintf
|
||||
curl_multi_init
|
||||
curl_multi_add_handle
|
||||
curl_multi_remove_handle
|
||||
curl_multi_fdset
|
||||
curl_multi_waitfds
|
||||
curl_multi_wait
|
||||
curl_multi_poll
|
||||
curl_multi_wakeup
|
||||
curl_multi_perform
|
||||
curl_multi_cleanup
|
||||
curl_multi_info_read
|
||||
curl_multi_strerror
|
||||
curl_multi_socket
|
||||
curl_multi_socket_action
|
||||
curl_multi_socket_all
|
||||
curl_multi_timeout
|
||||
curl_multi_setopt
|
||||
curl_multi_assign
|
||||
curl_multi_get_handles
|
||||
curl_pushheader_bynum
|
||||
curl_pushheader_byname
|
||||
curl_multi_waitfds
|
||||
curl_easy_option_by_name
|
||||
curl_easy_option_by_id
|
||||
curl_easy_option_next
|
||||
curl_url
|
||||
curl_url_cleanup
|
||||
curl_url_dup
|
||||
curl_url_get
|
||||
curl_url_set
|
||||
curl_url_strerror
|
||||
curl_ws_recv
|
||||
curl_ws_send
|
||||
curl_ws_meta
|
||||
libcurl-env
|
||||
libcurl-ws
|
||||
16
.github/workflows/checkdocs.yml
vendored
16
.github/workflows/checkdocs.yml
vendored
@ -107,20 +107,8 @@ jobs:
|
||||
persist-credentials: false
|
||||
name: checkout
|
||||
|
||||
- name: trim all man page *.md files
|
||||
run: find docs -name "*.md" ! -name "_*" -print0 | xargs -0 -n1 .github/scripts/cleancmd.pl
|
||||
|
||||
- name: trim libcurl man page *.md files
|
||||
run: find docs/libcurl \( -name "curl_*.md" -o -name "libcurl*.md" \) -print0 | xargs -0 -n1 .github/scripts/cleanspell.pl
|
||||
|
||||
- name: trim libcurl option man page *.md files
|
||||
run: find docs/libcurl/opts -name "CURL*.md" -print0 | xargs -0 -n1 .github/scripts/cleanspell.pl
|
||||
|
||||
- name: trim cmdline docs markdown _*.md files
|
||||
run: find docs/cmdline-opts -name "_*.md" -print0 | xargs -0 -n1 .github/scripts/cleancmd.pl --no-header
|
||||
|
||||
- name: trim docs/ markdown _*.md files
|
||||
run: git ls-files docs/*.md docs/internals/*.md | xargs -n1 .github/scripts/cleancmd.pl --no-header
|
||||
- name: trim all *.md files in docs/
|
||||
run: .github/scripts/cleancmd.pl $(find docs -name "*.md")
|
||||
|
||||
- name: setup the custom wordlist
|
||||
run: grep -v '^#' .github/scripts/spellcheck.words > wordlist.txt
|
||||
|
||||
Loading…
Reference in New Issue
Block a user