diff --git a/.github/scripts/cleancmd.pl b/.github/scripts/cleancmd.pl
index 355a845e4c..283a9f4e93 100755
--- a/.github/scripts/cleancmd.pl
+++ b/.github/scripts/cleancmd.pl
@@ -3,55 +3,117 @@
 #
 # SPDX-License-Identifier: curl
 #
-# Input: a cmdline docs markdown, it gets modified *in place*
+# Input: cmdline docs markdown files, they get modified *in place*
+#
+# Strip off the leading meta-data/header part, remove all known curl symbols
+# and long command line options. Also clean up whatever else the spell checker
+# might have a problem with that we still deem is fine.
 #
-# The main purpose is to strip off the leading meta-data part, but also to
-# clean up whatever else the spell checker might have a problem with that we
-# still deem is fine.
 
-my $header = 1;
-while(1) {
-    # set this if the markdown has no meta-data header to skip
-    if($ARGV[0] eq "--no-header") {
-        shift @ARGV;
-        $header = 0;
-    }
-    else {
-        last;
+open(S, "<./docs/libcurl/symbols-in-versions")
+    || die "can't find symbols-in-versions";
+while(<S>) {
+    if(/^([^ ]*) /) {
+        push @asyms, $1;
     }
 }
+close(S);
 
-my $f = $ARGV[0];
+# init the opts table with "special" options not easy to figure out
+my @aopts = (
+    '--ftp-ssl-reqd', # old alias
+    );
 
-open(F, "<$f") or die;
-
-my $ignore = $header;
-my $sepcount = 0;
-my @out;
-while(<F>) {
-    if(/^---/ && $header) {
-        if(++$sepcount == 2) {
-            $ignore = 0;
+open(O, "<./docs/options-in-versions")
+    || die "can't find options-in-versions";
+while(<O>) {
+    chomp;
+    if(/^([^ ]+)/) {
+        my $o = $1;
+        push @aopts, $o;
+        if($o =~ /^--no-(.*)/) {
+            # for the --no options, also make one without it
+            push @aopts, "--$1";
         }
+        elsif($o =~ /^--disable-(.*)/) {
+            # for the --disable options, also make the special ones
+            push @aopts, "--$1";
+            push @aopts, "--no-$1";
+        }
+    }
+}
+close(O);
+
+open(C, "<./.github/scripts/spellcheck.curl")
+    || die "can't find spellcheck.curl";
+while(<C>) {
+    if(/^\#/) {
         next;
     }
-    next if($ignore);
-
-    # strip out backticked words
-    $_ =~ s/`[^`]+`//g;
-
-    # strip out all long command line options
-    $_ =~ s/--[a-z0-9-]+//g;
-
-    # strip out https URLs, we don't want them spellchecked
-    $_ =~ s!https://[a-z0-9\#_/.-]+!!gi;
-
-    push @out, $_;
+    chomp;
+    if(/^([^ ]+)/) {
+        push @asyms, $1;
+    }
 }
-close(F);
+close(C);
 
-if(!$ignore) {
-    open(O, ">$f") or die;
-    print O @out;
-    close(O);
+# longest symbols first
+my @syms = sort { length($b) <=> length($a) } @asyms;
+
+# longest cmdline options first
+my @opts = sort { length($b) <=> length($a) } @aopts;
+
+sub process {
+    my ($f) = @_;
+
+    my $ignore = 0;
+    my $sepcount = 0;
+    my $out;
+    my $line = 0;
+    open(F, "<$f") or die;
+
+    while(<F>) {
+        $line++;
+        if(/^---/ && ($line == 1)) {
+            $ignore = 1;
+            next;
+        }
+        elsif(/^---/ && $ignore) {
+            $ignore = 0;
+            next;
+        }
+        next if($ignore);
+
+        my $l = $_;
+
+        # strip out backticked words
+        $l =~ s/`[^`]+`//g;
+
+        # **bold**
+        $l =~ s/\*\*(\S.*?)\*\*//g;
+        # *italics*
+        $l =~ s/\*(\S.*?)\*//g;
+
+        # strip out https URLs, we don't want them spellchecked
+        $l =~ s!https://[a-z0-9\#_/.-]+!!gi;
+
+        $out .= $l;
+    }
+    close(F);
+
+    # cut out all known curl cmdline options
+    map { $out =~ s/$_//g; } (@opts);
+
+    # cut out all known curl symbols
+    map { $out =~ s/\b$_\b//g; } (@syms);
+
+    if(!$ignore) {
+        open(O, ">$f") or die;
+        print O $out;
+        close(O);
+    }
+}
+
+for my $f (@ARGV) {
+    process($f);
 }
diff --git a/.github/scripts/cleanspell.pl b/.github/scripts/cleanspell.pl
deleted file mode 100755
index bfa07dc053..0000000000
--- a/.github/scripts/cleanspell.pl
+++ /dev/null
@@ -1,86 +0,0 @@
-#!/usr/bin/env perl
-# Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
-#
-# SPDX-License-Identifier: curl
-#
-# Given: a libcurl curldown man page
-# Outputs: the same file, minus the SYNOPSIS and the EXAMPLE sections
-#
-
-my $f = $ARGV[0];
-
-open(F, "<$f") or die;
-
-my @out;
-my $ignore = 0;
-while(<F>) {
-    if($_ =~ /^# (SYNOPSIS|EXAMPLE)/) {
-        $ignore = 1;
-    }
-    elsif($ignore && ($_ =~ /^# [A-Z]/)) {
-        $ignore = 0;
-    }
-    elsif(!$ignore) {
-        # **bold**
-        $_ =~ s/\*\*(\S.*?)\*\*//g;
-        # *italics*
-        $_ =~ s/\*(\S.*?)\*//g;
-
-        $_ =~ s/CURL(M|SH|U|H)code//g;
-        $_ =~ s/CURL_[A-Z0-9_]*//g;
-        $_ =~ s/CURLALTSVC_[A-Z0-9_]*//g;
-        $_ =~ s/CURLAUTH_[A-Z0-9_]*//g;
-        $_ =~ s/CURLE_[A-Z0-9_]*//g;
-        $_ =~ s/CURLFORM_[A-Z0-9_]*//g;
-        $_ =~ s/CURLFTP_[A-Z0-9_]*//g;
-        $_ =~ s/CURLFTPAUTH_[A-Z0-9_]*//g;
-        $_ =~ s/CURLFTPMETHOD_[A-Z0-9_]*//g;
-        $_ =~ s/CURLFTPSSL_[A-Z0-9_]*//g;
-        $_ =~ s/CURLGSSAPI_[A-Z0-9_]*//g;
-        $_ =~ s/CURLHEADER_[A-Z0-9_]*//g;
-        $_ =~ s/CURLINFO_[A-Z0-9_]*//g;
-        $_ =~ s/CURLM_[A-Z0-9_]*//g;
-        $_ =~ s/CURLMIMEOPT_[A-Z0-9_]*//g;
-        $_ =~ s/CURLMOPT_[A-Z0-9_]*//g;
-        $_ =~ s/CURLOPT_[A-Z0-9_]*//g;
-        $_ =~ s/CURLPIPE_[A-Z0-9_]*//g;
-        $_ =~ s/CURLPROTO_[A-Z0-9_]*//g;
-        $_ =~ s/CURLPROXY_[A-Z0-9_]*//g;
-        $_ =~ s/CURLPX_[A-Z0-9_]*//g;
-        $_ =~ s/CURLSHE_[A-Z0-9_]*//g;
-        $_ =~ s/CURLSHOPT_[A-Z0-9_]*//g;
-        $_ =~ s/CURLSSLOPT_[A-Z0-9_]*//g;
-        $_ =~ s/CURLSSH_[A-Z0-9_]*//g;
-        $_ =~ s/CURLSSLBACKEND_[A-Z0-9_]*//g;
-        $_ =~ s/CURLU_[A-Z0-9_]*//g;
-        $_ =~ s/CURLUPART_[A-Z0-9_]*//g;
-        #$_ =~ s/\bCURLU\b//g; # stand-alone CURLU
-        $_ =~ s/CURLUE_[A-Z0-9_]*//g;
-        $_ =~ s/CURLHE_[A-Z0-9_]*//g;
-        $_ =~ s/CURLWS_[A-Z0-9_]*//g;
-        $_ =~ s/CURLKH[A-Z0-9_]*//g;
-        $_ =~ s/CURLUPART_[A-Z0-9_]*//g;
-        $_ =~ s/CURLUSESSL_[A-Z0-9_]*//g;
-        $_ =~ s/CURLPAUSE_[A-Z0-9_]*//g;
-        $_ =~ s/CURLHSTS_[A-Z0-9_]*//g;
-        $_ =~ s/curl_global_([a-z_]*)//g;
-        $_ =~ s/curl_(strequal|strnequal|formadd|waitfd|formget|getdate|formfree)//g;
-        $_ =~ s/curl_easy_([a-z]*)//g;
-        $_ =~ s/curl_multi_([a-z_]*)//g;
-        $_ =~ s/curl_mime_(subparts|addpart|filedata|data_cb)//g;
-        $_ =~ s/curl_ws_(send|recv|meta)//g;
-        $_ =~ s/curl_url_(dup)//g;
-        $_ =~ s/curl_pushheader_by(name|num)//g;
-        $_ =~ s/libcurl-(env|ws)//g;
-        $_ =~ s/libcurl\\-(env|ws)//g;
-        $_ =~ s/(^|\W)((tftp|https|http|ftp):\/\/[a-z0-9\-._~%:\/?\#\[\]\@!\$&'()*+,;=\\]+)//gi;
-        push @out, $_;
-    }
-}
-close(F);
-
-open(O, ">$f") or die;
-for my $l (@out) {
-    print O $l;
-}
-close(O);
diff --git a/.github/scripts/spellcheck.curl b/.github/scripts/spellcheck.curl
new file mode 100644
index 0000000000..4de9d86596
--- /dev/null
+++ b/.github/scripts/spellcheck.curl
@@ -0,0 +1,151 @@
+# Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
+#
+# SPDX-License-Identifier: curl
+#
+# common variable types + structs
+# callback typedefs
+# public functions names
+# some man page names
+curl_fileinfo
+curl_forms
+curl_hstsentry
+curl_httppost
+curl_index
+curl_khkey
+curl_pushheaders
+curl_waitfd
+CURLcode
+CURLformoption
+CURLHcode
+CURLMcode
+CURLMsg
+CURLSHcode
+CURLUcode
+curl_calloc_callback
+curl_chunk_bgn_callback
+curl_chunk_end_callback
+curl_conv_callback
+curl_debug_callback
+curl_fnmatch_callback
+curl_formget_callback
+curl_free_callback
+curl_hstsread_callback
+curl_hstswrite_callback
+curl_ioctl_callback
+curl_malloc_callback
+curl_multi_timer_callback
+curl_opensocket_callback
+curl_prereq_callback
+curl_progress_callback
+curl_push_callback
+curl_read_callback
+curl_realloc_callback
+curl_resolver_start_callback
+curl_seek_callback
+curl_socket_callback
+curl_sockopt_callback
+curl_ssl_ctx_callback
+curl_strdup_callback
+curl_trailer_callback
+curl_write_callback
+curl_xferinfo_callback
+curl_strequal
+curl_strnequal
+curl_mime_init
+curl_mime_free
+curl_mime_addpart
+curl_mime_name
+curl_mime_filename
+curl_mime_type
+curl_mime_encoder
+curl_mime_data
+curl_mime_filedata
+curl_mime_data_cb
+curl_mime_subparts
+curl_mime_headers
+curl_formadd
+curl_formget
+curl_formfree
+curl_getdate
+curl_getenv
+curl_version
+curl_easy_escape
+curl_escape
+curl_easy_unescape
+curl_unescape
+curl_free
+curl_global_init
+curl_global_init_mem
+curl_global_cleanup
+curl_global_trace
+curl_global_sslset
+curl_slist_append
+curl_slist_free_all
+curl_getdate
+curl_share_init
+curl_share_setopt
+curl_share_cleanup
+curl_version_info
+curl_easy_strerror
+curl_share_strerror
+curl_easy_pause
+curl_easy_ssls_import
+curl_easy_ssls_export
+curl_easy_init
+curl_easy_setopt
+curl_easy_perform
+curl_easy_cleanup
+curl_easy_getinfo
+curl_easy_duphandle
+curl_easy_reset
+curl_easy_recv
+curl_easy_send
+curl_easy_upkeep
+curl_easy_header
+curl_easy_nextheader
+curl_mprintf
+curl_mfprintf
+curl_msprintf
+curl_msnprintf
+curl_mvprintf
+curl_mvfprintf
+curl_mvsprintf
+curl_mvsnprintf
+curl_maprintf
+curl_mvaprintf
+curl_multi_init
+curl_multi_add_handle
+curl_multi_remove_handle
+curl_multi_fdset
+curl_multi_waitfds
+curl_multi_wait
+curl_multi_poll
+curl_multi_wakeup
+curl_multi_perform
+curl_multi_cleanup
+curl_multi_info_read
+curl_multi_strerror
+curl_multi_socket
+curl_multi_socket_action
+curl_multi_socket_all
+curl_multi_timeout
+curl_multi_setopt
+curl_multi_assign
+curl_multi_get_handles
+curl_pushheader_bynum
+curl_pushheader_byname
+curl_multi_waitfds
+curl_easy_option_by_name
+curl_easy_option_by_id
+curl_easy_option_next
+curl_url
+curl_url_cleanup
+curl_url_dup
+curl_url_get
+curl_url_set
+curl_url_strerror
+curl_ws_recv
+curl_ws_send
+curl_ws_meta
+libcurl-env
+libcurl-ws
diff --git a/.github/workflows/checkdocs.yml b/.github/workflows/checkdocs.yml
index 753883e8eb..dd52efab14 100644
--- a/.github/workflows/checkdocs.yml
+++ b/.github/workflows/checkdocs.yml
@@ -107,20 +107,8 @@ jobs:
           persist-credentials: false
         name: checkout
 
-      - name: trim all man page *.md files
-        run: find docs -name "*.md" ! -name "_*" -print0 | xargs -0 -n1 .github/scripts/cleancmd.pl
-
-      - name: trim libcurl man page *.md files
-        run: find docs/libcurl \( -name "curl_*.md" -o -name "libcurl*.md" \) -print0 | xargs -0 -n1 .github/scripts/cleanspell.pl
-
-      - name: trim libcurl option man page *.md files
-        run: find docs/libcurl/opts -name "CURL*.md" -print0 | xargs -0 -n1 .github/scripts/cleanspell.pl
-
-      - name: trim cmdline docs markdown _*.md files
-        run: find docs/cmdline-opts -name "_*.md" -print0 | xargs -0 -n1 .github/scripts/cleancmd.pl --no-header
-
-      - name: trim docs/ markdown _*.md files
-        run: git ls-files docs/*.md docs/internals/*.md | xargs -n1 .github/scripts/cleancmd.pl --no-header
+      - name: trim all *.md files in docs/
+        run: .github/scripts/cleancmd.pl $(find docs -name "*.md")
 
       - name: setup the custom wordlist
         run: grep -v '^#' .github/scripts/spellcheck.words > wordlist.txt