urlapi: add CURLU_GET_EMPTY for empty queries and fragments
By default, the API does not return empty queries and fragments when extracting them; they are only returned when this new flag is set. This also makes the behavior more consistent: without it set, zero length queries and fragments are considered not present in the URL. With the flag set, they are returned as zero length strings if they were in fact present in the URL. This applies when extracting the individual query and fragment components and for the full URL. Closes #13396
This commit is contained in:
parent
5379dbc248
commit
3eac21d86b
@ -113,6 +113,18 @@ punycode.
|
||||
|
||||
(Added in curl 8.3.0)
|
||||
|
||||
## CURLU_GET_EMPTY
|
||||
|
||||
When this flag is used in curl_url_get(), it makes the function return empty
|
||||
query and fragment parts, individually or as part of the full URL. By default, libcurl
|
||||
otherwise considers empty parts non-existing.
|
||||
|
||||
An empty query part is one where there is nothing following the question mark
|
||||
(before the possible fragment). An empty fragment part is one where there is
|
||||
nothing following the hash sign.
|
||||
|
||||
(Added in curl 8.8.0)
|
||||
|
||||
# PARTS
|
||||
|
||||
## CURLUPART_URL
|
||||
|
||||
@ -1064,6 +1064,7 @@ CURLU_APPENDQUERY 7.62.0
|
||||
CURLU_DEFAULT_PORT 7.62.0
|
||||
CURLU_DEFAULT_SCHEME 7.62.0
|
||||
CURLU_DISALLOW_USER 7.62.0
|
||||
CURLU_GET_EMPTY 8.8.0
|
||||
CURLU_GUESS_SCHEME 7.62.0
|
||||
CURLU_NO_AUTHORITY 7.67.0
|
||||
CURLU_NO_DEFAULT_PORT 7.62.0
|
||||
|
||||
@ -99,6 +99,9 @@ typedef enum {
|
||||
#define CURLU_ALLOW_SPACE (1<<11) /* Allow spaces in the URL */
|
||||
#define CURLU_PUNYCODE (1<<12) /* get the host name in punycode */
|
||||
#define CURLU_PUNY2IDN (1<<13) /* punycode => IDN conversion */
|
||||
#define CURLU_GET_EMPTY (1<<14) /* allow empty queries and fragments
|
||||
when extracting the URL or the
|
||||
components */
|
||||
|
||||
typedef struct Curl_URL CURLU;
|
||||
|
||||
|
||||
37
lib/urlapi.c
37
lib/urlapi.c
@ -79,7 +79,9 @@ struct Curl_URL {
|
||||
char *path;
|
||||
char *query;
|
||||
char *fragment;
|
||||
long portnum; /* the numerical version */
|
||||
unsigned short portnum; /* the numerical version */
|
||||
BIT(query_present); /* to support blank */
|
||||
BIT(fragment_present); /* to support blank */
|
||||
};
|
||||
|
||||
#define DEFAULT_SCHEME "https"
|
||||
@ -561,7 +563,7 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
|
||||
if(rest[0])
|
||||
return CURLUE_BAD_PORT_NUMBER;
|
||||
|
||||
u->portnum = port;
|
||||
u->portnum = (unsigned short) port;
|
||||
/* generate a new port number string to get rid of leading zeroes etc */
|
||||
free(u->port);
|
||||
u->port = aprintf("%ld", port);
|
||||
@ -1245,6 +1247,7 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
|
||||
fragment = strchr(path, '#');
|
||||
if(fragment) {
|
||||
fraglen = pathlen - (fragment - path);
|
||||
u->fragment_present = TRUE;
|
||||
if(fraglen > 1) {
|
||||
/* skip the leading '#' in the copy but include the terminating null */
|
||||
if(flags & CURLU_URLENCODE) {
|
||||
@ -1272,6 +1275,7 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
|
||||
size_t qlen = fragment ? (size_t)(fragment - query) :
|
||||
pathlen - (query - path);
|
||||
pathlen -= qlen;
|
||||
u->query_present = TRUE;
|
||||
if(qlen > 1) {
|
||||
if(flags & CURLU_URLENCODE) {
|
||||
struct dynbuf enc;
|
||||
@ -1407,6 +1411,8 @@ CURLU *curl_url_dup(const CURLU *in)
|
||||
DUP(u, in, fragment);
|
||||
DUP(u, in, zoneid);
|
||||
u->portnum = in->portnum;
|
||||
u->fragment_present = in->fragment_present;
|
||||
u->query_present = in->query_present;
|
||||
}
|
||||
return u;
|
||||
fail:
|
||||
@ -1491,10 +1497,16 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
|
||||
ptr = u->query;
|
||||
ifmissing = CURLUE_NO_QUERY;
|
||||
plusdecode = urldecode;
|
||||
if(ptr && !ptr[0] && !(flags & CURLU_GET_EMPTY))
|
||||
/* there was a blank query and the user does not ask for it */
|
||||
ptr = NULL;
|
||||
break;
|
||||
case CURLUPART_FRAGMENT:
|
||||
ptr = u->fragment;
|
||||
ifmissing = CURLUE_NO_FRAGMENT;
|
||||
if(!ptr && u->fragment_present && flags & CURLU_GET_EMPTY)
|
||||
/* there was a blank fragment and the user asks for it */
|
||||
ptr = "";
|
||||
break;
|
||||
case CURLUPART_URL: {
|
||||
char *url;
|
||||
@ -1502,13 +1514,18 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
|
||||
char *options = u->options;
|
||||
char *port = u->port;
|
||||
char *allochost = NULL;
|
||||
bool show_fragment =
|
||||
u->fragment || (u->fragment_present && flags & CURLU_GET_EMPTY);
|
||||
bool show_query =
|
||||
(u->query && u->query[0]) ||
|
||||
(u->query_present && flags & CURLU_GET_EMPTY);
|
||||
punycode = (flags & CURLU_PUNYCODE)?1:0;
|
||||
depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
|
||||
if(u->scheme && strcasecompare("file", u->scheme)) {
|
||||
url = aprintf("file://%s%s%s",
|
||||
u->path,
|
||||
u->fragment? "#": "",
|
||||
u->fragment? u->fragment : "");
|
||||
show_fragment ? "#": "",
|
||||
u->fragment ? u->fragment : "");
|
||||
}
|
||||
else if(!u->host)
|
||||
return CURLUE_NO_HOST;
|
||||
@ -1596,9 +1613,9 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
|
||||
port ? ":": "",
|
||||
port ? port : "",
|
||||
u->path ? u->path : "/",
|
||||
(u->query && u->query[0]) ? "?": "",
|
||||
(u->query && u->query[0]) ? u->query : "",
|
||||
u->fragment? "#": "",
|
||||
show_query ? "?": "",
|
||||
u->query ? u->query : "",
|
||||
show_fragment ? "#": "",
|
||||
u->fragment? u->fragment : "");
|
||||
free(allochost);
|
||||
}
|
||||
@ -1733,9 +1750,11 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
|
||||
break;
|
||||
case CURLUPART_QUERY:
|
||||
storep = &u->query;
|
||||
u->query_present = FALSE;
|
||||
break;
|
||||
case CURLUPART_FRAGMENT:
|
||||
storep = &u->fragment;
|
||||
u->fragment_present = FALSE;
|
||||
break;
|
||||
default:
|
||||
return CURLUE_UNKNOWN_PART;
|
||||
@ -1819,9 +1838,11 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
|
||||
appendquery = (flags & CURLU_APPENDQUERY)?1:0;
|
||||
equalsencode = appendquery;
|
||||
storep = &u->query;
|
||||
u->query_present = TRUE;
|
||||
break;
|
||||
case CURLUPART_FRAGMENT:
|
||||
storep = &u->fragment;
|
||||
u->fragment_present = TRUE;
|
||||
break;
|
||||
case CURLUPART_URL: {
|
||||
/*
|
||||
@ -1972,6 +1993,6 @@ nomem:
|
||||
/* set after the string, to make it not assigned if the allocation above
|
||||
fails */
|
||||
if(port)
|
||||
u->portnum = port;
|
||||
u->portnum = (unsigned short)port;
|
||||
return CURLUE_OK;
|
||||
}
|
||||
|
||||
@ -25,7 +25,7 @@ gopher
|
||||
Gopher selector
|
||||
</name>
|
||||
<command>
|
||||
gopher://%HOSTIP:%GOPHERPORT/1/selector/SELECTOR/%TESTNUMBER?
|
||||
gopher://%HOSTIP:%GOPHERPORT/1/selector/SELECTOR/%TESTNUMBER
|
||||
</command>
|
||||
</client>
|
||||
|
||||
@ -33,7 +33,7 @@ gopher://%HOSTIP:%GOPHERPORT/1/selector/SELECTOR/%TESTNUMBER?
|
||||
# Verify data after the test has been "shot"
|
||||
<verify>
|
||||
<protocol>
|
||||
/selector/SELECTOR/%TESTNUMBER?
|
||||
/selector/SELECTOR/%TESTNUMBER
|
||||
</protocol>
|
||||
</verify>
|
||||
</testcase>
|
||||
|
||||
@ -151,6 +151,21 @@ struct clearurlcase {
|
||||
};
|
||||
|
||||
static const struct testcase get_parts_list[] ={
|
||||
{"https://curl.se/#",
|
||||
"https | [11] | [12] | [13] | curl.se | [15] | / | [16] | ",
|
||||
0, CURLU_GET_EMPTY, CURLUE_OK},
|
||||
{"https://curl.se/?#",
|
||||
"https | [11] | [12] | [13] | curl.se | [15] | / | | ",
|
||||
0, CURLU_GET_EMPTY, CURLUE_OK},
|
||||
{"https://curl.se/?",
|
||||
"https | [11] | [12] | [13] | curl.se | [15] | / | | [17]",
|
||||
0, CURLU_GET_EMPTY, CURLUE_OK},
|
||||
{"https://curl.se/?",
|
||||
"https | [11] | [12] | [13] | curl.se | [15] | / | [16] | [17]",
|
||||
0, 0, CURLUE_OK},
|
||||
{"https://curl.se/?#",
|
||||
"https | [11] | [12] | [13] | curl.se | [15] | / | [16] | [17]",
|
||||
0, 0, CURLUE_OK},
|
||||
{"https://curl.se/# ",
|
||||
"https | [11] | [12] | [13] | curl.se | [15] | / | [16] | %20%20",
|
||||
CURLU_URLENCODE|CURLU_ALLOW_SPACE, 0, CURLUE_OK},
|
||||
@ -508,6 +523,9 @@ static const struct testcase get_parts_list[] ={
|
||||
};
|
||||
|
||||
static const struct urltestcase get_url_list[] = {
|
||||
{"http://user@example.com?#",
|
||||
"http://user@example.com/?#",
|
||||
0, CURLU_GET_EMPTY, CURLUE_OK},
|
||||
/* WHATWG disagrees, it wants "https://0.0.0.0/" */
|
||||
{"https://0x.0x.0", "https://0x.0x.0/", 0, 0, CURLUE_OK},
|
||||
|
||||
@ -781,6 +799,18 @@ static int checkurl(const char *org, const char *url, const char *out)
|
||||
3. Extract all components (not URL)
|
||||
*/
|
||||
static const struct setgetcase setget_parts_list[] = {
|
||||
{"https://example.com/",
|
||||
"query=\"\",",
|
||||
"https | [11] | [12] | [13] | example.com | [15] | / | | [17]",
|
||||
0, 0, CURLU_GET_EMPTY, CURLUE_OK},
|
||||
{"https://example.com/",
|
||||
"fragment=\"\",",
|
||||
"https | [11] | [12] | [13] | example.com | [15] | / | [16] | ",
|
||||
0, 0, CURLU_GET_EMPTY, CURLUE_OK},
|
||||
{"https://example.com/",
|
||||
"query=\"\",",
|
||||
"https | [11] | [12] | [13] | example.com | [15] | / | [16] | [17]",
|
||||
0, 0, 0, CURLUE_OK},
|
||||
{"https://example.com",
|
||||
"path=get,",
|
||||
"https | [11] | [12] | [13] | example.com | [15] | /get | [16] | [17]",
|
||||
|
||||
Loading…
Reference in New Issue
Block a user