urlapi: add CURLU_GET_EMPTY for empty queries and fragments
By default, the API does not return empty queries and fragments when extracting them, unless this new flag is set. This also makes the behavior more consistent: without it set, zero-length queries and fragments are considered not present in the URL. With the flag set, they are returned as zero-length strings if they were in fact present in the URL. This applies when extracting the individual query and fragment components and for the full URL. Closes #13396
This commit is contained in:
parent
5379dbc248
commit
3eac21d86b
@ -113,6 +113,18 @@ punycode.
|
|||||||
|
|
||||||
(Added in curl 8.3.0)
|
(Added in curl 8.3.0)
|
||||||
|
|
||||||
|
## CURLU_GET_EMPTY
|
||||||
|
|
||||||
|
When this flag is used in curl_url_get(), it makes the function return empty
|
||||||
|
query and fragment parts, individually or as part of the full URL. By default, libcurl
|
||||||
|
otherwise considers empty parts non-existing.
|
||||||
|
|
||||||
|
An empty query part is one where there is nothing following the question mark
|
||||||
|
(before the possible fragment). An empty fragment part is one where there is
|
||||||
|
nothing following the hash sign.
|
||||||
|
|
||||||
|
(Added in curl 8.8.0)
|
||||||
|
|
||||||
# PARTS
|
# PARTS
|
||||||
|
|
||||||
## CURLUPART_URL
|
## CURLUPART_URL
|
||||||
|
|||||||
@ -1064,6 +1064,7 @@ CURLU_APPENDQUERY 7.62.0
|
|||||||
CURLU_DEFAULT_PORT 7.62.0
|
CURLU_DEFAULT_PORT 7.62.0
|
||||||
CURLU_DEFAULT_SCHEME 7.62.0
|
CURLU_DEFAULT_SCHEME 7.62.0
|
||||||
CURLU_DISALLOW_USER 7.62.0
|
CURLU_DISALLOW_USER 7.62.0
|
||||||
|
CURLU_GET_EMPTY 8.8.0
|
||||||
CURLU_GUESS_SCHEME 7.62.0
|
CURLU_GUESS_SCHEME 7.62.0
|
||||||
CURLU_NO_AUTHORITY 7.67.0
|
CURLU_NO_AUTHORITY 7.67.0
|
||||||
CURLU_NO_DEFAULT_PORT 7.62.0
|
CURLU_NO_DEFAULT_PORT 7.62.0
|
||||||
|
|||||||
@ -99,6 +99,9 @@ typedef enum {
|
|||||||
#define CURLU_ALLOW_SPACE (1<<11) /* Allow spaces in the URL */
|
#define CURLU_ALLOW_SPACE (1<<11) /* Allow spaces in the URL */
|
||||||
#define CURLU_PUNYCODE (1<<12) /* get the host name in punycode */
|
#define CURLU_PUNYCODE (1<<12) /* get the host name in punycode */
|
||||||
#define CURLU_PUNY2IDN (1<<13) /* punycode => IDN conversion */
|
#define CURLU_PUNY2IDN (1<<13) /* punycode => IDN conversion */
|
||||||
|
#define CURLU_GET_EMPTY (1<<14) /* allow empty queries and fragments
|
||||||
|
when extracting the URL or the
|
||||||
|
components */
|
||||||
|
|
||||||
typedef struct Curl_URL CURLU;
|
typedef struct Curl_URL CURLU;
|
||||||
|
|
||||||
|
|||||||
35
lib/urlapi.c
35
lib/urlapi.c
@ -79,7 +79,9 @@ struct Curl_URL {
|
|||||||
char *path;
|
char *path;
|
||||||
char *query;
|
char *query;
|
||||||
char *fragment;
|
char *fragment;
|
||||||
long portnum; /* the numerical version */
|
unsigned short portnum; /* the numerical version */
|
||||||
|
BIT(query_present); /* to support blank */
|
||||||
|
BIT(fragment_present); /* to support blank */
|
||||||
};
|
};
|
||||||
|
|
||||||
#define DEFAULT_SCHEME "https"
|
#define DEFAULT_SCHEME "https"
|
||||||
@ -561,7 +563,7 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
|
|||||||
if(rest[0])
|
if(rest[0])
|
||||||
return CURLUE_BAD_PORT_NUMBER;
|
return CURLUE_BAD_PORT_NUMBER;
|
||||||
|
|
||||||
u->portnum = port;
|
u->portnum = (unsigned short) port;
|
||||||
/* generate a new port number string to get rid of leading zeroes etc */
|
/* generate a new port number string to get rid of leading zeroes etc */
|
||||||
free(u->port);
|
free(u->port);
|
||||||
u->port = aprintf("%ld", port);
|
u->port = aprintf("%ld", port);
|
||||||
@ -1245,6 +1247,7 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
|
|||||||
fragment = strchr(path, '#');
|
fragment = strchr(path, '#');
|
||||||
if(fragment) {
|
if(fragment) {
|
||||||
fraglen = pathlen - (fragment - path);
|
fraglen = pathlen - (fragment - path);
|
||||||
|
u->fragment_present = TRUE;
|
||||||
if(fraglen > 1) {
|
if(fraglen > 1) {
|
||||||
/* skip the leading '#' in the copy but include the terminating null */
|
/* skip the leading '#' in the copy but include the terminating null */
|
||||||
if(flags & CURLU_URLENCODE) {
|
if(flags & CURLU_URLENCODE) {
|
||||||
@ -1272,6 +1275,7 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
|
|||||||
size_t qlen = fragment ? (size_t)(fragment - query) :
|
size_t qlen = fragment ? (size_t)(fragment - query) :
|
||||||
pathlen - (query - path);
|
pathlen - (query - path);
|
||||||
pathlen -= qlen;
|
pathlen -= qlen;
|
||||||
|
u->query_present = TRUE;
|
||||||
if(qlen > 1) {
|
if(qlen > 1) {
|
||||||
if(flags & CURLU_URLENCODE) {
|
if(flags & CURLU_URLENCODE) {
|
||||||
struct dynbuf enc;
|
struct dynbuf enc;
|
||||||
@ -1407,6 +1411,8 @@ CURLU *curl_url_dup(const CURLU *in)
|
|||||||
DUP(u, in, fragment);
|
DUP(u, in, fragment);
|
||||||
DUP(u, in, zoneid);
|
DUP(u, in, zoneid);
|
||||||
u->portnum = in->portnum;
|
u->portnum = in->portnum;
|
||||||
|
u->fragment_present = in->fragment_present;
|
||||||
|
u->query_present = in->query_present;
|
||||||
}
|
}
|
||||||
return u;
|
return u;
|
||||||
fail:
|
fail:
|
||||||
@ -1491,10 +1497,16 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
|
|||||||
ptr = u->query;
|
ptr = u->query;
|
||||||
ifmissing = CURLUE_NO_QUERY;
|
ifmissing = CURLUE_NO_QUERY;
|
||||||
plusdecode = urldecode;
|
plusdecode = urldecode;
|
||||||
|
if(ptr && !ptr[0] && !(flags & CURLU_GET_EMPTY))
|
||||||
|
/* there was a blank query and the user does not ask for it */
|
||||||
|
ptr = NULL;
|
||||||
break;
|
break;
|
||||||
case CURLUPART_FRAGMENT:
|
case CURLUPART_FRAGMENT:
|
||||||
ptr = u->fragment;
|
ptr = u->fragment;
|
||||||
ifmissing = CURLUE_NO_FRAGMENT;
|
ifmissing = CURLUE_NO_FRAGMENT;
|
||||||
|
if(!ptr && u->fragment_present && flags & CURLU_GET_EMPTY)
|
||||||
|
/* there was a blank fragment and the user asks for it */
|
||||||
|
ptr = "";
|
||||||
break;
|
break;
|
||||||
case CURLUPART_URL: {
|
case CURLUPART_URL: {
|
||||||
char *url;
|
char *url;
|
||||||
@ -1502,12 +1514,17 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
|
|||||||
char *options = u->options;
|
char *options = u->options;
|
||||||
char *port = u->port;
|
char *port = u->port;
|
||||||
char *allochost = NULL;
|
char *allochost = NULL;
|
||||||
|
bool show_fragment =
|
||||||
|
u->fragment || (u->fragment_present && flags & CURLU_GET_EMPTY);
|
||||||
|
bool show_query =
|
||||||
|
(u->query && u->query[0]) ||
|
||||||
|
(u->query_present && flags & CURLU_GET_EMPTY);
|
||||||
punycode = (flags & CURLU_PUNYCODE)?1:0;
|
punycode = (flags & CURLU_PUNYCODE)?1:0;
|
||||||
depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
|
depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
|
||||||
if(u->scheme && strcasecompare("file", u->scheme)) {
|
if(u->scheme && strcasecompare("file", u->scheme)) {
|
||||||
url = aprintf("file://%s%s%s",
|
url = aprintf("file://%s%s%s",
|
||||||
u->path,
|
u->path,
|
||||||
u->fragment? "#": "",
|
show_fragment ? "#": "",
|
||||||
u->fragment ? u->fragment : "");
|
u->fragment ? u->fragment : "");
|
||||||
}
|
}
|
||||||
else if(!u->host)
|
else if(!u->host)
|
||||||
@ -1596,9 +1613,9 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
|
|||||||
port ? ":": "",
|
port ? ":": "",
|
||||||
port ? port : "",
|
port ? port : "",
|
||||||
u->path ? u->path : "/",
|
u->path ? u->path : "/",
|
||||||
(u->query && u->query[0]) ? "?": "",
|
show_query ? "?": "",
|
||||||
(u->query && u->query[0]) ? u->query : "",
|
u->query ? u->query : "",
|
||||||
u->fragment? "#": "",
|
show_fragment ? "#": "",
|
||||||
u->fragment? u->fragment : "");
|
u->fragment? u->fragment : "");
|
||||||
free(allochost);
|
free(allochost);
|
||||||
}
|
}
|
||||||
@ -1733,9 +1750,11 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
|
|||||||
break;
|
break;
|
||||||
case CURLUPART_QUERY:
|
case CURLUPART_QUERY:
|
||||||
storep = &u->query;
|
storep = &u->query;
|
||||||
|
u->query_present = FALSE;
|
||||||
break;
|
break;
|
||||||
case CURLUPART_FRAGMENT:
|
case CURLUPART_FRAGMENT:
|
||||||
storep = &u->fragment;
|
storep = &u->fragment;
|
||||||
|
u->fragment_present = FALSE;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
return CURLUE_UNKNOWN_PART;
|
return CURLUE_UNKNOWN_PART;
|
||||||
@ -1819,9 +1838,11 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
|
|||||||
appendquery = (flags & CURLU_APPENDQUERY)?1:0;
|
appendquery = (flags & CURLU_APPENDQUERY)?1:0;
|
||||||
equalsencode = appendquery;
|
equalsencode = appendquery;
|
||||||
storep = &u->query;
|
storep = &u->query;
|
||||||
|
u->query_present = TRUE;
|
||||||
break;
|
break;
|
||||||
case CURLUPART_FRAGMENT:
|
case CURLUPART_FRAGMENT:
|
||||||
storep = &u->fragment;
|
storep = &u->fragment;
|
||||||
|
u->fragment_present = TRUE;
|
||||||
break;
|
break;
|
||||||
case CURLUPART_URL: {
|
case CURLUPART_URL: {
|
||||||
/*
|
/*
|
||||||
@ -1972,6 +1993,6 @@ nomem:
|
|||||||
/* set after the string, to make it not assigned if the allocation above
|
/* set after the string, to make it not assigned if the allocation above
|
||||||
fails */
|
fails */
|
||||||
if(port)
|
if(port)
|
||||||
u->portnum = port;
|
u->portnum = (unsigned short)port;
|
||||||
return CURLUE_OK;
|
return CURLUE_OK;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -25,7 +25,7 @@ gopher
|
|||||||
Gopher selector
|
Gopher selector
|
||||||
</name>
|
</name>
|
||||||
<command>
|
<command>
|
||||||
gopher://%HOSTIP:%GOPHERPORT/1/selector/SELECTOR/%TESTNUMBER?
|
gopher://%HOSTIP:%GOPHERPORT/1/selector/SELECTOR/%TESTNUMBER
|
||||||
</command>
|
</command>
|
||||||
</client>
|
</client>
|
||||||
|
|
||||||
@ -33,7 +33,7 @@ gopher://%HOSTIP:%GOPHERPORT/1/selector/SELECTOR/%TESTNUMBER?
|
|||||||
# Verify data after the test has been "shot"
|
# Verify data after the test has been "shot"
|
||||||
<verify>
|
<verify>
|
||||||
<protocol>
|
<protocol>
|
||||||
/selector/SELECTOR/%TESTNUMBER?
|
/selector/SELECTOR/%TESTNUMBER
|
||||||
</protocol>
|
</protocol>
|
||||||
</verify>
|
</verify>
|
||||||
</testcase>
|
</testcase>
|
||||||
|
|||||||
@ -151,6 +151,21 @@ struct clearurlcase {
|
|||||||
};
|
};
|
||||||
|
|
||||||
static const struct testcase get_parts_list[] ={
|
static const struct testcase get_parts_list[] ={
|
||||||
|
{"https://curl.se/#",
|
||||||
|
"https | [11] | [12] | [13] | curl.se | [15] | / | [16] | ",
|
||||||
|
0, CURLU_GET_EMPTY, CURLUE_OK},
|
||||||
|
{"https://curl.se/?#",
|
||||||
|
"https | [11] | [12] | [13] | curl.se | [15] | / | | ",
|
||||||
|
0, CURLU_GET_EMPTY, CURLUE_OK},
|
||||||
|
{"https://curl.se/?",
|
||||||
|
"https | [11] | [12] | [13] | curl.se | [15] | / | | [17]",
|
||||||
|
0, CURLU_GET_EMPTY, CURLUE_OK},
|
||||||
|
{"https://curl.se/?",
|
||||||
|
"https | [11] | [12] | [13] | curl.se | [15] | / | [16] | [17]",
|
||||||
|
0, 0, CURLUE_OK},
|
||||||
|
{"https://curl.se/?#",
|
||||||
|
"https | [11] | [12] | [13] | curl.se | [15] | / | [16] | [17]",
|
||||||
|
0, 0, CURLUE_OK},
|
||||||
{"https://curl.se/# ",
|
{"https://curl.se/# ",
|
||||||
"https | [11] | [12] | [13] | curl.se | [15] | / | [16] | %20%20",
|
"https | [11] | [12] | [13] | curl.se | [15] | / | [16] | %20%20",
|
||||||
CURLU_URLENCODE|CURLU_ALLOW_SPACE, 0, CURLUE_OK},
|
CURLU_URLENCODE|CURLU_ALLOW_SPACE, 0, CURLUE_OK},
|
||||||
@ -508,6 +523,9 @@ static const struct testcase get_parts_list[] ={
|
|||||||
};
|
};
|
||||||
|
|
||||||
static const struct urltestcase get_url_list[] = {
|
static const struct urltestcase get_url_list[] = {
|
||||||
|
{"http://user@example.com?#",
|
||||||
|
"http://user@example.com/?#",
|
||||||
|
0, CURLU_GET_EMPTY, CURLUE_OK},
|
||||||
/* WHATWG disagrees, it wants "https:/0.0.0.0/" */
|
/* WHATWG disagrees, it wants "https:/0.0.0.0/" */
|
||||||
{"https://0x.0x.0", "https://0x.0x.0/", 0, 0, CURLUE_OK},
|
{"https://0x.0x.0", "https://0x.0x.0/", 0, 0, CURLUE_OK},
|
||||||
|
|
||||||
@ -781,6 +799,18 @@ static int checkurl(const char *org, const char *url, const char *out)
|
|||||||
3. Extract all components (not URL)
|
3. Extract all components (not URL)
|
||||||
*/
|
*/
|
||||||
static const struct setgetcase setget_parts_list[] = {
|
static const struct setgetcase setget_parts_list[] = {
|
||||||
|
{"https://example.com/",
|
||||||
|
"query=\"\",",
|
||||||
|
"https | [11] | [12] | [13] | example.com | [15] | / | | [17]",
|
||||||
|
0, 0, CURLU_GET_EMPTY, CURLUE_OK},
|
||||||
|
{"https://example.com/",
|
||||||
|
"fragment=\"\",",
|
||||||
|
"https | [11] | [12] | [13] | example.com | [15] | / | [16] | ",
|
||||||
|
0, 0, CURLU_GET_EMPTY, CURLUE_OK},
|
||||||
|
{"https://example.com/",
|
||||||
|
"query=\"\",",
|
||||||
|
"https | [11] | [12] | [13] | example.com | [15] | / | [16] | [17]",
|
||||||
|
0, 0, 0, CURLUE_OK},
|
||||||
{"https://example.com",
|
{"https://example.com",
|
||||||
"path=get,",
|
"path=get,",
|
||||||
"https | [11] | [12] | [13] | example.com | [15] | /get | [16] | [17]",
|
"https | [11] | [12] | [13] | example.com | [15] | /get | [16] | [17]",
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user