urlapi: add CURLU_PUNYCODE
Allows curl_url_get() to get the punycode version of host names for the host name and URL parts. Extend test 1560 to verify. Closes #10109
This commit is contained in:
parent
cf174810db
commit
901392cbb7
1
.github/scripts/spellcheck.words
vendored
1
.github/scripts/spellcheck.words
vendored
@ -573,6 +573,7 @@ PSL
|
|||||||
pthreads
|
pthreads
|
||||||
PTR
|
PTR
|
||||||
ptr
|
ptr
|
||||||
|
punycode
|
||||||
py
|
py
|
||||||
pycurl
|
pycurl
|
||||||
QNX
|
QNX
|
||||||
|
|||||||
@ -76,6 +76,17 @@ typically using non-ASCII bytes that otherwise will be percent-encoded.
|
|||||||
|
|
||||||
Note that even when not asking for URL encoding, the '%' (byte 37) will be URL
|
Note that even when not asking for URL encoding, the '%' (byte 37) will be URL
|
||||||
encoded to make sure the host name remains valid.
|
encoded to make sure the host name remains valid.
|
||||||
|
.IP CURLU_PUNYCODE
|
||||||
|
If set and \fICURLU_URLENCODE\fP is not set, and asked to retrieve the
|
||||||
|
\fBCURLUPART_HOST\fP or \fBCURLUPART_URL\fP parts, libcurl returns the host
|
||||||
|
name in its punycode version if it contains any non-ASCII octets (and is an
|
||||||
|
IDN name).
|
||||||
|
|
||||||
|
If libcurl is built without IDN capabilities, using this bit will make
|
||||||
|
\fIcurl_url_get(3)\fP return \fICURLUE_LACKS_IDN\fP if the host name contains
|
||||||
|
anything outside the ASCII range.
|
||||||
|
|
||||||
|
(Added in curl 7.88.0)
|
||||||
.SH PARTS
|
.SH PARTS
|
||||||
.IP CURLUPART_URL
|
.IP CURLUPART_URL
|
||||||
When asked to return the full URL, \fIcurl_url_get(3)\fP will return a
|
When asked to return the full URL, \fIcurl_url_get(3)\fP will return a
|
||||||
|
|||||||
@ -1055,6 +1055,7 @@ CURLU_NO_AUTHORITY 7.67.0
|
|||||||
CURLU_NO_DEFAULT_PORT 7.62.0
|
CURLU_NO_DEFAULT_PORT 7.62.0
|
||||||
CURLU_NON_SUPPORT_SCHEME 7.62.0
|
CURLU_NON_SUPPORT_SCHEME 7.62.0
|
||||||
CURLU_PATH_AS_IS 7.62.0
|
CURLU_PATH_AS_IS 7.62.0
|
||||||
|
CURLU_PUNYCODE 7.88.0
|
||||||
CURLU_URLDECODE 7.62.0
|
CURLU_URLDECODE 7.62.0
|
||||||
CURLU_URLENCODE 7.62.0
|
CURLU_URLENCODE 7.62.0
|
||||||
CURLUE_BAD_FILE_URL 7.81.0
|
CURLUE_BAD_FILE_URL 7.81.0
|
||||||
@ -1071,6 +1072,7 @@ CURLUE_BAD_QUERY 7.81.0
|
|||||||
CURLUE_BAD_SCHEME 7.81.0
|
CURLUE_BAD_SCHEME 7.81.0
|
||||||
CURLUE_BAD_SLASHES 7.81.0
|
CURLUE_BAD_SLASHES 7.81.0
|
||||||
CURLUE_BAD_USER 7.81.0
|
CURLUE_BAD_USER 7.81.0
|
||||||
|
CURLUE_LACKS_IDN 7.88.0
|
||||||
CURLUE_MALFORMED_INPUT 7.62.0
|
CURLUE_MALFORMED_INPUT 7.62.0
|
||||||
CURLUE_NO_FRAGMENT 7.62.0
|
CURLUE_NO_FRAGMENT 7.62.0
|
||||||
CURLUE_NO_HOST 7.62.0
|
CURLUE_NO_HOST 7.62.0
|
||||||
|
|||||||
@ -62,6 +62,7 @@ typedef enum {
|
|||||||
CURLUE_BAD_SCHEME, /* 27 */
|
CURLUE_BAD_SCHEME, /* 27 */
|
||||||
CURLUE_BAD_SLASHES, /* 28 */
|
CURLUE_BAD_SLASHES, /* 28 */
|
||||||
CURLUE_BAD_USER, /* 29 */
|
CURLUE_BAD_USER, /* 29 */
|
||||||
|
CURLUE_LACKS_IDN, /* 30 */
|
||||||
CURLUE_LAST
|
CURLUE_LAST
|
||||||
} CURLUcode;
|
} CURLUcode;
|
||||||
|
|
||||||
@ -95,6 +96,7 @@ typedef enum {
|
|||||||
#define CURLU_NO_AUTHORITY (1<<10) /* Allow empty authority when the
|
#define CURLU_NO_AUTHORITY (1<<10) /* Allow empty authority when the
|
||||||
scheme is unknown. */
|
scheme is unknown. */
|
||||||
#define CURLU_ALLOW_SPACE (1<<11) /* Allow spaces in the URL */
|
#define CURLU_ALLOW_SPACE (1<<11) /* Allow spaces in the URL */
|
||||||
|
#define CURLU_PUNYCODE (1<<12) /* get the host name in punycode */
|
||||||
|
|
||||||
typedef struct Curl_URL CURLU;
|
typedef struct Curl_URL CURLU;
|
||||||
|
|
||||||
|
|||||||
30
lib/idn.c
30
lib/idn.c
@ -116,7 +116,7 @@ bool Curl_is_ASCII_name(const char *hostname)
|
|||||||
* Curl_idn_decode() returns an allocated IDN decoded string if it was
|
* Curl_idn_decode() returns an allocated IDN decoded string if it was
|
||||||
* possible. NULL on error.
|
* possible. NULL on error.
|
||||||
*/
|
*/
|
||||||
static char *Curl_idn_decode(const char *input)
|
static char *idn_decode(const char *input)
|
||||||
{
|
{
|
||||||
char *decoded = NULL;
|
char *decoded = NULL;
|
||||||
#ifdef USE_LIBIDN2
|
#ifdef USE_LIBIDN2
|
||||||
@ -144,24 +144,29 @@ static char *Curl_idn_decode(const char *input)
|
|||||||
return decoded;
|
return decoded;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
char *Curl_idn_decode(const char *input)
|
||||||
|
{
|
||||||
|
char *d = idn_decode(input);
|
||||||
|
#ifdef USE_LIBIDN2
|
||||||
|
if(d) {
|
||||||
|
char *c = strdup(d);
|
||||||
|
idn2_free(d);
|
||||||
|
d = c;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return d;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Frees data allocated by idnconvert_hostname()
|
* Frees data allocated by idnconvert_hostname()
|
||||||
*/
|
*/
|
||||||
void Curl_free_idnconverted_hostname(struct hostname *host)
|
void Curl_free_idnconverted_hostname(struct hostname *host)
|
||||||
{
|
{
|
||||||
#if defined(USE_LIBIDN2)
|
|
||||||
if(host->encalloc) {
|
if(host->encalloc) {
|
||||||
idn2_free(host->encalloc); /* must be freed with idn2_free() since this was
|
/* must be freed with idn2_free() if allocated by libidn */
|
||||||
allocated by libidn */
|
Curl_idn_free(host->encalloc);
|
||||||
host->encalloc = NULL;
|
host->encalloc = NULL;
|
||||||
}
|
}
|
||||||
#elif defined(USE_WIN32_IDN)
|
|
||||||
free(host->encalloc); /* must be freed with free() since this was
|
|
||||||
allocated by Curl_win32_idn_to_ascii */
|
|
||||||
host->encalloc = NULL;
|
|
||||||
#else
|
|
||||||
(void)host;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* USE_IDN */
|
#endif /* USE_IDN */
|
||||||
@ -177,7 +182,7 @@ CURLcode Curl_idnconvert_hostname(struct hostname *host)
|
|||||||
#ifdef USE_IDN
|
#ifdef USE_IDN
|
||||||
/* Check name for non-ASCII and convert hostname if we can */
|
/* Check name for non-ASCII and convert hostname if we can */
|
||||||
if(!Curl_is_ASCII_name(host->name)) {
|
if(!Curl_is_ASCII_name(host->name)) {
|
||||||
char *decoded = Curl_idn_decode(host->name);
|
char *decoded = idn_decode(host->name);
|
||||||
if(decoded) {
|
if(decoded) {
|
||||||
/* successful */
|
/* successful */
|
||||||
host->encalloc = decoded;
|
host->encalloc = decoded;
|
||||||
@ -190,4 +195,3 @@ CURLcode Curl_idnconvert_hostname(struct hostname *host)
|
|||||||
#endif
|
#endif
|
||||||
return CURLE_OK;
|
return CURLE_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -32,7 +32,15 @@ CURLcode Curl_idnconvert_hostname(struct hostname *host);
|
|||||||
#if defined(USE_LIBIDN2) || defined(USE_WIN32_IDN)
|
#if defined(USE_LIBIDN2) || defined(USE_WIN32_IDN)
|
||||||
#define USE_IDN
|
#define USE_IDN
|
||||||
void Curl_free_idnconverted_hostname(struct hostname *host);
|
void Curl_free_idnconverted_hostname(struct hostname *host);
|
||||||
|
char *Curl_idn_decode(const char *input);
|
||||||
|
#ifdef USE_LIBIDN2
|
||||||
|
#define Curl_idn_free(x) idn2_free(x)
|
||||||
|
#else
|
||||||
|
#define Curl_idn_free(x) free(x)
|
||||||
|
#endif
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#define Curl_free_idnconverted_hostname(x)
|
#define Curl_free_idnconverted_hostname(x)
|
||||||
|
#define Curl_idn_decode(x) NULL
|
||||||
#endif
|
#endif
|
||||||
#endif /* HEADER_CURL_IDN_H */
|
#endif /* HEADER_CURL_IDN_H */
|
||||||
|
|||||||
@ -550,6 +550,9 @@ curl_url_strerror(CURLUcode error)
|
|||||||
case CURLUE_BAD_USER:
|
case CURLUE_BAD_USER:
|
||||||
return "Bad user";
|
return "Bad user";
|
||||||
|
|
||||||
|
case CURLUE_LACKS_IDN:
|
||||||
|
return "libcurl lacks IDN support";
|
||||||
|
|
||||||
case CURLUE_LAST:
|
case CURLUE_LAST:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|||||||
28
lib/urlapi.c
28
lib/urlapi.c
@ -33,6 +33,7 @@
|
|||||||
#include "inet_pton.h"
|
#include "inet_pton.h"
|
||||||
#include "inet_ntop.h"
|
#include "inet_ntop.h"
|
||||||
#include "strdup.h"
|
#include "strdup.h"
|
||||||
|
#include "idn.h"
|
||||||
|
|
||||||
/* The last 3 #include files should be in this order */
|
/* The last 3 #include files should be in this order */
|
||||||
#include "curl_printf.h"
|
#include "curl_printf.h"
|
||||||
@ -1379,6 +1380,7 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
|
|||||||
char portbuf[7];
|
char portbuf[7];
|
||||||
bool urldecode = (flags & CURLU_URLDECODE)?1:0;
|
bool urldecode = (flags & CURLU_URLDECODE)?1:0;
|
||||||
bool urlencode = (flags & CURLU_URLENCODE)?1:0;
|
bool urlencode = (flags & CURLU_URLENCODE)?1:0;
|
||||||
|
bool punycode = FALSE;
|
||||||
bool plusdecode = FALSE;
|
bool plusdecode = FALSE;
|
||||||
(void)flags;
|
(void)flags;
|
||||||
if(!u)
|
if(!u)
|
||||||
@ -1408,6 +1410,7 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
|
|||||||
case CURLUPART_HOST:
|
case CURLUPART_HOST:
|
||||||
ptr = u->host;
|
ptr = u->host;
|
||||||
ifmissing = CURLUE_NO_HOST;
|
ifmissing = CURLUE_NO_HOST;
|
||||||
|
punycode = (flags & CURLU_PUNYCODE)?1:0;
|
||||||
break;
|
break;
|
||||||
case CURLUPART_ZONEID:
|
case CURLUPART_ZONEID:
|
||||||
ptr = u->zoneid;
|
ptr = u->zoneid;
|
||||||
@ -1460,6 +1463,7 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
|
|||||||
char *options = u->options;
|
char *options = u->options;
|
||||||
char *port = u->port;
|
char *port = u->port;
|
||||||
char *allochost = NULL;
|
char *allochost = NULL;
|
||||||
|
punycode = (flags & CURLU_PUNYCODE)?1:0;
|
||||||
if(u->scheme && strcasecompare("file", u->scheme)) {
|
if(u->scheme && strcasecompare("file", u->scheme)) {
|
||||||
url = aprintf("file://%s%s%s",
|
url = aprintf("file://%s%s%s",
|
||||||
u->path,
|
u->path,
|
||||||
@ -1514,6 +1518,17 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
|
|||||||
if(!allochost)
|
if(!allochost)
|
||||||
return CURLUE_OUT_OF_MEMORY;
|
return CURLUE_OUT_OF_MEMORY;
|
||||||
}
|
}
|
||||||
|
else if(punycode) {
|
||||||
|
if(!Curl_is_ASCII_name(u->host)) {
|
||||||
|
#ifndef USE_IDN
|
||||||
|
return CURLUE_LACKS_IDN;
|
||||||
|
#else
|
||||||
|
allochost = Curl_idn_decode(u->host);
|
||||||
|
if(!allochost)
|
||||||
|
return CURLUE_OUT_OF_MEMORY;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
/* only encode '%' in output host name */
|
/* only encode '%' in output host name */
|
||||||
char *host = u->host;
|
char *host = u->host;
|
||||||
@ -1611,6 +1626,19 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
|
|||||||
free(*part);
|
free(*part);
|
||||||
*part = Curl_dyn_ptr(&enc);
|
*part = Curl_dyn_ptr(&enc);
|
||||||
}
|
}
|
||||||
|
else if(punycode) {
|
||||||
|
if(!Curl_is_ASCII_name(u->host)) {
|
||||||
|
#ifndef USE_IDN
|
||||||
|
return CURLUE_LACKS_IDN;
|
||||||
|
#else
|
||||||
|
char *allochost = Curl_idn_decode(*part);
|
||||||
|
if(!allochost)
|
||||||
|
return CURLUE_OUT_OF_MEMORY;
|
||||||
|
free(*part);
|
||||||
|
*part = allochost;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return CURLUE_OK;
|
return CURLUE_OK;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -185,7 +185,8 @@ u26: Bad query
|
|||||||
u27: Bad scheme
|
u27: Bad scheme
|
||||||
u28: Unsupported number of slashes following scheme
|
u28: Unsupported number of slashes following scheme
|
||||||
u29: Bad user
|
u29: Bad user
|
||||||
u30: CURLUcode unknown
|
u30: libcurl lacks IDN support
|
||||||
|
u31: CURLUcode unknown
|
||||||
</stdout>
|
</stdout>
|
||||||
</verify>
|
</verify>
|
||||||
|
|
||||||
|
|||||||
@ -31,6 +31,9 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "test.h"
|
#include "test.h"
|
||||||
|
#if defined(USE_LIBIDN2) || defined(USE_WIN32_IDN)
|
||||||
|
#define USE_IDN
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "testutil.h"
|
#include "testutil.h"
|
||||||
#include "warnless.h"
|
#include "warnless.h"
|
||||||
@ -138,6 +141,15 @@ struct clearurlcase {
|
|||||||
};
|
};
|
||||||
|
|
||||||
static const struct testcase get_parts_list[] ={
|
static const struct testcase get_parts_list[] ={
|
||||||
|
#ifdef USE_IDN
|
||||||
|
{"https://räksmörgås.se",
|
||||||
|
"https | [11] | [12] | [13] | xn--rksmrgs-5wao1o.se | "
|
||||||
|
"[15] | / | [16] | [17]", 0, CURLU_PUNYCODE, CURLUE_OK},
|
||||||
|
#else
|
||||||
|
{"https://räksmörgås.se",
|
||||||
|
"https | [11] | [12] | [13] | [30] | [15] | / | [16] | [17]",
|
||||||
|
0, CURLU_PUNYCODE, CURLUE_OK},
|
||||||
|
#endif
|
||||||
/* https://ℂᵤⓇℒ。𝐒🄴 */
|
/* https://ℂᵤⓇℒ。𝐒🄴 */
|
||||||
{"https://"
|
{"https://"
|
||||||
"%e2%84%82%e1%b5%a4%e2%93%87%e2%84%92%e3%80%82%f0%9d%90%92%f0%9f%84%b4",
|
"%e2%84%82%e1%b5%a4%e2%93%87%e2%84%92%e3%80%82%f0%9d%90%92%f0%9f%84%b4",
|
||||||
@ -454,6 +466,10 @@ static const struct testcase get_parts_list[] ={
|
|||||||
};
|
};
|
||||||
|
|
||||||
static const struct urltestcase get_url_list[] = {
|
static const struct urltestcase get_url_list[] = {
|
||||||
|
#ifdef USE_IDN
|
||||||
|
{"https://räksmörgås.se/path?q#frag",
|
||||||
|
"https://xn--rksmrgs-5wao1o.se/path?q#frag", 0, CURLU_PUNYCODE, CURLUE_OK},
|
||||||
|
#endif
|
||||||
/* unsupported schemes with no guessing enabled */
|
/* unsupported schemes with no guessing enabled */
|
||||||
{"data:text/html;charset=utf-8;base64,PCFET0NUWVBFIEhUTUw+PG1ldGEgY",
|
{"data:text/html;charset=utf-8;base64,PCFET0NUWVBFIEhUTUw+PG1ldGEgY",
|
||||||
"", 0, 0, CURLUE_UNSUPPORTED_SCHEME},
|
"", 0, 0, CURLUE_UNSUPPORTED_SCHEME},
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user