urlapi: CURLU_PUNY2IDN - convert from punycode to IDN name
Assisted-by: Jay Satiro Closes #11655
This commit is contained in:
parent
0efe8b215c
commit
c350069f64
@ -91,6 +91,16 @@ If libcurl is built without IDN capabilities, using this bit will make
|
||||
anything outside the ASCII range.
|
||||
|
||||
(Added in curl 7.88.0)
|
||||
.IP CURLU_PUNY2IDN
|
||||
If set and asked to retrieve the \fBCURLUPART_HOST\fP or \fBCURLUPART_URL\fP
|
||||
parts, libcurl returns the host name in its IDN (International Domain Name)
|
||||
UTF-8 version if it otherwise is a punycode version.
|
||||
|
||||
If libcurl is built without IDN capabilities, using this bit will make
|
||||
\fIcurl_url_get(3)\fP return \fICURLUE_LACKS_IDN\fP if the host name is using
|
||||
punycode.
|
||||
|
||||
(Added in curl 8.3.0)
|
||||
.SH PARTS
|
||||
.IP CURLUPART_URL
|
||||
When asked to return the full URL, \fIcurl_url_get(3)\fP will return a
|
||||
|
||||
@ -1063,6 +1063,7 @@ CURLU_NO_AUTHORITY 7.67.0
|
||||
CURLU_NO_DEFAULT_PORT 7.62.0
|
||||
CURLU_NON_SUPPORT_SCHEME 7.62.0
|
||||
CURLU_PATH_AS_IS 7.62.0
|
||||
CURLU_PUNY2IDN 8.3.0
|
||||
CURLU_PUNYCODE 7.88.0
|
||||
CURLU_URLDECODE 7.62.0
|
||||
CURLU_URLENCODE 7.62.0
|
||||
|
||||
@ -97,6 +97,7 @@ typedef enum {
|
||||
scheme is unknown. */
|
||||
#define CURLU_ALLOW_SPACE (1<<11) /* Allow spaces in the URL */
|
||||
#define CURLU_PUNYCODE (1<<12) /* get the host name in punycode */
|
||||
#define CURLU_PUNY2IDN (1<<13) /* punycode => IDN conversion */
|
||||
|
||||
typedef struct Curl_URL CURLU;
|
||||
|
||||
|
||||
50
lib/idn.c
50
lib/idn.c
@ -75,7 +75,8 @@ bool Curl_win32_idn_to_ascii(const char *in, char **out)
|
||||
wchar_t *in_w = curlx_convert_UTF8_to_wchar(in);
|
||||
if(in_w) {
|
||||
wchar_t punycode[IDN_MAX_LENGTH];
|
||||
int chars = IdnToAscii(0, in_w, -1, punycode, IDN_MAX_LENGTH);
|
||||
int chars = IdnToAscii(0, in_w, (int)(wcslen(in_w) + 1), punycode,
|
||||
IDN_MAX_LENGTH);
|
||||
curlx_unicodefree(in_w);
|
||||
if(chars) {
|
||||
char *mstr = curlx_convert_wchar_to_UTF8(punycode);
|
||||
@ -91,6 +92,27 @@ bool Curl_win32_idn_to_ascii(const char *in, char **out)
|
||||
return success;
|
||||
}
|
||||
|
||||
char *Curl_win32_ascii_to_idn(const char *in)
|
||||
{
|
||||
char *out = NULL;
|
||||
|
||||
wchar_t *in_w = curlx_convert_UTF8_to_wchar(in);
|
||||
if(in_w) {
|
||||
WCHAR idn[IDN_MAX_LENGTH]; /* stores a UTF-16 string */
|
||||
int chars = IdnToUnicode(0, in_w, (int)(wcslen(in_w) + 1), idn,
|
||||
IDN_MAX_LENGTH);
|
||||
if(chars) {
|
||||
/* 'chars' is "the number of characters retrieved" */
|
||||
char *mstr = curlx_convert_wchar_to_UTF8(idn);
|
||||
if(mstr) {
|
||||
out = strdup(mstr);
|
||||
curlx_unicodefree(mstr);
|
||||
}
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
#endif /* USE_WIN32_IDN */
|
||||
|
||||
/*
|
||||
@ -144,6 +166,19 @@ static char *idn_decode(const char *input)
|
||||
return decoded;
|
||||
}
|
||||
|
||||
/*
 * idn_encode() passes the host name 'puny' to whichever IDN backend this
 * build was configured with and returns the string that backend produced.
 *
 * Returns NULL if the backend reports a failure, or if no backend is
 * compiled in.
 */
static char *idn_encode(const char *puny)
{
#ifdef USE_LIBIDN2
  char *unicode = NULL;
  if(idn2_to_unicode_8z8z(puny, &unicode, 0) != IDNA_SUCCESS)
    return NULL;
  return unicode;
#elif defined(USE_WIN32_IDN)
  return Curl_win32_ascii_to_idn(puny);
#else
  /* no IDN backend available: nothing can be converted */
  return NULL;
#endif
}
|
||||
|
||||
char *Curl_idn_decode(const char *input)
|
||||
{
|
||||
char *d = idn_decode(input);
|
||||
@ -157,6 +192,19 @@ char *Curl_idn_decode(const char *input)
|
||||
return d;
|
||||
}
|
||||
|
||||
/*
 * Curl_idn_encode() returns the result of idn_encode() for 'puny'.
 *
 * In libidn2 builds the string from idn_encode() is duplicated with
 * strdup() and the original is released with idn2_free(), so the caller
 * gets the strdup() copy. In other builds the idn_encode() result is passed
 * through untouched.
 *
 * Returns NULL if idn_encode() returned NULL or, in libidn2 builds, if the
 * duplication failed.
 */
char *Curl_idn_encode(const char *puny)
{
#ifdef USE_LIBIDN2
  char *copy;
  char *fromlib = idn_encode(puny);
  if(!fromlib)
    return NULL;
  copy = strdup(fromlib);
  idn2_free(fromlib);
  return copy;
#else
  return idn_encode(puny);
#endif
}
|
||||
|
||||
/*
|
||||
* Frees data allocated by idnconvert_hostname()
|
||||
*/
|
||||
|
||||
@ -26,6 +26,7 @@
|
||||
|
||||
#ifdef USE_WIN32_IDN
|
||||
bool Curl_win32_idn_to_ascii(const char *in, char **out);
|
||||
char *Curl_win32_ascii_to_idn(const char *in);
|
||||
#endif /* USE_WIN32_IDN */
|
||||
bool Curl_is_ASCII_name(const char *hostname);
|
||||
CURLcode Curl_idnconvert_hostname(struct hostname *host);
|
||||
@ -33,6 +34,7 @@ CURLcode Curl_idnconvert_hostname(struct hostname *host);
|
||||
#define USE_IDN
|
||||
void Curl_free_idnconverted_hostname(struct hostname *host);
|
||||
char *Curl_idn_decode(const char *input);
|
||||
char *Curl_idn_encode(const char *input);
|
||||
#ifdef USE_LIBIDN2
|
||||
#define Curl_idn_free(x) idn2_free(x)
|
||||
#else
|
||||
|
||||
27
lib/urlapi.c
27
lib/urlapi.c
@ -1403,6 +1403,7 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
|
||||
bool urldecode = (flags & CURLU_URLDECODE)?1:0;
|
||||
bool urlencode = (flags & CURLU_URLENCODE)?1:0;
|
||||
bool punycode = FALSE;
|
||||
bool depunyfy = FALSE;
|
||||
bool plusdecode = FALSE;
|
||||
(void)flags;
|
||||
if(!u)
|
||||
@ -1433,6 +1434,7 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
|
||||
ptr = u->host;
|
||||
ifmissing = CURLUE_NO_HOST;
|
||||
punycode = (flags & CURLU_PUNYCODE)?1:0;
|
||||
depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
|
||||
break;
|
||||
case CURLUPART_ZONEID:
|
||||
ptr = u->zoneid;
|
||||
@ -1483,6 +1485,7 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
|
||||
char *port = u->port;
|
||||
char *allochost = NULL;
|
||||
punycode = (flags & CURLU_PUNYCODE)?1:0;
|
||||
depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
|
||||
if(u->scheme && strcasecompare("file", u->scheme)) {
|
||||
url = aprintf("file://%s%s%s",
|
||||
u->path,
|
||||
@ -1548,6 +1551,17 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
|
||||
#endif
|
||||
}
|
||||
}
|
||||
else if(depunyfy) {
|
||||
if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) {
|
||||
#ifndef USE_IDN
|
||||
return CURLUE_LACKS_IDN;
|
||||
#else
|
||||
allochost = Curl_idn_encode(u->host);
|
||||
if(!allochost)
|
||||
return CURLUE_OUT_OF_MEMORY;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||
scheme,
|
||||
@ -1626,6 +1640,19 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
|
||||
#endif
|
||||
}
|
||||
}
|
||||
else if(depunyfy) {
|
||||
if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) {
|
||||
#ifndef USE_IDN
|
||||
return CURLUE_LACKS_IDN;
|
||||
#else
|
||||
char *allochost = Curl_idn_encode(*part);
|
||||
if(!allochost)
|
||||
return CURLUE_OUT_OF_MEMORY;
|
||||
free(*part);
|
||||
*part = allochost;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
return CURLUE_OK;
|
||||
}
|
||||
|
||||
@ -179,6 +179,9 @@ static const struct testcase get_parts_list[] ={
|
||||
{"https://räksmörgås.se",
|
||||
"https | [11] | [12] | [13] | xn--rksmrgs-5wao1o.se | "
|
||||
"[15] | / | [16] | [17]", 0, CURLU_PUNYCODE, CURLUE_OK},
|
||||
{"https://xn--rksmrgs-5wao1o.se",
|
||||
"https | [11] | [12] | [13] | räksmörgås.se | "
|
||||
"[15] | / | [16] | [17]", 0, CURLU_PUNY2IDN, CURLUE_OK},
|
||||
#else
|
||||
{"https://räksmörgås.se",
|
||||
"https | [11] | [12] | [13] | [30] | [15] | / | [16] | [17]",
|
||||
|
||||
Loading…
Reference in New Issue
Block a user