urlapi: CURLU_PUNY2IDN - convert from punycode to IDN name

Assisted-by: Jay Satiro
Closes #11655
This commit is contained in:
Daniel Stenberg 2023-08-11 09:41:28 +02:00
parent 0efe8b215c
commit c350069f64
No known key found for this signature in database
GPG Key ID: 5CC908FDB71E12C2
7 changed files with 93 additions and 1 deletions

View File

@ -91,6 +91,16 @@ If libcurl is built without IDN capabilities, using this bit will make
anything outside the ASCII range.
(Added in curl 7.88.0)
.IP CURLU_PUNY2IDN
If set and asked to retrieve the \fBCURLUPART_HOST\fP or \fBCURLUPART_URL\fP
parts, libcurl converts a punycode host name to its IDN (International Domain
Name) UTF-8 version. A host name that is not punycode is returned unchanged.
If libcurl is built without IDN capabilities, using this bit will make
\fIcurl_url_get(3)\fP return \fICURLUE_LACKS_IDN\fP if the host name is using
punycode.
(Added in curl 8.3.0)
.SH PARTS
.IP CURLUPART_URL
When asked to return the full URL, \fIcurl_url_get(3)\fP will return a

View File

@ -1063,6 +1063,7 @@ CURLU_NO_AUTHORITY 7.67.0
CURLU_NO_DEFAULT_PORT 7.62.0
CURLU_NON_SUPPORT_SCHEME 7.62.0
CURLU_PATH_AS_IS 7.62.0
CURLU_PUNY2IDN 8.3.0
CURLU_PUNYCODE 7.88.0
CURLU_URLDECODE 7.62.0
CURLU_URLENCODE 7.62.0

View File

@ -97,6 +97,7 @@ typedef enum {
scheme is unknown. */
#define CURLU_ALLOW_SPACE (1<<11) /* Allow spaces in the URL */
#define CURLU_PUNYCODE (1<<12) /* get the host name in punycode */
#define CURLU_PUNY2IDN (1<<13) /* punycode => IDN conversion */
typedef struct Curl_URL CURLU;

View File

@ -75,7 +75,8 @@ bool Curl_win32_idn_to_ascii(const char *in, char **out)
wchar_t *in_w = curlx_convert_UTF8_to_wchar(in);
if(in_w) {
wchar_t punycode[IDN_MAX_LENGTH];
int chars = IdnToAscii(0, in_w, -1, punycode, IDN_MAX_LENGTH);
int chars = IdnToAscii(0, in_w, (int)(wcslen(in_w) + 1), punycode,
IDN_MAX_LENGTH);
curlx_unicodefree(in_w);
if(chars) {
char *mstr = curlx_convert_wchar_to_UTF8(punycode);
@ -91,6 +92,27 @@ bool Curl_win32_idn_to_ascii(const char *in, char **out)
return success;
}
/*
 * Curl_win32_ascii_to_idn() converts the punycode (ASCII) name 'in' to its
 * UTF-8 encoded IDN form using the Windows IdnToUnicode() API.
 *
 * Returns a string allocated with strdup() that the caller owns, or NULL if
 * any conversion or allocation step fails.
 */
char *Curl_win32_ascii_to_idn(const char *in)
{
  char *out = NULL;

  wchar_t *in_w = curlx_convert_UTF8_to_wchar(in);
  if(in_w) {
    WCHAR idn[IDN_MAX_LENGTH]; /* stores a UTF-16 string */
    /* the passed-in length counts the terminating null */
    int chars = IdnToUnicode(0, in_w, (int)(wcslen(in_w) + 1), idn,
                             IDN_MAX_LENGTH);
    /* the wide-char input is not needed past this point; release it on
       every path so it does not leak */
    curlx_unicodefree(in_w);
    if(chars) {
      /* 'chars' is "the number of characters retrieved" */
      char *mstr = curlx_convert_wchar_to_UTF8(idn);
      if(mstr) {
        /* return a strdup() copy and release the conversion buffer */
        out = strdup(mstr);
        curlx_unicodefree(mstr);
      }
    }
  }

  return out;
}
#endif /* USE_WIN32_IDN */
/*
@ -144,6 +166,19 @@ static char *idn_decode(const char *input)
return decoded;
}
/*
 * idn_encode() converts the punycode name 'puny' to its IDN form with
 * whichever IDN backend this build has enabled.
 *
 * Returns the converted string, or NULL if the conversion fails or no backend
 * is compiled in. With libidn2 the returned buffer is the one libidn2
 * produced.
 */
static char *idn_encode(const char *puny)
{
#ifdef USE_LIBIDN2
  char *unicode = NULL;
  if(idn2_to_unicode_8z8z(puny, &unicode, 0) != IDNA_SUCCESS)
    return NULL;
  return unicode;
#elif defined(USE_WIN32_IDN)
  return Curl_win32_ascii_to_idn(puny);
#else
  /* no IDN backend available in this build */
  (void)puny;
  return NULL;
#endif
}
char *Curl_idn_decode(const char *input)
{
char *d = idn_decode(input);
@ -157,6 +192,19 @@ char *Curl_idn_decode(const char *input)
return d;
}
/*
 * Curl_idn_encode() converts the punycode name 'puny' to its IDN form.
 *
 * Returns the converted string or NULL on failure. In libidn2 builds the
 * result is a strdup() copy, and the buffer obtained from idn_encode() is
 * released with idn2_free() before returning.
 */
char *Curl_idn_encode(const char *puny)
{
  char *name = idn_encode(puny);
#ifdef USE_LIBIDN2
  if(name) {
    /* hand back a strdup() copy; NULL if the copy could not be made */
    char *copy = strdup(name);
    idn2_free(name);
    return copy;
  }
#endif
  return name;
}
/*
* Frees data allocated by idnconvert_hostname()
*/

View File

@ -26,6 +26,7 @@
#ifdef USE_WIN32_IDN
bool Curl_win32_idn_to_ascii(const char *in, char **out);
char *Curl_win32_ascii_to_idn(const char *in);
#endif /* USE_WIN32_IDN */
bool Curl_is_ASCII_name(const char *hostname);
CURLcode Curl_idnconvert_hostname(struct hostname *host);
@ -33,6 +34,7 @@ CURLcode Curl_idnconvert_hostname(struct hostname *host);
#define USE_IDN
void Curl_free_idnconverted_hostname(struct hostname *host);
char *Curl_idn_decode(const char *input);
char *Curl_idn_encode(const char *input);
#ifdef USE_LIBIDN2
#define Curl_idn_free(x) idn2_free(x)
#else

View File

@ -1403,6 +1403,7 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
bool urldecode = (flags & CURLU_URLDECODE)?1:0;
bool urlencode = (flags & CURLU_URLENCODE)?1:0;
bool punycode = FALSE;
bool depunyfy = FALSE;
bool plusdecode = FALSE;
(void)flags;
if(!u)
@ -1433,6 +1434,7 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
ptr = u->host;
ifmissing = CURLUE_NO_HOST;
punycode = (flags & CURLU_PUNYCODE)?1:0;
depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
break;
case CURLUPART_ZONEID:
ptr = u->zoneid;
@ -1483,6 +1485,7 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
char *port = u->port;
char *allochost = NULL;
punycode = (flags & CURLU_PUNYCODE)?1:0;
depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
if(u->scheme && strcasecompare("file", u->scheme)) {
url = aprintf("file://%s%s%s",
u->path,
@ -1548,6 +1551,17 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
#endif
}
}
else if(depunyfy) {
if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) {
#ifndef USE_IDN
return CURLUE_LACKS_IDN;
#else
allochost = Curl_idn_encode(u->host);
if(!allochost)
return CURLUE_OUT_OF_MEMORY;
#endif
}
}
url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
scheme,
@ -1626,6 +1640,19 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
#endif
}
}
else if(depunyfy) {
if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) {
#ifndef USE_IDN
return CURLUE_LACKS_IDN;
#else
char *allochost = Curl_idn_encode(*part);
if(!allochost)
return CURLUE_OUT_OF_MEMORY;
free(*part);
*part = allochost;
#endif
}
}
return CURLUE_OK;
}

View File

@ -179,6 +179,9 @@ static const struct testcase get_parts_list[] ={
{"https://räksmörgås.se",
"https | [11] | [12] | [13] | xn--rksmrgs-5wao1o.se | "
"[15] | / | [16] | [17]", 0, CURLU_PUNYCODE, CURLUE_OK},
{"https://xn--rksmrgs-5wao1o.se",
"https | [11] | [12] | [13] | räksmörgås.se | "
"[15] | / | [16] | [17]", 0, CURLU_PUNY2IDN, CURLUE_OK},
#else
{"https://räksmörgås.se",
"https | [11] | [12] | [13] | [30] | [15] | / | [16] | [17]",