CL:
- Added tests for URI format
- Added URI format validation
This commit is contained in:
parent
1063c9adba
commit
e39c1aef5b
@ -84,10 +84,10 @@ void rfc3339_time_check(const std::string &value)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @todo Could be made more exact by querying a leap second database and choosing the
|
* @todo Could be made more exact by querying a leap second database and choosing the
|
||||||
* correct maximum in {58,59,60}. This current solution might match some invalid dates
|
* correct maximum in {58,59,60}. This current solution might match some invalid dates
|
||||||
* but it won't lead to false negatives. This only works if we know the full date, however
|
* but it won't lead to false negatives. This only works if we know the full date, however
|
||||||
*/
|
*/
|
||||||
|
|
||||||
auto day_minutes = hour * 60 + minute - (offsetHour * 60 + offsetMinute);
|
auto day_minutes = hour * 60 + minute - (offsetHour * 60 + offsetMinute);
|
||||||
if (day_minutes < 0)
|
if (day_minutes < 0)
|
||||||
@ -126,7 +126,7 @@ void rfc3339_time_check(const std::string &value)
|
|||||||
* @endverbatim
|
* @endverbatim
|
||||||
* NOTE: Per [ABNF] and ISO8601, the "T" and "Z" characters in this
|
* NOTE: Per [ABNF] and ISO8601, the "T" and "Z" characters in this
|
||||||
* syntax may alternatively be lower case "t" or "z" respectively.
|
* syntax may alternatively be lower case "t" or "z" respectively.
|
||||||
*/
|
*/
|
||||||
void rfc3339_date_time_check(const std::string &value)
|
void rfc3339_date_time_check(const std::string &value)
|
||||||
{
|
{
|
||||||
const static std::regex dateTimeRegex{R"(^([0-9]{4}\-[0-9]{2}\-[0-9]{2})[Tt]([0-9]{2}\:[0-9]{2}\:[0-9]{2}(?:\.[0-9]+)?(?:[Zz]|(?:\+|\-)[0-9]{2}\:[0-9]{2}))$)"};
|
const static std::regex dateTimeRegex{R"(^([0-9]{4}\-[0-9]{2}\-[0-9]{2})[Tt]([0-9]{2}\:[0-9]{2}\:[0-9]{2}(?:\.[0-9]+)?(?:[Zz]|(?:\+|\-)[0-9]{2}\:[0-9]{2}))$)"};
|
||||||
@ -265,6 +265,112 @@ const std::string dotAtom{"(?:" + atext + R"(+(?:\.)" + atext + "+)*)"};
|
|||||||
const std::string stackoverflowMagicPart{R"((?:[[:alnum:]](?:[[:alnum:]-]*[[:alnum:]])?\.)+)"
|
const std::string stackoverflowMagicPart{R"((?:[[:alnum:]](?:[[:alnum:]-]*[[:alnum:]])?\.)+)"
|
||||||
R"([[:alnum:]](?:[[:alnum:]-]*[[:alnum:]])?)"};
|
R"([[:alnum:]](?:[[:alnum:]-]*[[:alnum:]])?)"};
|
||||||
const std::string email{"(?:" + dotAtom + "|" + quotedString + ")@(?:" + stackoverflowMagicPart + "|" + domainLiteral + ")"};
|
const std::string email{"(?:" + dotAtom + "|" + quotedString + ")@(?:" + stackoverflowMagicPart + "|" + domainLiteral + ")"};
|
||||||
|
|
||||||
|
/**
|
||||||
|
 * @see RFC 3986, Appendix A "Collected ABNF for URI" (https://www.rfc-editor.org/rfc/rfc3986#appendix-A)
|
||||||
|
*
|
||||||
|
* @verbatim
|
||||||
|
* URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
|
||||||
|
*
|
||||||
|
* hier-part = "//" authority path-abempty
|
||||||
|
* / path-absolute
|
||||||
|
* / path-rootless
|
||||||
|
* / path-empty
|
||||||
|
*
|
||||||
|
* URI-reference = URI / relative-ref
|
||||||
|
*
|
||||||
|
* absolute-URI = scheme ":" hier-part [ "?" query ]
|
||||||
|
*
|
||||||
|
* relative-ref = relative-part [ "?" query ] [ "#" fragment ]
|
||||||
|
*
|
||||||
|
* relative-part = "//" authority path-abempty
|
||||||
|
* / path-absolute
|
||||||
|
* / path-noscheme
|
||||||
|
* / path-empty
|
||||||
|
*
|
||||||
|
* scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
|
||||||
|
*
|
||||||
|
* authority = [ userinfo "@" ] host [ ":" port ]
|
||||||
|
* userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
|
||||||
|
* host = IP-literal / IPv4address / reg-name
|
||||||
|
* port = *DIGIT
|
||||||
|
*
|
||||||
|
* IP-literal = "[" ( IPv6address / IPvFuture ) "]"
|
||||||
|
*
|
||||||
|
* IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
|
||||||
|
*
|
||||||
|
* IPv6address = 6( h16 ":" ) ls32
|
||||||
|
* / "::" 5( h16 ":" ) ls32
|
||||||
|
* / [ h16 ] "::" 4( h16 ":" ) ls32
|
||||||
|
* / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
|
||||||
|
* / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
|
||||||
|
* / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
|
||||||
|
* / [ *4( h16 ":" ) h16 ] "::" ls32
|
||||||
|
* / [ *5( h16 ":" ) h16 ] "::" h16
|
||||||
|
* / [ *6( h16 ":" ) h16 ] "::"
|
||||||
|
*
|
||||||
|
* h16 = 1*4HEXDIG
|
||||||
|
* ls32 = ( h16 ":" h16 ) / IPv4address
|
||||||
|
* IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
|
||||||
|
* dec-octet = DIGIT ; 0-9
|
||||||
|
* / %x31-39 DIGIT ; 10-99
|
||||||
|
* / "1" 2DIGIT ; 100-199
|
||||||
|
* / "2" %x30-34 DIGIT ; 200-249
|
||||||
|
* / "25" %x30-35 ; 250-255
|
||||||
|
*
|
||||||
|
* reg-name = *( unreserved / pct-encoded / sub-delims )
|
||||||
|
*
|
||||||
|
* path = path-abempty ; begins with "/" or is empty
|
||||||
|
* / path-absolute ; begins with "/" but not "//"
|
||||||
|
* / path-noscheme ; begins with a non-colon segment
|
||||||
|
* / path-rootless ; begins with a segment
|
||||||
|
* / path-empty ; zero characters
|
||||||
|
*
|
||||||
|
* path-abempty = *( "/" segment )
|
||||||
|
* path-absolute = "/" [ segment-nz *( "/" segment ) ]
|
||||||
|
* path-noscheme = segment-nz-nc *( "/" segment )
|
||||||
|
* path-rootless = segment-nz *( "/" segment )
|
||||||
|
* path-empty = 0<pchar>
|
||||||
|
*
|
||||||
|
* segment = *pchar
|
||||||
|
* segment-nz = 1*pchar
|
||||||
|
* segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
|
||||||
|
* ; non-zero-length segment without any colon ":"
|
||||||
|
*
|
||||||
|
* pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
|
||||||
|
*
|
||||||
|
* query = *( pchar / "/" / "?" )
|
||||||
|
*
|
||||||
|
* fragment = *( pchar / "/" / "?" )
|
||||||
|
*
|
||||||
|
* pct-encoded = "%" HEXDIG HEXDIG
|
||||||
|
*
|
||||||
|
* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
|
||||||
|
* reserved = gen-delims / sub-delims
|
||||||
|
* gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
|
||||||
|
* sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
|
||||||
|
* / "*" / "+" / "," / ";" / "="
|
||||||
|
*
|
||||||
|
* @endverbatim
|
||||||
|
* @see adapted from: https://github.com/jhermsmeier/uri.regex/blob/master/uri.regex
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
// Regex fragments for the top-level RFC 3986 production
//     URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
// They are concatenated, in this order, into a single ECMAScript pattern by
// rfc3986_uri_check() below.

// scheme ":" -- an ALPHA followed by ALPHA / DIGIT / "+" / "-" / "."; the
// mandatory ":" separator is part of this fragment.
const std::string scheme{R"(([A-Za-z][A-Za-z0-9+\-.]*):)"};

// hier-part -- one of four alternatives:
//   "//" [userinfo "@"] host [":" port] path-abempty, where host is a bracketed
//   IPv6 / IPvFuture literal, a dotted IPv4 address or a reg-name;
//   "/" followed by an optional rootless path (path-absolute);
//   a rootless path (path-rootless);
//   or nothing at all (path-empty).
const std::string hierPart{R"((?:(\/\/)(?:((?:[A-Za-z0-9\-._~!$&'()*+,;=:]|%[0-9A-Fa-f]{2})*)@)?((?:\[(?:(?:(?:(?:[0-9A-Fa-f]{1,4}:){6}|::(?:[0-9A-Fa-f]{1,4}:){5}|(?:[0-9A-Fa-f]{1,4})?::(?:[0-9A-Fa-f]{1,4}:){4}|(?:(?:[0-9A-Fa-f]{1,4}:){0,1}[0-9A-Fa-f]{1,4})?::(?:[0-9A-Fa-f]{1,4}:){3}|(?:(?:[0-9A-Fa-f]{1,4}:){0,2}[0-9A-Fa-f]{1,4})?::(?:[0-9A-Fa-f]{1,4}:){2}|(?:(?:[0-9A-Fa-f]{1,4}:){0,3}[0-9A-Fa-f]{1,4})?::[0-9A-Fa-f]{1,4}:|(?:(?:[0-9A-Fa-f]{1,4}:){0,4}[0-9A-Fa-f]{1,4})?::)(?:[0-9A-Fa-f]{1,4}:[0-9A-Fa-f]{1,4}|(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))|(?:(?:[0-9A-Fa-f]{1,4}:){0,5}[0-9A-Fa-f]{1,4})?::[0-9A-Fa-f]{1,4}|(?:(?:[0-9A-Fa-f]{1,4}:){0,6}[0-9A-Fa-f]{1,4})?::)|[Vv][0-9A-Fa-f]+\.[A-Za-z0-9\-._~!$&'()*+,;=:]+)\]|(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)|(?:[A-Za-z0-9\-._~!$&'()*+,;=]|%[0-9A-Fa-f]{2})*))(?::([0-9]*))?((?:\/(?:[A-Za-z0-9\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})*)*)|\/((?:(?:[A-Za-z0-9\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})+(?:\/(?:[A-Za-z0-9\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})*)*)?)|((?:[A-Za-z0-9\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})+(?:\/(?:[A-Za-z0-9\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})*)*)|))"};

// [ "?" query ] -- optional; pchar plus "/" and "?".
const std::string query{R"((?:\?((?:[A-Za-z0-9\-._~!$&'()*+,;=:@\/?]|%[0-9A-Fa-f]{2})*))?)"};

// [ "#" fragment ] -- optional; same character set as query.
const std::string fragment{
    R"((?:\#((?:[A-Za-z0-9\-._~!$&'()*+,;=:@\/?]|%[0-9A-Fa-f]{2})*))?)"};

/**
 * Checks that @p value, taken as a whole, is a URI as defined by RFC 3986,
 * i.e. scheme ":" hier-part [ "?" query ] [ "#" fragment ].
 *
 * Relative references (strings without a scheme) are rejected.
 *
 * @param value the string to validate
 * @throws std::invalid_argument if @p value does not match the URI grammar
 */
void rfc3986_uri_check(const std::string &value)
{
	// The pattern is assembled inside the static initialiser so that both the
	// concatenation and the regex compilation happen on the first call only.
	static const std::regex uriRegex{scheme + hierPart + query + fragment};

	if (!std::regex_match(value, uriRegex)) {
		throw std::invalid_argument(value + " is not a URI string according to RFC 3986.");
	}
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
namespace nlohmann
|
namespace nlohmann
|
||||||
@ -286,6 +392,8 @@ void default_string_format_check(const std::string &format, const std::string &v
|
|||||||
rfc3339_date_check(value);
|
rfc3339_date_check(value);
|
||||||
} else if (format == "time") {
|
} else if (format == "time") {
|
||||||
rfc3339_time_check(value);
|
rfc3339_time_check(value);
|
||||||
|
} else if (format == "uri") {
|
||||||
|
rfc3986_uri_check(value);
|
||||||
} else if (format == "email") {
|
} else if (format == "email") {
|
||||||
static const std::regex emailRegex{email};
|
static const std::regex emailRegex{email};
|
||||||
if (!std::regex_match(value, emailRegex)) {
|
if (!std::regex_match(value, emailRegex)) {
|
||||||
|
|||||||
@ -82,5 +82,21 @@ int main()
|
|||||||
|
|
||||||
numberOfErrors += testStringFormat("ipv4", ipv4Checks);
|
numberOfErrors += testStringFormat("ipv4", ipv4Checks);
|
||||||
|
|
||||||
|
const std::vector<std::pair<std::string, bool>> uriChecks{
|
||||||
|
{"http://www.google.com/search?q=regular%20expression", true},
|
||||||
|
{"http://www.google.com/", true},
|
||||||
|
{"http://www.google.com/search?q=regular%20expression", true},
|
||||||
|
{"www.google.com", false},
|
||||||
|
{"http://www.google.comj", true},
|
||||||
|
{"ldap://[2001:db8::7]/c=GB?objectClass?one", true},
|
||||||
|
{"mailto:John.Doe@example.com", true},
|
||||||
|
{"news:comp.infosystems.www.servers.unix", true},
|
||||||
|
{"https://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top", true},
|
||||||
|
{"tel:+1-816-555-1212", true},
|
||||||
|
{"telnet://192.0.2.16:80/", true},
|
||||||
|
{"urn:oasis:names:specification:docbook:dtd:xml:4.1.2", true}};
|
||||||
|
|
||||||
|
numberOfErrors += testStringFormat("uri", uriChecks);
|
||||||
|
|
||||||
return numberOfErrors;
|
return numberOfErrors;
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user