🚧 add support for ensure_ascii with the keep error handler
This commit is contained in:
parent
a6a06b76e7
commit
3cd50255a8
@ -399,8 +399,8 @@ class serializer
|
||||
std::size_t bytes_after_last_accept = 0;
|
||||
std::size_t undumped_chars = 0;
|
||||
|
||||
// copy string as-is if error handler is set to keep
|
||||
if (error_handler == error_handler_t::keep)
|
||||
// copy string as-is if error handler is set to keep, and we don't want to ensure ASCII
|
||||
if (error_handler == error_handler_t::keep && !ensure_ascii)
|
||||
{
|
||||
o->write_characters(s.data(), s.size());
|
||||
return;
|
||||
@ -575,7 +575,22 @@ class serializer
|
||||
break;
|
||||
}
|
||||
|
||||
case error_handler_t::keep: // LCOV_EXCL_LINE
|
||||
case error_handler_t::keep:
|
||||
{
|
||||
// copy undumped chars to string buffer
|
||||
for (int j = 0; j < undumped_chars; ++j)
|
||||
{
|
||||
string_buffer[bytes++] = s[bytes_after_last_accept + j];
|
||||
}
|
||||
|
||||
// add erroneous byte to string buffer
|
||||
string_buffer[bytes++] = s[i];
|
||||
|
||||
// continue processing the string
|
||||
state = UTF8_ACCEPT;
|
||||
break;
|
||||
}
|
||||
|
||||
default: // LCOV_EXCL_LINE
|
||||
JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE
|
||||
}
|
||||
@ -614,6 +629,20 @@ class serializer
|
||||
JSON_THROW(type_error::create(316, concat("incomplete UTF-8 string; last byte: 0x", hex_bytes(static_cast<std::uint8_t>(s.back() | 0))), nullptr));
|
||||
}
|
||||
|
||||
case error_handler_t::keep:
|
||||
{
|
||||
// copy undumped chars to string buffer
|
||||
for (int j = 0; j < undumped_chars; ++j)
|
||||
{
|
||||
string_buffer[bytes++] = s[bytes_after_last_accept + j];
|
||||
}
|
||||
undumped_chars = 0;
|
||||
|
||||
// write all accepted bytes
|
||||
o->write_characters(string_buffer.data(), bytes);
|
||||
break;
|
||||
}
|
||||
|
||||
case error_handler_t::ignore:
|
||||
{
|
||||
// write all accepted bytes
|
||||
@ -637,7 +666,6 @@ class serializer
|
||||
break;
|
||||
}
|
||||
|
||||
case error_handler_t::keep: // LCOV_EXCL_LINE
|
||||
default: // LCOV_EXCL_LINE
|
||||
JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE
|
||||
}
|
||||
|
||||
@ -18961,8 +18961,8 @@ class serializer
|
||||
std::size_t bytes_after_last_accept = 0;
|
||||
std::size_t undumped_chars = 0;
|
||||
|
||||
// copy string as-is if error handler is set to keep
|
||||
if (error_handler == error_handler_t::keep)
|
||||
// copy string as-is if error handler is set to keep, and we don't want to ensure ASCII
|
||||
if (error_handler == error_handler_t::keep && !ensure_ascii)
|
||||
{
|
||||
o->write_characters(s.data(), s.size());
|
||||
return;
|
||||
@ -19137,7 +19137,22 @@ class serializer
|
||||
break;
|
||||
}
|
||||
|
||||
case error_handler_t::keep: // LCOV_EXCL_LINE
|
||||
case error_handler_t::keep:
|
||||
{
|
||||
// copy undumped chars to string buffer
|
||||
for (int j = 0; j < undumped_chars; ++j)
|
||||
{
|
||||
string_buffer[bytes++] = s[bytes_after_last_accept + j];
|
||||
}
|
||||
|
||||
// add erroneous byte to string buffer
|
||||
string_buffer[bytes++] = s[i];
|
||||
|
||||
// continue processing the string
|
||||
state = UTF8_ACCEPT;
|
||||
break;
|
||||
}
|
||||
|
||||
default: // LCOV_EXCL_LINE
|
||||
JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE
|
||||
}
|
||||
@ -19176,6 +19191,20 @@ class serializer
|
||||
JSON_THROW(type_error::create(316, concat("incomplete UTF-8 string; last byte: 0x", hex_bytes(static_cast<std::uint8_t>(s.back() | 0))), nullptr));
|
||||
}
|
||||
|
||||
case error_handler_t::keep:
|
||||
{
|
||||
// copy undumped chars to string buffer
|
||||
for (int j = 0; j < undumped_chars; ++j)
|
||||
{
|
||||
string_buffer[bytes++] = s[bytes_after_last_accept + j];
|
||||
}
|
||||
undumped_chars = 0;
|
||||
|
||||
// write all accepted bytes
|
||||
o->write_characters(string_buffer.data(), bytes);
|
||||
break;
|
||||
}
|
||||
|
||||
case error_handler_t::ignore:
|
||||
{
|
||||
// write all accepted bytes
|
||||
@ -19199,7 +19228,6 @@ class serializer
|
||||
break;
|
||||
}
|
||||
|
||||
case error_handler_t::keep: // LCOV_EXCL_LINE
|
||||
default: // LCOV_EXCL_LINE
|
||||
JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE
|
||||
}
|
||||
|
||||
@ -86,8 +86,11 @@ TEST_CASE("serialization")
|
||||
CHECK_THROWS_WITH_AS(j.dump(), "[json.exception.type_error.316] invalid UTF-8 byte at index 2: 0xA9", json::type_error&);
|
||||
CHECK_THROWS_WITH_AS(j.dump(1, ' ', false, json::error_handler_t::strict), "[json.exception.type_error.316] invalid UTF-8 byte at index 2: 0xA9", json::type_error&);
|
||||
CHECK(j.dump(-1, ' ', false, json::error_handler_t::ignore) == "\"äü\"");
|
||||
CHECK(j.dump(-1, ' ', true, json::error_handler_t::ignore) == "\"\\u00e4\\u00fc\"");
|
||||
CHECK(j.dump(-1, ' ', false, json::error_handler_t::replace) == "\"ä\xEF\xBF\xBDü\"");
|
||||
CHECK(j.dump(-1, ' ', true, json::error_handler_t::replace) == "\"\\u00e4\\ufffd\\u00fc\"");
|
||||
CHECK(j.dump(-1, ' ', false, json::error_handler_t::keep) == "\"ä\xA9ü\"");
|
||||
CHECK(j.dump(-1, ' ', true, json::error_handler_t::keep) == "\"\\u00e4\xA9\\u00fc\"");
|
||||
}
|
||||
|
||||
SECTION("ending with incomplete character")
|
||||
@ -97,8 +100,11 @@ TEST_CASE("serialization")
|
||||
CHECK_THROWS_WITH_AS(j.dump(), "[json.exception.type_error.316] incomplete UTF-8 string; last byte: 0xC2", json::type_error&);
|
||||
CHECK_THROWS_AS(j.dump(1, ' ', false, json::error_handler_t::strict), json::type_error&);
|
||||
CHECK(j.dump(-1, ' ', false, json::error_handler_t::ignore) == "\"123\"");
|
||||
CHECK(j.dump(-1, ' ', true, json::error_handler_t::ignore) == "\"123\"");
|
||||
CHECK(j.dump(-1, ' ', false, json::error_handler_t::replace) == "\"123\xEF\xBF\xBD\"");
|
||||
CHECK(j.dump(-1, ' ', true, json::error_handler_t::replace) == "\"123\\ufffd\"");
|
||||
CHECK(j.dump(-1, ' ', false, json::error_handler_t::keep) == "\"123\xC2\"");
|
||||
CHECK(j.dump(-1, ' ', true, json::error_handler_t::keep) == "\"123\xC2\"");
|
||||
}
|
||||
|
||||
SECTION("unexpected character")
|
||||
@ -110,9 +116,11 @@ TEST_CASE("serialization")
|
||||
|
||||
// see pending discussion at #4452
|
||||
CHECK(j.dump(-1, ' ', false, json::error_handler_t::ignore) == "\"123456\"");
|
||||
CHECK(j.dump(-1, ' ', false, json::error_handler_t::keep) == "\"123\xF1\xB0\x34\x35\x36\"");
|
||||
CHECK(j.dump(-1, ' ', true, json::error_handler_t::ignore) == "\"123456\"");
|
||||
CHECK(j.dump(-1, ' ', false, json::error_handler_t::replace) == "\"123\xEF\xBF\xBD\x34\x35\x36\"");
|
||||
CHECK(j.dump(-1, ' ', true, json::error_handler_t::replace) == "\"123\\ufffd456\"");
|
||||
CHECK(j.dump(-1, ' ', false, json::error_handler_t::keep) == "\"123\xF1\xB0\x34\x35\x36\"");
|
||||
CHECK(j.dump(-1, ' ', true, json::error_handler_t::keep) == "\"123\xF1\xB0\x34\x35\x36\"");
|
||||
}
|
||||
|
||||
SECTION("U+FFFD Substitution of Maximal Subparts")
|
||||
|
||||
Loading…
Reference in New Issue
Block a user