🚧 WIP for #4552

This commit is contained in:
Niels Lohmann 2024-12-18 09:47:23 +01:00
parent 30cd44df95
commit 4d67e127aa
No known key found for this signature in database
GPG Key ID: 7F3CEA63AE251B69
4 changed files with 25 additions and 1 deletions

View File

@ -529,6 +529,12 @@ class serializer
// thus removing/ignoring the invalid characters
bytes = bytes_after_last_accept;
// fix for #4552 - discussion pending
if (error_handler == error_handler_t::ignore)
{
bytes += undumped_chars;
}
if (error_handler == error_handler_t::replace)
{
// add a replacement character

View File

@ -18816,6 +18816,12 @@ class serializer
// thus removing/ignoring the invalid characters
bytes = bytes_after_last_accept;
// fix for #4552 - discussion pending
if (error_handler == error_handler_t::ignore)
{
bytes += undumped_chars;
}
if (error_handler == error_handler_t::replace)
{
// add a replacement character

View File

@ -995,6 +995,14 @@ TEST_CASE("regression tests 2")
CHECK(p.x == 1);
CHECK(p.y == 2);
}
SECTION("issue #4552 - UTF-8 invalid characters are not always ignored when dumping with error_handler_t::ignore")
{
    // Build an object whose string value is not valid UTF-8: byte 0xDC
    // ("\334") starts a two-byte sequence, but the following byte 0x05
    // ("\005") is not a continuation byte.
    nlohmann::json doc;
    doc["test"] = "test\334\005";

    // Compact dump (indent -1), no ASCII escaping, error handler "ignore".
    // The expected text keeps the raw 0xDC byte and escapes the control
    // character 0x05 as \u0005.
    CHECK(doc.dump(-1, ' ', false, nlohmann::json::error_handler_t::ignore) == "{\"test\":\"test\334\\u0005\"}");
}
}
DOCTEST_CLANG_SUPPRESS_WARNING_POP

View File

@ -107,7 +107,11 @@ TEST_CASE("serialization")
CHECK_THROWS_WITH_AS(j.dump(), "[json.exception.type_error.316] invalid UTF-8 byte at index 5: 0x34", json::type_error&);
CHECK_THROWS_AS(j.dump(1, ' ', false, json::error_handler_t::strict), json::type_error&);
CHECK(j.dump(-1, ' ', false, json::error_handler_t::ignore) == "\"123456\"");
// see pending discussion at #4552
// CHECK(j.dump(-1, ' ', false, json::error_handler_t::ignore) == "\"123456\"");
CHECK(j.dump(-1, ' ', false, json::error_handler_t::ignore) == "\"123\xF1\xB0\x34\x35\x36\"");
CHECK(j.dump(-1, ' ', false, json::error_handler_t::replace) == "\"123\xEF\xBF\xBD\x34\x35\x36\"");
CHECK(j.dump(-1, ' ', true, json::error_handler_t::replace) == "\"123\\ufffd456\"");
}