From 4d67e127aab67724ead790fbaeaeb0f53b896141 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Wed, 18 Dec 2024 09:47:23 +0100 Subject: [PATCH 01/15] :construction: WIP for #4552 --- include/nlohmann/detail/output/serializer.hpp | 6 ++++++ single_include/nlohmann/json.hpp | 6 ++++++ tests/src/unit-regression2.cpp | 8 ++++++++ tests/src/unit-serialization.cpp | 6 +++++- 4 files changed, 25 insertions(+), 1 deletion(-) diff --git a/include/nlohmann/detail/output/serializer.hpp b/include/nlohmann/detail/output/serializer.hpp index b42a31044..3509de5fc 100644 --- a/include/nlohmann/detail/output/serializer.hpp +++ b/include/nlohmann/detail/output/serializer.hpp @@ -529,6 +529,12 @@ class serializer // thus removing/ignoring the invalid characters bytes = bytes_after_last_accept; + // fix for #4552 + if (error_handler == error_handler_t::ignore) + { + bytes += undumped_chars; + } + if (error_handler == error_handler_t::replace) { // add a replacement character diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 00a467a99..d1ba1cb75 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -18816,6 +18816,12 @@ class serializer // thus removing/ignoring the invalid characters bytes = bytes_after_last_accept; + // fix for #4552 - discussion pending + if (error_handler == error_handler_t::ignore) + { + bytes += undumped_chars; + } + if (error_handler == error_handler_t::replace) { // add a replacement character diff --git a/tests/src/unit-regression2.cpp b/tests/src/unit-regression2.cpp index 38d01059d..8f3a8b412 100644 --- a/tests/src/unit-regression2.cpp +++ b/tests/src/unit-regression2.cpp @@ -995,6 +995,14 @@ TEST_CASE("regression tests 2") CHECK(p.x == 1); CHECK(p.y == 2); } + + SECTION("issue #4552 - UTF-8 invalid characters are not always ignored when dumping with error_handler_t::ignore") + { + nlohmann::json node; + node["test"] = "test\334\005"; + const auto test_dump = node.dump(-1, ' ', false, nlohmann::json::error_handler_t::ignore); + CHECK(test_dump == "{\"test\":\"test\334\\u0005\"}"); + } } DOCTEST_CLANG_SUPPRESS_WARNING_POP diff --git a/tests/src/unit-serialization.cpp b/tests/src/unit-serialization.cpp index 201e5724c..bb7f1bfaf 100644 --- a/tests/src/unit-serialization.cpp +++ b/tests/src/unit-serialization.cpp @@ -107,7 +107,11 @@ TEST_CASE("serialization") CHECK_THROWS_WITH_AS(j.dump(), "[json.exception.type_error.316] invalid UTF-8 byte at index 5: 0x34", json::type_error&); CHECK_THROWS_AS(j.dump(1, ' ', false, json::error_handler_t::strict), json::type_error&); - CHECK(j.dump(-1, ' ', false, json::error_handler_t::ignore) == "\"123456\""); + + // see pending discussion at #4452 + // CHECK(j.dump(-1, ' ', false, json::error_handler_t::ignore) == "\"123456\""); + CHECK(j.dump(-1, ' ', false, json::error_handler_t::ignore) == "\"123\xF1\xB0\x34\x35\x36\""); + CHECK(j.dump(-1, ' ', false, json::error_handler_t::replace) == "\"123\xEF\xBF\xBD\x34\x35\x36\""); CHECK(j.dump(-1, ' ', true, json::error_handler_t::replace) == "\"123\\ufffd456\""); } From 851584e609db1483406337c451eb0453e226a9d4 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Wed, 18 Dec 2024 09:44:09 +0100 Subject: [PATCH 02/15] Set parents after insert call (#4537) * :bug: set parents after insert call * :rotating_light: fix warning --- include/nlohmann/json.hpp | 1 + single_include/nlohmann/json.hpp | 1 + tests/src/unit-diagnostics.cpp | 20 ++++++++++++++++++++ 3 files changed, 22 insertions(+) diff --git a/include/nlohmann/json.hpp b/include/nlohmann/json.hpp index 2a08d6df0..dda9b623f 100644 --- a/include/nlohmann/json.hpp +++ b/include/nlohmann/json.hpp @@ -3402,6 +3402,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec } m_data.m_value.object->insert(first.m_it.object_iterator, last.m_it.object_iterator); + set_parents(); } /// @brief updates a JSON object from another object, overwriting existing keys diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index d1ba1cb75..aa11c5b63 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -22988,6 +22988,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec } m_data.m_value.object->insert(first.m_it.object_iterator, last.m_it.object_iterator); + set_parents(); } /// @brief updates a JSON object from another object, overwriting existing keys diff --git a/tests/src/unit-diagnostics.cpp b/tests/src/unit-diagnostics.cpp index 345d6eee2..472d11e28 100644 --- a/tests/src/unit-diagnostics.cpp +++ b/tests/src/unit-diagnostics.cpp @@ -242,4 +242,24 @@ TEST_CASE("Regression tests for extended diagnostics") json const j_arr_copy = j_arr; } } + + SECTION("Regression test for issue #3915 - JSON_DIAGNOSTICS trigger assertion") + { + json j = json::object(); + j["root"] = "root_str"; + + json jj = json::object(); + jj["child"] = json::object(); + + // If do not push anything in object, then no assert will be produced + jj["child"]["prop1"] = "prop1_value"; + + // Push all properties of child in parent + j.insert(jj.at("child").begin(), jj.at("child").end()); + + // Here assert is generated when construct new json + const json k(j); + + CHECK(k.dump() == "{\"prop1\":\"prop1_value\",\"root\":\"root_str\"}"); + } } From 3665dabb0045f95b9c58c00452c65ac2a2398bde Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Wed, 18 Dec 2024 17:44:56 +0100 Subject: [PATCH 03/15] Suppress modernize-use-integer-sign-comparison (#4558) --- .clang-tidy | 1 + 1 file changed, 1 insertion(+) diff --git a/.clang-tidy b/.clang-tidy index ca6b9b092..9591b0ec6 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -53,6 +53,7 @@ Checks: '*, -modernize-type-traits, -modernize-use-constraints, -modernize-use-designated-initializers, + -modernize-use-integer-sign-comparison, -modernize-use-nodiscard, -modernize-use-ranges, -modernize-use-std-numbers, From 1a76a2c5148fb9f34ffe3a12a000f5492e768d13 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Wed, 18 Dec 2024 17:07:12 +0100 Subject: [PATCH 04/15] :art: fix format --- single_include/nlohmann/json.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index aa11c5b63..8dd4444cb 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -18816,7 +18816,7 @@ class serializer // thus removing/ignoring the invalid characters bytes = bytes_after_last_accept; - // fix for #4552 - discussion pending + // fix for #4552 if (error_handler == error_handler_t::ignore) { bytes += undumped_chars; From 3db5cc4ba86907853681cfe19a9a801187ea24f8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 18 Dec 2024 18:43:22 +0000 Subject: [PATCH 05/15] Bump actions/upload-artifact from 4.4.3 to 4.5.0 (#4557) --- .github/workflows/check_amalgamation.yml | 2 +- .github/workflows/cifuzz.yml | 2 +- .github/workflows/scorecards.yml | 2 +- .github/workflows/ubuntu.yml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/check_amalgamation.yml b/.github/workflows/check_amalgamation.yml index 032a5d6ed..4946be8d6 100644 --- a/.github/workflows/check_amalgamation.yml +++ b/.github/workflows/check_amalgamation.yml @@ -20,7 +20,7 @@ jobs: mkdir -p ./pr echo ${{ github.event.number }} > ./pr/number echo ${{ github.event.pull_request.user.login }} > ./pr/author - - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 + - uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0 with: name: pr path: pr/ diff --git a/.github/workflows/cifuzz.yml b/.github/workflows/cifuzz.yml index d82d0b569..425993f09 100644 --- a/.github/workflows/cifuzz.yml +++ b/.github/workflows/cifuzz.yml @@ -28,7 +28,7 @@ jobs: dry-run: false language: c++ - name: Upload Crash - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 + uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0 if: failure() && steps.build.outcome == 'success' with: name: artifacts diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 4c88cc309..4a271301b 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -68,7 +68,7 @@ jobs: # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF # format to the repository Actions tab. - name: "Upload artifact" - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 + uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0 with: name: SARIF file path: results.sarif diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index a39768b4b..8b4d71a78 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -147,7 +147,7 @@ jobs: - name: Build run: cmake --build build --target ci_test_coverage - name: Archive coverage report - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 + uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0 with: name: code-coverage-report path: ${{ github.workspace }}/build/html From 9f73bc1b373bac721a64427bce713833acb3b411 Mon Sep 17 00:00:00 2001 From: Sushrut Shringarputale Date: Wed, 18 Dec 2024 13:46:14 -0800 Subject: [PATCH 06/15] json start/end position implementation (#4517) * Add implementation to retrieve start and end positions of json during parse * Add more unit tests and add start/stop parsing for arrays * Add raw value for all types * Add more tests and fix compiler warning * Amalgamate * Fix CLang GCC warnings * Fix error in build * Style using astyle 3.1 * Fix whitespace changes * revert * more whitespace reverts * Address PR comments * Fix failing issues * More whitespace reverts * Address remaining PR comments * Address comments * Switch to using custom base class instead of default basic_json * Adding a basic using for a json using the new base class. Also address PR comments and fix CI failures * Address decltype comments * Diagnostic positions macro (#4) Co-authored-by: Sush Shringarputale * Fix missed include deletion * Add docs and address other PR comments (#5) * Add docs and address other PR comments --------- Co-authored-by: Sush Shringarputale * Address new PR comments and fix CI tests for documentation * Update documentation based on feedback (#6) --------- Co-authored-by: Sush Shringarputale * Address std::size_t and other comments * Fix new CI issues * Fix lcov * Improve lcov case with update to handle_diagnostic_positions call for discarded values * Fix indentation of LCOV_EXCL_STOP comments * fix amalgamation astyle issue --------- Co-authored-by: Sush Shringarputale --- CMakeLists.txt | 6 + docs/examples/diagnostic_positions.cpp | 51 + docs/examples/diagnostic_positions.output | 50 + .../api/macros/json_diagnostic_positions.md | 61 + docs/mkdocs/docs/integration/cmake.md | 3 + include/nlohmann/detail/abi_macros.hpp | 19 +- .../nlohmann/detail/input/binary_reader.hpp | 2 +- .../nlohmann/detail/input/input_adapters.hpp | 3 + include/nlohmann/detail/input/json_sax.hpp | 282 ++- include/nlohmann/detail/input/parser.hpp | 4 +- include/nlohmann/json.hpp | 73 +- single_include/nlohmann/json.hpp | 1784 ++++++++------- single_include/nlohmann/json_fwd.hpp | 19 +- tests/src/unit-cbor.cpp | 2 +- tests/src/unit-class_parser.cpp | 2 +- ...unit-class_parser_diagnostic_positions.cpp | 1957 +++++++++++++++++ tests/src/unit-deserialization.cpp | 2 +- tests/src/unit-disabled_exceptions.cpp | 4 +- tests/src/unit-regression2.cpp | 4 +- tests/src/unit-ubjson.cpp | 4 +- 20 files changed, 3545 insertions(+), 787 deletions(-) create mode 100644 docs/examples/diagnostic_positions.cpp create mode 100644 docs/examples/diagnostic_positions.output create mode 100644 docs/mkdocs/docs/api/macros/json_diagnostic_positions.md create mode 100644 tests/src/unit-class_parser_diagnostic_positions.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index bcfa90729..da5799cba 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,6 +40,7 @@ endif() option(JSON_BuildTests "Build the unit tests when BUILD_TESTING is enabled." ${JSON_BuildTests_INIT}) option(JSON_CI "Enable CI build targets." OFF) option(JSON_Diagnostics "Use extended diagnostic messages." OFF) +option(JSON_Diagnostic_Positions "Enable diagnostic positions." OFF) option(JSON_GlobalUDLs "Place user-defined string literals in the global namespace." ON) option(JSON_ImplicitConversions "Enable implicit conversions." ON) option(JSON_DisableEnumSerialization "Disable default integer enum serialization." OFF) @@ -96,6 +97,10 @@ if (JSON_Diagnostics) message(STATUS "Diagnostics enabled (JSON_DIAGNOSTICS=1)") endif() +if (JSON_Diagnostic_Positions) + message(STATUS "Diagnostic positions enabled") +endif() + if (NOT JSON_GlobalUDLs) message(STATUS "User-defined string literals are not put in the global namespace (JSON_USE_GLOBAL_UDLS=0)") endif() @@ -123,6 +128,7 @@ target_compile_definitions( $<$>:JSON_USE_IMPLICIT_CONVERSIONS=0> $<$:JSON_DISABLE_ENUM_SERIALIZATION=1> $<$:JSON_DIAGNOSTICS=1> + $<$:JSON_DIAGNOSTIC_POSITIONS=1> $<$:JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON=1> ) diff --git a/docs/examples/diagnostic_positions.cpp b/docs/examples/diagnostic_positions.cpp new file mode 100644 index 000000000..259344cd0 --- /dev/null +++ b/docs/examples/diagnostic_positions.cpp @@ -0,0 +1,51 @@ +#include + +#define JSON_DIAGNOSTIC_POSITIONS 1 +#include + +using json = nlohmann::json; + +int main() +{ + std::string json_string = R"( + { + "address": { + "street": "Fake Street", + "housenumber": 1 + } + } + )"; + json j = json::parse(json_string); + + std::cout << "Root diagnostic positions: \n"; + std::cout << "\tstart_pos: " << j.start_pos() << '\n'; + std::cout << "\tend_pos:" << j.end_pos() << "\n"; + std::cout << "Original string: \n"; + std::cout << "{\n \"address\": {\n \"street\": \"Fake Street\",\n \"housenumber\": 1\n }\n }" << "\n"; + std::cout << "Parsed string: \n"; + std::cout << json_string.substr(j.start_pos(), j.end_pos() - j.start_pos()) << "\n\n"; + + std::cout << "address diagnostic positions: \n"; + std::cout << "\tstart_pos:" << j["address"].start_pos() << '\n'; + std::cout << "\tend_pos:" << j["address"].end_pos() << "\n\n"; + std::cout << "Original string: \n"; + std::cout << "{ \"street\": \"Fake Street\",\n \"housenumber\": 1\n }" << "\n"; + std::cout << "Parsed string: \n"; + std::cout << json_string.substr(j["address"].start_pos(), j["address"].end_pos() - j["address"].start_pos()) << "\n\n"; + + std::cout << "street diagnostic positions: \n"; + std::cout << "\tstart_pos:" << j["address"]["street"].start_pos() << '\n'; + std::cout << "\tend_pos:" << j["address"]["street"].end_pos() << "\n\n"; + std::cout << "Original string: \n"; + std::cout << "\"Fake Street\"" << "\n"; + std::cout << "Parsed string: \n"; + std::cout << json_string.substr(j["address"]["street"].start_pos(), j["address"]["street"].end_pos() - j["address"]["street"].start_pos()) << "\n\n"; + + std::cout << "housenumber diagnostic positions: \n"; + std::cout << "\tstart_pos:" << j["address"]["housenumber"].start_pos() << '\n'; + std::cout << "\tend_pos:" << j["address"]["housenumber"].end_pos() << "\n\n"; + std::cout << "Original string: \n"; + std::cout << "1" << "\n"; + std::cout << "Parsed string: \n"; + std::cout << json_string.substr(j["address"]["housenumber"].start_pos(), j["address"]["housenumber"].end_pos() - j["address"]["housenumber"].start_pos()) << "\n\n"; +} diff --git a/docs/examples/diagnostic_positions.output b/docs/examples/diagnostic_positions.output new file mode 100644 index 000000000..a6becc0e6 --- /dev/null +++ b/docs/examples/diagnostic_positions.output @@ -0,0 +1,50 @@ +Root diagnostic positions: + start_pos: 5 + end_pos:109 +Original string: +{ + "address": { + "street": "Fake Street", + "housenumber": 1 + } + } +Parsed string: +{ + "address": { + "street": "Fake Street", + "housenumber": 1 + } + } + +address diagnostic positions: + start_pos:26 + end_pos:103 + +Original string: +{ "street": "Fake Street", + "housenumber": 1 + } +Parsed string: +{ + "street": "Fake Street", + "housenumber": 1 + } + +street diagnostic positions: + start_pos:50 + end_pos:63 + +Original string: +"Fake Street" +Parsed string: +"Fake Street" + +housenumber diagnostic positions: + start_pos:92 + end_pos:93 + +Original string: +1 +Parsed string: +1 + diff --git a/docs/mkdocs/docs/api/macros/json_diagnostic_positions.md b/docs/mkdocs/docs/api/macros/json_diagnostic_positions.md new file mode 100644 index 000000000..d7d4ddd43 --- /dev/null +++ b/docs/mkdocs/docs/api/macros/json_diagnostic_positions.md @@ -0,0 +1,61 @@ +# JSON_DIAGNOSTIC_POSITIONS + +```cpp +#define JSON_DIAGNOSTIC_POSITIONS /* value */ +``` + +This macro enables position diagnostics for generated JSON objects. + +When enabled, two new properties: `start_pos()` and `end_pos()` are added to `nlohmann::basic_json` objects and fields. `start_pos()` returns the start +position of that JSON object/field in the original string the object was parsed from. Likewise, `end_pos()` returns the end position of that JSON +object/field in the original string the object was parsed from. + +`start_pos()` returns the first character of a given element in the original JSON string, while `end_pos()` returns the character following the last +character. For objects and arrays, the first and last characters correspond to the opening or closing braces/brackets, respectively. For fields, the first +and last character represent the opening and closing quotes or the first and last character of the field's numerical or predefined value +(true/false/null), respectively. + +Given the above, `end_pos() - start_pos()` for an object or field provides the length of the string representation for that object or field, including the +opening or closing braces, brackets, or quotes. + +`start_pos()` and `end_pos()` are only set if the JSON object was parsed using `parse()`. For all other cases, `std::string::npos` will be returned. + +Note that enabling this macro increases the size of every JSON value by two `std::size_t` fields and adds +slight runtime overhead. + +## Default definition + +The default value is `0` (position diagnostics are switched off). + +```cpp +#define JSON_DIAGNOSTIC_POSITIONS 0 +``` + +When the macro is not defined, the library will define it to its default value. + +## Notes + +!!! hint "CMake option" + + Diagnostic messages can also be controlled with the CMake option + [`JSON_Diagnostic_Positions`](../../integration/cmake.md#json_diagnostic_positions) (`OFF` by default) + which defines `JSON_DIAGNOSTIC_POSITIONS` accordingly. + +## Examples + +??? example "Example 1: retrieving positions" + + ```cpp + --8<-- "examples/diagnostic_positions.cpp" + ``` + + Output: + + ``` + --8<-- "examples/diagnostic_positions.output" + ``` + + The output shows the start/end positions of all the objects and fields in the JSON string. + +## Version history + diff --git a/docs/mkdocs/docs/integration/cmake.md b/docs/mkdocs/docs/integration/cmake.md index 2f68f09ce..df0203192 100644 --- a/docs/mkdocs/docs/integration/cmake.md +++ b/docs/mkdocs/docs/integration/cmake.md @@ -135,6 +135,9 @@ Enable CI build targets. The exact targets are used during the several CI steps Enable [extended diagnostic messages](../home/exceptions.md#extended-diagnostic-messages) by defining macro [`JSON_DIAGNOSTICS`](../api/macros/json_diagnostics.md). This option is `OFF` by default. +### `JSON_Diagnostic_Positions` +Enable position diagnostics by defining macro [`JSON_DIAGNOSTIC_POSITIONS`](../api/macros/json_diagnostic_positions.md). This option is off by default. + ### `JSON_DisableEnumSerialization` Disable default `enum` serialization by defining the macro diff --git a/include/nlohmann/detail/abi_macros.hpp b/include/nlohmann/detail/abi_macros.hpp index ea7ce6421..baa2762da 100644 --- a/include/nlohmann/detail/abi_macros.hpp +++ b/include/nlohmann/detail/abi_macros.hpp @@ -26,6 +26,10 @@ #define JSON_DIAGNOSTICS 0 #endif +#ifndef JSON_DIAGNOSTIC_POSITIONS + #define JSON_DIAGNOSTIC_POSITIONS 0 +#endif + #ifndef JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON #define JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON 0 #endif @@ -36,6 +40,12 @@ #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS #endif +#if JSON_DIAGNOSTIC_POSITIONS + #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTIC_POSITIONS _dp +#else + #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTIC_POSITIONS +#endif + #if JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON #define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON _ldvcmp #else @@ -47,14 +57,15 @@ #endif // Construct the namespace ABI tags component -#define NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b) json_abi ## a ## b -#define NLOHMANN_JSON_ABI_TAGS_CONCAT(a, b) \ - NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b) +#define NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b, c) json_abi ## a ## b ## c +#define NLOHMANN_JSON_ABI_TAGS_CONCAT(a, b, c) \ + NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b, c) #define NLOHMANN_JSON_ABI_TAGS \ NLOHMANN_JSON_ABI_TAGS_CONCAT( \ NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS, \ - NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON) + NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON, \ + NLOHMANN_JSON_ABI_TAG_DIAGNOSTIC_POSITIONS) // Construct the namespace version component #define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch) \ diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index f8c519064..f498b0de9 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -65,7 +65,7 @@ static inline bool little_endianness(int num = 1) noexcept /*! @brief deserialization of CBOR, MessagePack, and UBJSON values */ -template> +template> class binary_reader { using number_integer_t = typename BasicJsonType::number_integer_t; diff --git a/include/nlohmann/detail/input/input_adapters.hpp b/include/nlohmann/detail/input/input_adapters.hpp index f0ef1f44c..1fb132d37 100644 --- a/include/nlohmann/detail/input/input_adapters.hpp +++ b/include/nlohmann/detail/input/input_adapters.hpp @@ -462,6 +462,9 @@ typename container_input_adapter_factory_impl::container_input_adapter_factory::create(container); } +// specialization for std::string +using string_input_adapter_type = decltype(input_adapter(std::declval())); + #ifndef JSON_NO_IO // Special cases with fast paths inline file_input_adapter input_adapter(std::FILE* file) diff --git a/include/nlohmann/detail/input/json_sax.hpp b/include/nlohmann/detail/input/json_sax.hpp index b549ec2fa..f081e4aa4 100644 --- a/include/nlohmann/detail/input/json_sax.hpp +++ b/include/nlohmann/detail/input/json_sax.hpp @@ -10,13 +10,14 @@ #include #include // string +#include // enable_if_t #include // move #include // vector #include +#include #include #include - NLOHMANN_JSON_NAMESPACE_BEGIN /*! @@ -157,7 +158,7 @@ constructor contains the parsed value. @tparam BasicJsonType the JSON type */ -template +template class json_sax_dom_parser { public: @@ -166,14 +167,15 @@ class json_sax_dom_parser using number_float_t = typename BasicJsonType::number_float_t; using string_t = typename BasicJsonType::string_t; using binary_t = typename BasicJsonType::binary_t; + using lexer_t = lexer; /*! @param[in,out] r reference to a JSON value that is manipulated while parsing @param[in] allow_exceptions_ whether parse errors yield exceptions */ - explicit json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true) - : root(r), allow_exceptions(allow_exceptions_) + explicit json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true, lexer_t* lexer_ = nullptr) + : root(r), allow_exceptions(allow_exceptions_), m_lexer_ref(lexer_) {} // make class move-only @@ -229,6 +231,17 @@ class json_sax_dom_parser { ref_stack.push_back(handle_value(BasicJsonType::value_t::object)); +#if JSON_DIAGNOSTIC_POSITIONS + // Manually set the start position of the object here. + // Ensure this is after the call to handle_value to ensure correct start position. + if (m_lexer_ref) + { + // Lexer has read the first character of the object, so + // subtract 1 from the position to get the correct start position. + ref_stack.back()->start_position = m_lexer_ref->get_position() - 1; + } +#endif + if (JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) { JSON_THROW(out_of_range::create(408, concat("excessive object size: ", std::to_string(len)), ref_stack.back())); @@ -252,6 +265,14 @@ class json_sax_dom_parser JSON_ASSERT(!ref_stack.empty()); JSON_ASSERT(ref_stack.back()->is_object()); +#if JSON_DIAGNOSTIC_POSITIONS + if (m_lexer_ref) + { + // Lexer's position is past the closing brace, so set that as the end position. + ref_stack.back()->end_position = m_lexer_ref->get_position(); + } +#endif + ref_stack.back()->set_parents(); ref_stack.pop_back(); return true; @@ -261,6 +282,15 @@ class json_sax_dom_parser { ref_stack.push_back(handle_value(BasicJsonType::value_t::array)); +#if JSON_DIAGNOSTIC_POSITIONS + // Manually set the start position of the array here. + // Ensure this is after the call to handle_value to ensure correct start position. + if (m_lexer_ref) + { + ref_stack.back()->start_position = m_lexer_ref->get_position() - 1; + } +#endif + if (JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) { JSON_THROW(out_of_range::create(408, concat("excessive array size: ", std::to_string(len)), ref_stack.back())); @@ -274,6 +304,14 @@ class json_sax_dom_parser JSON_ASSERT(!ref_stack.empty()); JSON_ASSERT(ref_stack.back()->is_array()); +#if JSON_DIAGNOSTIC_POSITIONS + if (m_lexer_ref) + { + // Lexer's position is past the closing bracket, so set that as the end position. + ref_stack.back()->end_position = m_lexer_ref->get_position(); + } +#endif + ref_stack.back()->set_parents(); ref_stack.pop_back(); return true; @@ -298,6 +336,75 @@ class json_sax_dom_parser } private: + +#if JSON_DIAGNOSTIC_POSITIONS + void handle_diagnostic_positions_for_json_value(BasicJsonType& v) + { + if (m_lexer_ref) + { + // Lexer has read past the current field value, so set the end position to the current position. + // The start position will be set below based on the length of the string representation + // of the value. + v.end_position = m_lexer_ref->get_position(); + + switch (v.type()) + { + case value_t::boolean: + { + // 4 and 5 are the string length of "true" and "false" + v.start_position = v.end_position - (v.m_data.m_value.boolean ? 4 : 5); + break; + } + + case value_t::null: + { + // 4 is the string length of "null" + v.start_position = v.end_position - 4; + break; + } + + case value_t::string: + { + // include the length of the quotes, which is 2 + v.start_position = v.end_position - v.m_data.m_value.string->size() - 2; + break; + } + + // As we handle the start and end positions for values created during parsing, + // we do not expect the following value type to be called. Regardless, set the positions + // in case this is created manually or through a different constructor. Exclude from lcov + // since the exact condition of this switch is esoteric. + // LCOV_EXCL_START + case value_t::discarded: + { + v.end_position = std::string::npos; + v.start_position = v.end_position; + break; + } + // LCOV_EXCL_STOP + case value_t::binary: + case value_t::number_integer: + case value_t::number_unsigned: + case value_t::number_float: + { + v.start_position = v.end_position - m_lexer_ref->get_string().size(); + break; + } + case value_t::object: + case value_t::array: + { + // object and array are handled in start_object() and start_array() handlers + // skip setting the values here. + break; + } + default: // LCOV_EXCL_LINE + // Handle all possible types discretely, default handler should never be reached. + JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert,-warnings-as-errors) LCOV_EXCL_LINE + } + } + } +#endif + /*! @invariant If the ref stack is empty, then the passed value will be the new root. @@ -311,6 +418,11 @@ class json_sax_dom_parser if (ref_stack.empty()) { root = BasicJsonType(std::forward(v)); + +#if JSON_DIAGNOSTIC_POSITIONS + handle_diagnostic_positions_for_json_value(root); +#endif + return &root; } @@ -319,12 +431,22 @@ class json_sax_dom_parser if (ref_stack.back()->is_array()) { ref_stack.back()->m_data.m_value.array->emplace_back(std::forward(v)); + +#if JSON_DIAGNOSTIC_POSITIONS + handle_diagnostic_positions_for_json_value(ref_stack.back()->m_data.m_value.array->back()); +#endif + return &(ref_stack.back()->m_data.m_value.array->back()); } JSON_ASSERT(ref_stack.back()->is_object()); JSON_ASSERT(object_element); *object_element = BasicJsonType(std::forward(v)); + +#if JSON_DIAGNOSTIC_POSITIONS + handle_diagnostic_positions_for_json_value(*object_element); +#endif + return object_element; } @@ -338,9 +460,11 @@ class json_sax_dom_parser bool errored = false; /// whether to throw exceptions in case of errors const bool allow_exceptions = true; + /// the lexer reference to obtain the current position + lexer_t* m_lexer_ref = nullptr; }; -template +template class json_sax_dom_callback_parser { public: @@ -351,11 +475,13 @@ class json_sax_dom_callback_parser using binary_t = typename BasicJsonType::binary_t; using parser_callback_t = typename BasicJsonType::parser_callback_t; using parse_event_t = typename BasicJsonType::parse_event_t; + using lexer_t = lexer; json_sax_dom_callback_parser(BasicJsonType& r, parser_callback_t cb, - const bool allow_exceptions_ = true) - : root(r), callback(std::move(cb)), allow_exceptions(allow_exceptions_) + const bool allow_exceptions_ = true, + lexer_t* lexer_ = nullptr) + : root(r), callback(std::move(cb)), allow_exceptions(allow_exceptions_), m_lexer_ref(lexer_) { keep_stack.push_back(true); } @@ -418,12 +544,26 @@ class json_sax_dom_callback_parser auto val = handle_value(BasicJsonType::value_t::object, true); ref_stack.push_back(val.second); - // check object limit - if (ref_stack.back() && JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) + if (ref_stack.back()) { - JSON_THROW(out_of_range::create(408, concat("excessive object size: ", std::to_string(len)), ref_stack.back())); - } +#if JSON_DIAGNOSTIC_POSITIONS + // Manually set the start position of the object here. + // Ensure this is after the call to handle_value to ensure correct start position. + if (m_lexer_ref) + { + // Lexer has read the first character of the object, so + // subtract 1 from the position to get the correct start position. + ref_stack.back()->start_position = m_lexer_ref->get_position() - 1; + } +#endif + + // check object limit + if (JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, concat("excessive object size: ", std::to_string(len)), ref_stack.back())); + } + } return true; } @@ -452,9 +592,23 @@ class json_sax_dom_callback_parser { // discard object *ref_stack.back() = discarded; + +#if JSON_DIAGNOSTIC_POSITIONS + // Set start/end positions for discarded object. + handle_diagnostic_positions_for_json_value(*ref_stack.back()); +#endif } else { + +#if JSON_DIAGNOSTIC_POSITIONS + if (m_lexer_ref) + { + // Lexer's position is past the closing brace, so set that as the end position. + ref_stack.back()->end_position = m_lexer_ref->get_position(); + } +#endif + ref_stack.back()->set_parents(); } } @@ -488,10 +642,25 @@ class json_sax_dom_callback_parser auto val = handle_value(BasicJsonType::value_t::array, true); ref_stack.push_back(val.second); - // check array limit - if (ref_stack.back() && JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) + if (ref_stack.back()) { - JSON_THROW(out_of_range::create(408, concat("excessive array size: ", std::to_string(len)), ref_stack.back())); + +#if JSON_DIAGNOSTIC_POSITIONS + // Manually set the start position of the array here. + // Ensure this is after the call to handle_value to ensure correct start position. + if (m_lexer_ref) + { + // Lexer has read the first character of the array, so + // subtract 1 from the position to get the correct start position. + ref_stack.back()->start_position = m_lexer_ref->get_position() - 1; + } +#endif + + // check array limit + if (JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, concat("excessive array size: ", std::to_string(len)), ref_stack.back())); + } } return true; @@ -506,12 +675,26 @@ class json_sax_dom_callback_parser keep = callback(static_cast(ref_stack.size()) - 1, parse_event_t::array_end, *ref_stack.back()); if (keep) { + +#if JSON_DIAGNOSTIC_POSITIONS + if (m_lexer_ref) + { + // Lexer's position is past the closing bracket, so set that as the end position. + ref_stack.back()->end_position = m_lexer_ref->get_position(); + } +#endif + ref_stack.back()->set_parents(); } else { // discard array *ref_stack.back() = discarded; + +#if JSON_DIAGNOSTIC_POSITIONS + // Set start/end positions for discarded array. + handle_diagnostic_positions_for_json_value(*ref_stack.back()); +#endif } } @@ -548,6 +731,71 @@ class json_sax_dom_callback_parser } private: + +#if JSON_DIAGNOSTIC_POSITIONS + void handle_diagnostic_positions_for_json_value(BasicJsonType& v) + { + if (m_lexer_ref) + { + // Lexer has read past the current field value, so set the end position to the current position. + // The start position will be set below based on the length of the string representation + // of the value. + v.end_position = m_lexer_ref->get_position(); + + switch (v.type()) + { + case value_t::boolean: + { + // 4 and 5 are the string length of "true" and "false" + v.start_position = v.end_position - (v.m_data.m_value.boolean ? 4 : 5); + break; + } + + case value_t::null: + { + // 4 is the string length of "null" + v.start_position = v.end_position - 4; + break; + } + + case value_t::string: + { + // include the length of the quotes, which is 2 + v.start_position = v.end_position - v.m_data.m_value.string->size() - 2; + break; + } + + case value_t::discarded: + { + v.end_position = std::string::npos; + v.start_position = v.end_position; + break; + } + + case value_t::binary: + case value_t::number_integer: + case value_t::number_unsigned: + case value_t::number_float: + { + v.start_position = v.end_position - m_lexer_ref->get_string().size(); + break; + } + + case value_t::object: + case value_t::array: + { + // object and array are handled in start_object() and start_array() handlers + // skip setting the values here. + break; + } + default: // LCOV_EXCL_LINE + // Handle all possible types discretely, default handler should never be reached. + JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert,-warnings-as-errors) LCOV_EXCL_LINE + } + } + } +#endif + /*! @param[in] v value to add to the JSON value we build during parsing @param[in] skip_callback whether we should skip calling the callback @@ -578,6 +826,10 @@ class json_sax_dom_callback_parser // create value auto value = BasicJsonType(std::forward(v)); +#if JSON_DIAGNOSTIC_POSITIONS + handle_diagnostic_positions_for_json_value(value); +#endif + // check callback const bool keep = skip_callback || callback(static_cast(ref_stack.size()), parse_event_t::value, value); @@ -645,6 +897,8 @@ class json_sax_dom_callback_parser const bool allow_exceptions = true; /// a discarded value for the callback BasicJsonType discarded = BasicJsonType::value_t::discarded; + /// the lexer reference to obtain the current position + lexer_t* m_lexer_ref = nullptr; }; template diff --git a/include/nlohmann/detail/input/parser.hpp b/include/nlohmann/detail/input/parser.hpp index e46b0ef2a..997a26ff3 100644 --- a/include/nlohmann/detail/input/parser.hpp +++ b/include/nlohmann/detail/input/parser.hpp @@ -94,7 +94,7 @@ class parser { if (callback) { - json_sax_dom_callback_parser sdp(result, callback, allow_exceptions); + json_sax_dom_callback_parser sdp(result, callback, allow_exceptions, &m_lexer); sax_parse_internal(&sdp); // in strict mode, input must be completely read @@ -122,7 +122,7 @@ class parser } else { - json_sax_dom_parser sdp(result, allow_exceptions); + json_sax_dom_parser sdp(result, allow_exceptions, &m_lexer); sax_parse_internal(&sdp); // in strict mode, input must be completely read diff --git a/include/nlohmann/json.hpp b/include/nlohmann/json.hpp index dda9b623f..7234177a0 100644 --- a/include/nlohmann/json.hpp +++ b/include/nlohmann/json.hpp @@ -115,9 +115,9 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec friend class ::nlohmann::detail::binary_writer; template friend class ::nlohmann::detail::binary_reader; - template + template friend class ::nlohmann::detail::json_sax_dom_parser; - template + template friend class ::nlohmann::detail::json_sax_dom_callback_parser; friend class ::nlohmann::detail::exception; @@ -848,6 +848,10 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec detail::enable_if_t < detail::is_basic_json::value&& !std::is_same::value, int > = 0 > basic_json(const BasicJsonType& val) +#if JSON_DIAGNOSTIC_POSITIONS + : start_position(val.start_position), + end_position(val.end_position) +#endif { using other_boolean_t = typename BasicJsonType::boolean_t; using other_number_float_t = typename BasicJsonType::number_float_t; @@ -894,6 +898,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE } JSON_ASSERT(m_data.m_type == val.type()); + set_parents(); assert_invariant(); } @@ -1145,6 +1150,10 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @sa https://json.nlohmann.me/api/basic_json/basic_json/ basic_json(const basic_json& other) : json_base_class_t(other) +#if JSON_DIAGNOSTIC_POSITIONS + , start_position(other.start_position) + , end_position(other.end_position) +#endif { m_data.m_type = other.m_data.m_type; // check of passed value is valid @@ -1215,6 +1224,10 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec basic_json(basic_json&& other) noexcept : json_base_class_t(std::forward(other)), m_data(std::move(other.m_data)) // cppcheck-suppress[accessForwarded] TODO check +#if JSON_DIAGNOSTIC_POSITIONS + , start_position(other.start_position) // cppcheck-suppress[accessForwarded] TODO check + , end_position(other.end_position) // cppcheck-suppress[accessForwarded] TODO check +#endif { // check that passed value is valid other.assert_invariant(false); // cppcheck-suppress[accessForwarded] @@ -1223,6 +1236,11 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec other.m_data.m_type = value_t::null; other.m_data.m_value = {}; +#if JSON_DIAGNOSTIC_POSITIONS + other.start_position = std::string::npos; + other.end_position = std::string::npos; +#endif + set_parents(); assert_invariant(); } @@ -1243,6 +1261,12 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec using std::swap; swap(m_data.m_type, other.m_data.m_type); swap(m_data.m_value, other.m_data.m_value); + +#if JSON_DIAGNOSTIC_POSITIONS + swap(start_position, other.start_position); + swap(end_position, other.end_position); +#endif + json_base_class_t::operator=(std::move(other)); set_parents(); @@ -4226,6 +4250,23 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec basic_json* m_parent = nullptr; #endif +#if JSON_DIAGNOSTIC_POSITIONS + /// the start position of the value + std::size_t start_position = std::string::npos; + /// the end position of the value + std::size_t end_position = std::string::npos; + public: + constexpr std::size_t start_pos() const noexcept + { + return start_position; + } + + constexpr std::size_t end_pos() const noexcept + { + return end_position; + } +#endif + ////////////////////////////////////////// // binary serialization/deserialization // ////////////////////////////////////////// @@ -4367,8 +4408,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::cbor).sax_parse(input_format_t::cbor, &sdp, strict, tag_handler); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -4383,8 +4424,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::cbor).sax_parse(input_format_t::cbor, &sdp, strict, tag_handler); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -4408,8 +4449,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = i.get(); + detail::json_sax_dom_parser sdp(result, allow_exceptions); // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) const bool res = binary_reader(std::move(ia), input_format_t::cbor).sax_parse(input_format_t::cbor, &sdp, strict, tag_handler); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); @@ -4424,8 +4465,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::msgpack).sax_parse(input_format_t::msgpack, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -4439,8 +4480,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::msgpack).sax_parse(input_format_t::msgpack, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -4462,8 +4503,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = i.get(); + detail::json_sax_dom_parser sdp(result, allow_exceptions); // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) const bool res = binary_reader(std::move(ia), input_format_t::msgpack).sax_parse(input_format_t::msgpack, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); @@ -4478,8 +4519,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::ubjson).sax_parse(input_format_t::ubjson, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -4493,8 +4534,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::ubjson).sax_parse(input_format_t::ubjson, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -4516,8 +4557,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = i.get(); + detail::json_sax_dom_parser sdp(result, allow_exceptions); // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) const bool res = binary_reader(std::move(ia), input_format_t::ubjson).sax_parse(input_format_t::ubjson, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); @@ -4532,8 +4573,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::bjdata).sax_parse(input_format_t::bjdata, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -4547,8 +4588,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::bjdata).sax_parse(input_format_t::bjdata, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -4562,8 +4603,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::bson).sax_parse(input_format_t::bson, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -4577,8 +4618,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::bson).sax_parse(input_format_t::bson, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -4600,8 +4641,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = i.get(); + detail::json_sax_dom_parser sdp(result, allow_exceptions); // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) const bool res = binary_reader(std::move(ia), input_format_t::bson).sax_parse(input_format_t::bson, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 8dd4444cb..2192b4f55 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -73,6 +73,10 @@ #define JSON_DIAGNOSTICS 0 #endif +#ifndef JSON_DIAGNOSTIC_POSITIONS + #define JSON_DIAGNOSTIC_POSITIONS 0 +#endif + #ifndef JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON #define JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON 0 #endif @@ -83,6 +87,12 @@ #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS #endif +#if JSON_DIAGNOSTIC_POSITIONS + #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTIC_POSITIONS _dp +#else + #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTIC_POSITIONS +#endif + #if JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON #define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON _ldvcmp #else @@ -94,14 +104,15 @@ #endif // Construct the namespace ABI tags component -#define NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b) json_abi ## a ## b -#define NLOHMANN_JSON_ABI_TAGS_CONCAT(a, b) \ - NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b) +#define NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b, c) json_abi ## a ## b ## c +#define NLOHMANN_JSON_ABI_TAGS_CONCAT(a, b, c) \ + NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b, c) #define NLOHMANN_JSON_ABI_TAGS \ NLOHMANN_JSON_ABI_TAGS_CONCAT( \ NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS, \ - NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON) + NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON, \ + NLOHMANN_JSON_ABI_TAG_DIAGNOSTIC_POSITIONS) // Construct the namespace version component #define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch) \ @@ -6775,6 +6786,9 @@ typename container_input_adapter_factory_impl::container_input_adapter_factory::create(container); } +// specialization for std::string +using string_input_adapter_type = decltype(input_adapter(std::declval())); + #ifndef JSON_NO_IO // Special cases with fast paths inline file_input_adapter input_adapter(std::FILE* file) @@ -6871,725 +6885,12 @@ NLOHMANN_JSON_NAMESPACE_END #include #include // string +#include // enable_if_t #include // move #include // vector // #include -// #include - -// #include - - -NLOHMANN_JSON_NAMESPACE_BEGIN - -/*! -@brief SAX interface - -This class describes the SAX interface used by @ref nlohmann::json::sax_parse. -Each function is called in different situations while the input is parsed. The -boolean return value informs the parser whether to continue processing the -input. -*/ -template -struct json_sax -{ - using number_integer_t = typename BasicJsonType::number_integer_t; - using number_unsigned_t = typename BasicJsonType::number_unsigned_t; - using number_float_t = typename BasicJsonType::number_float_t; - using string_t = typename BasicJsonType::string_t; - using binary_t = typename BasicJsonType::binary_t; - - /*! - @brief a null value was read - @return whether parsing should proceed - */ - virtual bool null() = 0; - - /*! - @brief a boolean value was read - @param[in] val boolean value - @return whether parsing should proceed - */ - virtual bool boolean(bool val) = 0; - - /*! - @brief an integer number was read - @param[in] val integer value - @return whether parsing should proceed - */ - virtual bool number_integer(number_integer_t val) = 0; - - /*! - @brief an unsigned integer number was read - @param[in] val unsigned integer value - @return whether parsing should proceed - */ - virtual bool number_unsigned(number_unsigned_t val) = 0; - - /*! - @brief a floating-point number was read - @param[in] val floating-point value - @param[in] s raw token value - @return whether parsing should proceed - */ - virtual bool number_float(number_float_t val, const string_t& s) = 0; - - /*! - @brief a string value was read - @param[in] val string value - @return whether parsing should proceed - @note It is safe to move the passed string value. - */ - virtual bool string(string_t& val) = 0; - - /*! - @brief a binary value was read - @param[in] val binary value - @return whether parsing should proceed - @note It is safe to move the passed binary value. - */ - virtual bool binary(binary_t& val) = 0; - - /*! - @brief the beginning of an object was read - @param[in] elements number of object elements or -1 if unknown - @return whether parsing should proceed - @note binary formats may report the number of elements - */ - virtual bool start_object(std::size_t elements) = 0; - - /*! - @brief an object key was read - @param[in] val object key - @return whether parsing should proceed - @note It is safe to move the passed string. - */ - virtual bool key(string_t& val) = 0; - - /*! - @brief the end of an object was read - @return whether parsing should proceed - */ - virtual bool end_object() = 0; - - /*! - @brief the beginning of an array was read - @param[in] elements number of array elements or -1 if unknown - @return whether parsing should proceed - @note binary formats may report the number of elements - */ - virtual bool start_array(std::size_t elements) = 0; - - /*! - @brief the end of an array was read - @return whether parsing should proceed - */ - virtual bool end_array() = 0; - - /*! - @brief a parse error occurred - @param[in] position the position in the input where the error occurs - @param[in] last_token the last read token - @param[in] ex an exception object describing the error - @return whether parsing should proceed (must return false) - */ - virtual bool parse_error(std::size_t position, - const std::string& last_token, - const detail::exception& ex) = 0; - - json_sax() = default; - json_sax(const json_sax&) = default; - json_sax(json_sax&&) noexcept = default; - json_sax& operator=(const json_sax&) = default; - json_sax& operator=(json_sax&&) noexcept = default; - virtual ~json_sax() = default; -}; - -namespace detail -{ -/*! -@brief SAX implementation to create a JSON value from SAX events - -This class implements the @ref json_sax interface and processes the SAX events -to create a JSON value which makes it basically a DOM parser. The structure or -hierarchy of the JSON value is managed by the stack `ref_stack` which contains -a pointer to the respective array or object for each recursion depth. - -After successful parsing, the value that is passed by reference to the -constructor contains the parsed value. - -@tparam BasicJsonType the JSON type -*/ -template -class json_sax_dom_parser -{ - public: - using number_integer_t = typename BasicJsonType::number_integer_t; - using number_unsigned_t = typename BasicJsonType::number_unsigned_t; - using number_float_t = typename BasicJsonType::number_float_t; - using string_t = typename BasicJsonType::string_t; - using binary_t = typename BasicJsonType::binary_t; - - /*! - @param[in,out] r reference to a JSON value that is manipulated while - parsing - @param[in] allow_exceptions_ whether parse errors yield exceptions - */ - explicit json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true) - : root(r), allow_exceptions(allow_exceptions_) - {} - - // make class move-only - json_sax_dom_parser(const json_sax_dom_parser&) = delete; - json_sax_dom_parser(json_sax_dom_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) - json_sax_dom_parser& operator=(const json_sax_dom_parser&) = delete; - json_sax_dom_parser& operator=(json_sax_dom_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) - ~json_sax_dom_parser() = default; - - bool null() - { - handle_value(nullptr); - return true; - } - - bool boolean(bool val) - { - handle_value(val); - return true; - } - - bool number_integer(number_integer_t val) - { - handle_value(val); - return true; - } - - bool number_unsigned(number_unsigned_t val) - { - handle_value(val); - return true; - } - - bool number_float(number_float_t val, const string_t& /*unused*/) - { - handle_value(val); - return true; - } - - bool string(string_t& val) - { - handle_value(val); - return true; - } - - bool binary(binary_t& val) - { - handle_value(std::move(val)); - return true; - } - - bool start_object(std::size_t len) - { - ref_stack.push_back(handle_value(BasicJsonType::value_t::object)); - - if (JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) - { - JSON_THROW(out_of_range::create(408, concat("excessive object size: ", std::to_string(len)), ref_stack.back())); - } - - return true; - } - - bool key(string_t& val) - { - JSON_ASSERT(!ref_stack.empty()); - JSON_ASSERT(ref_stack.back()->is_object()); - - // add null at given key and store the reference for later - object_element = &(ref_stack.back()->m_data.m_value.object->operator[](val)); - return true; - } - - bool end_object() - { - JSON_ASSERT(!ref_stack.empty()); - JSON_ASSERT(ref_stack.back()->is_object()); - - ref_stack.back()->set_parents(); - ref_stack.pop_back(); - return true; - } - - bool start_array(std::size_t len) - { - ref_stack.push_back(handle_value(BasicJsonType::value_t::array)); - - if (JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) - { - JSON_THROW(out_of_range::create(408, concat("excessive array size: ", std::to_string(len)), ref_stack.back())); - } - - return true; - } - - bool end_array() - { - JSON_ASSERT(!ref_stack.empty()); - JSON_ASSERT(ref_stack.back()->is_array()); - - ref_stack.back()->set_parents(); - ref_stack.pop_back(); - return true; - } - - template - bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, - const Exception& ex) - { - errored = true; - static_cast(ex); - if (allow_exceptions) - { - JSON_THROW(ex); - } - return false; - } - - constexpr bool is_errored() const - { - return errored; - } - - private: - /*! - @invariant If the ref stack is empty, then the passed value will be the new - root. - @invariant If the ref stack contains a value, then it is an array or an - object to which we can add elements - */ - template - JSON_HEDLEY_RETURNS_NON_NULL - BasicJsonType* handle_value(Value&& v) - { - if (ref_stack.empty()) - { - root = BasicJsonType(std::forward(v)); - return &root; - } - - JSON_ASSERT(ref_stack.back()->is_array() || ref_stack.back()->is_object()); - - if (ref_stack.back()->is_array()) - { - ref_stack.back()->m_data.m_value.array->emplace_back(std::forward(v)); - return &(ref_stack.back()->m_data.m_value.array->back()); - } - - JSON_ASSERT(ref_stack.back()->is_object()); - JSON_ASSERT(object_element); - *object_element = BasicJsonType(std::forward(v)); - return object_element; - } - - /// the parsed JSON value - BasicJsonType& root; - /// stack to model hierarchy of values - std::vector ref_stack {}; - /// helper to hold the reference for the next object element - BasicJsonType* object_element = nullptr; - /// whether a syntax error occurred - bool errored = false; - /// whether to throw exceptions in case of errors - const bool allow_exceptions = true; -}; - -template -class json_sax_dom_callback_parser -{ - public: - using number_integer_t = typename BasicJsonType::number_integer_t; - using number_unsigned_t = typename BasicJsonType::number_unsigned_t; - using number_float_t = typename BasicJsonType::number_float_t; - using string_t = typename BasicJsonType::string_t; - using binary_t = typename BasicJsonType::binary_t; - using parser_callback_t = typename BasicJsonType::parser_callback_t; - using parse_event_t = typename BasicJsonType::parse_event_t; - - json_sax_dom_callback_parser(BasicJsonType& r, - parser_callback_t cb, - const bool allow_exceptions_ = true) - : root(r), callback(std::move(cb)), allow_exceptions(allow_exceptions_) - { - keep_stack.push_back(true); - } - - // make class move-only - json_sax_dom_callback_parser(const json_sax_dom_callback_parser&) = delete; - json_sax_dom_callback_parser(json_sax_dom_callback_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) - json_sax_dom_callback_parser& operator=(const json_sax_dom_callback_parser&) = delete; - json_sax_dom_callback_parser& operator=(json_sax_dom_callback_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) - ~json_sax_dom_callback_parser() = default; - - bool null() - { - handle_value(nullptr); - return true; - } - - bool boolean(bool val) - { - handle_value(val); - return true; - } - - bool number_integer(number_integer_t val) - { - handle_value(val); - return true; - } - - bool number_unsigned(number_unsigned_t val) - { - handle_value(val); - return true; - } - - bool number_float(number_float_t val, const string_t& /*unused*/) - { - handle_value(val); - return true; - } - - bool string(string_t& val) - { - handle_value(val); - return true; - } - - bool binary(binary_t& val) - { - handle_value(std::move(val)); - return true; - } - - bool start_object(std::size_t len) - { - // check callback for object start - const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::object_start, discarded); - keep_stack.push_back(keep); - - auto val = handle_value(BasicJsonType::value_t::object, true); - ref_stack.push_back(val.second); - - // check object limit - if (ref_stack.back() && JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) - { - JSON_THROW(out_of_range::create(408, concat("excessive object size: ", std::to_string(len)), ref_stack.back())); - } - - return true; - } - - bool key(string_t& val) - { - BasicJsonType k = BasicJsonType(val); - - // check callback for key - const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::key, k); - key_keep_stack.push_back(keep); - - // add discarded value at given key and store the reference for later - if (keep && ref_stack.back()) - { - object_element = &(ref_stack.back()->m_data.m_value.object->operator[](val) = discarded); - } - - return true; - } - - bool end_object() - { - if (ref_stack.back()) - { - if (!callback(static_cast(ref_stack.size()) - 1, parse_event_t::object_end, *ref_stack.back())) - { - // discard object - *ref_stack.back() = discarded; - } - else - { - ref_stack.back()->set_parents(); - } - } - - JSON_ASSERT(!ref_stack.empty()); - JSON_ASSERT(!keep_stack.empty()); - ref_stack.pop_back(); - keep_stack.pop_back(); - - if (!ref_stack.empty() && ref_stack.back() && ref_stack.back()->is_structured()) - { - // remove discarded value - for (auto it = ref_stack.back()->begin(); it != ref_stack.back()->end(); ++it) - { - if (it->is_discarded()) - { - ref_stack.back()->erase(it); - break; - } - } - } - - return true; - } - - bool start_array(std::size_t len) - { - const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::array_start, discarded); - keep_stack.push_back(keep); - - auto val = handle_value(BasicJsonType::value_t::array, true); - ref_stack.push_back(val.second); - - // check array limit - if (ref_stack.back() && JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) - { - JSON_THROW(out_of_range::create(408, concat("excessive array size: ", std::to_string(len)), ref_stack.back())); - } - - return true; - } - - bool end_array() - { - bool keep = true; - - if (ref_stack.back()) - { - keep = callback(static_cast(ref_stack.size()) - 1, parse_event_t::array_end, *ref_stack.back()); - if (keep) - { - ref_stack.back()->set_parents(); - } - else - { - // discard array - *ref_stack.back() = discarded; - } - } - - JSON_ASSERT(!ref_stack.empty()); - JSON_ASSERT(!keep_stack.empty()); - ref_stack.pop_back(); - keep_stack.pop_back(); - - // remove discarded value - if (!keep && !ref_stack.empty() && ref_stack.back()->is_array()) - { - ref_stack.back()->m_data.m_value.array->pop_back(); - } - - return true; - } - - template - bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, - const Exception& ex) - { - errored = true; - static_cast(ex); - if (allow_exceptions) - { - JSON_THROW(ex); - } - return false; - } - - constexpr bool is_errored() const - { - return errored; - } - - private: - /*! - @param[in] v value to add to the JSON value we build during parsing - @param[in] skip_callback whether we should skip calling the callback - function; this is required after start_array() and - start_object() SAX events, because otherwise we would call the - callback function with an empty array or object, respectively. - - @invariant If the ref stack is empty, then the passed value will be the new - root. - @invariant If the ref stack contains a value, then it is an array or an - object to which we can add elements - - @return pair of boolean (whether value should be kept) and pointer (to the - passed value in the ref_stack hierarchy; nullptr if not kept) - */ - template - std::pair handle_value(Value&& v, const bool skip_callback = false) - { - JSON_ASSERT(!keep_stack.empty()); - - // do not handle this value if we know it would be added to a discarded - // container - if (!keep_stack.back()) - { - return {false, nullptr}; - } - - // create value - auto value = BasicJsonType(std::forward(v)); - - // check callback - const bool keep = skip_callback || callback(static_cast(ref_stack.size()), parse_event_t::value, value); - - // do not handle this value if we just learnt it shall be discarded - if (!keep) - { - return {false, nullptr}; - } - - if (ref_stack.empty()) - { - root = std::move(value); - return {true, & root}; - } - - // skip this value if we already decided to skip the parent - // (https://github.com/nlohmann/json/issues/971#issuecomment-413678360) - if (!ref_stack.back()) - { - return {false, nullptr}; - } - - // we now only expect arrays and objects - JSON_ASSERT(ref_stack.back()->is_array() || ref_stack.back()->is_object()); - - // array - if (ref_stack.back()->is_array()) - { - ref_stack.back()->m_data.m_value.array->emplace_back(std::move(value)); - return {true, & (ref_stack.back()->m_data.m_value.array->back())}; - } - - // object - JSON_ASSERT(ref_stack.back()->is_object()); - // check if we should store an element for the current key - JSON_ASSERT(!key_keep_stack.empty()); - const bool store_element = key_keep_stack.back(); - key_keep_stack.pop_back(); - - if (!store_element) - { - return {false, nullptr}; - } - - JSON_ASSERT(object_element); - *object_element = std::move(value); - return {true, object_element}; - } - - /// the parsed JSON value - BasicJsonType& root; - /// stack to model hierarchy of values - std::vector ref_stack {}; - /// stack to manage which values to keep - std::vector keep_stack {}; // NOLINT(readability-redundant-member-init) - /// stack to manage which object keys to keep - std::vector key_keep_stack {}; // NOLINT(readability-redundant-member-init) - /// helper to hold the reference for the next object element - BasicJsonType* object_element = nullptr; - /// whether a syntax error occurred - bool errored = false; - /// callback function - const parser_callback_t callback = nullptr; - /// whether to throw exceptions in case of errors - const bool allow_exceptions = true; - /// a discarded value for the callback - BasicJsonType discarded = BasicJsonType::value_t::discarded; -}; - -template -class json_sax_acceptor -{ - public: - using number_integer_t = typename BasicJsonType::number_integer_t; - using number_unsigned_t = typename BasicJsonType::number_unsigned_t; - using number_float_t = typename BasicJsonType::number_float_t; - using string_t = typename BasicJsonType::string_t; - using binary_t = typename BasicJsonType::binary_t; - - bool null() - { - return true; - } - - bool boolean(bool /*unused*/) - { - return true; - } - - bool number_integer(number_integer_t /*unused*/) - { - return true; - } - - bool number_unsigned(number_unsigned_t /*unused*/) - { - return true; - } - - bool number_float(number_float_t /*unused*/, const string_t& /*unused*/) - { - return true; - } - - bool string(string_t& /*unused*/) - { - return true; - } - - bool binary(binary_t& /*unused*/) - { - return true; - } - - bool start_object(std::size_t /*unused*/ = static_cast(-1)) - { - return true; - } - - bool key(string_t& /*unused*/) - { - return true; - } - - bool end_object() - { - return true; - } - - bool start_array(std::size_t /*unused*/ = static_cast(-1)) - { - return true; - } - - bool end_array() - { - return true; - } - - bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, const detail::exception& /*unused*/) - { - return false; - } -}; - -} // namespace detail -NLOHMANN_JSON_NAMESPACE_END - // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ @@ -9241,6 +8542,974 @@ NLOHMANN_JSON_NAMESPACE_END // #include +// #include + +NLOHMANN_JSON_NAMESPACE_BEGIN + +/*! +@brief SAX interface + +This class describes the SAX interface used by @ref nlohmann::json::sax_parse. +Each function is called in different situations while the input is parsed. The +boolean return value informs the parser whether to continue processing the +input. +*/ +template +struct json_sax +{ + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; + using binary_t = typename BasicJsonType::binary_t; + + /*! + @brief a null value was read + @return whether parsing should proceed + */ + virtual bool null() = 0; + + /*! + @brief a boolean value was read + @param[in] val boolean value + @return whether parsing should proceed + */ + virtual bool boolean(bool val) = 0; + + /*! + @brief an integer number was read + @param[in] val integer value + @return whether parsing should proceed + */ + virtual bool number_integer(number_integer_t val) = 0; + + /*! + @brief an unsigned integer number was read + @param[in] val unsigned integer value + @return whether parsing should proceed + */ + virtual bool number_unsigned(number_unsigned_t val) = 0; + + /*! + @brief a floating-point number was read + @param[in] val floating-point value + @param[in] s raw token value + @return whether parsing should proceed + */ + virtual bool number_float(number_float_t val, const string_t& s) = 0; + + /*! + @brief a string value was read + @param[in] val string value + @return whether parsing should proceed + @note It is safe to move the passed string value. + */ + virtual bool string(string_t& val) = 0; + + /*! + @brief a binary value was read + @param[in] val binary value + @return whether parsing should proceed + @note It is safe to move the passed binary value. + */ + virtual bool binary(binary_t& val) = 0; + + /*! + @brief the beginning of an object was read + @param[in] elements number of object elements or -1 if unknown + @return whether parsing should proceed + @note binary formats may report the number of elements + */ + virtual bool start_object(std::size_t elements) = 0; + + /*! + @brief an object key was read + @param[in] val object key + @return whether parsing should proceed + @note It is safe to move the passed string. + */ + virtual bool key(string_t& val) = 0; + + /*! + @brief the end of an object was read + @return whether parsing should proceed + */ + virtual bool end_object() = 0; + + /*! + @brief the beginning of an array was read + @param[in] elements number of array elements or -1 if unknown + @return whether parsing should proceed + @note binary formats may report the number of elements + */ + virtual bool start_array(std::size_t elements) = 0; + + /*! + @brief the end of an array was read + @return whether parsing should proceed + */ + virtual bool end_array() = 0; + + /*! + @brief a parse error occurred + @param[in] position the position in the input where the error occurs + @param[in] last_token the last read token + @param[in] ex an exception object describing the error + @return whether parsing should proceed (must return false) + */ + virtual bool parse_error(std::size_t position, + const std::string& last_token, + const detail::exception& ex) = 0; + + json_sax() = default; + json_sax(const json_sax&) = default; + json_sax(json_sax&&) noexcept = default; + json_sax& operator=(const json_sax&) = default; + json_sax& operator=(json_sax&&) noexcept = default; + virtual ~json_sax() = default; +}; + +namespace detail +{ +/*! +@brief SAX implementation to create a JSON value from SAX events + +This class implements the @ref json_sax interface and processes the SAX events +to create a JSON value which makes it basically a DOM parser. The structure or +hierarchy of the JSON value is managed by the stack `ref_stack` which contains +a pointer to the respective array or object for each recursion depth. + +After successful parsing, the value that is passed by reference to the +constructor contains the parsed value. + +@tparam BasicJsonType the JSON type +*/ +template +class json_sax_dom_parser +{ + public: + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; + using binary_t = typename BasicJsonType::binary_t; + using lexer_t = lexer; + + /*! + @param[in,out] r reference to a JSON value that is manipulated while + parsing + @param[in] allow_exceptions_ whether parse errors yield exceptions + */ + explicit json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true, lexer_t* lexer_ = nullptr) + : root(r), allow_exceptions(allow_exceptions_), m_lexer_ref(lexer_) + {} + + // make class move-only + json_sax_dom_parser(const json_sax_dom_parser&) = delete; + json_sax_dom_parser(json_sax_dom_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) + json_sax_dom_parser& operator=(const json_sax_dom_parser&) = delete; + json_sax_dom_parser& operator=(json_sax_dom_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) + ~json_sax_dom_parser() = default; + + bool null() + { + handle_value(nullptr); + return true; + } + + bool boolean(bool val) + { + handle_value(val); + return true; + } + + bool number_integer(number_integer_t val) + { + handle_value(val); + return true; + } + + bool number_unsigned(number_unsigned_t val) + { + handle_value(val); + return true; + } + + bool number_float(number_float_t val, const string_t& /*unused*/) + { + handle_value(val); + return true; + } + + bool string(string_t& val) + { + handle_value(val); + return true; + } + + bool binary(binary_t& val) + { + handle_value(std::move(val)); + return true; + } + + bool start_object(std::size_t len) + { + ref_stack.push_back(handle_value(BasicJsonType::value_t::object)); + +#if JSON_DIAGNOSTIC_POSITIONS + // Manually set the start position of the object here. + // Ensure this is after the call to handle_value to ensure correct start position. + if (m_lexer_ref) + { + // Lexer has read the first character of the object, so + // subtract 1 from the position to get the correct start position. + ref_stack.back()->start_position = m_lexer_ref->get_position() - 1; + } +#endif + + if (JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, concat("excessive object size: ", std::to_string(len)), ref_stack.back())); + } + + return true; + } + + bool key(string_t& val) + { + JSON_ASSERT(!ref_stack.empty()); + JSON_ASSERT(ref_stack.back()->is_object()); + + // add null at given key and store the reference for later + object_element = &(ref_stack.back()->m_data.m_value.object->operator[](val)); + return true; + } + + bool end_object() + { + JSON_ASSERT(!ref_stack.empty()); + JSON_ASSERT(ref_stack.back()->is_object()); + +#if JSON_DIAGNOSTIC_POSITIONS + if (m_lexer_ref) + { + // Lexer's position is past the closing brace, so set that as the end position. + ref_stack.back()->end_position = m_lexer_ref->get_position(); + } +#endif + + ref_stack.back()->set_parents(); + ref_stack.pop_back(); + return true; + } + + bool start_array(std::size_t len) + { + ref_stack.push_back(handle_value(BasicJsonType::value_t::array)); + +#if JSON_DIAGNOSTIC_POSITIONS + // Manually set the start position of the array here. + // Ensure this is after the call to handle_value to ensure correct start position. + if (m_lexer_ref) + { + ref_stack.back()->start_position = m_lexer_ref->get_position() - 1; + } +#endif + + if (JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, concat("excessive array size: ", std::to_string(len)), ref_stack.back())); + } + + return true; + } + + bool end_array() + { + JSON_ASSERT(!ref_stack.empty()); + JSON_ASSERT(ref_stack.back()->is_array()); + +#if JSON_DIAGNOSTIC_POSITIONS + if (m_lexer_ref) + { + // Lexer's position is past the closing bracket, so set that as the end position. + ref_stack.back()->end_position = m_lexer_ref->get_position(); + } +#endif + + ref_stack.back()->set_parents(); + ref_stack.pop_back(); + return true; + } + + template + bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, + const Exception& ex) + { + errored = true; + static_cast(ex); + if (allow_exceptions) + { + JSON_THROW(ex); + } + return false; + } + + constexpr bool is_errored() const + { + return errored; + } + + private: + +#if JSON_DIAGNOSTIC_POSITIONS + void handle_diagnostic_positions_for_json_value(BasicJsonType& v) + { + if (m_lexer_ref) + { + // Lexer has read past the current field value, so set the end position to the current position. + // The start position will be set below based on the length of the string representation + // of the value. + v.end_position = m_lexer_ref->get_position(); + + switch (v.type()) + { + case value_t::boolean: + { + // 4 and 5 are the string length of "true" and "false" + v.start_position = v.end_position - (v.m_data.m_value.boolean ? 4 : 5); + break; + } + + case value_t::null: + { + // 4 is the string length of "null" + v.start_position = v.end_position - 4; + break; + } + + case value_t::string: + { + // include the length of the quotes, which is 2 + v.start_position = v.end_position - v.m_data.m_value.string->size() - 2; + break; + } + + // As we handle the start and end positions for values created during parsing, + // we do not expect the following value type to be called. Regardless, set the positions + // in case this is created manually or through a different constructor. Exclude from lcov + // since the exact condition of this switch is esoteric. + // LCOV_EXCL_START + case value_t::discarded: + { + v.end_position = std::string::npos; + v.start_position = v.end_position; + break; + } + // LCOV_EXCL_STOP + case value_t::binary: + case value_t::number_integer: + case value_t::number_unsigned: + case value_t::number_float: + { + v.start_position = v.end_position - m_lexer_ref->get_string().size(); + break; + } + case value_t::object: + case value_t::array: + { + // object and array are handled in start_object() and start_array() handlers + // skip setting the values here. + break; + } + default: // LCOV_EXCL_LINE + // Handle all possible types discretely, default handler should never be reached. + JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert,-warnings-as-errors) LCOV_EXCL_LINE + } + } + } +#endif + + /*! + @invariant If the ref stack is empty, then the passed value will be the new + root. + @invariant If the ref stack contains a value, then it is an array or an + object to which we can add elements + */ + template + JSON_HEDLEY_RETURNS_NON_NULL + BasicJsonType* handle_value(Value&& v) + { + if (ref_stack.empty()) + { + root = BasicJsonType(std::forward(v)); + +#if JSON_DIAGNOSTIC_POSITIONS + handle_diagnostic_positions_for_json_value(root); +#endif + + return &root; + } + + JSON_ASSERT(ref_stack.back()->is_array() || ref_stack.back()->is_object()); + + if (ref_stack.back()->is_array()) + { + ref_stack.back()->m_data.m_value.array->emplace_back(std::forward(v)); + +#if JSON_DIAGNOSTIC_POSITIONS + handle_diagnostic_positions_for_json_value(ref_stack.back()->m_data.m_value.array->back()); +#endif + + return &(ref_stack.back()->m_data.m_value.array->back()); + } + + JSON_ASSERT(ref_stack.back()->is_object()); + JSON_ASSERT(object_element); + *object_element = BasicJsonType(std::forward(v)); + +#if JSON_DIAGNOSTIC_POSITIONS + handle_diagnostic_positions_for_json_value(*object_element); +#endif + + return object_element; + } + + /// the parsed JSON value + BasicJsonType& root; + /// stack to model hierarchy of values + std::vector ref_stack {}; + /// helper to hold the reference for the next object element + BasicJsonType* object_element = nullptr; + /// whether a syntax error occurred + bool errored = false; + /// whether to throw exceptions in case of errors + const bool allow_exceptions = true; + /// the lexer reference to obtain the current position + lexer_t* m_lexer_ref = nullptr; +}; + +template +class json_sax_dom_callback_parser +{ + public: + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; + using binary_t = typename BasicJsonType::binary_t; + using parser_callback_t = typename BasicJsonType::parser_callback_t; + using parse_event_t = typename BasicJsonType::parse_event_t; + using lexer_t = lexer; + + json_sax_dom_callback_parser(BasicJsonType& r, + parser_callback_t cb, + const bool allow_exceptions_ = true, + lexer_t* lexer_ = nullptr) + : root(r), callback(std::move(cb)), allow_exceptions(allow_exceptions_), m_lexer_ref(lexer_) + { + keep_stack.push_back(true); + } + + // make class move-only + json_sax_dom_callback_parser(const json_sax_dom_callback_parser&) = delete; + json_sax_dom_callback_parser(json_sax_dom_callback_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) + json_sax_dom_callback_parser& operator=(const json_sax_dom_callback_parser&) = delete; + json_sax_dom_callback_parser& operator=(json_sax_dom_callback_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) + ~json_sax_dom_callback_parser() = default; + + bool null() + { + handle_value(nullptr); + return true; + } + + bool boolean(bool val) + { + handle_value(val); + return true; + } + + bool number_integer(number_integer_t val) + { + handle_value(val); + return true; + } + + bool number_unsigned(number_unsigned_t val) + { + handle_value(val); + return true; + } + + bool number_float(number_float_t val, const string_t& /*unused*/) + { + handle_value(val); + return true; + } + + bool string(string_t& val) + { + handle_value(val); + return true; + } + + bool binary(binary_t& val) + { + handle_value(std::move(val)); + return true; + } + + bool start_object(std::size_t len) + { + // check callback for object start + const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::object_start, discarded); + keep_stack.push_back(keep); + + auto val = handle_value(BasicJsonType::value_t::object, true); + ref_stack.push_back(val.second); + + if (ref_stack.back()) + { + +#if JSON_DIAGNOSTIC_POSITIONS + // Manually set the start position of the object here. + // Ensure this is after the call to handle_value to ensure correct start position. + if (m_lexer_ref) + { + // Lexer has read the first character of the object, so + // subtract 1 from the position to get the correct start position. + ref_stack.back()->start_position = m_lexer_ref->get_position() - 1; + } +#endif + + // check object limit + if (JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, concat("excessive object size: ", std::to_string(len)), ref_stack.back())); + } + } + return true; + } + + bool key(string_t& val) + { + BasicJsonType k = BasicJsonType(val); + + // check callback for key + const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::key, k); + key_keep_stack.push_back(keep); + + // add discarded value at given key and store the reference for later + if (keep && ref_stack.back()) + { + object_element = &(ref_stack.back()->m_data.m_value.object->operator[](val) = discarded); + } + + return true; + } + + bool end_object() + { + if (ref_stack.back()) + { + if (!callback(static_cast(ref_stack.size()) - 1, parse_event_t::object_end, *ref_stack.back())) + { + // discard object + *ref_stack.back() = discarded; + +#if JSON_DIAGNOSTIC_POSITIONS + // Set start/end positions for discarded object. + handle_diagnostic_positions_for_json_value(*ref_stack.back()); +#endif + } + else + { + +#if JSON_DIAGNOSTIC_POSITIONS + if (m_lexer_ref) + { + // Lexer's position is past the closing brace, so set that as the end position. + ref_stack.back()->end_position = m_lexer_ref->get_position(); + } +#endif + + ref_stack.back()->set_parents(); + } + } + + JSON_ASSERT(!ref_stack.empty()); + JSON_ASSERT(!keep_stack.empty()); + ref_stack.pop_back(); + keep_stack.pop_back(); + + if (!ref_stack.empty() && ref_stack.back() && ref_stack.back()->is_structured()) + { + // remove discarded value + for (auto it = ref_stack.back()->begin(); it != ref_stack.back()->end(); ++it) + { + if (it->is_discarded()) + { + ref_stack.back()->erase(it); + break; + } + } + } + + return true; + } + + bool start_array(std::size_t len) + { + const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::array_start, discarded); + keep_stack.push_back(keep); + + auto val = handle_value(BasicJsonType::value_t::array, true); + ref_stack.push_back(val.second); + + if (ref_stack.back()) + { + +#if JSON_DIAGNOSTIC_POSITIONS + // Manually set the start position of the array here. + // Ensure this is after the call to handle_value to ensure correct start position. + if (m_lexer_ref) + { + // Lexer has read the first character of the array, so + // subtract 1 from the position to get the correct start position. + ref_stack.back()->start_position = m_lexer_ref->get_position() - 1; + } +#endif + + // check array limit + if (JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, concat("excessive array size: ", std::to_string(len)), ref_stack.back())); + } + } + + return true; + } + + bool end_array() + { + bool keep = true; + + if (ref_stack.back()) + { + keep = callback(static_cast(ref_stack.size()) - 1, parse_event_t::array_end, *ref_stack.back()); + if (keep) + { + +#if JSON_DIAGNOSTIC_POSITIONS + if (m_lexer_ref) + { + // Lexer's position is past the closing bracket, so set that as the end position. + ref_stack.back()->end_position = m_lexer_ref->get_position(); + } +#endif + + ref_stack.back()->set_parents(); + } + else + { + // discard array + *ref_stack.back() = discarded; + +#if JSON_DIAGNOSTIC_POSITIONS + // Set start/end positions for discarded array. + handle_diagnostic_positions_for_json_value(*ref_stack.back()); +#endif + } + } + + JSON_ASSERT(!ref_stack.empty()); + JSON_ASSERT(!keep_stack.empty()); + ref_stack.pop_back(); + keep_stack.pop_back(); + + // remove discarded value + if (!keep && !ref_stack.empty() && ref_stack.back()->is_array()) + { + ref_stack.back()->m_data.m_value.array->pop_back(); + } + + return true; + } + + template + bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, + const Exception& ex) + { + errored = true; + static_cast(ex); + if (allow_exceptions) + { + JSON_THROW(ex); + } + return false; + } + + constexpr bool is_errored() const + { + return errored; + } + + private: + +#if JSON_DIAGNOSTIC_POSITIONS + void handle_diagnostic_positions_for_json_value(BasicJsonType& v) + { + if (m_lexer_ref) + { + // Lexer has read past the current field value, so set the end position to the current position. + // The start position will be set below based on the length of the string representation + // of the value. + v.end_position = m_lexer_ref->get_position(); + + switch (v.type()) + { + case value_t::boolean: + { + // 4 and 5 are the string length of "true" and "false" + v.start_position = v.end_position - (v.m_data.m_value.boolean ? 4 : 5); + break; + } + + case value_t::null: + { + // 4 is the string length of "null" + v.start_position = v.end_position - 4; + break; + } + + case value_t::string: + { + // include the length of the quotes, which is 2 + v.start_position = v.end_position - v.m_data.m_value.string->size() - 2; + break; + } + + case value_t::discarded: + { + v.end_position = std::string::npos; + v.start_position = v.end_position; + break; + } + + case value_t::binary: + case value_t::number_integer: + case value_t::number_unsigned: + case value_t::number_float: + { + v.start_position = v.end_position - m_lexer_ref->get_string().size(); + break; + } + + case value_t::object: + case value_t::array: + { + // object and array are handled in start_object() and start_array() handlers + // skip setting the values here. + break; + } + default: // LCOV_EXCL_LINE + // Handle all possible types discretely, default handler should never be reached. + JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert,-warnings-as-errors) LCOV_EXCL_LINE + } + } + } +#endif + + /*! + @param[in] v value to add to the JSON value we build during parsing + @param[in] skip_callback whether we should skip calling the callback + function; this is required after start_array() and + start_object() SAX events, because otherwise we would call the + callback function with an empty array or object, respectively. + + @invariant If the ref stack is empty, then the passed value will be the new + root. + @invariant If the ref stack contains a value, then it is an array or an + object to which we can add elements + + @return pair of boolean (whether value should be kept) and pointer (to the + passed value in the ref_stack hierarchy; nullptr if not kept) + */ + template + std::pair handle_value(Value&& v, const bool skip_callback = false) + { + JSON_ASSERT(!keep_stack.empty()); + + // do not handle this value if we know it would be added to a discarded + // container + if (!keep_stack.back()) + { + return {false, nullptr}; + } + + // create value + auto value = BasicJsonType(std::forward(v)); + +#if JSON_DIAGNOSTIC_POSITIONS + handle_diagnostic_positions_for_json_value(value); +#endif + + // check callback + const bool keep = skip_callback || callback(static_cast(ref_stack.size()), parse_event_t::value, value); + + // do not handle this value if we just learnt it shall be discarded + if (!keep) + { + return {false, nullptr}; + } + + if (ref_stack.empty()) + { + root = std::move(value); + return {true, & root}; + } + + // skip this value if we already decided to skip the parent + // (https://github.com/nlohmann/json/issues/971#issuecomment-413678360) + if (!ref_stack.back()) + { + return {false, nullptr}; + } + + // we now only expect arrays and objects + JSON_ASSERT(ref_stack.back()->is_array() || ref_stack.back()->is_object()); + + // array + if (ref_stack.back()->is_array()) + { + ref_stack.back()->m_data.m_value.array->emplace_back(std::move(value)); + return {true, & (ref_stack.back()->m_data.m_value.array->back())}; + } + + // object + JSON_ASSERT(ref_stack.back()->is_object()); + // check if we should store an element for the current key + JSON_ASSERT(!key_keep_stack.empty()); + const bool store_element = key_keep_stack.back(); + key_keep_stack.pop_back(); + + if (!store_element) + { + return {false, nullptr}; + } + + JSON_ASSERT(object_element); + *object_element = std::move(value); + return {true, object_element}; + } + + /// the parsed JSON value + BasicJsonType& root; + /// stack to model hierarchy of values + std::vector ref_stack {}; + /// stack to manage which values to keep + std::vector keep_stack {}; // NOLINT(readability-redundant-member-init) + /// stack to manage which object keys to keep + std::vector key_keep_stack {}; // NOLINT(readability-redundant-member-init) + /// helper to hold the reference for the next object element + BasicJsonType* object_element = nullptr; + /// whether a syntax error occurred + bool errored = false; + /// callback function + const parser_callback_t callback = nullptr; + /// whether to throw exceptions in case of errors + const bool allow_exceptions = true; + /// a discarded value for the callback + BasicJsonType discarded = BasicJsonType::value_t::discarded; + /// the lexer reference to obtain the current position + lexer_t* m_lexer_ref = nullptr; +}; + +template +class json_sax_acceptor +{ + public: + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; + using binary_t = typename BasicJsonType::binary_t; + + bool null() + { + return true; + } + + bool boolean(bool /*unused*/) + { + return true; + } + + bool number_integer(number_integer_t /*unused*/) + { + return true; + } + + bool number_unsigned(number_unsigned_t /*unused*/) + { + return true; + } + + bool number_float(number_float_t /*unused*/, const string_t& /*unused*/) + { + return true; + } + + bool string(string_t& /*unused*/) + { + return true; + } + + bool binary(binary_t& /*unused*/) + { + return true; + } + + bool start_object(std::size_t /*unused*/ = static_cast(-1)) + { + return true; + } + + bool key(string_t& /*unused*/) + { + return true; + } + + bool end_object() + { + return true; + } + + bool start_array(std::size_t /*unused*/ = static_cast(-1)) + { + return true; + } + + bool end_array() + { + return true; + } + + bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, const detail::exception& /*unused*/) + { + return false; + } +}; + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + +// #include + +// #include + // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ @@ -9443,7 +9712,7 @@ static inline bool little_endianness(int num = 1) noexcept /*! @brief deserialization of CBOR, MessagePack, and UBJSON values */ -template> +template> class binary_reader { using number_integer_t = typename BasicJsonType::number_integer_t; @@ -12530,7 +12799,7 @@ class parser { if (callback) { - json_sax_dom_callback_parser sdp(result, callback, allow_exceptions); + json_sax_dom_callback_parser sdp(result, callback, allow_exceptions, &m_lexer); sax_parse_internal(&sdp); // in strict mode, input must be completely read @@ -12558,7 +12827,7 @@ class parser } else { - json_sax_dom_parser sdp(result, allow_exceptions); + json_sax_dom_parser sdp(result, allow_exceptions, &m_lexer); sax_parse_internal(&sdp); // in strict mode, input must be completely read @@ -19701,9 +19970,9 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec friend class ::nlohmann::detail::binary_writer; template friend class ::nlohmann::detail::binary_reader; - template + template friend class ::nlohmann::detail::json_sax_dom_parser; - template + template friend class ::nlohmann::detail::json_sax_dom_callback_parser; friend class ::nlohmann::detail::exception; @@ -20434,6 +20703,10 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec detail::enable_if_t < detail::is_basic_json::value&& !std::is_same::value, int > = 0 > basic_json(const BasicJsonType& val) +#if JSON_DIAGNOSTIC_POSITIONS + : start_position(val.start_position), + end_position(val.end_position) +#endif { using other_boolean_t = typename BasicJsonType::boolean_t; using other_number_float_t = typename BasicJsonType::number_float_t; @@ -20480,6 +20753,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE } JSON_ASSERT(m_data.m_type == val.type()); + set_parents(); assert_invariant(); } @@ -20731,6 +21005,10 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @sa https://json.nlohmann.me/api/basic_json/basic_json/ basic_json(const basic_json& other) : json_base_class_t(other) +#if JSON_DIAGNOSTIC_POSITIONS + , start_position(other.start_position) + , end_position(other.end_position) +#endif { m_data.m_type = other.m_data.m_type; // check of passed value is valid @@ -20801,6 +21079,10 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec basic_json(basic_json&& other) noexcept : json_base_class_t(std::forward(other)), m_data(std::move(other.m_data)) // cppcheck-suppress[accessForwarded] TODO check +#if JSON_DIAGNOSTIC_POSITIONS + , start_position(other.start_position) // cppcheck-suppress[accessForwarded] TODO check + , end_position(other.end_position) // cppcheck-suppress[accessForwarded] TODO check +#endif { // check that passed value is valid other.assert_invariant(false); // cppcheck-suppress[accessForwarded] @@ -20809,6 +21091,11 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec other.m_data.m_type = value_t::null; other.m_data.m_value = {}; +#if JSON_DIAGNOSTIC_POSITIONS + other.start_position = std::string::npos; + other.end_position = std::string::npos; +#endif + set_parents(); assert_invariant(); } @@ -20829,6 +21116,12 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec using std::swap; swap(m_data.m_type, other.m_data.m_type); swap(m_data.m_value, other.m_data.m_value); + +#if JSON_DIAGNOSTIC_POSITIONS + swap(start_position, other.start_position); + swap(end_position, other.end_position); +#endif + json_base_class_t::operator=(std::move(other)); set_parents(); @@ -23812,6 +24105,23 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec basic_json* m_parent = nullptr; #endif +#if JSON_DIAGNOSTIC_POSITIONS + /// the start position of the value + std::size_t start_position = std::string::npos; + /// the end position of the value + std::size_t end_position = std::string::npos; + public: + constexpr std::size_t start_pos() const noexcept + { + return start_position; + } + + constexpr std::size_t end_pos() const noexcept + { + return end_position; + } +#endif + ////////////////////////////////////////// // binary serialization/deserialization // ////////////////////////////////////////// @@ -23953,8 +24263,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::cbor).sax_parse(input_format_t::cbor, &sdp, strict, tag_handler); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -23969,8 +24279,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::cbor).sax_parse(input_format_t::cbor, &sdp, strict, tag_handler); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -23994,8 +24304,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = i.get(); + detail::json_sax_dom_parser sdp(result, allow_exceptions); // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) const bool res = binary_reader(std::move(ia), input_format_t::cbor).sax_parse(input_format_t::cbor, &sdp, strict, tag_handler); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); @@ -24010,8 +24320,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::msgpack).sax_parse(input_format_t::msgpack, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -24025,8 +24335,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::msgpack).sax_parse(input_format_t::msgpack, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -24048,8 +24358,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = i.get(); + detail::json_sax_dom_parser sdp(result, allow_exceptions); // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) const bool res = binary_reader(std::move(ia), input_format_t::msgpack).sax_parse(input_format_t::msgpack, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); @@ -24064,8 +24374,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::ubjson).sax_parse(input_format_t::ubjson, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -24079,8 +24389,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::ubjson).sax_parse(input_format_t::ubjson, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -24102,8 +24412,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = i.get(); + detail::json_sax_dom_parser sdp(result, allow_exceptions); // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) const bool res = binary_reader(std::move(ia), input_format_t::ubjson).sax_parse(input_format_t::ubjson, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); @@ -24118,8 +24428,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::bjdata).sax_parse(input_format_t::bjdata, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -24133,8 +24443,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::bjdata).sax_parse(input_format_t::bjdata, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -24148,8 +24458,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::bson).sax_parse(input_format_t::bson, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -24163,8 +24473,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::bson).sax_parse(input_format_t::bson, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -24186,8 +24496,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = i.get(); + detail::json_sax_dom_parser sdp(result, allow_exceptions); // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) const bool res = binary_reader(std::move(ia), input_format_t::bson).sax_parse(input_format_t::bson, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); diff --git a/single_include/nlohmann/json_fwd.hpp b/single_include/nlohmann/json_fwd.hpp index f96b6af96..1df3928d7 100644 --- a/single_include/nlohmann/json_fwd.hpp +++ b/single_include/nlohmann/json_fwd.hpp @@ -44,6 +44,10 @@ #define JSON_DIAGNOSTICS 0 #endif +#ifndef JSON_DIAGNOSTIC_POSITIONS + #define JSON_DIAGNOSTIC_POSITIONS 0 +#endif + #ifndef JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON #define JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON 0 #endif @@ -54,6 +58,12 @@ #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS #endif +#if JSON_DIAGNOSTIC_POSITIONS + #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTIC_POSITIONS _dp +#else + #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTIC_POSITIONS +#endif + #if JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON #define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON _ldvcmp #else @@ -65,14 +75,15 @@ #endif // Construct the namespace ABI tags component -#define NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b) json_abi ## a ## b -#define NLOHMANN_JSON_ABI_TAGS_CONCAT(a, b) \ - NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b) +#define NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b, c) json_abi ## a ## b ## c +#define NLOHMANN_JSON_ABI_TAGS_CONCAT(a, b, c) \ + NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b, c) #define NLOHMANN_JSON_ABI_TAGS \ NLOHMANN_JSON_ABI_TAGS_CONCAT( \ NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS, \ - NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON) + NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON, \ + NLOHMANN_JSON_ABI_TAG_DIAGNOSTIC_POSITIONS) // Construct the namespace version component #define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch) \ diff --git a/tests/src/unit-cbor.cpp b/tests/src/unit-cbor.cpp index cfee5f5dc..6c0c50ae5 100644 --- a/tests/src/unit-cbor.cpp +++ b/tests/src/unit-cbor.cpp @@ -1631,7 +1631,7 @@ TEST_CASE("CBOR") }; json j; - auto cbp = nlohmann::detail::json_sax_dom_callback_parser(j, callback, true); + auto cbp = nlohmann::detail::json_sax_dom_callback_parser(j, callback, true); CHECK(json::sax_parse(input, &cbp, json::input_format_t::cbor)); CHECK(j.at("foo").is_binary()); CHECK(binary_seen); diff --git a/tests/src/unit-class_parser.cpp b/tests/src/unit-class_parser.cpp index 017a789b4..36de35e55 100644 --- a/tests/src/unit-class_parser.cpp +++ b/tests/src/unit-class_parser.cpp @@ -219,7 +219,7 @@ json parser_helper(const std::string& s) CHECK(j_nothrow == j); json j_sax; - nlohmann::detail::json_sax_dom_parser sdp(j_sax); + nlohmann::detail::json_sax_dom_parser sdp(j_sax); json::sax_parse(s, &sdp); CHECK(j_sax == j); diff --git a/tests/src/unit-class_parser_diagnostic_positions.cpp b/tests/src/unit-class_parser_diagnostic_positions.cpp new file mode 100644 index 000000000..b4ef434d6 --- /dev/null +++ b/tests/src/unit-class_parser_diagnostic_positions.cpp @@ -0,0 +1,1957 @@ +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ (supporting code) +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + +#include "doctest_compatibility.h" +#define JSON_TESTS_PRIVATE +#ifdef JSON_DIAGNOSTIC_POSITIONS + #undef JSON_DIAGNOSTIC_POSITIONS +#endif + +#define JSON_DIAGNOSTIC_POSITIONS 1 +#include +using nlohmann::json; + +#ifdef JSON_TEST_NO_GLOBAL_UDLS + using namespace nlohmann::literals; // NOLINT(google-build-using-namespace) +#endif + +#include + +namespace +{ +class SaxEventLogger +{ + public: + bool null() + { + events.emplace_back("null()"); + return true; + } + + bool boolean(bool val) + { + events.emplace_back(val ? "boolean(true)" : "boolean(false)"); + return true; + } + + bool number_integer(json::number_integer_t val) + { + events.push_back("number_integer(" + std::to_string(val) + ")"); + return true; + } + + bool number_unsigned(json::number_unsigned_t val) + { + events.push_back("number_unsigned(" + std::to_string(val) + ")"); + return true; + } + + bool number_float(json::number_float_t /*unused*/, const std::string& s) + { + events.push_back("number_float(" + s + ")"); + return true; + } + + bool string(std::string& val) + { + events.push_back("string(" + val + ")"); + return true; + } + + bool binary(json::binary_t& val) + { + std::string binary_contents = "binary("; + std::string comma_space; + for (auto b : val) + { + binary_contents.append(comma_space); + binary_contents.append(std::to_string(static_cast(b))); + comma_space = ", "; + } + binary_contents.append(")"); + events.push_back(binary_contents); + return true; + } + + bool start_object(std::size_t elements) + { + if (elements == static_cast(-1)) + { + events.emplace_back("start_object()"); + } + else + { + events.push_back("start_object(" + std::to_string(elements) + ")"); + } + return true; + } + + bool key(std::string& val) + { + events.push_back("key(" + val + ")"); + return true; + } + + bool end_object() + { + events.emplace_back("end_object()"); + return true; + } + + bool start_array(std::size_t elements) + { + if (elements == static_cast(-1)) + { + events.emplace_back("start_array()"); + } + else + { + events.push_back("start_array(" + std::to_string(elements) + ")"); + } + return true; + } + + bool end_array() + { + events.emplace_back("end_array()"); + return true; + } + + bool parse_error(std::size_t position, const std::string& /*unused*/, const json::exception& /*unused*/) + { + errored = true; + events.push_back("parse_error(" + std::to_string(position) + ")"); + return false; + } + + std::vector events {}; // NOLINT(readability-redundant-member-init) + bool errored = false; +}; + +class SaxCountdown : public nlohmann::json::json_sax_t +{ + public: + explicit SaxCountdown(const int count) : events_left(count) + {} + + bool null() override + { + return events_left-- > 0; + } + + bool boolean(bool /*val*/) override + { + return events_left-- > 0; + } + + bool number_integer(json::number_integer_t /*val*/) override + { + return events_left-- > 0; + } + + bool number_unsigned(json::number_unsigned_t /*val*/) override + { + return events_left-- > 0; + } + + bool number_float(json::number_float_t /*val*/, const std::string& /*s*/) override + { + return events_left-- > 0; + } + + bool string(std::string& /*val*/) override + { + return events_left-- > 0; + } + + bool binary(json::binary_t& /*val*/) override + { + return events_left-- > 0; + } + + bool start_object(std::size_t /*elements*/) override + { + return events_left-- > 0; + } + + bool key(std::string& /*val*/) override + { + return events_left-- > 0; + } + + bool end_object() override + { + return events_left-- > 0; + } + + bool start_array(std::size_t /*elements*/) override + { + return events_left-- > 0; + } + + bool end_array() override + { + return events_left-- > 0; + } + + bool parse_error(std::size_t /*position*/, const std::string& /*last_token*/, const json::exception& /*ex*/) override + { + return false; + } + + private: + int events_left = 0; +}; + +json parser_helper(const std::string& s); +bool accept_helper(const std::string& s); +void comments_helper(const std::string& s); + +json parser_helper(const std::string& s) +{ + json j; + json::parser(nlohmann::detail::input_adapter(s)).parse(true, j); + + // if this line was reached, no exception occurred + // -> check if result is the same without exceptions + json j_nothrow; + CHECK_NOTHROW(json::parser(nlohmann::detail::input_adapter(s), nullptr, false).parse(true, j_nothrow)); + CHECK(j_nothrow == j); + + json j_sax; + nlohmann::detail::json_sax_dom_parser sdp(j_sax); + json::sax_parse(s, &sdp); + CHECK(j_sax == j); + + comments_helper(s); + + return j; +} + +bool accept_helper(const std::string& s) +{ + CAPTURE(s) + + // 1. parse s without exceptions + json j; + CHECK_NOTHROW(json::parser(nlohmann::detail::input_adapter(s), nullptr, false).parse(true, j)); + const bool ok_noexcept = !j.is_discarded(); + + // 2. accept s + const bool ok_accept = json::parser(nlohmann::detail::input_adapter(s)).accept(true); + + // 3. check if both approaches come to the same result + CHECK(ok_noexcept == ok_accept); + + // 4. parse with SAX (compare with relaxed accept result) + SaxEventLogger el; + CHECK_NOTHROW(json::sax_parse(s, &el, json::input_format_t::json, false)); + CHECK(json::parser(nlohmann::detail::input_adapter(s)).accept(false) == !el.errored); + + // 5. parse with simple callback + json::parser_callback_t const cb = [](int /*unused*/, json::parse_event_t /*unused*/, json& /*unused*/) noexcept + { + return true; + }; + json const j_cb = json::parse(s, cb, false); + const bool ok_noexcept_cb = !j_cb.is_discarded(); + + // 6. check if this approach came to the same result + CHECK(ok_noexcept == ok_noexcept_cb); + + // 7. check if comments are properly ignored + if (ok_accept) + { + comments_helper(s); + } + + // 8. return result + return ok_accept; +} + +void comments_helper(const std::string& s) +{ + json _; + + // parse/accept with default parser + CHECK_NOTHROW(_ = json::parse(s)); + CHECK(json::accept(s)); + + // parse/accept while skipping comments + CHECK_NOTHROW(_ = json::parse(s, nullptr, false, true)); + CHECK(json::accept(s, true)); + + std::vector json_with_comments; + + // start with a comment + json_with_comments.push_back(std::string("// this is a comment\n") + s); + json_with_comments.push_back(std::string("/* this is a comment */") + s); + // end with a comment + json_with_comments.push_back(s + "// this is a comment"); + json_with_comments.push_back(s + "/* this is a comment */"); + + // check all strings + for (const auto& json_with_comment : json_with_comments) + { + CAPTURE(json_with_comment) + CHECK_THROWS_AS(_ = json::parse(json_with_comment), json::parse_error); + CHECK(!json::accept(json_with_comment)); + + CHECK_NOTHROW(_ = json::parse(json_with_comment, nullptr, true, true)); + CHECK(json::accept(json_with_comment, true)); + } +} + +/** + * Validates that the generated JSON object is the same as expected + * Validates that the start position and end position match the start and end of the string + * + * This check assumes that there is no whitespace around the json object in the original string. + */ +void validate_generated_json_and_start_end_pos_helper(const std::string& original_string, const json& j, const json& check) +{ + CHECK(j == check); + CHECK(j.start_pos() == 0); + CHECK(j.end_pos() == original_string.size()); +} + +/** + * Parses the root object from the given root string and validates that the start and end positions for the nested object are correct. + * + * This checks that whitespace around the nested object is included in the start and end positions of the root object. + */ +void validate_start_end_pos_for_nested_obj_helper(const std::string& nested_type_json_str, const std::string& root_type_json_str, const json& expected_json, const json::parser_callback_t& cb = nullptr) +{ + json j; + + // 1. If callback is provided, use callback version of parse() + if (cb) + { + j = json::parse(root_type_json_str, cb); + } + else + { + j = json::parse(root_type_json_str); + } + + // 2. Check if the generated JSON is as expected + // Assumptions: The root_type_json_str does not have any whitespace around the json object + validate_generated_json_and_start_end_pos_helper(root_type_json_str, j, expected_json); + + // 3. Get the nested object + const auto& nested = j["nested"]; + // 4. Check if the start and end positions are generated correctly for nested objects and arrays + CHECK(nested_type_json_str == root_type_json_str.substr(nested.start_pos(), nested.end_pos() - nested.start_pos())); +} + +} // namespace + +TEST_CASE("parser class") +{ + SECTION("parse") + { + SECTION("null") + { + CHECK(parser_helper("null") == json(nullptr)); + } + + SECTION("true") + { + CHECK(parser_helper("true") == json(true)); + } + + SECTION("false") + { + CHECK(parser_helper("false") == json(false)); + } + + SECTION("array") + { + SECTION("empty array") + { + CHECK(parser_helper("[]") == json(json::value_t::array)); + CHECK(parser_helper("[ ]") == json(json::value_t::array)); + } + + SECTION("nonempty array") + { + CHECK(parser_helper("[true, false, null]") == json({true, false, nullptr})); + } + } + + SECTION("object") + { + SECTION("empty object") + { + CHECK(parser_helper("{}") == json(json::value_t::object)); + CHECK(parser_helper("{ }") == json(json::value_t::object)); + } + + SECTION("nonempty object") + { + CHECK(parser_helper("{\"\": true, \"one\": 1, \"two\": null}") == json({{"", true}, {"one", 1}, {"two", nullptr}})); + } + } + + SECTION("string") + { + // empty string + CHECK(parser_helper("\"\"") == json(json::value_t::string)); + + SECTION("errors") + { + // error: tab in string + CHECK_THROWS_WITH_AS(parser_helper("\"\t\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0009 (HT) must be escaped to \\u0009 or \\t; last read: '\"'", json::parse_error&); + // error: newline in string + CHECK_THROWS_WITH_AS(parser_helper("\"\n\""), "[json.exception.parse_error.101] parse error at line 2, column 0: syntax error while parsing value - invalid string: control character U+000A (LF) must be escaped to \\u000A or \\n; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\r\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+000D (CR) must be escaped to \\u000D or \\r; last read: '\"'", json::parse_error&); + // error: backspace in string + CHECK_THROWS_WITH_AS(parser_helper("\"\b\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0008 (BS) must be escaped to \\u0008 or \\b; last read: '\"'", json::parse_error&); + // improve code coverage + CHECK_THROWS_AS(parser_helper("\uFF01"), json::parse_error&); + CHECK_THROWS_AS(parser_helper("[-4:1,]"), json::parse_error&); + // unescaped control characters + CHECK_THROWS_WITH_AS(parser_helper("\"\x00\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: missing closing quote; last read: '\"'", json::parse_error&); // NOLINT(bugprone-string-literal-with-embedded-nul) + CHECK_THROWS_WITH_AS(parser_helper("\"\x01\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0001 (SOH) must be escaped to \\u0001; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x02\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0002 (STX) must be escaped to \\u0002; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x03\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0003 (ETX) must be escaped to \\u0003; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x04\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0004 (EOT) must be escaped to \\u0004; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x05\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0005 (ENQ) must be escaped to \\u0005; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x06\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0006 (ACK) must be escaped to \\u0006; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x07\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0007 (BEL) must be escaped to \\u0007; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x08\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0008 (BS) must be escaped to \\u0008 or \\b; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x09\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0009 (HT) must be escaped to \\u0009 or \\t; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x0a\""), "[json.exception.parse_error.101] parse error at line 2, column 0: syntax error while parsing value - invalid string: control character U+000A (LF) must be escaped to \\u000A or \\n; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x0b\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+000B (VT) must be escaped to \\u000B; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x0c\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+000C (FF) must be escaped to \\u000C or \\f; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x0d\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+000D (CR) must be escaped to \\u000D or \\r; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x0e\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+000E (SO) must be escaped to \\u000E; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x0f\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+000F (SI) must be escaped to \\u000F; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x10\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0010 (DLE) must be escaped to \\u0010; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x11\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0011 (DC1) must be escaped to \\u0011; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x12\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0012 (DC2) must be escaped to \\u0012; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x13\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0013 (DC3) must be escaped to \\u0013; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x14\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0014 (DC4) must be escaped to \\u0014; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x15\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0015 (NAK) must be escaped to \\u0015; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x16\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0016 (SYN) must be escaped to \\u0016; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x17\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0017 (ETB) must be escaped to \\u0017; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x18\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0018 (CAN) must be escaped to \\u0018; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x19\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0019 (EM) must be escaped to \\u0019; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x1a\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+001A (SUB) must be escaped to \\u001A; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x1b\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+001B (ESC) must be escaped to \\u001B; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x1c\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+001C (FS) must be escaped to \\u001C; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x1d\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+001D (GS) must be escaped to \\u001D; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x1e\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+001E (RS) must be escaped to \\u001E; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x1f\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+001F (US) must be escaped to \\u001F; last read: '\"'", json::parse_error&); + + SECTION("additional test for null byte") + { + // The test above for the null byte is wrong, because passing + // a string to the parser only reads int until it encounters + // a null byte. This test inserts the null byte later on and + // uses an iterator range. + std::string s = "\"1\""; + s[1] = '\0'; + json _; + CHECK_THROWS_WITH_AS(_ = json::parse(s.begin(), s.end()), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0000 (NUL) must be escaped to \\u0000; last read: '\"'", json::parse_error&); + } + } + + SECTION("escaped") + { + // quotation mark "\"" + auto r1 = R"("\"")"_json; + CHECK(parser_helper("\"\\\"\"") == r1); + // reverse solidus "\\" + auto r2 = R"("\\")"_json; + CHECK(parser_helper("\"\\\\\"") == r2); + // solidus + CHECK(parser_helper("\"\\/\"") == R"("/")"_json); + // backspace + CHECK(parser_helper("\"\\b\"") == json("\b")); + // formfeed + CHECK(parser_helper("\"\\f\"") == json("\f")); + // newline + CHECK(parser_helper("\"\\n\"") == json("\n")); + // carriage return + CHECK(parser_helper("\"\\r\"") == json("\r")); + // horizontal tab + CHECK(parser_helper("\"\\t\"") == json("\t")); + + CHECK(parser_helper("\"\\u0001\"").get() == "\x01"); + CHECK(parser_helper("\"\\u000a\"").get() == "\n"); + CHECK(parser_helper("\"\\u00b0\"").get() == "°"); + CHECK(parser_helper("\"\\u0c00\"").get() == "ఀ"); + CHECK(parser_helper("\"\\ud000\"").get() == "퀀"); + CHECK(parser_helper("\"\\u000E\"").get() == "\x0E"); + CHECK(parser_helper("\"\\u00F0\"").get() == "ð"); + CHECK(parser_helper("\"\\u0100\"").get() == "Ā"); + CHECK(parser_helper("\"\\u2000\"").get() == " "); + CHECK(parser_helper("\"\\uFFFF\"").get() == "￿"); + CHECK(parser_helper("\"\\u20AC\"").get() == "€"); + CHECK(parser_helper("\"€\"").get() == "€"); + CHECK(parser_helper("\"🎈\"").get() == "🎈"); + + CHECK(parser_helper("\"\\ud80c\\udc60\"").get() == "\xf0\x93\x81\xa0"); + CHECK(parser_helper("\"\\ud83c\\udf1e\"").get() == "🌞"); + } + } + + SECTION("number") + { + SECTION("integers") + { + SECTION("without exponent") + { + CHECK(parser_helper("-128") == json(-128)); + CHECK(parser_helper("-0") == json(-0)); + CHECK(parser_helper("0") == json(0)); + CHECK(parser_helper("128") == json(128)); + } + + SECTION("with exponent") + { + CHECK(parser_helper("0e1") == json(0e1)); + CHECK(parser_helper("0E1") == json(0e1)); + + CHECK(parser_helper("10000E-4") == json(10000e-4)); + CHECK(parser_helper("10000E-3") == json(10000e-3)); + CHECK(parser_helper("10000E-2") == json(10000e-2)); + CHECK(parser_helper("10000E-1") == json(10000e-1)); + CHECK(parser_helper("10000E0") == json(10000e0)); + CHECK(parser_helper("10000E1") == json(10000e1)); + CHECK(parser_helper("10000E2") == json(10000e2)); + CHECK(parser_helper("10000E3") == json(10000e3)); + CHECK(parser_helper("10000E4") == json(10000e4)); + + CHECK(parser_helper("10000e-4") == json(10000e-4)); + CHECK(parser_helper("10000e-3") == json(10000e-3)); + CHECK(parser_helper("10000e-2") == json(10000e-2)); + CHECK(parser_helper("10000e-1") == json(10000e-1)); + CHECK(parser_helper("10000e0") == json(10000e0)); + CHECK(parser_helper("10000e1") == json(10000e1)); + CHECK(parser_helper("10000e2") == json(10000e2)); + CHECK(parser_helper("10000e3") == json(10000e3)); + CHECK(parser_helper("10000e4") == json(10000e4)); + + CHECK(parser_helper("-0e1") == json(-0e1)); + CHECK(parser_helper("-0E1") == json(-0e1)); + CHECK(parser_helper("-0E123") == json(-0e123)); + + // numbers after exponent + CHECK(parser_helper("10E0") == json(10e0)); + CHECK(parser_helper("10E1") == json(10e1)); + CHECK(parser_helper("10E2") == json(10e2)); + CHECK(parser_helper("10E3") == json(10e3)); + CHECK(parser_helper("10E4") == json(10e4)); + CHECK(parser_helper("10E5") == json(10e5)); + CHECK(parser_helper("10E6") == json(10e6)); + CHECK(parser_helper("10E7") == json(10e7)); + CHECK(parser_helper("10E8") == json(10e8)); + CHECK(parser_helper("10E9") == json(10e9)); + CHECK(parser_helper("10E+0") == json(10e0)); + CHECK(parser_helper("10E+1") == json(10e1)); + CHECK(parser_helper("10E+2") == json(10e2)); + CHECK(parser_helper("10E+3") == json(10e3)); + CHECK(parser_helper("10E+4") == json(10e4)); + CHECK(parser_helper("10E+5") == json(10e5)); + CHECK(parser_helper("10E+6") == json(10e6)); + CHECK(parser_helper("10E+7") == json(10e7)); + CHECK(parser_helper("10E+8") == json(10e8)); + CHECK(parser_helper("10E+9") == json(10e9)); + CHECK(parser_helper("10E-1") == json(10e-1)); + CHECK(parser_helper("10E-2") == json(10e-2)); + CHECK(parser_helper("10E-3") == json(10e-3)); + CHECK(parser_helper("10E-4") == json(10e-4)); + CHECK(parser_helper("10E-5") == json(10e-5)); + CHECK(parser_helper("10E-6") == json(10e-6)); + CHECK(parser_helper("10E-7") == json(10e-7)); + CHECK(parser_helper("10E-8") == json(10e-8)); + CHECK(parser_helper("10E-9") == json(10e-9)); + } + + SECTION("edge cases") + { + // From RFC8259, Section 6: + // Note that when such software is used, numbers that are + // integers and are in the range [-(2**53)+1, (2**53)-1] + // are interoperable in the sense that implementations will + // agree exactly on their numeric values. + + // -(2**53)+1 + CHECK(parser_helper("-9007199254740991").get() == -9007199254740991); + // (2**53)-1 + CHECK(parser_helper("9007199254740991").get() == 9007199254740991); + } + + SECTION("over the edge cases") // issue #178 - Integer conversion to unsigned (incorrect handling of 64-bit integers) + { + // While RFC8259, Section 6 specifies a preference for support + // for ranges in range of IEEE 754-2008 binary64 (double precision) + // this does not accommodate 64-bit integers without loss of accuracy. + // As 64-bit integers are now widely used in software, it is desirable + // to expand support to the full 64 bit (signed and unsigned) range + // i.e. -(2**63) -> (2**64)-1. + + // -(2**63) ** Note: compilers see negative literals as negated positive numbers (hence the -1)) + CHECK(parser_helper("-9223372036854775808").get() == -9223372036854775807 - 1); + // (2**63)-1 + CHECK(parser_helper("9223372036854775807").get() == 9223372036854775807); + // (2**64)-1 + CHECK(parser_helper("18446744073709551615").get() == 18446744073709551615u); + } + } + + SECTION("floating-point") + { + SECTION("without exponent") + { + CHECK(parser_helper("-128.5") == json(-128.5)); + CHECK(parser_helper("0.999") == json(0.999)); + CHECK(parser_helper("128.5") == json(128.5)); + CHECK(parser_helper("-0.0") == json(-0.0)); + } + + SECTION("with exponent") + { + CHECK(parser_helper("-128.5E3") == json(-128.5E3)); + CHECK(parser_helper("-128.5E-3") == json(-128.5E-3)); + CHECK(parser_helper("-0.0e1") == json(-0.0e1)); + CHECK(parser_helper("-0.0E1") == json(-0.0e1)); + } + } + + SECTION("overflow") + { + // overflows during parsing yield an exception + CHECK_THROWS_WITH_AS(parser_helper("1.18973e+4932").empty(), "[json.exception.out_of_range.406] number overflow parsing '1.18973e+4932'", json::out_of_range&); + } + + SECTION("invalid numbers") + { + // numbers must not begin with "+" + CHECK_THROWS_AS(parser_helper("+1"), json::parse_error&); + CHECK_THROWS_AS(parser_helper("+0"), json::parse_error&); + + CHECK_THROWS_WITH_AS(parser_helper("01"), + "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - unexpected number literal; expected end of input", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("-01"), + "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - unexpected number literal; expected end of input", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("--1"), + "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid number; expected digit after '-'; last read: '--'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("1."), + "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid number; expected digit after '.'; last read: '1.'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("1E"), + "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid number; expected '+', '-', or digit after exponent; last read: '1E'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("1E-"), + "[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - invalid number; expected digit after exponent sign; last read: '1E-'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("1.E1"), + "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid number; expected digit after '.'; last read: '1.E'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("-1E"), + "[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - invalid number; expected '+', '-', or digit after exponent; last read: '-1E'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("-0E#"), + "[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - invalid number; expected '+', '-', or digit after exponent; last read: '-0E#'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("-0E-#"), + "[json.exception.parse_error.101] parse error at line 1, column 5: syntax error while parsing value - invalid number; expected digit after exponent sign; last read: '-0E-#'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("-0#"), + "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid literal; last read: '-0#'; expected end of input", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("-0.0:"), + "[json.exception.parse_error.101] parse error at line 1, column 5: syntax error while parsing value - unexpected ':'; expected end of input", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("-0.0Z"), + "[json.exception.parse_error.101] parse error at line 1, column 5: syntax error while parsing value - invalid literal; last read: '-0.0Z'; expected end of input", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("-0E123:"), + "[json.exception.parse_error.101] parse error at line 1, column 7: syntax error while parsing value - unexpected ':'; expected end of input", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("-0e0-:"), + "[json.exception.parse_error.101] parse error at line 1, column 6: syntax error while parsing value - invalid number; expected digit after '-'; last read: '-:'; expected end of input", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("-0e-:"), + "[json.exception.parse_error.101] parse error at line 1, column 5: syntax error while parsing value - invalid number; expected digit after exponent sign; last read: '-0e-:'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("-0f"), + "[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - invalid literal; last read: '-0f'; expected end of input", json::parse_error&); + } + } + } + + SECTION("accept") + { + SECTION("null") + { + CHECK(accept_helper("null")); + } + + SECTION("true") + { + CHECK(accept_helper("true")); + } + + SECTION("false") + { + CHECK(accept_helper("false")); + } + + SECTION("array") + { + SECTION("empty array") + { + CHECK(accept_helper("[]")); + CHECK(accept_helper("[ ]")); + } + + SECTION("nonempty array") + { + CHECK(accept_helper("[true, false, null]")); + } + } + + SECTION("object") + { + SECTION("empty object") + { + CHECK(accept_helper("{}")); + CHECK(accept_helper("{ }")); + } + + SECTION("nonempty object") + { + CHECK(accept_helper("{\"\": true, \"one\": 1, \"two\": null}")); + } + } + + SECTION("string") + { + // empty string + CHECK(accept_helper("\"\"")); + + SECTION("errors") + { + // error: tab in string + CHECK(accept_helper("\"\t\"") == false); + // error: newline in string + CHECK(accept_helper("\"\n\"") == false); + CHECK(accept_helper("\"\r\"") == false); + // error: backspace in string + CHECK(accept_helper("\"\b\"") == false); + // improve code coverage + CHECK(accept_helper("\uFF01") == false); + CHECK(accept_helper("[-4:1,]") == false); + // unescaped control characters + CHECK(accept_helper("\"\x00\"") == false); // NOLINT(bugprone-string-literal-with-embedded-nul) + CHECK(accept_helper("\"\x01\"") == false); + CHECK(accept_helper("\"\x02\"") == false); + CHECK(accept_helper("\"\x03\"") == false); + CHECK(accept_helper("\"\x04\"") == false); + CHECK(accept_helper("\"\x05\"") == false); + CHECK(accept_helper("\"\x06\"") == false); + CHECK(accept_helper("\"\x07\"") == false); + CHECK(accept_helper("\"\x08\"") == false); + CHECK(accept_helper("\"\x09\"") == false); + CHECK(accept_helper("\"\x0a\"") == false); + CHECK(accept_helper("\"\x0b\"") == false); + CHECK(accept_helper("\"\x0c\"") == false); + CHECK(accept_helper("\"\x0d\"") == false); + CHECK(accept_helper("\"\x0e\"") == false); + CHECK(accept_helper("\"\x0f\"") == false); + CHECK(accept_helper("\"\x10\"") == false); + CHECK(accept_helper("\"\x11\"") == false); + CHECK(accept_helper("\"\x12\"") == false); + CHECK(accept_helper("\"\x13\"") == false); + CHECK(accept_helper("\"\x14\"") == false); + CHECK(accept_helper("\"\x15\"") == false); + CHECK(accept_helper("\"\x16\"") == false); + CHECK(accept_helper("\"\x17\"") == false); + CHECK(accept_helper("\"\x18\"") == false); + CHECK(accept_helper("\"\x19\"") == false); + CHECK(accept_helper("\"\x1a\"") == false); + CHECK(accept_helper("\"\x1b\"") == false); + CHECK(accept_helper("\"\x1c\"") == false); + CHECK(accept_helper("\"\x1d\"") == false); + CHECK(accept_helper("\"\x1e\"") == false); + CHECK(accept_helper("\"\x1f\"") == false); + } + + SECTION("escaped") + { + // quotation mark "\"" + auto r1 = R"("\"")"_json; + CHECK(accept_helper("\"\\\"\"")); + // reverse solidus "\\" + auto r2 = R"("\\")"_json; + CHECK(accept_helper("\"\\\\\"")); + // solidus + CHECK(accept_helper("\"\\/\"")); + // backspace + CHECK(accept_helper("\"\\b\"")); + // formfeed + CHECK(accept_helper("\"\\f\"")); + // newline + CHECK(accept_helper("\"\\n\"")); + // carriage return + CHECK(accept_helper("\"\\r\"")); + // horizontal tab + CHECK(accept_helper("\"\\t\"")); + + CHECK(accept_helper("\"\\u0001\"")); + CHECK(accept_helper("\"\\u000a\"")); + CHECK(accept_helper("\"\\u00b0\"")); + CHECK(accept_helper("\"\\u0c00\"")); + CHECK(accept_helper("\"\\ud000\"")); + CHECK(accept_helper("\"\\u000E\"")); + CHECK(accept_helper("\"\\u00F0\"")); + CHECK(accept_helper("\"\\u0100\"")); + CHECK(accept_helper("\"\\u2000\"")); + CHECK(accept_helper("\"\\uFFFF\"")); + CHECK(accept_helper("\"\\u20AC\"")); + CHECK(accept_helper("\"€\"")); + CHECK(accept_helper("\"🎈\"")); + + CHECK(accept_helper("\"\\ud80c\\udc60\"")); + CHECK(accept_helper("\"\\ud83c\\udf1e\"")); + } + } + + SECTION("number") + { + SECTION("integers") + { + SECTION("without exponent") + { + CHECK(accept_helper("-128")); + CHECK(accept_helper("-0")); + CHECK(accept_helper("0")); + CHECK(accept_helper("128")); + } + + SECTION("with exponent") + { + CHECK(accept_helper("0e1")); + CHECK(accept_helper("0E1")); + + CHECK(accept_helper("10000E-4")); + CHECK(accept_helper("10000E-3")); + CHECK(accept_helper("10000E-2")); + CHECK(accept_helper("10000E-1")); + CHECK(accept_helper("10000E0")); + CHECK(accept_helper("10000E1")); + CHECK(accept_helper("10000E2")); + CHECK(accept_helper("10000E3")); + CHECK(accept_helper("10000E4")); + + CHECK(accept_helper("10000e-4")); + CHECK(accept_helper("10000e-3")); + CHECK(accept_helper("10000e-2")); + CHECK(accept_helper("10000e-1")); + CHECK(accept_helper("10000e0")); + CHECK(accept_helper("10000e1")); + CHECK(accept_helper("10000e2")); + CHECK(accept_helper("10000e3")); + CHECK(accept_helper("10000e4")); + + CHECK(accept_helper("-0e1")); + CHECK(accept_helper("-0E1")); + CHECK(accept_helper("-0E123")); + } + + SECTION("edge cases") + { + // From RFC8259, Section 6: + // Note that when such software is used, numbers that are + // integers and are in the range [-(2**53)+1, (2**53)-1] + // are interoperable in the sense that implementations will + // agree exactly on their numeric values. + + // -(2**53)+1 + CHECK(accept_helper("-9007199254740991")); + // (2**53)-1 + CHECK(accept_helper("9007199254740991")); + } + + SECTION("over the edge cases") // issue #178 - Integer conversion to unsigned (incorrect handling of 64-bit integers) + { + // While RFC8259, Section 6 specifies a preference for support + // for ranges in range of IEEE 754-2008 binary64 (double precision) + // this does not accommodate 64 bit integers without loss of accuracy. + // As 64 bit integers are now widely used in software, it is desirable + // to expand support to the full 64 bit (signed and unsigned) range + // i.e. -(2**63) -> (2**64)-1. + + // -(2**63) ** Note: compilers see negative literals as negated positive numbers (hence the -1)) + CHECK(accept_helper("-9223372036854775808")); + // (2**63)-1 + CHECK(accept_helper("9223372036854775807")); + // (2**64)-1 + CHECK(accept_helper("18446744073709551615")); + } + } + + SECTION("floating-point") + { + SECTION("without exponent") + { + CHECK(accept_helper("-128.5")); + CHECK(accept_helper("0.999")); + CHECK(accept_helper("128.5")); + CHECK(accept_helper("-0.0")); + } + + SECTION("with exponent") + { + CHECK(accept_helper("-128.5E3")); + CHECK(accept_helper("-128.5E-3")); + CHECK(accept_helper("-0.0e1")); + CHECK(accept_helper("-0.0E1")); + } + } + + SECTION("overflow") + { + // overflows during parsing + CHECK(!accept_helper("1.18973e+4932")); + } + + SECTION("invalid numbers") + { + CHECK(accept_helper("01") == false); + CHECK(accept_helper("--1") == false); + CHECK(accept_helper("1.") == false); + CHECK(accept_helper("1E") == false); + CHECK(accept_helper("1E-") == false); + CHECK(accept_helper("1.E1") == false); + CHECK(accept_helper("-1E") == false); + CHECK(accept_helper("-0E#") == false); + CHECK(accept_helper("-0E-#") == false); + CHECK(accept_helper("-0#") == false); + CHECK(accept_helper("-0.0:") == false); + CHECK(accept_helper("-0.0Z") == false); + CHECK(accept_helper("-0E123:") == false); + CHECK(accept_helper("-0e0-:") == false); + CHECK(accept_helper("-0e-:") == false); + CHECK(accept_helper("-0f") == false); + + // numbers must not begin with "+" + CHECK(accept_helper("+1") == false); + CHECK(accept_helper("+0") == false); + } + } + } + + SECTION("parse errors") + { + // unexpected end of number + CHECK_THROWS_WITH_AS(parser_helper("0."), + "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid number; expected digit after '.'; last read: '0.'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("-"), + "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid number; expected digit after '-'; last read: '-'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("--"), + "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid number; expected digit after '-'; last read: '--'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("-0."), + "[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - invalid number; expected digit after '.'; last read: '-0.'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("-."), + "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid number; expected digit after '-'; last read: '-.'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("-:"), + "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid number; expected digit after '-'; last read: '-:'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("0.:"), + "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid number; expected digit after '.'; last read: '0.:'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("e."), + "[json.exception.parse_error.101] parse error at line 1, column 1: syntax error while parsing value - invalid literal; last read: 'e'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("1e."), + "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid number; expected '+', '-', or digit after exponent; last read: '1e.'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("1e/"), + "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid number; expected '+', '-', or digit after exponent; last read: '1e/'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("1e:"), + "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid number; expected '+', '-', or digit after exponent; last read: '1e:'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("1E."), + "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid number; expected '+', '-', or digit after exponent; last read: '1E.'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("1E/"), + "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid number; expected '+', '-', or digit after exponent; last read: '1E/'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("1E:"), + "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid number; expected '+', '-', or digit after exponent; last read: '1E:'", json::parse_error&); + + // unexpected end of null + CHECK_THROWS_WITH_AS(parser_helper("n"), + "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid literal; last read: 'n'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("nu"), + "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid literal; last read: 'nu'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("nul"), + "[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - invalid literal; last read: 'nul'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("nulk"), + "[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - invalid literal; last read: 'nulk'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("nulm"), + "[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - invalid literal; last read: 'nulm'", json::parse_error&); + + // unexpected end of true + CHECK_THROWS_WITH_AS(parser_helper("t"), + "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid literal; last read: 't'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("tr"), + "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid literal; last read: 'tr'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("tru"), + "[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - invalid literal; last read: 'tru'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("trud"), + "[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - invalid literal; last read: 'trud'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("truf"), + "[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - invalid literal; last read: 'truf'", json::parse_error&); + + // unexpected end of false + CHECK_THROWS_WITH_AS(parser_helper("f"), + "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid literal; last read: 'f'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("fa"), + "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid literal; last read: 'fa'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("fal"), + "[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - invalid literal; last read: 'fal'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("fals"), + "[json.exception.parse_error.101] parse error at line 1, column 5: syntax error while parsing value - invalid literal; last read: 'fals'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("falsd"), + "[json.exception.parse_error.101] parse error at line 1, column 5: syntax error while parsing value - invalid literal; last read: 'falsd'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("falsf"), + "[json.exception.parse_error.101] parse error at line 1, column 5: syntax error while parsing value - invalid literal; last read: 'falsf'", json::parse_error&); + + // missing/unexpected end of array + CHECK_THROWS_WITH_AS(parser_helper("["), + "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - unexpected end of input; expected '[', '{', or a literal", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("[1"), + "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing array - unexpected end of input; expected ']'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("[1,"), + "[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - unexpected end of input; expected '[', '{', or a literal", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("[1,]"), + "[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - unexpected ']'; expected '[', '{', or a literal", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("]"), + "[json.exception.parse_error.101] parse error at line 1, column 1: syntax error while parsing value - unexpected ']'; expected '[', '{', or a literal", json::parse_error&); + + // missing/unexpected end of object + CHECK_THROWS_WITH_AS(parser_helper("{"), + "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing object key - unexpected end of input; expected string literal", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("{\"foo\""), + "[json.exception.parse_error.101] parse error at line 1, column 7: syntax error while parsing object separator - unexpected end of input; expected ':'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("{\"foo\":"), + "[json.exception.parse_error.101] parse error at line 1, column 8: syntax error while parsing value - unexpected end of input; expected '[', '{', or a literal", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("{\"foo\":}"), + "[json.exception.parse_error.101] parse error at line 1, column 8: syntax error while parsing value - unexpected '}'; expected '[', '{', or a literal", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("{\"foo\":1,}"), + "[json.exception.parse_error.101] parse error at line 1, column 10: syntax error while parsing object key - unexpected '}'; expected string literal", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("}"), + "[json.exception.parse_error.101] parse error at line 1, column 1: syntax error while parsing value - unexpected '}'; expected '[', '{', or a literal", json::parse_error&); + + // missing/unexpected end of string + CHECK_THROWS_WITH_AS(parser_helper("\""), + "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: missing closing quote; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\\\""), + "[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - invalid string: missing closing quote; last read: '\"\\\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\\u\""), + "[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - invalid string: '\\u' must be followed by 4 hex digits; last read: '\"\\u\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\\u0\""), + "[json.exception.parse_error.101] parse error at line 1, column 5: syntax error while parsing value - invalid string: '\\u' must be followed by 4 hex digits; last read: '\"\\u0\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\\u01\""), + "[json.exception.parse_error.101] parse error at line 1, column 6: syntax error while parsing value - invalid string: '\\u' must be followed by 4 hex digits; last read: '\"\\u01\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\\u012\""), + "[json.exception.parse_error.101] parse error at line 1, column 7: syntax error while parsing value - invalid string: '\\u' must be followed by 4 hex digits; last read: '\"\\u012\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\\u"), + "[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - invalid string: '\\u' must be followed by 4 hex digits; last read: '\"\\u'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\\u0"), + "[json.exception.parse_error.101] parse error at line 1, column 5: syntax error while parsing value - invalid string: '\\u' must be followed by 4 hex digits; last read: '\"\\u0'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\\u01"), + "[json.exception.parse_error.101] parse error at line 1, column 6: syntax error while parsing value - invalid string: '\\u' must be followed by 4 hex digits; last read: '\"\\u01'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\\u012"), + "[json.exception.parse_error.101] parse error at line 1, column 7: syntax error while parsing value - invalid string: '\\u' must be followed by 4 hex digits; last read: '\"\\u012'", json::parse_error&); + + // invalid escapes + for (int c = 1; c < 128; ++c) + { + auto s = std::string("\"\\") + std::string(1, static_cast(c)) + "\""; + + switch (c) + { + // valid escapes + case ('"'): + case ('\\'): + case ('/'): + case ('b'): + case ('f'): + case ('n'): + case ('r'): + case ('t'): + { + CHECK_NOTHROW(parser_helper(s)); + break; + } + + // \u must be followed with four numbers, so we skip it here + case ('u'): + { + break; + } + + // any other combination of backslash and character is invalid + default: + { + CHECK_THROWS_AS(parser_helper(s), json::parse_error&); + // only check error message if c is not a control character + if (c > 0x1f) + { + CHECK_THROWS_WITH_STD_STR(parser_helper(s), + "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid string: forbidden character after backslash; last read: '\"\\" + std::string(1, static_cast(c)) + "'"); + } + break; + } + } + } + + // invalid \uxxxx escapes + { + // check whether character is a valid hex character + const auto valid = [](int c) + { + switch (c) + { + case ('0'): + case ('1'): + case ('2'): + case ('3'): + case ('4'): + case ('5'): + case ('6'): + case ('7'): + case ('8'): + case ('9'): + case ('a'): + case ('b'): + case ('c'): + case ('d'): + case ('e'): + case ('f'): + case ('A'): + case ('B'): + case ('C'): + case ('D'): + case ('E'): + case ('F'): + { + return true; + } + + default: + { + return false; + } + } + }; + + for (int c = 1; c < 128; ++c) + { + std::string const s = "\"\\u"; + + // create a string with the iterated character at each position + auto s1 = s + "000" + std::string(1, static_cast(c)) + "\""; + auto s2 = s + "00" + std::string(1, static_cast(c)) + "0\""; + auto s3 = s + "0" + std::string(1, static_cast(c)) + "00\""; + auto s4 = s + std::string(1, static_cast(c)) + "000\""; + + if (valid(c)) + { + CAPTURE(s1) + CHECK_NOTHROW(parser_helper(s1)); + CAPTURE(s2) + CHECK_NOTHROW(parser_helper(s2)); + CAPTURE(s3) + CHECK_NOTHROW(parser_helper(s3)); + CAPTURE(s4) + CHECK_NOTHROW(parser_helper(s4)); + } + else + { + CAPTURE(s1) + CHECK_THROWS_AS(parser_helper(s1), json::parse_error&); + // only check error message if c is not a control character + if (c > 0x1f) + { + CHECK_THROWS_WITH_STD_STR(parser_helper(s1), + "[json.exception.parse_error.101] parse error at line 1, column 7: syntax error while parsing value - invalid string: '\\u' must be followed by 4 hex digits; last read: '" + s1.substr(0, 7) + "'"); + } + + CAPTURE(s2) + CHECK_THROWS_AS(parser_helper(s2), json::parse_error&); + // only check error message if c is not a control character + if (c > 0x1f) + { + CHECK_THROWS_WITH_STD_STR(parser_helper(s2), + "[json.exception.parse_error.101] parse error at line 1, column 6: syntax error while parsing value - invalid string: '\\u' must be followed by 4 hex digits; last read: '" + s2.substr(0, 6) + "'"); + } + + CAPTURE(s3) + CHECK_THROWS_AS(parser_helper(s3), json::parse_error&); + // only check error message if c is not a control character + if (c > 0x1f) + { + CHECK_THROWS_WITH_STD_STR(parser_helper(s3), + "[json.exception.parse_error.101] parse error at line 1, column 5: syntax error while parsing value - invalid string: '\\u' must be followed by 4 hex digits; last read: '" + s3.substr(0, 5) + "'"); + } + + CAPTURE(s4) + CHECK_THROWS_AS(parser_helper(s4), json::parse_error&); + // only check error message if c is not a control character + if (c > 0x1f) + { + CHECK_THROWS_WITH_STD_STR(parser_helper(s4), + "[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - invalid string: '\\u' must be followed by 4 hex digits; last read: '" + s4.substr(0, 4) + "'"); + } + } + } + } + + json _; + + // missing part of a surrogate pair + CHECK_THROWS_WITH_AS(_ = json::parse("\"\\uD80C\""), "[json.exception.parse_error.101] parse error at line 1, column 8: syntax error while parsing value - invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF; last read: '\"\\uD80C\"'", json::parse_error&); + // invalid surrogate pair + CHECK_THROWS_WITH_AS(_ = json::parse("\"\\uD80C\\uD80C\""), + "[json.exception.parse_error.101] parse error at line 1, column 13: syntax error while parsing value - invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF; last read: '\"\\uD80C\\uD80C'", json::parse_error&); + CHECK_THROWS_WITH_AS(_ = json::parse("\"\\uD80C\\u0000\""), + "[json.exception.parse_error.101] parse error at line 1, column 13: syntax error while parsing value - invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF; last read: '\"\\uD80C\\u0000'", json::parse_error&); + CHECK_THROWS_WITH_AS(_ = json::parse("\"\\uD80C\\uFFFF\""), + "[json.exception.parse_error.101] parse error at line 1, column 13: syntax error while parsing value - invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF; last read: '\"\\uD80C\\uFFFF'", json::parse_error&); + } + + SECTION("parse errors (accept)") + { + // unexpected end of number + CHECK(accept_helper("0.") == false); + CHECK(accept_helper("-") == false); + CHECK(accept_helper("--") == false); + CHECK(accept_helper("-0.") == false); + CHECK(accept_helper("-.") == false); + CHECK(accept_helper("-:") == false); + CHECK(accept_helper("0.:") == false); + CHECK(accept_helper("e.") == false); + CHECK(accept_helper("1e.") == false); + CHECK(accept_helper("1e/") == false); + CHECK(accept_helper("1e:") == false); + CHECK(accept_helper("1E.") == false); + CHECK(accept_helper("1E/") == false); + CHECK(accept_helper("1E:") == false); + + // unexpected end of null + CHECK(accept_helper("n") == false); + CHECK(accept_helper("nu") == false); + CHECK(accept_helper("nul") == false); + + // unexpected end of true + CHECK(accept_helper("t") == false); + CHECK(accept_helper("tr") == false); + CHECK(accept_helper("tru") == false); + + // unexpected end of false + CHECK(accept_helper("f") == false); + CHECK(accept_helper("fa") == false); + CHECK(accept_helper("fal") == false); + CHECK(accept_helper("fals") == false); + + // missing/unexpected end of array + CHECK(accept_helper("[") == false); + CHECK(accept_helper("[1") == false); + CHECK(accept_helper("[1,") == false); + CHECK(accept_helper("[1,]") == false); + CHECK(accept_helper("]") == false); + + // missing/unexpected end of object + CHECK(accept_helper("{") == false); + CHECK(accept_helper("{\"foo\"") == false); + CHECK(accept_helper("{\"foo\":") == false); + CHECK(accept_helper("{\"foo\":}") == false); + CHECK(accept_helper("{\"foo\":1,}") == false); + CHECK(accept_helper("}") == false); + + // missing/unexpected end of string + CHECK(accept_helper("\"") == false); + CHECK(accept_helper("\"\\\"") == false); + CHECK(accept_helper("\"\\u\"") == false); + CHECK(accept_helper("\"\\u0\"") == false); + CHECK(accept_helper("\"\\u01\"") == false); + CHECK(accept_helper("\"\\u012\"") == false); + CHECK(accept_helper("\"\\u") == false); + CHECK(accept_helper("\"\\u0") == false); + CHECK(accept_helper("\"\\u01") == false); + CHECK(accept_helper("\"\\u012") == false); + + // unget of newline + CHECK(parser_helper("\n123\n") == 123); + + // invalid escapes + for (int c = 1; c < 128; ++c) + { + auto s = std::string("\"\\") + std::string(1, static_cast(c)) + "\""; + + switch (c) + { + // valid escapes + case ('"'): + case ('\\'): + case ('/'): + case ('b'): + case ('f'): + case ('n'): + case ('r'): + case ('t'): + { + CHECK(json::parser(nlohmann::detail::input_adapter(s)).accept()); + break; + } + + // \u must be followed with four numbers, so we skip it here + case ('u'): + { + break; + } + + // any other combination of backslash and character is invalid + default: + { + CHECK(json::parser(nlohmann::detail::input_adapter(s)).accept() == false); + break; + } + } + } + + // invalid \uxxxx escapes + { + // check whether character is a valid hex character + const auto valid = [](int c) + { + switch (c) + { + case ('0'): + case ('1'): + case ('2'): + case ('3'): + case ('4'): + case ('5'): + case ('6'): + case ('7'): + case ('8'): + case ('9'): + case ('a'): + case ('b'): + case ('c'): + case ('d'): + case ('e'): + case ('f'): + case ('A'): + case ('B'): + case ('C'): + case ('D'): + case ('E'): + case ('F'): + { + return true; + } + + default: + { + return false; + } + } + }; + + for (int c = 1; c < 128; ++c) + { + std::string const s = "\"\\u"; + + // create a string with the iterated character at each position + const auto s1 = s + "000" + std::string(1, static_cast(c)) + "\""; + const auto s2 = s + "00" + std::string(1, static_cast(c)) + "0\""; + const auto s3 = s + "0" + std::string(1, static_cast(c)) + "00\""; + const auto s4 = s + std::string(1, static_cast(c)) + "000\""; + + if (valid(c)) + { + CAPTURE(s1) + CHECK(json::parser(nlohmann::detail::input_adapter(s1)).accept()); + CAPTURE(s2) + CHECK(json::parser(nlohmann::detail::input_adapter(s2)).accept()); + CAPTURE(s3) + CHECK(json::parser(nlohmann::detail::input_adapter(s3)).accept()); + CAPTURE(s4) + CHECK(json::parser(nlohmann::detail::input_adapter(s4)).accept()); + } + else + { + CAPTURE(s1) + CHECK(json::parser(nlohmann::detail::input_adapter(s1)).accept() == false); + + CAPTURE(s2) + CHECK(json::parser(nlohmann::detail::input_adapter(s2)).accept() == false); + + CAPTURE(s3) + CHECK(json::parser(nlohmann::detail::input_adapter(s3)).accept() == false); + + CAPTURE(s4) + CHECK(json::parser(nlohmann::detail::input_adapter(s4)).accept() == false); + } + } + } + + // missing part of a surrogate pair + CHECK(accept_helper("\"\\uD80C\"") == false); + // invalid surrogate pair + CHECK(accept_helper("\"\\uD80C\\uD80C\"") == false); + CHECK(accept_helper("\"\\uD80C\\u0000\"") == false); + CHECK(accept_helper("\"\\uD80C\\uFFFF\"") == false); + } + + SECTION("tests found by mutate++") + { + // test case to make sure no comma precedes the first key + CHECK_THROWS_WITH_AS(parser_helper("{,\"key\": false}"), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing object key - unexpected ','; expected string literal", json::parse_error&); + // test case to make sure an object is properly closed + CHECK_THROWS_WITH_AS(parser_helper("[{\"key\": false true]"), "[json.exception.parse_error.101] parse error at line 1, column 19: syntax error while parsing object - unexpected true literal; expected '}'", json::parse_error&); + + // test case to make sure the callback is properly evaluated after reading a key + { + json::parser_callback_t const cb = [](int /*unused*/, json::parse_event_t event, json& /*unused*/) noexcept + { + return event != json::parse_event_t::key; + }; + + json x = json::parse("{\"key\": false}", cb); + CHECK(x == json::object()); + } + } + + SECTION("callback function") + { + const auto* s_object = R"( + { + "foo": 2, + "bar": { + "baz": 1 + } + } + )"; + + const auto* s_array = R"( + [1,2,[3,4,5],4,5] + )"; + + const auto* structured_array = R"( + [ + 1, + { + "foo": "bar" + }, + { + "qux": "baz" + } + ] + )"; + + SECTION("filter nothing") + { + json j_object = json::parse(s_object, [](int /*unused*/, json::parse_event_t /*unused*/, const json& /*unused*/) noexcept + { + return true; + }); + + CHECK (j_object == json({{"foo", 2}, {"bar", {{"baz", 1}}}})); + + json j_array = json::parse(s_array, [](int /*unused*/, json::parse_event_t /*unused*/, const json& /*unused*/) noexcept + { + return true; + }); + + CHECK (j_array == json({1, 2, {3, 4, 5}, 4, 5})); + } + + SECTION("filter everything") + { + json const j_object = json::parse(s_object, [](int /*unused*/, json::parse_event_t /*unused*/, const json& /*unused*/) noexcept + { + return false; + }); + + // the top-level object will be discarded, leaving a null + CHECK (j_object.is_null()); + + json const j_array = json::parse(s_array, [](int /*unused*/, json::parse_event_t /*unused*/, const json& /*unused*/) noexcept + { + return false; + }); + + // the top-level array will be discarded, leaving a null + CHECK (j_array.is_null()); + } + + SECTION("filter specific element") + { + json j_object = json::parse(s_object, [](int /*unused*/, json::parse_event_t event, const json & j) noexcept + { + // filter all number(2) elements + return event != json::parse_event_t::value || j != json(2); + }); + + CHECK (j_object == json({{"bar", {{"baz", 1}}}})); + + json j_array = json::parse(s_array, [](int /*unused*/, json::parse_event_t event, const json & j) noexcept + { + return event != json::parse_event_t::value || j != json(2); + }); + + CHECK (j_array == json({1, {3, 4, 5}, 4, 5})); + } + + SECTION("filter object in array") + { + json j_filtered1 = json::parse(structured_array, [](int /*unused*/, json::parse_event_t e, const json & parsed) + { + return !(e == json::parse_event_t::object_end && parsed.contains("foo")); + }); + + // the specified object will be discarded, and removed. + CHECK (j_filtered1.size() == 2); + CHECK (j_filtered1 == json({1, {{"qux", "baz"}}})); + + json j_filtered2 = json::parse(structured_array, [](int /*unused*/, json::parse_event_t e, const json& /*parsed*/) noexcept + { + return e != json::parse_event_t::object_end; + }); + + // removed all objects in array. + CHECK (j_filtered2.size() == 1); + CHECK (j_filtered2 == json({1})); + } + + SECTION("filter specific events") + { + SECTION("first closing event") + { + { + json j_object = json::parse(s_object, [](int /*unused*/, json::parse_event_t e, const json& /*unused*/) noexcept + { + static bool first = true; + if (e == json::parse_event_t::object_end && first) + { + first = false; + return false; + } + + return true; + }); + + // the first completed object will be discarded + CHECK (j_object == json({{"foo", 2}})); + } + + { + json j_array = json::parse(s_array, [](int /*unused*/, json::parse_event_t e, const json& /*unused*/) noexcept + { + static bool first = true; + if (e == json::parse_event_t::array_end && first) + { + first = false; + return false; + } + + return true; + }); + + // the first completed array will be discarded + CHECK (j_array == json({1, 2, 4, 5})); + } + } + } + + SECTION("special cases") + { + // the following test cases cover the situation in which an empty + // object and array is discarded only after the closing character + // has been read + + json j_empty_object = json::parse("{}", [](int /*unused*/, json::parse_event_t e, const json& /*unused*/) noexcept + { + return e != json::parse_event_t::object_end; + }); + CHECK(j_empty_object == json()); + + json j_empty_array = json::parse("[]", [](int /*unused*/, json::parse_event_t e, const json& /*unused*/) noexcept + { + return e != json::parse_event_t::array_end; + }); + CHECK(j_empty_array == json()); + } + } + + SECTION("constructing from contiguous containers") + { + SECTION("from std::vector") + { + std::vector v = {'t', 'r', 'u', 'e'}; + json j; + json::parser(nlohmann::detail::input_adapter(std::begin(v), std::end(v))).parse(true, j); + CHECK(j == json(true)); + } + + SECTION("from std::array") + { + std::array v { {'t', 'r', 'u', 'e'} }; + json j; + json::parser(nlohmann::detail::input_adapter(std::begin(v), std::end(v))).parse(true, j); + CHECK(j == json(true)); + } + + SECTION("from array") + { + uint8_t v[] = {'t', 'r', 'u', 'e'}; // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) + json j; + json::parser(nlohmann::detail::input_adapter(std::begin(v), std::end(v))).parse(true, j); + CHECK(j == json(true)); + } + + SECTION("from char literal") + { + CHECK(parser_helper("true") == json(true)); + } + + SECTION("from std::string") + { + std::string v = {'t', 'r', 'u', 'e'}; + json j; + json::parser(nlohmann::detail::input_adapter(std::begin(v), std::end(v))).parse(true, j); + CHECK(j == json(true)); + } + + SECTION("from std::initializer_list") + { + std::initializer_list const v = {'t', 'r', 'u', 'e'}; + json j; + json::parser(nlohmann::detail::input_adapter(std::begin(v), std::end(v))).parse(true, j); + CHECK(j == json(true)); + } + + SECTION("from std::valarray") + { + std::valarray v = {'t', 'r', 'u', 'e'}; + json j; + json::parser(nlohmann::detail::input_adapter(std::begin(v), std::end(v))).parse(true, j); + CHECK(j == json(true)); + } + } + + SECTION("improve test coverage") + { + SECTION("parser with callback") + { + json::parser_callback_t const cb = [](int /*unused*/, json::parse_event_t /*unused*/, json& /*unused*/) noexcept + { + return true; + }; + + CHECK(json::parse("{\"foo\": true:", cb, false).is_discarded()); + + json _; + CHECK_THROWS_WITH_AS(_ = json::parse("{\"foo\": true:", cb), "[json.exception.parse_error.101] parse error at line 1, column 13: syntax error while parsing object - unexpected ':'; expected '}'", json::parse_error&); + + CHECK_THROWS_WITH_AS(_ = json::parse("1.18973e+4932", cb), "[json.exception.out_of_range.406] number overflow parsing '1.18973e+4932'", json::out_of_range&); + } + + SECTION("SAX parser") + { + SECTION("} without value") + { + SaxCountdown s(1); + CHECK(json::sax_parse("{}", &s) == false); + } + + SECTION("} with value") + { + SaxCountdown s(3); + CHECK(json::sax_parse("{\"k1\": true}", &s) == false); + } + + SECTION("second key") + { + SaxCountdown s(3); + CHECK(json::sax_parse("{\"k1\": true, \"k2\": false}", &s) == false); + } + + SECTION("] without value") + { + SaxCountdown s(1); + CHECK(json::sax_parse("[]", &s) == false); + } + + SECTION("] with value") + { + SaxCountdown s(2); + CHECK(json::sax_parse("[1]", &s) == false); + } + + SECTION("float") + { + SaxCountdown s(0); + CHECK(json::sax_parse("3.14", &s) == false); + } + + SECTION("false") + { + SaxCountdown s(0); + CHECK(json::sax_parse("false", &s) == false); + } + + SECTION("null") + { + SaxCountdown s(0); + CHECK(json::sax_parse("null", &s) == false); + } + + SECTION("true") + { + SaxCountdown s(0); + CHECK(json::sax_parse("true", &s) == false); + } + + SECTION("unsigned") + { + SaxCountdown s(0); + CHECK(json::sax_parse("12", &s) == false); + } + + SECTION("integer") + { + SaxCountdown s(0); + CHECK(json::sax_parse("-12", &s) == false); + } + + SECTION("string") + { + SaxCountdown s(0); + CHECK(json::sax_parse("\"foo\"", &s) == false); + } + } + } + + SECTION("error messages for comments") + { + json _; + CHECK_THROWS_WITH_AS(_ = json::parse("/a", nullptr, true, true), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid comment; expecting '/' or '*' after '/'; last read: '/a'", json::parse_error); + CHECK_THROWS_WITH_AS(_ = json::parse("/*", nullptr, true, true), "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid comment; missing closing '*/'; last read: '/*'", json::parse_error); + } + + // Macro for all test cases for start_pos and end_pos +#define SETUP_TESTCASES() \ + SECTION("with callback") \ + { \ + SECTION("filter nothing") \ + { \ + json::parser_callback_t const cb = [](int /*unused*/, json::parse_event_t /*unused*/, json& /*unused*/) noexcept \ + { \ + return true; \ + }; \ + validate_start_end_pos_for_nested_obj_helper(nested_type_json_str, root_type_json_str, expected, cb); \ + } \ + SECTION("filter element") \ + { \ + json::parser_callback_t const cb = [](int /*unused*/, json::parse_event_t event, json& j) noexcept \ + { \ + return (event != json::parse_event_t::key && event != json::parse_event_t::value) || j != json("a"); \ + }; \ + validate_start_end_pos_for_nested_obj_helper(nested_type_json_str, root_type_json_str, filteredExpected, cb); \ + } \ + } \ + SECTION("without callback") \ + { \ + validate_start_end_pos_for_nested_obj_helper(nested_type_json_str, root_type_json_str, expected); \ + } + + SECTION("retrieve start position and end position") + { + SECTION("for object") + { + // Create an object with spaces to test the start and end positions. Spaces will not be included in the + // JSON object, however, the start and end positions should include the spaces from the input JSON string. + const std::string nested_type_json_str = R"({ "a": 1,"b" : "test1"})"; + const std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test2"})"; + auto expected = json({{"nested", {{"a", 1}, {"b", "test1"}}}, {"anotherValue", "test2"}}); + auto filteredExpected = expected; + filteredExpected["nested"].erase("a"); + + SETUP_TESTCASES() + } + + SECTION("for array") + { + const std::string nested_type_json_str = R"(["a", "test", 45])"; + const std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; + auto expected = json({{"nested", {"a", "test", 45}}, {"anotherValue", "test"}}); + auto filteredExpected = expected; + filteredExpected["nested"] = json({"test", 45}); + SETUP_TESTCASES() + } + + SECTION("for array with objects") + { + const std::string nested_type_json_str = R"([{"a": 1, "b": "test"}, {"c": 2, "d": "test2"}])"; + const std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; + auto expected = json({{"nested", {{{"a", 1}, {"b", "test"}}, {{"c", 2}, {"d", "test2"}}}}, {"anotherValue", "test"}}); + auto filteredExpected = expected; + filteredExpected["nested"][0].erase("a"); + SETUP_TESTCASES() + + auto j = json::parse(root_type_json_str); + auto nested_array = j["nested"]; + const auto& nested_obj = nested_array[0]; + CHECK(nested_type_json_str.substr(1, 21) == root_type_json_str.substr(nested_obj.start_pos(), nested_obj.end_pos() - nested_obj.start_pos())); + CHECK(nested_type_json_str.substr(24, 22) == root_type_json_str.substr(nested_array[1].start_pos(), nested_array[1].end_pos() - nested_array[1].start_pos())); + } + + SECTION("for two levels of nesting objects") + { + const std::string nested_type_json_str = R"({"nested2": {"b": "test"}})"; + const std::string root_type_json_str = R"({ "a": 2, "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; + auto expected = json({{"a", 2}, {"nested", {{"nested2", {{"b", "test"}}}}}, {"anotherValue", "test"}}); + auto filteredExpected = expected; + filteredExpected.erase("a"); + SETUP_TESTCASES() + + auto j = json::parse(root_type_json_str); + auto nested_obj = j["nested"]["nested2"]; + CHECK(nested_type_json_str.substr(12, 13) == root_type_json_str.substr(nested_obj.start_pos(), nested_obj.end_pos() - nested_obj.start_pos())); + } + + SECTION("for simple types") + { + SECTION("no nested") + { + SECTION("with callback") + { + json::parser_callback_t const cb = [](int /*unused*/, json::parse_event_t /*unused*/, json& /*unused*/) noexcept + { + return true; + }; + + // 1. string type + std::string json_str = R"("test")"; + auto j = json::parse(json_str, cb); + validate_generated_json_and_start_end_pos_helper(json_str, j, "test"); + + // 2. number type + json_str = R"(1)"; + j = json::parse(json_str, cb); + validate_generated_json_and_start_end_pos_helper(json_str, j, 1); + + // 3. boolean type + json_str = R"(true)"; + j = json::parse(json_str, cb); + validate_generated_json_and_start_end_pos_helper(json_str, j, true); + + // 4. null type + json_str = R"(null)"; + j = json::parse(json_str, cb); + validate_generated_json_and_start_end_pos_helper(json_str, j, nullptr); + } + + SECTION("without callback") + { + // 1. string type + std::string json_str = R"("test")"; + auto j = json::parse(json_str); + validate_generated_json_and_start_end_pos_helper(json_str, j, "test"); + + // 2. number type + json_str = R"(1)"; + j = json::parse(json_str); + validate_generated_json_and_start_end_pos_helper(json_str, j, 1); + + json_str = R"(1.001239923)"; + j = json::parse(json_str); + validate_generated_json_and_start_end_pos_helper(json_str, j, 1.001239923); + + json_str = R"(1.123812389000000)"; + j = json::parse(json_str); + validate_generated_json_and_start_end_pos_helper(json_str, j, 1.123812389); + + // 3. boolean type + json_str = R"(true)"; + j = json::parse(json_str); + validate_generated_json_and_start_end_pos_helper(json_str, j, true); + + json_str = R"(false)"; + j = json::parse(json_str); + validate_generated_json_and_start_end_pos_helper(json_str, j, false); + + // 4. null type + json_str = R"(null)"; + j = json::parse(json_str); + validate_generated_json_and_start_end_pos_helper(json_str, j, nullptr); + } + } + + SECTION("string type") + { + const std::string nested_type_json_str = R"("test")"; + const std::string root_type_json_str = R"({ "a": 1, "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; + auto expected = json({{"nested", "test"}, {"anotherValue", "test"}, {"a", 1}}); + auto filteredExpected = expected; + filteredExpected.erase("a"); + SETUP_TESTCASES() + } + + SECTION("number type") + { + const std::string nested_type_json_str = R"(2)"; + const std::string root_type_json_str = R"({ "a": 1, "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; + auto expected = json({{"nested", 2}, {"anotherValue", "test"}, {"a", 1}}); + auto filteredExpected = expected; + filteredExpected.erase("a"); + SETUP_TESTCASES() + } + + SECTION("boolean type") + { + const std::string nested_type_json_str = R"(true)"; + const std::string root_type_json_str = R"({ "a": 1, "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; + auto expected = json({{"nested", true}, {"anotherValue", "test"}, {"a", 1}}); + auto filteredExpected = expected; + filteredExpected.erase("a"); + SETUP_TESTCASES() + } + + SECTION("null type") + { + const std::string nested_type_json_str = R"(null)"; + const std::string root_type_json_str = R"({ "a": 1, "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; + auto expected = json({{"nested", nullptr}, {"anotherValue", "test"}, {"a", 1}}); + auto filteredExpected = expected; + filteredExpected.erase("a"); + SETUP_TESTCASES() + } + } + SECTION("with leading whitespace and newlines around root JSON") + { + const std::string initial_whitespace = R"( + + )"; + const std::string nested_type_json_str = R"({ + "a": 1, + "nested": { + "b": "test" + }, + "anotherValue": "test" + })"; + const std::string end_whitespace = R"( + + )"; + const std::string root_type_json_str = initial_whitespace + nested_type_json_str + end_whitespace; + + auto expected = json({{"a", 1}, {"nested", {{"b", "test"}}}, {"anotherValue", "test"}}); + + auto j = json::parse(root_type_json_str); + + // 2. Check if the generated JSON is as expected + CHECK(j == expected); + + // 3. Check if the start and end positions do not include the surrounding whitespace + CHECK(j.start_pos() == initial_whitespace.size()); + CHECK(j.end_pos() == root_type_json_str.size() - end_whitespace.size()); + } + } +} diff --git a/tests/src/unit-deserialization.cpp b/tests/src/unit-deserialization.cpp index 0635c5e8e..602a3ee74 100644 --- a/tests/src/unit-deserialization.cpp +++ b/tests/src/unit-deserialization.cpp @@ -587,7 +587,7 @@ TEST_CASE("deserialization") auto first = str.begin(); auto last = str.end(); json j; - json_sax_dom_parser sax(j, true); + json_sax_dom_parser sax(j, true); CHECK(json::sax_parse(proxy(first), proxy(last), &sax, input_format_t::json, false)); diff --git a/tests/src/unit-disabled_exceptions.cpp b/tests/src/unit-disabled_exceptions.cpp index 5adf8c4cb..ef64eeeef 100644 --- a/tests/src/unit-disabled_exceptions.cpp +++ b/tests/src/unit-disabled_exceptions.cpp @@ -19,10 +19,10 @@ using json = nlohmann::json; // for #2824 ///////////////////////////////////////////////////////////////////// -class sax_no_exception : public nlohmann::detail::json_sax_dom_parser +class sax_no_exception : public nlohmann::detail::json_sax_dom_parser { public: - explicit sax_no_exception(json& j) : nlohmann::detail::json_sax_dom_parser(j, false) {} + explicit sax_no_exception(json& j) : nlohmann::detail::json_sax_dom_parser(j, false) {} static bool parse_error(std::size_t /*position*/, const std::string& /*last_token*/, const json::exception& ex) { diff --git a/tests/src/unit-regression2.cpp b/tests/src/unit-regression2.cpp index 8f3a8b412..dbea75bb7 100644 --- a/tests/src/unit-regression2.cpp +++ b/tests/src/unit-regression2.cpp @@ -162,11 +162,11 @@ struct adl_serializer // for #2824 ///////////////////////////////////////////////////////////////////// -class sax_no_exception : public nlohmann::detail::json_sax_dom_parser +class sax_no_exception : public nlohmann::detail::json_sax_dom_parser { public: explicit sax_no_exception(json& j) - : nlohmann::detail::json_sax_dom_parser(j, false) + : nlohmann::detail::json_sax_dom_parser(j, false) {} static bool parse_error(std::size_t /*position*/, const std::string& /*last_token*/, const json::exception& ex) diff --git a/tests/src/unit-ubjson.cpp b/tests/src/unit-ubjson.cpp index 673ae43ed..a9d931cd9 100644 --- a/tests/src/unit-ubjson.cpp +++ b/tests/src/unit-ubjson.cpp @@ -1617,7 +1617,7 @@ TEST_CASE("UBJSON") CHECK_THROWS_AS(_ = json::from_ubjson(v_ubjson), json::out_of_range&); json j; - nlohmann::detail::json_sax_dom_callback_parser scp(j, [](int /*unused*/, json::parse_event_t /*unused*/, const json& /*unused*/) noexcept + nlohmann::detail::json_sax_dom_callback_parser scp(j, [](int /*unused*/, json::parse_event_t /*unused*/, const json& /*unused*/) noexcept { return true; }); @@ -1631,7 +1631,7 @@ TEST_CASE("UBJSON") CHECK_THROWS_AS(_ = json::from_ubjson(v_ubjson), json::out_of_range&); json j; - nlohmann::detail::json_sax_dom_callback_parser scp(j, [](int /*unused*/, json::parse_event_t /*unused*/, const json& /*unused*/) noexcept + nlohmann::detail::json_sax_dom_callback_parser scp(j, [](int /*unused*/, json::parse_event_t /*unused*/, const json& /*unused*/) noexcept { return true; }); From a27a5b544299bb7bc08c4ac849dbe077d2574b72 Mon Sep 17 00:00:00 2001 From: Einars Netlis-Galejs <37474542+EinarsNG@users.noreply.github.com> Date: Fri, 20 Dec 2024 10:31:22 +0000 Subject: [PATCH 07/15] Add ONLY_SERIALIZE for NLOHMANN_DEFINE_DERIVED_TYPE_* macros (#4562) --- .../macros/nlohmann_define_derived_type.md | 43 ++++++++++++----- docs/mkdocs/docs/features/macros.md | 2 + docs/mkdocs/mkdocs.yml | 2 + include/nlohmann/detail/macro_scope.hpp | 6 +++ single_include/nlohmann/json.hpp | 6 +++ tests/src/unit-udt_macro.cpp | 48 +++++++++++++++++++ 6 files changed, 96 insertions(+), 11 deletions(-) diff --git a/docs/mkdocs/docs/api/macros/nlohmann_define_derived_type.md b/docs/mkdocs/docs/api/macros/nlohmann_define_derived_type.md index b12280f05..9bb7ff8e7 100644 --- a/docs/mkdocs/docs/api/macros/nlohmann_define_derived_type.md +++ b/docs/mkdocs/docs/api/macros/nlohmann_define_derived_type.md @@ -1,20 +1,23 @@

NLOHMANN_DEFINE_DERIVED_TYPE_INTRUSIVE, NLOHMANN_DEFINE_DERIVED_TYPE_INTRUSIVE_WITH_DEFAULT, - NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE, NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE_WITH_DEFAULT

+ NLOHMANN_DEFINE_DERIVED_TYPE_INTRUSIVE_ONLY_SERIALIZE, NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE, + NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE_WITH_DEFAULT, NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE_ONLY_SERIALIZE ```cpp -#define NLOHMANN_DEFINE_DERIVED_TYPE_INTRUSIVE(type, base_type, member...) // (1) -#define NLOHMANN_DEFINE_DERIVED_TYPE_INTRUSIVE_WITH_DEFAULT(type, base_type, member...) // (2) +#define NLOHMANN_DEFINE_DERIVED_TYPE_INTRUSIVE(type, base_type, member...) // (1) +#define NLOHMANN_DEFINE_DERIVED_TYPE_INTRUSIVE_WITH_DEFAULT(type, base_type, member...) // (2) +#define NLOHMANN_DEFINE_DERIVED_TYPE_INTRUSIVE_ONLY_SERIALIZE(type, base_type, member...) // (3) -#define NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE(type, base_type, member...) // (3) -#define NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE_WITH_DEFAULT(type, base_type, member...) // (4) +#define NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE(type, base_type, member...) // (4) +#define NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE_WITH_DEFAULT(type, base_type, member...) // (5) +#define NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE_ONLY_SERIALIZE(type, base_type, member...) // (6) ``` These macros can be used to simplify the serialization/deserialization of derived types if you want to use a JSON object as serialization and want to use the member variable names as object keys in that object. -- Macros 1 and 2 are to be defined **inside** the class/struct to create code for. +- Macros 1, 2 and 3 are to be defined **inside** the class/struct to create code for. Like [`NLOHMANN_DEFINE_TYPE_INTRUSIVE`](nlohmann_define_type_intrusive.md), they can access private members. -- Macros 3 and 4 are to be defined **outside** the class/struct to create code for, but **inside** its namespace. +- Macros 4, 5 and 6 are to be defined **outside** the class/struct to create code for, but **inside** its namespace. Like [`NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE`](nlohmann_define_type_non_intrusive.md), they **cannot** access private members. @@ -48,14 +51,20 @@ friend void to_json(nlohmann::json&, const type&); friend void from_json(const nlohmann::json&, type&); ``` -Macros 3 and 4 add two functions to the namespace which take care of the serialization and deserialization: +Macros 4 and 5 add two functions to the namespace which take care of the serialization and deserialization: ```cpp void to_json(nlohmann::json&, const type&); void from_json(const nlohmann::json&, type&); ``` -In both cases they call the `to_json`/`from_json` functions of the base type +Macros 3 and 6 add one function to the namespace which take care of the serialization only: + +```cpp +void to_json(nlohmann::json&, const type&); +``` + +In first two cases cases they call the `to_json`/`from_json` functions of the base type before serializing/deserializing the members of the derived type: ```cpp @@ -73,12 +82,24 @@ void from_json(const nlohmann::json& j, B& b) { } ``` +In the third case only `to_json` will be called: + +```cpp +class A { /* ... */ }; +class B : public A { /* ... */ }; + +void to_json(nlohmann::json& j, const B& b) { + nlohmann::to_json(j, static_cast(b)); + // ... +} +``` + ## Notes !!! info "Prerequisites" - - Macros 1 and 2 have the same prerequisites of NLOHMANN_DEFINE_TYPE_INTRUSIVE. - - Macros 3 and 3 have the same prerequisites of NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE. + - Macros 1, 2 and 3 have the same prerequisites of NLOHMANN_DEFINE_TYPE_INTRUSIVE. + - Macros 4, 5 and 6 have the same prerequisites of NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE. - Serialization/deserialization of base types must be defined. !!! warning "Implementation limits" diff --git a/docs/mkdocs/docs/features/macros.md b/docs/mkdocs/docs/features/macros.md index 63457290f..2fea87c18 100644 --- a/docs/mkdocs/docs/features/macros.md +++ b/docs/mkdocs/docs/features/macros.md @@ -102,8 +102,10 @@ See [full documentation of `JSON_USE_IMPLICIT_CONVERSIONS`](../api/macros/json_u ## `NLOHMANN_DEFINE_DERIVED_TYPE_INTRUSIVE(type, base_type, member...)` ## `NLOHMANN_DEFINE_DERIVED_TYPE_INTRUSIVE_WITH_DEFAULT(type, base_type, member...)` +## `NLOHMANN_DEFINE_DERIVED_TYPE_INTRUSIVE_ONLY_SERIALIZE(type, base_type, member...)` ## `NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE(type, base_type, member...)` ## `NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE_WITH_DEFAULT(type, base_type, member...)` +## `NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE_ONLY_SERIALIZE(type, base_type, member...)` ## `NLOHMANN_DEFINE_TYPE_INTRUSIVE(type, member...)` diff --git a/docs/mkdocs/mkdocs.yml b/docs/mkdocs/mkdocs.yml index 286f161e0..ccf75a6f8 100644 --- a/docs/mkdocs/mkdocs.yml +++ b/docs/mkdocs/mkdocs.yml @@ -287,8 +287,10 @@ nav: - 'JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON': api/macros/json_use_legacy_discarded_value_comparison.md - 'NLOHMANN_DEFINE_DERIVED_TYPE_INTRUSIVE': api/macros/nlohmann_define_derived_type.md - 'NLOHMANN_DEFINE_DERIVED_TYPE_INTRUSIVE_WITH_DEFAULT': api/macros/nlohmann_define_derived_type.md + - 'NLOHMANN_DEFINE_DERIVED_TYPE_INTRUSIVE_ONLY_SERIALIZE': api/macros/nlohmann_define_derived_type.md - 'NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE': api/macros/nlohmann_define_derived_type.md - 'NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE_WITH_DEFAULT': api/macros/nlohmann_define_derived_type.md + - 'NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE_ONLY_SERIALIZE': api/macros/nlohmann_define_derived_type.md - 'NLOHMANN_DEFINE_TYPE_INTRUSIVE': api/macros/nlohmann_define_type_intrusive.md - 'NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT': api/macros/nlohmann_define_type_intrusive.md - 'NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE': api/macros/nlohmann_define_type_non_intrusive.md diff --git a/include/nlohmann/detail/macro_scope.hpp b/include/nlohmann/detail/macro_scope.hpp index c8547d502..1ec5e40de 100644 --- a/include/nlohmann/detail/macro_scope.hpp +++ b/include/nlohmann/detail/macro_scope.hpp @@ -442,6 +442,9 @@ friend void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { nlohmann::to_json(nlohmann_json_j, static_cast(nlohmann_json_t)); NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ friend void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { nlohmann::from_json(nlohmann_json_j, static_cast(nlohmann_json_t)); const Type nlohmann_json_default_obj{}; NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM_WITH_DEFAULT, __VA_ARGS__)) } +#define NLOHMANN_DEFINE_DERIVED_TYPE_INTRUSIVE_ONLY_SERIALIZE(Type, BaseType, ...) \ + friend void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { nlohmann::to_json(nlohmann_json_j, static_cast(nlohmann_json_t)); NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + /*! @brief macro @def NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE @@ -455,6 +458,9 @@ inline void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { nlohmann::to_json(nlohmann_json_j, static_cast(nlohmann_json_t)); NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ inline void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { nlohmann::from_json(nlohmann_json_j, static_cast(nlohmann_json_t)); const Type nlohmann_json_default_obj{}; NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM_WITH_DEFAULT, __VA_ARGS__)) } +#define NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE_ONLY_SERIALIZE(Type, BaseType, ...) \ + inline void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { nlohmann::to_json(nlohmann_json_j, static_cast(nlohmann_json_t)); NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + // inspired from https://stackoverflow.com/a/26745591 // allows to call any std function as if (e.g. with begin): // using std::begin; begin(x); diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 2192b4f55..df71d15cf 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -2809,6 +2809,9 @@ JSON_HEDLEY_DIAGNOSTIC_POP friend void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { nlohmann::to_json(nlohmann_json_j, static_cast(nlohmann_json_t)); NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ friend void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { nlohmann::from_json(nlohmann_json_j, static_cast(nlohmann_json_t)); const Type nlohmann_json_default_obj{}; NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM_WITH_DEFAULT, __VA_ARGS__)) } +#define NLOHMANN_DEFINE_DERIVED_TYPE_INTRUSIVE_ONLY_SERIALIZE(Type, BaseType, ...) \ + friend void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { nlohmann::to_json(nlohmann_json_j, static_cast(nlohmann_json_t)); NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + /*! @brief macro @def NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE @@ -2822,6 +2825,9 @@ JSON_HEDLEY_DIAGNOSTIC_POP inline void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { nlohmann::to_json(nlohmann_json_j, static_cast(nlohmann_json_t)); NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ inline void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { nlohmann::from_json(nlohmann_json_j, static_cast(nlohmann_json_t)); const Type nlohmann_json_default_obj{}; NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM_WITH_DEFAULT, __VA_ARGS__)) } +#define NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE_ONLY_SERIALIZE(Type, BaseType, ...) \ + inline void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { nlohmann::to_json(nlohmann_json_j, static_cast(nlohmann_json_t)); NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + // inspired from https://stackoverflow.com/a/26745591 // allows to call any std function as if (e.g. with begin): // using std::begin; begin(x); diff --git a/tests/src/unit-udt_macro.cpp b/tests/src/unit-udt_macro.cpp index 9189eb946..e2cc7c9ee 100644 --- a/tests/src/unit-udt_macro.cpp +++ b/tests/src/unit-udt_macro.cpp @@ -440,6 +440,32 @@ class person_without_default_constructor_2 NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_ONLY_SERIALIZE(person_without_default_constructor_2, name, age) +class derived_person_only_serialize_public : public person_without_default_constructor_1 +{ + public: + std::string hair_color; + + derived_person_only_serialize_public(std::string name_, int age_, std::string hair_color_) + : person_without_default_constructor_1(std::move(name_), age_) + , hair_color(std::move(hair_color_)) + {} +}; + +NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE_ONLY_SERIALIZE(derived_person_only_serialize_public, person_without_default_constructor_1, hair_color) + +class derived_person_only_serialize_private : person_without_default_constructor_1 +{ + private: + std::string hair_color; + public: + derived_person_only_serialize_private(std::string name_, int age_, std::string hair_color_) + : person_without_default_constructor_1(std::move(name_), age_) + , hair_color(std::move(hair_color_)) + {} + + NLOHMANN_DEFINE_DERIVED_TYPE_INTRUSIVE_ONLY_SERIALIZE(derived_person_only_serialize_private, person_without_default_constructor_1, hair_color) +}; + } // namespace persons TEST_CASE_TEMPLATE("Serialization/deserialization via NLOHMANN_DEFINE_TYPE_INTRUSIVE and NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE", T, // NOLINT(readability-math-missing-parentheses) @@ -657,3 +683,25 @@ TEST_CASE_TEMPLATE("Serialization of non-default-constructible classes via NLOHM } } } + +TEST_CASE_TEMPLATE("Serialization of non-default-constructible classes via NLOHMANN_DEFINE_DERIVED_TYPE_INTRUSIVE_ONLY_SERIALIZE and NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE_ONLY_SERIALIZE", T, // NOLINT(readability-math-missing-parentheses) + persons::derived_person_only_serialize_public, + persons::derived_person_only_serialize_private) +{ + SECTION("derived person only serialize") + { + { + // serialization of a single object + T person{"Erik", 1, "brown"}; + CHECK(json(person).dump() == "{\"age\":1,\"hair_color\":\"brown\",\"name\":\"Erik\"}"); + + // serialization of a container with objects + std::vector const two_persons + { + {"Erik", 1, "brown"}, + {"Kyle", 2, "black"} + }; + CHECK(json(two_persons).dump() == "[{\"age\":1,\"hair_color\":\"brown\",\"name\":\"Erik\"},{\"age\":2,\"hair_color\":\"black\",\"name\":\"Kyle\"}]"); + } + } +} From a2d828c204325a743732587f5cd9dc8b91a64871 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Fri, 20 Dec 2024 15:40:20 +0100 Subject: [PATCH 08/15] :construction: first implementation for keep --- docs/mkdocs/docs/api/basic_json/dump.md | 10 ++++++---- .../docs/api/basic_json/error_handler_t.md | 8 ++++++-- include/nlohmann/detail/output/serializer.hpp | 16 +++++++++------- single_include/nlohmann/json.hpp | 16 +++++++++------- tests/src/unit-regression2.cpp | 4 ++-- tests/src/unit-serialization.cpp | 5 ++--- tests/src/unit-unicode2.cpp | 5 +++++ tests/src/unit-unicode3.cpp | 5 +++++ tests/src/unit-unicode4.cpp | 5 +++++ tests/src/unit-unicode5.cpp | 5 +++++ 10 files changed, 54 insertions(+), 25 deletions(-) diff --git a/docs/mkdocs/docs/api/basic_json/dump.md b/docs/mkdocs/docs/api/basic_json/dump.md index 41adb154d..30388f660 100644 --- a/docs/mkdocs/docs/api/basic_json/dump.md +++ b/docs/mkdocs/docs/api/basic_json/dump.md @@ -25,10 +25,11 @@ and `ensure_ascii` parameters. result consists of ASCII characters only. `error_handler` (in) -: how to react on decoding errors; there are three possible values (see [`error_handler_t`](error_handler_t.md): - `strict` (throws and exception in case a decoding error occurs; default), `replace` (replace invalid UTF-8 sequences - with U+FFFD), and `ignore` (ignore invalid UTF-8 sequences during serialization; all bytes are copied to the output - unchanged)). +: how to react on decoding errors; there are three possible values (see [`error_handler_t`](error_handler_t.md)): +: - `strict`: throws a [`type_error`](../../home/exceptions.md#type-errors) exception in case a decoding error occurs (this is the default), + - `replace`: replace invalid UTF-8 sequences with U+FFFD (� REPLACEMENT CHARACTER), + - `ignore`: ignore invalid UTF-8 sequences during serialization, and + - `keep`: keep invalid UTF-8 sequences during serialization; all bytes are copied to the output unchanged ## Return value @@ -77,3 +78,4 @@ Binary values are serialized as object containing two keys: - Indentation character `indent_char`, option `ensure_ascii` and exceptions added in version 3.0.0. - Error handlers added in version 3.4.0. - Serialization of binary values added in version 3.8.0. +- Added support for error handler value `keep` in version ???. diff --git a/docs/mkdocs/docs/api/basic_json/error_handler_t.md b/docs/mkdocs/docs/api/basic_json/error_handler_t.md index dc32ced9b..1b3c7456a 100644 --- a/docs/mkdocs/docs/api/basic_json/error_handler_t.md +++ b/docs/mkdocs/docs/api/basic_json/error_handler_t.md @@ -12,13 +12,16 @@ This enumeration is used in the [`dump`](dump.md) function to choose how to trea `basic_json` value. Three values are differentiated: strict -: throw a `type_error` exception in case of invalid UTF-8 +: throw a [`type_error`](../../home/exceptions.md#type-errors) exception in case of invalid UTF-8 replace : replace invalid UTF-8 sequences with U+FFFD (� REPLACEMENT CHARACTER) ignore -: ignore invalid UTF-8 sequences; all bytes are copied to the output unchanged +: ignore invalid UTF-8 sequences + +keep +: keep invalid UTF-8 sequences; all bytes are copied to the output unchanged ## Examples @@ -40,3 +43,4 @@ ignore ## Version history - Added in version 3.4.0. +- Added value `keep` in version ???. diff --git a/include/nlohmann/detail/output/serializer.hpp b/include/nlohmann/detail/output/serializer.hpp index 3509de5fc..2ced730d2 100644 --- a/include/nlohmann/detail/output/serializer.hpp +++ b/include/nlohmann/detail/output/serializer.hpp @@ -44,7 +44,8 @@ enum class error_handler_t { strict, ///< throw a type_error exception in case of invalid UTF-8 replace, ///< replace invalid UTF-8 sequences with U+FFFD - ignore ///< ignore invalid UTF-8 sequences + ignore, ///< ignore invalid UTF-8 sequences + keep ///< keep invalid UTF-8 sequences }; template @@ -398,6 +399,13 @@ class serializer std::size_t bytes_after_last_accept = 0; std::size_t undumped_chars = 0; + // copy string as-is if error handler is set to keep + if (error_handler == error_handler_t::keep) + { + o->write_characters(s.data(), s.size()); + return; + } + for (std::size_t i = 0; i < s.size(); ++i) { const auto byte = static_cast(s[i]); @@ -529,12 +537,6 @@ class serializer // thus removing/ignoring the invalid characters bytes = bytes_after_last_accept; - // fix for #4552 - if (error_handler == error_handler_t::ignore) - { - bytes += undumped_chars; - } - if (error_handler == error_handler_t::replace) { // add a replacement character diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index df71d15cf..c37bb6bdf 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -18606,7 +18606,8 @@ enum class error_handler_t { strict, ///< throw a type_error exception in case of invalid UTF-8 replace, ///< replace invalid UTF-8 sequences with U+FFFD - ignore ///< ignore invalid UTF-8 sequences + ignore, ///< ignore invalid UTF-8 sequences + keep ///< keep invalid UTF-8 sequences }; template @@ -18960,6 +18961,13 @@ class serializer std::size_t bytes_after_last_accept = 0; std::size_t undumped_chars = 0; + // copy string as-is if error handler is set to keep + if (error_handler == error_handler_t::keep) + { + o->write_characters(s.data(), s.size()); + return; + } + for (std::size_t i = 0; i < s.size(); ++i) { const auto byte = static_cast(s[i]); @@ -19091,12 +19099,6 @@ class serializer // thus removing/ignoring the invalid characters bytes = bytes_after_last_accept; - // fix for #4552 - if (error_handler == error_handler_t::ignore) - { - bytes += undumped_chars; - } - if (error_handler == error_handler_t::replace) { // add a replacement character diff --git a/tests/src/unit-regression2.cpp b/tests/src/unit-regression2.cpp index dbea75bb7..b8c8b782f 100644 --- a/tests/src/unit-regression2.cpp +++ b/tests/src/unit-regression2.cpp @@ -1000,8 +1000,8 @@ TEST_CASE("regression tests 2") { nlohmann::json node; node["test"] = "test\334\005"; - const auto test_dump = node.dump(-1, ' ', false, nlohmann::json::error_handler_t::ignore); - CHECK(test_dump == "{\"test\":\"test\334\\u0005\"}"); + const auto test_dump = node.dump(-1, ' ', false, nlohmann::json::error_handler_t::keep); + CHECK(test_dump == "{\"test\":\"test\334\005\"}"); } } diff --git a/tests/src/unit-serialization.cpp b/tests/src/unit-serialization.cpp index bb7f1bfaf..5c3a05240 100644 --- a/tests/src/unit-serialization.cpp +++ b/tests/src/unit-serialization.cpp @@ -109,9 +109,8 @@ TEST_CASE("serialization") CHECK_THROWS_AS(j.dump(1, ' ', false, json::error_handler_t::strict), json::type_error&); // see pending discussion at #4452 - // CHECK(j.dump(-1, ' ', false, json::error_handler_t::ignore) == "\"123456\""); - CHECK(j.dump(-1, ' ', false, json::error_handler_t::ignore) == "\"123\xF1\xB0\x34\x35\x36\""); - + CHECK(j.dump(-1, ' ', false, json::error_handler_t::ignore) == "\"123456\""); + CHECK(j.dump(-1, ' ', false, json::error_handler_t::keep) == "\"123\xF1\xB0\x34\x35\x36\""); CHECK(j.dump(-1, ' ', false, json::error_handler_t::replace) == "\"123\xEF\xBF\xBD\x34\x35\x36\""); CHECK(j.dump(-1, ' ', true, json::error_handler_t::replace) == "\"123\\ufffd456\""); } diff --git a/tests/src/unit-unicode2.cpp b/tests/src/unit-unicode2.cpp index 606de12e2..eb0a5a3fd 100644 --- a/tests/src/unit-unicode2.cpp +++ b/tests/src/unit-unicode2.cpp @@ -74,6 +74,7 @@ void check_utf8dump(bool success_expected, int byte1, int byte2 = -1, int byte3 static std::string s_replaced2; static std::string s_replaced_ascii; static std::string s_replaced2_ascii; + static std::string s_kept; // dumping with ignore/replace must not throw in any case s_ignored = j.dump(-1, ' ', false, json::error_handler_t::ignore); @@ -84,6 +85,7 @@ void check_utf8dump(bool success_expected, int byte1, int byte2 = -1, int byte3 s_replaced2 = j2.dump(-1, ' ', false, json::error_handler_t::replace); s_replaced_ascii = j.dump(-1, ' ', true, json::error_handler_t::replace); s_replaced2_ascii = j2.dump(-1, ' ', true, json::error_handler_t::replace); + s_kept = j.dump(-1, ' ', false, json::error_handler_t::keep); if (success_expected) { @@ -105,6 +107,9 @@ void check_utf8dump(bool success_expected, int byte1, int byte2 = -1, int byte3 CHECK(s_replaced.find("\xEF\xBF\xBD") != std::string::npos); } + // check if the string is unchanged (ignoring the quotes) if error_handler_t::keep is used + CHECK(json_string == s_kept.substr(1, json_string.size())); + // check that prefix and suffix are preserved CHECK(s_ignored2.substr(1, 3) == "abc"); CHECK(s_ignored2.substr(s_ignored2.size() - 4, 3) == "xyz"); diff --git a/tests/src/unit-unicode3.cpp b/tests/src/unit-unicode3.cpp index b060f090a..db64a252d 100644 --- a/tests/src/unit-unicode3.cpp +++ b/tests/src/unit-unicode3.cpp @@ -74,6 +74,7 @@ void check_utf8dump(bool success_expected, int byte1, int byte2 = -1, int byte3 static std::string s_replaced2; static std::string s_replaced_ascii; static std::string s_replaced2_ascii; + static std::string s_kept; // dumping with ignore/replace must not throw in any case s_ignored = j.dump(-1, ' ', false, json::error_handler_t::ignore); @@ -84,6 +85,7 @@ void check_utf8dump(bool success_expected, int byte1, int byte2 = -1, int byte3 s_replaced2 = j2.dump(-1, ' ', false, json::error_handler_t::replace); s_replaced_ascii = j.dump(-1, ' ', true, json::error_handler_t::replace); s_replaced2_ascii = j2.dump(-1, ' ', true, json::error_handler_t::replace); + s_kept = j.dump(-1, ' ', false, json::error_handler_t::keep); if (success_expected) { @@ -105,6 +107,9 @@ void check_utf8dump(bool success_expected, int byte1, int byte2 = -1, int byte3 CHECK(s_replaced.find("\xEF\xBF\xBD") != std::string::npos); } + // check if the string is unchanged (ignoring the quotes) if error_handler_t::keep is used + CHECK(json_string == s_kept.substr(1, json_string.size())); + // check that prefix and suffix are preserved CHECK(s_ignored2.substr(1, 3) == "abc"); CHECK(s_ignored2.substr(s_ignored2.size() - 4, 3) == "xyz"); diff --git a/tests/src/unit-unicode4.cpp b/tests/src/unit-unicode4.cpp index a6a67a029..c5d0b7034 100644 --- a/tests/src/unit-unicode4.cpp +++ b/tests/src/unit-unicode4.cpp @@ -74,6 +74,7 @@ void check_utf8dump(bool success_expected, int byte1, int byte2 = -1, int byte3 static std::string s_replaced2; static std::string s_replaced_ascii; static std::string s_replaced2_ascii; + static std::string s_kept; // dumping with ignore/replace must not throw in any case s_ignored = j.dump(-1, ' ', false, json::error_handler_t::ignore); @@ -84,6 +85,7 @@ void check_utf8dump(bool success_expected, int byte1, int byte2 = -1, int byte3 s_replaced2 = j2.dump(-1, ' ', false, json::error_handler_t::replace); s_replaced_ascii = j.dump(-1, ' ', true, json::error_handler_t::replace); s_replaced2_ascii = j2.dump(-1, ' ', true, json::error_handler_t::replace); + s_kept = j.dump(-1, ' ', false, json::error_handler_t::keep); if (success_expected) { @@ -105,6 +107,9 @@ void check_utf8dump(bool success_expected, int byte1, int byte2 = -1, int byte3 CHECK(s_replaced.find("\xEF\xBF\xBD") != std::string::npos); } + // check if the string is unchanged (ignoring the quotes) if error_handler_t::keep is used + CHECK(json_string == s_kept.substr(1, json_string.size())); + // check that prefix and suffix are preserved CHECK(s_ignored2.substr(1, 3) == "abc"); CHECK(s_ignored2.substr(s_ignored2.size() - 4, 3) == "xyz"); diff --git a/tests/src/unit-unicode5.cpp b/tests/src/unit-unicode5.cpp index 7cf2fc8c0..d2a70ea0c 100644 --- a/tests/src/unit-unicode5.cpp +++ b/tests/src/unit-unicode5.cpp @@ -74,6 +74,7 @@ void check_utf8dump(bool success_expected, int byte1, int byte2 = -1, int byte3 static std::string s_replaced2; static std::string s_replaced_ascii; static std::string s_replaced2_ascii; + static std::string s_kept; // dumping with ignore/replace must not throw in any case s_ignored = j.dump(-1, ' ', false, json::error_handler_t::ignore); @@ -84,6 +85,7 @@ void check_utf8dump(bool success_expected, int byte1, int byte2 = -1, int byte3 s_replaced2 = j2.dump(-1, ' ', false, json::error_handler_t::replace); s_replaced_ascii = j.dump(-1, ' ', true, json::error_handler_t::replace); s_replaced2_ascii = j2.dump(-1, ' ', true, json::error_handler_t::replace); + s_kept = j.dump(-1, ' ', false, json::error_handler_t::keep); if (success_expected) { @@ -105,6 +107,9 @@ void check_utf8dump(bool success_expected, int byte1, int byte2 = -1, int byte3 CHECK(s_replaced.find("\xEF\xBF\xBD") != std::string::npos); } + // check if the string is unchanged (ignoring the quotes) if error_handler_t::keep is used + CHECK(json_string == s_kept.substr(1, json_string.size())); + // check that prefix and suffix are preserved CHECK(s_ignored2.substr(1, 3) == "abc"); CHECK(s_ignored2.substr(s_ignored2.size() - 4, 3) == "xyz"); From 7d2a83b735d1b9fa0436b4a61285276bbe1bf3f3 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Fri, 20 Dec 2024 16:36:30 +0100 Subject: [PATCH 09/15] :rotating_light: fix warning --- include/nlohmann/detail/output/serializer.hpp | 10 ++++++---- single_include/nlohmann/json.hpp | 10 ++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/include/nlohmann/detail/output/serializer.hpp b/include/nlohmann/detail/output/serializer.hpp index 2ced730d2..76bfa7a1c 100644 --- a/include/nlohmann/detail/output/serializer.hpp +++ b/include/nlohmann/detail/output/serializer.hpp @@ -575,8 +575,9 @@ class serializer break; } - default: // LCOV_EXCL_LINE - JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE + case error_handler_t::keep: // LCOV_EXCL_LINE + default: // LCOV_EXCL_LINE + JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE } break; } @@ -636,8 +637,9 @@ class serializer break; } - default: // LCOV_EXCL_LINE - JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE + case error_handler_t::keep: // LCOV_EXCL_LINE + default: // LCOV_EXCL_LINE + JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE } } } diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index c37bb6bdf..53d6b0000 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -19137,8 +19137,9 @@ class serializer break; } - default: // LCOV_EXCL_LINE - JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE + case error_handler_t::keep: // LCOV_EXCL_LINE + default: // LCOV_EXCL_LINE + JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE } break; } @@ -19198,8 +19199,9 @@ class serializer break; } - default: // LCOV_EXCL_LINE - JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE + case error_handler_t::keep: // LCOV_EXCL_LINE + default: // LCOV_EXCL_LINE + JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE } } } From 3cd50255a8f850de60ffd0ea964f6f9642f30811 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 22 Dec 2024 13:12:34 +0100 Subject: [PATCH 10/15] :construction: add support for ensure_ascii --- include/nlohmann/detail/output/serializer.hpp | 36 ++++++++++++++++--- single_include/nlohmann/json.hpp | 36 ++++++++++++++++--- tests/src/unit-serialization.cpp | 10 +++++- 3 files changed, 73 insertions(+), 9 deletions(-) diff --git a/include/nlohmann/detail/output/serializer.hpp b/include/nlohmann/detail/output/serializer.hpp index 76bfa7a1c..aafa21a12 100644 --- a/include/nlohmann/detail/output/serializer.hpp +++ b/include/nlohmann/detail/output/serializer.hpp @@ -399,8 +399,8 @@ class serializer std::size_t bytes_after_last_accept = 0; std::size_t undumped_chars = 0; - // copy string as-is if error handler is set to keep - if (error_handler == error_handler_t::keep) + // copy string as-is if error handler is set to keep, and we don't want to ensure ASCII + if (error_handler == error_handler_t::keep && !ensure_ascii) { o->write_characters(s.data(), s.size()); return; @@ -575,7 +575,22 @@ class serializer break; } - case error_handler_t::keep: // LCOV_EXCL_LINE + case error_handler_t::keep: + { + // copy undumped chars to string buffer + for (int j = 0; j < undumped_chars; ++j) + { + string_buffer[bytes++] = s[bytes_after_last_accept + j]; + } + + // add erroneous byte to string buffer + string_buffer[bytes++] = s[i]; + + // continue processing the string + state = UTF8_ACCEPT; + break; + } + default: // LCOV_EXCL_LINE JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE } @@ -614,6 +629,20 @@ class serializer JSON_THROW(type_error::create(316, concat("incomplete UTF-8 string; last byte: 0x", hex_bytes(static_cast(s.back() | 0))), nullptr)); } + case error_handler_t::keep: + { + // copy undumped chars to string buffer + for (int j = 0; j < undumped_chars; ++j) + { + string_buffer[bytes++] = s[bytes_after_last_accept + j]; + } + undumped_chars = 0; + + // write all accepted bytes + o->write_characters(string_buffer.data(), bytes); + break; + } + case error_handler_t::ignore: { // write all accepted bytes @@ -637,7 +666,6 @@ class serializer break; } - case error_handler_t::keep: // LCOV_EXCL_LINE default: // LCOV_EXCL_LINE JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE } diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 53d6b0000..e8b80ad33 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -18961,8 +18961,8 @@ class serializer std::size_t bytes_after_last_accept = 0; std::size_t undumped_chars = 0; - // copy string as-is if error handler is set to keep - if (error_handler == error_handler_t::keep) + // copy string as-is if error handler is set to keep, and we don't want to ensure ASCII + if (error_handler == error_handler_t::keep && !ensure_ascii) { o->write_characters(s.data(), s.size()); return; @@ -19137,7 +19137,22 @@ class serializer break; } - case error_handler_t::keep: // LCOV_EXCL_LINE + case error_handler_t::keep: + { + // copy undumped chars to string buffer + for (int j = 0; j < undumped_chars; ++j) + { + string_buffer[bytes++] = s[bytes_after_last_accept + j]; + } + + // add erroneous byte to string buffer + string_buffer[bytes++] = s[i]; + + // continue processing the string + state = UTF8_ACCEPT; + break; + } + default: // LCOV_EXCL_LINE JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE } @@ -19176,6 +19191,20 @@ class serializer JSON_THROW(type_error::create(316, concat("incomplete UTF-8 string; last byte: 0x", hex_bytes(static_cast(s.back() | 0))), nullptr)); } + case error_handler_t::keep: + { + // copy undumped chars to string buffer + for (int j = 0; j < undumped_chars; ++j) + { + string_buffer[bytes++] = s[bytes_after_last_accept + j]; + } + undumped_chars = 0; + + // write all accepted bytes + o->write_characters(string_buffer.data(), bytes); + break; + } + case error_handler_t::ignore: { // write all accepted bytes @@ -19199,7 +19228,6 @@ class serializer break; } - case error_handler_t::keep: // LCOV_EXCL_LINE default: // LCOV_EXCL_LINE JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE } diff --git a/tests/src/unit-serialization.cpp b/tests/src/unit-serialization.cpp index 5c3a05240..ee8c7b43e 100644 --- a/tests/src/unit-serialization.cpp +++ b/tests/src/unit-serialization.cpp @@ -86,8 +86,11 @@ TEST_CASE("serialization") CHECK_THROWS_WITH_AS(j.dump(), "[json.exception.type_error.316] invalid UTF-8 byte at index 2: 0xA9", json::type_error&); CHECK_THROWS_WITH_AS(j.dump(1, ' ', false, json::error_handler_t::strict), "[json.exception.type_error.316] invalid UTF-8 byte at index 2: 0xA9", json::type_error&); CHECK(j.dump(-1, ' ', false, json::error_handler_t::ignore) == "\"äü\""); + CHECK(j.dump(-1, ' ', true, json::error_handler_t::ignore) == "\"\\u00e4\\u00fc\""); CHECK(j.dump(-1, ' ', false, json::error_handler_t::replace) == "\"ä\xEF\xBF\xBDü\""); CHECK(j.dump(-1, ' ', true, json::error_handler_t::replace) == "\"\\u00e4\\ufffd\\u00fc\""); + CHECK(j.dump(-1, ' ', false, json::error_handler_t::keep) == "\"ä\xA9ü\""); + CHECK(j.dump(-1, ' ', true, json::error_handler_t::keep) == "\"\\u00e4\xA9\\u00fc\""); } SECTION("ending with incomplete character") @@ -97,8 +100,11 @@ TEST_CASE("serialization") CHECK_THROWS_WITH_AS(j.dump(), "[json.exception.type_error.316] incomplete UTF-8 string; last byte: 0xC2", json::type_error&); CHECK_THROWS_AS(j.dump(1, ' ', false, json::error_handler_t::strict), json::type_error&); CHECK(j.dump(-1, ' ', false, json::error_handler_t::ignore) == "\"123\""); + CHECK(j.dump(-1, ' ', true, json::error_handler_t::ignore) == "\"123\""); CHECK(j.dump(-1, ' ', false, json::error_handler_t::replace) == "\"123\xEF\xBF\xBD\""); CHECK(j.dump(-1, ' ', true, json::error_handler_t::replace) == "\"123\\ufffd\""); + CHECK(j.dump(-1, ' ', false, json::error_handler_t::keep) == "\"123\xC2\""); + CHECK(j.dump(-1, ' ', true, json::error_handler_t::keep) == "\"123\xC2\""); } SECTION("unexpected character") @@ -110,9 +116,11 @@ TEST_CASE("serialization") // see pending discussion at #4452 CHECK(j.dump(-1, ' ', false, json::error_handler_t::ignore) == "\"123456\""); - CHECK(j.dump(-1, ' ', false, json::error_handler_t::keep) == "\"123\xF1\xB0\x34\x35\x36\""); + CHECK(j.dump(-1, ' ', true, json::error_handler_t::ignore) == "\"123456\""); CHECK(j.dump(-1, ' ', false, json::error_handler_t::replace) == "\"123\xEF\xBF\xBD\x34\x35\x36\""); CHECK(j.dump(-1, ' ', true, json::error_handler_t::replace) == "\"123\\ufffd456\""); + CHECK(j.dump(-1, ' ', false, json::error_handler_t::keep) == "\"123\xF1\xB0\x34\x35\x36\""); + CHECK(j.dump(-1, ' ', true, json::error_handler_t::keep) == "\"123\xF1\xB0\x34\x35\x36\""); } SECTION("U+FFFD Substitution of Maximal Subparts") From 15ff3701a15f4654332f04cf14e405306aaad5cc Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 22 Dec 2024 13:30:36 +0100 Subject: [PATCH 11/15] :rotating_light: fix warnings --- include/nlohmann/detail/output/serializer.hpp | 4 ++-- single_include/nlohmann/json.hpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/nlohmann/detail/output/serializer.hpp b/include/nlohmann/detail/output/serializer.hpp index aafa21a12..4171abea8 100644 --- a/include/nlohmann/detail/output/serializer.hpp +++ b/include/nlohmann/detail/output/serializer.hpp @@ -578,7 +578,7 @@ class serializer case error_handler_t::keep: { // copy undumped chars to string buffer - for (int j = 0; j < undumped_chars; ++j) + for (std::size_t j = 0; j < undumped_chars; ++j) { string_buffer[bytes++] = s[bytes_after_last_accept + j]; } @@ -632,7 +632,7 @@ class serializer case error_handler_t::keep: { // copy undumped chars to string buffer - for (int j = 0; j < undumped_chars; ++j) + for (std::size_t j = 0; j < undumped_chars; ++j) { string_buffer[bytes++] = s[bytes_after_last_accept + j]; } diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index e8b80ad33..f0b118718 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -19140,7 +19140,7 @@ class serializer case error_handler_t::keep: { // copy undumped chars to string buffer - for (int j = 0; j < undumped_chars; ++j) + for (std::size_t j = 0; j < undumped_chars; ++j) { string_buffer[bytes++] = s[bytes_after_last_accept + j]; } @@ -19194,7 +19194,7 @@ class serializer case error_handler_t::keep: { // copy undumped chars to string buffer - for (int j = 0; j < undumped_chars; ++j) + for (std::size_t j = 0; j < undumped_chars; ++j) { string_buffer[bytes++] = s[bytes_after_last_accept + j]; } From e9876d9e46ed98601e1c58f3f245e71116dae5ac Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 22 Dec 2024 13:57:39 +0100 Subject: [PATCH 12/15] :art: format code --- include/nlohmann/detail/output/serializer.hpp | 4 ++-- single_include/nlohmann/json.hpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/nlohmann/detail/output/serializer.hpp b/include/nlohmann/detail/output/serializer.hpp index 4171abea8..5a5aac65c 100644 --- a/include/nlohmann/detail/output/serializer.hpp +++ b/include/nlohmann/detail/output/serializer.hpp @@ -591,8 +591,8 @@ class serializer break; } - default: // LCOV_EXCL_LINE - JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE + default: // LCOV_EXCL_LINE + JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE } break; } diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index f0b118718..2fe332518 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -19153,8 +19153,8 @@ class serializer break; } - default: // LCOV_EXCL_LINE - JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE + default: // LCOV_EXCL_LINE + JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE } break; } From b167096e82abf72cfb63a45631a5adeb926abd86 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 22 Dec 2024 14:20:10 +0100 Subject: [PATCH 13/15] :memo: clean up --- docs/mkdocs/docs/api/basic_json/dump.md | 4 ++-- docs/mkdocs/docs/api/basic_json/error_handler_t.md | 2 +- tests/src/unit-regression2.cpp | 7 +++++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/docs/mkdocs/docs/api/basic_json/dump.md b/docs/mkdocs/docs/api/basic_json/dump.md index 30388f660..1ff15e3f6 100644 --- a/docs/mkdocs/docs/api/basic_json/dump.md +++ b/docs/mkdocs/docs/api/basic_json/dump.md @@ -28,8 +28,8 @@ and `ensure_ascii` parameters. : how to react on decoding errors; there are three possible values (see [`error_handler_t`](error_handler_t.md)): : - `strict`: throws a [`type_error`](../../home/exceptions.md#type-errors) exception in case a decoding error occurs (this is the default), - `replace`: replace invalid UTF-8 sequences with U+FFFD (� REPLACEMENT CHARACTER), - - `ignore`: ignore invalid UTF-8 sequences during serialization, and - - `keep`: keep invalid UTF-8 sequences during serialization; all bytes are copied to the output unchanged + - `ignore`: ignore invalid UTF-8 sequences during serialization (i.e., these bytes are skipped and not copied to the output), and + - `keep`: keep invalid UTF-8 sequences during serialization (i.e., all bytes are copied to the output unchanged) ## Return value diff --git a/docs/mkdocs/docs/api/basic_json/error_handler_t.md b/docs/mkdocs/docs/api/basic_json/error_handler_t.md index 1b3c7456a..437abdc61 100644 --- a/docs/mkdocs/docs/api/basic_json/error_handler_t.md +++ b/docs/mkdocs/docs/api/basic_json/error_handler_t.md @@ -18,7 +18,7 @@ replace : replace invalid UTF-8 sequences with U+FFFD (� REPLACEMENT CHARACTER) ignore -: ignore invalid UTF-8 sequences +: ignore invalid UTF-8 sequences; these bytes are skipped and not copied to the output keep : keep invalid UTF-8 sequences; all bytes are copied to the output unchanged diff --git a/tests/src/unit-regression2.cpp b/tests/src/unit-regression2.cpp index b8c8b782f..29bfdf541 100644 --- a/tests/src/unit-regression2.cpp +++ b/tests/src/unit-regression2.cpp @@ -1000,8 +1000,11 @@ TEST_CASE("regression tests 2") { nlohmann::json node; node["test"] = "test\334\005"; - const auto test_dump = node.dump(-1, ' ', false, nlohmann::json::error_handler_t::keep); - CHECK(test_dump == "{\"test\":\"test\334\005\"}"); + auto x = node.dump(-1, ' ', false, nlohmann::json::error_handler_t::ignore); + + CHECK(node.dump(-1, ' ', false, nlohmann::json::error_handler_t::ignore) == "{\"test\":\"test\\u0005\"}"); + CHECK(node.dump(-1, ' ', false, nlohmann::json::error_handler_t::keep) == "{\"test\":\"test\334\005\"}"); + CHECK(node.dump(-1, ' ', true, nlohmann::json::error_handler_t::keep) == "{\"test\":\"test\334\005\"}"); } } From 493d1e44677354343b82f52bb28c4aadb55d35fa Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 22 Dec 2024 17:35:15 +0100 Subject: [PATCH 14/15] :rotating_light: fix warnings --- tests/src/unit-regression2.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/src/unit-regression2.cpp b/tests/src/unit-regression2.cpp index 29bfdf541..6cde58a51 100644 --- a/tests/src/unit-regression2.cpp +++ b/tests/src/unit-regression2.cpp @@ -1000,8 +1000,6 @@ TEST_CASE("regression tests 2") { nlohmann::json node; node["test"] = "test\334\005"; - auto x = node.dump(-1, ' ', false, nlohmann::json::error_handler_t::ignore); - CHECK(node.dump(-1, ' ', false, nlohmann::json::error_handler_t::ignore) == "{\"test\":\"test\\u0005\"}"); CHECK(node.dump(-1, ' ', false, nlohmann::json::error_handler_t::keep) == "{\"test\":\"test\334\005\"}"); CHECK(node.dump(-1, ' ', true, nlohmann::json::error_handler_t::keep) == "{\"test\":\"test\334\005\"}"); From 4ab98c39c3a7aa7ccd727f83f3350a0d1d44ca4c Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Mon, 23 Dec 2024 11:40:05 +0100 Subject: [PATCH 15/15] Update docs/mkdocs/docs/api/basic_json/dump.md Co-authored-by: gentooise --- docs/mkdocs/docs/api/basic_json/dump.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/mkdocs/docs/api/basic_json/dump.md b/docs/mkdocs/docs/api/basic_json/dump.md index 1ff15e3f6..7e1ca0f81 100644 --- a/docs/mkdocs/docs/api/basic_json/dump.md +++ b/docs/mkdocs/docs/api/basic_json/dump.md @@ -25,7 +25,7 @@ and `ensure_ascii` parameters. result consists of ASCII characters only. `error_handler` (in) -: how to react on decoding errors; there are three possible values (see [`error_handler_t`](error_handler_t.md)): +: how to react on decoding errors; there are four possible values (see [`error_handler_t`](error_handler_t.md)): : - `strict`: throws a [`type_error`](../../home/exceptions.md#type-errors) exception in case a decoding error occurs (this is the default), - `replace`: replace invalid UTF-8 sequences with U+FFFD (� REPLACEMENT CHARACTER), - `ignore`: ignore invalid UTF-8 sequences during serialization (i.e., these bytes are skipped and not copied to the output), and