From e27c12706829672d97c2760827728efc187359e1 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sat, 11 Sep 2021 22:46:49 +0200 Subject: [PATCH] :construction: start BON8 parser --- .../nlohmann/detail/input/binary_reader.hpp | 185 ++++++++++++++- include/nlohmann/json.hpp | 30 +++ single_include/nlohmann/json.hpp | 215 +++++++++++++++++- test/src/unit-bon8.cpp | 34 +++ 4 files changed, 460 insertions(+), 4 deletions(-) diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index 2cb7bc769..234017f7d 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -120,7 +120,10 @@ class binary_reader result = parse_ubjson_internal(); break; - case input_format_t::bon8: // LCOV_EXCL_LINE + case input_format_t::bon8: + result = parse_bon8_internal(true); + break; + case input_format_t::json: // LCOV_EXCL_LINE default: // LCOV_EXCL_LINE JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE @@ -2300,6 +2303,181 @@ class binary_reader } } + ////////// + // BON8 // + ////////// + + /*! + @param[in] get_char whether a new character should be retrieved from the + input (true) or whether the last read character should + be considered instead (false) + + @return whether a valid BON8 value was passed to the SAX parser; bytes outside 0x80-0xC1 and 0xF8-0xFD are rejected via sax->parse_error (error 112), and the object markers 0x86-0x8B currently always yield false + */ + bool parse_bon8_internal(const bool get_char) + { + switch (get_char ? 
get() : current) + { + case 0x80: + case 0x81: + case 0x82: + case 0x83: + case 0x84: + return get_bon8_array(static_cast(current - 0x80)); /* 0x80-0x84: array whose element count (0-4) is encoded in the marker */ + + case 0x85: + return get_bon8_array(static_cast(-1)); /* 0x85: array of unknown length; size_t(-1) is the sentinel checked in get_bon8_array */ + + case 0x86: + case 0x87: + case 0x88: + case 0x89: + case 0x8A: + return get_bon8_object(static_cast(current - 0x86)); /* 0x86-0x8A: object marker; the value current - 0x86 (0-4) is passed as len */ + + case 0x8B: + return get_bon8_object(static_cast(-1)); /* 0x8B: object marker passed with the sentinel size_t(-1) */ + + case 0x8C: + { + std::int32_t number{}; + return get_number(input_format_t::bon8, number) && sax->number_integer(number); + } + + case 0x8D: + { + std::int64_t number{}; + return get_number(input_format_t::bon8, number) && sax->number_integer(number); + } + + case 0x8E: + { + float number{}; + return get_number(input_format_t::bon8, number) && sax->number_float(static_cast(number), ""); + } + + case 0x8F: + { + double number{}; + return get_number(input_format_t::bon8, number) && sax->number_float(static_cast(number), ""); + } + + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + case 0x98: + case 0x99: + case 0x9A: + case 0x9B: + case 0x9C: + case 0x9D: + case 0x9E: + case 0x9F: + case 0xA0: + case 0xA1: + case 0xA2: + case 0xA3: + case 0xA4: + case 0xA5: + case 0xA6: + case 0xA7: + case 0xA8: + case 0xA9: + case 0xAA: + case 0xAB: + case 0xAC: + case 0xAD: + case 0xAE: + case 0xAF: + case 0xB0: + case 0xB1: + case 0xB2: + case 0xB3: + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + return sax->number_unsigned(current - 0x90); /* 0x90-0xB7: unsigned integers 0-39 stored in the marker itself */ + + case 0xB8: + case 0xB9: + case 0xBA: + case 0xBB: + case 0xBC: + case 0xBD: + case 0xBE: + case 0xBF: + case 0xC0: + case 0xC1: + return sax->number_integer(0xB7 - current); /* 0xB8-0xC1: negative integers -1 to -10 stored in the marker itself */ + + case 0xF8: + return sax->boolean(false); + + case 0xF9: + return sax->boolean(true); + + case 0xFA: + return sax->null(); + + case 0xFB: + return sax->number_float(-1.0, ""); + + case 0xFC: + return sax->number_float(0.0, ""); + + case 0xFD: + return sax->number_float(1.0, ""); + + default: // anything else + { + auto last_token = 
get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::bon8, "invalid byte: 0x" + last_token, "value"), BasicJsonType())); + } + } + } + + bool get_bon8_array(const std::size_t len) /* emits start_array(len), then the elements, then end_array to the SAX consumer */ + { + if (JSON_HEDLEY_UNLIKELY(!sax->start_array(len))) + { + return false; + } + + if (len != std::size_t(-1)) /* known length: parse exactly len values */ + { + for (std::size_t i = 0; i < len; ++i) + { + if (JSON_HEDLEY_UNLIKELY(!parse_bon8_internal(true))) + { + return false; + } + } + } + else /* unknown length: values follow until the terminator byte 0xFE */ + { + while (get() != 0xFE) /* get() already consumed the first byte of the next value, hence parse_bon8_internal(false) below; NOTE(review): no end-of-input check is visible here, so truncated input presumably ends in the invalid-byte error of the default branch - confirm */ + { + if (JSON_HEDLEY_UNLIKELY(!parse_bon8_internal(false))) + { + return false; + } + } + } + + return sax->end_array(); + } + + bool get_bon8_object(const std::size_t len) + { + return false; /* NOTE(review): placeholder - objects are rejected with false and without a sax->parse_error call; len is unused */ + } + /////////////////////// // Utility functions // /////////////////////// @@ -2497,7 +2675,10 @@ class binary_reader error_msg += "BSON"; break; - case input_format_t::bon8: // LCOV_EXCL_LINE + case input_format_t::bon8: + error_msg += "BON8"; + break; + case input_format_t::json: // LCOV_EXCL_LINE default: // LCOV_EXCL_LINE JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE diff --git a/include/nlohmann/json.hpp b/include/nlohmann/json.hpp index ff57fbeaf..86510b4a8 100644 --- a/include/nlohmann/json.hpp +++ b/include/nlohmann/json.hpp @@ -8211,6 +8211,36 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::bson, &sdp, strict); return res ? result : basic_json(value_t::discarded); } + + template + JSON_HEDLEY_WARN_UNUSED_RESULT + static basic_json from_bon8(InputType&& i, + const bool strict = true, + const bool allow_exceptions = true) + { /* runs the binary reader in BON8 mode over the input; returns a value_t::discarded value when sax_parse reports failure */ + basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); + auto ia = detail::input_adapter(std::forward(i)); + const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::bon8, &sdp, strict); + return res ? 
result : basic_json(value_t::discarded); + } + + /*! + @copydoc from_bon8(InputType&&, const bool, const bool) + */ + template + JSON_HEDLEY_WARN_UNUSED_RESULT + static basic_json from_bon8(IteratorType first, IteratorType last, + const bool strict = true, + const bool allow_exceptions = true) + { /* iterator-pair variant: runs the binary reader in BON8 mode; returns a value_t::discarded value when sax_parse reports failure */ + basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); + auto ia = detail::input_adapter(std::move(first), std::move(last)); + const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::bon8, &sdp, strict); + return res ? result : basic_json(value_t::discarded); + } + /// @} ////////////////////////// diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 546bc5e95..2cea5f013 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -8414,7 +8414,10 @@ class binary_reader result = parse_ubjson_internal(); break; - case input_format_t::bon8: // LCOV_EXCL_LINE + case input_format_t::bon8: + result = parse_bon8_internal(true); + break; + case input_format_t::json: // LCOV_EXCL_LINE default: // LCOV_EXCL_LINE JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE @@ -10594,6 +10597,181 @@ class binary_reader } } + ////////// + // BON8 // + ////////// + + /*! + @param[in] get_char whether a new character should be retrieved from the + input (true) or whether the last read character should + be considered instead (false) + + @return whether a valid BON8 value was passed to the SAX parser; bytes outside 0x80-0xC1 and 0xF8-0xFD are rejected via sax->parse_error (error 112), and the object markers 0x86-0x8B currently always yield false + */ + bool parse_bon8_internal(const bool get_char) + { + switch (get_char ? 
get() : current) + { + case 0x80: + case 0x81: + case 0x82: + case 0x83: + case 0x84: + return get_bon8_array(static_cast(current - 0x80)); /* 0x80-0x84: array whose element count (0-4) is encoded in the marker */ + + case 0x85: + return get_bon8_array(static_cast(-1)); /* 0x85: array of unknown length; size_t(-1) is the sentinel checked in get_bon8_array */ + + case 0x86: + case 0x87: + case 0x88: + case 0x89: + case 0x8A: + return get_bon8_object(static_cast(current - 0x86)); /* 0x86-0x8A: object marker; the value current - 0x86 (0-4) is passed as len */ + + case 0x8B: + return get_bon8_object(static_cast(-1)); /* 0x8B: object marker passed with the sentinel size_t(-1) */ + + case 0x8C: + { + std::int32_t number{}; + return get_number(input_format_t::bon8, number) && sax->number_integer(number); + } + + case 0x8D: + { + std::int64_t number{}; + return get_number(input_format_t::bon8, number) && sax->number_integer(number); + } + + case 0x8E: + { + float number{}; + return get_number(input_format_t::bon8, number) && sax->number_float(static_cast(number), ""); + } + + case 0x8F: + { + double number{}; + return get_number(input_format_t::bon8, number) && sax->number_float(static_cast(number), ""); + } + + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + case 0x98: + case 0x99: + case 0x9A: + case 0x9B: + case 0x9C: + case 0x9D: + case 0x9E: + case 0x9F: + case 0xA0: + case 0xA1: + case 0xA2: + case 0xA3: + case 0xA4: + case 0xA5: + case 0xA6: + case 0xA7: + case 0xA8: + case 0xA9: + case 0xAA: + case 0xAB: + case 0xAC: + case 0xAD: + case 0xAE: + case 0xAF: + case 0xB0: + case 0xB1: + case 0xB2: + case 0xB3: + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + return sax->number_unsigned(current - 0x90); /* 0x90-0xB7: unsigned integers 0-39 stored in the marker itself */ + + case 0xB8: + case 0xB9: + case 0xBA: + case 0xBB: + case 0xBC: + case 0xBD: + case 0xBE: + case 0xBF: + case 0xC0: + case 0xC1: + return sax->number_integer(0xB7 - current); /* 0xB8-0xC1: negative integers -1 to -10 stored in the marker itself */ + + case 0xF8: + return sax->boolean(false); + + case 0xF9: + return sax->boolean(true); + + case 0xFA: + return sax->null(); + + case 0xFB: + return sax->number_float(-1.0, ""); + + case 0xFC: + return sax->number_float(0.0, ""); + + case 0xFD: + return sax->number_float(1.0, ""); + + default: // anything else + { + auto last_token = 
get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::bon8, "invalid byte: 0x" + last_token, "value"), BasicJsonType())); + } + } + } + + bool get_bon8_array(const std::size_t len) /* emits start_array(len), then the elements, then end_array to the SAX consumer */ + { + if (JSON_HEDLEY_UNLIKELY(!sax->start_array(len))) + { + return false; + } + + if (len != std::size_t(-1)) /* known length: parse exactly len values */ + { + for (std::size_t i = 0; i < len; ++i) + { + if (JSON_HEDLEY_UNLIKELY(!parse_bon8_internal(true))) + { + return false; + } + } + } + else /* unknown length: values follow until the terminator byte 0xFE */ + { + while (get() != 0xFE) /* get() already consumed the first byte of the next value, hence parse_bon8_internal(false) below; NOTE(review): no end-of-input check is visible here, so truncated input presumably ends in the invalid-byte error of the default branch - confirm */ + { + if (JSON_HEDLEY_UNLIKELY(!parse_bon8_internal(false))) + { + return false; + } + } + } + + return sax->end_array(); + } + + bool get_bon8_object(const std::size_t len) + { + return false; /* NOTE(review): placeholder - objects are rejected with false and without a sax->parse_error call; len is unused */ + } + /////////////////////// // Utility functions // /////////////////////// @@ -10791,7 +10969,10 @@ class binary_reader error_msg += "BSON"; break; - case input_format_t::bon8: // LCOV_EXCL_LINE + case input_format_t::bon8: + error_msg += "BON8"; + break; + case input_format_t::json: // LCOV_EXCL_LINE default: // LCOV_EXCL_LINE JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE @@ -25935,6 +26116,36 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::bson, &sdp, strict); return res ? result : basic_json(value_t::discarded); } + + template + JSON_HEDLEY_WARN_UNUSED_RESULT + static basic_json from_bon8(InputType&& i, + const bool strict = true, + const bool allow_exceptions = true) + { /* runs the binary reader in BON8 mode over the input; returns a value_t::discarded value when sax_parse reports failure */ + basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); + auto ia = detail::input_adapter(std::forward(i)); + const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::bon8, &sdp, strict); + return res ? result : basic_json(value_t::discarded); + } + + /*! 
+ @copydoc from_bon8(InputType&&, const bool, const bool) + */ + template + JSON_HEDLEY_WARN_UNUSED_RESULT + static basic_json from_bon8(IteratorType first, IteratorType last, + const bool strict = true, + const bool allow_exceptions = true) + { /* iterator-pair variant: runs the binary reader in BON8 mode; returns a value_t::discarded value when sax_parse reports failure */ + basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); + auto ia = detail::input_adapter(std::move(first), std::move(last)); + const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::bon8, &sdp, strict); + return res ? result : basic_json(value_t::discarded); + } + /// @} ////////////////////////// diff --git a/test/src/unit-bon8.cpp b/test/src/unit-bon8.cpp index 06cf57073..1858e05b9 100644 --- a/test/src/unit-bon8.cpp +++ b/test/src/unit-bon8.cpp @@ -56,6 +56,7 @@ TEST_CASE("BON8") std::vector expected = {0xFA}; const auto result = json::to_bon8(j); CHECK(result == expected); + CHECK(json::from_bon8(result) == j); } SECTION("boolean") @@ -66,6 +67,7 @@ TEST_CASE("BON8") std::vector expected = {0xF9}; const auto result = json::to_bon8(j); CHECK(result == expected); + CHECK(json::from_bon8(result) == j); } SECTION("false") @@ -74,6 +76,7 @@ TEST_CASE("BON8") std::vector expected = {0xF8}; const auto result = json::to_bon8(j); CHECK(result == expected); + CHECK(json::from_bon8(result) == j); } } @@ -87,6 +90,7 @@ TEST_CASE("BON8") std::vector expected = {0x90}; const auto result = json::to_bon8(j); CHECK(result == expected); + CHECK(json::from_bon8(result) == j); } SECTION("39") @@ -95,6 +99,7 @@ TEST_CASE("BON8") std::vector expected = {0xB7}; const auto result = json::to_bon8(j); CHECK(result == expected); + CHECK(json::from_bon8(result) == j); } } @@ -163,6 +168,7 @@ TEST_CASE("BON8") std::vector expected = {0x8C, 0x04, 0x08, 0x0F, 0x28}; const auto result = json::to_bon8(j); CHECK(result == expected); + CHECK(json::from_bon8(result) == j); } SECTION("2147483647 (int32max)") @@ -171,6 +177,7 @@ TEST_CASE("BON8") std::vector expected = {0x8C, 0x7F, 0xFF, 0xFF, 0xFF}; const auto 
result = json::to_bon8(j); CHECK(result == expected); + CHECK(json::from_bon8(result) == j); } } @@ -182,6 +189,7 @@ TEST_CASE("BON8") std::vector expected = {0x8D, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00}; const auto result = json::to_bon8(j); CHECK(result == expected); + CHECK(json::from_bon8(result) == j); } SECTION("9223372036854775807 (int64max)") @@ -190,6 +198,7 @@ TEST_CASE("BON8") std::vector expected = {0x8D, 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}; const auto result = json::to_bon8(j); CHECK(result == expected); + CHECK(json::from_bon8(result) == j); } } @@ -210,6 +219,7 @@ TEST_CASE("BON8") std::vector expected = {0x8D, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; const auto result = json::to_bon8(j); CHECK(result == expected); + CHECK(json::from_bon8(result) == j); } SECTION("-2147483649") @@ -219,6 +229,7 @@ TEST_CASE("BON8") std::vector expected = {0x8D, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF}; const auto result = json::to_bon8(j); CHECK(result == expected); + CHECK(json::from_bon8(result) == j); } } @@ -231,6 +242,7 @@ TEST_CASE("BON8") std::vector expected = {0x8C, 0x80, 0x00, 0x00, 0x00}; const auto result = json::to_bon8(j); CHECK(result == expected); + CHECK(json::from_bon8(result) == j); } SECTION("-33818507") @@ -239,6 +251,7 @@ TEST_CASE("BON8") std::vector expected = {0x8C, 0xFD, 0xFB, 0xF8, 0x75}; const auto result = json::to_bon8(j); CHECK(result == expected); + CHECK(json::from_bon8(result) == j); } } @@ -307,6 +320,7 @@ TEST_CASE("BON8") std::vector expected = {0xC1}; const auto result = json::to_bon8(j); CHECK(result == expected); + CHECK(json::from_bon8(result) == j); } SECTION("-1") @@ -315,6 +329,7 @@ TEST_CASE("BON8") std::vector expected = {0xB8}; const auto result = json::to_bon8(j); CHECK(result == expected); + CHECK(json::from_bon8(result) == j); } } @@ -326,6 +341,7 @@ TEST_CASE("BON8") std::vector expected = {0x90}; const auto result = json::to_bon8(j); CHECK(result == expected); + 
CHECK(json::from_bon8(result) == j); } SECTION("39") @@ -334,6 +350,7 @@ TEST_CASE("BON8") std::vector expected = {0xB7}; const auto result = json::to_bon8(j); CHECK(result == expected); + CHECK(json::from_bon8(result) == j); } } @@ -443,6 +460,7 @@ TEST_CASE("BON8") std::vector expected = {0xFB}; const auto result = json::to_bon8(j); CHECK(result == expected); + CHECK(json::from_bon8(result) == j); } SECTION("0.0") @@ -451,6 +469,7 @@ TEST_CASE("BON8") std::vector expected = {0xFC}; const auto result = json::to_bon8(j); CHECK(result == expected); + CHECK(json::from_bon8(result) == j); } SECTION("1.0") @@ -459,6 +478,7 @@ TEST_CASE("BON8") std::vector expected = {0xFD}; const auto result = json::to_bon8(j); CHECK(result == expected); + CHECK(json::from_bon8(result) == j); } SECTION("-0.0") @@ -467,6 +487,7 @@ TEST_CASE("BON8") std::vector expected = {0x8E, 0x80, 0x00, 0x00, 0x00}; const auto result = json::to_bon8(j); CHECK(result == expected); + CHECK(json::from_bon8(result) == j); } SECTION("NAN") @@ -475,6 +496,8 @@ TEST_CASE("BON8") std::vector expected = {0x8E, 0x7F, 0x80, 0x00, 0x01}; const auto result = json::to_bon8(j); CHECK(result == expected); + json::number_float_t d{json::from_bon8(result)}; + CHECK(std::isnan(d)); } SECTION("infinity") @@ -483,6 +506,7 @@ TEST_CASE("BON8") std::vector expected = {0x8E, 0x7F, 0x80, 0x00, 0x00}; const auto result = json::to_bon8(j); CHECK(result == expected); + CHECK(json::from_bon8(result) == j); } SECTION("-infinity") @@ -491,6 +515,7 @@ TEST_CASE("BON8") std::vector expected = {0x8E, 0xFF, 0x80, 0x00, 0x00}; const auto result = json::to_bon8(j); CHECK(result == expected); + CHECK(json::from_bon8(result) == j); } } @@ -502,6 +527,7 @@ TEST_CASE("BON8") std::vector expected = {0x8E, 0x40, 0x00, 0x00, 0x00}; const auto result = json::to_bon8(j); CHECK(result == expected); + CHECK(json::from_bon8(result) == j); } } @@ -513,6 +539,7 @@ TEST_CASE("BON8") std::vector expected = {0x8F, 0x41, 0x97, 0xD7, 0x84, 0x00, 0x66, 
0x66, 0x66}; const auto result = json::to_bon8(j); CHECK(result == expected); + CHECK(json::from_bon8(result) == j); } } } @@ -546,6 +573,7 @@ TEST_CASE("BON8") std::vector expected = {0x80}; const auto result = json::to_bon8(j); CHECK(result == expected); + CHECK(json::from_bon8(result) == j); } SECTION("[false]") @@ -554,6 +582,7 @@ TEST_CASE("BON8") std::vector expected = {0x81, 0xF8}; const auto result = json::to_bon8(j); CHECK(result == expected); + CHECK(json::from_bon8(result) == j); } SECTION("[false, null]") @@ -562,6 +591,7 @@ TEST_CASE("BON8") std::vector expected = {0x82, 0xF8, 0xFA}; const auto result = json::to_bon8(j); CHECK(result == expected); + CHECK(json::from_bon8(result) == j); } SECTION("[false, null, true]") @@ -570,6 +600,7 @@ TEST_CASE("BON8") std::vector expected = {0x83, 0xF8, 0xFA, 0xF9}; const auto result = json::to_bon8(j); CHECK(result == expected); + CHECK(json::from_bon8(result) == j); } SECTION("[false, null, true, 1.0]") @@ -578,6 +609,7 @@ TEST_CASE("BON8") std::vector expected = {0x84, 0xF8, 0xFA, 0xF9, 0xFD}; const auto result = json::to_bon8(j); CHECK(result == expected); + CHECK(json::from_bon8(result) == j); } SECTION("[\"s\", \"s\"]") @@ -610,6 +642,7 @@ TEST_CASE("BON8") std::vector expected = {0x81, 0x81, 0x81, 0x91}; const auto result = json::to_bon8(j); CHECK(result == expected); + CHECK(json::from_bon8(result) == j); } SECTION("[[[\"\"]]]") @@ -629,6 +662,7 @@ TEST_CASE("BON8") std::vector expected = {0x85, 0xF8, 0xFA, 0xF9, 0xFD, 0x80, 0xFC, 0xFE}; const auto result = json::to_bon8(j); CHECK(result == expected); + CHECK(json::from_bon8(result) == j); } } }