From 17b5262e7d43265a13c1b18fef7cddfdfabf3657 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Fri, 29 Nov 2024 17:47:58 +0100 Subject: [PATCH 1/3] :recycle: allow to continue after parse error --- include/nlohmann/detail/input/parser.hpp | 66 ++++++++++++++++-------- single_include/nlohmann/json.hpp | 66 ++++++++++++++++-------- 2 files changed, 88 insertions(+), 44 deletions(-) diff --git a/include/nlohmann/detail/input/parser.hpp b/include/nlohmann/detail/input/parser.hpp index e46b0ef2a..0c61305f8 100644 --- a/include/nlohmann/detail/input/parser.hpp +++ b/include/nlohmann/detail/input/parser.hpp @@ -212,9 +212,12 @@ class parser // parse key if (JSON_HEDLEY_UNLIKELY(last_token != token_type::value_string)) { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string, "object key"), nullptr)); + if (!sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string, "object key"), nullptr))) + { + return false; + } } if (JSON_HEDLEY_UNLIKELY(!sax->key(m_lexer.get_string()))) { @@ -224,9 +227,12 @@ class parser // parse separator (:) if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator)) { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator, "object separator"), nullptr)); + if (!sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator, "object separator"), nullptr))) + { + return false; + } } // remember we are now inside an object @@ -267,9 +273,12 @@ class parser if (JSON_HEDLEY_UNLIKELY(!std::isfinite(res))) { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - out_of_range::create(406, concat("number overflow parsing '", m_lexer.get_token_string(), '\''), nullptr)); + if (!sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + out_of_range::create(406, concat("number overflow parsing '", m_lexer.get_token_string(), '\''), nullptr))) + { + return false; + } } if (JSON_HEDLEY_UNLIKELY(!sax->number_float(res, m_lexer.get_string()))) @@ -337,18 +346,25 @@ class parser case token_type::parse_error: { // using "uninitialized" to avoid "expected" message - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::uninitialized, "value"), nullptr)); + if (!sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::uninitialized, "value"), nullptr))) + { + return false; + } + break; } case token_type::end_of_input: { if (JSON_HEDLEY_UNLIKELY(m_lexer.get_position().chars_read_total == 1)) { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), - "attempting to parse an empty input; check that your input string or stream contains the expected JSON", nullptr)); + if (!sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), + "attempting to parse an empty input; check that your input string or stream contains the expected JSON", nullptr))) + { + return false; + } } return sax->parse_error(m_lexer.get_position(), @@ -422,9 +438,12 @@ class parser // parse key if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::value_string)) { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string, "object key"), nullptr)); + if (!sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string, "object key"), nullptr))) + { + return false; + } } if (JSON_HEDLEY_UNLIKELY(!sax->key(m_lexer.get_string()))) @@ -435,9 +454,12 @@ class parser // parse separator (:) if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator)) { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator, "object separator"), nullptr)); + if (!sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator, "object separator"), nullptr))) + { + return false; + } } // parse values diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 02441416b..d9e6e80f7 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -12568,9 +12568,12 @@ class parser // parse key if (JSON_HEDLEY_UNLIKELY(last_token != token_type::value_string)) { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string, "object key"), nullptr)); + if (!sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string, "object key"), nullptr))) + { + return false; + } } if (JSON_HEDLEY_UNLIKELY(!sax->key(m_lexer.get_string()))) { @@ -12580,9 +12583,12 @@ class parser // parse separator (:) if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator)) { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator, "object separator"), nullptr)); + if (!sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator, "object separator"), nullptr))) + { + return false; + } } // remember we are now inside an object @@ -12623,9 +12629,12 @@ class parser if (JSON_HEDLEY_UNLIKELY(!std::isfinite(res))) { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - out_of_range::create(406, concat("number overflow parsing '", m_lexer.get_token_string(), '\''), nullptr)); + if (!sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + out_of_range::create(406, concat("number overflow parsing '", m_lexer.get_token_string(), '\''), nullptr))) + { + return false; + } } if (JSON_HEDLEY_UNLIKELY(!sax->number_float(res, m_lexer.get_string()))) @@ -12693,18 +12702,25 @@ class parser case token_type::parse_error: { // using "uninitialized" to avoid "expected" message - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::uninitialized, "value"), nullptr)); + if (!sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::uninitialized, "value"), nullptr))) + { + return false; + } + break; } case token_type::end_of_input: { if (JSON_HEDLEY_UNLIKELY(m_lexer.get_position().chars_read_total == 1)) { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), - "attempting to parse an empty input; check that your input string or stream contains the expected JSON", nullptr)); + if (!sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), + "attempting to parse an empty input; check that your input string or stream contains the expected JSON", nullptr))) + { + return false; + } } return sax->parse_error(m_lexer.get_position(), @@ -12778,9 +12794,12 @@ class parser // parse key if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::value_string)) { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string, "object key"), nullptr)); + if (!sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string, "object key"), nullptr))) + { + return false; + } } if (JSON_HEDLEY_UNLIKELY(!sax->key(m_lexer.get_string()))) @@ -12791,9 +12810,12 @@ class parser // parse separator (:) if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator)) { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator, "object separator"), nullptr)); + if (!sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator, "object separator"), nullptr))) + { + return false; + } } // parse values From 7c97655f5943a5f2be9292384ac3925ddc41ce4f Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sat, 30 Nov 2024 13:54:43 +0100 Subject: [PATCH 2/3] :recycle: allow to continue after parse error --- include/nlohmann/detail/input/parser.hpp | 2 ++ single_include/nlohmann/json.hpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/include/nlohmann/detail/input/parser.hpp b/include/nlohmann/detail/input/parser.hpp index 0c61305f8..16fcbb552 100644 --- a/include/nlohmann/detail/input/parser.hpp +++ b/include/nlohmann/detail/input/parser.hpp @@ -354,6 +354,7 @@ class parser } break; } + case token_type::end_of_input: { if (JSON_HEDLEY_UNLIKELY(m_lexer.get_position().chars_read_total == 1)) @@ -371,6 +372,7 @@ class parser m_lexer.get_token_string(), parse_error::create(101, m_lexer.get_position(), exception_message(token_type::literal_or_value, "value"), nullptr)); } + case token_type::uninitialized: case token_type::end_array: case token_type::end_object: diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index d9e6e80f7..cb7b272a8 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -12710,6 +12710,7 @@ class parser } break; } + case token_type::end_of_input: { if (JSON_HEDLEY_UNLIKELY(m_lexer.get_position().chars_read_total == 1)) @@ -12727,6 +12728,7 @@ class parser m_lexer.get_token_string(), parse_error::create(101, m_lexer.get_position(), exception_message(token_type::literal_or_value, "value"), nullptr)); } + case token_type::uninitialized: case token_type::end_array: case token_type::end_object: From cd1e38dcc498e548b743da256457423919a8233e Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sat, 30 Nov 2024 13:56:32 +0100 Subject: [PATCH 3/3] :recycle: allow to continue after parse error --- tests/src/unit-class_parser.cpp | 38 +++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/tests/src/unit-class_parser.cpp b/tests/src/unit-class_parser.cpp index 017a789b4..55256a30b 100644 --- a/tests/src/unit-class_parser.cpp +++ b/tests/src/unit-class_parser.cpp @@ -117,15 +117,28 @@ class SaxEventLogger return true; } - bool parse_error(std::size_t position, const std::string& /*unused*/, const json::exception& /*unused*/) + bool parse_error(std::size_t position, const std::string& msg, const json::exception& exception) { errored = true; - events.push_back("parse_error(" + std::to_string(position) + ")"); - return false; + events.push_back("parse_error(position=" + std::to_string(position) + ", token=<" + msg + ">, exception=" + exception.what() + ")"); + return return_value_for_parse_error; } std::vector events {}; // NOLINT(readability-redundant-member-init) bool errored = false; + const bool return_value_for_parse_error = false; + std::string event_string() + { + return std::accumulate(events.begin(), events.end(), std::string(), + [](const std::string & a, const std::string & b) + { + return a.empty() ? b : a + '\n' + b; + }); + } + + explicit SaxEventLogger(bool continue_after_parse_error) + : return_value_for_parse_error(continue_after_parse_error) + {} }; class SaxCountdown : public nlohmann::json::json_sax_t @@ -244,7 +257,7 @@ bool accept_helper(const std::string& s) CHECK(ok_noexcept == ok_accept); // 4. parse with SAX (compare with relaxed accept result) - SaxEventLogger el; + SaxEventLogger el(false); CHECK_NOTHROW(json::sax_parse(s, &el, json::input_format_t::json, false)); CHECK(json::parser(nlohmann::detail::input_adapter(s)).accept(false) == !el.errored); @@ -1680,6 +1693,23 @@ TEST_CASE("parser class") CHECK(json::sax_parse("\"foo\"", &s) == false); } } + + SECTION("SAX parser continuing after parse error") + { + SaxEventLogger sax(true); + + SECTION("Foo") + { + CHECK(json::sax_parse("[{1}, \"a\"]", &sax)); + CHECK(sax.event_string() == "start_array()\n" + "start_object()\n" + "parse_error(position=3, token=<1>, exception=[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing object key - unexpected number literal; expected string literal)\n" + "key(1)\n" + "parse_error(position=4, token=<1}>, exception=[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing object separator - unexpected '}'; expected ':')\n" + "parse_error(position=5, token=<1},>, exception=[json.exception.parse_error.101] parse error at line 1, column 5: syntax error while parsing value - unexpected ','; expected '[', '{', or a literal)\n" + "parse_error(position=9, token=<\"a\">, exception=[json.exception.parse_error.101] parse error at line 1, column 9: syntax error while parsing value - unexpected string literal; expected end of input)"); + } + } } SECTION("error messages for comments")