WPILibC++ 2024.3.2
parser.h
Go to the documentation of this file.
1// __ _____ _____ _____
2// __| | __| | | | JSON for Modern C++
3// | | |__ | | | | | | version 3.11.2
4// |_____|_____|_____|_|___| https://github.com/nlohmann/json
5//
6// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann <https://nlohmann.me>
7// SPDX-License-Identifier: MIT
8
9#pragma once
10
11#include <cmath> // isfinite
12#include <cstdint> // uint8_t
13#include <functional> // function
14#include <string> // string
15#include <utility> // move
16#include <vector> // vector
17
25#include <wpi/detail/value_t.h>
26
28namespace detail
29{
30////////////
31// parser //
32////////////
33
/// Events passed to a parser callback (see parser_callback_t below) to tell it
/// which part of the document the parser has just processed.
enum class parse_event_t : std::uint8_t
{
    /// the parser read `{` and started to process a JSON object
    object_start,
    /// the parser read `}` and finished processing a JSON object
    object_end,
    /// the parser read `[` and started to process a JSON array
    array_start,
    /// the parser read `]` and finished processing a JSON array
    array_end,
    /// the parser read a key of a value in an object
    key,
    /// the parser finished reading a JSON value
    value
};

/// Signature of the user-supplied parser callback: it receives the current
/// nesting depth, the event that occurred and a reference to the value parsed
/// so far, and returns a bool.
/// NOTE(review): presumably returning false asks the parser to discard the
/// value - confirm against the library documentation.
template<typename BasicJsonType>
using parser_callback_t =
    std::function<bool(int /*depth*/, parse_event_t /*event*/, BasicJsonType& /*parsed*/)>;
53
54/*!
55@brief syntax analysis
56
57This class implements a recursive descent parser.

The parser pulls tokens from a lexer (m_lexer) and reports what it finds to a
SAX-style handler. Note that in the code below the nesting of arrays and
objects is tracked with an explicit stack (`states` in sax_parse_internal)
inside a loop; no member function here calls itself.

@tparam BasicJsonType     provides the number/string types aliased below and
                          is the type of the value filled in by parse()
@tparam InputAdapterType  type of the input adapter that is moved into the
                          lexer by the constructor
58*/
59template<typename BasicJsonType, typename InputAdapterType>
60class parser
61{
62 using number_integer_t = typename BasicJsonType::number_integer_t;
63 using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
64 using number_float_t = typename BasicJsonType::number_float_t;
65 using string_t = typename BasicJsonType::string_t;
 // NOTE(review): `lexer_t` is used on the next line and for the m_lexer
 // member, but no declaration of it appears in this listing (listing number
 // 66 is skipped) - presumably the alias line was lost; confirm against the
 // original header.
67 using token_type = typename lexer_t::token_type;
68
69 public:
70 /// a parser reading from an input adapter
 ///
 /// @param adapter            input adapter; moved into the lexer
 /// @param cb                 optional callback; when it is non-empty, parse()
 ///                           builds the result through the callback-aware
 ///                           SAX DOM parser
 /// @param allow_exceptions_  stored and later forwarded to the SAX DOM
 ///                           parsers created in parse()
 /// @param skip_comments      forwarded unchanged to the lexer constructor
 ///
 /// The constructor already reads the first token, so `last_token` is valid
 /// as soon as the object exists.
71 explicit parser(InputAdapterType&& adapter,
72 const parser_callback_t<BasicJsonType> cb = nullptr,
73 const bool allow_exceptions_ = true,
74 const bool skip_comments = false)
75 : callback(cb)
76 , m_lexer(std::move(adapter), skip_comments)
77 , allow_exceptions(allow_exceptions_)
78 {
79 // read first token
80 get_token();
81 }
82
83 /*!
84 @brief public parser interface
85
 Parses the input into @a result. Depending on whether a callback was given
 to the constructor, either a callback-aware or a plain SAX DOM parser is
 used as the event sink. If that sink reports an error, @a result is set to
 value_t::discarded.

86 @param[in] strict whether to expect the last token to be EOF
87 @param[in,out] result parsed JSON value
88
89 @throw parse_error.101 in case of an unexpected token
90 @throw parse_error.102 if to_unicode fails or surrogate error
91 @throw parse_error.103 if to_unicode fails

 NOTE(review): whether these are actually thrown presumably depends on the
 allow_exceptions flag handed to the SAX DOM parsers - confirm in json_sax.h.
92 */
93 void parse(const bool strict, BasicJsonType& result)
94 {
95 if (callback)
96 {
97 json_sax_dom_callback_parser<BasicJsonType> sdp(result, callback, allow_exceptions);
 // the bool returned here is ignored; failure is detected through
 // sdp.is_errored() below
98 sax_parse_internal(&sdp);
99
100 // in strict mode, input must be completely read
101 if (strict && (get_token() != token_type::end_of_input))
102 {
103 sdp.parse_error(m_lexer.get_position(),
104 m_lexer.get_token_string(),
105 parse_error::create(101, m_lexer.get_position(),
106 exception_message(token_type::end_of_input, "value"), nullptr));
107 }
108
109 // in case of an error, return discarded value
110 if (sdp.is_errored())
111 {
112 result = value_t::discarded;
113 return;
114 }
115
116 // set top-level value to null if it was discarded by the callback
117 // function
118 if (result.is_discarded())
119 {
120 result = nullptr;
121 }
122 }
123 else
124 {
 // no callback: same sequence as above with the plain SAX DOM parser
125 json_sax_dom_parser<BasicJsonType> sdp(result, allow_exceptions);
126 sax_parse_internal(&sdp);
127
128 // in strict mode, input must be completely read
129 if (strict && (get_token() != token_type::end_of_input))
130 {
131 sdp.parse_error(m_lexer.get_position(),
132 m_lexer.get_token_string(),
133 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_of_input, "value"), nullptr));
134 }
135
136 // in case of an error, return discarded value
137 if (sdp.is_errored())
138 {
139 result = value_t::discarded;
140 return;
141 }
142 }
143
 // only reached when no error was reported
144 result.assert_invariant();
145 }
146
147 /*!
148 @brief public accept interface
149
150 @param[in] strict whether to expect the last token to be EOF
151 @return whether the input is a proper JSON text
152 */
153 bool accept(const bool strict = true)
154 {
 // NOTE(review): `sax_acceptor` is not declared anywhere in this listing
 // (listing number 155 is skipped) - presumably the line that creates the
 // acceptor object was lost; confirm against the original header.
156 return sax_parse(&sax_acceptor, strict);
157 }
158
 /// Parse the input and report every event to the handler @a sax.
 ///
 /// @param sax     handler that receives the events; it is dereferenced
 ///                without a null check
 /// @param strict  when true, the token following the parsed value must be
 ///                end_of_input; otherwise parse error 101 is reported
 ///                through sax->parse_error() and its return value is
 ///                returned
 /// @return the result of sax_parse_internal(), or the result of
 ///         sax->parse_error() if the strict check fails
 ///
 /// NOTE(review): listing numbers 160 and 163 are skipped around this
 /// function, so an attribute and/or a statement may be missing here.
159 template<typename SAX>
161 bool sax_parse(SAX* sax, const bool strict = true)
162 {
164 const bool result = sax_parse_internal(sax);
165
166 // strict mode: next byte must be EOF
 // (the extra token is only read if parsing succeeded)
167 if (result && strict && (get_token() != token_type::end_of_input))
168 {
169 return sax->parse_error(m_lexer.get_position(),
170 m_lexer.get_token_string(),
171 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_of_input, "value"), nullptr));
172 }
173
174 return result;
175 }
176
177 private:
 /// Core parsing loop: consumes tokens for exactly one JSON value (including
 /// everything nested inside it) and forwards the matching events to @a sax.
 ///
 /// Precondition: the first token of the value has already been read into
 /// `last_token` (the constructor does this).
 ///
 /// @return true once a complete value was parsed; false as soon as a handler
 ///         call returns false; the result of sax->parse_error() when an
 ///         unexpected token is met
 ///
 /// NOTE(review): listing number 179 is skipped just before the signature,
 /// so an attribute line may be missing here.
178 template<typename SAX>
180 bool sax_parse_internal(SAX* sax)
181 {
182 // stack to remember the hierarchy of structured values we are parsing
183 // true = array; false = object
184 std::vector<bool> states;
185 // value to avoid a goto (see comment where set to true)
186 bool skip_to_state_evaluation = false;
187
188 while (true)
189 {
190 if (!skip_to_state_evaluation)
191 {
192 // invariant: get_token() was called before each iteration
193 switch (last_token)
194 {
195 case token_type::begin_object:
196 {
 // NOTE(review): the size_t(-1) argument presumably means
 // "number of elements unknown" - confirm in json_sax.h
197 if (JSON_HEDLEY_UNLIKELY(!sax->start_object(static_cast<std::size_t>(-1))))
198 {
199 return false;
200 }
201
202 // closing } -> we are done
203 if (get_token() == token_type::end_object)
204 {
205 if (JSON_HEDLEY_UNLIKELY(!sax->end_object()))
206 {
207 return false;
208 }
209 break;
210 }
211
212 // parse key
213 if (JSON_HEDLEY_UNLIKELY(last_token != token_type::value_string))
214 {
215 return sax->parse_error(m_lexer.get_position(),
216 m_lexer.get_token_string(),
217 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string, "object key"), nullptr));
218 }
219 if (JSON_HEDLEY_UNLIKELY(!sax->key(m_lexer.get_string())))
220 {
221 return false;
222 }
223
224 // parse separator (:)
225 if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator))
226 {
227 return sax->parse_error(m_lexer.get_position(),
228 m_lexer.get_token_string(),
229 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator, "object separator"), nullptr));
230 }
231
232 // remember we are now inside an object
233 states.push_back(false);
234
235 // parse values
236 get_token();
237 continue;
238 }
239
240 case token_type::begin_array:
241 {
242 if (JSON_HEDLEY_UNLIKELY(!sax->start_array(static_cast<std::size_t>(-1))))
243 {
244 return false;
245 }
246
247 // closing ] -> we are done
248 if (get_token() == token_type::end_array)
249 {
250 if (JSON_HEDLEY_UNLIKELY(!sax->end_array()))
251 {
252 return false;
253 }
254 break;
255 }
256
257 // remember we are now inside an array
258 states.push_back(true);
259
260 // parse values (no need to call get_token)
 // - the token read by the end_array check above is already
 // the first token of the first element
261 continue;
262 }
263
264 case token_type::value_float:
265 {
266 const auto res = m_lexer.get_number_float();
267
 // NOTE(review): as listed, the following block runs
 // unconditionally, which would make the number_float() call
 // below unreachable. Listing number 268 is skipped, so the
 // guarding condition was presumably lost (the file includes
 // <cmath> with the remark "isfinite"); confirm against the
 // original header.
269 {
270 return sax->parse_error(m_lexer.get_position(),
271 m_lexer.get_token_string(),
272 out_of_range::create(406, concat("number overflow parsing '", m_lexer.get_token_string(), '\''), nullptr));
273 }
274
275 if (JSON_HEDLEY_UNLIKELY(!sax->number_float(res, m_lexer.get_string())))
276 {
277 return false;
278 }
279
280 break;
281 }
282
283 case token_type::literal_false:
284 {
285 if (JSON_HEDLEY_UNLIKELY(!sax->boolean(false)))
286 {
287 return false;
288 }
289 break;
290 }
291
292 case token_type::literal_null:
293 {
294 if (JSON_HEDLEY_UNLIKELY(!sax->null()))
295 {
296 return false;
297 }
298 break;
299 }
300
301 case token_type::literal_true:
302 {
303 if (JSON_HEDLEY_UNLIKELY(!sax->boolean(true)))
304 {
305 return false;
306 }
307 break;
308 }
309
310 case token_type::value_integer:
311 {
312 if (JSON_HEDLEY_UNLIKELY(!sax->number_integer(m_lexer.get_number_integer())))
313 {
314 return false;
315 }
316 break;
317 }
318
319 case token_type::value_string:
320 {
321 if (JSON_HEDLEY_UNLIKELY(!sax->string(m_lexer.get_string())))
322 {
323 return false;
324 }
325 break;
326 }
327
328 case token_type::value_unsigned:
329 {
330 if (JSON_HEDLEY_UNLIKELY(!sax->number_unsigned(m_lexer.get_number_unsigned())))
331 {
332 return false;
333 }
334 break;
335 }
336
337 case token_type::parse_error:
338 {
339 // using "uninitialized" to avoid "expected" message
 // (exception_message() omits the "; expected ..." suffix
 // for token_type::uninitialized)
340 return sax->parse_error(m_lexer.get_position(),
341 m_lexer.get_token_string(),
342 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::uninitialized, "value"), nullptr));
343 }
344
345 case token_type::uninitialized:
346 case token_type::end_array:
347 case token_type::end_object:
348 case token_type::name_separator:
349 case token_type::value_separator:
350 case token_type::end_of_input:
351 case token_type::literal_or_value:
352 default: // the last token was unexpected
353 {
354 return sax->parse_error(m_lexer.get_position(),
355 m_lexer.get_token_string(),
356 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::literal_or_value, "value"), nullptr));
357 }
358 }
359 }
360 else
361 {
 // one-shot flag: the switch was skipped for this iteration only
362 skip_to_state_evaluation = false;
363 }
364
365 // we reached this line after we successfully parsed a value
366 if (states.empty())
367 {
368 // empty stack: we reached the end of the hierarchy: done
369 return true;
370 }
371
372 if (states.back()) // array
373 {
374 // comma -> next value
375 if (get_token() == token_type::value_separator)
376 {
377 // parse a new value
378 get_token();
379 continue;
380 }
381
382 // closing ]
383 if (JSON_HEDLEY_LIKELY(last_token == token_type::end_array))
384 {
385 if (JSON_HEDLEY_UNLIKELY(!sax->end_array()))
386 {
387 return false;
388 }
389
390 // We are done with this array. Before we can parse a
391 // new value, we need to evaluate the new state first.
392 // By setting skip_to_state_evaluation to true, we
393 // are effectively jumping to the beginning of this if.
394 JSON_ASSERT(!states.empty());
395 states.pop_back();
396 skip_to_state_evaluation = true;
397 continue;
398 }
399
 // neither ',' nor ']' after an array element
400 return sax->parse_error(m_lexer.get_position(),
401 m_lexer.get_token_string(),
402 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_array, "array"), nullptr));
403 }
404
405 // states.back() is false -> object
406
407 // comma -> next value
408 if (get_token() == token_type::value_separator)
409 {
410 // parse key
411 if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::value_string))
412 {
413 return sax->parse_error(m_lexer.get_position(),
414 m_lexer.get_token_string(),
415 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string, "object key"), nullptr));
416 }
417
418 if (JSON_HEDLEY_UNLIKELY(!sax->key(m_lexer.get_string())))
419 {
420 return false;
421 }
422
423 // parse separator (:)
424 if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator))
425 {
426 return sax->parse_error(m_lexer.get_position(),
427 m_lexer.get_token_string(),
428 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator, "object separator"), nullptr));
429 }
430
431 // parse values
432 get_token();
433 continue;
434 }
435
436 // closing }
437 if (JSON_HEDLEY_LIKELY(last_token == token_type::end_object))
438 {
439 if (JSON_HEDLEY_UNLIKELY(!sax->end_object()))
440 {
441 return false;
442 }
443
444 // We are done with this object. Before we can parse a
445 // new value, we need to evaluate the new state first.
446 // By setting skip_to_state_evaluation to true, we
447 // are effectively jumping to the beginning of this if.
448 JSON_ASSERT(!states.empty());
449 states.pop_back();
450 skip_to_state_evaluation = true;
451 continue;
452 }
453
 // neither ',' nor '}' after an object member
454 return sax->parse_error(m_lexer.get_position(),
455 m_lexer.get_token_string(),
456 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_object, "object"), nullptr));
457 }
458 }
459
460 /// get next token from lexer
 /// (the token is also remembered in `last_token` before it is returned)
461 token_type get_token()
462 {
463 return last_token = m_lexer.scan();
464 }
465
 /// Build the text of a syntax-error message from the current parser state.
 ///
 /// The message has the form
 ///   "syntax error [while parsing <context> ]- <details>[; expected <token>]"
 /// where <details> is the lexer's own error message plus the last read token
 /// text if `last_token` is parse_error, and "unexpected <token name>"
 /// otherwise.
 ///
 /// @param expected  token that would have been valid here; pass
 ///                  token_type::uninitialized to omit the "; expected" part
 /// @param context   what was being parsed; an empty string omits the
 ///                  "while parsing" part
 /// @return the assembled message
466 std::string exception_message(const token_type expected, const std::string& context)
467 {
468 std::string error_msg = "syntax error ";
469
470 if (!context.empty())
471 {
472 error_msg += concat("while parsing ", context, ' ');
473 }
474
475 error_msg += "- ";
476
477 if (last_token == token_type::parse_error)
478 {
479 error_msg += concat(m_lexer.get_error_message(), "; last read: '",
480 m_lexer.get_token_string(), '\'');
481 }
482 else
483 {
484 error_msg += concat("unexpected ", lexer_t::token_type_name(last_token));
485 }
486
487 if (expected != token_type::uninitialized)
488 {
489 error_msg += concat("; expected ", lexer_t::token_type_name(expected));
490 }
491
492 return error_msg;
493 }
494
495 private:
496 /// callback function
 /// (may be empty; parse() then takes the path without callback)
497 const parser_callback_t<BasicJsonType> callback = nullptr;
498 /// the type of the last read token
 /// (updated only by get_token())
499 token_type last_token = token_type::uninitialized;
500 /// the lexer
501 lexer_t m_lexer;
502 /// whether to throw exceptions in case of errors
503 const bool allow_exceptions = true;
504};
505
506} // namespace detail
#define WPI_JSON_NAMESPACE_END
Definition: abi_macros.h:59
#define WPI_JSON_NAMESPACE_BEGIN
Definition: abi_macros.h:53
Definition: json_sax.h:653
Definition: json_sax.h:346
bool parse_error(std::size_t, const std::string &, const Exception &ex)
Definition: json_sax.h:534
constexpr bool is_errored() const
Definition: json_sax.h:546
SAX implementation to create a JSON value from SAX events.
Definition: json_sax.h:163
bool parse_error(std::size_t, const std::string &, const Exception &ex)
Definition: json_sax.h:284
constexpr bool is_errored() const
Definition: json_sax.h:296
JSON_HEDLEY_RETURNS_NON_NULL static JSON_HEDLEY_CONST const char * token_type_name(const token_type t) noexcept
return name of values of type token_type (only used for errors)
Definition: lexer.h:62
lexical analysis
Definition: lexer.h:112
JSON_HEDLEY_RETURNS_NON_NULL constexpr const char * get_error_message() const noexcept
return syntax error message
Definition: lexer.h:1473
std::string get_token_string() const
return the last read token (for errors only).
Definition: lexer.h:1448
constexpr number_integer_t get_number_integer() const noexcept
return integer value
Definition: lexer.h:1412
constexpr position_t get_position() const noexcept
return position of last read token
Definition: lexer.h:1440
token_type scan()
Definition: lexer.h:1509
constexpr number_unsigned_t get_number_unsigned() const noexcept
return unsigned integer value
Definition: lexer.h:1418
typename lexer_base< BasicJsonType >::token_type token_type
Definition: lexer.h:121
string_t & get_string()
return current string value (implicitly resets the token; useful only once)
Definition: lexer.h:1430
constexpr number_float_t get_number_float() const noexcept
return floating-point value
Definition: lexer.h:1424
static out_of_range create(int id_, const std::string &what_arg, BasicJsonContext context)
Definition: exceptions.h:226
static parse_error create(int id_, const position_t &pos, const std::string &what_arg, BasicJsonContext context)
create a parse error exception
Definition: exceptions.h:147
syntax analysis
Definition: parser.h:61
parser(InputAdapterType &&adapter, const parser_callback_t< BasicJsonType > cb=nullptr, const bool allow_exceptions_=true, const bool skip_comments=false)
a parser reading from an input adapter
Definition: parser.h:71
void parse(const bool strict, BasicJsonType &result)
public parser interface
Definition: parser.h:93
bool accept(const bool strict=true)
public accept interface
Definition: parser.h:153
bool sax_parse(SAX *sax, const bool strict=true)
Definition: parser.h:161
Definition: core.h:1238
#define JSON_HEDLEY_LIKELY(expr)
Definition: hedley.h:1395
#define JSON_HEDLEY_NON_NULL(...)
Definition: hedley.h:1288
#define JSON_HEDLEY_UNLIKELY(expr)
Definition: hedley.h:1396
#define JSON_ASSERT(x)
Definition: macro_scope.h:192
detail namespace with internal helper functions
Definition: xchar.h:20
std::function< bool(int, parse_event_t, BasicJsonType &)> parser_callback_t
Definition: parser.h:52
OutStringType concat(Args &&... args)
Definition: string_concat.h:137
parse_event_t
Definition: parser.h:35
@ key
the parser read a key of a value in an object
@ array_end
the parser read ] and finished processing a JSON array
@ array_start
the parser read [ and started to process a JSON array
@ object_start
the parser read { and started to process a JSON object
@ object_end
the parser read } and finished processing a JSON object
@ discarded
discarded by the parser callback function
bool isfinite(T)
Definition: chrono.h:1600
@ strict
throw a type_error exception in case of invalid UTF-8
Definition: array.h:89
Definition: is_sax.h:106