16#include <initializer_list>
33template<
typename BasicJsonType>
67 return "<uninitialized>";
69 return "true literal";
71 return "false literal";
73 return "null literal";
75 return "string literal";
79 return "number literal";
93 return "<parse error>";
95 return "end of input";
97 return "'[', '{', or a literal";
100 return "unknown token";
110template<
typename BasicJsonType,
typename InputAdapterType>
113 using number_integer_t =
typename BasicJsonType::number_integer_t;
114 using number_unsigned_t =
typename BasicJsonType::number_unsigned_t;
115 using number_float_t =
typename BasicJsonType::number_float_t;
116 using string_t =
typename BasicJsonType::string_t;
117 using char_type =
typename InputAdapterType::char_type;
118 using char_int_type =
typename std::char_traits<char_type>::int_type;
123 explicit lexer(InputAdapterType&& adapter,
bool ignore_comments_ =
false) noexcept
124 : ia(
std::move(adapter))
125 , ignore_comments(ignore_comments_)
126 , decimal_point_char(static_cast<char_int_type>(get_decimal_point()))
143 static char get_decimal_point() noexcept
145 const auto* loc = localeconv();
147 return (loc->decimal_point ==
nullptr) ?
'.' : *(loc->decimal_point);
175 const auto factors = { 12u, 8u, 4u, 0u };
176 for (
const auto factor : factors)
180 if (current >=
'0' && current <=
'9')
182 codepoint +=
static_cast<int>((
static_cast<unsigned int>(current) - 0x30u) << factor);
184 else if (current >=
'A' && current <=
'F')
186 codepoint +=
static_cast<int>((
static_cast<unsigned int>(current) - 0x37u) << factor);
188 else if (current >=
'a' && current <=
'f')
190 codepoint +=
static_cast<int>((
static_cast<unsigned int>(current) - 0x57u) << factor);
198 JSON_ASSERT(0x0000 <= codepoint && codepoint <= 0xFFFF);
217 bool next_byte_in_range(std::initializer_list<char_int_type> ranges)
219 JSON_ASSERT(ranges.size() == 2 || ranges.size() == 4 || ranges.size() == 6);
222 for (
auto range = ranges.begin(); range != ranges.end(); ++range)
231 error_message =
"invalid string: ill-formed UTF-8 byte";
268 case std::char_traits<char_type>::eof():
270 error_message =
"invalid string: missing closing quote";
271 return token_type::parse_error;
277 return token_type::value_string;
321 const int codepoint1 = get_codepoint();
322 int codepoint = codepoint1;
326 error_message =
"invalid string: '\\u' must be followed by 4 hex digits";
327 return token_type::parse_error;
331 if (0xD800 <= codepoint1 && codepoint1 <= 0xDBFF)
336 const int codepoint2 = get_codepoint();
340 error_message =
"invalid string: '\\u' must be followed by 4 hex digits";
341 return token_type::parse_error;
348 codepoint =
static_cast<int>(
350 (
static_cast<unsigned int>(codepoint1) << 10u)
352 +
static_cast<unsigned int>(codepoint2)
360 error_message =
"invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF";
361 return token_type::parse_error;
366 error_message =
"invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF";
367 return token_type::parse_error;
374 error_message =
"invalid string: surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF";
375 return token_type::parse_error;
380 JSON_ASSERT(0x00 <= codepoint && codepoint <= 0x10FFFF);
383 if (codepoint < 0x80)
386 add(
static_cast<char_int_type
>(codepoint));
388 else if (codepoint <= 0x7FF)
391 add(
static_cast<char_int_type
>(0xC0u | (
static_cast<unsigned int>(codepoint) >> 6u)));
392 add(
static_cast<char_int_type
>(0x80u | (
static_cast<unsigned int>(codepoint) & 0x3Fu)));
394 else if (codepoint <= 0xFFFF)
397 add(
static_cast<char_int_type
>(0xE0u | (
static_cast<unsigned int>(codepoint) >> 12u)));
398 add(
static_cast<char_int_type
>(0x80u | ((
static_cast<unsigned int>(codepoint) >> 6u) & 0x3Fu)));
399 add(
static_cast<char_int_type
>(0x80u | (
static_cast<unsigned int>(codepoint) & 0x3Fu)));
404 add(
static_cast<char_int_type
>(0xF0u | (
static_cast<unsigned int>(codepoint) >> 18u)));
405 add(
static_cast<char_int_type
>(0x80u | ((
static_cast<unsigned int>(codepoint) >> 12u) & 0x3Fu)));
406 add(
static_cast<char_int_type
>(0x80u | ((
static_cast<unsigned int>(codepoint) >> 6u) & 0x3Fu)));
407 add(
static_cast<char_int_type
>(0x80u | (
static_cast<unsigned int>(codepoint) & 0x3Fu)));
415 error_message =
"invalid string: forbidden character after backslash";
416 return token_type::parse_error;
425 error_message =
"invalid string: control character U+0000 (NUL) must be escaped to \\u0000";
426 return token_type::parse_error;
431 error_message =
"invalid string: control character U+0001 (SOH) must be escaped to \\u0001";
432 return token_type::parse_error;
437 error_message =
"invalid string: control character U+0002 (STX) must be escaped to \\u0002";
438 return token_type::parse_error;
443 error_message =
"invalid string: control character U+0003 (ETX) must be escaped to \\u0003";
444 return token_type::parse_error;
449 error_message =
"invalid string: control character U+0004 (EOT) must be escaped to \\u0004";
450 return token_type::parse_error;
455 error_message =
"invalid string: control character U+0005 (ENQ) must be escaped to \\u0005";
456 return token_type::parse_error;
461 error_message =
"invalid string: control character U+0006 (ACK) must be escaped to \\u0006";
462 return token_type::parse_error;
467 error_message =
"invalid string: control character U+0007 (BEL) must be escaped to \\u0007";
468 return token_type::parse_error;
473 error_message =
"invalid string: control character U+0008 (BS) must be escaped to \\u0008 or \\b";
474 return token_type::parse_error;
479 error_message =
"invalid string: control character U+0009 (HT) must be escaped to \\u0009 or \\t";
480 return token_type::parse_error;
485 error_message =
"invalid string: control character U+000A (LF) must be escaped to \\u000A or \\n";
486 return token_type::parse_error;
491 error_message =
"invalid string: control character U+000B (VT) must be escaped to \\u000B";
492 return token_type::parse_error;
497 error_message =
"invalid string: control character U+000C (FF) must be escaped to \\u000C or \\f";
498 return token_type::parse_error;
503 error_message =
"invalid string: control character U+000D (CR) must be escaped to \\u000D or \\r";
504 return token_type::parse_error;
509 error_message =
"invalid string: control character U+000E (SO) must be escaped to \\u000E";
510 return token_type::parse_error;
515 error_message =
"invalid string: control character U+000F (SI) must be escaped to \\u000F";
516 return token_type::parse_error;
521 error_message =
"invalid string: control character U+0010 (DLE) must be escaped to \\u0010";
522 return token_type::parse_error;
527 error_message =
"invalid string: control character U+0011 (DC1) must be escaped to \\u0011";
528 return token_type::parse_error;
533 error_message =
"invalid string: control character U+0012 (DC2) must be escaped to \\u0012";
534 return token_type::parse_error;
539 error_message =
"invalid string: control character U+0013 (DC3) must be escaped to \\u0013";
540 return token_type::parse_error;
545 error_message =
"invalid string: control character U+0014 (DC4) must be escaped to \\u0014";
546 return token_type::parse_error;
551 error_message =
"invalid string: control character U+0015 (NAK) must be escaped to \\u0015";
552 return token_type::parse_error;
557 error_message =
"invalid string: control character U+0016 (SYN) must be escaped to \\u0016";
558 return token_type::parse_error;
563 error_message =
"invalid string: control character U+0017 (ETB) must be escaped to \\u0017";
564 return token_type::parse_error;
569 error_message =
"invalid string: control character U+0018 (CAN) must be escaped to \\u0018";
570 return token_type::parse_error;
575 error_message =
"invalid string: control character U+0019 (EM) must be escaped to \\u0019";
576 return token_type::parse_error;
581 error_message =
"invalid string: control character U+001A (SUB) must be escaped to \\u001A";
582 return token_type::parse_error;
587 error_message =
"invalid string: control character U+001B (ESC) must be escaped to \\u001B";
588 return token_type::parse_error;
593 error_message =
"invalid string: control character U+001C (FS) must be escaped to \\u001C";
594 return token_type::parse_error;
599 error_message =
"invalid string: control character U+001D (GS) must be escaped to \\u001D";
600 return token_type::parse_error;
605 error_message =
"invalid string: control character U+001E (RS) must be escaped to \\u001E";
606 return token_type::parse_error;
611 error_message =
"invalid string: control character U+001F (US) must be escaped to \\u001F";
612 return token_type::parse_error;
749 return token_type::parse_error;
759 return token_type::parse_error;
783 return token_type::parse_error;
793 return token_type::parse_error;
803 return token_type::parse_error;
815 return token_type::parse_error;
825 return token_type::parse_error;
833 error_message =
"invalid string: ill-formed UTF-8 byte";
834 return token_type::parse_error;
857 case std::char_traits<char_type>::eof():
874 case std::char_traits<char_type>::eof():
877 error_message =
"invalid comment; missing closing '*/'";
905 error_message =
"invalid comment; expecting '/' or '*' after '/'";
912 static
void strtof(
float& f, const
char* str,
char** endptr) noexcept
914 f = std::strtof(str, endptr);
918 static
void strtof(
double& f, const
char* str,
char** endptr) noexcept
920 f = std::strtod(str, endptr);
924 static
void strtof(
long double& f, const
char* str,
char** endptr) noexcept
926 f = std::strtold(str, endptr);
969 token_type scan_number()
976 token_type number_type = token_type::value_unsigned;
984 goto scan_number_minus;
990 goto scan_number_zero;
1004 goto scan_number_any1;
1014 number_type = token_type::value_integer;
1020 goto scan_number_zero;
1034 goto scan_number_any1;
1039 error_message =
"invalid number; expected digit after '-'";
1040 return token_type::parse_error;
1050 add(decimal_point_char);
1051 goto scan_number_decimal1;
1058 goto scan_number_exponent;
1062 goto scan_number_done;
1081 goto scan_number_any1;
1086 add(decimal_point_char);
1087 goto scan_number_decimal1;
1094 goto scan_number_exponent;
1098 goto scan_number_done;
1101scan_number_decimal1:
1103 number_type = token_type::value_float;
1118 goto scan_number_decimal2;
1123 error_message =
"invalid number; expected digit after '.'";
1124 return token_type::parse_error;
1128scan_number_decimal2:
1144 goto scan_number_decimal2;
1151 goto scan_number_exponent;
1155 goto scan_number_done;
1158scan_number_exponent:
1160 number_type = token_type::value_float;
1167 goto scan_number_sign;
1182 goto scan_number_any2;
1188 "invalid number; expected '+', '-', or digit after exponent";
1189 return token_type::parse_error;
1209 goto scan_number_any2;
1214 error_message =
"invalid number; expected digit after exponent sign";
1215 return token_type::parse_error;
1235 goto scan_number_any2;
1239 goto scan_number_done;
1247 char* endptr =
nullptr;
1251 if (number_type == token_type::value_unsigned)
1253 const auto x = std::strtoull(token_buffer.data(), &endptr, 10);
1256 JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size());
1260 value_unsigned =
static_cast<number_unsigned_t
>(x);
1261 if (value_unsigned == x)
1263 return token_type::value_unsigned;
1267 else if (number_type == token_type::value_integer)
1269 const auto x = std::strtoll(token_buffer.data(), &endptr, 10);
1272 JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size());
1276 value_integer =
static_cast<number_integer_t
>(x);
1277 if (value_integer == x)
1279 return token_type::value_integer;
1286 strtof(value_float, token_buffer.data(), &endptr);
1289 JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size());
1291 return token_type::value_float;
1300 token_type scan_literal(const char_type* literal_text, const
std::
size_t length,
1301 token_type return_type)
1303 JSON_ASSERT(std::char_traits<char_type>::to_char_type(current) == literal_text[0]);
1304 for (std::size_t i = 1; i < length; ++i)
1308 error_message =
"invalid literal";
1309 return token_type::parse_error;
1320 void reset() noexcept
1322 token_buffer.clear();
1323 token_string.clear();
1324 token_string.push_back(std::char_traits<char_type>::to_char_type(current));
1339 ++position.chars_read_total;
1340 ++position.chars_read_current_line;
1349 current = ia.get_character();
1354 token_string.push_back(std::char_traits<char_type>::to_char_type(current));
1357 if (current ==
'\n')
1359 ++position.lines_read;
1360 position.chars_read_current_line = 0;
1378 --position.chars_read_total;
1381 if (position.chars_read_current_line == 0)
1383 if (position.lines_read > 0)
1385 --position.lines_read;
1390 --position.chars_read_current_line;
1396 token_string.pop_back();
1401 void add(char_int_type
c)
1403 token_buffer.push_back(
static_cast<typename string_t::value_type
>(
c));
1414 return value_integer;
1420 return value_unsigned;
1432 return token_buffer;
1452 for (
const auto c : token_string)
1454 if (
static_cast<unsigned char>(
c) <=
'\x1F')
1457 std::array<char, 9>
cs{{}};
1458 static_cast<void>((std::snprintf)(
cs.data(),
cs.size(),
"<U+%.4X>",
static_cast<unsigned char>(
c)));
1459 result +=
cs.data();
1464 result.push_back(
static_cast<std::string::value_type
>(
c));
1475 return error_message;
1491 return get() == 0xBB &&
get() == 0xBF;
1506 while (current ==
' ' || current ==
'\t' || current ==
'\n' || current ==
'\r');
1512 if (position.chars_read_total == 0 && !skip_bom())
1514 error_message =
"invalid BOM; must be 0xEF 0xBB 0xBF if given";
1515 return token_type::parse_error;
1522 while (ignore_comments && current ==
'/')
1524 if (!scan_comment())
1526 return token_type::parse_error;
1537 return token_type::begin_array;
1539 return token_type::end_array;
1541 return token_type::begin_object;
1543 return token_type::end_object;
1545 return token_type::name_separator;
1547 return token_type::value_separator;
1552 std::array<char_type, 4> true_literal = {{
static_cast<char_type
>(
't'),
static_cast<char_type
>(
'r'),
static_cast<char_type
>(
'u'),
static_cast<char_type
>(
'e')}};
1553 return scan_literal(true_literal.data(), true_literal.size(), token_type::literal_true);
1557 std::array<char_type, 5> false_literal = {{
static_cast<char_type
>(
'f'),
static_cast<char_type
>(
'a'),
static_cast<char_type
>(
'l'),
static_cast<char_type
>(
's'),
static_cast<char_type
>(
'e')}};
1558 return scan_literal(false_literal.data(), false_literal.size(), token_type::literal_false);
1562 std::array<char_type, 4> null_literal = {{
static_cast<char_type
>(
'n'),
static_cast<char_type
>(
'u'),
static_cast<char_type
>(
'l'),
static_cast<char_type
>(
'l')}};
1563 return scan_literal(null_literal.data(), null_literal.size(), token_type::literal_null);
1568 return scan_string();
1582 return scan_number();
1587 case std::char_traits<char_type>::eof():
1588 return token_type::end_of_input;
1592 error_message =
"invalid literal";
1593 return token_type::parse_error;
1599 InputAdapterType ia;
1602 const bool ignore_comments =
false;
1605 char_int_type current = std::char_traits<char_type>::eof();
1608 bool next_unget =
false;
1614 std::vector<char_type> token_string {};
1617 string_t token_buffer {};
1620 const char* error_message =
"";
1623 number_integer_t value_integer = 0;
1624 number_unsigned_t value_unsigned = 0;
1625 number_float_t value_float = 0;
1628 const char_int_type decimal_point_char =
'.';
#define WPI_JSON_NAMESPACE_END
Definition: abi_macros.h:59
#define WPI_JSON_NAMESPACE_BEGIN
Definition: abi_macros.h:53
JSON_HEDLEY_RETURNS_NON_NULL static JSON_HEDLEY_CONST const char * token_type_name(const token_type t) noexcept
return name of values of type token_type (only used for errors)
Definition: lexer.h:62
token_type
token types for the parser
Definition: lexer.h:39
@ value_float
a floating-point number – use get_number_float() for actual value
@ begin_array
the character for array begin [
@ value_string
a string – use get_string() for actual value
@ end_array
the character for array end ]
@ uninitialized
indicating the scanner is uninitialized
@ parse_error
indicating a parse error
@ value_integer
a signed integer – use get_number_integer() for actual value
@ value_separator
the value separator ,
@ end_object
the character for object end }
@ literal_true
the true literal
@ begin_object
the character for object begin {
@ value_unsigned
an unsigned integer – use get_number_unsigned() for actual value
@ literal_null
the null literal
@ end_of_input
indicating the end of the input buffer
@ name_separator
the name separator :
@ literal_or_value
a literal or the beginning of a value (only for diagnostics)
@ literal_false
the false literal
lexical analysis
Definition: lexer.h:112
bool skip_bom()
skip the UTF-8 byte order mark
Definition: lexer.h:1486
void skip_whitespace()
Definition: lexer.h:1500
lexer(InputAdapterType &&adapter, bool ignore_comments_=false) noexcept
Definition: lexer.h:123
JSON_HEDLEY_RETURNS_NON_NULL constexpr const char * get_error_message() const noexcept
return syntax error message
Definition: lexer.h:1473
std::string get_token_string() const
return the last read token (for errors only).
Definition: lexer.h:1448
constexpr number_integer_t get_number_integer() const noexcept
return integer value
Definition: lexer.h:1412
constexpr position_t get_position() const noexcept
return position of last read token
Definition: lexer.h:1440
token_type scan()
Definition: lexer.h:1509
constexpr number_unsigned_t get_number_unsigned() const noexcept
return unsigned integer value
Definition: lexer.h:1418
typename lexer_base< BasicJsonType >::token_type token_type
Definition: lexer.h:121
lexer & operator=(lexer &&)=default
lexer(const lexer &)=delete
lexer & operator=(lexer &)=delete
string_t & get_string()
return current string value (implicitly resets the token; useful only once)
Definition: lexer.h:1430
constexpr number_float_t get_number_float() const noexcept
return floating-point value
Definition: lexer.h:1424
exception indicating a parse error
Definition: exceptions.h:135
#define JSON_HEDLEY_CONST
Definition: hedley.h:1500
#define JSON_HEDLEY_LIKELY(expr)
Definition: hedley.h:1395
#define JSON_HEDLEY_NON_NULL(...)
Definition: hedley.h:1288
#define JSON_HEDLEY_RETURNS_NON_NULL
Definition: hedley.h:1729
#define JSON_HEDLEY_UNLIKELY(expr)
Definition: hedley.h:1396
#define JSON_HEDLEY_PURE
Definition: hedley.h:1469
#define JSON_ASSERT(x)
Definition: macro_scope.h:192
CameraServer (cscore) namespace.
Definition: cscore_oo.inc:16
detail namespace with internal helper functions
Definition: xchar.h:20
auto get(const wpi::detail::iteration_proxy_value< IteratorType > &i) -> decltype(i.key())
Definition: iteration_proxy.h:193
static constexpr const velocity::meters_per_second_t c(299792458.0)
Speed of light in vacuum.
struct to capture the start position of the current token
Definition: position_t.h:21