// Short aliases for the types supplied by the two template parameters.
// The numeric and string types are taken from BasicJsonType.
114 using number_integer_t =
typename BasicJsonType::number_integer_t;
115 using number_unsigned_t =
typename BasicJsonType::number_unsigned_t;
116 using number_float_t =
typename BasicJsonType::number_float_t;
117 using string_t =
typename BasicJsonType::string_t;
// The character type is taken from the input adapter, not from BasicJsonType.
118 using char_type =
typename InputAdapterType::char_type;
/// Construct a lexer from an input adapter.
/// @param adapter           input adapter; moved into the member `ia`
/// @param ignore_comments_  copied into the member `ignore_comments`
///                          (defaults to false)
/// The member `decimal_point_char` is initialised from get_decimal_point(),
/// so it reflects whatever localeconv() reports when the lexer is constructed.
124 explicit lexer(InputAdapterType&& adapter,
bool ignore_comments_ =
false) noexcept
125 : ia(
std::move(adapter))
126 , ignore_comments(ignore_comments_)
127 , decimal_point_char(static_cast<char_int_type>(get_decimal_point()))
/// Return the decimal-point character reported by localeconv().
/// @return the first character of `localeconv()->decimal_point`, or '.' when
///         that pointer is null
144 static char get_decimal_point() noexcept
// Ask the C runtime for the current numeric formatting information.
// NOTE(review): no null check on `loc` itself is visible in this excerpt;
// confirm one exists before the dereference below.
146 const auto* loc = localeconv();
// Fall back to '.' when the locale supplies no decimal-point string.
148 return (loc->decimal_point ==
nullptr) ?
'.' : *(loc->decimal_point);
176 const auto factors = { 12u, 8u, 4u, 0u };
177 for (
const auto factor : factors)
181 if (current >=
'0' && current <=
'9')
183 codepoint +=
static_cast<int>((
static_cast<unsigned int>(current) - 0x30u) << factor);
185 else if (current >=
'A' && current <=
'F')
187 codepoint +=
static_cast<int>((
static_cast<unsigned int>(current) - 0x37u) << factor);
189 else if (current >=
'a' && current <=
'f')
191 codepoint +=
static_cast<int>((
static_cast<unsigned int>(current) - 0x57u) << factor);
199 JSON_ASSERT(0x0000 <= codepoint && codepoint <= 0xFFFF);
/// Walk a list of byte bounds and report success as a bool.
/// @param ranges  list of char_int_type values; must contain 2, 4 or 6
///                entries (asserted). Presumably consecutive (low, high)
///                pairs, one pair per following input byte -- the comparison
///                itself is not visible in this excerpt, TODO confirm.
/// @return bool; the visible lines set error_message to
///         "invalid string: ill-formed UTF-8 byte" inside the loop region,
///         presumably on the failing path (guarding condition not visible).
218 bool next_byte_in_range(std::initializer_list<char_int_type> ranges)
// Only an even number of bounds, up to three pairs, is accepted.
220 JSON_ASSERT(ranges.size() == 2 || ranges.size() == 4 || ranges.size() == 6);
// Explicit iterator loop over the supplied bounds.
223 for (
auto range = ranges.begin(); range != ranges.end(); ++range)
232 error_message =
"invalid string: ill-formed UTF-8 byte";
269 case char_traits<char_type>::eof():
271 error_message =
"invalid string: missing closing quote";
272 return token_type::parse_error;
278 return token_type::value_string;
322 const int codepoint1 = get_codepoint();
323 int codepoint = codepoint1;
327 error_message =
"invalid string: '\\u' must be followed by 4 hex digits";
328 return token_type::parse_error;
332 if (0xD800 <= codepoint1 && codepoint1 <= 0xDBFF)
337 const int codepoint2 = get_codepoint();
341 error_message =
"invalid string: '\\u' must be followed by 4 hex digits";
342 return token_type::parse_error;
349 codepoint =
static_cast<int>(
351 (
static_cast<unsigned int>(codepoint1) << 10u)
353 +
static_cast<unsigned int>(codepoint2)
361 error_message =
"invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF";
362 return token_type::parse_error;
367 error_message =
"invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF";
368 return token_type::parse_error;
375 error_message =
"invalid string: surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF";
376 return token_type::parse_error;
381 JSON_ASSERT(0x00 <= codepoint && codepoint <= 0x10FFFF);
384 if (codepoint < 0x80)
387 add(
static_cast<char_int_type
>(codepoint));
389 else if (codepoint <= 0x7FF)
392 add(
static_cast<char_int_type
>(0xC0u | (
static_cast<unsigned int>(codepoint) >> 6u)));
393 add(
static_cast<char_int_type
>(0x80u | (
static_cast<unsigned int>(codepoint) & 0x3Fu)));
395 else if (codepoint <= 0xFFFF)
398 add(
static_cast<char_int_type
>(0xE0u | (
static_cast<unsigned int>(codepoint) >> 12u)));
399 add(
static_cast<char_int_type
>(0x80u | ((
static_cast<unsigned int>(codepoint) >> 6u) & 0x3Fu)));
400 add(
static_cast<char_int_type
>(0x80u | (
static_cast<unsigned int>(codepoint) & 0x3Fu)));
405 add(
static_cast<char_int_type
>(0xF0u | (
static_cast<unsigned int>(codepoint) >> 18u)));
406 add(
static_cast<char_int_type
>(0x80u | ((
static_cast<unsigned int>(codepoint) >> 12u) & 0x3Fu)));
407 add(
static_cast<char_int_type
>(0x80u | ((
static_cast<unsigned int>(codepoint) >> 6u) & 0x3Fu)));
408 add(
static_cast<char_int_type
>(0x80u | (
static_cast<unsigned int>(codepoint) & 0x3Fu)));
416 error_message =
"invalid string: forbidden character after backslash";
417 return token_type::parse_error;
426 error_message =
"invalid string: control character U+0000 (NUL) must be escaped to \\u0000";
427 return token_type::parse_error;
432 error_message =
"invalid string: control character U+0001 (SOH) must be escaped to \\u0001";
433 return token_type::parse_error;
438 error_message =
"invalid string: control character U+0002 (STX) must be escaped to \\u0002";
439 return token_type::parse_error;
444 error_message =
"invalid string: control character U+0003 (ETX) must be escaped to \\u0003";
445 return token_type::parse_error;
450 error_message =
"invalid string: control character U+0004 (EOT) must be escaped to \\u0004";
451 return token_type::parse_error;
456 error_message =
"invalid string: control character U+0005 (ENQ) must be escaped to \\u0005";
457 return token_type::parse_error;
462 error_message =
"invalid string: control character U+0006 (ACK) must be escaped to \\u0006";
463 return token_type::parse_error;
468 error_message =
"invalid string: control character U+0007 (BEL) must be escaped to \\u0007";
469 return token_type::parse_error;
474 error_message =
"invalid string: control character U+0008 (BS) must be escaped to \\u0008 or \\b";
475 return token_type::parse_error;
480 error_message =
"invalid string: control character U+0009 (HT) must be escaped to \\u0009 or \\t";
481 return token_type::parse_error;
486 error_message =
"invalid string: control character U+000A (LF) must be escaped to \\u000A or \\n";
487 return token_type::parse_error;
492 error_message =
"invalid string: control character U+000B (VT) must be escaped to \\u000B";
493 return token_type::parse_error;
498 error_message =
"invalid string: control character U+000C (FF) must be escaped to \\u000C or \\f";
499 return token_type::parse_error;
504 error_message =
"invalid string: control character U+000D (CR) must be escaped to \\u000D or \\r";
505 return token_type::parse_error;
510 error_message =
"invalid string: control character U+000E (SO) must be escaped to \\u000E";
511 return token_type::parse_error;
516 error_message =
"invalid string: control character U+000F (SI) must be escaped to \\u000F";
517 return token_type::parse_error;
522 error_message =
"invalid string: control character U+0010 (DLE) must be escaped to \\u0010";
523 return token_type::parse_error;
528 error_message =
"invalid string: control character U+0011 (DC1) must be escaped to \\u0011";
529 return token_type::parse_error;
534 error_message =
"invalid string: control character U+0012 (DC2) must be escaped to \\u0012";
535 return token_type::parse_error;
540 error_message =
"invalid string: control character U+0013 (DC3) must be escaped to \\u0013";
541 return token_type::parse_error;
546 error_message =
"invalid string: control character U+0014 (DC4) must be escaped to \\u0014";
547 return token_type::parse_error;
552 error_message =
"invalid string: control character U+0015 (NAK) must be escaped to \\u0015";
553 return token_type::parse_error;
558 error_message =
"invalid string: control character U+0016 (SYN) must be escaped to \\u0016";
559 return token_type::parse_error;
564 error_message =
"invalid string: control character U+0017 (ETB) must be escaped to \\u0017";
565 return token_type::parse_error;
570 error_message =
"invalid string: control character U+0018 (CAN) must be escaped to \\u0018";
571 return token_type::parse_error;
576 error_message =
"invalid string: control character U+0019 (EM) must be escaped to \\u0019";
577 return token_type::parse_error;
582 error_message =
"invalid string: control character U+001A (SUB) must be escaped to \\u001A";
583 return token_type::parse_error;
588 error_message =
"invalid string: control character U+001B (ESC) must be escaped to \\u001B";
589 return token_type::parse_error;
594 error_message =
"invalid string: control character U+001C (FS) must be escaped to \\u001C";
595 return token_type::parse_error;
600 error_message =
"invalid string: control character U+001D (GS) must be escaped to \\u001D";
601 return token_type::parse_error;
606 error_message =
"invalid string: control character U+001E (RS) must be escaped to \\u001E";
607 return token_type::parse_error;
612 error_message =
"invalid string: control character U+001F (US) must be escaped to \\u001F";
613 return token_type::parse_error;
750 return token_type::parse_error;
760 return token_type::parse_error;
784 return token_type::parse_error;
794 return token_type::parse_error;
804 return token_type::parse_error;
816 return token_type::parse_error;
826 return token_type::parse_error;
834 error_message =
"invalid string: ill-formed UTF-8 byte";
835 return token_type::parse_error;
858 case char_traits<char_type>::eof():
875 case char_traits<char_type>::eof():
878 error_message =
"invalid comment; missing closing '*/'";
906 error_message =
"invalid comment; expecting '/' or '*' after '/'";
/// Overload for `float` targets: parse `str` with std::strtof.
/// @param[out] f       receives the parsed value
/// @param[in]  str     C string to parse
/// @param[out] endptr  forwarded unchanged to std::strtof, which reports
///                     through it where parsing stopped
913 static
void strtof(
float& f, const
char* str,
char** endptr) noexcept
915 f = std::strtof(str, endptr);
/// Overload for `double` targets: parse `str` with std::strtod.
/// @param[out] f       receives the parsed value
/// @param[in]  str     C string to parse
/// @param[out] endptr  forwarded unchanged to std::strtod, which reports
///                     through it where parsing stopped
919 static
void strtof(
double& f, const
char* str,
char** endptr) noexcept
921 f = std::strtod(str, endptr);
/// Overload for `long double` targets: parse `str` with std::strtold.
/// @param[out] f       receives the parsed value
/// @param[in]  str     C string to parse
/// @param[out] endptr  forwarded unchanged to std::strtold, which reports
///                     through it where parsing stopped
925 static
void strtof(
long double& f, const
char* str,
char** endptr) noexcept
927 f = std::strtold(str, endptr);
/// Scan a number token.
///
/// The visible lines form a goto-driven state machine (labels such as
/// scan_number_decimal1, scan_number_decimal2, scan_number_exponent and
/// jumps to scan_number_minus / _zero / _any1 / _sign / _any2 / _done),
/// followed by a conversion step that parses token_buffer with
/// std::strtoull, std::strtoll or the strtof overload set.
///
/// @return token_type::value_unsigned, token_type::value_integer or
///         token_type::value_float on the visible success paths;
///         token_type::parse_error (with error_message set) on the visible
///         failure paths.
970 token_type scan_number()
// Start by assuming an unsigned integer; later states switch this to
// value_integer or value_float.
977 token_type number_type = token_type::value_unsigned;
985 goto scan_number_minus;
991 goto scan_number_zero;
1005 goto scan_number_any1;
// From here on the number is treated as a signed integer (presumably the
// state after a leading '-', judging by the error text below).
1015 number_type = token_type::value_integer;
1021 goto scan_number_zero;
1035 goto scan_number_any1;
1040 error_message =
"invalid number; expected digit after '-'";
1041 return token_type::parse_error;
// Append decimal_point_char (taken from localeconv() by the constructor) to
// the buffer that is later handed to the strto* conversion functions.
1051 add(decimal_point_char);
1052 goto scan_number_decimal1;
1059 goto scan_number_exponent;
1063 goto scan_number_done;
1082 goto scan_number_any1;
// Same decimal-point handling as above, reached from a different state.
1087 add(decimal_point_char);
1088 goto scan_number_decimal1;
1095 goto scan_number_exponent;
1099 goto scan_number_done;
// State entered right after a decimal point was added to the buffer.
1102scan_number_decimal1:
// A fractional part makes the token a floating-point number.
1104 number_type = token_type::value_float;
1119 goto scan_number_decimal2;
1124 error_message =
"invalid number; expected digit after '.'";
1125 return token_type::parse_error;
// State that loops on itself (see the self-targeting goto below).
1129scan_number_decimal2:
1145 goto scan_number_decimal2;
1152 goto scan_number_exponent;
1156 goto scan_number_done;
// State for the exponent part.
1159scan_number_exponent:
// An exponent also makes the token a floating-point number.
1161 number_type = token_type::value_float;
1168 goto scan_number_sign;
1183 goto scan_number_any2;
1189 "invalid number; expected '+', '-', or digit after exponent";
1190 return token_type::parse_error;
1210 goto scan_number_any2;
1215 error_message =
"invalid number; expected digit after exponent sign";
1216 return token_type::parse_error;
1236 goto scan_number_any2;
1240 goto scan_number_done;
// Conversion step: endptr is filled in by the strto* call that runs below.
1248 char* endptr =
nullptr;
// Unsigned case: parse base-10 with strtoull.
1252 if (number_type == token_type::value_unsigned)
1254 const auto x = std::strtoull(token_buffer.data(), &endptr, 10);
// The whole buffer must have been consumed by the conversion.
1257 JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size());
// NOTE(review): no errno / range check is visible in this excerpt; confirm
// that overflow reported by strtoull is handled on a line not shown here.
1261 value_unsigned =
static_cast<number_unsigned_t
>(x);
// Accept only if the cast to number_unsigned_t preserved the value.
1262 if (value_unsigned == x)
1264 return token_type::value_unsigned;
// Signed case: parse base-10 with strtoll.
1268 else if (number_type == token_type::value_integer)
1270 const auto x = std::strtoll(token_buffer.data(), &endptr, 10);
// The whole buffer must have been consumed by the conversion.
1273 JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size());
1277 value_integer =
static_cast<number_integer_t
>(x);
// Accept only if the cast to number_integer_t preserved the value.
1278 if (value_integer == x)
1280 return token_type::value_integer;
// Floating-point conversion; overload resolution on value_float's type picks
// std::strtof, std::strtod or std::strtold. Presumably also reached when an
// integer conversion above did not return -- TODO confirm against the full
// source.
1287 strtof(value_float, token_buffer.data(), &endptr);
// The whole buffer must have been consumed by the conversion.
1290 JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size());
1292 return token_type::value_float;
/// Match a fixed literal against the input.
/// @param literal_text  expected characters; element 0 must equal the
///                      current character (asserted)
/// @param length        number of characters in literal_text
/// @param return_type   token type supplied by the caller -- presumably
///                      returned when every character matches (that return
///                      is not visible in this excerpt)
/// @return token_type::parse_error, with error_message set to
///         "invalid literal", on the visible failure path
1301 token_type scan_literal(const
char_type* literal_text, const
std::
size_t length,
1302 token_type return_type)
// The first character has already been read by the caller.
1304 JSON_ASSERT(char_traits<char_type>::to_char_type(current) == literal_text[0]);
// Index 0 is covered by the assertion above, so the loop starts at 1.
1305 for (std::size_t i = 1; i < length; ++i)
1309 error_message =
"invalid literal";
1310 return token_type::parse_error;
/// Reset the per-token buffers.
/// Clears token_buffer and token_string, then seeds token_string with the
/// current character converted to char_type.
1321 void reset() noexcept
1323 token_buffer.clear();
1324 token_string.clear();
// The character already held in `current` becomes the first entry.
1325 token_string.push_back(char_traits<char_type>::to_char_type(current));
1340 ++position.chars_read_total;
1341 ++position.chars_read_current_line;
1350 current = ia.get_character();
1355 token_string.push_back(char_traits<char_type>::to_char_type(current));
1358 if (current ==
'\n')
1360 ++position.lines_read;
1361 position.chars_read_current_line = 0;
1379 --position.chars_read_total;
1382 if (position.chars_read_current_line == 0)
1384 if (position.lines_read > 0)
1386 --position.lines_read;
1391 --position.chars_read_current_line;
1397 token_string.pop_back();
/// Append one character to token_buffer.
/// @param c  character as char_int_type; converted to string_t::value_type
///           before being stored
1402 void add(char_int_type c)
1404 token_buffer.push_back(
static_cast<typename string_t::value_type
>(c));
1415 return value_integer;
1421 return value_unsigned;
1433 return token_buffer;
1453 for (
const auto c : token_string)
1455 if (
static_cast<unsigned char>(c) <=
'\x1F')
1458 std::array<char, 9>
cs{{}};
1459 static_cast<void>((std::snprintf)(
cs.data(),
cs.size(),
"<U+%.4X>",
static_cast<unsigned char>(c)));
1460 result +=
cs.data();
1465 result.push_back(
static_cast<std::string::value_type
>(c));
1476 return error_message;
1492 return get() == 0xBB &&
get() == 0xBF;
1507 while (current ==
' ' || current ==
'\t' || current ==
'\n' || current ==
'\r');
1513 if (position.chars_read_total == 0 && !skip_bom())
1515 error_message =
"invalid BOM; must be 0xEF 0xBB 0xBF if given";
1516 return token_type::parse_error;
1523 while (ignore_comments && current ==
'/')
1525 if (!scan_comment())
1527 return token_type::parse_error;
1538 return token_type::begin_array;
1540 return token_type::end_array;
1542 return token_type::begin_object;
1544 return token_type::end_object;
1546 return token_type::name_separator;
1548 return token_type::value_separator;
1553 std::array<char_type, 4> true_literal = {{
static_cast<char_type
>(
't'),
static_cast<char_type
>(
'r'),
static_cast<char_type
>(
'u'),
static_cast<char_type
>(
'e')}};
1554 return scan_literal(true_literal.data(), true_literal.size(), token_type::literal_true);
1558 std::array<char_type, 5> false_literal = {{
static_cast<char_type
>(
'f'),
static_cast<char_type
>(
'a'),
static_cast<char_type
>(
'l'),
static_cast<char_type
>(
's'),
static_cast<char_type
>(
'e')}};
1559 return scan_literal(false_literal.data(), false_literal.size(), token_type::literal_false);
1563 std::array<char_type, 4> null_literal = {{
static_cast<char_type
>(
'n'),
static_cast<char_type
>(
'u'),
static_cast<char_type
>(
'l'),
static_cast<char_type
>(
'l')}};
1564 return scan_literal(null_literal.data(), null_literal.size(), token_type::literal_null);
1569 return scan_string();
1583 return scan_number();
1589 return token_type::end_of_input;
1593 error_message =
"invalid literal";
1594 return token_type::parse_error;
// Input adapter; initialised by moving the constructor's `adapter` argument
// and read through ia.get_character().
1600 InputAdapterType ia;
// Set once from the constructor argument; gates the scan_comment() loop.
1603 const bool ignore_comments =
false;
// NOTE(review): no use of this flag is visible in this excerpt -- presumably
// it tells the next read to re-deliver `current` after an unget; confirm.
1609 bool next_unget =
false;
// Input characters collected for the current token: cleared in reset(),
// appended to as characters are read, and iterated to build a printable
// string (control characters rendered as <U+XXXX>).
1615 std::vector<char_type> token_string {};
// Characters accumulated via add(); parsed by the strto* conversions in
// scan_number().
1618 string_t token_buffer {};
// Points at a string literal describing the most recent error; empty by
// default.
1621 const char* error_message =
"";
// Results of the numeric conversions in scan_number().
1624 number_integer_t value_integer = 0;
1625 number_unsigned_t value_unsigned = 0;
1626 number_float_t value_float = 0;
// Decimal point written into token_buffer by scan_number(); the constructor
// overrides this default with the value from get_decimal_point().
1629 const char_int_type decimal_point_char =
'.';