40 #if defined(__GNUC__) || defined(__clang__) 41 #define SAJSON_LIKELY(x) __builtin_expect(!!(x), 1) 42 #define SAJSON_UNLIKELY(x) __builtin_expect(!!(x), 0) 44 #define SAJSON_LIKELY(x) x 45 #define SAJSON_UNLIKELY(x) x 60 inline std::ostream& operator<<(std::ostream& os, type t) {
62 case TYPE_INTEGER:
return os <<
"<integer>";
63 case TYPE_DOUBLE:
return os <<
"<double>";
64 case TYPE_NULL:
return os <<
"<null>";
65 case TYPE_FALSE:
return os <<
"<false>";
66 case TYPE_TRUE:
return os <<
"<true>";
67 case TYPE_STRING:
return os <<
"<string>";
68 case TYPE_ARRAY:
return os <<
"<array>";
69 case TYPE_OBJECT:
return os <<
"<object>";
70 default:
return os <<
"<unknown type";
// Layout of a packed element word: the top TYPE_BITS bits carry the type
// tag, every bit below them carries the value.
static const size_t TYPE_BITS = 3;
// Distance the tag is shifted up from bit 0 (assumes 8-bit bytes).
static const size_t TYPE_SHIFT = 8 * sizeof(size_t) - TYPE_BITS;
// Mask for the tag once it has been shifted back down to bit 0.
static const size_t TYPE_MASK = (size_t(1) << TYPE_BITS) - 1;
// Mask selecting the value bits, i.e. everything below the tag.
static const size_t VALUE_MASK = ~(TYPE_MASK << TYPE_SHIFT);

// Value field with every bit set; the parser stores it as the parent of the
// outermost structure and compares against it to detect the root.
static const size_t ROOT_MARKER = VALUE_MASK;
81 inline type get_element_type(
size_t s) {
82 return static_cast<type
>((s >> TYPE_SHIFT) & TYPE_MASK);
85 inline size_t get_element_value(
size_t s) {
86 return s & VALUE_MASK;
// Builds a packed element word: the type tag `t` is shifted up by
// TYPE_SHIFT and OR-ed with `value`. On the visible lines `value` is not
// masked, so any bits it has at or above TYPE_SHIFT would blend into the tag.
// NOTE(review): original lines 90-91 are absent from this listing — confirm
// whether they validate or mask `value`.
89 inline size_t make_element(type t,
size_t value) {
92 return value | (
static_cast<size_t>(t) << TYPE_SHIFT);
97 string(
const char* text,
size_t length)
102 const char* data()
const {
106 size_t length()
const {
110 std::string as_string()
const {
111 return std::string(text, text + _length);
115 const char*
const text;
116 const size_t _length;
123 explicit literal(
const char* text)
124 :
string(text, strlen(text))
143 const size_t lhs_length = lhs.key_end - lhs.key_start;
144 const size_t rhs_length = rhs.length();
145 if (lhs_length < rhs_length) {
147 }
else if (lhs_length > rhs_length) {
150 return memcmp(data + lhs.key_start, rhs.data(), lhs_length) < 0;
154 return !(*this)(rhs, lhs);
160 const size_t lhs_length = lhs.key_end - lhs.key_start;
161 const size_t rhs_length = rhs.key_end - rhs.key_start;
162 if (lhs_length < rhs_length) {
164 }
else if (lhs_length > rhs_length) {
167 return memcmp(data + lhs.key_start, data + rhs.key_start,
192 size_t count()
const {
212 data =
new char[length];
213 memcpy(data, s.data(), length);
219 data =
new char[length];
220 memcpy(data, s.data(), length);
223 ~mutable_string_view() {
224 if (uses.count() == 1) {
229 size_t get_length()
const {
233 char* get_data()
const {
252 word_length =
sizeof(double) /
sizeof(
size_t)
255 #if defined(_M_IX86) || defined(__i386__) || defined(_X86_) 256 static double load(
const size_t* location) {
257 return *
reinterpret_cast<const double*
>(location);
259 static void store(
size_t* location,
double value) {
260 *
reinterpret_cast<double*
>(location) = value;
263 static double load(
const size_t* location) {
265 for (
unsigned i = 0; i < double_storage::word_length; ++i) {
266 s.u[i] = location[i];
271 static void store(
size_t* location,
double value) {
275 for (
int i = 0; i < ns.word_length; ++i) {
276 location[i] = ns.u[i];
281 size_t u[word_length];
289 explicit value(type value_type,
const size_t* payload,
const char* text)
290 : value_type(value_type)
295 type get_type()
const {
300 size_t get_length()
const {
301 assert_type_2(TYPE_ARRAY, TYPE_OBJECT);
306 value get_array_element(
size_t index)
const {
307 assert_type(TYPE_ARRAY);
308 size_t element = payload[1 + index];
309 return value(get_element_type(element), payload + get_element_value(element), text);
313 string get_object_key(
size_t index)
const {
314 assert_type(TYPE_OBJECT);
315 const size_t* s = payload + 1 + index * 3;
316 return string(text + s[0], s[1] - s[0]);
320 value get_object_value(
size_t index)
const {
321 assert_type(TYPE_OBJECT);
322 size_t element = payload[3 + index * 3];
323 return value(get_element_type(element), payload + get_element_value(element), text);
// Looks `key` up with find_object_key and returns the value stored at the
// resulting index. Asserts that this value is an object.
// NOTE(review): find_object_key yields get_length() when the key is absent,
// which is not a valid entry index. Original line 330 is not visible in this
// listing — confirm that it guards that case before get_object_value runs.
327 value get_value_of_key(
const string& key)
const {
328 assert_type(TYPE_OBJECT);
329 size_t i = find_object_key(key);
331 return get_object_value(i);
336 size_t find_object_key(
const string& key)
const {
337 assert_type(TYPE_OBJECT);
342 && (i->key_end - i->key_start) == key.length()
343 && memcmp(key.data(), text + i->key_start, key.length()) == 0)? i - start : get_length();
347 int get_integer_value()
const {
348 assert_type(TYPE_INTEGER);
355 double get_double_value()
const {
356 assert_type(TYPE_DOUBLE);
357 return double_storage::load(payload);
361 double get_number_value()
const {
362 assert_type_2(TYPE_INTEGER, TYPE_DOUBLE);
363 if (get_type() == TYPE_INTEGER) {
364 return get_integer_value();
366 return get_double_value();
371 size_t get_string_length()
const {
372 assert_type(TYPE_STRING);
373 return payload[1] - payload[0];
377 std::string as_string()
const {
378 assert_type(TYPE_STRING);
379 return std::string(text + payload[0], text + payload[1]);
383 void assert_type(type expected)
const {
384 assert(expected == get_type());
387 void assert_type_2(type e1, type e2)
const {
388 assert(e1 == get_type() || e2 == get_type());
391 void assert_in_bounds(
size_t i)
const {
392 assert(i < get_length());
395 const type value_type;
396 const size_t*
const payload;
397 const char*
const text;
403 explicit document(
mutable_string_view& input,
const size_t* structure, type root_type,
const size_t* root,
size_t error_line,
size_t error_column,
const std::string& error_message)
405 , structure(structure)
406 , root_type(root_type)
408 , error_line(error_line)
409 , error_column(error_column)
410 , error_message(error_message)
413 #if __cplusplus >= 201103L 415 void operator=(
const document&) =
delete;
420 , structure(rhs.structure)
421 , root_type(rhs.root_type)
423 , error_line(rhs.error_line)
424 , error_column(rhs.error_column)
425 , error_message(rhs.error_message)
429 void operator=(
const document& rhs);
435 , structure(rhs.structure)
436 , root_type(rhs.root_type)
438 , error_line(rhs.error_line)
439 , error_column(rhs.error_column)
440 , error_message(rhs.error_message)
445 if (uses.count() == 1) {
450 bool is_valid()
const {
454 value get_root()
const {
455 return value(root_type, root, input.get_data());
458 size_t get_error_line()
const {
462 size_t get_error_column()
const {
466 std::string get_error_message()
const {
467 return error_message;
473 const size_t*
const structure;
474 const type root_type;
475 const size_t*
const root;
476 const size_t error_line;
477 const size_t error_column;
478 const std::string error_message;
485 , input_end(input.get_data() + input.get_length())
486 , structure(structure)
487 , p(input.get_data())
489 , root_type(TYPE_NULL)
490 , out(structure + input.get_length())
497 return document(input, structure, root_type, out, 0, 0, std::string());
500 return document(input, 0, TYPE_NULL, 0, error_line, error_column, error_message);
506 operator bool()
const {
514 , value_type(TYPE_NULL)
522 bool operator!()
const {
531 return p == input_end;
534 char peek_structure() {
536 if (p == input_end) {
557 char* c = input.get_data();
560 if (c + 1 < p && c[1] ==
'\n') {
568 }
else if (*c ==
'\n') {
582 va_start(ap, format);
583 vsnprintf(buf, 1023, format, ap);
591 char c = peek_structure();
593 return error(
"no root element");
596 type current_structure_type;
598 current_structure_type = TYPE_ARRAY;
599 }
else if (c ==
'{') {
600 current_structure_type = TYPE_OBJECT;
602 return error(
"document root must be object or array");
606 size_t* current_base = temp;
607 *temp++ = make_element(current_structure_type, ROOT_MARKER);
612 const char closing_bracket = (current_structure_type == TYPE_OBJECT ?
'}' :
']');
613 const bool is_first_element = temp == current_base + 1;
614 bool had_comma =
false;
616 c = peek_structure();
617 if (is_first_element) {
619 return error(
"unexpected comma");
624 c = peek_structure();
626 }
else if (c != closing_bracket) {
627 return error(
"expected ,");
631 if (current_structure_type == TYPE_OBJECT && c !=
'}') {
633 return error(
"object key must be quoted");
635 result = parse_string(temp);
637 return error(
"invalid object key");
639 if (peek_structure() !=
':') {
640 return error(
"expected :");
646 switch (peek_structure()) {
651 return error(
"unexpected end of input");
653 result = parse_null();
656 result = parse_false();
659 result = parse_true();
672 result = parse_number();
675 result = parse_string();
679 next_type = TYPE_ARRAY;
682 next_type = TYPE_OBJECT;
686 size_t* previous_base = current_base;
688 *temp++ = make_element(current_structure_type, previous_base - structure);
689 current_structure_type = next_type;
694 if (current_structure_type == TYPE_ARRAY) {
695 structure_installer = &parser::install_array;
698 return error(
"expected }");
701 if (current_structure_type == TYPE_OBJECT) {
702 structure_installer = &parser::install_object;
705 return error(
"expected ]");
709 return error(
"trailing commas not allowed");
712 size_t element = *current_base;
713 result = (this->*structure_installer)(current_base + 1);
714 size_t parent = get_element_value(element);
715 if (parent == ROOT_MARKER) {
716 root_type = result.value_type;
720 current_base = structure + parent;
721 current_structure_type = get_element_type(element);
725 return error(
"unexpected comma");
727 return error(
"cannot parse unknown value");
731 return result.success;
734 *temp++ = make_element(result.value_type, out - current_base - 1);
738 if (0 == peek_structure()) {
741 return error(
"expected end of input");
745 bool has_remaining_characters(ptrdiff_t remaining) {
746 return input_end - p >= remaining;
750 if (SAJSON_UNLIKELY(!has_remaining_characters(4))) {
751 return error(
"unexpected end of input");
756 if (SAJSON_UNLIKELY(p1 !=
'u' || p2 !=
'l' || p3 !=
'l')) {
757 return error(
"expected 'null'");
764 if (SAJSON_UNLIKELY(!has_remaining_characters(5))) {
765 return error(
"unexpected end of input");
771 if (SAJSON_UNLIKELY(p1 !=
'a' || p2 !=
'l' || p3 !=
's' || p4 !=
'e')) {
772 return error(
"expected 'false'");
779 if (SAJSON_UNLIKELY(!has_remaining_characters(4))) {
780 return error(
"unexpected end of input");
785 if (SAJSON_UNLIKELY(p1 !=
'r' || p2 !=
'u' || p3 !=
'e')) {
786 return error(
"expected 'true'");
792 static double pow10(
int exponent) {
793 if (exponent > 308) {
794 return std::numeric_limits<double>::infinity();
795 }
else if (exponent < -323) {
798 static const double constants[] = {
799 1e-323,1e-322,1e-321,1e-320,1e-319,1e-318,1e-317,1e-316,1e-315,1e-314,
800 1e-313,1e-312,1e-311,1e-310,1e-309,1e-308,1e-307,1e-306,1e-305,1e-304,
801 1e-303,1e-302,1e-301,1e-300,1e-299,1e-298,1e-297,1e-296,1e-295,1e-294,
802 1e-293,1e-292,1e-291,1e-290,1e-289,1e-288,1e-287,1e-286,1e-285,1e-284,
803 1e-283,1e-282,1e-281,1e-280,1e-279,1e-278,1e-277,1e-276,1e-275,1e-274,
804 1e-273,1e-272,1e-271,1e-270,1e-269,1e-268,1e-267,1e-266,1e-265,1e-264,
805 1e-263,1e-262,1e-261,1e-260,1e-259,1e-258,1e-257,1e-256,1e-255,1e-254,
806 1e-253,1e-252,1e-251,1e-250,1e-249,1e-248,1e-247,1e-246,1e-245,1e-244,
807 1e-243,1e-242,1e-241,1e-240,1e-239,1e-238,1e-237,1e-236,1e-235,1e-234,
808 1e-233,1e-232,1e-231,1e-230,1e-229,1e-228,1e-227,1e-226,1e-225,1e-224,
809 1e-223,1e-222,1e-221,1e-220,1e-219,1e-218,1e-217,1e-216,1e-215,1e-214,
810 1e-213,1e-212,1e-211,1e-210,1e-209,1e-208,1e-207,1e-206,1e-205,1e-204,
811 1e-203,1e-202,1e-201,1e-200,1e-199,1e-198,1e-197,1e-196,1e-195,1e-194,
812 1e-193,1e-192,1e-191,1e-190,1e-189,1e-188,1e-187,1e-186,1e-185,1e-184,
813 1e-183,1e-182,1e-181,1e-180,1e-179,1e-178,1e-177,1e-176,1e-175,1e-174,
814 1e-173,1e-172,1e-171,1e-170,1e-169,1e-168,1e-167,1e-166,1e-165,1e-164,
815 1e-163,1e-162,1e-161,1e-160,1e-159,1e-158,1e-157,1e-156,1e-155,1e-154,
816 1e-153,1e-152,1e-151,1e-150,1e-149,1e-148,1e-147,1e-146,1e-145,1e-144,
817 1e-143,1e-142,1e-141,1e-140,1e-139,1e-138,1e-137,1e-136,1e-135,1e-134,
818 1e-133,1e-132,1e-131,1e-130,1e-129,1e-128,1e-127,1e-126,1e-125,1e-124,
819 1e-123,1e-122,1e-121,1e-120,1e-119,1e-118,1e-117,1e-116,1e-115,1e-114,
820 1e-113,1e-112,1e-111,1e-110,1e-109,1e-108,1e-107,1e-106,1e-105,1e-104,
821 1e-103,1e-102,1e-101,1e-100,1e-99,1e-98,1e-97,1e-96,1e-95,1e-94,1e-93,
822 1e-92,1e-91,1e-90,1e-89,1e-88,1e-87,1e-86,1e-85,1e-84,1e-83,1e-82,1e-81,
823 1e-80,1e-79,1e-78,1e-77,1e-76,1e-75,1e-74,1e-73,1e-72,1e-71,1e-70,1e-69,
824 1e-68,1e-67,1e-66,1e-65,1e-64,1e-63,1e-62,1e-61,1e-60,1e-59,1e-58,1e-57,
825 1e-56,1e-55,1e-54,1e-53,1e-52,1e-51,1e-50,1e-49,1e-48,1e-47,1e-46,1e-45,
826 1e-44,1e-43,1e-42,1e-41,1e-40,1e-39,1e-38,1e-37,1e-36,1e-35,1e-34,1e-33,
827 1e-32,1e-31,1e-30,1e-29,1e-28,1e-27,1e-26,1e-25,1e-24,1e-23,1e-22,1e-21,
828 1e-20,1e-19,1e-18,1e-17,1e-16,1e-15,1e-14,1e-13,1e-12,1e-11,1e-10,1e-9,
829 1e-8,1e-7,1e-6,1e-5,1e-4,1e-3,1e-2,1e-1,1e0,1e1,1e2,1e3,1e4,1e5,1e6,1e7,
830 1e8,1e9,1e10,1e11,1e12,1e13,1e14,1e15,1e16,1e17,1e18,1e19,1e20,1e21,
831 1e22,1e23,1e24,1e25,1e26,1e27,1e28,1e29,1e30,1e31,1e32,1e33,1e34,1e35,
832 1e36,1e37,1e38,1e39,1e40,1e41,1e42,1e43,1e44,1e45,1e46,1e47,1e48,1e49,
833 1e50,1e51,1e52,1e53,1e54,1e55,1e56,1e57,1e58,1e59,1e60,1e61,1e62,1e63,
834 1e64,1e65,1e66,1e67,1e68,1e69,1e70,1e71,1e72,1e73,1e74,1e75,1e76,1e77,
835 1e78,1e79,1e80,1e81,1e82,1e83,1e84,1e85,1e86,1e87,1e88,1e89,1e90,1e91,
836 1e92,1e93,1e94,1e95,1e96,1e97,1e98,1e99,1e100,1e101,1e102,1e103,1e104,
837 1e105,1e106,1e107,1e108,1e109,1e110,1e111,1e112,1e113,1e114,1e115,1e116,
838 1e117,1e118,1e119,1e120,1e121,1e122,1e123,1e124,1e125,1e126,1e127,1e128,
839 1e129,1e130,1e131,1e132,1e133,1e134,1e135,1e136,1e137,1e138,1e139,1e140,
840 1e141,1e142,1e143,1e144,1e145,1e146,1e147,1e148,1e149,1e150,1e151,1e152,
841 1e153,1e154,1e155,1e156,1e157,1e158,1e159,1e160,1e161,1e162,1e163,1e164,
842 1e165,1e166,1e167,1e168,1e169,1e170,1e171,1e172,1e173,1e174,1e175,1e176,
843 1e177,1e178,1e179,1e180,1e181,1e182,1e183,1e184,1e185,1e186,1e187,1e188,
844 1e189,1e190,1e191,1e192,1e193,1e194,1e195,1e196,1e197,1e198,1e199,1e200,
845 1e201,1e202,1e203,1e204,1e205,1e206,1e207,1e208,1e209,1e210,1e211,1e212,
846 1e213,1e214,1e215,1e216,1e217,1e218,1e219,1e220,1e221,1e222,1e223,1e224,
847 1e225,1e226,1e227,1e228,1e229,1e230,1e231,1e232,1e233,1e234,1e235,1e236,
848 1e237,1e238,1e239,1e240,1e241,1e242,1e243,1e244,1e245,1e246,1e247,1e248,
849 1e249,1e250,1e251,1e252,1e253,1e254,1e255,1e256,1e257,1e258,1e259,1e260,
850 1e261,1e262,1e263,1e264,1e265,1e266,1e267,1e268,1e269,1e270,1e271,1e272,
851 1e273,1e274,1e275,1e276,1e277,1e278,1e279,1e280,1e281,1e282,1e283,1e284,
852 1e285,1e286,1e287,1e288,1e289,1e290,1e291,1e292,1e293,1e294,1e295,1e296,
853 1e297,1e298,1e299,1e300,1e301,1e302,1e303,1e304,1e305,1e306,1e307,1e308
855 return constants[exponent + 323];
859 bool negative =
false;
865 return error(
"unexpected end of input");
869 bool try_double =
false;
877 if (c < '0' || c >
'9') {
882 if (SAJSON_UNLIKELY(at_eof())) {
883 return error(
"unexpected end of input");
886 char digit = c -
'0';
888 if (SAJSON_UNLIKELY(!try_double && i > INT_MAX / 10 - 9)) {
893 if (SAJSON_UNLIKELY(try_double)) {
894 d = 10.0 * d + digit;
909 return error(
"unexpected end of input");
913 if (c < '0' || c >
'9') {
919 return error(
"unexpected end of input");
921 d = d * 10 + (c -
'0');
927 if (
'e' == e ||
'E' == e) {
934 return error(
"unexpected end of input");
937 bool negativeExponent =
false;
939 negativeExponent =
true;
942 return error(
"unexpected end of input");
944 }
else if (
'+' == *p) {
947 return error(
"unexpected end of input");
954 if (SAJSON_UNLIKELY(c < '0' || c >
'9')) {
955 return error(
"missing exponent");
958 exp = 10 * exp + (c -
'0');
962 return error(
"unexpected end of input");
966 if (c < '0' || c >
'9') {
970 exponent += (negativeExponent ? -exp : exp);
975 d *= pow10(exponent);
986 out -= double_storage::word_length;
987 double_storage::store(out, d);
999 const size_t length = temp - array_base;
1000 size_t*
const new_base = out - length - 1;
1001 while (temp > array_base) {
1003 *(--out) = *(--temp) + (array_base - new_base);
1011 const size_t length = (temp - object_base) / 3;
1018 size_t*
const new_base = out - length * 3 - 1;
1022 *(--out) = *(--temp) + (object_base - new_base);
1023 *(--out) = *(--temp);
1024 *(--out) = *(--temp);
1038 size_t start = p - input.get_data();
1040 if (SAJSON_UNLIKELY(p >= input_end)) {
1041 return error(
"unexpected end of input");
1044 if (SAJSON_UNLIKELY(*p >= 0 && *p < 0x20)) {
1051 tag[1] = p - input.get_data();
1056 return parse_string_slow(tag, start);
1069 unsigned char c = *p++;
1070 if (c >=
'0' && c <=
'9') {
1072 }
else if (c >=
'a' && c <=
'f') {
1074 }
else if (c >=
'A' && c <=
'F') {
1077 return error(
"invalid character in unicode escape");
// Appends the UTF-8 encoding of `codepoint` at `end`, advancing `end` past
// the bytes written. The branch is chosen by magnitude: below 0x800 two
// bytes, below 0x10000 three bytes, otherwise four bytes.
// NOTE(review): original lines 1088 and 1096 are absent from this listing;
// presumably they hold the one-byte store for codepoints below 0x80 and the
// final `else` — confirm against the full file.
1086 void write_utf8(
unsigned codepoint,
char*& end) {
1087 if (codepoint < 0x80) {
1089 }
else if (codepoint < 0x800) {
// Two bytes: lead 110xxxxx, then one 10xxxxxx continuation byte.
1090 *end++ = 0xC0 | (codepoint >> 6);
1091 *end++ = 0x80 | (codepoint & 0x3F);
1092 }
else if (codepoint < 0x10000) {
// Three bytes: lead 1110xxxx, then two continuation bytes of six bits each.
1093 *end++ = 0xE0 | (codepoint >> 12);
1094 *end++ = 0x80 | ((codepoint >> 6) & 0x3F);
1095 *end++ = 0x80 | (codepoint & 0x3F);
// Four bytes: lead 11110xxx, then three continuation bytes. The assert
// bounds the codepoint to 21 bits so the lead byte's payload fits.
1097 assert(codepoint < 0x200000);
1098 *end++ = 0xF0 | (codepoint >> 18);
1099 *end++ = 0x80 | ((codepoint >> 12) & 0x3F);
1100 *end++ = 0x80 | ((codepoint >> 6) & 0x3F);
1101 *end++ = 0x80 | (codepoint & 0x3F);
1105 parse_result parse_string_slow(
size_t* tag,
size_t start) {
1109 if (SAJSON_UNLIKELY(p >= input_end)) {
1110 return error(
"unexpected end of input");
1113 if (SAJSON_UNLIKELY(*p >= 0 && *p < 0x20)) {
1120 tag[1] = end - input.get_data();
1126 if (SAJSON_UNLIKELY(p >= input_end)) {
1127 return error(
"unexpected end of input");
1132 case '"': replacement =
'"';
goto replace;
1133 case '\\': replacement =
'\\';
goto replace;
1134 case '/': replacement =
'/';
goto replace;
1135 case 'b': replacement =
'\b';
goto replace;
1136 case 'f': replacement =
'\f';
goto replace;
1137 case 'n': replacement =
'\n';
goto replace;
1138 case 'r': replacement =
'\r';
goto replace;
1139 case 't': replacement =
'\t';
goto replace;
1141 *end++ = replacement;
1146 if (SAJSON_UNLIKELY(!has_remaining_characters(4))) {
1147 return error(
"unexpected end of input");
1154 if (u >= 0xD800 && u <= 0xDBFF) {
1155 if (SAJSON_UNLIKELY(!has_remaining_characters(6))) {
1156 return error(
"unexpected end of input during UTF-16 surrogate pair");
1160 if (p0 !=
'\\' || p1 !=
'u') {
1161 return error(
"expected \\u");
1165 result = read_hex(v);
1170 if (v < 0xDC00 || v > 0xDFFF) {
1171 return error(
"invalid UTF-16 trail surrogate");
1173 u = 0x10000 + (((u - 0xD800) << 10) | (v - 0xDC00));
1179 return error(
"unknown escape");
1191 char*
const input_end;
1192 size_t*
const structure;
1199 size_t error_column;
1200 std::string error_message;
1203 template<
typename StringType>
1204 document parse(
const StringType&
string) {
1207 size_t length =
string.length();
1208 size_t* structure =
new size_t[length];
1210 return parser(ms, structure).get_document();