CPPMyth
Library to interoperate with MythTV server
sajson.h
1 /*
2  * Copyright (c) 2012, 2013, 2014 Chad Austin
3  *
4  * Permission is hereby granted, free of charge, to any person
5  * obtaining a copy of this software and associated documentation
6  * files (the "Software"), to deal in the Software without
7  * restriction, including without limitation the rights to use, copy,
8  * modify, merge, publish, distribute, sublicense, and/or sell copies
9  * of the Software, and to permit persons to whom the Software is
10  * furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be
13  * included in all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 
25 #pragma once
26 
27 #include <assert.h>
28 #include <stdarg.h>
29 #include <stddef.h>
30 #include <string.h>
31 #include <math.h>
32 #include <limits.h>
33 #include <ostream>
34 #include <algorithm>
35 #include <cstdio>
36 #include <limits>
37 
38 #include <string> // for error messages. kill someday?
39 
40 #if defined(__GNUC__) || defined(__clang__)
41 #define SAJSON_LIKELY(x) __builtin_expect(!!(x), 1)
42 #define SAJSON_UNLIKELY(x) __builtin_expect(!!(x), 0)
43 #else
44 #define SAJSON_LIKELY(x) x
45 #define SAJSON_UNLIKELY(x) x
46 #endif
47 
48 namespace sajson {
49  enum type {
50  TYPE_INTEGER = 0,
51  TYPE_DOUBLE = 1,
52  TYPE_NULL = 2,
53  TYPE_FALSE = 3,
54  TYPE_TRUE = 4,
55  TYPE_STRING = 5,
56  TYPE_ARRAY = 6,
57  TYPE_OBJECT = 7,
58  };
59 
60  inline std::ostream& operator<<(std::ostream& os, type t) {
61  switch (t) {
62  case TYPE_INTEGER: return os << "<integer>";
63  case TYPE_DOUBLE: return os << "<double>";
64  case TYPE_NULL: return os << "<null>";
65  case TYPE_FALSE: return os << "<false>";
66  case TYPE_TRUE: return os << "<true>";
67  case TYPE_STRING: return os << "<string>";
68  case TYPE_ARRAY: return os << "<array>";
69  case TYPE_OBJECT: return os << "<object>";
70  default: return os << "<unknown type";
71  }
72  }
73 
74  static const size_t TYPE_BITS = 3;
75  static const size_t TYPE_SHIFT = sizeof(size_t) * 8 - TYPE_BITS;
76  static const size_t TYPE_MASK = (1 << TYPE_BITS) - 1;
77  static const size_t VALUE_MASK = size_t(-1) >> TYPE_BITS;
78 
79  static const size_t ROOT_MARKER = size_t(-1) & VALUE_MASK;
80 
81  inline type get_element_type(size_t s) {
82  return static_cast<type>((s >> TYPE_SHIFT) & TYPE_MASK);
83  }
84 
85  inline size_t get_element_value(size_t s) {
86  return s & VALUE_MASK;
87  }
88 
89  inline size_t make_element(type t, size_t value) {
90  //assert(value & VALUE_MASK == 0);
91  //value &= VALUE_MASK;
92  return value | (static_cast<size_t>(t) << TYPE_SHIFT);
93  }
94 
95  class string {
96  public:
97  string(const char* text, size_t length)
98  : text(text)
99  , _length(length)
100  {}
101 
102  const char* data() const {
103  return text;
104  }
105 
106  size_t length() const {
107  return _length;
108  }
109 
110  std::string as_string() const {
111  return std::string(text, text + _length);
112  }
113 
114  private:
115  const char* const text;
116  const size_t _length;
117 
118  string(); /*=delete*/
119  };
120 
121  class literal : public string {
122  public:
123  explicit literal(const char* text)
124  : string(text, strlen(text))
125  {}
126  };
127 
129  {
130  size_t key_start;
131  size_t key_end;
132  size_t value;
133  };
134 
136  {
137  object_key_comparator(const char* object_data)
138  : data(object_data)
139  {
140  }
141 
142  bool operator()(const object_key_record& lhs, const string& rhs) const {
143  const size_t lhs_length = lhs.key_end - lhs.key_start;
144  const size_t rhs_length = rhs.length();
145  if (lhs_length < rhs_length) {
146  return true;
147  } else if (lhs_length > rhs_length) {
148  return false;
149  }
150  return memcmp(data + lhs.key_start, rhs.data(), lhs_length) < 0;
151  }
152 
153  bool operator()(const string& lhs, const object_key_record& rhs) const {
154  return !(*this)(rhs, lhs);
155  }
156 
157  bool operator()(const object_key_record& lhs, const
158  object_key_record& rhs)
159  {
160  const size_t lhs_length = lhs.key_end - lhs.key_start;
161  const size_t rhs_length = rhs.key_end - rhs.key_start;
162  if (lhs_length < rhs_length) {
163  return true;
164  } else if (lhs_length > rhs_length) {
165  return false;
166  }
167  return memcmp(data + lhs.key_start, data + rhs.key_start,
168  lhs_length) < 0;
169  }
170 
171  const char* data;
172  };
173 
174  class refcount {
175  public:
176  refcount()
177  : pn(new size_t(1))
178  {}
179 
180  refcount(const refcount& rc)
181  : pn(rc.pn)
182  {
183  ++*pn;
184  }
185 
186  ~refcount() {
187  if (--*pn == 0) {
188  delete pn;
189  }
190  }
191 
192  size_t count() const {
193  return *pn;
194  }
195 
196  private:
197  size_t* pn;
198 
199  refcount& operator=(const refcount&);
200  };
201 
203  public:
205  : length(0)
206  , data(0)
207  {}
208 
209  mutable_string_view(const literal& s)
210  : length(s.length())
211  {
212  data = new char[length];
213  memcpy(data, s.data(), length);
214  }
215 
216  mutable_string_view(const string& s)
217  : length(s.length())
218  {
219  data = new char[length];
220  memcpy(data, s.data(), length);
221  }
222 
223  ~mutable_string_view() {
224  if (uses.count() == 1) {
225  delete[] data;
226  }
227  }
228 
229  size_t get_length() const {
230  return length;
231  }
232 
233  char* get_data() const {
234  return data;
235  }
236 
237  private:
238  refcount uses;
239  size_t length;
240  char* data;
241  };
242 
244  int i;
245  size_t u;
246  };
247  // TODO: reinstate with c++03 implementation
248  //static_assert(sizeof(integer_storage) == sizeof(size_t), "integer_storage must have same size as one structure slot");
249 
251  enum {
252  word_length = sizeof(double) / sizeof(size_t)
253  };
254 
255 #if defined(_M_IX86) || defined(__i386__) || defined(_X86_)
256  static double load(const size_t* location) {
257  return *reinterpret_cast<const double*>(location);
258  }
259  static void store(size_t* location, double value) {
260  *reinterpret_cast<double*>(location) = value;
261  }
262 #else
263  static double load(const size_t* location) {
264  double_storage s;
265  for (unsigned i = 0; i < double_storage::word_length; ++i) {
266  s.u[i] = location[i];
267  }
268  return s.d;
269  }
270 
271  static void store(size_t* location, double value) {
272  double_storage ns;
273  ns.d = value;
274 
275  for (int i = 0; i < ns.word_length; ++i) {
276  location[i] = ns.u[i];
277  }
278  }
279 
280  double d;
281  size_t u[word_length];
282 #endif
283  };
284  // TODO: reinstate with c++03 implementation
285  //static_assert(sizeof(double_storage) == sizeof(double), "double_storage should have same size as double");
286 
287  class value {
288  public:
289  explicit value(type value_type, const size_t* payload, const char* text)
290  : value_type(value_type)
291  , payload(payload)
292  , text(text)
293  {}
294 
295  type get_type() const {
296  return value_type;
297  }
298 
299  // valid iff get_type() is TYPE_ARRAY or TYPE_OBJECT
300  size_t get_length() const {
301  assert_type_2(TYPE_ARRAY, TYPE_OBJECT);
302  return payload[0];
303  }
304 
305  // valid iff get_type() is TYPE_ARRAY
306  value get_array_element(size_t index) const {
307  assert_type(TYPE_ARRAY);
308  size_t element = payload[1 + index];
309  return value(get_element_type(element), payload + get_element_value(element), text);
310  }
311 
312  // valid iff get_type() is TYPE_OBJECT
313  string get_object_key(size_t index) const {
314  assert_type(TYPE_OBJECT);
315  const size_t* s = payload + 1 + index * 3;
316  return string(text + s[0], s[1] - s[0]);
317  }
318 
319  // valid iff get_type() is TYPE_OBJECT
320  value get_object_value(size_t index) const {
321  assert_type(TYPE_OBJECT);
322  size_t element = payload[3 + index * 3];
323  return value(get_element_type(element), payload + get_element_value(element), text);
324  }
325 
326  // valid iff get_type() is TYPE_OBJECT
327  value get_value_of_key(const string& key) const {
328  assert_type(TYPE_OBJECT);
329  size_t i = find_object_key(key);
330  assert_in_bounds(i);
331  return get_object_value(i);
332  }
333 
334  // valid iff get_type() is TYPE_OBJECT
335  // return get_length() if there is no such key
336  size_t find_object_key(const string& key) const {
337  assert_type(TYPE_OBJECT);
338  const object_key_record* start = reinterpret_cast<const object_key_record*>(payload + 1);
339  const object_key_record* end = start + get_length();
340  const object_key_record* i = std::lower_bound(start, end, key, object_key_comparator(text));
341  return (i != end
342  && (i->key_end - i->key_start) == key.length()
343  && memcmp(key.data(), text + i->key_start, key.length()) == 0)? i - start : get_length();
344  }
345 
346  // valid iff get_type() is TYPE_INTEGER
347  int get_integer_value() const {
348  assert_type(TYPE_INTEGER);
349  integer_storage s;
350  s.u = payload[0];
351  return s.i;
352  }
353 
354  // valid iff get_type() is TYPE_DOUBLE
355  double get_double_value() const {
356  assert_type(TYPE_DOUBLE);
357  return double_storage::load(payload);
358  }
359 
360  // valid iff get_type() is TYPE_INTEGER or TYPE_DOUBLE
361  double get_number_value() const {
362  assert_type_2(TYPE_INTEGER, TYPE_DOUBLE);
363  if (get_type() == TYPE_INTEGER) {
364  return get_integer_value();
365  } else {
366  return get_double_value();
367  }
368  }
369 
370  // valid iff get_type() is TYPE_STRING
371  size_t get_string_length() const {
372  assert_type(TYPE_STRING);
373  return payload[1] - payload[0];
374  }
375 
376  // valid iff get_type() is TYPE_STRING
377  std::string as_string() const {
378  assert_type(TYPE_STRING);
379  return std::string(text + payload[0], text + payload[1]);
380  }
381 
382  private:
383  void assert_type(type expected) const {
384  assert(expected == get_type());
385  }
386 
387  void assert_type_2(type e1, type e2) const {
388  assert(e1 == get_type() || e2 == get_type());
389  }
390 
391  void assert_in_bounds(size_t i) const {
392  assert(i < get_length());
393  }
394 
395  const type value_type;
396  const size_t* const payload;
397  const char* const text;
398 
399  };
400 
401  class document {
402  public:
403  explicit document(mutable_string_view& input, const size_t* structure, type root_type, const size_t* root, size_t error_line, size_t error_column, const std::string& error_message)
404  : input(input)
405  , structure(structure)
406  , root_type(root_type)
407  , root(root)
408  , error_line(error_line)
409  , error_column(error_column)
410  , error_message(error_message)
411  {}
412 
413 #if __cplusplus >= 201103L
414  document(const document&) = delete;
415  void operator=(const document&) = delete;
416 
417  document(document&& rhs)
418  : uses(rhs.uses)
419  , input(rhs.input)
420  , structure(rhs.structure)
421  , root_type(rhs.root_type)
422  , root(rhs.root)
423  , error_line(rhs.error_line)
424  , error_column(rhs.error_column)
425  , error_message(rhs.error_message)
426  {}
427 #else
428  private:
429  void operator=(const document& rhs);
430 
431  public:
432  document(const document& rhs)
433  : uses(rhs.uses)
434  , input(rhs.input)
435  , structure(rhs.structure)
436  , root_type(rhs.root_type)
437  , root(rhs.root)
438  , error_line(rhs.error_line)
439  , error_column(rhs.error_column)
440  , error_message(rhs.error_message)
441  {}
442 #endif
443 
444  ~document() {
445  if (uses.count() == 1) {
446  delete[] structure;
447  }
448  }
449 
450  bool is_valid() const {
451  return !!structure;
452  }
453 
454  value get_root() const {
455  return value(root_type, root, input.get_data());
456  }
457 
458  size_t get_error_line() const {
459  return error_line;
460  }
461 
462  size_t get_error_column() const {
463  return error_column;
464  }
465 
466  std::string get_error_message() const {
467  return error_message;
468  }
469 
470  private:
471  refcount uses;
472  mutable_string_view input;
473  const size_t* const structure;
474  const type root_type;
475  const size_t* const root;
476  const size_t error_line;
477  const size_t error_column;
478  const std::string error_message;
479  };
480 
481  class parser {
482  public:
483  parser(const mutable_string_view& msv, size_t* structure)
484  : input(msv)
485  , input_end(input.get_data() + input.get_length())
486  , structure(structure)
487  , p(input.get_data())
488  , temp(structure)
489  , root_type(TYPE_NULL)
490  , out(structure + input.get_length())
491  , error_line(0)
492  , error_column(0)
493  {}
494 
495  document get_document() {
496  if (parse()) {
497  return document(input, structure, root_type, out, 0, 0, std::string());
498  } else {
499  delete[] structure;
500  return document(input, 0, TYPE_NULL, 0, error_line, error_column, error_message);
501  }
502  }
503 
504  private:
505  struct error_result {
506  operator bool() const {
507  return false;
508  }
509  };
510 
511  struct parse_result {
513  : success(false)
514  , value_type(TYPE_NULL)
515  {}
516 
517  parse_result(type t)
518  : success(true)
519  , value_type(t)
520  {}
521 
522  bool operator!() const {
523  return !success;
524  }
525 
526  bool success;
527  type value_type;
528  };
529 
530  bool at_eof() {
531  return p == input_end;
532  }
533 
534  char peek_structure() {
535  for (;;) {
536  if (p == input_end) {
537  // 0 is never legal as a structural character in json text so treat it as eof
538  return 0;
539  }
540  switch (*p) {
541  case 0x20:
542  case 0x09:
543  case 0x0A:
544  case 0x0D:
545  ++p;
546  continue;
547  default:
548  return *p;
549  }
550  }
551  }
552 
553  error_result error(const char* format, ...) {
554  error_line = 1;
555  error_column = 1;
556 
557  char* c = input.get_data();
558  while (c < p) {
559  if (*c == '\r') {
560  if (c + 1 < p && c[1] == '\n') {
561  ++error_line;
562  error_column = 1;
563  ++c;
564  } else {
565  ++error_line;
566  error_column = 1;
567  }
568  } else if (*c == '\n') {
569  ++error_line;
570  error_column = 1;
571  } else {
572  // TODO: count UTF-8 characters
573  ++error_column;
574  }
575  ++c;
576  }
577 
578 
579  char buf[1024];
580  buf[1023] = 0;
581  va_list ap;
582  va_start(ap, format);
583  vsnprintf(buf, 1023, format, ap);
584  va_end(ap);
585 
586  error_message = buf;
587  return error_result();
588  }
589 
590  bool parse() {
591  char c = peek_structure();
592  if (c == 0) {
593  return error("no root element");
594  }
595 
596  type current_structure_type;
597  if (c == '[') {
598  current_structure_type = TYPE_ARRAY;
599  } else if (c == '{') {
600  current_structure_type = TYPE_OBJECT;
601  } else {
602  return error("document root must be object or array");
603  }
604  ++p;
605 
606  size_t* current_base = temp;
607  *temp++ = make_element(current_structure_type, ROOT_MARKER);
608 
609  parse_result result = error_result();
610 
611  for (;;) {
612  const char closing_bracket = (current_structure_type == TYPE_OBJECT ? '}' : ']');
613  const bool is_first_element = temp == current_base + 1;
614  bool had_comma = false;
615 
616  c = peek_structure();
617  if (is_first_element) {
618  if (c == ',') {
619  return error("unexpected comma");
620  }
621  } else {
622  if (c == ',') {
623  ++p;
624  c = peek_structure();
625  had_comma = true;
626  } else if (c != closing_bracket) {
627  return error("expected ,");
628  }
629  }
630 
631  if (current_structure_type == TYPE_OBJECT && c != '}') {
632  if (c != '"') {
633  return error("object key must be quoted");
634  }
635  result = parse_string(temp);
636  if (!result) {
637  return error("invalid object key");
638  }
639  if (peek_structure() != ':') {
640  return error("expected :");
641  }
642  ++p;
643  temp += 2;
644  }
645 
646  switch (peek_structure()) {
647  type next_type;
648  parse_result (parser::*structure_installer)(size_t* base);
649 
650  case 0:
651  return error("unexpected end of input");
652  case 'n':
653  result = parse_null();
654  break;
655  case 'f':
656  result = parse_false();
657  break;
658  case 't':
659  result = parse_true();
660  break;
661  case '0':
662  case '1':
663  case '2':
664  case '3':
665  case '4':
666  case '5':
667  case '6':
668  case '7':
669  case '8':
670  case '9':
671  case '-':
672  result = parse_number();
673  break;
674  case '"':
675  result = parse_string();
676  break;
677 
678  case '[':
679  next_type = TYPE_ARRAY;
680  goto push;
681  case '{':
682  next_type = TYPE_OBJECT;
683  goto push;
684  push: {
685  ++p;
686  size_t* previous_base = current_base;
687  current_base = temp;
688  *temp++ = make_element(current_structure_type, previous_base - structure);
689  current_structure_type = next_type;
690  continue;
691  }
692 
693  case ']':
694  if (current_structure_type == TYPE_ARRAY) {
695  structure_installer = &parser::install_array;
696  goto pop;
697  } else {
698  return error("expected }");
699  }
700  case '}':
701  if (current_structure_type == TYPE_OBJECT) {
702  structure_installer = &parser::install_object;
703  goto pop;
704  } else {
705  return error("expected ]");
706  }
707  pop: {
708  if (had_comma) {
709  return error("trailing commas not allowed");
710  }
711  ++p;
712  size_t element = *current_base;
713  result = (this->*structure_installer)(current_base + 1);
714  size_t parent = get_element_value(element);
715  if (parent == ROOT_MARKER) {
716  root_type = result.value_type;
717  goto done;
718  }
719  temp = current_base;
720  current_base = structure + parent;
721  current_structure_type = get_element_type(element);
722  break;
723  }
724  case ',':
725  return error("unexpected comma");
726  default:
727  return error("cannot parse unknown value");
728  }
729 
730  if (!result) {
731  return result.success;
732  }
733 
734  *temp++ = make_element(result.value_type, out - current_base - 1);
735  }
736 
737  done:
738  if (0 == peek_structure()) {
739  return true;
740  } else {
741  return error("expected end of input");
742  }
743  }
744 
745  bool has_remaining_characters(ptrdiff_t remaining) {
746  return input_end - p >= remaining;
747  }
748 
749  parse_result parse_null() {
750  if (SAJSON_UNLIKELY(!has_remaining_characters(4))) {
751  return error("unexpected end of input");
752  }
753  char p1 = p[1];
754  char p2 = p[2];
755  char p3 = p[3];
756  if (SAJSON_UNLIKELY(p1 != 'u' || p2 != 'l' || p3 != 'l')) {
757  return error("expected 'null'");
758  }
759  p += 4;
760  return TYPE_NULL;
761  }
762 
763  parse_result parse_false() {
764  if (SAJSON_UNLIKELY(!has_remaining_characters(5))) {
765  return error("unexpected end of input");
766  }
767  char p1 = p[1];
768  char p2 = p[2];
769  char p3 = p[3];
770  char p4 = p[4];
771  if (SAJSON_UNLIKELY(p1 != 'a' || p2 != 'l' || p3 != 's' || p4 != 'e')) {
772  return error("expected 'false'");
773  }
774  p += 5;
775  return TYPE_FALSE;
776  }
777 
778  parse_result parse_true() {
779  if (SAJSON_UNLIKELY(!has_remaining_characters(4))) {
780  return error("unexpected end of input");
781  }
782  char p1 = p[1];
783  char p2 = p[2];
784  char p3 = p[3];
785  if (SAJSON_UNLIKELY(p1 != 'r' || p2 != 'u' || p3 != 'e')) {
786  return error("expected 'true'");
787  }
788  p += 4;
789  return TYPE_TRUE;
790  }
791 
792  static double pow10(int exponent) {
793  if (exponent > 308) {
794  return std::numeric_limits<double>::infinity();
795  } else if (exponent < -323) {
796  return 0.0;
797  }
798  static const double constants[] = {
799  1e-323,1e-322,1e-321,1e-320,1e-319,1e-318,1e-317,1e-316,1e-315,1e-314,
800  1e-313,1e-312,1e-311,1e-310,1e-309,1e-308,1e-307,1e-306,1e-305,1e-304,
801  1e-303,1e-302,1e-301,1e-300,1e-299,1e-298,1e-297,1e-296,1e-295,1e-294,
802  1e-293,1e-292,1e-291,1e-290,1e-289,1e-288,1e-287,1e-286,1e-285,1e-284,
803  1e-283,1e-282,1e-281,1e-280,1e-279,1e-278,1e-277,1e-276,1e-275,1e-274,
804  1e-273,1e-272,1e-271,1e-270,1e-269,1e-268,1e-267,1e-266,1e-265,1e-264,
805  1e-263,1e-262,1e-261,1e-260,1e-259,1e-258,1e-257,1e-256,1e-255,1e-254,
806  1e-253,1e-252,1e-251,1e-250,1e-249,1e-248,1e-247,1e-246,1e-245,1e-244,
807  1e-243,1e-242,1e-241,1e-240,1e-239,1e-238,1e-237,1e-236,1e-235,1e-234,
808  1e-233,1e-232,1e-231,1e-230,1e-229,1e-228,1e-227,1e-226,1e-225,1e-224,
809  1e-223,1e-222,1e-221,1e-220,1e-219,1e-218,1e-217,1e-216,1e-215,1e-214,
810  1e-213,1e-212,1e-211,1e-210,1e-209,1e-208,1e-207,1e-206,1e-205,1e-204,
811  1e-203,1e-202,1e-201,1e-200,1e-199,1e-198,1e-197,1e-196,1e-195,1e-194,
812  1e-193,1e-192,1e-191,1e-190,1e-189,1e-188,1e-187,1e-186,1e-185,1e-184,
813  1e-183,1e-182,1e-181,1e-180,1e-179,1e-178,1e-177,1e-176,1e-175,1e-174,
814  1e-173,1e-172,1e-171,1e-170,1e-169,1e-168,1e-167,1e-166,1e-165,1e-164,
815  1e-163,1e-162,1e-161,1e-160,1e-159,1e-158,1e-157,1e-156,1e-155,1e-154,
816  1e-153,1e-152,1e-151,1e-150,1e-149,1e-148,1e-147,1e-146,1e-145,1e-144,
817  1e-143,1e-142,1e-141,1e-140,1e-139,1e-138,1e-137,1e-136,1e-135,1e-134,
818  1e-133,1e-132,1e-131,1e-130,1e-129,1e-128,1e-127,1e-126,1e-125,1e-124,
819  1e-123,1e-122,1e-121,1e-120,1e-119,1e-118,1e-117,1e-116,1e-115,1e-114,
820  1e-113,1e-112,1e-111,1e-110,1e-109,1e-108,1e-107,1e-106,1e-105,1e-104,
821  1e-103,1e-102,1e-101,1e-100,1e-99,1e-98,1e-97,1e-96,1e-95,1e-94,1e-93,
822  1e-92,1e-91,1e-90,1e-89,1e-88,1e-87,1e-86,1e-85,1e-84,1e-83,1e-82,1e-81,
823  1e-80,1e-79,1e-78,1e-77,1e-76,1e-75,1e-74,1e-73,1e-72,1e-71,1e-70,1e-69,
824  1e-68,1e-67,1e-66,1e-65,1e-64,1e-63,1e-62,1e-61,1e-60,1e-59,1e-58,1e-57,
825  1e-56,1e-55,1e-54,1e-53,1e-52,1e-51,1e-50,1e-49,1e-48,1e-47,1e-46,1e-45,
826  1e-44,1e-43,1e-42,1e-41,1e-40,1e-39,1e-38,1e-37,1e-36,1e-35,1e-34,1e-33,
827  1e-32,1e-31,1e-30,1e-29,1e-28,1e-27,1e-26,1e-25,1e-24,1e-23,1e-22,1e-21,
828  1e-20,1e-19,1e-18,1e-17,1e-16,1e-15,1e-14,1e-13,1e-12,1e-11,1e-10,1e-9,
829  1e-8,1e-7,1e-6,1e-5,1e-4,1e-3,1e-2,1e-1,1e0,1e1,1e2,1e3,1e4,1e5,1e6,1e7,
830  1e8,1e9,1e10,1e11,1e12,1e13,1e14,1e15,1e16,1e17,1e18,1e19,1e20,1e21,
831  1e22,1e23,1e24,1e25,1e26,1e27,1e28,1e29,1e30,1e31,1e32,1e33,1e34,1e35,
832  1e36,1e37,1e38,1e39,1e40,1e41,1e42,1e43,1e44,1e45,1e46,1e47,1e48,1e49,
833  1e50,1e51,1e52,1e53,1e54,1e55,1e56,1e57,1e58,1e59,1e60,1e61,1e62,1e63,
834  1e64,1e65,1e66,1e67,1e68,1e69,1e70,1e71,1e72,1e73,1e74,1e75,1e76,1e77,
835  1e78,1e79,1e80,1e81,1e82,1e83,1e84,1e85,1e86,1e87,1e88,1e89,1e90,1e91,
836  1e92,1e93,1e94,1e95,1e96,1e97,1e98,1e99,1e100,1e101,1e102,1e103,1e104,
837  1e105,1e106,1e107,1e108,1e109,1e110,1e111,1e112,1e113,1e114,1e115,1e116,
838  1e117,1e118,1e119,1e120,1e121,1e122,1e123,1e124,1e125,1e126,1e127,1e128,
839  1e129,1e130,1e131,1e132,1e133,1e134,1e135,1e136,1e137,1e138,1e139,1e140,
840  1e141,1e142,1e143,1e144,1e145,1e146,1e147,1e148,1e149,1e150,1e151,1e152,
841  1e153,1e154,1e155,1e156,1e157,1e158,1e159,1e160,1e161,1e162,1e163,1e164,
842  1e165,1e166,1e167,1e168,1e169,1e170,1e171,1e172,1e173,1e174,1e175,1e176,
843  1e177,1e178,1e179,1e180,1e181,1e182,1e183,1e184,1e185,1e186,1e187,1e188,
844  1e189,1e190,1e191,1e192,1e193,1e194,1e195,1e196,1e197,1e198,1e199,1e200,
845  1e201,1e202,1e203,1e204,1e205,1e206,1e207,1e208,1e209,1e210,1e211,1e212,
846  1e213,1e214,1e215,1e216,1e217,1e218,1e219,1e220,1e221,1e222,1e223,1e224,
847  1e225,1e226,1e227,1e228,1e229,1e230,1e231,1e232,1e233,1e234,1e235,1e236,
848  1e237,1e238,1e239,1e240,1e241,1e242,1e243,1e244,1e245,1e246,1e247,1e248,
849  1e249,1e250,1e251,1e252,1e253,1e254,1e255,1e256,1e257,1e258,1e259,1e260,
850  1e261,1e262,1e263,1e264,1e265,1e266,1e267,1e268,1e269,1e270,1e271,1e272,
851  1e273,1e274,1e275,1e276,1e277,1e278,1e279,1e280,1e281,1e282,1e283,1e284,
852  1e285,1e286,1e287,1e288,1e289,1e290,1e291,1e292,1e293,1e294,1e295,1e296,
853  1e297,1e298,1e299,1e300,1e301,1e302,1e303,1e304,1e305,1e306,1e307,1e308
854  };
855  return constants[exponent + 323];
856  }
857 
858  parse_result parse_number() {
859  bool negative = false;
860  if ('-' == *p) {
861  ++p;
862  negative = true;
863 
864  if (at_eof()) {
865  return error("unexpected end of input");
866  }
867  }
868 
869  bool try_double = false;
870 
871  int i = 0;
872  double d = 0.0; // gcc complains that d might be used uninitialized which isn't true. appease the warning anyway.
873  if (*p == '0') {
874  ++p;
875  } else for (;;) {
876  char c = *p;
877  if (c < '0' || c > '9') {
878  break;
879  }
880 
881  ++p;
882  if (SAJSON_UNLIKELY(at_eof())) {
883  return error("unexpected end of input");
884  }
885 
886  char digit = c - '0';
887 
888  if (SAJSON_UNLIKELY(!try_double && i > INT_MAX / 10 - 9)) {
889  // TODO: could split this into two loops
890  try_double = true;
891  d = i;
892  }
893  if (SAJSON_UNLIKELY(try_double)) {
894  d = 10.0 * d + digit;
895  } else {
896  i = 10 * i + digit;
897  }
898  }
899 
900  int exponent = 0;
901 
902  if ('.' == *p) {
903  if (!try_double) {
904  try_double = true;
905  d = i;
906  }
907  ++p;
908  if (at_eof()) {
909  return error("unexpected end of input");
910  }
911  for (;;) {
912  char c = *p;
913  if (c < '0' || c > '9') {
914  break;
915  }
916 
917  ++p;
918  if (at_eof()) {
919  return error("unexpected end of input");
920  }
921  d = d * 10 + (c - '0');
922  --exponent;
923  }
924  }
925 
926  char e = *p;
927  if ('e' == e || 'E' == e) {
928  if (!try_double) {
929  try_double = true;
930  d = i;
931  }
932  ++p;
933  if (at_eof()) {
934  return error("unexpected end of input");
935  }
936 
937  bool negativeExponent = false;
938  if ('-' == *p) {
939  negativeExponent = true;
940  ++p;
941  if (at_eof()) {
942  return error("unexpected end of input");
943  }
944  } else if ('+' == *p) {
945  ++p;
946  if (at_eof()) {
947  return error("unexpected end of input");
948  }
949  }
950 
951  int exp = 0;
952 
953  char c = *p;
954  if (SAJSON_UNLIKELY(c < '0' || c > '9')) {
955  return error("missing exponent");
956  }
957  for (;;) {
958  exp = 10 * exp + (c - '0');
959 
960  ++p;
961  if (at_eof()) {
962  return error("unexpected end of input");
963  }
964 
965  c = *p;
966  if (c < '0' || c > '9') {
967  break;
968  }
969  }
970  exponent += (negativeExponent ? -exp : exp);
971  }
972 
973  if (exponent) {
974  assert(try_double);
975  d *= pow10(exponent);
976  }
977 
978  if (negative) {
979  if (try_double) {
980  d = -d;
981  } else {
982  i = -i;
983  }
984  }
985  if (try_double) {
986  out -= double_storage::word_length;
987  double_storage::store(out, d);
988  return TYPE_DOUBLE;
989  } else {
990  integer_storage is;
991  is.i = i;
992 
993  *--out = is.u;
994  return TYPE_INTEGER;
995  }
996  }
997 
998  parse_result install_array(size_t* array_base) {
999  const size_t length = temp - array_base;
1000  size_t* const new_base = out - length - 1;
1001  while (temp > array_base) {
1002  // I think this addition is legal because the tag bits are at the top?
1003  *(--out) = *(--temp) + (array_base - new_base);
1004  }
1005  *(--out) = length;
1006 
1007  return TYPE_ARRAY;
1008  }
1009 
1010  parse_result install_object(size_t* object_base) {
1011  const size_t length = (temp - object_base) / 3;
1012  object_key_record* oir = reinterpret_cast<object_key_record*>(object_base);
1013  std::sort(
1014  oir,
1015  oir + length,
1016  object_key_comparator(input.get_data()));
1017 
1018  size_t* const new_base = out - length * 3 - 1;
1019  size_t i = length;
1020  while (i--) {
1021  // I think this addition is legal because the tag bits are at the top?
1022  *(--out) = *(--temp) + (object_base - new_base);
1023  *(--out) = *(--temp);
1024  *(--out) = *(--temp);
1025  }
1026  *(--out) = length;
1027 
1028  return TYPE_OBJECT;
1029  }
1030 
1031  parse_result parse_string(size_t* tag = 0) {
1032  if (!tag) {
1033  out -= 2;
1034  tag = out;
1035  }
1036 
1037  ++p; // "
1038  size_t start = p - input.get_data();
1039  for (;;) {
1040  if (SAJSON_UNLIKELY(p >= input_end)) {
1041  return error("unexpected end of input");
1042  }
1043 
1044  if (SAJSON_UNLIKELY(*p >= 0 && *p < 0x20)) {
1045  *p = 0x20;
1046  }
1047 
1048  switch (*p) {
1049  case '"':
1050  tag[0] = start;
1051  tag[1] = p - input.get_data();
1052  ++p;
1053  return TYPE_STRING;
1054 
1055  case '\\':
1056  return parse_string_slow(tag, start);
1057 
1058  default:
1059  ++p;
1060  break;
1061  }
1062  }
1063  }
1064 
1065  parse_result read_hex(unsigned& u) {
1066  unsigned v = 0;
1067  int i = 4;
1068  while (i--) {
1069  unsigned char c = *p++;
1070  if (c >= '0' && c <= '9') {
1071  c -= '0';
1072  } else if (c >= 'a' && c <= 'f') {
1073  c = c - 'a' + 10;
1074  } else if (c >= 'A' && c <= 'F') {
1075  c = c - 'A' + 10;
1076  } else {
1077  return error("invalid character in unicode escape");
1078  }
1079  v = (v << 4) + c;
1080  }
1081 
1082  u = v;
1083  return TYPE_NULL; // ???
1084  }
1085 
1086  void write_utf8(unsigned codepoint, char*& end) {
1087  if (codepoint < 0x80) {
1088  *end++ = codepoint;
1089  } else if (codepoint < 0x800) {
1090  *end++ = 0xC0 | (codepoint >> 6);
1091  *end++ = 0x80 | (codepoint & 0x3F);
1092  } else if (codepoint < 0x10000) {
1093  *end++ = 0xE0 | (codepoint >> 12);
1094  *end++ = 0x80 | ((codepoint >> 6) & 0x3F);
1095  *end++ = 0x80 | (codepoint & 0x3F);
1096  } else {
1097  assert(codepoint < 0x200000);
1098  *end++ = 0xF0 | (codepoint >> 18);
1099  *end++ = 0x80 | ((codepoint >> 12) & 0x3F);
1100  *end++ = 0x80 | ((codepoint >> 6) & 0x3F);
1101  *end++ = 0x80 | (codepoint & 0x3F);
1102  }
1103  }
1104 
1105  parse_result parse_string_slow(size_t* tag, size_t start) {
1106  char* end = p;
1107 
1108  for (;;) {
1109  if (SAJSON_UNLIKELY(p >= input_end)) {
1110  return error("unexpected end of input");
1111  }
1112 
1113  if (SAJSON_UNLIKELY(*p >= 0 && *p < 0x20)) {
1114  *p = 0x20;
1115  }
1116 
1117  switch (*p) {
1118  case '"':
1119  tag[0] = start;
1120  tag[1] = end - input.get_data();
1121  ++p;
1122  return TYPE_STRING;
1123 
1124  case '\\':
1125  ++p;
1126  if (SAJSON_UNLIKELY(p >= input_end)) {
1127  return error("unexpected end of input");
1128  }
1129 
1130  char replacement;
1131  switch (*p) {
1132  case '"': replacement = '"'; goto replace;
1133  case '\\': replacement = '\\'; goto replace;
1134  case '/': replacement = '/'; goto replace;
1135  case 'b': replacement = '\b'; goto replace;
1136  case 'f': replacement = '\f'; goto replace;
1137  case 'n': replacement = '\n'; goto replace;
1138  case 'r': replacement = '\r'; goto replace;
1139  case 't': replacement = '\t'; goto replace;
1140  replace:
1141  *end++ = replacement;
1142  ++p;
1143  break;
1144  case 'u': {
1145  ++p;
1146  if (SAJSON_UNLIKELY(!has_remaining_characters(4))) {
1147  return error("unexpected end of input");
1148  }
1149  unsigned u = 0; // gcc's complaining that this could be used uninitialized. wrong.
1150  parse_result result = read_hex(u);
1151  if (!result) {
1152  return result;
1153  }
1154  if (u >= 0xD800 && u <= 0xDBFF) {
1155  if (SAJSON_UNLIKELY(!has_remaining_characters(6))) {
1156  return error("unexpected end of input during UTF-16 surrogate pair");
1157  }
1158  char p0 = p[0];
1159  char p1 = p[1];
1160  if (p0 != '\\' || p1 != 'u') {
1161  return error("expected \\u");
1162  }
1163  p += 2;
1164  unsigned v = 0; // gcc's complaining that this could be used uninitialized. wrong.
1165  result = read_hex(v);
1166  if (!result) {
1167  return result;
1168  }
1169 
1170  if (v < 0xDC00 || v > 0xDFFF) {
1171  return error("invalid UTF-16 trail surrogate");
1172  }
1173  u = 0x10000 + (((u - 0xD800) << 10) | (v - 0xDC00));
1174  }
1175  write_utf8(u, end);
1176  break;
1177  }
1178  default:
1179  return error("unknown escape");
1180  }
1181  break;
1182 
1183  default:
1184  *end++ = *p++;
1185  break;
1186  }
1187  }
1188  }
1189 
1190  mutable_string_view input;
1191  char* const input_end;
1192  size_t* const structure;
1193 
1194  char* p;
1195  size_t* temp;
1196  type root_type;
1197  size_t* out;
1198  size_t error_line;
1199  size_t error_column;
1200  std::string error_message;
1201  };
1202 
1203  template<typename StringType>
1204  document parse(const StringType& string) {
1205  mutable_string_view ms(string);
1206 
1207  size_t length = string.length();
1208  size_t* structure = new size_t[length];
1209 
1210  return parser(ms, structure).get_document();
1211  }
1212 }
Definition: sajson.h:48