1use std::{str, slice};
21use std::char::decode_utf16;
22use std::convert::TryFrom;
23use crate::object::Object;
24use crate::number::Number;
25use crate::{JsonValue, Error, Result};
26
/// Threshold at which number parsing stops using plain arithmetic:
/// `expect_number!` hands over to `read_big_number` once the accumulated
/// value reaches it, and `expect_fraction!` switches to checked
/// multiplication/addition.
const MAX_PRECISION: u64 = 576460752303423500;


/// Maximum number of simultaneously open arrays/objects. `Parser::parse`
/// returns `Error::ExceededDepthLimit` instead of opening another container
/// once its stack already holds this many entries.
const DEPTH_LIMIT: usize = 512;
34
35
/// Byte-oriented cursor over a JSON source string, plus the scratch state
/// needed while parsing it.
struct Parser<'a> {
    /// Scratch buffer that `read_complex_string` fills with the unescaped
    /// bytes of strings containing escape sequences.
    buffer: Vec<u8>,

    /// The input being parsed; used for slicing and for error reporting.
    source: &'a str,

    /// Pointer to the first byte of `source` (set from `source.as_ptr()`).
    byte_ptr: *const u8,

    /// Offset of the next byte to read.
    index: usize,

    /// Length of `source` in bytes (set from `source.len()`).
    length: usize,
}
56
57
// Evaluates to the byte at the parser's current index and advances past it.
//
// If the parser is already at the end of the input, this returns
// `Err(Error::UnexpectedEndOfJson)` from the *enclosing function*, so it can
// only be used inside functions returning the crate's `Result`.
macro_rules! expect_byte {
    ($parser:ident) => ({
        if $parser.is_eof() {
            return Err(Error::UnexpectedEndOfJson);
        }

        let ch = $parser.read_byte();
        $parser.bump();
        ch
    })
}
71
72
// Consumes one byte per pattern, in order. At the first byte that does not
// match its pattern the enclosing function returns the parser's
// "unexpected character" error. Used for the tails of the `true`, `false`
// and `null` literals and for the `\u` that must introduce the second half
// of a surrogate pair.
macro_rules! expect_sequence {
    ($parser:ident, $( $ch:pat ),*) => {
        $(
            match expect_byte!($parser) {
                $ch => {},
                _ => return $parser.unexpected_character(),
            }
        )*
    }
}
91
92
// Evaluates to the next byte that is not whitespace, consuming it and any
// whitespace in front of it. Bytes 9 through 13 and 32 are skipped, i.e. tab,
// line feed, vertical tab, form feed, carriage return and space.
//
// Running out of input while skipping propagates the end-of-input error of
// `expect_byte!` out of the enclosing function.
macro_rules! expect_byte_ignore_whitespace {
    ($parser:ident) => ({
        loop {
            match expect_byte!($parser) {
                9 ..= 13 | 32 => {},
                byte => break byte,
            }
        }
    })
}
119
// Consumes the rest of the input, which may only contain whitespace
// (bytes 9 through 13 and 32). Any other byte is consumed and then reported
// through the parser's "unexpected character" error, returned from the
// enclosing function.
macro_rules! expect_eof {
    ($parser:ident) => ({
        while !$parser.is_eof() {
            let byte = $parser.read_byte();
            // Always step past the byte: the error path expects the offending
            // byte to be the one just consumed.
            $parser.bump();

            match byte {
                9 ..= 13 | 32 => {},
                _ => return $parser.unexpected_character(),
            }
        }
    })
}
134
// Skips whitespace, reads one byte and checks it. Two forms:
//
//  * `expect!(parser, byte)` — the enclosing function returns the
//    "unexpected character" error unless the byte equals `byte`.
//  * `expect!{parser, pattern => expr, ...}` — evaluates to the expression of
//    the first pattern the byte matches; if none matches, the enclosing
//    function returns the "unexpected character" error.
macro_rules! expect {
    ($parser:ident, $byte:expr) => ({
        let ch = expect_byte_ignore_whitespace!($parser);

        if ch != $byte {
            return $parser.unexpected_character()
        }
    });

    {$parser:ident $(, $byte:pat => $then:expr )*} => ({
        let ch = expect_byte_ignore_whitespace!($parser);

        match ch {
            $(
                $byte => $then,
            )*
            _ => return $parser.unexpected_character()
        }

    })
}
158
159
// Markers that make the layout of `ALLOWED` readable. All three "special"
// markers are `false`; they only differ in what they label.
const QU: bool = false; // double quote       0x22
const BS: bool = false; // backslash          0x5C
const CT: bool = false; // control character  0x00 ..= 0x1F
const __: bool = true;  // any other byte

// Lookup table indexed by byte value. `true` means the byte can be taken
// through a string as-is; `false` means the string readers must stop and
// look at it (closing quote, start of an escape, or a control byte).
static ALLOWED: [bool; 256] = [
// 0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
  CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, // 0
  CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, // 1
  __, __, QU, __, __, __, __, __, __, __, __, __, __, __, __, __, // 2
  __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 3
  __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 4
  __, __, __, __, __, __, __, __, __, __, __, __, BS, __, __, __, // 5
  __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 6
  __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 7
  __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 8
  __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 9
  __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // A
  __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // B
  __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // C
  __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // D
  __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // E
  __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // F
];
186
187
// Reads a string whose opening quote has already been consumed and evaluates
// to its contents as `&str`.
//
// Fast path: when the closing quote is reached without meeting a backslash,
// the result is a slice pointing straight into the source (nothing is
// copied). As soon as a backslash is met, the work is handed over to
// `read_complex_string`, which unescapes into the parser's buffer.
//
// Any other byte that `ALLOWED` rejects makes the enclosing function return
// the "unexpected character" error.
macro_rules! expect_string {
    ($parser:ident) => ({
        let result: &str;
        // Index of the first byte after the opening quote.
        let start = $parser.index;

        loop {
            let ch = expect_byte!($parser);
            if ALLOWED[ch as usize] {
                continue;
            }
            if ch == b'"' {
                // SAFETY: `start` and `index - 1` (the closing quote) both lie
                // inside the source, which `byte_ptr` points to. Both
                // positions are next to ASCII quotes, so the range covers
                // whole characters of an already valid `&str`.
                unsafe {
                    let ptr = $parser.byte_ptr.offset(start as isize);
                    let len = $parser.index - 1 - start;
                    result = str::from_utf8_unchecked(slice::from_raw_parts(ptr, len));
                }
                break;
            }
            if ch == b'\\' {
                result = $parser.read_complex_string(start)?;
                break;
            }

            return $parser.unexpected_character();
        }

        result
    })
}
223
224
// Parses a number whose first digit (`$first`, one of `1`..`9`) has already
// been consumed, and evaluates to a `Number`.
//
// Integer digits are accumulated with plain arithmetic. The accumulator is
// checked against `MAX_PRECISION` *before* each further digit, and once it
// has reached that threshold the rest is delegated to `read_big_number`.
// The first non-digit byte is left unconsumed and passed to
// `allow_number_extensions!`, which handles an optional fraction/exponent.
macro_rules! expect_number {
    ($parser:ident, $first:ident) => ({
        let mut num = ($first - b'0') as u64;

        let result: Number;

        loop {
            if num >= MAX_PRECISION {
                result = $parser.read_big_number(num)?;
                break;
            }

            // End of input directly after a digit: the integer is complete.
            if $parser.is_eof() {
                result = num.into();
                break;
            }

            // Peek only; the byte is consumed by whichever branch accepts it.
            let ch = $parser.read_byte();

            match ch {
                b'0' ..= b'9' => {
                    $parser.bump();
                    num = num * 10 + (ch - b'0') as u64;
                },
                _ => {
                    // Decimal exponent, adjusted while reading a fraction.
                    let mut e = 0;
                    result = allow_number_extensions!($parser, num, e, ch);
                    break;
                }
            }
        }

        result
    })
}
263
264
// Handles what may follow the integer part of a number. Two forms:
//
//  * `(parser, num, e, ch)` — `ch` is the peeked, not yet consumed byte after
//    the integer digits. A `.` starts a fraction, `e`/`E` an exponent; any
//    other byte is left unconsumed and the integer `num` is the result.
//  * `(parser)` — used after a leading `0`: the integer part is zero, so
//    either the input ends (result is zero) or the next byte is peeked and
//    handled by the first form.
macro_rules! allow_number_extensions {
    ($parser:ident, $num:ident, $e:ident, $ch:ident) => ({
        match $ch {
            b'.' => {
                $parser.bump();
                expect_fraction!($parser, $num, $e)
            },
            b'e' | b'E' => {
                $parser.bump();
                $parser.expect_exponent($num, $e)?
            },
            _ => $num.into()
        }
    });

    ($parser:ident) => ({
        if $parser.is_eof() {
            0.into()
        } else {
            let mut num = 0;
            let mut e = 0;
            let ch = $parser.read_byte();
            allow_number_extensions!($parser, num, e, ch)
        }
    })
}
296
297
// Parses the digits after a decimal point (the `.` is already consumed) and
// evaluates to a `Number`.
//
// Every accepted digit is appended to `$num` and lowers the decimal exponent
// `$e` by one. Below `MAX_PRECISION` plain arithmetic is used; above it the
// digit is appended with checked arithmetic and silently dropped when it
// would overflow. At least one digit is required. An `e`/`E` after the
// digits hands over to `expect_exponent`; any other byte is left unconsumed.
macro_rules! expect_fraction {
    ($parser:ident, $num:ident, $e:ident) => ({
        // Mandatory first digit: this one is consumed unconditionally.
        let first = expect_byte!($parser);

        match first {
            b'0' ..= b'9' => {
                if $num < MAX_PRECISION {
                    $num = $num * 10 + (first - b'0') as u64;
                    $e -= 1;
                } else if let Some(grown) = $num
                    .checked_mul(10)
                    .and_then(|scaled| scaled.checked_add((first - b'0') as u64))
                {
                    $num = grown;
                    $e -= 1;
                }
            },
            _ => return $parser.unexpected_character()
        }

        // Remaining digits are peeked first so that a terminating byte stays
        // in the input for the caller.
        loop {
            if $parser.is_eof() {
                break unsafe { Number::from_parts_unchecked(true, $num, $e) };
            }

            let next = $parser.read_byte();

            match next {
                b'0' ..= b'9' => {
                    $parser.bump();

                    if $num < MAX_PRECISION {
                        $num = $num * 10 + (next - b'0') as u64;
                        $e -= 1;
                    } else if let Some(grown) = $num
                        .checked_mul(10)
                        .and_then(|scaled| scaled.checked_add((next - b'0') as u64))
                    {
                        $num = grown;
                        $e -= 1;
                    }
                },
                b'e' | b'E' => {
                    $parser.bump();
                    break $parser.expect_exponent($num, $e)?;
                },
                _ => break unsafe { Number::from_parts_unchecked(true, $num, $e) },
            }
        }
    })
}
366
367impl<'a> Parser<'a> {
368 pub fn new(source: &'a str) -> Self {
369 Parser {
370 buffer: Vec::with_capacity(30),
371 source: source,
372 byte_ptr: source.as_ptr(),
373 index: 0,
374 length: source.len(),
375 }
376 }
377
378 #[inline(always)]
380 fn is_eof(&mut self) -> bool {
381 self.index == self.length
382 }
383
384 #[inline(always)]
391 fn read_byte(&mut self) -> u8 {
392 debug_assert!(self.index < self.length, "Reading out of bounds");
393
394 unsafe { *self.byte_ptr.offset(self.index as isize) }
395 }
396
397 #[inline(always)]
400 fn bump(&mut self) {
401 self.index = self.index.wrapping_add(1);
402 }
403
404 fn unexpected_character<T: Sized>(&mut self) -> Result<T> {
407 let at = self.index - 1;
408
409 let ch = self.source[at..]
410 .chars()
411 .next()
412 .expect("Must have a character");
413
414 let (lineno, col) = self.source[..at]
415 .lines()
416 .enumerate()
417 .last()
418 .unwrap_or((0, ""));
419
420 let colno = col.chars().count();
421
422 Err(Error::UnexpectedCharacter {
423 ch: ch,
424 line: lineno + 1,
425 column: colno + 1,
426 })
427 }
428
429 fn read_hexdec_digit(&mut self) -> Result<u16> {
431 let ch = expect_byte!(self);
432 Ok(match ch {
433 b'0' ..= b'9' => (ch - b'0'),
434 b'a' ..= b'f' => (ch + 10 - b'a'),
435 b'A' ..= b'F' => (ch + 10 - b'A'),
436 _ => return self.unexpected_character(),
437 } as u16)
438 }
439
440 fn read_hexdec_codepoint(&mut self) -> Result<u16> {
442 Ok(
443 self.read_hexdec_digit()? << 12 |
444 self.read_hexdec_digit()? << 8 |
445 self.read_hexdec_digit()? << 4 |
446 self.read_hexdec_digit()?
447 )
448 }
449
450 fn read_codepoint(&mut self) -> Result<()> {
454 let mut buf = [0; 4];
455 let codepoint = self.read_hexdec_codepoint()?;
456
457 let unicode = match char::try_from(codepoint as u32) {
458 Ok(code) => code,
459 Err(_) => {
461 expect_sequence!(self, b'\\', b'u');
462
463 match decode_utf16(
464 [codepoint, self.read_hexdec_codepoint()?].iter().copied()
465 ).next() {
466 Some(Ok(code)) => code,
467 _ => return Err(Error::FailedUtf8Parsing),
468 }
469 }
470 };
471
472 self.buffer.extend_from_slice(unicode.encode_utf8(&mut buf).as_bytes());
473
474 Ok(())
475 }
476
477 fn read_complex_string(&mut self, start: usize) -> Result<&'_ str> {
486 let len = self.buffer.len();
495 let mut ch = b'\\';
497
498 self.buffer.extend_from_slice(&self.source.as_bytes()[start .. self.index - 1]);
500
501 loop {
502 if ALLOWED[ch as usize] {
503 self.buffer.push(ch);
504 ch = expect_byte!(self);
505 continue;
506 }
507 match ch {
508 b'"' => break,
509 b'\\' => {
510 let escaped = expect_byte!(self);
511 let escaped = match escaped {
512 b'u' => {
513 self.read_codepoint()?;
514 ch = expect_byte!(self);
515 continue;
516 },
517 b'"' |
518 b'\\' |
519 b'/' => escaped,
520 b'b' => 0x8,
521 b'f' => 0xC,
522 b't' => b'\t',
523 b'r' => b'\r',
524 b'n' => b'\n',
525 _ => return self.unexpected_character()
526 };
527 self.buffer.push(escaped);
528 },
529 _ => return self.unexpected_character()
530 }
531 ch = expect_byte!(self);
532 }
533
534 Ok(unsafe {
537 str::from_utf8_unchecked(
538 slice::from_raw_parts(self.buffer[len .. ].as_ptr(), self.buffer.len() - len)
546 )
547 })
548 }
549
550 fn read_big_number(&mut self, mut num: u64) -> Result<Number> {
557 let mut e = 0i16;
558 loop {
559 if self.is_eof() {
560 return Ok(unsafe { Number::from_parts_unchecked(true, num, e) });
561 }
562 let ch = self.read_byte();
563 match ch {
564 b'0' ..= b'9' => {
565 self.bump();
566 match num.checked_mul(10).and_then(|num| {
567 num.checked_add((ch - b'0') as u64)
568 }) {
569 Some(result) => num = result,
570 None => e = e.checked_add(1).ok_or_else(|| Error::ExceededDepthLimit)?,
571 }
572 },
573 b'.' => {
574 self.bump();
575 return Ok(expect_fraction!(self, num, e));
576 },
577 b'e' | b'E' => {
578 self.bump();
579 return self.expect_exponent(num, e);
580 }
581 _ => break
582 }
583 }
584
585 Ok(unsafe { Number::from_parts_unchecked(true, num, e) })
586 }
587
588 fn expect_exponent(&mut self, num: u64, big_e: i16) -> Result<Number> {
591 let mut ch = expect_byte!(self);
592 let sign = match ch {
593 b'-' => {
594 ch = expect_byte!(self);
595 -1
596 },
597 b'+' => {
598 ch = expect_byte!(self);
599 1
600 },
601 _ => 1
602 };
603
604 let mut e = match ch {
605 b'0' ..= b'9' => (ch - b'0') as i16,
606 _ => return self.unexpected_character(),
607 };
608
609 loop {
610 if self.is_eof() {
611 break;
612 }
613 let ch = self.read_byte();
614 match ch {
615 b'0' ..= b'9' => {
616 self.bump();
617 e = e.saturating_mul(10).saturating_add((ch - b'0') as i16);
618 },
619 _ => break
620 }
621 }
622
623 Ok(unsafe { Number::from_parts_unchecked(true, num, big_e.saturating_add(e * sign)) })
624 }
625
/// Parses the whole source into a single `JsonValue`.
///
/// Nesting is tracked on an explicit stack of `StackBlock`s rather than by
/// recursion. The outer `'parsing` loop reads one value starting at the
/// byte in `ch`; the inner `'popping` loop stores a finished value into the
/// container on top of the stack and, when that container is closed too,
/// treats the container itself as the next finished value.
///
/// # Errors
///
/// Returns `Error::ExceededDepthLimit` when a non-empty container would be
/// opened while `DEPTH_LIMIT` containers are already open, and the errors of
/// the reading macros/helpers for malformed or truncated input.
fn parse(&mut self) -> Result<JsonValue> {
    let mut stack = Vec::with_capacity(3);
    let mut ch = expect_byte_ignore_whitespace!(self);

    'parsing: loop {
        // `ch` is the first byte of the value to read.
        let mut value = match ch {
            b'[' => {
                ch = expect_byte_ignore_whitespace!(self);

                if ch != b']' {
                    if stack.len() == DEPTH_LIMIT {
                        return Err(Error::ExceededDepthLimit);
                    }

                    // Non-empty array: `ch` already holds the first byte of
                    // its first element, so go straight back to reading.
                    stack.push(StackBlock(JsonValue::Array(Vec::with_capacity(2)), 0));
                    continue 'parsing;
                }

                // `[]` is complete without touching the stack.
                JsonValue::Array(Vec::new())
            },
            b'{' => {
                ch = expect_byte_ignore_whitespace!(self);

                if ch != b'}' {
                    if stack.len() == DEPTH_LIMIT {
                        return Err(Error::ExceededDepthLimit);
                    }

                    let mut object = Object::with_capacity(3);

                    // A non-empty object must start with a key.
                    if ch != b'"' {
                        return self.unexpected_character()
                    }

                    // Reserve the slot with a placeholder; the real value is
                    // written by `override_at` once it has been parsed.
                    let index = object.insert_index(expect_string!(self), JsonValue::Null);
                    expect!(self, b':');

                    stack.push(StackBlock(JsonValue::Object(object), index));

                    ch = expect_byte_ignore_whitespace!(self);

                    continue 'parsing;
                }

                // `{}` is complete without touching the stack.
                JsonValue::Object(Object::new())
            },
            b'"' => expect_string!(self).into(),
            // A leading zero cannot be followed by more integer digits.
            b'0' => JsonValue::Number(allow_number_extensions!(self)),
            b'1' ..= b'9' => {
                JsonValue::Number(expect_number!(self, ch))
            },
            b'-' => {
                // Parse the magnitude, then negate the resulting number.
                let ch = expect_byte!(self);
                JsonValue::Number(- match ch {
                    b'0' => allow_number_extensions!(self),
                    b'1' ..= b'9' => expect_number!(self, ch),
                    _ => return self.unexpected_character()
                })
            }
            b't' => {
                expect_sequence!(self, b'r', b'u', b'e');
                JsonValue::Boolean(true)
            },
            b'f' => {
                expect_sequence!(self, b'a', b'l', b's', b'e');
                JsonValue::Boolean(false)
            },
            b'n' => {
                expect_sequence!(self, b'u', b'l', b'l');
                JsonValue::Null
            },
            _ => return self.unexpected_character()
        };

        // `value` is complete: hand it to whatever is waiting for it.
        'popping: loop {
            match stack.last_mut() {
                // Nothing is open: this was the top-level value, and only
                // whitespace may follow it.
                None => {
                    expect_eof!(self);

                    return Ok(value);
                },

                Some(&mut StackBlock(JsonValue::Array(ref mut array), _)) => {
                    array.push(value);

                    ch = expect_byte_ignore_whitespace!(self);

                    match ch {
                        b',' => {
                            // Another element follows; read its first byte.
                            ch = expect_byte_ignore_whitespace!(self);

                            continue 'parsing;
                        },
                        // Array closed: fall through to pop it below.
                        b']' => {},
                        _ => return self.unexpected_character()
                    }
                },

                Some(&mut StackBlock(JsonValue::Object(ref mut object), ref mut index )) => {
                    // Replace the placeholder stored for the pending key.
                    object.override_at(*index, value);

                    ch = expect_byte_ignore_whitespace!(self);

                    match ch {
                        b',' => {
                            // Next key, its placeholder slot, then its value.
                            expect!(self, b'"');
                            *index = object.insert_index(expect_string!(self), JsonValue::Null);
                            expect!(self, b':');

                            ch = expect_byte_ignore_whitespace!(self);

                            continue 'parsing;
                        },
                        // Object closed: fall through to pop it below.
                        b'}' => {},
                        _ => return self.unexpected_character()
                    }
                },

                // Only arrays and objects are ever pushed onto the stack.
                _ => unreachable!(),
            }

            // The container on top was just closed; it becomes the finished
            // value for the next round of `'popping`.
            value = match stack.pop() {
                Some(StackBlock(value, _)) => value,
                None => break 'popping
            }
        }
    }
}
755}
756
/// One open container on the stack used by `Parser::parse`: the array or
/// object being filled and, for objects, the index returned by
/// `Object::insert_index` for the key whose value is still being parsed
/// (arrays are pushed with `0`).
struct StackBlock(JsonValue, usize);
758
759#[inline]
761pub fn parse(source: &str) -> Result<JsonValue> {
762 Parser::new(source).parse()
763}
764
765
766#[cfg(test)]
767mod tests {
768 use super::*;
769 use crate::stringify;
770 use crate::JsonValue;
771
772 use crate::object;
773 use crate::array;
774
775 use std::fs::File;
776 use std::io::prelude::*;
777
// Round-trip check: parsing the fixture and serializing it again must give
// back the file contents unchanged.
#[test]
fn it_should_parse_escaped_forward_slashes_with_quotes() {
    let mut file = File::open("tests/test_json_slashes_quotes").unwrap();
    let mut contents = String::new();
    file.read_to_string(&mut contents).unwrap();

    let actual = parse(&contents).unwrap();
    // `actual` is not used afterwards, so it is moved rather than cloned.
    let serialized = stringify(actual);

    assert_eq!(serialized, contents);
}
790
// Round-trip check for a string value containing escaped quotes.
#[test]
fn it_should_parse_escaped_quotes() {
    // A string literal is enough here; no owned `String` is needed.
    let contents = "{\"ab\":\"c\\\"d\\\"e\"}";

    let actual = parse(contents).unwrap();
    // `actual` is not used afterwards, so it is moved rather than cloned.
    let serialized = stringify(actual);

    assert_eq!(serialized, contents);
}
800
// A flat object with number, boolean and null members.
#[test]
fn it_should_parse_basic_json_values() {
    let input = "{\"a\":1,\"b\":true,\"c\":false,\"d\":null,\"e\":2}";
    let parsed = parse(input).unwrap();

    // The null member is assigned separately after the macro.
    let mut wanted = object! {
        a: 1,
        b: true,
        c: false,
        e: 2,
    };
    wanted["d"] = JsonValue::Null;

    assert_eq!(parsed, wanted);
}
815
// An object holding an array with scalars, an empty array and an empty
// object.
#[test]
fn it_should_parse_json_arrays() {
    let input = "{\"a\":1,\"b\":true,\"c\":false,\"d\":null,\"e\":2,\"f\":[1,2,3,false,true,[],{}]}";
    let parsed = parse(input).unwrap();

    // The null and array members are assigned separately after the macro.
    let mut wanted = object! {
        a: 1,
        b: true,
        c: false,
        e: 2,
    };
    wanted["d"] = JsonValue::Null;
    wanted["f"] = array![
        1,2,3,
        false,
        true,
        [],
        {},
    ];

    assert_eq!(parsed, wanted);
}
837
// Objects nested several levels deep, with empty containers as leaves.
#[test]
fn it_should_parse_json_nested_object() {
    let input = "{\"a\":1,\"b\":{\"c\":2,\"d\":{\"e\":{\"f\":{\"g\":3,\"h\":[]}}},\"i\":4,\"j\":[],\"k\":{\"l\":5,\"m\":{}}}}";
    let parsed = parse(input).unwrap();

    let wanted = object! {
        a: 1,
        b: {
            c: 2,
            d: {
                e: {
                    f: {
                        g: 3,
                        h: []
                    }
                }
            },
            i: 4,
            j: [],
            k: {
                l: 5,
                m: {}
            }
        }
    };

    assert_eq!(parsed, wanted);
}
865
// Nested objects that also contain arrays of objects.
#[test]
fn it_should_parse_json_complex_object() {
    let input = "{\"a\":1,\"b\":{\"c\":2,\"d\":{\"e\":{\"f\":{\"g\":3,\"h\":[{\"z\":1},{\"y\":2,\"x\":[{},{}]}]}}},\"i\":4,\"j\":[],\"k\":{\"l\":5,\"m\":{}}}}";
    let parsed = parse(input).unwrap();

    let wanted = object! {
        a: 1,
        b: {
            c: 2,
            d: {
                e: {
                    f: {
                        g: 3,
                        h: [
                            { z: 1 },
                            { y: 2, x: [{}, {}]}
                        ]
                    }
                }
            },
            i: 4,
            j: [],
            k: {
                l: 5,
                m: {}
            }
        }
    };

    assert_eq!(parsed, wanted);
}
896
897}