jzon/
object.rs

1use std::{ ptr, mem, str, slice, vec, fmt };
2use std::ops::{ Index, IndexMut, Deref };
3use std::iter::FromIterator;
4
5use crate::codegen::{ DumpGenerator, Generator, PrettyGenerator };
6use crate::value::JsonValue;
7
// Keys of at most this many bytes are stored inline in `Key::buf`;
// longer keys get their own heap allocation.
const KEY_BUF_LEN: usize = 32;
// Shared sentinel returned by the `Index` impls for missing members.
static NULL: JsonValue = JsonValue::Null;
10
11// FNV-1a implementation
12//
13// While the `Object` is implemented as a binary tree, not a hash table, the
14// order in which the tree is balanced makes absolutely no difference as long
15// as there is a deterministic left / right ordering with good spread.
16// Comparing a hashed `u64` is faster than comparing `&str` or even `&[u8]`,
17// for larger objects this yields non-trivial performance benefits.
18//
19// Additionally this "randomizes" the keys a bit. Should the keys in an object
20// be inserted in alphabetical order (an example of such a use case would be
21// using an object as a store for entries by ids, where ids are sorted), this
22// will prevent the tree from being constructed in a way where the same branch
23// of each node is always used, effectively producing linear lookup times. Bad!
24//
25// Example:
26//
27// ```
28// println!("{}", hash_key(b"10000056"));
29// println!("{}", hash_key(b"10000057"));
30// println!("{}", hash_key(b"10000058"));
31// println!("{}", hash_key(b"10000059"));
32// ```
33//
34// Produces:
35//
36// ```
37// 15043794053238616431  <-- 2nd
38// 15043792953726988220  <-- 1st
39// 15043800650308385697  <-- 4th
40// 15043799550796757486  <-- 3rd
41// ```
#[inline]
fn hash_key(key: &[u8]) -> u64 {
    // FNV-1a, 64-bit: start from the offset basis, then for each byte
    // XOR it in and multiply by the FNV prime (wrapping on overflow).
    key.iter().fold(0xcbf29ce484222325u64, |hash, &byte| {
        (hash ^ byte as u64).wrapping_mul(0x100000001b3)
    })
}
51
struct Key {
    // Internal buffer to store keys that fit within `KEY_BUF_LEN`,
    // otherwise this field will contain garbage.
    pub buf: [u8; KEY_BUF_LEN],

    // Length of the key in bytes (may exceed `KEY_BUF_LEN` for heap keys).
    pub len: usize,

    // Cached raw pointer to the key bytes, so that a `&str` slice can be
    // built cheaply without first checking whether the key lives in `buf`
    // or in a separate heap allocation. For short keys this dangles after
    // the containing `Vec<Node>` reallocates; see `fix_ptr`.
    pub ptr: *mut u8,

    // FNV-1a hash of the key bytes (see `hash_key`), used as the ordering
    // key of the binary tree in `Object`.
    pub hash: u64,
}
68
impl Key {
    // Create a key with the hash and length recorded but no bytes attached
    // yet; `attach` must be called once the struct sits at its final address.
    #[inline]
    fn new(hash: u64, len: usize) -> Self {
        Key {
            buf: [0; KEY_BUF_LEN],
            len: len,
            ptr: ptr::null_mut(),
            hash: hash
        }
    }

    #[inline]
    fn as_bytes(&self) -> &[u8] {
        // SAFETY: `ptr` must point at `len` valid bytes — established by
        // `attach`, and re-established by `fix_ptr` after reallocations.
        unsafe {
            slice::from_raw_parts(self.ptr, self.len)
        }
    }

    #[inline]
    fn as_str(&self) -> &str {
        // SAFETY: keys originate from `&str` slices, so the bytes are UTF-8.
        unsafe {
            str::from_utf8_unchecked(self.as_bytes())
        }
    }

    // The `buf` on the `Key` can only be filled after the struct
    // is already on the `Vec`'s heap (along with the `Node`).
    // For that reason it's not set in `Key::new` but only after
    // the `Node` is created and allocated.
    #[inline]
    fn attach(&mut self, key: &[u8]) {
        if self.len <= KEY_BUF_LEN {
            // Short key: copy the bytes into the inline buffer and cache
            // a pointer into it.
            unsafe {
                ptr::copy_nonoverlapping(
                    key.as_ptr(),
                    self.buf.as_mut_ptr(),
                    self.len
                );
            }
            self.ptr = self.buf.as_mut_ptr();
        } else {
            // Long key: move the bytes to their own heap allocation, keep
            // only the raw pointer, and let `Drop` reclaim it later.
            let mut heap = key.to_vec();
            self.ptr = heap.as_mut_ptr();
            mem::forget(heap);
        }
    }

    // Since we store `Node`s on a vector, it will suffer from reallocation.
    // Whenever that happens, `key.ptr` for short keys will turn into dangling
    // pointers and will need to be re-cached. Long (heap-allocated) keys are
    // unaffected, as their bytes don't live inside the vector.
    #[inline]
    fn fix_ptr(&mut self) {
        if self.len <= KEY_BUF_LEN {
            self.ptr = self.buf.as_mut_ptr();
        }
    }
}
126
// Implement `Sync` and `Send` for `Key` despite the use of raw pointers.
// The pointer only ever refers to memory owned by the `Key` itself (either
// its inline `buf` or its private heap allocation), so it never aliases
// data owned elsewhere — NOTE(review): soundness still depends on `fix_ptr`
// being called after every store reallocation.
unsafe impl Sync for Key {}
unsafe impl Send for Key {}
131
132// Because long keys _can_ be stored separately from the `Key` on heap,
133// it's essential to clean up the heap allocation when the `Key` is dropped.
// Because long keys _can_ be stored separately from the `Key` on heap,
// it's essential to clean up the heap allocation when the `Key` is dropped.
impl Drop for Key {
    fn drop(&mut self) {
        unsafe {
            if self.len > KEY_BUF_LEN {
                // SAFETY: for long keys the bytes were allocated via
                // `to_vec` on a slice (see `attach`/`clone`), so the
                // capacity equals the length and the pointer is valid to
                // reconstruct the `Vec` from.
                let heap = Vec::from_raw_parts(
                    self.ptr,
                    self.len,
                    self.len
                );

                // Now that we have an owned `Vec<u8>`, drop it.
                drop(heap);
            }
        }
    }
}
152
153// Just like with `Drop`, `Clone` needs a custom implementation that accounts
154// for the fact that key _can_ be separately heap allocated.
// Just like with `Drop`, `Clone` needs a custom implementation that accounts
// for the fact that key _can_ be separately heap allocated.
impl Clone for Key {
    fn clone(&self) -> Self {
        if self.len > KEY_BUF_LEN {
            // Long key: duplicate the heap bytes and leak the temporary
            // `Vec`; the clone's own `Drop` reclaims them later.
            let mut heap = self.as_bytes().to_vec();
            let ptr = heap.as_mut_ptr();
            mem::forget(heap);

            Key {
                buf: [0; KEY_BUF_LEN],
                len: self.len,
                ptr: ptr,
                hash: self.hash,
            }
        } else {
            // Short key: copying `buf` is enough, but a copied pointer would
            // still aim at the *original's* buffer, so leave it null.
            Key {
                buf: self.buf,
                len: self.len,
                ptr: ptr::null_mut(), // requires a `fix_ptr` call after `Node` is on the heap
                hash: self.hash,
            }
        }
    }
}
178
179impl From<Key> for String {
180    fn from(key: Key) -> String {
181        unsafe {
182            if key.len > KEY_BUF_LEN {
183                // Construct a `String` out of the `key_ptr`. Since the key is
184                // always allocated from a slice, the capacity is equal to length.
185                String::from_raw_parts(
186                    key.ptr,
187                    key.len,
188                    key.len
189                )
190            } else {
191                String::from_utf8_unchecked(key.buf[0..key.len].to_vec())
192            }
193        }
194    }
195}
196
// A single entry of the `Object` binary tree. `Clone` is derived, but note
// that `Key::clone` leaves the cached pointer null for short keys — cloners
// must call `key.fix_ptr()` once the `Node` has settled at its final address.
#[derive(Clone)]
struct Node {
    // String-esque key abstraction
    pub key: Key,

    // Value stored.
    pub value: JsonValue,

    // Store vector index pointing to the `Node` for which `key_hash` is smaller
    // than that of this `Node`.
    // Will default to 0 as root node can't be referenced anywhere else.
    pub left: usize,

    // Same as above but for `Node`s with hash larger than this one. If the
    // hash is the same, but keys are different, the lookup will default
    // to the right branch as well.
    pub right: usize,
}
215
216impl fmt::Debug for Node {
217    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
218        fmt::Debug::fmt(&(self.key.as_str(), &self.value, self.left, self.right), f)
219    }
220}
221
222impl PartialEq for Node {
223    fn eq(&self, other: &Node) -> bool {
224        self.key.hash       == other.key.hash       &&
225        self.key.as_bytes() == other.key.as_bytes() &&
226        self.value          == other.value
227    }
228}
229
230impl Node {
231    #[inline]
232    fn new(value: JsonValue, hash: u64, len: usize) -> Node {
233        Node {
234            key: Key::new(hash, len),
235            value: value,
236            left: 0,
237            right: 0,
238        }
239    }
240}
241
/// A binary tree implementation of a string -> `JsonValue` map. You normally don't
/// have to interact with instances of `Object`, much more likely you will be
/// using the `JsonValue::Object` variant, which wraps around this struct.
#[derive(Debug)]
pub struct Object {
    // Flat arena of tree nodes; element 0 is the root, and `Node::left` /
    // `Node::right` are indices into this vector (0 meaning "no child").
    store: Vec<Node>
}
249
250impl Object {
251    /// Create a new, empty instance of `Object`. Empty `Object` performs no
252    /// allocation until a value is inserted into it.
253    #[inline(always)]
254    pub fn new() -> Self {
255        Object {
256            store: Vec::new()
257        }
258    }
259
260    /// Create a new `Object` with memory preallocated for `capacity` number
261    /// of entries.
262    #[inline(always)]
263    pub fn with_capacity(capacity: usize) -> Self {
264        Object {
265            store: Vec::with_capacity(capacity)
266        }
267    }
268
269    #[inline]
270    fn node_at_index_mut(&mut self, index: usize) -> *mut Node {
271        unsafe { self.store.as_mut_ptr().offset(index as isize) }
272    }
273
    // Append a node for `key`/`value` to the store and return its index.
    // Does not link the node into the tree — the caller wires up the
    // parent's `left`/`right` afterwards.
    #[inline(always)]
    fn add_node(&mut self, key: &[u8], value: JsonValue, hash: u64) -> usize {
        let index = self.store.len();

        if index < self.store.capacity() {
            // Because we've just checked the capacity, we can avoid
            // using `push`, and instead do unsafe magic to memcpy
            // the new node at the correct index without additional
            // capacity or bound checks.
            unsafe {
                let node = Node::new(value, hash, key.len());
                self.store.set_len(index + 1);

                // To whomever gets concerned: I got better results with
                // copy than write. Difference in benchmarks wasn't big though.
                ptr::copy_nonoverlapping(
                    &node as *const Node,
                    self.store.as_mut_ptr().offset(index as isize),
                    1,
                );

                // Since the Node has been copied, we need to forget about
                // the owned value, else we may run into use after free.
                mem::forget(node);
            }

            // Attach the key bytes only now that the node sits at its
            // final heap address (see `Key::attach`).
            unsafe { self.store.get_unchecked_mut(index).key.attach(key) };
        } else {
            // No spare capacity: a plain `push` grows the vector, which may
            // reallocate it...
            self.store.push(Node::new(value, hash, key.len()));

            unsafe { self.store.get_unchecked_mut(index).key.attach(key) };

            // ...invalidating every short key's cached pointer. Re-cache all
            // nodes up to the old length; the node that was just pushed had
            // its key attached after the move, so it's already correct.
            for node in self.store.iter_mut().take(index) {
                node.key.fix_ptr();
            }
        }

        index
    }
315
    /// Insert a new entry, or override an existing one. Note that `key` has
    /// to be a `&str` slice and not an owned `String`. The internals of
    /// `Object` will handle the heap allocation of the key if needed for
    /// better performance.
    #[inline]
    pub fn insert(&mut self, key: &str, value: JsonValue) {
        // Thin wrapper that discards the slot index `insert_index` returns.
        self.insert_index(key, value);
    }
324
325    pub(crate) fn insert_index(&mut self, key: &str, value: JsonValue) -> usize {
326        let key = key.as_bytes();
327        let hash = hash_key(key);
328
329        if self.store.len() == 0 {
330            self.store.push(Node::new(value, hash, key.len()));
331            self.store[0].key.attach(key);
332            return 0;
333        }
334
335        let mut node = unsafe { &mut *self.node_at_index_mut(0) };
336        let mut parent = 0;
337
338        loop {
339            if hash == node.key.hash && key == node.key.as_bytes() {
340                node.value = value;
341                return parent;
342            } else if hash < node.key.hash {
343                if node.left != 0 {
344                    parent = node.left;
345                    node = unsafe { &mut *self.node_at_index_mut(node.left) };
346                    continue;
347                }
348                let index = self.add_node(key, value, hash);
349                self.store[parent].left = index;
350
351                return index;
352            } else {
353                if node.right != 0 {
354                    parent = node.right;
355                    node = unsafe { &mut *self.node_at_index_mut(node.right) };
356                    continue;
357                }
358                let index = self.add_node(key, value, hash);
359                self.store[parent].right = index;
360
361                return index;
362            }
363        }
364    }
365
    // Replace the value stored at `index` (as returned by `insert_index`),
    // leaving the key and tree links untouched.
    #[inline]
    pub(crate) fn override_at(&mut self, index: usize, value: JsonValue) {
        self.store[index].value = value;
    }
370
    // Replace the value of the most recently pushed node; no-op when empty.
    #[inline]
    #[deprecated(since="0.11.11", note="Was only meant for internal use")]
    pub fn override_last(&mut self, value: JsonValue) {
        if let Some(node) = self.store.last_mut() {
            node.value = value;
        }
    }
378
379    pub fn get(&self, key: &str) -> Option<&JsonValue> {
380        if self.store.len() == 0 {
381            return None;
382        }
383
384        let key = key.as_bytes();
385        let hash = hash_key(key);
386
387        let mut node = unsafe { self.store.get_unchecked(0) };
388
389        loop {
390            if hash == node.key.hash && key == node.key.as_bytes() {
391                return Some(&node.value);
392            } else if hash < node.key.hash {
393                if node.left == 0 {
394                    return None;
395                }
396                node = unsafe { self.store.get_unchecked(node.left) };
397            } else {
398                if node.right == 0 {
399                    return None;
400                }
401                node = unsafe { self.store.get_unchecked(node.right) };
402            }
403        }
404    }
405
    /// Look up a mutable reference to the value stored under `key`,
    /// returning `None` when the key is absent.
    pub fn get_mut(&mut self, key: &str) -> Option<&mut JsonValue> {
        if self.store.len() == 0 {
            return None;
        }

        let key = key.as_bytes();
        let hash = hash_key(key);

        // Find the node's index with shared borrows first, then reborrow
        // mutably — avoids fighting the borrow checker inside the loop.
        let mut index = 0;
        {
            let mut node = unsafe { self.store.get_unchecked(0) };

            loop {
                if hash == node.key.hash && key == node.key.as_bytes() {
                    break;
                } else if hash < node.key.hash {
                    if node.left == 0 {
                        return None;
                    }
                    index = node.left;
                    node = unsafe { self.store.get_unchecked(node.left) };
                } else {
                    if node.right == 0 {
                        return None;
                    }
                    index = node.right;
                    node = unsafe { self.store.get_unchecked(node.right) };
                }
            }
        }

        // SAFETY: `index` was read off a live node, so it is in bounds.
        let node = unsafe { self.store.get_unchecked_mut(index) };

        Some(&mut node.value)
    }
441
    /// Attempts to remove the value behind `key`, if successful
    /// will return the `JsonValue` stored behind the `key`.
    pub fn remove(&mut self, key: &str) -> Option<JsonValue> {
        if self.store.len() == 0 {
            return None;
        }

        let key = key.as_bytes();
        let hash = hash_key(key);
        let mut index = 0;

        {
            let mut node = unsafe { self.store.get_unchecked(0) };

            // Try to find the node
            loop {
                if hash == node.key.hash && key == node.key.as_bytes() {
                    break;
                } else if hash < node.key.hash {
                    if node.left == 0 {
                        return None;
                    }
                    index = node.left;
                    node = unsafe { self.store.get_unchecked(node.left) };
                } else {
                    if node.right == 0 {
                        return None;
                    }
                    index = node.right;
                    node = unsafe { self.store.get_unchecked(node.right) };
                }
            }
        }

        // Removing a node would screw the tree badly, it's easier to just
        // recreate it. This is a very costly operation, but removing nodes
        // in JSON shouldn't happen very often if at all. Optimizing this
        // can wait for better times.
        let mut new_object = Object::with_capacity(self.store.len() - 1);
        let mut removed = None;

        for (i, node) in self.store.iter_mut().enumerate() {
            if i == index {
                // Rust doesn't like us moving things from `node`, even if
                // it is owned. Replace fixes that.
                removed = Some(mem::replace(&mut node.value, JsonValue::Null));
            } else {
                // Move each surviving value out (leaving a `Null` behind in
                // the old store, which is discarded by the swap below) and
                // reinsert it under the same key.
                let value = mem::replace(&mut node.value, JsonValue::Null);

                new_object.insert(node.key.as_str(), value);
            }
        }

        mem::swap(self, &mut new_object);

        removed
    }
499
    /// Number of entries stored in the object.
    #[inline(always)]
    pub fn len(&self) -> usize {
        self.store.len()
    }

    /// Returns `true` when the object holds no entries.
    #[inline(always)]
    pub fn is_empty(&self) -> bool {
        self.store.is_empty()
    }

    /// Wipe the `Object` clear. The capacity will remain untouched.
    pub fn clear(&mut self) {
        self.store.clear();
    }

    /// Iterate over `(key, &value)` pairs in store (insertion) order.
    #[inline(always)]
    pub fn iter(&self) -> Iter {
        Iter {
            inner: self.store.iter()
        }
    }

    /// Iterate over `(key, &mut value)` pairs in store (insertion) order.
    #[inline(always)]
    pub fn iter_mut(&mut self) -> IterMut {
        IterMut {
            inner: self.store.iter_mut()
        }
    }
528
    /// Prints out the value as JSON string.
    pub fn dump(&self) -> String {
        let mut gen = DumpGenerator::new();
        // Writing into the in-memory generator buffer cannot fail.
        gen.write_object(self).expect("Can't fail");
        gen.consume()
    }

    /// Pretty prints out the value as JSON string. Takes an argument that's
    /// number of spaces to indent new blocks with.
    pub fn pretty(&self, spaces: u16) -> String {
        let mut gen = PrettyGenerator::new(spaces);
        // Writing into the in-memory generator buffer cannot fail.
        gen.write_object(self).expect("Can't fail");
        gen.consume()
    }
543}
544
// Custom implementation of `Clone`, as new heap allocation means
// we have to fix key pointers everywhere!
impl Clone for Object {
    fn clone(&self) -> Self {
        let mut store = self.store.clone();

        // `Key::clone` leaves short-key pointers null (a copied pointer
        // would aim at the original's buffers), so re-cache them now that
        // the cloned nodes sit at their final addresses.
        for node in store.iter_mut() {
            node.key.fix_ptr();
        }

        Object {
            store: store
        }
    }
}
560
561impl<K: AsRef<str>, V: Into<JsonValue>> FromIterator<(K, V)> for Object {
562    fn from_iter<I: IntoIterator<Item=(K, V)>>(iter: I) -> Self {
563        let iter = iter.into_iter();
564        let mut object = Object::with_capacity(iter.size_hint().0);
565
566        for (key, value) in iter {
567            object.insert(key.as_ref(), value.into());
568        }
569
570        object
571    }
572}
573
// Because keys can be inserted in different order, the safe way to
// compare `Object`s is to iterate over one and check that the other
// has all the same keys with equal values.
577impl PartialEq for Object {
578    fn eq(&self, other: &Object) -> bool {
579        if self.len() != other.len() {
580            return false;
581        }
582
583        for (key, value) in self.iter() {
584            match other.get(key) {
585                Some(ref other_val) => if *other_val != value { return false; },
586                None                => return false
587            }
588        }
589
590        true
591    }
592}
593
594pub struct Iter<'a> {
595    inner: slice::Iter<'a, Node>
596}
597
598impl<'a> Iter<'a> {
599    /// Create an empty iterator that always returns `None`
600    pub fn empty() -> Self {
601        Iter {
602            inner: [].iter()
603        }
604    }
605}
606
607impl<'a> Iterator for Iter<'a> {
608    type Item = (&'a str, &'a JsonValue);
609
610    #[inline(always)]
611    fn next(&mut self) -> Option<Self::Item> {
612        self.inner.next().map(|node| (node.key.as_str(), &node.value))
613    }
614}
615
616impl<'a> DoubleEndedIterator for Iter<'a> {
617    #[inline(always)]
618    fn next_back(&mut self) -> Option<Self::Item> {
619        self.inner.next_back().map(|node| (node.key.as_str(), &node.value))
620    }
621}
622
623impl<'a> ExactSizeIterator for Iter<'a> {
624    fn len(&self) -> usize {
625        self.inner.len()
626    }
627}
628
629pub struct IterMut<'a> {
630    inner: slice::IterMut<'a, Node>
631}
632
633impl<'a> IterMut<'a> {
634    /// Create an empty iterator that always returns `None`
635    pub fn empty() -> Self {
636        IterMut {
637            inner: [].iter_mut()
638        }
639    }
640}
641
642impl<'a> Iterator for IterMut<'a> {
643    type Item = (&'a str, &'a mut JsonValue);
644
645    #[inline(always)]
646    fn next(&mut self) -> Option<Self::Item> {
647        self.inner.next().map(|node| (node.key.as_str(), &mut node.value))
648    }
649}
650
651impl<'a> DoubleEndedIterator for IterMut<'a> {
652    #[inline(always)]
653    fn next_back(&mut self) -> Option<Self::Item> {
654        self.inner.next_back().map(|node| (node.key.as_str(), &mut node.value))
655    }
656}
657
658impl<'a> ExactSizeIterator for IterMut<'a> {
659    fn len(&self) -> usize {
660        self.inner.len()
661    }
662}
663
664pub struct IntoIter {
665    inner: vec::IntoIter<Node>
666}
667
668impl Iterator for IntoIter {
669    type Item = (String, JsonValue);
670
671    #[inline(always)]
672    fn next(&mut self) -> Option<Self::Item> {
673        self.inner.next().map(|node| (node.key.into(), node.value))
674    }
675}
676
677impl DoubleEndedIterator for IntoIter {
678    #[inline(always)]
679    fn next_back(&mut self) -> Option<Self::Item> {
680        self.inner.next_back().map(|node| (node.key.into(), node.value))
681    }
682}
683
684impl ExactSizeIterator for IntoIter {
685    #[inline(always)]
686    fn len(&self) -> usize {
687        self.inner.len()
688    }
689}
690
691impl IntoIterator for Object {
692    type Item = (String, JsonValue);
693    type IntoIter = IntoIter;
694
695    #[inline(always)]
696    fn into_iter(self) -> IntoIter {
697        IntoIter {
698            inner: self.store.into_iter()
699        }
700    }
701}
702
703/// Implements indexing by `&str` to easily access object members:
704///
705/// ## Example
706///
707/// ```
708/// # #[macro_use]
709/// # extern crate jzon;
710/// # use jzon::JsonValue;
711/// #
712/// # fn main() {
713/// let value = object!{
714///     foo: "bar"
715/// };
716///
717/// if let JsonValue::Object(object) = value {
718///   assert!(object["foo"] == "bar");
719/// }
720/// # }
721/// ```
722// TODO: doc
723impl<'a> Index<&'a str> for Object {
724    type Output = JsonValue;
725
726    fn index(&self, index: &str) -> &JsonValue {
727        match self.get(index) {
728            Some(value) => value,
729            _ => &NULL
730        }
731    }
732}
733
734impl Index<String> for Object {
735    type Output = JsonValue;
736
737    fn index(&self, index: String) -> &JsonValue {
738        self.index(index.deref())
739    }
740}
741
742impl<'a> Index<&'a String> for Object {
743    type Output = JsonValue;
744
745    fn index(&self, index: &String) -> &JsonValue {
746        self.index(index.deref())
747    }
748}
749
750/// Implements mutable indexing by `&str` to easily modify object members:
751///
752/// ## Example
753///
754/// ```
755/// # #[macro_use]
756/// # extern crate jzon;
757/// # use jzon::JsonValue;
758/// #
759/// # fn main() {
760/// let value = object!{};
761///
762/// if let JsonValue::Object(mut object) = value {
763///   object["foo"] = 42.into();
764///
765///   assert!(object["foo"] == 42);
766/// }
767/// # }
768/// ```
769impl<'a> IndexMut<&'a str> for Object {
770    fn index_mut(&mut self, index: &str) -> &mut JsonValue {
771        if self.get(index).is_none() {
772            self.insert(index, JsonValue::Null);
773        }
774        self.get_mut(index).unwrap()
775    }
776}
777
778impl IndexMut<String> for Object {
779    fn index_mut(&mut self, index: String) -> &mut JsonValue {
780        self.index_mut(index.deref())
781    }
782}
783
784impl<'a> IndexMut<&'a String> for Object {
785    fn index_mut(&mut self, index: &String) -> &mut JsonValue {
786        self.index_mut(index.deref())
787    }
788}