jzon/
codegen.rs

1use std::ptr;
2use std::io::Write;
3use std::io;
4
5use crate::JsonValue;
6use crate::number::Number;
7use crate::object::Object;
8use crate::util::print_dec;
9
// Escape codes stored in the `ESCAPED` table below. For a byte that needs
// escaping, the table entry is the character emitted right after the
// backslash in the generated JSON.
const QU: u8 = b'"';  // quotation mark is written as `\"`
const BS: u8 = b'\\'; // backslash is written as `\\`
const BB: u8 = b'b';  // 0x08 is written as `\b`
const TT: u8 = b't';  // 0x09 is written as `\t`
const NN: u8 = b'n';  // 0x0A is written as `\n`
const FF: u8 = b'f';  // 0x0C is written as `\f`
const RR: u8 = b'r';  // 0x0D is written as `\r`
const UU: u8 = b'u';  // remaining bytes below 0x20 are written as `\u` + four hex digits
const __: u8 = 0;     // zero marks a byte that is copied to the output unescaped

// Look up table for characters that need escaping in a product string.
// It is indexed by the raw byte value: the row is the high nibble (see the
// trailing comment on each row) and the column is the low nibble.
static ESCAPED: [u8; 256] = [
// 0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
  UU, UU, UU, UU, UU, UU, UU, UU, BB, TT, NN, UU, FF, RR, UU, UU, // 0
  UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, // 1
  __, __, QU, __, __, __, __, __, __, __, __, __, __, __, __, __, // 2
  __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 3
  __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 4
  __, __, __, __, __, __, __, __, __, __, __, __, BS, __, __, __, // 5
  __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 6
  __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 7
  __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 8
  __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 9
  __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // A
  __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // B
  __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // C
  __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // D
  __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // E
  __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // F
];
40
41/// Default trait for serializing JSONValue into string.
42pub trait Generator {
43    type T: Write;
44
45    fn get_writer(&mut self) -> &mut Self::T;
46
47    #[inline(always)]
48    fn write(&mut self, slice: &[u8]) -> io::Result<()> {
49        self.get_writer().write_all(slice)
50    }
51
52    #[inline(always)]
53    fn write_char(&mut self, ch: u8) -> io::Result<()> {
54        self.get_writer().write_all(&[ch])
55    }
56
57    fn write_min(&mut self, slice: &[u8], min: u8) -> io::Result<()>;
58
59    #[inline(always)]
60    fn new_line(&mut self) -> io::Result<()> { Ok(()) }
61
62    #[inline(always)]
63    fn indent(&mut self) {}
64
65    #[inline(always)]
66    fn dedent(&mut self) {}
67
68    #[inline(never)]
69    fn write_string_complex(&mut self, string: &str, mut start: usize) -> io::Result<()> {
70        self.write(&string.as_bytes()[ .. start])?;
71
72        for (index, ch) in string.bytes().enumerate().skip(start) {
73            let escape = ESCAPED[ch as usize];
74            if escape > 0 {
75                self.write(&string.as_bytes()[start .. index])?;
76                self.write(&[b'\\', escape])?;
77                start = index + 1;
78            }
79            if escape == b'u' {
80                write!(self.get_writer(), "{:04x}", ch)?;
81            }
82        }
83        self.write(&string.as_bytes()[start ..])?;
84
85        self.write_char(b'"')
86    }
87
88    #[inline(always)]
89    fn write_string(&mut self, string: &str) -> io::Result<()> {
90        self.write_char(b'"')?;
91
92        for (index, ch) in string.bytes().enumerate() {
93            if ESCAPED[ch as usize] > 0 {
94                return self.write_string_complex(string, index)
95            }
96        }
97
98        self.write(string.as_bytes())?;
99        self.write_char(b'"')
100    }
101
102    #[inline(always)]
103    fn write_number(&mut self, num: &Number) -> io::Result<()> {
104        if num.is_nan() {
105            return self.write(b"null");
106        }
107        let (positive, mantissa, exponent) = num.as_parts();
108        unsafe {
109            print_dec::write(
110                self.get_writer(),
111                positive,
112                mantissa,
113                exponent
114            )
115        }
116    }
117
118    #[inline(always)]
119    fn write_object(&mut self, object: &Object) -> io::Result<()> {
120        self.write_char(b'{')?;
121        let mut iter = object.iter();
122
123        if let Some((key, value)) = iter.next() {
124            self.indent();
125            self.new_line()?;
126            self.write_string(key)?;
127            self.write_min(b": ", b':')?;
128            self.write_json(value)?;
129        } else {
130            self.write_char(b'}')?;
131            return Ok(());
132        }
133
134        for (key, value) in iter {
135            self.write_char(b',')?;
136            self.new_line()?;
137            self.write_string(key)?;
138            self.write_min(b": ", b':')?;
139            self.write_json(value)?;
140        }
141
142        self.dedent();
143        self.new_line()?;
144        self.write_char(b'}')
145    }
146
147    fn write_json(&mut self, json: &JsonValue) -> io::Result<()> {
148        match *json {
149            JsonValue::Null               => self.write(b"null"),
150            JsonValue::Short(ref short)   => self.write_string(short.as_str()),
151            JsonValue::String(ref string) => self.write_string(string),
152            JsonValue::Number(ref number) => self.write_number(number),
153            JsonValue::Boolean(true)      => self.write(b"true"),
154            JsonValue::Boolean(false)     => self.write(b"false"),
155            JsonValue::Array(ref array)   => {
156                self.write_char(b'[')?;
157                let mut iter = array.iter();
158
159                if let Some(item) = iter.next() {
160                    self.indent();
161                    self.new_line()?;
162                    self.write_json(item)?;
163                } else {
164                    self.write_char(b']')?;
165                    return Ok(());
166                }
167
168                for item in iter {
169                    self.write_char(b',')?;
170                    self.new_line()?;
171                    self.write_json(item)?;
172                }
173
174                self.dedent();
175                self.new_line()?;
176                self.write_char(b']')
177            },
178            JsonValue::Object(ref object) => {
179                self.write_object(object)
180            }
181        }
182    }
183}
184
/// In-Memory Generator, this uses a Vec to store the JSON result.
pub struct DumpGenerator {
    // Bytes of the JSON text produced so far.
    code: Vec<u8>,
}

impl DumpGenerator {
    /// Creates a generator whose buffer starts with room for 1024 bytes.
    pub fn new() -> Self {
        let code = Vec::with_capacity(1024);
        Self { code }
    }

    /// Consumes the generator and hands back the accumulated JSON text.
    pub fn consume(self) -> String {
        let DumpGenerator { code } = self;
        // The buffer is filled from Rust strings (already unicode) and from
        // ASCII-only number output, so the UTF-8 check is skipped.
        unsafe { String::from_utf8_unchecked(code) }
    }
}
203
204impl Generator for DumpGenerator {
205    type T = Vec<u8>;
206
207    fn write(&mut self, slice: &[u8]) -> io::Result<()> {
208        extend_from_slice(&mut self.code, slice);
209        Ok(())
210    }
211
212    #[inline(always)]
213    fn write_char(&mut self, ch: u8) -> io::Result<()> {
214        self.code.push(ch);
215        Ok(())
216    }
217
218    #[inline(always)]
219    fn get_writer(&mut self) -> &mut Vec<u8> {
220        &mut self.code
221    }
222
223    #[inline(always)]
224    fn write_min(&mut self, _: &[u8], min: u8) -> io::Result<()> {
225        self.code.push(min);
226        Ok(())
227    }
228}
229
/// Pretty In-Memory Generator, this uses a Vec to store the JSON result and add indent.
pub struct PrettyGenerator {
    // Bytes of the JSON text produced so far.
    code: Vec<u8>,
    // Current nesting depth.
    dent: u16,
    // Number of spaces emitted per nesting level.
    spaces_per_indent: u16,
}

impl PrettyGenerator {
    /// Creates a generator at depth zero that indents by `spaces` spaces per
    /// level; the buffer starts with room for 1024 bytes.
    pub fn new(spaces: u16) -> Self {
        Self {
            spaces_per_indent: spaces,
            dent: 0,
            code: Vec::with_capacity(1024),
        }
    }

    /// Consumes the generator and hands back the accumulated JSON text.
    pub fn consume(self) -> String {
        let bytes = self.code;
        // NOTE(review): relies on the buffer only ever receiving valid UTF-8,
        // as the generator methods in this file are expected to guarantee.
        unsafe { String::from_utf8_unchecked(bytes) }
    }
}
250
251impl Generator for PrettyGenerator {
252    type T = Vec<u8>;
253
254    #[inline(always)]
255    fn write(&mut self, slice: &[u8]) -> io::Result<()> {
256        extend_from_slice(&mut self.code, slice);
257        Ok(())
258    }
259
260    #[inline(always)]
261    fn write_char(&mut self, ch: u8) -> io::Result<()> {
262        self.code.push(ch);
263        Ok(())
264    }
265
266    #[inline(always)]
267    fn get_writer(&mut self) -> &mut Vec<u8> {
268        &mut self.code
269    }
270
271    #[inline(always)]
272    fn write_min(&mut self, slice: &[u8], _: u8) -> io::Result<()> {
273        extend_from_slice(&mut self.code, slice);
274        Ok(())
275    }
276
277    fn new_line(&mut self) -> io::Result<()> {
278        self.code.push(b'\n');
279        for _ in 0..(self.dent * self.spaces_per_indent) {
280            self.code.push(b' ');
281        }
282        Ok(())
283    }
284
285    fn indent(&mut self) {
286        self.dent += 1;
287    }
288
289    fn dedent(&mut self) {
290        self.dent -= 1;
291    }
292}
293
/// Writer Generator, this uses a custom writer to store the JSON result.
pub struct WriterGenerator<'a, W: 'a + Write> {
    // Borrowed sink that receives the generated bytes.
    writer: &'a mut W,
}

impl<'a, W> WriterGenerator<'a, W>
where
    W: 'a + Write,
{
    /// Wraps `writer`; all generated output is sent to it.
    pub fn new(writer: &'a mut W) -> Self {
        Self { writer }
    }
}
306
307impl<'a, W> Generator for WriterGenerator<'a, W> where W: Write {
308    type T = W;
309
310    #[inline(always)]
311    fn get_writer(&mut self) -> &mut W {
312        &mut self.writer
313    }
314
315    #[inline(always)]
316    fn write_min(&mut self, _: &[u8], min: u8) -> io::Result<()> {
317        self.writer.write_all(&[min])
318    }
319}
320
/// Pretty Writer Generator, this uses a custom writer to store the JSON result and add indent.
pub struct PrettyWriterGenerator<'a, W: 'a + Write> {
    // Borrowed sink that receives the generated bytes.
    writer: &'a mut W,
    // Current nesting depth.
    dent: u16,
    // Number of spaces emitted per nesting level.
    spaces_per_indent: u16,
}

impl<'a, W> PrettyWriterGenerator<'a, W>
where
    W: 'a + Write,
{
    /// Wraps `writer`, starting at depth zero and indenting by `spaces`
    /// spaces per level.
    pub fn new(writer: &'a mut W, spaces: u16) -> Self {
        Self {
            spaces_per_indent: spaces,
            dent: 0,
            writer,
        }
    }
}
337
338impl<'a, W> Generator for PrettyWriterGenerator<'a, W> where W: Write {
339    type T = W;
340
341    #[inline(always)]
342    fn get_writer(&mut self) -> &mut W {
343        &mut self.writer
344    }
345
346    #[inline(always)]
347    fn write_min(&mut self, slice: &[u8], _: u8) -> io::Result<()> {
348        self.writer.write_all(slice)
349    }
350
351    fn new_line(&mut self) -> io::Result<()> {
352        self.write_char(b'\n')?;
353        for _ in 0..(self.dent * self.spaces_per_indent) {
354            self.write_char(b' ')?;
355        }
356        Ok(())
357    }
358
359    fn indent(&mut self) {
360        self.dent += 1;
361    }
362
363    fn dedent(&mut self) {
364        self.dent -= 1;
365    }
366}
367
// From: https://github.com/dtolnay/fastwrite/blob/master/src/lib.rs#L68
//
// LLVM is not able to lower `Vec::extend_from_slice` into a memcpy, so this
// helps eke out that last bit of performance.
//
// Appends every byte of `src` to the end of `dst`, growing `dst` if needed.
#[inline]
fn extend_from_slice(dst: &mut Vec<u8>, src: &[u8]) {
    let dst_len = dst.len();
    let src_len = src.len();

    // Ensures capacity for at least `dst_len + src_len` bytes; `reserve`
    // panics if that capacity cannot be represented.
    dst.reserve(src_len);

    // SAFETY:
    // * after `reserve`, the allocation holds at least `dst_len + src_len`
    //   bytes, so the destination range lies inside it;
    // * `dst` is borrowed exclusively, so `src` cannot overlap it;
    // * the length is raised only after the new bytes have been written, as
    //   `Vec::set_len` requires every element below the new length to be
    //   initialized.
    unsafe {
        ptr::copy_nonoverlapping(src.as_ptr(), dst.as_mut_ptr().add(dst_len), src_len);
        dst.set_len(dst_len + src_len);
    }
}
389
#[cfg(test)]
mod tests {
    use super::*;

    // Regression input found while fuzzing the DumpGenerator.
    #[test]
    fn should_not_panic_on_bad_bytes() {
        let bytes: Vec<u8> = vec![0, 12, 128, 88, 64, 99];
        // Deliberately skips UTF-8 validation so the raw fuzzer bytes reach
        // the generator unchanged.
        let text = unsafe { String::from_utf8_unchecked(bytes) };

        let mut dump = DumpGenerator::new();
        dump.write_string(&text).unwrap();
    }

    // Second fuzzer input; again only checks that writing does not panic.
    #[test]
    fn should_not_panic_on_bad_bytes_2() {
        let bytes = b"\x48\x48\x48\x57\x03\xE8\x48\x48\xE8\x03\x8F\x48\x29\x48\x48".to_vec();
        let text = unsafe { String::from_utf8_unchecked(bytes) };

        let mut dump = DumpGenerator::new();
        dump.write_string(&text).unwrap();
    }
}