yaml_rust2/
yaml.rs

1//! YAML objects manipulation utilities.
2
3#![allow(clippy::module_name_repetitions)]
4
5use std::borrow::Cow;
6use std::ops::ControlFlow;
7use std::{collections::BTreeMap, convert::TryFrom, mem, ops::Index, ops::IndexMut};
8
9#[cfg(feature = "encoding")]
10use encoding_rs::{Decoder, DecoderResult, Encoding};
11use hashlink::LinkedHashMap;
12
13use crate::parser::{Event, MarkedEventReceiver, Parser, Tag};
14use crate::scanner::{Marker, ScanError, TScalarStyle};
15
/// A YAML node is stored as this `Yaml` enumeration, which provides an easy way to
/// access your YAML document.
///
/// Every node of a loaded document — scalar, sequence or mapping — is one value of this type.
///
/// # Examples
///
/// ```
/// use yaml_rust2::Yaml;
/// let foo = Yaml::from_str("-123"); // convert the string to the appropriate YAML type
/// assert_eq!(foo.as_i64().unwrap(), -123);
///
/// // iterate over an Array
/// let vec = Yaml::Array(vec![Yaml::Integer(1), Yaml::Integer(2)]);
/// for v in vec.as_vec().unwrap() {
///     assert!(v.as_i64().is_some());
/// }
/// ```
#[derive(Clone, PartialEq, PartialOrd, Debug, Eq, Ord, Hash)]
pub enum Yaml {
    /// YAML float, stored as its textual representation and parsed on demand
    /// (see [`Yaml::as_f64`]).
    ///
    /// Note that `f64` does NOT implement the `Eq` trait and can NOT be stored in a `BTreeMap`,
    /// which is why the string form is kept instead.
    Real(String),
    /// YAML integer, stored as an `i64`.
    Integer(i64),
    /// YAML string scalar.
    String(String),
    /// YAML bool, e.g. `true` or `false`.
    Boolean(bool),
    /// YAML array, can be accessed as a [`Vec`].
    Array(Array),
    /// YAML hash, can be accessed as a [`LinkedHashMap`].
    ///
    /// Iteration order matches the order in which entries were inserted into the map.
    Hash(Hash),
    /// Alias, not fully supported yet.
    Alias(usize),
    /// YAML null, e.g. `null` or `~`.
    Null,
    /// Accessing a nonexistent node via the Index trait returns `BadValue`. This
    /// simplifies error handling in the calling code. Invalid type conversion also
    /// returns `BadValue`.
    BadValue,
}
58
/// The type contained in the `Yaml::Array` variant. This corresponds to YAML sequences.
///
/// Elements are themselves [`Yaml`] nodes, so sequences may nest arbitrarily.
pub type Array = Vec<Yaml>;
/// The type contained in the `Yaml::Hash` variant. This corresponds to YAML mappings.
///
/// Both keys and values are [`Yaml`] nodes.
pub type Hash = LinkedHashMap<Yaml, Yaml>;
63
/// Parse `v` as an `f64` following the YAML Core schema.
///
/// The special spellings `.inf` / `.Inf` / `.INF` (optionally prefixed with `+` or `-`) and
/// `.nan` / `.NaN` / `.NAN` are recognized explicitly. Any other input is only handed to Rust's
/// float parser when it contains at least one ASCII digit, so that words such as `inf` or `nan`
/// (which Rust would happily parse) are rejected.
///
/// See: <https://github.com/chyh1990/yaml-rust/issues/51>
///
/// # Return
/// `Some(value)` if `v` is a valid float, `None` otherwise.
fn parse_f64(v: &str) -> Option<f64> {
    if matches!(
        v,
        ".inf" | ".Inf" | ".INF" | "+.inf" | "+.Inf" | "+.INF"
    ) {
        return Some(f64::INFINITY);
    }
    if matches!(v, "-.inf" | "-.Inf" | "-.INF") {
        return Some(f64::NEG_INFINITY);
    }
    if matches!(v, ".nan" | ".NaN" | ".NAN") {
        return Some(f64::NAN);
    }

    // Without a digit the string cannot be a YAML number; refuse it before Rust's parser gets a
    // chance to accept spellings like `inf` or `NaN`.
    let has_digit = v.bytes().any(|byte| byte.is_ascii_digit());
    if has_digit {
        v.parse::<f64>().ok()
    } else {
        None
    }
}
77
/// Main structure for quickly parsing YAML.
///
/// The loader receives parser events and assembles them into [`Yaml`] documents.
///
/// See [`YamlLoader::load_from_str`].
#[derive(Default)]
pub struct YamlLoader {
    /// The different YAML documents that are loaded.
    docs: Vec<Yaml>,
    // states
    // Stack of the nodes currently being built, each stored as a
    // (current node, anchor_id) tuple. An anchor id of 0 means "no anchor".
    doc_stack: Vec<(Yaml, usize)>,
    // One entry per mapping currently being built: the key waiting for its value, or
    // `Yaml::BadValue` when the next node inserted into that mapping is a key.
    key_stack: Vec<Yaml>,
    // Nodes registered under a non-zero anchor id, looked up when an alias event is received.
    anchor_map: BTreeMap<usize, Yaml>,
    /// An error, if one was encountered.
    error: Option<ScanError>,
}
93
94impl MarkedEventReceiver for YamlLoader {
95    fn on_event(&mut self, ev: Event, mark: Marker) {
96        if self.error.is_some() {
97            return;
98        }
99        if let Err(e) = self.on_event_impl(ev, mark) {
100            self.error = Some(e);
101        }
102    }
103}
104
/// An error that happened when loading a YAML document.
///
/// Each variant corresponds to one stage of loading: reading the source, decoding its bytes, or
/// scanning the resulting text.
#[derive(Debug)]
pub enum LoadError {
    /// An I/O error raised while reading the source.
    IO(std::io::Error),
    /// An error within the scanner. This indicates a malformed YAML input.
    Scan(ScanError),
    /// A decoding error (e.g.: Invalid UTF-8), carrying a human-readable description.
    Decode(std::borrow::Cow<'static, str>),
}
115
116impl From<std::io::Error> for LoadError {
117    fn from(error: std::io::Error) -> Self {
118        LoadError::IO(error)
119    }
120}
121
122impl YamlLoader {
123    fn on_event_impl(&mut self, ev: Event, mark: Marker) -> Result<(), ScanError> {
124        // println!("EV {:?}", ev);
125        match ev {
126            Event::DocumentStart | Event::Nothing | Event::StreamStart | Event::StreamEnd => {
127                // do nothing
128            }
129            Event::DocumentEnd => {
130                match self.doc_stack.len() {
131                    // empty document
132                    0 => self.docs.push(Yaml::BadValue),
133                    1 => self.docs.push(self.doc_stack.pop().unwrap().0),
134                    _ => unreachable!(),
135                }
136            }
137            Event::SequenceStart(aid, _) => {
138                self.doc_stack.push((Yaml::Array(Vec::new()), aid));
139            }
140            Event::SequenceEnd => {
141                let node = self.doc_stack.pop().unwrap();
142                self.insert_new_node(node, mark)?;
143            }
144            Event::MappingStart(aid, _) => {
145                self.doc_stack.push((Yaml::Hash(Hash::new()), aid));
146                self.key_stack.push(Yaml::BadValue);
147            }
148            Event::MappingEnd => {
149                self.key_stack.pop().unwrap();
150                let node = self.doc_stack.pop().unwrap();
151                self.insert_new_node(node, mark)?;
152            }
153            Event::Scalar(v, style, aid, tag) => {
154                let node = if style != TScalarStyle::Plain {
155                    Yaml::String(v)
156                } else if let Some(Tag {
157                    ref handle,
158                    ref suffix,
159                }) = tag
160                {
161                    if handle == "tag:yaml.org,2002:" {
162                        match suffix.as_ref() {
163                            "bool" => {
164                                // "true" or "false"
165                                match v.parse::<bool>() {
166                                    Err(_) => Yaml::BadValue,
167                                    Ok(v) => Yaml::Boolean(v),
168                                }
169                            }
170                            "int" => match v.parse::<i64>() {
171                                Err(_) => Yaml::BadValue,
172                                Ok(v) => Yaml::Integer(v),
173                            },
174                            "float" => match parse_f64(&v) {
175                                Some(_) => Yaml::Real(v),
176                                None => Yaml::BadValue,
177                            },
178                            "null" => match v.as_ref() {
179                                "~" | "null" => Yaml::Null,
180                                _ => Yaml::BadValue,
181                            },
182                            _ => Yaml::String(v),
183                        }
184                    } else {
185                        Yaml::String(v)
186                    }
187                } else {
188                    // Datatype is not specified, or unrecognized
189                    Yaml::from_str(&v)
190                };
191
192                self.insert_new_node((node, aid), mark)?;
193            }
194            Event::Alias(id) => {
195                let n = match self.anchor_map.get(&id) {
196                    Some(v) => v.clone(),
197                    None => Yaml::BadValue,
198                };
199                self.insert_new_node((n, 0), mark)?;
200            }
201        }
202        // println!("DOC {:?}", self.doc_stack);
203        Ok(())
204    }
205
206    fn insert_new_node(&mut self, node: (Yaml, usize), mark: Marker) -> Result<(), ScanError> {
207        // valid anchor id starts from 1
208        if node.1 > 0 {
209            self.anchor_map.insert(node.1, node.0.clone());
210        }
211        if self.doc_stack.is_empty() {
212            self.doc_stack.push(node);
213        } else {
214            let parent = self.doc_stack.last_mut().unwrap();
215            match *parent {
216                (Yaml::Array(ref mut v), _) => v.push(node.0),
217                (Yaml::Hash(ref mut h), _) => {
218                    let cur_key = self.key_stack.last_mut().unwrap();
219                    // current node is a key
220                    if cur_key.is_badvalue() {
221                        *cur_key = node.0;
222                    // current node is a value
223                    } else {
224                        let mut newkey = Yaml::BadValue;
225                        mem::swap(&mut newkey, cur_key);
226                        if h.insert(newkey, node.0).is_some() {
227                            let inserted_key = h.back().unwrap().0;
228                            return Err(ScanError::new_string(
229                                mark,
230                                format!("{inserted_key:?}: duplicated key in mapping"),
231                            ));
232                        }
233                    }
234                }
235                _ => unreachable!(),
236            }
237        }
238        Ok(())
239    }
240
241    /// Load the given string as a set of YAML documents.
242    ///
243    /// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only
244    /// if all documents are parsed successfully. An error in a latter document prevents the former
245    /// from being returned.
246    /// # Errors
247    /// Returns `ScanError` when loading fails.
248    pub fn load_from_str(source: &str) -> Result<Vec<Yaml>, ScanError> {
249        Self::load_from_iter(source.chars())
250    }
251
252    /// Load the contents of the given iterator as a set of YAML documents.
253    ///
254    /// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only
255    /// if all documents are parsed successfully. An error in a latter document prevents the former
256    /// from being returned.
257    /// # Errors
258    /// Returns `ScanError` when loading fails.
259    pub fn load_from_iter<I: Iterator<Item = char>>(source: I) -> Result<Vec<Yaml>, ScanError> {
260        let mut parser = Parser::new(source);
261        Self::load_from_parser(&mut parser)
262    }
263
264    /// Load the contents from the specified Parser as a set of YAML documents.
265    ///
266    /// Parsing succeeds if and only if all documents are parsed successfully.
267    /// An error in a latter document prevents the former from being returned.
268    /// # Errors
269    /// Returns `ScanError` when loading fails.
270    pub fn load_from_parser<I: Iterator<Item = char>>(
271        parser: &mut Parser<I>,
272    ) -> Result<Vec<Yaml>, ScanError> {
273        let mut loader = YamlLoader::default();
274        parser.load(&mut loader, true)?;
275        if let Some(e) = loader.error {
276            Err(e)
277        } else {
278            Ok(loader.docs)
279        }
280    }
281
282    /// Return a reference to the parsed Yaml documents.
283    #[must_use]
284    pub fn documents(&self) -> &[Yaml] {
285        &self.docs
286    }
287}
288
/// The signature of the function to call when using [`YAMLDecodingTrap::Call`].
///
/// The arguments are as follows:
///  * `malformation_length`: The length of the sequence the decoder failed to decode.
///  * `bytes_read_after_malformation`: The number of lookahead bytes the decoder consumed after
///    the malformation.
///  * `input_at_malformation`: What the input buffer is at the malformation.
///    This is the buffer starting at the malformation. The first `malformation_length` bytes are
///    the problematic sequence. The following `bytes_read_after_malformation` are already stored
///    in the decoder and will not be re-fed.
///  * `output`: The output string.
///
/// The function may modify `output` as it sees fit. For instance, one could recreate the
/// behavior of [`YAMLDecodingTrap::Ignore`] with an empty function, [`YAMLDecodingTrap::Replace`]
/// by pushing a `\u{FFFD}` into `output` and [`YAMLDecodingTrap::Strict`] by returning
/// [`ControlFlow::Break`].
///
/// # Returns
/// The function must return [`ControlFlow::Continue`] if decoding may continue or
/// [`ControlFlow::Break`] if decoding must be aborted. The string carried by
/// [`ControlFlow::Break`] is used as the error message; if it is empty, a default message
/// describing the malformed sequence is used instead.
#[cfg(feature = "encoding")]
pub type YAMLDecodingTrapFn = fn(
    malformation_length: u8,
    bytes_read_after_malformation: u8,
    input_at_malformation: &[u8],
    output: &mut String,
) -> ControlFlow<Cow<'static, str>>;
316
/// The behavior [`YamlDecoder`] must have when a decoding error occurs.
#[cfg(feature = "encoding")]
#[derive(Copy, Clone)]
pub enum YAMLDecodingTrap {
    /// Ignore the offending bytes, remove them from the output.
    Ignore,
    /// Error out.
    Strict,
    /// Replace them with the Unicode REPLACEMENT CHARACTER.
    Replace,
    /// Call the user-supplied function upon decoding malformation.
    Call(YAMLDecodingTrapFn),
}
330
#[cfg(feature = "encoding")]
impl PartialEq for YAMLDecodingTrap {
    /// Compare two traps.
    ///
    /// Two [`YAMLDecodingTrap::Call`] traps are equal if they wrap the same function (compared by
    /// address). Any other pair is equal if and only if both sides are the same variant.
    fn eq(&self, other: &YAMLDecodingTrap) -> bool {
        match (self, other) {
            (YAMLDecodingTrap::Call(self_fn), YAMLDecodingTrap::Call(other_fn)) => {
                *self_fn as usize == *other_fn as usize
            }
            // At least one side carries no payload here, so comparing the variants is enough.
            // This must not be written `x == y`: that would call this very function again and
            // recurse without end.
            (x, y) => std::mem::discriminant(x) == std::mem::discriminant(y),
        }
    }
}

#[cfg(feature = "encoding")]
impl Eq for YAMLDecodingTrap {}
343
/// `YamlDecoder` is a `YamlLoader` builder that allows you to supply your own encoding error trap.
/// For example, to read a YAML file while ignoring Unicode decoding errors you can set the
/// `encoding_trap` to [`YAMLDecodingTrap::Ignore`].
/// ```rust
/// use yaml_rust2::yaml::{YamlDecoder, YAMLDecodingTrap};
///
/// let string = b"---
/// a\xa9: 1
/// b: 2.2
/// c: [1, 2]
/// ";
/// let out = YamlDecoder::read(string as &[u8])
///     .encoding_trap(YAMLDecodingTrap::Ignore)
///     .decode()
///     .unwrap();
/// ```
#[cfg(feature = "encoding")]
pub struct YamlDecoder<T: std::io::Read> {
    /// The byte source the YAML text is read from.
    source: T,
    /// What to do when the source contains a byte sequence that cannot be decoded.
    trap: YAMLDecodingTrap,
}
365
#[cfg(feature = "encoding")]
impl<T: std::io::Read> YamlDecoder<T> {
    /// Create a `YamlDecoder` decoding the given source.
    ///
    /// The decoder starts with the [`YAMLDecodingTrap::Strict`] trap; use
    /// [`YamlDecoder::encoding_trap`] to select another one.
    pub fn read(source: T) -> YamlDecoder<T> {
        let trap = YAMLDecodingTrap::Strict;
        Self { source, trap }
    }

    /// Set the behavior of the decoder when the encoding is invalid.
    ///
    /// Returns `self` so that calls can be chained.
    pub fn encoding_trap(&mut self, trap: YAMLDecodingTrap) -> &mut Self {
        self.trap = trap;
        self
    }

    /// Run the decode operation with the source and trap the `YamlDecoder` was built with.
    ///
    /// The whole source is read into memory, converted to UTF-8 and then loaded with
    /// [`YamlLoader::load_from_str`].
    ///
    /// # Errors
    /// Returns `LoadError` when reading the source, decoding its bytes or scanning the decoded
    /// text fails.
    pub fn decode(&mut self) -> Result<Vec<Yaml>, LoadError> {
        let mut raw = Vec::new();
        self.source.read_to_end(&mut raw)?;

        // Trust a byte order mark when there is one; otherwise guess between UTF-8 and the two
        // UTF-16 flavors with `detect_utf16_endianness`.
        let encoding = match Encoding::for_bom(&raw) {
            Some((from_bom, _bom_length)) => from_bom,
            None => detect_utf16_endianness(&raw),
        };
        let mut decoder = encoding.new_decoder();

        // Decode the input buffer.
        let mut text = String::new();
        decode_loop(&raw, &mut text, &mut decoder, self.trap)?;

        YamlLoader::load_from_str(&text).map_err(LoadError::Scan)
    }
}
403
/// Perform a loop of [`Decoder::decode_to_string_without_replacement`], growing `output` when
/// needed.
///
/// `input` is decoded as a whole into `output` using `decoder`. Whenever the decoder meets a
/// malformed byte sequence, `trap` decides what happens: the sequence is skipped, replaced by
/// U+FFFD, reported as an error, or handed to a user callback.
///
/// # Errors
/// Returns [`LoadError::Decode`] if a malformed sequence is met while `trap` is
/// [`YAMLDecodingTrap::Strict`], or if the callback of a [`YAMLDecodingTrap::Call`] trap returns
/// [`ControlFlow::Break`]. In the latter case the callback's message is used unless it is empty.
#[cfg(feature = "encoding")]
fn decode_loop(
    input: &[u8],
    output: &mut String,
    decoder: &mut Decoder,
    trap: YAMLDecodingTrap,
) -> Result<(), LoadError> {
    /// Minimum spare capacity requested when the decoder reports a full output buffer.
    ///
    /// `String::reserve(n)` only guarantees `n` bytes beyond the current length, so asking for
    /// `input.len() / 10` alone requests nothing for inputs shorter than 10 bytes, and possibly
    /// less than one encoded character for slightly longer ones. The decoder could then report
    /// a full output forever. This floor keeps the loop making progress.
    const MIN_OUTPUT_GROWTH: usize = 32;

    /// Index in the input at which the malformed sequence starts. The operands are widened
    /// before being added so the sum cannot overflow a `u8`.
    fn malformation_start(total_bytes_read: usize, malformed_len: u8, bytes_after: u8) -> usize {
        total_bytes_read - (usize::from(malformed_len) + usize::from(bytes_after))
    }

    /// Build the default error describing the malformed sequence found at `byte_idx`.
    fn invalid_sequence(input: &[u8], byte_idx: usize, malformed_len: u8) -> LoadError {
        let malformed_sequence = &input[byte_idx..byte_idx + usize::from(malformed_len)];
        LoadError::Decode(Cow::Owned(format!(
            "Invalid character sequence at {byte_idx}: {malformed_sequence:?}",
        )))
    }

    output.reserve(input.len());
    let mut total_bytes_read = 0;

    loop {
        match decoder.decode_to_string_without_replacement(&input[total_bytes_read..], output, true)
        {
            // If the input is empty, we processed the whole input.
            (DecoderResult::InputEmpty, _) => break Ok(()),
            // If the output is full, we must reallocate.
            (DecoderResult::OutputFull, bytes_read) => {
                total_bytes_read += bytes_read;
                // The output is already reserved to the size of the input. We slowly resize. Here,
                // we're expecting that 10% of bytes will double in size when converting to UTF-8,
                // but never grow by less than `MIN_OUTPUT_GROWTH`.
                output.reserve((input.len() / 10).max(MIN_OUTPUT_GROWTH));
            }
            (DecoderResult::Malformed(malformed_len, bytes_after_malformed), bytes_read) => {
                total_bytes_read += bytes_read;
                match trap {
                    // Ignore (skip over) malformed character.
                    YAMLDecodingTrap::Ignore => {}
                    // Replace them with the Unicode REPLACEMENT CHARACTER.
                    YAMLDecodingTrap::Replace => {
                        output.push('\u{FFFD}');
                    }
                    // Otherwise error, getting as much context as possible.
                    YAMLDecodingTrap::Strict => {
                        let byte_idx = malformation_start(
                            total_bytes_read,
                            malformed_len,
                            bytes_after_malformed,
                        );
                        break Err(invalid_sequence(input, byte_idx, malformed_len));
                    }
                    // Let the user-supplied function decide.
                    YAMLDecodingTrap::Call(callback) => {
                        let byte_idx = malformation_start(
                            total_bytes_read,
                            malformed_len,
                            bytes_after_malformed,
                        );
                        if let ControlFlow::Break(error) = callback(
                            malformed_len,
                            bytes_after_malformed,
                            &input[byte_idx..],
                            output,
                        ) {
                            // An empty message means the callback did not provide one.
                            break Err(if error.is_empty() {
                                invalid_sequence(input, byte_idx, malformed_len)
                            } else {
                                LoadError::Decode(error)
                            });
                        }
                    }
                }
            }
        }
    }
}
471
/// Guess the encoding of a bytestream that does not start with a byte order mark.
///
/// The encoding crate knows how to tell apart UTF-8 from UTF-16LE and UTF-16BE when the
/// bytestream starts with a BOM codepoint. However, it doesn't even attempt to guess the UTF-16
/// endianness of the input bytestream since in the general case the bytestream could start with
/// a codepoint that uses both bytes.
///
/// The YAML-1.2 spec mandates that the first character of a YAML document is an ASCII character.
/// This allows the encoding to be deduced by the pattern of null (#x00) characters.
///
/// See spec at <https://yaml.org/spec/1.2/spec.html#id2771184>
///
/// # Return
/// UTF-16BE if only the first of the two leading bytes is null, UTF-16LE if only the second one
/// is null, and UTF-8 in every other case (including inputs shorter than two bytes).
#[cfg(feature = "encoding")]
fn detect_utf16_endianness(b: &[u8]) -> &'static Encoding {
    match b {
        // `00 xx`: the high byte of the first code unit comes first.
        [0, second, ..] if *second != 0 => encoding_rs::UTF_16BE,
        // `xx 00`: the low byte of the first code unit comes first.
        [first, 0, ..] if *first != 0 => encoding_rs::UTF_16LE,
        _ => encoding_rs::UTF_8,
    }
}
492
macro_rules! define_as {
    ($name:ident, $t:ident, $yt:ident) => {
        /// Get a copy of the inner value of the YAML enum if it is of the matching variant.
        ///
        /// # Return
        /// If `self` is the variant this accessor was generated for, return `Some` with a copy
        /// of the value it contains. Otherwise, return `None`.
        #[must_use]
        pub fn $name(&self) -> Option<$t> {
            if let Yaml::$yt(value) = *self {
                Some(value)
            } else {
                None
            }
        }
    };
}
509
macro_rules! define_as_ref {
    ($name:ident, $t:ty, $yt:ident) => {
        /// Get a reference to the inner value of the YAML enum if it is of the matching variant.
        ///
        /// # Return
        /// If `self` is the variant this accessor was generated for, return `Some` with a
        /// borrow of the value it contains. Otherwise, return `None`.
        #[must_use]
        pub fn $name(&self) -> Option<$t> {
            match self {
                Yaml::$yt(inner) => Some(inner),
                _ => None,
            }
        }
    };
}
526
macro_rules! define_as_mut_ref {
    ($name:ident, $t:ty, $yt:ident) => {
        /// Get a mutable reference to the inner value of the YAML enum if it is of the matching
        /// variant.
        ///
        /// # Return
        /// If `self` is the variant this accessor was generated for, return `Some` with a
        /// mutable borrow of the value it contains. Otherwise, return `None`.
        #[must_use]
        pub fn $name(&mut self) -> Option<$t> {
            if let Yaml::$yt(inner) = self {
                Some(inner)
            } else {
                None
            }
        }
    };
}
543
macro_rules! define_into {
    ($name:ident, $t:ty, $yt:ident) => {
        /// Consume the YAML enum and get its inner value if it is of the matching variant.
        ///
        /// # Return
        /// If `self` is the variant this accessor was generated for, return `Some` with the
        /// value it contains. Otherwise, return `None`.
        #[must_use]
        pub fn $name(self) -> Option<$t> {
            if let Yaml::$yt(inner) = self {
                Some(inner)
            } else {
                None
            }
        }
    };
}
560
561impl Yaml {
562    define_as!(as_bool, bool, Boolean);
563    define_as!(as_i64, i64, Integer);
564
565    define_as_ref!(as_str, &str, String);
566    define_as_ref!(as_hash, &Hash, Hash);
567    define_as_ref!(as_vec, &Array, Array);
568
569    define_as_mut_ref!(as_mut_hash, &mut Hash, Hash);
570    define_as_mut_ref!(as_mut_vec, &mut Array, Array);
571
572    define_into!(into_bool, bool, Boolean);
573    define_into!(into_i64, i64, Integer);
574    define_into!(into_string, String, String);
575    define_into!(into_hash, Hash, Hash);
576    define_into!(into_vec, Array, Array);
577
578    /// Return whether `self` is a [`Yaml::Null`] node.
579    #[must_use]
580    pub fn is_null(&self) -> bool {
581        matches!(*self, Yaml::Null)
582    }
583
584    /// Return whether `self` is a [`Yaml::BadValue`] node.
585    #[must_use]
586    pub fn is_badvalue(&self) -> bool {
587        matches!(*self, Yaml::BadValue)
588    }
589
590    /// Return whether `self` is a [`Yaml::Array`] node.
591    #[must_use]
592    pub fn is_array(&self) -> bool {
593        matches!(*self, Yaml::Array(_))
594    }
595
596    /// Return whether `self` is a [`Yaml::Hash`] node.
597    #[must_use]
598    pub fn is_hash(&self) -> bool {
599        matches!(*self, Yaml::Hash(_))
600    }
601
602    /// Return the `f64` value contained in this YAML node.
603    ///
604    /// If the node is not a [`Yaml::Real`] YAML node or its contents is not a valid `f64` string,
605    /// `None` is returned.
606    #[must_use]
607    pub fn as_f64(&self) -> Option<f64> {
608        if let Yaml::Real(ref v) = self {
609            parse_f64(v)
610        } else {
611            None
612        }
613    }
614
615    /// Return the `f64` value contained in this YAML node.
616    ///
617    /// If the node is not a [`Yaml::Real`] YAML node or its contents is not a valid `f64` string,
618    /// `None` is returned.
619    #[must_use]
620    pub fn into_f64(self) -> Option<f64> {
621        self.as_f64()
622    }
623
624    /// If a value is null or otherwise bad (see variants), consume it and
625    /// replace it with a given value `other`. Otherwise, return self unchanged.
626    ///
627    /// ```
628    /// use yaml_rust2::yaml::Yaml;
629    ///
630    /// assert_eq!(Yaml::BadValue.or(Yaml::Integer(3)),  Yaml::Integer(3));
631    /// assert_eq!(Yaml::Integer(3).or(Yaml::BadValue),  Yaml::Integer(3));
632    /// ```
633    #[must_use]
634    pub fn or(self, other: Self) -> Self {
635        match self {
636            Yaml::BadValue | Yaml::Null => other,
637            this => this,
638        }
639    }
640
641    /// See `or` for behavior. This performs the same operations, but with
642    /// borrowed values for less linear pipelines.
643    #[must_use]
644    pub fn borrowed_or<'a>(&'a self, other: &'a Self) -> &'a Self {
645        match self {
646            Yaml::BadValue | Yaml::Null => other,
647            this => this,
648        }
649    }
650}
651
652#[allow(clippy::should_implement_trait)]
653impl Yaml {
654    /// Convert a string to a [`Yaml`] node.
655    ///
656    /// [`Yaml`] does not implement [`std::str::FromStr`] since conversion may not fail. This
657    /// function falls back to [`Yaml::String`] if nothing else matches.
658    ///
659    /// # Examples
660    /// ```
661    /// # use yaml_rust2::yaml::Yaml;
662    /// assert!(matches!(Yaml::from_str("42"), Yaml::Integer(42)));
663    /// assert!(matches!(Yaml::from_str("0x2A"), Yaml::Integer(42)));
664    /// assert!(matches!(Yaml::from_str("0o52"), Yaml::Integer(42)));
665    /// assert!(matches!(Yaml::from_str("~"), Yaml::Null));
666    /// assert!(matches!(Yaml::from_str("null"), Yaml::Null));
667    /// assert!(matches!(Yaml::from_str("true"), Yaml::Boolean(true)));
668    /// assert!(matches!(Yaml::from_str("3.14"), Yaml::Real(_)));
669    /// assert!(matches!(Yaml::from_str("foo"), Yaml::String(_)));
670    /// ```
671    #[must_use]
672    pub fn from_str(v: &str) -> Yaml {
673        if let Some(number) = v.strip_prefix("0x") {
674            if let Ok(i) = i64::from_str_radix(number, 16) {
675                return Yaml::Integer(i);
676            }
677        } else if let Some(number) = v.strip_prefix("0o") {
678            if let Ok(i) = i64::from_str_radix(number, 8) {
679                return Yaml::Integer(i);
680            }
681        } else if let Some(number) = v.strip_prefix('+') {
682            if let Ok(i) = number.parse::<i64>() {
683                return Yaml::Integer(i);
684            }
685        }
686        match v {
687            "" | "~" | "null" => Yaml::Null,
688            "true" => Yaml::Boolean(true),
689            "false" => Yaml::Boolean(false),
690            _ => {
691                if let Ok(integer) = v.parse::<i64>() {
692                    Yaml::Integer(integer)
693                } else if parse_f64(v).is_some() {
694                    Yaml::Real(v.to_owned())
695                } else {
696                    Yaml::String(v.to_owned())
697                }
698            }
699        }
700    }
701}
702
703static BAD_VALUE: Yaml = Yaml::BadValue;
704impl<'a> Index<&'a str> for Yaml {
705    type Output = Yaml;
706
707    /// Perform indexing if `self` is a mapping.
708    ///
709    /// # Return
710    /// If `self` is a [`Yaml::Hash`], returns an immutable borrow to the value associated to the
711    /// given key in the hash.
712    ///
713    /// This function returns a [`Yaml::BadValue`] if the underlying [`type@Hash`] does not contain
714    /// [`Yaml::String`]`{idx}` as a key.
715    ///
716    /// This function also returns a [`Yaml::BadValue`] if `self` is not a [`Yaml::Hash`].
717    fn index(&self, idx: &'a str) -> &Yaml {
718        let key = Yaml::String(idx.to_owned());
719        match self.as_hash() {
720            Some(h) => h.get(&key).unwrap_or(&BAD_VALUE),
721            None => &BAD_VALUE,
722        }
723    }
724}
725
726impl<'a> IndexMut<&'a str> for Yaml {
727    /// Perform indexing if `self` is a mapping.
728    ///
729    /// Since we cannot return a mutable borrow to a static [`Yaml::BadValue`] as we return an
730    /// immutable one in [`Index<&'a str>`], this function panics on out of bounds.
731    ///
732    /// # Panics
733    /// This function panics if the given key is not contained in `self` (as per [`IndexMut`]).
734    ///
735    /// This function also panics if `self` is not a [`Yaml::Hash`].
736    fn index_mut(&mut self, idx: &'a str) -> &mut Yaml {
737        let key = Yaml::String(idx.to_owned());
738        match self.as_mut_hash() {
739            Some(h) => h.get_mut(&key).unwrap(),
740            None => panic!("Not a hash type"),
741        }
742    }
743}
744
745impl Index<usize> for Yaml {
746    type Output = Yaml;
747
748    /// Perform indexing if `self` is a sequence or a mapping.
749    ///
750    /// # Return
751    /// If `self` is a [`Yaml::Array`], returns an immutable borrow to the value located at the
752    /// given index in the array.
753    ///
754    /// Otherwise, if `self` is a [`Yaml::Hash`], returns a borrow to the value whose key is
755    /// [`Yaml::Integer`]`(idx)` (this would not work if the key is [`Yaml::String`]`("1")`.
756    ///
757    /// This function returns a [`Yaml::BadValue`] if the index given is out of range. If `self` is
758    /// a [`Yaml::Array`], this is when the index is bigger or equal to the length of the
759    /// underlying `Vec`. If `self` is a [`Yaml::Hash`], this is when the mapping sequence does not
760    /// contain [`Yaml::Integer`]`(idx)` as a key.
761    ///
762    /// This function also returns a [`Yaml::BadValue`] if `self` is not a [`Yaml::Array`] nor a
763    /// [`Yaml::Hash`].
764    fn index(&self, idx: usize) -> &Yaml {
765        if let Some(v) = self.as_vec() {
766            v.get(idx).unwrap_or(&BAD_VALUE)
767        } else if let Some(v) = self.as_hash() {
768            let key = Yaml::Integer(i64::try_from(idx).unwrap());
769            v.get(&key).unwrap_or(&BAD_VALUE)
770        } else {
771            &BAD_VALUE
772        }
773    }
774}
775
776impl IndexMut<usize> for Yaml {
777    /// Perform indexing if `self` is a sequence or a mapping.
778    ///
779    /// Since we cannot return a mutable borrow to a static [`Yaml::BadValue`] as we return an
780    /// immutable one in [`Index<usize>`], this function panics on out of bounds.
781    ///
782    /// # Panics
783    /// This function panics if the index given is out of range (as per [`IndexMut`]). If `self` is
784    /// a [`Yaml::Array`], this is when the index is bigger or equal to the length of the
785    /// underlying `Vec`. If `self` is a [`Yaml::Hash`], this is when the mapping sequence does not
786    /// contain [`Yaml::Integer`]`(idx)` as a key.
787    ///
788    /// This function also panics if `self` is not a [`Yaml::Array`] nor a [`Yaml::Hash`].
789    fn index_mut(&mut self, idx: usize) -> &mut Yaml {
790        match self {
791            Yaml::Array(sequence) => sequence.index_mut(idx),
792            Yaml::Hash(mapping) => {
793                let key = Yaml::Integer(i64::try_from(idx).unwrap());
794                mapping.get_mut(&key).unwrap()
795            }
796            _ => panic!("Attempting to index but `self` is not a sequence nor a mapping"),
797        }
798    }
799}
800
801impl IntoIterator for Yaml {
802    type Item = Yaml;
803    type IntoIter = YamlIter;
804
805    /// Extract the [`Array`] from `self` and iterate over it.
806    ///
807    /// If `self` is **not** of the [`Yaml::Array`] variant, this function will not panic or return
808    /// an error (as per the [`IntoIterator`] trait it cannot) but will instead return an iterator
809    /// over an empty [`Array`]. Callers have to ensure (using [`Yaml::is_array`], [`matches`] or
810    /// something similar) that the [`Yaml`] object is a [`Yaml::Array`] if they want to do error
811    /// handling.
812    ///
813    /// # Examples
814    /// ```
815    /// # use yaml_rust2::{Yaml, YamlLoader};
816    ///
817    /// // An array of 2 integers, 1 and 2.
818    /// let arr = &YamlLoader::load_from_str("- 1\n- 2").unwrap()[0];
819    ///
820    /// assert_eq!(arr.clone().into_iter().count(), 2);
821    /// assert_eq!(arr.clone().into_iter().next(), Some(Yaml::Integer(1)));
822    /// assert_eq!(arr.clone().into_iter().nth(1), Some(Yaml::Integer(2)));
823    ///
824    /// // An empty array returns an empty iterator.
825    /// let empty = Yaml::Array(vec![]);
826    /// assert_eq!(empty.into_iter().count(), 0);
827    ///
828    /// // A hash with 2 key-value pairs, `(a, b)` and `(c, d)`.
829    /// let hash = YamlLoader::load_from_str("a: b\nc: d").unwrap().remove(0);
830    /// // The hash has 2 elements.
831    /// assert_eq!(hash.as_hash().unwrap().iter().count(), 2);
832    /// // But since `into_iter` can't be used with a `Yaml::Hash`, `into_iter` returns an empty
833    /// // iterator.
834    /// assert_eq!(hash.into_iter().count(), 0);
835    /// ```
836    fn into_iter(self) -> Self::IntoIter {
837        YamlIter {
838            yaml: self.into_vec().unwrap_or_default().into_iter(),
839        }
840    }
841}
842
843/// An iterator over a [`Yaml`] node.
844pub struct YamlIter {
845    yaml: std::vec::IntoIter<Yaml>,
846}
847
848impl Iterator for YamlIter {
849    type Item = Yaml;
850
851    fn next(&mut self) -> Option<Yaml> {
852        self.yaml.next()
853    }
854}
855
#[cfg(test)]
mod test {
    use super::{YAMLDecodingTrap, Yaml, YamlDecoder};

    // Every decoding test below feeds the same small document (`a: 1`, `b: 2.2`, `c: [1, 2]`)
    // through `YamlDecoder` in a different encoding and checks the same four lookups.

    /// UTF-8 input starting with a byte order mark (`EF BB BF`).
    #[test]
    fn test_read_bom() {
        let s = b"\xef\xbb\xbf---
a: 1
b: 2.2
c: [1, 2]
";
        let out = YamlDecoder::read(s as &[u8]).decode().unwrap();
        let doc = &out[0];
        assert_eq!(doc["a"].as_i64().unwrap(), 1i64);
        assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON);
        assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64);
        assert!(doc["d"][0].is_badvalue());
    }

    /// UTF-16 little-endian input starting with a byte order mark (`FF FE`).
    #[test]
    fn test_read_utf16le() {
        let s = b"\xff\xfe-\x00-\x00-\x00
\x00a\x00:\x00 \x001\x00
\x00b\x00:\x00 \x002\x00.\x002\x00
\x00c\x00:\x00 \x00[\x001\x00,\x00 \x002\x00]\x00
\x00";
        let out = YamlDecoder::read(s as &[u8]).decode().unwrap();
        let doc = &out[0];
        println!("GOT: {doc:?}");
        assert_eq!(doc["a"].as_i64().unwrap(), 1i64);
        // Compare the absolute difference: without `.abs()` any value below 2.2 would pass.
        assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON);
        assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64);
        assert!(doc["d"][0].is_badvalue());
    }

    /// UTF-16 big-endian input starting with a byte order mark (`FE FF`).
    #[test]
    fn test_read_utf16be() {
        let s = b"\xfe\xff\x00-\x00-\x00-\x00
\x00a\x00:\x00 \x001\x00
\x00b\x00:\x00 \x002\x00.\x002\x00
\x00c\x00:\x00 \x00[\x001\x00,\x00 \x002\x00]\x00
";
        let out = YamlDecoder::read(s as &[u8]).decode().unwrap();
        let doc = &out[0];
        println!("GOT: {doc:?}");
        assert_eq!(doc["a"].as_i64().unwrap(), 1i64);
        assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON);
        assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64);
        assert!(doc["d"][0].is_badvalue());
    }

    /// UTF-16 little-endian input without any byte order mark.
    #[test]
    fn test_read_utf16le_nobom() {
        let s = b"-\x00-\x00-\x00
\x00a\x00:\x00 \x001\x00
\x00b\x00:\x00 \x002\x00.\x002\x00
\x00c\x00:\x00 \x00[\x001\x00,\x00 \x002\x00]\x00
\x00";
        let out = YamlDecoder::read(s as &[u8]).decode().unwrap();
        let doc = &out[0];
        println!("GOT: {doc:?}");
        assert_eq!(doc["a"].as_i64().unwrap(), 1i64);
        assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON);
        assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64);
        assert!(doc["d"][0].is_badvalue());
    }

    /// Input whose first key contains a stray `0xA9` byte, decoded with
    /// `YAMLDecodingTrap::Ignore`: the key must still be reachable as plain `a`.
    #[test]
    fn test_read_trap() {
        let s = b"---
a\xa9: 1
b: 2.2
c: [1, 2]
";
        let out = YamlDecoder::read(s as &[u8])
            .encoding_trap(YAMLDecodingTrap::Ignore)
            .decode()
            .unwrap();
        let doc = &out[0];
        println!("GOT: {doc:?}");
        assert_eq!(doc["a"].as_i64().unwrap(), 1i64);
        assert!((doc["b"].as_f64().unwrap() - 2.2f64).abs() <= f64::EPSILON);
        assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64);
        assert!(doc["d"][0].is_badvalue());
    }

    /// `Yaml::or` returns its argument when called on `Null` and `self` when called on an integer.
    #[test]
    fn test_or() {
        assert_eq!(Yaml::Null.or(Yaml::Integer(3)), Yaml::Integer(3));
        assert_eq!(Yaml::Integer(3).or(Yaml::Integer(7)), Yaml::Integer(3));
    }
}