konst_kernel/chr/
char_formatting.rs

1pub const fn encode_utf8(char: char) -> Utf8Encoded {
2    let u32 = char as u32;
3    match u32 {
4        0..=127 => Utf8Encoded {
5            encoded: [u32 as u8, 0, 0, 0],
6            len: 1,
7        },
8        0x80..=0x7FF => {
9            let b0 = 0b1100_0000 | (u32 >> 6) as u8;
10            let b1 = 0b1000_0000 | (u32 & 0b0011_1111) as u8;
11            Utf8Encoded {
12                encoded: [b0, b1, 0, 0],
13                len: 2,
14            }
15        }
16        0x800..=0xFFFF => {
17            let b0 = 0b1110_0000 | (u32 >> 12) as u8;
18            let b1 = 0b1000_0000 | ((u32 >> 6) & 0b0011_1111) as u8;
19            let b2 = 0b1000_0000 | (u32 & 0b0011_1111) as u8;
20            Utf8Encoded {
21                encoded: [b0, b1, b2, 0],
22                len: 3,
23            }
24        }
25        0x10000..=u32::MAX => {
26            let b0 = 0b1111_0000 | (u32 >> 18) as u8;
27            let b1 = 0b1000_0000 | ((u32 >> 12) & 0b0011_1111) as u8;
28            let b2 = 0b1000_0000 | ((u32 >> 6) & 0b0011_1111) as u8;
29            let b3 = 0b1000_0000 | (u32 & 0b0011_1111) as u8;
30            Utf8Encoded {
31                encoded: [b0, b1, b2, b3],
32                len: 4,
33            }
34        }
35    }
36}
37
/// A single `char` encoded as UTF-8, stored inline in a fixed 4-byte buffer.
///
/// Values are produced by [`encode_utf8`].
#[derive(Clone, Copy)]
pub struct Utf8Encoded {
    /// Backing buffer; only the first `len` bytes belong to the encoding.
    encoded: [u8; 4],
    /// How many leading bytes of `encoded` are in use.
    len: u8,
}
43
44impl Utf8Encoded {
45    /// Gets the utf8-encoded char as a `&str`
46    pub const fn as_str(&self) -> &str {
47        unsafe { core::str::from_utf8_unchecked(self.as_bytes()) }
48    }
49
50    /// Gets the utf8-encoded char as a `&[u8]`
51    pub const fn as_bytes(&self) -> &[u8] {
52        crate::slice::slice_up_to(&self.encoded, self.len as usize)
53    }
54}
55
#[cfg(all(test, not(miri)))]
mod tests {
    use super::{encode_utf8, Utf8Encoded};

    /// Slices the private buffer directly, so the comparison below does not
    /// depend on `Utf8Encoded::as_bytes`.
    fn raw_bytes(enc: &Utf8Encoded) -> &[u8] {
        let used = enc.len as usize;
        &enc.encoded[..used]
    }

    /// Checks every `char` against the standard library's UTF-8 encoder.
    #[test]
    fn char_to_utf8_encoding_test() {
        for c in '\0'..=core::char::MAX {
            let mut buf = [0u8; 4];
            let expected: &str = c.encode_utf8(&mut buf);
            let expected_bytes = expected.as_bytes();

            let actual = encode_utf8(c);

            // Raw field view and the public accessor must both match std.
            assert_eq!(expected_bytes, raw_bytes(&actual));
            assert_eq!(expected_bytes, actual.as_bytes());

            // The `&str` view must match the validated std bytes.
            assert_eq!(
                core::str::from_utf8(expected_bytes).unwrap(),
                actual.as_str(),
            );
        }
    }
}