prettytty/
util.rs

1//! Helpers for parsing and displaying byte strings.
2
3use core::fmt;
4use std::io;
5
/// A radix-aware parser for unsigned integers written as ASCII digits.
///
/// Each variant selects a radix, with the variant's discriminant being that
/// radix. The methods convert byte strings of decimal or hexadecimal ASCII
/// digits into `u16` or `u32`. They are `const`, which is why the
/// implementation unwraps options with a macro instead of the `?` operator.
///
/// # Example
///
/// ```
/// # use prettytty::util::ByteParser;
/// assert_eq!(
///     ByteParser::Hexadecimal.to_u16(b"ffff"),
///     Some(0xffff)
/// );
/// assert_eq!(
///     ByteParser::Decimal.to_u16(b"65536"),
///     None
/// );
/// ```
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ByteParser {
    /// Parse decimal digits; the discriminant is the radix 10.
    Decimal = 10,
    /// Parse hexadecimal digits; the discriminant is the radix 16.
    Hexadecimal = 16,
}
31
// Unwrap an option or return `None` from the enclosing function. This macro
// stands in for the `?` operator, which can't be used in const functions.
macro_rules! unwrap {
    ($option:expr) => {
        if let Some(inner) = $option {
            inner
        } else {
            return None;
        }
    };
}
41
42impl ByteParser {
43    /// Get this ASCII digit's value.
44    const fn digit(&self, byte: u8) -> Option<u8> {
45        let value = match byte {
46            0x30..=0x39 => byte - 0x30,
47            0x41..=0x46 => byte - 0x41 + 10,
48            0x61..=0x66 => byte - 0x61 + 10,
49            _ => return None,
50        };
51
52        if (*self as u8) <= value {
53            return None;
54        }
55
56        Some(value)
57    }
58
59    /// Parse the byte string as a u16.
60    pub const fn to_u16(&self, bytes: &[u8]) -> Option<u16> {
61        let value = unwrap!(self.to_u32(bytes));
62        if value <= 0xffff {
63            Some(value as u16)
64        } else {
65            None
66        }
67    }
68
69    /// Parse the byte string as a u32.
70    pub const fn to_u32(&self, bytes: &[u8]) -> Option<u32> {
71        let mut value: u32 = 0;
72        let mut index = 0;
73
74        while index < bytes.len() {
75            let digit = unwrap!(self.digit(bytes[index]));
76            value = unwrap!(value.checked_mul(*self as u32));
77            value = unwrap!(value.checked_add(digit as u32));
78            index += 1;
79        }
80
81        Some(value)
82    }
83}
84
85// -----------------------------------------------------------------------------------------------
86
/// A wrapper that displays a byte string in human-readable form.
///
/// Typically, code wraps a byte slice in one of the variants and hands the
/// result to one of Rust's formatting macros. For other use cases, the
/// low-level [`ByteFormat::render`] method accepts arbitrary writers and
/// reports the number of characters written; it combines well with a
/// [`Rewriter`] instance.
///
/// # Example
///
/// ```
/// # use prettytty::util::ByteFormat;
/// assert_eq!(
///     format!("{}", ByteFormat::Concise(b"\x1b[1m\x90@\xfe\x07")),
///     "␛[1m.@.␇"
/// );
/// assert_eq!(
///     format!("{}", ByteFormat::Nicely(b"\x1b[1m\x90@\xfe\x07")),
///     "‹ESC›[1m‹DCS›@「FE」‹BEL›"
/// );
/// assert_eq!(
///     format!("{}", ByteFormat::Hexdump(b"\x1b[1m\x90@\xfe\x07")),
///     "0000:  1b5b 316d 9040 fe07  ␛[1m.@.␇"
/// );
/// ```
#[derive(Debug)]
pub enum ByteFormat<'a> {
    /// The concise format: exactly one character per byte. C0 control codes
    /// and DEL appear as Unicode control pictures (which may be hard to
    /// read), printable ASCII appears as is, and every byte larger than 0x7F
    /// appears as a period `.`
    Concise(&'a [u8]),
    /// The elaborate format: more than one character per byte where
    /// necessary. C0 control codes, DEL, and select C1 control codes appear as
    /// mnemonics between guillemets, e.g., `‹ESC›` for 0x1B. All other bytes
    /// larger than 0x7F appear as hexadecimal numbers between corner
    /// brackets, e.g., `「A0」` for 0xA0.
    Nicely(&'a [u8]),
    /// The hexdump format: hexadecimal and concise formatting side by side.
    /// Unlike the other formats, it is line-oriented, displaying up to 16
    /// bytes per line.
    Hexdump(&'a [u8]),
}
128
/// The mnemonics for the C0 control codes 0x00..=0x1F, indexed by byte value.
#[rustfmt::skip]
const C0: [&str; 32] = [
    // 0x00..=0x07
    "‹NUL›", "‹SOH›", "‹STX›", "‹ETX›", "‹EOT›", "‹ENQ›", "‹ACK›", "‹BEL›",
    // 0x08..=0x0F
    "‹BS›", "‹HT›", "‹LF›", "‹VT›", "‹FF›", "‹CR›", "‹SO›", "‹SI›",
    // 0x10..=0x17
    "‹DLE›", "‹DC1›", "‹DC2›", "‹DC3›", "‹DC4›", "‹NAK›", "‹SYN›", "‹ETB›",
    // 0x18..=0x1F
    "‹CAN›", "‹EM›", "‹SUB›", "‹ESC›", "‹FS›", "‹GS›", "‹RS›", "‹US›",
];
163
/// The mnemonics for the C1 control codes 0x9B..=0x9F, indexed by the byte
/// value minus 0x9B.
const C1: [&str; 5] = [
    "‹CSI›", // 0x9B
    "‹ST›",  // 0x9C
    "‹OSC›", // 0x9D
    "‹PM›",  // 0x9E
    "‹APC›", // 0x9F
];
165
166impl ByteFormat<'_> {
167    /// Render the bytes with the given writer.
168    ///
169    /// This method largely is an implementation detail. It differs from the
170    /// display trait by accepting arbitrary writers and by returning the number
171    /// of characters (not bytes) written. It is public to support applications
172    /// that require either of these features.
173    ///
174    /// Since the hexdump format is line-oriented, it emits newlines for all but
175    /// the last line. The number of characters written only covers that last
176    /// line.
177    pub fn render<W: fmt::Write + ?Sized>(&self, writer: &mut W) -> Result<usize, fmt::Error> {
178        match *self {
179            ByteFormat::Concise(bytes) => ByteFormat::render_concise(bytes, writer),
180            ByteFormat::Nicely(bytes) => ByteFormat::render_nicely(bytes, writer),
181            ByteFormat::Hexdump(bytes) => ByteFormat::render_hexdump(bytes, writer),
182        }
183    }
184
185    fn render_concise<W>(bytes: &[u8], writer: &mut W) -> Result<usize, fmt::Error>
186    where
187        W: fmt::Write + ?Sized,
188    {
189        for byte in bytes {
190            let display = match *byte {
191                0x00..=0x1f => {
192                    char::from_u32(0x2400_u32 + *byte as u32).expect("known good Unicode character")
193                }
194                0x20..=0x7e => *byte as char,
195                0x7f => char::from_u32(0x2421).expect("known good Unicode character"),
196                _ => '.',
197            };
198            writer.write_char(display)?;
199        }
200
201        Ok(bytes.len())
202    }
203
204    fn render_nicely<W>(bytes: &[u8], writer: &mut W) -> Result<usize, fmt::Error>
205    where
206        W: fmt::Write + ?Sized,
207    {
208        let mut ascii = [0; 1];
209        let mut characters = 0;
210
211        for &byte in bytes {
212            let display = match byte {
213                0x00..=0x1f => C0[byte as usize],
214                0x20..=0x7e => {
215                    ascii[0] = byte;
216                    // SAFETY: Guaranteed to be ASCII by match arm
217                    core::str::from_utf8(&ascii).expect("ASCII characters are valid UTF-8, too")
218                }
219                0x7f => "‹DEL›",
220                0x90 => "‹DCS›",
221                0x98 => "‹SOS›",
222                0x9b..=0x9f => C1[(byte - 0x9b) as usize],
223                _ => "",
224            };
225
226            if display.is_empty() {
227                writer.write_fmt(format_args!("「{:02X}」", byte))?;
228                characters += 4;
229            } else {
230                writer.write_str(display)?;
231                characters += match display.len() {
232                    n @ (1 | 2) => n,
233                    n => n - 6 + 2,
234                };
235            }
236        }
237
238        Ok(characters)
239    }
240
241    fn render_hexdump<W>(bytes: &[u8], writer: &mut W) -> Result<usize, fmt::Error>
242    where
243        W: fmt::Write + ?Sized,
244    {
245        const CHUNK_SIZE: usize = 16;
246        let compact = bytes.len() < CHUNK_SIZE;
247        let mut chunk_index = 0;
248        let mut characters = 0;
249
250        for chunk in bytes.chunks(CHUNK_SIZE) {
251            if 0 < chunk_index {
252                writer.write_char('\n')?;
253            }
254
255            write!(writer, "{:04x}:  ", chunk_index)?;
256            characters = 7; // Restart counting so we only count last line
257
258            for pair in chunk.chunks(2) {
259                // Allow for uneven number of bytes in final chunk.
260                assert!(!pair.is_empty(), "chunk must not be empty");
261                if pair.len() == 1 {
262                    write!(writer, "{:02x}   ", pair[0])?;
263                } else {
264                    assert!(pair.len() == 2, "chunk has two elements");
265                    write!(writer, "{:02x}{:02x} ", pair[0], pair[1])?;
266                }
267                characters += 5;
268            }
269
270            if !compact {
271                for _ in 0..(CHUNK_SIZE - chunk.len()) / 2 {
272                    // Pad out remaining hexadecimal slots for final chunk.
273                    writer.write_str("     ")?;
274                    characters += 5;
275                }
276            }
277
278            // Separate hexadecimal from character display by two columns
279            writer.write_str(" ")?;
280            characters += 1;
281
282            ByteFormat::render_concise(chunk, writer)?;
283
284            chunk_index += chunk.len();
285            characters += chunk.len();
286        }
287
288        Ok(characters)
289    }
290}
291
292impl fmt::Display for ByteFormat<'_> {
293    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
294        self.render(f)?;
295        Ok(())
296    }
297}
298
299// -----------------------------------------------------------------------------------------------
300
/// A thin bridge from [`std::io::Write`] to [`core::fmt::Write`].
///
/// Rust strings and string slices are UTF-8, so implementing
/// [`core::fmt::Write::write_str`] on top of [`std::io::Write::write_all`]
/// takes little effort. Error handling is the hard part: [`std::io::Error`]
/// distinguishes between many error conditions, whereas [`core::fmt::Error`]
/// is a unit-like struct. This adapter bridges that gap by holding on to the
/// most recent I/O error. Consequently, if writing through the rewriter fails
/// with a format error, code using this struct can still recover the
/// underlying I/O error.
///
/// # Example
///
/// The match below illustrates how to do just that:
/// ```
/// # use prettytty::util::Rewriter;
/// # use std::io::{Cursor, Write};
/// # use core::fmt::Write as FmtWrite;
/// # fn main() -> std::io::Result<()> {
/// let mut cursor = Cursor::new(vec![0; 10]);
/// let mut writer = Rewriter::new(&mut cursor);
///
/// match writer.write_str("Hello!") {
///     Ok(()) => (),
///     Err(_) => return Err(writer.into_err()),
/// }
///
/// assert_eq!(&cursor.get_ref()[0..5], b"Hello");
/// # Ok(())
/// # }
/// ```
pub struct Rewriter<'a, W>
where
    W: ?Sized + 'a,
{
    /// The wrapped writer.
    writer: &'a mut W,
    /// The most recent I/O error, if any.
    result: io::Result<()>,
}
335
336impl<'a, W: ?Sized + 'a> Rewriter<'a, W> {
337    /// Create a new rewriter.
338    pub fn new(writer: &'a mut W) -> Self {
339        Self {
340            writer,
341            result: Ok(()),
342        }
343    }
344
345    /// Consume the rewriter to get its error.
346    ///
347    /// If the code using this rewriter produced a [`fmt::Error`], this method
348    /// produces the underlying I/O error.
349    ///
350    /// # Panics
351    ///
352    /// If the rewriter didn't record an error.
353    pub fn into_err(self) -> io::Error {
354        match self.result {
355            Err(err) => err,
356            Ok(_) => panic!("display trait returned error without underlying I/O error"),
357        }
358    }
359}
360
361impl<W: io::Write + ?Sized> fmt::Write for Rewriter<'_, W> {
362    fn write_str(&mut self, s: &str) -> fmt::Result {
363        self.writer.write_all(s.as_bytes()).map_err(|err| {
364            self.result = Err(err);
365            fmt::Error
366        })
367    }
368}
369
370// ------------------------------------------------------------------------------------------------
371
#[cfg(test)]
mod test {
    use super::*;
    use std::io::{Cursor, Write};

    /// Parse valid numbers, invalid digits, and out-of-range values in both
    /// radixes and at both widths.
    #[test]
    fn test_radix_parse() {
        let dec16 = |digits: &[u8]| ByteParser::Decimal.to_u16(digits);
        let hex16 = |digits: &[u8]| ByteParser::Hexadecimal.to_u16(digits);
        let dec32 = |digits: &[u8]| ByteParser::Decimal.to_u32(digits);
        let hex32 = |digits: &[u8]| ByteParser::Hexadecimal.to_u32(digits);

        assert_eq!(dec16(b"665"), Some(665));
        assert_eq!(dec16(b"65536"), None);
        assert_eq!(dec16(b"665A"), None);
        assert_eq!(hex16(b"665"), Some(1_637));
        assert_eq!(hex16(b"665A"), Some(26_202));
        assert_eq!(hex16(b"fFfF"), Some(0xffff));
        assert_eq!(hex16(b"10000"), None);

        assert_eq!(dec32(b"665"), Some(665));
        assert_eq!(dec32(b"65536"), Some(65_536));
        assert_eq!(dec32(b"665A"), None);
        assert_eq!(hex32(b"665"), Some(1_637));
        assert_eq!(hex32(b"665A"), Some(26_202));
        assert_eq!(hex32(b"fFfFfFfF"), Some(0xffff_ffff));
        assert_eq!(hex32(b"100000000"), None);
    }

    /// Format a two-line hexdump through the display trait and compare the
    /// UTF-8 output, including the padding of the partial second line.
    #[test]
    fn test_format() -> std::io::Result<()> {
        let input = b"\x1bP>|Terminal\x07\x1bP>|Name\x1b\\";
        let mut buffer = Cursor::new(vec![0_u8; 500]);
        write!(buffer, "{}", ByteFormat::Hexdump(input))?;

        let written = buffer.position() as usize;
        let expected: &[u8] =
            b"0000:  1b50 3e7c 5465 726d 696e 616c 071b 503e  \xe2\x90\x9bP>|Terminal\
              \xe2\x90\x87\xe2\x90\x9bP>\n\
              0010:  7c4e 616d 651b 5c                        |Name\xe2\x90\x9b\\";
        assert_eq!(&buffer.get_ref()[..written], expected);
        Ok(())
    }

    /// Render several byte strings through a rewriter, checking the character
    /// count of each as well as the accumulated output.
    #[test]
    fn test_nicely() -> std::io::Result<()> {
        let mut buffer = Cursor::new(vec![0_u8; 100]);
        let mut writer = Rewriter::new(&mut buffer);
        let mut nicely = |bytes: &[u8]| ByteFormat::Nicely(bytes).render(&mut writer);

        assert_eq!(nicely(b"R"), Ok(1));
        assert_eq!(nicely(b"\x1b"), Ok(5));
        assert_eq!(nicely(b"#"), Ok(1));
        assert_eq!(nicely(b"\xaf"), Ok(4));
        assert_eq!(nicely(b"\\"), Ok(1));
        assert_eq!(nicely(b"\""), Ok(1));

        let written = buffer.position() as usize;
        assert_eq!(
            &buffer.get_ref()[..written],
            "R‹ESC›#「AF」\\\"".as_bytes()
        );
        assert_eq!(buffer.position(), 21);
        Ok(())
    }
}
437}