prettytty/
util.rs

1//! Helpers for parsing and displaying byte strings.
2
3use core::fmt;
4use std::io;
5
/// A radix-aware parser turning ASCII digit strings into unsigned integers.
///
/// Each variant selects a radix, which also serves as the variant's
/// discriminant. The parsing methods accept byte strings made up solely of
/// ASCII digits valid for that radix (hexadecimal letters may be upper or
/// lower case) and produce a `u16` or `u32`. All methods are `const`. Because
/// the `?` operator is unavailable in `const` functions, the implementation
/// unwraps options with a small macro instead.
///
/// An empty byte string contains no invalid digit and hence parses as zero.
///
/// # Example
///
/// ```
/// # use prettytty::util::ByteParser;
/// assert_eq!(
///     ByteParser::Hexadecimal.to_u16(b"ffff"),
///     Some(0xffff)
/// );
/// assert_eq!(
///     ByteParser::Decimal.to_u16(b"65536"),
///     None
/// );
/// ```
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum ByteParser {
    /// Parse base-10 digits, i.e., `0` through `9`.
    Decimal = 10,
    /// Parse base-16 digits, i.e., `0` through `9` as well as `a` through `f`
    /// in either case.
    Hexadecimal = 16,
}

// Yield the option's payload or make the enclosing function return `None`.
// This macro stands in for the `?` operator, which `const` functions can't use.
macro_rules! unwrap {
    ($option:expr) => {
        match $option {
            Some(payload) => payload,
            None => return None,
        }
    };
}

impl ByteParser {
    /// Convert an ASCII digit to its numeric value.
    ///
    /// The result is `None` if the byte is not a hexadecimal digit or if its
    /// value is too large for this parser's radix.
    const fn digit(&self, byte: u8) -> Option<u8> {
        let value = match byte {
            b'0'..=b'9' => byte - b'0',
            b'A'..=b'F' => byte - b'A' + 10,
            b'a'..=b'f' => byte - b'a' + 10,
            _ => return None,
        };

        // The discriminant is the radix; valid digits are strictly smaller.
        if value < (*self as u8) {
            Some(value)
        } else {
            None
        }
    }

    /// Parse the byte string as a u16.
    ///
    /// The result is `None` if the byte string contains an invalid digit or
    /// the number does not fit into 16 bits.
    pub const fn to_u16(&self, bytes: &[u8]) -> Option<u16> {
        match self.to_u32(bytes) {
            Some(wide) if wide <= u16::MAX as u32 => Some(wide as u16),
            _ => None,
        }
    }

    /// Parse the byte string as a u32.
    ///
    /// The result is `None` if the byte string contains an invalid digit or
    /// the number does not fit into 32 bits. An empty byte string yields
    /// `Some(0)`.
    pub const fn to_u32(&self, bytes: &[u8]) -> Option<u32> {
        let radix = *self as u32;
        let mut total: u32 = 0;
        let mut position = 0;

        // Iterators aren't available in const functions, hence the index.
        while position < bytes.len() {
            let digit = unwrap!(self.digit(bytes[position]));
            let shifted = unwrap!(total.checked_mul(radix));
            total = unwrap!(shifted.checked_add(digit as u32));
            position += 1;
        }

        Some(total)
    }
}
84
85// -----------------------------------------------------------------------------------------------
86
/// Display a byte string in a more humane manner.
///
/// The intended use for this enumeration is wrapping byte strings before
/// handing them off to one of Rust's formatting macros. However, the low-level
/// [`ByteFormat::render`] method, especially when combined with a [`Rewriter`]
/// instance, enables other use cases, too.
///
/// # Example
///
/// ```
/// # use prettytty::util::ByteFormat;
/// assert_eq!(
///     format!("{}", ByteFormat::Concise(b"\x1b[1m\x90@\xfe\x07")),
///     "␛[1m.@.␇"
/// );
/// assert_eq!(
///     format!("{}", ByteFormat::Nicely(b"\x1b[1m\x90@\xfe\x07")),
///     "‹ESC›[1m‹DCS›@「FE」‹BEL›"
/// );
/// assert_eq!(
///     format!("{}", ByteFormat::Hexdump(b"\x1b[1m\x90@\xfe\x07")),
///     "0000:  1b5b 316d 9040 fe07  ␛[1m.@.␇"
/// );
/// ```
#[derive(Debug)]
pub enum ByteFormat<'a> {
    /// The concise format uses one character per byte. It displays C0 control
    /// codes and delete (0x7F) with Unicode control pictures (which may be
    /// hard to read) and replaces bytes larger than 0x7F with a period `.`
    Concise(&'a [u8]),
    /// The elaborate format uses more than one character per byte where
    /// necessary. It displays C0 control codes, delete, and select C1 control
    /// codes as mnemonics between guillemets, e.g., `‹ESC›` for 0x1B. It
    /// displays all other bytes larger than 0x7F as hexadecimal numbers
    /// between corner brackets, e.g., `「A0」` for 0xA0.
    Nicely(&'a [u8]),
    /// The hexdump format combines hexadecimal and concise formatting. Unlike
    /// the other formats, it is line-oriented, displaying up to 16 bytes per
    /// line.
    Hexdump(&'a [u8]),
}

/// The mnemonics for the C0 control codes, indexed by byte value.
const C0: [&str; 32] = [
    "‹NUL›",
    "‹SOH›",
    "‹STX›",
    "‹ETX›",
    "‹EOT›",
    "‹ENQ›",
    "‹ACK›",
    "‹BEL›",
    "‹BS›",
    "‹HT›",
    "‹LF›",
    "‹VT›",
    "‹FF›",
    "‹CR›",
    "‹SO›",
    "‹SI›",
    "‹DLE›",
    "‹DC1›",
    "‹DC2›",
    "‹DC3›",
    "‹DC4›",
    "‹NAK›",
    "‹SYN›",
    "‹ETB›",
    "‹CAN›",
    "‹EM›",
    "‹SUB›",
    "‹ESC›",
    "‹FS›",
    "‹GS›",
    "‹RS›",
    "‹US›",
];

/// The mnemonics for the C1 control codes 0x9B through 0x9F, indexed by the
/// byte value minus 0x9B.
const C1: [&str; 5] = ["‹CSI›", "‹ST›", "‹OSC›", "‹PM›", "‹APC›"];

impl ByteFormat<'_> {
    /// Render the bytes with the given writer.
    ///
    /// This method largely is an implementation detail. It differs from the
    /// display trait by accepting arbitrary writers and by returning the number
    /// of characters (not bytes) written. It is public to support applications
    /// that require either of these features.
    ///
    /// Since the hexdump format is line-oriented, it emits newlines for all but
    /// the last line. The number of characters written only covers that last
    /// line.
    ///
    /// # Errors
    ///
    /// This method forwards any error reported by the writer.
    pub fn render<W: fmt::Write + ?Sized>(&self, writer: &mut W) -> Result<usize, fmt::Error> {
        match *self {
            ByteFormat::Concise(bytes) => ByteFormat::render_concise(bytes, writer),
            ByteFormat::Nicely(bytes) => ByteFormat::render_nicely(bytes, writer),
            ByteFormat::Hexdump(bytes) => ByteFormat::render_hexdump(bytes, writer),
        }
    }

    /// Render exactly one character per byte and return the character count.
    fn render_concise<W>(bytes: &[u8], writer: &mut W) -> Result<usize, fmt::Error>
    where
        W: fmt::Write + ?Sized,
    {
        for &byte in bytes {
            let display = match byte {
                // The control pictures start at U+2400 and mirror C0's order.
                0x00..=0x1f => char::from_u32(0x2400_u32 + u32::from(byte))
                    .expect("known good Unicode character"),
                0x20..=0x7e => char::from(byte),
                // U+2421 is the control picture for delete.
                0x7f => '\u{2421}',
                _ => '.',
            };
            writer.write_char(display)?;
        }

        Ok(bytes.len())
    }

    /// Render mnemonics and hexadecimal escapes where necessary and return
    /// the number of characters written.
    fn render_nicely<W>(bytes: &[u8], writer: &mut W) -> Result<usize, fmt::Error>
    where
        W: fmt::Write + ?Sized,
    {
        // Scratch space for viewing a single printable ASCII byte as a string.
        let mut ascii = [0; 1];
        let mut characters = 0;

        for &byte in bytes {
            let display = match byte {
                0x00..=0x1f => Some(C0[usize::from(byte)]),
                0x20..=0x7e => {
                    ascii[0] = byte;
                    // The match arm guarantees that the byte is ASCII.
                    Some(
                        core::str::from_utf8(&ascii)
                            .expect("ASCII characters are valid UTF-8, too"),
                    )
                }
                0x7f => Some("‹DEL›"),
                0x90 => Some("‹DCS›"),
                0x98 => Some("‹SOS›"),
                0x9b..=0x9f => Some(C1[usize::from(byte - 0x9b)]),
                _ => None,
            };

            match display {
                Some(text) => {
                    writer.write_str(text)?;
                    // Count characters, not bytes; guillemets are multi-byte.
                    characters += text.chars().count();
                }
                None => {
                    write!(writer, "「{:02X}」", byte)?;
                    // Two corner brackets plus two hexadecimal digits
                    characters += 4;
                }
            }
        }

        Ok(characters)
    }

    /// Count the hexadecimal digits necessary for displaying the value.
    const fn hex_digits(mut value: usize) -> usize {
        let mut digits = 1;
        while 0xf < value {
            value >>= 4;
            digits += 1;
        }
        digits
    }

    /// Render up to 16 bytes per line, each line comprising the offset, the
    /// bytes in hexadecimal, and the bytes in concise format. Return the
    /// number of characters in the last line.
    fn render_hexdump<W>(bytes: &[u8], writer: &mut W) -> Result<usize, fmt::Error>
    where
        W: fmt::Write + ?Sized,
    {
        const CHUNK_SIZE: usize = 16;
        // Offsets are zero-padded to at least this many hexadecimal digits.
        const OFFSET_DIGITS: usize = 4;

        // A single, partial line needs no padding to align the text column.
        let compact = bytes.len() < CHUNK_SIZE;
        let mut offset = 0;
        let mut characters = 0;

        for chunk in bytes.chunks(CHUNK_SIZE) {
            if 0 < offset {
                writer.write_char('\n')?;
            }

            write!(writer, "{:0width$x}:  ", offset, width = OFFSET_DIGITS)?;
            // Restart counting so we only count the last line. Offsets of
            // 0x10000 and larger take up more than the minimum width. The
            // colon and two spaces account for the remaining three characters.
            characters = ByteFormat::hex_digits(offset).max(OFFSET_DIGITS) + 3;

            for pair in chunk.chunks(2) {
                match *pair {
                    [first, second] => write!(writer, "{:02x}{:02x} ", first, second)?,
                    // Allow for uneven number of bytes in final chunk.
                    [single] => write!(writer, "{:02x}   ", single)?,
                    _ => unreachable!("chunks(2) yields one or two bytes"),
                }
                characters += 5;
            }

            if !compact {
                for _ in 0..(CHUNK_SIZE - chunk.len()) / 2 {
                    // Pad out remaining hexadecimal slots for final chunk.
                    writer.write_str("     ")?;
                    characters += 5;
                }
            }

            // Together with the space trailing the last pair, this separates
            // hexadecimal from character display by two columns.
            writer.write_str(" ")?;
            characters += 1;

            characters += ByteFormat::render_concise(chunk, writer)?;
            offset += chunk.len();
        }

        Ok(characters)
    }
}

impl fmt::Display for ByteFormat<'_> {
    /// Display the bytes by rendering them, discarding the character count.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.render(f).map(|_count| ())
    }
}
300
301// -----------------------------------------------------------------------------------------------
302
/// A lightweight adapter from [`std::io::Write`] to [`core::fmt::Write`].
///
/// Rust strings and string slices are UTF-8, so forwarding
/// [`core::fmt::Write::write_str`] to [`std::io::Write::write_all`] takes
/// little effort. The real difficulty lies in error reporting:
/// [`std::io::Error`] distinguishes many error conditions, whereas
/// [`core::fmt::Error`] is a unit-like struct without further information.
/// This adapter bridges the gap by holding on to the most recent I/O error.
/// When formatting through the rewriter fails with a format error, the code
/// using the rewriter can still retrieve the underlying I/O error.
///
/// # Example
///
/// The match below illustrates how to do just that:
/// ```
/// # use prettytty::util::Rewriter;
/// # use std::io::{Cursor, Write};
/// # use core::fmt::Write as FmtWrite;
/// # fn main() -> std::io::Result<()> {
/// let mut cursor = Cursor::new(vec![0; 10]);
/// let mut writer = Rewriter::new(&mut cursor);
///
/// match writer.write_str("Hello!") {
///     Ok(()) => (),
///     Err(_) => return Err(writer.into_err()),
/// }
///
/// assert_eq!(&cursor.get_ref()[0..5], b"Hello");
/// # Ok(())
/// # }
/// ```
pub struct Rewriter<'a, W: ?Sized + 'a> {
    // The wrapped writer receiving all output
    writer: &'a mut W,
    // `Ok` until a write fails, thereafter the most recent I/O error
    result: io::Result<()>,
}

impl<'a, W: ?Sized + 'a> Rewriter<'a, W> {
    /// Wrap the given writer in a new rewriter, which starts out without
    /// recorded error.
    pub fn new(writer: &'a mut W) -> Self {
        let result = Ok(());
        Self { writer, result }
    }

    /// Check whether this rewriter has recorded an I/O error.
    pub fn is_err(&self) -> bool {
        matches!(self.result, Err(_))
    }

    /// Consume the rewriter and return its recorded error.
    ///
    /// After code writing to this rewriter has failed with a [`fmt::Error`],
    /// this method recovers the I/O error that caused the failure.
    ///
    /// # Panics
    ///
    /// If the rewriter didn't record an error.
    pub fn into_err(self) -> io::Error {
        if let Err(error) = self.result {
            error
        } else {
            panic!("display trait returned error without underlying I/O error")
        }
    }
}

impl<W: io::Write + ?Sized> fmt::Write for Rewriter<'_, W> {
    /// Write the string slice's bytes to the wrapped writer. Upon failure,
    /// record the I/O error and report a format error instead.
    fn write_str(&mut self, s: &str) -> fmt::Result {
        match self.writer.write_all(s.as_bytes()) {
            Ok(()) => Ok(()),
            Err(error) => {
                self.result = Err(error);
                Err(fmt::Error)
            }
        }
    }
}
376
377// ------------------------------------------------------------------------------------------------
378
#[cfg(test)]
mod test {
    use super::*;
    use std::io::{Cursor, Write};

    /// Exercise both radixes against both integer widths, including invalid
    /// digits and values just past each width's maximum.
    #[test]
    fn test_radix_parse() {
        let decimal = ByteParser::Decimal;
        let hexadecimal = ByteParser::Hexadecimal;

        // 16-bit results
        assert_eq!(decimal.to_u16(b"665"), Some(665));
        assert_eq!(decimal.to_u16(b"65536"), None);
        assert_eq!(decimal.to_u16(b"665A"), None);
        assert_eq!(hexadecimal.to_u16(b"665"), Some(1_637));
        assert_eq!(hexadecimal.to_u16(b"665A"), Some(26_202));
        assert_eq!(hexadecimal.to_u16(b"fFfF"), Some(0xffff));
        assert_eq!(hexadecimal.to_u16(b"10000"), None);

        // 32-bit results
        assert_eq!(decimal.to_u32(b"665"), Some(665));
        assert_eq!(decimal.to_u32(b"65536"), Some(65_536));
        assert_eq!(decimal.to_u32(b"665A"), None);
        assert_eq!(hexadecimal.to_u32(b"665"), Some(1_637));
        assert_eq!(hexadecimal.to_u32(b"665A"), Some(26_202));
        assert_eq!(hexadecimal.to_u32(b"fFfFfFfF"), Some(0xffff_ffff));
        assert_eq!(hexadecimal.to_u32(b"100000000"), None);
    }

    /// Check a two-line hexdump, whose second line is padded so that the
    /// character columns line up.
    #[test]
    fn test_format() -> std::io::Result<()> {
        let input = b"\x1bP>|Terminal\x07\x1bP>|Name\x1b\\";
        let expected = concat!(
            "0000:  1b50 3e7c 5465 726d 696e 616c 071b 503e  ",
            "\u{241b}P>|Terminal\u{2407}\u{241b}P>\n",
            "0010:  7c4e 616d 651b 5c                        ",
            "|Name\u{241b}\\",
        );

        let mut buffer = Cursor::new(vec![0; 500]);
        write!(buffer, "{}", ByteFormat::Hexdump(input))?;

        let written = buffer.position() as usize;
        assert_eq!(&buffer.get_ref()[0..written], expected.as_bytes());
        Ok(())
    }

    /// Check the character counts reported by the elaborate format as well
    /// as the bytes arriving at the underlying writer.
    #[test]
    fn test_nicely() -> std::io::Result<()> {
        let mut buffer = Cursor::new(vec![0; 100]);
        let mut writer = Rewriter::new(&mut buffer);

        let mut check = |input: &[u8], expected: usize| {
            assert_eq!(ByteFormat::Nicely(input).render(&mut writer), Ok(expected));
        };
        check(b"R", 1);
        check(b"\x1b", 5);
        check(b"#", 1);
        check(b"\xaf", 4);
        check(b"\\", 1);
        check(b"\"", 1);

        let written = buffer.position() as usize;
        assert_eq!(
            &buffer.get_ref()[0..written],
            "R‹ESC›#「AF」\\\"".as_bytes()
        );
        assert_eq!(buffer.position(), 21);
        Ok(())
    }
}