anstyle_parse/
lib.rs

1//! Parser for implementing virtual terminal emulators
2//!
3//! [`Parser`] is implemented according to [Paul Williams' ANSI parser
4//! state machine]. The state machine doesn't assign meaning to the parsed data
5//! and is thus not itself sufficient for writing a terminal emulator. Instead,
6//! it is expected that an implementation of [`Perform`] is provided which does
//! something useful with the parsed data. The [`Parser`] handles the
//! bookkeeping, and the [`Perform`] gets to simply handle actions.
9//!
10//! # Examples
11//!
12//! For an example of using the [`Parser`] please see the examples folder. The example included
//! there simply logs all the actions [`Perform`] does. A quick way to see it in action is to
//! pipe `vim` into it:
15//!
16//! ```sh
17//! cargo build --release --example parselog
18//! vim | target/release/examples/parselog
19//! ```
20//!
21//! Just type `:q` to exit.
22//!
23//! # Differences from original state machine description
24//!
25//! * UTF-8 Support for Input
26//! * OSC Strings can be terminated by 0x07
27//! * Only supports 7-bit codes. Some 8-bit codes are still supported, but they no longer work in
28//!   all states.
29//!
30//! [Paul Williams' ANSI parser state machine]: https://vt100.net/emu/dec_ansi_parser
31#![cfg_attr(not(test), no_std)]
32
33#[cfg(not(feature = "core"))]
34extern crate alloc;
35
36use core::mem::MaybeUninit;
37
38#[cfg(feature = "core")]
39use arrayvec::ArrayVec;
40#[cfg(feature = "utf8")]
41use utf8parse as utf8;
42
43mod params;
44pub mod state;
45
46pub use params::{Params, ParamsIter};
47
48use state::{state_change, Action, State};
49
50const MAX_INTERMEDIATES: usize = 2;
51const MAX_OSC_PARAMS: usize = 16;
52#[cfg(feature = "core")]
53const MAX_OSC_RAW: usize = 1024;
54
55/// Parser for raw _VTE_ protocol which delegates actions to a [`Perform`]
56#[derive(Default, Clone, Debug, PartialEq, Eq)]
57pub struct Parser<C = DefaultCharAccumulator> {
58    state: State,
59    intermediates: [u8; MAX_INTERMEDIATES],
60    intermediate_idx: usize,
61    params: Params,
62    param: u16,
63    #[cfg(feature = "core")]
64    osc_raw: ArrayVec<u8, MAX_OSC_RAW>,
65    #[cfg(not(feature = "core"))]
66    osc_raw: alloc::vec::Vec<u8>,
67    osc_params: [(usize, usize); MAX_OSC_PARAMS],
68    osc_num_params: usize,
69    ignoring: bool,
70    utf8_parser: C,
71}
72
73impl<C> Parser<C>
74where
75    C: CharAccumulator,
76{
77    /// Create a new Parser
78    pub fn new() -> Parser {
79        Parser::default()
80    }
81
82    #[inline]
83    fn params(&self) -> &Params {
84        &self.params
85    }
86
87    #[inline]
88    fn intermediates(&self) -> &[u8] {
89        &self.intermediates[..self.intermediate_idx]
90    }
91
92    /// Advance the parser state
93    ///
94    /// Requires a [`Perform`] in case `byte` triggers an action
95    #[inline]
96    pub fn advance<P: Perform>(&mut self, performer: &mut P, byte: u8) {
97        // Utf8 characters are handled out-of-band.
98        if let State::Utf8 = self.state {
99            self.process_utf8(performer, byte);
100            return;
101        }
102
103        let (state, action) = state_change(self.state, byte);
104        self.perform_state_change(performer, state, action, byte);
105    }
106
107    #[inline]
108    fn process_utf8<P>(&mut self, performer: &mut P, byte: u8)
109    where
110        P: Perform,
111    {
112        if let Some(c) = self.utf8_parser.add(byte) {
113            performer.print(c);
114            self.state = State::Ground;
115        }
116    }
117
118    #[inline]
119    fn perform_state_change<P>(&mut self, performer: &mut P, state: State, action: Action, byte: u8)
120    where
121        P: Perform,
122    {
123        match state {
124            State::Anywhere => {
125                // Just run the action
126                self.perform_action(performer, action, byte);
127            }
128            state => {
129                match self.state {
130                    State::DcsPassthrough => {
131                        self.perform_action(performer, Action::Unhook, byte);
132                    }
133                    State::OscString => {
134                        self.perform_action(performer, Action::OscEnd, byte);
135                    }
136                    _ => (),
137                }
138
139                match action {
140                    Action::Nop => (),
141                    action => {
142                        self.perform_action(performer, action, byte);
143                    }
144                }
145
146                match state {
147                    State::CsiEntry | State::DcsEntry | State::Escape => {
148                        self.perform_action(performer, Action::Clear, byte);
149                    }
150                    State::DcsPassthrough => {
151                        self.perform_action(performer, Action::Hook, byte);
152                    }
153                    State::OscString => {
154                        self.perform_action(performer, Action::OscStart, byte);
155                    }
156                    _ => (),
157                }
158
159                // Assume the new state
160                self.state = state;
161            }
162        }
163    }
164
165    /// Separate method for osc_dispatch that borrows self as read-only
166    ///
167    /// The aliasing is needed here for multiple slices into self.osc_raw
168    #[inline]
169    fn osc_dispatch<P: Perform>(&self, performer: &mut P, byte: u8) {
170        let mut slices: [MaybeUninit<&[u8]>; MAX_OSC_PARAMS] =
171            unsafe { MaybeUninit::uninit().assume_init() };
172
173        for (i, slice) in slices.iter_mut().enumerate().take(self.osc_num_params) {
174            let indices = self.osc_params[i];
175            *slice = MaybeUninit::new(&self.osc_raw[indices.0..indices.1]);
176        }
177
178        unsafe {
179            let num_params = self.osc_num_params;
180            let params = &slices[..num_params] as *const [MaybeUninit<&[u8]>] as *const [&[u8]];
181            performer.osc_dispatch(&*params, byte == 0x07);
182        }
183    }
184
185    #[inline]
186    fn perform_action<P: Perform>(&mut self, performer: &mut P, action: Action, byte: u8) {
187        match action {
188            Action::Print => performer.print(byte as char),
189            Action::Execute => performer.execute(byte),
190            Action::Hook => {
191                if self.params.is_full() {
192                    self.ignoring = true;
193                } else {
194                    self.params.push(self.param);
195                }
196
197                performer.hook(self.params(), self.intermediates(), self.ignoring, byte);
198            }
199            Action::Put => performer.put(byte),
200            Action::OscStart => {
201                self.osc_raw.clear();
202                self.osc_num_params = 0;
203            }
204            Action::OscPut => {
205                #[cfg(feature = "core")]
206                {
207                    if self.osc_raw.is_full() {
208                        return;
209                    }
210                }
211
212                let idx = self.osc_raw.len();
213
214                // Param separator
215                if byte == b';' {
216                    let param_idx = self.osc_num_params;
217                    match param_idx {
218                        // Only process up to MAX_OSC_PARAMS
219                        MAX_OSC_PARAMS => return,
220
221                        // First param is special - 0 to current byte index
222                        0 => {
223                            self.osc_params[param_idx] = (0, idx);
224                        }
225
226                        // All other params depend on previous indexing
227                        _ => {
228                            let prev = self.osc_params[param_idx - 1];
229                            let begin = prev.1;
230                            self.osc_params[param_idx] = (begin, idx);
231                        }
232                    }
233
234                    self.osc_num_params += 1;
235                } else {
236                    self.osc_raw.push(byte);
237                }
238            }
239            Action::OscEnd => {
240                let param_idx = self.osc_num_params;
241                let idx = self.osc_raw.len();
242
243                match param_idx {
244                    // Finish last parameter if not already maxed
245                    MAX_OSC_PARAMS => (),
246
247                    // First param is special - 0 to current byte index
248                    0 => {
249                        self.osc_params[param_idx] = (0, idx);
250                        self.osc_num_params += 1;
251                    }
252
253                    // All other params depend on previous indexing
254                    _ => {
255                        let prev = self.osc_params[param_idx - 1];
256                        let begin = prev.1;
257                        self.osc_params[param_idx] = (begin, idx);
258                        self.osc_num_params += 1;
259                    }
260                }
261                self.osc_dispatch(performer, byte);
262            }
263            Action::Unhook => performer.unhook(),
264            Action::CsiDispatch => {
265                if self.params.is_full() {
266                    self.ignoring = true;
267                } else {
268                    self.params.push(self.param);
269                }
270
271                performer.csi_dispatch(self.params(), self.intermediates(), self.ignoring, byte);
272            }
273            Action::EscDispatch => {
274                performer.esc_dispatch(self.intermediates(), self.ignoring, byte);
275            }
276            Action::Collect => {
277                if self.intermediate_idx == MAX_INTERMEDIATES {
278                    self.ignoring = true;
279                } else {
280                    self.intermediates[self.intermediate_idx] = byte;
281                    self.intermediate_idx += 1;
282                }
283            }
284            Action::Param => {
285                if self.params.is_full() {
286                    self.ignoring = true;
287                    return;
288                }
289
290                if byte == b';' {
291                    self.params.push(self.param);
292                    self.param = 0;
293                } else if byte == b':' {
294                    self.params.extend(self.param);
295                    self.param = 0;
296                } else {
297                    // Continue collecting bytes into param
298                    self.param = self.param.saturating_mul(10);
299                    self.param = self.param.saturating_add((byte - b'0') as u16);
300                }
301            }
302            Action::Clear => {
303                // Reset everything on ESC/CSI/DCS entry
304                self.intermediate_idx = 0;
305                self.ignoring = false;
306                self.param = 0;
307
308                self.params.clear();
309            }
310            Action::BeginUtf8 => self.process_utf8(performer, byte),
311            Action::Ignore => (),
312            Action::Nop => (),
313        }
314    }
315}
316
/// Incrementally assembles a `char` from individual bytes
pub trait CharAccumulator: Default {
    /// Feed the next `byte` of a character
    ///
    /// Returns `Some` once a character is available and `None` while more bytes are needed.
    fn add(&mut self, byte: u8) -> Option<char>;
}
324
325#[cfg(feature = "utf8")]
326pub type DefaultCharAccumulator = Utf8Parser;
327#[cfg(not(feature = "utf8"))]
328pub type DefaultCharAccumulator = AsciiParser;
329
/// [`CharAccumulator`] for input restricted to 7-bit ASCII
#[derive(Default, Clone, Debug, PartialEq, Eq)]
pub struct AsciiParser;
333
334impl CharAccumulator for AsciiParser {
335    fn add(&mut self, _byte: u8) -> Option<char> {
336        unreachable!("multi-byte UTF8 characters are unsupported")
337    }
338}
339
/// [`CharAccumulator`] that decodes UTF-8 input
#[cfg(feature = "utf8")]
#[derive(Default, Clone, Debug, PartialEq, Eq)]
pub struct Utf8Parser {
    /// Underlying incremental UTF-8 decoder
    utf8_parser: utf8::Parser,
}
346
#[cfg(feature = "utf8")]
impl CharAccumulator for Utf8Parser {
    /// Advance the UTF-8 decoder by `byte` and return whatever character it reported, if any
    fn add(&mut self, byte: u8) -> Option<char> {
        let mut decoded = None;
        self.utf8_parser
            .advance(&mut VtUtf8Receiver(&mut decoded), byte);
        decoded
    }
}
356
/// Adapter that stores the character reported by the UTF-8 decoder into the wrapped slot
#[cfg(feature = "utf8")]
struct VtUtf8Receiver<'a>(&'a mut Option<char>);
359
#[cfg(feature = "utf8")]
impl utf8::Receiver for VtUtf8Receiver<'_> {
    /// Store a successfully decoded character
    fn codepoint(&mut self, c: char) {
        *self.0 = Some(c);
    }

    /// Store U+FFFD REPLACEMENT CHARACTER in place of an invalid sequence
    fn invalid_sequence(&mut self) {
        *self.0 = Some(char::REPLACEMENT_CHARACTER);
    }
}
370
371/// Performs actions requested by the [`Parser`]
372///
373/// Actions in this case mean, for example, handling a CSI escape sequence describing cursor
374/// movement, or simply printing characters to the screen.
375///
376/// The methods on this type correspond to actions described in
377/// <http://vt100.net/emu/dec_ansi_parser>. I've done my best to describe them in
378/// a useful way in my own words for completeness, but the site should be
379/// referenced if something isn't clear. If the site disappears at some point in
380/// the future, consider checking archive.org.
381pub trait Perform {
382    /// Draw a character to the screen and update states.
383    fn print(&mut self, _c: char) {}
384
385    /// Execute a C0 or C1 control function.
386    fn execute(&mut self, _byte: u8) {}
387
388    /// Invoked when a final character arrives in first part of device control string.
389    ///
390    /// The control function should be determined from the private marker, final character, and
391    /// execute with a parameter list. A handler should be selected for remaining characters in the
392    /// string; the handler function should subsequently be called by `put` for every character in
393    /// the control string.
394    ///
395    /// The `ignore` flag indicates that more than two intermediates arrived and
396    /// subsequent characters were ignored.
397    fn hook(&mut self, _params: &Params, _intermediates: &[u8], _ignore: bool, _action: u8) {}
398
399    /// Pass bytes as part of a device control string to the handle chosen in `hook`. C0 controls
400    /// will also be passed to the handler.
401    fn put(&mut self, _byte: u8) {}
402
403    /// Called when a device control string is terminated.
404    ///
405    /// The previously selected handler should be notified that the DCS has
406    /// terminated.
407    fn unhook(&mut self) {}
408
409    /// Dispatch an operating system command.
410    fn osc_dispatch(&mut self, _params: &[&[u8]], _bell_terminated: bool) {}
411
412    /// A final character has arrived for a CSI sequence
413    ///
414    /// The `ignore` flag indicates that either more than two intermediates arrived
415    /// or the number of parameters exceeded the maximum supported length,
416    /// and subsequent characters were ignored.
417    fn csi_dispatch(
418        &mut self,
419        _params: &Params,
420        _intermediates: &[u8],
421        _ignore: bool,
422        _action: u8,
423    ) {
424    }
425
426    /// The final character of an escape sequence has arrived.
427    ///
428    /// The `ignore` flag indicates that more than two intermediates arrived and
429    /// subsequent characters were ignored.
430    fn esc_dispatch(&mut self, _intermediates: &[u8], _ignore: bool, _byte: u8) {}
431}