console/
ansi.rs

1#[cfg(feature = "alloc")]
2use alloc::{borrow::Cow, string::String};
3use core::{
4    fmt::Display,
5    iter::{FusedIterator, Peekable},
6    str::CharIndices,
7};
8
/// States of the hand-written DFA that recognises a single ANSI escape sequence.
///
/// The automaton starts in [`State::Start`], is fed one character at a time through
/// [`State::transition`], and ends up in [`State::Trap`] as soon as a character cannot be part
/// of the sequence consumed so far.
#[derive(Debug, Clone, Copy, Default)]
enum State {
    /// Initial state: nothing has been consumed yet.
    #[default]
    Start,
    /// The introducer (`\u{1b}` or `\u{9b}`) has been consumed.
    S1,
    /// A single `(` or `)` directly after the introducer has been consumed.
    S2,
    /// Accepting: `(` / `)` followed by `0`, `1` or `2`. Every further character traps.
    S3,
    /// Inside the run of prefix characters (`[`, `(`, `)`, `#`, `;`, `?`).
    S4,
    /// Accepting: one digit of the current parameter has been consumed.
    S5,
    /// Accepting: two consecutive digits have been consumed.
    S6,
    /// Accepting: three consecutive digits have been consumed.
    S7,
    /// Accepting: four consecutive digits have been consumed.
    S8,
    /// Accepting: five consecutive digits have been consumed. Every further character traps.
    S9,
    /// A `;` following at least one digit has been consumed.
    S10,
    /// Accepting: a terminating letter or symbol has been consumed. Every further character
    /// traps.
    S11,
    /// Dead state: no transition leaves it.
    Trap,
}

impl State {
    /// Returns `true` when the characters consumed so far form a complete match.
    fn is_final(&self) -> bool {
        matches!(
            self,
            Self::S3 | Self::S5 | Self::S6 | Self::S7 | Self::S8 | Self::S9 | Self::S11
        )
    }

    /// Returns `true` once the automaton has entered the dead state.
    fn is_trapped(&self) -> bool {
        matches!(self, Self::Trap)
    }

    /// Advances the automaton by the character `c`.
    ///
    /// Any `(state, character)` pair that is not listed below leads to [`State::Trap`].
    fn transition(&mut self, c: char) {
        *self = match (*self, c) {
            // The introducer is only accepted as the very first character.
            (Self::Start, '\u{1b}' | '\u{9b}') => Self::S1,

            // Prefix characters. A lone parenthesis right after the introducer gets its own
            // state because it may be completed by `0`, `1` or `2` (see `S3`).
            (Self::S1, '(' | ')') => Self::S2,
            (Self::S2 | Self::S4, '(' | ')') => Self::S4,
            (Self::S1 | Self::S2 | Self::S4, ';' | '[' | '#' | '?') => Self::S4,

            // A separator after digits (or after another separator) starts a new parameter.
            (Self::S5 | Self::S6 | Self::S7 | Self::S8 | Self::S10, ';') => Self::S10,

            // Digits. After a lone parenthesis `0..=2` completes the short form, while any
            // other digit starts a regular parameter.
            (Self::S2, '0'..='2') => Self::S3,
            (Self::S2, '3'..='9') => Self::S5,
            (Self::S1 | Self::S4 | Self::S10, '0'..='9') => Self::S5,
            (Self::S5, '0'..='9') => Self::S6,
            (Self::S6, '0'..='9') => Self::S7,
            (Self::S7, '0'..='9') => Self::S8,
            (Self::S8, '0'..='9') => Self::S9,

            // Terminating letter or symbol.
            (
                Self::S1
                | Self::S2
                | Self::S4
                | Self::S5
                | Self::S6
                | Self::S7
                | Self::S8
                | Self::S10,
                'A'..='P' | 'R' | 'Z' | 'c' | 'f'..='n' | 'q' | 'r' | 'y' | '=' | '>' | '<',
            ) => Self::S11,

            _ => Self::Trap,
        };
    }
}
102
/// Scanner that yields every ANSI escape sequence found in a string slice.
#[derive(Debug)]
struct Matches<'a> {
    /// The complete input; yielded matches borrow from it.
    s: &'a str,
    /// Cursor over the characters of `s` together with their byte offsets.
    it: Peekable<CharIndices<'a>>,
}

impl<'a> Matches<'a> {
    /// Creates a scanner positioned at the beginning of `s`.
    fn new(s: &'a str) -> Self {
        Matches {
            s,
            it: s.char_indices().peekable(),
        }
    }
}
115
/// A single escape sequence located inside a larger string.
#[derive(Debug)]
struct Match<'a> {
    /// The string the match was found in.
    text: &'a str,
    /// Byte offset of the first byte of the match within `text`.
    start: usize,
    /// Byte offset one past the last byte of the match within `text`.
    end: usize,
}

impl<'a> Match<'a> {
    /// Returns the matched portion of the underlying string.
    #[inline]
    pub(crate) fn as_str(&self) -> &'a str {
        let Self { text, start, end } = *self;
        &text[start..end]
    }
}
129
130impl<'a> Iterator for Matches<'a> {
131    type Item = Match<'a>;
132
133    fn next(&mut self) -> Option<Self::Item> {
134        find_ansi_code_exclusive(&mut self.it).map(|(start, end)| Match {
135            text: self.s,
136            start,
137            end,
138        })
139    }
140}
141
142impl FusedIterator for Matches<'_> {}
143
144fn find_ansi_code_exclusive(it: &mut Peekable<CharIndices>) -> Option<(usize, usize)> {
145    'outer: loop {
146        if let (start, '\u{1b}') | (start, '\u{9b}') = it.peek()? {
147            let start = *start;
148            let mut state = State::default();
149            let mut maybe_end = None;
150
151            loop {
152                let item = it.peek();
153
154                if let Some((idx, c)) = item {
155                    state.transition(*c);
156
157                    if state.is_final() {
158                        maybe_end = Some(*idx);
159                    }
160                }
161
162                // The match is greedy so run till we hit the trap state no matter what. A valid
163                // match is just one that was final at some point
164                if state.is_trapped() || item.is_none() {
165                    match maybe_end {
166                        Some(end) => {
167                            // All possible final characters are a single byte so it's safe to make
168                            // the end exclusive by just adding one
169                            return Some((start, end + 1));
170                        }
171                        // The character we are peeking right now might be the start of a match so
172                        // we want to continue the loop without popping off that char
173                        None => continue 'outer,
174                    }
175                }
176
177                it.next();
178            }
179        }
180
181        it.next();
182    }
183}
184
/// Helper function to strip ansi codes.
///
/// Returns the input unchanged (borrowed) when it contains no ansi codes; otherwise returns a
/// newly allocated string made of the non-ansi parts only.
#[cfg(feature = "alloc")]
pub fn strip_ansi_codes(s: &str) -> Cow<'_, str> {
    // Probe for a first escape sequence so that clean input never allocates.
    let mut probe = s.char_indices().peekable();
    if find_ansi_code_exclusive(&mut probe).is_none() {
        return Cow::Borrowed(s);
    }

    let mut stripped = String::new();
    for (chunk, is_ansi) in AnsiCodeIterator::new(s) {
        if !is_ansi {
            stripped.push_str(chunk);
        }
    }
    Cow::Owned(stripped)
}
199
/// A wrapper struct that implements [`core::fmt::Display`], only displaying non-ansi parts.
pub struct WithoutAnsi<'a> {
    /// The wrapped text, possibly containing ansi codes.
    str: &'a str,
}

impl<'a> WithoutAnsi<'a> {
    /// Wraps `str` without inspecting or copying it.
    pub fn new(str: &'a str) -> Self {
        WithoutAnsi { str }
    }
}
210
211impl Display for WithoutAnsi<'_> {
212    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
213        for (str, is_ansi) in AnsiCodeIterator::new(self.str) {
214            if !is_ansi {
215                f.write_str(str)?;
216            }
217        }
218        Ok(())
219    }
220}
221
222/// An iterator over ansi codes in a string.
223///
224/// This type can be used to scan over ansi codes in a string.
225/// It yields tuples in the form `(s, is_ansi)` where `s` is a slice of
226/// the original string and `is_ansi` indicates if the slice contains
227/// ansi codes or string values.
228pub struct AnsiCodeIterator<'a> {
229    s: &'a str,
230    pending_item: Option<(&'a str, bool)>,
231    last_idx: usize,
232    cur_idx: usize,
233    iter: Matches<'a>,
234}
235
236impl<'a> AnsiCodeIterator<'a> {
237    /// Creates a new ansi code iterator.
238    pub fn new(s: &'a str) -> AnsiCodeIterator<'a> {
239        AnsiCodeIterator {
240            s,
241            pending_item: None,
242            last_idx: 0,
243            cur_idx: 0,
244            iter: Matches::new(s),
245        }
246    }
247
248    /// Returns the string slice up to the current match.
249    pub fn current_slice(&self) -> &str {
250        &self.s[..self.cur_idx]
251    }
252
253    /// Returns the string slice from the current match to the end.
254    pub fn rest_slice(&self) -> &str {
255        &self.s[self.cur_idx..]
256    }
257}
258
259impl<'a> Iterator for AnsiCodeIterator<'a> {
260    type Item = (&'a str, bool);
261
262    fn next(&mut self) -> Option<(&'a str, bool)> {
263        if let Some(pending_item) = self.pending_item.take() {
264            self.cur_idx += pending_item.0.len();
265            Some(pending_item)
266        } else if let Some(m) = self.iter.next() {
267            let s = &self.s[self.last_idx..m.start];
268            self.last_idx = m.end;
269            if s.is_empty() {
270                self.cur_idx = m.end;
271                Some((m.as_str(), true))
272            } else {
273                self.cur_idx = m.start;
274                self.pending_item = Some((m.as_str(), true));
275                Some((s, false))
276            }
277        } else if self.last_idx < self.s.len() {
278            let rv = &self.s[self.last_idx..];
279            self.cur_idx = self.s.len();
280            self.last_idx = self.s.len();
281            Some((rv, false))
282        } else {
283            None
284        }
285    }
286}
287
288impl FusedIterator for AnsiCodeIterator<'_> {}
289
#[cfg(test)]
mod tests {
    use super::*;

    use core::fmt::Write;
    use once_cell::sync::Lazy;
    use proptest::prelude::*;
    use regex::Regex;

    // The manual dfa `State` is a handwritten translation from the previously used regex. That
    // regex is kept here and used to ensure that the new matches are the same as the old
    static STRIP_ANSI_RE: Lazy<Regex> = Lazy::new(|| {
        Regex::new(
            r"[\x1b\x9b]([()][012AB]|[\[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-PRZcf-nqry=><])",
        )
        .unwrap()
    });

    // Lets `assert_eq!` compare regex matches with dfa matches by their byte ranges.
    impl<'a> PartialEq<Match<'a>> for regex::Match<'_> {
        fn eq(&self, other: &Match<'a>) -> bool {
            self.start() == other.start && self.end() == other.end
        }
    }

    proptest! {
        #[test]
        fn dfa_matches_old_regex(s in r"([\x1b\x9b]?.*){0,5}") {
            let old_matches: Vec<_> = STRIP_ANSI_RE.find_iter(&s).collect();
            let new_matches: Vec<_> = Matches::new(&s).collect();
            assert_eq!(old_matches, new_matches);
        }
    }

    #[test]
    fn dfa_matches_regex_on_small_strings() {
        // To make sure the test runs in a reasonable time this is a slimmed down list of
        // characters to reduce the groups that are only used with each other along with one
        // arbitrarily chosen character not used in the regex (' ')
        const POSSIBLE_BYTES: &[u8] = &[b' ', 0x1b, 0x9b, b'(', b'0', b'[', b';', b'3', b'C'];

        fn check_all_strings_of_len(len: usize) {
            _check_all_strings_of_len(len, &mut Vec::with_capacity(len));
        }

        // Recursively extends `chunk` by `len` more bytes and compares both matchers on every
        // resulting byte string that is valid UTF-8.
        fn _check_all_strings_of_len(len: usize, chunk: &mut Vec<u8>) {
            if len == 0 {
                if let Ok(s) = core::str::from_utf8(chunk) {
                    let old_matches: Vec<_> = STRIP_ANSI_RE.find_iter(s).collect();
                    let new_matches: Vec<_> = Matches::new(s).collect();
                    assert_eq!(old_matches, new_matches);
                }

                return;
            }

            for b in POSSIBLE_BYTES {
                chunk.push(*b);
                _check_all_strings_of_len(len - 1, chunk);
                chunk.pop();
            }
        }

        for str_len in 0..=6 {
            check_all_strings_of_len(str_len);
        }
    }

    #[test]
    fn complex_data() {
        let s = std::fs::read_to_string(
            std::path::Path::new("tests")
                .join("data")
                .join("sample_zellij_session.log"),
        )
        .unwrap();

        let old_matches: Vec<_> = STRIP_ANSI_RE.find_iter(&s).collect();
        let new_matches: Vec<_> = Matches::new(&s).collect();
        assert_eq!(old_matches, new_matches);
    }

    #[test]
    fn state_machine() {
        let ansi_code = "\x1b)B";
        let mut state = State::default();
        assert!(!state.is_final());

        for c in ansi_code.chars() {
            state.transition(c);
        }
        assert!(state.is_final());

        state.transition('A');
        assert!(state.is_trapped());
    }

    #[test]
    fn back_to_back_entry_char() {
        let s = "\x1b\x1bf";
        let matches: Vec<_> = Matches::new(s).map(|m| m.as_str()).collect();
        assert_eq!(&["\x1bf"], matches.as_slice());
    }

    #[test]
    fn early_paren_can_use_many_chars() {
        let s = "\x1b(C";
        let matches: Vec<_> = Matches::new(s).map(|m| m.as_str()).collect();
        assert_eq!(&[s], matches.as_slice());
    }

    #[test]
    fn long_run_of_digits() {
        let s = "\u{1b}00000";
        let matches: Vec<_> = Matches::new(s).map(|m| m.as_str()).collect();
        assert_eq!(&[s], matches.as_slice());
    }

    #[test]
    fn test_without_ansi() {
        let str_with_ansi = "\x1b[1;97;41mError\x1b[0m";
        let without_ansi = WithoutAnsi::new(str_with_ansi);
        // Formatting twice checks that displaying does not consume the wrapper.
        for _ in 0..2 {
            let mut output = String::default();
            write!(output, "{without_ansi}").unwrap();
            assert_eq!(output, "Error");
        }
    }

    #[test]
    fn test_ansi_iter_re_vt100() {
        let s = "\x1b(0lpq\x1b)Benglish";
        let mut iter = AnsiCodeIterator::new(s);
        assert_eq!(iter.next(), Some(("\x1b(0", true)));
        assert_eq!(iter.next(), Some(("lpq", false)));
        assert_eq!(iter.next(), Some(("\x1b)B", true)));
        assert_eq!(iter.next(), Some(("english", false)));
        // The trailing text is the last chunk; the iterator must be exhausted afterwards.
        assert_eq!(iter.next(), None);
    }

    #[test]
    fn test_ansi_iter_re() {
        use crate::style;
        let s = format!("Hello {}!", style("World").red().force_styling(true));
        let mut iter = AnsiCodeIterator::new(&s);
        assert_eq!(iter.next(), Some(("Hello ", false)));
        assert_eq!(iter.current_slice(), "Hello ");
        assert_eq!(iter.rest_slice(), "\x1b[31mWorld\x1b[0m!");
        assert_eq!(iter.next(), Some(("\x1b[31m", true)));
        assert_eq!(iter.current_slice(), "Hello \x1b[31m");
        assert_eq!(iter.rest_slice(), "World\x1b[0m!");
        assert_eq!(iter.next(), Some(("World", false)));
        assert_eq!(iter.current_slice(), "Hello \x1b[31mWorld");
        assert_eq!(iter.rest_slice(), "\x1b[0m!");
        assert_eq!(iter.next(), Some(("\x1b[0m", true)));
        assert_eq!(iter.current_slice(), "Hello \x1b[31mWorld\x1b[0m");
        assert_eq!(iter.rest_slice(), "!");
        assert_eq!(iter.next(), Some(("!", false)));
        assert_eq!(iter.current_slice(), "Hello \x1b[31mWorld\x1b[0m!");
        assert_eq!(iter.rest_slice(), "");
        assert_eq!(iter.next(), None);
    }

    #[test]
    fn test_ansi_iter_re_on_multi() {
        use crate::style;
        let s = format!("{}", style("a").red().bold().force_styling(true));
        let mut iter = AnsiCodeIterator::new(&s);
        assert_eq!(iter.next(), Some(("\x1b[31m", true)));
        assert_eq!(iter.current_slice(), "\x1b[31m");
        assert_eq!(iter.rest_slice(), "\x1b[1ma\x1b[0m");
        assert_eq!(iter.next(), Some(("\x1b[1m", true)));
        assert_eq!(iter.current_slice(), "\x1b[31m\x1b[1m");
        assert_eq!(iter.rest_slice(), "a\x1b[0m");
        assert_eq!(iter.next(), Some(("a", false)));
        assert_eq!(iter.current_slice(), "\x1b[31m\x1b[1ma");
        assert_eq!(iter.rest_slice(), "\x1b[0m");
        assert_eq!(iter.next(), Some(("\x1b[0m", true)));
        assert_eq!(iter.current_slice(), "\x1b[31m\x1b[1ma\x1b[0m");
        assert_eq!(iter.rest_slice(), "");
        assert_eq!(iter.next(), None);
    }
}