pest_meta/optimizer/
mod.rs

1// pest. The Elegant Parser
2// Copyright (c) 2018 DragoČ™ Tiselice
3//
4// Licensed under the Apache License, Version 2.0
5// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. All files in the project carrying such notice may not be copied,
8// modified, or distributed except according to those terms.
9
10//! Different optimizations for pest's ASTs.
11
12use crate::ast::*;
13use std::collections::HashMap;
14
15#[cfg(test)]
16macro_rules! box_tree {
17    ( $node:ident( $(                      $child:ident( $($args:tt)* )     ),+ ) ) => (
18      $node      ( $( Box::new( box_tree!( $child      ( $($args   )* ) ) ) ),+ )
19    );
20    ($expr:expr) => ($expr);
21}
22
23mod concatenator;
24mod factorizer;
25mod lister;
26mod restorer;
27mod rotater;
28mod skipper;
29mod unroller;
30
31/// Takes pest's ASTs and optimizes them
32pub fn optimize(rules: Vec<Rule>) -> Vec<OptimizedRule> {
33    let optimized: Vec<OptimizedRule> = rules
34        .into_iter()
35        .map(rotater::rotate)
36        .map(skipper::skip)
37        .map(unroller::unroll)
38        .map(concatenator::concatenate)
39        .map(factorizer::factor)
40        .map(lister::list)
41        .map(rule_to_optimized_rule)
42        .collect();
43
44    let rules = to_hash_map(&optimized);
45    optimized
46        .into_iter()
47        .map(|rule| restorer::restore_on_err(rule, &rules))
48        .collect()
49}
50
51fn rule_to_optimized_rule(rule: Rule) -> OptimizedRule {
52    fn to_optimized(expr: Expr) -> OptimizedExpr {
53        match expr {
54            Expr::Str(string) => OptimizedExpr::Str(string),
55            Expr::Insens(string) => OptimizedExpr::Insens(string),
56            Expr::Range(start, end) => OptimizedExpr::Range(start, end),
57            Expr::Ident(ident) => OptimizedExpr::Ident(ident),
58            Expr::PeekSlice(start, end) => OptimizedExpr::PeekSlice(start, end),
59            Expr::PosPred(expr) => OptimizedExpr::PosPred(Box::new(to_optimized(*expr))),
60            Expr::NegPred(expr) => OptimizedExpr::NegPred(Box::new(to_optimized(*expr))),
61            Expr::Seq(lhs, rhs) => {
62                OptimizedExpr::Seq(Box::new(to_optimized(*lhs)), Box::new(to_optimized(*rhs)))
63            }
64            Expr::Choice(lhs, rhs) => {
65                OptimizedExpr::Choice(Box::new(to_optimized(*lhs)), Box::new(to_optimized(*rhs)))
66            }
67            Expr::Opt(expr) => OptimizedExpr::Opt(Box::new(to_optimized(*expr))),
68            Expr::Rep(expr) => OptimizedExpr::Rep(Box::new(to_optimized(*expr))),
69            Expr::Skip(strings) => OptimizedExpr::Skip(strings),
70            Expr::Push(expr) => OptimizedExpr::Push(Box::new(to_optimized(*expr))),
71            Expr::RepOnce(_)
72            | Expr::RepExact(..)
73            | Expr::RepMin(..)
74            | Expr::RepMax(..)
75            | Expr::RepMinMax(..) => unreachable!("No valid transformation to OptimizedRule"),
76        }
77    }
78
79    OptimizedRule {
80        name: rule.name,
81        ty: rule.ty,
82        expr: to_optimized(rule.expr),
83    }
84}
85
86fn to_hash_map(rules: &[OptimizedRule]) -> HashMap<String, OptimizedExpr> {
87    rules
88        .iter()
89        .map(|r| (r.name.clone(), r.expr.clone()))
90        .collect()
91}
92
93/// The optimized version of the pest AST's `Rule`.
94#[derive(Clone, Debug, Eq, PartialEq)]
95pub struct OptimizedRule {
96    /// The name of the rule.
97    pub name: String,
98    /// The type of the rule.
99    pub ty: RuleType,
100    /// The optimized expression of the rule.
101    pub expr: OptimizedExpr,
102}
103
104/// The optimized version of the pest AST's `Expr`.
105///
106/// # Warning: Semantic Versioning
107/// There may be non-breaking changes to the meta-grammar
108/// between minor versions. Those non-breaking changes, however,
109/// may translate into semver-breaking changes due to the additional variants
110/// propaged from the `Rule` enum. This is a known issue and will be fixed in the
111/// future (e.g. by increasing MSRV and non_exhaustive annotations).
112#[derive(Clone, Debug, Eq, PartialEq)]
113pub enum OptimizedExpr {
114    /// Matches an exact string, e.g. `"a"`
115    Str(String),
116    /// Matches an exact string, case insensitively (ASCII only), e.g. `^"a"`
117    Insens(String),
118    /// Matches one character in the range, e.g. `'a'..'z'`
119    Range(String, String),
120    /// Matches the rule with the given name, e.g. `a`
121    Ident(String),
122    /// Matches a custom part of the stack, e.g. `PEEK[..]`
123    PeekSlice(i32, Option<i32>),
124    /// Positive lookahead; matches expression without making progress, e.g. `&e`
125    PosPred(Box<OptimizedExpr>),
126    /// Negative lookahead; matches if expression doesn't match, without making progress, e.g. `!e`
127    NegPred(Box<OptimizedExpr>),
128    /// Matches a sequence of two expressions, e.g. `e1 ~ e2`
129    Seq(Box<OptimizedExpr>, Box<OptimizedExpr>),
130    /// Matches either of two expressions, e.g. `e1 | e2`
131    Choice(Box<OptimizedExpr>, Box<OptimizedExpr>),
132    /// Optionally matches an expression, e.g. `e?`
133    Opt(Box<OptimizedExpr>),
134    /// Matches an expression zero or more times, e.g. `e*`
135    Rep(Box<OptimizedExpr>),
136    /// Continues to match expressions until one of the strings in the `Vec` is found
137    Skip(Vec<String>),
138    /// Matches an expression and pushes it to the stack, e.g. `push(e)`
139    Push(Box<OptimizedExpr>),
140    /// Restores an expression's checkpoint
141    RestoreOnErr(Box<OptimizedExpr>),
142}
143
144impl OptimizedExpr {
145    /// Returns a top-down iterator over the `OptimizedExpr`.
146    pub fn iter_top_down(&self) -> OptimizedExprTopDownIterator {
147        OptimizedExprTopDownIterator::new(self)
148    }
149
150    /// Applies `f` to the `OptimizedExpr` top-down.
151    pub fn map_top_down<F>(self, mut f: F) -> OptimizedExpr
152    where
153        F: FnMut(OptimizedExpr) -> OptimizedExpr,
154    {
155        fn map_internal<F>(expr: OptimizedExpr, f: &mut F) -> OptimizedExpr
156        where
157            F: FnMut(OptimizedExpr) -> OptimizedExpr,
158        {
159            let expr = f(expr);
160
161            match expr {
162                // TODO: Use box syntax when it gets stabilized.
163                OptimizedExpr::PosPred(expr) => {
164                    let mapped = Box::new(map_internal(*expr, f));
165                    OptimizedExpr::PosPred(mapped)
166                }
167                OptimizedExpr::NegPred(expr) => {
168                    let mapped = Box::new(map_internal(*expr, f));
169                    OptimizedExpr::NegPred(mapped)
170                }
171                OptimizedExpr::Seq(lhs, rhs) => {
172                    let mapped_lhs = Box::new(map_internal(*lhs, f));
173                    let mapped_rhs = Box::new(map_internal(*rhs, f));
174                    OptimizedExpr::Seq(mapped_lhs, mapped_rhs)
175                }
176                OptimizedExpr::Choice(lhs, rhs) => {
177                    let mapped_lhs = Box::new(map_internal(*lhs, f));
178                    let mapped_rhs = Box::new(map_internal(*rhs, f));
179                    OptimizedExpr::Choice(mapped_lhs, mapped_rhs)
180                }
181                OptimizedExpr::Rep(expr) => {
182                    let mapped = Box::new(map_internal(*expr, f));
183                    OptimizedExpr::Rep(mapped)
184                }
185                OptimizedExpr::Opt(expr) => {
186                    let mapped = Box::new(map_internal(*expr, f));
187                    OptimizedExpr::Opt(mapped)
188                }
189                OptimizedExpr::Push(expr) => {
190                    let mapped = Box::new(map_internal(*expr, f));
191                    OptimizedExpr::Push(mapped)
192                }
193                expr => expr,
194            }
195        }
196
197        map_internal(self, &mut f)
198    }
199
200    /// Applies `f` to the `OptimizedExpr` bottom-up.
201    pub fn map_bottom_up<F>(self, mut f: F) -> OptimizedExpr
202    where
203        F: FnMut(OptimizedExpr) -> OptimizedExpr,
204    {
205        fn map_internal<F>(expr: OptimizedExpr, f: &mut F) -> OptimizedExpr
206        where
207            F: FnMut(OptimizedExpr) -> OptimizedExpr,
208        {
209            let mapped = match expr {
210                OptimizedExpr::PosPred(expr) => {
211                    // TODO: Use box syntax when it gets stabilized.
212                    let mapped = Box::new(map_internal(*expr, f));
213                    OptimizedExpr::PosPred(mapped)
214                }
215                OptimizedExpr::NegPred(expr) => {
216                    let mapped = Box::new(map_internal(*expr, f));
217                    OptimizedExpr::NegPred(mapped)
218                }
219                OptimizedExpr::Seq(lhs, rhs) => {
220                    let mapped_lhs = Box::new(map_internal(*lhs, f));
221                    let mapped_rhs = Box::new(map_internal(*rhs, f));
222                    OptimizedExpr::Seq(mapped_lhs, mapped_rhs)
223                }
224                OptimizedExpr::Choice(lhs, rhs) => {
225                    let mapped_lhs = Box::new(map_internal(*lhs, f));
226                    let mapped_rhs = Box::new(map_internal(*rhs, f));
227                    OptimizedExpr::Choice(mapped_lhs, mapped_rhs)
228                }
229                OptimizedExpr::Rep(expr) => {
230                    let mapped = Box::new(map_internal(*expr, f));
231                    OptimizedExpr::Rep(mapped)
232                }
233                OptimizedExpr::Opt(expr) => {
234                    let mapped = Box::new(map_internal(*expr, f));
235                    OptimizedExpr::Opt(mapped)
236                }
237                OptimizedExpr::Push(expr) => {
238                    let mapped = Box::new(map_internal(*expr, f));
239                    OptimizedExpr::Push(mapped)
240                }
241                expr => expr,
242            };
243
244            f(mapped)
245        }
246
247        map_internal(self, &mut f)
248    }
249}
250
251/// A top-down iterator over an `OptimizedExpr`.
252pub struct OptimizedExprTopDownIterator {
253    current: Option<OptimizedExpr>,
254    next: Option<OptimizedExpr>,
255    right_branches: Vec<OptimizedExpr>,
256}
257
258impl OptimizedExprTopDownIterator {
259    /// Creates a new top down iterator from an `OptimizedExpr`.
260    pub fn new(expr: &OptimizedExpr) -> Self {
261        let mut iter = OptimizedExprTopDownIterator {
262            current: None,
263            next: None,
264            right_branches: vec![],
265        };
266        iter.iterate_expr(expr.clone());
267        iter
268    }
269
270    fn iterate_expr(&mut self, expr: OptimizedExpr) {
271        self.current = Some(expr.clone());
272        match expr {
273            OptimizedExpr::Seq(lhs, rhs) => {
274                self.right_branches.push(*rhs);
275                self.next = Some(*lhs);
276            }
277            OptimizedExpr::Choice(lhs, rhs) => {
278                self.right_branches.push(*rhs);
279                self.next = Some(*lhs);
280            }
281            OptimizedExpr::PosPred(expr)
282            | OptimizedExpr::NegPred(expr)
283            | OptimizedExpr::Rep(expr)
284            | OptimizedExpr::Opt(expr)
285            | OptimizedExpr::Push(expr) => {
286                self.next = Some(*expr);
287            }
288            _ => {
289                self.next = None;
290            }
291        }
292    }
293}
294
295impl Iterator for OptimizedExprTopDownIterator {
296    type Item = OptimizedExpr;
297
298    fn next(&mut self) -> Option<Self::Item> {
299        let result = self.current.take();
300
301        if let Some(expr) = self.next.take() {
302            self.iterate_expr(expr);
303        } else if let Some(expr) = self.right_branches.pop() {
304            self.iterate_expr(expr);
305        }
306
307        result
308    }
309}
310
311#[cfg(test)]
312mod tests {
313    use super::*;
314
315    #[test]
316    fn rotate() {
317        let rules = {
318            use crate::ast::Expr::*;
319            vec![Rule {
320                name: "rule".to_owned(),
321                ty: RuleType::Normal,
322                expr: box_tree!(Choice(
323                    Choice(
324                        Choice(Str(String::from("a")), Str(String::from("b"))),
325                        Str(String::from("c"))
326                    ),
327                    Str(String::from("d"))
328                )),
329            }]
330        };
331        let rotated = {
332            use crate::optimizer::OptimizedExpr::*;
333            vec![OptimizedRule {
334                name: "rule".to_owned(),
335                ty: RuleType::Normal,
336                expr: box_tree!(Choice(
337                    Str(String::from("a")),
338                    Choice(
339                        Str(String::from("b")),
340                        Choice(Str(String::from("c")), Str(String::from("d")))
341                    )
342                )),
343            }]
344        };
345
346        assert_eq!(optimize(rules), rotated);
347    }
348
349    #[test]
350    fn skip() {
351        let rules = {
352            use crate::ast::Expr::*;
353            vec![Rule {
354                name: "rule".to_owned(),
355                ty: RuleType::Atomic,
356                expr: box_tree!(Rep(Seq(
357                    NegPred(Choice(Str(String::from("a")), Str(String::from("b")))),
358                    Ident(String::from("ANY"))
359                ))),
360            }]
361        };
362        let skipped = vec![OptimizedRule {
363            name: "rule".to_owned(),
364            ty: RuleType::Atomic,
365            expr: OptimizedExpr::Skip(vec![String::from("a"), String::from("b")]),
366        }];
367
368        assert_eq!(optimize(rules), skipped);
369    }
370
371    #[test]
372    fn concat_strings() {
373        let rules = {
374            use crate::ast::Expr::*;
375            vec![Rule {
376                name: "rule".to_owned(),
377                ty: RuleType::Atomic,
378                expr: box_tree!(Seq(
379                    Seq(Str(String::from("a")), Str(String::from("b"))),
380                    Seq(Str(String::from("c")), Str(String::from("d")))
381                )),
382            }]
383        };
384        let concatenated = vec![OptimizedRule {
385            name: "rule".to_owned(),
386            ty: RuleType::Atomic,
387            expr: OptimizedExpr::Str(String::from("abcd")),
388        }];
389
390        assert_eq!(optimize(rules), concatenated);
391    }
392
393    #[test]
394    fn unroll_loop_exact() {
395        let rules = vec![Rule {
396            name: "rule".to_owned(),
397            ty: RuleType::Atomic,
398            expr: Expr::RepExact(Box::new(Expr::Ident(String::from("a"))), 3),
399        }];
400        let unrolled = {
401            use crate::optimizer::OptimizedExpr::*;
402            vec![OptimizedRule {
403                name: "rule".to_owned(),
404                ty: RuleType::Atomic,
405                expr: box_tree!(Seq(
406                    Ident(String::from("a")),
407                    Seq(Ident(String::from("a")), Ident(String::from("a")))
408                )),
409            }]
410        };
411
412        assert_eq!(optimize(rules), unrolled);
413    }
414
415    #[test]
416    fn unroll_loop_max() {
417        let rules = vec![Rule {
418            name: "rule".to_owned(),
419            ty: RuleType::Atomic,
420            expr: Expr::RepMax(Box::new(Expr::Str("a".to_owned())), 3),
421        }];
422        let unrolled = {
423            use crate::optimizer::OptimizedExpr::*;
424            vec![OptimizedRule {
425                name: "rule".to_owned(),
426                ty: RuleType::Atomic,
427                expr: box_tree!(Seq(
428                    Opt(Str(String::from("a"))),
429                    Seq(Opt(Str(String::from("a"))), Opt(Str(String::from("a"))))
430                )),
431            }]
432        };
433
434        assert_eq!(optimize(rules), unrolled);
435    }
436
437    #[test]
438    fn unroll_loop_min() {
439        let rules = vec![Rule {
440            name: "rule".to_owned(),
441            ty: RuleType::Atomic,
442            expr: Expr::RepMin(Box::new(Expr::Str("a".to_owned())), 2),
443        }];
444        let unrolled = {
445            use crate::optimizer::OptimizedExpr::*;
446            vec![OptimizedRule {
447                name: "rule".to_owned(),
448                ty: RuleType::Atomic,
449                expr: box_tree!(Seq(
450                    Str(String::from("a")),
451                    Seq(Str(String::from("a")), Rep(Str(String::from("a"))))
452                )),
453            }]
454        };
455
456        assert_eq!(optimize(rules), unrolled);
457    }
458
459    #[test]
460    fn unroll_loop_min_max() {
461        let rules = vec![Rule {
462            name: "rule".to_owned(),
463            ty: RuleType::Atomic,
464            expr: Expr::RepMinMax(Box::new(Expr::Str("a".to_owned())), 2, 3),
465        }];
466        let unrolled = {
467            use crate::optimizer::OptimizedExpr::*;
468            vec![OptimizedRule {
469                name: "rule".to_owned(),
470                ty: RuleType::Atomic,
471                /* TODO possible room for improvement here:
472                 * if the sequences were rolled out in the opposite
473                 * order, we could further optimize the strings
474                 * in cases like this.
475                Str(String::from(("aa")),
476                Opt(Str(String::from("a")))
477                */
478                expr: box_tree!(Seq(
479                    Str(String::from("a")),
480                    Seq(Str(String::from("a")), Opt(Str(String::from("a"))))
481                )),
482            }]
483        };
484
485        assert_eq!(optimize(rules), unrolled);
486    }
487
488    #[test]
489    fn concat_insensitive_strings() {
490        let rules = {
491            use crate::ast::Expr::*;
492            vec![Rule {
493                name: "rule".to_owned(),
494                ty: RuleType::Atomic,
495                expr: box_tree!(Seq(
496                    Seq(Insens(String::from("a")), Insens(String::from("b"))),
497                    Seq(Insens(String::from("c")), Insens(String::from("d")))
498                )),
499            }]
500        };
501        let concatenated = vec![OptimizedRule {
502            name: "rule".to_owned(),
503            ty: RuleType::Atomic,
504            expr: OptimizedExpr::Insens(String::from("abcd")),
505        }];
506
507        assert_eq!(optimize(rules), concatenated);
508    }
509
510    #[test]
511    fn long_common_sequence() {
512        let rules = {
513            use crate::ast::Expr::*;
514            vec![Rule {
515                name: "rule".to_owned(),
516                ty: RuleType::Silent,
517                expr: box_tree!(Choice(
518                    Seq(
519                        Ident(String::from("a")),
520                        Seq(Ident(String::from("b")), Ident(String::from("c")))
521                    ),
522                    Seq(
523                        Seq(Ident(String::from("a")), Ident(String::from("b"))),
524                        Ident(String::from("d"))
525                    )
526                )),
527            }]
528        };
529        let optimized = {
530            use crate::optimizer::OptimizedExpr::*;
531            vec![OptimizedRule {
532                name: "rule".to_owned(),
533                ty: RuleType::Silent,
534                expr: box_tree!(Seq(
535                    Ident(String::from("a")),
536                    Seq(
537                        Ident(String::from("b")),
538                        Choice(Ident(String::from("c")), Ident(String::from("d")))
539                    )
540                )),
541            }]
542        };
543
544        assert_eq!(optimize(rules), optimized);
545    }
546
547    #[test]
548    fn short_common_sequence() {
549        let rules = {
550            use crate::ast::Expr::*;
551            vec![Rule {
552                name: "rule".to_owned(),
553                ty: RuleType::Atomic,
554                expr: box_tree!(Choice(
555                    Seq(Ident(String::from("a")), Ident(String::from("b"))),
556                    Ident(String::from("a"))
557                )),
558            }]
559        };
560        let optimized = {
561            use crate::optimizer::OptimizedExpr::*;
562            vec![OptimizedRule {
563                name: "rule".to_owned(),
564                ty: RuleType::Atomic,
565                expr: box_tree!(Seq(Ident(String::from("a")), Opt(Ident(String::from("b"))))),
566            }]
567        };
568
569        assert_eq!(optimize(rules), optimized);
570    }
571
572    #[test]
573    fn impossible_common_sequence() {
574        let rules = {
575            use crate::ast::Expr::*;
576            vec![Rule {
577                name: "rule".to_owned(),
578                ty: RuleType::Silent,
579                expr: box_tree!(Choice(
580                    Ident(String::from("a")),
581                    Seq(Ident(String::from("a")), Ident(String::from("b")))
582                )),
583            }]
584        };
585        let optimized = {
586            use crate::optimizer::OptimizedExpr::*;
587            vec![OptimizedRule {
588                name: "rule".to_owned(),
589                ty: RuleType::Silent,
590                expr: box_tree!(Ident(String::from("a"))),
591            }]
592        };
593
594        assert_eq!(optimize(rules), optimized);
595    }
596
597    #[test]
598    fn lister() {
599        let rules = {
600            use crate::ast::Expr::*;
601            vec![Rule {
602                name: "rule".to_owned(),
603                ty: RuleType::Silent,
604                expr: box_tree!(Seq(
605                    Rep(Seq(Ident(String::from("a")), Ident(String::from("b")))),
606                    Ident(String::from("a"))
607                )),
608            }]
609        };
610        let optimized = {
611            use crate::optimizer::OptimizedExpr::*;
612            vec![OptimizedRule {
613                name: "rule".to_owned(),
614                ty: RuleType::Silent,
615                expr: box_tree!(Seq(
616                    Ident(String::from("a")),
617                    Rep(Seq(Ident(String::from("b")), Ident(String::from("a"))))
618                )),
619            }]
620        };
621
622        assert_eq!(optimize(rules), optimized);
623    }
624}