1use std::char;
13use std::iter::Peekable;
14
15use pest::error::{Error, ErrorVariant};
16use pest::iterators::{Pair, Pairs};
17use pest::pratt_parser::{Assoc, Op, PrattParser};
18use pest::{Parser, Span};
19
20use crate::ast::{Expr, Rule as AstRule, RuleType};
21use crate::validator;
22
/// Private module whose entire body is textually included from `grammar.rs`.
///
/// Several lints are relaxed for the included code (presumably because that
/// file is generated rather than hand-written — confirm against the build).
#[allow(
    missing_docs,
    unused_attributes,
    elided_lifetimes_in_paths,
    unused_qualifications
)]
mod grammar {
    include!("grammar.rs");
}
33
34pub use self::grammar::*;
35
36#[allow(clippy::perf)]
38pub fn parse(rule: Rule, data: &str) -> Result<Pairs<'_, Rule>, Error<Rule>> {
39 PestParser::parse(rule, data)
40}
41
/// A grammar rule as read from the source, before conversion to the AST form.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ParserRule<'i> {
    /// Name of the rule, taken from the text of `span`.
    pub name: String,
    /// Span of the rule's name (the first token of the rule) in the input.
    pub span: Span<'i>,
    /// Rule kind selected by the optional modifier before the opening brace.
    pub ty: RuleType,
    /// Root of the rule's expression tree.
    pub node: ParserNode<'i>,
}
54
/// One node of a rule's expression tree together with its source location.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ParserNode<'i> {
    /// The expression held by this node.
    pub expr: ParserExpr<'i>,
    /// Span of the input that this expression was built from.
    pub span: Span<'i>,
}
63
64impl<'i> ParserNode<'i> {
65 pub fn filter_map_top_down<F, T>(self, mut f: F) -> Vec<T>
67 where
68 F: FnMut(ParserNode<'i>) -> Option<T>,
69 {
70 pub fn filter_internal<'i, F, T>(node: ParserNode<'i>, f: &mut F, result: &mut Vec<T>)
71 where
72 F: FnMut(ParserNode<'i>) -> Option<T>,
73 {
74 if let Some(value) = f(node.clone()) {
75 result.push(value);
76 }
77
78 match node.expr {
79 ParserExpr::PosPred(node) => {
81 filter_internal(*node, f, result);
82 }
83 ParserExpr::NegPred(node) => {
84 filter_internal(*node, f, result);
85 }
86 ParserExpr::Seq(lhs, rhs) => {
87 filter_internal(*lhs, f, result);
88 filter_internal(*rhs, f, result);
89 }
90 ParserExpr::Choice(lhs, rhs) => {
91 filter_internal(*lhs, f, result);
92 filter_internal(*rhs, f, result);
93 }
94 ParserExpr::Rep(node) => {
95 filter_internal(*node, f, result);
96 }
97 ParserExpr::RepOnce(node) => {
98 filter_internal(*node, f, result);
99 }
100 ParserExpr::RepExact(node, _) => {
101 filter_internal(*node, f, result);
102 }
103 ParserExpr::RepMin(node, _) => {
104 filter_internal(*node, f, result);
105 }
106 ParserExpr::RepMax(node, _) => {
107 filter_internal(*node, f, result);
108 }
109 ParserExpr::RepMinMax(node, ..) => {
110 filter_internal(*node, f, result);
111 }
112 ParserExpr::Opt(node) => {
113 filter_internal(*node, f, result);
114 }
115 ParserExpr::Push(node) => {
116 filter_internal(*node, f, result);
117 }
118 _ => (),
119 }
120 }
121
122 let mut result = vec![];
123
124 filter_internal(self, &mut f, &mut result);
125
126 result
127 }
128}
129
/// All expression forms the meta-grammar parser can produce.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ParserExpr<'i> {
    /// String literal, stored unescaped and without the surrounding quotes.
    Str(String),
    /// Insensitive string literal (`^"abc"` syntax), stored without `^` and quotes.
    Insens(String),
    /// Character range (`'a'..'z'` syntax); both bounds are stored without quotes.
    Range(String, String),
    /// Reference to another rule by identifier.
    Ident(String),
    /// `PEEK[start..end]`; `start` is 0 when omitted, `end` is `None` when omitted.
    PeekSlice(i32, Option<i32>),
    /// Positive predicate, `&e`.
    PosPred(Box<ParserNode<'i>>),
    /// Negative predicate, `!e`.
    NegPred(Box<ParserNode<'i>>),
    /// Sequence of two expressions, `a ~ b`.
    Seq(Box<ParserNode<'i>>, Box<ParserNode<'i>>),
    /// Choice between two expressions, `a | b`.
    Choice(Box<ParserNode<'i>>, Box<ParserNode<'i>>),
    /// Optional expression, `e?`.
    Opt(Box<ParserNode<'i>>),
    /// Repetition, `e*`.
    Rep(Box<ParserNode<'i>>),
    /// Repetition, `e+`.
    RepOnce(Box<ParserNode<'i>>),
    /// Exact repetition count, `e{n}`.
    RepExact(Box<ParserNode<'i>>, u32),
    /// Repetition with a lower bound, `e{n,}`.
    RepMin(Box<ParserNode<'i>>, u32),
    /// Repetition with an upper bound, `e{,n}`.
    RepMax(Box<ParserNode<'i>>, u32),
    /// Repetition with lower and upper bounds, `e{m, n}`.
    RepMinMax(Box<ParserNode<'i>>, u32, u32),
    /// `PUSH(e)`.
    Push(Box<ParserNode<'i>>),
}
168
169fn convert_rule(rule: ParserRule<'_>) -> AstRule {
170 let ParserRule { name, ty, node, .. } = rule;
171 let expr = convert_node(node);
172 AstRule { name, ty, expr }
173}
174
175fn convert_node(node: ParserNode<'_>) -> Expr {
176 match node.expr {
177 ParserExpr::Str(string) => Expr::Str(string),
178 ParserExpr::Insens(string) => Expr::Insens(string),
179 ParserExpr::Range(start, end) => Expr::Range(start, end),
180 ParserExpr::Ident(ident) => Expr::Ident(ident),
181 ParserExpr::PeekSlice(start, end) => Expr::PeekSlice(start, end),
182 ParserExpr::PosPred(node) => Expr::PosPred(Box::new(convert_node(*node))),
183 ParserExpr::NegPred(node) => Expr::NegPred(Box::new(convert_node(*node))),
184 ParserExpr::Seq(node1, node2) => Expr::Seq(
185 Box::new(convert_node(*node1)),
186 Box::new(convert_node(*node2)),
187 ),
188 ParserExpr::Choice(node1, node2) => Expr::Choice(
189 Box::new(convert_node(*node1)),
190 Box::new(convert_node(*node2)),
191 ),
192 ParserExpr::Opt(node) => Expr::Opt(Box::new(convert_node(*node))),
193 ParserExpr::Rep(node) => Expr::Rep(Box::new(convert_node(*node))),
194 ParserExpr::RepOnce(node) => Expr::RepOnce(Box::new(convert_node(*node))),
195 ParserExpr::RepExact(node, num) => Expr::RepExact(Box::new(convert_node(*node)), num),
196 ParserExpr::RepMin(node, max) => Expr::RepMin(Box::new(convert_node(*node)), max),
197 ParserExpr::RepMax(node, max) => Expr::RepMax(Box::new(convert_node(*node)), max),
198 ParserExpr::RepMinMax(node, min, max) => {
199 Expr::RepMinMax(Box::new(convert_node(*node)), min, max)
200 }
201 ParserExpr::Push(node) => Expr::Push(Box::new(convert_node(*node))),
202 }
203}
204
205pub fn consume_rules(pairs: Pairs<'_, Rule>) -> Result<Vec<AstRule>, Vec<Error<Rule>>> {
207 let rules = consume_rules_with_spans(pairs)?;
208 let errors = validator::validate_ast(&rules);
209 if errors.is_empty() {
210 Ok(rules.into_iter().map(convert_rule).collect())
211 } else {
212 Err(errors)
213 }
214}
215
216#[inline]
219pub fn rename_meta_rule(rule: &Rule) -> String {
220 match *rule {
221 Rule::grammar_rule => "rule".to_owned(),
222 Rule::_push => "PUSH".to_owned(),
223 Rule::assignment_operator => "`=`".to_owned(),
224 Rule::silent_modifier => "`_`".to_owned(),
225 Rule::atomic_modifier => "`@`".to_owned(),
226 Rule::compound_atomic_modifier => "`$`".to_owned(),
227 Rule::non_atomic_modifier => "`!`".to_owned(),
228 Rule::opening_brace => "`{`".to_owned(),
229 Rule::closing_brace => "`}`".to_owned(),
230 Rule::opening_brack => "`[`".to_owned(),
231 Rule::closing_brack => "`]`".to_owned(),
232 Rule::opening_paren => "`(`".to_owned(),
233 Rule::positive_predicate_operator => "`&`".to_owned(),
234 Rule::negative_predicate_operator => "`!`".to_owned(),
235 Rule::sequence_operator => "`&`".to_owned(),
236 Rule::choice_operator => "`|`".to_owned(),
237 Rule::optional_operator => "`?`".to_owned(),
238 Rule::repeat_operator => "`*`".to_owned(),
239 Rule::repeat_once_operator => "`+`".to_owned(),
240 Rule::comma => "`,`".to_owned(),
241 Rule::closing_paren => "`)`".to_owned(),
242 Rule::quote => "`\"`".to_owned(),
243 Rule::insensitive_string => "`^`".to_owned(),
244 Rule::range_operator => "`..`".to_owned(),
245 Rule::single_quote => "`'`".to_owned(),
246 Rule::grammar_doc => "//!".to_owned(),
247 Rule::line_doc => "///".to_owned(),
248 other_rule => format!("{:?}", other_rule),
249 }
250}
251
252fn consume_rules_with_spans(
253 pairs: Pairs<'_, Rule>,
254) -> Result<Vec<ParserRule<'_>>, Vec<Error<Rule>>> {
255 let pratt = PrattParser::new()
256 .op(Op::infix(Rule::choice_operator, Assoc::Left))
257 .op(Op::infix(Rule::sequence_operator, Assoc::Left));
258
259 pairs
260 .filter(|pair| pair.as_rule() == Rule::grammar_rule)
261 .filter(|pair| {
262 let mut pairs = pair.clone().into_inner();
264 let pair = pairs.next().unwrap();
265
266 pair.as_rule() != Rule::line_doc
267 })
268 .map(|pair| {
269 let mut pairs = pair.into_inner().peekable();
270
271 let span = pairs.next().unwrap().as_span();
272 let name = span.as_str().to_owned();
273
274 pairs.next().unwrap(); let ty = if pairs.peek().unwrap().as_rule() != Rule::opening_brace {
277 match pairs.next().unwrap().as_rule() {
278 Rule::silent_modifier => RuleType::Silent,
279 Rule::atomic_modifier => RuleType::Atomic,
280 Rule::compound_atomic_modifier => RuleType::CompoundAtomic,
281 Rule::non_atomic_modifier => RuleType::NonAtomic,
282 _ => unreachable!(),
283 }
284 } else {
285 RuleType::Normal
286 };
287
288 pairs.next().unwrap(); let mut inner_nodes = pairs.next().unwrap().into_inner().peekable();
292 if inner_nodes.peek().unwrap().as_rule() == Rule::choice_operator {
293 inner_nodes.next().unwrap();
294 }
295
296 let node = consume_expr(inner_nodes, &pratt)?;
297
298 Ok(ParserRule {
299 name,
300 span,
301 ty,
302 node,
303 })
304 })
305 .collect()
306}
307
308fn consume_expr<'i>(
309 pairs: Peekable<Pairs<'i, Rule>>,
310 pratt: &PrattParser<Rule>,
311) -> Result<ParserNode<'i>, Vec<Error<Rule>>> {
312 fn unaries<'i>(
313 mut pairs: Peekable<Pairs<'i, Rule>>,
314 pratt: &PrattParser<Rule>,
315 ) -> Result<ParserNode<'i>, Vec<Error<Rule>>> {
316 let pair = pairs.next().unwrap();
317
318 let node = match pair.as_rule() {
319 Rule::opening_paren => {
320 let node = unaries(pairs, pratt)?;
321 let end = node.span.end_pos();
322
323 ParserNode {
324 expr: node.expr,
325 span: pair.as_span().start_pos().span(&end),
326 }
327 }
328 Rule::positive_predicate_operator => {
329 let node = unaries(pairs, pratt)?;
330 let end = node.span.end_pos();
331
332 ParserNode {
333 expr: ParserExpr::PosPred(Box::new(node)),
334 span: pair.as_span().start_pos().span(&end),
335 }
336 }
337 Rule::negative_predicate_operator => {
338 let node = unaries(pairs, pratt)?;
339 let end = node.span.end_pos();
340
341 ParserNode {
342 expr: ParserExpr::NegPred(Box::new(node)),
343 span: pair.as_span().start_pos().span(&end),
344 }
345 }
346 other_rule => {
347 let node = match other_rule {
348 Rule::expression => consume_expr(pair.into_inner().peekable(), pratt)?,
349 Rule::_push => {
350 let start = pair.clone().as_span().start_pos();
351 let mut pairs = pair.into_inner();
352 pairs.next().unwrap(); let pair = pairs.next().unwrap();
354
355 let node = consume_expr(pair.into_inner().peekable(), pratt)?;
356 let end = node.span.end_pos();
357
358 ParserNode {
359 expr: ParserExpr::Push(Box::new(node)),
360 span: start.span(&end),
361 }
362 }
363 Rule::peek_slice => {
364 let mut pairs = pair.clone().into_inner();
365 pairs.next().unwrap(); let pair_start = pairs.next().unwrap(); let start: i32 = match pair_start.as_rule() {
368 Rule::range_operator => 0,
369 Rule::integer => {
370 pairs.next().unwrap(); pair_start.as_str().parse().unwrap()
372 }
373 _ => unreachable!(),
374 };
375 let pair_end = pairs.next().unwrap(); let end: Option<i32> = match pair_end.as_rule() {
377 Rule::closing_brack => None,
378 Rule::integer => {
379 pairs.next().unwrap(); Some(pair_end.as_str().parse().unwrap())
381 }
382 _ => unreachable!(),
383 };
384 ParserNode {
385 expr: ParserExpr::PeekSlice(start, end),
386 span: pair.as_span(),
387 }
388 }
389 Rule::identifier => ParserNode {
390 expr: ParserExpr::Ident(pair.as_str().to_owned()),
391 span: pair.clone().as_span(),
392 },
393 Rule::string => {
394 let string = unescape(pair.as_str()).expect("incorrect string literal");
395 ParserNode {
396 expr: ParserExpr::Str(string[1..string.len() - 1].to_owned()),
397 span: pair.clone().as_span(),
398 }
399 }
400 Rule::insensitive_string => {
401 let string = unescape(pair.as_str()).expect("incorrect string literal");
402 ParserNode {
403 expr: ParserExpr::Insens(string[2..string.len() - 1].to_owned()),
404 span: pair.clone().as_span(),
405 }
406 }
407 Rule::range => {
408 let mut pairs = pair.into_inner();
409 let pair = pairs.next().unwrap();
410 let start = unescape(pair.as_str()).expect("incorrect char literal");
411 let start_pos = pair.clone().as_span().start_pos();
412 pairs.next();
413 let pair = pairs.next().unwrap();
414 let end = unescape(pair.as_str()).expect("incorrect char literal");
415 let end_pos = pair.clone().as_span().end_pos();
416
417 ParserNode {
418 expr: ParserExpr::Range(
419 start[1..start.len() - 1].to_owned(),
420 end[1..end.len() - 1].to_owned(),
421 ),
422 span: start_pos.span(&end_pos),
423 }
424 }
425 _ => unreachable!(),
426 };
427
428 pairs.fold(
429 Ok(node),
430 |node: Result<ParserNode<'i>, Vec<Error<Rule>>>, pair| {
431 let node = node?;
432
433 let node = match pair.as_rule() {
434 Rule::optional_operator => {
435 let start = node.span.start_pos();
436 ParserNode {
437 expr: ParserExpr::Opt(Box::new(node)),
438 span: start.span(&pair.as_span().end_pos()),
439 }
440 }
441 Rule::repeat_operator => {
442 let start = node.span.start_pos();
443 ParserNode {
444 expr: ParserExpr::Rep(Box::new(node)),
445 span: start.span(&pair.as_span().end_pos()),
446 }
447 }
448 Rule::repeat_once_operator => {
449 let start = node.span.start_pos();
450 ParserNode {
451 expr: ParserExpr::RepOnce(Box::new(node)),
452 span: start.span(&pair.as_span().end_pos()),
453 }
454 }
455 Rule::repeat_exact => {
456 let mut inner = pair.clone().into_inner();
457
458 inner.next().unwrap(); let number = inner.next().unwrap();
461 let num = if let Ok(num) = number.as_str().parse::<u32>() {
462 num
463 } else {
464 return Err(vec![Error::new_from_span(
465 ErrorVariant::CustomError {
466 message: "number cannot overflow u32".to_owned(),
467 },
468 number.as_span(),
469 )]);
470 };
471
472 if num == 0 {
473 let error: Error<Rule> = Error::new_from_span(
474 ErrorVariant::CustomError {
475 message: "cannot repeat 0 times".to_owned(),
476 },
477 number.as_span(),
478 );
479
480 return Err(vec![error]);
481 }
482
483 let start = node.span.start_pos();
484 ParserNode {
485 expr: ParserExpr::RepExact(Box::new(node), num),
486 span: start.span(&pair.as_span().end_pos()),
487 }
488 }
489 Rule::repeat_min => {
490 let mut inner = pair.clone().into_inner();
491
492 inner.next().unwrap(); let min_number = inner.next().unwrap();
495 let min = if let Ok(min) = min_number.as_str().parse::<u32>() {
496 min
497 } else {
498 return Err(vec![Error::new_from_span(
499 ErrorVariant::CustomError {
500 message: "number cannot overflow u32".to_owned(),
501 },
502 min_number.as_span(),
503 )]);
504 };
505
506 let start = node.span.start_pos();
507 ParserNode {
508 expr: ParserExpr::RepMin(Box::new(node), min),
509 span: start.span(&pair.as_span().end_pos()),
510 }
511 }
512 Rule::repeat_max => {
513 let mut inner = pair.clone().into_inner();
514
515 inner.next().unwrap(); inner.next().unwrap(); let max_number = inner.next().unwrap();
519 let max = if let Ok(max) = max_number.as_str().parse::<u32>() {
520 max
521 } else {
522 return Err(vec![Error::new_from_span(
523 ErrorVariant::CustomError {
524 message: "number cannot overflow u32".to_owned(),
525 },
526 max_number.as_span(),
527 )]);
528 };
529
530 if max == 0 {
531 let error: Error<Rule> = Error::new_from_span(
532 ErrorVariant::CustomError {
533 message: "cannot repeat 0 times".to_owned(),
534 },
535 max_number.as_span(),
536 );
537
538 return Err(vec![error]);
539 }
540
541 let start = node.span.start_pos();
542 ParserNode {
543 expr: ParserExpr::RepMax(Box::new(node), max),
544 span: start.span(&pair.as_span().end_pos()),
545 }
546 }
547 Rule::repeat_min_max => {
548 let mut inner = pair.clone().into_inner();
549
550 inner.next().unwrap(); let min_number = inner.next().unwrap();
553 let min = if let Ok(min) = min_number.as_str().parse::<u32>() {
554 min
555 } else {
556 return Err(vec![Error::new_from_span(
557 ErrorVariant::CustomError {
558 message: "number cannot overflow u32".to_owned(),
559 },
560 min_number.as_span(),
561 )]);
562 };
563
564 inner.next().unwrap(); let max_number = inner.next().unwrap();
567 let max = if let Ok(max) = max_number.as_str().parse::<u32>() {
568 max
569 } else {
570 return Err(vec![Error::new_from_span(
571 ErrorVariant::CustomError {
572 message: "number cannot overflow u32".to_owned(),
573 },
574 max_number.as_span(),
575 )]);
576 };
577
578 if max == 0 {
579 let error: Error<Rule> = Error::new_from_span(
580 ErrorVariant::CustomError {
581 message: "cannot repeat 0 times".to_owned(),
582 },
583 max_number.as_span(),
584 );
585
586 return Err(vec![error]);
587 }
588
589 let start = node.span.start_pos();
590 ParserNode {
591 expr: ParserExpr::RepMinMax(Box::new(node), min, max),
592 span: start.span(&pair.as_span().end_pos()),
593 }
594 }
595 Rule::closing_paren => {
596 let start = node.span.start_pos();
597
598 ParserNode {
599 expr: node.expr,
600 span: start.span(&pair.as_span().end_pos()),
601 }
602 }
603 _ => unreachable!(),
604 };
605
606 Ok(node)
607 },
608 )?
609 }
610 };
611
612 Ok(node)
613 }
614
615 let term = |pair: Pair<'i, Rule>| unaries(pair.into_inner().peekable(), pratt);
616 let infix = |lhs: Result<ParserNode<'i>, Vec<Error<Rule>>>,
617 op: Pair<'i, Rule>,
618 rhs: Result<ParserNode<'i>, Vec<Error<Rule>>>| match op.as_rule() {
619 Rule::sequence_operator => {
620 let lhs = lhs?;
621 let rhs = rhs?;
622
623 let start = lhs.span.start_pos();
624 let end = rhs.span.end_pos();
625
626 Ok(ParserNode {
627 expr: ParserExpr::Seq(Box::new(lhs), Box::new(rhs)),
628 span: start.span(&end),
629 })
630 }
631 Rule::choice_operator => {
632 let lhs = lhs?;
633 let rhs = rhs?;
634
635 let start = lhs.span.start_pos();
636 let end = rhs.span.end_pos();
637
638 Ok(ParserNode {
639 expr: ParserExpr::Choice(Box::new(lhs), Box::new(rhs)),
640 span: start.span(&end),
641 })
642 }
643 _ => unreachable!(),
644 };
645
646 pratt.map_primary(term).map_infix(infix).parse(pairs)
647}
648
/// Resolves the backslash escapes in `string`.
///
/// Supported escapes: `\"`, `\\`, `\r`, `\n`, `\t`, `\0`, `\'`, `\xHH` (exactly
/// two hex digits) and `\u{H..}` (two to six hex digits naming a valid `char`).
/// All other characters are copied through unchanged.
///
/// Returns `None` when the input ends inside an escape, uses an unknown escape
/// letter, or carries a malformed or out-of-range code. Only ASCII hex digits
/// are accepted as digits, so sign-prefixed forms such as `\x+F` are rejected.
fn unescape(string: &str) -> Option<String> {
    /// `true` when every byte of `digits` is an ASCII hexadecimal digit.
    fn is_hex(digits: &str) -> bool {
        digits.bytes().all(|b| b.is_ascii_hexdigit())
    }

    let mut result = String::with_capacity(string.len());
    let mut chars = string.chars();

    while let Some(c) = chars.next() {
        if c != '\\' {
            result.push(c);
            continue;
        }

        match chars.next()? {
            '"' => result.push('"'),
            '\\' => result.push('\\'),
            'r' => result.push('\r'),
            'n' => result.push('\n'),
            't' => result.push('\t'),
            '0' => result.push('\0'),
            '\'' => result.push('\''),
            'x' => {
                let rest = chars.as_str();
                // `get` also fails when byte 2 is not a character boundary.
                let digits = rest.get(..2)?;

                // `from_str_radix` alone would also accept a leading `+`.
                if !is_hex(digits) {
                    return None;
                }

                let value = u8::from_str_radix(digits, 16).ok()?;

                result.push(char::from(value));
                chars = rest[2..].chars();
            }
            'u' => {
                let rest = chars.as_str().strip_prefix('{')?;
                let close = rest.find('}')?;
                let digits = &rest[..close];

                if !(2..=6).contains(&digits.len()) || !is_hex(digits) {
                    return None;
                }

                let value = u32::from_str_radix(digits, 16).ok()?;

                // Fails for surrogates and for values above `char::MAX`.
                result.push(char::from_u32(value)?);
                chars = rest[close + 1..].chars();
            }
            _ => return None,
        }
    }

    Some(result)
}
704
705#[cfg(test)]
706mod tests {
707 use std::convert::TryInto;
708
709 use super::super::unwrap_or_report;
710 use super::*;
711
    // Two consecutive rule definitions must yield two `grammar_rule` pairs with
    // the listed child tokens and offsets.
    #[test]
    fn rules() {
        parses_to! {
            parser: PestParser,
            input: "a = { b } c = { d }",
            rule: Rule::grammar_rules,
            tokens: [
                grammar_rule(0, 9, [
                    identifier(0, 1),
                    assignment_operator(2, 3),
                    opening_brace(4, 5),
                    expression(6, 8, [
                        term(6, 8, [
                            identifier(6, 7)
                        ])
                    ]),
                    closing_brace(8, 9)
                ]),
                grammar_rule(10, 19, [
                    identifier(10, 11),
                    assignment_operator(12, 13),
                    opening_brace(14, 15),
                    expression(16, 18, [
                        term(16, 18, [
                            identifier(16, 17)
                        ])
                    ]),
                    closing_brace(18, 19)
                ])
            ]
        };
    }
744
    // A single rule with the `!` modifier and a `~` sequence in its body.
    #[test]
    fn rule() {
        parses_to! {
            parser: PestParser,
            input: "a = ! { b ~ c }",
            rule: Rule::grammar_rule,
            tokens: [
                grammar_rule(0, 15, [
                    identifier(0, 1),
                    assignment_operator(2, 3),
                    non_atomic_modifier(4, 5),
                    opening_brace(6, 7),
                    expression(8, 14, [
                        term(8, 10, [
                            identifier(8, 9)
                        ]),
                        sequence_operator(10, 11),
                        term(12, 14, [
                            identifier(12, 13)
                        ])
                    ]),
                    closing_brace(14, 15)
                ])
            ]
        };
    }
771
    // An expression mixing choice, sequence, a range, a negated insensitive
    // string and a parenthesised group followed by two postfix operators.
    #[test]
    fn expression() {
        parses_to! {
            parser: PestParser,
            input: "_a | 'a'..'b' ~ !^\"abc\" ~ (d | e)*?",
            rule: Rule::expression,
            tokens: [
                expression(0, 35, [
                    term(0, 3, [
                        identifier(0, 2)
                    ]),
                    choice_operator(3, 4),
                    term(5, 14, [
                        range(5, 13, [
                            character(5, 8, [
                                single_quote(5, 6),
                                inner_chr(6, 7),
                                single_quote(7, 8)
                            ]),
                            range_operator(8, 10),
                            character(10, 13, [
                                single_quote(10, 11),
                                inner_chr(11, 12),
                                single_quote(12, 13)
                            ])
                        ])
                    ]),
                    sequence_operator(14, 15),
                    term(16, 24, [
                        negative_predicate_operator(16, 17),
                        insensitive_string(17, 23, [
                            string(18, 23, [
                                quote(18, 19),
                                inner_str(19, 22),
                                quote(22, 23)
                            ])
                        ])
                    ]),
                    sequence_operator(24, 25),
                    term(26, 35, [
                        opening_paren(26, 27),
                        expression(27, 32, [
                            term(27, 29, [
                                identifier(27, 28)
                            ]),
                            choice_operator(29, 30),
                            term(31, 32, [
                                identifier(31, 32)
                            ])
                        ]),
                        closing_paren(32, 33),
                        repeat_operator(33, 34),
                        optional_operator(34, 35)
                    ])
                ])
            ]
        };
    }
830
    // `{n}` postfix: braces around a single number.
    #[test]
    fn repeat_exact() {
        parses_to! {
            parser: PestParser,
            input: "{1}",
            rule: Rule::repeat_exact,
            tokens: [
                repeat_exact(0, 3, [
                    opening_brace(0, 1),
                    number(1, 2),
                    closing_brace(2, 3)
                ])
            ]
        };
    }
846
    // `{n,}` postfix: a number followed by a comma.
    #[test]
    fn repeat_min() {
        parses_to! {
            parser: PestParser,
            input: "{2,}",
            rule: Rule::repeat_min,
            tokens: [
                repeat_min(0, 4, [
                    opening_brace(0,1),
                    number(1,2),
                    comma(2,3),
                    closing_brace(3,4)
                ])
            ]
        }
    }
863
    // `{,n}` postfix: a comma followed by a number; the space in between is
    // not part of any token.
    #[test]
    fn repeat_max() {
        parses_to! {
            parser: PestParser,
            input: "{, 3}",
            rule: Rule::repeat_max,
            tokens: [
                repeat_max(0, 5, [
                    opening_brace(0,1),
                    comma(1,2),
                    number(3,4),
                    closing_brace(4,5)
                ])
            ]
        }
    }
880
    // `{m, n}` postfix: two numbers separated by a comma.
    #[test]
    fn repeat_min_max() {
        parses_to! {
            parser: PestParser,
            input: "{1, 2}",
            rule: Rule::repeat_min_max,
            tokens: [
                repeat_min_max(0, 6, [
                    opening_brace(0, 1),
                    number(1, 2),
                    comma(2, 3),
                    number(4, 5),
                    closing_brace(5, 6)
                ])
            ]
        };
    }
898
    // `PUSH ( expr )`: the keyword itself produces no child token, only the
    // parentheses and the inner expression do.
    #[test]
    fn push() {
        parses_to! {
            parser: PestParser,
            input: "PUSH ( a )",
            rule: Rule::_push,
            tokens: [
                _push(0, 10, [
                    opening_paren(5, 6),
                    expression(7, 9, [
                        term(7, 9, [
                            identifier(7, 8)
                        ])
                    ]),
                    closing_paren(9, 10)
                ])
            ]
        };
    }
918
    // `PEEK[..]` with neither bound given.
    #[test]
    fn peek_slice_all() {
        parses_to! {
            parser: PestParser,
            input: "PEEK[..]",
            rule: Rule::peek_slice,
            tokens: [
                peek_slice(0, 8, [
                    opening_brack(4, 5),
                    range_operator(5, 7),
                    closing_brack(7, 8)
                ])
            ]
        };
    }
934
    // `PEEK[n..]` with only the start bound given.
    #[test]
    fn peek_slice_start() {
        parses_to! {
            parser: PestParser,
            input: "PEEK[1..]",
            rule: Rule::peek_slice,
            tokens: [
                peek_slice(0, 9, [
                    opening_brack(4, 5),
                    integer(5, 6),
                    range_operator(6, 8),
                    closing_brack(8, 9)
                ])
            ]
        };
    }
951
    // `PEEK[..n]` with only a (negative) end bound, and a space after `[`.
    #[test]
    fn peek_slice_end() {
        parses_to! {
            parser: PestParser,
            input: "PEEK[ ..-1]",
            rule: Rule::peek_slice,
            tokens: [
                peek_slice(0, 11, [
                    opening_brack(4, 5),
                    range_operator(6, 8),
                    integer(8, 10),
                    closing_brack(10, 11)
                ])
            ]
        };
    }
968
    // `PEEK[a..b]` with both bounds given, the first one negative.
    #[test]
    fn peek_slice_start_end() {
        parses_to! {
            parser: PestParser,
            input: "PEEK[-5..10]",
            rule: Rule::peek_slice,
            tokens: [
                peek_slice(0, 12, [
                    opening_brack(4, 5),
                    integer(5, 7),
                    range_operator(7, 9),
                    integer(9, 11),
                    closing_brack(11, 12)
                ])
            ]
        };
    }
986
    // An identifier may start with `_` and contain digits.
    #[test]
    fn identifier() {
        parses_to! {
            parser: PestParser,
            input: "_a8943",
            rule: Rule::identifier,
            tokens: [
                identifier(0, 6)
            ]
        };
    }
998
    // A string literal exercising every escape form: simple escapes, `\x..`
    // and `\u{..}`.
    #[test]
    fn string() {
        parses_to! {
            parser: PestParser,
            input: "\"aaaaa\\n\\r\\t\\\\\\0\\'\\\"\\x0F\\u{123abC}\\u{12}aaaaa\"",
            rule: Rule::string,
            tokens: [
                string(0, 46, [
                    quote(0, 1),
                    inner_str(1, 45),
                    quote(45, 46)
                ])
            ]
        };
    }
1014
1015 #[test]
1016 fn insensitive_string() {
1017 parses_to! {
1018 parser: PestParser,
1019 input: "^ \"\\\"hi\"",
1020 rule: Rule::insensitive_string,
1021 tokens: [
1022 insensitive_string(0, 9, [
1023 string(3, 9, [
1024 quote(3, 4),
1025 inner_str(4, 8),
1026 quote(8, 9)
1027 ])
1028 ])
1029 ]
1030 };
1031 }
1032
    // A character range whose bounds are escaped characters, with spaces
    // around `..`.
    #[test]
    fn range() {
        parses_to! {
            parser: PestParser,
            input: "'\\n' .. '\\x1a'",
            rule: Rule::range,
            tokens: [
                range(0, 14, [
                    character(0, 4, [
                        single_quote(0, 1),
                        inner_chr(1, 3),
                        single_quote(3, 4)
                    ]),
                    range_operator(5, 7),
                    character(8, 14, [
                        single_quote(8, 9),
                        inner_chr(9, 13),
                        single_quote(13, 14)
                    ])
                ])
            ]
        };
    }
1056
    // A character literal written as a `\u{..}` escape.
    #[test]
    fn character() {
        parses_to! {
            parser: PestParser,
            input: "'\\u{123abC}'",
            rule: Rule::character,
            tokens: [
                character(0, 12, [
                    single_quote(0, 1),
                    inner_chr(1, 11),
                    single_quote(11, 12)
                ])
            ]
        };
    }
1072
    // A number may have leading zeros.
    #[test]
    fn number() {
        parses_to! {
            parser: PestParser,
            input: "0123",
            rule: Rule::number,
            tokens: [
                number(0, 4)
            ]
        };
    }
1084
1085 #[test]
1086 fn comment() {
1087 parses_to! {
1088 parser: PestParser,
1089 input: "a ~ // asda\n b",
1090 rule: Rule::expression,
1091 tokens: [
1092 expression(0, 17, [
1093 term(0, 2, [
1094 identifier(0, 1)
1095 ]),
1096 sequence_operator(2, 3),
1097 term(16, 17, [
1098 identifier(16, 17)
1099 ])
1100 ])
1101 ]
1102 };
1103 }
1104
    // A `//!` grammar doc, a `///` line doc and a rule: the line doc is
    // reported as its own `grammar_rule` pair containing only `line_doc`.
    #[test]
    fn grammar_doc_and_line_doc() {
        let input = "//! hello\n/// world\na = { \"a\" }";
        parses_to! {
            parser: PestParser,
            input: input,
            rule: Rule::grammar_rules,
            tokens: [
                grammar_doc(0, 9, [
                    inner_doc(4, 9),
                ]),
                grammar_rule(10, 19, [
                    line_doc(10, 19, [
                        inner_doc(14, 19),
                    ]),
                ]),
                grammar_rule(20, 31, [
                    identifier(20, 21),
                    assignment_operator(22, 23),
                    opening_brace(24, 25),
                    expression(26, 30, [
                        term(26, 30, [
                            string(26, 29, [
                                quote(26, 27),
                                inner_str(27, 28),
                                quote(28, 29)
                            ])
                        ])
                    ]),
                    closing_brace(30, 31),
                ])
            ]
        };
    }
1139
    // A grammar cannot start with a digit: a rule or grammar doc is expected
    // at offset 0.
    #[test]
    fn wrong_identifier() {
        fails_with! {
            parser: PestParser,
            input: "0",
            rule: Rule::grammar_rules,
            positives: vec![Rule::grammar_rule, Rule::grammar_doc],
            negatives: vec![],
            pos: 0
        };
    }
1151
    // A rule name must be followed by `=`.
    #[test]
    fn missing_assignment_operator() {
        fails_with! {
            parser: PestParser,
            input: "a {}",
            rule: Rule::grammar_rules,
            positives: vec![Rule::assignment_operator],
            negatives: vec![],
            pos: 2
        };
    }
1163
    // `*` is not a rule modifier: after `=` only `{` or one of the four
    // modifiers is accepted.
    #[test]
    fn wrong_modifier() {
        fails_with! {
            parser: PestParser,
            input: "a = *{}",
            rule: Rule::grammar_rules,
            positives: vec![
                Rule::opening_brace,
                Rule::silent_modifier,
                Rule::atomic_modifier,
                Rule::compound_atomic_modifier,
                Rule::non_atomic_modifier
            ],
            negatives: vec![],
            pos: 4
        };
    }
1181
    // A modifier must be followed by `{`.
    #[test]
    fn missing_opening_brace() {
        fails_with! {
            parser: PestParser,
            input: "a = _",
            rule: Rule::grammar_rules,
            positives: vec![Rule::opening_brace],
            negatives: vec![],
            pos: 5
        };
    }
1193
    // A rule body may not be empty: an expression is expected after `{`.
    #[test]
    fn empty_rule() {
        fails_with! {
            parser: PestParser,
            input: "a = {}",
            rule: Rule::grammar_rules,
            positives: vec![Rule::expression],
            negatives: vec![],
            pos: 5
        };
    }
1205
    // A binary operator must be followed by a term.
    #[test]
    fn missing_rhs() {
        fails_with! {
            parser: PestParser,
            input: "a = { b ~ }",
            rule: Rule::grammar_rules,
            positives: vec![Rule::term],
            negatives: vec![],
            pos: 10
        };
    }
1217
    // `~` cannot open an expression.
    #[test]
    fn incorrect_prefix() {
        fails_with! {
            parser: PestParser,
            input: "a = { ~ b}",
            rule: Rule::grammar_rules,
            positives: vec![Rule::expression],
            negatives: vec![],
            pos: 6
        };
    }
1229
    // `%` is not an operator: after a term only a closing brace, a binary
    // operator or a postfix operator is accepted.
    #[test]
    fn wrong_op() {
        fails_with! {
            parser: PestParser,
            input: "a = { b % }",
            rule: Rule::grammar_rules,
            positives: vec![
                Rule::opening_brace,
                Rule::closing_brace,
                Rule::sequence_operator,
                Rule::choice_operator,
                Rule::optional_operator,
                Rule::repeat_operator,
                Rule::repeat_once_operator
            ],
            negatives: vec![],
            pos: 8
        };
    }
1249
    // An opened parenthesis must be closed before the rule's closing brace.
    #[test]
    fn missing_closing_paren() {
        fails_with! {
            parser: PestParser,
            input: "a = { (b }",
            rule: Rule::grammar_rules,
            positives: vec![
                Rule::opening_brace,
                Rule::closing_paren,
                Rule::sequence_operator,
                Rule::choice_operator,
                Rule::optional_operator,
                Rule::repeat_operator,
                Rule::repeat_once_operator
            ],
            negatives: vec![],
            pos: 9
        };
    }
1269
    // A prefix operator must be followed by something that can start a term.
    #[test]
    fn missing_term() {
        fails_with! {
            parser: PestParser,
            input: "a = { ! }",
            rule: Rule::grammar_rules,
            positives: vec![
                Rule::opening_paren,
                Rule::positive_predicate_operator,
                Rule::negative_predicate_operator,
                Rule::_push,
                Rule::peek_slice,
                Rule::identifier,
                Rule::insensitive_string,
                Rule::quote,
                Rule::single_quote
            ],
            negatives: vec![],
            pos: 8
        };
    }
1291
    // An unterminated string literal: a closing quote is expected at the end
    // of the input.
    #[test]
    fn string_missing_ending_quote() {
        fails_with! {
            parser: PestParser,
            input: "a = { \" }",
            rule: Rule::grammar_rules,
            positives: vec![Rule::quote],
            negatives: vec![],
            pos: 9
        };
    }
1303
    // `^` must be followed by a string literal.
    #[test]
    fn insensitive_missing_string() {
        fails_with! {
            parser: PestParser,
            input: "a = { ^ }",
            rule: Rule::grammar_rules,
            positives: vec![Rule::quote],
            negatives: vec![],
            pos: 8
        };
    }
1315
    // An unterminated character literal: a closing single quote is expected.
    #[test]
    fn char_missing_ending_single_quote() {
        fails_with! {
            parser: PestParser,
            input: "a = { \' }",
            rule: Rule::grammar_rules,
            positives: vec![Rule::single_quote],
            negatives: vec![],
            pos: 8
        };
    }
1327
    // A lone character literal is not a term: `..` is expected after it.
    #[test]
    fn range_missing_range_operator() {
        fails_with! {
            parser: PestParser,
            input: "a = { \'a\' }",
            rule: Rule::grammar_rules,
            positives: vec![Rule::range_operator],
            negatives: vec![],
            pos: 10
        };
    }
1339
    // `&` is not accepted in postfix position.
    #[test]
    fn wrong_postfix() {
        fails_with! {
            parser: PestParser,
            input: "a = { a& }",
            rule: Rule::grammar_rules,
            positives: vec![
                Rule::opening_brace,
                Rule::closing_brace,
                Rule::sequence_operator,
                Rule::choice_operator,
                Rule::optional_operator,
                Rule::repeat_operator,
                Rule::repeat_once_operator
            ],
            negatives: vec![],
            pos: 7
        };
    }
1359
    // End-to-end check: a documented silent rule using every repetition form,
    // a negative predicate, an insensitive string, PUSH and a range is parsed
    // and converted, then compared against the expected `AstRule`.
    #[test]
    fn ast() {
        let input = r##"
        /// This is line comment
        /// This is rule
        rule = _{ a{1} ~ "a"{3,} ~ b{, 2} ~ "b"{1, 2} | !(^"c" | PUSH('d'..'e'))?* }
        "##;

        let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
        let ast = consume_rules_with_spans(pairs).unwrap();
        let ast: Vec<_> = ast.into_iter().map(convert_rule).collect();

        assert_eq!(
            ast,
            vec![AstRule {
                name: "rule".to_owned(),
                ty: RuleType::Silent,
                expr: Expr::Choice(
                    Box::new(Expr::Seq(
                        Box::new(Expr::Seq(
                            Box::new(Expr::Seq(
                                Box::new(Expr::RepExact(Box::new(Expr::Ident("a".to_owned())), 1)),
                                Box::new(Expr::RepMin(Box::new(Expr::Str("a".to_owned())), 3))
                            )),
                            Box::new(Expr::RepMax(Box::new(Expr::Ident("b".to_owned())), 2))
                        )),
                        Box::new(Expr::RepMinMax(Box::new(Expr::Str("b".to_owned())), 1, 2))
                    )),
                    Box::new(Expr::NegPred(Box::new(Expr::Rep(Box::new(Expr::Opt(
                        Box::new(Expr::Choice(
                            Box::new(Expr::Insens("c".to_owned())),
                            Box::new(Expr::Push(Box::new(Expr::Range(
                                "d".to_owned(),
                                "e".to_owned()
                            ))))
                        ))
                    ))))))
                )
            },]
        );
    }
1401
    // PEEK slices in the AST: `-04` is read as -4 with no end bound, and an
    // omitted start bound becomes 0.
    #[test]
    fn ast_peek_slice() {
        let input = "rule = _{ PEEK[-04..] ~ PEEK[..3] }";

        let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
        let ast = consume_rules_with_spans(pairs).unwrap();
        let ast: Vec<_> = ast.into_iter().map(convert_rule).collect();

        assert_eq!(
            ast,
            vec![AstRule {
                name: "rule".to_owned(),
                ty: RuleType::Silent,
                expr: Expr::Seq(
                    Box::new(Expr::PeekSlice(-4, None)),
                    Box::new(Expr::PeekSlice(0, Some(3))),
                ),
            }],
        );
    }
1422
1423 #[test]
1424 #[should_panic(expected = "grammar error
1425
1426 --> 1:13
1427 |
14281 | rule = { \"\"{4294967297} }
1429 | ^--------^
1430 |
1431 = number cannot overflow u32")]
1432 fn repeat_exact_overflow() {
1433 let input = "rule = { \"\"{4294967297} }";
1434
1435 let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
1436 unwrap_or_report(consume_rules_with_spans(pairs));
1437 }
1438
1439 #[test]
1440 #[should_panic(expected = "grammar error
1441
1442 --> 1:13
1443 |
14441 | rule = { \"\"{0} }
1445 | ^
1446 |
1447 = cannot repeat 0 times")]
1448 fn repeat_exact_zero() {
1449 let input = "rule = { \"\"{0} }";
1450
1451 let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
1452 unwrap_or_report(consume_rules_with_spans(pairs));
1453 }
1454
1455 #[test]
1456 #[should_panic(expected = "grammar error
1457
1458 --> 1:13
1459 |
14601 | rule = { \"\"{4294967297,} }
1461 | ^--------^
1462 |
1463 = number cannot overflow u32")]
1464 fn repeat_min_overflow() {
1465 let input = "rule = { \"\"{4294967297,} }";
1466
1467 let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
1468 unwrap_or_report(consume_rules_with_spans(pairs));
1469 }
1470
1471 #[test]
1472 #[should_panic(expected = "grammar error
1473
1474 --> 1:14
1475 |
14761 | rule = { \"\"{,4294967297} }
1477 | ^--------^
1478 |
1479 = number cannot overflow u32")]
1480 fn repeat_max_overflow() {
1481 let input = "rule = { \"\"{,4294967297} }";
1482
1483 let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
1484 unwrap_or_report(consume_rules_with_spans(pairs));
1485 }
1486
1487 #[test]
1488 #[should_panic(expected = "grammar error
1489
1490 --> 1:14
1491 |
14921 | rule = { \"\"{,0} }
1493 | ^
1494 |
1495 = cannot repeat 0 times")]
1496 fn repeat_max_zero() {
1497 let input = "rule = { \"\"{,0} }";
1498
1499 let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
1500 unwrap_or_report(consume_rules_with_spans(pairs));
1501 }
1502
1503 #[test]
1504 #[should_panic(expected = "grammar error
1505
1506 --> 1:13
1507 |
15081 | rule = { \"\"{4294967297,4294967298} }
1509 | ^--------^
1510 |
1511 = number cannot overflow u32")]
1512 fn repeat_min_max_overflow() {
1513 let input = "rule = { \"\"{4294967297,4294967298} }";
1514
1515 let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
1516 unwrap_or_report(consume_rules_with_spans(pairs));
1517 }
1518
1519 #[test]
1520 #[should_panic(expected = "grammar error
1521
1522 --> 1:15
1523 |
15241 | rule = { \"\"{0,0} }
1525 | ^
1526 |
1527 = cannot repeat 0 times")]
1528 fn repeat_min_max_zero() {
1529 let input = "rule = { \"\"{0,0} }";
1530
1531 let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
1532 unwrap_or_report(consume_rules_with_spans(pairs));
1533 }
1534
1535 #[test]
1536 fn unescape_all() {
1537 let string = r"a\nb\x55c\u{111}d";
1538
1539 assert_eq!(unescape(string), Some("a\nb\x55c\u{111}d".to_owned()));
1540 }
1541
1542 #[test]
1543 fn unescape_empty_escape() {
1544 let string = r"\";
1545
1546 assert_eq!(unescape(string), None);
1547 }
1548
1549 #[test]
1550 fn unescape_wrong_escape() {
1551 let string = r"\w";
1552
1553 assert_eq!(unescape(string), None);
1554 }
1555
1556 #[test]
1557 fn unescape_backslash() {
1558 let string = "\\\\";
1559 assert_eq!(unescape(string), Some("\\".to_owned()));
1560 }
1561
1562 #[test]
1563 fn unescape_return() {
1564 let string = "\\r";
1565 assert_eq!(unescape(string), Some("\r".to_owned()));
1566 }
1567
1568 #[test]
1569 fn unescape_tab() {
1570 let string = "\\t";
1571 assert_eq!(unescape(string), Some("\t".to_owned()));
1572 }
1573
1574 #[test]
1575 fn unescape_null() {
1576 let string = "\\0";
1577 assert_eq!(unescape(string), Some("\0".to_owned()));
1578 }
1579
1580 #[test]
1581 fn unescape_single_quote() {
1582 let string = "\\'";
1583 assert_eq!(unescape(string), Some("\'".to_owned()));
1584 }
1585
1586 #[test]
1587 fn unescape_wrong_byte() {
1588 let string = r"\xfg";
1589
1590 assert_eq!(unescape(string), None);
1591 }
1592
1593 #[test]
1594 fn unescape_short_byte() {
1595 let string = r"\xf";
1596
1597 assert_eq!(unescape(string), None);
1598 }
1599
1600 #[test]
1601 fn unescape_no_open_brace_unicode() {
1602 let string = r"\u11";
1603
1604 assert_eq!(unescape(string), None);
1605 }
1606
1607 #[test]
1608 fn unescape_no_close_brace_unicode() {
1609 let string = r"\u{11";
1610
1611 assert_eq!(unescape(string), None);
1612 }
1613
1614 #[test]
1615 fn unescape_short_unicode() {
1616 let string = r"\u{1}";
1617
1618 assert_eq!(unescape(string), None);
1619 }
1620
1621 #[test]
1622 fn unescape_long_unicode() {
1623 let string = r"\u{1111111}";
1624
1625 assert_eq!(unescape(string), None);
1626 }
1627
1628 #[test]
1629 fn handles_deep_nesting() {
1630 let sample1 = include_str!(concat!(
1631 env!("CARGO_MANIFEST_DIR"),
1632 "/resources/test/fuzzsample1.grammar"
1633 ));
1634 let sample2 = include_str!(concat!(
1635 env!("CARGO_MANIFEST_DIR"),
1636 "/resources/test/fuzzsample2.grammar"
1637 ));
1638 let sample3 = include_str!(concat!(
1639 env!("CARGO_MANIFEST_DIR"),
1640 "/resources/test/fuzzsample3.grammar"
1641 ));
1642 let sample4 = include_str!(concat!(
1643 env!("CARGO_MANIFEST_DIR"),
1644 "/resources/test/fuzzsample4.grammar"
1645 ));
1646 let sample5 = include_str!(concat!(
1647 env!("CARGO_MANIFEST_DIR"),
1648 "/resources/test/fuzzsample5.grammar"
1649 ));
1650 const ERROR: &str = "call limit reached";
1651 pest::set_call_limit(Some(5_000usize.try_into().unwrap()));
1652 let s1 = parse(Rule::grammar_rules, sample1);
1653 assert!(s1.is_err());
1654 assert_eq!(s1.unwrap_err().variant.message(), ERROR);
1655 let s2 = parse(Rule::grammar_rules, sample2);
1656 assert!(s2.is_err());
1657 assert_eq!(s2.unwrap_err().variant.message(), ERROR);
1658 let s3 = parse(Rule::grammar_rules, sample3);
1659 assert!(s3.is_err());
1660 assert_eq!(s3.unwrap_err().variant.message(), ERROR);
1661 let s4 = parse(Rule::grammar_rules, sample4);
1662 assert!(s4.is_err());
1663 assert_eq!(s4.unwrap_err().variant.message(), ERROR);
1664 let s5 = parse(Rule::grammar_rules, sample5);
1665 assert!(s5.is_err());
1666 assert_eq!(s5.unwrap_err().variant.message(), ERROR);
1667 }
1668}