1use once_cell::sync::Lazy;
14use std::collections::{HashMap, HashSet};
15
16use pest::error::{Error, ErrorVariant, InputLocation};
17use pest::iterators::Pairs;
18use pest::unicode::unicode_property_names;
19use pest::Span;
20
21use crate::parser::{ParserExpr, ParserNode, ParserRule, Rule};
22
23static RUST_KEYWORDS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
24 [
25 "abstract", "alignof", "as", "become", "box", "break", "const", "continue", "crate", "do",
26 "else", "enum", "extern", "false", "final", "fn", "for", "if", "impl", "in", "let", "loop",
27 "macro", "match", "mod", "move", "mut", "offsetof", "override", "priv", "proc", "pure",
28 "pub", "ref", "return", "Self", "self", "sizeof", "static", "struct", "super", "trait",
29 "true", "type", "typeof", "unsafe", "unsized", "use", "virtual", "where", "while", "yield",
30 ]
31 .iter()
32 .cloned()
33 .collect()
34});
35
36static PEST_KEYWORDS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
37 [
38 "_", "ANY", "DROP", "EOI", "PEEK", "PEEK_ALL", "POP", "POP_ALL", "PUSH", "SOI",
39 ]
40 .iter()
41 .cloned()
42 .collect()
43});
44
45static BUILTINS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
46 [
47 "ANY",
48 "DROP",
49 "EOI",
50 "PEEK",
51 "PEEK_ALL",
52 "POP",
53 "POP_ALL",
54 "SOI",
55 "ASCII_DIGIT",
56 "ASCII_NONZERO_DIGIT",
57 "ASCII_BIN_DIGIT",
58 "ASCII_OCT_DIGIT",
59 "ASCII_HEX_DIGIT",
60 "ASCII_ALPHA_LOWER",
61 "ASCII_ALPHA_UPPER",
62 "ASCII_ALPHA",
63 "ASCII_ALPHANUMERIC",
64 "ASCII",
65 "NEWLINE",
66 ]
67 .iter()
68 .cloned()
69 .chain(unicode_property_names())
70 .collect::<HashSet<&str>>()
71});
72
73pub fn validate_pairs(pairs: Pairs<'_, Rule>) -> Result<Vec<&str>, Vec<Error<Rule>>> {
81 let definitions: Vec<_> = pairs
82 .clone()
83 .filter(|pair| pair.as_rule() == Rule::grammar_rule)
84 .map(|pair| pair.into_inner().next().unwrap())
85 .filter(|pair| pair.as_rule() != Rule::line_doc)
86 .map(|pair| pair.as_span())
87 .collect();
88
89 let called_rules: Vec<_> = pairs
90 .clone()
91 .filter(|pair| pair.as_rule() == Rule::grammar_rule)
92 .flat_map(|pair| {
93 pair.into_inner()
94 .flatten()
95 .skip(1)
96 .filter(|pair| pair.as_rule() == Rule::identifier)
97 .map(|pair| pair.as_span())
98 })
99 .collect();
100
101 let mut errors = vec![];
102
103 errors.extend(validate_pest_keywords(&definitions));
104 errors.extend(validate_already_defined(&definitions));
105 errors.extend(validate_undefined(&definitions, &called_rules));
106
107 if !errors.is_empty() {
108 return Err(errors);
109 }
110
111 let definitions: HashSet<_> = definitions.iter().map(|span| span.as_str()).collect();
112 let called_rules: HashSet<_> = called_rules.iter().map(|span| span.as_str()).collect();
113
114 let defaults = called_rules.difference(&definitions);
115
116 Ok(defaults.cloned().collect())
117}
118
119#[allow(clippy::ptr_arg)]
121#[deprecated = "Rust keywords are no longer restricted from the pest grammar"]
122pub fn validate_rust_keywords(definitions: &Vec<Span<'_>>) -> Vec<Error<Rule>> {
123 let mut errors = vec![];
124
125 for definition in definitions {
126 let name = definition.as_str();
127
128 if RUST_KEYWORDS.contains(name) {
129 errors.push(Error::new_from_span(
130 ErrorVariant::CustomError {
131 message: format!("{} is a rust keyword", name),
132 },
133 *definition,
134 ))
135 }
136 }
137
138 errors
139}
140
141#[allow(clippy::ptr_arg)]
143pub fn validate_pest_keywords(definitions: &Vec<Span<'_>>) -> Vec<Error<Rule>> {
144 let mut errors = vec![];
145
146 for definition in definitions {
147 let name = definition.as_str();
148
149 if PEST_KEYWORDS.contains(name) {
150 errors.push(Error::new_from_span(
151 ErrorVariant::CustomError {
152 message: format!("{} is a pest keyword", name),
153 },
154 *definition,
155 ))
156 }
157 }
158
159 errors
160}
161
162#[allow(clippy::ptr_arg)]
164pub fn validate_already_defined(definitions: &Vec<Span<'_>>) -> Vec<Error<Rule>> {
165 let mut errors = vec![];
166 let mut defined = HashSet::new();
167
168 for definition in definitions {
169 let name = definition.as_str();
170
171 if defined.contains(&name) {
172 errors.push(Error::new_from_span(
173 ErrorVariant::CustomError {
174 message: format!("rule {} already defined", name),
175 },
176 *definition,
177 ))
178 } else {
179 defined.insert(name);
180 }
181 }
182
183 errors
184}
185
186#[allow(clippy::ptr_arg)]
188pub fn validate_undefined<'i>(
189 definitions: &Vec<Span<'i>>,
190 called_rules: &Vec<Span<'i>>,
191) -> Vec<Error<Rule>> {
192 let mut errors = vec![];
193 let definitions: HashSet<_> = definitions.iter().map(|span| span.as_str()).collect();
194
195 for rule in called_rules {
196 let name = rule.as_str();
197
198 if !definitions.contains(name) && !BUILTINS.contains(name) {
199 errors.push(Error::new_from_span(
200 ErrorVariant::CustomError {
201 message: format!("rule {} is undefined", name),
202 },
203 *rule,
204 ))
205 }
206 }
207
208 errors
209}
210
211#[allow(clippy::ptr_arg)]
216pub fn validate_ast<'a, 'i: 'a>(rules: &'a Vec<ParserRule<'i>>) -> Vec<Error<Rule>> {
217 let mut errors = vec![];
218
219 errors.extend(validate_repetition(rules));
220 errors.extend(validate_choices(rules));
221 errors.extend(validate_whitespace_comment(rules));
222 errors.extend(validate_left_recursion(rules));
223
224 errors.sort_by_key(|error| match error.location {
225 InputLocation::Span(span) => span,
226 _ => unreachable!(),
227 });
228
229 errors
230}
231
232fn is_non_progressing<'i>(
233 expr: &ParserExpr<'i>,
234 rules: &HashMap<String, &ParserNode<'i>>,
235 trace: &mut Vec<String>,
236) -> bool {
237 match *expr {
238 ParserExpr::Str(ref string) => string.is_empty(),
239 ParserExpr::Ident(ref ident) => {
240 if ident == "soi" || ident == "eoi" {
241 return true;
242 }
243
244 if !trace.contains(ident) {
245 if let Some(node) = rules.get(ident) {
246 trace.push(ident.clone());
247 let result = is_non_progressing(&node.expr, rules, trace);
248 trace.pop().unwrap();
249
250 return result;
251 }
252 }
253
254 false
255 }
256 ParserExpr::PosPred(_) => true,
257 ParserExpr::NegPred(_) => true,
258 ParserExpr::Seq(ref lhs, ref rhs) => {
259 is_non_progressing(&lhs.expr, rules, trace)
260 && is_non_progressing(&rhs.expr, rules, trace)
261 }
262 ParserExpr::Choice(ref lhs, ref rhs) => {
263 is_non_progressing(&lhs.expr, rules, trace)
264 || is_non_progressing(&rhs.expr, rules, trace)
265 }
266 _ => false,
267 }
268}
269
270fn is_non_failing<'i>(
271 expr: &ParserExpr<'i>,
272 rules: &HashMap<String, &ParserNode<'i>>,
273 trace: &mut Vec<String>,
274) -> bool {
275 match *expr {
276 ParserExpr::Str(ref string) => string.is_empty(),
277 ParserExpr::Ident(ref ident) => {
278 if !trace.contains(ident) {
279 if let Some(node) = rules.get(ident) {
280 trace.push(ident.clone());
281 let result = is_non_failing(&node.expr, rules, trace);
282 trace.pop().unwrap();
283
284 return result;
285 }
286 }
287
288 false
289 }
290 ParserExpr::Opt(_) => true,
291 ParserExpr::Rep(_) => true,
292 ParserExpr::Seq(ref lhs, ref rhs) => {
293 is_non_failing(&lhs.expr, rules, trace) && is_non_failing(&rhs.expr, rules, trace)
294 }
295 ParserExpr::Choice(ref lhs, ref rhs) => {
296 is_non_failing(&lhs.expr, rules, trace) || is_non_failing(&rhs.expr, rules, trace)
297 }
298 _ => false,
299 }
300}
301
302fn validate_repetition<'a, 'i: 'a>(rules: &'a [ParserRule<'i>]) -> Vec<Error<Rule>> {
303 let mut result = vec![];
304 let map = to_hash_map(rules);
305
306 for rule in rules {
307 let mut errors = rule.node
308 .clone()
309 .filter_map_top_down(|node| match node.expr {
310 ParserExpr::Rep(ref other)
311 | ParserExpr::RepOnce(ref other)
312 | ParserExpr::RepMin(ref other, _) => {
313 if is_non_failing(&other.expr, &map, &mut vec![]) {
314 Some(Error::new_from_span(
315 ErrorVariant::CustomError {
316 message:
317 "expression inside repetition cannot fail and will repeat \
318 infinitely"
319 .to_owned()
320 },
321 node.span
322 ))
323 } else if is_non_progressing(&other.expr, &map, &mut vec![]) {
324 Some(Error::new_from_span(
325 ErrorVariant::CustomError {
326 message:
327 "expression inside repetition is non-progressing and will repeat \
328 infinitely"
329 .to_owned(),
330 },
331 node.span
332 ))
333 } else {
334 None
335 }
336 }
337 _ => None
338 });
339
340 result.append(&mut errors);
341 }
342
343 result
344}
345
346fn validate_choices<'a, 'i: 'a>(rules: &'a [ParserRule<'i>]) -> Vec<Error<Rule>> {
347 let mut result = vec![];
348 let map = to_hash_map(rules);
349
350 for rule in rules {
351 let mut errors = rule
352 .node
353 .clone()
354 .filter_map_top_down(|node| match node.expr {
355 ParserExpr::Choice(ref lhs, _) => {
356 let node = match lhs.expr {
357 ParserExpr::Choice(_, ref rhs) => rhs,
358 _ => lhs,
359 };
360
361 if is_non_failing(&node.expr, &map, &mut vec![]) {
362 Some(Error::new_from_span(
363 ErrorVariant::CustomError {
364 message:
365 "expression cannot fail; following choices cannot be reached"
366 .to_owned(),
367 },
368 node.span,
369 ))
370 } else {
371 None
372 }
373 }
374 _ => None,
375 });
376
377 result.append(&mut errors);
378 }
379
380 result
381}
382
383fn validate_whitespace_comment<'a, 'i: 'a>(rules: &'a [ParserRule<'i>]) -> Vec<Error<Rule>> {
384 let map = to_hash_map(rules);
385
386 rules
387 .iter()
388 .filter_map(|rule| {
389 if rule.name == "WHITESPACE" || rule.name == "COMMENT" {
390 if is_non_failing(&rule.node.expr, &map, &mut vec![]) {
391 Some(Error::new_from_span(
392 ErrorVariant::CustomError {
393 message: format!(
394 "{} cannot fail and will repeat infinitely",
395 &rule.name
396 ),
397 },
398 rule.node.span,
399 ))
400 } else if is_non_progressing(&rule.node.expr, &map, &mut vec![]) {
401 Some(Error::new_from_span(
402 ErrorVariant::CustomError {
403 message: format!(
404 "{} is non-progressing and will repeat infinitely",
405 &rule.name
406 ),
407 },
408 rule.node.span,
409 ))
410 } else {
411 None
412 }
413 } else {
414 None
415 }
416 })
417 .collect()
418}
419
420fn validate_left_recursion<'a, 'i: 'a>(rules: &'a [ParserRule<'i>]) -> Vec<Error<Rule>> {
421 left_recursion(to_hash_map(rules))
422}
423
424fn to_hash_map<'a, 'i: 'a>(rules: &'a [ParserRule<'i>]) -> HashMap<String, &'a ParserNode<'i>> {
425 rules.iter().map(|r| (r.name.clone(), &r.node)).collect()
426}
427
428fn left_recursion<'a, 'i: 'a>(rules: HashMap<String, &'a ParserNode<'i>>) -> Vec<Error<Rule>> {
429 fn check_expr<'a, 'i: 'a>(
430 node: &'a ParserNode<'i>,
431 rules: &'a HashMap<String, &ParserNode<'i>>,
432 trace: &mut Vec<String>,
433 ) -> Option<Error<Rule>> {
434 match node.expr.clone() {
435 ParserExpr::Ident(other) => {
436 if trace[0] == other {
437 trace.push(other);
438 let chain = trace
439 .iter()
440 .map(|ident| ident.as_ref())
441 .collect::<Vec<_>>()
442 .join(" -> ");
443
444 return Some(Error::new_from_span(
445 ErrorVariant::CustomError {
446 message: format!(
447 "rule {} is left-recursive ({}); pest::pratt_parser might be useful \
448 in this case",
449 node.span.as_str(),
450 chain
451 )
452 },
453 node.span
454 ));
455 }
456
457 if !trace.contains(&other) {
458 if let Some(node) = rules.get(&other) {
459 trace.push(other);
460 let result = check_expr(node, rules, trace);
461 trace.pop().unwrap();
462
463 return result;
464 }
465 }
466
467 None
468 }
469 ParserExpr::Seq(ref lhs, ref rhs) => {
470 if is_non_failing(&lhs.expr, rules, &mut vec![trace.last().unwrap().clone()]) {
471 check_expr(rhs, rules, trace)
472 } else {
473 check_expr(lhs, rules, trace)
474 }
475 }
476 ParserExpr::Choice(ref lhs, ref rhs) => {
477 check_expr(lhs, rules, trace).or_else(|| check_expr(rhs, rules, trace))
478 }
479 ParserExpr::Rep(ref node) => check_expr(node, rules, trace),
480 ParserExpr::RepOnce(ref node) => check_expr(node, rules, trace),
481 ParserExpr::Opt(ref node) => check_expr(node, rules, trace),
482 ParserExpr::PosPred(ref node) => check_expr(node, rules, trace),
483 ParserExpr::NegPred(ref node) => check_expr(node, rules, trace),
484 ParserExpr::Push(ref node) => check_expr(node, rules, trace),
485 _ => None,
486 }
487 }
488
489 let mut errors = vec![];
490
491 for (name, node) in &rules {
492 let name = name.clone();
493
494 if let Some(error) = check_expr(node, &rules, &mut vec![name]) {
495 errors.push(error);
496 }
497 }
498
499 errors
500}
501
502#[cfg(test)]
503mod tests {
504 use super::super::parser::{consume_rules, PestParser};
505 use super::super::unwrap_or_report;
506 use super::*;
507 use pest::Parser;
508
509 #[test]
510 #[should_panic(expected = "grammar error
511
512 --> 1:1
513 |
5141 | ANY = { \"a\" }
515 | ^-^
516 |
517 = ANY is a pest keyword")]
518 fn pest_keyword() {
519 let input = "ANY = { \"a\" }";
520 unwrap_or_report(validate_pairs(
521 PestParser::parse(Rule::grammar_rules, input).unwrap(),
522 ));
523 }
524
525 #[test]
526 #[should_panic(expected = "grammar error
527
528 --> 1:13
529 |
5301 | a = { \"a\" } a = { \"a\" }
531 | ^
532 |
533 = rule a already defined")]
534 fn already_defined() {
535 let input = "a = { \"a\" } a = { \"a\" }";
536 unwrap_or_report(validate_pairs(
537 PestParser::parse(Rule::grammar_rules, input).unwrap(),
538 ));
539 }
540
541 #[test]
542 #[should_panic(expected = "grammar error
543
544 --> 1:7
545 |
5461 | a = { b }
547 | ^
548 |
549 = rule b is undefined")]
550 fn undefined() {
551 let input = "a = { b }";
552 unwrap_or_report(validate_pairs(
553 PestParser::parse(Rule::grammar_rules, input).unwrap(),
554 ));
555 }
556
557 #[test]
558 fn valid_recursion() {
559 let input = "a = { \"\" ~ \"a\"? ~ \"a\"* ~ (\"a\" | \"b\") ~ a }";
560 unwrap_or_report(consume_rules(
561 PestParser::parse(Rule::grammar_rules, input).unwrap(),
562 ));
563 }
564
565 #[test]
566 #[should_panic(expected = "grammar error
567
568 --> 1:16
569 |
5701 | WHITESPACE = { \"\" }
571 | ^^
572 |
573 = WHITESPACE cannot fail and will repeat infinitely")]
574 fn non_failing_whitespace() {
575 let input = "WHITESPACE = { \"\" }";
576 unwrap_or_report(consume_rules(
577 PestParser::parse(Rule::grammar_rules, input).unwrap(),
578 ));
579 }
580
581 #[test]
582 #[should_panic(expected = "grammar error
583
584 --> 1:13
585 |
5861 | COMMENT = { soi }
587 | ^-^
588 |
589 = COMMENT is non-progressing and will repeat infinitely")]
590 fn non_progressing_comment() {
591 let input = "COMMENT = { soi }";
592 unwrap_or_report(consume_rules(
593 PestParser::parse(Rule::grammar_rules, input).unwrap(),
594 ));
595 }
596
597 #[test]
598 #[should_panic(expected = "grammar error
599
600 --> 1:7
601 |
6021 | a = { (\"\")* }
603 | ^---^
604 |
605 = expression inside repetition cannot fail and will repeat infinitely")]
606 fn non_failing_repetition() {
607 let input = "a = { (\"\")* }";
608 unwrap_or_report(consume_rules(
609 PestParser::parse(Rule::grammar_rules, input).unwrap(),
610 ));
611 }
612
613 #[test]
614 #[should_panic(expected = "grammar error
615
616 --> 1:18
617 |
6181 | a = { \"\" } b = { a* }
619 | ^^
620 |
621 = expression inside repetition cannot fail and will repeat infinitely")]
622 fn indirect_non_failing_repetition() {
623 let input = "a = { \"\" } b = { a* }";
624 unwrap_or_report(consume_rules(
625 PestParser::parse(Rule::grammar_rules, input).unwrap(),
626 ));
627 }
628
629 #[test]
630 #[should_panic(expected = "grammar error
631
632 --> 1:20
633 |
6341 | a = { \"a\" ~ (\"b\" ~ (\"\")*) }
635 | ^---^
636 |
637 = expression inside repetition cannot fail and will repeat infinitely")]
638 fn deep_non_failing_repetition() {
639 let input = "a = { \"a\" ~ (\"b\" ~ (\"\")*) }";
640 unwrap_or_report(consume_rules(
641 PestParser::parse(Rule::grammar_rules, input).unwrap(),
642 ));
643 }
644
645 #[test]
646 #[should_panic(expected = "grammar error
647
648 --> 1:7
649 |
6501 | a = { (\"\" ~ &\"a\" ~ !\"a\" ~ (soi | eoi))* }
651 | ^-------------------------------^
652 |
653 = expression inside repetition is non-progressing and will repeat infinitely")]
654 fn non_progressing_repetition() {
655 let input = "a = { (\"\" ~ &\"a\" ~ !\"a\" ~ (soi | eoi))* }";
656 unwrap_or_report(consume_rules(
657 PestParser::parse(Rule::grammar_rules, input).unwrap(),
658 ));
659 }
660
661 #[test]
662 #[should_panic(expected = "grammar error
663
664 --> 1:20
665 |
6661 | a = { !\"a\" } b = { a* }
667 | ^^
668 |
669 = expression inside repetition is non-progressing and will repeat infinitely")]
670 fn indirect_non_progressing_repetition() {
671 let input = "a = { !\"a\" } b = { a* }";
672 unwrap_or_report(consume_rules(
673 PestParser::parse(Rule::grammar_rules, input).unwrap(),
674 ));
675 }
676
677 #[test]
678 #[should_panic(expected = "grammar error
679
680 --> 1:7
681 |
6821 | a = { a }
683 | ^
684 |
685 = rule a is left-recursive (a -> a); pest::pratt_parser might be useful in this case")]
686 fn simple_left_recursion() {
687 let input = "a = { a }";
688 unwrap_or_report(consume_rules(
689 PestParser::parse(Rule::grammar_rules, input).unwrap(),
690 ));
691 }
692
693 #[test]
694 #[should_panic(expected = "grammar error
695
696 --> 1:7
697 |
6981 | a = { b } b = { a }
699 | ^
700 |
701 = rule b is left-recursive (b -> a -> b); pest::pratt_parser might be useful in this case
702
703 --> 1:17
704 |
7051 | a = { b } b = { a }
706 | ^
707 |
708 = rule a is left-recursive (a -> b -> a); pest::pratt_parser might be useful in this case")]
709 fn indirect_left_recursion() {
710 let input = "a = { b } b = { a }";
711 unwrap_or_report(consume_rules(
712 PestParser::parse(Rule::grammar_rules, input).unwrap(),
713 ));
714 }
715
716 #[test]
717 #[should_panic(expected = "grammar error
718
719 --> 1:39
720 |
7211 | a = { \"\" ~ \"a\"? ~ \"a\"* ~ (\"a\" | \"\") ~ a }
722 | ^
723 |
724 = rule a is left-recursive (a -> a); pest::pratt_parser might be useful in this case")]
725 fn non_failing_left_recursion() {
726 let input = "a = { \"\" ~ \"a\"? ~ \"a\"* ~ (\"a\" | \"\") ~ a }";
727 unwrap_or_report(consume_rules(
728 PestParser::parse(Rule::grammar_rules, input).unwrap(),
729 ));
730 }
731
732 #[test]
733 #[should_panic(expected = "grammar error
734
735 --> 1:13
736 |
7371 | a = { \"a\" | a }
738 | ^
739 |
740 = rule a is left-recursive (a -> a); pest::pratt_parser might be useful in this case")]
741 fn non_primary_choice_left_recursion() {
742 let input = "a = { \"a\" | a }";
743 unwrap_or_report(consume_rules(
744 PestParser::parse(Rule::grammar_rules, input).unwrap(),
745 ));
746 }
747
748 #[test]
749 #[should_panic(expected = "grammar error
750
751 --> 1:7
752 |
7531 | a = { \"a\"* | \"a\" | \"b\" }
754 | ^--^
755 |
756 = expression cannot fail; following choices cannot be reached")]
757 fn lhs_non_failing_choice() {
758 let input = "a = { \"a\"* | \"a\" | \"b\" }";
759 unwrap_or_report(consume_rules(
760 PestParser::parse(Rule::grammar_rules, input).unwrap(),
761 ));
762 }
763
764 #[test]
765 #[should_panic(expected = "grammar error
766
767 --> 1:13
768 |
7691 | a = { \"a\" | \"a\"* | \"b\" }
770 | ^--^
771 |
772 = expression cannot fail; following choices cannot be reached")]
773 fn lhs_non_failing_choice_middle() {
774 let input = "a = { \"a\" | \"a\"* | \"b\" }";
775 unwrap_or_report(consume_rules(
776 PestParser::parse(Rule::grammar_rules, input).unwrap(),
777 ));
778 }
779
780 #[test]
781 #[should_panic(expected = "grammar error
782
783 --> 1:7
784 |
7851 | a = { b | \"a\" } b = { \"b\"* | \"c\" }
786 | ^
787 |
788 = expression cannot fail; following choices cannot be reached
789
790 --> 1:23
791 |
7921 | a = { b | \"a\" } b = { \"b\"* | \"c\" }
793 | ^--^
794 |
795 = expression cannot fail; following choices cannot be reached")]
796 fn lhs_non_failing_nested_choices() {
797 let input = "a = { b | \"a\" } b = { \"b\"* | \"c\" }";
798 unwrap_or_report(consume_rules(
799 PestParser::parse(Rule::grammar_rules, input).unwrap(),
800 ));
801 }
802
803 #[test]
804 fn skip_can_be_defined() {
805 let input = "skip = { \"\" }";
806 unwrap_or_report(consume_rules(
807 PestParser::parse(Rule::grammar_rules, input).unwrap(),
808 ));
809 }
810}