1#![deny(missing_docs)]
105
106use std::borrow::Cow;
107use std::collections::{BTreeMap, HashMap};
108use std::error::Error as StdError;
109use std::fmt;
110use std::hash;
111use std::path::Path;
112use std::str;
113
114use aho_corasick::AhoCorasick;
115use bstr::{ByteSlice, ByteVec, B};
116use regex::bytes::{Regex, RegexBuilder, RegexSet};
117
118use crate::glob::MatchStrategy;
119pub use crate::glob::{Glob, GlobBuilder, GlobMatcher};
120use crate::pathutil::{file_name, file_name_ext, normalize_path};
121
122mod glob;
123mod pathutil;
124
125#[cfg(feature = "serde1")]
126mod serde_impl;
127
128#[cfg(feature = "log")]
129macro_rules! debug {
130 ($($token:tt)*) => (::log::debug!($($token)*);)
131}
132
133#[cfg(not(feature = "log"))]
134macro_rules! debug {
135 ($($token:tt)*) => {};
136}
137
138#[derive(Clone, Debug, Eq, PartialEq)]
140pub struct Error {
141 glob: Option<String>,
143 kind: ErrorKind,
145}
146
147#[derive(Clone, Debug, Eq, PartialEq)]
149pub enum ErrorKind {
150 InvalidRecursive,
158 UnclosedClass,
160 InvalidRange(char, char),
164 UnopenedAlternates,
166 UnclosedAlternates,
168 NestedAlternates,
171 DanglingEscape,
173 Regex(String),
175 #[doc(hidden)]
181 __Nonexhaustive,
182}
183
184impl StdError for Error {
185 fn description(&self) -> &str {
186 self.kind.description()
187 }
188}
189
190impl Error {
191 pub fn glob(&self) -> Option<&str> {
193 self.glob.as_ref().map(|s| &**s)
194 }
195
196 pub fn kind(&self) -> &ErrorKind {
198 &self.kind
199 }
200}
201
202impl ErrorKind {
203 fn description(&self) -> &str {
204 match *self {
205 ErrorKind::InvalidRecursive => {
206 "invalid use of **; must be one path component"
207 }
208 ErrorKind::UnclosedClass => {
209 "unclosed character class; missing ']'"
210 }
211 ErrorKind::InvalidRange(_, _) => "invalid character range",
212 ErrorKind::UnopenedAlternates => {
213 "unopened alternate group; missing '{' \
214 (maybe escape '}' with '[}]'?)"
215 }
216 ErrorKind::UnclosedAlternates => {
217 "unclosed alternate group; missing '}' \
218 (maybe escape '{' with '[{]'?)"
219 }
220 ErrorKind::NestedAlternates => {
221 "nested alternate groups are not allowed"
222 }
223 ErrorKind::DanglingEscape => "dangling '\\'",
224 ErrorKind::Regex(ref err) => err,
225 ErrorKind::__Nonexhaustive => unreachable!(),
226 }
227 }
228}
229
230impl fmt::Display for Error {
231 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
232 match self.glob {
233 None => self.kind.fmt(f),
234 Some(ref glob) => {
235 write!(f, "error parsing glob '{}': {}", glob, self.kind)
236 }
237 }
238 }
239}
240
241impl fmt::Display for ErrorKind {
242 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
243 match *self {
244 ErrorKind::InvalidRecursive
245 | ErrorKind::UnclosedClass
246 | ErrorKind::UnopenedAlternates
247 | ErrorKind::UnclosedAlternates
248 | ErrorKind::NestedAlternates
249 | ErrorKind::DanglingEscape
250 | ErrorKind::Regex(_) => write!(f, "{}", self.description()),
251 ErrorKind::InvalidRange(s, e) => {
252 write!(f, "invalid range; '{}' > '{}'", s, e)
253 }
254 ErrorKind::__Nonexhaustive => unreachable!(),
255 }
256 }
257}
258
259fn new_regex(pat: &str) -> Result<Regex, Error> {
260 RegexBuilder::new(pat)
261 .dot_matches_new_line(true)
262 .size_limit(10 * (1 << 20))
263 .dfa_size_limit(10 * (1 << 20))
264 .build()
265 .map_err(|err| Error {
266 glob: Some(pat.to_string()),
267 kind: ErrorKind::Regex(err.to_string()),
268 })
269}
270
271fn new_regex_set<I, S>(pats: I) -> Result<RegexSet, Error>
272where
273 S: AsRef<str>,
274 I: IntoIterator<Item = S>,
275{
276 RegexSet::new(pats).map_err(|err| Error {
277 glob: None,
278 kind: ErrorKind::Regex(err.to_string()),
279 })
280}
281
282type Fnv = hash::BuildHasherDefault<fnv::FnvHasher>;
283
284#[derive(Clone, Debug)]
287pub struct GlobSet {
288 len: usize,
289 strats: Vec<GlobSetMatchStrategy>,
290}
291
292impl GlobSet {
293 #[inline]
295 pub fn empty() -> GlobSet {
296 GlobSet { len: 0, strats: vec![] }
297 }
298
299 #[inline]
301 pub fn is_empty(&self) -> bool {
302 self.len == 0
303 }
304
305 #[inline]
307 pub fn len(&self) -> usize {
308 self.len
309 }
310
311 pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
313 self.is_match_candidate(&Candidate::new(path.as_ref()))
314 }
315
316 pub fn is_match_candidate(&self, path: &Candidate<'_>) -> bool {
321 if self.is_empty() {
322 return false;
323 }
324 for strat in &self.strats {
325 if strat.is_match(path) {
326 return true;
327 }
328 }
329 false
330 }
331
332 pub fn matches<P: AsRef<Path>>(&self, path: P) -> Vec<usize> {
335 self.matches_candidate(&Candidate::new(path.as_ref()))
336 }
337
338 pub fn matches_candidate(&self, path: &Candidate<'_>) -> Vec<usize> {
344 let mut into = vec![];
345 if self.is_empty() {
346 return into;
347 }
348 self.matches_candidate_into(path, &mut into);
349 into
350 }
351
352 pub fn matches_into<P: AsRef<Path>>(
359 &self,
360 path: P,
361 into: &mut Vec<usize>,
362 ) {
363 self.matches_candidate_into(&Candidate::new(path.as_ref()), into);
364 }
365
366 pub fn matches_candidate_into(
376 &self,
377 path: &Candidate<'_>,
378 into: &mut Vec<usize>,
379 ) {
380 into.clear();
381 if self.is_empty() {
382 return;
383 }
384 for strat in &self.strats {
385 strat.matches_into(path, into);
386 }
387 into.sort();
388 into.dedup();
389 }
390
391 fn new(pats: &[Glob]) -> Result<GlobSet, Error> {
392 if pats.is_empty() {
393 return Ok(GlobSet { len: 0, strats: vec![] });
394 }
395 let mut lits = LiteralStrategy::new();
396 let mut base_lits = BasenameLiteralStrategy::new();
397 let mut exts = ExtensionStrategy::new();
398 let mut prefixes = MultiStrategyBuilder::new();
399 let mut suffixes = MultiStrategyBuilder::new();
400 let mut required_exts = RequiredExtensionStrategyBuilder::new();
401 let mut regexes = MultiStrategyBuilder::new();
402 for (i, p) in pats.iter().enumerate() {
403 match MatchStrategy::new(p) {
404 MatchStrategy::Literal(lit) => {
405 lits.add(i, lit);
406 }
407 MatchStrategy::BasenameLiteral(lit) => {
408 base_lits.add(i, lit);
409 }
410 MatchStrategy::Extension(ext) => {
411 exts.add(i, ext);
412 }
413 MatchStrategy::Prefix(prefix) => {
414 prefixes.add(i, prefix);
415 }
416 MatchStrategy::Suffix { suffix, component } => {
417 if component {
418 lits.add(i, suffix[1..].to_string());
419 }
420 suffixes.add(i, suffix);
421 }
422 MatchStrategy::RequiredExtension(ext) => {
423 required_exts.add(i, ext, p.regex().to_owned());
424 }
425 MatchStrategy::Regex => {
426 debug!("glob converted to regex: {:?}", p);
427 regexes.add(i, p.regex().to_owned());
428 }
429 }
430 }
431 debug!(
432 "built glob set; {} literals, {} basenames, {} extensions, \
433 {} prefixes, {} suffixes, {} required extensions, {} regexes",
434 lits.0.len(),
435 base_lits.0.len(),
436 exts.0.len(),
437 prefixes.literals.len(),
438 suffixes.literals.len(),
439 required_exts.0.len(),
440 regexes.literals.len()
441 );
442 Ok(GlobSet {
443 len: pats.len(),
444 strats: vec![
445 GlobSetMatchStrategy::Extension(exts),
446 GlobSetMatchStrategy::BasenameLiteral(base_lits),
447 GlobSetMatchStrategy::Literal(lits),
448 GlobSetMatchStrategy::Suffix(suffixes.suffix()),
449 GlobSetMatchStrategy::Prefix(prefixes.prefix()),
450 GlobSetMatchStrategy::RequiredExtension(
451 required_exts.build()?,
452 ),
453 GlobSetMatchStrategy::Regex(regexes.regex_set()?),
454 ],
455 })
456 }
457}
458
459impl Default for GlobSet {
460 fn default() -> Self {
462 GlobSet::empty()
463 }
464}
465
466#[derive(Clone, Debug)]
469pub struct GlobSetBuilder {
470 pats: Vec<Glob>,
471}
472
473impl GlobSetBuilder {
474 pub fn new() -> GlobSetBuilder {
478 GlobSetBuilder { pats: vec![] }
479 }
480
481 pub fn build(&self) -> Result<GlobSet, Error> {
485 GlobSet::new(&self.pats)
486 }
487
488 pub fn add(&mut self, pat: Glob) -> &mut GlobSetBuilder {
490 self.pats.push(pat);
491 self
492 }
493}
494
495#[derive(Clone, Debug)]
502pub struct Candidate<'a> {
503 path: Cow<'a, [u8]>,
504 basename: Cow<'a, [u8]>,
505 ext: Cow<'a, [u8]>,
506}
507
508impl<'a> Candidate<'a> {
509 pub fn new<P: AsRef<Path> + ?Sized>(path: &'a P) -> Candidate<'a> {
511 let path = normalize_path(Vec::from_path_lossy(path.as_ref()));
512 let basename = file_name(&path).unwrap_or(Cow::Borrowed(B("")));
513 let ext = file_name_ext(&basename).unwrap_or(Cow::Borrowed(B("")));
514 Candidate { path: path, basename: basename, ext: ext }
515 }
516
517 fn path_prefix(&self, max: usize) -> &[u8] {
518 if self.path.len() <= max {
519 &*self.path
520 } else {
521 &self.path[..max]
522 }
523 }
524
525 fn path_suffix(&self, max: usize) -> &[u8] {
526 if self.path.len() <= max {
527 &*self.path
528 } else {
529 &self.path[self.path.len() - max..]
530 }
531 }
532}
533
534#[derive(Clone, Debug)]
535enum GlobSetMatchStrategy {
536 Literal(LiteralStrategy),
537 BasenameLiteral(BasenameLiteralStrategy),
538 Extension(ExtensionStrategy),
539 Prefix(PrefixStrategy),
540 Suffix(SuffixStrategy),
541 RequiredExtension(RequiredExtensionStrategy),
542 Regex(RegexSetStrategy),
543}
544
545impl GlobSetMatchStrategy {
546 fn is_match(&self, candidate: &Candidate<'_>) -> bool {
547 use self::GlobSetMatchStrategy::*;
548 match *self {
549 Literal(ref s) => s.is_match(candidate),
550 BasenameLiteral(ref s) => s.is_match(candidate),
551 Extension(ref s) => s.is_match(candidate),
552 Prefix(ref s) => s.is_match(candidate),
553 Suffix(ref s) => s.is_match(candidate),
554 RequiredExtension(ref s) => s.is_match(candidate),
555 Regex(ref s) => s.is_match(candidate),
556 }
557 }
558
559 fn matches_into(
560 &self,
561 candidate: &Candidate<'_>,
562 matches: &mut Vec<usize>,
563 ) {
564 use self::GlobSetMatchStrategy::*;
565 match *self {
566 Literal(ref s) => s.matches_into(candidate, matches),
567 BasenameLiteral(ref s) => s.matches_into(candidate, matches),
568 Extension(ref s) => s.matches_into(candidate, matches),
569 Prefix(ref s) => s.matches_into(candidate, matches),
570 Suffix(ref s) => s.matches_into(candidate, matches),
571 RequiredExtension(ref s) => s.matches_into(candidate, matches),
572 Regex(ref s) => s.matches_into(candidate, matches),
573 }
574 }
575}
576
577#[derive(Clone, Debug)]
578struct LiteralStrategy(BTreeMap<Vec<u8>, Vec<usize>>);
579
580impl LiteralStrategy {
581 fn new() -> LiteralStrategy {
582 LiteralStrategy(BTreeMap::new())
583 }
584
585 fn add(&mut self, global_index: usize, lit: String) {
586 self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index);
587 }
588
589 fn is_match(&self, candidate: &Candidate<'_>) -> bool {
590 self.0.contains_key(candidate.path.as_bytes())
591 }
592
593 #[inline(never)]
594 fn matches_into(
595 &self,
596 candidate: &Candidate<'_>,
597 matches: &mut Vec<usize>,
598 ) {
599 if let Some(hits) = self.0.get(candidate.path.as_bytes()) {
600 matches.extend(hits);
601 }
602 }
603}
604
605#[derive(Clone, Debug)]
606struct BasenameLiteralStrategy(BTreeMap<Vec<u8>, Vec<usize>>);
607
608impl BasenameLiteralStrategy {
609 fn new() -> BasenameLiteralStrategy {
610 BasenameLiteralStrategy(BTreeMap::new())
611 }
612
613 fn add(&mut self, global_index: usize, lit: String) {
614 self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index);
615 }
616
617 fn is_match(&self, candidate: &Candidate<'_>) -> bool {
618 if candidate.basename.is_empty() {
619 return false;
620 }
621 self.0.contains_key(candidate.basename.as_bytes())
622 }
623
624 #[inline(never)]
625 fn matches_into(
626 &self,
627 candidate: &Candidate<'_>,
628 matches: &mut Vec<usize>,
629 ) {
630 if candidate.basename.is_empty() {
631 return;
632 }
633 if let Some(hits) = self.0.get(candidate.basename.as_bytes()) {
634 matches.extend(hits);
635 }
636 }
637}
638
639#[derive(Clone, Debug)]
640struct ExtensionStrategy(HashMap<Vec<u8>, Vec<usize>, Fnv>);
641
642impl ExtensionStrategy {
643 fn new() -> ExtensionStrategy {
644 ExtensionStrategy(HashMap::with_hasher(Fnv::default()))
645 }
646
647 fn add(&mut self, global_index: usize, ext: String) {
648 self.0.entry(ext.into_bytes()).or_insert(vec![]).push(global_index);
649 }
650
651 fn is_match(&self, candidate: &Candidate<'_>) -> bool {
652 if candidate.ext.is_empty() {
653 return false;
654 }
655 self.0.contains_key(candidate.ext.as_bytes())
656 }
657
658 #[inline(never)]
659 fn matches_into(
660 &self,
661 candidate: &Candidate<'_>,
662 matches: &mut Vec<usize>,
663 ) {
664 if candidate.ext.is_empty() {
665 return;
666 }
667 if let Some(hits) = self.0.get(candidate.ext.as_bytes()) {
668 matches.extend(hits);
669 }
670 }
671}
672
673#[derive(Clone, Debug)]
674struct PrefixStrategy {
675 matcher: AhoCorasick,
676 map: Vec<usize>,
677 longest: usize,
678}
679
680impl PrefixStrategy {
681 fn is_match(&self, candidate: &Candidate<'_>) -> bool {
682 let path = candidate.path_prefix(self.longest);
683 for m in self.matcher.find_overlapping_iter(path) {
684 if m.start() == 0 {
685 return true;
686 }
687 }
688 false
689 }
690
691 fn matches_into(
692 &self,
693 candidate: &Candidate<'_>,
694 matches: &mut Vec<usize>,
695 ) {
696 let path = candidate.path_prefix(self.longest);
697 for m in self.matcher.find_overlapping_iter(path) {
698 if m.start() == 0 {
699 matches.push(self.map[m.pattern()]);
700 }
701 }
702 }
703}
704
705#[derive(Clone, Debug)]
706struct SuffixStrategy {
707 matcher: AhoCorasick,
708 map: Vec<usize>,
709 longest: usize,
710}
711
712impl SuffixStrategy {
713 fn is_match(&self, candidate: &Candidate<'_>) -> bool {
714 let path = candidate.path_suffix(self.longest);
715 for m in self.matcher.find_overlapping_iter(path) {
716 if m.end() == path.len() {
717 return true;
718 }
719 }
720 false
721 }
722
723 fn matches_into(
724 &self,
725 candidate: &Candidate<'_>,
726 matches: &mut Vec<usize>,
727 ) {
728 let path = candidate.path_suffix(self.longest);
729 for m in self.matcher.find_overlapping_iter(path) {
730 if m.end() == path.len() {
731 matches.push(self.map[m.pattern()]);
732 }
733 }
734 }
735}
736
737#[derive(Clone, Debug)]
738struct RequiredExtensionStrategy(HashMap<Vec<u8>, Vec<(usize, Regex)>, Fnv>);
739
740impl RequiredExtensionStrategy {
741 fn is_match(&self, candidate: &Candidate<'_>) -> bool {
742 if candidate.ext.is_empty() {
743 return false;
744 }
745 match self.0.get(candidate.ext.as_bytes()) {
746 None => false,
747 Some(regexes) => {
748 for &(_, ref re) in regexes {
749 if re.is_match(candidate.path.as_bytes()) {
750 return true;
751 }
752 }
753 false
754 }
755 }
756 }
757
758 #[inline(never)]
759 fn matches_into(
760 &self,
761 candidate: &Candidate<'_>,
762 matches: &mut Vec<usize>,
763 ) {
764 if candidate.ext.is_empty() {
765 return;
766 }
767 if let Some(regexes) = self.0.get(candidate.ext.as_bytes()) {
768 for &(global_index, ref re) in regexes {
769 if re.is_match(candidate.path.as_bytes()) {
770 matches.push(global_index);
771 }
772 }
773 }
774 }
775}
776
777#[derive(Clone, Debug)]
778struct RegexSetStrategy {
779 matcher: RegexSet,
780 map: Vec<usize>,
781}
782
783impl RegexSetStrategy {
784 fn is_match(&self, candidate: &Candidate<'_>) -> bool {
785 self.matcher.is_match(candidate.path.as_bytes())
786 }
787
788 fn matches_into(
789 &self,
790 candidate: &Candidate<'_>,
791 matches: &mut Vec<usize>,
792 ) {
793 for i in self.matcher.matches(candidate.path.as_bytes()) {
794 matches.push(self.map[i]);
795 }
796 }
797}
798
799#[derive(Clone, Debug)]
800struct MultiStrategyBuilder {
801 literals: Vec<String>,
802 map: Vec<usize>,
803 longest: usize,
804}
805
806impl MultiStrategyBuilder {
807 fn new() -> MultiStrategyBuilder {
808 MultiStrategyBuilder { literals: vec![], map: vec![], longest: 0 }
809 }
810
811 fn add(&mut self, global_index: usize, literal: String) {
812 if literal.len() > self.longest {
813 self.longest = literal.len();
814 }
815 self.map.push(global_index);
816 self.literals.push(literal);
817 }
818
819 fn prefix(self) -> PrefixStrategy {
820 PrefixStrategy {
821 matcher: AhoCorasick::new_auto_configured(&self.literals),
822 map: self.map,
823 longest: self.longest,
824 }
825 }
826
827 fn suffix(self) -> SuffixStrategy {
828 SuffixStrategy {
829 matcher: AhoCorasick::new_auto_configured(&self.literals),
830 map: self.map,
831 longest: self.longest,
832 }
833 }
834
835 fn regex_set(self) -> Result<RegexSetStrategy, Error> {
836 Ok(RegexSetStrategy {
837 matcher: new_regex_set(self.literals)?,
838 map: self.map,
839 })
840 }
841}
842
843#[derive(Clone, Debug)]
844struct RequiredExtensionStrategyBuilder(
845 HashMap<Vec<u8>, Vec<(usize, String)>>,
846);
847
848impl RequiredExtensionStrategyBuilder {
849 fn new() -> RequiredExtensionStrategyBuilder {
850 RequiredExtensionStrategyBuilder(HashMap::new())
851 }
852
853 fn add(&mut self, global_index: usize, ext: String, regex: String) {
854 self.0
855 .entry(ext.into_bytes())
856 .or_insert(vec![])
857 .push((global_index, regex));
858 }
859
860 fn build(self) -> Result<RequiredExtensionStrategy, Error> {
861 let mut exts = HashMap::with_hasher(Fnv::default());
862 for (ext, regexes) in self.0.into_iter() {
863 exts.insert(ext.clone(), vec![]);
864 for (global_index, regex) in regexes {
865 let compiled = new_regex(®ex)?;
866 exts.get_mut(&ext).unwrap().push((global_index, compiled));
867 }
868 }
869 Ok(RequiredExtensionStrategy(exts))
870 }
871}
872
873#[cfg(test)]
874mod tests {
875 use super::{GlobSet, GlobSetBuilder};
876 use crate::glob::Glob;
877
878 #[test]
879 fn set_works() {
880 let mut builder = GlobSetBuilder::new();
881 builder.add(Glob::new("src/**/*.rs").unwrap());
882 builder.add(Glob::new("*.c").unwrap());
883 builder.add(Glob::new("src/lib.rs").unwrap());
884 let set = builder.build().unwrap();
885
886 assert!(set.is_match("foo.c"));
887 assert!(set.is_match("src/foo.c"));
888 assert!(!set.is_match("foo.rs"));
889 assert!(!set.is_match("tests/foo.rs"));
890 assert!(set.is_match("src/foo.rs"));
891 assert!(set.is_match("src/grep/src/main.rs"));
892
893 let matches = set.matches("src/lib.rs");
894 assert_eq!(2, matches.len());
895 assert_eq!(0, matches[0]);
896 assert_eq!(2, matches[1]);
897 }
898
899 #[test]
900 fn empty_set_works() {
901 let set = GlobSetBuilder::new().build().unwrap();
902 assert!(!set.is_match(""));
903 assert!(!set.is_match("a"));
904 }
905
906 #[test]
907 fn default_set_is_empty_works() {
908 let set: GlobSet = Default::default();
909 assert!(!set.is_match(""));
910 assert!(!set.is_match("a"));
911 }
912}