globset/
lib.rs

1/*!
2The globset crate provides cross platform single glob and glob set matching.
3
4Glob set matching is the process of matching one or more glob patterns against
5a single candidate path simultaneously, and returning all of the globs that
6matched. For example, given this set of globs:
7
8```ignore
9*.rs
10src/lib.rs
11src/**/foo.rs
12```
13
14and a path `src/bar/baz/foo.rs`, then the set would report the first and third
15globs as matching.
16
17# Example: one glob
18
19This example shows how to match a single glob against a single file path.
20
21```
22# fn example() -> Result<(), globset::Error> {
23use globset::Glob;
24
25let glob = Glob::new("*.rs")?.compile_matcher();
26
27assert!(glob.is_match("foo.rs"));
28assert!(glob.is_match("foo/bar.rs"));
29assert!(!glob.is_match("Cargo.toml"));
30# Ok(()) } example().unwrap();
31```
32
33# Example: configuring a glob matcher
34
35This example shows how to use a `GlobBuilder` to configure aspects of match
36semantics. In this example, we prevent wildcards from matching path separators.
37
38```
39# fn example() -> Result<(), globset::Error> {
40use globset::GlobBuilder;
41
42let glob = GlobBuilder::new("*.rs")
43    .literal_separator(true).build()?.compile_matcher();
44
45assert!(glob.is_match("foo.rs"));
46assert!(!glob.is_match("foo/bar.rs")); // no longer matches
47assert!(!glob.is_match("Cargo.toml"));
48# Ok(()) } example().unwrap();
49```
50
51# Example: match multiple globs at once
52
53This example shows how to match multiple glob patterns at once.
54
55```
56# fn example() -> Result<(), globset::Error> {
57use globset::{Glob, GlobSetBuilder};
58
59let mut builder = GlobSetBuilder::new();
60// A GlobBuilder can be used to configure each glob's match semantics
61// independently.
62builder.add(Glob::new("*.rs")?);
63builder.add(Glob::new("src/lib.rs")?);
64builder.add(Glob::new("src/**/foo.rs")?);
65let set = builder.build()?;
66
67assert_eq!(set.matches("src/bar/baz/foo.rs"), vec![0, 2]);
68# Ok(()) } example().unwrap();
69```
70
71# Syntax
72
73Standard Unix-style glob syntax is supported:
74
75* `?` matches any single character. (If the `literal_separator` option is
76  enabled, then `?` can never match a path separator.)
77* `*` matches zero or more characters. (If the `literal_separator` option is
78  enabled, then `*` can never match a path separator.)
79* `**` recursively matches directories but is only legal in three situations.
80  First, if the glob starts with <code>\*\*&#x2F;</code>, then it matches
81  all directories. For example, <code>\*\*&#x2F;foo</code> matches `foo`
82  and `bar/foo` but not `foo/bar`. Secondly, if the glob ends with
83  <code>&#x2F;\*\*</code>, then it matches all sub-entries. For example,
84  <code>foo&#x2F;\*\*</code> matches `foo/a` and `foo/a/b`, but not `foo`.
85  Thirdly, if the glob contains <code>&#x2F;\*\*&#x2F;</code> anywhere within
86  the pattern, then it matches zero or more directories. Using `**` anywhere
87  else is illegal (N.B. the glob `**` is allowed and means "match everything").
88* `{a,b}` matches `a` or `b` where `a` and `b` are arbitrary glob patterns.
89  (N.B. Nesting `{...}` is not currently allowed.)
90* `[ab]` matches `a` or `b` where `a` and `b` are characters. Use
91  `[!ab]` to match any character except for `a` and `b`.
92* Metacharacters such as `*` and `?` can be escaped with character class
93  notation. e.g., `[*]` matches `*`.
94* When backslash escapes are enabled, a backslash (`\`) will escape all meta
95  characters in a glob. If it precedes a non-meta character, then the slash is
96  ignored. A `\\` will match a literal `\\`. Note that this mode is only
97  enabled on Unix platforms by default, but can be enabled on any platform
98  via the `backslash_escape` setting on `Glob`.
99
100A `GlobBuilder` can be used to prevent wildcards from matching path separators,
101or to enable case insensitive matching.
102*/
103
104#![deny(missing_docs)]
105
106use std::borrow::Cow;
107use std::collections::{BTreeMap, HashMap};
108use std::error::Error as StdError;
109use std::fmt;
110use std::hash;
111use std::path::Path;
112use std::str;
113
114use aho_corasick::AhoCorasick;
115use bstr::{ByteSlice, ByteVec, B};
116use regex::bytes::{Regex, RegexBuilder, RegexSet};
117
118use crate::glob::MatchStrategy;
119pub use crate::glob::{Glob, GlobBuilder, GlobMatcher};
120use crate::pathutil::{file_name, file_name_ext, normalize_path};
121
122mod glob;
123mod pathutil;
124
125#[cfg(feature = "serde1")]
126mod serde_impl;
127
// With the `log` feature enabled, `debug!` forwards every token it is given
// to `log::debug!`.
#[cfg(feature = "log")]
macro_rules! debug {
    ($($arg:tt)*) => {
        ::log::debug!($($arg)*);
    };
}

// Without the `log` feature, `debug!` expands to nothing, so its arguments
// are discarded at compile time and never evaluated.
#[cfg(not(feature = "log"))]
macro_rules! debug {
    ($($arg:tt)*) => {};
}
137
/// An error produced while parsing a glob pattern.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Error {
    /// The glob supplied by the caller, when one is known.
    glob: Option<String>,
    /// What went wrong.
    kind: ErrorKind,
}

/// The different ways parsing a glob pattern can fail.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ErrorKind {
    /// **DEPRECATED**.
    ///
    /// This used to be reported for consistency with git's glob
    /// specification, but that specification now accepts every use of `**`.
    /// A `**` that is neither adjacent to a path separator nor at the
    /// beginning/end of a glob is now treated as two consecutive `*`
    /// patterns, so this error is no longer used.
    InvalidRecursive,
    /// A character class (e.g., `[abc]`) was opened but never closed.
    UnclosedClass,
    /// A range inside a character class (e.g., `[a-z]`) is invalid, for
    /// example because its start is lexicographically larger than its end.
    /// The two fields are the start and the end of the range.
    InvalidRange(char, char),
    /// A `}` was found without a matching `{`.
    UnopenedAlternates,
    /// A `{` was found without a matching `}`.
    UnclosedAlternates,
    /// An alternating group was nested inside another alternating group,
    /// e.g., `{{a,b},{c,d}}`.
    NestedAlternates,
    /// An unescaped '\' was found at the end of a glob.
    DanglingEscape,
    /// Parsing or compiling a regex failed; the payload is the message.
    Regex(String),
    /// Hints that destructuring should not be exhaustive.
    ///
    /// More variants may be added to this enum over time; this variant
    /// keeps clients from relying on exhaustive matching, so that adding a
    /// variant does not break existing code.
    #[doc(hidden)]
    __Nonexhaustive,
}

impl StdError for Error {
    fn description(&self) -> &str {
        ErrorKind::description(&self.kind)
    }
}

impl Error {
    /// Returns the glob responsible for this error, if there is one.
    pub fn glob(&self) -> Option<&str> {
        self.glob.as_ref().map(String::as_str)
    }

    /// Returns the kind of this error.
    pub fn kind(&self) -> &ErrorKind {
        &self.kind
    }
}

impl ErrorKind {
    /// Returns the fixed message for this kind. For `Regex` this is the
    /// stored message itself.
    ///
    /// Panics (via `unreachable!`) for the hidden `__Nonexhaustive` variant.
    fn description(&self) -> &str {
        match self {
            ErrorKind::Regex(message) => message.as_str(),
            ErrorKind::DanglingEscape => "dangling '\\'",
            ErrorKind::NestedAlternates => {
                "nested alternate groups are not allowed"
            }
            ErrorKind::UnclosedAlternates => {
                "unclosed alternate group; missing '}' \
                (maybe escape '{' with '[{]'?)"
            }
            ErrorKind::UnopenedAlternates => {
                "unopened alternate group; missing '{' \
                (maybe escape '}' with '[}]'?)"
            }
            ErrorKind::InvalidRange(_, _) => "invalid character range",
            ErrorKind::UnclosedClass => {
                "unclosed character class; missing ']'"
            }
            ErrorKind::InvalidRecursive => {
                "invalid use of **; must be one path component"
            }
            ErrorKind::__Nonexhaustive => unreachable!(),
        }
    }
}

impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if let Some(glob) = &self.glob {
            write!(f, "error parsing glob '{}': {}", glob, self.kind)
        } else {
            // No glob to report: the message is just the kind's own text.
            fmt::Display::fmt(&self.kind, f)
        }
    }
}

impl fmt::Display for ErrorKind {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            // The only kind whose displayed text includes its payload.
            ErrorKind::InvalidRange(start, end) => {
                write!(f, "invalid range; '{}' > '{}'", start, end)
            }
            ErrorKind::__Nonexhaustive => unreachable!(),
            ErrorKind::Regex(_)
            | ErrorKind::DanglingEscape
            | ErrorKind::NestedAlternates
            | ErrorKind::UnclosedAlternates
            | ErrorKind::UnopenedAlternates
            | ErrorKind::UnclosedClass
            | ErrorKind::InvalidRecursive => f.write_str(self.description()),
        }
    }
}
258
259fn new_regex(pat: &str) -> Result<Regex, Error> {
260    RegexBuilder::new(pat)
261        .dot_matches_new_line(true)
262        .size_limit(10 * (1 << 20))
263        .dfa_size_limit(10 * (1 << 20))
264        .build()
265        .map_err(|err| Error {
266            glob: Some(pat.to_string()),
267            kind: ErrorKind::Regex(err.to_string()),
268        })
269}
270
271fn new_regex_set<I, S>(pats: I) -> Result<RegexSet, Error>
272where
273    S: AsRef<str>,
274    I: IntoIterator<Item = S>,
275{
276    RegexSet::new(pats).map_err(|err| Error {
277        glob: None,
278        kind: ErrorKind::Regex(err.to_string()),
279    })
280}
281
// Hasher factory used by the extension-keyed `HashMap`s in this file; it
// produces `fnv::FnvHasher` values through `BuildHasherDefault`.
type Fnv = hash::BuildHasherDefault<fnv::FnvHasher>;
283
284/// GlobSet represents a group of globs that can be matched together in a
285/// single pass.
286#[derive(Clone, Debug)]
287pub struct GlobSet {
288    len: usize,
289    strats: Vec<GlobSetMatchStrategy>,
290}
291
292impl GlobSet {
293    /// Create an empty `GlobSet`. An empty set matches nothing.
294    #[inline]
295    pub fn empty() -> GlobSet {
296        GlobSet { len: 0, strats: vec![] }
297    }
298
299    /// Returns true if this set is empty, and therefore matches nothing.
300    #[inline]
301    pub fn is_empty(&self) -> bool {
302        self.len == 0
303    }
304
305    /// Returns the number of globs in this set.
306    #[inline]
307    pub fn len(&self) -> usize {
308        self.len
309    }
310
311    /// Returns true if any glob in this set matches the path given.
312    pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
313        self.is_match_candidate(&Candidate::new(path.as_ref()))
314    }
315
316    /// Returns true if any glob in this set matches the path given.
317    ///
318    /// This takes a Candidate as input, which can be used to amortize the
319    /// cost of preparing a path for matching.
320    pub fn is_match_candidate(&self, path: &Candidate<'_>) -> bool {
321        if self.is_empty() {
322            return false;
323        }
324        for strat in &self.strats {
325            if strat.is_match(path) {
326                return true;
327            }
328        }
329        false
330    }
331
332    /// Returns the sequence number of every glob pattern that matches the
333    /// given path.
334    pub fn matches<P: AsRef<Path>>(&self, path: P) -> Vec<usize> {
335        self.matches_candidate(&Candidate::new(path.as_ref()))
336    }
337
338    /// Returns the sequence number of every glob pattern that matches the
339    /// given path.
340    ///
341    /// This takes a Candidate as input, which can be used to amortize the
342    /// cost of preparing a path for matching.
343    pub fn matches_candidate(&self, path: &Candidate<'_>) -> Vec<usize> {
344        let mut into = vec![];
345        if self.is_empty() {
346            return into;
347        }
348        self.matches_candidate_into(path, &mut into);
349        into
350    }
351
352    /// Adds the sequence number of every glob pattern that matches the given
353    /// path to the vec given.
354    ///
355    /// `into` is cleared before matching begins, and contains the set of
356    /// sequence numbers (in ascending order) after matching ends. If no globs
357    /// were matched, then `into` will be empty.
358    pub fn matches_into<P: AsRef<Path>>(
359        &self,
360        path: P,
361        into: &mut Vec<usize>,
362    ) {
363        self.matches_candidate_into(&Candidate::new(path.as_ref()), into);
364    }
365
366    /// Adds the sequence number of every glob pattern that matches the given
367    /// path to the vec given.
368    ///
369    /// `into` is cleared before matching begins, and contains the set of
370    /// sequence numbers (in ascending order) after matching ends. If no globs
371    /// were matched, then `into` will be empty.
372    ///
373    /// This takes a Candidate as input, which can be used to amortize the
374    /// cost of preparing a path for matching.
375    pub fn matches_candidate_into(
376        &self,
377        path: &Candidate<'_>,
378        into: &mut Vec<usize>,
379    ) {
380        into.clear();
381        if self.is_empty() {
382            return;
383        }
384        for strat in &self.strats {
385            strat.matches_into(path, into);
386        }
387        into.sort();
388        into.dedup();
389    }
390
391    fn new(pats: &[Glob]) -> Result<GlobSet, Error> {
392        if pats.is_empty() {
393            return Ok(GlobSet { len: 0, strats: vec![] });
394        }
395        let mut lits = LiteralStrategy::new();
396        let mut base_lits = BasenameLiteralStrategy::new();
397        let mut exts = ExtensionStrategy::new();
398        let mut prefixes = MultiStrategyBuilder::new();
399        let mut suffixes = MultiStrategyBuilder::new();
400        let mut required_exts = RequiredExtensionStrategyBuilder::new();
401        let mut regexes = MultiStrategyBuilder::new();
402        for (i, p) in pats.iter().enumerate() {
403            match MatchStrategy::new(p) {
404                MatchStrategy::Literal(lit) => {
405                    lits.add(i, lit);
406                }
407                MatchStrategy::BasenameLiteral(lit) => {
408                    base_lits.add(i, lit);
409                }
410                MatchStrategy::Extension(ext) => {
411                    exts.add(i, ext);
412                }
413                MatchStrategy::Prefix(prefix) => {
414                    prefixes.add(i, prefix);
415                }
416                MatchStrategy::Suffix { suffix, component } => {
417                    if component {
418                        lits.add(i, suffix[1..].to_string());
419                    }
420                    suffixes.add(i, suffix);
421                }
422                MatchStrategy::RequiredExtension(ext) => {
423                    required_exts.add(i, ext, p.regex().to_owned());
424                }
425                MatchStrategy::Regex => {
426                    debug!("glob converted to regex: {:?}", p);
427                    regexes.add(i, p.regex().to_owned());
428                }
429            }
430        }
431        debug!(
432            "built glob set; {} literals, {} basenames, {} extensions, \
433                {} prefixes, {} suffixes, {} required extensions, {} regexes",
434            lits.0.len(),
435            base_lits.0.len(),
436            exts.0.len(),
437            prefixes.literals.len(),
438            suffixes.literals.len(),
439            required_exts.0.len(),
440            regexes.literals.len()
441        );
442        Ok(GlobSet {
443            len: pats.len(),
444            strats: vec![
445                GlobSetMatchStrategy::Extension(exts),
446                GlobSetMatchStrategy::BasenameLiteral(base_lits),
447                GlobSetMatchStrategy::Literal(lits),
448                GlobSetMatchStrategy::Suffix(suffixes.suffix()),
449                GlobSetMatchStrategy::Prefix(prefixes.prefix()),
450                GlobSetMatchStrategy::RequiredExtension(
451                    required_exts.build()?,
452                ),
453                GlobSetMatchStrategy::Regex(regexes.regex_set()?),
454            ],
455        })
456    }
457}
458
459impl Default for GlobSet {
460    /// Create a default empty GlobSet.
461    fn default() -> Self {
462        GlobSet::empty()
463    }
464}
465
466/// GlobSetBuilder builds a group of patterns that can be used to
467/// simultaneously match a file path.
468#[derive(Clone, Debug)]
469pub struct GlobSetBuilder {
470    pats: Vec<Glob>,
471}
472
473impl GlobSetBuilder {
474    /// Create a new GlobSetBuilder. A GlobSetBuilder can be used to add new
475    /// patterns. Once all patterns have been added, `build` should be called
476    /// to produce a `GlobSet`, which can then be used for matching.
477    pub fn new() -> GlobSetBuilder {
478        GlobSetBuilder { pats: vec![] }
479    }
480
481    /// Builds a new matcher from all of the glob patterns added so far.
482    ///
483    /// Once a matcher is built, no new patterns can be added to it.
484    pub fn build(&self) -> Result<GlobSet, Error> {
485        GlobSet::new(&self.pats)
486    }
487
488    /// Add a new pattern to this set.
489    pub fn add(&mut self, pat: Glob) -> &mut GlobSetBuilder {
490        self.pats.push(pat);
491        self
492    }
493}
494
495/// A candidate path for matching.
496///
497/// All glob matching in this crate operates on `Candidate` values.
498/// Constructing candidates has a very small cost associated with it, so
499/// callers may find it beneficial to amortize that cost when matching a single
500/// path against multiple globs or sets of globs.
501#[derive(Clone, Debug)]
502pub struct Candidate<'a> {
503    path: Cow<'a, [u8]>,
504    basename: Cow<'a, [u8]>,
505    ext: Cow<'a, [u8]>,
506}
507
508impl<'a> Candidate<'a> {
509    /// Create a new candidate for matching from the given path.
510    pub fn new<P: AsRef<Path> + ?Sized>(path: &'a P) -> Candidate<'a> {
511        let path = normalize_path(Vec::from_path_lossy(path.as_ref()));
512        let basename = file_name(&path).unwrap_or(Cow::Borrowed(B("")));
513        let ext = file_name_ext(&basename).unwrap_or(Cow::Borrowed(B("")));
514        Candidate { path: path, basename: basename, ext: ext }
515    }
516
517    fn path_prefix(&self, max: usize) -> &[u8] {
518        if self.path.len() <= max {
519            &*self.path
520        } else {
521            &self.path[..max]
522        }
523    }
524
525    fn path_suffix(&self, max: usize) -> &[u8] {
526        if self.path.len() <= max {
527            &*self.path
528        } else {
529            &self.path[self.path.len() - max..]
530        }
531    }
532}
533
534#[derive(Clone, Debug)]
535enum GlobSetMatchStrategy {
536    Literal(LiteralStrategy),
537    BasenameLiteral(BasenameLiteralStrategy),
538    Extension(ExtensionStrategy),
539    Prefix(PrefixStrategy),
540    Suffix(SuffixStrategy),
541    RequiredExtension(RequiredExtensionStrategy),
542    Regex(RegexSetStrategy),
543}
544
545impl GlobSetMatchStrategy {
546    fn is_match(&self, candidate: &Candidate<'_>) -> bool {
547        use self::GlobSetMatchStrategy::*;
548        match *self {
549            Literal(ref s) => s.is_match(candidate),
550            BasenameLiteral(ref s) => s.is_match(candidate),
551            Extension(ref s) => s.is_match(candidate),
552            Prefix(ref s) => s.is_match(candidate),
553            Suffix(ref s) => s.is_match(candidate),
554            RequiredExtension(ref s) => s.is_match(candidate),
555            Regex(ref s) => s.is_match(candidate),
556        }
557    }
558
559    fn matches_into(
560        &self,
561        candidate: &Candidate<'_>,
562        matches: &mut Vec<usize>,
563    ) {
564        use self::GlobSetMatchStrategy::*;
565        match *self {
566            Literal(ref s) => s.matches_into(candidate, matches),
567            BasenameLiteral(ref s) => s.matches_into(candidate, matches),
568            Extension(ref s) => s.matches_into(candidate, matches),
569            Prefix(ref s) => s.matches_into(candidate, matches),
570            Suffix(ref s) => s.matches_into(candidate, matches),
571            RequiredExtension(ref s) => s.matches_into(candidate, matches),
572            Regex(ref s) => s.matches_into(candidate, matches),
573        }
574    }
575}
576
577#[derive(Clone, Debug)]
578struct LiteralStrategy(BTreeMap<Vec<u8>, Vec<usize>>);
579
580impl LiteralStrategy {
581    fn new() -> LiteralStrategy {
582        LiteralStrategy(BTreeMap::new())
583    }
584
585    fn add(&mut self, global_index: usize, lit: String) {
586        self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index);
587    }
588
589    fn is_match(&self, candidate: &Candidate<'_>) -> bool {
590        self.0.contains_key(candidate.path.as_bytes())
591    }
592
593    #[inline(never)]
594    fn matches_into(
595        &self,
596        candidate: &Candidate<'_>,
597        matches: &mut Vec<usize>,
598    ) {
599        if let Some(hits) = self.0.get(candidate.path.as_bytes()) {
600            matches.extend(hits);
601        }
602    }
603}
604
605#[derive(Clone, Debug)]
606struct BasenameLiteralStrategy(BTreeMap<Vec<u8>, Vec<usize>>);
607
608impl BasenameLiteralStrategy {
609    fn new() -> BasenameLiteralStrategy {
610        BasenameLiteralStrategy(BTreeMap::new())
611    }
612
613    fn add(&mut self, global_index: usize, lit: String) {
614        self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index);
615    }
616
617    fn is_match(&self, candidate: &Candidate<'_>) -> bool {
618        if candidate.basename.is_empty() {
619            return false;
620        }
621        self.0.contains_key(candidate.basename.as_bytes())
622    }
623
624    #[inline(never)]
625    fn matches_into(
626        &self,
627        candidate: &Candidate<'_>,
628        matches: &mut Vec<usize>,
629    ) {
630        if candidate.basename.is_empty() {
631            return;
632        }
633        if let Some(hits) = self.0.get(candidate.basename.as_bytes()) {
634            matches.extend(hits);
635        }
636    }
637}
638
639#[derive(Clone, Debug)]
640struct ExtensionStrategy(HashMap<Vec<u8>, Vec<usize>, Fnv>);
641
642impl ExtensionStrategy {
643    fn new() -> ExtensionStrategy {
644        ExtensionStrategy(HashMap::with_hasher(Fnv::default()))
645    }
646
647    fn add(&mut self, global_index: usize, ext: String) {
648        self.0.entry(ext.into_bytes()).or_insert(vec![]).push(global_index);
649    }
650
651    fn is_match(&self, candidate: &Candidate<'_>) -> bool {
652        if candidate.ext.is_empty() {
653            return false;
654        }
655        self.0.contains_key(candidate.ext.as_bytes())
656    }
657
658    #[inline(never)]
659    fn matches_into(
660        &self,
661        candidate: &Candidate<'_>,
662        matches: &mut Vec<usize>,
663    ) {
664        if candidate.ext.is_empty() {
665            return;
666        }
667        if let Some(hits) = self.0.get(candidate.ext.as_bytes()) {
668            matches.extend(hits);
669        }
670    }
671}
672
673#[derive(Clone, Debug)]
674struct PrefixStrategy {
675    matcher: AhoCorasick,
676    map: Vec<usize>,
677    longest: usize,
678}
679
680impl PrefixStrategy {
681    fn is_match(&self, candidate: &Candidate<'_>) -> bool {
682        let path = candidate.path_prefix(self.longest);
683        for m in self.matcher.find_overlapping_iter(path) {
684            if m.start() == 0 {
685                return true;
686            }
687        }
688        false
689    }
690
691    fn matches_into(
692        &self,
693        candidate: &Candidate<'_>,
694        matches: &mut Vec<usize>,
695    ) {
696        let path = candidate.path_prefix(self.longest);
697        for m in self.matcher.find_overlapping_iter(path) {
698            if m.start() == 0 {
699                matches.push(self.map[m.pattern()]);
700            }
701        }
702    }
703}
704
705#[derive(Clone, Debug)]
706struct SuffixStrategy {
707    matcher: AhoCorasick,
708    map: Vec<usize>,
709    longest: usize,
710}
711
712impl SuffixStrategy {
713    fn is_match(&self, candidate: &Candidate<'_>) -> bool {
714        let path = candidate.path_suffix(self.longest);
715        for m in self.matcher.find_overlapping_iter(path) {
716            if m.end() == path.len() {
717                return true;
718            }
719        }
720        false
721    }
722
723    fn matches_into(
724        &self,
725        candidate: &Candidate<'_>,
726        matches: &mut Vec<usize>,
727    ) {
728        let path = candidate.path_suffix(self.longest);
729        for m in self.matcher.find_overlapping_iter(path) {
730            if m.end() == path.len() {
731                matches.push(self.map[m.pattern()]);
732            }
733        }
734    }
735}
736
737#[derive(Clone, Debug)]
738struct RequiredExtensionStrategy(HashMap<Vec<u8>, Vec<(usize, Regex)>, Fnv>);
739
740impl RequiredExtensionStrategy {
741    fn is_match(&self, candidate: &Candidate<'_>) -> bool {
742        if candidate.ext.is_empty() {
743            return false;
744        }
745        match self.0.get(candidate.ext.as_bytes()) {
746            None => false,
747            Some(regexes) => {
748                for &(_, ref re) in regexes {
749                    if re.is_match(candidate.path.as_bytes()) {
750                        return true;
751                    }
752                }
753                false
754            }
755        }
756    }
757
758    #[inline(never)]
759    fn matches_into(
760        &self,
761        candidate: &Candidate<'_>,
762        matches: &mut Vec<usize>,
763    ) {
764        if candidate.ext.is_empty() {
765            return;
766        }
767        if let Some(regexes) = self.0.get(candidate.ext.as_bytes()) {
768            for &(global_index, ref re) in regexes {
769                if re.is_match(candidate.path.as_bytes()) {
770                    matches.push(global_index);
771                }
772            }
773        }
774    }
775}
776
777#[derive(Clone, Debug)]
778struct RegexSetStrategy {
779    matcher: RegexSet,
780    map: Vec<usize>,
781}
782
783impl RegexSetStrategy {
784    fn is_match(&self, candidate: &Candidate<'_>) -> bool {
785        self.matcher.is_match(candidate.path.as_bytes())
786    }
787
788    fn matches_into(
789        &self,
790        candidate: &Candidate<'_>,
791        matches: &mut Vec<usize>,
792    ) {
793        for i in self.matcher.matches(candidate.path.as_bytes()) {
794            matches.push(self.map[i]);
795        }
796    }
797}
798
799#[derive(Clone, Debug)]
800struct MultiStrategyBuilder {
801    literals: Vec<String>,
802    map: Vec<usize>,
803    longest: usize,
804}
805
806impl MultiStrategyBuilder {
807    fn new() -> MultiStrategyBuilder {
808        MultiStrategyBuilder { literals: vec![], map: vec![], longest: 0 }
809    }
810
811    fn add(&mut self, global_index: usize, literal: String) {
812        if literal.len() > self.longest {
813            self.longest = literal.len();
814        }
815        self.map.push(global_index);
816        self.literals.push(literal);
817    }
818
819    fn prefix(self) -> PrefixStrategy {
820        PrefixStrategy {
821            matcher: AhoCorasick::new_auto_configured(&self.literals),
822            map: self.map,
823            longest: self.longest,
824        }
825    }
826
827    fn suffix(self) -> SuffixStrategy {
828        SuffixStrategy {
829            matcher: AhoCorasick::new_auto_configured(&self.literals),
830            map: self.map,
831            longest: self.longest,
832        }
833    }
834
835    fn regex_set(self) -> Result<RegexSetStrategy, Error> {
836        Ok(RegexSetStrategy {
837            matcher: new_regex_set(self.literals)?,
838            map: self.map,
839        })
840    }
841}
842
843#[derive(Clone, Debug)]
844struct RequiredExtensionStrategyBuilder(
845    HashMap<Vec<u8>, Vec<(usize, String)>>,
846);
847
848impl RequiredExtensionStrategyBuilder {
849    fn new() -> RequiredExtensionStrategyBuilder {
850        RequiredExtensionStrategyBuilder(HashMap::new())
851    }
852
853    fn add(&mut self, global_index: usize, ext: String, regex: String) {
854        self.0
855            .entry(ext.into_bytes())
856            .or_insert(vec![])
857            .push((global_index, regex));
858    }
859
860    fn build(self) -> Result<RequiredExtensionStrategy, Error> {
861        let mut exts = HashMap::with_hasher(Fnv::default());
862        for (ext, regexes) in self.0.into_iter() {
863            exts.insert(ext.clone(), vec![]);
864            for (global_index, regex) in regexes {
865                let compiled = new_regex(&regex)?;
866                exts.get_mut(&ext).unwrap().push((global_index, compiled));
867            }
868        }
869        Ok(RequiredExtensionStrategy(exts))
870    }
871}
872
#[cfg(test)]
mod tests {
    use super::{GlobSet, GlobSetBuilder};
    use crate::glob::Glob;

    // A set built from three globs reports matches for any of them and
    // returns the matching sequence numbers in ascending order.
    #[test]
    fn set_works() {
        let mut builder = GlobSetBuilder::new();
        for pattern in &["src/**/*.rs", "*.c", "src/lib.rs"] {
            builder.add(Glob::new(pattern).unwrap());
        }
        let set = builder.build().unwrap();

        let matching =
            ["foo.c", "src/foo.c", "src/foo.rs", "src/grep/src/main.rs"];
        for path in &matching {
            assert!(set.is_match(path), "expected a match for {}", path);
        }
        for path in &["foo.rs", "tests/foo.rs"] {
            assert!(!set.is_match(path), "expected no match for {}", path);
        }

        assert_eq!(vec![0, 2], set.matches("src/lib.rs"));
    }

    // A set built from no globs matches nothing, not even the empty path.
    #[test]
    fn empty_set_works() {
        let set = GlobSetBuilder::new().build().unwrap();
        for path in &["", "a"] {
            assert!(!set.is_match(path));
        }
    }

    // The `Default` set behaves like the empty set.
    #[test]
    fn default_set_is_empty_works() {
        let set = GlobSet::default();
        for path in &["", "a"] {
            assert!(!set.is_match(path));
        }
    }
}