globset/
glob.rs

1use std::fmt;
2use std::hash;
3use std::iter;
4use std::ops::{Deref, DerefMut};
5use std::path::{is_separator, Path};
6use std::str;
7
8use regex;
9use regex::bytes::Regex;
10
11use crate::{new_regex, Candidate, Error, ErrorKind};
12
/// Describes a matching strategy for a particular pattern.
///
/// This provides a way to more quickly determine whether a pattern matches
/// a particular file path in a way that scales with a large number of
/// patterns. For example, if many patterns are of the form `*.ext`, then it's
/// possible to test whether any of those patterns matches by looking up a
/// file path's extension in a hash table.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum MatchStrategy {
    /// A pattern matches if and only if the entire file path matches this
    /// literal string.
    Literal(String),
    /// A pattern matches if and only if the file path's basename matches this
    /// literal string.
    BasenameLiteral(String),
    /// A pattern matches if and only if the file path's extension matches this
    /// literal string.
    Extension(String),
    /// A pattern matches if and only if this prefix literal is a prefix of the
    /// candidate file path.
    Prefix(String),
    /// A pattern matches if and only if this suffix literal is a suffix of the
    /// candidate file path.
    ///
    /// An exception: if `component` is true, then `suffix` starts with a `/`
    /// and the pattern additionally matches when the entire file path is
    /// equal to `suffix` without that leading `/`. In other words, the
    /// suffix must either be the whole path or begin at a path component
    /// boundary.
    Suffix {
        /// The actual suffix.
        suffix: String,
        /// Whether this must start at the beginning of a path component.
        component: bool,
    },
    /// A pattern matches only if the given extension matches the file path's
    /// extension. Note that this is a necessary but NOT sufficient criterion.
    /// Namely, if the extension matches, then a full regex search is still
    /// required.
    RequiredExtension(String),
    /// A regex needs to be used for matching.
    Regex,
}
53
54impl MatchStrategy {
55    /// Returns a matching strategy for the given pattern.
56    pub fn new(pat: &Glob) -> MatchStrategy {
57        if let Some(lit) = pat.basename_literal() {
58            MatchStrategy::BasenameLiteral(lit)
59        } else if let Some(lit) = pat.literal() {
60            MatchStrategy::Literal(lit)
61        } else if let Some(ext) = pat.ext() {
62            MatchStrategy::Extension(ext)
63        } else if let Some(prefix) = pat.prefix() {
64            MatchStrategy::Prefix(prefix)
65        } else if let Some((suffix, component)) = pat.suffix() {
66            MatchStrategy::Suffix { suffix: suffix, component: component }
67        } else if let Some(ext) = pat.required_ext() {
68            MatchStrategy::RequiredExtension(ext)
69        } else {
70            MatchStrategy::Regex
71        }
72    }
73}
74
75/// Glob represents a successfully parsed shell glob pattern.
76///
77/// It cannot be used directly to match file paths, but it can be converted
78/// to a regular expression string or a matcher.
79#[derive(Clone, Debug, Eq)]
80pub struct Glob {
81    glob: String,
82    re: String,
83    opts: GlobOptions,
84    tokens: Tokens,
85}
86
87impl PartialEq for Glob {
88    fn eq(&self, other: &Glob) -> bool {
89        self.glob == other.glob && self.opts == other.opts
90    }
91}
92
93impl hash::Hash for Glob {
94    fn hash<H: hash::Hasher>(&self, state: &mut H) {
95        self.glob.hash(state);
96        self.opts.hash(state);
97    }
98}
99
100impl fmt::Display for Glob {
101    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
102        self.glob.fmt(f)
103    }
104}
105
106impl str::FromStr for Glob {
107    type Err = Error;
108
109    fn from_str(glob: &str) -> Result<Self, Self::Err> {
110        Self::new(glob)
111    }
112}
113
114/// A matcher for a single pattern.
115#[derive(Clone, Debug)]
116pub struct GlobMatcher {
117    /// The underlying pattern.
118    pat: Glob,
119    /// The pattern, as a compiled regex.
120    re: Regex,
121}
122
123impl GlobMatcher {
124    /// Tests whether the given path matches this pattern or not.
125    pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
126        self.is_match_candidate(&Candidate::new(path.as_ref()))
127    }
128
129    /// Tests whether the given path matches this pattern or not.
130    pub fn is_match_candidate(&self, path: &Candidate<'_>) -> bool {
131        self.re.is_match(&path.path)
132    }
133
134    /// Returns the `Glob` used to compile this matcher.
135    pub fn glob(&self) -> &Glob {
136        &self.pat
137    }
138}
139
/// A strategic matcher for a single pattern.
///
/// Only compiled for tests; it pairs the strategy chosen by
/// `MatchStrategy::new` with the pattern's regex.
#[cfg(test)]
#[derive(Clone, Debug)]
struct GlobStrategic {
    /// The match strategy to use.
    strategy: MatchStrategy,
    /// The pattern, as a compiled regex.
    re: Regex,
}

#[cfg(test)]
impl GlobStrategic {
    /// Tests whether the given path matches this pattern or not.
    fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
        let candidate = Candidate::new(path.as_ref());
        self.is_match_candidate(&candidate)
    }

    /// Tests whether the given candidate matches this pattern, using the
    /// stored strategy and only running the regex when the strategy needs it.
    fn is_match_candidate(&self, candidate: &Candidate<'_>) -> bool {
        let haystack = &*candidate.path;

        match self.strategy {
            MatchStrategy::Literal(ref lit) => lit.as_bytes() == haystack,
            MatchStrategy::BasenameLiteral(ref lit) => {
                lit.as_bytes() == &*candidate.basename
            }
            MatchStrategy::Extension(ref ext) => {
                ext.as_bytes() == &*candidate.ext
            }
            MatchStrategy::Prefix(ref pre) => {
                starts_with(pre.as_bytes(), haystack)
            }
            MatchStrategy::Suffix { ref suffix, component } => {
                // A component suffix starts with `/`; without that leading
                // separator it may also be equal to the entire path.
                (component && haystack == &suffix.as_bytes()[1..])
                    || ends_with(suffix.as_bytes(), haystack)
            }
            MatchStrategy::RequiredExtension(ref ext) => {
                // The extension is only a pre-filter; the regex decides.
                ext.as_bytes() == &*candidate.ext
                    && self.re.is_match(haystack)
            }
            MatchStrategy::Regex => self.re.is_match(haystack),
        }
    }
}
186
/// A builder for a pattern.
///
/// This builder enables configuring the match semantics of a pattern. For
/// example, one can make matching case insensitive.
///
/// The pattern itself is only parsed when `build` is called; constructing
/// the builder and toggling options never fails.
///
/// The lifetime `'a` refers to the lifetime of the pattern string.
#[derive(Clone, Debug)]
pub struct GlobBuilder<'a> {
    /// The glob pattern to compile.
    glob: &'a str,
    /// Options for the pattern.
    opts: GlobOptions,
}
200
/// Options controlling how a glob pattern is parsed and matched.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
struct GlobOptions {
    /// Whether to match case insensitively.
    case_insensitive: bool,
    /// Whether to require a literal separator to match a separator in a file
    /// path. e.g., when enabled, `*` won't match `/`.
    literal_separator: bool,
    /// Whether or not to use `\` to escape special characters.
    /// e.g., when enabled, `\*` will match a literal `*`.
    backslash_escape: bool,
}

// This is implemented through the standard `Default` trait (rather than an
// inherent `default` function) so that `GlobOptions::default()`,
// `Default::default()` and generic `T: Default` code all agree.
impl Default for GlobOptions {
    /// Returns the default options: case sensitive matching, wildcards may
    /// match `/`, and backslash escaping enabled exactly when `\` is not a
    /// path separator on the current platform.
    fn default() -> GlobOptions {
        GlobOptions {
            case_insensitive: false,
            literal_separator: false,
            backslash_escape: !is_separator('\\'),
        }
    }
}
222
/// An ordered sequence of glob tokens.
///
/// This is a thin wrapper around `Vec<Token>`; it dereferences to the vector
/// so that all of the usual `Vec` and slice methods are available on it.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
struct Tokens(Vec<Token>);

impl Deref for Tokens {
    type Target = Vec<Token>;

    fn deref(&self) -> &Self::Target {
        let Tokens(ref inner) = *self;
        inner
    }
}

impl DerefMut for Tokens {
    fn deref_mut(&mut self) -> &mut Self::Target {
        let Tokens(ref mut inner) = *self;
        inner
    }
}

/// A single element of a parsed glob pattern.
#[derive(Clone, Debug, Eq, PartialEq)]
enum Token {
    /// A single literal character.
    Literal(char),
    /// Produced for `?`.
    Any,
    /// Produced for `*`.
    ZeroOrMore,
    /// Produced for a `**` that begins a token sequence.
    RecursivePrefix,
    /// Produced for a `**` that ends a token sequence.
    RecursiveSuffix,
    /// Produced for a `**` that sits between two path separators.
    RecursiveZeroOrMore,
    /// A character class, optionally negated, as inclusive ranges.
    Class { negated: bool, ranges: Vec<(char, char)> },
    /// A set of alternatives, each one a token sequence of its own.
    Alternates(Vec<Tokens>),
}
250
251impl Glob {
252    /// Builds a new pattern with default options.
253    pub fn new(glob: &str) -> Result<Glob, Error> {
254        GlobBuilder::new(glob).build()
255    }
256
257    /// Returns a matcher for this pattern.
258    pub fn compile_matcher(&self) -> GlobMatcher {
259        let re =
260            new_regex(&self.re).expect("regex compilation shouldn't fail");
261        GlobMatcher { pat: self.clone(), re: re }
262    }
263
264    /// Returns a strategic matcher.
265    ///
266    /// This isn't exposed because it's not clear whether it's actually
267    /// faster than just running a regex for a *single* pattern. If it
268    /// is faster, then GlobMatcher should do it automatically.
269    #[cfg(test)]
270    fn compile_strategic_matcher(&self) -> GlobStrategic {
271        let strategy = MatchStrategy::new(self);
272        let re =
273            new_regex(&self.re).expect("regex compilation shouldn't fail");
274        GlobStrategic { strategy: strategy, re: re }
275    }
276
277    /// Returns the original glob pattern used to build this pattern.
278    pub fn glob(&self) -> &str {
279        &self.glob
280    }
281
282    /// Returns the regular expression string for this glob.
283    ///
284    /// Note that regular expressions for globs are intended to be matched on
285    /// arbitrary bytes (`&[u8]`) instead of Unicode strings (`&str`). In
286    /// particular, globs are frequently used on file paths, where there is no
287    /// general guarantee that file paths are themselves valid UTF-8. As a
288    /// result, callers will need to ensure that they are using a regex API
289    /// that can match on arbitrary bytes. For example, the
290    /// [`regex`](https://crates.io/regex)
291    /// crate's
292    /// [`Regex`](https://docs.rs/regex/*/regex/struct.Regex.html)
293    /// API is not suitable for this since it matches on `&str`, but its
294    /// [`bytes::Regex`](https://docs.rs/regex/*/regex/bytes/struct.Regex.html)
295    /// API is suitable for this.
296    pub fn regex(&self) -> &str {
297        &self.re
298    }
299
300    /// Returns the pattern as a literal if and only if the pattern must match
301    /// an entire path exactly.
302    ///
303    /// The basic format of these patterns is `{literal}`.
304    fn literal(&self) -> Option<String> {
305        if self.opts.case_insensitive {
306            return None;
307        }
308        let mut lit = String::new();
309        for t in &*self.tokens {
310            match *t {
311                Token::Literal(c) => lit.push(c),
312                _ => return None,
313            }
314        }
315        if lit.is_empty() {
316            None
317        } else {
318            Some(lit)
319        }
320    }
321
322    /// Returns an extension if this pattern matches a file path if and only
323    /// if the file path has the extension returned.
324    ///
325    /// Note that this extension returned differs from the extension that
326    /// std::path::Path::extension returns. Namely, this extension includes
327    /// the '.'. Also, paths like `.rs` are considered to have an extension
328    /// of `.rs`.
329    fn ext(&self) -> Option<String> {
330        if self.opts.case_insensitive {
331            return None;
332        }
333        let start = match self.tokens.get(0) {
334            Some(&Token::RecursivePrefix) => 1,
335            Some(_) => 0,
336            _ => return None,
337        };
338        match self.tokens.get(start) {
339            Some(&Token::ZeroOrMore) => {
340                // If there was no recursive prefix, then we only permit
341                // `*` if `*` can match a `/`. For example, if `*` can't
342                // match `/`, then `*.c` doesn't match `foo/bar.c`.
343                if start == 0 && self.opts.literal_separator {
344                    return None;
345                }
346            }
347            _ => return None,
348        }
349        match self.tokens.get(start + 1) {
350            Some(&Token::Literal('.')) => {}
351            _ => return None,
352        }
353        let mut lit = ".".to_string();
354        for t in self.tokens[start + 2..].iter() {
355            match *t {
356                Token::Literal('.') | Token::Literal('/') => return None,
357                Token::Literal(c) => lit.push(c),
358                _ => return None,
359            }
360        }
361        if lit.is_empty() {
362            None
363        } else {
364            Some(lit)
365        }
366    }
367
368    /// This is like `ext`, but returns an extension even if it isn't sufficient
369    /// to imply a match. Namely, if an extension is returned, then it is
370    /// necessary but not sufficient for a match.
371    fn required_ext(&self) -> Option<String> {
372        if self.opts.case_insensitive {
373            return None;
374        }
375        // We don't care at all about the beginning of this pattern. All we
376        // need to check for is if it ends with a literal of the form `.ext`.
377        let mut ext: Vec<char> = vec![]; // built in reverse
378        for t in self.tokens.iter().rev() {
379            match *t {
380                Token::Literal('/') => return None,
381                Token::Literal(c) => {
382                    ext.push(c);
383                    if c == '.' {
384                        break;
385                    }
386                }
387                _ => return None,
388            }
389        }
390        if ext.last() != Some(&'.') {
391            None
392        } else {
393            ext.reverse();
394            Some(ext.into_iter().collect())
395        }
396    }
397
398    /// Returns a literal prefix of this pattern if the entire pattern matches
399    /// if the literal prefix matches.
400    fn prefix(&self) -> Option<String> {
401        if self.opts.case_insensitive {
402            return None;
403        }
404        let (end, need_sep) = match self.tokens.last() {
405            Some(&Token::ZeroOrMore) => {
406                if self.opts.literal_separator {
407                    // If a trailing `*` can't match a `/`, then we can't
408                    // assume a match of the prefix corresponds to a match
409                    // of the overall pattern. e.g., `foo/*` with
410                    // `literal_separator` enabled matches `foo/bar` but not
411                    // `foo/bar/baz`, even though `foo/bar/baz` has a `foo/`
412                    // literal prefix.
413                    return None;
414                }
415                (self.tokens.len() - 1, false)
416            }
417            Some(&Token::RecursiveSuffix) => (self.tokens.len() - 1, true),
418            _ => (self.tokens.len(), false),
419        };
420        let mut lit = String::new();
421        for t in &self.tokens[0..end] {
422            match *t {
423                Token::Literal(c) => lit.push(c),
424                _ => return None,
425            }
426        }
427        if need_sep {
428            lit.push('/');
429        }
430        if lit.is_empty() {
431            None
432        } else {
433            Some(lit)
434        }
435    }
436
437    /// Returns a literal suffix of this pattern if the entire pattern matches
438    /// if the literal suffix matches.
439    ///
440    /// If a literal suffix is returned and it must match either the entire
441    /// file path or be preceded by a `/`, then also return true. This happens
442    /// with a pattern like `**/foo/bar`. Namely, this pattern matches
443    /// `foo/bar` and `baz/foo/bar`, but not `foofoo/bar`. In this case, the
444    /// suffix returned is `/foo/bar` (but should match the entire path
445    /// `foo/bar`).
446    ///
447    /// When this returns true, the suffix literal is guaranteed to start with
448    /// a `/`.
449    fn suffix(&self) -> Option<(String, bool)> {
450        if self.opts.case_insensitive {
451            return None;
452        }
453        let mut lit = String::new();
454        let (start, entire) = match self.tokens.get(0) {
455            Some(&Token::RecursivePrefix) => {
456                // We only care if this follows a path component if the next
457                // token is a literal.
458                if let Some(&Token::Literal(_)) = self.tokens.get(1) {
459                    lit.push('/');
460                    (1, true)
461                } else {
462                    (1, false)
463                }
464            }
465            _ => (0, false),
466        };
467        let start = match self.tokens.get(start) {
468            Some(&Token::ZeroOrMore) => {
469                // If literal_separator is enabled, then a `*` can't
470                // necessarily match everything, so reporting a suffix match
471                // as a match of the pattern would be a false positive.
472                if self.opts.literal_separator {
473                    return None;
474                }
475                start + 1
476            }
477            _ => start,
478        };
479        for t in &self.tokens[start..] {
480            match *t {
481                Token::Literal(c) => lit.push(c),
482                _ => return None,
483            }
484        }
485        if lit.is_empty() || lit == "/" {
486            None
487        } else {
488            Some((lit, entire))
489        }
490    }
491
492    /// If this pattern only needs to inspect the basename of a file path,
493    /// then the tokens corresponding to only the basename match are returned.
494    ///
495    /// For example, given a pattern of `**/*.foo`, only the tokens
496    /// corresponding to `*.foo` are returned.
497    ///
498    /// Note that this will return None if any match of the basename tokens
499    /// doesn't correspond to a match of the entire pattern. For example, the
500    /// glob `foo` only matches when a file path has a basename of `foo`, but
501    /// doesn't *always* match when a file path has a basename of `foo`. e.g.,
502    /// `foo` doesn't match `abc/foo`.
503    fn basename_tokens(&self) -> Option<&[Token]> {
504        if self.opts.case_insensitive {
505            return None;
506        }
507        let start = match self.tokens.get(0) {
508            Some(&Token::RecursivePrefix) => 1,
509            _ => {
510                // With nothing to gobble up the parent portion of a path,
511                // we can't assume that matching on only the basename is
512                // correct.
513                return None;
514            }
515        };
516        if self.tokens[start..].is_empty() {
517            return None;
518        }
519        for t in &self.tokens[start..] {
520            match *t {
521                Token::Literal('/') => return None,
522                Token::Literal(_) => {} // OK
523                Token::Any | Token::ZeroOrMore => {
524                    if !self.opts.literal_separator {
525                        // In this case, `*` and `?` can match a path
526                        // separator, which means this could reach outside
527                        // the basename.
528                        return None;
529                    }
530                }
531                Token::RecursivePrefix
532                | Token::RecursiveSuffix
533                | Token::RecursiveZeroOrMore => {
534                    return None;
535                }
536                Token::Class { .. } | Token::Alternates(..) => {
537                    // We *could* be a little smarter here, but either one
538                    // of these is going to prevent our literal optimizations
539                    // anyway, so give up.
540                    return None;
541                }
542            }
543        }
544        Some(&self.tokens[start..])
545    }
546
547    /// Returns the pattern as a literal if and only if the pattern exclusively
548    /// matches the basename of a file path *and* is a literal.
549    ///
550    /// The basic format of these patterns is `**/{literal}`, where `{literal}`
551    /// does not contain a path separator.
552    fn basename_literal(&self) -> Option<String> {
553        let tokens = match self.basename_tokens() {
554            None => return None,
555            Some(tokens) => tokens,
556        };
557        let mut lit = String::new();
558        for t in tokens {
559            match *t {
560                Token::Literal(c) => lit.push(c),
561                _ => return None,
562            }
563        }
564        Some(lit)
565    }
566}
567
568impl<'a> GlobBuilder<'a> {
569    /// Create a new builder for the pattern given.
570    ///
571    /// The pattern is not compiled until `build` is called.
572    pub fn new(glob: &'a str) -> GlobBuilder<'a> {
573        GlobBuilder { glob: glob, opts: GlobOptions::default() }
574    }
575
576    /// Parses and builds the pattern.
577    pub fn build(&self) -> Result<Glob, Error> {
578        let mut p = Parser {
579            glob: &self.glob,
580            stack: vec![Tokens::default()],
581            chars: self.glob.chars().peekable(),
582            prev: None,
583            cur: None,
584            opts: &self.opts,
585        };
586        p.parse()?;
587        if p.stack.is_empty() {
588            Err(Error {
589                glob: Some(self.glob.to_string()),
590                kind: ErrorKind::UnopenedAlternates,
591            })
592        } else if p.stack.len() > 1 {
593            Err(Error {
594                glob: Some(self.glob.to_string()),
595                kind: ErrorKind::UnclosedAlternates,
596            })
597        } else {
598            let tokens = p.stack.pop().unwrap();
599            Ok(Glob {
600                glob: self.glob.to_string(),
601                re: tokens.to_regex_with(&self.opts),
602                opts: self.opts,
603                tokens: tokens,
604            })
605        }
606    }
607
    /// Toggle whether the pattern matches case insensitively or not.
    ///
    /// This is disabled by default.
    ///
    /// Returns the builder itself so that calls can be chained.
    pub fn case_insensitive(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
        self.opts.case_insensitive = yes;
        self
    }
615
    /// Toggle whether a literal `/` is required to match a path separator.
    ///
    /// By default this is false: `*` and `?` will match `/`.
    ///
    /// Returns the builder itself so that calls can be chained.
    pub fn literal_separator(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
        self.opts.literal_separator = yes;
        self
    }
623
    /// When enabled, a back slash (`\`) may be used to escape
    /// special characters in a glob pattern. Additionally, this will
    /// prevent `\` from being interpreted as a path separator on all
    /// platforms.
    ///
    /// This is enabled by default on platforms where `\` is not a
    /// path separator and disabled by default on platforms where `\`
    /// is a path separator.
    ///
    /// Returns the builder itself so that calls can be chained.
    pub fn backslash_escape(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
        self.opts.backslash_escape = yes;
        self
    }
636}
637
638impl Tokens {
639    /// Convert this pattern to a string that is guaranteed to be a valid
640    /// regular expression and will represent the matching semantics of this
641    /// glob pattern and the options given.
642    fn to_regex_with(&self, options: &GlobOptions) -> String {
643        let mut re = String::new();
644        re.push_str("(?-u)");
645        if options.case_insensitive {
646            re.push_str("(?i)");
647        }
648        re.push('^');
649        // Special case. If the entire glob is just `**`, then it should match
650        // everything.
651        if self.len() == 1 && self[0] == Token::RecursivePrefix {
652            re.push_str(".*");
653            re.push('$');
654            return re;
655        }
656        self.tokens_to_regex(options, &self, &mut re);
657        re.push('$');
658        re
659    }
660
661    fn tokens_to_regex(
662        &self,
663        options: &GlobOptions,
664        tokens: &[Token],
665        re: &mut String,
666    ) {
667        for tok in tokens {
668            match *tok {
669                Token::Literal(c) => {
670                    re.push_str(&char_to_escaped_literal(c));
671                }
672                Token::Any => {
673                    if options.literal_separator {
674                        re.push_str("[^/]");
675                    } else {
676                        re.push_str(".");
677                    }
678                }
679                Token::ZeroOrMore => {
680                    if options.literal_separator {
681                        re.push_str("[^/]*");
682                    } else {
683                        re.push_str(".*");
684                    }
685                }
686                Token::RecursivePrefix => {
687                    re.push_str("(?:/?|.*/)");
688                }
689                Token::RecursiveSuffix => {
690                    re.push_str("/.*");
691                }
692                Token::RecursiveZeroOrMore => {
693                    re.push_str("(?:/|/.*/)");
694                }
695                Token::Class { negated, ref ranges } => {
696                    re.push('[');
697                    if negated {
698                        re.push('^');
699                    }
700                    for r in ranges {
701                        if r.0 == r.1 {
702                            // Not strictly necessary, but nicer to look at.
703                            re.push_str(&char_to_escaped_literal(r.0));
704                        } else {
705                            re.push_str(&char_to_escaped_literal(r.0));
706                            re.push('-');
707                            re.push_str(&char_to_escaped_literal(r.1));
708                        }
709                    }
710                    re.push(']');
711                }
712                Token::Alternates(ref patterns) => {
713                    let mut parts = vec![];
714                    for pat in patterns {
715                        let mut altre = String::new();
716                        self.tokens_to_regex(options, &pat, &mut altre);
717                        if !altre.is_empty() {
718                            parts.push(altre);
719                        }
720                    }
721
722                    // It is possible to have an empty set in which case the
723                    // resulting alternation '()' would be an error.
724                    if !parts.is_empty() {
725                        re.push('(');
726                        re.push_str(&parts.join("|"));
727                        re.push(')');
728                    }
729                }
730            }
731        }
732    }
733}
734
735/// Convert a Unicode scalar value to an escaped string suitable for use as
736/// a literal in a non-Unicode regex.
737fn char_to_escaped_literal(c: char) -> String {
738    bytes_to_escaped_literal(&c.to_string().into_bytes())
739}
740
741/// Converts an arbitrary sequence of bytes to a UTF-8 string. All non-ASCII
742/// code units are converted to their escaped form.
743fn bytes_to_escaped_literal(bs: &[u8]) -> String {
744    let mut s = String::with_capacity(bs.len());
745    for &b in bs {
746        if b <= 0x7F {
747            s.push_str(&regex::escape(&(b as char).to_string()));
748        } else {
749            s.push_str(&format!("\\x{:02x}", b));
750        }
751    }
752    s
753}
754
/// The state of a single parse of a glob pattern into tokens.
struct Parser<'a> {
    /// The complete pattern text; it is copied into every `Error` the parser
    /// creates.
    glob: &'a str,
    /// A stack of token sequences. New tokens are always appended to the top
    /// entry. Additional entries are pushed while parsing a `{...}`
    /// alternation: one for `{` and one for each `,` inside of it.
    stack: Vec<Tokens>,
    /// The characters of the pattern that have not been consumed yet.
    chars: iter::Peekable<str::Chars<'a>>,
    /// NOTE(review): presumably the character consumed just before `cur`.
    /// It is maintained by `bump`, which is outside this view — confirm.
    prev: Option<char>,
    /// NOTE(review): presumably the most recently consumed character.
    /// It is maintained by `bump`, which is outside this view — confirm.
    cur: Option<char>,
    /// The options to parse with, e.g. whether `\` escapes the next
    /// character.
    opts: &'a GlobOptions,
}
763
764impl<'a> Parser<'a> {
765    fn error(&self, kind: ErrorKind) -> Error {
766        Error { glob: Some(self.glob.to_string()), kind: kind }
767    }
768
769    fn parse(&mut self) -> Result<(), Error> {
770        while let Some(c) = self.bump() {
771            match c {
772                '?' => self.push_token(Token::Any)?,
773                '*' => self.parse_star()?,
774                '[' => self.parse_class()?,
775                '{' => self.push_alternate()?,
776                '}' => self.pop_alternate()?,
777                ',' => self.parse_comma()?,
778                '\\' => self.parse_backslash()?,
779                c => self.push_token(Token::Literal(c))?,
780            }
781        }
782        Ok(())
783    }
784
785    fn push_alternate(&mut self) -> Result<(), Error> {
786        if self.stack.len() > 1 {
787            return Err(self.error(ErrorKind::NestedAlternates));
788        }
789        Ok(self.stack.push(Tokens::default()))
790    }
791
792    fn pop_alternate(&mut self) -> Result<(), Error> {
793        let mut alts = vec![];
794        while self.stack.len() >= 2 {
795            alts.push(self.stack.pop().unwrap());
796        }
797        self.push_token(Token::Alternates(alts))
798    }
799
800    fn push_token(&mut self, tok: Token) -> Result<(), Error> {
801        if let Some(ref mut pat) = self.stack.last_mut() {
802            return Ok(pat.push(tok));
803        }
804        Err(self.error(ErrorKind::UnopenedAlternates))
805    }
806
807    fn pop_token(&mut self) -> Result<Token, Error> {
808        if let Some(ref mut pat) = self.stack.last_mut() {
809            return Ok(pat.pop().unwrap());
810        }
811        Err(self.error(ErrorKind::UnopenedAlternates))
812    }
813
814    fn have_tokens(&self) -> Result<bool, Error> {
815        match self.stack.last() {
816            None => Err(self.error(ErrorKind::UnopenedAlternates)),
817            Some(ref pat) => Ok(!pat.is_empty()),
818        }
819    }
820
821    fn parse_comma(&mut self) -> Result<(), Error> {
822        // If we aren't inside a group alternation, then don't
823        // treat commas specially. Otherwise, we need to start
824        // a new alternate.
825        if self.stack.len() <= 1 {
826            self.push_token(Token::Literal(','))
827        } else {
828            Ok(self.stack.push(Tokens::default()))
829        }
830    }
831
832    fn parse_backslash(&mut self) -> Result<(), Error> {
833        if self.opts.backslash_escape {
834            match self.bump() {
835                None => Err(self.error(ErrorKind::DanglingEscape)),
836                Some(c) => self.push_token(Token::Literal(c)),
837            }
838        } else if is_separator('\\') {
839            // Normalize all patterns to use / as a separator.
840            self.push_token(Token::Literal('/'))
841        } else {
842            self.push_token(Token::Literal('\\'))
843        }
844    }
845
    /// Handles `*`, deciding between a plain wildcard and the recursive
    /// forms of `**`.
    ///
    /// A single `*` always becomes `ZeroOrMore`. A `**` becomes one of the
    /// recursive tokens only when it stands in for a whole path component;
    /// in every other position it degrades to two `ZeroOrMore` tokens.
    fn parse_star(&mut self) -> Result<(), Error> {
        // NOTE(review): presumably the character preceding the `*` that
        // triggered this call; it must be read before the `bump` calls
        // below — confirm against `bump`, which is outside this view.
        let prev = self.prev;
        // A lone `*`.
        if self.peek() != Some('*') {
            self.push_token(Token::ZeroOrMore)?;
            return Ok(());
        }
        // Consume the second `*`.
        assert!(self.bump() == Some('*'));
        // `**` at the very start of the current token sequence.
        if !self.have_tokens()? {
            if !self.peek().map_or(true, is_separator) {
                // Followed by something other than a separator or the end
                // of the pattern: not a recursive prefix.
                self.push_token(Token::ZeroOrMore)?;
                self.push_token(Token::ZeroOrMore)?;
            } else {
                self.push_token(Token::RecursivePrefix)?;
                // Consume the separator after `**`, if there is one.
                assert!(self.bump().map_or(true, is_separator));
            }
            return Ok(());
        }

        // From here on, `**` only keeps its recursive meaning if it was
        // preceded by a separator. The one exception checked for is a `,`
        // or `{` while inside an alternation.
        if !prev.map(is_separator).unwrap_or(false) {
            if self.stack.len() <= 1
                || (prev != Some(',') && prev != Some('{'))
            {
                self.push_token(Token::ZeroOrMore)?;
                self.push_token(Token::ZeroOrMore)?;
                return Ok(());
            }
        }
        // Look at what follows `**` to tell a suffix (`/**` at the end of
        // the pattern or of an alternate) from an interior `/**/`.
        let is_suffix = match self.peek() {
            None => {
                assert!(self.bump().is_none());
                true
            }
            // The `,` or `}` is left for the main loop to handle.
            Some(',') | Some('}') if self.stack.len() >= 2 => true,
            Some(c) if is_separator(c) => {
                // Consume the separator after `**`.
                assert!(self.bump().map(is_separator).unwrap_or(false));
                false
            }
            _ => {
                self.push_token(Token::ZeroOrMore)?;
                self.push_token(Token::ZeroOrMore)?;
                return Ok(());
            }
        };
        // Replace the last token that was pushed. An existing recursive
        // prefix or suffix is put back unchanged, which collapses repeated
        // `**` components; anything else is replaced by the recursive token
        // for this position.
        match self.pop_token()? {
            Token::RecursivePrefix => {
                self.push_token(Token::RecursivePrefix)?;
            }
            Token::RecursiveSuffix => {
                self.push_token(Token::RecursiveSuffix)?;
            }
            _ => {
                if is_suffix {
                    self.push_token(Token::RecursiveSuffix)?;
                } else {
                    self.push_token(Token::RecursiveZeroOrMore)?;
                }
            }
        }
        Ok(())
    }
906
907    fn parse_class(&mut self) -> Result<(), Error> {
908        fn add_to_last_range(
909            glob: &str,
910            r: &mut (char, char),
911            add: char,
912        ) -> Result<(), Error> {
913            r.1 = add;
914            if r.1 < r.0 {
915                Err(Error {
916                    glob: Some(glob.to_string()),
917                    kind: ErrorKind::InvalidRange(r.0, r.1),
918                })
919            } else {
920                Ok(())
921            }
922        }
923        let mut ranges = vec![];
924        let negated = match self.chars.peek() {
925            Some(&'!') | Some(&'^') => {
926                let bump = self.bump();
927                assert!(bump == Some('!') || bump == Some('^'));
928                true
929            }
930            _ => false,
931        };
932        let mut first = true;
933        let mut in_range = false;
934        loop {
935            let c = match self.bump() {
936                Some(c) => c,
937                // The only way to successfully break this loop is to observe
938                // a ']'.
939                None => return Err(self.error(ErrorKind::UnclosedClass)),
940            };
941            match c {
942                ']' => {
943                    if first {
944                        ranges.push((']', ']'));
945                    } else {
946                        break;
947                    }
948                }
949                '-' => {
950                    if first {
951                        ranges.push(('-', '-'));
952                    } else if in_range {
953                        // invariant: in_range is only set when there is
954                        // already at least one character seen.
955                        let r = ranges.last_mut().unwrap();
956                        add_to_last_range(&self.glob, r, '-')?;
957                        in_range = false;
958                    } else {
959                        assert!(!ranges.is_empty());
960                        in_range = true;
961                    }
962                }
963                c => {
964                    if in_range {
965                        // invariant: in_range is only set when there is
966                        // already at least one character seen.
967                        add_to_last_range(
968                            &self.glob,
969                            ranges.last_mut().unwrap(),
970                            c,
971                        )?;
972                    } else {
973                        ranges.push((c, c));
974                    }
975                    in_range = false;
976                }
977            }
978            first = false;
979        }
980        if in_range {
981            // Means that the last character in the class was a '-', so add
982            // it as a literal.
983            ranges.push(('-', '-'));
984        }
985        self.push_token(Token::Class { negated: negated, ranges: ranges })
986    }
987
988    fn bump(&mut self) -> Option<char> {
989        self.prev = self.cur;
990        self.cur = self.chars.next();
991        self.cur
992    }
993
994    fn peek(&mut self) -> Option<char> {
995        self.chars.peek().map(|&ch| ch)
996    }
997}
998
/// Reports whether `haystack` begins with the bytes of `needle`.
///
/// An empty `needle` is a prefix of every haystack.
#[cfg(test)]
fn starts_with(needle: &[u8], haystack: &[u8]) -> bool {
    haystack.starts_with(needle)
}
1003
/// Reports whether `haystack` ends with the bytes of `needle`.
///
/// An empty `needle` is a suffix of every haystack.
#[cfg(test)]
fn ends_with(needle: &[u8], haystack: &[u8]) -> bool {
    haystack.ends_with(needle)
}
1011
1012#[cfg(test)]
1013mod tests {
1014    use super::Token::*;
1015    use super::{Glob, GlobBuilder, Token};
1016    use crate::{ErrorKind, GlobSetBuilder};
1017
    /// Optional `GlobBuilder` settings used by the test macros in this module.
    ///
    /// A field left as `None` is not applied, so the builder keeps whatever
    /// default it has for that setting.
    #[derive(Clone, Copy, Debug, Default)]
    struct Options {
        /// Passed to `GlobBuilder::case_insensitive` when set.
        casei: Option<bool>,
        /// Passed to `GlobBuilder::literal_separator` when set.
        litsep: Option<bool>,
        /// Passed to `GlobBuilder::backslash_escape` when set.
        bsesc: Option<bool>,
    }
1024
1025    macro_rules! syntax {
1026        ($name:ident, $pat:expr, $tokens:expr) => {
1027            #[test]
1028            fn $name() {
1029                let pat = Glob::new($pat).unwrap();
1030                assert_eq!($tokens, pat.tokens.0);
1031            }
1032        };
1033    }
1034
1035    macro_rules! syntaxerr {
1036        ($name:ident, $pat:expr, $err:expr) => {
1037            #[test]
1038            fn $name() {
1039                let err = Glob::new($pat).unwrap_err();
1040                assert_eq!(&$err, err.kind());
1041            }
1042        };
1043    }
1044
1045    macro_rules! toregex {
1046        ($name:ident, $pat:expr, $re:expr) => {
1047            toregex!($name, $pat, $re, Options::default());
1048        };
1049        ($name:ident, $pat:expr, $re:expr, $options:expr) => {
1050            #[test]
1051            fn $name() {
1052                let mut builder = GlobBuilder::new($pat);
1053                if let Some(casei) = $options.casei {
1054                    builder.case_insensitive(casei);
1055                }
1056                if let Some(litsep) = $options.litsep {
1057                    builder.literal_separator(litsep);
1058                }
1059                if let Some(bsesc) = $options.bsesc {
1060                    builder.backslash_escape(bsesc);
1061                }
1062                let pat = builder.build().unwrap();
1063                assert_eq!(format!("(?-u){}", $re), pat.regex());
1064            }
1065        };
1066    }
1067
1068    macro_rules! matches {
1069        ($name:ident, $pat:expr, $path:expr) => {
1070            matches!($name, $pat, $path, Options::default());
1071        };
1072        ($name:ident, $pat:expr, $path:expr, $options:expr) => {
1073            #[test]
1074            fn $name() {
1075                let mut builder = GlobBuilder::new($pat);
1076                if let Some(casei) = $options.casei {
1077                    builder.case_insensitive(casei);
1078                }
1079                if let Some(litsep) = $options.litsep {
1080                    builder.literal_separator(litsep);
1081                }
1082                if let Some(bsesc) = $options.bsesc {
1083                    builder.backslash_escape(bsesc);
1084                }
1085                let pat = builder.build().unwrap();
1086                let matcher = pat.compile_matcher();
1087                let strategic = pat.compile_strategic_matcher();
1088                let set = GlobSetBuilder::new().add(pat).build().unwrap();
1089                assert!(matcher.is_match($path));
1090                assert!(strategic.is_match($path));
1091                assert!(set.is_match($path));
1092            }
1093        };
1094    }
1095
1096    macro_rules! nmatches {
1097        ($name:ident, $pat:expr, $path:expr) => {
1098            nmatches!($name, $pat, $path, Options::default());
1099        };
1100        ($name:ident, $pat:expr, $path:expr, $options:expr) => {
1101            #[test]
1102            fn $name() {
1103                let mut builder = GlobBuilder::new($pat);
1104                if let Some(casei) = $options.casei {
1105                    builder.case_insensitive(casei);
1106                }
1107                if let Some(litsep) = $options.litsep {
1108                    builder.literal_separator(litsep);
1109                }
1110                if let Some(bsesc) = $options.bsesc {
1111                    builder.backslash_escape(bsesc);
1112                }
1113                let pat = builder.build().unwrap();
1114                let matcher = pat.compile_matcher();
1115                let strategic = pat.compile_strategic_matcher();
1116                let set = GlobSetBuilder::new().add(pat).build().unwrap();
1117                assert!(!matcher.is_match($path));
1118                assert!(!strategic.is_match($path));
1119                assert!(!set.is_match($path));
1120            }
1121        };
1122    }
1123
1124    fn s(string: &str) -> String {
1125        string.to_string()
1126    }
1127
1128    fn class(s: char, e: char) -> Token {
1129        Class { negated: false, ranges: vec![(s, e)] }
1130    }
1131
1132    fn classn(s: char, e: char) -> Token {
1133        Class { negated: true, ranges: vec![(s, e)] }
1134    }
1135
1136    fn rclass(ranges: &[(char, char)]) -> Token {
1137        Class { negated: false, ranges: ranges.to_vec() }
1138    }
1139
1140    fn rclassn(ranges: &[(char, char)]) -> Token {
1141        Class { negated: true, ranges: ranges.to_vec() }
1142    }
1143
    // Literals and the `?` / `*` wildcards.
    syntax!(literal1, "a", vec![Literal('a')]);
    syntax!(literal2, "ab", vec![Literal('a'), Literal('b')]);
    syntax!(any1, "?", vec![Any]);
    syntax!(any2, "a?b", vec![Literal('a'), Any, Literal('b')]);
    syntax!(seq1, "*", vec![ZeroOrMore]);
    syntax!(seq2, "a*b", vec![Literal('a'), ZeroOrMore, Literal('b')]);
    syntax!(
        seq3,
        "*a*b*",
        vec![ZeroOrMore, Literal('a'), ZeroOrMore, Literal('b'), ZeroOrMore,]
    );
    // `**` as a prefix, as a suffix and between two separators.
    syntax!(rseq1, "**", vec![RecursivePrefix]);
    syntax!(rseq2, "**/", vec![RecursivePrefix]);
    syntax!(rseq3, "/**", vec![RecursiveSuffix]);
    syntax!(rseq4, "/**/", vec![RecursiveZeroOrMore]);
    syntax!(
        rseq5,
        "a/**/b",
        vec![Literal('a'), RecursiveZeroOrMore, Literal('b'),]
    );
    // Character classes: negation with `!` or `^`, ranges, and literal `]`
    // and `-` in the positions where they carry no special meaning.
    syntax!(cls1, "[a]", vec![class('a', 'a')]);
    syntax!(cls2, "[!a]", vec![classn('a', 'a')]);
    syntax!(cls3, "[a-z]", vec![class('a', 'z')]);
    syntax!(cls4, "[!a-z]", vec![classn('a', 'z')]);
    syntax!(cls5, "[-]", vec![class('-', '-')]);
    syntax!(cls6, "[]]", vec![class(']', ']')]);
    syntax!(cls7, "[*]", vec![class('*', '*')]);
    syntax!(cls8, "[!!]", vec![classn('!', '!')]);
    syntax!(cls9, "[a-]", vec![rclass(&[('a', 'a'), ('-', '-')])]);
    syntax!(cls10, "[-a-z]", vec![rclass(&[('-', '-'), ('a', 'z')])]);
    syntax!(cls11, "[a-z-]", vec![rclass(&[('a', 'z'), ('-', '-')])]);
    syntax!(
        cls12,
        "[-a-z-]",
        vec![rclass(&[('-', '-'), ('a', 'z'), ('-', '-')]),]
    );
    syntax!(cls13, "[]-z]", vec![class(']', 'z')]);
    syntax!(cls14, "[--z]", vec![class('-', 'z')]);
    syntax!(cls15, "[ --]", vec![class(' ', '-')]);
    syntax!(cls16, "[0-9a-z]", vec![rclass(&[('0', '9'), ('a', 'z')])]);
    syntax!(cls17, "[a-z0-9]", vec![rclass(&[('a', 'z'), ('0', '9')])]);
    syntax!(cls18, "[!0-9a-z]", vec![rclassn(&[('0', '9'), ('a', 'z')])]);
    syntax!(cls19, "[!a-z0-9]", vec![rclassn(&[('a', 'z'), ('0', '9')])]);
    syntax!(cls20, "[^a]", vec![classn('a', 'a')]);
    syntax!(cls21, "[^a-z]", vec![classn('a', 'z')]);

    // Malformed classes: missing closing `]` and ranges whose end sorts
    // before their start.
    syntaxerr!(err_unclosed1, "[", ErrorKind::UnclosedClass);
    syntaxerr!(err_unclosed2, "[]", ErrorKind::UnclosedClass);
    syntaxerr!(err_unclosed3, "[!", ErrorKind::UnclosedClass);
    syntaxerr!(err_unclosed4, "[!]", ErrorKind::UnclosedClass);
    syntaxerr!(err_range1, "[z-a]", ErrorKind::InvalidRange('z', 'a'));
    syntaxerr!(err_range2, "[z--]", ErrorKind::InvalidRange('z', '-'));
1196
    /// Enables case-insensitive matching; other settings stay at the
    /// builder's defaults.
    const CASEI: Options =
        Options { casei: Some(true), litsep: None, bsesc: None };
    /// Enables `literal_separator`; other settings stay at the builder's
    /// defaults.
    const SLASHLIT: Options =
        Options { casei: None, litsep: Some(true), bsesc: None };
    /// Explicitly disables `backslash_escape`.
    const NOBSESC: Options =
        Options { casei: None, litsep: None, bsesc: Some(false) };
    /// Explicitly enables `backslash_escape`.
    const BSESC: Options =
        Options { casei: None, litsep: None, bsesc: Some(true) };
1205
    // Glob-to-regex translation. Every expected regex below is compared with
    // the `(?-u)` prefix that the `toregex!` macro prepends.
    toregex!(re_casei, "a", "(?i)^a$", &CASEI);

    // With `literal_separator`, wildcards must not match `/`.
    toregex!(re_slash1, "?", r"^[^/]$", SLASHLIT);
    toregex!(re_slash2, "*", r"^[^/]*$", SLASHLIT);

    toregex!(re1, "a", "^a$");
    toregex!(re2, "?", "^.$");
    toregex!(re3, "*", "^.*$");
    toregex!(re4, "a?", "^a.$");
    toregex!(re5, "?a", "^.a$");
    toregex!(re6, "a*", "^a.*$");
    toregex!(re7, "*a", "^.*a$");
    toregex!(re8, "[*]", r"^[\*]$");
    toregex!(re9, "[+]", r"^[\+]$");
    toregex!(re10, "+", r"^\+$");
    toregex!(re11, "☃", r"^\xe2\x98\x83$");
    // Repeated `**` components collapse into a single recursive wildcard.
    toregex!(re12, "**", r"^.*$");
    toregex!(re13, "**/", r"^.*$");
    toregex!(re14, "**/*", r"^(?:/?|.*/).*$");
    toregex!(re15, "**/**", r"^.*$");
    toregex!(re16, "**/**/*", r"^(?:/?|.*/).*$");
    toregex!(re17, "**/**/**", r"^.*$");
    toregex!(re18, "**/**/**/*", r"^(?:/?|.*/).*$");
    toregex!(re19, "a/**", r"^a/.*$");
    toregex!(re20, "a/**/**", r"^a/.*$");
    toregex!(re21, "a/**/**/**", r"^a/.*$");
    toregex!(re22, "a/**/b", r"^a(?:/|/.*/)b$");
    toregex!(re23, "a/**/**/b", r"^a(?:/|/.*/)b$");
    toregex!(re24, "a/**/**/**/b", r"^a(?:/|/.*/)b$");
    toregex!(re25, "**/b", r"^(?:/?|.*/)b$");
    toregex!(re26, "**/**/b", r"^(?:/?|.*/)b$");
    toregex!(re27, "**/**/**/b", r"^(?:/?|.*/)b$");
    // `**` that is not a whole path component is just two `*` in a row.
    toregex!(re28, "a**", r"^a.*.*$");
    toregex!(re29, "**a", r"^.*.*a$");
    toregex!(re30, "a**b", r"^a.*.*b$");
    toregex!(re31, "***", r"^.*.*.*$");
    toregex!(re32, "/a**", r"^/a.*.*$");
    toregex!(re33, "/**a", r"^/.*.*a$");
    toregex!(re34, "/a**b", r"^/a.*.*b$");
1245
    // Positive cases: literals and `*`.
    matches!(match1, "a", "a");
    matches!(match2, "a*b", "a_b");
    matches!(match3, "a*b*c", "abc");
    matches!(match4, "a*b*c", "a_b_c");
    matches!(match5, "a*b*c", "a___b___c");
    matches!(match6, "abc*abc*abc", "abcabcabcabcabcabcabc");
    matches!(match7, "a*a*a*a*a*a*a*a*a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
    matches!(match8, "a*b[xyz]c*d", "abxcdbxcddd");
    matches!(match9, "*.rs", ".rs");
    matches!(match10, "☃", "☃");

    // Positive cases: recursive `**`.
    matches!(matchrec1, "some/**/needle.txt", "some/needle.txt");
    matches!(matchrec2, "some/**/needle.txt", "some/one/needle.txt");
    matches!(matchrec3, "some/**/needle.txt", "some/one/two/needle.txt");
    matches!(matchrec4, "some/**/needle.txt", "some/other/needle.txt");
    matches!(matchrec5, "**", "abcde");
    matches!(matchrec6, "**", "");
    matches!(matchrec7, "**", ".asdf");
    matches!(matchrec8, "**", "/x/.asdf");
    matches!(matchrec9, "some/**/**/needle.txt", "some/needle.txt");
    matches!(matchrec10, "some/**/**/needle.txt", "some/one/needle.txt");
    matches!(matchrec11, "some/**/**/needle.txt", "some/one/two/needle.txt");
    matches!(matchrec12, "some/**/**/needle.txt", "some/other/needle.txt");
    matches!(matchrec13, "**/test", "one/two/test");
    matches!(matchrec14, "**/test", "one/test");
    matches!(matchrec15, "**/test", "test");
    matches!(matchrec16, "/**/test", "/one/two/test");
    matches!(matchrec17, "/**/test", "/one/test");
    matches!(matchrec18, "/**/test", "/test");
    matches!(matchrec19, "**/.*", ".abc");
    matches!(matchrec20, "**/.*", "abc/.abc");
    matches!(matchrec21, "**/foo/bar", "foo/bar");
    matches!(matchrec22, ".*/**", ".abc/abc");
    matches!(matchrec23, "test/**", "test/");
    matches!(matchrec24, "test/**", "test/one");
    matches!(matchrec25, "test/**", "test/one/two");
    matches!(matchrec26, "some/*/needle.txt", "some/one/needle.txt");

    // Positive cases: character classes and ranges.
    matches!(matchrange1, "a[0-9]b", "a0b");
    matches!(matchrange2, "a[0-9]b", "a9b");
    matches!(matchrange3, "a[!0-9]b", "a_b");
    matches!(matchrange4, "[a-z123]", "1");
    matches!(matchrange5, "[1a-z23]", "1");
    matches!(matchrange6, "[123a-z]", "1");
    matches!(matchrange7, "[abc-]", "-");
    matches!(matchrange8, "[-abc]", "-");
    matches!(matchrange9, "[-a-c]", "b");
    matches!(matchrange10, "[a-c-]", "b");
    matches!(matchrange11, "[-]", "-");
    matches!(matchrange12, "a[^0-9]b", "a_b");

    // Positive cases: by default `*` also matches across path separators.
    matches!(matchpat1, "*hello.txt", "hello.txt");
    matches!(matchpat2, "*hello.txt", "gareth_says_hello.txt");
    matches!(matchpat3, "*hello.txt", "some/path/to/hello.txt");
    matches!(matchpat4, "*hello.txt", "some\\path\\to\\hello.txt");
    matches!(matchpat5, "*hello.txt", "/an/absolute/path/to/hello.txt");
    matches!(matchpat6, "*some/path/to/hello.txt", "some/path/to/hello.txt");
    matches!(
        matchpat7,
        "*some/path/to/hello.txt",
        "a/bigger/some/path/to/hello.txt"
    );

    // Special characters are matched literally when wrapped in a class.
    matches!(matchescape, "_[[]_[]]_[?]_[*]_!_", "_[_]_?_*_!_");

    // Case-insensitive matching.
    matches!(matchcasei1, "aBcDeFg", "aBcDeFg", CASEI);
    matches!(matchcasei2, "aBcDeFg", "abcdefg", CASEI);
    matches!(matchcasei3, "aBcDeFg", "ABCDEFG", CASEI);
    matches!(matchcasei4, "aBcDeFg", "AbCdEfG", CASEI);

    // Alternation with `{...}`; a comma outside braces is an ordinary
    // character.
    matches!(matchalt1, "a,b", "a,b");
    matches!(matchalt2, ",", ",");
    matches!(matchalt3, "{a,b}", "a");
    matches!(matchalt4, "{a,b}", "b");
    matches!(matchalt5, "{**/src/**,foo}", "abc/src/bar");
    matches!(matchalt6, "{**/src/**,foo}", "foo");
    matches!(matchalt7, "{[}],foo}", "}");
    matches!(matchalt8, "{foo}", "foo");
    matches!(matchalt9, "{}", "");
    matches!(matchalt10, "{,}", "");
    matches!(matchalt11, "{*.foo,*.bar,*.wat}", "test.foo");
    matches!(matchalt12, "{*.foo,*.bar,*.wat}", "test.bar");
    matches!(matchalt13, "{*.foo,*.bar,*.wat}", "test.wat");

    // `literal_separator`: wildcards stop at separators. The expectations
    // involving `\` differ between Unix and non-Unix targets.
    matches!(matchslash1, "abc/def", "abc/def", SLASHLIT);
    #[cfg(unix)]
    nmatches!(matchslash2, "abc?def", "abc/def", SLASHLIT);
    #[cfg(not(unix))]
    nmatches!(matchslash2, "abc?def", "abc\\def", SLASHLIT);
    nmatches!(matchslash3, "abc*def", "abc/def", SLASHLIT);
    matches!(matchslash4, "abc[/]def", "abc/def", SLASHLIT); // differs
    #[cfg(unix)]
    nmatches!(matchslash5, "abc\\def", "abc/def", SLASHLIT);
    #[cfg(not(unix))]
    matches!(matchslash5, "abc\\def", "abc/def", SLASHLIT);

    // Backslash handling with escaping explicitly enabled (BSESC),
    // explicitly disabled (NOBSESC), and left at the builder default.
    matches!(matchbackslash1, "\\[", "[", BSESC);
    matches!(matchbackslash2, "\\?", "?", BSESC);
    matches!(matchbackslash3, "\\*", "*", BSESC);
    matches!(matchbackslash4, "\\[a-z]", "\\a", NOBSESC);
    matches!(matchbackslash5, "\\?", "\\a", NOBSESC);
    matches!(matchbackslash6, "\\*", "\\\\", NOBSESC);
    #[cfg(unix)]
    matches!(matchbackslash7, "\\a", "a");
    #[cfg(not(unix))]
    matches!(matchbackslash8, "\\a", "/a");

    // Negative cases.
    nmatches!(matchnot1, "a*b*c", "abcd");
    nmatches!(matchnot2, "abc*abc*abc", "abcabcabcabcabcabcabca");
    nmatches!(matchnot3, "some/**/needle.txt", "some/other/notthis.txt");
    nmatches!(matchnot4, "some/**/**/needle.txt", "some/other/notthis.txt");
    nmatches!(matchnot5, "/**/test", "test");
    nmatches!(matchnot6, "/**/test", "/one/notthis");
    nmatches!(matchnot7, "/**/test", "/notthis");
    nmatches!(matchnot8, "**/.*", "ab.c");
    nmatches!(matchnot9, "**/.*", "abc/ab.c");
    nmatches!(matchnot10, ".*/**", "a.bc");
    nmatches!(matchnot11, ".*/**", "abc/a.bc");
    nmatches!(matchnot12, "a[0-9]b", "a_b");
    nmatches!(matchnot13, "a[!0-9]b", "a0b");
    nmatches!(matchnot14, "a[!0-9]b", "a9b");
    nmatches!(matchnot15, "[!-]", "-");
    nmatches!(matchnot16, "*hello.txt", "hello.txt-and-then-some");
    nmatches!(matchnot17, "*hello.txt", "goodbye.txt");
    nmatches!(
        matchnot18,
        "*some/path/to/hello.txt",
        "some/path/to/hello.txt-and-then-some"
    );
    nmatches!(
        matchnot19,
        "*some/path/to/hello.txt",
        "some/other/path/to/hello.txt"
    );
    nmatches!(matchnot20, "a", "foo/a");
    nmatches!(matchnot21, "./foo", "foo");
    nmatches!(matchnot22, "**/foo", "foofoo");
    nmatches!(matchnot23, "**/foo/bar", "foofoo/bar");
    nmatches!(matchnot24, "/*.c", "mozilla-sha1/sha1.c");
    nmatches!(matchnot25, "*.c", "mozilla-sha1/sha1.c", SLASHLIT);
    nmatches!(
        matchnot26,
        "**/m4/ltoptions.m4",
        "csharp/src/packages/repositories.config",
        SLASHLIT
    );
    nmatches!(matchnot27, "a[^0-9]b", "a0b");
    nmatches!(matchnot28, "a[^0-9]b", "a9b");
    nmatches!(matchnot29, "[^-]", "-");
    nmatches!(matchnot30, "some/*/needle.txt", "some/needle.txt");
    // NOTE: the tests below are negative cases despite the `matchrec` prefix.
    nmatches!(
        matchrec31,
        "some/*/needle.txt",
        "some/one/two/needle.txt",
        SLASHLIT
    );
    nmatches!(
        matchrec32,
        "some/*/needle.txt",
        "some/one/two/three/needle.txt",
        SLASHLIT
    );
    nmatches!(matchrec33, ".*/**", ".abc");
    nmatches!(matchrec34, "foo/**", "foo");
1410
1411    macro_rules! extract {
1412        ($which:ident, $name:ident, $pat:expr, $expect:expr) => {
1413            extract!($which, $name, $pat, $expect, Options::default());
1414        };
1415        ($which:ident, $name:ident, $pat:expr, $expect:expr, $options:expr) => {
1416            #[test]
1417            fn $name() {
1418                let mut builder = GlobBuilder::new($pat);
1419                if let Some(casei) = $options.casei {
1420                    builder.case_insensitive(casei);
1421                }
1422                if let Some(litsep) = $options.litsep {
1423                    builder.literal_separator(litsep);
1424                }
1425                if let Some(bsesc) = $options.bsesc {
1426                    builder.backslash_escape(bsesc);
1427                }
1428                let pat = builder.build().unwrap();
1429                assert_eq!($expect, pat.$which());
1430            }
1431        };
1432    }
1433
1434    macro_rules! literal {
1435        ($($tt:tt)*) => { extract!(literal, $($tt)*); }
1436    }
1437
1438    macro_rules! basetokens {
1439        ($($tt:tt)*) => { extract!(basename_tokens, $($tt)*); }
1440    }
1441
1442    macro_rules! ext {
1443        ($($tt:tt)*) => { extract!(ext, $($tt)*); }
1444    }
1445
1446    macro_rules! required_ext {
1447        ($($tt:tt)*) => { extract!(required_ext, $($tt)*); }
1448    }
1449
1450    macro_rules! prefix {
1451        ($($tt:tt)*) => { extract!(prefix, $($tt)*); }
1452    }
1453
1454    macro_rules! suffix {
1455        ($($tt:tt)*) => { extract!(suffix, $($tt)*); }
1456    }
1457
1458    macro_rules! baseliteral {
1459        ($($tt:tt)*) => { extract!(basename_literal, $($tt)*); }
1460    }
1461
    // Whole-path literal extraction: expected only for patterns without
    // wildcards, and not when matching is case insensitive.
    literal!(extract_lit1, "foo", Some(s("foo")));
    literal!(extract_lit2, "foo", None, CASEI);
    literal!(extract_lit3, "/foo", Some(s("/foo")));
    literal!(extract_lit4, "/foo/", Some(s("/foo/")));
    literal!(extract_lit5, "/foo/bar", Some(s("/foo/bar")));
    literal!(extract_lit6, "*.foo", None);
    literal!(extract_lit7, "foo/bar", Some(s("foo/bar")));
    literal!(extract_lit8, "**/foo/bar", None);

    // Basename token extraction for patterns of the form `**/<basename>`.
    basetokens!(
        extract_basetoks1,
        "**/foo",
        Some(&*vec![Literal('f'), Literal('o'), Literal('o'),])
    );
    basetokens!(extract_basetoks2, "**/foo", None, CASEI);
    basetokens!(
        extract_basetoks3,
        "**/foo",
        Some(&*vec![Literal('f'), Literal('o'), Literal('o'),]),
        SLASHLIT
    );
    basetokens!(extract_basetoks4, "*foo", None, SLASHLIT);
    basetokens!(extract_basetoks5, "*foo", None);
    basetokens!(extract_basetoks6, "**/fo*o", None);
    basetokens!(
        extract_basetoks7,
        "**/fo*o",
        Some(&*vec![Literal('f'), Literal('o'), ZeroOrMore, Literal('o'),]),
        SLASHLIT
    );

    // Extension extraction (the extension alone decides the match).
    ext!(extract_ext1, "**/*.rs", Some(s(".rs")));
    ext!(extract_ext2, "**/*.rs.bak", None);
    ext!(extract_ext3, "*.rs", Some(s(".rs")));
    ext!(extract_ext4, "a*.rs", None);
    ext!(extract_ext5, "/*.c", None);
    ext!(extract_ext6, "*.c", None, SLASHLIT);
    ext!(extract_ext7, "*.c", Some(s(".c")));

    // Required-extension extraction (necessary but not sufficient).
    required_ext!(extract_req_ext1, "*.rs", Some(s(".rs")));
    required_ext!(extract_req_ext2, "/foo/bar/*.rs", Some(s(".rs")));
    required_ext!(extract_req_ext3, "/foo/bar/*.rs", Some(s(".rs")));
    required_ext!(extract_req_ext4, "/foo/bar/.rs", Some(s(".rs")));
    required_ext!(extract_req_ext5, ".rs", Some(s(".rs")));
    required_ext!(extract_req_ext6, "./rs", None);
    required_ext!(extract_req_ext7, "foo", None);
    required_ext!(extract_req_ext8, ".foo/", None);
    required_ext!(extract_req_ext9, "foo/", None);

    // Literal prefix extraction.
    prefix!(extract_prefix1, "/foo", Some(s("/foo")));
    prefix!(extract_prefix2, "/foo/*", Some(s("/foo/")));
    prefix!(extract_prefix3, "**/foo", None);
    prefix!(extract_prefix4, "foo/**", Some(s("foo/")));

    // Literal suffix extraction; the boolean is the `component` flag.
    suffix!(extract_suffix1, "**/foo/bar", Some((s("/foo/bar"), true)));
    suffix!(extract_suffix2, "*/foo/bar", Some((s("/foo/bar"), false)));
    suffix!(extract_suffix3, "*/foo/bar", None, SLASHLIT);
    suffix!(extract_suffix4, "foo/bar", Some((s("foo/bar"), false)));
    suffix!(extract_suffix5, "*.foo", Some((s(".foo"), false)));
    suffix!(extract_suffix6, "*.foo", None, SLASHLIT);
    suffix!(extract_suffix7, "**/*_test", Some((s("_test"), false)));

    // Basename literal extraction: only `**/<literal>` qualifies.
    baseliteral!(extract_baselit1, "**/foo", Some(s("foo")));
    baseliteral!(extract_baselit2, "foo", None);
    baseliteral!(extract_baselit3, "*foo", None);
    baseliteral!(extract_baselit4, "*/foo", None);
1528}