ignore/
lib.rs

1/*!
The ignore crate provides a fast recursive directory iterator that respects
various filters such as globs, file types and `.gitignore` files. The precise
matching rules and precedence are explained in the documentation for
`WalkBuilder`.
6
7Secondarily, this crate exposes gitignore and file type matchers for use cases
8that demand more fine-grained control.
9
10# Example
11
12This example shows the most basic usage of this crate. This code will
13recursively traverse the current directory while automatically filtering out
14files and directories according to ignore globs found in files like
15`.ignore` and `.gitignore`:
16
17
18```rust,no_run
19use ignore::Walk;
20
21for result in Walk::new("./") {
22    // Each item yielded by the iterator is either a directory entry or an
23    // error, so either print the path or the error.
24    match result {
25        Ok(entry) => println!("{}", entry.path().display()),
26        Err(err) => println!("ERROR: {}", err),
27    }
28}
29```
30
31# Example: advanced
32
33By default, the recursive directory iterator will ignore hidden files and
34directories. This can be disabled by building the iterator with `WalkBuilder`:
35
36```rust,no_run
37use ignore::WalkBuilder;
38
39for result in WalkBuilder::new("./").hidden(false).build() {
40    println!("{:?}", result);
41}
42```
43
44See the documentation for `WalkBuilder` for many other options.
45*/
46
47#![deny(missing_docs)]
48
49use std::error;
50use std::fmt;
51use std::io;
52use std::path::{Path, PathBuf};
53
54pub use crate::walk::{
55    DirEntry, ParallelVisitor, ParallelVisitorBuilder, Walk, WalkBuilder,
56    WalkParallel, WalkState,
57};
58
59mod default_types;
60mod dir;
61pub mod gitignore;
62pub mod overrides;
63mod pathutil;
64pub mod types;
65mod walk;
66
/// Represents an error that can occur when parsing ignore files, selecting
/// file types or recursively walking a directory.
///
/// The `With*` variants do not describe a failure themselves; each one wraps
/// another `Error` and attaches a piece of context (line number, file path or
/// directory depth) to it.
#[derive(Debug)]
pub enum Error {
    /// A group of "soft" errors, reported when adding an ignore file only
    /// partially succeeded.
    Partial(Vec<Error>),
    /// Wraps an error together with the line number it is associated with.
    WithLineNumber {
        /// The line number.
        line: u64,
        /// The error being annotated.
        err: Box<Error>,
    },
    /// Wraps an error together with the file path it is associated with.
    WithPath {
        /// The file path.
        path: PathBuf,
        /// The error being annotated.
        err: Box<Error>,
    },
    /// Wraps an error together with the directory depth at which it occurred
    /// during a recursive directory walk.
    WithDepth {
        /// The directory depth.
        depth: usize,
        /// The error being annotated.
        err: Box<Error>,
    },
    /// A file system loop was detected while traversing symbolic links.
    Loop {
        /// The ancestor file path in the loop.
        ancestor: PathBuf,
        /// The child file path in the loop.
        child: PathBuf,
    },
    /// An I/O failure, such as one hit while reading an ignore file.
    Io(io::Error),
    /// A glob could not be parsed.
    Glob {
        /// The glob that triggered the error, when it is available. It is
        /// always the glob as provided by the end user, e.g., exactly as it
        /// is written in a `.gitignore` file.
        ///
        /// (The glob that is actually compiled may differ from this one
        /// once `gitignore` semantics have been applied.)
        glob: Option<String>,
        /// The underlying glob error, rendered as a string.
        err: String,
    },
    /// A file type was selected that has no definition.
    UnrecognizedFileType(String),
    /// A user supplied file type definition could not be parsed.
    InvalidDefinition,
}
122
123impl Clone for Error {
124    fn clone(&self) -> Error {
125        match *self {
126            Error::Partial(ref errs) => Error::Partial(errs.clone()),
127            Error::WithLineNumber { line, ref err } => {
128                Error::WithLineNumber { line: line, err: err.clone() }
129            }
130            Error::WithPath { ref path, ref err } => {
131                Error::WithPath { path: path.clone(), err: err.clone() }
132            }
133            Error::WithDepth { depth, ref err } => {
134                Error::WithDepth { depth: depth, err: err.clone() }
135            }
136            Error::Loop { ref ancestor, ref child } => Error::Loop {
137                ancestor: ancestor.clone(),
138                child: child.clone(),
139            },
140            Error::Io(ref err) => match err.raw_os_error() {
141                Some(e) => Error::Io(io::Error::from_raw_os_error(e)),
142                None => Error::Io(io::Error::new(err.kind(), err.to_string())),
143            },
144            Error::Glob { ref glob, ref err } => {
145                Error::Glob { glob: glob.clone(), err: err.clone() }
146            }
147            Error::UnrecognizedFileType(ref err) => {
148                Error::UnrecognizedFileType(err.clone())
149            }
150            Error::InvalidDefinition => Error::InvalidDefinition,
151        }
152    }
153}
154
155impl Error {
156    /// Returns true if this is a partial error.
157    ///
158    /// A partial error occurs when only some operations failed while others
159    /// may have succeeded. For example, an ignore file may contain an invalid
160    /// glob among otherwise valid globs.
161    pub fn is_partial(&self) -> bool {
162        match *self {
163            Error::Partial(_) => true,
164            Error::WithLineNumber { ref err, .. } => err.is_partial(),
165            Error::WithPath { ref err, .. } => err.is_partial(),
166            Error::WithDepth { ref err, .. } => err.is_partial(),
167            _ => false,
168        }
169    }
170
171    /// Returns true if this error is exclusively an I/O error.
172    pub fn is_io(&self) -> bool {
173        match *self {
174            Error::Partial(ref errs) => errs.len() == 1 && errs[0].is_io(),
175            Error::WithLineNumber { ref err, .. } => err.is_io(),
176            Error::WithPath { ref err, .. } => err.is_io(),
177            Error::WithDepth { ref err, .. } => err.is_io(),
178            Error::Loop { .. } => false,
179            Error::Io(_) => true,
180            Error::Glob { .. } => false,
181            Error::UnrecognizedFileType(_) => false,
182            Error::InvalidDefinition => false,
183        }
184    }
185
186    /// Inspect the original [`io::Error`] if there is one.
187    ///
188    /// [`None`] is returned if the [`Error`] doesn't correspond to an
189    /// [`io::Error`]. This might happen, for example, when the error was
190    /// produced because a cycle was found in the directory tree while
191    /// following symbolic links.
192    ///
193    /// This method returns a borrowed value that is bound to the lifetime of the [`Error`]. To
194    /// obtain an owned value, the [`into_io_error`] can be used instead.
195    ///
196    /// > This is the original [`io::Error`] and is _not_ the same as
197    /// > [`impl From<Error> for std::io::Error`][impl] which contains additional context about the
198    /// error.
199    ///
200    /// [`None`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html#variant.None
201    /// [`io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html
202    /// [`From`]: https://doc.rust-lang.org/stable/std/convert/trait.From.html
203    /// [`Error`]: struct.Error.html
204    /// [`into_io_error`]: struct.Error.html#method.into_io_error
205    /// [impl]: struct.Error.html#impl-From%3CError%3E
206    pub fn io_error(&self) -> Option<&std::io::Error> {
207        match *self {
208            Error::Partial(ref errs) => {
209                if errs.len() == 1 {
210                    errs[0].io_error()
211                } else {
212                    None
213                }
214            }
215            Error::WithLineNumber { ref err, .. } => err.io_error(),
216            Error::WithPath { ref err, .. } => err.io_error(),
217            Error::WithDepth { ref err, .. } => err.io_error(),
218            Error::Loop { .. } => None,
219            Error::Io(ref err) => Some(err),
220            Error::Glob { .. } => None,
221            Error::UnrecognizedFileType(_) => None,
222            Error::InvalidDefinition => None,
223        }
224    }
225
226    /// Similar to [`io_error`] except consumes self to convert to the original
227    /// [`io::Error`] if one exists.
228    ///
229    /// [`io_error`]: struct.Error.html#method.io_error
230    /// [`io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html
231    pub fn into_io_error(self) -> Option<std::io::Error> {
232        match self {
233            Error::Partial(mut errs) => {
234                if errs.len() == 1 {
235                    errs.remove(0).into_io_error()
236                } else {
237                    None
238                }
239            }
240            Error::WithLineNumber { err, .. } => err.into_io_error(),
241            Error::WithPath { err, .. } => err.into_io_error(),
242            Error::WithDepth { err, .. } => err.into_io_error(),
243            Error::Loop { .. } => None,
244            Error::Io(err) => Some(err),
245            Error::Glob { .. } => None,
246            Error::UnrecognizedFileType(_) => None,
247            Error::InvalidDefinition => None,
248        }
249    }
250
251    /// Returns a depth associated with recursively walking a directory (if
252    /// this error was generated from a recursive directory iterator).
253    pub fn depth(&self) -> Option<usize> {
254        match *self {
255            Error::WithPath { ref err, .. } => err.depth(),
256            Error::WithDepth { depth, .. } => Some(depth),
257            _ => None,
258        }
259    }
260
261    /// Turn an error into a tagged error with the given file path.
262    fn with_path<P: AsRef<Path>>(self, path: P) -> Error {
263        Error::WithPath {
264            path: path.as_ref().to_path_buf(),
265            err: Box::new(self),
266        }
267    }
268
269    /// Turn an error into a tagged error with the given depth.
270    fn with_depth(self, depth: usize) -> Error {
271        Error::WithDepth { depth: depth, err: Box::new(self) }
272    }
273
274    /// Turn an error into a tagged error with the given file path and line
275    /// number. If path is empty, then it is omitted from the error.
276    fn tagged<P: AsRef<Path>>(self, path: P, lineno: u64) -> Error {
277        let errline =
278            Error::WithLineNumber { line: lineno, err: Box::new(self) };
279        if path.as_ref().as_os_str().is_empty() {
280            return errline;
281        }
282        errline.with_path(path)
283    }
284
285    /// Build an error from a walkdir error.
286    fn from_walkdir(err: walkdir::Error) -> Error {
287        let depth = err.depth();
288        if let (Some(anc), Some(child)) = (err.loop_ancestor(), err.path()) {
289            return Error::WithDepth {
290                depth: depth,
291                err: Box::new(Error::Loop {
292                    ancestor: anc.to_path_buf(),
293                    child: child.to_path_buf(),
294                }),
295            };
296        }
297        let path = err.path().map(|p| p.to_path_buf());
298        let mut ig_err = Error::Io(io::Error::from(err));
299        if let Some(path) = path {
300            ig_err = Error::WithPath { path: path, err: Box::new(ig_err) };
301        }
302        ig_err
303    }
304}
305
306impl error::Error for Error {
307    #[allow(deprecated)]
308    fn description(&self) -> &str {
309        match *self {
310            Error::Partial(_) => "partial error",
311            Error::WithLineNumber { ref err, .. } => err.description(),
312            Error::WithPath { ref err, .. } => err.description(),
313            Error::WithDepth { ref err, .. } => err.description(),
314            Error::Loop { .. } => "file system loop found",
315            Error::Io(ref err) => err.description(),
316            Error::Glob { ref err, .. } => err,
317            Error::UnrecognizedFileType(_) => "unrecognized file type",
318            Error::InvalidDefinition => "invalid definition",
319        }
320    }
321}
322
323impl fmt::Display for Error {
324    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
325        match *self {
326            Error::Partial(ref errs) => {
327                let msgs: Vec<String> =
328                    errs.iter().map(|err| err.to_string()).collect();
329                write!(f, "{}", msgs.join("\n"))
330            }
331            Error::WithLineNumber { line, ref err } => {
332                write!(f, "line {}: {}", line, err)
333            }
334            Error::WithPath { ref path, ref err } => {
335                write!(f, "{}: {}", path.display(), err)
336            }
337            Error::WithDepth { ref err, .. } => err.fmt(f),
338            Error::Loop { ref ancestor, ref child } => write!(
339                f,
340                "File system loop found: \
341                           {} points to an ancestor {}",
342                child.display(),
343                ancestor.display()
344            ),
345            Error::Io(ref err) => err.fmt(f),
346            Error::Glob { glob: None, ref err } => write!(f, "{}", err),
347            Error::Glob { glob: Some(ref glob), ref err } => {
348                write!(f, "error parsing glob '{}': {}", glob, err)
349            }
350            Error::UnrecognizedFileType(ref ty) => {
351                write!(f, "unrecognized file type: {}", ty)
352            }
353            Error::InvalidDefinition => write!(
354                f,
355                "invalid definition (format is type:glob, e.g., \
356                           html:*.html)"
357            ),
358        }
359    }
360}
361
362impl From<io::Error> for Error {
363    fn from(err: io::Error) -> Error {
364        Error::Io(err)
365    }
366}
367
368#[derive(Debug, Default)]
369struct PartialErrorBuilder(Vec<Error>);
370
371impl PartialErrorBuilder {
372    fn push(&mut self, err: Error) {
373        self.0.push(err);
374    }
375
376    fn push_ignore_io(&mut self, err: Error) {
377        if !err.is_io() {
378            self.push(err);
379        }
380    }
381
382    fn maybe_push(&mut self, err: Option<Error>) {
383        if let Some(err) = err {
384            self.push(err);
385        }
386    }
387
388    fn maybe_push_ignore_io(&mut self, err: Option<Error>) {
389        if let Some(err) = err {
390            self.push_ignore_io(err);
391        }
392    }
393
394    fn into_error_option(mut self) -> Option<Error> {
395        if self.0.is_empty() {
396            None
397        } else if self.0.len() == 1 {
398            Some(self.0.pop().unwrap())
399        } else {
400            Some(Error::Partial(self.0))
401        }
402    }
403}
404
/// The outcome of matching a path against a set of globs.
///
/// The type parameter `T` typically carries more information about a
/// particular match. For example, it might identify the specific gitignore
/// file and the specific glob pattern that caused the match.
#[derive(Clone, Debug)]
pub enum Match<T> {
    /// No glob matched the path.
    None,
    /// The highest precedent glob that matched says the path should be
    /// ignored.
    Ignore(T),
    /// The highest precedent glob that matched says the path should be
    /// whitelisted.
    Whitelist(T),
}

impl<T> Match<T> {
    /// Returns true if no glob matched.
    pub fn is_none(&self) -> bool {
        match self {
            Match::None => true,
            Match::Ignore(_) | Match::Whitelist(_) => false,
        }
    }

    /// Returns true if the match says the path should be ignored.
    pub fn is_ignore(&self) -> bool {
        match self {
            Match::Ignore(_) => true,
            Match::None | Match::Whitelist(_) => false,
        }
    }

    /// Returns true if the match says the path should be whitelisted.
    pub fn is_whitelist(&self) -> bool {
        match self {
            Match::Whitelist(_) => true,
            Match::None | Match::Ignore(_) => false,
        }
    }

    /// Swaps `Ignore` and `Whitelist`, keeping the carried value. A
    /// non-match is returned as is.
    pub fn invert(self) -> Match<T> {
        match self {
            Match::Ignore(value) => Match::Whitelist(value),
            Match::Whitelist(value) => Match::Ignore(value),
            Match::None => Match::None,
        }
    }

    /// Borrows the value carried by this match, if there is one.
    pub fn inner(&self) -> Option<&T> {
        match self {
            Match::Ignore(value) | Match::Whitelist(value) => Some(value),
            Match::None => None,
        }
    }

    /// Transforms the carried value with the given function, keeping the
    /// kind of match.
    ///
    /// A non-match carries no value, so the function is not called for it.
    pub fn map<U, F: FnOnce(T) -> U>(self, f: F) -> Match<U> {
        match self {
            Match::Ignore(value) => Match::Ignore(f(value)),
            Match::Whitelist(value) => Match::Whitelist(f(value)),
            Match::None => Match::None,
        }
    }

    /// Returns this match unless it is a non-match, in which case `other`
    /// is returned instead.
    pub fn or(self, other: Self) -> Self {
        match self {
            Match::None => other,
            matched => matched,
        }
    }
}
488
#[cfg(test)]
mod tests {
    use std::env;
    use std::error;
    use std::fs;
    use std::io;
    use std::path::{Path, PathBuf};
    use std::result;
    use std::thread;

    /// A convenient result type alias.
    pub type Result<T> =
        result::Result<T, Box<dyn error::Error + Send + Sync>>;

    /// Builds a boxed error from a `format!`-style message.
    macro_rules! err {
        ($($tt:tt)*) => {
            Box::<dyn error::Error + Send + Sync>::from(format!($($tt)*))
        }
    }

    /// A simple wrapper for creating a temporary directory that is
    /// automatically deleted when it's dropped.
    ///
    /// We use this in lieu of tempfile because tempfile brings in too many
    /// dependencies.
    #[derive(Debug)]
    pub struct TempDir(PathBuf);

    impl Drop for TempDir {
        /// Removes the directory and everything inside it.
        ///
        /// A failed removal panics so that leaked directories are noticed,
        /// except when the thread is already unwinding: a second panic would
        /// abort the process and hide the original test failure.
        fn drop(&mut self) {
            if let Err(e) = fs::remove_dir_all(&self.0) {
                if !thread::panicking() {
                    panic!("failed to remove {}: {}", self.0.display(), e);
                }
            }
        }
    }

    impl TempDir {
        /// Create a new empty temporary directory under the system's configured
        /// temporary directory.
        ///
        /// Candidates are numbered sub-directories of `rust-ignore`. A
        /// candidate is only claimed if this call is the one that created it,
        /// so two concurrent callers (including other test processes) never
        /// end up sharing a directory.
        ///
        /// # Errors
        ///
        /// Returns an error if a directory cannot be created or if every
        /// attempted candidate already exists.
        pub fn new() -> Result<TempDir> {
            use std::sync::atomic::{AtomicUsize, Ordering};

            const TRIES: usize = 100;
            static COUNTER: AtomicUsize = AtomicUsize::new(0);

            let root = env::temp_dir().join("rust-ignore");
            fs::create_dir_all(&root).map_err(|e| {
                err!("failed to create {}: {}", root.display(), e)
            })?;
            for _ in 0..TRIES {
                let count = COUNTER.fetch_add(1, Ordering::SeqCst);
                let path = root.join(count.to_string());
                // `create_dir` fails if the path already exists, which makes
                // "check and claim" a single atomic step.
                match fs::create_dir(&path) {
                    Ok(()) => return Ok(TempDir(path)),
                    Err(ref e) if e.kind() == io::ErrorKind::AlreadyExists => {
                        continue;
                    }
                    Err(e) => {
                        return Err(err!(
                            "failed to create {}: {}",
                            path.display(),
                            e
                        ));
                    }
                }
            }
            Err(err!("failed to create temp dir after {} tries", TRIES))
        }

        /// Return the underlying path to this temporary directory.
        pub fn path(&self) -> &Path {
            &self.0
        }
    }
}