ignore/
walk.rs

1use std::cmp;
2use std::ffi::OsStr;
3use std::fmt;
4use std::fs::{self, FileType, Metadata};
5use std::io;
6use std::path::{Path, PathBuf};
7use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
8use std::sync::{Arc, Mutex};
9use std::thread;
10use std::time::Duration;
11use std::vec;
12
13use same_file::Handle;
14use walkdir::{self, WalkDir};
15
16use crate::dir::{Ignore, IgnoreBuilder};
17use crate::gitignore::GitignoreBuilder;
18use crate::overrides::Override;
19use crate::types::Types;
20use crate::{Error, PartialErrorBuilder};
21
/// A directory entry with a possible error attached.
///
/// The error typically refers to a problem parsing ignore files in a
/// particular directory.
#[derive(Clone, Debug)]
pub struct DirEntry {
    /// The underlying entry. Every accessor on `DirEntry` delegates to it.
    dent: DirEntryInner,
    /// An error associated with processing this entry, if any. It is exposed
    /// to callers through `DirEntry::error`.
    err: Option<Error>,
}
31
32impl DirEntry {
33    /// The full path that this entry represents.
34    pub fn path(&self) -> &Path {
35        self.dent.path()
36    }
37
38    /// The full path that this entry represents.
39    /// Analogous to [`path`], but moves ownership of the path.
40    ///
41    /// [`path`]: struct.DirEntry.html#method.path
42    pub fn into_path(self) -> PathBuf {
43        self.dent.into_path()
44    }
45
46    /// Whether this entry corresponds to a symbolic link or not.
47    pub fn path_is_symlink(&self) -> bool {
48        self.dent.path_is_symlink()
49    }
50
51    /// Returns true if and only if this entry corresponds to stdin.
52    ///
53    /// i.e., The entry has depth 0 and its file name is `-`.
54    pub fn is_stdin(&self) -> bool {
55        self.dent.is_stdin()
56    }
57
58    /// Return the metadata for the file that this entry points to.
59    pub fn metadata(&self) -> Result<Metadata, Error> {
60        self.dent.metadata()
61    }
62
63    /// Return the file type for the file that this entry points to.
64    ///
65    /// This entry doesn't have a file type if it corresponds to stdin.
66    pub fn file_type(&self) -> Option<FileType> {
67        self.dent.file_type()
68    }
69
70    /// Return the file name of this entry.
71    ///
72    /// If this entry has no file name (e.g., `/`), then the full path is
73    /// returned.
74    pub fn file_name(&self) -> &OsStr {
75        self.dent.file_name()
76    }
77
78    /// Returns the depth at which this entry was created relative to the root.
79    pub fn depth(&self) -> usize {
80        self.dent.depth()
81    }
82
83    /// Returns the underlying inode number if one exists.
84    ///
85    /// If this entry doesn't have an inode number, then `None` is returned.
86    #[cfg(unix)]
87    pub fn ino(&self) -> Option<u64> {
88        self.dent.ino()
89    }
90
91    /// Returns an error, if one exists, associated with processing this entry.
92    ///
93    /// An example of an error is one that occurred while parsing an ignore
94    /// file. Errors related to traversing a directory tree itself are reported
95    /// as part of yielding the directory entry, and not with this method.
96    pub fn error(&self) -> Option<&Error> {
97        self.err.as_ref()
98    }
99
100    /// Returns true if and only if this entry points to a directory.
101    pub(crate) fn is_dir(&self) -> bool {
102        self.dent.is_dir()
103    }
104
105    fn new_stdin() -> DirEntry {
106        DirEntry { dent: DirEntryInner::Stdin, err: None }
107    }
108
109    fn new_walkdir(dent: walkdir::DirEntry, err: Option<Error>) -> DirEntry {
110        DirEntry { dent: DirEntryInner::Walkdir(dent), err: err }
111    }
112
113    fn new_raw(dent: DirEntryRaw, err: Option<Error>) -> DirEntry {
114        DirEntry { dent: DirEntryInner::Raw(dent), err: err }
115    }
116}
117
/// DirEntryInner is the implementation of DirEntry.
///
/// It specifically represents three distinct sources of directory entries:
///
/// 1. From the walkdir crate.
/// 2. Special entries that represent things like stdin.
/// 3. From a path.
///
/// Specifically, (3) has to essentially re-create the DirEntry implementation
/// from WalkDir.
#[derive(Clone, Debug)]
enum DirEntryInner {
    /// The special entry standing for stdin. Its accessors report the
    /// placeholder path `<stdin>`, depth 0 and no file type.
    Stdin,
    /// An entry produced by the `walkdir` crate.
    Walkdir(walkdir::DirEntry),
    /// An entry assembled by this module itself (see `DirEntryRaw`).
    Raw(DirEntryRaw),
}
134
135impl DirEntryInner {
136    fn path(&self) -> &Path {
137        use self::DirEntryInner::*;
138        match *self {
139            Stdin => Path::new("<stdin>"),
140            Walkdir(ref x) => x.path(),
141            Raw(ref x) => x.path(),
142        }
143    }
144
145    fn into_path(self) -> PathBuf {
146        use self::DirEntryInner::*;
147        match self {
148            Stdin => PathBuf::from("<stdin>"),
149            Walkdir(x) => x.into_path(),
150            Raw(x) => x.into_path(),
151        }
152    }
153
154    fn path_is_symlink(&self) -> bool {
155        use self::DirEntryInner::*;
156        match *self {
157            Stdin => false,
158            Walkdir(ref x) => x.path_is_symlink(),
159            Raw(ref x) => x.path_is_symlink(),
160        }
161    }
162
163    fn is_stdin(&self) -> bool {
164        match *self {
165            DirEntryInner::Stdin => true,
166            _ => false,
167        }
168    }
169
170    fn metadata(&self) -> Result<Metadata, Error> {
171        use self::DirEntryInner::*;
172        match *self {
173            Stdin => {
174                let err = Error::Io(io::Error::new(
175                    io::ErrorKind::Other,
176                    "<stdin> has no metadata",
177                ));
178                Err(err.with_path("<stdin>"))
179            }
180            Walkdir(ref x) => x.metadata().map_err(|err| {
181                Error::Io(io::Error::from(err)).with_path(x.path())
182            }),
183            Raw(ref x) => x.metadata(),
184        }
185    }
186
187    fn file_type(&self) -> Option<FileType> {
188        use self::DirEntryInner::*;
189        match *self {
190            Stdin => None,
191            Walkdir(ref x) => Some(x.file_type()),
192            Raw(ref x) => Some(x.file_type()),
193        }
194    }
195
196    fn file_name(&self) -> &OsStr {
197        use self::DirEntryInner::*;
198        match *self {
199            Stdin => OsStr::new("<stdin>"),
200            Walkdir(ref x) => x.file_name(),
201            Raw(ref x) => x.file_name(),
202        }
203    }
204
205    fn depth(&self) -> usize {
206        use self::DirEntryInner::*;
207        match *self {
208            Stdin => 0,
209            Walkdir(ref x) => x.depth(),
210            Raw(ref x) => x.depth(),
211        }
212    }
213
214    #[cfg(unix)]
215    fn ino(&self) -> Option<u64> {
216        use self::DirEntryInner::*;
217        use walkdir::DirEntryExt;
218        match *self {
219            Stdin => None,
220            Walkdir(ref x) => Some(x.ino()),
221            Raw(ref x) => Some(x.ino()),
222        }
223    }
224
225    /// Returns true if and only if this entry points to a directory.
226    fn is_dir(&self) -> bool {
227        self.file_type().map(|ft| ft.is_dir()).unwrap_or(false)
228    }
229}
230
/// DirEntryRaw is essentially copied from the walkdir crate so that we can
/// build `DirEntry`s from whole cloth in the parallel iterator.
///
/// The set of fields differs per platform: `ino` only exists on Unix and
/// `metadata` only exists on Windows.
#[derive(Clone)]
struct DirEntryRaw {
    /// The path as reported by the `fs::ReadDir` iterator (even if it's a
    /// symbolic link).
    path: PathBuf,
    /// The file type. Necessary for recursive iteration, so store it.
    ty: FileType,
    /// Is set when this entry was created from a symbolic link and the user
    /// expects the iterator to follow symbolic links.
    follow_link: bool,
    /// The depth at which this entry was generated relative to the root.
    depth: usize,
    /// The underlying inode number (Unix only).
    #[cfg(unix)]
    ino: u64,
    /// The underlying metadata (Windows only). We store this on Windows
    /// because this comes for free while reading a directory.
    #[cfg(windows)]
    metadata: fs::Metadata,
}
253
254impl fmt::Debug for DirEntryRaw {
255    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
256        // Leaving out FileType because it doesn't have a debug impl
257        // in Rust 1.9. We could add it if we really wanted to by manually
258        // querying each possibly file type. Meh. ---AG
259        f.debug_struct("DirEntryRaw")
260            .field("path", &self.path)
261            .field("follow_link", &self.follow_link)
262            .field("depth", &self.depth)
263            .finish()
264    }
265}
266
267impl DirEntryRaw {
268    fn path(&self) -> &Path {
269        &self.path
270    }
271
272    fn into_path(self) -> PathBuf {
273        self.path
274    }
275
276    fn path_is_symlink(&self) -> bool {
277        self.ty.is_symlink() || self.follow_link
278    }
279
280    fn metadata(&self) -> Result<Metadata, Error> {
281        self.metadata_internal()
282    }
283
284    #[cfg(windows)]
285    fn metadata_internal(&self) -> Result<fs::Metadata, Error> {
286        if self.follow_link {
287            fs::metadata(&self.path)
288        } else {
289            Ok(self.metadata.clone())
290        }
291        .map_err(|err| Error::Io(io::Error::from(err)).with_path(&self.path))
292    }
293
294    #[cfg(not(windows))]
295    fn metadata_internal(&self) -> Result<fs::Metadata, Error> {
296        if self.follow_link {
297            fs::metadata(&self.path)
298        } else {
299            fs::symlink_metadata(&self.path)
300        }
301        .map_err(|err| Error::Io(io::Error::from(err)).with_path(&self.path))
302    }
303
304    fn file_type(&self) -> FileType {
305        self.ty
306    }
307
308    fn file_name(&self) -> &OsStr {
309        self.path.file_name().unwrap_or_else(|| self.path.as_os_str())
310    }
311
312    fn depth(&self) -> usize {
313        self.depth
314    }
315
316    #[cfg(unix)]
317    fn ino(&self) -> u64 {
318        self.ino
319    }
320
321    fn from_entry(
322        depth: usize,
323        ent: &fs::DirEntry,
324    ) -> Result<DirEntryRaw, Error> {
325        let ty = ent.file_type().map_err(|err| {
326            let err = Error::Io(io::Error::from(err)).with_path(ent.path());
327            Error::WithDepth { depth: depth, err: Box::new(err) }
328        })?;
329        DirEntryRaw::from_entry_os(depth, ent, ty)
330    }
331
332    #[cfg(windows)]
333    fn from_entry_os(
334        depth: usize,
335        ent: &fs::DirEntry,
336        ty: fs::FileType,
337    ) -> Result<DirEntryRaw, Error> {
338        let md = ent.metadata().map_err(|err| {
339            let err = Error::Io(io::Error::from(err)).with_path(ent.path());
340            Error::WithDepth { depth: depth, err: Box::new(err) }
341        })?;
342        Ok(DirEntryRaw {
343            path: ent.path(),
344            ty: ty,
345            follow_link: false,
346            depth: depth,
347            metadata: md,
348        })
349    }
350
351    #[cfg(unix)]
352    fn from_entry_os(
353        depth: usize,
354        ent: &fs::DirEntry,
355        ty: fs::FileType,
356    ) -> Result<DirEntryRaw, Error> {
357        use std::os::unix::fs::DirEntryExt;
358
359        Ok(DirEntryRaw {
360            path: ent.path(),
361            ty: ty,
362            follow_link: false,
363            depth: depth,
364            ino: ent.ino(),
365        })
366    }
367
368    // Placeholder implementation to allow compiling on non-standard platforms
369    // (e.g. wasm32).
370    #[cfg(not(any(windows, unix)))]
371    fn from_entry_os(
372        depth: usize,
373        ent: &fs::DirEntry,
374        ty: fs::FileType,
375    ) -> Result<DirEntryRaw, Error> {
376        Err(Error::Io(io::Error::new(
377            io::ErrorKind::Other,
378            "unsupported platform",
379        )))
380    }
381
382    #[cfg(windows)]
383    fn from_path(
384        depth: usize,
385        pb: PathBuf,
386        link: bool,
387    ) -> Result<DirEntryRaw, Error> {
388        let md =
389            fs::metadata(&pb).map_err(|err| Error::Io(err).with_path(&pb))?;
390        Ok(DirEntryRaw {
391            path: pb,
392            ty: md.file_type(),
393            follow_link: link,
394            depth: depth,
395            metadata: md,
396        })
397    }
398
399    #[cfg(unix)]
400    fn from_path(
401        depth: usize,
402        pb: PathBuf,
403        link: bool,
404    ) -> Result<DirEntryRaw, Error> {
405        use std::os::unix::fs::MetadataExt;
406
407        let md =
408            fs::metadata(&pb).map_err(|err| Error::Io(err).with_path(&pb))?;
409        Ok(DirEntryRaw {
410            path: pb,
411            ty: md.file_type(),
412            follow_link: link,
413            depth: depth,
414            ino: md.ino(),
415        })
416    }
417
418    // Placeholder implementation to allow compiling on non-standard platforms
419    // (e.g. wasm32).
420    #[cfg(not(any(windows, unix)))]
421    fn from_path(
422        depth: usize,
423        pb: PathBuf,
424        link: bool,
425    ) -> Result<DirEntryRaw, Error> {
426        Err(Error::Io(io::Error::new(
427            io::ErrorKind::Other,
428            "unsupported platform",
429        )))
430    }
431}
432
433/// WalkBuilder builds a recursive directory iterator.
434///
435/// The builder supports a large number of configurable options. This includes
436/// specific glob overrides, file type matching, toggling whether hidden
437/// files are ignored or not, and of course, support for respecting gitignore
438/// files.
439///
440/// By default, all ignore files found are respected. This includes `.ignore`,
441/// `.gitignore`, `.git/info/exclude` and even your global gitignore
442/// globs, usually found in `$XDG_CONFIG_HOME/git/ignore`.
443///
444/// Some standard recursive directory options are also supported, such as
445/// limiting the recursive depth or whether to follow symbolic links (disabled
446/// by default).
447///
448/// # Ignore rules
449///
450/// There are many rules that influence whether a particular file or directory
451/// is skipped by this iterator. Those rules are documented here. Note that
452/// the rules assume a default configuration.
453///
454/// * First, glob overrides are checked. If a path matches a glob override,
455/// then matching stops. The path is then only skipped if the glob that matched
456/// the path is an ignore glob. (An override glob is a whitelist glob unless it
457/// starts with a `!`, in which case it is an ignore glob.)
458/// * Second, ignore files are checked. Ignore files currently only come from
459/// git ignore files (`.gitignore`, `.git/info/exclude` and the configured
460/// global gitignore file), plain `.ignore` files, which have the same format
461/// as gitignore files, or explicitly added ignore files. The precedence order
462/// is: `.ignore`, `.gitignore`, `.git/info/exclude`, global gitignore and
463/// finally explicitly added ignore files. Note that precedence between
464/// different types of ignore files is not impacted by the directory hierarchy;
465/// any `.ignore` file overrides all `.gitignore` files. Within each precedence
466/// level, more nested ignore files have a higher precedence than less nested
467/// ignore files.
468/// * Third, if the previous step yields an ignore match, then all matching
469/// is stopped and the path is skipped. If it yields a whitelist match, then
470/// matching continues. A whitelist match can be overridden by a later matcher.
471/// * Fourth, unless the path is a directory, the file type matcher is run on
472/// the path. As above, if it yields an ignore match, then all matching is
473/// stopped and the path is skipped. If it yields a whitelist match, then
474/// matching continues.
475/// * Fifth, if the path hasn't been whitelisted and it is hidden, then the
476/// path is skipped.
477/// * Sixth, unless the path is a directory, the size of the file is compared
478/// against the max filesize limit. If it exceeds the limit, it is skipped.
479/// * Seventh, if the path has made it this far then it is yielded in the
480/// iterator.
#[derive(Clone)]
pub struct WalkBuilder {
    /// The root paths to traverse: the one given to `new` followed by every
    /// path passed to `add`.
    paths: Vec<PathBuf>,
    /// Builder for the ignore matcher; most filter-related setters forward
    /// to it.
    ig_builder: IgnoreBuilder,
    /// Maximum recursion depth; `None` imposes no restriction.
    max_depth: Option<usize>,
    /// Maximum file size limit; `None` imposes no limit.
    max_filesize: Option<u64>,
    /// Whether symbolic links are followed.
    follow_links: bool,
    /// Whether traversal must stay on the file system of the root path.
    same_file_system: bool,
    /// Optional comparator used to order entries (single-threaded iterator
    /// only).
    sorter: Option<Sorter>,
    /// Number of threads for the parallel iterator; `0` selects a count
    /// automatically.
    threads: usize,
    /// Handle of a file to skip; set to the stdout handle by `skip_stdout`.
    skip: Option<Arc<Handle>>,
    /// Optional user predicate installed by `filter_entry`.
    filter: Option<Filter>,
}
494
/// The comparator used to order directory entries, in one of the two forms
/// accepted by `WalkBuilder`.
///
/// The closures are held behind `Arc`, so cloning a `Sorter` shares the
/// same comparator.
#[derive(Clone)]
enum Sorter {
    /// Compares entries by file name only (see `sort_by_file_name`).
    ByName(
        Arc<dyn Fn(&OsStr, &OsStr) -> cmp::Ordering + Send + Sync + 'static>,
    ),
    /// Compares entries by their path (see `sort_by_file_path`).
    ByPath(Arc<dyn Fn(&Path, &Path) -> cmp::Ordering + Send + Sync + 'static>),
}
502
/// A user-supplied predicate over directory entries, installed through
/// `WalkBuilder::filter_entry`. The closure is held behind an `Arc`, so
/// cloning a `Filter` shares the same predicate.
#[derive(Clone)]
struct Filter(Arc<dyn Fn(&DirEntry) -> bool + Send + Sync + 'static>);
505
506impl fmt::Debug for WalkBuilder {
507    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
508        f.debug_struct("WalkBuilder")
509            .field("paths", &self.paths)
510            .field("ig_builder", &self.ig_builder)
511            .field("max_depth", &self.max_depth)
512            .field("max_filesize", &self.max_filesize)
513            .field("follow_links", &self.follow_links)
514            .field("threads", &self.threads)
515            .field("skip", &self.skip)
516            .finish()
517    }
518}
519
520impl WalkBuilder {
521    /// Create a new builder for a recursive directory iterator for the
522    /// directory given.
523    ///
524    /// Note that if you want to traverse multiple different directories, it
525    /// is better to call `add` on this builder than to create multiple
526    /// `Walk` values.
527    pub fn new<P: AsRef<Path>>(path: P) -> WalkBuilder {
528        WalkBuilder {
529            paths: vec![path.as_ref().to_path_buf()],
530            ig_builder: IgnoreBuilder::new(),
531            max_depth: None,
532            max_filesize: None,
533            follow_links: false,
534            same_file_system: false,
535            sorter: None,
536            threads: 0,
537            skip: None,
538            filter: None,
539        }
540    }
541
542    /// Build a new `Walk` iterator.
543    pub fn build(&self) -> Walk {
544        let follow_links = self.follow_links;
545        let max_depth = self.max_depth;
546        let sorter = self.sorter.clone();
547        let its = self
548            .paths
549            .iter()
550            .map(move |p| {
551                if p == Path::new("-") {
552                    (p.to_path_buf(), None)
553                } else {
554                    let mut wd = WalkDir::new(p);
555                    wd = wd.follow_links(follow_links || p.is_file());
556                    wd = wd.same_file_system(self.same_file_system);
557                    if let Some(max_depth) = max_depth {
558                        wd = wd.max_depth(max_depth);
559                    }
560                    if let Some(ref sorter) = sorter {
561                        match sorter.clone() {
562                            Sorter::ByName(cmp) => {
563                                wd = wd.sort_by(move |a, b| {
564                                    cmp(a.file_name(), b.file_name())
565                                });
566                            }
567                            Sorter::ByPath(cmp) => {
568                                wd = wd.sort_by(move |a, b| {
569                                    cmp(a.path(), b.path())
570                                });
571                            }
572                        }
573                    }
574                    (p.to_path_buf(), Some(WalkEventIter::from(wd)))
575                }
576            })
577            .collect::<Vec<_>>()
578            .into_iter();
579        let ig_root = self.ig_builder.build();
580        Walk {
581            its: its,
582            it: None,
583            ig_root: ig_root.clone(),
584            ig: ig_root.clone(),
585            max_filesize: self.max_filesize,
586            skip: self.skip.clone(),
587            filter: self.filter.clone(),
588        }
589    }
590
591    /// Build a new `WalkParallel` iterator.
592    ///
593    /// Note that this *doesn't* return something that implements `Iterator`.
594    /// Instead, the returned value must be run with a closure. e.g.,
595    /// `builder.build_parallel().run(|| |path| println!("{:?}", path))`.
596    pub fn build_parallel(&self) -> WalkParallel {
597        WalkParallel {
598            paths: self.paths.clone().into_iter(),
599            ig_root: self.ig_builder.build(),
600            max_depth: self.max_depth,
601            max_filesize: self.max_filesize,
602            follow_links: self.follow_links,
603            same_file_system: self.same_file_system,
604            threads: self.threads,
605            skip: self.skip.clone(),
606            filter: self.filter.clone(),
607        }
608    }
609
610    /// Add a file path to the iterator.
611    ///
612    /// Each additional file path added is traversed recursively. This should
613    /// be preferred over building multiple `Walk` iterators since this
614    /// enables reusing resources across iteration.
615    pub fn add<P: AsRef<Path>>(&mut self, path: P) -> &mut WalkBuilder {
616        self.paths.push(path.as_ref().to_path_buf());
617        self
618    }
619
620    /// The maximum depth to recurse.
621    ///
622    /// The default, `None`, imposes no depth restriction.
623    pub fn max_depth(&mut self, depth: Option<usize>) -> &mut WalkBuilder {
624        self.max_depth = depth;
625        self
626    }
627
628    /// Whether to follow symbolic links or not.
629    pub fn follow_links(&mut self, yes: bool) -> &mut WalkBuilder {
630        self.follow_links = yes;
631        self
632    }
633
634    /// Whether to ignore files above the specified limit.
635    pub fn max_filesize(&mut self, filesize: Option<u64>) -> &mut WalkBuilder {
636        self.max_filesize = filesize;
637        self
638    }
639
640    /// The number of threads to use for traversal.
641    ///
642    /// Note that this only has an effect when using `build_parallel`.
643    ///
644    /// The default setting is `0`, which chooses the number of threads
645    /// automatically using heuristics.
646    pub fn threads(&mut self, n: usize) -> &mut WalkBuilder {
647        self.threads = n;
648        self
649    }
650
651    /// Add a global ignore file to the matcher.
652    ///
653    /// This has lower precedence than all other sources of ignore rules.
654    ///
655    /// If there was a problem adding the ignore file, then an error is
656    /// returned. Note that the error may indicate *partial* failure. For
657    /// example, if an ignore file contains an invalid glob, all other globs
658    /// are still applied.
659    pub fn add_ignore<P: AsRef<Path>>(&mut self, path: P) -> Option<Error> {
660        let mut builder = GitignoreBuilder::new("");
661        let mut errs = PartialErrorBuilder::default();
662        errs.maybe_push(builder.add(path));
663        match builder.build() {
664            Ok(gi) => {
665                self.ig_builder.add_ignore(gi);
666            }
667            Err(err) => {
668                errs.push(err);
669            }
670        }
671        errs.into_error_option()
672    }
673
674    /// Add a custom ignore file name
675    ///
676    /// These ignore files have higher precedence than all other ignore files.
677    ///
678    /// When specifying multiple names, earlier names have lower precedence than
679    /// later names.
680    pub fn add_custom_ignore_filename<S: AsRef<OsStr>>(
681        &mut self,
682        file_name: S,
683    ) -> &mut WalkBuilder {
684        self.ig_builder.add_custom_ignore_filename(file_name);
685        self
686    }
687
688    /// Add an override matcher.
689    ///
690    /// By default, no override matcher is used.
691    ///
692    /// This overrides any previous setting.
693    pub fn overrides(&mut self, overrides: Override) -> &mut WalkBuilder {
694        self.ig_builder.overrides(overrides);
695        self
696    }
697
698    /// Add a file type matcher.
699    ///
700    /// By default, no file type matcher is used.
701    ///
702    /// This overrides any previous setting.
703    pub fn types(&mut self, types: Types) -> &mut WalkBuilder {
704        self.ig_builder.types(types);
705        self
706    }
707
708    /// Enables all the standard ignore filters.
709    ///
710    /// This toggles, as a group, all the filters that are enabled by default:
711    ///
712    /// - [hidden()](#method.hidden)
713    /// - [parents()](#method.parents)
714    /// - [ignore()](#method.ignore)
715    /// - [git_ignore()](#method.git_ignore)
716    /// - [git_global()](#method.git_global)
717    /// - [git_exclude()](#method.git_exclude)
718    ///
719    /// They may still be toggled individually after calling this function.
720    ///
721    /// This is (by definition) enabled by default.
722    pub fn standard_filters(&mut self, yes: bool) -> &mut WalkBuilder {
723        self.hidden(yes)
724            .parents(yes)
725            .ignore(yes)
726            .git_ignore(yes)
727            .git_global(yes)
728            .git_exclude(yes)
729    }
730
731    /// Enables ignoring hidden files.
732    ///
733    /// This is enabled by default.
734    pub fn hidden(&mut self, yes: bool) -> &mut WalkBuilder {
735        self.ig_builder.hidden(yes);
736        self
737    }
738
739    /// Enables reading ignore files from parent directories.
740    ///
741    /// If this is enabled, then .gitignore files in parent directories of each
742    /// file path given are respected. Otherwise, they are ignored.
743    ///
744    /// This is enabled by default.
745    pub fn parents(&mut self, yes: bool) -> &mut WalkBuilder {
746        self.ig_builder.parents(yes);
747        self
748    }
749
750    /// Enables reading `.ignore` files.
751    ///
752    /// `.ignore` files have the same semantics as `gitignore` files and are
753    /// supported by search tools such as ripgrep and The Silver Searcher.
754    ///
755    /// This is enabled by default.
756    pub fn ignore(&mut self, yes: bool) -> &mut WalkBuilder {
757        self.ig_builder.ignore(yes);
758        self
759    }
760
761    /// Enables reading a global gitignore file, whose path is specified in
762    /// git's `core.excludesFile` config option.
763    ///
764    /// Git's config file location is `$HOME/.gitconfig`. If `$HOME/.gitconfig`
765    /// does not exist or does not specify `core.excludesFile`, then
766    /// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not
767    /// set or is empty, then `$HOME/.config/git/ignore` is used instead.
768    ///
769    /// This is enabled by default.
770    pub fn git_global(&mut self, yes: bool) -> &mut WalkBuilder {
771        self.ig_builder.git_global(yes);
772        self
773    }
774
775    /// Enables reading `.gitignore` files.
776    ///
777    /// `.gitignore` files have match semantics as described in the `gitignore`
778    /// man page.
779    ///
780    /// This is enabled by default.
781    pub fn git_ignore(&mut self, yes: bool) -> &mut WalkBuilder {
782        self.ig_builder.git_ignore(yes);
783        self
784    }
785
786    /// Enables reading `.git/info/exclude` files.
787    ///
788    /// `.git/info/exclude` files have match semantics as described in the
789    /// `gitignore` man page.
790    ///
791    /// This is enabled by default.
792    pub fn git_exclude(&mut self, yes: bool) -> &mut WalkBuilder {
793        self.ig_builder.git_exclude(yes);
794        self
795    }
796
797    /// Whether a git repository is required to apply git-related ignore
798    /// rules (global rules, .gitignore and local exclude rules).
799    ///
800    /// When disabled, git-related ignore rules are applied even when searching
801    /// outside a git repository.
802    pub fn require_git(&mut self, yes: bool) -> &mut WalkBuilder {
803        self.ig_builder.require_git(yes);
804        self
805    }
806
807    /// Process ignore files case insensitively
808    ///
809    /// This is disabled by default.
810    pub fn ignore_case_insensitive(&mut self, yes: bool) -> &mut WalkBuilder {
811        self.ig_builder.ignore_case_insensitive(yes);
812        self
813    }
814
815    /// Set a function for sorting directory entries by their path.
816    ///
817    /// If a compare function is set, the resulting iterator will return all
818    /// paths in sorted order. The compare function will be called to compare
819    /// entries from the same directory.
820    ///
821    /// This is like `sort_by_file_name`, except the comparator accepts
822    /// a `&Path` instead of the base file name, which permits it to sort by
823    /// more criteria.
824    ///
825    /// This method will override any previous sorter set by this method or
826    /// by `sort_by_file_name`.
827    ///
828    /// Note that this is not used in the parallel iterator.
829    pub fn sort_by_file_path<F>(&mut self, cmp: F) -> &mut WalkBuilder
830    where
831        F: Fn(&Path, &Path) -> cmp::Ordering + Send + Sync + 'static,
832    {
833        self.sorter = Some(Sorter::ByPath(Arc::new(cmp)));
834        self
835    }
836
837    /// Set a function for sorting directory entries by file name.
838    ///
839    /// If a compare function is set, the resulting iterator will return all
840    /// paths in sorted order. The compare function will be called to compare
841    /// names from entries from the same directory using only the name of the
842    /// entry.
843    ///
844    /// This method will override any previous sorter set by this method or
845    /// by `sort_by_file_path`.
846    ///
847    /// Note that this is not used in the parallel iterator.
848    pub fn sort_by_file_name<F>(&mut self, cmp: F) -> &mut WalkBuilder
849    where
850        F: Fn(&OsStr, &OsStr) -> cmp::Ordering + Send + Sync + 'static,
851    {
852        self.sorter = Some(Sorter::ByName(Arc::new(cmp)));
853        self
854    }
855
856    /// Do not cross file system boundaries.
857    ///
858    /// When this option is enabled, directory traversal will not descend into
859    /// directories that are on a different file system from the root path.
860    ///
861    /// Currently, this option is only supported on Unix and Windows. If this
862    /// option is used on an unsupported platform, then directory traversal
863    /// will immediately return an error and will not yield any entries.
864    pub fn same_file_system(&mut self, yes: bool) -> &mut WalkBuilder {
865        self.same_file_system = yes;
866        self
867    }
868
869    /// Do not yield directory entries that are believed to correspond to
870    /// stdout.
871    ///
872    /// This is useful when a command is invoked via shell redirection to a
873    /// file that is also being read. For example, `grep -r foo ./ > results`
874    /// might end up trying to search `results` even though it is also writing
875    /// to it, which could cause an unbounded feedback loop. Setting this
876    /// option prevents this from happening by skipping over the `results`
877    /// file.
878    ///
879    /// This is disabled by default.
880    pub fn skip_stdout(&mut self, yes: bool) -> &mut WalkBuilder {
881        if yes {
882            self.skip = stdout_handle().map(Arc::new);
883        } else {
884            self.skip = None;
885        }
886        self
887    }
888
889    /// Yields only entries which satisfy the given predicate and skips
890    /// descending into directories that do not satisfy the given predicate.
891    ///
892    /// The predicate is applied to all entries. If the predicate is
893    /// true, iteration carries on as normal. If the predicate is false, the
894    /// entry is ignored and if it is a directory, it is not descended into.
895    ///
896    /// Note that the errors for reading entries that may not satisfy the
897    /// predicate will still be yielded.
898    pub fn filter_entry<P>(&mut self, filter: P) -> &mut WalkBuilder
899    where
900        P: Fn(&DirEntry) -> bool + Send + Sync + 'static,
901    {
902        self.filter = Some(Filter(Arc::new(filter)));
903        self
904    }
905}
906
/// Walk is a recursive directory iterator over file paths in one or more
/// directories.
///
/// Only file and directory paths matching the rules are returned. By default,
/// ignore files like `.gitignore` are respected. The precise matching rules
/// and precedence are explained in the documentation for `WalkBuilder`.
pub struct Walk {
    /// The remaining root paths, each paired with its event iterator. A
    /// `None` iterator marks an entry that is yielded as stdin.
    its: vec::IntoIter<(PathBuf, Option<WalkEventIter>)>,
    /// The event iterator for the root path currently being traversed.
    it: Option<WalkEventIter>,
    /// The base ignore matcher from which the matcher for each root path is
    /// derived.
    ig_root: Ignore,
    /// The ignore matcher for the directory currently being traversed. It is
    /// extended when a directory is entered and reset to its parent when a
    /// directory is exited.
    ig: Ignore,
    /// When set, non-directory entries larger than this many bytes are
    /// skipped.
    max_filesize: Option<u64>,
    /// When set, entries that are the same file as this handle are skipped.
    skip: Option<Arc<Handle>>,
    /// When set, entries for which this predicate returns false are skipped.
    filter: Option<Filter>,
}
922
923impl Walk {
924    /// Creates a new recursive directory iterator for the file path given.
925    ///
926    /// Note that this uses default settings, which include respecting
927    /// `.gitignore` files. To configure the iterator, use `WalkBuilder`
928    /// instead.
929    pub fn new<P: AsRef<Path>>(path: P) -> Walk {
930        WalkBuilder::new(path).build()
931    }
932
933    fn skip_entry(&self, ent: &DirEntry) -> Result<bool, Error> {
934        if ent.depth() == 0 {
935            return Ok(false);
936        }
937        // We ensure that trivial skipping is done before any other potentially
938        // expensive operations (stat, filesystem other) are done. This seems
939        // like an obvious optimization but becomes critical when filesystem
940        // operations even as simple as stat can result in significant
941        // overheads; an example of this was a bespoke filesystem layer in
942        // Windows that hosted files remotely and would download them on-demand
943        // when particular filesystem operations occurred. Users of this system
944        // who ensured correct file-type filters were being used could still
945        // get unnecessary file access resulting in large downloads.
946        if should_skip_entry(&self.ig, ent) {
947            return Ok(true);
948        }
949        if let Some(ref stdout) = self.skip {
950            if path_equals(ent, stdout)? {
951                return Ok(true);
952            }
953        }
954        if self.max_filesize.is_some() && !ent.is_dir() {
955            return Ok(skip_filesize(
956                self.max_filesize.unwrap(),
957                ent.path(),
958                &ent.metadata().ok(),
959            ));
960        }
961        if let Some(Filter(filter)) = &self.filter {
962            if !filter(ent) {
963                return Ok(true);
964            }
965        }
966        Ok(false)
967    }
968}
969
/// Yields the entries of every root path in turn, applying the skip rules
/// from `skip_entry` and keeping the current ignore matcher in sync with the
/// directory being traversed.
impl Iterator for Walk {
    type Item = Result<DirEntry, Error>;

    #[inline(always)]
    fn next(&mut self) -> Option<Result<DirEntry, Error>> {
        loop {
            // Pull the next event from the current root's iterator. If there
            // is no current iterator or it is exhausted, move on to the next
            // root path.
            let ev = match self.it.as_mut().and_then(|it| it.next()) {
                Some(ev) => ev,
                None => {
                    match self.its.next() {
                        // No more root paths: the walk is finished.
                        None => return None,
                        // A root without an event iterator is yielded as the
                        // stdin entry.
                        Some((_, None)) => {
                            return Some(Ok(DirEntry::new_stdin()));
                        }
                        Some((path, Some(it))) => {
                            self.it = Some(it);
                            // For a directory root, start from a matcher that
                            // includes the rules of its parent directories.
                            // Otherwise, start from the base matcher as-is.
                            if path.is_dir() {
                                let (ig, err) = self.ig_root.add_parents(path);
                                self.ig = ig;
                                if let Some(err) = err {
                                    return Some(Err(err));
                                }
                            } else {
                                self.ig = self.ig_root.clone();
                            }
                        }
                    }
                    continue;
                }
            };
            match ev {
                Err(err) => {
                    return Some(Err(Error::from_walkdir(err)));
                }
                Ok(WalkEvent::Exit) => {
                    // Leaving a directory: drop back to the parent matcher.
                    // Every directory event pushes a matcher (even skipped
                    // ones, see below), so a parent is expected to exist.
                    self.ig = self.ig.parent().unwrap();
                }
                Ok(WalkEvent::Dir(ent)) => {
                    let mut ent = DirEntry::new_walkdir(ent, None);
                    let should_skip = match self.skip_entry(&ent) {
                        Err(err) => return Some(Err(err)),
                        Ok(should_skip) => should_skip,
                    };
                    if should_skip {
                        self.it.as_mut().unwrap().it.skip_current_dir();
                        // Still need to push this on the stack because
                        // we'll get a WalkEvent::Exit event for this dir.
                        // We don't care if it errors though.
                        let (igtmp, _) = self.ig.add_child(ent.path());
                        self.ig = igtmp;
                        continue;
                    }
                    // Entering a directory: extend the matcher with this
                    // directory and attach any resulting error to the entry
                    // instead of failing the walk.
                    let (igtmp, err) = self.ig.add_child(ent.path());
                    self.ig = igtmp;
                    ent.err = err;
                    return Some(Ok(ent));
                }
                Ok(WalkEvent::File(ent)) => {
                    let ent = DirEntry::new_walkdir(ent, None);
                    let should_skip = match self.skip_entry(&ent) {
                        Err(err) => return Some(Err(err)),
                        Ok(should_skip) => should_skip,
                    };
                    if should_skip {
                        continue;
                    }
                    return Some(Ok(ent));
                }
            }
        }
    }
}
1042
/// WalkEventIter transforms a WalkDir iterator into an iterator that more
/// accurately describes the directory tree. Namely, it emits events that are
/// one of three types: directory, file or "exit." An "exit" event means that
/// the entire contents of a directory have been enumerated.
struct WalkEventIter {
    /// The current nesting depth. It is raised by one after a directory
    /// event is emitted and lowered by one for every exit event.
    depth: usize,
    /// The underlying directory iterator.
    it: walkdir::IntoIter,
    /// An item already pulled from `it` that is held back while exit events
    /// are emitted ahead of it.
    next: Option<Result<walkdir::DirEntry, walkdir::Error>>,
}
1052
/// A single event produced by `WalkEventIter`.
#[derive(Debug)]
enum WalkEvent {
    /// A directory entry. A matching `Exit` event follows once its contents
    /// have been enumerated.
    Dir(walkdir::DirEntry),
    /// Any entry that is not treated as a directory.
    File(walkdir::DirEntry),
    /// The contents of the most recently entered directory have been
    /// completely enumerated.
    Exit,
}
1059
1060impl From<WalkDir> for WalkEventIter {
1061    fn from(it: WalkDir) -> WalkEventIter {
1062        WalkEventIter { depth: 0, it: it.into_iter(), next: None }
1063    }
1064}
1065
1066impl Iterator for WalkEventIter {
1067    type Item = walkdir::Result<WalkEvent>;
1068
1069    #[inline(always)]
1070    fn next(&mut self) -> Option<walkdir::Result<WalkEvent>> {
1071        let dent = self.next.take().or_else(|| self.it.next());
1072        let depth = match dent {
1073            None => 0,
1074            Some(Ok(ref dent)) => dent.depth(),
1075            Some(Err(ref err)) => err.depth(),
1076        };
1077        if depth < self.depth {
1078            self.depth -= 1;
1079            self.next = dent;
1080            return Some(Ok(WalkEvent::Exit));
1081        }
1082        self.depth = depth;
1083        match dent {
1084            None => None,
1085            Some(Err(err)) => Some(Err(err)),
1086            Some(Ok(dent)) => {
1087                if walkdir_is_dir(&dent) {
1088                    self.depth += 1;
1089                    Some(Ok(WalkEvent::Dir(dent)))
1090                } else {
1091                    Some(Ok(WalkEvent::File(dent)))
1092                }
1093            }
1094        }
1095    }
1096}
1097
/// WalkState tells the parallel recursive directory iterator what to do
/// after an entry has been visited: keep walking as normal, skip descending
/// into a particular directory or quit the walk entirely.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum WalkState {
    /// Continue walking as normal.
    Continue,
    /// If the directory entry given is a directory, don't descend into it.
    /// In all other cases, this has no effect.
    Skip,
    /// Quit the entire iterator as soon as possible.
    ///
    /// Note that this is an inherently asynchronous action. It is possible
    /// for more entries to be yielded even after instructing the iterator
    /// to quit.
    Quit,
}

impl WalkState {
    /// Returns true if and only if this state is `Continue`.
    fn is_continue(&self) -> bool {
        matches!(*self, WalkState::Continue)
    }

    /// Returns true if and only if this state is `Quit`.
    fn is_quit(&self) -> bool {
        matches!(*self, WalkState::Quit)
    }
}
1125
/// A builder for constructing a visitor when using
/// [`WalkParallel::visit`](struct.WalkParallel.html#method.visit). The builder
/// will be called for each thread started by `WalkParallel`. The visitor
/// returned from each builder is then called for every directory entry.
///
/// The lifetime `'s` bounds the visitors that are produced, which allows
/// them to hold borrowed data.
pub trait ParallelVisitorBuilder<'s> {
    /// Create per-thread `ParallelVisitor`s for `WalkParallel`.
    fn build(&mut self) -> Box<dyn ParallelVisitor + 's>;
}
1134
1135impl<'a, 's, P: ParallelVisitorBuilder<'s>> ParallelVisitorBuilder<'s>
1136    for &'a mut P
1137{
1138    fn build(&mut self) -> Box<dyn ParallelVisitor + 's> {
1139        (**self).build()
1140    }
1141}
1142
/// Receives files and directories for the current thread.
///
/// Setup for the traversal can be implemented as part of
/// [`ParallelVisitorBuilder::build`](trait.ParallelVisitorBuilder.html#tymethod.build).
/// Teardown when traversal finishes can be implemented by implementing the
/// `Drop` trait on your traversal type.
///
/// Implementations must be `Send` since each visitor is moved to the thread
/// that uses it.
pub trait ParallelVisitor: Send {
    /// Receives files and directories for the current thread. This is called
    /// once for every directory entry visited by traversal.
    ///
    /// The returned `WalkState` tells the traversal whether to continue,
    /// skip descending into the entry or quit.
    fn visit(&mut self, entry: Result<DirEntry, Error>) -> WalkState;
}
1154
/// Adapts a closure that produces visitor callbacks into a
/// `ParallelVisitorBuilder`.
struct FnBuilder<F> {
    /// The closure called to produce a new visitor callback.
    builder: F,
}
1158
1159impl<'s, F: FnMut() -> FnVisitor<'s>> ParallelVisitorBuilder<'s>
1160    for FnBuilder<F>
1161{
1162    fn build(&mut self) -> Box<dyn ParallelVisitor + 's> {
1163        let visitor = (self.builder)();
1164        Box::new(FnVisitorImp { visitor })
1165    }
1166}
1167
/// A boxed callback that receives each visited entry (or error) and returns
/// the `WalkState` that decides how the traversal proceeds.
type FnVisitor<'s> =
    Box<dyn FnMut(Result<DirEntry, Error>) -> WalkState + Send + 's>;
1170
/// A `ParallelVisitor` backed by a boxed callback.
struct FnVisitorImp<'s> {
    /// The callback invoked for every visited entry.
    visitor: FnVisitor<'s>,
}
1174
1175impl<'s> ParallelVisitor for FnVisitorImp<'s> {
1176    fn visit(&mut self, entry: Result<DirEntry, Error>) -> WalkState {
1177        (self.visitor)(entry)
1178    }
1179}
1180
/// WalkParallel is a parallel recursive directory iterator over file paths
/// in one or more directories.
///
/// Only file and directory paths matching the rules are returned. By default,
/// ignore files like `.gitignore` are respected. The precise matching rules
/// and precedence are explained in the documentation for `WalkBuilder`.
///
/// Unlike `Walk`, this uses multiple threads for traversing a directory.
pub struct WalkParallel {
    /// The root paths to traverse.
    paths: vec::IntoIter<PathBuf>,
    /// The base ignore matcher handed to the work item of every root path.
    ig_root: Ignore,
    /// When set, non-directory entries larger than this many bytes are
    /// skipped.
    max_filesize: Option<u64>,
    /// When set, directories at this depth or deeper are not descended into.
    max_depth: Option<usize>,
    /// Whether symbolic links are followed.
    follow_links: bool,
    /// Whether traversal is restricted to the file system of each root path.
    same_file_system: bool,
    /// The number of worker threads. A value of `0` is replaced by a default
    /// when the traversal starts.
    threads: usize,
    /// When set, entries that are the same file as this handle are skipped.
    skip: Option<Arc<Handle>>,
    /// When set, entries for which this predicate returns false are skipped.
    filter: Option<Filter>,
}
1200
1201impl WalkParallel {
1202    /// Execute the parallel recursive directory iterator. `mkf` is called
1203    /// for each thread used for iteration. The function produced by `mkf`
1204    /// is then in turn called for each visited file path.
1205    pub fn run<'s, F>(self, mkf: F)
1206    where
1207        F: FnMut() -> FnVisitor<'s>,
1208    {
1209        self.visit(&mut FnBuilder { builder: mkf })
1210    }
1211
1212    /// Execute the parallel recursive directory iterator using a custom
1213    /// visitor.
1214    ///
1215    /// The builder given is used to construct a visitor for every thread
1216    /// used by this traversal. The visitor returned from each builder is then
1217    /// called for every directory entry seen by that thread.
1218    ///
1219    /// Typically, creating a custom visitor is useful if you need to perform
1220    /// some kind of cleanup once traversal is finished. This can be achieved
1221    /// by implementing `Drop` for your builder (or for your visitor, if you
1222    /// want to execute cleanup for every thread that is launched).
1223    ///
1224    /// For example, each visitor might build up a data structure of results
1225    /// corresponding to the directory entries seen for each thread. Since each
1226    /// visitor runs on only one thread, this build-up can be done without
1227    /// synchronization. Then, once traversal is complete, all of the results
1228    /// can be merged together into a single data structure.
1229    pub fn visit(mut self, builder: &mut dyn ParallelVisitorBuilder<'_>) {
1230        let threads = self.threads();
1231        let stack = Arc::new(Mutex::new(vec![]));
1232        {
1233            let mut stack = stack.lock().unwrap();
1234            let mut visitor = builder.build();
1235            let mut paths = Vec::new().into_iter();
1236            std::mem::swap(&mut paths, &mut self.paths);
1237            // Send the initial set of root paths to the pool of workers. Note
1238            // that we only send directories. For files, we send to them the
1239            // callback directly.
1240            for path in paths {
1241                let (dent, root_device) = if path == Path::new("-") {
1242                    (DirEntry::new_stdin(), None)
1243                } else {
1244                    let root_device = if !self.same_file_system {
1245                        None
1246                    } else {
1247                        match device_num(&path) {
1248                            Ok(root_device) => Some(root_device),
1249                            Err(err) => {
1250                                let err = Error::Io(err).with_path(path);
1251                                if visitor.visit(Err(err)).is_quit() {
1252                                    return;
1253                                }
1254                                continue;
1255                            }
1256                        }
1257                    };
1258                    match DirEntryRaw::from_path(0, path, false) {
1259                        Ok(dent) => {
1260                            (DirEntry::new_raw(dent, None), root_device)
1261                        }
1262                        Err(err) => {
1263                            if visitor.visit(Err(err)).is_quit() {
1264                                return;
1265                            }
1266                            continue;
1267                        }
1268                    }
1269                };
1270                stack.push(Message::Work(Work {
1271                    dent: dent,
1272                    ignore: self.ig_root.clone(),
1273                    root_device: root_device,
1274                }));
1275            }
1276            // ... but there's no need to start workers if we don't need them.
1277            if stack.is_empty() {
1278                return;
1279            }
1280        }
1281        // Create the workers and then wait for them to finish.
1282        let quit_now = Arc::new(AtomicBool::new(false));
1283        let num_pending =
1284            Arc::new(AtomicUsize::new(stack.lock().unwrap().len()));
1285        std::thread::scope(|s| {
1286            let mut handles = vec![];
1287            for _ in 0..threads {
1288                let worker = Worker {
1289                    visitor: builder.build(),
1290                    stack: stack.clone(),
1291                    quit_now: quit_now.clone(),
1292                    num_pending: num_pending.clone(),
1293                    max_depth: self.max_depth,
1294                    max_filesize: self.max_filesize,
1295                    follow_links: self.follow_links,
1296                    skip: self.skip.clone(),
1297                    filter: self.filter.clone(),
1298                };
1299                handles.push(s.spawn(|| worker.run()));
1300            }
1301            for handle in handles {
1302                handle.join().unwrap();
1303            }
1304        });
1305    }
1306
1307    fn threads(&self) -> usize {
1308        if self.threads == 0 {
1309            2
1310        } else {
1311            self.threads
1312        }
1313    }
1314}
1315
/// Message is the set of instructions that a worker knows how to process.
///
/// Messages are exchanged between workers through the shared work stack.
enum Message {
    /// A work item corresponds to a directory that should be descended into.
    /// Work items for entries that should be skipped or ignored should not
    /// be produced.
    Work(Work),
    /// This instruction indicates that the worker should quit.
    Quit,
}
1325
/// A unit of work for each worker to process.
///
/// Each unit of work corresponds to a directory that should be descended
/// into. Work items for other kinds of entries are passed to the visitor
/// without any descent.
struct Work {
    /// The directory entry.
    dent: DirEntry,
    /// Any ignore matchers that have been built for this directory's parents.
    ignore: Ignore,
    /// The root device number. When present, only files with the same device
    /// number should be considered.
    root_device: Option<u64>,
}
1339
1340impl Work {
1341    /// Returns true if and only if this work item is a directory.
1342    fn is_dir(&self) -> bool {
1343        self.dent.is_dir()
1344    }
1345
1346    /// Returns true if and only if this work item is a symlink.
1347    fn is_symlink(&self) -> bool {
1348        self.dent.file_type().map_or(false, |ft| ft.is_symlink())
1349    }
1350
1351    /// Adds ignore rules for parent directories.
1352    ///
1353    /// Note that this only applies to entries at depth 0. On all other
1354    /// entries, this is a no-op.
1355    fn add_parents(&mut self) -> Option<Error> {
1356        if self.dent.depth() > 0 {
1357            return None;
1358        }
1359        // At depth 0, the path of this entry is a root path, so we can
1360        // use it directly to add parent ignore rules.
1361        let (ig, err) = self.ignore.add_parents(self.dent.path());
1362        self.ignore = ig;
1363        err
1364    }
1365
1366    /// Reads the directory contents of this work item and adds ignore
1367    /// rules for this directory.
1368    ///
1369    /// If there was a problem with reading the directory contents, then
1370    /// an error is returned. If there was a problem reading the ignore
1371    /// rules for this directory, then the error is attached to this
1372    /// work item's directory entry.
1373    fn read_dir(&mut self) -> Result<fs::ReadDir, Error> {
1374        let readdir = match fs::read_dir(self.dent.path()) {
1375            Ok(readdir) => readdir,
1376            Err(err) => {
1377                let err = Error::from(err)
1378                    .with_path(self.dent.path())
1379                    .with_depth(self.dent.depth());
1380                return Err(err);
1381            }
1382        };
1383        let (ig, err) = self.ignore.add_child(self.dent.path());
1384        self.ignore = ig;
1385        self.dent.err = err;
1386        Ok(readdir)
1387    }
1388}
1389
/// A worker is responsible for descending into directories, updating the
/// ignore matchers, producing new work and invoking the caller's callback.
///
/// Note that a worker is *both* a producer and a consumer.
struct Worker<'s> {
    /// The caller's callback.
    visitor: Box<dyn ParallelVisitor + 's>,
    /// A stack of work to do.
    ///
    /// We use a stack instead of a channel because a stack lets us visit
    /// directories in depth first order. This can substantially reduce peak
    /// memory usage by keeping both the number of file paths and gitignore
    /// matchers in memory lower.
    stack: Arc<Mutex<Vec<Message>>>,
    /// Whether all workers should terminate at the next opportunity. Note
    /// that we need this because we don't want other `Work` to be done after
    /// we quit. We wouldn't need this if we had a priority channel.
    quit_now: Arc<AtomicBool>,
    /// The number of outstanding work items.
    num_pending: Arc<AtomicUsize>,
    /// The maximum depth of directories to descend. A value of `0` means no
    /// descension at all.
    max_depth: Option<usize>,
    /// The maximum size a searched file can be (in bytes). If a file exceeds
    /// this size it will be skipped.
    max_filesize: Option<u64>,
    /// Whether to follow symbolic links or not. When this is enabled, loop
    /// detection is performed.
    follow_links: bool,
    /// A file handle to skip, currently is either `None` or stdout, if it's
    /// a file and it has been requested to skip files identical to stdout.
    skip: Option<Arc<Handle>>,
    /// A predicate applied to dir entries. If it returns false, the entry
    /// and all of its children will be skipped.
    filter: Option<Filter>,
}
1426
impl<'s> Worker<'s> {
    /// Runs this worker until there is no more work left to do.
    ///
    /// The worker will call the caller's callback for all entries that aren't
    /// skipped by the ignore matcher.
    fn run(mut self) {
        while let Some(work) = self.get_work() {
            // A quit request from the callback is broadcast to all workers
            // through the shared flag.
            if let WalkState::Quit = self.run_one(work) {
                self.quit_now();
            }
            // Only mark the job as finished after any work it produced has
            // been submitted, so the pending count cannot drop to zero early.
            self.work_done();
        }
    }

    /// Processes a single work item.
    ///
    /// Entries that are not directories (or that are symlinks) are handed to
    /// the callback directly. Directories are handed to the callback and
    /// then, unless the callback or a traversal limit says otherwise, each
    /// of their children is considered for new work.
    ///
    /// Returns the state that decides whether the walk should quit.
    fn run_one(&mut self, mut work: Work) -> WalkState {
        // If the work is not a directory, then we can just execute the
        // caller's callback immediately and move on.
        if work.is_symlink() || !work.is_dir() {
            return self.visitor.visit(Ok(work.dent));
        }
        if let Some(err) = work.add_parents() {
            let state = self.visitor.visit(Err(err));
            if state.is_quit() {
                return state;
            }
        }

        // When a root device is recorded, only descend into directories that
        // live on that same device. An error while checking is reported to
        // the callback and the directory is then not descended into.
        let descend = if let Some(root_device) = work.root_device {
            match is_same_file_system(root_device, work.dent.path()) {
                Ok(true) => true,
                Ok(false) => false,
                Err(err) => {
                    let state = self.visitor.visit(Err(err));
                    if state.is_quit() {
                        return state;
                    }
                    false
                }
            }
        } else {
            true
        };

        // Try to read the directory first before we transfer ownership
        // to the provided closure. Do not unwrap it immediately, though,
        // as we may receive an `Err` value e.g. in the case when we do not
        // have sufficient read permissions to list the directory.
        // In that case we still want to provide the closure with a valid
        // entry before passing the error value.
        let readdir = work.read_dir();
        let depth = work.dent.depth();
        let state = self.visitor.visit(Ok(work.dent));
        if !state.is_continue() {
            return state;
        }
        if !descend {
            return WalkState::Skip;
        }

        let readdir = match readdir {
            Ok(readdir) => readdir,
            Err(err) => {
                return self.visitor.visit(Err(err));
            }
        };

        // Children of a directory at the maximum depth are not visited.
        if self.max_depth.map_or(false, |max| depth >= max) {
            return WalkState::Skip;
        }
        for result in readdir {
            let state = self.generate_work(
                &work.ignore,
                depth + 1,
                work.root_device,
                result,
            );
            if state.is_quit() {
                return state;
            }
        }
        WalkState::Continue
    }

    /// Decides whether to submit the given directory entry as a file to
    /// search.
    ///
    /// If the entry is a path that should be ignored, then this is a no-op.
    /// Otherwise, the entry is pushed on to the queue. (The actual execution
    /// of the callback happens in `run_one`.)
    ///
    /// If an error occurs while reading the entry, then it is sent to the
    /// caller's callback.
    ///
    /// `ig` is the `Ignore` matcher for the parent directory. `depth` should
    /// be the depth of this entry. `root_device` is passed along unchanged to
    /// any work that is submitted. `result` should be the item yielded by
    /// a directory iterator.
    fn generate_work(
        &mut self,
        ig: &Ignore,
        depth: usize,
        root_device: Option<u64>,
        result: Result<fs::DirEntry, io::Error>,
    ) -> WalkState {
        let fs_dent = match result {
            Ok(fs_dent) => fs_dent,
            Err(err) => {
                return self
                    .visitor
                    .visit(Err(Error::from(err).with_depth(depth)));
            }
        };
        let mut dent = match DirEntryRaw::from_entry(depth, &fs_dent) {
            Ok(dent) => DirEntry::new_raw(dent, None),
            Err(err) => {
                return self.visitor.visit(Err(err));
            }
        };
        let is_symlink = dent.file_type().map_or(false, |ft| ft.is_symlink());
        if self.follow_links && is_symlink {
            // Rebuild the entry from its path with link following enabled,
            // and make sure a directory reached this way does not loop back
            // to one of its ancestors.
            let path = dent.path().to_path_buf();
            dent = match DirEntryRaw::from_path(depth, path, true) {
                Ok(dent) => DirEntry::new_raw(dent, None),
                Err(err) => {
                    return self.visitor.visit(Err(err));
                }
            };
            if dent.is_dir() {
                if let Err(err) = check_symlink_loop(ig, dent.path(), depth) {
                    return self.visitor.visit(Err(err));
                }
            }
        }
        // N.B. See analogous call in the single-threaded implementation about
        // why it's important for this to come before the checks below.
        if should_skip_entry(ig, &dent) {
            return WalkState::Continue;
        }
        if let Some(ref stdout) = self.skip {
            let is_stdout = match path_equals(&dent, stdout) {
                Ok(is_stdout) => is_stdout,
                Err(err) => return self.visitor.visit(Err(err)),
            };
            if is_stdout {
                return WalkState::Continue;
            }
        }
        // The size limit only applies to entries that are not directories.
        let should_skip_filesize =
            if self.max_filesize.is_some() && !dent.is_dir() {
                skip_filesize(
                    self.max_filesize.unwrap(),
                    dent.path(),
                    &dent.metadata().ok(),
                )
            } else {
                false
            };
        // The entry is skipped when the predicate returns false.
        let should_skip_filtered =
            if let Some(Filter(predicate)) = &self.filter {
                !predicate(&dent)
            } else {
                false
            };
        if !should_skip_filesize && !should_skip_filtered {
            self.send(Work { dent, ignore: ig.clone(), root_device });
        }
        WalkState::Continue
    }

    /// Returns the next directory to descend into.
    ///
    /// If all work has been exhausted, then this returns None. The worker
    /// should then subsequently quit.
    fn get_work(&mut self) -> Option<Work> {
        let mut value = self.recv();
        loop {
            // Simulate a priority channel: If quit_now flag is set, we can
            // receive only quit messages.
            if self.is_quit_now() {
                value = Some(Message::Quit)
            }
            match value {
                Some(Message::Work(work)) => {
                    return Some(work);
                }
                Some(Message::Quit) => {
                    // Repeat quit message to wake up sleeping threads, if
                    // any. The domino effect will ensure that every thread
                    // will quit.
                    self.send_quit();
                    return None;
                }
                None => {
                    // Once num_pending reaches 0, it is impossible for it to
                    // ever increase again. Namely, it only reaches 0 once
                    // all jobs have run such that no jobs have produced more
                    // work. We have this guarantee because num_pending is
                    // always incremented before each job is submitted and only
                    // decremented once each job is completely finished.
                    // Therefore, if this reaches zero, then there can be no
                    // other job running.
                    if self.num_pending() == 0 {
                        // Every other thread is blocked at the next recv().
                        // Send the initial quit message and quit.
                        self.send_quit();
                        return None;
                    }
                    // Wait for next `Work` or `Quit` message.
                    loop {
                        if let Some(v) = self.recv() {
                            value = Some(v);
                            break;
                        }
                        // Our stack isn't blocking. Instead of burning the
                        // CPU waiting, we let the thread sleep for a bit. In
                        // general, this tends to only occur once the search is
                        // approaching termination.
                        thread::sleep(Duration::from_millis(1));
                    }
                }
            }
        }
    }

    /// Indicates that all workers should quit immediately.
    fn quit_now(&self) {
        self.quit_now.store(true, Ordering::SeqCst);
    }

    /// Returns true if this worker should quit immediately.
    fn is_quit_now(&self) -> bool {
        self.quit_now.load(Ordering::SeqCst)
    }

    /// Returns the number of pending jobs.
    fn num_pending(&self) -> usize {
        self.num_pending.load(Ordering::SeqCst)
    }

    /// Send work.
    ///
    /// The pending count is incremented before the work is pushed on to the
    /// stack.
    fn send(&self, work: Work) {
        self.num_pending.fetch_add(1, Ordering::SeqCst);
        let mut stack = self.stack.lock().unwrap();
        stack.push(Message::Work(work));
    }

    /// Send a quit message.
    fn send_quit(&self) {
        let mut stack = self.stack.lock().unwrap();
        stack.push(Message::Quit);
    }

    /// Receive work.
    ///
    /// This does not block: `None` is returned when the stack is empty.
    fn recv(&self) -> Option<Message> {
        let mut stack = self.stack.lock().unwrap();
        stack.pop()
    }

    /// Signal that a job has been completely finished by decrementing the
    /// pending count.
    fn work_done(&self) {
        self.num_pending.fetch_sub(1, Ordering::SeqCst);
    }
}
1689
1690fn check_symlink_loop(
1691    ig_parent: &Ignore,
1692    child_path: &Path,
1693    child_depth: usize,
1694) -> Result<(), Error> {
1695    let hchild = Handle::from_path(child_path).map_err(|err| {
1696        Error::from(err).with_path(child_path).with_depth(child_depth)
1697    })?;
1698    for ig in ig_parent.parents().take_while(|ig| !ig.is_absolute_parent()) {
1699        let h = Handle::from_path(ig.path()).map_err(|err| {
1700            Error::from(err).with_path(child_path).with_depth(child_depth)
1701        })?;
1702        if hchild == h {
1703            return Err(Error::Loop {
1704                ancestor: ig.path().to_path_buf(),
1705                child: child_path.to_path_buf(),
1706            }
1707            .with_depth(child_depth));
1708        }
1709    }
1710    Ok(())
1711}
1712
1713// Before calling this function, make sure that you ensure that is really
1714// necessary as the arguments imply a file stat.
1715fn skip_filesize(
1716    max_filesize: u64,
1717    path: &Path,
1718    ent: &Option<Metadata>,
1719) -> bool {
1720    let filesize = match *ent {
1721        Some(ref md) => Some(md.len()),
1722        None => None,
1723    };
1724
1725    if let Some(fs) = filesize {
1726        if fs > max_filesize {
1727            log::debug!("ignoring {}: {} bytes", path.display(), fs);
1728            true
1729        } else {
1730            false
1731        }
1732    } else {
1733        false
1734    }
1735}
1736
1737fn should_skip_entry(ig: &Ignore, dent: &DirEntry) -> bool {
1738    let m = ig.matched_dir_entry(dent);
1739    if m.is_ignore() {
1740        log::debug!("ignoring {}: {:?}", dent.path().display(), m);
1741        true
1742    } else if m.is_whitelist() {
1743        log::debug!("whitelisting {}: {:?}", dent.path().display(), m);
1744        false
1745    } else {
1746        false
1747    }
1748}
1749
1750/// Returns a handle to stdout for filtering search.
1751///
1752/// A handle is returned if and only if stdout is being redirected to a file.
1753/// The handle returned corresponds to that file.
1754///
1755/// This can be used to ensure that we do not attempt to search a file that we
1756/// may also be writing to.
1757fn stdout_handle() -> Option<Handle> {
1758    let h = match Handle::stdout() {
1759        Err(_) => return None,
1760        Ok(h) => h,
1761    };
1762    let md = match h.as_file().metadata() {
1763        Err(_) => return None,
1764        Ok(md) => md,
1765    };
1766    if !md.is_file() {
1767        return None;
1768    }
1769    Some(h)
1770}
1771
1772/// Returns true if and only if the given directory entry is believed to be
1773/// equivalent to the given handle. If there was a problem querying the path
1774/// for information to determine equality, then that error is returned.
1775fn path_equals(dent: &DirEntry, handle: &Handle) -> Result<bool, Error> {
1776    #[cfg(unix)]
1777    fn never_equal(dent: &DirEntry, handle: &Handle) -> bool {
1778        dent.ino() != Some(handle.ino())
1779    }
1780
1781    #[cfg(not(unix))]
1782    fn never_equal(_: &DirEntry, _: &Handle) -> bool {
1783        false
1784    }
1785
1786    // If we know for sure that these two things aren't equal, then avoid
1787    // the costly extra stat call to determine equality.
1788    if dent.is_stdin() || never_equal(dent, handle) {
1789        return Ok(false);
1790    }
1791    Handle::from_path(dent.path())
1792        .map(|h| &h == handle)
1793        .map_err(|err| Error::Io(err).with_path(dent.path()))
1794}
1795
1796/// Returns true if the given walkdir entry corresponds to a directory.
1797///
1798/// This is normally just `dent.file_type().is_dir()`, but when we aren't
1799/// following symlinks, the root directory entry may be a symlink to a
1800/// directory that we *do* follow---by virtue of it being specified by the user
1801/// explicitly. In that case, we need to follow the symlink and query whether
1802/// it's a directory or not. But we only do this for root entries to avoid an
1803/// additional stat check in most cases.
1804fn walkdir_is_dir(dent: &walkdir::DirEntry) -> bool {
1805    if dent.file_type().is_dir() {
1806        return true;
1807    }
1808    if !dent.file_type().is_symlink() || dent.depth() > 0 {
1809        return false;
1810    }
1811    dent.path().metadata().ok().map_or(false, |md| md.file_type().is_dir())
1812}
1813
1814/// Returns true if and only if the given path is on the same device as the
1815/// given root device.
1816fn is_same_file_system(root_device: u64, path: &Path) -> Result<bool, Error> {
1817    let dent_device =
1818        device_num(path).map_err(|err| Error::Io(err).with_path(path))?;
1819    Ok(root_device == dent_device)
1820}
1821
/// Returns the device number recorded in the metadata of `path`, or the I/O
/// error produced while reading that metadata.
#[cfg(unix)]
fn device_num<P: AsRef<Path>>(path: P) -> io::Result<u64> {
    use std::os::unix::fs::MetadataExt;

    let md = path.as_ref().metadata()?;
    Ok(md.dev())
}
1828
/// Returns the volume serial number reported for the file at `path`, or the
/// I/O error produced while opening the path or querying its information.
#[cfg(windows)]
fn device_num<P: AsRef<Path>>(path: P) -> io::Result<u64> {
    use winapi_util::{file, Handle};

    let handle = Handle::from_path_any(path)?;
    let info = file::information(handle)?;
    Ok(info.volume_serial_number())
}
1836
/// Fallback for platforms that are neither Unix nor Windows: always returns
/// an error of kind `Other` saying the option is unsupported.
#[cfg(not(any(unix, windows)))]
fn device_num<P: AsRef<Path>>(_: P) -> io::Result<u64> {
    let unsupported = io::Error::new(
        io::ErrorKind::Other,
        "walkdir: same_file_system option not supported on this platform",
    );
    Err(unsupported)
}
1844
1845#[cfg(test)]
1846mod tests {
1847    use std::ffi::OsStr;
1848    use std::fs::{self, File};
1849    use std::io::Write;
1850    use std::path::Path;
1851    use std::sync::{Arc, Mutex};
1852
1853    use super::{DirEntry, WalkBuilder, WalkState};
1854    use crate::tests::TempDir;
1855
1856    fn wfile<P: AsRef<Path>>(path: P, contents: &str) {
1857        let mut file = File::create(path).unwrap();
1858        file.write_all(contents.as_bytes()).unwrap();
1859    }
1860
1861    fn wfile_size<P: AsRef<Path>>(path: P, size: u64) {
1862        let file = File::create(path).unwrap();
1863        file.set_len(size).unwrap();
1864    }
1865
1866    #[cfg(unix)]
1867    fn symlink<P: AsRef<Path>, Q: AsRef<Path>>(src: P, dst: Q) {
1868        use std::os::unix::fs::symlink;
1869        symlink(src, dst).unwrap();
1870    }
1871
1872    fn mkdirp<P: AsRef<Path>>(path: P) {
1873        fs::create_dir_all(path).unwrap();
1874    }
1875
1876    fn normal_path(unix: &str) -> String {
1877        if cfg!(windows) {
1878            unix.replace("\\", "/")
1879        } else {
1880            unix.to_string()
1881        }
1882    }
1883
1884    fn walk_collect(prefix: &Path, builder: &WalkBuilder) -> Vec<String> {
1885        let mut paths = vec![];
1886        for result in builder.build() {
1887            let dent = match result {
1888                Err(_) => continue,
1889                Ok(dent) => dent,
1890            };
1891            let path = dent.path().strip_prefix(prefix).unwrap();
1892            if path.as_os_str().is_empty() {
1893                continue;
1894            }
1895            paths.push(normal_path(path.to_str().unwrap()));
1896        }
1897        paths.sort();
1898        paths
1899    }
1900
1901    fn walk_collect_parallel(
1902        prefix: &Path,
1903        builder: &WalkBuilder,
1904    ) -> Vec<String> {
1905        let mut paths = vec![];
1906        for dent in walk_collect_entries_parallel(builder) {
1907            let path = dent.path().strip_prefix(prefix).unwrap();
1908            if path.as_os_str().is_empty() {
1909                continue;
1910            }
1911            paths.push(normal_path(path.to_str().unwrap()));
1912        }
1913        paths.sort();
1914        paths
1915    }
1916
1917    fn walk_collect_entries_parallel(builder: &WalkBuilder) -> Vec<DirEntry> {
1918        let dents = Arc::new(Mutex::new(vec![]));
1919        builder.build_parallel().run(|| {
1920            let dents = dents.clone();
1921            Box::new(move |result| {
1922                if let Ok(dent) = result {
1923                    dents.lock().unwrap().push(dent);
1924                }
1925                WalkState::Continue
1926            })
1927        });
1928
1929        let dents = dents.lock().unwrap();
1930        dents.to_vec()
1931    }
1932
1933    fn mkpaths(paths: &[&str]) -> Vec<String> {
1934        let mut paths: Vec<_> = paths.iter().map(|s| s.to_string()).collect();
1935        paths.sort();
1936        paths
1937    }
1938
1939    fn tmpdir() -> TempDir {
1940        TempDir::new().unwrap()
1941    }
1942
1943    fn assert_paths(prefix: &Path, builder: &WalkBuilder, expected: &[&str]) {
1944        let got = walk_collect(prefix, builder);
1945        assert_eq!(got, mkpaths(expected), "single threaded");
1946        let got = walk_collect_parallel(prefix, builder);
1947        assert_eq!(got, mkpaths(expected), "parallel");
1948    }
1949
1950    #[test]
1951    fn no_ignores() {
1952        let td = tmpdir();
1953        mkdirp(td.path().join("a/b/c"));
1954        mkdirp(td.path().join("x/y"));
1955        wfile(td.path().join("a/b/foo"), "");
1956        wfile(td.path().join("x/y/foo"), "");
1957
1958        assert_paths(
1959            td.path(),
1960            &WalkBuilder::new(td.path()),
1961            &["x", "x/y", "x/y/foo", "a", "a/b", "a/b/foo", "a/b/c"],
1962        );
1963    }
1964
1965    #[test]
1966    fn custom_ignore() {
1967        let td = tmpdir();
1968        let custom_ignore = ".customignore";
1969        mkdirp(td.path().join("a"));
1970        wfile(td.path().join(custom_ignore), "foo");
1971        wfile(td.path().join("foo"), "");
1972        wfile(td.path().join("a/foo"), "");
1973        wfile(td.path().join("bar"), "");
1974        wfile(td.path().join("a/bar"), "");
1975
1976        let mut builder = WalkBuilder::new(td.path());
1977        builder.add_custom_ignore_filename(&custom_ignore);
1978        assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]);
1979    }
1980
1981    #[test]
1982    fn custom_ignore_exclusive_use() {
1983        let td = tmpdir();
1984        let custom_ignore = ".customignore";
1985        mkdirp(td.path().join("a"));
1986        wfile(td.path().join(custom_ignore), "foo");
1987        wfile(td.path().join("foo"), "");
1988        wfile(td.path().join("a/foo"), "");
1989        wfile(td.path().join("bar"), "");
1990        wfile(td.path().join("a/bar"), "");
1991
1992        let mut builder = WalkBuilder::new(td.path());
1993        builder.ignore(false);
1994        builder.git_ignore(false);
1995        builder.git_global(false);
1996        builder.git_exclude(false);
1997        builder.add_custom_ignore_filename(&custom_ignore);
1998        assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]);
1999    }
2000
2001    #[test]
2002    fn gitignore() {
2003        let td = tmpdir();
2004        mkdirp(td.path().join(".git"));
2005        mkdirp(td.path().join("a"));
2006        wfile(td.path().join(".gitignore"), "foo");
2007        wfile(td.path().join("foo"), "");
2008        wfile(td.path().join("a/foo"), "");
2009        wfile(td.path().join("bar"), "");
2010        wfile(td.path().join("a/bar"), "");
2011
2012        assert_paths(
2013            td.path(),
2014            &WalkBuilder::new(td.path()),
2015            &["bar", "a", "a/bar"],
2016        );
2017    }
2018
2019    #[test]
2020    fn explicit_ignore() {
2021        let td = tmpdir();
2022        let igpath = td.path().join(".not-an-ignore");
2023        mkdirp(td.path().join("a"));
2024        wfile(&igpath, "foo");
2025        wfile(td.path().join("foo"), "");
2026        wfile(td.path().join("a/foo"), "");
2027        wfile(td.path().join("bar"), "");
2028        wfile(td.path().join("a/bar"), "");
2029
2030        let mut builder = WalkBuilder::new(td.path());
2031        assert!(builder.add_ignore(&igpath).is_none());
2032        assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]);
2033    }
2034
2035    #[test]
2036    fn explicit_ignore_exclusive_use() {
2037        let td = tmpdir();
2038        let igpath = td.path().join(".not-an-ignore");
2039        mkdirp(td.path().join("a"));
2040        wfile(&igpath, "foo");
2041        wfile(td.path().join("foo"), "");
2042        wfile(td.path().join("a/foo"), "");
2043        wfile(td.path().join("bar"), "");
2044        wfile(td.path().join("a/bar"), "");
2045
2046        let mut builder = WalkBuilder::new(td.path());
2047        builder.standard_filters(false);
2048        assert!(builder.add_ignore(&igpath).is_none());
2049        assert_paths(
2050            td.path(),
2051            &builder,
2052            &[".not-an-ignore", "bar", "a", "a/bar"],
2053        );
2054    }
2055
2056    #[test]
2057    fn gitignore_parent() {
2058        let td = tmpdir();
2059        mkdirp(td.path().join(".git"));
2060        mkdirp(td.path().join("a"));
2061        wfile(td.path().join(".gitignore"), "foo");
2062        wfile(td.path().join("a/foo"), "");
2063        wfile(td.path().join("a/bar"), "");
2064
2065        let root = td.path().join("a");
2066        assert_paths(&root, &WalkBuilder::new(&root), &["bar"]);
2067    }
2068
2069    #[test]
2070    fn max_depth() {
2071        let td = tmpdir();
2072        mkdirp(td.path().join("a/b/c"));
2073        wfile(td.path().join("foo"), "");
2074        wfile(td.path().join("a/foo"), "");
2075        wfile(td.path().join("a/b/foo"), "");
2076        wfile(td.path().join("a/b/c/foo"), "");
2077
2078        let mut builder = WalkBuilder::new(td.path());
2079        assert_paths(
2080            td.path(),
2081            &builder,
2082            &["a", "a/b", "a/b/c", "foo", "a/foo", "a/b/foo", "a/b/c/foo"],
2083        );
2084        assert_paths(td.path(), builder.max_depth(Some(0)), &[]);
2085        assert_paths(td.path(), builder.max_depth(Some(1)), &["a", "foo"]);
2086        assert_paths(
2087            td.path(),
2088            builder.max_depth(Some(2)),
2089            &["a", "a/b", "foo", "a/foo"],
2090        );
2091    }
2092
2093    #[test]
2094    fn max_filesize() {
2095        let td = tmpdir();
2096        mkdirp(td.path().join("a/b"));
2097        wfile_size(td.path().join("foo"), 0);
2098        wfile_size(td.path().join("bar"), 400);
2099        wfile_size(td.path().join("baz"), 600);
2100        wfile_size(td.path().join("a/foo"), 600);
2101        wfile_size(td.path().join("a/bar"), 500);
2102        wfile_size(td.path().join("a/baz"), 200);
2103
2104        let mut builder = WalkBuilder::new(td.path());
2105        assert_paths(
2106            td.path(),
2107            &builder,
2108            &["a", "a/b", "foo", "bar", "baz", "a/foo", "a/bar", "a/baz"],
2109        );
2110        assert_paths(
2111            td.path(),
2112            builder.max_filesize(Some(0)),
2113            &["a", "a/b", "foo"],
2114        );
2115        assert_paths(
2116            td.path(),
2117            builder.max_filesize(Some(500)),
2118            &["a", "a/b", "foo", "bar", "a/bar", "a/baz"],
2119        );
2120        assert_paths(
2121            td.path(),
2122            builder.max_filesize(Some(50000)),
2123            &["a", "a/b", "foo", "bar", "baz", "a/foo", "a/bar", "a/baz"],
2124        );
2125    }
2126
2127    #[cfg(unix)] // because symlinks on windows are weird
2128    #[test]
2129    fn symlinks() {
2130        let td = tmpdir();
2131        mkdirp(td.path().join("a/b"));
2132        symlink(td.path().join("a/b"), td.path().join("z"));
2133        wfile(td.path().join("a/b/foo"), "");
2134
2135        let mut builder = WalkBuilder::new(td.path());
2136        assert_paths(td.path(), &builder, &["a", "a/b", "a/b/foo", "z"]);
2137        assert_paths(
2138            td.path(),
2139            &builder.follow_links(true),
2140            &["a", "a/b", "a/b/foo", "z", "z/foo"],
2141        );
2142    }
2143
2144    #[cfg(unix)] // because symlinks on windows are weird
2145    #[test]
2146    fn first_path_not_symlink() {
2147        let td = tmpdir();
2148        mkdirp(td.path().join("foo"));
2149
2150        let dents = WalkBuilder::new(td.path().join("foo"))
2151            .build()
2152            .into_iter()
2153            .collect::<Result<Vec<_>, _>>()
2154            .unwrap();
2155        assert_eq!(1, dents.len());
2156        assert!(!dents[0].path_is_symlink());
2157
2158        let dents = walk_collect_entries_parallel(&WalkBuilder::new(
2159            td.path().join("foo"),
2160        ));
2161        assert_eq!(1, dents.len());
2162        assert!(!dents[0].path_is_symlink());
2163    }
2164
2165    #[cfg(unix)] // because symlinks on windows are weird
2166    #[test]
2167    fn symlink_loop() {
2168        let td = tmpdir();
2169        mkdirp(td.path().join("a/b"));
2170        symlink(td.path().join("a"), td.path().join("a/b/c"));
2171
2172        let mut builder = WalkBuilder::new(td.path());
2173        assert_paths(td.path(), &builder, &["a", "a/b", "a/b/c"]);
2174        assert_paths(td.path(), &builder.follow_links(true), &["a", "a/b"]);
2175    }
2176
2177    // It's a little tricky to test the 'same_file_system' option since
2178    // we need an environment with more than one file system. We adopt a
2179    // heuristic where /sys is typically a distinct volume on Linux and roll
2180    // with that.
2181    #[test]
2182    #[cfg(target_os = "linux")]
2183    fn same_file_system() {
2184        use super::device_num;
2185
2186        // If for some reason /sys doesn't exist or isn't a directory, just
2187        // skip this test.
2188        if !Path::new("/sys").is_dir() {
2189            return;
2190        }
2191
2192        // If our test directory actually isn't a different volume from /sys,
2193        // then this test is meaningless and we shouldn't run it.
2194        let td = tmpdir();
2195        if device_num(td.path()).unwrap() == device_num("/sys").unwrap() {
2196            return;
2197        }
2198
2199        mkdirp(td.path().join("same_file"));
2200        symlink("/sys", td.path().join("same_file").join("alink"));
2201
2202        // Create a symlink to sys and enable following symlinks. If the
2203        // same_file_system option doesn't work, then this probably will hit a
2204        // permission error. Otherwise, it should just skip over the symlink
2205        // completely.
2206        let mut builder = WalkBuilder::new(td.path());
2207        builder.follow_links(true).same_file_system(true);
2208        assert_paths(td.path(), &builder, &["same_file", "same_file/alink"]);
2209    }
2210
2211    #[cfg(target_os = "linux")]
2212    #[test]
2213    fn no_read_permissions() {
2214        let dir_path = Path::new("/root");
2215
2216        // There's no /etc/sudoers.d, skip the test.
2217        if !dir_path.is_dir() {
2218            return;
2219        }
2220        // We're the root, so the test won't check what we want it to.
2221        if fs::read_dir(&dir_path).is_ok() {
2222            return;
2223        }
2224
2225        // Check that we can't descend but get an entry for the parent dir.
2226        let builder = WalkBuilder::new(&dir_path);
2227        assert_paths(dir_path.parent().unwrap(), &builder, &["root"]);
2228    }
2229
2230    #[test]
2231    fn filter() {
2232        let td = tmpdir();
2233        mkdirp(td.path().join("a/b/c"));
2234        mkdirp(td.path().join("x/y"));
2235        wfile(td.path().join("a/b/foo"), "");
2236        wfile(td.path().join("x/y/foo"), "");
2237
2238        assert_paths(
2239            td.path(),
2240            &WalkBuilder::new(td.path()),
2241            &["x", "x/y", "x/y/foo", "a", "a/b", "a/b/foo", "a/b/c"],
2242        );
2243
2244        assert_paths(
2245            td.path(),
2246            &WalkBuilder::new(td.path())
2247                .filter_entry(|entry| entry.file_name() != OsStr::new("a")),
2248            &["x", "x/y", "x/y/foo"],
2249        );
2250    }
2251}