ignore/
types.rs

1/*!
2The types module provides a way of associating globs on file names to file
3types.
4
5This can be used to match specific types of files. For example, among
6the default file types provided, the Rust file type is defined to be `*.rs`
7with name `rust`. Similarly, the C file type is defined to be `*.{c,h}` with
8name `c`.
9
10Note that the set of default types may change over time.
11
12# Example
13
14This shows how to create and use a simple file type matcher using the default
15file types defined in this crate.
16
17```
18use ignore::types::TypesBuilder;
19
20let mut builder = TypesBuilder::new();
21builder.add_defaults();
22builder.select("rust");
23let matcher = builder.build().unwrap();
24
25assert!(matcher.matched("foo.rs", false).is_whitelist());
26assert!(matcher.matched("foo.c", false).is_ignore());
27```
28
29# Example: negation
30
31This is like the previous example, but shows how negating a file type works.
32That is, this will let us match file paths that *don't* correspond to a
33particular file type.
34
35```
36use ignore::types::TypesBuilder;
37
38let mut builder = TypesBuilder::new();
39builder.add_defaults();
40builder.negate("c");
41let matcher = builder.build().unwrap();
42
43assert!(matcher.matched("foo.rs", false).is_none());
44assert!(matcher.matched("foo.c", false).is_ignore());
45```
46
47# Example: custom file type definitions
48
This shows how to extend this library's default file type definitions with
your own.
51
52```
53use ignore::types::TypesBuilder;
54
55let mut builder = TypesBuilder::new();
56builder.add_defaults();
57builder.add("foo", "*.foo");
58// Another way of adding a file type definition.
59// This is useful when accepting input from an end user.
60builder.add_def("bar:*.bar");
61// Note: we only select `foo`, not `bar`.
62builder.select("foo");
63let matcher = builder.build().unwrap();
64
65assert!(matcher.matched("x.foo", false).is_whitelist());
66// This is ignored because we only selected the `foo` file type.
67assert!(matcher.matched("x.bar", false).is_ignore());
68```
69
70We can also add file type definitions based on other definitions.
71
72```
73use ignore::types::TypesBuilder;
74
75let mut builder = TypesBuilder::new();
76builder.add_defaults();
77builder.add("foo", "*.foo");
78builder.add_def("bar:include:foo,cpp");
79builder.select("bar");
80let matcher = builder.build().unwrap();
81
82assert!(matcher.matched("x.foo", false).is_whitelist());
83assert!(matcher.matched("y.cpp", false).is_whitelist());
84```
85*/
86
87use std::cell::RefCell;
88use std::collections::HashMap;
89use std::path::Path;
90use std::sync::Arc;
91
92use globset::{GlobBuilder, GlobSet, GlobSetBuilder};
93use regex::Regex;
94use thread_local::ThreadLocal;
95
96use crate::default_types::DEFAULT_TYPES;
97use crate::pathutil::file_name;
98use crate::{Error, Match};
99
100/// Glob represents a single glob in a set of file type definitions.
101///
102/// There may be more than one glob for a particular file type.
103///
104/// This is used to report information about the highest precedent glob
105/// that matched.
106///
107/// Note that not all matches necessarily correspond to a specific glob.
108/// For example, if there are one or more selections and a file path doesn't
109/// match any of those selections, then the file path is considered to be
110/// ignored.
111///
112/// The lifetime `'a` refers to the lifetime of the underlying file type
113/// definition, which corresponds to the lifetime of the file type matcher.
114#[derive(Clone, Debug)]
115pub struct Glob<'a>(GlobInner<'a>);
116
117#[derive(Clone, Debug)]
118enum GlobInner<'a> {
119    /// No glob matched, but the file path should still be ignored.
120    UnmatchedIgnore,
121    /// A glob matched.
122    Matched {
123        /// The file type definition which provided the glob.
124        def: &'a FileTypeDef,
125    },
126}
127
128impl<'a> Glob<'a> {
129    fn unmatched() -> Glob<'a> {
130        Glob(GlobInner::UnmatchedIgnore)
131    }
132
133    /// Return the file type definition that matched, if one exists. A file type
134    /// definition always exists when a specific definition matches a file
135    /// path.
136    pub fn file_type_def(&self) -> Option<&FileTypeDef> {
137        match self {
138            Glob(GlobInner::UnmatchedIgnore) => None,
139            Glob(GlobInner::Matched { def, .. }) => Some(def),
140        }
141    }
142}
143
/// A single file type definition.
///
/// File type definitions can be retrieved in aggregate from a file type
/// matcher. A file type definition is also reported when it is responsible
/// for a match.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct FileTypeDef {
    /// The name of the file type.
    name: String,
    /// The glob patterns used to recognize the file type.
    globs: Vec<String>,
}

impl FileTypeDef {
    /// Return the name of this file type.
    pub fn name(&self) -> &str {
        self.name.as_str()
    }

    /// Return the globs used to recognize this file type.
    pub fn globs(&self) -> &[String] {
        self.globs.as_slice()
    }
}
166
167/// Types is a file type matcher.
168#[derive(Clone, Debug)]
169pub struct Types {
170    /// All of the file type definitions, sorted lexicographically by name.
171    defs: Vec<FileTypeDef>,
172    /// All of the selections made by the user.
173    selections: Vec<Selection<FileTypeDef>>,
174    /// Whether there is at least one Selection::Select in our selections.
175    /// When this is true, a Match::None is converted to Match::Ignore.
176    has_selected: bool,
177    /// A mapping from glob index in the set to two indices. The first is an
178    /// index into `selections` and the second is an index into the
179    /// corresponding file type definition's list of globs.
180    glob_to_selection: Vec<(usize, usize)>,
181    /// The set of all glob selections, used for actual matching.
182    set: GlobSet,
183    /// Temporary storage for globs that match.
184    matches: Arc<ThreadLocal<RefCell<Vec<usize>>>>,
185}
186
/// Indicates the type of a selection for a particular file type.
///
/// Each variant carries the name of the file type together with a payload
/// of type `T`.
#[derive(Clone, Debug)]
enum Selection<T> {
    /// The named file type was selected.
    Select(String, T),
    /// The named file type was negated.
    Negate(String, T),
}

impl<T> Selection<T> {
    /// Returns true if and only if this is a `Negate` selection.
    fn is_negated(&self) -> bool {
        matches!(self, Selection::Negate(..))
    }

    /// Returns the file type name carried by this selection.
    fn name(&self) -> &str {
        match self {
            Selection::Select(name, _) | Selection::Negate(name, _) => name,
        }
    }

    /// Consumes this selection and applies `f` to its payload, keeping the
    /// variant and the name unchanged.
    fn map<U, F: FnOnce(T) -> U>(self, f: F) -> Selection<U> {
        let negated = self.is_negated();
        let (name, payload) = match self {
            Selection::Select(name, inner) | Selection::Negate(name, inner) => {
                (name, inner)
            }
        };
        let mapped = f(payload);
        if negated {
            Selection::Negate(name, mapped)
        } else {
            Selection::Select(name, mapped)
        }
    }

    /// Borrows the payload carried by this selection.
    fn inner(&self) -> &T {
        match self {
            Selection::Select(_, inner) | Selection::Negate(_, inner) => inner,
        }
    }
}
227
228impl Types {
229    /// Creates a new file type matcher that never matches any path and
230    /// contains no file type definitions.
231    pub fn empty() -> Types {
232        Types {
233            defs: vec![],
234            selections: vec![],
235            has_selected: false,
236            glob_to_selection: vec![],
237            set: GlobSetBuilder::new().build().unwrap(),
238            matches: Arc::new(ThreadLocal::default()),
239        }
240    }
241
242    /// Returns true if and only if this matcher has zero selections.
243    pub fn is_empty(&self) -> bool {
244        self.selections.is_empty()
245    }
246
247    /// Returns the number of selections used in this matcher.
248    pub fn len(&self) -> usize {
249        self.selections.len()
250    }
251
252    /// Return the set of current file type definitions.
253    ///
254    /// Definitions and globs are sorted.
255    pub fn definitions(&self) -> &[FileTypeDef] {
256        &self.defs
257    }
258
259    /// Returns a match for the given path against this file type matcher.
260    ///
261    /// The path is considered whitelisted if it matches a selected file type.
262    /// The path is considered ignored if it matches a negated file type.
263    /// If at least one file type is selected and `path` doesn't match, then
264    /// the path is also considered ignored.
265    pub fn matched<'a, P: AsRef<Path>>(
266        &'a self,
267        path: P,
268        is_dir: bool,
269    ) -> Match<Glob<'a>> {
270        // File types don't apply to directories, and we can't do anything
271        // if our glob set is empty.
272        if is_dir || self.set.is_empty() {
273            return Match::None;
274        }
275        // We only want to match against the file name, so extract it.
276        // If one doesn't exist, then we can't match it.
277        let name = match file_name(path.as_ref()) {
278            Some(name) => name,
279            None if self.has_selected => {
280                return Match::Ignore(Glob::unmatched());
281            }
282            None => {
283                return Match::None;
284            }
285        };
286        let mut matches = self.matches.get_or_default().borrow_mut();
287        self.set.matches_into(name, &mut *matches);
288        // The highest precedent match is the last one.
289        if let Some(&i) = matches.last() {
290            let (isel, _) = self.glob_to_selection[i];
291            let sel = &self.selections[isel];
292            let glob = Glob(GlobInner::Matched { def: sel.inner() });
293            return if sel.is_negated() {
294                Match::Ignore(glob)
295            } else {
296                Match::Whitelist(glob)
297            };
298        }
299        if self.has_selected {
300            Match::Ignore(Glob::unmatched())
301        } else {
302            Match::None
303        }
304    }
305}
306
307/// TypesBuilder builds a type matcher from a set of file type definitions and
308/// a set of file type selections.
309pub struct TypesBuilder {
310    types: HashMap<String, FileTypeDef>,
311    selections: Vec<Selection<()>>,
312}
313
314impl TypesBuilder {
315    /// Create a new builder for a file type matcher.
316    ///
317    /// The builder contains *no* type definitions to start with. A set
318    /// of default type definitions can be added with `add_defaults`, and
319    /// additional type definitions can be added with `select` and `negate`.
320    pub fn new() -> TypesBuilder {
321        TypesBuilder { types: HashMap::new(), selections: vec![] }
322    }
323
324    /// Build the current set of file type definitions *and* selections into
325    /// a file type matcher.
326    pub fn build(&self) -> Result<Types, Error> {
327        let defs = self.definitions();
328        let has_selected = self.selections.iter().any(|s| !s.is_negated());
329
330        let mut selections = vec![];
331        let mut glob_to_selection = vec![];
332        let mut build_set = GlobSetBuilder::new();
333        for (isel, selection) in self.selections.iter().enumerate() {
334            let def = match self.types.get(selection.name()) {
335                Some(def) => def.clone(),
336                None => {
337                    let name = selection.name().to_string();
338                    return Err(Error::UnrecognizedFileType(name));
339                }
340            };
341            for (iglob, glob) in def.globs.iter().enumerate() {
342                build_set.add(
343                    GlobBuilder::new(glob)
344                        .literal_separator(true)
345                        .build()
346                        .map_err(|err| Error::Glob {
347                            glob: Some(glob.to_string()),
348                            err: err.kind().to_string(),
349                        })?,
350                );
351                glob_to_selection.push((isel, iglob));
352            }
353            selections.push(selection.clone().map(move |_| def));
354        }
355        let set = build_set
356            .build()
357            .map_err(|err| Error::Glob { glob: None, err: err.to_string() })?;
358        Ok(Types {
359            defs: defs,
360            selections: selections,
361            has_selected: has_selected,
362            glob_to_selection: glob_to_selection,
363            set: set,
364            matches: Arc::new(ThreadLocal::default()),
365        })
366    }
367
368    /// Return the set of current file type definitions.
369    ///
370    /// Definitions and globs are sorted.
371    pub fn definitions(&self) -> Vec<FileTypeDef> {
372        let mut defs = vec![];
373        for def in self.types.values() {
374            let mut def = def.clone();
375            def.globs.sort();
376            defs.push(def);
377        }
378        defs.sort_by(|def1, def2| def1.name().cmp(def2.name()));
379        defs
380    }
381
382    /// Select the file type given by `name`.
383    ///
384    /// If `name` is `all`, then all file types currently defined are selected.
385    pub fn select(&mut self, name: &str) -> &mut TypesBuilder {
386        if name == "all" {
387            for name in self.types.keys() {
388                self.selections.push(Selection::Select(name.to_string(), ()));
389            }
390        } else {
391            self.selections.push(Selection::Select(name.to_string(), ()));
392        }
393        self
394    }
395
396    /// Ignore the file type given by `name`.
397    ///
398    /// If `name` is `all`, then all file types currently defined are negated.
399    pub fn negate(&mut self, name: &str) -> &mut TypesBuilder {
400        if name == "all" {
401            for name in self.types.keys() {
402                self.selections.push(Selection::Negate(name.to_string(), ()));
403            }
404        } else {
405            self.selections.push(Selection::Negate(name.to_string(), ()));
406        }
407        self
408    }
409
410    /// Clear any file type definitions for the type name given.
411    pub fn clear(&mut self, name: &str) -> &mut TypesBuilder {
412        self.types.remove(name);
413        self
414    }
415
416    /// Add a new file type definition. `name` can be arbitrary and `pat`
417    /// should be a glob recognizing file paths belonging to the `name` type.
418    ///
419    /// If `name` is `all` or otherwise contains any character that is not a
420    /// Unicode letter or number, then an error is returned.
421    pub fn add(&mut self, name: &str, glob: &str) -> Result<(), Error> {
422        lazy_static::lazy_static! {
423            static ref RE: Regex = Regex::new(r"^[\pL\pN]+$").unwrap();
424        };
425        if name == "all" || !RE.is_match(name) {
426            return Err(Error::InvalidDefinition);
427        }
428        let (key, glob) = (name.to_string(), glob.to_string());
429        self.types
430            .entry(key)
431            .or_insert_with(|| FileTypeDef {
432                name: name.to_string(),
433                globs: vec![],
434            })
435            .globs
436            .push(glob);
437        Ok(())
438    }
439
440    /// Add a new file type definition specified in string form. There are two
441    /// valid formats:
442    /// 1. `{name}:{glob}`.  This defines a 'root' definition that associates the
443    ///     given name with the given glob.
444    /// 2. `{name}:include:{comma-separated list of already defined names}.
445    ///     This defines an 'include' definition that associates the given name
446    ///     with the definitions of the given existing types.
447    /// Names may not include any characters that are not
448    /// Unicode letters or numbers.
449    pub fn add_def(&mut self, def: &str) -> Result<(), Error> {
450        let parts: Vec<&str> = def.split(':').collect();
451        match parts.len() {
452            2 => {
453                let name = parts[0];
454                let glob = parts[1];
455                if name.is_empty() || glob.is_empty() {
456                    return Err(Error::InvalidDefinition);
457                }
458                self.add(name, glob)
459            }
460            3 => {
461                let name = parts[0];
462                let types_string = parts[2];
463                if name.is_empty()
464                    || parts[1] != "include"
465                    || types_string.is_empty()
466                {
467                    return Err(Error::InvalidDefinition);
468                }
469                let types = types_string.split(',');
470                // Check ahead of time to ensure that all types specified are
471                // present and fail fast if not.
472                if types.clone().any(|t| !self.types.contains_key(t)) {
473                    return Err(Error::InvalidDefinition);
474                }
475                for type_name in types {
476                    let globs =
477                        self.types.get(type_name).unwrap().globs.clone();
478                    for glob in globs {
479                        self.add(name, &glob)?;
480                    }
481                }
482                Ok(())
483            }
484            _ => Err(Error::InvalidDefinition),
485        }
486    }
487
488    /// Add a set of default file type definitions.
489    pub fn add_defaults(&mut self) -> &mut TypesBuilder {
490        static MSG: &'static str = "adding a default type should never fail";
491        for &(name, exts) in DEFAULT_TYPES {
492            for ext in exts {
493                self.add(name, ext).expect(MSG);
494            }
495        }
496        self
497    }
498}
499
#[cfg(test)]
mod tests {
    use super::TypesBuilder;

    /// Builds a matcher from the definitions in `defs`, selects every name in
    /// `selected`, negates every name in `negated`, and asserts that `path`
    /// (treated as a file) is not ignored exactly when `expected` is true.
    fn check(
        defs: Vec<&str>,
        selected: Vec<&str>,
        negated: Vec<&str>,
        path: &str,
        expected: bool,
    ) {
        let mut builder = TypesBuilder::new();
        for def in defs {
            builder.add_def(def).unwrap();
        }
        for name in selected {
            builder.select(name);
        }
        for name in negated {
            builder.negate(name);
        }
        let matcher = builder.build().unwrap();
        let mat = matcher.matched(path, false);
        assert_eq!(expected, !mat.is_ignore());
    }

    // Generates one `#[test]` per invocation. The `not` form expects the
    // path to be ignored; the plain form expects it not to be ignored.
    macro_rules! matched {
        ($name:ident, $types:expr, $sel:expr, $selnot:expr,
         $path:expr) => {
            matched!($name, $types, $sel, $selnot, $path, true);
        };
        (not, $name:ident, $types:expr, $sel:expr, $selnot:expr,
         $path:expr) => {
            matched!($name, $types, $sel, $selnot, $path, false);
        };
        ($name:ident, $types:expr, $sel:expr, $selnot:expr,
         $path:expr, $matched:expr) => {
            #[test]
            fn $name() {
                check($types, $sel, $selnot, $path, $matched);
            }
        };
    }

    /// The file type definitions shared by all tests in this module.
    fn types() -> Vec<&'static str> {
        vec![
            "html:*.html",
            "html:*.htm",
            "rust:*.rs",
            "js:*.js",
            "foo:*.{rs,foo}",
            "combo:include:html,rust",
        ]
    }

    matched!(match1, types(), vec!["rust"], vec![], "lib.rs");
    matched!(match2, types(), vec!["html"], vec![], "index.html");
    matched!(match3, types(), vec!["html"], vec![], "index.htm");
    matched!(match4, types(), vec!["html", "rust"], vec![], "main.rs");
    matched!(match5, types(), vec![], vec![], "index.html");
    matched!(match6, types(), vec![], vec!["rust"], "index.html");
    matched!(match7, types(), vec!["foo"], vec!["rust"], "main.foo");
    matched!(match8, types(), vec!["combo"], vec![], "index.html");
    matched!(match9, types(), vec!["combo"], vec![], "lib.rs");

    matched!(not, matchnot1, types(), vec!["rust"], vec![], "index.html");
    matched!(not, matchnot2, types(), vec![], vec!["rust"], "main.rs");
    matched!(not, matchnot3, types(), vec!["foo"], vec!["rust"], "main.rs");
    matched!(not, matchnot4, types(), vec!["rust"], vec!["foo"], "main.rs");
    matched!(not, matchnot5, types(), vec!["rust"], vec!["foo"], "main.foo");
    matched!(not, matchnot6, types(), vec!["combo"], vec![], "leftpad.js");

    #[test]
    fn test_invalid_defs() {
        let mut builder = TypesBuilder::new();
        types().into_iter().for_each(|def| builder.add_def(def).unwrap());
        // Snapshot the definitions so that each rejected input can be shown
        // to have left them untouched.
        let snapshot = builder.definitions();
        let rejected = [
            // Reference to type that does not exist
            "combo:include:html,python",
            // Bad format
            "combo:foobar:html,rust",
            "",
        ];
        for &def in rejected.iter() {
            assert!(builder.add_def(def).is_err());
            // Ensure that nothing changed, even if some of the includes were valid.
            assert_eq!(builder.definitions(), snapshot);
        }
    }
}