slug/
lib.rs

1extern crate deunicode;
2
3use deunicode::deunicode_char;
4
5/// Convert any unicode string to an ascii "slug" (useful for file names/url components)
6///
7/// The returned "slug" will consist of a-z, 0-9, and '-'. Furthermore, a slug will
8/// never contain more than one '-' in a row and will never start or end with '-'.
9///
10/// ```rust
11/// use self::slug::slugify;
12///
13/// assert_eq!(slugify("My Test String!!!1!1"), "my-test-string-1-1");
14/// assert_eq!(slugify("test\nit   now!"), "test-it-now");
15/// assert_eq!(slugify("  --test_-_cool"), "test-cool");
16/// assert_eq!(slugify("Æúű--cool?"), "aeuu-cool");
17/// assert_eq!(slugify("You & Me"), "you-me");
18/// assert_eq!(slugify("user@example.com"), "user-example-com");
19/// ```
20pub fn slugify<S: AsRef<str>>(s: S) -> String {
21    _slugify(s.as_ref())
22}
23
24// avoid unnecessary monomorphizations
25fn _slugify(s: &str) -> String {
26    let mut slug: Vec<u8> = Vec::with_capacity(s.len());
27    // Starts with true to avoid leading -
28    let mut prev_is_dash = true;
29    {
30        let mut push_char = |x: char| {
31            match x {
32                'a'...'z' | '0'...'9' => {
33                    prev_is_dash = false;
34                    slug.push(x as u8);
35                }
36                'A'...'Z' => {
37                    prev_is_dash = false;
38                    // Manual lowercasing as Rust to_lowercase() is unicode
39                    // aware and therefore much slower
40                    slug.push((x as u8) - b'A' + b'a');
41                }
42                _ => {
43                    if !prev_is_dash {
44                        slug.push(b'-');
45                        prev_is_dash = true;
46                    }
47                }
48            }
49        };
50
51        for c in s.chars() {
52            if c.is_ascii() {
53                (push_char)(c);
54            } else {
55                for cx in deunicode_char(c).unwrap_or("-").chars() {
56                    (push_char)(cx);
57                }
58            }
59        }
60    }
61
62    // It's not really unsafe in practice, we know we have ASCII
63    let mut string = unsafe { String::from_utf8_unchecked(slug) };
64    if string.ends_with('-') {
65        string.pop();
66    }
67    // We likely reserved more space than needed.
68    string.shrink_to_fit();
69    string
70}