slug/lib.rs
1extern crate deunicode;
2
3use deunicode::deunicode_char;
4
5/// Convert any unicode string to an ascii "slug" (useful for file names/url components)
6///
7/// The returned "slug" will consist of a-z, 0-9, and '-'. Furthermore, a slug will
8/// never contain more than one '-' in a row and will never start or end with '-'.
9///
10/// ```rust
11/// use self::slug::slugify;
12///
13/// assert_eq!(slugify("My Test String!!!1!1"), "my-test-string-1-1");
14/// assert_eq!(slugify("test\nit now!"), "test-it-now");
15/// assert_eq!(slugify(" --test_-_cool"), "test-cool");
16/// assert_eq!(slugify("Æúű--cool?"), "aeuu-cool");
17/// assert_eq!(slugify("You & Me"), "you-me");
18/// assert_eq!(slugify("user@example.com"), "user-example-com");
19/// ```
20pub fn slugify<S: AsRef<str>>(s: S) -> String {
21 _slugify(s.as_ref())
22}
23
24// avoid unnecessary monomorphizations
25fn _slugify(s: &str) -> String {
26 let mut slug: Vec<u8> = Vec::with_capacity(s.len());
27 // Starts with true to avoid leading -
28 let mut prev_is_dash = true;
29 {
30 let mut push_char = |x: char| {
31 match x {
32 'a'...'z' | '0'...'9' => {
33 prev_is_dash = false;
34 slug.push(x as u8);
35 }
36 'A'...'Z' => {
37 prev_is_dash = false;
38 // Manual lowercasing as Rust to_lowercase() is unicode
39 // aware and therefore much slower
40 slug.push((x as u8) - b'A' + b'a');
41 }
42 _ => {
43 if !prev_is_dash {
44 slug.push(b'-');
45 prev_is_dash = true;
46 }
47 }
48 }
49 };
50
51 for c in s.chars() {
52 if c.is_ascii() {
53 (push_char)(c);
54 } else {
55 for cx in deunicode_char(c).unwrap_or("-").chars() {
56 (push_char)(cx);
57 }
58 }
59 }
60 }
61
62 // It's not really unsafe in practice, we know we have ASCII
63 let mut string = unsafe { String::from_utf8_unchecked(slug) };
64 if string.ends_with('-') {
65 string.pop();
66 }
67 // We likely reserved more space than needed.
68 string.shrink_to_fit();
69 string
70}