caseless/
lib.rs

1use unicode_normalization::UnicodeNormalization;
2
3extern crate unicode_normalization;
4
5include!(concat!(env!("OUT_DIR"), "/case_folding_data.rs"));
6
7
8pub trait Caseless {
9    fn default_case_fold(self) -> CaseFold<Self> where Self: Sized;
10    fn default_caseless_match<J: Iterator<Item=char>>(self, other: J) -> bool;
11    fn canonical_caseless_match<J: Iterator<Item=char>>(self, other: J) -> bool;
12    fn compatibility_caseless_match<J: Iterator<Item=char>>(self, other: J) -> bool;
13}
14
15impl<I: Iterator<Item=char>> Caseless for I {
16    fn default_case_fold(self) -> CaseFold<I> {
17        CaseFold {
18            chars: self,
19            queue: ['\0', '\0'],
20        }
21    }
22
23    fn default_caseless_match<J: Iterator<Item=char>>(self, other: J) -> bool {
24        iter_eq(self.default_case_fold(),
25                other.default_case_fold())
26    }
27
28    fn canonical_caseless_match<J: Iterator<Item=char>>(self, other: J) -> bool {
29        // FIXME: Inner NFD can be optimized:
30        // "Normalization is not required before case folding,
31        //  except for the character U+0345 "combining greek ypogegrammeni"
32        //  and any characters that have it as part of their canonical decomposition,
33        //  such as U+1FC3 "greek small letter eta with ypogegrammeni".
34        //  In practice, optimized versions of canonical caseless matching
35        //  can catch these special cases, thereby avoiding an extra normalization
36        //  step for each comparison."
37        // Unicode Standard, section 3.13 Default Case Algorithms
38        iter_eq(self.nfd().default_case_fold().nfd(),
39                other.nfd().default_case_fold().nfd())
40    }
41
42    fn compatibility_caseless_match<J: Iterator<Item=char>>(self, other: J) -> bool {
43        // FIXME: Unclear if the inner NFD can be optimized here like in canonical_caseless_match.
44        iter_eq(self.nfd().default_case_fold().nfkd().default_case_fold().nfkd(),
45                other.nfd().default_case_fold().nfkd().default_case_fold().nfkd())
46    }
47
48}
49
50pub fn default_case_fold_str(s: &str) -> String {
51    s.chars().default_case_fold().collect()
52}
53
54pub fn default_caseless_match_str(a: &str, b: &str) -> bool {
55    a.chars().default_caseless_match(b.chars())
56}
57
58pub fn canonical_caseless_match_str(a: &str, b: &str) -> bool {
59    a.chars().canonical_caseless_match(b.chars())
60}
61
62pub fn compatibility_caseless_match_str(a: &str, b: &str) -> bool {
63    a.chars().compatibility_caseless_match(b.chars())
64}
65
/// Returns `true` when `a` and `b` yield the same number of items and every
/// pair of items at the same position compares equal.
///
/// Comparison stops at the first mismatch or as soon as either side is
/// exhausted. This delegates to the standard library's `Iterator::eq`, which
/// implements exactly this element-wise comparison.
fn iter_eq<L: Iterator, R: Iterator>(a: L, b: R) -> bool where L::Item: PartialEq<R::Item> {
    Iterator::eq(a, b)
}
75
/// Iterator adaptor that yields the default case folding of the characters
/// produced by an underlying iterator.
///
/// A single input character may fold to as many as three output characters.
/// The first is returned immediately; the remaining ones wait in `queue`.
///
/// `Clone` and `Debug` are available whenever the wrapped iterator provides
/// them.
#[derive(Clone, Debug)]
pub struct CaseFold<I> {
    /// Source of the characters still to be folded.
    chars: I,
    /// Folded characters waiting to be emitted, front first. `'\0'` marks an
    /// empty slot, so `['\0', '\0']` means nothing is pending.
    queue: [char; 2],
}
80
81impl<I> Iterator for CaseFold<I> where I: Iterator<Item = char> {
82    type Item = char;
83
84    fn next(&mut self) -> Option<char> {
85        let c = self.queue[0];
86        if c != '\0' {
87            self.queue[0] = self.queue[1];
88            self.queue[1] = '\0';
89            return Some(c)
90        }
91        self.chars.next().map(|c| {
92            match CASE_FOLDING_TABLE.binary_search_by(|&(x, _)| x.cmp(&c)) {
93                Err(_) => c,
94                Ok(i) => {
95                    let folded = CASE_FOLDING_TABLE[i].1;
96                    self.queue = [folded[1], folded[2]];
97                    folded[0]
98                }
99            }
100        })
101    }
102
103    fn size_hint(&self) -> (usize, Option<usize>) {
104        let queue_len = if self.queue[0] == '\0' {
105            0
106        } else if self.queue[1] == '\0' {
107            1
108        } else {
109            2
110        };
111        let (low, high) = self.chars.size_hint();
112        (low.saturating_add(queue_len),
113         high.and_then(|h| h.checked_mul(3)).and_then(|h| h.checked_add(queue_len)))
114    }
115}
116
#[cfg(test)]
mod tests {
    use super::{
        canonical_caseless_match_str, compatibility_caseless_match_str,
        default_case_fold_str, default_caseless_match_str,
    };

    /// Default case folding of whole strings, including multi-character
    /// foldings.
    #[test]
    fn test_strs() {
        assert_eq!(default_case_fold_str("Test Case"), "test case");
        assert_eq!(default_case_fold_str("Teſt Caſe"), "test case");
        assert_eq!(default_case_fold_str("spiffiest"), "spiffiest");
        assert_eq!(default_case_fold_str("straße"), "strasse");
        // U+FB03 LATIN SMALL LIGATURE FFI folds to three characters.
        assert_eq!(default_case_fold_str("spi\u{FB03}est"), "spiffiest");
        assert_eq!(default_case_fold_str(""), "");
    }

    /// Each matching flavour accepts what the weaker one rejects.
    #[test]
    fn test_caseless_matches() {
        assert!(default_caseless_match_str("Straße", "STRASSE"));
        assert!(!default_caseless_match_str("Straße", "STRASE"));

        // U+00C5 (precomposed A with ring) vs. "a" + U+030A (combining ring):
        // equal only once both sides are canonically decomposed.
        assert!(!default_caseless_match_str("\u{C5}", "a\u{30A}"));
        assert!(canonical_caseless_match_str("\u{C5}", "a\u{30A}"));

        // U+2460 (circled digit one) only has a compatibility decomposition.
        assert!(!canonical_caseless_match_str("\u{2460}", "1"));
        assert!(compatibility_caseless_match_str("\u{2460}", "1"));
    }
}
129