bstr/search/
twoway.rs

1use core::cmp;
2
3use bstr::BStr;
4use cow::CowBStr;
5use search::prefilter::{Freqy, PrefilterState};
6
7/// An implementation of the TwoWay substring search algorithm, with heuristics
8/// for accelerating search based on frequency analysis.
9///
10/// This searcher supports forward and reverse search, although not
11/// simultaneously. It runs in O(n + m) time and O(1) space, where
12/// `n ~ len(needle)` and `m ~ len(haystack)`.
13///
14/// The implementation here roughly matches that which was developed by
15/// Crochemore and Perrin in their 1991 paper "Two-way string-matching." The
16/// only change in this implementation is the use of zero-based indices and
17/// the addition of heuristics for a fast skip loop. That is, this will detect
18/// bytes that are believed to be rare in the needle and use fast vectorized
19/// instructions to find their occurrences quickly. The Two-Way algorithm is
20/// then used to confirm whether a match at that location occurred.
21///
22/// The heuristic for fast skipping is automatically shut off if it's
23/// detected to be ineffective at search time. Generally, this only occurs in
24/// pathological cases. But this is generally necessary in order to preserve
25/// a `O(n + m)` time bound.
26///
27/// The code below is fairly complex and not obviously correct at all. It's
28/// likely necessary to read the Two-Way paper cited above in order to fully
29/// grok this code.
30#[derive(Clone, Debug)]
31pub struct TwoWay<'b> {
32    /// The needle that we're looking for.
33    needle: CowBStr<'b>,
34    /// An implementation of a fast skip loop based on hard-coded frequency
35    /// data. This is only used when conditions are deemed favorable.
36    freqy: Freqy,
37    /// A critical position in needle. Specifically, this position corresponds
38    /// to beginning of either the minimal or maximal suffix in needle. (N.B.
39    /// See SuffixType below for why "minimal" isn't quite the correct word
40    /// here.)
41    ///
42    /// This is the position at which every search begins. Namely, search
43    /// starts by scanning text to the right of this position, and only if
44    /// there's a match does the text to the left of this position get scanned.
45    critical_pos: usize,
46    /// The amount we shift by in the Two-Way search algorithm. This
47    /// corresponds to the "small period" and "large period" cases.
48    shift: Shift,
49}
50
51impl<'b> TwoWay<'b> {
52    /// Create a searcher that uses the Two-Way algorithm by searching forwards
53    /// through any haystack.
54    pub fn forward(needle: &'b BStr) -> TwoWay<'b> {
55        let freqy = Freqy::forward(needle);
56        if needle.is_empty() {
57            return TwoWay {
58                needle: CowBStr::new(needle),
59                freqy,
60                critical_pos: 0,
61                shift: Shift::Large { shift: 0  },
62            };
63        }
64
65        let min_suffix = Suffix::forward(needle, SuffixKind::Minimal);
66        let max_suffix = Suffix::forward(needle, SuffixKind::Maximal);
67        let (period_lower_bound, critical_pos) =
68            if min_suffix.pos > max_suffix.pos {
69                (min_suffix.period, min_suffix.pos)
70            } else {
71                (max_suffix.period, max_suffix.pos)
72            };
73        let shift = Shift::forward(needle, period_lower_bound, critical_pos);
74        let needle = CowBStr::new(needle);
75        TwoWay { needle, freqy, critical_pos, shift }
76    }
77
78    /// Create a searcher that uses the Two-Way algorithm by searching in
79    /// reverse through any haystack.
80    pub fn reverse(needle: &'b BStr) -> TwoWay<'b> {
81        let freqy = Freqy::reverse(needle);
82        if needle.is_empty() {
83            return TwoWay {
84                needle: CowBStr::new(needle),
85                freqy,
86                critical_pos: 0,
87                shift: Shift::Large { shift: 0  },
88            };
89        }
90
91        let min_suffix = Suffix::reverse(needle, SuffixKind::Minimal);
92        let max_suffix = Suffix::reverse(needle, SuffixKind::Maximal);
93        let (period_lower_bound, critical_pos) =
94            if min_suffix.pos < max_suffix.pos {
95                (min_suffix.period, min_suffix.pos)
96            } else {
97                (max_suffix.period, max_suffix.pos)
98            };
99        let shift = Shift::reverse(needle, period_lower_bound, critical_pos);
100        let needle = CowBStr::new(needle);
101        TwoWay { needle, freqy, critical_pos, shift }
102    }
103
104    /// Return a fresh prefilter state that can be used with this searcher.
105    /// A prefilter state is used to track the effectiveness of a searcher's
106    /// prefilter for speeding up searches. Therefore, the prefilter state
107    /// should generally be reused on subsequent searches (such as in an
108    /// iterator). For searches on a different haystack, then a new prefilter
109    /// state should be used.
110    ///
111    /// This always initializes a valid prefilter state even if this searcher
112    /// does not have a prefilter enabled.
113    pub fn prefilter_state(&self) -> PrefilterState {
114        self.freqy.prefilter_state()
115    }
116
117    /// Return the needle used by this searcher.
118    pub fn needle(&self) -> &BStr {
119        self.needle.as_bstr()
120    }
121
122    /// Convert this searched into an owned version, where the needle is
123    /// copied if it isn't already owned.
124    #[cfg(feature = "std")]
125    pub fn into_owned(self) -> TwoWay<'static> {
126        TwoWay {
127            needle: self.needle.into_owned(),
128            freqy: self.freqy,
129            critical_pos: self.critical_pos,
130            shift: self.shift,
131        }
132    }
133
134    /// Find the position of the first occurrence of this searcher's needle in
135    /// the given haystack. If one does not exist, then return None.
136    ///
137    /// This will automatically initialize prefilter state. This should only
138    /// be used for one-off searches.
139    pub fn find(&self, haystack: &BStr) -> Option<usize> {
140        self.find_with(&mut self.prefilter_state(), haystack)
141    }
142
143    /// Find the position of the last occurrence of this searcher's needle
144    /// in the given haystack. If one does not exist, then return None.
145    ///
146    /// This will automatically initialize prefilter state. This should only
147    /// be used for one-off searches.
148    pub fn rfind(&self, haystack: &BStr) -> Option<usize> {
149        self.rfind_with(&mut self.prefilter_state(), haystack)
150    }
151
152    /// Find the position of the first occurrence of this searcher's needle in
153    /// the given haystack. If one does not exist, then return None.
154    ///
155    /// This accepts prefilter state that is useful when using the same
156    /// searcher multiple times, such as in an iterator.
157    pub fn find_with(
158        &self,
159        prestate: &mut PrefilterState,
160        haystack: &BStr,
161    ) -> Option<usize> {
162        if self.needle.is_empty() {
163            return Some(0);
164        } else if haystack.len() < self.needle.len() {
165            return None;
166        } else if self.needle.len() == 1 {
167            return haystack.find_byte(self.needle[0]);
168        }
169        match self.shift {
170            Shift::Small { period } => {
171                self.find_small(prestate, haystack, period)
172            }
173            Shift::Large { shift } => {
174                self.find_large(prestate, haystack, shift)
175            }
176        }
177    }
178
179    /// Find the position of the last occurrence of this searcher's needle
180    /// in the given haystack. If one does not exist, then return None.
181    ///
182    /// This accepts prefilter state that is useful when using the same
183    /// searcher multiple times, such as in an iterator.
184    pub fn rfind_with(
185        &self,
186        prestate: &mut PrefilterState,
187        haystack: &BStr,
188    ) -> Option<usize> {
189        if self.needle.is_empty() {
190            return Some(haystack.len());
191        } else if haystack.len() < self.needle.len() {
192            return None;
193        } else if self.needle.len() == 1 {
194            return haystack.rfind_byte(self.needle[0]);
195        }
196        match self.shift {
197            Shift::Small { period } => {
198                self.rfind_small(prestate, haystack, period)
199            }
200            Shift::Large { shift } => {
201                self.rfind_large(prestate, haystack, shift)
202            }
203        }
204    }
205
206    // Below is the actual implementation of TwoWay searching, including both
207    // forwards and backwards searching. Each forward and reverse search has
208    // two fairly similar implementations, each handling the small and large
209    // period cases, for a total 4 different search routines.
210    //
211    // On top of that, each search implementation can be accelerated by a
212    // Freqy prefilter, but it is not always enabled. To avoid its overhead
213    // when its disabled, we explicitly inline each search implementation based
214    // on whether Freqy will be used or not. This brings us up to a total of
215    // 8 monomorphized versions of the search code.
216
217    #[inline(never)]
218    fn find_small(
219        &self,
220        prestate: &mut PrefilterState,
221        haystack: &BStr,
222        period: usize,
223    ) -> Option<usize> {
224        if prestate.is_effective() {
225            self.find_small_imp(prestate, true, haystack, period)
226        } else {
227            self.find_small_imp(prestate, false, haystack, period)
228        }
229    }
230
231    #[inline(always)]
232    fn find_small_imp(
233        &self,
234        prestate: &mut PrefilterState,
235        prefilter: bool,
236        haystack: &BStr,
237        period: usize,
238    ) -> Option<usize> {
239        let needle = self.needle.as_bstr();
240        let mut pos = 0;
241        let mut shift = 0;
242        while pos + needle.len() <= haystack.len() {
243            let mut i = cmp::max(self.critical_pos, shift);
244            if prefilter && prestate.is_effective() {
245                match self.freqy.find_candidate(prestate, &haystack[pos..]) {
246                    None => return None,
247                    Some(found) => {
248                        shift = 0;
249                        i = self.critical_pos;
250                        pos += found;
251                        if pos + needle.len() > haystack.len() {
252                            return None;
253                        }
254                    }
255                }
256            }
257            while i < needle.len() && needle[i] == haystack[pos + i] {
258                i += 1;
259            }
260            if i < needle.len() {
261                pos += i - self.critical_pos + 1;
262                shift = 0;
263            } else {
264                let mut j = self.critical_pos;
265                while j > shift && needle[j] == haystack[pos + j] {
266                    j -= 1;
267                }
268                if j <= shift && needle[shift] == haystack[pos + shift] {
269                    return Some(pos);
270                }
271                pos += period;
272                shift = needle.len() - period;
273            }
274        }
275        None
276    }
277
278    #[inline(never)]
279    fn find_large(
280        &self,
281        prestate: &mut PrefilterState,
282        haystack: &BStr,
283        shift: usize,
284    ) -> Option<usize> {
285        if prestate.is_effective() {
286            self.find_large_imp(prestate, true, haystack, shift)
287        } else {
288            self.find_large_imp(prestate, false, haystack, shift)
289        }
290    }
291
292    #[inline(always)]
293    fn find_large_imp(
294        &self,
295        prestate: &mut PrefilterState,
296        prefilter: bool,
297        haystack: &BStr,
298        shift: usize,
299    ) -> Option<usize> {
300        let needle = self.needle.as_bstr();
301        let mut pos = 0;
302        while pos + needle.len() <= haystack.len() {
303            let mut i = self.critical_pos;
304            if prefilter && prestate.is_effective() {
305                match self.freqy.find_candidate(prestate, &haystack[pos..]) {
306                    None => return None,
307                    Some(found) => {
308                        pos += found;
309                        if pos + needle.len() > haystack.len() {
310                            return None;
311                        }
312                    }
313                }
314            }
315            while i < needle.len() && needle[i] == haystack[pos + i] {
316                i += 1;
317            }
318            if i < needle.len() {
319                pos += i - self.critical_pos + 1;
320            } else {
321                let mut j = self.critical_pos;
322                while j > 0 && needle[j] == haystack[pos + j] {
323                    j -= 1;
324                }
325                if j == 0 && needle[0] == haystack[pos] {
326                    return Some(pos);
327                }
328                pos += shift;
329            }
330        }
331        None
332    }
333
334    #[inline(never)]
335    fn rfind_small(
336        &self,
337        prestate: &mut PrefilterState,
338        haystack: &BStr,
339        period: usize,
340    ) -> Option<usize> {
341        if prestate.is_effective() {
342            self.rfind_small_imp(prestate, true, haystack, period)
343        } else {
344            self.rfind_small_imp(prestate, false, haystack, period)
345        }
346    }
347
348    #[inline(always)]
349    fn rfind_small_imp(
350        &self,
351        prestate: &mut PrefilterState,
352        prefilter: bool,
353        haystack: &BStr,
354        period: usize,
355    ) -> Option<usize> {
356        let needle = &*self.needle;
357        let nlen = needle.len();
358        let mut pos = haystack.len();
359        let mut shift = nlen;
360        while pos >= nlen {
361            let mut i = cmp::min(self.critical_pos, shift);
362            if prefilter && prestate.is_effective() {
363                match self.freqy.rfind_candidate(prestate, &haystack[..pos]) {
364                    None => return None,
365                    Some(found) => {
366                        shift = nlen;
367                        i = self.critical_pos;
368                        pos = found;
369                        if pos < nlen {
370                            return None;
371                        }
372                    }
373                }
374            }
375            while i > 0 && needle[i - 1] == haystack[pos - nlen + i - 1] {
376                i -= 1;
377            }
378            if i > 0 || needle[0] != haystack[pos - nlen] {
379                pos -= self.critical_pos - i + 1;
380                shift = nlen;
381            } else {
382                let mut j = self.critical_pos;
383                while j < shift && needle[j] == haystack[pos - nlen + j] {
384                    j += 1;
385                }
386                if j == shift {
387                    return Some(pos - nlen);
388                }
389                pos -= period;
390                shift = period;
391            }
392        }
393        None
394    }
395
396    #[inline(never)]
397    fn rfind_large(
398        &self,
399        prestate: &mut PrefilterState,
400        haystack: &BStr,
401        shift: usize,
402    ) -> Option<usize> {
403        if prestate.is_effective() {
404            self.rfind_large_imp(prestate, true, haystack, shift)
405        } else {
406            self.rfind_large_imp(prestate, false, haystack, shift)
407        }
408    }
409
410    #[inline(always)]
411    fn rfind_large_imp(
412        &self,
413        prestate: &mut PrefilterState,
414        prefilter: bool,
415        haystack: &BStr,
416        shift: usize,
417    ) -> Option<usize> {
418        let needle = &*self.needle;
419        let nlen = needle.len();
420        let mut pos = haystack.len();
421        while pos >= nlen {
422            if prefilter && prestate.is_effective() {
423                match self.freqy.rfind_candidate(prestate, &haystack[..pos]) {
424                    None => return None,
425                    Some(found) => {
426                        pos = found;
427                        if pos < nlen {
428                            return None;
429                        }
430                    }
431                }
432            }
433
434            let mut i = self.critical_pos;
435            while i > 0 && needle[i - 1] == haystack[pos - nlen + i - 1] {
436                i -= 1;
437            }
438            if i > 0 || needle[0] != haystack[pos - nlen] {
439                pos -= self.critical_pos - i + 1;
440            } else {
441                let mut j = self.critical_pos;
442                while j < nlen && needle[j] == haystack[pos - nlen + j] {
443                    j += 1;
444                }
445                if j == nlen {
446                    return Some(pos - nlen);
447                }
448                pos -= shift;
449            }
450        }
451        None
452    }
453}
454
455/// A representation of the amount we're allowed to shift by during Two-Way
456/// search.
457///
458/// When computing a critical factorization of the needle, we find the position
459/// of the critical factorization by finding the needle's maximal (or minimal)
460/// suffix, along with the period of that suffix. It turns out that the period
461/// of that suffix is a lower bound on the period of the needle itself.
462///
463/// This lower bound is equivalent to the actual period of the needle in
464/// some cases. To describe that case, we denote the needle as `x` where
465/// `x = uv` and `v` is the lexicographic maximal suffix of `v`. The lower
466/// bound given here is always the period of `v`, which is `<= period(x)`. The
467/// case where `period(v) == period(x)` occurs when `len(u) < (len(x) / 2)` and
468/// where `u` is a suffix of `v[0..period(v)]`.
469///
470/// This case is important because the search algorithm for when the
471/// periods are equivalent is slightly different than the search algorithm
472/// for when the periods are not equivalent. In particular, when they aren't
473/// equivalent, we know that the period of the needle is no less than half its
474/// length. In this case, we shift by an amount less than or equal to the
475/// period of the needle (determined by the maximum length of the components
476/// of the critical factorization of `x`, i.e., `max(len(u), len(v))`)..
477///
478/// The above two cases are represented by the variants below. Each entails
479/// a different instantiation of the Two-Way search algorithm.
480///
481/// N.B. If we could find a way to compute the exact period in all cases,
482/// then we could collapse this case analysis and simplify the algorithm. The
483/// Two-Way paper suggests this is possible, but more reading is required to
484/// grok why the authors didn't pursue that path.
485#[derive(Clone, Debug)]
486enum Shift {
487    Small { period: usize },
488    Large { shift: usize },
489}
490
491impl Shift {
492    /// Compute the shift for a given needle in the forward direction.
493    ///
494    /// This requires a lower bound on the period and a critical position.
495    /// These can be computed by extracting both the minimal and maximal
496    /// lexicographic suffixes, and choosing the right-most starting position.
497    /// The lower bound on the period is then the period of the chosen suffix.
498    fn forward(
499        needle: &BStr,
500        period_lower_bound: usize,
501        critical_pos: usize,
502    ) -> Shift {
503        let large = cmp::max(critical_pos, needle.len() - critical_pos);
504        if critical_pos * 2 >= needle.len() {
505            return Shift::Large { shift: large };
506        }
507
508        let (u, v) = needle.split_at(critical_pos);
509        if !v[..period_lower_bound].ends_with(u) {
510            return Shift::Large { shift: large };
511        }
512        Shift::Small { period: period_lower_bound }
513    }
514
515    /// Compute the shift for a given needle in the reverse direction.
516    ///
517    /// This requires a lower bound on the period and a critical position.
518    /// These can be computed by extracting both the minimal and maximal
519    /// lexicographic suffixes, and choosing the left-most starting position.
520    /// The lower bound on the period is then the period of the chosen suffix.
521    fn reverse(
522        needle: &BStr,
523        period_lower_bound: usize,
524        critical_pos: usize,
525    ) -> Shift {
526        let large = cmp::max(critical_pos, needle.len() - critical_pos);
527        if (needle.len() - critical_pos) * 2 >= needle.len() {
528            return Shift::Large { shift: large };
529        }
530
531        let (v, u) = needle.split_at(critical_pos);
532        if !v[v.len() - period_lower_bound..].starts_with(u) {
533            return Shift::Large { shift: large };
534        }
535        Shift::Small { period: period_lower_bound }
536    }
537}
538
539/// A suffix extracted from a needle along with its period.
540#[derive(Debug)]
541struct Suffix {
542    /// The starting position of this suffix.
543    ///
544    /// If this is a forward suffix, then `&bytes[pos..]` can be used. If this
545    /// is a reverse suffix, then `&bytes[..pos]` can be used. That is, for
546    /// forward suffixes, this is an inclusive starting position, where as for
547    /// reverse suffixes, this is an exclusive ending position.
548    pos: usize,
549    /// The period of this suffix.
550    ///
551    /// Note that this is NOT necessarily the period of the string from which
552    /// this suffix comes from. (It is always less than or equal to the period
553    /// of the original string.)
554    period: usize,
555}
556
557impl Suffix {
558    fn forward(needle: &BStr, kind: SuffixKind) -> Suffix {
559        debug_assert!(!needle.is_empty());
560
561        // suffix represents our maximal (or minimal) suffix, along with
562        // its period.
563        let mut suffix = Suffix { pos: 0, period: 1 };
564        // The start of a suffix in `needle` that we are considering as a
565        // more maximal (or minimal) suffix than what's in `suffix`.
566        let mut candidate_start = 1;
567        // The current offset of our suffixes that we're comparing.
568        //
569        // When the characters at this offset are the same, then we mush on
570        // to the next position since no decision is possible. When the
571        // candidate's character is greater (or lesser) than the corresponding
572        // character than our current maximal (or minimal) suffix, then the
573        // current suffix is changed over to the candidate and we restart our
574        // search. Otherwise, the candidate suffix is no good and we restart
575        // our search on the next candidate.
576        //
577        // The three cases above correspond to the three cases in the loop
578        // below.
579        let mut offset = 0;
580
581        while candidate_start + offset < needle.len() {
582            let current = needle[suffix.pos + offset];
583            let candidate = needle[candidate_start + offset];
584            match kind.cmp(current, candidate) {
585                SuffixOrdering::Accept => {
586                    suffix = Suffix { pos: candidate_start, period: 1 };
587                    candidate_start += 1;
588                    offset = 0;
589                }
590                SuffixOrdering::Skip => {
591                    candidate_start += offset + 1;
592                    offset = 0;
593                    suffix.period = candidate_start - suffix.pos;
594                }
595                SuffixOrdering::Push => {
596                    if offset + 1 == suffix.period {
597                        candidate_start += suffix.period;
598                        offset = 0;
599                    } else {
600                        offset += 1;
601                    }
602                }
603            }
604        }
605        suffix
606    }
607
608    fn reverse(needle: &BStr, kind: SuffixKind) -> Suffix {
609        debug_assert!(!needle.is_empty());
610
611        // See the comments in `forward` for how this works.
612        let mut suffix = Suffix { pos: needle.len(), period: 1 };
613        if needle.len() == 1 {
614            return suffix;
615        }
616        let mut candidate_start = needle.len() - 1;
617        let mut offset = 0;
618
619        while offset < candidate_start {
620            let current = needle[suffix.pos - offset - 1];
621            let candidate = needle[candidate_start - offset - 1];
622            match kind.cmp(current, candidate) {
623                SuffixOrdering::Accept => {
624                    suffix = Suffix { pos: candidate_start, period: 1 };
625                    candidate_start -= 1;
626                    offset = 0;
627                }
628                SuffixOrdering::Skip => {
629                    candidate_start -= offset + 1;
630                    offset = 0;
631                    suffix.period = suffix.pos - candidate_start;
632                }
633                SuffixOrdering::Push => {
634                    if offset + 1 == suffix.period {
635                        candidate_start -= suffix.period;
636                        offset = 0;
637                    } else {
638                        offset += 1;
639                    }
640                }
641            }
642        }
643        suffix
644    }
645}
646
647/// The kind of suffix to extract.
648#[derive(Clone, Copy, Debug)]
649enum SuffixKind {
650    /// Extract the smallest lexicographic suffix from a string.
651    ///
652    /// Technically, this doesn't actually pick the smallest lexicographic
653    /// suffix. e.g., Given the choice between `a` and `aa`, this will choose
654    /// the latter over the former, even though `a < aa`. The reasoning for
655    /// this isn't clear from the paper, but it still smells like a minimal
656    /// suffix.
657    Minimal,
658    /// Extract the largest lexicographic suffix from a string.
659    ///
660    /// Unlike `Minimal`, this really does pick the maximum suffix. e.g., Given
661    /// the choice between `z` and `zz`, this will choose the latter over the
662    /// former.
663    Maximal,
664}
665
666/// The result of comparing corresponding bytes between two suffixes.
667#[derive(Clone, Copy, Debug)]
668enum SuffixOrdering {
669    /// This occurs when the given candidate byte indicates that the candidate
670    /// suffix is better than the current maximal (or minimal) suffix. That is,
671    /// the current candidate suffix should supplant the current maximal (or
672    /// minimal) suffix.
673    Accept,
674    /// This occurs when the given candidate byte excludes the candidate suffix
675    /// from being better than the current maximal (or minimal) suffix. That
676    /// is, the current candidate suffix should be dropped and the next one
677    /// should be considered.
678    Skip,
679    /// This occurs when no decision to accept or skip the candidate suffix
680    /// can be made, e.g., when corresponding bytes are equivalent. In this
681    /// case, the next corresponding bytes should be compared.
682    Push,
683}
684
685impl SuffixKind {
686    /// Returns true if and only if the given candidate byte indicates that
687    /// it should replace the current suffix as the maximal (or minimal)
688    /// suffix.
689    fn cmp(self, current: u8, candidate: u8) -> SuffixOrdering {
690        use self::SuffixOrdering::*;
691
692        match self {
693            SuffixKind::Minimal if candidate < current => Accept,
694            SuffixKind::Minimal if candidate > current => Skip,
695            SuffixKind::Minimal => Push,
696            SuffixKind::Maximal if candidate > current => Accept,
697            SuffixKind::Maximal if candidate < current => Skip,
698            SuffixKind::Maximal => Push,
699        }
700    }
701}
702
703// N.B. There are more holistic tests in src/search/tests.rs.
704#[cfg(test)]
705mod tests {
706    use bstr::{B, BStr};
707    use bstring::BString;
708
709    use super::*;
710
711    /// Convenience wrapper for computing the suffix as a byte string.
712    fn get_suffix_forward(needle: &BStr, kind: SuffixKind) -> (&BStr, usize) {
713        let s = Suffix::forward(needle, kind);
714        (&needle[s.pos..], s.period)
715    }
716
717    /// Convenience wrapper for computing the reverse suffix as a byte string.
718    fn get_suffix_reverse(needle: &BStr, kind: SuffixKind) -> (&BStr, usize) {
719        let s = Suffix::reverse(needle, kind);
720        (&needle[..s.pos], s.period)
721    }
722
723    /// Return all of the non-empty suffixes in the given byte string.
724    fn suffixes(bytes: &BStr) -> Vec<&BStr> {
725        (0..bytes.len()).map(|i| &bytes[i..]).collect()
726    }
727
728    /// Return the lexicographically maximal suffix of the given byte string.
729    fn naive_maximal_suffix_forward(needle: &BStr) -> &BStr {
730        let mut sufs = suffixes(needle);
731        sufs.sort();
732        sufs.pop().unwrap()
733    }
734
735    /// Return the lexicographically maximal suffix of the reverse of the given
736    /// byte string.
737    fn naive_maximal_suffix_reverse(needle: &BStr) -> BString {
738        let mut reversed = needle.to_bstring();
739        reversed.reverse_bytes();
740        let mut got = naive_maximal_suffix_forward(&reversed).to_bstring();
741        got.reverse_bytes();
742        got
743    }
744
745    #[test]
746    fn suffix_forward() {
747        macro_rules! assert_suffix_min {
748            ($given:expr, $expected:expr, $period:expr) => {
749                let (got_suffix, got_period) = get_suffix_forward(
750                    B($given),
751                    SuffixKind::Minimal,
752                );
753                assert_eq!((B($expected), $period), (got_suffix, got_period));
754            };
755        }
756
757        macro_rules! assert_suffix_max {
758            ($given:expr, $expected:expr, $period:expr) => {
759                let (got_suffix, got_period) = get_suffix_forward(
760                    B($given),
761                    SuffixKind::Maximal,
762                );
763                assert_eq!((B($expected), $period), (got_suffix, got_period));
764            };
765        }
766
767        assert_suffix_min!("a", "a", 1);
768        assert_suffix_max!("a", "a", 1);
769
770        assert_suffix_min!("ab", "ab", 2);
771        assert_suffix_max!("ab", "b", 1);
772
773        assert_suffix_min!("ba", "a", 1);
774        assert_suffix_max!("ba", "ba", 2);
775
776        assert_suffix_min!("abc", "abc", 3);
777        assert_suffix_max!("abc", "c", 1);
778
779        assert_suffix_min!("acb", "acb", 3);
780        assert_suffix_max!("acb", "cb", 2);
781
782        assert_suffix_min!("cba", "a", 1);
783        assert_suffix_max!("cba", "cba", 3);
784
785        assert_suffix_min!("abcabc", "abcabc", 3);
786        assert_suffix_max!("abcabc", "cabc", 3);
787
788        assert_suffix_min!("abcabcabc", "abcabcabc", 3);
789        assert_suffix_max!("abcabcabc", "cabcabc", 3);
790
791        assert_suffix_min!("abczz", "abczz", 5);
792        assert_suffix_max!("abczz", "zz", 1);
793
794        assert_suffix_min!("zzabc", "abc", 3);
795        assert_suffix_max!("zzabc", "zzabc", 5);
796
797        assert_suffix_min!("aaa", "aaa", 1);
798        assert_suffix_max!("aaa", "aaa", 1);
799
800        assert_suffix_min!("foobar", "ar", 2);
801        assert_suffix_max!("foobar", "r", 1);
802    }
803
804    #[test]
805    fn suffix_reverse() {
806        macro_rules! assert_suffix_min {
807            ($given:expr, $expected:expr, $period:expr) => {
808                let (got_suffix, got_period) = get_suffix_reverse(
809                    B($given),
810                    SuffixKind::Minimal,
811                );
812                assert_eq!((B($expected), $period), (got_suffix, got_period));
813            };
814        }
815
816        macro_rules! assert_suffix_max {
817            ($given:expr, $expected:expr, $period:expr) => {
818                let (got_suffix, got_period) = get_suffix_reverse(
819                    B($given),
820                    SuffixKind::Maximal,
821                );
822                assert_eq!((B($expected), $period), (got_suffix, got_period));
823            };
824        }
825
826        assert_suffix_min!("a", "a", 1);
827        assert_suffix_max!("a", "a", 1);
828
829        assert_suffix_min!("ab", "a", 1);
830        assert_suffix_max!("ab", "ab", 2);
831
832        assert_suffix_min!("ba", "ba", 2);
833        assert_suffix_max!("ba", "b", 1);
834
835        assert_suffix_min!("abc", "a", 1);
836        assert_suffix_max!("abc", "abc", 3);
837
838        assert_suffix_min!("acb", "a", 1);
839        assert_suffix_max!("acb", "ac", 2);
840
841        assert_suffix_min!("cba", "cba", 3);
842        assert_suffix_max!("cba", "c", 1);
843
844        assert_suffix_min!("abcabc", "abca", 3);
845        assert_suffix_max!("abcabc", "abcabc", 3);
846
847        assert_suffix_min!("abcabcabc", "abcabca", 3);
848        assert_suffix_max!("abcabcabc", "abcabcabc", 3);
849
850        assert_suffix_min!("abczz", "a", 1);
851        assert_suffix_max!("abczz", "abczz", 5);
852
853        assert_suffix_min!("zzabc", "zza", 3);
854        assert_suffix_max!("zzabc", "zz", 1);
855
856        assert_suffix_min!("aaa", "aaa", 1);
857        assert_suffix_max!("aaa", "aaa", 1);
858    }
859
860    quickcheck! {
861        fn qc_suffix_forward_maximal(bytes: Vec<u8>) -> bool {
862            let bytes = BString::from(bytes);
863            if bytes.is_empty() {
864                return true;
865            }
866
867            let (got, _) = get_suffix_forward(&bytes, SuffixKind::Maximal);
868            let expected = naive_maximal_suffix_forward(&bytes);
869            got == expected
870        }
871
872        fn qc_suffix_reverse_maximal(bytes: Vec<u8>) -> bool {
873            let bytes = BString::from(bytes);
874            if bytes.is_empty() {
875                return true;
876            }
877
878            let (got, _) = get_suffix_reverse(&bytes, SuffixKind::Maximal);
879            let expected = naive_maximal_suffix_reverse(&bytes);
880            got == expected
881        }
882    }
883}