Struct CodePointInversionList

struct CodePointInversionList<'data> { ... }

A membership wrapper around an inversion list of code points.

Exposes membership functions, along with constructors from serialized CodePointSets (sets of code points) and from predefined ranges.

Implementations

impl<'data> CodePointInversionList<'data>

fn try_from_inversion_list(inv_list: ZeroVec<'data, PotentialCodePoint>) -> Result<Self, InvalidSetError>

Returns a new CodePointInversionList from an inversion list represented as a ZeroVec<PotentialCodePoint> of code points.

The inversion list must be of even length, sorted in ascending order with non-overlapping ranges, and within the bounds of 0x0 -> 0x10FFFF inclusive, with the end point of each range being exclusive.

Examples

use icu::collections::codepointinvlist::CodePointInversionList;
use icu::collections::codepointinvlist::InvalidSetError;
use potential_utf::PotentialCodePoint;
use zerovec::ZeroVec;

let valid = [0x0, 0x10000];
let inv_list: ZeroVec<PotentialCodePoint> = valid
    .into_iter()
    .map(PotentialCodePoint::from_u24)
    .collect();
let result = CodePointInversionList::try_from_inversion_list(inv_list);
assert!(matches!(result, Ok(_)));

let invalid = vec![0x0, 0x80, 0x3];
let inv_list: ZeroVec<PotentialCodePoint> = invalid
    .iter()
    .copied()
    .map(PotentialCodePoint::from_u24)
    .collect();
let result = CodePointInversionList::try_from_inversion_list(inv_list);
assert!(matches!(result, Err(InvalidSetError(_))));
if let Err(InvalidSetError(actual)) = result {
    assert_eq!(
        &invalid,
        &actual.into_iter().map(u32::from).collect::<Vec<_>>()
    );
}
fn all() -> Self

Returns a CodePointInversionList spanning the entire Unicode range.

The range spans from 0x0 -> 0x10FFFF inclusive.

Examples

use icu::collections::codepointinvlist::CodePointInversionList;

let expected = [0x0, (char::MAX as u32) + 1];
assert_eq!(
    CodePointInversionList::all().get_inversion_list_vec(),
    expected
);
assert_eq!(
    CodePointInversionList::all().size(),
    (expected[1] - expected[0]) as usize
);
fn bmp() -> Self

Returns a CodePointInversionList spanning the BMP (Basic Multilingual Plane) range.

The range spans from 0x0 -> 0xFFFF inclusive.

Examples

use icu::collections::codepointinvlist::CodePointInversionList;

const BMP_MAX: u32 = 0xFFFF;

let expected = [0x0, BMP_MAX + 1];
assert_eq!(
    CodePointInversionList::bmp().get_inversion_list_vec(),
    expected
);
assert_eq!(
    CodePointInversionList::bmp().size(),
    (expected[1] - expected[0]) as usize
);
fn iter_chars(self: &Self) -> impl Iterator<Item = char> + '_

Returns an iterator over the characters contained in the CodePointInversionList.

Examples

use icu::collections::codepointinvlist::CodePointInversionList;
let example_list = [0x41, 0x44, 0x45, 0x46];
let example = CodePointInversionList::try_from_u32_inversion_list_slice(
    &example_list,
)
.unwrap();
let mut ex_iter_chars = example.iter_chars();
assert_eq!(Some('A'), ex_iter_chars.next());
assert_eq!(Some('B'), ex_iter_chars.next());
assert_eq!(Some('C'), ex_iter_chars.next());
assert_eq!(Some('E'), ex_iter_chars.next());
assert_eq!(None, ex_iter_chars.next());
fn iter_ranges(self: &Self) -> impl ExactSizeIterator<Item = RangeInclusive<u32>> + '_

Yields an Iterator returning the ranges of the code points that are included in the CodePointInversionList

Ranges are returned as RangeInclusive, which is inclusive of its end bound value. This end-inclusive behavior matches the ICU4C/J behavior of ranges, e.g. UnicodeSet::contains(UChar32 start, UChar32 end).

Examples

use icu::collections::codepointinvlist::CodePointInversionList;
let example_list = [0x41, 0x44, 0x45, 0x46];
let example = CodePointInversionList::try_from_u32_inversion_list_slice(
    &example_list,
)
.unwrap();
let mut example_iter_ranges = example.iter_ranges();
assert_eq!(Some(0x41..=0x43), example_iter_ranges.next());
assert_eq!(Some(0x45..=0x45), example_iter_ranges.next());
assert_eq!(None, example_iter_ranges.next());
fn iter_ranges_complemented(self: &Self) -> impl Iterator<Item = RangeInclusive<u32>> + '_

Yields an Iterator returning the ranges of the code points that are not included in the CodePointInversionList

Ranges are returned as RangeInclusive, which is inclusive of its end bound value. This end-inclusive behavior matches the ICU4C/J behavior of ranges, e.g. UnicodeSet::contains(UChar32 start, UChar32 end).

Examples

use icu::collections::codepointinvlist::CodePointInversionList;
let example_list = [0x41, 0x44, 0x45, 0x46];
let example = CodePointInversionList::try_from_u32_inversion_list_slice(
    &example_list,
)
.unwrap();
let mut example_iter_ranges = example.iter_ranges_complemented();
assert_eq!(Some(0..=0x40), example_iter_ranges.next());
assert_eq!(Some(0x44..=0x44), example_iter_ranges.next());
assert_eq!(Some(0x46..=char::MAX as u32), example_iter_ranges.next());
assert_eq!(None, example_iter_ranges.next());
fn get_range_count(self: &Self) -> usize

Returns the number of ranges contained in this CodePointInversionList

fn get_nth_range(self: &Self, idx: usize) -> Option<RangeInclusive<u32>>

Returns a specific range contained in this CodePointInversionList by index. Intended for use in FFI.

fn size(self: &Self) -> usize

Returns the number of elements of the CodePointInversionList

fn is_empty(self: &Self) -> bool

Returns whether or not the CodePointInversionList is empty

fn contains(self: &Self, query: char) -> bool

Checks whether the query is in the CodePointInversionList.

Runs a binary search in O(log(n)), where n is the number of start and end points in the set, using the core implementation.

Examples

use icu::collections::codepointinvlist::CodePointInversionList;
let example_list = [0x41, 0x43, 0x44, 0x45];
let example = CodePointInversionList::try_from_u32_inversion_list_slice(
    &example_list,
)
.unwrap();
assert!(example.contains('A'));
assert!(!example.contains('C'));
fn contains32(self: &Self, query: u32) -> bool

Checks whether the unsigned integer is in the CodePointInversionList.

Note: Even though u32 and char in Rust are both non-negative 4-byte values, there is an important difference. A u32 can take any value up to u32::MAX, while a char in Rust is defined to be in the range from 0 to the maximum valid Unicode Scalar Value.

Runs a binary search in O(log(n)), where n is the number of start and end points in the set, using the core implementation.

Examples

use icu::collections::codepointinvlist::CodePointInversionList;
let example_list = [0x41, 0x43, 0x44, 0x45];
let example = CodePointInversionList::try_from_u32_inversion_list_slice(
    &example_list,
)
.unwrap();
assert!(example.contains32(0x41));
assert!(!example.contains32(0x43));
fn contains_range(self: &Self, range: impl RangeBounds<char>) -> bool

Checks to see if the range is in the CodePointInversionList

Runs a binary search in O(log(n)), where n is the number of start and end points in the set, using the Vec implementation. The search runs only once, on the start parameter; the end parameter is then checked in a single O(1) step.

Examples

use icu::collections::codepointinvlist::CodePointInversionList;
let example_list = [0x41, 0x43, 0x44, 0x45];
let example = CodePointInversionList::try_from_u32_inversion_list_slice(
    &example_list,
)
.unwrap();
assert!(example.contains_range('A'..'C'));
assert!(example.contains_range('A'..='B'));
assert!(!example.contains_range('A'..='C'));

Returns false if the range contains surrogate code points (0xD800 -> 0xDFFF) but the CodePointInversionList does not.

Note: when comparing to ICU4C/J, keep in mind that a Range in Rust is constructed inclusive of its start boundary and exclusive of its end boundary. The ICU4C/J UnicodeSet::contains(UChar32 start, UChar32 end) method differs by including the end boundary.

Examples

use icu::collections::codepointinvlist::CodePointInversionList;
use std::char;
let check =
    char::from_u32(0xD7FE).unwrap()..char::from_u32(0xE001).unwrap();
let example_list = [0xD7FE, 0xD7FF, 0xE000, 0xE001];
let example = CodePointInversionList::try_from_u32_inversion_list_slice(
    &example_list,
)
.unwrap();
assert!(!example.contains_range(check));
fn contains_set(self: &Self, set: &Self) -> bool

Checks whether the calling CodePointInversionList contains all the characters of the given CodePointInversionList.

Examples

use icu::collections::codepointinvlist::CodePointInversionList;
let example_list = [0x41, 0x46, 0x55, 0x5B]; // A - E, U - Z
let example = CodePointInversionList::try_from_u32_inversion_list_slice(
    &example_list,
)
.unwrap();
let a_to_d = CodePointInversionList::try_from_u32_inversion_list_slice(&[
    0x41, 0x45,
])
.unwrap();
let f_to_t = CodePointInversionList::try_from_u32_inversion_list_slice(&[
    0x46, 0x55,
])
.unwrap();
let r_to_x = CodePointInversionList::try_from_u32_inversion_list_slice(&[
    0x52, 0x58,
])
.unwrap();
assert!(example.contains_set(&a_to_d)); // contains all
assert!(!example.contains_set(&f_to_t)); // contains none
assert!(!example.contains_set(&r_to_x)); // contains some
fn span(self: &Self, span_str: &str, contained: bool) -> usize

Returns the end of the initial substring whose characters are all contained in the set (when contained is true) or all not contained in the set (when contained is false).

Examples

use icu::collections::codepointinvlist::CodePointInversionList;
let example_list = [0x41, 0x44]; // {A, B, C}
let example = CodePointInversionList::try_from_u32_inversion_list_slice(
    &example_list,
)
.unwrap();
assert_eq!(example.span("CABXYZ", true), 3);
assert_eq!(example.span("XYZC", false), 3);
assert_eq!(example.span("XYZ", true), 0);
assert_eq!(example.span("ABC", false), 0);
fn span_back(self: &Self, span_str: &str, contained: bool) -> usize

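Returns the start of the trailing substring (scanning backwards from the end of the string) whose characters are all contained in the set (when contained is true) or all not contained in the set (when contained is false). Returns the length of the string if the last character does not qualify, i.e. the trailing substring is empty.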

Examples

use icu::collections::codepointinvlist::CodePointInversionList;
let example_list = [0x41, 0x44]; // {A, B, C}
let example = CodePointInversionList::try_from_u32_inversion_list_slice(
    &example_list,
)
.unwrap();
assert_eq!(example.span_back("XYZCAB", true), 3);
assert_eq!(example.span_back("ABCXYZ", true), 6);
assert_eq!(example.span_back("CABXYZ", false), 3);

impl<'a> Yokeable<'a> for CodePointInversionList<'static>

fn transform(self: &'a Self) -> &'a <Self as Yokeable<'a>>::Output
fn transform_owned(self: Self) -> <Self as Yokeable<'a>>::Output
unsafe fn make(this: <Self as Yokeable<'a>>::Output) -> Self
fn transform_mut<F>(self: &'a mut Self, f: F)
where
    F: 'static + for<'b> FnOnce(&'b mut <Self as Yokeable<'a>>::Output)

impl<'data> Clone for CodePointInversionList<'data>

fn clone(self: &Self) -> CodePointInversionList<'data>

impl<'data> Debug for CodePointInversionList<'data>

fn fmt(self: &Self, f: &mut Formatter<'_>) -> Result

impl<'data> EncodeAsVarULE for CodePointInversionList<'data>

fn encode_var_ule_as_slices<R>(self: &Self, cb: impl FnOnce(&[&[u8]]) -> R) -> R
fn encode_var_ule_len(self: &Self) -> usize
fn encode_var_ule_write(self: &Self, dst: &mut [u8])

impl<'data> Eq for CodePointInversionList<'data>

impl<'data> Freeze for CodePointInversionList<'data>

impl<'data> From for CodePointInversionList<'data>

fn from(other: &'data CodePointInversionListULE) -> Self

impl<'data> PartialEq for CodePointInversionList<'data>

fn eq(self: &Self, other: &CodePointInversionList<'data>) -> bool

impl<'data> RefUnwindSafe for CodePointInversionList<'data>

impl<'data> Send for CodePointInversionList<'data>

impl<'data> StructuralPartialEq for CodePointInversionList<'data>

impl<'data> Sync for CodePointInversionList<'data>

impl<'data> Unpin for CodePointInversionList<'data>

impl<'data> UnsafeUnpin for CodePointInversionList<'data>

impl<'data> UnwindSafe for CodePointInversionList<'data>

impl<'data> ZeroFrom for CodePointInversionList<'data>

fn zero_from(other: &'data CodePointInversionListULE) -> Self

impl<'zf, 'zf_inner> ZeroFrom for CodePointInversionList<'zf>

fn zero_from(this: &'zf CodePointInversionList<'zf_inner>) -> Self

impl<T> Any for CodePointInversionList<'data>

fn type_id(self: &Self) -> TypeId

impl<T> Borrow for CodePointInversionList<'data>

fn borrow(self: &Self) -> &T

impl<T> BorrowMut for CodePointInversionList<'data>

fn borrow_mut(self: &mut Self) -> &mut T

impl<T> CloneToUninit for CodePointInversionList<'data>

unsafe fn clone_to_uninit(self: &Self, dest: *mut u8)

impl<T> ErasedDestructor for CodePointInversionList<'data>

impl<T> From for CodePointInversionList<'data>

fn from(t: T) -> T

Returns the argument unchanged.

impl<T> ToOwned for CodePointInversionList<'data>

fn to_owned(self: &Self) -> T
fn clone_into(self: &Self, target: &mut T)

impl<T, U> Into for CodePointInversionList<'data>

fn into(self: Self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

impl<T, U> TryFrom for CodePointInversionList<'data>

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto for CodePointInversionList<'data>

fn try_into(self: Self) -> Result<U, <U as TryFrom<T>>::Error>