phf_shared/
lib.rs

1//! See [the `phf` crate's documentation][phf] for details.
2//!
3//! [phf]: https://docs.rs/phf
4
5#![doc(html_root_url = "https://docs.rs/phf_shared/0.11")]
6#![cfg_attr(not(feature = "std"), no_std)]
7
8#[cfg(feature = "std")]
9extern crate std as core;
10
11use core::fmt;
12use core::hash::{Hash, Hasher};
13use core::num::Wrapping;
14use siphasher::sip128::{Hash128, Hasher128, SipHasher13};
15
16#[non_exhaustive]
17pub struct Hashes {
18    pub g: u32,
19    pub f1: u32,
20    pub f2: u32,
21}
22
23/// A central typedef for hash keys
24///
25/// Makes experimentation easier by only needing to be updated here.
26pub type HashKey = u64;
27
28#[inline]
29pub fn displace(f1: u32, f2: u32, d1: u32, d2: u32) -> u32 {
30    (Wrapping(d2) + Wrapping(f1) * Wrapping(d1) + Wrapping(f2)).0
31}
32
33/// `key` is from `phf_generator::HashState`.
34#[inline]
35pub fn hash<T: ?Sized + PhfHash>(x: &T, key: &HashKey) -> Hashes {
36    let mut hasher = SipHasher13::new_with_keys(0, *key);
37    x.phf_hash(&mut hasher);
38
39    let Hash128 {
40        h1: lower,
41        h2: upper,
42    } = hasher.finish128();
43
44    Hashes {
45        g: (lower >> 32) as u32,
46        f1: lower as u32,
47        f2: upper as u32,
48    }
49}
50
51/// Return an index into `phf_generator::HashState::map`.
52///
53/// * `hash` is from `hash()` in this crate.
54/// * `disps` is from `phf_generator::HashState::disps`.
55/// * `len` is the length of `phf_generator::HashState::map`.
56#[inline]
57pub fn get_index(hashes: &Hashes, disps: &[(u32, u32)], len: usize) -> u32 {
58    let (d1, d2) = disps[(hashes.g % (disps.len() as u32)) as usize];
59    displace(hashes.f1, hashes.f2, d1, d2) % (len as u32)
60}
61
62/// A trait implemented by types which can be used in PHF data structures.
63///
64/// This differs from the standard library's `Hash` trait in that `PhfHash`'s
65/// results must be architecture independent so that hashes will be consistent
66/// between the host and target when cross compiling.
67pub trait PhfHash {
68    /// Feeds the value into the state given, updating the hasher as necessary.
69    fn phf_hash<H: Hasher>(&self, state: &mut H);
70
71    /// Feeds a slice of this type into the state provided.
72    fn phf_hash_slice<H: Hasher>(data: &[Self], state: &mut H)
73    where
74        Self: Sized,
75    {
76        for piece in data {
77            piece.phf_hash(state);
78        }
79    }
80}
81
82/// Trait for printing types with `const` constructors, used by `phf_codegen` and `phf_macros`.
83pub trait FmtConst {
84    /// Print a `const` expression representing this value.
85    fn fmt_const(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result;
86}
87
88/// Identical to `std::borrow::Borrow` except omitting blanket impls to facilitate other
89/// borrowing patterns.
90///
91/// The same semantic requirements apply:
92///
93/// > In particular `Eq`, `Ord` and `Hash` must be equivalent for borrowed and owned values:
94/// `x.borrow() == y.borrow()` should give the same result as `x == y`.
95///
96/// (This crate's API only requires `Eq` and `PhfHash`, however.)
97///
98/// ### Motivation
99/// The conventional signature for lookup methods on collections looks something like this:
100///
101/// ```rust,ignore
102/// impl<K, V> Map<K, V> where K: PhfHash + Eq {
103///     fn get<T: ?Sized>(&self, key: &T) -> Option<&V> where T: PhfHash + Eq, K: Borrow<T> {
104///         ...
105///     }
106/// }
107/// ```
108///
109/// This allows the key type used for lookup to be different than the key stored in the map so for
110/// example you can use `&str` to look up a value in a `Map<String, _>`. However, this runs into
111/// a problem in the case where `T` and `K` are both a `Foo<_>` type constructor but
112/// the contained type is different (even being the same type with different lifetimes).
113///
114/// The main issue for this crate's API is that, with this method signature, you cannot perform a
115/// lookup on a `Map<UniCase<&'static str>, _>` with a `UniCase<&'a str>` where `'a` is not
116/// `'static`; there is no impl of `Borrow` that resolves to
117/// `impl Borrow<UniCase<'a>> for UniCase<&'static str>` and one cannot be added either because of
118/// all the blanket impls.
119///
120/// Instead, this trait is implemented conservatively, without blanket impls, so that impls like
121/// this may be added. This is feasible since the set of types that implement `PhfHash` is
122/// intentionally small.
123///
124/// This likely won't be fixable with specialization alone but will require full support for lattice
125/// impls since we technically want to add overlapping blanket impls.
126pub trait PhfBorrow<B: ?Sized> {
127    /// Convert a reference to `self` to a reference to the borrowed type.
128    fn borrow(&self) -> &B;
129}
130
131/// Create an impl of `FmtConst` delegating to `fmt::Debug` for types that can deal with it.
132///
133/// Ideally with specialization this could be just one default impl and then specialized where
134/// it doesn't apply.
135macro_rules! delegate_debug (
136    ($ty:ty) => {
137        impl FmtConst for $ty {
138            fn fmt_const(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
139                write!(f, "{:?}", self)
140            }
141        }
142    }
143);
144
145delegate_debug!(str);
146delegate_debug!(char);
147delegate_debug!(u8);
148delegate_debug!(i8);
149delegate_debug!(u16);
150delegate_debug!(i16);
151delegate_debug!(u32);
152delegate_debug!(i32);
153delegate_debug!(u64);
154delegate_debug!(i64);
155delegate_debug!(u128);
156delegate_debug!(i128);
157delegate_debug!(bool);
158
159/// `impl PhfBorrow<T> for T`
160macro_rules! impl_reflexive(
161    ($($t:ty),*) => (
162        $(impl PhfBorrow<$t> for $t {
163            fn borrow(&self) -> &$t {
164                self
165            }
166        })*
167    )
168);
169
170impl_reflexive!(
171    str,
172    char,
173    u8,
174    i8,
175    u16,
176    i16,
177    u32,
178    i32,
179    u64,
180    i64,
181    u128,
182    i128,
183    bool,
184    [u8]
185);
186
187#[cfg(feature = "std")]
188impl PhfBorrow<str> for String {
189    fn borrow(&self) -> &str {
190        self
191    }
192}
193
194#[cfg(feature = "std")]
195impl PhfBorrow<[u8]> for Vec<u8> {
196    fn borrow(&self) -> &[u8] {
197        self
198    }
199}
200
201#[cfg(feature = "std")]
202delegate_debug!(String);
203
204#[cfg(feature = "std")]
205impl PhfHash for String {
206    #[inline]
207    fn phf_hash<H: Hasher>(&self, state: &mut H) {
208        (**self).phf_hash(state)
209    }
210}
211
212#[cfg(feature = "std")]
213impl PhfHash for Vec<u8> {
214    #[inline]
215    fn phf_hash<H: Hasher>(&self, state: &mut H) {
216        (**self).phf_hash(state)
217    }
218}
219
220impl<'a, T: 'a + PhfHash + ?Sized> PhfHash for &'a T {
221    fn phf_hash<H: Hasher>(&self, state: &mut H) {
222        (*self).phf_hash(state)
223    }
224}
225
226impl<'a, T: 'a + FmtConst + ?Sized> FmtConst for &'a T {
227    fn fmt_const(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
228        (*self).fmt_const(f)
229    }
230}
231
232impl<'a> PhfBorrow<str> for &'a str {
233    fn borrow(&self) -> &str {
234        self
235    }
236}
237
238impl<'a> PhfBorrow<[u8]> for &'a [u8] {
239    fn borrow(&self) -> &[u8] {
240        self
241    }
242}
243
244impl PhfHash for str {
245    #[inline]
246    fn phf_hash<H: Hasher>(&self, state: &mut H) {
247        self.as_bytes().phf_hash(state)
248    }
249}
250
251impl PhfHash for [u8] {
252    #[inline]
253    fn phf_hash<H: Hasher>(&self, state: &mut H) {
254        state.write(self);
255    }
256}
257
258impl FmtConst for [u8] {
259    #[inline]
260    fn fmt_const(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
261        // slices need a leading reference
262        write!(f, "&{:?}", self)
263    }
264}
265
266#[cfg(feature = "unicase")]
267impl<S> PhfHash for unicase::UniCase<S>
268where
269    unicase::UniCase<S>: Hash,
270{
271    #[inline]
272    fn phf_hash<H: Hasher>(&self, state: &mut H) {
273        self.hash(state)
274    }
275}
276
277#[cfg(feature = "unicase")]
278impl<S> FmtConst for unicase::UniCase<S>
279where
280    S: AsRef<str>,
281{
282    fn fmt_const(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
283        if self.is_ascii() {
284            f.write_str("UniCase::ascii(")?;
285        } else {
286            f.write_str("UniCase::unicode(")?;
287        }
288
289        self.as_ref().fmt_const(f)?;
290        f.write_str(")")
291    }
292}
293
294#[cfg(feature = "unicase")]
295impl<'b, 'a: 'b, S: ?Sized + 'a> PhfBorrow<unicase::UniCase<&'b S>> for unicase::UniCase<&'a S> {
296    fn borrow(&self) -> &unicase::UniCase<&'b S> {
297        self
298    }
299}
300
301#[cfg(feature = "uncased")]
302impl PhfHash for uncased::UncasedStr {
303    #[inline]
304    fn phf_hash<H: Hasher>(&self, state: &mut H) {
305        self.hash(state)
306    }
307}
308
309#[cfg(feature = "uncased")]
310impl FmtConst for uncased::UncasedStr {
311    fn fmt_const(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
312        // transmute is not stable in const fns (rust-lang/rust#53605), so
313        // `UncasedStr::new` can't be a const fn itself, but we can inline the
314        // call to transmute here in the meantime.
315        f.write_str("unsafe { ::core::mem::transmute::<&'static str, &'static UncasedStr>(")?;
316        self.as_str().fmt_const(f)?;
317        f.write_str(") }")
318    }
319}
320
321#[cfg(feature = "uncased")]
322impl PhfBorrow<uncased::UncasedStr> for &uncased::UncasedStr {
323    fn borrow(&self) -> &uncased::UncasedStr {
324        self
325    }
326}
327
328macro_rules! sip_impl (
329    (le $t:ty) => (
330        impl PhfHash for $t {
331            #[inline]
332            fn phf_hash<H: Hasher>(&self, state: &mut H) {
333                self.to_le().hash(state);
334            }
335        }
336    );
337    ($t:ty) => (
338        impl PhfHash for $t {
339            #[inline]
340            fn phf_hash<H: Hasher>(&self, state: &mut H) {
341                self.hash(state);
342            }
343        }
344    )
345);
346
347sip_impl!(u8);
348sip_impl!(i8);
349sip_impl!(le u16);
350sip_impl!(le i16);
351sip_impl!(le u32);
352sip_impl!(le i32);
353sip_impl!(le u64);
354sip_impl!(le i64);
355sip_impl!(le u128);
356sip_impl!(le i128);
357sip_impl!(bool);
358
359impl PhfHash for char {
360    #[inline]
361    fn phf_hash<H: Hasher>(&self, state: &mut H) {
362        (*self as u32).phf_hash(state)
363    }
364}
365
366// minimize duplicated code since formatting drags in quite a bit
367fn fmt_array<T: core::fmt::Debug>(array: &[T], f: &mut fmt::Formatter<'_>) -> fmt::Result {
368    write!(f, "{:?}", array)
369}
370
371macro_rules! array_impl (
372    ($t:ty) => (
373        impl<const N: usize> PhfHash for [$t; N] {
374            #[inline]
375            fn phf_hash<H: Hasher>(&self, state: &mut H) {
376                for v in &self[..] {
377                    v.phf_hash(state);
378                }
379            }
380        }
381
382        impl<const N: usize> FmtConst for [$t; N] {
383            fn fmt_const(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
384                fmt_array(self, f)
385            }
386        }
387
388        impl<const N: usize> PhfBorrow<[$t]> for [$t; N] {
389            fn borrow(&self) -> &[$t] {
390                self
391            }
392        }
393    )
394);
395
396array_impl!(u8);
397array_impl!(i8);
398array_impl!(u16);
399array_impl!(i16);
400array_impl!(u32);
401array_impl!(i32);
402array_impl!(u64);
403array_impl!(i64);
404array_impl!(u128);
405array_impl!(i128);
406array_impl!(bool);
407array_impl!(char);