foldhash/
seed.rs

use core::cell::UnsafeCell;
use core::hash::BuildHasher;
use core::sync::atomic::{AtomicU8, AtomicUsize, Ordering};

use super::{
    folded_multiply, ARBITRARY2, ARBITRARY3, ARBITRARY4, ARBITRARY5, ARBITRARY6, ARBITRARY7,
    ARBITRARY8, ARBITRARY9,
};

pub mod fast {
    use super::*;
    use crate::fast::FoldHasher;

    /// A [`BuildHasher`] for [`fast::FoldHasher`]s that are randomly initialized.
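    ///
    /// # Example
    ///
    /// A usage sketch (assuming this type is re-exported at `foldhash::fast`,
    /// as in the crate's public API):
    ///
    /// ```
    /// use core::hash::BuildHasher;
    /// use foldhash::fast::RandomState;
    ///
    /// let a = RandomState::default();
    /// let b = RandomState::default();
    /// // Each state draws a fresh per-hasher seed, so two independently
    /// // created states almost surely hash the same key differently.
    /// assert_ne!(a.hash_one("hello"), b.hash_one("hello"));
    /// ```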
    #[derive(Copy, Clone, Debug)]
    pub struct RandomState {
        per_hasher_seed: u64,
        global_seed: GlobalSeed,
    }

    impl Default for RandomState {
        fn default() -> Self {
            // We use our current stack address in combination with
            // PER_HASHER_NONDETERMINISM to create a new value that is very likely
            // to have never been used as a random state before.
            //
            // PER_HASHER_NONDETERMINISM ensures that even if we create two
            // RandomStates with the same stack location it is still highly unlikely
            // you'll end up with the same random seed.
            //
            // PER_HASHER_NONDETERMINISM is loaded and updated in a racy manner, but
            // this doesn't matter in practice - it is impossible for two different
            // threads to have the same stack location, so they'll almost surely
            // generate different seeds, and provide a different possible update for
            // PER_HASHER_NONDETERMINISM. If we used a proper atomic update then
            // hash table creation would have a global contention point, which
            // users could not avoid.
            //
            // Finally, not all platforms have a 64-bit atomic, so we use usize.
            static PER_HASHER_NONDETERMINISM: AtomicUsize = AtomicUsize::new(0);
            let nondeterminism = PER_HASHER_NONDETERMINISM.load(Ordering::Relaxed) as u64;
            let stack_ptr = &nondeterminism as *const _ as u64;
            let per_hasher_seed = folded_multiply(nondeterminism ^ stack_ptr, ARBITRARY2);
            PER_HASHER_NONDETERMINISM.store(per_hasher_seed as usize, Ordering::Relaxed);

            Self {
                per_hasher_seed,
                global_seed: GlobalSeed::new(),
            }
        }
    }

    impl BuildHasher for RandomState {
        type Hasher = FoldHasher;

        fn build_hasher(&self) -> FoldHasher {
            FoldHasher::with_seed(self.per_hasher_seed, self.global_seed.get())
        }
    }

    /// A [`BuildHasher`] for [`fast::FoldHasher`]s that all have the same fixed seed.
    ///
    /// Not recommended unless you absolutely need determinism.
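    ///
    /// # Example
    ///
    /// A determinism sketch (assuming re-export at `foldhash::fast`, as in
    /// the crate's public API):
    ///
    /// ```
    /// use core::hash::BuildHasher;
    /// use foldhash::fast::FixedState;
    ///
    /// // with_seed(0) is defined to match default().
    /// let a = FixedState::with_seed(0);
    /// let b = FixedState::default();
    /// assert_eq!(a.hash_one(42u64), b.hash_one(42u64));
    ///
    /// // The same seed always produces the same hashes.
    /// assert_eq!(
    ///     FixedState::with_seed(123).hash_one("foo"),
    ///     FixedState::with_seed(123).hash_one("foo"),
    /// );
    /// ```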
    #[derive(Copy, Clone, Debug)]
    pub struct FixedState {
        per_hasher_seed: u64,
    }

    impl FixedState {
        /// Creates a [`FixedState`] with the given seed.
        pub const fn with_seed(seed: u64) -> Self {
            // XOR with ARBITRARY3 such that with_seed(0) matches default.
            Self {
                per_hasher_seed: seed ^ ARBITRARY3,
            }
        }
    }

    impl Default for FixedState {
        fn default() -> Self {
            Self {
                per_hasher_seed: ARBITRARY3,
            }
        }
    }

    impl BuildHasher for FixedState {
        type Hasher = FoldHasher;

        fn build_hasher(&self) -> FoldHasher {
            FoldHasher::with_seed(
                self.per_hasher_seed,
                &[ARBITRARY4, ARBITRARY5, ARBITRARY6, ARBITRARY7],
            )
        }
    }
}

pub mod quality {
    use super::*;
    use crate::quality::FoldHasher;

    /// A [`BuildHasher`] for [`quality::FoldHasher`]s that are randomly initialized.
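    ///
    /// # Example
    ///
    /// A usage sketch (assuming re-export at `foldhash::quality`, as in the
    /// crate's public API):
    ///
    /// ```
    /// use std::collections::HashMap;
    /// use foldhash::quality::RandomState;
    ///
    /// let mut map: HashMap<&str, u32, RandomState> = HashMap::default();
    /// map.insert("answer", 42);
    /// assert_eq!(map.get("answer"), Some(&42));
    /// ```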
    #[derive(Copy, Clone, Default, Debug)]
    pub struct RandomState {
        inner: fast::RandomState,
    }

    impl BuildHasher for RandomState {
        type Hasher = FoldHasher;

        fn build_hasher(&self) -> FoldHasher {
            FoldHasher {
                inner: self.inner.build_hasher(),
            }
        }
    }

    /// A [`BuildHasher`] for [`quality::FoldHasher`]s that all have the same fixed seed.
    ///
    /// Not recommended unless you absolutely need determinism.
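    ///
    /// # Example
    ///
    /// A determinism sketch (assuming re-export at `foldhash::quality`, as
    /// in the crate's public API):
    ///
    /// ```
    /// use core::hash::BuildHasher;
    /// use foldhash::quality::FixedState;
    ///
    /// // The folded multiply maps seed 0 to 0, so with_seed(0) still
    /// // matches default().
    /// assert_eq!(
    ///     FixedState::with_seed(0).hash_one(1u32),
    ///     FixedState::default().hash_one(1u32),
    /// );
    /// ```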
    #[derive(Copy, Clone, Default, Debug)]
    pub struct FixedState {
        inner: fast::FixedState,
    }

    impl FixedState {
        /// Creates a [`FixedState`] with the given seed.
        pub const fn with_seed(seed: u64) -> Self {
            Self {
                // We do an additional folded multiply with the seed here for
                // the quality hash to ensure better independence between seed
                // and hash. If the seed is zero the folded multiply is zero,
                // preserving with_seed(0) == default().
                inner: fast::FixedState::with_seed(folded_multiply(seed, ARBITRARY8)),
            }
        }
    }

    impl BuildHasher for FixedState {
        type Hasher = FoldHasher;

        fn build_hasher(&self) -> FoldHasher {
            FoldHasher {
                inner: self.inner.build_hasher(),
            }
        }
    }
}

fn generate_global_seed() -> [u64; 4] {
    let mix = |seed: u64, x: u64| folded_multiply(seed ^ x, ARBITRARY9);

    // Use address space layout randomization as our main randomness source.
    // This isn't great, but we don't advertise HashDoS resistance in the first
    // place. This is a whole lot better than nothing, at near zero cost with
    // no dependencies.
    let mut seed = 0;
    let stack_ptr = &seed as *const _;
    let func_ptr = generate_global_seed;
    let static_ptr = &GLOBAL_SEED_STORAGE as *const _;
    seed = mix(seed, stack_ptr as usize as u64);
    seed = mix(seed, func_ptr as usize as u64);
    seed = mix(seed, static_ptr as usize as u64);

    // If we have the standard library available, augment entropy with the
    // current time and an address from the allocator.
    #[cfg(feature = "std")]
    {
        let now = std::time::SystemTime::now();
        if let Ok(duration) = now.duration_since(std::time::UNIX_EPOCH) {
            let ns = duration.as_nanos();
            seed = mix(seed, ns as u64);
            seed = mix(seed, (ns >> 64) as u64);
        }

        let box_ptr = &*Box::new(0u8) as *const _;
        seed = mix(seed, box_ptr as usize as u64);
    }

    let seed_a = mix(seed, 0);
    let seed_b = mix(mix(mix(seed_a, 0), 0), 0);
    let seed_c = mix(mix(mix(seed_b, 0), 0), 0);
    let seed_d = mix(mix(mix(seed_c, 0), 0), 0);

    // Zeroes form a weak point for the multiply-mix, and zeroes tend to be
    // a common input. So we want the global seeds that get XOR'ed with the
    // input to always be non-zero. To also ensure there is always a good
    // spread of bits, we give up 3 bits of entropy and simply force some
    // bits on.
    const FORCED_ONES: u64 = (1 << 63) | (1 << 31) | 1;
    [
        seed_a | FORCED_ONES,
        seed_b | FORCED_ONES,
        seed_c | FORCED_ONES,
        seed_d | FORCED_ONES,
    ]
}
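
#[cfg(test)]
mod seed_generation_tests {
    use super::*;

    // A sanity-check sketch (not part of the original crate): every word of
    // the generated global seed must have the three forced bits set, and can
    // therefore never be zero, matching the FORCED_ONES reasoning in
    // generate_global_seed above.
    #[test]
    fn global_seed_words_are_never_zero() {
        const FORCED_ONES: u64 = (1 << 63) | (1 << 31) | 1;
        for word in generate_global_seed() {
            assert_eq!(word & FORCED_ONES, FORCED_ONES);
            assert_ne!(word, 0);
        }
    }
}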

// All the code below exists purely to cache the above seed as efficiently
// as possible. Even if we weren't a no_std crate and had access to OnceLock,
// we wouldn't want to check whether the global is set each time we hash an
// object, so we hand-roll a global storage type whose type safety lets us
// assume the storage is initialized after construction.
struct GlobalSeedStorage {
    state: AtomicU8,
    seed: UnsafeCell<[u64; 4]>,
}

const UNINIT: u8 = 0;
const LOCKED: u8 = 1;
const INIT: u8 = 2;

// SAFETY: we only mutate the UnsafeCells when state is in the thread-exclusive
// LOCKED state, and only read the UnsafeCells when state is in the
// once-achieved-eternally-preserved state INIT.
unsafe impl Sync for GlobalSeedStorage {}

static GLOBAL_SEED_STORAGE: GlobalSeedStorage = GlobalSeedStorage {
    state: AtomicU8::new(UNINIT),
    seed: UnsafeCell::new([0; 4]),
};

/// An object representing an initialized global seed.
///
/// It does not actually store the seed inside itself: it is a zero-sized type.
/// This prevents inflating the RandomState size and in turn HashMap's size.
#[derive(Copy, Clone, Debug)]
pub struct GlobalSeed {
    // So we can't accidentally type GlobalSeed { } within this crate.
    _no_accidental_unsafe_init: (),
}

impl GlobalSeed {
    #[inline(always)]
    pub fn new() -> Self {
        if GLOBAL_SEED_STORAGE.state.load(Ordering::Acquire) != INIT {
            Self::init_slow()
        }
        Self {
            _no_accidental_unsafe_init: (),
        }
    }

    #[cold]
    #[inline(never)]
    fn init_slow() {
        // Generate the seed outside of the critical section.
        let seed = generate_global_seed();

        loop {
            match GLOBAL_SEED_STORAGE.state.compare_exchange_weak(
                UNINIT,
                LOCKED,
                Ordering::Relaxed,
                Ordering::Acquire,
            ) {
                Ok(_) => unsafe {
                    // SAFETY: we just acquired an exclusive lock.
                    *GLOBAL_SEED_STORAGE.seed.get() = seed;
                    GLOBAL_SEED_STORAGE.state.store(INIT, Ordering::Release);
                    return;
                },

                Err(INIT) => return,

                // Yes, it's a spin loop. We are a no_std crate (so no easy
                // access to proper locks), this is a one-time-per-program
                // initialization, and the critical section is only a few
                // store instructions, so it'll be fine.
                _ => core::hint::spin_loop(),
            }
        }
    }

    #[inline(always)]
    pub fn get(self) -> &'static [u64; 4] {
        // SAFETY: our constructor ensured we are in the INIT state and thus
        // this raw read does not race with any write.
        unsafe { &*GLOBAL_SEED_STORAGE.seed.get() }
    }
}
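
#[cfg(test)]
mod global_seed_tests {
    use super::*;

    // A caching-behavior sketch (not part of the original crate): repeated
    // GlobalSeed::new() calls must observe the same initialized storage,
    // since the seed is generated at most once and then cached for the
    // lifetime of the program.
    #[test]
    fn global_seed_is_generated_once_and_stable() {
        let a = GlobalSeed::new().get();
        let b = GlobalSeed::new().get();
        // Both references point at the single static storage slot...
        assert_eq!(a as *const [u64; 4], b as *const [u64; 4]);
        // ...and the cached seed words are identical across calls.
        assert_eq!(a, b);
    }
}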