foldhash/seed.rs
use core::cell::UnsafeCell;
use core::hash::BuildHasher;
use core::sync::atomic::{AtomicU8, AtomicUsize, Ordering};

use super::{
    folded_multiply, ARBITRARY2, ARBITRARY3, ARBITRARY4, ARBITRARY5, ARBITRARY6, ARBITRARY7,
    ARBITRARY8, ARBITRARY9,
};

pub mod fast {
    use super::*;
    use crate::fast::FoldHasher;

    /// A [`BuildHasher`] for [`fast::FoldHasher`]s that are randomly initialized.
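    ///
    /// # Example
    ///
    /// A minimal usage sketch, assuming this type is re-exported by the crate
    /// root as `foldhash::fast::RandomState`:
    ///
    /// ```
    /// use std::collections::HashMap;
    /// use foldhash::fast::RandomState;
    ///
    /// let mut map: HashMap<u32, &str, RandomState> =
    ///     HashMap::with_hasher(RandomState::default());
    /// map.insert(1, "one");
    /// assert_eq!(map.get(&1), Some(&"one"));
    /// ```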
    #[derive(Copy, Clone, Debug)]
    pub struct RandomState {
        per_hasher_seed: u64,
        global_seed: GlobalSeed,
    }

    impl Default for RandomState {
        fn default() -> Self {
            // We use our current stack address in combination with
            // PER_HASHER_NONDETERMINISM to create a new value that is very likely
            // to have never been used as a random state before.
            //
            // PER_HASHER_NONDETERMINISM ensures that even if we create two
            // RandomStates with the same stack location it is still highly unlikely
            // you'll end up with the same random seed.
            //
            // PER_HASHER_NONDETERMINISM is loaded and updated in a racy manner, but
            // this doesn't matter in practice - it is impossible that two different
            // threads have the same stack location, so they'll almost surely
            // generate different seeds, and provide a different possible update for
            // PER_HASHER_NONDETERMINISM. If we used a proper atomic update,
            // hash table creation would have a global point of contention that
            // users could not avoid.
            //
            // Finally, not all platforms have a 64-bit atomic, so we use usize.
            static PER_HASHER_NONDETERMINISM: AtomicUsize = AtomicUsize::new(0);
            let nondeterminism = PER_HASHER_NONDETERMINISM.load(Ordering::Relaxed) as u64;
            let stack_ptr = &nondeterminism as *const _ as u64;
            let per_hasher_seed = folded_multiply(nondeterminism ^ stack_ptr, ARBITRARY2);
            PER_HASHER_NONDETERMINISM.store(per_hasher_seed as usize, Ordering::Relaxed);

            Self {
                per_hasher_seed,
                global_seed: GlobalSeed::new(),
            }
        }
    }

    impl BuildHasher for RandomState {
        type Hasher = FoldHasher;

        fn build_hasher(&self) -> FoldHasher {
            FoldHasher::with_seed(self.per_hasher_seed, self.global_seed.get())
        }
    }

    /// A [`BuildHasher`] for [`fast::FoldHasher`]s that all have the same fixed seed.
    ///
    /// Not recommended unless you absolutely need determinism.
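    ///
    /// # Example
    ///
    /// A minimal sketch of deterministic hashing, assuming this type is
    /// re-exported by the crate root as `foldhash::fast::FixedState`:
    ///
    /// ```
    /// use core::hash::BuildHasher;
    /// use foldhash::fast::FixedState;
    ///
    /// let state = FixedState::with_seed(42);
    /// // The same input always hashes to the same value for a given seed.
    /// assert_eq!(state.hash_one("hello"), state.hash_one("hello"));
    /// ```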
    #[derive(Copy, Clone, Debug)]
    pub struct FixedState {
        per_hasher_seed: u64,
    }

    impl FixedState {
        /// Creates a [`FixedState`] with the given seed.
        pub const fn with_seed(seed: u64) -> Self {
            // XOR with ARBITRARY3 such that with_seed(0) matches default.
            Self {
                per_hasher_seed: seed ^ ARBITRARY3,
            }
        }
    }

    impl Default for FixedState {
        fn default() -> Self {
            Self {
                per_hasher_seed: ARBITRARY3,
            }
        }
    }

    impl BuildHasher for FixedState {
        type Hasher = FoldHasher;

        fn build_hasher(&self) -> FoldHasher {
            FoldHasher::with_seed(
                self.per_hasher_seed,
                &[ARBITRARY4, ARBITRARY5, ARBITRARY6, ARBITRARY7],
            )
        }
    }
}

pub mod quality {
    use super::*;
    use crate::quality::FoldHasher;

    /// A [`BuildHasher`] for [`quality::FoldHasher`]s that are randomly initialized.
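    ///
    /// # Example
    ///
    /// A minimal one-shot hashing sketch, assuming this type is re-exported by
    /// the crate root as `foldhash::quality::RandomState`:
    ///
    /// ```
    /// use core::hash::BuildHasher;
    /// use foldhash::quality::RandomState;
    ///
    /// let state = RandomState::default();
    /// // Hashes are stable within one `RandomState`, but the seed is
    /// // randomized per state, so values differ across program runs.
    /// assert_eq!(state.hash_one(12345u64), state.hash_one(12345u64));
    /// ```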
    #[derive(Copy, Clone, Default, Debug)]
    pub struct RandomState {
        inner: fast::RandomState,
    }

    impl BuildHasher for RandomState {
        type Hasher = FoldHasher;

        fn build_hasher(&self) -> FoldHasher {
            FoldHasher {
                inner: self.inner.build_hasher(),
            }
        }
    }

    /// A [`BuildHasher`] for [`quality::FoldHasher`]s that all have the same fixed seed.
    ///
    /// Not recommended unless you absolutely need determinism.
    #[derive(Copy, Clone, Default, Debug)]
    pub struct FixedState {
        inner: fast::FixedState,
    }

    impl FixedState {
        /// Creates a [`FixedState`] with the given seed.
        pub const fn with_seed(seed: u64) -> Self {
            Self {
                // We do an additional folded multiply with the seed here for
                // the quality hash to ensure better independence between seed
                // and hash. If the seed is zero the folded multiply is zero,
                // preserving with_seed(0) == default().
                inner: fast::FixedState::with_seed(folded_multiply(seed, ARBITRARY8)),
            }
        }
    }

    impl BuildHasher for FixedState {
        type Hasher = FoldHasher;

        fn build_hasher(&self) -> FoldHasher {
            FoldHasher {
                inner: self.inner.build_hasher(),
            }
        }
    }
}

fn generate_global_seed() -> [u64; 4] {
    let mix = |seed: u64, x: u64| folded_multiply(seed ^ x, ARBITRARY9);

    // Use address space layout randomization as our main randomness source.
    // This isn't great, but we don't advertise HashDoS resistance in the first
    // place. This is a whole lot better than nothing, at near zero cost with
    // no dependencies.
    let mut seed = 0;
    let stack_ptr = &seed as *const _;
    let func_ptr = generate_global_seed;
    let static_ptr = &GLOBAL_SEED_STORAGE as *const _;
    seed = mix(seed, stack_ptr as usize as u64);
    seed = mix(seed, func_ptr as usize as u64);
    seed = mix(seed, static_ptr as usize as u64);

    // If we have the standard library available, augment entropy with the
    // current time and an address from the allocator.
    #[cfg(feature = "std")]
    {
        let now = std::time::SystemTime::now();
        if let Ok(duration) = now.duration_since(std::time::UNIX_EPOCH) {
            let ns = duration.as_nanos();
            seed = mix(seed, ns as u64);
            seed = mix(seed, (ns >> 64) as u64);
        }

        let box_ptr = &*Box::new(0u8) as *const _;
        seed = mix(seed, box_ptr as usize as u64);
    }

    let seed_a = mix(seed, 0);
    let seed_b = mix(mix(mix(seed_a, 0), 0), 0);
    let seed_c = mix(mix(mix(seed_b, 0), 0), 0);
    let seed_d = mix(mix(mix(seed_c, 0), 0), 0);

    // Zeroes form a weak-point for the multiply-mix, and zeroes tend to be
    // a common input. So we want our global seeds that are XOR'ed with the
    // input to always be non-zero. To also ensure there is always a good spread
    // of bits, we give up 3 bits of entropy and simply force some bits on.
    const FORCED_ONES: u64 = (1 << 63) | (1 << 31) | 1;
    [
        seed_a | FORCED_ONES,
        seed_b | FORCED_ONES,
        seed_c | FORCED_ONES,
        seed_d | FORCED_ONES,
    ]
}

// Now all the below code purely exists to cache the above seed as
// efficiently as possible. Even if we weren't a no_std crate and had access to
// OnceLock, we don't want to check whether the global is set each time we
// hash an object, so we hand-roll a global storage where type safety allows us
// to assume the storage is initialized after construction.
struct GlobalSeedStorage {
    state: AtomicU8,
    seed: UnsafeCell<[u64; 4]>,
}

const UNINIT: u8 = 0;
const LOCKED: u8 = 1;
const INIT: u8 = 2;

// SAFETY: we only mutate the UnsafeCells when state is in the thread-exclusive
// LOCKED state, and only read the UnsafeCells when state is in the
// once-achieved-eternally-preserved state INIT.
unsafe impl Sync for GlobalSeedStorage {}

static GLOBAL_SEED_STORAGE: GlobalSeedStorage = GlobalSeedStorage {
    state: AtomicU8::new(UNINIT),
    seed: UnsafeCell::new([0; 4]),
};

/// An object representing an initialized global seed.
///
/// Does not actually store the seed inside itself; it is a zero-sized type.
/// This prevents inflating the RandomState size and in turn HashMap's size.
#[derive(Copy, Clone, Debug)]
pub struct GlobalSeed {
    // So we can't accidentally type GlobalSeed { } within this crate.
    _no_accidental_unsafe_init: (),
}

impl GlobalSeed {
    #[inline(always)]
    pub fn new() -> Self {
        if GLOBAL_SEED_STORAGE.state.load(Ordering::Acquire) != INIT {
            Self::init_slow()
        }
        Self {
            _no_accidental_unsafe_init: (),
        }
    }

    #[cold]
    #[inline(never)]
    fn init_slow() {
        // Generate seed outside of critical section.
        let seed = generate_global_seed();

        loop {
            match GLOBAL_SEED_STORAGE.state.compare_exchange_weak(
                UNINIT,
                LOCKED,
                Ordering::Relaxed,
                Ordering::Acquire,
            ) {
                Ok(_) => unsafe {
                    // SAFETY: we just acquired an exclusive lock.
                    *GLOBAL_SEED_STORAGE.seed.get() = seed;
                    GLOBAL_SEED_STORAGE.state.store(INIT, Ordering::Release);
                    return;
                },

                Err(INIT) => return,

                // Yes, it's a spin loop. We are a no_std crate (so no easy
                // access to proper locks), this is a one-time-per-program
                // initialization, and the critical section is only a few
                // store instructions, so it'll be fine.
                _ => core::hint::spin_loop(),
            }
        }
    }

    #[inline(always)]
    pub fn get(self) -> &'static [u64; 4] {
        // SAFETY: our constructor ensured we are in the INIT state and thus
        // this raw read does not race with any write.
        unsafe { &*GLOBAL_SEED_STORAGE.seed.get() }
    }
}
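
// A minimal test sketch exercising the seed properties documented above:
// `with_seed(0)` matches `default()` for both fixed states, and the cached
// global seed words are non-zero and identical across `GlobalSeed` handles.
#[cfg(test)]
mod tests {
    use super::*;
    use core::hash::BuildHasher;

    #[test]
    fn fixed_state_with_seed_zero_matches_default() {
        // fast: with_seed(0) XORs the seed with ARBITRARY3, which is exactly
        // what default() stores, so hashes must agree.
        let a = fast::FixedState::with_seed(0);
        let b = fast::FixedState::default();
        assert_eq!(a.hash_one(42u64), b.hash_one(42u64));

        // quality: folded_multiply(0, ARBITRARY8) == 0, so the same holds.
        let a = quality::FixedState::with_seed(0);
        let b = quality::FixedState::default();
        assert_eq!(a.hash_one("hello"), b.hash_one("hello"));
    }

    #[test]
    fn global_seed_is_initialized_and_nonzero() {
        // FORCED_ONES guarantees every cached seed word is non-zero, and all
        // GlobalSeed handles read the same static storage.
        let s1 = GlobalSeed::new().get();
        let s2 = GlobalSeed::new().get();
        assert_eq!(s1, s2);
        assert!(s1.iter().all(|&word| word != 0));
    }
}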