Skip to content

Commit ba8c34c

Browse files
committed
Improve short-string performance by helping inlining
1 parent 555f5ff commit ba8c34c

File tree

3 files changed

+90
-107
lines changed

3 files changed

+90
-107
lines changed

src/fast.rs

Lines changed: 19 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -3,37 +3,31 @@
33
use core::hash::{BuildHasher, Hasher};
44

55
use crate::seed::{gen_per_hasher_seed, GlobalSeed, SharedSeed};
6-
use crate::{folded_multiply, hash_bytes_long, hash_bytes_medium, rotate_right, ARBITRARY3};
6+
use crate::{folded_multiply, hash_bytes_short, hash_bytes_long, rotate_right, ARBITRARY3};
77

88
/// A [`Hasher`] instance implementing foldhash, optimized for speed.
99
///
1010
/// While you can create one directly with [`FoldHasher::with_seed`], you
1111
/// most likely want to use [`RandomState`], [`SeedableRandomState`] or
1212
/// [`FixedState`] to create [`FoldHasher`]s.
1313
#[derive(Clone)]
14-
pub struct FoldHasher {
14+
pub struct FoldHasher<'a> {
1515
accumulator: u64,
1616
sponge: u128,
1717
sponge_len: u8,
18-
fold_seed: u64,
19-
expand_seed: u64,
20-
expand_seed2: u64,
21-
expand_seed3: u64,
18+
seeds: &'a [u64; 4],
2219
}
2320

24-
impl FoldHasher {
21+
impl<'a> FoldHasher<'a> {
2522
/// Initializes this [`FoldHasher`] with the given per-hasher seed and
2623
/// [`SharedSeed`].
2724
#[inline]
28-
pub fn with_seed(per_hasher_seed: u64, shared_seed: &SharedSeed) -> FoldHasher {
25+
pub fn with_seed(per_hasher_seed: u64, shared_seed: &'a SharedSeed) -> FoldHasher<'a> {
2926
FoldHasher {
3027
accumulator: per_hasher_seed,
3128
sponge: 0,
3229
sponge_len: 0,
33-
fold_seed: shared_seed.seeds[0],
34-
expand_seed: shared_seed.seeds[1],
35-
expand_seed2: shared_seed.seeds[2],
36-
expand_seed3: shared_seed.seeds[3],
30+
seeds: &shared_seed.seeds,
3731
}
3832
}
3933

@@ -43,7 +37,7 @@ impl FoldHasher {
4337
if self.sponge_len as usize + bits > 128 {
4438
let lo = self.sponge as u64;
4539
let hi = (self.sponge >> 64) as u64;
46-
self.accumulator = folded_multiply(lo ^ self.accumulator, hi ^ self.fold_seed);
40+
self.accumulator = folded_multiply(lo ^ self.accumulator, hi ^ self.seeds[0]);
4741
self.sponge = x.into();
4842
self.sponge_len = bits as u8;
4943
} else {
@@ -53,7 +47,7 @@ impl FoldHasher {
5347
}
5448
}
5549

56-
impl Hasher for FoldHasher {
50+
impl<'a> Hasher for FoldHasher<'a> {
5751
#[inline(always)]
5852
fn write(&mut self, bytes: &[u8]) {
5953
// We perform overlapping reads in the byte hash which could lead to
@@ -62,41 +56,11 @@ impl Hasher for FoldHasher {
6256
// which costs only a single cycle (or none if executed with
6357
// instruction-level parallelism).
6458
let len = bytes.len();
65-
let base_seed = rotate_right(self.accumulator, len as u32);
59+
self.accumulator = rotate_right(self.accumulator, len as u32);
6660
if len <= 16 {
67-
let mut s0 = base_seed;
68-
let mut s1 = self.expand_seed;
69-
// XOR the input into s0, s1, then multiply and fold.
70-
if len >= 8 {
71-
s0 ^= u64::from_ne_bytes(bytes[0..8].try_into().unwrap());
72-
s1 ^= u64::from_ne_bytes(bytes[len - 8..].try_into().unwrap());
73-
} else if len >= 4 {
74-
s0 ^= u32::from_ne_bytes(bytes[0..4].try_into().unwrap()) as u64;
75-
s1 ^= u32::from_ne_bytes(bytes[len - 4..].try_into().unwrap()) as u64;
76-
} else if len > 0 {
77-
let lo = bytes[0];
78-
let mid = bytes[len / 2];
79-
let hi = bytes[len - 1];
80-
s0 ^= lo as u64;
81-
s1 ^= ((hi as u64) << 8) | mid as u64;
82-
}
83-
self.accumulator = folded_multiply(s0, s1);
84-
} else if len < 256 {
85-
self.accumulator = hash_bytes_medium(
86-
bytes,
87-
base_seed,
88-
base_seed.wrapping_add(self.expand_seed),
89-
self.fold_seed,
90-
);
61+
self.accumulator = hash_bytes_short(bytes, self.accumulator, self.seeds);
9162
} else {
92-
self.accumulator = hash_bytes_long(
93-
bytes,
94-
base_seed,
95-
base_seed.wrapping_add(self.expand_seed),
96-
base_seed.wrapping_add(self.expand_seed2),
97-
base_seed.wrapping_add(self.expand_seed3),
98-
self.fold_seed,
99-
);
63+
self.accumulator = hash_bytes_long(bytes, self.accumulator, self.seeds);
10064
}
10165
}
10266

@@ -124,7 +88,7 @@ impl Hasher for FoldHasher {
12488
fn write_u128(&mut self, i: u128) {
12589
let lo = i as u64;
12690
let hi = (i >> 64) as u64;
127-
self.accumulator = folded_multiply(lo ^ self.accumulator, hi ^ self.fold_seed);
91+
self.accumulator = folded_multiply(lo ^ self.accumulator, hi ^ self.seeds[0]);
12892
}
12993

13094
#[inline(always)]
@@ -141,7 +105,7 @@ impl Hasher for FoldHasher {
141105
if self.sponge_len > 0 {
142106
let lo = self.sponge as u64;
143107
let hi = (self.sponge >> 64) as u64;
144-
folded_multiply(lo ^ self.accumulator, hi ^ self.fold_seed)
108+
folded_multiply(lo ^ self.accumulator, hi ^ self.seeds[0])
145109
} else {
146110
self.accumulator
147111
}
@@ -166,10 +130,10 @@ impl Default for RandomState {
166130
}
167131

168132
impl BuildHasher for RandomState {
169-
type Hasher = FoldHasher;
133+
type Hasher = FoldHasher<'static>;
170134

171135
#[inline(always)]
172-
fn build_hasher(&self) -> FoldHasher {
136+
fn build_hasher(&self) -> FoldHasher<'static> {
173137
FoldHasher::with_seed(self.per_hasher_seed, self.global_seed.get())
174138
}
175139
}
@@ -224,10 +188,10 @@ impl SeedableRandomState {
224188
}
225189

226190
impl BuildHasher for SeedableRandomState {
227-
type Hasher = FoldHasher;
191+
type Hasher = FoldHasher<'static>;
228192

229193
#[inline(always)]
230-
fn build_hasher(&self) -> FoldHasher {
194+
fn build_hasher(&self) -> FoldHasher<'static> {
231195
FoldHasher::with_seed(self.per_hasher_seed, self.shared_seed)
232196
}
233197
}
@@ -261,10 +225,10 @@ impl Default for FixedState {
261225
}
262226

263227
impl BuildHasher for FixedState {
264-
type Hasher = FoldHasher;
228+
type Hasher = FoldHasher<'static>;
265229

266230
#[inline(always)]
267-
fn build_hasher(&self) -> FoldHasher {
231+
fn build_hasher(&self) -> FoldHasher<'static> {
268232
FoldHasher::with_seed(self.per_hasher_seed, SharedSeed::global_fixed())
269233
}
270234
}

src/lib.rs

Lines changed: 60 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -220,8 +220,64 @@ const fn rotate_right(x: u64, r: u32) -> u64 {
220220
}
221221
}
222222

223-
/// Hashes strings >= 16 bytes, has unspecified behavior when bytes.len() < 16.
224-
fn hash_bytes_medium(bytes: &[u8], mut s0: u64, mut s1: u64, fold_seed: u64) -> u64 {
223+
/// Hashes strings <= 16 bytes, has unspecified behavior when bytes.len() > 16.
224+
#[inline(always)]
225+
fn hash_bytes_short(bytes: &[u8], accumulator: u64, seeds: &[u64; 4]) -> u64 {
226+
let len = bytes.len();
227+
let mut s0 = accumulator;
228+
let mut s1 = seeds[1];
229+
// XOR the input into s0, s1, then multiply and fold.
230+
if len >= 8 {
231+
s0 ^= u64::from_ne_bytes(bytes[0..8].try_into().unwrap());
232+
s1 ^= u64::from_ne_bytes(bytes[len - 8..].try_into().unwrap());
233+
} else if len >= 4 {
234+
s0 ^= u32::from_ne_bytes(bytes[0..4].try_into().unwrap()) as u64;
235+
s1 ^= u32::from_ne_bytes(bytes[len - 4..].try_into().unwrap()) as u64;
236+
} else if len > 0 {
237+
let lo = bytes[0];
238+
let mid = bytes[len / 2];
239+
let hi = bytes[len - 1];
240+
s0 ^= lo as u64;
241+
s1 ^= ((hi as u64) << 8) | mid as u64;
242+
}
243+
folded_multiply(s0, s1)
244+
}
245+
246+
/// Hashes strings > 16 bytes, has unspecified behavior when bytes.len() <= 16.
247+
#[cold]
248+
#[inline(never)]
249+
fn hash_bytes_long(mut bytes: &[u8], accumulator: u64, seeds: &[u64; 4]) -> u64 {
250+
let mut s0 = accumulator;
251+
let mut s1 = s0.wrapping_add(seeds[1]);
252+
if bytes.len() >= 256 {
253+
let mut s2 = s0.wrapping_add(seeds[2]);
254+
let mut s3 = s0.wrapping_add(seeds[3]);
255+
let chunks = bytes.chunks_exact(64);
256+
let remainder = chunks.remainder().len();
257+
for chunk in chunks {
258+
let a = u64::from_ne_bytes(chunk[0..8].try_into().unwrap());
259+
let b = u64::from_ne_bytes(chunk[8..16].try_into().unwrap());
260+
let c = u64::from_ne_bytes(chunk[16..24].try_into().unwrap());
261+
let d = u64::from_ne_bytes(chunk[24..32].try_into().unwrap());
262+
let e = u64::from_ne_bytes(chunk[32..40].try_into().unwrap());
263+
let f = u64::from_ne_bytes(chunk[40..48].try_into().unwrap());
264+
let g = u64::from_ne_bytes(chunk[48..56].try_into().unwrap());
265+
let h = u64::from_ne_bytes(chunk[56..64].try_into().unwrap());
266+
s0 = folded_multiply(a ^ s0, e ^ seeds[0]);
267+
s1 = folded_multiply(b ^ s1, f ^ seeds[0]);
268+
s2 = folded_multiply(c ^ s2, g ^ seeds[0]);
269+
s3 = folded_multiply(d ^ s3, h ^ seeds[0]);
270+
}
271+
s0 ^= s2;
272+
s1 ^= s3;
273+
274+
if remainder > 0 {
275+
bytes = &bytes[bytes.len() - remainder.max(16)..];
276+
} else {
277+
return s0 ^ s1;
278+
}
279+
}
280+
225281
// Process 32 bytes per iteration, 16 bytes from the start, 16 bytes from
226282
// the end. On the last iteration these two chunks can overlap, but that is
227283
// perfectly fine.
@@ -239,46 +295,9 @@ fn hash_bytes_medium(bytes: &[u8], mut s0: u64, mut s1: u64, fold_seed: u64) ->
239295
let b = u64::from_ne_bytes(lo[8..16].try_into().unwrap());
240296
let c = u64::from_ne_bytes(hi[0..8].try_into().unwrap());
241297
let d = u64::from_ne_bytes(hi[8..16].try_into().unwrap());
242-
s0 = folded_multiply(a ^ s0, c ^ fold_seed);
243-
s1 = folded_multiply(b ^ s1, d ^ fold_seed);
298+
s0 = folded_multiply(a ^ s0, c ^ seeds[0]);
299+
s1 = folded_multiply(b ^ s1, d ^ seeds[0]);
244300
}
245301

246302
s0 ^ s1
247303
}
248-
249-
/// Hashes strings >= 16 bytes, has unspecified behavior when bytes.len() < 16.
250-
#[cold]
251-
#[inline(never)]
252-
fn hash_bytes_long(
253-
bytes: &[u8],
254-
mut s0: u64,
255-
mut s1: u64,
256-
mut s2: u64,
257-
mut s3: u64,
258-
fold_seed: u64,
259-
) -> u64 {
260-
let chunks = bytes.chunks_exact(64);
261-
let remainder = chunks.remainder().len();
262-
for chunk in chunks {
263-
let a = u64::from_ne_bytes(chunk[0..8].try_into().unwrap());
264-
let b = u64::from_ne_bytes(chunk[8..16].try_into().unwrap());
265-
let c = u64::from_ne_bytes(chunk[16..24].try_into().unwrap());
266-
let d = u64::from_ne_bytes(chunk[24..32].try_into().unwrap());
267-
let e = u64::from_ne_bytes(chunk[32..40].try_into().unwrap());
268-
let f = u64::from_ne_bytes(chunk[40..48].try_into().unwrap());
269-
let g = u64::from_ne_bytes(chunk[48..56].try_into().unwrap());
270-
let h = u64::from_ne_bytes(chunk[56..64].try_into().unwrap());
271-
s0 = folded_multiply(a ^ s0, e ^ fold_seed);
272-
s1 = folded_multiply(b ^ s1, f ^ fold_seed);
273-
s2 = folded_multiply(c ^ s2, g ^ fold_seed);
274-
s3 = folded_multiply(d ^ s3, h ^ fold_seed);
275-
}
276-
s0 ^= s2;
277-
s1 ^= s3;
278-
279-
if remainder > 0 {
280-
hash_bytes_medium(&bytes[bytes.len() - remainder.max(16)..], s0, s1, fold_seed)
281-
} else {
282-
s0 ^ s1
283-
}
284-
}

src/quality.rs

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,22 +12,22 @@ use crate::{fast, folded_multiply, ARBITRARY0, ARBITRARY8};
1212
/// most likely want to use [`RandomState`], [`SeedableRandomState`] or
1313
/// [`FixedState`] to create [`FoldHasher`]s.
1414
#[derive(Clone)]
15-
pub struct FoldHasher {
16-
pub(crate) inner: fast::FoldHasher,
15+
pub struct FoldHasher<'a> {
16+
pub(crate) inner: fast::FoldHasher<'a>,
1717
}
1818

19-
impl FoldHasher {
19+
impl<'a> FoldHasher<'a> {
2020
/// Initializes this [`FoldHasher`] with the given per-hasher seed and
2121
/// [`SharedSeed`].
2222
#[inline(always)]
23-
pub fn with_seed(per_hasher_seed: u64, shared_seed: &SharedSeed) -> FoldHasher {
23+
pub fn with_seed(per_hasher_seed: u64, shared_seed: &'a SharedSeed) -> FoldHasher<'a> {
2424
FoldHasher {
2525
inner: fast::FoldHasher::with_seed(per_hasher_seed, shared_seed),
2626
}
2727
}
2828
}
2929

30-
impl Hasher for FoldHasher {
30+
impl<'a> Hasher for FoldHasher<'a> {
3131
#[inline(always)]
3232
fn write(&mut self, bytes: &[u8]) {
3333
self.inner.write(bytes);
@@ -76,10 +76,10 @@ pub struct RandomState {
7676
}
7777

7878
impl BuildHasher for RandomState {
79-
type Hasher = FoldHasher;
79+
type Hasher = FoldHasher<'static>;
8080

8181
#[inline(always)]
82-
fn build_hasher(&self) -> FoldHasher {
82+
fn build_hasher(&self) -> FoldHasher<'static> {
8383
FoldHasher {
8484
inner: self.inner.build_hasher(),
8585
}
@@ -130,10 +130,10 @@ impl SeedableRandomState {
130130
}
131131

132132
impl BuildHasher for SeedableRandomState {
133-
type Hasher = FoldHasher;
133+
type Hasher = FoldHasher<'static>;
134134

135135
#[inline(always)]
136-
fn build_hasher(&self) -> FoldHasher {
136+
fn build_hasher(&self) -> FoldHasher<'static> {
137137
FoldHasher {
138138
inner: self.inner.build_hasher(),
139139
}
@@ -163,10 +163,10 @@ impl FixedState {
163163
}
164164

165165
impl BuildHasher for FixedState {
166-
type Hasher = FoldHasher;
166+
type Hasher = FoldHasher<'static>;
167167

168168
#[inline(always)]
169-
fn build_hasher(&self) -> FoldHasher {
169+
fn build_hasher(&self) -> FoldHasher<'static> {
170170
FoldHasher {
171171
inner: self.inner.build_hasher(),
172172
}

0 commit comments

Comments
 (0)