diff --git a/lib/std/hash.zig b/lib/std/hash.zig index c4af9011b821..157dad67a748 100644 --- a/lib/std/hash.zig +++ b/lib/std/hash.zig @@ -32,6 +32,8 @@ pub const CityHash64 = cityhash.CityHash64; const wyhash = @import("hash/wyhash.zig"); pub const Wyhash = wyhash.Wyhash; +pub const RapidHash = @import("hash/RapidHash.zig"); + const xxhash = @import("hash/xxhash.zig"); pub const XxHash3 = xxhash.XxHash3; pub const XxHash64 = xxhash.XxHash64; diff --git a/lib/std/hash/RapidHash.zig b/lib/std/hash/RapidHash.zig new file mode 100644 index 000000000000..030c570df9f4 --- /dev/null +++ b/lib/std/hash/RapidHash.zig @@ -0,0 +1,125 @@ +const std = @import("std"); + +const readInt = std.mem.readInt; +const assert = std.debug.assert; +const expect = std.testing.expect; +const expectEqual = std.testing.expectEqual; + +const RAPID_SEED: u64 = 0xbdd89aa982704029; +const RAPID_SECRET: [3]u64 = .{ 0x2d358dccaa6c78a5, 0x8bb84b93962eacc9, 0x4b33a62ed433d4a3 }; + +pub fn hash(seed: u64, input: []const u8) u64 { + const sc = RAPID_SECRET; + const len = input.len; + var a: u64 = 0; + var b: u64 = 0; + var k = input; + var is: [3]u64 = .{ seed, 0, 0 }; + + is[0] ^= mix(seed ^ sc[0], sc[1]) ^ len; + + if (len <= 16) { + if (len >= 4) { + const d: u64 = ((len & 24) >> @intCast(len >> 3)); + const e = len - 4; + a = (r32(k) << 32) | r32(k[e..]); + b = ((r32(k[d..]) << 32) | r32(k[(e - d)..])); + } else if (len > 0) + a = (@as(u64, k[0]) << 56) | (@as(u64, k[len >> 1]) << 32) | @as(u64, k[len - 1]); + } else { + var remain = len; + if (len > 48) { + is[1] = is[0]; + is[2] = is[0]; + while (remain >= 96) { + inline for (0..6) |i| { + const m1 = r64(k[8 * i * 2 ..]); + const m2 = r64(k[8 * (i * 2 + 1) ..]); + is[i % 3] = mix(m1 ^ sc[i % 3], m2 ^ is[i % 3]); + } + k = k[96..]; + remain -= 96; + } + if (remain >= 48) { + inline for (0..3) |i| { + const m1 = r64(k[8 * i * 2 ..]); + const m2 = r64(k[8 * (i * 2 + 1) ..]); + is[i] = mix(m1 ^ sc[i], m2 ^ is[i]); + } + k = k[48..]; + remain -= 48; + } + + is[0] ^= is[1] ^ is[2]; + } + + if (remain > 16) { + is[0] = mix(r64(k) ^ sc[2], r64(k[8..]) ^ is[0] ^ sc[1]); + if (remain > 32) { + is[0] = mix(r64(k[16..]) ^ sc[2], r64(k[24..]) ^ is[0]); + } + } + + a = r64(input[len - 16 ..]); + b = r64(input[len - 8 ..]); + } + + a ^= sc[1]; + b ^= is[0]; + mum(&a, &b); + return mix(a ^ sc[0] ^ len, b ^ sc[1]); +} + +test "RapidHash.hash" { + const bytes: []const u8 = "abcdefgh" ** 128; + + const sizes: [13]u64 = .{ 0, 1, 2, 3, 4, 8, 16, 32, 64, 128, 256, 512, 1024 }; + + const outcomes: [13]u64 = .{ + 0x5a6ef77074ebc84b, + 0xc11328477bc0f5d1, + 0x5644ac035e40d569, + 0x347080fbf5fcd81, + 0x56b66b8dc802bcc, + 0xb6bf9055973aac7c, + 0xed56d62eead1e402, + 0xc19072d767da8ffb, + 0x89bb40a9928a4f0d, + 0xe0af7c5e7b6e29fd, + 0x9a3ed35fbedfa11a, + 0x4c684b2119ca19fb, + 0x4b575f5bf25600d6, + }; + + var success: bool = true; + for (sizes, outcomes) |s, e| { + const r = hash(RAPID_SEED, bytes[0..s]); + + expectEqual(e, r) catch |err| { + std.debug.print("Failed on {d}: {!}\n", .{ s, err }); + success = false; + }; + } + try expect(success); +} + +inline fn mum(a: *u64, b: *u64) void { + const r = @as(u128, a.*) * b.*; + a.* = @truncate(r); + b.* = @truncate(r >> 64); +} + +inline fn mix(a: u64, b: u64) u64 { + var copy_a = a; + var copy_b = b; + mum(©_a, ©_b); + return copy_a ^ copy_b; +} + +inline fn r64(p: []const u8) u64 { + return readInt(u64, p[0..8], .little); +} + +inline fn r32(p: []const u8) u64 { + return readInt(u32, p[0..4], .little); +} diff --git a/lib/std/hash/benchmark.zig b/lib/std/hash/benchmark.zig index 0a73be39ced0..6d5a6b93ac07 100644 --- a/lib/std/hash/benchmark.zig +++ b/lib/std/hash/benchmark.zig @@ -59,6 +59,12 @@ const hashes = [_]Hash{ .ty = hash.crc.Crc32, .name = "crc32", }, + Hash{ + .ty = hash.RapidHash, + .name = "rapidhash", + .has_iterative_api = false, + .init_u64 = 0, + }, Hash{ .ty = hash.CityHash32, .name = "cityhash-32", @@ -329,6 +335,7 @@ fn usage() void { \\ --count [int] \\ --key-size [int] \\ --iterative-only + \\ --small-key-only \\ --help \\ , .{}); @@ -349,6 +356,7 @@ pub fn main() !void { var count: usize = mode(128 * MiB); var key_size: ?usize = null; var seed: u32 = 0; + var test_small_key_only = false; var test_iterative_only = false; var test_arrays = false; @@ -399,6 +407,8 @@ pub fn main() !void { } } else if (std.mem.eql(u8, args[i], "--iterative-only")) { test_iterative_only = true; + } else if (std.mem.eql(u8, args[i], "--small-key-only")) { + test_small_key_only = true; } else if (std.mem.eql(u8, args[i], "--include-array")) { test_arrays = true; } else if (std.mem.eql(u8, args[i], "--help")) { @@ -410,6 +420,12 @@ pub fn main() !void { } } + if (test_iterative_only and test_small_key_only) { + try stdout.print("Cannot use iterative-only and small-key-only together!\n", .{}); + usage(); + std.process.exit(1); + } + var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init; defer std.testing.expect(gpa.deinit() == .ok) catch @panic("leak"); const allocator = gpa.allocator(); @@ -421,7 +437,7 @@ pub fn main() !void { // Always reseed prior to every call so we are hashing the same buffer contents. // This allows easier comparison between different implementations. - if (H.has_iterative_api) { + if (H.has_iterative_api and !test_small_key_only) { prng.seed(seed); const result = try benchmarkHash(H, count, allocator); try stdout.print(" iterative: {:5} MiB/s [{x:0<16}]\n", .{ result.throughput / (1 * MiB), result.hash });