Skip to content

Instantly share code, notes, and snippets.

@SchrodingerZhu
Created November 29, 2023 01:42
Show Gist options
  • Select an option

  • Save SchrodingerZhu/778a7952a35a1fc3692650b2d9fd6686 to your computer and use it in GitHub Desktop.

Select an option

Save SchrodingerZhu/778a7952a35a1fc3692650b2d9fd6686 to your computer and use it in GitHub Desktop.
SMHasher Patch
7db446a0b8d2e29cd648fb5bf4224db9aed30905
diff --git a/Hashes.h b/Hashes.h
index 0071a6b..59bd2c4 100644
--- a/Hashes.h
+++ b/Hashes.h
@@ -1374,3 +1374,173 @@ void khashv64_test ( const void *key, int len, uint32_t seed, void *out);
extern PolymurHashParams g_polymurhashparams;
void polymur_seed_init (size_t &seed);
void polymur_test ( const void *key, int len, uint32_t seed, void *out);
+
+#include <cstddef>
+#include <cstdint>
+namespace llvm_libc_hash {
+ //===-- Portable string hash function ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+
+#define LIBC_INLINE inline
+#define LIBC_INLINE_VAR inline
+using UInt128 = __uint128_t;
+
+// Folded multiplication.
+// This function multiplies two 64-bit integers and xor the high and
+// low 64-bit parts of the result.
+LIBC_INLINE static uint64_t folded_multiply(uint64_t x, uint64_t y) {
+ UInt128 mask = static_cast<UInt128>(0xffffffffffffffff);
+ UInt128 p = static_cast<UInt128>(x) * static_cast<UInt128>(y);
+ uint64_t low = static_cast<uint64_t>(p & mask);
+ uint64_t high = static_cast<uint64_t>(p >> 64);
+ return low ^ high;
+}
+
+// Read as little endian.
+// Shift-and-or implementation does not give a satisfactory code on aarch64.
+// Therefore, we use a union to read the value.
+template <typename T>
+LIBC_INLINE static T read_little_endian(const void *ptr) {
+ const uint8_t *bytes = static_cast<const uint8_t *>(ptr);
+ union {
+ T value;
+ uint8_t buffer[sizeof(T)];
+ } data;
+ #if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+ for (size_t i = 0; i < sizeof(T); ++i) {
+ data.buffer[i] = bytes[sizeof(T) - i - 1];
+ }
+ #else
+ for (size_t i = 0; i < sizeof(T); ++i) {
+ data.buffer[i] = bytes[i];
+ }
+ #endif
+ return data.value;
+}
+
+
+// Specialized read functions for small values. size must be <= 8.
+LIBC_INLINE static void read_small_values(const void *ptr, size_t size,
+ uint64_t &low, uint64_t &high) {
+ const uint8_t *bytes = static_cast<const uint8_t *>(ptr);
+ if (size >= 2) {
+ if (size >= 4) {
+ low = static_cast<uint64_t>(read_little_endian<uint32_t>(&bytes[0]));
+ high = static_cast<uint64_t>(read_little_endian<uint32_t>(&bytes[size - 4]));
+ } else {
+ low = static_cast<uint64_t>(read_little_endian<uint16_t>(&bytes[0]));
+ high = static_cast<uint64_t>(bytes[size - 1]);
+ }
+ } else {
+ if (size > 0) {
+ low = static_cast<uint64_t>(bytes[0]);
+ high = static_cast<uint64_t>(bytes[0]);
+ } else {
+ low = 0;
+ high = 0;
+ }
+ }
+}
+
+// This constant comes from Kunth's prng (it empirically works well).
+LIBC_INLINE_VAR static constexpr uint64_t MULTIPLE = 6364136223846793005;
+// Rotation amount for mixing.
+LIBC_INLINE_VAR static constexpr uint64_t ROTATE = 23;
+
+// Randomly generated values (for now, it uses the same values from aHash).
+LIBC_INLINE_VAR static constexpr uint64_t RANDOMNESS[2][4] = {
+ 0x243f6a8885a308d3,
+ 0x13198a2e03707344,
+ 0xa4093822299f31d0,
+ 0x082efa98ec4e6c89,
+ 0x452821e638d01377,
+ 0xbe5466cf34e90c6c,
+ 0xc0ac29b7c97c50dd,
+ 0x3f84d5b5b5470917,
+};
+
+
+LIBC_INLINE static uint64_t rotate_left(uint64_t x, uint64_t y) {
+ return (x << y) | (x >> (64 - y));
+}
+
+// This is a portable string hasher. It is not cryptographically secure.
+// The implementation is extracted from the generic routine of aHash.
+class HashState {
+ uint64_t buffer;
+ uint64_t pad;
+ uint64_t extra_keys[2];
+ LIBC_INLINE void update(uint64_t low, uint64_t high) {
+ uint64_t combined =
+ folded_multiply(low ^ extra_keys[0], high ^ extra_keys[1]);
+ buffer = (buffer + pad) ^ combined;
+ buffer = rotate_left(buffer, ROTATE);
+ }
+
+ LIBC_INLINE HashState(
+ uint64_t a, uint64_t b, uint64_t c, uint64_t d
+ ) : buffer(a), pad(b), extra_keys{c, d} {
+ }
+
+ LIBC_INLINE static uint64_t mix(uint64_t seed) {
+ HashState mixer{RANDOMNESS[0][0], RANDOMNESS[0][1], RANDOMNESS[0][2],
+ RANDOMNESS[0][3]};
+ mixer.update(seed, 0);
+ return mixer.finish();
+ }
+
+public:
+ LIBC_INLINE HashState(uint64_t seed) {
+ // Mix one more round of the seed to make it
+ uint64_t mixed = mix(seed);
+ buffer = RANDOMNESS[1][0] ^ mixed;
+ pad = RANDOMNESS[1][1] ^ mixed;
+ extra_keys[0] = RANDOMNESS[1][2] ^ mixed;
+ extra_keys[1] = RANDOMNESS[1][3] ^ mixed;
+ }
+ LIBC_INLINE void update(const void *ptr, size_t size) {
+ uint8_t const *bytes = static_cast<const uint8_t *>(ptr);
+ buffer = (buffer + size) * MULTIPLE;
+ uint64_t low, high;
+ if (size > 8) {
+ if (size > 16) {
+ // update tail
+ low = read_little_endian<uint64_t>(&bytes[size - 16]);
+ high = read_little_endian<uint64_t>(&bytes[size - 8]);
+ update(low, high);
+ while (size > 16) {
+ low = read_little_endian<uint64_t>(&bytes[0]);
+ high = read_little_endian<uint64_t>(&bytes[8]);
+ update(low, high);
+ bytes += 16;
+ size -= 16;
+ }
+ } else {
+ low = read_little_endian<uint64_t>(&bytes[0]);
+ high = read_little_endian<uint64_t>(&bytes[size - 8]);
+ update(low, high);
+ }
+ } else {
+ read_small_values(ptr, size, low, high);
+ update(low, high);
+ }
+ }
+ LIBC_INLINE uint64_t finish() {
+ uint64_t rot = buffer & 63;
+ uint64_t folded = folded_multiply(buffer, pad);
+ return rotate_left(folded, rot);
+ }
+};
+}
+
+static inline void llvm_libc_hash64 ( const void *key, int len, uint32_t seed, void *out) {
+ llvm_libc_hash::HashState state (seed);
+ state.update (key, len);
+ *(uint64_t*)out = state.finish();
+}
diff --git a/PMP_Multilinear.h b/PMP_Multilinear.h
index 9d24e73..42fdc5d 100644
--- a/PMP_Multilinear.h
+++ b/PMP_Multilinear.h
@@ -1417,8 +1417,8 @@ class PMP_Multilinear_Hasher
lowProduct.HighPart = midProduct.LowPart;
uint32_t hiProduct = c_ctr.HighPart * prevConstTerm.HighPart + midProduct.HighPart;
- constTerm.QuadPart += lowProduct.QuadPart;
- ctr += hiProduct + ( constTerm.QuadPart < lowProduct.QuadPart );
+ constTerm.QuadPart += lowProduct.QuadPart;
+ ctr.QuadPart += hiProduct + ( constTerm.QuadPart < lowProduct.QuadPart );
/* for ( uint32_t i=0; i<PMPML_CHUNK_SIZE; i+=8 )
{
diff --git a/main.cpp b/main.cpp
index eaaa0bd..d2029f4 100644
--- a/main.cpp
+++ b/main.cpp
@@ -88,6 +88,7 @@ const char* quality_str[3] = { "SKIP", "POOR", "GOOD" };
// marked with !! are known bad seeds, which either hash to 0 or create collisions.
HashInfo g_hashes[] =
{
+ { llvm_libc_hash64, 64, 0x00000000, "llvm_libc_hash64", "llvm_libc_hash 64bit", GOOD, {} },
// first the bad hash funcs, failing tests:
{ DoNothingHash, 32, 0x0, "donothing32", "Do-Nothing function (measure call overhead)", SKIP, {0UL} /* !! */ },
{ DoNothingHash, 64, 0x0, "donothing64", "Do-Nothing function (measure call overhead)", SKIP, {0ULL} /* !! */ },
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment