SchrodingerZhu · November 29, 2023 01:42
diff --git a/COMMIT HASH b/COMMIT HASH
 7db446a0b8d2e29cd648fb5bf4224db9aed30905
diff --git a/patch.diff b/patch.diff
 diff --git a/Hashes.h b/Hashes.h
 index 0071a6b..59bd2c4 100644
 --- a/Hashes.h
 +++ b/Hashes.h
 @@ -1374,3 +1374,173 @@ void khashv64_test ( const void *key, int len, uint32_t seed, void *out);
 extern PolymurHashParams g_polymurhashparams;
 void polymur_seed_init (size_t &seed);
 void polymur_test ( const void *key, int len, uint32_t seed, void *out);
 +
 +#include <cstddef>
 +#include <cstdint>
 +namespace llvm_libc_hash {
 +  //===-- Portable string hash function ---------------------------*- C++ -*-===//
 +//
 +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 +// See https://llvm.org/LICENSE.txt for license information.
 +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 +//
 +//===----------------------------------------------------------------------===//
 +
 +
 +#define LIBC_INLINE inline
 +#define LIBC_INLINE_VAR inline
 +using UInt128 = __uint128_t;
 +
 +// Folded multiplication.
 +// This function multiplies two 64-bit integers and xor the high and
 +// low 64-bit parts of the result.
 +LIBC_INLINE static uint64_t folded_multiply(uint64_t x, uint64_t y) {
 +  UInt128 mask = static_cast<UInt128>(0xffffffffffffffff);
 +  UInt128 p = static_cast<UInt128>(x) * static_cast<UInt128>(y);
 +  uint64_t low = static_cast<uint64_t>(p & mask);
 +  uint64_t high = static_cast<uint64_t>(p >> 64);
 +  return low ^ high;
 +}
 +
 +// Read as little endian.
 +// Shift-and-or implementation does not give a satisfactory code on aarch64.
 +// Therefore, we use a union to read the value.
 +template <typename T>
 +LIBC_INLINE static T read_little_endian(const void *ptr) {
 +  const uint8_t *bytes = static_cast<const uint8_t *>(ptr);
 +  union {
 +    T value;
 +    uint8_t buffer[sizeof(T)];
 +  } data;
 +  #if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
 +    for (size_t i = 0; i < sizeof(T); ++i) {
 +      data.buffer[i] = bytes[sizeof(T) - i - 1];
 +    }
 +  #else
 +    for (size_t i = 0; i < sizeof(T); ++i) {
 +      data.buffer[i] = bytes[i];
 +    }
 +  #endif
 +  return data.value;
 +}
 +
 +
 +// Specialized read functions for small values. size must be <= 8.
 +LIBC_INLINE static void read_small_values(const void *ptr, size_t size,
 +                                          uint64_t &low, uint64_t &high) {
 +  const uint8_t *bytes = static_cast<const uint8_t *>(ptr);
 +  if (size >= 2) {
 +    if (size >= 4) {
 +      low = static_cast<uint64_t>(read_little_endian<uint32_t>(&bytes[0]));
 +      high = static_cast<uint64_t>(read_little_endian<uint32_t>(&bytes[size - 4]));
 +    } else {
 +      low = static_cast<uint64_t>(read_little_endian<uint16_t>(&bytes[0]));
 +      high = static_cast<uint64_t>(bytes[size - 1]);
 +    }
 +  } else {
 +    if (size > 0) {
 +      low = static_cast<uint64_t>(bytes[0]);
 +      high = static_cast<uint64_t>(bytes[0]);
 +    } else {
 +      low = 0;
 +      high = 0;
 +    }
 +  }
 +}
 +
 +// This constant comes from Kunth's prng (it empirically works well).
 +LIBC_INLINE_VAR static constexpr uint64_t MULTIPLE = 6364136223846793005;
 +// Rotation amount for mixing.
 +LIBC_INLINE_VAR static constexpr uint64_t ROTATE = 23;
 +
 +// Randomly generated values (for now, it uses the same values from aHash).
 +LIBC_INLINE_VAR static constexpr uint64_t RANDOMNESS[2][4] = {
 +    0x243f6a8885a308d3,
 +    0x13198a2e03707344,
 +    0xa4093822299f31d0,
 +    0x082efa98ec4e6c89,
 +    0x452821e638d01377,
 +    0xbe5466cf34e90c6c,
 +    0xc0ac29b7c97c50dd,
 +    0x3f84d5b5b5470917,
 +};
 +
 +
 +LIBC_INLINE static uint64_t rotate_left(uint64_t x, uint64_t y) {
 +  return (x << y) | (x >> (64 - y));
 +}
 +
 +// This is a portable string hasher. It is not cryptographically secure.
 +// The implementation is extracted from the generic routine of aHash.
 +class HashState {
 +  uint64_t buffer;
 +  uint64_t pad;
 +  uint64_t extra_keys[2];
 +  LIBC_INLINE void update(uint64_t low, uint64_t high) {
 +    uint64_t combined =
 +        folded_multiply(low ^ extra_keys[0], high ^ extra_keys[1]);
 +    buffer = (buffer + pad) ^ combined;
 +    buffer = rotate_left(buffer, ROTATE);
 +  }
 +
 +  LIBC_INLINE HashState(
 +    uint64_t a, uint64_t b, uint64_t c, uint64_t d
 +  ) : buffer(a), pad(b), extra_keys{c, d} {
 +  }
 +
 +  LIBC_INLINE static uint64_t mix(uint64_t seed) {
 +    HashState mixer{RANDOMNESS[0][0], RANDOMNESS[0][1], RANDOMNESS[0][2],
 +                    RANDOMNESS[0][3]};
 +    mixer.update(seed, 0);
 +    return mixer.finish();
 +  }
 +
 +public:
 +  LIBC_INLINE HashState(uint64_t seed) {
 +    // Mix one more round of the seed to make it 
 +    uint64_t mixed = mix(seed);
 +    buffer = RANDOMNESS[1][0] ^ mixed;
 +    pad = RANDOMNESS[1][1] ^ mixed;
 +    extra_keys[0] = RANDOMNESS[1][2] ^ mixed;
 +    extra_keys[1] = RANDOMNESS[1][3] ^ mixed;
 +  }
 +  LIBC_INLINE void update(const void *ptr, size_t size) {
 +    uint8_t const *bytes = static_cast<const uint8_t *>(ptr);
 +    buffer = (buffer + size) * MULTIPLE;
 +    uint64_t low, high;
 +    if (size > 8) {
 +      if (size > 16) {
 +        // update tail
 +        low = read_little_endian<uint64_t>(&bytes[size - 16]);
 +        high = read_little_endian<uint64_t>(&bytes[size - 8]);
 +        update(low, high);
 +        while (size > 16) {
 +          low = read_little_endian<uint64_t>(&bytes[0]);
 +          high = read_little_endian<uint64_t>(&bytes[8]);
 +          update(low, high);
 +          bytes += 16;
 +          size -= 16;
 +        }
 +      } else {
 +        low = read_little_endian<uint64_t>(&bytes[0]);
 +        high = read_little_endian<uint64_t>(&bytes[size - 8]);
 +        update(low, high);
 +      }
 +    } else {
 +      read_small_values(ptr, size, low, high);
 +      update(low, high);
 +    }
 +  }
 +  LIBC_INLINE uint64_t finish() {
 +    uint64_t rot = buffer & 63;
 +    uint64_t folded = folded_multiply(buffer, pad);
 +    return rotate_left(folded, rot);
 +  }
 +};
 +}
 +
 +static inline void llvm_libc_hash64 ( const void *key, int len, uint32_t seed, void *out) {
 +  llvm_libc_hash::HashState state (seed);
 +  state.update (key, len);
 +  *(uint64_t*)out = state.finish();
 +}
 diff --git a/PMP_Multilinear.h b/PMP_Multilinear.h
 index 9d24e73..42fdc5d 100644
 --- a/PMP_Multilinear.h
 +++ b/PMP_Multilinear.h
 @@ -1417,8 +1417,8 @@ class PMP_Multilinear_Hasher
 	lowProduct.HighPart = midProduct.LowPart;
 	uint32_t hiProduct = c_ctr.HighPart * prevConstTerm.HighPart + midProduct.HighPart;
 
 -	constTerm.QuadPart += lowProduct.QuadPart;
 -	ctr += hiProduct + ( constTerm.QuadPart < lowProduct.QuadPart );
 +	constTerm.QuadPart += lowProduct.QuadPart; 
 +	ctr.QuadPart += hiProduct + ( constTerm.QuadPart < lowProduct.QuadPart );
 
 /*	for ( uint32_t i=0; i<PMPML_CHUNK_SIZE; i+=8 )
 	{
 diff --git a/main.cpp b/main.cpp
 index eaaa0bd..d2029f4 100644
 --- a/main.cpp
 +++ b/main.cpp
 @@ -88,6 +88,7 @@ const char* quality_str[3] = { "SKIP", "POOR", "GOOD" };
 // marked with !! are known bad seeds, which either hash to 0 or create collisions.
 HashInfo g_hashes[] =
 {
 +  { llvm_libc_hash64,         64, 0x00000000, "llvm_libc_hash64",     "llvm_libc_hash 64bit", GOOD, {} },
 // first the bad hash funcs, failing tests:
 { DoNothingHash,        32, 0x0, "donothing32", "Do-Nothing function (measure call overhead)", SKIP, {0UL} /* !! */ },
 { DoNothingHash,        64, 0x0, "donothing64", "Do-Nothing function (measure call overhead)", SKIP, {0ULL} /* !! */ },
	diff --git a/Hashes.h b/Hashes.h
	index 0071a6b..59bd2c4 100644
	--- a/Hashes.h
	+++ b/Hashes.h
	@@ -1374,3 +1374,173 @@ void khashv64_test ( const void key, int len, uint32_t seed, void out);
	extern PolymurHashParams g_polymurhashparams;
	void polymur_seed_init (size_t &seed);
	void polymur_test ( const void key, int len, uint32_t seed, void out);
	+
	+#include <cstddef>
	+#include <cstdint>
	+namespace llvm_libc_hash {
	+ //===-- Portable string hash function ---------------------------- C++ --===//
	+//
	+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	+// See https://llvm.org/LICENSE.txt for license information.
	+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	+//
	+//===----------------------------------------------------------------------===//
	+
	+
	+#define LIBC_INLINE inline
	+#define LIBC_INLINE_VAR inline
	+using UInt128 = __uint128_t;
	+
	+// Folded multiplication.
	+// This function multiplies two 64-bit integers and xor the high and
	+// low 64-bit parts of the result.
	+LIBC_INLINE static uint64_t folded_multiply(uint64_t x, uint64_t y) {
	+ UInt128 mask = static_cast<UInt128>(0xffffffffffffffff);
	+ UInt128 p = static_cast<UInt128>(x) * static_cast<UInt128>(y);
	+ uint64_t low = static_cast<uint64_t>(p & mask);
	+ uint64_t high = static_cast<uint64_t>(p >> 64);
	+ return low ^ high;
	+}
	+
	+// Read as little endian.
	+// Shift-and-or implementation does not give a satisfactory code on aarch64.
	+// Therefore, we use a union to read the value.
	+template <typename T>
	+LIBC_INLINE static T read_little_endian(const void *ptr) {
	+ const uint8_t bytes = static_cast<const uint8_t >(ptr);
	+ union {
	+ T value;
	+ uint8_t buffer[sizeof(T)];
	+ } data;
	+ #if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
	+ for (size_t i = 0; i < sizeof(T); ++i) {
	+ data.buffer[i] = bytes[sizeof(T) - i - 1];
	+ }
	+ #else
	+ for (size_t i = 0; i < sizeof(T); ++i) {
	+ data.buffer[i] = bytes[i];
	+ }
	+ #endif
	+ return data.value;
	+}
	+
	+
	+// Specialized read functions for small values. size must be <= 8.
	+LIBC_INLINE static void read_small_values(const void *ptr, size_t size,
	+ uint64_t &low, uint64_t &high) {
	+ const uint8_t bytes = static_cast<const uint8_t >(ptr);
	+ if (size >= 2) {
	+ if (size >= 4) {
	+ low = static_cast<uint64_t>(read_little_endian<uint32_t>(&bytes[0]));
	+ high = static_cast<uint64_t>(read_little_endian<uint32_t>(&bytes[size - 4]));
	+ } else {
	+ low = static_cast<uint64_t>(read_little_endian<uint16_t>(&bytes[0]));
	+ high = static_cast<uint64_t>(bytes[size - 1]);
	+ }
	+ } else {
	+ if (size > 0) {
	+ low = static_cast<uint64_t>(bytes[0]);
	+ high = static_cast<uint64_t>(bytes[0]);
	+ } else {
	+ low = 0;
	+ high = 0;
	+ }
	+ }
	+}
	+
	+// This constant comes from Kunth's prng (it empirically works well).
	+LIBC_INLINE_VAR static constexpr uint64_t MULTIPLE = 6364136223846793005;
	+// Rotation amount for mixing.
	+LIBC_INLINE_VAR static constexpr uint64_t ROTATE = 23;
	+
	+// Randomly generated values (for now, it uses the same values from aHash).
	+LIBC_INLINE_VAR static constexpr uint64_t RANDOMNESS[2][4] = {
	+ 0x243f6a8885a308d3,
	+ 0x13198a2e03707344,
	+ 0xa4093822299f31d0,
	+ 0x082efa98ec4e6c89,
	+ 0x452821e638d01377,
	+ 0xbe5466cf34e90c6c,
	+ 0xc0ac29b7c97c50dd,
	+ 0x3f84d5b5b5470917,
	+};
	+
	+
	+LIBC_INLINE static uint64_t rotate_left(uint64_t x, uint64_t y) {
	+ return (x << y) \| (x >> (64 - y));
	+}
	+
	+// This is a portable string hasher. It is not cryptographically secure.
	+// The implementation is extracted from the generic routine of aHash.
	+class HashState {
	+ uint64_t buffer;
	+ uint64_t pad;
	+ uint64_t extra_keys[2];
	+ LIBC_INLINE void update(uint64_t low, uint64_t high) {
	+ uint64_t combined =
	+ folded_multiply(low ^ extra_keys[0], high ^ extra_keys[1]);
	+ buffer = (buffer + pad) ^ combined;
	+ buffer = rotate_left(buffer, ROTATE);
	+ }
	+
	+ LIBC_INLINE HashState(
	+ uint64_t a, uint64_t b, uint64_t c, uint64_t d
	+ ) : buffer(a), pad(b), extra_keys{c, d} {
	+ }
	+
	+ LIBC_INLINE static uint64_t mix(uint64_t seed) {
	+ HashState mixer{RANDOMNESS[0][0], RANDOMNESS[0][1], RANDOMNESS[0][2],
	+ RANDOMNESS[0][3]};
	+ mixer.update(seed, 0);
	+ return mixer.finish();
	+ }
	+
	+public:
	+ LIBC_INLINE HashState(uint64_t seed) {
	+ // Mix one more round of the seed to make it
	+ uint64_t mixed = mix(seed);
	+ buffer = RANDOMNESS[1][0] ^ mixed;
	+ pad = RANDOMNESS[1][1] ^ mixed;
	+ extra_keys[0] = RANDOMNESS[1][2] ^ mixed;
	+ extra_keys[1] = RANDOMNESS[1][3] ^ mixed;
	+ }
	+ LIBC_INLINE void update(const void *ptr, size_t size) {
	+ uint8_t const bytes = static_cast<const uint8_t >(ptr);
	+ buffer = (buffer + size) * MULTIPLE;
	+ uint64_t low, high;
	+ if (size > 8) {
	+ if (size > 16) {
	+ // update tail
	+ low = read_little_endian<uint64_t>(&bytes[size - 16]);
	+ high = read_little_endian<uint64_t>(&bytes[size - 8]);
	+ update(low, high);
	+ while (size > 16) {
	+ low = read_little_endian<uint64_t>(&bytes[0]);
	+ high = read_little_endian<uint64_t>(&bytes[8]);
	+ update(low, high);
	+ bytes += 16;
	+ size -= 16;
	+ }
	+ } else {
	+ low = read_little_endian<uint64_t>(&bytes[0]);
	+ high = read_little_endian<uint64_t>(&bytes[size - 8]);
	+ update(low, high);
	+ }
	+ } else {
	+ read_small_values(ptr, size, low, high);
	+ update(low, high);
	+ }
	+ }
	+ LIBC_INLINE uint64_t finish() {
	+ uint64_t rot = buffer & 63;
	+ uint64_t folded = folded_multiply(buffer, pad);
	+ return rotate_left(folded, rot);
	+ }
	+};
	+}
	+
	+static inline void llvm_libc_hash64 ( const void key, int len, uint32_t seed, void out) {
	+ llvm_libc_hash::HashState state (seed);
	+ state.update (key, len);
	+ (uint64_t)out = state.finish();
	+}
	diff --git a/PMP_Multilinear.h b/PMP_Multilinear.h
	index 9d24e73..42fdc5d 100644
	--- a/PMP_Multilinear.h
	+++ b/PMP_Multilinear.h
	@@ -1417,8 +1417,8 @@ class PMP_Multilinear_Hasher
	lowProduct.HighPart = midProduct.LowPart;
	uint32_t hiProduct = c_ctr.HighPart * prevConstTerm.HighPart + midProduct.HighPart;

	- constTerm.QuadPart += lowProduct.QuadPart;
	- ctr += hiProduct + ( constTerm.QuadPart < lowProduct.QuadPart );
	+ constTerm.QuadPart += lowProduct.QuadPart;
	+ ctr.QuadPart += hiProduct + ( constTerm.QuadPart < lowProduct.QuadPart );

	/* for ( uint32_t i=0; i<PMPML_CHUNK_SIZE; i+=8 )
	{
	diff --git a/main.cpp b/main.cpp
	index eaaa0bd..d2029f4 100644
	--- a/main.cpp
	+++ b/main.cpp
	@@ -88,6 +88,7 @@ const char* quality_str[3] = { "SKIP", "POOR", "GOOD" };
	// marked with !! are known bad seeds, which either hash to 0 or create collisions.
	HashInfo g_hashes[] =
	{
	+ { llvm_libc_hash64, 64, 0x00000000, "llvm_libc_hash64", "llvm_libc_hash 64bit", GOOD, {} },
	// first the bad hash funcs, failing tests:
	{ DoNothingHash, 32, 0x0, "donothing32", "Do-Nothing function (measure call overhead)", SKIP, {0UL} /* !! */ },
	{ DoNothingHash, 64, 0x0, "donothing64", "Do-Nothing function (measure call overhead)", SKIP, {0ULL} /* !! */ },
No results found