Use a hashing function that takes all UniqueID bytes into account (#2174)

2025-03-06 10:31:39 -05:00 · 2018-06-01 23:07:29 -07:00 · 2018-06-01 23:07:29 -07:00 · d699bfbf10
commit d699bfbf10
parent e1024d84e9
1 changed files with 49 additions and 5 deletions
--- a/src/ray/id.cc
+++ b/src/ray/id.cc
@ -82,13 +82,57 @@ bool UniqueID::operator==(const UniqueID &rhs) const {
  return std::memcmp(data(), rhs.data(), kUniqueIDSize) == 0;
 }

-size_t UniqueID::hash() const {
-  size_t result;
-  // Skip the bytes for the object prefix.
-  std::memcpy(&result, id_ + (kObjectIdIndexSize / CHAR_BIT), sizeof(size_t));
-  return result;
+// MurmurHash64A: hashes the `len` bytes at `key`, mixed with `seed`, into a 64-bit value.
+// This code is from https://sites.google.com/site/murmurhash/ and is public domain.
+uint64_t MurmurHash64A(const void *key, int len, unsigned int seed) {
+  const uint64_t m = 0xc6a4a7935bd1e995;  // multiplier applied in every mixing step below
+  const int r = 47;  // right-shift distance used by the xor-shift steps
+
+  uint64_t h = seed ^ (len * m);  // initial state depends on both the seed and the length
+
+  const uint64_t *data = reinterpret_cast<const uint64_t *>(key);  // NOTE(review): key is read as uint64_t words; presumably callers pass suitably aligned storage -- confirm
+  const uint64_t *end = data + (len / 8);  // one past the last complete 8-byte word
+
+  while (data != end) {  // consume the input one full 8-byte word at a time
+    uint64_t k = *data++;  // load the next word and advance
+
+    k *= m;  // scramble the word: multiply, xor-shift, multiply
+    k ^= k >> r;
+    k *= m;
+
+    h ^= k;  // fold the scrambled word into the running hash
+    h *= m;
+  }
+
+  const unsigned char *data2 = reinterpret_cast<const unsigned char *>(data);  // bytes left over after the full words
+
+  switch (len & 7) {  // len & 7 is the number of leftover bytes (0..7)
+  case 7:  // no break statements below: each case falls through into the next lower one
+    h ^= uint64_t(data2[6]) << 48;
+  case 6:
+    h ^= uint64_t(data2[5]) << 40;
+  case 5:
+    h ^= uint64_t(data2[4]) << 32;
+  case 4:
+    h ^= uint64_t(data2[3]) << 24;
+  case 3:
+    h ^= uint64_t(data2[2]) << 16;
+  case 2:
+    h ^= uint64_t(data2[1]) << 8;
+  case 1:
+    h ^= uint64_t(data2[0]);
+    h *= m;  // reached only when at least one leftover byte exists
+  };
+
+  h ^= h >> r;  // final mixing of the accumulated state
+  h *= m;
+  h ^= h >> r;
+
+  return h;
 }

+size_t UniqueID::hash() const { return MurmurHash64A(&id_[0], kUniqueIDSize, 0); }  // hashes kUniqueIDSize bytes starting at id_[0] with seed 0; the uint64_t result is converted to size_t
+
 std::ostream &operator<<(std::ostream &os, const UniqueID &id) {
  os << id.hex();
  return os;