MurMurHash3.h

    #ifndef __CLANG_HASH_MURMUR3
    #define __CLANG_HASH_MURMUR3
    
    #include <cstddef> // size_t
    #include <cstring> // memcpy
    
    //-----------------------------------------------------------------------------
    // MurmurHash3 was written by Austin Appleby, and is placed in the public
    // domain. The author hereby disclaims copyright to this source code.
    
    // Note - The x86 and x64 versions do _not_ produce the same results, as the
    // algorithms are optimized for their respective platforms. You can still
    // compile and run any of them on any platform, but your performance with the
    // non-native version will be less than optimal.
    
    #if defined(_MSC_VER) && (_MSC_VER < 1600)
    
    typedef unsigned char uint8_t;
    typedef unsigned int uint32_t;
    typedef unsigned __int64 uint64_t;
    
    // Other compilers
    
    #else // defined(_MSC_VER)
    
    #include <stdint.h>
    
    #endif // !defined(_MSC_VER)
    
    //-----------------------------------------------------------------------------
    
    void MurmurHash3_x64_128(const void *key, int len, uint32_t seed, void *out);
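
    // Call sketch (illustrative only; `buf` and `bufLen` are hypothetical):
    // the function writes a 16-byte (128-bit) result into `out`.
    //
    //   uint32_t out[4];
    //   MurmurHash3_x64_128(buf, (int)bufLen, /*seed=*/0, out);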
    
    //-----------------------------------------------------------------------------
    // Platform-specific functions and macros
    
    // Microsoft Visual Studio
    
    #if defined(_MSC_VER)
    
    #define FORCE_INLINE __forceinline
    
    #include <stdlib.h>
    
    #define ROTL32(x, y) _rotl(x, y)
    #define ROTL64(x, y) _rotl64(x, y)
    
    #define BIG_CONSTANT(x) (x)
    
    // Other compilers
    
    #else // defined(_MSC_VER)
    
    #define FORCE_INLINE inline __attribute__((always_inline))
    
    static inline uint32_t rotl32(uint32_t x, int8_t r) {
      return (x << r) | (x >> (32 - r));
    }
    
    static inline uint64_t rotl64(uint64_t x, int8_t r) {
      return (x << r) | (x >> (64 - r));
    }
    
    #define ROTL32(x, y) rotl32(x, y)
    #define ROTL64(x, y) rotl64(x, y)
    
    #define BIG_CONSTANT(x) (x##LLU)
    
    #endif // !defined(_MSC_VER)
    
    //-----------------------------------------------------------------------------
    // Block read - if your platform needs to do endian-swapping or can only
    // handle aligned reads, do the conversion here
    
    static FORCE_INLINE uint32_t getblock32(const uint32_t *p, int i) {
      return p[i];
    }
    
    static FORCE_INLINE uint64_t getblock64(const uint64_t *p, int i) {
      return p[i];
    }
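
    // Sketch (not used here): on targets that need byte-swapping or cannot do
    // unaligned loads, the conversion could be done with memcpy instead of a
    // direct dereference. The helper name below is hypothetical.
    //
    //   static FORCE_INLINE uint64_t getblock64_portable(const uint8_t *p, int i) {
    //     uint64_t v;
    //     memcpy(&v, p + i * 8, sizeof(v)); // alignment-safe read
    //     return v;                         // byte-swap here on big-endian targets
    //   }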
    
    //-----------------------------------------------------------------------------
    // Finalization mix - force all bits of a hash block to avalanche
    
    static FORCE_INLINE uint32_t fmix32(uint32_t h) {
      h ^= h >> 16;
      h *= 0x85ebca6b;
      h ^= h >> 13;
      h *= 0xc2b2ae35;
      h ^= h >> 16;
    
      return h;
    }
    
    //----------
    
    static FORCE_INLINE uint64_t fmix64(uint64_t k) {
      k ^= k >> 33;
      k *= BIG_CONSTANT(0xff51afd7ed558ccd);
      k ^= k >> 33;
      k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
      k ^= k >> 33;
    
      return k;
    }
    
    //-----------------------------------------------------------------------------
    
    // Incremental (streaming) implementation of the MurmurHash3 x64 128-bit variant.
    class MurMurHash3 {
    public:
      enum { DIGEST_WORDS = 4, BLOCK_LENGTH = 16 };
    
      MurMurHash3() { reset(); }
      virtual ~MurMurHash3() {}
      MurMurHash3(const MurMurHash3 &s) { *this = s; }
    
      const MurMurHash3 &operator=(const MurMurHash3 &s) {
        h1 = s.h1;
        h2 = s.h2;
        memcpy(m_block, s.m_block, BLOCK_LENGTH);
        m_blockByteIndex = s.m_blockByteIndex;
        m_byteCount = s.m_byteCount;
        return *this;
      }
    
      MurMurHash3 &reset() {
        h1 = 0x6745230198BADCFE;
        h2 = 0xEFCDAB8910325476;
        m_blockByteIndex = 0;
        m_byteCount = 0;
        return *this;
      }
    
      MurMurHash3 &processByte(uint8_t octet) {
        this->m_block[this->m_blockByteIndex++] = octet;
        ++this->m_byteCount;
        if (m_blockByteIndex == BLOCK_LENGTH) {
          this->m_blockByteIndex = 0;
          processBlock();
        }
        return *this;
      }
    
      uint32_t finalize(uint32_t *digest) {
        //----------
        // tail
        uint64_t k1 = 0;
        uint64_t k2 = 0;
        // Keyed on m_blockByteIndex, NOT m_byteCount, on purpose: full blocks
        // have already been mixed in processBlock(), so only the partial tail
        // remains here. The cases below intentionally fall through.
        switch (m_blockByteIndex & 15) {
        case 15:
          k2 ^= ((uint64_t)m_block[14]) << 48;
        case 14:
          k2 ^= ((uint64_t)m_block[13]) << 40;
        case 13:
          k2 ^= ((uint64_t)m_block[12]) << 32;
        case 12:
          k2 ^= ((uint64_t)m_block[11]) << 24;
        case 11:
          k2 ^= ((uint64_t)m_block[10]) << 16;
        case 10:
          k2 ^= ((uint64_t)m_block[9]) << 8;
        case 9:
          k2 ^= ((uint64_t)m_block[8]) << 0;
          k2 *= c2;
          k2 = ROTL64(k2, 33);
          k2 *= c1;
          h2 ^= k2;
        case 8:
          k1 ^= ((uint64_t)m_block[7]) << 56;
        case 7:
          k1 ^= ((uint64_t)m_block[6]) << 48;
        case 6:
          k1 ^= ((uint64_t)m_block[5]) << 40;
        case 5:
          k1 ^= ((uint64_t)m_block[4]) << 32;
        case 4:
          k1 ^= ((uint64_t)m_block[3]) << 24;
        case 3:
          k1 ^= ((uint64_t)m_block[2]) << 16;
        case 2:
          k1 ^= ((uint64_t)m_block[1]) << 8;
        case 1:
          k1 ^= ((uint64_t)m_block[0]) << 0;
          k1 *= c1;
          k1 = ROTL64(k1, 31);
          k1 *= c2;
          h1 ^= k1;
        };
    
        //----------
        // finalization
        h1 ^= m_byteCount;
        h2 ^= m_byteCount;
    
        h1 += h2;
        h2 += h1;
    
        h1 = fmix64(h1);
        h2 = fmix64(h2);
    
        h1 += h2;
        h2 += h1;
    
        // Write the 128-bit result into the caller's DIGEST_WORDS x uint32_t
        // buffer; memcpy avoids unaligned, aliasing-unsafe 64-bit stores.
        memcpy(digest, &h1, sizeof(h1));
        memcpy(digest + 2, &h2, sizeof(h2));
    
        return (uint32_t)m_byteCount; // total bytes hashed (truncated to 32 bits)
      }
    
    protected:
      void processBlock() {
        uint64_t k1 = getblock64((uint64_t *)m_block, 0);
        uint64_t k2 = getblock64((uint64_t *)m_block, 1);
    
        k1 *= c1;
        k1 = ROTL64(k1, 31);
        k1 *= c2;
        h1 ^= k1;
    
        h1 = ROTL64(h1, 27);
        h1 += h2;
        h1 = h1 * 5 + 0x52dce729;
    
        k2 *= c2;
        k2 = ROTL64(k2, 33);
        k2 *= c1;
        h2 ^= k2;
    
        h2 = ROTL64(h2, 31);
        h2 += h1;
        h2 = h2 * 5 + 0x38495ab5;
      }
    
      static constexpr uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
      static constexpr uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
    
      uint64_t h1, h2;
      uint8_t m_block[BLOCK_LENGTH];
      size_t m_blockByteIndex;
      size_t m_byteCount;
    };
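
    //-----------------------------------------------------------------------------
    // Usage sketch (illustrative only; variable names are hypothetical):
    // feed bytes incrementally, then read back the 128-bit digest.
    //
    //   MurMurHash3 hasher;
    //   const uint8_t data[] = {'a', 'b', 'c'};
    //   for (size_t i = 0; i < sizeof(data); ++i)
    //     hasher.processByte(data[i]);
    //
    //   uint32_t digest[MurMurHash3::DIGEST_WORDS];
    //   hasher.finalize(digest); // returns the number of bytes hashed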
    
    #endif // __CLANG_HASH_MURMUR3