Replace the hashing functions on APInt and APFloat with overloads of the

new hash_value infrastructure, and replace their implementations using hash_combine. This removes a complete copy of Jenkin's lookup3 hash function (which is both significantly slower and lower quality than the one implemented in hash_combine) along with a somewhat scary xor-only hash function. Now that APInt and APFloat can be passed directly to hash_combine, simplify the rest of the LLVMContextImpl hashing to use the new infrastructure. llvm-svn: 152004
2012-03-04 12:02:57 +00:00 · 2012-03-04 12:02:57 +00:00 · 71bd7d1e54
parent ca99ad3f0d
commit 71bd7d1e54
5 changed files with 44 additions and 114 deletions
--- a/llvm/include/llvm/ADT/APFloat.h
+++ b/llvm/include/llvm/ADT/APFloat.h
@ -328,8 +328,16 @@ namespace llvm {

    APFloat& operator=(const APFloat &);

-    /* Return an arbitrary integer value usable for hashing. */
-    uint32_t getHashValue() const;
+    /// \brief Overload to compute a hash code for an APFloat value.
+    ///
+    /// Note that the use of hash codes for floating point values is in general
+    /// frought with peril. Equality is hard to define for these values. For
+    /// example, should negative and positive zero hash to different codes? Are
+    /// they equal or not? This hash value implementation specifically
+    /// emphasizes producing different codes for different inputs in order to
+    /// be used in canonicalization and memoization. As such, equality is
+    /// bitwiseIsEqual, and 0 != -0.
+    friend hash_code hash_value(const APFloat &Arg);

    /// Converts this value into a decimal string.
    ///
--- a/llvm/include/llvm/ADT/APInt.h
+++ b/llvm/include/llvm/ADT/APInt.h
@ -23,11 +23,12 @@
 #include <string>

 namespace llvm {
-  class Serializer;
  class Deserializer;
  class FoldingSetNodeID;
-  class raw_ostream;
+  class Serializer;
  class StringRef;
+  class hash_code;
+  class raw_ostream;

  template<typename T>
  class SmallVectorImpl;
@ -502,10 +503,8 @@ public:
    return getAllOnesValue(numBits).lshr(numBits - loBitsSet);
  }

-  /// The hash value is computed as the sum of the words and the bit width.
-  /// @returns A hash value computed from the sum of the APInt words.
-  /// @brief Get a hash value based on this APInt
-  uint64_t getHashValue() const;
+  /// \brief Overload to compute a hash_code for an APInt value.
+  friend hash_code hash_value(const APInt &Arg);

  /// This function returns a pointer to the internal storage of the APInt.
  /// This is useful for writing out the APInt in binary form without any
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@ -14,8 +14,9 @@

 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APSInt.h"
-#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include <limits.h>
@ -2681,21 +2682,19 @@ APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
  return writeSignedDecimal (dst, exponent);
 }

-// For good performance it is desirable for different APFloats
-// to produce different integers.
-uint32_t
-APFloat::getHashValue() const
-{
-  if (category==fcZero) return sign<<8 | semantics->precision ;
-  else if (category==fcInfinity) return sign<<9 | semantics->precision;
-  else if (category==fcNaN) return 1<<10 | semantics->precision;
-  else {
-    uint32_t hash = sign<<11 | semantics->precision | exponent<<12;
-    const integerPart* p = significandParts();
-    for (int i=partCount(); i>0; i--, p++)
-      hash ^= ((uint32_t)*p) ^ (uint32_t)((*p)>>32);
-    return hash;
-  }
+hash_code llvm::hash_value(const APFloat &Arg) {
+  if (Arg.category != APFloat::fcNormal)
+    return hash_combine((uint8_t)Arg.category,
+                        // NaN has no sign, fix it at zero.
+                        Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
+                        Arg.semantics->precision);
+
+  // Normal floats need their exponent and significand hashed.
+  return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
+                      Arg.semantics->precision, Arg.exponent,
+                      hash_combine_range(
+                        Arg.significandParts(),
+                        Arg.significandParts() + Arg.partCount()));
 }

 // Conversion from APFloat to/from host float/double.  It may eventually be
--- a/llvm/lib/Support/APInt.cpp
+++ b/llvm/lib/Support/APInt.cpp
@ -14,9 +14,10 @@

 #define DEBUG_TYPE "apint"
 #include "llvm/ADT/APInt.h"
-#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
@ -675,93 +676,11 @@ unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
  }
 }

-// From http://www.burtleburtle.net, byBob Jenkins.
-// When targeting x86, both GCC and LLVM seem to recognize this as a
-// rotate instruction.
-#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
+hash_code llvm::hash_value(const APInt &Arg) {
+  if (Arg.isSingleWord())
+    return hash_combine(Arg.VAL);

-// From http://www.burtleburtle.net, by Bob Jenkins.
-#define mix(a,b,c) \
-  { \
-    a -= c;  a ^= rot(c, 4);  c += b; \
-    b -= a;  b ^= rot(a, 6);  a += c; \
-    c -= b;  c ^= rot(b, 8);  b += a; \
-    a -= c;  a ^= rot(c,16);  c += b; \
-    b -= a;  b ^= rot(a,19);  a += c; \
-    c -= b;  c ^= rot(b, 4);  b += a; \
-  }
-
-// From http://www.burtleburtle.net, by Bob Jenkins.
-#define final(a,b,c) \
-  { \
-    c ^= b; c -= rot(b,14); \
-    a ^= c; a -= rot(c,11); \
-    b ^= a; b -= rot(a,25); \
-    c ^= b; c -= rot(b,16); \
-    a ^= c; a -= rot(c,4);  \
-    b ^= a; b -= rot(a,14); \
-    c ^= b; c -= rot(b,24); \
-  }
-
-// hashword() was adapted from http://www.burtleburtle.net, by Bob
-// Jenkins.  k is a pointer to an array of uint32_t values; length is
-// the length of the key, in 32-bit chunks.  This version only handles
-// keys that are a multiple of 32 bits in size.
-static inline uint32_t hashword(const uint64_t *k64, size_t length)
-{
-  const uint32_t *k = reinterpret_cast<const uint32_t *>(k64);
-  uint32_t a,b,c;
-
-  /* Set up the internal state */
-  a = b = c = 0xdeadbeef + (((uint32_t)length)<<2);
-
-  /*------------------------------------------------- handle most of the key */
-  while (length > 3) {
-    a += k[0];
-    b += k[1];
-    c += k[2];
-    mix(a,b,c);
-    length -= 3;
-    k += 3;
-  }
-
-  /*------------------------------------------- handle the last 3 uint32_t's */
-  switch (length) {                  /* all the case statements fall through */
-  case 3 : c+=k[2];
-  case 2 : b+=k[1];
-  case 1 : a+=k[0];
-    final(a,b,c);
-    case 0:     /* case 0: nothing left to add */
-      break;
-    }
-  /*------------------------------------------------------ report the result */
-  return c;
-}
-
-// hashword8() was adapted from http://www.burtleburtle.net, by Bob
-// Jenkins.  This computes a 32-bit hash from one 64-bit word.  When
-// targeting x86 (32 or 64 bit), both LLVM and GCC compile this
-// function into about 35 instructions when inlined.
-static inline uint32_t hashword8(const uint64_t k64)
-{
-  uint32_t a,b,c;
-  a = b = c = 0xdeadbeef + 4;
-  b += k64 >> 32;
-  a += k64 & 0xffffffff;
-  final(a,b,c);
-  return c;
-}
-#undef final
-#undef mix
-#undef rot
-
-uint64_t APInt::getHashValue() const {
-  uint64_t hash;
-  if (isSingleWord())
-    hash = hashword8(VAL);
-  else
-    hash = hashword(pVal, getNumWords()*2);
-  return hash;
+  return hash_combine_range(Arg.pVal, Arg.pVal + Arg.getNumWords());
 }

 /// HiBits - This function returns the high "numBits" bits of this APInt.
--- a/llvm/lib/VMCore/LLVMContextImpl.h
+++ b/llvm/lib/VMCore/LLVMContextImpl.h
@ -52,12 +52,14 @@ struct DenseMapAPIntKeyInfo {
    bool operator!=(const KeyTy& that) const {
      return !this->operator==(that);
    }
+    friend hash_code hash_value(const KeyTy &Key) {
+      return hash_combine(Key.type, Key.val);
+    }
  };
  static inline KeyTy getEmptyKey() { return KeyTy(APInt(1,0), 0); }
  static inline KeyTy getTombstoneKey() { return KeyTy(APInt(1,1), 0); }
  static unsigned getHashValue(const KeyTy &Key) {
-    return DenseMapInfo<void*>::getHashValue(Key.type) ^ 
-      Key.val.getHashValue();
+    return static_cast<unsigned>(hash_value(Key));
  }
  static bool isEqual(const KeyTy &LHS, const KeyTy &RHS) {
    return LHS == RHS;
@ -75,6 +77,9 @@ struct DenseMapAPFloatKeyInfo {
    bool operator!=(const KeyTy& that) const {
      return !this->operator==(that);
    }
+    friend hash_code hash_value(const KeyTy &Key) {
+      return hash_combine(Key.val);
+    }
  };
  static inline KeyTy getEmptyKey() { 
    return KeyTy(APFloat(APFloat::Bogus,1));
@ -83,7 +88,7 @@ struct DenseMapAPFloatKeyInfo {
    return KeyTy(APFloat(APFloat::Bogus,2)); 
  }
  static unsigned getHashValue(const KeyTy &Key) {
-    return Key.val.getHashValue();
+    return static_cast<unsigned>(hash_value(Key));
  }
  static bool isEqual(const KeyTy &LHS, const KeyTy &RHS) {
    return LHS == RHS;