commit 84b569d5cf
parent fbaff8489d

Matthew Dempsky: Attached patch replaces the type punning with memcpy(),
which on x86/x86-64 clang optimizes to direct word accesses anyway. This
fixes an unaligned word access in murmurhash/cityhash.

llvm-svn: 185558
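For context before the diff, here is a minimal standalone sketch of the two access patterns (the function names are illustrative, not code from the patch):

#include <cstdint>
#include <cstring>

// Old pattern: dereference through a cast. Undefined behavior when p is
// misaligned (and a strict-aliasing violation); it can fault outright on
// strict-alignment targets.
inline std::uint64_t load_by_cast(const void* p) {
    return *static_cast<const std::uint64_t*>(p);
}

// New pattern, per the patch: memcpy into a local. Well-defined for any
// alignment; clang folds the fixed-size memcpy into a single word load
// on x86/x86-64, so codegen is unchanged where the cast was legal.
inline std::uint64_t load_by_memcpy(const void* p) {
    std::uint64_t r;
    std::memcpy(&r, p, sizeof(r));
    return r;
}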
@@ -3128,6 +3128,16 @@ template<class _Tp, class... _Args>
 
 template <class _Tp> struct hash;
 
+template <class _Size>
+inline _LIBCPP_INLINE_VISIBILITY
+_Size
+__loadword(const void* __p)
+{
+    _Size __r;
+    std::memcpy(&__r, __p, sizeof(__r));
+    return __r;
+}
+
 // We use murmur2 when size_t is 32 bits, and cityhash64 when size_t
 // is 64 bits.  This is because cityhash64 uses 64bit x 64bit
 // multiplication, which can be very slow on 32-bit systems.
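The comment above describes the selection strategy. As a hedged sketch of how that dispatch can be expressed (this mirrors the shape of the libc++ template as I understand it, with the reserved names dropped; it is not quoted from the header):

#include <climits>
#include <cstddef>

// Width-based dispatch sketch: the defaulted second parameter is the bit
// width of Size, so size_t picks the 32-bit (murmur2) specialization on
// 32-bit targets and the 64-bit (cityhash64) one on 64-bit targets.
template <class Size, std::size_t = sizeof(Size) * CHAR_BIT>
struct murmur2_or_cityhash;                  // primary template: undefined

template <class Size>
struct murmur2_or_cityhash<Size, 32> {       // murmur2 path
    Size operator()(const void* key, Size len) const;
};

template <class Size>
struct murmur2_or_cityhash<Size, 64> {       // cityhash64 path
    Size operator()(const void* key, Size len) const;
};

// Usage: murmur2_or_cityhash<std::size_t>()(ptr, n);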
@@ -3151,7 +3161,7 @@ __murmur2_or_cityhash<_Size, 32>::operator()(const void* __key, _Size __len)
     const unsigned char* __data = static_cast<const unsigned char*>(__key);
     for (; __len >= 4; __data += 4, __len -= 4)
     {
-        _Size __k = *(const _Size*)__data;
+        _Size __k = __loadword<_Size>(__data);
         __k *= __m;
         __k ^= __k >> __r;
         __k *= __m;
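For orientation, the hunk above is the main word loop of MurmurHash2. A self-contained sketch of the whole routine, using Austin Appleby's standard constants and a zero seed (illustrative, not the libc++ source verbatim):

#include <cstdint>
#include <cstring>

inline std::uint32_t murmur2_sketch(const void* key, std::size_t len) {
    const std::uint32_t m = 0x5bd1e995;
    const int r = 24;
    std::uint32_t h = static_cast<std::uint32_t>(len);
    const unsigned char* data = static_cast<const unsigned char*>(key);
    for (; len >= 4; data += 4, len -= 4) {
        std::uint32_t k;
        std::memcpy(&k, data, sizeof(k));    // alignment-safe load, as patched
        k *= m; k ^= k >> r; k *= m;         // mix the word
        h *= m; h ^= k;                      // fold into the running hash
    }
    switch (len) {                           // 0-3 trailing bytes
    case 3: h ^= static_cast<std::uint32_t>(data[2]) << 16; // fall through
    case 2: h ^= static_cast<std::uint32_t>(data[1]) << 8;  // fall through
    case 1: h ^= data[0];
            h *= m;
    }
    h ^= h >> 13; h *= m; h ^= h >> 15;      // final avalanche
    return h;
}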
@@ -3210,13 +3220,13 @@ struct __murmur2_or_cityhash<_Size, 64>
 
   static _Size __hash_len_0_to_16(const char* __s, _Size __len) {
     if (__len > 8) {
-      const _Size __a = *(const _Size*)__s;
-      const _Size __b = *(const _Size*)(__s + __len - 8);
+      const _Size __a = __loadword<_Size>(__s);
+      const _Size __b = __loadword<_Size>(__s + __len - 8);
       return __hash_len_16(__a, __rotate_by_at_least_1(__b + __len, __len)) ^ __b;
     }
     if (__len >= 4) {
-      const uint32_t __a = *(const uint32_t*)(__s);
-      const uint32_t __b = *(const uint32_t*)(__s + __len - 4);
+      const uint32_t __a = __loadword<uint32_t>(__s);
+      const uint32_t __b = __loadword<uint32_t>(__s + __len - 4);
       return __hash_len_16(__len + (__a << 3), __b);
     }
     if (__len > 0) {
@@ -3232,10 +3242,10 @@ struct __murmur2_or_cityhash<_Size, 64>
   }
 
   static _Size __hash_len_17_to_32(const char *__s, _Size __len) {
-    const _Size __a = *(const _Size*)(__s) * __k1;
-    const _Size __b = *(const _Size*)(__s + 8);
-    const _Size __c = *(const _Size*)(__s + __len - 8) * __k2;
-    const _Size __d = *(const _Size*)(__s + __len - 16) * __k0;
+    const _Size __a = __loadword<_Size>(__s) * __k1;
+    const _Size __b = __loadword<_Size>(__s + 8);
+    const _Size __c = __loadword<_Size>(__s + __len - 8) * __k2;
+    const _Size __d = __loadword<_Size>(__s + __len - 16) * __k0;
     return __hash_len_16(__rotate(__a - __b, 43) + __rotate(__c, 30) + __d,
                          __a + __rotate(__b ^ __k3, 20) - __c + __len);
   }
@@ -3256,33 +3266,33 @@ struct __murmur2_or_cityhash<_Size, 64>
   // Return a 16-byte hash for s[0] ... s[31], a, and b.  Quick and dirty.
   static pair<_Size, _Size> __weak_hash_len_32_with_seeds(
       const char* __s, _Size __a, _Size __b) {
-    return __weak_hash_len_32_with_seeds(*(const _Size*)(__s),
-                                         *(const _Size*)(__s + 8),
-                                         *(const _Size*)(__s + 16),
-                                         *(const _Size*)(__s + 24),
+    return __weak_hash_len_32_with_seeds(__loadword<_Size>(__s),
+                                         __loadword<_Size>(__s + 8),
+                                         __loadword<_Size>(__s + 16),
+                                         __loadword<_Size>(__s + 24),
                                          __a,
                                          __b);
   }
 
   // Return an 8-byte hash for 33 to 64 bytes.
   static _Size __hash_len_33_to_64(const char *__s, size_t __len) {
-    _Size __z = *(const _Size*)(__s + 24);
-    _Size __a = *(const _Size*)(__s) +
-                (__len + *(const _Size*)(__s + __len - 16)) * __k0;
+    _Size __z = __loadword<_Size>(__s + 24);
+    _Size __a = __loadword<_Size>(__s) +
+                (__len + __loadword<_Size>(__s + __len - 16)) * __k0;
     _Size __b = __rotate(__a + __z, 52);
     _Size __c = __rotate(__a, 37);
-    __a += *(const _Size*)(__s + 8);
+    __a += __loadword<_Size>(__s + 8);
     __c += __rotate(__a, 7);
-    __a += *(const _Size*)(__s + 16);
+    __a += __loadword<_Size>(__s + 16);
     _Size __vf = __a + __z;
     _Size __vs = __b + __rotate(__a, 31) + __c;
-    __a = *(const _Size*)(__s + 16) + *(const _Size*)(__s + __len - 32);
-    __z += *(const _Size*)(__s + __len - 8);
+    __a = __loadword<_Size>(__s + 16) + __loadword<_Size>(__s + __len - 32);
+    __z += __loadword<_Size>(__s + __len - 8);
     __b = __rotate(__a + __z, 52);
     __c = __rotate(__a, 37);
-    __a += *(const _Size*)(__s + __len - 24);
+    __a += __loadword<_Size>(__s + __len - 24);
     __c += __rotate(__a, 7);
-    __a += *(const _Size*)(__s + __len - 16);
+    __a += __loadword<_Size>(__s + __len - 16);
     _Size __wf = __a + __z;
     _Size __ws = __b + __rotate(__a, 31) + __c;
     _Size __r = __shift_mix((__vf + __ws) * __k2 + (__wf + __vs) * __k0);
@@ -3308,26 +3318,26 @@ __murmur2_or_cityhash<_Size, 64>::operator()(const void* __key, _Size __len)
 
   // For strings over 64 bytes we hash the end first, and then as we
  // loop we keep 56 bytes of state: v, w, x, y, and z.
-  _Size __x = *(const _Size*)(__s + __len - 40);
-  _Size __y = *(const _Size*)(__s + __len - 16) +
-              *(const _Size*)(__s + __len - 56);
-  _Size __z = __hash_len_16(*(const _Size*)(__s + __len - 48) + __len,
-                            *(const _Size*)(__s + __len - 24));
+  _Size __x = __loadword<_Size>(__s + __len - 40);
+  _Size __y = __loadword<_Size>(__s + __len - 16) +
+              __loadword<_Size>(__s + __len - 56);
+  _Size __z = __hash_len_16(__loadword<_Size>(__s + __len - 48) + __len,
+                            __loadword<_Size>(__s + __len - 24));
   pair<_Size, _Size> __v = __weak_hash_len_32_with_seeds(__s + __len - 64, __len, __z);
   pair<_Size, _Size> __w = __weak_hash_len_32_with_seeds(__s + __len - 32, __y + __k1, __x);
-  __x = __x * __k1 + *(const _Size*)(__s);
+  __x = __x * __k1 + __loadword<_Size>(__s);
 
   // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
   __len = (__len - 1) & ~static_cast<_Size>(63);
   do {
-    __x = __rotate(__x + __y + __v.first + *(const _Size*)(__s + 8), 37) * __k1;
-    __y = __rotate(__y + __v.second + *(const _Size*)(__s + 48), 42) * __k1;
+    __x = __rotate(__x + __y + __v.first + __loadword<_Size>(__s + 8), 37) * __k1;
+    __y = __rotate(__y + __v.second + __loadword<_Size>(__s + 48), 42) * __k1;
     __x ^= __w.second;
-    __y += __v.first + *(const _Size*)(__s + 40);
+    __y += __v.first + __loadword<_Size>(__s + 40);
     __z = __rotate(__z + __w.first, 33) * __k1;
     __v = __weak_hash_len_32_with_seeds(__s, __v.second * __k1, __x + __w.first);
     __w = __weak_hash_len_32_with_seeds(__s + 32, __z + __w.second,
-                                        __y + *(const _Size*)(__s + 16));
+                                        __y + __loadword<_Size>(__s + 16));
     std::swap(__z, __x);
     __s += 64;
     __len -= 64;
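A hypothetical before/after check of the fix (nothing below is from the commit): compiling the old cast-based load with clang's -fsanitize=alignment reports a misaligned access, while the memcpy form is clean and produces identical x86-64 codegen.

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
    alignas(8) unsigned char buf[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
    const unsigned char* p = buf + 1;        // deliberately misaligned

    // Old pattern: UB; -fsanitize=alignment flags this line if enabled.
    // std::uint64_t bad = *(const std::uint64_t*)p;

    // Patched pattern: well-defined for any alignment.
    std::uint64_t good;
    std::memcpy(&good, p, sizeof(good));
    std::printf("0x%016llx\n", static_cast<unsigned long long>(good));
    return 0;
}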