tsan: add new vector clock

Add new fixed-size vector clock for the new tsan runtime.
For now it's unused.

Reviewed By: melver

Differential Revision: https://reviews.llvm.org/D107167
Commit authored by Dmitry Vyukov on 2021-07-30 16:00:54 +02:00.
parent 7bd81fe183
commit 5c2b48fdb0
7 changed files with 310 additions and 15 deletions

View File

@ -51,6 +51,7 @@ set(TSAN_SOURCES
rtl/tsan_suppressions.cpp
rtl/tsan_symbolize.cpp
rtl/tsan_sync.cpp
rtl/tsan_vector_clock.cpp
)
set(TSAN_CXX_SOURCES
@ -105,6 +106,7 @@ set(TSAN_HEADERS
rtl/tsan_sync.h
rtl/tsan_trace.h
rtl/tsan_update_shadow_word_inl.h
rtl/tsan_vector_clock.h
)
set(TSAN_RUNTIME_LIBRARIES)

View File

@ -18,6 +18,24 @@
#include "sanitizer_common/sanitizer_mutex.h"
#include "ubsan/ubsan_platform.h"
// TSAN_VECTORIZE selects the SSE fast paths throughout the runtime.
// By default it is enabled exactly when the compiler targets SSE4.2
// (which also provides the SSE4.1 intrinsics such as _mm_max_epu16).
#ifndef TSAN_VECTORIZE
# define TSAN_VECTORIZE __SSE4_2__
#endif
#if TSAN_VECTORIZE
// <emmintrin.h> transitively includes <stdlib.h>,
// and it's prohibited to include std headers into tsan runtime.
// So we do this dirty trick.
# define _MM_MALLOC_H_INCLUDED
# define __MM_MALLOC_H
# include <emmintrin.h>
# include <smmintrin.h>
// 16-byte alignment required for aligned SSE loads/stores
// (_mm_load_si128/_mm_store_si128).
# define VECTOR_ALIGNED ALIGNED(16)
typedef __m128i m128;
#else
// Without vectorization no extra alignment is needed.
# define VECTOR_ALIGNED
#endif
// Setup defaults for compile definitions.
#ifndef TSAN_NO_HISTORY
# define TSAN_NO_HISTORY 0
@ -33,6 +51,14 @@
namespace __tsan {
// Thread slot ID.
enum class Sid : u8 {};
// Number of thread slots: the full value range of Sid (u8).
constexpr uptr kThreadSlotCount = 256;
// Abstract time unit, vector clock element.
enum class Epoch : u16 {};
constexpr Epoch kEpochZero = static_cast<Epoch>(0);
// NOTE(review): kClkBits/kMaxTidReuse appear to belong to the pre-existing
// clock encoding (bits per clock value / max TID reuse count packed into
// 64 bits) — confirm against the old clock implementation.
const int kClkBits = 42;
const unsigned kMaxTidReuse = (1 << (64 - kClkBits)) - 1;

View File

@ -28,16 +28,6 @@
#include "tsan_symbolize.h"
#include "ubsan/ubsan_init.h"
#ifdef __SSE3__
// <emmintrin.h> transitively includes <stdlib.h>,
// and it's prohibited to include std headers into tsan runtime.
// So we do this dirty trick.
#define _MM_MALLOC_H_INCLUDED
#define __MM_MALLOC_H
#include <emmintrin.h>
typedef __m128i m128;
#endif
volatile int __tsan_resumed = 0;
extern "C" void __tsan_resume() {
@ -779,10 +769,11 @@ bool ContainsSameAccessSlow(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
return false;
}
#if defined(__SSE3__)
#define SHUF(v0, v1, i0, i1, i2, i3) _mm_castps_si128(_mm_shuffle_ps( \
_mm_castsi128_ps(v0), _mm_castsi128_ps(v1), \
(i0)*1 + (i1)*4 + (i2)*16 + (i3)*64))
#if TSAN_VECTORIZE
# define SHUF(v0, v1, i0, i1, i2, i3) \
_mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(v0), \
_mm_castsi128_ps(v1), \
(i0)*1 + (i1)*4 + (i2)*16 + (i3)*64))
ALWAYS_INLINE
bool ContainsSameAccessFast(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
// This is an optimized version of ContainsSameAccessSlow.
@ -839,7 +830,7 @@ bool ContainsSameAccessFast(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
ALWAYS_INLINE
bool ContainsSameAccess(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
#if defined(__SSE3__)
#if TSAN_VECTORIZE
bool res = ContainsSameAccessFast(s, a, sync_epoch, is_write);
// NOTE: this check can fail if the shadow is concurrently mutated
// by other threads. But it still can be useful if you modify

View File

@ -0,0 +1,123 @@
//===-- tsan_vector_clock.cpp ---------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
//===----------------------------------------------------------------------===//
#include "tsan_vector_clock.h"
#include "sanitizer_common/sanitizer_placement_new.h"
#include "tsan_mman.h"
namespace __tsan {
#if TSAN_VECTORIZE
const uptr kVectorClockSize = kThreadSlotCount * sizeof(Epoch) / sizeof(m128);
#endif
VectorClock::VectorClock() { Reset(); }
void VectorClock::Reset() {
#if !TSAN_VECTORIZE
for (uptr i = 0; i < kMaxSid; i++) clk_[i] = kEpochZero;
#else
m128 z = _mm_setzero_si128();
m128* vclk = reinterpret_cast<m128*>(clk_);
for (uptr i = 0; i < kVectorClockSize; i++) _mm_store_si128(&vclk[i], z);
#endif
}
void VectorClock::Acquire(const VectorClock* src) {
if (!src)
return;
#if !TSAN_VECTORIZE
for (uptr i = 0; i < kMaxSid; i++) clk_[i] = max(clk_[i], src->clk_[i]);
#else
m128* __restrict vdst = reinterpret_cast<m128*>(clk_);
m128 const* __restrict vsrc = reinterpret_cast<m128 const*>(src->clk_);
for (uptr i = 0; i < kVectorClockSize; i++) {
m128 s = _mm_load_si128(&vsrc[i]);
m128 d = _mm_load_si128(&vdst[i]);
m128 m = _mm_max_epu16(s, d);
_mm_store_si128(&vdst[i], m);
}
#endif
}
// Returns the clock stored in *dstp, lazily allocating a fresh (zeroed)
// clock on first use.
static VectorClock* AllocClock(VectorClock** dstp) {
  VectorClock* clock = *dstp;
  if (UNLIKELY(!clock)) {
    clock = New<VectorClock>();
    *dstp = clock;
  }
  return clock;
}
// Release: joins this clock into *dstp (allocating the destination if
// it does not exist yet).
void VectorClock::Release(VectorClock** dstp) const {
  AllocClock(dstp)->Acquire(this);
}
// Release-store: overwrites *dstp with a copy of this clock (allocating
// the destination if it does not exist yet).
void VectorClock::ReleaseStore(VectorClock** dstp) const {
  *AllocClock(dstp) = *this;
}
// Element-wise copy of another clock into this one.
VectorClock& VectorClock::operator=(const VectorClock& other) {
#if !TSAN_VECTORIZE
  // Fixed: the loop bound was the undeclared kMaxSid; copy the full
  // kThreadSlotCount-element array.
  for (uptr i = 0; i < kThreadSlotCount; i++) clk_[i] = other.clk_[i];
#else
  // Vectorized path: copy 128 bits (8 epochs) per iteration.
  m128* __restrict vdst = reinterpret_cast<m128*>(clk_);
  m128 const* __restrict vsrc = reinterpret_cast<m128 const*>(other.clk_);
  for (uptr i = 0; i < kVectorClockSize; i++) {
    m128 s = _mm_load_si128(&vsrc[i]);
    _mm_store_si128(&vdst[i], s);
  }
#endif
  return *this;
}
// Combined release-store + acquire: *dstp becomes a copy of this clock's
// old value, and this clock becomes the element-wise maximum of the two.
void VectorClock::ReleaseStoreAcquire(VectorClock** dstp) {
  VectorClock* dst = AllocClock(dstp);
#if !TSAN_VECTORIZE
  // Fixed: the loop bound was the undeclared kMaxSid; process all
  // kThreadSlotCount elements.
  for (uptr i = 0; i < kThreadSlotCount; i++) {
    Epoch tmp = dst->clk_[i];
    dst->clk_[i] = clk_[i];
    clk_[i] = max(clk_[i], tmp);
  }
#else
  // Vectorized path: swap-and-max, 8 epochs per iteration.
  m128* __restrict vdst = reinterpret_cast<m128*>(dst->clk_);
  m128* __restrict vclk = reinterpret_cast<m128*>(clk_);
  for (uptr i = 0; i < kVectorClockSize; i++) {
    m128 t = _mm_load_si128(&vdst[i]);
    m128 c = _mm_load_si128(&vclk[i]);
    m128 m = _mm_max_epu16(c, t);
    _mm_store_si128(&vdst[i], c);
    _mm_store_si128(&vclk[i], m);
  }
#endif
}
// Combined release + acquire: both this clock and *dstp become the
// element-wise maximum of the two.
void VectorClock::ReleaseAcquire(VectorClock** dstp) {
  VectorClock* dst = AllocClock(dstp);
#if !TSAN_VECTORIZE
  // Fixed: the loop bound was the undeclared kMaxSid; process all
  // kThreadSlotCount elements.
  for (uptr i = 0; i < kThreadSlotCount; i++) {
    dst->clk_[i] = max(dst->clk_[i], clk_[i]);
    clk_[i] = dst->clk_[i];
  }
#else
  // Vectorized path: compute the max once and store it to both clocks.
  m128* __restrict vdst = reinterpret_cast<m128*>(dst->clk_);
  m128* __restrict vclk = reinterpret_cast<m128*>(clk_);
  for (uptr i = 0; i < kVectorClockSize; i++) {
    m128 c = _mm_load_si128(&vclk[i]);
    m128 d = _mm_load_si128(&vdst[i]);
    m128 m = _mm_max_epu16(c, d);
    _mm_store_si128(&vdst[i], m);
    _mm_store_si128(&vclk[i], m);
  }
#endif
}
} // namespace __tsan

View File

@ -0,0 +1,51 @@
//===-- tsan_vector_clock.h -------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
//===----------------------------------------------------------------------===//
#ifndef TSAN_VECTOR_CLOCK_H
#define TSAN_VECTOR_CLOCK_H
#include "tsan_defs.h"
namespace __tsan {
// Fixed-size vector clock, used both for threads and sync objects.
// Fixed-size vector clock, used both for threads and sync objects.
class VectorClock {
 public:
  // Starts zeroed (all elements kEpochZero).
  VectorClock();
  // Returns the epoch recorded for thread slot `sid`.
  Epoch Get(Sid sid) const;
  // Records epoch `v` for slot `sid`; DCHECKs that epochs never go backwards.
  void Set(Sid sid, Epoch v);
  // Resets all elements to kEpochZero.
  void Reset();
  // Element-wise maximum with *src (happens-before join); null src is a no-op.
  void Acquire(const VectorClock* src);
  // Joins this clock into *dstp, allocating the destination if needed.
  void Release(VectorClock** dstp) const;
  // Overwrites *dstp with a copy of this clock, allocating if needed.
  void ReleaseStore(VectorClock** dstp) const;
  // *dstp becomes a copy of this clock's old value; this clock becomes the
  // element-wise maximum of the two.
  void ReleaseStoreAcquire(VectorClock** dstp);
  // Both this clock and *dstp become the element-wise maximum of the two.
  void ReleaseAcquire(VectorClock** dstp);
  VectorClock& operator=(const VectorClock& other);

 private:
  // One epoch per thread slot; aligned so the vectorized paths can use
  // aligned 128-bit loads/stores.
  Epoch clk_[kThreadSlotCount] VECTOR_ALIGNED;
};
// Reads the epoch stored for thread slot `sid`.
ALWAYS_INLINE Epoch VectorClock::Get(Sid sid) const {
  const u8 slot = static_cast<u8>(sid);
  return clk_[slot];
}
// Stores epoch `v` for thread slot `sid`. Epochs must be monotonically
// non-decreasing per slot, which is checked in debug builds.
ALWAYS_INLINE void VectorClock::Set(Sid sid, Epoch v) {
  const u8 slot = static_cast<u8>(sid);
  DCHECK_GE(v, clk_[slot]);
  clk_[slot] = v;
}
} // namespace __tsan
#endif // TSAN_VECTOR_CLOCK_H

View File

@ -8,6 +8,7 @@ set(TSAN_UNIT_TEST_SOURCES
tsan_stack_test.cpp
tsan_sync_test.cpp
tsan_unit_test_main.cpp
tsan_vector_clock_test.cpp
)
add_tsan_unittest(TsanUnitTest

View File

@ -0,0 +1,101 @@
//===-- tsan_vector_clock_test.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
//===----------------------------------------------------------------------===//
#include "tsan_vector_clock.h"
#include "gtest/gtest.h"
#include "tsan_rtl.h"
namespace __tsan {
// Verifies basic Get/Set/Reset behavior across all kThreadSlotCount slots.
TEST(VectorClock, GetSet) {
  // Compiler won't ensure alignment on stack.
  VectorClock *vc = New<VectorClock>();
  // A fresh clock is fully zeroed.
  for (uptr i = 0; i < kThreadSlotCount; i++)
    ASSERT_EQ(vc->Get(static_cast<Sid>(i)), kEpochZero);
  // Each slot stores its own value independently.
  for (uptr i = 0; i < kThreadSlotCount; i++)
    vc->Set(static_cast<Sid>(i), static_cast<Epoch>(i));
  for (uptr i = 0; i < kThreadSlotCount; i++)
    ASSERT_EQ(vc->Get(static_cast<Sid>(i)), static_cast<Epoch>(i));
  // Reset zeroes every slot again.
  vc->Reset();
  for (uptr i = 0; i < kThreadSlotCount; i++)
    ASSERT_EQ(vc->Get(static_cast<Sid>(i)), kEpochZero);
  DestroyAndFree(vc);
}
// Exercises the pairwise operations: Acquire, Release, ReleaseStore,
// ReleaseAcquire and ReleaseStoreAcquire. The clocks are seeded with
// vc1[i] = i and vc2[i] = kThreadSlotCount - i so that the element-wise
// maximum is kThreadSlotCount - i for the first half and i for the second.
TEST(VectorClock, VectorOps) {
  VectorClock *vc1 = New<VectorClock>();
  VectorClock *vc2 = nullptr;
  VectorClock *vc3 = nullptr;
  // Acquire of a null clock is a no-op.
  vc1->Acquire(vc2);
  for (uptr i = 0; i < kThreadSlotCount; i++)
    ASSERT_EQ(vc1->Get(static_cast<Sid>(i)), kEpochZero);
  // Release into a null slot allocates the destination clock.
  vc1->Release(&vc2);
  EXPECT_NE(vc2, nullptr);
  // Acquiring the freshly-released (all-zero) clock changes nothing.
  vc1->Acquire(vc2);
  for (uptr i = 0; i < kThreadSlotCount; i++)
    ASSERT_EQ(vc1->Get(static_cast<Sid>(i)), kEpochZero);
  // Seed the two clocks with opposing ramps.
  for (uptr i = 0; i < kThreadSlotCount; i++) {
    vc1->Set(static_cast<Sid>(i), static_cast<Epoch>(i));
    vc2->Set(static_cast<Sid>(i), static_cast<Epoch>(kThreadSlotCount - i));
  }
  // Acquire: vc1 becomes max(vc1, vc2); vc2 is untouched.
  vc1->Acquire(vc2);
  for (uptr i = 0; i < kThreadSlotCount; i++) {
    ASSERT_EQ(vc1->Get(static_cast<Sid>(i)),
              static_cast<Epoch>(i < kThreadSlotCount / 2 ? kThreadSlotCount - i
                                                          : i));
    ASSERT_EQ(vc2->Get(static_cast<Sid>(i)),
              static_cast<Epoch>(kThreadSlotCount - i));
  }
  // ReleaseStore: vc3 becomes an exact copy of vc2; vc2 is untouched.
  vc2->ReleaseStore(&vc3);
  for (uptr i = 0; i < kThreadSlotCount; i++) {
    ASSERT_EQ(vc3->Get(static_cast<Sid>(i)),
              static_cast<Epoch>(kThreadSlotCount - i));
    ASSERT_EQ(vc2->Get(static_cast<Sid>(i)),
              static_cast<Epoch>(kThreadSlotCount - i));
  }
  // Re-seed for ReleaseAcquire.
  vc1->Reset();
  vc2->Reset();
  for (uptr i = 0; i < kThreadSlotCount; i++) {
    vc1->Set(static_cast<Sid>(i), static_cast<Epoch>(i));
    vc2->Set(static_cast<Sid>(i), static_cast<Epoch>(kThreadSlotCount - i));
  }
  // ReleaseAcquire: both clocks become the element-wise maximum.
  vc1->ReleaseAcquire(&vc2);
  for (uptr i = 0; i < kThreadSlotCount; i++) {
    Epoch expect =
        static_cast<Epoch>(i < kThreadSlotCount / 2 ? kThreadSlotCount - i : i);
    ASSERT_EQ(vc1->Get(static_cast<Sid>(i)), expect);
    ASSERT_EQ(vc2->Get(static_cast<Sid>(i)), expect);
  }
  // Re-seed for ReleaseStoreAcquire.
  vc1->Reset();
  vc2->Reset();
  for (uptr i = 0; i < kThreadSlotCount; i++) {
    vc1->Set(static_cast<Sid>(i), static_cast<Epoch>(i));
    vc2->Set(static_cast<Sid>(i), static_cast<Epoch>(kThreadSlotCount - i));
  }
  // ReleaseStoreAcquire: vc2 becomes vc1's old value (i), while vc1
  // becomes the element-wise maximum of the two.
  vc1->ReleaseStoreAcquire(&vc2);
  for (uptr i = 0; i < kThreadSlotCount; i++) {
    ASSERT_EQ(vc1->Get(static_cast<Sid>(i)),
              static_cast<Epoch>(i < kThreadSlotCount / 2 ? kThreadSlotCount - i
                                                          : i));
    ASSERT_EQ(vc2->Get(static_cast<Sid>(i)), static_cast<Epoch>(i));
  }
  DestroyAndFree(vc1);
  DestroyAndFree(vc2);
  DestroyAndFree(vc3);
}
} // namespace __tsan