forked from OSchip/llvm-project
tsan: optimize __tsan_read/write16
These callbacks are used for SSE vector accesses. In some computational programs these accesses dominate. Currently we do 2 uninlined 8-byte accesses to handle them. Inline and optimize them similarly to unaligned accesses. This reduces the vector access benchmark time from 8 to 3 seconds. Depends on D112603. Reviewed By: melver Differential Revision: https://reviews.llvm.org/D114594
This commit is contained in:
parent
4f103e9561
commit
abb825725e
|
@ -26,20 +26,6 @@ void __tsan_flush_memory() {
|
|||
FlushShadowMemory();
|
||||
}
|
||||
|
||||
void __tsan_read16(void *addr) {
|
||||
uptr pc = CALLERPC;
|
||||
ThreadState *thr = cur_thread();
|
||||
MemoryAccess(thr, pc, (uptr)addr, 8, kAccessRead);
|
||||
MemoryAccess(thr, pc, (uptr)addr + 8, 8, kAccessRead);
|
||||
}
|
||||
|
||||
void __tsan_write16(void *addr) {
|
||||
uptr pc = CALLERPC;
|
||||
ThreadState *thr = cur_thread();
|
||||
MemoryAccess(thr, pc, (uptr)addr, 8, kAccessWrite);
|
||||
MemoryAccess(thr, pc, (uptr)addr + 8, 8, kAccessWrite);
|
||||
}
|
||||
|
||||
void __tsan_read16_pc(void *addr, void *pc) {
|
||||
uptr pc_no_pac = STRIP_PAC_PC(pc);
|
||||
ThreadState *thr = cur_thread();
|
||||
|
|
|
@ -34,6 +34,10 @@ void __tsan_read8(void *addr) {
|
|||
MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 8, kAccessRead);
|
||||
}
|
||||
|
||||
void __tsan_read16(void *addr) {
|
||||
MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr, kAccessRead);
|
||||
}
|
||||
|
||||
void __tsan_write1(void *addr) {
|
||||
MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 1, kAccessWrite);
|
||||
}
|
||||
|
@ -50,6 +54,10 @@ void __tsan_write8(void *addr) {
|
|||
MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 8, kAccessWrite);
|
||||
}
|
||||
|
||||
void __tsan_write16(void *addr) {
|
||||
MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr, kAccessWrite);
|
||||
}
|
||||
|
||||
void __tsan_read1_pc(void *addr, void *pc) {
|
||||
MemoryAccess(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, 1, kAccessRead | kAccessExternalPC);
|
||||
}
|
||||
|
|
|
@ -451,6 +451,44 @@ ALWAYS_INLINE USED void MemoryAccess(ThreadState* thr, uptr pc, uptr addr,
|
|||
CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
|
||||
}
|
||||
|
||||
void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr, AccessType typ);
|
||||
|
||||
// Slow path taken when the current trace part is full: switch to a new
// trace part and redo the whole 16-byte access from scratch.
// NOINLINE keeps this cold path out of the inlined MemoryAccess16 body.
NOINLINE
void RestartMemoryAccess16(ThreadState* thr, uptr pc, uptr addr,
                           AccessType typ) {
  TraceSwitchPart(thr);
  // Retry; MemoryAccess16 re-reads all state, so restarting is safe.
  MemoryAccess16(thr, pc, addr, typ);
}
|
||||
|
||||
// Handles a 16-byte access (used for SSE vector loads/stores) as two
// 8-byte shadow checks over adjacent shadow cells, tracing the whole
// 16-byte range at most once.
ALWAYS_INLINE USED void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr,
                                       AccessType typ) {
  const uptr size = 16;
  FastState fast_state = thr->fast_state;
  // Accesses are ignored for this thread (e.g. inside an ignore region).
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  // Shadow value for an aligned 8-byte access; reused for both halves.
  Shadow cur(fast_state, 0, 8, typ);
  RawShadow* shadow_mem = MemToShadow(addr);
  // Tracks whether the 16-byte range has already been traced, so the
  // second half does not trace it again.
  bool traced = false;
  // Scoped block: LOAD_CURRENT_SHADOW presumably declares locals
  // (`shadow`, `access`) — the braces keep the goto below from jumping
  // over their declarations. TODO(review): confirm against macro def.
  {
    // First 8-byte half.
    LOAD_CURRENT_SHADOW(cur, shadow_mem);
    // Fast path: identical access already recorded in the shadow.
    if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
      goto SECOND;
    // Trace the full 16-byte range once; on trace-part overflow, restart
    // the whole access via the cold slow path.
    if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
      return RestartMemoryAccess16(thr, pc, addr, typ);
    traced = true;
    // CheckRaces returning true means the access was fully handled
    // (race reported or shadow updated terminally) — stop here.
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
  }
SECOND:
  // Second 8-byte half: advance to the next shadow cell group.
  shadow_mem += kShadowCnt;
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  // Only trace if the first half skipped tracing via the fast path.
  if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartMemoryAccess16(thr, pc, addr, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}
|
||||
|
||||
NOINLINE
|
||||
void RestartUnalignedMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
|
||||
uptr size, AccessType typ) {
|
||||
|
|
Loading…
Reference in New Issue