tsan: optimize __tsan_read/write16

These callbacks are used for SSE vector accesses.
In some computational programs these accesses dominate.
Currently we handle them with 2 uninlined 8-byte accesses.
Inline and optimize them similarly to unaligned accesses.
This reduces the vector access benchmark time from 8 to 3 seconds.
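For context, a minimal sketch (illustrative, not part of the commit) of the kind of code that hits these callbacks; it assumes x86 SSE intrinsics and building with -fsanitize=thread:

  #include <emmintrin.h>

  // With ThreadSanitizer instrumentation, the compiler emits a call to
  // __tsan_read16(src) before the 16-byte load and __tsan_write16(dst)
  // before the 16-byte store.
  void copy_vec(float *dst, const float *src) {
    __m128 v = _mm_loadu_ps(src);  // 16-byte vector read
    _mm_storeu_ps(dst, v);         // 16-byte vector write
  }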

Depends on D112603.

Reviewed By: melver

Differential Revision: https://reviews.llvm.org/D114594
Author: Dmitry Vyukov
Date:   2021-11-25 15:44:19 +01:00
Parent: 4f103e9561
Commit: abb825725e

3 changed files with 46 additions and 14 deletions


@@ -26,20 +26,6 @@ void __tsan_flush_memory() {
   FlushShadowMemory();
 }

-void __tsan_read16(void *addr) {
-  uptr pc = CALLERPC;
-  ThreadState *thr = cur_thread();
-  MemoryAccess(thr, pc, (uptr)addr, 8, kAccessRead);
-  MemoryAccess(thr, pc, (uptr)addr + 8, 8, kAccessRead);
-}
-
-void __tsan_write16(void *addr) {
-  uptr pc = CALLERPC;
-  ThreadState *thr = cur_thread();
-  MemoryAccess(thr, pc, (uptr)addr, 8, kAccessWrite);
-  MemoryAccess(thr, pc, (uptr)addr + 8, 8, kAccessWrite);
-}
-
 void __tsan_read16_pc(void *addr, void *pc) {
   uptr pc_no_pac = STRIP_PAC_PC(pc);
   ThreadState *thr = cur_thread();


@@ -34,6 +34,10 @@ void __tsan_read8(void *addr) {
   MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 8, kAccessRead);
 }

+void __tsan_read16(void *addr) {
+  MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr, kAccessRead);
+}
+
 void __tsan_write1(void *addr) {
   MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 1, kAccessWrite);
 }
@@ -50,6 +54,10 @@ void __tsan_write8(void *addr) {
   MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 8, kAccessWrite);
 }

+void __tsan_write16(void *addr) {
+  MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr, kAccessWrite);
+}
+
 void __tsan_read1_pc(void *addr, void *pc) {
   MemoryAccess(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, 1, kAccessRead | kAccessExternalPC);
 }
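Note (explanatory, not part of the diff): with this change the __tsan_read16/__tsan_write16 entry points become thin inline wrappers around MemoryAccess16, added in the third file below, instead of issuing two separate out-of-line 8-byte MemoryAccess calls as before.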


@@ -451,6 +451,44 @@ ALWAYS_INLINE USED void MemoryAccess(ThreadState* thr, uptr pc, uptr addr,
   CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
 }

+void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr, AccessType typ);
+
+NOINLINE
+void RestartMemoryAccess16(ThreadState* thr, uptr pc, uptr addr,
+                           AccessType typ) {
+  TraceSwitchPart(thr);
+  MemoryAccess16(thr, pc, addr, typ);
+}
+
+ALWAYS_INLINE USED void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr,
+                                       AccessType typ) {
+  const uptr size = 16;
+  FastState fast_state = thr->fast_state;
+  if (UNLIKELY(fast_state.GetIgnoreBit()))
+    return;
+  Shadow cur(fast_state, 0, 8, typ);
+  RawShadow* shadow_mem = MemToShadow(addr);
+  bool traced = false;
+  {
+    LOAD_CURRENT_SHADOW(cur, shadow_mem);
+    if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
+      goto SECOND;
+    if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
+      return RestartMemoryAccess16(thr, pc, addr, typ);
+    traced = true;
+    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
+      return;
+  }
+SECOND:
+  shadow_mem += kShadowCnt;
+  LOAD_CURRENT_SHADOW(cur, shadow_mem);
+  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
+    return;
+  if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
+    return RestartMemoryAccess16(thr, pc, addr, typ);
+  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
+}
+
 NOINLINE
 void RestartUnalignedMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                   uptr size, AccessType typ) {
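Note (explanatory, not part of the diff): MemoryAccess16 checks the 16-byte access as two adjacent 8-byte shadow granules; the second granule's shadow sits kShadowCnt slots after the first (shadow_mem += kShadowCnt). The `shadow` and `access` values used by ContainsSameAccess and CheckRaces are locals declared by the LOAD_CURRENT_SHADOW macro defined earlier in the same file. The `traced` flag ensures the trace entry for the full 16-byte range is written at most once across the two granule checks, and RestartMemoryAccess16 retries the whole access after TraceSwitchPart when the current trace part is full.

A hypothetical user-level scenario that exercises the second granule check (a sketch only; names, sizes, and layout are illustrative):

  #include <emmintrin.h>

  alignas(16) char buf[24];

  // Thread A writes bytes 0..15, thread B writes bytes 8..23 without
  // synchronization. They overlap only in granule [8..15], which is the
  // second granule of A's access, so if A's access is checked after B's,
  // the race is caught by the check following the SECOND label.
  void thread_a() { _mm_store_si128((__m128i *)buf, _mm_setzero_si128()); }
  void thread_b() { _mm_storeu_si128((__m128i *)(buf + 8), _mm_setzero_si128()); }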