[sanitizer] Simplify GetTls with dl_iterate_phdr on Linux

An earlier version of this change was reverted in f176803ef1 due to problems on
Ubuntu 16.04 x86-64 (glibc 2.23). This commit additionally calls
`__tls_get_addr({modid,0})` to work around the dlpi_tls_data==NULL issue for
glibc<2.25
(https://sourceware.org/bugzilla/show_bug.cgi?id=19826).
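
As a minimal sketch of that workaround (it mirrors the patch's TouchTlsBlock
callback rather than adding anything new; `TouchModuleTls` is just an
illustrative name):

```cpp
#include <stddef.h>

// Requesting offset 0 from __tls_get_addr makes glibc allocate the static TLS
// block for `modid`, so a later dl_iterate_phdr pass sees a non-null
// dlpi_tls_data. The size_t[2] layout matches glibc's tls_index
// {module, offset}; on i386 the entry point and calling convention differ.
extern "C" void *__tls_get_addr(size_t *);

void TouchModuleTls(size_t modid) {
  size_t mod_and_off[2] = {modid, 0};
  if (modid != 0)
    __tls_get_addr(mod_and_off);
}
```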

GetTls returns the range covering:

* the thread control block and the optional TLS_PRE_TCB_SIZE region
* the static TLS blocks plus the static TLS surplus

On glibc, lsan requires the range to include
`pthread::{specific_1stblock,specific}` so that allocations whose only
reference was installed with `pthread_setspecific` can be scanned.
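
For illustration only (this snippet is not part of the patch), this is the
scenario lsan has to handle: the sole reference to a heap block ends up in the
thread descriptor's TSD slots (`specific_1stblock` for low-numbered keys), so
the block is only reachable if GetTls covers that region:

```cpp
#include <pthread.h>
#include <stdlib.h>

static pthread_key_t key;

void stash_in_tsd(void) {
  pthread_key_create(&key, free);
  void *p = malloc(64);
  // After this call the only pointer to `p` lives inside the calling thread's
  // pthread descriptor; if GetTls did not cover the descriptor, lsan would
  // report `p` as leaked.
  pthread_setspecific(key, p);
}
```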

This patch uses `dl_iterate_phdr` to collect the static TLS blocks: find the
block with `dlpi_tls_modid==1` (it belongs to one of the initially loaded
modules), then extend to the neighboring blocks that are laid out
consecutively. The boundaries of that run give us the address and size.
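
The diff below contains the real implementation
(CollectStaticTlsBlocks/GetStaticTlsBoundary); as a compilable illustration of
the same idea, here is a standalone sketch, assuming glibc >= 2.25 so that
dlpi_tls_data is already populated (older glibc needs the __tls_get_addr
workaround above):

```cpp
#include <link.h>

#include <algorithm>
#include <cinttypes>
#include <cstdio>
#include <vector>

struct Block {
  uintptr_t begin, end, align;
  size_t modid;
};

static int Collect(dl_phdr_info *info, size_t, void *data) {
  if (!info->dlpi_tls_data)  // module has no PT_TLS (or glibc<2.25 left it 0)
    return 0;
  for (unsigned i = 0; i != info->dlpi_phnum; ++i)
    if (info->dlpi_phdr[i].p_type == PT_TLS) {
      uintptr_t begin = (uintptr_t)info->dlpi_tls_data;
      static_cast<std::vector<Block> *>(data)->push_back(
          {begin, begin + (uintptr_t)info->dlpi_phdr[i].p_memsz,
           (uintptr_t)info->dlpi_phdr[i].p_align, info->dlpi_tls_modid});
      break;
    }
  return 0;
}

int main() {
  std::vector<Block> blocks;
  dl_iterate_phdr(Collect, &blocks);
  std::sort(blocks.begin(), blocks.end(),
            [](const Block &a, const Block &b) { return a.begin < b.begin; });
  // Locate the block with modid 1; on glibc it always exists because libc.so
  // itself uses PT_TLS.
  size_t one = 0;
  while (one != blocks.size() && blocks[one].modid != 1) ++one;
  if (one == blocks.size()) return 0;  // e.g. musl with no PT_TLS module
  // Grow the run of consecutive blocks around it; two blocks count as
  // consecutive if the gap between them is smaller than the alignment.
  size_t l = one, r = one + 1;
  while (l != 0 && blocks[l].begin < blocks[l - 1].end + blocks[l - 1].align)
    --l;
  while (r != blocks.size() &&
         blocks[r].begin < blocks[r - 1].end + blocks[r - 1].align)
    ++r;
  printf("static TLS: [0x%" PRIxPTR ", 0x%" PRIxPTR ")\n", blocks[l].begin,
         blocks[r - 1].end);
}
```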

This lets us drop the glibc-internal `_dl_get_tls_static_info` and
`InitTlsSize` entirely. The simplified method is used on non-Android Linux for
now, but in principle it can also be used on *BSD and other ELF OSes.

This simplification enables D99566 for TLS Variant I architectures.

See https://reviews.llvm.org/D93972#2480556 for an analysis of GetTls usage
across the various sanitizers.

Differential Revision: https://reviews.llvm.org/D98926
Fangrui Song 2021-04-04 15:35:53 -07:00
parent 9abff04e50
commit ec575e3b0a
18 changed files with 128 additions and 185 deletions

@@ -490,9 +490,6 @@ static void AsanInitInternal() {
if (flags()->start_deactivated)
AsanDeactivate();
// interceptors
InitTlsSize();
// Create main thread.
AsanThread *main_thread = CreateMainThread();
CHECK_EQ(0, main_thread->tid());
@@ -568,7 +565,7 @@ void UnpoisonStack(uptr bottom, uptr top, const char *type) {
type, top, bottom, top - bottom, top - bottom);
return;
}
PoisonShadow(bottom, top - bottom, 0);
PoisonShadow(bottom, RoundUpTo(top - bottom, SHADOW_GRANULARITY), 0);
}
static void UnpoisonDefaultStack() {

@@ -307,7 +307,7 @@ void AsanThread::SetThreadStackAndTls(const InitOptions *options) {
uptr stack_size = 0;
GetThreadStackAndTls(tid() == 0, &stack_bottom_, &stack_size, &tls_begin_,
&tls_size);
stack_top_ = stack_bottom_ + stack_size;
stack_top_ = RoundDownTo(stack_bottom_ + stack_size, SHADOW_GRANULARITY);
tls_end_ = tls_begin_ + tls_size;
dtls_ = DTLS_Get();

@@ -265,8 +265,6 @@ void __hwasan_init() {
hwasan_init_is_running = 1;
SanitizerToolName = "HWAddressSanitizer";
InitTlsSize();
CacheBinaryName();
InitializeFlags();

@@ -98,7 +98,6 @@ extern "C" void __lsan_init() {
InitCommonLsan();
InitializeAllocator();
ReplaceSystemMalloc();
InitTlsSize();
InitializeInterceptors();
InitializeThreadRegistry();
InstallDeadlySignalHandlers(LsanOnDeadlySignal);

@@ -214,9 +214,6 @@ static void MemprofInitInternal() {
InitializeCoverage(common_flags()->coverage, common_flags()->coverage_dir);
// interceptors
InitTlsSize();
// Create main thread.
MemprofThread *main_thread = CreateMainThread();
CHECK_EQ(0, main_thread->tid());

@@ -436,7 +436,6 @@ void __msan_init() {
InitializeInterceptors();
CheckASLR();
InitTlsSize();
InstallDeadlySignalHandlers(MsanOnDeadlySignal);
InstallAtExitHandler(); // Needs __cxa_atexit interceptor.

@@ -284,7 +284,6 @@ void SetSandboxingCallback(void (*f)());
void InitializeCoverage(bool enabled, const char *coverage_dir);
void InitTlsSize();
uptr GetTlsSize();
// Other

@@ -103,7 +103,6 @@ void DisableCoreDumperIfNecessary() {}
void InstallDeadlySignalHandlers(SignalHandlerType handler) {}
void SetAlternateSignalStack() {}
void UnsetAlternateSignalStack() {}
void InitTlsSize() {}
bool SignalContext::IsStackOverflow() const { return false; }
void SignalContext::DumpAllRegisters(void *context) { UNIMPLEMENTED(); }

@@ -98,7 +98,6 @@ class ThreadLister {
// Exposed for testing.
uptr ThreadDescriptorSize();
uptr ThreadSelf();
uptr ThreadSelfOffset();
// Matches a library's file name against a base name (stripping path and version
// information).

@@ -188,80 +188,8 @@ __attribute__((unused)) static bool GetLibcVersion(int *major, int *minor,
#endif
}
#if SANITIZER_GLIBC && !SANITIZER_GO
static uptr g_tls_size;
#ifdef __i386__
#define CHECK_GET_TLS_STATIC_INFO_VERSION (!__GLIBC_PREREQ(2, 27))
#else
#define CHECK_GET_TLS_STATIC_INFO_VERSION 0
#endif
#if CHECK_GET_TLS_STATIC_INFO_VERSION
#define DL_INTERNAL_FUNCTION __attribute__((regparm(3), stdcall))
#else
#define DL_INTERNAL_FUNCTION
#endif
namespace {
struct GetTlsStaticInfoCall {
typedef void (*get_tls_func)(size_t*, size_t*);
};
struct GetTlsStaticInfoRegparmCall {
typedef void (*get_tls_func)(size_t*, size_t*) DL_INTERNAL_FUNCTION;
};
template <typename T>
void CallGetTls(void* ptr, size_t* size, size_t* align) {
typename T::get_tls_func get_tls;
CHECK_EQ(sizeof(get_tls), sizeof(ptr));
internal_memcpy(&get_tls, &ptr, sizeof(ptr));
CHECK_NE(get_tls, 0);
get_tls(size, align);
}
bool CmpLibcVersion(int major, int minor, int patch) {
int ma;
int mi;
int pa;
if (!GetLibcVersion(&ma, &mi, &pa))
return false;
if (ma > major)
return true;
if (ma < major)
return false;
if (mi > minor)
return true;
if (mi < minor)
return false;
return pa >= patch;
}
} // namespace
void InitTlsSize() {
// all current supported platforms have 16 bytes stack alignment
const size_t kStackAlign = 16;
void *get_tls_static_info_ptr = dlsym(RTLD_NEXT, "_dl_get_tls_static_info");
size_t tls_size = 0;
size_t tls_align = 0;
// On i?86, _dl_get_tls_static_info used to be internal_function, i.e.
// __attribute__((regparm(3), stdcall)) before glibc 2.27 and is normal
// function in 2.27 and later.
if (CHECK_GET_TLS_STATIC_INFO_VERSION && !CmpLibcVersion(2, 27, 0))
CallGetTls<GetTlsStaticInfoRegparmCall>(get_tls_static_info_ptr,
&tls_size, &tls_align);
else
CallGetTls<GetTlsStaticInfoCall>(get_tls_static_info_ptr,
&tls_size, &tls_align);
if (tls_align < kStackAlign)
tls_align = kStackAlign;
g_tls_size = RoundUpTo(tls_size, tls_align);
}
#else
void InitTlsSize() { }
#endif // SANITIZER_GLIBC && !SANITIZER_GO
// ThreadDescriptorSize() is only used by lsan to get the pointer to
// thread-specific data keys in the thread control block.
#if (defined(__x86_64__) || defined(__i386__) || defined(__mips__) || \
defined(__aarch64__) || defined(__powerpc64__) || defined(__s390__) || \
defined(__arm__) || SANITIZER_RISCV64) && \
@@ -334,13 +262,6 @@ uptr ThreadDescriptorSize() {
return val;
}
// The offset at which pointer to self is located in the thread descriptor.
const uptr kThreadSelfOffset = FIRST_32_SECOND_64(8, 16);
uptr ThreadSelfOffset() {
return kThreadSelfOffset;
}
#if defined(__mips__) || defined(__powerpc64__) || SANITIZER_RISCV64
// TlsPreTcbSize includes size of struct pthread_descr and size of tcb
// head structure. It lies before the static tls blocks.
@@ -359,48 +280,73 @@ static uptr TlsPreTcbSize() {
}
#endif
uptr ThreadSelf() {
uptr descr_addr;
#if defined(__i386__)
asm("mov %%gs:%c1,%0" : "=r"(descr_addr) : "i"(kThreadSelfOffset));
#elif defined(__x86_64__)
asm("mov %%fs:%c1,%0" : "=r"(descr_addr) : "i"(kThreadSelfOffset));
#elif defined(__mips__)
// MIPS uses TLS variant I. The thread pointer (in hardware register $29)
// points to the end of the TCB + 0x7000. The pthread_descr structure is
// immediately in front of the TCB. TlsPreTcbSize() includes the size of the
// TCB and the size of pthread_descr.
const uptr kTlsTcbOffset = 0x7000;
uptr thread_pointer;
asm volatile(".set push;\
.set mips64r2;\
rdhwr %0,$29;\
.set pop" : "=r" (thread_pointer));
descr_addr = thread_pointer - kTlsTcbOffset - TlsPreTcbSize();
#elif defined(__aarch64__) || defined(__arm__)
descr_addr = reinterpret_cast<uptr>(__builtin_thread_pointer()) -
ThreadDescriptorSize();
#elif SANITIZER_RISCV64
// https://github.com/riscv/riscv-elf-psabi-doc/issues/53
uptr thread_pointer = reinterpret_cast<uptr>(__builtin_thread_pointer());
descr_addr = thread_pointer - TlsPreTcbSize();
#elif defined(__s390__)
descr_addr = reinterpret_cast<uptr>(__builtin_thread_pointer());
#elif defined(__powerpc64__)
// PPC64LE uses TLS variant I. The thread pointer (in GPR 13)
// points to the end of the TCB + 0x7000. The pthread_descr structure is
// immediately in front of the TCB. TlsPreTcbSize() includes the size of the
// TCB and the size of pthread_descr.
const uptr kTlsTcbOffset = 0x7000;
uptr thread_pointer;
asm("addi %0,13,%1" : "=r"(thread_pointer) : "I"(-kTlsTcbOffset));
descr_addr = thread_pointer - TlsPreTcbSize();
#else
#error "unsupported CPU arch"
#endif
return descr_addr;
#if !SANITIZER_GO
namespace {
struct TlsBlock {
uptr begin, end, align;
size_t tls_modid;
bool operator<(const TlsBlock &rhs) const { return begin < rhs.begin; }
};
} // namespace
extern "C" void *__tls_get_addr(size_t *);
static int TouchTlsBlock(struct dl_phdr_info *info, size_t size,
void *data) {
size_t mod_and_off[2] = {info->dlpi_tls_modid, 0};
if (mod_and_off[0] != 0)
__tls_get_addr(mod_and_off);
return 0;
}
#endif // (x86_64 || i386 || MIPS) && SANITIZER_LINUX
static int CollectStaticTlsBlocks(struct dl_phdr_info *info, size_t size,
void *data) {
if (!info->dlpi_tls_data)
return 0;
const uptr begin = (uptr)info->dlpi_tls_data;
for (unsigned i = 0; i != info->dlpi_phnum; ++i)
if (info->dlpi_phdr[i].p_type == PT_TLS) {
static_cast<InternalMmapVector<TlsBlock> *>(data)->push_back(
TlsBlock{begin, begin + info->dlpi_phdr[i].p_memsz,
info->dlpi_phdr[i].p_align, info->dlpi_tls_modid});
break;
}
return 0;
}
static void GetStaticTlsBoundary(uptr *addr, uptr *size, uptr *align) {
InternalMmapVector<TlsBlock> ranges;
dl_iterate_phdr(CollectStaticTlsBlocks, &ranges);
uptr len = ranges.size();
Sort(ranges.begin(), len);
// Find the range with tls_modid=1. For glibc, because libc.so uses PT_TLS,
// this module is guaranteed to exist and is one of the initially loaded
// modules.
uptr one = 0;
while (one != len && ranges[one].tls_modid != 1) ++one;
if (one == len) {
// This may happen with musl if no module uses PT_TLS.
*addr = 0;
*size = 0;
*align = 1;
return;
}
// Find the maximum consecutive ranges. We consider two modules consecutive if
// the gap is smaller than the alignment. The dynamic loader places static TLS
// blocks this way not to waste space.
uptr l = one;
*align = ranges[l].align;
while (l != 0 && ranges[l].begin < ranges[l - 1].end + ranges[l - 1].align)
*align = Max(*align, ranges[--l].align);
uptr r = one + 1;
while (r != len && ranges[r].begin < ranges[r - 1].end + ranges[r - 1].align)
*align = Max(*align, ranges[r++].align);
*addr = ranges[l].begin;
*size = ranges[r - 1].end - ranges[l].begin;
}
#endif // !SANITIZER_GO
#endif // (x86_64 || i386 || mips || ...) && SANITIZER_LINUX &&
// !SANITIZER_ANDROID
#if SANITIZER_FREEBSD
static void **ThreadSelfSegbase() {
@@ -472,18 +418,53 @@ static void GetTls(uptr *addr, uptr *size) {
*size = 0;
}
#elif SANITIZER_LINUX
// glibc before 2.25 (BZ #19826) does not set dlpi_tls_data. Call
// __tls_get_addr to force allocation. For safety, skip grte/v4-2.19 for now.
int major, minor, patch;
if (GetLibcVersion(&major, &minor, &patch) && major == 2 && minor < 25 &&
minor != 19)
dl_iterate_phdr(TouchTlsBlock, nullptr);
uptr align;
GetStaticTlsBoundary(addr, size, &align);
#if defined(__x86_64__) || defined(__i386__) || defined(__s390__)
*addr = ThreadSelf();
*size = GetTlsSize();
*addr -= *size;
*addr += ThreadDescriptorSize();
#elif defined(__mips__) || defined(__aarch64__) || defined(__powerpc64__) || \
defined(__arm__) || SANITIZER_RISCV64
*addr = ThreadSelf();
*size = GetTlsSize();
if (SANITIZER_GLIBC) {
#if defined(__s390__)
align = Max<uptr>(align, 16);
#else
*addr = 0;
*size = 0;
align = Max<uptr>(align, 64);
#endif
}
const uptr tp = RoundUpTo(*addr + *size, align);
// lsan requires the range to additionally cover the static TLS surplus
// (elf/dl-tls.c defines 1664). Otherwise there may be false positives for
// allocations only referenced by tls in dynamically loaded modules.
if (SANITIZER_GLIBC)
*size += 1644;
// Extend the range to include the thread control block. On glibc, lsan needs
// the range to include pthread::{specific_1stblock,specific} so that
// allocations only referenced by pthread_setspecific can be scanned. This may
// underestimate by at most TLS_TCB_ALIGN-1 bytes but it should be fine
// because the number of bytes after pthread::specific is larger.
*addr = tp - RoundUpTo(*size, align);
*size = tp - *addr + ThreadDescriptorSize();
#else
if (SANITIZER_GLIBC)
*size += 1664;
#if defined(__mips__) || defined(__powerpc64__) || SANITIZER_RISCV64
const uptr pre_tcb_size = TlsPreTcbSize();
*addr -= pre_tcb_size;
*size += pre_tcb_size;
#else
// arm and aarch64 reserve two words at TP, so this underestimates the range.
// However, this is sufficient for the purpose of finding the pointers to
// thread-specific data keys.
const uptr tcb_size = ThreadDescriptorSize();
*addr -= tcb_size;
*size += tcb_size;
#endif
#endif
#elif SANITIZER_FREEBSD
void** segbase = ThreadSelfSegbase();
@@ -524,17 +505,11 @@ static void GetTls(uptr *addr, uptr *size) {
#if !SANITIZER_GO
uptr GetTlsSize() {
#if SANITIZER_FREEBSD || SANITIZER_ANDROID || SANITIZER_NETBSD || \
#if SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD || \
SANITIZER_SOLARIS
uptr addr, size;
GetTls(&addr, &size);
return size;
#elif SANITIZER_GLIBC
#if defined(__mips__) || defined(__powerpc64__) || SANITIZER_RISCV64
return RoundUpTo(g_tls_size + TlsPreTcbSize(), 16);
#else
return g_tls_size;
#endif
#else
return 0;
#endif
@@ -557,10 +532,9 @@ void GetThreadStackAndTls(bool main, uptr *stk_addr, uptr *stk_size,
if (!main) {
// If stack and tls intersect, make them non-intersecting.
if (*tls_addr > *stk_addr && *tls_addr < *stk_addr + *stk_size) {
CHECK_GT(*tls_addr + *tls_size, *stk_addr);
CHECK_LE(*tls_addr + *tls_size, *stk_addr + *stk_size);
*stk_size -= *tls_size;
*tls_addr = *stk_addr + *stk_size;
if (*stk_addr + *stk_size < *tls_addr + *tls_size)
*tls_size = *stk_addr + *stk_size - *tls_addr;
*stk_size = *tls_addr - *stk_addr;
}
}
#endif

@@ -548,9 +548,6 @@ uptr GetTlsSize() {
return 0;
}
void InitTlsSize() {
}
uptr TlsBaseAddr() {
uptr segbase = 0;
#if defined(__x86_64__)

@@ -106,7 +106,6 @@ void DisableCoreDumperIfNecessary() {}
void InstallDeadlySignalHandlers(SignalHandlerType handler) {}
void SetAlternateSignalStack() {}
void UnsetAlternateSignalStack() {}
void InitTlsSize() {}
void SignalContext::DumpAllRegisters(void *context) {}
const char *DescribeSignalOrException(int signo) { UNIMPLEMENTED(); }

@@ -846,9 +846,6 @@ uptr GetTlsSize() {
return 0;
}
void InitTlsSize() {
}
void GetThreadStackAndTls(bool main, uptr *stk_addr, uptr *stk_size,
uptr *tls_addr, uptr *tls_size) {
#if SANITIZER_GO

@@ -210,12 +210,10 @@ static void *WorkerThread(void *arg) {
}
TEST(SanitizerCommon, ThreadStackTlsMain) {
InitTlsSize();
TestThreadInfo(true);
}
TEST(SanitizerCommon, ThreadStackTlsWorker) {
InitTlsSize();
pthread_t t;
PTHREAD_CREATE(&t, 0, WorkerThread, 0);
PTHREAD_JOIN(t, 0);

@@ -188,24 +188,9 @@ TEST(SanitizerCommon, SetEnvTest) {
}
#if (defined(__x86_64__) || defined(__i386__)) && !SANITIZER_ANDROID
void *thread_self_offset_test_func(void *arg) {
bool result =
*(uptr *)((char *)ThreadSelf() + ThreadSelfOffset()) == ThreadSelf();
return (void *)result;
}
TEST(SanitizerLinux, ThreadSelfOffset) {
EXPECT_TRUE((bool)thread_self_offset_test_func(0));
pthread_t tid;
void *result;
ASSERT_EQ(0, pthread_create(&tid, 0, thread_self_offset_test_func, 0));
ASSERT_EQ(0, pthread_join(tid, &result));
EXPECT_TRUE((bool)result);
}
// libpthread puts the thread descriptor at the end of stack space.
void *thread_descriptor_size_test_func(void *arg) {
uptr descr_addr = ThreadSelf();
uptr descr_addr = (uptr)pthread_self();
pthread_attr_t attr;
pthread_getattr_np(pthread_self(), &attr);
void *stackaddr;

@@ -655,8 +655,11 @@ TSAN_INTERCEPTOR(void*, malloc, uptr size) {
return p;
}
// In glibc<2.25, dynamic TLS blocks are allocated by __libc_memalign. Intercept
// __libc_memalign so that (1) we can detect races (2) free will not be called
// on libc internally allocated blocks.
TSAN_INTERCEPTOR(void*, __libc_memalign, uptr align, uptr sz) {
SCOPED_TSAN_INTERCEPTOR(__libc_memalign, align, sz);
SCOPED_INTERCEPTOR_RAW(__libc_memalign, align, sz);
return user_memalign(thr, pc, align, sz);
}

@@ -318,7 +318,6 @@ void InitializePlatform() {
}
CheckAndProtect();
InitTlsSize();
#endif // !SANITIZER_GO
}

@@ -13,6 +13,10 @@
// binutils 2.26 has a change that causes this test to fail on powerpc64.
// UNSUPPORTED: powerpc64
/// We call __tls_get_addr early in GetTls to work around an issue for glibc<2.25,
/// so we don't get a log for f().
// REQUIRES: glibc-2.27
#ifndef SHARED
#include <stdio.h>