[scudo][standalone] Fork support

Summary:
fork() wasn't supported well (or at all) in Scudo, which materialized
as deadlocks in child processes.

In order to properly support fork, we lock the allocator pre-fork and
unlock it post-fork in both the parent and the child. This is done via
a `pthread_atfork` call that installs the necessary handlers.
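In rough terms it boils down to the following (a minimal sketch, not the
exact Scudo code; the single mutex stands in for all of the allocator's
locks):

```cpp
// Sketch: making an allocator fork-safe with pthread_atfork.
#include <pthread.h>

static pthread_mutex_t AllocatorMutex = PTHREAD_MUTEX_INITIALIZER;

static void disableAllocator() { pthread_mutex_lock(&AllocatorMutex); }
static void enableAllocator() { pthread_mutex_unlock(&AllocatorMutex); }

static void installForkHandlers() {
  // prepare: runs in the parent right before fork() and takes every lock,
  // so the child cannot inherit a mutex held by another thread.
  // parent/child: run right after fork() in each process and release them.
  pthread_atfork(disableAllocator, enableAllocator, enableAllocator);
}
```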

A couple of things suck here: `pthread_atfork` allocates, so the call has
to happen post-initialization since our init path is not reentrant, and it
doesn't allow for an extra pointer, so we can't pass the allocator we are
currently working with.

In order to work around this, I added a post-init template parameter that
gets executed once the allocator is initialized for the current thread.
For the C wrappers, its job is to install the atfork handlers.
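A condensed version of the mechanism added to combined.h (see the diff
below; details omitted for brevity):

```cpp
// Post-init callback plumbing, trimmed down from combined.h.
#include <pthread.h>

extern "C" inline void EmptyCallback() {}

template <class Params, void (*PostInitCallback)(void) = EmptyCallback>
class Allocator {
public:
  void callPostInitCallback() {
    // Run the callback exactly once per process, and only once the allocator
    // is usable, so the callback itself is free to allocate (pthread_atfork
    // does).
    static pthread_once_t OnceControl = PTHREAD_ONCE_INIT;
    pthread_once(&OnceControl, PostInitCallback);
  }
  // The TSD registries invoke callPostInitCallback() when a thread's TSD is
  // initialized, as shown in tsd_exclusive.h / tsd_shared.h further down.
};
```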

I reorganized the impacted area a bit and added some tests, courtesy
of cferris@, that were deadlocking prior to this fix.
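For context, the failure mode those tests catch is the classic allocator
fork race. A condensed, hypothetical reproducer (the real tests are in
wrappers_c_test.cpp and wrappers_cpp_test.cpp below):

```cpp
// Sketch of the pre-fix deadlock: a thread holds an allocator lock while
// another thread forks, and the child inherits the lock in a locked state.
#include <cstdlib>
#include <cstring>
#include <sys/wait.h>
#include <thread>
#include <unistd.h>

int main() {
  // A background thread keeps the allocator locks busy.
  std::thread T([] {
    for (;;)
      free(malloc(128));
  });
  T.detach();
  for (int I = 0; I < 100; I++) {
    const pid_t Pid = fork();
    if (Pid == 0) {
      // Pre-fix: if the background thread held an allocator lock at fork()
      // time, the child inherits it locked and this malloc() never returns.
      void *P = malloc(1 << 16);
      memset(P, 0x42, 1 << 16);
      free(P);
      _exit(0);
    }
    waitpid(Pid, nullptr, 0);
  }
  return 0;
}
```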

Subscribers: jfb, #sanitizers, llvm-commits

Tags: #sanitizers, #llvm

Differential Revision: https://reviews.llvm.org/D72470
commit 9ef6faf496 (parent e7b2d9f470)
Kostya Kortchinsky, 2020-01-09 11:43:16 -08:00
18 changed files with 265 additions and 42 deletions

View File

@ -1,5 +1,6 @@
add_compiler_rt_component(scudo_standalone)
if (COMPILER_RT_HAS_GWP_ASAN)
# FIXME: GWP-ASan is temporarily disabled, re-enable once issues are fixed.
if (FALSE AND COMPILER_RT_HAS_GWP_ASAN)
add_dependencies(scudo_standalone gwp_asan)
endif()
@ -106,7 +107,7 @@ set(SCUDO_SOURCES_CXX_WRAPPERS
set(SCUDO_OBJECT_LIBS)
if (COMPILER_RT_HAS_GWP_ASAN)
if (FALSE AND COMPILER_RT_HAS_GWP_ASAN)
list(APPEND SCUDO_OBJECT_LIBS RTGwpAsan)
list(APPEND SCUDO_CFLAGS -DGWP_ASAN_HOOKS)
endif()

View File

@ -34,6 +34,9 @@ public:
return Map[Index];
}
void disable() {}
void enable() {}
private:
u8 *Map;
};
@ -82,6 +85,9 @@ public:
return Level2Map[Index % Level2Size];
}
void disable() { Mutex.lock(); }
void enable() { Mutex.unlock(); }
private:
u8 *get(uptr Index) const {
DCHECK_LT(Index, Level1Size);

View File

@ -31,15 +31,23 @@
static gwp_asan::GuardedPoolAllocator GuardedAlloc;
#endif // GWP_ASAN_HOOKS
extern "C" inline void EmptyCallback() {}
namespace scudo {
template <class Params> class Allocator {
template <class Params, void (*PostInitCallback)(void) = EmptyCallback>
class Allocator {
public:
using PrimaryT = typename Params::Primary;
using CacheT = typename PrimaryT::CacheT;
typedef Allocator<Params> ThisT;
typedef Allocator<Params, PostInitCallback> ThisT;
typedef typename Params::template TSDRegistryT<ThisT> TSDRegistryT;
void callPostInitCallback() {
static pthread_once_t OnceControl = PTHREAD_ONCE_INIT;
pthread_once(&OnceControl, PostInitCallback);
}
struct QuarantineCallback {
explicit QuarantineCallback(ThisT &Instance, CacheT &LocalCache)
: Allocator(Instance), Cache(LocalCache) {}
@ -420,12 +428,18 @@ public:
void disable() {
initThreadMaybe();
TSDRegistry.disable();
Stats.disable();
Quarantine.disable();
Primary.disable();
Secondary.disable();
}
void enable() {
initThreadMaybe();
Secondary.enable();
Primary.enable();
Quarantine.enable();
Stats.enable();
TSDRegistry.enable();
}

View File

@ -123,13 +123,26 @@ public:
}
void disable() {
for (uptr I = 0; I < NumClasses; I++)
getSizeClassInfo(I)->Mutex.lock();
// The BatchClassId must be locked last since other classes can use it.
for (sptr I = static_cast<sptr>(NumClasses) - 1; I >= 0; I--) {
if (static_cast<uptr>(I) == SizeClassMap::BatchClassId)
continue;
getSizeClassInfo(static_cast<uptr>(I))->Mutex.lock();
}
getSizeClassInfo(SizeClassMap::BatchClassId)->Mutex.lock();
RegionsStashMutex.lock();
PossibleRegions.disable();
}
void enable() {
for (sptr I = static_cast<sptr>(NumClasses) - 1; I >= 0; I--)
getSizeClassInfo(static_cast<uptr>(I))->Mutex.unlock();
PossibleRegions.enable();
RegionsStashMutex.unlock();
getSizeClassInfo(SizeClassMap::BatchClassId)->Mutex.unlock();
for (uptr I = 0; I < NumClasses; I++) {
if (I == SizeClassMap::BatchClassId)
continue;
getSizeClassInfo(I)->Mutex.unlock();
}
}
template <typename F> void iterateOverBlocks(F Callback) {

View File

@ -125,13 +125,22 @@ public:
}
void disable() {
for (uptr I = 0; I < NumClasses; I++)
getRegionInfo(I)->Mutex.lock();
// The BatchClassId must be locked last since other classes can use it.
for (sptr I = static_cast<sptr>(NumClasses) - 1; I >= 0; I--) {
if (static_cast<uptr>(I) == SizeClassMap::BatchClassId)
continue;
getRegionInfo(static_cast<uptr>(I))->Mutex.lock();
}
getRegionInfo(SizeClassMap::BatchClassId)->Mutex.lock();
}
void enable() {
for (sptr I = static_cast<sptr>(NumClasses) - 1; I >= 0; I--)
getRegionInfo(static_cast<uptr>(I))->Mutex.unlock();
getRegionInfo(SizeClassMap::BatchClassId)->Mutex.unlock();
for (uptr I = 0; I < NumClasses; I++) {
if (I == SizeClassMap::BatchClassId)
continue;
getRegionInfo(I)->Mutex.unlock();
}
}
template <typename F> void iterateOverBlocks(F Callback) const {

View File

@ -205,7 +205,7 @@ public:
ScopedLock L(CacheMutex);
Cache.transfer(C);
}
if (Cache.getSize() > getMaxSize() && RecyleMutex.tryLock())
if (Cache.getSize() > getMaxSize() && RecycleMutex.tryLock())
recycle(atomic_load_relaxed(&MinSize), Cb);
}
@ -214,7 +214,7 @@ public:
ScopedLock L(CacheMutex);
Cache.transfer(C);
}
RecyleMutex.lock();
RecycleMutex.lock();
recycle(0, Cb);
}
@ -225,11 +225,22 @@ public:
getMaxSize() >> 10, getCacheSize() >> 10);
}
void disable() {
// RecycleMutex must be locked 1st since we grab CacheMutex within recycle.
RecycleMutex.lock();
CacheMutex.lock();
}
void enable() {
CacheMutex.unlock();
RecycleMutex.unlock();
}
private:
// Read-only data.
alignas(SCUDO_CACHE_LINE_SIZE) HybridMutex CacheMutex;
CacheT Cache;
alignas(SCUDO_CACHE_LINE_SIZE) HybridMutex RecyleMutex;
alignas(SCUDO_CACHE_LINE_SIZE) HybridMutex RecycleMutex;
atomic_uptr MinSize;
atomic_uptr MaxSize;
alignas(SCUDO_CACHE_LINE_SIZE) atomic_uptr MaxCacheSize;
@ -261,7 +272,7 @@ private:
while (Cache.getSize() > MinSize)
Tmp.enqueueBatch(Cache.dequeueBatch());
}
RecyleMutex.unlock();
RecycleMutex.unlock();
doRecycle(&Tmp, Cb);
}

View File

@ -87,6 +87,9 @@ public:
S[I] = static_cast<sptr>(S[I]) >= 0 ? S[I] : 0;
}
void disable() { Mutex.lock(); }
void enable() { Mutex.unlock(); }
private:
mutable HybridMutex Mutex;
DoublyLinkedList<LocalStats> StatsList;

View File

@ -20,7 +20,8 @@ if(ANDROID)
list(APPEND SCUDO_UNITTEST_CFLAGS -fno-emulated-tls)
endif()
if (COMPILER_RT_HAS_GWP_ASAN)
# FIXME: GWP-ASan is temporarily disabled, re-enable once issues are fixed.
if (FALSE AND COMPILER_RT_HAS_GWP_ASAN)
list(APPEND SCUDO_UNITTEST_CFLAGS -DGWP_ASAN_HOOKS)
endif()
@ -42,7 +43,7 @@ endforeach()
macro(add_scudo_unittest testname)
cmake_parse_arguments(TEST "" "" "SOURCES;ADDITIONAL_RTOBJECTS" ${ARGN})
if (COMPILER_RT_HAS_GWP_ASAN)
if (FALSE AND COMPILER_RT_HAS_GWP_ASAN)
list(APPEND TEST_ADDITIONAL_RTOBJECTS RTGwpAsan)
endif()

View File

@ -36,6 +36,7 @@ public:
void initCache(CacheT *Cache) { memset(Cache, 0, sizeof(*Cache)); }
void commitBack(scudo::TSD<MockAllocator> *TSD) {}
TSDRegistryT *getTSDRegistry() { return &TSDRegistry; }
void callPostInitCallback() {}
bool isInitialized() { return Initialized; }

View File

@ -299,7 +299,9 @@ TEST(ScudoWrappersCTest, MallocDisableDeadlock) {
"");
}
// Fuchsia doesn't have fork or malloc_info.
#if !SCUDO_FUCHSIA
TEST(ScudoWrappersCTest, MallocInfo) {
char Buffer[64];
FILE *F = fmemopen(Buffer, sizeof(Buffer), "w+");
@ -310,4 +312,79 @@ TEST(ScudoWrappersCTest, MallocInfo) {
fclose(F);
EXPECT_EQ(strncmp(Buffer, "<malloc version=\"scudo-", 23), 0);
}
#endif
TEST(ScudoWrappersCTest, Fork) {
void *P;
pid_t Pid = fork();
EXPECT_GE(Pid, 0);
if (Pid == 0) {
P = malloc(Size);
EXPECT_NE(P, nullptr);
memset(P, 0x42, Size);
free(P);
_exit(0);
}
waitpid(Pid, nullptr, 0);
P = malloc(Size);
EXPECT_NE(P, nullptr);
memset(P, 0x42, Size);
free(P);
// fork should stall if the allocator has been disabled.
EXPECT_DEATH(
{
malloc_disable();
alarm(1);
Pid = fork();
EXPECT_GE(Pid, 0);
},
"");
}
static pthread_mutex_t Mutex;
static pthread_cond_t Conditional = PTHREAD_COND_INITIALIZER;
static void *enableMalloc(void *Unused) {
// Initialize the allocator for this thread.
void *P = malloc(Size);
EXPECT_NE(P, nullptr);
memset(P, 0x42, Size);
free(P);
// Signal the main thread we are ready.
pthread_mutex_lock(&Mutex);
pthread_cond_signal(&Conditional);
pthread_mutex_unlock(&Mutex);
// Wait for the malloc_disable & fork, then enable the allocator again.
sleep(1);
malloc_enable();
return nullptr;
}
TEST(ScudoWrappersCTest, DisableForkEnable) {
pthread_t ThreadId;
EXPECT_EQ(pthread_create(&ThreadId, nullptr, &enableMalloc, nullptr), 0);
// Wait for the thread to be warmed up.
pthread_mutex_lock(&Mutex);
pthread_cond_wait(&Conditional, &Mutex);
pthread_mutex_unlock(&Mutex);
// Disable the allocator and fork. fork should succeed after malloc_enable.
malloc_disable();
pid_t Pid = fork();
EXPECT_GE(Pid, 0);
if (Pid == 0) {
void *P = malloc(Size);
EXPECT_NE(P, nullptr);
memset(P, 0x42, Size);
free(P);
_exit(0);
}
waitpid(Pid, nullptr, 0);
EXPECT_EQ(pthread_join(ThreadId, 0), 0);
}
#endif // SCUDO_FUCHSIA

View File

@ -8,6 +8,7 @@
#include "tests/scudo_unit_test.h"
#include <atomic>
#include <condition_variable>
#include <mutex>
#include <thread>
@ -113,3 +114,59 @@ TEST(ScudoWrappersCppTest, ThreadedNew) {
for (auto &T : Threads)
T.join();
}
#if !SCUDO_FUCHSIA
// TODO(kostyak): for me, this test fails in a specific configuration when run
// by itself with some Scudo or GWP-ASan violation. Other people
// can't seem to reproduce the failure. Consider skipping this in
// the event it fails on the upstream bots.
TEST(ScudoWrappersCppTest, AllocAfterFork) {
std::atomic_bool Stop;
// Create threads that simply allocate and free different sizes.
std::vector<std::thread *> Threads;
for (size_t N = 0; N < 5; N++) {
std::thread *T = new std::thread([&Stop] {
while (!Stop) {
for (size_t SizeLog = 3; SizeLog <= 21; SizeLog++) {
char *P = new char[1UL << SizeLog];
EXPECT_NE(P, nullptr);
// Make sure this value is not optimized away.
asm volatile("" : : "r,m"(P) : "memory");
delete[] P;
}
}
});
Threads.push_back(T);
}
// Create a thread to fork and allocate.
for (size_t N = 0; N < 100; N++) {
pid_t Pid;
if ((Pid = fork()) == 0) {
for (size_t SizeLog = 3; SizeLog <= 21; SizeLog++) {
char *P = new char[1UL << SizeLog];
EXPECT_NE(P, nullptr);
// Make sure this value is not optimized away.
asm volatile("" : : "r,m"(P) : "memory");
// Make sure we can touch all of the allocation.
memset(P, 0x32, 1U << SizeLog);
// EXPECT_LE(1U << SizeLog, malloc_usable_size(ptr));
delete[] P;
}
_exit(10);
}
EXPECT_NE(-1, Pid);
int Status;
EXPECT_EQ(Pid, waitpid(Pid, &Status, 0));
EXPECT_FALSE(WIFSIGNALED(Status));
EXPECT_EQ(10, WEXITSTATUS(Status));
}
printf("Waiting for threads to complete\n");
Stop = true;
for (auto Thread : Threads)
Thread->join();
Threads.clear();
}
#endif

View File

@ -14,6 +14,7 @@
#include "mutex.h"
#include <limits.h> // for PTHREAD_DESTRUCTOR_ITERATIONS
#include <pthread.h>
// With some build setups, this might still not be defined.
#ifndef PTHREAD_DESTRUCTOR_ITERATIONS

View File

@ -11,8 +11,6 @@
#include "tsd.h"
#include <pthread.h>
namespace scudo {
enum class ThreadState : u8 {
@ -62,6 +60,7 @@ template <class Allocator> struct TSDRegistryExT {
// To disable the exclusive TSD registry, we effectively lock the fallback TSD
// and force all threads to attempt to use it instead of their local one.
void disable() {
Mutex.lock();
FallbackTSD->lock();
atomic_store(&Disabled, 1U, memory_order_release);
}
@ -69,6 +68,7 @@ template <class Allocator> struct TSDRegistryExT {
void enable() {
atomic_store(&Disabled, 0U, memory_order_release);
FallbackTSD->unlock();
Mutex.unlock();
}
private:
@ -90,6 +90,7 @@ private:
pthread_setspecific(PThreadKey, reinterpret_cast<void *>(Instance)), 0);
ThreadTSD.initLinkerInitialized(Instance);
State = ThreadState::Initialized;
Instance->callPostInitCallback();
}
pthread_key_t PThreadKey;

View File

@ -12,8 +12,6 @@
#include "linux.h" // for getAndroidTlsPtr()
#include "tsd.h"
#include <pthread.h>
namespace scudo {
template <class Allocator, u32 MaxTSDCount> struct TSDRegistrySharedT {
@ -73,13 +71,15 @@ template <class Allocator, u32 MaxTSDCount> struct TSDRegistrySharedT {
}
void disable() {
Mutex.lock();
for (u32 I = 0; I < NumberOfTSDs; I++)
TSDs[I].lock();
}
void enable() {
for (u32 I = 0; I < NumberOfTSDs; I++)
for (s32 I = NumberOfTSDs - 1; I >= 0; I--)
TSDs[I].unlock();
Mutex.unlock();
}
private:
@ -117,6 +117,7 @@ private:
// Initial context assignment is done in a plain round-robin fashion.
const u32 Index = atomic_fetch_add(&CurrentIndex, 1U, memory_order_relaxed);
setCurrentTSD(&TSDs[Index % NumberOfTSDs]);
Instance->callPostInitCallback();
}
NOINLINE TSD<Allocator> *getTSDAndLockSlow(TSD<Allocator> *CurrentTSD) {

View File

@ -18,22 +18,23 @@
#include <stdint.h>
#include <stdio.h>
static scudo::Allocator<scudo::Config> Allocator;
#define SCUDO_PREFIX(name) name
#define SCUDO_ALLOCATOR Allocator
extern "C" void SCUDO_PREFIX(malloc_postinit)();
static scudo::Allocator<scudo::Config, SCUDO_PREFIX(malloc_postinit)>
SCUDO_ALLOCATOR;
// Pointer to the static allocator so that the C++ wrappers can access it.
// Technically we could have a completely separated heap for C & C++ but in
// reality the amount of cross pollination between the two is staggering.
scudo::Allocator<scudo::Config> *AllocatorPtr = &Allocator;
scudo::Allocator<scudo::Config, SCUDO_PREFIX(malloc_postinit)> *
CONCATENATE(SCUDO_ALLOCATOR, Ptr) = &SCUDO_ALLOCATOR;
extern "C" {
#define SCUDO_PREFIX(name) name
#define SCUDO_ALLOCATOR Allocator
#include "wrappers_c.inc"
#undef SCUDO_ALLOCATOR
#undef SCUDO_PREFIX
INTERFACE void __scudo_print_stats(void) { Allocator.printStats(); }
} // extern "C"
extern "C" INTERFACE void __scudo_print_stats(void) { Allocator.printStats(); }
#endif // !SCUDO_ANDROID || !_BIONIC

View File

@ -17,6 +17,8 @@
#define SCUDO_MALLOC_ALIGNMENT FIRST_32_SECOND_64(8U, 16U)
#endif
extern "C" {
INTERFACE WEAK void *SCUDO_PREFIX(calloc)(size_t nmemb, size_t size) {
scudo::uptr Product;
if (UNLIKELY(scudo::checkForCallocOverflow(size, nmemb, &Product))) {
@ -141,11 +143,16 @@ INTERFACE WEAK int SCUDO_PREFIX(malloc_iterate)(
return 0;
}
INTERFACE WEAK void SCUDO_PREFIX(malloc_enable)() { SCUDO_ALLOCATOR.enable(); }
INTERFACE WEAK void SCUDO_PREFIX(malloc_disable)() {
SCUDO_ALLOCATOR.disable();
}
INTERFACE WEAK void SCUDO_PREFIX(malloc_enable)() { SCUDO_ALLOCATOR.enable(); }
void SCUDO_PREFIX(malloc_postinit)() {
pthread_atfork(SCUDO_PREFIX(malloc_disable), SCUDO_PREFIX(malloc_enable),
SCUDO_PREFIX(malloc_enable));
}
INTERFACE WEAK int SCUDO_PREFIX(mallopt)(int param, UNUSED int value) {
if (param == M_DECAY_TIME) {
@ -176,3 +183,5 @@ INTERFACE WEAK int SCUDO_PREFIX(malloc_info)(UNUSED int options, FILE *stream) {
fputs("</malloc>", stream);
return 0;
}
} // extern "C"

View File

@ -18,22 +18,40 @@
#include <stdint.h>
#include <stdio.h>
static scudo::Allocator<scudo::AndroidConfig> Allocator;
static scudo::Allocator<scudo::AndroidSvelteConfig> SvelteAllocator;
extern "C" {
// Regular MallocDispatch definitions.
#define SCUDO_PREFIX(name) CONCATENATE(scudo_, name)
#define SCUDO_ALLOCATOR Allocator
extern "C" void SCUDO_PREFIX(malloc_postinit)();
static scudo::Allocator<scudo::AndroidConfig, SCUDO_PREFIX(malloc_postinit)>
SCUDO_ALLOCATOR;
// Pointer to the static allocator so that the C++ wrappers can access it.
// Technically we could have a completely separated heap for C & C++ but in
// reality the amount of cross pollination between the two is staggering.
scudo::Allocator<scudo::AndroidConfig, SCUDO_PREFIX(malloc_postinit)> *
CONCATENATE(SCUDO_ALLOCATOR, Ptr) = &SCUDO_ALLOCATOR;
#include "wrappers_c.inc"
#undef SCUDO_ALLOCATOR
#undef SCUDO_PREFIX
// Svelte MallocDispatch definitions.
#define SCUDO_PREFIX(name) CONCATENATE(scudo_svelte_, name)
#define SCUDO_ALLOCATOR SvelteAllocator
extern "C" void SCUDO_PREFIX(malloc_postinit)();
static scudo::Allocator<scudo::AndroidSvelteConfig,
SCUDO_PREFIX(malloc_postinit)>
SCUDO_ALLOCATOR;
// Pointer to the static allocator so that the C++ wrappers can access it.
// Technically we could have a completely separated heap for C & C++ but in
// reality the amount of cross pollination between the two is staggering.
scudo::Allocator<scudo::AndroidSvelteConfig, SCUDO_PREFIX(malloc_postinit)> *
CONCATENATE(SCUDO_ALLOCATOR, Ptr) = &SCUDO_ALLOCATOR;
#include "wrappers_c.inc"
#undef SCUDO_ALLOCATOR
#undef SCUDO_PREFIX
@ -44,6 +62,4 @@ INTERFACE void __scudo_print_stats(void) {
SvelteAllocator.printStats();
}
} // extern "C"
#endif // SCUDO_ANDROID && _BIONIC

View File

@ -15,7 +15,8 @@
#include <stdint.h>
extern scudo::Allocator<scudo::Config> *AllocatorPtr;
extern "C" void malloc_postinit();
extern scudo::Allocator<scudo::Config, malloc_postinit> *AllocatorPtr;
namespace std {
struct nothrow_t {};