[scudo][standalone] Introduce the thread specific data structures
Summary:
This CL adds the structures dealing with thread specific data for the
allocator. This includes the thread specific data structure itself and
two registries for said structures: an exclusive one, where each thread
will have its own TSD struct, and a shared one, where a pool of TSD
structs will be shared by all threads, with dynamic reassignment at
runtime based on contention.
This departs from the current Scudo implementation: we intend to make
the Registry a template parameter of the allocator (as opposed to a
single global entity), allowing various allocators to coexist with
different TSD registry models. As a result, TSD registry and Allocator
are tightly coupled.
This also corrects a couple of things in other files that I noticed
while adding this.
Reviewers: eugenis, vitalybuka, morehouse, hctim
Reviewed By: morehouse
Subscribers: srhines, mgorny, delcypher, jfb, #sanitizers, llvm-commits
Tags: #llvm, #sanitizers
Differential Revision: https://reviews.llvm.org/D62258
llvm-svn: 362962
2019-06-11 00:50:52 +08:00
|
|
|
//===-- tsd_exclusive.h -----------------------------------------*- C++ -*-===//
|
|
|
|
//
|
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#ifndef SCUDO_TSD_EXCLUSIVE_H_
|
|
|
|
#define SCUDO_TSD_EXCLUSIVE_H_
|
|
|
|
|
|
|
|
#include "tsd.h"
|
|
|
|
|
|
|
|
#include <pthread.h>
|
|
|
|
|
|
|
|
namespace scudo {
|
|
|
|
|
|
|
|
// Lifecycle state of the current thread's TSD, tracked per-thread (see the
// THREADLOCAL `State` member of TSDRegistryExT below).
enum class ThreadState : u8 {
  NotInitialized = 0, // TSD not set up yet; the shared fallback TSD is used.
  Initialized,        // ThreadTSD is usable; fast path needs no locking.
  TornDown,           // Thread exit destructor ran; TSD was committed back.
};
|
|
|
|
|
|
|
|
template <class Allocator> void teardownThread(void *Ptr);
|
|
|
|
|
|
|
|
// Exclusive TSD registry model: each thread owns its own TSD<Allocator>
// (the THREADLOCAL ThreadTSD below), so the common allocation path requires
// no locking. Threads whose TSD is not (or only minimally) initialized fall
// back to a single shared, lock-protected TSD.
template <class Allocator> struct TSDRegistryExT {
  // Initialization assuming zeroed (linker-initialized) storage: initializes
  // the allocator, registers the pthread destructor key used for thread
  // teardown, and maps the shared fallback TSD.
  void initLinkerInitialized(Allocator *Instance) {
    Instance->initLinkerInitialized();
    // teardownThread<Allocator> is invoked at thread exit for every thread
    // that associated a value with PThreadKey (done in initThread).
    CHECK_EQ(pthread_key_create(&PThreadKey, teardownThread<Allocator>), 0);
    FallbackTSD = reinterpret_cast<TSD<Allocator> *>(
        map(nullptr, sizeof(TSD<Allocator>), "scudo:tsd"));
    FallbackTSD->initLinkerInitialized(Instance);
    Initialized = true;
  }
  // Runtime initialization: zero the registry first, then proceed as if it
  // had been linker-initialized.
  void init(Allocator *Instance) {
    memset(this, 0, sizeof(*this));
    initLinkerInitialized(Instance);
  }

  // Test-only: releases the mapping backing the fallback TSD.
  void unmapTestOnly() {
    unmap(reinterpret_cast<void *>(FallbackTSD), sizeof(TSD<Allocator>));
  }

  // Fast-path entry: no-op once the current thread's State left
  // NotInitialized (either Initialized or TornDown); otherwise runs the slow
  // per-thread initialization.
  ALWAYS_INLINE void initThreadMaybe(Allocator *Instance, bool MinimalInit) {
    if (LIKELY(State != ThreadState::NotInitialized))
      return;
    initThread(Instance, MinimalInit);
  }

  // Returns the TSD the current thread should use. On the common path this
  // is the thread's exclusive TSD, returned unlocked (*UnlockRequired set to
  // false). Otherwise the shared fallback TSD is returned locked, and the
  // caller is responsible for unlocking it (*UnlockRequired set to true).
  ALWAYS_INLINE TSD<Allocator> *getTSDAndLock(bool *UnlockRequired) {
    if (LIKELY(State == ThreadState::Initialized)) {
      *UnlockRequired = false;
      return &ThreadTSD;
    }
    DCHECK(FallbackTSD);
    FallbackTSD->lock();
    *UnlockRequired = true;
    return FallbackTSD;
  }

private:
  // One-time global initialization, guarded by Mutex so that concurrent
  // first-time threads do not race on initLinkerInitialized.
  void initOnceMaybe(Allocator *Instance) {
    ScopedLock L(Mutex);
    if (LIKELY(Initialized))
      return;
    initLinkerInitialized(Instance); // Sets Initialized.
  }

  // Using minimal initialization allows for global initialization while keeping
  // the thread specific structure untouched. The fallback structure will be
  // used instead.
  NOINLINE void initThread(Allocator *Instance, bool MinimalInit) {
    initOnceMaybe(Instance);
    if (UNLIKELY(MinimalInit))
      return;
    // Associate the allocator with the key so that teardownThread receives it
    // as its argument at thread exit.
    CHECK_EQ(
        pthread_setspecific(PThreadKey, reinterpret_cast<void *>(Instance)), 0);
    ThreadTSD.initLinkerInitialized(Instance);
    State = ThreadState::Initialized;
  }

  pthread_key_t PThreadKey;    // Key whose destructor drives thread teardown.
  bool Initialized;            // Set once global initialization completed.
  TSD<Allocator> *FallbackTSD; // Shared, lock-protected fallback TSD.
  HybridMutex Mutex;           // Guards the one-time global initialization.
  static THREADLOCAL ThreadState State;        // This thread's TSD state.
  static THREADLOCAL TSD<Allocator> ThreadTSD; // This thread's exclusive TSD.

  // teardownThread needs access to PThreadKey, ThreadTSD and State.
  friend void teardownThread<Allocator>(void *Ptr);
};
|
|
|
|
|
|
|
|
// Out-of-line definitions for the per-thread static members declared above.
template <class Allocator>
THREADLOCAL TSD<Allocator> TSDRegistryExT<Allocator>::ThreadTSD;
template <class Allocator>
THREADLOCAL ThreadState TSDRegistryExT<Allocator>::State;
|
|
|
|
|
|
|
|
template <class Allocator> void teardownThread(void *Ptr) {
|
|
|
|
typedef TSDRegistryExT<Allocator> TSDRegistryT;
|
|
|
|
Allocator *Instance = reinterpret_cast<Allocator *>(Ptr);
|
|
|
|
// The glibc POSIX thread-local-storage deallocation routine calls user
|
|
|
|
// provided destructors in a loop of PTHREAD_DESTRUCTOR_ITERATIONS.
|
|
|
|
// We want to be called last since other destructors might call free and the
|
|
|
|
// like, so we wait until PTHREAD_DESTRUCTOR_ITERATIONS before draining the
|
|
|
|
// quarantine and swallowing the cache.
|
|
|
|
if (TSDRegistryT::ThreadTSD.DestructorIterations > 1) {
|
|
|
|
TSDRegistryT::ThreadTSD.DestructorIterations--;
|
|
|
|
// If pthread_setspecific fails, we will go ahead with the teardown.
|
|
|
|
if (LIKELY(pthread_setspecific(Instance->getTSDRegistry()->PThreadKey,
|
|
|
|
Ptr) == 0))
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
TSDRegistryT::ThreadTSD.commitBack(Instance);
|
|
|
|
TSDRegistryT::State = ThreadState::TornDown;
|
|
|
|
}
|
|
|
|
|
|
|
|
} // namespace scudo
|
|
|
|
|
|
|
|
#endif // SCUDO_TSD_EXCLUSIVE_H_
|