[sanitizer] Size class map & local cache improvements

Summary:
- Reland rL324263, this time allowing for a compile-time decision as to whether
  or not to use the 32-bit division. A single test uses a class map covering a
  maximum size greater than 4GB; this can be detected via the template
  parameters, which allows SizeClassAllocator64PopulateFreeListOOM to pass (see
  the sketch after this list);
- `MaxCachedHint` is always called on a class id for which we have already
  computed the size, but we still recompute `Size(class_id)`. Change the
  prototype of the function to work on sizes instead of class ids. This also
  allows us to get rid of the `kBatchClassID` special case. Update the callers
  accordingly;
- `InitCache` and `Drain` will start iterating at index 1: the contents of
  index 0 are unused and can safely be left at 0. This also avoids the cost of
  going through an `UNLIKELY` branch in `MaxCachedHint` and of touching memory
  that is otherwise never used;
- `const` some variables in the areas modified;
- Remove a spurious extra line at the end of a file.
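
To make the first two points concrete, here is a minimal standalone sketch of
the new `MaxCachedHint` shape. It mirrors the diff below but is not the exact
compiler-rt code; `SizeClassMapSketch`, the explicit template parameters and
the `std::` helpers exist only for the example:

```cpp
#include <algorithm>
#include <cstdint>

using uptr = uintptr_t;
using u32 = uint32_t;

// Sketch: the hint now takes a size (not a class id), and the division width
// is a compile-time decision based on the template parameters.
template <uptr kMaxSizeLog, uptr kMaxBytesCachedLog, uptr kMaxNumCachedHint>
struct SizeClassMapSketch {
  static uptr MaxCachedHint(uptr size) {
    if (size == 0)
      return 0;
    uptr n;
    // A 64-bit division is only needed when the class map can describe sizes
    // or cached-byte counts that do not fit in 32 bits (e.g. a map covering
    // more than 4GB); otherwise the cheaper 32-bit division is enough.
    if (kMaxBytesCachedLog > 31 || kMaxSizeLog > 31)
      n = (1UL << kMaxBytesCachedLog) / size;
    else
      n = (1U << kMaxBytesCachedLog) / static_cast<u32>(size);
    return std::max<uptr>(1, std::min<uptr>(kMaxNumCachedHint, n));
  }
};

// Example: with kMaxBytesCachedLog == 10, a 256-byte class gets a hint of
// (1 << 10) / 256 == 4, clamped to [1, kMaxNumCachedHint].
using ExampleMap = SizeClassMapSketch</*kMaxSizeLog=*/17,
                                      /*kMaxBytesCachedLog=*/10,
                                      /*kMaxNumCachedHint=*/14>;
```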

Reviewers: alekseyshl, tl0gic, dberris

Reviewed By: alekseyshl, dberris

Subscribers: dberris, kubamracek, delcypher, llvm-commits, #sanitizers

Differential Revision: https://reviews.llvm.org/D43088

llvm-svn: 324906
Kostya Kortchinsky 2018-02-12 16:59:17 +00:00
parent 07e1337c2a
commit 1022220b16
3 changed files with 43 additions and 40 deletions


@@ -70,7 +70,7 @@ struct SizeClassAllocator64LocalCache {
   }
   void Drain(SizeClassAllocator *allocator) {
-    for (uptr i = 0; i < kNumClasses; i++) {
+    for (uptr i = 1; i < kNumClasses; i++) {
       PerClass *c = &per_class_[i];
       while (c->count > 0)
         Drain(c, allocator, i, c->count);
@@ -94,10 +94,11 @@ struct SizeClassAllocator64LocalCache {
   void InitCache(PerClass *c) {
     if (LIKELY(c->max_count))
       return;
-    for (uptr i = 0; i < kNumClasses; i++) {
+    for (uptr i = 1; i < kNumClasses; i++) {
       PerClass *c = &per_class_[i];
-      c->max_count = 2 * SizeClassMap::MaxCachedHint(i);
-      c->class_size = Allocator::ClassIdToSize(i);
+      const uptr size = Allocator::ClassIdToSize(i);
+      c->max_count = 2 * SizeClassMap::MaxCachedHint(size);
+      c->class_size = size;
     }
     DCHECK_NE(c->max_count, 0UL);
   }
@@ -185,7 +186,7 @@ struct SizeClassAllocator32LocalCache {
   }
   void Drain(SizeClassAllocator *allocator) {
-    for (uptr i = 0; i < kNumClasses; i++) {
+    for (uptr i = 1; i < kNumClasses; i++) {
       PerClass *c = &per_class_[i];
       while (c->count > 0)
         Drain(c, allocator, i);
@@ -217,11 +218,12 @@ struct SizeClassAllocator32LocalCache {
     if (LIKELY(c->max_count))
       return;
     const uptr batch_class_id = SizeClassMap::ClassID(sizeof(TransferBatch));
-    for (uptr i = 0; i < kNumClasses; i++) {
+    for (uptr i = 1; i < kNumClasses; i++) {
       PerClass *c = &per_class_[i];
-      uptr max_cached = TransferBatch::MaxCached(i);
+      const uptr size = Allocator::ClassIdToSize(i);
+      const uptr max_cached = TransferBatch::MaxCached(size);
       c->max_count = 2 * max_cached;
-      c->class_size = Allocator::ClassIdToSize(i);
+      c->class_size = size;
       // Precompute the class id to use to store batches for the current class
       // id. 0 means the class size is large enough to store a batch within one
       // of the chunks. If using a separate size class, it will always be
@@ -229,7 +231,7 @@ struct SizeClassAllocator32LocalCache {
       if (kUseSeparateSizeClassForBatch) {
         c->batch_class_id = (i == kBatchClassID) ? 0 : kBatchClassID;
       } else {
-        c->batch_class_id = (c->class_size <
+        c->batch_class_id = (size <
             TransferBatch::AllocationSizeRequiredForNElements(max_cached)) ?
             batch_class_id : 0;
       }
@@ -266,4 +268,3 @@ struct SizeClassAllocator32LocalCache {
     allocator->DeallocateBatch(&stats_, class_id, b);
   }
 };
-

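As a side note on the `batch_class_id` precomputation in
`SizeClassAllocator32LocalCache::InitCache` above, here is a rough standalone
sketch of the decision. It is illustrative only: `ComputeBatchClassId` and its
parameter list are invented for the example, while the formula mirrors the
diff:

```cpp
#include <cstdint>

using uptr = uintptr_t;

// Mirrors TransferBatch::AllocationSizeRequiredForNElements from the diff:
// a batch needs two uptr header fields plus n chunk pointers.
static uptr AllocationSizeRequiredForNElements(uptr n) {
  return sizeof(uptr) * 2 + sizeof(void *) * n;
}

// Illustrative helper (not the compiler-rt API): pick the class id used to
// store batches for `class_id`. `transfer_batch_class_id` plays the role of
// SizeClassMap::ClassID(sizeof(TransferBatch)) and `dedicated_batch_class_id`
// the role of kBatchClassID in the diff. Returning 0 means a batch fits
// within one of the class's own chunks.
static uptr ComputeBatchClassId(uptr class_id, uptr size, uptr max_cached,
                                uptr transfer_batch_class_id,
                                uptr dedicated_batch_class_id,
                                bool use_separate_class_for_batch) {
  if (use_separate_class_for_batch) {
    // Everything except the dedicated class itself stores its batches there.
    return class_id == dedicated_batch_class_id ? 0 : dedicated_batch_class_id;
  }
  // Otherwise, only classes too small to hold a full batch within one chunk
  // redirect their batches to the class sized for TransferBatch.
  return size < AllocationSizeRequiredForNElements(max_cached)
             ? transfer_batch_class_id
             : 0;
}
```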

@@ -84,8 +84,8 @@ class SizeClassAllocator32 {
     static uptr AllocationSizeRequiredForNElements(uptr n) {
       return sizeof(uptr) * 2 + sizeof(void *) * n;
     }
-    static uptr MaxCached(uptr class_id) {
-      return Min(kMaxNumCached, SizeClassMap::MaxCachedHint(class_id));
+    static uptr MaxCached(uptr size) {
+      return Min(kMaxNumCached, SizeClassMap::MaxCachedHint(size));
     }
     TransferBatch *next;
@@ -156,10 +156,11 @@ class SizeClassAllocator32 {
     CHECK_LT(class_id, kNumClasses);
     SizeClassInfo *sci = GetSizeClassInfo(class_id);
     SpinMutexLock l(&sci->mutex);
-    if (sci->free_list.empty() &&
-        UNLIKELY(!PopulateFreeList(stat, c, sci, class_id)))
+    if (sci->free_list.empty()) {
+      if (UNLIKELY(!PopulateFreeList(stat, c, sci, class_id)))
         return nullptr;
-    CHECK(!sci->free_list.empty());
+      DCHECK(!sci->free_list.empty());
+    }
     TransferBatch *b = sci->free_list.front();
     sci->free_list.pop_front();
     return b;
@@ -275,7 +276,7 @@ class SizeClassAllocator32 {
   COMPILER_CHECK(sizeof(SizeClassInfo) == kCacheLineSize);
   uptr ComputeRegionId(uptr mem) {
-    uptr res = mem >> kRegionSizeLog;
+    const uptr res = mem >> kRegionSizeLog;
     CHECK_LT(res, kNumPossibleRegions);
     return res;
   }
@@ -329,22 +330,22 @@ class SizeClassAllocator32 {
   bool PopulateFreeList(AllocatorStats *stat, AllocatorCache *c,
                         SizeClassInfo *sci, uptr class_id) {
-    uptr size = ClassIdToSize(class_id);
-    uptr reg = AllocateRegion(stat, class_id);
-    if (UNLIKELY(!reg))
+    const uptr region = AllocateRegion(stat, class_id);
+    if (UNLIKELY(!region))
       return false;
     if (kRandomShuffleChunks)
       if (UNLIKELY(sci->rand_state == 0))
         // The random state is initialized from ASLR (PIE) and time.
         sci->rand_state = reinterpret_cast<uptr>(sci) ^ NanoTime();
-    uptr n_chunks = kRegionSize / (size + kMetadataSize);
-    uptr max_count = TransferBatch::MaxCached(class_id);
-    CHECK_GT(max_count, 0);
+    const uptr size = ClassIdToSize(class_id);
+    const uptr n_chunks = kRegionSize / (size + kMetadataSize);
+    const uptr max_count = TransferBatch::MaxCached(size);
+    DCHECK_GT(max_count, 0);
     TransferBatch *b = nullptr;
-    const uptr kShuffleArraySize = 48;
+    constexpr uptr kShuffleArraySize = 48;
     uptr shuffle_array[kShuffleArraySize];
     uptr count = 0;
-    for (uptr i = reg; i < reg + n_chunks * size; i += size) {
+    for (uptr i = region; i < region + n_chunks * size; i += size) {
       shuffle_array[count++] = i;
       if (count == kShuffleArraySize) {
         if (UNLIKELY(!PopulateBatches(c, sci, class_id, &b, max_count,

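To illustrate the chunk-carving loop in `PopulateFreeList` above, a simplified
sketch follows. Assumptions: the `kRegionSize` and `kMetadataSize` values are
made up, a `std::vector` stands in for the on-stack shuffle array and for
`PopulateBatches`, and the random shuffling itself is omitted:

```cpp
#include <cstdint>
#include <vector>

using uptr = uintptr_t;

constexpr uptr kRegionSize = 1 << 20;   // assumed region size for the example
constexpr uptr kMetadataSize = 0;       // assumed per-chunk metadata size
constexpr uptr kShuffleArraySize = 48;  // matches the constant in the diff

// Carve a freshly allocated region into `size`-byte chunks and hand them out
// in groups of at most kShuffleArraySize, the way PopulateFreeList feeds
// PopulateBatches.
std::vector<std::vector<uptr>> CarveRegion(uptr region, uptr size) {
  const uptr n_chunks = kRegionSize / (size + kMetadataSize);
  std::vector<std::vector<uptr>> batches;
  std::vector<uptr> shuffle_array;
  shuffle_array.reserve(kShuffleArraySize);
  for (uptr i = region; i < region + n_chunks * size; i += size) {
    shuffle_array.push_back(i);
    if (shuffle_array.size() == kShuffleArraySize) {
      // In the real code this is where the array is shuffled (if
      // kRandomShuffleChunks) and flushed through PopulateBatches.
      batches.push_back(shuffle_array);
      shuffle_array.clear();
    }
  }
  if (!shuffle_array.empty())
    batches.push_back(shuffle_array);
  return batches;
}
```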

@@ -161,23 +161,24 @@ class SizeClassMap {
       return 0;
     if (size <= kMidSize)
       return (size + kMinSize - 1) >> kMinSizeLog;
-    uptr l = MostSignificantSetBitIndex(size);
-    uptr hbits = (size >> (l - S)) & M;
-    uptr lbits = size & ((1 << (l - S)) - 1);
-    uptr l1 = l - kMidSizeLog;
+    const uptr l = MostSignificantSetBitIndex(size);
+    const uptr hbits = (size >> (l - S)) & M;
+    const uptr lbits = size & ((1U << (l - S)) - 1);
+    const uptr l1 = l - kMidSizeLog;
     return kMidClass + (l1 << S) + hbits + (lbits > 0);
   }
-  static uptr MaxCachedHint(uptr class_id) {
-    // Estimate the result for kBatchClassID because this class does not know
-    // the exact size of TransferBatch. We need to cache fewer batches than user
-    // chunks, so this number can be small.
-    if (UNLIKELY(class_id == kBatchClassID))
-      return 16;
-    if (UNLIKELY(class_id == 0))
+  static uptr MaxCachedHint(uptr size) {
+    DCHECK_LE(size, kMaxSize);
+    if (UNLIKELY(size == 0))
       return 0;
-    uptr n = (1UL << kMaxBytesCachedLog) / Size(class_id);
-    return Max<uptr>(1, Min(kMaxNumCachedHint, n));
+    uptr n;
+    // Force a 32-bit division if the template parameters allow for it.
+    if (kMaxBytesCachedLog > 31 || kMaxSizeLog > 31)
+      n = (1UL << kMaxBytesCachedLog) / size;
+    else
+      n = (1U << kMaxBytesCachedLog) / static_cast<u32>(size);
+    return Max<uptr>(1U, Min(kMaxNumCachedHint, n));
   }
   static void Print() {
@@ -190,12 +191,12 @@ class SizeClassMap {
       uptr d = s - prev_s;
       uptr p = prev_s ? (d * 100 / prev_s) : 0;
       uptr l = s ? MostSignificantSetBitIndex(s) : 0;
-      uptr cached = MaxCachedHint(i) * s;
+      uptr cached = MaxCachedHint(s) * s;
       if (i == kBatchClassID)
         d = p = l = 0;
       Printf("c%02zd => s: %zd diff: +%zd %02zd%% l %zd "
              "cached: %zd %zd; id %zd\n",
-             i, Size(i), d, p, l, MaxCachedHint(i), cached, ClassID(s));
+             i, Size(i), d, p, l, MaxCachedHint(s), cached, ClassID(s));
       total_cached += cached;
       prev_s = s;
     }