[sanitizer] Allocator local cache improvements
Summary:
Here are a few improvements proposed for the local cache:

- `InitCache` always read from `per_class_[1]` in the fast path. This was not ideal, as we are working with `per_class_[class_id]`. The latter offers the same property we are looking for (e.g., `max_count != 0` means initialized), so we might as well use it and keep our memory accesses local to the same `per_class_` element. So change `InitCache` to take the current `PerClass` as an argument. This also makes the fast-path assembly of `Deallocate` a lot more compact (see the sketch below);
- Change the 32-bit `Refill` & `Drain` functions to mimic their 64-bit counterparts, by passing the current `PerClass` as an argument. This saves some array-indexing computations;
- As far as I can tell, `InitCache` has no place in `Drain`: it is either called from `Deallocate`, which already calls `InitCache`, or from the "upper" `Drain`, which only runs when `c->count` is strictly greater than 0. So remove it there;
- Move the `stats_` updates to after we are done with the `per_class_` accesses, in an attempt to preserve locality once more;
- Change some `CHECK`s to `DCHECK`s: I don't think the ones changed belonged in the fast path; they seemed to be overly cautious failsafes;
- Mark some variables as `const`.

The overall result is cleaner, more compact fast-path code, and some performance gains with Scudo (and likely other sanitizers).

Reviewers: alekseyshl

Reviewed By: alekseyshl

Subscribers: kubamracek, delcypher, #sanitizers, llvm-commits

Differential Revision: https://reviews.llvm.org/D42851

llvm-svn: 324257
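To make the first bullet concrete, here is a minimal, compilable C++ sketch of the pattern. This is an editorial illustration, not the sanitizer source: `kNumClasses` and the helper bodies are placeholder stand-ins for `SizeClassMap::MaxCachedHint` and `Allocator::ClassIdToSize`. It shows the initialization probe moving from the fixed `per_class_[1]` element to the `PerClass` entry the caller is already using, so the fast path no longer touches an unrelated array element.

```cpp
// Editorial sketch -- simplified stand-ins, not the sanitizer's real types.
#include <cassert>

typedef unsigned long uptr;
static const uptr kNumClasses = 53;  // placeholder class count

struct PerClass {
  uptr count;
  uptr max_count;
  uptr class_size;
};

// Placeholder stand-ins for SizeClassMap::MaxCachedHint and
// Allocator::ClassIdToSize.
static uptr MaxCachedHint(uptr class_id) { return 16 + class_id; }
static uptr ClassIdToSize(uptr class_id) { return 16 * (class_id + 1); }

struct LocalCacheSketch {
  PerClass per_class_[kNumClasses];

  // Before: the fast path always probed per_class_[1], an element
  // unrelated to the per_class_[class_id] the caller works with.
  void InitCacheOld() {
    if (per_class_[1].max_count)  // LIKELY() hint omitted in this sketch
      return;
    InitAllClasses();
  }

  // After: probe the entry the caller already holds; "max_count != 0"
  // still means "initialized", and the read stays local to the same
  // PerClass element the surrounding code accesses anyway.
  void InitCache(PerClass *c) {
    if (c->max_count)
      return;
    InitAllClasses();
    assert(c->max_count != 0);  // stands in for the debug-only DCHECK_NE
  }

  void InitAllClasses() {
    for (uptr i = 0; i < kNumClasses; i++) {
      per_class_[i].max_count = 2 * MaxCachedHint(i);
      per_class_[i].class_size = ClassIdToSize(i);
    }
  }

  // Deallocate fast path after the change: a single PerClass element is
  // read for the init check, the count check, and the store below.
  void Deallocate(uptr class_id, void *p) {
    PerClass *c = &per_class_[class_id];
    InitCache(c);
    if (c->count == c->max_count) {
      // Drain(c, ...) would return half the cache to the allocator here.
    }
    (void)p;  // the real code stores a compacted pointer into c->chunks
    c->count++;
  }
};
```

Because every fast-path access now lands on the one `per_class_[class_id]` element, the compiler can keep the generated `Deallocate` code compact, which is the effect the summary describes.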
parent bca95f8acc
commit 5c6e6c28c4
@@ -19,8 +19,7 @@
 // object per thread in TLS, is has to be POD.
 template<class SizeClassAllocator>
 struct SizeClassAllocatorLocalCache
-    : SizeClassAllocator::AllocatorCache {
-};
+    : SizeClassAllocator::AllocatorCache {};
 
 // Cache used by SizeClassAllocator64.
 template <class SizeClassAllocator>
@@ -46,13 +45,12 @@ struct SizeClassAllocator64LocalCache {
     if (UNLIKELY(c->count == 0)) {
       if (UNLIKELY(!Refill(c, allocator, class_id)))
         return nullptr;
+      DCHECK_GT(c->count, 0);
     }
-    stats_.Add(AllocatorStatAllocated, c->class_size);
-    CHECK_GT(c->count, 0);
     CompactPtrT chunk = c->chunks[--c->count];
-    void *res = reinterpret_cast<void *>(allocator->CompactPtrToPointer(
+    stats_.Add(AllocatorStatAllocated, c->class_size);
+    return reinterpret_cast<void *>(allocator->CompactPtrToPointer(
         allocator->GetRegionBeginBySizeClass(class_id), chunk));
-    return res;
   }
 
   void Deallocate(SizeClassAllocator *allocator, uptr class_id, void *p) {
@@ -60,16 +58,15 @@ struct SizeClassAllocator64LocalCache {
     CHECK_LT(class_id, kNumClasses);
     // If the first allocator call on a new thread is a deallocation, then
     // max_count will be zero, leading to check failure.
-    InitCache();
     PerClass *c = &per_class_[class_id];
-    stats_.Sub(AllocatorStatAllocated, c->class_size);
-    CHECK_NE(c->max_count, 0UL);
+    InitCache(c);
     if (UNLIKELY(c->count == c->max_count))
       Drain(c, allocator, class_id, c->max_count / 2);
     CompactPtrT chunk = allocator->PointerToCompactPtr(
         allocator->GetRegionBeginBySizeClass(class_id),
         reinterpret_cast<uptr>(p));
     c->chunks[c->count++] = chunk;
+    stats_.Sub(AllocatorStatAllocated, c->class_size);
   }
 
   void Drain(SizeClassAllocator *allocator) {
@@ -94,20 +91,21 @@ struct SizeClassAllocator64LocalCache {
   PerClass per_class_[kNumClasses];
   AllocatorStats stats_;
 
-  void InitCache() {
-    if (LIKELY(per_class_[1].max_count))
+  void InitCache(PerClass *c) {
+    if (LIKELY(c->max_count))
       return;
     for (uptr i = 0; i < kNumClasses; i++) {
       PerClass *c = &per_class_[i];
       c->max_count = 2 * SizeClassMap::MaxCachedHint(i);
       c->class_size = Allocator::ClassIdToSize(i);
     }
+    DCHECK_NE(c->max_count, 0UL);
   }
 
   NOINLINE bool Refill(PerClass *c, SizeClassAllocator *allocator,
                        uptr class_id) {
-    InitCache();
-    uptr num_requested_chunks = c->max_count / 2;
+    InitCache(c);
+    const uptr num_requested_chunks = c->max_count / 2;
     if (UNLIKELY(!allocator->GetFromAllocator(&stats_, class_id, c->chunks,
                                               num_requested_chunks)))
       return false;
@@ -117,9 +115,8 @@ struct SizeClassAllocator64LocalCache {
 
   NOINLINE void Drain(PerClass *c, SizeClassAllocator *allocator, uptr class_id,
                       uptr count) {
-    InitCache();
     CHECK_GE(c->count, count);
-    uptr first_idx_to_drain = c->count - count;
+    const uptr first_idx_to_drain = c->count - count;
     c->count -= count;
     allocator->ReturnToAllocator(&stats_, class_id,
                                  &c->chunks[first_idx_to_drain], count);
@@ -164,12 +161,13 @@ struct SizeClassAllocator32LocalCache {
     CHECK_LT(class_id, kNumClasses);
     PerClass *c = &per_class_[class_id];
     if (UNLIKELY(c->count == 0)) {
-      if (UNLIKELY(!Refill(allocator, class_id)))
+      if (UNLIKELY(!Refill(c, allocator, class_id)))
        return nullptr;
+      DCHECK_GT(c->count, 0);
     }
-    stats_.Add(AllocatorStatAllocated, c->class_size);
     void *res = c->batch[--c->count];
     PREFETCH(c->batch[c->count - 1]);
+    stats_.Add(AllocatorStatAllocated, c->class_size);
     return res;
   }
 
@@ -178,20 +176,19 @@ struct SizeClassAllocator32LocalCache {
     CHECK_LT(class_id, kNumClasses);
     // If the first allocator call on a new thread is a deallocation, then
     // max_count will be zero, leading to check failure.
-    InitCache();
     PerClass *c = &per_class_[class_id];
-    stats_.Sub(AllocatorStatAllocated, c->class_size);
-    CHECK_NE(c->max_count, 0UL);
+    InitCache(c);
     if (UNLIKELY(c->count == c->max_count))
-      Drain(allocator, class_id);
+      Drain(c, allocator, class_id);
     c->batch[c->count++] = p;
+    stats_.Sub(AllocatorStatAllocated, c->class_size);
   }
 
   void Drain(SizeClassAllocator *allocator) {
     for (uptr i = 0; i < kNumClasses; i++) {
       PerClass *c = &per_class_[i];
       while (c->count > 0)
-        Drain(allocator, i);
+        Drain(c, allocator, i);
     }
   }
 
@@ -216,8 +213,8 @@ struct SizeClassAllocator32LocalCache {
   PerClass per_class_[kNumClasses];
   AllocatorStats stats_;
 
-  void InitCache() {
-    if (LIKELY(per_class_[1].max_count))
+  void InitCache(PerClass *c) {
+    if (LIKELY(c->max_count))
       return;
     const uptr batch_class_id = SizeClassMap::ClassID(sizeof(TransferBatch));
     for (uptr i = 0; i < kNumClasses; i++) {
@@ -237,11 +234,12 @@ struct SizeClassAllocator32LocalCache {
               batch_class_id : 0;
       }
     }
+    DCHECK_NE(c->max_count, 0UL);
   }
 
-  NOINLINE bool Refill(SizeClassAllocator *allocator, uptr class_id) {
-    InitCache();
-    PerClass *c = &per_class_[class_id];
+  NOINLINE bool Refill(PerClass *c, SizeClassAllocator *allocator,
+                       uptr class_id) {
+    InitCache(c);
     TransferBatch *b = allocator->AllocateBatch(&stats_, this, class_id);
     if (UNLIKELY(!b))
       return false;
@@ -252,11 +250,10 @@ struct SizeClassAllocator32LocalCache {
     return true;
   }
 
-  NOINLINE void Drain(SizeClassAllocator *allocator, uptr class_id) {
-    InitCache();
-    PerClass *c = &per_class_[class_id];
-    uptr cnt = Min(c->max_count / 2, c->count);
-    uptr first_idx_to_drain = c->count - cnt;
+  NOINLINE void Drain(PerClass *c, SizeClassAllocator *allocator,
+                      uptr class_id) {
+    const uptr count = Min(c->max_count / 2, c->count);
+    const uptr first_idx_to_drain = c->count - count;
     TransferBatch *b = CreateBatch(
         class_id, allocator, (TransferBatch *)c->batch[first_idx_to_drain]);
     // Failure to allocate a batch while releasing memory is non recoverable.
@@ -264,8 +261,8 @@ struct SizeClassAllocator32LocalCache {
     if (UNLIKELY(!b))
       DieOnFailure::OnOOM();
     b->SetFromArray(allocator->GetRegionBeginBySizeClass(class_id),
-                    &c->batch[first_idx_to_drain], cnt);
-    c->count -= cnt;
+                    &c->batch[first_idx_to_drain], count);
+    c->count -= count;
     allocator->DeallocateBatch(&stats_, class_id, b);
   }
 };