From 5c6e6c28c48723ac6365a76294d9f05a89b0ecf4 Mon Sep 17 00:00:00 2001 From: Kostya Kortchinsky Date: Mon, 5 Feb 2018 18:06:45 +0000 Subject: [PATCH] [sanitizer] Allocator local cache improvements Summary: Here are a few improvements proposed for the local cache: - `InitCache` always read from `per_class_[1]` in the fast path. This was not ideal as we are working with `per_class_[class_id]`. The latter offers the same property we are looking for (eg: `max_count != 0` means initialized), so we might as well use it and keep our memory accesses local to the same `per_class_` element. So change `InitCache` to take the current `PerClass` as an argument. This also makes the fast-path assembly of `Deallocate` a lot more compact; - Change the 32-bit `Refill` & `Drain` functions to mimic their 64-bit counterparts, by passing the current `PerClass` as an argument. This saves some array computations; - As far as I can tell, `InitCache` has no place in `Drain`: it's either called from `Deallocate` which calls `InitCache`, or from the "upper" `Drain` which checks for `c->count` to be greater than 0 (strictly). So remove it there. - Move the `stats_` updates to after we are done with the `per_class_` accesses in an attempt to preserve locality once more; - Change some `CHECK` to `DCHECK`: I don't think the ones changed belonged in the fast path and seemed to be overly cautious failsafes; - Mark some variables as `const`. The overall result is cleaner more compact fast path generated code, and some performance gains with Scudo (and likely other Sanitizers). Reviewers: alekseyshl Reviewed By: alekseyshl Subscribers: kubamracek, delcypher, #sanitizers, llvm-commits Differential Revision: https://reviews.llvm.org/D42851 llvm-svn: 324257 --- .../sanitizer_allocator_local_cache.h | 65 +++++++++---------- 1 file changed, 31 insertions(+), 34 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_local_cache.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_local_cache.h index 1b3c2c0c19d1..0932fb83d835 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_local_cache.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_local_cache.h @@ -19,8 +19,7 @@ // object per thread in TLS, is has to be POD. template struct SizeClassAllocatorLocalCache - : SizeClassAllocator::AllocatorCache { -}; + : SizeClassAllocator::AllocatorCache {}; // Cache used by SizeClassAllocator64. template @@ -46,13 +45,12 @@ struct SizeClassAllocator64LocalCache { if (UNLIKELY(c->count == 0)) { if (UNLIKELY(!Refill(c, allocator, class_id))) return nullptr; + DCHECK_GT(c->count, 0); } - stats_.Add(AllocatorStatAllocated, c->class_size); - CHECK_GT(c->count, 0); CompactPtrT chunk = c->chunks[--c->count]; - void *res = reinterpret_cast(allocator->CompactPtrToPointer( + stats_.Add(AllocatorStatAllocated, c->class_size); + return reinterpret_cast(allocator->CompactPtrToPointer( allocator->GetRegionBeginBySizeClass(class_id), chunk)); - return res; } void Deallocate(SizeClassAllocator *allocator, uptr class_id, void *p) { @@ -60,16 +58,15 @@ struct SizeClassAllocator64LocalCache { CHECK_LT(class_id, kNumClasses); // If the first allocator call on a new thread is a deallocation, then // max_count will be zero, leading to check failure. - InitCache(); PerClass *c = &per_class_[class_id]; - stats_.Sub(AllocatorStatAllocated, c->class_size); - CHECK_NE(c->max_count, 0UL); + InitCache(c); if (UNLIKELY(c->count == c->max_count)) Drain(c, allocator, class_id, c->max_count / 2); CompactPtrT chunk = allocator->PointerToCompactPtr( allocator->GetRegionBeginBySizeClass(class_id), reinterpret_cast(p)); c->chunks[c->count++] = chunk; + stats_.Sub(AllocatorStatAllocated, c->class_size); } void Drain(SizeClassAllocator *allocator) { @@ -94,20 +91,21 @@ struct SizeClassAllocator64LocalCache { PerClass per_class_[kNumClasses]; AllocatorStats stats_; - void InitCache() { - if (LIKELY(per_class_[1].max_count)) + void InitCache(PerClass *c) { + if (LIKELY(c->max_count)) return; for (uptr i = 0; i < kNumClasses; i++) { PerClass *c = &per_class_[i]; c->max_count = 2 * SizeClassMap::MaxCachedHint(i); c->class_size = Allocator::ClassIdToSize(i); } + DCHECK_NE(c->max_count, 0UL); } NOINLINE bool Refill(PerClass *c, SizeClassAllocator *allocator, uptr class_id) { - InitCache(); - uptr num_requested_chunks = c->max_count / 2; + InitCache(c); + const uptr num_requested_chunks = c->max_count / 2; if (UNLIKELY(!allocator->GetFromAllocator(&stats_, class_id, c->chunks, num_requested_chunks))) return false; @@ -117,9 +115,8 @@ struct SizeClassAllocator64LocalCache { NOINLINE void Drain(PerClass *c, SizeClassAllocator *allocator, uptr class_id, uptr count) { - InitCache(); CHECK_GE(c->count, count); - uptr first_idx_to_drain = c->count - count; + const uptr first_idx_to_drain = c->count - count; c->count -= count; allocator->ReturnToAllocator(&stats_, class_id, &c->chunks[first_idx_to_drain], count); @@ -164,12 +161,13 @@ struct SizeClassAllocator32LocalCache { CHECK_LT(class_id, kNumClasses); PerClass *c = &per_class_[class_id]; if (UNLIKELY(c->count == 0)) { - if (UNLIKELY(!Refill(allocator, class_id))) + if (UNLIKELY(!Refill(c, allocator, class_id))) return nullptr; + DCHECK_GT(c->count, 0); } - stats_.Add(AllocatorStatAllocated, c->class_size); void *res = c->batch[--c->count]; PREFETCH(c->batch[c->count - 1]); + stats_.Add(AllocatorStatAllocated, c->class_size); return res; } @@ -178,20 +176,19 @@ struct SizeClassAllocator32LocalCache { CHECK_LT(class_id, kNumClasses); // If the first allocator call on a new thread is a deallocation, then // max_count will be zero, leading to check failure. - InitCache(); PerClass *c = &per_class_[class_id]; - stats_.Sub(AllocatorStatAllocated, c->class_size); - CHECK_NE(c->max_count, 0UL); + InitCache(c); if (UNLIKELY(c->count == c->max_count)) - Drain(allocator, class_id); + Drain(c, allocator, class_id); c->batch[c->count++] = p; + stats_.Sub(AllocatorStatAllocated, c->class_size); } void Drain(SizeClassAllocator *allocator) { for (uptr i = 0; i < kNumClasses; i++) { PerClass *c = &per_class_[i]; while (c->count > 0) - Drain(allocator, i); + Drain(c, allocator, i); } } @@ -216,8 +213,8 @@ struct SizeClassAllocator32LocalCache { PerClass per_class_[kNumClasses]; AllocatorStats stats_; - void InitCache() { - if (LIKELY(per_class_[1].max_count)) + void InitCache(PerClass *c) { + if (LIKELY(c->max_count)) return; const uptr batch_class_id = SizeClassMap::ClassID(sizeof(TransferBatch)); for (uptr i = 0; i < kNumClasses; i++) { @@ -237,11 +234,12 @@ struct SizeClassAllocator32LocalCache { batch_class_id : 0; } } + DCHECK_NE(c->max_count, 0UL); } - NOINLINE bool Refill(SizeClassAllocator *allocator, uptr class_id) { - InitCache(); - PerClass *c = &per_class_[class_id]; + NOINLINE bool Refill(PerClass *c, SizeClassAllocator *allocator, + uptr class_id) { + InitCache(c); TransferBatch *b = allocator->AllocateBatch(&stats_, this, class_id); if (UNLIKELY(!b)) return false; @@ -252,11 +250,10 @@ struct SizeClassAllocator32LocalCache { return true; } - NOINLINE void Drain(SizeClassAllocator *allocator, uptr class_id) { - InitCache(); - PerClass *c = &per_class_[class_id]; - uptr cnt = Min(c->max_count / 2, c->count); - uptr first_idx_to_drain = c->count - cnt; + NOINLINE void Drain(PerClass *c, SizeClassAllocator *allocator, + uptr class_id) { + const uptr count = Min(c->max_count / 2, c->count); + const uptr first_idx_to_drain = c->count - count; TransferBatch *b = CreateBatch( class_id, allocator, (TransferBatch *)c->batch[first_idx_to_drain]); // Failure to allocate a batch while releasing memory is non recoverable. @@ -264,8 +261,8 @@ struct SizeClassAllocator32LocalCache { if (UNLIKELY(!b)) DieOnFailure::OnOOM(); b->SetFromArray(allocator->GetRegionBeginBySizeClass(class_id), - &c->batch[first_idx_to_drain], cnt); - c->count -= cnt; + &c->batch[first_idx_to_drain], count); + c->count -= count; allocator->DeallocateBatch(&stats_, class_id, b); } };