[sanitizer] Size class map & local cache improvements

Summary:
- Reland rL324263, this time allowing for a compile-time decision as to whether
  or not to use the 32-bit division. A single test uses a class map covering a
  maximum size greater than 4GB; this can be detected via the template
  parameters, which allows SizeClassAllocator64PopulateFreeListOOM to pass (see
  the sketch after this list);
- `MaxCachedHint` is always called on a class id for which we have already
  computed the size, but we still recompute `Size(class_id)`. Change the
  prototype of the function to work on sizes instead of class ids. This also
  allows us to get rid of the `kBatchClassID` special case. Update the callers
  accordingly;
- `InitCache` and `Drain` will start iterating at index 1: the contents of
  index 0 are unused and can safely be left at 0. This also avoids the cost of
  going through an `UNLIKELY` branch in `MaxCachedHint` and of touching memory
  that is otherwise never used;
- `const` some variables in the areas modified;
- Remove a spurious extra line at the end of a file.
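
To make the first two points concrete, here is a minimal standalone sketch of
the new `MaxCachedHint` shape. It mirrors the diff below but is not the exact
compiler-rt code; `SizeClassMapSketch`, the explicit template parameters and
the `std::` helpers exist only for the example:

```cpp
#include <algorithm>
#include <cstdint>

using uptr = uintptr_t;
using u32 = uint32_t;

// Sketch: the hint now takes a size (not a class id), and the division width
// is a compile-time decision based on the template parameters.
template <uptr kMaxSizeLog, uptr kMaxBytesCachedLog, uptr kMaxNumCachedHint>
struct SizeClassMapSketch {
  static uptr MaxCachedHint(uptr size) {
    if (size == 0)
      return 0;
    uptr n;
    // A 64-bit division is only needed when the class map can describe sizes
    // or cached-byte counts that do not fit in 32 bits (e.g. a map covering
    // more than 4GB); otherwise the cheaper 32-bit division is enough.
    if (kMaxBytesCachedLog > 31 || kMaxSizeLog > 31)
      n = (1UL << kMaxBytesCachedLog) / size;
    else
      n = (1U << kMaxBytesCachedLog) / static_cast<u32>(size);
    return std::max<uptr>(1, std::min<uptr>(kMaxNumCachedHint, n));
  }
};

// Example: with kMaxBytesCachedLog == 10, a 256-byte class gets a hint of
// (1 << 10) / 256 == 4, clamped to [1, kMaxNumCachedHint].
using ExampleMap = SizeClassMapSketch</*kMaxSizeLog=*/17,
                                      /*kMaxBytesCachedLog=*/10,
                                      /*kMaxNumCachedHint=*/14>;
```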

Reviewers: alekseyshl, tl0gic, dberris

Reviewed By: alekseyshl, dberris

Subscribers: dberris, kubamracek, delcypher, llvm-commits, #sanitizers

Differential Revision: https://reviews.llvm.org/D43088

llvm-svn: 324906
Kostya Kortchinsky 2018-02-12 16:59:17 +00:00
parent 07e1337c2a
commit 1022220b16
3 changed files with 43 additions and 40 deletions


@@ -70,7 +70,7 @@ struct SizeClassAllocator64LocalCache {
   }
   void Drain(SizeClassAllocator *allocator) {
-    for (uptr i = 0; i < kNumClasses; i++) {
+    for (uptr i = 1; i < kNumClasses; i++) {
       PerClass *c = &per_class_[i];
       while (c->count > 0)
         Drain(c, allocator, i, c->count);
@@ -94,10 +94,11 @@ struct SizeClassAllocator64LocalCache {
   void InitCache(PerClass *c) {
     if (LIKELY(c->max_count))
       return;
-    for (uptr i = 0; i < kNumClasses; i++) {
+    for (uptr i = 1; i < kNumClasses; i++) {
       PerClass *c = &per_class_[i];
-      c->max_count = 2 * SizeClassMap::MaxCachedHint(i);
-      c->class_size = Allocator::ClassIdToSize(i);
+      const uptr size = Allocator::ClassIdToSize(i);
+      c->max_count = 2 * SizeClassMap::MaxCachedHint(size);
+      c->class_size = size;
     }
     DCHECK_NE(c->max_count, 0UL);
   }
@@ -185,7 +186,7 @@ struct SizeClassAllocator32LocalCache {
   }
   void Drain(SizeClassAllocator *allocator) {
-    for (uptr i = 0; i < kNumClasses; i++) {
+    for (uptr i = 1; i < kNumClasses; i++) {
       PerClass *c = &per_class_[i];
       while (c->count > 0)
         Drain(c, allocator, i);
@@ -217,11 +218,12 @@ struct SizeClassAllocator32LocalCache {
     if (LIKELY(c->max_count))
       return;
     const uptr batch_class_id = SizeClassMap::ClassID(sizeof(TransferBatch));
-    for (uptr i = 0; i < kNumClasses; i++) {
+    for (uptr i = 1; i < kNumClasses; i++) {
       PerClass *c = &per_class_[i];
-      uptr max_cached = TransferBatch::MaxCached(i);
+      const uptr size = Allocator::ClassIdToSize(i);
+      const uptr max_cached = TransferBatch::MaxCached(size);
       c->max_count = 2 * max_cached;
-      c->class_size = Allocator::ClassIdToSize(i);
+      c->class_size = size;
       // Precompute the class id to use to store batches for the current class
       // id. 0 means the class size is large enough to store a batch within one
       // of the chunks. If using a separate size class, it will always be
@@ -229,7 +231,7 @@ struct SizeClassAllocator32LocalCache {
       if (kUseSeparateSizeClassForBatch) {
         c->batch_class_id = (i == kBatchClassID) ? 0 : kBatchClassID;
       } else {
-        c->batch_class_id = (c->class_size <
+        c->batch_class_id = (size <
             TransferBatch::AllocationSizeRequiredForNElements(max_cached)) ?
             batch_class_id : 0;
       }
@@ -266,4 +268,3 @@ struct SizeClassAllocator32LocalCache {
     allocator->DeallocateBatch(&stats_, class_id, b);
   }
 };
-

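As a side note on the `batch_class_id` precomputation in
`SizeClassAllocator32LocalCache::InitCache` above, here is a rough standalone
sketch of the decision. It is illustrative only: `ComputeBatchClassId` and its
parameter list are invented for the example, while the formula mirrors the
diff:

```cpp
#include <cstdint>

using uptr = uintptr_t;

// Mirrors TransferBatch::AllocationSizeRequiredForNElements from the diff:
// a batch needs two uptr header fields plus n chunk pointers.
static uptr AllocationSizeRequiredForNElements(uptr n) {
  return sizeof(uptr) * 2 + sizeof(void *) * n;
}

// Illustrative helper (not the compiler-rt API): pick the class id used to
// store batches for `class_id`. `transfer_batch_class_id` plays the role of
// SizeClassMap::ClassID(sizeof(TransferBatch)) and `dedicated_batch_class_id`
// the role of kBatchClassID in the diff. Returning 0 means a batch fits
// within one of the class's own chunks.
static uptr ComputeBatchClassId(uptr class_id, uptr size, uptr max_cached,
                                uptr transfer_batch_class_id,
                                uptr dedicated_batch_class_id,
                                bool use_separate_class_for_batch) {
  if (use_separate_class_for_batch) {
    // Everything except the dedicated class itself stores its batches there.
    return class_id == dedicated_batch_class_id ? 0 : dedicated_batch_class_id;
  }
  // Otherwise, only classes too small to hold a full batch within one chunk
  // redirect their batches to the class sized for TransferBatch.
  return size < AllocationSizeRequiredForNElements(max_cached)
             ? transfer_batch_class_id
             : 0;
}
```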

@@ -84,8 +84,8 @@ class SizeClassAllocator32 {
     static uptr AllocationSizeRequiredForNElements(uptr n) {
       return sizeof(uptr) * 2 + sizeof(void *) * n;
     }
-    static uptr MaxCached(uptr class_id) {
-      return Min(kMaxNumCached, SizeClassMap::MaxCachedHint(class_id));
+    static uptr MaxCached(uptr size) {
+      return Min(kMaxNumCached, SizeClassMap::MaxCachedHint(size));
     }
     TransferBatch *next;
@@ -156,10 +156,11 @@ class SizeClassAllocator32 {
     CHECK_LT(class_id, kNumClasses);
     SizeClassInfo *sci = GetSizeClassInfo(class_id);
     SpinMutexLock l(&sci->mutex);
-    if (sci->free_list.empty() &&
-        UNLIKELY(!PopulateFreeList(stat, c, sci, class_id)))
+    if (sci->free_list.empty()) {
+      if (UNLIKELY(!PopulateFreeList(stat, c, sci, class_id)))
         return nullptr;
-    CHECK(!sci->free_list.empty());
+      DCHECK(!sci->free_list.empty());
+    }
     TransferBatch *b = sci->free_list.front();
     sci->free_list.pop_front();
     return b;
@@ -275,7 +276,7 @@ class SizeClassAllocator32 {
   COMPILER_CHECK(sizeof(SizeClassInfo) == kCacheLineSize);
   uptr ComputeRegionId(uptr mem) {
-    uptr res = mem >> kRegionSizeLog;
+    const uptr res = mem >> kRegionSizeLog;
     CHECK_LT(res, kNumPossibleRegions);
     return res;
   }
@@ -329,22 +330,22 @@ class SizeClassAllocator32 {
   bool PopulateFreeList(AllocatorStats *stat, AllocatorCache *c,
                         SizeClassInfo *sci, uptr class_id) {
-    uptr size = ClassIdToSize(class_id);
-    uptr reg = AllocateRegion(stat, class_id);
-    if (UNLIKELY(!reg))
+    const uptr region = AllocateRegion(stat, class_id);
+    if (UNLIKELY(!region))
       return false;
     if (kRandomShuffleChunks)
       if (UNLIKELY(sci->rand_state == 0))
         // The random state is initialized from ASLR (PIE) and time.
         sci->rand_state = reinterpret_cast<uptr>(sci) ^ NanoTime();
-    uptr n_chunks = kRegionSize / (size + kMetadataSize);
-    uptr max_count = TransferBatch::MaxCached(class_id);
-    CHECK_GT(max_count, 0);
+    const uptr size = ClassIdToSize(class_id);
+    const uptr n_chunks = kRegionSize / (size + kMetadataSize);
+    const uptr max_count = TransferBatch::MaxCached(size);
+    DCHECK_GT(max_count, 0);
     TransferBatch *b = nullptr;
-    const uptr kShuffleArraySize = 48;
+    constexpr uptr kShuffleArraySize = 48;
     uptr shuffle_array[kShuffleArraySize];
     uptr count = 0;
-    for (uptr i = reg; i < reg + n_chunks * size; i += size) {
+    for (uptr i = region; i < region + n_chunks * size; i += size) {
       shuffle_array[count++] = i;
       if (count == kShuffleArraySize) {
         if (UNLIKELY(!PopulateBatches(c, sci, class_id, &b, max_count,

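To illustrate the chunk-carving loop in `PopulateFreeList` above, a simplified
sketch follows. Assumptions: the `kRegionSize` and `kMetadataSize` values are
made up, a `std::vector` stands in for the on-stack shuffle array and for
`PopulateBatches`, and the random shuffling itself is omitted:

```cpp
#include <cstdint>
#include <vector>

using uptr = uintptr_t;

constexpr uptr kRegionSize = 1 << 20;   // assumed region size for the example
constexpr uptr kMetadataSize = 0;       // assumed per-chunk metadata size
constexpr uptr kShuffleArraySize = 48;  // matches the constant in the diff

// Carve a freshly allocated region into `size`-byte chunks and hand them out
// in groups of at most kShuffleArraySize, the way PopulateFreeList feeds
// PopulateBatches.
std::vector<std::vector<uptr>> CarveRegion(uptr region, uptr size) {
  const uptr n_chunks = kRegionSize / (size + kMetadataSize);
  std::vector<std::vector<uptr>> batches;
  std::vector<uptr> shuffle_array;
  shuffle_array.reserve(kShuffleArraySize);
  for (uptr i = region; i < region + n_chunks * size; i += size) {
    shuffle_array.push_back(i);
    if (shuffle_array.size() == kShuffleArraySize) {
      // In the real code this is where the array is shuffled (if
      // kRandomShuffleChunks) and flushed through PopulateBatches.
      batches.push_back(shuffle_array);
      shuffle_array.clear();
    }
  }
  if (!shuffle_array.empty())
    batches.push_back(shuffle_array);
  return batches;
}
```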

@@ -161,23 +161,24 @@ class SizeClassMap {
       return 0;
     if (size <= kMidSize)
       return (size + kMinSize - 1) >> kMinSizeLog;
-    uptr l = MostSignificantSetBitIndex(size);
-    uptr hbits = (size >> (l - S)) & M;
-    uptr lbits = size & ((1 << (l - S)) - 1);
-    uptr l1 = l - kMidSizeLog;
+    const uptr l = MostSignificantSetBitIndex(size);
+    const uptr hbits = (size >> (l - S)) & M;
+    const uptr lbits = size & ((1U << (l - S)) - 1);
+    const uptr l1 = l - kMidSizeLog;
     return kMidClass + (l1 << S) + hbits + (lbits > 0);
   }
-  static uptr MaxCachedHint(uptr class_id) {
-    // Estimate the result for kBatchClassID because this class does not know
-    // the exact size of TransferBatch. We need to cache fewer batches than user
-    // chunks, so this number can be small.
-    if (UNLIKELY(class_id == kBatchClassID))
-      return 16;
-    if (UNLIKELY(class_id == 0))
+  static uptr MaxCachedHint(uptr size) {
+    DCHECK_LE(size, kMaxSize);
+    if (UNLIKELY(size == 0))
       return 0;
-    uptr n = (1UL << kMaxBytesCachedLog) / Size(class_id);
-    return Max<uptr>(1, Min(kMaxNumCachedHint, n));
+    uptr n;
+    // Force a 32-bit division if the template parameters allow for it.
+    if (kMaxBytesCachedLog > 31 || kMaxSizeLog > 31)
+      n = (1UL << kMaxBytesCachedLog) / size;
+    else
+      n = (1U << kMaxBytesCachedLog) / static_cast<u32>(size);
+    return Max<uptr>(1U, Min(kMaxNumCachedHint, n));
   }
   static void Print() {
@@ -190,12 +191,12 @@ class SizeClassMap {
       uptr d = s - prev_s;
       uptr p = prev_s ? (d * 100 / prev_s) : 0;
       uptr l = s ? MostSignificantSetBitIndex(s) : 0;
-      uptr cached = MaxCachedHint(i) * s;
+      uptr cached = MaxCachedHint(s) * s;
       if (i == kBatchClassID)
         d = p = l = 0;
       Printf("c%02zd => s: %zd diff: +%zd %02zd%% l %zd "
              "cached: %zd %zd; id %zd\n",
-             i, Size(i), d, p, l, MaxCachedHint(i), cached, ClassID(s));
+             i, Size(i), d, p, l, MaxCachedHint(s), cached, ClassID(s));
       total_cached += cached;
       prev_s = s;
     }