From c753a306fd1ad4a7e5c61367225abce86ac29cd7 Mon Sep 17 00:00:00 2001 From: Kostya Kortchinsky Date: Tue, 25 Feb 2020 14:23:34 -0800 Subject: [PATCH] [scudo][standalone] Various improvements wrt RSS Summary: This patch includes several changes to reduce the overall footprint of the allocator: - for realloc'd chunks: only keep the same chunk when lowering the size if the delta is within a page worth of bytes; - when draining a cache: drain the beginning, not the end; we add pointers at the end, so that meant we were draining the most recently added pointers; - change the release code to account for a freed-up last page: when scanning the pages, we were looking for pages fully covered by blocks; in the event of the last page, if it's only partially covered, we wouldn't mark it as releasable - even though what follows the last chunk is all 0s. So now mark the rest of the page as releasable, and adapt the test; - add a missing `setReleaseToOsIntervalMs` to the cacheless secondary; - adjust the Android classes based on more captures thanks to pcc@'s tool. 
Reviewers: pcc, cferris, hctim, eugenis Subscribers: #sanitizers, llvm-commits Tags: #sanitizers, #llvm Differential Revision: https://reviews.llvm.org/D75142 --- compiler-rt/lib/scudo/standalone/combined.h | 10 ++++-- .../lib/scudo/standalone/local_cache.h | 7 +++-- compiler-rt/lib/scudo/standalone/primary32.h | 13 ++++---- compiler-rt/lib/scudo/standalone/primary64.h | 12 +++---- compiler-rt/lib/scudo/standalone/release.h | 31 ++++++++++++------- compiler-rt/lib/scudo/standalone/secondary.h | 1 + .../lib/scudo/standalone/size_class_map.h | 30 +++++++++--------- .../scudo/standalone/tests/release_test.cpp | 21 ++++++++++--- .../standalone/tests/size_class_map_test.cpp | 1 - 9 files changed, 74 insertions(+), 52 deletions(-) diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h index f49fc9aac84c..8456dc82d20e 100644 --- a/compiler-rt/lib/scudo/standalone/combined.h +++ b/compiler-rt/lib/scudo/standalone/combined.h @@ -449,6 +449,12 @@ public: void *reallocate(void *OldPtr, uptr NewSize, uptr Alignment = MinAlignment) { initThreadMaybe(); + if (UNLIKELY(NewSize >= MaxAllowedMallocSize)) { + if (Options.MayReturnNull) + return nullptr; + reportAllocationSizeTooBig(NewSize, 0, MaxAllowedMallocSize); + } + void *OldTaggedPtr = OldPtr; OldPtr = untagPointerMaybe(OldPtr); @@ -502,9 +508,7 @@ public: // reasonable delta), we just keep the old block, and update the chunk // header to reflect the size change. if (reinterpret_cast(OldPtr) + NewSize <= BlockEnd) { - const uptr Delta = - OldSize < NewSize ? NewSize - OldSize : OldSize - NewSize; - if (Delta <= SizeClassMap::MaxSize / 2) { + if (NewSize > OldSize || (OldSize - NewSize) < getPageSizeCached()) { Chunk::UnpackedHeader NewHeader = OldHeader; NewHeader.SizeOrUnusedBytes = (ClassId ? 
NewSize diff --git a/compiler-rt/lib/scudo/standalone/local_cache.h b/compiler-rt/lib/scudo/standalone/local_cache.h index b08abd3e5d9b..a6425fc6d1ea 100644 --- a/compiler-rt/lib/scudo/standalone/local_cache.h +++ b/compiler-rt/lib/scudo/standalone/local_cache.h @@ -165,13 +165,14 @@ private: NOINLINE void drain(PerClass *C, uptr ClassId) { const u32 Count = Min(C->MaxCount / 2, C->Count); - const uptr FirstIndexToDrain = C->Count - Count; - TransferBatch *B = createBatch(ClassId, C->Chunks[FirstIndexToDrain]); + TransferBatch *B = createBatch(ClassId, C->Chunks[0]); if (UNLIKELY(!B)) reportOutOfMemory( SizeClassAllocator::getSizeByClassId(SizeClassMap::BatchClassId)); - B->setFromArray(&C->Chunks[FirstIndexToDrain], Count); + B->setFromArray(&C->Chunks[0], Count); C->Count -= Count; + for (uptr I = 0; I < C->Count; I++) + C->Chunks[I] = C->Chunks[I + Count]; Allocator->pushBatch(ClassId, B); } }; diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h index b50f91d492ed..e3376e746ebd 100644 --- a/compiler-rt/lib/scudo/standalone/primary32.h +++ b/compiler-rt/lib/scudo/standalone/primary32.h @@ -386,11 +386,11 @@ private: (Sci->Stats.PoppedBlocks - Sci->Stats.PushedBlocks) * BlockSize; if (BytesInFreeList < PageSize) return 0; // No chance to release anything. - if ((Sci->Stats.PushedBlocks - Sci->ReleaseInfo.PushedBlocksAtLastRelease) * - BlockSize < - PageSize) { + const uptr BytesPushed = + (Sci->Stats.PushedBlocks - Sci->ReleaseInfo.PushedBlocksAtLastRelease) * + BlockSize; + if (BytesPushed < PageSize) return 0; // Nothing new to release. - } if (!Force) { const s32 IntervalMs = getReleaseToOsIntervalMs(); @@ -407,12 +407,13 @@ private: // iterate multiple times over the same freelist if a ClassId spans multiple // regions. But it will have to do for now. 
uptr TotalReleasedBytes = 0; + const uptr Size = (RegionSize / BlockSize) * BlockSize; for (uptr I = MinRegionIndex; I <= MaxRegionIndex; I++) { if (PossibleRegions[I] - 1U == ClassId) { const uptr Region = I * RegionSize; ReleaseRecorder Recorder(Region); - releaseFreeMemoryToOS(Sci->FreeList, Region, RegionSize / PageSize, - BlockSize, &Recorder); + releaseFreeMemoryToOS(Sci->FreeList, Region, Size, BlockSize, + &Recorder); if (Recorder.getReleasedRangesCount() > 0) { Sci->ReleaseInfo.PushedBlocksAtLastRelease = Sci->Stats.PushedBlocks; Sci->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount(); diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h index 188f3082aee2..e1560001f7f2 100644 --- a/compiler-rt/lib/scudo/standalone/primary64.h +++ b/compiler-rt/lib/scudo/standalone/primary64.h @@ -395,12 +395,11 @@ private: (Region->Stats.PoppedBlocks - Region->Stats.PushedBlocks) * BlockSize; if (BytesInFreeList < PageSize) return 0; // No chance to release anything. - if ((Region->Stats.PushedBlocks - - Region->ReleaseInfo.PushedBlocksAtLastRelease) * - BlockSize < - PageSize) { + const uptr BytesPushed = (Region->Stats.PushedBlocks - + Region->ReleaseInfo.PushedBlocksAtLastRelease) * + BlockSize; + if (BytesPushed < PageSize) return 0; // Nothing new to release. 
- } if (!Force) { const s32 IntervalMs = getReleaseToOsIntervalMs(); @@ -415,8 +414,7 @@ private: ReleaseRecorder Recorder(Region->RegionBeg, &Region->Data); releaseFreeMemoryToOS(Region->FreeList, Region->RegionBeg, - roundUpTo(Region->AllocatedUser, PageSize) / PageSize, - BlockSize, &Recorder); + Region->AllocatedUser, BlockSize, &Recorder); if (Recorder.getReleasedRangesCount() > 0) { Region->ReleaseInfo.PushedBlocksAtLastRelease = diff --git a/compiler-rt/lib/scudo/standalone/release.h b/compiler-rt/lib/scudo/standalone/release.h index c4f679711073..323bf9db6dca 100644 --- a/compiler-rt/lib/scudo/standalone/release.h +++ b/compiler-rt/lib/scudo/standalone/release.h @@ -107,7 +107,8 @@ public: void incRange(uptr From, uptr To) const { DCHECK_LE(From, To); - for (uptr I = From; I <= To; I++) + const uptr Top = Min(To + 1, N); + for (uptr I = From; I < Top; I++) inc(I); } @@ -166,8 +167,7 @@ private: template NOINLINE void releaseFreeMemoryToOS(const IntrusiveList &FreeList, uptr Base, - uptr AllocatedPagesCount, uptr BlockSize, - ReleaseRecorderT *Recorder) { + uptr Size, uptr BlockSize, ReleaseRecorderT *Recorder) { const uptr PageSize = getPageSizeCached(); // Figure out the number of chunks per page and whether we can take a fast @@ -204,12 +204,13 @@ releaseFreeMemoryToOS(const IntrusiveList &FreeList, uptr Base, } } - PackedCounterArray Counters(AllocatedPagesCount, FullPagesBlockCountMax); + const uptr PagesCount = roundUpTo(Size, PageSize) / PageSize; + PackedCounterArray Counters(PagesCount, FullPagesBlockCountMax); if (!Counters.isAllocated()) return; const uptr PageSizeLog = getLog2(PageSize); - const uptr End = Base + AllocatedPagesCount * PageSize; + const uptr RoundedSize = PagesCount << PageSizeLog; // Iterate over free chunks and count how many free chunks affect each // allocated page. 
@@ -223,11 +224,14 @@ releaseFreeMemoryToOS(const IntrusiveList &FreeList, uptr Base, (It.getCount() != 0) && (reinterpret_cast(It.get(0)) == reinterpret_cast(&It)); for (u32 I = IsTransferBatch ? 1 : 0; I < It.getCount(); I++) { - const uptr P = reinterpret_cast(It.get(I)); - if (P >= Base && P < End) - Counters.inc((P - Base) >> PageSizeLog); + const uptr P = reinterpret_cast(It.get(I)) - Base; + // This takes care of P < Base and P >= Base + RoundedSize. + if (P < RoundedSize) + Counters.inc(P >> PageSizeLog); } } + for (uptr P = Size; P < RoundedSize; P += BlockSize) + Counters.inc(P >> PageSizeLog); } else { // In all other cases chunks might affect more than one page. for (const auto &It : FreeList) { @@ -236,12 +240,15 @@ releaseFreeMemoryToOS(const IntrusiveList &FreeList, uptr Base, (It.getCount() != 0) && (reinterpret_cast(It.get(0)) == reinterpret_cast(&It)); for (u32 I = IsTransferBatch ? 1 : 0; I < It.getCount(); I++) { - const uptr P = reinterpret_cast(It.get(I)); - if (P >= Base && P < End) - Counters.incRange((P - Base) >> PageSizeLog, - (P - Base + BlockSize - 1) >> PageSizeLog); + const uptr P = reinterpret_cast(It.get(I)) - Base; + // This takes care of P < Base and P >= Base + RoundedSize. 
+ if (P < RoundedSize) + Counters.incRange(P >> PageSizeLog, + (P + BlockSize - 1) >> PageSizeLog); } } + for (uptr P = Size; P < RoundedSize; P += BlockSize) + Counters.incRange(P >> PageSizeLog, (P + BlockSize - 1) >> PageSizeLog); } // Iterate over pages detecting ranges of pages with chunk Counters equal diff --git a/compiler-rt/lib/scudo/standalone/secondary.h b/compiler-rt/lib/scudo/standalone/secondary.h index 8ae8108b2eaa..9d5f130f2d45 100644 --- a/compiler-rt/lib/scudo/standalone/secondary.h +++ b/compiler-rt/lib/scudo/standalone/secondary.h @@ -60,6 +60,7 @@ public: void disable() {} void enable() {} void releaseToOS() {} + void setReleaseToOsIntervalMs(UNUSED s32 Interval) {} }; template void testReleaseFreeMemoryToOS() { typedef FreeBatch Batch; - const scudo::uptr AllocatedPagesCount = 1024; + const scudo::uptr PagesCount = 1024; const scudo::uptr PageSize = scudo::getPageSizeCached(); std::mt19937 R; scudo::u32 RandState = 42; for (scudo::uptr I = 1; I <= SizeClassMap::LargestClassId; I++) { const scudo::uptr BlockSize = SizeClassMap::getSizeByClassId(I); - const scudo::uptr MaxBlocks = AllocatedPagesCount * PageSize / BlockSize; + const scudo::uptr MaxBlocks = PagesCount * PageSize / BlockSize; // Generate the random free list. std::vector FreeArray; @@ -190,7 +190,7 @@ template void testReleaseFreeMemoryToOS() { // Release the memory. 
ReleasedPagesRecorder Recorder; - releaseFreeMemoryToOS(FreeList, 0, AllocatedPagesCount, BlockSize, + releaseFreeMemoryToOS(FreeList, 0, MaxBlocks * BlockSize, BlockSize, &Recorder); // Verify that there are no released pages touched by used chunks and all @@ -202,7 +202,7 @@ template void testReleaseFreeMemoryToOS() { scudo::uptr CurrentBlock = 0; InFreeRange = false; scudo::uptr CurrentFreeRangeStart = 0; - for (scudo::uptr I = 0; I <= MaxBlocks; I++) { + for (scudo::uptr I = 0; I < MaxBlocks; I++) { const bool IsFreeBlock = FreeBlocks.find(CurrentBlock) != FreeBlocks.end(); if (IsFreeBlock) { @@ -238,6 +238,19 @@ template void testReleaseFreeMemoryToOS() { CurrentBlock += BlockSize; } + if (InFreeRange) { + scudo::uptr P = scudo::roundUpTo(CurrentFreeRangeStart, PageSize); + const scudo::uptr EndPage = + scudo::roundUpTo(MaxBlocks * BlockSize, PageSize); + while (P + PageSize <= EndPage) { + const bool PageReleased = + Recorder.ReportedPages.find(P) != Recorder.ReportedPages.end(); + EXPECT_EQ(true, PageReleased); + VerifiedReleasedPages++; + P += PageSize; + } + } + EXPECT_EQ(Recorder.ReportedPages.size(), VerifiedReleasedPages); while (!FreeList.empty()) { diff --git a/compiler-rt/lib/scudo/standalone/tests/size_class_map_test.cpp b/compiler-rt/lib/scudo/standalone/tests/size_class_map_test.cpp index c9e173f8e539..88859ded5b27 100644 --- a/compiler-rt/lib/scudo/standalone/tests/size_class_map_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/size_class_map_test.cpp @@ -28,7 +28,6 @@ TEST(ScudoSizeClassMapTest, AndroidSizeClassMap) { testSizeClassMap(); } - struct OneClassSizeClassConfig { static const scudo::uptr NumBits = 1; static const scudo::uptr MinSizeLog = 5;