[scudo][standalone] Allow the release of smaller sizes

Initially we avoided releasing smaller size classes because it was an
expensive operation, particularly on 32-bit platforms: with a lot of
batches, and a lot of blocks per page, it was a lengthy operation with
little to show for it.

There have been some improvements to the 32-bit release since then,
and we still have criteria that keep us from wasting time (eg,
requiring 9x% free blocks in a size class, etc).

Allowing blocks < 128 bytes to be released helps in situations where
a lot of small chunks would never be reclaimed if not for a forced
reclaiming.
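
For context, the lower limit being removed is the
`getSizeByClassId(I) >= (PageSize / 32)` gate visible in the diffs
below; with a typical 4096-byte page that works out to 128 bytes. A
standalone rendering of that arithmetic (illustration only,
`wasReleasableBefore` is not part of the change):

  // The old gate rewritten as a free function, purely to show the numbers.
  constexpr bool wasReleasableBefore(unsigned long ClassSize,
                                     unsigned long PageSize) {
    return ClassSize >= PageSize / 32; // 4096 / 32 == 128
  }
  static_assert(!wasReleasableBefore(64, 4096),
                "64-byte classes were never released");
  static_assert(wasReleasableBefore(128, 4096),
                "128 bytes was the old cutoff");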

Additionally, change some `CHECK`s to `DCHECK`s and rearrange the
code a bit.
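
The conditions in the changed checks are internal consistency checks,
so they are now only enforced in debug builds. The sketch below shows
the convention this relies on; it is an approximation, not the actual
internal_defs.h definitions:

  // Assumed convention: DCHECKs only compile to real checks with SCUDO_DEBUG.
  #if SCUDO_DEBUG
  #define DCHECK_GT(A, B) CHECK_GT(A, B) // still enforced in debug builds
  #else
  #define DCHECK_GT(A, B) (void)0        // no code emitted otherwise
  #endif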

I didn't experience any regressions in my benchmarks.

Differential Revision: https://reviews.llvm.org/D93141
Kostya Kortchinsky 2020-12-11 14:04:47 -08:00
parent 4a327bd252
commit 1dbf2c96bc
3 changed files with 34 additions and 58 deletions

compiler-rt/lib/scudo/standalone/primary32.h

@@ -76,17 +76,12 @@ public:
     if (UNLIKELY(!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed))))
       Seed = static_cast<u32>(
           Time ^ (reinterpret_cast<uptr>(SizeClassInfoArray) >> 6));
-    const uptr PageSize = getPageSizeCached();
     for (uptr I = 0; I < NumClasses; I++) {
       SizeClassInfo *Sci = getSizeClassInfo(I);
       Sci->RandState = getRandomU32(&Seed);
       // Sci->MaxRegionIndex is already initialized to 0.
       Sci->MinRegionIndex = NumRegions;
-      // See comment in the 64-bit primary about releasing smaller size classes.
-      Sci->CanRelease = (I != SizeClassMap::BatchClassId) &&
-                        (getSizeByClassId(I) >= (PageSize / 32));
-      if (Sci->CanRelease)
-        Sci->ReleaseInfo.LastReleaseAtNs = Time;
+      Sci->ReleaseInfo.LastReleaseAtNs = Time;
     }
     setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval));
   }
@@ -137,7 +132,7 @@ public:
     ScopedLock L(Sci->Mutex);
     Sci->FreeList.push_front(B);
     Sci->Stats.PushedBlocks += B->getCount();
-    if (Sci->CanRelease)
+    if (ClassId != SizeClassMap::BatchClassId)
       releaseToOSMaybe(Sci, ClassId);
   }
@@ -217,6 +212,8 @@ public:
   uptr releaseToOS() {
     uptr TotalReleasedBytes = 0;
     for (uptr I = 0; I < NumClasses; I++) {
+      if (I == SizeClassMap::BatchClassId)
+        continue;
       SizeClassInfo *Sci = getSizeClassInfo(I);
       ScopedLock L(Sci->Mutex);
       TotalReleasedBytes += releaseToOSMaybe(Sci, I, /*Force=*/true);
@@ -262,7 +259,6 @@ private:
     uptr CurrentRegion;
     uptr CurrentRegionAllocated;
     SizeClassStats Stats;
-    bool CanRelease;
     u32 RandState;
     uptr AllocatedUser;
     // Lowest & highest region index allocated for this size class, to avoid

compiler-rt/lib/scudo/standalone/primary64.h

@@ -80,17 +80,7 @@ public:
       Region->RegionBeg =
           getRegionBaseByClassId(I) + (getRandomModN(&Seed, 16) + 1) * PageSize;
       Region->RandState = getRandomU32(&Seed);
-      // Releasing smaller size classes doesn't necessarily yield to a
-      // meaningful RSS impact: there are more blocks per page, they are
-      // randomized around, and thus pages are less likely to be entirely empty.
-      // On top of this, attempting to release those require more iterations and
-      // memory accesses which ends up being fairly costly. The current lower
-      // limit is mostly arbitrary and based on empirical observations.
-      // TODO(kostyak): make the lower limit a runtime option
-      Region->CanRelease = (I != SizeClassMap::BatchClassId) &&
-                           (getSizeByClassId(I) >= (PageSize / 32));
-      if (Region->CanRelease)
-        Region->ReleaseInfo.LastReleaseAtNs = Time;
+      Region->ReleaseInfo.LastReleaseAtNs = Time;
     }
     setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval));
   }
@@ -129,7 +119,7 @@ public:
     ScopedLock L(Region->Mutex);
     Region->FreeList.push_front(B);
     Region->Stats.PushedBlocks += B->getCount();
-    if (Region->CanRelease)
+    if (ClassId != SizeClassMap::BatchClassId)
       releaseToOSMaybe(Region, ClassId);
   }
@@ -201,6 +191,8 @@ public:
   uptr releaseToOS() {
     uptr TotalReleasedBytes = 0;
     for (uptr I = 0; I < NumClasses; I++) {
+      if (I == SizeClassMap::BatchClassId)
+        continue;
       RegionInfo *Region = getRegionInfo(I);
       ScopedLock L(Region->Mutex);
       TotalReleasedBytes += releaseToOSMaybe(Region, I, /*Force=*/true);
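
Usage note (illustration, not part of the diff): `releaseToOS()` above
is the forced-release entry point; with the Scudo C wrappers on Android
it is typically reached through `mallopt`. A hedged sketch, assuming
the bionic `M_PURGE` extension is available:

  #include <malloc.h>

  // Ask the allocator to walk all size classes and return free pages to the
  // OS. M_PURGE is a bionic extension; guard on it to keep the sketch portable.
  void reclaimNow() {
  #ifdef M_PURGE
    mallopt(M_PURGE, 0); // ends up in the allocator's releaseToOS()
  #endif
  }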
@@ -291,7 +283,6 @@ private:
     HybridMutex Mutex;
     SinglyLinkedList<TransferBatch> FreeList;
     RegionStats Stats;
-    bool CanRelease;
     bool Exhausted;
     u32 RandState;
     uptr RegionBeg;
@@ -417,7 +408,7 @@ private:
     const uptr BlockSize = getSizeByClassId(ClassId);
     const uptr PageSize = getPageSizeCached();
-    CHECK_GE(Region->Stats.PoppedBlocks, Region->Stats.PushedBlocks);
+    DCHECK_GE(Region->Stats.PoppedBlocks, Region->Stats.PushedBlocks);
     const uptr BytesInFreeList =
         Region->AllocatedUser -
         (Region->Stats.PoppedBlocks - Region->Stats.PushedBlocks) * BlockSize;

compiler-rt/lib/scudo/standalone/release.h

@@ -52,20 +52,20 @@ public:
   PackedCounterArray(uptr NumberOfRegions, uptr CountersPerRegion,
                      uptr MaxValue)
       : Regions(NumberOfRegions), NumCounters(CountersPerRegion) {
-    CHECK_GT(Regions, 0);
-    CHECK_GT(NumCounters, 0);
-    CHECK_GT(MaxValue, 0);
+    DCHECK_GT(Regions, 0);
+    DCHECK_GT(NumCounters, 0);
+    DCHECK_GT(MaxValue, 0);
     constexpr uptr MaxCounterBits = sizeof(*Buffer) * 8UL;
     // Rounding counter storage size up to the power of two allows for using
     // bit shifts calculating particular counter's Index and offset.
     const uptr CounterSizeBits =
         roundUpToPowerOfTwo(getMostSignificantSetBitIndex(MaxValue) + 1);
-    CHECK_LE(CounterSizeBits, MaxCounterBits);
+    DCHECK_LE(CounterSizeBits, MaxCounterBits);
     CounterSizeBitsLog = getLog2(CounterSizeBits);
     CounterMask = ~(static_cast<uptr>(0)) >> (MaxCounterBits - CounterSizeBits);
     const uptr PackingRatio = MaxCounterBits >> CounterSizeBitsLog;
-    CHECK_GT(PackingRatio, 0);
+    DCHECK_GT(PackingRatio, 0);
     PackingRatioLog = getLog2(PackingRatio);
     BitOffsetMask = PackingRatio - 1;
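
For readers unfamiliar with `PackedCounterArray`, the values being
`DCHECK`ed above size one counter per page and pack several counters
into each buffer word. A standalone sketch of the same derivation
(helpers re-implemented here for illustration; the real ones live
elsewhere in the standalone sources):

  #include <cstdint>

  constexpr uint64_t mostSignificantSetBitIndex(uint64_t X) {
    uint64_t I = 0;
    while (X >>= 1)
      ++I;
    return I;
  }

  constexpr uint64_t roundUpToPowerOfTwo(uint64_t X) {
    uint64_t P = 1;
    while (P < X)
      P <<= 1;
    return P;
  }

  // Bits per counter for a given maximum value, and how many such counters
  // fit in one 64-bit buffer word.
  constexpr uint64_t counterSizeBits(uint64_t MaxValue) {
    return roundUpToPowerOfTwo(mostSignificantSetBitIndex(MaxValue) + 1);
  }
  constexpr uint64_t packingRatio(uint64_t MaxValue) {
    return 64 / counterSizeBits(MaxValue);
  }

  // E.g. if at most 10 blocks can touch a page, each counter needs 4 bits,
  // so 16 page counters pack into every 64-bit word.
  static_assert(counterSizeBits(10) == 4, "");
  static_assert(packingRatio(10) == 16, "");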
@@ -235,20 +235,14 @@ releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base,
   if (BlockSize <= PageSize && PageSize % BlockSize == 0) {
     // Each chunk affects one page only.
     for (const auto &It : FreeList) {
-      // If dealing with a TransferBatch, the first pointer of the batch will
-      // point to the batch itself, we do not want to mark this for release as
-      // the batch is in use, so skip the first entry.
-      const bool IsTransferBatch =
-          (It.getCount() != 0) &&
-          (reinterpret_cast<uptr>(It.get(0)) == reinterpret_cast<uptr>(&It));
-      for (u32 I = IsTransferBatch ? 1 : 0; I < It.getCount(); I++) {
+      for (u32 I = 0; I < It.getCount(); I++) {
         const uptr P = reinterpret_cast<uptr>(It.get(I)) - Base;
         // This takes care of P < Base and P >= Base + RoundedSize.
-        if (P < RoundedSize) {
-          const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize;
-          const uptr PInRegion = P - RegionIndex * RegionSize;
-          Counters.inc(RegionIndex, PInRegion >> PageSizeLog);
-        }
+        if (UNLIKELY(P >= RoundedSize))
+          continue;
+        const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize;
+        const uptr PInRegion = P - RegionIndex * RegionSize;
+        Counters.inc(RegionIndex, PInRegion >> PageSizeLog);
       }
     }
   } else {
@@ -256,27 +250,23 @@ releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base,
     DCHECK_GE(RegionSize, BlockSize);
     const uptr LastBlockInRegion = ((RegionSize / BlockSize) - 1U) * BlockSize;
     for (const auto &It : FreeList) {
-      // See TransferBatch comment above.
-      const bool IsTransferBatch =
-          (It.getCount() != 0) &&
-          (reinterpret_cast<uptr>(It.get(0)) == reinterpret_cast<uptr>(&It));
-      for (u32 I = IsTransferBatch ? 1 : 0; I < It.getCount(); I++) {
+      for (u32 I = 0; I < It.getCount(); I++) {
         const uptr P = reinterpret_cast<uptr>(It.get(I)) - Base;
         // This takes care of P < Base and P >= Base + RoundedSize.
-        if (P < RoundedSize) {
-          const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize;
-          uptr PInRegion = P - RegionIndex * RegionSize;
-          Counters.incRange(RegionIndex, PInRegion >> PageSizeLog,
-                            (PInRegion + BlockSize - 1) >> PageSizeLog);
-          // The last block in a region might straddle a page, so if it's
-          // free, we mark the following "pretend" memory block(s) as free.
-          if (PInRegion == LastBlockInRegion) {
+        if (UNLIKELY(P >= RoundedSize))
+          continue;
+        const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize;
+        uptr PInRegion = P - RegionIndex * RegionSize;
+        Counters.incRange(RegionIndex, PInRegion >> PageSizeLog,
+                          (PInRegion + BlockSize - 1) >> PageSizeLog);
+        // The last block in a region might straddle a page, so if it's
+        // free, we mark the following "pretend" memory block(s) as free.
+        if (PInRegion == LastBlockInRegion) {
+          PInRegion += BlockSize;
+          while (PInRegion < RoundedRegionSize) {
+            Counters.incRange(RegionIndex, PInRegion >> PageSizeLog,
+                              (PInRegion + BlockSize - 1) >> PageSizeLog);
             PInRegion += BlockSize;
-            while (PInRegion < RoundedRegionSize) {
-              Counters.incRange(RegionIndex, PInRegion >> PageSizeLog,
-                                (PInRegion + BlockSize - 1) >> PageSizeLog);
-              PInRegion += BlockSize;
-            }
           }
         }
       }
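
To make the `incRange` bounds concrete, a small worked example with
made-up values (the shift arithmetic is the same as in the hunk above):
a free block larger than a page marks every page it overlaps, and the
last block of a region additionally marks the trailing pages via the
"pretend" loop.

  #include <cstdint>
  #include <cstdio>

  int main() {
    const uint64_t PageSizeLog = 12; // 4 KiB pages
    const uint64_t BlockSize = 6000; // larger than a page, so blocks straddle
    const uint64_t PInRegion = 6000; // offset of a free block in its region

    // Same computation as Counters.incRange(RegionIndex, From, To) above.
    const uint64_t From = PInRegion >> PageSizeLog;                 // page 1
    const uint64_t To = (PInRegion + BlockSize - 1) >> PageSizeLog; // page 2
    std::printf("free block at offset %llu marks pages %llu..%llu\n",
                static_cast<unsigned long long>(PInRegion),
                static_cast<unsigned long long>(From),
                static_cast<unsigned long long>(To));
    return 0;
  }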
@@ -327,7 +317,6 @@ releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base,
           }
         }
         PrevPageBoundary = PageBoundary;
-
         RangeTracker.processNextPage(Counters.get(I, J) == BlocksPerPage);
       }
     }