[scudo][standalone] Allow the release of smaller sizes
Initially we were avoiding the release of smaller size classes because it was an expensive operation, particularly on 32-bit platforms: with a lot of batches, and a lot of blocks per page, it was a lengthy operation with little benefit. There have been some improvements to the 32-bit release since then, and we still have criteria preventing us from wasting time (e.g., requiring that 9x% of the blocks in a size class be free, and so on).

Allowing the release of blocks smaller than 128 bytes helps in situations where a lot of small chunks would never be reclaimed without a forced release.

Additionally, change some `CHECK`s to `DCHECK`s and rearrange the code a bit.

I didn't experience any regressions in my benchmarks.

Differential Revision: https://reviews.llvm.org/D93141
commit 1dbf2c96bc (parent 4a327bd252)
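To make the 128-byte threshold in the description concrete, here is a minimal standalone sketch (not scudo code) of the eligibility rule the diff below removes: with the old `getSizeByClassId(I) >= PageSize / 32` test and 4 KiB pages, classes under 128 bytes were never considered for release, while the new code only exempts the batch class up front. The class sizes in the array are illustrative assumptions, not scudo's actual size-class map (that lives in size_class_map.h).

#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t PageSize = 4096;  // assumed 4 KiB pages
  const uint64_t BatchClassId = 0; // scudo reserves a dedicated batch class
  // Illustrative class sizes only, not a real scudo size-class map.
  const uint64_t ClassSizes[] = {32, 16, 32, 48, 64, 96, 112, 128, 256, 1024};
  const uint64_t NumClasses = sizeof(ClassSizes) / sizeof(ClassSizes[0]);

  for (uint64_t I = 0; I < NumClasses; I++) {
    // Old rule (removed by this commit): skip the batch class and any class
    // smaller than PageSize / 32, i.e. smaller than 128 bytes here.
    const bool OldCanRelease =
        (I != BatchClassId) && (ClassSizes[I] >= PageSize / 32);
    // New rule: only the batch class is exempt up front; releaseToOSMaybe()
    // decides at call time whether a release is actually worth doing.
    const bool NewCanRelease = (I != BatchClassId);
    std::printf("class %2llu (%4llu bytes): old=%d new=%d\n",
                static_cast<unsigned long long>(I),
                static_cast<unsigned long long>(ClassSizes[I]),
                OldCanRelease, NewCanRelease);
  }
  return 0;
}

With these assumptions, the old rule reports classes of 16 to 112 bytes as never releasable, which is exactly the population of small chunks the commit message says could previously only be reclaimed by luck.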
compiler-rt/lib/scudo/standalone/primary32.h
@@ -76,17 +76,12 @@ public:
     if (UNLIKELY(!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed))))
       Seed = static_cast<u32>(
           Time ^ (reinterpret_cast<uptr>(SizeClassInfoArray) >> 6));
-    const uptr PageSize = getPageSizeCached();
     for (uptr I = 0; I < NumClasses; I++) {
       SizeClassInfo *Sci = getSizeClassInfo(I);
       Sci->RandState = getRandomU32(&Seed);
       // Sci->MaxRegionIndex is already initialized to 0.
       Sci->MinRegionIndex = NumRegions;
-      // See comment in the 64-bit primary about releasing smaller size classes.
-      Sci->CanRelease = (I != SizeClassMap::BatchClassId) &&
-                        (getSizeByClassId(I) >= (PageSize / 32));
-      if (Sci->CanRelease)
-        Sci->ReleaseInfo.LastReleaseAtNs = Time;
+      Sci->ReleaseInfo.LastReleaseAtNs = Time;
     }
     setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval));
   }
@@ -137,7 +132,7 @@ public:
     ScopedLock L(Sci->Mutex);
     Sci->FreeList.push_front(B);
     Sci->Stats.PushedBlocks += B->getCount();
-    if (Sci->CanRelease)
+    if (ClassId != SizeClassMap::BatchClassId)
       releaseToOSMaybe(Sci, ClassId);
   }
@@ -217,6 +212,8 @@ public:
   uptr releaseToOS() {
     uptr TotalReleasedBytes = 0;
     for (uptr I = 0; I < NumClasses; I++) {
+      if (I == SizeClassMap::BatchClassId)
+        continue;
       SizeClassInfo *Sci = getSizeClassInfo(I);
       ScopedLock L(Sci->Mutex);
       TotalReleasedBytes += releaseToOSMaybe(Sci, I, /*Force=*/true);
@@ -262,7 +259,6 @@ private:
     uptr CurrentRegion;
     uptr CurrentRegionAllocated;
     SizeClassStats Stats;
-    bool CanRelease;
     u32 RandState;
     uptr AllocatedUser;
     // Lowest & highest region index allocated for this size class, to avoid
compiler-rt/lib/scudo/standalone/primary64.h
@@ -80,17 +80,7 @@ public:
       Region->RegionBeg =
           getRegionBaseByClassId(I) + (getRandomModN(&Seed, 16) + 1) * PageSize;
       Region->RandState = getRandomU32(&Seed);
-      // Releasing smaller size classes doesn't necessarily yield to a
-      // meaningful RSS impact: there are more blocks per page, they are
-      // randomized around, and thus pages are less likely to be entirely empty.
-      // On top of this, attempting to release those require more iterations and
-      // memory accesses which ends up being fairly costly. The current lower
-      // limit is mostly arbitrary and based on empirical observations.
-      // TODO(kostyak): make the lower limit a runtime option
-      Region->CanRelease = (I != SizeClassMap::BatchClassId) &&
-                           (getSizeByClassId(I) >= (PageSize / 32));
-      if (Region->CanRelease)
-        Region->ReleaseInfo.LastReleaseAtNs = Time;
+      Region->ReleaseInfo.LastReleaseAtNs = Time;
     }
     setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval));
   }
@@ -129,7 +119,7 @@ public:
     ScopedLock L(Region->Mutex);
     Region->FreeList.push_front(B);
     Region->Stats.PushedBlocks += B->getCount();
-    if (Region->CanRelease)
+    if (ClassId != SizeClassMap::BatchClassId)
       releaseToOSMaybe(Region, ClassId);
   }
@@ -201,6 +191,8 @@ public:
   uptr releaseToOS() {
     uptr TotalReleasedBytes = 0;
     for (uptr I = 0; I < NumClasses; I++) {
+      if (I == SizeClassMap::BatchClassId)
+        continue;
       RegionInfo *Region = getRegionInfo(I);
       ScopedLock L(Region->Mutex);
       TotalReleasedBytes += releaseToOSMaybe(Region, I, /*Force=*/true);
@@ -291,7 +283,6 @@ private:
     HybridMutex Mutex;
     SinglyLinkedList<TransferBatch> FreeList;
     RegionStats Stats;
-    bool CanRelease;
     bool Exhausted;
     u32 RandState;
     uptr RegionBeg;
@@ -417,7 +408,7 @@ private:
     const uptr BlockSize = getSizeByClassId(ClassId);
     const uptr PageSize = getPageSizeCached();
 
-    CHECK_GE(Region->Stats.PoppedBlocks, Region->Stats.PushedBlocks);
+    DCHECK_GE(Region->Stats.PoppedBlocks, Region->Stats.PushedBlocks);
     const uptr BytesInFreeList =
         Region->AllocatedUser -
         (Region->Stats.PoppedBlocks - Region->Stats.PushedBlocks) * BlockSize;
compiler-rt/lib/scudo/standalone/release.h
@@ -52,20 +52,20 @@ public:
   PackedCounterArray(uptr NumberOfRegions, uptr CountersPerRegion,
                      uptr MaxValue)
       : Regions(NumberOfRegions), NumCounters(CountersPerRegion) {
-    CHECK_GT(Regions, 0);
-    CHECK_GT(NumCounters, 0);
-    CHECK_GT(MaxValue, 0);
+    DCHECK_GT(Regions, 0);
+    DCHECK_GT(NumCounters, 0);
+    DCHECK_GT(MaxValue, 0);
     constexpr uptr MaxCounterBits = sizeof(*Buffer) * 8UL;
     // Rounding counter storage size up to the power of two allows for using
     // bit shifts calculating particular counter's Index and offset.
     const uptr CounterSizeBits =
         roundUpToPowerOfTwo(getMostSignificantSetBitIndex(MaxValue) + 1);
-    CHECK_LE(CounterSizeBits, MaxCounterBits);
+    DCHECK_LE(CounterSizeBits, MaxCounterBits);
     CounterSizeBitsLog = getLog2(CounterSizeBits);
     CounterMask = ~(static_cast<uptr>(0)) >> (MaxCounterBits - CounterSizeBits);
 
     const uptr PackingRatio = MaxCounterBits >> CounterSizeBitsLog;
-    CHECK_GT(PackingRatio, 0);
+    DCHECK_GT(PackingRatio, 0);
     PackingRatioLog = getLog2(PackingRatio);
     BitOffsetMask = PackingRatio - 1;
@@ -235,20 +235,14 @@ releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base,
   if (BlockSize <= PageSize && PageSize % BlockSize == 0) {
     // Each chunk affects one page only.
     for (const auto &It : FreeList) {
-      // If dealing with a TransferBatch, the first pointer of the batch will
-      // point to the batch itself, we do not want to mark this for release as
-      // the batch is in use, so skip the first entry.
-      const bool IsTransferBatch =
-          (It.getCount() != 0) &&
-          (reinterpret_cast<uptr>(It.get(0)) == reinterpret_cast<uptr>(&It));
-      for (u32 I = IsTransferBatch ? 1 : 0; I < It.getCount(); I++) {
+      for (u32 I = 0; I < It.getCount(); I++) {
         const uptr P = reinterpret_cast<uptr>(It.get(I)) - Base;
         // This takes care of P < Base and P >= Base + RoundedSize.
-        if (P < RoundedSize) {
-          const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize;
-          const uptr PInRegion = P - RegionIndex * RegionSize;
-          Counters.inc(RegionIndex, PInRegion >> PageSizeLog);
-        }
+        if (UNLIKELY(P >= RoundedSize))
+          continue;
+        const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize;
+        const uptr PInRegion = P - RegionIndex * RegionSize;
+        Counters.inc(RegionIndex, PInRegion >> PageSizeLog);
       }
     }
   } else {
@@ -256,27 +250,23 @@ releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base,
     DCHECK_GE(RegionSize, BlockSize);
     const uptr LastBlockInRegion = ((RegionSize / BlockSize) - 1U) * BlockSize;
     for (const auto &It : FreeList) {
-      // See TransferBatch comment above.
-      const bool IsTransferBatch =
-          (It.getCount() != 0) &&
-          (reinterpret_cast<uptr>(It.get(0)) == reinterpret_cast<uptr>(&It));
-      for (u32 I = IsTransferBatch ? 1 : 0; I < It.getCount(); I++) {
+      for (u32 I = 0; I < It.getCount(); I++) {
         const uptr P = reinterpret_cast<uptr>(It.get(I)) - Base;
         // This takes care of P < Base and P >= Base + RoundedSize.
-        if (P < RoundedSize) {
-          const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize;
-          uptr PInRegion = P - RegionIndex * RegionSize;
-          Counters.incRange(RegionIndex, PInRegion >> PageSizeLog,
-                            (PInRegion + BlockSize - 1) >> PageSizeLog);
-          // The last block in a region might straddle a page, so if it's
-          // free, we mark the following "pretend" memory block(s) as free.
-          if (PInRegion == LastBlockInRegion) {
-            PInRegion += BlockSize;
-            while (PInRegion < RoundedRegionSize) {
-              Counters.incRange(RegionIndex, PInRegion >> PageSizeLog,
-                                (PInRegion + BlockSize - 1) >> PageSizeLog);
-              PInRegion += BlockSize;
-            }
-          }
-        }
+        if (UNLIKELY(P >= RoundedSize))
+          continue;
+        const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize;
+        uptr PInRegion = P - RegionIndex * RegionSize;
+        Counters.incRange(RegionIndex, PInRegion >> PageSizeLog,
+                          (PInRegion + BlockSize - 1) >> PageSizeLog);
+        // The last block in a region might straddle a page, so if it's
+        // free, we mark the following "pretend" memory block(s) as free.
+        if (PInRegion == LastBlockInRegion) {
+          PInRegion += BlockSize;
+          while (PInRegion < RoundedRegionSize) {
+            Counters.incRange(RegionIndex, PInRegion >> PageSizeLog,
+                              (PInRegion + BlockSize - 1) >> PageSizeLog);
+            PInRegion += BlockSize;
+          }
+        }
       }
@@ -327,7 +317,6 @@ releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base,
           }
         }
         PrevPageBoundary = PageBoundary;
-
         RangeTracker.processNextPage(Counters.get(I, J) == BlocksPerPage);
       }
     }
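As background on the release.h hunks above: `releaseFreeMemoryToOS()` counts, for each page, how many free blocks touch it, and a page is handed back to the OS only when that count reaches the number of blocks the page holds. That is why releasing small size classes used to cost many more iterations, as the removed primary64.h comment explains. Below is a simplified standalone model of that counting idea, a sketch under assumed constants and my own names, not scudo's implementation.

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  const uint64_t PageSize = 4096;  // assumed 4 KiB pages
  const uint64_t BlockSize = 64;   // a small class now eligible for release
  const uint64_t RegionSize = 4 * PageSize;
  const uint64_t BlocksPerPage = PageSize / BlockSize;

  // One counter per page, as in PackedCounterArray (here just a vector).
  std::vector<uint64_t> Counters(RegionSize / PageSize, 0);

  // Pretend every block is free except those on the second page.
  for (uint64_t P = 0; P < RegionSize; P += BlockSize) {
    const bool Free = !(P >= PageSize && P < 2 * PageSize);
    if (Free)
      Counters[P / PageSize]++; // analogous to Counters.inc() above
  }

  for (uint64_t I = 0; I < Counters.size(); I++) {
    // Analogous to RangeTracker.processNextPage(Counters.get(I, J) == BlocksPerPage).
    const bool Releasable = Counters[I] == BlocksPerPage;
    std::printf("page %llu: %llu/%llu free blocks -> %s\n",
                static_cast<unsigned long long>(I),
                static_cast<unsigned long long>(Counters[I]),
                static_cast<unsigned long long>(BlocksPerPage),
                Releasable ? "releasable" : "kept");
  }
  return 0;
}

In this toy setup pages 0, 2 and 3 reach 64/64 free blocks and are releasable, while page 1 is kept; with 64-byte blocks a single live allocation pins an entire page, which is the effect the old threshold was guarding against and the new per-call criteria in releaseToOSMaybe() now handle instead.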