Merge pull request #6461 from sfc-gh-yiwu/redwood_remap

Redwood: config remap cleanup by size instead of versions
This commit is contained in:
Steve Atherton 2022-03-18 16:23:15 -07:00 committed by GitHub
commit 032c80dec5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 88 additions and 55 deletions

View File

@ -807,8 +807,8 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( REDWOOD_LAZY_CLEAR_BATCH_SIZE_PAGES, 10 );
init( REDWOOD_LAZY_CLEAR_MIN_PAGES, 0 );
init( REDWOOD_LAZY_CLEAR_MAX_PAGES, 1e6 );
init( REDWOOD_REMAP_CLEANUP_WINDOW, 50 );
init( REDWOOD_REMAP_CLEANUP_LAG, 0.1 );
init( REDWOOD_REMAP_CLEANUP_WINDOW_BYTES, 4LL * 1024 * 1024 * 1024 );
init( REDWOOD_REMAP_CLEANUP_TOLERANCE_RATIO, 0.05 );
init( REDWOOD_PAGEFILE_GROWTH_SIZE_PAGES, 20000 ); if( randomize && BUGGIFY ) { REDWOOD_PAGEFILE_GROWTH_SIZE_PAGES = deterministicRandom()->randomInt(200, 1000); }
init( REDWOOD_METRICS_INTERVAL, 5.0 );
init( REDWOOD_HISTOGRAM_INTERVAL, 30.0 );

View File

@ -754,9 +754,10 @@ public:
// queue is empty
int REDWOOD_LAZY_CLEAR_MAX_PAGES; // Maximum number of pages to free before ending a lazy clear cycle, unless the
// queue is empty
int64_t REDWOOD_REMAP_CLEANUP_WINDOW; // Remap remover lag interval in which to coalesce page writes
double REDWOOD_REMAP_CLEANUP_LAG; // Maximum allowed remap remover lag behind the cleanup window as a multiple of
// the window size
int64_t REDWOOD_REMAP_CLEANUP_WINDOW_BYTES; // Total size of remapped pages to keep before being removed by
// remap cleanup
double REDWOOD_REMAP_CLEANUP_TOLERANCE_RATIO; // Maximum ratio of the remap cleanup window that remap cleanup is
// allowed to be ahead or behind
int REDWOOD_PAGEFILE_GROWTH_SIZE_PAGES; // Number of pages to grow page file by
double REDWOOD_METRICS_INTERVAL;
double REDWOOD_HISTOGRAM_INTERVAL;

View File

@ -2144,14 +2144,14 @@ public:
int desiredExtentSize,
std::string filename,
int64_t pageCacheSizeBytes,
Version remapCleanupWindow,
int64_t remapCleanupWindowBytes,
int concurrentExtentReads,
bool memoryOnly = false,
Promise<Void> errorPromise = {})
: ioLock(FLOW_KNOBS->MAX_OUTSTANDING, ioMaxPriority, FLOW_KNOBS->MAX_OUTSTANDING / 2),
pageCacheBytes(pageCacheSizeBytes), pHeader(nullptr), desiredPageSize(desiredPageSize),
desiredExtentSize(desiredExtentSize), filename(filename), memoryOnly(memoryOnly), errorPromise(errorPromise),
remapCleanupWindow(remapCleanupWindow), concurrentExtentReads(new FlowLock(concurrentExtentReads)) {
remapCleanupWindowBytes(remapCleanupWindowBytes), concurrentExtentReads(new FlowLock(concurrentExtentReads)) {
// This sets the page cache size for all PageCacheT instances using the same evictor
pageCache.evictor().sizeLimit = pageCacheBytes;
@ -3437,18 +3437,26 @@ public:
state Version oldestRetainedVersion = self->effectiveOldestVersion();
// Cutoff is the version we can pop to
state RemappedPage cutoff(oldestRetainedVersion - self->remapCleanupWindow);
// Minimum version we must pop to before obeying stop command.
state Version minStopVersion =
cutoff.version - (BUGGIFY ? deterministicRandom()->randomInt(0, 10)
: (self->remapCleanupWindow * SERVER_KNOBS->REDWOOD_REMAP_CLEANUP_LAG));
state RemappedPage cutoff(oldestRetainedVersion);
debug_printf("DWALPager(%s) remapCleanup cutoff.version %" PRId64 " oldestRetainedVersion=%" PRId64
" minStopVersion %" PRId64 " items=%" PRId64 "\n",
// Maximum number of remaining remap entries to keep before obeying stop command.
double toleranceRatio = BUGGIFY ? deterministicRandom()->randomInt(0, 10) / 100.0
: SERVER_KNOBS->REDWOOD_REMAP_CLEANUP_TOLERANCE_RATIO;
// For simplicity, we assume each entry in the remap queue corresponds to one remapped page.
uint64_t remapCleanupWindowEntries =
static_cast<uint64_t>(self->remapCleanupWindowBytes / self->pHeader->pageSize);
state uint64_t minRemapEntries = static_cast<uint64_t>(remapCleanupWindowEntries * (1.0 - toleranceRatio));
state uint64_t maxRemapEntries = static_cast<uint64_t>(remapCleanupWindowEntries * (1.0 + toleranceRatio));
debug_printf("DWALPager(%s) remapCleanup oldestRetainedVersion=%" PRId64 " remapCleanupWindowBytes=%" PRId64
" pageSize=%" PRIu32 " minRemapEntries=%" PRId64 " maxRemapEntries=%" PRId64 " items=%" PRId64
"\n",
self->filename.c_str(),
cutoff.version,
oldestRetainedVersion,
minStopVersion,
self->remapCleanupWindowBytes,
self->pHeader->pageSize,
minRemapEntries,
maxRemapEntries,
self->remapQueue.numEntries);
if (g_network->isSimulated()) {
@ -3457,6 +3465,19 @@ public:
state int sinceYield = 0;
loop {
// Stop if we have cleaned up enough remap entries, or if the stop flag is set and the remaining remap
// entries are within the maximum allowed by the tolerance ratio.
int64_t remainingEntries = self->remapQueue.numEntries;
if (remainingEntries <= minRemapEntries ||
(self->remapCleanupStop && remainingEntries <= maxRemapEntries)) {
debug_printf("DWALPager(%s) remapCleanup finished remainingEntries=%" PRId64 " minRemapEntries=%" PRId64
" maxRemapEntries=%" PRId64,
self->filename.c_str(),
remainingEntries,
minRemapEntries,
maxRemapEntries);
break;
}
state Optional<RemappedPage> p = wait(self->remapQueue.pop(cutoff));
debug_printf("DWALPager(%s) remapCleanup popped %s items=%" PRId64 "\n",
self->filename.c_str(),
@ -3465,10 +3486,8 @@ public:
// Stop if we have reached the cutoff version, which is the start of the cleanup coalescing window
if (!p.present()) {
debug_printf("DWALPager(%s) remapCleanup pop failed minVer=%" PRId64 " cutoffVer=%" PRId64
" items=%" PRId64 "\n",
debug_printf("DWALPager(%s) remapCleanup pop failed cutoffVer=%" PRId64 " items=%" PRId64 "\n",
self->filename.c_str(),
minStopVersion,
cutoff.version,
self->remapQueue.numEntries);
break;
@ -3479,12 +3498,6 @@ public:
tasks.add(task);
}
// If the stop flag is set and we've reached the minimum stop version according to the allowed lag then
// stop.
if (self->remapCleanupStop && p.get().version >= minStopVersion) {
break;
}
// Yield to prevent slow task in case no IO waits are encountered
if (++sinceYield >= 100) {
sinceYield = 0;
@ -3492,9 +3505,11 @@ public:
}
}
debug_printf("DWALPager(%s) remapCleanup stopped stopSignal=%d free=%lld delayedFree=%lld\n",
debug_printf("DWALPager(%s) remapCleanup stopped stopSignal=%d remap=%" PRId64 " free=%" PRId64
" delayedFree=%" PRId64 "\n",
self->filename.c_str(),
self->remapCleanupStop,
self->remapQueue.numEntries,
self->freeList.numEntries,
self->delayedFreeList.numEntries);
signal.send(Void());
@ -3788,15 +3803,22 @@ private:
// Wait for outstanding commit.
wait(self->commitFuture);
// While the remap queue isn't empty, advance the commit version and oldest readable version
// by the remap cleanup window and commit
while (self->remapQueue.numEntries > 0) {
self->setOldestReadableVersion(self->getLastCommittedVersion());
wait(self->commit(self->getLastCommittedVersion() + self->remapCleanupWindow + 1));
}
// Set remap cleanup window to 0 to allow the remap queue to drain.
state int64_t remapCleanupWindowBytes = self->remapCleanupWindowBytes;
self->remapCleanupWindowBytes = 0;
// One final commit because the active commit cycle may have popped from the remap queue
wait(self->commit(self->getLastCommittedVersion() + 1));
// Try twice to commit and advance version. The first commit should trigger a remap cleanup actor, which picks
// up the new remap cleanup window being 0. The second commit waits for the remap cleanup actor to finish.
state int attempt = 0;
for (attempt = 0; attempt < 2; attempt++) {
self->setOldestReadableVersion(self->getLastCommittedVersion());
wait(self->commit(self->getLastCommittedVersion() + 1));
}
ASSERT(self->remapQueue.numEntries == 0);
// Restore remap cleanup window.
if (remapCleanupWindowBytes != 0)
self->remapCleanupWindowBytes = remapCleanupWindowBytes;
TraceEvent e("RedwoodClearRemapQueue");
self->toTraceEvent(e);
@ -3869,7 +3891,7 @@ private:
RemapQueueT remapQueue;
LogicalPageQueueT extentFreeList;
ExtentUsedListQueueT extentUsedList;
Version remapCleanupWindow;
uint64_t remapCleanupWindowBytes;
Reference<FlowLock> concurrentExtentReads;
std::unordered_set<PhysicalPageID> remapDestinationsSimOnly;
@ -7409,14 +7431,20 @@ public:
? (BUGGIFY ? deterministicRandom()->randomInt(pageSize, FLOW_KNOBS->BUGGIFY_SIM_PAGE_CACHE_4K)
: FLOW_KNOBS->SIM_PAGE_CACHE_4K)
: FLOW_KNOBS->PAGE_CACHE_4K;
Version remapCleanupWindow =
(BUGGIFY ? deterministicRandom()->randomInt64(0, 100) : SERVER_KNOBS->REDWOOD_REMAP_CLEANUP_WINDOW);
// Rough size of pages to keep in remap cleanup queue before being cleaned up.
int64_t remapCleanupWindowBytes =
g_network->isSimulated()
? (BUGGIFY ? (deterministicRandom()->coinflip()
? deterministicRandom()->randomInt64(0, 100 * 1024) // small window
: deterministicRandom()->randomInt64(0, 100 * 1024 * 1024)) // large window
: 100 * 1024 * 1024) // 100M
: SERVER_KNOBS->REDWOOD_REMAP_CLEANUP_WINDOW_BYTES;
IPager2* pager = new DWALPager(pageSize,
extentSize,
filePrefix,
pageCacheBytes,
remapCleanupWindow,
remapCleanupWindowBytes,
SERVER_KNOBS->REDWOOD_EXTENT_CONCURRENT_READS,
false,
m_error);
@ -9459,9 +9487,9 @@ TEST_CASE("Lredwood/correctness/btree") {
: (pageSize * deterministicRandom()->randomInt(1, (BUGGIFY ? 10 : 10000) + 1)));
state Version versionIncrement =
params.getInt("versionIncrement").orDefault(deterministicRandom()->randomInt64(1, 1e8));
state Version remapCleanupWindow =
params.getInt("remapCleanupWindow")
.orDefault(BUGGIFY ? 0 : deterministicRandom()->randomInt64(1, versionIncrement * 50));
state int64_t remapCleanupWindowBytes =
params.getInt("remapCleanupWindowBytes")
.orDefault(BUGGIFY ? 0 : deterministicRandom()->randomInt64(1, 100) * 1024 * 1024);
state int concurrentExtentReads =
params.getInt("concurrentExtentReads").orDefault(SERVER_KNOBS->REDWOOD_EXTENT_CONCURRENT_READS);
@ -9494,7 +9522,7 @@ TEST_CASE("Lredwood/correctness/btree") {
printf("advanceOldVersionProbability: %f\n", advanceOldVersionProbability);
printf("pageCacheBytes: %s\n", pageCacheBytes == 0 ? "default" : format("%" PRId64, pageCacheBytes).c_str());
printf("versionIncrement: %" PRId64 "\n", versionIncrement);
printf("remapCleanupWindow: %" PRId64 "\n", remapCleanupWindow);
printf("remapCleanupWindowBytes: %" PRId64 "\n", remapCleanupWindowBytes);
printf("\n");
printf("Deleting existing test data...\n");
@ -9502,7 +9530,7 @@ TEST_CASE("Lredwood/correctness/btree") {
printf("Initializing...\n");
pager = new DWALPager(
pageSize, extentSize, file, pageCacheBytes, remapCleanupWindow, concurrentExtentReads, pagerMemoryOnly);
pageSize, extentSize, file, pageCacheBytes, remapCleanupWindowBytes, concurrentExtentReads, pagerMemoryOnly);
state VersionedBTree* btree = new VersionedBTree(pager, file);
wait(btree->init());
@ -9721,7 +9749,7 @@ TEST_CASE("Lredwood/correctness/btree") {
printf("Reopening btree from disk.\n");
IPager2* pager = new DWALPager(
pageSize, extentSize, file, pageCacheBytes, remapCleanupWindow, concurrentExtentReads);
pageSize, extentSize, file, pageCacheBytes, remapCleanupWindowBytes, concurrentExtentReads);
btree = new VersionedBTree(pager, file);
wait(btree->init());
@ -9761,8 +9789,11 @@ TEST_CASE("Lredwood/correctness/btree") {
state Future<Void> closedFuture = btree->onClosed();
btree->close();
wait(closedFuture);
btree =
new VersionedBTree(new DWALPager(pageSize, extentSize, file, pageCacheBytes, 0, concurrentExtentReads), file);
// If buggify, test starting with empty remap cleanup window.
btree = new VersionedBTree(
new DWALPager(
pageSize, extentSize, file, pageCacheBytes, (BUGGIFY ? 0 : remapCleanupWindowBytes), concurrentExtentReads),
file);
wait(btree->init());
wait(btree->clearAllAndCheckSanity());
@ -9899,8 +9930,8 @@ TEST_CASE(":/redwood/performance/extentQueue") {
state int pageSize = params.getInt("pageSize").orDefault(SERVER_KNOBS->REDWOOD_DEFAULT_PAGE_SIZE);
state int extentSize = params.getInt("extentSize").orDefault(SERVER_KNOBS->REDWOOD_DEFAULT_EXTENT_SIZE);
state int64_t cacheSizeBytes = params.getInt("cacheSizeBytes").orDefault(FLOW_KNOBS->PAGE_CACHE_4K);
// Choose a large remapCleanupWindow to avoid popping the queue
state Version remapCleanupWindow = params.getInt("remapCleanupWindow").orDefault(1e16);
// Choose a large remapCleanupWindowBytes to avoid popping the queue
state int64_t remapCleanupWindowBytes = params.getInt("remapCleanupWindowBytes").orDefault(1e16);
state int numEntries = params.getInt("numEntries").orDefault(10e6);
state int concurrentExtentReads =
params.getInt("concurrentExtentReads").orDefault(SERVER_KNOBS->REDWOOD_EXTENT_CONCURRENT_READS);
@ -9911,12 +9942,12 @@ TEST_CASE(":/redwood/performance/extentQueue") {
printf("pageSize: %d\n", pageSize);
printf("extentSize: %d\n", extentSize);
printf("cacheSizeBytes: %" PRId64 "\n", cacheSizeBytes);
printf("remapCleanupWindow: %" PRId64 "\n", remapCleanupWindow);
printf("remapCleanupWindowBytes: %" PRId64 "\n", remapCleanupWindowBytes);
// Do random pushes into the queue and commit periodically
if (reload) {
pager =
new DWALPager(pageSize, extentSize, fileName, cacheSizeBytes, remapCleanupWindow, concurrentExtentReads);
pager = new DWALPager(
pageSize, extentSize, fileName, cacheSizeBytes, remapCleanupWindowBytes, concurrentExtentReads);
wait(success(pager->init()));
@ -9967,7 +9998,8 @@ TEST_CASE(":/redwood/performance/extentQueue") {
}
printf("Reopening pager file from disk.\n");
pager = new DWALPager(pageSize, extentSize, fileName, cacheSizeBytes, remapCleanupWindow, concurrentExtentReads);
pager =
new DWALPager(pageSize, extentSize, fileName, cacheSizeBytes, remapCleanupWindowBytes, concurrentExtentReads);
wait(success(pager->init()));
printf("Starting ExtentQueue FastPath Recovery from Disk.\n");
@ -10054,7 +10086,7 @@ TEST_CASE(":/redwood/performance/set") {
state int maxConsecutiveRun = params.getInt("maxConsecutiveRun").orDefault(100);
state char firstKeyChar = params.get("firstKeyChar").orDefault("a")[0];
state char lastKeyChar = params.get("lastKeyChar").orDefault("m")[0];
state Version remapCleanupWindow = params.getInt("remapCleanupWindow").orDefault(100);
state int64_t remapCleanupWindowBytes = params.getInt("remapCleanupWindowBytes").orDefault(100LL * 1024 * 1024);
state int concurrentExtentReads =
params.getInt("concurrentExtentReads").orDefault(SERVER_KNOBS->REDWOOD_EXTENT_CONCURRENT_READS);
state bool openExisting = params.getInt("openExisting").orDefault(0);
@ -10088,7 +10120,7 @@ TEST_CASE(":/redwood/performance/set") {
printf("maxCommitSize: %d\n", maxKVBytesPerCommit);
printf("kvBytesTarget: %" PRId64 "\n", kvBytesTarget);
printf("KeyLexicon '%c' to '%c'\n", firstKeyChar, lastKeyChar);
printf("remapCleanupWindow: %" PRId64 "\n", remapCleanupWindow);
printf("remapCleanupWindowBytes: %" PRId64 "\n", remapCleanupWindowBytes);
printf("concurrentScans: %d\n", concurrentScans);
printf("concurrentSeeks: %d\n", concurrentSeeks);
printf("seeks: %d\n", seeks);
@ -10108,7 +10140,7 @@ TEST_CASE(":/redwood/performance/set") {
}
DWALPager* pager = new DWALPager(
pageSize, extentSize, file, pageCacheBytes, remapCleanupWindow, concurrentExtentReads, pagerMemoryOnly);
pageSize, extentSize, file, pageCacheBytes, remapCleanupWindowBytes, concurrentExtentReads, pagerMemoryOnly);
state VersionedBTree* btree = new VersionedBTree(pager, file);
wait(btree->init());
printf("Initialized. StorageBytes=%s\n", btree->getStorageBytes().toString().c_str());