Merge pull request #6461 from sfc-gh-yiwu/redwood_remap
Redwood: config remap cleanup by size instead of versions
This commit is contained in:
commit
032c80dec5
|
@ -807,8 +807,8 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
init( REDWOOD_LAZY_CLEAR_BATCH_SIZE_PAGES, 10 );
|
||||
init( REDWOOD_LAZY_CLEAR_MIN_PAGES, 0 );
|
||||
init( REDWOOD_LAZY_CLEAR_MAX_PAGES, 1e6 );
|
||||
init( REDWOOD_REMAP_CLEANUP_WINDOW, 50 );
|
||||
init( REDWOOD_REMAP_CLEANUP_LAG, 0.1 );
|
||||
init( REDWOOD_REMAP_CLEANUP_WINDOW_BYTES, 4LL * 1024 * 1024 * 1024 );
|
||||
init( REDWOOD_REMAP_CLEANUP_TOLERANCE_RATIO, 0.05 );
|
||||
init( REDWOOD_PAGEFILE_GROWTH_SIZE_PAGES, 20000 ); if( randomize && BUGGIFY ) { REDWOOD_PAGEFILE_GROWTH_SIZE_PAGES = deterministicRandom()->randomInt(200, 1000); }
|
||||
init( REDWOOD_METRICS_INTERVAL, 5.0 );
|
||||
init( REDWOOD_HISTOGRAM_INTERVAL, 30.0 );
|
||||
|
|
|
@ -754,9 +754,10 @@ public:
|
|||
// queue is empty
|
||||
int REDWOOD_LAZY_CLEAR_MAX_PAGES; // Maximum number of pages to free before ending a lazy clear cycle, unless the
|
||||
// queue is empty
|
||||
int64_t REDWOOD_REMAP_CLEANUP_WINDOW; // Remap remover lag interval in which to coalesce page writes
|
||||
double REDWOOD_REMAP_CLEANUP_LAG; // Maximum allowed remap remover lag behind the cleanup window as a multiple of
|
||||
// the window size
|
||||
int64_t REDWOOD_REMAP_CLEANUP_WINDOW_BYTES; // Total size of remapped pages to keep before being removed by
|
||||
// remap cleanup
|
||||
double REDWOOD_REMAP_CLEANUP_TOLERANCE_RATIO; // Maximum ratio of the remap cleanup window that remap cleanup is
|
||||
// allowed to be ahead or behind
|
||||
int REDWOOD_PAGEFILE_GROWTH_SIZE_PAGES; // Number of pages to grow page file by
|
||||
double REDWOOD_METRICS_INTERVAL;
|
||||
double REDWOOD_HISTOGRAM_INTERVAL;
|
||||
|
|
|
@ -2144,14 +2144,14 @@ public:
|
|||
int desiredExtentSize,
|
||||
std::string filename,
|
||||
int64_t pageCacheSizeBytes,
|
||||
Version remapCleanupWindow,
|
||||
int64_t remapCleanupWindowBytes,
|
||||
int concurrentExtentReads,
|
||||
bool memoryOnly = false,
|
||||
Promise<Void> errorPromise = {})
|
||||
: ioLock(FLOW_KNOBS->MAX_OUTSTANDING, ioMaxPriority, FLOW_KNOBS->MAX_OUTSTANDING / 2),
|
||||
pageCacheBytes(pageCacheSizeBytes), pHeader(nullptr), desiredPageSize(desiredPageSize),
|
||||
desiredExtentSize(desiredExtentSize), filename(filename), memoryOnly(memoryOnly), errorPromise(errorPromise),
|
||||
remapCleanupWindow(remapCleanupWindow), concurrentExtentReads(new FlowLock(concurrentExtentReads)) {
|
||||
remapCleanupWindowBytes(remapCleanupWindowBytes), concurrentExtentReads(new FlowLock(concurrentExtentReads)) {
|
||||
|
||||
// This sets the page cache size for all PageCacheT instances using the same evictor
|
||||
pageCache.evictor().sizeLimit = pageCacheBytes;
|
||||
|
@ -3437,18 +3437,26 @@ public:
|
|||
state Version oldestRetainedVersion = self->effectiveOldestVersion();
|
||||
|
||||
// Cutoff is the version we can pop to
|
||||
state RemappedPage cutoff(oldestRetainedVersion - self->remapCleanupWindow);
|
||||
// Minimum version we must pop to before obeying stop command.
|
||||
state Version minStopVersion =
|
||||
cutoff.version - (BUGGIFY ? deterministicRandom()->randomInt(0, 10)
|
||||
: (self->remapCleanupWindow * SERVER_KNOBS->REDWOOD_REMAP_CLEANUP_LAG));
|
||||
state RemappedPage cutoff(oldestRetainedVersion);
|
||||
|
||||
debug_printf("DWALPager(%s) remapCleanup cutoff.version %" PRId64 " oldestRetainedVersion=%" PRId64
|
||||
" minStopVersion %" PRId64 " items=%" PRId64 "\n",
|
||||
// Maximum number of remaining remap entries to keep before obeying stop command.
|
||||
double toleranceRatio = BUGGIFY ? deterministicRandom()->randomInt(0, 10) / 100.0
|
||||
: SERVER_KNOBS->REDWOOD_REMAP_CLEANUP_TOLERANCE_RATIO;
|
||||
// For simplicity, we assume each entry in the remap queue corresponds to one remapped page.
|
||||
uint64_t remapCleanupWindowEntries =
|
||||
static_cast<uint64_t>(self->remapCleanupWindowBytes / self->pHeader->pageSize);
|
||||
state uint64_t minRemapEntries = static_cast<uint64_t>(remapCleanupWindowEntries * (1.0 - toleranceRatio));
|
||||
state uint64_t maxRemapEntries = static_cast<uint64_t>(remapCleanupWindowEntries * (1.0 + toleranceRatio));
|
||||
|
||||
debug_printf("DWALPager(%s) remapCleanup oldestRetainedVersion=%" PRId64 " remapCleanupWindowBytes=%" PRId64
|
||||
" pageSize=%" PRIu32 " minRemapEntries=%" PRId64 " maxRemapEntries=%" PRId64 " items=%" PRId64
|
||||
"\n",
|
||||
self->filename.c_str(),
|
||||
cutoff.version,
|
||||
oldestRetainedVersion,
|
||||
minStopVersion,
|
||||
self->remapCleanupWindowBytes,
|
||||
self->pHeader->pageSize,
|
||||
minRemapEntries,
|
||||
maxRemapEntries,
|
||||
self->remapQueue.numEntries);
|
||||
|
||||
if (g_network->isSimulated()) {
|
||||
|
@ -3457,6 +3465,19 @@ public:
|
|||
|
||||
state int sinceYield = 0;
|
||||
loop {
|
||||
// Stop if we have cleaned up enough remap entries, or if the stop flag is set and the remaining remap
|
||||
// entries are less than that allowed by the lag.
|
||||
int64_t remainingEntries = self->remapQueue.numEntries;
|
||||
if (remainingEntries <= minRemapEntries ||
|
||||
(self->remapCleanupStop && remainingEntries <= maxRemapEntries)) {
|
||||
debug_printf("DWALPager(%s) remapCleanup finished remainingEntries=%" PRId64 " minRemapEntries=%" PRId64
|
||||
" maxRemapEntries=%" PRId64,
|
||||
self->filename.c_str(),
|
||||
remainingEntries,
|
||||
minRemapEntries,
|
||||
maxRemapEntries);
|
||||
break;
|
||||
}
|
||||
state Optional<RemappedPage> p = wait(self->remapQueue.pop(cutoff));
|
||||
debug_printf("DWALPager(%s) remapCleanup popped %s items=%" PRId64 "\n",
|
||||
self->filename.c_str(),
|
||||
|
@ -3465,10 +3486,8 @@ public:
|
|||
|
||||
// Stop if we have reached the cutoff version, which is the start of the cleanup coalescing window
|
||||
if (!p.present()) {
|
||||
debug_printf("DWALPager(%s) remapCleanup pop failed minVer=%" PRId64 " cutoffVer=%" PRId64
|
||||
" items=%" PRId64 "\n",
|
||||
debug_printf("DWALPager(%s) remapCleanup pop failed cutoffVer=%" PRId64 " items=%" PRId64 "\n",
|
||||
self->filename.c_str(),
|
||||
minStopVersion,
|
||||
cutoff.version,
|
||||
self->remapQueue.numEntries);
|
||||
break;
|
||||
|
@ -3479,12 +3498,6 @@ public:
|
|||
tasks.add(task);
|
||||
}
|
||||
|
||||
// If the stop flag is set and we've reached the minimum stop version according to the allowed lag then
|
||||
// stop.
|
||||
if (self->remapCleanupStop && p.get().version >= minStopVersion) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Yield to prevent slow task in case no IO waits are encountered
|
||||
if (++sinceYield >= 100) {
|
||||
sinceYield = 0;
|
||||
|
@ -3492,9 +3505,11 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
debug_printf("DWALPager(%s) remapCleanup stopped stopSignal=%d free=%lld delayedFree=%lld\n",
|
||||
debug_printf("DWALPager(%s) remapCleanup stopped stopSignal=%d remap=%" PRId64 " free=%" PRId64
|
||||
" delayedFree=%" PRId64 "\n",
|
||||
self->filename.c_str(),
|
||||
self->remapCleanupStop,
|
||||
self->remapQueue.numEntries,
|
||||
self->freeList.numEntries,
|
||||
self->delayedFreeList.numEntries);
|
||||
signal.send(Void());
|
||||
|
@ -3788,15 +3803,22 @@ private:
|
|||
// Wait for outstanding commit.
|
||||
wait(self->commitFuture);
|
||||
|
||||
// While the remap queue isn't empty, advance the commit version and oldest readable version
|
||||
// by the remap cleanup window and commit
|
||||
while (self->remapQueue.numEntries > 0) {
|
||||
self->setOldestReadableVersion(self->getLastCommittedVersion());
|
||||
wait(self->commit(self->getLastCommittedVersion() + self->remapCleanupWindow + 1));
|
||||
}
|
||||
// Set remap cleanup window to 0 to allow the remap queue to drain.
|
||||
state int64_t remapCleanupWindowBytes = self->remapCleanupWindowBytes;
|
||||
self->remapCleanupWindowBytes = 0;
|
||||
|
||||
// One final commit because the active commit cycle may have popped from the remap queue
|
||||
wait(self->commit(self->getLastCommittedVersion() + 1));
|
||||
// Try twice to commit and advance version. The first commit should trigger a remap cleanup actor, which picks
|
||||
// up the new remap cleanup window being 0. The second commit waits for the remap cleanup actor to finish.
|
||||
state int attempt = 0;
|
||||
for (attempt = 0; attempt < 2; attempt++) {
|
||||
self->setOldestReadableVersion(self->getLastCommittedVersion());
|
||||
wait(self->commit(self->getLastCommittedVersion() + 1));
|
||||
}
|
||||
ASSERT(self->remapQueue.numEntries == 0);
|
||||
|
||||
// Restore remap cleanup window.
|
||||
if (remapCleanupWindowBytes != 0)
|
||||
self->remapCleanupWindowBytes = remapCleanupWindowBytes;
|
||||
|
||||
TraceEvent e("RedwoodClearRemapQueue");
|
||||
self->toTraceEvent(e);
|
||||
|
@ -3869,7 +3891,7 @@ private:
|
|||
RemapQueueT remapQueue;
|
||||
LogicalPageQueueT extentFreeList;
|
||||
ExtentUsedListQueueT extentUsedList;
|
||||
Version remapCleanupWindow;
|
||||
uint64_t remapCleanupWindowBytes;
|
||||
Reference<FlowLock> concurrentExtentReads;
|
||||
std::unordered_set<PhysicalPageID> remapDestinationsSimOnly;
|
||||
|
||||
|
@ -7409,14 +7431,20 @@ public:
|
|||
? (BUGGIFY ? deterministicRandom()->randomInt(pageSize, FLOW_KNOBS->BUGGIFY_SIM_PAGE_CACHE_4K)
|
||||
: FLOW_KNOBS->SIM_PAGE_CACHE_4K)
|
||||
: FLOW_KNOBS->PAGE_CACHE_4K;
|
||||
Version remapCleanupWindow =
|
||||
(BUGGIFY ? deterministicRandom()->randomInt64(0, 100) : SERVER_KNOBS->REDWOOD_REMAP_CLEANUP_WINDOW);
|
||||
// Rough size of pages to keep in remap cleanup queue before being cleaned up.
|
||||
int64_t remapCleanupWindowBytes =
|
||||
g_network->isSimulated()
|
||||
? (BUGGIFY ? (deterministicRandom()->coinflip()
|
||||
? deterministicRandom()->randomInt64(0, 100 * 1024) // small window
|
||||
: deterministicRandom()->randomInt64(0, 100 * 1024 * 1024)) // large window
|
||||
: 100 * 1024 * 1024) // 100M
|
||||
: SERVER_KNOBS->REDWOOD_REMAP_CLEANUP_WINDOW_BYTES;
|
||||
|
||||
IPager2* pager = new DWALPager(pageSize,
|
||||
extentSize,
|
||||
filePrefix,
|
||||
pageCacheBytes,
|
||||
remapCleanupWindow,
|
||||
remapCleanupWindowBytes,
|
||||
SERVER_KNOBS->REDWOOD_EXTENT_CONCURRENT_READS,
|
||||
false,
|
||||
m_error);
|
||||
|
@ -9459,9 +9487,9 @@ TEST_CASE("Lredwood/correctness/btree") {
|
|||
: (pageSize * deterministicRandom()->randomInt(1, (BUGGIFY ? 10 : 10000) + 1)));
|
||||
state Version versionIncrement =
|
||||
params.getInt("versionIncrement").orDefault(deterministicRandom()->randomInt64(1, 1e8));
|
||||
state Version remapCleanupWindow =
|
||||
params.getInt("remapCleanupWindow")
|
||||
.orDefault(BUGGIFY ? 0 : deterministicRandom()->randomInt64(1, versionIncrement * 50));
|
||||
state int64_t remapCleanupWindowBytes =
|
||||
params.getInt("remapCleanupWindowBytes")
|
||||
.orDefault(BUGGIFY ? 0 : deterministicRandom()->randomInt64(1, 100) * 1024 * 1024);
|
||||
state int concurrentExtentReads =
|
||||
params.getInt("concurrentExtentReads").orDefault(SERVER_KNOBS->REDWOOD_EXTENT_CONCURRENT_READS);
|
||||
|
||||
|
@ -9494,7 +9522,7 @@ TEST_CASE("Lredwood/correctness/btree") {
|
|||
printf("advanceOldVersionProbability: %f\n", advanceOldVersionProbability);
|
||||
printf("pageCacheBytes: %s\n", pageCacheBytes == 0 ? "default" : format("%" PRId64, pageCacheBytes).c_str());
|
||||
printf("versionIncrement: %" PRId64 "\n", versionIncrement);
|
||||
printf("remapCleanupWindow: %" PRId64 "\n", remapCleanupWindow);
|
||||
printf("remapCleanupWindowBytes: %" PRId64 "\n", remapCleanupWindowBytes);
|
||||
printf("\n");
|
||||
|
||||
printf("Deleting existing test data...\n");
|
||||
|
@ -9502,7 +9530,7 @@ TEST_CASE("Lredwood/correctness/btree") {
|
|||
|
||||
printf("Initializing...\n");
|
||||
pager = new DWALPager(
|
||||
pageSize, extentSize, file, pageCacheBytes, remapCleanupWindow, concurrentExtentReads, pagerMemoryOnly);
|
||||
pageSize, extentSize, file, pageCacheBytes, remapCleanupWindowBytes, concurrentExtentReads, pagerMemoryOnly);
|
||||
state VersionedBTree* btree = new VersionedBTree(pager, file);
|
||||
wait(btree->init());
|
||||
|
||||
|
@ -9721,7 +9749,7 @@ TEST_CASE("Lredwood/correctness/btree") {
|
|||
|
||||
printf("Reopening btree from disk.\n");
|
||||
IPager2* pager = new DWALPager(
|
||||
pageSize, extentSize, file, pageCacheBytes, remapCleanupWindow, concurrentExtentReads);
|
||||
pageSize, extentSize, file, pageCacheBytes, remapCleanupWindowBytes, concurrentExtentReads);
|
||||
btree = new VersionedBTree(pager, file);
|
||||
wait(btree->init());
|
||||
|
||||
|
@ -9761,8 +9789,11 @@ TEST_CASE("Lredwood/correctness/btree") {
|
|||
state Future<Void> closedFuture = btree->onClosed();
|
||||
btree->close();
|
||||
wait(closedFuture);
|
||||
btree =
|
||||
new VersionedBTree(new DWALPager(pageSize, extentSize, file, pageCacheBytes, 0, concurrentExtentReads), file);
|
||||
// If buggify, test starting with empty remap cleanup window.
|
||||
btree = new VersionedBTree(
|
||||
new DWALPager(
|
||||
pageSize, extentSize, file, pageCacheBytes, (BUGGIFY ? 0 : remapCleanupWindowBytes), concurrentExtentReads),
|
||||
file);
|
||||
wait(btree->init());
|
||||
|
||||
wait(btree->clearAllAndCheckSanity());
|
||||
|
@ -9899,8 +9930,8 @@ TEST_CASE(":/redwood/performance/extentQueue") {
|
|||
state int pageSize = params.getInt("pageSize").orDefault(SERVER_KNOBS->REDWOOD_DEFAULT_PAGE_SIZE);
|
||||
state int extentSize = params.getInt("extentSize").orDefault(SERVER_KNOBS->REDWOOD_DEFAULT_EXTENT_SIZE);
|
||||
state int64_t cacheSizeBytes = params.getInt("cacheSizeBytes").orDefault(FLOW_KNOBS->PAGE_CACHE_4K);
|
||||
// Choose a large remapCleanupWindow to avoid popping the queue
|
||||
state Version remapCleanupWindow = params.getInt("remapCleanupWindow").orDefault(1e16);
|
||||
// Choose a large remapCleanupWindowBytes to avoid popping the queue
|
||||
state int64_t remapCleanupWindowBytes = params.getInt("remapCleanupWindowBytes").orDefault(1e16);
|
||||
state int numEntries = params.getInt("numEntries").orDefault(10e6);
|
||||
state int concurrentExtentReads =
|
||||
params.getInt("concurrentExtentReads").orDefault(SERVER_KNOBS->REDWOOD_EXTENT_CONCURRENT_READS);
|
||||
|
@ -9911,12 +9942,12 @@ TEST_CASE(":/redwood/performance/extentQueue") {
|
|||
printf("pageSize: %d\n", pageSize);
|
||||
printf("extentSize: %d\n", extentSize);
|
||||
printf("cacheSizeBytes: %" PRId64 "\n", cacheSizeBytes);
|
||||
printf("remapCleanupWindow: %" PRId64 "\n", remapCleanupWindow);
|
||||
printf("remapCleanupWindowBytes: %" PRId64 "\n", remapCleanupWindowBytes);
|
||||
|
||||
// Do random pushes into the queue and commit periodically
|
||||
if (reload) {
|
||||
pager =
|
||||
new DWALPager(pageSize, extentSize, fileName, cacheSizeBytes, remapCleanupWindow, concurrentExtentReads);
|
||||
pager = new DWALPager(
|
||||
pageSize, extentSize, fileName, cacheSizeBytes, remapCleanupWindowBytes, concurrentExtentReads);
|
||||
|
||||
wait(success(pager->init()));
|
||||
|
||||
|
@ -9967,7 +9998,8 @@ TEST_CASE(":/redwood/performance/extentQueue") {
|
|||
}
|
||||
|
||||
printf("Reopening pager file from disk.\n");
|
||||
pager = new DWALPager(pageSize, extentSize, fileName, cacheSizeBytes, remapCleanupWindow, concurrentExtentReads);
|
||||
pager =
|
||||
new DWALPager(pageSize, extentSize, fileName, cacheSizeBytes, remapCleanupWindowBytes, concurrentExtentReads);
|
||||
wait(success(pager->init()));
|
||||
|
||||
printf("Starting ExtentQueue FastPath Recovery from Disk.\n");
|
||||
|
@ -10054,7 +10086,7 @@ TEST_CASE(":/redwood/performance/set") {
|
|||
state int maxConsecutiveRun = params.getInt("maxConsecutiveRun").orDefault(100);
|
||||
state char firstKeyChar = params.get("firstKeyChar").orDefault("a")[0];
|
||||
state char lastKeyChar = params.get("lastKeyChar").orDefault("m")[0];
|
||||
state Version remapCleanupWindow = params.getInt("remapCleanupWindow").orDefault(100);
|
||||
state int64_t remapCleanupWindowBytes = params.getInt("remapCleanupWindowBytes").orDefault(100LL * 1024 * 1024);
|
||||
state int concurrentExtentReads =
|
||||
params.getInt("concurrentExtentReads").orDefault(SERVER_KNOBS->REDWOOD_EXTENT_CONCURRENT_READS);
|
||||
state bool openExisting = params.getInt("openExisting").orDefault(0);
|
||||
|
@ -10088,7 +10120,7 @@ TEST_CASE(":/redwood/performance/set") {
|
|||
printf("maxCommitSize: %d\n", maxKVBytesPerCommit);
|
||||
printf("kvBytesTarget: %" PRId64 "\n", kvBytesTarget);
|
||||
printf("KeyLexicon '%c' to '%c'\n", firstKeyChar, lastKeyChar);
|
||||
printf("remapCleanupWindow: %" PRId64 "\n", remapCleanupWindow);
|
||||
printf("remapCleanupWindowBytes: %" PRId64 "\n", remapCleanupWindowBytes);
|
||||
printf("concurrentScans: %d\n", concurrentScans);
|
||||
printf("concurrentSeeks: %d\n", concurrentSeeks);
|
||||
printf("seeks: %d\n", seeks);
|
||||
|
@ -10108,7 +10140,7 @@ TEST_CASE(":/redwood/performance/set") {
|
|||
}
|
||||
|
||||
DWALPager* pager = new DWALPager(
|
||||
pageSize, extentSize, file, pageCacheBytes, remapCleanupWindow, concurrentExtentReads, pagerMemoryOnly);
|
||||
pageSize, extentSize, file, pageCacheBytes, remapCleanupWindowBytes, concurrentExtentReads, pagerMemoryOnly);
|
||||
state VersionedBTree* btree = new VersionedBTree(pager, file);
|
||||
wait(btree->init());
|
||||
printf("Initialized. StorageBytes=%s\n", btree->getStorageBytes().toString().c_str());
|
||||
|
|
Loading…
Reference in New Issue