From bed25f957147b8f19d86f70c5c7b9d11996a1c8f Mon Sep 17 00:00:00 2001 From: Steve Atherton Date: Sun, 28 Nov 2021 21:03:44 -0800 Subject: [PATCH 1/2] Delay prioritized eviction of updated pages until after commit completes. --- fdbclient/ServerKnobs.cpp | 1 + fdbclient/ServerKnobs.h | 1 + fdbserver/VersionedBTree.actor.cpp | 54 ++++++++++++++++++++---------- 3 files changed, 39 insertions(+), 17 deletions(-) diff --git a/fdbclient/ServerKnobs.cpp b/fdbclient/ServerKnobs.cpp index 3ed0b776c3..b11bd8e88c 100644 --- a/fdbclient/ServerKnobs.cpp +++ b/fdbclient/ServerKnobs.cpp @@ -774,6 +774,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi init( REDWOOD_PAGEFILE_GROWTH_SIZE_PAGES, 20000 ); if( randomize && BUGGIFY ) { REDWOOD_PAGEFILE_GROWTH_SIZE_PAGES = deterministicRandom()->randomInt(200, 1000); } init( REDWOOD_METRICS_INTERVAL, 5.0 ); init( REDWOOD_HISTOGRAM_INTERVAL, 30.0 ); + init( REDWOOD_EVICT_UPDATED_PAGES, true ); if( randomize && BUGGIFY ) { REDWOOD_EVICT_UPDATED_PAGES = false; } // Server request latency measurement init( LATENCY_SAMPLE_SIZE, 100000 ); diff --git a/fdbclient/ServerKnobs.h b/fdbclient/ServerKnobs.h index add4bf5303..050804a9a6 100644 --- a/fdbclient/ServerKnobs.h +++ b/fdbclient/ServerKnobs.h @@ -727,6 +727,7 @@ public: int REDWOOD_PAGEFILE_GROWTH_SIZE_PAGES; // Number of pages to grow page file by double REDWOOD_METRICS_INTERVAL; double REDWOOD_HISTOGRAM_INTERVAL; + bool REDWOOD_EVICT_UPDATED_PAGES; // Whether to prioritize eviction of updated pages from cache. // Server request latency measurement int LATENCY_SAMPLE_SIZE; diff --git a/fdbserver/VersionedBTree.actor.cpp b/fdbserver/VersionedBTree.actor.cpp index f72e569e46..0b38093651 100644 --- a/fdbserver/VersionedBTree.actor.cpp +++ b/fdbserver/VersionedBTree.actor.cpp @@ -1880,6 +1880,7 @@ class ObjectCache : NonCopyable { ObjectType item; int hits; int size; + bool evictionPrioritized; }; typedef std::unordered_map CacheT; @@ -1905,13 +1906,13 @@ public: return nullptr; } - // If index is in cache, move it to the front of the eviction order + // If index is in cache and not on the prioritized eviction order list, move it there. void prioritizeEviction(const IndexType& index) { auto i = cache.find(index); - if (i != cache.end()) { - auto ei = evictionOrder.iterator_to(i->second); - evictionOrder.erase(ei); - evictionOrder.push_front(i->second); + if (i != cache.end() && !i->second.evictionPrioritized) { + prioritizedEvictions.splice( + prioritizedEvictions.end(), evictionOrder, EvictionOrderT::s_iterator_to(i->second)); + i->second.evictionPrioritized = true; } } @@ -1927,7 +1928,7 @@ public: ++g_redwoodMetrics.metric.pagerEvictUnhit; } currentSize -= toEvict.size; - evictionOrder.erase(evictionOrder.iterator_to(toEvict)); + evictionOrder.erase(EvictionOrderT::s_iterator_to(toEvict)); cache.erase(i); return true; } @@ -1943,9 +1944,10 @@ public: if (entry.is_linked()) { if (!noHit) { ++entry.hits; - // Move the entry to the back of the eviction order - evictionOrder.erase(evictionOrder.iterator_to(entry)); - evictionOrder.push_back(entry); + // Move item to the back of the eviction order, regardless of which list it is currently in + evictionOrder.splice(evictionOrder.end(), + entry.evictionPrioritized ? prioritizedEvictions : evictionOrder, + EvictionOrderT::s_iterator_to(entry)); } } else { // Otherwise it was a cache miss @@ -1956,6 +1958,7 @@ public: currentSize += size; // Insert the newly created Entry at the back of the eviction order evictionOrder.push_back(entry); + entry.evictionPrioritized = false; // While the cache is too big, evict the oldest entry until the oldest entry can't be evicted. while (currentSize > sizeLimit) { @@ -2003,6 +2006,10 @@ public: state EvictionOrderT evictionOrder; state int64_t currentSize; + // Flush all prioritized evictions to the main eviction order + self->flushPrioritizedEvictions(); + ASSERT(cache.size() == evictionOrder.size()); + // Swap cache contents to local state vars // After this, no more entries will be added to or read from these // structures so we know for sure that no page will become unevictable @@ -2029,17 +2036,21 @@ public: } Future clear() { - ASSERT(evictionOrder.size() == cache.size()); + ASSERT(evictionOrder.size() + prioritizedEvictions.size() == cache.size()); return clear_impl(this); } int count() const { return currentSize; } + // Move the prioritized evictions queued to the front of the eviction order + void flushPrioritizedEvictions() { evictionOrder.splice(evictionOrder.begin(), prioritizedEvictions); } + private: int64_t sizeLimit; int64_t currentSize; CacheT cache; EvictionOrderT evictionOrder; + EvictionOrderT prioritizedEvictions; }; ACTOR template @@ -2742,10 +2753,12 @@ public: remapQueue.pushBack(r); auto& versionedMap = remappedPages[pageID]; - // An update page is unlikely to have its old version read again soon, so prioritize its cache eviction - // If the versioned map is empty for this page then the prior version of the page is at stored at the - // PhysicalPageID pageID, otherwise it is the last mapped value in the version-ordered map. - pageCache.prioritizeEviction(versionedMap.empty() ? pageID : versionedMap.rbegin()->second); + if (SERVER_KNOBS->REDWOOD_EVICT_UPDATED_PAGES) { + // An update page is unlikely to have its old version read again soon, so prioritize its cache eviction + // If the versioned map is empty for this page then the prior version of the page is at stored at the + // PhysicalPageID pageID, otherwise it is the last mapped value in the version-ordered map. + pageCache.prioritizeEviction(versionedMap.empty() ? pageID : versionedMap.rbegin()->second); + } versionedMap[v] = newPageID; debug_printf("DWALPager(%s) pushed %s\n", filename.c_str(), RemappedPage(r).toString().c_str()); @@ -2776,7 +2789,9 @@ public: } // A freed page is unlikely to be read again soon so prioritize its cache eviction - pageCache.prioritizeEviction(pageID); + if (SERVER_KNOBS->REDWOOD_EVICT_UPDATED_PAGES) { + pageCache.prioritizeEviction(pageID); + } } LogicalPageID detachRemappedPage(LogicalPageID pageID, Version v) override { @@ -2830,8 +2845,10 @@ public: remapQueue.pushBack(RemappedPage{ v, pageID, invalidLogicalPageID }); // A freed page is unlikely to be read again soon so prioritize its cache eviction - PhysicalPageID previousPhysicalPage = i->second.rbegin()->second; - pageCache.prioritizeEviction(previousPhysicalPage); + if (SERVER_KNOBS->REDWOOD_EVICT_UPDATED_PAGES) { + PhysicalPageID previousPhysicalPage = i->second.rbegin()->second; + pageCache.prioritizeEviction(previousPhysicalPage); + } i->second[v] = invalidLogicalPageID; return; @@ -3599,6 +3616,9 @@ public: // Start unmapping pages for expired versions self->remapCleanupFuture = remapCleanup(self); + // If there are prioritized evictions queued, flush them to the regular eviction order. + self->pageCache.flushPrioritizedEvictions(); + return Void(); } From 8216ad1e4ea596ca51203d403e918fd55a7620bd Mon Sep 17 00:00:00 2001 From: Steve Atherton Date: Sun, 28 Nov 2021 18:49:16 -0800 Subject: [PATCH 2/2] When a cached page is hit, if it is in the prioritized eviction order don't move it to the normal order. --- fdbserver/VersionedBTree.actor.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fdbserver/VersionedBTree.actor.cpp b/fdbserver/VersionedBTree.actor.cpp index 0b38093651..4b9ee39090 100644 --- a/fdbserver/VersionedBTree.actor.cpp +++ b/fdbserver/VersionedBTree.actor.cpp @@ -1944,10 +1944,10 @@ public: if (entry.is_linked()) { if (!noHit) { ++entry.hits; - // Move item to the back of the eviction order, regardless of which list it is currently in - evictionOrder.splice(evictionOrder.end(), - entry.evictionPrioritized ? prioritizedEvictions : evictionOrder, - EvictionOrderT::s_iterator_to(entry)); + // If item eviction is not prioritized, move to back of eviction order + if (!entry.evictionPrioritized) { + evictionOrder.splice(evictionOrder.end(), evictionOrder, EvictionOrderT::s_iterator_to(entry)); + } } } else { // Otherwise it was a cache miss