From 55fc0c1a0bfa0b728c70edd8256d4cba81cf0d3c Mon Sep 17 00:00:00 2001
From: neethuhaneesha
Date: Fri, 28 Oct 2022 12:24:53 -0700
Subject: [PATCH] Enable clear range eager reads knob for rocksdb.

---
 fdbclient/ServerKnobs.cpp                 |  3 ++-
 fdbclient/include/fdbclient/ServerKnobs.h |  1 +
 fdbserver/storageserver.actor.cpp         | 25 +++++++++++++++--------
 3 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/fdbclient/ServerKnobs.cpp b/fdbclient/ServerKnobs.cpp
index 4f034714c7..db51b74611 100644
--- a/fdbclient/ServerKnobs.cpp
+++ b/fdbclient/ServerKnobs.cpp
@@ -422,10 +422,11 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
 	// Enable this knob only for experminatal purpose, never enable this in production.
 	// If enabled, all the committed in-memory memtable writes are lost on a crash.
 	init( ROCKSDB_DISABLE_WAL_EXPERIMENTAL, false );
-	// If ROCKSDB_SINGLEKEY_DELETES_ON_CLEARRANGE is enabled, disable ENABLE_CLEAR_RANGE_EAGER_READS knob.
+	// If ROCKSDB_SINGLEKEY_DELETES_ON_CLEARRANGE is enabled, disable ROCKSDB_ENABLE_CLEAR_RANGE_EAGER_READS knob.
 	// These knobs have contrary functionality.
 	init( ROCKSDB_SINGLEKEY_DELETES_ON_CLEARRANGE, false ); if( randomize && BUGGIFY ) ROCKSDB_SINGLEKEY_DELETES_ON_CLEARRANGE = deterministicRandom()->coinflip() ? false : true;
 	init( ROCKSDB_SINGLEKEY_DELETES_BYTES_LIMIT, 200000 ); // 200KB
+	init( ROCKSDB_ENABLE_CLEAR_RANGE_EAGER_READS, true ); if( randomize && BUGGIFY ) ROCKSDB_ENABLE_CLEAR_RANGE_EAGER_READS = deterministicRandom()->coinflip() ? false : true;
 	// Can commit will delay ROCKSDB_CAN_COMMIT_DELAY_ON_OVERLOAD seconds for
 	// ROCKSDB_CAN_COMMIT_DELAY_TIMES_ON_OVERLOAD times, if rocksdb overloaded.
 	// Set ROCKSDB_CAN_COMMIT_DELAY_TIMES_ON_OVERLOAD to 0, to disable
diff --git a/fdbclient/include/fdbclient/ServerKnobs.h b/fdbclient/include/fdbclient/ServerKnobs.h
index 8cb24c0e75..cf9f050043 100644
--- a/fdbclient/include/fdbclient/ServerKnobs.h
+++ b/fdbclient/include/fdbclient/ServerKnobs.h
@@ -349,6 +349,7 @@ public:
 	bool ROCKSDB_DISABLE_WAL_EXPERIMENTAL;
 	bool ROCKSDB_SINGLEKEY_DELETES_ON_CLEARRANGE;
 	int64_t ROCKSDB_SINGLEKEY_DELETES_BYTES_LIMIT;
+	bool ROCKSDB_ENABLE_CLEAR_RANGE_EAGER_READS;
 	int64_t ROCKSDB_COMPACTION_READAHEAD_SIZE;
 	int64_t ROCKSDB_BLOCK_SIZE;
 	bool ENABLE_SHARDED_ROCKSDB;
diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp
index 409f2a7b23..b4498b8333 100644
--- a/fdbserver/storageserver.actor.cpp
+++ b/fdbserver/storageserver.actor.cpp
@@ -460,6 +460,9 @@ struct UpdateEagerReadInfo {
 	std::vector<Optional<Value>> value;
 
 	Arena arena;
+	bool enableClearRangeEagerReads;
+
+	UpdateEagerReadInfo(bool enableClearRangeEagerReads) : enableClearRangeEagerReads(enableClearRangeEagerReads) {}
 
 	void addMutations(VectorRef<MutationRef> const& mutations) {
 		for (auto& m : mutations)
@@ -468,11 +471,10 @@ struct UpdateEagerReadInfo {
 
 	void addMutation(MutationRef const& m) {
 		// SOMEDAY: Theoretically we can avoid a read if there is an earlier overlapping ClearRange
-		if (m.type == MutationRef::ClearRange && !m.param2.startsWith(systemKeys.end) &&
-		    SERVER_KNOBS->ENABLE_CLEAR_RANGE_EAGER_READS)
+		if (m.type == MutationRef::ClearRange && !m.param2.startsWith(systemKeys.end) && enableClearRangeEagerReads)
 			keyBegin.push_back(m.param2);
 		else if (m.type == MutationRef::CompareAndClear) {
-			if (SERVER_KNOBS->ENABLE_CLEAR_RANGE_EAGER_READS)
+			if (enableClearRangeEagerReads)
 				keyBegin.push_back(keyAfter(m.param1, arena));
 			if (keys.size() > 0 && keys.back().first == m.param1) {
 				// Don't issue a second read, if the last read was equal to the current key.
@@ -489,7 +491,7 @@ struct UpdateEagerReadInfo {
 	}
 
 	void finishKeyBegin() {
-		if (SERVER_KNOBS->ENABLE_CLEAR_RANGE_EAGER_READS) {
+		if (enableClearRangeEagerReads) {
 			std::sort(keyBegin.begin(), keyBegin.end());
 			keyBegin.resize(std::unique(keyBegin.begin(), keyBegin.end()) - keyBegin.begin());
 		}
@@ -5387,7 +5389,7 @@ ACTOR Future<Void> doEagerReads(StorageServer* data, UpdateEagerReadInfo* eager)
 	eager->finishKeyBegin();
 	state ReadOptions options;
 	options.type = ReadType::EAGER;
-	if (SERVER_KNOBS->ENABLE_CLEAR_RANGE_EAGER_READS) {
+	if (eager->enableClearRangeEagerReads) {
 		std::vector<Future<Key>> keyEnd(eager->keyBegin.size());
 		for (int i = 0; i < keyEnd.size(); i++)
 			keyEnd[i] = data->storage.readNextKeyInclusive(eager->keyBegin[i], options);
@@ -5591,7 +5593,7 @@ void expandClear(MutationRef& m,
 	i = d.lastLessOrEqual(m.param2);
 	if (i && i->isClearTo() && i->getEndKey() >= m.param2) {
 		m.param2 = i->getEndKey();
-	} else if (SERVER_KNOBS->ENABLE_CLEAR_RANGE_EAGER_READS) {
+	} else if (eager->enableClearRangeEagerReads) {
 		// Expand to the next set or clear (from storage or latestVersion), and if it
 		// is a clear, engulf it as well
 		i = d.lower_bound(m.param2);
@@ -8415,6 +8417,12 @@ ACTOR Future<Void> tssDelayForever() {
 ACTOR Future<Void> update(StorageServer* data, bool* pReceivedUpdate) {
 	state double updateStart = g_network->timer();
 	state double start;
+	state bool enableClearRangeEagerReads =
+	    (data->storage.getKeyValueStoreType() == KeyValueStoreType::SSD_ROCKSDB_V1 ||
+	     data->storage.getKeyValueStoreType() == KeyValueStoreType::SSD_SHARDED_ROCKSDB)
+	        ? SERVER_KNOBS->ROCKSDB_ENABLE_CLEAR_RANGE_EAGER_READS
+	        : SERVER_KNOBS->ENABLE_CLEAR_RANGE_EAGER_READS;
+	state UpdateEagerReadInfo eager(enableClearRangeEagerReads);
 
 	try {
 		// If we are disk bound and durableVersion is very old, we need to block updates or we could run out of
@@ -8517,7 +8525,6 @@ ACTOR Future<Void> update(StorageServer* data, bool* pReceivedUpdate) {
 		data->ssVersionLockLatencyHistogram->sampleSeconds(now() - start);
 
 		start = now();
-		state UpdateEagerReadInfo eager;
 		state FetchInjectionInfo fii;
 		state Reference<ILogSystem::IPeekCursor> cloneCursor2 = cursor->cloneNoMore();
 		state Optional<std::unordered_map<BlobCipherDetails, Reference<BlobCipherKey>>> cipherKeys;
@@ -8591,7 +8598,7 @@ ACTOR Future<Void> update(StorageServer* data, bool* pReceivedUpdate) {
 				    wait(getEncryptCipherKeys(data->db, cipherDetails, BlobCipherMetrics::TLOG));
 				cipherKeys = getCipherKeysResult;
 				collectingCipherKeys = false;
-				eager = UpdateEagerReadInfo();
+				eager = UpdateEagerReadInfo(enableClearRangeEagerReads);
 			} else {
 				// Any fetchKeys which are ready to transition their shards to the adding,transferred state do so now.
 				// If there is an epoch end we skip this step, to increase testability and to prevent inserting a
@@ -8615,7 +8622,7 @@ ACTOR Future<Void> update(StorageServer* data, bool* pReceivedUpdate) {
 				    "A fetchKeys completed while we were doing this, so eager might be outdated. Read it again.");
 				// SOMEDAY: Theoretically we could check the change counters of individual shards and retry the reads
 				// only selectively
-				eager = UpdateEagerReadInfo();
+				eager = UpdateEagerReadInfo(enableClearRangeEagerReads);
 				cloneCursor2 = cursor->cloneNoMore();
 			}
 		}
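
The heart of the change is the per-engine knob selection in update(): storage servers backed by SSD_ROCKSDB_V1 or SSD_SHARDED_ROCKSDB now consult ROCKSDB_ENABLE_CLEAR_RANGE_EAGER_READS, while every other engine keeps honoring ENABLE_CLEAR_RANGE_EAGER_READS, and the chosen value is threaded into UpdateEagerReadInfo instead of being read from SERVER_KNOBS at each use. The sketch below restates that selection as a small standalone C++ program; the Knobs struct and the resolveClearRangeEagerReads helper are illustrative stand-ins for SERVER_KNOBS and are not code from this patch.

// Standalone sketch of the knob selection done in update(); assumes nothing from
// fdbserver beyond the KeyValueStoreType names used in the patch.
#include <cstdio>

enum class KeyValueStoreType { SSD_BTREE_V2, MEMORY, SSD_ROCKSDB_V1, SSD_SHARDED_ROCKSDB };

struct Knobs {
    bool ENABLE_CLEAR_RANGE_EAGER_READS = true;         // existing knob, used by non-RocksDB engines
    bool ROCKSDB_ENABLE_CLEAR_RANGE_EAGER_READS = true; // new knob introduced by this patch
};

// Mirrors the ternary assigned to enableClearRangeEagerReads in update():
// RocksDB engines read the RocksDB-specific knob, everything else is unchanged.
bool resolveClearRangeEagerReads(const Knobs& knobs, KeyValueStoreType type) {
    return (type == KeyValueStoreType::SSD_ROCKSDB_V1 || type == KeyValueStoreType::SSD_SHARDED_ROCKSDB)
               ? knobs.ROCKSDB_ENABLE_CLEAR_RANGE_EAGER_READS
               : knobs.ENABLE_CLEAR_RANGE_EAGER_READS;
}

int main() {
    Knobs knobs;
    // e.g. a RocksDB deployment that prefers single-key deletes over eager clear-range reads
    knobs.ROCKSDB_ENABLE_CLEAR_RANGE_EAGER_READS = false;
    std::printf("rocksdb eager reads: %d\n",
                resolveClearRangeEagerReads(knobs, KeyValueStoreType::SSD_ROCKSDB_V1));
    std::printf("btree   eager reads: %d\n",
                resolveClearRangeEagerReads(knobs, KeyValueStoreType::SSD_BTREE_V2));
    return 0;
}

In a live cluster the same effect comes from overriding the knob at process startup, e.g. passing --knob_rocksdb_enable_clear_range_eager_reads=false to fdbserver (knob names are written in lower case on the command line), which is also how ROCKSDB_SINGLEKEY_DELETES_ON_CLEARRANGE and the eager-read knob can be kept from being enabled together, per the comment in ServerKnobs.cpp.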