Merge pull request #8614 from neethuhaneesha/clearRanges

Enable clear range eager reads knob for rocksdb.
This commit is contained in:
Jingyu Zhou 2022-10-28 17:03:39 -07:00 committed by GitHub
commit 22293ebac5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 19 additions and 10 deletions

View File

@ -422,10 +422,11 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
// Enable this knob only for experimental purposes; never enable this in production.
// If enabled, all the committed in-memory memtable writes are lost on a crash.
init( ROCKSDB_DISABLE_WAL_EXPERIMENTAL, false );
// If ROCKSDB_SINGLEKEY_DELETES_ON_CLEARRANGE is enabled, disable ENABLE_CLEAR_RANGE_EAGER_READS knob.
// If ROCKSDB_SINGLEKEY_DELETES_ON_CLEARRANGE is enabled, disable ROCKSDB_ENABLE_CLEAR_RANGE_EAGER_READS knob.
// These knobs have contrary functionality.
init( ROCKSDB_SINGLEKEY_DELETES_ON_CLEARRANGE, false ); if( randomize && BUGGIFY ) ROCKSDB_SINGLEKEY_DELETES_ON_CLEARRANGE = deterministicRandom()->coinflip() ? false : true;
init( ROCKSDB_SINGLEKEY_DELETES_BYTES_LIMIT, 200000 ); // 200KB
init( ROCKSDB_ENABLE_CLEAR_RANGE_EAGER_READS, true ); if( randomize && BUGGIFY ) ROCKSDB_ENABLE_CLEAR_RANGE_EAGER_READS = deterministicRandom()->coinflip() ? false : true;
// canCommit will delay ROCKSDB_CAN_COMMIT_DELAY_ON_OVERLOAD seconds for up to
// ROCKSDB_CAN_COMMIT_DELAY_TIMES_ON_OVERLOAD times if rocksdb is overloaded.
// Set ROCKSDB_CAN_COMMIT_DELAY_TIMES_ON_OVERLOAD to 0, to disable

View File

@ -349,6 +349,7 @@ public:
bool ROCKSDB_DISABLE_WAL_EXPERIMENTAL;
bool ROCKSDB_SINGLEKEY_DELETES_ON_CLEARRANGE;
int64_t ROCKSDB_SINGLEKEY_DELETES_BYTES_LIMIT;
bool ROCKSDB_ENABLE_CLEAR_RANGE_EAGER_READS;
int64_t ROCKSDB_COMPACTION_READAHEAD_SIZE;
int64_t ROCKSDB_BLOCK_SIZE;
bool ENABLE_SHARDED_ROCKSDB;

View File

@ -461,6 +461,9 @@ struct UpdateEagerReadInfo {
std::vector<Optional<Value>> value;
Arena arena;
bool enableClearRangeEagerReads;
UpdateEagerReadInfo(bool enableClearRangeEagerReads) : enableClearRangeEagerReads(enableClearRangeEagerReads) {}
void addMutations(VectorRef<MutationRef> const& mutations) {
for (auto& m : mutations)
@ -469,11 +472,10 @@ struct UpdateEagerReadInfo {
void addMutation(MutationRef const& m) {
// SOMEDAY: Theoretically we can avoid a read if there is an earlier overlapping ClearRange
if (m.type == MutationRef::ClearRange && !m.param2.startsWith(systemKeys.end) &&
SERVER_KNOBS->ENABLE_CLEAR_RANGE_EAGER_READS)
if (m.type == MutationRef::ClearRange && !m.param2.startsWith(systemKeys.end) && enableClearRangeEagerReads)
keyBegin.push_back(m.param2);
else if (m.type == MutationRef::CompareAndClear) {
if (SERVER_KNOBS->ENABLE_CLEAR_RANGE_EAGER_READS)
if (enableClearRangeEagerReads)
keyBegin.push_back(keyAfter(m.param1, arena));
if (keys.size() > 0 && keys.back().first == m.param1) {
// Don't issue a second read, if the last read was equal to the current key.
@ -490,7 +492,7 @@ struct UpdateEagerReadInfo {
}
void finishKeyBegin() {
if (SERVER_KNOBS->ENABLE_CLEAR_RANGE_EAGER_READS) {
if (enableClearRangeEagerReads) {
std::sort(keyBegin.begin(), keyBegin.end());
keyBegin.resize(std::unique(keyBegin.begin(), keyBegin.end()) - keyBegin.begin());
}
@ -5374,7 +5376,7 @@ ACTOR Future<Void> doEagerReads(StorageServer* data, UpdateEagerReadInfo* eager)
eager->finishKeyBegin();
state ReadOptions options;
options.type = ReadType::EAGER;
if (SERVER_KNOBS->ENABLE_CLEAR_RANGE_EAGER_READS) {
if (eager->enableClearRangeEagerReads) {
std::vector<Future<Key>> keyEnd(eager->keyBegin.size());
for (int i = 0; i < keyEnd.size(); i++)
keyEnd[i] = data->storage.readNextKeyInclusive(eager->keyBegin[i], options);
@ -5578,7 +5580,7 @@ void expandClear(MutationRef& m,
i = d.lastLessOrEqual(m.param2);
if (i && i->isClearTo() && i->getEndKey() >= m.param2) {
m.param2 = i->getEndKey();
} else if (SERVER_KNOBS->ENABLE_CLEAR_RANGE_EAGER_READS) {
} else if (eager->enableClearRangeEagerReads) {
// Expand to the next set or clear (from storage or latestVersion), and if it
// is a clear, engulf it as well
i = d.lower_bound(m.param2);
@ -8415,6 +8417,12 @@ ACTOR Future<Void> tssDelayForever() {
ACTOR Future<Void> update(StorageServer* data, bool* pReceivedUpdate) {
state double updateStart = g_network->timer();
state double start;
state bool enableClearRangeEagerReads =
(data->storage.getKeyValueStoreType() == KeyValueStoreType::SSD_ROCKSDB_V1 ||
data->storage.getKeyValueStoreType() == KeyValueStoreType::SSD_SHARDED_ROCKSDB)
? SERVER_KNOBS->ROCKSDB_ENABLE_CLEAR_RANGE_EAGER_READS
: SERVER_KNOBS->ENABLE_CLEAR_RANGE_EAGER_READS;
state UpdateEagerReadInfo eager(enableClearRangeEagerReads);
try {
// If we are disk bound and durableVersion is very old, we need to block updates or we could run out of
@ -8517,7 +8525,6 @@ ACTOR Future<Void> update(StorageServer* data, bool* pReceivedUpdate) {
data->ssVersionLockLatencyHistogram->sampleSeconds(now() - start);
start = now();
state UpdateEagerReadInfo eager;
state FetchInjectionInfo fii;
state Reference<ILogSystem::IPeekCursor> cloneCursor2 = cursor->cloneNoMore();
state Optional<std::unordered_map<BlobCipherDetails, Reference<BlobCipherKey>>> cipherKeys;
@ -8591,7 +8598,7 @@ ACTOR Future<Void> update(StorageServer* data, bool* pReceivedUpdate) {
wait(getEncryptCipherKeys(data->db, cipherDetails, BlobCipherMetrics::TLOG));
cipherKeys = getCipherKeysResult;
collectingCipherKeys = false;
eager = UpdateEagerReadInfo();
eager = UpdateEagerReadInfo(enableClearRangeEagerReads);
} else {
// Any fetchKeys which are ready to transition their shards to the adding,transferred state do so now.
// If there is an epoch end we skip this step, to increase testability and to prevent inserting a
@ -8615,7 +8622,7 @@ ACTOR Future<Void> update(StorageServer* data, bool* pReceivedUpdate) {
"A fetchKeys completed while we were doing this, so eager might be outdated. Read it again.");
// SOMEDAY: Theoretically we could check the change counters of individual shards and retry the reads
// only selectively
eager = UpdateEagerReadInfo();
eager = UpdateEagerReadInfo(enableClearRangeEagerReads);
cloneCursor2 = cursor->cloneNoMore();
}
}