Adding boundaries to rocksdb read iterator pool. (#8584)

This commit is contained in:
neethuhaneesha 2022-11-03 10:26:47 -07:00 committed by GitHub
parent 15625da4ce
commit 1daa346cb4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 69 additions and 13 deletions

View File

@ -407,6 +407,8 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( ROCKSDB_HISTOGRAMS_SAMPLE_RATE, 0.001 ); if( randomize && BUGGIFY ) ROCKSDB_HISTOGRAMS_SAMPLE_RATE = 0;
init( ROCKSDB_READ_RANGE_ITERATOR_REFRESH_TIME, 30.0 ); if( randomize && BUGGIFY ) ROCKSDB_READ_RANGE_ITERATOR_REFRESH_TIME = 0.1;
init( ROCKSDB_READ_RANGE_REUSE_ITERATORS, true ); if( randomize && BUGGIFY ) ROCKSDB_READ_RANGE_REUSE_ITERATORS = deterministicRandom()->coinflip() ? true : false;
init( ROCKSDB_READ_RANGE_REUSE_BOUNDED_ITERATORS, false ); if( randomize && BUGGIFY ) ROCKSDB_READ_RANGE_REUSE_BOUNDED_ITERATORS = deterministicRandom()->coinflip() ? true : false;
init( ROCKSDB_READ_RANGE_BOUNDED_ITERATORS_MAX_LIMIT, 200 );
// Set to 0 to disable rocksdb write rate limiting. Rate limiter unit: bytes per second.
init( ROCKSDB_WRITE_RATE_LIMITER_BYTES_PER_SEC, 0 );
// If true, enables dynamic adjustment of ROCKSDB_WRITE_RATE_LIMITER_BYTES according to the recent demand of background IO.

View File

@ -334,6 +334,8 @@ public:
double ROCKSDB_HISTOGRAMS_SAMPLE_RATE;
double ROCKSDB_READ_RANGE_ITERATOR_REFRESH_TIME;
bool ROCKSDB_READ_RANGE_REUSE_ITERATORS;
bool ROCKSDB_READ_RANGE_REUSE_BOUNDED_ITERATORS;
int ROCKSDB_READ_RANGE_BOUNDED_ITERATORS_MAX_LIMIT;
int64_t ROCKSDB_WRITE_RATE_LIMITER_BYTES_PER_SEC;
bool ROCKSDB_WRITE_RATE_LIMITER_AUTO_TUNE;
std::string DEFAULT_FDB_ROCKSDB_COLUMN_FAMILY;

View File

@ -397,13 +397,23 @@ struct Counters {
};
struct ReadIterator {
CF& cf;
uint64_t index; // incrementing counter to uniquely identify read iterator.
bool inUse;
std::shared_ptr<rocksdb::Iterator> iter;
double creationTime;
KeyRange keyRange;
std::shared_ptr<rocksdb::Slice> beginSlice, endSlice;
ReadIterator(CF& cf, uint64_t index, DB& db, rocksdb::ReadOptions& options)
: cf(cf), index(index), inUse(true), creationTime(now()), iter(db->NewIterator(options, cf)) {}
: index(index), inUse(true), creationTime(now()), iter(db->NewIterator(options, cf)) {}
ReadIterator(CF& cf, uint64_t index, DB& db, rocksdb::ReadOptions options, KeyRange keyRange)
: index(index), inUse(true), creationTime(now()), keyRange(keyRange) {
beginSlice = std::shared_ptr<rocksdb::Slice>(new rocksdb::Slice(toSlice(keyRange.begin)));
options.iterate_lower_bound = beginSlice.get();
endSlice = std::shared_ptr<rocksdb::Slice>(new rocksdb::Slice(toSlice(keyRange.end)));
options.iterate_upper_bound = endSlice.get();
iter = std::shared_ptr<rocksdb::Iterator>(db->NewIterator(options, cf));
}
};
/*
@ -426,42 +436,84 @@ public:
readRangeOptions.auto_prefix_mode = (SERVER_KNOBS->ROCKSDB_PREFIX_LEN > 0);
TraceEvent("ReadIteratorPool", id)
.detail("KnobRocksDBReadRangeReuseIterators", SERVER_KNOBS->ROCKSDB_READ_RANGE_REUSE_ITERATORS)
.detail("KnobRocksDBReadRangeReuseBoundedIterators",
SERVER_KNOBS->ROCKSDB_READ_RANGE_REUSE_BOUNDED_ITERATORS)
.detail("KnobRocksDBReadRangeBoundedIteratorsMaxLimit",
SERVER_KNOBS->ROCKSDB_READ_RANGE_BOUNDED_ITERATORS_MAX_LIMIT)
.detail("KnobRocksDBPrefixLen", SERVER_KNOBS->ROCKSDB_PREFIX_LEN);
if (SERVER_KNOBS->ROCKSDB_READ_RANGE_REUSE_ITERATORS &&
SERVER_KNOBS->ROCKSDB_READ_RANGE_REUSE_BOUNDED_ITERATORS) {
TraceEvent(SevWarn, "ReadIteratorKnobsMismatch");
}
}
// Called on every db commit.
void update() {
if (SERVER_KNOBS->ROCKSDB_READ_RANGE_REUSE_ITERATORS) {
if (SERVER_KNOBS->ROCKSDB_READ_RANGE_REUSE_ITERATORS ||
SERVER_KNOBS->ROCKSDB_READ_RANGE_REUSE_BOUNDED_ITERATORS) {
std::lock_guard<std::mutex> lock(mutex);
iteratorsMap.clear();
}
}
// Called on every read operation.
ReadIterator getIterator() {
ReadIterator getIterator(KeyRange keyRange) {
if (SERVER_KNOBS->ROCKSDB_READ_RANGE_REUSE_ITERATORS) {
std::lock_guard<std::mutex> lock(mutex);
mutex.lock();
for (it = iteratorsMap.begin(); it != iteratorsMap.end(); it++) {
if (!it->second.inUse) {
it->second.inUse = true;
iteratorsReuseCount++;
return it->second;
ReadIterator iter = it->second;
mutex.unlock();
return iter;
}
}
index++;
ReadIterator iter(cf, index, db, readRangeOptions);
iteratorsMap.insert({ index, iter });
uint64_t readIteratorIndex = index;
mutex.unlock();
ReadIterator iter(cf, readIteratorIndex, db, readRangeOptions);
mutex.lock();
iteratorsMap.insert({ readIteratorIndex, iter });
mutex.unlock();
return iter;
} else if (SERVER_KNOBS->ROCKSDB_READ_RANGE_REUSE_BOUNDED_ITERATORS) {
// TODO: Based on the datasize in the keyrange, decide whether to store the iterator for reuse.
mutex.lock();
for (it = iteratorsMap.begin(); it != iteratorsMap.end(); it++) {
if (!it->second.inUse && it->second.keyRange.contains(keyRange)) {
it->second.inUse = true;
iteratorsReuseCount++;
ReadIterator iter = it->second;
mutex.unlock();
return iter;
}
}
index++;
uint64_t readIteratorIndex = index;
mutex.unlock();
ReadIterator iter(cf, readIteratorIndex, db, readRangeOptions, keyRange);
if (iteratorsMap.size() < SERVER_KNOBS->ROCKSDB_READ_RANGE_BOUNDED_ITERATORS_MAX_LIMIT) {
// Not storing more than ROCKSDB_READ_RANGE_BOUNDED_ITERATORS_MAX_LIMIT of iterators
// to avoid 'out of memory' issues.
mutex.lock();
iteratorsMap.insert({ readIteratorIndex, iter });
mutex.unlock();
}
return iter;
} else {
index++;
ReadIterator iter(cf, index, db, readRangeOptions);
ReadIterator iter(cf, index, db, readRangeOptions, keyRange);
return iter;
}
}
// Called on every read operation, after the keys are collected.
void returnIterator(ReadIterator& iter) {
if (SERVER_KNOBS->ROCKSDB_READ_RANGE_REUSE_ITERATORS) {
if (SERVER_KNOBS->ROCKSDB_READ_RANGE_REUSE_ITERATORS ||
SERVER_KNOBS->ROCKSDB_READ_RANGE_REUSE_BOUNDED_ITERATORS) {
std::lock_guard<std::mutex> lock(mutex);
it = iteratorsMap.find(iter.index);
// iterator found: put the iterator back to the pool(inUse=false).
@ -768,7 +820,7 @@ uint64_t PerfContextMetrics::getRocksdbPerfcontextMetric(int metric) {
}
ACTOR Future<Void> refreshReadIteratorPool(std::shared_ptr<ReadIteratorPool> readIterPool) {
if (SERVER_KNOBS->ROCKSDB_READ_RANGE_REUSE_ITERATORS) {
if (SERVER_KNOBS->ROCKSDB_READ_RANGE_REUSE_ITERATORS || SERVER_KNOBS->ROCKSDB_READ_RANGE_REUSE_BOUNDED_ITERATORS) {
loop {
wait(delay(SERVER_KNOBS->ROCKSDB_READ_RANGE_ITERATOR_REFRESH_TIME));
readIterPool->refreshIterators();
@ -1559,7 +1611,7 @@ struct RocksDBKeyValueStore : IKeyValueStore {
rocksdb::Status s;
if (a.rowLimit >= 0) {
double iterCreationBeginTime = a.getHistograms ? timer_monotonic() : 0;
ReadIterator readIter = readIterPool->getIterator();
ReadIterator readIter = readIterPool->getIterator(a.keys);
if (a.getHistograms) {
metricPromiseStream->send(std::make_pair(ROCKSDB_READRANGE_NEWITERATOR_HISTOGRAM.toString(),
timer_monotonic() - iterCreationBeginTime));
@ -1588,7 +1640,7 @@ struct RocksDBKeyValueStore : IKeyValueStore {
readIterPool->returnIterator(readIter);
} else {
double iterCreationBeginTime = a.getHistograms ? timer_monotonic() : 0;
ReadIterator readIter = readIterPool->getIterator();
ReadIterator readIter = readIterPool->getIterator(a.keys);
if (a.getHistograms) {
metricPromiseStream->send(std::make_pair(ROCKSDB_READRANGE_NEWITERATOR_HISTOGRAM.toString(),
timer_monotonic() - iterCreationBeginTime));