Add knobs for prefix bloom filters and larger block cache

This commit is contained in:
Daniel Smith 2021-01-11 21:38:34 +00:00
parent 3b0dbe2803
commit ecf5c2b591
3 changed files with 46 additions and 2 deletions

View File

@ -1,7 +1,11 @@
#ifdef SSD_ROCKSDB_EXPERIMENTAL
#include <rocksdb/cache.h>
#include <rocksdb/db.h>
#include <rocksdb/filter_policy.h>
#include <rocksdb/options.h>
#include <rocksdb/slice_transform.h>
#include <rocksdb/table.h>
#include <rocksdb/utilities/table_properties_collectors.h>
#include "flow/flow.h"
#include "flow/IThreadPool.h"
@ -42,6 +46,40 @@ rocksdb::Options getOptions() {
if (SERVER_KNOBS->ROCKSDB_BACKGROUND_PARALLELISM > 0) {
options.IncreaseParallelism(SERVER_KNOBS->ROCKSDB_BACKGROUND_PARALLELISM);
}
rocksdb::BlockBasedTableOptions bbOpts;
// TODO: Add a knob for the block cache size. (Default is 8 MB)
if (SERVER_KNOBS->ROCKSDB_PREFIX_LEN > 0) {
// Prefix blooms are used during Seek.
options.prefix_extractor.reset(rocksdb::NewFixedPrefixTransform(SERVER_KNOBS->ROCKSDB_PREFIX_LEN));
// Also turn on bloom filters in the memtable.
// TODO: Make a knob for this as well.
options.memtable_prefix_bloom_size_ratio = 0.1;
// 5 -- Can be read by RocksDB's versions since 6.6.0. Full and partitioned
// filters use a generally faster and more accurate Bloom filter
// implementation, with a different schema.
// https://github.com/facebook/rocksdb/blob/b77569f18bfc77fb1d8a0b3218f6ecf571bc4988/include/rocksdb/table.h#L391
bbOpts.format_version = 5;
// Create and apply a bloom filter using the 10 bits
// which should yield a ~1% false positive rate:
// https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter#full-filters-new-format
bbOpts.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10));
// The whole key blooms are only used for point lookups.
// https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter#prefix-vs-whole-key
bbOpts.whole_key_filtering = false;
}
if (SERVER_KNOBS->ROCKSDB_BLOCK_CACHE_SIZE > 0) {
bbOpts.block_cache = rocksdb::NewLRUCache(SERVER_KNOBS->ROCKSDB_BLOCK_CACHE_SIZE);
}
if (SERVER_KNOBS->ROCKSDB_BLOCK_CACHE_SIZE > 0 || SERVER_KNOBS->ROCKSDB_PREFIX_LEN > 0) {
options.table_factory.reset(rocksdb::NewBlockBasedTableFactory(bbOpts));
}
return options;
}
@ -248,8 +286,11 @@ struct RocksDBKeyValueStore : IKeyValueStore {
}
int accumulatedBytes = 0;
rocksdb::Status s;
rocksdb::ReadOptions options;
// When using a prefix extractor, ensure that keys are returned in order even if they cross
// a prefix boundary.
options.auto_prefix_mode = (SERVER_KNOBS->ROCKSDB_PREFIX_LEN > 0);
if (a.rowLimit >= 0) {
rocksdb::ReadOptions options;
auto endSlice = toSlice(a.keys.end);
options.iterate_upper_bound = &endSlice;
auto cursor = std::unique_ptr<rocksdb::Iterator>(db->NewIterator(options));
@ -266,7 +307,6 @@ struct RocksDBKeyValueStore : IKeyValueStore {
}
s = cursor->status();
} else {
rocksdb::ReadOptions options;
auto beginSlice = toSlice(a.keys.begin);
options.iterate_lower_bound = &beginSlice;
auto cursor = std::unique_ptr<rocksdb::Iterator>(db->NewIterator(options));

View File

@ -323,6 +323,8 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
init( ROCKSDB_MEMTABLE_BYTES, 512 * 1024 * 1024 );
init( ROCKSDB_UNSAFE_AUTO_FSYNC, false );
init( ROCKSDB_PERIODIC_COMPACTION_SECONDS, 0 );
init( ROCKSDB_PREFIX_LEN, 0 );
init( ROCKSDB_BLOCK_CACHE_SIZE, 0 );
// Leader election
bool longLeaderElection = randomize && BUGGIFY;

View File

@ -258,6 +258,8 @@ public:
int64_t ROCKSDB_MEMTABLE_BYTES;
bool ROCKSDB_UNSAFE_AUTO_FSYNC;
int64_t ROCKSDB_PERIODIC_COMPACTION_SECONDS;
int ROCKSDB_PREFIX_LEN;
int64_t ROCKSDB_BLOCK_CACHE_SIZE;
// Leader election
int MAX_NOTIFICATIONS;