From b699ba4c23a6681becfa91e7aa955e79980bb1b8 Mon Sep 17 00:00:00 2001 From: Meng Xu Date: Mon, 14 Nov 2022 14:25:54 -0800 Subject: [PATCH 1/2] Increase memtable and writebuffer size for rocksdb simulation test The memtable and writebuffer sizes are too small in simulation, which causes thousands of sst files and at least 6 levels of ssts. Both make compaction slower in simulation and contribute to timeout errors. After increasing the sizes, the failure rate (timeout failures) when we only run the rocksdb and sharded rocksdb engines in simulation drops from 10 out of 332339 tests to 10 out of 497532 tests. For Apple devs who want to look into the Joshua details: before the change, the Joshua ensemble id is 20221111-223720-mengxudebugrocks-505ede1c55664ddf; after the change, the Joshua ensemble id is 20221114-192042-mengxurocksdebugknobchange-1e4c047d112e9a38 --- fdbclient/ServerKnobs.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fdbclient/ServerKnobs.cpp b/fdbclient/ServerKnobs.cpp index 431278ee22..af9639cf0a 100644 --- a/fdbclient/ServerKnobs.cpp +++ b/fdbclient/ServerKnobs.cpp @@ -390,19 +390,22 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi // If true, do not process and store RocksDB logs init( ROCKSDB_MUTE_LOGS, true ); // Use a smaller memtable in simulation to avoid OOMs. - int64_t memtableBytes = isSimulated ? 32 * 1024 : 512 * 1024 * 1024; + // TODO: change it to bigger value. it was 32K when timeout + int64_t memtableBytes = isSimulated ? 1024 * 1024 : 512 * 1024 * 1024; init( ROCKSDB_MEMTABLE_BYTES, memtableBytes ); init( ROCKSDB_LEVEL_STYLE_COMPACTION, true ); init( ROCKSDB_UNSAFE_AUTO_FSYNC, false ); init( ROCKSDB_PERIODIC_COMPACTION_SECONDS, 0 ); init( ROCKSDB_PREFIX_LEN, 0 ); // If rocksdb block cache size is 0, the default 8MB is used. - int64_t blockCacheSize = isSimulated ? 0 : 1024 * 1024 * 1024 /* 1GB */; + int64_t blockCacheSize = isSimulated ? 
16 * 1024 * 1024 : 1024 * 1024 * 1024 /* 1GB */; init( ROCKSDB_BLOCK_CACHE_SIZE, blockCacheSize ); init( ROCKSDB_METRICS_DELAY, 60.0 ); - init( ROCKSDB_READ_VALUE_TIMEOUT, isSimulated ? 5.0 : 200.0 ); - init( ROCKSDB_READ_VALUE_PREFIX_TIMEOUT, isSimulated ? 5.0 : 200.0 ); - init( ROCKSDB_READ_RANGE_TIMEOUT, isSimulated ? 5.0 : 200.0 ); + // // In simulation, increasing the read operation timeouts to 5 minutes, as some of the tests have + // very high load and single read thread cannot process all the load within the timeouts. + init( ROCKSDB_READ_VALUE_TIMEOUT, 200.0 ); if (isSimulated) ROCKSDB_READ_VALUE_TIMEOUT = 5 * 60; + init( ROCKSDB_READ_VALUE_PREFIX_TIMEOUT, 200.0 ); if (isSimulated) ROCKSDB_READ_VALUE_PREFIX_TIMEOUT = 5 * 60; + init( ROCKSDB_READ_RANGE_TIMEOUT, 200.0 ); if (isSimulated) ROCKSDB_READ_RANGE_TIMEOUT = 5 * 60; init( ROCKSDB_READ_QUEUE_WAIT, 1.0 ); init( ROCKSDB_READ_QUEUE_HARD_MAX, 1000 ); init( ROCKSDB_READ_QUEUE_SOFT_MAX, 500 ); From 68eb129c71329837934c5d82265298add723ff31 Mon Sep 17 00:00:00 2001 From: Meng Xu Date: Mon, 14 Nov 2022 16:17:49 -0800 Subject: [PATCH 2/2] RocksDB:Use knob to control readValueTimeout value in simulation --- fdbclient/ServerKnobs.cpp | 4 ++-- fdbserver/KeyValueStoreRocksDB.actor.cpp | 16 +++++----------- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/fdbclient/ServerKnobs.cpp b/fdbclient/ServerKnobs.cpp index af9639cf0a..8b32417aff 100644 --- a/fdbclient/ServerKnobs.cpp +++ b/fdbclient/ServerKnobs.cpp @@ -390,7 +390,6 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi // If true, do not process and store RocksDB logs init( ROCKSDB_MUTE_LOGS, true ); // Use a smaller memtable in simulation to avoid OOMs. - // TODO: change it to bigger value. it was 32K when timeout int64_t memtableBytes = isSimulated ? 
1024 * 1024 : 512 * 1024 * 1024; init( ROCKSDB_MEMTABLE_BYTES, memtableBytes ); init( ROCKSDB_LEVEL_STYLE_COMPACTION, true ); @@ -401,7 +400,8 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi int64_t blockCacheSize = isSimulated ? 16 * 1024 * 1024 : 1024 * 1024 * 1024 /* 1GB */; init( ROCKSDB_BLOCK_CACHE_SIZE, blockCacheSize ); init( ROCKSDB_METRICS_DELAY, 60.0 ); - // // In simulation, increasing the read operation timeouts to 5 minutes, as some of the tests have + // ROCKSDB_READ_VALUE_TIMEOUT, ROCKSDB_READ_VALUE_PREFIX_TIMEOUT, ROCKSDB_READ_RANGE_TIMEOUT knobs: + // In simulation, increasing the read operation timeouts to 5 minutes, as some of the tests have // very high load and single read thread cannot process all the load within the timeouts. init( ROCKSDB_READ_VALUE_TIMEOUT, 200.0 ); if (isSimulated) ROCKSDB_READ_VALUE_TIMEOUT = 5 * 60; init( ROCKSDB_READ_VALUE_PREFIX_TIMEOUT, 200.0 ); if (isSimulated) ROCKSDB_READ_VALUE_PREFIX_TIMEOUT = 5 * 60; diff --git a/fdbserver/KeyValueStoreRocksDB.actor.cpp b/fdbserver/KeyValueStoreRocksDB.actor.cpp index f2525b49a2..94e07b0798 100644 --- a/fdbserver/KeyValueStoreRocksDB.actor.cpp +++ b/fdbserver/KeyValueStoreRocksDB.actor.cpp @@ -1402,17 +1402,11 @@ struct RocksDBKeyValueStore : IKeyValueStore { ThreadReturnPromiseStream>* metricPromiseStream) : id(id), db(db), cf(cf), sharedState(sharedState), readIterPool(readIterPool), perfContextMetrics(perfContextMetrics), metricPromiseStream(metricPromiseStream), threadIndex(threadIndex) { - if (g_network->isSimulated()) { - // In simulation, increasing the read operation timeouts to 5 minutes, as some of the tests have - // very high load and single read thread cannot process all the load within the timeouts. 
- readValueTimeout = 5 * 60; - readValuePrefixTimeout = 5 * 60; - readRangeTimeout = 5 * 60; - } else { - readValueTimeout = SERVER_KNOBS->ROCKSDB_READ_VALUE_TIMEOUT; - readValuePrefixTimeout = SERVER_KNOBS->ROCKSDB_READ_VALUE_PREFIX_TIMEOUT; - readRangeTimeout = SERVER_KNOBS->ROCKSDB_READ_RANGE_TIMEOUT; - } + + readValueTimeout = SERVER_KNOBS->ROCKSDB_READ_VALUE_TIMEOUT; + readValuePrefixTimeout = SERVER_KNOBS->ROCKSDB_READ_VALUE_PREFIX_TIMEOUT; + readRangeTimeout = SERVER_KNOBS->ROCKSDB_READ_RANGE_TIMEOUT; + if (SERVER_KNOBS->ROCKSDB_PERFCONTEXT_ENABLE) { // Enable perf context on the same thread with the db thread rocksdb::SetPerfLevel(rocksdb::PerfLevel::kEnableTimeExceptForMutex);