From 0613ff892f8701e3772a5d3d75558b4ea342d722 Mon Sep 17 00:00:00 2001 From: Daniel Smith Date: Fri, 9 Jul 2021 16:25:14 -0400 Subject: [PATCH 1/5] Enable RocksDB in simulation --- fdbclient/ServerKnobs.cpp | 4 +++- fdbserver/KeyValueStoreRocksDB.actor.cpp | 14 +++++++++++--- fdbserver/SimulatedCluster.actor.cpp | 13 +++++++++++-- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/fdbclient/ServerKnobs.cpp b/fdbclient/ServerKnobs.cpp index 994f367292..d46336099d 100644 --- a/fdbclient/ServerKnobs.cpp +++ b/fdbclient/ServerKnobs.cpp @@ -335,7 +335,9 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi // KeyValueStoreRocksDB init( ROCKSDB_BACKGROUND_PARALLELISM, 0 ); init( ROCKSDB_READ_PARALLELISM, 4 ); - init( ROCKSDB_MEMTABLE_BYTES, 512 * 1024 * 1024 ); + // Use a smaller memtable in simulation to avoid OOMs. + int64_t memtableBytes = isSimulated ? 64 * 1014 : 512 * 1024 * 1024; + init( ROCKSDB_MEMTABLE_BYTES, memtableBytes ); init( ROCKSDB_UNSAFE_AUTO_FSYNC, false ); init( ROCKSDB_PERIODIC_COMPACTION_SECONDS, 0 ); init( ROCKSDB_PREFIX_LEN, 0 ); diff --git a/fdbserver/KeyValueStoreRocksDB.actor.cpp b/fdbserver/KeyValueStoreRocksDB.actor.cpp index 99278a7862..edae3412d0 100644 --- a/fdbserver/KeyValueStoreRocksDB.actor.cpp +++ b/fdbserver/KeyValueStoreRocksDB.actor.cpp @@ -7,6 +7,7 @@ #include #include #include +#include "fdbserver/CoroFlow.h" #include "flow/flow.h" #include "flow/IThreadPool.h" @@ -283,7 +284,9 @@ struct RocksDBKeyValueStore : IKeyValueStore { std::min(value.size(), size_t(a.maxLength))))); } else { if (!s.IsNotFound()) { - TraceEvent(SevError, "RocksDBError").detail("Error", s.ToString()).detail("Method", "ReadValuePrefix"); + TraceEvent(SevError, "RocksDBError") + .detail("Error", s.ToString()) + .detail("Method", "ReadValuePrefix"); } a.result.send(Optional()); } @@ -367,8 +370,13 @@ struct RocksDBKeyValueStore : IKeyValueStore { std::unique_ptr writeBatch; explicit RocksDBKeyValueStore(const std::string& path, UID id) : path(path), id(id) { - writeThread = createGenericThreadPool(); - readThreads = createGenericThreadPool(); + if (g_network->isSimulated()) { + writeThread = CoroThreadPool::createThreadPool(); + readThreads = CoroThreadPool::createThreadPool(); + } else { + writeThread = createGenericThreadPool(); + readThreads = createGenericThreadPool(); + } writeThread->addThread(new Writer(db, id), "fdb-rocksdb-wr"); for (unsigned i = 0; i < SERVER_KNOBS->ROCKSDB_READ_PARALLELISM; ++i) { readThreads->addThread(new Reader(db), "fdb-rocksdb-re"); diff --git a/fdbserver/SimulatedCluster.actor.cpp b/fdbserver/SimulatedCluster.actor.cpp index a8a152f141..831fbaf630 100644 --- a/fdbserver/SimulatedCluster.actor.cpp +++ b/fdbserver/SimulatedCluster.actor.cpp @@ -232,6 +232,7 @@ public: // 1 = "memory" // 2 = "memory-radixtree-beta" // 3 = "ssd-redwood-experimental" + // 4 = "ssd-rocksdb-experimental" // Requires a comma-separated list of numbers WITHOUT whitespaces std::vector storageEngineExcludeTypes; // Set the maximum TLog version that can be selected for a test @@ -1252,7 +1253,7 @@ void SimulationConfig::setDatacenters(const TestConfig& testConfig) { // Sets storage engine based on testConfig details void SimulationConfig::setStorageEngine(const TestConfig& testConfig) { - int storage_engine_type = deterministicRandom()->randomInt(0, 4); + int storage_engine_type = deterministicRandom()->randomInt(0, 5); if (testConfig.storageEngineType.present()) { storage_engine_type = testConfig.storageEngineType.get(); } else { @@ -1260,7 +1261,7 @@ void SimulationConfig::setStorageEngine(const TestConfig& testConfig) { while (std::find(testConfig.storageEngineExcludeTypes.begin(), testConfig.storageEngineExcludeTypes.end(), storage_engine_type) != testConfig.storageEngineExcludeTypes.end()) { - storage_engine_type = deterministicRandom()->randomInt(0, 4); + storage_engine_type = deterministicRandom()->randomInt(0, 5); } } @@ -1285,6 +1286,14 @@ void SimulationConfig::setStorageEngine(const TestConfig& testConfig) { set_config("ssd-redwood-experimental"); break; } + case 4: { + TEST(true); // Simulated cluster using RocksDB storage engine + set_config("ssd-rocksdb-experimental"); + // Tests using the RocksDB engine are necessarily non-deterministic because of RocksDB + // background threads. + noUnseed = true; + break; + } default: ASSERT(false); // Programmer forgot to adjust cases. } From 6e0bfba745d4f40e6c9ad2180f10b18e36b38052 Mon Sep 17 00:00:00 2001 From: Daniel Smith Date: Mon, 12 Jul 2021 12:34:27 -0400 Subject: [PATCH 2/5] Don't use the RocksDB engine with snapshot tests --- tests/restarting/from_6.2.33/SnapCycleRestart-1.txt | 2 ++ tests/restarting/from_6.2.33/SnapCycleRestart-2.txt | 1 + tests/restarting/from_6.2.33/SnapTestAttrition-1.txt | 2 ++ tests/restarting/from_6.2.33/SnapTestAttrition-2.txt | 1 + tests/restarting/from_6.2.33/SnapTestRestart-1.txt | 2 ++ tests/restarting/from_6.2.33/SnapTestRestart-2.txt | 1 + tests/restarting/from_6.2.33/SnapTestSimpleRestart-1.txt | 2 ++ tests/restarting/from_6.2.33/SnapTestSimpleRestart-2.txt | 1 + tests/restarting/from_7.0.0/SnapIncrementalRestore-1.toml | 3 ++- tests/restarting/from_7.0.0/SnapIncrementalRestore-2.toml | 3 +++ 10 files changed, 17 insertions(+), 1 deletion(-) diff --git a/tests/restarting/from_6.2.33/SnapCycleRestart-1.txt b/tests/restarting/from_6.2.33/SnapCycleRestart-1.txt index e56bda34f9..3a3a678b92 100644 --- a/tests/restarting/from_6.2.33/SnapCycleRestart-1.txt +++ b/tests/restarting/from_6.2.33/SnapCycleRestart-1.txt @@ -1,3 +1,5 @@ +storageEngineExcludeTypes=4 + ;Take snap and do cycle test testTitle=SnapCyclePre clearAfterTest=false diff --git a/tests/restarting/from_6.2.33/SnapCycleRestart-2.txt b/tests/restarting/from_6.2.33/SnapCycleRestart-2.txt index 2fae7418d4..a2b7f05a0b 100644 --- a/tests/restarting/from_6.2.33/SnapCycleRestart-2.txt +++ b/tests/restarting/from_6.2.33/SnapCycleRestart-2.txt @@ -1,4 +1,5 @@ buggify=off +storageEngineExcludeTypes=4 testTitle=SnapCycleRestore runSetup=false diff --git a/tests/restarting/from_6.2.33/SnapTestAttrition-1.txt b/tests/restarting/from_6.2.33/SnapTestAttrition-1.txt index 401a075c0d..f389400c98 100644 --- a/tests/restarting/from_6.2.33/SnapTestAttrition-1.txt +++ b/tests/restarting/from_6.2.33/SnapTestAttrition-1.txt @@ -1,3 +1,5 @@ +storageEngineExcludeTypes=4 + ;write 1000 Keys ending with even numbers testTitle=SnapTestPre clearAfterTest=false diff --git a/tests/restarting/from_6.2.33/SnapTestAttrition-2.txt b/tests/restarting/from_6.2.33/SnapTestAttrition-2.txt index d7d9131549..d4a37486cb 100644 --- a/tests/restarting/from_6.2.33/SnapTestAttrition-2.txt +++ b/tests/restarting/from_6.2.33/SnapTestAttrition-2.txt @@ -1,4 +1,5 @@ buggify=off +storageEngineExcludeTypes=4 ; verify all keys are even numbered testTitle=SnapTestVerify diff --git a/tests/restarting/from_6.2.33/SnapTestRestart-1.txt b/tests/restarting/from_6.2.33/SnapTestRestart-1.txt index 319a617628..c639381244 100644 --- a/tests/restarting/from_6.2.33/SnapTestRestart-1.txt +++ b/tests/restarting/from_6.2.33/SnapTestRestart-1.txt @@ -1,3 +1,5 @@ +storageEngineExcludeTypes=4 + ;write 1000 Keys ending with even numbers testTitle=SnapTestPre clearAfterTest=false diff --git a/tests/restarting/from_6.2.33/SnapTestRestart-2.txt b/tests/restarting/from_6.2.33/SnapTestRestart-2.txt index 0d4aae1d16..f4154c9754 100644 --- a/tests/restarting/from_6.2.33/SnapTestRestart-2.txt +++ b/tests/restarting/from_6.2.33/SnapTestRestart-2.txt @@ -1,4 +1,5 @@ buggify=off +storageEngineExcludeTypes=4 ; verify all keys are even numbered testTitle=SnapTestVerify diff --git a/tests/restarting/from_6.2.33/SnapTestSimpleRestart-1.txt b/tests/restarting/from_6.2.33/SnapTestSimpleRestart-1.txt index 2416f27d29..00f3bb6c3e 100644 --- a/tests/restarting/from_6.2.33/SnapTestSimpleRestart-1.txt +++ b/tests/restarting/from_6.2.33/SnapTestSimpleRestart-1.txt @@ -1,3 +1,5 @@ +storageEngineExcludeTypes=4 + ;write 1000 Keys ending with even number testTitle=SnapSimplePre clearAfterTest=false diff --git a/tests/restarting/from_6.2.33/SnapTestSimpleRestart-2.txt b/tests/restarting/from_6.2.33/SnapTestSimpleRestart-2.txt index eeaa599184..3ffe0a8fcc 100644 --- a/tests/restarting/from_6.2.33/SnapTestSimpleRestart-2.txt +++ b/tests/restarting/from_6.2.33/SnapTestSimpleRestart-2.txt @@ -1,4 +1,5 @@ buggify=off +storageEngineExcludeTypes=4 ; verify all keys are even numbered testTitle=SnapSimpleVerify diff --git a/tests/restarting/from_7.0.0/SnapIncrementalRestore-1.toml b/tests/restarting/from_7.0.0/SnapIncrementalRestore-1.toml index 6321090c4e..3cde35a07c 100644 --- a/tests/restarting/from_7.0.0/SnapIncrementalRestore-1.toml +++ b/tests/restarting/from_7.0.0/SnapIncrementalRestore-1.toml @@ -1,7 +1,8 @@ [configuration] logAntiQuorum = 0 +storageEngineExcludeTypes = [4] -[[test]] +[[test]] testTitle = 'SubmitBackup' simBackupAgents = 'BackupToFile' clearAfterTest = false diff --git a/tests/restarting/from_7.0.0/SnapIncrementalRestore-2.toml b/tests/restarting/from_7.0.0/SnapIncrementalRestore-2.toml index f99d46b344..8eaedcf6fd 100644 --- a/tests/restarting/from_7.0.0/SnapIncrementalRestore-2.toml +++ b/tests/restarting/from_7.0.0/SnapIncrementalRestore-2.toml @@ -1,3 +1,6 @@ +[configuration] +storageEngineExcludeTypes = [4] + [[test]] testTitle = 'RestoreBackup' simBackupAgents = 'BackupToFile' From 4e16e2552b4dfe8a5c2bc5e78bb30bf219a423e0 Mon Sep 17 00:00:00 2001 From: Daniel Smith Date: Mon, 12 Jul 2021 14:03:21 -0400 Subject: [PATCH 3/5] Even smaller memtable --- fdbclient/ServerKnobs.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbclient/ServerKnobs.cpp b/fdbclient/ServerKnobs.cpp index d46336099d..6d74c2f67a 100644 --- a/fdbclient/ServerKnobs.cpp +++ b/fdbclient/ServerKnobs.cpp @@ -336,7 +336,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi init( ROCKSDB_BACKGROUND_PARALLELISM, 0 ); init( ROCKSDB_READ_PARALLELISM, 4 ); // Use a smaller memtable in simulation to avoid OOMs. - int64_t memtableBytes = isSimulated ? 64 * 1014 : 512 * 1024 * 1024; + int64_t memtableBytes = isSimulated ? 32 * 1024 : 512 * 1024 * 1024; init( ROCKSDB_MEMTABLE_BYTES, memtableBytes ); init( ROCKSDB_UNSAFE_AUTO_FSYNC, false ); init( ROCKSDB_PERIODIC_COMPACTION_SECONDS, 0 ); From ac4699212025f0995a91314a96dfdcc36dc66bca Mon Sep 17 00:00:00 2001 From: Daniel Smith Date: Tue, 13 Jul 2021 12:24:39 -0400 Subject: [PATCH 4/5] Add warn trace event about non-determinism --- fdbserver/SimulatedCluster.actor.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fdbserver/SimulatedCluster.actor.cpp b/fdbserver/SimulatedCluster.actor.cpp index 831fbaf630..42a9642d24 100644 --- a/fdbserver/SimulatedCluster.actor.cpp +++ b/fdbserver/SimulatedCluster.actor.cpp @@ -1291,6 +1291,8 @@ void SimulationConfig::setStorageEngine(const TestConfig& testConfig) { set_config("ssd-rocksdb-experimental"); // Tests using the RocksDB engine are necessarily non-deterministic because of RocksDB // background threads. + TraceEvent(SevWarn, "RocksDBNonDeterminism") + .detail("Explanation", "The RocksDB storage engine is threaded and non-deterministic"); noUnseed = true; break; } From ed35df9a0052397f4f17db3b40f4f20b3a0ad88e Mon Sep 17 00:00:00 2001 From: Daniel Smith Date: Tue, 13 Jul 2021 14:56:23 -0400 Subject: [PATCH 5/5] Add comment explaining coro threads --- fdbserver/KeyValueStoreRocksDB.actor.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fdbserver/KeyValueStoreRocksDB.actor.cpp b/fdbserver/KeyValueStoreRocksDB.actor.cpp index edae3412d0..08dee91f04 100644 --- a/fdbserver/KeyValueStoreRocksDB.actor.cpp +++ b/fdbserver/KeyValueStoreRocksDB.actor.cpp @@ -370,6 +370,16 @@ struct RocksDBKeyValueStore : IKeyValueStore { std::unique_ptr writeBatch; explicit RocksDBKeyValueStore(const std::string& path, UID id) : path(path), id(id) { + // In simluation, run the reader/writer threads as Coro threads (i.e. in the network thread. The storage engine + // is still multi-threaded as background compaction threads are still present. Reads/writes to disk will also + // block the network thread in a way that would be unacceptable in production but is a necessary evil here. When + // performing the reads in background threads in simulation, the event loop thinks there is no work to do and + // advances time faster than 1 sec/sec. By the time the blocking read actually finishes, simulation has advanced + // time by more than 5 seconds, so every read fails with a transaction_too_old error. Doing blocking IO on the + // main thread solves this issue. There are almost certainly better fixes, but my goal was to get a less + // invasive change merged first and work on a more realistic version if/when we think that would provide + // substantially more confidence in the correctness. + // TODO: Adapt the simulation framework to not advance time quickly when background reads/writes are occurring. if (g_network->isSimulated()) { writeThread = CoroThreadPool::createThreadPool(); readThreads = CoroThreadPool::createThreadPool();