From 5a6a052c5524bc84cef504324f0a450197f7b574 Mon Sep 17 00:00:00 2001 From: Suraj Gupta Date: Wed, 13 Oct 2021 09:46:38 -0400 Subject: [PATCH 1/5] Add a knob to gate blob-related work. --- fdbclient/ServerKnobs.cpp | 3 ++- fdbclient/ServerKnobs.h | 1 + fdbserver/ClusterController.actor.cpp | 4 +++- fdbserver/SimulatedCluster.actor.cpp | 5 ++++- fdbserver/workloads/ConsistencyCheck.actor.cpp | 10 ++++++---- 5 files changed, 16 insertions(+), 7 deletions(-) diff --git a/fdbclient/ServerKnobs.cpp b/fdbclient/ServerKnobs.cpp index 1e3553f797..fae9c4b01c 100644 --- a/fdbclient/ServerKnobs.cpp +++ b/fdbclient/ServerKnobs.cpp @@ -766,7 +766,8 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi init( BG_DELTA_FILE_TARGET_BYTES, BG_DELTA_BYTES_BEFORE_COMPACT/10 ); // TODO should discuss proper value for this - init( BLOB_WORKER_TIMEOUT, 10.0 ); if( randomize && BUGGIFY ) BLOB_WORKER_TIMEOUT = 1.0; + init( BLOB_WORKER_TIMEOUT, 10.0 ); if( randomize && BUGGIFY ) BLOB_WORKER_TIMEOUT = 1.0; + init( ENABLE_BLOB_GRANULES, false); // clang-format on diff --git a/fdbclient/ServerKnobs.h b/fdbclient/ServerKnobs.h index 2be6757624..63a62cde82 100644 --- a/fdbclient/ServerKnobs.h +++ b/fdbclient/ServerKnobs.h @@ -714,6 +714,7 @@ public: int BG_DELTA_BYTES_BEFORE_COMPACT; double BLOB_WORKER_TIMEOUT; // Blob Manager's reaction time to a blob worker failure + bool ENABLE_BLOB_GRANULES; ServerKnobs(Randomize, ClientKnobs*, IsSimulated); void initialize(Randomize, ClientKnobs*, IsSimulated); diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index f0b6b1cfd3..8da716b75e 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -5241,7 +5241,9 @@ ACTOR Future clusterControllerCore(ClusterControllerFullInterface interf, self.addActor.send(handleForcedRecoveries(&self, interf)); self.addActor.send(monitorDataDistributor(&self)); self.addActor.send(monitorRatekeeper(&self)); - self.addActor.send(monitorBlobManager(&self)); + if (SERVER_KNOBS->ENABLE_BLOB_GRANULES) { + self.addActor.send(monitorBlobManager(&self)); + } // self.addActor.send(monitorTSSMapping(&self)); self.addActor.send(dbInfoUpdater(&self)); self.addActor.send(traceCounters("ClusterControllerMetrics", diff --git a/fdbserver/SimulatedCluster.actor.cpp b/fdbserver/SimulatedCluster.actor.cpp index 4e4d20a133..2ba53a4bbf 100644 --- a/fdbserver/SimulatedCluster.actor.cpp +++ b/fdbserver/SimulatedCluster.actor.cpp @@ -2002,7 +2002,10 @@ void setupSimulatedSystem(vector>* systemActors, // TODO: caching disabled for this merge // FIXME: we hardcode some machines to specifically test storage cache and blob workers int storageCacheMachines = dc == 0 ? 1 : 0; - int blobWorkerMachines = 2 + deterministicRandom()->randomInt(0, NUM_EXTRA_BW_MACHINES + 1); + int blobWorkerMachines = 0; + if (SERVER_KNOBS->ENABLE_BLOB_GRANULES) { + blobWorkerMachines = 2 + deterministicRandom()->randomInt(0, NUM_EXTRA_BW_MACHINES + 1); + } int totalMachines = machines + storageCacheMachines + blobWorkerMachines; int useSeedForMachine = deterministicRandom()->randomInt(0, totalMachines); diff --git a/fdbserver/workloads/ConsistencyCheck.actor.cpp b/fdbserver/workloads/ConsistencyCheck.actor.cpp index 97689ef75c..de4cf36f12 100644 --- a/fdbserver/workloads/ConsistencyCheck.actor.cpp +++ b/fdbserver/workloads/ConsistencyCheck.actor.cpp @@ -296,9 +296,11 @@ struct ConsistencyCheckWorkload : TestWorkload { wait(::success(self->checkForExtraDataStores(cx, self))); // Check blob workers are operating as expected - bool blobWorkersCorrect = wait(self->checkBlobWorkers(cx, configuration, self)); - if (!blobWorkersCorrect) - self->testFailure("Blob workers incorrect"); + if (SERVER_KNOBS->ENABLE_BLOB_GRANULES) { + bool blobWorkersCorrect = wait(self->checkBlobWorkers(cx, configuration, self)); + if (!blobWorkersCorrect) + self->testFailure("Blob workers incorrect"); + } // Check that each machine is operating as its desired class bool usingDesiredClasses = wait(self->checkUsingDesiredClasses(cx, self)); @@ -2335,7 +2337,7 @@ struct ConsistencyCheckWorkload : TestWorkload { } // Check BlobManager - if (db.blobManager.present() && + if (SERVER_KNOBS->ENABLE_BLOB_GRANULES && db.blobManager.present() && (!nonExcludedWorkerProcessMap.count(db.blobManager.get().address()) || nonExcludedWorkerProcessMap[db.blobManager.get().address()].processClass.machineClassFitness( ProcessClass::BlobManager) > fitnessLowerBound)) { From a163619fbc931ae187068d32b9e001cebe61da18 Mon Sep 17 00:00:00 2001 From: Suraj Gupta Date: Wed, 13 Oct 2021 09:56:46 -0400 Subject: [PATCH 2/5] Change default val for knob. --- fdbclient/ServerKnobs.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbclient/ServerKnobs.cpp b/fdbclient/ServerKnobs.cpp index fae9c4b01c..2571f214b9 100644 --- a/fdbclient/ServerKnobs.cpp +++ b/fdbclient/ServerKnobs.cpp @@ -767,7 +767,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi // TODO should discuss proper value for this init( BLOB_WORKER_TIMEOUT, 10.0 ); if( randomize && BUGGIFY ) BLOB_WORKER_TIMEOUT = 1.0; - init( ENABLE_BLOB_GRANULES, false); + init( ENABLE_BLOB_GRANULES, true ); // TODO: change to isSimulated // clang-format on From 9d4b55c7fe5efe7d069e69317331133f5840cd1d Mon Sep 17 00:00:00 2001 From: Suraj Gupta Date: Wed, 13 Oct 2021 11:45:51 -0400 Subject: [PATCH 3/5] Gate the blob verifier as well. --- .../workloads/BlobGranuleVerifier.actor.cpp | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/fdbserver/workloads/BlobGranuleVerifier.actor.cpp b/fdbserver/workloads/BlobGranuleVerifier.actor.cpp index db5e521ae6..46e90bef64 100644 --- a/fdbserver/workloads/BlobGranuleVerifier.actor.cpp +++ b/fdbserver/workloads/BlobGranuleVerifier.actor.cpp @@ -123,6 +123,10 @@ struct BlobGranuleVerifierWorkload : TestWorkload { std::string description() const override { return "BlobGranuleVerifier"; } Future setup(Database const& cx) override { + if (!SERVER_KNOBS->ENABLE_BLOB_GRANULES) { + return Void(); + } + if (doSetup) { double initialDelay = deterministicRandom()->random01() * (maxDelay - minDelay) + minDelay; if (BGV_DEBUG) { @@ -380,6 +384,10 @@ struct BlobGranuleVerifierWorkload : TestWorkload { } Future start(Database const& cx) override { + if (!SERVER_KNOBS->ENABLE_BLOB_GRANULES) { + return Void(); + } + clients.reserve(threads + 1); clients.push_back(timeout(findGranules(cx, this), testDuration, Void())); for (int i = 0; i < threads; i++) { @@ -445,7 +453,13 @@ struct BlobGranuleVerifierWorkload : TestWorkload { return self->mismatches == 0 && checks > 0; } - Future check(Database const& cx) override { return _check(cx, this); } + Future check(Database const& cx) override { + if (!SERVER_KNOBS->ENABLE_BLOB_GRANULES) { + return true; + } + + return _check(cx, this); + } void getMetrics(vector& m) override {} }; From bde90f058b2150789c5c19b7e9dfb1b3485f33c7 Mon Sep 17 00:00:00 2001 From: Suraj Gupta Date: Wed, 13 Oct 2021 13:26:27 -0400 Subject: [PATCH 4/5] Gate the client API. --- fdbclient/ClientKnobs.cpp | 3 +++ fdbclient/ClientKnobs.h | 3 +++ fdbclient/NativeAPI.actor.cpp | 6 ++++++ 3 files changed, 12 insertions(+) diff --git a/fdbclient/ClientKnobs.cpp b/fdbclient/ClientKnobs.cpp index 031c3fcfaf..8c8167267d 100644 --- a/fdbclient/ClientKnobs.cpp +++ b/fdbclient/ClientKnobs.cpp @@ -251,6 +251,9 @@ void ClientKnobs::initialize(Randomize randomize) { init( BUSYNESS_SPIKE_START_THRESHOLD, 0.100 ); init( BUSYNESS_SPIKE_SATURATED_THRESHOLD, 0.500 ); + // blob granules + init( ENABLE_BLOB_GRANULES, true ); // same as SERVER_KNOBS->ENABLE_BLOB_GRANULES + // clang-format on } diff --git a/fdbclient/ClientKnobs.h b/fdbclient/ClientKnobs.h index dd52bd98c5..b84659afc8 100644 --- a/fdbclient/ClientKnobs.h +++ b/fdbclient/ClientKnobs.h @@ -242,6 +242,9 @@ public: double BUSYNESS_SPIKE_START_THRESHOLD; double BUSYNESS_SPIKE_SATURATED_THRESHOLD; + // blob granules + bool ENABLE_BLOB_GRANULES; + ClientKnobs(Randomize randomize); void initialize(Randomize randomize); }; diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 32e300052f..b050ec67b5 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -7096,6 +7096,9 @@ ACTOR Future getBlobGranuleRangesStreamActor(Reference db } Future DatabaseContext::getBlobGranuleRangesStream(const PromiseStream& results, KeyRange range) { + if (!CLIENT_KNOBS->ENABLE_BLOB_GRANULES) { + throw client_invalid_operation(); + } return getBlobGranuleRangesStreamActor(Reference::addRef(this), results, range); } @@ -7296,6 +7299,9 @@ Future DatabaseContext::readBlobGranulesStream(const PromiseStreamENABLE_BLOB_GRANULES) { + throw client_invalid_operation(); + } return readBlobGranulesStreamActor(Reference::addRef(this), results, range, begin, end); } From 2ec8781224e93ea64aa713e55386f31c17e0527d Mon Sep 17 00:00:00 2001 From: Suraj Gupta Date: Wed, 13 Oct 2021 14:00:37 -0400 Subject: [PATCH 5/5] Merge knobs into one. --- fdbclient/ClientKnobs.cpp | 2 +- fdbclient/ServerKnobs.cpp | 1 - fdbclient/ServerKnobs.h | 1 - fdbserver/ClusterController.actor.cpp | 2 +- fdbserver/SimulatedCluster.actor.cpp | 2 +- fdbserver/workloads/BlobGranuleVerifier.actor.cpp | 6 +++--- fdbserver/workloads/ConsistencyCheck.actor.cpp | 4 ++-- 7 files changed, 8 insertions(+), 10 deletions(-) diff --git a/fdbclient/ClientKnobs.cpp b/fdbclient/ClientKnobs.cpp index 8c8167267d..4f0021cb6d 100644 --- a/fdbclient/ClientKnobs.cpp +++ b/fdbclient/ClientKnobs.cpp @@ -252,7 +252,7 @@ void ClientKnobs::initialize(Randomize randomize) { init( BUSYNESS_SPIKE_SATURATED_THRESHOLD, 0.500 ); // blob granules - init( ENABLE_BLOB_GRANULES, true ); // same as SERVER_KNOBS->ENABLE_BLOB_GRANULES + init( ENABLE_BLOB_GRANULES, true ); // clang-format on } diff --git a/fdbclient/ServerKnobs.cpp b/fdbclient/ServerKnobs.cpp index 2571f214b9..691ced7756 100644 --- a/fdbclient/ServerKnobs.cpp +++ b/fdbclient/ServerKnobs.cpp @@ -767,7 +767,6 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi // TODO should discuss proper value for this init( BLOB_WORKER_TIMEOUT, 10.0 ); if( randomize && BUGGIFY ) BLOB_WORKER_TIMEOUT = 1.0; - init( ENABLE_BLOB_GRANULES, true ); // TODO: change to isSimulated // clang-format on diff --git a/fdbclient/ServerKnobs.h b/fdbclient/ServerKnobs.h index 63a62cde82..2be6757624 100644 --- a/fdbclient/ServerKnobs.h +++ b/fdbclient/ServerKnobs.h @@ -714,7 +714,6 @@ public: int BG_DELTA_BYTES_BEFORE_COMPACT; double BLOB_WORKER_TIMEOUT; // Blob Manager's reaction time to a blob worker failure - bool ENABLE_BLOB_GRANULES; ServerKnobs(Randomize, ClientKnobs*, IsSimulated); void initialize(Randomize, ClientKnobs*, IsSimulated); diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 8da716b75e..c44ac69deb 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -5241,7 +5241,7 @@ ACTOR Future clusterControllerCore(ClusterControllerFullInterface interf, self.addActor.send(handleForcedRecoveries(&self, interf)); self.addActor.send(monitorDataDistributor(&self)); self.addActor.send(monitorRatekeeper(&self)); - if (SERVER_KNOBS->ENABLE_BLOB_GRANULES) { + if (CLIENT_KNOBS->ENABLE_BLOB_GRANULES) { self.addActor.send(monitorBlobManager(&self)); } // self.addActor.send(monitorTSSMapping(&self)); diff --git a/fdbserver/SimulatedCluster.actor.cpp b/fdbserver/SimulatedCluster.actor.cpp index 2ba53a4bbf..68aa053670 100644 --- a/fdbserver/SimulatedCluster.actor.cpp +++ b/fdbserver/SimulatedCluster.actor.cpp @@ -2003,7 +2003,7 @@ void setupSimulatedSystem(vector>* systemActors, // FIXME: we hardcode some machines to specifically test storage cache and blob workers int storageCacheMachines = dc == 0 ? 1 : 0; int blobWorkerMachines = 0; - if (SERVER_KNOBS->ENABLE_BLOB_GRANULES) { + if (CLIENT_KNOBS->ENABLE_BLOB_GRANULES) { blobWorkerMachines = 2 + deterministicRandom()->randomInt(0, NUM_EXTRA_BW_MACHINES + 1); } diff --git a/fdbserver/workloads/BlobGranuleVerifier.actor.cpp b/fdbserver/workloads/BlobGranuleVerifier.actor.cpp index 46e90bef64..d1b361ac51 100644 --- a/fdbserver/workloads/BlobGranuleVerifier.actor.cpp +++ b/fdbserver/workloads/BlobGranuleVerifier.actor.cpp @@ -123,7 +123,7 @@ struct BlobGranuleVerifierWorkload : TestWorkload { std::string description() const override { return "BlobGranuleVerifier"; } Future setup(Database const& cx) override { - if (!SERVER_KNOBS->ENABLE_BLOB_GRANULES) { + if (!CLIENT_KNOBS->ENABLE_BLOB_GRANULES) { return Void(); } @@ -384,7 +384,7 @@ struct BlobGranuleVerifierWorkload : TestWorkload { } Future start(Database const& cx) override { - if (!SERVER_KNOBS->ENABLE_BLOB_GRANULES) { + if (!CLIENT_KNOBS->ENABLE_BLOB_GRANULES) { return Void(); } @@ -454,7 +454,7 @@ struct BlobGranuleVerifierWorkload : TestWorkload { } Future check(Database const& cx) override { - if (!SERVER_KNOBS->ENABLE_BLOB_GRANULES) { + if (!CLIENT_KNOBS->ENABLE_BLOB_GRANULES) { return true; } diff --git a/fdbserver/workloads/ConsistencyCheck.actor.cpp b/fdbserver/workloads/ConsistencyCheck.actor.cpp index de4cf36f12..bb3afcbf76 100644 --- a/fdbserver/workloads/ConsistencyCheck.actor.cpp +++ b/fdbserver/workloads/ConsistencyCheck.actor.cpp @@ -296,7 +296,7 @@ struct ConsistencyCheckWorkload : TestWorkload { wait(::success(self->checkForExtraDataStores(cx, self))); // Check blob workers are operating as expected - if (SERVER_KNOBS->ENABLE_BLOB_GRANULES) { + if (CLIENT_KNOBS->ENABLE_BLOB_GRANULES) { bool blobWorkersCorrect = wait(self->checkBlobWorkers(cx, configuration, self)); if (!blobWorkersCorrect) self->testFailure("Blob workers incorrect"); @@ -2337,7 +2337,7 @@ struct ConsistencyCheckWorkload : TestWorkload { } // Check BlobManager - if (SERVER_KNOBS->ENABLE_BLOB_GRANULES && db.blobManager.present() && + if (CLIENT_KNOBS->ENABLE_BLOB_GRANULES && db.blobManager.present() && (!nonExcludedWorkerProcessMap.count(db.blobManager.get().address()) || nonExcludedWorkerProcessMap[db.blobManager.get().address()].processClass.machineClassFitness( ProcessClass::BlobManager) > fitnessLowerBound)) {