Defer recoveredDiskFiles wait if Encryption data at-rest is enabled (#7414)

* Defer recoveredDiskFiles wait if Encryption data at-rest is enabled

Description

In the current code ClusterController startup wait for 'recoveredDiskFiles'
future to complete before triggered 'clusterControllerCore' actor, which
inturn starts 'EncryptKeyProxy' (EKP) actor resposible to fetch/refresh
encryption keys needed for ClusterRecovery as well interactions with
KMS.

Patch addresses a circular dependency where StorageServer initialization
depends on EKP, but, CC doesn't recruit EKP till 'recoveredDiskFiles' completes
which includes SS initialization. Given 'recoveredDiskFiles' is an optimization,
the patch proposes deferring the 'recoveredDiskFiles' future completion until
new Master recruitment is done as part of ClusterRecovery (unblock EKP singleton)

Testing

Ran 500K correctness runs: 20220618-055310-ahusain-foundationdb-61c431d467557551
Recorded failures doesn't seems to be related to the change.
This commit is contained in:
Ata E Husain Bohra 2022-06-21 18:18:57 -07:00 committed by GitHub
parent 8cf2be030f
commit e1ca0ef9a2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 40 additions and 7 deletions

View File

@ -28,6 +28,7 @@
#include "fdbclient/SystemData.h"
#include "fdbrpc/FailureMonitor.h"
#include "fdbserver/EncryptKeyProxyInterface.h"
#include "fdbserver/Knobs.h"
#include "flow/ActorCollection.h"
#include "fdbclient/ClusterConnectionMemoryRecord.h"
#include "fdbclient/NativeAPI.actor.h"
@ -208,11 +209,13 @@ ACTOR Future<Void> handleLeaderReplacement(Reference<ClusterRecoveryData> self,
ACTOR Future<Void> clusterWatchDatabase(ClusterControllerData* cluster,
ClusterControllerData::DBInfo* db,
ServerCoordinators coordinators,
Future<Void> leaderFail) {
Future<Void> leaderFail,
Future<Void> recoveredDiskFiles) {
state MasterInterface iMaster;
state Reference<ClusterRecoveryData> recoveryData;
state PromiseStream<Future<Void>> addActor;
state Future<Void> recoveryCore;
state bool recoveredDisk = false;
// SOMEDAY: If there is already a non-failed master referenced by zkMasterInfo, use that one until it fails
// When this someday is implemented, make sure forced failures still cause the master to be recruited again
@ -254,6 +257,18 @@ ACTOR Future<Void> clusterWatchDatabase(ClusterControllerData* cluster,
.detail("ChangeID", dbInfo.id);
db->serverInfo->set(dbInfo);
if (SERVER_KNOBS->ENABLE_ENCRYPTION && !recoveredDisk) {
// EKP singleton recruitment waits for 'Master/Sequencer' recruitment, execute wait for
// 'recoveredDiskFiles' optimization once EKP recruitment is unblocked to avoid circular dependencies
// with StorageServer initialization. The waiting for recoveredDiskFiles is to make sure the worker
// server on the same process has been registered with the new CC before recruitment.
wait(recoveredDiskFiles);
TraceEvent("CCWDB_RecoveredDiskFiles", cluster->id).log();
// Need to be done for the first once in the lifetime of ClusterController
recoveredDisk = true;
}
state Future<Void> spinDelay = delay(
SERVER_KNOBS
->MASTER_SPIN_DELAY); // Don't retry cluster recovery more than once per second, but don't delay
@ -2511,7 +2526,8 @@ ACTOR Future<Void> clusterControllerCore(ClusterControllerFullInterface interf,
Future<Void> leaderFail,
ServerCoordinators coordinators,
LocalityData locality,
ConfigDBType configDBType) {
ConfigDBType configDBType,
Future<Void> recoveredDiskFiles) {
state ClusterControllerData self(interf, locality, coordinators);
state ConfigBroadcaster configBroadcaster(coordinators, configDBType);
state Future<Void> coordinationPingDelay = delay(SERVER_KNOBS->WORKER_COORDINATION_PING_DELAY);
@ -2522,7 +2538,8 @@ ACTOR Future<Void> clusterControllerCore(ClusterControllerFullInterface interf,
if (SERVER_KNOBS->ENABLE_ENCRYPTION || g_network->isSimulated()) {
self.addActor.send(monitorEncryptKeyProxy(&self));
}
self.addActor.send(clusterWatchDatabase(&self, &self.db, coordinators, leaderFail)); // Start the master database
self.addActor.send(clusterWatchDatabase(
&self, &self.db, coordinators, leaderFail, recoveredDiskFiles)); // Start the master database
self.addActor.send(self.updateWorkerList.init(self.db.db));
self.addActor.send(statusServer(interf.clientInterface.databaseStatus.getFuture(),
&self,
@ -2651,7 +2668,8 @@ ACTOR Future<Void> clusterController(ServerCoordinators coordinators,
bool hasConnected,
Reference<AsyncVar<ClusterControllerPriorityInfo>> asyncPriorityInfo,
LocalityData locality,
ConfigDBType configDBType) {
ConfigDBType configDBType,
Future<Void> recoveredDiskFiles) {
loop {
state ClusterControllerFullInterface cci;
state bool inRole = false;
@ -2678,7 +2696,7 @@ ACTOR Future<Void> clusterController(ServerCoordinators coordinators,
startRole(Role::CLUSTER_CONTROLLER, cci.id(), UID());
inRole = true;
wait(clusterControllerCore(cci, leaderFail, coordinators, locality, configDBType));
wait(clusterControllerCore(cci, leaderFail, coordinators, locality, configDBType, recoveredDiskFiles));
}
} catch (Error& e) {
if (inRole)
@ -2703,12 +2721,27 @@ ACTOR Future<Void> clusterController(Reference<IClusterConnectionRecord> connRec
Future<Void> recoveredDiskFiles,
LocalityData locality,
ConfigDBType configDBType) {
wait(recoveredDiskFiles);
// Defer this wait optimization of cluster configuration has 'Encryption data at-rest' enabled.
// Encryption depends on available of EncryptKeyProxy (EKP) FDB role to enable fetch/refresh of encryption keys
// created and managed by external KeyManagementService (KMS).
//
// TODO: Wait optimization is to ensure the worker server on the same process gets registered with the new CC before
// recruitment. Unify the codepath for both Encryption enable vs disable scenarios.
if (!SERVER_KNOBS->ENABLE_ENCRYPTION) {
wait(recoveredDiskFiles);
TraceEvent("RecoveredDiskFiles").log();
} else {
TraceEvent("RecoveredDiskFiles_Deferred").log();
}
state bool hasConnected = false;
loop {
try {
ServerCoordinators coordinators(connRecord);
wait(clusterController(coordinators, currentCC, hasConnected, asyncPriorityInfo, locality, configDBType));
wait(clusterController(
coordinators, currentCC, hasConnected, asyncPriorityInfo, locality, configDBType, recoveredDiskFiles));
hasConnected = true;
} catch (Error& e) {
if (e.code() != error_code_coordinators_changed)