Add a delay before quickly re-recruiting the same singleton process, to avoid recruit thrashing when there are temporarily multiple cluster controllers (#7000)
parent 13a388ff14
commit db6d7396ca
@@ -542,6 +542,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
 	init( CC_ENABLE_ENTIRE_SATELLITE_MONITORING, false );
 	init( CC_SATELLITE_DEGRADATION_MIN_COMPLAINER, 3 );
 	init( CC_SATELLITE_DEGRADATION_MIN_BAD_SERVER, 3 );
+	init( CC_THROTTLE_SINGLETON_RERECRUIT_INTERVAL, 0.5 );
 
 	init( INCOMPATIBLE_PEERS_LOGGING_INTERVAL, 600 ); if( randomize && BUGGIFY ) INCOMPATIBLE_PEERS_LOGGING_INTERVAL = 60.0;
 	init( EXPECTED_MASTER_FITNESS, ProcessClass::UnsetFit );
@@ -481,6 +481,8 @@ public:
 	                                             // be determined as degraded worker.
 	int CC_SATELLITE_DEGRADATION_MIN_BAD_SERVER; // The minimum amount of degraded server in satellite DC to be
 	                                             // determined as degraded satellite.
+	double CC_THROTTLE_SINGLETON_RERECRUIT_INTERVAL; // The interval to prevent re-recruiting the same singleton if a
+	                                                  // recruiting fight between two cluster controllers occurs.
 
 	// Knobs used to select the best policy (via monte carlo)
 	int POLICY_RATING_TESTS; // number of tests per policy (in order to compare)
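As a worked example of the new knob's effect with its 0.5 s default: if a recruitment of a given singleton starts at t = 100.0 s and another recruitment of the same singleton is triggered at t = 100.1 s, the second attempt is delayed by 0.4 s instead of starting immediately; attempts spaced more than 0.5 s apart are not delayed at all.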
@@ -1933,8 +1933,24 @@ ACTOR Future<Void> handleForcedRecoveries(ClusterControllerData* self, ClusterCo
 	}
 }
 
-ACTOR Future<Void> startDataDistributor(ClusterControllerData* self) {
-	wait(delay(0.0)); // If master fails at the same time, give it a chance to clear master PID.
+struct SingletonRecruitThrottler {
+	double lastRecruitStart;
+
+	SingletonRecruitThrottler() : lastRecruitStart(-1) {}
+
+	double newRecruitment() {
+		double n = now();
+		double waitTime =
+		    std::max(0.0, (lastRecruitStart + SERVER_KNOBS->CC_THROTTLE_SINGLETON_RERECRUIT_INTERVAL - n));
+		lastRecruitStart = n;
+		return waitTime;
+	}
+};
+
+ACTOR Future<Void> startDataDistributor(ClusterControllerData* self, double waitTime) {
+	// If master fails at the same time, give it a chance to clear master PID.
+	// Also wait to avoid too many consecutive recruits in a small time window.
+	wait(delay(waitTime));
 
 	TraceEvent("CCStartDataDistributor", self->id).log();
 	loop {
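To see the throttling logic in isolation, here is a self-contained sketch that re-creates the struct above with an injectable clock; Flow's now() and SERVER_KNOBS are not available outside fdbserver, so a plain parameter and a hard-coded 0.5 s interval stand in for them (assumptions for illustration, not the actual implementation):

```cpp
#include <algorithm>
#include <cassert>

// Assumed stand-in for SERVER_KNOBS->CC_THROTTLE_SINGLETON_RERECRUIT_INTERVAL.
constexpr double kInterval = 0.5;

// Re-creation of the struct from the diff, with the current time passed in so
// the behavior can be exercised deterministically.
struct SingletonRecruitThrottlerSketch {
	double lastRecruitStart = -1;

	double newRecruitment(double now) {
		double waitTime = std::max(0.0, lastRecruitStart + kInterval - now);
		lastRecruitStart = now;
		return waitTime;
	}
};

int main() {
	SingletonRecruitThrottlerSketch throttler;
	assert(throttler.newRecruitment(100.0) == 0.0); // first recruit: no delay
	assert(throttler.newRecruitment(100.1) > 0.39); // quick re-recruit: ~0.4 s delay
	assert(throttler.newRecruitment(101.0) == 0.0); // enough time has passed: no delay
	return 0;
}
```

The first recruitment is never delayed; only a re-recruitment that arrives within the interval is pushed back by the remaining portion of it.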
@@ -2003,6 +2019,7 @@ ACTOR Future<Void> startDataDistributor(ClusterControllerData* self) {
 }
 
 ACTOR Future<Void> monitorDataDistributor(ClusterControllerData* self) {
+	state SingletonRecruitThrottler recruitThrottler;
 	while (self->db.serverInfo->get().recoveryState < RecoveryState::ACCEPTING_COMMITS) {
 		wait(self->db.serverInfo->onChange());
 	}
@@ -2019,13 +2036,15 @@ ACTOR Future<Void> monitorDataDistributor(ClusterControllerData* self) {
 				when(wait(self->recruitDistributor.onChange())) {}
 			}
 		} else {
-			wait(startDataDistributor(self));
+			wait(startDataDistributor(self, recruitThrottler.newRecruitment()));
 		}
 	}
 }
 
-ACTOR Future<Void> startRatekeeper(ClusterControllerData* self) {
-	wait(delay(0.0)); // If master fails at the same time, give it a chance to clear master PID.
+ACTOR Future<Void> startRatekeeper(ClusterControllerData* self, double waitTime) {
+	// If master fails at the same time, give it a chance to clear master PID.
+	// Also wait to avoid too many consecutive recruits in a small time window.
+	wait(delay(waitTime));
 
 	TraceEvent("CCStartRatekeeper", self->id).log();
 	loop {
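The remainder of the diff applies the same two-part change to each singleton: every start* actor gains a waitTime parameter that is passed to wait(delay(...)), and every corresponding monitor* actor keeps a state SingletonRecruitThrottler whose newRecruitment() supplies that wait time, as seen next for the ratekeeper, the encrypt key proxy, and the blob manager.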
@@ -2091,6 +2110,7 @@ ACTOR Future<Void> startRatekeeper(ClusterControllerData* self) {
 }
 
 ACTOR Future<Void> monitorRatekeeper(ClusterControllerData* self) {
+	state SingletonRecruitThrottler recruitThrottler;
 	while (self->db.serverInfo->get().recoveryState < RecoveryState::ACCEPTING_COMMITS) {
 		wait(self->db.serverInfo->onChange());
 	}
@@ -2107,34 +2127,15 @@ ACTOR Future<Void> monitorRatekeeper(ClusterControllerData* self) {
 				when(wait(self->recruitRatekeeper.onChange())) {}
 			}
 		} else {
-			wait(startRatekeeper(self));
+			wait(startRatekeeper(self, recruitThrottler.newRecruitment()));
 		}
 	}
 }
 
-// Acquires the BM lock by getting the next epoch no.
-ACTOR Future<int64_t> getNextBMEpoch(ClusterControllerData* self) {
-	state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(self->cx);
-
-	loop {
-		tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
-		tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
-		try {
-			Optional<Value> oldEpoch = wait(tr->get(blobManagerEpochKey));
-			state int64_t newEpoch = oldEpoch.present() ? decodeBlobManagerEpochValue(oldEpoch.get()) + 1 : 1;
-			tr->set(blobManagerEpochKey, blobManagerEpochValueFor(newEpoch));
-
-			wait(tr->commit());
-			TraceEvent(SevDebug, "CCNextBlobManagerEpoch", self->id).detail("Epoch", newEpoch);
-			return newEpoch;
-		} catch (Error& e) {
-			wait(tr->onError(e));
-		}
-	}
-}
-
-ACTOR Future<Void> startEncryptKeyProxy(ClusterControllerData* self) {
-	wait(delay(0.0)); // If master fails at the same time, give it a chance to clear master PID.
+ACTOR Future<Void> startEncryptKeyProxy(ClusterControllerData* self, double waitTime) {
+	// If master fails at the same time, give it a chance to clear master PID.
+	// Also wait to avoid too many consecutive recruits in a small time window.
+	wait(delay(waitTime));
 
 	TraceEvent("CCEKP_Start", self->id).log();
 	loop {
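Note that getNextBMEpoch is not being deleted here: the identical function reappears unchanged in a later hunk, immediately before the new startBlobManager, so this hunk only relocates it and gives startEncryptKeyProxy the new waitTime parameter.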
@@ -2208,6 +2209,7 @@ ACTOR Future<Void> startEncryptKeyProxy(ClusterControllerData* self) {
 }
 
 ACTOR Future<Void> monitorEncryptKeyProxy(ClusterControllerData* self) {
+	state SingletonRecruitThrottler recruitThrottler;
 	loop {
 		if (self->db.serverInfo->get().encryptKeyProxy.present() && !self->recruitEncryptKeyProxy.get()) {
 			choose {
@@ -2219,13 +2221,36 @@ ACTOR Future<Void> monitorEncryptKeyProxy(ClusterControllerData* self) {
 				when(wait(self->recruitEncryptKeyProxy.onChange())) {}
 			}
 		} else {
-			wait(startEncryptKeyProxy(self));
+			wait(startEncryptKeyProxy(self, recruitThrottler.newRecruitment()));
 		}
 	}
 }
 
-ACTOR Future<Void> startBlobManager(ClusterControllerData* self) {
-	wait(delay(0.0)); // If master fails at the same time, give it a chance to clear master PID.
+// Acquires the BM lock by getting the next epoch no.
+ACTOR Future<int64_t> getNextBMEpoch(ClusterControllerData* self) {
+	state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(self->cx);
+
+	loop {
+		tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
+		tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
+		try {
+			Optional<Value> oldEpoch = wait(tr->get(blobManagerEpochKey));
+			state int64_t newEpoch = oldEpoch.present() ? decodeBlobManagerEpochValue(oldEpoch.get()) + 1 : 1;
+			tr->set(blobManagerEpochKey, blobManagerEpochValueFor(newEpoch));
+
+			wait(tr->commit());
+			TraceEvent(SevDebug, "CCNextBlobManagerEpoch", self->id).detail("Epoch", newEpoch);
+			return newEpoch;
+		} catch (Error& e) {
+			wait(tr->onError(e));
+		}
+	}
+}
+
+ACTOR Future<Void> startBlobManager(ClusterControllerData* self, double waitTime) {
+	// If master fails at the same time, give it a chance to clear master PID.
+	// Also wait to avoid too many consecutive recruits in a small time window.
+	wait(delay(waitTime));
 
 	TraceEvent("CCStartBlobManager", self->id).log();
 	loop {
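The relocated comment "Acquires the BM lock by getting the next epoch no." describes epoch-based fencing: each newly recruited blob manager commits a strictly larger epoch, so a manager holding an older epoch can tell it has been superseded. Below is a minimal sketch of that idea only, with an in-memory optional standing in for the value at blobManagerEpochKey and no transaction or retry loop (both of which the real actor relies on for atomicity):

```cpp
#include <cassert>
#include <cstdint>
#include <optional>

// Stand-in for the value stored at blobManagerEpochKey.
static std::optional<int64_t> storedEpoch;

// Mirrors the read-increment-write in getNextBMEpoch, minus the transaction
// machinery of the real code.
int64_t getNextEpoch() {
	int64_t next = storedEpoch.has_value() ? *storedEpoch + 1 : 1;
	storedEpoch = next;
	return next;
}

int main() {
	int64_t first = getNextEpoch();  // first blob manager acquires epoch 1
	int64_t second = getNextEpoch(); // a newly recruited manager acquires epoch 2
	assert(second > first);          // the older manager can detect it was superseded
	return 0;
}
```

In the actual actor, this read-increment-write runs inside a ReadYourWritesTransaction with ACCESS_SYSTEM_KEYS and PRIORITY_SYSTEM_IMMEDIATE, and tr->onError(e) retries on conflicts.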
@@ -2322,6 +2347,7 @@ ACTOR Future<Void> watchBlobGranulesConfigKey(ClusterControllerData* self) {
 }
 
 ACTOR Future<Void> monitorBlobManager(ClusterControllerData* self) {
+	state SingletonRecruitThrottler recruitThrottler;
 	while (self->db.serverInfo->get().recoveryState < RecoveryState::ACCEPTING_COMMITS) {
 		wait(self->db.serverInfo->onChange());
 	}
@@ -2352,7 +2378,7 @@ ACTOR Future<Void> monitorBlobManager(ClusterControllerData* self) {
 			}
 		} else if (self->db.blobGranulesEnabled.get()) {
 			// if there is no blob manager present but blob granules are now enabled, recruit a BM
-			wait(startBlobManager(self));
+			wait(startBlobManager(self, recruitThrottler.newRecruitment()));
 		} else {
 			// if there is no blob manager present and blob granules are disabled, wait for a config change
 			wait(self->db.blobGranulesEnabled.onChange());