Address PR comments.
Revert knob name change, fix comparison between new and old recruitments, and get rid of empty `if` block.
This commit is contained in:
parent
6b4eb06201
commit
6533678f0d
|
@ -241,6 +241,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
init( DD_LOCATION_CACHE_SIZE, 2000000 ); if( randomize && BUGGIFY ) DD_LOCATION_CACHE_SIZE = 3;
|
||||
init( MOVEKEYS_LOCK_POLLING_DELAY, 5.0 );
|
||||
init( DEBOUNCE_RECRUITING_DELAY, 5.0 );
|
||||
init( DD_FAILURE_TIME, 1.0 ); if( randomize && BUGGIFY ) DD_FAILURE_TIME = 10.0;
|
||||
init( DD_ZERO_HEALTHY_TEAM_DELAY, 1.0 );
|
||||
init( REBALANCE_MAX_RETRIES, 100 );
|
||||
init( DD_OVERLAP_PENALTY, 10000 );
|
||||
|
@ -465,7 +466,6 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
init( VERSION_LAG_METRIC_INTERVAL, 0.5 ); if( randomize && BUGGIFY ) VERSION_LAG_METRIC_INTERVAL = 10.0;
|
||||
init( MAX_VERSION_DIFFERENCE, 20 * VERSIONS_PER_SECOND );
|
||||
init( FORCE_RECOVERY_CHECK_DELAY, 5.0 );
|
||||
init( DATA_DISTRIBUTOR_FAILURE_TIME, 1.0 ); if( randomize && BUGGIFY ) DATA_DISTRIBUTOR_FAILURE_TIME = 10.0;
|
||||
init( RATEKEEPER_FAILURE_TIME, 1.0 );
|
||||
init( REPLACE_INTERFACE_DELAY, 60.0 );
|
||||
init( REPLACE_INTERFACE_CHECK_DELAY, 5.0 );
|
||||
|
|
|
@ -224,6 +224,7 @@ public:
|
|||
// Remove wrong storage engines
|
||||
double DD_REMOVE_STORE_ENGINE_DELAY; // wait for the specified time before removing the next batch
|
||||
|
||||
double DD_FAILURE_TIME;
|
||||
double DD_ZERO_HEALTHY_TEAM_DELAY;
|
||||
|
||||
// KeyValueStore SQLITE
|
||||
|
@ -389,7 +390,6 @@ public:
|
|||
double VERSION_LAG_METRIC_INTERVAL;
|
||||
int64_t MAX_VERSION_DIFFERENCE;
|
||||
double FORCE_RECOVERY_CHECK_DELAY;
|
||||
double DATA_DISTRIBUTOR_FAILURE_TIME;
|
||||
double RATEKEEPER_FAILURE_TIME;
|
||||
double REPLACE_INTERFACE_DELAY;
|
||||
double REPLACE_INTERFACE_CHECK_DELAY;
|
||||
|
|
|
@ -3510,23 +3510,21 @@ void checkBetterSingletons(ClusterControllerData* self) {
|
|||
// check if we can colocate the singletons in a more optimal way
|
||||
|
||||
// TODO: verify that we don't need to get the pid from the worker like we were doing before
|
||||
Optional<Standalone<StringRef>> currentRKProcessId = rkSingleton.interface.get().locality.processId();
|
||||
Optional<Standalone<StringRef>> currentDDProcessId = ddSingleton.interface.get().locality.processId();
|
||||
Optional<Standalone<StringRef>> currRKProcessId = rkSingleton.interface.get().locality.processId();
|
||||
Optional<Standalone<StringRef>> currDDProcessId = ddSingleton.interface.get().locality.processId();
|
||||
Optional<Standalone<StringRef>> newRKProcessId = newRKWorker.interf.locality.processId();
|
||||
Optional<Standalone<StringRef>> newDDProcessId = newRKWorker.interf.locality.processId();
|
||||
|
||||
auto currColocMap = getColocCounts({ currentRKProcessId, currentDDProcessId });
|
||||
auto currColocMap = getColocCounts({ currRKProcessId, currDDProcessId });
|
||||
auto newColocMap = getColocCounts({ newRKProcessId, newDDProcessId });
|
||||
|
||||
auto currColocCounts = std::make_tuple(currColocMap[newRKProcessId], currColocMap[newDDProcessId]);
|
||||
auto newColocCounts = std::make_tuple(newColocMap[newRKProcessId], newColocMap[newDDProcessId]);
|
||||
|
||||
// if the new coloc counts are collectively better (i.e. each singleton's coloc count has not increased)
|
||||
if (newColocCounts <= currColocCounts) {
|
||||
// if the new coloc counts are not worse (i.e. each singleton's coloc count has not increased)
|
||||
if (newColocMap[newRKProcessId] <= currColocMap[currRKProcessId] &&
|
||||
newColocMap[newDDProcessId] <= currColocMap[currDDProcessId]) {
|
||||
// rerecruit the singleton for which we have found a better process, if any
|
||||
if (newColocMap[newRKProcessId] < currColocMap[currentRKProcessId]) {
|
||||
if (newColocMap[newRKProcessId] < currColocMap[currRKProcessId]) {
|
||||
rkSingleton.recruit(self);
|
||||
} else if (newColocMap[newDDProcessId] < currColocMap[currentDDProcessId]) {
|
||||
} else if (newColocMap[newDDProcessId] < currColocMap[currDDProcessId]) {
|
||||
ddSingleton.recruit(self);
|
||||
}
|
||||
}
|
||||
|
@ -3892,8 +3890,6 @@ void haltRegisteringOrCurrentSingleton(ClusterControllerData* self,
|
|||
.detail("DcID", printable(self->clusterControllerDcId))
|
||||
.detail("ReqDcID", printable(worker.locality.dcId()))
|
||||
.detail("Recruiting" + roleAbbr + "ID", recruitingID.present() ? recruitingID.get() : UID());
|
||||
if (registeringSingleton.getClusterRole() == ProcessClass::DataDistributor) {
|
||||
}
|
||||
registeringSingleton.halt(self, worker.locality.processId());
|
||||
} else if (!recruitingID.present()) {
|
||||
// if not currently recruiting, then halt the previous one in favour of the requesting one
|
||||
|
@ -4767,7 +4763,7 @@ ACTOR Future<Void> monitorDataDistributor(ClusterControllerData* self) {
|
|||
if (self->db.serverInfo->get().distributor.present() && !self->recruitDistributor.get()) {
|
||||
choose {
|
||||
when(wait(waitFailureClient(self->db.serverInfo->get().distributor.get().waitFailure,
|
||||
SERVER_KNOBS->DATA_DISTRIBUTOR_FAILURE_TIME))) {
|
||||
SERVER_KNOBS->DD_FAILURE_TIME))) {
|
||||
TraceEvent("CCDataDistributorDied", self->id)
|
||||
.detail("DDID", self->db.serverInfo->get().distributor.get().id());
|
||||
self->db.clearInterf(ProcessClass::DataDistributorClass);
|
||||
|
|
Loading…
Reference in New Issue