Add a failure timeout knob (DD_FAILURE_TIME) for the data distributor.

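Set the knob's default value to 1.0s; the cluster controller now passes it to waitFailureClient for the data distributor instead of WORKER_FAILURE_TIME.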
This commit is contained in:
Jingyu Zhou 2019-01-18 15:43:43 -08:00 committed by Jingyu Zhou
parent efd000dd11
commit e0a7162cf8
3 changed files with 6 additions and 4 deletions

View File

@ -2260,7 +2260,7 @@ ACTOR Future<Void> waitDDRejoinOrStartDD( ClusterControllerData *self, ClusterCo
when ( DataDistributorRejoinRequest req = waitNext( clusterInterface->dataDistributorRejoin.getFuture() ) ) {
TraceEvent("ClusterController", self->id).detail("DataDistributorRejoinID", req.dataDistributor.id());
self->db.setDistributor( req.dataDistributor );
distributorFailed = waitFailureClient( req.dataDistributor.waitFailure, SERVER_KNOBS->WORKER_FAILURE_TIME );
distributorFailed = waitFailureClient( req.dataDistributor.waitFailure, SERVER_KNOBS->DD_FAILURE_TIME );
req.reply.send( Void() );
break;
}
@ -2278,7 +2278,7 @@ ACTOR Future<Void> waitDDRejoinOrStartDD( ClusterControllerData *self, ClusterCo
const UID myDdId = self->db.serverInfo->get().distributor.id();
ev.detail("NewDataDistributorID", distributorInterf.id()).detail("Valid", distributorInterf.isValid());
self->db.setDistributor( distributorInterf );
distributorFailed = waitFailureClient( distributorInterf.waitFailure, SERVER_KNOBS->WORKER_FAILURE_TIME );
distributorFailed = waitFailureClient( distributorInterf.waitFailure, SERVER_KNOBS->DD_FAILURE_TIME );
newDistributor = Never();
}
when ( wait( distributorFailed ) ) {
@ -2292,17 +2292,17 @@ ACTOR Future<Void> waitDDRejoinOrStartDD( ClusterControllerData *self, ClusterCo
when ( DataDistributorRejoinRequest req = waitNext( clusterInterface->dataDistributorRejoin.getFuture() ) ) {
if ( !self->db.serverInfo->get().distributor.isValid() ) {
self->db.setDistributor( req.dataDistributor );
distributorFailed = waitFailureClient( req.dataDistributor.waitFailure, SERVER_KNOBS->WORKER_FAILURE_TIME );
distributorFailed = waitFailureClient( req.dataDistributor.waitFailure, SERVER_KNOBS->DD_FAILURE_TIME );
TraceEvent("ClusterController", self->id).detail("DataDistributorRejoined", req.dataDistributor.id());
} else {
const UID myDdId = self->db.serverInfo->get().distributor.id();
const bool success = myDdId == req.dataDistributor.id();
req.reply.send( Void() );
TraceEvent("ClusterController", self->id)
.detail("DataDistributorRejoin", success ? "OK" : "Failed")
.detail("OldDataDistributorID", myDdId)
.detail("ReqID", req.dataDistributor.id());
}
req.reply.send( Void() );
}
}
}

View File

@ -165,6 +165,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
init( DD_LOCATION_CACHE_SIZE, 2000000 ); if( randomize && BUGGIFY ) DD_LOCATION_CACHE_SIZE = 3;
init( MOVEKEYS_LOCK_POLLING_DELAY, 5.0 );
init( DEBOUNCE_RECRUITING_DELAY, 5.0 );
init( DD_FAILURE_TIME, 1.0 ); if( randomize && BUGGIFY ) DD_FAILURE_TIME = 10.0;
// Redwood Storage Engine
init( PREFIX_TREE_IMMEDIATE_KEY_SIZE_LIMIT, 30 );

View File

@ -128,6 +128,7 @@ public:
int64_t DD_LOCATION_CACHE_SIZE;
double MOVEKEYS_LOCK_POLLING_DELAY;
double DEBOUNCE_RECRUITING_DELAY;
double DD_FAILURE_TIME;
// Redwood Storage Engine
int PREFIX_TREE_IMMEDIATE_KEY_SIZE_LIMIT;