Add a failure timeout knob for the data distributor.
Set the default timeout to 1.0s.
This commit is contained in:
parent
efd000dd11
commit
e0a7162cf8
|
@ -2260,7 +2260,7 @@ ACTOR Future<Void> waitDDRejoinOrStartDD( ClusterControllerData *self, ClusterCo
|
|||
when ( DataDistributorRejoinRequest req = waitNext( clusterInterface->dataDistributorRejoin.getFuture() ) ) {
|
||||
TraceEvent("ClusterController", self->id).detail("DataDistributorRejoinID", req.dataDistributor.id());
|
||||
self->db.setDistributor( req.dataDistributor );
|
||||
distributorFailed = waitFailureClient( req.dataDistributor.waitFailure, SERVER_KNOBS->WORKER_FAILURE_TIME );
|
||||
distributorFailed = waitFailureClient( req.dataDistributor.waitFailure, SERVER_KNOBS->DD_FAILURE_TIME );
|
||||
req.reply.send( Void() );
|
||||
break;
|
||||
}
|
||||
|
@ -2278,7 +2278,7 @@ ACTOR Future<Void> waitDDRejoinOrStartDD( ClusterControllerData *self, ClusterCo
|
|||
const UID myDdId = self->db.serverInfo->get().distributor.id();
|
||||
ev.detail("NewDataDistributorID", distributorInterf.id()).detail("Valid", distributorInterf.isValid());
|
||||
self->db.setDistributor( distributorInterf );
|
||||
distributorFailed = waitFailureClient( distributorInterf.waitFailure, SERVER_KNOBS->WORKER_FAILURE_TIME );
|
||||
distributorFailed = waitFailureClient( distributorInterf.waitFailure, SERVER_KNOBS->DD_FAILURE_TIME );
|
||||
newDistributor = Never();
|
||||
}
|
||||
when ( wait( distributorFailed ) ) {
|
||||
|
@ -2292,17 +2292,17 @@ ACTOR Future<Void> waitDDRejoinOrStartDD( ClusterControllerData *self, ClusterCo
|
|||
when ( DataDistributorRejoinRequest req = waitNext( clusterInterface->dataDistributorRejoin.getFuture() ) ) {
|
||||
if ( !self->db.serverInfo->get().distributor.isValid() ) {
|
||||
self->db.setDistributor( req.dataDistributor );
|
||||
distributorFailed = waitFailureClient( req.dataDistributor.waitFailure, SERVER_KNOBS->WORKER_FAILURE_TIME );
|
||||
distributorFailed = waitFailureClient( req.dataDistributor.waitFailure, SERVER_KNOBS->DD_FAILURE_TIME );
|
||||
TraceEvent("ClusterController", self->id).detail("DataDistributorRejoined", req.dataDistributor.id());
|
||||
} else {
|
||||
const UID myDdId = self->db.serverInfo->get().distributor.id();
|
||||
const bool success = myDdId == req.dataDistributor.id();
|
||||
req.reply.send( Void() );
|
||||
TraceEvent("ClusterController", self->id)
|
||||
.detail("DataDistributorRejoin", success ? "OK" : "Failed")
|
||||
.detail("OldDataDistributorID", myDdId)
|
||||
.detail("ReqID", req.dataDistributor.id());
|
||||
}
|
||||
req.reply.send( Void() );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -165,6 +165,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
|
|||
init( DD_LOCATION_CACHE_SIZE, 2000000 ); if( randomize && BUGGIFY ) DD_LOCATION_CACHE_SIZE = 3;
|
||||
init( MOVEKEYS_LOCK_POLLING_DELAY, 5.0 );
|
||||
init( DEBOUNCE_RECRUITING_DELAY, 5.0 );
|
||||
init( DD_FAILURE_TIME, 1.0 ); if( randomize && BUGGIFY ) DD_FAILURE_TIME = 10.0;
|
||||
|
||||
// Redwood Storage Engine
|
||||
init( PREFIX_TREE_IMMEDIATE_KEY_SIZE_LIMIT, 30 );
|
||||
|
|
|
@ -128,6 +128,7 @@ public:
|
|||
int64_t DD_LOCATION_CACHE_SIZE;
|
||||
double MOVEKEYS_LOCK_POLLING_DELAY;
|
||||
double DEBOUNCE_RECRUITING_DELAY;
|
||||
double DD_FAILURE_TIME;
|
||||
|
||||
// Redwood Storage Engine
|
||||
int PREFIX_TREE_IMMEDIATE_KEY_SIZE_LIMIT;
|
||||
|
|
Loading…
Reference in New Issue