added logging for the datacenter version difference

This commit is contained in:
Evan Tschannen 2018-06-21 16:31:52 -07:00
parent 8bd7eaebdb
commit 678b4494f4
3 changed files with 9 additions and 4 deletions

View File

@ -1941,6 +1941,7 @@ ACTOR Future<Void> updatedChangedDatacenters(ClusterControllerData *self) {
}
ACTOR Future<Void> updateDatacenterVersionDifference( ClusterControllerData *self ) {
double lastLogTime = 0;
loop {
self->versionDifferenceUpdated = false;
if(self->db.serverInfo->get().recoveryState >= RecoveryState::FULLY_RECOVERED && self->db.config.usableRegions == 1) {
@ -1977,12 +1978,12 @@ ACTOR Future<Void> updateDatacenterVersionDifference( ClusterControllerData *sel
Void _ = wait(self->db.serverInfo->onChange());
continue;
}
state Future<Void> onChange = self->db.serverInfo->onChange();
loop {
state Future<TLogQueuingMetricsReply> primaryMetrics = primaryLog.get().getQueuingMetrics.getReply( TLogQueuingMetricsRequest() );
state Future<TLogQueuingMetricsReply> remoteMetrics = remoteLog.get().getQueuingMetrics.getReply( TLogQueuingMetricsRequest() );
Void _ = wait( ( success(primaryMetrics) && success(remoteMetrics) ) || onChange );
if(onChange.isReady()) {
break;
@ -1990,6 +1991,10 @@ ACTOR Future<Void> updateDatacenterVersionDifference( ClusterControllerData *sel
self->versionDifferenceUpdated = true;
self->datacenterVersionDifference = primaryMetrics.get().v - remoteMetrics.get().v;
if(now() - lastLogTime > SERVER_KNOBS->CLUSTER_CONTROLLER_LOGGING_DELAY) {
lastLogTime = now();
TraceEvent("DatacenterVersionDifference", self->id).detail("Difference", self->datacenterVersionDifference);
}
Void _ = wait( delay(SERVER_KNOBS->VERSION_LAG_METRIC_INTERVAL) || onChange );
if(onChange.isReady()) {

View File

@ -244,7 +244,6 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
init( COMMIT_BATCHES_MEM_TO_TOTAL_MEM_SCALE_FACTOR, 10.0 );
// Master Server
init( MASTER_LOGGING_DELAY, 1.0 );
// masterCommitter() in the master server will allow lower priority tasks (e.g. DataDistribution)
// by delay()ing for this amount of time between accepted batches of TransactionRequests.
init( COMMIT_SLEEP_TIME, 0.0001 ); if( randomize && BUGGIFY ) COMMIT_SLEEP_TIME = 0;
@ -263,6 +262,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
init( LAST_LIMITED_RATIO, 0.6 );
//Cluster Controller
init( CLUSTER_CONTROLLER_LOGGING_DELAY, 5.0 );
init( MASTER_FAILURE_REACTION_TIME, 0.4 ); if( randomize && BUGGIFY ) MASTER_FAILURE_REACTION_TIME = 10.0;
init( MASTER_FAILURE_SLOPE_DURING_RECOVERY, 0.1 );
init( WORKER_COORDINATION_PING_DELAY, 60 );

View File

@ -188,7 +188,6 @@ public:
double PROXY_SPIN_DELAY;
// Master Server
double MASTER_LOGGING_DELAY;
double COMMIT_SLEEP_TIME;
double MIN_BALANCE_TIME;
int64_t MIN_BALANCE_DIFFERENCE;
@ -204,6 +203,7 @@ public:
int64_t RESOLVER_STATE_MEMORY_LIMIT;
//Cluster Controller
double CLUSTER_CONTROLLER_LOGGING_DELAY;
double MASTER_FAILURE_REACTION_TIME;
double MASTER_FAILURE_SLOPE_DURING_RECOVERY;
int WORKER_COORDINATION_PING_DELAY;