From 28c0d96c90fc616a28f2a7218878f058e72bbf45 Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Fri, 6 Jul 2018 14:44:11 -0700 Subject: [PATCH] fix: treat the local region as best when version difference is too large re-check requests when the version difference becomes small --- fdbserver/ClusterController.actor.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 3e6364aa94..1a9446deba 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -604,6 +604,11 @@ public: if(regions[0].priority == regions[1].priority && clusterControllerDcId.present() && regions[1].dcId == clusterControllerDcId.get()) { std::swap(regions[0], regions[1]); } + + if(clusterControllerDcId.present() && regions[1].dcId == clusterControllerDcId.get() && (!versionDifferenceUpdated || datacenterVersionDifference >= SERVER_KNOBS->MAX_VERSION_DIFFERENCE)) { + std::swap(regions[0], regions[1]); + } + bool setPrimaryDesired = false; try { auto reply = findWorkersForConfiguration(req, regions[0].dcId); @@ -2050,8 +2055,14 @@ ACTOR Future updateDatacenterVersionDifference( ClusterControllerData *sel loop { self->versionDifferenceUpdated = false; if(self->db.serverInfo->get().recoveryState >= RecoveryState::ACCEPTING_COMMITS && self->db.config.usableRegions == 1) { + bool oldDifferenceTooLarge = !self->versionDifferenceUpdated || self->datacenterVersionDifference >= SERVER_KNOBS->MAX_VERSION_DIFFERENCE; self->versionDifferenceUpdated = true; self->datacenterVersionDifference = 0; + + if(oldDifferenceTooLarge) { + checkOutstandingRequests(self); + } + Void _ = wait(self->db.serverInfo->onChange()); continue; } @@ -2094,8 +2105,14 @@ ACTOR Future updateDatacenterVersionDifference( ClusterControllerData *sel break; } + bool oldDifferenceTooLarge = !self->versionDifferenceUpdated || self->datacenterVersionDifference >= SERVER_KNOBS->MAX_VERSION_DIFFERENCE; self->versionDifferenceUpdated = true; self->datacenterVersionDifference = primaryMetrics.get().v - remoteMetrics.get().v; + + if(oldDifferenceTooLarge && self->datacenterVersionDifference < SERVER_KNOBS->MAX_VERSION_DIFFERENCE) { + checkOutstandingRequests(self); + } + if(now() - lastLogTime > SERVER_KNOBS->CLUSTER_CONTROLLER_LOGGING_DELAY) { lastLogTime = now(); TraceEvent("DatacenterVersionDifference", self->id).detail("Difference", self->datacenterVersionDifference);