Remove explicit degraded peer recovery since this may be a false positive
This commit is contained in:
parent
c2752dc773
commit
3cb587edfb
|
@ -2779,11 +2779,14 @@ TEST_CASE("/fdbserver/clustercontroller/updateWorkerHealth") {
|
|||
ASSERT(health.degradedPeers.find(badPeer1) != health.degradedPeers.end());
|
||||
ASSERT_EQ(health.degradedPeers[badPeer1].startTime, health.degradedPeers[badPeer1].lastRefreshTime);
|
||||
ASSERT(health.degradedPeers.find(badPeer2) != health.degradedPeers.end());
|
||||
ASSERT_EQ(health.degradedPeers[badPeer2].startTime, health.degradedPeers[badPeer2].lastRefreshTime);
|
||||
}
|
||||
|
||||
// Create a `UpdateWorkerHealthRequest` with two bad peers, one from the previous test and a new one.
|
||||
// The one from the previous test should have lastRefreshTime updated.
|
||||
// The other one from the previous test not included in this test should be removed.
|
||||
// The other one from the previous test not included in this test should not be removed.
|
||||
state double previousStartTime;
|
||||
state double previousRefreshTime;
|
||||
{
|
||||
// Make the time to move so that now() guarantees to return a larger value than before.
|
||||
wait(delay(0.001));
|
||||
|
@ -2794,20 +2797,31 @@ TEST_CASE("/fdbserver/clustercontroller/updateWorkerHealth") {
|
|||
data.updateWorkerHealth(req);
|
||||
ASSERT(data.workerHealth.find(workerAddress) != data.workerHealth.end());
|
||||
auto& health = data.workerHealth[workerAddress];
|
||||
ASSERT_EQ(health.degradedPeers.size(), 2);
|
||||
ASSERT_EQ(health.degradedPeers.size(), 3);
|
||||
ASSERT(health.degradedPeers.find(badPeer1) != health.degradedPeers.end());
|
||||
ASSERT_LT(health.degradedPeers[badPeer1].startTime, health.degradedPeers[badPeer1].lastRefreshTime);
|
||||
ASSERT(health.degradedPeers.find(badPeer2) == health.degradedPeers.end());
|
||||
ASSERT(health.degradedPeers.find(badPeer2) != health.degradedPeers.end());
|
||||
ASSERT_EQ(health.degradedPeers[badPeer2].startTime, health.degradedPeers[badPeer2].lastRefreshTime);
|
||||
ASSERT_EQ(health.degradedPeers[badPeer2].startTime, health.degradedPeers[badPeer1].startTime);
|
||||
ASSERT(health.degradedPeers.find(badPeer3) != health.degradedPeers.end());
|
||||
ASSERT_EQ(health.degradedPeers[badPeer3].startTime, health.degradedPeers[badPeer3].lastRefreshTime);
|
||||
previousStartTime = health.degradedPeers[badPeer3].startTime;
|
||||
previousRefreshTime = health.degradedPeers[badPeer3].lastRefreshTime;
|
||||
}
|
||||
|
||||
// Create a `UpdateWorkerHealthRequest` with empty `degradedPeers`, which should remove the worker from
|
||||
// Create a `UpdateWorkerHealthRequest` with empty `degradedPeers`, which should not remove the worker from
|
||||
// `workerHealth`.
|
||||
{
|
||||
wait(delay(0.001));
|
||||
UpdateWorkerHealthRequest req;
|
||||
req.address = workerAddress;
|
||||
data.updateWorkerHealth(req);
|
||||
ASSERT(data.workerHealth.find(workerAddress) == data.workerHealth.end());
|
||||
ASSERT(data.workerHealth.find(workerAddress) != data.workerHealth.end());
|
||||
auto& health = data.workerHealth[workerAddress];
|
||||
ASSERT_EQ(health.degradedPeers.size(), 3);
|
||||
ASSERT(health.degradedPeers.find(badPeer3) != health.degradedPeers.end());
|
||||
ASSERT_EQ(health.degradedPeers[badPeer3].startTime, previousStartTime);
|
||||
ASSERT_EQ(health.degradedPeers[badPeer3].lastRefreshTime, previousRefreshTime);
|
||||
}
|
||||
|
||||
return Void();
|
||||
|
|
|
@ -2907,13 +2907,6 @@ public:
|
|||
.detail("WorkerAddress", req.address)
|
||||
.detail("DegradedPeers", degradedPeersString);
|
||||
|
||||
// `req.degradedPeers` contains the latest peer performance view from the worker. Clear the worker if the
|
||||
// requested worker doesn't see any degraded peers.
|
||||
if (req.degradedPeers.empty()) {
|
||||
workerHealth.erase(req.address);
|
||||
return;
|
||||
}
|
||||
|
||||
double currentTime = now();
|
||||
|
||||
// Current `workerHealth` doesn't have any information about the incoming worker. Add the worker into
|
||||
|
@ -2931,21 +2924,6 @@ public:
|
|||
|
||||
auto& health = workerHealth[req.address];
|
||||
|
||||
// First, remove any degraded peers recorded in the `workerHealth`, but aren't in the incoming request. These
|
||||
// machines network performance should have recovered.
|
||||
std::unordered_set<NetworkAddress> recoveredPeers;
|
||||
for (const auto& [peer, times] : health.degradedPeers) {
|
||||
recoveredPeers.insert(peer);
|
||||
}
|
||||
for (const auto& peer : req.degradedPeers) {
|
||||
if (recoveredPeers.find(peer) != recoveredPeers.end()) {
|
||||
recoveredPeers.erase(peer);
|
||||
}
|
||||
}
|
||||
for (const auto& peer : recoveredPeers) {
|
||||
health.degradedPeers.erase(peer);
|
||||
}
|
||||
|
||||
// Update the worker's degradedPeers.
|
||||
for (const auto& peer : req.degradedPeers) {
|
||||
auto it = health.degradedPeers.find(peer);
|
||||
|
|
Loading…
Reference in New Issue