Added more trace lines and a timeout to the safety check in the test workload
This commit is contained in:
parent
a757e66327
commit
3666c0c776
|
@ -3449,6 +3449,7 @@ ACTOR Future<bool> checkSafeExclusions(Database cx, vector<AddressExclusion> exc
|
|||
}
|
||||
}
|
||||
}
|
||||
TraceEvent("ExclusionSafetyCheckCoordinators");
|
||||
state ClientCoordinators coordinatorList(cx->getConnectionFile());
|
||||
state vector<Future<Optional<LeaderInfo>>> leaderServers;
|
||||
for (int i = 0; i < coordinatorList.clientLeaderServers.size(); i++) {
|
||||
|
@ -3474,7 +3475,7 @@ ACTOR Future<bool> checkSafeExclusions(Database cx, vector<AddressExclusion> exc
|
|||
}
|
||||
int faultTolerance = (leaderServers.size() - 1) / 2 - coordinatorsUnavailable;
|
||||
bool coordinatorCheck = (attemptCoordinatorExclude <= faultTolerance);
|
||||
TraceEvent("ExclusionSafetyCheck")
|
||||
TraceEvent("ExclusionSafetyCheckFinish")
|
||||
.detail("CoordinatorListSize", leaderServers.size())
|
||||
.detail("NumExclusions", exclusions.size())
|
||||
.detail("FaultTolerance", faultTolerance)
|
||||
|
|
|
@ -4315,6 +4315,7 @@ ACTOR Future<Void> ddExclusionSafetyCheck(DistributorExclusionSafetyCheckRequest
|
|||
break;
|
||||
}
|
||||
}
|
||||
TraceEvent("DDExclusionSafetyCheckFinish");
|
||||
req.reply.send(safe);
|
||||
return Void();
|
||||
}
|
||||
|
|
|
@ -1552,6 +1552,7 @@ ACTOR Future<Void> proxyCheckSafeExclusion(Reference<AsyncVar<ServerDBInfo>> db,
|
|||
throw e;
|
||||
}
|
||||
}
|
||||
TraceEvent("SafetyCheckMasterProxyFinish");
|
||||
req.reply.send(safe);
|
||||
return Void();
|
||||
}
|
||||
|
|
|
@ -408,13 +408,26 @@ struct RemoveServersSafelyWorkload : TestWorkload {
|
|||
killProcArray = self->getProcesses(toKill);
|
||||
if (safeKillSet) {
|
||||
loop {
|
||||
state bool safe = false;
|
||||
auto failSet = random_subset(toKillArray, deterministicRandom()->randomInt(1, toKillArray.size() / 2 + 2));
|
||||
toKillMarkFailedArray.resize(failSet.size());
|
||||
std::copy(failSet.begin(), failSet.end(), toKillMarkFailedArray.begin());
|
||||
TraceEvent("RemoveAndKill", functionId)
|
||||
.detail("Step", "SafetyCheck")
|
||||
.detail("Exclusions", describe(toKillMarkFailedArray));
|
||||
bool safe = wait(checkSafeExclusions(cx, toKillMarkFailedArray));
|
||||
loop {
|
||||
choose {
|
||||
when(bool _safe = wait(checkSafeExclusions(cx, toKillMarkFailedArray))) {
|
||||
safe = _safe;
|
||||
break;
|
||||
}
|
||||
when(wait(delay(5.0))) {
|
||||
TraceEvent("RemoveAndKill", functionId)
|
||||
.detail("Step", "SafetyCheckTimedOut")
|
||||
.detail("Exclusions", describe(toKillMarkFailedArray));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (safe) break;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue