Added a wait that checks for master proxy changes, and added a few more trace events
This commit is contained in:
parent
b9c73632e7
commit
04d514c483
|
@ -3434,9 +3434,21 @@ ACTOR Future<Void> snapCreate(Database cx, StringRef snapCmd, UID snapUID) {
|
|||
}
|
||||
|
||||
ACTOR Future<bool> checkSafeExclusions(Database cx, vector<AddressExclusion> exclusions) {
|
||||
ExclusionSafetyCheckRequest req(exclusions);
|
||||
state bool ddCheck =
|
||||
wait(loadBalance(cx->getMasterProxies(false), &MasterProxyInterface::exclusionSafetyCheckReq, req, cx->taskID));
|
||||
TraceEvent("ExclusionSafetyCheckBegin")
|
||||
.detail("NumExclusion", exclusions.size())
|
||||
.detail("Exclusions", describe(exclusions));
|
||||
state ExclusionSafetyCheckRequest req(exclusions);
|
||||
state bool ddCheck;
|
||||
loop {
|
||||
choose {
|
||||
when(wait(cx->onMasterProxiesChanged())) {}
|
||||
when(bool _ddCheck = wait(loadBalance(cx->getMasterProxies(false),
|
||||
&MasterProxyInterface::exclusionSafetyCheckReq, req, cx->taskID))) {
|
||||
ddCheck = _ddCheck;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
state ClientCoordinators coordinatorList(cx->getConnectionFile());
|
||||
state vector<Future<Optional<LeaderInfo>>> leaderServers;
|
||||
for (int i = 0; i < coordinatorList.clientLeaderServers.size(); i++) {
|
||||
|
@ -3461,12 +3473,14 @@ ACTOR Future<bool> checkSafeExclusions(Database cx, vector<AddressExclusion> exc
|
|||
}
|
||||
}
|
||||
int faultTolerance = (leaderServers.size() - 1) / 2 - coordinatorsUnavailable;
|
||||
bool coordinatorCheck = (attemptCoordinatorExclude <= faultTolerance);
|
||||
TraceEvent("ExclusionSafetyCheck")
|
||||
.detail("CoordinatorListSize", leaderServers.size())
|
||||
.detail("NumExclusions", exclusions.size())
|
||||
.detail("FaultTolerance", faultTolerance)
|
||||
.detail("AttemptCoordinatorExclude", attemptCoordinatorExclude);
|
||||
.detail("AttemptCoordinatorExclude", attemptCoordinatorExclude)
|
||||
.detail("CoordinatorCheck", coordinatorCheck)
|
||||
.detail("DataDistributorCheck", ddCheck);
|
||||
|
||||
bool coordinatorCheck = (attemptCoordinatorExclude <= faultTolerance);
|
||||
return (ddCheck && coordinatorCheck);
|
||||
}
|
||||
|
|
|
@ -4284,7 +4284,9 @@ ACTOR Future<Void> ddSnapCreate(DistributorSnapRequest snapReq, Reference<AsyncV
|
|||
|
||||
ACTOR Future<Void> ddExclusionSafetyCheck(DistributorExclusionSafetyCheckRequest req, Reference<DDTeamCollection> tc,
|
||||
Database cx) {
|
||||
TraceEvent("DDExclusionSafetyCheckBegin");
|
||||
if (!tc.isValid()) {
|
||||
TraceEvent("DDExclusionSafetyCheckTeamCollectionInvalid");
|
||||
req.reply.send(false);
|
||||
return Void();
|
||||
}
|
||||
|
|
|
@ -1531,6 +1531,7 @@ ACTOR Future<Void> proxySnapCreate(ProxySnapRequest snapReq, ProxyCommitData* co
|
|||
}
|
||||
|
||||
ACTOR Future<Void> proxyCheckSafeExclusion(Reference<AsyncVar<ServerDBInfo>> db, ExclusionSafetyCheckRequest req) {
|
||||
TraceEvent("SafetyCheckMasterProxyBegin");
|
||||
if (!db->get().distributor.present()) {
|
||||
TraceEvent(SevWarnAlways, "DataDistributorNotPresent").detail("Operation", "ExclusionSafetyCheck");
|
||||
req.reply.send(false);
|
||||
|
|
|
@ -412,8 +412,8 @@ struct RemoveServersSafelyWorkload : TestWorkload {
|
|||
toKillMarkFailedArray.resize(failSet.size());
|
||||
std::copy(failSet.begin(), failSet.end(), toKillMarkFailedArray.begin());
|
||||
TraceEvent("RemoveAndKill", functionId)
|
||||
.detail("Step", "Safety Check")
|
||||
.detail("Exclusions", describe(toKillMarkFailedArray));
|
||||
.detail("Step", "SafetyCheck")
|
||||
.detail("Exclusions", describe(toKillMarkFailedArray));
|
||||
bool safe = wait(checkSafeExclusions(cx, toKillMarkFailedArray));
|
||||
if (safe) break;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue