diff --git a/fdbrpc/include/fdbrpc/LoadBalance.actor.h b/fdbrpc/include/fdbrpc/LoadBalance.actor.h index 91ab0e3b6d..cf0c2d11e3 100644 --- a/fdbrpc/include/fdbrpc/LoadBalance.actor.h +++ b/fdbrpc/include/fdbrpc/LoadBalance.actor.h @@ -486,6 +486,11 @@ Future loadBalance( // server count is within "LOAD_BALANCE_MAX_BAD_OPTIONS". We // do not need to consider any remote servers. break; + } else if (badServers == alternatives->countBest() && i == badServers) { + TraceEvent("AllLocalAlternativesFailed") + .detail("Alternatives", alternatives->description()) + .detail("Total", alternatives->size()) + .detail("Best", alternatives->countBest()); } RequestStream const* thisStream = &alternatives->get(i, channel); @@ -587,6 +592,7 @@ Future loadBalance( // nextAlt. This logic matters only if model == nullptr. Otherwise, the // bestAlt and nextAlt have been decided. state RequestStream const* stream = nullptr; + state LBDistance::Type distance; for (int alternativeNum = 0; alternativeNum < alternatives->size(); alternativeNum++) { int useAlt = nextAlt; if (nextAlt == startAlt) @@ -595,6 +601,7 @@ Future loadBalance( useAlt = (nextAlt + alternatives->size() - 1) % alternatives->size(); stream = &alternatives->get(useAlt, channel); + distance = alternatives->getDistance(useAlt); if (!IFailureMonitor::failureMonitor().getState(stream->getEndpoint()).failed && (!firstRequestEndpoint.present() || stream->getEndpoint().token.first() != firstRequestEndpoint.get())) break; @@ -602,6 +609,7 @@ Future loadBalance( if (nextAlt == startAlt) triedAllOptions = TriedAllOptions::True; stream = nullptr; + distance = LBDistance::DISTANT; } if (!stream && !firstRequestData.isValid()) { @@ -637,6 +645,18 @@ Future loadBalance( firstRequestEndpoint = Optional(); } else if (firstRequestData.isValid()) { // Issue a second request, the first one is taking a long time. + if (distance == LBDistance::DISTANT) { + TraceEvent("LBDistant2nd") + .suppressFor(0.1) + .detail("Distance", (int)distance) + .detail("BackOff", backoff) + .detail("TriedAllOptions", triedAllOptions) + .detail("Alternatives", alternatives->description()) + .detail("Token", stream->getEndpoint().token) + .detail("Total", alternatives->size()) + .detail("Best", alternatives->countBest()) + .detail("Attempts", numAttempts); + } secondRequestData.startRequest(backoff, triedAllOptions, stream, request, model, alternatives, channel); loop choose { @@ -664,6 +684,18 @@ Future loadBalance( } } else { // Issue a request, if it takes too long to get a reply, go around the loop + if (distance == LBDistance::DISTANT) { + TraceEvent("LBDistant") + .suppressFor(0.1) + .detail("Distance", (int)distance) + .detail("BackOff", backoff) + .detail("TriedAllOptions", triedAllOptions) + .detail("Alternatives", alternatives->description()) + .detail("Token", stream->getEndpoint().token) + .detail("Total", alternatives->size()) + .detail("Best", alternatives->countBest()) + .detail("Attempts", numAttempts); + } firstRequestData.startRequest(backoff, triedAllOptions, stream, request, model, alternatives, channel); firstRequestEndpoint = stream->getEndpoint().token.first(); diff --git a/fdbrpc/include/fdbrpc/MultiInterface.h b/fdbrpc/include/fdbrpc/MultiInterface.h index 4f15dbf087..85fa195206 100644 --- a/fdbrpc/include/fdbrpc/MultiInterface.h +++ b/fdbrpc/include/fdbrpc/MultiInterface.h @@ -226,6 +226,7 @@ public: } T const& getInterface(int index) { return alternatives[index]->interf; } + LBDistance::Type getDistance(int index) const { return (LBDistance::Type)alternatives[index]->distance; } UID getId(int index) const { return alternatives[index]->interf.id(); } bool hasInterface(UID id) const { for (const auto& ref : alternatives) { diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index e21548c199..1c5edf1430 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -3358,7 +3358,9 @@ ACTOR Future fdbd(Reference connRecord, makeReference>(getCCPriorityInfo(fitnessFilePath, processClass)); auto serverDBInfo = ServerDBInfo(); serverDBInfo.client.isEncryptionEnabled = SERVER_KNOBS->ENABLE_ENCRYPTION; + serverDBInfo.myLocality = localities; auto dbInfo = makeReference>(serverDBInfo); + TraceEvent("MyLocality").detail("Locality", dbInfo->get().myLocality.toString()); actors.push_back(reportErrors(monitorAndWriteCCPriorityInfo(fitnessFilePath, asyncPriorityInfo), "MonitorAndWriteCCPriorityInfo"));