fix teamSorter usage bug

This commit is contained in:
Xiaoxi Wang 2022-03-24 13:16:10 -07:00
parent e6893ba0b6
commit b811a62b65
4 changed files with 27 additions and 25 deletions

View File

@ -821,9 +821,9 @@ struct ChangeFeedVersionUpdateRequest {
struct GetStorageMetricsReply {
constexpr static FileIdentifier file_identifier = 15491478;
StorageMetrics load;
StorageMetrics available;
StorageMetrics capacity;
StorageMetrics load; // sum of key-value metrics (logical bytes)
StorageMetrics available; // physical bytes
StorageMetrics capacity; // physical bytes
double bytesInputRate;
int64_t versionLag;
double lastUpdate;

View File

@ -250,8 +250,8 @@ public:
self->shardsAffectedByTeamFailure->hasShards(ShardsAffectedByTeamFailure::Team(
self->teams[currentIndex]->getServerIDs(), self->primary))) &&
// sort conditions
(!bestOption.present() || req.lessCompare(bestOption.get(), self->teams[currentIndex]) ||
!req.lessCompareByLoad(loadBytes, bestLoadBytes))) {
(!bestOption.present() ||
req.lessCompare(bestOption.get(), self->teams[currentIndex], bestLoadBytes, loadBytes))) {
bestLoadBytes = loadBytes;
bestOption = self->teams[currentIndex];
bestIndex = currentIndex;
@ -299,8 +299,8 @@ public:
for (int i = 0; i < randomTeams.size(); i++) {
int64_t loadBytes = randomTeams[i]->getLoadBytes(true, req.inflightPenalty);
if (!bestOption.present() || req.lessCompare(bestOption.get(), randomTeams[i]) ||
!req.lessCompareByLoad(loadBytes, bestLoadBytes)) {
if (!bestOption.present() ||
req.lessCompare(bestOption.get(), randomTeams[i], bestLoadBytes, loadBytes)) {
bestLoadBytes = loadBytes;
bestOption = randomTeams[i];
}

View File

@ -106,16 +106,17 @@ struct GetTeamRequest {
teamMustHaveShards(teamMustHaveShards), inflightPenalty(inflightPenalty) {}
// return true if a.score < b.score
[[nodiscard]] bool lessCompare(TeamRef a, TeamRef b) const {
[[nodiscard]] bool lessCompare(TeamRef a, TeamRef b, int64_t aLoadBytes, int64_t bLoadBytes) const {
if (teamSorter) {
return teamSorter(a, b);
}
return false;
return lessCompareByLoad(aLoadBytes, bLoadBytes);
}
// return true if scoreWithLoadBytes < bestScoreWithBestLoadBytes
bool lessCompareByLoad(int64_t loadBytes, int64_t bestLoadBytes) const {
bool lessLoad = loadBytes < bestLoadBytes;
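// Returns true if a scores lower than b when ranked by load bytes:
// - when preferLowerUtilization is false (higher utilization preferred), a higher load means a larger score,
//   so this is true when aLoadBytes <= bLoadBytes;
// - when preferLowerUtilization is true, this is true when aLoadBytes > bLoadBytes.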
bool lessCompareByLoad(int64_t aLoadBytes, int64_t bLoadBytes) const {
bool lessLoad = aLoadBytes <= bLoadBytes;
return preferLowerUtilization ? !lessLoad : lessLoad;
}

View File

@ -86,9 +86,10 @@ struct RelocateData {
bool operator==(const RelocateData& rhs) const {
return priority == rhs.priority && boundaryPriority == rhs.boundaryPriority &&
healthPriority == rhs.healthPriority && keys == rhs.keys && startTime == rhs.startTime &&
workFactor == rhs.workFactor && src == rhs.src && completeSources == rhs.completeSources &&
wantsNewServers == rhs.wantsNewServers && randomId == rhs.randomId;
healthPriority == rhs.healthPriority && reason == rhs.reason && keys == rhs.keys &&
startTime == rhs.startTime && workFactor == rhs.workFactor && src == rhs.src &&
completeSources == rhs.completeSources && wantsNewServers == rhs.wantsNewServers &&
randomId == rhs.randomId;
}
bool operator!=(const RelocateData& rhs) const { return !(*this == rhs); }
};
@ -1040,9 +1041,9 @@ ACTOR Future<Void> dataDistributionRelocator(DDQueueData* self, RelocateData rd,
req.src = rd.src;
req.completeSources = rd.completeSources;
// if (rd.reason == RelocateReason::REBALANCE_READ) {
// req.teamSorter = greaterReadLoad;
// }
if (rd.reason == RelocateReason::REBALANCE_READ) {
req.teamSorter = greaterReadLoad;
}
// bestTeam.second = false if the bestTeam in the teamCollection (in the DC) does not have any
// server that hosts the relocateData. This is possible, for example, in a fearless configuration
// when the remote DC is just brought up.
@ -1343,13 +1344,13 @@ ACTOR Future<bool> rebalanceReadLoad(DDQueueData* self,
return false;
}
if (metrics.keys.present() && metrics.bytes > 0) {
// auto srcLoad = sourceTeam->getLoadReadBandwidth(), destLoad = destTeam->getLoadReadBandwidth();
// if (abs(srcLoad - destLoad) <=
// 3 * std::max(metrics.bytesReadPerKSecond, SERVER_KNOBS->SHARD_READ_HOT_BANDWITH_MIN_PER_KSECONDS)) {
// traceEvent->detail("SkipReason", "TeamTooSimilar");
// return false;
// }
// Verify the shard is still in ShardsAffectedByTeamFailure
// auto srcLoad = sourceTeam->getLoadReadBandwidth(), destLoad = destTeam->getLoadReadBandwidth();
// if (abs(srcLoad - destLoad) <=
// 3 * std::max(metrics.bytesReadPerKSecond, SERVER_KNOBS->SHARD_READ_HOT_BANDWITH_MIN_PER_KSECONDS)) {
// traceEvent->detail("SkipReason", "TeamTooSimilar");
// return false;
// }
// Verify the shard is still in ShardsAffectedByTeamFailure
shards = self->shardsAffectedByTeamFailure->getShardsFor(
ShardsAffectedByTeamFailure::Team(sourceTeam->getServerIDs(), primary));
for (int i = 0; i < shards.size(); i++) {