2017-05-26 04:48:44 +08:00
|
|
|
|
/*
|
|
|
|
|
* DataDistributionQueue.actor.cpp
|
|
|
|
|
*
|
|
|
|
|
* This source file is part of the FoundationDB open source project
|
|
|
|
|
*
|
2022-03-22 04:36:23 +08:00
|
|
|
|
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
2018-02-22 02:25:11 +08:00
|
|
|
|
*
|
2017-05-26 04:48:44 +08:00
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
|
* You may obtain a copy of the License at
|
2018-02-22 02:25:11 +08:00
|
|
|
|
*
|
2017-05-26 04:48:44 +08:00
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
2018-02-22 02:25:11 +08:00
|
|
|
|
*
|
2017-05-26 04:48:44 +08:00
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
|
* limitations under the License.
|
|
|
|
|
*/
|
|
|
|
|
|
2018-08-11 06:18:24 +08:00
|
|
|
|
#include <limits>
|
2020-11-04 12:24:39 +08:00
|
|
|
|
#include <numeric>
|
|
|
|
|
#include <vector>
|
2018-08-11 06:18:24 +08:00
|
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
|
#include "flow/ActorCollection.h"
|
2021-09-25 01:04:30 +08:00
|
|
|
|
#include "flow/FastRef.h"
|
|
|
|
|
#include "flow/Trace.h"
|
2018-08-02 09:09:54 +08:00
|
|
|
|
#include "flow/Util.h"
|
2017-05-26 04:48:44 +08:00
|
|
|
|
#include "fdbrpc/sim_validation.h"
|
|
|
|
|
#include "fdbclient/SystemData.h"
|
2019-03-06 02:29:37 +08:00
|
|
|
|
#include "fdbserver/DataDistribution.actor.h"
|
2022-08-17 05:32:55 +08:00
|
|
|
|
#include "fdbserver/DDSharedContext.h"
|
2017-05-26 04:48:44 +08:00
|
|
|
|
#include "fdbclient/DatabaseContext.h"
|
2019-02-18 10:55:52 +08:00
|
|
|
|
#include "fdbserver/MoveKeys.actor.h"
|
2018-10-20 01:30:13 +08:00
|
|
|
|
#include "fdbserver/Knobs.h"
|
2017-05-26 04:48:44 +08:00
|
|
|
|
#include "fdbrpc/simulator.h"
|
2022-06-10 03:16:12 +08:00
|
|
|
|
#include "fdbserver/DDTxnProcessor.h"
|
2022-08-05 06:28:33 +08:00
|
|
|
|
#include "flow/DebugTrace.h"
|
2018-08-11 06:18:24 +08:00
|
|
|
|
#include "flow/actorcompiler.h" // This must be the last #include.
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
|
|
#define WORK_FULL_UTILIZATION 10000 // This is not a knob; it is a fixed point scaling factor!
|
|
|
|
|
|
2022-05-23 15:12:48 +08:00
|
|
|
|
typedef Reference<IDataDistributionTeam> ITeamRef;
|
|
|
|
|
typedef std::pair<ITeamRef, ITeamRef> SrcDestTeamPair;
|
|
|
|
|
|
2022-07-15 00:06:56 +08:00
|
|
|
|
// Returns true when `reason` is one of the two disk-utilization rebalance reasons
// (REBALANCE_UNDERUTILIZED_TEAM or REBALANCE_OVERUTILIZED_TEAM).
inline bool isDataMovementForDiskBalancing(DataMovementReason reason) {
	const bool underutilized = (reason == DataMovementReason::REBALANCE_UNDERUTILIZED_TEAM);
	const bool overutilized = (reason == DataMovementReason::REBALANCE_OVERUTILIZED_TEAM);
	return underutilized || overutilized;
}
|
|
|
|
|
|
|
|
|
|
// Returns true when `reason` is one of the two read-load rebalance reasons
// (REBALANCE_READ_OVERUTIL_TEAM or REBALANCE_READ_UNDERUTIL_TEAM).
inline bool isDataMovementForReadBalancing(DataMovementReason reason) {
	const bool readOverutilized = (reason == DataMovementReason::REBALANCE_READ_OVERUTIL_TEAM);
	const bool readUnderutilized = (reason == DataMovementReason::REBALANCE_READ_UNDERUTIL_TEAM);
	return readOverutilized || readUnderutilized;
}
|
|
|
|
|
|
|
|
|
|
// Returns true when `reason` is one of the two "overutilized" rebalance reasons
// (REBALANCE_OVERUTILIZED_TEAM for disk, REBALANCE_READ_OVERUTIL_TEAM for reads).
inline bool isDataMovementForMountainChopper(DataMovementReason reason) {
	const bool diskOverutilized = (reason == DataMovementReason::REBALANCE_OVERUTILIZED_TEAM);
	const bool readOverutilized = (reason == DataMovementReason::REBALANCE_READ_OVERUTIL_TEAM);
	return diskOverutilized || readOverutilized;
}
|
|
|
|
|
|
2022-07-25 15:50:37 +08:00
|
|
|
|
// FIXME: Always use DataMovementReason to invoke these functions.
|
2022-04-13 07:22:17 +08:00
|
|
|
|
inline bool isValleyFillerPriority(int priority) {
|
|
|
|
|
return priority == SERVER_KNOBS->PRIORITY_REBALANCE_UNDERUTILIZED_TEAM ||
|
|
|
|
|
priority == SERVER_KNOBS->PRIORITY_REBALANCE_READ_UNDERUTIL_TEAM;
|
|
|
|
|
}
|
|
|
|
|
|
2022-07-15 00:06:56 +08:00
|
|
|
|
// Returns true when `reason` is one of the two "underutilized" rebalance reasons
// (REBALANCE_UNDERUTILIZED_TEAM for disk, REBALANCE_READ_UNDERUTIL_TEAM for reads).
inline bool isDataMovementForValleyFiller(DataMovementReason reason) {
	const bool diskUnderutilized = (reason == DataMovementReason::REBALANCE_UNDERUTILIZED_TEAM);
	const bool readUnderutilized = (reason == DataMovementReason::REBALANCE_READ_UNDERUTIL_TEAM);
	return diskUnderutilized || readUnderutilized;
}
|
|
|
|
|
|
2022-08-09 12:44:45 +08:00
|
|
|
|
typedef std::map<DataMovementReason, int> DmReasonPriorityMapping;
|
|
|
|
|
typedef std::map<int, DataMovementReason> PriorityDmReasonMapping;
|
2022-08-11 01:47:43 +08:00
|
|
|
|
std::pair<const DmReasonPriorityMapping*, const PriorityDmReasonMapping*> buildPriorityMappings() {
|
2022-08-09 12:44:45 +08:00
|
|
|
|
static DmReasonPriorityMapping reasonPriority{
|
|
|
|
|
{ DataMovementReason::INVALID, -1 },
|
|
|
|
|
{ DataMovementReason::RECOVER_MOVE, SERVER_KNOBS->PRIORITY_RECOVER_MOVE },
|
|
|
|
|
{ DataMovementReason::REBALANCE_UNDERUTILIZED_TEAM, SERVER_KNOBS->PRIORITY_REBALANCE_UNDERUTILIZED_TEAM },
|
|
|
|
|
{ DataMovementReason::REBALANCE_OVERUTILIZED_TEAM, SERVER_KNOBS->PRIORITY_REBALANCE_OVERUTILIZED_TEAM },
|
|
|
|
|
{ DataMovementReason::REBALANCE_READ_OVERUTIL_TEAM, SERVER_KNOBS->PRIORITY_REBALANCE_READ_OVERUTIL_TEAM },
|
|
|
|
|
{ DataMovementReason::REBALANCE_READ_UNDERUTIL_TEAM, SERVER_KNOBS->PRIORITY_REBALANCE_READ_UNDERUTIL_TEAM },
|
|
|
|
|
{ DataMovementReason::PERPETUAL_STORAGE_WIGGLE, SERVER_KNOBS->PRIORITY_PERPETUAL_STORAGE_WIGGLE },
|
|
|
|
|
{ DataMovementReason::TEAM_HEALTHY, SERVER_KNOBS->PRIORITY_TEAM_HEALTHY },
|
|
|
|
|
{ DataMovementReason::TEAM_CONTAINS_UNDESIRED_SERVER, SERVER_KNOBS->PRIORITY_TEAM_CONTAINS_UNDESIRED_SERVER },
|
|
|
|
|
{ DataMovementReason::TEAM_REDUNDANT, SERVER_KNOBS->PRIORITY_TEAM_REDUNDANT },
|
|
|
|
|
{ DataMovementReason::MERGE_SHARD, SERVER_KNOBS->PRIORITY_MERGE_SHARD },
|
|
|
|
|
{ DataMovementReason::POPULATE_REGION, SERVER_KNOBS->PRIORITY_POPULATE_REGION },
|
|
|
|
|
{ DataMovementReason::TEAM_UNHEALTHY, SERVER_KNOBS->PRIORITY_TEAM_UNHEALTHY },
|
|
|
|
|
{ DataMovementReason::TEAM_2_LEFT, SERVER_KNOBS->PRIORITY_TEAM_2_LEFT },
|
|
|
|
|
{ DataMovementReason::TEAM_1_LEFT, SERVER_KNOBS->PRIORITY_TEAM_1_LEFT },
|
|
|
|
|
{ DataMovementReason::TEAM_FAILED, SERVER_KNOBS->PRIORITY_TEAM_FAILED },
|
|
|
|
|
{ DataMovementReason::TEAM_0_LEFT, SERVER_KNOBS->PRIORITY_TEAM_0_LEFT },
|
2022-08-20 02:47:00 +08:00
|
|
|
|
{ DataMovementReason::SPLIT_SHARD, SERVER_KNOBS->PRIORITY_SPLIT_SHARD },
|
|
|
|
|
{ DataMovementReason::ENFORCE_MOVE_OUT_OF_PHYSICAL_SHARD,
|
|
|
|
|
SERVER_KNOBS->PRIORITY_ENFORCE_MOVE_OUT_OF_PHYSICAL_SHARD }
|
2022-08-09 12:44:45 +08:00
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
static PriorityDmReasonMapping priorityReason;
|
|
|
|
|
if (priorityReason.empty()) { // only build once
|
|
|
|
|
for (const auto& [r, p] : reasonPriority) {
|
|
|
|
|
priorityReason[p] = r;
|
|
|
|
|
}
|
|
|
|
|
// Don't allow 2 priorities value being the same.
|
|
|
|
|
if (priorityReason.size() != reasonPriority.size()) {
|
|
|
|
|
TraceEvent(SevError, "DuplicateDataMovementPriority").log();
|
|
|
|
|
ASSERT(false);
|
|
|
|
|
}
|
2022-07-15 00:06:56 +08:00
|
|
|
|
}
|
2022-08-09 12:44:45 +08:00
|
|
|
|
|
2022-08-11 01:47:43 +08:00
|
|
|
|
return std::make_pair(&reasonPriority, &priorityReason);
|
2022-08-09 12:44:45 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Looks up the priority value registered for `reason` in the table built by buildPriorityMappings().
// Uses map::at(), so a reason missing from the table raises std::out_of_range.
int dataMovementPriority(DataMovementReason reason) {
	const DmReasonPriorityMapping* priorityByReason = buildPriorityMappings().first;
	return priorityByReason->at(reason);
}
|
|
|
|
|
|
|
|
|
|
// Looks up the DataMovementReason registered for `priority` in the inverse table built by buildPriorityMappings().
// Uses map::at(), so a priority value that is not in the table raises std::out_of_range.
DataMovementReason priorityToDataMovementReason(int priority) {
	const PriorityDmReasonMapping* reasonByPriority = buildPriorityMappings().second;
	return reasonByPriority->at(priority);
}
|
|
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
|
struct RelocateData {
|
|
|
|
|
KeyRange keys;
|
|
|
|
|
int priority;
|
2019-10-12 08:50:43 +08:00
|
|
|
|
int boundaryPriority;
|
|
|
|
|
int healthPriority;
|
2022-03-19 07:39:31 +08:00
|
|
|
|
RelocateReason reason;
|
2019-10-12 08:50:43 +08:00
|
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
|
double startTime;
|
2022-08-05 07:57:55 +08:00
|
|
|
|
UID randomId; // inherit from RelocateShard.traceId
|
2022-07-08 11:49:16 +08:00
|
|
|
|
UID dataMoveId;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
int workFactor;
|
|
|
|
|
std::vector<UID> src;
|
2018-02-03 03:46:04 +08:00
|
|
|
|
std::vector<UID> completeSources;
|
2021-12-14 02:13:34 +08:00
|
|
|
|
std::vector<UID> completeDests;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
bool wantsNewServers;
|
2022-02-25 23:33:46 +08:00
|
|
|
|
bool cancellable;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
TraceInterval interval;
|
2022-07-08 11:49:16 +08:00
|
|
|
|
std::shared_ptr<DataMove> dataMove;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
2019-10-12 08:50:43 +08:00
|
|
|
|
RelocateData()
|
2022-08-09 11:43:35 +08:00
|
|
|
|
: priority(-1), boundaryPriority(-1), healthPriority(-1), reason(RelocateReason::OTHER), startTime(-1),
|
2022-07-08 11:49:16 +08:00
|
|
|
|
dataMoveId(anonymousShardId), workFactor(0), wantsNewServers(false), cancellable(false),
|
|
|
|
|
interval("QueuedRelocation") {}
|
2019-10-12 09:31:43 +08:00
|
|
|
|
explicit RelocateData(RelocateShard const& rs)
|
|
|
|
|
: keys(rs.keys), priority(rs.priority), boundaryPriority(isBoundaryPriority(rs.priority) ? rs.priority : -1),
|
2022-03-19 07:39:31 +08:00
|
|
|
|
healthPriority(isHealthPriority(rs.priority) ? rs.priority : -1), reason(rs.reason), startTime(now()),
|
2022-08-05 07:57:55 +08:00
|
|
|
|
randomId(rs.traceId.isValid() ? rs.traceId : deterministicRandom()->randomUniqueID()),
|
|
|
|
|
dataMoveId(rs.dataMoveId), workFactor(0), wantsNewServers(isDataMovementForMountainChopper(rs.moveReason) ||
|
|
|
|
|
isDataMovementForValleyFiller(rs.moveReason) ||
|
|
|
|
|
rs.moveReason == DataMovementReason::SPLIT_SHARD ||
|
|
|
|
|
rs.moveReason == DataMovementReason::TEAM_REDUNDANT),
|
2022-08-05 06:28:33 +08:00
|
|
|
|
cancellable(true), interval("QueuedRelocation", randomId), dataMove(rs.dataMove) {
|
2022-07-08 11:49:16 +08:00
|
|
|
|
if (dataMove != nullptr) {
|
|
|
|
|
this->src.insert(this->src.end(), dataMove->meta.src.begin(), dataMove->meta.src.end());
|
|
|
|
|
}
|
|
|
|
|
}
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
2019-10-12 08:50:43 +08:00
|
|
|
|
static bool isHealthPriority(int priority) {
|
2020-03-05 06:07:32 +08:00
|
|
|
|
return priority == SERVER_KNOBS->PRIORITY_POPULATE_REGION ||
|
|
|
|
|
priority == SERVER_KNOBS->PRIORITY_TEAM_UNHEALTHY || priority == SERVER_KNOBS->PRIORITY_TEAM_2_LEFT ||
|
2019-10-12 08:50:43 +08:00
|
|
|
|
priority == SERVER_KNOBS->PRIORITY_TEAM_1_LEFT || priority == SERVER_KNOBS->PRIORITY_TEAM_0_LEFT ||
|
|
|
|
|
priority == SERVER_KNOBS->PRIORITY_TEAM_REDUNDANT || priority == SERVER_KNOBS->PRIORITY_TEAM_HEALTHY ||
|
2021-10-15 07:22:47 +08:00
|
|
|
|
priority == SERVER_KNOBS->PRIORITY_TEAM_CONTAINS_UNDESIRED_SERVER ||
|
|
|
|
|
priority == SERVER_KNOBS->PRIORITY_PERPETUAL_STORAGE_WIGGLE;
|
2019-10-12 08:50:43 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool isBoundaryPriority(int priority) {
|
|
|
|
|
return priority == SERVER_KNOBS->PRIORITY_SPLIT_SHARD || priority == SERVER_KNOBS->PRIORITY_MERGE_SHARD;
|
|
|
|
|
}
|
|
|
|
|
|
2022-07-08 11:49:16 +08:00
|
|
|
|
bool isRestore() const { return this->dataMove != nullptr; }
|
|
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
|
bool operator>(const RelocateData& rhs) const {
|
|
|
|
|
return priority != rhs.priority
|
|
|
|
|
? priority > rhs.priority
|
|
|
|
|
: (startTime != rhs.startTime ? startTime < rhs.startTime : randomId > rhs.randomId);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool operator==(const RelocateData& rhs) const {
|
2019-10-12 08:50:43 +08:00
|
|
|
|
return priority == rhs.priority && boundaryPriority == rhs.boundaryPriority &&
|
2022-03-25 04:16:10 +08:00
|
|
|
|
healthPriority == rhs.healthPriority && reason == rhs.reason && keys == rhs.keys &&
|
|
|
|
|
startTime == rhs.startTime && workFactor == rhs.workFactor && src == rhs.src &&
|
|
|
|
|
completeSources == rhs.completeSources && wantsNewServers == rhs.wantsNewServers &&
|
|
|
|
|
randomId == rhs.randomId;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
}
|
2020-07-11 05:37:47 +08:00
|
|
|
|
bool operator!=(const RelocateData& rhs) const { return !(*this == rhs); }
|
2017-05-26 04:48:44 +08:00
|
|
|
|
};
|
|
|
|
|
|
2020-10-08 12:58:24 +08:00
|
|
|
|
class ParallelTCInfo final : public ReferenceCounted<ParallelTCInfo>, public IDataDistributionTeam {
|
2020-11-04 12:24:39 +08:00
|
|
|
|
std::vector<Reference<IDataDistributionTeam>> teams;
|
|
|
|
|
std::vector<UID> tempServerIDs;
|
2017-10-11 01:36:33 +08:00
|
|
|
|
|
2020-07-21 15:08:01 +08:00
|
|
|
|
int64_t sum(std::function<int64_t(IDataDistributionTeam const&)> func) const {
|
2017-10-11 01:36:33 +08:00
|
|
|
|
int64_t result = 0;
|
2020-07-21 15:08:01 +08:00
|
|
|
|
for (const auto& team : teams) {
|
|
|
|
|
result += func(*team);
|
2017-10-11 01:36:33 +08:00
|
|
|
|
}
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
2020-07-21 15:08:01 +08:00
|
|
|
|
template <class T>
|
2021-09-17 08:42:34 +08:00
|
|
|
|
std::vector<T> collect(std::function<std::vector<T>(IDataDistributionTeam const&)> func) const {
|
2020-11-04 12:24:39 +08:00
|
|
|
|
std::vector<T> result;
|
2017-10-11 01:36:33 +08:00
|
|
|
|
|
2020-07-21 15:08:01 +08:00
|
|
|
|
for (const auto& team : teams) {
|
2020-11-25 01:41:36 +08:00
|
|
|
|
std::vector<T> newItems = func(*team);
|
2017-10-11 01:36:33 +08:00
|
|
|
|
result.insert(result.end(), newItems.begin(), newItems.end());
|
|
|
|
|
}
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
2020-07-21 15:08:01 +08:00
|
|
|
|
bool any(std::function<bool(IDataDistributionTeam const&)> func) const {
|
|
|
|
|
for (const auto& team : teams) {
|
|
|
|
|
if (func(*team)) {
|
2017-10-11 01:36:33 +08:00
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2020-07-21 15:18:54 +08:00
|
|
|
|
public:
|
|
|
|
|
ParallelTCInfo() = default;
|
2022-04-01 00:57:00 +08:00
|
|
|
|
explicit ParallelTCInfo(ParallelTCInfo const& info) : teams(info.teams), tempServerIDs(info.tempServerIDs){};
|
2020-07-21 15:18:54 +08:00
|
|
|
|
|
|
|
|
|
void addTeam(Reference<IDataDistributionTeam> team) { teams.push_back(team); }
|
|
|
|
|
|
|
|
|
|
void clear() { teams.clear(); }
|
|
|
|
|
|
2020-07-21 15:08:01 +08:00
|
|
|
|
bool all(std::function<bool(IDataDistributionTeam const&)> func) const {
|
|
|
|
|
return !any([func](IDataDistributionTeam const& team) { return !func(team); });
|
2017-10-11 01:36:33 +08:00
|
|
|
|
}
|
|
|
|
|
|
2020-11-25 01:41:36 +08:00
|
|
|
|
std::vector<StorageServerInterface> getLastKnownServerInterfaces() const override {
|
2020-07-21 15:08:01 +08:00
|
|
|
|
return collect<StorageServerInterface>(
|
|
|
|
|
[](IDataDistributionTeam const& team) { return team.getLastKnownServerInterfaces(); });
|
2017-10-11 01:36:33 +08:00
|
|
|
|
}
|
|
|
|
|
|
2020-07-21 15:08:01 +08:00
|
|
|
|
int size() const override {
|
2018-02-03 03:46:04 +08:00
|
|
|
|
int totalSize = 0;
|
|
|
|
|
for (auto it = teams.begin(); it != teams.end(); it++) {
|
|
|
|
|
totalSize += (*it)->size();
|
|
|
|
|
}
|
|
|
|
|
return totalSize;
|
|
|
|
|
}
|
|
|
|
|
|
2020-11-25 01:41:36 +08:00
|
|
|
|
std::vector<UID> const& getServerIDs() const override {
|
2021-09-17 08:42:34 +08:00
|
|
|
|
static std::vector<UID> tempServerIDs;
|
2017-10-11 01:36:33 +08:00
|
|
|
|
tempServerIDs.clear();
|
2020-07-21 15:08:01 +08:00
|
|
|
|
for (const auto& team : teams) {
|
2020-11-25 01:41:36 +08:00
|
|
|
|
std::vector<UID> const& childIDs = team->getServerIDs();
|
2017-10-11 01:36:33 +08:00
|
|
|
|
tempServerIDs.insert(tempServerIDs.end(), childIDs.begin(), childIDs.end());
|
|
|
|
|
}
|
|
|
|
|
return tempServerIDs;
|
|
|
|
|
}
|
|
|
|
|
|
2022-04-01 00:57:00 +08:00
|
|
|
|
void addDataInFlightToTeam(int64_t delta) override {
|
2020-07-21 15:08:01 +08:00
|
|
|
|
for (auto& team : teams) {
|
2022-04-01 00:57:00 +08:00
|
|
|
|
team->addDataInFlightToTeam(delta);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void addReadInFlightToTeam(int64_t delta) override {
|
|
|
|
|
for (auto& team : teams) {
|
|
|
|
|
team->addReadInFlightToTeam(delta);
|
2017-10-11 01:36:33 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-07-21 15:08:01 +08:00
|
|
|
|
int64_t getDataInFlightToTeam() const override {
|
|
|
|
|
return sum([](IDataDistributionTeam const& team) { return team.getDataInFlightToTeam(); });
|
2017-10-11 01:36:33 +08:00
|
|
|
|
}
|
|
|
|
|
|
2020-07-21 15:08:01 +08:00
|
|
|
|
int64_t getLoadBytes(bool includeInFlight = true, double inflightPenalty = 1.0) const override {
|
|
|
|
|
return sum([includeInFlight, inflightPenalty](IDataDistributionTeam const& team) {
|
|
|
|
|
return team.getLoadBytes(includeInFlight, inflightPenalty);
|
2017-10-11 01:36:33 +08:00
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
|
2022-03-29 05:20:07 +08:00
|
|
|
|
int64_t getReadInFlightToTeam() const override {
|
|
|
|
|
return sum([](IDataDistributionTeam const& team) { return team.getReadInFlightToTeam(); });
|
|
|
|
|
}
|
|
|
|
|
|
2022-04-01 00:57:00 +08:00
|
|
|
|
double getLoadReadBandwidth(bool includeInFlight = true, double inflightPenalty = 1.0) const override {
|
|
|
|
|
return sum([includeInFlight, inflightPenalty](IDataDistributionTeam const& team) {
|
|
|
|
|
return team.getLoadReadBandwidth(includeInFlight, inflightPenalty);
|
2022-03-29 05:20:07 +08:00
|
|
|
|
});
|
2022-03-01 02:22:32 +08:00
|
|
|
|
}
|
|
|
|
|
|
2020-07-21 15:08:01 +08:00
|
|
|
|
int64_t getMinAvailableSpace(bool includeInFlight = true) const override {
|
2017-10-11 01:36:33 +08:00
|
|
|
|
int64_t result = std::numeric_limits<int64_t>::max();
|
2020-07-21 15:08:01 +08:00
|
|
|
|
for (const auto& team : teams) {
|
|
|
|
|
result = std::min(result, team->getMinAvailableSpace(includeInFlight));
|
2017-10-11 01:36:33 +08:00
|
|
|
|
}
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
2020-07-21 15:08:01 +08:00
|
|
|
|
double getMinAvailableSpaceRatio(bool includeInFlight = true) const override {
|
2017-10-11 01:36:33 +08:00
|
|
|
|
double result = std::numeric_limits<double>::max();
|
2020-07-21 15:08:01 +08:00
|
|
|
|
for (const auto& team : teams) {
|
|
|
|
|
result = std::min(result, team->getMinAvailableSpaceRatio(includeInFlight));
|
2017-10-11 01:36:33 +08:00
|
|
|
|
}
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
2021-01-26 09:55:43 +08:00
|
|
|
|
bool hasHealthyAvailableSpace(double minRatio) const override {
|
2020-07-21 15:08:01 +08:00
|
|
|
|
return all([minRatio](IDataDistributionTeam const& team) { return team.hasHealthyAvailableSpace(minRatio); });
|
2017-10-11 01:36:33 +08:00
|
|
|
|
}
|
|
|
|
|
|
2020-10-08 09:41:19 +08:00
|
|
|
|
Future<Void> updateStorageMetrics() override {
|
2020-11-04 12:24:39 +08:00
|
|
|
|
std::vector<Future<Void>> futures;
|
2017-10-11 01:36:33 +08:00
|
|
|
|
|
2020-07-21 15:08:01 +08:00
|
|
|
|
for (auto& team : teams) {
|
|
|
|
|
futures.push_back(team->updateStorageMetrics());
|
2017-10-11 01:36:33 +08:00
|
|
|
|
}
|
|
|
|
|
return waitForAll(futures);
|
|
|
|
|
}
|
|
|
|
|
|
2020-07-21 15:08:01 +08:00
|
|
|
|
bool isOptimal() const override {
|
|
|
|
|
return all([](IDataDistributionTeam const& team) { return team.isOptimal(); });
|
2017-10-11 01:36:33 +08:00
|
|
|
|
}
|
|
|
|
|
|
2020-07-21 15:08:01 +08:00
|
|
|
|
bool isWrongConfiguration() const override {
|
|
|
|
|
return any([](IDataDistributionTeam const& team) { return team.isWrongConfiguration(); });
|
2017-10-11 01:36:33 +08:00
|
|
|
|
}
|
2020-07-21 15:08:01 +08:00
|
|
|
|
void setWrongConfiguration(bool wrongConfiguration) override {
|
2017-10-11 01:36:33 +08:00
|
|
|
|
for (auto it = teams.begin(); it != teams.end(); it++) {
|
|
|
|
|
(*it)->setWrongConfiguration(wrongConfiguration);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-07-21 15:08:01 +08:00
|
|
|
|
bool isHealthy() const override {
|
|
|
|
|
return all([](IDataDistributionTeam const& team) { return team.isHealthy(); });
|
2017-10-11 01:36:33 +08:00
|
|
|
|
}
|
|
|
|
|
|
2020-07-21 15:08:01 +08:00
|
|
|
|
void setHealthy(bool h) override {
|
2017-10-11 01:36:33 +08:00
|
|
|
|
for (auto it = teams.begin(); it != teams.end(); it++) {
|
|
|
|
|
(*it)->setHealthy(h);
|
|
|
|
|
}
|
|
|
|
|
}
|
2019-08-13 01:08:12 +08:00
|
|
|
|
|
2020-07-21 15:08:01 +08:00
|
|
|
|
int getPriority() const override {
|
2017-10-11 01:36:33 +08:00
|
|
|
|
int priority = 0;
|
|
|
|
|
for (auto it = teams.begin(); it != teams.end(); it++) {
|
|
|
|
|
priority = std::max(priority, (*it)->getPriority());
|
|
|
|
|
}
|
|
|
|
|
return priority;
|
|
|
|
|
}
|
|
|
|
|
|
2020-07-21 15:08:01 +08:00
|
|
|
|
void setPriority(int p) override {
|
2017-10-11 01:36:33 +08:00
|
|
|
|
for (auto it = teams.begin(); it != teams.end(); it++) {
|
|
|
|
|
(*it)->setPriority(p);
|
|
|
|
|
}
|
|
|
|
|
}
|
2022-03-19 01:25:41 +08:00
|
|
|
|
void addref() const override { ReferenceCounted<ParallelTCInfo>::addref(); }
|
|
|
|
|
void delref() const override { ReferenceCounted<ParallelTCInfo>::delref(); }
|
2017-10-11 01:36:33 +08:00
|
|
|
|
|
2020-07-21 15:08:01 +08:00
|
|
|
|
void addServers(const std::vector<UID>& servers) override {
|
2017-10-11 01:36:33 +08:00
|
|
|
|
ASSERT(!teams.empty());
|
|
|
|
|
teams[0]->addServers(servers);
|
|
|
|
|
}
|
2020-10-22 02:10:14 +08:00
|
|
|
|
|
2020-11-17 06:46:36 +08:00
|
|
|
|
std::string getTeamID() const override {
|
2020-10-24 01:06:22 +08:00
|
|
|
|
std::string id;
|
|
|
|
|
for (int i = 0; i < teams.size(); i++) {
|
|
|
|
|
auto const& team = teams[i];
|
|
|
|
|
id += (i == teams.size() - 1) ? team->getTeamID() : format("%s, ", team->getTeamID().c_str());
|
|
|
|
|
}
|
|
|
|
|
return id;
|
|
|
|
|
}
|
2017-10-11 01:36:33 +08:00
|
|
|
|
};
|
|
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
|
struct Busyness {
|
2020-11-04 12:24:39 +08:00
|
|
|
|
std::vector<int> ledger;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
|
|
Busyness() : ledger(10, 0) {}
|
|
|
|
|
|
2021-12-14 02:13:34 +08:00
|
|
|
|
bool canLaunch(int prio, int work) const {
|
2017-05-26 04:48:44 +08:00
|
|
|
|
ASSERT(prio > 0 && prio < 1000);
|
|
|
|
|
return ledger[prio / 100] <= WORK_FULL_UTILIZATION - work; // allow for rounding errors in double division
|
|
|
|
|
}
|
|
|
|
|
void addWork(int prio, int work) {
|
|
|
|
|
ASSERT(prio > 0 && prio < 1000);
|
|
|
|
|
for (int i = 0; i <= (prio / 100); i++)
|
|
|
|
|
ledger[i] += work;
|
|
|
|
|
}
|
|
|
|
|
void removeWork(int prio, int work) { addWork(prio, -work); }
|
|
|
|
|
std::string toString() {
|
|
|
|
|
std::string result;
|
|
|
|
|
for (int i = 1; i < ledger.size();) {
|
|
|
|
|
int j = i + 1;
|
|
|
|
|
while (j < ledger.size() && ledger[i] == ledger[j])
|
|
|
|
|
j++;
|
|
|
|
|
if (i != 1)
|
|
|
|
|
result += ", ";
|
|
|
|
|
result += i + 1 == j ? format("%03d", i * 100) : format("%03d/%03d", i * 100, (j - 1) * 100);
|
2021-12-14 02:13:34 +08:00
|
|
|
|
result +=
|
|
|
|
|
format("=%1.02f (%d/%d)", (float)ledger[i] / WORK_FULL_UTILIZATION, ledger[i], WORK_FULL_UTILIZATION);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
i = j;
|
|
|
|
|
}
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// find the "workFactor" for this, were it launched now
|
2021-12-14 02:13:34 +08:00
|
|
|
|
int getSrcWorkFactor(RelocateData const& relocation, int singleRegionTeamSize) {
|
2019-10-12 08:50:43 +08:00
|
|
|
|
if (relocation.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_1_LEFT ||
|
|
|
|
|
relocation.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_0_LEFT)
|
2017-05-26 04:48:44 +08:00
|
|
|
|
return WORK_FULL_UTILIZATION / SERVER_KNOBS->RELOCATION_PARALLELISM_PER_SOURCE_SERVER;
|
2019-10-12 08:50:43 +08:00
|
|
|
|
else if (relocation.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_2_LEFT)
|
2017-05-26 04:48:44 +08:00
|
|
|
|
return WORK_FULL_UTILIZATION / 2 / SERVER_KNOBS->RELOCATION_PARALLELISM_PER_SOURCE_SERVER;
|
|
|
|
|
else // for now we assume that any message at a lower priority can best be assumed to have a full team left for work
|
2020-03-05 06:17:17 +08:00
|
|
|
|
return WORK_FULL_UTILIZATION / singleRegionTeamSize / SERVER_KNOBS->RELOCATION_PARALLELISM_PER_SOURCE_SERVER;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
}
|
|
|
|
|
|
2021-12-14 02:13:34 +08:00
|
|
|
|
int getDestWorkFactor() {
|
|
|
|
|
// Work of moving a shard is even across destination servers
|
|
|
|
|
return WORK_FULL_UTILIZATION / SERVER_KNOBS->RELOCATION_PARALLELISM_PER_DEST_SERVER;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Data movement's resource control: Do not overload servers used for the RelocateData
|
2017-05-26 04:48:44 +08:00
|
|
|
|
// return true if servers are not too busy to launch the relocation
|
2019-08-13 01:08:12 +08:00
|
|
|
|
// This ensure source servers will not be overloaded.
|
2021-12-14 02:13:34 +08:00
|
|
|
|
bool canLaunchSrc(RelocateData& relocation,
|
|
|
|
|
int teamSize,
|
|
|
|
|
int singleRegionTeamSize,
|
|
|
|
|
std::map<UID, Busyness>& busymap,
|
|
|
|
|
std::vector<RelocateData> cancellableRelocations) {
|
2017-05-26 04:48:44 +08:00
|
|
|
|
// assert this has not already been launched
|
|
|
|
|
ASSERT(relocation.workFactor == 0);
|
|
|
|
|
ASSERT(relocation.src.size() != 0);
|
2020-03-05 06:17:17 +08:00
|
|
|
|
ASSERT(teamSize >= singleRegionTeamSize);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
|
|
// find the "workFactor" for this, were it launched now
|
2021-12-14 02:13:34 +08:00
|
|
|
|
int workFactor = getSrcWorkFactor(relocation, singleRegionTeamSize);
|
2020-03-05 06:17:17 +08:00
|
|
|
|
int neededServers = std::min<int>(relocation.src.size(), teamSize - singleRegionTeamSize + 1);
|
2020-03-05 08:23:49 +08:00
|
|
|
|
if (SERVER_KNOBS->USE_OLD_NEEDED_SERVERS) {
|
|
|
|
|
neededServers = std::max(1, (int)relocation.src.size() - teamSize + 1);
|
|
|
|
|
}
|
2017-05-26 04:48:44 +08:00
|
|
|
|
// see if each of the SS can launch this task
|
|
|
|
|
for (int i = 0; i < relocation.src.size(); i++) {
|
|
|
|
|
// For each source server for this relocation, copy and modify its busyness to reflect work that WOULD be
|
|
|
|
|
// cancelled
|
|
|
|
|
auto busyCopy = busymap[relocation.src[i]];
|
|
|
|
|
for (int j = 0; j < cancellableRelocations.size(); j++) {
|
|
|
|
|
auto& servers = cancellableRelocations[j].src;
|
|
|
|
|
if (std::count(servers.begin(), servers.end(), relocation.src[i]))
|
|
|
|
|
busyCopy.removeWork(cancellableRelocations[j].priority, cancellableRelocations[j].workFactor);
|
|
|
|
|
}
|
|
|
|
|
// Use this modified busyness to check if this relocation could be launched
|
|
|
|
|
if (busyCopy.canLaunch(relocation.priority, workFactor)) {
|
|
|
|
|
--neededServers;
|
|
|
|
|
if (neededServers == 0)
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2021-12-14 02:13:34 +08:00
|
|
|
|
// candidateTeams is a vector containing one team per datacenter, the team(s) DD is planning on moving the shard to.
|
|
|
|
|
bool canLaunchDest(const std::vector<std::pair<Reference<IDataDistributionTeam>, bool>>& candidateTeams,
|
|
|
|
|
int priority,
|
|
|
|
|
std::map<UID, Busyness>& busymapDest) {
|
|
|
|
|
// fail switch if this is causing issues
|
|
|
|
|
if (SERVER_KNOBS->RELOCATION_PARALLELISM_PER_DEST_SERVER <= 0) {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
int workFactor = getDestWorkFactor();
|
2022-05-14 04:03:00 +08:00
|
|
|
|
for (auto& [team, _] : candidateTeams) {
|
|
|
|
|
for (UID id : team->getServerIDs()) {
|
2021-12-14 02:13:34 +08:00
|
|
|
|
if (!busymapDest[id].canLaunch(priority, workFactor)) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
|
// update busyness for each server
|
2020-03-05 06:17:17 +08:00
|
|
|
|
void launch(RelocateData& relocation, std::map<UID, Busyness>& busymap, int singleRegionTeamSize) {
|
2017-05-26 04:48:44 +08:00
|
|
|
|
// if we are here this means that we can launch and should adjust all the work the servers can do
|
2021-12-14 02:13:34 +08:00
|
|
|
|
relocation.workFactor = getSrcWorkFactor(relocation, singleRegionTeamSize);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
for (int i = 0; i < relocation.src.size(); i++)
|
|
|
|
|
busymap[relocation.src[i]].addWork(relocation.priority, relocation.workFactor);
|
|
|
|
|
}
|
|
|
|
|
|
2021-12-14 02:13:34 +08:00
|
|
|
|
// Marks the relocation as launched on the destination side: every server of every candidate team is appended to
// relocation.completeDests and charged one destination work unit at the relocation's priority.
// relocation.completeDests must be empty on entry.
void launchDest(RelocateData& relocation,
                const std::vector<std::pair<Reference<IDataDistributionTeam>, bool>>& candidateTeams,
                std::map<UID, Busyness>& destBusymap) {
	ASSERT(relocation.completeDests.empty());

	const int destWorkFactor = getDestWorkFactor();
	for (const auto& candidate : candidateTeams) {
		const std::vector<UID>& serverIds = candidate.first->getServerIDs();
		for (const UID& serverId : serverIds) {
			relocation.completeDests.push_back(serverId);
			destBusymap[serverId].addWork(relocation.priority, destWorkFactor);
		}
	}
}
|
2022-04-21 03:15:40 +08:00
|
|
|
|
// Undoes the destination-side accounting of launchDest(): removes one destination work unit, at the relocation's
// priority, from every server listed in relocation.completeDests.
void completeDest(RelocateData const& relocation, std::map<UID, Busyness>& destBusymap) {
	const int destWorkFactor = getDestWorkFactor();
	for (const UID& destination : relocation.completeDests) {
		destBusymap[destination].removeWork(relocation.priority, destWorkFactor);
	}
}
|
2021-12-14 02:13:34 +08:00
|
|
|
|
|
|
|
|
|
// Releases all busyness held by a launched relocation: the source work factor is removed from every source server
// in busymap, then the destination side is released through completeDest().
// The relocation must have been launched (workFactor > 0).
void complete(RelocateData const& relocation, std::map<UID, Busyness>& busymap, std::map<UID, Busyness>& destBusymap) {
	ASSERT(relocation.workFactor > 0);

	for (const UID& source : relocation.src) {
		busymap[source].removeWork(relocation.priority, relocation.workFactor);
	}

	completeDest(relocation, destBusymap);
}
|
|
|
|
|
|
2022-07-08 11:49:16 +08:00
|
|
|
|
// Cancels in-flight data moves intersecting with range.
|
2022-08-06 03:01:11 +08:00
|
|
|
|
ACTOR Future<Void> cancelDataMove(struct DDQueue* self, KeyRange range, const DDEnabledState* ddEnabledState);
|
2022-07-08 11:49:16 +08:00
|
|
|
|
|
2022-08-06 03:01:11 +08:00
|
|
|
|
ACTOR Future<Void> dataDistributionRelocator(struct DDQueue* self,
|
2020-09-28 06:26:50 +08:00
|
|
|
|
RelocateData rd,
|
2022-07-08 11:49:16 +08:00
|
|
|
|
Future<Void> prevCleanup,
|
2020-09-28 06:26:50 +08:00
|
|
|
|
const DDEnabledState* ddEnabledState);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
2022-08-17 05:32:55 +08:00
|
|
|
|
struct DDQueue : public IDDRelocationQueue {
|
2022-07-08 11:49:16 +08:00
|
|
|
|
struct DDDataMove {
|
|
|
|
|
DDDataMove() = default;
|
|
|
|
|
explicit DDDataMove(UID id) : id(id) {}
|
2022-05-27 06:55:14 +08:00
|
|
|
|
|
2022-07-08 11:49:16 +08:00
|
|
|
|
bool isValid() const { return id.isValid(); }
|
|
|
|
|
|
|
|
|
|
UID id;
|
|
|
|
|
Future<Void> cancel;
|
|
|
|
|
};
|
|
|
|
|
|
2022-08-06 03:01:11 +08:00
|
|
|
|
struct ServerCounter {
|
2022-08-12 02:59:46 +08:00
|
|
|
|
enum CountType : uint8_t { ProposedSource = 0, QueuedSource, LaunchedSource, LaunchedDest, __COUNT };
|
2022-08-06 06:01:49 +08:00
|
|
|
|
|
|
|
|
|
private:
|
2022-08-12 02:59:46 +08:00
|
|
|
|
typedef std::array<int, (int)__COUNT> Item; // one for each CountType
|
2022-08-10 07:16:14 +08:00
|
|
|
|
typedef std::array<Item, RelocateReason::typeCount()> ReasonItem; // one for each RelocateReason
|
2022-08-06 06:01:49 +08:00
|
|
|
|
|
2022-08-06 03:01:11 +08:00
|
|
|
|
std::unordered_map<UID, ReasonItem> counter;
|
|
|
|
|
|
2022-08-06 06:01:49 +08:00
|
|
|
|
std::string toString(const Item& item) const {
|
|
|
|
|
return format("%d %d %d %d", item[0], item[1], item[2], item[3]);
|
|
|
|
|
}
|
2022-08-06 14:57:52 +08:00
|
|
|
|
|
2022-08-06 06:01:49 +08:00
|
|
|
|
// Adds one detail per RelocateReason to `event`, skipping reasons whose counters sum to zero or less.
// The detail key is the reason name followed by "PQSD"; the value is the counter string built by toString().
void traceReasonItem(TraceEvent* event, const ReasonItem& item) const {
	int reasonIdx = 0;
	for (const Item& counts : item) {
		const int total = std::accumulate(counts.cbegin(), counts.cend(), 0);
		if (total > 0) {
			// "PQSD" corresponds to the CountType values, in order
			event->detail(RelocateReason(reasonIdx).toString() + "PQSD", toString(counts));
		}
		++reasonIdx;
	}
}
|
|
|
|
|
|
2022-08-10 09:22:48 +08:00
|
|
|
|
bool countNonZero(const ReasonItem& item, CountType type) const {
|
|
|
|
|
return std::any_of(item.cbegin(), item.cend(), [type](const Item& item) { return item[(int)type] > 0; });
|
|
|
|
|
}
|
|
|
|
|
|
2022-08-06 14:57:52 +08:00
|
|
|
|
void increase(const UID& id, RelocateReason reason, CountType type) {
|
2022-08-06 03:01:11 +08:00
|
|
|
|
int idx = (int)(reason);
|
2022-08-10 03:32:46 +08:00
|
|
|
|
// if (idx < 0 || idx >= RelocateReason::typeCount()) {
|
|
|
|
|
// TraceEvent(SevWarnAlways, "ServerCounterDebug").detail("Reason", reason.toString());
|
|
|
|
|
// }
|
2022-08-09 11:43:35 +08:00
|
|
|
|
ASSERT(idx >= 0 && idx < RelocateReason::typeCount());
|
2022-08-06 14:57:52 +08:00
|
|
|
|
counter[id][idx][(int)type] += 1;
|
|
|
|
|
}
|
|
|
|
|
|
2022-08-10 14:32:40 +08:00
|
|
|
|
void summarizeLaunchedServers(decltype(counter.cbegin()) begin,
|
|
|
|
|
decltype(counter.cend()) end,
|
|
|
|
|
TraceEvent* event) const {
|
|
|
|
|
if (begin == end)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
std::string execSrc, execDest;
|
|
|
|
|
for (; begin != end; ++begin) {
|
|
|
|
|
if (countNonZero(begin->second, LaunchedSource)) {
|
|
|
|
|
execSrc += begin->first.shortString() + ",";
|
|
|
|
|
}
|
|
|
|
|
if (countNonZero(begin->second, LaunchedDest)) {
|
|
|
|
|
execDest += begin->first.shortString() + ",";
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
event->detail("RemainedLaunchedSources", execSrc).detail("RemainedLaunchedDestinations", execDest);
|
|
|
|
|
}
|
|
|
|
|
|
2022-08-06 14:57:52 +08:00
|
|
|
|
public:
|
|
|
|
|
void clear() { counter.clear(); }
|
|
|
|
|
|
|
|
|
|
int get(const UID& id, RelocateReason reason, CountType type) const {
|
|
|
|
|
return counter.at(id)[(int)reason][(int)type];
|
2022-08-06 03:01:11 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void increaseForTeam(const std::vector<UID>& ids, RelocateReason reason, CountType type) {
|
|
|
|
|
for (auto& id : ids) {
|
2022-08-06 14:57:52 +08:00
|
|
|
|
increase(id, reason, type);
|
2022-08-06 03:01:11 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
2022-08-06 06:01:49 +08:00
|
|
|
|
|
|
|
|
|
void traceAll(const UID& debugId = UID()) const {
|
2022-08-10 09:22:48 +08:00
|
|
|
|
auto it = counter.cbegin();
|
2022-08-09 01:04:48 +08:00
|
|
|
|
int count = 0;
|
2022-08-10 09:22:48 +08:00
|
|
|
|
for (; count < SERVER_KNOBS->DD_QUEUE_COUNTER_MAX_LOG && it != counter.cend(); ++count, ++it) {
|
2022-08-06 06:01:49 +08:00
|
|
|
|
TraceEvent event("DDQueueServerCounter", debugId);
|
2022-08-10 09:22:48 +08:00
|
|
|
|
event.detail("ServerId", it->first);
|
|
|
|
|
traceReasonItem(&event, it->second);
|
|
|
|
|
}
|
2022-08-10 14:32:40 +08:00
|
|
|
|
|
2022-08-10 09:22:48 +08:00
|
|
|
|
if (it != counter.cend()) {
|
2022-08-10 14:32:40 +08:00
|
|
|
|
TraceEvent e(SevWarn, "DDQueueServerCounterTooMany", debugId);
|
|
|
|
|
e.detail("Servers", size());
|
|
|
|
|
if (SERVER_KNOBS->DD_QUEUE_COUNTER_SUMMARIZE) {
|
|
|
|
|
summarizeLaunchedServers(it, counter.cend(), &e);
|
|
|
|
|
return;
|
2022-08-09 01:04:48 +08:00
|
|
|
|
}
|
2022-08-06 06:01:49 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
2022-08-06 14:57:52 +08:00
|
|
|
|
|
|
|
|
|
size_t size() const { return counter.size(); }
|
|
|
|
|
|
|
|
|
|
// for random test
|
|
|
|
|
static CountType randomCountType() {
|
2022-08-12 02:59:46 +08:00
|
|
|
|
int i = deterministicRandom()->randomInt(0, (int)__COUNT);
|
2022-08-06 14:57:52 +08:00
|
|
|
|
return (CountType)i;
|
|
|
|
|
}
|
2022-08-06 03:01:11 +08:00
|
|
|
|
};
|
|
|
|
|
|
2022-07-08 11:49:16 +08:00
|
|
|
|
ActorCollectionNoErrors noErrorActors; // has to be the last one to be destroyed because other Actors may use it.
|
2018-12-14 05:31:37 +08:00
|
|
|
|
UID distributorId;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
MoveKeysLock lock;
|
|
|
|
|
Database cx;
|
2022-09-28 02:22:47 +08:00
|
|
|
|
Reference<IDDTxnProcessor> txnProcessor;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
2017-10-11 01:36:33 +08:00
|
|
|
|
std::vector<TeamCollectionInterface> teamCollections;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
Reference<ShardsAffectedByTeamFailure> shardsAffectedByTeamFailure;
|
2022-08-20 02:47:00 +08:00
|
|
|
|
Reference<PhysicalShardCollection> physicalShardCollection;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
PromiseStream<Promise<int64_t>> getAverageShardBytes;
|
|
|
|
|
|
|
|
|
|
FlowLock startMoveKeysParallelismLock;
|
|
|
|
|
FlowLock finishMoveKeysParallelismLock;
|
2022-07-08 11:49:16 +08:00
|
|
|
|
FlowLock cleanUpDataMoveParallelismLock;
|
2020-07-10 01:38:19 +08:00
|
|
|
|
Reference<FlowLock> fetchSourceLock;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
|
|
int activeRelocations;
|
|
|
|
|
int queuedRelocations;
|
2018-09-01 03:46:57 +08:00
|
|
|
|
int64_t bytesWritten;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
int teamSize;
|
2020-03-05 06:17:17 +08:00
|
|
|
|
int singleRegionTeamSize;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
2019-08-13 01:08:12 +08:00
|
|
|
|
std::map<UID, Busyness> busymap; // UID is serverID
|
2021-12-14 02:13:34 +08:00
|
|
|
|
std::map<UID, Busyness> destBusymap; // UID is serverID
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
|
|
KeyRangeMap<RelocateData> queueMap;
|
|
|
|
|
std::set<RelocateData, std::greater<RelocateData>> fetchingSourcesQueue;
|
|
|
|
|
std::set<RelocateData, std::greater<RelocateData>> fetchKeysComplete;
|
|
|
|
|
KeyRangeActorMap getSourceActors;
|
2019-07-20 07:22:15 +08:00
|
|
|
|
std::map<UID, std::set<RelocateData, std::greater<RelocateData>>>
|
|
|
|
|
queue; // Key UID is serverID, value is the serverID's set of RelocateData to relocate
|
2022-04-23 05:14:58 +08:00
|
|
|
|
// The last time one server was selected as source team for read rebalance reason. We want to throttle read
|
|
|
|
|
// rebalance on time bases because the read workload sample update has delay after the previous moving
|
|
|
|
|
std::map<UID, double> lastAsSource;
|
2022-08-06 03:01:11 +08:00
|
|
|
|
ServerCounter serverCounter;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
|
|
KeyRangeMap<RelocateData> inFlight;
|
2019-08-13 01:08:12 +08:00
|
|
|
|
// Track all actors that relocates specified keys to a good place; Key: keyRange; Value: actor
|
|
|
|
|
KeyRangeActorMap inFlightActors;
|
2022-07-08 11:49:16 +08:00
|
|
|
|
KeyRangeMap<DDDataMove> dataMoves;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
|
|
Promise<Void> error;
|
|
|
|
|
PromiseStream<RelocateData> dataTransferComplete;
|
|
|
|
|
PromiseStream<RelocateData> relocationComplete;
|
2020-07-14 01:12:39 +08:00
|
|
|
|
PromiseStream<RelocateData> fetchSourceServersComplete; // find source SSs for a relocate range
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
2018-08-10 03:37:46 +08:00
|
|
|
|
PromiseStream<RelocateShard> output;
|
|
|
|
|
FutureStream<RelocateShard> input;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
PromiseStream<GetMetricsRequest> getShardMetrics;
|
2022-05-04 15:00:03 +08:00
|
|
|
|
PromiseStream<GetTopKMetricsRequest> getTopKMetrics;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
2018-02-17 08:01:19 +08:00
|
|
|
|
double lastInterval;
|
|
|
|
|
int suppressIntervals;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
2020-07-13 09:30:02 +08:00
|
|
|
|
Reference<AsyncVar<bool>> rawProcessingUnhealthy; // many operations will remove relocations before adding a new
|
|
|
|
|
// one, so delay a small time before settling on a new number.
|
2021-10-15 07:22:47 +08:00
|
|
|
|
Reference<AsyncVar<bool>> rawProcessingWiggle;
|
2018-04-09 12:24:05 +08:00
|
|
|
|
|
|
|
|
|
std::map<int, int> priority_relocations;
|
|
|
|
|
int unhealthyRelocations;
|
2021-09-25 01:04:30 +08:00
|
|
|
|
|
|
|
|
|
Reference<EventCacheHolder> movedKeyServersEventHolder;
|
|
|
|
|
|
2022-10-20 13:09:04 +08:00
|
|
|
|
int moveReusePhysicalShard;
|
|
|
|
|
int moveCreateNewPhysicalShard;
|
2022-10-25 01:39:32 +08:00
|
|
|
|
// Enumerates the reasons tallied in retryFindDstReasonCount. The values are contiguous from 0 so they
// can index that vector; NumberOfTypes is not a reason but the number of enumerators before it.
enum RetryFindDstReason {
	None = 0,
	RemoteBestTeamNotReady,
	PrimaryNoHealthyTeam,
	RemoteNoHealthyTeam,
	RemoteTeamIsFull,
	RemoteTeamIsNotHealthy,
	NoAvailablePhysicalShard,
	NumberOfTypes,
};
// One counter per RetryFindDstReason; the main constructor sizes it to NumberOfTypes zeroed entries.
std::vector<int> retryFindDstReasonCount;
|
2022-10-20 13:09:04 +08:00
|
|
|
|
|
2019-10-12 08:50:43 +08:00
|
|
|
|
void startRelocation(int priority, int healthPriority) {
|
2019-07-20 09:30:01 +08:00
|
|
|
|
// Although PRIORITY_TEAM_REDUNDANT has lower priority than split and merge shard movement,
|
|
|
|
|
// we must count it into unhealthyRelocations; because team removers relies on unhealthyRelocations to
|
|
|
|
|
// ensure a team remover will not start before the previous one finishes removing a team and move away data
|
|
|
|
|
// NOTE: split and merge shard have higher priority. If they have to wait for unhealthyRelocations = 0,
|
|
|
|
|
// deadlock may happen: split/merge shard waits for unhealthyRelocations, while blocks team_redundant.
|
2020-03-05 06:07:32 +08:00
|
|
|
|
if (healthPriority == SERVER_KNOBS->PRIORITY_POPULATE_REGION ||
|
|
|
|
|
healthPriority == SERVER_KNOBS->PRIORITY_TEAM_UNHEALTHY ||
|
|
|
|
|
healthPriority == SERVER_KNOBS->PRIORITY_TEAM_2_LEFT ||
|
2019-10-12 08:50:43 +08:00
|
|
|
|
healthPriority == SERVER_KNOBS->PRIORITY_TEAM_1_LEFT ||
|
|
|
|
|
healthPriority == SERVER_KNOBS->PRIORITY_TEAM_0_LEFT ||
|
|
|
|
|
healthPriority == SERVER_KNOBS->PRIORITY_TEAM_REDUNDANT) {
|
2018-04-09 12:24:05 +08:00
|
|
|
|
unhealthyRelocations++;
|
|
|
|
|
rawProcessingUnhealthy->set(true);
|
|
|
|
|
}
|
2021-10-15 07:22:47 +08:00
|
|
|
|
if (healthPriority == SERVER_KNOBS->PRIORITY_PERPETUAL_STORAGE_WIGGLE) {
|
|
|
|
|
rawProcessingWiggle->set(true);
|
|
|
|
|
}
|
2018-04-09 12:24:05 +08:00
|
|
|
|
priority_relocations[priority]++;
|
|
|
|
|
}
|
2019-10-12 08:50:43 +08:00
|
|
|
|
void finishRelocation(int priority, int healthPriority) {
|
2020-03-05 06:07:32 +08:00
|
|
|
|
if (healthPriority == SERVER_KNOBS->PRIORITY_POPULATE_REGION ||
|
|
|
|
|
healthPriority == SERVER_KNOBS->PRIORITY_TEAM_UNHEALTHY ||
|
|
|
|
|
healthPriority == SERVER_KNOBS->PRIORITY_TEAM_2_LEFT ||
|
2019-10-12 08:50:43 +08:00
|
|
|
|
healthPriority == SERVER_KNOBS->PRIORITY_TEAM_1_LEFT ||
|
|
|
|
|
healthPriority == SERVER_KNOBS->PRIORITY_TEAM_0_LEFT ||
|
|
|
|
|
healthPriority == SERVER_KNOBS->PRIORITY_TEAM_REDUNDANT) {
|
2018-04-09 12:24:05 +08:00
|
|
|
|
unhealthyRelocations--;
|
|
|
|
|
ASSERT(unhealthyRelocations >= 0);
|
|
|
|
|
if (unhealthyRelocations == 0) {
|
|
|
|
|
rawProcessingUnhealthy->set(false);
|
|
|
|
|
}
|
2019-10-12 08:50:43 +08:00
|
|
|
|
}
|
2018-04-09 12:24:05 +08:00
|
|
|
|
priority_relocations[priority]--;
|
2021-10-15 07:22:47 +08:00
|
|
|
|
if (priority_relocations[SERVER_KNOBS->PRIORITY_PERPETUAL_STORAGE_WIGGLE] == 0) {
|
|
|
|
|
rawProcessingWiggle->set(false);
|
|
|
|
|
}
|
2018-04-09 12:24:05 +08:00
|
|
|
|
}
|
|
|
|
|
|
2022-08-06 03:01:11 +08:00
|
|
|
|
DDQueue(UID mid,
|
2022-08-06 06:01:49 +08:00
|
|
|
|
MoveKeysLock lock,
|
2022-09-28 02:22:47 +08:00
|
|
|
|
Reference<IDDTxnProcessor> db,
|
2022-08-06 06:01:49 +08:00
|
|
|
|
std::vector<TeamCollectionInterface> teamCollections,
|
|
|
|
|
Reference<ShardsAffectedByTeamFailure> sABTF,
|
2022-08-20 02:47:00 +08:00
|
|
|
|
Reference<PhysicalShardCollection> physicalShardCollection,
|
2022-08-06 06:01:49 +08:00
|
|
|
|
PromiseStream<Promise<int64_t>> getAverageShardBytes,
|
|
|
|
|
int teamSize,
|
|
|
|
|
int singleRegionTeamSize,
|
|
|
|
|
PromiseStream<RelocateShard> output,
|
|
|
|
|
FutureStream<RelocateShard> input,
|
|
|
|
|
PromiseStream<GetMetricsRequest> getShardMetrics,
|
|
|
|
|
PromiseStream<GetTopKMetricsRequest> getTopKMetrics)
|
2022-09-24 06:20:35 +08:00
|
|
|
|
: IDDRelocationQueue(), distributorId(mid), lock(lock), cx(db->context()), txnProcessor(db),
|
2022-08-17 14:37:55 +08:00
|
|
|
|
teamCollections(teamCollections), shardsAffectedByTeamFailure(sABTF),
|
2022-08-20 02:47:00 +08:00
|
|
|
|
physicalShardCollection(physicalShardCollection), getAverageShardBytes(getAverageShardBytes),
|
2021-07-25 02:20:51 +08:00
|
|
|
|
startMoveKeysParallelismLock(SERVER_KNOBS->DD_MOVE_KEYS_PARALLELISM),
|
2020-07-10 01:38:19 +08:00
|
|
|
|
finishMoveKeysParallelismLock(SERVER_KNOBS->DD_MOVE_KEYS_PARALLELISM),
|
2022-07-08 11:49:16 +08:00
|
|
|
|
cleanUpDataMoveParallelismLock(SERVER_KNOBS->DD_MOVE_KEYS_PARALLELISM),
|
2021-07-25 02:20:51 +08:00
|
|
|
|
fetchSourceLock(new FlowLock(SERVER_KNOBS->DD_FETCH_SOURCE_PARALLELISM)), activeRelocations(0),
|
|
|
|
|
queuedRelocations(0), bytesWritten(0), teamSize(teamSize), singleRegionTeamSize(singleRegionTeamSize),
|
2022-05-31 12:57:34 +08:00
|
|
|
|
output(output), input(input), getShardMetrics(getShardMetrics), getTopKMetrics(getTopKMetrics), lastInterval(0),
|
|
|
|
|
suppressIntervals(0), rawProcessingUnhealthy(new AsyncVar<bool>(false)),
|
|
|
|
|
rawProcessingWiggle(new AsyncVar<bool>(false)), unhealthyRelocations(0),
|
2022-10-20 13:09:04 +08:00
|
|
|
|
movedKeyServersEventHolder(makeReference<EventCacheHolder>("MovedKeyServers")), moveReusePhysicalShard(0),
|
2022-10-25 01:39:32 +08:00
|
|
|
|
moveCreateNewPhysicalShard(0), retryFindDstReasonCount(static_cast<int>(RetryFindDstReason::NumberOfTypes), 0) {
|
|
|
|
|
}
|
2022-08-06 14:57:52 +08:00
|
|
|
|
DDQueue() = default;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
|
|
void validate() {
|
|
|
|
|
if (EXPENSIVE_VALIDATION) {
|
|
|
|
|
for (auto it = fetchingSourcesQueue.begin(); it != fetchingSourcesQueue.end(); ++it) {
|
|
|
|
|
// relocates in the fetching queue do not have src servers yet.
|
|
|
|
|
if (it->src.size())
|
|
|
|
|
TraceEvent(SevError, "DDQueueValidateError1")
|
|
|
|
|
.detail("Problem", "relocates in the fetching queue do not have src servers yet");
|
|
|
|
|
|
|
|
|
|
// relocates in the fetching queue do not have a work factor yet.
|
|
|
|
|
if (it->workFactor != 0.0)
|
|
|
|
|
TraceEvent(SevError, "DDQueueValidateError2")
|
|
|
|
|
.detail("Problem", "relocates in the fetching queue do not have a work factor yet");
|
|
|
|
|
|
|
|
|
|
// relocates in the fetching queue are in the queueMap.
|
|
|
|
|
auto range = queueMap.rangeContaining(it->keys.begin);
|
|
|
|
|
if (range.value() != *it || range.range() != it->keys)
|
|
|
|
|
TraceEvent(SevError, "DDQueueValidateError3")
|
|
|
|
|
.detail("Problem", "relocates in the fetching queue are in the queueMap");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
for( auto it = queue.begin(); it != queue.end(); ++it ) {
|
|
|
|
|
for( auto rdit = it->second.begin(); rdit != it->second.end(); ++rdit ) {
|
|
|
|
|
// relocates in the queue are in the queueMap exactly.
|
|
|
|
|
auto range = queueMap.rangeContaining( rdit->keys.begin );
|
|
|
|
|
if( range.value() != *rdit || range.range() != rdit->keys )
|
|
|
|
|
TraceEvent(SevError, "DDQueueValidateError4").detail("Problem", "relocates in the queue are in the queueMap exactly")
|
2019-03-19 06:03:43 +08:00
|
|
|
|
.detail("RangeBegin", range.range().begin)
|
|
|
|
|
.detail("RangeEnd", range.range().end)
|
|
|
|
|
.detail("RelocateBegin2", range.value().keys.begin)
|
|
|
|
|
.detail("RelocateEnd2", range.value().keys.end)
|
2017-05-26 04:48:44 +08:00
|
|
|
|
.detail("RelocateStart", range.value().startTime)
|
|
|
|
|
.detail("MapStart", rdit->startTime)
|
|
|
|
|
.detail("RelocateWork", range.value().workFactor)
|
|
|
|
|
.detail("MapWork", rdit->workFactor)
|
|
|
|
|
.detail("RelocateSrc", range.value().src.size())
|
|
|
|
|
.detail("MapSrc", rdit->src.size())
|
|
|
|
|
.detail("RelocatePrio", range.value().priority)
|
|
|
|
|
.detail("MapPrio", rdit->priority);
|
2021-03-11 02:06:03 +08:00
|
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
|
// relocates in the queue have src servers
|
|
|
|
|
if( !rdit->src.size() )
|
|
|
|
|
TraceEvent(SevError, "DDQueueValidateError5").detail("Problem", "relocates in the queue have src servers");
|
2021-03-11 02:06:03 +08:00
|
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
|
// relocates in the queue do not have a work factor yet.
|
|
|
|
|
if( rdit->workFactor != 0.0 )
|
|
|
|
|
TraceEvent(SevError, "DDQueueValidateError6").detail("Problem", "relocates in the queue do not have a work factor yet");
|
2021-03-11 02:06:03 +08:00
|
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
|
bool contains = false;
|
|
|
|
|
for( int i = 0; i < rdit->src.size(); i++ ) {
|
|
|
|
|
if( rdit->src[i] == it->first ) {
|
|
|
|
|
contains = true;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if( !contains )
|
|
|
|
|
TraceEvent(SevError, "DDQueueValidateError7").detail("Problem", "queued relocate data does not include ss under which its filed");
|
|
|
|
|
}
|
|
|
|
|
}*/
|
|
|
|
|
|
|
|
|
|
auto inFlightRanges = inFlight.ranges();
|
|
|
|
|
for (auto it = inFlightRanges.begin(); it != inFlightRanges.end(); ++it) {
|
|
|
|
|
for (int i = 0; i < it->value().src.size(); i++) {
|
|
|
|
|
// each server in the inFlight map is in the busymap
|
|
|
|
|
if (!busymap.count(it->value().src[i]))
|
|
|
|
|
TraceEvent(SevError, "DDQueueValidateError8")
|
|
|
|
|
.detail("Problem", "each server in the inFlight map is in the busymap");
|
|
|
|
|
|
|
|
|
|
// relocate data that is inFlight is not also in the queue
|
|
|
|
|
if (queue[it->value().src[i]].count(it->value()))
|
|
|
|
|
TraceEvent(SevError, "DDQueueValidateError9")
|
|
|
|
|
.detail("Problem", "relocate data that is inFlight is not also in the queue");
|
|
|
|
|
}
|
|
|
|
|
|
2021-12-14 02:13:34 +08:00
|
|
|
|
for (int i = 0; i < it->value().completeDests.size(); i++) {
|
|
|
|
|
// each server in the inFlight map is in the dest busymap
|
|
|
|
|
if (!destBusymap.count(it->value().completeDests[i]))
|
|
|
|
|
TraceEvent(SevError, "DDQueueValidateError10")
|
|
|
|
|
.detail("Problem", "each server in the inFlight map is in the destBusymap");
|
|
|
|
|
}
|
|
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
|
// in flight relocates have source servers
|
|
|
|
|
if (it->value().startTime != -1 && !it->value().src.size())
|
2021-12-14 02:13:34 +08:00
|
|
|
|
TraceEvent(SevError, "DDQueueValidateError11")
|
2017-05-26 04:48:44 +08:00
|
|
|
|
.detail("Problem", "in flight relocates have source servers");
|
|
|
|
|
|
|
|
|
|
if (inFlightActors.liveActorAt(it->range().begin)) {
|
|
|
|
|
// the key range in the inFlight map matches the key range in the RelocateData message
|
|
|
|
|
if (it->value().keys != it->range())
|
2021-12-14 02:13:34 +08:00
|
|
|
|
TraceEvent(SevError, "DDQueueValidateError12")
|
2017-05-26 04:48:44 +08:00
|
|
|
|
.detail(
|
|
|
|
|
"Problem",
|
|
|
|
|
"the key range in the inFlight map matches the key range in the RelocateData message");
|
2022-02-25 23:33:46 +08:00
|
|
|
|
} else if (it->value().cancellable) {
|
|
|
|
|
TraceEvent(SevError, "DDQueueValidateError13")
|
|
|
|
|
.detail("Problem", "key range is cancellable but not in flight!")
|
|
|
|
|
.detail("Range", it->range());
|
2017-05-26 04:48:44 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (auto it = busymap.begin(); it != busymap.end(); ++it) {
|
|
|
|
|
for (int i = 0; i < it->second.ledger.size() - 1; i++) {
|
|
|
|
|
if (it->second.ledger[i] < it->second.ledger[i + 1])
|
2022-02-25 23:33:46 +08:00
|
|
|
|
TraceEvent(SevError, "DDQueueValidateError14")
|
2017-05-26 04:48:44 +08:00
|
|
|
|
.detail("Problem", "ascending ledger problem")
|
2018-06-09 02:11:08 +08:00
|
|
|
|
.detail("LedgerLevel", i)
|
|
|
|
|
.detail("LedgerValueA", it->second.ledger[i])
|
|
|
|
|
.detail("LedgerValueB", it->second.ledger[i + 1]);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
if (it->second.ledger[i] < 0.0)
|
2022-02-25 23:33:46 +08:00
|
|
|
|
TraceEvent(SevError, "DDQueueValidateError15")
|
2021-12-14 02:13:34 +08:00
|
|
|
|
.detail("Problem", "negative ascending problem")
|
|
|
|
|
.detail("LedgerLevel", i)
|
|
|
|
|
.detail("LedgerValue", it->second.ledger[i]);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (auto it = destBusymap.begin(); it != destBusymap.end(); ++it) {
|
|
|
|
|
for (int i = 0; i < it->second.ledger.size() - 1; i++) {
|
|
|
|
|
if (it->second.ledger[i] < it->second.ledger[i + 1])
|
2022-02-25 23:33:46 +08:00
|
|
|
|
TraceEvent(SevError, "DDQueueValidateError16")
|
2021-12-14 02:13:34 +08:00
|
|
|
|
.detail("Problem", "ascending ledger problem")
|
|
|
|
|
.detail("LedgerLevel", i)
|
|
|
|
|
.detail("LedgerValueA", it->second.ledger[i])
|
|
|
|
|
.detail("LedgerValueB", it->second.ledger[i + 1]);
|
|
|
|
|
if (it->second.ledger[i] < 0.0)
|
2022-02-25 23:33:46 +08:00
|
|
|
|
TraceEvent(SevError, "DDQueueValidateError17")
|
2017-05-26 04:48:44 +08:00
|
|
|
|
.detail("Problem", "negative ascending problem")
|
2018-06-09 02:11:08 +08:00
|
|
|
|
.detail("LedgerLevel", i)
|
|
|
|
|
.detail("LedgerValue", it->second.ledger[i]);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::set<RelocateData, std::greater<RelocateData>> queuedRelocationsMatch;
|
|
|
|
|
for (auto it = queue.begin(); it != queue.end(); ++it)
|
|
|
|
|
queuedRelocationsMatch.insert(it->second.begin(), it->second.end());
|
|
|
|
|
ASSERT(queuedRelocations == queuedRelocationsMatch.size() + fetchingSourcesQueue.size());
|
|
|
|
|
|
|
|
|
|
int testActive = 0;
|
|
|
|
|
for (auto it = priority_relocations.begin(); it != priority_relocations.end(); ++it)
|
|
|
|
|
testActive += it->second;
|
|
|
|
|
ASSERT(activeRelocations + queuedRelocations == testActive);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2022-08-06 03:01:11 +08:00
|
|
|
|
// Looks up the source servers of `input.keys` through the queue's txnProcessor and sends a copy of
// `input`, with `src` and `completeSources` filled in, to `output`.
// Before the lookup it waits for a short delay (longer and at a lower task priority for merge-shard
// relocations) and then for a permit from `fetchLock`; the permit is held by a FlowLock::Releaser
// kept as actor state.
ACTOR static Future<Void> getSourceServersForRange(DDQueue* self,
                                                   RelocateData input,
                                                   PromiseStream<RelocateData> output,
                                                   Reference<FlowLock> fetchLock) {

	// FIXME: is the merge case needed
	const bool isMergeShard = input.priority == SERVER_KNOBS->PRIORITY_MERGE_SHARD;
	wait(delay(isMergeShard ? 0.5 : 0.0001,
	           isMergeShard ? TaskPriority::DataDistributionVeryLow : TaskPriority::DataDistributionLaunch));

	wait(fetchLock->take(TaskPriority::DataDistributionLaunch));
	state FlowLock::Releaser releaser(*fetchLock);

	IDDTxnProcessor::SourceServers fetched = wait(self->txnProcessor->getSourceServersForRange(input.keys));
	input.src = std::move(fetched.srcServers);
	input.completeSources = std::move(fetched.completeSources);
	output.send(input);
	return Void();
}
|
|
|
|
|
|
|
|
|
|
// This function cannot handle relocation requests which split a shard into three pieces
|
2019-10-12 09:31:43 +08:00
|
|
|
|
void queueRelocation(RelocateShard rs, std::set<UID>& serversToLaunchFrom) {
|
2019-03-19 06:03:43 +08:00
|
|
|
|
//TraceEvent("QueueRelocationBegin").detail("Begin", rd.keys.begin).detail("End", rd.keys.end);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
|
|
// remove all items from both queues that are fully contained in the new relocation (i.e. will be overwritten)
|
2019-10-12 09:31:43 +08:00
|
|
|
|
RelocateData rd(rs);
|
|
|
|
|
bool hasHealthPriority = RelocateData::isHealthPriority(rd.priority);
|
|
|
|
|
bool hasBoundaryPriority = RelocateData::isBoundaryPriority(rd.priority);
|
2021-03-11 02:06:03 +08:00
|
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
|
auto ranges = queueMap.intersectingRanges(rd.keys);
|
|
|
|
|
for (auto r = ranges.begin(); r != ranges.end(); ++r) {
|
|
|
|
|
RelocateData& rrs = r->value();
|
|
|
|
|
|
|
|
|
|
auto fetchingSourcesItr = fetchingSourcesQueue.find(rrs);
|
|
|
|
|
bool foundActiveFetching = fetchingSourcesItr != fetchingSourcesQueue.end();
|
|
|
|
|
std::set<RelocateData, std::greater<RelocateData>>* firstQueue;
|
|
|
|
|
std::set<RelocateData, std::greater<RelocateData>>::iterator firstRelocationItr;
|
|
|
|
|
bool foundActiveRelocation = false;
|
|
|
|
|
|
|
|
|
|
if (!foundActiveFetching && rrs.src.size()) {
|
|
|
|
|
firstQueue = &queue[rrs.src[0]];
|
|
|
|
|
firstRelocationItr = firstQueue->find(rrs);
|
|
|
|
|
foundActiveRelocation = firstRelocationItr != firstQueue->end();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// If there is a queued job that wants data relocation which we are about to cancel/modify,
|
|
|
|
|
// make sure that we keep the relocation intent for the job that we queue up
|
|
|
|
|
if (foundActiveFetching || foundActiveRelocation) {
|
|
|
|
|
rd.wantsNewServers |= rrs.wantsNewServers;
|
|
|
|
|
rd.startTime = std::min(rd.startTime, rrs.startTime);
|
2019-10-12 08:50:43 +08:00
|
|
|
|
if (!hasHealthPriority) {
|
|
|
|
|
rd.healthPriority = std::max(rd.healthPriority, rrs.healthPriority);
|
|
|
|
|
}
|
|
|
|
|
if (!hasBoundaryPriority) {
|
|
|
|
|
rd.boundaryPriority = std::max(rd.boundaryPriority, rrs.boundaryPriority);
|
|
|
|
|
}
|
|
|
|
|
rd.priority = std::max(rd.priority, std::max(rd.boundaryPriority, rd.healthPriority));
|
2017-05-26 04:48:44 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (rd.keys.contains(rrs.keys)) {
|
|
|
|
|
if (foundActiveFetching)
|
|
|
|
|
fetchingSourcesQueue.erase(fetchingSourcesItr);
|
|
|
|
|
else if (foundActiveRelocation) {
|
|
|
|
|
firstQueue->erase(firstRelocationItr);
|
|
|
|
|
for (int i = 1; i < rrs.src.size(); i++)
|
|
|
|
|
queue[rrs.src[i]].erase(rrs);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (foundActiveFetching || foundActiveRelocation) {
|
|
|
|
|
serversToLaunchFrom.insert(rrs.src.begin(), rrs.src.end());
|
|
|
|
|
/*TraceEvent(rrs.interval.end(), mi.id()).detail("Result","Cancelled")
|
|
|
|
|
.detail("WasFetching", foundActiveFetching).detail("Contained", rd.keys.contains( rrs.keys ));*/
|
|
|
|
|
queuedRelocations--;
|
2022-07-08 11:49:16 +08:00
|
|
|
|
TraceEvent(SevVerbose, "QueuedRelocationsChanged")
|
|
|
|
|
.detail("DataMoveID", rrs.dataMoveId)
|
|
|
|
|
.detail("RandomID", rrs.randomId)
|
|
|
|
|
.detail("Total", queuedRelocations);
|
2019-10-12 08:50:43 +08:00
|
|
|
|
finishRelocation(rrs.priority, rrs.healthPriority);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// determine the final state of the relocations map
|
|
|
|
|
auto affectedQueuedItems = queueMap.getAffectedRangesAfterInsertion(rd.keys, rd);
|
|
|
|
|
|
|
|
|
|
// put the new request into the global map of requests (modifies the ranges already present)
|
|
|
|
|
queueMap.insert(rd.keys, rd);
|
|
|
|
|
|
|
|
|
|
// cancel all the getSourceServers actors that intersect the new range that we will be getting
|
|
|
|
|
getSourceActors.cancel(KeyRangeRef(affectedQueuedItems.front().begin, affectedQueuedItems.back().end));
|
|
|
|
|
|
|
|
|
|
// update fetchingSourcesQueue and the per-server queue based on truncated ranges after insertion, (re-)launch
|
|
|
|
|
// getSourceServers
|
|
|
|
|
auto queueMapItr = queueMap.rangeContaining(affectedQueuedItems[0].begin);
|
|
|
|
|
for (int r = 0; r < affectedQueuedItems.size(); ++r, ++queueMapItr) {
|
|
|
|
|
// ASSERT(queueMapItr->value() == queueMap.rangeContaining(affectedQueuedItems[r].begin)->value());
|
|
|
|
|
RelocateData& rrs = queueMapItr->value();
|
|
|
|
|
|
|
|
|
|
if (rrs.src.size() == 0 && (rrs.keys == rd.keys || fetchingSourcesQueue.erase(rrs) > 0)) {
|
|
|
|
|
rrs.keys = affectedQueuedItems[r];
|
2022-08-05 06:28:33 +08:00
|
|
|
|
rrs.interval = TraceInterval("QueuedRelocation", rrs.randomId); // inherit the old randomId
|
|
|
|
|
|
|
|
|
|
DebugRelocationTraceEvent(rrs.interval.begin(), distributorId)
|
|
|
|
|
.detail("KeyBegin", rrs.keys.begin)
|
|
|
|
|
.detail("KeyEnd", rrs.keys.end)
|
|
|
|
|
.detail("Priority", rrs.priority)
|
|
|
|
|
.detail("WantsNewServers", rrs.wantsNewServers);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
|
|
queuedRelocations++;
|
2022-07-08 11:49:16 +08:00
|
|
|
|
TraceEvent(SevVerbose, "QueuedRelocationsChanged")
|
|
|
|
|
.detail("DataMoveID", rrs.dataMoveId)
|
|
|
|
|
.detail("RandomID", rrs.randomId)
|
|
|
|
|
.detail("Total", queuedRelocations);
|
2019-10-12 08:50:43 +08:00
|
|
|
|
startRelocation(rrs.priority, rrs.healthPriority);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
|
|
fetchingSourcesQueue.insert(rrs);
|
2022-06-10 03:16:12 +08:00
|
|
|
|
getSourceActors.insert(
|
2022-07-19 05:21:50 +08:00
|
|
|
|
rrs.keys, getSourceServersForRange(this, rrs, fetchSourceServersComplete, fetchSourceLock));
|
2017-05-26 04:48:44 +08:00
|
|
|
|
} else {
|
|
|
|
|
RelocateData newData(rrs);
|
|
|
|
|
newData.keys = affectedQueuedItems[r];
|
|
|
|
|
ASSERT(rrs.src.size() || rrs.startTime == -1);
|
|
|
|
|
|
|
|
|
|
bool foundActiveRelocation = false;
|
|
|
|
|
for (int i = 0; i < rrs.src.size(); i++) {
|
|
|
|
|
auto& serverQueue = queue[rrs.src[i]];
|
|
|
|
|
|
|
|
|
|
if (serverQueue.erase(rrs) > 0) {
|
|
|
|
|
if (!foundActiveRelocation) {
|
2022-08-05 06:28:33 +08:00
|
|
|
|
newData.interval =
|
|
|
|
|
TraceInterval("QueuedRelocation", rrs.randomId); // inherit the old randomId
|
|
|
|
|
|
|
|
|
|
DebugRelocationTraceEvent(newData.interval.begin(), distributorId)
|
|
|
|
|
.detail("KeyBegin", newData.keys.begin)
|
|
|
|
|
.detail("KeyEnd", newData.keys.end)
|
|
|
|
|
.detail("Priority", newData.priority)
|
|
|
|
|
.detail("WantsNewServers", newData.wantsNewServers);
|
|
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
|
queuedRelocations++;
|
2022-07-08 11:49:16 +08:00
|
|
|
|
TraceEvent(SevVerbose, "QueuedRelocationsChanged")
|
|
|
|
|
.detail("DataMoveID", newData.dataMoveId)
|
|
|
|
|
.detail("RandomID", newData.randomId)
|
|
|
|
|
.detail("Total", queuedRelocations);
|
2019-10-12 08:50:43 +08:00
|
|
|
|
startRelocation(newData.priority, newData.healthPriority);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
foundActiveRelocation = true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
serverQueue.insert(newData);
|
|
|
|
|
} else
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// We update the keys of a relocation even if it is "dead" since it helps validate()
|
|
|
|
|
rrs.keys = affectedQueuedItems[r];
|
|
|
|
|
rrs.interval = newData.interval;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2022-08-05 06:28:33 +08:00
|
|
|
|
DebugRelocationTraceEvent("ReceivedRelocateShard", distributorId)
|
|
|
|
|
.detail("KeyBegin", rd.keys.begin)
|
|
|
|
|
.detail("KeyEnd", rd.keys.end)
|
2017-05-26 04:48:44 +08:00
|
|
|
|
.detail("Priority", rd.priority)
|
2022-08-05 06:28:33 +08:00
|
|
|
|
.detail("AffectedRanges", affectedQueuedItems.size());
|
2017-05-26 04:48:44 +08:00
|
|
|
|
}
|
|
|
|
|
|
2019-02-08 07:31:03 +08:00
|
|
|
|
void completeSourceFetch(const RelocateData& results) {
|
2017-05-26 04:48:44 +08:00
|
|
|
|
ASSERT(fetchingSourcesQueue.count(results));
|
|
|
|
|
|
|
|
|
|
// logRelocation( results, "GotSourceServers" );
|
|
|
|
|
|
|
|
|
|
fetchingSourcesQueue.erase(results);
|
|
|
|
|
queueMap.insert(results.keys, results);
|
|
|
|
|
for (int i = 0; i < results.src.size(); i++) {
|
|
|
|
|
queue[results.src[i]].insert(results);
|
|
|
|
|
}
|
2022-04-23 06:26:44 +08:00
|
|
|
|
updateLastAsSource(results.src);
|
2022-08-06 03:01:11 +08:00
|
|
|
|
serverCounter.increaseForTeam(results.src, results.reason, ServerCounter::CountType::QueuedSource);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
}
|
|
|
|
|
|
2019-02-08 07:31:03 +08:00
|
|
|
|
void logRelocation(const RelocateData& rd, const char* title) {
|
2017-05-26 04:48:44 +08:00
|
|
|
|
std::string busyString;
|
|
|
|
|
for (int i = 0; i < rd.src.size() && i < teamSize * 2; i++)
|
|
|
|
|
busyString += describe(rd.src[i]) + " - (" + busymap[rd.src[i]].toString() + "); ";
|
|
|
|
|
|
2018-12-14 05:31:37 +08:00
|
|
|
|
TraceEvent(title, distributorId)
|
2019-04-06 04:11:50 +08:00
|
|
|
|
.detail("KeyBegin", rd.keys.begin)
|
|
|
|
|
.detail("KeyEnd", rd.keys.end)
|
2017-05-26 04:48:44 +08:00
|
|
|
|
.detail("Priority", rd.priority)
|
|
|
|
|
.detail("WorkFactor", rd.workFactor)
|
|
|
|
|
.detail("SourceServerCount", rd.src.size())
|
|
|
|
|
.detail("SourceServers", describe(rd.src, teamSize * 2))
|
|
|
|
|
.detail("SourceBusyness", busyString);
|
|
|
|
|
}
|
|
|
|
|
|
2020-09-28 06:26:50 +08:00
|
|
|
|
// Collects every queued relocation intersecting `keys` - i.e. every queueMap entry that has source
// servers and is present in its first source server's queue - and hands the set to the
// set-based launchQueuedWork overload.
void launchQueuedWork(KeyRange keys, const DDEnabledState* ddEnabledState) {
	// combine all queued work in the key range and check to see if there is anything to launch
	std::set<RelocateData, std::greater<RelocateData>> combined;
	auto intersecting = queueMap.intersectingRanges(keys);
	for (auto rangeIt = intersecting.begin(); rangeIt != intersecting.end(); ++rangeIt) {
		const RelocateData& candidate = rangeIt->value();
		if (candidate.src.empty()) {
			continue;
		}
		if (queue[candidate.src[0]].count(candidate)) {
			combined.insert(candidate);
		}
	}
	launchQueuedWork(combined, ddEnabledState);
}
|
|
|
|
|
|
2020-09-28 06:26:50 +08:00
|
|
|
|
void launchQueuedWork(const std::set<UID>& serversToLaunchFrom, const DDEnabledState* ddEnabledState) {
|
2017-05-26 04:48:44 +08:00
|
|
|
|
// combine all work from the source servers to see if there is anything new to launch
|
|
|
|
|
std::set<RelocateData, std::greater<RelocateData>> combined;
|
|
|
|
|
for (auto id : serversToLaunchFrom) {
|
|
|
|
|
auto& queuedWork = queue[id];
|
|
|
|
|
auto it = queuedWork.begin();
|
|
|
|
|
for (int j = 0; j < teamSize && it != queuedWork.end(); j++) {
|
|
|
|
|
combined.insert(*it);
|
|
|
|
|
++it;
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-09-28 06:26:50 +08:00
|
|
|
|
launchQueuedWork(combined, ddEnabledState);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
}
|
|
|
|
|
|
2020-09-28 06:26:50 +08:00
|
|
|
|
// Convenience overload: checks whether a single RelocateData can be launched by wrapping it in
// a one-element set and delegating to the set-based launchQueuedWork overload.
void launchQueuedWork(RelocateData launchData, const DDEnabledState* ddEnabledState) {
    std::set<RelocateData, std::greater<RelocateData>> single{ launchData };
    launchQueuedWork(single, ddEnabledState);
}
|
|
|
|
|
|
2019-08-13 01:08:12 +08:00
|
|
|
|
// For each relocateData rd in the queue, check if there exist inflight relocate data whose keyrange is overlapped
|
2020-07-13 09:30:02 +08:00
|
|
|
|
// with rd. If there exist, cancel them by cancelling their actors and reducing the src servers' busyness of those
|
|
|
|
|
// canceled inflight relocateData. Launch the relocation for the rd.
|
2020-09-28 06:26:50 +08:00
|
|
|
|
void launchQueuedWork(std::set<RelocateData, std::greater<RelocateData>> combined,
|
|
|
|
|
const DDEnabledState* ddEnabledState) {
|
2017-05-26 04:48:44 +08:00
|
|
|
|
int startedHere = 0;
|
|
|
|
|
double startTime = now();
|
|
|
|
|
// kick off relocators from items in the queue as need be
|
|
|
|
|
std::set<RelocateData, std::greater<RelocateData>>::iterator it = combined.begin();
|
|
|
|
|
for (; it != combined.end(); it++) {
|
|
|
|
|
RelocateData rd(*it);
|
|
|
|
|
|
2019-08-13 01:08:12 +08:00
|
|
|
|
// Check if there is an inflight shard that is overlapped with the queued relocateShard (rd)
|
2017-05-26 04:48:44 +08:00
|
|
|
|
bool overlappingInFlight = false;
|
|
|
|
|
auto intersectingInFlight = inFlight.intersectingRanges(rd.keys);
|
|
|
|
|
for (auto it = intersectingInFlight.begin(); it != intersectingInFlight.end(); ++it) {
|
2019-07-20 09:32:05 +08:00
|
|
|
|
if (fetchKeysComplete.count(it->value()) && inFlightActors.liveActorAt(it->range().begin) &&
|
|
|
|
|
!rd.keys.contains(it->range()) && it->value().priority >= rd.priority &&
|
2019-10-12 08:50:43 +08:00
|
|
|
|
rd.healthPriority < SERVER_KNOBS->PRIORITY_TEAM_UNHEALTHY) {
|
2022-08-05 06:28:33 +08:00
|
|
|
|
|
|
|
|
|
DebugRelocationTraceEvent("OverlappingInFlight", distributorId)
|
2019-04-06 04:11:50 +08:00
|
|
|
|
.detail("KeyBegin", it->value().keys.begin)
|
|
|
|
|
.detail("KeyEnd", it->value().keys.end)
|
2022-08-05 06:28:33 +08:00
|
|
|
|
.detail("Priority", it->value().priority);
|
|
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
|
overlappingInFlight = true;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (overlappingInFlight) {
|
2022-07-08 11:49:16 +08:00
|
|
|
|
ASSERT(!rd.isRestore());
|
2017-05-26 04:48:44 +08:00
|
|
|
|
// logRelocation( rd, "SkippingOverlappingInFlight" );
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Because the busyness of a server is decreased when a superseding relocation is issued, we
|
|
|
|
|
// need to consider what the busyness of a server WOULD be if
|
|
|
|
|
auto containedRanges = inFlight.containedRanges(rd.keys);
|
|
|
|
|
std::vector<RelocateData> cancellableRelocations;
|
|
|
|
|
for (auto it = containedRanges.begin(); it != containedRanges.end(); ++it) {
|
2022-02-25 23:33:46 +08:00
|
|
|
|
if (it.value().cancellable) {
|
2017-05-26 04:48:44 +08:00
|
|
|
|
cancellableRelocations.push_back(it->value());
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2019-07-20 07:22:15 +08:00
|
|
|
|
// Data movement avoids overloading source servers in moving data.
|
2019-08-13 01:08:12 +08:00
|
|
|
|
// SOMEDAY: the list of source servers may be outdated since they were fetched when the work was put in the
|
|
|
|
|
// queue
|
2017-05-26 04:48:44 +08:00
|
|
|
|
// FIXME: we need spare capacity even when we're just going to be cancelling work via TEAM_HEALTHY
|
2022-07-08 11:49:16 +08:00
|
|
|
|
if (!rd.isRestore() && !canLaunchSrc(rd, teamSize, singleRegionTeamSize, busymap, cancellableRelocations)) {
|
2017-05-26 04:48:44 +08:00
|
|
|
|
// logRelocation( rd, "SkippingQueuedRelocation" );
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2019-07-20 07:22:15 +08:00
|
|
|
|
// From now on, the source servers for the RelocateData rd have enough resource to move the data away,
|
|
|
|
|
// because they do not have too much inflight data movement.
|
|
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
|
// logRelocation( rd, "LaunchingRelocation" );
|
2022-08-05 06:28:33 +08:00
|
|
|
|
DebugRelocationTraceEvent(rd.interval.end(), distributorId).detail("Result", "Success");
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
2022-07-08 11:49:16 +08:00
|
|
|
|
if (!rd.isRestore()) {
|
|
|
|
|
queuedRelocations--;
|
|
|
|
|
TraceEvent(SevVerbose, "QueuedRelocationsChanged")
|
|
|
|
|
.detail("DataMoveID", rd.dataMoveId)
|
|
|
|
|
.detail("RandomID", rd.randomId)
|
|
|
|
|
.detail("Total", queuedRelocations);
|
|
|
|
|
finishRelocation(rd.priority, rd.healthPriority);
|
|
|
|
|
|
|
|
|
|
// now we are launching: remove this entry from the queue of all the src servers
|
|
|
|
|
for (int i = 0; i < rd.src.size(); i++) {
|
|
|
|
|
ASSERT(queue[rd.src[i]].erase(rd));
|
|
|
|
|
}
|
2017-05-26 04:48:44 +08:00
|
|
|
|
}
|
|
|
|
|
|
2022-07-08 11:49:16 +08:00
|
|
|
|
Future<Void> fCleanup =
|
2022-08-04 04:51:40 +08:00
|
|
|
|
SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA ? cancelDataMove(this, rd.keys, ddEnabledState) : Void();
|
2022-07-08 11:49:16 +08:00
|
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
|
// If there is a job in flight that wants data relocation which we are about to cancel/modify,
|
|
|
|
|
// make sure that we keep the relocation intent for the job that we launch
|
|
|
|
|
auto f = inFlight.intersectingRanges(rd.keys);
|
|
|
|
|
for (auto it = f.begin(); it != f.end(); ++it) {
|
|
|
|
|
if (inFlightActors.liveActorAt(it->range().begin)) {
|
|
|
|
|
rd.wantsNewServers |= it->value().wantsNewServers;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
startedHere++;
|
|
|
|
|
|
|
|
|
|
// update both inFlightActors and inFlight key range maps, cancelling deleted RelocateShards
|
2020-11-04 12:24:39 +08:00
|
|
|
|
std::vector<KeyRange> ranges;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
inFlightActors.getRangesAffectedByInsertion(rd.keys, ranges);
|
|
|
|
|
inFlightActors.cancel(KeyRangeRef(ranges.front().begin, ranges.back().end));
|
|
|
|
|
inFlight.insert(rd.keys, rd);
|
|
|
|
|
for (int r = 0; r < ranges.size(); r++) {
|
|
|
|
|
RelocateData& rrs = inFlight.rangeContaining(ranges[r].begin)->value();
|
|
|
|
|
rrs.keys = ranges[r];
|
2022-07-08 11:49:16 +08:00
|
|
|
|
if (rd.keys == ranges[r] && rd.isRestore()) {
|
|
|
|
|
ASSERT(rd.dataMove != nullptr);
|
2022-08-04 04:51:40 +08:00
|
|
|
|
ASSERT(SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA);
|
2022-07-08 11:49:16 +08:00
|
|
|
|
rrs.dataMoveId = rd.dataMove->meta.id;
|
|
|
|
|
} else {
|
|
|
|
|
ASSERT_WE_THINK(!rd.isRestore()); // Restored data move should not overlap.
|
|
|
|
|
// TODO(psm): The shard id is determined by DD.
|
|
|
|
|
rrs.dataMove.reset();
|
2022-08-04 04:51:40 +08:00
|
|
|
|
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA) {
|
2022-08-20 02:47:00 +08:00
|
|
|
|
if (SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD) {
|
|
|
|
|
rrs.dataMoveId = UID();
|
|
|
|
|
} else {
|
|
|
|
|
rrs.dataMoveId = deterministicRandom()->randomUniqueID();
|
|
|
|
|
}
|
2022-07-08 11:49:16 +08:00
|
|
|
|
} else {
|
|
|
|
|
rrs.dataMoveId = anonymousShardId;
|
|
|
|
|
}
|
|
|
|
|
}
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
2020-03-05 06:17:17 +08:00
|
|
|
|
launch(rrs, busymap, singleRegionTeamSize);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
activeRelocations++;
|
2022-07-08 11:49:16 +08:00
|
|
|
|
TraceEvent(SevVerbose, "InFlightRelocationChange")
|
|
|
|
|
.detail("Launch", rrs.dataMoveId)
|
|
|
|
|
.detail("Total", activeRelocations);
|
2019-10-12 08:50:43 +08:00
|
|
|
|
startRelocation(rrs.priority, rrs.healthPriority);
|
2019-08-13 01:08:12 +08:00
|
|
|
|
// Start the actor that relocates data in the rrs.keys
|
2022-07-08 11:49:16 +08:00
|
|
|
|
inFlightActors.insert(rrs.keys, dataDistributionRelocator(this, rrs, fCleanup, ddEnabledState));
|
2017-05-26 04:48:44 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// logRelocation( rd, "LaunchedRelocation" );
|
|
|
|
|
}
|
2019-05-11 05:01:52 +08:00
|
|
|
|
if (now() - startTime > .001 && deterministicRandom()->random01() < 0.001)
|
2018-06-09 02:11:08 +08:00
|
|
|
|
TraceEvent(SevWarnAlways, "LaunchingQueueSlowx1000").detail("Elapsed", now() - startTime);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
|
|
/*if( startedHere > 0 ) {
|
2018-12-14 05:31:37 +08:00
|
|
|
|
TraceEvent("StartedDDRelocators", distributorId)
|
2017-05-26 04:48:44 +08:00
|
|
|
|
.detail("QueueSize", queuedRelocations)
|
|
|
|
|
.detail("StartedHere", startedHere)
|
|
|
|
|
.detail("ActiveRelocations", activeRelocations);
|
|
|
|
|
} */
|
|
|
|
|
|
|
|
|
|
validate();
|
|
|
|
|
}
|
2022-04-04 13:31:45 +08:00
|
|
|
|
|
|
|
|
|
int getHighestPriorityRelocation() const {
|
|
|
|
|
int highestPriority{ 0 };
|
|
|
|
|
for (const auto& [priority, count] : priority_relocations) {
|
|
|
|
|
if (count > 0) {
|
|
|
|
|
highestPriority = std::max(highestPriority, priority);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return highestPriority;
|
|
|
|
|
}
|
2022-04-21 06:28:03 +08:00
|
|
|
|
|
2022-04-23 06:26:44 +08:00
|
|
|
|
// return true if the servers are throttled as source for read rebalance
|
2022-04-28 14:37:35 +08:00
|
|
|
|
bool timeThrottle(const std::vector<UID>& ids) const {
|
|
|
|
|
return std::any_of(ids.begin(), ids.end(), [this](const UID& id) {
|
2022-04-23 06:26:44 +08:00
|
|
|
|
if (this->lastAsSource.count(id)) {
|
2022-05-13 07:30:21 +08:00
|
|
|
|
return (now() - this->lastAsSource.at(id)) * SERVER_KNOBS->READ_REBALANCE_SRC_PARALLELISM <
|
|
|
|
|
SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL;
|
2022-04-21 13:19:56 +08:00
|
|
|
|
}
|
2022-04-23 06:26:44 +08:00
|
|
|
|
return false;
|
2022-04-21 06:28:03 +08:00
|
|
|
|
});
|
|
|
|
|
}
|
2022-04-23 06:26:44 +08:00
|
|
|
|
|
|
|
|
|
void updateLastAsSource(const std::vector<UID>& ids, double t = now()) {
|
|
|
|
|
for (auto& id : ids)
|
|
|
|
|
lastAsSource[id] = t;
|
|
|
|
|
}
|
2022-07-08 11:49:16 +08:00
|
|
|
|
|
|
|
|
|
// Schedules cancellation of a data move.
|
|
|
|
|
void enqueueCancelledDataMove(UID dataMoveId, KeyRange range, const DDEnabledState* ddEnabledState) {
|
2022-09-22 01:56:22 +08:00
|
|
|
|
ASSERT(!txnProcessor->isMocked()); // the mock implementation currently doesn't support data move
|
2022-07-08 11:49:16 +08:00
|
|
|
|
std::vector<Future<Void>> cleanup;
|
|
|
|
|
auto f = this->dataMoves.intersectingRanges(range);
|
|
|
|
|
for (auto it = f.begin(); it != f.end(); ++it) {
|
|
|
|
|
if (it->value().isValid()) {
|
|
|
|
|
TraceEvent(SevError, "DDEnqueueCancelledDataMoveConflict", this->distributorId)
|
|
|
|
|
.detail("DataMoveID", dataMoveId)
|
|
|
|
|
.detail("CancelledRange", range)
|
|
|
|
|
.detail("ConflictingDataMoveID", it->value().id)
|
|
|
|
|
.detail("ConflictingRange", KeyRangeRef(it->range().begin, it->range().end));
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2022-08-06 03:01:11 +08:00
|
|
|
|
DDQueue::DDDataMove dataMove(dataMoveId);
|
2022-07-08 11:49:16 +08:00
|
|
|
|
dataMove.cancel = cleanUpDataMove(
|
|
|
|
|
this->cx, dataMoveId, this->lock, &this->cleanUpDataMoveParallelismLock, range, ddEnabledState);
|
|
|
|
|
this->dataMoves.insert(range, dataMove);
|
|
|
|
|
TraceEvent(SevInfo, "DDEnqueuedCancelledDataMove", this->distributorId)
|
|
|
|
|
.detail("DataMoveID", dataMoveId)
|
|
|
|
|
.detail("Range", range);
|
|
|
|
|
}
|
2022-08-06 06:26:34 +08:00
|
|
|
|
|
|
|
|
|
// Returns a future from recurring() that, every DD_QUEUE_COUNTER_REFRESH_INTERVAL, traces all
// of serverCounter's counters under distributorId and then clears them.
// The callback captures `this`. NOTE(review): presumably the returned future must not outlive
// this object — confirm with the caller that holds it.
Future<Void> periodicalRefreshCounter() {
    return recurring(
        [this]() {
            serverCounter.traceAll(distributorId);
            serverCounter.clear();
        },
        SERVER_KNOBS->DD_QUEUE_COUNTER_REFRESH_INTERVAL);
}
|
2022-08-17 05:32:55 +08:00
|
|
|
|
|
|
|
|
|
// Returns the current value of the unhealthyRelocations counter (overrides a base-class virtual).
int getUnhealthyRelocationCount() override {
    return unhealthyRelocations;
}
|
2022-09-22 08:57:40 +08:00
|
|
|
|
|
|
|
|
|
// Declaration only; the definition is outside this section.
// NOTE(review): presumably issues srcReq and destReq to the team collection at
// teamCollectionIndex and yields the resulting source/destination team pair — confirm against
// the definition. `traceEvent` is a caller-owned event, presumably annotated with details.
Future<SrcDestTeamPair> getSrcDestTeams(const int& teamCollectionIndex,
                                        const GetTeamRequest& srcReq,
                                        const GetTeamRequest& destReq,
                                        const int& priority,
                                        TraceEvent* traceEvent);
|
|
|
|
|
|
|
|
|
|
// Declaration only; the definition is outside this section.
// NOTE(review): presumably tries to move read load from sourceTeam to destTeam for the given
// moveReason and resolves to whether a move was issued — confirm against the definition.
Future<bool> rebalanceReadLoad(DataMovementReason moveReason,
                               Reference<IDataDistributionTeam> sourceTeam,
                               Reference<IDataDistributionTeam> destTeam,
                               bool primary,
                               TraceEvent* traceEvent);
|
|
|
|
|
|
|
|
|
|
// Declaration only; the definition is outside this section.
// NOTE(review): presumably tries to move data from sourceTeam to destTeam for the given
// moveReason and resolves to whether a move was issued — confirm against the definition.
// Unlike rebalanceReadLoad, the teams are taken as references to const.
Future<bool> rebalanceTeams(DataMovementReason moveReason,
                            Reference<IDataDistributionTeam const> sourceTeam,
                            Reference<IDataDistributionTeam const> destTeam,
                            bool primary,
                            TraceEvent* traceEvent);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
};
|
|
|
|
|
|
2022-08-06 03:01:11 +08:00
|
|
|
|
// Cancels every valid data move tracked in self->dataMoves that intersects `range`.
// A cleanUpDataMove() is started for each such data move that does not already have a cancel
// future; once all cancel futures are done, the affected ranges are reset to an empty DDDataMove.
ACTOR Future<Void> cancelDataMove(struct DDQueue* self, KeyRange range, const DDEnabledState* ddEnabledState) {
    std::vector<Future<Void>> cleanup;
    auto overlapping = self->dataMoves.intersectingRanges(range);
    for (auto moveIt = overlapping.begin(); moveIt != overlapping.end(); ++moveIt) {
        auto& trackedMove = moveIt->value();
        if (trackedMove.isValid()) {
            KeyRange keys = KeyRangeRef(moveIt->range().begin, moveIt->range().end);
            TraceEvent(SevInfo, "DDQueueCancelDataMove", self->distributorId)
                .detail("DataMoveID", trackedMove.id)
                .detail("DataMoveRange", keys)
                .detail("Range", range);
            // Reuse a cancellation that is already running instead of starting a second one.
            if (!trackedMove.cancel.isValid()) {
                trackedMove.cancel = cleanUpDataMove(
                    self->cx, trackedMove.id, self->lock, &self->cleanUpDataMoveParallelismLock, keys, ddEnabledState);
            }
            cleanup.push_back(trackedMove.cancel);
        }
    }

    wait(waitForAll(cleanup));

    // Clear the bookkeeping for the whole span touched by the range.
    auto ranges = self->dataMoves.getAffectedRangesAfterInsertion(range);
    if (!ranges.empty()) {
        self->dataMoves.insert(KeyRangeRef(ranges.front().begin, ranges.back().end), DDQueue::DDDataMove());
    }
    return Void();
}
|
|
|
|
|
|
2021-12-14 02:13:34 +08:00
|
|
|
|
// Builds a single string listing the server IDs of every team in `bestTeams`, in order,
// each ID followed by one space. Returns an empty string when there are no teams.
static std::string destServersString(std::vector<std::pair<Reference<IDataDistributionTeam>, bool>> const& bestTeams) {
    std::string servers;
    for (const auto& teamEntry : bestTeams) {
        for (const auto& serverId : teamEntry.first->getServerIDs()) {
            servers += serverId.toString();
            servers += " ";
        }
    }
    return servers;
}
|
|
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
|
// This actor relocates the specified keys to a good place.
|
2019-07-20 07:22:15 +08:00
|
|
|
|
// The inFlightActor key range map stores the actor for each RelocateData
|
2022-08-06 03:01:11 +08:00
|
|
|
|
ACTOR Future<Void> dataDistributionRelocator(DDQueue* self,
|
2022-07-08 11:49:16 +08:00
|
|
|
|
RelocateData rd,
|
|
|
|
|
Future<Void> prevCleanup,
|
|
|
|
|
const DDEnabledState* ddEnabledState) {
|
2017-05-26 04:48:44 +08:00
|
|
|
|
state Promise<Void> errorOut(self->error);
|
2022-08-05 06:28:33 +08:00
|
|
|
|
state TraceInterval relocateShardInterval("RelocateShard", rd.randomId);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
state PromiseStream<RelocateData> dataTransferComplete(self->dataTransferComplete);
|
|
|
|
|
state PromiseStream<RelocateData> relocationComplete(self->relocationComplete);
|
|
|
|
|
state bool signalledTransferComplete = false;
|
2018-12-14 05:31:37 +08:00
|
|
|
|
state UID distributorId = self->distributorId;
|
2018-02-03 03:46:04 +08:00
|
|
|
|
state ParallelTCInfo healthyDestinations;
|
2018-06-20 14:15:30 +08:00
|
|
|
|
|
2018-02-03 03:46:04 +08:00
|
|
|
|
state bool anyHealthy = false;
|
2018-06-20 14:15:30 +08:00
|
|
|
|
state bool allHealthy = true;
|
|
|
|
|
state bool anyWithSource = false;
|
2021-12-14 02:13:34 +08:00
|
|
|
|
state bool anyDestOverloaded = false;
|
|
|
|
|
state int destOverloadedCount = 0;
|
|
|
|
|
state int stuckCount = 0;
|
2018-06-20 14:15:30 +08:00
|
|
|
|
state std::vector<std::pair<Reference<IDataDistributionTeam>, bool>> bestTeams;
|
2019-08-17 05:56:58 +08:00
|
|
|
|
state double startTime = now();
|
|
|
|
|
state std::vector<UID> destIds;
|
2022-08-20 02:47:00 +08:00
|
|
|
|
state uint64_t debugID = deterministicRandom()->randomUInt64();
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
|
|
try {
|
2018-02-17 08:01:19 +08:00
|
|
|
|
if (now() - self->lastInterval < 1.0) {
|
|
|
|
|
relocateShardInterval.severity = SevDebug;
|
|
|
|
|
self->suppressIntervals++;
|
|
|
|
|
}
|
|
|
|
|
|
2018-12-14 05:31:37 +08:00
|
|
|
|
TraceEvent(relocateShardInterval.begin(), distributorId)
|
2019-03-19 06:03:43 +08:00
|
|
|
|
.detail("KeyBegin", rd.keys.begin)
|
|
|
|
|
.detail("KeyEnd", rd.keys.end)
|
2018-02-17 08:01:19 +08:00
|
|
|
|
.detail("Priority", rd.priority)
|
|
|
|
|
.detail("SuppressedEventCount", self->suppressIntervals);
|
|
|
|
|
|
|
|
|
|
if (relocateShardInterval.severity != SevDebug) {
|
|
|
|
|
self->lastInterval = now();
|
|
|
|
|
self->suppressIntervals = 0;
|
|
|
|
|
}
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
2022-08-04 04:51:40 +08:00
|
|
|
|
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA) {
|
2022-07-08 11:49:16 +08:00
|
|
|
|
auto inFlightRange = self->inFlight.rangeContaining(rd.keys.begin);
|
|
|
|
|
ASSERT(inFlightRange.range() == rd.keys);
|
|
|
|
|
ASSERT(inFlightRange.value().randomId == rd.randomId);
|
|
|
|
|
ASSERT(inFlightRange.value().dataMoveId == rd.dataMoveId);
|
|
|
|
|
inFlightRange.value().cancellable = false;
|
|
|
|
|
|
|
|
|
|
wait(prevCleanup);
|
|
|
|
|
|
|
|
|
|
auto f = self->dataMoves.intersectingRanges(rd.keys);
|
|
|
|
|
for (auto it = f.begin(); it != f.end(); ++it) {
|
|
|
|
|
KeyRangeRef kr(it->range().begin, it->range().end);
|
|
|
|
|
const UID mId = it->value().id;
|
|
|
|
|
if (mId.isValid() && mId != rd.dataMoveId) {
|
|
|
|
|
TraceEvent("DDRelocatorConflictingDataMove", distributorId)
|
|
|
|
|
.detail("CurrentDataMoveID", rd.dataMoveId)
|
|
|
|
|
.detail("DataMoveID", mId)
|
|
|
|
|
.detail("Range", kr);
|
|
|
|
|
}
|
|
|
|
|
}
|
2022-08-20 02:47:00 +08:00
|
|
|
|
if (rd.isRestore() || !SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD) {
|
|
|
|
|
if (SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD) {
|
|
|
|
|
ASSERT(rd.dataMoveId.isValid());
|
|
|
|
|
}
|
|
|
|
|
self->dataMoves.insert(rd.keys, DDQueue::DDDataMove(rd.dataMoveId));
|
|
|
|
|
}
|
2022-07-08 11:49:16 +08:00
|
|
|
|
}
|
|
|
|
|
|
2022-05-04 15:00:03 +08:00
|
|
|
|
state StorageMetrics metrics =
|
2017-05-26 04:48:44 +08:00
|
|
|
|
wait(brokenPromiseToNever(self->getShardMetrics.getReply(GetMetricsRequest(rd.keys))));
|
|
|
|
|
|
2022-08-20 02:47:00 +08:00
|
|
|
|
state uint64_t physicalShardIDCandidate = UID().first();
|
|
|
|
|
state bool forceToUseNewPhysicalShard = false;
|
|
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
|
ASSERT(rd.src.size());
|
|
|
|
|
loop {
|
2021-12-14 02:13:34 +08:00
|
|
|
|
destOverloadedCount = 0;
|
|
|
|
|
stuckCount = 0;
|
2022-10-25 01:39:32 +08:00
|
|
|
|
state DDQueue::RetryFindDstReason retryFindDstReason = DDQueue::RetryFindDstReason::None;
|
2018-11-22 03:18:26 +08:00
|
|
|
|
// state int bestTeamStuckThreshold = 50;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
loop {
|
2017-10-11 01:36:33 +08:00
|
|
|
|
state int tciIndex = 0;
|
|
|
|
|
state bool foundTeams = true;
|
2022-07-08 11:49:16 +08:00
|
|
|
|
state bool bestTeamReady = false;
|
2018-02-03 03:46:04 +08:00
|
|
|
|
anyHealthy = false;
|
2018-06-20 14:15:30 +08:00
|
|
|
|
allHealthy = true;
|
|
|
|
|
anyWithSource = false;
|
2021-12-14 02:13:34 +08:00
|
|
|
|
anyDestOverloaded = false;
|
2018-06-20 14:15:30 +08:00
|
|
|
|
bestTeams.clear();
|
2021-06-12 06:58:05 +08:00
|
|
|
|
// Get team from teamCollections in different DCs and find the best one
|
2018-06-20 14:15:30 +08:00
|
|
|
|
while (tciIndex < self->teamCollections.size()) {
|
2022-08-04 04:51:40 +08:00
|
|
|
|
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && rd.isRestore()) {
|
2022-07-08 11:49:16 +08:00
|
|
|
|
auto req = GetTeamRequest(tciIndex == 0 ? rd.dataMove->primaryDest : rd.dataMove->remoteDest);
|
|
|
|
|
Future<std::pair<Optional<Reference<IDataDistributionTeam>>, bool>> fbestTeam =
|
|
|
|
|
brokenPromiseToNever(self->teamCollections[tciIndex].getTeam.getReply(req));
|
|
|
|
|
bestTeamReady = fbestTeam.isReady();
|
|
|
|
|
std::pair<Optional<Reference<IDataDistributionTeam>>, bool> bestTeam = wait(fbestTeam);
|
|
|
|
|
if (tciIndex > 0 && !bestTeamReady) {
|
|
|
|
|
// self->shardsAffectedByTeamFailure->moveShard must be called without any waits after
|
|
|
|
|
// getting the destination team or we could miss failure notifications for the storage
|
|
|
|
|
// servers in the destination team
|
|
|
|
|
TraceEvent("BestTeamNotReady")
|
|
|
|
|
.detail("TeamCollectionIndex", tciIndex)
|
|
|
|
|
.detail("RestoreDataMoveForDest",
|
|
|
|
|
describe(tciIndex == 0 ? rd.dataMove->primaryDest : rd.dataMove->remoteDest));
|
2022-10-25 01:39:32 +08:00
|
|
|
|
retryFindDstReason = DDQueue::RetryFindDstReason::RemoteBestTeamNotReady;
|
2022-07-08 11:49:16 +08:00
|
|
|
|
foundTeams = false;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
if (!bestTeam.first.present() || !bestTeam.first.get()->isHealthy()) {
|
2022-10-25 01:39:32 +08:00
|
|
|
|
retryFindDstReason = tciIndex == 0 ? DDQueue::RetryFindDstReason::PrimaryNoHealthyTeam
|
|
|
|
|
: DDQueue::RetryFindDstReason::RemoteNoHealthyTeam;
|
2022-07-08 11:49:16 +08:00
|
|
|
|
foundTeams = false;
|
|
|
|
|
break;
|
|
|
|
|
}
|
2018-06-20 14:15:30 +08:00
|
|
|
|
anyHealthy = true;
|
2022-07-08 11:49:16 +08:00
|
|
|
|
bestTeams.emplace_back(bestTeam.first.get(), bestTeam.second);
|
|
|
|
|
} else {
|
|
|
|
|
double inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_HEALTHY;
|
|
|
|
|
if (rd.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_UNHEALTHY ||
|
|
|
|
|
rd.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_2_LEFT)
|
|
|
|
|
inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_UNHEALTHY;
|
|
|
|
|
if (rd.healthPriority == SERVER_KNOBS->PRIORITY_POPULATE_REGION ||
|
|
|
|
|
rd.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_1_LEFT ||
|
|
|
|
|
rd.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_0_LEFT)
|
|
|
|
|
inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_ONE_LEFT;
|
|
|
|
|
|
|
|
|
|
auto req = GetTeamRequest(WantNewServers(rd.wantsNewServers),
|
|
|
|
|
WantTrueBest(isValleyFillerPriority(rd.priority)),
|
|
|
|
|
PreferLowerDiskUtil::True,
|
|
|
|
|
TeamMustHaveShards::False,
|
|
|
|
|
ForReadBalance(rd.reason == RelocateReason::REBALANCE_READ),
|
|
|
|
|
PreferLowerReadUtil::True,
|
|
|
|
|
inflightPenalty);
|
|
|
|
|
|
|
|
|
|
req.src = rd.src;
|
|
|
|
|
req.completeSources = rd.completeSources;
|
|
|
|
|
|
2022-08-20 02:47:00 +08:00
|
|
|
|
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD &&
|
|
|
|
|
tciIndex == 1) {
|
|
|
|
|
ASSERT(physicalShardIDCandidate != UID().first() &&
|
|
|
|
|
physicalShardIDCandidate != anonymousShardId.first());
|
|
|
|
|
Optional<ShardsAffectedByTeamFailure::Team> remoteTeamWithPhysicalShard =
|
|
|
|
|
self->physicalShardCollection->tryGetAvailableRemoteTeamWith(
|
|
|
|
|
physicalShardIDCandidate, metrics, debugID);
|
|
|
|
|
if (remoteTeamWithPhysicalShard.present()) {
|
|
|
|
|
// Exists a remoteTeam in the mapping that has the physicalShardIDCandidate
|
|
|
|
|
// use the remoteTeam with the physicalShard as the bestTeam
|
|
|
|
|
req = GetTeamRequest(remoteTeamWithPhysicalShard.get().servers);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2022-07-08 11:49:16 +08:00
|
|
|
|
// bestTeam.second = false if the bestTeam in the teamCollection (in the DC) does not have any
|
|
|
|
|
// server that hosts the relocateData. This is possible, for example, in a fearless
|
|
|
|
|
// configuration when the remote DC is just brought up.
|
|
|
|
|
Future<std::pair<Optional<Reference<IDataDistributionTeam>>, bool>> fbestTeam =
|
|
|
|
|
brokenPromiseToNever(self->teamCollections[tciIndex].getTeam.getReply(req));
|
|
|
|
|
bestTeamReady = fbestTeam.isReady();
|
|
|
|
|
std::pair<Optional<Reference<IDataDistributionTeam>>, bool> bestTeam = wait(fbestTeam);
|
|
|
|
|
if (tciIndex > 0 && !bestTeamReady) {
|
|
|
|
|
// self->shardsAffectedByTeamFailure->moveShard must be called without any waits after
|
|
|
|
|
// getting the destination team or we could miss failure notifications for the storage
|
|
|
|
|
// servers in the destination team
|
|
|
|
|
TraceEvent("BestTeamNotReady");
|
2022-10-25 01:39:32 +08:00
|
|
|
|
retryFindDstReason = DDQueue::RetryFindDstReason::RemoteBestTeamNotReady;
|
2022-07-08 11:49:16 +08:00
|
|
|
|
foundTeams = false;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
// If a DC has no healthy team, we stop checking the other DCs until
|
|
|
|
|
// the unhealthy DC is healthy again or is excluded.
|
|
|
|
|
if (!bestTeam.first.present()) {
|
2022-10-25 01:39:32 +08:00
|
|
|
|
retryFindDstReason = tciIndex == 0 ? DDQueue::RetryFindDstReason::PrimaryNoHealthyTeam
|
|
|
|
|
: DDQueue::RetryFindDstReason::RemoteNoHealthyTeam;
|
2022-07-08 11:49:16 +08:00
|
|
|
|
foundTeams = false;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
if (!bestTeam.first.get()->isHealthy()) {
|
|
|
|
|
allHealthy = false;
|
|
|
|
|
} else {
|
|
|
|
|
anyHealthy = true;
|
|
|
|
|
}
|
2020-06-30 01:02:27 +08:00
|
|
|
|
|
2022-07-08 11:49:16 +08:00
|
|
|
|
if (bestTeam.second) {
|
|
|
|
|
anyWithSource = true;
|
|
|
|
|
}
|
2020-07-14 08:05:12 +08:00
|
|
|
|
|
2022-08-20 02:47:00 +08:00
|
|
|
|
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD) {
|
|
|
|
|
// critical to the correctness of team selection by PhysicalShardCollection
|
|
|
|
|
// tryGetAvailableRemoteTeamWith() enforce to select a remote team paired with a primary
|
|
|
|
|
// team Thus, tryGetAvailableRemoteTeamWith() may select an almost full remote team In this
|
|
|
|
|
// case, we must re-select a remote team We set foundTeams = false to avoid finishing team
|
|
|
|
|
// selection Then, forceToUseNewPhysicalShard is set, which enforce to use getTeam to select
|
|
|
|
|
// a remote team
|
|
|
|
|
if (tciIndex == 1 && !forceToUseNewPhysicalShard) {
|
|
|
|
|
bool minAvailableSpaceRatio = bestTeam.first.get()->getMinAvailableSpaceRatio(true);
|
|
|
|
|
if (minAvailableSpaceRatio < SERVER_KNOBS->TARGET_AVAILABLE_SPACE_RATIO) {
|
2022-10-25 01:39:32 +08:00
|
|
|
|
retryFindDstReason = DDQueue::RetryFindDstReason::RemoteTeamIsFull;
|
2022-08-20 02:47:00 +08:00
|
|
|
|
foundTeams = false;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD) {
|
|
|
|
|
bestTeams.emplace_back(bestTeam.first.get(), true);
|
|
|
|
|
// Always set bestTeams[i].second = true to disable optimization in data move between DCs
|
|
|
|
|
// for the correctness of PhysicalShardCollection
|
|
|
|
|
// Currently, enabling the optimization will break the invariant of PhysicalShardCollection
|
|
|
|
|
// Invariant: once a physical shard is created with a specific set of SSes, this SS set will
|
|
|
|
|
// never get changed.
|
|
|
|
|
} else {
|
|
|
|
|
bestTeams.emplace_back(bestTeam.first.get(), bestTeam.second);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// get physicalShardIDCandidate
|
|
|
|
|
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD &&
|
|
|
|
|
tciIndex == 0) {
|
|
|
|
|
ASSERT(foundTeams);
|
|
|
|
|
ShardsAffectedByTeamFailure::Team primaryTeam =
|
|
|
|
|
ShardsAffectedByTeamFailure::Team(bestTeams[0].first->getServerIDs(), true);
|
|
|
|
|
physicalShardIDCandidate =
|
|
|
|
|
self->physicalShardCollection->determinePhysicalShardIDGivenPrimaryTeam(
|
|
|
|
|
primaryTeam, metrics, forceToUseNewPhysicalShard, debugID);
|
|
|
|
|
ASSERT(physicalShardIDCandidate != UID().first() &&
|
|
|
|
|
physicalShardIDCandidate != anonymousShardId.first());
|
|
|
|
|
}
|
2022-07-08 11:49:16 +08:00
|
|
|
|
}
|
2017-10-11 01:36:33 +08:00
|
|
|
|
tciIndex++;
|
|
|
|
|
}
|
2022-08-20 02:47:00 +08:00
|
|
|
|
|
|
|
|
|
// critical to the correctness of team selection by PhysicalShardCollection
|
|
|
|
|
// tryGetAvailableRemoteTeamWith() enforce to select a remote team paired with a primary team
|
|
|
|
|
// Thus, tryGetAvailableRemoteTeamWith() may select an unhealthy remote team
|
|
|
|
|
// In this case, we must re-select a remote team
|
|
|
|
|
// We set foundTeams = false to avoid finishing team selection
|
|
|
|
|
// Then, forceToUseNewPhysicalShard is set, which enforce to use getTeam to select a remote team
|
|
|
|
|
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD &&
|
|
|
|
|
bestTeams.size() > 1 && !forceToUseNewPhysicalShard) {
|
|
|
|
|
if (!bestTeams[1].first->isHealthy()) {
|
2022-10-25 01:39:32 +08:00
|
|
|
|
retryFindDstReason = DDQueue::RetryFindDstReason::RemoteTeamIsNotHealthy;
|
2022-08-20 02:47:00 +08:00
|
|
|
|
foundTeams = false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2021-12-14 02:13:34 +08:00
|
|
|
|
// once we've found healthy candidate teams, make sure they're not overloaded with outstanding moves
|
|
|
|
|
// already
|
|
|
|
|
anyDestOverloaded = !canLaunchDest(bestTeams, rd.priority, self->destBusymap);
|
|
|
|
|
|
|
|
|
|
if (foundTeams && anyHealthy && !anyDestOverloaded) {
|
|
|
|
|
ASSERT(rd.completeDests.empty());
|
2017-05-26 04:48:44 +08:00
|
|
|
|
break;
|
|
|
|
|
}
|
2018-08-30 05:40:39 +08:00
|
|
|
|
|
2021-12-14 02:13:34 +08:00
|
|
|
|
if (anyDestOverloaded) {
|
2022-07-20 04:15:51 +08:00
|
|
|
|
CODE_PROBE(true, "Destination overloaded throttled move");
|
2021-12-14 02:13:34 +08:00
|
|
|
|
destOverloadedCount++;
|
|
|
|
|
TraceEvent(destOverloadedCount > 50 ? SevInfo : SevDebug, "DestSSBusy", distributorId)
|
|
|
|
|
.suppressFor(1.0)
|
|
|
|
|
.detail("StuckCount", stuckCount)
|
|
|
|
|
.detail("DestOverloadedCount", destOverloadedCount)
|
|
|
|
|
.detail("TeamCollectionId", tciIndex)
|
|
|
|
|
.detail("AnyDestOverloaded", anyDestOverloaded)
|
|
|
|
|
.detail("NumOfTeamCollections", self->teamCollections.size())
|
|
|
|
|
.detail("Servers", destServersString(bestTeams));
|
2022-08-20 02:47:00 +08:00
|
|
|
|
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD) {
|
|
|
|
|
if (rd.isRestore() && destOverloadedCount > 50) {
|
|
|
|
|
throw data_move_dest_team_not_found();
|
|
|
|
|
}
|
|
|
|
|
}
|
2021-12-14 02:13:34 +08:00
|
|
|
|
wait(delay(SERVER_KNOBS->DEST_OVERLOADED_DELAY, TaskPriority::DataDistributionLaunch));
|
|
|
|
|
} else {
|
2022-07-20 04:15:51 +08:00
|
|
|
|
CODE_PROBE(true, "did not find a healthy destination team on the first attempt");
|
2021-12-14 02:13:34 +08:00
|
|
|
|
stuckCount++;
|
|
|
|
|
TraceEvent(stuckCount > 50 ? SevWarnAlways : SevWarn, "BestTeamStuck", distributorId)
|
|
|
|
|
.suppressFor(1.0)
|
|
|
|
|
.detail("StuckCount", stuckCount)
|
|
|
|
|
.detail("DestOverloadedCount", destOverloadedCount)
|
|
|
|
|
.detail("TeamCollectionId", tciIndex)
|
|
|
|
|
.detail("AnyDestOverloaded", anyDestOverloaded)
|
|
|
|
|
.detail("NumOfTeamCollections", self->teamCollections.size());
|
2022-07-08 11:49:16 +08:00
|
|
|
|
if (rd.isRestore() && stuckCount > 50) {
|
|
|
|
|
throw data_move_dest_team_not_found();
|
|
|
|
|
}
|
2021-12-14 02:13:34 +08:00
|
|
|
|
wait(delay(SERVER_KNOBS->BEST_TEAM_STUCK_DELAY, TaskPriority::DataDistributionLaunch));
|
|
|
|
|
}
|
2022-08-20 02:47:00 +08:00
|
|
|
|
// When forceToUseNewPhysicalShard = false, we get paired primary team and remote team
|
|
|
|
|
// However, this may fail
|
|
|
|
|
// Any retry triggers to use new physicalShard which enters the normal routine
|
|
|
|
|
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD) {
|
|
|
|
|
forceToUseNewPhysicalShard = true;
|
|
|
|
|
}
|
2021-12-14 02:13:34 +08:00
|
|
|
|
|
|
|
|
|
// TODO different trace event + knob for overloaded? Could wait on an async var for done moves
|
2017-05-26 04:48:44 +08:00
|
|
|
|
}
|
|
|
|
|
|
2022-08-20 02:47:00 +08:00
|
|
|
|
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD) {
|
|
|
|
|
if (!rd.isRestore()) {
|
|
|
|
|
// when !rd.isRestore(), dataMoveId is just decided as physicalShardIDCandidate
|
|
|
|
|
// thus, update the physicalShardIDCandidate to related data structures
|
|
|
|
|
ASSERT(physicalShardIDCandidate != UID().first());
|
2022-10-20 13:09:04 +08:00
|
|
|
|
if (self->physicalShardCollection->physicalShardExists(physicalShardIDCandidate)) {
|
|
|
|
|
self->moveReusePhysicalShard++;
|
|
|
|
|
} else {
|
|
|
|
|
self->moveCreateNewPhysicalShard++;
|
2022-10-25 01:39:32 +08:00
|
|
|
|
if (retryFindDstReason == DDQueue::RetryFindDstReason::None) {
|
2022-10-23 11:48:58 +08:00
|
|
|
|
// When creating a new physical shard, but the reason is none, this can only happen when
|
|
|
|
|
// determinePhysicalShardIDGivenPrimaryTeam() finds that there is no available physical
|
|
|
|
|
// shard.
|
2022-10-25 01:39:32 +08:00
|
|
|
|
self->retryFindDstReasonCount[DDQueue::RetryFindDstReason::NoAvailablePhysicalShard]++;
|
2022-10-23 11:48:58 +08:00
|
|
|
|
} else {
|
2022-10-25 01:39:32 +08:00
|
|
|
|
self->retryFindDstReasonCount[retryFindDstReason]++;
|
2022-10-23 11:48:58 +08:00
|
|
|
|
}
|
2022-10-20 13:09:04 +08:00
|
|
|
|
}
|
2022-08-20 02:47:00 +08:00
|
|
|
|
rd.dataMoveId = newShardId(physicalShardIDCandidate, AssignEmptyRange::False);
|
|
|
|
|
auto inFlightRange = self->inFlight.rangeContaining(rd.keys.begin);
|
|
|
|
|
inFlightRange.value().dataMoveId = rd.dataMoveId;
|
|
|
|
|
auto f = self->dataMoves.intersectingRanges(rd.keys);
|
|
|
|
|
for (auto it = f.begin(); it != f.end(); ++it) {
|
|
|
|
|
KeyRangeRef kr(it->range().begin, it->range().end);
|
|
|
|
|
const UID mId = it->value().id;
|
|
|
|
|
if (mId.isValid() && mId != rd.dataMoveId) {
|
|
|
|
|
TraceEvent("DDRelocatorConflictingDataMoveAfterGetTeam", distributorId)
|
|
|
|
|
.detail("CurrentDataMoveID", rd.dataMoveId)
|
|
|
|
|
.detail("DataMoveID", mId)
|
|
|
|
|
.detail("Range", kr);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
self->dataMoves.insert(rd.keys, DDQueue::DDDataMove(rd.dataMoveId));
|
|
|
|
|
}
|
|
|
|
|
ASSERT(rd.dataMoveId.first() != UID().first());
|
|
|
|
|
auto dataMoveRange = self->dataMoves.rangeContaining(rd.keys.begin);
|
|
|
|
|
ASSERT(dataMoveRange.value().id == rd.dataMoveId);
|
|
|
|
|
}
|
|
|
|
|
|
2022-02-25 23:33:46 +08:00
|
|
|
|
// set cancellable to false on inFlight's entry for this key range
|
|
|
|
|
auto inFlightRange = self->inFlight.rangeContaining(rd.keys.begin);
|
|
|
|
|
ASSERT(inFlightRange.range() == rd.keys);
|
|
|
|
|
ASSERT(inFlightRange.value().randomId == rd.randomId);
|
|
|
|
|
inFlightRange.value().cancellable = false;
|
|
|
|
|
|
2019-08-17 05:56:58 +08:00
|
|
|
|
destIds.clear();
|
2018-06-20 14:15:30 +08:00
|
|
|
|
state std::vector<UID> healthyIds;
|
|
|
|
|
state std::vector<UID> extraIds;
|
|
|
|
|
state std::vector<ShardsAffectedByTeamFailure::Team> destinationTeams;
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < bestTeams.size(); i++) {
|
|
|
|
|
auto& serverIds = bestTeams[i].first->getServerIDs();
|
|
|
|
|
destinationTeams.push_back(ShardsAffectedByTeamFailure::Team(serverIds, i == 0));
|
2019-02-13 06:57:33 +08:00
|
|
|
|
|
2022-07-08 11:49:16 +08:00
|
|
|
|
// TODO(psm): Make DataMoveMetaData aware of the two-step data move optimization.
|
2019-02-13 06:57:33 +08:00
|
|
|
|
if (allHealthy && anyWithSource && !bestTeams[i].second) {
|
2020-07-14 08:05:12 +08:00
|
|
|
|
// When all servers in bestTeams[i] do not hold the shard (!bestTeams[i].second), it indicates
|
|
|
|
|
// the bestTeams[i] is in a new DC where data has not been replicated to.
|
|
|
|
|
// To move data (specified in RelocateShard) to bestTeams[i] in the new DC AND reduce data movement
|
|
|
|
|
// across DC, we randomly choose a server in bestTeams[i] as the shard's destination, and
|
2019-02-13 06:57:33 +08:00
|
|
|
|
// move the shard to the randomly chosen server (in the remote DC), which will later
|
|
|
|
|
// propagate its data to the servers in the same team. This saves data movement bandwidth across DC
|
2019-05-11 05:01:52 +08:00
|
|
|
|
int idx = deterministicRandom()->randomInt(0, serverIds.size());
|
2018-06-20 14:15:30 +08:00
|
|
|
|
destIds.push_back(serverIds[idx]);
|
|
|
|
|
healthyIds.push_back(serverIds[idx]);
|
|
|
|
|
for (int j = 0; j < serverIds.size(); j++) {
|
|
|
|
|
if (j != idx) {
|
|
|
|
|
extraIds.push_back(serverIds[j]);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
healthyDestinations.addTeam(bestTeams[i].first);
|
|
|
|
|
} else {
|
|
|
|
|
destIds.insert(destIds.end(), serverIds.begin(), serverIds.end());
|
|
|
|
|
if (bestTeams[i].first->isHealthy()) {
|
|
|
|
|
healthyIds.insert(healthyIds.end(), serverIds.begin(), serverIds.end());
|
|
|
|
|
healthyDestinations.addTeam(bestTeams[i].first);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-08-30 05:40:39 +08:00
|
|
|
|
// Sanity check
|
|
|
|
|
state int totalIds = 0;
|
2018-11-22 03:18:26 +08:00
|
|
|
|
for (auto& destTeam : destinationTeams) {
|
2018-08-30 05:40:39 +08:00
|
|
|
|
totalIds += destTeam.servers.size();
|
|
|
|
|
}
|
2018-11-22 03:18:26 +08:00
|
|
|
|
if (totalIds != self->teamSize) {
|
|
|
|
|
TraceEvent(SevWarn, "IncorrectDestTeamSize")
|
|
|
|
|
.suppressFor(1.0)
|
|
|
|
|
.detail("ExpectedTeamSize", self->teamSize)
|
|
|
|
|
.detail("DestTeamSize", totalIds);
|
2018-08-30 05:40:39 +08:00
|
|
|
|
}
|
|
|
|
|
|
2022-07-08 11:49:16 +08:00
|
|
|
|
if (!rd.isRestore()) {
|
|
|
|
|
self->shardsAffectedByTeamFailure->moveShard(rd.keys, destinationTeams);
|
|
|
|
|
}
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
2017-10-11 01:36:33 +08:00
|
|
|
|
// FIXME: do not add data in flight to servers that were already in the src.
|
2022-04-01 00:57:00 +08:00
|
|
|
|
healthyDestinations.addDataInFlightToTeam(+metrics.bytes);
|
|
|
|
|
healthyDestinations.addReadInFlightToTeam(+metrics.bytesReadPerKSecond);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
2021-12-14 02:13:34 +08:00
|
|
|
|
launchDest(rd, bestTeams, self->destBusymap);
|
|
|
|
|
|
2020-03-01 05:45:00 +08:00
|
|
|
|
if (SERVER_KNOBS->DD_ENABLE_VERBOSE_TRACING) {
|
|
|
|
|
// StorageMetrics is the rd shard's metrics, e.g., bytes and write bandwidth
|
|
|
|
|
TraceEvent(SevInfo, "RelocateShardDecision", distributorId)
|
|
|
|
|
.detail("PairId", relocateShardInterval.pairID)
|
|
|
|
|
.detail("Priority", rd.priority)
|
|
|
|
|
.detail("KeyBegin", rd.keys.begin)
|
|
|
|
|
.detail("KeyEnd", rd.keys.end)
|
|
|
|
|
.detail("StorageMetrics", metrics.toString())
|
|
|
|
|
.detail("SourceServers", describe(rd.src))
|
|
|
|
|
.detail("DestinationTeam", describe(destIds))
|
|
|
|
|
.detail("ExtraIds", describe(extraIds));
|
|
|
|
|
} else {
|
|
|
|
|
TraceEvent(relocateShardInterval.severity, "RelocateShardHasDestination", distributorId)
|
|
|
|
|
.detail("PairId", relocateShardInterval.pairID)
|
2022-03-29 05:20:07 +08:00
|
|
|
|
.detail("Priority", rd.priority)
|
2020-11-06 08:13:18 +08:00
|
|
|
|
.detail("KeyBegin", rd.keys.begin)
|
|
|
|
|
.detail("KeyEnd", rd.keys.end)
|
|
|
|
|
.detail("SourceServers", describe(rd.src))
|
2020-03-01 05:45:00 +08:00
|
|
|
|
.detail("DestinationTeam", describe(destIds))
|
|
|
|
|
.detail("ExtraIds", describe(extraIds));
|
|
|
|
|
}
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
2022-08-06 03:01:11 +08:00
|
|
|
|
self->serverCounter.increaseForTeam(rd.src, rd.reason, DDQueue::ServerCounter::LaunchedSource);
|
|
|
|
|
self->serverCounter.increaseForTeam(destIds, rd.reason, DDQueue::ServerCounter::LaunchedDest);
|
|
|
|
|
self->serverCounter.increaseForTeam(extraIds, rd.reason, DDQueue::ServerCounter::LaunchedDest);
|
|
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
|
state Error error = success();
|
|
|
|
|
state Promise<Void> dataMovementComplete;
|
2020-02-28 10:32:02 +08:00
|
|
|
|
// Move keys from source to destination by changing the serverKeyList and keyServerList system keys
|
2022-09-13 06:40:18 +08:00
|
|
|
|
state Future<Void> doMoveKeys =
|
|
|
|
|
self->txnProcessor->moveKeys(MoveKeysParams{ rd.dataMoveId,
|
|
|
|
|
rd.keys,
|
|
|
|
|
destIds,
|
|
|
|
|
healthyIds,
|
|
|
|
|
self->lock,
|
|
|
|
|
dataMovementComplete,
|
|
|
|
|
&self->startMoveKeysParallelismLock,
|
|
|
|
|
&self->finishMoveKeysParallelismLock,
|
|
|
|
|
self->teamCollections.size() > 1,
|
|
|
|
|
relocateShardInterval.pairID,
|
|
|
|
|
ddEnabledState,
|
|
|
|
|
CancelConflictingDataMoves::False });
|
2019-06-25 17:47:35 +08:00
|
|
|
|
state Future<Void> pollHealth =
|
|
|
|
|
signalledTransferComplete ? Never()
|
|
|
|
|
: delay(SERVER_KNOBS->HEALTH_POLL_TIME, TaskPriority::DataDistributionLaunch);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
try {
|
|
|
|
|
loop {
|
|
|
|
|
choose {
|
2018-08-11 04:57:10 +08:00
|
|
|
|
when(wait(doMoveKeys)) {
|
2018-06-20 14:15:30 +08:00
|
|
|
|
if (extraIds.size()) {
|
|
|
|
|
destIds.insert(destIds.end(), extraIds.begin(), extraIds.end());
|
|
|
|
|
healthyIds.insert(healthyIds.end(), extraIds.begin(), extraIds.end());
|
|
|
|
|
extraIds.clear();
|
2018-11-22 03:18:26 +08:00
|
|
|
|
ASSERT(totalIds == destIds.size()); // Sanity check the destIDs before we move keys
|
2022-09-13 06:40:18 +08:00
|
|
|
|
doMoveKeys =
|
|
|
|
|
self->txnProcessor->moveKeys(MoveKeysParams{ rd.dataMoveId,
|
|
|
|
|
rd.keys,
|
|
|
|
|
destIds,
|
|
|
|
|
healthyIds,
|
|
|
|
|
self->lock,
|
|
|
|
|
Promise<Void>(),
|
|
|
|
|
&self->startMoveKeysParallelismLock,
|
|
|
|
|
&self->finishMoveKeysParallelismLock,
|
|
|
|
|
self->teamCollections.size() > 1,
|
|
|
|
|
relocateShardInterval.pairID,
|
|
|
|
|
ddEnabledState,
|
|
|
|
|
CancelConflictingDataMoves::False });
|
2018-06-20 14:15:30 +08:00
|
|
|
|
} else {
|
|
|
|
|
self->fetchKeysComplete.insert(rd);
|
2022-08-04 04:51:40 +08:00
|
|
|
|
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA) {
|
2022-07-08 11:49:16 +08:00
|
|
|
|
auto ranges = self->dataMoves.getAffectedRangesAfterInsertion(rd.keys);
|
|
|
|
|
if (ranges.size() == 1 && static_cast<KeyRange>(ranges[0]) == rd.keys &&
|
|
|
|
|
ranges[0].value.id == rd.dataMoveId && !ranges[0].value.cancel.isValid()) {
|
2022-08-06 03:01:11 +08:00
|
|
|
|
self->dataMoves.insert(rd.keys, DDQueue::DDDataMove());
|
2022-07-08 11:49:16 +08:00
|
|
|
|
TraceEvent(SevVerbose, "DequeueDataMoveOnSuccess", self->distributorId)
|
|
|
|
|
.detail("DataMoveID", rd.dataMoveId)
|
|
|
|
|
.detail("DataMoveRange", rd.keys);
|
|
|
|
|
}
|
|
|
|
|
}
|
2018-06-20 14:15:30 +08:00
|
|
|
|
break;
|
|
|
|
|
}
|
2017-05-26 04:48:44 +08:00
|
|
|
|
}
|
2018-08-11 04:57:10 +08:00
|
|
|
|
when(wait(pollHealth)) {
|
2018-02-03 03:46:04 +08:00
|
|
|
|
if (!healthyDestinations.isHealthy()) {
|
2017-10-11 01:36:33 +08:00
|
|
|
|
if (!signalledTransferComplete) {
|
2017-05-26 04:48:44 +08:00
|
|
|
|
signalledTransferComplete = true;
|
2017-10-11 01:36:33 +08:00
|
|
|
|
self->dataTransferComplete.send(rd);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
2019-06-25 17:47:35 +08:00
|
|
|
|
pollHealth = signalledTransferComplete ? Never()
|
|
|
|
|
: delay(SERVER_KNOBS->HEALTH_POLL_TIME,
|
|
|
|
|
TaskPriority::DataDistributionLaunch);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
}
|
2018-08-11 04:57:10 +08:00
|
|
|
|
when(wait(signalledTransferComplete ? Never() : dataMovementComplete.getFuture())) {
|
2017-05-26 04:48:44 +08:00
|
|
|
|
self->fetchKeysComplete.insert(rd);
|
|
|
|
|
if (!signalledTransferComplete) {
|
|
|
|
|
signalledTransferComplete = true;
|
|
|
|
|
self->dataTransferComplete.send(rd);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
} catch (Error& e) {
|
|
|
|
|
error = e;
|
|
|
|
|
}
|
|
|
|
|
|
2018-12-14 05:31:37 +08:00
|
|
|
|
//TraceEvent("RelocateShardFinished", distributorId).detail("RelocateId", relocateShardInterval.pairID);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
|
|
if (error.code() != error_code_move_to_removed_server) {
|
|
|
|
|
if (!error.code()) {
|
|
|
|
|
try {
|
2019-07-26 07:27:32 +08:00
|
|
|
|
wait(healthyDestinations
|
|
|
|
|
.updateStorageMetrics()); // prevent a gap between the polling for an increase in
|
|
|
|
|
// storage metrics and decrementing data in flight
|
2017-05-26 04:48:44 +08:00
|
|
|
|
} catch (Error& e) {
|
|
|
|
|
error = e;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2022-04-01 00:57:00 +08:00
|
|
|
|
healthyDestinations.addDataInFlightToTeam(-metrics.bytes);
|
|
|
|
|
auto readLoad = metrics.bytesReadPerKSecond;
|
2022-05-28 03:14:34 +08:00
|
|
|
|
// Note: It’s equal to trigger([healthyDestinations, readLoad], which is a value capture of
|
|
|
|
|
// healthyDestinations. Have to create a reference to healthyDestinations because in ACTOR the state
|
|
|
|
|
// variable is actually a member variable, I can’t write trigger([healthyDestinations, readLoad]
|
|
|
|
|
// directly.
|
2022-04-01 00:57:00 +08:00
|
|
|
|
auto& destinationRef = healthyDestinations;
|
|
|
|
|
self->noErrorActors.add(
|
2022-04-07 14:03:25 +08:00
|
|
|
|
trigger([destinationRef, readLoad]() mutable { destinationRef.addReadInFlightToTeam(-readLoad); },
|
2022-04-01 00:57:00 +08:00
|
|
|
|
delay(SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL)));
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
|
|
// onFinished.send( rs );
|
|
|
|
|
if (!error.code()) {
|
2019-08-17 06:15:36 +08:00
|
|
|
|
TraceEvent(relocateShardInterval.end(), distributorId)
|
|
|
|
|
.detail("Duration", now() - startTime)
|
|
|
|
|
.detail("Result", "Success");
|
2019-08-17 05:56:58 +08:00
|
|
|
|
if (now() - startTime > 600) {
|
2019-08-22 02:48:29 +08:00
|
|
|
|
TraceEvent(SevWarnAlways, "RelocateShardTooLong")
|
|
|
|
|
.detail("Duration", now() - startTime)
|
|
|
|
|
.detail("Dest", describe(destIds))
|
|
|
|
|
.detail("Src", describe(rd.src));
|
2019-08-17 05:56:58 +08:00
|
|
|
|
}
|
2017-05-26 04:48:44 +08:00
|
|
|
|
if (rd.keys.begin == keyServersPrefix) {
|
2018-06-20 14:15:30 +08:00
|
|
|
|
TraceEvent("MovedKeyServerKeys")
|
|
|
|
|
.detail("Dest", describe(destIds))
|
2021-09-25 01:04:30 +08:00
|
|
|
|
.trackLatest(self->movedKeyServersEventHolder->trackingKey);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!signalledTransferComplete) {
|
|
|
|
|
signalledTransferComplete = true;
|
|
|
|
|
dataTransferComplete.send(rd);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
self->bytesWritten += metrics.bytes;
|
2018-11-12 04:33:31 +08:00
|
|
|
|
self->shardsAffectedByTeamFailure->finishMove(rd.keys);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
relocationComplete.send(rd);
|
2022-08-20 02:47:00 +08:00
|
|
|
|
|
|
|
|
|
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD) {
|
|
|
|
|
// update physical shard collection
|
|
|
|
|
std::vector<ShardsAffectedByTeamFailure::Team> selectedTeams;
|
|
|
|
|
for (int i = 0; i < bestTeams.size(); i++) {
|
|
|
|
|
auto serverIds = bestTeams[i].first->getServerIDs();
|
|
|
|
|
selectedTeams.push_back(ShardsAffectedByTeamFailure::Team(serverIds, i == 0));
|
|
|
|
|
}
|
|
|
|
|
// The update of PhysicalShardToTeams, PhysicalShardInstances, keyRangePhysicalShardIDMap should
|
|
|
|
|
// be atomic
|
|
|
|
|
self->physicalShardCollection->updatePhysicalShardCollection(
|
|
|
|
|
rd.keys, rd.isRestore(), selectedTeams, rd.dataMoveId.first(), metrics, debugID);
|
|
|
|
|
}
|
|
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
|
return Void();
|
|
|
|
|
} else {
|
|
|
|
|
throw error;
|
|
|
|
|
}
|
|
|
|
|
} else {
|
2022-09-26 06:28:32 +08:00
|
|
|
|
CODE_PROBE(true, "move to removed server", probe::decoration::rare);
|
2022-04-01 00:57:00 +08:00
|
|
|
|
healthyDestinations.addDataInFlightToTeam(-metrics.bytes);
|
|
|
|
|
auto readLoad = metrics.bytesReadPerKSecond;
|
|
|
|
|
auto& destinationRef = healthyDestinations;
|
|
|
|
|
self->noErrorActors.add(
|
2022-04-07 14:03:25 +08:00
|
|
|
|
trigger([destinationRef, readLoad]() mutable { destinationRef.addReadInFlightToTeam(-readLoad); },
|
2022-04-01 00:57:00 +08:00
|
|
|
|
delay(SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL)));
|
2022-04-21 03:15:40 +08:00
|
|
|
|
|
2022-04-21 04:32:04 +08:00
|
|
|
|
completeDest(rd, self->destBusymap);
|
|
|
|
|
rd.completeDests.clear();
|
2022-04-21 03:15:40 +08:00
|
|
|
|
|
2019-06-25 17:47:35 +08:00
|
|
|
|
wait(delay(SERVER_KNOBS->RETRY_RELOCATESHARD_DELAY, TaskPriority::DataDistributionLaunch));
|
2017-05-26 04:48:44 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
} catch (Error& e) {
|
2022-07-08 11:49:16 +08:00
|
|
|
|
state Error err = e;
|
2022-02-25 04:25:52 +08:00
|
|
|
|
TraceEvent(relocateShardInterval.end(), distributorId)
|
2022-07-08 11:49:16 +08:00
|
|
|
|
.errorUnsuppressed(err)
|
2022-02-25 04:25:52 +08:00
|
|
|
|
.detail("Duration", now() - startTime);
|
2019-08-17 05:56:58 +08:00
|
|
|
|
if (now() - startTime > 600) {
|
2019-08-22 02:48:29 +08:00
|
|
|
|
TraceEvent(SevWarnAlways, "RelocateShardTooLong")
|
2022-07-08 11:49:16 +08:00
|
|
|
|
.errorUnsuppressed(err)
|
2019-08-22 02:48:29 +08:00
|
|
|
|
.detail("Duration", now() - startTime)
|
|
|
|
|
.detail("Dest", describe(destIds))
|
|
|
|
|
.detail("Src", describe(rd.src));
|
2019-08-17 05:56:58 +08:00
|
|
|
|
}
|
2017-05-26 04:48:44 +08:00
|
|
|
|
if (!signalledTransferComplete)
|
|
|
|
|
dataTransferComplete.send(rd);
|
|
|
|
|
|
|
|
|
|
relocationComplete.send(rd);
|
|
|
|
|
|
2022-07-08 11:49:16 +08:00
|
|
|
|
if (err.code() == error_code_data_move_dest_team_not_found) {
|
|
|
|
|
wait(cancelDataMove(self, rd.keys, ddEnabledState));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (err.code() != error_code_actor_cancelled && err.code() != error_code_data_move_cancelled) {
|
2019-07-24 07:16:31 +08:00
|
|
|
|
if (errorOut.canBeSet()) {
|
2022-07-08 11:49:16 +08:00
|
|
|
|
errorOut.sendError(err);
|
2019-07-24 07:16:31 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
2022-07-08 11:49:16 +08:00
|
|
|
|
throw err;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2022-04-25 08:10:58 +08:00
|
|
|
|
inline double getWorstCpu(const HealthMetrics& metrics, const std::vector<UID>& ids) {
|
2022-03-24 02:18:58 +08:00
|
|
|
|
double cpu = 0;
|
2022-04-25 08:10:58 +08:00
|
|
|
|
for (auto& id : ids) {
|
|
|
|
|
if (metrics.storageStats.count(id)) {
|
|
|
|
|
cpu = std::max(cpu, metrics.storageStats.at(id).cpuUsage);
|
|
|
|
|
} else {
|
|
|
|
|
// assume the server is too busy to report its stats
|
|
|
|
|
cpu = std::max(cpu, 100.0);
|
2022-05-04 08:21:08 +08:00
|
|
|
|
break;
|
2022-04-25 08:10:58 +08:00
|
|
|
|
}
|
2022-03-24 02:18:58 +08:00
|
|
|
|
}
|
|
|
|
|
return cpu;
|
|
|
|
|
}
|
2022-04-23 06:26:44 +08:00
|
|
|
|
|
|
|
|
|
// Move the shard with the top K highest read density of sourceTeam's to destTeam if sourceTeam has much more read load
|
|
|
|
|
// than destTeam
|
2022-08-06 03:01:11 +08:00
|
|
|
|
ACTOR Future<bool> rebalanceReadLoad(DDQueue* self,
|
2022-07-25 15:50:37 +08:00
|
|
|
|
DataMovementReason moveReason,
|
2022-02-25 08:41:01 +08:00
|
|
|
|
Reference<IDataDistributionTeam> sourceTeam,
|
|
|
|
|
Reference<IDataDistributionTeam> destTeam,
|
|
|
|
|
bool primary,
|
|
|
|
|
TraceEvent* traceEvent) {
|
2022-09-15 08:10:49 +08:00
|
|
|
|
if (g_network->isSimulated() && g_simulator->speedUpSimulation) {
|
2022-02-25 08:41:01 +08:00
|
|
|
|
traceEvent->detail("CancelingDueToSimulationSpeedup", true);
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
state std::vector<KeyRange> shards = self->shardsAffectedByTeamFailure->getShardsFor(
|
|
|
|
|
ShardsAffectedByTeamFailure::Team(sourceTeam->getServerIDs(), primary));
|
2022-04-23 05:14:58 +08:00
|
|
|
|
traceEvent->detail("ShardsInSource", shards.size());
|
|
|
|
|
// For read rebalance if there is just 1 hot shard remained, move this shard to another server won't solve the
|
|
|
|
|
// problem.
|
|
|
|
|
// TODO: This situation should be solved by split and merge
|
2022-04-21 13:19:56 +08:00
|
|
|
|
if (shards.size() <= 1) {
|
2022-03-03 13:56:03 +08:00
|
|
|
|
traceEvent->detail("SkipReason", "NoShardOnSource");
|
2022-02-25 08:41:01 +08:00
|
|
|
|
return false;
|
2022-03-03 13:56:03 +08:00
|
|
|
|
}
|
2022-04-21 13:19:56 +08:00
|
|
|
|
|
2022-05-24 02:04:37 +08:00
|
|
|
|
// Check lastAsSource, at most SERVER_KNOBS->READ_REBALANCE_SRC_PARALLELISM shards can be moved within a sample
|
|
|
|
|
// period. It takes time for the sampled metrics being updated after a shard is moved, so we should control the
|
|
|
|
|
// cadence of movement here to avoid moving churn caused by making many decision based on out-of-date sampled
|
|
|
|
|
// metrics.
|
2022-04-28 14:37:35 +08:00
|
|
|
|
if (self->timeThrottle(sourceTeam->getServerIDs())) {
|
2022-04-26 07:59:20 +08:00
|
|
|
|
traceEvent->detail("SkipReason", "SourceTeamThrottle");
|
|
|
|
|
return false;
|
|
|
|
|
}
|
2022-05-07 07:37:12 +08:00
|
|
|
|
// check team difference
|
|
|
|
|
auto srcLoad = sourceTeam->getLoadReadBandwidth(false), destLoad = destTeam->getLoadReadBandwidth();
|
|
|
|
|
traceEvent->detail("SrcReadBandwidth", srcLoad).detail("DestReadBandwidth", destLoad);
|
2022-04-26 07:59:20 +08:00
|
|
|
|
|
2022-05-07 07:37:12 +08:00
|
|
|
|
// read bandwidth difference is less than 30% of src load
|
2022-05-18 05:49:27 +08:00
|
|
|
|
if ((1.0 - SERVER_KNOBS->READ_REBALANCE_DIFF_FRAC) * srcLoad <= destLoad) {
|
2022-05-07 07:37:12 +08:00
|
|
|
|
traceEvent->detail("SkipReason", "TeamTooSimilar");
|
|
|
|
|
return false;
|
|
|
|
|
}
|
2022-04-22 13:37:16 +08:00
|
|
|
|
// randomly choose topK shards
|
2022-09-03 03:00:43 +08:00
|
|
|
|
int topK = std::max(1, std::min(int(0.1 * shards.size()), SERVER_KNOBS->READ_REBALANCE_SHARD_TOPK));
|
2022-09-22 01:56:22 +08:00
|
|
|
|
state Future<HealthMetrics> healthMetrics = self->txnProcessor->getHealthMetrics(true);
|
2022-05-13 07:30:21 +08:00
|
|
|
|
state GetTopKMetricsRequest req(
|
2022-05-18 05:49:27 +08:00
|
|
|
|
shards, topK, (srcLoad - destLoad) * SERVER_KNOBS->READ_REBALANCE_MAX_SHARD_FRAC, srcLoad / shards.size());
|
2022-05-18 01:19:09 +08:00
|
|
|
|
state GetTopKMetricsReply reply = wait(brokenPromiseToNever(self->getTopKMetrics.getReply(req)));
|
2022-03-24 02:18:58 +08:00
|
|
|
|
wait(ready(healthMetrics));
|
2022-05-17 12:25:56 +08:00
|
|
|
|
auto cpu = getWorstCpu(healthMetrics.get(), sourceTeam->getServerIDs());
|
|
|
|
|
if (cpu < SERVER_KNOBS->READ_REBALANCE_CPU_THRESHOLD) { // 15.0 +- (0.3 * 15) < 20.0
|
|
|
|
|
traceEvent->detail("SkipReason", "LowReadLoad").detail("WorstSrcCpu", cpu);
|
2022-03-24 02:18:58 +08:00
|
|
|
|
return false;
|
|
|
|
|
}
|
2022-04-21 03:15:40 +08:00
|
|
|
|
|
2022-05-28 08:10:01 +08:00
|
|
|
|
auto& metricsList = reply.shardMetrics;
|
2022-05-18 01:19:09 +08:00
|
|
|
|
// NOTE: randomize is important here since we don't want to always push the same shard into the queue
|
|
|
|
|
deterministicRandom()->randomShuffle(metricsList);
|
|
|
|
|
traceEvent->detail("MinReadLoad", reply.minReadLoad).detail("MaxReadLoad", reply.maxReadLoad);
|
2022-05-17 12:25:56 +08:00
|
|
|
|
|
2022-07-21 07:09:38 +08:00
|
|
|
|
if (metricsList.empty()) {
|
2022-04-22 13:37:16 +08:00
|
|
|
|
traceEvent->detail("SkipReason", "NoEligibleShards");
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2022-07-21 07:09:38 +08:00
|
|
|
|
auto& [shard, metrics] = metricsList[0];
|
2022-05-07 07:37:12 +08:00
|
|
|
|
traceEvent->detail("ShardReadBandwidth", metrics.bytesReadPerKSecond);
|
2022-04-22 13:37:16 +08:00
|
|
|
|
// Verify the shard is still in ShardsAffectedByTeamFailure
|
|
|
|
|
shards = self->shardsAffectedByTeamFailure->getShardsFor(
|
|
|
|
|
ShardsAffectedByTeamFailure::Team(sourceTeam->getServerIDs(), primary));
|
|
|
|
|
for (int i = 0; i < shards.size(); i++) {
|
2022-05-28 08:10:01 +08:00
|
|
|
|
if (shard == shards[i]) {
|
2022-08-05 07:57:55 +08:00
|
|
|
|
UID traceId = deterministicRandom()->randomUniqueID();
|
|
|
|
|
self->output.send(RelocateShard(shard, moveReason, RelocateReason::REBALANCE_READ, traceId));
|
|
|
|
|
traceEvent->detail("TraceId", traceId);
|
2022-08-06 03:01:11 +08:00
|
|
|
|
|
|
|
|
|
auto serverIds = sourceTeam->getServerIDs();
|
|
|
|
|
self->updateLastAsSource(serverIds);
|
|
|
|
|
|
|
|
|
|
self->serverCounter.increaseForTeam(
|
|
|
|
|
serverIds, RelocateReason::REBALANCE_READ, DDQueue::ServerCounter::ProposedSource);
|
2022-04-22 13:37:16 +08:00
|
|
|
|
return true;
|
2022-02-25 08:41:01 +08:00
|
|
|
|
}
|
2022-04-22 13:37:16 +08:00
|
|
|
|
}
|
|
|
|
|
traceEvent->detail("SkipReason", "ShardNotPresent");
|
2022-02-25 08:41:01 +08:00
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2022-03-19 01:20:04 +08:00
|
|
|
|
// Move a random shard from sourceTeam if sourceTeam has much more data than provided destTeam
|
2022-08-06 03:01:11 +08:00
|
|
|
|
ACTOR static Future<bool> rebalanceTeams(DDQueue* self,
|
2022-07-25 15:50:37 +08:00
|
|
|
|
DataMovementReason moveReason,
|
2022-03-19 01:20:04 +08:00
|
|
|
|
Reference<IDataDistributionTeam const> sourceTeam,
|
|
|
|
|
Reference<IDataDistributionTeam const> destTeam,
|
2022-03-18 14:27:33 +08:00
|
|
|
|
bool primary,
|
|
|
|
|
TraceEvent* traceEvent) {
|
2022-09-15 08:10:49 +08:00
|
|
|
|
if (g_network->isSimulated() && g_simulator->speedUpSimulation) {
|
2020-02-22 02:55:14 +08:00
|
|
|
|
traceEvent->detail("CancelingDueToSimulationSpeedup", true);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2019-07-29 14:50:42 +08:00
|
|
|
|
Promise<int64_t> req;
|
|
|
|
|
self->getAverageShardBytes.send(req);
|
|
|
|
|
|
|
|
|
|
state int64_t averageShardBytes = wait(req.getFuture());
|
|
|
|
|
state std::vector<KeyRange> shards = self->shardsAffectedByTeamFailure->getShardsFor(
|
|
|
|
|
ShardsAffectedByTeamFailure::Team(sourceTeam->getServerIDs(), primary));
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
2020-02-22 02:55:14 +08:00
|
|
|
|
traceEvent->detail("AverageShardBytes", averageShardBytes).detail("ShardsInSource", shards.size());
|
|
|
|
|
|
2022-03-03 13:56:03 +08:00
|
|
|
|
if (!shards.size()) {
|
|
|
|
|
traceEvent->detail("SkipReason", "NoShardOnSource");
|
2017-05-26 04:48:44 +08:00
|
|
|
|
return false;
|
2022-03-03 13:56:03 +08:00
|
|
|
|
}
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
2019-07-29 14:50:42 +08:00
|
|
|
|
state KeyRange moveShard;
|
|
|
|
|
state StorageMetrics metrics;
|
|
|
|
|
state int retries = 0;
|
2019-07-31 08:04:41 +08:00
|
|
|
|
while (retries < SERVER_KNOBS->REBALANCE_MAX_RETRIES) {
|
2019-07-29 14:50:42 +08:00
|
|
|
|
state KeyRange testShard = deterministicRandom()->randomChoice(shards);
|
2022-05-04 15:00:03 +08:00
|
|
|
|
StorageMetrics testMetrics =
|
2019-07-29 14:50:42 +08:00
|
|
|
|
wait(brokenPromiseToNever(self->getShardMetrics.getReply(GetMetricsRequest(testShard))));
|
2022-05-04 15:00:03 +08:00
|
|
|
|
if (testMetrics.bytes > metrics.bytes) {
|
2019-07-29 14:50:42 +08:00
|
|
|
|
moveShard = testShard;
|
2022-05-04 15:00:03 +08:00
|
|
|
|
metrics = testMetrics;
|
2019-07-31 08:04:41 +08:00
|
|
|
|
if (metrics.bytes > averageShardBytes) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
2019-07-29 14:50:42 +08:00
|
|
|
|
}
|
|
|
|
|
retries++;
|
|
|
|
|
}
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
|
|
int64_t sourceBytes = sourceTeam->getLoadBytes(false);
|
|
|
|
|
int64_t destBytes = destTeam->getLoadBytes();
|
2020-02-22 02:55:14 +08:00
|
|
|
|
|
|
|
|
|
bool sourceAndDestTooSimilar =
|
2022-04-21 03:15:40 +08:00
|
|
|
|
sourceBytes - destBytes <= 3 * std::max<int64_t>(SERVER_KNOBS->MIN_SHARD_BYTES, metrics.bytes);
|
2020-02-22 02:55:14 +08:00
|
|
|
|
traceEvent->detail("SourceBytes", sourceBytes)
|
|
|
|
|
.detail("DestBytes", destBytes)
|
|
|
|
|
.detail("ShardBytes", metrics.bytes)
|
|
|
|
|
.detail("SourceAndDestTooSimilar", sourceAndDestTooSimilar);
|
|
|
|
|
|
|
|
|
|
if (sourceAndDestTooSimilar || metrics.bytes == 0) {
|
2022-03-03 13:56:03 +08:00
|
|
|
|
traceEvent->detail("SkipReason", sourceAndDestTooSimilar ? "TeamTooSimilar" : "ShardZeroSize");
|
2017-05-26 04:48:44 +08:00
|
|
|
|
return false;
|
2020-02-22 02:55:14 +08:00
|
|
|
|
}
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
2020-07-13 09:30:02 +08:00
|
|
|
|
// Verify the shard is still in ShardsAffectedByTeamFailure
|
2020-02-22 02:55:14 +08:00
|
|
|
|
shards = self->shardsAffectedByTeamFailure->getShardsFor(
|
|
|
|
|
ShardsAffectedByTeamFailure::Team(sourceTeam->getServerIDs(), primary));
|
|
|
|
|
for (int i = 0; i < shards.size(); i++) {
|
|
|
|
|
if (moveShard == shards[i]) {
|
2022-08-05 07:57:55 +08:00
|
|
|
|
UID traceId = deterministicRandom()->randomUniqueID();
|
|
|
|
|
self->output.send(RelocateShard(moveShard, moveReason, RelocateReason::REBALANCE_DISK, traceId));
|
|
|
|
|
traceEvent->detail("TraceId", traceId);
|
2022-08-06 03:01:11 +08:00
|
|
|
|
|
|
|
|
|
self->serverCounter.increaseForTeam(
|
|
|
|
|
sourceTeam->getServerIDs(), RelocateReason::REBALANCE_DISK, DDQueue::ServerCounter::ProposedSource);
|
2020-02-22 02:55:14 +08:00
|
|
|
|
return true;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2022-03-03 13:56:03 +08:00
|
|
|
|
traceEvent->detail("SkipReason", "ShardNotPresent");
|
2017-05-26 04:48:44 +08:00
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2022-08-06 03:01:11 +08:00
|
|
|
|
// Asks team collection `teamCollectionIndex` for a destination team (destReq) and, only if one is returned, for
// a source team (srcReq). Returns the (source, destination) pair when both are present, otherwise an empty
// result. The description of each fetched team is attached to traceEvent as "DestTeam" / "SourceTeam".
// `priority` is not used by this function.
ACTOR Future<SrcDestTeamPair> getSrcDestTeams(DDQueue* self,
                                              int teamCollectionIndex,
                                              GetTeamRequest srcReq,
                                              GetTeamRequest destReq,
                                              int priority,
                                              TraceEvent* traceEvent) {

	state std::pair<Optional<ITeamRef>, bool> destCandidate =
	    wait(brokenPromiseToNever(self->teamCollections[teamCollectionIndex].getTeam.getReply(destReq)));
	traceEvent->detail(
	    "DestTeam",
	    printable(destCandidate.first.map<std::string>([](const ITeamRef& team) { return team->getDesc(); })));

	// Without a destination there is no point in asking for a source.
	if (!destCandidate.first.present()) {
		return {};
	}

	state std::pair<Optional<ITeamRef>, bool> srcCandidate =
	    wait(brokenPromiseToNever(self->teamCollections[teamCollectionIndex].getTeam.getReply(srcReq)));
	traceEvent->detail(
	    "SourceTeam",
	    printable(srcCandidate.first.map<std::string>([](const ITeamRef& team) { return team->getDesc(); })));

	if (!srcCandidate.first.present()) {
		return {};
	}
	return std::make_pair(srcCandidate.first.get(), destCandidate.first.get());
}
|
|
|
|
|
|
2022-09-22 08:57:40 +08:00
|
|
|
|
// Member entry point for the file-scope getSrcDestTeams actor: passes this queue as `self` and forwards every
// argument unchanged.
Future<SrcDestTeamPair> DDQueue::getSrcDestTeams(const int& teamCollectionIndex,
                                                 const GetTeamRequest& srcReq,
                                                 const GetTeamRequest& destReq,
                                                 const int& priority,
                                                 TraceEvent* traceEvent) {
	Future<SrcDestTeamPair> teamPair =
	    ::getSrcDestTeams(this, teamCollectionIndex, srcReq, destReq, priority, traceEvent);
	return teamPair;
}
|
|
|
|
|
// Member entry point for the file-scope rebalanceReadLoad actor: passes this queue as `self` and forwards every
// argument unchanged.
Future<bool> DDQueue::rebalanceReadLoad(DataMovementReason moveReason,
                                        Reference<IDataDistributionTeam> sourceTeam,
                                        Reference<IDataDistributionTeam> destTeam,
                                        bool primary,
                                        TraceEvent* traceEvent) {
	Future<bool> moved = ::rebalanceReadLoad(this, moveReason, sourceTeam, destTeam, primary, traceEvent);
	return moved;
}
|
|
|
|
|
|
|
|
|
|
// Member entry point for the file-scope rebalanceTeams actor: passes this queue as `self` and forwards every
// argument unchanged.
Future<bool> DDQueue::rebalanceTeams(DataMovementReason moveReason,
                                     Reference<const IDataDistributionTeam> sourceTeam,
                                     Reference<const IDataDistributionTeam> destTeam,
                                     bool primary,
                                     TraceEvent* traceEvent) {
	Future<bool> moved = ::rebalanceTeams(this, moveReason, sourceTeam, destTeam, primary, traceEvent);
	return moved;
}
|
|
|
|
|
|
2022-09-28 02:22:47 +08:00
|
|
|
|
// Reads the rebalance "DD ignore" key through txnProcessor and reports whether the caller's kind of
// rebalancing should be skipped.
//
// Returns:
//   false - the key is not present;
//   true  - the key holds "" or "on" (special values that might have been written by a version
//           older than 7.2);
//   otherwise the value is decoded as a uint8_t bit mask and the result is whether the
//   DDIgnore::REBALANCE_READ bit (readRebalance == true) or the DDIgnore::REBALANCE_DISK bit
//   (readRebalance == false) is set.
ACTOR Future<bool> getSkipRebalanceValue(Reference<IDDTxnProcessor> txnProcessor, bool readRebalance) {
	Optional<Value> ignoreValue = wait(txnProcessor->readRebalanceDDIgnoreKey());

	if (!ignoreValue.present()) {
		return false;
	}

	// NOTE: the special values "" and "on" might have been written by an old version (< 7.2)
	if (ignoreValue.get().size() == 0 || ignoreValue.get() == "on"_sr) {
		return true;
	}

	int ignoreMask = BinaryReader::fromStringRef<uint8_t>(ignoreValue.get(), Unversioned());
	if (readRebalance) {
		return (ignoreMask & DDIgnore::REBALANCE_READ) > 0;
	}
	return (ignoreMask & DDIgnore::REBALANCE_DISK) > 0;
}
|
|
|
|
|
|
2022-08-06 03:01:11 +08:00
|
|
|
|
ACTOR Future<Void> BgDDLoadRebalance(DDQueue* self, int teamCollectionIndex, DataMovementReason reason) {
|
2022-09-22 05:58:34 +08:00
|
|
|
|
state int resetCount = 0;
|
2019-07-31 11:20:02 +08:00
|
|
|
|
state double lastRead = 0;
|
2019-07-25 06:32:52 +08:00
|
|
|
|
state bool skipCurrentLoop = false;
|
2022-07-15 00:06:56 +08:00
|
|
|
|
state const bool readRebalance = isDataMovementForReadBalancing(reason);
|
2022-09-22 05:58:34 +08:00
|
|
|
|
state const char* eventName = isDataMovementForMountainChopper(reason) ? "BgDDMountainChopper" : "BgDDValleyFiller";
|
2022-07-15 00:06:56 +08:00
|
|
|
|
state int ddPriority = dataMovementPriority(reason);
|
2022-09-22 05:58:34 +08:00
|
|
|
|
state double rebalancePollingInterval = 0;
|
|
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
|
loop {
|
2020-02-22 02:55:14 +08:00
|
|
|
|
state bool moved = false;
|
2022-03-01 02:22:32 +08:00
|
|
|
|
state Reference<IDataDistributionTeam> sourceTeam;
|
|
|
|
|
state Reference<IDataDistributionTeam> destTeam;
|
2022-02-26 03:01:23 +08:00
|
|
|
|
state GetTeamRequest srcReq;
|
|
|
|
|
state GetTeamRequest destReq;
|
2022-04-13 07:22:17 +08:00
|
|
|
|
state TraceEvent traceEvent(eventName, self->distributorId);
|
2022-05-23 14:35:39 +08:00
|
|
|
|
traceEvent.suppressFor(5.0)
|
2022-09-22 05:58:34 +08:00
|
|
|
|
.detail("PollingInterval", rebalancePollingInterval)
|
2022-04-13 07:22:17 +08:00
|
|
|
|
.detail("Rebalance", readRebalance ? "Read" : "Disk");
|
2020-02-22 02:55:14 +08:00
|
|
|
|
|
2022-09-22 05:58:34 +08:00
|
|
|
|
// NOTE: the DD throttling relies on DDQueue, so here just trigger the balancer periodically
|
|
|
|
|
wait(delay(rebalancePollingInterval, TaskPriority::DataDistributionLaunch));
|
2022-09-22 08:57:40 +08:00
|
|
|
|
try {
|
|
|
|
|
if ((now() - lastRead) > SERVER_KNOBS->BG_REBALANCE_SWITCH_CHECK_INTERVAL) {
|
|
|
|
|
wait(store(skipCurrentLoop, getSkipRebalanceValue(self->txnProcessor, readRebalance)));
|
|
|
|
|
lastRead = now();
|
|
|
|
|
}
|
|
|
|
|
traceEvent.detail("Enabled", !skipCurrentLoop);
|
2020-02-22 02:55:14 +08:00
|
|
|
|
|
2022-09-22 08:57:40 +08:00
|
|
|
|
if (skipCurrentLoop) {
|
|
|
|
|
rebalancePollingInterval =
|
|
|
|
|
std::max(rebalancePollingInterval, SERVER_KNOBS->BG_REBALANCE_SWITCH_CHECK_INTERVAL);
|
|
|
|
|
continue;
|
|
|
|
|
} else {
|
|
|
|
|
rebalancePollingInterval = SERVER_KNOBS->BG_REBALANCE_POLLING_INTERVAL;
|
|
|
|
|
}
|
2020-02-22 02:55:14 +08:00
|
|
|
|
|
2022-04-07 13:10:23 +08:00
|
|
|
|
traceEvent.detail("QueuedRelocations", self->priority_relocations[ddPriority]);
|
2022-04-13 07:22:17 +08:00
|
|
|
|
|
|
|
|
|
if (self->priority_relocations[ddPriority] < SERVER_KNOBS->DD_REBALANCE_PARALLELISM) {
|
2022-07-30 01:22:21 +08:00
|
|
|
|
bool mcMove = isDataMovementForMountainChopper(reason);
|
2022-07-30 01:04:14 +08:00
|
|
|
|
srcReq = GetTeamRequest(WantNewServers::True,
|
|
|
|
|
WantTrueBest(mcMove),
|
|
|
|
|
PreferLowerDiskUtil::False,
|
|
|
|
|
TeamMustHaveShards::True,
|
|
|
|
|
ForReadBalance(readRebalance),
|
|
|
|
|
PreferLowerReadUtil::False);
|
|
|
|
|
destReq = GetTeamRequest(WantNewServers::True,
|
|
|
|
|
WantTrueBest(!mcMove),
|
|
|
|
|
PreferLowerDiskUtil::True,
|
|
|
|
|
TeamMustHaveShards::False,
|
|
|
|
|
ForReadBalance(readRebalance),
|
|
|
|
|
PreferLowerReadUtil::True);
|
2022-05-23 15:12:48 +08:00
|
|
|
|
state Future<SrcDestTeamPair> getTeamFuture =
|
2022-09-22 08:57:40 +08:00
|
|
|
|
self->getSrcDestTeams(teamCollectionIndex, srcReq, destReq, ddPriority, &traceEvent);
|
2022-05-23 15:12:48 +08:00
|
|
|
|
wait(ready(getTeamFuture));
|
|
|
|
|
sourceTeam = getTeamFuture.get().first;
|
|
|
|
|
destTeam = getTeamFuture.get().second;
|
2022-05-05 08:42:49 +08:00
|
|
|
|
|
2022-02-26 03:01:23 +08:00
|
|
|
|
// clang-format off
|
|
|
|
|
if (sourceTeam.isValid() && destTeam.isValid()) {
|
2022-04-07 13:10:23 +08:00
|
|
|
|
if (readRebalance) {
|
2022-09-22 08:57:40 +08:00
|
|
|
|
wait(store(moved,self->rebalanceReadLoad( reason, sourceTeam, destTeam, teamCollectionIndex == 0, &traceEvent)));
|
2022-02-26 03:01:23 +08:00
|
|
|
|
} else {
|
2022-09-22 08:57:40 +08:00
|
|
|
|
wait(store(moved,self->rebalanceTeams( reason, sourceTeam, destTeam, teamCollectionIndex == 0, &traceEvent)));
|
2017-05-26 04:48:44 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
2022-04-21 13:19:56 +08:00
|
|
|
|
// clang-format on
|
2022-02-26 03:01:23 +08:00
|
|
|
|
moved ? resetCount = 0 : resetCount++;
|
2019-07-17 06:12:18 +08:00
|
|
|
|
}
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
2022-04-12 08:09:39 +08:00
|
|
|
|
traceEvent.detail("ResetCount", resetCount);
|
|
|
|
|
} catch (Error& e) {
|
|
|
|
|
// Log actor_cancelled because it's not legal to suppress an event that's initialized
|
|
|
|
|
traceEvent.errorUnsuppressed(e);
|
2022-09-22 05:58:34 +08:00
|
|
|
|
throw;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
}
|
2020-02-22 02:55:14 +08:00
|
|
|
|
|
|
|
|
|
traceEvent.detail("Moved", moved);
|
2020-02-22 08:28:03 +08:00
|
|
|
|
traceEvent.log();
|
2017-05-26 04:48:44 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2022-09-28 02:22:47 +08:00
|
|
|
|
ACTOR Future<Void> dataDistributionQueue(Reference<IDDTxnProcessor> db,
|
2020-09-28 06:26:50 +08:00
|
|
|
|
PromiseStream<RelocateShard> output,
|
|
|
|
|
FutureStream<RelocateShard> input,
|
|
|
|
|
PromiseStream<GetMetricsRequest> getShardMetrics,
|
2022-05-04 15:00:03 +08:00
|
|
|
|
PromiseStream<GetTopKMetricsRequest> getTopKMetrics,
|
2020-09-28 06:26:50 +08:00
|
|
|
|
Reference<AsyncVar<bool>> processingUnhealthy,
|
2021-10-15 07:22:47 +08:00
|
|
|
|
Reference<AsyncVar<bool>> processingWiggle,
|
2020-09-28 06:26:50 +08:00
|
|
|
|
std::vector<TeamCollectionInterface> teamCollections,
|
|
|
|
|
Reference<ShardsAffectedByTeamFailure> shardsAffectedByTeamFailure,
|
2022-08-20 02:47:00 +08:00
|
|
|
|
Reference<PhysicalShardCollection> physicalShardCollection,
|
2020-09-28 06:26:50 +08:00
|
|
|
|
MoveKeysLock lock,
|
|
|
|
|
PromiseStream<Promise<int64_t>> getAverageShardBytes,
|
2022-04-04 14:47:54 +08:00
|
|
|
|
FutureStream<Promise<int>> getUnhealthyRelocationCount,
|
2020-09-28 06:26:50 +08:00
|
|
|
|
UID distributorId,
|
|
|
|
|
int teamSize,
|
|
|
|
|
int singleRegionTeamSize,
|
|
|
|
|
const DDEnabledState* ddEnabledState) {
|
2022-08-06 03:01:11 +08:00
|
|
|
|
state DDQueue self(distributorId,
|
2022-08-06 06:26:34 +08:00
|
|
|
|
lock,
|
2022-09-23 08:11:07 +08:00
|
|
|
|
db,
|
2022-08-06 06:26:34 +08:00
|
|
|
|
teamCollections,
|
|
|
|
|
shardsAffectedByTeamFailure,
|
2022-08-20 02:47:00 +08:00
|
|
|
|
physicalShardCollection,
|
2022-08-06 06:26:34 +08:00
|
|
|
|
getAverageShardBytes,
|
|
|
|
|
teamSize,
|
|
|
|
|
singleRegionTeamSize,
|
|
|
|
|
output,
|
|
|
|
|
input,
|
|
|
|
|
getShardMetrics,
|
|
|
|
|
getTopKMetrics);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
state std::set<UID> serversToLaunchFrom;
|
|
|
|
|
state KeyRange keysToLaunchFrom;
|
|
|
|
|
state RelocateData launchData;
|
|
|
|
|
state Future<Void> recordMetrics = delay(SERVER_KNOBS->DD_QUEUE_LOGGING_INTERVAL);
|
2017-10-11 01:36:33 +08:00
|
|
|
|
|
2022-08-06 06:26:34 +08:00
|
|
|
|
state std::vector<Future<Void>> ddQueueFutures;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
|
|
state PromiseStream<KeyRange> rangesComplete;
|
|
|
|
|
state Future<Void> launchQueuedWorkTimeout = Never();
|
|
|
|
|
|
2017-10-11 01:36:33 +08:00
|
|
|
|
for (int i = 0; i < teamCollections.size(); i++) {
|
2022-09-22 06:11:04 +08:00
|
|
|
|
ddQueueFutures.push_back(BgDDLoadRebalance(&self, i, DataMovementReason::REBALANCE_OVERUTILIZED_TEAM));
|
|
|
|
|
ddQueueFutures.push_back(BgDDLoadRebalance(&self, i, DataMovementReason::REBALANCE_UNDERUTILIZED_TEAM));
|
2022-04-17 13:51:55 +08:00
|
|
|
|
if (SERVER_KNOBS->READ_SAMPLING_ENABLED) {
|
2022-08-06 06:26:34 +08:00
|
|
|
|
ddQueueFutures.push_back(BgDDLoadRebalance(&self, i, DataMovementReason::REBALANCE_READ_OVERUTIL_TEAM));
|
|
|
|
|
ddQueueFutures.push_back(BgDDLoadRebalance(&self, i, DataMovementReason::REBALANCE_READ_UNDERUTIL_TEAM));
|
2022-04-13 07:22:17 +08:00
|
|
|
|
}
|
2017-10-11 01:36:33 +08:00
|
|
|
|
}
|
2022-08-06 06:26:34 +08:00
|
|
|
|
ddQueueFutures.push_back(delayedAsyncVar(self.rawProcessingUnhealthy, processingUnhealthy, 0));
|
|
|
|
|
ddQueueFutures.push_back(delayedAsyncVar(self.rawProcessingWiggle, processingWiggle, 0));
|
|
|
|
|
ddQueueFutures.push_back(self.periodicalRefreshCounter());
|
2017-10-11 01:36:33 +08:00
|
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
|
try {
|
|
|
|
|
loop {
|
|
|
|
|
self.validate();
|
|
|
|
|
|
2022-02-26 03:01:23 +08:00
|
|
|
|
// For the given servers that caused us to go around the loop, find the next item(s) that can be
|
|
|
|
|
// launched.
|
2017-05-26 04:48:44 +08:00
|
|
|
|
if (launchData.startTime != -1) {
|
2019-07-20 07:22:15 +08:00
|
|
|
|
// Launch dataDistributionRelocator actor to relocate the launchData
|
2020-09-28 06:26:50 +08:00
|
|
|
|
self.launchQueuedWork(launchData, ddEnabledState);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
launchData = RelocateData();
|
|
|
|
|
} else if (!keysToLaunchFrom.empty()) {
|
2020-09-28 06:26:50 +08:00
|
|
|
|
self.launchQueuedWork(keysToLaunchFrom, ddEnabledState);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
keysToLaunchFrom = KeyRangeRef();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ASSERT(launchData.startTime == -1 && keysToLaunchFrom.empty());
|
|
|
|
|
|
|
|
|
|
choose {
|
2018-08-10 03:37:46 +08:00
|
|
|
|
when(RelocateShard rs = waitNext(self.input)) {
|
2022-07-08 11:49:16 +08:00
|
|
|
|
if (rs.isRestore()) {
|
|
|
|
|
ASSERT(rs.dataMove != nullptr);
|
|
|
|
|
ASSERT(rs.dataMoveId.isValid());
|
|
|
|
|
self.launchQueuedWork(RelocateData(rs), ddEnabledState);
|
|
|
|
|
} else if (rs.cancelled) {
|
|
|
|
|
self.enqueueCancelledDataMove(rs.dataMoveId, rs.keys, ddEnabledState);
|
|
|
|
|
} else {
|
|
|
|
|
bool wasEmpty = serversToLaunchFrom.empty();
|
|
|
|
|
self.queueRelocation(rs, serversToLaunchFrom);
|
|
|
|
|
if (wasEmpty && !serversToLaunchFrom.empty())
|
|
|
|
|
launchQueuedWorkTimeout = delay(0, TaskPriority::DataDistributionLaunch);
|
|
|
|
|
}
|
2017-05-26 04:48:44 +08:00
|
|
|
|
}
|
2018-08-11 04:57:10 +08:00
|
|
|
|
when(wait(launchQueuedWorkTimeout)) {
|
2020-09-28 06:26:50 +08:00
|
|
|
|
self.launchQueuedWork(serversToLaunchFrom, ddEnabledState);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
serversToLaunchFrom = std::set<UID>();
|
|
|
|
|
launchQueuedWorkTimeout = Never();
|
|
|
|
|
}
|
|
|
|
|
when(RelocateData results = waitNext(self.fetchSourceServersComplete.getFuture())) {
|
2019-07-20 07:22:15 +08:00
|
|
|
|
// This when is triggered by queueRelocation() which is triggered by sending self.input
|
2017-05-26 04:48:44 +08:00
|
|
|
|
self.completeSourceFetch(results);
|
|
|
|
|
launchData = results;
|
|
|
|
|
}
|
|
|
|
|
when(RelocateData done = waitNext(self.dataTransferComplete.getFuture())) {
|
2021-12-14 02:13:34 +08:00
|
|
|
|
complete(done, self.busymap, self.destBusymap);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
if (serversToLaunchFrom.empty() && !done.src.empty())
|
2019-06-25 17:47:35 +08:00
|
|
|
|
launchQueuedWorkTimeout = delay(0, TaskPriority::DataDistributionLaunch);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
serversToLaunchFrom.insert(done.src.begin(), done.src.end());
|
|
|
|
|
}
|
|
|
|
|
when(RelocateData done = waitNext(self.relocationComplete.getFuture())) {
|
|
|
|
|
self.activeRelocations--;
|
2022-07-08 11:49:16 +08:00
|
|
|
|
TraceEvent(SevVerbose, "InFlightRelocationChange")
|
|
|
|
|
.detail("Complete", done.dataMoveId)
|
|
|
|
|
.detail("IsRestore", done.isRestore())
|
|
|
|
|
.detail("Total", self.activeRelocations);
|
2019-10-12 08:50:43 +08:00
|
|
|
|
self.finishRelocation(done.priority, done.healthPriority);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
self.fetchKeysComplete.erase(done);
|
|
|
|
|
// self.logRelocation( done, "ShardRelocatorDone" );
|
2022-04-01 00:57:00 +08:00
|
|
|
|
self.noErrorActors.add(
|
|
|
|
|
tag(delay(0, TaskPriority::DataDistributionLaunch), done.keys, rangesComplete));
|
2017-05-26 04:48:44 +08:00
|
|
|
|
if (g_network->isSimulated() && debug_isCheckRelocationDuration() && now() - done.startTime > 60) {
|
|
|
|
|
TraceEvent(SevWarnAlways, "RelocationDurationTooLong")
|
|
|
|
|
.detail("Duration", now() - done.startTime);
|
|
|
|
|
debug_setCheckRelocationDuration(false);
|
|
|
|
|
}
|
|
|
|
|
}
|
2022-08-10 05:02:57 +08:00
|
|
|
|
when(KeyRange done = waitNext(rangesComplete.getFuture())) { keysToLaunchFrom = done; }
|
2018-08-11 04:57:10 +08:00
|
|
|
|
when(wait(recordMetrics)) {
|
2017-05-26 04:48:44 +08:00
|
|
|
|
Promise<int64_t> req;
|
|
|
|
|
getAverageShardBytes.send(req);
|
|
|
|
|
|
2020-03-14 06:19:33 +08:00
|
|
|
|
recordMetrics = delay(SERVER_KNOBS->DD_QUEUE_LOGGING_INTERVAL, TaskPriority::FlushTrace);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
2022-04-04 13:31:45 +08:00
|
|
|
|
auto const highestPriorityRelocation = self.getHighestPriorityRelocation();
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
2018-12-14 05:31:37 +08:00
|
|
|
|
TraceEvent("MovingData", distributorId)
|
2017-05-26 04:48:44 +08:00
|
|
|
|
.detail("InFlight", self.activeRelocations)
|
|
|
|
|
.detail("InQueue", self.queuedRelocations)
|
|
|
|
|
.detail("AverageShardSize", req.getFuture().isReady() ? req.getFuture().get() : -1)
|
2019-10-12 08:50:43 +08:00
|
|
|
|
.detail("UnhealthyRelocations", self.unhealthyRelocations)
|
2017-05-26 04:48:44 +08:00
|
|
|
|
.detail("HighestPriority", highestPriorityRelocation)
|
|
|
|
|
.detail("BytesWritten", self.bytesWritten)
|
2019-10-12 08:50:43 +08:00
|
|
|
|
.detail("PriorityRecoverMove", self.priority_relocations[SERVER_KNOBS->PRIORITY_RECOVER_MOVE])
|
|
|
|
|
.detail("PriorityRebalanceUnderutilizedTeam",
|
|
|
|
|
self.priority_relocations[SERVER_KNOBS->PRIORITY_REBALANCE_UNDERUTILIZED_TEAM])
|
2019-12-19 08:57:39 +08:00
|
|
|
|
.detail("PriorityRebalanceOverutilizedTeam",
|
|
|
|
|
self.priority_relocations[SERVER_KNOBS->PRIORITY_REBALANCE_OVERUTILIZED_TEAM])
|
2022-04-07 13:10:23 +08:00
|
|
|
|
.detail("PriorityRebalanceReadUnderutilTeam",
|
|
|
|
|
self.priority_relocations[SERVER_KNOBS->PRIORITY_REBALANCE_READ_UNDERUTIL_TEAM])
|
|
|
|
|
.detail("PriorityRebalanceReadOverutilTeam",
|
|
|
|
|
self.priority_relocations[SERVER_KNOBS->PRIORITY_REBALANCE_READ_OVERUTIL_TEAM])
|
2021-06-21 13:18:19 +08:00
|
|
|
|
.detail("PriorityStorageWiggle",
|
|
|
|
|
self.priority_relocations[SERVER_KNOBS->PRIORITY_PERPETUAL_STORAGE_WIGGLE])
|
2019-10-12 08:50:43 +08:00
|
|
|
|
.detail("PriorityTeamHealthy", self.priority_relocations[SERVER_KNOBS->PRIORITY_TEAM_HEALTHY])
|
|
|
|
|
.detail("PriorityTeamContainsUndesiredServer",
|
|
|
|
|
self.priority_relocations[SERVER_KNOBS->PRIORITY_TEAM_CONTAINS_UNDESIRED_SERVER])
|
|
|
|
|
.detail("PriorityTeamRedundant",
|
|
|
|
|
self.priority_relocations[SERVER_KNOBS->PRIORITY_TEAM_REDUNDANT])
|
|
|
|
|
.detail("PriorityMergeShard", self.priority_relocations[SERVER_KNOBS->PRIORITY_MERGE_SHARD])
|
2020-03-05 06:07:32 +08:00
|
|
|
|
.detail("PriorityPopulateRegion",
|
|
|
|
|
self.priority_relocations[SERVER_KNOBS->PRIORITY_POPULATE_REGION])
|
2019-10-12 08:50:43 +08:00
|
|
|
|
.detail("PriorityTeamUnhealthy",
|
|
|
|
|
self.priority_relocations[SERVER_KNOBS->PRIORITY_TEAM_UNHEALTHY])
|
|
|
|
|
.detail("PriorityTeam2Left", self.priority_relocations[SERVER_KNOBS->PRIORITY_TEAM_2_LEFT])
|
|
|
|
|
.detail("PriorityTeam1Left", self.priority_relocations[SERVER_KNOBS->PRIORITY_TEAM_1_LEFT])
|
|
|
|
|
.detail("PriorityTeam0Left", self.priority_relocations[SERVER_KNOBS->PRIORITY_TEAM_0_LEFT])
|
|
|
|
|
.detail("PrioritySplitShard", self.priority_relocations[SERVER_KNOBS->PRIORITY_SPLIT_SHARD])
|
2021-09-25 03:46:51 +08:00
|
|
|
|
.trackLatest("MovingData"); // This trace event's trackLatest lifetime is controlled by
|
2022-07-09 05:11:31 +08:00
|
|
|
|
// DataDistributor::movingDataEventHolder. The track latest
|
2022-02-26 03:01:23 +08:00
|
|
|
|
// key we use here must match the key used in the holder.
|
2022-10-20 13:09:04 +08:00
|
|
|
|
|
|
|
|
|
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD) {
|
|
|
|
|
TraceEvent("PhysicalShardMoveStats")
|
|
|
|
|
.detail("MoveCreateNewPhysicalShard", self.moveCreateNewPhysicalShard)
|
2022-10-23 11:48:58 +08:00
|
|
|
|
.detail("MoveReusePhysicalShard", self.moveReusePhysicalShard)
|
|
|
|
|
.detail("RemoteBestTeamNotReady",
|
2022-10-25 01:39:32 +08:00
|
|
|
|
self.retryFindDstReasonCount[DDQueue::RetryFindDstReason::RemoteBestTeamNotReady])
|
|
|
|
|
.detail("PrimaryNoHealthyTeam",
|
|
|
|
|
self.retryFindDstReasonCount[DDQueue::RetryFindDstReason::PrimaryNoHealthyTeam])
|
|
|
|
|
.detail("RemoteNoHealthyTeam",
|
|
|
|
|
self.retryFindDstReasonCount[DDQueue::RetryFindDstReason::RemoteNoHealthyTeam])
|
2022-10-23 11:48:58 +08:00
|
|
|
|
.detail("RemoteTeamIsFull",
|
2022-10-25 01:39:32 +08:00
|
|
|
|
self.retryFindDstReasonCount[DDQueue::RetryFindDstReason::RemoteTeamIsFull])
|
2022-10-23 11:48:58 +08:00
|
|
|
|
.detail("RemoteTeamIsNotHealthy",
|
2022-10-25 01:39:32 +08:00
|
|
|
|
self.retryFindDstReasonCount[DDQueue::RetryFindDstReason::RemoteTeamIsNotHealthy])
|
|
|
|
|
.detail(
|
|
|
|
|
"NoAvailablePhysicalShard",
|
|
|
|
|
self.retryFindDstReasonCount[DDQueue::RetryFindDstReason::NoAvailablePhysicalShard]);
|
2022-10-20 13:09:04 +08:00
|
|
|
|
self.moveCreateNewPhysicalShard = 0;
|
|
|
|
|
self.moveReusePhysicalShard = 0;
|
2022-10-25 01:39:32 +08:00
|
|
|
|
for (int i = 0; i < self.retryFindDstReasonCount.size(); ++i) {
|
|
|
|
|
self.retryFindDstReasonCount[i] = 0;
|
2022-10-23 11:48:58 +08:00
|
|
|
|
}
|
2022-10-20 13:09:04 +08:00
|
|
|
|
}
|
2017-05-26 04:48:44 +08:00
|
|
|
|
}
|
2018-08-11 04:57:10 +08:00
|
|
|
|
when(wait(self.error.getFuture())) {} // Propagate errors from dataDistributionRelocator
|
2022-08-06 06:26:34 +08:00
|
|
|
|
when(wait(waitForAll(ddQueueFutures))) {}
|
2022-08-17 05:32:55 +08:00
|
|
|
|
when(Promise<int> r = waitNext(getUnhealthyRelocationCount)) {
|
|
|
|
|
r.send(self.getUnhealthyRelocationCount());
|
|
|
|
|
}
|
2017-05-26 04:48:44 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
} catch (Error& e) {
|
2022-02-26 03:01:23 +08:00
|
|
|
|
if (e.code() != error_code_broken_promise && // FIXME: Get rid of these broken_promise errors every time we
|
|
|
|
|
// are killed by the master dying
|
2022-07-08 11:49:16 +08:00
|
|
|
|
e.code() != error_code_movekeys_conflict && e.code() != error_code_data_move_cancelled &&
|
|
|
|
|
e.code() != error_code_data_move_dest_team_not_found)
|
2018-12-14 05:31:37 +08:00
|
|
|
|
TraceEvent(SevError, "DataDistributionQueueError", distributorId).error(e);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
throw e;
|
|
|
|
|
}
|
|
|
|
|
}
|
2022-08-06 14:57:52 +08:00
|
|
|
|
|
2022-08-17 05:32:55 +08:00
|
|
|
|
// Forward declaration of a dataDistributionQueue overload that takes a shared data-distribution
// context and a Database handle. Only the declaration appears here; no definition is visible in this
// part of the file.
ACTOR Future<Void> dataDistributionQueue(Reference<DDSharedContext> context, Database cx);
|
|
|
|
|
|
2022-08-06 14:57:52 +08:00
|
|
|
|
// Unit test for the DDQueue server counter while the periodic counter refresh is running.
// For 2.5 refresh intervals it repeatedly (after a jittered delay of about 2 seconds) builds a
// random 3-server team, increments the counter for that team with a random relocate reason and count
// type, and asserts that the counter read back for the first team member is non-zero.
TEST_CASE("/DataDistribution/DDQueue/ServerCounterTrace") {
	state double duration = 2.5 * SERVER_KNOBS->DD_QUEUE_COUNTER_REFRESH_INTERVAL;
	state DDQueue self;
	state Future<Void> counterFuture = self.periodicalRefreshCounter();
	state Future<Void> finishFuture = delay(duration);
	std::cout << "Start trace counter unit test for " << duration << "s ...\n";
	loop {
		choose {
			when(wait(counterFuture)) {}
			when(wait(finishFuture)) {
				break;
			}
			when(wait(delayJittered(2.0))) {
				std::vector<UID> randomTeam(3);
				for (UID& member : randomTeam) {
					member = UID(deterministicRandom()->randomInt(1, 400), 0);
				}
				auto relocateReason =
				    RelocateReason(deterministicRandom()->randomInt(0, RelocateReason::typeCount()));
				auto counterType = DDQueue::ServerCounter::randomCountType();
				self.serverCounter.increaseForTeam(randomTeam, relocateReason, counterType);
				ASSERT(self.serverCounter.get(randomTeam[0], relocateReason, counterType));
			}
		}
	}
	std::cout << "Finished.";
	return Void();
}
|