Move resolutionBalancing() back to master

This reverts the behavior introduced by a recent refactor of master recovery in PR #6191.
This commit is contained in:
Jingyu Zhou 2022-03-18 15:14:02 -07:00
parent f7ec39c0ee
commit 3379f1e974
5 changed files with 188 additions and 191 deletions

View File

@ -470,7 +470,6 @@ ACTOR Future<Void> trackTlogRecovery(Reference<ClusterRecoveryData> self,
self->dbgid)
.detail("StatusCode", RecoveryStatus::fully_recovered)
.detail("Status", RecoveryStatus::names[RecoveryStatus::fully_recovered])
.detail("FullyRecoveredAtVersion", self->version)
.detail("ClusterId", self->clusterId)
.trackLatest(self->clusterRecoveryStateEventHolder->trackingKey);
@ -511,144 +510,6 @@ ACTOR Future<Void> trackTlogRecovery(Reference<ClusterRecoveryData> self,
}
}
// Picks a key range currently assigned to resolver `src` from which data can be handed to resolver `dest`.
//
// key_resolver: current mapping from key ranges to resolver indices.
// movedRanges:  moves already chosen in this balancing round; a range whose (range, dest) pair is
//               already recorded there is never returned again.
// Returns the chosen range and a flag. The flag is true for every candidate except one that is
// immediately followed by its neighbouring range (a `dest` range in the first pass, a range with no
// border to `dest` in the second); the caller in this file forwards it as ResolutionSplitRequest::front.
// Throws operation_failed() when no range owned by `src` is left to move.
std::pair<KeyRangeRef, bool> findRange(CoalescedKeyRangeMap<int>& key_resolver,
                                       Standalone<VectorRef<ResolverMoveRef>>& movedRanges,
                                       int src,
                                       int dest) {
	// True when moving `candidate` to `dest` has already been recorded in movedRanges.
	const auto alreadyMoved = [&movedRanges, dest](const KeyRangeRef& candidate) {
		return std::find(movedRanges.begin(), movedRanges.end(), ResolverMoveRef(candidate, dest)) !=
		       movedRanges.end();
	};

	auto ranges = key_resolver.ranges();
	auto left = ranges.begin();
	auto right = ranges.begin();
	++right;

	// Only a single range exists: it is the sole candidate.
	if (right == ranges.end()) {
		auto only = ranges.begin();
		if (only->value() != src || alreadyMoved(only->range())) {
			throw operation_failed();
		}
		return { only->range(), true };
	}

	// Pass 1: if possible, expand an existing boundary between the two resolvers. While scanning,
	// remember every resolver that already shares a border with `dest`.
	std::set<int> neighboursOfDest;
	for (; right != ranges.end(); ++right, ++left) {
		const bool rightIsSrc = right->value() == src;
		const bool leftIsDest = left->value() == dest;
		if (rightIsSrc && leftIsDest && !alreadyMoved(right->range())) {
			return { right->range(), true };
		}
		const bool rightIsDest = right->value() == dest;
		if (rightIsDest && left->value() == src && !alreadyMoved(left->range())) {
			return { left->range(), false };
		}
		if (rightIsDest) {
			neighboursOfDest.insert(left->value());
		}
		if (leftIsDest) {
			neighboursOfDest.insert(right->value());
		}
	}

	// Pass 2: if possible, create a new boundary which doesn't exist yet.
	left = ranges.begin();
	right = ranges.begin();
	++right;
	for (; right != ranges.end(); ++right, ++left) {
		if (right->value() == src && neighboursOfDest.count(left->value()) == 0 &&
		    !alreadyMoved(right->range())) {
			return { right->range(), true };
		}
		if (left->value() == src && neighboursOfDest.count(right->value()) == 0 &&
		    !alreadyMoved(left->range())) {
			return { left->range(), false };
		}
	}

	// Pass 3: fall back to any range of `src` that has not been moved yet.
	for (auto cur = ranges.begin(); cur != ranges.end(); ++cur) {
		if (cur->value() == src && !alreadyMoved(cur->range())) {
			return { cur->range(), true };
		}
	}

	// We are already attempting to move all of the data one resolver is assigned, so do not move
	// anything.
	throw operation_failed();
}
// Never-returning actor that periodically rebalances key ranges between resolvers.
//
// Each round it collects one metric value per resolver. When the gap between the largest and the
// smallest value exceeds SERVER_KNOBS->MIN_BALANCE_DIFFERENCE, it asks the resolver with the largest
// value to split off key ranges (chosen by findRange) and publishes the resulting moves through
// self->resolverChanges. An operation_failed error (thrown by findRange when nothing can be moved)
// ends the round silently; any other error propagates out of the actor.
ACTOR Future<Void> resolutionBalancing(Reference<ClusterRecoveryData> self) {
// Local view of which resolver index owns which key range; everything starts on resolver 0.
state CoalescedKeyRangeMap<int> key_resolver;
key_resolver.insert(allKeys, 0);
loop {
// Pace the rounds: at least MIN_BALANCE_TIME between two balancing attempts.
wait(delay(SERVER_KNOBS->MIN_BALANCE_TIME, TaskPriority::ResolutionMetrics));
// Do not start a new round while a previously published change set is still non-empty
// (it is emptied elsewhere, outside this actor).
while (self->resolverChanges.get().size())
wait(self->resolverChanges.onChange());
// Ask every resolver for its metric and wait for all replies.
// NOTE(review): brokenPromiseToNever presumably makes a dead resolver block this round forever
// instead of raising an error — confirm against its definition.
state std::vector<Future<ResolutionMetricsReply>> futures;
for (auto& p : self->resolvers)
futures.push_back(
brokenPromiseToNever(p.metrics.getReply(ResolutionMetricsRequest(), TaskPriority::ResolutionMetrics)));
wait(waitForAll(futures));
// Set of (metric value, resolver index) pairs; begin() and lastItem() are used below as the
// smallest and largest entry (assumes IndexedSet keeps ascending order — TODO confirm).
state IndexedSet<std::pair<int64_t, int>, NoMetric> metrics;
int64_t total = 0;
for (int i = 0; i < futures.size(); i++) {
total += futures[i].get().value;
metrics.insert(std::make_pair(futures[i].get().value, i), NoMetric());
//TraceEvent("ResolverMetric").detail("I", i).detail("Metric", futures[i].get());
}
// Only rebalance when the spread between the extremes is large enough.
if (metrics.lastItem()->first - metrics.begin()->first > SERVER_KNOBS->MIN_BALANCE_DIFFERENCE) {
try {
// src: resolver with the largest metric; dest: resolver with the smallest metric.
state int src = metrics.lastItem()->second;
state int dest = metrics.begin()->second;
// Amount to move: half of the smaller distance between an extreme and the mean.
// NOTE(review): if resolvers.size() is an unsigned type the division happens in unsigned
// arithmetic; this is only safe while metric values are non-negative — confirm.
state int64_t amount = std::min(metrics.lastItem()->first - total / self->resolvers.size(),
total / self->resolvers.size() - metrics.begin()->first) /
2;
// Moves collected during this round; also consulted by findRange to avoid picking a range twice.
state Standalone<VectorRef<ResolverMoveRef>> movedRanges;
loop {
// findRange throws operation_failed when src has nothing left to give; see the catch below.
state std::pair<KeyRangeRef, bool> range = findRange(key_resolver, movedRanges, src, dest);
ResolutionSplitRequest req;
req.front = range.second;
req.offset = amount;
req.range = range.first;
// metrics.lastItem()->second is the same index as src: the split request goes to the source resolver.
ResolutionSplitReply split =
wait(brokenPromiseToNever(self->resolvers[metrics.lastItem()->second].split.getReply(
req, TaskPriority::ResolutionMetrics)));
// Move the part of the range before split.key when range.second is true, otherwise the part from split.key on.
KeyRangeRef moveRange = range.second ? KeyRangeRef(range.first.begin, split.key)
: KeyRangeRef(split.key, range.first.end);
// Deep copy so the recorded move does not depend on the lifetime of the reply.
movedRanges.push_back_deep(movedRanges.arena(), ResolverMoveRef(moveRange, dest));
TraceEvent("MovingResolutionRange")
.detail("Src", src)
.detail("Dest", dest)
.detail("Amount", amount)
.detail("StartRange", range.first)
.detail("MoveRange", moveRange)
.detail("Used", split.used)
.detail("KeyResolverRanges", key_resolver.size());
amount -= split.used;
// Stop once only part of the candidate range was taken or the target amount is covered;
// otherwise the whole range moved and another range is needed.
if (moveRange != range.first || amount <= 0)
break;
}
// Apply the collected moves to the local ownership map.
for (auto& it : movedRanges)
key_resolver.insert(it.range, it.dest);
// for(auto& it : key_resolver.ranges())
// TraceEvent("KeyResolver").detail("Range", it.range()).detail("Value", it.value());
// Record the version after the current one for the changes, mark every commit proxy as
// still needing them, then publish the change set.
self->resolverChangesVersion = self->version + 1;
for (auto& p : self->commitProxies)
self->resolverNeedingChanges.insert(p.id());
self->resolverChanges.set(movedRanges);
} catch (Error& e) {
// operation_failed only means that no range could be moved this round; keep looping.
if (e.code() != error_code_operation_failed)
throw;
}
}
}
}
ACTOR Future<Void> changeCoordinators(Reference<ClusterRecoveryData> self) {
loop {
ChangeCoordinatorsRequest req = waitNext(self->clusterController.changeCoordinators.getFuture());
@ -1127,8 +988,8 @@ ACTOR Future<std::vector<Standalone<CommitTransactionRef>>> recruitEverything(
newTLogServers(self, recruits, oldLogSystem, &confChanges));
// Update recovery related information to the newly elected sequencer (master) process.
wait(brokenPromiseToNever(self->masterInterface.updateRecoveryData.getReply(
UpdateRecoveryDataRequest(self->recoveryTransactionVersion, self->lastEpochEnd, self->commitProxies))));
wait(brokenPromiseToNever(self->masterInterface.updateRecoveryData.getReply(UpdateRecoveryDataRequest(
self->recoveryTransactionVersion, self->lastEpochEnd, self->commitProxies, self->resolvers))));
return confChanges;
}
@ -1802,14 +1663,6 @@ ACTOR Future<Void> clusterRecoveryCore(Reference<ClusterRecoveryData> self) {
.detail("RecoveryDuration", recoveryDuration)
.trackLatest(self->clusterRecoveryStateEventHolder->trackingKey);
TraceEvent(getRecoveryEventName(ClusterRecoveryEventType::CLUSTER_RECOVERY_AVAILABLE_EVENT_NAME).c_str(),
self->dbgid)
.detail("AvailableAtVersion", self->version)
.trackLatest(self->clusterRecoveryAvailableEventHolder->trackingKey);
if (self->resolvers.size() > 1)
self->addActor.send(resolutionBalancing(self));
self->addActor.send(changeCoordinators(self));
Database cx = openDBOnServer(self->dbInfo, TaskPriority::DefaultEndpoint, LockAware::True);
self->addActor.send(configurationMonitor(self, cx));

View File

@ -185,7 +185,6 @@ struct ClusterRecoveryData : NonCopyable, ReferenceCounted<ClusterRecoveryData>
ServerCoordinators coordinators;
Reference<ILogSystem> logSystem;
Version version; // The last version assigned to a proxy by getVersion()
double lastVersionTime;
LogSystemDiskQueueAdapter* txnStateLogAdapter;
IKeyValueStore* txnStateStore;
@ -225,10 +224,6 @@ struct ClusterRecoveryData : NonCopyable, ReferenceCounted<ClusterRecoveryData>
RecoveryState recoveryState;
AsyncVar<Standalone<VectorRef<ResolverMoveRef>>> resolverChanges;
Version resolverChangesVersion;
std::set<UID> resolverNeedingChanges;
PromiseStream<Future<Void>> addActor;
Reference<AsyncVar<bool>> recruitmentStalled;
bool forceRecovery;
@ -266,12 +261,11 @@ struct ClusterRecoveryData : NonCopyable, ReferenceCounted<ClusterRecoveryData>
: controllerData(controllerData), dbgid(masterInterface.id()), lastEpochEnd(invalidVersion),
recoveryTransactionVersion(invalidVersion), lastCommitTime(0), liveCommittedVersion(invalidVersion),
databaseLocked(false), minKnownCommittedVersion(invalidVersion), hasConfiguration(false),
coordinators(coordinators), version(invalidVersion), lastVersionTime(0), txnStateStore(nullptr),
memoryLimit(2e9), dbId(dbId), masterInterface(masterInterface), masterLifetime(masterLifetimeToken),
clusterController(clusterController), cstate(coordinators, addActor, dbgid), dbInfo(dbInfo),
registrationCount(0), addActor(addActor), recruitmentStalled(makeReference<AsyncVar<bool>>(false)),
forceRecovery(forceRecovery), neverCreated(false), safeLocality(tagLocalityInvalid),
primaryLocality(tagLocalityInvalid), cc("Master", dbgid.toString()),
coordinators(coordinators), lastVersionTime(0), txnStateStore(nullptr), memoryLimit(2e9), dbId(dbId),
masterInterface(masterInterface), masterLifetime(masterLifetimeToken), clusterController(clusterController),
cstate(coordinators, addActor, dbgid), dbInfo(dbInfo), registrationCount(0), addActor(addActor),
recruitmentStalled(makeReference<AsyncVar<bool>>(false)), forceRecovery(forceRecovery), neverCreated(false),
safeLocality(tagLocalityInvalid), primaryLocality(tagLocalityInvalid), cc("Master", dbgid.toString()),
changeCoordinatorsRequests("ChangeCoordinatorsRequests", cc),
getCommitVersionRequests("GetCommitVersionRequests", cc),
backupWorkerDoneRequests("BackupWorkerDoneRequests", cc),

View File

@ -23,14 +23,15 @@
#pragma once
#include "fdbclient/CommitProxyInterface.h"
#include "fdbclient/FDBTypes.h"
#include "fdbclient/StorageServerInterface.h"
#include "fdbclient/CommitTransaction.h"
#include "fdbclient/DatabaseConfiguration.h"
#include "fdbserver/TLogInterface.h"
#include "fdbclient/FDBTypes.h"
#include "fdbclient/Notified.h"
#include "fdbclient/StorageServerInterface.h"
#include "fdbserver/ResolverInterface.h"
#include "fdbserver/TLogInterface.h"
typedef uint64_t DBRecoveryCount;
using DBRecoveryCount = uint64_t;
struct MasterInterface {
constexpr static FileIdentifier file_identifier = 5979145;
@ -155,18 +156,20 @@ struct UpdateRecoveryDataRequest {
Version recoveryTransactionVersion;
Version lastEpochEnd;
std::vector<CommitProxyInterface> commitProxies;
std::vector<ResolverInterface> resolvers;
ReplyPromise<Void> reply;
UpdateRecoveryDataRequest() {}
UpdateRecoveryDataRequest() = default;
UpdateRecoveryDataRequest(Version recoveryTransactionVersion,
Version lastEpochEnd,
std::vector<CommitProxyInterface> commitProxies)
const std::vector<CommitProxyInterface>& commitProxies,
const std::vector<ResolverInterface>& resolvers)
: recoveryTransactionVersion(recoveryTransactionVersion), lastEpochEnd(lastEpochEnd),
commitProxies(commitProxies) {}
commitProxies(commitProxies), resolvers(resolvers) {}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, recoveryTransactionVersion, lastEpochEnd, commitProxies, reply);
serializer(ar, recoveryTransactionVersion, lastEpochEnd, commitProxies, resolvers, reply);
}
};

View File

@ -67,6 +67,9 @@ struct MasterData : NonCopyable, ReferenceCounted<MasterData> {
std::vector<CommitProxyInterface> commitProxies;
std::map<UID, CommitProxyVersionReplies> lastCommitProxyVersionReplies;
std::vector<ResolverInterface> resolvers;
PromiseStream<Future<Void>> addActor;
MasterInterface myInterface;
@ -94,7 +97,7 @@ struct MasterData : NonCopyable, ReferenceCounted<MasterData> {
: dbgid(myInterface.id()), lastEpochEnd(invalidVersion), recoveryTransactionVersion(invalidVersion),
liveCommittedVersion(invalidVersion), databaseLocked(false), minKnownCommittedVersion(invalidVersion),
coordinators(coordinators), version(invalidVersion), lastVersionTime(0), txnStateStore(nullptr),
myInterface(myInterface), forceRecovery(forceRecovery), cc("Master", dbgid.toString()),
addActor(addActor), myInterface(myInterface), forceRecovery(forceRecovery), cc("Master", dbgid.toString()),
getCommitVersionRequests("GetCommitVersionRequests", cc),
getLiveCommittedVersionRequests("GetLiveCommittedVersionRequests", cc),
reportLiveCommittedVersionRequests("ReportLiveCommittedVersionRequests", cc) {
@ -110,6 +113,145 @@ struct MasterData : NonCopyable, ReferenceCounted<MasterData> {
}
};
// Picks a key range currently assigned to resolver `src` from which data can be handed to resolver `dest`.
//
// key_resolver: current mapping from key ranges to resolver indices.
// movedRanges:  moves already chosen in this balancing round; a range whose (range, dest) pair is
//               already recorded there is never returned again.
// Returns the chosen range and a flag. The flag is true for every candidate except one that is
// immediately followed by its neighbouring range (a `dest` range in the first pass, a range with no
// border to `dest` in the second); the caller in this file forwards it as ResolutionSplitRequest::front.
// Throws operation_failed() when no range owned by `src` is left to move.
static std::pair<KeyRangeRef, bool> findRange(CoalescedKeyRangeMap<int>& key_resolver,
                                              Standalone<VectorRef<ResolverMoveRef>>& movedRanges,
                                              int src,
                                              int dest) {
	// True when moving `candidate` to `dest` has already been recorded in movedRanges.
	const auto alreadyMoved = [&movedRanges, dest](const KeyRangeRef& candidate) {
		return std::find(movedRanges.begin(), movedRanges.end(), ResolverMoveRef(candidate, dest)) !=
		       movedRanges.end();
	};

	auto ranges = key_resolver.ranges();
	auto left = ranges.begin();
	auto right = ranges.begin();
	++right;

	// Only a single range exists: it is the sole candidate.
	if (right == ranges.end()) {
		auto only = ranges.begin();
		if (only->value() != src || alreadyMoved(only->range())) {
			throw operation_failed();
		}
		return { only->range(), true };
	}

	// Pass 1: if possible, expand an existing boundary between the two resolvers. While scanning,
	// remember every resolver that already shares a border with `dest`.
	std::set<int> neighboursOfDest;
	for (; right != ranges.end(); ++right, ++left) {
		const bool rightIsSrc = right->value() == src;
		const bool leftIsDest = left->value() == dest;
		if (rightIsSrc && leftIsDest && !alreadyMoved(right->range())) {
			return { right->range(), true };
		}
		const bool rightIsDest = right->value() == dest;
		if (rightIsDest && left->value() == src && !alreadyMoved(left->range())) {
			return { left->range(), false };
		}
		if (rightIsDest) {
			neighboursOfDest.insert(left->value());
		}
		if (leftIsDest) {
			neighboursOfDest.insert(right->value());
		}
	}

	// Pass 2: if possible, create a new boundary which doesn't exist yet.
	left = ranges.begin();
	right = ranges.begin();
	++right;
	for (; right != ranges.end(); ++right, ++left) {
		if (right->value() == src && neighboursOfDest.count(left->value()) == 0 &&
		    !alreadyMoved(right->range())) {
			return { right->range(), true };
		}
		if (left->value() == src && neighboursOfDest.count(right->value()) == 0 &&
		    !alreadyMoved(left->range())) {
			return { left->range(), false };
		}
	}

	// Pass 3: fall back to any range of `src` that has not been moved yet.
	for (auto cur = ranges.begin(); cur != ranges.end(); ++cur) {
		if (cur->value() == src && !alreadyMoved(cur->range())) {
			return { cur->range(), true };
		}
	}

	// We are already attempting to move all of the data one resolver is assigned, so do not move
	// anything.
	throw operation_failed();
}
// Balances key ranges among resolvers so that their load is evenly distributed.
//
// Never-returning actor. Each round it collects one metric value per resolver. When the gap between
// the largest and the smallest value exceeds SERVER_KNOBS->MIN_BALANCE_DIFFERENCE, it asks the
// resolver with the largest value to split off key ranges (chosen by findRange) and publishes the
// resulting moves through self->resolverChanges. An operation_failed error (thrown by findRange when
// nothing can be moved) ends the round silently; any other error propagates out of the actor.
ACTOR Future<Void> resolutionBalancing(Reference<MasterData> self) {
// Local view of which resolver index owns which key range; everything starts on resolver 0.
state CoalescedKeyRangeMap<int> key_resolver;
key_resolver.insert(allKeys, 0);
loop {
// Pace the rounds: at least MIN_BALANCE_TIME between two balancing attempts.
wait(delay(SERVER_KNOBS->MIN_BALANCE_TIME, TaskPriority::ResolutionMetrics));
// Do not start a new round while a previously published change set is still non-empty
// (it is emptied elsewhere, outside this actor).
while (self->resolverChanges.get().size())
wait(self->resolverChanges.onChange());
// Ask every resolver for its metric and wait for all replies.
// NOTE(review): brokenPromiseToNever presumably makes a dead resolver block this round forever
// instead of raising an error — confirm against its definition.
state std::vector<Future<ResolutionMetricsReply>> futures;
for (auto& p : self->resolvers)
futures.push_back(
brokenPromiseToNever(p.metrics.getReply(ResolutionMetricsRequest(), TaskPriority::ResolutionMetrics)));
wait(waitForAll(futures));
// Set of (metric value, resolver index) pairs; begin() and lastItem() are used below as the
// smallest and largest entry (assumes IndexedSet keeps ascending order — TODO confirm).
state IndexedSet<std::pair<int64_t, int>, NoMetric> metrics;
int64_t total = 0;
for (int i = 0; i < futures.size(); i++) {
total += futures[i].get().value;
metrics.insert(std::make_pair(futures[i].get().value, i), NoMetric());
//TraceEvent("ResolverMetric").detail("I", i).detail("Metric", futures[i].get());
}
// Only rebalance when the spread between the extremes is large enough.
if (metrics.lastItem()->first - metrics.begin()->first > SERVER_KNOBS->MIN_BALANCE_DIFFERENCE) {
try {
// src: resolver with the largest metric; dest: resolver with the smallest metric.
state int src = metrics.lastItem()->second;
state int dest = metrics.begin()->second;
// Amount to move: half of the smaller distance between an extreme and the mean.
// NOTE(review): if resolvers.size() is an unsigned type the division happens in unsigned
// arithmetic; this is only safe while metric values are non-negative — confirm.
state int64_t amount = std::min(metrics.lastItem()->first - total / self->resolvers.size(),
total / self->resolvers.size() - metrics.begin()->first) /
2;
// Moves collected during this round; also consulted by findRange to avoid picking a range twice.
state Standalone<VectorRef<ResolverMoveRef>> movedRanges;
loop {
// findRange throws operation_failed when src has nothing left to give; see the catch below.
state std::pair<KeyRangeRef, bool> range = findRange(key_resolver, movedRanges, src, dest);
ResolutionSplitRequest req;
req.front = range.second;
req.offset = amount;
req.range = range.first;
// metrics.lastItem()->second is the same index as src: the split request goes to the source resolver.
ResolutionSplitReply split =
wait(brokenPromiseToNever(self->resolvers[metrics.lastItem()->second].split.getReply(
req, TaskPriority::ResolutionMetrics)));
// Move the part of the range before split.key when range.second is true, otherwise the part from split.key on.
KeyRangeRef moveRange = range.second ? KeyRangeRef(range.first.begin, split.key)
: KeyRangeRef(split.key, range.first.end);
// Deep copy so the recorded move does not depend on the lifetime of the reply.
movedRanges.push_back_deep(movedRanges.arena(), ResolverMoveRef(moveRange, dest));
TraceEvent("MovingResolutionRange")
.detail("Src", src)
.detail("Dest", dest)
.detail("Amount", amount)
.detail("StartRange", range.first)
.detail("MoveRange", moveRange)
.detail("Used", split.used)
.detail("KeyResolverRanges", key_resolver.size());
amount -= split.used;
// Stop once only part of the candidate range was taken or the target amount is covered;
// otherwise the whole range moved and another range is needed.
if (moveRange != range.first || amount <= 0)
break;
}
// Apply the collected moves to the local ownership map.
for (auto& it : movedRanges)
key_resolver.insert(it.range, it.dest);
// for(auto& it : key_resolver.ranges())
// TraceEvent("KeyResolver").detail("Range", it.range()).detail("Value", it.value());
// Record the version after the current one for the changes, mark every commit proxy as
// still needing them, then publish the change set.
self->resolverChangesVersion = self->version + 1;
for (auto& p : self->commitProxies)
self->resolverNeedingChanges.insert(p.id());
self->resolverChanges.set(movedRanges);
} catch (Error& e) {
// operation_failed only means that no range could be moved this round; keep looping.
if (e.code() != error_code_operation_failed)
throw;
}
}
}
}
ACTOR Future<Void> getVersion(Reference<MasterData> self, GetCommitVersionRequest req) {
state Span span("M:getVersion"_loc, { req.spanContext });
state std::map<UID, CommitProxyVersionReplies>::iterator proxyItr =
@ -244,31 +386,33 @@ ACTOR Future<Void> serveLiveCommittedVersion(Reference<MasterData> self) {
ACTOR Future<Void> updateRecoveryData(Reference<MasterData> self) {
loop {
choose {
when(UpdateRecoveryDataRequest req = waitNext(self->myInterface.updateRecoveryData.getFuture())) {
TraceEvent("UpdateRecoveryData", self->dbgid)
.detail("RecoveryTxnVersion", req.recoveryTransactionVersion)
.detail("LastEpochEnd", req.lastEpochEnd)
.detail("NumCommitProxies", req.commitProxies.size());
UpdateRecoveryDataRequest req = waitNext(self->myInterface.updateRecoveryData.getFuture());
TraceEvent("UpdateRecoveryData", self->dbgid)
.detail("RecoveryTxnVersion", req.recoveryTransactionVersion)
.detail("LastEpochEnd", req.lastEpochEnd)
.detail("NumCommitProxies", req.commitProxies.size());
if (self->recoveryTransactionVersion == invalidVersion ||
req.recoveryTransactionVersion > self->recoveryTransactionVersion) {
self->recoveryTransactionVersion = req.recoveryTransactionVersion;
}
if (self->lastEpochEnd == invalidVersion || req.lastEpochEnd > self->lastEpochEnd) {
self->lastEpochEnd = req.lastEpochEnd;
}
if (req.commitProxies.size() > 0) {
self->commitProxies = req.commitProxies;
self->lastCommitProxyVersionReplies.clear();
if (self->recoveryTransactionVersion == invalidVersion ||
req.recoveryTransactionVersion > self->recoveryTransactionVersion) {
self->recoveryTransactionVersion = req.recoveryTransactionVersion;
}
if (self->lastEpochEnd == invalidVersion || req.lastEpochEnd > self->lastEpochEnd) {
self->lastEpochEnd = req.lastEpochEnd;
}
if (req.commitProxies.size() > 0) {
self->commitProxies = req.commitProxies;
self->lastCommitProxyVersionReplies.clear();
for (auto& p : self->commitProxies) {
self->lastCommitProxyVersionReplies[p.id()] = CommitProxyVersionReplies();
}
}
req.reply.send(Void());
for (auto& p : self->commitProxies) {
self->lastCommitProxyVersionReplies[p.id()] = CommitProxyVersionReplies();
}
}
self->resolvers = req.resolvers;
if (req.resolvers.size() > 1)
self->addActor.send(resolutionBalancing(self));
req.reply.send(Void());
}
}

View File

@ -107,7 +107,10 @@ struct KillRegionWorkload : TestWorkload {
DatabaseConfiguration conf = wait(getDatabaseConfiguration(cx));
TraceEvent("ForceRecovery_GotConfig").detail("Conf", conf.toString());
TraceEvent("ForceRecovery_GotConfig")
.setMaxEventLength(11000)
.setMaxFieldLength(10000)
.detail("Conf", conf.toString());
if (conf.usableRegions > 1) {
loop {