Merge pull request #9447 from sfc-gh-ajbeamon/metacluster-restore-fixes
Metacluster restore fixes
This commit is contained in:
commit
9b906d9b3d
|
@ -171,14 +171,14 @@ ACTOR Future<bool> metaclusterRemoveCommand(Reference<IDatabase> db, std::vector
|
|||
}
|
||||
|
||||
state ClusterNameRef clusterName = tokens[tokens.size() - 1];
|
||||
state bool force = tokens.size() == 4;
|
||||
|
||||
state ClusterType clusterType = wait(runTransaction(db, [](Reference<ITransaction> tr) {
|
||||
tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
|
||||
return TenantAPI::getClusterType(tr);
|
||||
}));
|
||||
|
||||
if (clusterType == ClusterType::METACLUSTER_DATA && !force) {
|
||||
ForceRemove forceRemove(tokens.size() == 4);
|
||||
if (clusterType == ClusterType::METACLUSTER_DATA && !forceRemove) {
|
||||
if (tokens[2] == "FORCE"_sr) {
|
||||
fmt::print("ERROR: a cluster name must be specified.\n");
|
||||
} else {
|
||||
|
@ -190,8 +190,7 @@ ACTOR Future<bool> metaclusterRemoveCommand(Reference<IDatabase> db, std::vector
|
|||
return false;
|
||||
}
|
||||
|
||||
bool updatedDataCluster =
|
||||
wait(MetaclusterAPI::removeCluster(db, clusterName, clusterType, tokens.size() == 4, 15.0));
|
||||
bool updatedDataCluster = wait(MetaclusterAPI::removeCluster(db, clusterName, clusterType, forceRemove, 15.0));
|
||||
|
||||
if (clusterType == ClusterType::METACLUSTER_MANAGEMENT) {
|
||||
fmt::print("The cluster `{}' has been removed\n", printable(clusterName).c_str());
|
||||
|
@ -211,7 +210,7 @@ ACTOR Future<bool> metaclusterRemoveCommand(Reference<IDatabase> db, std::vector
|
|||
|
||||
void printRestoreUsage() {
|
||||
fmt::print("Usage: metacluster restore <NAME> [dryrun] connection_string=<CONNECTION_STRING>\n"
|
||||
"<restore_known_data_cluster|repopulate_from_data_cluster> [force_join_new_metacluster]\n\n");
|
||||
"<restore_known_data_cluster|repopulate_from_data_cluster> [force_join]\n\n");
|
||||
|
||||
fmt::print("Add a restored data cluster back to a metacluster.\n\n");
|
||||
|
||||
|
@ -223,8 +222,9 @@ void printRestoreUsage() {
|
|||
fmt::print("that the metacluster is already tracking. This mode should be used if only data\n");
|
||||
fmt::print("clusters are being restored, and any discrepancies between the management and\n");
|
||||
fmt::print("data clusters will be resolved using the management cluster metadata.\n");
|
||||
fmt::print("If `force_join_new_metacluster' is specified, the cluster will try to restore\n");
|
||||
fmt::print("to a different metacluster than it was originally registered to.\n\n");
|
||||
fmt::print("If `force_join' is specified, the cluster will try to restore to a different\n");
|
||||
fmt::print("metacluster than it was originally registered to or with a different ID than\n");
|
||||
fmt::print("is associated with the given cluster name.\n\n");
|
||||
|
||||
fmt::print("Use `repopulate_from_data_cluster' to rebuild a lost management cluster from the\n");
|
||||
fmt::print("data clusters in a metacluster. This mode should be used if the management\n");
|
||||
|
@ -244,7 +244,7 @@ ACTOR Future<bool> metaclusterRestoreCommand(Reference<IDatabase> db, std::vecto
|
|||
}
|
||||
|
||||
state bool dryRun = tokens[3] == "dryrun"_sr;
|
||||
state bool forceJoin = tokens[tokens.size() - 1] == "force_join_new_metacluster"_sr;
|
||||
state bool forceJoin = tokens[tokens.size() - 1] == "force_join"_sr;
|
||||
|
||||
if (tokens.size() < 5 + (int)dryRun + (int)forceJoin) {
|
||||
printRestoreUsage();
|
||||
|
@ -274,7 +274,7 @@ ACTOR Future<bool> metaclusterRestoreCommand(Reference<IDatabase> db, std::vecto
|
|||
config.get().first.get(),
|
||||
ApplyManagementClusterUpdates::True,
|
||||
RestoreDryRun(dryRun),
|
||||
ForceJoinNewMetacluster(forceJoin),
|
||||
ForceJoin(forceJoin),
|
||||
&messages));
|
||||
} else if (restoreType == "repopulate_from_data_cluster"_sr) {
|
||||
wait(MetaclusterAPI::restoreCluster(db,
|
||||
|
@ -282,7 +282,7 @@ ACTOR Future<bool> metaclusterRestoreCommand(Reference<IDatabase> db, std::vecto
|
|||
config.get().first.get(),
|
||||
ApplyManagementClusterUpdates::False,
|
||||
RestoreDryRun(dryRun),
|
||||
ForceJoinNewMetacluster(forceJoin),
|
||||
ForceJoin(forceJoin),
|
||||
&messages));
|
||||
} else {
|
||||
fmt::print(stderr, "ERROR: unrecognized restore mode `{}'\n", printable(restoreType));
|
||||
|
@ -589,7 +589,7 @@ void metaclusterGenerator(const char* text,
|
|||
const char* opts[] = { "restore_known_data_cluster", "repopulate_from_data_cluster", nullptr };
|
||||
arrayGenerator(text, line, opts, lc);
|
||||
} else if (tokens.size() == 5 + (int)dryrun) {
|
||||
const char* opts[] = { "force_join_new_metacluster", nullptr };
|
||||
const char* opts[] = { "force_join", nullptr };
|
||||
arrayGenerator(text, line, opts, lc);
|
||||
}
|
||||
}
|
||||
|
@ -624,7 +624,7 @@ std::vector<const char*> metaclusterHintGenerator(std::vector<StringRef> const&
|
|||
"[dryrun]",
|
||||
"connection_string=<CONNECTION_STRING>",
|
||||
"<restore_known_data_cluster|repopulate_from_data_cluster>",
|
||||
"[force_join_new_metacluster]" };
|
||||
"[force_join]" };
|
||||
if (tokens.size() < 4 || (tokens[3].size() <= 6 && "dryrun"_sr.startsWith(tokens[3]))) {
|
||||
return std::vector<const char*>(opts.begin() + tokens.size() - 2, opts.end());
|
||||
} else if (tokens.size() < 6) {
|
||||
|
|
|
@ -31,7 +31,8 @@ FDB_DEFINE_BOOLEAN_PARAM(IsRestoring);
|
|||
FDB_DEFINE_BOOLEAN_PARAM(RunOnDisconnectedCluster);
|
||||
FDB_DEFINE_BOOLEAN_PARAM(RunOnMismatchedCluster);
|
||||
FDB_DEFINE_BOOLEAN_PARAM(RestoreDryRun);
|
||||
FDB_DEFINE_BOOLEAN_PARAM(ForceJoinNewMetacluster);
|
||||
FDB_DEFINE_BOOLEAN_PARAM(ForceJoin);
|
||||
FDB_DEFINE_BOOLEAN_PARAM(ForceRemove);
|
||||
|
||||
namespace MetaclusterAPI {
|
||||
|
||||
|
|
|
@ -100,7 +100,8 @@ FDB_DECLARE_BOOLEAN_PARAM(IsRestoring);
|
|||
FDB_DECLARE_BOOLEAN_PARAM(RunOnDisconnectedCluster);
|
||||
FDB_DECLARE_BOOLEAN_PARAM(RunOnMismatchedCluster);
|
||||
FDB_DECLARE_BOOLEAN_PARAM(RestoreDryRun);
|
||||
FDB_DECLARE_BOOLEAN_PARAM(ForceJoinNewMetacluster);
|
||||
FDB_DECLARE_BOOLEAN_PARAM(ForceJoin);
|
||||
FDB_DECLARE_BOOLEAN_PARAM(ForceRemove);
|
||||
|
||||
namespace MetaclusterAPI {
|
||||
|
||||
|
@ -365,6 +366,12 @@ struct MetaclusterOperationContext {
|
|||
try {
|
||||
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
|
||||
state bool checkRestoring = !self->extraSupportedDataClusterStates.count(DataClusterState::RESTORING);
|
||||
state Future<KeyBackedRangeResult<std::pair<ClusterName, UID>>> activeRestoreIdFuture;
|
||||
if (checkRestoring && self->clusterName.present()) {
|
||||
activeRestoreIdFuture = MetaclusterMetadata::activeRestoreIds().getRange(tr, {}, {}, 1);
|
||||
}
|
||||
|
||||
state Optional<MetaclusterRegistrationEntry> currentMetaclusterRegistration =
|
||||
wait(MetaclusterMetadata::metaclusterRegistration().get(tr));
|
||||
|
||||
|
@ -381,6 +388,13 @@ struct MetaclusterOperationContext {
|
|||
}
|
||||
}
|
||||
|
||||
if (checkRestoring) {
|
||||
KeyBackedRangeResult<std::pair<ClusterName, UID>> activeRestoreId = wait(activeRestoreIdFuture);
|
||||
if (!activeRestoreId.results.empty()) {
|
||||
throw cluster_restoring();
|
||||
}
|
||||
}
|
||||
|
||||
self->dataClusterIsRegistered = currentMetaclusterRegistration.present();
|
||||
state decltype(std::declval<Function>()(Reference<typename DB::TransactionT>()).getValue()) result =
|
||||
wait(func(tr));
|
||||
|
@ -658,210 +672,6 @@ void updateClusterMetadata(Transaction tr,
|
|||
}
|
||||
}
|
||||
|
||||
// Store the cluster entry for the new cluster
|
||||
ACTOR template <class Transaction>
|
||||
static Future<Void> registerInManagementCluster(Transaction tr,
|
||||
ClusterName clusterName,
|
||||
DataClusterEntry clusterEntry,
|
||||
ClusterConnectionString connectionString,
|
||||
RestoreDryRun restoreDryRun) {
|
||||
state Optional<DataClusterMetadata> dataClusterMetadata = wait(tryGetClusterTransaction(tr, clusterName));
|
||||
if (dataClusterMetadata.present() &&
|
||||
!dataClusterMetadata.get().matchesConfiguration(DataClusterMetadata(clusterEntry, connectionString))) {
|
||||
TraceEvent("RegisterClusterAlreadyExists").detail("ClusterName", clusterName);
|
||||
throw cluster_already_exists();
|
||||
} else if (!restoreDryRun && !dataClusterMetadata.present()) {
|
||||
clusterEntry.allocated = ClusterUsage();
|
||||
|
||||
if (clusterEntry.hasCapacity()) {
|
||||
ManagementClusterMetadata::clusterCapacityIndex.insert(
|
||||
tr, Tuple::makeTuple(clusterEntry.allocated.numTenantGroups, clusterName));
|
||||
}
|
||||
ManagementClusterMetadata::dataClusters().set(tr, clusterName, clusterEntry);
|
||||
ManagementClusterMetadata::dataClusterConnectionRecords.set(tr, clusterName, connectionString);
|
||||
|
||||
TraceEvent("RegisteredDataCluster")
|
||||
.detail("ClusterName", clusterName)
|
||||
.detail("ClusterID", clusterEntry.id)
|
||||
.detail("Capacity", clusterEntry.capacity)
|
||||
.detail("Version", tr->getCommittedVersion())
|
||||
.detail("ConnectionString", connectionString.toString());
|
||||
}
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
template <class DB>
|
||||
struct RegisterClusterImpl {
|
||||
MetaclusterOperationContext<DB> ctx;
|
||||
|
||||
// Initialization parameters
|
||||
ClusterName clusterName;
|
||||
ClusterConnectionString connectionString;
|
||||
DataClusterEntry clusterEntry;
|
||||
|
||||
RegisterClusterImpl(Reference<DB> managementDb,
|
||||
ClusterName clusterName,
|
||||
ClusterConnectionString connectionString,
|
||||
DataClusterEntry clusterEntry)
|
||||
: ctx(managementDb), clusterName(clusterName), connectionString(connectionString), clusterEntry(clusterEntry) {}
|
||||
|
||||
// Store the cluster entry for the new cluster in a registering state
|
||||
ACTOR static Future<Void> registerInManagementCluster(RegisterClusterImpl* self,
|
||||
Reference<typename DB::TransactionT> tr) {
|
||||
state Optional<DataClusterMetadata> dataClusterMetadata = wait(tryGetClusterTransaction(tr, self->clusterName));
|
||||
if (!dataClusterMetadata.present()) {
|
||||
self->clusterEntry.clusterState = DataClusterState::REGISTERING;
|
||||
self->clusterEntry.allocated = ClusterUsage();
|
||||
self->clusterEntry.id = deterministicRandom()->randomUniqueID();
|
||||
|
||||
ManagementClusterMetadata::dataClusters().set(tr, self->clusterName, self->clusterEntry);
|
||||
ManagementClusterMetadata::dataClusterConnectionRecords.set(tr, self->clusterName, self->connectionString);
|
||||
} else if (dataClusterMetadata.get().entry.clusterState == DataClusterState::REMOVING) {
|
||||
throw cluster_removed();
|
||||
} else if (!dataClusterMetadata.get().matchesConfiguration(
|
||||
DataClusterMetadata(self->clusterEntry, self->connectionString)) ||
|
||||
dataClusterMetadata.get().entry.clusterState != DataClusterState::REGISTERING) {
|
||||
throw cluster_already_exists();
|
||||
} else {
|
||||
self->clusterEntry = dataClusterMetadata.get().entry;
|
||||
}
|
||||
|
||||
TraceEvent("RegisteringDataCluster")
|
||||
.detail("ClusterName", self->clusterName)
|
||||
.detail("ClusterID", self->clusterEntry.id)
|
||||
.detail("Capacity", self->clusterEntry.capacity)
|
||||
.detail("ConnectionString", self->connectionString.toString());
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR static Future<Void> configureDataCluster(RegisterClusterImpl* self) {
|
||||
state Reference<IDatabase> dataClusterDb = wait(openDatabase(self->connectionString));
|
||||
state Reference<ITransaction> tr = dataClusterDb->createTransaction();
|
||||
loop {
|
||||
try {
|
||||
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
|
||||
state Future<std::vector<std::pair<TenantName, int64_t>>> existingTenantsFuture =
|
||||
TenantAPI::listTenantsTransaction(tr, ""_sr, "\xff\xff"_sr, 1);
|
||||
state ThreadFuture<RangeResult> existingDataFuture = tr->getRange(normalKeys, 1);
|
||||
state Future<bool> tombstoneFuture =
|
||||
MetaclusterMetadata::registrationTombstones().exists(tr, self->clusterEntry.id);
|
||||
|
||||
// Check whether this cluster has already been registered
|
||||
state Optional<MetaclusterRegistrationEntry> existingRegistration =
|
||||
wait(MetaclusterMetadata::metaclusterRegistration().get(tr));
|
||||
if (existingRegistration.present()) {
|
||||
if (existingRegistration.get().clusterType != ClusterType::METACLUSTER_DATA ||
|
||||
existingRegistration.get().name != self->clusterName ||
|
||||
!existingRegistration.get().matches(self->ctx.metaclusterRegistration.get()) ||
|
||||
existingRegistration.get().id != self->clusterEntry.id) {
|
||||
throw cluster_already_registered();
|
||||
} else {
|
||||
// We already successfully registered the cluster with these details, so there's nothing to
|
||||
// do
|
||||
return Void();
|
||||
}
|
||||
}
|
||||
|
||||
// Check if the cluster was removed concurrently
|
||||
bool tombstone = wait(tombstoneFuture);
|
||||
if (tombstone) {
|
||||
throw cluster_removed();
|
||||
}
|
||||
|
||||
// Check for any existing data
|
||||
std::vector<std::pair<TenantName, int64_t>> existingTenants =
|
||||
wait(safeThreadFutureToFuture(existingTenantsFuture));
|
||||
if (!existingTenants.empty()) {
|
||||
TraceEvent(SevWarn, "CannotRegisterClusterWithTenants").detail("ClusterName", self->clusterName);
|
||||
throw cluster_not_empty();
|
||||
}
|
||||
|
||||
RangeResult existingData = wait(safeThreadFutureToFuture(existingDataFuture));
|
||||
if (!existingData.empty()) {
|
||||
TraceEvent(SevWarn, "CannotRegisterClusterWithData").detail("ClusterName", self->clusterName);
|
||||
throw cluster_not_empty();
|
||||
}
|
||||
|
||||
MetaclusterMetadata::metaclusterRegistration().set(
|
||||
tr,
|
||||
self->ctx.metaclusterRegistration.get().toDataClusterRegistration(self->clusterName,
|
||||
self->clusterEntry.id));
|
||||
|
||||
wait(buggifiedCommit(tr, BUGGIFY_WITH_PROB(0.1)));
|
||||
|
||||
TraceEvent("ConfiguredDataCluster")
|
||||
.detail("ClusterName", self->clusterName)
|
||||
.detail("ClusterID", self->clusterEntry.id)
|
||||
.detail("Capacity", self->clusterEntry.capacity)
|
||||
.detail("Version", tr->getCommittedVersion())
|
||||
.detail("ConnectionString", self->connectionString.toString());
|
||||
|
||||
return Void();
|
||||
} catch (Error& e) {
|
||||
wait(safeThreadFutureToFuture(tr->onError(e)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Store the cluster entry for the new cluster
|
||||
ACTOR static Future<Void> markClusterReady(RegisterClusterImpl* self, Reference<typename DB::TransactionT> tr) {
|
||||
state Optional<DataClusterMetadata> dataClusterMetadata = wait(tryGetClusterTransaction(tr, self->clusterName));
|
||||
if (!dataClusterMetadata.present() ||
|
||||
dataClusterMetadata.get().entry.clusterState == DataClusterState::REMOVING) {
|
||||
throw cluster_removed();
|
||||
} else if (dataClusterMetadata.get().entry.id != self->clusterEntry.id) {
|
||||
throw cluster_already_exists();
|
||||
} else if (dataClusterMetadata.get().entry.clusterState == DataClusterState::READY) {
|
||||
return Void();
|
||||
} else {
|
||||
ASSERT(dataClusterMetadata.get().entry.clusterState == DataClusterState::REGISTERING);
|
||||
dataClusterMetadata.get().entry.clusterState = DataClusterState::READY;
|
||||
|
||||
if (dataClusterMetadata.get().entry.hasCapacity()) {
|
||||
ManagementClusterMetadata::clusterCapacityIndex.insert(
|
||||
tr, Tuple::makeTuple(dataClusterMetadata.get().entry.allocated.numTenantGroups, self->clusterName));
|
||||
}
|
||||
ManagementClusterMetadata::dataClusters().set(tr, self->clusterName, dataClusterMetadata.get().entry);
|
||||
ManagementClusterMetadata::dataClusterConnectionRecords.set(tr, self->clusterName, self->connectionString);
|
||||
}
|
||||
|
||||
TraceEvent("RegisteredDataCluster")
|
||||
.detail("ClusterName", self->clusterName)
|
||||
.detail("ClusterID", self->clusterEntry.id)
|
||||
.detail("Capacity", dataClusterMetadata.get().entry.capacity)
|
||||
.detail("ConnectionString", self->connectionString.toString());
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR static Future<Void> run(RegisterClusterImpl* self) {
|
||||
wait(self->ctx.runManagementTransaction(
|
||||
[self = self](Reference<typename DB::TransactionT> tr) { return registerInManagementCluster(self, tr); }));
|
||||
|
||||
// Don't use ctx to run this transaction because we have not set up the data cluster metadata on it and we
|
||||
// don't have a metacluster registration on the data cluster
|
||||
wait(configureDataCluster(self));
|
||||
wait(self->ctx.runManagementTransaction(
|
||||
[self = self](Reference<typename DB::TransactionT> tr) { return markClusterReady(self, tr); }));
|
||||
|
||||
return Void();
|
||||
}
|
||||
Future<Void> run() { return run(this); }
|
||||
};
|
||||
|
||||
// Registers the data cluster `name`, reachable through `connectionString`, using the management cluster `db`.
// All of the work is delegated to RegisterClusterImpl::run(); this actor completes once that run has finished.
ACTOR template <class DB>
Future<Void> registerCluster(Reference<DB> db,
                             ClusterName name,
                             ClusterConnectionString connectionString,
                             DataClusterEntry entry) {
	// Held as actor state so that the implementation object stays alive across the wait
	state RegisterClusterImpl<DB> registration(db, name, connectionString, entry);

	wait(registration.run());

	return Void();
}
|
||||
|
||||
template <class DB>
|
||||
struct RemoveClusterImpl {
|
||||
MetaclusterOperationContext<DB> ctx;
|
||||
|
@ -870,9 +680,13 @@ struct RemoveClusterImpl {
|
|||
Reference<DB> db;
|
||||
ClusterType clusterType;
|
||||
ClusterName clusterName;
|
||||
bool forceRemove;
|
||||
ForceRemove forceRemove;
|
||||
double dataClusterTimeout;
|
||||
|
||||
// Optional parameters that are set by internal users
|
||||
Optional<UID> clusterId;
|
||||
std::set<DataClusterState> legalClusterStates;
|
||||
|
||||
// Parameters set in markClusterRemoving
|
||||
Optional<int64_t> lastTenantId;
|
||||
|
||||
|
@ -882,7 +696,7 @@ struct RemoveClusterImpl {
|
|||
RemoveClusterImpl(Reference<DB> db,
|
||||
ClusterName clusterName,
|
||||
ClusterType clusterType,
|
||||
bool forceRemove,
|
||||
ForceRemove forceRemove,
|
||||
double dataClusterTimeout)
|
||||
: ctx(db,
|
||||
Optional<ClusterName>(),
|
||||
|
@ -895,6 +709,14 @@ struct RemoveClusterImpl {
|
|||
state DataClusterMetadata clusterMetadata = wait(getClusterTransaction(tr, self->clusterName));
|
||||
wait(self->ctx.setCluster(tr, self->clusterName));
|
||||
|
||||
if ((self->clusterId.present() && clusterMetadata.entry.id != self->clusterId.get()) ||
|
||||
(!self->legalClusterStates.empty() &&
|
||||
!self->legalClusterStates.count(clusterMetadata.entry.clusterState))) {
|
||||
// The type of error is currently ignored, and this is only used to terminate the remove operation.
|
||||
// If that changes in the future, we may want to introduce a more suitable error type.
|
||||
throw operation_failed();
|
||||
}
|
||||
|
||||
if (!self->forceRemove && self->ctx.dataClusterMetadata.get().entry.allocated.numTenantGroups > 0) {
|
||||
throw cluster_not_empty();
|
||||
} else if (self->ctx.dataClusterMetadata.get().entry.clusterState != DataClusterState::REMOVING) {
|
||||
|
@ -932,6 +754,7 @@ struct RemoveClusterImpl {
|
|||
if (self->ctx.dataClusterIsRegistered) {
|
||||
// Delete metacluster related metadata
|
||||
MetaclusterMetadata::metaclusterRegistration().clear(tr);
|
||||
MetaclusterMetadata::activeRestoreIds().clear(tr);
|
||||
TenantMetadata::tenantTombstones().clear(tr);
|
||||
TenantMetadata::tombstoneCleanupData().clear(tr);
|
||||
|
||||
|
@ -971,9 +794,12 @@ struct RemoveClusterImpl {
|
|||
state KeyBackedRangeResult<Tuple> tenantEntries = wait(tenantEntriesFuture);
|
||||
|
||||
// Erase each tenant from the tenant map on the management cluster
|
||||
std::set<int64_t> erasedTenants;
|
||||
for (Tuple entry : tenantEntries.results) {
|
||||
int64_t tenantId = entry.getInt(2);
|
||||
ASSERT(entry.getString(0) == self->ctx.clusterName.get());
|
||||
ManagementClusterMetadata::tenantMetadata().tenantMap.erase(tr, entry.getInt(2));
|
||||
erasedTenants.insert(tenantId);
|
||||
ManagementClusterMetadata::tenantMetadata().tenantMap.erase(tr, tenantId);
|
||||
ManagementClusterMetadata::tenantMetadata().tenantNameIndex.erase(tr, entry.getString(1));
|
||||
ManagementClusterMetadata::tenantMetadata().lastTenantModification.setVersionstamp(tr, Versionstamp(), 0);
|
||||
}
|
||||
|
@ -987,9 +813,9 @@ struct RemoveClusterImpl {
|
|||
}
|
||||
|
||||
ManagementClusterMetadata::tenantMetadata().tenantCount.atomicOp(
|
||||
tr, -tenantEntries.results.size(), MutationRef::AddValue);
|
||||
tr, -erasedTenants.size(), MutationRef::AddValue);
|
||||
ManagementClusterMetadata::clusterTenantCount.atomicOp(
|
||||
tr, self->ctx.clusterName.get(), -tenantEntries.results.size(), MutationRef::AddValue);
|
||||
tr, self->ctx.clusterName.get(), -erasedTenants.size(), MutationRef::AddValue);
|
||||
|
||||
return !tenantEntries.more;
|
||||
}
|
||||
|
@ -1032,6 +858,7 @@ struct RemoveClusterImpl {
|
|||
ManagementClusterMetadata::dataClusters().erase(tr, ctx.clusterName.get());
|
||||
ManagementClusterMetadata::dataClusterConnectionRecords.erase(tr, ctx.clusterName.get());
|
||||
ManagementClusterMetadata::clusterTenantCount.erase(tr, ctx.clusterName.get());
|
||||
MetaclusterMetadata::activeRestoreIds().erase(tr, ctx.clusterName.get());
|
||||
}
|
||||
|
||||
// Removes the next set of metadata from the management cluster; returns true when all specified
|
||||
|
@ -1191,13 +1018,217 @@ ACTOR template <class DB>
|
|||
// Runs a cluster removal by building a RemoveClusterImpl from the arguments and waiting for its run() to finish.
// Every argument is forwarded unchanged to the RemoveClusterImpl constructor; dataClusterTimeout defaults to 0.
// Returns the implementation's `dataClusterUpdated` member once run() has completed.
Future<bool> removeCluster(Reference<DB> db,
|
||||
ClusterName name,
|
||||
ClusterType clusterType,
|
||||
// NOTE(review): `forceRemove` is declared twice below, once as `bool` and once as `ForceRemove`. This looks like
// the before/after pair of a diff rather than two real parameters -- confirm which declaration is current.
bool forceRemove,
|
||||
ForceRemove forceRemove,
|
||||
double dataClusterTimeout = 0) {
|
||||
// Declared as actor state so that the implementation object survives the wait below
state RemoveClusterImpl<DB> impl(db, name, clusterType, forceRemove, dataClusterTimeout);
|
||||
wait(impl.run());
|
||||
return impl.dataClusterUpdated;
|
||||
}
|
||||
|
||||
template <class DB>
|
||||
struct RegisterClusterImpl {
|
||||
MetaclusterOperationContext<DB> ctx;
|
||||
|
||||
// Initialization parameters
|
||||
ClusterName clusterName;
|
||||
ClusterConnectionString connectionString;
|
||||
DataClusterEntry clusterEntry;
|
||||
|
||||
RegisterClusterImpl(Reference<DB> managementDb,
|
||||
ClusterName clusterName,
|
||||
ClusterConnectionString connectionString,
|
||||
DataClusterEntry clusterEntry)
|
||||
: ctx(managementDb), clusterName(clusterName), connectionString(connectionString), clusterEntry(clusterEntry) {}
|
||||
|
||||
// Store the cluster entry for the new cluster in a registering state
|
||||
ACTOR static Future<Void> registerInManagementCluster(RegisterClusterImpl* self,
|
||||
Reference<typename DB::TransactionT> tr) {
|
||||
state Optional<DataClusterMetadata> dataClusterMetadata = wait(tryGetClusterTransaction(tr, self->clusterName));
|
||||
if (!dataClusterMetadata.present()) {
|
||||
self->clusterEntry.clusterState = DataClusterState::REGISTERING;
|
||||
self->clusterEntry.allocated = ClusterUsage();
|
||||
self->clusterEntry.id = deterministicRandom()->randomUniqueID();
|
||||
|
||||
ManagementClusterMetadata::dataClusters().set(tr, self->clusterName, self->clusterEntry);
|
||||
ManagementClusterMetadata::dataClusterConnectionRecords.set(tr, self->clusterName, self->connectionString);
|
||||
} else if (dataClusterMetadata.get().entry.clusterState == DataClusterState::REMOVING) {
|
||||
throw cluster_removed();
|
||||
} else if (!dataClusterMetadata.get().matchesConfiguration(
|
||||
DataClusterMetadata(self->clusterEntry, self->connectionString)) ||
|
||||
dataClusterMetadata.get().entry.clusterState != DataClusterState::REGISTERING) {
|
||||
throw cluster_already_exists();
|
||||
} else {
|
||||
self->clusterEntry = dataClusterMetadata.get().entry;
|
||||
}
|
||||
|
||||
TraceEvent("RegisteringDataCluster")
|
||||
.detail("ClusterName", self->clusterName)
|
||||
.detail("ClusterID", self->clusterEntry.id)
|
||||
.detail("Capacity", self->clusterEntry.capacity)
|
||||
.detail("ConnectionString", self->connectionString.toString());
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR static Future<Void> configureDataCluster(RegisterClusterImpl* self) {
|
||||
state Reference<IDatabase> dataClusterDb = wait(openDatabase(self->connectionString));
|
||||
state Reference<ITransaction> tr = dataClusterDb->createTransaction();
|
||||
loop {
|
||||
try {
|
||||
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
|
||||
state Future<std::vector<std::pair<TenantName, int64_t>>> existingTenantsFuture =
|
||||
TenantAPI::listTenantsTransaction(tr, ""_sr, "\xff\xff"_sr, 1);
|
||||
state ThreadFuture<RangeResult> existingDataFuture = tr->getRange(normalKeys, 1);
|
||||
state Future<bool> tombstoneFuture =
|
||||
MetaclusterMetadata::registrationTombstones().exists(tr, self->clusterEntry.id);
|
||||
|
||||
// Check whether this cluster has already been registered
|
||||
state Optional<MetaclusterRegistrationEntry> existingRegistration =
|
||||
wait(MetaclusterMetadata::metaclusterRegistration().get(tr));
|
||||
if (existingRegistration.present()) {
|
||||
if (existingRegistration.get().clusterType != ClusterType::METACLUSTER_DATA ||
|
||||
existingRegistration.get().name != self->clusterName ||
|
||||
!existingRegistration.get().matches(self->ctx.metaclusterRegistration.get()) ||
|
||||
existingRegistration.get().id != self->clusterEntry.id) {
|
||||
throw cluster_already_registered();
|
||||
} else {
|
||||
// We already successfully registered the cluster with these details, so there's nothing to
|
||||
// do
|
||||
return Void();
|
||||
}
|
||||
}
|
||||
|
||||
// Check if the cluster was removed concurrently
|
||||
bool tombstone = wait(tombstoneFuture);
|
||||
if (tombstone) {
|
||||
throw cluster_removed();
|
||||
}
|
||||
|
||||
// Check for any existing data
|
||||
std::vector<std::pair<TenantName, int64_t>> existingTenants =
|
||||
wait(safeThreadFutureToFuture(existingTenantsFuture));
|
||||
if (!existingTenants.empty()) {
|
||||
TraceEvent(SevWarn, "CannotRegisterClusterWithTenants").detail("ClusterName", self->clusterName);
|
||||
throw cluster_not_empty();
|
||||
}
|
||||
|
||||
RangeResult existingData = wait(safeThreadFutureToFuture(existingDataFuture));
|
||||
if (!existingData.empty()) {
|
||||
TraceEvent(SevWarn, "CannotRegisterClusterWithData").detail("ClusterName", self->clusterName);
|
||||
throw cluster_not_empty();
|
||||
}
|
||||
|
||||
MetaclusterMetadata::metaclusterRegistration().set(
|
||||
tr,
|
||||
self->ctx.metaclusterRegistration.get().toDataClusterRegistration(self->clusterName,
|
||||
self->clusterEntry.id));
|
||||
|
||||
// If we happen to have any orphaned restore IDs from a previous time this cluster was in a metacluster,
|
||||
// erase them now.
|
||||
MetaclusterMetadata::activeRestoreIds().clear(tr);
|
||||
|
||||
wait(buggifiedCommit(tr, BUGGIFY_WITH_PROB(0.1)));
|
||||
|
||||
TraceEvent("ConfiguredDataCluster")
|
||||
.detail("ClusterName", self->clusterName)
|
||||
.detail("ClusterID", self->clusterEntry.id)
|
||||
.detail("Capacity", self->clusterEntry.capacity)
|
||||
.detail("Version", tr->getCommittedVersion())
|
||||
.detail("ConnectionString", self->connectionString.toString());
|
||||
|
||||
return Void();
|
||||
} catch (Error& e) {
|
||||
wait(safeThreadFutureToFuture(tr->onError(e)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Store the cluster entry for the new cluster
|
||||
ACTOR static Future<Void> markClusterReady(RegisterClusterImpl* self, Reference<typename DB::TransactionT> tr) {
|
||||
state Optional<DataClusterMetadata> dataClusterMetadata = wait(tryGetClusterTransaction(tr, self->clusterName));
|
||||
if (!dataClusterMetadata.present() ||
|
||||
dataClusterMetadata.get().entry.clusterState == DataClusterState::REMOVING) {
|
||||
throw cluster_removed();
|
||||
} else if (dataClusterMetadata.get().entry.id != self->clusterEntry.id) {
|
||||
throw cluster_already_exists();
|
||||
} else if (dataClusterMetadata.get().entry.clusterState == DataClusterState::READY) {
|
||||
return Void();
|
||||
} else if (dataClusterMetadata.get().entry.clusterState == DataClusterState::RESTORING) {
|
||||
throw cluster_restoring();
|
||||
} else {
|
||||
ASSERT(dataClusterMetadata.get().entry.clusterState == DataClusterState::REGISTERING);
|
||||
dataClusterMetadata.get().entry.clusterState = DataClusterState::READY;
|
||||
|
||||
if (dataClusterMetadata.get().entry.hasCapacity()) {
|
||||
ManagementClusterMetadata::clusterCapacityIndex.insert(
|
||||
tr, Tuple::makeTuple(dataClusterMetadata.get().entry.allocated.numTenantGroups, self->clusterName));
|
||||
}
|
||||
ManagementClusterMetadata::dataClusters().set(tr, self->clusterName, dataClusterMetadata.get().entry);
|
||||
ManagementClusterMetadata::dataClusterConnectionRecords.set(tr, self->clusterName, self->connectionString);
|
||||
}
|
||||
|
||||
TraceEvent("RegisteredDataCluster")
|
||||
.detail("ClusterName", self->clusterName)
|
||||
.detail("ClusterID", self->clusterEntry.id)
|
||||
.detail("Capacity", dataClusterMetadata.get().entry.capacity)
|
||||
.detail("ConnectionString", self->connectionString.toString());
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR static Future<Void> run(RegisterClusterImpl* self) {
|
||||
// Used if we need to rollback
|
||||
state RemoveClusterImpl<DB> removeCluster(
|
||||
self->ctx.managementDb, self->clusterName, ClusterType::METACLUSTER_MANAGEMENT, ForceRemove::True, 5.0);
|
||||
|
||||
wait(self->ctx.runManagementTransaction(
|
||||
[self = self](Reference<typename DB::TransactionT> tr) { return registerInManagementCluster(self, tr); }));
|
||||
|
||||
// Don't use ctx to run this transaction because we have not set up the data cluster metadata on it and we
|
||||
// don't have a metacluster registration on the data cluster
|
||||
try {
|
||||
wait(configureDataCluster(self));
|
||||
} catch (Error& e) {
|
||||
state Error error = e;
|
||||
try {
|
||||
// Attempt to unregister the cluster if we could not configure the data cluster. We should only do this
|
||||
// if the data cluster state matches our ID and is in the REGISTERING in case somebody else has
|
||||
// attempted to complete the registration or start a new one.
|
||||
removeCluster.clusterId = self->clusterEntry.id;
|
||||
removeCluster.legalClusterStates.insert(DataClusterState::REGISTERING);
|
||||
wait(removeCluster.run());
|
||||
TraceEvent("RegisterClusterRolledBack")
|
||||
.detail("ClusterName", self->clusterName)
|
||||
.detail("ConnectionString", self->connectionString.toString());
|
||||
} catch (Error& e) {
|
||||
// Removing the cluster after failing to register the data cluster is a best effort attempt. If it
|
||||
// fails, the operator will need to remove it (or re-register it) themselves.
|
||||
TraceEvent(SevWarn, "RegisterClusterRollbackFailed")
|
||||
.detail("ClusterName", self->clusterName)
|
||||
.detail("ConnectionString", self->connectionString.toString());
|
||||
}
|
||||
throw error;
|
||||
}
|
||||
|
||||
wait(self->ctx.runManagementTransaction(
|
||||
[self = self](Reference<typename DB::TransactionT> tr) { return markClusterReady(self, tr); }));
|
||||
|
||||
return Void();
|
||||
}
|
||||
Future<Void> run() { return run(this); }
|
||||
};
|
||||
|
||||
// Registers the data cluster `name`, reachable through `connectionString`, using the management cluster `db`.
// All of the work is delegated to RegisterClusterImpl::run(); this actor completes once that run has finished.
ACTOR template <class DB>
Future<Void> registerCluster(Reference<DB> db,
                             ClusterName name,
                             ClusterConnectionString connectionString,
                             DataClusterEntry entry) {
	// Held as actor state so that the implementation object stays alive across the wait
	state RegisterClusterImpl<DB> registration(db, name, connectionString, entry);

	wait(registration.run());

	return Void();
}
|
||||
|
||||
ACTOR template <class Transaction>
|
||||
Future<std::map<ClusterName, DataClusterMetadata>> listClustersTransaction(Transaction tr,
|
||||
ClusterNameRef begin,
|
||||
|
@ -1332,7 +1363,7 @@ struct RestoreClusterImpl {
|
|||
ClusterConnectionString connectionString;
|
||||
ApplyManagementClusterUpdates applyManagementClusterUpdates;
|
||||
RestoreDryRun restoreDryRun;
|
||||
ForceJoinNewMetacluster forceJoinNewMetacluster;
|
||||
ForceJoin forceJoin;
|
||||
std::vector<std::string>& messages;
|
||||
|
||||
// Unique ID generated for this restore. Used to avoid concurrent restores
|
||||
|
@ -1352,11 +1383,11 @@ struct RestoreClusterImpl {
|
|||
ClusterConnectionString connectionString,
|
||||
ApplyManagementClusterUpdates applyManagementClusterUpdates,
|
||||
RestoreDryRun restoreDryRun,
|
||||
ForceJoinNewMetacluster forceJoinNewMetacluster,
|
||||
ForceJoin forceJoin,
|
||||
std::vector<std::string>& messages)
|
||||
: ctx(managementDb, {}, { DataClusterState::RESTORING }), clusterName(clusterName),
|
||||
connectionString(connectionString), applyManagementClusterUpdates(applyManagementClusterUpdates),
|
||||
restoreDryRun(restoreDryRun), forceJoinNewMetacluster(forceJoinNewMetacluster), messages(messages) {}
|
||||
restoreDryRun(restoreDryRun), forceJoin(forceJoin), messages(messages) {}
|
||||
|
||||
ACTOR template <class Transaction>
|
||||
static Future<Void> checkRestoreId(RestoreClusterImpl* self, Transaction tr) {
|
||||
|
@ -1422,7 +1453,7 @@ struct RestoreClusterImpl {
|
|||
if (!metaclusterRegistration.present()) {
|
||||
throw invalid_data_cluster();
|
||||
} else if (!metaclusterRegistration.get().matches(self->ctx.metaclusterRegistration.get())) {
|
||||
if (!self->forceJoinNewMetacluster) {
|
||||
if (!self->forceJoin) {
|
||||
TraceEvent(SevWarn, "MetaclusterRestoreClusterMismatch")
|
||||
.detail("ExistingRegistration", metaclusterRegistration.get())
|
||||
.detail("ManagementClusterRegistration", self->ctx.metaclusterRegistration.get());
|
||||
|
@ -1458,6 +1489,37 @@ struct RestoreClusterImpl {
|
|||
}
|
||||
}
|
||||
|
||||
// Store the cluster entry for the restored cluster.
//
// Builds a DataClusterEntry whose id is self->dataClusterId and whose state is RESTORING,
// then reads any metadata already stored under self->clusterName using `tr`.
//   - If metadata is present and is either not in the RESTORING state or does not match the
//     configuration formed from the new entry plus self->connectionString, a
//     "RestoredClusterAlreadyExists" trace event is logged and cluster_already_exists() is thrown.
//   - Otherwise, when self->restoreDryRun is false, the restore ID, the cluster entry and the
//     connection string are all set under self->clusterName and a
//     "RegisteredRestoringDataCluster" trace event is logged.
//   - Otherwise (dry run) nothing is set and nothing is logged.
//
// Parameters:
//   self - the restore operation; supplies dataClusterId, clusterName, connectionString,
//          restoreId and restoreDryRun.
//   tr   - transaction used for the lookup and for every set() call below. No commit is issued
//          inside this function.
//
// Returns Void() on every non-throwing path.
|
||||
ACTOR static Future<Void> registerRestoringClusterInManagementCluster(RestoreClusterImpl* self,
|
||||
Reference<typename DB::TransactionT> tr) {
|
||||
// Only id and clusterState are assigned; every other field of the entry keeps whatever
// value DataClusterEntry's default construction gives it.
state DataClusterEntry clusterEntry;
|
||||
clusterEntry.id = self->dataClusterId;
|
||||
clusterEntry.clusterState = DataClusterState::RESTORING;
|
||||
|
||||
state Optional<DataClusterMetadata> dataClusterMetadata = wait(tryGetClusterTransaction(tr, self->clusterName));
|
||||
// An existing record is accepted only when it is already RESTORING *and* matches the
// configuration being registered; any other existing record is treated as a conflict.
// Presumably the accepted case covers a repeated attempt of the same restore -- TODO confirm.
if (dataClusterMetadata.present() &&
|
||||
(dataClusterMetadata.get().entry.clusterState != DataClusterState::RESTORING ||
|
||||
!dataClusterMetadata.get().matchesConfiguration(
|
||||
DataClusterMetadata(clusterEntry, self->connectionString)))) {
|
||||
TraceEvent("RestoredClusterAlreadyExists").detail("ClusterName", self->clusterName);
|
||||
throw cluster_already_exists();
|
||||
} else if (!self->restoreDryRun) {
|
||||
// Record this restore's ID as the active restore for the cluster name.
MetaclusterMetadata::activeRestoreIds().set(tr, self->clusterName, self->restoreId);
|
||||
|
||||
// Store the RESTORING entry and the connection string under the same cluster name.
ManagementClusterMetadata::dataClusters().set(tr, self->clusterName, clusterEntry);
|
||||
ManagementClusterMetadata::dataClusterConnectionRecords.set(tr, self->clusterName, self->connectionString);
|
||||
|
||||
// "Capacity" is read from clusterEntry.capacity, which this function never assigns.
// NOTE(review): "Version" is read via tr->getCommittedVersion() although no commit happens
// in this function before the call -- confirm the logged value is meaningful here.
TraceEvent("RegisteredRestoringDataCluster")
|
||||
.detail("ClusterName", self->clusterName)
|
||||
.detail("ClusterID", clusterEntry.id)
|
||||
.detail("Capacity", clusterEntry.capacity)
|
||||
.detail("Version", tr->getCommittedVersion())
|
||||
.detail("ConnectionString", self->connectionString.toString());
|
||||
}
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
// If adding a data cluster to a restored management cluster, write a metacluster registration entry
|
||||
// to attach it
|
||||
ACTOR static Future<Void> writeDataClusterRegistration(RestoreClusterImpl* self) {
|
||||
|
@ -1467,9 +1529,18 @@ struct RestoreClusterImpl {
|
|||
loop {
|
||||
try {
|
||||
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
state Future<bool> tombstoneFuture =
|
||||
MetaclusterMetadata::registrationTombstones().exists(tr, self->dataClusterId);
|
||||
|
||||
state Optional<MetaclusterRegistrationEntry> metaclusterRegistration =
|
||||
wait(MetaclusterMetadata::metaclusterRegistration().get(tr));
|
||||
|
||||
// Check if the cluster was removed concurrently
|
||||
bool tombstone = wait(tombstoneFuture);
|
||||
if (tombstone) {
|
||||
throw cluster_removed();
|
||||
}
|
||||
|
||||
MetaclusterRegistrationEntry dataClusterEntry =
|
||||
self->ctx.metaclusterRegistration.get().toDataClusterRegistration(self->clusterName,
|
||||
self->dataClusterId);
|
||||
|
@ -1480,7 +1551,8 @@ struct RestoreClusterImpl {
|
|||
}
|
||||
|
||||
TraceEvent(SevWarn, "MetaclusterRestoreClusterAlreadyRegistered")
|
||||
.detail("ExistingRegistration", metaclusterRegistration.get());
|
||||
.detail("ExistingRegistration", metaclusterRegistration.get())
|
||||
.detail("NewRegistration", dataClusterEntry);
|
||||
throw cluster_already_registered();
|
||||
}
|
||||
|
||||
|
@ -2073,7 +2145,7 @@ struct RestoreClusterImpl {
|
|||
wait(self->runRestoreDataClusterTransaction(
|
||||
[self = self](Reference<ITransaction> tr) { return getTenantsFromDataCluster(self, tr); },
|
||||
RunOnDisconnectedCluster::False,
|
||||
RunOnMismatchedCluster(self->restoreDryRun && self->forceJoinNewMetacluster)));
|
||||
RunOnMismatchedCluster(self->restoreDryRun && self->forceJoin)));
|
||||
|
||||
// Fix any differences between the data cluster and the management cluster
|
||||
wait(reconcileTenants(self));
|
||||
|
@ -2100,15 +2172,7 @@ struct RestoreClusterImpl {
|
|||
|
||||
// Record the data cluster in the management cluster
|
||||
wait(self->ctx.runManagementTransaction([self = self](Reference<typename DB::TransactionT> tr) {
|
||||
if (!self->restoreDryRun) {
|
||||
MetaclusterMetadata::activeRestoreIds().set(tr, self->clusterName, self->restoreId);
|
||||
}
|
||||
|
||||
DataClusterEntry entry;
|
||||
entry.id = self->dataClusterId;
|
||||
entry.clusterState = DataClusterState::RESTORING;
|
||||
return registerInManagementCluster(
|
||||
tr, self->clusterName, entry, self->connectionString, self->restoreDryRun);
|
||||
return registerRestoringClusterInManagementCluster(self, tr);
|
||||
}));
|
||||
|
||||
// Write a metacluster registration entry in the data cluster
|
||||
|
@ -2164,10 +2228,10 @@ Future<Void> restoreCluster(Reference<DB> db,
|
|||
ClusterConnectionString connectionString,
|
||||
ApplyManagementClusterUpdates applyManagementClusterUpdates,
|
||||
RestoreDryRun restoreDryRun,
|
||||
ForceJoinNewMetacluster forceJoinNewMetacluster,
|
||||
ForceJoin forceJoin,
|
||||
std::vector<std::string>* messages) {
|
||||
state RestoreClusterImpl<DB> impl(
|
||||
db, name, connectionString, applyManagementClusterUpdates, restoreDryRun, forceJoinNewMetacluster, *messages);
|
||||
db, name, connectionString, applyManagementClusterUpdates, restoreDryRun, forceJoin, *messages);
|
||||
wait(impl.run());
|
||||
return Void();
|
||||
}
|
||||
|
|
|
@ -98,10 +98,11 @@ private:
|
|||
auto allocatedItr = data.clusterAllocatedMap.find(clusterName);
|
||||
if (!clusterMetadata.entry.hasCapacity()) {
|
||||
ASSERT(allocatedItr == data.clusterAllocatedMap.end());
|
||||
} else {
|
||||
ASSERT(allocatedItr != data.clusterAllocatedMap.end());
|
||||
} else if (allocatedItr != data.clusterAllocatedMap.end()) {
|
||||
ASSERT_EQ(allocatedItr->second, clusterMetadata.entry.allocated.numTenantGroups);
|
||||
++numFoundInAllocatedMap;
|
||||
} else {
|
||||
ASSERT_NE(clusterMetadata.entry.clusterState, DataClusterState::READY);
|
||||
}
|
||||
|
||||
// Check that the number of tenant groups in the cluster is smaller than the allocated number of tenant
|
||||
|
|
|
@ -164,14 +164,19 @@ private:
|
|||
ASSERT_EQ(t.size(), 3);
|
||||
TenantName tenantName = t.getString(1);
|
||||
int64_t tenantId = t.getInt(2);
|
||||
ASSERT(tenantName == self->managementMetadata.tenantData.tenantMap[tenantId].tenantName);
|
||||
self->managementMetadata.clusterTenantMap[t.getString(0)].insert(tenantId);
|
||||
MetaclusterTenantMapEntry const& entry = self->managementMetadata.tenantData.tenantMap[tenantId];
|
||||
bool renaming =
|
||||
entry.tenantState == MetaclusterAPI::TenantState::RENAMING && entry.renameDestination == tenantName;
|
||||
ASSERT(tenantName == entry.tenantName || renaming);
|
||||
if (!renaming) {
|
||||
ASSERT(self->managementMetadata.clusterTenantMap[t.getString(0)].insert(tenantId).second);
|
||||
}
|
||||
}
|
||||
|
||||
for (auto t : clusterTenantGroupTuples.results) {
|
||||
ASSERT_EQ(t.size(), 2);
|
||||
TenantGroupName tenantGroupName = t.getString(1);
|
||||
self->managementMetadata.clusterTenantGroupMap[t.getString(0)].insert(tenantGroupName);
|
||||
ASSERT(self->managementMetadata.clusterTenantGroupMap[t.getString(0)].insert(tenantGroupName).second);
|
||||
}
|
||||
|
||||
return Void();
|
||||
|
|
|
@ -143,7 +143,7 @@ struct MetaclusterManagementConcurrencyWorkload : TestWorkload {
|
|||
TraceEvent(SevDebug, "MetaclusterManagementConcurrencyRemovingCluster", debugId)
|
||||
.detail("ClusterName", clusterName);
|
||||
Future<bool> removeFuture = MetaclusterAPI::removeCluster(
|
||||
self->managementDb, clusterName, ClusterType::METACLUSTER_MANAGEMENT, false);
|
||||
self->managementDb, clusterName, ClusterType::METACLUSTER_MANAGEMENT, ForceRemove::False);
|
||||
Optional<bool> result = wait(timeout(removeFuture, deterministicRandom()->randomInt(1, 30)));
|
||||
if (result.present()) {
|
||||
ASSERT(result.get());
|
||||
|
|
|
@ -219,7 +219,7 @@ struct MetaclusterManagementWorkload : TestWorkload {
|
|||
try {
|
||||
loop {
|
||||
Future<bool> removeFuture = MetaclusterAPI::removeCluster(
|
||||
self->managementDb, clusterName, ClusterType::METACLUSTER_MANAGEMENT, detachCluster);
|
||||
self->managementDb, clusterName, ClusterType::METACLUSTER_MANAGEMENT, ForceRemove(detachCluster));
|
||||
try {
|
||||
Optional<bool> result = wait(timeout(removeFuture, deterministicRandom()->randomInt(1, 30)));
|
||||
if (result.present()) {
|
||||
|
@ -288,7 +288,7 @@ struct MetaclusterManagementWorkload : TestWorkload {
|
|||
dataDb->db->getConnectionRecord()->getConnectionString(),
|
||||
ApplyManagementClusterUpdates::True,
|
||||
RestoreDryRun(dryRun),
|
||||
ForceJoinNewMetacluster(forceJoin),
|
||||
ForceJoin(forceJoin),
|
||||
&messages);
|
||||
Optional<Void> result = wait(timeout(restoreFuture, deterministicRandom()->randomInt(1, 30)));
|
||||
if (result.present()) {
|
||||
|
@ -1001,7 +1001,7 @@ struct MetaclusterManagementWorkload : TestWorkload {
|
|||
std::vector<Future<Void>> removeClusterFutures;
|
||||
for (auto [clusterName, clusterMetadata] : dataClusters) {
|
||||
removeClusterFutures.push_back(success(MetaclusterAPI::removeCluster(
|
||||
self->managementDb, clusterName, ClusterType::METACLUSTER_MANAGEMENT, !deleteTenants)));
|
||||
self->managementDb, clusterName, ClusterType::METACLUSTER_MANAGEMENT, ForceRemove(!deleteTenants))));
|
||||
}
|
||||
|
||||
wait(waitForAll(removeClusterFutures));
|
||||
|
|
|
@ -239,7 +239,7 @@ struct MetaclusterRestoreWorkload : TestWorkload {
|
|||
Database dataDb,
|
||||
std::string backupUrl,
|
||||
bool addToMetacluster,
|
||||
ForceJoinNewMetacluster forceJoinNewMetacluster,
|
||||
ForceJoin forceJoin,
|
||||
int simultaneousRestoreCount,
|
||||
MetaclusterRestoreWorkload* self) {
|
||||
state FileBackupAgent backupAgent;
|
||||
|
@ -274,7 +274,7 @@ struct MetaclusterRestoreWorkload : TestWorkload {
|
|||
dataDb->getConnectionRecord()->getConnectionString(),
|
||||
ApplyManagementClusterUpdates::True,
|
||||
RestoreDryRun::True,
|
||||
forceJoinNewMetacluster,
|
||||
forceJoin,
|
||||
&messages));
|
||||
|
||||
state MetaclusterData<IDatabase> postDryRunMetaclusterData(self->managementDb);
|
||||
|
@ -298,7 +298,7 @@ struct MetaclusterRestoreWorkload : TestWorkload {
|
|||
dataDb->getConnectionRecord()->getConnectionString(),
|
||||
ApplyManagementClusterUpdates::True,
|
||||
RestoreDryRun::False,
|
||||
forceJoinNewMetacluster,
|
||||
forceJoin,
|
||||
&messages));
|
||||
TraceEvent("MetaclusterRestoreWorkloadRestoreComplete").detail("ClusterName", clusterName);
|
||||
}
|
||||
|
@ -516,8 +516,10 @@ struct MetaclusterRestoreWorkload : TestWorkload {
|
|||
TraceEvent("MetaclusterRestoreWorkloadProcessDataCluster").detail("FromCluster", clusterItr->first);
|
||||
|
||||
// Remove the data cluster from its old metacluster
|
||||
wait(success(MetaclusterAPI::removeCluster(
|
||||
clusterItr->second.db.getReference(), clusterItr->first, ClusterType::METACLUSTER_DATA, true)));
|
||||
wait(success(MetaclusterAPI::removeCluster(clusterItr->second.db.getReference(),
|
||||
clusterItr->first,
|
||||
ClusterType::METACLUSTER_DATA,
|
||||
ForceRemove::True)));
|
||||
TraceEvent("MetaclusterRestoreWorkloadForgotMetacluster").detail("ClusterName", clusterItr->first);
|
||||
|
||||
state std::pair<TenantCollisions, GroupCollisions> collisions =
|
||||
|
@ -554,7 +556,7 @@ struct MetaclusterRestoreWorkload : TestWorkload {
|
|||
clusterItr->second.db->getConnectionRecord()->getConnectionString(),
|
||||
ApplyManagementClusterUpdates::False,
|
||||
RestoreDryRun::True,
|
||||
ForceJoinNewMetacluster(deterministicRandom()->coinflip()),
|
||||
ForceJoin(deterministicRandom()->coinflip()),
|
||||
&messages));
|
||||
|
||||
state MetaclusterData<IDatabase> postDryRunMetaclusterData(self->managementDb);
|
||||
|
@ -582,7 +584,7 @@ struct MetaclusterRestoreWorkload : TestWorkload {
|
|||
clusterItr->second.db->getConnectionRecord()->getConnectionString(),
|
||||
ApplyManagementClusterUpdates::False,
|
||||
RestoreDryRun::False,
|
||||
ForceJoinNewMetacluster(deterministicRandom()->coinflip()),
|
||||
ForceJoin(deterministicRandom()->coinflip()),
|
||||
&messages));
|
||||
|
||||
ASSERT(collisions.first.empty() && collisions.second.empty());
|
||||
|
@ -597,8 +599,10 @@ struct MetaclusterRestoreWorkload : TestWorkload {
|
|||
|
||||
// If the restore did not succeed, remove the partially restored cluster
|
||||
try {
|
||||
wait(success(MetaclusterAPI::removeCluster(
|
||||
self->managementDb, clusterItr->first, ClusterType::METACLUSTER_MANAGEMENT, true)));
|
||||
wait(success(MetaclusterAPI::removeCluster(self->managementDb,
|
||||
clusterItr->first,
|
||||
ClusterType::METACLUSTER_MANAGEMENT,
|
||||
ForceRemove::True)));
|
||||
TraceEvent("MetaclusterRestoreWorkloadRemoveFailedCluster")
|
||||
.detail("ClusterName", clusterItr->first);
|
||||
} catch (Error& e) {
|
||||
|
@ -928,7 +932,7 @@ struct MetaclusterRestoreWorkload : TestWorkload {
|
|||
self->dataDbs[cluster].db,
|
||||
backupUrl.get(),
|
||||
!self->recoverManagementCluster,
|
||||
ForceJoinNewMetacluster(deterministicRandom()->coinflip()),
|
||||
ForceJoin(deterministicRandom()->coinflip()),
|
||||
backups.size(),
|
||||
self));
|
||||
}
|
||||
|
@ -945,7 +949,7 @@ struct MetaclusterRestoreWorkload : TestWorkload {
|
|||
self->dataDbs[cluster].db,
|
||||
backupUrl.get(),
|
||||
true,
|
||||
ForceJoinNewMetacluster::True,
|
||||
ForceJoin::True,
|
||||
backups.size(),
|
||||
self));
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue