From d981d5e9c2f186a3bce83795cdd5114cbfdda064 Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Wed, 8 Feb 2023 10:39:46 -0800 Subject: [PATCH 01/31] initial commit to separate metadata --- fdbcli/TenantCommands.actor.cpp | 19 +- fdbclient/MetaclusterManagement.actor.cpp | 4 +- fdbclient/Tenant.cpp | 84 ++++++-- .../fdbclient/MetaclusterManagement.actor.h | 201 +++++++++--------- fdbclient/include/fdbclient/Tenant.h | 80 +++++-- .../fdbclient/TenantManagement.actor.h | 3 - .../workloads/MetaclusterConsistency.actor.h | 27 ++- .../workloads/TenantConsistency.actor.h | 24 +-- ...antManagementConcurrencyWorkload.actor.cpp | 10 +- .../TenantManagementWorkload.actor.cpp | 37 ++-- 10 files changed, 283 insertions(+), 206 deletions(-) diff --git a/fdbcli/TenantCommands.actor.cpp b/fdbcli/TenantCommands.actor.cpp index b4577e6fc5..81b17d92e8 100644 --- a/fdbcli/TenantCommands.actor.cpp +++ b/fdbcli/TenantCommands.actor.cpp @@ -91,7 +91,7 @@ bool parseTenantListOptions(std::vector const& tokens, int startIndex, int& limit, int& offset, - std::vector& filters) { + std::vector& filters) { for (int tokenNum = startIndex; tokenNum < tokens.size(); ++tokenNum) { Optional value; StringRef token = tokens[tokenNum]; @@ -123,7 +123,7 @@ bool parseTenantListOptions(std::vector const& tokens, auto filterStrings = value.get().splitAny(","_sr); try { for (auto sref : filterStrings) { - filters.push_back(TenantMapEntry::stringToTenantState(sref.toString())); + filters.push_back(TenantAPI::stringToTenantState(sref.toString())); } } catch (Error& e) { fmt::print(stderr, "ERROR: unrecognized tenant state(s) `{}'.\n", value.get().toString()); @@ -185,7 +185,7 @@ ACTOR Future tenantCreateCommand(Reference db, std::vectorsetOption(FDBTransactionOptions::READ_SYSTEM_KEYS); state ClusterType clusterType = wait(TenantAPI::getClusterType(tr)); if (clusterType == ClusterType::METACLUSTER_MANAGEMENT) { - TenantMapEntry tenantEntry; + MetaclusterTenantMapEntry tenantEntry; AssignClusterAutomatically assignClusterAutomatically = AssignClusterAutomatically::True; for (auto const& [name, value] : configuration.get()) { if (name == "assigned_cluster"_sr) { @@ -337,7 +337,7 @@ ACTOR Future tenantListCommand(Reference db, std::vector filters; + state std::vector filters; if (tokens.size() >= 3) { beginTenant = tokens[2]; @@ -372,7 +372,7 @@ ACTOR Future tenantListCommand(Reference db, std::vector> tenants = + std::vector> tenants = wait(MetaclusterAPI::listTenantMetadata(db, beginTenant, endTenant, limit, offset, filters)); for (auto tenant : tenants) { tenantNames.push_back(tenant.first); @@ -433,7 +433,7 @@ ACTOR Future tenantGetCommand(Reference db, std::vector tenantGetCommand(Reference db, std::vector tenantConfigureCommand(Reference db, std::vectorsetOption(FDBTransactionOptions::READ_SYSTEM_KEYS); ClusterType clusterType = wait(TenantAPI::getClusterType(tr)); if (clusterType == ClusterType::METACLUSTER_MANAGEMENT) { - TenantMapEntry tenantEntry; wait(MetaclusterAPI::configureTenant(db, tokens[2], configuration.get())); } else { applyConfigurationToSpecialKeys(tr, tokens[2], configuration.get()); diff --git a/fdbclient/MetaclusterManagement.actor.cpp b/fdbclient/MetaclusterManagement.actor.cpp index d354ac77d1..b1b50a8169 100644 --- a/fdbclient/MetaclusterManagement.actor.cpp +++ b/fdbclient/MetaclusterManagement.actor.cpp @@ -70,8 +70,8 @@ KeyBackedMap, BinaryCodec KeyBackedSet ManagementClusterMetadata::clusterTenantIndex("metacluster/dataCluster/tenantMap/"_sr); KeyBackedSet 
ManagementClusterMetadata::clusterTenantGroupIndex("metacluster/dataCluster/tenantGroupMap/"_sr); -TenantMetadataSpecification& ManagementClusterMetadata::tenantMetadata() { - static TenantMetadataSpecification instance(""_sr); +TenantMetadataSpecification& ManagementClusterMetadata::tenantMetadata() { + static TenantMetadataSpecification instance(""_sr); return instance; } diff --git a/fdbclient/Tenant.cpp b/fdbclient/Tenant.cpp index 002facf9c0..3489216df1 100644 --- a/fdbclient/Tenant.cpp +++ b/fdbclient/Tenant.cpp @@ -50,7 +50,7 @@ int64_t prefixToId(KeyRef prefix, EnforceValidTenantId enforceValidTenantId) { } }; // namespace TenantAPI -std::string TenantMapEntry::tenantStateToString(TenantState tenantState) { +std::string TenantAPI::tenantStateToString(TenantState tenantState) { switch (tenantState) { case TenantState::REGISTERING: return "registering"; @@ -69,7 +69,7 @@ std::string TenantMapEntry::tenantStateToString(TenantState tenantState) { } } -TenantState TenantMapEntry::stringToTenantState(std::string stateStr) { +TenantAPI::TenantState TenantAPI::stringToTenantState(std::string stateStr) { std::transform(stateStr.begin(), stateStr.end(), stateStr.begin(), [](unsigned char c) { return std::tolower(c); }); if (stateStr == "registering") { return TenantState::REGISTERING; @@ -88,7 +88,7 @@ TenantState TenantMapEntry::stringToTenantState(std::string stateStr) { throw invalid_option(); } -std::string TenantMapEntry::tenantLockStateToString(TenantLockState tenantState) { +std::string TenantAPI::tenantLockStateToString(TenantLockState tenantState) { switch (tenantState) { case TenantLockState::UNLOCKED: return "unlocked"; @@ -101,7 +101,7 @@ std::string TenantMapEntry::tenantLockStateToString(TenantLockState tenantState) } } -TenantLockState TenantMapEntry::stringToTenantLockState(std::string stateStr) { +TenantAPI::TenantLockState TenantAPI::stringToTenantLockState(std::string stateStr) { std::transform(stateStr.begin(), stateStr.end(), stateStr.begin(), [](unsigned char c) { return std::tolower(c); }); if (stateStr == "unlocked") { return TenantLockState::UNLOCKED; @@ -127,15 +127,16 @@ json_spirit::mObject binaryToJson(StringRef bytes) { } TenantMapEntry::TenantMapEntry() {} -TenantMapEntry::TenantMapEntry(int64_t id, TenantName tenantName, TenantState tenantState) - : tenantName(tenantName), tenantState(tenantState) { +TenantMapEntry::TenantMapEntry(int64_t id, TenantName tenantName) : tenantName(tenantName) { setId(id); } -TenantMapEntry::TenantMapEntry(int64_t id, - TenantName tenantName, - TenantState tenantState, - Optional tenantGroup) - : tenantName(tenantName), tenantState(tenantState), tenantGroup(tenantGroup) { +TenantMapEntry::TenantMapEntry(int64_t id, TenantName tenantName, Optional tenantGroup) + : tenantName(tenantName), tenantGroup(tenantGroup) { + setId(id); +} +TenantMapEntry::TenantMapEntry(MetaclusterTenantMapEntry metaclusterEntry) + : tenantName(metaclusterEntry.tenantName), tenantLockState(metaclusterEntry.tenantLockState), + tenantGroup(metaclusterEntry.tenantGroup), configurationSequenceNum(metaclusterEntry.configurationSequenceNum) { setId(id); } @@ -151,26 +152,22 @@ std::string TenantMapEntry::toJson() const { tenantEntry["name"] = binaryToJson(tenantName); tenantEntry["prefix"] = binaryToJson(prefix); - tenantEntry["tenant_state"] = TenantMapEntry::tenantStateToString(tenantState); - if (assignedCluster.present()) { - tenantEntry["assigned_cluster"] = binaryToJson(assignedCluster.get()); - } if (tenantGroup.present()) { tenantEntry["tenant_group"] = 
binaryToJson(tenantGroup.get()); } + tenantEntry["lock_state"] = TenantAPI::tenantLockStateToString(tenantLockState); + return json_spirit::write_string(json_spirit::mValue(tenantEntry)); } bool TenantMapEntry::matchesConfiguration(TenantMapEntry const& other) const { - return tenantGroup == other.tenantGroup; + return tenantGroup == other.tenantGroup && tenantLockState == other.tenantLockState; } void TenantMapEntry::configure(Standalone parameter, Optional value) { if (parameter == "tenant_group"_sr) { tenantGroup = value; - } else if (parameter == "assigned_cluster"_sr) { - assignedCluster = value; } else { TraceEvent(SevWarnAlways, "UnknownTenantConfigurationParameter").detail("Parameter", parameter); throw invalid_tenant_configuration(); @@ -186,8 +183,49 @@ json_spirit::mObject TenantGroupEntry::toJson() const { return tenantGroupEntry; } -TenantMetadataSpecification& TenantMetadata::instance() { - static TenantMetadataSpecification _instance = TenantMetadataSpecification("\xff/"_sr); +std::string MetaclusterTenantMapEntry::toJson() const { + json_spirit::mObject tenantEntry; + tenantEntry["id"] = id; + + tenantEntry["name"] = binaryToJson(tenantName); + tenantEntry["prefix"] = binaryToJson(prefix); + + tenantEntry["tenant_state"] = TenantAPI::tenantStateToString(tenantState); + tenantEntry["assigned_cluster"] = binaryToJson(assignedCluster); + + if (tenantGroup.present()) { + tenantEntry["tenant_group"] = binaryToJson(tenantGroup.get()); + } + + tenantEntry["lock_state"] = TenantAPI::tenantLockStateToString(tenantLockState); + if (tenantState == TenantAPI::TenantState::RENAMING) { + ASSERT(renameDestination.present()); + tenantEntry["rename_destination"] = binaryToJson(renameDestination.get()); + } else if (tenantState == TenantAPI::TenantState::ERROR) { + tenantEntry["error"] = error; + } + + return json_spirit::write_string(json_spirit::mValue(tenantEntry)); +} + +bool MetaclusterTenantMapEntry::matchesConfiguration(MetaclusterTenantMapEntry const& other) const { + return tenantName == other.tenantName && tenantLockState == other.tenantLockState && + tenantGroup == other.tenantGroup && assignedCluster == other.assignedCluster; +} + +void MetaclusterTenantMapEntry::configure(Standalone parameter, Optional value) { + if (parameter == "tenant_group"_sr) { + tenantGroup = value; + } else if (parameter == "assigned_cluster"_sr && value.present()) { + assignedCluster = value.get(); + } else { + TraceEvent(SevWarnAlways, "UnknownTenantConfigurationParameter").detail("Parameter", parameter); + throw invalid_tenant_configuration(); + } +} + +TenantMetadataSpecification& TenantMetadata::instance() { + static TenantMetadataSpecification _instance = TenantMetadataSpecification("\xff/"_sr); return _instance; } @@ -215,12 +253,12 @@ TEST_CASE("/fdbclient/libb64/base64decoder") { } TEST_CASE("/fdbclient/TenantMapEntry/Serialization") { - TenantMapEntry entry1(1, "name"_sr, TenantState::READY); + TenantMapEntry entry1(1, "name"_sr); ASSERT(entry1.prefix == "\x00\x00\x00\x00\x00\x00\x00\x01"_sr); TenantMapEntry entry2 = TenantMapEntry::decode(entry1.encode()); ASSERT(entry1.id == entry2.id && entry1.prefix == entry2.prefix); - TenantMapEntry entry3(std::numeric_limits::max(), "name"_sr, TenantState::READY); + TenantMapEntry entry3(std::numeric_limits::max(), "name"_sr); ASSERT(entry3.prefix == "\x7f\xff\xff\xff\xff\xff\xff\xff"_sr); TenantMapEntry entry4 = TenantMapEntry::decode(entry3.encode()); ASSERT(entry3.id == entry4.id && entry3.prefix == entry4.prefix); @@ -231,7 +269,7 @@ 
TEST_CASE("/fdbclient/TenantMapEntry/Serialization") { int64_t maxPlusOne = std::min(UINT64_C(1) << bits, std::numeric_limits::max()); int64_t id = deterministicRandom()->randomInt64(min, maxPlusOne); - TenantMapEntry entry(id, "name"_sr, TenantState::READY); + TenantMapEntry entry(id, "name"_sr); int64_t bigEndianId = bigEndian64(id); ASSERT(entry.id == id && entry.prefix == StringRef(reinterpret_cast(&bigEndianId), 8)); diff --git a/fdbclient/include/fdbclient/MetaclusterManagement.actor.h b/fdbclient/include/fdbclient/MetaclusterManagement.actor.h index 177c5fabf1..28922c49ac 100644 --- a/fdbclient/include/fdbclient/MetaclusterManagement.actor.h +++ b/fdbclient/include/fdbclient/MetaclusterManagement.actor.h @@ -96,7 +96,7 @@ struct ManagementClusterMetadata { } }; - static TenantMetadataSpecification& tenantMetadata(); + static TenantMetadataSpecification& tenantMetadata(); // A map from cluster name to the metadata associated with a cluster static KeyBackedObjectMap& dataClusters(); @@ -376,33 +376,33 @@ struct MetaclusterOperationContext { }; template -Future> tryGetTenantTransaction(Transaction tr, int64_t tenantId) { +Future> tryGetTenantTransaction(Transaction tr, int64_t tenantId) { tr->setOption(FDBTransactionOptions::RAW_ACCESS); return ManagementClusterMetadata::tenantMetadata().tenantMap.get(tr, tenantId); } ACTOR template -Future> tryGetTenantTransaction(Transaction tr, TenantName name) { +Future> tryGetTenantTransaction(Transaction tr, TenantName name) { tr->setOption(FDBTransactionOptions::RAW_ACCESS); Optional tenantId = wait(ManagementClusterMetadata::tenantMetadata().tenantNameIndex.get(tr, name)); if (tenantId.present()) { - Optional entry = + Optional entry = wait(ManagementClusterMetadata::tenantMetadata().tenantMap.get(tr, tenantId.get())); return entry; } else { - return Optional(); + return Optional(); } } ACTOR template -Future> tryGetTenant(Reference db, Tenant tenant) { +Future> tryGetTenant(Reference db, Tenant tenant) { state Reference tr = db->createTransaction(); loop { try { tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS); tr->setOption(FDBTransactionOptions::READ_LOCK_AWARE); - Optional entry = wait(tryGetTenantTransaction(tr, tenant)); + Optional entry = wait(tryGetTenantTransaction(tr, tenant)); return entry; } catch (Error& e) { wait(safeThreadFutureToFuture(tr->onError(e))); @@ -411,8 +411,8 @@ Future> tryGetTenant(Reference db, Tenant tenant) { } ACTOR template -Future getTenantTransaction(Transaction tr, Tenant tenant) { - Optional entry = wait(tryGetTenantTransaction(tr, tenant)); +Future getTenantTransaction(Transaction tr, Tenant tenant) { + Optional entry = wait(tryGetTenantTransaction(tr, tenant)); if (!entry.present()) { throw tenant_not_found(); } @@ -421,8 +421,8 @@ Future getTenantTransaction(Transaction tr, Tenant tenant) { } ACTOR template -Future getTenant(Reference db, Tenant tenant) { - Optional entry = wait(tryGetTenant(db, tenant)); +Future getTenant(Reference db, Tenant tenant) { + Optional entry = wait(tryGetTenant(db, tenant)); if (!entry.present()) { throw tenant_not_found(); } @@ -1010,7 +1010,7 @@ Future> listClusters(Reference db template void managementClusterAddTenantToGroup(Transaction tr, - TenantMapEntry tenantEntry, + MetaclusterTenantMapEntry tenantEntry, DataClusterMetadata* clusterMetadata, bool groupAlreadyExists) { if (tenantEntry.tenantGroup.present()) { @@ -1022,7 +1022,7 @@ void managementClusterAddTenantToGroup(Transaction tr, ManagementClusterMetadata::tenantMetadata().tenantGroupMap.set( tr, 
tenantEntry.tenantGroup.get(), TenantGroupEntry(tenantEntry.assignedCluster)); ManagementClusterMetadata::clusterTenantGroupIndex.insert( - tr, Tuple::makeTuple(tenantEntry.assignedCluster.get(), tenantEntry.tenantGroup.get())); + tr, Tuple::makeTuple(tenantEntry.assignedCluster, tenantEntry.tenantGroup.get())); } ManagementClusterMetadata::tenantMetadata().tenantGroupTenantIndex.insert( tr, Tuple::makeTuple(tenantEntry.tenantGroup.get(), tenantEntry.id)); @@ -1035,7 +1035,7 @@ void managementClusterAddTenantToGroup(Transaction tr, ++updatedEntry.allocated.numTenantGroups; updateClusterMetadata( - tr, tenantEntry.assignedCluster.get(), *clusterMetadata, Optional(), updatedEntry); + tr, tenantEntry.assignedCluster, *clusterMetadata, Optional(), updatedEntry); clusterMetadata->entry = updatedEntry; } @@ -1043,7 +1043,7 @@ void managementClusterAddTenantToGroup(Transaction tr, ACTOR template Future managementClusterRemoveTenantFromGroup(Transaction tr, - TenantMapEntry tenantEntry, + MetaclusterTenantMapEntry tenantEntry, DataClusterMetadata* clusterMetadata) { state bool updateClusterCapacity = !tenantEntry.tenantGroup.present(); if (tenantEntry.tenantGroup.present()) { @@ -1059,7 +1059,7 @@ Future managementClusterRemoveTenantFromGroup(Transaction tr, if (result.results.size() == 0) { ManagementClusterMetadata::clusterTenantGroupIndex.erase( - tr, Tuple::makeTuple(tenantEntry.assignedCluster.get(), tenantEntry.tenantGroup.get())); + tr, Tuple::makeTuple(tenantEntry.assignedCluster, tenantEntry.tenantGroup.get())); ManagementClusterMetadata::tenantMetadata().tenantGroupMap.erase(tr, tenantEntry.tenantGroup.get()); updateClusterCapacity = true; @@ -1072,7 +1072,7 @@ Future managementClusterRemoveTenantFromGroup(Transaction tr, DataClusterEntry updatedEntry = clusterMetadata->entry; --updatedEntry.allocated.numTenantGroups; updateClusterMetadata( - tr, tenantEntry.assignedCluster.get(), *clusterMetadata, Optional(), updatedEntry); + tr, tenantEntry.assignedCluster, *clusterMetadata, Optional(), updatedEntry); clusterMetadata->entry = updatedEntry; } @@ -1086,13 +1086,13 @@ struct CreateTenantImpl { AssignClusterAutomatically assignClusterAutomatically; // Initialization parameters - TenantMapEntry tenantEntry; + MetaclusterTenantMapEntry tenantEntry; // Parameter set if tenant creation permanently fails on the data cluster Optional replaceExistingTenantId; CreateTenantImpl(Reference managementDb, - TenantMapEntry tenantEntry, + MetaclusterTenantMapEntry tenantEntry, AssignClusterAutomatically assignClusterAutomatically) : ctx(managementDb), tenantEntry(tenantEntry), assignClusterAutomatically(assignClusterAutomatically) {} @@ -1117,10 +1117,11 @@ struct CreateTenantImpl { ACTOR static Future checkForExistingTenant(CreateTenantImpl* self, Reference tr) { // Check if the tenant already exists. If it's partially created and matches the parameters we // specified, continue creating it. Otherwise, fail with an error. 
- state Optional existingEntry = wait(tryGetTenantTransaction(tr, self->tenantEntry.tenantName)); + state Optional existingEntry = + wait(tryGetTenantTransaction(tr, self->tenantEntry.tenantName)); if (existingEntry.present()) { if (!existingEntry.get().matchesConfiguration(self->tenantEntry) || - existingEntry.get().tenantState != TenantState::REGISTERING) { + existingEntry.get().tenantState != TenantAPI::TenantState::REGISTERING) { // The tenant already exists and is either completely created or has a different // configuration throw tenant_already_exists(); @@ -1135,7 +1136,7 @@ struct CreateTenantImpl { throw invalid_tenant_configuration(); } self->tenantEntry = existingEntry.get(); - wait(self->ctx.setCluster(tr, existingEntry.get().assignedCluster.get())); + wait(self->ctx.setCluster(tr, existingEntry.get().assignedCluster)); return true; } else { // The previous creation is permanently failed, so cleanup the tenant and create it again from scratch @@ -1144,16 +1145,15 @@ struct CreateTenantImpl { ManagementClusterMetadata::tenantMetadata().tenantMap.erase(tr, existingEntry.get().id); ManagementClusterMetadata::tenantMetadata().tenantCount.atomicOp(tr, -1, MutationRef::AddValue); ManagementClusterMetadata::clusterTenantCount.atomicOp( - tr, existingEntry.get().assignedCluster.get(), -1, MutationRef::AddValue); + tr, existingEntry.get().assignedCluster, -1, MutationRef::AddValue); ManagementClusterMetadata::clusterTenantIndex.erase( tr, - Tuple::makeTuple(existingEntry.get().assignedCluster.get(), - self->tenantEntry.tenantName, - existingEntry.get().id)); + Tuple::makeTuple( + existingEntry.get().assignedCluster, self->tenantEntry.tenantName, existingEntry.get().id)); state DataClusterMetadata previousAssignedClusterMetadata = - wait(getClusterTransaction(tr, existingEntry.get().assignedCluster.get())); + wait(getClusterTransaction(tr, existingEntry.get().assignedCluster)); wait(managementClusterRemoveTenantFromGroup(tr, existingEntry.get(), &previousAssignedClusterMetadata)); } @@ -1178,10 +1178,10 @@ struct CreateTenantImpl { if (groupEntry.present()) { ASSERT(groupEntry.get().assignedCluster.present()); if (!self->assignClusterAutomatically && - groupEntry.get().assignedCluster.get() != self->tenantEntry.assignedCluster.get()) { + groupEntry.get().assignedCluster.get() != self->tenantEntry.assignedCluster) { TraceEvent("MetaclusterCreateTenantGroupClusterMismatch") .detail("TenantGroupCluster", groupEntry.get().assignedCluster.get()) - .detail("SpecifiedCluster", self->tenantEntry.assignedCluster.get()); + .detail("SpecifiedCluster", self->tenantEntry.assignedCluster); throw invalid_tenant_configuration(); } return std::make_pair(groupEntry.get().assignedCluster.get(), true); @@ -1195,11 +1195,11 @@ struct CreateTenantImpl { // If preferred cluster is specified, look for that one. 
if (!self->assignClusterAutomatically) { DataClusterMetadata dataClusterMetadata = - wait(getClusterTransaction(tr, self->tenantEntry.assignedCluster.get())); + wait(getClusterTransaction(tr, self->tenantEntry.assignedCluster)); if (!dataClusterMetadata.entry.hasCapacity()) { throw cluster_no_capacity(); } - dataClusterNames.push_back(self->tenantEntry.assignedCluster.get()); + dataClusterNames.push_back(self->tenantEntry.assignedCluster); } else { state KeyBackedSet::RangeResultType availableClusters = wait(ManagementClusterMetadata::clusterCapacityIndex.getRange( @@ -1270,7 +1270,7 @@ struct CreateTenantImpl { self->tenantEntry.setId(lastId.orDefault(-1) + 1); ManagementClusterMetadata::tenantMetadata().lastTenantId.set(tr, self->tenantEntry.id); - self->tenantEntry.tenantState = TenantState::REGISTERING; + self->tenantEntry.tenantState = TenantAPI::TenantState::REGISTERING; ManagementClusterMetadata::tenantMetadata().tenantMap.set(tr, self->tenantEntry.id, self->tenantEntry); ManagementClusterMetadata::tenantMetadata().tenantNameIndex.set( tr, self->tenantEntry.tenantName, self->tenantEntry.id); @@ -1278,20 +1278,19 @@ struct CreateTenantImpl { ManagementClusterMetadata::tenantMetadata().tenantCount.atomicOp(tr, 1, MutationRef::AddValue); ManagementClusterMetadata::clusterTenantCount.atomicOp( - tr, self->tenantEntry.assignedCluster.get(), 1, MutationRef::AddValue); + tr, self->tenantEntry.assignedCluster, 1, MutationRef::AddValue); int64_t clusterTenantCount = wait(ManagementClusterMetadata::clusterTenantCount.getD( - tr, self->tenantEntry.assignedCluster.get(), Snapshot::False, 0)); + tr, self->tenantEntry.assignedCluster, Snapshot::False, 0)); if (clusterTenantCount > CLIENT_KNOBS->MAX_TENANTS_PER_CLUSTER) { throw cluster_no_capacity(); } // Updated indexes to include the new tenant - ManagementClusterMetadata::clusterTenantIndex.insert(tr, - Tuple::makeTuple(self->tenantEntry.assignedCluster.get(), - self->tenantEntry.tenantName, - self->tenantEntry.id)); + ManagementClusterMetadata::clusterTenantIndex.insert( + tr, + Tuple::makeTuple(self->tenantEntry.assignedCluster, self->tenantEntry.tenantName, self->tenantEntry.id)); wait(setClusterFuture); @@ -1308,7 +1307,7 @@ struct CreateTenantImpl { } ACTOR static Future storeTenantInDataCluster(CreateTenantImpl* self, Reference tr) { - std::pair, bool> dataClusterTenant = + std::pair, bool> dataClusterTenant = wait(TenantAPI::createTenantTransaction(tr, self->tenantEntry, ClusterType::METACLUSTER_DATA)); // If the tenant map entry is empty, then we encountered a tombstone indicating that the tenant was @@ -1321,14 +1320,15 @@ struct CreateTenantImpl { } ACTOR static Future markTenantReady(CreateTenantImpl* self, Reference tr) { - state Optional managementEntry = wait(tryGetTenantTransaction(tr, self->tenantEntry.id)); + state Optional managementEntry = + wait(tryGetTenantTransaction(tr, self->tenantEntry.id)); if (!managementEntry.present()) { throw tenant_removed(); } - if (managementEntry.get().tenantState == TenantState::REGISTERING) { - TenantMapEntry updatedEntry = managementEntry.get(); - updatedEntry.tenantState = TenantState::READY; + if (managementEntry.get().tenantState == TenantAPI::TenantState::REGISTERING) { + MetaclusterTenantMapEntry updatedEntry = managementEntry.get(); + updatedEntry.tenantState = TenantAPI::TenantState::READY; ManagementClusterMetadata::tenantMetadata().tenantMap.set(tr, updatedEntry.id, updatedEntry); ManagementClusterMetadata::tenantMetadata().lastTenantModification.setVersionstamp(tr, Versionstamp(), 
0); } @@ -1372,7 +1372,7 @@ struct CreateTenantImpl { ACTOR template Future createTenant(Reference db, - TenantMapEntry tenantEntry, + MetaclusterTenantMapEntry tenantEntry, AssignClusterAutomatically assignClusterAutomatically) { state CreateTenantImpl impl(db, tenantEntry, assignClusterAutomatically); wait(impl.run()); @@ -1404,17 +1404,17 @@ struct DeleteTenantImpl { tr, self->tenantName.get(), Snapshot::False, TenantInfo::INVALID_TENANT))); } - state TenantMapEntry tenantEntry = wait(getTenantTransaction(tr, resolvedId)); + state MetaclusterTenantMapEntry tenantEntry = wait(getTenantTransaction(tr, resolvedId)); // Disallow removing the "new" name of a renamed tenant before it completes if (self->tenantName.present() && tenantEntry.tenantName != self->tenantName.get()) { - ASSERT(tenantEntry.tenantState == TenantState::RENAMING || - tenantEntry.tenantState == TenantState::REMOVING); + ASSERT(tenantEntry.tenantState == TenantAPI::TenantState::RENAMING || + tenantEntry.tenantState == TenantAPI::TenantState::REMOVING); throw tenant_not_found(); } - wait(self->ctx.setCluster(tr, tenantEntry.assignedCluster.get())); - return std::make_pair(resolvedId, tenantEntry.tenantState == TenantState::REMOVING); + wait(self->ctx.setCluster(tr, tenantEntry.assignedCluster)); + return std::make_pair(resolvedId, tenantEntry.tenantState == TenantAPI::TenantState::REMOVING); } // Does an initial check if the tenant is empty. This is an optimization to prevent us marking a tenant @@ -1441,10 +1441,10 @@ struct DeleteTenantImpl { // Mark the tenant as being in a removing state on the management cluster ACTOR static Future markTenantInRemovingState(DeleteTenantImpl* self, Reference tr) { - state TenantMapEntry tenantEntry = wait(getTenantTransaction(tr, self->tenantId)); + state MetaclusterTenantMapEntry tenantEntry = wait(getTenantTransaction(tr, self->tenantId)); - if (tenantEntry.tenantState != TenantState::REMOVING) { - tenantEntry.tenantState = TenantState::REMOVING; + if (tenantEntry.tenantState != TenantAPI::TenantState::REMOVING) { + tenantEntry.tenantState = TenantAPI::TenantState::REMOVING; ManagementClusterMetadata::tenantMetadata().tenantMap.set(tr, tenantEntry.id, tenantEntry); ManagementClusterMetadata::tenantMetadata().lastTenantModification.setVersionstamp(tr, Versionstamp(), 0); @@ -1456,13 +1456,13 @@ struct DeleteTenantImpl { // Delete the tenant and related metadata on the management cluster ACTOR static Future deleteTenantFromManagementCluster(DeleteTenantImpl* self, Reference tr) { - state Optional tenantEntry = wait(tryGetTenantTransaction(tr, self->tenantId)); + state Optional tenantEntry = wait(tryGetTenantTransaction(tr, self->tenantId)); if (!tenantEntry.present()) { return Void(); } - ASSERT(tenantEntry.get().tenantState == TenantState::REMOVING); + ASSERT(tenantEntry.get().tenantState == TenantAPI::TenantState::REMOVING); // Erase the tenant entry itself ManagementClusterMetadata::tenantMetadata().tenantMap.erase(tr, tenantEntry.get().id); @@ -1472,12 +1472,11 @@ struct DeleteTenantImpl { // This is idempotent because this function is only called if the tenant is in the map ManagementClusterMetadata::tenantMetadata().tenantCount.atomicOp(tr, -1, MutationRef::AddValue); ManagementClusterMetadata::clusterTenantCount.atomicOp( - tr, tenantEntry.get().assignedCluster.get(), -1, MutationRef::AddValue); + tr, tenantEntry.get().assignedCluster, -1, MutationRef::AddValue); // Remove the tenant from the cluster -> tenant index ManagementClusterMetadata::clusterTenantIndex.erase( - tr, - 
Tuple::makeTuple(tenantEntry.get().assignedCluster.get(), tenantEntry.get().tenantName, self->tenantId)); + tr, Tuple::makeTuple(tenantEntry.get().assignedCluster, tenantEntry.get().tenantName, self->tenantId)); if (tenantEntry.get().renameDestination.present()) { // If renaming, remove the metadata associated with the tenant destination @@ -1486,9 +1485,8 @@ struct DeleteTenantImpl { ManagementClusterMetadata::clusterTenantIndex.erase( tr, - Tuple::makeTuple(tenantEntry.get().assignedCluster.get(), - tenantEntry.get().renameDestination.get(), - self->tenantId)); + Tuple::makeTuple( + tenantEntry.get().assignedCluster, tenantEntry.get().renameDestination.get(), self->tenantId)); } // Remove the tenant from its tenant group @@ -1576,21 +1574,21 @@ Future>> listTenants(Reference db // Scan the tenant index to get a list of tenant IDs, and then lookup the metadata for each ID individually ACTOR template -Future>> listTenantMetadataTransaction( +Future>> listTenantMetadataTransaction( Transaction tr, std::vector> tenantIds) { state int idIdx = 0; - state std::vector>> futures; + state std::vector>> futures; for (; idIdx < tenantIds.size(); ++idIdx) { futures.push_back(MetaclusterAPI::tryGetTenantTransaction(tr, tenantIds[idIdx].second)); } wait(waitForAll(futures)); - std::vector> results; + std::vector> results; results.reserve(futures.size()); for (int i = 0; i < futures.size(); ++i) { - const TenantMapEntry& entry = futures[i].get().get(); + const MetaclusterTenantMapEntry& entry = futures[i].get().get(); results.emplace_back(entry.tenantName, entry); } @@ -1598,26 +1596,26 @@ Future>> listTenantMetadataTra } ACTOR template -Future>> listTenantMetadataTransaction(Transaction tr, - TenantNameRef begin, - TenantNameRef end, - int limit) { +Future>> listTenantMetadataTransaction(Transaction tr, + TenantNameRef begin, + TenantNameRef end, + int limit) { std::vector> matchingTenants = wait(listTenantsTransaction(tr, begin, end, limit)); - std::vector> results = + std::vector> results = wait(listTenantMetadataTransaction(tr, matchingTenants)); return results; } ACTOR template -Future>> listTenantMetadata( +Future>> listTenantMetadata( Reference db, TenantName begin, TenantName end, int limit, int offset = 0, - std::vector filters = std::vector()) { + std::vector filters = std::vector()) { state Reference tr = db->createTransaction(); - state std::vector> results; + state std::vector> results; loop { try { @@ -1633,7 +1631,7 @@ Future>> listTenantMetadata( // read in batch state int count = 0; loop { - std::vector> tenantBatch = + std::vector> tenantBatch = wait(MetaclusterAPI::listTenantMetadataTransaction(tr, begin, end, std::max(limit + offset, 1000))); if (tenantBatch.empty()) { @@ -1670,7 +1668,7 @@ struct ConfigureTenantImpl { std::map, Optional> configurationParameters; // Parameters set in updateManagementCluster - TenantMapEntry updatedEntry; + MetaclusterTenantMapEntry updatedEntry; ConfigureTenantImpl(Reference managementDb, TenantName tenantName, @@ -1681,10 +1679,10 @@ struct ConfigureTenantImpl { // structures. It does not update the TenantMapEntry stored in the tenant map. 
ACTOR static Future updateTenantGroup(ConfigureTenantImpl* self, Reference tr, - TenantMapEntry tenantEntry, + MetaclusterTenantMapEntry tenantEntry, Optional desiredGroup) { - state TenantMapEntry entryWithUpdatedGroup = tenantEntry; + state MetaclusterTenantMapEntry entryWithUpdatedGroup = tenantEntry; entryWithUpdatedGroup.tenantGroup = desiredGroup; if (tenantEntry.tenantGroup == desiredGroup) { @@ -1738,21 +1736,21 @@ struct ConfigureTenantImpl { // Updates the configuration in the management cluster and marks it as being in the UPDATING_CONFIGURATION state ACTOR static Future updateManagementCluster(ConfigureTenantImpl* self, Reference tr) { - state Optional tenantEntry = wait(tryGetTenantTransaction(tr, self->tenantName)); + state Optional tenantEntry = wait(tryGetTenantTransaction(tr, self->tenantName)); if (!tenantEntry.present()) { throw tenant_not_found(); } - if (tenantEntry.get().tenantState != TenantState::READY && - tenantEntry.get().tenantState != TenantState::UPDATING_CONFIGURATION) { + if (tenantEntry.get().tenantState != TenantAPI::TenantState::READY && + tenantEntry.get().tenantState != TenantAPI::TenantState::UPDATING_CONFIGURATION) { throw invalid_tenant_state(); } - wait(self->ctx.setCluster(tr, tenantEntry.get().assignedCluster.get())); + wait(self->ctx.setCluster(tr, tenantEntry.get().assignedCluster)); self->updatedEntry = tenantEntry.get(); - self->updatedEntry.tenantState = TenantState::UPDATING_CONFIGURATION; + self->updatedEntry.tenantState = TenantAPI::TenantState::UPDATING_CONFIGURATION; state std::map, Optional>::iterator configItr; for (configItr = self->configurationParameters.begin(); configItr != self->configurationParameters.end(); @@ -1782,8 +1780,6 @@ struct ConfigureTenantImpl { } TenantMapEntry dataClusterEntry = self->updatedEntry; - dataClusterEntry.tenantState = TenantState::READY; - dataClusterEntry.assignedCluster = {}; wait(TenantAPI::configureTenantTransaction(tr, tenantEntry.get(), dataClusterEntry)); return Void(); @@ -1792,14 +1788,15 @@ struct ConfigureTenantImpl { // Updates the tenant state in the management cluster to READY ACTOR static Future markManagementTenantAsReady(ConfigureTenantImpl* self, Reference tr) { - state Optional tenantEntry = wait(tryGetTenantTransaction(tr, self->updatedEntry.id)); + state Optional tenantEntry = + wait(tryGetTenantTransaction(tr, self->updatedEntry.id)); - if (!tenantEntry.present() || tenantEntry.get().tenantState != TenantState::UPDATING_CONFIGURATION || + if (!tenantEntry.present() || tenantEntry.get().tenantState != TenantAPI::TenantState::UPDATING_CONFIGURATION || tenantEntry.get().configurationSequenceNum > self->updatedEntry.configurationSequenceNum) { return Void(); } - tenantEntry.get().tenantState = TenantState::READY; + tenantEntry.get().tenantState = TenantAPI::TenantState::READY; ManagementClusterMetadata::tenantMetadata().tenantMap.set(tr, tenantEntry.get().id, tenantEntry.get()); ManagementClusterMetadata::tenantMetadata().lastTenantModification.setVersionstamp(tr, Versionstamp(), 0); return Void(); @@ -1844,7 +1841,7 @@ struct RenameTenantImpl { ACTOR static Future markTenantsInRenamingState(RenameTenantImpl* self, Reference tr) { - state TenantMapEntry tenantEntry; + state MetaclusterTenantMapEntry tenantEntry; state Optional newNameId; wait(store(tenantEntry, getTenantTransaction(tr, self->oldName)) && store(newNameId, ManagementClusterMetadata::tenantMetadata().tenantNameIndex.get(tr, self->newName))); @@ -1858,7 +1855,7 @@ struct RenameTenantImpl { self->tenantId = 
tenantEntry.id; // If marked for deletion, abort the rename - if (tenantEntry.tenantState == TenantState::REMOVING) { + if (tenantEntry.tenantState == TenantAPI::TenantState::REMOVING) { CODE_PROBE(true, "Metacluster rename candidates marked for deletion"); throw tenant_removed(); } @@ -1868,9 +1865,9 @@ struct RenameTenantImpl { throw tenant_already_exists(); } - wait(self->ctx.setCluster(tr, tenantEntry.assignedCluster.get())); + wait(self->ctx.setCluster(tr, tenantEntry.assignedCluster)); - if (tenantEntry.tenantState == TenantState::RENAMING) { + if (tenantEntry.tenantState == TenantAPI::TenantState::RENAMING) { if (tenantEntry.tenantName != self->oldName) { CODE_PROBE(true, "Renaming a tenant that is currently the destination of another rename"); throw tenant_not_found(); @@ -1885,7 +1882,7 @@ struct RenameTenantImpl { } } - if (tenantEntry.tenantState != TenantState::READY) { + if (tenantEntry.tenantState != TenantAPI::TenantState::READY) { CODE_PROBE(true, "Metacluster unable to proceed with rename operation"); throw invalid_tenant_state(); } @@ -1893,15 +1890,15 @@ struct RenameTenantImpl { self->configurationSequenceNum = tenantEntry.configurationSequenceNum + 1; // Check cluster capacity. If we would exceed the amount due to temporary extra tenants // then we deny the rename request altogether. - int64_t clusterTenantCount = wait(ManagementClusterMetadata::clusterTenantCount.getD( - tr, tenantEntry.assignedCluster.get(), Snapshot::False, 0)); + int64_t clusterTenantCount = wait( + ManagementClusterMetadata::clusterTenantCount.getD(tr, tenantEntry.assignedCluster, Snapshot::False, 0)); if (clusterTenantCount + 1 > CLIENT_KNOBS->MAX_TENANTS_PER_CLUSTER) { throw cluster_no_capacity(); } - TenantMapEntry updatedEntry = tenantEntry; - updatedEntry.tenantState = TenantState::RENAMING; + MetaclusterTenantMapEntry updatedEntry = tenantEntry; + updatedEntry.tenantState = TenantAPI::TenantState::RENAMING; updatedEntry.renameDestination = self->newName; updatedEntry.configurationSequenceNum = self->configurationSequenceNum; @@ -1911,7 +1908,7 @@ struct RenameTenantImpl { // Updated indexes to include the new tenant ManagementClusterMetadata::clusterTenantIndex.insert( - tr, Tuple::makeTuple(updatedEntry.assignedCluster.get(), self->newName, self->tenantId)); + tr, Tuple::makeTuple(updatedEntry.assignedCluster, self->newName, self->tenantId)); return Void(); } @@ -1930,7 +1927,7 @@ struct RenameTenantImpl { ACTOR static Future finishRenameFromManagementCluster(RenameTenantImpl* self, Reference tr) { - Optional tenantEntry = wait(tryGetTenantTransaction(tr, self->tenantId)); + Optional tenantEntry = wait(tryGetTenantTransaction(tr, self->tenantId)); // Another (or several other) operations have already removed/changed the old entry // Possible for the new entry to also have been tampered with, @@ -1943,16 +1940,16 @@ struct RenameTenantImpl { "configuration sequence."); return Void(); } - if (tenantEntry.get().tenantState == TenantState::REMOVING) { + if (tenantEntry.get().tenantState == TenantAPI::TenantState::REMOVING) { throw tenant_removed(); } - TenantMapEntry updatedEntry = tenantEntry.get(); + MetaclusterTenantMapEntry updatedEntry = tenantEntry.get(); // Only update if in the expected state - if (updatedEntry.tenantState == TenantState::RENAMING) { + if (updatedEntry.tenantState == TenantAPI::TenantState::RENAMING) { updatedEntry.tenantName = self->newName; - updatedEntry.tenantState = TenantState::READY; + updatedEntry.tenantState = TenantAPI::TenantState::READY; 
updatedEntry.renameDestination.reset(); ManagementClusterMetadata::tenantMetadata().tenantMap.set(tr, self->tenantId, updatedEntry); ManagementClusterMetadata::tenantMetadata().lastTenantModification.setVersionstamp(tr, Versionstamp(), 0); @@ -1961,7 +1958,7 @@ struct RenameTenantImpl { // Remove the tenant from the cluster -> tenant index ManagementClusterMetadata::clusterTenantIndex.erase( - tr, Tuple::makeTuple(updatedEntry.assignedCluster.get(), self->oldName, self->tenantId)); + tr, Tuple::makeTuple(updatedEntry.assignedCluster, self->oldName, self->tenantId)); } return Void(); diff --git a/fdbclient/include/fdbclient/Tenant.h b/fdbclient/include/fdbclient/Tenant.h index cbd20972d4..d65d8b67b7 100644 --- a/fdbclient/include/fdbclient/Tenant.h +++ b/fdbclient/include/fdbclient/Tenant.h @@ -37,7 +37,6 @@ Key idToPrefix(int64_t id); int64_t prefixToId(KeyRef prefix, EnforceValidTenantId = EnforceValidTenantId::True); constexpr static int PREFIX_SIZE = sizeof(int64_t); -} // namespace TenantAPI // Represents the various states that a tenant could be in. // In a standalone cluster, a tenant should only ever be in the READY state. @@ -65,41 +64,44 @@ enum class TenantState { REGISTERING, READY, REMOVING, UPDATING_CONFIGURATION, R // Can be used in conjunction with the other tenant states above. enum class TenantLockState : uint8_t { UNLOCKED, READ_ONLY, LOCKED }; -struct TenantMapEntry { +std::string tenantStateToString(TenantState tenantState); +TenantState stringToTenantState(std::string stateStr); + +std::string tenantLockStateToString(TenantLockState tenantState); +TenantLockState stringToTenantLockState(std::string stateStr); +} // namespace TenantAPI +struct MetaclusterTenantMapEntry { constexpr static FileIdentifier file_identifier = 12247338; - static std::string tenantStateToString(TenantState tenantState); - static TenantState stringToTenantState(std::string stateStr); - - static std::string tenantLockStateToString(TenantLockState tenantState); - static TenantLockState stringToTenantLockState(std::string stateStr); - int64_t id = -1; Key prefix; TenantName tenantName; - TenantState tenantState = TenantState::READY; - TenantLockState tenantLockState = TenantLockState::UNLOCKED; + TenantAPI::TenantState tenantState = TenantAPI::TenantState::READY; + TenantAPI::TenantLockState tenantLockState = TenantAPI::TenantLockState::UNLOCKED; Optional tenantGroup; - Optional assignedCluster; + ClusterName assignedCluster; int64_t configurationSequenceNum = 0; Optional renameDestination; // Can be set to an error string if the tenant is in the ERROR state std::string error; - TenantMapEntry(); - TenantMapEntry(int64_t id, TenantName tenantName, TenantState tenantState); - TenantMapEntry(int64_t id, TenantName tenantName, TenantState tenantState, Optional tenantGroup); + MetaclusterTenantMapEntry(); + MetaclusterTenantMapEntry(int64_t id, TenantName tenantName, TenantAPI::TenantState tenantState); + MetaclusterTenantMapEntry(int64_t id, + TenantName tenantName, + TenantAPI::TenantState tenantState, + Optional tenantGroup); void setId(int64_t id); std::string toJson() const; - bool matchesConfiguration(TenantMapEntry const& other) const; + bool matchesConfiguration(MetaclusterTenantMapEntry const& other) const; void configure(Standalone parameter, Optional value); Value encode() const { return ObjectWriter::toValue(*this, IncludeVersion()); } - static TenantMapEntry decode(ValueRef const& value) { - return ObjectReader::fromStringRef(value, IncludeVersion()); + static MetaclusterTenantMapEntry 
decode(ValueRef const& value) { + return ObjectReader::fromStringRef(value, IncludeVersion()); } template @@ -118,7 +120,44 @@ struct TenantMapEntry { if (id >= 0) { prefix = TenantAPI::idToPrefix(id); } - ASSERT(tenantState >= TenantState::REGISTERING && tenantState <= TenantState::ERROR); + ASSERT(tenantState >= TenantAPI::TenantState::REGISTERING && tenantState <= TenantAPI::TenantState::ERROR); + } + } +}; + +struct TenantMapEntry { + constexpr static FileIdentifier file_identifier = 7054389; + + int64_t id = -1; + Key prefix; + TenantName tenantName; + TenantAPI::TenantLockState tenantLockState = TenantAPI::TenantLockState::UNLOCKED; + Optional tenantGroup; + int64_t configurationSequenceNum = 0; + + TenantMapEntry(); + TenantMapEntry(int64_t id, TenantName tenantName); + TenantMapEntry(int64_t id, TenantName tenantName, Optional tenantGroup); + TenantMapEntry(MetaclusterTenantMapEntry metaclusterEntry); + + void setId(int64_t id); + std::string toJson() const; + + bool matchesConfiguration(TenantMapEntry const& other) const; + void configure(Standalone parameter, Optional value); + + Value encode() const { return ObjectWriter::toValue(*this, IncludeVersion()); } + static TenantMapEntry decode(ValueRef const& value) { + return ObjectReader::fromStringRef(value, IncludeVersion()); + } + + template + void serialize(Ar& ar) { + serializer(ar, id, tenantName, tenantLockState, tenantGroup, configurationSequenceNum); + if constexpr (Ar::isDeserializing) { + if (id >= 0) { + prefix = TenantAPI::idToPrefix(id); + } } } }; @@ -189,10 +228,11 @@ struct TenantIdCodec { } }; +template struct TenantMetadataSpecification { Key subspace; - KeyBackedObjectMap tenantMap; + KeyBackedObjectMap tenantMap; KeyBackedMap tenantNameIndex; KeyBackedProperty lastTenantId; KeyBackedBinaryValue tenantCount; @@ -213,7 +253,7 @@ struct TenantMetadataSpecification { }; struct TenantMetadata { - static TenantMetadataSpecification& instance(); + static TenantMetadataSpecification& instance(); static inline auto& subspace() { return instance().subspace; } static inline auto& tenantMap() { return instance().tenantMap; } diff --git a/fdbclient/include/fdbclient/TenantManagement.actor.h b/fdbclient/include/fdbclient/TenantManagement.actor.h index 5a3912dc8c..218fff2db5 100644 --- a/fdbclient/include/fdbclient/TenantManagement.actor.h +++ b/fdbclient/include/fdbclient/TenantManagement.actor.h @@ -183,9 +183,6 @@ createTenantTransaction(Transaction tr, TenantMapEntry tenantEntry, ClusterType throw tenant_prefix_allocator_conflict(); } - tenantEntry.tenantState = TenantState::READY; - tenantEntry.assignedCluster = Optional(); - TenantMetadata::tenantMap().set(tr, tenantEntry.id, tenantEntry); TenantMetadata::tenantNameIndex().set(tr, tenantEntry.tenantName, tenantEntry.id); TenantMetadata::lastTenantModification().setVersionstamp(tr, Versionstamp(), 0); diff --git a/fdbserver/include/fdbserver/workloads/MetaclusterConsistency.actor.h b/fdbserver/include/fdbserver/workloads/MetaclusterConsistency.actor.h index f7dcfd964d..48e6d7e2af 100644 --- a/fdbserver/include/fdbserver/workloads/MetaclusterConsistency.actor.h +++ b/fdbserver/include/fdbserver/workloads/MetaclusterConsistency.actor.h @@ -52,7 +52,7 @@ private: KeyBackedRangeResult clusterTenantTuples; KeyBackedRangeResult clusterTenantGroupTuples; - std::map tenantMap; + std::map tenantMap; KeyBackedRangeResult> tenantGroups; std::map> clusterTenantMap; @@ -70,7 +70,7 @@ private: ACTOR static Future loadManagementClusterMetadata(MetaclusterConsistencyCheck* self) { state 
Reference managementTr = self->managementDb->createTransaction(); - state KeyBackedRangeResult> tenantList; + state KeyBackedRangeResult> tenantList; loop { try { @@ -114,7 +114,7 @@ private: } self->managementMetadata.tenantMap = - std::map(tenantList.results.begin(), tenantList.results.end()); + std::map(tenantList.results.begin(), tenantList.results.end()); for (auto t : self->managementMetadata.clusterTenantTuples.results) { ASSERT_EQ(t.size(), 3); @@ -255,7 +255,7 @@ private: state KeyBackedRangeResult> dataClusterTenantList; state KeyBackedRangeResult> dataClusterTenantGroupList; - state TenantConsistencyCheck tenantConsistencyCheck(dataDb); + state TenantConsistencyCheck tenantConsistencyCheck(dataDb); wait(tenantConsistencyCheck.run()); loop { @@ -294,13 +294,13 @@ private: } else { ASSERT_LE(dataClusterTenantMap.size(), expectedTenants.size()); for (auto tenantName : expectedTenants) { - TenantMapEntry const& metaclusterEntry = self->managementMetadata.tenantMap[tenantName]; + MetaclusterTenantMapEntry const& metaclusterEntry = self->managementMetadata.tenantMap[tenantName]; if (!dataClusterTenantMap.count(tenantName)) { if (metaclusterEntry.tenantGroup.present()) { groupExpectedTenantCounts.try_emplace(metaclusterEntry.tenantGroup.get(), 0); } - ASSERT(metaclusterEntry.tenantState == TenantState::REGISTERING || - metaclusterEntry.tenantState == TenantState::REMOVING); + ASSERT(metaclusterEntry.tenantState == TenantAPI::TenantState::REGISTERING || + metaclusterEntry.tenantState == TenantAPI::TenantState::REMOVING); } else if (metaclusterEntry.tenantGroup.present()) { ++groupExpectedTenantCounts[metaclusterEntry.tenantGroup.get()]; } @@ -309,17 +309,15 @@ private: for (auto [tenantId, entry] : dataClusterTenantMap) { ASSERT(expectedTenants.count(tenantId)); - TenantMapEntry const& metaclusterEntry = self->managementMetadata.tenantMap[tenantId]; - ASSERT(!entry.assignedCluster.present()); + MetaclusterTenantMapEntry const& metaclusterEntry = self->managementMetadata.tenantMap[tenantId]; ASSERT_EQ(entry.id, metaclusterEntry.id); ASSERT(entry.tenantName == metaclusterEntry.tenantName); - ASSERT_EQ(entry.tenantState, TenantState::READY); if (!self->allowPartialMetaclusterOperations) { - ASSERT_EQ(metaclusterEntry.tenantState, TenantState::READY); + ASSERT_EQ(metaclusterEntry.tenantState, TenantAPI::TenantState::READY); } - if (metaclusterEntry.tenantState != TenantState::UPDATING_CONFIGURATION && - metaclusterEntry.tenantState != TenantState::REMOVING) { + if (metaclusterEntry.tenantState != TenantAPI::TenantState::UPDATING_CONFIGURATION && + metaclusterEntry.tenantState != TenantAPI::TenantState::REMOVING) { ASSERT_EQ(entry.configurationSequenceNum, metaclusterEntry.configurationSequenceNum); } else { ASSERT_LE(entry.configurationSequenceNum, metaclusterEntry.configurationSequenceNum); @@ -352,7 +350,8 @@ private: } ACTOR static Future run(MetaclusterConsistencyCheck* self) { - state TenantConsistencyCheck managementTenantConsistencyCheck(self->managementDb); + state TenantConsistencyCheck managementTenantConsistencyCheck( + self->managementDb); wait(managementTenantConsistencyCheck.run()); wait(loadManagementClusterMetadata(self)); self->validateManagementCluster(); diff --git a/fdbserver/include/fdbserver/workloads/TenantConsistency.actor.h b/fdbserver/include/fdbserver/workloads/TenantConsistency.actor.h index e1b838f239..6ce9baf9e2 100644 --- a/fdbserver/include/fdbserver/workloads/TenantConsistency.actor.h +++ b/fdbserver/include/fdbserver/workloads/TenantConsistency.actor.h @@ 
-38,14 +38,14 @@ #include "fdbclient/TenantManagement.actor.h" #include "flow/actorcompiler.h" // This must be the last #include. -template +template class TenantConsistencyCheck { private: Reference db; struct TenantData { Optional metaclusterRegistration; - std::map tenantMap; + std::map tenantMap; std::map tenantNameIndex; int64_t lastTenantId; int64_t tenantCount; @@ -67,12 +67,12 @@ private: ACTOR static Future loadTenantMetadata(TenantConsistencyCheck* self) { state Reference tr = self->db->createTransaction(); - state KeyBackedRangeResult> tenantList; + state KeyBackedRangeResult> tenantList; state KeyBackedRangeResult> tenantNameIndexList; state KeyBackedRangeResult tenantTombstoneList; state KeyBackedRangeResult> tenantGroupList; state KeyBackedRangeResult tenantGroupTenantTuples; - state TenantMetadataSpecification* tenantMetadata; + state TenantMetadataSpecification* tenantMetadata; loop { try { @@ -111,7 +111,7 @@ private: ASSERT(!tenantList.more); self->metadata.tenantMap = - std::map(tenantList.results.begin(), tenantList.results.end()); + std::map(tenantList.results.begin(), tenantList.results.end()); ASSERT(!tenantNameIndexList.more); self->metadata.tenantNameIndex = @@ -160,28 +160,28 @@ private: if (tenantMapEntry.tenantGroup.present()) { auto tenantGroupMapItr = metadata.tenantGroupMap.find(tenantMapEntry.tenantGroup.get()); ASSERT(tenantGroupMapItr != metadata.tenantGroupMap.end()); - ASSERT(tenantMapEntry.assignedCluster == tenantGroupMapItr->second.assignedCluster); + if (metadata.clusterType == ClusterType::METACLUSTER_MANAGEMENT) { + ASSERT(tenantMapEntry.assignedCluster == tenantGroupMapItr->second.assignedCluster.get()); + } ASSERT(metadata.tenantGroupIndex[tenantMapEntry.tenantGroup.get()].count(tenantId)); } else { ASSERT(!metadata.tenantsInTenantGroupIndex.count(tenantId)); } if (metadata.clusterType == ClusterType::METACLUSTER_MANAGEMENT) { - ASSERT(tenantMapEntry.assignedCluster.present()); if (tenantMapEntry.renameDestination.present()) { - ASSERT(tenantMapEntry.tenantState == TenantState::RENAMING || - tenantMapEntry.tenantState == TenantState::REMOVING); + ASSERT(tenantMapEntry.tenantState == TenantAPI::TenantState::RENAMING || + tenantMapEntry.tenantState == TenantAPI::TenantState::REMOVING); auto nameIndexItr = metadata.tenantNameIndex.find(tenantMapEntry.renameDestination.get()); ASSERT(nameIndexItr != metadata.tenantNameIndex.end()); ASSERT_EQ(nameIndexItr->second, tenantMapEntry.id); ++renameCount; } else { - ASSERT_NE(tenantMapEntry.tenantState, TenantState::RENAMING); + ASSERT_NE(tenantMapEntry.tenantState, TenantAPI::TenantState::RENAMING); } } else { - ASSERT_EQ(tenantMapEntry.tenantState, TenantState::READY); - ASSERT(!tenantMapEntry.assignedCluster.present()); + ASSERT_EQ(tenantMapEntry.tenantState, TenantAPI::TenantState::READY); ASSERT(!tenantMapEntry.renameDestination.present()); } } diff --git a/fdbserver/workloads/TenantManagementConcurrencyWorkload.actor.cpp b/fdbserver/workloads/TenantManagementConcurrencyWorkload.actor.cpp index 733f0827c2..5e715475b9 100644 --- a/fdbserver/workloads/TenantManagementConcurrencyWorkload.actor.cpp +++ b/fdbserver/workloads/TenantManagementConcurrencyWorkload.actor.cpp @@ -171,14 +171,15 @@ struct TenantManagementConcurrencyWorkload : TestWorkload { ACTOR static Future createTenant(TenantManagementConcurrencyWorkload* self) { state TenantName tenant = self->chooseTenantName(); state TenantMapEntry entry; - entry.tenantName = tenant; - entry.tenantGroup = self->chooseTenantGroup(); + state 
MetaclusterTenantMapEntry mEntry; + entry.tenantName = mEntry.tenantName = tenant; + entry.tenantGroup = mEntry.tenantGroup = self->chooseTenantGroup(); try { loop { Future createFuture = self->useMetacluster - ? MetaclusterAPI::createTenant(self->mvDb, entry, AssignClusterAutomatically::True) + ? MetaclusterAPI::createTenant(self->mvDb, mEntry, AssignClusterAutomatically::True) : success(TenantAPI::createTenant(self->dataDb.getReference(), tenant, entry)); Optional result = wait(timeout(createFuture, 30)); if (result.present()) { @@ -333,7 +334,8 @@ struct TenantManagementConcurrencyWorkload : TestWorkload { self->mvDb, AllowPartialMetaclusterOperations::True); wait(metaclusterConsistencyCheck.run()); } else { - state TenantConsistencyCheck tenantConsistencyCheck(self->dataDb.getReference()); + state TenantConsistencyCheck tenantConsistencyCheck( + self->dataDb.getReference()); wait(tenantConsistencyCheck.run()); } diff --git a/fdbserver/workloads/TenantManagementWorkload.actor.cpp b/fdbserver/workloads/TenantManagementWorkload.actor.cpp index 45aee0cd17..980cbbb2fd 100644 --- a/fdbserver/workloads/TenantManagementWorkload.actor.cpp +++ b/fdbserver/workloads/TenantManagementWorkload.actor.cpp @@ -394,11 +394,13 @@ struct TenantManagementWorkload : TestWorkload { wait(tr->commit()); } else { ASSERT(tenantsToCreate.size() == 1); + TenantMapEntry tEntry = tenantsToCreate.begin()->second; + MetaclusterTenantMapEntry modifiedEntry( + tEntry.id, tEntry.tenantName, TenantAPI::TenantState::REGISTERING, tEntry.tenantGroup); if (deterministicRandom()->coinflip()) { - tenantsToCreate.begin()->second.assignedCluster = self->dataClusterName; + modifiedEntry.assignedCluster = self->dataClusterName; } - wait(MetaclusterAPI::createTenant( - self->mvDb, tenantsToCreate.begin()->second, AssignClusterAutomatically::True)); + wait(MetaclusterAPI::createTenant(self->mvDb, modifiedEntry, AssignClusterAutomatically::True)); } return Void(); @@ -503,18 +505,18 @@ struct TenantManagementWorkload : TestWorkload { } // Check the state of the first created tenant - Optional resultEntry = - wait(self->tryGetTenant(tenantsToCreate.begin()->first, operationType)); - - if (resultEntry.present()) { - if (resultEntry.get().tenantState == TenantState::READY) { - // The tenant now exists, so we will retry and expect the creation to react accordingly - alreadyExists = true; - } else { - // Only a metacluster tenant creation can end up in a partially created state - // We should be able to retry and pick up where we left off - ASSERT(operationType == OperationType::METACLUSTER); - ASSERT(resultEntry.get().tenantState == TenantState::REGISTERING); + if (operationType == OperationType::METACLUSTER) { + Optional resultEntry = + wait(MetaclusterAPI::tryGetTenant(self->mvDb, tenantsToCreate.begin()->first)); + if (resultEntry.present()) { + if (resultEntry.get().tenantState == TenantAPI::TenantState::READY) { + // The tenant now exists, so we will retry and expect the creation to react accordingly + alreadyExists = true; + } else { + // Only a metacluster tenant creation can end up in a partially created state + // We should be able to retry and pick up where we left off + ASSERT(resultEntry.get().tenantState == TenantAPI::TenantState::REGISTERING); + } } } } @@ -1072,7 +1074,7 @@ struct TenantManagementWorkload : TestWorkload { assignedCluster = ClusterNameRef(assignedClusterStr); } - TenantMapEntry entry(id, TenantNameRef(name), TenantMapEntry::stringToTenantState(tenantStateStr), tenantGroup); + TenantMapEntry entry(id, 
TenantNameRef(name), tenantGroup); ASSERT(entry.prefix == prefix); return entry; } @@ -1907,7 +1909,8 @@ struct TenantManagementWorkload : TestWorkload { wait(metaclusterConsistencyCheck.run()); wait(checkTombstoneCleanup(self)); } else { - state TenantConsistencyCheck tenantConsistencyCheck(self->dataDb.getReference()); + state TenantConsistencyCheck tenantConsistencyCheck( + self->dataDb.getReference()); wait(tenantConsistencyCheck.run()); } From 1c0e784a7a6ca179180235af6784ae27b6b89529 Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Thu, 9 Feb 2023 17:06:52 -0800 Subject: [PATCH 02/31] attempt to fix broken test workloads --- fdbclient/Tenant.cpp | 21 +++ .../fdbclient/MetaclusterManagement.actor.h | 5 +- .../fdbclient/TenantSpecialKeys.actor.h | 2 +- fdbserver/TenantCache.actor.cpp | 6 +- .../workloads/MetaclusterConsistency.actor.h | 15 +- .../workloads/TenantConsistency.actor.h | 132 +++++++++------- .../MetaclusterManagementWorkload.actor.cpp | 48 +++--- .../TenantEntryCacheWorkload.actor.cpp | 2 +- ...antManagementConcurrencyWorkload.actor.cpp | 3 +- .../TenantManagementWorkload.actor.cpp | 149 +++++++++++------- 10 files changed, 232 insertions(+), 151 deletions(-) diff --git a/fdbclient/Tenant.cpp b/fdbclient/Tenant.cpp index 3489216df1..c12578fd93 100644 --- a/fdbclient/Tenant.cpp +++ b/fdbclient/Tenant.cpp @@ -137,8 +137,29 @@ TenantMapEntry::TenantMapEntry(int64_t id, TenantName tenantName, Optional tenantGroup) + : tenantName(tenantName), tenantState(tenantState), tenantGroup(tenantGroup) { + setId(id); +} + +void MetaclusterTenantMapEntry::setId(int64_t id) { + ASSERT(id >= 0); + this->id = id; + prefix = TenantAPI::idToPrefix(id); +} void TenantMapEntry::setId(int64_t id) { ASSERT(id >= 0); diff --git a/fdbclient/include/fdbclient/MetaclusterManagement.actor.h b/fdbclient/include/fdbclient/MetaclusterManagement.actor.h index 28922c49ac..cc25fe842e 100644 --- a/fdbclient/include/fdbclient/MetaclusterManagement.actor.h +++ b/fdbclient/include/fdbclient/MetaclusterManagement.actor.h @@ -1307,8 +1307,9 @@ struct CreateTenantImpl { } ACTOR static Future storeTenantInDataCluster(CreateTenantImpl* self, Reference tr) { - std::pair, bool> dataClusterTenant = - wait(TenantAPI::createTenantTransaction(tr, self->tenantEntry, ClusterType::METACLUSTER_DATA)); + TenantMapEntry entry(self->tenantEntry); + std::pair, bool> dataClusterTenant = + wait(TenantAPI::createTenantTransaction(tr, entry, ClusterType::METACLUSTER_DATA)); // If the tenant map entry is empty, then we encountered a tombstone indicating that the tenant was // simultaneously removed. 
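The hunk above illustrates the pattern these patches establish: the management cluster operates on MetaclusterTenantMapEntry and down-converts to a plain TenantMapEntry (via the conversion constructor added in fdbclient/Tenant.cpp) before handing the entry to TenantAPI::createTenantTransaction on the data cluster. The following is a minimal sketch of that flow, not part of the patch; it uses only the constructors and helpers added above, and the template arguments (e.g. Optional<TenantGroupName>) plus the literal values are assumptions, since this copy of the patch elides angle-bracketed text.

// Illustrative sketch only -- not part of the patch.
#include "fdbclient/Tenant.h"
#include <string>

void exampleEntrySplit() {
    // Management-cluster view: carries metacluster-only state such as
    // tenantState and assignedCluster (now a plain ClusterName, not Optional).
    Optional<TenantGroupName> group = TenantGroupName("group1"_sr); // hypothetical group name
    MetaclusterTenantMapEntry managementEntry(1, "tenant1"_sr, TenantAPI::TenantState::REGISTERING, group);
    managementEntry.assignedCluster = ClusterName("cluster1"_sr); // hypothetical cluster name

    // Data-cluster view: the conversion constructor keeps id, name, lock state,
    // tenant group and configurationSequenceNum, and drops tenantState,
    // assignedCluster, renameDestination and error.
    TenantMapEntry dataEntry(managementEntry);

    // The state/lock-state string helpers moved from TenantMapEntry statics
    // into the TenantAPI namespace.
    std::string state = TenantAPI::tenantStateToString(managementEntry.tenantState); // "registering"
    (void)dataEntry;
    (void)state;
}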
diff --git a/fdbclient/include/fdbclient/TenantSpecialKeys.actor.h b/fdbclient/include/fdbclient/TenantSpecialKeys.actor.h index cf92a06646..8767e8346f 100644 --- a/fdbclient/include/fdbclient/TenantSpecialKeys.actor.h +++ b/fdbclient/include/fdbclient/TenantSpecialKeys.actor.h @@ -110,7 +110,7 @@ private: std::vector, Optional>> configMutations, int64_t tenantId, std::map* tenantGroupNetTenantDelta) { - state TenantMapEntry tenantEntry(tenantId, tenantName, TenantState::READY); + state TenantMapEntry tenantEntry(tenantId, tenantName); for (auto const& [name, value] : configMutations) { tenantEntry.configure(name, value); diff --git a/fdbserver/TenantCache.actor.cpp b/fdbserver/TenantCache.actor.cpp index b4b5883970..1a01206393 100644 --- a/fdbserver/TenantCache.actor.cpp +++ b/fdbserver/TenantCache.actor.cpp @@ -355,7 +355,7 @@ public: for (uint16_t i = 0; i < tenantCount; i++) { TenantName tenantName(format("%s_%08d", "ddtc_test_tenant", tenantNumber + i)); - TenantMapEntry tenant(tenantNumber + i, tenantName, TenantState::READY); + TenantMapEntry tenant(tenantNumber + i, tenantName); tenantCache.insert(tenant); } @@ -383,7 +383,7 @@ public: for (uint16_t i = 0; i < tenantCount; i++) { TenantName tenantName(format("%s_%08d", "ddtc_test_tenant", tenantNumber + i)); - TenantMapEntry tenant(tenantNumber + i, tenantName, TenantState::READY); + TenantMapEntry tenant(tenantNumber + i, tenantName); tenantCache.insert(tenant); } @@ -397,7 +397,7 @@ public: if (tenantOrdinal % staleTenantFraction != 0) { TenantName tenantName(format("%s_%08d", "ddtc_test_tenant", tenantOrdinal)); - TenantMapEntry tenant(tenantOrdinal, tenantName, TenantState::READY); + TenantMapEntry tenant(tenantOrdinal, tenantName); bool newTenant = tenantCache.update(tenant); ASSERT(!newTenant); keepCount++; diff --git a/fdbserver/include/fdbserver/workloads/MetaclusterConsistency.actor.h b/fdbserver/include/fdbserver/workloads/MetaclusterConsistency.actor.h index 48e6d7e2af..5b9734d3ac 100644 --- a/fdbserver/include/fdbserver/workloads/MetaclusterConsistency.actor.h +++ b/fdbserver/include/fdbserver/workloads/MetaclusterConsistency.actor.h @@ -202,27 +202,25 @@ private: std::map clusterAllocated; std::set processedTenantGroups; for (auto [tenantId, entry] : managementMetadata.tenantMap) { - ASSERT(entry.assignedCluster.present()); - // Each tenant should be assigned to the same cluster where it is stored in the cluster tenant index - auto clusterItr = managementMetadata.clusterTenantMap.find(entry.assignedCluster.get()); + auto clusterItr = managementMetadata.clusterTenantMap.find(entry.assignedCluster); ASSERT(clusterItr != managementMetadata.clusterTenantMap.end()); ASSERT(clusterItr->second.count(tenantId)); if (entry.tenantGroup.present()) { // Count the number of tenant groups allocated in each cluster if (processedTenantGroups.insert(entry.tenantGroup.get()).second) { - ++clusterAllocated[entry.assignedCluster.get()]; + ++clusterAllocated[entry.assignedCluster]; } // The tenant group should be stored in the same cluster where it is stored in the cluster tenant // group index - auto clusterTenantGroupItr = managementMetadata.clusterTenantGroupMap.find(entry.assignedCluster.get()); + auto clusterTenantGroupItr = managementMetadata.clusterTenantGroupMap.find(entry.assignedCluster); ASSERT(clusterTenantGroupItr != managementMetadata.clusterTenantGroupMap.end()); ASSERT(clusterTenantGroupItr->second.count(entry.tenantGroup.get())); } else { // Track the actual tenant group allocation per cluster (a tenant with no group 
counts against the // allocation) - ++clusterAllocated[entry.assignedCluster.get()]; + ++clusterAllocated[entry.assignedCluster]; } } @@ -255,7 +253,7 @@ private: state KeyBackedRangeResult> dataClusterTenantList; state KeyBackedRangeResult> dataClusterTenantGroupList; - state TenantConsistencyCheck tenantConsistencyCheck(dataDb); + state TenantConsistencyCheck tenantConsistencyCheck(dataDb); wait(tenantConsistencyCheck.run()); loop { @@ -350,8 +348,7 @@ private: } ACTOR static Future run(MetaclusterConsistencyCheck* self) { - state TenantConsistencyCheck managementTenantConsistencyCheck( - self->managementDb); + state TenantConsistencyCheck managementTenantConsistencyCheck(self->managementDb); wait(managementTenantConsistencyCheck.run()); wait(loadManagementClusterMetadata(self)); self->validateManagementCluster(); diff --git a/fdbserver/include/fdbserver/workloads/TenantConsistency.actor.h b/fdbserver/include/fdbserver/workloads/TenantConsistency.actor.h index 6ce9baf9e2..5561f71f61 100644 --- a/fdbserver/include/fdbserver/workloads/TenantConsistency.actor.h +++ b/fdbserver/include/fdbserver/workloads/TenantConsistency.actor.h @@ -38,14 +38,13 @@ #include "fdbclient/TenantManagement.actor.h" #include "flow/actorcompiler.h" // This must be the last #include. -template +template class TenantConsistencyCheck { private: Reference db; struct TenantData { Optional metaclusterRegistration; - std::map tenantMap; std::map tenantNameIndex; int64_t lastTenantId; int64_t tenantCount; @@ -65,31 +64,19 @@ private: // the case with the current metacluster simulation workloads static inline const int metaclusterMaxTenants = 10e6; - ACTOR static Future loadTenantMetadata(TenantConsistencyCheck* self) { - state Reference tr = self->db->createTransaction(); + ACTOR template + static Future> loadTenantMetadataImpl( + TenantConsistencyCheck* self, + TenantMetadataSpecification* tenantMetadata, + Reference tr) { state KeyBackedRangeResult> tenantList; state KeyBackedRangeResult> tenantNameIndexList; state KeyBackedRangeResult tenantTombstoneList; state KeyBackedRangeResult> tenantGroupList; state KeyBackedRangeResult tenantGroupTenantTuples; - state TenantMetadataSpecification* tenantMetadata; loop { try { - tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS); - wait(store(self->metadata.metaclusterRegistration, - MetaclusterMetadata::metaclusterRegistration().get(tr))); - - self->metadata.clusterType = self->metadata.metaclusterRegistration.present() - ? 
self->metadata.metaclusterRegistration.get().clusterType - : ClusterType::STANDALONE; - - if (self->metadata.clusterType == ClusterType::METACLUSTER_MANAGEMENT) { - tenantMetadata = &MetaclusterAPI::ManagementClusterMetadata::tenantMetadata(); - } else { - tenantMetadata = &TenantMetadata::instance(); - } - wait( store(tenantList, tenantMetadata->tenantMap.getRange(tr, {}, {}, metaclusterMaxTenants)) && store(tenantNameIndexList, @@ -110,7 +97,7 @@ private: } ASSERT(!tenantList.more); - self->metadata.tenantMap = + std::map localMap = std::map(tenantList.results.begin(), tenantList.results.end()); ASSERT(!tenantNameIndexList.more); @@ -130,63 +117,103 @@ private: TenantGroupName tenantGroupName = t.getString(0); int64_t tenantId = t.getInt(1); ASSERT(self->metadata.tenantGroupMap.count(tenantGroupName)); - ASSERT(self->metadata.tenantMap.count(tenantId)); + ASSERT(localMap.count(tenantId)); self->metadata.tenantGroupIndex[tenantGroupName].insert(tenantId); ASSERT(self->metadata.tenantsInTenantGroupIndex.insert(tenantId).second); } ASSERT_EQ(self->metadata.tenantGroupIndex.size(), self->metadata.tenantGroupMap.size()); - return Void(); + return localMap; } - void validateTenantMetadata() { - if (metadata.clusterType == ClusterType::METACLUSTER_MANAGEMENT) { - ASSERT_LE(metadata.tenantMap.size(), metaclusterMaxTenants); - } else { - ASSERT_LE(metadata.tenantMap.size(), CLIENT_KNOBS->MAX_TENANTS_PER_CLUSTER); - } - - ASSERT_EQ(metadata.tenantMap.size(), metadata.tenantCount); + void validateTenantMetadata(std::map tenantMap) { + ASSERT(metadata.clusterType == ClusterType::METACLUSTER_DATA); + ASSERT_LE(tenantMap.size(), CLIENT_KNOBS->MAX_TENANTS_PER_CLUSTER); + ASSERT_EQ(tenantMap.size(), metadata.tenantCount); ASSERT_EQ(metadata.tenantNameIndex.size(), metadata.tenantCount); int renameCount = 0; - for (auto [tenantId, tenantMapEntry] : metadata.tenantMap) { + for (auto [tenantId, tenantMapEntry] : tenantMap) { ASSERT_EQ(tenantId, tenantMapEntry.id); - if (metadata.clusterType != ClusterType::METACLUSTER_DATA) { - ASSERT_LE(tenantId, metadata.lastTenantId); - } + ASSERT_LE(tenantId, metadata.lastTenantId); ASSERT_EQ(metadata.tenantNameIndex[tenantMapEntry.tenantName], tenantId); if (tenantMapEntry.tenantGroup.present()) { auto tenantGroupMapItr = metadata.tenantGroupMap.find(tenantMapEntry.tenantGroup.get()); ASSERT(tenantGroupMapItr != metadata.tenantGroupMap.end()); - if (metadata.clusterType == ClusterType::METACLUSTER_MANAGEMENT) { - ASSERT(tenantMapEntry.assignedCluster == tenantGroupMapItr->second.assignedCluster.get()); - } ASSERT(metadata.tenantGroupIndex[tenantMapEntry.tenantGroup.get()].count(tenantId)); } else { ASSERT(!metadata.tenantsInTenantGroupIndex.count(tenantId)); } + } - if (metadata.clusterType == ClusterType::METACLUSTER_MANAGEMENT) { - if (tenantMapEntry.renameDestination.present()) { - ASSERT(tenantMapEntry.tenantState == TenantAPI::TenantState::RENAMING || - tenantMapEntry.tenantState == TenantAPI::TenantState::REMOVING); + ASSERT_EQ(tenantMap.size() + renameCount, metadata.tenantNameIndex.size()); + } - auto nameIndexItr = metadata.tenantNameIndex.find(tenantMapEntry.renameDestination.get()); - ASSERT(nameIndexItr != metadata.tenantNameIndex.end()); - ASSERT_EQ(nameIndexItr->second, tenantMapEntry.id); - ++renameCount; - } else { - ASSERT_NE(tenantMapEntry.tenantState, TenantAPI::TenantState::RENAMING); - } + void validateTenantMetadata(std::map tenantMap) { + ASSERT(metadata.clusterType == ClusterType::METACLUSTER_MANAGEMENT); + ASSERT_LE(tenantMap.size(), 
metaclusterMaxTenants); + ASSERT_EQ(tenantMap.size(), metadata.tenantCount); + ASSERT_EQ(metadata.tenantNameIndex.size(), metadata.tenantCount); + + int renameCount = 0; + for (auto [tenantId, tenantMapEntry] : tenantMap) { + ASSERT_EQ(tenantId, tenantMapEntry.id); + ASSERT_EQ(metadata.tenantNameIndex[tenantMapEntry.tenantName], tenantId); + + if (tenantMapEntry.tenantGroup.present()) { + auto tenantGroupMapItr = metadata.tenantGroupMap.find(tenantMapEntry.tenantGroup.get()); + ASSERT(tenantGroupMapItr != metadata.tenantGroupMap.end()); + ASSERT(tenantMapEntry.assignedCluster == tenantGroupMapItr->second.assignedCluster.get()); + ASSERT(metadata.tenantGroupIndex[tenantMapEntry.tenantGroup.get()].count(tenantId)); } else { - ASSERT_EQ(tenantMapEntry.tenantState, TenantAPI::TenantState::READY); - ASSERT(!tenantMapEntry.renameDestination.present()); + ASSERT(!metadata.tenantsInTenantGroupIndex.count(tenantId)); + } + if (tenantMapEntry.renameDestination.present()) { + ASSERT(tenantMapEntry.tenantState == TenantAPI::TenantState::RENAMING || + tenantMapEntry.tenantState == TenantAPI::TenantState::REMOVING); + + auto nameIndexItr = metadata.tenantNameIndex.find(tenantMapEntry.renameDestination.get()); + ASSERT(nameIndexItr != metadata.tenantNameIndex.end()); + ASSERT_EQ(nameIndexItr->second, tenantMapEntry.id); + ++renameCount; + } else { + ASSERT_NE(tenantMapEntry.tenantState, TenantAPI::TenantState::RENAMING); } } - ASSERT_EQ(metadata.tenantMap.size() + renameCount, metadata.tenantNameIndex.size()); + ASSERT_EQ(tenantMap.size() + renameCount, metadata.tenantNameIndex.size()); + } + + ACTOR static Future loadAndValidateTenantMetadata(TenantConsistencyCheck* self) { + state Reference tr = self->db->createTransaction(); + + loop { + try { + tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS); + wait(store(self->metadata.metaclusterRegistration, + MetaclusterMetadata::metaclusterRegistration().get(tr))); + + self->metadata.clusterType = self->metadata.metaclusterRegistration.present() + ? 
self->metadata.metaclusterRegistration.get().clusterType + : ClusterType::STANDALONE; + break; + } catch (Error& e) { + wait(safeThreadFutureToFuture(tr->onError(e))); + } + } + if (self->metadata.clusterType == ClusterType::METACLUSTER_MANAGEMENT) { + std::map tenantMap = + wait(loadTenantMetadataImpl( + self, &MetaclusterAPI::ManagementClusterMetadata::tenantMetadata(), tr)); + self->validateTenantMetadata(tenantMap); + } else { + std::map tenantMap = + wait(loadTenantMetadataImpl(self, &TenantMetadata::instance(), tr)); + self->validateTenantMetadata(tenantMap); + } + + return Void(); } // Check that the tenant tombstones are properly cleaned up and only present on a metacluster data cluster @@ -206,8 +233,7 @@ private: } ACTOR static Future run(TenantConsistencyCheck* self) { - wait(loadTenantMetadata(self)); - self->validateTenantMetadata(); + wait(loadAndValidateTenantMetadata(self)); self->checkTenantTombstones(); return Void(); diff --git a/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp b/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp index bd7540836b..930c3e5740 100644 --- a/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp +++ b/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp @@ -394,13 +394,13 @@ struct MetaclusterManagementWorkload : TestWorkload { ACTOR static Future verifyListFilter(MetaclusterManagementWorkload* self, TenantName tenant) { try { - state TenantMapEntry checkEntry = wait(MetaclusterAPI::getTenant(self->managementDb, tenant)); - state TenantState checkState = checkEntry.tenantState; - state std::vector filters; + state MetaclusterTenantMapEntry checkEntry = wait(MetaclusterAPI::getTenant(self->managementDb, tenant)); + state TenantAPI::TenantState checkState = checkEntry.tenantState; + state std::vector filters; filters.push_back(checkState); - state std::vector> tenantList; + state std::vector> tenantList; // Possible to have changed state between now and the getTenant call above - state TenantMapEntry checkEntry2; + state MetaclusterTenantMapEntry checkEntry2; wait(store(checkEntry2, MetaclusterAPI::getTenant(self->managementDb, tenant)) && store(tenantList, MetaclusterAPI::listTenantMetadata(self->managementDb, ""_sr, "\xff\xff"_sr, 10e6, 0, filters))); @@ -442,7 +442,7 @@ struct MetaclusterManagementWorkload : TestWorkload { preferredClusters.second = self->chooseClusterName(); } - state TenantMapEntry tenantMapEntry; + state MetaclusterTenantMapEntry tenantMapEntry; tenantMapEntry.tenantName = tenant; tenantMapEntry.tenantGroup = tenantGroup; @@ -453,7 +453,7 @@ struct MetaclusterManagementWorkload : TestWorkload { tenantMapEntry.assignedCluster = deterministicRandom()->coinflip() ? 
preferredClusters.first : preferredClusters.second; if (!originalPreferredCluster.present()) { - originalPreferredCluster = tenantMapEntry.assignedCluster.get(); + originalPreferredCluster = tenantMapEntry.assignedCluster; } } Future createFuture = @@ -467,12 +467,13 @@ struct MetaclusterManagementWorkload : TestWorkload { } } catch (Error& e) { if (e.code() == error_code_tenant_already_exists && retried && !exists) { - Optional entry = wait(MetaclusterAPI::tryGetTenant(self->managementDb, tenant)); + Optional entry = + wait(MetaclusterAPI::tryGetTenant(self->managementDb, tenant)); ASSERT(entry.present()); tenantMapEntry = entry.get(); break; } else if (!assignClusterAutomatically && retried && - originalPreferredCluster.get() != tenantMapEntry.assignedCluster.get() && + originalPreferredCluster.get() != tenantMapEntry.assignedCluster && (e.code() == error_code_cluster_no_capacity || e.code() == error_code_cluster_not_found || e.code() == error_code_invalid_tenant_configuration)) { @@ -487,24 +488,22 @@ struct MetaclusterManagementWorkload : TestWorkload { } } - TenantMapEntry entry = wait(MetaclusterAPI::getTenant(self->managementDb, tenant)); + MetaclusterTenantMapEntry entry = wait(MetaclusterAPI::getTenant(self->managementDb, tenant)); ASSERT(!exists); ASSERT(hasCapacity); - ASSERT(entry.assignedCluster.present()); ASSERT(entry.tenantGroup == tenantGroup); if (tenantGroup.present()) { - auto tenantGroupData = - self->tenantGroups.try_emplace(tenantGroup.get(), entry.assignedCluster.get()).first; - ASSERT(tenantGroupData->second.cluster == entry.assignedCluster.get()); + auto tenantGroupData = self->tenantGroups.try_emplace(tenantGroup.get(), entry.assignedCluster).first; + ASSERT(tenantGroupData->second.cluster == entry.assignedCluster); tenantGroupData->second.tenants.insert(tenant); } else { self->ungroupedTenants.insert(tenant); } - auto assignedCluster = self->dataDbs.find(entry.assignedCluster.get()); - ASSERT(assignClusterAutomatically || tenantMapEntry.assignedCluster.get() == assignedCluster->first); + auto assignedCluster = self->dataDbs.find(entry.assignedCluster); + ASSERT(assignClusterAutomatically || tenantMapEntry.assignedCluster == assignedCluster->first); ASSERT(assignedCluster != self->dataDbs.end()); ASSERT(assignedCluster->second.tenants.insert(tenant).second); @@ -518,7 +517,7 @@ struct MetaclusterManagementWorkload : TestWorkload { assignedCluster->second.tenantGroupCapacity >= assignedCluster->second.tenantGroups.size() + assignedCluster->second.ungroupedTenants.size()); - self->createdTenants[tenant] = TenantData(entry.assignedCluster.get(), tenantGroup); + self->createdTenants[tenant] = TenantData(entry.assignedCluster, tenantGroup); } catch (Error& e) { if (e.code() == error_code_tenant_already_exists) { ASSERT(exists); @@ -534,9 +533,8 @@ struct MetaclusterManagementWorkload : TestWorkload { return Void(); } else if (e.code() == error_code_invalid_tenant_configuration) { ASSERT(tenantGroup.present()); - ASSERT(tenantMapEntry.assignedCluster.present()); auto itr = self->tenantGroups.find(tenantGroup.get()); - ASSERT(itr->second.cluster != tenantMapEntry.assignedCluster.get()); + ASSERT(itr->second.cluster != tenantMapEntry.assignedCluster); return Void(); } @@ -568,7 +566,8 @@ struct MetaclusterManagementWorkload : TestWorkload { } } catch (Error& e) { if (e.code() == error_code_tenant_not_found && retried && exists) { - Optional entry = wait(MetaclusterAPI::tryGetTenant(self->managementDb, tenant)); + Optional entry = + 
wait(MetaclusterAPI::tryGetTenant(self->managementDb, tenant)); ASSERT(!entry.present()); break; } else { @@ -763,20 +762,21 @@ struct MetaclusterManagementWorkload : TestWorkload { ASSERT(exists); ASSERT(!newTenantExists); - Optional oldEntry = wait(MetaclusterAPI::tryGetTenant(self->managementDb, tenant)); + Optional oldEntry = + wait(MetaclusterAPI::tryGetTenant(self->managementDb, tenant)); ASSERT(!oldEntry.present()); - TenantMapEntry newEntry = wait(MetaclusterAPI::getTenant(self->managementDb, newTenantName)); + MetaclusterTenantMapEntry newEntry = wait(MetaclusterAPI::getTenant(self->managementDb, newTenantName)); auto tenantData = self->createdTenants.find(tenant); ASSERT(tenantData != self->createdTenants.end()); ASSERT(tenantData->second.tenantGroup == newEntry.tenantGroup); - ASSERT(newEntry.assignedCluster.present() && tenantData->second.cluster == newEntry.assignedCluster.get()); + ASSERT(tenantData->second.cluster == newEntry.assignedCluster); self->createdTenants[newTenantName] = tenantData->second; self->createdTenants.erase(tenantData); - auto& dataDb = self->dataDbs[newEntry.assignedCluster.get()]; + auto& dataDb = self->dataDbs[newEntry.assignedCluster]; ASSERT(dataDb.registered); dataDb.tenants.erase(tenant); diff --git a/fdbserver/workloads/TenantEntryCacheWorkload.actor.cpp b/fdbserver/workloads/TenantEntryCacheWorkload.actor.cpp index 62970629a8..4c44bce0aa 100644 --- a/fdbserver/workloads/TenantEntryCacheWorkload.actor.cpp +++ b/fdbserver/workloads/TenantEntryCacheWorkload.actor.cpp @@ -90,7 +90,7 @@ struct TenantEntryCacheWorkload : TestWorkload { // Ensure associated counter values gets updated ASSERT_EQ(cache->numRefreshByInit(), 1); - state TenantMapEntry dummy(std::numeric_limits::max(), "name"_sr, TenantState::READY); + state TenantMapEntry dummy(std::numeric_limits::max(), "name"_sr); Optional> value = wait(cache->getById(dummy.id)); ASSERT(!value.present()); diff --git a/fdbserver/workloads/TenantManagementConcurrencyWorkload.actor.cpp b/fdbserver/workloads/TenantManagementConcurrencyWorkload.actor.cpp index 5e715475b9..7817607334 100644 --- a/fdbserver/workloads/TenantManagementConcurrencyWorkload.actor.cpp +++ b/fdbserver/workloads/TenantManagementConcurrencyWorkload.actor.cpp @@ -334,8 +334,7 @@ struct TenantManagementConcurrencyWorkload : TestWorkload { self->mvDb, AllowPartialMetaclusterOperations::True); wait(metaclusterConsistencyCheck.run()); } else { - state TenantConsistencyCheck tenantConsistencyCheck( - self->dataDb.getReference()); + state TenantConsistencyCheck tenantConsistencyCheck(self->dataDb.getReference()); wait(tenantConsistencyCheck.run()); } diff --git a/fdbserver/workloads/TenantManagementWorkload.actor.cpp b/fdbserver/workloads/TenantManagementWorkload.actor.cpp index 980cbbb2fd..0248d79d00 100644 --- a/fdbserver/workloads/TenantManagementWorkload.actor.cpp +++ b/fdbserver/workloads/TenantManagementWorkload.actor.cpp @@ -351,19 +351,12 @@ struct TenantManagementWorkload : TestWorkload { return tenantGroup; } - Future> tryGetTenant(TenantName tenantName, OperationType operationType) { - if (operationType == OperationType::METACLUSTER) { - return MetaclusterAPI::tryGetTenant(mvDb, tenantName); - } else { - return TenantAPI::tryGetTenant(dataDb.getReference(), tenantName); - } - } - // Creates tenant(s) using the specified operation type - ACTOR static Future createTenantImpl(Reference tr, - std::map tenantsToCreate, - OperationType operationType, - TenantManagementWorkload* self) { + ACTOR template + static Future 
createTenantImpl(Reference tr, + std::map tenantsToCreate, + OperationType operationType, + TenantManagementWorkload* self) { if (operationType == OperationType::SPECIAL_KEYS) { tr->setOption(FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES); for (auto [tenant, entry] : tenantsToCreate) { @@ -394,7 +387,7 @@ struct TenantManagementWorkload : TestWorkload { wait(tr->commit()); } else { ASSERT(tenantsToCreate.size() == 1); - TenantMapEntry tEntry = tenantsToCreate.begin()->second; + TenantMapEntryImpl tEntry = tenantsToCreate.begin()->second; MetaclusterTenantMapEntry modifiedEntry( tEntry.id, tEntry.tenantName, TenantAPI::TenantState::REGISTERING, tEntry.tenantGroup); if (deterministicRandom()->coinflip()) { @@ -406,8 +399,41 @@ struct TenantManagementWorkload : TestWorkload { return Void(); } + ACTOR template + static Future verifyTenantCreate(TenantManagementWorkload* self, + Optional entry, + TenantName itrName, + Optional tGroup) { + ASSERT(entry.present()); + ASSERT(entry.get().id > self->maxId); + ASSERT(entry.get().tenantGroup == tGroup); + + if (self->useMetacluster) { + // In a metacluster, we should also see that the tenant was created on the data cluster + Optional dataEntry = wait(TenantAPI::tryGetTenant(self->dataDb.getReference(), itrName)); + ASSERT(dataEntry.present()); + ASSERT(dataEntry.get().id == entry.get().id); + ASSERT(dataEntry.get().tenantGroup == entry.get().tenantGroup); + } + + // Update our local tenant state to include the newly created one + self->maxId = entry.get().id; + self->createdTenants[itrName] = TenantData(entry.get().id, tGroup, true); + return Void(); + } + ACTOR static Future createTenant(TenantManagementWorkload* self) { state OperationType operationType = self->randomOperationType(); + if (operationType == OperationType::METACLUSTER) { + wait(createTenantHelper(self, operationType)); + } else { + wait(createTenantHelper(self, operationType)); + } + return Void(); + } + + ACTOR template + static Future createTenantHelper(TenantManagementWorkload* self, OperationType operationType) { int numTenants = 1; // For transaction-based operations, test creating multiple tenants in the same transaction @@ -426,14 +452,14 @@ struct TenantManagementWorkload : TestWorkload { state bool hasSystemTenantGroup = false; state int newTenants = 0; - state std::map tenantsToCreate; + state std::map tenantsToCreate; for (int i = 0; i < numTenants; ++i) { TenantName tenant = self->chooseTenantName(true); while (tenantsToCreate.count(tenant)) { tenant = self->chooseTenantName(true); } - TenantMapEntry entry; + TenantMapEntryImpl entry; entry.tenantName = tenant; entry.tenantGroup = self->chooseTenantGroup(true); @@ -470,8 +496,9 @@ struct TenantManagementWorkload : TestWorkload { } try { - Optional result = wait(timeout(createTenantImpl(tr, tenantsToCreate, operationType, self), - deterministicRandom()->randomInt(1, 30))); + Optional result = + wait(timeout(createTenantImpl(tr, tenantsToCreate, operationType, self), + deterministicRandom()->randomInt(1, 30))); if (result.present()) { // Make sure that we had capacity to create the tenants. 
This cannot be validated for @@ -534,7 +561,7 @@ struct TenantManagementWorkload : TestWorkload { ASSERT(!hasSystemTenant); ASSERT(!hasSystemTenantGroup); - state std::map::iterator tenantItr; + state typename std::map::iterator tenantItr; for (tenantItr = tenantsToCreate.begin(); tenantItr != tenantsToCreate.end(); ++tenantItr) { // Ignore any tenants that already existed if (self->createdTenants.count(tenantItr->first)) { @@ -542,31 +569,24 @@ struct TenantManagementWorkload : TestWorkload { } // Read the created tenant object and verify that its state is correct - state Optional entry = wait(self->tryGetTenant(tenantItr->first, operationType)); - - ASSERT(entry.present()); - ASSERT(entry.get().id > self->maxId); - ASSERT(entry.get().tenantGroup == tenantItr->second.tenantGroup); - ASSERT(entry.get().tenantState == TenantState::READY); + state StringRef tPrefix; + if (operationType == OperationType::METACLUSTER) { + state Optional metaEntry = + wait(MetaclusterAPI::tryGetTenant(self->mvDb, tenantItr->first)); + wait(verifyTenantCreate( + self, metaEntry, tenantItr->first, tenantItr->second.tenantGroup)); + tPrefix = metaEntry.get().prefix; + } else { + state Optional normalEntry = + wait(TenantAPI::tryGetTenant(self->dataDb.getReference(), tenantItr->first)); + wait(verifyTenantCreate( + self, normalEntry, tenantItr->first, tenantItr->second.tenantGroup)); + tPrefix = normalEntry.get().prefix; + } Versionstamp currentVersionstamp = wait(getLastTenantModification(self, operationType)); ASSERT_GT(currentVersionstamp.version, originalReadVersion); - if (self->useMetacluster) { - // In a metacluster, we should also see that the tenant was created on the data cluster - Optional dataEntry = - wait(TenantAPI::tryGetTenant(self->dataDb.getReference(), tenantItr->first)); - ASSERT(dataEntry.present()); - ASSERT(dataEntry.get().id == entry.get().id); - ASSERT(dataEntry.get().tenantGroup == entry.get().tenantGroup); - ASSERT(dataEntry.get().tenantState == TenantState::READY); - } - - // Update our local tenant state to include the newly created one - self->maxId = entry.get().id; - self->createdTenants[tenantItr->first] = - TenantData(entry.get().id, tenantItr->second.tenantGroup, true); - // If this tenant has a tenant group, create or update the entry for it if (tenantItr->second.tenantGroup.present()) { self->createdTenantGroups[tenantItr->second.tenantGroup.get()].tenantCount++; @@ -595,7 +615,7 @@ struct TenantManagementWorkload : TestWorkload { loop { try { checkTr.setOption(FDBTransactionOptions::RAW_ACCESS); - Optional val = wait(checkTr.get(self->keyName.withPrefix(entry.get().prefix))); + Optional val = wait(checkTr.get(self->keyName.withPrefix(tPrefix))); ASSERT(val.present()); ASSERT(val.get() == tenantItr->first); break; @@ -740,7 +760,7 @@ struct TenantManagementWorkload : TestWorkload { // getTenant throwing tenant_not_found will break some test cases because it is not wrapped // by runManagementTransaction. 
For such cases, fall back to delete by name and allow // the errors to flow through there - Optional entry = wait(MetaclusterAPI::tryGetTenant(self->mvDb, beginTenant)); + Optional entry = wait(MetaclusterAPI::tryGetTenant(self->mvDb, beginTenant)); if (entry.present() && deterministicRandom()->coinflip()) { wait(MetaclusterAPI::deleteTenant(self->mvDb, entry.get().id)); CODE_PROBE(true, "Deleted tenant by ID"); @@ -883,17 +903,16 @@ struct TenantManagementWorkload : TestWorkload { } } - if (!tenants.empty()) { + if (!tenants.empty() && operationType == OperationType::METACLUSTER) { // Check the state of the first deleted tenant - Optional resultEntry = - wait(self->tryGetTenant(tenants.begin()->first, operationType)); + Optional resultEntry = + wait(MetaclusterAPI::tryGetTenant(self->mvDb, tenants.begin()->first)); if (!resultEntry.present()) { alreadyExists = false; - } else if (resultEntry.get().tenantState == TenantState::REMOVING) { - ASSERT(operationType == OperationType::METACLUSTER); } else { - ASSERT(resultEntry.get().tenantState == TenantState::READY); + ASSERT(resultEntry.get().tenantState == TenantAPI::TenantState::READY || + resultEntry.get().tenantState == TenantAPI::TenantState::REMOVING); } } } @@ -1097,8 +1116,6 @@ struct TenantManagementWorkload : TestWorkload { } else if (operationType == OperationType::MANAGEMENT_TRANSACTION) { tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS); wait(store(entry, TenantAPI::getTenantTransaction(tr, tenant))); - } else { - wait(store(entry, MetaclusterAPI::getTenant(self->mvDb, tenant))); } return entry; @@ -1118,10 +1135,20 @@ struct TenantManagementWorkload : TestWorkload { loop { try { // Get the tenant metadata and check that it matches our local state - state TenantMapEntry entry = wait(getTenantImpl(tr, tenant, operationType, self)); + state int64_t entryId; + state Optional tGroup; + if (operationType == OperationType::METACLUSTER) { + state MetaclusterTenantMapEntry metaEntry = wait(MetaclusterAPI::getTenant(self->mvDb, tenant)); + entryId = metaEntry.id; + tGroup = metaEntry.tenantGroup; + } else { + state TenantMapEntry normalEntry = wait(getTenantImpl(tr, tenant, operationType, self)); + entryId = normalEntry.id; + tGroup = normalEntry.tenantGroup; + } ASSERT(alreadyExists); - ASSERT(entry.id == tenantData.tenant->id()); - ASSERT(entry.tenantGroup == tenantData.tenantGroup); + ASSERT(entryId == tenantData.tenant->id()); + ASSERT(tGroup == tenantData.tenantGroup); wait(checkTenantContents(self, tenant, tenantData)); return Void(); } catch (Error& e) { @@ -1177,13 +1204,22 @@ struct TenantManagementWorkload : TestWorkload { } else if (operationType == OperationType::MANAGEMENT_TRANSACTION) { tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS); wait(store(tenants, TenantAPI::listTenantMetadataTransaction(tr, beginTenant, endTenant, limit))); - } else { - wait(store(tenants, MetaclusterAPI::listTenantMetadata(self->mvDb, beginTenant, endTenant, limit))); } return tenants; } + ACTOR static Future>> listTenantsImplMeta( + Reference tr, + TenantName beginTenant, + TenantName endTenant, + int limit, + TenantManagementWorkload* self) { + state std::vector> tenants; + wait(store(tenants, MetaclusterAPI::listTenantMetadata(self->mvDb, beginTenant, endTenant, limit))); + return tenants; + } + ACTOR static Future listTenants(TenantManagementWorkload* self) { state TenantName beginTenant = self->chooseTenantName(false); state TenantName endTenant = self->chooseTenantName(false); @@ -1201,6 +1237,8 @@ struct TenantManagementWorkload : 
TestWorkload { // Attempt to read the chosen list of tenants state std::vector> tenants = wait(listTenantsImpl(tr, beginTenant, endTenant, limit, operationType, self)); + state std::vector> tenants2 = + wait(listTenantsImplMeta(tr, beginTenant, endTenant, limit, self)); // Attempting to read the list of tenants using the metacluster API in a non-metacluster should // return nothing in this test @@ -1909,8 +1947,7 @@ struct TenantManagementWorkload : TestWorkload { wait(metaclusterConsistencyCheck.run()); wait(checkTombstoneCleanup(self)); } else { - state TenantConsistencyCheck tenantConsistencyCheck( - self->dataDb.getReference()); + state TenantConsistencyCheck tenantConsistencyCheck(self->dataDb.getReference()); wait(tenantConsistencyCheck.run()); } From bf508f5642e39b6b51013dd0548d877cba2328a6 Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Fri, 10 Feb 2023 12:25:29 -0800 Subject: [PATCH 03/31] adjust tenantconsistency workload to account for lastTenantId on data clusters --- .../workloads/MetaclusterConsistency.actor.h | 2 +- .../fdbserver/workloads/TenantConsistency.actor.h | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/fdbserver/include/fdbserver/workloads/MetaclusterConsistency.actor.h b/fdbserver/include/fdbserver/workloads/MetaclusterConsistency.actor.h index 5b9734d3ac..707584dbf9 100644 --- a/fdbserver/include/fdbserver/workloads/MetaclusterConsistency.actor.h +++ b/fdbserver/include/fdbserver/workloads/MetaclusterConsistency.actor.h @@ -253,7 +253,7 @@ private: state KeyBackedRangeResult> dataClusterTenantList; state KeyBackedRangeResult> dataClusterTenantGroupList; - state TenantConsistencyCheck tenantConsistencyCheck(dataDb); + state TenantConsistencyCheck tenantConsistencyCheck(dataDb, true); wait(tenantConsistencyCheck.run()); loop { diff --git a/fdbserver/include/fdbserver/workloads/TenantConsistency.actor.h b/fdbserver/include/fdbserver/workloads/TenantConsistency.actor.h index 5561f71f61..8f79553540 100644 --- a/fdbserver/include/fdbserver/workloads/TenantConsistency.actor.h +++ b/fdbserver/include/fdbserver/workloads/TenantConsistency.actor.h @@ -42,6 +42,7 @@ template class TenantConsistencyCheck { private: Reference db; + bool isDataCluster = false; struct TenantData { Optional metaclusterRegistration; @@ -132,10 +133,14 @@ private: ASSERT_EQ(tenantMap.size(), metadata.tenantCount); ASSERT_EQ(metadata.tenantNameIndex.size(), metadata.tenantCount); - int renameCount = 0; for (auto [tenantId, tenantMapEntry] : tenantMap) { ASSERT_EQ(tenantId, tenantMapEntry.id); - ASSERT_LE(tenantId, metadata.lastTenantId); + // Only standalone clusters will have lastTenantId set + // For Metacluster, the lastTenantId field is updated for MetaclusterMetadata + // and not TenantMetadata + if (!isDataCluster) { + ASSERT_LE(tenantId, metadata.lastTenantId); + } ASSERT_EQ(metadata.tenantNameIndex[tenantMapEntry.tenantName], tenantId); if (tenantMapEntry.tenantGroup.present()) { @@ -147,7 +152,7 @@ private: } } - ASSERT_EQ(tenantMap.size() + renameCount, metadata.tenantNameIndex.size()); + ASSERT_EQ(tenantMap.size(), metadata.tenantNameIndex.size()); } void validateTenantMetadata(std::map tenantMap) { @@ -242,6 +247,7 @@ private: public: TenantConsistencyCheck() {} TenantConsistencyCheck(Reference db) : db(db) {} + TenantConsistencyCheck(Reference db, bool isDataCluster) : db(db), isDataCluster(isDataCluster) {} Future run() { return run(this); } }; From 5c68c95a60626d87a79a1fd56aec21fe211a01b6 Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Fri, 10 Feb 2023 
13:12:24 -0800 Subject: [PATCH 04/31] fix assertion --- .../include/fdbserver/workloads/TenantConsistency.actor.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fdbserver/include/fdbserver/workloads/TenantConsistency.actor.h b/fdbserver/include/fdbserver/workloads/TenantConsistency.actor.h index 8f79553540..0d784b9b33 100644 --- a/fdbserver/include/fdbserver/workloads/TenantConsistency.actor.h +++ b/fdbserver/include/fdbserver/workloads/TenantConsistency.actor.h @@ -128,7 +128,8 @@ private: } void validateTenantMetadata(std::map tenantMap) { - ASSERT(metadata.clusterType == ClusterType::METACLUSTER_DATA); + ASSERT(metadata.clusterType == ClusterType::METACLUSTER_DATA || + metadata.clusterType == ClusterType::STANDALONE); ASSERT_LE(tenantMap.size(), CLIENT_KNOBS->MAX_TENANTS_PER_CLUSTER); ASSERT_EQ(tenantMap.size(), metadata.tenantCount); ASSERT_EQ(metadata.tenantNameIndex.size(), metadata.tenantCount); From ad210072c0464b966c6d0e5cf28d093e675e4b2f Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Mon, 13 Feb 2023 13:29:19 -0800 Subject: [PATCH 05/31] address code review --- fdbclient/Metacluster.cpp | 70 +++++++++++++++++++ fdbclient/Tenant.cpp | 67 ------------------ fdbclient/include/fdbclient/Metacluster.h | 57 +++++++++++++++ fdbclient/include/fdbclient/Tenant.h | 55 +-------------- .../workloads/MetaclusterConsistency.actor.h | 2 +- .../workloads/TenantConsistency.actor.h | 4 +- ...antManagementConcurrencyWorkload.actor.cpp | 11 ++- .../TenantManagementWorkload.actor.cpp | 33 ++++++--- 8 files changed, 159 insertions(+), 140 deletions(-) diff --git a/fdbclient/Metacluster.cpp b/fdbclient/Metacluster.cpp index b298762f11..4157e93611 100644 --- a/fdbclient/Metacluster.cpp +++ b/fdbclient/Metacluster.cpp @@ -20,6 +20,8 @@ #include "fdbclient/Metacluster.h" #include "fdbclient/MetaclusterManagement.actor.h" +#include "libb64/decode.h" +#include "libb64/encode.h" FDB_DEFINE_BOOLEAN_PARAM(AddNewTenants); FDB_DEFINE_BOOLEAN_PARAM(RemoveMissingTenants); @@ -77,6 +79,74 @@ json_spirit::mObject ClusterUsage::toJson() const { return obj; } +TenantMapEntry::TenantMapEntry(MetaclusterTenantMapEntry metaclusterEntry) + : tenantName(metaclusterEntry.tenantName), tenantLockState(metaclusterEntry.tenantLockState), + tenantGroup(metaclusterEntry.tenantGroup), configurationSequenceNum(metaclusterEntry.configurationSequenceNum) { + setId(metaclusterEntry.id); +} + +MetaclusterTenantMapEntry::MetaclusterTenantMapEntry() {} +MetaclusterTenantMapEntry::MetaclusterTenantMapEntry(int64_t id, + TenantName tenantName, + TenantAPI::TenantState tenantState) + : tenantName(tenantName), tenantState(tenantState) { + setId(id); +} +MetaclusterTenantMapEntry::MetaclusterTenantMapEntry(int64_t id, + TenantName tenantName, + TenantAPI::TenantState tenantState, + Optional tenantGroup) + : tenantName(tenantName), tenantState(tenantState), tenantGroup(tenantGroup) { + setId(id); +} + +void MetaclusterTenantMapEntry::setId(int64_t id) { + ASSERT(id >= 0); + this->id = id; + prefix = TenantAPI::idToPrefix(id); +} + +std::string MetaclusterTenantMapEntry::toJson() const { + json_spirit::mObject tenantEntry; + tenantEntry["id"] = id; + + tenantEntry["name"] = binaryToJson(tenantName); + tenantEntry["prefix"] = binaryToJson(prefix); + + tenantEntry["tenant_state"] = TenantAPI::tenantStateToString(tenantState); + tenantEntry["assigned_cluster"] = binaryToJson(assignedCluster); + + if (tenantGroup.present()) { + tenantEntry["tenant_group"] = binaryToJson(tenantGroup.get()); + } + + tenantEntry["lock_state"] = 
TenantAPI::tenantLockStateToString(tenantLockState); + if (tenantState == TenantAPI::TenantState::RENAMING) { + ASSERT(renameDestination.present()); + tenantEntry["rename_destination"] = binaryToJson(renameDestination.get()); + } else if (tenantState == TenantAPI::TenantState::ERROR) { + tenantEntry["error"] = error; + } + + return json_spirit::write_string(json_spirit::mValue(tenantEntry)); +} + +bool MetaclusterTenantMapEntry::matchesConfiguration(MetaclusterTenantMapEntry const& other) const { + return tenantName == other.tenantName && tenantLockState == other.tenantLockState && + tenantGroup == other.tenantGroup && assignedCluster == other.assignedCluster; +} + +void MetaclusterTenantMapEntry::configure(Standalone parameter, Optional value) { + if (parameter == "tenant_group"_sr) { + tenantGroup = value; + } else if (parameter == "assigned_cluster"_sr && value.present()) { + assignedCluster = value.get(); + } else { + TraceEvent(SevWarnAlways, "UnknownTenantConfigurationParameter").detail("Parameter", parameter); + throw invalid_tenant_configuration(); + } +} + KeyBackedObjectProperty& MetaclusterMetadata::metaclusterRegistration() { static KeyBackedObjectProperty instance( diff --git a/fdbclient/Tenant.cpp b/fdbclient/Tenant.cpp index c12578fd93..c13d841079 100644 --- a/fdbclient/Tenant.cpp +++ b/fdbclient/Tenant.cpp @@ -134,32 +134,6 @@ TenantMapEntry::TenantMapEntry(int64_t id, TenantName tenantName, Optional tenantGroup) - : tenantName(tenantName), tenantState(tenantState), tenantGroup(tenantGroup) { - setId(id); -} - -void MetaclusterTenantMapEntry::setId(int64_t id) { - ASSERT(id >= 0); - this->id = id; - prefix = TenantAPI::idToPrefix(id); -} void TenantMapEntry::setId(int64_t id) { ASSERT(id >= 0); @@ -204,47 +178,6 @@ json_spirit::mObject TenantGroupEntry::toJson() const { return tenantGroupEntry; } -std::string MetaclusterTenantMapEntry::toJson() const { - json_spirit::mObject tenantEntry; - tenantEntry["id"] = id; - - tenantEntry["name"] = binaryToJson(tenantName); - tenantEntry["prefix"] = binaryToJson(prefix); - - tenantEntry["tenant_state"] = TenantAPI::tenantStateToString(tenantState); - tenantEntry["assigned_cluster"] = binaryToJson(assignedCluster); - - if (tenantGroup.present()) { - tenantEntry["tenant_group"] = binaryToJson(tenantGroup.get()); - } - - tenantEntry["lock_state"] = TenantAPI::tenantLockStateToString(tenantLockState); - if (tenantState == TenantAPI::TenantState::RENAMING) { - ASSERT(renameDestination.present()); - tenantEntry["rename_destination"] = binaryToJson(renameDestination.get()); - } else if (tenantState == TenantAPI::TenantState::ERROR) { - tenantEntry["error"] = error; - } - - return json_spirit::write_string(json_spirit::mValue(tenantEntry)); -} - -bool MetaclusterTenantMapEntry::matchesConfiguration(MetaclusterTenantMapEntry const& other) const { - return tenantName == other.tenantName && tenantLockState == other.tenantLockState && - tenantGroup == other.tenantGroup && assignedCluster == other.assignedCluster; -} - -void MetaclusterTenantMapEntry::configure(Standalone parameter, Optional value) { - if (parameter == "tenant_group"_sr) { - tenantGroup = value; - } else if (parameter == "assigned_cluster"_sr && value.present()) { - assignedCluster = value.get(); - } else { - TraceEvent(SevWarnAlways, "UnknownTenantConfigurationParameter").detail("Parameter", parameter); - throw invalid_tenant_configuration(); - } -} - TenantMetadataSpecification& TenantMetadata::instance() { static TenantMetadataSpecification _instance = 
TenantMetadataSpecification("\xff/"_sr); return _instance; diff --git a/fdbclient/include/fdbclient/Metacluster.h b/fdbclient/include/fdbclient/Metacluster.h index c08ae0e667..b66977816a 100644 --- a/fdbclient/include/fdbclient/Metacluster.h +++ b/fdbclient/include/fdbclient/Metacluster.h @@ -46,6 +46,8 @@ struct ClusterUsage { } }; +json_spirit::mObject binaryToJson(StringRef bytes); + template <> struct Traceable : std::true_type { static std::string toString(const ClusterUsage& value) { @@ -100,6 +102,61 @@ struct DataClusterEntry { } }; +struct MetaclusterTenantMapEntry { + constexpr static FileIdentifier file_identifier = 12247338; + + int64_t id = -1; + Key prefix; + TenantName tenantName; + TenantAPI::TenantState tenantState = TenantAPI::TenantState::READY; + TenantAPI::TenantLockState tenantLockState = TenantAPI::TenantLockState::UNLOCKED; + Optional tenantGroup; + ClusterName assignedCluster; + int64_t configurationSequenceNum = 0; + Optional renameDestination; + + // Can be set to an error string if the tenant is in the ERROR state + std::string error; + + MetaclusterTenantMapEntry(); + MetaclusterTenantMapEntry(int64_t id, TenantName tenantName, TenantAPI::TenantState tenantState); + MetaclusterTenantMapEntry(int64_t id, + TenantName tenantName, + TenantAPI::TenantState tenantState, + Optional tenantGroup); + + void setId(int64_t id); + std::string toJson() const; + + bool matchesConfiguration(MetaclusterTenantMapEntry const& other) const; + void configure(Standalone parameter, Optional value); + + Value encode() const { return ObjectWriter::toValue(*this, IncludeVersion()); } + static MetaclusterTenantMapEntry decode(ValueRef const& value) { + return ObjectReader::fromStringRef(value, IncludeVersion()); + } + + template + void serialize(Ar& ar) { + serializer(ar, + id, + tenantName, + tenantState, + tenantLockState, + tenantGroup, + assignedCluster, + configurationSequenceNum, + renameDestination, + error); + if constexpr (Ar::isDeserializing) { + if (id >= 0) { + prefix = TenantAPI::idToPrefix(id); + } + ASSERT(tenantState >= TenantAPI::TenantState::REGISTERING && tenantState <= TenantAPI::TenantState::ERROR); + } + } +}; + struct MetaclusterMetrics { int numTenants = 0; int numDataClusters = 0; diff --git a/fdbclient/include/fdbclient/Tenant.h b/fdbclient/include/fdbclient/Tenant.h index d65d8b67b7..058f878fa1 100644 --- a/fdbclient/include/fdbclient/Tenant.h +++ b/fdbclient/include/fdbclient/Tenant.h @@ -70,61 +70,8 @@ TenantState stringToTenantState(std::string stateStr); std::string tenantLockStateToString(TenantLockState tenantState); TenantLockState stringToTenantLockState(std::string stateStr); } // namespace TenantAPI -struct MetaclusterTenantMapEntry { - constexpr static FileIdentifier file_identifier = 12247338; - - int64_t id = -1; - Key prefix; - TenantName tenantName; - TenantAPI::TenantState tenantState = TenantAPI::TenantState::READY; - TenantAPI::TenantLockState tenantLockState = TenantAPI::TenantLockState::UNLOCKED; - Optional tenantGroup; - ClusterName assignedCluster; - int64_t configurationSequenceNum = 0; - Optional renameDestination; - - // Can be set to an error string if the tenant is in the ERROR state - std::string error; - - MetaclusterTenantMapEntry(); - MetaclusterTenantMapEntry(int64_t id, TenantName tenantName, TenantAPI::TenantState tenantState); - MetaclusterTenantMapEntry(int64_t id, - TenantName tenantName, - TenantAPI::TenantState tenantState, - Optional tenantGroup); - - void setId(int64_t id); - std::string toJson() const; - - bool 
matchesConfiguration(MetaclusterTenantMapEntry const& other) const; - void configure(Standalone parameter, Optional value); - - Value encode() const { return ObjectWriter::toValue(*this, IncludeVersion()); } - static MetaclusterTenantMapEntry decode(ValueRef const& value) { - return ObjectReader::fromStringRef(value, IncludeVersion()); - } - - template - void serialize(Ar& ar) { - serializer(ar, - id, - tenantName, - tenantState, - tenantLockState, - tenantGroup, - assignedCluster, - configurationSequenceNum, - renameDestination, - error); - if constexpr (Ar::isDeserializing) { - if (id >= 0) { - prefix = TenantAPI::idToPrefix(id); - } - ASSERT(tenantState >= TenantAPI::TenantState::REGISTERING && tenantState <= TenantAPI::TenantState::ERROR); - } - } -}; +struct MetaclusterTenantMapEntry; struct TenantMapEntry { constexpr static FileIdentifier file_identifier = 7054389; diff --git a/fdbserver/include/fdbserver/workloads/MetaclusterConsistency.actor.h b/fdbserver/include/fdbserver/workloads/MetaclusterConsistency.actor.h index 707584dbf9..5b9734d3ac 100644 --- a/fdbserver/include/fdbserver/workloads/MetaclusterConsistency.actor.h +++ b/fdbserver/include/fdbserver/workloads/MetaclusterConsistency.actor.h @@ -253,7 +253,7 @@ private: state KeyBackedRangeResult> dataClusterTenantList; state KeyBackedRangeResult> dataClusterTenantGroupList; - state TenantConsistencyCheck tenantConsistencyCheck(dataDb, true); + state TenantConsistencyCheck tenantConsistencyCheck(dataDb); wait(tenantConsistencyCheck.run()); loop { diff --git a/fdbserver/include/fdbserver/workloads/TenantConsistency.actor.h b/fdbserver/include/fdbserver/workloads/TenantConsistency.actor.h index 0d784b9b33..ecfab5e1ed 100644 --- a/fdbserver/include/fdbserver/workloads/TenantConsistency.actor.h +++ b/fdbserver/include/fdbserver/workloads/TenantConsistency.actor.h @@ -42,7 +42,6 @@ template class TenantConsistencyCheck { private: Reference db; - bool isDataCluster = false; struct TenantData { Optional metaclusterRegistration; @@ -139,7 +138,7 @@ private: // Only standalone clusters will have lastTenantId set // For Metacluster, the lastTenantId field is updated for MetaclusterMetadata // and not TenantMetadata - if (!isDataCluster) { + if (metadata.clusterType != ClusterType::METACLUSTER_DATA) { ASSERT_LE(tenantId, metadata.lastTenantId); } ASSERT_EQ(metadata.tenantNameIndex[tenantMapEntry.tenantName], tenantId); @@ -248,7 +247,6 @@ private: public: TenantConsistencyCheck() {} TenantConsistencyCheck(Reference db) : db(db) {} - TenantConsistencyCheck(Reference db, bool isDataCluster) : db(db), isDataCluster(isDataCluster) {} Future run() { return run(this); } }; diff --git a/fdbserver/workloads/TenantManagementConcurrencyWorkload.actor.cpp b/fdbserver/workloads/TenantManagementConcurrencyWorkload.actor.cpp index 7817607334..fefa55de6c 100644 --- a/fdbserver/workloads/TenantManagementConcurrencyWorkload.actor.cpp +++ b/fdbserver/workloads/TenantManagementConcurrencyWorkload.actor.cpp @@ -170,17 +170,16 @@ struct TenantManagementConcurrencyWorkload : TestWorkload { ACTOR static Future createTenant(TenantManagementConcurrencyWorkload* self) { state TenantName tenant = self->chooseTenantName(); - state TenantMapEntry entry; - state MetaclusterTenantMapEntry mEntry; - entry.tenantName = mEntry.tenantName = tenant; - entry.tenantGroup = mEntry.tenantGroup = self->chooseTenantGroup(); + state MetaclusterTenantMapEntry entry; + entry.tenantName = tenant; + entry.tenantGroup = self->chooseTenantGroup(); try { loop { Future createFuture = 
self->useMetacluster - ? MetaclusterAPI::createTenant(self->mvDb, mEntry, AssignClusterAutomatically::True) - : success(TenantAPI::createTenant(self->dataDb.getReference(), tenant, entry)); + ? MetaclusterAPI::createTenant(self->mvDb, entry, AssignClusterAutomatically::True) + : success(TenantAPI::createTenant(self->dataDb.getReference(), tenant, TenantMapEntry(entry))); Optional result = wait(timeout(createFuture, 30)); if (result.present()) { break; diff --git a/fdbserver/workloads/TenantManagementWorkload.actor.cpp b/fdbserver/workloads/TenantManagementWorkload.actor.cpp index 0248d79d00..cebebaa95a 100644 --- a/fdbserver/workloads/TenantManagementWorkload.actor.cpp +++ b/fdbserver/workloads/TenantManagementWorkload.actor.cpp @@ -544,7 +544,14 @@ struct TenantManagementWorkload : TestWorkload { // We should be able to retry and pick up where we left off ASSERT(resultEntry.get().tenantState == TenantAPI::TenantState::REGISTERING); } + } else { + CODE_PROBE(true, "Created tenant (metacluster) is not present and may have been removed."); } + } else { + Optional tenantEntry = + wait(TenantAPI::tryGetTenant(self->dataDb.getReference(), tenantsToCreate.begin()->first)); + CODE_PROBE(!tenantEntry.present(), + "Created tenant (non-metacluster) is not present and may have been removed."); } } @@ -575,6 +582,7 @@ struct TenantManagementWorkload : TestWorkload { wait(MetaclusterAPI::tryGetTenant(self->mvDb, tenantItr->first)); wait(verifyTenantCreate( self, metaEntry, tenantItr->first, tenantItr->second.tenantGroup)); + ASSERT(metaEntry.get().tenantState == TenantAPI::TenantState::READY); tPrefix = metaEntry.get().prefix; } else { state Optional normalEntry = @@ -903,16 +911,23 @@ struct TenantManagementWorkload : TestWorkload { } } - if (!tenants.empty() && operationType == OperationType::METACLUSTER) { - // Check the state of the first deleted tenant - Optional resultEntry = - wait(MetaclusterAPI::tryGetTenant(self->mvDb, tenants.begin()->first)); - - if (!resultEntry.present()) { - alreadyExists = false; + if (!tenants.empty()) { + if (operationType == OperationType::METACLUSTER) { + // Check the state of the first deleted tenant + Optional resultEntry = + wait(MetaclusterAPI::tryGetTenant(self->mvDb, tenants.begin()->first)); + if (!resultEntry.present()) { + alreadyExists = false; + } else { + ASSERT(resultEntry.get().tenantState == TenantAPI::TenantState::READY || + resultEntry.get().tenantState == TenantAPI::TenantState::REMOVING); + } } else { - ASSERT(resultEntry.get().tenantState == TenantAPI::TenantState::READY || - resultEntry.get().tenantState == TenantAPI::TenantState::REMOVING); + Optional tenantEntry = + wait(TenantAPI::tryGetTenant(self->dataDb.getReference(), tenants.begin()->first)); + if (!tenantEntry.present()) { + alreadyExists = false; + } } } } From 1af6d8ff926e8adf01a250e157097e2b0c31d42c Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Wed, 15 Feb 2023 15:01:17 -0800 Subject: [PATCH 06/31] fix various correctness issues and listTenant test --- fdbclient/Metacluster.cpp | 8 +- fdbclient/include/fdbclient/Metacluster.h | 5 +- .../fdbclient/MetaclusterManagement.actor.h | 2 +- .../TenantManagementWorkload.actor.cpp | 86 +++++++++---------- 4 files changed, 51 insertions(+), 50 deletions(-) diff --git a/fdbclient/Metacluster.cpp b/fdbclient/Metacluster.cpp index 4157e93611..6874aabf00 100644 --- a/fdbclient/Metacluster.cpp +++ b/fdbclient/Metacluster.cpp @@ -82,7 +82,9 @@ json_spirit::mObject ClusterUsage::toJson() const { TenantMapEntry::TenantMapEntry(MetaclusterTenantMapEntry 
metaclusterEntry) : tenantName(metaclusterEntry.tenantName), tenantLockState(metaclusterEntry.tenantLockState), tenantGroup(metaclusterEntry.tenantGroup), configurationSequenceNum(metaclusterEntry.configurationSequenceNum) { - setId(metaclusterEntry.id); + if (metaclusterEntry.id >= 0) { + setId(metaclusterEntry.id); + } } MetaclusterTenantMapEntry::MetaclusterTenantMapEntry() {} @@ -131,9 +133,9 @@ std::string MetaclusterTenantMapEntry::toJson() const { return json_spirit::write_string(json_spirit::mValue(tenantEntry)); } -bool MetaclusterTenantMapEntry::matchesConfiguration(MetaclusterTenantMapEntry const& other) const { +bool MetaclusterTenantMapEntry::matchesConfiguration(MetaclusterTenantMapEntry const& other, bool matchAssigned) const { return tenantName == other.tenantName && tenantLockState == other.tenantLockState && - tenantGroup == other.tenantGroup && assignedCluster == other.assignedCluster; + tenantGroup == other.tenantGroup && (!matchAssigned || assignedCluster == other.assignedCluster); } void MetaclusterTenantMapEntry::configure(Standalone parameter, Optional value) { diff --git a/fdbclient/include/fdbclient/Metacluster.h b/fdbclient/include/fdbclient/Metacluster.h index b66977816a..c8a36e48c2 100644 --- a/fdbclient/include/fdbclient/Metacluster.h +++ b/fdbclient/include/fdbclient/Metacluster.h @@ -128,7 +128,10 @@ struct MetaclusterTenantMapEntry { void setId(int64_t id); std::string toJson() const; - bool matchesConfiguration(MetaclusterTenantMapEntry const& other) const; + // On retries, configurations need to be compared, but it may be possible in the early + // stages of creation that the assignedCluster is unset. + // matchAssigned flag should be false to account for this case + bool matchesConfiguration(MetaclusterTenantMapEntry const& other, bool matchAssigned = true) const; void configure(Standalone parameter, Optional value); Value encode() const { return ObjectWriter::toValue(*this, IncludeVersion()); } diff --git a/fdbclient/include/fdbclient/MetaclusterManagement.actor.h b/fdbclient/include/fdbclient/MetaclusterManagement.actor.h index cc25fe842e..28ba17f314 100644 --- a/fdbclient/include/fdbclient/MetaclusterManagement.actor.h +++ b/fdbclient/include/fdbclient/MetaclusterManagement.actor.h @@ -1120,7 +1120,7 @@ struct CreateTenantImpl { state Optional existingEntry = wait(tryGetTenantTransaction(tr, self->tenantEntry.tenantName)); if (existingEntry.present()) { - if (!existingEntry.get().matchesConfiguration(self->tenantEntry) || + if (!existingEntry.get().matchesConfiguration(self->tenantEntry, false) || existingEntry.get().tenantState != TenantAPI::TenantState::REGISTERING) { // The tenant already exists and is either completely created or has a different // configuration diff --git a/fdbserver/workloads/TenantManagementWorkload.actor.cpp b/fdbserver/workloads/TenantManagementWorkload.actor.cpp index cebebaa95a..40aa5abd56 100644 --- a/fdbserver/workloads/TenantManagementWorkload.actor.cpp +++ b/fdbserver/workloads/TenantManagementWorkload.actor.cpp @@ -388,12 +388,15 @@ struct TenantManagementWorkload : TestWorkload { } else { ASSERT(tenantsToCreate.size() == 1); TenantMapEntryImpl tEntry = tenantsToCreate.begin()->second; - MetaclusterTenantMapEntry modifiedEntry( - tEntry.id, tEntry.tenantName, TenantAPI::TenantState::REGISTERING, tEntry.tenantGroup); + MetaclusterTenantMapEntry modifiedEntry; + modifiedEntry.tenantName = tEntry.tenantName; + modifiedEntry.tenantGroup = tEntry.tenantGroup; + auto assign = AssignClusterAutomatically::True; if 
(deterministicRandom()->coinflip()) { modifiedEntry.assignedCluster = self->dataClusterName; + assign = AssignClusterAutomatically::False; } - wait(MetaclusterAPI::createTenant(self->mvDb, modifiedEntry, AssignClusterAutomatically::True)); + wait(MetaclusterAPI::createTenant(self->mvDb, modifiedEntry, assign)); } return Void(); @@ -1103,11 +1106,6 @@ struct TenantManagementWorkload : TestWorkload { tenantGroup = TenantGroupNameRef(tenantGroupStr); } - Optional assignedCluster; - if (jsonDoc.tryGet("assigned_cluster", assignedClusterStr)) { - assignedCluster = ClusterNameRef(assignedClusterStr); - } - TenantMapEntry entry(id, TenantNameRef(name), tenantGroup); ASSERT(entry.prefix == prefix); return entry; @@ -1224,15 +1222,25 @@ struct TenantManagementWorkload : TestWorkload { return tenants; } - ACTOR static Future>> listTenantsImplMeta( - Reference tr, - TenantName beginTenant, - TenantName endTenant, - int limit, - TenantManagementWorkload* self) { - state std::vector> tenants; - wait(store(tenants, MetaclusterAPI::listTenantMetadata(self->mvDb, beginTenant, endTenant, limit))); - return tenants; + template + static Future verifyTenantList(TenantManagementWorkload* self, + std::vector> tenants, + int limit, + TenantName beginTenant, + TenantName endTenant) { + ASSERT(tenants.size() <= limit); + + // Compare the resulting tenant list to the list we expected to get + auto localItr = self->createdTenants.lower_bound(beginTenant); + auto tenantMapItr = tenants.begin(); + for (; tenantMapItr != tenants.end(); ++tenantMapItr, ++localItr) { + ASSERT(localItr != self->createdTenants.end()); + ASSERT(localItr->first == tenantMapItr->first); + } + + // Make sure the list terminated at the right spot + ASSERT(tenants.size() == limit || localItr == self->createdTenants.end() || localItr->first >= endTenant); + return Void(); } ACTOR static Future listTenants(TenantManagementWorkload* self) { @@ -1240,7 +1248,8 @@ struct TenantManagementWorkload : TestWorkload { state TenantName endTenant = self->chooseTenantName(false); state int limit = std::min(CLIENT_KNOBS->MAX_TENANTS_PER_CLUSTER + 1, deterministicRandom()->randomInt(1, self->maxTenants * 2)); - state OperationType operationType = self->randomOperationType(); + state OperationType operationType = + self->useMetacluster ? 
OperationType::METACLUSTER : self->randomOperationType(); state Reference tr = makeReference(self->dataDb); if (beginTenant > endTenant) { @@ -1249,40 +1258,27 @@ struct TenantManagementWorkload : TestWorkload { loop { try { - // Attempt to read the chosen list of tenants - state std::vector> tenants = - wait(listTenantsImpl(tr, beginTenant, endTenant, limit, operationType, self)); - state std::vector> tenants2 = - wait(listTenantsImplMeta(tr, beginTenant, endTenant, limit, self)); - - // Attempting to read the list of tenants using the metacluster API in a non-metacluster should - // return nothing in this test - if (operationType == OperationType::METACLUSTER && !self->useMetacluster) { - ASSERT(tenants.size() == 0); - return Void(); + if (self->useMetacluster) { + state std::vector> metaTenants = + wait(MetaclusterAPI::listTenantMetadata(self->mvDb, beginTenant, endTenant, limit)); + verifyTenantList(self, metaTenants, limit, beginTenant, endTenant); + } else { + state std::vector> tenants = + wait(listTenantsImpl(tr, beginTenant, endTenant, limit, operationType, self)); + if (operationType == OperationType::METACLUSTER) { + ASSERT_EQ(tenants.size(), 0); + return Void(); + } + verifyTenantList(self, tenants, limit, beginTenant, endTenant); } - - ASSERT(tenants.size() <= limit); - - // Compare the resulting tenant list to the list we expected to get - auto localItr = self->createdTenants.lower_bound(beginTenant); - auto tenantMapItr = tenants.begin(); - for (; tenantMapItr != tenants.end(); ++tenantMapItr, ++localItr) { - ASSERT(localItr != self->createdTenants.end()); - ASSERT(localItr->first == tenantMapItr->first); - } - - // Make sure the list terminated at the right spot - ASSERT(tenants.size() == limit || localItr == self->createdTenants.end() || - localItr->first >= endTenant); return Void(); } catch (Error& e) { state bool retry = false; state Error error = e; // Transaction-based operations need to be retried - if (operationType == OperationType::MANAGEMENT_TRANSACTION || - operationType == OperationType::SPECIAL_KEYS) { + if (!self->useMetacluster && (operationType == OperationType::MANAGEMENT_TRANSACTION || + operationType == OperationType::SPECIAL_KEYS)) { try { retry = true; wait(tr->onError(e)); From 8f4aec7d7f3b58f7a9bef1c39ea5e66c77091c43 Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Wed, 15 Feb 2023 15:50:34 -0800 Subject: [PATCH 07/31] address review comments and fix some test errors --- fdbclient/Metacluster.cpp | 5 ++--- fdbclient/include/fdbclient/Metacluster.h | 7 +------ fdbclient/include/fdbclient/MetaclusterManagement.actor.h | 2 +- fdbclient/include/fdbclient/Tenant.h | 2 ++ .../include/fdbserver/workloads/TenantConsistency.actor.h | 1 + .../workloads/MetaclusterManagementWorkload.actor.cpp | 2 +- 6 files changed, 8 insertions(+), 11 deletions(-) diff --git a/fdbclient/Metacluster.cpp b/fdbclient/Metacluster.cpp index 6874aabf00..4f7d277f65 100644 --- a/fdbclient/Metacluster.cpp +++ b/fdbclient/Metacluster.cpp @@ -133,9 +133,8 @@ std::string MetaclusterTenantMapEntry::toJson() const { return json_spirit::write_string(json_spirit::mValue(tenantEntry)); } -bool MetaclusterTenantMapEntry::matchesConfiguration(MetaclusterTenantMapEntry const& other, bool matchAssigned) const { - return tenantName == other.tenantName && tenantLockState == other.tenantLockState && - tenantGroup == other.tenantGroup && (!matchAssigned || assignedCluster == other.assignedCluster); +bool MetaclusterTenantMapEntry::matchesConfiguration(MetaclusterTenantMapEntry const& other) const { + 
return tenantGroup == other.tenantGroup; } void MetaclusterTenantMapEntry::configure(Standalone parameter, Optional value) { diff --git a/fdbclient/include/fdbclient/Metacluster.h b/fdbclient/include/fdbclient/Metacluster.h index c8a36e48c2..869a05415d 100644 --- a/fdbclient/include/fdbclient/Metacluster.h +++ b/fdbclient/include/fdbclient/Metacluster.h @@ -46,8 +46,6 @@ struct ClusterUsage { } }; -json_spirit::mObject binaryToJson(StringRef bytes); - template <> struct Traceable : std::true_type { static std::string toString(const ClusterUsage& value) { @@ -128,10 +126,7 @@ struct MetaclusterTenantMapEntry { void setId(int64_t id); std::string toJson() const; - // On retries, configurations need to be compared, but it may be possible in the early - // stages of creation that the assignedCluster is unset. - // matchAssigned flag should be false to account for this case - bool matchesConfiguration(MetaclusterTenantMapEntry const& other, bool matchAssigned = true) const; + bool matchesConfiguration(MetaclusterTenantMapEntry const& other) const; void configure(Standalone parameter, Optional value); Value encode() const { return ObjectWriter::toValue(*this, IncludeVersion()); } diff --git a/fdbclient/include/fdbclient/MetaclusterManagement.actor.h b/fdbclient/include/fdbclient/MetaclusterManagement.actor.h index 28ba17f314..cc25fe842e 100644 --- a/fdbclient/include/fdbclient/MetaclusterManagement.actor.h +++ b/fdbclient/include/fdbclient/MetaclusterManagement.actor.h @@ -1120,7 +1120,7 @@ struct CreateTenantImpl { state Optional existingEntry = wait(tryGetTenantTransaction(tr, self->tenantEntry.tenantName)); if (existingEntry.present()) { - if (!existingEntry.get().matchesConfiguration(self->tenantEntry, false) || + if (!existingEntry.get().matchesConfiguration(self->tenantEntry) || existingEntry.get().tenantState != TenantAPI::TenantState::REGISTERING) { // The tenant already exists and is either completely created or has a different // configuration diff --git a/fdbclient/include/fdbclient/Tenant.h b/fdbclient/include/fdbclient/Tenant.h index 058f878fa1..2b2987bb8f 100644 --- a/fdbclient/include/fdbclient/Tenant.h +++ b/fdbclient/include/fdbclient/Tenant.h @@ -71,6 +71,8 @@ std::string tenantLockStateToString(TenantLockState tenantState); TenantLockState stringToTenantLockState(std::string stateStr); } // namespace TenantAPI +json_spirit::mObject binaryToJson(StringRef bytes); + struct MetaclusterTenantMapEntry; struct TenantMapEntry { constexpr static FileIdentifier file_identifier = 7054389; diff --git a/fdbserver/include/fdbserver/workloads/TenantConsistency.actor.h b/fdbserver/include/fdbserver/workloads/TenantConsistency.actor.h index ecfab5e1ed..54259d50b8 100644 --- a/fdbserver/include/fdbserver/workloads/TenantConsistency.actor.h +++ b/fdbserver/include/fdbserver/workloads/TenantConsistency.actor.h @@ -77,6 +77,7 @@ private: loop { try { + tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS); wait( store(tenantList, tenantMetadata->tenantMap.getRange(tr, {}, {}, metaclusterMaxTenants)) && store(tenantNameIndexList, diff --git a/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp b/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp index 930c3e5740..7bfdcfa43d 100644 --- a/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp +++ b/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp @@ -749,7 +749,6 @@ struct MetaclusterManagementWorkload : TestWorkload { retried = true; wait(verifyListFilter(self, tenant)); - wait(verifyListFilter(self, 
newTenantName)); } catch (Error& e) { // If we retry the rename after it had succeeded, we will get an error that we should ignore if (e.code() == error_code_tenant_not_found && exists && !newTenantExists && retried) { @@ -758,6 +757,7 @@ struct MetaclusterManagementWorkload : TestWorkload { throw e; } } + wait(verifyListFilter(self, newTenantName)); ASSERT(exists); ASSERT(!newTenantExists); From 036078234e13f6657bb3e2ac2efb00295c125028 Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Thu, 16 Feb 2023 10:14:29 -0800 Subject: [PATCH 08/31] fix some leftover issues from branch merge --- fdbclient/Metacluster.cpp | 19 ++++++ fdbclient/Tenant.cpp | 8 +-- fdbclient/include/fdbclient/Metacluster.h | 4 ++ .../fdbclient/MetaclusterManagement.actor.h | 68 ++++++++++--------- .../fdbclient/TenantManagement.actor.h | 1 - .../workloads/TenantConsistency.actor.h | 5 +- .../MetaclusterManagementWorkload.actor.cpp | 2 +- .../MetaclusterRestoreWorkload.actor.cpp | 42 ++++++------ .../workloads/TenantCapacityLimits.actor.cpp | 2 +- ...antManagementConcurrencyWorkload.actor.cpp | 2 +- .../TenantManagementWorkload.actor.cpp | 4 +- 11 files changed, 89 insertions(+), 68 deletions(-) diff --git a/fdbclient/Metacluster.cpp b/fdbclient/Metacluster.cpp index 349ac447e5..159f700131 100644 --- a/fdbclient/Metacluster.cpp +++ b/fdbclient/Metacluster.cpp @@ -98,6 +98,14 @@ TenantMapEntry::TenantMapEntry(MetaclusterTenantMapEntry metaclusterEntry) } } +MetaclusterTenantMapEntry::MetaclusterTenantMapEntry(TenantMapEntry tenantEntry) + : tenantName(tenantEntry.tenantName), tenantLockState(tenantEntry.tenantLockState), + tenantGroup(tenantEntry.tenantGroup), configurationSequenceNum(tenantEntry.configurationSequenceNum) { + if (tenantEntry.id >= 0) { + setId(tenantEntry.id); + } +} + MetaclusterTenantMapEntry::MetaclusterTenantMapEntry() {} MetaclusterTenantMapEntry::MetaclusterTenantMapEntry(int64_t id, TenantName tenantName, @@ -148,6 +156,10 @@ bool MetaclusterTenantMapEntry::matchesConfiguration(MetaclusterTenantMapEntry c return tenantGroup == other.tenantGroup; } +bool MetaclusterTenantMapEntry::matchesConfiguration(TenantMapEntry const& other) const { + return tenantGroup == other.tenantGroup; +} + void MetaclusterTenantMapEntry::configure(Standalone parameter, Optional value) { if (parameter == "tenant_group"_sr) { tenantGroup = value; @@ -159,6 +171,13 @@ void MetaclusterTenantMapEntry::configure(Standalone parameter, Optio } } +bool MetaclusterTenantMapEntry::operator==(MetaclusterTenantMapEntry const& other) const { + return id == other.id && tenantName == other.tenantName && tenantState == other.tenantState && + tenantLockState == other.tenantLockState && tenantGroup == other.tenantGroup && + assignedCluster == other.assignedCluster && configurationSequenceNum == other.configurationSequenceNum && + renameDestination == other.renameDestination && error == other.error; +} + KeyBackedObjectProperty& MetaclusterMetadata::metaclusterRegistration() { static KeyBackedObjectProperty instance( diff --git a/fdbclient/Tenant.cpp b/fdbclient/Tenant.cpp index 0610c6dbb1..39994cca41 100644 --- a/fdbclient/Tenant.cpp +++ b/fdbclient/Tenant.cpp @@ -172,7 +172,7 @@ std::string TenantMapEntry::toJson() const { } bool TenantMapEntry::matchesConfiguration(TenantMapEntry const& other) const { - return tenantGroup == other.tenantGroup && tenantLockState == other.tenantLockState; + return tenantGroup == other.tenantGroup; } void TenantMapEntry::configure(Standalone parameter, Optional value) { @@ -185,10 +185,8 @@ void 
TenantMapEntry::configure(Standalone parameter, Optional } bool TenantMapEntry::operator==(TenantMapEntry const& other) const { - return id == other.id && tenantName == other.tenantName && tenantState == other.tenantState && - tenantLockState == other.tenantLockState && tenantGroup == other.tenantGroup && - assignedCluster == other.assignedCluster && configurationSequenceNum == other.configurationSequenceNum && - renameDestination == other.renameDestination && error == other.error; + return id == other.id && tenantName == other.tenantName && tenantLockState == other.tenantLockState && + tenantGroup == other.tenantGroup && configurationSequenceNum == other.configurationSequenceNum; } json_spirit::mObject TenantGroupEntry::toJson() const { diff --git a/fdbclient/include/fdbclient/Metacluster.h b/fdbclient/include/fdbclient/Metacluster.h index cab9e229a2..861590d23d 100644 --- a/fdbclient/include/fdbclient/Metacluster.h +++ b/fdbclient/include/fdbclient/Metacluster.h @@ -121,11 +121,13 @@ struct MetaclusterTenantMapEntry { TenantName tenantName, TenantAPI::TenantState tenantState, Optional tenantGroup); + MetaclusterTenantMapEntry(TenantMapEntry tenantEntry); void setId(int64_t id); std::string toJson() const; bool matchesConfiguration(MetaclusterTenantMapEntry const& other) const; + bool matchesConfiguration(TenantMapEntry const& other) const; void configure(Standalone parameter, Optional value); Value encode() const { return ObjectWriter::toValue(*this, IncludeVersion()); } @@ -133,6 +135,8 @@ struct MetaclusterTenantMapEntry { return ObjectReader::fromStringRef(value, IncludeVersion()); } + bool operator==(MetaclusterTenantMapEntry const& other) const; + template void serialize(Ar& ar) { serializer(ar, diff --git a/fdbclient/include/fdbclient/MetaclusterManagement.actor.h b/fdbclient/include/fdbclient/MetaclusterManagement.actor.h index 50889ff0b6..539f7e0515 100644 --- a/fdbclient/include/fdbclient/MetaclusterManagement.actor.h +++ b/fdbclient/include/fdbclient/MetaclusterManagement.actor.h @@ -1338,7 +1338,7 @@ struct RestoreClusterImpl { // Tenant list from data and management clusters std::unordered_map dataClusterTenantMap; std::unordered_set dataClusterTenantNames; - std::unordered_map mgmtClusterTenantMap; + std::unordered_map mgmtClusterTenantMap; std::unordered_set mgmtClusterTenantSetForCurrentDataCluster; RestoreClusterImpl(Reference managementDb, @@ -1533,7 +1533,7 @@ struct RestoreClusterImpl { ACTOR static Future markManagementTenantsAsError(RestoreClusterImpl* self, Reference tr, std::vector tenants) { - state std::vector>> getFutures; + state std::vector>> getFutures; for (auto tenantId : tenants) { getFutures.push_back(tryGetTenantTransaction(tr, tenantId)); } @@ -1545,8 +1545,8 @@ struct RestoreClusterImpl { continue; } - TenantMapEntry entry = f.get().get(); - entry.tenantState = TenantState::ERROR; + MetaclusterTenantMapEntry entry = f.get().get(); + entry.tenantState = TenantAPI::TenantState::ERROR; entry.error = "The tenant is missing after restoring its data cluster"; ManagementClusterMetadata::tenantMetadata().tenantMap.set(tr, entry.id, entry); } @@ -1569,13 +1569,13 @@ struct RestoreClusterImpl { ACTOR static Future> getTenantsFromManagementCluster(RestoreClusterImpl* self, Reference tr, int64_t initialTenantId) { - state KeyBackedRangeResult> tenants = + state KeyBackedRangeResult> tenants = wait(ManagementClusterMetadata::tenantMetadata().tenantMap.getRange( tr, initialTenantId, {}, CLIENT_KNOBS->MAX_TENANTS_PER_CLUSTER)); for (auto const& t : tenants.results) { 
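                // Note: this loop caches every management-cluster tenant keyed by ID, and separately records the
                // IDs of tenants assigned to the data cluster being restored, for use later in the restore.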
self->mgmtClusterTenantMap.emplace(t.first, t.second); - if (t.second.assignedCluster.present() && self->clusterName == t.second.assignedCluster.get()) { + if (self->clusterName == t.second.assignedCluster) { self->mgmtClusterTenantSetForCurrentDataCluster.emplace(t.first); } } @@ -1652,7 +1652,6 @@ struct RestoreClusterImpl { int64_t tenantId, TenantMapEntry updatedEntry) { TenantMapEntry existingEntry = wait(TenantAPI::getTenantTransaction(tr, tenantId)); - updatedEntry.assignedCluster = Optional(); // The tenant should have already been renamed, so in most cases its name will match. // If we had to break a rename cycle using temporary tenant names, use that in the updated @@ -1671,14 +1670,15 @@ struct RestoreClusterImpl { // Updates a tenant to match the management cluster state // Returns the name of the tenant after it has been reconciled - ACTOR static Future>> reconcileTenant(RestoreClusterImpl* self, - TenantMapEntry tenantEntry) { - state std::unordered_map::iterator managementEntry = + ACTOR static Future>> reconcileTenant( + RestoreClusterImpl* self, + TenantMapEntry tenantEntry) { + state std::unordered_map::iterator managementEntry = self->mgmtClusterTenantMap.find(tenantEntry.id); // A data cluster tenant is not present on the management cluster if (managementEntry == self->mgmtClusterTenantMap.end() || - managementEntry->second.assignedCluster.get() != self->clusterName) { + managementEntry->second.assignedCluster != self->clusterName) { if (self->restoreDryRun) { if (managementEntry == self->mgmtClusterTenantMap.end()) { self->messages.push_back(fmt::format("Delete missing tenant `{}' with ID {} on data cluster", @@ -1697,10 +1697,10 @@ struct RestoreClusterImpl { })); } - return Optional>(); + return Optional>(); } else { state TenantName tenantName = tenantEntry.tenantName; - state TenantMapEntry managementTenant = managementEntry->second; + state MetaclusterTenantMapEntry managementTenant = managementEntry->second; // Rename state bool renamed = tenantName != managementTenant.tenantName; @@ -1758,7 +1758,8 @@ struct RestoreClusterImpl { } else { wait(self->runRestoreDataClusterTransaction( [self = self, managementTenant = managementTenant](Reference tr) { - return updateTenantConfiguration(self, tr, managementTenant.id, managementTenant); + return updateTenantConfiguration( + self, tr, managementTenant.id, TenantMapEntry(managementTenant)); })); // SOMEDAY: we could mark the tenant in the management cluster as READY if it is in the // UPDATING_CONFIGURATION state @@ -1769,7 +1770,7 @@ struct RestoreClusterImpl { } } - Future renameTenantBatch(std::vector> tenantsToRename) { + Future renameTenantBatch(std::vector> tenantsToRename) { return runRestoreDataClusterTransaction([this, tenantsToRename](Reference tr) { std::vector> renameFutures; for (auto t : tenantsToRename) { @@ -1781,7 +1782,7 @@ struct RestoreClusterImpl { } ACTOR static Future reconcileTenants(RestoreClusterImpl* self) { - state std::vector>>> reconcileFutures; + state std::vector>>> reconcileFutures; for (auto itr = self->dataClusterTenantMap.begin(); itr != self->dataClusterTenantMap.end(); ++itr) { reconcileFutures.push_back(reconcileTenant(self, itr->second)); } @@ -1790,9 +1791,10 @@ struct RestoreClusterImpl { if (!self->restoreDryRun) { state int reconcileIndex; - state std::vector> tenantsToRename; + state std::vector> tenantsToRename; for (reconcileIndex = 0; reconcileIndex < reconcileFutures.size(); ++reconcileIndex) { - Optional> const& result = reconcileFutures[reconcileIndex].get(); + 
Optional> const& result = + reconcileFutures[reconcileIndex].get(); if (result.present() && result.get().first.startsWith(metaclusterTemporaryRenamePrefix) && result.get().first != result.get().second.tenantName) { tenantsToRename.push_back(result.get()); @@ -1816,7 +1818,7 @@ struct RestoreClusterImpl { state int64_t missingTenantCount = 0; while (setItr != self->mgmtClusterTenantSetForCurrentDataCluster.end()) { int64_t tenantId = *setItr; - TenantMapEntry const& managementTenant = self->mgmtClusterTenantMap[tenantId]; + MetaclusterTenantMapEntry const& managementTenant = self->mgmtClusterTenantMap[tenantId]; // If a tenant is present on the management cluster and not on the data cluster, mark it in an error // state unless it is already in certain states (e.g. REGISTERING, REMOVING) that allow the tenant to be @@ -1825,8 +1827,8 @@ struct RestoreClusterImpl { // SOMEDAY: this could optionally complete the partial operations (e.g. finish creating or removing the // tenant) if (self->dataClusterTenantMap.find(tenantId) == self->dataClusterTenantMap.end() && - managementTenant.tenantState != TenantState::REGISTERING && - managementTenant.tenantState != TenantState::REMOVING) { + managementTenant.tenantState != TenantAPI::TenantState::REGISTERING && + managementTenant.tenantState != TenantAPI::TenantState::REMOVING) { if (self->restoreDryRun) { self->messages.push_back(fmt::format("The tenant `{}' with ID {} is missing on the data cluster", printable(managementTenant.tenantName), @@ -1836,7 +1838,7 @@ struct RestoreClusterImpl { // include tenants we previously marked as missing, and as new errors are added it could include // other tenants ++missingTenantCount; - if (managementTenant.tenantState != TenantState::ERROR) { + if (managementTenant.tenantState != TenantAPI::TenantState::ERROR) { missingTenants.push_back(tenantId); if (missingTenants.size() == CLIENT_KNOBS->METACLUSTER_RESTORE_BATCH_SIZE) { wait(self->runRestoreManagementTransaction([self = self, missingTenants = missingTenants]( @@ -1868,14 +1870,14 @@ struct RestoreClusterImpl { // Returns true if the group needs to be created ACTOR static Future addTenantToManagementCluster(RestoreClusterImpl* self, Reference tr, - TenantMapEntry tenantEntry) { + MetaclusterTenantMapEntry tenantEntry) { state Future> tenantGroupEntry = Optional(); if (tenantEntry.tenantGroup.present()) { tenantGroupEntry = ManagementClusterMetadata::tenantMetadata().tenantGroupMap.get(tr, tenantEntry.tenantGroup.get()); } - Optional existingEntry = wait(tryGetTenantTransaction(tr, tenantEntry.tenantName)); + Optional existingEntry = wait(tryGetTenantTransaction(tr, tenantEntry.tenantName)); if (existingEntry.present()) { if (existingEntry.get().assignedCluster == self->clusterName) { ASSERT(existingEntry.get().matchesConfiguration(tenantEntry)); @@ -1890,18 +1892,18 @@ struct RestoreClusterImpl { } if (!self->restoreDryRun) { - tenantEntry.tenantState = TenantState::READY; + tenantEntry.tenantState = TenantAPI::TenantState::READY; tenantEntry.assignedCluster = self->clusterName; ManagementClusterMetadata::tenantMetadata().tenantMap.set(tr, tenantEntry.id, tenantEntry); ManagementClusterMetadata::tenantMetadata().tenantNameIndex.set(tr, tenantEntry.tenantName, tenantEntry.id); ManagementClusterMetadata::tenantMetadata().tenantCount.atomicOp(tr, 1, MutationRef::AddValue); ManagementClusterMetadata::clusterTenantCount.atomicOp( - tr, tenantEntry.assignedCluster.get(), 1, MutationRef::AddValue); + tr, tenantEntry.assignedCluster, 1, MutationRef::AddValue); // 
Updated indexes to include the new tenant ManagementClusterMetadata::clusterTenantIndex.insert( - tr, Tuple::makeTuple(tenantEntry.assignedCluster.get(), tenantEntry.tenantName, tenantEntry.id)); + tr, Tuple::makeTuple(tenantEntry.assignedCluster, tenantEntry.tenantName, tenantEntry.id)); } wait(success(tenantGroupEntry)); @@ -1928,7 +1930,7 @@ struct RestoreClusterImpl { ACTOR static Future addTenantBatchToManagementCluster(RestoreClusterImpl* self, Reference tr, - std::vector tenants) { + std::vector tenants) { Optional tenantIdPrefix = wait(TenantMetadata::tenantIdPrefix().get(tr)); ASSERT(tenantIdPrefix.present()); @@ -1983,17 +1985,17 @@ struct RestoreClusterImpl { ACTOR static Future addTenantsToManagementCluster(RestoreClusterImpl* self) { state std::unordered_map::iterator itr; - state std::vector tenantBatch; + state std::vector tenantBatch; state int64_t tenantsToAdd = 0; for (itr = self->dataClusterTenantMap.begin(); itr != self->dataClusterTenantMap.end(); ++itr) { - state std::unordered_map::iterator managementEntry = + state std::unordered_map::iterator managementEntry = self->mgmtClusterTenantMap.find(itr->second.id); if (managementEntry == self->mgmtClusterTenantMap.end()) { ++tenantsToAdd; - tenantBatch.push_back(itr->second); + tenantBatch.push_back(MetaclusterTenantMapEntry(itr->second)); } else if (managementEntry->second.tenantName != itr->second.tenantName || - managementEntry->second.assignedCluster.get() != self->clusterName || + managementEntry->second.assignedCluster != self->clusterName || !managementEntry->second.matchesConfiguration(itr->second)) { self->messages.push_back( fmt::format("The tenant `{}' has the same ID {} as an existing tenant `{}' on cluster `{}'", @@ -2862,7 +2864,7 @@ struct ConfigureTenantImpl { } if (self->updatedEntry.matchesConfiguration(tenantEntry.get()) && - tenantEntry.get().tenantState == TenantState::READY) { + tenantEntry.get().tenantState == TenantAPI::TenantState::READY) { return false; } diff --git a/fdbclient/include/fdbclient/TenantManagement.actor.h b/fdbclient/include/fdbclient/TenantManagement.actor.h index 90999567c1..f4e7999d73 100644 --- a/fdbclient/include/fdbclient/TenantManagement.actor.h +++ b/fdbclient/include/fdbclient/TenantManagement.actor.h @@ -446,7 +446,6 @@ Future configureTenantTransaction(Transaction tr, TenantMapEntry originalEntry, TenantMapEntry updatedTenantEntry) { ASSERT(updatedTenantEntry.id == originalEntry.id); - ASSERT(!updatedTenantEntry.assignedCluster.present()); tr->setOption(FDBTransactionOptions::RAW_ACCESS); TenantMetadata::tenantMap().set(tr, updatedTenantEntry.id, updatedTenantEntry); diff --git a/fdbserver/include/fdbserver/workloads/TenantConsistency.actor.h b/fdbserver/include/fdbserver/workloads/TenantConsistency.actor.h index 24ef121944..d8ba12364c 100644 --- a/fdbserver/include/fdbserver/workloads/TenantConsistency.actor.h +++ b/fdbserver/include/fdbserver/workloads/TenantConsistency.actor.h @@ -188,6 +188,8 @@ private: } else { ASSERT_NE(tenantMapEntry.tenantState, TenantAPI::TenantState::RENAMING); } + // An error string should be set if and only if the tenant state is an error + ASSERT((tenantMapEntry.tenantState == TenantAPI::TenantState::ERROR) != tenantMapEntry.error.empty()); } ASSERT_EQ(tenantMap.size() + renameCount, metadata.tenantNameIndex.size()); @@ -209,9 +211,6 @@ private: } catch (Error& e) { wait(safeThreadFutureToFuture(tr->onError(e))); } - - // An error string should be set if and only if the tenant state is an error - ASSERT((tenantMapEntry.tenantState == 
TenantState::ERROR) != tenantMapEntry.error.empty()); } if (self->metadata.clusterType == ClusterType::METACLUSTER_MANAGEMENT) { std::map tenantMap = diff --git a/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp b/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp index 2cb948af73..832b76c8f9 100644 --- a/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp +++ b/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp @@ -460,7 +460,7 @@ struct MetaclusterManagementWorkload : TestWorkload { state std::vector> tenantList = wait(MetaclusterAPI::listTenantMetadata(self->managementDb, ""_sr, "\xff\xff"_sr, 10e6, 0, filters)); // Possible to have changed state between now and the getTenant call above - state TenantMapEntry checkEntry2 = wait(MetaclusterAPI::getTenant(self->managementDb, tenant)); + state MetaclusterTenantMapEntry checkEntry2 = wait(MetaclusterAPI::getTenant(self->managementDb, tenant)); DisabledTraceEvent(SevDebug, "VerifyListFilter") .detail("Context", context) .detail("Tenant", tenant) diff --git a/fdbserver/workloads/MetaclusterRestoreWorkload.actor.cpp b/fdbserver/workloads/MetaclusterRestoreWorkload.actor.cpp index aca0212428..fc3f418902 100644 --- a/fdbserver/workloads/MetaclusterRestoreWorkload.actor.cpp +++ b/fdbserver/workloads/MetaclusterRestoreWorkload.actor.cpp @@ -81,7 +81,7 @@ struct MetaclusterRestoreWorkload : TestWorkload { std::map tenantGroups; std::set deletedTenants; - std::vector> managementTenantsBeforeRestore; + std::vector> managementTenantsBeforeRestore; int initialTenants; int maxTenants; @@ -337,10 +337,11 @@ struct MetaclusterRestoreWorkload : TestWorkload { return waitForAll(deleteFutures); } - ACTOR template - static Future> getTenantsInGroup(Transaction tr, - TenantMetadataSpecification tenantMetadata, - TenantGroupName tenantGroup) { + ACTOR template + static Future> getTenantsInGroup( + Transaction tr, + TenantMetadataSpecification tenantMetadata, + TenantGroupName tenantGroup) { KeyBackedRangeResult groupTenants = wait(tenantMetadata.tenantGroupTenantIndex.getRange(tr, Tuple::makeTuple(tenantGroup), @@ -632,23 +633,24 @@ struct MetaclusterRestoreWorkload : TestWorkload { loop { try { - TenantMapEntry tenantEntry; + MetaclusterTenantMapEntry tenantEntry; tenantEntry.tenantName = tenantName; tenantEntry.tenantGroup = self->chooseTenantGroup(); wait(MetaclusterAPI::createTenant(self->managementDb, tenantEntry, AssignClusterAutomatically::True)); - TenantMapEntry createdEntry = wait(MetaclusterAPI::getTenant(self->managementDb, tenantName)); + MetaclusterTenantMapEntry createdEntry = + wait(MetaclusterAPI::getTenant(self->managementDb, tenantName)); TraceEvent(SevDebug, "MetaclusterRestoreWorkloadCreatedTenant") .detail("Tenant", tenantName) .detail("TenantId", createdEntry.id) .detail("AccessTime", createTime); self->createdTenants[createdEntry.id] = - TenantData(tenantName, createdEntry.assignedCluster.get(), createdEntry.tenantGroup, createTime); + TenantData(tenantName, createdEntry.assignedCluster, createdEntry.tenantGroup, createTime); self->tenantNameIndex[tenantName] = createdEntry.id; - auto& dataDb = self->dataDbs[createdEntry.assignedCluster.get()]; + auto& dataDb = self->dataDbs[createdEntry.assignedCluster]; dataDb.tenants.insert(createdEntry.id); if (createdEntry.tenantGroup.present()) { auto& tenantGroupData = self->tenantGroups[createdEntry.tenantGroup.get()]; - tenantGroupData.cluster = createdEntry.assignedCluster.get(); + tenantGroupData.cluster = createdEntry.assignedCluster; 
tenantGroupData.tenants.insert(createdEntry.id); dataDb.tenantGroups.insert(createdEntry.tenantGroup.get()); } @@ -872,7 +874,7 @@ struct MetaclusterRestoreWorkload : TestWorkload { if (self->recoverManagementCluster) { wait(resetManagementCluster(self)); } else { - KeyBackedRangeResult> tenants = + KeyBackedRangeResult> tenants = wait(runTransaction(self->managementDb, [](Reference tr) { return MetaclusterAPI::ManagementClusterMetadata::tenantMetadata().tenantMap.getRange( tr, {}, {}, CLIENT_KNOBS->MAX_TENANTS_PER_CLUSTER + 1); @@ -989,14 +991,14 @@ struct MetaclusterRestoreWorkload : TestWorkload { } ACTOR static Future checkTenants(MetaclusterRestoreWorkload* self) { - state KeyBackedRangeResult> tenants = + state KeyBackedRangeResult> tenants = wait(runTransaction(self->managementDb, [](Reference tr) { return MetaclusterAPI::ManagementClusterMetadata::tenantMetadata().tenantMap.getRange( tr, {}, {}, CLIENT_KNOBS->MAX_TENANTS_PER_CLUSTER + 1); })); ASSERT_LE(tenants.results.size(), CLIENT_KNOBS->MAX_TENANTS_PER_CLUSTER); - std::map tenantMap(tenants.results.begin(), tenants.results.end()); + std::map tenantMap(tenants.results.begin(), tenants.results.end()); // If we did not restore the management cluster, then every tenant present in the management cluster before the // restore should be present after the restore. All tenants in the management cluster should be unchanged except @@ -1006,9 +1008,9 @@ struct MetaclusterRestoreWorkload : TestWorkload { auto itr = tenantMap.find(tenantId); ASSERT(itr != tenantMap.end()); - TenantMapEntry postRecoveryEntry = itr->second; - if (postRecoveryEntry.tenantState == TenantState::ERROR) { - ASSERT(self->dataDbs[itr->second.assignedCluster.get()].restored); + MetaclusterTenantMapEntry postRecoveryEntry = itr->second; + if (postRecoveryEntry.tenantState == TenantAPI::TenantState::ERROR) { + ASSERT(self->dataDbs[itr->second.assignedCluster].restored); postRecoveryEntry.tenantState = tenantEntry.tenantState; postRecoveryEntry.error.clear(); } @@ -1030,14 +1032,14 @@ struct MetaclusterRestoreWorkload : TestWorkload { } else { if (tenantData.createTime != TenantData::AccessTime::BEFORE_BACKUP && self->dataDbs[tenantData.cluster].restored) { - ASSERT(tenantItr->second.tenantState == TenantState::ERROR || - (tenantItr->second.tenantState == TenantState::READY && + ASSERT(tenantItr->second.tenantState == TenantAPI::TenantState::ERROR || + (tenantItr->second.tenantState == TenantAPI::TenantState::READY && tenantData.createTime == TenantData::AccessTime::DURING_BACKUP)); - if (tenantItr->second.tenantState == TenantState::ERROR) { + if (tenantItr->second.tenantState == TenantAPI::TenantState::ERROR) { ASSERT(self->dataDbs[tenantData.cluster].restoreHasMessages); } } else { - ASSERT_EQ(tenantItr->second.tenantState, TenantState::READY); + ASSERT_EQ(tenantItr->second.tenantState, TenantAPI::TenantState::READY); } } } diff --git a/fdbserver/workloads/TenantCapacityLimits.actor.cpp b/fdbserver/workloads/TenantCapacityLimits.actor.cpp index 80922bb31f..7de3a36067 100644 --- a/fdbserver/workloads/TenantCapacityLimits.actor.cpp +++ b/fdbserver/workloads/TenantCapacityLimits.actor.cpp @@ -130,7 +130,7 @@ struct TenantCapacityLimits : TestWorkload { } // Attempt to create a tenant on the metacluster which should fail since the cluster is at capacity try { - TenantMapEntry entry; + MetaclusterTenantMapEntry entry; entry.tenantName = "test_tenant_metacluster"_sr; wait(MetaclusterAPI::createTenant(self->managementDb, entry, AssignClusterAutomatically::True)); 
ASSERT(false); diff --git a/fdbserver/workloads/TenantManagementConcurrencyWorkload.actor.cpp b/fdbserver/workloads/TenantManagementConcurrencyWorkload.actor.cpp index 658aecc38f..f34a01d666 100644 --- a/fdbserver/workloads/TenantManagementConcurrencyWorkload.actor.cpp +++ b/fdbserver/workloads/TenantManagementConcurrencyWorkload.actor.cpp @@ -181,7 +181,7 @@ struct TenantManagementConcurrencyWorkload : TestWorkload { ACTOR static Future createTenant(TenantManagementConcurrencyWorkload* self) { state TenantName tenant = self->chooseTenantName(); - state TenantMapEntry entry; + state MetaclusterTenantMapEntry entry; state UID debugId = deterministicRandom()->randomUniqueID(); diff --git a/fdbserver/workloads/TenantManagementWorkload.actor.cpp b/fdbserver/workloads/TenantManagementWorkload.actor.cpp index ff050e1330..94af556c0b 100644 --- a/fdbserver/workloads/TenantManagementWorkload.actor.cpp +++ b/fdbserver/workloads/TenantManagementWorkload.actor.cpp @@ -395,9 +395,7 @@ struct TenantManagementWorkload : TestWorkload { } else { ASSERT(tenantsToCreate.size() == 1); TenantMapEntryImpl tEntry = tenantsToCreate.begin()->second; - MetaclusterTenantMapEntry modifiedEntry; - modifiedEntry.tenantName = tEntry.tenantName; - modifiedEntry.tenantGroup = tEntry.tenantGroup; + MetaclusterTenantMapEntry modifiedEntry(tEntry); auto assign = AssignClusterAutomatically::True; if (deterministicRandom()->coinflip()) { modifiedEntry.assignedCluster = self->dataClusterName; From 0d7b6d626b7cd6192c7dbcb727fd0a38678fd89f Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Fri, 17 Feb 2023 10:23:21 -0800 Subject: [PATCH 09/31] update restoreCluster test to account for conflicting_restore --- .../fdbclient/MetaclusterManagement.actor.h | 2 +- .../MetaclusterManagementWorkload.actor.cpp | 33 ++++++++++++------- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/fdbclient/include/fdbclient/MetaclusterManagement.actor.h b/fdbclient/include/fdbclient/MetaclusterManagement.actor.h index 0ce08e2612..159eab0ce0 100644 --- a/fdbclient/include/fdbclient/MetaclusterManagement.actor.h +++ b/fdbclient/include/fdbclient/MetaclusterManagement.actor.h @@ -1330,7 +1330,7 @@ struct RestoreClusterImpl { std::vector& messages; // Unique ID generated for this restore. 
Used to avoid concurrent restores - UID restoreId; + UID restoreId = deterministicRandom()->randomUniqueID(); // Loaded from the data cluster UID dataClusterId; diff --git a/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp b/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp index fdd528774d..26b08a77b7 100644 --- a/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp +++ b/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp @@ -280,18 +280,29 @@ struct MetaclusterManagementWorkload : TestWorkload { state std::vector messages; try { + state bool retried = false; loop { - Future restoreFuture = - MetaclusterAPI::restoreCluster(self->managementDb, - clusterName, - dataDb->db->getConnectionRecord()->getConnectionString(), - ApplyManagementClusterUpdates::True, - RestoreDryRun(dryRun), - ForceJoinNewMetacluster(forceJoin), - &messages); - Optional result = wait(timeout(restoreFuture, deterministicRandom()->randomInt(1, 30))); - if (result.present()) { - break; + try { + Future restoreFuture = + MetaclusterAPI::restoreCluster(self->managementDb, + clusterName, + dataDb->db->getConnectionRecord()->getConnectionString(), + ApplyManagementClusterUpdates::True, + RestoreDryRun(dryRun), + ForceJoinNewMetacluster(forceJoin), + &messages); + Optional result = wait(timeout(restoreFuture, deterministicRandom()->randomInt(1, 30))); + if (result.present()) { + break; + } + retried = true; + } catch (Error& e) { + if (e.code() == error_code_conflicting_restore) { + ASSERT(retried); + CODE_PROBE(true, "MetaclusterRestore: 2 restores on the same cluster simultaneously"); + continue; + } + throw; } } From 5fe32b85031edefff9e92fe8488e34552aed86e6 Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Fri, 17 Feb 2023 14:01:58 -0800 Subject: [PATCH 10/31] let client supply restore id --- fdbcli/MetaclusterCommands.actor.cpp | 3 +++ .../fdbclient/MetaclusterManagement.actor.h | 16 ++++++++++++---- .../MetaclusterManagementWorkload.actor.cpp | 5 ++++- .../MetaclusterRestoreWorkload.actor.cpp | 6 ++++++ .../TenantManagementWorkload.actor.cpp | 18 ++++++++---------- 5 files changed, 33 insertions(+), 15 deletions(-) diff --git a/fdbcli/MetaclusterCommands.actor.cpp b/fdbcli/MetaclusterCommands.actor.cpp index c7ef3e9aa6..da21f4c15f 100644 --- a/fdbcli/MetaclusterCommands.actor.cpp +++ b/fdbcli/MetaclusterCommands.actor.cpp @@ -266,11 +266,13 @@ ACTOR Future metaclusterRestoreCommand(Reference db, std::vecto state std::vector messages; state bool success = true; + state UID restoreId = deterministicRandom()->randomUniqueID(); try { if (restoreType == "restore_known_data_cluster"_sr) { wait(MetaclusterAPI::restoreCluster(db, clusterName, + restoreId, config.get().first.get(), ApplyManagementClusterUpdates::True, RestoreDryRun(dryRun), @@ -279,6 +281,7 @@ ACTOR Future metaclusterRestoreCommand(Reference db, std::vecto } else if (restoreType == "repopulate_from_data_cluster"_sr) { wait(MetaclusterAPI::restoreCluster(db, clusterName, + restoreId, config.get().first.get(), ApplyManagementClusterUpdates::False, RestoreDryRun(dryRun), diff --git a/fdbclient/include/fdbclient/MetaclusterManagement.actor.h b/fdbclient/include/fdbclient/MetaclusterManagement.actor.h index 159eab0ce0..f62afe0666 100644 --- a/fdbclient/include/fdbclient/MetaclusterManagement.actor.h +++ b/fdbclient/include/fdbclient/MetaclusterManagement.actor.h @@ -1330,7 +1330,7 @@ struct RestoreClusterImpl { std::vector& messages; // Unique ID generated for this restore. 
Used to avoid concurrent restores - UID restoreId = deterministicRandom()->randomUniqueID(); + UID restoreId; // Loaded from the data cluster UID dataClusterId; @@ -1343,12 +1343,13 @@ struct RestoreClusterImpl { RestoreClusterImpl(Reference managementDb, ClusterName clusterName, + UID restoreId, ClusterConnectionString connectionString, ApplyManagementClusterUpdates applyManagementClusterUpdates, RestoreDryRun restoreDryRun, ForceJoinNewMetacluster forceJoinNewMetacluster, std::vector& messages) - : ctx(managementDb, {}, { DataClusterState::RESTORING }), clusterName(clusterName), + : ctx(managementDb, {}, { DataClusterState::RESTORING }), clusterName(clusterName), restoreId(restoreId), connectionString(connectionString), applyManagementClusterUpdates(applyManagementClusterUpdates), restoreDryRun(restoreDryRun), forceJoinNewMetacluster(forceJoinNewMetacluster), messages(messages) {} @@ -2153,13 +2154,20 @@ struct RestoreClusterImpl { ACTOR template Future restoreCluster(Reference db, ClusterName name, + UID restoreId, ClusterConnectionString connectionString, ApplyManagementClusterUpdates applyManagementClusterUpdates, RestoreDryRun restoreDryRun, ForceJoinNewMetacluster forceJoinNewMetacluster, std::vector* messages) { - state RestoreClusterImpl impl( - db, name, connectionString, applyManagementClusterUpdates, restoreDryRun, forceJoinNewMetacluster, *messages); + state RestoreClusterImpl impl(db, + name, + restoreId, + connectionString, + applyManagementClusterUpdates, + restoreDryRun, + forceJoinNewMetacluster, + *messages); wait(impl.run()); return Void(); } diff --git a/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp b/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp index 26b08a77b7..a81952d4f4 100644 --- a/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp +++ b/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp @@ -277,6 +277,7 @@ struct MetaclusterManagementWorkload : TestWorkload { state DataClusterData* dataDb = &self->dataDbs[clusterName]; state bool dryRun = deterministicRandom()->coinflip(); state bool forceJoin = deterministicRandom()->coinflip(); + state UID restoreId = deterministicRandom()->randomUniqueID(); state std::vector messages; try { @@ -286,6 +287,7 @@ struct MetaclusterManagementWorkload : TestWorkload { Future restoreFuture = MetaclusterAPI::restoreCluster(self->managementDb, clusterName, + restoreId, dataDb->db->getConnectionRecord()->getConnectionString(), ApplyManagementClusterUpdates::True, RestoreDryRun(dryRun), @@ -299,7 +301,8 @@ struct MetaclusterManagementWorkload : TestWorkload { } catch (Error& e) { if (e.code() == error_code_conflicting_restore) { ASSERT(retried); - CODE_PROBE(true, "MetaclusterRestore: 2 restores on the same cluster simultaneously"); + CODE_PROBE(true, + "MetaclusterManagementWorkload: timed out restore conflicts with retried restore"); continue; } throw; diff --git a/fdbserver/workloads/MetaclusterRestoreWorkload.actor.cpp b/fdbserver/workloads/MetaclusterRestoreWorkload.actor.cpp index fc3f418902..326702e84d 100644 --- a/fdbserver/workloads/MetaclusterRestoreWorkload.actor.cpp +++ b/fdbserver/workloads/MetaclusterRestoreWorkload.actor.cpp @@ -258,6 +258,7 @@ struct MetaclusterRestoreWorkload : TestWorkload { wait(success(backupAgent.restore(dataDb, dataDb, clusterName, StringRef(backupUrl), {}, backupRanges))); state std::vector messages; + state UID restoreId = deterministicRandom()->randomUniqueID(); if (addToMetacluster) { 
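            // Optionally exercise the dry-run restore path first; the actual restore below always
            // runs with RestoreDryRun::False.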
TraceEvent("MetaclusterRestoreWorkloadAddClusterToMetacluster").detail("ClusterName", clusterName); if (deterministicRandom()->coinflip()) { @@ -265,6 +266,7 @@ struct MetaclusterRestoreWorkload : TestWorkload { .detail("ClusterName", clusterName); wait(MetaclusterAPI::restoreCluster(self->managementDb, clusterName, + restoreId, dataDb->getConnectionRecord()->getConnectionString(), ApplyManagementClusterUpdates::True, RestoreDryRun::True, @@ -277,6 +279,7 @@ struct MetaclusterRestoreWorkload : TestWorkload { wait(MetaclusterAPI::restoreCluster(self->managementDb, clusterName, + restoreId, dataDb->getConnectionRecord()->getConnectionString(), ApplyManagementClusterUpdates::True, RestoreDryRun::False, @@ -507,6 +510,7 @@ struct MetaclusterRestoreWorkload : TestWorkload { state std::vector messages; state bool completed = false; + state UID restoreId = deterministicRandom()->randomUniqueID(); while (!completed) { state std::vector> dataTenantsBeforeRestore = wait(getDataClusterTenants(clusterItr->second.db)); @@ -524,6 +528,7 @@ struct MetaclusterRestoreWorkload : TestWorkload { wait(MetaclusterAPI::restoreCluster( self->managementDb, clusterItr->first, + restoreId, clusterItr->second.db->getConnectionRecord()->getConnectionString(), ApplyManagementClusterUpdates::False, RestoreDryRun::True, @@ -540,6 +545,7 @@ struct MetaclusterRestoreWorkload : TestWorkload { wait(MetaclusterAPI::restoreCluster( self->managementDb, clusterItr->first, + restoreId, clusterItr->second.db->getConnectionRecord()->getConnectionString(), ApplyManagementClusterUpdates::False, RestoreDryRun::False, diff --git a/fdbserver/workloads/TenantManagementWorkload.actor.cpp b/fdbserver/workloads/TenantManagementWorkload.actor.cpp index 1ea59a9ed4..0cec7e6f42 100644 --- a/fdbserver/workloads/TenantManagementWorkload.actor.cpp +++ b/fdbserver/workloads/TenantManagementWorkload.actor.cpp @@ -1351,13 +1351,12 @@ struct TenantManagementWorkload : TestWorkload { return Void(); } - ACTOR static Future renameTenantImpl(Reference tr, + ACTOR static Future renameTenantImpl(TenantManagementWorkload* self, + Reference tr, OperationType operationType, std::map tenantRenames, bool tenantNotFound, - bool tenantExists, - bool tenantOverlap, - TenantManagementWorkload* self) { + bool tenantExists) { if (operationType == OperationType::SPECIAL_KEYS) { tr->setOption(FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES); for (auto& iter : tenantRenames) { @@ -1409,10 +1408,10 @@ struct TenantManagementWorkload : TestWorkload { TenantName newTenant = self->chooseTenantName(false); bool checkOverlap = oldTenant == newTenant || allTenantNames.count(oldTenant) || allTenantNames.count(newTenant); - // renameTenantTransaction does not handle rename collisions: - // reject the rename here if it has overlap and we are doing a transaction operation - // and then pick another combination - if (checkOverlap && operationType == OperationType::MANAGEMENT_TRANSACTION) { + // These operation types do not handle rename collisions + // reject the rename here if it has overlap + if (checkOverlap && (operationType == OperationType::MANAGEMENT_TRANSACTION || + operationType == OperationType::MANAGEMENT_DATABASE)) { --i; continue; } @@ -1431,8 +1430,7 @@ struct TenantManagementWorkload : TestWorkload { state Version originalReadVersion = wait(self->getLatestReadVersion(self, operationType)); loop { try { - wait(renameTenantImpl( - tr, operationType, tenantRenames, tenantNotFound, tenantExists, tenantOverlap, self)); + wait(renameTenantImpl(self, tr, 
operationType, tenantRenames, tenantNotFound, tenantExists)); wait(verifyTenantRenames(self, tenantRenames)); Versionstamp currentVersionstamp = wait(getLastTenantModification(self, operationType)); ASSERT_GT(currentVersionstamp.version, originalReadVersion); From edb7a51b7e6a164a592f204f183671726f34461a Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Fri, 17 Feb 2023 14:37:22 -0800 Subject: [PATCH 11/31] Revert "let client supply restore id" This reverts commit 5fe32b85031edefff9e92fe8488e34552aed86e6. --- fdbcli/MetaclusterCommands.actor.cpp | 3 --- .../fdbclient/MetaclusterManagement.actor.h | 16 ++++------------ .../MetaclusterManagementWorkload.actor.cpp | 5 +---- .../MetaclusterRestoreWorkload.actor.cpp | 6 ------ .../TenantManagementWorkload.actor.cpp | 18 ++++++++++-------- 5 files changed, 15 insertions(+), 33 deletions(-) diff --git a/fdbcli/MetaclusterCommands.actor.cpp b/fdbcli/MetaclusterCommands.actor.cpp index da21f4c15f..c7ef3e9aa6 100644 --- a/fdbcli/MetaclusterCommands.actor.cpp +++ b/fdbcli/MetaclusterCommands.actor.cpp @@ -266,13 +266,11 @@ ACTOR Future metaclusterRestoreCommand(Reference db, std::vecto state std::vector messages; state bool success = true; - state UID restoreId = deterministicRandom()->randomUniqueID(); try { if (restoreType == "restore_known_data_cluster"_sr) { wait(MetaclusterAPI::restoreCluster(db, clusterName, - restoreId, config.get().first.get(), ApplyManagementClusterUpdates::True, RestoreDryRun(dryRun), @@ -281,7 +279,6 @@ ACTOR Future metaclusterRestoreCommand(Reference db, std::vecto } else if (restoreType == "repopulate_from_data_cluster"_sr) { wait(MetaclusterAPI::restoreCluster(db, clusterName, - restoreId, config.get().first.get(), ApplyManagementClusterUpdates::False, RestoreDryRun(dryRun), diff --git a/fdbclient/include/fdbclient/MetaclusterManagement.actor.h b/fdbclient/include/fdbclient/MetaclusterManagement.actor.h index f62afe0666..159eab0ce0 100644 --- a/fdbclient/include/fdbclient/MetaclusterManagement.actor.h +++ b/fdbclient/include/fdbclient/MetaclusterManagement.actor.h @@ -1330,7 +1330,7 @@ struct RestoreClusterImpl { std::vector& messages; // Unique ID generated for this restore. 
Used to avoid concurrent restores - UID restoreId; + UID restoreId = deterministicRandom()->randomUniqueID(); // Loaded from the data cluster UID dataClusterId; @@ -1343,13 +1343,12 @@ struct RestoreClusterImpl { RestoreClusterImpl(Reference managementDb, ClusterName clusterName, - UID restoreId, ClusterConnectionString connectionString, ApplyManagementClusterUpdates applyManagementClusterUpdates, RestoreDryRun restoreDryRun, ForceJoinNewMetacluster forceJoinNewMetacluster, std::vector& messages) - : ctx(managementDb, {}, { DataClusterState::RESTORING }), clusterName(clusterName), restoreId(restoreId), + : ctx(managementDb, {}, { DataClusterState::RESTORING }), clusterName(clusterName), connectionString(connectionString), applyManagementClusterUpdates(applyManagementClusterUpdates), restoreDryRun(restoreDryRun), forceJoinNewMetacluster(forceJoinNewMetacluster), messages(messages) {} @@ -2154,20 +2153,13 @@ struct RestoreClusterImpl { ACTOR template Future restoreCluster(Reference db, ClusterName name, - UID restoreId, ClusterConnectionString connectionString, ApplyManagementClusterUpdates applyManagementClusterUpdates, RestoreDryRun restoreDryRun, ForceJoinNewMetacluster forceJoinNewMetacluster, std::vector* messages) { - state RestoreClusterImpl impl(db, - name, - restoreId, - connectionString, - applyManagementClusterUpdates, - restoreDryRun, - forceJoinNewMetacluster, - *messages); + state RestoreClusterImpl impl( + db, name, connectionString, applyManagementClusterUpdates, restoreDryRun, forceJoinNewMetacluster, *messages); wait(impl.run()); return Void(); } diff --git a/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp b/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp index a81952d4f4..26b08a77b7 100644 --- a/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp +++ b/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp @@ -277,7 +277,6 @@ struct MetaclusterManagementWorkload : TestWorkload { state DataClusterData* dataDb = &self->dataDbs[clusterName]; state bool dryRun = deterministicRandom()->coinflip(); state bool forceJoin = deterministicRandom()->coinflip(); - state UID restoreId = deterministicRandom()->randomUniqueID(); state std::vector messages; try { @@ -287,7 +286,6 @@ struct MetaclusterManagementWorkload : TestWorkload { Future restoreFuture = MetaclusterAPI::restoreCluster(self->managementDb, clusterName, - restoreId, dataDb->db->getConnectionRecord()->getConnectionString(), ApplyManagementClusterUpdates::True, RestoreDryRun(dryRun), @@ -301,8 +299,7 @@ struct MetaclusterManagementWorkload : TestWorkload { } catch (Error& e) { if (e.code() == error_code_conflicting_restore) { ASSERT(retried); - CODE_PROBE(true, - "MetaclusterManagementWorkload: timed out restore conflicts with retried restore"); + CODE_PROBE(true, "MetaclusterRestore: 2 restores on the same cluster simultaneously"); continue; } throw; diff --git a/fdbserver/workloads/MetaclusterRestoreWorkload.actor.cpp b/fdbserver/workloads/MetaclusterRestoreWorkload.actor.cpp index 326702e84d..fc3f418902 100644 --- a/fdbserver/workloads/MetaclusterRestoreWorkload.actor.cpp +++ b/fdbserver/workloads/MetaclusterRestoreWorkload.actor.cpp @@ -258,7 +258,6 @@ struct MetaclusterRestoreWorkload : TestWorkload { wait(success(backupAgent.restore(dataDb, dataDb, clusterName, StringRef(backupUrl), {}, backupRanges))); state std::vector messages; - state UID restoreId = deterministicRandom()->randomUniqueID(); if (addToMetacluster) { 
TraceEvent("MetaclusterRestoreWorkloadAddClusterToMetacluster").detail("ClusterName", clusterName); if (deterministicRandom()->coinflip()) { @@ -266,7 +265,6 @@ struct MetaclusterRestoreWorkload : TestWorkload { .detail("ClusterName", clusterName); wait(MetaclusterAPI::restoreCluster(self->managementDb, clusterName, - restoreId, dataDb->getConnectionRecord()->getConnectionString(), ApplyManagementClusterUpdates::True, RestoreDryRun::True, @@ -279,7 +277,6 @@ struct MetaclusterRestoreWorkload : TestWorkload { wait(MetaclusterAPI::restoreCluster(self->managementDb, clusterName, - restoreId, dataDb->getConnectionRecord()->getConnectionString(), ApplyManagementClusterUpdates::True, RestoreDryRun::False, @@ -510,7 +507,6 @@ struct MetaclusterRestoreWorkload : TestWorkload { state std::vector messages; state bool completed = false; - state UID restoreId = deterministicRandom()->randomUniqueID(); while (!completed) { state std::vector> dataTenantsBeforeRestore = wait(getDataClusterTenants(clusterItr->second.db)); @@ -528,7 +524,6 @@ struct MetaclusterRestoreWorkload : TestWorkload { wait(MetaclusterAPI::restoreCluster( self->managementDb, clusterItr->first, - restoreId, clusterItr->second.db->getConnectionRecord()->getConnectionString(), ApplyManagementClusterUpdates::False, RestoreDryRun::True, @@ -545,7 +540,6 @@ struct MetaclusterRestoreWorkload : TestWorkload { wait(MetaclusterAPI::restoreCluster( self->managementDb, clusterItr->first, - restoreId, clusterItr->second.db->getConnectionRecord()->getConnectionString(), ApplyManagementClusterUpdates::False, RestoreDryRun::False, diff --git a/fdbserver/workloads/TenantManagementWorkload.actor.cpp b/fdbserver/workloads/TenantManagementWorkload.actor.cpp index 0cec7e6f42..1ea59a9ed4 100644 --- a/fdbserver/workloads/TenantManagementWorkload.actor.cpp +++ b/fdbserver/workloads/TenantManagementWorkload.actor.cpp @@ -1351,12 +1351,13 @@ struct TenantManagementWorkload : TestWorkload { return Void(); } - ACTOR static Future renameTenantImpl(TenantManagementWorkload* self, - Reference tr, + ACTOR static Future renameTenantImpl(Reference tr, OperationType operationType, std::map tenantRenames, bool tenantNotFound, - bool tenantExists) { + bool tenantExists, + bool tenantOverlap, + TenantManagementWorkload* self) { if (operationType == OperationType::SPECIAL_KEYS) { tr->setOption(FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES); for (auto& iter : tenantRenames) { @@ -1408,10 +1409,10 @@ struct TenantManagementWorkload : TestWorkload { TenantName newTenant = self->chooseTenantName(false); bool checkOverlap = oldTenant == newTenant || allTenantNames.count(oldTenant) || allTenantNames.count(newTenant); - // These operation types do not handle rename collisions - // reject the rename here if it has overlap - if (checkOverlap && (operationType == OperationType::MANAGEMENT_TRANSACTION || - operationType == OperationType::MANAGEMENT_DATABASE)) { + // renameTenantTransaction does not handle rename collisions: + // reject the rename here if it has overlap and we are doing a transaction operation + // and then pick another combination + if (checkOverlap && operationType == OperationType::MANAGEMENT_TRANSACTION) { --i; continue; } @@ -1430,7 +1431,8 @@ struct TenantManagementWorkload : TestWorkload { state Version originalReadVersion = wait(self->getLatestReadVersion(self, operationType)); loop { try { - wait(renameTenantImpl(self, tr, operationType, tenantRenames, tenantNotFound, tenantExists)); + wait(renameTenantImpl( + tr, operationType, tenantRenames, 
tenantNotFound, tenantExists, tenantOverlap, self)); wait(verifyTenantRenames(self, tenantRenames)); Versionstamp currentVersionstamp = wait(getLastTenantModification(self, operationType)); ASSERT_GT(currentVersionstamp.version, originalReadVersion); From 762cbcdc5dfb48c54bfcaf98afd54b8c139f51ac Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Fri, 17 Feb 2023 14:41:41 -0800 Subject: [PATCH 12/31] unconditionally set restore id --- .../fdbclient/MetaclusterManagement.actor.h | 3 +-- .../TenantManagementWorkload.actor.cpp | 18 ++++++++---------- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/fdbclient/include/fdbclient/MetaclusterManagement.actor.h b/fdbclient/include/fdbclient/MetaclusterManagement.actor.h index 159eab0ce0..e9abaf9f6b 100644 --- a/fdbclient/include/fdbclient/MetaclusterManagement.actor.h +++ b/fdbclient/include/fdbclient/MetaclusterManagement.actor.h @@ -1494,13 +1494,12 @@ struct RestoreClusterImpl { } void markClusterRestoring(Reference tr) { + MetaclusterMetadata::activeRestoreIds().set(tr, clusterName, restoreId); if (ctx.dataClusterMetadata.get().entry.clusterState != DataClusterState::RESTORING) { DataClusterEntry updatedEntry = ctx.dataClusterMetadata.get().entry; updatedEntry.clusterState = DataClusterState::RESTORING; updateClusterMetadata(tr, clusterName, ctx.dataClusterMetadata.get(), connectionString, updatedEntry); - MetaclusterMetadata::activeRestoreIds().set(tr, clusterName, restoreId); - // Remove this cluster from the cluster capacity index, but leave its configured capacity intact in the // cluster entry. This allows us to retain the configured capacity while preventing the cluster from // being used to allocate new tenant groups. diff --git a/fdbserver/workloads/TenantManagementWorkload.actor.cpp b/fdbserver/workloads/TenantManagementWorkload.actor.cpp index 1ea59a9ed4..0cec7e6f42 100644 --- a/fdbserver/workloads/TenantManagementWorkload.actor.cpp +++ b/fdbserver/workloads/TenantManagementWorkload.actor.cpp @@ -1351,13 +1351,12 @@ struct TenantManagementWorkload : TestWorkload { return Void(); } - ACTOR static Future renameTenantImpl(Reference tr, + ACTOR static Future renameTenantImpl(TenantManagementWorkload* self, + Reference tr, OperationType operationType, std::map tenantRenames, bool tenantNotFound, - bool tenantExists, - bool tenantOverlap, - TenantManagementWorkload* self) { + bool tenantExists) { if (operationType == OperationType::SPECIAL_KEYS) { tr->setOption(FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES); for (auto& iter : tenantRenames) { @@ -1409,10 +1408,10 @@ struct TenantManagementWorkload : TestWorkload { TenantName newTenant = self->chooseTenantName(false); bool checkOverlap = oldTenant == newTenant || allTenantNames.count(oldTenant) || allTenantNames.count(newTenant); - // renameTenantTransaction does not handle rename collisions: - // reject the rename here if it has overlap and we are doing a transaction operation - // and then pick another combination - if (checkOverlap && operationType == OperationType::MANAGEMENT_TRANSACTION) { + // These operation types do not handle rename collisions + // reject the rename here if it has overlap + if (checkOverlap && (operationType == OperationType::MANAGEMENT_TRANSACTION || + operationType == OperationType::MANAGEMENT_DATABASE)) { --i; continue; } @@ -1431,8 +1430,7 @@ struct TenantManagementWorkload : TestWorkload { state Version originalReadVersion = wait(self->getLatestReadVersion(self, operationType)); loop { try { - wait(renameTenantImpl( - tr, operationType, 
tenantRenames, tenantNotFound, tenantExists, tenantOverlap, self)); + wait(renameTenantImpl(self, tr, operationType, tenantRenames, tenantNotFound, tenantExists)); wait(verifyTenantRenames(self, tenantRenames)); Versionstamp currentVersionstamp = wait(getLastTenantModification(self, operationType)); ASSERT_GT(currentVersionstamp.version, originalReadVersion); From 8e6800663b9e33068835839484d273094edd9586 Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Fri, 17 Feb 2023 15:22:04 -0800 Subject: [PATCH 13/31] add missing txn reset --- fdbserver/tester.actor.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/fdbserver/tester.actor.cpp b/fdbserver/tester.actor.cpp index f5179e7b52..d845fdea92 100644 --- a/fdbserver/tester.actor.cpp +++ b/fdbserver/tester.actor.cpp @@ -925,6 +925,7 @@ ACTOR Future clearData(Database cx, Optional defaultTenant) { TraceEvent("TesterClearingTenantsComplete", debugID).detail("AtVersion", tr.getCommittedVersion()); break; } + tr.reset(); } catch (Error& e) { TraceEvent(SevWarn, "TesterClearingTenantsError", debugID).error(e); wait(tr.onError(e)); From e169e65021811dfdfd9f1e7607793232b904fe74 Mon Sep 17 00:00:00 2001 From: Steve Atherton Date: Thu, 16 Feb 2023 22:46:11 -0800 Subject: [PATCH 14/31] Fix to BTree node rebuild logic - rebuild when imbalance hits a limit controlled by a new knob. --- fdbclient/ServerKnobs.cpp | 1 + fdbclient/include/fdbclient/ServerKnobs.h | 1 + fdbserver/VersionedBTree.actor.cpp | 14 +++++++++----- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/fdbclient/ServerKnobs.cpp b/fdbclient/ServerKnobs.cpp index 6e3e7ff5c3..3c43a6fec3 100644 --- a/fdbclient/ServerKnobs.cpp +++ b/fdbclient/ServerKnobs.cpp @@ -976,6 +976,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi init( REDWOOD_HISTOGRAM_INTERVAL, 30.0 ); init( REDWOOD_EVICT_UPDATED_PAGES, true ); if( randomize && BUGGIFY ) { REDWOOD_EVICT_UPDATED_PAGES = false; } init( REDWOOD_DECODECACHE_REUSE_MIN_HEIGHT, 2 ); if( randomize && BUGGIFY ) { REDWOOD_DECODECACHE_REUSE_MIN_HEIGHT = deterministicRandom()->randomInt(1, 7); } + init( REDWOOD_NODE_MAX_UNBALANCE, 2 ); init( REDWOOD_IO_PRIORITIES, "32,32,32,32" ); // Server request latency measurement diff --git a/fdbclient/include/fdbclient/ServerKnobs.h b/fdbclient/include/fdbclient/ServerKnobs.h index 08c1a270c3..2a3159d084 100644 --- a/fdbclient/include/fdbclient/ServerKnobs.h +++ b/fdbclient/include/fdbclient/ServerKnobs.h @@ -950,6 +950,7 @@ public: double REDWOOD_HISTOGRAM_INTERVAL; bool REDWOOD_EVICT_UPDATED_PAGES; // Whether to prioritize eviction of updated pages from cache. 
int REDWOOD_DECODECACHE_REUSE_MIN_HEIGHT; // Minimum height for which to keep and reuse page decode caches + int REDWOOD_NODE_MAX_UNBALANCE; // Maximum imbalance in a node before it should be rebuilt instead of updated std::string REDWOOD_IO_PRIORITIES; diff --git a/fdbserver/VersionedBTree.actor.cpp b/fdbserver/VersionedBTree.actor.cpp index 0aadc60607..5ea485d38e 100644 --- a/fdbserver/VersionedBTree.actor.cpp +++ b/fdbserver/VersionedBTree.actor.cpp @@ -6531,9 +6531,10 @@ private: bool updating, ParentInfo* parentInfo, Reference keyProvider, - Optional pageDomainId) + Optional pageDomainId, + int maxHeightAllowed) : updating(updating), page(p), clonedPage(alreadyCloned), changesMade(false), parentInfo(parentInfo), - keyProvider(keyProvider), pageDomainId(pageDomainId) {} + keyProvider(keyProvider), pageDomainId(pageDomainId), maxHeightAllowed(maxHeightAllowed) {} // Whether updating the existing page is allowed bool updating; @@ -6551,6 +6552,8 @@ private: Reference keyProvider; Optional pageDomainId; + int maxHeightAllowed; + BTreePage* btPage() const { return (BTreePage*)page->mutateData(); } bool empty() const { @@ -6590,7 +6593,7 @@ private: canInsert = keyProvider->keyFitsInDomain(pageDomainId.get(), rec.key, true); } if (canInsert) { - canInsert = end.insert(rec); + canInsert = end.insert(rec, 0, maxHeightAllowed); } if (!canInsert) { @@ -6791,6 +6794,8 @@ private: } } + state int maxHeightAllowed = btPage->tree()->initialHeight + SERVER_KNOBS->REDWOOD_NODE_MAX_UNBALANCE; + // Leaf Page if (btPage->isLeaf()) { // When true, we are modifying the existing DeltaTree @@ -6816,7 +6821,6 @@ private: // Now, process each mutation range and merge changes with existing data. bool firstMutationBoundary = true; - constexpr int maxHeightAllowed = 8; while (mBegin != mEnd) { // Apply the change to the mutation buffer start boundary key only if @@ -7297,7 +7301,7 @@ private: // If pageCopy is already set it was initialized to page above so the modifier doesn't need // to copy it state InternalPageModifier modifier( - page, pageCopy.isValid(), tryToUpdate, parentInfo, self->m_keyProvider, pageDomainId); + page, pageCopy.isValid(), tryToUpdate, parentInfo, self->m_keyProvider, pageDomainId, maxHeightAllowed); // Apply the possible changes for each subtree range recursed to, except the last one. // For each range, the expected next record, if any, is checked against the first boundary From 9bf28899d446284f7ccae4a354bbf8bf0ba48642 Mon Sep 17 00:00:00 2001 From: Steve Atherton Date: Tue, 21 Feb 2023 00:48:23 -0800 Subject: [PATCH 15/31] Add transaction option definitions for read priority and read server side cache mode. 
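
Reviewer note (not part of the generated diff): the NativeAPI change below maps the READ_PRIORITY option's integer argument onto ReadType and folds the three server-side cache-mode options into a single boolean. A minimal standalone C++ sketch of that mapping, with simplified types and illustrative helper names (mapReadPriority and mapCacheMode do not exist in the tree):

    #include <cstdint>

    enum class ReadType { LOW, NORMAL, HIGH }; // simplified; the real enum has additional states

    // READ_PRIORITY: 0 selects NORMAL, negative values select LOW, positive values select HIGH.
    ReadType mapReadPriority(int64_t priority) {
        if (priority == 0) {
            return ReadType::NORMAL;
        }
        return priority < 0 ? ReadType::LOW : ReadType::HIGH;
    }

    // Cache mode: only READ_SERVER_SIDE_CACHE_DISABLE turns server-side result
    // caching off; ENABLE and AUTO both leave it on.
    bool mapCacheMode(bool isDisableOption) {
        return !isDisableOption;
    }

Folding ENABLE and AUTO into the same value keeps the transaction state to a single cacheResult flag; if AUTO later needs distinct behavior, the switch in setOption would have to store the mode itself rather than a boolean.
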
--- fdbclient/NativeAPI.actor.cpp | 25 +++++++++++++++++++++++++ fdbclient/vexillographer/fdb.options | 9 +++++++++ 2 files changed, 34 insertions(+) diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 8daaa0d76f..82f6db5d9a 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -6997,6 +6997,31 @@ void Transaction::setOption(FDBTransactionOptions::Option option, OptionalautomaticIdempotency = true; break; + // ReadOptions + case FDBTransactionOptions::READ_PRIORITY: + case FDBTransactionOptions::READ_SERVER_SIDE_CACHE_ENABLE: + case FDBTransactionOptions::READ_SERVER_SIDE_CACHE_AUTO: + case FDBTransactionOptions::READ_SERVER_SIDE_CACHE_DISABLE: + if (!trState->readOptions.present()) { + trState->readOptions = ReadOptions(); + } + + if( option == FDBTransactionOptions::READ_PRIORITY ) { + // Read Priority + int priority = extractIntOption(value); + if(priority == 0) { + trState->readOptions.get().type = ReadType::NORMAL; + } else if(priority < 0) { + trState->readOptions.get().type = ReadType::LOW; + } else { + trState->readOptions.get().type = ReadType::HIGH; + } + } else { + // Cache mode, currently only DISABLE maps to false, all others map to true + trState->readOptions.get().cacheResult = CacheResult(option != FDBTransactionOptions::READ_SERVER_SIDE_CACHE_DISABLE); + } + break; + default: break; } diff --git a/fdbclient/vexillographer/fdb.options b/fdbclient/vexillographer/fdb.options index 521cf07434..d63b770a82 100644 --- a/fdbclient/vexillographer/fdb.options +++ b/fdbclient/vexillographer/fdb.options @@ -241,6 +241,15 @@ description is not currently required but encouraged. description="Reads performed by a transaction will not see any prior mutations that occured in that transaction, instead seeing the value which was in the database at the transaction's read version. This option may provide a small performance benefit for the client, but also disables a number of client-side optimizations which are beneficial for transactions which tend to read and write the same keys within a single transaction. It is an error to set this option after performing any reads or writes on the transaction."/>
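
End-of-series note (illustrative only, not authoritative): code linked against the native client could exercise the new options roughly as sketched below. Assumptions not shown in the visible hunks: READ_PRIORITY uses the standard 8-byte integer option encoding consumed by extractIntOption, the cache-mode options take no parameter, and tr is an already-constructed Transaction.

    // Hedged usage sketch against Transaction::setOption.
    int64_t priority = -1; // negative -> ReadType::LOW, 0 -> NORMAL, positive -> HIGH
    tr.setOption(FDBTransactionOptions::READ_PRIORITY,
                 StringRef(reinterpret_cast<const uint8_t*>(&priority), sizeof(priority)));

    // The cache-mode options appear to take no argument; DISABLE clears the
    // transaction's cacheResult, ENABLE and AUTO set it.
    tr.setOption(FDBTransactionOptions::READ_SERVER_SIDE_CACHE_DISABLE);
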