Merge remote-tracking branch 'origin/main' into features/tenant-lock2

This commit is contained in:
Markus Pilman 2023-02-22 13:12:23 -07:00
commit 8695fc15fc
40 changed files with 1126 additions and 589 deletions

View File

@ -469,6 +469,31 @@ func (o TransactionOptions) SetReadAheadDisable() error {
return o.setOpt(52, nil)
}
// Storage server should cache disk blocks needed for subsequent read requests in this transaction. This is the default behavior.
// Maps to transaction option code 507 (READ_SERVER_SIDE_CACHE_ENABLE); takes no parameter.
func (o TransactionOptions) SetReadServerSideCacheEnable() error {
	return o.setOpt(507, nil)
}
// Storage server should not cache disk blocks needed for subsequent read requests in this transaction. This can be used to avoid cache pollution for reads not expected to be repeated.
// Maps to transaction option code 508 (READ_SERVER_SIDE_CACHE_DISABLE); takes no parameter.
func (o TransactionOptions) SetReadServerSideCacheDisable() error {
	return o.setOpt(508, nil)
}
// Use normal read priority for subsequent read requests in this transaction. This is the default read priority.
// Maps to transaction option code 509 (READ_PRIORITY_NORMAL); takes no parameter.
func (o TransactionOptions) SetReadPriorityNormal() error {
	return o.setOpt(509, nil)
}
// Use low read priority for subsequent read requests in this transaction.
// Maps to transaction option code 510 (READ_PRIORITY_LOW); takes no parameter.
func (o TransactionOptions) SetReadPriorityLow() error {
	return o.setOpt(510, nil)
}
// Use high read priority for subsequent read requests in this transaction.
// Maps to transaction option code 511 (READ_PRIORITY_HIGH); takes no parameter.
func (o TransactionOptions) SetReadPriorityHigh() error {
	return o.setOpt(511, nil)
}
// Not yet implemented.
func (o TransactionOptions) SetDurabilityDatacenter() error {
return o.setOpt(110, nil)

View File

@ -117,4 +117,82 @@ class BlobGranuleIntegrationTest {
System.out.println("Blob granule management tests complete!");
}
}
@Test
void blobManagementFunctionsTenantTest() throws Exception {
	/*
	 * A test that runs a blob range through the lifecycle of blob management.
	 * Identical to the above test, but everything is scoped to a tenant instead of a database
	 */
	Random rand = new Random();
	byte[] key = new byte[16];
	byte[] value = new byte[8];
	rand.nextBytes(key);
	key[0] = (byte)0x30;
	rand.nextBytes(value);

	Range blobRange = Range.startsWith(key);
	byte[] tenantName = "BGManagementTenant".getBytes();
	try (Database db = fdb.open()) {
		TenantManagement.createTenant(db, tenantName).join();
		System.out.println("Created tenant for test");
		try (Tenant tenant = db.openTenant(tenantName)) {
			System.out.println("Opened tenant for test");
			boolean blobbifySuccess = tenant.blobbifyRangeBlocking(blobRange.begin, blobRange.end).join();
			Assertions.assertTrue(blobbifySuccess);

			Long verifyVersion = tenant.verifyBlobRange(blobRange.begin, blobRange.end).join();
			Assertions.assertTrue(verifyVersion >= 0);

			// list blob ranges
			KeyRangeArrayResult blobRanges = tenant.listBlobbifiedRanges(blobRange.begin, blobRange.end, 2).join();
			Assertions.assertEquals(1, blobRanges.getKeyRanges().size());
			Assertions.assertArrayEquals(blobRange.begin, blobRanges.getKeyRanges().get(0).begin);
			Assertions.assertArrayEquals(blobRange.end, blobRanges.getKeyRanges().get(0).end);

			boolean flushSuccess = tenant.flushBlobRange(blobRange.begin, blobRange.end, false).join();
			Assertions.assertTrue(flushSuccess);

			// verify after flush; verification version must not go backwards
			Long verifyVersionAfterFlush = tenant.verifyBlobRange(blobRange.begin, blobRange.end).join();
			Assertions.assertTrue(verifyVersionAfterFlush >= 0);
			Assertions.assertTrue(verifyVersionAfterFlush >= verifyVersion);

			boolean compactSuccess = tenant.flushBlobRange(blobRange.begin, blobRange.end, true).join();
			Assertions.assertTrue(compactSuccess);

			Long verifyVersionAfterCompact = tenant.verifyBlobRange(blobRange.begin, blobRange.end).join();
			Assertions.assertTrue(verifyVersionAfterCompact >= 0);
			Assertions.assertTrue(verifyVersionAfterCompact >= verifyVersionAfterFlush);

			// purge/wait
			byte[] purgeKey = tenant.purgeBlobGranules(blobRange.begin, blobRange.end, -2, false).join();
			// FIX: wait through the tenant rather than the database so the entire
			// lifecycle stays tenant-scoped, consistent with the force-purge wait below.
			tenant.waitPurgeGranulesComplete(purgeKey).join();

			// verify again
			Long verifyVersionAfterPurge = tenant.verifyBlobRange(blobRange.begin, blobRange.end).join();
			Assertions.assertTrue(verifyVersionAfterPurge >= 0);
			Assertions.assertTrue(verifyVersionAfterPurge >= verifyVersionAfterCompact);

			// force purge/wait
			byte[] forcePurgeKey = tenant.purgeBlobGranules(blobRange.begin, blobRange.end, -2, true).join();
			tenant.waitPurgeGranulesComplete(forcePurgeKey).join();

			// check verify fails: a force purge removes the granules, so verification returns -1
			Long verifyVersionLast = tenant.verifyBlobRange(blobRange.begin, blobRange.end).join();
			Assertions.assertEquals(-1, verifyVersionLast);

			// unblobbify
			boolean unblobbifySuccess = tenant.unblobbifyRange(blobRange.begin, blobRange.end).join();
			Assertions.assertTrue(unblobbifySuccess);

			System.out.println("Blob granule management tenant tests complete!");
		}
	}
}
}

View File

@ -35,7 +35,7 @@ class GetClientStatusIntegrationTest {
try (Database db = fdb.open()) {
// Run a simple transaction to make sure the database is fully initialized
db.run(tr -> {
return tr.getReadVersion();
return tr.getReadVersion().join();
});
// Here we just check if a meaningful client report status is returned

View File

@ -465,6 +465,7 @@ class Summary:
self.out.append(child)
self.out.attributes['Ok'] = '1' if self.ok() else '0'
self.out.attributes['Runtime'] = str(self.runtime)
if not self.ok():
reason = 'Unknown'
if self.error:

View File

@ -91,7 +91,7 @@ bool parseTenantListOptions(std::vector<StringRef> const& tokens,
int startIndex,
int& limit,
int& offset,
std::vector<TenantState>& filters) {
std::vector<MetaclusterAPI::TenantState>& filters) {
for (int tokenNum = startIndex; tokenNum < tokens.size(); ++tokenNum) {
Optional<Value> value;
StringRef token = tokens[tokenNum];
@ -123,7 +123,7 @@ bool parseTenantListOptions(std::vector<StringRef> const& tokens,
auto filterStrings = value.get().splitAny(","_sr);
try {
for (auto sref : filterStrings) {
filters.push_back(TenantMapEntry::stringToTenantState(sref.toString()));
filters.push_back(MetaclusterAPI::stringToTenantState(sref.toString()));
}
} catch (Error& e) {
fmt::print(stderr, "ERROR: unrecognized tenant state(s) `{}'.\n", value.get().toString());
@ -185,7 +185,7 @@ ACTOR Future<bool> tenantCreateCommand(Reference<IDatabase> db, std::vector<Stri
tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
state ClusterType clusterType = wait(TenantAPI::getClusterType(tr));
if (clusterType == ClusterType::METACLUSTER_MANAGEMENT) {
TenantMapEntry tenantEntry;
MetaclusterTenantMapEntry tenantEntry;
AssignClusterAutomatically assignClusterAutomatically = AssignClusterAutomatically::True;
for (auto const& [name, value] : configuration.get()) {
if (name == "assigned_cluster"_sr) {
@ -337,7 +337,7 @@ ACTOR Future<bool> tenantListCommand(Reference<IDatabase> db, std::vector<String
state StringRef endTenant = "\xff\xff"_sr;
state int limit = 100;
state int offset = 0;
state std::vector<TenantState> filters;
state std::vector<MetaclusterAPI::TenantState> filters;
if (tokens.size() >= 3) {
beginTenant = tokens[2];
@ -372,7 +372,7 @@ ACTOR Future<bool> tenantListCommand(Reference<IDatabase> db, std::vector<String
tenantNames.push_back(tenant.first);
}
} else {
std::vector<std::pair<TenantName, TenantMapEntry>> tenants =
std::vector<std::pair<TenantName, MetaclusterTenantMapEntry>> tenants =
wait(MetaclusterAPI::listTenantMetadata(db, beginTenant, endTenant, limit, offset, filters));
for (auto tenant : tenants) {
tenantNames.push_back(tenant.first);
@ -433,7 +433,7 @@ ACTOR Future<bool> tenantGetCommand(Reference<IDatabase> db, std::vector<StringR
state ClusterType clusterType = wait(TenantAPI::getClusterType(tr));
state std::string tenantJson;
if (clusterType == ClusterType::METACLUSTER_MANAGEMENT) {
TenantMapEntry entry = wait(MetaclusterAPI::getTenantTransaction(tr, tokens[2]));
MetaclusterTenantMapEntry entry = wait(MetaclusterAPI::getTenantTransaction(tr, tokens[2]));
tenantJson = entry.toJson();
} else {
// Hold the reference to the standalone's memory
@ -468,14 +468,16 @@ ACTOR Future<bool> tenantGetCommand(Reference<IDatabase> db, std::vector<StringR
doc.get("id", id);
doc.get("prefix.printable", prefix);
doc.get("tenant_state", tenantState);
bool hasTenantState = doc.tryGet("tenant_state", tenantState);
bool hasTenantGroup = doc.tryGet("tenant_group.printable", tenantGroup);
bool hasAssignedCluster = doc.tryGet("assigned_cluster.printable", assignedCluster);
bool hasError = doc.tryGet("error", error);
fmt::print(" id: {}\n", id);
fmt::print(" prefix: {}\n", printable(prefix).c_str());
fmt::print(" tenant state: {}\n", printable(tenantState).c_str());
if (hasTenantState) {
fmt::print(" tenant state: {}\n", printable(tenantState).c_str());
}
if (hasTenantGroup) {
fmt::print(" tenant group: {}\n", tenantGroup.c_str());
}
@ -544,7 +546,6 @@ ACTOR Future<bool> tenantConfigureCommand(Reference<IDatabase> db, std::vector<S
tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
ClusterType clusterType = wait(TenantAPI::getClusterType(tr));
if (clusterType == ClusterType::METACLUSTER_MANAGEMENT) {
TenantMapEntry tenantEntry;
wait(MetaclusterAPI::configureTenant(db, tokens[2], configuration.get()));
} else {
applyConfigurationToSpecialKeys(tr, tokens[2], configuration.get());

View File

@ -1015,10 +1015,9 @@ def tenant_get(logger):
output = run_fdbcli_command("tenant get tenant")
lines = output.split("\n")
assert len(lines) == 3
assert len(lines) == 2
assert lines[0].strip().startswith("id: ")
assert lines[1].strip().startswith("prefix: ")
assert lines[2].strip() == "tenant state: ready"
output = run_fdbcli_command("tenant get tenant JSON")
json_output = json.loads(output, strict=False)
@ -1035,15 +1034,13 @@ def tenant_get(logger):
assert len(json_output["tenant"]["prefix"]) == 2
assert "base64" in json_output["tenant"]["prefix"]
assert "printable" in json_output["tenant"]["prefix"]
assert json_output["tenant"]["tenant_state"] == "ready"
output = run_fdbcli_command("tenant get tenant2")
lines = output.split("\n")
assert len(lines) == 4
assert len(lines) == 3
assert lines[0].strip().startswith("id: ")
assert lines[1].strip().startswith("prefix: ")
assert lines[2].strip() == "tenant state: ready"
assert lines[3].strip() == "tenant group: tenant_group2"
assert lines[2].strip() == "tenant group: tenant_group2"
output = run_fdbcli_command("tenant get tenant2 JSON")
json_output = json.loads(output, strict=False)
@ -1057,7 +1054,6 @@ def tenant_get(logger):
assert "base64" in json_output["tenant"]["name"]
assert "printable" in json_output["tenant"]["name"]
assert "prefix" in json_output["tenant"]
assert json_output["tenant"]["tenant_state"] == "ready"
assert "tenant_group" in json_output["tenant"]
assert len(json_output["tenant"]["tenant_group"]) == 2
assert "base64" in json_output["tenant"]["tenant_group"]
@ -1073,15 +1069,15 @@ def tenant_configure(logger):
output = run_fdbcli_command("tenant get tenant")
lines = output.split("\n")
assert len(lines) == 4
assert lines[3].strip() == "tenant group: tenant_group1"
assert len(lines) == 3
assert lines[2].strip() == "tenant group: tenant_group1"
output = run_fdbcli_command("tenant configure tenant unset tenant_group")
assert output == "The configuration for tenant `tenant' has been updated"
output = run_fdbcli_command("tenant get tenant")
lines = output.split("\n")
assert len(lines) == 3
assert len(lines) == 2
output = run_fdbcli_command_and_get_error(
"tenant configure tenant tenant_group=tenant_group1 tenant_group=tenant_group2"

View File

@ -20,6 +20,8 @@
#include "fdbclient/Metacluster.h"
#include "fdbclient/MetaclusterManagement.actor.h"
#include "libb64/decode.h"
#include "libb64/encode.h"
FDB_DEFINE_BOOLEAN_PARAM(ApplyManagementClusterUpdates);
FDB_DEFINE_BOOLEAN_PARAM(RemoveMissingTenants);
@ -31,6 +33,47 @@ FDB_DEFINE_BOOLEAN_PARAM(RunOnMismatchedCluster);
FDB_DEFINE_BOOLEAN_PARAM(RestoreDryRun);
FDB_DEFINE_BOOLEAN_PARAM(ForceJoinNewMetacluster);
namespace MetaclusterAPI {
// Translates a TenantState enum value into its human-readable name.
// Any value outside the defined enumerators is a programming error.
std::string tenantStateToString(TenantState tenantState) {
	if (tenantState == TenantState::REGISTERING) {
		return "registering";
	} else if (tenantState == TenantState::READY) {
		return "ready";
	} else if (tenantState == TenantState::REMOVING) {
		return "removing";
	} else if (tenantState == TenantState::UPDATING_CONFIGURATION) {
		return "updating configuration";
	} else if (tenantState == TenantState::RENAMING) {
		return "renaming";
	} else if (tenantState == TenantState::ERROR) {
		return "error";
	}
	UNREACHABLE();
}
// Parses a human-readable tenant state name back into the TenantState enum.
// Matching is case-insensitive; unrecognized names raise invalid_option.
TenantState stringToTenantState(std::string stateStr) {
	// Normalize to lowercase before comparing.
	std::transform(stateStr.begin(), stateStr.end(), stateStr.begin(), [](unsigned char c) { return std::tolower(c); });
	if (stateStr == "registering") {
		return TenantState::REGISTERING;
	}
	if (stateStr == "ready") {
		return TenantState::READY;
	}
	if (stateStr == "removing") {
		return TenantState::REMOVING;
	}
	if (stateStr == "updating configuration") {
		return TenantState::UPDATING_CONFIGURATION;
	}
	if (stateStr == "renaming") {
		return TenantState::RENAMING;
	}
	if (stateStr == "error") {
		return TenantState::ERROR;
	}

	// No enumerator matched the supplied name.
	throw invalid_option();
}
} // namespace MetaclusterAPI
std::string clusterTypeToString(const ClusterType& clusterType) {
switch (clusterType) {
case ClusterType::STANDALONE:
@ -88,6 +131,98 @@ json_spirit::mObject ClusterUsage::toJson() const {
return obj;
}
// Down-converts a management-cluster tenant entry into the standalone representation.
// Metacluster-only fields (tenantState, assignedCluster, renameDestination, error) are not carried over.
TenantMapEntry::TenantMapEntry(MetaclusterTenantMapEntry metaclusterEntry)
  : tenantName(metaclusterEntry.tenantName), tenantLockState(metaclusterEntry.tenantLockState),
    tenantGroup(metaclusterEntry.tenantGroup), configurationSequenceNum(metaclusterEntry.configurationSequenceNum) {
	// A negative id means the source entry was never assigned one; leave id/prefix unset in that case.
	if (metaclusterEntry.id >= 0) {
		setId(metaclusterEntry.id);
	}
}
// Up-converts a standalone tenant entry into the management-cluster representation.
// Metacluster-only fields keep their defaults (in particular, tenantState defaults to READY; assignedCluster is unset).
MetaclusterTenantMapEntry::MetaclusterTenantMapEntry(TenantMapEntry tenantEntry)
  : tenantName(tenantEntry.tenantName), tenantLockState(tenantEntry.tenantLockState),
    tenantGroup(tenantEntry.tenantGroup), configurationSequenceNum(tenantEntry.configurationSequenceNum) {
	// A negative id means the source entry was never assigned one; leave id/prefix unset in that case.
	if (tenantEntry.id >= 0) {
		setId(tenantEntry.id);
	}
}
MetaclusterTenantMapEntry::MetaclusterTenantMapEntry() {}

// Constructs an entry with an explicit id, name, and lifecycle state; the key prefix is derived from the id.
MetaclusterTenantMapEntry::MetaclusterTenantMapEntry(int64_t id,
                                                     TenantName tenantName,
                                                     MetaclusterAPI::TenantState tenantState)
  : tenantName(tenantName), tenantState(tenantState) {
	setId(id);
}

// As above, but also assigns the tenant to a tenant group.
MetaclusterTenantMapEntry::MetaclusterTenantMapEntry(int64_t id,
                                                     TenantName tenantName,
                                                     MetaclusterAPI::TenantState tenantState,
                                                     Optional<TenantGroupName> tenantGroup)
  : tenantName(tenantName), tenantState(tenantState), tenantGroup(tenantGroup) {
	setId(id);
}

// Assigns the tenant id and recomputes the derived key prefix. Ids must be non-negative.
void MetaclusterTenantMapEntry::setId(int64_t id) {
	ASSERT(id >= 0);
	this->id = id;
	prefix = TenantAPI::idToPrefix(id);
}
// Serializes this entry to a JSON string (used by fdbcli `tenant get ... JSON` and similar output).
// Optional fields (tenant_group, rename_destination, error) are emitted only when applicable.
std::string MetaclusterTenantMapEntry::toJson() const {
	json_spirit::mObject tenantEntry;
	tenantEntry["id"] = id;
	tenantEntry["name"] = binaryToJson(tenantName);
	tenantEntry["prefix"] = binaryToJson(prefix);
	tenantEntry["tenant_state"] = MetaclusterAPI::tenantStateToString(tenantState);
	// assigned_cluster is unconditionally emitted for metacluster entries
	tenantEntry["assigned_cluster"] = binaryToJson(assignedCluster);

	if (tenantGroup.present()) {
		tenantEntry["tenant_group"] = binaryToJson(tenantGroup.get());
	}

	tenantEntry["lock_state"] = TenantAPI::tenantLockStateToString(tenantLockState);
	if (tenantState == MetaclusterAPI::TenantState::RENAMING) {
		// A tenant in the RENAMING state must always have a rename destination
		ASSERT(renameDestination.present());
		tenantEntry["rename_destination"] = binaryToJson(renameDestination.get());
	} else if (tenantState == MetaclusterAPI::TenantState::ERROR) {
		tenantEntry["error"] = error;
	}

	return json_spirit::write_string(json_spirit::mValue(tenantEntry));
}
// Two entries "match configuration" when their user-settable configuration agrees.
// Currently only the tenant group participates in this comparison.
bool MetaclusterTenantMapEntry::matchesConfiguration(MetaclusterTenantMapEntry const& other) const {
	return tenantGroup == other.tenantGroup;
}

// Same comparison against the standalone-cluster entry type.
bool MetaclusterTenantMapEntry::matchesConfiguration(TenantMapEntry const& other) const {
	return tenantGroup == other.tenantGroup;
}
// Applies a single configuration parameter to this entry.
// Unknown parameters (and assigned_cluster without a value) are rejected with invalid_tenant_configuration.
void MetaclusterTenantMapEntry::configure(Standalone<StringRef> parameter, Optional<Value> value) {
	if (parameter == "tenant_group"_sr) {
		// tenant_group may be set or cleared, so the Optional is stored as-is.
		tenantGroup = value;
		return;
	}
	if (parameter == "assigned_cluster"_sr && value.present()) {
		assignedCluster = value.get();
		return;
	}

	TraceEvent(SevWarnAlways, "UnknownTenantConfigurationParameter").detail("Parameter", parameter);
	throw invalid_tenant_configuration();
}
// Full field-by-field equality. The prefix is derived from the id (see setId), so it is not compared separately.
bool MetaclusterTenantMapEntry::operator==(MetaclusterTenantMapEntry const& other) const {
	return id == other.id && tenantName == other.tenantName && tenantState == other.tenantState &&
	       tenantLockState == other.tenantLockState && tenantGroup == other.tenantGroup &&
	       assignedCluster == other.assignedCluster && configurationSequenceNum == other.configurationSequenceNum &&
	       renameDestination == other.renameDestination && error == other.error;
}

bool MetaclusterTenantMapEntry::operator!=(MetaclusterTenantMapEntry const& other) const {
	return !(*this == other);
}
KeyBackedObjectProperty<MetaclusterRegistrationEntry, decltype(IncludeVersion())>&
MetaclusterMetadata::metaclusterRegistration() {
static KeyBackedObjectProperty<MetaclusterRegistrationEntry, decltype(IncludeVersion())> instance(

View File

@ -70,8 +70,8 @@ KeyBackedMap<ClusterName, int64_t, TupleCodec<ClusterName>, BinaryCodec<int64_t>
KeyBackedSet<Tuple> ManagementClusterMetadata::clusterTenantIndex("metacluster/dataCluster/tenantMap/"_sr);
KeyBackedSet<Tuple> ManagementClusterMetadata::clusterTenantGroupIndex("metacluster/dataCluster/tenantGroupMap/"_sr);
TenantMetadataSpecification& ManagementClusterMetadata::tenantMetadata() {
static TenantMetadataSpecification instance(""_sr);
TenantMetadataSpecification<MetaclusterTenantMapEntry>& ManagementClusterMetadata::tenantMetadata() {
static TenantMetadataSpecification<MetaclusterTenantMapEntry> instance(""_sr);
return instance;
}

View File

@ -7095,6 +7095,26 @@ void Transaction::setOption(FDBTransactionOptions::Option option, Optional<Strin
trState->automaticIdempotency = true;
break;
case FDBTransactionOptions::READ_SERVER_SIDE_CACHE_ENABLE:
trState->readOptions.withDefault(ReadOptions()).cacheResult = CacheResult::True;
break;
case FDBTransactionOptions::READ_SERVER_SIDE_CACHE_DISABLE:
trState->readOptions.withDefault(ReadOptions()).cacheResult = CacheResult::False;
break;
case FDBTransactionOptions::READ_PRIORITY_LOW:
trState->readOptions.withDefault(ReadOptions()).type = ReadType::LOW;
break;
case FDBTransactionOptions::READ_PRIORITY_NORMAL:
trState->readOptions.withDefault(ReadOptions()).type = ReadType::NORMAL;
break;
case FDBTransactionOptions::READ_PRIORITY_HIGH:
trState->readOptions.withDefault(ReadOptions()).type = ReadType::HIGH;
break;
default:
break;
}
@ -10885,13 +10905,13 @@ ACTOR Future<Key> purgeBlobGranulesActor(Reference<DatabaseContext> db,
if ((!blobbifiedBegin.get().empty() && blobbifiedBegin.get().front().begin < purgeRange.begin) ||
(!blobbifiedEnd.get().empty() && blobbifiedEnd.get().front().begin < purgeRange.end)) {
TraceEvent("UnalignedPurge")
.detail("Range", range)
.detail("Range", purgeRange)
.detail("Version", purgeVersion)
.detail("Force", force);
throw unsupported_operation();
}
Value purgeValue = blobGranulePurgeValueFor(purgeVersion, range, force);
Value purgeValue = blobGranulePurgeValueFor(purgeVersion, purgeRange, force);
tr.atomicOp(
addVersionStampAtEnd(blobGranulePurgeKeys.begin), purgeValue, MutationRef::SetVersionstampedKey);
tr.set(blobGranulePurgeChangeKey, deterministicRandom()->randomUniqueID().toString());
@ -10901,8 +10921,8 @@ ACTOR Future<Key> purgeBlobGranulesActor(Reference<DatabaseContext> db,
purgeKey = blobGranulePurgeKeys.begin.withSuffix(vs);
if (BG_REQUEST_DEBUG) {
fmt::print("purgeBlobGranules for range [{0} - {1}) at version {2} registered {3}\n",
range.begin.printable(),
range.end.printable(),
purgeRange.begin.printable(),
purgeRange.end.printable(),
purgeVersion,
purgeKey.printable());
}
@ -10910,8 +10930,8 @@ ACTOR Future<Key> purgeBlobGranulesActor(Reference<DatabaseContext> db,
} catch (Error& e) {
if (BG_REQUEST_DEBUG) {
fmt::print("purgeBlobGranules for range [{0} - {1}) at version {2} encountered error {3}\n",
range.begin.printable(),
range.end.printable(),
purgeRange.begin.printable(),
purgeRange.end.printable(),
purgeVersion,
e.name());
}

View File

@ -976,6 +976,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( REDWOOD_HISTOGRAM_INTERVAL, 30.0 );
init( REDWOOD_EVICT_UPDATED_PAGES, true ); if( randomize && BUGGIFY ) { REDWOOD_EVICT_UPDATED_PAGES = false; }
init( REDWOOD_DECODECACHE_REUSE_MIN_HEIGHT, 2 ); if( randomize && BUGGIFY ) { REDWOOD_DECODECACHE_REUSE_MIN_HEIGHT = deterministicRandom()->randomInt(1, 7); }
init( REDWOOD_NODE_MAX_UNBALANCE, 2 );
init( REDWOOD_IO_PRIORITIES, "32,32,32,32" );
// Server request latency measurement

View File

@ -74,47 +74,7 @@ bool withinSingleTenant(KeyRangeRef const& range) {
return tRange.contains(range);
}
} // namespace TenantAPI
std::string TenantMapEntry::tenantStateToString(TenantState tenantState) {
switch (tenantState) {
case TenantState::REGISTERING:
return "registering";
case TenantState::READY:
return "ready";
case TenantState::REMOVING:
return "removing";
case TenantState::UPDATING_CONFIGURATION:
return "updating configuration";
case TenantState::RENAMING:
return "renaming";
case TenantState::ERROR:
return "error";
default:
UNREACHABLE();
}
}
TenantState TenantMapEntry::stringToTenantState(std::string stateStr) {
std::transform(stateStr.begin(), stateStr.end(), stateStr.begin(), [](unsigned char c) { return std::tolower(c); });
if (stateStr == "registering") {
return TenantState::REGISTERING;
} else if (stateStr == "ready") {
return TenantState::READY;
} else if (stateStr == "removing") {
return TenantState::REMOVING;
} else if (stateStr == "updating configuration") {
return TenantState::UPDATING_CONFIGURATION;
} else if (stateStr == "renaming") {
return TenantState::RENAMING;
} else if (stateStr == "error") {
return TenantState::ERROR;
}
throw invalid_option();
}
std::string TenantMapEntry::tenantLockStateToString(TenantLockState tenantState) {
std::string tenantLockStateToString(TenantLockState tenantState) {
switch (tenantState) {
case TenantLockState::UNLOCKED:
return "unlocked";
@ -127,7 +87,7 @@ std::string TenantMapEntry::tenantLockStateToString(TenantLockState tenantState)
}
}
TenantLockState TenantMapEntry::stringToTenantLockState(std::string stateStr) {
TenantLockState stringToTenantLockState(std::string stateStr) {
std::transform(stateStr.begin(), stateStr.end(), stateStr.begin(), [](unsigned char c) { return std::tolower(c); });
if (stateStr == "unlocked") {
return TenantLockState::UNLOCKED;
@ -139,6 +99,7 @@ TenantLockState TenantMapEntry::stringToTenantLockState(std::string stateStr) {
UNREACHABLE();
}
} // namespace TenantAPI
json_spirit::mObject binaryToJson(StringRef bytes) {
json_spirit::mObject obj;
@ -153,15 +114,11 @@ json_spirit::mObject binaryToJson(StringRef bytes) {
}
TenantMapEntry::TenantMapEntry() {}
TenantMapEntry::TenantMapEntry(int64_t id, TenantName tenantName, TenantState tenantState)
: tenantName(tenantName), tenantState(tenantState) {
TenantMapEntry::TenantMapEntry(int64_t id, TenantName tenantName) : tenantName(tenantName) {
setId(id);
}
TenantMapEntry::TenantMapEntry(int64_t id,
TenantName tenantName,
TenantState tenantState,
Optional<TenantGroupName> tenantGroup)
: tenantName(tenantName), tenantState(tenantState), tenantGroup(tenantGroup) {
TenantMapEntry::TenantMapEntry(int64_t id, TenantName tenantName, Optional<TenantGroupName> tenantGroup)
: tenantName(tenantName), tenantGroup(tenantGroup) {
setId(id);
}
@ -177,17 +134,11 @@ std::string TenantMapEntry::toJson() const {
tenantEntry["name"] = binaryToJson(tenantName);
tenantEntry["prefix"] = binaryToJson(prefix);
tenantEntry["tenant_state"] = TenantMapEntry::tenantStateToString(tenantState);
if (assignedCluster.present()) {
tenantEntry["assigned_cluster"] = binaryToJson(assignedCluster.get());
}
if (tenantGroup.present()) {
tenantEntry["tenant_group"] = binaryToJson(tenantGroup.get());
}
if (tenantState == TenantState::ERROR && error.size()) {
tenantEntry["error"] = error;
}
tenantEntry["lock_state"] = TenantAPI::tenantLockStateToString(tenantLockState);
return json_spirit::write_string(json_spirit::mValue(tenantEntry));
}
@ -199,8 +150,6 @@ bool TenantMapEntry::matchesConfiguration(TenantMapEntry const& other) const {
void TenantMapEntry::configure(Standalone<StringRef> parameter, Optional<Value> value) {
if (parameter == "tenant_group"_sr) {
tenantGroup = value;
} else if (parameter == "assigned_cluster"_sr) {
assignedCluster = value;
} else {
TraceEvent(SevWarnAlways, "UnknownTenantConfigurationParameter").detail("Parameter", parameter);
throw invalid_tenant_configuration();
@ -208,10 +157,8 @@ void TenantMapEntry::configure(Standalone<StringRef> parameter, Optional<Value>
}
bool TenantMapEntry::operator==(TenantMapEntry const& other) const {
return id == other.id && tenantName == other.tenantName && tenantState == other.tenantState &&
tenantLockState == other.tenantLockState && tenantGroup == other.tenantGroup &&
assignedCluster == other.assignedCluster && configurationSequenceNum == other.configurationSequenceNum &&
renameDestination == other.renameDestination && error == other.error;
return id == other.id && tenantName == other.tenantName && tenantLockState == other.tenantLockState &&
tenantGroup == other.tenantGroup && configurationSequenceNum == other.configurationSequenceNum;
}
json_spirit::mObject TenantGroupEntry::toJson() const {
@ -223,8 +170,8 @@ json_spirit::mObject TenantGroupEntry::toJson() const {
return tenantGroupEntry;
}
TenantMetadataSpecification& TenantMetadata::instance() {
static TenantMetadataSpecification _instance = TenantMetadataSpecification("\xff/"_sr);
TenantMetadataSpecification<TenantMapEntry>& TenantMetadata::instance() {
static TenantMetadataSpecification _instance = TenantMetadataSpecification<TenantMapEntry>("\xff/"_sr);
return _instance;
}
@ -257,12 +204,12 @@ TEST_CASE("/fdbclient/libb64/base64decoder") {
}
TEST_CASE("/fdbclient/TenantMapEntry/Serialization") {
TenantMapEntry entry1(1, "name"_sr, TenantState::READY);
TenantMapEntry entry1(1, "name"_sr);
ASSERT(entry1.prefix == "\x00\x00\x00\x00\x00\x00\x00\x01"_sr);
TenantMapEntry entry2 = TenantMapEntry::decode(entry1.encode());
ASSERT(entry1.id == entry2.id && entry1.prefix == entry2.prefix);
TenantMapEntry entry3(std::numeric_limits<int64_t>::max(), "name"_sr, TenantState::READY);
TenantMapEntry entry3(std::numeric_limits<int64_t>::max(), "name"_sr);
ASSERT(entry3.prefix == "\x7f\xff\xff\xff\xff\xff\xff\xff"_sr);
TenantMapEntry entry4 = TenantMapEntry::decode(entry3.encode());
ASSERT(entry3.id == entry4.id && entry3.prefix == entry4.prefix);
@ -273,7 +220,7 @@ TEST_CASE("/fdbclient/TenantMapEntry/Serialization") {
int64_t maxPlusOne = std::min<uint64_t>(UINT64_C(1) << bits, std::numeric_limits<int64_t>::max());
int64_t id = deterministicRandom()->randomInt64(min, maxPlusOne);
TenantMapEntry entry(id, "name"_sr, TenantState::READY);
TenantMapEntry entry(id, "name"_sr);
int64_t bigEndianId = bigEndian64(id);
ASSERT(entry.id == id && entry.prefix == StringRef(reinterpret_cast<uint8_t*>(&bigEndianId), 8));

View File

@ -48,8 +48,8 @@ struct BlobWorkerStats {
Counter readDrivenCompactions;
Counter oldFeedSnapshots;
int numRangesAssigned;
int mutationBytesBuffered;
int64_t numRangesAssigned;
int64_t mutationBytesBuffered;
int activeReadRequests;
// TODO: add gauge for granules blocking on old snapshots, once this guage is fixed
int granulesPendingSplitCheck;

View File

@ -28,6 +28,32 @@
#include "fdbclient/KeyBackedTypes.h"
#include "flow/flat_buffers.h"
namespace MetaclusterAPI {

// Represents the various states that a tenant could be in. Only applies to metacluster, not standalone clusters.
// In a metacluster, a tenant on the management cluster could be in the other states while changes are applied to the
// data cluster.
//
// REGISTERING - the tenant has been created on the management cluster and is being created on the data cluster
// READY - the tenant has been created on both clusters, is active, and is consistent between the two clusters
// REMOVING - the tenant has been marked for removal and is being removed on the data cluster
// UPDATING_CONFIGURATION - the tenant configuration has changed on the management cluster and is being applied to the
// data cluster
// RENAMING - the tenant is in the process of being renamed
// ERROR - the tenant is in an error state
//
// A tenant in any configuration is allowed to be removed. Only tenants in the READY or UPDATING_CONFIGURATION phases
// can have their configuration updated. A tenant must not exist or be in the REGISTERING phase to be created. To be
// renamed, a tenant must be in the READY or RENAMING state. In the latter case, the rename destination must match
// the original rename attempt.
//
// If an operation fails and the tenant is left in a non-ready state, re-running the same operation is legal. If
// successful, the tenant will return to the READY state.
enum class TenantState { REGISTERING, READY, REMOVING, UPDATING_CONFIGURATION, RENAMING, ERROR };

// Conversions between TenantState and its human-readable name (e.g. "updating configuration").
// stringToTenantState is case-insensitive and throws invalid_option for unrecognized names.
std::string tenantStateToString(TenantState tenantState);
TenantState stringToTenantState(std::string stateStr);

} // namespace MetaclusterAPI
struct ClusterUsage {
int numTenantGroups = 0;
@ -99,6 +125,67 @@ struct DataClusterEntry {
}
};
// Management-cluster view of a tenant. Mirrors TenantMapEntry but adds metacluster-only
// fields: lifecycle state, assigned data cluster, rename destination, and an error string.
struct MetaclusterTenantMapEntry {
	constexpr static FileIdentifier file_identifier = 12247338;

	// Unique tenant id; -1 until assigned via setId()
	int64_t id = -1;
	// Key prefix for the tenant's data; derived from id (see setId) and rebuilt on deserialization
	Key prefix;
	TenantName tenantName;
	// Lifecycle state on the management cluster; see MetaclusterAPI::TenantState
	MetaclusterAPI::TenantState tenantState = MetaclusterAPI::TenantState::READY;
	TenantAPI::TenantLockState tenantLockState = TenantAPI::TenantLockState::UNLOCKED;
	Optional<TenantGroupName> tenantGroup;
	// Data cluster this tenant has been assigned to
	ClusterName assignedCluster;
	int64_t configurationSequenceNum = 0;
	// Target name while the tenant is in the RENAMING state
	Optional<TenantName> renameDestination;

	// Can be set to an error string if the tenant is in the ERROR state
	std::string error;

	MetaclusterTenantMapEntry();
	MetaclusterTenantMapEntry(int64_t id, TenantName tenantName, MetaclusterAPI::TenantState tenantState);
	MetaclusterTenantMapEntry(int64_t id,
	                          TenantName tenantName,
	                          MetaclusterAPI::TenantState tenantState,
	                          Optional<TenantGroupName> tenantGroup);
	// Up-conversion from the standalone representation; metacluster-only fields keep their defaults
	MetaclusterTenantMapEntry(TenantMapEntry tenantEntry);

	// Assigns the id (must be non-negative) and recomputes the derived prefix
	void setId(int64_t id);
	std::string toJson() const;

	// Configuration comparison considers only the tenant group
	bool matchesConfiguration(MetaclusterTenantMapEntry const& other) const;
	bool matchesConfiguration(TenantMapEntry const& other) const;
	void configure(Standalone<StringRef> parameter, Optional<Value> value);

	Value encode() const { return ObjectWriter::toValue(*this, IncludeVersion()); }
	static MetaclusterTenantMapEntry decode(ValueRef const& value) {
		return ObjectReader::fromStringRef<MetaclusterTenantMapEntry>(value, IncludeVersion());
	}

	bool operator==(MetaclusterTenantMapEntry const& other) const;
	bool operator!=(MetaclusterTenantMapEntry const& other) const;

	template <class Ar>
	void serialize(Ar& ar) {
		serializer(ar,
		           id,
		           tenantName,
		           tenantState,
		           tenantLockState,
		           tenantGroup,
		           assignedCluster,
		           configurationSequenceNum,
		           renameDestination,
		           error);
		if constexpr (Ar::isDeserializing) {
			// prefix is not serialized; rebuild it from the id when loading
			if (id >= 0) {
				prefix = TenantAPI::idToPrefix(id);
			}
			// Reject out-of-range enum values coming off the wire
			ASSERT(tenantState >= MetaclusterAPI::TenantState::REGISTERING &&
			       tenantState <= MetaclusterAPI::TenantState::ERROR);
		}
	}
};
struct MetaclusterMetrics {
int numTenants = 0;
int numDataClusters = 0;

View File

@ -116,7 +116,7 @@ struct ManagementClusterMetadata {
}
};
static TenantMetadataSpecification& tenantMetadata();
static TenantMetadataSpecification<MetaclusterTenantMapEntry>& tenantMetadata();
// A map from cluster name to the metadata associated with a cluster
static KeyBackedObjectMap<ClusterName, DataClusterEntry, decltype(IncludeVersion())>& dataClusters();
@ -436,33 +436,33 @@ struct MetaclusterOperationContext {
};
template <class Transaction>
Future<Optional<TenantMapEntry>> tryGetTenantTransaction(Transaction tr, int64_t tenantId) {
Future<Optional<MetaclusterTenantMapEntry>> tryGetTenantTransaction(Transaction tr, int64_t tenantId) {
tr->setOption(FDBTransactionOptions::RAW_ACCESS);
return ManagementClusterMetadata::tenantMetadata().tenantMap.get(tr, tenantId);
}
ACTOR template <class Transaction>
Future<Optional<TenantMapEntry>> tryGetTenantTransaction(Transaction tr, TenantName name) {
Future<Optional<MetaclusterTenantMapEntry>> tryGetTenantTransaction(Transaction tr, TenantName name) {
tr->setOption(FDBTransactionOptions::RAW_ACCESS);
Optional<int64_t> tenantId = wait(ManagementClusterMetadata::tenantMetadata().tenantNameIndex.get(tr, name));
if (tenantId.present()) {
Optional<TenantMapEntry> entry =
Optional<MetaclusterTenantMapEntry> entry =
wait(ManagementClusterMetadata::tenantMetadata().tenantMap.get(tr, tenantId.get()));
return entry;
} else {
return Optional<TenantMapEntry>();
return Optional<MetaclusterTenantMapEntry>();
}
}
ACTOR template <class DB, class Tenant>
Future<Optional<TenantMapEntry>> tryGetTenant(Reference<DB> db, Tenant tenant) {
Future<Optional<MetaclusterTenantMapEntry>> tryGetTenant(Reference<DB> db, Tenant tenant) {
state Reference<typename DB::TransactionT> tr = db->createTransaction();
loop {
try {
tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
tr->setOption(FDBTransactionOptions::READ_LOCK_AWARE);
Optional<TenantMapEntry> entry = wait(tryGetTenantTransaction(tr, tenant));
Optional<MetaclusterTenantMapEntry> entry = wait(tryGetTenantTransaction(tr, tenant));
return entry;
} catch (Error& e) {
wait(safeThreadFutureToFuture(tr->onError(e)));
@ -471,8 +471,8 @@ Future<Optional<TenantMapEntry>> tryGetTenant(Reference<DB> db, Tenant tenant) {
}
ACTOR template <class Transaction, class Tenant>
Future<TenantMapEntry> getTenantTransaction(Transaction tr, Tenant tenant) {
Optional<TenantMapEntry> entry = wait(tryGetTenantTransaction(tr, tenant));
Future<MetaclusterTenantMapEntry> getTenantTransaction(Transaction tr, Tenant tenant) {
Optional<MetaclusterTenantMapEntry> entry = wait(tryGetTenantTransaction(tr, tenant));
if (!entry.present()) {
throw tenant_not_found();
}
@ -481,8 +481,8 @@ Future<TenantMapEntry> getTenantTransaction(Transaction tr, Tenant tenant) {
}
ACTOR template <class DB, class Tenant>
Future<TenantMapEntry> getTenant(Reference<DB> db, Tenant tenant) {
Optional<TenantMapEntry> entry = wait(tryGetTenant(db, tenant));
Future<MetaclusterTenantMapEntry> getTenant(Reference<DB> db, Tenant tenant) {
Optional<MetaclusterTenantMapEntry> entry = wait(tryGetTenant(db, tenant));
if (!entry.present()) {
throw tenant_not_found();
}
@ -1246,7 +1246,7 @@ Future<std::map<ClusterName, DataClusterMetadata>> listClusters(Reference<DB> db
template <class Transaction>
void managementClusterAddTenantToGroup(Transaction tr,
TenantMapEntry tenantEntry,
MetaclusterTenantMapEntry tenantEntry,
DataClusterMetadata* clusterMetadata,
GroupAlreadyExists groupAlreadyExists,
IsRestoring isRestoring = IsRestoring::False) {
@ -1259,7 +1259,7 @@ void managementClusterAddTenantToGroup(Transaction tr,
ManagementClusterMetadata::tenantMetadata().tenantGroupMap.set(
tr, tenantEntry.tenantGroup.get(), TenantGroupEntry(tenantEntry.assignedCluster));
ManagementClusterMetadata::clusterTenantGroupIndex.insert(
tr, Tuple::makeTuple(tenantEntry.assignedCluster.get(), tenantEntry.tenantGroup.get()));
tr, Tuple::makeTuple(tenantEntry.assignedCluster, tenantEntry.tenantGroup.get()));
}
ManagementClusterMetadata::tenantMetadata().tenantGroupTenantIndex.insert(
tr, Tuple::makeTuple(tenantEntry.tenantGroup.get(), tenantEntry.id));
@ -1271,12 +1271,8 @@ void managementClusterAddTenantToGroup(Transaction tr,
DataClusterEntry updatedEntry = clusterMetadata->entry;
++updatedEntry.allocated.numTenantGroups;
updateClusterMetadata(tr,
tenantEntry.assignedCluster.get(),
*clusterMetadata,
Optional<ClusterConnectionString>(),
updatedEntry,
isRestoring);
updateClusterMetadata(
tr, tenantEntry.assignedCluster, *clusterMetadata, Optional<ClusterConnectionString>(), updatedEntry);
clusterMetadata->entry = updatedEntry;
}
@ -1284,7 +1280,7 @@ void managementClusterAddTenantToGroup(Transaction tr,
ACTOR template <class Transaction>
Future<Void> managementClusterRemoveTenantFromGroup(Transaction tr,
TenantMapEntry tenantEntry,
MetaclusterTenantMapEntry tenantEntry,
DataClusterMetadata* clusterMetadata) {
state bool updateClusterCapacity = !tenantEntry.tenantGroup.present();
if (tenantEntry.tenantGroup.present()) {
@ -1300,7 +1296,7 @@ Future<Void> managementClusterRemoveTenantFromGroup(Transaction tr,
if (result.results.size() == 0) {
ManagementClusterMetadata::clusterTenantGroupIndex.erase(
tr, Tuple::makeTuple(tenantEntry.assignedCluster.get(), tenantEntry.tenantGroup.get()));
tr, Tuple::makeTuple(tenantEntry.assignedCluster, tenantEntry.tenantGroup.get()));
ManagementClusterMetadata::tenantMetadata().tenantGroupMap.erase(tr, tenantEntry.tenantGroup.get());
updateClusterCapacity = true;
@ -1313,7 +1309,7 @@ Future<Void> managementClusterRemoveTenantFromGroup(Transaction tr,
DataClusterEntry updatedEntry = clusterMetadata->entry;
--updatedEntry.allocated.numTenantGroups;
updateClusterMetadata(
tr, tenantEntry.assignedCluster.get(), *clusterMetadata, Optional<ClusterConnectionString>(), updatedEntry);
tr, tenantEntry.assignedCluster, *clusterMetadata, Optional<ClusterConnectionString>(), updatedEntry);
clusterMetadata->entry = updatedEntry;
}
@ -1342,7 +1338,7 @@ struct RestoreClusterImpl {
// Tenant list from data and management clusters
std::unordered_map<int64_t, TenantMapEntry> dataClusterTenantMap;
std::unordered_set<TenantName> dataClusterTenantNames;
std::unordered_map<int64_t, TenantMapEntry> mgmtClusterTenantMap;
std::unordered_map<int64_t, MetaclusterTenantMapEntry> mgmtClusterTenantMap;
std::unordered_set<int64_t> mgmtClusterTenantSetForCurrentDataCluster;
RestoreClusterImpl(Reference<DB> managementDb,
@ -1499,13 +1495,11 @@ struct RestoreClusterImpl {
void markClusterRestoring(Reference<typename DB::TransactionT> tr) {
MetaclusterMetadata::activeRestoreIds().set(tr, clusterName, restoreId);
if (ctx.dataClusterMetadata.get().entry.clusterState != DataClusterState::RESTORING) {
DataClusterEntry updatedEntry = ctx.dataClusterMetadata.get().entry;
updatedEntry.clusterState = DataClusterState::RESTORING;
updateClusterMetadata(tr, clusterName, ctx.dataClusterMetadata.get(), connectionString, updatedEntry);
// Remove this cluster from the cluster capacity index, but leave its configured capacity intact in the
// cluster entry. This allows us to retain the configured capacity while preventing the cluster from
// being used to allocate new tenant groups.
@ -1538,7 +1532,7 @@ struct RestoreClusterImpl {
ACTOR static Future<Void> markManagementTenantsAsError(RestoreClusterImpl* self,
Reference<typename DB::TransactionT> tr,
std::vector<int64_t> tenants) {
state std::vector<Future<Optional<TenantMapEntry>>> getFutures;
state std::vector<Future<Optional<MetaclusterTenantMapEntry>>> getFutures;
for (auto tenantId : tenants) {
getFutures.push_back(tryGetTenantTransaction(tr, tenantId));
}
@ -1550,8 +1544,8 @@ struct RestoreClusterImpl {
continue;
}
TenantMapEntry entry = f.get().get();
entry.tenantState = TenantState::ERROR;
MetaclusterTenantMapEntry entry = f.get().get();
entry.tenantState = MetaclusterAPI::TenantState::ERROR;
entry.error = "The tenant is missing after restoring its data cluster";
ManagementClusterMetadata::tenantMetadata().tenantMap.set(tr, entry.id, entry);
}
@ -1574,13 +1568,13 @@ struct RestoreClusterImpl {
ACTOR static Future<Optional<int64_t>> getTenantsFromManagementCluster(RestoreClusterImpl* self,
Reference<typename DB::TransactionT> tr,
int64_t initialTenantId) {
state KeyBackedRangeResult<std::pair<int64_t, TenantMapEntry>> tenants =
state KeyBackedRangeResult<std::pair<int64_t, MetaclusterTenantMapEntry>> tenants =
wait(ManagementClusterMetadata::tenantMetadata().tenantMap.getRange(
tr, initialTenantId, {}, CLIENT_KNOBS->MAX_TENANTS_PER_CLUSTER));
for (auto const& t : tenants.results) {
self->mgmtClusterTenantMap.emplace(t.first, t.second);
if (t.second.assignedCluster.present() && self->clusterName == t.second.assignedCluster.get()) {
if (self->clusterName == t.second.assignedCluster) {
self->mgmtClusterTenantSetForCurrentDataCluster.emplace(t.first);
}
}
@ -1657,7 +1651,6 @@ struct RestoreClusterImpl {
int64_t tenantId,
TenantMapEntry updatedEntry) {
TenantMapEntry existingEntry = wait(TenantAPI::getTenantTransaction(tr, tenantId));
updatedEntry.assignedCluster = Optional<ClusterName>();
// The tenant should have already been renamed, so in most cases its name will match.
// If we had to break a rename cycle using temporary tenant names, use that in the updated
@ -1676,14 +1669,15 @@ struct RestoreClusterImpl {
// Updates a tenant to match the management cluster state
// Returns the name of the tenant after it has been reconciled
ACTOR static Future<Optional<std::pair<TenantName, TenantMapEntry>>> reconcileTenant(RestoreClusterImpl* self,
TenantMapEntry tenantEntry) {
state std::unordered_map<int64_t, TenantMapEntry>::iterator managementEntry =
ACTOR static Future<Optional<std::pair<TenantName, MetaclusterTenantMapEntry>>> reconcileTenant(
RestoreClusterImpl* self,
TenantMapEntry tenantEntry) {
state std::unordered_map<int64_t, MetaclusterTenantMapEntry>::iterator managementEntry =
self->mgmtClusterTenantMap.find(tenantEntry.id);
// A data cluster tenant is not present on the management cluster
if (managementEntry == self->mgmtClusterTenantMap.end() ||
managementEntry->second.assignedCluster.get() != self->clusterName) {
managementEntry->second.assignedCluster != self->clusterName) {
if (self->restoreDryRun) {
if (managementEntry == self->mgmtClusterTenantMap.end()) {
self->messages.push_back(fmt::format("Delete missing tenant `{}' with ID {} on data cluster",
@ -1702,10 +1696,10 @@ struct RestoreClusterImpl {
}));
}
return Optional<std::pair<TenantName, TenantMapEntry>>();
return Optional<std::pair<TenantName, MetaclusterTenantMapEntry>>();
} else {
state TenantName tenantName = tenantEntry.tenantName;
state TenantMapEntry managementTenant = managementEntry->second;
state MetaclusterTenantMapEntry managementTenant = managementEntry->second;
// Rename
state bool renamed = tenantName != managementTenant.tenantName;
@ -1763,7 +1757,8 @@ struct RestoreClusterImpl {
} else {
wait(self->runRestoreDataClusterTransaction(
[self = self, managementTenant = managementTenant](Reference<ITransaction> tr) {
return updateTenantConfiguration(self, tr, managementTenant.id, managementTenant);
return updateTenantConfiguration(
self, tr, managementTenant.id, TenantMapEntry(managementTenant));
}));
// SOMEDAY: we could mark the tenant in the management cluster as READY if it is in the
// UPDATING_CONFIGURATION state
@ -1774,7 +1769,7 @@ struct RestoreClusterImpl {
}
}
Future<Void> renameTenantBatch(std::vector<std::pair<TenantName, TenantMapEntry>> tenantsToRename) {
Future<Void> renameTenantBatch(std::vector<std::pair<TenantName, MetaclusterTenantMapEntry>> tenantsToRename) {
return runRestoreDataClusterTransaction([this, tenantsToRename](Reference<ITransaction> tr) {
std::vector<Future<Void>> renameFutures;
for (auto t : tenantsToRename) {
@ -1786,7 +1781,7 @@ struct RestoreClusterImpl {
}
ACTOR static Future<Void> reconcileTenants(RestoreClusterImpl* self) {
state std::vector<Future<Optional<std::pair<TenantName, TenantMapEntry>>>> reconcileFutures;
state std::vector<Future<Optional<std::pair<TenantName, MetaclusterTenantMapEntry>>>> reconcileFutures;
for (auto itr = self->dataClusterTenantMap.begin(); itr != self->dataClusterTenantMap.end(); ++itr) {
reconcileFutures.push_back(reconcileTenant(self, itr->second));
}
@ -1795,9 +1790,10 @@ struct RestoreClusterImpl {
if (!self->restoreDryRun) {
state int reconcileIndex;
state std::vector<std::pair<TenantName, TenantMapEntry>> tenantsToRename;
state std::vector<std::pair<TenantName, MetaclusterTenantMapEntry>> tenantsToRename;
for (reconcileIndex = 0; reconcileIndex < reconcileFutures.size(); ++reconcileIndex) {
Optional<std::pair<TenantName, TenantMapEntry>> const& result = reconcileFutures[reconcileIndex].get();
Optional<std::pair<TenantName, MetaclusterTenantMapEntry>> const& result =
reconcileFutures[reconcileIndex].get();
if (result.present() && result.get().first.startsWith(metaclusterTemporaryRenamePrefix) &&
result.get().first != result.get().second.tenantName) {
tenantsToRename.push_back(result.get());
@ -1821,7 +1817,7 @@ struct RestoreClusterImpl {
state int64_t missingTenantCount = 0;
while (setItr != self->mgmtClusterTenantSetForCurrentDataCluster.end()) {
int64_t tenantId = *setItr;
TenantMapEntry const& managementTenant = self->mgmtClusterTenantMap[tenantId];
MetaclusterTenantMapEntry const& managementTenant = self->mgmtClusterTenantMap[tenantId];
// If a tenant is present on the management cluster and not on the data cluster, mark it in an error
// state unless it is already in certain states (e.g. REGISTERING, REMOVING) that allow the tenant to be
@ -1830,8 +1826,8 @@ struct RestoreClusterImpl {
// SOMEDAY: this could optionally complete the partial operations (e.g. finish creating or removing the
// tenant)
if (self->dataClusterTenantMap.find(tenantId) == self->dataClusterTenantMap.end() &&
managementTenant.tenantState != TenantState::REGISTERING &&
managementTenant.tenantState != TenantState::REMOVING) {
managementTenant.tenantState != MetaclusterAPI::TenantState::REGISTERING &&
managementTenant.tenantState != MetaclusterAPI::TenantState::REMOVING) {
if (self->restoreDryRun) {
self->messages.push_back(fmt::format("The tenant `{}' with ID {} is missing on the data cluster",
printable(managementTenant.tenantName),
@ -1841,7 +1837,7 @@ struct RestoreClusterImpl {
// include tenants we previously marked as missing, and as new errors are added it could include
// other tenants
++missingTenantCount;
if (managementTenant.tenantState != TenantState::ERROR) {
if (managementTenant.tenantState != MetaclusterAPI::TenantState::ERROR) {
missingTenants.push_back(tenantId);
if (missingTenants.size() == CLIENT_KNOBS->METACLUSTER_RESTORE_BATCH_SIZE) {
wait(self->runRestoreManagementTransaction([self = self, missingTenants = missingTenants](
@ -1873,14 +1869,14 @@ struct RestoreClusterImpl {
// Returns true if the group needs to be created
ACTOR static Future<bool> addTenantToManagementCluster(RestoreClusterImpl* self,
Reference<ITransaction> tr,
TenantMapEntry tenantEntry) {
MetaclusterTenantMapEntry tenantEntry) {
state Future<Optional<TenantGroupEntry>> tenantGroupEntry = Optional<TenantGroupEntry>();
if (tenantEntry.tenantGroup.present()) {
tenantGroupEntry =
ManagementClusterMetadata::tenantMetadata().tenantGroupMap.get(tr, tenantEntry.tenantGroup.get());
}
Optional<TenantMapEntry> existingEntry = wait(tryGetTenantTransaction(tr, tenantEntry.tenantName));
Optional<MetaclusterTenantMapEntry> existingEntry = wait(tryGetTenantTransaction(tr, tenantEntry.tenantName));
if (existingEntry.present()) {
if (existingEntry.get().assignedCluster == self->clusterName) {
ASSERT(existingEntry.get().matchesConfiguration(tenantEntry));
@ -1895,18 +1891,18 @@ struct RestoreClusterImpl {
}
if (!self->restoreDryRun) {
tenantEntry.tenantState = TenantState::READY;
tenantEntry.tenantState = MetaclusterAPI::TenantState::READY;
tenantEntry.assignedCluster = self->clusterName;
ManagementClusterMetadata::tenantMetadata().tenantMap.set(tr, tenantEntry.id, tenantEntry);
ManagementClusterMetadata::tenantMetadata().tenantNameIndex.set(tr, tenantEntry.tenantName, tenantEntry.id);
ManagementClusterMetadata::tenantMetadata().tenantCount.atomicOp(tr, 1, MutationRef::AddValue);
ManagementClusterMetadata::clusterTenantCount.atomicOp(
tr, tenantEntry.assignedCluster.get(), 1, MutationRef::AddValue);
tr, tenantEntry.assignedCluster, 1, MutationRef::AddValue);
// Updated indexes to include the new tenant
ManagementClusterMetadata::clusterTenantIndex.insert(
tr, Tuple::makeTuple(tenantEntry.assignedCluster.get(), tenantEntry.tenantName, tenantEntry.id));
tr, Tuple::makeTuple(tenantEntry.assignedCluster, tenantEntry.tenantName, tenantEntry.id));
}
wait(success(tenantGroupEntry));
@ -1933,7 +1929,7 @@ struct RestoreClusterImpl {
ACTOR static Future<Void> addTenantBatchToManagementCluster(RestoreClusterImpl* self,
Reference<typename DB::TransactionT> tr,
std::vector<TenantMapEntry> tenants) {
std::vector<MetaclusterTenantMapEntry> tenants) {
Optional<int64_t> tenantIdPrefix = wait(TenantMetadata::tenantIdPrefix().get(tr));
ASSERT(tenantIdPrefix.present());
@ -1988,17 +1984,17 @@ struct RestoreClusterImpl {
ACTOR static Future<Void> addTenantsToManagementCluster(RestoreClusterImpl* self) {
state std::unordered_map<int64_t, TenantMapEntry>::iterator itr;
state std::vector<TenantMapEntry> tenantBatch;
state std::vector<MetaclusterTenantMapEntry> tenantBatch;
state int64_t tenantsToAdd = 0;
for (itr = self->dataClusterTenantMap.begin(); itr != self->dataClusterTenantMap.end(); ++itr) {
state std::unordered_map<int64_t, TenantMapEntry>::iterator managementEntry =
state std::unordered_map<int64_t, MetaclusterTenantMapEntry>::iterator managementEntry =
self->mgmtClusterTenantMap.find(itr->second.id);
if (managementEntry == self->mgmtClusterTenantMap.end()) {
++tenantsToAdd;
tenantBatch.push_back(itr->second);
tenantBatch.push_back(MetaclusterTenantMapEntry(itr->second));
} else if (managementEntry->second.tenantName != itr->second.tenantName ||
managementEntry->second.assignedCluster.get() != self->clusterName ||
managementEntry->second.assignedCluster != self->clusterName ||
!managementEntry->second.matchesConfiguration(itr->second)) {
self->messages.push_back(
fmt::format("The tenant `{}' has the same ID {} as an existing tenant `{}' on cluster `{}'",
@ -2173,13 +2169,13 @@ struct CreateTenantImpl {
AssignClusterAutomatically assignClusterAutomatically;
// Initialization parameters
TenantMapEntry tenantEntry;
MetaclusterTenantMapEntry tenantEntry;
// Parameter set if tenant creation permanently fails on the data cluster
Optional<int64_t> replaceExistingTenantId;
CreateTenantImpl(Reference<DB> managementDb,
TenantMapEntry tenantEntry,
MetaclusterTenantMapEntry tenantEntry,
AssignClusterAutomatically assignClusterAutomatically)
: ctx(managementDb), tenantEntry(tenantEntry), assignClusterAutomatically(assignClusterAutomatically) {}
@ -2204,10 +2200,11 @@ struct CreateTenantImpl {
ACTOR static Future<bool> checkForExistingTenant(CreateTenantImpl* self, Reference<typename DB::TransactionT> tr) {
// Check if the tenant already exists. If it's partially created and matches the parameters we
// specified, continue creating it. Otherwise, fail with an error.
state Optional<TenantMapEntry> existingEntry = wait(tryGetTenantTransaction(tr, self->tenantEntry.tenantName));
state Optional<MetaclusterTenantMapEntry> existingEntry =
wait(tryGetTenantTransaction(tr, self->tenantEntry.tenantName));
if (existingEntry.present()) {
if (!existingEntry.get().matchesConfiguration(self->tenantEntry) ||
existingEntry.get().tenantState != TenantState::REGISTERING) {
existingEntry.get().tenantState != MetaclusterAPI::TenantState::REGISTERING) {
// The tenant already exists and is either completely created or has a different
// configuration
throw tenant_already_exists();
@ -2222,7 +2219,7 @@ struct CreateTenantImpl {
throw invalid_tenant_configuration();
}
self->tenantEntry = existingEntry.get();
wait(self->ctx.setCluster(tr, existingEntry.get().assignedCluster.get()));
wait(self->ctx.setCluster(tr, existingEntry.get().assignedCluster));
return true;
} else {
// The previous creation is permanently failed, so cleanup the tenant and create it again from
@ -2231,16 +2228,15 @@ struct CreateTenantImpl {
ManagementClusterMetadata::tenantMetadata().tenantMap.erase(tr, existingEntry.get().id);
ManagementClusterMetadata::tenantMetadata().tenantCount.atomicOp(tr, -1, MutationRef::AddValue);
ManagementClusterMetadata::clusterTenantCount.atomicOp(
tr, existingEntry.get().assignedCluster.get(), -1, MutationRef::AddValue);
tr, existingEntry.get().assignedCluster, -1, MutationRef::AddValue);
ManagementClusterMetadata::clusterTenantIndex.erase(
tr,
Tuple::makeTuple(existingEntry.get().assignedCluster.get(),
self->tenantEntry.tenantName,
existingEntry.get().id));
Tuple::makeTuple(
existingEntry.get().assignedCluster, self->tenantEntry.tenantName, existingEntry.get().id));
state DataClusterMetadata previousAssignedClusterMetadata =
wait(getClusterTransaction(tr, existingEntry.get().assignedCluster.get()));
wait(getClusterTransaction(tr, existingEntry.get().assignedCluster));
wait(managementClusterRemoveTenantFromGroup(tr, existingEntry.get(), &previousAssignedClusterMetadata));
}
@ -2265,10 +2261,10 @@ struct CreateTenantImpl {
if (groupEntry.present()) {
ASSERT(groupEntry.get().assignedCluster.present());
if (!self->assignClusterAutomatically &&
groupEntry.get().assignedCluster.get() != self->tenantEntry.assignedCluster.get()) {
groupEntry.get().assignedCluster.get() != self->tenantEntry.assignedCluster) {
TraceEvent("MetaclusterCreateTenantGroupClusterMismatch")
.detail("TenantGroupCluster", groupEntry.get().assignedCluster.get())
.detail("SpecifiedCluster", self->tenantEntry.assignedCluster.get());
.detail("SpecifiedCluster", self->tenantEntry.assignedCluster);
throw invalid_tenant_configuration();
}
return std::make_pair(groupEntry.get().assignedCluster.get(), true);
@ -2282,11 +2278,11 @@ struct CreateTenantImpl {
// If preferred cluster is specified, look for that one.
if (!self->assignClusterAutomatically) {
DataClusterMetadata dataClusterMetadata =
wait(getClusterTransaction(tr, self->tenantEntry.assignedCluster.get()));
wait(getClusterTransaction(tr, self->tenantEntry.assignedCluster));
if (!dataClusterMetadata.entry.hasCapacity()) {
throw cluster_no_capacity();
}
dataClusterNames.push_back(self->tenantEntry.assignedCluster.get());
dataClusterNames.push_back(self->tenantEntry.assignedCluster);
} else {
state KeyBackedSet<Tuple>::RangeResultType availableClusters =
wait(ManagementClusterMetadata::clusterCapacityIndex.getRange(
@ -2367,7 +2363,7 @@ struct CreateTenantImpl {
self->tenantEntry.setId(lastId.get() + 1);
ManagementClusterMetadata::tenantMetadata().lastTenantId.set(tr, self->tenantEntry.id);
self->tenantEntry.tenantState = TenantState::REGISTERING;
self->tenantEntry.tenantState = MetaclusterAPI::TenantState::REGISTERING;
ManagementClusterMetadata::tenantMetadata().tenantMap.set(tr, self->tenantEntry.id, self->tenantEntry);
ManagementClusterMetadata::tenantMetadata().tenantNameIndex.set(
tr, self->tenantEntry.tenantName, self->tenantEntry.id);
@ -2375,20 +2371,19 @@ struct CreateTenantImpl {
ManagementClusterMetadata::tenantMetadata().tenantCount.atomicOp(tr, 1, MutationRef::AddValue);
ManagementClusterMetadata::clusterTenantCount.atomicOp(
tr, self->tenantEntry.assignedCluster.get(), 1, MutationRef::AddValue);
tr, self->tenantEntry.assignedCluster, 1, MutationRef::AddValue);
int64_t clusterTenantCount = wait(ManagementClusterMetadata::clusterTenantCount.getD(
tr, self->tenantEntry.assignedCluster.get(), Snapshot::False, 0));
tr, self->tenantEntry.assignedCluster, Snapshot::False, 0));
if (clusterTenantCount > CLIENT_KNOBS->MAX_TENANTS_PER_CLUSTER) {
throw cluster_no_capacity();
}
// Updated indexes to include the new tenant
ManagementClusterMetadata::clusterTenantIndex.insert(tr,
Tuple::makeTuple(self->tenantEntry.assignedCluster.get(),
self->tenantEntry.tenantName,
self->tenantEntry.id));
ManagementClusterMetadata::clusterTenantIndex.insert(
tr,
Tuple::makeTuple(self->tenantEntry.assignedCluster, self->tenantEntry.tenantName, self->tenantEntry.id));
wait(setClusterFuture);
@ -2409,8 +2404,9 @@ struct CreateTenantImpl {
}
ACTOR static Future<Void> storeTenantInDataCluster(CreateTenantImpl* self, Reference<ITransaction> tr) {
TenantMapEntry entry(self->tenantEntry);
std::pair<Optional<TenantMapEntry>, bool> dataClusterTenant =
wait(TenantAPI::createTenantTransaction(tr, self->tenantEntry, ClusterType::METACLUSTER_DATA));
wait(TenantAPI::createTenantTransaction(tr, entry, ClusterType::METACLUSTER_DATA));
// If the tenant map entry is empty, then we encountered a tombstone indicating that the tenant was
// simultaneously removed.
@ -2422,14 +2418,15 @@ struct CreateTenantImpl {
}
ACTOR static Future<Void> markTenantReady(CreateTenantImpl* self, Reference<typename DB::TransactionT> tr) {
state Optional<TenantMapEntry> managementEntry = wait(tryGetTenantTransaction(tr, self->tenantEntry.id));
state Optional<MetaclusterTenantMapEntry> managementEntry =
wait(tryGetTenantTransaction(tr, self->tenantEntry.id));
if (!managementEntry.present()) {
throw tenant_removed();
}
if (managementEntry.get().tenantState == TenantState::REGISTERING) {
TenantMapEntry updatedEntry = managementEntry.get();
updatedEntry.tenantState = TenantState::READY;
if (managementEntry.get().tenantState == MetaclusterAPI::TenantState::REGISTERING) {
MetaclusterTenantMapEntry updatedEntry = managementEntry.get();
updatedEntry.tenantState = MetaclusterAPI::TenantState::READY;
ManagementClusterMetadata::tenantMetadata().tenantMap.set(tr, updatedEntry.id, updatedEntry);
ManagementClusterMetadata::tenantMetadata().lastTenantModification.setVersionstamp(tr, Versionstamp(), 0);
}
@ -2473,7 +2470,7 @@ struct CreateTenantImpl {
ACTOR template <class DB>
Future<Void> createTenant(Reference<DB> db,
TenantMapEntry tenantEntry,
MetaclusterTenantMapEntry tenantEntry,
AssignClusterAutomatically assignClusterAutomatically) {
state CreateTenantImpl<DB> impl(db, tenantEntry, assignClusterAutomatically);
wait(impl.run());
@ -2505,17 +2502,17 @@ struct DeleteTenantImpl {
tr, self->tenantName.get(), Snapshot::False, TenantInfo::INVALID_TENANT)));
}
state TenantMapEntry tenantEntry = wait(getTenantTransaction(tr, resolvedId));
state MetaclusterTenantMapEntry tenantEntry = wait(getTenantTransaction(tr, resolvedId));
// Disallow removing the "new" name of a renamed tenant before it completes
if (self->tenantName.present() && tenantEntry.tenantName != self->tenantName.get()) {
ASSERT(tenantEntry.tenantState == TenantState::RENAMING ||
tenantEntry.tenantState == TenantState::REMOVING);
ASSERT(tenantEntry.tenantState == MetaclusterAPI::TenantState::RENAMING ||
tenantEntry.tenantState == MetaclusterAPI::TenantState::REMOVING);
throw tenant_not_found();
}
wait(self->ctx.setCluster(tr, tenantEntry.assignedCluster.get()));
return std::make_pair(resolvedId, tenantEntry.tenantState == TenantState::REMOVING);
wait(self->ctx.setCluster(tr, tenantEntry.assignedCluster));
return std::make_pair(resolvedId, tenantEntry.tenantState == MetaclusterAPI::TenantState::REMOVING);
}
// Does an initial check if the tenant is empty. This is an optimization to prevent us marking a tenant
@ -2542,10 +2539,10 @@ struct DeleteTenantImpl {
// Mark the tenant as being in a removing state on the management cluster
ACTOR static Future<Void> markTenantInRemovingState(DeleteTenantImpl* self,
Reference<typename DB::TransactionT> tr) {
state TenantMapEntry tenantEntry = wait(getTenantTransaction(tr, self->tenantId));
state MetaclusterTenantMapEntry tenantEntry = wait(getTenantTransaction(tr, self->tenantId));
if (tenantEntry.tenantState != TenantState::REMOVING) {
tenantEntry.tenantState = TenantState::REMOVING;
if (tenantEntry.tenantState != MetaclusterAPI::TenantState::REMOVING) {
tenantEntry.tenantState = MetaclusterAPI::TenantState::REMOVING;
ManagementClusterMetadata::tenantMetadata().tenantMap.set(tr, tenantEntry.id, tenantEntry);
ManagementClusterMetadata::tenantMetadata().lastTenantModification.setVersionstamp(tr, Versionstamp(), 0);
@ -2557,13 +2554,13 @@ struct DeleteTenantImpl {
// Delete the tenant and related metadata on the management cluster
ACTOR static Future<Void> deleteTenantFromManagementCluster(DeleteTenantImpl* self,
Reference<typename DB::TransactionT> tr) {
state Optional<TenantMapEntry> tenantEntry = wait(tryGetTenantTransaction(tr, self->tenantId));
state Optional<MetaclusterTenantMapEntry> tenantEntry = wait(tryGetTenantTransaction(tr, self->tenantId));
if (!tenantEntry.present()) {
return Void();
}
ASSERT(tenantEntry.get().tenantState == TenantState::REMOVING);
ASSERT(tenantEntry.get().tenantState == MetaclusterAPI::TenantState::REMOVING);
// Erase the tenant entry itself
ManagementClusterMetadata::tenantMetadata().tenantMap.erase(tr, tenantEntry.get().id);
@ -2573,12 +2570,11 @@ struct DeleteTenantImpl {
// This is idempotent because this function is only called if the tenant is in the map
ManagementClusterMetadata::tenantMetadata().tenantCount.atomicOp(tr, -1, MutationRef::AddValue);
ManagementClusterMetadata::clusterTenantCount.atomicOp(
tr, tenantEntry.get().assignedCluster.get(), -1, MutationRef::AddValue);
tr, tenantEntry.get().assignedCluster, -1, MutationRef::AddValue);
// Remove the tenant from the cluster -> tenant index
ManagementClusterMetadata::clusterTenantIndex.erase(
tr,
Tuple::makeTuple(tenantEntry.get().assignedCluster.get(), tenantEntry.get().tenantName, self->tenantId));
tr, Tuple::makeTuple(tenantEntry.get().assignedCluster, tenantEntry.get().tenantName, self->tenantId));
if (tenantEntry.get().renameDestination.present()) {
// If renaming, remove the metadata associated with the tenant destination
@ -2587,9 +2583,8 @@ struct DeleteTenantImpl {
ManagementClusterMetadata::clusterTenantIndex.erase(
tr,
Tuple::makeTuple(tenantEntry.get().assignedCluster.get(),
tenantEntry.get().renameDestination.get(),
self->tenantId));
Tuple::makeTuple(
tenantEntry.get().assignedCluster, tenantEntry.get().renameDestination.get(), self->tenantId));
}
// Remove the tenant from its tenant group
@ -2677,48 +2672,48 @@ Future<std::vector<std::pair<TenantName, int64_t>>> listTenants(Reference<DB> db
// Scan the tenant index to get a list of tenant IDs, and then lookup the metadata for each ID individually
ACTOR template <class Transaction>
Future<std::vector<std::pair<TenantName, TenantMapEntry>>> listTenantMetadataTransaction(
Future<std::vector<std::pair<TenantName, MetaclusterTenantMapEntry>>> listTenantMetadataTransaction(
Transaction tr,
std::vector<std::pair<TenantName, int64_t>> tenantIds) {
state int idIdx = 0;
state std::vector<Future<Optional<TenantMapEntry>>> futures;
state std::vector<Future<Optional<MetaclusterTenantMapEntry>>> futures;
for (; idIdx < tenantIds.size(); ++idIdx) {
futures.push_back(MetaclusterAPI::tryGetTenantTransaction(tr, tenantIds[idIdx].second));
}
wait(waitForAll(futures));
std::vector<std::pair<TenantName, TenantMapEntry>> results;
std::vector<std::pair<TenantName, MetaclusterTenantMapEntry>> results;
results.reserve(futures.size());
for (int i = 0; i < futures.size(); ++i) {
const TenantMapEntry& entry = futures[i].get().get();
results.emplace_back(tenantIds[i].first, entry);
const MetaclusterTenantMapEntry& entry = futures[i].get().get();
results.emplace_back(entry.tenantName, entry);
}
return results;
}
ACTOR template <class Transaction>
Future<std::vector<std::pair<TenantName, TenantMapEntry>>> listTenantMetadataTransaction(Transaction tr,
TenantNameRef begin,
TenantNameRef end,
int limit) {
Future<std::vector<std::pair<TenantName, MetaclusterTenantMapEntry>>> listTenantMetadataTransaction(Transaction tr,
TenantNameRef begin,
TenantNameRef end,
int limit) {
std::vector<std::pair<TenantName, int64_t>> matchingTenants = wait(listTenantsTransaction(tr, begin, end, limit));
std::vector<std::pair<TenantName, TenantMapEntry>> results =
std::vector<std::pair<TenantName, MetaclusterTenantMapEntry>> results =
wait(listTenantMetadataTransaction(tr, matchingTenants));
return results;
}
ACTOR template <class DB>
Future<std::vector<std::pair<TenantName, TenantMapEntry>>> listTenantMetadata(
Future<std::vector<std::pair<TenantName, MetaclusterTenantMapEntry>>> listTenantMetadata(
Reference<DB> db,
TenantName begin,
TenantName end,
int limit,
int offset = 0,
std::vector<TenantState> filters = std::vector<TenantState>()) {
std::vector<MetaclusterAPI::TenantState> filters = std::vector<MetaclusterAPI::TenantState>()) {
state Reference<typename DB::TransactionT> tr = db->createTransaction();
state std::vector<std::pair<TenantName, TenantMapEntry>> results;
state std::vector<std::pair<TenantName, MetaclusterTenantMapEntry>> results;
loop {
try {
@ -2734,7 +2729,7 @@ Future<std::vector<std::pair<TenantName, TenantMapEntry>>> listTenantMetadata(
// read in batch
state int count = 0;
loop {
std::vector<std::pair<TenantName, TenantMapEntry>> tenantBatch =
std::vector<std::pair<TenantName, MetaclusterTenantMapEntry>> tenantBatch =
wait(MetaclusterAPI::listTenantMetadataTransaction(tr, begin, end, std::max(limit + offset, 1000)));
if (tenantBatch.empty()) {
@ -2771,7 +2766,7 @@ struct ConfigureTenantImpl {
std::map<Standalone<StringRef>, Optional<Value>> configurationParameters;
// Parameters set in updateManagementCluster
TenantMapEntry updatedEntry;
MetaclusterTenantMapEntry updatedEntry;
ConfigureTenantImpl(Reference<DB> managementDb,
TenantName tenantName,
@ -2782,10 +2777,10 @@ struct ConfigureTenantImpl {
// structures. It does not update the TenantMapEntry stored in the tenant map.
ACTOR static Future<Void> updateTenantGroup(ConfigureTenantImpl* self,
Reference<typename DB::TransactionT> tr,
TenantMapEntry tenantEntry,
MetaclusterTenantMapEntry tenantEntry,
Optional<TenantGroupName> desiredGroup) {
state TenantMapEntry entryWithUpdatedGroup = tenantEntry;
state MetaclusterTenantMapEntry entryWithUpdatedGroup = tenantEntry;
entryWithUpdatedGroup.tenantGroup = desiredGroup;
if (tenantEntry.tenantGroup == desiredGroup) {
@ -2842,21 +2837,21 @@ struct ConfigureTenantImpl {
// Updates the configuration in the management cluster and marks it as being in the UPDATING_CONFIGURATION state
ACTOR static Future<bool> updateManagementCluster(ConfigureTenantImpl* self,
Reference<typename DB::TransactionT> tr) {
state Optional<TenantMapEntry> tenantEntry = wait(tryGetTenantTransaction(tr, self->tenantName));
state Optional<MetaclusterTenantMapEntry> tenantEntry = wait(tryGetTenantTransaction(tr, self->tenantName));
if (!tenantEntry.present()) {
throw tenant_not_found();
}
if (tenantEntry.get().tenantState != TenantState::READY &&
tenantEntry.get().tenantState != TenantState::UPDATING_CONFIGURATION) {
if (tenantEntry.get().tenantState != MetaclusterAPI::TenantState::READY &&
tenantEntry.get().tenantState != MetaclusterAPI::TenantState::UPDATING_CONFIGURATION) {
throw invalid_tenant_state();
}
wait(self->ctx.setCluster(tr, tenantEntry.get().assignedCluster.get()));
wait(self->ctx.setCluster(tr, tenantEntry.get().assignedCluster));
self->updatedEntry = tenantEntry.get();
self->updatedEntry.tenantState = TenantState::UPDATING_CONFIGURATION;
self->updatedEntry.tenantState = MetaclusterAPI::TenantState::UPDATING_CONFIGURATION;
state std::map<Standalone<StringRef>, Optional<Value>>::iterator configItr;
for (configItr = self->configurationParameters.begin(); configItr != self->configurationParameters.end();
@ -2876,7 +2871,7 @@ struct ConfigureTenantImpl {
}
if (self->updatedEntry.matchesConfiguration(tenantEntry.get()) &&
tenantEntry.get().tenantState == TenantState::READY) {
tenantEntry.get().tenantState == MetaclusterAPI::TenantState::READY) {
return false;
}
@ -2899,8 +2894,6 @@ struct ConfigureTenantImpl {
}
TenantMapEntry dataClusterEntry = self->updatedEntry;
dataClusterEntry.tenantState = TenantState::READY;
dataClusterEntry.assignedCluster = {};
wait(TenantAPI::configureTenantTransaction(tr, tenantEntry.get(), dataClusterEntry));
return Void();
@ -2909,14 +2902,16 @@ struct ConfigureTenantImpl {
// Updates the tenant state in the management cluster to READY
ACTOR static Future<Void> markManagementTenantAsReady(ConfigureTenantImpl* self,
Reference<typename DB::TransactionT> tr) {
state Optional<TenantMapEntry> tenantEntry = wait(tryGetTenantTransaction(tr, self->updatedEntry.id));
state Optional<MetaclusterTenantMapEntry> tenantEntry =
wait(tryGetTenantTransaction(tr, self->updatedEntry.id));
if (!tenantEntry.present() || tenantEntry.get().tenantState != TenantState::UPDATING_CONFIGURATION ||
if (!tenantEntry.present() ||
tenantEntry.get().tenantState != MetaclusterAPI::TenantState::UPDATING_CONFIGURATION ||
tenantEntry.get().configurationSequenceNum > self->updatedEntry.configurationSequenceNum) {
return Void();
}
tenantEntry.get().tenantState = TenantState::READY;
tenantEntry.get().tenantState = MetaclusterAPI::TenantState::READY;
ManagementClusterMetadata::tenantMetadata().tenantMap.set(tr, tenantEntry.get().id, tenantEntry.get());
ManagementClusterMetadata::tenantMetadata().lastTenantModification.setVersionstamp(tr, Versionstamp(), 0);
return Void();
@ -2965,7 +2960,7 @@ struct RenameTenantImpl {
ACTOR static Future<Void> markTenantsInRenamingState(RenameTenantImpl* self,
Reference<typename DB::TransactionT> tr) {
state TenantMapEntry tenantEntry;
state MetaclusterTenantMapEntry tenantEntry;
state Optional<int64_t> newNameId;
wait(store(tenantEntry, getTenantTransaction(tr, self->oldName)) &&
store(newNameId, ManagementClusterMetadata::tenantMetadata().tenantNameIndex.get(tr, self->newName)));
@ -2979,7 +2974,7 @@ struct RenameTenantImpl {
self->tenantId = tenantEntry.id;
// If marked for deletion, abort the rename
if (tenantEntry.tenantState == TenantState::REMOVING) {
if (tenantEntry.tenantState == MetaclusterAPI::TenantState::REMOVING) {
CODE_PROBE(true, "Metacluster rename candidates marked for deletion");
throw tenant_removed();
}
@ -2989,9 +2984,9 @@ struct RenameTenantImpl {
throw tenant_already_exists();
}
wait(self->ctx.setCluster(tr, tenantEntry.assignedCluster.get()));
wait(self->ctx.setCluster(tr, tenantEntry.assignedCluster));
if (tenantEntry.tenantState == TenantState::RENAMING) {
if (tenantEntry.tenantState == MetaclusterAPI::TenantState::RENAMING) {
if (tenantEntry.tenantName != self->oldName) {
CODE_PROBE(true, "Renaming a tenant that is currently the destination of another rename");
throw tenant_not_found();
@ -3006,7 +3001,7 @@ struct RenameTenantImpl {
}
}
if (tenantEntry.tenantState != TenantState::READY) {
if (tenantEntry.tenantState != MetaclusterAPI::TenantState::READY) {
CODE_PROBE(true, "Metacluster unable to proceed with rename operation");
throw invalid_tenant_state();
}
@ -3014,15 +3009,15 @@ struct RenameTenantImpl {
self->configurationSequenceNum = tenantEntry.configurationSequenceNum + 1;
// Check cluster capacity. If we would exceed the amount due to temporary extra tenants
// then we deny the rename request altogether.
int64_t clusterTenantCount = wait(ManagementClusterMetadata::clusterTenantCount.getD(
tr, tenantEntry.assignedCluster.get(), Snapshot::False, 0));
int64_t clusterTenantCount = wait(
ManagementClusterMetadata::clusterTenantCount.getD(tr, tenantEntry.assignedCluster, Snapshot::False, 0));
if (clusterTenantCount + 1 > CLIENT_KNOBS->MAX_TENANTS_PER_CLUSTER) {
throw cluster_no_capacity();
}
TenantMapEntry updatedEntry = tenantEntry;
updatedEntry.tenantState = TenantState::RENAMING;
MetaclusterTenantMapEntry updatedEntry = tenantEntry;
updatedEntry.tenantState = MetaclusterAPI::TenantState::RENAMING;
updatedEntry.renameDestination = self->newName;
updatedEntry.configurationSequenceNum = self->configurationSequenceNum;
@ -3032,7 +3027,7 @@ struct RenameTenantImpl {
// Updated indexes to include the new tenant
ManagementClusterMetadata::clusterTenantIndex.insert(
tr, Tuple::makeTuple(updatedEntry.assignedCluster.get(), self->newName, self->tenantId));
tr, Tuple::makeTuple(updatedEntry.assignedCluster, self->newName, self->tenantId));
return Void();
}
@ -3051,7 +3046,7 @@ struct RenameTenantImpl {
ACTOR static Future<Void> finishRenameFromManagementCluster(RenameTenantImpl* self,
Reference<typename DB::TransactionT> tr) {
Optional<TenantMapEntry> tenantEntry = wait(tryGetTenantTransaction(tr, self->tenantId));
Optional<MetaclusterTenantMapEntry> tenantEntry = wait(tryGetTenantTransaction(tr, self->tenantId));
// Another (or several other) operations have already removed/changed the old entry
// Possible for the new entry to also have been tampered with,
@ -3064,16 +3059,16 @@ struct RenameTenantImpl {
"configuration sequence.");
return Void();
}
if (tenantEntry.get().tenantState == TenantState::REMOVING) {
if (tenantEntry.get().tenantState == MetaclusterAPI::TenantState::REMOVING) {
throw tenant_removed();
}
TenantMapEntry updatedEntry = tenantEntry.get();
MetaclusterTenantMapEntry updatedEntry = tenantEntry.get();
// Only update if in the expected state
if (updatedEntry.tenantState == TenantState::RENAMING) {
if (updatedEntry.tenantState == MetaclusterAPI::TenantState::RENAMING) {
updatedEntry.tenantName = self->newName;
updatedEntry.tenantState = TenantState::READY;
updatedEntry.tenantState = MetaclusterAPI::TenantState::READY;
updatedEntry.renameDestination.reset();
ManagementClusterMetadata::tenantMetadata().tenantMap.set(tr, self->tenantId, updatedEntry);
ManagementClusterMetadata::tenantMetadata().lastTenantModification.setVersionstamp(tr, Versionstamp(), 0);
@ -3082,7 +3077,7 @@ struct RenameTenantImpl {
// Remove the tenant from the cluster -> tenant index
ManagementClusterMetadata::clusterTenantIndex.erase(
tr, Tuple::makeTuple(updatedEntry.assignedCluster.get(), self->oldName, self->tenantId));
tr, Tuple::makeTuple(updatedEntry.assignedCluster, self->oldName, self->tenantId));
}
return Void();

View File

@ -949,6 +949,7 @@ public:
double REDWOOD_HISTOGRAM_INTERVAL;
bool REDWOOD_EVICT_UPDATED_PAGES; // Whether to prioritize eviction of updated pages from cache.
int REDWOOD_DECODECACHE_REUSE_MIN_HEIGHT; // Minimum height for which to keep and reuse page decode caches
int REDWOOD_NODE_MAX_UNBALANCE; // Maximum imbalance in a node before it should be rebuilt instead of updated
std::string REDWOOD_IO_PRIORITIES;

View File

@ -42,59 +42,32 @@ KeyRangeRef clampRangeToTenant(KeyRangeRef range, TenantInfo const& tenantInfo,
bool withinSingleTenant(KeyRangeRef const&);
constexpr static int PREFIX_SIZE = sizeof(int64_t);
} // namespace TenantAPI
// Represents the various states that a tenant could be in.
// In a standalone cluster, a tenant should only ever be in the READY state.
// In a metacluster, a tenant on the management cluster could be in the other states while changes are applied to the
// data cluster.
//
// REGISTERING - the tenant has been created on the management cluster and is being created on the data cluster
// READY - the tenant has been created on both clusters, is active, and is consistent between the two clusters
// REMOVING - the tenant has been marked for removal and is being removed on the data cluster
// UPDATING_CONFIGURATION - the tenant configuration has changed on the management cluster and is being applied to the
// data cluster
// RENAMING - the tenant is in the process of being renamed
// ERROR - the tenant is in an error state
//
// A tenant in any configuration is allowed to be removed. Only tenants in the READY or UPDATING_CONFIGURATION phases
// can have their configuration updated. A tenant must not exist or be in the REGISTERING phase to be created. To be
// renamed, a tenant must be in the READY or RENAMING state. In the latter case, the rename destination must match
// the original rename attempt.
//
// If an operation fails and the tenant is left in a non-ready state, re-running the same operation is legal. If
// successful, the tenant will return to the READY state.
enum class TenantState { REGISTERING, READY, REMOVING, UPDATING_CONFIGURATION, RENAMING, ERROR };
// Represents the lock state the tenant could be in.
// Can be used in conjunction with the other tenant states above.
enum class TenantLockState : uint8_t { UNLOCKED, READ_ONLY, LOCKED };
std::string tenantLockStateToString(TenantLockState tenantState);
TenantLockState stringToTenantLockState(std::string stateStr);
} // namespace TenantAPI
json_spirit::mObject binaryToJson(StringRef bytes);
struct MetaclusterTenantMapEntry;
struct TenantMapEntry {
constexpr static FileIdentifier file_identifier = 12247338;
static std::string tenantStateToString(TenantState tenantState);
static TenantState stringToTenantState(std::string stateStr);
static std::string tenantLockStateToString(TenantLockState tenantState);
static TenantLockState stringToTenantLockState(std::string stateStr);
constexpr static FileIdentifier file_identifier = 7054389;
int64_t id = -1;
Key prefix;
TenantName tenantName;
TenantState tenantState = TenantState::READY;
TenantLockState tenantLockState = TenantLockState::UNLOCKED;
TenantAPI::TenantLockState tenantLockState = TenantAPI::TenantLockState::UNLOCKED;
Optional<TenantGroupName> tenantGroup;
Optional<ClusterName> assignedCluster;
int64_t configurationSequenceNum = 0;
Optional<TenantName> renameDestination;
// Can be set to an error string if the tenant is in the ERROR state
std::string error;
TenantMapEntry();
TenantMapEntry(int64_t id, TenantName tenantName, TenantState tenantState);
TenantMapEntry(int64_t id, TenantName tenantName, TenantState tenantState, Optional<TenantGroupName> tenantGroup);
TenantMapEntry(int64_t id, TenantName tenantName);
TenantMapEntry(int64_t id, TenantName tenantName, Optional<TenantGroupName> tenantGroup);
TenantMapEntry(MetaclusterTenantMapEntry metaclusterEntry);
void setId(int64_t id);
std::string toJson() const;
@ -111,21 +84,11 @@ struct TenantMapEntry {
template <class Ar>
void serialize(Ar& ar) {
serializer(ar,
id,
tenantName,
tenantState,
tenantLockState,
tenantGroup,
assignedCluster,
configurationSequenceNum,
renameDestination,
error);
serializer(ar, id, tenantName, tenantLockState, tenantGroup, configurationSequenceNum);
if constexpr (Ar::isDeserializing) {
if (id >= 0) {
prefix = TenantAPI::idToPrefix(id);
}
ASSERT(tenantState >= TenantState::REGISTERING && tenantState <= TenantState::ERROR);
}
}
};
@ -199,10 +162,11 @@ struct TenantIdCodec {
}
};
template <class TenantMapEntryImpl>
struct TenantMetadataSpecification {
Key subspace;
KeyBackedObjectMap<int64_t, TenantMapEntry, decltype(IncludeVersion()), TenantIdCodec> tenantMap;
KeyBackedObjectMap<int64_t, TenantMapEntryImpl, decltype(IncludeVersion()), TenantIdCodec> tenantMap;
KeyBackedMap<TenantName, int64_t> tenantNameIndex;
KeyBackedMap<int64_t, UID> lockId;
KeyBackedProperty<int64_t> lastTenantId;
@ -227,7 +191,7 @@ struct TenantMetadataSpecification {
};
struct TenantMetadata {
static TenantMetadataSpecification& instance();
static TenantMetadataSpecification<TenantMapEntry>& instance();
static inline auto& subspace() { return instance().subspace; }
static inline auto& tenantMap() { return instance().tenantMap; }

View File

@ -193,9 +193,6 @@ createTenantTransaction(Transaction tr, TenantMapEntry tenantEntry, ClusterType
throw tenant_prefix_allocator_conflict();
}
tenantEntry.tenantState = TenantState::READY;
tenantEntry.assignedCluster = Optional<ClusterName>();
TenantMetadata::tenantMap().set(tr, tenantEntry.id, tenantEntry);
TenantMetadata::tenantNameIndex().set(tr, tenantEntry.tenantName, tenantEntry.id);
TenantMetadata::lastTenantModification().setVersionstamp(tr, Versionstamp(), 0);
@ -450,7 +447,6 @@ Future<Void> configureTenantTransaction(Transaction tr,
TenantMapEntry originalEntry,
TenantMapEntry updatedTenantEntry) {
ASSERT(updatedTenantEntry.id == originalEntry.id);
ASSERT(!updatedTenantEntry.assignedCluster.present());
tr->setOption(FDBTransactionOptions::RAW_ACCESS);
TenantMetadata::tenantMap().set(tr, updatedTenantEntry.id, updatedTenantEntry);

View File

@ -110,7 +110,7 @@ private:
std::vector<std::pair<Standalone<StringRef>, Optional<Value>>> configMutations,
int64_t tenantId,
std::map<TenantGroupName, int>* tenantGroupNetTenantDelta) {
state TenantMapEntry tenantEntry(tenantId, tenantName, TenantState::READY);
state TenantMapEntry tenantEntry(tenantId, tenantName);
for (auto const& [name, value] : configMutations) {
tenantEntry.configure(name, value);

View File

@ -241,6 +241,16 @@ description is not currently required but encouraged.
description="Reads performed by a transaction will not see any prior mutations that occured in that transaction, instead seeing the value which was in the database at the transaction's read version. This option may provide a small performance benefit for the client, but also disables a number of client-side optimizations which are beneficial for transactions which tend to read and write the same keys within a single transaction. It is an error to set this option after performing any reads or writes on the transaction."/>
<Option name="read_ahead_disable" code="52"
description="Deprecated" />
<Option name="read_server_side_cache_enable" code="507"
description="Storage server should cache disk blocks needed for subsequent read requests in this transaction. This is the default behavior."/>
<Option name="read_server_side_cache_disable" code="508"
description="Storage server should not cache disk blocks needed for subsequent read requests in this transaction. This can be used to avoid cache pollution for reads not expected to be repeated."/>
<Option name="read_priority_normal" code="509"
description="Use normal read priority for subsequent read requests in this transaction. This is the default read priority."/>
<Option name="read_priority_low" code="510"
description="Use low read priority for subsequent read requests in this transaction."/>
<Option name="read_priority_high" code="511"
description="Use high read priority for subsequent read requests in this transaction."/>
<Option name="durability_datacenter" code="110" />
<Option name="durability_risky" code="120" />
<Option name="durability_dev_null_is_web_scale" code="130"

View File

@ -33,8 +33,7 @@ ACTOR Future<std::pair<RangeResult, Version>> readFromFDB(Database cx, KeyRange
loop {
tr.setOption(FDBTransactionOptions::RAW_ACCESS);
// use no-cache as this is either used for test validation, or the blob granule consistency check
ReadOptions readOptions = { ReadType::NORMAL, CacheResult::False };
tr.trState->readOptions = readOptions;
tr.setOption(FDBTransactionOptions::READ_SERVER_SIDE_CACHE_DISABLE);
try {
state RangeResult r = wait(tr.getRange(currentRange, CLIENT_KNOBS->TOO_MANY));
Version grv = wait(tr.getReadVersion());

View File

@ -365,7 +365,7 @@ struct BlobWorkerData : NonCopyable, ReferenceCounted<BlobWorkerData> {
// FIXME: buggify an extra multiplication factor for short periods of time to hopefully trigger this logic more
// often? estimate slack in bytes buffered as max(0, assignments * (delta file size / 2) - bytesBuffered)
// FIXME: this doesn't take increased delta file size for heavy write amp cases into account
int64_t expectedExtraBytesBuffered = std::max(
int64_t expectedExtraBytesBuffered = std::max<int64_t>(
0, stats.numRangesAssigned * (SERVER_KNOBS->BG_DELTA_FILE_TARGET_BYTES / 2) - stats.mutationBytesBuffered);
// estimate slack in potential pending resnapshot
int64_t totalExtra =
@ -458,9 +458,24 @@ ACTOR Future<BlobGranuleCipherKeysCtx> getLatestGranuleCipherKeys(Reference<Blob
Arena* arena) {
state BlobGranuleCipherKeysCtx cipherKeysCtx;
state EncryptCipherDomainId domainId = FDB_DEFAULT_ENCRYPT_DOMAIN_ID;
state Reference<GranuleTenantData> tenantData;
state int retryCount = 0;
if (bwData->encryptMode.mode == EncryptionAtRestMode::DOMAIN_AWARE) {
state Reference<GranuleTenantData> tenantData = wait(bwData->tenantData.getDataForGranule(keyRange));
ASSERT(tenantData.isValid());
loop {
wait(store(tenantData, bwData->tenantData.getDataForGranule(keyRange)));
if (tenantData.isValid()) {
break;
} else {
CODE_PROBE(true, "cipher keys for unknown tenant");
// Assume not loaded yet, just wait a bit. Could do sophisticated mechanism but will redo tenant
// loading to be versioned anyway. 10 retries means it's likely not a transient race with
// loading tenants, and instead a persistent issue.
retryCount++;
TraceEvent(retryCount <= 10 ? SevDebug : SevWarn, "BlobWorkerUnknownTenantForCipherKeys", bwData->id)
.detail("KeyRange", keyRange);
wait(delay(0.1));
}
}
domainId = tenantData->entry.id;
}
@ -3051,6 +3066,9 @@ ACTOR Future<Void> blobGranuleUpdateFiles(Reference<BlobWorkerData> bwData,
// Free last change feed data
metadata->activeCFData.set(Reference<ChangeFeedData>());
// clear out buffered data
bwData->stats.mutationBytesBuffered -= metadata->bufferedDeltaBytes;
if (e.code() == error_code_operation_cancelled) {
throw;
}
@ -3767,27 +3785,27 @@ ACTOR Future<Void> doBlobGranuleFileRequest(Reference<BlobWorkerData> bwData, Bl
state Optional<Key> tenantPrefix;
state Arena arena;
if (req.tenantInfo.hasTenant()) {
ASSERT(req.tenantInfo.tenantId != TenantInfo::INVALID_TENANT);
Optional<TenantMapEntry> tenantEntry = bwData->tenantData.getTenantById(req.tenantInfo.tenantId);
if (tenantEntry.present()) {
ASSERT(tenantEntry.get().id == req.tenantInfo.tenantId);
tenantPrefix = tenantEntry.get().prefix;
} else {
CODE_PROBE(true, "Blob worker tenant not found");
// FIXME - better way. Wait on retry here, or just have better model for tenant metadata?
// Just throw wrong_shard_server and make the client retry and assume we load it later
TraceEvent(SevDebug, "BlobWorkerRequestTenantNotFound", bwData->id)
.suppressFor(5.0)
.detail("Tenant", req.tenantInfo.tenantId);
throw tenant_not_found();
}
req.keyRange = KeyRangeRef(req.keyRange.begin.withPrefix(tenantPrefix.get(), req.arena),
req.keyRange.end.withPrefix(tenantPrefix.get(), req.arena));
}
state bool didCollapse = false;
try {
if (req.tenantInfo.hasTenant()) {
ASSERT(req.tenantInfo.tenantId != TenantInfo::INVALID_TENANT);
Optional<TenantMapEntry> tenantEntry = bwData->tenantData.getTenantById(req.tenantInfo.tenantId);
if (tenantEntry.present()) {
ASSERT(tenantEntry.get().id == req.tenantInfo.tenantId);
tenantPrefix = tenantEntry.get().prefix;
} else {
CODE_PROBE(true, "Blob worker tenant not found");
// FIXME - better way. Wait on retry here, or just have better model for tenant metadata?
// Just throw wrong_shard_server and make the client retry and assume we load it later
TraceEvent(SevDebug, "BlobWorkerRequestTenantNotFound", bwData->id)
.suppressFor(5.0)
.detail("Tenant", req.tenantInfo.tenantId);
throw tenant_not_found();
}
req.keyRange = KeyRangeRef(req.keyRange.begin.withPrefix(tenantPrefix.get(), req.arena),
req.keyRange.end.withPrefix(tenantPrefix.get(), req.arena));
}
// TODO remove requirement for canCollapseBegin once we implement early replying
ASSERT(req.beginVersion == 0 || req.canCollapseBegin);
if (req.beginVersion != 0) {
@ -4158,6 +4176,7 @@ ACTOR Future<Void> doBlobGranuleFileRequest(Reference<BlobWorkerData> bwData, Bl
req.reply.send(rep);
--bwData->stats.activeReadRequests;
} catch (Error& e) {
--bwData->stats.activeReadRequests;
if (e.code() == error_code_operation_cancelled) {
req.reply.sendError(wrong_shard_server());
throw;
@ -4166,7 +4185,7 @@ ACTOR Future<Void> doBlobGranuleFileRequest(Reference<BlobWorkerData> bwData, Bl
if (e.code() == error_code_wrong_shard_server) {
++bwData->stats.wrongShardServer;
}
--bwData->stats.activeReadRequests;
if (canReplyWith(e)) {
req.reply.sendError(e);
} else {

View File

@ -247,6 +247,21 @@ public:
}
}
ACTOR static Future<bool> checkAnyBlobRanges(Database db) {
state Transaction tr(db);
loop {
try {
// FIXME: check if any active ranges. This still returns true if there are inactive ranges, but it
// mostly serves its purpose to allow setting blob_granules_enabled=1 on a cluster that has no blob
// workers currently.
RangeResult anyData = wait(tr.getRange(blobRangeKeys, 1));
return !anyData.empty();
} catch (Error& e) {
wait(tr.onError(e));
}
}
}
ACTOR static Future<Void> monitorBlobWorkers(Ratekeeper* self, Reference<AsyncVar<ServerDBInfo> const> dbInfo) {
state std::vector<BlobWorkerInterface> blobWorkers;
state int workerFetchCount = 0;
@ -257,6 +272,7 @@ public:
loop {
while (!self->configuration.blobGranulesEnabled) {
// FIXME: clear blob worker state if granules were previously enabled?
wait(delay(SERVER_KNOBS->SERVER_LIST_DELAY));
}
@ -267,8 +283,9 @@ public:
(SERVER_KNOBS->METRIC_UPDATE_RATE * FLOW_KNOBS->DELAY_JITTER_OFFSET);
if (++workerFetchCount == fetchAmount || blobWorkerDead) {
workerFetchCount = 0;
std::vector<BlobWorkerInterface> _blobWorkers = wait(getBlobWorkers(self->db, true, &grv));
blobWorkers = _blobWorkers;
state Future<bool> anyBlobRangesCheck = checkAnyBlobRanges(self->db);
wait(store(blobWorkers, getBlobWorkers(self->db, true, &grv)));
wait(store(self->anyBlobRanges, anyBlobRangesCheck));
} else {
grv = self->maxVersion;
}
@ -635,7 +652,7 @@ Ratekeeper::Ratekeeper(UID id, Database db)
SERVER_KNOBS->MAX_TL_SS_VERSION_DIFFERENCE_BATCH,
SERVER_KNOBS->TARGET_DURABILITY_LAG_VERSIONS_BATCH,
SERVER_KNOBS->TARGET_BW_LAG_BATCH),
maxVersion(0), blobWorkerTime(now()), unblockedAssignmentTime(now()) {
maxVersion(0), blobWorkerTime(now()), unblockedAssignmentTime(now()), anyBlobRanges(false) {
if (SERVER_KNOBS->GLOBAL_TAG_THROTTLING) {
tagThrottler = std::make_unique<GlobalTagThrottler>(db, id, SERVER_KNOBS->MAX_MACHINES_FALLING_BEHIND);
} else {
@ -897,7 +914,7 @@ void Ratekeeper::updateRate(RatekeeperLimits* limits) {
break;
}
if (configuration.blobGranulesEnabled && SERVER_KNOBS->BW_THROTTLING_ENABLED) {
if (configuration.blobGranulesEnabled && SERVER_KNOBS->BW_THROTTLING_ENABLED && anyBlobRanges) {
Version lastBWVer = 0;
auto lastIter = version_transactions.end();
if (!blobWorkerVersionHistory.empty()) {

View File

@ -358,7 +358,7 @@ public:
for (uint16_t i = 0; i < tenantCount; i++) {
TenantName tenantName(format("%s_%08d", "ddtc_test_tenant", tenantNumber + i));
TenantMapEntry tenant(tenantNumber + i, tenantName, TenantState::READY);
TenantMapEntry tenant(tenantNumber + i, tenantName);
tenantCache.insert(tenant);
}
@ -386,7 +386,7 @@ public:
for (uint16_t i = 0; i < tenantCount; i++) {
TenantName tenantName(format("%s_%08d", "ddtc_test_tenant", tenantNumber + i));
TenantMapEntry tenant(tenantNumber + i, tenantName, TenantState::READY);
TenantMapEntry tenant(tenantNumber + i, tenantName);
tenantCache.insert(tenant);
}
@ -400,7 +400,7 @@ public:
if (tenantOrdinal % staleTenantFraction != 0) {
TenantName tenantName(format("%s_%08d", "ddtc_test_tenant", tenantOrdinal));
TenantMapEntry tenant(tenantOrdinal, tenantName, TenantState::READY);
TenantMapEntry tenant(tenantOrdinal, tenantName);
bool newTenant = tenantCache.update(tenant);
ASSERT(!newTenant);
keepCount++;

View File

@ -6550,9 +6550,10 @@ private:
bool updating,
ParentInfo* parentInfo,
Reference<IPageEncryptionKeyProvider> keyProvider,
Optional<int64_t> pageDomainId)
Optional<int64_t> pageDomainId,
int maxHeightAllowed)
: updating(updating), page(p), clonedPage(alreadyCloned), changesMade(false), parentInfo(parentInfo),
keyProvider(keyProvider), pageDomainId(pageDomainId) {}
keyProvider(keyProvider), pageDomainId(pageDomainId), maxHeightAllowed(maxHeightAllowed) {}
// Whether updating the existing page is allowed
bool updating;
@ -6570,6 +6571,8 @@ private:
Reference<IPageEncryptionKeyProvider> keyProvider;
Optional<int64_t> pageDomainId;
int maxHeightAllowed;
BTreePage* btPage() const { return (BTreePage*)page->mutateData(); }
bool empty() const {
@ -6609,7 +6612,7 @@ private:
canInsert = keyProvider->keyFitsInDomain(pageDomainId.get(), rec.key, true);
}
if (canInsert) {
canInsert = end.insert(rec);
canInsert = end.insert(rec, 0, maxHeightAllowed);
}
if (!canInsert) {
@ -6810,6 +6813,8 @@ private:
}
}
state int maxHeightAllowed = btPage->tree()->initialHeight + SERVER_KNOBS->REDWOOD_NODE_MAX_UNBALANCE;
// Leaf Page
if (btPage->isLeaf()) {
// When true, we are modifying the existing DeltaTree
@ -6835,7 +6840,6 @@ private:
// Now, process each mutation range and merge changes with existing data.
bool firstMutationBoundary = true;
constexpr int maxHeightAllowed = 8;
while (mBegin != mEnd) {
// Apply the change to the mutation buffer start boundary key only if
@ -7316,7 +7320,7 @@ private:
// If pageCopy is already set it was initialized to page above so the modifier doesn't need
// to copy it
state InternalPageModifier modifier(
page, pageCopy.isValid(), tryToUpdate, parentInfo, self->m_keyProvider, pageDomainId);
page, pageCopy.isValid(), tryToUpdate, parentInfo, self->m_keyProvider, pageDomainId, maxHeightAllowed);
// Apply the possible changes for each subtree range recursed to, except the last one.
// For each range, the expected next record, if any, is checked against the first boundary

View File

@ -206,6 +206,7 @@ class Ratekeeper {
std::map<Version, Ratekeeper::VersionInfo> version_transactions;
std::map<Version, std::pair<double, Optional<double>>> version_recovery;
Deque<std::pair<double, Version>> blobWorkerVersionHistory;
bool anyBlobRanges;
Optional<Key> remoteDC;
double getRecoveryDuration(Version ver) const {

View File

@ -54,7 +54,7 @@ private:
KeyBackedRangeResult<Tuple> clusterTenantTuples;
KeyBackedRangeResult<Tuple> clusterTenantGroupTuples;
std::map<int64_t, TenantMapEntry> tenantMap;
std::map<int64_t, MetaclusterTenantMapEntry> tenantMap;
KeyBackedRangeResult<std::pair<TenantGroupName, TenantGroupEntry>> tenantGroups;
std::map<ClusterName, std::set<int64_t>> clusterTenantMap;
@ -74,7 +74,7 @@ private:
ACTOR static Future<Void> loadManagementClusterMetadata(MetaclusterConsistencyCheck* self) {
state Reference<typename DB::TransactionT> managementTr = self->managementDb->createTransaction();
state KeyBackedRangeResult<std::pair<int64_t, TenantMapEntry>> tenantList;
state KeyBackedRangeResult<std::pair<int64_t, MetaclusterTenantMapEntry>> tenantList;
loop {
try {
@ -123,7 +123,7 @@ private:
}
self->managementMetadata.tenantMap =
std::map<int64_t, TenantMapEntry>(tenantList.results.begin(), tenantList.results.end());
std::map<int64_t, MetaclusterTenantMapEntry>(tenantList.results.begin(), tenantList.results.end());
for (auto t : self->managementMetadata.clusterTenantTuples.results) {
ASSERT_EQ(t.size(), 3);
@ -218,27 +218,25 @@ private:
std::map<ClusterName, int> clusterAllocated;
std::set<TenantGroupName> processedTenantGroups;
for (auto [tenantId, entry] : managementMetadata.tenantMap) {
ASSERT(entry.assignedCluster.present());
// Each tenant should be assigned to the same cluster where it is stored in the cluster tenant index
auto clusterItr = managementMetadata.clusterTenantMap.find(entry.assignedCluster.get());
auto clusterItr = managementMetadata.clusterTenantMap.find(entry.assignedCluster);
ASSERT(clusterItr != managementMetadata.clusterTenantMap.end());
ASSERT(clusterItr->second.count(tenantId));
if (entry.tenantGroup.present()) {
// Count the number of tenant groups allocated in each cluster
if (processedTenantGroups.insert(entry.tenantGroup.get()).second) {
++clusterAllocated[entry.assignedCluster.get()];
++clusterAllocated[entry.assignedCluster];
}
// The tenant group should be stored in the same cluster where it is stored in the cluster tenant
// group index
auto clusterTenantGroupItr = managementMetadata.clusterTenantGroupMap.find(entry.assignedCluster.get());
auto clusterTenantGroupItr = managementMetadata.clusterTenantGroupMap.find(entry.assignedCluster);
ASSERT(clusterTenantGroupItr != managementMetadata.clusterTenantGroupMap.end());
ASSERT(clusterTenantGroupItr->second.count(entry.tenantGroup.get()));
} else {
// Track the actual tenant group allocation per cluster (a tenant with no group counts against the
// allocation)
++clusterAllocated[entry.assignedCluster.get()];
++clusterAllocated[entry.assignedCluster];
}
}
@ -316,11 +314,11 @@ private:
} else {
ASSERT_LE(dataClusterTenantMap.size(), expectedTenants.size());
for (auto tenantName : expectedTenants) {
TenantMapEntry const& metaclusterEntry = self->managementMetadata.tenantMap[tenantName];
MetaclusterTenantMapEntry const& metaclusterEntry = self->managementMetadata.tenantMap[tenantName];
if (!dataClusterTenantMap.count(tenantName)) {
ASSERT(metaclusterEntry.tenantState == TenantState::REGISTERING ||
metaclusterEntry.tenantState == TenantState::REMOVING ||
metaclusterEntry.tenantState == TenantState::ERROR);
ASSERT(metaclusterEntry.tenantState == MetaclusterAPI::TenantState::REGISTERING ||
metaclusterEntry.tenantState == MetaclusterAPI::TenantState::REMOVING ||
metaclusterEntry.tenantState == MetaclusterAPI::TenantState::ERROR);
} else if (metaclusterEntry.tenantGroup.present()) {
tenantGroupsWithCompletedTenants.insert(metaclusterEntry.tenantGroup.get());
}
@ -329,17 +327,15 @@ private:
for (auto [tenantId, entry] : dataClusterTenantMap) {
ASSERT(expectedTenants.count(tenantId));
TenantMapEntry const& metaclusterEntry = self->managementMetadata.tenantMap[tenantId];
ASSERT(!entry.assignedCluster.present());
MetaclusterTenantMapEntry const& metaclusterEntry = self->managementMetadata.tenantMap[tenantId];
ASSERT_EQ(entry.id, metaclusterEntry.id);
ASSERT(entry.tenantName == metaclusterEntry.tenantName);
ASSERT_EQ(entry.tenantState, TenantState::READY);
if (!self->allowPartialMetaclusterOperations) {
ASSERT_EQ(metaclusterEntry.tenantState, TenantState::READY);
ASSERT_EQ(metaclusterEntry.tenantState, MetaclusterAPI::TenantState::READY);
}
if (metaclusterEntry.tenantState != TenantState::UPDATING_CONFIGURATION &&
metaclusterEntry.tenantState != TenantState::REMOVING) {
if (metaclusterEntry.tenantState != MetaclusterAPI::TenantState::UPDATING_CONFIGURATION &&
metaclusterEntry.tenantState != MetaclusterAPI::TenantState::REMOVING) {
ASSERT_EQ(entry.configurationSequenceNum, metaclusterEntry.configurationSequenceNum);
} else {
ASSERT_LE(entry.configurationSequenceNum, metaclusterEntry.configurationSequenceNum);

View File

@ -45,7 +45,6 @@ private:
struct TenantData {
Optional<MetaclusterRegistrationEntry> metaclusterRegistration;
std::map<int64_t, TenantMapEntry> tenantMap;
std::map<TenantName, int64_t> tenantNameIndex;
int64_t lastTenantId;
int64_t tenantCount;
@ -65,31 +64,20 @@ private:
// the case with the current metacluster simulation workloads
static inline const int metaclusterMaxTenants = 10e6;
ACTOR static Future<Void> loadTenantMetadata(TenantConsistencyCheck* self) {
state Reference<typename DB::TransactionT> tr = self->db->createTransaction();
state KeyBackedRangeResult<std::pair<int64_t, TenantMapEntry>> tenantList;
ACTOR template <class TenantMapEntryImpl>
static Future<std::map<int64_t, TenantMapEntryImpl>> loadTenantMetadataImpl(
TenantConsistencyCheck* self,
TenantMetadataSpecification<TenantMapEntryImpl>* tenantMetadata,
Reference<typename DB::TransactionT> tr) {
state KeyBackedRangeResult<std::pair<int64_t, TenantMapEntryImpl>> tenantList;
state KeyBackedRangeResult<std::pair<TenantName, int64_t>> tenantNameIndexList;
state KeyBackedRangeResult<int64_t> tenantTombstoneList;
state KeyBackedRangeResult<std::pair<TenantGroupName, TenantGroupEntry>> tenantGroupList;
state KeyBackedRangeResult<Tuple> tenantGroupTenantTuples;
state TenantMetadataSpecification* tenantMetadata;
loop {
try {
tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
wait(store(self->metadata.metaclusterRegistration,
MetaclusterMetadata::metaclusterRegistration().get(tr)));
self->metadata.clusterType = self->metadata.metaclusterRegistration.present()
? self->metadata.metaclusterRegistration.get().clusterType
: ClusterType::STANDALONE;
if (self->metadata.clusterType == ClusterType::METACLUSTER_MANAGEMENT) {
tenantMetadata = &MetaclusterAPI::ManagementClusterMetadata::tenantMetadata();
} else {
tenantMetadata = &TenantMetadata::instance();
}
wait(
store(tenantList, tenantMetadata->tenantMap.getRange(tr, {}, {}, metaclusterMaxTenants)) &&
store(tenantNameIndexList,
@ -110,8 +98,8 @@ private:
}
ASSERT(!tenantList.more);
self->metadata.tenantMap =
std::map<int64_t, TenantMapEntry>(tenantList.results.begin(), tenantList.results.end());
std::map<int64_t, TenantMapEntryImpl> localMap =
std::map<int64_t, TenantMapEntryImpl>(tenantList.results.begin(), tenantList.results.end());
ASSERT(!tenantNameIndexList.more);
self->metadata.tenantNameIndex =
@ -130,28 +118,27 @@ private:
TenantGroupName tenantGroupName = t.getString(0);
int64_t tenantId = t.getInt(1);
ASSERT(self->metadata.tenantGroupMap.count(tenantGroupName));
ASSERT(self->metadata.tenantMap.count(tenantId));
ASSERT(localMap.count(tenantId));
self->metadata.tenantGroupIndex[tenantGroupName].insert(tenantId);
ASSERT(self->metadata.tenantsInTenantGroupIndex.insert(tenantId).second);
}
ASSERT_EQ(self->metadata.tenantGroupIndex.size(), self->metadata.tenantGroupMap.size());
return Void();
return localMap;
}
void validateTenantMetadata() {
if (metadata.clusterType == ClusterType::METACLUSTER_MANAGEMENT) {
ASSERT_LE(metadata.tenantMap.size(), metaclusterMaxTenants);
} else {
ASSERT_LE(metadata.tenantMap.size(), CLIENT_KNOBS->MAX_TENANTS_PER_CLUSTER);
}
ASSERT_EQ(metadata.tenantMap.size(), metadata.tenantCount);
void validateTenantMetadata(std::map<int64_t, TenantMapEntry> tenantMap) {
ASSERT(metadata.clusterType == ClusterType::METACLUSTER_DATA ||
metadata.clusterType == ClusterType::STANDALONE);
ASSERT_LE(tenantMap.size(), CLIENT_KNOBS->MAX_TENANTS_PER_CLUSTER);
ASSERT_EQ(tenantMap.size(), metadata.tenantCount);
ASSERT_EQ(metadata.tenantNameIndex.size(), metadata.tenantCount);
int renameCount = 0;
for (auto [tenantId, tenantMapEntry] : metadata.tenantMap) {
for (auto [tenantId, tenantMapEntry] : tenantMap) {
ASSERT_EQ(tenantId, tenantMapEntry.id);
// Only standalone clusters will have lastTenantId set
// For Metacluster, the lastTenantId field is updated for MetaclusterMetadata
// and not TenantMetadata
if (metadata.clusterType != ClusterType::METACLUSTER_DATA) {
if (TenantAPI::getTenantIdPrefix(tenantId) == TenantAPI::getTenantIdPrefix(metadata.lastTenantId)) {
ASSERT_LE(tenantId, metadata.lastTenantId);
@ -162,36 +149,81 @@ private:
if (tenantMapEntry.tenantGroup.present()) {
auto tenantGroupMapItr = metadata.tenantGroupMap.find(tenantMapEntry.tenantGroup.get());
ASSERT(tenantGroupMapItr != metadata.tenantGroupMap.end());
ASSERT(tenantMapEntry.assignedCluster == tenantGroupMapItr->second.assignedCluster);
ASSERT(metadata.tenantGroupIndex[tenantMapEntry.tenantGroup.get()].count(tenantId));
} else {
ASSERT(!metadata.tenantsInTenantGroupIndex.count(tenantId));
}
if (metadata.clusterType == ClusterType::METACLUSTER_MANAGEMENT) {
ASSERT(tenantMapEntry.assignedCluster.present());
if (tenantMapEntry.renameDestination.present()) {
ASSERT(tenantMapEntry.tenantState == TenantState::RENAMING ||
tenantMapEntry.tenantState == TenantState::REMOVING);
auto nameIndexItr = metadata.tenantNameIndex.find(tenantMapEntry.renameDestination.get());
ASSERT(nameIndexItr != metadata.tenantNameIndex.end());
ASSERT_EQ(nameIndexItr->second, tenantMapEntry.id);
++renameCount;
} else {
ASSERT_NE(tenantMapEntry.tenantState, TenantState::RENAMING);
}
} else {
ASSERT_EQ(tenantMapEntry.tenantState, TenantState::READY);
ASSERT(!tenantMapEntry.assignedCluster.present());
ASSERT(!tenantMapEntry.renameDestination.present());
}
// An error string should be set if and only if the tenant state is an error
ASSERT((tenantMapEntry.tenantState == TenantState::ERROR) != tenantMapEntry.error.empty());
}
ASSERT_EQ(metadata.tenantMap.size() + renameCount, metadata.tenantNameIndex.size());
ASSERT_EQ(tenantMap.size(), metadata.tenantNameIndex.size());
}
void validateTenantMetadata(std::map<int64_t, MetaclusterTenantMapEntry> tenantMap) {
ASSERT(metadata.clusterType == ClusterType::METACLUSTER_MANAGEMENT);
ASSERT_LE(tenantMap.size(), metaclusterMaxTenants);
ASSERT_EQ(tenantMap.size(), metadata.tenantCount);
ASSERT_EQ(metadata.tenantNameIndex.size(), metadata.tenantCount);
int renameCount = 0;
for (auto [tenantId, tenantMapEntry] : tenantMap) {
ASSERT_EQ(tenantId, tenantMapEntry.id);
ASSERT_EQ(metadata.tenantNameIndex[tenantMapEntry.tenantName], tenantId);
if (tenantMapEntry.tenantGroup.present()) {
auto tenantGroupMapItr = metadata.tenantGroupMap.find(tenantMapEntry.tenantGroup.get());
ASSERT(tenantGroupMapItr != metadata.tenantGroupMap.end());
ASSERT(tenantMapEntry.assignedCluster == tenantGroupMapItr->second.assignedCluster.get());
ASSERT(metadata.tenantGroupIndex[tenantMapEntry.tenantGroup.get()].count(tenantId));
} else {
ASSERT(!metadata.tenantsInTenantGroupIndex.count(tenantId));
}
if (tenantMapEntry.renameDestination.present()) {
ASSERT(tenantMapEntry.tenantState == MetaclusterAPI::TenantState::RENAMING ||
tenantMapEntry.tenantState == MetaclusterAPI::TenantState::REMOVING);
auto nameIndexItr = metadata.tenantNameIndex.find(tenantMapEntry.renameDestination.get());
ASSERT(nameIndexItr != metadata.tenantNameIndex.end());
ASSERT_EQ(nameIndexItr->second, tenantMapEntry.id);
++renameCount;
} else {
ASSERT_NE(tenantMapEntry.tenantState, MetaclusterAPI::TenantState::RENAMING);
}
// An error string should be set if and only if the tenant state is an error
ASSERT((tenantMapEntry.tenantState == MetaclusterAPI::TenantState::ERROR) != tenantMapEntry.error.empty());
}
ASSERT_EQ(tenantMap.size() + renameCount, metadata.tenantNameIndex.size());
}
ACTOR static Future<Void> loadAndValidateTenantMetadata(TenantConsistencyCheck* self) {
state Reference<typename DB::TransactionT> tr = self->db->createTransaction();
loop {
try {
tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
wait(store(self->metadata.metaclusterRegistration,
MetaclusterMetadata::metaclusterRegistration().get(tr)));
self->metadata.clusterType = self->metadata.metaclusterRegistration.present()
? self->metadata.metaclusterRegistration.get().clusterType
: ClusterType::STANDALONE;
break;
} catch (Error& e) {
wait(safeThreadFutureToFuture(tr->onError(e)));
}
}
if (self->metadata.clusterType == ClusterType::METACLUSTER_MANAGEMENT) {
std::map<int64_t, MetaclusterTenantMapEntry> tenantMap =
wait(loadTenantMetadataImpl<MetaclusterTenantMapEntry>(
self, &MetaclusterAPI::ManagementClusterMetadata::tenantMetadata(), tr));
self->validateTenantMetadata(tenantMap);
} else {
std::map<int64_t, TenantMapEntry> tenantMap =
wait(loadTenantMetadataImpl<TenantMapEntry>(self, &TenantMetadata::instance(), tr));
self->validateTenantMetadata(tenantMap);
}
return Void();
}
// Check that the tenant tombstones are properly cleaned up and only present on a metacluster data cluster
@ -211,8 +243,7 @@ private:
}
ACTOR static Future<Void> run(TenantConsistencyCheck* self) {
wait(loadTenantMetadata(self));
self->validateTenantMetadata();
wait(loadAndValidateTenantMetadata(self));
self->checkTenantTombstones();
return Void();

View File

@ -925,7 +925,6 @@ ACTOR Future<Void> clearData(Database cx, Optional<TenantName> defaultTenant) {
TraceEvent("TesterClearingTenantsComplete", debugID).detail("AtVersion", tr.getCommittedVersion());
break;
}
tr.reset();
} catch (Error& e) {
TraceEvent(SevWarn, "TesterClearingTenantsError", debugID).error(e);

View File

@ -21,6 +21,7 @@
#include <cstring>
#include <unordered_set>
#include "fdbclient/BlobWorkerInterface.h"
#include "flow/Arena.h"
#include "flow/IRandom.h"
#include "flow/Trace.h"
@ -54,21 +55,26 @@ struct AuthzSecurityWorkload : TestWorkload {
WipedString signedTokenAnotherTenant;
Standalone<StringRef> tLogConfigKey;
PerfIntCounter crossTenantGetPositive, crossTenantGetNegative, crossTenantCommitPositive, crossTenantCommitNegative,
publicNonTenantRequestPositive, tLogReadNegative, keyLocationLeakNegative;
publicNonTenantRequestPositive, tLogReadNegative, keyLocationLeakNegative, crossTenantBGReadPositive,
crossTenantBGReadNegative;
std::vector<std::function<Future<Void>(Database cx)>> testFunctions;
bool checkBlobGranules;
AuthzSecurityWorkload(WorkloadContext const& wcx)
: TestWorkload(wcx), crossTenantGetPositive("CrossTenantGetPositive"),
crossTenantGetNegative("CrossTenantGetNegative"), crossTenantCommitPositive("CrossTenantCommitPositive"),
crossTenantCommitNegative("CrossTenantCommitNegative"),
publicNonTenantRequestPositive("PublicNonTenantRequestPositive"), tLogReadNegative("TLogReadNegative"),
keyLocationLeakNegative("KeyLocationLeakNegative") {
keyLocationLeakNegative("KeyLocationLeakNegative"), crossTenantBGReadPositive("CrossTenantBGReadPositive"),
crossTenantBGReadNegative("CrossTenantBGReadNegative") {
testDuration = getOption(options, "testDuration"_sr, 10.0);
transactionsPerSecond = getOption(options, "transactionsPerSecond"_sr, 500.0) / clientCount;
actorCount = getOption(options, "actorsPerClient"_sr, transactionsPerSecond / 5);
tenantName = getOption(options, "tenantA"_sr, "authzSecurityTestTenant"_sr);
anotherTenantName = getOption(options, "tenantB"_sr, "authzSecurityTestTenant"_sr);
tLogConfigKey = getOption(options, "tLogConfigKey"_sr, "TLogInterface"_sr);
checkBlobGranules = getOption(options, "checkBlobGranules"_sr, false);
ASSERT(g_network->isSimulated());
// make it comfortably longer than the timeout of the workload
testFunctions.push_back(
@ -83,6 +89,13 @@ struct AuthzSecurityWorkload : TestWorkload {
[this](Database cx) { return testPublicNonTenantRequestsAllowedWithoutTokens(this, cx); });
testFunctions.push_back([this](Database cx) { return testTLogReadDisallowed(this, cx); });
testFunctions.push_back([this](Database cx) { return testKeyLocationLeakDisallowed(this, cx); });
if (checkBlobGranules) {
testFunctions.push_back(
[this](Database cx) { return testCrossTenantBGReadDisallowed(this, cx, PositiveTestcase::True); });
testFunctions.push_back(
[this](Database cx) { return testCrossTenantBGReadDisallowed(this, cx, PositiveTestcase::False); });
}
}
Future<Void> setup(Database const& cx) override {
@ -122,6 +135,8 @@ struct AuthzSecurityWorkload : TestWorkload {
m.push_back(publicNonTenantRequestPositive.getMetric());
m.push_back(tLogReadNegative.getMetric());
m.push_back(keyLocationLeakNegative.getMetric());
m.push_back(crossTenantBGReadPositive.getMetric());
m.push_back(crossTenantBGReadNegative.getMetric());
}
void setAuthToken(Transaction& tr, StringRef token) {
@ -309,7 +324,7 @@ struct AuthzSecurityWorkload : TestWorkload {
++self->crossTenantCommitPositive;
} else if (outcome.get().code() == error_code_permission_denied) {
TraceEvent(SevError, "AuthzSecurityError")
.detail("Case", "CrossTenantGetDisallowed")
.detail("Case", "CrossTenantCommitDisallowed")
.detail("Subcase", "Positive")
.log();
}
@ -318,7 +333,7 @@ struct AuthzSecurityWorkload : TestWorkload {
tryCommit(self, self->tenant, self->signedTokenAnotherTenant, key, newValue, committedVersion, cx));
if (!outcome.present()) {
TraceEvent(SevError, "AuthzSecurityError")
.detail("Case", "CrossTenantGetDisallowed")
.detail("Case", "CrossTenantCommitDisallowed")
.detail("Subcase", "Negative")
.log();
} else if (outcome.get().code() == error_code_permission_denied) {
@ -463,6 +478,76 @@ struct AuthzSecurityWorkload : TestWorkload {
}
}
++self->keyLocationLeakNegative;
return Void();
}
ACTOR static Future<Optional<Error>> tryBlobGranuleRead(AuthzSecurityWorkload* self,
Database cx,
Reference<Tenant> tenant,
Key key,
WipedString token,
Version committedVersion) {
state Transaction tr(cx, tenant);
self->setAuthToken(tr, token);
KeyRange range(KeyRangeRef(key, keyAfter(key)));
try {
wait(success(tr.readBlobGranules(range, 0, committedVersion)));
return Optional<Error>();
} catch (Error& e) {
CODE_PROBE(e.code() == error_code_permission_denied,
"Cross tenant blob granule read meets permission_denied");
return e;
}
}
// TODO: add separate tests to separately test blob granule locations call and blob worker interface call
static void checkCrossTenantOutcome(std::string testcase,
PerfIntCounter& positiveCounter,
PerfIntCounter& negativeCounter,
Optional<Error> outcome,
PositiveTestcase positive) {
if (positive) {
// Supposed to succeed. Expected to occasionally fail because of buggify, faultInjection, or data
// distribution, but should not return permission_denied
if (!outcome.present()) {
++positiveCounter;
} else if (outcome.get().code() == error_code_permission_denied) {
TraceEvent(SevError, "AuthzSecurityError")
.detail("Case", "CrossTenant" + testcase + "Disallowed")
.detail("Subcase", "Positive")
.log();
}
} else {
// Should always fail. Expected to return permission_denied, but expected to occasionally fail with
// different errors
if (!outcome.present()) {
TraceEvent(SevError, "AuthzSecurityError")
.detail("Case", "CrossTenant" + testcase + "Disallowed")
.detail("Subcase", "Negative")
.log();
} else if (outcome.get().code() == error_code_permission_denied) {
++negativeCounter;
}
}
}
ACTOR static Future<Void> testCrossTenantBGReadDisallowed(AuthzSecurityWorkload* self,
Database cx,
PositiveTestcase positive) {
state Key key = self->randomString();
state Value value = self->randomString();
state Version committedVersion =
wait(setAndCommitKeyValueAndGetVersion(self, cx, self->tenant, self->signedToken, key, value));
Optional<Error> outcome = wait(tryBlobGranuleRead(self,
cx,
self->tenant,
key,
positive ? self->signedToken : self->signedTokenAnotherTenant,
committedVersion));
checkCrossTenantOutcome(
"BGRead", self->crossTenantBGReadPositive, self->crossTenantBGReadNegative, outcome, positive);
return Void();
}

View File

@ -31,12 +31,14 @@ struct CreateTenantWorkload : TestWorkload {
static constexpr auto NAME = "CreateTenant";
TenantName tenant;
Optional<TenantGroupName> tenantGroup;
bool blobbify;
CreateTenantWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) {
tenant = getOption(options, "name"_sr, "DefaultTenant"_sr);
if (hasOption(options, "group"_sr)) {
tenantGroup = getOption(options, "group"_sr, "DefaultGroup"_sr);
}
blobbify = getOption(options, "blobbify"_sr, false);
}
Future<Void> setup(Database const& cx) override {
@ -58,6 +60,12 @@ struct CreateTenantWorkload : TestWorkload {
}
Optional<TenantMapEntry> entry = wait(TenantAPI::createTenant(db.getReference(), self->tenant, givenEntry));
ASSERT(entry.present());
if (self->blobbify) {
// blobbify from db instead of within tenant so authz doesn't fail
bool success = wait(db->blobbifyRange(normalKeys.withPrefix(entry.get().prefix)));
ASSERT(success);
}
} catch (Error& e) {
TraceEvent(SevError, "TenantCreationFailed").error(e);
if (e.code() == error_code_actor_cancelled) {

View File

@ -454,16 +454,15 @@ struct MetaclusterManagementWorkload : TestWorkload {
TenantName tenant,
const char* context) {
try {
state TenantMapEntry checkEntry = wait(MetaclusterAPI::getTenant(self->managementDb, tenant));
state TenantState checkState = checkEntry.tenantState;
state std::vector<TenantState> filters;
state MetaclusterTenantMapEntry checkEntry = wait(MetaclusterAPI::getTenant(self->managementDb, tenant));
state MetaclusterAPI::TenantState checkState = checkEntry.tenantState;
state std::vector<MetaclusterAPI::TenantState> filters;
filters.push_back(checkState);
state std::vector<std::pair<TenantName, TenantMapEntry>> tenantList =
state std::vector<std::pair<TenantName, MetaclusterTenantMapEntry>> tenantList =
wait(MetaclusterAPI::listTenantMetadata(self->managementDb, ""_sr, "\xff\xff"_sr, 10e6, 0, filters));
// Possible to have changed state between now and the getTenant call above
state TenantMapEntry checkEntry2 = wait(MetaclusterAPI::getTenant(self->managementDb, tenant));
state MetaclusterTenantMapEntry checkEntry2 = wait(MetaclusterAPI::getTenant(self->managementDb, tenant));
DisabledTraceEvent(SevDebug, "VerifyListFilter")
.detail("Context", context)
.detail("Tenant", tenant)
@ -499,7 +498,7 @@ struct MetaclusterManagementWorkload : TestWorkload {
self->totalTenantGroupCapacity;
state bool retried = false;
state TenantMapEntry tenantMapEntry;
state MetaclusterTenantMapEntry tenantMapEntry;
tenantMapEntry.tenantName = tenant;
tenantMapEntry.tenantGroup = tenantGroup;
@ -532,7 +531,8 @@ struct MetaclusterManagementWorkload : TestWorkload {
}
} catch (Error& e) {
if (e.code() == error_code_tenant_already_exists && retried && !exists) {
Optional<TenantMapEntry> entry = wait(MetaclusterAPI::tryGetTenant(self->managementDb, tenant));
Optional<MetaclusterTenantMapEntry> entry =
wait(MetaclusterAPI::tryGetTenant(self->managementDb, tenant));
ASSERT(entry.present());
tenantMapEntry = entry.get();
break;
@ -540,7 +540,8 @@ struct MetaclusterManagementWorkload : TestWorkload {
e.code() == error_code_cluster_not_found ||
e.code() == error_code_invalid_tenant_configuration)) {
state Error error = e;
Optional<TenantMapEntry> entry = wait(MetaclusterAPI::tryGetTenant(self->managementDb, tenant));
Optional<MetaclusterTenantMapEntry> entry =
wait(MetaclusterAPI::tryGetTenant(self->managementDb, tenant));
// When picking a different assigned cluster, it is possible to leave the
// tenant creation in a partially completed state, which we want to avoid.
@ -562,25 +563,23 @@ struct MetaclusterManagementWorkload : TestWorkload {
}
}
TenantMapEntry entry = wait(MetaclusterAPI::getTenant(self->managementDb, tenant));
MetaclusterTenantMapEntry entry = wait(MetaclusterAPI::getTenant(self->managementDb, tenant));
ASSERT(!exists);
ASSERT(hasCapacity);
ASSERT(entry.assignedCluster.present());
ASSERT(entry.tenantGroup == tenantGroup);
ASSERT(TenantAPI::getTenantIdPrefix(entry.id) == self->tenantIdPrefix);
if (tenantGroup.present()) {
auto tenantGroupData =
self->tenantGroups.try_emplace(tenantGroup.get(), entry.assignedCluster.get()).first;
ASSERT(tenantGroupData->second.cluster == entry.assignedCluster.get());
auto tenantGroupData = self->tenantGroups.try_emplace(tenantGroup.get(), entry.assignedCluster).first;
ASSERT(tenantGroupData->second.cluster == entry.assignedCluster);
tenantGroupData->second.tenants.insert(tenant);
} else {
self->ungroupedTenants.insert(tenant);
}
auto assignedCluster = self->dataDbs.find(entry.assignedCluster.get());
ASSERT(assignClusterAutomatically || tenantMapEntry.assignedCluster.get() == assignedCluster->first);
auto assignedCluster = self->dataDbs.find(entry.assignedCluster);
ASSERT(assignClusterAutomatically || tenantMapEntry.assignedCluster == assignedCluster->first);
ASSERT(assignedCluster != self->dataDbs.end());
ASSERT(assignedCluster->second.tenants.insert(tenant).second);
@ -594,7 +593,7 @@ struct MetaclusterManagementWorkload : TestWorkload {
assignedCluster->second.tenantGroupCapacity >=
assignedCluster->second.tenantGroups.size() + assignedCluster->second.ungroupedTenants.size());
self->createdTenants[tenant] = TenantData(entry.assignedCluster.get(), tenantGroup);
self->createdTenants[tenant] = TenantData(entry.assignedCluster, tenantGroup);
} catch (Error& e) {
if (e.code() == error_code_tenant_already_exists) {
ASSERT(exists);
@ -610,10 +609,9 @@ struct MetaclusterManagementWorkload : TestWorkload {
return Void();
} else if (e.code() == error_code_invalid_tenant_configuration) {
ASSERT(tenantGroup.present());
ASSERT(tenantMapEntry.assignedCluster.present());
auto itr = self->tenantGroups.find(tenantGroup.get());
ASSERT(itr != self->tenantGroups.end());
ASSERT(itr->second.cluster != tenantMapEntry.assignedCluster.get());
ASSERT(itr->second.cluster != tenantMapEntry.assignedCluster);
return Void();
}
@ -645,7 +643,8 @@ struct MetaclusterManagementWorkload : TestWorkload {
}
} catch (Error& e) {
if (e.code() == error_code_tenant_not_found && retried && exists) {
Optional<TenantMapEntry> entry = wait(MetaclusterAPI::tryGetTenant(self->managementDb, tenant));
Optional<MetaclusterTenantMapEntry> entry =
wait(MetaclusterAPI::tryGetTenant(self->managementDb, tenant));
ASSERT(!entry.present());
break;
} else {
@ -839,7 +838,6 @@ struct MetaclusterManagementWorkload : TestWorkload {
retried = true;
wait(verifyListFilter(self, tenant, "renameTenant"));
wait(verifyListFilter(self, newTenantName, "renameTenantNew"));
} catch (Error& e) {
// If we retry the rename after it had succeeded, we will get an error that we should ignore
if (e.code() == error_code_tenant_not_found && exists && !newTenantExists && retried) {
@ -848,24 +846,26 @@ struct MetaclusterManagementWorkload : TestWorkload {
throw e;
}
}
wait(verifyListFilter(self, newTenantName, "renameTenantNew"));
ASSERT(exists);
ASSERT(!newTenantExists);
Optional<TenantMapEntry> oldEntry = wait(MetaclusterAPI::tryGetTenant(self->managementDb, tenant));
Optional<MetaclusterTenantMapEntry> oldEntry =
wait(MetaclusterAPI::tryGetTenant(self->managementDb, tenant));
ASSERT(!oldEntry.present());
TenantMapEntry newEntry = wait(MetaclusterAPI::getTenant(self->managementDb, newTenantName));
MetaclusterTenantMapEntry newEntry = wait(MetaclusterAPI::getTenant(self->managementDb, newTenantName));
auto tenantData = self->createdTenants.find(tenant);
ASSERT(tenantData != self->createdTenants.end());
ASSERT(tenantData->second.tenantGroup == newEntry.tenantGroup);
ASSERT(newEntry.assignedCluster.present() && tenantData->second.cluster == newEntry.assignedCluster.get());
ASSERT(tenantData->second.cluster == newEntry.assignedCluster);
self->createdTenants[newTenantName] = tenantData->second;
self->createdTenants.erase(tenantData);
auto& dataDb = self->dataDbs[newEntry.assignedCluster.get()];
auto& dataDb = self->dataDbs[newEntry.assignedCluster];
ASSERT(dataDb.registered);
dataDb.tenants.erase(tenant);

View File

@ -81,7 +81,7 @@ struct MetaclusterRestoreWorkload : TestWorkload {
std::map<TenantGroupName, TenantGroupData> tenantGroups;
std::set<int64_t> deletedTenants;
std::vector<std::pair<int64_t, TenantMapEntry>> managementTenantsBeforeRestore;
std::vector<std::pair<int64_t, MetaclusterTenantMapEntry>> managementTenantsBeforeRestore;
int initialTenants;
int maxTenants;
@ -337,10 +337,11 @@ struct MetaclusterRestoreWorkload : TestWorkload {
return waitForAll(deleteFutures);
}
ACTOR template <class Transaction>
static Future<std::unordered_set<int64_t>> getTenantsInGroup(Transaction tr,
TenantMetadataSpecification tenantMetadata,
TenantGroupName tenantGroup) {
ACTOR template <class Transaction, class TenantMapEntryImpl>
static Future<std::unordered_set<int64_t>> getTenantsInGroup(
Transaction tr,
TenantMetadataSpecification<TenantMapEntryImpl> tenantMetadata,
TenantGroupName tenantGroup) {
KeyBackedRangeResult<Tuple> groupTenants =
wait(tenantMetadata.tenantGroupTenantIndex.getRange(tr,
Tuple::makeTuple(tenantGroup),
@ -632,23 +633,24 @@ struct MetaclusterRestoreWorkload : TestWorkload {
loop {
try {
TenantMapEntry tenantEntry;
MetaclusterTenantMapEntry tenantEntry;
tenantEntry.tenantName = tenantName;
tenantEntry.tenantGroup = self->chooseTenantGroup();
wait(MetaclusterAPI::createTenant(self->managementDb, tenantEntry, AssignClusterAutomatically::True));
TenantMapEntry createdEntry = wait(MetaclusterAPI::getTenant(self->managementDb, tenantName));
MetaclusterTenantMapEntry createdEntry =
wait(MetaclusterAPI::getTenant(self->managementDb, tenantName));
TraceEvent(SevDebug, "MetaclusterRestoreWorkloadCreatedTenant")
.detail("Tenant", tenantName)
.detail("TenantId", createdEntry.id)
.detail("AccessTime", createTime);
self->createdTenants[createdEntry.id] =
TenantData(tenantName, createdEntry.assignedCluster.get(), createdEntry.tenantGroup, createTime);
TenantData(tenantName, createdEntry.assignedCluster, createdEntry.tenantGroup, createTime);
self->tenantNameIndex[tenantName] = createdEntry.id;
auto& dataDb = self->dataDbs[createdEntry.assignedCluster.get()];
auto& dataDb = self->dataDbs[createdEntry.assignedCluster];
dataDb.tenants.insert(createdEntry.id);
if (createdEntry.tenantGroup.present()) {
auto& tenantGroupData = self->tenantGroups[createdEntry.tenantGroup.get()];
tenantGroupData.cluster = createdEntry.assignedCluster.get();
tenantGroupData.cluster = createdEntry.assignedCluster;
tenantGroupData.tenants.insert(createdEntry.id);
dataDb.tenantGroups.insert(createdEntry.tenantGroup.get());
}
@ -872,7 +874,7 @@ struct MetaclusterRestoreWorkload : TestWorkload {
if (self->recoverManagementCluster) {
wait(resetManagementCluster(self));
} else {
KeyBackedRangeResult<std::pair<int64_t, TenantMapEntry>> tenants =
KeyBackedRangeResult<std::pair<int64_t, MetaclusterTenantMapEntry>> tenants =
wait(runTransaction(self->managementDb, [](Reference<ITransaction> tr) {
return MetaclusterAPI::ManagementClusterMetadata::tenantMetadata().tenantMap.getRange(
tr, {}, {}, CLIENT_KNOBS->MAX_TENANTS_PER_CLUSTER + 1);
@ -989,14 +991,14 @@ struct MetaclusterRestoreWorkload : TestWorkload {
}
ACTOR static Future<Void> checkTenants(MetaclusterRestoreWorkload* self) {
state KeyBackedRangeResult<std::pair<int64_t, TenantMapEntry>> tenants =
state KeyBackedRangeResult<std::pair<int64_t, MetaclusterTenantMapEntry>> tenants =
wait(runTransaction(self->managementDb, [](Reference<ITransaction> tr) {
return MetaclusterAPI::ManagementClusterMetadata::tenantMetadata().tenantMap.getRange(
tr, {}, {}, CLIENT_KNOBS->MAX_TENANTS_PER_CLUSTER + 1);
}));
ASSERT_LE(tenants.results.size(), CLIENT_KNOBS->MAX_TENANTS_PER_CLUSTER);
std::map<int64_t, TenantMapEntry> tenantMap(tenants.results.begin(), tenants.results.end());
std::map<int64_t, MetaclusterTenantMapEntry> tenantMap(tenants.results.begin(), tenants.results.end());
// If we did not restore the management cluster, then every tenant present in the management cluster before the
// restore should be present after the restore. All tenants in the management cluster should be unchanged except
@ -1006,9 +1008,9 @@ struct MetaclusterRestoreWorkload : TestWorkload {
auto itr = tenantMap.find(tenantId);
ASSERT(itr != tenantMap.end());
TenantMapEntry postRecoveryEntry = itr->second;
if (postRecoveryEntry.tenantState == TenantState::ERROR) {
ASSERT(self->dataDbs[itr->second.assignedCluster.get()].restored);
MetaclusterTenantMapEntry postRecoveryEntry = itr->second;
if (postRecoveryEntry.tenantState == MetaclusterAPI::TenantState::ERROR) {
ASSERT(self->dataDbs[itr->second.assignedCluster].restored);
postRecoveryEntry.tenantState = tenantEntry.tenantState;
postRecoveryEntry.error.clear();
}
@ -1030,14 +1032,14 @@ struct MetaclusterRestoreWorkload : TestWorkload {
} else {
if (tenantData.createTime != TenantData::AccessTime::BEFORE_BACKUP &&
self->dataDbs[tenantData.cluster].restored) {
ASSERT(tenantItr->second.tenantState == TenantState::ERROR ||
(tenantItr->second.tenantState == TenantState::READY &&
ASSERT(tenantItr->second.tenantState == MetaclusterAPI::TenantState::ERROR ||
(tenantItr->second.tenantState == MetaclusterAPI::TenantState::READY &&
tenantData.createTime == TenantData::AccessTime::DURING_BACKUP));
if (tenantItr->second.tenantState == TenantState::ERROR) {
if (tenantItr->second.tenantState == MetaclusterAPI::TenantState::ERROR) {
ASSERT(self->dataDbs[tenantData.cluster].restoreHasMessages);
}
} else {
ASSERT_EQ(tenantItr->second.tenantState, TenantState::READY);
ASSERT_EQ(tenantItr->second.tenantState, MetaclusterAPI::TenantState::READY);
}
}
}

View File

@ -379,7 +379,7 @@ struct ReadWriteWorkload : ReadWriteCommon {
bool adjacentReads; // keys are adjacent within a transaction
bool adjacentWrites;
int extraReadConflictRangesPerTransaction, extraWriteConflictRangesPerTransaction;
int readType;
ReadType readType;
bool cacheResult;
Optional<Key> transactionTag;
@ -403,7 +403,7 @@ struct ReadWriteWorkload : ReadWriteCommon {
rampUpConcurrency = getOption(options, "rampUpConcurrency"_sr, false);
batchPriority = getOption(options, "batchPriority"_sr, false);
descriptionString = getOption(options, "description"_sr, "ReadWrite"_sr);
readType = getOption(options, "readType"_sr, 3);
readType = static_cast<ReadType>(getOption(options, "readType"_sr, (int)ReadType::NORMAL));
cacheResult = getOption(options, "cacheResult"_sr, true);
if (hasOption(options, "transactionTag"_sr)) {
transactionTag = getOption(options, "transactionTag"_sr, ""_sr);
@ -434,10 +434,27 @@ struct ReadWriteWorkload : ReadWriteCommon {
if (transactionTag.present() && tr.getTags().size() == 0) {
tr.setOption(FDBTransactionOptions::AUTO_THROTTLE_TAG, transactionTag.get());
}
ReadOptions options;
options.type = static_cast<ReadType>(readType);
options.cacheResult = cacheResult;
tr.getTransaction().trState->readOptions = options;
if (cacheResult) {
// Enabled is the default, but sometimes set it explicitly
if (BUGGIFY) {
tr.setOption(FDBTransactionOptions::READ_SERVER_SIDE_CACHE_ENABLE);
}
} else {
tr.setOption(FDBTransactionOptions::READ_SERVER_SIDE_CACHE_DISABLE);
}
// ReadTypes of LOW, NORMAL, and HIGH can be set through transaction options, so setOption for those
if (readType == ReadType::LOW) {
tr.setOption(FDBTransactionOptions::READ_PRIORITY_LOW);
} else if (readType == ReadType::NORMAL) {
tr.setOption(FDBTransactionOptions::READ_PRIORITY_NORMAL);
} else if (readType == ReadType::HIGH) {
tr.setOption(FDBTransactionOptions::READ_PRIORITY_HIGH);
} else {
// Otherwise fall back to NativeAPI readOptions
tr.getTransaction().trState->readOptions.withDefault(ReadOptions()).type = readType;
}
}
void getMetrics(std::vector<PerfMetric>& m) override {

View File

@ -130,7 +130,7 @@ struct TenantCapacityLimits : TestWorkload {
}
// Attempt to create a tenant on the metacluster which should fail since the cluster is at capacity
try {
TenantMapEntry entry;
MetaclusterTenantMapEntry entry;
entry.tenantName = "test_tenant_metacluster"_sr;
wait(MetaclusterAPI::createTenant(self->managementDb, entry, AssignClusterAutomatically::True));
ASSERT(false);

View File

@ -90,7 +90,7 @@ struct TenantEntryCacheWorkload : TestWorkload {
// Ensure associated counter values gets updated
ASSERT_EQ(cache->numRefreshByInit(), 1);
state TenantMapEntry dummy(std::numeric_limits<int64_t>::max(), "name"_sr, TenantState::READY);
state TenantMapEntry dummy(std::numeric_limits<int64_t>::max(), "name"_sr);
Optional<TenantEntryCachePayload<int64_t>> value = wait(cache->getById(dummy.id));
ASSERT(!value.present());

View File

@ -181,7 +181,7 @@ struct TenantManagementConcurrencyWorkload : TestWorkload {
ACTOR static Future<Void> createTenant(TenantManagementConcurrencyWorkload* self) {
state TenantName tenant = self->chooseTenantName();
state TenantMapEntry entry;
state MetaclusterTenantMapEntry entry;
state UID debugId = deterministicRandom()->randomUniqueID();
@ -196,7 +196,7 @@ struct TenantManagementConcurrencyWorkload : TestWorkload {
Future<Void> createFuture =
self->useMetacluster
? MetaclusterAPI::createTenant(self->mvDb, entry, AssignClusterAutomatically::True)
: success(TenantAPI::createTenant(self->dataDb.getReference(), tenant, entry));
: success(TenantAPI::createTenant(self->dataDb.getReference(), tenant, TenantMapEntry(entry)));
Optional<Void> result = wait(timeout(createFuture, 30));
if (result.present()) {
TraceEvent(SevDebug, "TenantManagementConcurrencyCreatedTenant", debugId)

View File

@ -358,19 +358,12 @@ struct TenantManagementWorkload : TestWorkload {
return tenantGroup;
}
Future<Optional<TenantMapEntry>> tryGetTenant(TenantName tenantName, OperationType operationType) {
if (operationType == OperationType::METACLUSTER) {
return MetaclusterAPI::tryGetTenant(mvDb, tenantName);
} else {
return TenantAPI::tryGetTenant(dataDb.getReference(), tenantName);
}
}
// Creates tenant(s) using the specified operation type
ACTOR static Future<Void> createTenantImpl(Reference<ReadYourWritesTransaction> tr,
std::map<TenantName, TenantMapEntry> tenantsToCreate,
OperationType operationType,
TenantManagementWorkload* self) {
ACTOR template <class TenantMapEntryImpl>
static Future<Void> createTenantImpl(Reference<ReadYourWritesTransaction> tr,
std::map<TenantName, TenantMapEntryImpl> tenantsToCreate,
OperationType operationType,
TenantManagementWorkload* self) {
if (operationType == OperationType::SPECIAL_KEYS) {
tr->setOption(FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES);
for (auto [tenant, entry] : tenantsToCreate) {
@ -402,15 +395,58 @@ struct TenantManagementWorkload : TestWorkload {
} else {
ASSERT(OperationType::METACLUSTER == operationType);
ASSERT(tenantsToCreate.size() == 1);
wait(MetaclusterAPI::createTenant(
self->mvDb, tenantsToCreate.begin()->second, AssignClusterAutomatically::True));
TenantMapEntryImpl tEntry = tenantsToCreate.begin()->second;
MetaclusterTenantMapEntry modifiedEntry(tEntry);
auto assign = AssignClusterAutomatically::True;
if (deterministicRandom()->coinflip()) {
modifiedEntry.assignedCluster = self->dataClusterName;
assign = AssignClusterAutomatically::False;
}
wait(MetaclusterAPI::createTenant(self->mvDb, modifiedEntry, assign));
}
return Void();
}
ACTOR template <class TenantMapEntryImpl>
static Future<Void> verifyTenantCreate(TenantManagementWorkload* self,
Optional<TenantMapEntryImpl> entry,
TenantName itrName,
Optional<TenantGroupName> tGroup) {
ASSERT(entry.present());
ASSERT(entry.get().id > self->maxId);
ASSERT(entry.get().tenantGroup == tGroup);
ASSERT(TenantAPI::getTenantIdPrefix(entry.get().id) == self->tenantIdPrefix);
if (self->useMetacluster) {
// In a metacluster, we should also see that the tenant was created on the data cluster
Optional<TenantMapEntry> dataEntry = wait(TenantAPI::tryGetTenant(self->dataDb.getReference(), itrName));
ASSERT(dataEntry.present());
ASSERT(dataEntry.get().id == entry.get().id);
ASSERT(TenantAPI::getTenantIdPrefix(dataEntry.get().id) == self->tenantIdPrefix);
ASSERT(dataEntry.get().tenantGroup == entry.get().tenantGroup);
}
// Update our local tenant state to include the newly created one
self->maxId = entry.get().id;
TenantData tData = TenantData(entry.get().id, itrName, tGroup, true);
self->createdTenants[itrName] = tData;
self->allTestTenants.push_back(tData.tenant);
return Void();
}
ACTOR static Future<Void> createTenant(TenantManagementWorkload* self) {
state OperationType operationType = self->randomOperationType();
if (operationType == OperationType::METACLUSTER) {
wait(createTenantHelper<MetaclusterTenantMapEntry>(self, operationType));
} else {
wait(createTenantHelper<TenantMapEntry>(self, operationType));
}
return Void();
}
ACTOR template <class TenantMapEntryImpl>
static Future<Void> createTenantHelper(TenantManagementWorkload* self, OperationType operationType) {
int numTenants = 1;
// For transaction-based operations, test creating multiple tenants in the same transaction
@ -429,19 +465,16 @@ struct TenantManagementWorkload : TestWorkload {
state bool hasSystemTenantGroup = false;
state int newTenants = 0;
state std::map<TenantName, TenantMapEntry> tenantsToCreate;
state std::map<TenantName, TenantMapEntryImpl> tenantsToCreate;
for (int i = 0; i < numTenants; ++i) {
TenantName tenant = self->chooseTenantName(true);
while (tenantsToCreate.count(tenant)) {
tenant = self->chooseTenantName(true);
}
TenantMapEntry entry;
TenantMapEntryImpl entry;
entry.tenantName = tenant;
entry.tenantGroup = self->chooseTenantGroup(true);
if (OperationType::METACLUSTER == operationType && deterministicRandom()->coinflip()) {
entry.assignedCluster = self->dataClusterName;
}
if (self->createdTenants.count(tenant)) {
alreadyExists = true;
@ -476,8 +509,9 @@ struct TenantManagementWorkload : TestWorkload {
}
try {
Optional<Void> result = wait(timeout(createTenantImpl(tr, tenantsToCreate, operationType, self),
deterministicRandom()->randomInt(1, 30)));
Optional<Void> result =
wait(timeout(createTenantImpl<TenantMapEntryImpl>(tr, tenantsToCreate, operationType, self),
deterministicRandom()->randomInt(1, 30)));
if (result.present()) {
// Make sure that we had capacity to create the tenants. This cannot be validated for
@ -511,18 +545,28 @@ struct TenantManagementWorkload : TestWorkload {
}
// Check the state of the first created tenant
Optional<TenantMapEntry> resultEntry =
wait(self->tryGetTenant(tenantsToCreate.begin()->first, operationType));
if (resultEntry.present()) {
if (resultEntry.get().tenantState == TenantState::READY) {
// The tenant now exists, so we will retry and expect the creation to react accordingly
if (operationType == OperationType::METACLUSTER) {
Optional<MetaclusterTenantMapEntry> resultEntry =
wait(MetaclusterAPI::tryGetTenant(self->mvDb, tenantsToCreate.begin()->first));
if (resultEntry.present()) {
if (resultEntry.get().tenantState == MetaclusterAPI::TenantState::READY) {
// The tenant now exists, so we will retry and expect the creation to react accordingly
alreadyExists = true;
} else {
// Only a metacluster tenant creation can end up in a partially created state
// We should be able to retry and pick up where we left off
ASSERT(resultEntry.get().tenantState == MetaclusterAPI::TenantState::REGISTERING);
}
} else {
CODE_PROBE(true, "Tenant creation (metacluster) aborted before writing data.");
}
} else {
Optional<TenantMapEntry> tenantEntry =
wait(TenantAPI::tryGetTenant(self->dataDb.getReference(), tenantsToCreate.begin()->first));
if (tenantEntry.present()) {
alreadyExists = true;
} else {
// Only a metacluster tenant creation can end up in a partially created state
// We should be able to retry and pick up where we left off
ASSERT(operationType == OperationType::METACLUSTER);
ASSERT(resultEntry.get().tenantState == TenantState::REGISTERING);
CODE_PROBE(true, "Tenant creation (non-metacluster) aborted before writing data.");
}
}
}
@ -540,7 +584,7 @@ struct TenantManagementWorkload : TestWorkload {
ASSERT(!hasSystemTenant);
ASSERT(!hasSystemTenantGroup);
state std::map<TenantName, TenantMapEntry>::iterator tenantItr;
state typename std::map<TenantName, TenantMapEntryImpl>::iterator tenantItr;
for (tenantItr = tenantsToCreate.begin(); tenantItr != tenantsToCreate.end(); ++tenantItr) {
// Ignore any tenants that already existed
if (self->createdTenants.count(tenantItr->first)) {
@ -548,35 +592,25 @@ struct TenantManagementWorkload : TestWorkload {
}
// Read the created tenant object and verify that its state is correct
state Optional<TenantMapEntry> entry = wait(self->tryGetTenant(tenantItr->first, operationType));
ASSERT(entry.present());
ASSERT(entry.get().id > self->maxId);
ASSERT(TenantAPI::getTenantIdPrefix(entry.get().id) == self->tenantIdPrefix);
ASSERT(entry.get().tenantGroup == tenantItr->second.tenantGroup);
ASSERT(entry.get().tenantState == TenantState::READY);
state StringRef tPrefix;
if (operationType == OperationType::METACLUSTER) {
state Optional<MetaclusterTenantMapEntry> metaEntry =
wait(MetaclusterAPI::tryGetTenant(self->mvDb, tenantItr->first));
wait(verifyTenantCreate<MetaclusterTenantMapEntry>(
self, metaEntry, tenantItr->first, tenantItr->second.tenantGroup));
ASSERT(metaEntry.get().tenantState == MetaclusterAPI::TenantState::READY);
tPrefix = metaEntry.get().prefix;
} else {
state Optional<TenantMapEntry> normalEntry =
wait(TenantAPI::tryGetTenant(self->dataDb.getReference(), tenantItr->first));
wait(verifyTenantCreate<TenantMapEntry>(
self, normalEntry, tenantItr->first, tenantItr->second.tenantGroup));
tPrefix = normalEntry.get().prefix;
}
Versionstamp currentVersionstamp = wait(getLastTenantModification(self, operationType));
ASSERT_GT(currentVersionstamp.version, originalReadVersion);
if (self->useMetacluster) {
// In a metacluster, we should also see that the tenant was created on the data cluster
Optional<TenantMapEntry> dataEntry =
wait(TenantAPI::tryGetTenant(self->dataDb.getReference(), tenantItr->first));
ASSERT(dataEntry.present());
ASSERT(dataEntry.get().id == entry.get().id);
ASSERT(TenantAPI::getTenantIdPrefix(dataEntry.get().id) == self->tenantIdPrefix);
ASSERT(dataEntry.get().tenantGroup == entry.get().tenantGroup);
ASSERT(dataEntry.get().tenantState == TenantState::READY);
}
// Update our local tenant state to include the newly created one
self->maxId = entry.get().id;
TenantData tData =
TenantData(entry.get().id, tenantItr->first, tenantItr->second.tenantGroup, true);
self->createdTenants[tenantItr->first] = tData;
self->allTestTenants.push_back(tData.tenant);
// If this tenant has a tenant group, create or update the entry for it
if (tenantItr->second.tenantGroup.present()) {
self->createdTenantGroups[tenantItr->second.tenantGroup.get()].tenantCount++;
@ -605,7 +639,7 @@ struct TenantManagementWorkload : TestWorkload {
loop {
try {
checkTr.setOption(FDBTransactionOptions::RAW_ACCESS);
Optional<Value> val = wait(checkTr.get(self->keyName.withPrefix(entry.get().prefix)));
Optional<Value> val = wait(checkTr.get(self->keyName.withPrefix(tPrefix)));
ASSERT(val.present());
ASSERT(val.get() == tenantItr->first);
break;
@ -750,7 +784,7 @@ struct TenantManagementWorkload : TestWorkload {
// getTenant throwing tenant_not_found will break some test cases because it is not wrapped
// by runManagementTransaction. For such cases, fall back to delete by name and allow
// the errors to flow through there
Optional<TenantMapEntry> entry = wait(MetaclusterAPI::tryGetTenant(self->mvDb, beginTenant));
Optional<MetaclusterTenantMapEntry> entry = wait(MetaclusterAPI::tryGetTenant(self->mvDb, beginTenant));
if (entry.present() && deterministicRandom()->coinflip()) {
wait(MetaclusterAPI::deleteTenant(self->mvDb, entry.get().id));
CODE_PROBE(true, "Deleted tenant by ID");
@ -895,16 +929,22 @@ struct TenantManagementWorkload : TestWorkload {
}
if (!tenants.empty()) {
// Check the state of the first deleted tenant
Optional<TenantMapEntry> resultEntry =
wait(self->tryGetTenant(tenants.begin()->first, operationType));
if (!resultEntry.present()) {
alreadyExists = false;
} else if (resultEntry.get().tenantState == TenantState::REMOVING) {
ASSERT(operationType == OperationType::METACLUSTER);
if (operationType == OperationType::METACLUSTER) {
// Check the state of the first deleted tenant
Optional<MetaclusterTenantMapEntry> resultEntry =
wait(MetaclusterAPI::tryGetTenant(self->mvDb, tenants.begin()->first));
if (!resultEntry.present()) {
alreadyExists = false;
} else {
ASSERT(resultEntry.get().tenantState == MetaclusterAPI::TenantState::READY ||
resultEntry.get().tenantState == MetaclusterAPI::TenantState::REMOVING);
}
} else {
ASSERT(resultEntry.get().tenantState == TenantState::READY);
Optional<TenantMapEntry> tenantEntry =
wait(TenantAPI::tryGetTenant(self->dataDb.getReference(), tenants.begin()->first));
if (!tenantEntry.present()) {
alreadyExists = false;
}
}
}
}
@ -1080,13 +1120,7 @@ struct TenantManagementWorkload : TestWorkload {
tenantGroup = TenantGroupNameRef(tenantGroupStr);
}
Optional<ClusterName> assignedCluster;
if (jsonDoc.tryGet("assigned_cluster", assignedClusterStr)) {
assignedCluster = ClusterNameRef(assignedClusterStr);
}
TenantMapEntry entry(id, TenantNameRef(name), TenantMapEntry::stringToTenantState(tenantStateStr), tenantGroup);
entry.assignedCluster = assignedCluster;
TenantMapEntry entry(id, TenantNameRef(name), tenantGroup);
ASSERT(entry.prefix == prefix);
return entry;
}
@ -1109,8 +1143,6 @@ struct TenantManagementWorkload : TestWorkload {
} else if (operationType == OperationType::MANAGEMENT_TRANSACTION) {
tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
wait(store(entry, TenantAPI::getTenantTransaction(tr, tenant)));
} else {
wait(store(entry, MetaclusterAPI::getTenant(self->mvDb, tenant)));
}
return entry;
@ -1130,10 +1162,20 @@ struct TenantManagementWorkload : TestWorkload {
loop {
try {
// Get the tenant metadata and check that it matches our local state
state TenantMapEntry entry = wait(getTenantImpl(tr, tenant, operationType, self));
state int64_t entryId;
state Optional<TenantGroupName> tGroup;
if (operationType == OperationType::METACLUSTER) {
state MetaclusterTenantMapEntry metaEntry = wait(MetaclusterAPI::getTenant(self->mvDb, tenant));
entryId = metaEntry.id;
tGroup = metaEntry.tenantGroup;
} else {
state TenantMapEntry normalEntry = wait(getTenantImpl(tr, tenant, operationType, self));
entryId = normalEntry.id;
tGroup = normalEntry.tenantGroup;
}
ASSERT(alreadyExists);
ASSERT(entry.id == tenantData.tenant->id());
ASSERT(entry.tenantGroup == tenantData.tenantGroup);
ASSERT(entryId == tenantData.tenant->id());
ASSERT(tGroup == tenantData.tenantGroup);
wait(checkTenantContents(self, tenant, tenantData));
return Void();
} catch (Error& e) {
@ -1189,19 +1231,39 @@ struct TenantManagementWorkload : TestWorkload {
} else if (operationType == OperationType::MANAGEMENT_TRANSACTION) {
tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
wait(store(tenants, TenantAPI::listTenantMetadataTransaction(tr, beginTenant, endTenant, limit)));
} else {
wait(store(tenants, MetaclusterAPI::listTenantMetadata(self->mvDb, beginTenant, endTenant, limit)));
}
return tenants;
}
template <class TenantMapEntryImpl>
static Future<Void> verifyTenantList(TenantManagementWorkload* self,
std::vector<std::pair<TenantName, TenantMapEntryImpl>> tenants,
int limit,
TenantName beginTenant,
TenantName endTenant) {
ASSERT(tenants.size() <= limit);
// Compare the resulting tenant list to the list we expected to get
auto localItr = self->createdTenants.lower_bound(beginTenant);
auto tenantMapItr = tenants.begin();
for (; tenantMapItr != tenants.end(); ++tenantMapItr, ++localItr) {
ASSERT(localItr != self->createdTenants.end());
ASSERT(localItr->first == tenantMapItr->first);
}
// Make sure the list terminated at the right spot
ASSERT(tenants.size() == limit || localItr == self->createdTenants.end() || localItr->first >= endTenant);
return Void();
}
ACTOR static Future<Void> listTenants(TenantManagementWorkload* self) {
state TenantName beginTenant = self->chooseTenantName(false);
state TenantName endTenant = self->chooseTenantName(false);
state int limit = std::min(CLIENT_KNOBS->MAX_TENANTS_PER_CLUSTER + 1,
deterministicRandom()->randomInt(1, self->maxTenants * 2));
state OperationType operationType = self->randomOperationType();
state OperationType operationType =
self->useMetacluster ? OperationType::METACLUSTER : self->randomOperationType();
state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(self->dataDb);
if (beginTenant > endTenant) {
@ -1210,38 +1272,27 @@ struct TenantManagementWorkload : TestWorkload {
loop {
try {
// Attempt to read the chosen list of tenants
state std::vector<std::pair<TenantName, TenantMapEntry>> tenants =
wait(listTenantsImpl(tr, beginTenant, endTenant, limit, operationType, self));
// Attempting to read the list of tenants using the metacluster API in a non-metacluster should
// return nothing in this test
if (operationType == OperationType::METACLUSTER && !self->useMetacluster) {
ASSERT(tenants.size() == 0);
return Void();
if (self->useMetacluster) {
state std::vector<std::pair<TenantName, MetaclusterTenantMapEntry>> metaTenants =
wait(MetaclusterAPI::listTenantMetadata(self->mvDb, beginTenant, endTenant, limit));
verifyTenantList<MetaclusterTenantMapEntry>(self, metaTenants, limit, beginTenant, endTenant);
} else {
state std::vector<std::pair<TenantName, TenantMapEntry>> tenants =
wait(listTenantsImpl(tr, beginTenant, endTenant, limit, operationType, self));
if (operationType == OperationType::METACLUSTER) {
ASSERT_EQ(tenants.size(), 0);
return Void();
}
verifyTenantList<TenantMapEntry>(self, tenants, limit, beginTenant, endTenant);
}
ASSERT(tenants.size() <= limit);
// Compare the resulting tenant list to the list we expected to get
auto localItr = self->createdTenants.lower_bound(beginTenant);
auto tenantMapItr = tenants.begin();
for (; tenantMapItr != tenants.end(); ++tenantMapItr, ++localItr) {
ASSERT(localItr != self->createdTenants.end());
ASSERT(localItr->first == tenantMapItr->first);
}
// Make sure the list terminated at the right spot
ASSERT(tenants.size() == limit || localItr == self->createdTenants.end() ||
localItr->first >= endTenant);
return Void();
} catch (Error& e) {
state bool retry = false;
state Error error = e;
// Transaction-based operations need to be retried
if (operationType == OperationType::MANAGEMENT_TRANSACTION ||
operationType == OperationType::SPECIAL_KEYS) {
if (!self->useMetacluster && (operationType == OperationType::MANAGEMENT_TRANSACTION ||
operationType == OperationType::SPECIAL_KEYS)) {
try {
retry = true;
wait(tr->onError(e));
@ -1303,13 +1354,12 @@ struct TenantManagementWorkload : TestWorkload {
return Void();
}
ACTOR static Future<Void> renameTenantImpl(Reference<ReadYourWritesTransaction> tr,
ACTOR static Future<Void> renameTenantImpl(TenantManagementWorkload* self,
Reference<ReadYourWritesTransaction> tr,
OperationType operationType,
std::map<TenantName, TenantName> tenantRenames,
bool tenantNotFound,
bool tenantExists,
bool tenantOverlap,
TenantManagementWorkload* self) {
bool tenantExists) {
if (operationType == OperationType::SPECIAL_KEYS) {
tr->setOption(FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES);
for (auto& iter : tenantRenames) {
@ -1361,10 +1411,10 @@ struct TenantManagementWorkload : TestWorkload {
TenantName newTenant = self->chooseTenantName(false);
bool checkOverlap =
oldTenant == newTenant || allTenantNames.count(oldTenant) || allTenantNames.count(newTenant);
// renameTenantTransaction does not handle rename collisions:
// reject the rename here if it has overlap and we are doing a transaction operation
// and then pick another combination
if (checkOverlap && operationType == OperationType::MANAGEMENT_TRANSACTION) {
// These operation types do not handle rename collisions
// reject the rename here if it has overlap
if (checkOverlap && (operationType == OperationType::MANAGEMENT_TRANSACTION ||
operationType == OperationType::MANAGEMENT_DATABASE)) {
--i;
continue;
}
@ -1383,8 +1433,7 @@ struct TenantManagementWorkload : TestWorkload {
state Version originalReadVersion = wait(self->getLatestReadVersion(self, operationType));
loop {
try {
wait(renameTenantImpl(
tr, operationType, tenantRenames, tenantNotFound, tenantExists, tenantOverlap, self));
wait(renameTenantImpl(self, tr, operationType, tenantRenames, tenantNotFound, tenantExists));
wait(verifyTenantRenames(self, tenantRenames));
Versionstamp currentVersionstamp = wait(getLastTenantModification(self, operationType));
ASSERT_GT(currentVersionstamp.version, originalReadVersion);

View File

@ -227,6 +227,17 @@ public:
return std::move(impl).value_or(std::forward<U>(defaultValue));
}
// A combination of orDefault() and get()
// Stores defaultValue in *this if *this was not present, then returns the stored value.
// Can only be called on lvalues because returning a reference into an rvalue is dangerous.
template <class U>
T& withDefault(U&& defaultValue) & {
if (!impl.has_value()) {
impl.emplace(std::forward<U>(defaultValue));
}
return impl.value();
}
// Spaceship operator. Treats not-present as less-than present.
int compare(Optional const& rhs) const {
if (present() == rhs.present()) {

View File

@ -131,6 +131,7 @@ if(WITH_PYTHON)
add_fdb_test(TEST_FILES fast/AtomicOps.toml)
add_fdb_test(TEST_FILES fast/AtomicOpsApiCorrectness.toml)
add_fdb_test(TEST_FILES fast/AuthzSecurity.toml)
add_fdb_test(TEST_FILES fast/AuthzSecurityWithBlobGranules.toml)
add_fdb_test(TEST_FILES fast/AutomaticIdempotency.toml)
add_fdb_test(TEST_FILES fast/BackupAzureBlobCorrectness.toml IGNORE)
add_fdb_test(TEST_FILES fast/BackupS3BlobCorrectness.toml IGNORE)

View File

@ -0,0 +1,41 @@
[configuration]
allowDefaultTenant = false
tenantModes = ['optional', 'required']
blobGranulesEnabled = true
# FIXME: re-enable rocks at some point
storageEngineExcludeTypes = [4, 5]
[[knobs]]
audit_logging_enabled = false
max_trace_lines = 2000000
[[test]]
testTitle = 'TenantCreation'
clearAfterTest = false
[[test.workload]]
testName = 'CreateTenant'
name = 'AuthzSecurityTenant'
blobbify = true
[[test.workload]]
testName = 'CreateTenant'
name = 'AnotherAuthzSecurityTenant'
[[test]]
testTitle = 'AuthzSecurityCheck'
clearAfterTest = false
[[test.workload]]
testName = 'LeakTLogInterface'
tenant = 'AuthzSecurityTenant'
key = 'TLogInterface'
testDuration = 10.0
[[test.workload]]
testName = 'AuthzSecurity'
tenantA = 'AuthzSecurityTenant'
tenantB = 'AnotherAuthzSecurityTenant'
tLogConfigKey = 'TLogInterface'
testDuration = 10.0
checkBlobGranules = true