commit 1730d75f73 (parent 77ef8a737d)

@@ -9,7 +9,7 @@ bindings/java/foundationdb-tests*.jar
bindings/java/fdb-java-*-sources.jar
packaging/msi/FDBInstaller.msi
build/
cmake-build-debug/
cmake-build-debug*
# Generated source, build, and packaging files
*.g.cpp
*.g.h
@@ -118,7 +118,7 @@ storage migration type
^^^^^^^^^^^^^^^^^^^^^^

Set the storage migration type, or how FDB should migrate to a new storage engine if the value is changed.
The default is ``disabled``, which means changing the storage engine will not be possible.
The default is ``disabled``, which means changing the storage engine will not be possible.

* ``disabled``
* ``gradual``

@@ -128,7 +128,6 @@ The default is ``disabled``, which means changing the storage engine will not be
``aggressive`` tries to replace as many storage servers as it can at once, and will recruit a new storage server on the same process as the old one. This is faster, but can hit degraded performance or OOM with two storage servers on the same process. The main benefit over ``gradual`` is that it does not need to take one storage server out of rotation, so it works for small or development clusters that have the same number of storage processes as the replication factor. Note that ``aggressive`` is not exclusive to running the perpetual wiggle.
``disabled`` means that if the storage engine is changed, FDB will not migrate the cluster to the new storage engine. This also prevents the perpetual wiggle from rewriting storage files.

consistencycheck
----------------
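For context, a minimal fdbcli sketch of switching the storage engine with gradual migration, assuming a cluster that is still on the default storage_migration_type=disabled. The commands are the ones referenced by the documentation and error messages in this patch; the target engine ssd-2 is only an example:

    fdb> configure perpetual_storage_wiggle=1 storage_migration_type=gradual
    fdb> configure ssd-2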
@@ -173,6 +173,16 @@ ACTOR Future<bool> configureCommandActor(Reference<IDatabase> db,
fprintf(stderr, "ERROR: These changes would make the configuration invalid\n");
ret = false;
break;
case ConfigurationResult::STORAGE_MIGRATION_DISABLED:
fprintf(stderr,
"ERROR: Storage engine type cannot be changed because "
"storage_migration_mode=disabled.\n");
fprintf(stderr,
"Type `configure perpetual_storage_wiggle=1 storage_migration_type=gradual' to enable gradual "
"migration with the perpetual wiggle, or `configure "
"storage_migration_type=aggressive' for aggressive migration.\n");
ret = true;
break;
case ConfigurationResult::DATABASE_ALREADY_CREATED:
fprintf(stderr, "ERROR: Database already exists! To change configuration, don't say `new'\n");
ret = false;
@@ -240,17 +250,6 @@ ACTOR Future<bool> configureCommandActor(Reference<IDatabase> db,
"storage_migration_type=gradual' to set the gradual migration type.\n");
ret = false;
break;
case ConfigurationResult::SUCCESS_WARN_CHANGE_STORAGE_NOMIGRATE:
printf("Configuration changed, with warnings\n");
fprintf(stderr,
"WARN: Storage engine type changed, but nothing will be migrated because "
"storage_migration_mode=disabled.\n");
fprintf(stderr,
"Type `configure perpetual_storage_wiggle=1 storage_migration_type=gradual' to enable gradual "
"migration with the perpetual wiggle, or `configure "
"storage_migration_type=aggressive' for aggressive migration.\n");
ret = false;
break;
default:
ASSERT(false);
ret = false;
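Roughly what the new STORAGE_MIGRATION_DISABLED path above prints in fdbcli, assuming the cluster is still on the default migration type and a different storage engine is requested. This is a sketch assembled from the format strings in the hunk, not captured output:

    fdb> configure ssd-2
    ERROR: Storage engine type cannot be changed because storage_migration_mode=disabled.
    Type `configure perpetual_storage_wiggle=1 storage_migration_type=gradual' to enable gradual migration with the perpetual wiggle, or `configure storage_migration_type=aggressive' for aggressive migration.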
@@ -632,7 +632,6 @@ ACTOR Future<Void> commitTransaction(Reference<ITransaction> tr) {
}

// FIXME: Factor address parsing from coordinators, include, exclude

ACTOR Future<bool> coordinators(Database db, std::vector<StringRef> tokens, bool isClusterTLS) {
state StringRef setName;
StringRef nameTokenBegin = LiteralStringRef("description=");
@@ -49,6 +49,7 @@ enum class ConfigurationResult {
UNKNOWN_OPTION,
INCOMPLETE_CONFIGURATION,
INVALID_CONFIGURATION,
STORAGE_MIGRATION_DISABLED,
DATABASE_ALREADY_CREATED,
DATABASE_CREATED,
DATABASE_UNAVAILABLE,
@@ -61,7 +62,6 @@ enum class ConfigurationResult {
DCID_MISSING,
LOCKED_NOT_NEW,
SUCCESS_WARN_PPW_GRADUAL,
SUCCESS_WARN_CHANGE_STORAGE_NOMIGRATE,
SUCCESS,
};
@@ -569,11 +569,9 @@ Future<ConfigurationResult> changeConfig(Reference<DB> db, std::map<std::string,

if (newConfig.storageServerStoreType != oldConfig.storageServerStoreType &&
newConfig.storageMigrationType == StorageMigrationType::DISABLED) {
warnChangeStorageNoMigrate = true;
} else if ((newConfig.storageMigrationType == StorageMigrationType::GRADUAL &&
newConfig.perpetualStorageWiggleSpeed == 0) ||
(newConfig.perpetualStorageWiggleSpeed > 0 &&
newConfig.storageMigrationType == StorageMigrationType::DISABLED)) {
return ConfigurationResult::STORAGE_MIGRATION_DISABLED;
} else if (newConfig.storageMigrationType == StorageMigrationType::GRADUAL &&
newConfig.perpetualStorageWiggleSpeed == 0) {
warnPPWGradual = true;
}
}
@@ -636,8 +634,6 @@ Future<ConfigurationResult> changeConfig(Reference<DB> db, std::map<std::string,

if (warnPPWGradual) {
return ConfigurationResult::SUCCESS_WARN_PPW_GRADUAL;
} else if (warnChangeStorageNoMigrate) {
return ConfigurationResult::SUCCESS_WARN_CHANGE_STORAGE_NOMIGRATE;
} else {
return ConfigurationResult::SUCCESS;
}
@@ -425,6 +425,8 @@ public:
bool hasDiffProtocolProcess; // true if simulator is testing a process with a different version
bool setDiffProtocol; // true if a process with a different protocol version has been started

bool allowStorageMigrationTypeChange = false;

flowGlobalType global(int id) const final { return getCurrentProcess()->global(id); };
void setGlobal(size_t id, flowGlobalType v) final { getCurrentProcess()->setGlobal(id, v); };
@@ -2500,6 +2500,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
TraceEvent(newServer.isTss() ? "AddedTSS" : "AddedStorageServer", distributorId)
.detail("ServerID", newServer.id())
.detail("ProcessID", newServer.locality.processId())
.detail("ProcessClass", processClass.toString())
.detail("WaitFailureToken", newServer.waitFailure.getEndpoint().token)
.detail("Address", newServer.waitFailure.getEndpoint().getPrimaryAddress());
@@ -3246,7 +3247,11 @@ ACTOR Future<Void> removeWrongStoreType(DDTeamCollection* self) {
// Server may be removed due to failure while the wrongStoreTypeToRemove is sent to the
// storageServerTracker. This race may cause the server to be removed before it reacts to
// wrongStoreTypeToRemove.
server.second->wrongStoreTypeToRemove.set(true);
if (self->configuration.storageMigrationType == StorageMigrationType::AGGRESSIVE) {
// If the storage migration type is aggressive, let DD remove the SS with the wrong storage type.
server.second->wrongStoreTypeToRemove.set(true);
}
// Otherwise, wait for the perpetual wiggler to wiggle the SS with the wrong storage type.
foundSSToRemove = true;
TraceEvent("WrongStoreTypeRemover", self->distributorId)
.detail("Server", server.first)
@@ -3962,10 +3967,12 @@ ACTOR Future<std::vector<std::pair<StorageServerInterface, ProcessClass>>> getSe
ACTOR Future<Void> updateNextWigglingStoragePID(DDTeamCollection* teamCollection) {
state ReadYourWritesTransaction tr(teamCollection->cx);
state Value writeValue;
state const Key writeKey =
wigglingStorageServerKey.withSuffix(teamCollection->primary ? "/primary"_sr : "/remote"_sr);
loop {
try {
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
Optional<Value> value = wait(tr.get(wigglingStorageServerKey));
Optional<Value> value = wait(tr.get(writeKey));
if (teamCollection->pid2server_info.empty()) {
writeValue = LiteralStringRef("");
} else {
@@ -3981,7 +3988,7 @@ ACTOR Future<Void> updateNextWigglingStoragePID(DDTeamCollection
writeValue = pid;
}
}
tr.set(wigglingStorageServerKey, writeValue);
tr.set(writeKey, writeValue);
wait(tr.commit());
break;
} catch (Error& e) {
@@ -3989,6 +3996,7 @@ ACTOR Future<Void> updateNextWigglingStoragePID(DDTeamCollection
}
}
TraceEvent(SevDebug, "PerpetualNextWigglingStoragePID", teamCollection->distributorId)
.detail("Primary", teamCollection->primary)
.detail("WriteValue", writeValue);

return Void();
@@ -4010,6 +4018,15 @@ ACTOR Future<Void> perpetualStorageWiggleIterator(AsyncVar<bool>* stopSignal,
// take a rest if there are no other teams to place the wiggled data
takeRest = teamCollection->server_info.size() <= teamCollection->configuration.storageTeamSize ||
teamCollection->machine_info.size() < teamCollection->configuration.storageTeamSize;
teamCollection->doBuildTeams = true;
if (takeRest &&
teamCollection->configuration.storageMigrationType == StorageMigrationType::GRADUAL) {
TraceEvent(SevWarn, "PerpetualWiggleSleep", teamCollection->distributorId)
.suppressFor(SERVER_KNOBS->PERPETUAL_WIGGLE_DELAY * 4)
.detail("ServerSize", teamCollection->server_info.size())
.detail("MachineSize", teamCollection->machine_info.size())
.detail("StorageTeamSize", teamCollection->configuration.storageTeamSize);
}
}
wait(updateNextWigglingStoragePID(teamCollection));
}
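A worked evaluation of the takeRest condition above, for an assumed small triple-replication cluster with storageTeamSize = 3 and three storage processes on three machines:

    takeRest = (server_info.size() <= storageTeamSize) || (machine_info.size() < storageTeamSize)
             = (3 <= 3) || (3 < 3)
             = true

so a gradual wiggle on such a cluster keeps sleeping and logs PerpetualWiggleSleep; this is the situation the documentation above recommends ``aggressive`` for.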
@@ -4028,14 +4045,16 @@ ACTOR Future<std::pair<Future<Void>, Value>> watchPerpetualStoragePIDChange(DDTe
state ReadYourWritesTransaction tr(self->cx);
state Future<Void> watchFuture;
state Value ret;
state const Key readKey = wigglingStorageServerKey.withSuffix(self->primary ? "/primary"_sr : "/remote"_sr);

loop {
try {
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
Optional<Value> value = wait(tr.get(wigglingStorageServerKey));
Optional<Value> value = wait(tr.get(readKey));
if (value.present()) {
ret = value.get();
}
watchFuture = tr.watch(wigglingStorageServerKey);
watchFuture = tr.watch(readKey);
wait(tr.commit());
break;
} catch (Error& e) {
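The two hunks above split the single wiggling-PID system key into one key per region by appending "/primary" or "/remote". A minimal stand-alone C++ sketch of the idea, using std::string in place of FDB's Key type and a placeholder for the real wigglingStorageServerKey value (which is defined elsewhere in the tree, not in this diff):

    #include <iostream>
    #include <string>

    int main() {
        // Stand-in for wigglingStorageServerKey; the real key prefix is not shown in this patch.
        const std::string base = "<wigglingStorageServerKey>";
        for (bool primary : { true, false }) {
            // Mirrors wigglingStorageServerKey.withSuffix(primary ? "/primary"_sr : "/remote"_sr)
            std::string key = base + (primary ? "/primary" : "/remote");
            std::cout << key << "\n"; // each DDTeamCollection now reads, writes, and watches its own key
        }
        return 0;
    }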
@@ -4096,7 +4115,13 @@ ACTOR Future<Void> perpetualStorageWiggler(AsyncVar<bool>* stopSignal,
TEST(true); // paused because cluster is unhealthy
moveFinishFuture = Never();
self->includeStorageServersForWiggle();
TraceEvent("PerpetualStorageWigglePause", self->distributorId)
self->doBuildTeams = true;

TraceEvent(self->configuration.storageMigrationType == StorageMigrationType::AGGRESSIVE ? SevInfo
: SevWarn,
"PerpetualStorageWigglePause",
self->distributorId)
.detail("Primary", self->primary)
.detail("ProcessId", pid)
.detail("BestTeamKeepStuckCount", self->bestTeamKeepStuckCount)
.detail("ExtraHealthyTeamCount", extraTeamCount)
@@ -4108,6 +4133,7 @@ ACTOR Future<Void> perpetualStorageWiggler(AsyncVar<bool>* stopSignal,
movingCount = fv.size();
moveFinishFuture = waitForAll(fv);
TraceEvent("PerpetualStorageWiggleStart", self->distributorId)
.detail("Primary", self->primary)
.detail("ProcessId", pid)
.detail("ExtraHealthyTeamCount", extraTeamCount)
.detail("HealthyTeamCount", self->healthyTeamCount)
@@ -4134,6 +4160,7 @@ ACTOR Future<Void> perpetualStorageWiggler(AsyncVar<bool>* stopSignal,
moveFinishFuture = Never();
self->includeStorageServersForWiggle();
TraceEvent("PerpetualStorageWiggleFinish", self->distributorId)
.detail("Primary", self->primary)
.detail("ProcessId", pid.toString())
.detail("StorageCount", movingCount);
@@ -4153,6 +4180,7 @@ ACTOR Future<Void> perpetualStorageWiggler(AsyncVar<bool>* stopSignal,
if (self->wigglingPid.present()) {
self->includeStorageServersForWiggle();
TraceEvent("PerpetualStorageWiggleExitingPause", self->distributorId)
.detail("Primary", self->primary)
.detail("ProcessId", self->wigglingPid.get());
self->wigglingPid.reset();
}
@@ -4190,14 +4218,16 @@ ACTOR Future<Void> monitorPerpetualStorageWiggle(DDTeamCollectio
&stopWiggleSignal, finishStorageWiggleSignal.getFuture(), teamCollection));
collection.add(
perpetualStorageWiggler(&stopWiggleSignal, finishStorageWiggleSignal, teamCollection));
TraceEvent("PerpetualStorageWiggleOpen", teamCollection->distributorId).log();
TraceEvent("PerpetualStorageWiggleOpen", teamCollection->distributorId)
.detail("Primary", teamCollection->primary);
} else if (speed == 0) {
if (!stopWiggleSignal.get()) {
stopWiggleSignal.set(true);
wait(collection.signalAndReset());
teamCollection->pauseWiggle->set(true);
}
TraceEvent("PerpetualStorageWiggleClose", teamCollection->distributorId).log();
TraceEvent("PerpetualStorageWiggleClose", teamCollection->distributorId)
.detail("Primary", teamCollection->primary);
}
wait(watchFuture);
break;
@@ -4653,6 +4683,7 @@ ACTOR Future<Void> storageServerTracker(
if (worstStatus == DDTeamCollection::Status::WIGGLING && !isTss) {
status.isWiggling = true;
TraceEvent("PerpetualWigglingStorageServer", self->distributorId)
.detail("Primary", self->primary)
.detail("Server", server->id)
.detail("ProcessId", server->lastKnownInterface.locality.processId())
.detail("Address", worstAddr.toString());
@@ -5598,10 +5629,7 @@ ACTOR Future<Void> dataDistributionTeamCollection(
self->addActor.send(trackExcludedServers(self));
self->addActor.send(monitorHealthyTeams(self));
self->addActor.send(waitHealthyZoneChange(self));

if (self->primary) { // the primary DC also handles the satellite DC's perpetual wiggling
self->addActor.send(monitorPerpetualStorageWiggle(self));
}
self->addActor.send(monitorPerpetualStorageWiggle(self));
// SOMEDAY: Monitor FF/serverList for (new) servers that aren't in allServers and add or remove them

loop choose {
@@ -5641,6 +5669,7 @@ ACTOR Future<Void> dataDistributionTeamCollection(
.detail("StorageTeamSize", self->configuration.storageTeamSize)
.detail("HighestPriority", highestPriority)
.trackLatest(self->primary ? "TotalDataInFlight" : "TotalDataInFlightRemote");

loggingTrigger = delay(SERVER_KNOBS->DATA_DISTRIBUTION_LOGGING_INTERVAL, TaskPriority::FlushTrace);
}
when(wait(self->serverTrackerErrorOut.getFuture())) {} // Propagate errors from storageServerTracker
@@ -631,22 +631,22 @@ ACTOR Future<Void> waitForQuietDatabase(Database cx,
state Future<int64_t> storageQueueSize;
state Future<bool> dataDistributionActive;
state Future<bool> storageServersRecruiting;

auto traceMessage = "QuietDatabase" + phase + "Begin";
TraceEvent(traceMessage.c_str());
TraceEvent(traceMessage.c_str()).log();

// In a simulated environment, wait 5 seconds so that workers can move to their optimal locations
if (g_network->isSimulated())
wait(delay(5.0));

// The quiet database check (which runs at the end of every test) will always time out due to active data movement.
// To get around this, the quiet database check disables the perpetual wiggle in the setup phase.

printf("Set perpetual_storage_wiggle=0 ...\n");
wait(setPerpetualStorageWiggle(cx, false, LockAware::True));
printf("Set perpetual_storage_wiggle=0 Done.\n");

// Require 3 consecutive successful quiet database checks spaced 2 seconds apart
state int numSuccesses = 0;

loop {
try {
TraceEvent("QuietDatabaseWaitingOnDataDistributor").log();
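The setup step above is essentially the programmatic form of issuing the following from fdbcli, a mapping inferred from the printf messages rather than stated in the patch:

    fdb> configure perpetual_storage_wiggle=0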
@@ -686,15 +686,15 @@ ACTOR Future<Void> waitForQuietDatabase(Database cx,
if (dataInFlight.get() > dataInFlightGate || tLogQueueInfo.get().first > maxTLogQueueGate ||
tLogQueueInfo.get().second > maxPoppedVersionLag ||
dataDistributionQueueSize.get() > maxDataDistributionQueueSize ||
storageQueueSize.get() > maxStorageServerQueueGate || dataDistributionActive.get() == false ||
storageServersRecruiting.get() == true || teamCollectionValid.get() == false) {
storageQueueSize.get() > maxStorageServerQueueGate || !dataDistributionActive.get() ||
storageServersRecruiting.get() || !teamCollectionValid.get()) {

wait(delay(1.0));
numSuccesses = 0;
} else {
if (++numSuccesses == 3) {
auto msg = "QuietDatabase" + phase + "Done";
TraceEvent(msg.c_str());
TraceEvent(msg.c_str()).log();
break;
} else {
wait(delay(g_network->isSimulated() ? 2.0 : 30.0));
@@ -285,6 +285,7 @@ public:
int maxTLogVersion = TLogVersion::MAX_SUPPORTED;
// Set true to simplify simulation configs for easier debugging
bool simpleConfig = false;
int extraMachineCountDC = 0;
Optional<bool> generateFearless, buggify;
Optional<int> datacenters, desiredTLogCount, commitProxyCount, grvProxyCount, resolverCount, storageEngineType,
stderrSeverity, machineCount, processesPerMachine, coordinators;
@@ -338,7 +339,8 @@ public:
.add("machineCount", &machineCount)
.add("processesPerMachine", &processesPerMachine)
.add("coordinators", &coordinators)
.add("configDB", &configDBType);
.add("configDB", &configDBType)
.add("extraMachineCountDC", &extraMachineCountDC);
try {
auto file = toml::parse(testFile);
if (file.contains("configuration") && toml::find(file, "configuration").is_table()) {
@@ -1248,7 +1250,7 @@ void SimulationConfig::setRandomConfig() {
set_config("perpetual_storage_wiggle=0");
} else {
// TraceEvent("SimulatedConfigRandom").detail("PerpetualWiggle", 1);
set_config("storage_migration_type=gradual perpetual_storage_wiggle=1");
set_config("perpetual_storage_wiggle=1");
}

if (deterministicRandom()->random01() < 0.5) {
@@ -1655,6 +1657,7 @@ void SimulationConfig::setMachineCount(const TestConfig& testConfig) {
machine_count = std::max(machine_count, deterministicRandom()->randomInt(5, extraDB ? 6 : 10));
}
}
machine_count += datacenters * testConfig.extraMachineCountDC;
}

// Sets the coordinator count based on the testConfig. May be overwritten later
@@ -1693,7 +1696,7 @@ void SimulationConfig::setTss(const TestConfig& testConfig) {
// reduce tss to half of extra non-seed servers that can be recruited in usable regions.
tssCount =
std::max(0, std::min(tssCount, (db.usableRegions * (machine_count / datacenters) - replication_type) / 2));
std::max(0, std::min(tssCount, db.usableRegions * ((machine_count / datacenters) - db.storageTeamSize) / 2));

if (!testConfig.config.present() && tssCount > 0) {
std::string confStr = format("tss_count:=%d tss_storage_engine:=%d", tssCount, db.storageServerStoreType);
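A worked comparison of the old and new TSS caps above, under assumed numbers (usableRegions = 2, machine_count = 12, datacenters = 2, storageTeamSize = 3, and replication_type = 3 in the old formula); both expressions use C++ integer division:

    old cap: (2 * (12 / 2) - 3) / 2 = (12 - 3) / 2 = 4
    new cap: 2 * ((12 / 2) - 3) / 2 = (2 * 3) / 2  = 3

i.e. the new cap is computed per datacenter relative to the storage team size rather than from the raw machine count.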
@@ -1980,6 +1983,7 @@ void setupSimulatedSystem(std::vector<Future<Void>>* systemActors,
bool requiresExtraDBMachines = testConfig.extraDB && g_simulator.extraDB->toString() != conn.toString();
int assignedMachines = 0, nonVersatileMachines = 0;
bool gradualMigrationPossible = true;
std::vector<ProcessClass::ClassType> processClassesSubSet = { ProcessClass::UnsetClass,
ProcessClass::StatelessClass };
for (int dc = 0; dc < dataCenters; dc++) {
@@ -1988,6 +1992,7 @@ void setupSimulatedSystem(std::vector<Future<Void>>* systemActors,
std::vector<UID> machineIdentities;
int machines = machineCount / dataCenters +
(dc < machineCount % dataCenters); // add remainder of machines to first datacenter
int possible_ss = 0;
int dcCoordinators = coordinatorCount / dataCenters + (dc < coordinatorCount % dataCenters);
printf("Datacenter %d: %d/%d machines, %d/%d coordinators\n",
dc,
@@ -2028,8 +2033,12 @@ void setupSimulatedSystem(std::vector<Future<Void>>* systemActors,
processClass = ProcessClass((ProcessClass::ClassType)deterministicRandom()->randomInt(0, 3),
ProcessClass::CommandLineSource); // Unset, Storage, or Transaction
if (processClass ==
ProcessClass::StatelessClass) // *can't* be assigned to other roles, even in an emergency
ProcessClass::StatelessClass) { // *can't* be assigned to other roles, even in an emergency
nonVersatileMachines++;
}
if (processClass == ProcessClass::UnsetClass || processClass == ProcessClass::StorageClass) {
possible_ss++;
}
}

// FIXME: temporary code to test the storage cache
@@ -2097,6 +2106,10 @@ void setupSimulatedSystem(std::vector<Future<Void>>* systemActors,
assignedMachines++;
}

if (possible_ss - simconfig.db.desiredTSSCount / simconfig.db.usableRegions <= simconfig.db.storageTeamSize) {
gradualMigrationPossible = false;
}
}

g_simulator.desiredCoordinators = coordinatorCount;
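A worked evaluation of the gradualMigrationPossible guard above, under assumed numbers (possible_ss = 4 storage-capable machines in a datacenter, desiredTSSCount = 2, usableRegions = 1, storageTeamSize = 3):

    4 - 2 / 1 = 2 <= 3  ->  gradualMigrationPossible = false

so the simulator records that a gradual migration could not make progress on this cluster, and, via allowStorageMigrationTypeChange below, the ConfigureDatabase workload later refrains from exercising migration-type changes there.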
@@ -2144,6 +2157,7 @@ void setupSimulatedSystem(std::vector<Future<Void>>* systemActors,
// save some state that we only need when restarting the simulator.
g_simulator.connectionString = conn.toString();
g_simulator.testerCount = testerCount;
g_simulator.allowStorageMigrationTypeChange = gradualMigrationPossible;

TraceEvent("SimulatedClusterStarted")
.detail("DataCenters", dataCenters)
@@ -2152,6 +2166,7 @@ void setupSimulatedSystem(std::vector<Future<Void>>* systemActors,
.detail("SSLEnabled", sslEnabled)
.detail("SSLOnly", sslOnly)
.detail("ClassesAssigned", assignClasses)
.detail("GradualMigrationPossible", gradualMigrationPossible)
.detail("StartingConfiguration", pStartingConfiguration->toString());
}
@@ -25,12 +25,17 @@
#include "fdbserver/Knobs.h"
#include "fdbserver/workloads/workloads.actor.h"
#include "fdbrpc/simulator.h"
#include "fdbserver/QuietDatabase.h"
#include "flow/actorcompiler.h" // This must be the last #include.

// "ssd" is an alias to the preferred type which skews the random distribution toward it but that's okay.
static const char* storeTypes[] = {
"ssd", "ssd-1", "ssd-2", "memory", "memory-1", "memory-2", "memory-radixtree-beta"
};
static const char* storageMigrationTypes[] = { "perpetual_storage_wiggle=0 storage_migration_type=aggressive",
"perpetual_storage_wiggle=1",
"perpetual_storage_wiggle=1 storage_migration_type=gradual",
"storage_migration_type=aggressive" };
static const char* logTypes[] = { "log_engine:=1", "log_engine:=2", "log_spill:=1",
"log_spill:=2", "log_version:=2", "log_version:=3",
"log_version:=4", "log_version:=5", "log_version:=6" };
@@ -214,6 +219,7 @@ struct ConfigureDatabaseWorkload : TestWorkload {
double testDuration;
int additionalDBs;
bool allowDescriptorChange;
bool allowTestStorageMigration;
std::vector<Future<Void>> clients;
PerfIntCounter retries;
@@ -221,7 +227,8 @@ struct ConfigureDatabaseWorkload : TestWorkload {
testDuration = getOption(options, LiteralStringRef("testDuration"), 200.0);
allowDescriptorChange =
getOption(options, LiteralStringRef("allowDescriptorChange"), SERVER_KNOBS->ENABLE_CROSS_CLUSTER_SUPPORT);
allowTestStorageMigration =
getOption(options, "allowTestStorageMigration"_sr, false) && g_simulator.allowStorageMigrationTypeChange;
g_simulator.usableRegions = 1;
}
@@ -230,7 +237,7 @@ struct ConfigureDatabaseWorkload : TestWorkload {
Future<Void> setup(Database const& cx) override { return _setup(cx, this); }

Future<Void> start(Database const& cx) override { return _start(this, cx); }
Future<bool> check(Database const& cx) override { return true; }
Future<bool> check(Database const& cx) override { return _check(this, cx); }

void getMetrics(std::vector<PerfMetric>& m) override { m.push_back(retries.getMetric()); }
@@ -250,7 +257,7 @@ struct ConfigureDatabaseWorkload : TestWorkload {
}

ACTOR Future<Void> _setup(Database cx, ConfigureDatabaseWorkload* self) {
wait(success(ManagementAPI::changeConfig(cx.getReference(), "single", true)));
wait(success(ManagementAPI::changeConfig(cx.getReference(), "single storage_migration_type=aggressive", true)));
return Void();
}
@@ -262,6 +269,44 @@ struct ConfigureDatabaseWorkload : TestWorkload {
return Void();
}

ACTOR Future<bool> _check(ConfigureDatabaseWorkload* self, Database cx) {
// Only storage_migration_type=gradual && perpetual_storage_wiggle=1 needs this check, because QuietDatabase
// forces the perpetual wiggle to stop. For the other cases, the later ConsistencyCheck will verify the KV
// store type.
if (self->allowTestStorageMigration) {
state DatabaseConfiguration conf = wait(getDatabaseConfiguration(cx));
state int i;
loop {
state bool pass = true;
state std::vector<StorageServerInterface> storageServers = wait(getStorageServers(cx));

for (i = 0; i < storageServers.size(); i++) {
// Check that each storage server has the correct key value store type
if (!storageServers[i].isTss()) {
ReplyPromise<KeyValueStoreType> typeReply;
ErrorOr<KeyValueStoreType> keyValueStoreType =
wait(storageServers[i].getKeyValueStoreType.getReplyUnlessFailedFor(typeReply, 2, 0));
if (keyValueStoreType.present() && keyValueStoreType.get() != conf.storageServerStoreType) {
TraceEvent(SevWarn, "ConfigureDatabase_WrongStoreType")
.suppressFor(5.0)
.detail("ServerID", storageServers[i].id())
.detail("ProcessID", storageServers[i].locality.processId())
.detail("ServerStoreType",
keyValueStoreType.present() ? keyValueStoreType.get().toString() : "?")
.detail("ConfigStoreType", conf.storageServerStoreType.toString());
pass = false;
break;
}
}
}
if (pass)
break;
wait(delay(g_network->isSimulated() ? 2.0 : 30.0));
}
}
return true;
}

static int randomRoleNumber() {
int i = deterministicRandom()->randomInt(0, 4);
return i ? i : -1;
@@ -273,8 +318,12 @@ struct ConfigureDatabaseWorkload : TestWorkload {
if (g_simulator.speedUpSimulation) {
return Void();
}
state int randomChoice = deterministicRandom()->randomInt(0, 8);
state int randomChoice;
if (self->allowTestStorageMigration) {
randomChoice = deterministicRandom()->randomInt(4, 9);
} else {
randomChoice = deterministicRandom()->randomInt(0, 8);
}
if (randomChoice == 0) {
wait(success(
runRYWTransaction(cx, [=](Reference<ReadYourWritesTransaction> tr) -> Future<Optional<Value>> {
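For reference (this assumes randomInt's usual half-open [min, max) range, which is not spelled out in the hunk):

    randomInt(4, 9) -> {4, 5, 6, 7, 8};   randomInt(0, 8) -> {0, 1, ..., 7}

so with allowTestStorageMigration the new migration-type branch (choice 8, in the next hunk) becomes reachable; otherwise the workload keeps the old 0..7 range and never selects it.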
@@ -345,6 +394,15 @@ struct ConfigureDatabaseWorkload : TestWorkload {
cx,
backupTypes[deterministicRandom()->randomInt(0, sizeof(backupTypes) / sizeof(backupTypes[0]))],
false)));
} else if (randomChoice == 8) {
if (self->allowTestStorageMigration) {
TEST(true); // storage migration type change
wait(success(IssueConfigurationChange(
cx,
storageMigrationTypes[deterministicRandom()->randomInt(
0, sizeof(storageMigrationTypes) / sizeof(storageMigrationTypes[0]))],
false)));
}
} else {
ASSERT(false);
}
@@ -259,6 +259,7 @@ if(WITH_PYTHON)
add_fdb_test(TEST_FILES slow/CloggedStorefront.toml)
add_fdb_test(TEST_FILES slow/CommitBug.toml)
add_fdb_test(TEST_FILES slow/ConfigureTest.toml)
add_fdb_test(TEST_FILES slow/ConfigureStorageMigrationTest.toml)
add_fdb_test(TEST_FILES slow/CycleRollbackPlain.toml)
add_fdb_test(TEST_FILES slow/DDBalanceAndRemove.toml)
add_fdb_test(TEST_FILES slow/DDBalanceAndRemoveStatus.toml)
@@ -0,0 +1,20 @@
[configuration]
extraMachineCountDC = 2

[[test]]
testTitle = 'CloggedConfigureDatabaseTest'

[[test.workload]]
testName = 'ConfigureDatabase'
testDuration = 300.0
allowTestStorageMigration = true

[[test.workload]]
testName = 'RandomClogging'
testDuration = 300.0

[[test.workload]]
testName = 'RandomClogging'
testDuration = 300.0
scale = 0.1
clogginess = 2.0
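This new simulation test file is the one registered in CMakeLists.txt above; like other simulation tests it can presumably be run directly against a single fdbserver binary with the standard invocation (path adjusted to your checkout):

    fdbserver -r simulation -f tests/slow/ConfigureStorageMigrationTest.toml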