Add simulation support for changing the cluster file
This commit is contained in:
parent
6e0835f8a8
commit
a72066be33
|
@ -54,6 +54,7 @@ public:
|
||||||
FailDisk,
|
FailDisk,
|
||||||
RebootAndDelete,
|
RebootAndDelete,
|
||||||
RebootProcessAndDelete,
|
RebootProcessAndDelete,
|
||||||
|
RebootProcessAndSwitch,
|
||||||
Reboot,
|
Reboot,
|
||||||
RebootProcess,
|
RebootProcess,
|
||||||
None
|
None
|
||||||
|
@ -304,6 +305,7 @@ public:
|
||||||
KillType kt,
|
KillType kt,
|
||||||
bool forceKill = false,
|
bool forceKill = false,
|
||||||
KillType* ktFinal = nullptr) = 0;
|
KillType* ktFinal = nullptr) = 0;
|
||||||
|
// Issues a kill of type `kt` against all machines, i.e. the simulator-wide
// counterpart of the per-machine kill call declared above.
// NOTE(review): return value presumably means "at least one machine was
// killed", and `forceKill` / `ktFinal` presumably carry the same meaning as in
// the per-machine call — confirm against the concrete simulator implementation.
virtual bool killAll(KillType kt, bool forceKill = false, KillType* ktFinal = nullptr) = 0;
|
||||||
// virtual KillType getMachineKillState( UID zoneID ) = 0;
|
// virtual KillType getMachineKillState( UID zoneID ) = 0;
|
||||||
virtual bool canKillProcesses(std::vector<ProcessInfo*> const& availableProcesses,
|
virtual bool canKillProcesses(std::vector<ProcessInfo*> const& availableProcesses,
|
||||||
std::vector<ProcessInfo*> const& deadProcesses,
|
std::vector<ProcessInfo*> const& deadProcesses,
|
||||||
|
@ -390,6 +392,9 @@ public:
|
||||||
return clearedAddresses.find(address) != clearedAddresses.end();
|
return clearedAddresses.find(address) != clearedAddresses.end();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Toggles the per-address "switched cluster" flag stored in `switchedCluster`.
// operator[] value-initializes a missing entry to false before the flip, so the
// first call for a given address records true, the second false, and so on.
void switchCluster(NetworkAddress const& address) {
	bool& switched = switchedCluster[address];
	switched = !switched;
}
|
||||||
|
// Returns the flag last recorded by switchCluster() for `address`.
// An address that has never been toggled has no entry in `switchedCluster`;
// it is reported as "not switched" (false) rather than letting std::map::at
// throw std::out_of_range on the missing key.
bool hasSwitchedCluster(NetworkAddress const& address) const {
	auto it = switchedCluster.find(address);
	return it != switchedCluster.end() && it->second;
}
|
||||||
|
|
||||||
void excludeAddress(NetworkAddress const& address) {
|
void excludeAddress(NetworkAddress const& address) {
|
||||||
excludedAddresses[address]++;
|
excludedAddresses[address]++;
|
||||||
TraceEvent("ExcludeAddress").detail("Address", address).detail("Value", excludedAddresses[address]);
|
TraceEvent("ExcludeAddress").detail("Address", address).detail("Value", excludedAddresses[address]);
|
||||||
|
@ -540,6 +545,7 @@ private:
|
||||||
std::set<Optional<Standalone<StringRef>>> swapsDisabled;
|
std::set<Optional<Standalone<StringRef>>> swapsDisabled;
|
||||||
std::map<NetworkAddress, int> excludedAddresses;
|
std::map<NetworkAddress, int> excludedAddresses;
|
||||||
std::map<NetworkAddress, int> clearedAddresses;
|
std::map<NetworkAddress, int> clearedAddresses;
|
||||||
|
std::map<NetworkAddress, bool> switchedCluster;
|
||||||
std::map<NetworkAddress, std::map<std::string, int>> roleAddresses;
|
std::map<NetworkAddress, std::map<std::string, int>> roleAddresses;
|
||||||
std::map<std::string, double> disabledMap;
|
std::map<std::string, double> disabledMap;
|
||||||
bool allSwapsDisabled;
|
bool allSwapsDisabled;
|
||||||
|
|
|
@ -1794,6 +1794,15 @@ public:
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
// Calls killMachine() once for every entry in `machines`, passing that
// machine's machineId together with `kt`, `forceKill` and `ktFinal` unchanged.
// The loop does not stop at the first success: every machine is attempted.
// Returns true if at least one killMachine() call returned true, otherwise false.
// NOTE(review): the same `ktFinal` pointer is handed to every call, so if
// killMachine() writes through it the caller presumably only sees the value
// from the last machine processed — confirm against killMachine().
bool killAll(KillType kt, bool forceKill, KillType* ktFinal) override {
|
||||||
|
bool result = false;
|
||||||
|
for (auto& machine : machines) {
|
||||||
|
if (killMachine(machine.second.machineId, kt, forceKill, ktFinal)) {
|
||||||
|
result = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
bool killMachine(Optional<Standalone<StringRef>> machineId,
|
bool killMachine(Optional<Standalone<StringRef>> machineId,
|
||||||
KillType kt,
|
KillType kt,
|
||||||
bool forceKill,
|
bool forceKill,
|
||||||
|
@ -2008,7 +2017,7 @@ public:
|
||||||
if (process->startingClass != ProcessClass::TesterClass)
|
if (process->startingClass != ProcessClass::TesterClass)
|
||||||
killProcess_internal(process, kt);
|
killProcess_internal(process, kt);
|
||||||
}
|
}
|
||||||
} else if (kt == Reboot || kt == RebootAndDelete) {
|
} else if (kt == Reboot || kt == RebootAndDelete || kt == RebootProcessAndSwitch) {
|
||||||
for (auto& process : machines[machineId].processes) {
|
for (auto& process : machines[machineId].processes) {
|
||||||
TraceEvent("KillMachineProcess")
|
TraceEvent("KillMachineProcess")
|
||||||
.detail("KillType", kt)
|
.detail("KillType", kt)
|
||||||
|
@ -2564,7 +2573,7 @@ ACTOR void doReboot(ISimulator::ProcessInfo* p, ISimulator::KillType kt) {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
ASSERT(kt == ISimulator::RebootProcess || kt == ISimulator::Reboot || kt == ISimulator::RebootAndDelete ||
|
ASSERT(kt == ISimulator::RebootProcess || kt == ISimulator::Reboot || kt == ISimulator::RebootAndDelete ||
|
||||||
kt == ISimulator::RebootProcessAndDelete);
|
kt == ISimulator::RebootProcessAndDelete || kt == ISimulator::RebootProcessAndSwitch);
|
||||||
|
|
||||||
CODE_PROBE(kt == ISimulator::RebootProcess,
|
CODE_PROBE(kt == ISimulator::RebootProcess,
|
||||||
"Simulated process rebooted",
|
"Simulated process rebooted",
|
||||||
|
@ -2580,6 +2589,10 @@ ACTOR void doReboot(ISimulator::ProcessInfo* p, ISimulator::KillType kt) {
|
||||||
"Simulated process rebooted with data and coordination state deletion",
|
"Simulated process rebooted with data and coordination state deletion",
|
||||||
probe::assert::simOnly,
|
probe::assert::simOnly,
|
||||||
probe::context::sim2);
|
probe::context::sim2);
|
||||||
|
CODE_PROBE(kt == ISimulator::RebootProcessAndSwitch,
|
||||||
|
"Simulated process rebooted with different cluster file",
|
||||||
|
probe::assert::simOnly,
|
||||||
|
probe::context::sim2);
|
||||||
|
|
||||||
if (p->rebooting || !p->isReliable()) {
|
if (p->rebooting || !p->isReliable()) {
|
||||||
TraceEvent(SevDebug, "DoRebootFailed")
|
TraceEvent(SevDebug, "DoRebootFailed")
|
||||||
|
@ -2608,6 +2621,8 @@ ACTOR void doReboot(ISimulator::ProcessInfo* p, ISimulator::KillType kt) {
|
||||||
if ((kt == ISimulator::RebootAndDelete) || (kt == ISimulator::RebootProcessAndDelete)) {
|
if ((kt == ISimulator::RebootAndDelete) || (kt == ISimulator::RebootProcessAndDelete)) {
|
||||||
p->cleared = true;
|
p->cleared = true;
|
||||||
g_simulator->clearAddress(p->address);
|
g_simulator->clearAddress(p->address);
|
||||||
|
} else if (kt == ISimulator::RebootProcessAndSwitch) {
|
||||||
|
g_simulator->switchCluster(p->address);
|
||||||
}
|
}
|
||||||
p->shutdownSignal.send(kt);
|
p->shutdownSignal.send(kt);
|
||||||
} catch (Error& e) {
|
} catch (Error& e) {
|
||||||
|
|
|
@ -620,6 +620,7 @@ ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(Reference<IClusterConne
|
||||||
std::string* coordFolder,
|
std::string* coordFolder,
|
||||||
std::string baseFolder,
|
std::string baseFolder,
|
||||||
ClusterConnectionString connStr,
|
ClusterConnectionString connStr,
|
||||||
|
ClusterConnectionString otherConnStr,
|
||||||
bool useSeedFile,
|
bool useSeedFile,
|
||||||
AgentMode runBackupAgents,
|
AgentMode runBackupAgents,
|
||||||
std::string whitelistBinPaths,
|
std::string whitelistBinPaths,
|
||||||
|
@ -830,6 +831,23 @@ ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(Reference<IClusterConne
|
||||||
connRecord =
|
connRecord =
|
||||||
makeReference<ClusterConnectionFile>(joinPath(*dataFolder, "fdb.cluster"), connStr.toString());
|
makeReference<ClusterConnectionFile>(joinPath(*dataFolder, "fdb.cluster"), connStr.toString());
|
||||||
}
|
}
|
||||||
|
} else if (onShutdown.get() == ISimulator::RebootProcessAndSwitch) {
|
||||||
|
TraceEvent("SimulatedFDBDRebootAndSwitch")
|
||||||
|
.detail("Cycles", cycles)
|
||||||
|
.detail("RandomId", randomId)
|
||||||
|
.detail("Address", process->address)
|
||||||
|
.detail("ZoneId", localities.zoneId())
|
||||||
|
.detail("KillType", shutdownResult)
|
||||||
|
.detail("ConnectionString", connStr.toString())
|
||||||
|
.detail("OtherConnectionString", otherConnStr.toString())
|
||||||
|
.detail("SwitchingTo", g_simulator->hasSwitchedCluster(process->address));
|
||||||
|
|
||||||
|
// Handle the case where otherConnStr is '@'.
|
||||||
|
if (otherConnStr.toString().size() > 1) {
|
||||||
|
std::string newConnStr =
|
||||||
|
g_simulator->hasSwitchedCluster(process->address) ? otherConnStr.toString() : connStr.toString();
|
||||||
|
connRecord = makeReference<ClusterConnectionFile>(joinPath(*dataFolder, "fdb.cluster"), newConnStr);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
TraceEvent("SimulatedFDBDJustRepeat")
|
TraceEvent("SimulatedFDBDJustRepeat")
|
||||||
.detail("Cycles", cycles)
|
.detail("Cycles", cycles)
|
||||||
|
@ -846,6 +864,7 @@ std::map<Optional<Standalone<StringRef>>, std::vector<std::vector<std::string>>>
|
||||||
// process count is no longer needed because it is now the length of the vector of ip's, because it was one ip per
|
// process count is no longer needed because it is now the length of the vector of ip's, because it was one ip per
|
||||||
// process
|
// process
|
||||||
ACTOR Future<Void> simulatedMachine(ClusterConnectionString connStr,
|
ACTOR Future<Void> simulatedMachine(ClusterConnectionString connStr,
|
||||||
|
ClusterConnectionString otherConnStr,
|
||||||
std::vector<IPAddress> ips,
|
std::vector<IPAddress> ips,
|
||||||
bool sslEnabled,
|
bool sslEnabled,
|
||||||
LocalityData localities,
|
LocalityData localities,
|
||||||
|
@ -924,6 +943,7 @@ ACTOR Future<Void> simulatedMachine(ClusterConnectionString connStr,
|
||||||
&coordFolders[i],
|
&coordFolders[i],
|
||||||
baseFolder,
|
baseFolder,
|
||||||
connStr,
|
connStr,
|
||||||
|
otherConnStr,
|
||||||
useSeedFile,
|
useSeedFile,
|
||||||
agentMode,
|
agentMode,
|
||||||
whitelistBinPaths,
|
whitelistBinPaths,
|
||||||
|
@ -942,6 +962,7 @@ ACTOR Future<Void> simulatedMachine(ClusterConnectionString connStr,
|
||||||
&coordFolders[i],
|
&coordFolders[i],
|
||||||
baseFolder,
|
baseFolder,
|
||||||
connStr,
|
connStr,
|
||||||
|
otherConnStr,
|
||||||
useSeedFile,
|
useSeedFile,
|
||||||
agentMode,
|
agentMode,
|
||||||
whitelistBinPaths,
|
whitelistBinPaths,
|
||||||
|
@ -1311,6 +1332,7 @@ ACTOR Future<Void> restartSimulatedSystem(std::vector<Future<Void>>* systemActor
|
||||||
// SOMEDAY: parse backup agent from test file
|
// SOMEDAY: parse backup agent from test file
|
||||||
systemActors->push_back(reportErrors(
|
systemActors->push_back(reportErrors(
|
||||||
simulatedMachine(conn,
|
simulatedMachine(conn,
|
||||||
|
ClusterConnectionString(),
|
||||||
ipAddrs,
|
ipAddrs,
|
||||||
usingSSL,
|
usingSSL,
|
||||||
localities,
|
localities,
|
||||||
|
@ -2346,20 +2368,23 @@ void setupSimulatedSystem(std::vector<Future<Void>>* systemActors,
|
||||||
// check the sslEnablementMap using only one ip
|
// check the sslEnablementMap using only one ip
|
||||||
LocalityData localities(Optional<Standalone<StringRef>>(), zoneId, machineId, dcUID);
|
LocalityData localities(Optional<Standalone<StringRef>>(), zoneId, machineId, dcUID);
|
||||||
localities.set("data_hall"_sr, dcUID);
|
localities.set("data_hall"_sr, dcUID);
|
||||||
systemActors->push_back(reportErrors(simulatedMachine(conn,
|
systemActors->push_back(reportErrors(
|
||||||
ips,
|
simulatedMachine(conn,
|
||||||
sslEnabled,
|
requiresExtraDBMachines ? ClusterConnectionString(g_simulator->extraDatabases.at(0))
|
||||||
localities,
|
: ClusterConnectionString(),
|
||||||
processClass,
|
ips,
|
||||||
baseFolder,
|
sslEnabled,
|
||||||
false,
|
localities,
|
||||||
machine == useSeedForMachine,
|
processClass,
|
||||||
requiresExtraDBMachines ? AgentOnly : AgentAddition,
|
baseFolder,
|
||||||
sslOnly,
|
false,
|
||||||
whitelistBinPaths,
|
machine == useSeedForMachine,
|
||||||
protocolVersion,
|
requiresExtraDBMachines ? AgentOnly : AgentAddition,
|
||||||
configDBType),
|
sslOnly,
|
||||||
"SimulatedMachine"));
|
whitelistBinPaths,
|
||||||
|
protocolVersion,
|
||||||
|
configDBType),
|
||||||
|
"SimulatedMachine"));
|
||||||
|
|
||||||
if (requiresExtraDBMachines) {
|
if (requiresExtraDBMachines) {
|
||||||
int cluster = 4;
|
int cluster = 4;
|
||||||
|
@ -2376,6 +2401,7 @@ void setupSimulatedSystem(std::vector<Future<Void>>* systemActors,
|
||||||
LocalityData localities(Optional<Standalone<StringRef>>(), newZoneId, newMachineId, dcUID);
|
LocalityData localities(Optional<Standalone<StringRef>>(), newZoneId, newMachineId, dcUID);
|
||||||
localities.set("data_hall"_sr, dcUID);
|
localities.set("data_hall"_sr, dcUID);
|
||||||
systemActors->push_back(reportErrors(simulatedMachine(ClusterConnectionString(extraDatabase),
|
systemActors->push_back(reportErrors(simulatedMachine(ClusterConnectionString(extraDatabase),
|
||||||
|
conn,
|
||||||
extraIps,
|
extraIps,
|
||||||
sslEnabled,
|
sslEnabled,
|
||||||
localities,
|
localities,
|
||||||
|
@ -2422,6 +2448,7 @@ void setupSimulatedSystem(std::vector<Future<Void>>* systemActors,
|
||||||
Optional<Standalone<StringRef>>(), newZoneId, newZoneId, Optional<Standalone<StringRef>>());
|
Optional<Standalone<StringRef>>(), newZoneId, newZoneId, Optional<Standalone<StringRef>>());
|
||||||
systemActors->push_back(
|
systemActors->push_back(
|
||||||
reportErrors(simulatedMachine(conn,
|
reportErrors(simulatedMachine(conn,
|
||||||
|
ClusterConnectionString(),
|
||||||
ips,
|
ips,
|
||||||
sslEnabled,
|
sslEnabled,
|
||||||
localities,
|
localities,
|
||||||
|
|
|
@ -357,6 +357,11 @@ struct MachineAttritionWorkload : FailureInjectionWorkload {
|
||||||
TraceEvent("Assassination").detail("TargetDataHall", target).detail("KillType", kt);
|
TraceEvent("Assassination").detail("TargetDataHall", target).detail("KillType", kt);
|
||||||
|
|
||||||
g_simulator->killDataHall(target, kt);
|
g_simulator->killDataHall(target, kt);
|
||||||
|
} else if (!g_simulator->extraDatabases.empty() && deterministicRandom()->random01() < 0.1) {
|
||||||
|
state ISimulator::KillType kt = ISimulator::RebootProcessAndSwitch;
|
||||||
|
g_simulator->killAll(kt, true);
|
||||||
|
wait(delay(self->testDuration / 2));
|
||||||
|
g_simulator->killAll(kt, true);
|
||||||
} else {
|
} else {
|
||||||
state int killedMachines = 0;
|
state int killedMachines = 0;
|
||||||
while (killedMachines < self->machinesToKill && self->machines.size() > self->machinesToLeave) {
|
while (killedMachines < self->machinesToKill && self->machines.size() > self->machinesToLeave) {
|
||||||
|
|
Loading…
Reference in New Issue