Added datahall kill and an option to target a specific datahall/DC/machine ID

This commit is contained in:
Jon Fu 2019-10-23 14:19:17 -07:00
parent d97ff75638
commit 103cc37a35
2 changed files with 26 additions and 5 deletions

View File

@ -964,8 +964,10 @@ ACTOR Future<Void> workerServer(
when( RebootRequest req = waitNext( interf.clientInterface.reboot.getFuture() ) ) {
state RebootRequest rebootReq = req;
// If suspendDuration is INT_MAX, this trace would never be logged if it were emitted inside the block below.
// It is also a useful trace to have even when suspendDuration is 0.
TraceEvent("RebootRequestSuspendingProcess").detail("Duration", req.waitForDuration);
if(req.waitForDuration) {
TraceEvent("RebootRequestSuspendingProcess").detail("Duration", req.waitForDuration);
flushTraceFileVoid();
setProfilingEnabled(0);
g_network->stop();

View File

@ -65,7 +65,9 @@ struct MachineAttritionWorkload : TestWorkload {
bool reboot;
bool killDc;
bool killMachine;
bool killDatahall;
bool killSelf;
std::string targetId;
bool replacement;
bool waitForVersion;
bool allowFaultInjection;
@ -85,7 +87,9 @@ struct MachineAttritionWorkload : TestWorkload {
reboot = getOption( options, LiteralStringRef("reboot"), false );
killDc = getOption( options, LiteralStringRef("killDc"), deterministicRandom()->random01() < 0.25 );
killMachine = getOption( options, LiteralStringRef("killMachine"), false);
killDatahall = getOption( options, LiteralStringRef("killDatahall"), false);
killSelf = getOption( options, LiteralStringRef("killSelf"), false );
targetId = getOption( options, LiteralStringRef("targetId"), "");
replacement = getOption( options, LiteralStringRef("replacement"), reboot && deterministicRandom()->random01() < 0.5 );
waitForVersion = getOption( options, LiteralStringRef("waitForVersion"), false );
allowFaultInjection = getOption( options, LiteralStringRef("allowFaultInjection"), true );
@ -172,11 +176,12 @@ struct MachineAttritionWorkload : TestWorkload {
wait(delay(delayBeforeKill));
// Pick a dcId to kill
deterministicRandom()->randomShuffle(workers);
Optional<Standalone<StringRef>> killDcId = workers.back().interf.locality.dcId();
TraceEvent("Assassination").detail("TargetDataCenter", killDcId);
Optional<Standalone<StringRef>> killDcId = self->targetId.empty() ? workers.back().interf.locality.dcId() : self->targetId;
TraceEvent("Assassination").detail("TargetDataCenterId", killDcId);
for (const auto& worker : workers) {
// kill all matching dcId workers
if (worker.interf.locality.dcId().present() && worker.interf.locality.dcId() == killDcId) {
TraceEvent("SendingRebootRequest").detail("TargetMachine", worker.interf.locality.toString());
worker.interf.clientInterface.reboot.send(rbReq);
}
}
@ -184,11 +189,25 @@ struct MachineAttritionWorkload : TestWorkload {
wait(delay(delayBeforeKill));
// Pick a machine to kill
deterministicRandom()->randomShuffle(workers);
Optional<Standalone<StringRef>> killMachineId = workers.back().interf.locality.machineId();
TraceEvent("Assassination").detail("TargetMachine", killMachineId);
Optional<Standalone<StringRef>> killMachineId = self->targetId.empty() ? workers.back().interf.locality.machineId() : self->targetId;
TraceEvent("Assassination").detail("TargetMachineId", killMachineId);
for (const auto& worker : workers) {
// kill all matching machine workers
if (worker.interf.locality.machineId().present() && worker.interf.locality.machineId() == killMachineId) {
TraceEvent("SendingRebootRequest").detail("TargetMachine", worker.interf.locality.toString());
worker.interf.clientInterface.reboot.send(rbReq);
}
}
} else if (self->killDatahall) {
wait(delay(delayBeforeKill));
// Pick a datahall to kill
deterministicRandom()->randomShuffle(workers);
Optional<Standalone<StringRef>> killDatahallId = self->targetId.empty() ? workers.back().interf.locality.dataHallId() : self->targetId;
TraceEvent("Assassination").detail("TargetDatahallId", killDatahallId);
for (const auto& worker : workers) {
// kill all matching datahall workers
if (worker.interf.locality.dataHallId().present() && worker.interf.locality.dataHallId() == killDatahallId) {
TraceEvent("SendingRebootRequest").detail("TargetMachine", worker.interf.locality.toString());
worker.interf.clientInterface.reboot.send(rbReq);
}
}