foundationdb/fdbserver/workloads/TargetedKill.actor.cpp

142 lines
5.7 KiB
C++
Raw Normal View History

2017-05-26 04:48:44 +08:00
/*
* TargetedKill.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
*
2017-05-26 04:48:44 +08:00
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
2017-05-26 04:48:44 +08:00
* http://www.apache.org/licenses/LICENSE-2.0
*
2017-05-26 04:48:44 +08:00
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "fdbclient/NativeAPI.actor.h"
#include "fdbserver/TesterInterface.actor.h"
#include "fdbserver/workloads/workloads.actor.h"
2017-05-26 04:48:44 +08:00
#include "fdbrpc/simulator.h"
#include "fdbserver/MasterInterface.h"
#include "fdbclient/SystemData.h"
#include "fdbserver/WorkerInterface.actor.h"
2017-05-26 04:48:44 +08:00
#include "fdbserver/ServerDBInfo.h"
#include "fdbserver/QuietDatabase.h"
#include "flow/actorcompiler.h" // This must be the last #include.
2017-05-26 04:48:44 +08:00
struct TargetedKillWorkload : TestWorkload {
std::string machineToKill;
bool enabled, killAllMachineProcesses;
double killAt;
TargetedKillWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) {
2017-05-26 04:48:44 +08:00
enabled = !clientId; // only do this on the "first" client
killAt = getOption(options, LiteralStringRef("killAt"), 5.0);
machineToKill = getOption(options, LiteralStringRef("machineToKill"), LiteralStringRef("master")).toString();
killAllMachineProcesses = getOption(options, LiteralStringRef("killWholeMachine"), false);
2017-05-26 04:48:44 +08:00
}
2020-10-05 13:29:07 +08:00
std::string description() const override { return "TargetedKillWorkload"; }
Future<Void> setup(Database const& cx) override { return Void(); }
Future<Void> start(Database const& cx) override {
2017-05-26 04:48:44 +08:00
TraceEvent("StartTargetedKill").detail("Enabled", enabled);
if (enabled)
return assassin(cx, this);
2017-05-26 04:48:44 +08:00
return Void();
}
2020-10-05 13:29:07 +08:00
Future<bool> check(Database const& cx) override { return true; }
void getMetrics(vector<PerfMetric>& m) override {}
2017-05-26 04:48:44 +08:00
ACTOR Future<Void> killEndpoint(NetworkAddress address, Database cx, TargetedKillWorkload* self) {
if (&g_simulator == g_network) {
g_simulator.killInterface(address, ISimulator::KillInstantly);
2017-05-26 04:48:44 +08:00
return Void();
}
state vector<WorkerDetails> workers = wait(getWorkers(self->dbInfo));
2017-05-26 04:48:44 +08:00
int killed = 0;
for (int i = 0; i < workers.size(); i++) {
if (workers[i].interf.master.getEndpoint().getPrimaryAddress() == address ||
(self->killAllMachineProcesses &&
workers[i].interf.master.getEndpoint().getPrimaryAddress().ip == address.ip &&
workers[i].processClass != ProcessClass::TesterClass)) {
TraceEvent("WorkerKill").detail("TargetedMachine", address).detail("Worker", workers[i].interf.id());
workers[i].interf.clientInterface.reboot.send(RebootRequest());
2017-05-26 04:48:44 +08:00
}
}
if (!killed)
2017-05-26 04:48:44 +08:00
TraceEvent(SevWarn, "WorkerNotFoundAtEndpoint").detail("Address", address);
else
TraceEvent("WorkersKilledAtEndpoint").detail("Address", address).detail("KilledProcesses", killed);
return Void();
}
ACTOR Future<Void> assassin(Database cx, TargetedKillWorkload* self) {
wait(delay(self->killAt));
state vector<StorageServerInterface> storageServers = wait(getStorageServers(cx));
2017-05-26 04:48:44 +08:00
NetworkAddress machine;
if (self->machineToKill == "master") {
2017-05-26 04:48:44 +08:00
machine = self->dbInfo->get().master.address();
2020-09-11 08:44:15 +08:00
} else if (self->machineToKill == "commitproxy") {
auto commitProxies = cx->getCommitProxies(false);
int o = deterministicRandom()->randomInt(0, commitProxies->size());
for (int i = 0; i < commitProxies->size(); i++) {
CommitProxyInterface mpi = commitProxies->getInterface(o);
2017-05-26 04:48:44 +08:00
machine = mpi.address();
if (machine != self->dbInfo->get().clusterInterface.getWorkers.getEndpoint().getPrimaryAddress())
2017-05-26 04:48:44 +08:00
break;
o = ++o % commitProxies->size();
2017-05-26 04:48:44 +08:00
}
2020-09-11 08:44:15 +08:00
} else if (self->machineToKill == "grvproxy") {
2020-07-15 15:37:41 +08:00
auto grvProxies = cx->getGrvProxies(false);
int o = deterministicRandom()->randomInt(0, grvProxies->size());
for (int i = 0; i < grvProxies->size(); i++) {
2020-07-15 15:37:41 +08:00
GrvProxyInterface gpi = grvProxies->getInterface(o);
machine = gpi.address();
if (machine != self->dbInfo->get().clusterInterface.getWorkers.getEndpoint().getPrimaryAddress())
2020-07-15 15:37:41 +08:00
break;
o = ++o % grvProxies->size();
2020-07-15 15:37:41 +08:00
}
2020-09-11 08:44:15 +08:00
} else if (self->machineToKill == "tlog") {
2017-05-26 04:48:44 +08:00
auto tlogs = self->dbInfo->get().logSystemConfig.allPresentLogs();
int o = deterministicRandom()->randomInt(0, tlogs.size());
for (int i = 0; i < tlogs.size(); i++) {
2017-05-26 04:48:44 +08:00
TLogInterface tli = tlogs[o];
machine = tli.address();
if (machine != self->dbInfo->get().clusterInterface.getWorkers.getEndpoint().getPrimaryAddress())
2017-05-26 04:48:44 +08:00
break;
o = ++o % tlogs.size();
2017-05-26 04:48:44 +08:00
}
2020-09-11 08:44:15 +08:00
} else if (self->machineToKill == "storage" || self->machineToKill == "ss" ||
self->machineToKill == "storageserver") {
int o = deterministicRandom()->randomInt(0, storageServers.size());
for (int i = 0; i < storageServers.size(); i++) {
2017-05-26 04:48:44 +08:00
StorageServerInterface ssi = storageServers[o];
machine = ssi.address();
if (machine != self->dbInfo->get().clusterInterface.getWorkers.getEndpoint().getPrimaryAddress())
2017-05-26 04:48:44 +08:00
break;
o = ++o % storageServers.size();
2017-05-26 04:48:44 +08:00
}
2020-09-11 08:44:15 +08:00
} else if (self->machineToKill == "clustercontroller" || self->machineToKill == "cc") {
machine = self->dbInfo->get().clusterInterface.getWorkers.getEndpoint().getPrimaryAddress();
2017-05-26 04:48:44 +08:00
}
TraceEvent("IsolatedMark").detail("TargetedMachine", machine).detail("Role", self->machineToKill);
wait(self->killEndpoint(machine, cx, self));
2017-05-26 04:48:44 +08:00
return Void();
}
};
// Global factory object for TargetedKillWorkload, constructed with the name "TargetedKill"
// (presumably the name test specifications use to select this workload - confirm against WorkloadFactory).
WorkloadFactory<TargetedKillWorkload> TargetedKillWorkloadFactory("TargetedKill");