foundationdb/fdbserver/workloads/Rollback.actor.cpp

125 lines
5.0 KiB
C++
Raw Normal View History

2017-05-26 04:48:44 +08:00
/*
* Rollback.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
2022-03-22 04:36:23 +08:00
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
2017-05-26 04:48:44 +08:00
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
2017-05-26 04:48:44 +08:00
* http://www.apache.org/licenses/LICENSE-2.0
*
2017-05-26 04:48:44 +08:00
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "fdbclient/NativeAPI.actor.h"
#include "fdbserver/TesterInterface.actor.h"
#include "fdbserver/workloads/workloads.actor.h"
2017-05-26 04:48:44 +08:00
#include "fdbrpc/simulator.h"
#include "fdbserver/MasterInterface.h"
#include "fdbclient/SystemData.h"
#include "fdbserver/ServerDBInfo.h"
#include "flow/actorcompiler.h" // This must be the last #include.
2017-05-26 04:48:44 +08:00
// Choose a random commit proxy and a random tLog; the chosen tLog is referred to as uncloggedTLog.
// The workload first clogs the network links between the chosen proxy and every tLog except uncloggedTLog.
// While those links are still clogged, the workload kills the proxy (if enableFailures is set) or clogs the
// proxy's interface (otherwise), and then clogs uncloggedTLog's interface.
// Note: A clogged network link's latency becomes "clogDuration".
2017-05-26 04:48:44 +08:00
struct RollbackWorkload : TestWorkload {
bool enableFailures, multiple, enabled;
double meanDelay, clogDuration, testDuration;
RollbackWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) {
2017-05-26 04:48:44 +08:00
enabled = !clientId; // only do this on the "first" client
meanDelay = getOption(options, LiteralStringRef("meanDelay"), 20.0); // Only matters if multiple==true
clogDuration = getOption(options, LiteralStringRef("clogDuration"), 3.0);
testDuration = getOption(options, LiteralStringRef("testDuration"), 10.0);
enableFailures = getOption(options, LiteralStringRef("enableFailures"), false);
multiple = getOption(options, LiteralStringRef("multiple"), true);
2017-05-26 04:48:44 +08:00
}
2020-10-05 13:29:07 +08:00
std::string description() const override { return "RollbackWorkload"; }
Future<Void> setup(Database const& cx) override { return Void(); }
Future<Void> start(Database const& cx) override {
2017-05-26 04:48:44 +08:00
if (&g_simulator == g_network && enabled)
return timeout(reportErrors(rollbackFailureWorker(cx, this, meanDelay), "RollbackFailureWorkerError"),
testDuration,
Void());
2017-05-26 04:48:44 +08:00
return Void();
}
2020-10-05 13:29:07 +08:00
Future<bool> check(Database const& cx) override { return true; }
void getMetrics(std::vector<PerfMetric>& m) override {}
2017-05-26 04:48:44 +08:00
ACTOR Future<Void> simulateFailure(Database cx, RollbackWorkload* self) {
2019-02-18 10:46:59 +08:00
state ServerDBInfo system = self->dbInfo->get();
2017-05-26 04:48:44 +08:00
auto tlogs = system.logSystemConfig.allPresentLogs();
2020-09-11 08:44:15 +08:00
if (tlogs.empty() || system.client.commitProxies.empty()) {
2017-05-26 04:48:44 +08:00
TraceEvent(SevInfo, "UnableToTriggerRollback").detail("Reason", "No tlogs in System Map");
return Void();
}
2020-09-11 08:44:15 +08:00
state CommitProxyInterface proxy = deterministicRandom()->randomChoice(system.client.commitProxies);
int utIndex = deterministicRandom()->randomInt(0, tlogs.size());
2017-05-26 04:48:44 +08:00
state NetworkAddress uncloggedTLog = tlogs[utIndex].address();
for (int t = 0; t < tlogs.size(); t++)
2017-05-26 04:48:44 +08:00
if (t != utIndex)
if (tlogs[t].address().ip == proxy.address().ip) {
TraceEvent(SevInfo, "UnableToTriggerRollback").detail("Reason", "proxy-clogged tLog shared IPs");
2017-05-26 04:48:44 +08:00
return Void();
}
TraceEvent("AttemptingToTriggerRollback")
2020-09-11 08:44:15 +08:00
.detail("CommitProxy", proxy.address())
.detail("UncloggedTLog", uncloggedTLog);
2017-05-26 04:48:44 +08:00
for (int t = 0; t < tlogs.size(); t++) {
if (t != utIndex) {
g_simulator.clogPair(proxy.address().ip, tlogs[t].address().ip, self->clogDuration);
// g_simulator.clogInterface( g_simulator.getProcess( system.tlogs[t].commit.getEndpoint() ),
// self->clogDuration, ClogAll );
}
}
2017-05-26 04:48:44 +08:00
// While the clogged machines are still clogged...
wait(delay(self->clogDuration / 3));
2019-02-18 10:46:59 +08:00
system = self->dbInfo->get();
2017-05-26 04:48:44 +08:00
// Kill the proxy and clog the unclogged tlog
2017-05-26 04:48:44 +08:00
if (self->enableFailures) {
g_simulator.killProcess(g_simulator.getProcessByAddress(proxy.address()), ISimulator::KillInstantly);
g_simulator.clogInterface(uncloggedTLog.ip, self->clogDuration, ClogAll);
2017-05-26 04:48:44 +08:00
} else {
g_simulator.clogInterface(proxy.address().ip, self->clogDuration, ClogAll);
g_simulator.clogInterface(uncloggedTLog.ip, self->clogDuration, ClogAll);
2017-05-26 04:48:44 +08:00
}
return Void();
}
ACTOR Future<Void> rollbackFailureWorker(Database cx, RollbackWorkload* self, double delay) {
2017-05-26 04:48:44 +08:00
state PromiseStream<Void> events;
if (self->multiple) {
state double lastTime = now();
loop {
wait(poisson(&lastTime, delay));
wait(self->simulateFailure(cx, self));
2017-05-26 04:48:44 +08:00
}
} else {
wait(::delay(deterministicRandom()->random01() *
std::max(0.0, self->testDuration - self->clogDuration * 13.0)));
wait(self->simulateFailure(cx, self));
2017-05-26 04:48:44 +08:00
}
return Void();
}
};
// Factory object for RollbackWorkload, constructed with the name "Rollback" (presumably the name by which
// test specifications select this workload -- confirm against WorkloadFactory).
WorkloadFactory<RollbackWorkload> RollbackWorkloadFactory("Rollback");