Merge pull request #1543 from bnamasivayam/release-6.1

Add a workload to trigger repeated recoveries.
This commit is contained in:
Evan Tschannen 2019-05-07 18:39:29 -07:00 committed by GitHub
commit 0d4fcd526b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 152 additions and 0 deletions

View File

@ -111,6 +111,7 @@ set(FDBSERVER_SRCS
workloads/BulkSetup.actor.h
workloads/ChangeConfig.actor.cpp
workloads/ClientTransactionProfileCorrectness.actor.cpp
workloads/TriggerRecovery.actor.cpp
workloads/CommitBugCheck.actor.cpp
workloads/ConfigureDatabase.actor.cpp
workloads/ConflictRange.actor.cpp

View File

@ -104,6 +104,7 @@
<ActorCompiler Include="workloads\AtomicOps.actor.cpp" />
<ActorCompiler Include="workloads\AtomicOpsApiCorrectness.actor.cpp" />
<ActorCompiler Include="workloads\ClientTransactionProfileCorrectness.actor.cpp" />
<ActorCompiler Include="workloads\TriggerRecovery.actor.cpp" />
<ActorCompiler Include="workloads\BackupToDBAbort.actor.cpp" />
<ActorCompiler Include="workloads\BackupToDBCorrectness.actor.cpp" />
<ActorCompiler Include="workloads\BackupToDBUpgrade.actor.cpp" />

View File

@ -228,6 +228,9 @@
<ActorCompiler Include="workloads\ClientTransactionProfileCorrectness.actor.cpp">
<Filter>workloads</Filter>
</ActorCompiler>
<ActorCompiler Include="workloads\TriggerRecovery.actor.cpp">
<Filter>workloads</Filter>
</ActorCompiler>
<ActorCompiler Include="workloads\StatusWorkload.actor.cpp">
<Filter>workloads</Filter>
</ActorCompiler>

View File

@ -0,0 +1,147 @@
#include "fdbserver/workloads/workloads.actor.h"
#include "fdbserver/ServerDBInfo.h"
#include "fdbclient/Status.h"
#include "fdbclient/StatusClient.h"
#include "fdbclient/ManagementAPI.actor.h"
#include "fdbclient/RunTransaction.actor.h"
#include "flow/actorcompiler.h" // has to be last include
// Test workload that repeatedly triggers cluster recoveries, either by changing the configured
// number of resolvers or by asking workers to reboot. Only the client with clientId == 0 does
// any work; setup() and start() return immediately on every other client.
//
// Options (read in the constructor):
//   startTime              - seconds to wait before the first recovery attempt (default 0.0)
//   numRecoveries          - number of recovery attempts (default g_random->randomInt(1, 10))
//   delayBetweenRecoveries - seconds to wait after each attempt before checking the cluster (default 0.0)
//   killAllProportion      - probability that an attempt uses killAll() instead of a resolver
//                            configuration change (default 0.1)
struct TriggerRecoveryLoopWorkload : TestWorkload {
	double startTime;
	int numRecoveries;
	double delayBetweenRecoveries;
	double killAllProportion;
	// Resolver count read from the database configuration during setup(); restored when start() finishes.
	Optional<int32_t> originalNumOfResolvers;
	// Resolver count most recently applied successfully by changeResolverConfig().
	Optional<int32_t> currentNumOfResolvers;

	TriggerRecoveryLoopWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) {
		startTime = getOption(options, LiteralStringRef("startTime"), 0.0);
		numRecoveries = getOption(options, LiteralStringRef("numRecoveries"), g_random->randomInt(1, 10));
		delayBetweenRecoveries = getOption(options, LiteralStringRef("delayBetweenRecoveries"), 0.0);
		killAllProportion = getOption(options, LiteralStringRef("killAllProportion"), 0.1);
		// Use && throughout: the alternative token `and` is not accepted by every compiler configuration.
		ASSERT(numRecoveries > 0 && startTime >= 0 && delayBetweenRecoveries >= 0);
		TraceEvent(SevInfo, "TriggerRecoveryLoopSetup")
		    .detail("StartTime", startTime)
		    .detail("NumRecoveries", numRecoveries)
		    .detail("DelayBetweenRecoveries", delayBetweenRecoveries)
		    .detail("KillAllProportion", killAllProportion);
	}

	virtual std::string description() { return "TriggerRecoveryLoop"; }

	// Records the desired resolver count from the current database configuration as both the
	// original and the current value.
	ACTOR Future<Void> setOriginalNumOfResolvers(Database cx, TriggerRecoveryLoopWorkload* self) {
		DatabaseConfiguration config = wait(getDatabaseConfiguration(cx));
		self->originalNumOfResolvers = self->currentNumOfResolvers = config.getDesiredResolvers();
		return Void();
	}

	virtual Future<Void> setup(Database const& cx) {
		if (clientId == 0) {
			return setOriginalNumOfResolvers(cx, this);
		}
		return Void();
	}

	// Completes once a transaction has obtained a read version and committed successfully.
	// Errors are passed to tr.onError() and the attempt is repeated.
	ACTOR Future<Void> returnIfClusterRecovered(Database cx) {
		loop {
			state ReadYourWritesTransaction tr(cx);
			try {
				tr.setOption(FDBTransactionOptions::LOCK_AWARE);
				tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
				state Version v = wait(tr.getReadVersion());
				tr.makeSelfConflicting();
				wait(tr.commit());
				TraceEvent(SevInfo, "TriggerRecoveryLoop_ClusterVersion").detail("Version", v);
				break;
			} catch (Error& e) {
				wait(tr.onError(e));
			}
		}
		return Void();
	}

	// Applies a "resolvers=N" configuration change, retrying once per second until changeConfig()
	// reports SUCCESS. With setToOriginal the original count is restored; otherwise N alternates
	// between the original count and the original count plus one.
	ACTOR Future<Void> changeResolverConfig(Database cx, TriggerRecoveryLoopWorkload* self, bool setToOriginal=false) {
		state int32_t numResolversToSet;
		if (setToOriginal) {
			numResolversToSet = self->originalNumOfResolvers.get();
		} else {
			numResolversToSet = self->currentNumOfResolvers.get() == self->originalNumOfResolvers.get()
			                        ? self->originalNumOfResolvers.get() + 1
			                        : self->originalNumOfResolvers.get();
		}
		// Own the formatted text in actor state. A StringRef built directly from the temporary
		// returned by format() would dangle as soon as that temporary is destroyed, and the
		// string is needed across wait()s and retries.
		state std::string configStr = format("resolvers=%d", numResolversToSet);
		loop {
			Optional<ConfigureAutoResult> conf;
			ConfigurationResult::Type r = wait(changeConfig(cx, { StringRef(configStr) }, conf, true));
			if (r == ConfigurationResult::SUCCESS) {
				self->currentNumOfResolvers = numResolversToSet;
				TraceEvent(SevInfo, "TriggerRecoveryLoop_ChangeResolverConfigSuccess").detail("NumOfResolvers", self->currentNumOfResolvers.get());
				break;
			}
			TraceEvent(SevWarn, "TriggerRecoveryLoop_ChangeResolverConfigFailed").detail("Result", r);
			wait(delay(1.0));
		}
		return Void();
	}

	// Reads the "\xff\xff/worker_interfaces" range and writes every returned value to
	// "\xff\xff/reboot_worker", keeping one entry per address after stripping a ":tls" suffix.
	// NOTE(review): getRange is issued with a limit of 1; presumably this special key range
	// ignores the limit and returns every worker - confirm, otherwise only one worker is rebooted.
	// NOTE(review): the transaction is never committed; presumably setting the special key takes
	// effect immediately - confirm.
	ACTOR Future<Void> killAll(Database cx) {
		state ReadYourWritesTransaction tr(cx);
		loop {
			try {
				tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
				tr.setOption(FDBTransactionOptions::LOCK_AWARE);
				Standalone<RangeResultRef> kvs = wait(tr.getRange(
				    KeyRangeRef(LiteralStringRef("\xff\xff/worker_interfaces"), LiteralStringRef("\xff\xff\xff")), 1));
				std::map<Key, Value> address_interface;
				for (const auto& it : kvs) {
					auto ip_port = it.key.endsWith(LiteralStringRef(":tls"))
					                   ? it.key.removeSuffix(LiteralStringRef(":tls"))
					                   : it.key;
					address_interface[ip_port] = it.value;
				}
				for (const auto& it : address_interface) {
					tr.set(LiteralStringRef("\xff\xff/reboot_worker"), it.second);
				}
				TraceEvent(SevInfo, "TriggerRecoveryLoop_AttempedKillAll");
				return Void();
			} catch (Error& e) {
				wait(tr.onError(e));
			}
		}
	}

	// Main loop: after startTime seconds, performs numRecoveries attempts, each either killAll()
	// (with probability killAllProportion) or a resolver configuration change. Between attempts it
	// waits delayBetweenRecoveries seconds and then for returnIfClusterRecovered(). Finally the
	// original resolver count is restored.
	ACTOR Future<Void> _start(Database cx, TriggerRecoveryLoopWorkload* self) {
		wait(delay(self->startTime));
		state int numRecoveriesDone = 0;
		try {
			loop {
				if (g_random->random01() < self->killAllProportion) {
					wait(self->killAll(cx));
				} else {
					wait(self->changeResolverConfig(cx, self));
				}
				numRecoveriesDone++;
				TraceEvent(SevInfo, "TriggerRecoveryLoop_AttempedRecovery").detail("RecoveryNum", numRecoveriesDone);
				if (numRecoveriesDone == self->numRecoveries) {
					break;
				}
				wait(delay(self->delayBetweenRecoveries));
				wait(self->returnIfClusterRecovered(cx));
			}
		} catch (Error& e) {
			// Cancellation must propagate: a cancelled actor should not go on to wait() again.
			if (e.code() == error_code_actor_cancelled) {
				throw e;
			}
			// Any other error is recorded and then ignored so that the resolver count below is
			// still reset to its original value.
			TraceEvent(SevWarn, "TriggerRecoveryLoop_Error").error(e);
		}
		wait(self->changeResolverConfig(cx, self, true));
		return Void();
	}

	virtual Future<Void> start(Database const& cx) {
		if (clientId != 0) return Void();
		return _start(cx, this);
	}

	// This workload performs no verification of its own.
	virtual Future<bool> check(Database const& cx) { return true; }

	// No metrics are reported.
	virtual void getMetrics(vector<PerfMetric>& m) {}
};
// Global factory instance for TriggerRecoveryLoopWorkload, constructed with the name "TriggerRecoveryLoop".
// NOTE(review): presumably this is the name test specifications use to select the workload - confirm against WorkloadFactory.
WorkloadFactory<TriggerRecoveryLoopWorkload> TriggerRecoveryLoopWorkloadFactory("TriggerRecoveryLoop");