Merge pull request #2846 from xumengpanda/mengxu/fr-add-attrition-to-test-PR
Performant restore [21/xx]: Enable assassination workload in restore test
This commit is contained in:
commit
9a50458a64
|
@ -32,6 +32,7 @@
|
|||
#include "fdbclient/MutationList.h"
|
||||
#include "fdbclient/BackupContainer.h"
|
||||
#include "fdbrpc/IAsyncFile.h"
|
||||
#include "fdbrpc/simulator.h"
|
||||
#include "flow/genericactors.actor.h"
|
||||
#include "flow/Hash3.h"
|
||||
#include "flow/ActorCollection.h"
|
||||
|
@ -297,11 +298,30 @@ ACTOR Future<Void> _restoreWorker(Database cx, LocalityData locality) {
|
|||
state Future<Void> myWork = Never();
|
||||
state Reference<AsyncVar<RestoreWorkerInterface>> leader =
|
||||
Reference<AsyncVar<RestoreWorkerInterface>>(new AsyncVar<RestoreWorkerInterface>());
|
||||
|
||||
state RestoreWorkerInterface myWorkerInterf;
|
||||
myWorkerInterf.initEndpoints();
|
||||
state Reference<RestoreWorkerData> self = Reference<RestoreWorkerData>(new RestoreWorkerData());
|
||||
|
||||
myWorkerInterf.initEndpoints();
|
||||
self->workerID = myWorkerInterf.id();
|
||||
|
||||
// Protect the restore worker from being killed in simulation.
|
||||
// Future: Remove the protection once restore can tolerate failure
|
||||
if (g_network->isSimulated()) {
|
||||
auto addresses = g_simulator.getProcessByAddress(myWorkerInterf.address())->addresses;
|
||||
|
||||
g_simulator.protectedAddresses.insert(addresses.address);
|
||||
if (addresses.secondaryAddress.present()) {
|
||||
g_simulator.protectedAddresses.insert(addresses.secondaryAddress.get());
|
||||
}
|
||||
ISimulator::ProcessInfo* p = g_simulator.getProcessByAddress(myWorkerInterf.address());
|
||||
TraceEvent("ProtectRestoreWorker")
|
||||
.detail("Address", addresses.toString())
|
||||
.detail("IsReliable", p->isReliable())
|
||||
.detail("ReliableInfo", p->getReliableInfo())
|
||||
.backtrace();
|
||||
ASSERT(p->isReliable());
|
||||
}
|
||||
|
||||
TraceEvent("FastRestoreWorkerKnobs", myWorkerInterf.id())
|
||||
.detail("FailureTimeout", SERVER_KNOBS->FASTRESTORE_FAILURE_TIMEOUT)
|
||||
.detail("HeartBeat", SERVER_KNOBS->FASTRESTORE_HEARTBEAT_INTERVAL)
|
||||
|
|
|
@ -33,18 +33,19 @@ testTitle=BackupAndParallelRestoreWithAtomicOp
|
|||
; meanDelay=90.0
|
||||
; testDuration=90.0
|
||||
|
||||
; Do NOT consider machine crash yet
|
||||
; testName=Attrition
|
||||
; machinesToKill=10
|
||||
; machinesToLeave=3
|
||||
; reboot=true
|
||||
; testDuration=90.0
|
||||
; Do NOT kill the restore worker processes yet
|
||||
; Kill other processes to ensure restore works when the FDB cluster has faults
|
||||
testName=Attrition
|
||||
machinesToKill=10
|
||||
machinesToLeave=3
|
||||
reboot=true
|
||||
testDuration=90.0
|
||||
|
||||
; testName=Attrition
|
||||
; machinesToKill=10
|
||||
; machinesToLeave=3
|
||||
; reboot=true
|
||||
; testDuration=90.0
|
||||
testName=Attrition
|
||||
machinesToKill=10
|
||||
machinesToLeave=3
|
||||
reboot=true
|
||||
testDuration=90.0
|
||||
|
||||
; Disable buggify for parallel restore
|
||||
;buggify=on
|
||||
|
|
|
@ -55,18 +55,19 @@ testTitle=BackupAndRestore
|
|||
; meanDelay=90.0
|
||||
; testDuration=90.0
|
||||
|
||||
; Do NOT consider machine crash yet
|
||||
; testName=Attrition
|
||||
; machinesToKill=10
|
||||
; machinesToLeave=3
|
||||
; reboot=true
|
||||
; testDuration=90.0
|
||||
; Do NOT kill the restore worker processes yet
|
||||
; Kill other processes to ensure restore works when the FDB cluster has faults
|
||||
testName=Attrition
|
||||
machinesToKill=10
|
||||
machinesToLeave=3
|
||||
reboot=true
|
||||
testDuration=90.0
|
||||
|
||||
; testName=Attrition
|
||||
; machinesToKill=10
|
||||
; machinesToLeave=3
|
||||
; reboot=true
|
||||
; testDuration=90.0
|
||||
testName=Attrition
|
||||
machinesToKill=10
|
||||
machinesToLeave=3
|
||||
reboot=true
|
||||
testDuration=90.0
|
||||
|
||||
; Disable buggify for parallel restore
|
||||
;buggify=off
|
||||
|
|
Loading…
Reference in New Issue