Add assassination workload to the restore test workload

Add assert to ensure restore worker is reliable and not killed.
This commit is contained in:
Meng Xu 2020-03-23 11:10:46 -07:00
parent 51047a6c1d
commit 48db54424f
2 changed files with 19 additions and 12 deletions

View File

@ -312,7 +312,13 @@ ACTOR Future<Void> _restoreWorker(Database cx, LocalityData locality) {
if (addresses.secondaryAddress.present()) { if (addresses.secondaryAddress.present()) {
g_simulator.protectedAddresses.insert(addresses.secondaryAddress.get()); g_simulator.protectedAddresses.insert(addresses.secondaryAddress.get());
} }
TraceEvent("ProtectRestoreWorker").detail("Address", addresses.toString()).backtrace(); ISimulator::ProcessInfo* p = g_simulator.getProcessByAddress(myWorkerInterf.address());
TraceEvent("ProtectRestoreWorker")
.detail("Address", addresses.toString())
.detail("IsReliable", p->isReliable())
.detail("ReliableInfo", p->getReliableInfo())
.backtrace();
ASSERT(p->isReliable());
} }
TraceEvent("FastRestoreWorkerKnobs", myWorkerInterf.id()) TraceEvent("FastRestoreWorkerKnobs", myWorkerInterf.id())

View File

@ -33,18 +33,19 @@ testTitle=BackupAndParallelRestoreWithAtomicOp
; meanDelay=90.0 ; meanDelay=90.0
; testDuration=90.0 ; testDuration=90.0
; Do NOT consider machine crash yet ; Do NOT kill restore worker process yet
; testName=Attrition ; Kill other process to ensure restore works when FDB cluster has faults
; machinesToKill=10 testName=Attrition
; machinesToLeave=3 machinesToKill=10
; reboot=true machinesToLeave=3
; testDuration=90.0 reboot=true
testDuration=90.0
; testName=Attrition testName=Attrition
; machinesToKill=10 machinesToKill=10
; machinesToLeave=3 machinesToLeave=3
; reboot=true reboot=true
; testDuration=90.0 testDuration=90.0
; Disable buggify for parallel restore ; Disable buggify for parallel restore
;buggify=on ;buggify=on