Fix rare force purge and granule assignment race (#8018)

* Fix rare force purge and granule assignment race

* Adding missed transaction options
This commit is contained in:
Josh Slocum 2022-08-29 17:29:28 -05:00 committed by GitHub
parent 058c720ef3
commit adc0fea18c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 32 additions and 4 deletions

View File

@ -1839,6 +1839,20 @@ ACTOR Future<Void> waitVersionCommitted(Reference<BlobWorkerData> bwData,
return Void(); return Void();
} }
// Reads the force-purge state covering `range` and reports whether any of it has been force purged.
// Resolves to true when getForcePurgedState() yields anything other than ForcedPurgeState::NonePurged,
// and to false otherwise. The granule file updater calls this after hitting file_not_found, to tell a
// force-purge race apart from a genuinely unexpected missing file.
// Errors thrown while reading are passed to Transaction::onError and the read is retried; any error
// onError itself raises propagates to the caller.
ACTOR Future<bool> checkFileNotFoundForcePurgeRace(Reference<BlobWorkerData> bwData, KeyRange range) {
	state Transaction purgeCheckTr(bwData->db);
	loop {
		try {
			// Options are applied on every attempt (presumably because onError resets the
			// transaction and drops them - confirm against Transaction::onError).
			purgeCheckTr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
			purgeCheckTr.setOption(FDBTransactionOptions::LOCK_AWARE);
			ForcedPurgeState observedState = wait(getForcePurgedState(&purgeCheckTr, range));
			const bool anyPurged = (observedState != ForcedPurgeState::NonePurged);
			return anyPurged;
		} catch (Error& err) {
			wait(purgeCheckTr.onError(err));
		}
	}
}
// updater for a single granule
// TODO: this is getting kind of large. Should try to split out this actor if it continues to grow?
ACTOR Future<Void> blobGranuleUpdateFiles(Reference<BlobWorkerData> bwData, ACTOR Future<Void> blobGranuleUpdateFiles(Reference<BlobWorkerData> bwData,
@ -2637,17 +2651,31 @@ ACTOR Future<Void> blobGranuleUpdateFiles(Reference<BlobWorkerData> bwData,
throw e; throw e;
} }
state Error e2 = e;
if (e.code() == error_code_file_not_found) {
// FIXME: better way to fix this?
bool isForcePurging = wait(checkFileNotFoundForcePurgeRace(bwData, metadata->keyRange));
if (isForcePurging) {
CODE_PROBE(true, "Granule got file not found from force purge");
TraceEvent("GranuleFileUpdaterFileNotFoundForcePurge", bwData->id)
.error(e2)
.detail("KeyRange", metadata->keyRange)
.detail("GranuleID", startState.granuleID);
return Void();
}
}
TraceEvent(SevError, "GranuleFileUpdaterUnexpectedError", bwData->id) TraceEvent(SevError, "GranuleFileUpdaterUnexpectedError", bwData->id)
.error(e) .error(e2)
.detail("Granule", metadata->keyRange) .detail("Granule", metadata->keyRange)
.detail("GranuleID", startState.granuleID); .detail("GranuleID", startState.granuleID);
ASSERT_WE_THINK(false); ASSERT_WE_THINK(false);
// if not simulation, kill the BW // if not simulation, kill the BW
if (bwData->fatalError.canBeSet()) { if (bwData->fatalError.canBeSet()) {
bwData->fatalError.sendError(e); bwData->fatalError.sendError(e2);
} }
throw e; throw e2;
} }
} }
@ -4919,4 +4947,4 @@ ACTOR Future<Void> blobWorker(BlobWorkerInterface bwInterf,
return Void(); return Void();
} }
// TODO add unit tests for assign/revoke range, especially version ordering // TODO add unit tests for assign/revoke range, especially version ordering