From 2701e44564dcfe6a4f9cb13850541ba14696aa31 Mon Sep 17 00:00:00 2001 From: Josh Slocum Date: Wed, 23 Feb 2022 15:55:16 -0600 Subject: [PATCH] Fixing error handling and race for blob worker status stream to manager --- fdbserver/BlobWorker.actor.cpp | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/fdbserver/BlobWorker.actor.cpp b/fdbserver/BlobWorker.actor.cpp index 9f81cff53b..9521b968b0 100644 --- a/fdbserver/BlobWorker.actor.cpp +++ b/fdbserver/BlobWorker.actor.cpp @@ -866,7 +866,12 @@ ACTOR Future checkSplitAndReSnapshot(Reference bw loop { loop { try { - wait(bwData->currentManagerStatusStream.get().onReady()); + loop { + choose { + when(wait(bwData->currentManagerStatusStream.get().onReady())) { break; } + when(wait(bwData->currentManagerStatusStream.onChange())) {} + } + } bwData->currentManagerStatusStream.get().send(GranuleStatusReply(metadata->keyRange, true, writeHot, @@ -877,7 +882,16 @@ ACTOR Future checkSplitAndReSnapshot(Reference bw reSnapshotVersion)); break; } catch (Error& e) { - wait(bwData->currentManagerStatusStream.onChange()); + if (e.code() == error_code_operation_cancelled) { + throw e; + } + // if we got broken promise while waiting, the old stream was killed, so we don't need to wait on + // change, just retry + if (e.code() == error_code_broken_promise) { + wait(delay(FLOW_KNOBS->PREVENT_FAST_SPIN_DELAY)); + } else { + wait(bwData->currentManagerStatusStream.onChange()); + } } }