Blob Worker focused cleanup

This commit is contained in:
Josh Slocum 2022-03-10 09:22:53 -06:00
parent 1f964ac085
commit c8c97e0256
4 changed files with 231 additions and 372 deletions

View File

@ -825,15 +825,19 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( BG_SNAPSHOT_FILE_TARGET_BYTES, 10000000 ); if( buggifySmallShards || (randomize && BUGGIFY) ) { deterministicRandom()->random01() < 0.1 ? BG_SNAPSHOT_FILE_TARGET_BYTES /= 100 : BG_SNAPSHOT_FILE_TARGET_BYTES /= 10; }
init( BG_DELTA_BYTES_BEFORE_COMPACT, BG_SNAPSHOT_FILE_TARGET_BYTES/2 );
init( BG_DELTA_FILE_TARGET_BYTES, BG_DELTA_BYTES_BEFORE_COMPACT/10 );
init( BG_MAX_SPLIT_FANOUT, 10 ); if( randomize && BUGGIFY ) BG_MAX_SPLIT_FANOUT = deterministicRandom()->randomInt(5, 15);
init( BG_HOT_SNAPSHOT_VERSIONS, 5000000 );
init( BLOB_WORKER_INITIAL_SNAPSHOT_PARALLELISM, 8 ); if( randomize && BUGGIFY ) BLOB_WORKER_INITIAL_SNAPSHOT_PARALLELISM = 1;
init( BLOB_WORKER_TIMEOUT, 10.0 ); if( randomize && BUGGIFY ) BLOB_WORKER_TIMEOUT = 1.0;
init( BLOB_WORKER_REQUEST_TIMEOUT, 5.0 ); if( randomize && BUGGIFY ) BLOB_WORKER_REQUEST_TIMEOUT = 1.0;
init( BLOB_WORKERLIST_FETCH_INTERVAL, 1.0 );
init( BG_MAX_SPLIT_FANOUT, 10 ); if( randomize && BUGGIFY ) BLOB_WORKER_REQUEST_TIMEOUT = deterministicRandom()->randomInt(5, 15);
init( BLOB_WORKER_BATCH_GRV_INTERVAL, 0.1 );
init( BLOB_MANAGER_STATUS_EXP_BACKOFF_MIN, 0.1 );
init( BLOB_MANAGER_STATUS_EXP_BACKOFF_MAX, 5.0 );
init( BLOB_MANAGER_STATUS_EXP_BACKOFF_EXPONENT, 1.5 );
init( BLOB_MANAGER_STATUS_EXP_BACKOFF_MIN, 0.1 );
init( BLOB_MANAGER_STATUS_EXP_BACKOFF_MAX, 5.0 );
init( BLOB_MANAGER_STATUS_EXP_BACKOFF_EXPONENT, 1.5 );
// clang-format on

View File

@ -776,11 +776,14 @@ public:
int BG_SNAPSHOT_FILE_TARGET_BYTES;
int BG_DELTA_FILE_TARGET_BYTES;
int BG_DELTA_BYTES_BEFORE_COMPACT;
int BG_MAX_SPLIT_FANOUT;
int BG_HOT_SNAPSHOT_VERSIONS;
int BLOB_WORKER_INITIAL_SNAPSHOT_PARALLELISM;
double BLOB_WORKER_TIMEOUT; // Blob Manager's reaction time to a blob worker failure
double BLOB_WORKER_REQUEST_TIMEOUT; // Blob Worker's server-side request timeout
double BLOB_WORKERLIST_FETCH_INTERVAL;
int BG_MAX_SPLIT_FANOUT; // Decreasing this knob can be unsafe
double BLOB_WORKER_BATCH_GRV_INTERVAL;
double BLOB_MANAGER_STATUS_EXP_BACKOFF_MIN;
double BLOB_MANAGER_STATUS_EXP_BACKOFF_MAX;

View File

@ -323,7 +323,9 @@ ACTOR Future<Standalone<VectorRef<KeyRef>>> splitRange(Reference<BlobManagerData
ASSERT_WE_THINK(false);
// if not simulation, kill the BM
bmData->iAmReplaced.sendError(e);
if (bmData->iAmReplaced.canBeSet()) {
bmData->iAmReplaced.sendError(e);
}
throw e;
}
}
@ -452,7 +454,9 @@ ACTOR Future<Void> doRangeAssignment(Reference<BlobManagerData> bmData,
.error(e)
.detail("Epoch", bmData->epoch);
ASSERT_WE_THINK(false);
bmData->iAmReplaced.sendError(e);
if (bmData->iAmReplaced.canBeSet()) {
bmData->iAmReplaced.sendError(e);
}
throw;
}
@ -698,7 +702,9 @@ ACTOR Future<Void> monitorClientRanges(Reference<BlobManagerData> bmData) {
.detail("Epoch", bmData->epoch)
.detail("ClientRanges", results.size() - 1);
wait(delay(600));
bmData->iAmReplaced.sendError(internal_error());
if (bmData->iAmReplaced.canBeSet()) {
bmData->iAmReplaced.sendError(internal_error());
}
throw internal_error();
}
@ -1041,6 +1047,7 @@ ACTOR Future<Void> deregisterBlobWorker(Reference<BlobManagerData> bmData, BlobW
try {
wait(checkManagerLock(tr, bmData));
Key blobWorkerListKey = blobWorkerListKeyFor(interf.id());
// FIXME: should be able to remove this conflict range
tr->addReadConflictRange(singleKeyRange(blobWorkerListKey));
tr->clear(blobWorkerListKey);
@ -1298,7 +1305,9 @@ ACTOR Future<Void> monitorBlobWorkerStatus(Reference<BlobManagerData> bmData, Bl
.detail("Epoch", bmData->epoch);
ASSERT_WE_THINK(false);
// if not simulation, kill the BM
bmData->iAmReplaced.sendError(e);
if (bmData->iAmReplaced.canBeSet()) {
bmData->iAmReplaced.sendError(e);
}
throw e;
}
}
@ -1343,7 +1352,9 @@ ACTOR Future<Void> monitorBlobWorker(Reference<BlobManagerData> bmData, BlobWork
.detail("Epoch", bmData->epoch);
ASSERT_WE_THINK(false);
// if not simulation, kill the BM
bmData->iAmReplaced.sendError(e);
if (bmData->iAmReplaced.canBeSet()) {
bmData->iAmReplaced.sendError(e);
}
throw e;
}
}

File diff suppressed because it is too large. Load Diff