misc bg operational fixes and improvements

This commit is contained in:
Josh Slocum 2022-04-05 10:23:30 -05:00
parent 1c6dfae48e
commit aaaf42525a
6 changed files with 71 additions and 21 deletions

View File

@ -44,6 +44,7 @@ struct BlobWorkerStats {
int numRangesAssigned; int numRangesAssigned;
int mutationBytesBuffered; int mutationBytesBuffered;
int activeReadRequests; int activeReadRequests;
int granulesPendingSplitCheck;
Future<Void> logger; Future<Void> logger;
@ -62,10 +63,11 @@ struct BlobWorkerStats {
readReqDeltaBytesReturned("ReadReqDeltaBytesReturned", cc), commitVersionChecks("CommitVersionChecks", cc), readReqDeltaBytesReturned("ReadReqDeltaBytesReturned", cc), commitVersionChecks("CommitVersionChecks", cc),
granuleUpdateErrors("GranuleUpdateErrors", cc), granuleRequestTimeouts("GranuleRequestTimeouts", cc), granuleUpdateErrors("GranuleUpdateErrors", cc), granuleRequestTimeouts("GranuleRequestTimeouts", cc),
readRequestsWithBegin("ReadRequestsWithBegin", cc), readRequestsCollapsed("ReadRequestsCollapsed", cc), readRequestsWithBegin("ReadRequestsWithBegin", cc), readRequestsCollapsed("ReadRequestsCollapsed", cc),
numRangesAssigned(0), mutationBytesBuffered(0), activeReadRequests(0) { numRangesAssigned(0), mutationBytesBuffered(0), activeReadRequests(0), granulesPendingSplitCheck(0) {
specialCounter(cc, "NumRangesAssigned", [this]() { return this->numRangesAssigned; }); specialCounter(cc, "NumRangesAssigned", [this]() { return this->numRangesAssigned; });
specialCounter(cc, "MutationBytesBuffered", [this]() { return this->mutationBytesBuffered; }); specialCounter(cc, "MutationBytesBuffered", [this]() { return this->mutationBytesBuffered; });
specialCounter(cc, "ActiveReadRequests", [this]() { return this->activeReadRequests; }); specialCounter(cc, "ActiveReadRequests", [this]() { return this->activeReadRequests; });
specialCounter(cc, "GranulesPendingSplitCheck", [this]() { return this->granulesPendingSplitCheck; });
logger = traceCounters("BlobWorkerMetrics", id, interval, &cc, "BlobWorkerMetrics"); logger = traceCounters("BlobWorkerMetrics", id, interval, &cc, "BlobWorkerMetrics");
} }

View File

@ -858,6 +858,9 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( BLOB_MANAGER_STATUS_EXP_BACKOFF_MAX, 5.0 ); init( BLOB_MANAGER_STATUS_EXP_BACKOFF_MAX, 5.0 );
init( BLOB_MANAGER_STATUS_EXP_BACKOFF_EXPONENT, 1.5 ); init( BLOB_MANAGER_STATUS_EXP_BACKOFF_EXPONENT, 1.5 );
init( BGCC_TIMEOUT, isSimulated ? 10.0 : 120.0 );
init( BGCC_MIN_INTERVAL, isSimulated ? 1.0 : 10.0 );
// clang-format on // clang-format on
if (clientKnobs) { if (clientKnobs) {

View File

@ -812,6 +812,8 @@ public:
double BLOB_MANAGER_STATUS_EXP_BACKOFF_MIN; double BLOB_MANAGER_STATUS_EXP_BACKOFF_MIN;
double BLOB_MANAGER_STATUS_EXP_BACKOFF_MAX; double BLOB_MANAGER_STATUS_EXP_BACKOFF_MAX;
double BLOB_MANAGER_STATUS_EXP_BACKOFF_EXPONENT; double BLOB_MANAGER_STATUS_EXP_BACKOFF_EXPONENT;
double BGCC_TIMEOUT;
double BGCC_MIN_INTERVAL;
ServerKnobs(Randomize, ClientKnobs*, IsSimulated); ServerKnobs(Randomize, ClientKnobs*, IsSimulated);
void initialize(Randomize, ClientKnobs*, IsSimulated); void initialize(Randomize, ClientKnobs*, IsSimulated);

View File

@ -224,13 +224,16 @@ struct BlobManagerStats {
Counter ccRowsChecked; Counter ccRowsChecked;
Counter ccBytesChecked; Counter ccBytesChecked;
Counter ccMismatches; Counter ccMismatches;
Counter ccTimeouts;
Counter ccErrors;
Future<Void> logger; Future<Void> logger;
// Current stats maintained for a given blob worker process // Current stats maintained for a given blob worker process
explicit BlobManagerStats(UID id, double interval, std::unordered_map<UID, BlobWorkerInterface>* workers) explicit BlobManagerStats(UID id, double interval, std::unordered_map<UID, BlobWorkerInterface>* workers)
: cc("BlobManagerStats", id.toString()), granuleSplits("GranuleSplits", cc), : cc("BlobManagerStats", id.toString()), granuleSplits("GranuleSplits", cc),
granuleWriteHotSplits("GranuleWriteHotSplits", cc), ccGranulesChecked("CCGranulesChecked", cc), granuleWriteHotSplits("GranuleWriteHotSplits", cc), ccGranulesChecked("CCGranulesChecked", cc),
ccRowsChecked("CCRowsChecked", cc), ccBytesChecked("CCBytesChecked", cc), ccMismatches("CCMismatches", cc) { ccRowsChecked("CCRowsChecked", cc), ccBytesChecked("CCBytesChecked", cc), ccMismatches("CCMismatches", cc),
ccTimeouts("CCTimeouts", cc), ccErrors("CCErrors", cc) {
specialCounter(cc, "WorkerCount", [workers]() { return workers->size(); }); specialCounter(cc, "WorkerCount", [workers]() { return workers->size(); });
logger = traceCounters("BlobManagerMetrics", id, interval, &cc, "BlobManagerMetrics"); logger = traceCounters("BlobManagerMetrics", id, interval, &cc, "BlobManagerMetrics");
} }
@ -2743,6 +2746,25 @@ static void blobManagerExclusionSafetyCheck(Reference<BlobManagerData> self,
req.reply.send(reply); req.reply.send(reply);
} }
// Runs one blob granule consistency check over `range`.
//
// Reads the range from FDB, then reads the same range from blob storage at the version that came back
// with the FDB read, and compares the two with compareFDBAndBlob(). A failed comparison bumps the
// ccMismatches counter; the granules/rows/bytes-checked counters are updated regardless of the outcome.
//
// Returns the expectedSize() of the FDB read result.
// NOTE(review): errors from either read are presumably propagated to the caller - confirm.
ACTOR Future<int64_t> bgccCheckGranule(Reference<BlobManagerData> bmData, KeyRange range) {
	// must be a state variable: it is still needed after the blob read's wait() below
	state std::pair<RangeResult, Version> fdbSnapshot = wait(readFromFDB(bmData->db, range));

	// read the blob side at the same version as the FDB read (fdbSnapshot.second)
	std::pair<RangeResult, Standalone<VectorRef<BlobGranuleChunkRef>>> blobSnapshot =
	    wait(readFromBlob(bmData->db, bmData->bstore, range, 0, fdbSnapshot.second));

	bool consistent = compareFDBAndBlob(fdbSnapshot.first, blobSnapshot, range, fdbSnapshot.second, BM_DEBUG);
	if (!consistent) {
		++bmData->stats.ccMismatches;
	}

	int64_t checkedBytes = fdbSnapshot.first.expectedSize();

	++bmData->stats.ccGranulesChecked;
	bmData->stats.ccRowsChecked += fdbSnapshot.first.size();
	bmData->stats.ccBytesChecked += checkedBytes;

	return checkedBytes;
}
// FIXME: could eventually make this more thorough by storing some state in the DB or something // FIXME: could eventually make this more thorough by storing some state in the DB or something
// FIXME: simpler solution could be to shuffle ranges // FIXME: simpler solution could be to shuffle ranges
ACTOR Future<Void> bgConsistencyCheck(Reference<BlobManagerData> bmData) { ACTOR Future<Void> bgConsistencyCheck(Reference<BlobManagerData> bmData) {
@ -2775,32 +2797,31 @@ ACTOR Future<Void> bgConsistencyCheck(Reference<BlobManagerData> bmData) {
tries--; tries--;
} }
state int64_t allowanceBytes = SERVER_KNOBS->BG_SNAPSHOT_FILE_TARGET_BYTES;
if (tries == 0) { if (tries == 0) {
if (BM_DEBUG) { if (BM_DEBUG) {
printf("BGCC couldn't find random range to check, skipping\n"); printf("BGCC couldn't find random range to check, skipping\n");
} }
wait(rateLimiter->getAllowance(SERVER_KNOBS->BG_SNAPSHOT_FILE_TARGET_BYTES));
} else { } else {
state std::pair<RangeResult, Version> fdbResult = wait(readFromFDB(bmData->db, range)); try {
Optional<int64_t> bytesRead =
std::pair<RangeResult, Standalone<VectorRef<BlobGranuleChunkRef>>> blobResult = wait(timeout(bgccCheckGranule(bmData, range), SERVER_KNOBS->BGCC_TIMEOUT));
wait(readFromBlob(bmData->db, bmData->bstore, range, 0, fdbResult.second)); if (bytesRead.present()) {
allowanceBytes = bytesRead.get();
if (!compareFDBAndBlob(fdbResult.first, blobResult, range, fdbResult.second, BM_DEBUG)) { } else {
++bmData->stats.ccMismatches; ++bmData->stats.ccTimeouts;
}
} catch (Error& e) {
if (e.code() == error_code_operation_cancelled) {
throw e;
}
TraceEvent(SevWarn, "BGCCError", bmData->id).error(e).detail("Epoch", bmData->epoch);
++bmData->stats.ccErrors;
} }
int64_t bytesRead = fdbResult.first.expectedSize();
++bmData->stats.ccGranulesChecked;
bmData->stats.ccRowsChecked += fdbResult.first.size();
bmData->stats.ccBytesChecked += bytesRead;
// clear fdb result to release memory since it is a state variable
fdbResult = std::pair(RangeResult(), 0);
wait(rateLimiter->getAllowance(bytesRead));
} }
// wait at least some interval if snapshot is small and to not overwhelm the system with reads (for example,
// empty database with one empty granule)
wait(rateLimiter->getAllowance(allowanceBytes) && delay(SERVER_KNOBS->BGCC_MIN_INTERVAL));
} else { } else {
if (BM_DEBUG) { if (BM_DEBUG) {
fmt::print("BGCC found no workers, skipping\n", bmData->workerAssignments.size()); fmt::print("BGCC found no workers, skipping\n", bmData->workerAssignments.size());

View File

@ -862,6 +862,23 @@ ACTOR Future<BlobFileIndex> compactFromBlob(Reference<BlobWorkerData> bwData,
} }
} }
// Scoped guard over an externally-owned int counter.
//
// Constructing a holder from a counter pointer increments the counter; the matching decrement happens
// exactly once, either on an explicit complete() or when the holder is destroyed, whichever comes first.
//
// The holder is move-only. With implicit copies, `holder = CounterHolder(&c)` would leave two objects
// responsible for the same decrement (the temporary and the assigned-to holder), so the counter would be
// decremented twice for a single increment. Moving transfers the responsibility instead.
struct CounterHolder {
	int* counter;
	bool completed;

	// Empty holder: tracks nothing, so complete() and the destructor are no-ops.
	CounterHolder() : counter(nullptr), completed(true) {}

	// Increments *counter and takes responsibility for decrementing it again.
	// A null pointer produces an empty holder rather than dereferencing null.
	CounterHolder(int* counter) : counter(counter), completed(counter == nullptr) {
		if (counter != nullptr) {
			++(*counter);
		}
	}

	CounterHolder(const CounterHolder&) = delete;
	CounterHolder& operator=(const CounterHolder&) = delete;

	// Takes over the pending decrement (if any) from `other`, leaving `other` empty.
	CounterHolder(CounterHolder&& other) noexcept : counter(other.counter), completed(other.completed) {
		other.counter = nullptr;
		other.completed = true;
	}

	// Releases whatever this holder currently tracks, then takes over the pending decrement from `other`.
	CounterHolder& operator=(CounterHolder&& other) noexcept {
		if (this != &other) {
			complete();
			counter = other.counter;
			completed = other.completed;
			other.counter = nullptr;
			other.completed = true;
		}
		return *this;
	}

	// Decrements the counter if that has not happened yet. Safe to call more than once.
	void complete() {
		if (!completed) {
			completed = true;
			--(*counter);
		}
	}

	~CounterHolder() { complete(); }
};
ACTOR Future<BlobFileIndex> checkSplitAndReSnapshot(Reference<BlobWorkerData> bwData, ACTOR Future<BlobFileIndex> checkSplitAndReSnapshot(Reference<BlobWorkerData> bwData,
Reference<GranuleMetadata> metadata, Reference<GranuleMetadata> metadata,
UID granuleID, UID granuleID,
@ -877,6 +894,8 @@ ACTOR Future<BlobFileIndex> checkSplitAndReSnapshot(Reference<BlobWorkerData> bw
wait(delay(0, TaskPriority::BlobWorkerUpdateFDB)); wait(delay(0, TaskPriority::BlobWorkerUpdateFDB));
state CounterHolder pendingCounter(&bwData->stats.granulesPendingSplitCheck);
if (BW_DEBUG) { if (BW_DEBUG) {
fmt::print("Granule [{0} - {1}) checking with BM for re-snapshot after {2} bytes\n", fmt::print("Granule [{0} - {1}) checking with BM for re-snapshot after {2} bytes\n",
metadata->keyRange.begin.printable(), metadata->keyRange.begin.printable(),
@ -955,6 +974,8 @@ ACTOR Future<BlobFileIndex> checkSplitAndReSnapshot(Reference<BlobWorkerData> bw
} }
} }
pendingCounter.complete();
if (BW_DEBUG) { if (BW_DEBUG) {
fmt::print("Granule [{0} - {1}) re-snapshotting after {2} bytes\n", fmt::print("Granule [{0} - {1}) re-snapshotting after {2} bytes\n",
metadata->keyRange.begin.printable(), metadata->keyRange.begin.printable(),

View File

@ -2551,6 +2551,7 @@ ACTOR Future<Void> changeFeedStreamQ(StorageServer* data, ChangeFeedStreamReques
req.reply.send(feedReply); req.reply.send(feedReply);
if (req.begin == req.end) { if (req.begin == req.end) {
data->activeFeedQueries--;
req.reply.sendError(end_of_stream()); req.reply.sendError(end_of_stream());
return Void(); return Void();
} }