misc bg operational fixes and improvements

parent 1c6dfae48e
commit aaaf42525a
@@ -44,6 +44,7 @@ struct BlobWorkerStats {
     int numRangesAssigned;
     int mutationBytesBuffered;
     int activeReadRequests;
+    int granulesPendingSplitCheck;

     Future<Void> logger;

@@ -62,10 +63,11 @@ struct BlobWorkerStats {
       readReqDeltaBytesReturned("ReadReqDeltaBytesReturned", cc), commitVersionChecks("CommitVersionChecks", cc),
       granuleUpdateErrors("GranuleUpdateErrors", cc), granuleRequestTimeouts("GranuleRequestTimeouts", cc),
       readRequestsWithBegin("ReadRequestsWithBegin", cc), readRequestsCollapsed("ReadRequestsCollapsed", cc),
-      numRangesAssigned(0), mutationBytesBuffered(0), activeReadRequests(0) {
+      numRangesAssigned(0), mutationBytesBuffered(0), activeReadRequests(0), granulesPendingSplitCheck(0) {
         specialCounter(cc, "NumRangesAssigned", [this]() { return this->numRangesAssigned; });
         specialCounter(cc, "MutationBytesBuffered", [this]() { return this->mutationBytesBuffered; });
         specialCounter(cc, "ActiveReadRequests", [this]() { return this->activeReadRequests; });
+        specialCounter(cc, "GranulesPendingSplitCheck", [this]() { return this->granulesPendingSplitCheck; });

         logger = traceCounters("BlobWorkerMetrics", id, interval, &cc, "BlobWorkerMetrics");
     }
@@ -858,6 +858,9 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
     init( BLOB_MANAGER_STATUS_EXP_BACKOFF_MAX, 5.0 );
     init( BLOB_MANAGER_STATUS_EXP_BACKOFF_EXPONENT, 1.5 );
+
+    init( BGCC_TIMEOUT, isSimulated ? 10.0 : 120.0 );
+    init( BGCC_MIN_INTERVAL, isSimulated ? 1.0 : 10.0 );

     // clang-format on

     if (clientKnobs) {
@@ -812,6 +812,8 @@ public:
     double BLOB_MANAGER_STATUS_EXP_BACKOFF_MIN;
     double BLOB_MANAGER_STATUS_EXP_BACKOFF_MAX;
     double BLOB_MANAGER_STATUS_EXP_BACKOFF_EXPONENT;
+    double BGCC_TIMEOUT;
+    double BGCC_MIN_INTERVAL;

     ServerKnobs(Randomize, ClientKnobs*, IsSimulated);
     void initialize(Randomize, ClientKnobs*, IsSimulated);
@@ -224,13 +224,16 @@ struct BlobManagerStats {
     Counter ccRowsChecked;
     Counter ccBytesChecked;
     Counter ccMismatches;
+    Counter ccTimeouts;
+    Counter ccErrors;
     Future<Void> logger;

     // Current stats maintained for a given blob worker process
     explicit BlobManagerStats(UID id, double interval, std::unordered_map<UID, BlobWorkerInterface>* workers)
       : cc("BlobManagerStats", id.toString()), granuleSplits("GranuleSplits", cc),
         granuleWriteHotSplits("GranuleWriteHotSplits", cc), ccGranulesChecked("CCGranulesChecked", cc),
-        ccRowsChecked("CCRowsChecked", cc), ccBytesChecked("CCBytesChecked", cc), ccMismatches("CCMismatches", cc) {
+        ccRowsChecked("CCRowsChecked", cc), ccBytesChecked("CCBytesChecked", cc), ccMismatches("CCMismatches", cc),
+        ccTimeouts("CCTimeouts", cc), ccErrors("CCErrors", cc) {
         specialCounter(cc, "WorkerCount", [workers]() { return workers->size(); });
         logger = traceCounters("BlobManagerMetrics", id, interval, &cc, "BlobManagerMetrics");
     }
@@ -2743,6 +2746,25 @@ static void blobManagerExclusionSafetyCheck(Reference<BlobManagerData> self,
     req.reply.send(reply);
 }

+ACTOR Future<int64_t> bgccCheckGranule(Reference<BlobManagerData> bmData, KeyRange range) {
+    state std::pair<RangeResult, Version> fdbResult = wait(readFromFDB(bmData->db, range));
+
+    std::pair<RangeResult, Standalone<VectorRef<BlobGranuleChunkRef>>> blobResult =
+        wait(readFromBlob(bmData->db, bmData->bstore, range, 0, fdbResult.second));
+
+    if (!compareFDBAndBlob(fdbResult.first, blobResult, range, fdbResult.second, BM_DEBUG)) {
+        ++bmData->stats.ccMismatches;
+    }
+
+    int64_t bytesRead = fdbResult.first.expectedSize();
+
+    ++bmData->stats.ccGranulesChecked;
+    bmData->stats.ccRowsChecked += fdbResult.first.size();
+    bmData->stats.ccBytesChecked += bytesRead;
+
+    return bytesRead;
+}
+
 // FIXME: could eventually make this more thorough by storing some state in the DB or something
 // FIXME: simpler solution could be to shuffle ranges
 ACTOR Future<Void> bgConsistencyCheck(Reference<BlobManagerData> bmData) {
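Note: the new bgccCheckGranule actor reads the same key range twice at one read version, once from FDB and once from the blob granule files, and bumps ccMismatches when the two reads disagree. A rough standalone C++ sketch of that compare-two-reads idea; the types and helper below are illustrative stand-ins, not the FDB readFromFDB/readFromBlob/compareFDBAndBlob APIs:

#include <cstdio>
#include <string>
#include <utility>
#include <vector>

// Illustrative only: a generic version of the "read both sides at the same
// version and compare" check behind bgccCheckGranule.
using KV = std::pair<std::string, std::string>;

// Returns true when both reads of the range agree key-for-key and value-for-value.
bool rangesMatch(const std::vector<KV>& fromFdb, const std::vector<KV>& fromBlob) {
    if (fromFdb.size() != fromBlob.size())
        return false;
    for (size_t i = 0; i < fromFdb.size(); i++) {
        if (fromFdb[i] != fromBlob[i])
            return false;
    }
    return true;
}

int main() {
    std::vector<KV> fdbRead = { { "a", "1" }, { "b", "2" } };
    std::vector<KV> blobRead = { { "a", "1" }, { "b", "2" } };

    int ccGranulesChecked = 0, ccMismatches = 0;
    if (!rangesMatch(fdbRead, blobRead)) {
        ++ccMismatches; // the real actor bumps bmData->stats.ccMismatches here
    }
    ++ccGranulesChecked;
    std::printf("checked=%d mismatches=%d\n", ccGranulesChecked, ccMismatches);
    return 0;
}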
@@ -2775,32 +2797,31 @@ ACTOR Future<Void> bgConsistencyCheck(Reference<BlobManagerData> bmData) {
                 tries--;
             }

+            state int64_t allowanceBytes = SERVER_KNOBS->BG_SNAPSHOT_FILE_TARGET_BYTES;
             if (tries == 0) {
                 if (BM_DEBUG) {
                     printf("BGCC couldn't find random range to check, skipping\n");
                 }
-                wait(rateLimiter->getAllowance(SERVER_KNOBS->BG_SNAPSHOT_FILE_TARGET_BYTES));
             } else {
-                state std::pair<RangeResult, Version> fdbResult = wait(readFromFDB(bmData->db, range));
-
-                std::pair<RangeResult, Standalone<VectorRef<BlobGranuleChunkRef>>> blobResult =
-                    wait(readFromBlob(bmData->db, bmData->bstore, range, 0, fdbResult.second));
-
-                if (!compareFDBAndBlob(fdbResult.first, blobResult, range, fdbResult.second, BM_DEBUG)) {
-                    ++bmData->stats.ccMismatches;
+                try {
+                    Optional<int64_t> bytesRead =
+                        wait(timeout(bgccCheckGranule(bmData, range), SERVER_KNOBS->BGCC_TIMEOUT));
+                    if (bytesRead.present()) {
+                        allowanceBytes = bytesRead.get();
+                    } else {
+                        ++bmData->stats.ccTimeouts;
+                    }
+                } catch (Error& e) {
+                    if (e.code() == error_code_operation_cancelled) {
+                        throw e;
+                    }
+                    TraceEvent(SevWarn, "BGCCError", bmData->id).error(e).detail("Epoch", bmData->epoch);
+                    ++bmData->stats.ccErrors;
                 }
-
-                int64_t bytesRead = fdbResult.first.expectedSize();
-
-                ++bmData->stats.ccGranulesChecked;
-                bmData->stats.ccRowsChecked += fdbResult.first.size();
-                bmData->stats.ccBytesChecked += bytesRead;
-
-                // clear fdb result to release memory since it is a state variable
-                fdbResult = std::pair(RangeResult(), 0);
-
-                wait(rateLimiter->getAllowance(bytesRead));
             }
+            // wait at least some interval if snapshot is small and to not overwhelm the system with reads (for example,
+            // empty database with one empty granule)
+            wait(rateLimiter->getAllowance(allowanceBytes) && delay(SERVER_KNOBS->BGCC_MIN_INTERVAL));
         } else {
             if (BM_DEBUG) {
                 fmt::print("BGCC found no workers, skipping\n", bmData->workerAssignments.size());
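Note: after this change each consistency-check iteration waits on two conditions at once: a byte-based allowance from the rate limiter, sized by what the check actually read (it stays at BG_SNAPSHOT_FILE_TARGET_BYTES when the check timed out, failed, or no range was found), and a BGCC_MIN_INTERVAL floor, so an empty database with one empty granule does not turn into a tight read loop. A rough standalone sketch of that pacing rule; the RateLimiter and knob values here are illustrative stand-ins, not FDB's actual rate limiter:

#include <algorithm>
#include <chrono>
#include <cstdio>
#include <thread>

// Illustrative pacing sketch only.
struct RateLimiter {
    double bytesPerSecond;
    // How long to wait before the next check, given how many bytes the last one read.
    std::chrono::duration<double> allowance(long long bytes) const {
        return std::chrono::duration<double>(bytes / bytesPerSecond);
    }
};

int main() {
    const std::chrono::duration<double> minInterval(1.0); // stands in for BGCC_MIN_INTERVAL
    RateLimiter limiter{ 10e6 };                          // 10 MB/s of checked bytes

    for (int i = 0; i < 3; i++) {
        long long bytesRead = (i == 0) ? 0 : 20'000'000; // empty granule vs. a larger one
        // Equivalent of: wait(rateLimiter->getAllowance(allowanceBytes) && delay(BGCC_MIN_INTERVAL));
        // i.e. wait for BOTH the byte-based allowance and the minimum interval.
        auto pause = std::max(limiter.allowance(bytesRead), minInterval);
        std::printf("check %d read %lld bytes, sleeping %.1fs\n", i, bytesRead, pause.count());
        std::this_thread::sleep_for(pause);
    }
    return 0;
}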
@@ -862,6 +862,23 @@ ACTOR Future<BlobFileIndex> compactFromBlob(Reference<BlobWorkerData> bwData,
     }
 }

+struct CounterHolder {
+    int* counter;
+    bool completed;
+
+    CounterHolder() : counter(nullptr), completed(true) {}
+    CounterHolder(int* counter) : counter(counter), completed(false) { (*counter)++; }
+
+    void complete() {
+        if (!completed) {
+            completed = true;
+            (*counter)--;
+        }
+    }
+
+    ~CounterHolder() { complete(); }
+};
+
 ACTOR Future<BlobFileIndex> checkSplitAndReSnapshot(Reference<BlobWorkerData> bwData,
                                                     Reference<GranuleMetadata> metadata,
                                                     UID granuleID,
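Note: CounterHolder is a small RAII guard: constructing it bumps an int counter, and complete() or the destructor undoes the bump exactly once, so granulesPendingSplitCheck stays accurate even if the actor returns early, throws, or is cancelled. A minimal standalone sketch of the same pattern; the Stats struct and main below are illustrative, not FDB code:

#include <cassert>

// Illustrative stand-in for BlobWorkerStats::granulesPendingSplitCheck.
struct Stats {
    int granulesPendingSplitCheck = 0;
};

// Same shape as the CounterHolder added in the diff: increment on construction,
// decrement exactly once on complete() or destruction, whichever comes first.
struct CounterHolder {
    int* counter;
    bool completed;

    CounterHolder() : counter(nullptr), completed(true) {}
    CounterHolder(int* counter) : counter(counter), completed(false) { (*counter)++; }

    void complete() {
        if (!completed) {
            completed = true;
            (*counter)--;
        }
    }

    ~CounterHolder() { complete(); }
};

int main() {
    Stats stats;
    {
        CounterHolder pending(&stats.granulesPendingSplitCheck);
        assert(stats.granulesPendingSplitCheck == 1);
        pending.complete(); // explicit happy-path release, as in checkSplitAndReSnapshot
        assert(stats.granulesPendingSplitCheck == 0);
    }
    {
        CounterHolder pending(&stats.granulesPendingSplitCheck);
        // no explicit complete(): an early-return or error path still
        // decrements via the destructor when the guard goes out of scope
    }
    assert(stats.granulesPendingSplitCheck == 0);
    return 0;
}

In the hunks that follow, the explicit pendingCounter.complete() releases the count once the split check is resolved, while the destructor covers error and cancellation paths.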
@@ -877,6 +894,8 @@ ACTOR Future<BlobFileIndex> checkSplitAndReSnapshot(Reference<BlobWorkerData> bw

     wait(delay(0, TaskPriority::BlobWorkerUpdateFDB));

+    state CounterHolder pendingCounter(&bwData->stats.granulesPendingSplitCheck);
+
     if (BW_DEBUG) {
         fmt::print("Granule [{0} - {1}) checking with BM for re-snapshot after {2} bytes\n",
                    metadata->keyRange.begin.printable(),
@@ -955,6 +974,8 @@ ACTOR Future<BlobFileIndex> checkSplitAndReSnapshot(Reference<BlobWorkerData> bw
         }
     }

+    pendingCounter.complete();
+
     if (BW_DEBUG) {
         fmt::print("Granule [{0} - {1}) re-snapshotting after {2} bytes\n",
                    metadata->keyRange.begin.printable(),
@@ -2551,6 +2551,7 @@ ACTOR Future<Void> changeFeedStreamQ(StorageServer* data, ChangeFeedStreamReques

         req.reply.send(feedReply);
         if (req.begin == req.end) {
+            data->activeFeedQueries--;
             req.reply.sendError(end_of_stream());
             return Void();
         }