Report the number of active generations in status

This commit is contained in:
Evan Tschannen 2020-03-16 10:29:17 -07:00
parent 818537ed2d
commit e5d53c863b
2 changed files with 17 additions and 2 deletions

View File

@ -961,8 +961,9 @@ ACTOR static Future<JsonBuilderObject> recoveryStateStatusFetcher(WorkerDetails
state JsonBuilderObject message; state JsonBuilderObject message;
try { try {
state Future<TraceEventFields> activeGens = timeoutError(mWorker.interf.eventLogRequest.getReply( EventLogRequest( LiteralStringRef("MasterRecoveryGenerations") ) ), 1.0);
TraceEventFields md = wait( timeoutError(mWorker.interf.eventLogRequest.getReply( EventLogRequest( LiteralStringRef("MasterRecoveryState") ) ), 1.0) ); TraceEventFields md = wait( timeoutError(mWorker.interf.eventLogRequest.getReply( EventLogRequest( LiteralStringRef("MasterRecoveryState") ) ), 1.0) );
state int mStatusCode = md.getInt("StatusCode"); int mStatusCode = md.getInt("StatusCode");
if (mStatusCode < 0 || mStatusCode >= RecoveryStatus::END) if (mStatusCode < 0 || mStatusCode >= RecoveryStatus::END)
throw attribute_not_found(); throw attribute_not_found();
@ -986,6 +987,12 @@ ACTOR static Future<JsonBuilderObject> recoveryStateStatusFetcher(WorkerDetails
// TODO: time_in_recovery: 0.5 // TODO: time_in_recovery: 0.5
// time_in_state: 0.1 // time_in_state: 0.1
TraceEventFields md = wait(activeGens);
if(md.size()) {
int activeGenerations = md.getInt("ActiveGenerations");
message["active_generations"] = activeGenerations;
}
} catch (Error &e){ } catch (Error &e){
if (e.code() == error_code_actor_cancelled) if (e.code() == error_code_actor_cancelled)
throw; throw;

View File

@ -1161,6 +1161,10 @@ ACTOR Future<Void> trackTlogRecovery( Reference<MasterData> self, Reference<Asyn
.detail("StatusCode", RecoveryStatus::fully_recovered) .detail("StatusCode", RecoveryStatus::fully_recovered)
.detail("Status", RecoveryStatus::names[RecoveryStatus::fully_recovered]) .detail("Status", RecoveryStatus::names[RecoveryStatus::fully_recovered])
.trackLatest("MasterRecoveryState"); .trackLatest("MasterRecoveryState");
TraceEvent("MasterRecoveryGenerations", self->dbgid)
.detail("ActiveGenerations", 0)
.trackLatest("MasterRecoveryGenerations");
} else if( !newState.oldTLogData.size() && self->recoveryState < RecoveryState::STORAGE_RECOVERED ) { } else if( !newState.oldTLogData.size() && self->recoveryState < RecoveryState::STORAGE_RECOVERED ) {
self->recoveryState = RecoveryState::STORAGE_RECOVERED; self->recoveryState = RecoveryState::STORAGE_RECOVERED;
TraceEvent("MasterRecoveryState", self->dbgid) TraceEvent("MasterRecoveryState", self->dbgid)
@ -1245,11 +1249,15 @@ ACTOR Future<Void> masterCore( Reference<MasterData> self ) {
.detail("StatusCode", RecoveryStatus::locking_coordinated_state) .detail("StatusCode", RecoveryStatus::locking_coordinated_state)
.detail("Status", RecoveryStatus::names[RecoveryStatus::locking_coordinated_state]) .detail("Status", RecoveryStatus::names[RecoveryStatus::locking_coordinated_state])
.detail("TLogs", self->cstate.prevDBState.tLogs.size()) .detail("TLogs", self->cstate.prevDBState.tLogs.size())
.detail("OldGenerations", self->cstate.myDBState.oldTLogData.size()) .detail("ActiveGenerations", self->cstate.myDBState.oldTLogData.size())
.detail("MyRecoveryCount", self->cstate.prevDBState.recoveryCount+2) .detail("MyRecoveryCount", self->cstate.prevDBState.recoveryCount+2)
.detail("ForceRecovery", self->forceRecovery) .detail("ForceRecovery", self->forceRecovery)
.trackLatest("MasterRecoveryState"); .trackLatest("MasterRecoveryState");
TraceEvent("MasterRecoveryGenerations", self->dbgid)
.detail("ActiveGenerations", self->cstate.myDBState.oldTLogData.size())
.trackLatest("MasterRecoveryGenerations");
if (self->cstate.myDBState.oldTLogData.size() > CLIENT_KNOBS->MAX_GENERATIONS_OVERRIDE) { if (self->cstate.myDBState.oldTLogData.size() > CLIENT_KNOBS->MAX_GENERATIONS_OVERRIDE) {
if (self->cstate.myDBState.oldTLogData.size() >= CLIENT_KNOBS->MAX_GENERATIONS) { if (self->cstate.myDBState.oldTLogData.size() >= CLIENT_KNOBS->MAX_GENERATIONS) {
TraceEvent(SevError, "RecoveryStoppedTooManyOldGenerations").detail("OldGenerations", self->cstate.myDBState.oldTLogData.size()) TraceEvent(SevError, "RecoveryStoppedTooManyOldGenerations").detail("OldGenerations", self->cstate.myDBState.oldTLogData.size())