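This resolves issue #3739 by exposing the time since the last full recovery (`time_since_last_fully_recovered_seconds`) in the `recovery_state` section of the status JSON.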
This commit is contained in:
parent
cc0db5452a
commit
4363dd0f25
|
@ -482,6 +482,7 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
|
|||
)statusSchema"
|
||||
R"statusSchema(
|
||||
"recovery_state":{
|
||||
"time_since_last_fully_recovered_seconds":1,
|
||||
"required_resolvers":1,
|
||||
"required_proxies":1,
|
||||
"required_grv_proxies":1,
|
||||
|
|
|
@ -1028,8 +1028,13 @@ ACTOR static Future<JsonBuilderObject> recoveryStateStatusFetcher(WorkerDetails
|
|||
state JsonBuilderObject message;
|
||||
|
||||
try {
|
||||
state Future<TraceEventFields> activeGens = timeoutError(mWorker.interf.eventLogRequest.getReply( EventLogRequest( LiteralStringRef("MasterRecoveryGenerations") ) ), 1.0);
|
||||
TraceEventFields md = wait( timeoutError(mWorker.interf.eventLogRequest.getReply( EventLogRequest( LiteralStringRef("MasterRecoveryState") ) ), 1.0) );
|
||||
std::vector<Future<TraceEventFields>> futures;
|
||||
futures.push_back(timeoutError(mWorker.interf.eventLogRequest.getReply( EventLogRequest( LiteralStringRef("MasterRecoveryGenerations") ) ), 1.0));
|
||||
futures.push_back(timeoutError(mWorker.interf.eventLogRequest.getReply( EventLogRequest( LiteralStringRef("MasterRecoveryFullyRecovered") ) ), 1.0));
|
||||
futures.push_back(timeoutError(mWorker.interf.eventLogRequest.getReply( EventLogRequest( LiteralStringRef("MasterRecoveryState") ) ), 1.0));
|
||||
std::vector<TraceEventFields> msgs = wait(getAll(futures));
|
||||
|
||||
const TraceEventFields& md = msgs[2];
|
||||
int mStatusCode = md.getInt("StatusCode");
|
||||
if (mStatusCode < 0 || mStatusCode >= RecoveryStatus::END)
|
||||
throw attribute_not_found();
|
||||
|
@ -1037,6 +1042,17 @@ ACTOR static Future<JsonBuilderObject> recoveryStateStatusFetcher(WorkerDetails
|
|||
message = JsonString::makeMessage(RecoveryStatus::names[mStatusCode], RecoveryStatus::descriptions[mStatusCode]);
|
||||
*statusCode = mStatusCode;
|
||||
|
||||
const TraceEventFields& mLastRecoveryMsg = msgs[1];
|
||||
std::string lastFullyRecoveredTimeS;
|
||||
if (mLastRecoveryMsg.tryGetValue("Time", lastFullyRecoveredTimeS)) {
|
||||
double lastFullyRecoveredTime = atof(lastFullyRecoveredTimeS.c_str());
|
||||
// `lastFullyRecoveredTime` is the timestamp taken on master so the time interval calculated below may not
|
||||
// be accurate due to the clock skew across the network, but it's good enough for the purpose it's used.
|
||||
message["time_since_last_fully_recovered_seconds"] = now() - lastFullyRecoveredTime;
|
||||
} else {
|
||||
message["time_since_last_fully_recovered_seconds"] = -1;
|
||||
}
|
||||
|
||||
// Add additional metadata for certain statuses
|
||||
if (mStatusCode == RecoveryStatus::recruiting_transaction_servers) {
|
||||
int requiredLogs = atoi( md.getValue("RequiredTLogs").c_str() );
|
||||
|
@ -1056,7 +1072,7 @@ ACTOR static Future<JsonBuilderObject> recoveryStateStatusFetcher(WorkerDetails
|
|||
// TODO: time_in_recovery: 0.5
|
||||
// time_in_state: 0.1
|
||||
|
||||
TraceEventFields mdActiveGens = wait(activeGens);
|
||||
const TraceEventFields& mdActiveGens = msgs[0];
|
||||
if(mdActiveGens.size()) {
|
||||
int activeGenerations = mdActiveGens.getInt("ActiveGenerations");
|
||||
message["active_generations"] = activeGenerations;
|
||||
|
|
|
@ -1276,6 +1276,8 @@ ACTOR Future<Void> trackTlogRecovery( Reference<MasterData> self, Reference<Asyn
|
|||
.detail("Status", RecoveryStatus::names[RecoveryStatus::fully_recovered])
|
||||
.trackLatest("MasterRecoveryState");
|
||||
|
||||
TraceEvent("MasterRecoveryFullyRecovered").trackLatest("MasterRecoveryFullyRecovered");
|
||||
|
||||
TraceEvent("MasterRecoveryGenerations", self->dbgid)
|
||||
.detail("ActiveGenerations", 1)
|
||||
.trackLatest("MasterRecoveryGenerations");
|
||||
|
|
Loading…
Reference in New Issue