Instead of using fully_recovered, use accepting_commits as the signal that the DB has become available. Also add the number of old tlog generations to status

This commit is contained in:
Xin Dong 2020-09-17 09:55:25 -07:00
parent 3c7bd3549a
commit 4df0f60729
3 changed files with 18 additions and 8 deletions

View File

@ -482,7 +482,8 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
)statusSchema"
R"statusSchema(
"recovery_state":{
"time_since_last_fully_recovered_seconds":1,
"time_since_last_db_turned_available_seconds":1,
"number_of_old_generations_of_tlogs":1,
"required_resolvers":1,
"required_proxies":1,
"required_grv_proxies":1,

View File

@ -1030,9 +1030,10 @@ ACTOR static Future<JsonBuilderObject> recoveryStateStatusFetcher(Database cx, W
try {
state Future<TraceEventFields> mdActiveGensF = timeoutError(mWorker.interf.eventLogRequest.getReply( EventLogRequest( LiteralStringRef("MasterRecoveryGenerations") ) ), 1.0);
state Future<TraceEventFields> mdF = timeoutError(mWorker.interf.eventLogRequest.getReply( EventLogRequest( LiteralStringRef("MasterRecoveryState") ) ), 1.0);
state Future<TraceEventFields> mDBAvailableF = timeoutError(mWorker.interf.eventLogRequest.getReply( EventLogRequest( LiteralStringRef("MasterRecoverFDBAvailable") ) ), 1.0);
state Future<Version> rvF = timeoutError(tr.getReadVersion(), 1.0);
wait(success(mdActiveGensF) && success(mdF) && success(rvF));
wait(success(mdActiveGensF) && success(mdF) && success(rvF) && success(mDBAvailableF));
const TraceEventFields& md = mdF.get();
int mStatusCode = md.getInt("StatusCode");
@ -1043,13 +1044,16 @@ ACTOR static Future<JsonBuilderObject> recoveryStateStatusFetcher(Database cx, W
*statusCode = mStatusCode;
Version rv = rvF.get();
std::string fullyRecoveredAtVersion;
if (mStatusCode == RecoveryStatus::fully_recovered && md.tryGetValue("FullyRecoveredAtVersion", fullyRecoveredAtVersion)) {
int64_t fullyRecoveredAtVersion = md.getInt64("FullyRecoveredAtVersion");
double lastFullyRecoveredSecondsAgo = std::max((int64_t)0, (int64_t)(rv - fullyRecoveredAtVersion)) / (double)SERVER_KNOBS->VERSIONS_PER_SECOND;
message["time_since_last_fully_recovered_seconds"] = lastFullyRecoveredSecondsAgo;
const TraceEventFields& dbAvailableMsg = mDBAvailableF.get();
if (dbAvailableMsg.size() > 0) {
int64_t availabelAtVersion = dbAvailableMsg.getInt64("AvailabelAtVersion");
int numOfOldGensOfLogs = dbAvailableMsg.getInt("NumOfOldGensOfLogs");
double lastFullyRecoveredSecondsAgo = std::max((int64_t)0, (int64_t)(rv - availabelAtVersion)) / (double)SERVER_KNOBS->VERSIONS_PER_SECOND;
message["time_since_last_db_turned_available_seconds"] = lastFullyRecoveredSecondsAgo;
message["number_of_old_generations_of_tlogs"] = numOfOldGensOfLogs;
} else {
message["time_since_last_fully_recovered_seconds"] = -1;
message["time_since_last_db_turned_available_seconds"] = -1;
message["number_of_old_generations_of_tlogs"] = -1;
}
// Add additional metadata for certain statuses

View File

@ -1689,6 +1689,11 @@ ACTOR Future<Void> masterCore( Reference<MasterData> self ) {
.detail("RecoveryDuration", recoveryDuration)
.trackLatest("MasterRecoveryState");
TraceEvent("MasterRecoverFDBAvailable", self->dbgid)
.detail("NumOfOldGensOfLogs", self->cstate.myDBState.oldTLogData.size())
.detail("AvailabelAtVersion", self->version)
.trackLatest("MasterRecoverFDBAvailable");
if( self->resolvers.size() > 1 )
self->addActor.send( resolutionBalancing(self) );