Report missing old tlogs during recovery, between the accepting-commits and storage-recovered states
This commit is contained in:
parent
1ee40848df
commit
23e1ff694c
|
@ -278,6 +278,18 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
|
|||
"address":"1.2.3.4:1234"
|
||||
}
|
||||
],
|
||||
"epoch": {
|
||||
"epoch": 1,
|
||||
"epoch_begin": 23,
|
||||
"epoch_end": 112315141
|
||||
},
|
||||
"missing_logs": [
|
||||
{
|
||||
"id":"6f8d623d0cb9966f",
|
||||
"healthy":false,
|
||||
"address":"1.2.3.5:1234"
|
||||
}
|
||||
],
|
||||
"log_replication_factor":3,
|
||||
"log_write_anti_quorum":0,
|
||||
"log_fault_tolerance":2,
|
||||
|
@ -288,6 +300,7 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
|
|||
"satellite_log_fault_tolerance":2
|
||||
}
|
||||
],
|
||||
"possibly_losing_old_logs_data": true,
|
||||
"fault_tolerance":{
|
||||
"max_zone_failures_without_losing_availability":0,
|
||||
"max_zone_failures_without_losing_data":0
|
||||
|
|
|
@ -1910,11 +1910,11 @@ ACTOR static Future<JsonBuilderObject> clusterSummaryStatisticsFetcher(WorkerEve
|
|||
|
||||
static JsonBuilderArray oldTlogFetcher(int* oldLogFaultTolerance, Reference<AsyncVar<ServerDBInfo>> db, std::unordered_map<NetworkAddress, WorkerInterface> const& address_workers) {
|
||||
JsonBuilderArray oldTlogsArray;
|
||||
|
||||
if(db->get().recoveryState >= RecoveryState::ACCEPTING_COMMITS) {
|
||||
for(auto it : db->get().logSystemConfig.oldTLogs) {
|
||||
JsonBuilderObject statusObj;
|
||||
JsonBuilderArray logsObj;
|
||||
JsonBuilderArray failedLogsObj;
|
||||
Optional<int32_t> sat_log_replication_factor, sat_log_write_anti_quorum, sat_log_fault_tolerance, log_replication_factor, log_write_anti_quorum, log_fault_tolerance, remote_log_replication_factor, remote_log_fault_tolerance;
|
||||
|
||||
int maxFaultTolerance = 0;
|
||||
|
@ -1932,6 +1932,7 @@ static JsonBuilderArray oldTlogFetcher(int* oldLogFaultTolerance, Reference<Asyn
|
|||
logsObj.push_back(logObj);
|
||||
if(failed) {
|
||||
failedLogs++;
|
||||
failedLogsObj.push_back(logObj);
|
||||
}
|
||||
}
|
||||
maxFaultTolerance = std::max(maxFaultTolerance, it.tLogs[i].tLogReplicationFactor - 1 - it.tLogs[i].tLogWriteAntiQuorum - failedLogs);
|
||||
|
@ -1953,6 +1954,18 @@ static JsonBuilderArray oldTlogFetcher(int* oldLogFaultTolerance, Reference<Asyn
|
|||
*oldLogFaultTolerance = std::min(*oldLogFaultTolerance, maxFaultTolerance);
|
||||
statusObj["logs"] = logsObj;
|
||||
|
||||
JsonBuilderObject epochInfo;
|
||||
epochInfo["epoch"] = it.epoch;
|
||||
epochInfo["epoch_begin"] = it.epochBegin;
|
||||
epochInfo["epoch_end"] = it.epochEnd;
|
||||
statusObj["epoch"] = epochInfo;
|
||||
|
||||
// We may lose logs in this log generation; storage servers may never be able to catch up on this log
|
||||
// generation.
|
||||
if (maxFaultTolerance < 0) {
|
||||
statusObj["missing_logs"] = failedLogsObj;
|
||||
}
|
||||
|
||||
if (sat_log_replication_factor.present())
|
||||
statusObj["satellite_log_replication_factor"] = sat_log_replication_factor.get();
|
||||
if (sat_log_write_anti_quorum.present())
|
||||
|
@ -2419,6 +2432,9 @@ ACTOR Future<StatusReply> clusterGetStatus(
|
|||
statusObj["old_logs"] = oldTlogFetcher(&oldLogFaultTolerance, db, address_workers);
|
||||
}
|
||||
|
||||
// Signals that storage servers may not be able to catch up on certain log generations
|
||||
statusObj["possibly_losing_old_logs_data"] = oldLogFaultTolerance < 0;
|
||||
|
||||
if(configuration.present()) {
|
||||
int extraTlogEligibleZones = getExtraTLogEligibleZones(workers, configuration.get());
|
||||
statusObj["fault_tolerance"] = faultToleranceStatusFetcher(configuration.get(), coordinators, workers, extraTlogEligibleZones, minReplicasRemaining, loadResult.present() && loadResult.get().healthyZone.present());
|
||||
|
|
Loading…
Reference in New Issue