Add a knob SNAPSHOT_ALL_STATEFUL_PROCESSES to snapshot all processes with a stateful class type (storage, log, transaction) even if they are not recruited (#7554)

This commit is contained in:
Chaoguang Lin 2022-07-08 20:53:49 -07:00 committed by GitHub
parent 68f3d463e2
commit 901d988de9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 22 additions and 0 deletions

View File

@ -124,6 +124,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( SNAP_NETWORK_FAILURE_RETRY_LIMIT, 10 );
init( MAX_STORAGE_SNAPSHOT_FAULT_TOLERANCE, 1 );
init( MAX_COORDINATOR_SNAPSHOT_FAULT_TOLERANCE, 1 );
init( SNAPSHOT_ALL_STATEFUL_PROCESSES, false ); if ( randomize && BUGGIFY ) SNAPSHOT_ALL_STATEFUL_PROCESSES = true;
// Data distribution queue
init( HEALTH_POLL_TIME, 1.0 );

View File

@ -629,6 +629,9 @@ public:
// Maximum number of coordinators a snapshot can fail to
// capture while still succeeding
int64_t MAX_COORDINATOR_SNAPSHOT_FAULT_TOLERANCE;
// If true, all processes with class "storage", "transaction" or "log" will be snapshotted even if they are not
// recruited for the corresponding role
bool SNAPSHOT_ALL_STATEFUL_PROCESSES;
// Storage Metrics
double STORAGE_METRICS_AVERAGE_INTERVAL;

View File

@ -1184,6 +1184,24 @@ ACTOR Future<std::map<NetworkAddress, std::pair<WorkerInterface, std::string>>>
}
}
}
// Optionally widen the snapshot set: when the SNAPSHOT_ALL_STATEFUL_PROCESSES knob is enabled, also add every
// worker whose process class is storage, transaction or log and that has no entry in `result` yet.
if (SERVER_KNOBS->SNAPSHOT_ALL_STATEFUL_PROCESSES) {
for (const auto& worker : workers) {
const auto& processAddress = worker.interf.address();
// Skip processes that are already included, so an existing entry is never overwritten here.
if (result.count(processAddress))
continue;
const auto& processClassType = worker.processClass.classType();
// Only the storage, transaction and log classes are handled below; there is no coordinator case because
// coordinators are always configured to be recruited.
if (processClassType == ProcessClass::StorageClass) {
// Storage-class workers are recorded with the "storage" role string.
result[processAddress] = std::make_pair(worker.interf, "storage");
TraceEvent(SevInfo, "SnapUnRecruitedStorageProcess").detail("ProcessAddress", processAddress);
} else if (processClassType == ProcessClass::TransactionClass ||
processClassType == ProcessClass::LogClass) {
// Transaction-class and log-class workers are both recorded with the "tlog" role string.
result[processAddress] = std::make_pair(worker.interf, "tlog");
TraceEvent(SevInfo, "SnapUnRecruitedLogProcess").detail("ProcessAddress", processAddress);
}
// Workers of any other process class are not added.
}
}
return result;
} catch (Error& e) {
wait(tr.onError(e));