Merge pull request #4842 from RenxuanW/config
Improve logging of the cluster controller's current view of the database configuration.
This commit is contained in:
commit
e02ef3b8d8
|
@ -1692,20 +1692,37 @@ public:
|
|||
if (req.configuration.regions.size() > 1) {
|
||||
std::vector<RegionInfo> regions = req.configuration.regions;
|
||||
if (regions[0].priority == regions[1].priority && regions[1].dcId == clusterControllerDcId.get()) {
|
||||
TraceEvent("CCSwitchPrimaryDc", id)
|
||||
.detail("CCDcId", clusterControllerDcId.get())
|
||||
.detail("OldPrimaryDcId", regions[0].dcId)
|
||||
.detail("NewPrimaryDcId", regions[1].dcId);
|
||||
std::swap(regions[0], regions[1]);
|
||||
}
|
||||
|
||||
if (regions[1].dcId == clusterControllerDcId.get() &&
|
||||
(!versionDifferenceUpdated || datacenterVersionDifference >= SERVER_KNOBS->MAX_VERSION_DIFFERENCE)) {
|
||||
if (regions[1].priority >= 0) {
|
||||
TraceEvent("CCSwitchPrimaryDcVersionDifference", id)
|
||||
.detail("CCDcId", clusterControllerDcId.get())
|
||||
.detail("OldPrimaryDcId", regions[0].dcId)
|
||||
.detail("NewPrimaryDcId", regions[1].dcId);
|
||||
std::swap(regions[0], regions[1]);
|
||||
} else {
|
||||
TraceEvent(SevWarnAlways, "CCDcPriorityNegative")
|
||||
.detail("DcId", regions[1].dcId)
|
||||
.detail("Priority", regions[1].priority);
|
||||
.detail("Priority", regions[1].priority)
|
||||
.detail("FindWorkersInDc", regions[0].dcId)
|
||||
.detail("Warning", "Failover did not happen but CC is in remote DC");
|
||||
}
|
||||
}
|
||||
|
||||
TraceEvent("CCFindWorkersForConfiguration", id)
|
||||
.detail("CCDcId", clusterControllerDcId.get())
|
||||
.detail("Region0DcId", regions[0].dcId)
|
||||
.detail("Region1DcId", regions[1].dcId)
|
||||
.detail("DatacenterVersionDifference", datacenterVersionDifference)
|
||||
.detail("VersionDifferenceUpdated", versionDifferenceUpdated);
|
||||
|
||||
bool setPrimaryDesired = false;
|
||||
try {
|
||||
auto reply = findWorkersForConfigurationFromDC(req, regions[0].dcId);
|
||||
|
@ -1719,6 +1736,10 @@ public:
|
|||
} else if (regions[0].dcId == clusterControllerDcId.get()) {
|
||||
return reply.get();
|
||||
}
|
||||
TraceEvent(SevWarn, "CCRecruitmentFailed", id)
|
||||
.detail("Reason", "Recruited Txn system and CC are in different DCs")
|
||||
.detail("CCDcId", clusterControllerDcId.get())
|
||||
.detail("RecruitedTxnSystemDcId", regions[0].dcId);
|
||||
throw no_more_servers();
|
||||
} catch (Error& e) {
|
||||
if (!goodRemoteRecruitmentTime.isReady() && regions[1].dcId != clusterControllerDcId.get()) {
|
||||
|
@ -1728,7 +1749,9 @@ public:
|
|||
if (e.code() != error_code_no_more_servers || regions[1].priority < 0) {
|
||||
throw;
|
||||
}
|
||||
TraceEvent(SevWarn, "AttemptingRecruitmentInRemoteDC", id).error(e);
|
||||
TraceEvent(SevWarn, "AttemptingRecruitmentInRemoteDc", id)
|
||||
.detail("SetPrimaryDesired", setPrimaryDesired)
|
||||
.error(e);
|
||||
auto reply = findWorkersForConfigurationFromDC(req, regions[1].dcId);
|
||||
if (!setPrimaryDesired) {
|
||||
vector<Optional<Key>> dcPriority;
|
||||
|
|
|
@ -711,15 +711,10 @@ ACTOR Future<vector<Standalone<CommitTransactionRef>>> recruitEverything(Referen
|
|||
TraceEvent("MasterRecoveryState", self->dbgid)
|
||||
.detail("StatusCode", RecoveryStatus::recruiting_transaction_servers)
|
||||
.detail("Status", RecoveryStatus::names[RecoveryStatus::recruiting_transaction_servers])
|
||||
.detail("RequiredTLogs", self->configuration.tLogReplicationFactor)
|
||||
.detail("DesiredTLogs", self->configuration.getDesiredLogs())
|
||||
.detail("Conf", self->configuration.toString())
|
||||
.detail("RequiredCommitProxies", 1)
|
||||
.detail("DesiredCommitProxies", self->configuration.getDesiredCommitProxies())
|
||||
.detail("RequiredGrvProxies", 1)
|
||||
.detail("DesiredGrvProxies", self->configuration.getDesiredGrvProxies())
|
||||
.detail("RequiredResolvers", 1)
|
||||
.detail("DesiredResolvers", self->configuration.getDesiredResolvers())
|
||||
.detail("StoreType", self->configuration.storageServerStoreType)
|
||||
.trackLatest("MasterRecoveryState");
|
||||
|
||||
// FIXME: we only need log routers for the same locality as the master
|
||||
|
@ -732,14 +727,25 @@ ACTOR Future<vector<Standalone<CommitTransactionRef>>> recruitEverything(Referen
|
|||
wait(brokenPromiseToNever(self->clusterController.recruitFromConfiguration.getReply(
|
||||
RecruitFromConfigurationRequest(self->configuration, self->lastEpochEnd == 0, maxLogRouters))));
|
||||
|
||||
std::string primaryDcIds, remoteDcIds;
|
||||
|
||||
self->primaryDcId.clear();
|
||||
self->remoteDcIds.clear();
|
||||
if (recruits.dcId.present()) {
|
||||
self->primaryDcId.push_back(recruits.dcId);
|
||||
if (!primaryDcIds.empty()) {
|
||||
primaryDcIds += ',';
|
||||
}
|
||||
primaryDcIds += printable(recruits.dcId);
|
||||
if (self->configuration.regions.size() > 1) {
|
||||
self->remoteDcIds.push_back(recruits.dcId.get() == self->configuration.regions[0].dcId
|
||||
? self->configuration.regions[1].dcId
|
||||
: self->configuration.regions[0].dcId);
|
||||
Key remoteDcId = recruits.dcId.get() == self->configuration.regions[0].dcId
|
||||
? self->configuration.regions[1].dcId
|
||||
: self->configuration.regions[0].dcId;
|
||||
self->remoteDcIds.push_back(remoteDcId);
|
||||
if (!remoteDcIds.empty()) {
|
||||
remoteDcIds += ',';
|
||||
}
|
||||
remoteDcIds += printable(remoteDcId);
|
||||
}
|
||||
}
|
||||
self->backupWorkers.swap(recruits.backupWorkers);
|
||||
|
@ -755,6 +761,8 @@ ACTOR Future<vector<Standalone<CommitTransactionRef>>> recruitEverything(Referen
|
|||
.detail("OldLogRouters", recruits.oldLogRouters.size())
|
||||
.detail("StorageServers", recruits.storageServers.size())
|
||||
.detail("BackupWorkers", self->backupWorkers.size())
|
||||
.detail("PrimaryDcIds", primaryDcIds)
|
||||
.detail("RemoteDcIds", remoteDcIds)
|
||||
.trackLatest("MasterRecoveryState");
|
||||
|
||||
// Actually, newSeedServers does both the recruiting and initialization of the seed servers; so if this is a brand
|
||||
|
|
Loading…
Reference in New Issue