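Improve avoidance of transient issues when logging IncorrectClusterFileContents events: escalate to SevWarnAlways based on how long the cluster file contents have been incorrect (time-based), rather than on seeing the same incorrect connection string twice in a row.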

This commit is contained in:
A.J. Beamon 2019-02-27 10:08:24 -08:00
parent a5122dc83c
commit 999ee68609
2 changed files with 14 additions and 10 deletions

View File

@ -492,7 +492,7 @@ DatabaseContext::DatabaseContext( const Error &err ) : deferredError(err), laten
ACTOR static Future<Void> monitorClientInfo( Reference<AsyncVar<Optional<ClusterInterface>>> clusterInterface, Reference<ClusterConnectionFile> ccf, Reference<AsyncVar<ClientDBInfo>> outInfo ) {
try {
state Optional<std::string> incorrectConnectionString;
state Optional<double> incorrectTime;
loop {
OpenDatabaseRequest req;
req.knownClientInfoID = outInfo->get().id;
@ -503,17 +503,19 @@ ACTOR static Future<Void> monitorClientInfo( Reference<AsyncVar<Optional<Cluster
if (ccf && !ccf->fileContentsUpToDate(fileConnectionString)) {
req.issues = LiteralStringRef("incorrect_cluster_file_contents");
std::string connectionString = ccf->getConnectionString().toString();
if(!incorrectTime.present()) {
incorrectTime = now();
}
if(ccf->canGetFilename()) {
// Don't log a SevWarnAlways the first time to account for transient issues (e.g. someone else changing the file right before us)
TraceEvent(incorrectConnectionString.present() && incorrectConnectionString.get() == connectionString ? SevWarnAlways : SevWarn, "IncorrectClusterFileContents")
// Don't log a SevWarnAlways initially to account for transient issues (e.g. someone else changing the file right before us)
TraceEvent(now() - incorrectTime.get() > 300 ? SevWarnAlways : SevWarn, "IncorrectClusterFileContents")
.detail("Filename", ccf->getFilename())
.detail("ConnectionStringFromFile", fileConnectionString.toString())
.detail("CurrentConnectionString", connectionString);
}
incorrectConnectionString = connectionString;
}
else {
incorrectConnectionString = Optional<std::string>();
incorrectTime = Optional<double>();
}
choose {

View File

@ -556,7 +556,7 @@ ACTOR Future<Void> monitorServerDBInfo( Reference<AsyncVar<Optional<ClusterContr
localInfo.myLocality = locality;
dbInfo->set(localInfo);
state Optional<std::string> incorrectConnectionString;
state Optional<double> incorrectTime;
loop {
GetServerDBInfoRequest req;
req.knownServerInfoID = dbInfo->get().id;
@ -565,17 +565,19 @@ ACTOR Future<Void> monitorServerDBInfo( Reference<AsyncVar<Optional<ClusterContr
if (connFile && !connFile->fileContentsUpToDate(fileConnectionString)) {
req.issues = LiteralStringRef("incorrect_cluster_file_contents");
std::string connectionString = connFile->getConnectionString().toString();
if(!incorrectTime.present()) {
incorrectTime = now();
}
if(connFile->canGetFilename()) {
// Don't log a SevWarnAlways the first time to account for transient issues (e.g. someone else changing the file right before us)
TraceEvent(incorrectConnectionString.present() && incorrectConnectionString.get() == connectionString ? SevWarnAlways : SevWarn, "IncorrectClusterFileContents")
// Don't log a SevWarnAlways initially to account for transient issues (e.g. someone else changing the file right before us)
TraceEvent(now() - incorrectTime.get() > 300 ? SevWarnAlways : SevWarn, "IncorrectClusterFileContents")
.detail("Filename", connFile->getFilename())
.detail("ConnectionStringFromFile", fileConnectionString.toString())
.detail("CurrentConnectionString", connectionString);
}
incorrectConnectionString = connectionString;
}
else {
incorrectConnectionString = Optional<std::string>();
incorrectTime = Optional<double>();
}
auto peers = FlowTransport::transport().getIncompatiblePeers();