From 187e507e53cfa1a682c8dc8e8b257d5046739921 Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Mon, 5 Nov 2018 09:28:08 -0800 Subject: [PATCH] Downgrade the severity of IncorrectClusterFileContents the first time it is logged to avoid transient issues that appear like the cluster file hasn't been updated (e.g. the cluster file is shared between multiple processes). --- fdbclient/NativeAPI.actor.cpp | 12 ++++++++++-- fdbserver/worker.actor.cpp | 12 ++++++++++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 5e09b1a4db..394dc4d6d2 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -498,6 +498,7 @@ ACTOR static Future monitorClientInfo( Reference ccf, Reference> outInfo ) { try { + state Optional incorrectConnectionString; loop { OpenDatabaseRequest req; req.knownClientInfoID = outInfo->get().id; @@ -508,11 +509,18 @@ ACTOR static Future monitorClientInfo( ReferencefileContentsUpToDate(fileConnectionString)) { req.issues = LiteralStringRef("incorrect_cluster_file_contents"); + std::string connectionString = ccf->getConnectionString().toString(); if(ccf->canGetFilename()) { - TraceEvent(SevWarnAlways, "IncorrectClusterFileContents").detail("Filename", ccf->getFilename()) + // Don't log a SevWarnAlways the first time to account for transient issues (e.g. someone else changing the file right before us) + TraceEvent(incorrectConnectionString.present() && incorrectConnectionString.get() == connectionString ? SevWarnAlways : SevWarn, "IncorrectClusterFileContents") + .detail("Filename", ccf->getFilename()) .detail("ConnectionStringFromFile", fileConnectionString.toString()) - .detail("CurrentConnectionString", ccf->getConnectionString().toString()); + .detail("CurrentConnectionString", connectionString); } + incorrectConnectionString = connectionString; + } + else { + incorrectConnectionString = Optional(); } choose { diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index 245cd1fb52..d98f00a2d9 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -446,6 +446,7 @@ ACTOR Future monitorServerDBInfo( Referenceset(localInfo); + state Optional incorrectConnectionString; loop { GetServerDBInfoRequest req; req.knownServerInfoID = dbInfo->get().id; @@ -453,11 +454,18 @@ ACTOR Future monitorServerDBInfo( ReferencefileContentsUpToDate(fileConnectionString)) { req.issues = LiteralStringRef("incorrect_cluster_file_contents"); + std::string connectionString = connFile->getConnectionString().toString(); if(connFile->canGetFilename()) { - TraceEvent(SevWarnAlways, "IncorrectClusterFileContents").detail("Filename", connFile->getFilename()) + // Don't log a SevWarnAlways the first time to account for transient issues (e.g. someone else changing the file right before us) + TraceEvent(incorrectConnectionString.present() && incorrectConnectionString.get() == connectionString ? SevWarnAlways : SevWarn, "IncorrectClusterFileContents") + .detail("Filename", connFile->getFilename()) .detail("ConnectionStringFromFile", fileConnectionString.toString()) - .detail("CurrentConnectionString", connFile->getConnectionString().toString()); + .detail("CurrentConnectionString", connectionString); } + incorrectConnectionString = connectionString; + } + else { + incorrectConnectionString = Optional(); } auto peers = FlowTransport::transport().getIncompatiblePeers();