Add trace-based status reporting for server version

This commit is contained in:
Bharadwaj V.R 2022-04-15 09:04:52 -07:00
parent adfc4ca379
commit 3787ddae89
3 changed files with 33 additions and 3 deletions

View File

@ -1409,7 +1409,7 @@ ACTOR Future<Void> clusterRecoveryCore(Reference<ClusterRecoveryData> self) {
wait(self->cstate.read());
if (self->cstate.prevDBState.lowestCompatibleServerVersion > currentProtocolVersion) {
TraceEvent(SevWarnAlways, "IncompatbleServerVersion", self->dbgid).log();
TraceEvent(SevWarnAlways, "IncompatibleServerVersion", self->dbgid).log();
throw internal_error();
}
@ -1478,6 +1478,11 @@ ACTOR Future<Void> clusterRecoveryCore(Reference<ClusterRecoveryData> self) {
}
wait(self->cstate.write(newState) || recoverAndEndEpoch);
TraceEvent("SWVersionCompatibilityChecked", self->dbgid)
.detail("NewestServerVersion", self->cstate.myDBState.newestServerVersion)
.detail("LowestCompatibleVersion", self->cstate.myDBState.lowestCompatibleServerVersion)
.trackLatest(self->swVersionCheckedEventHolder->trackingKey);
self->recoveryState = RecoveryState::RECRUITING;
state std::vector<StorageServerInterface> seedServers;

View File

@ -22,6 +22,7 @@
// When actually compiled (NO_INTELLISENSE), include the generated version of this file. In intellisense use the source
// version.
#include "flow/Trace.h"
#include <utility>
#if defined(NO_INTELLISENSE) && !defined(FDBSERVER_CLUSTERRECOVERY_ACTOR_G_H)
@ -244,6 +245,7 @@ struct ClusterRecoveryData : NonCopyable, ReferenceCounted<ClusterRecoveryData>
Future<Void> logger;
Reference<EventCacheHolder> swVersionCheckedEventHolder;
Reference<EventCacheHolder> recoveredConfigEventHolder;
Reference<EventCacheHolder> clusterRecoveryStateEventHolder;
Reference<EventCacheHolder> clusterRecoveryGenerationsEventHolder;
@ -273,6 +275,7 @@ struct ClusterRecoveryData : NonCopyable, ReferenceCounted<ClusterRecoveryData>
backupWorkerDoneRequests("BackupWorkerDoneRequests", cc),
getLiveCommittedVersionRequests("GetLiveCommittedVersionRequests", cc),
reportLiveCommittedVersionRequests("ReportLiveCommittedVersionRequests", cc),
swVersionCheckedEventHolder(makeReference<EventCacheHolder>("SWVersionCompatibilityChecked")),
recoveredConfigEventHolder(makeReference<EventCacheHolder>("RecoveredConfig")) {
clusterRecoveryStateEventHolder = makeReference<EventCacheHolder>(
getRecoveryEventName(ClusterRecoveryEventType::CLUSTER_RECOVERY_STATE_EVENT_NAME));

View File

@ -1543,6 +1543,25 @@ ACTOR Future<ProtocolVersion> getNewestProtocolVersion(Database cx) {
}
}
// Returns the newest server protocol version as reported by the given cluster-controller worker.
//
// Requests the worker's latest "SWVersionCompatibilityChecked" trace event (with a timeout argument of 1.0)
// and parses that event's "NewestServerVersion" field.
//
// This is best effort: any error other than actor cancellation is swallowed and a default-constructed
// ProtocolVersion is returned instead. `cx` is not used by this overload.
ACTOR Future<ProtocolVersion> getNewestProtocolVersion(Database cx, WorkerDetails ccWorker) {
	try {
		state Future<TraceEventFields> swVersionF = timeoutError(
		    ccWorker.interf.eventLogRequest.getReply(EventLogRequest("SWVersionCompatibilityChecked"_sr)), 1.0);
		wait(success(swVersionF));
		const TraceEventFields& swVersionTrace = swVersionF.get();
		// Protocol versions are 64-bit values. atoi() yields only an int (out-of-range input is undefined
		// behavior) and understands decimal only, so parse with strtoull() instead. Base 0 accepts plain
		// decimal as well as "0x"-prefixed hexadecimal text; strtoull() does not throw and yields 0 when no
		// digits can be parsed.
		// NOTE(review): confirm how the trace layer renders a ProtocolVersion detail (decimal vs. hex, with or
		// without a "0x" prefix) and pin the base accordingly.
		int64_t newestProtocolVersionValue =
		    static_cast<int64_t>(strtoull(swVersionTrace.getValue("NewestServerVersion").c_str(), nullptr, 0));
		return ProtocolVersion(newestProtocolVersionValue);
	} catch (Error& e) {
		// Cancellation must always propagate so the actor can be torn down.
		if (e.code() == error_code_actor_cancelled)
			throw;
		// Any other failure (e.g. the timeout firing) degrades to an unset version.
		return ProtocolVersion();
	}
}
struct LoadConfigurationResult {
bool fullReplication;
Optional<Key> healthyZone;
@ -2896,6 +2915,8 @@ ACTOR Future<StatusReply> clusterGetStatus(
messages.push_back(message);
}
state ProtocolVersion newestProtocolVersion = wait(getNewestProtocolVersion(cx, ccWorker));
// construct status information for cluster subsections
state int statusCode = (int)RecoveryStatus::END;
state JsonBuilderObject recoveryStateStatus = wait(
@ -2933,10 +2954,11 @@ ACTOR Future<StatusReply> clusterGetStatus(
statusObj["protocol_version"] = format("%" PRIx64, g_network->protocolVersion().version());
statusObj["connection_string"] = coordinators.ccr->getConnectionString().toString();
statusObj["bounce_impact"] = getBounceImpactInfo(statusCode);
ProtocolVersion newestProtocolVersion = wait(getNewestProtocolVersion(cx));
statusObj["latest_server_version"] = format("%" PRIx64, newestProtocolVersion.version());
// ProtocolVersion newestProtocolVersion = wait(getNewestProtocolVersion(cx));
// statusObj["latest_server_version"] = format("%" PRIx64, newestProtocolVersion.version());
state Optional<DatabaseConfiguration> configuration;
state Optional<LoadConfigurationResult> loadResult;