From 72d99cc5b19acb2f84edaefc3bbe317679c54148 Mon Sep 17 00:00:00 2001 From: "Bharadwaj V.R" Date: Sun, 13 Mar 2022 20:03:33 -0700 Subject: [PATCH 01/28] Create new system key for tracking latest software version installed on the cluster --- fdbclient/SystemData.cpp | 2 ++ fdbclient/SystemData.h | 8 ++++++++ fdbserver/ApplyMetadataMutation.cpp | 3 ++- fdbserver/ClusterRecovery.actor.cpp | 5 +++++ fdbserver/DDTeamCollection.actor.cpp | 19 +++++++++++++++++++ 5 files changed, 36 insertions(+), 1 deletion(-) diff --git a/fdbclient/SystemData.cpp b/fdbclient/SystemData.cpp index fe10cae868..a0f32af238 100644 --- a/fdbclient/SystemData.cpp +++ b/fdbclient/SystemData.cpp @@ -791,6 +791,8 @@ std::vector> decodeBackupStartedValue(const ValueRef& va return ids; } +const KeyRef latestServerVersionKey = LiteralStringRef("\xff/latestServerVersion"); + const KeyRef coordinatorsKey = LiteralStringRef("\xff/coordinators"); const KeyRef logsKey = LiteralStringRef("\xff/logs"); const KeyRef minRequiredCommitVersionKey = LiteralStringRef("\xff/minRequiredCommitVersion"); diff --git a/fdbclient/SystemData.h b/fdbclient/SystemData.h index 14234151a3..8985f6483e 100644 --- a/fdbclient/SystemData.h +++ b/fdbclient/SystemData.h @@ -325,6 +325,14 @@ std::vector> decodeBackupStartedValue(const ValueRef& va // 1 = Send a signal to pause/already paused. extern const KeyRef backupPausedKey; +// Key whose value stores the newest software version that has been run on the server +// This is used to make sure that once a version of software is run on the +// server, only compatible versions are permitted to run thereafter. +// A different version of software is considered permitted if it is newer or if +// is only one minor version older. +// "\xff/latestServerVersion" +extern const KeyRef latestServerVersionKey; + // "\xff/coordinators" = "[[ClusterConnectionString]]" // Set to the encoded structure of the cluster's current set of coordinators. // Changed when performing quorumChange. diff --git a/fdbserver/ApplyMetadataMutation.cpp b/fdbserver/ApplyMetadataMutation.cpp index 47871915f9..0b2338faa5 100644 --- a/fdbserver/ApplyMetadataMutation.cpp +++ b/fdbserver/ApplyMetadataMutation.cpp @@ -543,7 +543,8 @@ private: m.param1.startsWith(applyMutationsAddPrefixRange.begin) || m.param1.startsWith(applyMutationsRemovePrefixRange.begin) || m.param1.startsWith(tagLocalityListPrefix) || m.param1.startsWith(serverTagHistoryPrefix) || - m.param1.startsWith(testOnlyTxnStateStorePrefixRange.begin) || m.param1 == clusterIdKey) { + m.param1.startsWith(testOnlyTxnStateStorePrefixRange.begin) || m.param1 == clusterIdKey || + m.param1 == latestServerVersionKey) { txnStateStore->set(KeyValueRef(m.param1, m.param2)); } diff --git a/fdbserver/ClusterRecovery.actor.cpp b/fdbserver/ClusterRecovery.actor.cpp index 512539a607..e2969e1963 100644 --- a/fdbserver/ClusterRecovery.actor.cpp +++ b/fdbserver/ClusterRecovery.actor.cpp @@ -18,6 +18,7 @@ * limitations under the License. */ +#include "fdbclient/SystemData.h" #include "fdbrpc/sim_validation.h" #include "fdbserver/ApplyMetadataMutation.h" #include "fdbserver/BackupProgress.actor.h" @@ -1718,6 +1719,10 @@ ACTOR Future clusterRecoveryCore(Reference self) { tr.set(recoveryCommitRequest.arena, clusterIdKey, BinaryWriter::toValue(self->clusterId, Unversioned())); } + tr.set(recoveryCommitRequest.arena, + latestServerVersionKey, + BinaryWriter::toValue(currentProtocolVersion.version(), Unversioned())); + applyMetadataMutations(SpanID(), self->dbgid, recoveryCommitRequest.arena, diff --git a/fdbserver/DDTeamCollection.actor.cpp b/fdbserver/DDTeamCollection.actor.cpp index bc22b1a7eb..2962f22008 100644 --- a/fdbserver/DDTeamCollection.actor.cpp +++ b/fdbserver/DDTeamCollection.actor.cpp @@ -2227,6 +2227,21 @@ public: } } + ACTOR static Future getLatestSoftwareVersion(DDTeamCollection* self) { + state ReadYourWritesTransaction tr(self->cx); + loop { + try { + tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); + tr.setOption(FDBTransactionOptions::LOCK_AWARE); + Optional latestServerVersion = wait(tr.get(latestServerVersionKey)); + ASSERT(latestServerVersion.present()); + return BinaryReader::fromStringRef(latestServerVersion.get(), Unversioned()); + } catch (Error& e) { + wait(tr.onError(e)); + } + } + } + ACTOR static Future initializeStorage(DDTeamCollection* self, RecruitStorageReply candidateWorker, const DDEnabledState* ddEnabledState, @@ -2250,6 +2265,10 @@ public: self->recruitingIds.insert(interfaceId); self->recruitingLocalities.insert(candidateWorker.worker.stableAddress()); + ProtocolVersion latestServerVersion = wait(DDTeamCollectionImpl::getLatestSoftwareVersion(self)); + TraceEvent(SevInfo, "DDSoftwareVersion", self->distributorId) + .detail("LatestServerVersion", latestServerVersion); + UID clusterId = wait(self->getClusterId()); state InitializeStorageRequest isr; From 52f4c4f007336341e5a37fbc54c7d7090930de54 Mon Sep 17 00:00:00 2001 From: "Bharadwaj V.R" Date: Mon, 14 Mar 2022 15:19:39 -0700 Subject: [PATCH 02/28] Add server version information to status json --- fdbclient/Schemas.cpp | 3 +++ fdbserver/Status.actor.cpp | 24 ++++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/fdbclient/Schemas.cpp b/fdbclient/Schemas.cpp index 2e9fcc53a6..c57969bd7d 100644 --- a/fdbclient/Schemas.cpp +++ b/fdbclient/Schemas.cpp @@ -485,6 +485,9 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema( "transaction_start_seconds":0.0, "commit_seconds":0.02 }, + "serverVersion":{ + "latest_server_version" : "fdb00a400050001", + } "clients":{ "count":1, "supported_versions":[ diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp index c34830311d..c2861e0f70 100644 --- a/fdbserver/Status.actor.cpp +++ b/fdbserver/Status.actor.cpp @@ -392,6 +392,12 @@ static JsonBuilderObject machineStatusFetcher(WorkerEvents mMetrics, return machineMap; } +JsonBuilderObject getServerVersionObject(int64_t serverVersion) { + JsonBuilderObject serverVersionObj; + serverVersionObj["latest_server_version"] = serverVersion; + return serverVersionObj; +} + JsonBuilderObject getLagObject(int64_t versions) { JsonBuilderObject lag; lag["versions"] = versions; @@ -1541,6 +1547,21 @@ struct LoadConfigurationResult { : fullReplication(true), healthyZoneSeconds(0), rebalanceDDIgnored(false), dataDistributionDisabled(false) {} }; +ACTOR Future getLatestSoftwareVersion(Database cx) { + state ReadYourWritesTransaction tr(cx); + loop { + try { + tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); + tr.setOption(FDBTransactionOptions::LOCK_AWARE); + Optional latestServerVersion = wait(tr.get(latestServerVersionKey)); + ASSERT(latestServerVersion.present()); + return BinaryReader::fromStringRef(latestServerVersion.get(), Unversioned()); + } catch (Error& e) { + wait(tr.onError(e)); + } + } +} + ACTOR static Future, Optional>> loadConfiguration(Database cx, JsonBuilderArray* messages, std::set* status_incomplete_reasons) { state Optional result; @@ -3177,6 +3198,9 @@ ACTOR Future clusterGetStatus( statusObj["incompatible_connections"] = incompatibleConnectionsArray; statusObj["datacenter_lag"] = getLagObject(datacenterVersionDifference); + ProtocolVersion latestServerVersion = wait(getLatestSoftwareVersion(cx)); + statusObj["server_version"] = getServerVersionObject(latestServerVersion.version()); + int activeTSSCount = 0; for (auto& it : storageServers) { if (it.first.isTss()) { From c380ade0132684fa4ef2358d8f5e6138262001b5 Mon Sep 17 00:00:00 2001 From: "Bharadwaj V.R" Date: Mon, 14 Mar 2022 16:47:30 -0700 Subject: [PATCH 03/28] Move latest server version indication to top level of status json obj --- fdbclient/Schemas.cpp | 4 +--- fdbserver/Status.actor.cpp | 8 +------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/fdbclient/Schemas.cpp b/fdbclient/Schemas.cpp index c57969bd7d..2a7ee33bf8 100644 --- a/fdbclient/Schemas.cpp +++ b/fdbclient/Schemas.cpp @@ -485,9 +485,6 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema( "transaction_start_seconds":0.0, "commit_seconds":0.02 }, - "serverVersion":{ - "latest_server_version" : "fdb00a400050001", - } "clients":{ "count":1, "supported_versions":[ @@ -696,6 +693,7 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema( }, "cluster_controller_timestamp":1415650089, "protocol_version":"fdb00a400050001", + "latest_server_version":"fdb00a500040001", "connection_string":"a:a@127.0.0.1:4000", "full_replication":true, "maintenance_zone":"0ccb4e0fdbdb5583010f6b77d9d10ece", diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp index c2861e0f70..bb1221bdcb 100644 --- a/fdbserver/Status.actor.cpp +++ b/fdbserver/Status.actor.cpp @@ -392,12 +392,6 @@ static JsonBuilderObject machineStatusFetcher(WorkerEvents mMetrics, return machineMap; } -JsonBuilderObject getServerVersionObject(int64_t serverVersion) { - JsonBuilderObject serverVersionObj; - serverVersionObj["latest_server_version"] = serverVersion; - return serverVersionObj; -} - JsonBuilderObject getLagObject(int64_t versions) { JsonBuilderObject lag; lag["versions"] = versions; @@ -3199,7 +3193,7 @@ ACTOR Future clusterGetStatus( statusObj["datacenter_lag"] = getLagObject(datacenterVersionDifference); ProtocolVersion latestServerVersion = wait(getLatestSoftwareVersion(cx)); - statusObj["server_version"] = getServerVersionObject(latestServerVersion.version()); + statusObj["latest_server_version"] = format("%" PRIx64, latestServerVersion.version()); int activeTSSCount = 0; for (auto& it : storageServers) { From 1b9f773d2a312bf220312cf0a831ef3fbba827c2 Mon Sep 17 00:00:00 2001 From: "Bharadwaj V.R" Date: Tue, 15 Mar 2022 11:01:13 -0700 Subject: [PATCH 04/28] Compare current and persisted protocol versions before updating persisted protocol version --- fdbserver/ClusterRecovery.actor.cpp | 43 +++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/fdbserver/ClusterRecovery.actor.cpp b/fdbserver/ClusterRecovery.actor.cpp index e2969e1963..ea3a1fd4dc 100644 --- a/fdbserver/ClusterRecovery.actor.cpp +++ b/fdbserver/ClusterRecovery.actor.cpp @@ -26,6 +26,7 @@ #include "fdbserver/MasterInterface.h" #include "fdbserver/WaitFailure.h" +#include "flow/Trace.h" #include "flow/actorcompiler.h" // This must be the last #include. static std::set const& normalClusterRecoveryErrors() { @@ -1407,7 +1408,8 @@ ACTOR Future recoverFrom(Reference self, std::vector* seedServers, std::vector>* initialConfChanges, Future poppedTxsVersion, - bool* clusterIdExists) { + bool* clusterIdExists, + ProtocolVersion* persistedServerVersion) { TraceEvent(getRecoveryEventName(ClusterRecoveryEventType::CLUSTER_RECOVERY_STATE_EVENT_NAME).c_str(), self->dbgid) .detail("StatusCode", RecoveryStatus::reading_transaction_system_state) .detail("Status", RecoveryStatus::names[RecoveryStatus::reading_transaction_system_state]) @@ -1441,6 +1443,12 @@ ACTOR Future recoverFrom(Reference self, self->clusterId = BinaryReader::fromStringRef(clusterId.get(), Unversioned()); } + Optional persistedServerVersionValue = self->txnStateStore->readValue(latestServerVersionKey).get(); + if (persistedServerVersionValue.present()) { + *persistedServerVersion = + BinaryReader::fromStringRef(persistedServerVersionValue.get(), Unversioned()); + } + // Ordinarily we pass through this loop once and recover. We go around the loop if recovery stalls for more than a // second, a provisional master is initialized, and an "emergency transaction" is submitted that might change the // configuration so that we can finish recovery. @@ -1580,6 +1588,7 @@ ACTOR Future clusterRecoveryCore(Reference self) { state Future minRecoveryDuration; state Future poppedTxsVersion; state bool clusterIdExists = false; + state ProtocolVersion persistedServerVersion; loop { Reference oldLogSystem = oldLogSystems->get(); @@ -1600,7 +1609,8 @@ ACTOR Future clusterRecoveryCore(Reference self) { &seedServers, &initialConfChanges, poppedTxsVersion, - std::addressof(clusterIdExists)) + std::addressof(clusterIdExists), + std::addressof(persistedServerVersion)) : Never())) { reg.cancel(); break; @@ -1719,9 +1729,24 @@ ACTOR Future clusterRecoveryCore(Reference self) { tr.set(recoveryCommitRequest.arena, clusterIdKey, BinaryWriter::toValue(self->clusterId, Unversioned())); } - tr.set(recoveryCommitRequest.arena, - latestServerVersionKey, - BinaryWriter::toValue(currentProtocolVersion.version(), Unversioned())); + if (persistedServerVersion.version() != 0) { + TraceEvent(SevInfo, "RecoveryLatestServerVersion", self->dbgid).detail("Version", persistedServerVersion); + } + + // ProtocolVersion testPersistedServerVersion(0x0FDB00B070010000LL); + // persistedServerVersion = testPersistedServerVersion + if (currentProtocolVersion > persistedServerVersion) { + tr.set(recoveryCommitRequest.arena, + latestServerVersionKey, + BinaryWriter::toValue(currentProtocolVersion.version(), Unversioned())); + TraceEvent(SevInfo, "SetNewLatestServerVersion", self->dbgid) + .detail("OldVersion", persistedServerVersion) + .detail("NewVersion", currentProtocolVersion); + } else { + TraceEvent(SevInfo, "RetainedLatestServerVersion", self->dbgid) + .detail("OldVersion", persistedServerVersion) + .detail("NewVersion", currentProtocolVersion); + } applyMetadataMutations(SpanID(), self->dbgid, @@ -1730,8 +1755,8 @@ ACTOR Future clusterRecoveryCore(Reference self) { self->txnStateStore); mmApplied = tr.mutations.size(); - tr.read_snapshot = self->recoveryTransactionVersion; // lastEpochEnd would make more sense, but isn't in the initial - // window of the resolver(s) + tr.read_snapshot = self->recoveryTransactionVersion; // lastEpochEnd would make more sense, but isn't in the + // initial window of the resolver(s) TraceEvent(getRecoveryEventName(ClusterRecoveryEventType::CLUSTER_RECOVERY_COMMIT_EVENT_NAME).c_str(), self->dbgid) .log(); @@ -1771,8 +1796,8 @@ ACTOR Future clusterRecoveryCore(Reference self) { // 3. No other master will attempt to commit anything to our "new" Tlogs // because it didn't recruit them // 4. Therefore, no full commit can come between self->lastEpochEnd and the first commit - // we made to the new Tlogs (self->recoveryTransactionVersion), and only our own semi-commits can come between - // our first commit and the next new TLogs + // we made to the new Tlogs (self->recoveryTransactionVersion), and only our own semi-commits can come + // between our first commit and the next new TLogs self->addActor.send(trackTlogRecovery(self, oldLogSystems, minRecoveryDuration)); debug_advanceMaxCommittedVersion(UID(), self->recoveryTransactionVersion); From 658bce5f32f3d4f210c81c736eaf22016c0d1e44 Mon Sep 17 00:00:00 2001 From: "Bharadwaj V.R" Date: Mon, 28 Mar 2022 16:49:17 -0700 Subject: [PATCH 05/28] Basic accessor methods for a sw-version file --- fdbserver/worker.actor.cpp | 121 +++++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index 326852950f..72ee7d80d7 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -21,12 +21,17 @@ #include #include +#include "fdbrpc/IAsyncFile.h" #include "fdbrpc/Locality.h" #include "fdbclient/GlobalConfig.actor.h" #include "fdbclient/ProcessInterface.h" #include "fdbclient/StorageServerInterface.h" #include "fdbserver/Knobs.h" #include "flow/ActorCollection.h" +#include "flow/Error.h" +#include "flow/FileIdentifier.h" +#include "flow/ObjectSerializer.h" +#include "flow/Platform.h" #include "flow/ProtocolVersion.h" #include "flow/SystemMonitor.h" #include "flow/TDMetric.actor.h" @@ -56,6 +61,7 @@ #include "flow/Trace.h" #include "flow/flow.h" #include "flow/network.h" +#include "flow/serialize.h" #ifdef __linux__ #include @@ -2400,6 +2406,106 @@ ACTOR Future monitorAndWriteCCPriorityInfo(std::string filePath, } } +struct SWVersion { + constexpr static FileIdentifier file_identifier = 13943984; + + uint64_t latestProtocolVersion; + uint64_t compatibleProtocolVersion; + + explicit SWVersion(ProtocolVersion latestVersion, ProtocolVersion compatibleVersion) + : latestProtocolVersion(latestVersion.version()), compatibleProtocolVersion(compatibleVersion.version()) {} + + template + void serialize(Ar& ar) { + serializer(ar, latestProtocolVersion, compatibleProtocolVersion); + } +}; + +static const std::string versionFileName = "sw-version"; + +ACTOR Future isCompatibleSoftwareVersion(std::string folder) { + try { + state std::string versionFilePath = joinPath(folder, versionFileName); + state ErrorOr> versionFile = wait( + errorOr(IAsyncFileSystem::filesystem(g_network)->open(versionFilePath, IAsyncFile::OPEN_READONLY, 0600))); + + if (versionFile.isError()) { + if (versionFile.getError().code() == error_code_file_not_found && !fileExists(versionFilePath)) { + // If a version file does not exist, we assume this is either a fresh + // installation or an upgrade from a version that does not support version files. + // Either way, we can safely continue running this version of software. + TraceEvent(SevInfo, "NoPreviousSWVersion").log(); + return true; + } else { + // Dangerous to continue if we cannot do a software compatibility test + throw versionFile.getError(); + } + } else { + // Test whether the most newest software version that has been run on this cluster is + // compatible with the current software version + int64_t filesize = wait(versionFile.get()->size()); + state Standalone fileData = makeString(filesize); + wait(success(versionFile.get()->read(mutateString(fileData), filesize, 0))); + + try { + auto latestSoftwareVersion = BinaryReader::fromStringRef(fileData, Unversioned()); + if (latestSoftwareVersion <= currentProtocolVersion) { + TraceEvent(SevInfo, "SWVersionCompatible").log(); + return true; + } else { + TraceEvent(SevInfo, "SWVersionIncompatible").log(); + return false; + } + } catch (Error& e) { + TraceEvent(SevError, "ReadSWVersionFileError").error(e); + throw e; + } + } + } catch (Error& e) { + if (e.code() == error_code_actor_cancelled) { + throw; + } + // TODO(bvr): Inject faults + TraceEvent(SevError, "OpenReadSWVersionFileError").error(e); + throw; + } +} + +ACTOR Future checkAndUpdateNewestSoftwareVersion(std::string folder) { + try { + state std::string versionFilePath = joinPath(folder, versionFileName); + state ErrorOr> versionFile = wait( + errorOr(IAsyncFileSystem::filesystem(g_network)->open(versionFilePath, IAsyncFile::OPEN_READWRITE, 0600))); + + if (versionFile.isError()) { + if (versionFile.getError().code() == error_code_file_not_found && !fileExists(versionFilePath)) { + Reference newVersionFile = wait(IAsyncFileSystem::filesystem()->open( + versionFilePath, + IAsyncFile::OPEN_ATOMIC_WRITE_AND_CREATE | IAsyncFile::OPEN_CREATE | IAsyncFile::OPEN_READWRITE, + 0600)); + versionFile = newVersionFile; + } else { + TraceEvent(SevError, "OpenSWVersionFileError").error(versionFile.getError()); + throw versionFile.getError(); + } + } + + SWVersion swVersion(currentProtocolVersion, currentProtocolVersion); + ObjectWriter wr(Unversioned()); + auto s = wr.toValue(swVersion, IncludeVersion()); + wait(versionFile.get()->write(s.toString().c_str(), s.size(), 0)); + wait(versionFile.get()->sync()); + } catch (Error& e) { + if (e.code() == error_code_actor_cancelled) { + throw; + } + TraceEvent(SevError, "OpenWriteSWVersionFileError").error(e); + throw; + } + + return Void(); +} + ACTOR Future createAndLockProcessIdFile(std::string folder) { state UID processIDUid; platform::createDirectory(folder); @@ -2701,6 +2807,21 @@ ACTOR Future fdbd(Reference connRecord, localities.set(LocalityData::keyProcessId, processIDUid.toString()); // Only one process can execute on a dataFolder from this point onwards + Future pf = isCompatibleSoftwareVersion(dataFolder); + ErrorOr px = wait(errorOr(pf)); + if (px.isError()) { + throw internal_error(); + } + ErrorOr v = wait(errorOr(checkAndUpdateNewestSoftwareVersion(dataFolder))); + if (v.isError()) { + TraceEvent(SevError, "SWVersionNotWritten").error(v.getError()); + } + Future f = isCompatibleSoftwareVersion(dataFolder); + ErrorOr vx = wait(errorOr(f)); + if (vx.isError()) { + TraceEvent(SevError, "SWVersionCompatibilityUnknown").error(vx.getError()); + } + std::string fitnessFilePath = joinPath(dataFolder, "fitness"); auto cc = makeReference>>(); auto ci = makeReference>>(); From c009eba7ed3a3239527230c385308ca86b362d89 Mon Sep 17 00:00:00 2001 From: "Bharadwaj V.R" Date: Thu, 31 Mar 2022 00:09:58 -0700 Subject: [PATCH 06/28] Add last-sw-version tracking to SWVersion structure --- fdbclient/SystemData.cpp | 7 +++++++ fdbclient/SystemData.h | 2 ++ fdbserver/worker.actor.cpp | 21 ++++----------------- flow/ProtocolVersion.h | 29 ++++++++++++++++++++++++++++- 4 files changed, 41 insertions(+), 18 deletions(-) diff --git a/fdbclient/SystemData.cpp b/fdbclient/SystemData.cpp index a0f32af238..77d23f726d 100644 --- a/fdbclient/SystemData.cpp +++ b/fdbclient/SystemData.cpp @@ -590,6 +590,13 @@ StorageServerInterface decodeServerListValueFB(ValueRef const& value) { return s; } +SWVersion decodeSWVersionValue(ValueRef const& value) { + SWVersion s; + ObjectReader reader(value.begin(), IncludeVersion()); + reader.deserialize(s); + return s; +} + // processClassKeys.contains(k) iff k.startsWith( processClassKeys.begin ) because '/'+1 == '0' const KeyRangeRef processClassKeys(LiteralStringRef("\xff/processClass/"), LiteralStringRef("\xff/processClass0")); const KeyRef processClassPrefix = processClassKeys.begin; diff --git a/fdbclient/SystemData.h b/fdbclient/SystemData.h index 8985f6483e..a883500a4f 100644 --- a/fdbclient/SystemData.h +++ b/fdbclient/SystemData.h @@ -197,6 +197,8 @@ const Value serverListValue(StorageServerInterface const&); UID decodeServerListKey(KeyRef const&); StorageServerInterface decodeServerListValue(ValueRef const&); +SWVersion decodeSWVersionValue(ValueRef const&); + // "\xff/processClass/[[processID]]" := "[[ProcessClass]]" // Contains a mapping from processID to processClass extern const KeyRangeRef processClassKeys; diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index 72ee7d80d7..ef29efb630 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -2406,21 +2406,6 @@ ACTOR Future monitorAndWriteCCPriorityInfo(std::string filePath, } } -struct SWVersion { - constexpr static FileIdentifier file_identifier = 13943984; - - uint64_t latestProtocolVersion; - uint64_t compatibleProtocolVersion; - - explicit SWVersion(ProtocolVersion latestVersion, ProtocolVersion compatibleVersion) - : latestProtocolVersion(latestVersion.version()), compatibleProtocolVersion(compatibleVersion.version()) {} - - template - void serialize(Ar& ar) { - serializer(ar, latestProtocolVersion, compatibleProtocolVersion); - } -}; - static const std::string versionFileName = "sw-version"; ACTOR Future isCompatibleSoftwareVersion(std::string folder) { @@ -2448,7 +2433,9 @@ ACTOR Future isCompatibleSoftwareVersion(std::string folder) { wait(success(versionFile.get()->read(mutateString(fileData), filesize, 0))); try { - auto latestSoftwareVersion = BinaryReader::fromStringRef(fileData, Unversioned()); + Value value = ObjectReader::fromStringRef(fileData, Unversioned()); + SWVersion swversion = decodeSWVersionValue(value); + ProtocolVersion latestSoftwareVersion(swversion.latestProtocolVersion); if (latestSoftwareVersion <= currentProtocolVersion) { TraceEvent(SevInfo, "SWVersionCompatible").log(); return true; @@ -2490,7 +2477,7 @@ ACTOR Future checkAndUpdateNewestSoftwareVersion(std::string folder) { } } - SWVersion swVersion(currentProtocolVersion, currentProtocolVersion); + SWVersion swVersion(currentProtocolVersion, currentProtocolVersion, minCompatibleProtocolVersion); ObjectWriter wr(Unversioned()); auto s = wr.toValue(swVersion, IncludeVersion()); wait(versionFile.get()->write(s.toString().c_str(), s.size(), 0)); diff --git a/flow/ProtocolVersion.h b/flow/ProtocolVersion.h index 99900acd7b..8ae430ecb2 100644 --- a/flow/ProtocolVersion.h +++ b/flow/ProtocolVersion.h @@ -37,6 +37,9 @@ constexpr uint64_t currentProtocolVersionValue = 0x0FDB00B071010000LL; // than the current version, meaning that we only support downgrades between consecutive release versions. constexpr uint64_t minInvalidProtocolVersionValue = 0x0FDB00B073000000LL; +// The lowest protocol version that can be downgraded to. +constexpr uint64_t minCompatibleProtocolVersionValue = 0x0FDB00B070000000LL; + #define PROTOCOL_VERSION_FEATURE(v, x) \ static_assert((v & 0xF0FFFFLL) == 0 || v < 0x0FDB00B071000000LL, "Unexpected feature protocol version"); \ static_assert(v <= currentProtocolVersionValue, "Feature protocol version too large"); \ @@ -172,6 +175,7 @@ struct Traceable : std::true_type { constexpr ProtocolVersion currentProtocolVersion(currentProtocolVersionValue); constexpr ProtocolVersion minInvalidProtocolVersion(minInvalidProtocolVersionValue); +constexpr ProtocolVersion minCompatibleProtocolVersion(minCompatibleProtocolVersionValue); // This assert is intended to help prevent incrementing the leftmost digits accidentally. It will probably need to // change when we reach version 10. @@ -190,4 +194,27 @@ static_assert(minInvalidProtocolVersion.version() >= // The min invalid protocol version should be the smallest possible protocol version associated with a minor release // version. -static_assert((minInvalidProtocolVersion.version() & 0xFFFFFFLL) == 0, "Unexpected min invalid protocol version"); \ No newline at end of file +static_assert((minInvalidProtocolVersion.version() & 0xFFFFFFLL) == 0, "Unexpected min invalid protocol version"); + +struct SWVersion { + constexpr static FileIdentifier file_identifier = 13943984; + + uint64_t latestProtocolVersion; + uint64_t lastProtocolVersion; + uint64_t lowestCompatibleProtocolVersion; + + SWVersion() { + latestProtocolVersion = currentProtocolVersion.version(); + lastProtocolVersion = currentProtocolVersion.version(); + lowestCompatibleProtocolVersion = currentProtocolVersion.version(); + } + + SWVersion(ProtocolVersion latestVersion, ProtocolVersion lastVersion, ProtocolVersion minCompatibleVersion) + : latestProtocolVersion(latestVersion.version()), lastProtocolVersion(lastVersion.version()), + lowestCompatibleProtocolVersion(minCompatibleVersion.version()) {} + + template + void serialize(Ar& ar) { + serializer(ar, latestProtocolVersion, lastProtocolVersion, lowestCompatibleProtocolVersion); + } +}; \ No newline at end of file From a9fa65e5c7249905cabe5daff4f09883c274a596 Mon Sep 17 00:00:00 2001 From: "Bharadwaj V.R" Date: Thu, 31 Mar 2022 14:09:37 -0700 Subject: [PATCH 07/28] Revert "Compare current and persisted protocol versions before updating persisted protocol version" This reverts commit 1b9f773d2a312bf220312cf0a831ef3fbba827c2. --- fdbserver/ClusterRecovery.actor.cpp | 43 ++++++----------------------- 1 file changed, 9 insertions(+), 34 deletions(-) diff --git a/fdbserver/ClusterRecovery.actor.cpp b/fdbserver/ClusterRecovery.actor.cpp index c06dc9a160..2d50ef699b 100644 --- a/fdbserver/ClusterRecovery.actor.cpp +++ b/fdbserver/ClusterRecovery.actor.cpp @@ -26,7 +26,6 @@ #include "fdbserver/MasterInterface.h" #include "fdbserver/WaitFailure.h" -#include "flow/Trace.h" #include "flow/actorcompiler.h" // This must be the last #include. static std::set const& normalClusterRecoveryErrors() { @@ -1269,8 +1268,7 @@ ACTOR Future recoverFrom(Reference self, std::vector* seedServers, std::vector>* initialConfChanges, Future poppedTxsVersion, - bool* clusterIdExists, - ProtocolVersion* persistedServerVersion) { + bool* clusterIdExists) { TraceEvent(getRecoveryEventName(ClusterRecoveryEventType::CLUSTER_RECOVERY_STATE_EVENT_NAME).c_str(), self->dbgid) .detail("StatusCode", RecoveryStatus::reading_transaction_system_state) .detail("Status", RecoveryStatus::names[RecoveryStatus::reading_transaction_system_state]) @@ -1304,12 +1302,6 @@ ACTOR Future recoverFrom(Reference self, self->clusterId = BinaryReader::fromStringRef(clusterId.get(), Unversioned()); } - Optional persistedServerVersionValue = self->txnStateStore->readValue(latestServerVersionKey).get(); - if (persistedServerVersionValue.present()) { - *persistedServerVersion = - BinaryReader::fromStringRef(persistedServerVersionValue.get(), Unversioned()); - } - // Ordinarily we pass through this loop once and recover. We go around the loop if recovery stalls for more than a // second, a provisional master is initialized, and an "emergency transaction" is submitted that might change the // configuration so that we can finish recovery. @@ -1449,7 +1441,6 @@ ACTOR Future clusterRecoveryCore(Reference self) { state Future minRecoveryDuration; state Future poppedTxsVersion; state bool clusterIdExists = false; - state ProtocolVersion persistedServerVersion; loop { Reference oldLogSystem = oldLogSystems->get(); @@ -1470,8 +1461,7 @@ ACTOR Future clusterRecoveryCore(Reference self) { &seedServers, &initialConfChanges, poppedTxsVersion, - std::addressof(clusterIdExists), - std::addressof(persistedServerVersion)) + std::addressof(clusterIdExists)) : Never())) { reg.cancel(); break; @@ -1590,24 +1580,9 @@ ACTOR Future clusterRecoveryCore(Reference self) { tr.set(recoveryCommitRequest.arena, clusterIdKey, BinaryWriter::toValue(self->clusterId, Unversioned())); } - if (persistedServerVersion.version() != 0) { - TraceEvent(SevInfo, "RecoveryLatestServerVersion", self->dbgid).detail("Version", persistedServerVersion); - } - - // ProtocolVersion testPersistedServerVersion(0x0FDB00B070010000LL); - // persistedServerVersion = testPersistedServerVersion - if (currentProtocolVersion > persistedServerVersion) { - tr.set(recoveryCommitRequest.arena, - latestServerVersionKey, - BinaryWriter::toValue(currentProtocolVersion.version(), Unversioned())); - TraceEvent(SevInfo, "SetNewLatestServerVersion", self->dbgid) - .detail("OldVersion", persistedServerVersion) - .detail("NewVersion", currentProtocolVersion); - } else { - TraceEvent(SevInfo, "RetainedLatestServerVersion", self->dbgid) - .detail("OldVersion", persistedServerVersion) - .detail("NewVersion", currentProtocolVersion); - } + tr.set(recoveryCommitRequest.arena, + latestServerVersionKey, + BinaryWriter::toValue(currentProtocolVersion.version(), Unversioned())); applyMetadataMutations(SpanID(), self->dbgid, @@ -1616,8 +1591,8 @@ ACTOR Future clusterRecoveryCore(Reference self) { self->txnStateStore); mmApplied = tr.mutations.size(); - tr.read_snapshot = self->recoveryTransactionVersion; // lastEpochEnd would make more sense, but isn't in the - // initial window of the resolver(s) + tr.read_snapshot = self->recoveryTransactionVersion; // lastEpochEnd would make more sense, but isn't in the initial + // window of the resolver(s) TraceEvent(getRecoveryEventName(ClusterRecoveryEventType::CLUSTER_RECOVERY_COMMIT_EVENT_NAME).c_str(), self->dbgid) .log(); @@ -1657,8 +1632,8 @@ ACTOR Future clusterRecoveryCore(Reference self) { // 3. No other master will attempt to commit anything to our "new" Tlogs // because it didn't recruit them // 4. Therefore, no full commit can come between self->lastEpochEnd and the first commit - // we made to the new Tlogs (self->recoveryTransactionVersion), and only our own semi-commits can come - // between our first commit and the next new TLogs + // we made to the new Tlogs (self->recoveryTransactionVersion), and only our own semi-commits can come between + // our first commit and the next new TLogs self->addActor.send(trackTlogRecovery(self, oldLogSystems, minRecoveryDuration)); debug_advanceMaxCommittedVersion(UID(), self->recoveryTransactionVersion); From 3410ab3bd223ae9b0b415721fc1fc0da7620c13c Mon Sep 17 00:00:00 2001 From: "Bharadwaj V.R" Date: Thu, 31 Mar 2022 14:10:37 -0700 Subject: [PATCH 08/28] Revert "Create new system key for tracking latest software version installed on the cluster" This reverts commit 72d99cc5b19acb2f84edaefc3bbe317679c54148. --- fdbclient/SystemData.cpp | 2 -- fdbclient/SystemData.h | 8 -------- fdbserver/ApplyMetadataMutation.cpp | 3 +-- fdbserver/ClusterRecovery.actor.cpp | 5 ----- fdbserver/DDTeamCollection.actor.cpp | 19 ------------------- 5 files changed, 1 insertion(+), 36 deletions(-) diff --git a/fdbclient/SystemData.cpp b/fdbclient/SystemData.cpp index 5a54a9e511..303919e68a 100644 --- a/fdbclient/SystemData.cpp +++ b/fdbclient/SystemData.cpp @@ -825,8 +825,6 @@ std::vector> decodeBackupStartedValue(const ValueRef& va return ids; } -const KeyRef latestServerVersionKey = LiteralStringRef("\xff/latestServerVersion"); - const KeyRef coordinatorsKey = LiteralStringRef("\xff/coordinators"); const KeyRef logsKey = LiteralStringRef("\xff/logs"); const KeyRef minRequiredCommitVersionKey = LiteralStringRef("\xff/minRequiredCommitVersion"); diff --git a/fdbclient/SystemData.h b/fdbclient/SystemData.h index df633d2de0..68e35d35c4 100644 --- a/fdbclient/SystemData.h +++ b/fdbclient/SystemData.h @@ -335,14 +335,6 @@ std::vector> decodeBackupStartedValue(const ValueRef& va // 1 = Send a signal to pause/already paused. extern const KeyRef backupPausedKey; -// Key whose value stores the newest software version that has been run on the server -// This is used to make sure that once a version of software is run on the -// server, only compatible versions are permitted to run thereafter. -// A different version of software is considered permitted if it is newer or if -// is only one minor version older. -// "\xff/latestServerVersion" -extern const KeyRef latestServerVersionKey; - // "\xff/coordinators" = "[[ClusterConnectionString]]" // Set to the encoded structure of the cluster's current set of coordinators. // Changed when performing quorumChange. diff --git a/fdbserver/ApplyMetadataMutation.cpp b/fdbserver/ApplyMetadataMutation.cpp index 74b1218846..ef53e278f2 100644 --- a/fdbserver/ApplyMetadataMutation.cpp +++ b/fdbserver/ApplyMetadataMutation.cpp @@ -572,8 +572,7 @@ private: m.param1.startsWith(applyMutationsAddPrefixRange.begin) || m.param1.startsWith(applyMutationsRemovePrefixRange.begin) || m.param1.startsWith(tagLocalityListPrefix) || m.param1.startsWith(serverTagHistoryPrefix) || - m.param1.startsWith(testOnlyTxnStateStorePrefixRange.begin) || m.param1 == clusterIdKey || - m.param1 == latestServerVersionKey) { + m.param1.startsWith(testOnlyTxnStateStorePrefixRange.begin) || m.param1 == clusterIdKey) { txnStateStore->set(KeyValueRef(m.param1, m.param2)); } diff --git a/fdbserver/ClusterRecovery.actor.cpp b/fdbserver/ClusterRecovery.actor.cpp index 2d50ef699b..180673f850 100644 --- a/fdbserver/ClusterRecovery.actor.cpp +++ b/fdbserver/ClusterRecovery.actor.cpp @@ -18,7 +18,6 @@ * limitations under the License. */ -#include "fdbclient/SystemData.h" #include "fdbrpc/sim_validation.h" #include "fdbserver/ApplyMetadataMutation.h" #include "fdbserver/BackupProgress.actor.h" @@ -1580,10 +1579,6 @@ ACTOR Future clusterRecoveryCore(Reference self) { tr.set(recoveryCommitRequest.arena, clusterIdKey, BinaryWriter::toValue(self->clusterId, Unversioned())); } - tr.set(recoveryCommitRequest.arena, - latestServerVersionKey, - BinaryWriter::toValue(currentProtocolVersion.version(), Unversioned())); - applyMetadataMutations(SpanID(), self->dbgid, recoveryCommitRequest.arena, diff --git a/fdbserver/DDTeamCollection.actor.cpp b/fdbserver/DDTeamCollection.actor.cpp index 57047f45de..bdc6259a98 100644 --- a/fdbserver/DDTeamCollection.actor.cpp +++ b/fdbserver/DDTeamCollection.actor.cpp @@ -2256,21 +2256,6 @@ public: } } - ACTOR static Future getLatestSoftwareVersion(DDTeamCollection* self) { - state ReadYourWritesTransaction tr(self->cx); - loop { - try { - tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); - tr.setOption(FDBTransactionOptions::LOCK_AWARE); - Optional latestServerVersion = wait(tr.get(latestServerVersionKey)); - ASSERT(latestServerVersion.present()); - return BinaryReader::fromStringRef(latestServerVersion.get(), Unversioned()); - } catch (Error& e) { - wait(tr.onError(e)); - } - } - } - ACTOR static Future initializeStorage(DDTeamCollection* self, RecruitStorageReply candidateWorker, const DDEnabledState* ddEnabledState, @@ -2294,10 +2279,6 @@ public: self->recruitingIds.insert(interfaceId); self->recruitingLocalities.insert(candidateWorker.worker.stableAddress()); - ProtocolVersion latestServerVersion = wait(DDTeamCollectionImpl::getLatestSoftwareVersion(self)); - TraceEvent(SevInfo, "DDSoftwareVersion", self->distributorId) - .detail("LatestServerVersion", latestServerVersion); - UID clusterId = wait(self->getClusterId()); state InitializeStorageRequest isr; From 4b4b20118e1e3bcabb721000c1146fd1f03d3c14 Mon Sep 17 00:00:00 2001 From: "Bharadwaj V.R" Date: Thu, 31 Mar 2022 16:11:21 -0700 Subject: [PATCH 09/28] Remove access of system key by status actor to test latest protocol version. That information will now be in a file in the data dir --- fdbserver/Status.actor.cpp | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp index c54d27140d..b619cf3df0 100644 --- a/fdbserver/Status.actor.cpp +++ b/fdbserver/Status.actor.cpp @@ -1537,18 +1537,7 @@ struct LoadConfigurationResult { }; ACTOR Future getLatestSoftwareVersion(Database cx) { - state ReadYourWritesTransaction tr(cx); - loop { - try { - tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); - tr.setOption(FDBTransactionOptions::LOCK_AWARE); - Optional latestServerVersion = wait(tr.get(latestServerVersionKey)); - ASSERT(latestServerVersion.present()); - return BinaryReader::fromStringRef(latestServerVersion.get(), Unversioned()); - } catch (Error& e) { - wait(tr.onError(e)); - } - } + return currentProtocolVersion; } ACTOR static Future, Optional>> From 008bd93cce98d5e3771253427c3fb2d4e06b197b Mon Sep 17 00:00:00 2001 From: "Bharadwaj V.R" Date: Fri, 1 Apr 2022 15:45:24 -0700 Subject: [PATCH 10/28] Change the default construction for SWVersion and add error code for sw version incompatibility --- fdbserver/worker.actor.cpp | 52 +++++++++++++++++++++++++------------- flow/ProtocolVersion.h | 6 +---- flow/error_definitions.h | 1 + 3 files changed, 37 insertions(+), 22 deletions(-) diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index e0148065a7..538b073803 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -60,6 +60,7 @@ #include "flow/ThreadHelper.actor.h" #include "flow/Trace.h" #include "flow/flow.h" +#include "flow/genericactors.actor.h" #include "flow/network.h" #include "flow/serialize.h" @@ -2460,7 +2461,7 @@ ACTOR Future monitorAndWriteCCPriorityInfo(std::string filePath, static const std::string versionFileName = "sw-version"; -ACTOR Future isCompatibleSoftwareVersion(std::string folder) { +ACTOR Future testSoftwareVersionCompatibility(std::string folder) { try { state std::string versionFilePath = joinPath(folder, versionFileName); state ErrorOr> versionFile = wait( @@ -2472,7 +2473,7 @@ ACTOR Future isCompatibleSoftwareVersion(std::string folder) { // installation or an upgrade from a version that does not support version files. // Either way, we can safely continue running this version of software. TraceEvent(SevInfo, "NoPreviousSWVersion").log(); - return true; + return SWVersion(); } else { // Dangerous to continue if we cannot do a software compatibility test throw versionFile.getError(); @@ -2487,13 +2488,13 @@ ACTOR Future isCompatibleSoftwareVersion(std::string folder) { try { Value value = ObjectReader::fromStringRef(fileData, Unversioned()); SWVersion swversion = decodeSWVersionValue(value); - ProtocolVersion latestSoftwareVersion(swversion.latestProtocolVersion); - if (latestSoftwareVersion <= currentProtocolVersion) { + ProtocolVersion lowestCompatibleVersion(swversion.lowestCompatibleProtocolVersion); + if (currentProtocolVersion >= lowestCompatibleVersion) { TraceEvent(SevInfo, "SWVersionCompatible").log(); - return true; + return swversion; } else { TraceEvent(SevInfo, "SWVersionIncompatible").log(); - return false; + throw incomptible_software_version(); } } catch (Error& e) { TraceEvent(SevError, "ReadSWVersionFileError").error(e); @@ -2510,7 +2511,13 @@ ACTOR Future isCompatibleSoftwareVersion(std::string folder) { } } -ACTOR Future checkAndUpdateNewestSoftwareVersion(std::string folder) { +ACTOR Future checkAndUpdateNewestSoftwareVersion(std::string folder, + ProtocolVersion currentVersion, + ProtocolVersion latestVersion, + ProtocolVersion minCompatibleVersion) { + + ASSERT(currentVersion >= minCompatibleVersion); + try { state std::string versionFilePath = joinPath(folder, versionFileName); state ErrorOr> versionFile = wait( @@ -2545,6 +2552,21 @@ ACTOR Future checkAndUpdateNewestSoftwareVersion(std::string folder) { return Void(); } +static const std::string swversionTestDirName = "sw-version-test"; + +TEST_CASE("/fdbserver/worker/swversion/noversionhistory") { + wait(Future(Void())); + + if (!platform::createDirectory("sw-version-test")) { + TraceEvent(SevInfo, "CreatedDirectory").detail("Directory", "sw-version-test"); + return Void(); + } + + SWVersion swversion = wait(errorOr(testSoftwareVersionCompatibility(swversionTestDirName))); + + return Void(); +} + ACTOR Future createAndLockProcessIdFile(std::string folder) { state UID processIDUid; platform::createDirectory(folder); @@ -2844,20 +2866,16 @@ ACTOR Future fdbd(Reference connRecord, localities.set(LocalityData::keyProcessId, processIDUid.toString()); // Only one process can execute on a dataFolder from this point onwards - Future pf = isCompatibleSoftwareVersion(dataFolder); - ErrorOr px = wait(errorOr(pf)); - if (px.isError()) { - throw internal_error(); + Future f = testSoftwareVersionCompatibility(dataFolder); + ErrorOr swversion = wait(errorOr(f)); + if (swversion.isError()) { + throw swversion.getError(); } - ErrorOr v = wait(errorOr(checkAndUpdateNewestSoftwareVersion(dataFolder))); + ErrorOr v = wait(errorOr(checkAndUpdateNewestSoftwareVersion( + dataFolder, currentProtocolVersion, currentProtocolVersion, currentProtocolVersion))); if (v.isError()) { TraceEvent(SevError, "SWVersionNotWritten").error(v.getError()); } - Future f = isCompatibleSoftwareVersion(dataFolder); - ErrorOr vx = wait(errorOr(f)); - if (vx.isError()) { - TraceEvent(SevError, "SWVersionCompatibilityUnknown").error(vx.getError()); - } std::string fitnessFilePath = joinPath(dataFolder, "fitness"); auto cc = makeReference>>(); diff --git a/flow/ProtocolVersion.h b/flow/ProtocolVersion.h index e034968773..e3f8f659fa 100644 --- a/flow/ProtocolVersion.h +++ b/flow/ProtocolVersion.h @@ -204,11 +204,7 @@ struct SWVersion { uint64_t lastProtocolVersion; uint64_t lowestCompatibleProtocolVersion; - SWVersion() { - latestProtocolVersion = currentProtocolVersion.version(); - lastProtocolVersion = currentProtocolVersion.version(); - lowestCompatibleProtocolVersion = currentProtocolVersion.version(); - } + SWVersion() { SWVersion(ProtocolVersion(), ProtocolVersion(), ProtocolVersion()); } SWVersion(ProtocolVersion latestVersion, ProtocolVersion lastVersion, ProtocolVersion minCompatibleVersion) : latestProtocolVersion(latestVersion.version()), lastProtocolVersion(lastVersion.version()), diff --git a/flow/error_definitions.h b/flow/error_definitions.h index 7710bca9ca..fdae74464c 100755 --- a/flow/error_definitions.h +++ b/flow/error_definitions.h @@ -113,6 +113,7 @@ ERROR( dd_tracker_cancelled, 1215, "The data distribution tracker has been cance ERROR( failed_to_progress, 1216, "Process has failed to make sufficient progress" ) ERROR( invalid_cluster_id, 1217, "Attempted to join cluster with a different cluster ID" ) ERROR( restart_cluster_controller, 1218, "Restart cluster controller process" ) +ERROR( incomptible_software_version, 1219, "Current software does not support database format" ) // 15xx Platform errors ERROR( platform_error, 1500, "Platform error" ) From d175599713c4cb5bc5513baf09c8d55deed3304c Mon Sep 17 00:00:00 2001 From: "Bharadwaj V.R" Date: Fri, 1 Apr 2022 15:50:38 -0700 Subject: [PATCH 11/28] resolve merge conflict from upstream --- fdbclient/SystemData.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/fdbclient/SystemData.cpp b/fdbclient/SystemData.cpp index 1dadd094db..9aae0c6f64 100644 --- a/fdbclient/SystemData.cpp +++ b/fdbclient/SystemData.cpp @@ -607,13 +607,6 @@ StorageServerInterface decodeServerListValueFB(ValueRef const& value) { return s; } -<<<<<<< HEAD -SWVersion decodeSWVersionValue(ValueRef const& value) { - SWVersion s; - ObjectReader reader(value.begin(), IncludeVersion()); - reader.deserialize(s); - return s; -======= StorageServerInterface decodeServerListValue(ValueRef const& value) { StorageServerInterface s; BinaryReader reader(value, IncludeVersion()); @@ -624,7 +617,13 @@ StorageServerInterface decodeServerListValue(ValueRef const& value) { } return decodeServerListValueFB(value); ->>>>>>> d248b73df5fb08b88d0e003e6d2950fe03bab5f2 +} + +SWVersion decodeSWVersionValue(ValueRef const& value) { + SWVersion s; + ObjectReader reader(value.begin(), IncludeVersion()); + reader.deserialize(s); + return s; } // processClassKeys.contains(k) iff k.startsWith( processClassKeys.begin ) because '/'+1 == '0' From 3fbbf415e776dd76f40a192378b7496fc6043802 Mon Sep 17 00:00:00 2001 From: "Bharadwaj V.R" Date: Mon, 4 Apr 2022 18:42:52 -0700 Subject: [PATCH 12/28] Properly encapsulate SWVersion and create a couple of UTs for sw version testing --- fdbclient/SystemData.cpp | 6 +++ fdbclient/SystemData.h | 1 + fdbserver/worker.actor.cpp | 77 ++++++++++++++++++++++++++++++++------ flow/ProtocolVersion.h | 30 +++++++++++---- 4 files changed, 94 insertions(+), 20 deletions(-) diff --git a/fdbclient/SystemData.cpp b/fdbclient/SystemData.cpp index 9aae0c6f64..afc9318492 100644 --- a/fdbclient/SystemData.cpp +++ b/fdbclient/SystemData.cpp @@ -619,6 +619,12 @@ StorageServerInterface decodeServerListValue(ValueRef const& value) { return decodeServerListValueFB(value); } +const Value swVersionValue(SWVersion const& swversion) { + auto protocolVersion = currentProtocolVersion; + protocolVersion.addObjectSerializerFlag(); + return ObjectWriter::toValue(swversion, IncludeVersion(protocolVersion)); +} + SWVersion decodeSWVersionValue(ValueRef const& value) { SWVersion s; ObjectReader reader(value.begin(), IncludeVersion()); diff --git a/fdbclient/SystemData.h b/fdbclient/SystemData.h index 68e35d35c4..2483daf534 100644 --- a/fdbclient/SystemData.h +++ b/fdbclient/SystemData.h @@ -205,6 +205,7 @@ const Value serverListValue(StorageServerInterface const&); UID decodeServerListKey(KeyRef const&); StorageServerInterface decodeServerListValue(ValueRef const&); +const Value swVersionValue(SWVersion const& swversion); SWVersion decodeSWVersionValue(ValueRef const&); // "\xff/processClass/[[processID]]" := "[[ProcessClass]]" diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index d8649c999c..957f1cbd66 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -2507,7 +2507,7 @@ ACTOR Future monitorAndWriteCCPriorityInfo(std::string filePath, static const std::string versionFileName = "sw-version"; -ACTOR Future testSoftwareVersionCompatibility(std::string folder) { +ACTOR Future testSoftwareVersionCompatibility(std::string folder, ProtocolVersion currentVersion) { try { state std::string versionFilePath = joinPath(folder, versionFileName); state ErrorOr> versionFile = wait( @@ -2522,20 +2522,25 @@ ACTOR Future testSoftwareVersionCompatibility(std::string folder) { return SWVersion(); } else { // Dangerous to continue if we cannot do a software compatibility test + TraceEvent(SevError, "OpenSWVersionFileError").error(versionFile.getError()); throw versionFile.getError(); } } else { // Test whether the most newest software version that has been run on this cluster is // compatible with the current software version int64_t filesize = wait(versionFile.get()->size()); - state Standalone fileData = makeString(filesize); - wait(success(versionFile.get()->read(mutateString(fileData), filesize, 0))); + TraceEvent(SevInfo, "SWVersionFileSizeBeforeRead") + .detail("File", versionFilePath) + .detail("Size", filesize) + .detail("FD", versionFile.get()->debugFD()); + + state Standalone buf = makeString(filesize); + wait(success(versionFile.get()->read(mutateString(buf), filesize, 0))); try { - Value value = ObjectReader::fromStringRef(fileData, Unversioned()); - SWVersion swversion = decodeSWVersionValue(value); - ProtocolVersion lowestCompatibleVersion(swversion.lowestCompatibleProtocolVersion); - if (currentProtocolVersion >= lowestCompatibleVersion) { + SWVersion swversion = ObjectReader::fromStringRef(buf, IncludeVersion()); + ProtocolVersion lowestCompatibleVersion(swversion.lowestCompatibleProtocolVersion()); + if (currentVersion >= lowestCompatibleVersion) { TraceEvent(SevInfo, "SWVersionCompatible").log(); return swversion; } else { @@ -2582,11 +2587,18 @@ ACTOR Future checkAndUpdateNewestSoftwareVersion(std::string folder, } } - SWVersion swVersion(currentProtocolVersion, currentProtocolVersion, minCompatibleProtocolVersion); - ObjectWriter wr(Unversioned()); - auto s = wr.toValue(swVersion, IncludeVersion()); + SWVersion swVersion(latestVersion, currentVersion, minCompatibleVersion); + auto s = swVersionValue(swVersion); + TraceEvent(SevInfo, "SWVersionFileWriteSize").detail("Size", s.size()).detail("String", s.toString().c_str()); wait(versionFile.get()->write(s.toString().c_str(), s.size(), 0)); wait(versionFile.get()->sync()); + wait(delay(0)); + int64_t filesize = wait(versionFile.get()->size()); + TraceEvent(SevInfo, "SWVersionFileSizeAfterWrite") + .detail("File", versionFilePath) + .detail("Size", filesize) + .detail("FD", versionFile.get()->debugFD()); + } catch (Error& e) { if (e.code() == error_code_actor_cancelled) { throw; @@ -2603,12 +2615,53 @@ static const std::string swversionTestDirName = "sw-version-test"; TEST_CASE("/fdbserver/worker/swversion/noversionhistory") { wait(Future(Void())); + platform::eraseDirectoryRecursive(swversionTestDirName); + if (!platform::createDirectory("sw-version-test")) { TraceEvent(SevInfo, "CreatedDirectory").detail("Directory", "sw-version-test"); return Void(); } - SWVersion swversion = wait(errorOr(testSoftwareVersionCompatibility(swversionTestDirName))); + ErrorOr swversion = wait(errorOr( + testSoftwareVersionCompatibility(swversionTestDirName, ProtocolVersion::withStorageInterfaceReadiness()))); + + ASSERT(!swversion.isError()); + if (!swversion.isError()) { + ASSERT(!swversion.get().isValid()); + } + + platform::eraseDirectoryRecursive(swversionTestDirName); + + return Void(); +} + +TEST_CASE("/fdbserver/worker/swversion/writeVerifyVersion") { + wait(Future(Void())); + + platform::eraseDirectoryRecursive(swversionTestDirName); + + if (!platform::createDirectory("sw-version-test")) { + TraceEvent(SevInfo, "CreatedDirectory").detail("Directory", "sw-version-test"); + return Void(); + } + + ErrorOr f = wait(errorOr(checkAndUpdateNewestSoftwareVersion(swversionTestDirName, + ProtocolVersion::withStorageInterfaceReadiness(), + ProtocolVersion::withStorageInterfaceReadiness(), + ProtocolVersion::withTSS()))); + ASSERT(!f.isError()); + + ErrorOr swversion = + wait(errorOr(testSoftwareVersionCompatibility(swversionTestDirName, currentProtocolVersion))); + + ASSERT(!swversion.isError()); + if (!swversion.isError()) { + ASSERT(swversion.get().latestProtocolVersion() == ProtocolVersion::withTenants().version()); + ASSERT(swversion.get().lastProtocolVersion() == currentProtocolVersion.version()); + ASSERT(swversion.get().lowestCompatibleProtocolVersion() == ProtocolVersion::withTSS().version()); + } + + platform::eraseDirectoryRecursive(swversionTestDirName); return Void(); } @@ -2912,7 +2965,7 @@ ACTOR Future fdbd(Reference connRecord, localities.set(LocalityData::keyProcessId, processIDUid.toString()); // Only one process can execute on a dataFolder from this point onwards - Future f = testSoftwareVersionCompatibility(dataFolder); + Future f = testSoftwareVersionCompatibility(dataFolder, currentProtocolVersion); ErrorOr swversion = wait(errorOr(f)); if (swversion.isError()) { throw swversion.getError(); diff --git a/flow/ProtocolVersion.h b/flow/ProtocolVersion.h index 91757e203f..2432a1de01 100644 --- a/flow/ProtocolVersion.h +++ b/flow/ProtocolVersion.h @@ -199,20 +199,34 @@ static_assert(minInvalidProtocolVersion.version() >= static_assert((minInvalidProtocolVersion.version() & 0xFFFFFFLL) == 0, "Unexpected min invalid protocol version"); struct SWVersion { - constexpr static FileIdentifier file_identifier = 13943984; + constexpr static FileIdentifier file_identifier = 13943914; - uint64_t latestProtocolVersion; - uint64_t lastProtocolVersion; - uint64_t lowestCompatibleProtocolVersion; +private: + uint64_t _latestProtocolVersion; + uint64_t _lastProtocolVersion; + uint64_t _lowestCompatibleProtocolVersion; - SWVersion() { SWVersion(ProtocolVersion(), ProtocolVersion(), ProtocolVersion()); } +public: + SWVersion() { + _latestProtocolVersion = 0; + _lastProtocolVersion = 0; + _lowestCompatibleProtocolVersion = 0; + } SWVersion(ProtocolVersion latestVersion, ProtocolVersion lastVersion, ProtocolVersion minCompatibleVersion) - : latestProtocolVersion(latestVersion.version()), lastProtocolVersion(lastVersion.version()), - lowestCompatibleProtocolVersion(minCompatibleVersion.version()) {} + : _latestProtocolVersion(latestVersion.version()), _lastProtocolVersion(lastVersion.version()), + _lowestCompatibleProtocolVersion(minCompatibleVersion.version()) {} + + bool isValid() const { + return (_latestProtocolVersion != 0 && _lastProtocolVersion != 0 && _lowestCompatibleProtocolVersion != 0); + } + + uint64_t latestProtocolVersion() const { return _latestProtocolVersion; } + uint64_t lastProtocolVersion() const { return _lastProtocolVersion; } + uint64_t lowestCompatibleProtocolVersion() const { return _lowestCompatibleProtocolVersion; } template void serialize(Ar& ar) { - serializer(ar, latestProtocolVersion, lastProtocolVersion, lowestCompatibleProtocolVersion); + serializer(ar, _latestProtocolVersion, _lastProtocolVersion, _lowestCompatibleProtocolVersion); } }; \ No newline at end of file From 1d23d92e40a74db88243feb25670f2266693960b Mon Sep 17 00:00:00 2001 From: "Bharadwaj V.R" Date: Mon, 4 Apr 2022 19:31:09 -0700 Subject: [PATCH 13/28] Fix typo in the name of new error code, and add a few UTs --- fdbserver/worker.actor.cpp | 138 +++++++++++++++++++++++++++++++++++-- flow/error_definitions.h | 2 +- 2 files changed, 134 insertions(+), 6 deletions(-) diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index 957f1cbd66..52e6b7665f 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -2545,7 +2545,7 @@ ACTOR Future testSoftwareVersionCompatibility(std::string folder, Pro return swversion; } else { TraceEvent(SevInfo, "SWVersionIncompatible").log(); - throw incomptible_software_version(); + throw incompatible_software_version(); } } catch (Error& e) { TraceEvent(SevError, "ReadSWVersionFileError").error(e); @@ -2651,13 +2651,141 @@ TEST_CASE("/fdbserver/worker/swversion/writeVerifyVersion") { ProtocolVersion::withTSS()))); ASSERT(!f.isError()); - ErrorOr swversion = - wait(errorOr(testSoftwareVersionCompatibility(swversionTestDirName, currentProtocolVersion))); + ErrorOr swversion = wait(errorOr( + testSoftwareVersionCompatibility(swversionTestDirName, ProtocolVersion::withStorageInterfaceReadiness()))); ASSERT(!swversion.isError()); if (!swversion.isError()) { - ASSERT(swversion.get().latestProtocolVersion() == ProtocolVersion::withTenants().version()); - ASSERT(swversion.get().lastProtocolVersion() == currentProtocolVersion.version()); + ASSERT(swversion.get().latestProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); + ASSERT(swversion.get().lastProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); + ASSERT(swversion.get().lowestCompatibleProtocolVersion() == ProtocolVersion::withTSS().version()); + } + + platform::eraseDirectoryRecursive(swversionTestDirName); + + return Void(); +} + +TEST_CASE("/fdbserver/worker/swversion/runCompatibleOlder") { + wait(Future(Void())); + + platform::eraseDirectoryRecursive(swversionTestDirName); + + if (!platform::createDirectory("sw-version-test")) { + TraceEvent(SevInfo, "CreatedDirectory").detail("Directory", "sw-version-test"); + return Void(); + } + + ErrorOr f = wait(errorOr(checkAndUpdateNewestSoftwareVersion(swversionTestDirName, + ProtocolVersion::withStorageInterfaceReadiness(), + ProtocolVersion::withStorageInterfaceReadiness(), + ProtocolVersion::withTSS()))); + ASSERT(!f.isError()); + + ErrorOr swversion = wait(errorOr( + testSoftwareVersionCompatibility(swversionTestDirName, ProtocolVersion::withStorageInterfaceReadiness()))); + + ASSERT(!swversion.isError()); + if (!swversion.isError()) { + ASSERT(swversion.get().latestProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); + ASSERT(swversion.get().lastProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); + ASSERT(swversion.get().lowestCompatibleProtocolVersion() == ProtocolVersion::withTSS().version()); + } + + ErrorOr f = wait(errorOr(checkAndUpdateNewestSoftwareVersion(swversionTestDirName, + ProtocolVersion::withTSS(), + ProtocolVersion::withStorageInterfaceReadiness(), + ProtocolVersion::withTSS()))); + + ErrorOr swversion = wait(errorOr( + testSoftwareVersionCompatibility(swversionTestDirName, ProtocolVersion::withStorageInterfaceReadiness()))); + + ASSERT(!swversion.isError()); + if (!swversion.isError()) { + ASSERT(swversion.get().latestProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); + ASSERT(swversion.get().lastProtocolVersion() == ProtocolVersion::withTSS().version()); + ASSERT(swversion.get().lowestCompatibleProtocolVersion() == ProtocolVersion::withTSS().version()); + } + + platform::eraseDirectoryRecursive(swversionTestDirName); + + return Void(); +} + +TEST_CASE("/fdbserver/worker/swversion/runIncompatibleOlder") { + wait(Future(Void())); + + platform::eraseDirectoryRecursive(swversionTestDirName); + + if (!platform::createDirectory("sw-version-test")) { + TraceEvent(SevInfo, "CreatedDirectory").detail("Directory", "sw-version-test"); + return Void(); + } + + ErrorOr f = wait(errorOr(checkAndUpdateNewestSoftwareVersion(swversionTestDirName, + ProtocolVersion::withStorageInterfaceReadiness(), + ProtocolVersion::withStorageInterfaceReadiness(), + ProtocolVersion::withTSS()))); + ASSERT(!f.isError()); + + ErrorOr swversion = wait(errorOr( + testSoftwareVersionCompatibility(swversionTestDirName, ProtocolVersion::withStorageInterfaceReadiness()))); + + ASSERT(!swversion.isError()); + if (!swversion.isError()) { + ASSERT(swversion.get().latestProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); + ASSERT(swversion.get().lastProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); + ASSERT(swversion.get().lowestCompatibleProtocolVersion() == ProtocolVersion::withTSS().version()); + } + + ErrorOr swversion = + wait(errorOr(testSoftwareVersionCompatibility(swversionTestDirName, ProtocolVersion::withCacheRole()))); + + ASSERT(swversion.isError() && swversion.getError().code() == error_code_incompatible_software_version); + + platform::eraseDirectoryRecursive(swversionTestDirName); + + return Void(); +} + +TEST_CASE("/fdbserver/worker/swversion/runNewer") { + wait(Future(Void())); + + platform::eraseDirectoryRecursive(swversionTestDirName); + + if (!platform::createDirectory("sw-version-test")) { + TraceEvent(SevInfo, "CreatedDirectory").detail("Directory", "sw-version-test"); + return Void(); + } + + ErrorOr f = wait(errorOr(checkAndUpdateNewestSoftwareVersion(swversionTestDirName, + ProtocolVersion::withTSS(), + ProtocolVersion::withTSS(), + ProtocolVersion::withCacheRole()))); + ASSERT(!f.isError()); + + ErrorOr swversion = wait(errorOr( + testSoftwareVersionCompatibility(swversionTestDirName, ProtocolVersion::withStorageInterfaceReadiness()))); + + ASSERT(!swversion.isError()); + if (!swversion.isError()) { + ASSERT(swversion.get().latestProtocolVersion() == ProtocolVersion::withTSS().version()); + ASSERT(swversion.get().lastProtocolVersion() == ProtocolVersion::withTSS().version()); + ASSERT(swversion.get().lowestCompatibleProtocolVersion() == ProtocolVersion::withCacheRole().version()); + } + + ErrorOr f = wait(errorOr(checkAndUpdateNewestSoftwareVersion(swversionTestDirName, + ProtocolVersion::withStorageInterfaceReadiness(), + ProtocolVersion::withStorageInterfaceReadiness(), + ProtocolVersion::withTSS()))); + + ErrorOr swversion = wait(errorOr( + testSoftwareVersionCompatibility(swversionTestDirName, ProtocolVersion::withStorageInterfaceReadiness()))); + + ASSERT(!swversion.isError()); + if (!swversion.isError()) { + ASSERT(swversion.get().latestProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); + ASSERT(swversion.get().lastProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); ASSERT(swversion.get().lowestCompatibleProtocolVersion() == ProtocolVersion::withTSS().version()); } diff --git a/flow/error_definitions.h b/flow/error_definitions.h index 7df3b34e9d..a687ee023e 100755 --- a/flow/error_definitions.h +++ b/flow/error_definitions.h @@ -115,7 +115,7 @@ ERROR( failed_to_progress, 1216, "Process has failed to make sufficient progress ERROR( invalid_cluster_id, 1217, "Attempted to join cluster with a different cluster ID" ) ERROR( restart_cluster_controller, 1218, "Restart cluster controller process" ) ERROR( please_reboot_remote_kv_store, 1219, "Need to reboot the storage engine process as it died abnormally") -ERROR( incomptible_software_version, 1220, "Current software does not support database format" ) +ERROR( incompatible_software_version, 1220, "Current software does not support database format" ) // 15xx Platform errors ERROR( platform_error, 1500, "Platform error" ) From e80a83a418a5b416612c7306ebc0f2580b681924 Mon Sep 17 00:00:00 2001 From: "Bharadwaj V.R" Date: Mon, 4 Apr 2022 19:36:56 -0700 Subject: [PATCH 14/28] Remove latestSoftwareVersion in status json. It is not yet updated correctly --- fdbclient/Schemas.cpp | 1 - fdbserver/Status.actor.cpp | 7 ------- 2 files changed, 8 deletions(-) diff --git a/fdbclient/Schemas.cpp b/fdbclient/Schemas.cpp index a3f326b955..18ffac2fa2 100644 --- a/fdbclient/Schemas.cpp +++ b/fdbclient/Schemas.cpp @@ -695,7 +695,6 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema( }, "cluster_controller_timestamp":1415650089, "protocol_version":"fdb00a400050001", - "latest_server_version":"fdb00a500040001", "connection_string":"a:a@127.0.0.1:4000", "full_replication":true, "maintenance_zone":"0ccb4e0fdbdb5583010f6b77d9d10ece", diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp index b619cf3df0..5549083b8e 100644 --- a/fdbserver/Status.actor.cpp +++ b/fdbserver/Status.actor.cpp @@ -1536,10 +1536,6 @@ struct LoadConfigurationResult { : fullReplication(true), healthyZoneSeconds(0), rebalanceDDIgnored(false), dataDistributionDisabled(false) {} }; -ACTOR Future getLatestSoftwareVersion(Database cx) { - return currentProtocolVersion; -} - ACTOR static Future, Optional>> loadConfiguration(Database cx, JsonBuilderArray* messages, std::set* status_incomplete_reasons) { state Optional result; @@ -3174,9 +3170,6 @@ ACTOR Future clusterGetStatus( statusObj["incompatible_connections"] = incompatibleConnectionsArray; statusObj["datacenter_lag"] = getLagObject(datacenterVersionDifference); - ProtocolVersion latestServerVersion = wait(getLatestSoftwareVersion(cx)); - statusObj["latest_server_version"] = format("%" PRIx64, latestServerVersion.version()); - int activeTSSCount = 0; JsonBuilderArray wiggleServerAddress; for (auto& it : storageServers) { From 1457804b29a0b5ce625dc36c0feb6d829cdeccfc Mon Sep 17 00:00:00 2001 From: "Bharadwaj V.R" Date: Mon, 4 Apr 2022 20:00:59 -0700 Subject: [PATCH 15/28] Remove info traces --- fdbserver/worker.actor.cpp | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index 52e6b7665f..a0266bbf6a 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -2529,11 +2529,6 @@ ACTOR Future testSoftwareVersionCompatibility(std::string folder, Pro // Test whether the most newest software version that has been run on this cluster is // compatible with the current software version int64_t filesize = wait(versionFile.get()->size()); - TraceEvent(SevInfo, "SWVersionFileSizeBeforeRead") - .detail("File", versionFilePath) - .detail("Size", filesize) - .detail("FD", versionFile.get()->debugFD()); - state Standalone buf = makeString(filesize); wait(success(versionFile.get()->read(mutateString(buf), filesize, 0))); @@ -2589,16 +2584,8 @@ ACTOR Future checkAndUpdateNewestSoftwareVersion(std::string folder, SWVersion swVersion(latestVersion, currentVersion, minCompatibleVersion); auto s = swVersionValue(swVersion); - TraceEvent(SevInfo, "SWVersionFileWriteSize").detail("Size", s.size()).detail("String", s.toString().c_str()); wait(versionFile.get()->write(s.toString().c_str(), s.size(), 0)); wait(versionFile.get()->sync()); - wait(delay(0)); - int64_t filesize = wait(versionFile.get()->size()); - TraceEvent(SevInfo, "SWVersionFileSizeAfterWrite") - .detail("File", versionFilePath) - .detail("Size", filesize) - .detail("FD", versionFile.get()->debugFD()); - } catch (Error& e) { if (e.code() == error_code_actor_cancelled) { throw; From 58a6442d1fc9829efd7596b7ae6fc0573e01eae0 Mon Sep 17 00:00:00 2001 From: "Bharadwaj V.R" Date: Wed, 6 Apr 2022 21:06:01 -0700 Subject: [PATCH 16/28] Add tracing and fix the version write and update logic --- fdbserver/SimulatedCluster.actor.cpp | 3 +- fdbserver/worker.actor.cpp | 104 +++++++++++++++++++-------- flow/ProtocolVersion.h | 12 +++- 3 files changed, 87 insertions(+), 32 deletions(-) diff --git a/fdbserver/SimulatedCluster.actor.cpp b/fdbserver/SimulatedCluster.actor.cpp index 3883dd50d3..974d137c64 100644 --- a/fdbserver/SimulatedCluster.actor.cpp +++ b/fdbserver/SimulatedCluster.actor.cpp @@ -629,7 +629,8 @@ ACTOR Future simulatedFDBDRebooter(Reference #include +#include "fdbclient/FDBTypes.h" #include "fdbrpc/IAsyncFile.h" #include "fdbrpc/Locality.h" #include "fdbclient/GlobalConfig.actor.h" @@ -2522,15 +2523,17 @@ ACTOR Future testSoftwareVersionCompatibility(std::string folder, Pro return SWVersion(); } else { // Dangerous to continue if we cannot do a software compatibility test - TraceEvent(SevError, "OpenSWVersionFileError").error(versionFile.getError()); + TraceEvent(SevWarnAlways, "OpenSWVersionFileError").error(versionFile.getError()); throw versionFile.getError(); } } else { // Test whether the most newest software version that has been run on this cluster is // compatible with the current software version - int64_t filesize = wait(versionFile.get()->size()); + state int64_t filesize = wait(versionFile.get()->size()); state Standalone buf = makeString(filesize); - wait(success(versionFile.get()->read(mutateString(buf), filesize, 0))); + int readLen = wait(versionFile.get()->read(mutateString(buf), filesize, 0)); + ASSERT(filesize != 0); + ASSERT(readLen == filesize); try { SWVersion swversion = ObjectReader::fromStringRef(buf, IncludeVersion()); @@ -2543,7 +2546,7 @@ ACTOR Future testSoftwareVersionCompatibility(std::string folder, Pro throw incompatible_software_version(); } } catch (Error& e) { - TraceEvent(SevError, "ReadSWVersionFileError").error(e); + TraceEvent(SevWarnAlways, "ReadSWVersionFileError").error(e); throw e; } } @@ -2552,7 +2555,7 @@ ACTOR Future testSoftwareVersionCompatibility(std::string folder, Pro throw; } // TODO(bvr): Inject faults - TraceEvent(SevError, "OpenReadSWVersionFileError").error(e); + TraceEvent(SevWarnAlways, "OpenReadSWVersionFileError").error(e); throw; } } @@ -2566,31 +2569,36 @@ ACTOR Future checkAndUpdateNewestSoftwareVersion(std::string folder, try { state std::string versionFilePath = joinPath(folder, versionFileName); - state ErrorOr> versionFile = wait( - errorOr(IAsyncFileSystem::filesystem(g_network)->open(versionFilePath, IAsyncFile::OPEN_READWRITE, 0600))); + ErrorOr> versionFile = wait( + errorOr(IAsyncFileSystem::filesystem(g_network)->open(versionFilePath, IAsyncFile::OPEN_READONLY, 0600))); - if (versionFile.isError()) { - if (versionFile.getError().code() == error_code_file_not_found && !fileExists(versionFilePath)) { - Reference newVersionFile = wait(IAsyncFileSystem::filesystem()->open( - versionFilePath, - IAsyncFile::OPEN_ATOMIC_WRITE_AND_CREATE | IAsyncFile::OPEN_CREATE | IAsyncFile::OPEN_READWRITE, - 0600)); - versionFile = newVersionFile; - } else { - TraceEvent(SevError, "OpenSWVersionFileError").error(versionFile.getError()); - throw versionFile.getError(); - } + if (versionFile.isError() && + (versionFile.getError().code() != error_code_file_not_found || fileExists(versionFilePath))) { + TraceEvent(SevWarnAlways, "OpenSWVersionFileError").error(versionFile.getError()); + throw versionFile.getError(); } + state Reference newVersionFile = wait(IAsyncFileSystem::filesystem()->open( + versionFilePath, + IAsyncFile::OPEN_ATOMIC_WRITE_AND_CREATE | IAsyncFile::OPEN_CREATE | IAsyncFile::OPEN_READWRITE, + 0600)); + SWVersion swVersion(latestVersion, currentVersion, minCompatibleVersion); auto s = swVersionValue(swVersion); - wait(versionFile.get()->write(s.toString().c_str(), s.size(), 0)); - wait(versionFile.get()->sync()); + TraceEvent(SevInfo, "CheckAndUpdateNewestSoftwareVersion") + .detail("SWVersion", swVersion) + .detail("ValueSize", s.size()); + ErrorOr e = wait(errorOr(newVersionFile->write(s.toString().c_str(), s.size(), 0))); + if (e.isError()) { + TraceEvent(SevWarnAlways, "WriteSWVersionFailed").error(e.getError()); + throw e.getError(); + } + wait(newVersionFile->sync()); } catch (Error& e) { if (e.code() == error_code_actor_cancelled) { throw; } - TraceEvent(SevError, "OpenWriteSWVersionFileError").error(e); + TraceEvent(SevWarnAlways, "OpenWriteSWVersionFileError").error(e); throw; } @@ -2605,7 +2613,7 @@ TEST_CASE("/fdbserver/worker/swversion/noversionhistory") { platform::eraseDirectoryRecursive(swversionTestDirName); if (!platform::createDirectory("sw-version-test")) { - TraceEvent(SevInfo, "CreatedDirectory").detail("Directory", "sw-version-test"); + TraceEvent(SevError, "FailedToCreateDirectory").detail("Directory", "sw-version-test"); return Void(); } @@ -2628,7 +2636,7 @@ TEST_CASE("/fdbserver/worker/swversion/writeVerifyVersion") { platform::eraseDirectoryRecursive(swversionTestDirName); if (!platform::createDirectory("sw-version-test")) { - TraceEvent(SevInfo, "CreatedDirectory").detail("Directory", "sw-version-test"); + TraceEvent(SevError, "FailedToCreateDirectory").detail("Directory", "sw-version-test"); return Void(); } @@ -2659,7 +2667,7 @@ TEST_CASE("/fdbserver/worker/swversion/runCompatibleOlder") { platform::eraseDirectoryRecursive(swversionTestDirName); if (!platform::createDirectory("sw-version-test")) { - TraceEvent(SevInfo, "CreatedDirectory").detail("Directory", "sw-version-test"); + TraceEvent(SevError, "FailedToCreateDirectory").detail("Directory", "sw-version-test"); return Void(); } @@ -2677,6 +2685,8 @@ TEST_CASE("/fdbserver/worker/swversion/runCompatibleOlder") { ASSERT(swversion.get().latestProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); ASSERT(swversion.get().lastProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); ASSERT(swversion.get().lowestCompatibleProtocolVersion() == ProtocolVersion::withTSS().version()); + + TraceEvent(SevInfo, "UT/swversion/runCompatibleOlder").detail("SWVersion", swversion.get()); } ErrorOr f = wait(errorOr(checkAndUpdateNewestSoftwareVersion(swversionTestDirName, @@ -2692,6 +2702,8 @@ TEST_CASE("/fdbserver/worker/swversion/runCompatibleOlder") { ASSERT(swversion.get().latestProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); ASSERT(swversion.get().lastProtocolVersion() == ProtocolVersion::withTSS().version()); ASSERT(swversion.get().lowestCompatibleProtocolVersion() == ProtocolVersion::withTSS().version()); + + TraceEvent(SevInfo, "UT/swversion/runCompatibleOlder").detail("SWVersion", swversion.get()); } platform::eraseDirectoryRecursive(swversionTestDirName); @@ -2705,7 +2717,7 @@ TEST_CASE("/fdbserver/worker/swversion/runIncompatibleOlder") { platform::eraseDirectoryRecursive(swversionTestDirName); if (!platform::createDirectory("sw-version-test")) { - TraceEvent(SevInfo, "CreatedDirectory").detail("Directory", "sw-version-test"); + TraceEvent(SevError, "FailedToCreateDirectory").detail("Directory", "sw-version-test"); return Void(); } @@ -2741,7 +2753,7 @@ TEST_CASE("/fdbserver/worker/swversion/runNewer") { platform::eraseDirectoryRecursive(swversionTestDirName); if (!platform::createDirectory("sw-version-test")) { - TraceEvent(SevInfo, "CreatedDirectory").detail("Directory", "sw-version-test"); + TraceEvent(SevError, "FailedToCreateDirectory").detail("Directory", "sw-version-test"); return Void(); } @@ -3083,12 +3095,44 @@ ACTOR Future fdbd(Reference connRecord, Future f = testSoftwareVersionCompatibility(dataFolder, currentProtocolVersion); ErrorOr swversion = wait(errorOr(f)); if (swversion.isError()) { + TraceEvent(SevWarnAlways, "SWVersionFileCompatibilityCheckError", processIDUid).error(swversion.getError()); throw swversion.getError(); + } else { + TraceEvent(SevInfo, "SWVersionCompatible", processIDUid).detail("SWVersion", swversion.get()); } - ErrorOr v = wait(errorOr(checkAndUpdateNewestSoftwareVersion( - dataFolder, currentProtocolVersion, currentProtocolVersion, currentProtocolVersion))); - if (v.isError()) { - TraceEvent(SevError, "SWVersionNotWritten").error(v.getError()); + + if (!swversion.get().isValid() || + currentProtocolVersion > ProtocolVersion(swversion.get().latestProtocolVersion())) { + ErrorOr v = wait(errorOr(checkAndUpdateNewestSoftwareVersion( + dataFolder, currentProtocolVersion, currentProtocolVersion, minCompatibleProtocolVersion))); + if (v.isError()) { + TraceEvent(SevWarnAlways, "SWVersionNotWritten", processIDUid).error(v.getError()); + } else { + TraceEvent(SevWarnAlways, "NewSWVersionWritten", processIDUid).log(); + } + } else if (currentProtocolVersion < ProtocolVersion(swversion.get().latestProtocolVersion())) { + ErrorOr v = wait(errorOr(checkAndUpdateNewestSoftwareVersion( + dataFolder, + currentProtocolVersion, + ProtocolVersion(swversion.get().latestProtocolVersion()), + ProtocolVersion(swversion.get().lowestCompatibleProtocolVersion())))); + if (v.isError()) { + TraceEvent(SevWarnAlways, "SWVersionNotWritten", processIDUid).error(v.getError()); + } else { + TraceEvent(SevWarnAlways, "SWVersionWritten", processIDUid).log(); + } + } + + // wait(delay(0)); + + Future f = testSoftwareVersionCompatibility(dataFolder, currentProtocolVersion); + ErrorOr swversion = wait(errorOr(f)); + if (swversion.isError()) { + TraceEvent(SevWarnAlways, "SWVersionFileCompatibilityCheckError", processIDUid).error(swversion.getError()); + throw swversion.getError(); + } else { + TraceEvent(SevInfo, "ReadBackCheckAndUpdateNewestSoftwareVersion", processIDUid) + .detail("SWVersion", swversion.get()); } std::string fitnessFilePath = joinPath(dataFolder, "fitness"); diff --git a/flow/ProtocolVersion.h b/flow/ProtocolVersion.h index 2432a1de01..89ed39f71f 100644 --- a/flow/ProtocolVersion.h +++ b/flow/ProtocolVersion.h @@ -229,4 +229,14 @@ public: void serialize(Ar& ar) { serializer(ar, _latestProtocolVersion, _lastProtocolVersion, _lowestCompatibleProtocolVersion); } -}; \ No newline at end of file +}; + +template <> +struct Traceable : std::true_type { + static std::string toString(const SWVersion& swVersion) { + return format("Newest: 0x%016lX, Last: 0x%016lX, MinCompatible: 0x%016lX", + swVersion.latestProtocolVersion(), + swVersion.lastProtocolVersion(), + swVersion.lowestCompatibleProtocolVersion()); + } +}; From dce93b108330f101236783d54570d0ae2931853a Mon Sep 17 00:00:00 2001 From: "Bharadwaj V.R" Date: Wed, 6 Apr 2022 23:03:27 -0700 Subject: [PATCH 17/28] Minor unit test fixes --- fdbserver/worker.actor.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index cbf0787dfe..0253089177 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -2620,7 +2620,6 @@ TEST_CASE("/fdbserver/worker/swversion/noversionhistory") { ErrorOr swversion = wait(errorOr( testSoftwareVersionCompatibility(swversionTestDirName, ProtocolVersion::withStorageInterfaceReadiness()))); - ASSERT(!swversion.isError()); if (!swversion.isError()) { ASSERT(!swversion.get().isValid()); } @@ -2649,7 +2648,6 @@ TEST_CASE("/fdbserver/worker/swversion/writeVerifyVersion") { ErrorOr swversion = wait(errorOr( testSoftwareVersionCompatibility(swversionTestDirName, ProtocolVersion::withStorageInterfaceReadiness()))); - ASSERT(!swversion.isError()); if (!swversion.isError()) { ASSERT(swversion.get().latestProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); ASSERT(swversion.get().lastProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); @@ -2675,12 +2673,10 @@ TEST_CASE("/fdbserver/worker/swversion/runCompatibleOlder") { ProtocolVersion::withStorageInterfaceReadiness(), ProtocolVersion::withStorageInterfaceReadiness(), ProtocolVersion::withTSS()))); - ASSERT(!f.isError()); ErrorOr swversion = wait(errorOr( testSoftwareVersionCompatibility(swversionTestDirName, ProtocolVersion::withStorageInterfaceReadiness()))); - ASSERT(!swversion.isError()); if (!swversion.isError()) { ASSERT(swversion.get().latestProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); ASSERT(swversion.get().lastProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); @@ -2697,7 +2693,6 @@ TEST_CASE("/fdbserver/worker/swversion/runCompatibleOlder") { ErrorOr swversion = wait(errorOr( testSoftwareVersionCompatibility(swversionTestDirName, ProtocolVersion::withStorageInterfaceReadiness()))); - ASSERT(!swversion.isError()); if (!swversion.isError()) { ASSERT(swversion.get().latestProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); ASSERT(swversion.get().lastProtocolVersion() == ProtocolVersion::withTSS().version()); @@ -2781,7 +2776,6 @@ TEST_CASE("/fdbserver/worker/swversion/runNewer") { ErrorOr swversion = wait(errorOr( testSoftwareVersionCompatibility(swversionTestDirName, ProtocolVersion::withStorageInterfaceReadiness()))); - ASSERT(!swversion.isError()); if (!swversion.isError()) { ASSERT(swversion.get().latestProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); ASSERT(swversion.get().lastProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); From 1e2c4a08447f511290d1c19e53773d170a1a7031 Mon Sep 17 00:00:00 2001 From: "Bharadwaj V.R" Date: Thu, 7 Apr 2022 14:46:27 -0700 Subject: [PATCH 18/28] Rename some variables from clarity; reorganize compatibility test into a new actor --- fdbserver/worker.actor.cpp | 148 +++++++++++++++++++------------------ flow/ProtocolVersion.h | 22 +++--- 2 files changed, 86 insertions(+), 84 deletions(-) diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index 0253089177..eb396bd8fe 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -2560,10 +2560,10 @@ ACTOR Future testSoftwareVersionCompatibility(std::string folder, Pro } } -ACTOR Future checkAndUpdateNewestSoftwareVersion(std::string folder, - ProtocolVersion currentVersion, - ProtocolVersion latestVersion, - ProtocolVersion minCompatibleVersion) { +ACTOR Future updateNewestSoftwareVersion(std::string folder, + ProtocolVersion currentVersion, + ProtocolVersion latestVersion, + ProtocolVersion minCompatibleVersion) { ASSERT(currentVersion >= minCompatibleVersion); @@ -2574,7 +2574,6 @@ ACTOR Future checkAndUpdateNewestSoftwareVersion(std::string folder, if (versionFile.isError() && (versionFile.getError().code() != error_code_file_not_found || fileExists(versionFilePath))) { - TraceEvent(SevWarnAlways, "OpenSWVersionFileError").error(versionFile.getError()); throw versionFile.getError(); } @@ -2585,9 +2584,6 @@ ACTOR Future checkAndUpdateNewestSoftwareVersion(std::string folder, SWVersion swVersion(latestVersion, currentVersion, minCompatibleVersion); auto s = swVersionValue(swVersion); - TraceEvent(SevInfo, "CheckAndUpdateNewestSoftwareVersion") - .detail("SWVersion", swVersion) - .detail("ValueSize", s.size()); ErrorOr e = wait(errorOr(newVersionFile->write(s.toString().c_str(), s.size(), 0))); if (e.isError()) { TraceEvent(SevWarnAlways, "WriteSWVersionFailed").error(e.getError()); @@ -2605,6 +2601,50 @@ ACTOR Future checkAndUpdateNewestSoftwareVersion(std::string folder, return Void(); } +ACTOR Future testAndUpdateSoftwareVersionCompatibility(std::string dataFolder, UID processIDUid) { + ErrorOr swVersion = wait(errorOr(testSoftwareVersionCompatibility(dataFolder, currentProtocolVersion))); + if (swVersion.isError()) { + TraceEvent(SevWarnAlways, "SWVersionCompatibilityCheckError", processIDUid).error(swVersion.getError()); + throw swVersion.getError(); + } + + TraceEvent(SevInfo, "SWVersionCompatible", processIDUid).detail("SWVersion", swVersion.get()); + + if (!swVersion.get().isValid() || + currentProtocolVersion > ProtocolVersion(swVersion.get().latestProtocolVersion())) { + ErrorOr updateSWVersion = wait(errorOr(updateNewestSoftwareVersion( + dataFolder, currentProtocolVersion, currentProtocolVersion, minCompatibleProtocolVersion))); + if (updateSWVersion.isError()) { + TraceEvent(SevWarnAlways, "SWVersionNotWritten", processIDUid).error(updateSWVersion.getError()); + throw updateSWVersion.getError(); + } else { + TraceEvent(SevWarnAlways, "NewSWVersionWritten", processIDUid).log(); + } + } else if (currentProtocolVersion < ProtocolVersion(swVersion.get().latestProtocolVersion())) { + ErrorOr updatedSWVersion = wait( + errorOr(updateNewestSoftwareVersion(dataFolder, + currentProtocolVersion, + ProtocolVersion(swVersion.get().latestProtocolVersion()), + ProtocolVersion(swVersion.get().lowestCompatibleProtocolVersion())))); + if (updatedSWVersion.isError()) { + TraceEvent(SevWarnAlways, "SWVersionNotWritten", processIDUid).error(updatedSWVersion.getError()); + } else { + TraceEvent(SevWarnAlways, "SWVersionWritten", processIDUid).log(); + } + } + + ErrorOr newSWVersion = + wait(errorOr(testSoftwareVersionCompatibility(dataFolder, currentProtocolVersion))); + if (newSWVersion.isError()) { + TraceEvent(SevWarnAlways, "SWVersionCompatibilityCheckError", processIDUid).error(newSWVersion.getError()); + throw newSWVersion.getError(); + } + + TraceEvent(SevInfo, "VerifiedNewSoftwareVersion", processIDUid).detail("SWVersion", newSWVersion.get()); + + return Void(); +} + static const std::string swversionTestDirName = "sw-version-test"; TEST_CASE("/fdbserver/worker/swversion/noversionhistory") { @@ -2639,10 +2679,10 @@ TEST_CASE("/fdbserver/worker/swversion/writeVerifyVersion") { return Void(); } - ErrorOr f = wait(errorOr(checkAndUpdateNewestSoftwareVersion(swversionTestDirName, - ProtocolVersion::withStorageInterfaceReadiness(), - ProtocolVersion::withStorageInterfaceReadiness(), - ProtocolVersion::withTSS()))); + ErrorOr f = wait(errorOr(updateNewestSoftwareVersion(swversionTestDirName, + ProtocolVersion::withStorageInterfaceReadiness(), + ProtocolVersion::withStorageInterfaceReadiness(), + ProtocolVersion::withTSS()))); ASSERT(!f.isError()); ErrorOr swversion = wait(errorOr( @@ -2669,10 +2709,10 @@ TEST_CASE("/fdbserver/worker/swversion/runCompatibleOlder") { return Void(); } - ErrorOr f = wait(errorOr(checkAndUpdateNewestSoftwareVersion(swversionTestDirName, - ProtocolVersion::withStorageInterfaceReadiness(), - ProtocolVersion::withStorageInterfaceReadiness(), - ProtocolVersion::withTSS()))); + ErrorOr f = wait(errorOr(updateNewestSoftwareVersion(swversionTestDirName, + ProtocolVersion::withStorageInterfaceReadiness(), + ProtocolVersion::withStorageInterfaceReadiness(), + ProtocolVersion::withTSS()))); ErrorOr swversion = wait(errorOr( testSoftwareVersionCompatibility(swversionTestDirName, ProtocolVersion::withStorageInterfaceReadiness()))); @@ -2685,10 +2725,10 @@ TEST_CASE("/fdbserver/worker/swversion/runCompatibleOlder") { TraceEvent(SevInfo, "UT/swversion/runCompatibleOlder").detail("SWVersion", swversion.get()); } - ErrorOr f = wait(errorOr(checkAndUpdateNewestSoftwareVersion(swversionTestDirName, - ProtocolVersion::withTSS(), - ProtocolVersion::withStorageInterfaceReadiness(), - ProtocolVersion::withTSS()))); + ErrorOr f = wait(errorOr(updateNewestSoftwareVersion(swversionTestDirName, + ProtocolVersion::withTSS(), + ProtocolVersion::withStorageInterfaceReadiness(), + ProtocolVersion::withTSS()))); ErrorOr swversion = wait(errorOr( testSoftwareVersionCompatibility(swversionTestDirName, ProtocolVersion::withStorageInterfaceReadiness()))); @@ -2716,10 +2756,10 @@ TEST_CASE("/fdbserver/worker/swversion/runIncompatibleOlder") { return Void(); } - ErrorOr f = wait(errorOr(checkAndUpdateNewestSoftwareVersion(swversionTestDirName, - ProtocolVersion::withStorageInterfaceReadiness(), - ProtocolVersion::withStorageInterfaceReadiness(), - ProtocolVersion::withTSS()))); + ErrorOr f = wait(errorOr(updateNewestSoftwareVersion(swversionTestDirName, + ProtocolVersion::withStorageInterfaceReadiness(), + ProtocolVersion::withStorageInterfaceReadiness(), + ProtocolVersion::withTSS()))); ASSERT(!f.isError()); ErrorOr swversion = wait(errorOr( @@ -2752,10 +2792,10 @@ TEST_CASE("/fdbserver/worker/swversion/runNewer") { return Void(); } - ErrorOr f = wait(errorOr(checkAndUpdateNewestSoftwareVersion(swversionTestDirName, - ProtocolVersion::withTSS(), - ProtocolVersion::withTSS(), - ProtocolVersion::withCacheRole()))); + ErrorOr f = wait(errorOr(updateNewestSoftwareVersion(swversionTestDirName, + ProtocolVersion::withTSS(), + ProtocolVersion::withTSS(), + ProtocolVersion::withCacheRole()))); ASSERT(!f.isError()); ErrorOr swversion = wait(errorOr( @@ -2768,10 +2808,10 @@ TEST_CASE("/fdbserver/worker/swversion/runNewer") { ASSERT(swversion.get().lowestCompatibleProtocolVersion() == ProtocolVersion::withCacheRole().version()); } - ErrorOr f = wait(errorOr(checkAndUpdateNewestSoftwareVersion(swversionTestDirName, - ProtocolVersion::withStorageInterfaceReadiness(), - ProtocolVersion::withStorageInterfaceReadiness(), - ProtocolVersion::withTSS()))); + ErrorOr f = wait(errorOr(updateNewestSoftwareVersion(swversionTestDirName, + ProtocolVersion::withStorageInterfaceReadiness(), + ProtocolVersion::withStorageInterfaceReadiness(), + ProtocolVersion::withTSS()))); ErrorOr swversion = wait(errorOr( testSoftwareVersionCompatibility(swversionTestDirName, ProtocolVersion::withStorageInterfaceReadiness()))); @@ -3086,47 +3126,9 @@ ACTOR Future fdbd(Reference connRecord, localities.set(LocalityData::keyProcessId, processIDUid.toString()); // Only one process can execute on a dataFolder from this point onwards - Future f = testSoftwareVersionCompatibility(dataFolder, currentProtocolVersion); - ErrorOr swversion = wait(errorOr(f)); - if (swversion.isError()) { - TraceEvent(SevWarnAlways, "SWVersionFileCompatibilityCheckError", processIDUid).error(swversion.getError()); - throw swversion.getError(); - } else { - TraceEvent(SevInfo, "SWVersionCompatible", processIDUid).detail("SWVersion", swversion.get()); - } - - if (!swversion.get().isValid() || - currentProtocolVersion > ProtocolVersion(swversion.get().latestProtocolVersion())) { - ErrorOr v = wait(errorOr(checkAndUpdateNewestSoftwareVersion( - dataFolder, currentProtocolVersion, currentProtocolVersion, minCompatibleProtocolVersion))); - if (v.isError()) { - TraceEvent(SevWarnAlways, "SWVersionNotWritten", processIDUid).error(v.getError()); - } else { - TraceEvent(SevWarnAlways, "NewSWVersionWritten", processIDUid).log(); - } - } else if (currentProtocolVersion < ProtocolVersion(swversion.get().latestProtocolVersion())) { - ErrorOr v = wait(errorOr(checkAndUpdateNewestSoftwareVersion( - dataFolder, - currentProtocolVersion, - ProtocolVersion(swversion.get().latestProtocolVersion()), - ProtocolVersion(swversion.get().lowestCompatibleProtocolVersion())))); - if (v.isError()) { - TraceEvent(SevWarnAlways, "SWVersionNotWritten", processIDUid).error(v.getError()); - } else { - TraceEvent(SevWarnAlways, "SWVersionWritten", processIDUid).log(); - } - } - - // wait(delay(0)); - - Future f = testSoftwareVersionCompatibility(dataFolder, currentProtocolVersion); - ErrorOr swversion = wait(errorOr(f)); - if (swversion.isError()) { - TraceEvent(SevWarnAlways, "SWVersionFileCompatibilityCheckError", processIDUid).error(swversion.getError()); - throw swversion.getError(); - } else { - TraceEvent(SevInfo, "ReadBackCheckAndUpdateNewestSoftwareVersion", processIDUid) - .detail("SWVersion", swversion.get()); + ErrorOr f = wait(errorOr(testAndUpdateSoftwareVersionCompatibility(dataFolder, processIDUid))); + if (f.isError()) { + throw f.getError(); } std::string fitnessFilePath = joinPath(dataFolder, "fitness"); diff --git a/flow/ProtocolVersion.h b/flow/ProtocolVersion.h index 89ed39f71f..15ea5c99fc 100644 --- a/flow/ProtocolVersion.h +++ b/flow/ProtocolVersion.h @@ -202,32 +202,32 @@ struct SWVersion { constexpr static FileIdentifier file_identifier = 13943914; private: - uint64_t _latestProtocolVersion; - uint64_t _lastProtocolVersion; + uint64_t _newestProtocolVersion; + uint64_t _lastRunProtocolVersion; uint64_t _lowestCompatibleProtocolVersion; public: SWVersion() { - _latestProtocolVersion = 0; - _lastProtocolVersion = 0; + _newestProtocolVersion = 0; + _lastRunProtocolVersion = 0; _lowestCompatibleProtocolVersion = 0; } SWVersion(ProtocolVersion latestVersion, ProtocolVersion lastVersion, ProtocolVersion minCompatibleVersion) - : _latestProtocolVersion(latestVersion.version()), _lastProtocolVersion(lastVersion.version()), + : _newestProtocolVersion(latestVersion.version()), _lastRunProtocolVersion(lastVersion.version()), _lowestCompatibleProtocolVersion(minCompatibleVersion.version()) {} bool isValid() const { - return (_latestProtocolVersion != 0 && _lastProtocolVersion != 0 && _lowestCompatibleProtocolVersion != 0); + return (_newestProtocolVersion != 0 && _lastRunProtocolVersion != 0 && _lowestCompatibleProtocolVersion != 0); } - uint64_t latestProtocolVersion() const { return _latestProtocolVersion; } - uint64_t lastProtocolVersion() const { return _lastProtocolVersion; } + uint64_t newestProtocolVersion() const { return _newestProtocolVersion; } + uint64_t lastRunProtocolVersion() const { return _lastRunProtocolVersion; } uint64_t lowestCompatibleProtocolVersion() const { return _lowestCompatibleProtocolVersion; } template void serialize(Ar& ar) { - serializer(ar, _latestProtocolVersion, _lastProtocolVersion, _lowestCompatibleProtocolVersion); + serializer(ar, _newestProtocolVersion, _lastRunProtocolVersion, _lowestCompatibleProtocolVersion); } }; @@ -235,8 +235,8 @@ template <> struct Traceable : std::true_type { static std::string toString(const SWVersion& swVersion) { return format("Newest: 0x%016lX, Last: 0x%016lX, MinCompatible: 0x%016lX", - swVersion.latestProtocolVersion(), - swVersion.lastProtocolVersion(), + swVersion.newestProtocolVersion(), + swVersion.lastRunProtocolVersion(), swVersion.lowestCompatibleProtocolVersion()); } }; From 4fbf8e4925fe7c8b01256e1973d630f09a199b1e Mon Sep 17 00:00:00 2001 From: "Bharadwaj V.R" Date: Thu, 7 Apr 2022 16:26:05 -0700 Subject: [PATCH 19/28] Fix failures incorrect merge --- fdbserver/worker.actor.cpp | 41 +++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index f87053ce36..72efd2ca76 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -2560,10 +2560,8 @@ ACTOR Future testSoftwareVersionCompatibility(std::string folder, Pro SWVersion swversion = ObjectReader::fromStringRef(buf, IncludeVersion()); ProtocolVersion lowestCompatibleVersion(swversion.lowestCompatibleProtocolVersion()); if (currentVersion >= lowestCompatibleVersion) { - TraceEvent(SevInfo, "SWVersionCompatible").log(); return swversion; } else { - TraceEvent(SevInfo, "SWVersionIncompatible").log(); throw incompatible_software_version(); } } catch (Error& e) { @@ -2632,23 +2630,24 @@ ACTOR Future testAndUpdateSoftwareVersionCompatibility(std::string dataFol TraceEvent(SevInfo, "SWVersionCompatible", processIDUid).detail("SWVersion", swVersion.get()); if (!swVersion.get().isValid() || - currentProtocolVersion > ProtocolVersion(swVersion.get().latestProtocolVersion())) { - ErrorOr updateSWVersion = wait(errorOr(updateNewestSoftwareVersion( + currentProtocolVersion > ProtocolVersion(swVersion.get().newestProtocolVersion())) { + ErrorOr updatedSWVersion = wait(errorOr(updateNewestSoftwareVersion( dataFolder, currentProtocolVersion, currentProtocolVersion, minCompatibleProtocolVersion))); - if (updateSWVersion.isError()) { - TraceEvent(SevWarnAlways, "SWVersionNotWritten", processIDUid).error(updateSWVersion.getError()); - throw updateSWVersion.getError(); + if (updatedSWVersion.isError()) { + TraceEvent(SevWarnAlways, "SWVersionNotWritten", processIDUid).error(updatedSWVersion.getError()); + throw updatedSWVersion.getError(); } else { TraceEvent(SevWarnAlways, "NewSWVersionWritten", processIDUid).log(); } - } else if (currentProtocolVersion < ProtocolVersion(swVersion.get().latestProtocolVersion())) { + } else if (currentProtocolVersion < ProtocolVersion(swVersion.get().newestProtocolVersion())) { ErrorOr updatedSWVersion = wait( errorOr(updateNewestSoftwareVersion(dataFolder, currentProtocolVersion, - ProtocolVersion(swVersion.get().latestProtocolVersion()), + ProtocolVersion(swVersion.get().newestProtocolVersion()), ProtocolVersion(swVersion.get().lowestCompatibleProtocolVersion())))); if (updatedSWVersion.isError()) { TraceEvent(SevWarnAlways, "SWVersionNotWritten", processIDUid).error(updatedSWVersion.getError()); + throw updatedSWVersion.getError(); } else { TraceEvent(SevWarnAlways, "SWVersionWritten", processIDUid).log(); } @@ -2710,8 +2709,8 @@ TEST_CASE("/fdbserver/worker/swversion/writeVerifyVersion") { testSoftwareVersionCompatibility(swversionTestDirName, ProtocolVersion::withStorageInterfaceReadiness()))); if (!swversion.isError()) { - ASSERT(swversion.get().latestProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); - ASSERT(swversion.get().lastProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); + ASSERT(swversion.get().newestProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); + ASSERT(swversion.get().lastRunProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); ASSERT(swversion.get().lowestCompatibleProtocolVersion() == ProtocolVersion::withTSS().version()); } @@ -2739,8 +2738,8 @@ TEST_CASE("/fdbserver/worker/swversion/runCompatibleOlder") { testSoftwareVersionCompatibility(swversionTestDirName, ProtocolVersion::withStorageInterfaceReadiness()))); if (!swversion.isError()) { - ASSERT(swversion.get().latestProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); - ASSERT(swversion.get().lastProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); + ASSERT(swversion.get().newestProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); + ASSERT(swversion.get().lastRunProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); ASSERT(swversion.get().lowestCompatibleProtocolVersion() == ProtocolVersion::withTSS().version()); TraceEvent(SevInfo, "UT/swversion/runCompatibleOlder").detail("SWVersion", swversion.get()); @@ -2755,8 +2754,8 @@ TEST_CASE("/fdbserver/worker/swversion/runCompatibleOlder") { testSoftwareVersionCompatibility(swversionTestDirName, ProtocolVersion::withStorageInterfaceReadiness()))); if (!swversion.isError()) { - ASSERT(swversion.get().latestProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); - ASSERT(swversion.get().lastProtocolVersion() == ProtocolVersion::withTSS().version()); + ASSERT(swversion.get().newestProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); + ASSERT(swversion.get().lastRunProtocolVersion() == ProtocolVersion::withTSS().version()); ASSERT(swversion.get().lowestCompatibleProtocolVersion() == ProtocolVersion::withTSS().version()); TraceEvent(SevInfo, "UT/swversion/runCompatibleOlder").detail("SWVersion", swversion.get()); @@ -2788,8 +2787,8 @@ TEST_CASE("/fdbserver/worker/swversion/runIncompatibleOlder") { ASSERT(!swversion.isError()); if (!swversion.isError()) { - ASSERT(swversion.get().latestProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); - ASSERT(swversion.get().lastProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); + ASSERT(swversion.get().newestProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); + ASSERT(swversion.get().lastRunProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); ASSERT(swversion.get().lowestCompatibleProtocolVersion() == ProtocolVersion::withTSS().version()); } @@ -2824,8 +2823,8 @@ TEST_CASE("/fdbserver/worker/swversion/runNewer") { ASSERT(!swversion.isError()); if (!swversion.isError()) { - ASSERT(swversion.get().latestProtocolVersion() == ProtocolVersion::withTSS().version()); - ASSERT(swversion.get().lastProtocolVersion() == ProtocolVersion::withTSS().version()); + ASSERT(swversion.get().newestProtocolVersion() == ProtocolVersion::withTSS().version()); + ASSERT(swversion.get().lastRunProtocolVersion() == ProtocolVersion::withTSS().version()); ASSERT(swversion.get().lowestCompatibleProtocolVersion() == ProtocolVersion::withCacheRole().version()); } @@ -2838,8 +2837,8 @@ TEST_CASE("/fdbserver/worker/swversion/runNewer") { testSoftwareVersionCompatibility(swversionTestDirName, ProtocolVersion::withStorageInterfaceReadiness()))); if (!swversion.isError()) { - ASSERT(swversion.get().latestProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); - ASSERT(swversion.get().lastProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); + ASSERT(swversion.get().newestProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); + ASSERT(swversion.get().lastRunProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); ASSERT(swversion.get().lowestCompatibleProtocolVersion() == ProtocolVersion::withTSS().version()); } From fc666469af1e92b144e7c40065e503953b611fc6 Mon Sep 17 00:00:00 2001 From: "Bharadwaj V.R" Date: Fri, 8 Apr 2022 15:03:39 -0700 Subject: [PATCH 20/28] unit test fixes --- fdbserver/worker.actor.cpp | 51 ++++++++++---------------------------- 1 file changed, 13 insertions(+), 38 deletions(-) diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index 72efd2ca76..c3d1567b46 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -2544,7 +2544,6 @@ ACTOR Future testSoftwareVersionCompatibility(std::string folder, Pro return SWVersion(); } else { // Dangerous to continue if we cannot do a software compatibility test - TraceEvent(SevWarnAlways, "OpenSWVersionFileError").error(versionFile.getError()); throw versionFile.getError(); } } else { @@ -2553,8 +2552,9 @@ ACTOR Future testSoftwareVersionCompatibility(std::string folder, Pro state int64_t filesize = wait(versionFile.get()->size()); state Standalone buf = makeString(filesize); int readLen = wait(versionFile.get()->read(mutateString(buf), filesize, 0)); - ASSERT(filesize != 0); - ASSERT(readLen == filesize); + if (filesize == 0 || readLen != filesize) { + throw file_corrupt(); + } try { SWVersion swversion = ObjectReader::fromStringRef(buf, IncludeVersion()); @@ -2565,7 +2565,6 @@ ACTOR Future testSoftwareVersionCompatibility(std::string folder, Pro throw incompatible_software_version(); } } catch (Error& e) { - TraceEvent(SevWarnAlways, "ReadSWVersionFileError").error(e); throw e; } } @@ -2605,7 +2604,6 @@ ACTOR Future updateNewestSoftwareVersion(std::string folder, auto s = swVersionValue(swVersion); ErrorOr e = wait(errorOr(newVersionFile->write(s.toString().c_str(), s.size(), 0))); if (e.isError()) { - TraceEvent(SevWarnAlways, "WriteSWVersionFailed").error(e.getError()); throw e.getError(); } wait(newVersionFile->sync()); @@ -2634,10 +2632,7 @@ ACTOR Future testAndUpdateSoftwareVersionCompatibility(std::string dataFol ErrorOr updatedSWVersion = wait(errorOr(updateNewestSoftwareVersion( dataFolder, currentProtocolVersion, currentProtocolVersion, minCompatibleProtocolVersion))); if (updatedSWVersion.isError()) { - TraceEvent(SevWarnAlways, "SWVersionNotWritten", processIDUid).error(updatedSWVersion.getError()); throw updatedSWVersion.getError(); - } else { - TraceEvent(SevWarnAlways, "NewSWVersionWritten", processIDUid).log(); } } else if (currentProtocolVersion < ProtocolVersion(swVersion.get().newestProtocolVersion())) { ErrorOr updatedSWVersion = wait( @@ -2646,10 +2641,7 @@ ACTOR Future testAndUpdateSoftwareVersionCompatibility(std::string dataFol ProtocolVersion(swVersion.get().newestProtocolVersion()), ProtocolVersion(swVersion.get().lowestCompatibleProtocolVersion())))); if (updatedSWVersion.isError()) { - TraceEvent(SevWarnAlways, "SWVersionNotWritten", processIDUid).error(updatedSWVersion.getError()); throw updatedSWVersion.getError(); - } else { - TraceEvent(SevWarnAlways, "SWVersionWritten", processIDUid).log(); } } @@ -2670,10 +2662,8 @@ static const std::string swversionTestDirName = "sw-version-test"; TEST_CASE("/fdbserver/worker/swversion/noversionhistory") { wait(Future(Void())); - platform::eraseDirectoryRecursive(swversionTestDirName); - if (!platform::createDirectory("sw-version-test")) { - TraceEvent(SevError, "FailedToCreateDirectory").detail("Directory", "sw-version-test"); + TraceEvent(SevWarnAlways, "FailedToCreateDirectory").detail("Directory", "sw-version-test"); return Void(); } @@ -2684,7 +2674,7 @@ TEST_CASE("/fdbserver/worker/swversion/noversionhistory") { ASSERT(!swversion.get().isValid()); } - platform::eraseDirectoryRecursive(swversionTestDirName); + wait(IAsyncFileSystem::filesystem()->deleteFile(joinPath(swversionTestDirName, versionFileName), true)); return Void(); } @@ -2692,10 +2682,8 @@ TEST_CASE("/fdbserver/worker/swversion/noversionhistory") { TEST_CASE("/fdbserver/worker/swversion/writeVerifyVersion") { wait(Future(Void())); - platform::eraseDirectoryRecursive(swversionTestDirName); - if (!platform::createDirectory("sw-version-test")) { - TraceEvent(SevError, "FailedToCreateDirectory").detail("Directory", "sw-version-test"); + TraceEvent(SevWarnAlways, "FailedToCreateDirectory").detail("Directory", "sw-version-test"); return Void(); } @@ -2703,7 +2691,6 @@ TEST_CASE("/fdbserver/worker/swversion/writeVerifyVersion") { ProtocolVersion::withStorageInterfaceReadiness(), ProtocolVersion::withStorageInterfaceReadiness(), ProtocolVersion::withTSS()))); - ASSERT(!f.isError()); ErrorOr swversion = wait(errorOr( testSoftwareVersionCompatibility(swversionTestDirName, ProtocolVersion::withStorageInterfaceReadiness()))); @@ -2714,7 +2701,7 @@ TEST_CASE("/fdbserver/worker/swversion/writeVerifyVersion") { ASSERT(swversion.get().lowestCompatibleProtocolVersion() == ProtocolVersion::withTSS().version()); } - platform::eraseDirectoryRecursive(swversionTestDirName); + wait(IAsyncFileSystem::filesystem()->deleteFile(joinPath(swversionTestDirName, versionFileName), true)); return Void(); } @@ -2722,10 +2709,8 @@ TEST_CASE("/fdbserver/worker/swversion/writeVerifyVersion") { TEST_CASE("/fdbserver/worker/swversion/runCompatibleOlder") { wait(Future(Void())); - platform::eraseDirectoryRecursive(swversionTestDirName); - if (!platform::createDirectory("sw-version-test")) { - TraceEvent(SevError, "FailedToCreateDirectory").detail("Directory", "sw-version-test"); + TraceEvent(SevWarnAlways, "FailedToCreateDirectory").detail("Directory", "sw-version-test"); return Void(); } @@ -2757,11 +2742,9 @@ TEST_CASE("/fdbserver/worker/swversion/runCompatibleOlder") { ASSERT(swversion.get().newestProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); ASSERT(swversion.get().lastRunProtocolVersion() == ProtocolVersion::withTSS().version()); ASSERT(swversion.get().lowestCompatibleProtocolVersion() == ProtocolVersion::withTSS().version()); - - TraceEvent(SevInfo, "UT/swversion/runCompatibleOlder").detail("SWVersion", swversion.get()); } - platform::eraseDirectoryRecursive(swversionTestDirName); + wait(IAsyncFileSystem::filesystem()->deleteFile(joinPath(swversionTestDirName, versionFileName), true)); return Void(); } @@ -2769,10 +2752,8 @@ TEST_CASE("/fdbserver/worker/swversion/runCompatibleOlder") { TEST_CASE("/fdbserver/worker/swversion/runIncompatibleOlder") { wait(Future(Void())); - platform::eraseDirectoryRecursive(swversionTestDirName); - if (!platform::createDirectory("sw-version-test")) { - TraceEvent(SevError, "FailedToCreateDirectory").detail("Directory", "sw-version-test"); + TraceEvent(SevWarnAlways, "FailedToCreateDirectory").detail("Directory", "sw-version-test"); return Void(); } @@ -2780,12 +2761,10 @@ TEST_CASE("/fdbserver/worker/swversion/runIncompatibleOlder") { ProtocolVersion::withStorageInterfaceReadiness(), ProtocolVersion::withStorageInterfaceReadiness(), ProtocolVersion::withTSS()))); - ASSERT(!f.isError()); ErrorOr swversion = wait(errorOr( testSoftwareVersionCompatibility(swversionTestDirName, ProtocolVersion::withStorageInterfaceReadiness()))); - ASSERT(!swversion.isError()); if (!swversion.isError()) { ASSERT(swversion.get().newestProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); ASSERT(swversion.get().lastRunProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version()); @@ -2797,7 +2776,7 @@ TEST_CASE("/fdbserver/worker/swversion/runIncompatibleOlder") { ASSERT(swversion.isError() && swversion.getError().code() == error_code_incompatible_software_version); - platform::eraseDirectoryRecursive(swversionTestDirName); + wait(IAsyncFileSystem::filesystem()->deleteFile(joinPath(swversionTestDirName, versionFileName), true)); return Void(); } @@ -2805,10 +2784,8 @@ TEST_CASE("/fdbserver/worker/swversion/runIncompatibleOlder") { TEST_CASE("/fdbserver/worker/swversion/runNewer") { wait(Future(Void())); - platform::eraseDirectoryRecursive(swversionTestDirName); - if (!platform::createDirectory("sw-version-test")) { - TraceEvent(SevError, "FailedToCreateDirectory").detail("Directory", "sw-version-test"); + TraceEvent(SevWarnAlways, "FailedToCreateDirectory").detail("Directory", "sw-version-test"); return Void(); } @@ -2816,12 +2793,10 @@ TEST_CASE("/fdbserver/worker/swversion/runNewer") { ProtocolVersion::withTSS(), ProtocolVersion::withTSS(), ProtocolVersion::withCacheRole()))); - ASSERT(!f.isError()); ErrorOr swversion = wait(errorOr( testSoftwareVersionCompatibility(swversionTestDirName, ProtocolVersion::withStorageInterfaceReadiness()))); - ASSERT(!swversion.isError()); if (!swversion.isError()) { ASSERT(swversion.get().newestProtocolVersion() == ProtocolVersion::withTSS().version()); ASSERT(swversion.get().lastRunProtocolVersion() == ProtocolVersion::withTSS().version()); @@ -2842,7 +2817,7 @@ TEST_CASE("/fdbserver/worker/swversion/runNewer") { ASSERT(swversion.get().lowestCompatibleProtocolVersion() == ProtocolVersion::withTSS().version()); } - platform::eraseDirectoryRecursive(swversionTestDirName); + wait(IAsyncFileSystem::filesystem()->deleteFile(joinPath(swversionTestDirName, versionFileName), true)); return Void(); } From 50b45f1bf953c500194d45c5025b177a6f4eb1dd Mon Sep 17 00:00:00 2001 From: "Bharadwaj V.R" Date: Mon, 11 Apr 2022 12:01:12 -0700 Subject: [PATCH 21/28] Add SW versions to DBCore state --- fdbserver/ClusterRecovery.actor.cpp | 13 +++++++++++++ fdbserver/DBCoreState.h | 12 +++++++++++- flow/ProtocolVersion.h | 4 ++++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/fdbserver/ClusterRecovery.actor.cpp b/fdbserver/ClusterRecovery.actor.cpp index af564b377d..19e6356108 100644 --- a/fdbserver/ClusterRecovery.actor.cpp +++ b/fdbserver/ClusterRecovery.actor.cpp @@ -25,6 +25,8 @@ #include "fdbserver/MasterInterface.h" #include "fdbserver/WaitFailure.h" +#include "flow/ProtocolVersion.h" +#include "flow/Trace.h" #include "flow/actorcompiler.h" // This must be the last #include. static std::set const& normalClusterRecoveryErrors() { @@ -1406,6 +1408,10 @@ ACTOR Future clusterRecoveryCore(Reference self) { wait(self->cstate.read()); + if (self->cstate.prevDBState.lowestCompatibleServerVersion > currentProtocolVersion) { + TraceEvent(SevWarnAlways, "IncompatbleServerVersion", self->dbgid).log(); + } + self->recoveryState = RecoveryState::LOCKING_CSTATE; TraceEvent(getRecoveryEventName(ClusterRecoveryEventType::CLUSTER_RECOVERY_STATE_EVENT_NAME).c_str(), self->dbgid) .detail("StatusCode", RecoveryStatus::locking_coordinated_state) @@ -1461,6 +1467,13 @@ ACTOR Future clusterRecoveryCore(Reference self) { DBCoreState newState = self->cstate.myDBState; newState.recoveryCount++; + if (self->cstate.myDBState.newestServerVersion.isInvalidMagic() || + self->cstate.myDBState.newestServerVersion < currentProtocolVersion) { + ASSERT(self->cstate.myDBState.lowestCompatibleServerVersion.isInvalidMagic() || + !self->cstate.myDBState.newestServerVersion.isInvalidMagic()); + newState.newestServerVersion = currentProtocolVersion; + newState.lowestCompatibleServerVersion = minCompatibleProtocolVersion; + } wait(self->cstate.write(newState) || recoverAndEndEpoch); self->recoveryState = RecoveryState::RECRUITING; diff --git a/fdbserver/DBCoreState.h b/fdbserver/DBCoreState.h index 7c06432498..9094a8aeea 100644 --- a/fdbserver/DBCoreState.h +++ b/fdbserver/DBCoreState.h @@ -28,6 +28,8 @@ #include "fdbrpc/ReplicationPolicy.h" #include "fdbserver/LogSystemConfig.h" #include "fdbserver/MasterInterface.h" +#include "flow/ObjectSerializerTraits.h" +#include "flow/ProtocolVersion.h" class LogSet; struct OldLogData; @@ -141,8 +143,13 @@ struct DBCoreState { DBRecoveryCount recoveryCount; // Increases with sequential successful recoveries. LogSystemType logSystemType; std::set pseudoLocalities; + ProtocolVersion newestServerVersion; + ProtocolVersion lowestCompatibleServerVersion; - DBCoreState() : logRouterTags(0), txsTags(0), recoveryCount(0), logSystemType(LogSystemType::empty) {} + DBCoreState() + : logRouterTags(0), txsTags(0), recoveryCount(0), logSystemType(LogSystemType::empty), + newestServerVersion(ProtocolVersion::invalidProtocolVersion), + lowestCompatibleServerVersion(ProtocolVersion::invalidProtocolVersion) {} std::vector getPriorCommittedLogServers() { std::vector priorCommittedLogServers; @@ -180,6 +187,9 @@ struct DBCoreState { if (ar.protocolVersion().hasShardedTxsTags()) { serializer(ar, txsTags); } + if (ar.protocolVersion().hasSWVersionTracking()) { + serializer(ar, newestServerVersion, lowestCompatibleServerVersion); + } } else if (ar.isDeserializing) { tLogs.push_back(CoreTLogSet()); serializer(ar, diff --git a/flow/ProtocolVersion.h b/flow/ProtocolVersion.h index 15ea5c99fc..cb708253c9 100644 --- a/flow/ProtocolVersion.h +++ b/flow/ProtocolVersion.h @@ -62,6 +62,7 @@ public: // constants static constexpr uint64_t objectSerializerFlag = 0x1000000000000000LL; static constexpr uint64_t compatibleProtocolVersionMask = 0xFFFFFFFFFFFF0000LL; static constexpr uint64_t minValidProtocolVersion = 0x0FDB00A200060001LL; + static constexpr uint64_t invalidProtocolVersion = 0x0FDB00A100000000LL; public: constexpr explicit ProtocolVersion(uint64_t version) : _version(version) {} @@ -77,6 +78,8 @@ public: } constexpr bool isValid() const { return version() >= minValidProtocolVersion; } + constexpr bool isInvalidMagic() const { return version() == invalidProtocolVersion; } + constexpr uint64_t version() const { return _version & versionFlagMask; } constexpr uint64_t versionWithFlags() const { return _version; } @@ -166,6 +169,7 @@ public: // introduced features PROTOCOL_VERSION_FEATURE(0x0FDB00B071010000LL, PerpetualWiggleMetadata); PROTOCOL_VERSION_FEATURE(0x0FDB00B071010000LL, Tenants); PROTOCOL_VERSION_FEATURE(0x0FDB00B071010000LL, StorageInterfaceReadiness); + PROTOCOL_VERSION_FEATURE(0x0FDB00B071010000LL, SWVersionTracking); }; template <> From 9f11a8e6a47c5c72d6cd08a319ad2ce6b7b2fb1f Mon Sep 17 00:00:00 2001 From: "Bharadwaj V.R" Date: Tue, 12 Apr 2022 10:19:38 -0700 Subject: [PATCH 22/28] Update fdbclient/SystemData.cpp Co-authored-by: Trevor Clinkenbeard --- fdbclient/SystemData.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbclient/SystemData.cpp b/fdbclient/SystemData.cpp index fecbe93c25..af9ba32a31 100644 --- a/fdbclient/SystemData.cpp +++ b/fdbclient/SystemData.cpp @@ -619,7 +619,7 @@ StorageServerInterface decodeServerListValue(ValueRef const& value) { return decodeServerListValueFB(value); } -const Value swVersionValue(SWVersion const& swversion) { +Value swVersionValue(SWVersion const& swversion) { auto protocolVersion = currentProtocolVersion; protocolVersion.addObjectSerializerFlag(); return ObjectWriter::toValue(swversion, IncludeVersion(protocolVersion)); From ebb0c45b0f244f341a658913aff6be5a91543841 Mon Sep 17 00:00:00 2001 From: "Bharadwaj V.R" Date: Tue, 12 Apr 2022 10:19:49 -0700 Subject: [PATCH 23/28] Update fdbclient/SystemData.h Co-authored-by: Trevor Clinkenbeard --- fdbclient/SystemData.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbclient/SystemData.h b/fdbclient/SystemData.h index 710ac0369b..705caf98f1 100644 --- a/fdbclient/SystemData.h +++ b/fdbclient/SystemData.h @@ -205,7 +205,7 @@ const Value serverListValue(StorageServerInterface const&); UID decodeServerListKey(KeyRef const&); StorageServerInterface decodeServerListValue(ValueRef const&); -const Value swVersionValue(SWVersion const& swversion); +Value swVersionValue(SWVersion const& swversion); SWVersion decodeSWVersionValue(ValueRef const&); // "\xff/processClass/[[processID]]" := "[[ProcessClass]]" From 5d103d5f7b3d854a23efca8b2747425a8e42731c Mon Sep 17 00:00:00 2001 From: "Bharadwaj V.R" Date: Tue, 12 Apr 2022 10:22:35 -0700 Subject: [PATCH 24/28] Update fdbserver/worker.actor.cpp Remove unnecessary errorOr actor when testing SW version compatibility Co-authored-by: Trevor Clinkenbeard --- fdbserver/worker.actor.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index 0173745b00..10281bb9bd 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -3211,10 +3211,7 @@ ACTOR Future fdbd(Reference connRecord, localities.set(LocalityData::keyProcessId, processIDUid.toString()); // Only one process can execute on a dataFolder from this point onwards - ErrorOr f = wait(errorOr(testAndUpdateSoftwareVersionCompatibility(dataFolder, processIDUid))); - if (f.isError()) { - throw f.getError(); - } + wait(testAndUpdateSoftwareVersionCompatibility(dataFolder, processIDUid)); std::string fitnessFilePath = joinPath(dataFolder, "fitness"); auto cc = makeReference>>(); From 95754fe65018dbac500c9c7365bfa9517acbd031 Mon Sep 17 00:00:00 2001 From: "Bharadwaj V.R" Date: Tue, 12 Apr 2022 10:34:18 -0700 Subject: [PATCH 25/28] Remove unnecessary wait statements in test cases --- fdbserver/worker.actor.cpp | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index 0173745b00..b293ae2c64 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -2750,8 +2750,6 @@ ACTOR Future testAndUpdateSoftwareVersionCompatibility(std::string dataFol static const std::string swversionTestDirName = "sw-version-test"; TEST_CASE("/fdbserver/worker/swversion/noversionhistory") { - wait(Future(Void())); - if (!platform::createDirectory("sw-version-test")) { TraceEvent(SevWarnAlways, "FailedToCreateDirectory").detail("Directory", "sw-version-test"); return Void(); @@ -2770,8 +2768,6 @@ TEST_CASE("/fdbserver/worker/swversion/noversionhistory") { } TEST_CASE("/fdbserver/worker/swversion/writeVerifyVersion") { - wait(Future(Void())); - if (!platform::createDirectory("sw-version-test")) { TraceEvent(SevWarnAlways, "FailedToCreateDirectory").detail("Directory", "sw-version-test"); return Void(); @@ -2797,8 +2793,6 @@ TEST_CASE("/fdbserver/worker/swversion/writeVerifyVersion") { } TEST_CASE("/fdbserver/worker/swversion/runCompatibleOlder") { - wait(Future(Void())); - if (!platform::createDirectory("sw-version-test")) { TraceEvent(SevWarnAlways, "FailedToCreateDirectory").detail("Directory", "sw-version-test"); return Void(); @@ -2840,8 +2834,6 @@ TEST_CASE("/fdbserver/worker/swversion/runCompatibleOlder") { } TEST_CASE("/fdbserver/worker/swversion/runIncompatibleOlder") { - wait(Future(Void())); - if (!platform::createDirectory("sw-version-test")) { TraceEvent(SevWarnAlways, "FailedToCreateDirectory").detail("Directory", "sw-version-test"); return Void(); @@ -2872,8 +2864,6 @@ TEST_CASE("/fdbserver/worker/swversion/runIncompatibleOlder") { } TEST_CASE("/fdbserver/worker/swversion/runNewer") { - wait(Future(Void())); - if (!platform::createDirectory("sw-version-test")) { TraceEvent(SevWarnAlways, "FailedToCreateDirectory").detail("Directory", "sw-version-test"); return Void(); From 46b22b79ea357cd559b1c433316dfdebce4cd17d Mon Sep 17 00:00:00 2001 From: "Bharadwaj V.R" Date: Tue, 12 Apr 2022 10:47:49 -0700 Subject: [PATCH 26/28] Revert "Add SW versions to DBCore state" This reverts commit 50b45f1bf953c500194d45c5025b177a6f4eb1dd. --- fdbserver/ClusterRecovery.actor.cpp | 13 ------------- fdbserver/DBCoreState.h | 12 +----------- flow/ProtocolVersion.h | 4 ---- 3 files changed, 1 insertion(+), 28 deletions(-) diff --git a/fdbserver/ClusterRecovery.actor.cpp b/fdbserver/ClusterRecovery.actor.cpp index 19e6356108..af564b377d 100644 --- a/fdbserver/ClusterRecovery.actor.cpp +++ b/fdbserver/ClusterRecovery.actor.cpp @@ -25,8 +25,6 @@ #include "fdbserver/MasterInterface.h" #include "fdbserver/WaitFailure.h" -#include "flow/ProtocolVersion.h" -#include "flow/Trace.h" #include "flow/actorcompiler.h" // This must be the last #include. static std::set const& normalClusterRecoveryErrors() { @@ -1408,10 +1406,6 @@ ACTOR Future clusterRecoveryCore(Reference self) { wait(self->cstate.read()); - if (self->cstate.prevDBState.lowestCompatibleServerVersion > currentProtocolVersion) { - TraceEvent(SevWarnAlways, "IncompatbleServerVersion", self->dbgid).log(); - } - self->recoveryState = RecoveryState::LOCKING_CSTATE; TraceEvent(getRecoveryEventName(ClusterRecoveryEventType::CLUSTER_RECOVERY_STATE_EVENT_NAME).c_str(), self->dbgid) .detail("StatusCode", RecoveryStatus::locking_coordinated_state) @@ -1467,13 +1461,6 @@ ACTOR Future clusterRecoveryCore(Reference self) { DBCoreState newState = self->cstate.myDBState; newState.recoveryCount++; - if (self->cstate.myDBState.newestServerVersion.isInvalidMagic() || - self->cstate.myDBState.newestServerVersion < currentProtocolVersion) { - ASSERT(self->cstate.myDBState.lowestCompatibleServerVersion.isInvalidMagic() || - !self->cstate.myDBState.newestServerVersion.isInvalidMagic()); - newState.newestServerVersion = currentProtocolVersion; - newState.lowestCompatibleServerVersion = minCompatibleProtocolVersion; - } wait(self->cstate.write(newState) || recoverAndEndEpoch); self->recoveryState = RecoveryState::RECRUITING; diff --git a/fdbserver/DBCoreState.h b/fdbserver/DBCoreState.h index 9094a8aeea..7c06432498 100644 --- a/fdbserver/DBCoreState.h +++ b/fdbserver/DBCoreState.h @@ -28,8 +28,6 @@ #include "fdbrpc/ReplicationPolicy.h" #include "fdbserver/LogSystemConfig.h" #include "fdbserver/MasterInterface.h" -#include "flow/ObjectSerializerTraits.h" -#include "flow/ProtocolVersion.h" class LogSet; struct OldLogData; @@ -143,13 +141,8 @@ struct DBCoreState { DBRecoveryCount recoveryCount; // Increases with sequential successful recoveries. LogSystemType logSystemType; std::set pseudoLocalities; - ProtocolVersion newestServerVersion; - ProtocolVersion lowestCompatibleServerVersion; - DBCoreState() - : logRouterTags(0), txsTags(0), recoveryCount(0), logSystemType(LogSystemType::empty), - newestServerVersion(ProtocolVersion::invalidProtocolVersion), - lowestCompatibleServerVersion(ProtocolVersion::invalidProtocolVersion) {} + DBCoreState() : logRouterTags(0), txsTags(0), recoveryCount(0), logSystemType(LogSystemType::empty) {} std::vector getPriorCommittedLogServers() { std::vector priorCommittedLogServers; @@ -187,9 +180,6 @@ struct DBCoreState { if (ar.protocolVersion().hasShardedTxsTags()) { serializer(ar, txsTags); } - if (ar.protocolVersion().hasSWVersionTracking()) { - serializer(ar, newestServerVersion, lowestCompatibleServerVersion); - } } else if (ar.isDeserializing) { tLogs.push_back(CoreTLogSet()); serializer(ar, diff --git a/flow/ProtocolVersion.h b/flow/ProtocolVersion.h index cb708253c9..15ea5c99fc 100644 --- a/flow/ProtocolVersion.h +++ b/flow/ProtocolVersion.h @@ -62,7 +62,6 @@ public: // constants static constexpr uint64_t objectSerializerFlag = 0x1000000000000000LL; static constexpr uint64_t compatibleProtocolVersionMask = 0xFFFFFFFFFFFF0000LL; static constexpr uint64_t minValidProtocolVersion = 0x0FDB00A200060001LL; - static constexpr uint64_t invalidProtocolVersion = 0x0FDB00A100000000LL; public: constexpr explicit ProtocolVersion(uint64_t version) : _version(version) {} @@ -78,8 +77,6 @@ public: } constexpr bool isValid() const { return version() >= minValidProtocolVersion; } - constexpr bool isInvalidMagic() const { return version() == invalidProtocolVersion; } - constexpr uint64_t version() const { return _version & versionFlagMask; } constexpr uint64_t versionWithFlags() const { return _version; } @@ -169,7 +166,6 @@ public: // introduced features PROTOCOL_VERSION_FEATURE(0x0FDB00B071010000LL, PerpetualWiggleMetadata); PROTOCOL_VERSION_FEATURE(0x0FDB00B071010000LL, Tenants); PROTOCOL_VERSION_FEATURE(0x0FDB00B071010000LL, StorageInterfaceReadiness); - PROTOCOL_VERSION_FEATURE(0x0FDB00B071010000LL, SWVersionTracking); }; template <> From 3cbe7f7d630e85d1aa552df16f0c84832cc42c00 Mon Sep 17 00:00:00 2001 From: "Bharadwaj V.R" Date: Wed, 13 Apr 2022 09:47:12 -0700 Subject: [PATCH 27/28] Update min compatible version to 7.1 --- flow/ProtocolVersion.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flow/ProtocolVersion.h b/flow/ProtocolVersion.h index fbf4359920..55f580d4d7 100644 --- a/flow/ProtocolVersion.h +++ b/flow/ProtocolVersion.h @@ -38,7 +38,7 @@ constexpr uint64_t currentProtocolVersionValue = 0x0FDB00B072000000LL; constexpr uint64_t minInvalidProtocolVersionValue = 0x0FDB00B074000000LL; // The lowest protocol version that can be downgraded to. -constexpr uint64_t minCompatibleProtocolVersionValue = 0x0FDB00B070000000LL; +constexpr uint64_t minCompatibleProtocolVersionValue = 0x0FDB00B071000000LL; #define PROTOCOL_VERSION_FEATURE(v, x) \ static_assert((v & 0xF0FFFFLL) == 0 || v < 0x0FDB00B071000000LL, "Unexpected feature protocol version"); \ From 08323de905537c3d026fc768168359052bf621bc Mon Sep 17 00:00:00 2001 From: "Bharadwaj V.R" Date: Fri, 22 Apr 2022 15:10:24 -0700 Subject: [PATCH 28/28] fix formatting --- flow/ProtocolVersion.h | 1 - 1 file changed, 1 deletion(-) diff --git a/flow/ProtocolVersion.h b/flow/ProtocolVersion.h index 651bf1aa0d..9f3e1f5440 100644 --- a/flow/ProtocolVersion.h +++ b/flow/ProtocolVersion.h @@ -246,4 +246,3 @@ struct Traceable : std::true_type { swVersion.lowestCompatibleProtocolVersion()); } }; -