Merge pull request #6762 from sfc-gh-bvr/block-down

Fail upon running server versions that are too old and incompatible with the newest software that has run on a cluster
This commit is contained in:
Bharadwaj V.R 2022-04-22 17:28:49 -07:00 committed by GitHub
commit 9b66447783
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 357 additions and 1 deletions

View File

@ -619,6 +619,19 @@ StorageServerInterface decodeServerListValue(ValueRef const& value) {
return decodeServerListValueFB(value);
}
// Serializes `swversion` with ObjectWriter. The version option passed to the writer is
// IncludeVersion built from the current protocol version with the object-serializer flag set.
Value swVersionValue(SWVersion const& swversion) {
    auto flaggedVersion = currentProtocolVersion;
    flaggedVersion.addObjectSerializerFlag();

    return ObjectWriter::toValue(swversion, IncludeVersion(flaggedVersion));
}
// Reads an SWVersion back out of `value` with ObjectReader, using IncludeVersion() as the
// version option. Counterpart of swVersionValue().
SWVersion decodeSWVersionValue(ValueRef const& value) {
    ObjectReader versionedReader(value.begin(), IncludeVersion());

    SWVersion decoded;
    versionedReader.deserialize(decoded);
    return decoded;
}
// processClassKeys.contains(k) iff k.startsWith( processClassKeys.begin ) because '/'+1 == '0'
// i.e. the range [begin, end) holds exactly the keys that start with "\xff/processClass/".
const KeyRangeRef processClassKeys(LiteralStringRef("\xff/processClass/"), LiteralStringRef("\xff/processClass0"));
// Common prefix of every key in processClassKeys (the range's begin key).
const KeyRef processClassPrefix = processClassKeys.begin;

View File

@ -205,6 +205,9 @@ const Value serverListValue(StorageServerInterface const&);
UID decodeServerListKey(KeyRef const&);
StorageServerInterface decodeServerListValue(ValueRef const&);
// Serializes an SWVersion into a Value; decodeSWVersionValue() is the matching reader.
Value swVersionValue(SWVersion const& swversion);
// Deserializes an SWVersion from a value in the format written by swVersionValue().
SWVersion decodeSWVersionValue(ValueRef const&);
// "\xff/processClass/[[processID]]" := "[[ProcessClass]]"
// Contains a mapping from processID to processClass
extern const KeyRangeRef processClassKeys;

View File

@ -636,7 +636,8 @@ ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(Reference<IClusterConne
printf("SimulatedFDBDTerminated: %s\n", e.what());
ASSERT(destructed ||
g_simulator.getCurrentProcess() == process); // simulatedFDBD catch called on different process
TraceEvent(e.code() == error_code_actor_cancelled || e.code() == error_code_file_not_found || destructed
TraceEvent(e.code() == error_code_actor_cancelled || e.code() == error_code_file_not_found ||
e.code() == error_code_incompatible_software_version || destructed
? SevInfo
: SevError,
"SimulatedFDBDTerminated")

View File

@ -22,12 +22,18 @@
#include <tuple>
#include <boost/lexical_cast.hpp>
#include "fdbclient/FDBTypes.h"
#include "fdbrpc/IAsyncFile.h"
#include "fdbrpc/Locality.h"
#include "fdbclient/GlobalConfig.actor.h"
#include "fdbclient/ProcessInterface.h"
#include "fdbclient/StorageServerInterface.h"
#include "fdbserver/Knobs.h"
#include "flow/ActorCollection.h"
#include "flow/Error.h"
#include "flow/FileIdentifier.h"
#include "flow/ObjectSerializer.h"
#include "flow/Platform.h"
#include "flow/ProtocolVersion.h"
#include "flow/SystemMonitor.h"
#include "flow/TDMetric.actor.h"
@ -57,7 +63,9 @@
#include "flow/ThreadHelper.actor.h"
#include "flow/Trace.h"
#include "flow/flow.h"
#include "flow/genericactors.actor.h"
#include "flow/network.h"
#include "flow/serialize.h"
#ifdef __linux__
#include <fcntl.h>
@ -2624,6 +2632,291 @@ ACTOR Future<Void> monitorAndWriteCCPriorityInfo(std::string filePath,
}
}
// Name of the file, inside a process's data folder, that records the software (protocol)
// versions that have run against that folder.
static const std::string versionFileName = "sw-version";

// Checks whether software at `currentVersion` may run against the data in `folder`.
//
// Reads the "sw-version" file in `folder` and compares `currentVersion` with the lowest
// compatible protocol version recorded in it.
//
// Returns:
//   - a default-constructed (invalid) SWVersion when no version file exists;
//   - the SWVersion read from the file when currentVersion >= its lowest compatible version.
// Throws:
//   - incompatible_software_version when currentVersion is older than the recorded lowest
//     compatible version;
//   - file_corrupt when the file is empty or could not be read in full;
//   - any other error raised while opening, reading or deserializing the file.
// Every error other than actor_cancelled is traced as OpenReadSWVersionFileError before it is
// rethrown.
ACTOR Future<SWVersion> testSoftwareVersionCompatibility(std::string folder, ProtocolVersion currentVersion) {
    try {
        state std::string versionFilePath = joinPath(folder, versionFileName);
        state ErrorOr<Reference<IAsyncFile>> versionFile = wait(
            errorOr(IAsyncFileSystem::filesystem(g_network)->open(versionFilePath, IAsyncFile::OPEN_READONLY, 0600)));

        if (versionFile.isError()) {
            if (versionFile.getError().code() == error_code_file_not_found && !fileExists(versionFilePath)) {
                // If a version file does not exist, we assume this is either a fresh
                // installation or an upgrade from a version that does not support version files.
                // Either way, we can safely continue running this version of software.
                TraceEvent(SevInfo, "NoPreviousSWVersion").log();
                return SWVersion();
            }
            // Dangerous to continue if we cannot do a software compatibility test
            throw versionFile.getError();
        }

        // Test whether the newest software version that has been run on this cluster is
        // compatible with the current software version
        state int64_t filesize = wait(versionFile.get()->size());
        state Standalone<StringRef> buf = makeString(filesize);
        int readLen = wait(versionFile.get()->read(mutateString(buf), filesize, 0));
        if (filesize == 0 || readLen != filesize) {
            throw file_corrupt();
        }

        // Deserialization errors propagate to the outer handler below; no inner
        // catch-and-rethrow is needed.
        SWVersion swversion = ObjectReader::fromStringRef<SWVersion>(buf, IncludeVersion());
        ProtocolVersion lowestCompatibleVersion(swversion.lowestCompatibleProtocolVersion());
        if (currentVersion >= lowestCompatibleVersion) {
            return swversion;
        }
        throw incompatible_software_version();
    } catch (Error& e) {
        if (e.code() == error_code_actor_cancelled) {
            throw;
        }
        // TODO(bvr): Inject faults
        TraceEvent(SevWarnAlways, "OpenReadSWVersionFileError").error(e);
        throw;
    }
}
// Rewrites the "sw-version" file in `folder` with a new SWVersion record.
//
// Parameters:
//   folder               - directory that holds the version file.
//   currentVersion       - stored as the record's last-run protocol version.
//   latestVersion        - stored as the record's newest protocol version.
//   minCompatibleVersion - stored as the record's lowest compatible protocol version.
//
// Requires currentVersion >= minCompatibleVersion (asserted).
// Any failure to open, write or sync the file is rethrown; errors other than actor_cancelled
// are first traced as OpenWriteSWVersionFileError.
ACTOR Future<Void> updateNewestSoftwareVersion(std::string folder,
                                               ProtocolVersion currentVersion,
                                               ProtocolVersion latestVersion,
                                               ProtocolVersion minCompatibleVersion) {
    ASSERT(currentVersion >= minCompatibleVersion);

    try {
        state std::string versionFilePath = joinPath(folder, versionFileName);

        // Probe the existing file read-only first. A missing file is fine (it is created
        // below); any other open failure is treated as fatal.
        ErrorOr<Reference<IAsyncFile>> versionFile = wait(
            errorOr(IAsyncFileSystem::filesystem(g_network)->open(versionFilePath, IAsyncFile::OPEN_READONLY, 0600)));
        if (versionFile.isError() &&
            (versionFile.getError().code() != error_code_file_not_found || fileExists(versionFilePath))) {
            throw versionFile.getError();
        }

        state Reference<IAsyncFile> newVersionFile = wait(IAsyncFileSystem::filesystem()->open(
            versionFilePath,
            IAsyncFile::OPEN_ATOMIC_WRITE_AND_CREATE | IAsyncFile::OPEN_CREATE | IAsyncFile::OPEN_READWRITE,
            0600));

        // The serialized bytes must stay alive until the write has finished. Keeping them in a
        // `state` variable guarantees that; a pointer taken from a temporary std::string would
        // dangle as soon as the write() call expression ends, which may be before the write
        // completes.
        state Value serialized = swVersionValue(SWVersion(latestVersion, currentVersion, minCompatibleVersion));
        ErrorOr<Void> e = wait(errorOr(newVersionFile->write(serialized.begin(), serialized.size(), 0)));
        if (e.isError()) {
            throw e.getError();
        }
        wait(newVersionFile->sync());
    } catch (Error& e) {
        if (e.code() == error_code_actor_cancelled) {
            throw;
        }
        TraceEvent(SevWarnAlways, "OpenWriteSWVersionFileError").error(e);
        throw;
    }

    return Void();
}
// Start-up gate for a data folder:
//   1. checks that the running protocol version is compatible with what is recorded in
//      `dataFolder` (an error here is traced and rethrown);
//   2. rewrites the version file when the running version differs from the recorded newest
//      version, or when there is no valid record yet;
//   3. re-runs the compatibility check against the rewritten file and traces the result.
// `processIDUid` is only used to tag the trace events.
ACTOR Future<Void> testAndUpdateSoftwareVersionCompatibility(std::string dataFolder, UID processIDUid) {
    ErrorOr<SWVersion> recorded = wait(errorOr(testSoftwareVersionCompatibility(dataFolder, currentProtocolVersion)));
    if (recorded.isError()) {
        TraceEvent(SevWarnAlways, "SWVersionCompatibilityCheckError", processIDUid).error(recorded.getError());
        throw recorded.getError();
    }

    TraceEvent(SevInfo, "SWVersionCompatible", processIDUid).detail("SWVersion", recorded.get());

    const ProtocolVersion recordedNewest(recorded.get().newestProtocolVersion());
    if (!recorded.get().isValid() || currentProtocolVersion > recordedNewest) {
        // No usable record, or this software is newer than anything recorded: the running
        // version becomes both the newest and the last-run version.
        wait(updateNewestSoftwareVersion(
            dataFolder, currentProtocolVersion, currentProtocolVersion, minCompatibleProtocolVersion));
    } else if (currentProtocolVersion < recordedNewest) {
        // Older (but compatible) software: keep the recorded newest / lowest-compatible
        // versions and only record the running version as last-run.
        wait(updateNewestSoftwareVersion(dataFolder,
                                         currentProtocolVersion,
                                         recordedNewest,
                                         ProtocolVersion(recorded.get().lowestCompatibleProtocolVersion())));
    }

    ErrorOr<SWVersion> verified = wait(errorOr(testSoftwareVersionCompatibility(dataFolder, currentProtocolVersion)));
    if (verified.isError()) {
        TraceEvent(SevWarnAlways, "SWVersionCompatibilityCheckError", processIDUid).error(verified.getError());
        throw verified.getError();
    }

    TraceEvent(SevInfo, "VerifiedNewSoftwareVersion", processIDUid).detail("SWVersion", verified.get());

    return Void();
}
// Relative path of the scratch directory used by the swversion unit tests.
static const std::string swversionTestDirName = "sw-version-test";

// With no version file written, a successful compatibility check must yield an invalid
// (default) SWVersion.
TEST_CASE("/fdbserver/worker/swversion/noversionhistory") {
    const bool directoryCreated = platform::createDirectory("sw-version-test");
    if (!directoryCreated) {
        TraceEvent(SevWarnAlways, "FailedToCreateDirectory").detail("Directory", "sw-version-test");
        return Void();
    }

    ErrorOr<SWVersion> swversion = wait(errorOr(
        testSoftwareVersionCompatibility(swversionTestDirName, ProtocolVersion::withStorageInterfaceReadiness())));

    // An error from the check is tolerated here; only a successful result is inspected.
    if (!swversion.isError()) {
        ASSERT(!swversion.get().isValid());
    }

    // Clean up the version file so later tests start from the same state.
    wait(IAsyncFileSystem::filesystem()->deleteFile(joinPath(swversionTestDirName, versionFileName), true));

    return Void();
}
// Writes a version record and checks that reading it back returns the same three versions.
TEST_CASE("/fdbserver/worker/swversion/writeVerifyVersion") {
    const bool directoryCreated = platform::createDirectory("sw-version-test");
    if (!directoryCreated) {
        TraceEvent(SevWarnAlways, "FailedToCreateDirectory").detail("Directory", "sw-version-test");
        return Void();
    }

    // Record: last-run = newest = StorageInterfaceReadiness, lowest compatible = TSS.
    // The write outcome itself is not inspected.
    ErrorOr<Void> writeOutcome = wait(errorOr(updateNewestSoftwareVersion(swversionTestDirName,
                                                                          ProtocolVersion::withStorageInterfaceReadiness(),
                                                                          ProtocolVersion::withStorageInterfaceReadiness(),
                                                                          ProtocolVersion::withTSS())));

    ErrorOr<SWVersion> swversion = wait(errorOr(
        testSoftwareVersionCompatibility(swversionTestDirName, ProtocolVersion::withStorageInterfaceReadiness())));

    // Only a successful read is verified.
    if (!swversion.isError()) {
        ASSERT(swversion.get().newestProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version());
        ASSERT(swversion.get().lastRunProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version());
        ASSERT(swversion.get().lowestCompatibleProtocolVersion() == ProtocolVersion::withTSS().version());
    }

    wait(IAsyncFileSystem::filesystem()->deleteFile(joinPath(swversionTestDirName, versionFileName), true));

    return Void();
}
// Simulates running newer software first and then an older, still compatible version:
// the second record keeps the newest version and only changes the last-run version.
TEST_CASE("/fdbserver/worker/swversion/runCompatibleOlder") {
    const bool directoryCreated = platform::createDirectory("sw-version-test");
    if (!directoryCreated) {
        TraceEvent(SevWarnAlways, "FailedToCreateDirectory").detail("Directory", "sw-version-test");
        return Void();
    }

    // Step 1: record written by the newer software (StorageInterfaceReadiness).
    ErrorOr<Void> firstWrite = wait(errorOr(updateNewestSoftwareVersion(swversionTestDirName,
                                                                        ProtocolVersion::withStorageInterfaceReadiness(),
                                                                        ProtocolVersion::withStorageInterfaceReadiness(),
                                                                        ProtocolVersion::withTSS())));

    ErrorOr<SWVersion> swversion = wait(errorOr(
        testSoftwareVersionCompatibility(swversionTestDirName, ProtocolVersion::withStorageInterfaceReadiness())));

    if (!swversion.isError()) {
        ASSERT(swversion.get().newestProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version());
        ASSERT(swversion.get().lastRunProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version());
        ASSERT(swversion.get().lowestCompatibleProtocolVersion() == ProtocolVersion::withTSS().version());

        TraceEvent(SevInfo, "UT/swversion/runCompatibleOlder").detail("SWVersion", swversion.get());
    }

    // Step 2: record written by the older software (TSS); newest stays unchanged.
    ErrorOr<Void> secondWrite = wait(errorOr(updateNewestSoftwareVersion(swversionTestDirName,
                                                                         ProtocolVersion::withTSS(),
                                                                         ProtocolVersion::withStorageInterfaceReadiness(),
                                                                         ProtocolVersion::withTSS())));

    ErrorOr<SWVersion> swversion = wait(errorOr(
        testSoftwareVersionCompatibility(swversionTestDirName, ProtocolVersion::withStorageInterfaceReadiness())));

    if (!swversion.isError()) {
        ASSERT(swversion.get().newestProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version());
        ASSERT(swversion.get().lastRunProtocolVersion() == ProtocolVersion::withTSS().version());
        ASSERT(swversion.get().lowestCompatibleProtocolVersion() == ProtocolVersion::withTSS().version());
    }

    wait(IAsyncFileSystem::filesystem()->deleteFile(joinPath(swversionTestDirName, versionFileName), true));

    return Void();
}
// After a record with lowest compatible version TSS has been written, a check run with the
// CacheRole protocol version must fail with incompatible_software_version.
TEST_CASE("/fdbserver/worker/swversion/runIncompatibleOlder") {
    const bool directoryCreated = platform::createDirectory("sw-version-test");
    if (!directoryCreated) {
        TraceEvent(SevWarnAlways, "FailedToCreateDirectory").detail("Directory", "sw-version-test");
        return Void();
    }

    ErrorOr<Void> writeOutcome = wait(errorOr(updateNewestSoftwareVersion(swversionTestDirName,
                                                                          ProtocolVersion::withStorageInterfaceReadiness(),
                                                                          ProtocolVersion::withStorageInterfaceReadiness(),
                                                                          ProtocolVersion::withTSS())));

    // Sanity check: the same version that wrote the record can read it back.
    ErrorOr<SWVersion> swversion = wait(errorOr(
        testSoftwareVersionCompatibility(swversionTestDirName, ProtocolVersion::withStorageInterfaceReadiness())));

    if (!swversion.isError()) {
        ASSERT(swversion.get().newestProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version());
        ASSERT(swversion.get().lastRunProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version());
        ASSERT(swversion.get().lowestCompatibleProtocolVersion() == ProtocolVersion::withTSS().version());
    }

    // The actual test: the CacheRole protocol version must be rejected.
    ErrorOr<SWVersion> swversion =
        wait(errorOr(testSoftwareVersionCompatibility(swversionTestDirName, ProtocolVersion::withCacheRole())));

    ASSERT(swversion.isError() && swversion.getError().code() == error_code_incompatible_software_version);

    wait(IAsyncFileSystem::filesystem()->deleteFile(joinPath(swversionTestDirName, versionFileName), true));

    return Void();
}
// Simulates an upgrade: a record written by older software (TSS) is replaced by one written
// by newer software (StorageInterfaceReadiness), and each record is read back and verified.
TEST_CASE("/fdbserver/worker/swversion/runNewer") {
    const bool directoryCreated = platform::createDirectory("sw-version-test");
    if (!directoryCreated) {
        TraceEvent(SevWarnAlways, "FailedToCreateDirectory").detail("Directory", "sw-version-test");
        return Void();
    }

    // Step 1: record from the older software; lowest compatible version is CacheRole.
    ErrorOr<Void> firstWrite = wait(errorOr(updateNewestSoftwareVersion(swversionTestDirName,
                                                                        ProtocolVersion::withTSS(),
                                                                        ProtocolVersion::withTSS(),
                                                                        ProtocolVersion::withCacheRole())));

    ErrorOr<SWVersion> swversion = wait(errorOr(
        testSoftwareVersionCompatibility(swversionTestDirName, ProtocolVersion::withStorageInterfaceReadiness())));

    if (!swversion.isError()) {
        ASSERT(swversion.get().newestProtocolVersion() == ProtocolVersion::withTSS().version());
        ASSERT(swversion.get().lastRunProtocolVersion() == ProtocolVersion::withTSS().version());
        ASSERT(swversion.get().lowestCompatibleProtocolVersion() == ProtocolVersion::withCacheRole().version());
    }

    // Step 2: the newer software overwrites the record with its own versions.
    ErrorOr<Void> secondWrite = wait(errorOr(updateNewestSoftwareVersion(swversionTestDirName,
                                                                         ProtocolVersion::withStorageInterfaceReadiness(),
                                                                         ProtocolVersion::withStorageInterfaceReadiness(),
                                                                         ProtocolVersion::withTSS())));

    ErrorOr<SWVersion> swversion = wait(errorOr(
        testSoftwareVersionCompatibility(swversionTestDirName, ProtocolVersion::withStorageInterfaceReadiness())));

    if (!swversion.isError()) {
        ASSERT(swversion.get().newestProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version());
        ASSERT(swversion.get().lastRunProtocolVersion() == ProtocolVersion::withStorageInterfaceReadiness().version());
        ASSERT(swversion.get().lowestCompatibleProtocolVersion() == ProtocolVersion::withTSS().version());
    }

    wait(IAsyncFileSystem::filesystem()->deleteFile(joinPath(swversionTestDirName, versionFileName), true));

    return Void();
}
ACTOR Future<UID> createAndLockProcessIdFile(std::string folder) {
state UID processIDUid;
platform::createDirectory(folder);
@ -2923,6 +3216,8 @@ ACTOR Future<Void> fdbd(Reference<IClusterConnectionRecord> connRecord,
localities.set(LocalityData::keyProcessId, processIDUid.toString());
// Only one process can execute on a dataFolder from this point onwards
wait(testAndUpdateSoftwareVersionCompatibility(dataFolder, processIDUid));
std::string fitnessFilePath = joinPath(dataFolder, "fitness");
auto cc = makeReference<AsyncVar<Optional<ClusterControllerFullInterface>>>();
auto ci = makeReference<AsyncVar<Optional<ClusterInterface>>>();

View File

@ -203,3 +203,46 @@ static_assert(minInvalidProtocolVersion.version() >=
// The min invalid protocol version should be the smallest possible protocol version associated with a minor release
// version.
static_assert((minInvalidProtocolVersion.version() & 0xFFFFFFLL) == 0, "Unexpected min invalid protocol version");
// Serializable record of three protocol versions, stored as raw 64-bit values: the newest
// version, the last-run version and the lowest compatible version.
// A default-constructed SWVersion holds zero in every field and reports isValid() == false.
struct SWVersion {
    constexpr static FileIdentifier file_identifier = 13943914;

private:
    // Declaration order matches the order used in serialize().
    uint64_t _newestProtocolVersion;
    uint64_t _lastRunProtocolVersion;
    uint64_t _lowestCompatibleProtocolVersion;

public:
    // Creates an invalid (all-zero) record.
    SWVersion() : _newestProtocolVersion(0), _lastRunProtocolVersion(0), _lowestCompatibleProtocolVersion(0) {}

    // Creates a record from the newest, last-run and lowest compatible protocol versions.
    SWVersion(ProtocolVersion latestVersion, ProtocolVersion lastVersion, ProtocolVersion minCompatibleVersion)
      : _newestProtocolVersion(latestVersion.version()), _lastRunProtocolVersion(lastVersion.version()),
        _lowestCompatibleProtocolVersion(minCompatibleVersion.version()) {}

    // True only when none of the three stored versions is zero.
    bool isValid() const {
        const bool anyUnset =
            _newestProtocolVersion == 0 || _lastRunProtocolVersion == 0 || _lowestCompatibleProtocolVersion == 0;
        return !anyUnset;
    }

    // Accessors return the raw 64-bit version values.
    uint64_t newestProtocolVersion() const { return _newestProtocolVersion; }
    uint64_t lastRunProtocolVersion() const { return _lastRunProtocolVersion; }
    uint64_t lowestCompatibleProtocolVersion() const { return _lowestCompatibleProtocolVersion; }

    template <class Ar>
    void serialize(Ar& ar) {
        serializer(ar, _newestProtocolVersion, _lastRunProtocolVersion, _lowestCompatibleProtocolVersion);
    }
};
// Traceable specialization that renders an SWVersion as three zero-padded, 16-digit
// hexadecimal protocol versions.
template <>
struct Traceable<SWVersion> : std::true_type {
    static std::string toString(const SWVersion& swVersion) {
        // uint64_t is not `unsigned long` on every platform (e.g. where long is 32 bits), so the
        // values are cast to unsigned long long to match the %llX conversion exactly.
        return format("Newest: 0x%016llX, Last: 0x%016llX, MinCompatible: 0x%016llX",
                      static_cast<unsigned long long>(swVersion.newestProtocolVersion()),
                      static_cast<unsigned long long>(swVersion.lastRunProtocolVersion()),
                      static_cast<unsigned long long>(swVersion.lowestCompatibleProtocolVersion()));
    }
};

View File

@ -123,6 +123,7 @@ ERROR( failed_to_progress, 1216, "Process has failed to make sufficient progress
ERROR( invalid_cluster_id, 1217, "Attempted to join cluster with a different cluster ID" )
ERROR( restart_cluster_controller, 1218, "Restart cluster controller process" )
ERROR( please_reboot_remote_kv_store, 1219, "Need to reboot the storage engine process as it died abnormally")
// Raised when the running software's protocol version is older than the lowest compatible
// protocol version recorded in the data folder's sw-version file.
ERROR( incompatible_software_version, 1220, "Current software does not support database format" )
// 15xx Platform errors
ERROR( platform_error, 1500, "Platform error" )