From 7debb23cf6519f1bc07e4dea9e328e487abfec44 Mon Sep 17 00:00:00 2001 From: Pieter Joost van de Sande Date: Thu, 23 May 2019 12:52:16 +0200 Subject: [PATCH 01/69] change common errors for easy comparison --- bindings/go/src/fdb/directory/directory.go | 7 +++++++ bindings/go/src/fdb/directory/directoryLayer.go | 8 ++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/bindings/go/src/fdb/directory/directory.go b/bindings/go/src/fdb/directory/directory.go index 792621b386..167cfc0bf3 100644 --- a/bindings/go/src/fdb/directory/directory.go +++ b/bindings/go/src/fdb/directory/directory.go @@ -41,6 +41,7 @@ package directory import ( "errors" + "github.com/apple/foundationdb/bindings/go/src/fdb" "github.com/apple/foundationdb/bindings/go/src/fdb/subspace" ) @@ -54,6 +55,12 @@ const ( _MICROVERSION int32 = 0 ) +var ( + ErrDirAlreadyExists = errors.New("the directory already exists") + ErrDirNotExists = errors.New("the directory does not exist") + ErrParentDirDoesNotExist = errors.New("the parent directory does not exist") +) + // Directory represents a subspace of keys in a FoundationDB database, // identified by a hierarchical path. 
type Directory interface { diff --git a/bindings/go/src/fdb/directory/directoryLayer.go b/bindings/go/src/fdb/directory/directoryLayer.go index e6abf3df28..63574d9148 100644 --- a/bindings/go/src/fdb/directory/directoryLayer.go +++ b/bindings/go/src/fdb/directory/directoryLayer.go @@ -99,7 +99,7 @@ func (dl directoryLayer) createOrOpen(rtr fdb.ReadTransaction, tr *fdb.Transacti } if !allowOpen { - return nil, errors.New("the directory already exists") + return nil, ErrDirAlreadyExists } if layer != nil { @@ -112,7 +112,7 @@ func (dl directoryLayer) createOrOpen(rtr fdb.ReadTransaction, tr *fdb.Transacti } if !allowCreate { - return nil, errors.New("the directory does not exist") + return nil, ErrDirNotExists } if e := dl.checkVersion(rtr, tr); e != nil { @@ -161,7 +161,7 @@ func (dl directoryLayer) createOrOpen(rtr fdb.ReadTransaction, tr *fdb.Transacti } if parentNode == nil { - return nil, errors.New("the parent directory does not exist") + return nil, ErrParentDirDoesNotExist } node := dl.nodeWithPrefix(prefix) @@ -254,7 +254,7 @@ func (dl directoryLayer) List(rt fdb.ReadTransactor, path []string) ([]string, e node := dl.find(rtr, path).prefetchMetadata(rtr) if !node.exists() { - return nil, errors.New("the directory does not exist") + return nil, ErrDirNotExists } if node.isInPartition(nil, true) { From 69edefe68bc84f16a6b3feba78fe327c2b4faac9 Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Wed, 27 Feb 2019 15:40:33 -0800 Subject: [PATCH 02/69] Snapshot based backup and resotre implementation --- cmake/ConfigureCompiler.cmake | 2 +- fdbcli/fdbcli.actor.cpp | 20 + fdbclient/CommitTransaction.h | 1 + fdbclient/ManagementAPI.actor.cpp | 22 + fdbclient/ManagementAPI.actor.h | 4 + fdbclient/MasterProxyInterface.h | 22 +- fdbclient/NativeAPI.actor.cpp | 147 +++++ fdbclient/NativeAPI.actor.h | 12 + fdbclient/SystemData.cpp | 6 + fdbclient/SystemData.h | 3 + fdbserver/CMakeLists.txt | 3 + fdbserver/FDBExecArgs.cpp | 100 +++ fdbserver/FDBExecArgs.h | 37 ++ 
fdbserver/LogSystem.h | 7 +- fdbserver/MasterProxyServer.actor.cpp | 95 ++- fdbserver/OldTLogServer_6_0.actor.cpp | 259 +++++++- fdbserver/TLogInterface.h | 8 +- fdbserver/TLogServer.actor.cpp | 264 +++++++- fdbserver/TagPartitionedLogSystem.actor.cpp | 2 +- fdbserver/WorkerInterface.actor.h | 27 +- fdbserver/storageserver.actor.cpp | 102 ++- fdbserver/worker.actor.cpp | 91 ++- fdbserver/workloads/SnapTest.actor.cpp | 674 ++++++++++++++++++++ flow/Platform.cpp | 42 ++ flow/Platform.h | 10 + 25 files changed, 1918 insertions(+), 42 deletions(-) create mode 100644 fdbserver/FDBExecArgs.cpp create mode 100644 fdbserver/FDBExecArgs.h create mode 100644 fdbserver/workloads/SnapTest.actor.cpp diff --git a/cmake/ConfigureCompiler.cmake b/cmake/ConfigureCompiler.cmake index dfaaa02fdd..c84aa79172 100644 --- a/cmake/ConfigureCompiler.cmake +++ b/cmake/ConfigureCompiler.cmake @@ -140,7 +140,7 @@ else() -Wno-deprecated -fvisibility=hidden -Wreturn-type - -fdiagnostics-color=always +# -fdiagnostics-color=always -fPIC) if (GPERFTOOLS_FOUND AND GCC) add_compile_options( diff --git a/fdbcli/fdbcli.actor.cpp b/fdbcli/fdbcli.actor.cpp index d464b0c3bd..b76e674b98 100644 --- a/fdbcli/fdbcli.actor.cpp +++ b/fdbcli/fdbcli.actor.cpp @@ -470,6 +470,10 @@ void initHelp() { "include all|
*", "permit previously-excluded servers to rejoin the database", "If `all' is specified, the excluded servers list is cleared.\n\nFor each IP address or IP:port pair in
*, removes any matching exclusions from the excluded servers list. (A specified IP will match all IP:* exclusion entries)"); + helpMap["snapshot"] = CommandHelp("snapshot :,,...", "snapshot the database", + "invokes binary provided in binary-path" + "with the arg,value pairs on TLog, Storage and " + "Coordinators nodes. uid is a reserved ARG key."); helpMap["setclass"] = CommandHelp( "setclass
", "change the class of a process", @@ -2121,6 +2125,11 @@ ACTOR Future exclude( Database db, std::vector tokens, Referenc return false; } +ACTOR Future createSnapshot(Database db, StringRef snapCmd) { + wait(makeInterruptable(mgmtSnapCreate(db, snapCmd))); + return false; +} + ACTOR Future setClass( Database db, std::vector tokens ) { if( tokens.size() == 1 ) { vector _workers = wait( makeInterruptable(getWorkers(db)) ); @@ -2720,6 +2729,17 @@ ACTOR Future cli(CLIOptions opt, LineNoise* plinenoise) { continue; } + if (tokencmp(tokens[0], "snapshot")) { + if (tokens.size() != 2) { + printUsage(tokens[0]); + is_error = true; + } else { + bool err = wait(createSnapshot(db, tokens[1])); + if (err) is_error = true; + } + continue; + } + if (tokencmp(tokens[0], "setclass")) { if (tokens.size() != 3 && tokens.size() != 1) { printUsage(tokens[0]); diff --git a/fdbclient/CommitTransaction.h b/fdbclient/CommitTransaction.h index e5050f6ec4..e3eb3a9d1c 100644 --- a/fdbclient/CommitTransaction.h +++ b/fdbclient/CommitTransaction.h @@ -70,6 +70,7 @@ struct MutationRef { MinV2, AndV2, CompareAndClear, + Exec, MAX_ATOMIC_OP }; // This is stored this way for serialization purposes. diff --git a/fdbclient/ManagementAPI.actor.cpp b/fdbclient/ManagementAPI.actor.cpp index 6f23fe1c0b..7a13e7148d 100644 --- a/fdbclient/ManagementAPI.actor.cpp +++ b/fdbclient/ManagementAPI.actor.cpp @@ -1474,6 +1474,28 @@ ACTOR Future waitForExcludedServers( Database cx, vector } } +ACTOR Future mgmtSnapCreate(Database cx, StringRef snapCmd) { + state int retryCount = 0; + + loop { + state UID snapUID = g_random->randomUniqueID(); + try { + wait(snapCreate(cx, snapCmd, snapUID)); + printf("Snapshots tagged with UID: %s, check logs for status\n", snapUID.toString().c_str()); + TraceEvent("SnapCreateSucceeded").detail("snapUID", snapUID); + break; + } catch (Error& e) { + ++retryCount; + TraceEvent(retryCount > 3 ? 
SevWarn : SevInfo, "SnapCreateFailed").error(e); + if (retryCount > 3) { + printf("Snapshot create failed, %d (%s)\n", e.code(), e.what()); + throw; + } + } + } + return Void(); +} + ACTOR Future waitForFullReplication( Database cx ) { state ReadYourWritesTransaction tr(cx); loop { diff --git a/fdbclient/ManagementAPI.actor.h b/fdbclient/ManagementAPI.actor.h index 92c9f76c1f..2b1db48e3a 100644 --- a/fdbclient/ManagementAPI.actor.h +++ b/fdbclient/ManagementAPI.actor.h @@ -191,5 +191,9 @@ ACTOR Future> getCoordinators( Database cx ); void schemaCoverage( std::string const& spath, bool covered=true ); bool schemaMatch( json_spirit::mValue const& schema, json_spirit::mValue const& result, std::string& errorStr, Severity sev=SevError, bool checkCoverage=false, std::string path = std::string(), std::string schema_path = std::string() ); +// execute payload in 'snapCmd' on all the coordinators, TLogs and +// storage nodes +ACTOR Future mgmtSnapCreate(Database cx, StringRef snapCmd); + #include "flow/unactorcompiler.h" #endif diff --git a/fdbclient/MasterProxyInterface.h b/fdbclient/MasterProxyInterface.h index 17e43d5930..0a6477a626 100644 --- a/fdbclient/MasterProxyInterface.h +++ b/fdbclient/MasterProxyInterface.h @@ -49,6 +49,7 @@ struct MasterProxyInterface { RequestStream< struct GetRawCommittedVersionRequest > getRawCommittedVersion; RequestStream< struct TxnStateRequest > txnState; + RequestStream execReq; RequestStream< struct GetHealthMetricsRequest > getHealthMetrics; @@ -62,7 +63,7 @@ struct MasterProxyInterface { void serialize(Archive& ar) { serializer(ar, locality, provisional, commit, getConsistentReadVersion, getKeyServersLocations, waitFailure, getStorageServerRejoinInfo, getRawCommittedVersion, - txnState, getHealthMetrics); + txnState, getHealthMetrics, execReq); } void initEndpoints() { @@ -298,4 +299,23 @@ struct GetHealthMetricsRequest } }; +struct ExecRequest { + // FIXME: sramamoorthy, FDB6PORT, flat-buffers related versioning disabled + // 
constexpr static flat_buffers::FileIdentifier file_identifier = 1315755287; + Arena arena; + StringRef execPayLoad; + ReplyPromise reply; + Optional debugID; + + ExecRequest(Optional const& debugID = Optional()) : debugID(debugID) {} + ExecRequest(StringRef exec, Optional debugID = Optional()) : execPayLoad(exec), debugID(debugID) {} + + template + void serialize(Ar& ar) { + // FIXME: sramamoorthy, FDB6PORT, flat-buffers related versioning disabled + // serializer(ar, v2(execPayLoad), v2(reply), v2(arena), v2(debugID)); + serializer(ar, execPayLoad, reply, arena, debugID); + } +}; + #endif diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index fb0517580a..1873f2a2f9 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -2287,6 +2287,55 @@ void Transaction::atomicOp(const KeyRef& key, const ValueRef& operand, MutationR TEST(true); //NativeAPI atomic operation } +ACTOR Future executeCoordinators(DatabaseContext* cx, StringRef execPayLoad, Optional debugID) { + try { + if (debugID.present()) { + g_traceBatch.addEvent("TransactionDebug", debugID.get().first(), "NativeAPI.executeCoordinators.Before"); + } + + loop { + state ExecRequest req(execPayLoad, debugID); + if (debugID.present()) { + g_traceBatch.addEvent("TransactionDebug", debugID.get().first(), + "NativeAPI.executeCoordinators.Inside loop"); + } + choose { + when(wait(cx->onMasterProxiesChanged())) { + if (debugID.present()) { + g_traceBatch.addEvent("TransactionDebug", debugID.get().first(), + "NativeAPI.executeCoordinators." 
+ "MasterProxyChangeDuringStart"); + } + } + when(wait(loadBalance(cx->getMasterProxies(), &MasterProxyInterface::execReq, req, cx->taskID))) { + if (debugID.present()) + g_traceBatch.addEvent("TransactionDebug", debugID.get().first(), + "NativeAPI.executeCoordinators.After"); + return Void(); + } + } + } + } catch (Error& e) { + TraceEvent(SevError, "NativeAPI.executeCoordinatorsError").error(e); + throw; + } +} + +void Transaction::execute(const KeyRef& cmdType, const ValueRef& cmdPayLoad) { + TraceEvent("Execute operation").detail("Key", cmdType.toString()).detail("Value", cmdPayLoad.toString()); + + if (cmdType.size() > CLIENT_KNOBS->KEY_SIZE_LIMIT) throw key_too_large(); + if (cmdPayLoad.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT) throw value_too_large(); + + auto& req = tr; + + auto& t = req.transaction; + auto r = singleKeyRange(cmdType, req.arena); + auto v = ValueRef(req.arena, cmdPayLoad); + t.mutations.push_back(req.arena, MutationRef(MutationRef::Exec, r.begin, v)); + return; +} + void Transaction::clear( const KeyRangeRef& range, bool addConflictRange ) { auto &req = tr; auto &t = req.transaction; @@ -3260,3 +3309,101 @@ void enableClientInfoLogging() { networkOptions.logClientInfo = true; TraceEvent(SevInfo, "ClientInfoLoggingEnabled"); } + +ACTOR Future snapCreate(Database inputCx, StringRef snapCmd, UID snapUID) { + state Transaction tr(inputCx); + state Database testCx = inputCx; + state DatabaseContext* cx = inputCx.getPtr(); + // remember the client ID before the snap operation + state UID preSnapClientUID = cx->clientInfo->get().id; + + TraceEvent("snapCreate") + .detail("snapCmd", snapCmd.toString()) + .detail("snapCreateEnter", snapUID) + .detail("preSnapClientUID", preSnapClientUID); + + tr.debugTransaction(snapUID); + std::string snapString = "empty-binary:uid=" + snapUID.toString(); + state Standalone uidPayLoad = makeString(snapString.size()); + uint8_t* ptr = mutateString(uidPayLoad); + memcpy(ptr, ((uint8_t*)snapString.c_str()), 
snapString.size()); + // disable popping of TLog + loop { + tr.reset(); + try { + tr.execute(execDisableTLogPop, uidPayLoad); + wait(tr.commit()); + break; + } catch (Error& e) { + wait(tr.onError(e)); + } + } + + TraceEvent("snapCreate").detail("snapCreate.After.LockingTLogs", snapUID); + + int p = snapCmd.toString().find_first_of(':', 0); + state std::string snapPayLoad; + + TraceEvent("snapCmd").detail("snapCmd", snapCmd.toString()); + if (p == snapCmd.toString().npos) { + snapPayLoad = snapCmd.toString() + ":uid=" + snapUID.toString(); + } else { + snapPayLoad = snapCmd.toString() + ",uid=" + snapUID.toString(); + } + Standalone snapPayLoadRef = makeString(snapPayLoad.size()); + uint8_t* ptr = mutateString(snapPayLoadRef); + memcpy(ptr, ((uint8_t*)snapPayLoad.c_str()), snapPayLoad.size()); + + // snap the storage and Tlogs + // if we retry the below command in failure cases with the same snapUID + // then the snapCreate can end up creating multiple snapshots with + // the same name which needs additional handling, hence we fail in + // failure cases and let the caller retry with different snapUID + try { + tr.reset(); + tr.execute(execSnap, snapPayLoadRef); + wait(tr.commit()); + } catch (Error& e) { + TraceEvent("snapCreate").detail("snapCreateErrorSnapTLogStorage", e.what()); + throw; + } + + TraceEvent("snapCreate").detail("snapCreate.After.SnappingTLogsStorage", snapUID); + + // enable popping of the TLog + loop { + tr.reset(); + try { + tr.execute(execEnableTLogPop, uidPayLoad); + wait(tr.commit()); + break; + } catch (Error& e) { + wait(tr.onError(e)); + } + } + + TraceEvent("snapCreate").detail("snapCreate.After.UnlockingTLogs", snapUID); + + // snap the coordinators + try { + Future exec = executeCoordinators(cx, snapPayLoad, snapUID); + wait(exec); + } catch (Error& e) { + TraceEvent("snapCreate").detail("snapCreateErrorSnapCoordinators", e.what()); + throw; + } + + TraceEvent("snapCreate").detail("snapCreate.After.SnappingCoords", snapUID); + + // if 
the client IDs did not change then we have a clean snapshot + UID postSnapClientUID = cx->clientInfo->get().id; + if (preSnapClientUID != postSnapClientUID) { + TraceEvent("UID mismatch") + .detail("preSnapClientUID", preSnapClientUID) + .detail("postSnapClientUID", postSnapClientUID); + throw coordinators_changed(); + } + + TraceEvent("snapCreate").detail("snapCreate.Complete", snapUID); + return Void(); +} diff --git a/fdbclient/NativeAPI.actor.h b/fdbclient/NativeAPI.actor.h index b93094cc71..a02d3c8a51 100644 --- a/fdbclient/NativeAPI.actor.h +++ b/fdbclient/NativeAPI.actor.h @@ -259,6 +259,14 @@ public: // If checkWriteConflictRanges is true, existing write conflict ranges will be searched for this key void set( const KeyRef& key, const ValueRef& value, bool addConflictRange = true ); void atomicOp( const KeyRef& key, const ValueRef& value, MutationRef::Type operationType, bool addConflictRange = true ); + // execute operation is similar to set, but the command will reach + // one of the proxies, all the TLogs and all the storage nodes. + // instead of setting a key and value on the DB, it executes the command + // that is passed in the value field. + // - cmdType can be used for logging purposes + // - cmdPayLoad contains the details of the command to be executed: + // format of the cmdPayLoad : :,... 
+ void execute(const KeyRef& cmdType, const ValueRef& cmdPayLoad); void clear( const KeyRangeRef& range, bool addConflictRange = true ); void clear( const KeyRef& key, bool addConflictRange = true ); Future commit(); // Throws not_committed or commit_unknown_result errors in normal operation @@ -324,5 +332,9 @@ std::string unprintable( const std::string& ); int64_t extractIntOption( Optional value, int64_t minValue = std::numeric_limits::min(), int64_t maxValue = std::numeric_limits::max() ); +// Takes a snapshot of the cluster, specifically the following persistent +// states: coordinator, TLog and storage state +ACTOR Future snapCreate(Database cx, StringRef snapCmd, UID snapUID); + #include "flow/unactorcompiler.h" #endif diff --git a/fdbclient/SystemData.cpp b/fdbclient/SystemData.cpp index c2d0ebee84..45d2e92ed3 100644 --- a/fdbclient/SystemData.cpp +++ b/fdbclient/SystemData.cpp @@ -36,6 +36,12 @@ const KeyRef keyServersEnd = keyServersKeys.end; const KeyRangeRef keyServersKeyServersKeys ( LiteralStringRef("\xff/keyServers/\xff/keyServers/"), LiteralStringRef("\xff/keyServers/\xff/keyServers0")); const KeyRef keyServersKeyServersKey = keyServersKeyServersKeys.begin; +// list of reserved exec commands +const StringRef execSnap = LiteralStringRef("snap"); // snapshot persistent state of + // storage, TLog and coordinated state +const StringRef execDisableTLogPop = LiteralStringRef("tldp"); // disable pop on TLog +const StringRef execEnableTLogPop = LiteralStringRef("tlep"); // enable pop on TLog + const Key keyServersKey( const KeyRef& k ) { return k.withPrefix( keyServersPrefix ); } diff --git a/fdbclient/SystemData.h b/fdbclient/SystemData.h index bb79ac7f36..0f7d5d591c 100644 --- a/fdbclient/SystemData.h +++ b/fdbclient/SystemData.h @@ -282,6 +282,9 @@ extern const KeyRef healthyZoneKey; const Value healthyZoneValue( StringRef const& zoneId, Version version ); std::pair decodeHealthyZoneValue( ValueRef const& ); +extern const StringRef execSnap; +extern 
const StringRef execDisableTLogPop; +extern const StringRef execEnableTLogPop; // All mutations done to this range are blindly copied into txnStateStore. // Used to create artifically large txnStateStore instances in testing. diff --git a/fdbserver/CMakeLists.txt b/fdbserver/CMakeLists.txt index bb211ff9d2..5dd48ec6d6 100644 --- a/fdbserver/CMakeLists.txt +++ b/fdbserver/CMakeLists.txt @@ -17,6 +17,8 @@ set(FDBSERVER_SRCS DBCoreState.h DiskQueue.actor.cpp fdbserver.actor.cpp + FDBExecArgs.cpp + FDBExecArgs.h IDiskQueue.h IKeyValueStore.h IPager.h @@ -151,6 +153,7 @@ set(FDBSERVER_SRCS workloads/Serializability.actor.cpp workloads/Sideband.actor.cpp workloads/SlowTaskWorkload.actor.cpp + workloads/SnapTest.actor.cpp workloads/StatusWorkload.actor.cpp workloads/Storefront.actor.cpp workloads/StreamingRead.actor.cpp diff --git a/fdbserver/FDBExecArgs.cpp b/fdbserver/FDBExecArgs.cpp new file mode 100644 index 0000000000..36ea8c2341 --- /dev/null +++ b/fdbserver/FDBExecArgs.cpp @@ -0,0 +1,100 @@ +#include "fdbserver/FDBExecArgs.h" +#include +#include + +ExecCmdValueString::ExecCmdValueString(std::string const& pCmdValueString) { + cmdValueString = pCmdValueString; + parseCmdValue(); +} + +void ExecCmdValueString::setCmdValueString(std::string const& pCmdValueString) { + // reset everything + binaryPath = ""; + binaryArgs.clear(); + keyValueMap.clear(); + + // set the new cmdValueString + cmdValueString = pCmdValueString; + + // parse it out + parseCmdValue(); +} + +std::string ExecCmdValueString::getBinaryPath() { + return binaryPath; +} + +std::vector ExecCmdValueString::getBinaryArgs() { + return binaryArgs; +} + +std::string ExecCmdValueString::getBinaryArgValue(const std::string& key) { + std::string res; + if (keyValueMap.find(key) != keyValueMap.end()) { + res = keyValueMap[key]; + } + return res; +} + +void ExecCmdValueString::parseCmdValue() { + int p = 0; + int pSemiColon = 0; + std::string const& param = this->cmdValueString; + { + // get the binary path + 
pSemiColon = param.find_first_of(':', p); + if (pSemiColon == param.npos) { + pSemiColon = param.size(); + } + this->binaryPath = param.substr(p, pSemiColon - p); + } + + // no arguments provided + if (pSemiColon >= param.size() - 1) { + return; + } + + p = pSemiColon + 1; + + { + // extract the arguments + for (; p <= param.size();) { + int pComma = param.find_first_of(',', p); + if (pComma == param.npos) { + pComma = param.size(); + } + std::string token = param.substr(p, pComma - p); + this->binaryArgs.push_back(token); + { + // parse the token to get key,value + int idx = 0; + int pEqual = token.find_first_of('=', idx); + if (pEqual == token.npos) { + pEqual = token.size(); + } + std::string key = token.substr(idx, pEqual - idx); + + std::string value; + if (pEqual < token.size() - 1) { + value = token.substr(pEqual + 1); + } + keyValueMap.insert(std::pair(key, value)); + } + p = pComma + 1; + } + } + return; +} + +void ExecCmdValueString::dbgPrint() { + auto te = TraceEvent("execCmdValueString"); + + te.detail("cmdValueString", cmdValueString); + te.detail("binaryPath", binaryPath); + + int i = 0; + for (auto elem : binaryArgs) { + te.detail(format("arg{}", ++i).c_str(), elem); + } + return; +} diff --git a/fdbserver/FDBExecArgs.h b/fdbserver/FDBExecArgs.h new file mode 100644 index 0000000000..6469ce5106 --- /dev/null +++ b/fdbserver/FDBExecArgs.h @@ -0,0 +1,37 @@ +#pragma once +#ifndef FDBCLIENT_EXECCMDARGS_H +#define FDBCLIENT_EXECCMDARGS_H +#include +#include +#include + +// execute/snapshot command takes two arguments: +// param1 - represents the command type/name +// param2 - takes a binary path followed by a set of arguments in the following +// format :,... 
+// this class will abstract the format and give functions to get various pieces +// of information +class ExecCmdValueString { +public: // ctor & dtor + ExecCmdValueString() {} + ExecCmdValueString(std::string const& cmdValueString); + +public: // interfaces + std::string getBinaryPath(); + std::vector getBinaryArgs(); + std::string getBinaryArgValue(std::string const& key); + void setCmdValueString(std::string const& cmdValueString); + +public: // helper functions + void dbgPrint(); + +private: // functions + void parseCmdValue(); + +private: // data + std::string cmdValueString; + std::vector binaryArgs; + std::string binaryPath; + std::map keyValueMap; +}; +#endif diff --git a/fdbserver/LogSystem.h b/fdbserver/LogSystem.h index 824ff310f2..e1f3041be9 100644 --- a/fdbserver/LogSystem.h +++ b/fdbserver/LogSystem.h @@ -733,7 +733,7 @@ struct CompareFirst { struct LogPushData : NonCopyable { // Log subsequences have to start at 1 (the MergedPeekCursor relies on this to make sure we never have !hasMessage() in the middle of data for a version - explicit LogPushData(Reference logSystem) : logSystem(logSystem), subsequence(1) { + explicit LogPushData(Reference logSystem) : logSystem(logSystem), subsequence(1), hasExecOp(false) { for(auto& log : logSystem->getLogSystemConfig().tLogs) { if(log.isLocal) { for(int i = 0; i < log.tLogs.size(); i++) { @@ -805,6 +805,10 @@ struct LogPushData : NonCopyable { return messagesWriter[loc].toValue(); } + void setHasExecOp() { hasExecOp = true; } + + bool getHasExecOp() { return hasExecOp; } + private: Reference logSystem; std::vector next_message_tags; @@ -812,6 +816,7 @@ private: std::vector messagesWriter; std::vector msg_locations; uint32_t subsequence; + bool hasExecOp; }; #endif diff --git a/fdbserver/MasterProxyServer.actor.cpp b/fdbserver/MasterProxyServer.actor.cpp index 7dc2f262af..14221ade59 100644 --- a/fdbserver/MasterProxyServer.actor.cpp +++ b/fdbserver/MasterProxyServer.actor.cpp @@ -34,10 +34,12 @@ #include 
"fdbclient/Notified.h" #include "fdbclient/KeyRangeMap.h" #include "fdbserver/ConflictSet.h" +#include "fdbclient/SystemData.h" #include "flow/Stats.h" #include "fdbserver/ApplyMetadataMutation.h" #include "fdbserver/RecoveryState.h" #include "fdbserver/LatencyBandConfig.h" +#include "fdbserver/FDBExecArgs.h" #include "fdbclient/Atomic.h" #include "flow/TDMetric.actor.h" #include "flow/actorcompiler.h" // This must be the last #include. @@ -727,8 +729,54 @@ ACTOR Future commitBatch( toCommit.addTags(allSources); } toCommit.addTypedMessage(m); - } - else + } else if (m.type == MutationRef::Exec) { + auto ranges = self->keyInfo.intersectingRanges(allKeys); + std::set allSources; + + if (debugMutation("ProxyCommit", commitVersion, m)) + TraceEvent("ProxyCommitTo", self->dbgid) + .detail("To", "all sources") + .detail("Mutation", m.toString()) + .detail("Version", commitVersion); + + for (auto r : ranges) { + auto& tags = r.value().tags; + if (!tags.size()) { + for (auto info : r.value().src_info) { + tags.push_back(info->tag); + } + for (auto info : r.value().dest_info) { + tags.push_back(info->tag); + } + uniquify(tags); + } + allSources.insert(tags.begin(), tags.end()); + } + + auto param2 = m.param2.toString(); + ExecCmdValueString execArg(param2); + execArg.dbgPrint(); + auto uidStr = execArg.getBinaryArgValue("uid"); + auto tokenStr = "ExecTrace/Proxy/" + uidStr; + + auto te1 = TraceEvent("ProxyCommitTo", self->dbgid); + te1.detail("To", "all sources"); + te1.detail("Mutation", m.toString()); + te1.detail("Version", commitVersion); + te1.detail("numTags", allSources.size()); + if (m.param1 == execSnap) { + te1.trackLatest(tokenStr.c_str()); + } + // FIXME: sramamoorthy, FDB6port - dynamic tracing not supported? 
+ // auto te = TraceEvent(SevDebug, "tagInfo"); + int i = 0; + for (auto& tag : allSources) { + // te.detail(format("tagId{}", ++i).c_str(), tag.toString()); + toCommit.addTag(tag); + } + toCommit.addTypedMessage(m); + toCommit.setHasExecOp(); + } else UNREACHABLE(); @@ -1517,6 +1565,49 @@ ACTOR Future masterProxyServerCore( rep.version = commitData.committedVersion.get(); req.reply.send(rep); } + when(ExecRequest _execReq = waitNext(proxy.execReq.getFuture())) { + state ExecRequest execReq = _execReq; + if (execReq.debugID.present()) + g_traceBatch.addEvent("TransactionDebug", execReq.debugID.get().first(), + "MasterProxyServer.masterProxyServerCore." + "ExecRequest"); + + TraceEvent("ExecRequest").detail("payload", execReq.execPayLoad.toString()); + + // get the list of coordinators + state Optional coordinators = commitData.txnStateStore->readValue(coordinatorsKey).get(); + state std::vector coordinatorsAddr = + ClusterConnectionString(coordinators.get().toString()).coordinators(); + state std::set coordinatorsAddrSet; + for (int i = 0; i < coordinatorsAddr.size(); i++) { + coordinatorsAddrSet.insert(coordinatorsAddr[i]); + } + + // get the list of workers + state std::vector> workers = + wait(db->get().clusterInterface.getWorkers.getReply(GetWorkersRequest())); + + // send the exec command to the list of workers which are + // coordinators + state int i = 0; + state int numSucc = 0; + for (; i < workers.size(); i++) { + if (coordinatorsAddrSet.find(workers[i].first.address()) != coordinatorsAddrSet.end()) { + TraceEvent("ExecReqToCoordinator").detail("WorkerAddr", workers[i].first.address()); + try { + wait(timeoutError(workers[i].first.execReq.getReply(ExecuteRequest(execReq.execPayLoad)), 1.0)); + ++numSucc; + } catch (Error& e) { + TraceEvent("ExecReqFailed").detail("what", e.what()); + } + } + } + if (numSucc >= (coordinatorsAddrSet.size() + 1) / 2) { + execReq.reply.send(Void()); + } else { + execReq.reply.sendError(operation_failed()); + } + } 
when(TxnStateRequest req = waitNext(proxy.txnState.getFuture())) { state ReplyPromise reply = req.reply; if(req.last) maxSequence = req.sequence + 1; diff --git a/fdbserver/OldTLogServer_6_0.actor.cpp b/fdbserver/OldTLogServer_6_0.actor.cpp index 28efb03e9a..d9803bbb01 100644 --- a/fdbserver/OldTLogServer_6_0.actor.cpp +++ b/fdbserver/OldTLogServer_6_0.actor.cpp @@ -38,7 +38,11 @@ #include "fdbserver/LogSystem.h" #include "fdbserver/WaitFailure.h" #include "fdbserver/RecoveryState.h" +#include "fdbserver/FDBExecArgs.h" #include "flow/actorcompiler.h" // This must be the last #include. +#if defined(CMAKE_BUILD) || !defined(WIN32) +#include "versions.h" +#endif using std::pair; using std::make_pair; @@ -274,6 +278,14 @@ struct TLogData : NonCopyable { FlowLock concurrentLogRouterReads; FlowLock persistentDataCommitLock; + bool ignorePopRequest; // ignore pop request from storage servers + double ignorePopDeadline; // time until which the ignorePopRequest will be + // honored + std::string ignorePopUid; // callers that set ignorePopRequest will set this + // extra state, used to validate the ownership of + // the set and for callers that unset will + // be able to match it up + std::string dataFolder; // folder where data is stored Reference> degraded; TLogData(UID dbgid, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference> dbInfo, Reference> degraded) @@ -281,7 +293,8 @@ struct TLogData : NonCopyable { persistentData(persistentData), rawPersistentQueue(persistentQueue), persistentQueue(new TLogQueue(persistentQueue, dbgid)), dbInfo(dbInfo), degraded(degraded), queueCommitBegin(0), queueCommitEnd(0), diskQueueCommitBytes(0), largeDiskQueueCommitBytes(false), bytesInput(0), bytesDurable(0), overheadBytesInput(0), overheadBytesDurable(0), - concurrentLogRouterReads(SERVER_KNOBS->CONCURRENT_LOG_ROUTER_READS) + concurrentLogRouterReads(SERVER_KNOBS->CONCURRENT_LOG_ROUTER_READS), + ignorePopRequest(false), ignorePopDeadline(), ignorePopUid(), 
dataFolder(folder) { } }; @@ -1257,21 +1270,139 @@ ACTOR Future tLogCommit( return Void(); } + state Version execVersion = invalidVersion; + state ExecCmdValueString execArg(); + state TLogQueueEntryRef qe; + state StringRef execCmd; + state StringRef param2; + if (logData->version.get() == req.prevVersion) { // Not a duplicate (check relies on no waiting between here and self->version.set() below!) if(req.debugID.present()) g_traceBatch.addEvent("CommitDebug", tlogDebugID.get().first(), "TLog.tLogCommit.Before"); + // Log the changes to the persistent queue, to be committed by commitQueue() + qe.version = req.version; + qe.knownCommittedVersion = logData->knownCommittedVersion; + qe.messages = req.messages; + qe.id = logData->logId; + + if (req.hasExecOp) { + // inspect the messages to find if there is an Exec type and print + // it. message are prefixed by the length of the message and each + // field is prefixed by the length too + uint8_t type = MutationRef::MAX_ATOMIC_OP; + { + ArenaReader rd(req.arena, qe.messages, Unversioned()); + int32_t messageLength, rawLength; + uint16_t tagCount; + uint32_t sub; + while (!rd.empty()) { + Tag tmpTag; + rd.checkpoint(); + rd >> messageLength >> sub >> tagCount; + for (int i = 0; i < tagCount; i++) { + rd >> tmpTag; + } + rd >> type; + if (type == MutationRef::Exec) { + break; + } + rawLength = messageLength + sizeof(messageLength); + rd.rewind(); + rd.readBytes(rawLength); + } + int32_t len = 0; + if (type == MutationRef::Exec) { + // get param1 + rd >> len; + execCmd = StringRef((uint8_t const*)rd.readBytes(len), len); + // get param2 + rd >> len; + param2 = StringRef((uint8_t const*)rd.readBytes(len), len); + + TraceEvent("oldTLog6TLogCommandType", self->dbgid).detail("execCmd", execCmd.toString()); + + execArg.setCmdValueString(param2.toString()); + execArg.dbgPrint(); + auto uidStr = execArg.getBinaryArgValue("uid"); + execVersion = qe.version; + if (execCmd == execSnap) { + // validation check specific to snap 
request + std::string reason; + if (!self->ignorePopRequest) { + execVersion = invalidVersion; + reason = "snapFailIgnorePopNotSet"; + } else if (uidStr != self->ignorePopUid) { + execVersion = invalidVersion; + reason = "snapFailedDisableTLogUidMismatch"; + } + + if (execVersion == invalidVersion) { + TraceEvent(SevWarn, "snapFailed") + .detail("ignorePopUid", self->ignorePopUid) + .detail("ignorePopRequest", self->ignorePopRequest) + .detail("reason", reason) + .trackLatest(reason.c_str()); + + auto startTag = logData->allTags.begin(); + std::string message = "ExecTrace/TLog/" + logData->allTags.begin()->toString(); + // startTag.toString() + + "/" + uidStr; + TraceEvent("oldTLog6ExecCmdSnapCreate") + .detail("uidStr", uidStr) + .detail("status", -1) + .detail("tag", logData->allTags.begin()->toString()) + .detail("role", "TLog") + .trackLatest(message.c_str()); + } + } + if (execCmd == execDisableTLogPop) { + execVersion = invalidVersion; + self->ignorePopRequest = true; + if (self->ignorePopUid != "") { + TraceEvent(SevWarn, "oldTlog6TLogPopDisableonDisable") + .detail("ignorePopUid", self->ignorePopUid) + .detail("uidStr", uidStr); + } + self->ignorePopUid = uidStr; + // ignorePopRequest will be turned off after 30 seconds + self->ignorePopDeadline = g_network->now() + 30.0; + TraceEvent("oldTLog6ExecCmdPopDisable") + .detail("execCmd", execCmd.toString()) + .detail("uidStr", uidStr) + .detail("ignorePopUid", self->ignorePopUid) + .detail("ignporePopRequest", self->ignorePopRequest) + .detail("ignporePopDeadline", self->ignorePopDeadline) + .trackLatest("disablePopTLog"); + } + if (execCmd == execEnableTLogPop) { + execVersion = invalidVersion; + if (self->ignorePopUid != uidStr) { + TraceEvent(SevWarn, "oldTLog6tLogPopDisableEnableUidMismatch") + .detail("ignorePopUid", self->ignorePopUid) + .detail("uidStr", uidStr) + .trackLatest("tLogPopDisableEnableUidMismatch"); + } + self->ignorePopRequest = false; + self->ignorePopDeadline = 0.0; + self->ignorePopUid = 
""; + TraceEvent("oldTLog6ExecCmdPopEnable") + .detail("execCmd", execCmd.toString()) + .detail("uidStr", uidStr) + .detail("ignorePopUid", self->ignorePopUid) + .detail("ignporePopRequest", self->ignorePopRequest) + .detail("ignporePopDeadline", self->ignorePopDeadline) + .trackLatest("enablePopTLog"); + } + } + } + } + //TraceEvent("TLogCommit", logData->logId).detail("Version", req.version); commitMessages(self, logData, req.version, req.arena, req.messages); logData->knownCommittedVersion = std::max(logData->knownCommittedVersion, req.knownCommittedVersion); - // Log the changes to the persistent queue, to be committed by commitQueue() - TLogQueueEntryRef qe; - qe.version = req.version; - qe.knownCommittedVersion = logData->knownCommittedVersion; - qe.messages = req.messages; - qe.id = logData->logId; self->persistentQueue->push( qe, logData ); self->diskQueueCommitBytes += qe.expectedSize(); @@ -1289,6 +1420,100 @@ ACTOR Future tLogCommit( state Future stopped = logData->stopCommit.onTrigger(); wait( timeoutWarning( logData->queueCommittedVersion.whenAtLeast( req.version ) || stopped, 0.1, warningCollectorInput ) ); + if ((execVersion != invalidVersion) && execVersion <= logData->queueCommittedVersion.get()) { + int err = 0; + auto uidStr = execArg.getBinaryArgValue("uid"); + if (!g_network->isSimulated()) { + // Run the exec command + // std::string snapBin = extractBinPath(param2.toString()); + auto snapBin = execArg.getBinaryPath(); + auto dataFolder = "path=" + self->dataFolder; + + TraceEvent("oldTLog6SnapCommand").detail("cmdLine", param2.toString()).detail("folderPath", dataFolder); + + vector paramList; + // bin path + paramList.push_back(snapBin); + // user passed arguments + auto listArgs = execArg.getBinaryArgs(); + for (auto elem : listArgs) { + paramList.push_back(elem); + } + // additional arguments + paramList.push_back(dataFolder); + const char* version = FDB_VT_VERSION; + std::string versionString = "version="; + versionString += version; + 
paramList.push_back(versionString); + std::string roleString = "role=tlog"; + paramList.push_back(roleString); + err = fdbFork(snapBin, paramList); + } else { + // copy the entire directory + std::string tLogFolderFrom = "./" + self->dataFolder + "/."; + std::string tLogFolderTo = "./" + self->dataFolder + "-snap-" + uidStr; + + std::string tLogFolderToCreateCmd = "mkdir " + tLogFolderTo; + std::string tLogFolderCopyCmd = "cp " + tLogFolderFrom + " " + tLogFolderTo; + + TraceEvent("oldTLog6ExecSnapCommands") + .detail("tLogFolderToCreateCmd", tLogFolderToCreateCmd) + .detail("tLogFolderCopyCmd", tLogFolderCopyCmd); + + vector paramList; + std::string cpBin = "/bin/cp"; + std::string mkdirBin = "/bin/mkdir"; + + paramList.push_back(mkdirBin); + paramList.push_back(tLogFolderTo); + err = fdbFork(mkdirBin, paramList); + if (err == 0) { + paramList.clear(); + paramList.push_back(cpBin); + paramList.push_back("-a"); + paramList.push_back(tLogFolderFrom); + paramList.push_back(tLogFolderTo); + err = fdbFork(cpBin, paramList); + } + } + TraceEvent("oldTLog6CommitExecTraceTLog") + .detail("uidStr", uidStr) + .detail("status", err) + .detail("tag", logData->allTags.begin()->toString()) + .detail("role", "TLog"); + + // print the status message + for (auto it = logData->allTags.begin(); it != logData->allTags.end(); it++) { + Version poppedTagVersion = -1; + auto tagv = logData->getTagData(*it); + // auto tagv = logData->tag_data.find(*it); + // if (tagv != logData->tag_data.end()) { + // poppedTagVersion = tagv->value.popped; + // } + + int len = param2.size(); + state std::string message = "ExecTrace/TLog/" + it->toString() + "/" + uidStr; + + TraceEvent te = TraceEvent(SevDebug, "oldTLog6ExecTraceDetailed"); + te.detail("uid", uidStr); + te.detail("status", err); + te.detail("role", "TLog"); + te.detail("execCmd", execCmd.toString()); + te.detail("param2", param2.toString()); + te.detail("Tag", it->toString()); + te.detail("Version", qe.version); + 
te.detail("poppedTagVersion", poppedTagVersion); + te.detail("persistentDataVersion", logData->persistentDataVersion); + te.detail("persistentDatadurableVersion", logData->persistentDataDurableVersion); + te.detail("queueCommittedVersion", logData->queueCommittedVersion.get()); + te.detail("ignorePopUid", self->ignorePopUid); + if (execCmd == execSnap) { + te.trackLatest(message.c_str()); + } + } + execVersion = invalidVersion; + } + if(stopped.isReady()) { ASSERT(logData->stopped); req.reply.sendError( tlog_stopped() ); @@ -1473,7 +1698,21 @@ ACTOR Future serveTLogInterface( TLogData* self, TLogInterface tli, Refere logData->addActor.send( tLogPeekMessages( self, req, logData ) ); } when( TLogPopRequest req = waitNext( tli.popMessages.getFuture() ) ) { - logData->addActor.send( tLogPop( self, req, logData ) ); + if (self->ignorePopRequest && (g_network->now() > self->ignorePopDeadline)) { + self->ignorePopRequest = false; + self->ignorePopUid = ""; + self->ignorePopDeadline = 0.0; + TraceEvent("oldTLog6resetIgnorePopRequest") + .detail("now", g_network->now()) + .detail("ignorePopRequest", self->ignorePopRequest) + .detail("ignorePopDeadline", self->ignorePopDeadline) + .trackLatest("disableTLogPopTimedOut"); + } + if (!self->ignorePopRequest) { + logData->addActor.send(tLogPop(self, req, logData)); + } else { + TraceEvent("oldTLog6ignoringPopRequest").detail("ignorePopDeadline", self->ignorePopDeadline); + } } when( TLogCommitRequest req = waitNext( tli.commit.getFuture() ) ) { //TraceEvent("TLogCommitReq", logData->logId).detail("Ver", req.version).detail("PrevVer", req.prevVersion).detail("LogVer", logData->version.get()); @@ -2098,8 +2337,8 @@ ACTOR Future tLogStart( TLogData* self, InitializeTLogRequest req, Localit } // New tLog (if !recoverFrom.size()) or restore from network -ACTOR Future tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference> db, LocalityData locality, PromiseStream tlogRequests, UID tlogId, bool restoreFromDisk, 
Promise oldLog, Promise recovered, Reference> degraded) { - state TLogData self( tlogId, persistentData, persistentQueue, db, degraded ); +ACTOR Future tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference> db, LocalityData locality, PromiseStream tlogRequests, UID tlogId, bool restoreFromDisk, Promise oldLog, Promise recovered, std::string folder, Reference> degraded) { + state TLogData self( tlogId, persistentData, persistentQueue, db, folder, degraded ); state Future error = actorCollection( self.sharedActors.getFuture() ); TraceEvent("SharedTlog", tlogId); diff --git a/fdbserver/TLogInterface.h b/fdbserver/TLogInterface.h index d466638744..22e6756109 100644 --- a/fdbserver/TLogInterface.h +++ b/fdbserver/TLogInterface.h @@ -218,13 +218,15 @@ struct TLogCommitRequest { ReplyPromise reply; Optional debugID; + bool hasExecOp; TLogCommitRequest() {} - TLogCommitRequest( const Arena& a, Version prevVersion, Version version, Version knownCommittedVersion, Version minKnownCommittedVersion, StringRef messages, Optional debugID ) - : arena(a), prevVersion(prevVersion), version(version), knownCommittedVersion(knownCommittedVersion), minKnownCommittedVersion(minKnownCommittedVersion), messages(messages), debugID(debugID) {} + TLogCommitRequest( const Arena& a, Version prevVersion, Version version, Version knownCommittedVersion, Version minKnownCommittedVersion, StringRef messages, bool hasExecOp, Optional debugID ) + : arena(a), prevVersion(prevVersion), version(version), knownCommittedVersion(knownCommittedVersion), minKnownCommittedVersion(minKnownCommittedVersion), messages(messages), debugID(debugID), hasExecOp(hasExecOp){} template void serialize( Ar& ar ) { - serializer(ar, prevVersion, version, knownCommittedVersion, minKnownCommittedVersion, messages, reply, arena, debugID); + // FIXME: sramamoorthy, FDB6port, flatbuffers related versioning missing + serializer(ar, prevVersion, version, knownCommittedVersion, minKnownCommittedVersion, 
messages, reply, arena, debugID, hasExecOp); } }; diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index aa102bf9bc..c2100d815c 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -38,7 +38,11 @@ #include "fdbserver/LogSystem.h" #include "fdbserver/WaitFailure.h" #include "fdbserver/RecoveryState.h" +#include "fdbserver/FDBExecArgs.h" #include "flow/actorcompiler.h" // This must be the last #include. +#if defined(CMAKE_BUILD) || !defined(WIN32) +#include "versions.h" +#endif using std::pair; using std::make_pair; @@ -325,6 +329,14 @@ struct TLogData : NonCopyable { FlowLock concurrentLogRouterReads; FlowLock persistentDataCommitLock; + bool ignorePopRequest; // ignore pop request from storage servers + double ignorePopDeadline; // time until which the ignorePopRequest will be + // honored + std::string ignorePopUid; // callers that set ignorePopRequest will set this + // extra state, used to validate the ownership of + // the set and for callers that unset will + // be able to match it up + std::string dataFolder; // folder where data is stored Reference> degraded; TLogData(UID dbgid, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference> dbInfo, Reference> degraded) @@ -333,7 +345,8 @@ struct TLogData : NonCopyable { dbInfo(dbInfo), degraded(degraded), queueCommitBegin(0), queueCommitEnd(0), diskQueueCommitBytes(0), largeDiskQueueCommitBytes(false), bytesInput(0), bytesDurable(0), overheadBytesInput(0), overheadBytesDurable(0), peekMemoryLimiter(SERVER_KNOBS->TLOG_SPILL_REFERENCE_MAX_PEEK_MEMORY_BYTES), - concurrentLogRouterReads(SERVER_KNOBS->CONCURRENT_LOG_ROUTER_READS) + concurrentLogRouterReads(SERVER_KNOBS->CONCURRENT_LOG_ROUTER_READS), + ignorePopRequest(false), ignorePopDeadline(), ignorePopUid(), dataFolder(folder) { } }; @@ -1624,21 +1637,140 @@ ACTOR Future tLogCommit( return Void(); } + state Version execVersion = invalidVersion; + state ExecCmdValueString execArg(); + state 
TLogQueueEntryRef qe; + state StringRef execCmd; + state StringRef param2; + if (logData->version.get() == req.prevVersion) { // Not a duplicate (check relies on no waiting between here and self->version.set() below!) if(req.debugID.present()) g_traceBatch.addEvent("CommitDebug", tlogDebugID.get().first(), "TLog.tLogCommit.Before"); + // Log the changes to the persistent queue, to be committed by commitQueue() + qe.version = req.version; + qe.knownCommittedVersion = logData->knownCommittedVersion; + qe.messages = req.messages; + qe.id = logData->logId; + + if (req.hasExecOp) { + // inspect the messages to find if there is an Exec type and print + // it. message are prefixed by the length of the message and each + // field is prefixed by the length too + uint8_t type = MutationRef::MAX_ATOMIC_OP; + { + ArenaReader rd(req.arena, qe.messages, Unversioned()); + int32_t messageLength, rawLength; + uint16_t tagCount; + uint32_t sub; + while(!rd.empty()) { + Tag tmpTag; + rd.checkpoint(); + rd >> messageLength >> sub >> tagCount; + for(int i = 0; i < tagCount; i++) { + rd >> tmpTag; + } + rd >> type; + if (type == MutationRef::Exec) { + break; + } + rawLength = messageLength + sizeof(messageLength); + rd.rewind(); + rd.readBytes(rawLength); + } + int32_t len = 0; + if (type == MutationRef::Exec) { + // get param1 + rd >> len; + execCmd = StringRef((uint8_t const*)rd.readBytes(len), len); + // get param2 + rd >> len; + param2 = StringRef((uint8_t const*)rd.readBytes(len), len); + + TraceEvent("TLogCommandType", self->dbgid).detail("execCmd", execCmd.toString()); + + execArg.setCmdValueString(param2.toString()); + execArg.dbgPrint(); + auto uidStr = execArg.getBinaryArgValue("uid"); + execVersion = qe.version; + if (execCmd == execSnap) { + // validation check specific to snap request + std::string reason; + if (!self->ignorePopRequest) { + execVersion = invalidVersion; + reason = "snapFailIgnorePopNotSet"; + } else if (uidStr != self->ignorePopUid) { + execVersion = 
invalidVersion; + reason = "snapFailedDisableTLogUidMismatch"; + } + + if (execVersion == invalidVersion) { + TraceEvent(SevWarn, "oldTLog6snapFailed") + .detail("ignorePopUid", self->ignorePopUid) + .detail("ignorePopRequest", self->ignorePopRequest) + .detail("reason", reason) + .trackLatest(reason.c_str()); + + auto startTag = logData->allTags.begin(); + std::string message = "ExecTrace/TLog/" + + logData->allTags.begin()->toString(); + // startTag.toString() + + "/" + uidStr; + TraceEvent("TLog6ExecCmdSnapCreate") + .detail("uidStr", uidStr) + .detail("status", -1) + .detail("tag", logData->allTags.begin()->toString()) + .detail("role", "TLog") + .trackLatest(message.c_str()); + } + } + if (execCmd == execDisableTLogPop) { + execVersion = invalidVersion; + self->ignorePopRequest = true; + if (self->ignorePopUid != "") { + TraceEvent(SevWarn, "tLogPopDisableOnDisable") + .detail("ignorePopUid", self->ignorePopUid) + .detail("uidStr", uidStr); + } + self->ignorePopUid = uidStr; + // ignorePopRequest will be turned off after 30 seconds + self->ignorePopDeadline = g_network->now() + 30.0; + TraceEvent("TLogExecCmdPopDisable") + .detail("execCmd", execCmd.toString()) + .detail("uidStr", uidStr) + .detail("ignorePopUid", self->ignorePopUid) + .detail("ignporePopRequest", self->ignorePopRequest) + .detail("ignporePopDeadline", self->ignorePopDeadline) + .trackLatest("disablePopTLog"); + } + if (execCmd == execEnableTLogPop) { + execVersion = invalidVersion; + if (self->ignorePopUid != uidStr) { + TraceEvent(SevWarn, "tLogPopDisableEnableUidMismatch") + .detail("ignorePopUid", self->ignorePopUid) + .detail("uidStr", uidStr) + .trackLatest("tLogPopDisableEnableUidMismatch"); + } + self->ignorePopRequest = false; + self->ignorePopDeadline = 0.0; + self->ignorePopUid = ""; + TraceEvent("TLogExecCmdPopEnable") + .detail("execCmd", execCmd.toString()) + .detail("uidStr", uidStr) + .detail("ignorePopUid", self->ignorePopUid) + .detail("ignporePopRequest", 
self->ignorePopRequest) + .detail("ignporePopDeadline", self->ignorePopDeadline) + .trackLatest("enablePopTLog"); + } + } + } + } + //TraceEvent("TLogCommit", logData->logId).detail("Version", req.version); commitMessages(self, logData, req.version, req.arena, req.messages); logData->knownCommittedVersion = std::max(logData->knownCommittedVersion, req.knownCommittedVersion); - // Log the changes to the persistent queue, to be committed by commitQueue() - TLogQueueEntryRef qe; - qe.version = req.version; - qe.knownCommittedVersion = logData->knownCommittedVersion; - qe.messages = req.messages; - qe.id = logData->logId; self->persistentQueue->push( qe, logData ); self->diskQueueCommitBytes += qe.expectedSize(); @@ -1656,6 +1788,104 @@ ACTOR Future tLogCommit( state Future stopped = logData->stopCommit.onTrigger(); wait( timeoutWarning( logData->queueCommittedVersion.whenAtLeast( req.version ) || stopped, 0.1, warningCollectorInput ) ); + if ((execVersion != invalidVersion) && + execVersion <= logData->queueCommittedVersion.get()) { + int err = 0; + auto uidStr = execArg.getBinaryArgValue("uid"); + if (!g_network->isSimulated()) { + // Run the exec command + auto snapBin = execArg.getBinaryPath(); + auto dataFolder = "path=" + self->dataFolder; + + TraceEvent("TLogSnapCommand").detail("cmdLine", param2.toString()).detail("folderPath", dataFolder); + + vector paramList; + // bin path + paramList.push_back(snapBin); + // user passed arguments + auto listArgs = execArg.getBinaryArgs(); + for (auto elem : listArgs) { + paramList.push_back(elem); + } + // additional arguments + paramList.push_back(dataFolder); + const char* version = FDB_VT_VERSION; + std::string versionString = "version="; + versionString += version; + paramList.push_back(versionString); + std::string roleString = "role=tlog"; + paramList.push_back(roleString); + err = fdbFork(snapBin, paramList); + } else { + // copy the entire directory + std::string tLogFolderFrom = "./" + self->dataFolder + "/."; + 
std::string tLogFolderTo = "./" + self->dataFolder + "-snap-" + uidStr; + + std::string tLogFolderToCreateCmd = "mkdir " + tLogFolderTo; + std::string tLogFolderCopyCmd = + "cp " + tLogFolderFrom + " " + tLogFolderTo; + + TraceEvent("TLogExecSnapcommands") + .detail("tLogFolderToCreateCmd", tLogFolderToCreateCmd) + .detail("tLogFolderCopyCmd", tLogFolderCopyCmd); + + vector paramList; + std::string cpBin = "/bin/cp"; + std::string mkdirBin = "/bin/mkdir"; + + paramList.push_back(mkdirBin); + paramList.push_back(tLogFolderTo); + err = fdbFork(mkdirBin, paramList); + if (err == 0) { + paramList.clear(); + paramList.push_back(cpBin); + paramList.push_back("-a"); + paramList.push_back(tLogFolderFrom); + paramList.push_back(tLogFolderTo); + err = fdbFork(cpBin, paramList); + } + } + TraceEvent("TLogCommitExecTraceLog") + .detail("uidStr", uidStr) + .detail("status", err) + .detail("tag", logData->allTags.begin()->toString()) + .detail("role", "TLog"); + + // print the status message + for (auto it = logData->allTags.begin(); it != logData->allTags.end(); it++) { + Version poppedTagVersion = -1; + auto tagv = logData->getTagData(*it); + // auto tagv = logData->tag_data.find(*it); + // if (tagv != logData->tag_data.end()) { + // poppedTagVersion = tagv->value.popped; + // } + + int len = param2.size(); + state std::string message = + "ExecTrace/TLog/" + it->toString() + "/" + uidStr; + + TraceEvent te = TraceEvent(SevDebug, "TLogExecTraceDetailed"); + te.detail("uid", uidStr); + te.detail("status", err); + te.detail("role", "TLog"); + te.detail("execCmd", execCmd.toString()); + te.detail("param2", param2.toString()); + te.detail("Tag", it->toString()); + te.detail("Version", qe.version); + te.detail("poppedTagVersion", poppedTagVersion); + te.detail("persistentDataVersion", logData->persistentDataVersion); + te.detail("persistentDatadurableVersion", + logData->persistentDataDurableVersion); + te.detail("queueCommittedVersion", + logData->queueCommittedVersion.get()); + 
te.detail("ignorePopUid", self->ignorePopUid); + if (execCmd == execSnap) { + te.trackLatest(message.c_str()); + } + } + execVersion = invalidVersion; + } + if(stopped.isReady()) { ASSERT(logData->stopped); req.reply.sendError( tlog_stopped() ); @@ -1841,7 +2071,21 @@ ACTOR Future serveTLogInterface( TLogData* self, TLogInterface tli, Refere logData->addActor.send( tLogPeekMessages( self, req, logData ) ); } when( TLogPopRequest req = waitNext( tli.popMessages.getFuture() ) ) { - logData->addActor.send( tLogPop( self, req, logData ) ); + if (self->ignorePopRequest && (g_network->now() > self->ignorePopDeadline)) { + self->ignorePopRequest = false; + self->ignorePopUid = ""; + self->ignorePopDeadline = 0.0; + TraceEvent("resetIgnorePopRequest") + .detail("now", g_network->now()) + .detail("ignorePopRequest", self->ignorePopRequest) + .detail("ignorePopDeadline", self->ignorePopDeadline) + .trackLatest("disableTLogPopTimedOut"); + } + if (!self->ignorePopRequest) { + logData->addActor.send(tLogPop(self, req, logData)); + } else { + TraceEvent("ignoringPopRequest").detail("ignorePopDeadline", self->ignorePopDeadline); + } } when( TLogCommitRequest req = waitNext( tli.commit.getFuture() ) ) { //TraceEvent("TLogCommitReq", logData->logId).detail("Ver", req.version).detail("PrevVer", req.prevVersion).detail("LogVer", logData->version.get()); @@ -2493,8 +2737,8 @@ ACTOR Future tLogStart( TLogData* self, InitializeTLogRequest req, Localit } // New tLog (if !recoverFrom.size()) or restore from network -ACTOR Future tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference> db, LocalityData locality, PromiseStream tlogRequests, UID tlogId, bool restoreFromDisk, Promise oldLog, Promise recovered, Reference> degraded ) { - state TLogData self( tlogId, persistentData, persistentQueue, db, degraded ); +ACTOR Future tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference> db, LocalityData locality, PromiseStream tlogRequests, UID tlogId, 
bool restoreFromDisk, Promise oldLog, Promise recovered, std::string folder, Reference> degraded ) { + state TLogData self( tlogId, persistentData, persistentQueue, db, folder, degraded ); state Future error = actorCollection( self.sharedActors.getFuture() ); TraceEvent("SharedTlog", tlogId); diff --git a/fdbserver/TagPartitionedLogSystem.actor.cpp b/fdbserver/TagPartitionedLogSystem.actor.cpp index b8ecb413b0..df55510fbf 100644 --- a/fdbserver/TagPartitionedLogSystem.actor.cpp +++ b/fdbserver/TagPartitionedLogSystem.actor.cpp @@ -431,7 +431,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted> tLogCommitResults; for(int loc=0; loc< it->logServers.size(); loc++) { Standalone msg = data.getMessages(location); - allReplies.push_back( it->logServers[loc]->get().interf().commit.getReply( TLogCommitRequest( msg.arena(), prevVersion, version, knownCommittedVersion, minKnownCommittedVersion, msg, debugID ), TaskTLogCommitReply ) ); + allReplies.push_back( it->logServers[loc]->get().interf().commit.getReply( TLogCommitRequest( msg.arena(), prevVersion, version, knownCommittedVersion, minKnownCommittedVersion, msg, data.getHasExecOp(), debugID ), TaskTLogCommitReply ) ); Future commitSuccess = success(allReplies.back()); addActor.get().send(commitSuccess); tLogCommitResults.push_back(commitSuccess); diff --git a/fdbserver/WorkerInterface.actor.h b/fdbserver/WorkerInterface.actor.h index 3015e9633c..8d5d3db4ae 100644 --- a/fdbserver/WorkerInterface.actor.h +++ b/fdbserver/WorkerInterface.actor.h @@ -60,6 +60,7 @@ struct WorkerInterface { RequestStream< struct EventLogRequest > eventLogRequest; RequestStream< struct TraceBatchDumpRequest > traceBatchDumpRequest; RequestStream< struct DiskStoreRequest > diskStoreRequest; + RequestStream execReq; TesterInterface testerInterface; @@ -71,7 +72,7 @@ struct WorkerInterface { template void serialize(Ar& ar) { - serializer(ar, clientInterface, locality, tLog, master, masterProxy, dataDistributor, ratekeeper, resolver, 
storage, logRouter, debugPing, coordinationPing, waitFailure, setMetricsRate, eventLogRequest, traceBatchDumpRequest, testerInterface, diskStoreRequest); + serializer(ar, clientInterface, locality, tLog, master, masterProxy, dataDistributor, ratekeeper, resolver, storage, logRouter, debugPing, coordinationPing, waitFailure, setMetricsRate, eventLogRequest, traceBatchDumpRequest, testerInterface, diskStoreRequest, execReq); } }; @@ -239,6 +240,26 @@ struct TraceBatchDumpRequest { } }; +struct ExecuteRequest { + // FIXME: sramamoorthy, FDB6port enable flat_buffers + // constexpr static flat_buffers::FileIdentifier file_identifier = 16478959; + ReplyPromise reply; + + Arena arena; + StringRef execPayLoad; + + ExecuteRequest(StringRef execPayLoad) : execPayLoad(execPayLoad) {} + + ExecuteRequest() : execPayLoad() {} + + template + void serialize(Ar& ar) { + // FIXME: sramamoorthy, FDB6port enable flat_buffers + // serializer(ar, v2(reply), v2(execPayLoad), v2(arena)); + serializer(ar, reply, execPayLoad, arena); + } +}; + struct LoadedReply { constexpr static FileIdentifier file_identifier = 9956350; Standalone payload; @@ -403,7 +424,7 @@ ACTOR Future masterProxyServer(MasterProxyInterface proxy, InitializeMaste ACTOR Future tLog(IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference> db, LocalityData locality, PromiseStream tlogRequests, UID tlogId, bool restoreFromDisk, - Promise oldLog, Promise recovered, Reference> degraded); // changes tli->id() to be the recovered ID + Promise oldLog, Promise recovered, std:;string folder, Reference> degraded); // changes tli->id() to be the recovered ID ACTOR Future monitorServerDBInfo(Reference>> ccInterface, Reference ccf, LocalityData locality, Reference> dbInfo); @@ -425,7 +446,7 @@ namespace oldTLog_6_0 { ACTOR Future tLog(IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference> db, LocalityData locality, PromiseStream tlogRequests, UID tlogId, bool restoreFromDisk, - Promise oldLog, Promise 
recovered, Reference> degraded); + Promise oldLog, Promise recovered, std::string folder, Reference> degraded); } typedef decltype(&tLog) TLogFn; diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp index 1106addb61..aaa8dda4c9 100644 --- a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -49,8 +49,12 @@ #include "fdbserver/RecoveryState.h" #include "fdbserver/LogProtocolMessage.h" #include "fdbserver/LatencyBandConfig.h" +#include "fdbserver/FDBExecArgs.h" #include "flow/TDMetric.actor.h" #include "flow/actorcompiler.h" // This must be the last #include. +#if defined(CMAKE_BUILD) || !defined(WIN32) +#include "versions.h" +#endif using std::pair; using std::make_pair; @@ -1834,14 +1838,14 @@ void addMutation( Reference& target, Version version, MutationRef const& muta } template -void splitMutations( KeyRangeMap& map, VerUpdateRef const& update ) { +void splitMutations(StorageServer* data, KeyRangeMap& map, VerUpdateRef const& update) { for(auto& m : update.mutations) { - splitMutation(map, m, update.version); + splitMutation(data, map, m, update.version); } } template -void splitMutation( KeyRangeMap& map, MutationRef const& m, Version ver ) { +void splitMutation(StorageServer* data, KeyRangeMap& map, MutationRef const& m, Version ver) { if(isSingleKeyMutation((MutationRef::Type) m.type)) { if ( !SHORT_CIRCUT_ACTUAL_STORAGE || !normalKeys.contains(m.param1) ) addMutation( map.rangeContaining(m.param1)->value(), ver, m ); @@ -1855,8 +1859,92 @@ void splitMutation( KeyRangeMap& map, MutationRef const& m, Version ver ) { addMutation( i->value(), ver, MutationRef((MutationRef::Type)m.type, k.begin, k.end) ); } } - } - else + } else if (m.type == MutationRef::Exec) { + std::string cmd = m.param1.toString(); + int len = m.param2.size(); + if ((cmd == execDisableTLogPop) || (cmd == execEnableTLogPop)) { + TraceEvent("IgnoreNonSnapCommands").detail("execCommand", cmd); + return; + } + ExecCmdValueString 
execArg(m.param2.toString()); + auto uidStr = execArg.getBinaryArgValue("uid"); + + int err = 0; + if (!g_network->isSimulated() || cmd != execSnap) { + // Run the exec command + auto binPath = execArg.getBinaryPath(); + auto dataFolder = "path=" + data->folder; + vector paramList; + // bin path + paramList.push_back(binPath); + // user passed arguments + auto listArgs = execArg.getBinaryArgs(); + execArg.dbgPrint(); + for (auto elem : listArgs) { + paramList.push_back(elem); + } + // additional arguments + paramList.push_back(dataFolder); + const char* version = FDB_VT_VERSION; + std::string versionString = "version="; + versionString += version; + paramList.push_back(versionString); + std::string roleString = "role=storage"; + paramList.push_back(roleString); + err = fdbFork(binPath, paramList); + } else { + // copy the files + TraceEvent("ExecTraceStorage") + .detail("storageFolder", data->folder) + .detail("localMachineId", data->thisServerID.toString()) + .detail("durableVersion", data->durableVersion.get()); + + std::string folder = abspath(data->folder); + + std::string folderFrom = folder + "/."; + std::string folderTo = folder + "-snap-" + uidStr; + + std::string folderToCreateCmd = "mkdir " + folderTo; + std::string folderCopyCmd = "cp " + folderFrom + " " + folderTo; + + TraceEvent("ExecTraceStorageSnapcommands") + .detail("folderToCreateCmd", folderToCreateCmd) + .detail("folderCopyCmd", folderCopyCmd); + + vector paramList; + std::string cpBin = "/bin/cp"; + std::string mkdirBin = "/bin/mkdir"; + + paramList.push_back(mkdirBin); + paramList.push_back(folderTo); + err = fdbFork(mkdirBin, paramList); + TraceEvent("mkdirStatus").detail("errno", err); + + if (err == 0) { + paramList.clear(); + paramList.push_back(cpBin); + paramList.push_back("-a"); + paramList.push_back(folderFrom); + paramList.push_back(folderTo); + err = fdbFork(cpBin, paramList); + } + } + // FIXME, sramamoorthy, print for non execSnap commands too + if (cmd == execSnap) { + auto 
tokenStr = "ExecTrace/storage/" + uidStr; + TraceEvent te = TraceEvent("ExecTraceStorage"); + te.detail("uid", uidStr); + te.detail("status", err); + te.detail("role", "storage"); + te.detail("version", ver); + te.detail("mutation", m.toString()); + te.detail("mid", data->thisServerID.toString()); + te.detail("durableVersion", data->durableVersion.get()); + te.detail("data_version", data->version.get()); + te.detail("tag", data->tag.toString()); + te.trackLatest(tokenStr.c_str()); + } + } else ASSERT(false); // Unknown mutation type in splitMutations } @@ -1981,7 +2069,7 @@ ACTOR Future fetchKeys( StorageServer *data, AddingShard* shard ) { // and the ones delivered to the new shard will be discarded because it is in WaitPrevious phase (hasn't chosen a fetchVersion yet). // What we are doing here is expensive and could get more expensive if we started having many more blocks per shard. May need optimization in the future. for(auto u = updatesToSplit.begin(); u != updatesToSplit.end(); ++u) - splitMutations( data->shards, *u ); + splitMutations(data, data->shards, *u); TEST( true ); TEST( shard->updates.size() ); @@ -2382,7 +2470,7 @@ public: // debugMutation("SSUpdateMutation", changes[c].version, *m); //} - splitMutation( data->shards, m, ver ); + splitMutation(data, data->shards, m, ver); } if (data->otherError.getFuture().isReady()) data->otherError.getFuture().get(); diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index 08ef75e055..062de7cf72 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -35,11 +35,15 @@ #include "fdbserver/ClusterRecruitmentInterface.h" #include "fdbserver/DataDistributorInterface.h" #include "fdbserver/ServerDBInfo.h" +#include "fdbserver/FDBExecArgs.h" #include "fdbserver/CoordinationInterface.h" #include "fdbclient/FailureMonitorClient.h" #include "fdbclient/MonitorLeader.h" #include "fdbclient/ClientWorkerInterface.h" #include "flow/Profiler.h" +#if defined(CMAKE_BUILD) || !defined(WIN32) 
+#include "versions.h" +#endif #ifdef __linux__ #include @@ -689,6 +693,7 @@ ACTOR Future monitorServerDBInfo( Reference workerServer( Reference connFile, Reference>> ccInterface, @@ -697,6 +702,15 @@ ACTOR Future workerServer( ProcessClass initialClass, std::string folder, int64_t memoryLimit, std::string metricsConnFile, std::string metricsPrefix, Promise recoveredDiskFiles, int64_t memoryProfileThreshold) { +======= +ACTOR Future workerServer(Reference connFile, + Reference>> ccInterface, + LocalityData locality, + Reference> asyncPriorityInfo, + ProcessClass initialClass, std::string folder, int64_t memoryLimit, + std::string metricsConnFile, std::string metricsPrefix, + Promise recoveredDiskFiles, std::string _coordFolder) { +>>>>>>> 2d5af668... Snapshot based backup and resotre implementation state PromiseStream< ErrorInfo > errors; state Reference>> ddInterf( new AsyncVar>() ); state Reference>> rkInterf( new AsyncVar>() ); @@ -717,6 +731,7 @@ ACTOR Future workerServer( // here is no, so that when running with log_version==3, all files should say V=3. state std::map, std::pair, PromiseStream>> sharedLogs; + state std::string coordFolder = _coordFolder; state WorkerInterface interf( locality ); @@ -832,7 +847,7 @@ ACTOR Future workerServer( auto& logData = sharedLogs[std::make_tuple(s.tLogOptions.version, s.storeType, s.tLogOptions.spillType)]; // FIXME: Shouldn't if logData.first isValid && !isReady, shouldn't we // be sending a fake InitializeTLogRequest rather than calling tLog() ? - Future tl = tLogFn( kv, queue, dbInfo, locality, !logData.first.isValid() || logData.first.isReady() ? logData.second : PromiseStream(), s.storeID, true, oldLog, recovery, degraded ); + Future tl = tLogFn( kv, queue, dbInfo, locality, !logData.first.isValid() || logData.first.isReady() ? 
logData.second : PromiseStream(), s.storeID, true, oldLog, recovery, folder, degraded ); recoveries.push_back(recovery.getFuture()); tl = handleIOErrors( tl, kv, s.storeID ); @@ -989,7 +1004,7 @@ ACTOR Future workerServer( filesClosed.add( data->onClosed() ); filesClosed.add( queue->onClosed() ); - logData.first = tLogFn( data, queue, dbInfo, locality, logData.second, logId, false, Promise(), Promise(), degraded ); + logData.first = tLogFn( data, queue, dbInfo, locality, logData.second, logId, false, Promise(), Promise(), folder, degraded ); logData.first = handleIOErrors( logData.first, data, logId ); logData.first = handleIOErrors( logData.first, queue, logId ); errorForwarders.add( forwardError( errors, Role::SHARED_TRANSACTION_LOG, logId, logData.first ) ); @@ -1166,6 +1181,76 @@ ACTOR Future workerServer( systemMonitor(); loggingTrigger = delay( loggingDelay, TaskFlushTrace ); } + when(ExecuteRequest req = waitNext(interf.execReq.getFuture())) { + int len = req.execPayLoad.size(); + ExecCmdValueString execArg(req.execPayLoad.toString()); + execArg.dbgPrint(); + auto uidStr = execArg.getBinaryArgValue("uid"); + + int err = 0; + if (!g_network->isSimulated()) { + // bin path + auto snapBin = execArg.getBinaryPath(); + auto dataFolder = "path=" + coordFolder; + vector paramList; + // bin path + paramList.push_back(snapBin); + // user passed arguments + auto listArgs = execArg.getBinaryArgs(); + for (auto elem : listArgs) { + paramList.push_back(elem); + } + // additional arguments + paramList.push_back(dataFolder); + const char* version = FDB_VT_VERSION; + std::string versionString = "version="; + versionString += version; + paramList.push_back(versionString); + std::string roleString = "role=coordinator"; + paramList.push_back(roleString); + err = fdbFork(snapBin, paramList); + } else { + // copy the files + std::string folder = coordFolder; + std::string folderFrom = "./" + folder + "/."; + std::string folderTo = "./" + folder + "-snap-" + uidStr; + + 
std::string folderToCreateCmd = "mkdir " + folderTo; + std::string folderCopyCmd = "cp " + folderFrom + " " + folderTo; + + TraceEvent("ExecTraceCoordSnapcommands") + .detail("folderToCreateCmd", folderToCreateCmd) + .detail("folderCopyCmd", folderCopyCmd); + + vector paramList; + std::string cpBin = "/bin/cp"; + std::string mkdirBin = "/bin/mkdir"; + + paramList.push_back(mkdirBin); + paramList.push_back(folderTo); + err = fdbFork(mkdirBin, paramList); + TraceEvent("mkdirStatus").detail("errno", err); + + if (err == 0) { + paramList.clear(); + paramList.push_back(cpBin); + paramList.push_back("-a"); + paramList.push_back(folderFrom); + paramList.push_back(folderTo); + err = fdbFork(cpBin, paramList); + } + } + + auto tokenStr = "ExecTrace/Coordinators/" + uidStr; + auto te = TraceEvent("ExecTraceCoordinators"); + te.detail("uid", uidStr); + te.detail("status", err); + te.detail("role", "coordinator"); + te.detail("value", coordFolder); + te.detail("execPayLoad", req.execPayLoad.toString()); + te.trackLatest(tokenStr.c_str()); + req.reply.send(Void()); + } when( wait( errorForwarders.getResult() ) ) {} when( wait( handleErrors ) ) {} } @@ -1344,7 +1429,7 @@ ACTOR Future fdbd( v.push_back( reportErrors( processClass == ProcessClass::TesterClass ? 
monitorLeader( connFile, cc ) : clusterController( connFile, cc , asyncPriorityInfo, recoveredDiskFiles.getFuture(), localities ), "ClusterController") ); v.push_back( reportErrors(extractClusterInterface( cc, ci ), "ExtractClusterInterface") ); v.push_back( reportErrors(failureMonitorClient( ci, true ), "FailureMonitorClient") ); - v.push_back( reportErrorsExcept(workerServer(connFile, cc, localities, asyncPriorityInfo, processClass, dataFolder, memoryLimit, metricsConnFile, metricsPrefix, recoveredDiskFiles, memoryProfileThreshold), "WorkerServer", UID(), &normalWorkerErrors()) ); + v.push_back( reportErrorsExcept(workerServer(connFile, cc, localities, asyncPriorityInfo, processClass, dataFolder, memoryLimit, metricsConnFile, metricsPrefix, recoveredDiskFiles, memoryProfileThreshold, coordFolder), "WorkerServer", UID(), &normalWorkerErrors()) ); state Future firstConnect = reportErrors( printOnFirstConnected(ci), "ClusterFirstConnectedError" ); wait( quorum(v,1) ); diff --git a/fdbserver/workloads/SnapTest.actor.cpp b/fdbserver/workloads/SnapTest.actor.cpp new file mode 100644 index 0000000000..11be376e3a --- /dev/null +++ b/fdbserver/workloads/SnapTest.actor.cpp @@ -0,0 +1,674 @@ +#include "fdbserver/Status.h" +#include "flow/actorcompiler.h" +#include "fdbrpc/ContinuousSample.h" +#include "fdbclient/NativeAPI.actor.h" +#include "fdbclient/ManagementAPI.actor.h" +#include "fdbserver/TesterInterface.actor.h" +#include "fdbserver/WorkerInterface.actor.h" +#include "workloads.actor.h" +#include "BulkSetup.actor.h" +#include "fdbserver/ClusterRecruitmentInterface.h" +#include "fdbclient/ReadYourWrites.h" + +#include + +#undef FLOW_ACOMPILER_STATE +#define FLOW_ACOMPILER_STATE 1 + +void getVersionAndnumTags(TraceEventFields md, Version& version, int& numTags) { + version = -1; + numTags = -1; + + // FIXME: sramamoorthy, WONTWORK + // std::string versionStr = extractAttribute(msg.toString(), "Version"); + // version = strtol(versionStr.c_str(), nullptr, 0); + // 
TraceEvent("version").detail("", version); + sscanf(md.getValue("Version").c_str(), "%lld", &version); + sscanf(md.getValue("numTags").c_str(), "%d:%d", &numTags); + + // std::string numTagsStr = extractAttribute(msg.toString(), "numTags"); + // numTags = strtol(numTagsStr.c_str(), nullptr, 0); + // TraceEvent("numTags").detail("", numTags); + // FIXME: sramamoorthy, WONTWORK +} + +void getTagAndDurableVersion(TraceEventFields md, Version version, Tag& tag, Version& durableVersion) { + Version verifyVersion; + // FIXME: sramamoorthy, WONTWORK + // tag = -1; + durableVersion = -1; + + int tagLocality; + int tagId; + sscanf(md.getValue("version").c_str(), "%lld", &verifyVersion); + sscanf(md.getValue("tag").c_str(), "%d:%d", &tagLocality, &tagId); + tag.locality = tagLocality; + tag.id = tagId; + sscanf(md.getValue("durableVersion").c_str(), "%lld", &durableVersion); + + // FIXME: sramamoorthy, WONTWORK + // std::string versionStr = extractAttribute(msg.toString(), "version"); + // verifyVersion = strtol(versionStr.c_str(), nullptr, 0); + + // TraceEvent("version compare").detail("version", version).detail("verifyVersion", verifyVersion); + // if (version != verifyVersion) { + // return; + // } + + // std::string tagStr = extractAttribute(msg.toString(), "tag"); + // tag = strtol(tagStr.c_str(), nullptr, 0); + // TraceEvent("tagscan").detail("tag", tag); + // if (tag == -1) { + // return; + // } + + // versionStr = extractAttribute(msg.toString(), "durableVersion"); + // durableVersion = strtol(versionStr.c_str(), nullptr, 0); + + // TraceEvent("durableVersion").detail("durablVersion", durableVersion); + // FIXME: sramamoorthy, WONTWORK +} + +void getMinAndMaxTLogVersions(TraceEventFields md, Version version, Tag tag, Version& minTLogVersion, + Version& maxTLogVersion) { + Version verifyVersion; + Tag verifyTag; + minTLogVersion = maxTLogVersion = -1; + + sscanf(md.getValue("Version").c_str(), "%lld", &verifyVersion); + int tagLocality; + int tagId; + 
sscanf(md.getValue("tag").c_str(), "%d:%d", &tagLocality, &tagId); + verifyTag.locality = tagLocality; + verifyTag.id = tagId; + if (tag != verifyTag) { + return; + } + sscanf(md.getValue("poppedTagVersion").c_str(), "%lld", &minTLogVersion); + sscanf(md.getValue("queueCommittedVersion").c_str(), "%lld", &maxTLogVersion); + + // FIXME: sramamoorthy, WONTWORK + // std::string versionStr = extractAttribute(msg.toString(), "Version"); + // verifyVersion = strtol(versionStr.c_str(), nullptr, 0); + + // if (version != verifyVersion) { + // return; + // } + + // std::string tagStr = extractAttribute(msg.toString(), "Tag"); + // verifyTag = strtol(tagStr.c_str(), nullptr, 0); + + // if (tag != verifyTag) { + // return; + // } + + // versionStr = extractAttribute(msg.toString(), "poppedTagVersion"); + // minTLogVersion = strtol(versionStr.c_str(), nullptr, 0); + // versionStr = extractAttribute(msg.toString(), "queueCommittedVersion"); + // maxTLogVersion = strtol(versionStr.c_str(), nullptr, 0); + // FIXME: sramamoorthy, WONTWORK +} + +void filterEmptyMessages(std::vector>& messages) { + // FIXME, sramamoorthy, FDB6 related + // std::string emptyStr; + // auto it = messages.begin(); + // while (it != messages.end()) { + // if (it->get() == emptyStr) { + // it = messages.erase(it); + // } else { + // ++it; + // } + // } + return; +} + +struct SnapTestWorkload : TestWorkload { +public: // variables + int numSnaps; // num of snapshots to be taken + // FIXME: currently validation works on numSnap = 1 + double maxSnapDelay; // max delay before which a snapshot will be taken + bool snapCheck; // check for the successful snap create + int testID; // test id + UID snapUID; // UID used for snap name + +public: // ctor & dtor + SnapTestWorkload(WorkloadContext const& wcx) + : TestWorkload(wcx), numSnaps(0), maxSnapDelay(0.0), snapCheck(false), testID(0), snapUID() { + TraceEvent("SnapTestWorkload Constructor"); + std::string workloadName = "SnapTest"; + + numSnaps = 
getOption(options, LiteralStringRef("numSnaps"), 0); + maxSnapDelay = getOption(options, LiteralStringRef("maxSnapDelay"), 25.0); + snapCheck = getOption(options, LiteralStringRef("snapCheck"), false); + testID = getOption(options, LiteralStringRef("testID"), 0); + } + +public: // workload functions + std::string description() override { return "SnapTest"; } + Future setup(Database const& cx) override { + TraceEvent("SnapTestWorkload setup"); + return Void(); + } + Future start(Database const& cx) override { + TraceEvent("SnapTestWorkload start"); + if (clientId == 0) { + return _start(cx, this); + } + return Void(); + } + + Future check(Database const& cx) override { + // FIXME: sramamoorthy, FDB6 porting fallout + if (true) return true; + if (!this->snapCheck || clientId != 0) { + TraceEvent("returning true here"); + return true; + } + switch (this->testID) { + case 0: + case 1: + case 2: + case 3: { + TraceEvent("SnapTestWorkload check"); + Future> proxyIfaces; + return (verifyExecTraceVersion(cx, this)); + break; + } + case 4: { + std::string token = "disableTLogPopTimedOut"; + return verifyTLogTrackLatest(cx, this, token); + break; + } + case 5: { + std::string token = "tLogPopDisableEnableUidMismatch"; + return verifyTLogTrackLatest(cx, this, token); + break; + } + case 6: { + std::string token = "snapFailIgnorePopNotSet"; + return verifyTLogTrackLatest(cx, this, token); + break; + } + case 7: { + std::string token = "snapFailedDisableTLogUidMismatch"; + return verifyTLogTrackLatest(cx, this, token); + break; + } + default: { break; } + } + return false; + } + + void getMetrics(vector& m) override { TraceEvent("SnapTestWorkload getMetrics"); } + + ACTOR Future _create_keys(Database cx, std::string prefix, bool even = true) { + state Transaction tr(cx); + + state int retry = 0; + loop { + tr.reset(); + try { + for (int i = 0; i < 1000; i++) { + int64_t id = g_random->randomInt64(0, INT64_MAX - 2); + if (even) { + if (id % 2 != 0) { + id++; + } + } else { + if 
(id % 2 == 0) { + id++; + } + } + std::string Key1 = prefix + std::to_string(id); + Key key1Ref(Key1); + std::string Val1 = std::to_string(id); + Value val1Ref(Val1); + tr.set(key1Ref, val1Ref, false); + } + wait(tr.commit()); + break; + } catch (Error& e) { + wait(tr.onError(e)); + } + } + return Void(); + } + + ACTOR Future _start(Database cx, SnapTestWorkload* self) { + state Transaction tr(cx); + + if (self->testID == 0) { + // create even keys before the snapshot + wait(self->_create_keys(cx, "snapKey")); + } else if (self->testID == 1) { + // create a snapshot + state double toDelay = fmod(g_random->randomUInt32(), self->maxSnapDelay); + TraceEvent("toDelay").detail("toDelay", toDelay); + ASSERT(toDelay < self->maxSnapDelay); + wait(delay(toDelay)); + + state int retry = 0; + loop { + self->snapUID = g_random->randomUniqueID(); + try { + StringRef snapCmdRef = LiteralStringRef("/bin/snap_create.sh"); + Future status = snapCreate(cx, snapCmdRef, self->snapUID); + wait(status); + break; + } catch (Error& e) { + ++retry; + TraceEvent(retry > 3 ? 
SevWarn : SevInfo, "snapCreate command failed").detail("error", e.what()); + if (retry > 3) { + throw operation_failed(); + } + } + } + TraceEvent("Snapshot create succeeded"); + } else if (self->testID == 2) { + // create odd keys after the snapshot + wait(self->_create_keys(cx, "snapKey", false /*even*/)); + } else if (self->testID == 3) { + state KeySelector begin = firstGreaterOrEqual(normalKeys.begin); + state KeySelector end = firstGreaterOrEqual(normalKeys.end); + state int cnt = 0; + // read the entire normalKeys range and look at keys prefixed + // with snapKeys 1) validate that all key ids are even ie - + // created before snap 2) values are same as the key id 3) # of + // keys adds up to the total keys created before snap + loop { + tr.reset(); + try { + Standalone kvRange = wait(tr.getRange(begin, end, CLIENT_KNOBS->TOO_MANY)); + if (!kvRange.more && kvRange.size() == 0) { + TraceEvent("No more entires"); + break; + } + + for (int i = 0; i < kvRange.size(); i++) { + if (kvRange[i].key.startsWith(LiteralStringRef("snapKey"))) { + std::string tmp1 = kvRange[i].key.substr(7).toString(); + int64_t id = strtol(tmp1.c_str(), nullptr, 0); + if (id % 2 != 0) { + throw operation_failed(); + } + ++cnt; + std::string tmp2 = kvRange[i].value.toString(); + int64_t value = strtol(tmp2.c_str(), nullptr, 0); + if (id != value) { + throw operation_failed(); + } + } + } + begin = firstGreaterThan(kvRange.end()[-1].key); + } catch (Error& e) { + wait(tr.onError(e)); + } + } + if (cnt != 1000) { + throw operation_failed(); + } + } else if (self->testID == 4) { + // description: if disable of a TLog pop was not followed by a + // corresponding enable, then TLog will automatically enable the + // popping of TLogs. 
this test case validates that we auto + // enable the popping of TLogs + loop { + // disable pop of the TLog + tr.reset(); + try { + StringRef payLoadRef = LiteralStringRef("empty-binary:uid=test"); + tr.execute(execDisableTLogPop, payLoadRef); + wait(tr.commit()); + break; + } catch (Error& e) { + wait(tr.onError(e)); + } + } + // wait for 40 seconds and verify that the enabled pop happened + // automatically + wait(delay(40.0)); + } else if (self->testID == 5) { + // description: disable TLog pop and enable TLog pop with + // different UIDs should mis-match and print an error + loop { + // disable pop of the TLog + tr.reset(); + try { + StringRef payLoadRef = LiteralStringRef("empty-binary:uid=tmatch"); + tr.execute(execDisableTLogPop, payLoadRef); + wait(tr.commit()); + break; + } catch (Error& e) { + wait(tr.onError(e)); + } + } + loop { + // enable pop of the TLog + tr.reset(); + try { + StringRef payLoadRef = LiteralStringRef("empty-binary:uid=didnotmatch"); + tr.execute(execEnableTLogPop, payLoadRef); + wait(tr.commit()); + break; + } catch (Error& e) { + wait(tr.onError(e)); + } + } + } else if (self->testID == 6) { + // snapshot create without disabling pop of the TLog + loop { + try { + tr.reset(); + StringRef snapPayload = LiteralStringRef("/bin/" + "snap_create.sh:uid=d78b08d47f341158e9a54d4baaf4a4dd"); + tr.execute(execSnap, snapPayload); + wait(tr.commit()); + break; + } catch (Error& e) { + TraceEvent("snapCreate").detail("snapCreateErrorSnapTLogStorage", e.what()); + throw; + } + } + } else if (self->testID == 7) { + // disable popping of TLog and snapshot create with mis-matching + loop { + // disable pop of the TLog + tr.reset(); + try { + StringRef payLoadRef = LiteralStringRef("empty-binary:uid=tmatch"); + tr.execute(execDisableTLogPop, payLoadRef); + wait(tr.commit()); + break; + } catch (Error& e) { + wait(tr.onError(e)); + } + } + loop { + // snap create with different UID + try { + tr.reset(); + StringRef snapPayload = 
LiteralStringRef("empty-binary:uid=ba61e9612a561d60bd83ad83e1b63568"); + tr.execute(execSnap, snapPayload); + wait(tr.commit()); + break; + } catch (Error& e) { + TraceEvent("snapCreate").detail("snapCreateErrorSnapTLogStorage", e.what()); + throw; + } + } + } + TraceEvent("returning from start"); + wait(delay(0.0)); + return Void(); + } + + ACTOR Future verifyTLogTrackLatest(Database cx, SnapTestWorkload* self, std::string event) { + TraceEvent("verifyTLogTrackLatest"); + state StringRef eventTokenRef(event); + // FIXME: sramamoorthy, FDB6 related + // state vector tLogWorkers = wait(self->getWorkersWithRole(cx, + // LocalityData::ClusterRole::TLog)); state vector> tLogWorkers = + // wait(self->dbInfo->get().clusterInterface.getWorkers()); + state vector> tLogWorkers = wait(getWorkers(self->dbInfo)); + state std::vector> tLogMessages; + + state int i = 0; + for (; i < tLogWorkers.size(); i++) { + tLogMessages.push_back( + timeoutError(tLogWorkers[i].first.eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 1.0)); + + state int retryCnt = 0; + state bool retry = false; + loop { + retry = false; + try { + TraceEvent("waiting for tlog messages"); + wait(waitForAll(tLogMessages)); + break; + } catch (Error& e) { + TraceEvent("verifyTLogTrackLatest") + .detail("token", eventTokenRef.toString()) + .detail("Reason", "Failed to get tLogMessages") + .detail("code", e.what()); + if (e.code() != error_code_timed_out) { + return false; + } else { + retry = true; + ++retryCnt; + } + } + if (retryCnt >= 4) { + TraceEvent("Unable to retrieve TLog messages"); + return false; + } + } + filterEmptyMessages(tLogMessages); + if (tLogMessages.size() != 1) { + TraceEvent("verifyTLogTrackLatest message not found").detail("token", eventTokenRef.toString()); + return false; + } + tLogMessages.clear(); + } + return true; + } + + ACTOR Future verifyExecTraceVersion(Database cx, SnapTestWorkload* self) { + TraceEvent("verifyExecTraceVersion1"); + + // FIXME: sramamoorthy, FDB6 + // 
state std::vector coordAddrs = self->getCoordinatorAddresses(); + state std::vector coordAddrs = wait(getCoordinators(cx)); + TraceEvent("verifyExecTraceVersion2"); + state vector> proxyWorkers = wait(getWorkers(self->dbInfo)); + TraceEvent("verifyExecTraceVersion3"); + state vector> storageWorkers = wait(getWorkers(self->dbInfo)); + TraceEvent("verifyExecTraceVersion4"); + state vector> tLogWorkers = wait(getWorkers(self->dbInfo)); + TraceEvent("verifyExecTraceVersion5"); + state vector> workers = wait(getWorkers(self->dbInfo)); + TraceEvent("verifyExecTraceVersion6"); + + state std::vector> proxyMessages; + state std::vector> tLogMessages; + state std::vector> storageMessages; + state std::vector> coordMessages; + state int numDurableVersionChecks = 0; + state std::map visitedStorageTags; + + state int retryCnt = 0; + loop { + proxyMessages.clear(); + storageMessages.clear(); + coordMessages.clear(); + + state bool retry = false; + + for (int i = 0; i < workers.size(); i++) { + std::string eventToken = "ExecTrace/Coordinators/" + self->snapUID.toString(); + StringRef eventTokenRef(eventToken); + coordMessages.push_back( + timeoutError(workers[i].first.eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 1.0)); + } + + for (int i = 0; i < workers.size(); i++) { + std::string eventToken = "ExecTrace/Proxy/" + self->snapUID.toString(); + StringRef eventTokenRef(eventToken); + proxyMessages.push_back( + timeoutError(workers[i].first.eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 1.0)); + } + + for (int i = 0; i < storageWorkers.size(); i++) { + std::string eventToken = "ExecTrace/storage/" + self->snapUID.toString(); + StringRef eventTokenRef(eventToken); + storageMessages.push_back(timeoutError( + storageWorkers[i].first.eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 1.0)); + } + + TraceEvent("WAITING for proxy1"); + try { + wait(waitForAll(proxyMessages)); + // wait(waitForAll(storageMessages)); + // wait(waitForAll(coordMessages)); + } 
catch (Error& e) { + TraceEvent("verifyExecTraceVersionFailure") + .detail("Reason", "Failed to get proxy or storage messages") + .detail("code", e.what()); + if (e.code() != error_code_timed_out) { + return false; + } else { + retry = true; + ++retryCnt; + } + } + TraceEvent("WAITING for proxy2"); + if (retry == false) { + break; + } + TraceEvent("WAITING for proxy3"); + + if (retry && retryCnt >= 4) { + TraceEvent("Unable to retrieve proxy/storage/coord messages " + "after retries"); + std::terminate(); + return false; + } + } + + // filter out empty messages + filterEmptyMessages(proxyMessages); + filterEmptyMessages(storageMessages); + filterEmptyMessages(coordMessages); + + if (proxyMessages.size() != 1) { + // if no message from proxy or more than one fail the check + TraceEvent("No ExecTrace message from Proxy"); + std::terminate(); + return false; + } + + TraceEvent("CoordinatorSnapStatus") + .detail("coordMessage size", coordMessages.size()) + .detail("coordAddrssize", coordAddrs.size()); + if (coordMessages.size() < (coordAddrs.size() + 1) / 2) { + TraceEvent("No ExecTrace message from Quorum of coordinators"); + std::terminate(); + return false; + } + + state int i = 0; + state int numTags = -1; + + for (; i < proxyMessages.size(); i++) { + state Version execVersion = -1; + state std::string emptyStr; + + TraceEvent("Printing Relevant ProxyMessage").detail("msg", proxyMessages[i].get().toString()); + // FIXME: sramamoorthy, how to compare with empty string + if (proxyMessages[i].get().toString() != emptyStr) { + getVersionAndnumTags(proxyMessages[i].get(), execVersion, numTags); + ASSERT(numTags > 0); + } + state int j = 0; + for (; (execVersion != -1) && j < storageMessages.size(); j++) { + // for each message that has this verison, get the tag and + // the durable version + // FIXME: sramamoorthy, for now allow default values + state Tag tag; + state Tag invalidTag; + // FIXME: sramamoorthy, for now allow default values + state Version durableVersion = 
-1; + TraceEvent("Printing Relevant StorageMessage").detail("msg", storageMessages[j].get().toString()); + // FIXME: sramamoorthy, how to compare with empty string + ASSERT(storageMessages[j].get().toString() != emptyStr); + getTagAndDurableVersion(storageMessages[j].get(), execVersion, tag, durableVersion); + TraceEvent("Searching for tlog messages").detail("tag", tag.toString()); + + retryCnt = 0; + loop { + retry = false; + tLogMessages.clear(); + + // for (int m = 0; (tag != -1) && m < tLogWorkers.size(); m++) { + for (int m = 0; (tag != invalidTag) && m < tLogWorkers.size(); m++) { + visitedStorageTags[tag] = true; + std::string eventToken = "ExecTrace/TLog/" + tag.toString() + "/" + self->snapUID.toString(); + StringRef eventTokenRef(eventToken); + tLogMessages.push_back(timeoutError( + tLogWorkers[m].first.eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 1.0)); + } + + try { + TraceEvent("waiting for tlog messages"); + if (tag != invalidTag) { + wait(waitForAll(tLogMessages)); + } + } catch (Error& e) { + TraceEvent("verifyExecTraceVersionFailure") + .detail("Reason", "Failed to get tLogMessages") + .detail("code", e.what()); + if (e.code() != error_code_timed_out) { + return false; + } else { + retry = true; + ++retryCnt; + } + } + if (retry == false) { + break; + } + if (retry && retryCnt > 4) { + TraceEvent("Unable to retrieve tLog messages after " + "retries"); + std::terminate(); + return false; + } + } + + filterEmptyMessages(tLogMessages); + + state int k = 0; + numDurableVersionChecks = 0; + for (; (tag != invalidTag) && k < tLogMessages.size(); k++) { + // for each of the message that has this version and tag + // verify that the minVersioninTlog < durableVersion < + // maxVersioninTlog + Version minTLogVersion = -1; + Version maxTLogVersion = -1; + + TraceEvent("tLogMessage").detail("msg", tLogMessages[k].get().toString()); + + // FIXME, sramamoorthy, handle empty string + ASSERT(tLogMessages[k].get().toString() != emptyStr); + 
getMinAndMaxTLogVersions(tLogMessages[k].get(), execVersion, tag, minTLogVersion, maxTLogVersion); + if (minTLogVersion != -1 && maxTLogVersion != -1) { + if ((durableVersion > minTLogVersion) && (durableVersion < maxTLogVersion)) { + ++numDurableVersionChecks; + TraceEvent("Successs!!!"); + } + } + } + // if we did not find even one tlog for a given tag fail the + // check + if (numDurableVersionChecks < 1) { + TraceEvent("No TLog found for a tag"); + std::terminate(); + } + + TraceEvent("next iteration"); + tLogMessages.clear(); + } + } + + // validates that we encountered unique tags of value numTags + if (numTags != visitedStorageTags.size()) { + TraceEvent("Storage messages were not found"); + std::terminate(); + return false; + } + TraceEvent("Check Succeeded for verifyExecTraceVersion"); + return true; + } +}; + +WorkloadFactory SnapTestWorkloadFactory("SnapTest"); diff --git a/flow/Platform.cpp b/flow/Platform.cpp index e3ee72fab1..03917cc896 100644 --- a/flow/Platform.cpp +++ b/flow/Platform.cpp @@ -42,6 +42,7 @@ #include #include #include +#include #include #include "flow/UnitTest.h" #include "flow/FaultInjection.h" @@ -2682,6 +2683,47 @@ void* loadFunction(void* lib, const char* func_name) { return dlfcn; } +int +fdbFork(const std::string& path, const std::vector& args) +{ + std::vector paramList; + for (int i = 0; i < args.size(); i++) { + paramList.push_back(const_cast(args[i].c_str())); + } + paramList.push_back(nullptr); + + // FIXME: sramamoorthy, FDB6port, dynamic content fails + auto te = TraceEvent("fdbFork"); + te.detail("cmd", path); + // for (int i = 0; i < args.size(); i++) { + // te.detail("args", args[i]); + //} + + pid_t pid = fork(); + if (pid == -1) { + TraceEvent(SevWarnAlways, "Command failed to spawn") + .detail("cmd", path); + throw platform_error(); + } else if (pid > 0) { + int status; + waitpid(pid, &status, 0); + if (!(WIFEXITED(status) && WEXITSTATUS(status) == 0)) { + TraceEvent(SevWarnAlways, "Command failed") + .detail("cmd", 
path) + .detail("errno", WIFEXITED(status) ? WEXITSTATUS(status) : -1); + return WIFEXITED(status) ? WEXITSTATUS(status) : -1; + } + TraceEvent("Command status") + .detail("cmd", path) + .detail("errno", WIFEXITED(status) ? WEXITSTATUS(status) : 0); + } else { + execv(const_cast(path.c_str()), ¶mList[0]); + _exit(EXIT_FAILURE); + } + return 0; +} + + void platformInit() { #ifdef WIN32 _set_FMA3_enable(0); // Workaround for VS 2013 code generation bug. See https://connect.microsoft.com/VisualStudio/feedback/details/811093/visual-studio-2013-rtm-c-x64-code-generation-bug-for-avx2-instructions diff --git a/flow/Platform.h b/flow/Platform.h index b6183bbc1c..3d5a401cb0 100644 --- a/flow/Platform.h +++ b/flow/Platform.h @@ -533,6 +533,16 @@ bool isLibraryLoaded(const char* lib_path); void* loadLibrary(const char* lib_path); void* loadFunction(void* lib, const char* func_name); +// FIXME: sramamoorthy, clang-format fails here fix it +// wrapper to execv +// takes two arguments: +// 1. path to the binary +// 2. 
list of arguments +// returns: +// throws platform_error() if it is not able to spawn the process +// returns 0 on success or status from the command being run +int fdbFork(const std::string& path, const std::vector& args); + #ifdef _WIN32 inline static int ctzll( uint64_t value ) { unsigned long count = 0; From 72dd0671735c7bf09a38788b954a38088068b101 Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Wed, 6 Mar 2019 17:16:59 -0800 Subject: [PATCH 03/69] Trace message changes and fix few FIXMEs --- fdbclient/MasterProxyInterface.h | 4 - fdbclient/NativeAPI.actor.cpp | 28 +++--- fdbserver/FDBExecArgs.cpp | 8 +- fdbserver/MasterProxyServer.actor.cpp | 9 +- fdbserver/OldTLogServer_6_0.actor.cpp | 112 +++++++++++------------ fdbserver/TLogInterface.h | 1 - fdbserver/TLogServer.actor.cpp | 120 ++++++++++++------------- fdbserver/WorkerInterface.actor.h | 4 - fdbserver/storageserver.actor.cpp | 37 ++++---- fdbserver/worker.actor.cpp | 16 ++-- fdbserver/workloads/SnapTest.actor.cpp | 2 +- flow/Platform.cpp | 40 ++++----- flow/Platform.h | 1 - 13 files changed, 178 insertions(+), 204 deletions(-) diff --git a/fdbclient/MasterProxyInterface.h b/fdbclient/MasterProxyInterface.h index 0a6477a626..0186cccfd6 100644 --- a/fdbclient/MasterProxyInterface.h +++ b/fdbclient/MasterProxyInterface.h @@ -300,8 +300,6 @@ struct GetHealthMetricsRequest }; struct ExecRequest { - // FIXME: sramamoorthy, FDB6PORT, flat-buffers related versioning disabled - // constexpr static flat_buffers::FileIdentifier file_identifier = 1315755287; Arena arena; StringRef execPayLoad; ReplyPromise reply; @@ -312,8 +310,6 @@ struct ExecRequest { template void serialize(Ar& ar) { - // FIXME: sramamoorthy, FDB6PORT, flat-buffers related versioning disabled - // serializer(ar, v2(execPayLoad), v2(reply), v2(arena), v2(debugID)); serializer(ar, execPayLoad, reply, arena, debugID); } }; diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 1873f2a2f9..cc427466d2 100644 --- 
a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -3317,10 +3317,10 @@ ACTOR Future snapCreate(Database inputCx, StringRef snapCmd, UID snapUID) // remember the client ID before the snap operation state UID preSnapClientUID = cx->clientInfo->get().id; - TraceEvent("snapCreate") - .detail("snapCmd", snapCmd.toString()) - .detail("snapCreateEnter", snapUID) - .detail("preSnapClientUID", preSnapClientUID); + TraceEvent("SnapCreateEnter") + .detail("SnapCmd", snapCmd.toString()) + .detail("UID", snapUID) + .detail("PreSnapClientUID", preSnapClientUID); tr.debugTransaction(snapUID); std::string snapString = "empty-binary:uid=" + snapUID.toString(); @@ -3339,12 +3339,12 @@ ACTOR Future snapCreate(Database inputCx, StringRef snapCmd, UID snapUID) } } - TraceEvent("snapCreate").detail("snapCreate.After.LockingTLogs", snapUID); + TraceEvent("SnapCreateAfterLockingTLogs").detail("UID", snapUID); int p = snapCmd.toString().find_first_of(':', 0); state std::string snapPayLoad; - TraceEvent("snapCmd").detail("snapCmd", snapCmd.toString()); + TraceEvent("SnapCmd").detail("Command", snapCmd.toString()); if (p == snapCmd.toString().npos) { snapPayLoad = snapCmd.toString() + ":uid=" + snapUID.toString(); } else { @@ -3364,11 +3364,11 @@ ACTOR Future snapCreate(Database inputCx, StringRef snapCmd, UID snapUID) tr.execute(execSnap, snapPayLoadRef); wait(tr.commit()); } catch (Error& e) { - TraceEvent("snapCreate").detail("snapCreateErrorSnapTLogStorage", e.what()); + TraceEvent("SnapCreateErroSnapTLogStorage").detail("Error", e.what()); throw; } - TraceEvent("snapCreate").detail("snapCreate.After.SnappingTLogsStorage", snapUID); + TraceEvent("SnapCreateAfterSnappingTLogStorage").detail("UID", snapUID); // enable popping of the TLog loop { @@ -3382,28 +3382,28 @@ ACTOR Future snapCreate(Database inputCx, StringRef snapCmd, UID snapUID) } } - TraceEvent("snapCreate").detail("snapCreate.After.UnlockingTLogs", snapUID); + 
TraceEvent("SnapCreateAfterUnlockingTLogs").detail("UID", snapUID); // snap the coordinators try { Future exec = executeCoordinators(cx, snapPayLoad, snapUID); wait(exec); } catch (Error& e) { - TraceEvent("snapCreate").detail("snapCreateErrorSnapCoordinators", e.what()); + TraceEvent("SnapCreateErrorSnapCoords").detail("Error", e.what()); throw; } - TraceEvent("snapCreate").detail("snapCreate.After.SnappingCoords", snapUID); + TraceEvent("SnapCreateAfterSnappingCoords").detail("UID", snapUID); // if the client IDs did not change then we have a clean snapshot UID postSnapClientUID = cx->clientInfo->get().id; if (preSnapClientUID != postSnapClientUID) { TraceEvent("UID mismatch") - .detail("preSnapClientUID", preSnapClientUID) - .detail("postSnapClientUID", postSnapClientUID); + .detail("SnapPreSnapClientUID", preSnapClientUID) + .detail("SnapPostSnapClientUID", postSnapClientUID); throw coordinators_changed(); } - TraceEvent("snapCreate").detail("snapCreate.Complete", snapUID); + TraceEvent("SnapCreateComplete").detail("UID", snapUID); return Void(); } diff --git a/fdbserver/FDBExecArgs.cpp b/fdbserver/FDBExecArgs.cpp index 36ea8c2341..d7722a3886 100644 --- a/fdbserver/FDBExecArgs.cpp +++ b/fdbserver/FDBExecArgs.cpp @@ -87,14 +87,14 @@ void ExecCmdValueString::parseCmdValue() { } void ExecCmdValueString::dbgPrint() { - auto te = TraceEvent("execCmdValueString"); + auto te = TraceEvent("ExecCmdValueString"); - te.detail("cmdValueString", cmdValueString); - te.detail("binaryPath", binaryPath); + te.detail("CmdValueString", cmdValueString); + te.detail("BinaryPath", binaryPath); int i = 0; for (auto elem : binaryArgs) { - te.detail(format("arg{}", ++i).c_str(), elem); + te.detail(format("Arg", ++i).c_str(), elem); } return; } diff --git a/fdbserver/MasterProxyServer.actor.cpp b/fdbserver/MasterProxyServer.actor.cpp index 14221ade59..beb0703981 100644 --- a/fdbserver/MasterProxyServer.actor.cpp +++ b/fdbserver/MasterProxyServer.actor.cpp @@ -763,15 +763,14 @@ ACTOR 
Future commitBatch( te1.detail("To", "all sources"); te1.detail("Mutation", m.toString()); te1.detail("Version", commitVersion); - te1.detail("numTags", allSources.size()); + te1.detail("NumTags", allSources.size()); if (m.param1 == execSnap) { te1.trackLatest(tokenStr.c_str()); } - // FIXME: sramamoorthy, FDB6port - dynamic tracing not supported? - // auto te = TraceEvent(SevDebug, "tagInfo"); + auto te = TraceEvent(SevDebug, "TagInfo"); int i = 0; for (auto& tag : allSources) { - // te.detail(format("tagId{}", ++i).c_str(), tag.toString()); + te.detail(format("TagId", ++i).c_str(), tag.toString()); toCommit.addTag(tag); } toCommit.addTypedMessage(m); @@ -1572,7 +1571,7 @@ ACTOR Future masterProxyServerCore( "MasterProxyServer.masterProxyServerCore." "ExecRequest"); - TraceEvent("ExecRequest").detail("payload", execReq.execPayLoad.toString()); + TraceEvent("ExecRequest").detail("Payload", execReq.execPayLoad.toString()); // get the list of coordinators state Optional coordinators = commitData.txnStateStore->readValue(coordinatorsKey).get(); diff --git a/fdbserver/OldTLogServer_6_0.actor.cpp b/fdbserver/OldTLogServer_6_0.actor.cpp index d9803bbb01..1c7372172b 100644 --- a/fdbserver/OldTLogServer_6_0.actor.cpp +++ b/fdbserver/OldTLogServer_6_0.actor.cpp @@ -1320,7 +1320,7 @@ ACTOR Future tLogCommit( rd >> len; param2 = StringRef((uint8_t const*)rd.readBytes(len), len); - TraceEvent("oldTLog6TLogCommandType", self->dbgid).detail("execCmd", execCmd.toString()); + TraceEvent("TLogExecCommandType", self->dbgid).detail("Value", execCmd.toString()); execArg.setCmdValueString(param2.toString()); execArg.dbgPrint(); @@ -1338,21 +1338,20 @@ ACTOR Future tLogCommit( } if (execVersion == invalidVersion) { - TraceEvent(SevWarn, "snapFailed") - .detail("ignorePopUid", self->ignorePopUid) - .detail("ignorePopRequest", self->ignorePopRequest) - .detail("reason", reason) + TraceEvent(SevWarn, "TLogSnapFailed") + .detail("IgnorePopUid", self->ignorePopUid) + 
.detail("IgnorePopRequest", self->ignorePopRequest) + .detail("Reason", reason) .trackLatest(reason.c_str()); auto startTag = logData->allTags.begin(); std::string message = "ExecTrace/TLog/" + logData->allTags.begin()->toString(); - // startTag.toString() + "/" + uidStr; - TraceEvent("oldTLog6ExecCmdSnapCreate") - .detail("uidStr", uidStr) - .detail("status", -1) - .detail("tag", logData->allTags.begin()->toString()) - .detail("role", "TLog") + TraceEvent("ExecCmdSnapCreate") + .detail("Uid", uidStr) + .detail("Status", -1) + .detail("Tag", logData->allTags.begin()->toString()) + .detail("Role", "TLog") .trackLatest(message.c_str()); } } @@ -1360,38 +1359,38 @@ ACTOR Future tLogCommit( execVersion = invalidVersion; self->ignorePopRequest = true; if (self->ignorePopUid != "") { - TraceEvent(SevWarn, "oldTlog6TLogPopDisableonDisable") - .detail("ignorePopUid", self->ignorePopUid) - .detail("uidStr", uidStr); + TraceEvent(SevWarn, "TLogPopDisableonDisable") + .detail("IgnorePopUid", self->ignorePopUid) + .detail("UidStr", uidStr); } self->ignorePopUid = uidStr; // ignorePopRequest will be turned off after 30 seconds self->ignorePopDeadline = g_network->now() + 30.0; - TraceEvent("oldTLog6ExecCmdPopDisable") - .detail("execCmd", execCmd.toString()) - .detail("uidStr", uidStr) - .detail("ignorePopUid", self->ignorePopUid) - .detail("ignporePopRequest", self->ignorePopRequest) - .detail("ignporePopDeadline", self->ignorePopDeadline) + TraceEvent("TLogExecCmdPopDisable") + .detail("ExecCmd", execCmd.toString()) + .detail("UidStr", uidStr) + .detail("IgnorePopUid", self->ignorePopUid) + .detail("IgnporePopRequest", self->ignorePopRequest) + .detail("IgnporePopDeadline", self->ignorePopDeadline) .trackLatest("disablePopTLog"); } if (execCmd == execEnableTLogPop) { execVersion = invalidVersion; if (self->ignorePopUid != uidStr) { - TraceEvent(SevWarn, "oldTLog6tLogPopDisableEnableUidMismatch") - .detail("ignorePopUid", self->ignorePopUid) - .detail("uidStr", uidStr) + 
TraceEvent(SevWarn, "TLogPopDisableEnableUidMismatch") + .detail("IgnorePopUid", self->ignorePopUid) + .detail("UidStr", uidStr) .trackLatest("tLogPopDisableEnableUidMismatch"); } self->ignorePopRequest = false; self->ignorePopDeadline = 0.0; self->ignorePopUid = ""; - TraceEvent("oldTLog6ExecCmdPopEnable") - .detail("execCmd", execCmd.toString()) - .detail("uidStr", uidStr) - .detail("ignorePopUid", self->ignorePopUid) - .detail("ignporePopRequest", self->ignorePopRequest) - .detail("ignporePopDeadline", self->ignorePopDeadline) + TraceEvent("TLog6ExecCmdPopEnable") + .detail("ExecCmd", execCmd.toString()) + .detail("UidStr", uidStr) + .detail("IgnorePopUid", self->ignorePopUid) + .detail("IgnporePopRequest", self->ignorePopRequest) + .detail("IgnporePopDeadline", self->ignorePopDeadline) .trackLatest("enablePopTLog"); } } @@ -1456,9 +1455,9 @@ ACTOR Future tLogCommit( std::string tLogFolderToCreateCmd = "mkdir " + tLogFolderTo; std::string tLogFolderCopyCmd = "cp " + tLogFolderFrom + " " + tLogFolderTo; - TraceEvent("oldTLog6ExecSnapCommands") - .detail("tLogFolderToCreateCmd", tLogFolderToCreateCmd) - .detail("tLogFolderCopyCmd", tLogFolderCopyCmd); + TraceEvent("ExecSnapCommands") + .detail("TLogFolderToCreateCmd", tLogFolderToCreateCmd) + .detail("TLogFolderCopyCmd", tLogFolderCopyCmd); vector paramList; std::string cpBin = "/bin/cp"; @@ -1476,37 +1475,34 @@ ACTOR Future tLogCommit( err = fdbFork(cpBin, paramList); } } - TraceEvent("oldTLog6CommitExecTraceTLog") - .detail("uidStr", uidStr) - .detail("status", err) - .detail("tag", logData->allTags.begin()->toString()) - .detail("role", "TLog"); + TraceEvent("TLogCommitExecTraceTLog") + .detail("UidStr", uidStr) + .detail("Status", err) + .detail("Tag", logData->allTags.begin()->toString()) + .detail("Role", "TLog"); - // print the status message + // print the detailed status message for (auto it = logData->allTags.begin(); it != logData->allTags.end(); it++) { Version poppedTagVersion = -1; auto tagv = 
logData->getTagData(*it); - // auto tagv = logData->tag_data.find(*it); - // if (tagv != logData->tag_data.end()) { - // poppedTagVersion = tagv->value.popped; - // } + poppedTagVersion = tagv->popped; int len = param2.size(); state std::string message = "ExecTrace/TLog/" + it->toString() + "/" + uidStr; - TraceEvent te = TraceEvent(SevDebug, "oldTLog6ExecTraceDetailed"); - te.detail("uid", uidStr); - te.detail("status", err); - te.detail("role", "TLog"); - te.detail("execCmd", execCmd.toString()); - te.detail("param2", param2.toString()); + TraceEvent te = TraceEvent(SevDebug, "TLogExecTraceDetailed"); + te.detail("Uid", uidStr); + te.detail("Status", err); + te.detail("Role", "TLog"); + te.detail("ExecCmd", execCmd.toString()); + te.detail("Param2", param2.toString()); te.detail("Tag", it->toString()); te.detail("Version", qe.version); - te.detail("poppedTagVersion", poppedTagVersion); - te.detail("persistentDataVersion", logData->persistentDataVersion); - te.detail("persistentDatadurableVersion", logData->persistentDataDurableVersion); - te.detail("queueCommittedVersion", logData->queueCommittedVersion.get()); - te.detail("ignorePopUid", self->ignorePopUid); + te.detail("PoppedTagVersion", poppedTagVersion); + te.detail("PersistentDataVersion", logData->persistentDataVersion); + te.detail("PersistentDatadurableVersion", logData->persistentDataDurableVersion); + te.detail("QueueCommittedVersion", logData->queueCommittedVersion.get()); + te.detail("IgnorePopUid", self->ignorePopUid); if (execCmd == execSnap) { te.trackLatest(message.c_str()); } @@ -1702,16 +1698,16 @@ ACTOR Future serveTLogInterface( TLogData* self, TLogInterface tli, Refere self->ignorePopRequest = false; self->ignorePopUid = ""; self->ignorePopDeadline = 0.0; - TraceEvent("oldTLog6resetIgnorePopRequest") - .detail("now", g_network->now()) - .detail("ignorePopRequest", self->ignorePopRequest) - .detail("ignorePopDeadline", self->ignorePopDeadline) + TraceEvent("ResetIgnorePopRequest") + 
.detail("Now", g_network->now()) + .detail("IgnorePopRequest", self->ignorePopRequest) + .detail("IgnorePopDeadline", self->ignorePopDeadline) .trackLatest("disableTLogPopTimedOut"); } if (!self->ignorePopRequest) { logData->addActor.send(tLogPop(self, req, logData)); } else { - TraceEvent("oldTLog6ignoringPopRequest").detail("ignorePopDeadline", self->ignorePopDeadline); + TraceEvent("IgnoringPopRequest").detail("IgnorePopDeadline", self->ignorePopDeadline); } } when( TLogCommitRequest req = waitNext( tli.commit.getFuture() ) ) { diff --git a/fdbserver/TLogInterface.h b/fdbserver/TLogInterface.h index 22e6756109..aa0c8a622e 100644 --- a/fdbserver/TLogInterface.h +++ b/fdbserver/TLogInterface.h @@ -225,7 +225,6 @@ struct TLogCommitRequest { : arena(a), prevVersion(prevVersion), version(version), knownCommittedVersion(knownCommittedVersion), minKnownCommittedVersion(minKnownCommittedVersion), messages(messages), debugID(debugID), hasExecOp(hasExecOp){} template void serialize( Ar& ar ) { - // FIXME: sramamoorthy, FDB6port, flatbuffers related versioning missing serializer(ar, prevVersion, version, knownCommittedVersion, minKnownCommittedVersion, messages, reply, arena, debugID, hasExecOp); } }; diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index c2100d815c..43414ef38a 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -1687,7 +1687,7 @@ ACTOR Future tLogCommit( rd >> len; param2 = StringRef((uint8_t const*)rd.readBytes(len), len); - TraceEvent("TLogCommandType", self->dbgid).detail("execCmd", execCmd.toString()); + TraceEvent("TLogExecCommandType", self->dbgid).detail("Value", execCmd.toString()); execArg.setCmdValueString(param2.toString()); execArg.dbgPrint(); @@ -1705,22 +1705,21 @@ ACTOR Future tLogCommit( } if (execVersion == invalidVersion) { - TraceEvent(SevWarn, "oldTLog6snapFailed") - .detail("ignorePopUid", self->ignorePopUid) - .detail("ignorePopRequest", self->ignorePopRequest) - 
.detail("reason", reason) + TraceEvent(SevWarn, "TLogSnapFailed") + .detail("IgnorePopUid", self->ignorePopUid) + .detail("IgnorePopRequest", self->ignorePopRequest) + .detail("Reason", reason) .trackLatest(reason.c_str()); auto startTag = logData->allTags.begin(); std::string message = "ExecTrace/TLog/" + logData->allTags.begin()->toString(); - // startTag.toString() + "/" + uidStr; - TraceEvent("TLog6ExecCmdSnapCreate") - .detail("uidStr", uidStr) - .detail("status", -1) - .detail("tag", logData->allTags.begin()->toString()) - .detail("role", "TLog") + TraceEvent("ExecCmdSnapCreate") + .detail("Uid", uidStr) + .detail("Status", -1) + .detail("Tag", logData->allTags.begin()->toString()) + .detail("Role", "TLog") .trackLatest(message.c_str()); } } @@ -1728,38 +1727,38 @@ ACTOR Future tLogCommit( execVersion = invalidVersion; self->ignorePopRequest = true; if (self->ignorePopUid != "") { - TraceEvent(SevWarn, "tLogPopDisableOnDisable") - .detail("ignorePopUid", self->ignorePopUid) - .detail("uidStr", uidStr); + TraceEvent(SevWarn, "TLogPopDisableOnDisable") + .detail("IgnorePopUid", self->ignorePopUid) + .detail("UidStr", uidStr); } self->ignorePopUid = uidStr; // ignorePopRequest will be turned off after 30 seconds self->ignorePopDeadline = g_network->now() + 30.0; TraceEvent("TLogExecCmdPopDisable") - .detail("execCmd", execCmd.toString()) - .detail("uidStr", uidStr) - .detail("ignorePopUid", self->ignorePopUid) - .detail("ignporePopRequest", self->ignorePopRequest) - .detail("ignporePopDeadline", self->ignorePopDeadline) + .detail("ExecCmd", execCmd.toString()) + .detail("UidStr", uidStr) + .detail("IgnorePopUid", self->ignorePopUid) + .detail("IgnporePopRequest", self->ignorePopRequest) + .detail("IgnporePopDeadline", self->ignorePopDeadline) .trackLatest("disablePopTLog"); } if (execCmd == execEnableTLogPop) { execVersion = invalidVersion; if (self->ignorePopUid != uidStr) { - TraceEvent(SevWarn, "tLogPopDisableEnableUidMismatch") - .detail("ignorePopUid", 
self->ignorePopUid) - .detail("uidStr", uidStr) - .trackLatest("tLogPopDisableEnableUidMismatch"); + TraceEvent(SevWarn, "TLogPopDisableEnableUidMismatch") + .detail("IgnorePopUid", self->ignorePopUid) + .detail("UidStr", uidStr) + .trackLatest("TLogPopDisableEnableUidMismatch"); } self->ignorePopRequest = false; self->ignorePopDeadline = 0.0; self->ignorePopUid = ""; TraceEvent("TLogExecCmdPopEnable") - .detail("execCmd", execCmd.toString()) - .detail("uidStr", uidStr) - .detail("ignorePopUid", self->ignorePopUid) - .detail("ignporePopRequest", self->ignorePopRequest) - .detail("ignporePopDeadline", self->ignorePopDeadline) + .detail("ExecCmd", execCmd.toString()) + .detail("UidStr", uidStr) + .detail("IgnorePopUid", self->ignorePopUid) + .detail("IgnporePopRequest", self->ignorePopRequest) + .detail("IgnporePopDeadline", self->ignorePopDeadline) .trackLatest("enablePopTLog"); } } @@ -1826,8 +1825,8 @@ ACTOR Future tLogCommit( "cp " + tLogFolderFrom + " " + tLogFolderTo; TraceEvent("TLogExecSnapcommands") - .detail("tLogFolderToCreateCmd", tLogFolderToCreateCmd) - .detail("tLogFolderCopyCmd", tLogFolderCopyCmd); + .detail("TLogFolderToCreateCmd", tLogFolderToCreateCmd) + .detail("TLogFolderCopyCmd", tLogFolderCopyCmd); vector paramList; std::string cpBin = "/bin/cp"; @@ -1846,43 +1845,38 @@ ACTOR Future tLogCommit( } } TraceEvent("TLogCommitExecTraceLog") - .detail("uidStr", uidStr) - .detail("status", err) - .detail("tag", logData->allTags.begin()->toString()) - .detail("role", "TLog"); + .detail("UidStr", uidStr) + .detail("Status", err) + .detail("Tag", logData->allTags.begin()->toString()) + .detail("Role", "TLog"); // print the status message for (auto it = logData->allTags.begin(); it != logData->allTags.end(); it++) { Version poppedTagVersion = -1; auto tagv = logData->getTagData(*it); - // auto tagv = logData->tag_data.find(*it); - // if (tagv != logData->tag_data.end()) { - // poppedTagVersion = tagv->value.popped; - // } + poppedTagVersion = 
tagv->popped; - int len = param2.size(); - state std::string message = + int len = param2.size(); + state std::string message = "ExecTrace/TLog/" + it->toString() + "/" + uidStr; TraceEvent te = TraceEvent(SevDebug, "TLogExecTraceDetailed"); - te.detail("uid", uidStr); - te.detail("status", err); - te.detail("role", "TLog"); - te.detail("execCmd", execCmd.toString()); - te.detail("param2", param2.toString()); - te.detail("Tag", it->toString()); - te.detail("Version", qe.version); - te.detail("poppedTagVersion", poppedTagVersion); - te.detail("persistentDataVersion", logData->persistentDataVersion); - te.detail("persistentDatadurableVersion", - logData->persistentDataDurableVersion); - te.detail("queueCommittedVersion", - logData->queueCommittedVersion.get()); - te.detail("ignorePopUid", self->ignorePopUid); - if (execCmd == execSnap) { - te.trackLatest(message.c_str()); - } - } + te.detail("Uid", uidStr); + te.detail("Status", err); + te.detail("Role", "TLog"); + te.detail("ExecCmd", execCmd.toString()); + te.detail("Param2", param2.toString()); + te.detail("Tag", it->toString()); + te.detail("Version", qe.version); + te.detail("PoppedTagVersion", poppedTagVersion); + te.detail("PersistentDataVersion", logData->persistentDataVersion); + te.detail("PersistentDatadurableVersion", logData->persistentDataDurableVersion); + te.detail("QueueCommittedVersion", logData->queueCommittedVersion.get()); + te.detail("IgnorePopUid", self->ignorePopUid); + if (execCmd == execSnap) { + te.trackLatest(message.c_str()); + } + } execVersion = invalidVersion; } @@ -2075,16 +2069,16 @@ ACTOR Future serveTLogInterface( TLogData* self, TLogInterface tli, Refere self->ignorePopRequest = false; self->ignorePopUid = ""; self->ignorePopDeadline = 0.0; - TraceEvent("resetIgnorePopRequest") - .detail("now", g_network->now()) - .detail("ignorePopRequest", self->ignorePopRequest) - .detail("ignorePopDeadline", self->ignorePopDeadline) + TraceEvent("ResetIgnorePopRequest") + .detail("Now", 
g_network->now()) + .detail("IgnorePopRequest", self->ignorePopRequest) + .detail("IgnorePopDeadline", self->ignorePopDeadline) .trackLatest("disableTLogPopTimedOut"); } if (!self->ignorePopRequest) { logData->addActor.send(tLogPop(self, req, logData)); } else { - TraceEvent("ignoringPopRequest").detail("ignorePopDeadline", self->ignorePopDeadline); + TraceEvent("IgnoringPopRequest").detail("IgnorePopDeadline", self->ignorePopDeadline); } } when( TLogCommitRequest req = waitNext( tli.commit.getFuture() ) ) { diff --git a/fdbserver/WorkerInterface.actor.h b/fdbserver/WorkerInterface.actor.h index 8d5d3db4ae..bc973f9208 100644 --- a/fdbserver/WorkerInterface.actor.h +++ b/fdbserver/WorkerInterface.actor.h @@ -241,8 +241,6 @@ struct TraceBatchDumpRequest { }; struct ExecuteRequest { - // FIXME: sramamoorthy, FDB6port enable flat_buffers - // constexpr static flat_buffers::FileIdentifier file_identifier = 16478959; ReplyPromise reply; Arena arena; @@ -254,8 +252,6 @@ struct ExecuteRequest { template void serialize(Ar& ar) { - // FIXME: sramamoorthy, FDB6port enable flat_buffers - // serializer(ar, v2(reply), v2(execPayLoad), v2(arena)); serializer(ar, reply, execPayLoad, arena); } }; diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp index aaa8dda4c9..31721626d2 100644 --- a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -1863,7 +1863,7 @@ void splitMutation(StorageServer* data, KeyRangeMap& map, MutationRef const& std::string cmd = m.param1.toString(); int len = m.param2.size(); if ((cmd == execDisableTLogPop) || (cmd == execEnableTLogPop)) { - TraceEvent("IgnoreNonSnapCommands").detail("execCommand", cmd); + TraceEvent("IgnoreNonSnapCommands").detail("ExecCommand", cmd); return; } ExecCmdValueString execArg(m.param2.toString()); @@ -1895,9 +1895,9 @@ void splitMutation(StorageServer* data, KeyRangeMap& map, MutationRef const& } else { // copy the files TraceEvent("ExecTraceStorage") - 
.detail("storageFolder", data->folder) - .detail("localMachineId", data->thisServerID.toString()) - .detail("durableVersion", data->durableVersion.get()); + .detail("StorageFolder", data->folder) + .detail("LocalMachineId", data->thisServerID.toString()) + .detail("DurableVersion", data->durableVersion.get()); std::string folder = abspath(data->folder); @@ -1908,8 +1908,8 @@ void splitMutation(StorageServer* data, KeyRangeMap& map, MutationRef const& std::string folderCopyCmd = "cp " + folderFrom + " " + folderTo; TraceEvent("ExecTraceStorageSnapcommands") - .detail("folderToCreateCmd", folderToCreateCmd) - .detail("folderCopyCmd", folderCopyCmd); + .detail("FolderToCreateCmd", folderToCreateCmd) + .detail("FolderCopyCmd", folderCopyCmd); vector paramList; std::string cpBin = "/bin/cp"; @@ -1918,7 +1918,7 @@ void splitMutation(StorageServer* data, KeyRangeMap& map, MutationRef const& paramList.push_back(mkdirBin); paramList.push_back(folderTo); err = fdbFork(mkdirBin, paramList); - TraceEvent("mkdirStatus").detail("errno", err); + TraceEvent("MkdirStatus").detail("Errno", err); if (err == 0) { paramList.clear(); @@ -1929,19 +1929,18 @@ void splitMutation(StorageServer* data, KeyRangeMap& map, MutationRef const& err = fdbFork(cpBin, paramList); } } - // FIXME, sramamoorthy, print for non execSnap commands too + auto tokenStr = "ExecTrace/storage/" + uidStr; + TraceEvent te = TraceEvent("ExecTraceStorage"); + te.detail("Uid", uidStr); + te.detail("Status", err); + te.detail("Role", "storage"); + te.detail("Version", ver); + te.detail("Mutation", m.toString()); + te.detail("Mid", data->thisServerID.toString()); + te.detail("DurableVersion", data->durableVersion.get()); + te.detail("DataVersion", data->version.get()); + te.detail("Tag", data->tag.toString()); if (cmd == execSnap) { - auto tokenStr = "ExecTrace/storage/" + uidStr; - TraceEvent te = TraceEvent("ExecTraceStorage"); - te.detail("uid", uidStr); - te.detail("status", err); - te.detail("role", "storage"); - 
te.detail("version", ver); - te.detail("mutation", m.toString()); - te.detail("mid", data->thisServerID.toString()); - te.detail("durableVersion", data->durableVersion.get()); - te.detail("data_version", data->version.get()); - te.detail("tag", data->tag.toString()); te.trackLatest(tokenStr.c_str()); } } else diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index 062de7cf72..70e84fa6b6 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -1219,8 +1219,8 @@ ACTOR Future workerServer(Reference connFile, std::string folderCopyCmd = "cp " + folderFrom + " " + folderTo; TraceEvent("ExecTraceCoordSnapcommands") - .detail("folderToCreateCmd", folderToCreateCmd) - .detail("folderCopyCmd", folderCopyCmd); + .detail("FolderToCreateCmd", folderToCreateCmd) + .detail("FolderCopyCmd", folderCopyCmd); vector paramList; std::string cpBin = "/bin/cp"; @@ -1229,7 +1229,7 @@ ACTOR Future workerServer(Reference connFile, paramList.push_back(mkdirBin); paramList.push_back(folderTo); err = fdbFork(mkdirBin, paramList); - TraceEvent("mkdirStatus").detail("errno", err); + TraceEvent("MkdirStatus").detail("Errno", err); if (err == 0) { paramList.clear(); @@ -1243,11 +1243,11 @@ ACTOR Future workerServer(Reference connFile, auto tokenStr = "ExecTrace/Coordinators/" + uidStr; auto te = TraceEvent("ExecTraceCoordinators"); - te.detail("uid", uidStr); - te.detail("status", err); - te.detail("role", "coordinator"); - te.detail("value", coordFolder); - te.detail("execPayLoad", req.execPayLoad.toString()); + te.detail("Uid", uidStr); + te.detail("Status", err); + te.detail("Role", "coordinator"); + te.detail("Value", coordFolder); + te.detail("ExecPayLoad", req.execPayLoad.toString()); te.trackLatest(tokenStr.c_str()); req.reply.send(Void()); } diff --git a/fdbserver/workloads/SnapTest.actor.cpp b/fdbserver/workloads/SnapTest.actor.cpp index 11be376e3a..e4da32be45 100644 --- a/fdbserver/workloads/SnapTest.actor.cpp +++ 
b/fdbserver/workloads/SnapTest.actor.cpp @@ -244,7 +244,7 @@ public: // workload functions } else if (self->testID == 1) { // create a snapshot state double toDelay = fmod(g_random->randomUInt32(), self->maxSnapDelay); - TraceEvent("toDelay").detail("toDelay", toDelay); + TraceEvent("ToDelay").detail("Value", toDelay); ASSERT(toDelay < self->maxSnapDelay); wait(delay(toDelay)); diff --git a/flow/Platform.cpp b/flow/Platform.cpp index 03917cc896..ab3a77da7d 100644 --- a/flow/Platform.cpp +++ b/flow/Platform.cpp @@ -2692,35 +2692,31 @@ fdbFork(const std::string& path, const std::vector& args) } paramList.push_back(nullptr); - // FIXME: sramamoorthy, FDB6port, dynamic content fails - auto te = TraceEvent("fdbFork"); - te.detail("cmd", path); - // for (int i = 0; i < args.size(); i++) { - // te.detail("args", args[i]); - //} + auto te = TraceEvent("FdbFork"); + te.detail("Cmd", path); + for (int i = 0; i < args.size(); i++) { + te.detail("Args", args[i]); + } pid_t pid = fork(); if (pid == -1) { - TraceEvent(SevWarnAlways, "Command failed to spawn") - .detail("cmd", path); - throw platform_error(); - } else if (pid > 0) { + TraceEvent(SevWarnAlways, "CommandFailedToSpawn").detail("Cmd", path); + throw platform_error(); + } else if (pid > 0) { int status; waitpid(pid, &status, 0); if (!(WIFEXITED(status) && WEXITSTATUS(status) == 0)) { - TraceEvent(SevWarnAlways, "Command failed") - .detail("cmd", path) - .detail("errno", WIFEXITED(status) ? WEXITSTATUS(status) : -1); - return WIFEXITED(status) ? WEXITSTATUS(status) : -1; - } - TraceEvent("Command status") - .detail("cmd", path) - .detail("errno", WIFEXITED(status) ? WEXITSTATUS(status) : 0); - } else { - execv(const_cast(path.c_str()), ¶mList[0]); + TraceEvent(SevWarnAlways, "CommandFailed") + .detail("Cmd", path) + .detail("Errno", WIFEXITED(status) ? WEXITSTATUS(status) : -1); + return WIFEXITED(status) ? 
WEXITSTATUS(status) : -1; + } + TraceEvent("CommandStatus").detail("Cmd", path).detail("Errno", WIFEXITED(status) ? WEXITSTATUS(status) : 0); + } else { + execv(const_cast(path.c_str()), ¶mList[0]); _exit(EXIT_FAILURE); - } - return 0; + } + return 0; } diff --git a/flow/Platform.h b/flow/Platform.h index 3d5a401cb0..d43dd8eab8 100644 --- a/flow/Platform.h +++ b/flow/Platform.h @@ -533,7 +533,6 @@ bool isLibraryLoaded(const char* lib_path); void* loadLibrary(const char* lib_path); void* loadFunction(void* lib, const char* func_name); -// FIXME: sramamoorthy, clang-format fails here fix it // wrapper to execv // takes two arguments: // 1. path to the binary From 4bc4c615da21aa788a6e108609fe310804914ae9 Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Mon, 11 Mar 2019 07:31:44 -0700 Subject: [PATCH 04/69] exec op to all tlog, restore change in test &other - exec operation to go to all the TLogs - minor bug fix in tlog - restore implementation for the simulator - restore snap UID to be stored in restartInfo.ini - test cases added - indentation and trace file fixes --- cmake/AddFdbTest.cmake | 7 +- fdbserver/LogSystem.h | 36 ++- fdbserver/MasterProxyServer.actor.cpp | 7 +- fdbserver/OldTLogServer_6_0.actor.cpp | 58 +++-- fdbserver/TLogServer.actor.cpp | 46 ++-- fdbserver/TagPartitionedLogSystem.actor.cpp | 14 +- fdbserver/fdbserver.actor.cpp | 71 +++++- fdbserver/workloads/SnapTest.actor.cpp | 241 +++++++++---------- flow/Platform.cpp | 6 +- tests/CMakeLists.txt | 12 + tests/TestRunner/TestRunner.py | 5 + tests/restarting/SnapCycleRestart-1.txt | 22 ++ tests/restarting/SnapCycleRestart-2.txt | 8 + tests/restarting/SnapTestRestart-1.txt | 48 ++++ tests/restarting/SnapTestRestart-2.txt | 8 + tests/restarting/SnapTestSimpleRestart-1.txt | 36 +++ tests/restarting/SnapTestSimpleRestart-2.txt | 40 +++ 17 files changed, 461 insertions(+), 204 deletions(-) create mode 100644 tests/restarting/SnapCycleRestart-1.txt create mode 100644 tests/restarting/SnapCycleRestart-2.txt 
create mode 100644 tests/restarting/SnapTestRestart-1.txt create mode 100644 tests/restarting/SnapTestRestart-2.txt create mode 100644 tests/restarting/SnapTestSimpleRestart-1.txt create mode 100644 tests/restarting/SnapTestSimpleRestart-2.txt diff --git a/cmake/AddFdbTest.cmake b/cmake/AddFdbTest.cmake index b3ffb51760..d06a3d7a5d 100644 --- a/cmake/AddFdbTest.cmake +++ b/cmake/AddFdbTest.cmake @@ -54,7 +54,7 @@ endfunction() # all these tests in serialized order and within the same directory. This is # useful for restart tests function(add_fdb_test) - set(options UNIT IGNORE) + set(options UNIT IGNORE RESTORE) set(oneValueArgs TEST_NAME TIMEOUT) set(multiValueArgs TEST_FILES) cmake_parse_arguments(ADD_FDB_TEST "${options}" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}") @@ -75,6 +75,10 @@ function(add_fdb_test) if(ADD_FDB_TEST_UNIT) set(test_type "test") endif() + set(TEST_RESTORING "NO") + if (ADD_FDB_TEST_RESTORE) + set(TEST_RESTORING "YES") + endif() list(GET ADD_FDB_TEST_TEST_FILES 0 first_file) string(REGEX REPLACE "^(.*)\\.txt$" "\\1" test_name ${first_file}) if("${test_name}" MATCHES "(-\\d)$") @@ -117,6 +121,7 @@ function(add_fdb_test) --log-format ${TEST_LOG_FORMAT} --keep-logs ${TEST_KEEP_LOGS} --keep-simdirs ${TEST_KEEP_SIMDIR} + --restoring ${TEST_RESTORING} --seed ${SEED} --test-number ${assigned_id} ${BUGGIFY_OPTION} diff --git a/fdbserver/LogSystem.h b/fdbserver/LogSystem.h index e1f3041be9..c41f44b300 100644 --- a/fdbserver/LogSystem.h +++ b/fdbserver/LogSystem.h @@ -231,8 +231,20 @@ public: return resultEntries.size() == 0; } - void getPushLocations( std::vector const& tags, std::vector& locations, int locationOffset ) { + void getPushLocations(std::vector const& tags, std::vector& locations, int locationOffset, + bool allLocations = false) { if(locality == tagLocalitySatellite) { + if (allLocations) { + // special handling for allLocations + TraceEvent("AllLocationsSet"); + for (int i = 0; i < satelliteTagLocations.size(); i++) { + for (int j : 
satelliteTagLocations[i]) { + locations.push_back(locationOffset + j); + } + } + uniquify(locations); + return; + } for(auto& t : tags) { if(t == txsTag || t.locality == tagLocalityLogRouter) { for(int loc : satelliteTagLocations[t == txsTag ? 0 : t.id + 1]) { @@ -248,9 +260,17 @@ public: alsoServers.clear(); resultEntries.clear(); - for(auto& t : tags) { - if(locality == tagLocalitySpecial || t.locality == locality || t.locality < 0) { - newLocations.push_back(bestLocationFor(t)); + if (allLocations) { + // special handling for allLocations + TraceEvent("AllLocationsSet"); + for (int i = 0; i < logServers.size(); i++) { + newLocations.push_back(i); + } + } else { + for (auto& t : tags) { + if (locality == tagLocalitySpecial || t.locality == locality || t.locality < 0) { + newLocations.push_back(bestLocationFor(t)); + } } } @@ -690,7 +710,7 @@ struct ILogSystem { virtual Future onLogSystemConfigChange() = 0; // Returns when the log system configuration has changed due to a tlog rejoin. - virtual void getPushLocations( std::vector const& tags, std::vector& locations ) = 0; + virtual void getPushLocations(std::vector const& tags, std::vector& locations, bool allLocations = false) = 0; virtual bool hasRemoteLogs() = 0; @@ -776,7 +796,7 @@ struct LogPushData : NonCopyable { } template - void addTypedMessage( T const& item ) { + void addTypedMessage(T const& item, bool allLocations = false) { prev_tags.clear(); if(logSystem->hasRemoteLogs()) { prev_tags.push_back( logSystem->getRandomRouterTag() ); @@ -785,8 +805,8 @@ struct LogPushData : NonCopyable { prev_tags.push_back(tag); } msg_locations.clear(); - logSystem->getPushLocations( prev_tags, msg_locations ); - + logSystem->getPushLocations(prev_tags, msg_locations, allLocations); + uint32_t subseq = this->subsequence++; for(int loc : msg_locations) { // FIXME: memcpy after the first time diff --git a/fdbserver/MasterProxyServer.actor.cpp b/fdbserver/MasterProxyServer.actor.cpp index beb0703981..8a62c7a42b 100644 --- 
a/fdbserver/MasterProxyServer.actor.cpp +++ b/fdbserver/MasterProxyServer.actor.cpp @@ -767,13 +767,14 @@ ACTOR Future commitBatch( if (m.param1 == execSnap) { te1.trackLatest(tokenStr.c_str()); } - auto te = TraceEvent(SevDebug, "TagInfo"); int i = 0; + std::string allTagString; for (auto& tag : allSources) { - te.detail(format("TagId", ++i).c_str(), tag.toString()); + allTagString += tag.toString() + ","; toCommit.addTag(tag); } - toCommit.addTypedMessage(m); + TraceEvent(SevDebug, "TagInfo").detail("Tags", allTagString); + toCommit.addTypedMessage(m, true /* allLocations */); toCommit.setHasExecOp(); } else UNREACHABLE(); diff --git a/fdbserver/OldTLogServer_6_0.actor.cpp b/fdbserver/OldTLogServer_6_0.actor.cpp index 1c7372172b..e40b8dd61b 100644 --- a/fdbserver/OldTLogServer_6_0.actor.cpp +++ b/fdbserver/OldTLogServer_6_0.actor.cpp @@ -900,6 +900,22 @@ std::deque> & getVersionMessages( Re }; ACTOR Future tLogPop( TLogData* self, TLogPopRequest req, Reference logData ) { + // timeout check for ignorePopRequest + if (self->ignorePopRequest && (g_network->now() > self->ignorePopDeadline)) { + self->ignorePopRequest = false; + self->ignorePopUid = ""; + self->ignorePopDeadline = 0.0; + TraceEvent("ResetIgnorePopRequest") + .detail("Now", g_network->now()) + .detail("IgnorePopRequest", self->ignorePopRequest) + .detail("IgnorePopDeadline", self->ignorePopDeadline) + .trackLatest("DisableTLogPopTimedOut"); + } + if (self->ignorePopRequest && req.tag != txsTag) { + TraceEvent("IgnoringPopRequest").detail("IgnorePopDeadline", self->ignorePopDeadline); + req.reply.send(Void()); + return Void(); + } state Version upTo = req.to; int8_t tagLocality = req.tag.locality; if (logData->logSystem->get().isValid() && logData->logSystem->get()->isPseudoLocality(tagLocality)) { @@ -1275,6 +1291,7 @@ ACTOR Future tLogCommit( state TLogQueueEntryRef qe; state StringRef execCmd; state StringRef param2; + state vector execTags; if (logData->version.get() == req.prevVersion) { // Not a 
duplicate (check relies on no waiting between here and self->version.set() below!) if(req.debugID.present()) @@ -1302,6 +1319,7 @@ ACTOR Future tLogCommit( rd >> messageLength >> sub >> tagCount; for (int i = 0; i < tagCount; i++) { rd >> tmpTag; + execTags.push_back(tmpTag); } rd >> type; if (type == MutationRef::Exec) { @@ -1331,10 +1349,10 @@ ACTOR Future tLogCommit( std::string reason; if (!self->ignorePopRequest) { execVersion = invalidVersion; - reason = "snapFailIgnorePopNotSet"; + reason = "SnapFailIgnorePopNotSet"; } else if (uidStr != self->ignorePopUid) { execVersion = invalidVersion; - reason = "snapFailedDisableTLogUidMismatch"; + reason = "SnapFailedDisableTLogUidMismatch"; } if (execVersion == invalidVersion) { @@ -1372,7 +1390,7 @@ ACTOR Future tLogCommit( .detail("IgnorePopUid", self->ignorePopUid) .detail("IgnporePopRequest", self->ignorePopRequest) .detail("IgnporePopDeadline", self->ignorePopDeadline) - .trackLatest("disablePopTLog"); + .trackLatest("DisablePopTLog"); } if (execCmd == execEnableTLogPop) { execVersion = invalidVersion; @@ -1380,18 +1398,18 @@ ACTOR Future tLogCommit( TraceEvent(SevWarn, "TLogPopDisableEnableUidMismatch") .detail("IgnorePopUid", self->ignorePopUid) .detail("UidStr", uidStr) - .trackLatest("tLogPopDisableEnableUidMismatch"); + .trackLatest("TLogPopDisableEnableUidMismatch"); } self->ignorePopRequest = false; self->ignorePopDeadline = 0.0; self->ignorePopUid = ""; - TraceEvent("TLog6ExecCmdPopEnable") + TraceEvent("TLogExecCmdPopEnable") .detail("ExecCmd", execCmd.toString()) .detail("UidStr", uidStr) .detail("IgnorePopUid", self->ignorePopUid) .detail("IgnporePopRequest", self->ignorePopRequest) .detail("IgnporePopDeadline", self->ignorePopDeadline) - .trackLatest("enablePopTLog"); + .trackLatest("EnablePopTLog"); } } } @@ -1479,16 +1497,20 @@ ACTOR Future tLogCommit( .detail("UidStr", uidStr) .detail("Status", err) .detail("Tag", logData->allTags.begin()->toString()) + .detail("OldTagSize", 
logData->allTags.size()) .detail("Role", "TLog"); // print the detailed status message - for (auto it = logData->allTags.begin(); it != logData->allTags.end(); it++) { + for (int i = 0; i < execTags.size(); i++) { Version poppedTagVersion = -1; - auto tagv = logData->getTagData(*it); + auto tagv = logData->getTagData(execTags[i]); + if (!tagv) { + continue; + } poppedTagVersion = tagv->popped; int len = param2.size(); - state std::string message = "ExecTrace/TLog/" + it->toString() + "/" + uidStr; + state std::string message = "ExecTrace/TLog/" + tagv->tag.toString() + "/" + uidStr; TraceEvent te = TraceEvent(SevDebug, "TLogExecTraceDetailed"); te.detail("Uid", uidStr); @@ -1496,7 +1518,7 @@ ACTOR Future tLogCommit( te.detail("Role", "TLog"); te.detail("ExecCmd", execCmd.toString()); te.detail("Param2", param2.toString()); - te.detail("Tag", it->toString()); + te.detail("Tag", tagv->tag.toString()); te.detail("Version", qe.version); te.detail("PoppedTagVersion", poppedTagVersion); te.detail("PersistentDataVersion", logData->persistentDataVersion); @@ -1694,21 +1716,7 @@ ACTOR Future serveTLogInterface( TLogData* self, TLogInterface tli, Refere logData->addActor.send( tLogPeekMessages( self, req, logData ) ); } when( TLogPopRequest req = waitNext( tli.popMessages.getFuture() ) ) { - if (self->ignorePopRequest && (g_network->now() > self->ignorePopDeadline)) { - self->ignorePopRequest = false; - self->ignorePopUid = ""; - self->ignorePopDeadline = 0.0; - TraceEvent("ResetIgnorePopRequest") - .detail("Now", g_network->now()) - .detail("IgnorePopRequest", self->ignorePopRequest) - .detail("IgnorePopDeadline", self->ignorePopDeadline) - .trackLatest("disableTLogPopTimedOut"); - } - if (!self->ignorePopRequest) { - logData->addActor.send(tLogPop(self, req, logData)); - } else { - TraceEvent("IgnoringPopRequest").detail("IgnorePopDeadline", self->ignorePopDeadline); - } + logData->addActor.send(tLogPop(self, req, logData)); } when( TLogCommitRequest req = waitNext( 
tli.commit.getFuture() ) ) { //TraceEvent("TLogCommitReq", logData->logId).detail("Ver", req.version).detail("PrevVer", req.prevVersion).detail("LogVer", logData->version.get()); diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index 43414ef38a..61b8d2837d 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -1178,7 +1178,6 @@ ACTOR Future tLogPop( TLogData* self, TLogPopRequest req, ReferenceeraseMessagesBefore(upTo, self, logData, TaskTLogPop)); //TraceEvent("TLogPop", self->dbgid).detail("Tag", tag.toString()).detail("To", upTo); } - req.reply.send(Void()); return Void(); } @@ -1642,6 +1641,7 @@ ACTOR Future tLogCommit( state TLogQueueEntryRef qe; state StringRef execCmd; state StringRef param2; + state vector execTags; if (logData->version.get() == req.prevVersion) { // Not a duplicate (check relies on no waiting between here and self->version.set() below!) if(req.debugID.present()) @@ -1669,6 +1669,7 @@ ACTOR Future tLogCommit( rd >> messageLength >> sub >> tagCount; for(int i = 0; i < tagCount; i++) { rd >> tmpTag; + execTags.push_back(tmpTag); } rd >> type; if (type == MutationRef::Exec) { @@ -1698,10 +1699,10 @@ ACTOR Future tLogCommit( std::string reason; if (!self->ignorePopRequest) { execVersion = invalidVersion; - reason = "snapFailIgnorePopNotSet"; + reason = "SnapFailIgnorePopNotSet"; } else if (uidStr != self->ignorePopUid) { execVersion = invalidVersion; - reason = "snapFailedDisableTLogUidMismatch"; + reason = "SnapFailedDisableTLogUidMismatch"; } if (execVersion == invalidVersion) { @@ -1740,7 +1741,7 @@ ACTOR Future tLogCommit( .detail("IgnorePopUid", self->ignorePopUid) .detail("IgnporePopRequest", self->ignorePopRequest) .detail("IgnporePopDeadline", self->ignorePopDeadline) - .trackLatest("disablePopTLog"); + .trackLatest("DisablePopTLog"); } if (execCmd == execEnableTLogPop) { execVersion = invalidVersion; @@ -1759,7 +1760,7 @@ ACTOR Future tLogCommit( .detail("IgnorePopUid", 
self->ignorePopUid) .detail("IgnporePopRequest", self->ignorePopRequest) .detail("IgnporePopDeadline", self->ignorePopDeadline) - .trackLatest("enablePopTLog"); + .trackLatest("EnablePopTLog"); } } } @@ -1787,7 +1788,7 @@ ACTOR Future tLogCommit( state Future stopped = logData->stopCommit.onTrigger(); wait( timeoutWarning( logData->queueCommittedVersion.whenAtLeast( req.version ) || stopped, 0.1, warningCollectorInput ) ); - if ((execVersion != invalidVersion) && + if ((execVersion != invalidVersion) && execVersion <= logData->queueCommittedVersion.get()) { int err = 0; auto uidStr = execArg.getBinaryArgValue("uid"); @@ -1848,17 +1849,20 @@ ACTOR Future tLogCommit( .detail("UidStr", uidStr) .detail("Status", err) .detail("Tag", logData->allTags.begin()->toString()) + .detail("TagSize", logData->allTags.size()) .detail("Role", "TLog"); // print the status message - for (auto it = logData->allTags.begin(); it != logData->allTags.end(); it++) { - Version poppedTagVersion = -1; - auto tagv = logData->getTagData(*it); + for (int i = 0; i < execTags.size(); i++) { + Version poppedTagVersion = -1; + auto tagv = logData->getTagData(execTags[i]); + if (!tagv) { + continue; + } poppedTagVersion = tagv->popped; int len = param2.size(); - state std::string message = - "ExecTrace/TLog/" + it->toString() + "/" + uidStr; + state std::string message = "ExecTrace/TLog/" + tagv->tag.toString() + "/" + uidStr; TraceEvent te = TraceEvent(SevDebug, "TLogExecTraceDetailed"); te.detail("Uid", uidStr); @@ -1866,7 +1870,7 @@ ACTOR Future tLogCommit( te.detail("Role", "TLog"); te.detail("ExecCmd", execCmd.toString()); te.detail("Param2", param2.toString()); - te.detail("Tag", it->toString()); + te.detail("Tag", tagv->tag.toString()); te.detail("Version", qe.version); te.detail("PoppedTagVersion", poppedTagVersion); te.detail("PersistentDataVersion", logData->persistentDataVersion); @@ -1877,7 +1881,7 @@ ACTOR Future tLogCommit( te.trackLatest(message.c_str()); } } - execVersion = 
invalidVersion; + execVersion = invalidVersion; } if(stopped.isReady()) { @@ -2065,21 +2069,7 @@ ACTOR Future serveTLogInterface( TLogData* self, TLogInterface tli, Refere logData->addActor.send( tLogPeekMessages( self, req, logData ) ); } when( TLogPopRequest req = waitNext( tli.popMessages.getFuture() ) ) { - if (self->ignorePopRequest && (g_network->now() > self->ignorePopDeadline)) { - self->ignorePopRequest = false; - self->ignorePopUid = ""; - self->ignorePopDeadline = 0.0; - TraceEvent("ResetIgnorePopRequest") - .detail("Now", g_network->now()) - .detail("IgnorePopRequest", self->ignorePopRequest) - .detail("IgnorePopDeadline", self->ignorePopDeadline) - .trackLatest("disableTLogPopTimedOut"); - } - if (!self->ignorePopRequest) { - logData->addActor.send(tLogPop(self, req, logData)); - } else { - TraceEvent("IgnoringPopRequest").detail("IgnorePopDeadline", self->ignorePopDeadline); - } + logData->addActor.send(tLogPop(self, req, logData)); } when( TLogCommitRequest req = waitNext( tli.commit.getFuture() ) ) { //TraceEvent("TLogCommitReq", logData->logId).detail("Ver", req.version).detail("PrevVer", req.prevVersion).detail("LogVer", logData->version.get()); diff --git a/fdbserver/TagPartitionedLogSystem.actor.cpp b/fdbserver/TagPartitionedLogSystem.actor.cpp index df55510fbf..5e42cd0762 100644 --- a/fdbserver/TagPartitionedLogSystem.actor.cpp +++ b/fdbserver/TagPartitionedLogSystem.actor.cpp @@ -1108,11 +1108,21 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted::max(); } - virtual void getPushLocations( std::vector const& tags, std::vector& locations ) { + virtual void getPushLocations(std::vector const& tags, std::vector& locations, bool allLocations) { int locationOffset = 0; for(auto& log : tLogs) { + // FIXME: sramamoorthy, remove after debugging + if (allLocations) { + TraceEvent("AllLocationsDetails") + .detail("NumLogServers", log->logServers.size()) + .detail("NumLogRouters", log->logRouters.size()) + .detail("SatelliteTagLocations", 
log->satelliteTagLocations.size()) + .detail("IsLocal", log->isLocal) + .detail("LogServerString", log->logServerString()) + .detail("LogRouterString", log->logRouterString()); + } if(log->isLocal && log->logServers.size()) { - log->getPushLocations(tags, locations, locationOffset); + log->getPushLocations(tags, locations, locationOffset, allLocations); locationOffset += log->logServers.size(); } } diff --git a/fdbserver/fdbserver.actor.cpp b/fdbserver/fdbserver.actor.cpp index 34e0ee4a43..dabbb62342 100644 --- a/fdbserver/fdbserver.actor.cpp +++ b/fdbserver/fdbserver.actor.cpp @@ -61,6 +61,8 @@ #include "versions.h" #endif +#include "fdbmonitor/SimpleIni.h" + #ifdef __linux__ #include #include @@ -79,7 +81,7 @@ #include "flow/actorcompiler.h" // This must be the last #include. enum { - OPT_CONNFILE, OPT_SEEDCONNFILE, OPT_SEEDCONNSTRING, OPT_ROLE, OPT_LISTEN, OPT_PUBLICADDR, OPT_DATAFOLDER, OPT_LOGFOLDER, OPT_PARENTPID, OPT_NEWCONSOLE, OPT_NOBOX, OPT_TESTFILE, OPT_RESTARTING, OPT_RANDOMSEED, OPT_KEY, OPT_MEMLIMIT, OPT_STORAGEMEMLIMIT, OPT_MACHINEID, OPT_DCID, OPT_MACHINE_CLASS, OPT_BUGGIFY, OPT_VERSION, OPT_CRASHONERROR, OPT_HELP, OPT_NETWORKIMPL, OPT_NOBUFSTDOUT, OPT_BUFSTDOUTERR, OPT_TRACECLOCK, OPT_NUMTESTERS, OPT_DEVHELP, OPT_ROLLSIZE, OPT_MAXLOGS, OPT_MAXLOGSSIZE, OPT_KNOB, OPT_TESTSERVERS, OPT_TEST_ON_SERVERS, OPT_METRICSCONNFILE, OPT_METRICSPREFIX, + OPT_CONNFILE, OPT_SEEDCONNFILE, OPT_SEEDCONNSTRING, OPT_ROLE, OPT_LISTEN, OPT_PUBLICADDR, OPT_DATAFOLDER, OPT_LOGFOLDER, OPT_PARENTPID, OPT_NEWCONSOLE, OPT_NOBOX, OPT_TESTFILE, OPT_RESTARTING, OPT_RESTORING, OPT_RANDOMSEED, OPT_KEY, OPT_MEMLIMIT, OPT_STORAGEMEMLIMIT, OPT_MACHINEID, OPT_DCID, OPT_MACHINE_CLASS, OPT_BUGGIFY, OPT_VERSION, OPT_CRASHONERROR, OPT_HELP, OPT_NETWORKIMPL, OPT_NOBUFSTDOUT, OPT_BUFSTDOUTERR, OPT_TRACECLOCK, OPT_NUMTESTERS, OPT_DEVHELP, OPT_ROLLSIZE, OPT_MAXLOGS, OPT_MAXLOGSSIZE, OPT_KNOB, OPT_TESTSERVERS, OPT_TEST_ON_SERVERS, OPT_METRICSCONNFILE, OPT_METRICSPREFIX, OPT_LOGGROUP, 
OPT_LOCALITY, OPT_IO_TRUST_SECONDS, OPT_IO_TRUST_WARN_ONLY, OPT_FILESYSTEM, OPT_PROFILER_RSS_SIZE, OPT_KVFILE, OPT_TRACE_FORMAT, OPT_USE_OBJECT_SERIALIZER }; CSimpleOpt::SOption g_rgOptions[] = { @@ -118,6 +120,7 @@ CSimpleOpt::SOption g_rgOptions[] = { { OPT_TESTFILE, "--testfile", SO_REQ_SEP }, { OPT_RESTARTING, "-R", SO_NONE }, { OPT_RESTARTING, "--restarting", SO_NONE }, + { OPT_RESTORING, "--restoring", SO_NONE }, { OPT_RANDOMSEED, "-s", SO_REQ_SEP }, { OPT_RANDOMSEED, "--seed", SO_REQ_SEP }, { OPT_KEY, "-k", SO_REQ_SEP }, @@ -932,6 +935,7 @@ int main(int argc, char* argv[]) { LocalityData localities; int minTesterCount = 1; bool testOnServers = false; + bool restoring = false; Reference tlsOptions = Reference( new TLSOptions ); std::string tlsCertPath, tlsKeyPath, tlsCAPath, tlsPassword; @@ -1193,7 +1197,11 @@ int main(int argc, char* argv[]) { case OPT_RESTARTING: restarting = true; break; - case OPT_RANDOMSEED: { + case OPT_RESTORING: { + restoring = true; + break; + } + case OPT_RANDOMSEED: { char* end; randomSeed = (uint32_t)strtoul( args.OptionArg(), &end, 0 ); if( *end ) { @@ -1641,7 +1649,8 @@ int main(int argc, char* argv[]) { std::vector directories = platform::listDirectories( dataFolder ); for(int i = 0; i < directories.size(); i++) - if( directories[i].size() != 32 && directories[i] != "." && directories[i] != ".." && directories[i] != "backups") { + if (directories[i].size() != 32 && directories[i] != "." && directories[i] != ".." 
&& + directories[i] != "backups" && directories[i].find("snap") == std::string::npos) { TraceEvent(SevError, "IncompatibleDirectoryFound").detail("DataFolder", dataFolder).detail("SuspiciousFile", directories[i]); fprintf(stderr, "ERROR: Data folder `%s' had non fdb file `%s'; please use clean, fdb-only folder\n", dataFolder.c_str(), directories[i].c_str()); flushAndExit(FDB_EXIT_ERROR); @@ -1661,6 +1670,62 @@ int main(int argc, char* argv[]) { if (!restarting) { platform::eraseDirectoryRecursive( dataFolder ); platform::createDirectory( dataFolder ); + } else if (restoring) { + std::vector returnList; + std::string ext = ""; + std::string tmpFolder = abspath(dataFolder); + returnList = platform::listDirectories(tmpFolder); + TraceEvent("RestoringDataFolder").detail("DataFolder", tmpFolder); + + CSimpleIni ini; + ini.SetUnicode(); + ini.LoadFile(joinPath(tmpFolder, "restartInfo.ini").c_str()); + std::string snapStr = ini.GetValue("RESTORE", "RestoreSnapUID"); + TraceEvent("RestoreSnapUID").detail("UID", snapStr); + + // delete all files (except fdb.cluster) in non-snap directories + for (int i = 0; i < returnList.size(); i++) { + if (returnList[i] == "." || returnList[i] == "..") { + continue; + } + if (returnList[i].find(snapStr) != std::string::npos) { + continue; + } + + std::string childf = tmpFolder + "/" + returnList[i]; + std::vector returnFiles = platform::listFiles(childf, ext); + for (int j = 0; j < returnFiles.size(); j++) { + fprintf(stderr, "file : %s\n", returnFiles[j].c_str()); + if (returnFiles[j] != "fdb.cluster") { + TraceEvent("DeletingNonSnapfiles") + .detail("FileBeingDeleted", childf + "/" + returnFiles[j]); + deleteFile(childf + "/" + returnFiles[j]); + } + } + } + // move the contents from snap folder to the original folder, + // delete snap folders + for (int i = 0; i < returnList.size(); i++) { + fprintf(stderr, "Dir : %s\n", returnList[i].c_str()); + if (returnList[i] == "." 
|| returnList[i] == "..") { + continue; + } + if (returnList[i].find(snapStr) == std::string::npos) { + if (returnList[i].find("snap") != std::string::npos) { + platform::eraseDirectoryRecursive(tmpFolder + returnList[i]); + } + continue; + } + std::string origDir = returnList[i].substr(0, 32); + std::string dirToRemove = tmpFolder + "/" + origDir; + std::string dirSrc = tmpFolder + "/" + returnList[i]; + TraceEvent("DeletingOriginalNonSnapDirectory").detail("FileBeingDeleted", dirToRemove); + platform::eraseDirectoryRecursive(dirToRemove); + renameFile(dirSrc, dirToRemove); + TraceEvent("RenamingSnapToOriginalDirectory") + .detail("Oldname", dirSrc) + .detail("Newname", dirToRemove); + } } setupAndRun( dataFolder, testFile, restarting, tlsOptions ); diff --git a/fdbserver/workloads/SnapTest.actor.cpp b/fdbserver/workloads/SnapTest.actor.cpp index e4da32be45..dd414d78bb 100644 --- a/fdbserver/workloads/SnapTest.actor.cpp +++ b/fdbserver/workloads/SnapTest.actor.cpp @@ -9,7 +9,7 @@ #include "BulkSetup.actor.h" #include "fdbserver/ClusterRecruitmentInterface.h" #include "fdbclient/ReadYourWrites.h" - +#include "fdbmonitor/SimpleIni.h" #include #undef FLOW_ACOMPILER_STATE @@ -19,54 +19,21 @@ void getVersionAndnumTags(TraceEventFields md, Version& version, int& numTags) { version = -1; numTags = -1; - // FIXME: sramamoorthy, WONTWORK - // std::string versionStr = extractAttribute(msg.toString(), "Version"); - // version = strtol(versionStr.c_str(), nullptr, 0); - // TraceEvent("version").detail("", version); sscanf(md.getValue("Version").c_str(), "%lld", &version); - sscanf(md.getValue("numTags").c_str(), "%d:%d", &numTags); - - // std::string numTagsStr = extractAttribute(msg.toString(), "numTags"); - // numTags = strtol(numTagsStr.c_str(), nullptr, 0); - // TraceEvent("numTags").detail("", numTags); - // FIXME: sramamoorthy, WONTWORK + sscanf(md.getValue("NumTags").c_str(), "%d:%d", &numTags); } void getTagAndDurableVersion(TraceEventFields md, Version version, Tag& 
tag, Version& durableVersion) { Version verifyVersion; - // FIXME: sramamoorthy, WONTWORK - // tag = -1; durableVersion = -1; int tagLocality; int tagId; - sscanf(md.getValue("version").c_str(), "%lld", &verifyVersion); - sscanf(md.getValue("tag").c_str(), "%d:%d", &tagLocality, &tagId); + sscanf(md.getValue("Version").c_str(), "%lld", &verifyVersion); + sscanf(md.getValue("Tag").c_str(), "%d:%d", &tagLocality, &tagId); tag.locality = tagLocality; tag.id = tagId; - sscanf(md.getValue("durableVersion").c_str(), "%lld", &durableVersion); - - // FIXME: sramamoorthy, WONTWORK - // std::string versionStr = extractAttribute(msg.toString(), "version"); - // verifyVersion = strtol(versionStr.c_str(), nullptr, 0); - - // TraceEvent("version compare").detail("version", version).detail("verifyVersion", verifyVersion); - // if (version != verifyVersion) { - // return; - // } - - // std::string tagStr = extractAttribute(msg.toString(), "tag"); - // tag = strtol(tagStr.c_str(), nullptr, 0); - // TraceEvent("tagscan").detail("tag", tag); - // if (tag == -1) { - // return; - // } - - // versionStr = extractAttribute(msg.toString(), "durableVersion"); - // durableVersion = strtol(versionStr.c_str(), nullptr, 0); - - // TraceEvent("durableVersion").detail("durablVersion", durableVersion); - // FIXME: sramamoorthy, WONTWORK + sscanf(md.getValue("DurableVersion").c_str(), "%lld", &durableVersion); } void getMinAndMaxTLogVersions(TraceEventFields md, Version version, Tag tag, Version& minTLogVersion, @@ -78,48 +45,33 @@ void getMinAndMaxTLogVersions(TraceEventFields md, Version version, Tag tag, Ver sscanf(md.getValue("Version").c_str(), "%lld", &verifyVersion); int tagLocality; int tagId; - sscanf(md.getValue("tag").c_str(), "%d:%d", &tagLocality, &tagId); + sscanf(md.getValue("Tag").c_str(), "%d:%d", &tagLocality, &tagId); verifyTag.locality = tagLocality; verifyTag.id = tagId; if (tag != verifyTag) { return; } - sscanf(md.getValue("poppedTagVersion").c_str(), "%lld", 
&minTLogVersion); - sscanf(md.getValue("queueCommittedVersion").c_str(), "%lld", &maxTLogVersion); - - // FIXME: sramamoorthy, WONTWORK - // std::string versionStr = extractAttribute(msg.toString(), "Version"); - // verifyVersion = strtol(versionStr.c_str(), nullptr, 0); - - // if (version != verifyVersion) { - // return; - // } - - // std::string tagStr = extractAttribute(msg.toString(), "Tag"); - // verifyTag = strtol(tagStr.c_str(), nullptr, 0); - - // if (tag != verifyTag) { - // return; - // } - - // versionStr = extractAttribute(msg.toString(), "poppedTagVersion"); - // minTLogVersion = strtol(versionStr.c_str(), nullptr, 0); - // versionStr = extractAttribute(msg.toString(), "queueCommittedVersion"); - // maxTLogVersion = strtol(versionStr.c_str(), nullptr, 0); - // FIXME: sramamoorthy, WONTWORK + sscanf(md.getValue("PoppedTagVersion").c_str(), "%lld", &minTLogVersion); + sscanf(md.getValue("QueueCommittedVersion").c_str(), "%lld", &maxTLogVersion); } void filterEmptyMessages(std::vector>& messages) { - // FIXME, sramamoorthy, FDB6 related - // std::string emptyStr; - // auto it = messages.begin(); - // while (it != messages.end()) { - // if (it->get() == emptyStr) { - // it = messages.erase(it); - // } else { - // ++it; - // } - // } + std::string emptyStr; + auto it = messages.begin(); + while (it != messages.end()) { + if (it->get().toString() == emptyStr) { + it = messages.erase(it); + } else { + ++it; + } + } + return; +} + +void printMessages(std::vector>& messages) { + for (int i = 0; i < messages.size(); i++) { + TraceEvent("MESSAGES").detail("I", i).detail("VALUE", messages[i].get().toString()); + } return; } @@ -131,6 +83,7 @@ public: // variables bool snapCheck; // check for the successful snap create int testID; // test id UID snapUID; // UID used for snap name + std::string restartInfoLocation; // file location to store the snap restore info public: // ctor & dtor SnapTestWorkload(WorkloadContext const& wcx) @@ -142,16 +95,19 @@ public: // ctor 
& dtor maxSnapDelay = getOption(options, LiteralStringRef("maxSnapDelay"), 25.0); snapCheck = getOption(options, LiteralStringRef("snapCheck"), false); testID = getOption(options, LiteralStringRef("testID"), 0); + restartInfoLocation = + getOption(options, LiteralStringRef("restartInfoLocation"), LiteralStringRef("simfdb/restartInfo.ini")) + .toString(); } public: // workload functions std::string description() override { return "SnapTest"; } Future setup(Database const& cx) override { - TraceEvent("SnapTestWorkload setup"); + TraceEvent("SnapTestWorkloadSetup"); return Void(); } Future start(Database const& cx) override { - TraceEvent("SnapTestWorkload start"); + TraceEvent("SnapTestWorkloadStart"); if (clientId == 0) { return _start(cx, this); } @@ -159,10 +115,9 @@ public: // workload functions } Future check(Database const& cx) override { - // FIXME: sramamoorthy, FDB6 porting fallout - if (true) return true; + TraceEvent("SnapTestWorkloadCheck").detail("ClientID", clientId).detail("SnapCheck", this->snapCheck); if (!this->snapCheck || clientId != 0) { - TraceEvent("returning true here"); + TraceEvent("SnapTestCheckSucc"); return true; } switch (this->testID) { @@ -170,28 +125,27 @@ public: // workload functions case 1: case 2: case 3: { - TraceEvent("SnapTestWorkload check"); Future> proxyIfaces; return (verifyExecTraceVersion(cx, this)); break; } case 4: { - std::string token = "disableTLogPopTimedOut"; + std::string token = "DisableTLogPopTimedOut"; return verifyTLogTrackLatest(cx, this, token); break; } case 5: { - std::string token = "tLogPopDisableEnableUidMismatch"; + std::string token = "TLogPopDisableEnableUidMismatch"; return verifyTLogTrackLatest(cx, this, token); break; } case 6: { - std::string token = "snapFailIgnorePopNotSet"; + std::string token = "SnapFailIgnorePopNotSet"; return verifyTLogTrackLatest(cx, this, token); break; } case 7: { - std::string token = "snapFailedDisableTLogUidMismatch"; + std::string token = 
"SnapFailedDisableTLogUidMismatch"; return verifyTLogTrackLatest(cx, this, token); break; } @@ -200,7 +154,7 @@ public: // workload functions return false; } - void getMetrics(vector& m) override { TraceEvent("SnapTestWorkload getMetrics"); } + void getMetrics(vector& m) override { TraceEvent("SnapTestWorkloadGetMetrics"); } ACTOR Future _create_keys(Database cx, std::string prefix, bool even = true) { state Transaction tr(cx); @@ -258,12 +212,20 @@ public: // workload functions break; } catch (Error& e) { ++retry; - TraceEvent(retry > 3 ? SevWarn : SevInfo, "snapCreate command failed").detail("error", e.what()); + TraceEvent(retry > 3 ? SevWarn : SevInfo, "SnapCreateCommandFailed").detail("Error", e.what()); if (retry > 3) { throw operation_failed(); } } + wait(delay(30.0)); } + CSimpleIni ini; + ini.SetUnicode(); + ini.LoadFile(self->restartInfoLocation.c_str()); + std::string uidStr = self->snapUID.toString(); + ini.SetValue("RESTORE", "RestoreSnapUID", uidStr.c_str()); + ini.SaveFile(self->restartInfoLocation.c_str()); + // write the snapUID to a file TraceEvent("Snapshot create succeeded"); } else if (self->testID == 2) { // create odd keys after the snapshot @@ -281,7 +243,7 @@ public: // workload functions try { Standalone kvRange = wait(tr.getRange(begin, end, CLIENT_KNOBS->TOO_MANY)); if (!kvRange.more && kvRange.size() == 0) { - TraceEvent("No more entires"); + TraceEvent("NoMoreEntries"); break; } @@ -305,6 +267,7 @@ public: // workload functions wait(tr.onError(e)); } } + TraceEvent("VerifyCntValue").detail("Value", cnt); if (cnt != 1000) { throw operation_failed(); } @@ -366,7 +329,7 @@ public: // workload functions wait(tr.commit()); break; } catch (Error& e) { - TraceEvent("snapCreate").detail("snapCreateErrorSnapTLogStorage", e.what()); + TraceEvent("SnapCreate").detail("SnapCreateErrorSnapTLogStorage", e.what()); throw; } } @@ -393,44 +356,58 @@ public: // workload functions wait(tr.commit()); break; } catch (Error& e) { - 
TraceEvent("snapCreate").detail("snapCreateErrorSnapTLogStorage", e.what()); + TraceEvent("SnapCreate").detail("SnapCreateErrorSnapTLogStorage", e.what()); throw; } } } - TraceEvent("returning from start"); wait(delay(0.0)); return Void(); } ACTOR Future verifyTLogTrackLatest(Database cx, SnapTestWorkload* self, std::string event) { - TraceEvent("verifyTLogTrackLatest"); + TraceEvent("VerifyTLogTrackLatest"); state StringRef eventTokenRef(event); - // FIXME: sramamoorthy, FDB6 related - // state vector tLogWorkers = wait(self->getWorkersWithRole(cx, - // LocalityData::ClusterRole::TLog)); state vector> tLogWorkers = - // wait(self->dbInfo->get().clusterInterface.getWorkers()); - state vector> tLogWorkers = wait(getWorkers(self->dbInfo)); + state vector tLogWorkers; state std::vector> tLogMessages; + state std::vector> workers = wait(getWorkers(self->dbInfo)); + state std::map address_workers; + + for (auto const& worker : workers) { + address_workers[worker.first.address()] = worker.first; + } + vector tLogServers = self->dbInfo->get().logSystemConfig.allPresentLogs(); + + for (auto s : tLogServers) { + auto it = address_workers.find(s.address()); + if (it != address_workers.end()) { + tLogWorkers.push_back(it->second); + TraceEvent("TLogWorker") + .detail("Address", s.address()) + .detail("Id", s.id()) + .detail("Localit", s.locality.toString()); + } + } + state int i = 0; for (; i < tLogWorkers.size(); i++) { tLogMessages.push_back( - timeoutError(tLogWorkers[i].first.eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 1.0)); + timeoutError(tLogWorkers[i].eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 1.0)); state int retryCnt = 0; state bool retry = false; loop { retry = false; try { - TraceEvent("waiting for tlog messages"); + TraceEvent("WaitingForTlogMessages"); wait(waitForAll(tLogMessages)); break; } catch (Error& e) { - TraceEvent("verifyTLogTrackLatest") - .detail("token", eventTokenRef.toString()) + TraceEvent("VerifyTLogTrackLatest") + 
.detail("Token", eventTokenRef.toString()) .detail("Reason", "Failed to get tLogMessages") - .detail("code", e.what()); + .detail("Code", e.what()); if (e.code() != error_code_timed_out) { return false; } else { @@ -443,9 +420,12 @@ public: // workload functions return false; } } + printMessages(tLogMessages); filterEmptyMessages(tLogMessages); if (tLogMessages.size() != 1) { - TraceEvent("verifyTLogTrackLatest message not found").detail("token", eventTokenRef.toString()); + TraceEvent("VerifyTLogTrackLatestMessageNotFound") + .detail("Address", tLogWorkers[i].address()) + .detail("Token", eventTokenRef.toString()); return false; } tLogMessages.clear(); @@ -454,20 +434,11 @@ public: // workload functions } ACTOR Future verifyExecTraceVersion(Database cx, SnapTestWorkload* self) { - TraceEvent("verifyExecTraceVersion1"); - - // FIXME: sramamoorthy, FDB6 - // state std::vector coordAddrs = self->getCoordinatorAddresses(); state std::vector coordAddrs = wait(getCoordinators(cx)); - TraceEvent("verifyExecTraceVersion2"); state vector> proxyWorkers = wait(getWorkers(self->dbInfo)); - TraceEvent("verifyExecTraceVersion3"); state vector> storageWorkers = wait(getWorkers(self->dbInfo)); - TraceEvent("verifyExecTraceVersion4"); state vector> tLogWorkers = wait(getWorkers(self->dbInfo)); - TraceEvent("verifyExecTraceVersion5"); state vector> workers = wait(getWorkers(self->dbInfo)); - TraceEvent("verifyExecTraceVersion6"); state std::vector> proxyMessages; state std::vector> tLogMessages; @@ -505,13 +476,12 @@ public: // workload functions storageWorkers[i].first.eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 1.0)); } - TraceEvent("WAITING for proxy1"); try { wait(waitForAll(proxyMessages)); - // wait(waitForAll(storageMessages)); - // wait(waitForAll(coordMessages)); + wait(waitForAll(storageMessages)); + wait(waitForAll(coordMessages)); } catch (Error& e) { - TraceEvent("verifyExecTraceVersionFailure") + TraceEvent("VerifyExecTraceVersionFailure") 
.detail("Reason", "Failed to get proxy or storage messages") .detail("code", e.what()); if (e.code() != error_code_timed_out) { @@ -521,25 +491,34 @@ public: // workload functions ++retryCnt; } } - TraceEvent("WAITING for proxy2"); if (retry == false) { break; } - TraceEvent("WAITING for proxy3"); if (retry && retryCnt >= 4) { TraceEvent("Unable to retrieve proxy/storage/coord messages " "after retries"); + ASSERT(1 == 0); std::terminate(); return false; } } + printMessages(proxyMessages); + printMessages(storageMessages); + printMessages(coordMessages); // filter out empty messages filterEmptyMessages(proxyMessages); filterEmptyMessages(storageMessages); filterEmptyMessages(coordMessages); + TraceEvent("ProxyMessages"); + printMessages(proxyMessages); + TraceEvent("StorageMessages"); + printMessages(storageMessages); + TraceEvent("CoorMessages"); + printMessages(coordMessages); + if (proxyMessages.size() != 1) { // if no message from proxy or more than one fail the check TraceEvent("No ExecTrace message from Proxy"); @@ -548,8 +527,8 @@ public: // workload functions } TraceEvent("CoordinatorSnapStatus") - .detail("coordMessage size", coordMessages.size()) - .detail("coordAddrssize", coordAddrs.size()); + .detail("CoordMessageSize", coordMessages.size()) + .detail("CoordAddrssize", coordAddrs.size()); if (coordMessages.size() < (coordAddrs.size() + 1) / 2) { TraceEvent("No ExecTrace message from Quorum of coordinators"); std::terminate(); @@ -563,8 +542,7 @@ public: // workload functions state Version execVersion = -1; state std::string emptyStr; - TraceEvent("Printing Relevant ProxyMessage").detail("msg", proxyMessages[i].get().toString()); - // FIXME: sramamoorthy, how to compare with empty string + TraceEvent("RelevantProxyMessage").detail("Msg", proxyMessages[i].get().toString()); if (proxyMessages[i].get().toString() != emptyStr) { getVersionAndnumTags(proxyMessages[i].get(), execVersion, numTags); ASSERT(numTags > 0); @@ -578,11 +556,11 @@ public: // workload 
functions state Tag invalidTag; // FIXME: sramamoorthy, for now allow default values state Version durableVersion = -1; - TraceEvent("Printing Relevant StorageMessage").detail("msg", storageMessages[j].get().toString()); + TraceEvent("RelevantStorageMessage").detail("Msg", storageMessages[j].get().toString()); // FIXME: sramamoorthy, how to compare with empty string ASSERT(storageMessages[j].get().toString() != emptyStr); getTagAndDurableVersion(storageMessages[j].get(), execVersion, tag, durableVersion); - TraceEvent("Searching for tlog messages").detail("tag", tag.toString()); + TraceEvent("SearchingTLogMessages").detail("Tag", tag.toString()); retryCnt = 0; loop { @@ -599,14 +577,14 @@ public: // workload functions } try { - TraceEvent("waiting for tlog messages"); + TraceEvent("WaitingForTlogMessages"); if (tag != invalidTag) { wait(waitForAll(tLogMessages)); } } catch (Error& e) { - TraceEvent("verifyExecTraceVersionFailure") + TraceEvent("VerifyExecTraceVersionFailure") .detail("Reason", "Failed to get tLogMessages") - .detail("code", e.what()); + .detail("Code", e.what()); if (e.code() != error_code_timed_out) { return false; } else { @@ -617,9 +595,10 @@ public: // workload functions if (retry == false) { break; } - if (retry && retryCnt > 4) { + if (retry && retryCnt > 20) { TraceEvent("Unable to retrieve tLog messages after " "retries"); + ASSERT(1 == 0); std::terminate(); return false; } @@ -636,7 +615,7 @@ public: // workload functions Version minTLogVersion = -1; Version maxTLogVersion = -1; - TraceEvent("tLogMessage").detail("msg", tLogMessages[k].get().toString()); + TraceEvent("TLogMessage").detail("Msg", tLogMessages[k].get().toString()); // FIXME, sramamoorthy, handle empty string ASSERT(tLogMessages[k].get().toString() != emptyStr); @@ -651,22 +630,22 @@ public: // workload functions // if we did not find even one tlog for a given tag fail the // check if (numDurableVersionChecks < 1) { - TraceEvent("No TLog found for a tag"); + 
TraceEvent("NoTLogFoundForATag"); + ASSERT(1 == 0); std::terminate(); } - - TraceEvent("next iteration"); tLogMessages.clear(); } } // validates that we encountered unique tags of value numTags if (numTags != visitedStorageTags.size()) { - TraceEvent("Storage messages were not found"); + TraceEvent("StorageMessagesWereNotFound"); + ASSERT(1 == 0); std::terminate(); return false; } - TraceEvent("Check Succeeded for verifyExecTraceVersion"); + TraceEvent("VerifyExecTraceVersionSuccess"); return true; } }; diff --git a/flow/Platform.cpp b/flow/Platform.cpp index ab3a77da7d..1cb0caf1fa 100644 --- a/flow/Platform.cpp +++ b/flow/Platform.cpp @@ -2692,11 +2692,11 @@ fdbFork(const std::string& path, const std::vector& args) } paramList.push_back(nullptr); - auto te = TraceEvent("FdbFork"); - te.detail("Cmd", path); + std::string argsString; for (int i = 0; i < args.size(); i++) { - te.detail("Args", args[i]); + argsString += args[i] + ","; } + TraceEvent("FdbFork").detail("Cmd", path).detail("Args", argsString); pid_t pid = fork(); if (pid == -1) { diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index a4965e8aa8..b48722bc7d 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -140,6 +140,18 @@ add_fdb_test( add_fdb_test( TEST_FILES restarting/StorefrontTestRestart-1.txt restarting/StorefrontTestRestart-2.txt) +add_fdb_test( + TEST_FILES restarting/SnapTestSimpleRestart-1.txt + restarting/SnapTestSimpleRestart-2.txt + RESTORE) +add_fdb_test( + TEST_FILES restarting/SnapTestRestart-1.txt + restarting/SnapTestRestart-2.txt + RESTORE) +add_fdb_test( + TEST_FILES restarting/SnapCycleRestart-1.txt + restarting/SnapCycleRestart-2.txt + RESTORE) add_fdb_test( TEST_FILES restarting/from_5.1.7/DrUpgradeRestart-1.txt restarting/from_5.1.7/DrUpgradeRestart-2.txt IGNORE) diff --git a/tests/TestRunner/TestRunner.py b/tests/TestRunner/TestRunner.py index f0e5741830..650609f32c 100755 --- a/tests/TestRunner/TestRunner.py +++ b/tests/TestRunner/TestRunner.py @@ -300,6 
+300,8 @@ def run_simulation_test(basedir, options): tmp[0] = options.old_binary if not first: tmp.append('-R') + if options.restoring == 'YES': + tmp.append('--restoring') first = False tmp.append('-f') tmp.append(testfile) @@ -368,6 +370,9 @@ if __name__ == '__main__': choices=['xml', 'json'], help='Log format (json or xml)') parser.add_argument('-O', '--old-binary', required=False, default=None, help='Path to the old binary to use for upgrade tests') + parser.add_argument('-RO', '--restoring', required=False, default='NO', + choices=['YES', 'NO'], + help='Set if you want asnapshot to be restored') parser.add_argument('--aggregate-traces', default='NONE', choices=['NONE', 'FAILED', 'ALL']) parser.add_argument('--keep-logs', default='FAILED', diff --git a/tests/restarting/SnapCycleRestart-1.txt b/tests/restarting/SnapCycleRestart-1.txt new file mode 100644 index 0000000000..29a511ebe7 --- /dev/null +++ b/tests/restarting/SnapCycleRestart-1.txt @@ -0,0 +1,22 @@ +testTitle=SnapCyclePre +;Take snap and do cycle test + clearAfterTest=false + testName=Cycle + transactionsPerSecond=2500.0 + nodeCount=2500 + testDuration=10.0 + expectedRate=0 + + testName=SnapTest + numSnaps=1 + maxSnapDelay=10.0 + testID=1 + clearAfterTest=false + snapCheck=true + dumpAfterTest=true + +testTitle=SnapCycleShutdown +;save and shutdown + testName=SaveAndKill + restartInfoLocation=simfdb/restartInfo.ini + testDuration=10.0 diff --git a/tests/restarting/SnapCycleRestart-2.txt b/tests/restarting/SnapCycleRestart-2.txt new file mode 100644 index 0000000000..0ea01d0984 --- /dev/null +++ b/tests/restarting/SnapCycleRestart-2.txt @@ -0,0 +1,8 @@ +testTitle=SnapCycleRestore +;Post snap restore test + runSetup=false + testName=Cycle + transactionsPerSecond=2500.0 + nodeCount=2500 + testDuration=10.0 + expectedRate=0 diff --git a/tests/restarting/SnapTestRestart-1.txt b/tests/restarting/SnapTestRestart-1.txt new file mode 100644 index 0000000000..d1c26b9a82 --- /dev/null +++ 
b/tests/restarting/SnapTestRestart-1.txt @@ -0,0 +1,48 @@ +testTitle=SnapTestPre +;write 1000 Keys ending with even numbers + testName=SnapTest + numSnaps=1 + maxSnapDelay=3.0 + testID=0 + clearAfterTest=false + snapCheck=false + dumpAfterTest=true + + +testTitle=SnapTestTakeSnap +;Take snap and do read/write + testName=ReadWrite + testDuration=20.0 + transactionsPerSecond=10000 + writesPerTransactionA=0 + readsPerTransactionA=10 + writesPerTransactionB=10 + readsPerTransactionB=1 + alpha=0.5 + nodeCount=100000 + valueBytes=16 + discardEdgeMeasurements=false + + testName=SnapTest + numSnaps=1 + maxSnapDelay=30.0 + testID=1 + clearAfterTest=false + snapCheck=true + dumpAfterTest=true + +testTitle=SnapTestPost +;write 1000 Keys ending with odd numbers + testName=SnapTest + numSnaps=1 + maxSnapDelay=25.0 + testID=2 + clearAfterTest=false + snapCheck=false + dumpAfterTest=true + +testTitle=SnapTestShutdown +;save and shutdown + testName=SaveAndKill + restartInfoLocation=simfdb/restartInfo.ini + testDuration=10.0 diff --git a/tests/restarting/SnapTestRestart-2.txt b/tests/restarting/SnapTestRestart-2.txt new file mode 100644 index 0000000000..b86c59b7c6 --- /dev/null +++ b/tests/restarting/SnapTestRestart-2.txt @@ -0,0 +1,8 @@ +; verify all keys are even numbered +testTitle=SnapTestVerify +testName=SnapTest +numSnaps=1 +maxSnapDelay=3.0 +testID=3 +snapCheck=false +dumpAfterTest=true diff --git a/tests/restarting/SnapTestSimpleRestart-1.txt b/tests/restarting/SnapTestSimpleRestart-1.txt new file mode 100644 index 0000000000..635d0a6c1c --- /dev/null +++ b/tests/restarting/SnapTestSimpleRestart-1.txt @@ -0,0 +1,36 @@ +;write 1000 Keys ending with even number +testTitle=SnapSimplePre + testName=SnapTest + numSnaps=1 + maxSnapDelay=30.0 + testID=0 + clearAfterTest=false + snapCheck=false + dumpAfterTest=true + +;take snap +testTitle=SnapSimpleTakeSnap + testName=SnapTest + numSnaps=1 + maxSnapDelay=5.0 + testID=1 + clearAfterTest=false + snapCheck=true + dumpAfterTest=true 
+ +;write 1000 Keys ending with odd number +testTitle=SnapSimplePost + testName=SnapTest + numSnaps=1 + maxSnapDelay=3.0 + testID=2 + clearAfterTest=false + snapCheck=false + dumpAfterTest=true + +; save and shutdown +testTitle=SnapSimpleShutdown + + testName=SaveAndKill + restartInfoLocation=simfdb/restartInfo.ini + testDuration=10.0 diff --git a/tests/restarting/SnapTestSimpleRestart-2.txt b/tests/restarting/SnapTestSimpleRestart-2.txt new file mode 100644 index 0000000000..38013680ba --- /dev/null +++ b/tests/restarting/SnapTestSimpleRestart-2.txt @@ -0,0 +1,40 @@ +; verify all keys are even numbered +testTitle=SnapSimpleVerify +testName=SnapTest +numSnaps=1 +maxSnapDelay=3.0 +testID=3 +snapCheck=false + +; verify that the TLog popping disable times out and switches to enable mode +; automatically, if not enabled specifically +testTitle=SnapTLogPopDisableTimeout +testName=SnapTest +numSnaps=1 +maxSnapDelay=3.0 +testID=4 +snapCheck=true + +; TLog pop enable and disable UID mismatch +testTitle=SnapTLogPopEnableDisableMismatch +testName=SnapTest +numSnaps=1 +maxSnapDelay=3.0 +testID=5 +snapCheck=true + +; snapCreate without TLogPopDisable +testTitle=SnapCreateWithNoDisablePop +testName=SnapTest +numSnaps=1 +maxSnapDelay=3.0 +testID=6 +snapCheck=true + +; snapCreate and tlogPopDisable with mis-matched UID +testTitle=SnapCreateDisableTLogPopMismatch +testName=SnapTest +numSnaps=1 +maxSnapDelay=3.0 +testID=7 +snapCheck=true From 3d5998e9ddc85d4184be70c37d4c1f15da5019dc Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Tue, 19 Mar 2019 11:36:07 -0700 Subject: [PATCH 05/69] tlog: when pops are disabled, store them & replay In Tlogs, disable pop is done while taking snapshots. Earlier, tlogs were ignoring the pops if they got pop requests when pops were disabled. In this change, instead of ignoring the pop - it remembers the list of pops in-memory and plays them once the popping is enabled.
--- fdbclient/NativeAPI.actor.cpp | 8 +++ fdbserver/LogSystem.h | 11 ---- fdbserver/OldTLogServer_6_0.actor.cpp | 81 ++++++++++++++++++------ fdbserver/TLogServer.actor.cpp | 72 ++++++++++++++++++--- fdbserver/workloads/SnapTest.actor.cpp | 4 +- tests/CMakeLists.txt | 4 ++ tests/restarting/SnapTestAttrition-1.txt | 51 +++++++++++++++ tests/restarting/SnapTestAttrition-2.txt | 9 +++ 8 files changed, 198 insertions(+), 42 deletions(-) create mode 100644 tests/restarting/SnapTestAttrition-1.txt create mode 100644 tests/restarting/SnapTestAttrition-2.txt diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index cc427466d2..837f936dfa 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -3370,6 +3370,14 @@ ACTOR Future snapCreate(Database inputCx, StringRef snapCmd, UID snapUID) TraceEvent("SnapCreateAfterSnappingTLogStorage").detail("UID", snapUID); + if (BUGGIFY) { + int32_t toDelay = g_random->randomInt(1, 30); + TraceEvent("SleepingBeforeEnablingPop") + .detail("duration", toDelay); + wait(delay(toDelay)); + TraceEvent("DoneSleepingBeforeEnablingPop"); + } + // enable popping of the TLog loop { tr.reset(); diff --git a/fdbserver/LogSystem.h b/fdbserver/LogSystem.h index c41f44b300..2bf236cadf 100644 --- a/fdbserver/LogSystem.h +++ b/fdbserver/LogSystem.h @@ -234,17 +234,6 @@ public: void getPushLocations(std::vector const& tags, std::vector& locations, int locationOffset, bool allLocations = false) { if(locality == tagLocalitySatellite) { - if (allLocations) { - // special handling for allLocations - TraceEvent("AllLocationsSet"); - for (int i = 0; i < satelliteTagLocations.size(); i++) { - for (int j : satelliteTagLocations[i]) { - locations.push_back(locationOffset + j); - } - } - uniquify(locations); - return; - } for(auto& t : tags) { if(t == txsTag || t.locality == tagLocalityLogRouter) { for(int loc : satelliteTagLocations[t == txsTag ? 
0 : t.id + 1]) { diff --git a/fdbserver/OldTLogServer_6_0.actor.cpp b/fdbserver/OldTLogServer_6_0.actor.cpp index e40b8dd61b..33289fe764 100644 --- a/fdbserver/OldTLogServer_6_0.actor.cpp +++ b/fdbserver/OldTLogServer_6_0.actor.cpp @@ -287,14 +287,16 @@ struct TLogData : NonCopyable { // be able to match it up std::string dataFolder; // folder where data is stored Reference> degraded; + std::map toBePopped; // map of Tag->Version for all the pops + // that came when ignorePopRequest was set - TLogData(UID dbgid, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference> dbInfo, Reference> degraded) + TLogData(UID dbgid, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference> dbInfo, Reference> degraded, std::string folder) : dbgid(dbgid), instanceID(deterministicRandom()->randomUniqueID().first()), persistentData(persistentData), rawPersistentQueue(persistentQueue), persistentQueue(new TLogQueue(persistentQueue, dbgid)), dbInfo(dbInfo), degraded(degraded), queueCommitBegin(0), queueCommitEnd(0), diskQueueCommitBytes(0), largeDiskQueueCommitBytes(false), bytesInput(0), bytesDurable(0), overheadBytesInput(0), overheadBytesDurable(0), concurrentLogRouterReads(SERVER_KNOBS->CONCURRENT_LOG_ROUTER_READS), - ignorePopRequest(false), ignorePopDeadline(), ignorePopUid(), dataFolder(folder) + ignorePopRequest(false), ignorePopDeadline(), ignorePopUid(), dataFolder(folder), toBePopped() { } }; @@ -899,30 +901,28 @@ std::deque> & getVersionMessages( Re return tagData->versionMessages; }; -ACTOR Future tLogPop( TLogData* self, TLogPopRequest req, Reference logData ) { - // timeout check for ignorePopRequest - if (self->ignorePopRequest && (g_network->now() > self->ignorePopDeadline)) { - self->ignorePopRequest = false; - self->ignorePopUid = ""; - self->ignorePopDeadline = 0.0; - TraceEvent("ResetIgnorePopRequest") - .detail("Now", g_network->now()) - .detail("IgnorePopRequest", self->ignorePopRequest) - .detail("IgnorePopDeadline", 
self->ignorePopDeadline) - .trackLatest("DisableTLogPopTimedOut"); - } - if (self->ignorePopRequest && req.tag != txsTag) { +ACTOR Future tLogPopCore( TLogData* self, Tag inputTag, Version to, Reference logData ) { + if (self->ignorePopRequest && inputTag != txsTag) { TraceEvent("IgnoringPopRequest").detail("IgnorePopDeadline", self->ignorePopDeadline); - req.reply.send(Void()); + + if (self->toBePopped.find(inputTag) == self->toBePopped.end() + || to > self->toBePopped[inputTag]) { + self->toBePopped[inputTag] = to; + } + // add the pop to the toBePopped map + TraceEvent(SevDebug, "IgnoringPopRequest") + .detail("IgnorePopDeadline", self->ignorePopDeadline) + .detail("Tag", inputTag.toString()) + .detail("Version", to); return Void(); } - state Version upTo = req.to; - int8_t tagLocality = req.tag.locality; + state Version upTo = to; + int8_t tagLocality = inputTag.locality; if (logData->logSystem->get().isValid() && logData->logSystem->get()->isPseudoLocality(tagLocality)) { - upTo = logData->logSystem->get()->popPseudoLocalityTag(tagLocality, req.to); + upTo = logData->logSystem->get()->popPseudoLocalityTag(tagLocality, to); tagLocality = tagLocalityLogRouter; } - state Tag tag(tagLocality, req.tag.id); + state Tag tag(tagLocality, inputTag.id); auto tagData = logData->getTagData(tag); if (!tagData) { tagData = logData->createTagData(tag, upTo, true, true, false); @@ -943,7 +943,34 @@ ACTOR Future tLogPop( TLogData* self, TLogPopRequest req, ReferenceeraseMessagesBefore(upTo, self, logData, TaskTLogPop)); //TraceEvent("TLogPop", self->dbgid).detail("Tag", tag.toString()).detail("To", upTo); } + return Void(); +} +ACTOR Future tLogPop( TLogData* self, TLogPopRequest req, Reference logData ) { + // timeout check for ignorePopRequest + if (self->ignorePopRequest && (g_network->now() > self->ignorePopDeadline)) { + + TraceEvent("EnableTLogPlayAllIgnoredPops"); + // use toBePopped and issue all the pops + state std::map::iterator it; + for (it = 
self->toBePopped.begin(); it != self->toBePopped.end(); it++) { + TraceEvent("PlayIgnoredPop") + .detail("Tag", it->first.toString()) + .detail("Version", it->second); + wait(tLogPopCore(self, it->first, it->second, logData)); + } + self->toBePopped.clear(); + + self->ignorePopRequest = false; + self->ignorePopUid = ""; + self->ignorePopDeadline = 0.0; + TraceEvent("ResetIgnorePopRequest") + .detail("Now", g_network->now()) + .detail("IgnorePopRequest", self->ignorePopRequest) + .detail("IgnorePopDeadline", self->ignorePopDeadline) + .trackLatest("DisableTLogPopTimedOut"); + } + wait(tLogPopCore(self, req.tag, req.to, logData)); req.reply.send(Void()); return Void(); } @@ -1342,7 +1369,7 @@ ACTOR Future tLogCommit( execArg.setCmdValueString(param2.toString()); execArg.dbgPrint(); - auto uidStr = execArg.getBinaryArgValue("uid"); + state std::string uidStr = execArg.getBinaryArgValue("uid"); execVersion = qe.version; if (execCmd == execSnap) { // validation check specific to snap request @@ -1400,6 +1427,18 @@ ACTOR Future tLogCommit( .detail("UidStr", uidStr) .trackLatest("TLogPopDisableEnableUidMismatch"); } + + TraceEvent("EnableTLogPlayAllIgnoredPops"); + // use toBePopped and issue all the pops + state std::map::iterator it; + for (it = self->toBePopped.begin(); it != self->toBePopped.end(); it++) { + TraceEvent("PlayIgnoredPop") + .detail("Tag", it->first.toString()) + .detail("Version", it->second); + wait(tLogPopCore(self, it->first, it->second, logData)); + } + self->toBePopped.clear(); + self->ignorePopRequest = false; self->ignorePopDeadline = 0.0; self->ignorePopUid = ""; diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index 61b8d2837d..2b5547a41a 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -338,15 +338,17 @@ struct TLogData : NonCopyable { // be able to match it up std::string dataFolder; // folder where data is stored Reference> degraded; + std::map toBePopped; // map of Tag->Version 
for all the pops + // that came when ignorePopRequest was set - TLogData(UID dbgid, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference> dbInfo, Reference> degraded) + TLogData(UID dbgid, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference> dbInfo, Reference> degraded, std::string folder) : dbgid(dbgid), instanceID(deterministicRandom()->randomUniqueID().first()), persistentData(persistentData), rawPersistentQueue(persistentQueue), persistentQueue(new TLogQueue(persistentQueue, dbgid)), dbInfo(dbInfo), degraded(degraded), queueCommitBegin(0), queueCommitEnd(0), diskQueueCommitBytes(0), largeDiskQueueCommitBytes(false), bytesInput(0), bytesDurable(0), overheadBytesInput(0), overheadBytesDurable(0), peekMemoryLimiter(SERVER_KNOBS->TLOG_SPILL_REFERENCE_MAX_PEEK_MEMORY_BYTES), concurrentLogRouterReads(SERVER_KNOBS->CONCURRENT_LOG_ROUTER_READS), - ignorePopRequest(false), ignorePopDeadline(), ignorePopUid(), dataFolder(folder) + ignorePopRequest(false), ignorePopDeadline(), ignorePopUid(), dataFolder(folder), toBePopped() { } }; @@ -1149,14 +1151,28 @@ std::deque> & getVersionMessages( Re return tagData->versionMessages; }; -ACTOR Future tLogPop( TLogData* self, TLogPopRequest req, Reference logData ) { - state Version upTo = req.to; - int8_t tagLocality = req.tag.locality; +ACTOR Future tLogPopCore( TLogData* self, Tag inputTag, Version to, Reference logData ) { + if (self->ignorePopRequest && inputTag != txsTag) { + TraceEvent("IgnoringPopRequest").detail("IgnorePopDeadline", self->ignorePopDeadline); + + if (self->toBePopped.find(inputTag) == self->toBePopped.end() + || to > self->toBePopped[inputTag]) { + self->toBePopped[inputTag] = to; + } + // add the pop to the toBePopped map + TraceEvent(SevDebug, "IgnoringPopRequest") + .detail("IgnorePopDeadline", self->ignorePopDeadline) + .detail("Tag", inputTag.toString()) + .detail("Version", to); + return Void(); + } + state Version upTo = to; + int8_t tagLocality = 
inputTag.locality; if (logData->logSystem->get().isValid() && logData->logSystem->get()->isPseudoLocality(tagLocality)) { - upTo = logData->logSystem->get()->popPseudoLocalityTag(tagLocality, req.to); + upTo = logData->logSystem->get()->popPseudoLocalityTag(tagLocality, to); tagLocality = tagLocalityLogRouter; } - state Tag tag(tagLocality, req.tag.id); + state Tag tag(tagLocality, inputTag.id); auto tagData = logData->getTagData(tag); if (!tagData) { tagData = logData->createTagData(tag, upTo, true, true, false); @@ -1178,6 +1194,34 @@ ACTOR Future tLogPop( TLogData* self, TLogPopRequest req, ReferenceeraseMessagesBefore(upTo, self, logData, TaskTLogPop)); //TraceEvent("TLogPop", self->dbgid).detail("Tag", tag.toString()).detail("To", upTo); } + return Void(); +} + +ACTOR Future tLogPop( TLogData* self, TLogPopRequest req, Reference logData ) { + // timeout check for ignorePopRequest + if (self->ignorePopRequest && (g_network->now() > self->ignorePopDeadline)) { + + TraceEvent("EnableTLogPlayAllIgnoredPops"); + // use toBePopped and issue all the pops + state std::map::iterator it; + for (it = self->toBePopped.begin(); it != self->toBePopped.end(); it++) { + TraceEvent("PlayIgnoredPop") + .detail("Tag", it->first.toString()) + .detail("Version", it->second); + wait(tLogPopCore(self, it->first, it->second, logData)); + } + self->toBePopped.clear(); + + self->ignorePopRequest = false; + self->ignorePopUid = ""; + self->ignorePopDeadline = 0.0; + TraceEvent("ResetIgnorePopRequest") + .detail("Now", g_network->now()) + .detail("IgnorePopRequest", self->ignorePopRequest) + .detail("IgnorePopDeadline", self->ignorePopDeadline) + .trackLatest("DisableTLogPopTimedOut"); + } + wait(tLogPopCore(self, req.tag, req.to, logData)); req.reply.send(Void()); return Void(); } @@ -1692,7 +1736,7 @@ ACTOR Future tLogCommit( execArg.setCmdValueString(param2.toString()); execArg.dbgPrint(); - auto uidStr = execArg.getBinaryArgValue("uid"); + state std::string uidStr = 
execArg.getBinaryArgValue("uid"); execVersion = qe.version; if (execCmd == execSnap) { // validation check specific to snap request @@ -1751,6 +1795,18 @@ ACTOR Future tLogCommit( .detail("UidStr", uidStr) .trackLatest("TLogPopDisableEnableUidMismatch"); } + + TraceEvent("EnableTLogPlayAllIgnoredPops"); + // use toBePopped and issue all the pops + state std::map::iterator it; + for (it = self->toBePopped.begin(); it != self->toBePopped.end(); it++) { + TraceEvent("PlayIgnoredPop") + .detail("Tag", it->first.toString()) + .detail("Version", it->second); + wait(tLogPopCore(self, it->first, it->second, logData)); + } + self->toBePopped.clear(); + self->ignorePopRequest = false; self->ignorePopDeadline = 0.0; self->ignorePopUid = ""; diff --git a/fdbserver/workloads/SnapTest.actor.cpp b/fdbserver/workloads/SnapTest.actor.cpp index dd414d78bb..34831b5359 100644 --- a/fdbserver/workloads/SnapTest.actor.cpp +++ b/fdbserver/workloads/SnapTest.actor.cpp @@ -330,7 +330,7 @@ public: // workload functions break; } catch (Error& e) { TraceEvent("SnapCreate").detail("SnapCreateErrorSnapTLogStorage", e.what()); - throw; + wait(tr.onError(e)); } } } else if (self->testID == 7) { @@ -357,7 +357,7 @@ public: // workload functions break; } catch (Error& e) { TraceEvent("SnapCreate").detail("SnapCreateErrorSnapTLogStorage", e.what()); - throw; + wait(tr.onError(e)); } } } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index b48722bc7d..10f85c2985 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -152,6 +152,10 @@ add_fdb_test( TEST_FILES restarting/SnapCycleRestart-1.txt restarting/SnapCycleRestart-2.txt RESTORE) +add_fdb_test( + TEST_FILES restarting/SnapTestAttrition-1.txt + restarting/SnapTestAttrition-2.txt + RESTORE) add_fdb_test( TEST_FILES restarting/from_5.1.7/DrUpgradeRestart-1.txt restarting/from_5.1.7/DrUpgradeRestart-2.txt IGNORE) diff --git a/tests/restarting/SnapTestAttrition-1.txt b/tests/restarting/SnapTestAttrition-1.txt new file mode 100644 
index 0000000000..7eccd164d7 --- /dev/null +++ b/tests/restarting/SnapTestAttrition-1.txt @@ -0,0 +1,51 @@ +testTitle=SnapTestPre +;write 1000 Keys ending with even numbers + testName=SnapTest + numSnaps=1 + maxSnapDelay=3.0 + testID=0 + clearAfterTest=false + snapCheck=false + dumpAfterTest=true + + +testTitle=SnapTestTakeSnap +;Take snap and do read/write + testName=ReadWrite + testDuration=20.0 + transactionsPerSecond=10000 + writesPerTransactionA=0 + readsPerTransactionA=10 + writesPerTransactionB=10 + readsPerTransactionB=1 + alpha=0.5 + nodeCount=100000 + valueBytes=16 + discardEdgeMeasurements=false + + testName=SnapTest + numSnaps=1 + maxSnapDelay=30.0 + testID=1 + clearAfterTest=false + snapCheck=false + dumpAfterTest=true + + testName=Attrition + testDuration=20.0 + +testTitle=SnapTestPost +;write 1000 Keys ending with odd numbers + testName=SnapTest + numSnaps=1 + maxSnapDelay=25.0 + testID=2 + clearAfterTest=false + snapCheck=false + dumpAfterTest=true + +; save and shutdown +testTitle=SnapSimpleShutdown + testName=SaveAndKill + restartInfoLocation=simfdb/restartInfo.ini + testDuration=10.0 diff --git a/tests/restarting/SnapTestAttrition-2.txt b/tests/restarting/SnapTestAttrition-2.txt new file mode 100644 index 0000000000..336d5a8137 --- /dev/null +++ b/tests/restarting/SnapTestAttrition-2.txt @@ -0,0 +1,9 @@ +; verify all keys are even numbered +testTitle=SnapTestVerify +testName=SnapTest +numSnaps=1 +maxSnapDelay=3.0 +testID=3 +snapCheck=false +dumpAfterTest=true +restartInfoLocation=simfdb/restartInfo.ini From 4016f16c763a37516f8a526e91e9d9f41a85ce9f Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Tue, 19 Mar 2019 15:12:47 -0700 Subject: [PATCH 06/69] Fix few compilation and bugs in rebase --- fdbserver/MasterProxyServer.actor.cpp | 8 ++++---- fdbserver/OldTLogServer_6_0.actor.cpp | 2 +- fdbserver/TLogServer.actor.cpp | 2 +- fdbserver/WorkerInterface.actor.h | 2 +- fdbserver/workloads/SnapTest.actor.cpp | 20 ++++++++++---------- 5 files changed, 
17 insertions(+), 17 deletions(-) diff --git a/fdbserver/MasterProxyServer.actor.cpp b/fdbserver/MasterProxyServer.actor.cpp index 8a62c7a42b..b8262c63bf 100644 --- a/fdbserver/MasterProxyServer.actor.cpp +++ b/fdbserver/MasterProxyServer.actor.cpp @@ -1584,7 +1584,7 @@ ACTOR Future masterProxyServerCore( } // get the list of workers - state std::vector> workers = + state std::vector workers = wait(db->get().clusterInterface.getWorkers.getReply(GetWorkersRequest())); // send the exec command to the list of workers which are @@ -1592,10 +1592,10 @@ ACTOR Future masterProxyServerCore( state int i = 0; state int numSucc = 0; for (; i < workers.size(); i++) { - if (coordinatorsAddrSet.find(workers[i].first.address()) != coordinatorsAddrSet.end()) { - TraceEvent("ExecReqToCoordinator").detail("WorkerAddr", workers[i].first.address()); + if (coordinatorsAddrSet.find(workers[i].interf.address()) != coordinatorsAddrSet.end()) { + TraceEvent("ExecReqToCoordinator").detail("WorkerAddr", workers[i].interf.address()); try { - wait(timeoutError(workers[i].first.execReq.getReply(ExecuteRequest(execReq.execPayLoad)), 1.0)); + wait(timeoutError(workers[i].interf.execReq.getReply(ExecuteRequest(execReq.execPayLoad)), 1.0)); ++numSucc; } catch (Error& e) { TraceEvent("ExecReqFailed").detail("what", e.what()); diff --git a/fdbserver/OldTLogServer_6_0.actor.cpp b/fdbserver/OldTLogServer_6_0.actor.cpp index 33289fe764..fa23e5f3cc 100644 --- a/fdbserver/OldTLogServer_6_0.actor.cpp +++ b/fdbserver/OldTLogServer_6_0.actor.cpp @@ -286,9 +286,9 @@ struct TLogData : NonCopyable { // the set and for callers that unset will // be able to match it up std::string dataFolder; // folder where data is stored - Reference> degraded; std::map toBePopped; // map of Tag->Version for all the pops // that came when ignorePopRequest was set + Reference> degraded; TLogData(UID dbgid, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference> dbInfo, Reference> degraded, std::string folder) : 
dbgid(dbgid), instanceID(deterministicRandom()->randomUniqueID().first()), diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index 2b5547a41a..1dc32f2ee3 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -337,9 +337,9 @@ struct TLogData : NonCopyable { // the set and for callers that unset will // be able to match it up std::string dataFolder; // folder where data is stored - Reference> degraded; std::map toBePopped; // map of Tag->Version for all the pops // that came when ignorePopRequest was set + Reference> degraded; TLogData(UID dbgid, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference> dbInfo, Reference> degraded, std::string folder) : dbgid(dbgid), instanceID(deterministicRandom()->randomUniqueID().first()), diff --git a/fdbserver/WorkerInterface.actor.h b/fdbserver/WorkerInterface.actor.h index bc973f9208..6ec6b35658 100644 --- a/fdbserver/WorkerInterface.actor.h +++ b/fdbserver/WorkerInterface.actor.h @@ -420,7 +420,7 @@ ACTOR Future masterProxyServer(MasterProxyInterface proxy, InitializeMaste ACTOR Future tLog(IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference> db, LocalityData locality, PromiseStream tlogRequests, UID tlogId, bool restoreFromDisk, - Promise oldLog, Promise recovered, std:;string folder, Reference> degraded); // changes tli->id() to be the recovered ID + Promise oldLog, Promise recovered, std::string folder, Reference> degraded); // changes tli->id() to be the recovered ID ACTOR Future monitorServerDBInfo(Reference>> ccInterface, Reference ccf, LocalityData locality, Reference> dbInfo); diff --git a/fdbserver/workloads/SnapTest.actor.cpp b/fdbserver/workloads/SnapTest.actor.cpp index 34831b5359..02f62ec78b 100644 --- a/fdbserver/workloads/SnapTest.actor.cpp +++ b/fdbserver/workloads/SnapTest.actor.cpp @@ -371,11 +371,11 @@ public: // workload functions state vector tLogWorkers; state std::vector> tLogMessages; - state std::vector> 
workers = wait(getWorkers(self->dbInfo)); + state std::vector workers = wait(getWorkers(self->dbInfo)); state std::map address_workers; for (auto const& worker : workers) { - address_workers[worker.first.address()] = worker.first; + address_workers[worker.interf.address()] = worker.interf; } vector tLogServers = self->dbInfo->get().logSystemConfig.allPresentLogs(); @@ -435,10 +435,10 @@ public: // workload functions ACTOR Future verifyExecTraceVersion(Database cx, SnapTestWorkload* self) { state std::vector coordAddrs = wait(getCoordinators(cx)); - state vector> proxyWorkers = wait(getWorkers(self->dbInfo)); - state vector> storageWorkers = wait(getWorkers(self->dbInfo)); - state vector> tLogWorkers = wait(getWorkers(self->dbInfo)); - state vector> workers = wait(getWorkers(self->dbInfo)); + state vector proxyWorkers = wait(getWorkers(self->dbInfo)); + state vector storageWorkers = wait(getWorkers(self->dbInfo)); + state vector tLogWorkers = wait(getWorkers(self->dbInfo)); + state vector workers = wait(getWorkers(self->dbInfo)); state std::vector> proxyMessages; state std::vector> tLogMessages; @@ -459,21 +459,21 @@ public: // workload functions std::string eventToken = "ExecTrace/Coordinators/" + self->snapUID.toString(); StringRef eventTokenRef(eventToken); coordMessages.push_back( - timeoutError(workers[i].first.eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 1.0)); + timeoutError(workers[i].interf.eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 1.0)); } for (int i = 0; i < workers.size(); i++) { std::string eventToken = "ExecTrace/Proxy/" + self->snapUID.toString(); StringRef eventTokenRef(eventToken); proxyMessages.push_back( - timeoutError(workers[i].first.eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 1.0)); + timeoutError(workers[i].interf.eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 1.0)); } for (int i = 0; i < storageWorkers.size(); i++) { std::string eventToken = "ExecTrace/storage/" + self->snapUID.toString(); 
StringRef eventTokenRef(eventToken); storageMessages.push_back(timeoutError( - storageWorkers[i].first.eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 1.0)); + storageWorkers[i].interf.eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 1.0)); } try { @@ -573,7 +573,7 @@ public: // workload functions std::string eventToken = "ExecTrace/TLog/" + tag.toString() + "/" + self->snapUID.toString(); StringRef eventTokenRef(eventToken); tLogMessages.push_back(timeoutError( - tLogWorkers[m].first.eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 1.0)); + tLogWorkers[m].interf.eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 1.0)); } try { From 6431513ad0aa755c386c855a766e7e9b029c7901 Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Wed, 20 Mar 2019 09:29:09 -0700 Subject: [PATCH 07/69] Fail exec req until the cluster is fully_recovered --- fdbserver/MasterProxyServer.actor.cpp | 120 ++++++++++++++----------- fdbserver/workloads/SnapTest.actor.cpp | 1 - 2 files changed, 66 insertions(+), 55 deletions(-) diff --git a/fdbserver/MasterProxyServer.actor.cpp b/fdbserver/MasterProxyServer.actor.cpp index b8262c63bf..520843bcb5 100644 --- a/fdbserver/MasterProxyServer.actor.cpp +++ b/fdbserver/MasterProxyServer.actor.cpp @@ -222,6 +222,7 @@ struct ProxyCommitData { RequestStream getConsistentReadVersion; RequestStream commit; Database cx; + Reference> db; EventMetricHandle singleKeyMutationEvent; std::map> storageCache; @@ -258,7 +259,7 @@ struct ProxyCommitData { lastVersionTime(0), commitVersionRequestNumber(1), mostRecentProcessedRequestNumber(0), getConsistentReadVersion(getConsistentReadVersion), commit(commit), lastCoalesceTime(0), localCommitBatchesStarted(0), locked(false), commitBatchInterval(SERVER_KNOBS->COMMIT_TRANSACTION_BATCH_INTERVAL_MIN), - firstProxy(firstProxy), cx(openDBOnServer(db, TaskDefaultEndpoint, true, true)), + firstProxy(firstProxy), cx(openDBOnServer(db, TaskDefaultEndpoint, true, true)), db(db), 
singleKeyMutationEvent(LiteralStringRef("SingleKeyMutation")), commitBatchesMemBytesCount(0), lastTxsPop(0) {} }; @@ -730,52 +731,63 @@ ACTOR Future commitBatch( } toCommit.addTypedMessage(m); } else if (m.type == MutationRef::Exec) { - auto ranges = self->keyInfo.intersectingRanges(allKeys); - std::set allSources; + if(self->db->get().recoveryState != RecoveryState::FULLY_RECOVERED) { + // Cluster is not fully recovered and needs TLogs + // from previous generation for full recovery. + // Currently, snapshot of old tlog generation is not + // supported and hence failing the snapshot request until + // cluster is fully_recovered. + TraceEvent("ExecTransactionConflict") + .detail("TransactionNum", transactionNum); + committed[transactionNum] = ConflictBatch::TransactionConflict; + } else { + auto ranges = self->keyInfo.intersectingRanges(allKeys); + std::set allSources; - if (debugMutation("ProxyCommit", commitVersion, m)) - TraceEvent("ProxyCommitTo", self->dbgid) - .detail("To", "all sources") - .detail("Mutation", m.toString()) - .detail("Version", commitVersion); + if (debugMutation("ProxyCommit", commitVersion, m)) + TraceEvent("ProxyCommitTo", self->dbgid) + .detail("To", "all sources") + .detail("Mutation", m.toString()) + .detail("Version", commitVersion); - for (auto r : ranges) { - auto& tags = r.value().tags; - if (!tags.size()) { - for (auto info : r.value().src_info) { - tags.push_back(info->tag); + for (auto r : ranges) { + auto& tags = r.value().tags; + if (!tags.size()) { + for (auto info : r.value().src_info) { + tags.push_back(info->tag); + } + for (auto info : r.value().dest_info) { + tags.push_back(info->tag); + } + uniquify(tags); } - for (auto info : r.value().dest_info) { - tags.push_back(info->tag); - } - uniquify(tags); + allSources.insert(tags.begin(), tags.end()); } - allSources.insert(tags.begin(), tags.end()); - } - auto param2 = m.param2.toString(); - ExecCmdValueString execArg(param2); - execArg.dbgPrint(); - auto uidStr = 
execArg.getBinaryArgValue("uid"); - auto tokenStr = "ExecTrace/Proxy/" + uidStr; + auto param2 = m.param2.toString(); + ExecCmdValueString execArg(param2); + execArg.dbgPrint(); + auto uidStr = execArg.getBinaryArgValue("uid"); + auto tokenStr = "ExecTrace/Proxy/" + uidStr; - auto te1 = TraceEvent("ProxyCommitTo", self->dbgid); - te1.detail("To", "all sources"); - te1.detail("Mutation", m.toString()); - te1.detail("Version", commitVersion); - te1.detail("NumTags", allSources.size()); - if (m.param1 == execSnap) { - te1.trackLatest(tokenStr.c_str()); + auto te1 = TraceEvent("ProxyCommitTo", self->dbgid); + te1.detail("To", "all sources"); + te1.detail("Mutation", m.toString()); + te1.detail("Version", commitVersion); + te1.detail("NumTags", allSources.size()); + if (m.param1 == execSnap) { + te1.trackLatest(tokenStr.c_str()); + } + int i = 0; + std::string allTagString; + for (auto& tag : allSources) { + allTagString += tag.toString() + ","; + toCommit.addTag(tag); + } + TraceEvent(SevDebug, "TagInfo").detail("Tags", allTagString); + toCommit.addTypedMessage(m, true /* allLocations */); + toCommit.setHasExecOp(); } - int i = 0; - std::string allTagString; - for (auto& tag : allSources) { - allTagString += tag.toString() + ","; - toCommit.addTag(tag); - } - TraceEvent(SevDebug, "TagInfo").detail("Tags", allTagString); - toCommit.addTypedMessage(m, true /* allLocations */); - toCommit.setHasExecOp(); } else UNREACHABLE(); @@ -1464,21 +1476,21 @@ ACTOR Future masterProxyServerCore( //TraceEvent("ProxyInit1", proxy.id()); // Wait until we can load the "real" logsystem, since we don't support switching them currently - while (!(db->get().master.id() == master.id() && db->get().recoveryState >= RecoveryState::RECOVERY_TRANSACTION)) { + while (!(commitData.db->get().master.id() == master.id() && commitData.db->get().recoveryState >= RecoveryState::RECOVERY_TRANSACTION)) { //TraceEvent("ProxyInit2", proxy.id()).detail("LSEpoch", 
db->get().logSystemConfig.epoch).detail("Need", epoch); - wait(db->onChange()); + wait(commitData.db->onChange()); } - state Future dbInfoChange = db->onChange(); + state Future dbInfoChange = commitData.db->onChange(); //TraceEvent("ProxyInit3", proxy.id()); - commitData.resolvers = db->get().resolvers; + commitData.resolvers = commitData.db->get().resolvers; ASSERT(commitData.resolvers.size() != 0); auto rs = commitData.keyResolvers.modify(allKeys); for(auto r = rs.begin(); r != rs.end(); ++r) r->value().emplace_back(0,0); - commitData.logSystem = ILogSystem::fromServerDBInfo(proxy.id(), db->get(), false, addActor); + commitData.logSystem = ILogSystem::fromServerDBInfo(proxy.id(), commitData.db->get(), false, addActor); commitData.logAdapter = new LogSystemDiskQueueAdapter(commitData.logSystem, txsTag, Reference>(), false); commitData.txnStateStore = keyValueStoreLogSystem(commitData.logAdapter, proxy.id(), 2e9, true, true, true); @@ -1487,8 +1499,8 @@ ACTOR Future masterProxyServerCore( state int64_t commitBatchesMemoryLimit = std::min(SERVER_KNOBS->COMMIT_BATCHES_MEM_BYTES_HARD_LIMIT, static_cast((SERVER_KNOBS->SERVER_MEM_LIMIT * SERVER_KNOBS->COMMIT_BATCHES_MEM_FRACTION_OF_TOTAL) / SERVER_KNOBS->COMMIT_BATCHES_MEM_TO_TOTAL_MEM_SCALE_FACTOR)); TraceEvent(SevInfo, "CommitBatchesMemoryLimit").detail("BytesLimit", commitBatchesMemoryLimit); - addActor.send(monitorRemoteCommitted(&commitData, db)); - addActor.send(transactionStarter(proxy, db, addActor, &commitData, &healthMetricsReply, &detailedHealthMetricsReply)); + addActor.send(monitorRemoteCommitted(&commitData, commitData.db)); + addActor.send(transactionStarter(proxy, commitData.db, addActor, &commitData, &healthMetricsReply, &detailedHealthMetricsReply)); addActor.send(readRequestServer(proxy, &commitData)); addActor.send(rejoinServer(proxy, &commitData)); addActor.send(healthMetricsRequestServer(proxy, &healthMetricsReply, &detailedHealthMetricsReply)); @@ -1499,21 +1511,21 @@ ACTOR Future 
masterProxyServerCore( int commitBatchByteLimit = (int)std::min(SERVER_KNOBS->COMMIT_TRANSACTION_BATCH_BYTES_MAX, std::max(SERVER_KNOBS->COMMIT_TRANSACTION_BATCH_BYTES_MIN, - SERVER_KNOBS->COMMIT_TRANSACTION_BATCH_BYTES_SCALE_BASE * pow(db->get().client.proxies.size(), SERVER_KNOBS->COMMIT_TRANSACTION_BATCH_BYTES_SCALE_POWER))); + SERVER_KNOBS->COMMIT_TRANSACTION_BATCH_BYTES_SCALE_BASE * pow(commitData.db->get().client.proxies.size(), SERVER_KNOBS->COMMIT_TRANSACTION_BATCH_BYTES_SCALE_POWER))); commitBatcherActor = commitBatcher(&commitData, batchedCommits, proxy.commit.getFuture(), commitBatchByteLimit, commitBatchesMemoryLimit); loop choose{ when( wait( dbInfoChange ) ) { - dbInfoChange = db->onChange(); - if(db->get().master.id() == master.id() && db->get().recoveryState >= RecoveryState::RECOVERY_TRANSACTION) { - commitData.logSystem = ILogSystem::fromServerDBInfo(proxy.id(), db->get(), false, addActor); + dbInfoChange = commitData.db->onChange(); + if(commitData.db->get().master.id() == master.id() && commitData.db->get().recoveryState >= RecoveryState::RECOVERY_TRANSACTION) { + commitData.logSystem = ILogSystem::fromServerDBInfo(proxy.id(), commitData.db->get(), false, addActor); for(auto it : commitData.tag_popped) { commitData.logSystem->pop(it.second, it.first); } commitData.logSystem->pop(commitData.lastTxsPop, txsTag, 0, tagLocalityRemoteLog); } - Optional newLatencyBandConfig = db->get().latencyBandConfig; + Optional newLatencyBandConfig = commitData.db->get().latencyBandConfig; if(newLatencyBandConfig.present() != commitData.latencyBandConfig.present() || (newLatencyBandConfig.present() && newLatencyBandConfig.get().grvConfig != commitData.latencyBandConfig.get().grvConfig)) @@ -1546,7 +1558,7 @@ ACTOR Future masterProxyServerCore( const vector &trs = batchedRequests.first; int batchBytes = batchedRequests.second; //TraceEvent("MasterProxyCTR", proxy.id()).detail("CommitTransactions", trs.size()).detail("TransactionRate", 
transactionRate).detail("TransactionQueue", transactionQueue.size()).detail("ReleasedTransactionCount", transactionCount); - if (trs.size() || (db->get().recoveryState >= RecoveryState::ACCEPTING_COMMITS && now() - lastCommit >= SERVER_KNOBS->MAX_COMMIT_BATCH_INTERVAL)) { + if (trs.size() || (commitData.db->get().recoveryState >= RecoveryState::ACCEPTING_COMMITS && now() - lastCommit >= SERVER_KNOBS->MAX_COMMIT_BATCH_INTERVAL)) { lastCommit = now(); if (trs.size() || lastCommitComplete.isReady()) { @@ -1585,7 +1597,7 @@ ACTOR Future masterProxyServerCore( // get the list of workers state std::vector workers = - wait(db->get().clusterInterface.getWorkers.getReply(GetWorkersRequest())); + wait(commitData.db->get().clusterInterface.getWorkers.getReply(GetWorkersRequest())); // send the exec command to the list of workers which are // coordinators diff --git a/fdbserver/workloads/SnapTest.actor.cpp b/fdbserver/workloads/SnapTest.actor.cpp index 02f62ec78b..22f5f0e0c9 100644 --- a/fdbserver/workloads/SnapTest.actor.cpp +++ b/fdbserver/workloads/SnapTest.actor.cpp @@ -217,7 +217,6 @@ public: // workload functions throw operation_failed(); } } - wait(delay(30.0)); } CSimpleIni ini; ini.SetUnicode(); From 281c785f94d42f43578d250ece1559526a8665a3 Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Wed, 20 Mar 2019 16:51:14 -0700 Subject: [PATCH 08/69] '--restoring' cmd line arg removed for fdbserver '--restoring' command line option was introduced to indicate simulated fdbserver to restore from snapshot and restart the cluster. As part of this change that option is removed and restore information is stored in the restartInfo.ini. 
--- cmake/AddFdbTest.cmake | 5 - fdbserver/fdbserver.actor.cpp | 103 +++++++++---------- fdbserver/workloads/SaveAndKill.actor.cpp | 3 + tests/CMakeLists.txt | 12 +-- tests/restarting/SnapCycleRestart-1.txt | 1 + tests/restarting/SnapTestAttrition-1.txt | 1 + tests/restarting/SnapTestRestart-1.txt | 1 + tests/restarting/SnapTestSimpleRestart-1.txt | 2 +- 8 files changed, 61 insertions(+), 67 deletions(-) diff --git a/cmake/AddFdbTest.cmake b/cmake/AddFdbTest.cmake index d06a3d7a5d..556a84cdd5 100644 --- a/cmake/AddFdbTest.cmake +++ b/cmake/AddFdbTest.cmake @@ -75,10 +75,6 @@ function(add_fdb_test) if(ADD_FDB_TEST_UNIT) set(test_type "test") endif() - set(TEST_RESTORING "NO") - if (ADD_FDB_TEST_RESTORE) - set(TEST_RESTORING "YES") - endif() list(GET ADD_FDB_TEST_TEST_FILES 0 first_file) string(REGEX REPLACE "^(.*)\\.txt$" "\\1" test_name ${first_file}) if("${test_name}" MATCHES "(-\\d)$") @@ -121,7 +117,6 @@ function(add_fdb_test) --log-format ${TEST_LOG_FORMAT} --keep-logs ${TEST_KEEP_LOGS} --keep-simdirs ${TEST_KEEP_SIMDIR} - --restoring ${TEST_RESTORING} --seed ${SEED} --test-number ${assigned_id} ${BUGGIFY_OPTION} diff --git a/fdbserver/fdbserver.actor.cpp b/fdbserver/fdbserver.actor.cpp index dabbb62342..791d992622 100644 --- a/fdbserver/fdbserver.actor.cpp +++ b/fdbserver/fdbserver.actor.cpp @@ -120,7 +120,6 @@ CSimpleOpt::SOption g_rgOptions[] = { { OPT_TESTFILE, "--testfile", SO_REQ_SEP }, { OPT_RESTARTING, "-R", SO_NONE }, { OPT_RESTARTING, "--restarting", SO_NONE }, - { OPT_RESTORING, "--restoring", SO_NONE }, { OPT_RANDOMSEED, "-s", SO_REQ_SEP }, { OPT_RANDOMSEED, "--seed", SO_REQ_SEP }, { OPT_KEY, "-k", SO_REQ_SEP }, @@ -935,7 +934,7 @@ int main(int argc, char* argv[]) { LocalityData localities; int minTesterCount = 1; bool testOnServers = false; - bool restoring = false; + bool isRestoring = false; Reference tlsOptions = Reference( new TLSOptions ); std::string tlsCertPath, tlsKeyPath, tlsCAPath, tlsPassword; @@ -1197,10 +1196,6 @@ int main(int argc, 
char* argv[]) { case OPT_RESTARTING: restarting = true; break; - case OPT_RESTORING: { - restoring = true; - break; - } case OPT_RANDOMSEED: { char* end; randomSeed = (uint32_t)strtoul( args.OptionArg(), &end, 0 ); @@ -1670,64 +1665,66 @@ int main(int argc, char* argv[]) { if (!restarting) { platform::eraseDirectoryRecursive( dataFolder ); platform::createDirectory( dataFolder ); - } else if (restoring) { - std::vector returnList; - std::string ext = ""; - std::string tmpFolder = abspath(dataFolder); - returnList = platform::listDirectories(tmpFolder); - TraceEvent("RestoringDataFolder").detail("DataFolder", tmpFolder); - + } else { CSimpleIni ini; ini.SetUnicode(); + std::string tmpFolder = abspath(dataFolder); ini.LoadFile(joinPath(tmpFolder, "restartInfo.ini").c_str()); - std::string snapStr = ini.GetValue("RESTORE", "RestoreSnapUID"); - TraceEvent("RestoreSnapUID").detail("UID", snapStr); + int isRestoring = atoi(ini.GetValue("RESTORE", "isRestoring")); + if (isRestoring) { + std::vector returnList; + std::string ext = ""; + returnList = platform::listDirectories(tmpFolder); + std::string snapStr = ini.GetValue("RESTORE", "RestoreSnapUID"); - // delete all files (except fdb.cluster) in non-snap directories - for (int i = 0; i < returnList.size(); i++) { - if (returnList[i] == "." 
|| returnList[i] == "..") { - continue; - } - if (returnList[i].find(snapStr) != std::string::npos) { - continue; - } + TraceEvent("RestoringDataFolder").detail("DataFolder", tmpFolder); + TraceEvent("RestoreSnapUID").detail("UID", snapStr); - std::string childf = tmpFolder + "/" + returnList[i]; - std::vector returnFiles = platform::listFiles(childf, ext); - for (int j = 0; j < returnFiles.size(); j++) { - fprintf(stderr, "file : %s\n", returnFiles[j].c_str()); - if (returnFiles[j] != "fdb.cluster") { - TraceEvent("DeletingNonSnapfiles") - .detail("FileBeingDeleted", childf + "/" + returnFiles[j]); - deleteFile(childf + "/" + returnFiles[j]); + // delete all files (except fdb.cluster) in non-snap directories + for (int i = 0; i < returnList.size(); i++) { + if (returnList[i] == "." || returnList[i] == "..") { + continue; + } + if (returnList[i].find(snapStr) != std::string::npos) { + continue; + } + + std::string childf = tmpFolder + "/" + returnList[i]; + std::vector returnFiles = platform::listFiles(childf, ext); + for (int j = 0; j < returnFiles.size(); j++) { + fprintf(stderr, "file : %s\n", returnFiles[j].c_str()); + if (returnFiles[j] != "fdb.cluster") { + TraceEvent("DeletingNonSnapfiles") + .detail("FileBeingDeleted", childf + "/" + returnFiles[j]); + deleteFile(childf + "/" + returnFiles[j]); + } } } - } - // move the contents from snap folder to the original folder, - // delete snap folders - for (int i = 0; i < returnList.size(); i++) { - fprintf(stderr, "Dir : %s\n", returnList[i].c_str()); - if (returnList[i] == "." || returnList[i] == "..") { - continue; - } - if (returnList[i].find(snapStr) == std::string::npos) { - if (returnList[i].find("snap") != std::string::npos) { - platform::eraseDirectoryRecursive(tmpFolder + returnList[i]); + // move the contents from snap folder to the original folder, + // delete snap folders + for (int i = 0; i < returnList.size(); i++) { + fprintf(stderr, "Dir : %s\n", returnList[i].c_str()); + if (returnList[i] == "." 
|| returnList[i] == "..") { + continue; } - continue; + if (returnList[i].find(snapStr) == std::string::npos) { + if (returnList[i].find("snap") != std::string::npos) { + platform::eraseDirectoryRecursive(tmpFolder + returnList[i]); + } + continue; + } + std::string origDir = returnList[i].substr(0, 32); + std::string dirToRemove = tmpFolder + "/" + origDir; + std::string dirSrc = tmpFolder + "/" + returnList[i]; + TraceEvent("DeletingOriginalNonSnapDirectory").detail("FileBeingDeleted", dirToRemove); + platform::eraseDirectoryRecursive(dirToRemove); + renameFile(dirSrc, dirToRemove); + TraceEvent("RenamingSnapToOriginalDirectory") + .detail("Oldname", dirSrc) + .detail("Newname", dirToRemove); } - std::string origDir = returnList[i].substr(0, 32); - std::string dirToRemove = tmpFolder + "/" + origDir; - std::string dirSrc = tmpFolder + "/" + returnList[i]; - TraceEvent("DeletingOriginalNonSnapDirectory").detail("FileBeingDeleted", dirToRemove); - platform::eraseDirectoryRecursive(dirToRemove); - renameFile(dirSrc, dirToRemove); - TraceEvent("RenamingSnapToOriginalDirectory") - .detail("Oldname", dirSrc) - .detail("Newname", dirToRemove); } } - setupAndRun( dataFolder, testFile, restarting, tlsOptions ); g_simulator.run(); } else if (role == FDBD) { diff --git a/fdbserver/workloads/SaveAndKill.actor.cpp b/fdbserver/workloads/SaveAndKill.actor.cpp index 4a6e0d5d0c..87faa3a7bc 100644 --- a/fdbserver/workloads/SaveAndKill.actor.cpp +++ b/fdbserver/workloads/SaveAndKill.actor.cpp @@ -34,12 +34,14 @@ struct SaveAndKillWorkload : TestWorkload { std::string restartInfo; double testDuration; + int isRestoring; SaveAndKillWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) { restartInfo = getOption( options, LiteralStringRef("restartInfoLocation"), LiteralStringRef("simfdb/restartInfo.ini") ).toString(); testDuration = getOption( options, LiteralStringRef("testDuration"), 10.0 ); + isRestoring = getOption( options, LiteralStringRef("isRestoring"), 0 ); } virtual 
std::string description() { return "SaveAndKillWorkload"; } @@ -59,6 +61,7 @@ struct SaveAndKillWorkload : TestWorkload { ini.SetUnicode(); ini.LoadFile(self->restartInfo.c_str()); + ini.SetValue("RESTORE", "isRestoring", format("%d", self->isRestoring).c_str()); ini.SetValue("META", "processesPerMachine", format("%d", g_simulator.processesPerMachine).c_str()); ini.SetValue("META", "listenersPerProcess", format("%d", g_simulator.listenersPerProcess).c_str()); ini.SetValue("META", "desiredCoordinators", format("%d", g_simulator.desiredCoordinators).c_str()); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 10f85c2985..92f3af84a4 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -142,20 +142,16 @@ add_fdb_test( restarting/StorefrontTestRestart-2.txt) add_fdb_test( TEST_FILES restarting/SnapTestSimpleRestart-1.txt - restarting/SnapTestSimpleRestart-2.txt - RESTORE) + restarting/SnapTestSimpleRestart-2.txt) add_fdb_test( TEST_FILES restarting/SnapTestRestart-1.txt - restarting/SnapTestRestart-2.txt - RESTORE) + restarting/SnapTestRestart-2.txt) add_fdb_test( TEST_FILES restarting/SnapCycleRestart-1.txt - restarting/SnapCycleRestart-2.txt - RESTORE) + restarting/SnapCycleRestart-2.txt) add_fdb_test( TEST_FILES restarting/SnapTestAttrition-1.txt - restarting/SnapTestAttrition-2.txt - RESTORE) + restarting/SnapTestAttrition-2.txt) add_fdb_test( TEST_FILES restarting/from_5.1.7/DrUpgradeRestart-1.txt restarting/from_5.1.7/DrUpgradeRestart-2.txt IGNORE) diff --git a/tests/restarting/SnapCycleRestart-1.txt b/tests/restarting/SnapCycleRestart-1.txt index 29a511ebe7..80c9893919 100644 --- a/tests/restarting/SnapCycleRestart-1.txt +++ b/tests/restarting/SnapCycleRestart-1.txt @@ -20,3 +20,4 @@ testTitle=SnapCycleShutdown testName=SaveAndKill restartInfoLocation=simfdb/restartInfo.ini testDuration=10.0 + isRestoring=1 diff --git a/tests/restarting/SnapTestAttrition-1.txt b/tests/restarting/SnapTestAttrition-1.txt index 7eccd164d7..33d9862afb 100644 
--- a/tests/restarting/SnapTestAttrition-1.txt +++ b/tests/restarting/SnapTestAttrition-1.txt @@ -49,3 +49,4 @@ testTitle=SnapSimpleShutdown testName=SaveAndKill restartInfoLocation=simfdb/restartInfo.ini testDuration=10.0 + isRestoring=1 diff --git a/tests/restarting/SnapTestRestart-1.txt b/tests/restarting/SnapTestRestart-1.txt index d1c26b9a82..d8159f35b6 100644 --- a/tests/restarting/SnapTestRestart-1.txt +++ b/tests/restarting/SnapTestRestart-1.txt @@ -46,3 +46,4 @@ testTitle=SnapTestShutdown testName=SaveAndKill restartInfoLocation=simfdb/restartInfo.ini testDuration=10.0 + isRestoring=1 diff --git a/tests/restarting/SnapTestSimpleRestart-1.txt b/tests/restarting/SnapTestSimpleRestart-1.txt index 635d0a6c1c..bfbfced2c0 100644 --- a/tests/restarting/SnapTestSimpleRestart-1.txt +++ b/tests/restarting/SnapTestSimpleRestart-1.txt @@ -30,7 +30,7 @@ testTitle=SnapSimplePost ; save and shutdown testTitle=SnapSimpleShutdown - testName=SaveAndKill restartInfoLocation=simfdb/restartInfo.ini testDuration=10.0 + isRestoring=1 From 382b24693091703fe6bc9c46922d185a5c14e7c2 Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Fri, 22 Mar 2019 10:07:40 -0700 Subject: [PATCH 09/69] trace change and retain fitness file after restore --- fdbclient/NativeAPI.actor.cpp | 2 +- fdbserver/fdbserver.actor.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 837f936dfa..b0afd21f0e 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -3373,7 +3373,7 @@ ACTOR Future snapCreate(Database inputCx, StringRef snapCmd, UID snapUID) if (BUGGIFY) { int32_t toDelay = g_random->randomInt(1, 30); TraceEvent("SleepingBeforeEnablingPop") - .detail("duration", toDelay); + .detail("Duration", toDelay); wait(delay(toDelay)); TraceEvent("DoneSleepingBeforeEnablingPop"); } diff --git a/fdbserver/fdbserver.actor.cpp b/fdbserver/fdbserver.actor.cpp index 791d992622..fb15674379 100644 --- 
a/fdbserver/fdbserver.actor.cpp +++ b/fdbserver/fdbserver.actor.cpp @@ -1693,7 +1693,7 @@ int main(int argc, char* argv[]) { std::vector returnFiles = platform::listFiles(childf, ext); for (int j = 0; j < returnFiles.size(); j++) { fprintf(stderr, "file : %s\n", returnFiles[j].c_str()); - if (returnFiles[j] != "fdb.cluster") { + if (returnFiles[j] != "fdb.cluster" && returnFiles[j] != "fitness") { TraceEvent("DeletingNonSnapfiles") .detail("FileBeingDeleted", childf + "/" + returnFiles[j]); deleteFile(childf + "/" + returnFiles[j]); From f7ba0635ef1b72474f8c45d7ba24e37aa0d441ac Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Fri, 22 Mar 2019 10:36:23 -0700 Subject: [PATCH 10/69] Make Exec op the first op in the batch --- fdbclient/NativeAPI.actor.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index b0afd21f0e..09622bf42e 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -2329,6 +2329,9 @@ void Transaction::execute(const KeyRef& cmdType, const ValueRef& cmdPayLoad) { auto& req = tr; + // Helps with quickly finding the exec op in a tlog batch + setOption(FDBTransactionOptions::FIRST_IN_BATCH); + auto& t = req.transaction; auto r = singleKeyRange(cmdType, req.arena); auto v = ValueRef(req.arena, cmdPayLoad); From a60145b9a1c738d4fcfd27b0245ed7aa53448935 Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Mon, 25 Mar 2019 18:31:08 -0700 Subject: [PATCH 11/69] Restore the cluster in single region configuration --- fdbserver/SimulatedCluster.actor.cpp | 15 ++++++++++----- fdbserver/SimulatedCluster.h | 2 +- fdbserver/fdbserver.actor.cpp | 5 +++-- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/fdbserver/SimulatedCluster.actor.cpp b/fdbserver/SimulatedCluster.actor.cpp index 42be133fbe..f1587c2cf8 100644 --- a/fdbserver/SimulatedCluster.actor.cpp +++ b/fdbserver/SimulatedCluster.actor.cpp @@ -1380,7 +1380,7 @@ void checkExtraDB(const char *testFile, int 
&extraDB, int &minimumReplication, i ifs.close(); } -ACTOR void setupAndRun(std::string dataFolder, const char *testFile, bool rebooting, Reference tlsOptions) { +ACTOR void setupAndRun(std::string dataFolder, const char *testFile, bool rebooting, bool restoring, Reference tlsOptions ) { state vector> systemActors; state Optional connFile; state Standalone startingConfiguration; @@ -1410,10 +1410,15 @@ ACTOR void setupAndRun(std::string dataFolder, const char *testFile, bool reboot try { //systemActors.push_back( startSystemMonitor(dataFolder) ); if (rebooting) { - wait(timeoutError(restartSimulatedSystem(&systemActors, dataFolder, &testerCount, &connFile, - &startingConfiguration, tlsOptions, extraDB), - 100.0)); - } else { + wait( timeoutError( restartSimulatedSystem( &systemActors, dataFolder, &testerCount, &connFile, &startingConfiguration, tlsOptions, extraDB), 100.0 ) ); + if (restoring) { + std::string config = "usable_regions=1"; + startingConfiguration = makeString(config.size()); + uint8_t* ptr = mutateString(startingConfiguration); + memcpy(ptr, ((uint8_t*)config.c_str()), config.size()); + } + } + else { g_expect_full_pointermap = 1; setupSimulatedSystem(&systemActors, dataFolder, &testerCount, &connFile, &startingConfiguration, extraDB, minimumReplication, minimumRegions, tlsOptions); diff --git a/fdbserver/SimulatedCluster.h b/fdbserver/SimulatedCluster.h index eb8f325bd3..151130bec0 100644 --- a/fdbserver/SimulatedCluster.h +++ b/fdbserver/SimulatedCluster.h @@ -24,6 +24,6 @@ #define FDBSERVER_SIMULATEDCLUSTER_H #pragma once -void setupAndRun(std::string const& dataFolder, const char* const& testFile, bool const& rebooting, Reference const& useSSL); +void setupAndRun(std::string const& dataFolder, const char* const& testFile, bool const& rebooting, bool const& restoring, Reference const& useSSL); #endif diff --git a/fdbserver/fdbserver.actor.cpp b/fdbserver/fdbserver.actor.cpp index fb15674379..33d837c6df 100644 --- a/fdbserver/fdbserver.actor.cpp 
+++ b/fdbserver/fdbserver.actor.cpp @@ -1662,6 +1662,7 @@ int main(int argc, char* argv[]) { flushAndExit(FDB_EXIT_ERROR); } + int isRestoring = 0; if (!restarting) { platform::eraseDirectoryRecursive( dataFolder ); platform::createDirectory( dataFolder ); @@ -1670,7 +1671,7 @@ int main(int argc, char* argv[]) { ini.SetUnicode(); std::string tmpFolder = abspath(dataFolder); ini.LoadFile(joinPath(tmpFolder, "restartInfo.ini").c_str()); - int isRestoring = atoi(ini.GetValue("RESTORE", "isRestoring")); + isRestoring = atoi(ini.GetValue("RESTORE", "isRestoring")); if (isRestoring) { std::vector returnList; std::string ext = ""; @@ -1725,7 +1726,7 @@ int main(int argc, char* argv[]) { } } } - setupAndRun( dataFolder, testFile, restarting, tlsOptions ); + setupAndRun( dataFolder, testFile, restarting, (isRestoring >= 1) , tlsOptions ); g_simulator.run(); } else if (role == FDBD) { ASSERT( connectionFile ); From d282016f930baf9005108b178bb0747d88f34391 Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Tue, 26 Mar 2019 07:23:03 -0700 Subject: [PATCH 12/69] Exec op to tag only local storage nodes --- fdbserver/MasterProxyServer.actor.cpp | 61 +++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 4 deletions(-) diff --git a/fdbserver/MasterProxyServer.actor.cpp b/fdbserver/MasterProxyServer.actor.cpp index 520843bcb5..a05d979b40 100644 --- a/fdbserver/MasterProxyServer.actor.cpp +++ b/fdbserver/MasterProxyServer.actor.cpp @@ -43,6 +43,9 @@ #include "fdbclient/Atomic.h" #include "flow/TDMetric.actor.h" #include "flow/actorcompiler.h" // This must be the last #include. 
+#include "fdbclient/DatabaseConfiguration.h" +#include "fdbclient/FDBTypes.h" +#include "fdbclient/Knobs.h" struct ProxyStats { CounterCollection cc; @@ -715,8 +718,24 @@ ACTOR Future commitBatch( if (debugMutation("ProxyCommit", commitVersion, m)) TraceEvent("ProxyCommitTo", self->dbgid).detail("To", describe(ranges.begin().value().tags)).detail("Mutation", m.toString()).detail("Version", commitVersion); +<<<<<<< HEAD ranges.begin().value().populateTags(); toCommit.addTags(ranges.begin().value().tags); +======= + auto& tags = ranges.begin().value().tags; + if(!tags.size()) { + for( auto info : ranges.begin().value().src_info ) { + tags.push_back( info->tag ); + } + for( auto info : ranges.begin().value().dest_info ) { + tags.push_back( info->tag ); + } + uniquify(tags); + } + + for (auto& tag : tags) + toCommit.addTag(tag); +>>>>>>> c6dc6bf3... Exec op to tag only local storage nodes } else { TEST(true); //A clear range extends past a shard boundary @@ -741,9 +760,24 @@ ACTOR Future commitBatch( .detail("TransactionNum", transactionNum); committed[transactionNum] = ConflictBatch::TransactionConflict; } else { + // Send the ExecOp to + // - all the storage nodes in a single region and + // - only to storage nodes in local region in multi-region setup + // step 1: get the DatabaseConfiguration + state DatabaseConfiguration conf; + Standalone> results = wait( self->txnStateStore->readRange( configKeys ) ); + conf.fromKeyValues(results); + // step 2: find the tag.id from locality info of the master + auto localityKey = + self->txnStateStore->readValue(tagLocalityListKeyFor(self->master.locality.dcId())).get(); + int8_t locality = tagLocalityInvalid; + if (localityKey.present()) { + locality = decodeTagLocalityListValue(localityKey.get()); + } + auto ranges = self->keyInfo.intersectingRanges(allKeys); std::set allSources; - + auto& m = (*pMutations)[mutationNum]; if (debugMutation("ProxyCommit", commitVersion, m)) TraceEvent("ProxyCommitTo", self->dbgid) .detail("To", 
"all sources") @@ -754,14 +788,32 @@ ACTOR Future commitBatch( auto& tags = r.value().tags; if (!tags.size()) { for (auto info : r.value().src_info) { - tags.push_back(info->tag); + if (info->tag.locality == locality) { + tags.push_back(info->tag); + } } for (auto info : r.value().dest_info) { - tags.push_back(info->tag); + if (info->tag.locality == locality) { + tags.push_back(info->tag); + } } uniquify(tags); } - allSources.insert(tags.begin(), tags.end()); + std::vector localTags; + for (auto t : tags) { + if ( (!conf.isValid()) + || (conf.usableRegions > 1 && t.locality == locality) + || (conf.usableRegions == 1) ) { + // step 3: based on DatabaseConfiguration and locality + // information gathered in step 1 and step 2, + // - find all the relevant tags + localTags.push_back(t); + } + } + TraceEvent(SevDebug, "DebugTagInfo") + .detail("TagsSize", tags.size()) + .detail("LocalTagsSize", localTags.size()); + allSources.insert(localTags.begin(), localTags.end()); } auto param2 = m.param2.toString(); @@ -792,6 +844,7 @@ ACTOR Future commitBatch( UNREACHABLE(); + auto& m = (*pMutations)[mutationNum]; // Check on backing up key, if backup ranges are defined and a normal key if (self->vecBackupKeys.size() > 1 && (normalKeys.contains(m.param1) || m.param1 == metadataVersionKey)) { From f129d996feeeb087139153ce2f60af47ff9069ed Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Mon, 1 Apr 2019 12:14:23 -0700 Subject: [PATCH 13/69] Remove dumpAfterTest=true in snap tests --- tests/restarting/SnapCycleRestart-1.txt | 1 - tests/restarting/SnapTestAttrition-1.txt | 4 ---- tests/restarting/SnapTestAttrition-2.txt | 1 - tests/restarting/SnapTestRestart-1.txt | 4 ---- tests/restarting/SnapTestRestart-2.txt | 1 - tests/restarting/SnapTestSimpleRestart-1.txt | 3 --- 6 files changed, 14 deletions(-) diff --git a/tests/restarting/SnapCycleRestart-1.txt b/tests/restarting/SnapCycleRestart-1.txt index 80c9893919..0898e0b1ea 100644 --- a/tests/restarting/SnapCycleRestart-1.txt +++ 
b/tests/restarting/SnapCycleRestart-1.txt @@ -13,7 +13,6 @@ testTitle=SnapCyclePre testID=1 clearAfterTest=false snapCheck=true - dumpAfterTest=true testTitle=SnapCycleShutdown ;save and shutdown diff --git a/tests/restarting/SnapTestAttrition-1.txt b/tests/restarting/SnapTestAttrition-1.txt index 33d9862afb..e85c024ef9 100644 --- a/tests/restarting/SnapTestAttrition-1.txt +++ b/tests/restarting/SnapTestAttrition-1.txt @@ -6,8 +6,6 @@ testTitle=SnapTestPre testID=0 clearAfterTest=false snapCheck=false - dumpAfterTest=true - testTitle=SnapTestTakeSnap ;Take snap and do read/write @@ -29,7 +27,6 @@ testTitle=SnapTestTakeSnap testID=1 clearAfterTest=false snapCheck=false - dumpAfterTest=true testName=Attrition testDuration=20.0 @@ -42,7 +39,6 @@ testTitle=SnapTestPost testID=2 clearAfterTest=false snapCheck=false - dumpAfterTest=true ; save and shutdown testTitle=SnapSimpleShutdown diff --git a/tests/restarting/SnapTestAttrition-2.txt b/tests/restarting/SnapTestAttrition-2.txt index 336d5a8137..fd6a3ab7a3 100644 --- a/tests/restarting/SnapTestAttrition-2.txt +++ b/tests/restarting/SnapTestAttrition-2.txt @@ -5,5 +5,4 @@ numSnaps=1 maxSnapDelay=3.0 testID=3 snapCheck=false -dumpAfterTest=true restartInfoLocation=simfdb/restartInfo.ini diff --git a/tests/restarting/SnapTestRestart-1.txt b/tests/restarting/SnapTestRestart-1.txt index d8159f35b6..6ae2d6bb0c 100644 --- a/tests/restarting/SnapTestRestart-1.txt +++ b/tests/restarting/SnapTestRestart-1.txt @@ -6,8 +6,6 @@ testTitle=SnapTestPre testID=0 clearAfterTest=false snapCheck=false - dumpAfterTest=true - testTitle=SnapTestTakeSnap ;Take snap and do read/write @@ -29,7 +27,6 @@ testTitle=SnapTestTakeSnap testID=1 clearAfterTest=false snapCheck=true - dumpAfterTest=true testTitle=SnapTestPost ;write 1000 Keys ending with odd numbers @@ -39,7 +36,6 @@ testTitle=SnapTestPost testID=2 clearAfterTest=false snapCheck=false - dumpAfterTest=true testTitle=SnapTestShutdown ;save and shutdown diff --git 
a/tests/restarting/SnapTestRestart-2.txt b/tests/restarting/SnapTestRestart-2.txt index b86c59b7c6..d8dd4b711e 100644 --- a/tests/restarting/SnapTestRestart-2.txt +++ b/tests/restarting/SnapTestRestart-2.txt @@ -5,4 +5,3 @@ numSnaps=1 maxSnapDelay=3.0 testID=3 snapCheck=false -dumpAfterTest=true diff --git a/tests/restarting/SnapTestSimpleRestart-1.txt b/tests/restarting/SnapTestSimpleRestart-1.txt index bfbfced2c0..773ac6c909 100644 --- a/tests/restarting/SnapTestSimpleRestart-1.txt +++ b/tests/restarting/SnapTestSimpleRestart-1.txt @@ -6,7 +6,6 @@ testTitle=SnapSimplePre testID=0 clearAfterTest=false snapCheck=false - dumpAfterTest=true ;take snap testTitle=SnapSimpleTakeSnap @@ -16,7 +15,6 @@ testTitle=SnapSimpleTakeSnap testID=1 clearAfterTest=false snapCheck=true - dumpAfterTest=true ;write 1000 Keys ending with odd number testTitle=SnapSimplePost @@ -26,7 +24,6 @@ testTitle=SnapSimplePost testID=2 clearAfterTest=false snapCheck=false - dumpAfterTest=true ; save and shutdown testTitle=SnapSimpleShutdown From c4d27ac9d27f2eeb13cbc35bdee541a683b613bf Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Tue, 2 Apr 2019 04:26:26 -0700 Subject: [PATCH 14/69] bug fixes in SnapTest Earlier the test was checking for the following condition: durable version of storage > min version of tlog, but the check has been modified to: durable version of storage >= min version of tlog - 1. Ensure that the pre-snap validate keys are exactly 1000 in the case of commit retries.
--- fdbserver/workloads/SnapTest.actor.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/fdbserver/workloads/SnapTest.actor.cpp b/fdbserver/workloads/SnapTest.actor.cpp index 22f5f0e0c9..b7ab1b39fd 100644 --- a/fdbserver/workloads/SnapTest.actor.cpp +++ b/fdbserver/workloads/SnapTest.actor.cpp @@ -158,13 +158,17 @@ public: // workload functions ACTOR Future _create_keys(Database cx, std::string prefix, bool even = true) { state Transaction tr(cx); + state vector keys; + + for (int i = 0; i < 1000; i++) { + keys.push_back(g_random->randomInt64(0, INT64_MAX - 2)); + } state int retry = 0; loop { tr.reset(); try { - for (int i = 0; i < 1000; i++) { - int64_t id = g_random->randomInt64(0, INT64_MAX - 2); + for (auto id : keys) { if (even) { if (id % 2 != 0) { id++; @@ -264,6 +268,7 @@ public: // workload functions begin = firstGreaterThan(kvRange.end()[-1].key); } catch (Error& e) { wait(tr.onError(e)); + cnt = 0; } } TraceEvent("VerifyCntValue").detail("Value", cnt); @@ -376,7 +381,7 @@ public: // workload functions for (auto const& worker : workers) { address_workers[worker.interf.address()] = worker.interf; } - vector tLogServers = self->dbInfo->get().logSystemConfig.allPresentLogs(); + vector tLogServers = self->dbInfo->get().logSystemConfig.allLocalLogs(); for (auto s : tLogServers) { auto it = address_workers.find(s.address()); @@ -385,7 +390,7 @@ public: // workload functions TraceEvent("TLogWorker") .detail("Address", s.address()) .detail("Id", s.id()) - .detail("Localit", s.locality.toString()); + .detail("Locality", s.locality.toString()); } } @@ -620,7 +625,7 @@ public: // workload functions ASSERT(tLogMessages[k].get().toString() != emptyStr); getMinAndMaxTLogVersions(tLogMessages[k].get(), execVersion, tag, minTLogVersion, maxTLogVersion); if (minTLogVersion != -1 && maxTLogVersion != -1) { - if ((durableVersion > minTLogVersion) && (durableVersion < maxTLogVersion)) { + if ((durableVersion >= minTLogVersion - 1) && 
(durableVersion < maxTLogVersion)) { ++numDurableVersionChecks; TraceEvent("Successs!!!"); } From aa79480d699c46d4cc75b28fa4eaae35eb9c9077 Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Fri, 29 Mar 2019 04:49:44 -0700 Subject: [PATCH 15/69] changes to make fdbfork asynchronous --- fdbclient/NativeAPI.actor.cpp | 2 + fdbserver/FDBExecArgs.cpp | 4 + fdbserver/FDBExecArgs.h | 1 + fdbserver/OldTLogServer_6_0.actor.cpp | 22 ++- fdbserver/TLogServer.actor.cpp | 23 ++- fdbserver/WorkerInterface.actor.h | 4 + fdbserver/storageserver.actor.cpp | 214 +++++++++++++++----------- fdbserver/worker.actor.cpp | 87 ++++++++--- flow/Platform.cpp | 46 ++++-- flow/Platform.h | 14 +- 10 files changed, 272 insertions(+), 145 deletions(-) diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 09622bf42e..31c3992a2c 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -3338,6 +3338,7 @@ ACTOR Future snapCreate(Database inputCx, StringRef snapCmd, UID snapUID) wait(tr.commit()); break; } catch (Error& e) { + TraceEvent("DisableTLogPopFailed").detail("Error", e.what()); wait(tr.onError(e)); } } @@ -3389,6 +3390,7 @@ ACTOR Future snapCreate(Database inputCx, StringRef snapCmd, UID snapUID) wait(tr.commit()); break; } catch (Error& e) { + TraceEvent("EnableTLogPopFailed").detail("Error", e.what()); wait(tr.onError(e)); } } diff --git a/fdbserver/FDBExecArgs.cpp b/fdbserver/FDBExecArgs.cpp index d7722a3886..972bff265f 100644 --- a/fdbserver/FDBExecArgs.cpp +++ b/fdbserver/FDBExecArgs.cpp @@ -20,6 +20,10 @@ void ExecCmdValueString::setCmdValueString(std::string const& pCmdValueString) { parseCmdValue(); } +std::string ExecCmdValueString::getCmdValueString() { + return cmdValueString; +} + std::string ExecCmdValueString::getBinaryPath() { return binaryPath; } diff --git a/fdbserver/FDBExecArgs.h b/fdbserver/FDBExecArgs.h index 6469ce5106..96ba4615ef 100644 --- a/fdbserver/FDBExecArgs.h +++ b/fdbserver/FDBExecArgs.h @@ -21,6 +21,7 @@ 
public: // interfaces std::vector getBinaryArgs(); std::string getBinaryArgValue(std::string const& key); void setCmdValueString(std::string const& cmdValueString); + std::string getCmdValueString(void); public: // helper functions void dbgPrint(); diff --git a/fdbserver/OldTLogServer_6_0.actor.cpp b/fdbserver/OldTLogServer_6_0.actor.cpp index fa23e5f3cc..16bedb8870 100644 --- a/fdbserver/OldTLogServer_6_0.actor.cpp +++ b/fdbserver/OldTLogServer_6_0.actor.cpp @@ -1477,7 +1477,8 @@ ACTOR Future tLogCommit( wait( timeoutWarning( logData->queueCommittedVersion.whenAtLeast( req.version ) || stopped, 0.1, warningCollectorInput ) ); if ((execVersion != invalidVersion) && execVersion <= logData->queueCommittedVersion.get()) { - int err = 0; + state int err = 0; + state Future cmdErr; auto uidStr = execArg.getBinaryArgValue("uid"); if (!g_network->isSimulated()) { // Run the exec command @@ -1503,11 +1504,13 @@ ACTOR Future tLogCommit( paramList.push_back(versionString); std::string roleString = "role=tlog"; paramList.push_back(roleString); - err = fdbFork(snapBin, paramList); + cmdErr = spawnProcess(snapBin, paramList, 3.0); + wait(success(cmdErr)); + err = cmdErr.get(); } else { // copy the entire directory - std::string tLogFolderFrom = "./" + self->dataFolder + "/."; - std::string tLogFolderTo = "./" + self->dataFolder + "-snap-" + uidStr; + state std::string tLogFolderFrom = "./" + self->dataFolder + "/."; + state std::string tLogFolderTo = "./" + self->dataFolder + "-snap-" + uidStr; std::string tLogFolderToCreateCmd = "mkdir " + tLogFolderTo; std::string tLogFolderCopyCmd = "cp " + tLogFolderFrom + " " + tLogFolderTo; @@ -1517,19 +1520,24 @@ ACTOR Future tLogCommit( .detail("TLogFolderCopyCmd", tLogFolderCopyCmd); vector paramList; - std::string cpBin = "/bin/cp"; std::string mkdirBin = "/bin/mkdir"; paramList.push_back(mkdirBin); paramList.push_back(tLogFolderTo); - err = fdbFork(mkdirBin, paramList); + cmdErr = spawnProcess(mkdirBin, paramList, 3.0); + 
wait(success(cmdErr)); + err = cmdErr.get(); if (err == 0) { + vector paramList; + std::string cpBin = "/bin/cp"; paramList.clear(); paramList.push_back(cpBin); paramList.push_back("-a"); paramList.push_back(tLogFolderFrom); paramList.push_back(tLogFolderTo); - err = fdbFork(cpBin, paramList); + cmdErr = spawnProcess(cpBin, paramList, 3.0); + wait(success(cmdErr)); + err = cmdErr.get(); } } TraceEvent("TLogCommitExecTraceTLog") diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index 1dc32f2ee3..6756fdeaba 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -1846,8 +1846,9 @@ ACTOR Future tLogCommit( if ((execVersion != invalidVersion) && execVersion <= logData->queueCommittedVersion.get()) { - int err = 0; + state int err = 0; auto uidStr = execArg.getBinaryArgValue("uid"); + state Future cmdErr; if (!g_network->isSimulated()) { // Run the exec command auto snapBin = execArg.getBinaryPath(); @@ -1871,11 +1872,13 @@ ACTOR Future tLogCommit( paramList.push_back(versionString); std::string roleString = "role=tlog"; paramList.push_back(roleString); - err = fdbFork(snapBin, paramList); + cmdErr = spawnProcess(snapBin, paramList, 3.0); + wait(success(cmdErr)); + err = cmdErr.get(); } else { // copy the entire directory - std::string tLogFolderFrom = "./" + self->dataFolder + "/."; - std::string tLogFolderTo = "./" + self->dataFolder + "-snap-" + uidStr; + state std::string tLogFolderFrom = "./" + self->dataFolder + "/."; + state std::string tLogFolderTo = "./" + self->dataFolder + "-snap-" + uidStr; std::string tLogFolderToCreateCmd = "mkdir " + tLogFolderTo; std::string tLogFolderCopyCmd = @@ -1886,19 +1889,25 @@ ACTOR Future tLogCommit( .detail("TLogFolderCopyCmd", tLogFolderCopyCmd); vector paramList; - std::string cpBin = "/bin/cp"; std::string mkdirBin = "/bin/mkdir"; paramList.push_back(mkdirBin); paramList.push_back(tLogFolderTo); - err = fdbFork(mkdirBin, paramList); + cmdErr = spawnProcess(mkdirBin, 
paramList, 3.0); + wait(success(cmdErr)); + err = cmdErr.get(); + TraceEvent("MkdirStatus").detail("Errno", err); if (err == 0) { + vector paramList; + std::string cpBin = "/bin/cp"; paramList.clear(); paramList.push_back(cpBin); paramList.push_back("-a"); paramList.push_back(tLogFolderFrom); paramList.push_back(tLogFolderTo); - err = fdbFork(cpBin, paramList); + cmdErr = spawnProcess(cpBin, paramList, 3.0); + wait(success(cmdErr)); + err = cmdErr.get(); } } TraceEvent("TLogCommitExecTraceLog") diff --git a/fdbserver/WorkerInterface.actor.h b/fdbserver/WorkerInterface.actor.h index 6ec6b35658..1218301295 100644 --- a/fdbserver/WorkerInterface.actor.h +++ b/fdbserver/WorkerInterface.actor.h @@ -447,5 +447,9 @@ ACTOR Future tLog(IKeyValueStore* persistentData, IDiskQueue* persistentQu typedef decltype(&tLog) TLogFn; +// spawns a process pointed by `binPath` and the arguments provided at `paramList`, +// if the process spawned takes more than `maxWaitTime` then it will be killed +ACTOR Future spawnProcess(std::string binPath, vector paramList, double maxWaitTime); + #include "flow/unactorcompiler.h" #endif diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp index 31721626d2..1e75d0286b 100644 --- a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -1838,9 +1838,12 @@ void addMutation( Reference& target, Version version, MutationRef const& muta } template -void splitMutations(StorageServer* data, KeyRangeMap& map, VerUpdateRef const& update) { - for(auto& m : update.mutations) { - splitMutation(data, map, m, update.version); +void splitMutations(StorageServer* data, KeyRangeMap& map, VerUpdateRef const& update, vector& execIndex) { + for(int i = 0; i < update.mutations.size(); i++) { + splitMutation(data, map, update.mutations[i], update.version); + if (update.mutations[i].type == MutationRef::Exec) { + execIndex.push_back(i); + } } } @@ -1860,93 +1863,107 @@ void splitMutation(StorageServer* data, KeyRangeMap& 
map, MutationRef const& } } } else if (m.type == MutationRef::Exec) { - std::string cmd = m.param1.toString(); - int len = m.param2.size(); - if ((cmd == execDisableTLogPop) || (cmd == execEnableTLogPop)) { - TraceEvent("IgnoreNonSnapCommands").detail("ExecCommand", cmd); - return; - } - ExecCmdValueString execArg(m.param2.toString()); - auto uidStr = execArg.getBinaryArgValue("uid"); - - int err = 0; - if (!g_network->isSimulated() || cmd != execSnap) { - // Run the exec command - auto binPath = execArg.getBinaryPath(); - auto dataFolder = "path=" + data->folder; - vector paramList; - // bin path - paramList.push_back(binPath); - // user passed arguments - auto listArgs = execArg.getBinaryArgs(); - execArg.dbgPrint(); - for (auto elem : listArgs) { - paramList.push_back(elem); - } - // additional arguments - paramList.push_back(dataFolder); - const char* version = FDB_VT_VERSION; - std::string versionString = "version="; - versionString += version; - paramList.push_back(versionString); - std::string roleString = "role=storage"; - paramList.push_back(roleString); - err = fdbFork(binPath, paramList); - } else { - // copy the files - TraceEvent("ExecTraceStorage") - .detail("StorageFolder", data->folder) - .detail("LocalMachineId", data->thisServerID.toString()) - .detail("DurableVersion", data->durableVersion.get()); - - std::string folder = abspath(data->folder); - - std::string folderFrom = folder + "/."; - std::string folderTo = folder + "-snap-" + uidStr; - - std::string folderToCreateCmd = "mkdir " + folderTo; - std::string folderCopyCmd = "cp " + folderFrom + " " + folderTo; - - TraceEvent("ExecTraceStorageSnapcommands") - .detail("FolderToCreateCmd", folderToCreateCmd) - .detail("FolderCopyCmd", folderCopyCmd); - - vector paramList; - std::string cpBin = "/bin/cp"; - std::string mkdirBin = "/bin/mkdir"; - - paramList.push_back(mkdirBin); - paramList.push_back(folderTo); - err = fdbFork(mkdirBin, paramList); - TraceEvent("MkdirStatus").detail("Errno", err); - 
- if (err == 0) { - paramList.clear(); - paramList.push_back(cpBin); - paramList.push_back("-a"); - paramList.push_back(folderFrom); - paramList.push_back(folderTo); - err = fdbFork(cpBin, paramList); - } - } - auto tokenStr = "ExecTrace/storage/" + uidStr; - TraceEvent te = TraceEvent("ExecTraceStorage"); - te.detail("Uid", uidStr); - te.detail("Status", err); - te.detail("Role", "storage"); - te.detail("Version", ver); - te.detail("Mutation", m.toString()); - te.detail("Mid", data->thisServerID.toString()); - te.detail("DurableVersion", data->durableVersion.get()); - te.detail("DataVersion", data->version.get()); - te.detail("Tag", data->tag.toString()); - if (cmd == execSnap) { - te.trackLatest(tokenStr.c_str()); - } } else ASSERT(false); // Unknown mutation type in splitMutations } +ACTOR Future +snapHelper(StorageServer* data, MutationRef m, Version ver) +{ + state std::string cmd = m.param1.toString(); + int len = m.param2.size(); + + if ((cmd == execDisableTLogPop) || (cmd == execEnableTLogPop)) { + TraceEvent("IgnoreNonSnapCommands").detail("ExecCommand", cmd); + return Void(); + } + ExecCmdValueString execArg(m.param2.toString()); + state std::string uidStr = execArg.getBinaryArgValue("uid"); + state int err = 0; + state Future cmdErr; + + if (!g_network->isSimulated() || cmd != execSnap) { + // Run the exec command + auto binPath = execArg.getBinaryPath(); + auto dataFolder = "path=" + data->folder; + vector paramList; + // bin path + paramList.push_back(binPath); + // user passed arguments + auto listArgs = execArg.getBinaryArgs(); + execArg.dbgPrint(); + for (auto elem : listArgs) { + paramList.push_back(elem); + } + // additional arguments + paramList.push_back(dataFolder); + const char* version = FDB_VT_VERSION; + std::string versionString = "version="; + versionString += version; + paramList.push_back(versionString); + std::string roleString = "role=storage"; + paramList.push_back(roleString); + cmdErr = spawnProcess(binPath, paramList, 3.0); + 
wait(success(cmdErr)); + err = cmdErr.get(); + } else { + // copy the files + TraceEvent("ExecTraceStorage") + .detail("StorageFolder", data->folder) + .detail("LocalMachineId", data->thisServerID.toString()) + .detail("DurableVersion", data->durableVersion.get()); + + std::string folder = abspath(data->folder); + + state std::string folderFrom = folder + "/."; + state std::string folderTo = folder + "-snap-" + uidStr; + + std::string folderToCreateCmd = "mkdir " + folderTo; + std::string folderCopyCmd = "cp " + folderFrom + " " + folderTo; + + TraceEvent("ExecTraceStorageSnapcommands") + .detail("FolderToCreateCmd", folderToCreateCmd) + .detail("FolderCopyCmd", folderCopyCmd); + + vector paramList; + std::string mkdirBin = "/bin/mkdir"; + + paramList.push_back(mkdirBin); + paramList.push_back(folderTo); + cmdErr = spawnProcess(mkdirBin, paramList, 3.0); + wait(success(cmdErr)); + err = cmdErr.get(); + TraceEvent("MkdirStatus").detail("Errno", err); + if (err == 0) { + vector paramList; + std::string cpBin = "/bin/cp"; + paramList.clear(); + paramList.push_back(cpBin); + paramList.push_back("-a"); + paramList.push_back(folderFrom); + paramList.push_back(folderTo); + cmdErr = spawnProcess(cpBin, paramList, 3.0); + wait(success(cmdErr)); + err = cmdErr.get(); + } + } + auto tokenStr = "ExecTrace/storage/" + uidStr; + TraceEvent te = TraceEvent("ExecTraceStorage"); + te.detail("Uid", uidStr); + te.detail("Status", err); + te.detail("Role", "storage"); + te.detail("Version", ver); + te.detail("Mutation", m.toString()); + te.detail("Mid", data->thisServerID.toString()); + te.detail("DurableVersion", data->durableVersion.get()); + te.detail("DataVersion", data->version.get()); + te.detail("Tag", data->tag.toString()); + if (cmd == execSnap) { + te.trackLatest(tokenStr.c_str()); + } + return Void(); +} + ACTOR Future fetchKeys( StorageServer *data, AddingShard* shard ) { state TraceInterval interval("FetchKeys"); state KeyRange keys = shard->keys; @@ -2054,21 +2071,30 @@ 
ACTOR Future fetchKeys( StorageServer *data, AddingShard* shard ) { if (this_block.more) { Key nfk = this_block.readThrough.present() ? this_block.readThrough.get() : keyAfter( this_block.end()[-1].key ); if (nfk != keys.end) { - std::deque< Standalone > updatesToSplit = std::move( shard->updates ); + state std::deque< Standalone > updatesToSplit = std::move( shard->updates ); // This actor finishes committing the keys [keys.begin,nfk) that we already fetched. // The remaining unfetched keys [nfk,keys.end) will become a separate AddingShard with its own fetchKeys. shard->server->addShard( ShardInfo::addingSplitLeft( KeyRangeRef(keys.begin, nfk), shard ) ); shard->server->addShard( ShardInfo::newAdding( data, KeyRangeRef(nfk, keys.end) ) ); shard = data->shards.rangeContaining( keys.begin ).value()->adding; - auto otherShard = data->shards.rangeContaining( nfk ).value()->adding; + state AddingShard* otherShard = data->shards.rangeContaining( nfk ).value()->adding; keys = shard->keys; // Split our prior updates. The ones that apply to our new, restricted key range will go back into shard->updates, // and the ones delivered to the new shard will be discarded because it is in WaitPrevious phase (hasn't chosen a fetchVersion yet). // What we are doing here is expensive and could get more expensive if we started having many more blocks per shard. May need optimization in the future. 
- for(auto u = updatesToSplit.begin(); u != updatesToSplit.end(); ++u) - splitMutations(data, data->shards, *u); + state vector execIdxVec; + state std::deque< Standalone >::iterator u = updatesToSplit.begin(); + for(; u != updatesToSplit.end(); ++u) { + ASSERT(execIdxVec.size() == 0); + splitMutations(data, data->shards, *u, execIdxVec); + for (auto execIdx : execIdxVec) { + TraceEvent("TIMEFORSNAP"); + wait(snapHelper(data, u->mutations[execIdx], u->version)); + } + execIdxVec.clear(); + } TEST( true ); TEST( shard->updates.size() ); @@ -2675,6 +2701,10 @@ ACTOR Future update( StorageServer* data, bool* pReceivedUpdate ) state VerUpdateRef* pUpdate = &fii.changes[changeNum]; for(; mutationNum < pUpdate->mutations.size(); mutationNum++) { updater.applyMutation(data, pUpdate->mutations[mutationNum], pUpdate->version); + if (pUpdate->mutations[mutationNum].type == MutationRef::Exec) { + TraceEvent("TIMEFORSNAP"); + wait(snapHelper(data, pUpdate->mutations[mutationNum], pUpdate->version)); + } mutationBytes += pUpdate->mutations[mutationNum].totalSize(); injectedChanges = true; if(mutationBytes > SERVER_KNOBS->DESIRED_UPDATE_BYTES) { @@ -2747,6 +2777,10 @@ ACTOR Future update( StorageServer* data, bool* pReceivedUpdate ) ++data->counters.atomicMutations; break; } + if (msg.type == MutationRef::Exec) { + TraceEvent("TIMETOTAKESNAP"); + wait(snapHelper(data, msg, ver)); + } } else TraceEvent(SevError, "DiscardingPeekedData", data->thisServerID).detail("Mutation", msg.toString()).detail("Version", cloneCursor2->version().toString()); diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index 70e84fa6b6..58fafe28a0 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -693,7 +693,6 @@ ACTOR Future monitorServerDBInfo( Reference workerServer( Reference connFile, Reference>> ccInterface, @@ -701,16 +700,8 @@ ACTOR Future workerServer( Reference> asyncPriorityInfo, ProcessClass initialClass, std::string folder, int64_t memoryLimit, 
std::string metricsConnFile, std::string metricsPrefix, - Promise recoveredDiskFiles, int64_t memoryProfileThreshold) { -======= -ACTOR Future workerServer(Reference connFile, - Reference>> ccInterface, - LocalityData locality, - Reference> asyncPriorityInfo, - ProcessClass initialClass, std::string folder, int64_t memoryLimit, - std::string metricsConnFile, std::string metricsPrefix, - Promise recoveredDiskFiles, std::string _coordFolder) { ->>>>>>> 2d5af668... Snapshot based backup and resotre implementation + Promise recoveredDiskFiles, int64_t memoryProfileThreshold, + std::string _coordFolder, std::string whiteListBinPaths) { state PromiseStream< ErrorInfo > errors; state Reference>> ddInterf( new AsyncVar>() ); state Reference>> rkInterf( new AsyncVar>() ); @@ -1181,13 +1172,13 @@ ACTOR Future workerServer(Reference connFile, systemMonitor(); loggingTrigger = delay( loggingDelay, TaskFlushTrace ); } - when(ExecuteRequest req = waitNext(interf.execReq.getFuture())) { + when(state ExecuteRequest req = waitNext(interf.execReq.getFuture())) { int len = req.execPayLoad.size(); - ExecCmdValueString execArg(req.execPayLoad.toString()); + state ExecCmdValueString execArg(req.execPayLoad.toString()); execArg.dbgPrint(); - auto uidStr = execArg.getBinaryArgValue("uid"); - - int err = 0; + state std::string uidStr = execArg.getBinaryArgValue("uid"); + state int err = 0; + state Future cmdErr; if (!g_network->isSimulated()) { // bin path auto snapBin = execArg.getBinaryPath(); @@ -1208,12 +1199,14 @@ ACTOR Future workerServer(Reference connFile, paramList.push_back(versionString); std::string roleString = "role=coordinator"; paramList.push_back(roleString); - err = fdbFork(snapBin, paramList); + cmdErr = spawnProcess(snapBin, paramList, 3.0); + wait(success(cmdErr)); + err = cmdErr.get(); } else { // copy the files std::string folder = coordFolder; - std::string folderFrom = "./" + folder + "/."; - std::string folderTo = "./" + folder + "-snap-" + uidStr; + state 
std::string folderFrom = "./" + folder + "/."; + state std::string folderTo = "./" + folder + "-snap-" + uidStr; std::string folderToCreateCmd = "mkdir " + folderTo; std::string folderCopyCmd = "cp " + folderFrom + " " + folderTo; @@ -1223,21 +1216,25 @@ ACTOR Future workerServer(Reference connFile, .detail("FolderCopyCmd", folderCopyCmd); vector paramList; - std::string cpBin = "/bin/cp"; std::string mkdirBin = "/bin/mkdir"; paramList.push_back(mkdirBin); paramList.push_back(folderTo); - err = fdbFork(mkdirBin, paramList); + cmdErr = spawnProcess(mkdirBin, paramList, 3.0); + wait(success(cmdErr)); + err = cmdErr.get(); TraceEvent("MkdirStatus").detail("Errno", err); - if (err == 0) { + vector paramList; + std::string cpBin = "/bin/cp"; paramList.clear(); paramList.push_back(cpBin); paramList.push_back("-a"); paramList.push_back(folderFrom); paramList.push_back(folderTo); - err = fdbFork(cpBin, paramList); + cmdErr = spawnProcess(cpBin, paramList, 3.0); + wait(success(cmdErr)); + err = cmdErr.get(); } } @@ -1247,7 +1244,7 @@ ACTOR Future workerServer(Reference connFile, te.detail("Status", err); te.detail("Role", "coordinator"); te.detail("Value", coordFolder); - te.detail("ExecPayLoad", req.execPayLoad.toString()); + te.detail("ExecPayLoad", execArg.getCmdValueString()); te.trackLatest(tokenStr.c_str()); req.reply.send(Void()); } @@ -1441,6 +1438,48 @@ ACTOR Future fdbd( } } +ACTOR Future spawnProcess(std::string binPath, vector paramList, double maxWaitTime) +{ + state pid_t pid = -1; + try { + pid = fdbForkSpawn(binPath, paramList); + } catch (Error& e) { + TraceEvent("fdbForkSpawnFailed") + .detail("Error", e.what()); + } + if (pid < 0) { + return -1; + } + + state double sleepTime = 0; + state int err = 0; + while (true) { + err = fdbForkWaitPid(pid, g_network->isSimulated() ? 
true : false); + if (g_network->isSimulated()) { + if (err == pid) { + return 0; + } + return err; + } + if (err != EINPROGRESS) { + break; + } + + sleepTime += 0.1; + wait(delay(0.1)); + if (sleepTime > maxWaitTime) { + TraceEvent(SevWarnAlways, "SpawnProcessTookTooLong") + .detail("Error", EINPROGRESS); + kill(pid, SIGTERM); + // FIXME, we can end up here in a rare situation, + // make this asynchronous + fdbForkWaitPid(pid, true); + return -1; + } + } + return err; +} + const Role Role::WORKER("Worker", "WK", false); const Role Role::STORAGE_SERVER("StorageServer", "SS"); const Role Role::TRANSACTION_LOG("TLog", "TL"); diff --git a/flow/Platform.cpp b/flow/Platform.cpp index 1cb0caf1fa..8efb6c4a45 100644 --- a/flow/Platform.cpp +++ b/flow/Platform.cpp @@ -2684,7 +2684,7 @@ void* loadFunction(void* lib, const char* func_name) { } int -fdbFork(const std::string& path, const std::vector& args) +fdbForkSpawn(const std::string& path, const std::vector& args) { std::vector paramList; for (int i = 0; i < args.size(); i++) { @@ -2703,20 +2703,38 @@ fdbFork(const std::string& path, const std::vector& args) TraceEvent(SevWarnAlways, "CommandFailedToSpawn").detail("Cmd", path); throw platform_error(); } else if (pid > 0) { - int status; - waitpid(pid, &status, 0); - if (!(WIFEXITED(status) && WEXITSTATUS(status) == 0)) { - TraceEvent(SevWarnAlways, "CommandFailed") - .detail("Cmd", path) - .detail("Errno", WIFEXITED(status) ? WEXITSTATUS(status) : -1); - return WIFEXITED(status) ? WEXITSTATUS(status) : -1; - } - TraceEvent("CommandStatus").detail("Cmd", path).detail("Errno", WIFEXITED(status) ? 
WEXITSTATUS(status) : 0); - } else { - execv(const_cast(path.c_str()), ¶mList[0]); - _exit(EXIT_FAILURE); + // parent process returns with child's pid + return pid; } - return 0; + // child process + execv(const_cast(path.c_str()), ¶mList[0]); + _exit(EXIT_FAILURE); + return pid; +} + +int fdbForkWaitPid(pid_t pid, bool isSync) +{ + int status; + int err = waitpid(pid, &status, (!isSync) ? WNOHANG : 0); + if (isSync) { + err = WIFEXITED(status) ? WEXITSTATUS(status) : -1; + return err; + } + if (err == 0) { + return EINPROGRESS; + } + + if (err == -1 || WIFSIGNALED(status)) { + err = -1; + } else if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { + err = 0; + } else { + err = WIFEXITED(status) ? WEXITSTATUS(status) : -1; + } + TraceEvent((err == 0) ? SevInfo : SevWarnAlways, "CommandStatus") + .detail("Pid", pid) + .detail("Errno", WIFEXITED(status) ? WEXITSTATUS(status) : -1); + return err; } diff --git a/flow/Platform.h b/flow/Platform.h index d43dd8eab8..a2141f611f 100644 --- a/flow/Platform.h +++ b/flow/Platform.h @@ -533,14 +533,22 @@ bool isLibraryLoaded(const char* lib_path); void* loadLibrary(const char* lib_path); void* loadFunction(void* lib, const char* func_name); -// wrapper to execv +// spwans a process with fork and execv, caller needs to use fdbForkWaitPid to +// find the status of the process and cleanup the resources // takes two arguments: // 1. path to the binary // 2. 
list of arguments // returns: +// returns pid of the process being spawned // throws platform_error() if it is not able to spawn the process -// returns 0 on success or status from the command being run -int fdbFork(const std::string& path, const std::vector& args); +int fdbForkSpawn(const std::string& path, const std::vector& args); + +// checks the completion of the process spawned by fdbForkSpawn +// returns +// - 0 for successful completion and +// - EINPROGRESS if pid is still running +// - exit code or -1 otherwise +int fdbForkWaitPid(pid_t pid, bool isSync = false); #ifdef _WIN32 inline static int ctzll( uint64_t value ) { From 898bed66c14840e13225132bcf7ad335953d040a Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Wed, 3 Apr 2019 05:27:11 -0700 Subject: [PATCH 16/69] Allow only whitelisted binary path for exec op --- fdbclient/NativeAPI.actor.cpp | 6 +- fdbclient/SystemData.cpp | 4 +- fdbserver/ConflictSet.h | 3 +- fdbserver/MasterProxyServer.actor.cpp | 61 +++++++++++++++++--- fdbserver/SimulatedCluster.actor.cpp | 28 ++++----- fdbserver/SimulatedCluster.h | 2 +- fdbserver/WorkerInterface.actor.h | 6 +- fdbserver/fdbserver.actor.cpp | 59 ++++++++++++++++++- fdbserver/worker.actor.cpp | 13 +++-- fdbserver/workloads/SnapTest.actor.cpp | 26 ++++++++- flow/error_definitions.h | 1 + tests/restarting/SnapTestSimpleRestart-2.txt | 8 +++ 12 files changed, 181 insertions(+), 36 deletions(-) diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 31c3992a2c..431b85cb15 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -2706,7 +2706,11 @@ ACTOR static Future tryCommit( Database cx, Reference // The user needs to be informed that we aren't sure whether the commit happened. Standard retry loops retry it anyway (relying on transaction idempotence) but a client might do something else. 
throw commit_unknown_result(); } else { - if (e.code() != error_code_transaction_too_old && e.code() != error_code_not_committed && e.code() != error_code_database_locked && e.code() != error_code_proxy_memory_limit_exceeded) + if (e.code() != error_code_transaction_too_old + && e.code() != error_code_not_committed + && e.code() != error_code_database_locked + && e.code() != error_code_proxy_memory_limit_exceeded + && e.code() != error_code_transaction_not_permitted) TraceEvent(SevError, "TryCommitError").error(e); if (trLogInfo) trLogInfo->addLog(FdbClientLogEvents::EventCommitError(startTime, static_cast(e.code()), req)); diff --git a/fdbclient/SystemData.cpp b/fdbclient/SystemData.cpp index 45d2e92ed3..1ecbeffffd 100644 --- a/fdbclient/SystemData.cpp +++ b/fdbclient/SystemData.cpp @@ -39,8 +39,8 @@ const KeyRef keyServersKeyServersKey = keyServersKeyServersKeys.begin; // list of reserved exec commands const StringRef execSnap = LiteralStringRef("snap"); // snapshot persistent state of // storage, TLog and coordinated state -const StringRef execDisableTLogPop = LiteralStringRef("tldp"); // disable pop on TLog -const StringRef execEnableTLogPop = LiteralStringRef("tlep"); // enable pop on TLog +const StringRef execDisableTLogPop = LiteralStringRef("\xff/tldp"); // disable pop on TLog +const StringRef execEnableTLogPop = LiteralStringRef("\xff/tlep"); // enable pop on TLog const Key keyServersKey( const KeyRef& k ) { return k.withPrefix( keyServersPrefix ); diff --git a/fdbserver/ConflictSet.h b/fdbserver/ConflictSet.h index 5c219fc5d9..5f458df828 100644 --- a/fdbserver/ConflictSet.h +++ b/fdbserver/ConflictSet.h @@ -40,6 +40,7 @@ struct ConflictBatch { TransactionConflict = 0, TransactionTooOld, TransactionCommitted, + TransactionNotPermitted, }; void addTransaction( const CommitTransactionRef& transaction ); @@ -62,4 +63,4 @@ private: void addConflictRanges(Version now, std::vector< std::pair >::iterator begin, std::vector< std::pair >::iterator end, class 
SkipList* part); }; -#endif \ No newline at end of file +#endif diff --git a/fdbserver/MasterProxyServer.actor.cpp b/fdbserver/MasterProxyServer.actor.cpp index a05d979b40..b1dfa70241 100644 --- a/fdbserver/MasterProxyServer.actor.cpp +++ b/fdbserver/MasterProxyServer.actor.cpp @@ -233,6 +233,7 @@ struct ProxyCommitData { Deque> txsPopVersions; Version lastTxsPop; bool popRemoteTxs; + vector whiteListedBinPathVec; Optional latencyBandConfig; @@ -414,6 +415,34 @@ ACTOR Future commitBatcher(ProxyCommitData *commitData, PromiseStream& binPathVec) { + int p = 0; + TraceEvent(SevDebug, "BinPathConverter").detail("Input", binPath); + for (; p < binPath.size(); ) { + int pComma = binPath.find_first_of(',', p); + if (pComma == binPath.npos) { + pComma = binPath.size(); + } + std::string token = binPath.substr(p, pComma - p); + TraceEvent(SevDebug, "BinPathItem").detail("Element", token); + binPathVec.push_back(token); + p = pComma + 1; + while (binPath[p] == ' ' && p < binPath.size()) { + p++; + } + } + return; +} + +bool isWhiteListed(const vector& binPathVec, const std::string& binPath) { + for (auto item : binPathVec) { + if (item == binPath) { + return true; + } + } + return false; +} + ACTOR Future commitBatch( ProxyCommitData* self, vector trs, @@ -750,7 +779,19 @@ ACTOR Future commitBatch( } toCommit.addTypedMessage(m); } else if (m.type == MutationRef::Exec) { - if(self->db->get().recoveryState != RecoveryState::FULLY_RECOVERED) { + state std::string param2 = m.param2.toString(); + state ExecCmdValueString execArg(param2); + execArg.dbgPrint(); + state std::string binPath = execArg.getBinaryPath(); + state std::string uidStr = execArg.getBinaryArgValue("uid"); + + if (m.param1 != execDisableTLogPop + && m.param1 != execEnableTLogPop + && !isWhiteListed(self->whiteListedBinPathVec, binPath)) { + TraceEvent("ExecTransactionNotPermitted") + .detail("TransactionNum", transactionNum); + committed[transactionNum] = ConflictBatch::TransactionNotPermitted; + } else if 
(self->db->get().recoveryState != RecoveryState::FULLY_RECOVERED) { // Cluster is not fully recovered and needs TLogs // from previous generation for full recovery. // Currently, snapshot of old tlog generation is not @@ -816,12 +857,8 @@ ACTOR Future commitBatch( allSources.insert(localTags.begin(), localTags.end()); } - auto param2 = m.param2.toString(); - ExecCmdValueString execArg(param2); - execArg.dbgPrint(); - auto uidStr = execArg.getBinaryArgValue("uid"); - auto tokenStr = "ExecTrace/Proxy/" + uidStr; + std::string tokenStr = "ExecTrace/Proxy/" + uidStr; auto te1 = TraceEvent("ProxyCommitTo", self->dbgid); te1.detail("To", "all sources"); te1.detail("Mutation", m.toString()); @@ -1061,6 +1098,9 @@ ACTOR Future commitBatch( else if (committed[t] == ConflictBatch::TransactionTooOld) { trs[t].reply.sendError(transaction_too_old()); } + else if (committed[t] == ConflictBatch::TransactionNotPermitted) { + trs[t].reply.sendError(transaction_not_permitted()); + } else { trs[t].reply.sendError(not_committed()); } @@ -1505,7 +1545,8 @@ ACTOR Future masterProxyServerCore( Reference> db, LogEpoch epoch, Version recoveryTransactionVersion, - bool firstProxy) + bool firstProxy, + std::string whiteListBinPaths) { state ProxyCommitData commitData(proxy.id(), master, proxy.getConsistentReadVersion, recoveryTransactionVersion, proxy.commit, db, firstProxy); @@ -1546,6 +1587,7 @@ ACTOR Future masterProxyServerCore( commitData.logSystem = ILogSystem::fromServerDBInfo(proxy.id(), commitData.db->get(), false, addActor); commitData.logAdapter = new LogSystemDiskQueueAdapter(commitData.logSystem, txsTag, Reference>(), false); commitData.txnStateStore = keyValueStoreLogSystem(commitData.logAdapter, proxy.id(), 2e9, true, true, true); + createWhiteListBinPathVec(whiteListBinPaths, commitData.whiteListedBinPathVec); // ((SERVER_MEM_LIMIT * COMMIT_BATCHES_MEM_FRACTION_OF_TOTAL) / COMMIT_BATCHES_MEM_TO_TOTAL_MEM_SCALE_FACTOR) is only a approximate formula for limiting the memory 
used. // COMMIT_BATCHES_MEM_TO_TOTAL_MEM_SCALE_FACTOR is an estimate based on experiments and not an accurate one. @@ -1757,10 +1799,11 @@ ACTOR Future checkRemoved(Reference> db, uint64_t r ACTOR Future masterProxyServer( MasterProxyInterface proxy, InitializeMasterProxyRequest req, - Reference> db) + Reference> db, + std::string whiteListBinPaths) { try { - state Future core = masterProxyServerCore(proxy, req.master, db, req.recoveryCount, req.recoveryTransactionVersion, req.firstProxy); + state Future core = masterProxyServerCore(proxy, req.master, db, req.recoveryCount, req.recoveryTransactionVersion, req.firstProxy, whiteListBinPaths); loop choose{ when(wait(core)) { return Void(); } when(wait(checkRemoved(db, req.recoveryCount, proxy))) {} diff --git a/fdbserver/SimulatedCluster.actor.cpp b/fdbserver/SimulatedCluster.actor.cpp index f1587c2cf8..4f36bd09c5 100644 --- a/fdbserver/SimulatedCluster.actor.cpp +++ b/fdbserver/SimulatedCluster.actor.cpp @@ -196,7 +196,8 @@ ACTOR Future simulatedFDBDRebooter(ReferencerandomUniqueID(); state int cycles = 0; @@ -250,7 +251,7 @@ ACTOR Future simulatedFDBDRebooter(Reference fd = fdbd( connFile, localities, processClass, *dataFolder, *coordFolder, 500e6, "", "", -1); + Future fd = fdbd( connFile, localities, processClass, *dataFolder, *coordFolder, 500e6, "", "", -1, whiteListBinPaths); Future backup = runBackupAgents ? runBackup(connFile) : Future(Never()); Future dr = runBackupAgents ? 
runDr(connFile) : Future(Never()); @@ -359,7 +360,7 @@ std::map< Optional>, std::vector< std::vector< std::string ACTOR Future simulatedMachine(ClusterConnectionString connStr, std::vector ips, bool sslEnabled, Reference tlsOptions, LocalityData localities, ProcessClass processClass, std::string baseFolder, bool restarting, - bool useSeedFile, bool runBackupAgents, bool sslOnly) { + bool useSeedFile, bool runBackupAgents, bool sslOnly, std::string whiteListBinPaths) { state int bootCount = 0; state std::vector myFolders; state std::vector coordFolders; @@ -401,7 +402,7 @@ ACTOR Future simulatedMachine(ClusterConnectionString connStr, std::vector std::string path = joinPath(myFolders[i], "fdb.cluster"); Reference clusterFile(useSeedFile ? new ClusterConnectionFile(path, connStr.toString()) : new ClusterConnectionFile(path)); const int listenPort = i*listenPerProcess + 1; - processes.push_back(simulatedFDBDRebooter(clusterFile, ips[i], sslEnabled, tlsOptions, listenPort, listenPerProcess, localities, processClass, &myFolders[i], &coordFolders[i], baseFolder, connStr, useSeedFile, runBackupAgents)); + processes.push_back(simulatedFDBDRebooter(clusterFile, ips[i], sslEnabled, tlsOptions, listenPort, listenPerProcess, localities, processClass, &myFolders[i], &coordFolders[i], baseFolder, connStr, useSeedFile, runBackupAgents, whiteListBinPaths)); TraceEvent("SimulatedMachineProcess", randomId).detail("Address", NetworkAddress(ips[i], listenPort, true, false)).detail("ZoneId", localities.zoneId()).detail("DataHall", localities.dataHallId()).detail("Folder", myFolders[i]); } @@ -606,7 +607,7 @@ IPAddress makeIPAddressForSim(bool isIPv6, std::array parts) { ACTOR Future restartSimulatedSystem(vector>* systemActors, std::string baseFolder, int* pTesterCount, Optional* pConnString, Standalone* pStartingConfiguration, - Reference tlsOptions, int extraDB) { + Reference tlsOptions, int extraDB, std::string whiteListBinPaths) { CSimpleIni ini; ini.SetUnicode(); 
ini.LoadFile(joinPath(baseFolder, "restartInfo.ini").c_str()); @@ -704,7 +705,7 @@ ACTOR Future restartSimulatedSystem(vector>* systemActors, st systemActors->push_back(reportErrors( simulatedMachine(conn, ipAddrs, usingSSL, tlsOptions, localities, processClass, baseFolder, true, i == useSeedForMachine, enableExtraDB, - usingSSL && (listenersPerProcess == 1 || processClass == ProcessClass::TesterClass)), + usingSSL && (listenersPerProcess == 1 || processClass == ProcessClass::TesterClass), whiteListBinPaths), processClass == ProcessClass::TesterClass ? "SimulatedTesterMachine" : "SimulatedMachine")); } @@ -1086,7 +1087,8 @@ void SimulationConfig::generateNormalConfig(int minimumReplication, int minimumR void setupSimulatedSystem(vector>* systemActors, std::string baseFolder, int* pTesterCount, Optional* pConnString, Standalone* pStartingConfiguration, - int extraDB, int minimumReplication, int minimumRegions, Reference tlsOptions) { + int extraDB, int minimumReplication, int minimumRegions, Reference tlsOptions, + std::string whiteListBinPaths) { // SOMEDAY: this does not test multi-interface configurations SimulationConfig simconfig(extraDB, minimumReplication, minimumRegions); StatusObject startingConfigJSON = simconfig.db.toJSON(true); @@ -1282,7 +1284,7 @@ void setupSimulatedSystem(vector>* systemActors, std::string baseFo LocalityData localities(Optional>(), zoneId, machineId, dcUID); localities.set(LiteralStringRef("data_hall"), dcUID); systemActors->push_back(reportErrors(simulatedMachine(conn, ips, sslEnabled, tlsOptions, - localities, processClass, baseFolder, false, machine == useSeedForMachine, true, sslOnly), "SimulatedMachine")); + localities, processClass, baseFolder, false, machine == useSeedForMachine, true, sslOnly, whiteListBinPaths ), "SimulatedMachine")); if (extraDB && g_simulator.extraDB->toString() != conn.toString()) { std::vector extraIps; @@ -1296,7 +1298,7 @@ void setupSimulatedSystem(vector>* systemActors, std::string baseFo 
localities.set(LiteralStringRef("data_hall"), dcUID); systemActors->push_back(reportErrors(simulatedMachine(*g_simulator.extraDB, extraIps, sslEnabled, tlsOptions, localities, - processClass, baseFolder, false, machine == useSeedForMachine, false, sslOnly), "SimulatedMachine")); + processClass, baseFolder, false, machine == useSeedForMachine, false, sslOnly, whiteListBinPaths ), "SimulatedMachine")); } assignedMachines++; @@ -1324,7 +1326,7 @@ void setupSimulatedSystem(vector>* systemActors, std::string baseFo systemActors->push_back( reportErrors( simulatedMachine( conn, ips, sslEnabled, tlsOptions, localities, ProcessClass(ProcessClass::TesterClass, ProcessClass::CommandLineSource), - baseFolder, false, i == useSeedForMachine, false, sslEnabled), + baseFolder, false, i == useSeedForMachine, false, sslEnabled, whiteListBinPaths ), "SimulatedTesterMachine") ); } *pStartingConfiguration = startingConfigString; @@ -1380,7 +1382,7 @@ void checkExtraDB(const char *testFile, int &extraDB, int &minimumReplication, i ifs.close(); } -ACTOR void setupAndRun(std::string dataFolder, const char *testFile, bool rebooting, bool restoring, Reference tlsOptions ) { +ACTOR void setupAndRun(std::string dataFolder, const char *testFile, bool rebooting, bool restoring, std::string whiteListBinPaths, Reference tlsOptions) { state vector> systemActors; state Optional connFile; state Standalone startingConfiguration; @@ -1410,7 +1412,7 @@ ACTOR void setupAndRun(std::string dataFolder, const char *testFile, bool reboot try { //systemActors.push_back( startSystemMonitor(dataFolder) ); if (rebooting) { - wait( timeoutError( restartSimulatedSystem( &systemActors, dataFolder, &testerCount, &connFile, &startingConfiguration, tlsOptions, extraDB), 100.0 ) ); + wait( timeoutError( restartSimulatedSystem( &systemActors, dataFolder, &testerCount, &connFile, &startingConfiguration, tlsOptions, extraDB, whiteListBinPaths), 100.0 ) ); if (restoring) { std::string config = "usable_regions=1"; 
startingConfiguration = makeString(config.size()); @@ -1421,7 +1423,7 @@ ACTOR void setupAndRun(std::string dataFolder, const char *testFile, bool reboot else { g_expect_full_pointermap = 1; setupSimulatedSystem(&systemActors, dataFolder, &testerCount, &connFile, &startingConfiguration, extraDB, - minimumReplication, minimumRegions, tlsOptions); + minimumReplication, minimumRegions, tlsOptions, whiteListBinPaths); wait( delay(1.0) ); // FIXME: WHY!!! //wait for machines to boot } std::string clusterFileDir = joinPath( dataFolder, deterministicRandom()->randomUniqueID().toString() ); diff --git a/fdbserver/SimulatedCluster.h b/fdbserver/SimulatedCluster.h index 151130bec0..9818d007ba 100644 --- a/fdbserver/SimulatedCluster.h +++ b/fdbserver/SimulatedCluster.h @@ -24,6 +24,6 @@ #define FDBSERVER_SIMULATEDCLUSTER_H #pragma once -void setupAndRun(std::string const& dataFolder, const char* const& testFile, bool const& rebooting, bool const& restoring, Reference const& useSSL); +void setupAndRun(std::string const& dataFolder, const char* const& testFile, bool const& rebooting, bool const& restoring, std::string const& whiteListBinPath, Reference const& useSSL); #endif diff --git a/fdbserver/WorkerInterface.actor.h b/fdbserver/WorkerInterface.actor.h index 1218301295..693a30c89a 100644 --- a/fdbserver/WorkerInterface.actor.h +++ b/fdbserver/WorkerInterface.actor.h @@ -397,7 +397,9 @@ ACTOR Future extractClusterInterface(Reference fdbd(Reference ccf, LocalityData localities, ProcessClass processClass, std::string dataFolder, std::string coordFolder, int64_t memoryLimit, - std::string metricsConnFile, std::string metricsPrefix, int64_t memoryProfilingThreshold); + std::string metricsConnFile, std::string metricsPrefix, int64_t memoryProfilingThreshold, + std::string whiteListBinPaths); + ACTOR Future clusterController(Reference ccf, Reference>> currentCC, Reference> asyncPriorityInfo, @@ -416,7 +418,7 @@ ACTOR Future storageServer(IKeyValueStore* persistentData, 
StorageServerIn ACTOR Future masterServer(MasterInterface mi, Reference> db, ServerCoordinators serverCoordinators, LifetimeToken lifetime, bool forceRecovery); ACTOR Future masterProxyServer(MasterProxyInterface proxy, InitializeMasterProxyRequest req, - Reference> db); + Reference> db, std::string whiteListBinPaths); ACTOR Future tLog(IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference> db, LocalityData locality, PromiseStream tlogRequests, UID tlogId, bool restoreFromDisk, diff --git a/fdbserver/fdbserver.actor.cpp b/fdbserver/fdbserver.actor.cpp index 33d837c6df..52fd71d971 100644 --- a/fdbserver/fdbserver.actor.cpp +++ b/fdbserver/fdbserver.actor.cpp @@ -82,7 +82,11 @@ enum { OPT_CONNFILE, OPT_SEEDCONNFILE, OPT_SEEDCONNSTRING, OPT_ROLE, OPT_LISTEN, OPT_PUBLICADDR, OPT_DATAFOLDER, OPT_LOGFOLDER, OPT_PARENTPID, OPT_NEWCONSOLE, OPT_NOBOX, OPT_TESTFILE, OPT_RESTARTING, OPT_RESTORING, OPT_RANDOMSEED, OPT_KEY, OPT_MEMLIMIT, OPT_STORAGEMEMLIMIT, OPT_MACHINEID, OPT_DCID, OPT_MACHINE_CLASS, OPT_BUGGIFY, OPT_VERSION, OPT_CRASHONERROR, OPT_HELP, OPT_NETWORKIMPL, OPT_NOBUFSTDOUT, OPT_BUFSTDOUTERR, OPT_TRACECLOCK, OPT_NUMTESTERS, OPT_DEVHELP, OPT_ROLLSIZE, OPT_MAXLOGS, OPT_MAXLOGSSIZE, OPT_KNOB, OPT_TESTSERVERS, OPT_TEST_ON_SERVERS, OPT_METRICSCONNFILE, OPT_METRICSPREFIX, +<<<<<<< HEAD OPT_LOGGROUP, OPT_LOCALITY, OPT_IO_TRUST_SECONDS, OPT_IO_TRUST_WARN_ONLY, OPT_FILESYSTEM, OPT_PROFILER_RSS_SIZE, OPT_KVFILE, OPT_TRACE_FORMAT, OPT_USE_OBJECT_SERIALIZER }; +======= + OPT_LOGGROUP, OPT_LOCALITY, OPT_IO_TRUST_SECONDS, OPT_IO_TRUST_WARN_ONLY, OPT_FILESYSTEM, OPT_KVFILE, OPT_TRACE_FORMAT, OPT_WHITELIST_BINPATH }; +>>>>>>> 2847e101... 
Allow only whitelisted binary path for exec op CSimpleOpt::SOption g_rgOptions[] = { { OPT_CONNFILE, "-C", SO_REQ_SEP }, @@ -115,6 +119,7 @@ CSimpleOpt::SOption g_rgOptions[] = { { OPT_NOBOX, "-q", SO_NONE }, { OPT_NOBOX, "--no_dialog", SO_NONE }, #endif +<<<<<<< HEAD { OPT_KVFILE, "--kvfile", SO_REQ_SEP }, { OPT_TESTFILE, "-f", SO_REQ_SEP }, { OPT_TESTFILE, "--testfile", SO_REQ_SEP }, @@ -160,6 +165,52 @@ CSimpleOpt::SOption g_rgOptions[] = { { OPT_TRACE_FORMAT , "--trace_format", SO_REQ_SEP }, { OPT_USE_OBJECT_SERIALIZER, "-S", SO_REQ_SEP }, { OPT_USE_OBJECT_SERIALIZER, "--object-serializer", SO_REQ_SEP }, +======= + { OPT_KVFILE, "--kvfile", SO_REQ_SEP }, + { OPT_TESTFILE, "-f", SO_REQ_SEP }, + { OPT_TESTFILE, "--testfile", SO_REQ_SEP }, + { OPT_RESTARTING, "-R", SO_NONE }, + { OPT_RESTARTING, "--restarting", SO_NONE }, + { OPT_RANDOMSEED, "-s", SO_REQ_SEP }, + { OPT_RANDOMSEED, "--seed", SO_REQ_SEP }, + { OPT_KEY, "-k", SO_REQ_SEP }, + { OPT_KEY, "--key", SO_REQ_SEP }, + { OPT_MEMLIMIT, "-m", SO_REQ_SEP }, + { OPT_MEMLIMIT, "--memory", SO_REQ_SEP }, + { OPT_STORAGEMEMLIMIT, "-M", SO_REQ_SEP }, + { OPT_STORAGEMEMLIMIT, "--storage_memory", SO_REQ_SEP }, + { OPT_MACHINEID, "-i", SO_REQ_SEP }, + { OPT_MACHINEID, "--machine_id", SO_REQ_SEP }, + { OPT_DCID, "-a", SO_REQ_SEP }, + { OPT_DCID, "--datacenter_id", SO_REQ_SEP }, + { OPT_MACHINE_CLASS, "-c", SO_REQ_SEP }, + { OPT_MACHINE_CLASS, "--class", SO_REQ_SEP }, + { OPT_BUGGIFY, "-b", SO_REQ_SEP }, + { OPT_BUGGIFY, "--buggify", SO_REQ_SEP }, + { OPT_VERSION, "-v", SO_NONE }, + { OPT_VERSION, "--version", SO_NONE }, + { OPT_CRASHONERROR, "--crash", SO_NONE }, + { OPT_NETWORKIMPL, "-N", SO_REQ_SEP }, + { OPT_NETWORKIMPL, "--network", SO_REQ_SEP }, + { OPT_NOBUFSTDOUT, "--unbufferedout", SO_NONE }, + { OPT_BUFSTDOUTERR, "--bufferedout", SO_NONE }, + { OPT_TRACECLOCK, "--traceclock", SO_REQ_SEP }, + { OPT_NUMTESTERS, "--num_testers", SO_REQ_SEP }, + { OPT_HELP, "-?", SO_NONE }, + { OPT_HELP, "-h", SO_NONE }, + { 
OPT_HELP, "--help", SO_NONE }, + { OPT_DEVHELP, "--dev-help", SO_NONE }, + { OPT_KNOB, "--knob_", SO_REQ_SEP }, + { OPT_LOCALITY, "--locality_", SO_REQ_SEP }, + { OPT_TESTSERVERS, "--testservers", SO_REQ_SEP }, + { OPT_TEST_ON_SERVERS, "--testonservers", SO_NONE }, + { OPT_METRICSCONNFILE, "--metrics_cluster", SO_REQ_SEP }, + { OPT_METRICSPREFIX, "--metrics_prefix", SO_REQ_SEP }, + { OPT_IO_TRUST_SECONDS, "--io_trust_seconds", SO_REQ_SEP }, + { OPT_IO_TRUST_WARN_ONLY, "--io_trust_warn_only", SO_NONE }, + { OPT_TRACE_FORMAT , "--trace_format", SO_REQ_SEP }, + { OPT_WHITELIST_BINPATH, "--whitelist_binpath", SO_REQ_SEP }, +>>>>>>> 2847e101... Allow only whitelisted binary path for exec op #ifndef TLS_DISABLED TLS_OPTION_FLAGS @@ -915,6 +966,7 @@ int main(int argc, char* argv[]) { const char *testFile = "tests/default.txt"; std::string kvFile; std::string testServersStr; + std::string whiteListBinPaths; std::vector publicAddressStrs, listenAddressStrs; const char *targetKey = NULL; uint64_t memLimit = 8LL << 30; // Nice to maintain the same default value for memLimit and SERVER_KNOBS->SERVER_MEM_LIMIT and SERVER_KNOBS->COMMIT_BATCHES_MEM_BYTES_HARD_LIMIT @@ -1302,6 +1354,9 @@ int main(int argc, char* argv[]) { } break; } + case OPT_WHITELIST_BINPATH: + whiteListBinPaths = args.OptionArg(); + break; #ifndef TLS_DISABLED case TLSOptions::OPT_TLS_PLUGIN: args.OptionArg(); @@ -1726,7 +1781,7 @@ int main(int argc, char* argv[]) { } } } - setupAndRun( dataFolder, testFile, restarting, (isRestoring >= 1) , tlsOptions ); + setupAndRun( dataFolder, testFile, restarting, (isRestoring >= 1), whiteListBinPaths, tlsOptions); g_simulator.run(); } else if (role == FDBD) { ASSERT( connectionFile ); @@ -1737,7 +1792,7 @@ int main(int argc, char* argv[]) { dataFolder = format("fdb/%d/", publicAddresses.address.port); // SOMEDAY: Better default vector> actors(listenErrors.begin(), listenErrors.end()); - actors.push_back( fdbd(connectionFile, localities, processClass, dataFolder, 
dataFolder, storageMemLimit, metricsConnFile, metricsPrefix, rsssize) ); + actors.push_back( fdbd(connectionFile, localities, processClass, dataFolder, dataFolder, storageMemLimit, metricsConnFile, metricsPrefix, rsssize, whiteListBinPaths) ); //actors.push_back( recurring( []{}, .001 ) ); // for ASIO latency measurement f = stopAfter( waitForAll(actors) ); diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index 58fafe28a0..5547a8fe31 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -233,6 +233,7 @@ std::string filenameFromId( KeyValueStoreType storeType, std::string folder, std UNREACHABLE(); } + struct TLogOptions { TLogOptions() = default; TLogOptions( TLogVersion v, TLogSpillType s ) : version(v), spillType(s) {} @@ -1059,7 +1060,7 @@ ACTOR Future workerServer( //printf("Recruited as masterProxyServer\n"); errorForwarders.add( zombie(recruited, forwardError( errors, Role::MASTER_PROXY, recruited.id(), - masterProxyServer( recruited, req, dbInfo ) ) ) ); + masterProxyServer( recruited, req, dbInfo, whiteListBinPaths ) ) ) ); req.reply.send(recruited); } when( InitializeResolverRequest req = waitNext(interf.resolver.getFuture()) ) { @@ -1399,12 +1400,16 @@ ACTOR Future fdbd( int64_t memoryLimit, std::string metricsConnFile, std::string metricsPrefix, - int64_t memoryProfileThreshold) + int64_t memoryProfileThreshold, + std::string whiteListBinPaths) { try { ServerCoordinators coordinators( connFile ); - TraceEvent("StartingFDBD").detail("ZoneID", localities.zoneId()).detail("MachineId", localities.machineId()).detail("DiskPath", dataFolder).detail("CoordPath", coordFolder); + if (g_network->isSimulated()) { + whiteListBinPaths = "random_path, /bin/snap_create.sh"; + } + TraceEvent("StartingFDBD").detail("ZoneID", localities.zoneId()).detail("MachineId", localities.machineId()).detail("DiskPath", dataFolder).detail("CoordPath", coordFolder).detail("WhiteListBinPath", whiteListBinPaths); // SOMEDAY: start the services 
on the machine in a staggered fashion in simulation? state vector> v; @@ -1426,7 +1431,7 @@ ACTOR Future fdbd( v.push_back( reportErrors( processClass == ProcessClass::TesterClass ? monitorLeader( connFile, cc ) : clusterController( connFile, cc , asyncPriorityInfo, recoveredDiskFiles.getFuture(), localities ), "ClusterController") ); v.push_back( reportErrors(extractClusterInterface( cc, ci ), "ExtractClusterInterface") ); v.push_back( reportErrors(failureMonitorClient( ci, true ), "FailureMonitorClient") ); - v.push_back( reportErrorsExcept(workerServer(connFile, cc, localities, asyncPriorityInfo, processClass, dataFolder, memoryLimit, metricsConnFile, metricsPrefix, recoveredDiskFiles, memoryProfileThreshold, coordFolder), "WorkerServer", UID(), &normalWorkerErrors()) ); + v.push_back( reportErrorsExcept(workerServer(connFile, cc, localities, asyncPriorityInfo, processClass, dataFolder, memoryLimit, metricsConnFile, metricsPrefix, recoveredDiskFiles, memoryProfileThreshold, coordFolder, whiteListBinPaths), "WorkerServer", UID(), &normalWorkerErrors()) ); state Future firstConnect = reportErrors( printOnFirstConnected(ci), "ClusterFirstConnectedError" ); wait( quorum(v,1) ); diff --git a/fdbserver/workloads/SnapTest.actor.cpp b/fdbserver/workloads/SnapTest.actor.cpp index b7ab1b39fd..5e74b6e9ad 100644 --- a/fdbserver/workloads/SnapTest.actor.cpp +++ b/fdbserver/workloads/SnapTest.actor.cpp @@ -355,7 +355,7 @@ public: // workload functions // snap create with different UID try { tr.reset(); - StringRef snapPayload = LiteralStringRef("empty-binary:uid=ba61e9612a561d60bd83ad83e1b63568"); + StringRef snapPayload = LiteralStringRef("/bin/snap_create.sh:uid=ba61e9612a561d60bd83ad83e1b63568"); tr.execute(execSnap, snapPayload); wait(tr.commit()); break; @@ -364,6 +364,30 @@ public: // workload functions wait(tr.onError(e)); } } + } else if (self->testID == 8) { + // create a snapshot with a non whitelisted binary path and operation + // should fail + state bool 
testedFailure = false; + retry = 0; + loop { + self->snapUID = g_random->randomUniqueID(); + try { + StringRef snapCmdRef = LiteralStringRef("/bin/snap_create1.sh"); + Future status = snapCreate(cx, snapCmdRef, self->snapUID); + wait(status); + break; + } catch (Error& e) { + ++retry; + if (retry >= 5) { + break; + } + if (e.code() == error_code_transaction_not_permitted) { + testedFailure = true; + break; + } + } + } + ASSERT(testedFailure == true); } wait(delay(0.0)); return Void(); diff --git a/flow/error_definitions.h b/flow/error_definitions.h index 82752ca938..aebe3430f6 100755 --- a/flow/error_definitions.h +++ b/flow/error_definitions.h @@ -65,6 +65,7 @@ ERROR( lookup_failed, 1041, "DNS lookup failed" ) ERROR( proxy_memory_limit_exceeded, 1042, "Proxy commit memory limit exceeded" ) ERROR( shutdown_in_progress, 1043, "Operation no longer supported due to shutdown" ) ERROR( serialization_failed, 1044, "Failed to deserialize an object" ) +ERROR( transaction_not_permitted, 1045, "Operation not permitted") ERROR( broken_promise, 1100, "Broken promise" ) ERROR( operation_cancelled, 1101, "Asynchronous operation cancelled" ) diff --git a/tests/restarting/SnapTestSimpleRestart-2.txt b/tests/restarting/SnapTestSimpleRestart-2.txt index 38013680ba..ba7d30f94f 100644 --- a/tests/restarting/SnapTestSimpleRestart-2.txt +++ b/tests/restarting/SnapTestSimpleRestart-2.txt @@ -38,3 +38,11 @@ numSnaps=1 maxSnapDelay=3.0 testID=7 snapCheck=true + +; snapCreate with binary path that is not whitelisted +testTitle=SnapCreateNotWhiteListedBinaryPath +testName=SnapTest +numSnaps=1 +maxSnapDelay=3.0 +testID=8 +snapCheck=false From 17ecba8313b486726d8f1363bfb739cfa8ba42ea Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Wed, 3 Apr 2019 05:47:19 -0700 Subject: [PATCH 17/69] trace cleanup and other indentation changes --- fdbclient/NativeAPI.actor.cpp | 4 - fdbserver/MasterProxyServer.actor.cpp | 2 +- fdbserver/OldTLogServer_6_0.actor.cpp | 19 +---- 
fdbserver/SimulatedCluster.actor.cpp | 1 + fdbserver/TLogServer.actor.cpp | 30 ++----- fdbserver/TagPartitionedLogSystem.actor.cpp | 10 --- fdbserver/storageserver.actor.cpp | 25 +----- fdbserver/worker.actor.cpp | 17 +--- fdbserver/workloads/SnapTest.actor.cpp | 88 ++++++++------------- 9 files changed, 54 insertions(+), 142 deletions(-) diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 431b85cb15..0000397d86 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -3352,7 +3352,6 @@ ACTOR Future snapCreate(Database inputCx, StringRef snapCmd, UID snapUID) int p = snapCmd.toString().find_first_of(':', 0); state std::string snapPayLoad; - TraceEvent("SnapCmd").detail("Command", snapCmd.toString()); if (p == snapCmd.toString().npos) { snapPayLoad = snapCmd.toString() + ":uid=" + snapUID.toString(); } else { @@ -3380,10 +3379,7 @@ ACTOR Future snapCreate(Database inputCx, StringRef snapCmd, UID snapUID) if (BUGGIFY) { int32_t toDelay = g_random->randomInt(1, 30); - TraceEvent("SleepingBeforeEnablingPop") - .detail("Duration", toDelay); wait(delay(toDelay)); - TraceEvent("DoneSleepingBeforeEnablingPop"); } // enable popping of the TLog diff --git a/fdbserver/MasterProxyServer.actor.cpp b/fdbserver/MasterProxyServer.actor.cpp index b1dfa70241..6020445b8f 100644 --- a/fdbserver/MasterProxyServer.actor.cpp +++ b/fdbserver/MasterProxyServer.actor.cpp @@ -1705,7 +1705,7 @@ ACTOR Future masterProxyServerCore( wait(timeoutError(workers[i].interf.execReq.getReply(ExecuteRequest(execReq.execPayLoad)), 1.0)); ++numSucc; } catch (Error& e) { - TraceEvent("ExecReqFailed").detail("what", e.what()); + TraceEvent("ExecReqFailed").detail("What", e.what()); } } } diff --git a/fdbserver/OldTLogServer_6_0.actor.cpp b/fdbserver/OldTLogServer_6_0.actor.cpp index 16bedb8870..39713941e2 100644 --- a/fdbserver/OldTLogServer_6_0.actor.cpp +++ b/fdbserver/OldTLogServer_6_0.actor.cpp @@ -286,7 +286,7 @@ struct TLogData : NonCopyable { // 
the set and for callers that unset will // be able to match it up std::string dataFolder; // folder where data is stored - std::map toBePopped; // map of Tag->Version for all the pops + std::map toBePopped; // map of Tag->Version for all the pops // that came when ignorePopRequest was set Reference> degraded; @@ -1481,15 +1481,11 @@ ACTOR Future tLogCommit( state Future cmdErr; auto uidStr = execArg.getBinaryArgValue("uid"); if (!g_network->isSimulated()) { - // Run the exec command - // std::string snapBin = extractBinPath(param2.toString()); + // get the bin path auto snapBin = execArg.getBinaryPath(); auto dataFolder = "path=" + self->dataFolder; - - TraceEvent("oldTLog6SnapCommand").detail("cmdLine", param2.toString()).detail("folderPath", dataFolder); - + TraceEvent("TLogSnapCommand").detail("CmdLine", param2.toString()).detail("FolderPath", dataFolder); vector paramList; - // bin path paramList.push_back(snapBin); // user passed arguments auto listArgs = execArg.getBinaryArgs(); @@ -1511,17 +1507,8 @@ ACTOR Future tLogCommit( // copy the entire directory state std::string tLogFolderFrom = "./" + self->dataFolder + "/."; state std::string tLogFolderTo = "./" + self->dataFolder + "-snap-" + uidStr; - - std::string tLogFolderToCreateCmd = "mkdir " + tLogFolderTo; - std::string tLogFolderCopyCmd = "cp " + tLogFolderFrom + " " + tLogFolderTo; - - TraceEvent("ExecSnapCommands") - .detail("TLogFolderToCreateCmd", tLogFolderToCreateCmd) - .detail("TLogFolderCopyCmd", tLogFolderCopyCmd); - vector paramList; std::string mkdirBin = "/bin/mkdir"; - paramList.push_back(mkdirBin); paramList.push_back(tLogFolderTo); cmdErr = spawnProcess(mkdirBin, paramList, 3.0); diff --git a/fdbserver/SimulatedCluster.actor.cpp b/fdbserver/SimulatedCluster.actor.cpp index 4f36bd09c5..b271777394 100644 --- a/fdbserver/SimulatedCluster.actor.cpp +++ b/fdbserver/SimulatedCluster.actor.cpp @@ -1413,6 +1413,7 @@ ACTOR void setupAndRun(std::string dataFolder, const char *testFile, bool reboot 
//systemActors.push_back( startSystemMonitor(dataFolder) ); if (rebooting) { wait( timeoutError( restartSimulatedSystem( &systemActors, dataFolder, &testerCount, &connFile, &startingConfiguration, tlsOptions, extraDB, whiteListBinPaths), 100.0 ) ); + // FIXME: snapshot restore does not support multi-region restore, hence restore it as single region always if (restoring) { std::string config = "usable_regions=1"; startingConfiguration = makeString(config.size()); diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index 6756fdeaba..4f88cd3b57 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -329,10 +329,10 @@ struct TLogData : NonCopyable { FlowLock concurrentLogRouterReads; FlowLock persistentDataCommitLock; - bool ignorePopRequest; // ignore pop request from storage servers - double ignorePopDeadline; // time until which the ignorePopRequest will be + bool ignorePopRequest; // ignore pop request from storage servers + double ignorePopDeadline; // time until which the ignorePopRequest will be // honored - std::string ignorePopUid; // callers that set ignorePopRequest will set this + std::string ignorePopUid; // callers that set ignorePopRequest will set this // extra state, used to validate the ownership of // the set and for callers that unset will // be able to match it up @@ -1698,9 +1698,9 @@ ACTOR Future tLogCommit( qe.id = logData->logId; if (req.hasExecOp) { - // inspect the messages to find if there is an Exec type and print - // it. message are prefixed by the length of the message and each - // field is prefixed by the length too + // inspect the messages to find if there is an Exec type and print + // it. 
message are prefixed by the length of the message and each + // field is prefixed by the length too uint8_t type = MutationRef::MAX_ATOMIC_OP; { ArenaReader rd(req.arena, qe.messages, Unversioned()); @@ -1850,14 +1850,11 @@ ACTOR Future tLogCommit( auto uidStr = execArg.getBinaryArgValue("uid"); state Future cmdErr; if (!g_network->isSimulated()) { - // Run the exec command + // get bin path auto snapBin = execArg.getBinaryPath(); auto dataFolder = "path=" + self->dataFolder; - - TraceEvent("TLogSnapCommand").detail("cmdLine", param2.toString()).detail("folderPath", dataFolder); - + TraceEvent("TLogSnapCommand").detail("CmdLine", param2.toString()).detail("FolderPath", dataFolder); vector paramList; - // bin path paramList.push_back(snapBin); // user passed arguments auto listArgs = execArg.getBinaryArgs(); @@ -1879,24 +1876,13 @@ ACTOR Future tLogCommit( // copy the entire directory state std::string tLogFolderFrom = "./" + self->dataFolder + "/."; state std::string tLogFolderTo = "./" + self->dataFolder + "-snap-" + uidStr; - - std::string tLogFolderToCreateCmd = "mkdir " + tLogFolderTo; - std::string tLogFolderCopyCmd = - "cp " + tLogFolderFrom + " " + tLogFolderTo; - - TraceEvent("TLogExecSnapcommands") - .detail("TLogFolderToCreateCmd", tLogFolderToCreateCmd) - .detail("TLogFolderCopyCmd", tLogFolderCopyCmd); - vector paramList; std::string mkdirBin = "/bin/mkdir"; - paramList.push_back(mkdirBin); paramList.push_back(tLogFolderTo); cmdErr = spawnProcess(mkdirBin, paramList, 3.0); wait(success(cmdErr)); err = cmdErr.get(); - TraceEvent("MkdirStatus").detail("Errno", err); if (err == 0) { vector paramList; std::string cpBin = "/bin/cp"; diff --git a/fdbserver/TagPartitionedLogSystem.actor.cpp b/fdbserver/TagPartitionedLogSystem.actor.cpp index 5e42cd0762..2e25daae3b 100644 --- a/fdbserver/TagPartitionedLogSystem.actor.cpp +++ b/fdbserver/TagPartitionedLogSystem.actor.cpp @@ -1111,16 +1111,6 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted const& 
tags, std::vector& locations, bool allLocations) { int locationOffset = 0; for(auto& log : tLogs) { - // FIXME: sramamoorthy, remove after debugging - if (allLocations) { - TraceEvent("AllLocationsDetails") - .detail("NumLogServers", log->logServers.size()) - .detail("NumLogRouters", log->logRouters.size()) - .detail("SatelliteTagLocations", log->satelliteTagLocations.size()) - .detail("IsLocal", log->isLocal) - .detail("LogServerString", log->logServerString()) - .detail("LogRouterString", log->logRouterString()); - } if(log->isLocal && log->logServers.size()) { log->getPushLocations(tags, locations, locationOffset, allLocations); locationOffset += log->logServers.size(); diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp index 1e75d0286b..ac013f52f1 100644 --- a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -1883,19 +1883,18 @@ snapHelper(StorageServer* data, MutationRef m, Version ver) state Future cmdErr; if (!g_network->isSimulated() || cmd != execSnap) { - // Run the exec command + // get bin path auto binPath = execArg.getBinaryPath(); auto dataFolder = "path=" + data->folder; vector paramList; - // bin path paramList.push_back(binPath); - // user passed arguments + // get user passed arguments auto listArgs = execArg.getBinaryArgs(); execArg.dbgPrint(); for (auto elem : listArgs) { paramList.push_back(elem); } - // additional arguments + // get additional arguments paramList.push_back(dataFolder); const char* version = FDB_VT_VERSION; std::string versionString = "version="; @@ -1908,23 +1907,9 @@ snapHelper(StorageServer* data, MutationRef m, Version ver) err = cmdErr.get(); } else { // copy the files - TraceEvent("ExecTraceStorage") - .detail("StorageFolder", data->folder) - .detail("LocalMachineId", data->thisServerID.toString()) - .detail("DurableVersion", data->durableVersion.get()); - std::string folder = abspath(data->folder); - state std::string folderFrom = folder + "/."; state 
std::string folderTo = folder + "-snap-" + uidStr; - - std::string folderToCreateCmd = "mkdir " + folderTo; - std::string folderCopyCmd = "cp " + folderFrom + " " + folderTo; - - TraceEvent("ExecTraceStorageSnapcommands") - .detail("FolderToCreateCmd", folderToCreateCmd) - .detail("FolderCopyCmd", folderCopyCmd); - vector paramList; std::string mkdirBin = "/bin/mkdir"; @@ -1933,7 +1918,6 @@ snapHelper(StorageServer* data, MutationRef m, Version ver) cmdErr = spawnProcess(mkdirBin, paramList, 3.0); wait(success(cmdErr)); err = cmdErr.get(); - TraceEvent("MkdirStatus").detail("Errno", err); if (err == 0) { vector paramList; std::string cpBin = "/bin/cp"; @@ -2090,7 +2074,6 @@ ACTOR Future fetchKeys( StorageServer *data, AddingShard* shard ) { ASSERT(execIdxVec.size() == 0); splitMutations(data, data->shards, *u, execIdxVec); for (auto execIdx : execIdxVec) { - TraceEvent("TIMEFORSNAP"); wait(snapHelper(data, u->mutations[execIdx], u->version)); } execIdxVec.clear(); @@ -2702,7 +2685,6 @@ ACTOR Future update( StorageServer* data, bool* pReceivedUpdate ) for(; mutationNum < pUpdate->mutations.size(); mutationNum++) { updater.applyMutation(data, pUpdate->mutations[mutationNum], pUpdate->version); if (pUpdate->mutations[mutationNum].type == MutationRef::Exec) { - TraceEvent("TIMEFORSNAP"); wait(snapHelper(data, pUpdate->mutations[mutationNum], pUpdate->version)); } mutationBytes += pUpdate->mutations[mutationNum].totalSize(); @@ -2778,7 +2760,6 @@ ACTOR Future update( StorageServer* data, bool* pReceivedUpdate ) break; } if (msg.type == MutationRef::Exec) { - TraceEvent("TIMETOTAKESNAP"); wait(snapHelper(data, msg, ver)); } } diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index 5547a8fe31..49f91df063 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -1181,18 +1181,17 @@ ACTOR Future workerServer( state int err = 0; state Future cmdErr; if (!g_network->isSimulated()) { - // bin path + // get bin path auto snapBin = 
execArg.getBinaryPath(); auto dataFolder = "path=" + coordFolder; vector paramList; - // bin path paramList.push_back(snapBin); - // user passed arguments + // get user passed arguments auto listArgs = execArg.getBinaryArgs(); for (auto elem : listArgs) { paramList.push_back(elem); } - // additional arguments + // get additional arguments paramList.push_back(dataFolder); const char* version = FDB_VT_VERSION; std::string versionString = "version="; @@ -1208,23 +1207,13 @@ ACTOR Future workerServer( std::string folder = coordFolder; state std::string folderFrom = "./" + folder + "/."; state std::string folderTo = "./" + folder + "-snap-" + uidStr; - - std::string folderToCreateCmd = "mkdir " + folderTo; - std::string folderCopyCmd = "cp " + folderFrom + " " + folderTo; - - TraceEvent("ExecTraceCoordSnapcommands") - .detail("FolderToCreateCmd", folderToCreateCmd) - .detail("FolderCopyCmd", folderCopyCmd); - vector paramList; std::string mkdirBin = "/bin/mkdir"; - paramList.push_back(mkdirBin); paramList.push_back(folderTo); cmdErr = spawnProcess(mkdirBin, paramList, 3.0); wait(success(cmdErr)); err = cmdErr.get(); - TraceEvent("MkdirStatus").detail("Errno", err); if (err == 0) { vector paramList; std::string cpBin = "/bin/cp"; diff --git a/fdbserver/workloads/SnapTest.actor.cpp b/fdbserver/workloads/SnapTest.actor.cpp index 5e74b6e9ad..85c50aab78 100644 --- a/fdbserver/workloads/SnapTest.actor.cpp +++ b/fdbserver/workloads/SnapTest.actor.cpp @@ -70,7 +70,7 @@ void filterEmptyMessages(std::vector>& messages) { void printMessages(std::vector>& messages) { for (int i = 0; i < messages.size(); i++) { - TraceEvent("MESSAGES").detail("I", i).detail("VALUE", messages[i].get().toString()); + TraceEvent("SnapTestMessages").detail("I", i).detail("Value", messages[i].get().toString()); } return; } @@ -84,12 +84,14 @@ public: // variables int testID; // test id UID snapUID; // UID used for snap name std::string restartInfoLocation; // file location to store the snap restore info 
+ int maxRetryCntToRetrieveMessage; // number of retires to do trackLatest public: // ctor & dtor SnapTestWorkload(WorkloadContext const& wcx) : TestWorkload(wcx), numSnaps(0), maxSnapDelay(0.0), snapCheck(false), testID(0), snapUID() { TraceEvent("SnapTestWorkload Constructor"); std::string workloadName = "SnapTest"; + maxRetryCntToRetrieveMessage = 10; numSnaps = getOption(options, LiteralStringRef("numSnaps"), 0); maxSnapDelay = getOption(options, LiteralStringRef("maxSnapDelay"), 25.0); @@ -151,6 +153,7 @@ public: // workload functions } default: { break; } } + TraceEvent(SevError, "InvalidPathCheckOptions"); return false; } @@ -246,7 +249,7 @@ public: // workload functions try { Standalone kvRange = wait(tr.getRange(begin, end, CLIENT_KNOBS->TOO_MANY)); if (!kvRange.more && kvRange.size() == 0) { - TraceEvent("NoMoreEntries"); + TraceEvent("SnapTestNoMoreEntries"); break; } @@ -271,7 +274,7 @@ public: // workload functions cnt = 0; } } - TraceEvent("VerifyCntValue").detail("Value", cnt); + TraceEvent("SnapTestVerifyCntValue").detail("Value", cnt); if (cnt != 1000) { throw operation_failed(); } @@ -428,30 +431,30 @@ public: // workload functions loop { retry = false; try { - TraceEvent("WaitingForTlogMessages"); + TraceEvent(SevDebug, "WaitingForTlogMessages"); wait(waitForAll(tLogMessages)); break; } catch (Error& e) { - TraceEvent("VerifyTLogTrackLatest") - .detail("Token", eventTokenRef.toString()) - .detail("Reason", "Failed to get tLogMessages") - .detail("Code", e.what()); if (e.code() != error_code_timed_out) { + TraceEvent(SevError, "VerifyTLogTrackLatest") + .detail("Token", eventTokenRef.toString()) + .detail("Reason", "Failed to get tLogMessages") + .detail("Code", e.what()); return false; } else { retry = true; ++retryCnt; } } - if (retryCnt >= 4) { - TraceEvent("Unable to retrieve TLog messages"); + if (retryCnt >= self->maxRetryCntToRetrieveMessage ) { + TraceEvent(SevError, "UnableToRetrieveTLogMessages"); return false; } } 
printMessages(tLogMessages); filterEmptyMessages(tLogMessages); if (tLogMessages.size() != 1) { - TraceEvent("VerifyTLogTrackLatestMessageNotFound") + TraceEvent(SevError, "VerifyTLogTrackLatestMessageNotFound") .detail("Address", tLogWorkers[i].address()) .detail("Token", eventTokenRef.toString()); return false; @@ -509,10 +512,10 @@ public: // workload functions wait(waitForAll(storageMessages)); wait(waitForAll(coordMessages)); } catch (Error& e) { - TraceEvent("VerifyExecTraceVersionFailure") - .detail("Reason", "Failed to get proxy or storage messages") - .detail("code", e.what()); if (e.code() != error_code_timed_out) { + TraceEvent(SevError, "VerifyExecTraceVersionFailure") + .detail("Reason", "FailedToGetProxyOrStorageMessages") + .detail("Code", e.what()); return false; } else { retry = true; @@ -524,33 +527,26 @@ public: // workload functions } if (retry && retryCnt >= 4) { - TraceEvent("Unable to retrieve proxy/storage/coord messages " - "after retries"); - ASSERT(1 == 0); - std::terminate(); + TraceEvent(SevError, "UnableToRetrieveProxyStorageCoordMessages"); return false; } } - printMessages(proxyMessages); - printMessages(storageMessages); - printMessages(coordMessages); // filter out empty messages filterEmptyMessages(proxyMessages); filterEmptyMessages(storageMessages); filterEmptyMessages(coordMessages); - TraceEvent("ProxyMessages"); + TraceEvent("SnapTestProxyMessages"); printMessages(proxyMessages); - TraceEvent("StorageMessages"); + TraceEvent("SnapTestStorageMessages"); printMessages(storageMessages); - TraceEvent("CoorMessages"); + TraceEvent("SnapTestCoordMessages"); printMessages(coordMessages); if (proxyMessages.size() != 1) { // if no message from proxy or more than one fail the check - TraceEvent("No ExecTrace message from Proxy"); - std::terminate(); + TraceEvent(SevError, "NoExecTraceMessageFromProxy"); return false; } @@ -558,8 +554,7 @@ public: // workload functions .detail("CoordMessageSize", coordMessages.size()) 
.detail("CoordAddrssize", coordAddrs.size()); if (coordMessages.size() < (coordAddrs.size() + 1) / 2) { - TraceEvent("No ExecTrace message from Quorum of coordinators"); - std::terminate(); + TraceEvent(SevError, "NoExecTraceMessageFromQuorumOfCoordinators"); return false; } @@ -579,13 +574,10 @@ public: // workload functions for (; (execVersion != -1) && j < storageMessages.size(); j++) { // for each message that has this verison, get the tag and // the durable version - // FIXME: sramamoorthy, for now allow default values state Tag tag; state Tag invalidTag; - // FIXME: sramamoorthy, for now allow default values state Version durableVersion = -1; TraceEvent("RelevantStorageMessage").detail("Msg", storageMessages[j].get().toString()); - // FIXME: sramamoorthy, how to compare with empty string ASSERT(storageMessages[j].get().toString() != emptyStr); getTagAndDurableVersion(storageMessages[j].get(), execVersion, tag, durableVersion); TraceEvent("SearchingTLogMessages").detail("Tag", tag.toString()); @@ -595,7 +587,6 @@ public: // workload functions retry = false; tLogMessages.clear(); - // for (int m = 0; (tag != -1) && m < tLogWorkers.size(); m++) { for (int m = 0; (tag != invalidTag) && m < tLogWorkers.size(); m++) { visitedStorageTags[tag] = true; std::string eventToken = "ExecTrace/TLog/" + tag.toString() + "/" + self->snapUID.toString(); @@ -610,10 +601,10 @@ public: // workload functions wait(waitForAll(tLogMessages)); } } catch (Error& e) { - TraceEvent("VerifyExecTraceVersionFailure") - .detail("Reason", "Failed to get tLogMessages") - .detail("Code", e.what()); if (e.code() != error_code_timed_out) { + TraceEvent(SevError, "VerifyExecTraceVersionFailure") + .detail("Reason", "FailedToGetTLogMessages") + .detail("Code", e.what()); return false; } else { retry = true; @@ -623,11 +614,8 @@ public: // workload functions if (retry == false) { break; } - if (retry && retryCnt > 20) { - TraceEvent("Unable to retrieve tLog messages after " - "retries"); - ASSERT(1 
== 0); - std::terminate(); + if (retry && retryCnt > self->maxRetryCntToRetrieveMessage) { + TraceEvent(SevError, "UnableToRetrieveTLogMessagesAfterRetries"); return false; } } @@ -638,14 +626,12 @@ public: // workload functions numDurableVersionChecks = 0; for (; (tag != invalidTag) && k < tLogMessages.size(); k++) { // for each of the message that has this version and tag - // verify that the minVersioninTlog < durableVersion < - // maxVersioninTlog + // verify that + // 1) durableVersion >= minTLogVersion -1 + // 2) durableVersion < maxTLogVersion Version minTLogVersion = -1; Version maxTLogVersion = -1; - TraceEvent("TLogMessage").detail("Msg", tLogMessages[k].get().toString()); - - // FIXME, sramamoorthy, handle empty string ASSERT(tLogMessages[k].get().toString() != emptyStr); getMinAndMaxTLogVersions(tLogMessages[k].get(), execVersion, tag, minTLogVersion, maxTLogVersion); if (minTLogVersion != -1 && maxTLogVersion != -1) { @@ -655,12 +641,10 @@ public: // workload functions } } } - // if we did not find even one tlog for a given tag fail the - // check + // if we did not find even one tlog for a given tag fail the check if (numDurableVersionChecks < 1) { - TraceEvent("NoTLogFoundForATag"); - ASSERT(1 == 0); - std::terminate(); + TraceEvent(SevError, "NoTLogFoundForATag"); + return false; } tLogMessages.clear(); } @@ -668,9 +652,7 @@ public: // workload functions // validates that we encountered unique tags of value numTags if (numTags != visitedStorageTags.size()) { - TraceEvent("StorageMessagesWereNotFound"); - ASSERT(1 == 0); - std::terminate(); + TraceEvent(SevError, "StorageMessagesWereNotFound"); return false; } TraceEvent("VerifyExecTraceVersionSuccess"); From 8370871e4cc38a47428881162f894ca42a2f1dab Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Thu, 4 Apr 2019 11:33:00 -0700 Subject: [PATCH 18/69] stale RESTORE option related code removed --- cmake/AddFdbTest.cmake | 2 +- cmake/ConfigureCompiler.cmake | 2 +- tests/TestRunner/TestRunner.py | 5 
----- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/cmake/AddFdbTest.cmake b/cmake/AddFdbTest.cmake index 556a84cdd5..b3ffb51760 100644 --- a/cmake/AddFdbTest.cmake +++ b/cmake/AddFdbTest.cmake @@ -54,7 +54,7 @@ endfunction() # all these tests in serialized order and within the same directory. This is # useful for restart tests function(add_fdb_test) - set(options UNIT IGNORE RESTORE) + set(options UNIT IGNORE) set(oneValueArgs TEST_NAME TIMEOUT) set(multiValueArgs TEST_FILES) cmake_parse_arguments(ADD_FDB_TEST "${options}" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}") diff --git a/cmake/ConfigureCompiler.cmake b/cmake/ConfigureCompiler.cmake index c84aa79172..dfaaa02fdd 100644 --- a/cmake/ConfigureCompiler.cmake +++ b/cmake/ConfigureCompiler.cmake @@ -140,7 +140,7 @@ else() -Wno-deprecated -fvisibility=hidden -Wreturn-type -# -fdiagnostics-color=always + -fdiagnostics-color=always -fPIC) if (GPERFTOOLS_FOUND AND GCC) add_compile_options( diff --git a/tests/TestRunner/TestRunner.py b/tests/TestRunner/TestRunner.py index 650609f32c..f0e5741830 100755 --- a/tests/TestRunner/TestRunner.py +++ b/tests/TestRunner/TestRunner.py @@ -300,8 +300,6 @@ def run_simulation_test(basedir, options): tmp[0] = options.old_binary if not first: tmp.append('-R') - if options.restoring == 'YES': - tmp.append('--restoring') first = False tmp.append('-f') tmp.append(testfile) @@ -370,9 +368,6 @@ if __name__ == '__main__': choices=['xml', 'json'], help='Log format (json or xml)') parser.add_argument('-O', '--old-binary', required=False, default=None, help='Path to the old binary to use for upgrade tests') - parser.add_argument('-RO', '--restoring', required=False, default='NO', - choices=['YES', 'NO'], - help='Set if you want asnapshot to be restored') parser.add_argument('--aggregate-traces', default='NONE', choices=['NONE', 'FAILED', 'ALL']) parser.add_argument('--keep-logs', default='FAILED', From 539e65efad9fd10bd0ac2318a87912147fccca35 Mon Sep 17 00:00:00 2001 From: 
sramamoorthy Date: Fri, 5 Apr 2019 12:45:13 -0700 Subject: [PATCH 19/69] Skip parsing mutations if it is tagged for TxsTag In Tlog, if a mutation is targetted for TxsTag then skip from parsing them. --- fdbserver/OldTLogServer_6_0.actor.cpp | 14 ++++++++++---- fdbserver/TLogServer.actor.cpp | 12 +++++++++--- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/fdbserver/OldTLogServer_6_0.actor.cpp b/fdbserver/OldTLogServer_6_0.actor.cpp index 39713941e2..3447b9c6c5 100644 --- a/fdbserver/OldTLogServer_6_0.actor.cpp +++ b/fdbserver/OldTLogServer_6_0.actor.cpp @@ -1342,15 +1342,21 @@ ACTOR Future tLogCommit( uint32_t sub; while (!rd.empty()) { Tag tmpTag; + bool hasTxsTag = false; rd.checkpoint(); rd >> messageLength >> sub >> tagCount; for (int i = 0; i < tagCount; i++) { rd >> tmpTag; + if (tmpTag == txsTag) { + hasTxsTag = true; + } execTags.push_back(tmpTag); } - rd >> type; - if (type == MutationRef::Exec) { - break; + if (!hasTxsTag) { + rd >> type; + if (type == MutationRef::Exec) { + break; + } } rawLength = messageLength + sizeof(messageLength); rd.rewind(); @@ -1365,7 +1371,7 @@ ACTOR Future tLogCommit( rd >> len; param2 = StringRef((uint8_t const*)rd.readBytes(len), len); - TraceEvent("TLogExecCommandType", self->dbgid).detail("Value", execCmd.toString()); + TraceEvent(SevDebug, "TLogExecCommandType", self->dbgid).detail("Value", execCmd.toString()); execArg.setCmdValueString(param2.toString()); execArg.dbgPrint(); diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index 4f88cd3b57..b13a80a9e9 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -1709,15 +1709,21 @@ ACTOR Future tLogCommit( uint32_t sub; while(!rd.empty()) { Tag tmpTag; + bool hasTxsTag = false; rd.checkpoint(); rd >> messageLength >> sub >> tagCount; for(int i = 0; i < tagCount; i++) { rd >> tmpTag; + if (tmpTag == txsTag) { + hasTxsTag = true; + } execTags.push_back(tmpTag); } - rd >> type; - if (type == 
MutationRef::Exec) { - break; + if (!hasTxsTag) { + rd >> type; + if (type == MutationRef::Exec) { + break; + } } rawLength = messageLength + sizeof(messageLength); rd.rewind(); From 89b7a052f5b1c6080ccfa31ae880b01270d0f3b4 Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Mon, 8 Apr 2019 05:14:07 -0700 Subject: [PATCH 20/69] Bug fixes for snapping coordinators --- fdbclient/NativeAPI.actor.cpp | 2 +- fdbserver/MasterProxyServer.actor.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 0000397d86..fdd1a88e5b 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -2316,7 +2316,7 @@ ACTOR Future executeCoordinators(DatabaseContext* cx, StringRef execPayLoa } } } catch (Error& e) { - TraceEvent(SevError, "NativeAPI.executeCoordinatorsError").error(e); + TraceEvent("NativeAPI.executeCoordinatorsError").error(e); throw; } } diff --git a/fdbserver/MasterProxyServer.actor.cpp b/fdbserver/MasterProxyServer.actor.cpp index 6020445b8f..66c5fa5c4f 100644 --- a/fdbserver/MasterProxyServer.actor.cpp +++ b/fdbserver/MasterProxyServer.actor.cpp @@ -1702,7 +1702,7 @@ ACTOR Future masterProxyServerCore( if (coordinatorsAddrSet.find(workers[i].interf.address()) != coordinatorsAddrSet.end()) { TraceEvent("ExecReqToCoordinator").detail("WorkerAddr", workers[i].interf.address()); try { - wait(timeoutError(workers[i].interf.execReq.getReply(ExecuteRequest(execReq.execPayLoad)), 1.0)); + wait(timeoutError(workers[i].interf.execReq.getReply(ExecuteRequest(execReq.execPayLoad)), 3.0)); ++numSucc; } catch (Error& e) { TraceEvent("ExecReqFailed").detail("What", e.what()); From cfdad0c5e6a860c6c031130d072ea4d796ccf59a Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Mon, 8 Apr 2019 05:23:48 -0700 Subject: [PATCH 21/69] tlog to snapshot exactly at exec version --- fdbserver/OldTLogServer_6_0.actor.cpp | 27 ++++++++++++++++-- fdbserver/TLogServer.actor.cpp | 41 
++++++++++++++++++++------- fdbserver/WorkerInterface.actor.h | 2 +- fdbserver/storageserver.actor.cpp | 2 +- fdbserver/worker.actor.cpp | 8 ++++-- 5 files changed, 63 insertions(+), 17 deletions(-) diff --git a/fdbserver/OldTLogServer_6_0.actor.cpp b/fdbserver/OldTLogServer_6_0.actor.cpp index 3447b9c6c5..0d6ad4f7be 100644 --- a/fdbserver/OldTLogServer_6_0.actor.cpp +++ b/fdbserver/OldTLogServer_6_0.actor.cpp @@ -431,13 +431,15 @@ struct LogData : NonCopyable, public ReferenceCounted { UID recruitmentID; std::set allTags; Future terminated; + Promise execOpHold; + bool execOpCommitInProgress; explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, bool isPrimary, int logRouterTags, UID recruitmentID, std::vector tags) : tLogData(tLogData), knownCommittedVersion(0), logId(interf.id()), cc("TLog", interf.id().toString()), bytesInput("BytesInput", cc), bytesDurable("BytesDurable", cc), remoteTag(remoteTag), isPrimary(isPrimary), logRouterTags(logRouterTags), recruitmentID(recruitmentID), logSystem(new AsyncVar>()), logRouterPoppedVersion(0), durableKnownCommittedVersion(0), minKnownCommittedVersion(0), allTags(tags.begin(), tags.end()), terminated(tLogData->terminated.getFuture()), // These are initialized differently on init() or recovery recoveryCount(), stopped(false), initialized(false), queueCommittingVersion(0), newPersistentDataVersion(invalidVersion), unrecoveredBefore(1), recoveredAt(1), unpoppedRecoveredTags(0), - logRouterPopToVersion(0), locality(tagLocalityInvalid) + logRouterPopToVersion(0), locality(tagLocalityInvalid), execOpCommitInProgress(false) { startRole(Role::TRANSACTION_LOG, interf.id(), UID()); @@ -1313,6 +1315,14 @@ ACTOR Future tLogCommit( return Void(); } + // while exec op is being committed, no new transactions will be admitted. 
+ // This property is useful for snapshot kind of operations which wants to + // take a snap of the disk image at a particular version (no data from + // future version to be included) + if (logData->execOpCommitInProgress) { + wait(logData->execOpHold.getFuture()); + } + state Version execVersion = invalidVersion; state ExecCmdValueString execArg(); state TLogQueueEntryRef qe; @@ -1458,6 +1468,13 @@ ACTOR Future tLogCommit( } } } + if (execVersion != invalidVersion) { + TraceEvent(SevDebug, "SettingExecOpCommit") + .detail("ExecVersion", execVersion) + .detail("Version", req.version); + logData->execOpCommitInProgress = true; + logData->execOpHold.reset(); + } } //TraceEvent("TLogCommit", logData->logId).detail("Version", req.version); @@ -1517,7 +1534,7 @@ ACTOR Future tLogCommit( std::string mkdirBin = "/bin/mkdir"; paramList.push_back(mkdirBin); paramList.push_back(tLogFolderTo); - cmdErr = spawnProcess(mkdirBin, paramList, 3.0); + cmdErr = spawnProcess(mkdirBin, paramList, 3.0, true); wait(success(cmdErr)); err = cmdErr.get(); if (err == 0) { @@ -1569,8 +1586,12 @@ ACTOR Future tLogCommit( te.trackLatest(message.c_str()); } } - execVersion = invalidVersion; } + if (execVersion != invalidVersion && logData->execOpCommitInProgress) { + logData->execOpCommitInProgress = false; + logData->execOpHold.send(Void()); + } + execVersion = invalidVersion; if(stopped.isReady()) { ASSERT(logData->stopped); diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index b13a80a9e9..2e87855bbb 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -489,13 +489,15 @@ struct LogData : NonCopyable, public ReferenceCounted { UID recruitmentID; std::set allTags; Future terminated; + Promise execOpHold; + bool execOpCommitInProgress; explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, bool isPrimary, int logRouterTags, UID recruitmentID, uint64_t protocolVersion, std::vector tags) : tLogData(tLogData), 
knownCommittedVersion(0), logId(interf.id()), cc("TLog", interf.id().toString()), bytesInput("BytesInput", cc), bytesDurable("BytesDurable", cc), remoteTag(remoteTag), isPrimary(isPrimary), logRouterTags(logRouterTags), recruitmentID(recruitmentID), protocolVersion(protocolVersion), logSystem(new AsyncVar>()), logRouterPoppedVersion(0), durableKnownCommittedVersion(0), minKnownCommittedVersion(0), queuePoppedVersion(0), allTags(tags.begin(), tags.end()), terminated(tLogData->terminated.getFuture()), // These are initialized differently on init() or recovery recoveryCount(), stopped(false), initialized(false), queueCommittingVersion(0), newPersistentDataVersion(invalidVersion), unrecoveredBefore(1), recoveredAt(1), unpoppedRecoveredTags(0), - logRouterPopToVersion(0), locality(tagLocalityInvalid) + logRouterPopToVersion(0), locality(tagLocalityInvalid), execOpCommitInProgress(false) { startRole(Role::TRANSACTION_LOG, interf.id(), UID()); @@ -1680,6 +1682,14 @@ ACTOR Future tLogCommit( return Void(); } + // while exec op is being committed, no new transactions will be admitted. 
+ // This property is useful for snapshot kind of operations which wants to + // take a snap of the disk image at a particular version (not data from + // future version to be included) + if (logData->execOpCommitInProgress) { + wait(logData->execOpHold.getFuture()); + } + state Version execVersion = invalidVersion; state ExecCmdValueString execArg(); state TLogQueueEntryRef qe; @@ -1738,7 +1748,7 @@ ACTOR Future tLogCommit( rd >> len; param2 = StringRef((uint8_t const*)rd.readBytes(len), len); - TraceEvent("TLogExecCommandType", self->dbgid).detail("Value", execCmd.toString()); + TraceEvent(SevDebug, "TLogExecCommandType", self->dbgid).detail("Value", execCmd.toString()); execArg.setCmdValueString(param2.toString()); execArg.dbgPrint(); @@ -1766,12 +1776,12 @@ ACTOR Future tLogCommit( std::string message = "ExecTrace/TLog/" + logData->allTags.begin()->toString(); "/" + uidStr; - TraceEvent("ExecCmdSnapCreate") - .detail("Uid", uidStr) - .detail("Status", -1) - .detail("Tag", logData->allTags.begin()->toString()) - .detail("Role", "TLog") - .trackLatest(message.c_str()); + TraceEvent("ExecCmdSnapCreate") + .detail("Uid", uidStr) + .detail("Status", -1) + .detail("Tag", logData->allTags.begin()->toString()) + .detail("Role", "TLog") + .trackLatest(message.c_str()); } } if (execCmd == execDisableTLogPop) { @@ -1826,6 +1836,13 @@ ACTOR Future tLogCommit( } } } + if (execVersion != invalidVersion) { + TraceEvent(SevDebug, "SettingExecOpCommit") + .detail("ExecVersion", execVersion) + .detail("Version", req.version); + logData->execOpCommitInProgress = true; + logData->execOpHold.reset(); + } } //TraceEvent("TLogCommit", logData->logId).detail("Version", req.version); @@ -1886,7 +1903,7 @@ ACTOR Future tLogCommit( std::string mkdirBin = "/bin/mkdir"; paramList.push_back(mkdirBin); paramList.push_back(tLogFolderTo); - cmdErr = spawnProcess(mkdirBin, paramList, 3.0); + cmdErr = spawnProcess(mkdirBin, paramList, 3.0, true); wait(success(cmdErr)); err = cmdErr.get(); if 
(err == 0) { @@ -1938,8 +1955,12 @@ ACTOR Future tLogCommit( te.trackLatest(message.c_str()); } } - execVersion = invalidVersion; } + if (execVersion != invalidVersion && logData->execOpCommitInProgress) { + logData->execOpCommitInProgress = false; + logData->execOpHold.send(Void()); + } + execVersion = invalidVersion; if(stopped.isReady()) { ASSERT(logData->stopped); diff --git a/fdbserver/WorkerInterface.actor.h b/fdbserver/WorkerInterface.actor.h index 693a30c89a..e5c260a615 100644 --- a/fdbserver/WorkerInterface.actor.h +++ b/fdbserver/WorkerInterface.actor.h @@ -451,7 +451,7 @@ typedef decltype(&tLog) TLogFn; // spawns a process pointed by `binPath` and the arguments provided at `paramList`, // if the process spawned takes more than `maxWaitTime` then it will be killed -ACTOR Future spawnProcess(std::string binPath, vector paramList, double maxWaitTime); +ACTOR Future spawnProcess(std::string binPath, vector paramList, double maxWaitTime, bool isSync = false); #include "flow/unactorcompiler.h" #endif diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp index ac013f52f1..719cfc0521 100644 --- a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -1915,7 +1915,7 @@ snapHelper(StorageServer* data, MutationRef m, Version ver) paramList.push_back(mkdirBin); paramList.push_back(folderTo); - cmdErr = spawnProcess(mkdirBin, paramList, 3.0); + cmdErr = spawnProcess(mkdirBin, paramList, 3.0, true); wait(success(cmdErr)); err = cmdErr.get(); if (err == 0) { diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index 49f91df063..f4d33d8552 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -1211,7 +1211,7 @@ ACTOR Future workerServer( std::string mkdirBin = "/bin/mkdir"; paramList.push_back(mkdirBin); paramList.push_back(folderTo); - cmdErr = spawnProcess(mkdirBin, paramList, 3.0); + cmdErr = spawnProcess(mkdirBin, paramList, 3.0, true); wait(success(cmdErr)); err = 
cmdErr.get(); if (err == 0) { @@ -1432,7 +1432,7 @@ ACTOR Future fdbd( } } -ACTOR Future spawnProcess(std::string binPath, vector paramList, double maxWaitTime) +ACTOR Future spawnProcess(std::string binPath, vector paramList, double maxWaitTime, bool isSync) { state pid_t pid = -1; try { @@ -1445,6 +1445,10 @@ ACTOR Future spawnProcess(std::string binPath, vector paramLis return -1; } + if (!isSync && g_network->isSimulated()) { + wait(delay(g_random->random01())); + } + state double sleepTime = 0; state int err = 0; while (true) { From 090bb530346b8415ed50cb97518af93313983ac6 Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Mon, 8 Apr 2019 12:35:20 -0700 Subject: [PATCH 22/69] ShardInfo::addMutation to handle exec mutation --- fdbserver/storageserver.actor.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp index 719cfc0521..d0ce1fd273 100644 --- a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -2269,7 +2269,8 @@ void ShardInfo::addMutation(Version version, MutationRef const& mutation) { adding->addMutation(version, mutation); else if (readWrite) readWrite->addMutation(version, mutation, this->keys, readWrite->updateEagerReads); - else if (mutation.type != MutationRef::ClearRange) { + else if ((mutation.type != MutationRef::ClearRange) + && (mutation.type != MutationRef::Exec)) { TraceEvent(SevError, "DeliveredToNotAssigned").detail("Version", version).detail("Mutation", mutation.toString()); ASSERT(false); // Mutation delivered to notAssigned shard! 
} From 8838ba3d3b4873fcdf152b76d76b9d6a440d9cd0 Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Mon, 8 Apr 2019 13:55:30 -0700 Subject: [PATCH 23/69] Split SnapTestSimpleRestart into two test cases --- tests/CMakeLists.txt | 1 + tests/fast/SnapTestFailAndDisablePop.txt | 40 +++++++++++++++++++ tests/restarting/SnapTestSimpleRestart-2.txt | 41 -------------------- 3 files changed, 41 insertions(+), 41 deletions(-) create mode 100644 tests/fast/SnapTestFailAndDisablePop.txt diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 92f3af84a4..0d02e42c18 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -107,6 +107,7 @@ add_fdb_test(TEST_FILES fast/RandomUnitTests.txt) add_fdb_test(TEST_FILES fast/SelectorCorrectness.txt) add_fdb_test(TEST_FILES fast/Sideband.txt) add_fdb_test(TEST_FILES fast/SidebandWithStatus.txt) +add_fdb_test(TEST_FILES fast/SnapTestFailAndDisablePop.txt) add_fdb_test(TEST_FILES fast/SwizzledRollbackSideband.txt) add_fdb_test(TEST_FILES fast/SystemRebootTestCycle.txt) add_fdb_test(TEST_FILES fast/TaskBucketCorrectness.txt) diff --git a/tests/fast/SnapTestFailAndDisablePop.txt b/tests/fast/SnapTestFailAndDisablePop.txt new file mode 100644 index 0000000000..0dc8df1222 --- /dev/null +++ b/tests/fast/SnapTestFailAndDisablePop.txt @@ -0,0 +1,40 @@ +; verify that the TLog popping disable times out and switches to enable mode +; automatically, if not enabled specifically +testTitle=SnapTLogPopDisableTimeout +testName=SnapTest +numSnaps=1 +maxSnapDelay=3.0 +testID=4 +snapCheck=true + +; TLog pop enable and disable UID mismatch +testTitle=SnapTLogPopEnableDisableMismatch +testName=SnapTest +numSnaps=1 +maxSnapDelay=3.0 +testID=5 +snapCheck=true + +; snapCreate without TLogPopDisable +testTitle=SnapCreateWithNoDisablePop +testName=SnapTest +numSnaps=1 +maxSnapDelay=3.0 +testID=6 +snapCheck=true + +; snapCreate and tlogPopDisable with mis-matched UID +testTitle=SnapCreateDisableTLogPopMismatch +testName=SnapTest +numSnaps=1 
+maxSnapDelay=3.0 +testID=7 +snapCheck=true + +; snapCreate with binary path that is not whitelisted +testTitle=SnapCreateNotWhiteListedBinaryPath +testName=SnapTest +numSnaps=1 +maxSnapDelay=3.0 +testID=8 +snapCheck=false diff --git a/tests/restarting/SnapTestSimpleRestart-2.txt b/tests/restarting/SnapTestSimpleRestart-2.txt index ba7d30f94f..54cb126362 100644 --- a/tests/restarting/SnapTestSimpleRestart-2.txt +++ b/tests/restarting/SnapTestSimpleRestart-2.txt @@ -5,44 +5,3 @@ numSnaps=1 maxSnapDelay=3.0 testID=3 snapCheck=false - -; verify that the TLog popping disable times out and switches to enable mode -; automatically, if not enabled specifically -testTitle=SnapTLogPopDisableTimeout -testName=SnapTest -numSnaps=1 -maxSnapDelay=3.0 -testID=4 -snapCheck=true - -; TLog pop enable and disable UID mismatch -testTitle=SnapTLogPopEnableDisableMismatch -testName=SnapTest -numSnaps=1 -maxSnapDelay=3.0 -testID=5 -snapCheck=true - -; snapCreate without TLogPopDisable -testTitle=SnapCreateWithNoDisablePop -testName=SnapTest -numSnaps=1 -maxSnapDelay=3.0 -testID=6 -snapCheck=true - -; snapCreate and tlogPopDisable with mis-matched UID -testTitle=SnapCreateDisableTLogPopMismatch -testName=SnapTest -numSnaps=1 -maxSnapDelay=3.0 -testID=7 -snapCheck=true - -; snapCreate with binary path that is not whitelisted -testTitle=SnapCreateNotWhiteListedBinaryPath -testName=SnapTest -numSnaps=1 -maxSnapDelay=3.0 -testID=8 -snapCheck=false From 00ccee8a6c4443ef359279ca812589d34b2b24ee Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Mon, 8 Apr 2019 13:55:50 -0700 Subject: [PATCH 24/69] workaround for log giving remote log and others logSystemConfig.allLocalLogs() sometimes returns remote TLog interface and a workaround is implemented here. Other minor cleanup. 
--- fdbserver/MasterProxyServer.actor.cpp | 1 - fdbserver/workloads/SnapTest.actor.cpp | 56 +++++++++++++++++--------- 2 files changed, 37 insertions(+), 20 deletions(-) diff --git a/fdbserver/MasterProxyServer.actor.cpp b/fdbserver/MasterProxyServer.actor.cpp index 66c5fa5c4f..6e2d498c8b 100644 --- a/fdbserver/MasterProxyServer.actor.cpp +++ b/fdbserver/MasterProxyServer.actor.cpp @@ -867,7 +867,6 @@ ACTOR Future commitBatch( if (m.param1 == execSnap) { te1.trackLatest(tokenStr.c_str()); } - int i = 0; std::string allTagString; for (auto& tag : allSources) { allTagString += tag.toString() + ","; diff --git a/fdbserver/workloads/SnapTest.actor.cpp b/fdbserver/workloads/SnapTest.actor.cpp index 85c50aab78..bf018d539b 100644 --- a/fdbserver/workloads/SnapTest.actor.cpp +++ b/fdbserver/workloads/SnapTest.actor.cpp @@ -23,6 +23,12 @@ void getVersionAndnumTags(TraceEventFields md, Version& version, int& numTags) { sscanf(md.getValue("NumTags").c_str(), "%d:%d", &numTags); } +void getNumTagServerInfo(TraceEventFields md, int& numTagServers) +{ + numTagServers = 0; + sscanf(md.getValue("NumTagServers").c_str(), "%lld", &numTagServers); +} + void getTagAndDurableVersion(TraceEventFields md, Version version, Tag& tag, Version& durableVersion) { Version verifyVersion; durableVersion = -1; @@ -59,7 +65,7 @@ void filterEmptyMessages(std::vector>& messages) { std::string emptyStr; auto it = messages.begin(); while (it != messages.end()) { - if (it->get().toString() == emptyStr) { + if (!it->isReady() || it->get().toString() == emptyStr) { it = messages.erase(it); } else { ++it; @@ -287,7 +293,7 @@ public: // workload functions // disable pop of the TLog tr.reset(); try { - StringRef payLoadRef = LiteralStringRef("empty-binary:uid=test"); + StringRef payLoadRef = LiteralStringRef("empty-binary:uid=a36b2ca0e8dab0452ac3e12b6b926f4b"); tr.execute(execDisableTLogPop, payLoadRef); wait(tr.commit()); break; @@ -298,6 +304,7 @@ public: // workload functions // wait for 40 seconds and 
verify that the enabled pop happened // automatically wait(delay(40.0)); + self->snapUID = UID::fromString("a36b2ca0e8dab0452ac3e12b6b926f4b"); } else if (self->testID == 5) { // description: disable TLog pop and enable TLog pop with // different UIDs should mis-match and print an error @@ -305,7 +312,7 @@ public: // workload functions // disable pop of the TLog tr.reset(); try { - StringRef payLoadRef = LiteralStringRef("empty-binary:uid=tmatch"); + StringRef payLoadRef = LiteralStringRef("empty-binary:uid=956349f5f368d37a802f1f37d7f4b9c1"); tr.execute(execDisableTLogPop, payLoadRef); wait(tr.commit()); break; @@ -317,7 +324,7 @@ public: // workload functions // enable pop of the TLog tr.reset(); try { - StringRef payLoadRef = LiteralStringRef("empty-binary:uid=didnotmatch"); + StringRef payLoadRef = LiteralStringRef("empty-binary:uid=5810898ca2f3143a246886c79d1bea92"); tr.execute(execEnableTLogPop, payLoadRef); wait(tr.commit()); break; @@ -325,6 +332,7 @@ public: // workload functions wait(tr.onError(e)); } } + self->snapUID = UID::fromString("5810898ca2f3143a246886c79d1bea92"); } else if (self->testID == 6) { // snapshot create without disabling pop of the TLog loop { @@ -340,13 +348,14 @@ public: // workload functions wait(tr.onError(e)); } } + self->snapUID = UID::fromString("d78b08d47f341158e9a54d4baaf4a4dd"); } else if (self->testID == 7) { // disable popping of TLog and snapshot create with mis-matching loop { // disable pop of the TLog tr.reset(); try { - StringRef payLoadRef = LiteralStringRef("empty-binary:uid=tmatch"); + StringRef payLoadRef = LiteralStringRef("empty-binary:uid=f49d27ddf7a28b6549d930743e0ebdbe"); tr.execute(execDisableTLogPop, payLoadRef); wait(tr.commit()); break; @@ -367,6 +376,7 @@ public: // workload functions wait(tr.onError(e)); } } + self->snapUID = UID::fromString("ba61e9612a561d60bd83ad83e1b63568"); } else if (self->testID == 8) { // create a snapshot with a non whitelisted binary path and operation // should fail @@ -401,7 
+411,6 @@ public: // workload functions state StringRef eventTokenRef(event); state vector tLogWorkers; state std::vector> tLogMessages; - state std::vector workers = wait(getWorkers(self->dbInfo)); state std::map address_workers; @@ -422,45 +431,54 @@ public: // workload functions } state int i = 0; + state int foundTagServers = 0; for (; i < tLogWorkers.size(); i++) { tLogMessages.push_back( timeoutError(tLogWorkers[i].eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 1.0)); state int retryCnt = 0; - state bool retry = false; loop { - retry = false; try { TraceEvent(SevDebug, "WaitingForTlogMessages"); wait(waitForAll(tLogMessages)); break; } catch (Error& e) { + bool isFail = false; if (e.code() != error_code_timed_out) { - TraceEvent(SevError, "VerifyTLogTrackLatest") + isFail = true; + break; + } else { + ++retryCnt; + } + if (isFail || retryCnt >= self->maxRetryCntToRetrieveMessage ) { + TraceEvent(SevError, "UnableToRetrieveTLogMessages") .detail("Token", eventTokenRef.toString()) .detail("Reason", "Failed to get tLogMessages") .detail("Code", e.what()); return false; - } else { - retry = true; - ++retryCnt; } } - if (retryCnt >= self->maxRetryCntToRetrieveMessage ) { - TraceEvent(SevError, "UnableToRetrieveTLogMessages"); - return false; - } } printMessages(tLogMessages); filterEmptyMessages(tLogMessages); - if (tLogMessages.size() != 1) { - TraceEvent(SevError, "VerifyTLogTrackLatestMessageNotFound") + if (tLogMessages.size() < 1) { + TraceEvent("VerifyTLogTrackLatestMessageNotFound") .detail("Address", tLogWorkers[i].address()) .detail("Token", eventTokenRef.toString()); - return false; + } else { + ++foundTagServers; } tLogMessages.clear(); } + // FIXME: logSystemConfig.allLocalLogs returns remote tlogServers also in few cases and hence the test fails. 
+ // Verify that foundTagServers matches the number of TLogServers in the local region + if (foundTagServers < 1) { + TraceEvent(SevError, "VerifyTLogTrackLatestMessageNotReachAllTLogservers") + .detail("Token", eventTokenRef.toString()) + .detail("FoundaTagServers", foundTagServers); + return false; + } + TraceEvent("VerifyTLogTrackLatestDone"); return true; } From 858604b51d87f67ecc1829ee8b80dd47fc883997 Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Tue, 9 Apr 2019 10:11:15 -0700 Subject: [PATCH 25/69] minor cleanups to SnapTest --- fdbserver/workloads/SnapTest.actor.cpp | 169 ++++++++----------------- 1 file changed, 53 insertions(+), 116 deletions(-) diff --git a/fdbserver/workloads/SnapTest.actor.cpp b/fdbserver/workloads/SnapTest.actor.cpp index bf018d539b..aad6d2bccf 100644 --- a/fdbserver/workloads/SnapTest.actor.cpp +++ b/fdbserver/workloads/SnapTest.actor.cpp @@ -23,12 +23,6 @@ void getVersionAndnumTags(TraceEventFields md, Version& version, int& numTags) { sscanf(md.getValue("NumTags").c_str(), "%d:%d", &numTags); } -void getNumTagServerInfo(TraceEventFields md, int& numTagServers) -{ - numTagServers = 0; - sscanf(md.getValue("NumTagServers").c_str(), "%lld", &numTagServers); -} - void getTagAndDurableVersion(TraceEventFields md, Version version, Tag& tag, Version& durableVersion) { Version verifyVersion; durableVersion = -1; @@ -434,30 +428,17 @@ public: // workload functions state int foundTagServers = 0; for (; i < tLogWorkers.size(); i++) { tLogMessages.push_back( - timeoutError(tLogWorkers[i].eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 1.0)); + timeoutError(tLogWorkers[i].eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 3.0)); - state int retryCnt = 0; - loop { - try { - TraceEvent(SevDebug, "WaitingForTlogMessages"); - wait(waitForAll(tLogMessages)); - break; - } catch (Error& e) { - bool isFail = false; - if (e.code() != error_code_timed_out) { - isFail = true; - break; - } else { - ++retryCnt; - } - if (isFail || 
retryCnt >= self->maxRetryCntToRetrieveMessage ) { - TraceEvent(SevError, "UnableToRetrieveTLogMessages") - .detail("Token", eventTokenRef.toString()) - .detail("Reason", "Failed to get tLogMessages") - .detail("Code", e.what()); - return false; - } - } + try { + TraceEvent(SevDebug, "WaitingForTlogMessages"); + wait(waitForAll(tLogMessages)); + } catch (Error& e) { + TraceEvent(SevError, "UnableToRetrieveTLogMessages") + .detail("Token", eventTokenRef.toString()) + .detail("Reason", "FailedToGetTLogMessages") + .detail("Code", e.what()); + return false; } printMessages(tLogMessages); filterEmptyMessages(tLogMessages); @@ -496,58 +477,34 @@ public: // workload functions state int numDurableVersionChecks = 0; state std::map visitedStorageTags; - state int retryCnt = 0; - loop { - proxyMessages.clear(); - storageMessages.clear(); - coordMessages.clear(); + for (int i = 0; i < workers.size(); i++) { + std::string eventToken = "ExecTrace/Coordinators/" + self->snapUID.toString(); + StringRef eventTokenRef(eventToken); + coordMessages.push_back( + timeoutError(workers[i].interf.eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 3.0)); + } - state bool retry = false; + for (int i = 0; i < workers.size(); i++) { + std::string eventToken = "ExecTrace/Proxy/" + self->snapUID.toString(); + StringRef eventTokenRef(eventToken); + proxyMessages.push_back( + timeoutError(workers[i].interf.eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 3.0)); + } - for (int i = 0; i < workers.size(); i++) { - std::string eventToken = "ExecTrace/Coordinators/" + self->snapUID.toString(); - StringRef eventTokenRef(eventToken); - coordMessages.push_back( - timeoutError(workers[i].interf.eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 1.0)); - } + for (int i = 0; i < storageWorkers.size(); i++) { + std::string eventToken = "ExecTrace/storage/" + self->snapUID.toString(); + StringRef eventTokenRef(eventToken); + storageMessages.push_back(timeoutError( + 
storageWorkers[i].interf.eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 3.0)); + } - for (int i = 0; i < workers.size(); i++) { - std::string eventToken = "ExecTrace/Proxy/" + self->snapUID.toString(); - StringRef eventTokenRef(eventToken); - proxyMessages.push_back( - timeoutError(workers[i].interf.eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 1.0)); - } - - for (int i = 0; i < storageWorkers.size(); i++) { - std::string eventToken = "ExecTrace/storage/" + self->snapUID.toString(); - StringRef eventTokenRef(eventToken); - storageMessages.push_back(timeoutError( - storageWorkers[i].interf.eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 1.0)); - } - - try { - wait(waitForAll(proxyMessages)); - wait(waitForAll(storageMessages)); - wait(waitForAll(coordMessages)); - } catch (Error& e) { - if (e.code() != error_code_timed_out) { - TraceEvent(SevError, "VerifyExecTraceVersionFailure") - .detail("Reason", "FailedToGetProxyOrStorageMessages") - .detail("Code", e.what()); - return false; - } else { - retry = true; - ++retryCnt; - } - } - if (retry == false) { - break; - } - - if (retry && retryCnt >= 4) { - TraceEvent(SevError, "UnableToRetrieveProxyStorageCoordMessages"); - return false; - } + try { + wait(waitForAll(proxyMessages)); + wait(waitForAll(storageMessages)); + wait(waitForAll(coordMessages)); + } catch (Error& e) { + TraceEvent(SevError, "UnableToRetrieveProxyStorageCoordMessages"); + return false; } // filter out empty messages @@ -600,46 +557,26 @@ public: // workload functions getTagAndDurableVersion(storageMessages[j].get(), execVersion, tag, durableVersion); TraceEvent("SearchingTLogMessages").detail("Tag", tag.toString()); - retryCnt = 0; - loop { - retry = false; - tLogMessages.clear(); - - for (int m = 0; (tag != invalidTag) && m < tLogWorkers.size(); m++) { - visitedStorageTags[tag] = true; - std::string eventToken = "ExecTrace/TLog/" + tag.toString() + "/" + self->snapUID.toString(); - StringRef 
eventTokenRef(eventToken); - tLogMessages.push_back(timeoutError( - tLogWorkers[m].interf.eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 1.0)); - } - - try { - TraceEvent("WaitingForTlogMessages"); - if (tag != invalidTag) { - wait(waitForAll(tLogMessages)); - } - } catch (Error& e) { - if (e.code() != error_code_timed_out) { - TraceEvent(SevError, "VerifyExecTraceVersionFailure") - .detail("Reason", "FailedToGetTLogMessages") - .detail("Code", e.what()); - return false; - } else { - retry = true; - ++retryCnt; - } - } - if (retry == false) { - break; - } - if (retry && retryCnt > self->maxRetryCntToRetrieveMessage) { - TraceEvent(SevError, "UnableToRetrieveTLogMessagesAfterRetries"); - return false; - } + tLogMessages.clear(); + for (int m = 0; (tag != invalidTag) && m < tLogWorkers.size(); m++) { + visitedStorageTags[tag] = true; + std::string eventToken = "ExecTrace/TLog/" + tag.toString() + "/" + self->snapUID.toString(); + StringRef eventTokenRef(eventToken); + tLogMessages.push_back(timeoutError( + tLogWorkers[m].interf.eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 3.0)); + } + try { + TraceEvent("WaitingForTlogMessages"); + if (tag != invalidTag) { + wait(waitForAll(tLogMessages)); + } + } catch (Error& e) { + TraceEvent(SevError, "VerifyExecTraceVersionFailure") + .detail("Reason", "FailedToGetTLogMessages") + .detail("Code", e.what()); + return false; } - filterEmptyMessages(tLogMessages); - state int k = 0; numDurableVersionChecks = 0; for (; (tag != invalidTag) && k < tLogMessages.size(); k++) { From 9e3104c2d43873b404a31aff58e2dafc63344bfb Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Tue, 9 Apr 2019 11:17:58 -0700 Subject: [PATCH 26/69] Fix: races in async exec leading to bad backup --- fdbserver/OldTLogServer_6_0.actor.cpp | 96 ++++++++++++---------- fdbserver/TLogServer.actor.cpp | 112 ++++++++++++++------------ fdbserver/WorkerInterface.actor.h | 7 ++ fdbserver/storageserver.actor.cpp | 99 ++++++++++++----------- 
fdbserver/worker.actor.cpp | 26 +++++- 5 files changed, 200 insertions(+), 140 deletions(-) diff --git a/fdbserver/OldTLogServer_6_0.actor.cpp b/fdbserver/OldTLogServer_6_0.actor.cpp index 0d6ad4f7be..2515f46b9c 100644 --- a/fdbserver/OldTLogServer_6_0.actor.cpp +++ b/fdbserver/OldTLogServer_6_0.actor.cpp @@ -1503,52 +1503,64 @@ ACTOR Future tLogCommit( state int err = 0; state Future cmdErr; auto uidStr = execArg.getBinaryArgValue("uid"); - if (!g_network->isSimulated()) { - // get the bin path - auto snapBin = execArg.getBinaryPath(); - auto dataFolder = "path=" + self->dataFolder; - TraceEvent("TLogSnapCommand").detail("CmdLine", param2.toString()).detail("FolderPath", dataFolder); - vector paramList; - paramList.push_back(snapBin); - // user passed arguments - auto listArgs = execArg.getBinaryArgs(); - for (auto elem : listArgs) { - paramList.push_back(elem); - } - // additional arguments - paramList.push_back(dataFolder); - const char* version = FDB_VT_VERSION; - std::string versionString = "version="; - versionString += version; - paramList.push_back(versionString); - std::string roleString = "role=tlog"; - paramList.push_back(roleString); - cmdErr = spawnProcess(snapBin, paramList, 3.0); - wait(success(cmdErr)); - err = cmdErr.get(); - } else { - // copy the entire directory - state std::string tLogFolderFrom = "./" + self->dataFolder + "/."; - state std::string tLogFolderTo = "./" + self->dataFolder + "-snap-" + uidStr; - vector paramList; - std::string mkdirBin = "/bin/mkdir"; - paramList.push_back(mkdirBin); - paramList.push_back(tLogFolderTo); - cmdErr = spawnProcess(mkdirBin, paramList, 3.0, true); - wait(success(cmdErr)); - err = cmdErr.get(); - if (err == 0) { + state UID execUID = UID::fromString(uidStr); + state bool otherRoleExeced = false; + // TLog is special, we need to exec at the execVersion. 
+ // If storage on the same process has initiated the exec then wait for it to + // finish and hold the tlog at the execVersion + while (isExecOpInProgress(execUID)) { + wait(delay(0.1)); + otherRoleExeced = true; + } + if (!otherRoleExeced) { + setExecOpInProgress(execUID); + if (!g_network->isSimulated()) { + // get the bin path + auto snapBin = execArg.getBinaryPath(); + auto dataFolder = "path=" + self->dataFolder; vector paramList; - std::string cpBin = "/bin/cp"; - paramList.clear(); - paramList.push_back(cpBin); - paramList.push_back("-a"); - paramList.push_back(tLogFolderFrom); - paramList.push_back(tLogFolderTo); - cmdErr = spawnProcess(cpBin, paramList, 3.0); + paramList.push_back(snapBin); + // user passed arguments + auto listArgs = execArg.getBinaryArgs(); + for (auto elem : listArgs) { + paramList.push_back(elem); + } + // additional arguments + paramList.push_back(dataFolder); + const char* version = FDB_VT_VERSION; + std::string versionString = "version="; + versionString += version; + paramList.push_back(versionString); + std::string roleString = "role=tlog"; + paramList.push_back(roleString); + cmdErr = spawnProcess(snapBin, paramList, 3.0); wait(success(cmdErr)); err = cmdErr.get(); + } else { + // copy the entire directory + state std::string tLogFolderFrom = "./" + self->dataFolder + "/."; + state std::string tLogFolderTo = "./" + self->dataFolder + "-snap-" + uidStr; + vector paramList; + std::string mkdirBin = "/bin/mkdir"; + paramList.push_back(mkdirBin); + paramList.push_back(tLogFolderTo); + cmdErr = spawnProcess(mkdirBin, paramList, 3.0); + wait(success(cmdErr)); + err = cmdErr.get(); + if (err == 0) { + vector paramList; + std::string cpBin = "/bin/cp"; + paramList.clear(); + paramList.push_back(cpBin); + paramList.push_back("-a"); + paramList.push_back(tLogFolderFrom); + paramList.push_back(tLogFolderTo); + cmdErr = spawnProcess(cpBin, paramList, 3.0); + wait(success(cmdErr)); + err = cmdErr.get(); + } } + 
clearExecOpInProgress(execUID); } TraceEvent("TLogCommitExecTraceTLog") .detail("UidStr", uidStr) diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index 2e87855bbb..67998462c8 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -1868,57 +1868,69 @@ ACTOR Future tLogCommit( wait( timeoutWarning( logData->queueCommittedVersion.whenAtLeast( req.version ) || stopped, 0.1, warningCollectorInput ) ); if ((execVersion != invalidVersion) && - execVersion <= logData->queueCommittedVersion.get()) { - state int err = 0; - auto uidStr = execArg.getBinaryArgValue("uid"); - state Future cmdErr; - if (!g_network->isSimulated()) { - // get bin path - auto snapBin = execArg.getBinaryPath(); - auto dataFolder = "path=" + self->dataFolder; - TraceEvent("TLogSnapCommand").detail("CmdLine", param2.toString()).detail("FolderPath", dataFolder); - vector paramList; - paramList.push_back(snapBin); - // user passed arguments - auto listArgs = execArg.getBinaryArgs(); - for (auto elem : listArgs) { - paramList.push_back(elem); - } - // additional arguments - paramList.push_back(dataFolder); - const char* version = FDB_VT_VERSION; - std::string versionString = "version="; - versionString += version; - paramList.push_back(versionString); - std::string roleString = "role=tlog"; - paramList.push_back(roleString); - cmdErr = spawnProcess(snapBin, paramList, 3.0); - wait(success(cmdErr)); - err = cmdErr.get(); - } else { - // copy the entire directory - state std::string tLogFolderFrom = "./" + self->dataFolder + "/."; - state std::string tLogFolderTo = "./" + self->dataFolder + "-snap-" + uidStr; - vector paramList; - std::string mkdirBin = "/bin/mkdir"; - paramList.push_back(mkdirBin); - paramList.push_back(tLogFolderTo); - cmdErr = spawnProcess(mkdirBin, paramList, 3.0, true); - wait(success(cmdErr)); - err = cmdErr.get(); - if (err == 0) { + execVersion <= logData->queueCommittedVersion.get()) { + state int err = 0; + auto uidStr = 
execArg.getBinaryArgValue("uid"); + state UID execUID = UID::fromString(uidStr); + state bool otherRoleExeced = false; + // TLog is special, we need to exec at the execVersion. + // If storage on the same process has initiated the exec then wait for it to + // finish and hold the tlog at the execVersion + while (isExecOpInProgress(execUID)) { + wait(delay(0.1)); + otherRoleExeced = true; + } + state Future cmdErr; + if (!otherRoleExeced) { + setExecOpInProgress(execUID); + if (!g_network->isSimulated()) { + // get bin path + auto snapBin = execArg.getBinaryPath(); + auto dataFolder = "path=" + self->dataFolder; vector paramList; - std::string cpBin = "/bin/cp"; - paramList.clear(); - paramList.push_back(cpBin); - paramList.push_back("-a"); - paramList.push_back(tLogFolderFrom); - paramList.push_back(tLogFolderTo); - cmdErr = spawnProcess(cpBin, paramList, 3.0); - wait(success(cmdErr)); - err = cmdErr.get(); - } - } + paramList.push_back(snapBin); + // user passed arguments + auto listArgs = execArg.getBinaryArgs(); + for (auto elem : listArgs) { + paramList.push_back(elem); + } + // additional arguments + paramList.push_back(dataFolder); + const char* version = FDB_VT_VERSION; + std::string versionString = "version="; + versionString += version; + paramList.push_back(versionString); + std::string roleString = "role=tlog"; + paramList.push_back(roleString); + cmdErr = spawnProcess(snapBin, paramList, 3.0); + wait(success(cmdErr)); + err = cmdErr.get(); + } else { + // copy the entire directory + state std::string tLogFolderFrom = "./" + self->dataFolder + "/."; + state std::string tLogFolderTo = "./" + self->dataFolder + "-snap-" + uidStr; + vector paramList; + std::string mkdirBin = "/bin/mkdir"; + paramList.push_back(mkdirBin); + paramList.push_back(tLogFolderTo); + cmdErr = spawnProcess(mkdirBin, paramList, 3.0); + wait(success(cmdErr)); + err = cmdErr.get(); + if (err == 0) { + vector paramList; + std::string cpBin = "/bin/cp"; + paramList.clear(); + 
paramList.push_back(cpBin); + paramList.push_back("-a"); + paramList.push_back(tLogFolderFrom); + paramList.push_back(tLogFolderTo); + cmdErr = spawnProcess(cpBin, paramList, 3.0); + wait(success(cmdErr)); + err = cmdErr.get(); + } + } + clearExecOpInProgress(execUID); + } TraceEvent("TLogCommitExecTraceLog") .detail("UidStr", uidStr) .detail("Status", err) diff --git a/fdbserver/WorkerInterface.actor.h b/fdbserver/WorkerInterface.actor.h index e5c260a615..7d55f0ecee 100644 --- a/fdbserver/WorkerInterface.actor.h +++ b/fdbserver/WorkerInterface.actor.h @@ -453,5 +453,12 @@ typedef decltype(&tLog) TLogFn; // if the process spawned takes more than `maxWaitTime` then it will be killed ACTOR Future spawnProcess(std::string binPath, vector paramList, double maxWaitTime, bool isSync = false); +// returns true if the execUID op is in progress +bool isExecOpInProgress(UID execUID); +// adds the execUID op to the list of ops in progress +void setExecOpInProgress(UID execUID); +// clears the execUID op from the list of ops in progress +void clearExecOpInProgress(UID execUID); + #include "flow/unactorcompiler.h" #endif diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp index d0ce1fd273..fd0fa3efca 100644 --- a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -1871,8 +1871,6 @@ ACTOR Future snapHelper(StorageServer* data, MutationRef m, Version ver) { state std::string cmd = m.param1.toString(); - int len = m.param2.size(); - if ((cmd == execDisableTLogPop) || (cmd == execEnableTLogPop)) { TraceEvent("IgnoreNonSnapCommands").detail("ExecCommand", cmd); return Void(); @@ -1881,55 +1879,66 @@ snapHelper(StorageServer* data, MutationRef m, Version ver) state std::string uidStr = execArg.getBinaryArgValue("uid"); state int err = 0; state Future cmdErr; + state UID execUID = UID::fromString(uidStr); + bool otherRoleExeced = false; - if (!g_network->isSimulated() || cmd != execSnap) { - // get bin path - auto binPath = 
execArg.getBinaryPath(); - auto dataFolder = "path=" + data->folder; - vector paramList; - paramList.push_back(binPath); - // get user passed arguments - auto listArgs = execArg.getBinaryArgs(); - execArg.dbgPrint(); - for (auto elem : listArgs) { - paramList.push_back(elem); - } - // get additional arguments - paramList.push_back(dataFolder); - const char* version = FDB_VT_VERSION; - std::string versionString = "version="; - versionString += version; - paramList.push_back(versionString); - std::string roleString = "role=storage"; - paramList.push_back(roleString); - cmdErr = spawnProcess(binPath, paramList, 3.0); - wait(success(cmdErr)); - err = cmdErr.get(); - } else { - // copy the files - std::string folder = abspath(data->folder); - state std::string folderFrom = folder + "/."; - state std::string folderTo = folder + "-snap-" + uidStr; - vector paramList; - std::string mkdirBin = "/bin/mkdir"; + // other TLog or storage has initiated the exec, so we can skip + if (isExecOpInProgress(execUID)) { + otherRoleExeced = true; + } - paramList.push_back(mkdirBin); - paramList.push_back(folderTo); - cmdErr = spawnProcess(mkdirBin, paramList, 3.0, true); - wait(success(cmdErr)); - err = cmdErr.get(); - if (err == 0) { + if (!otherRoleExeced) { + setExecOpInProgress(execUID); + if (!g_network->isSimulated() || cmd != execSnap) { + // get bin path + auto binPath = execArg.getBinaryPath(); + auto dataFolder = "path=" + data->folder; vector paramList; - std::string cpBin = "/bin/cp"; - paramList.clear(); - paramList.push_back(cpBin); - paramList.push_back("-a"); - paramList.push_back(folderFrom); - paramList.push_back(folderTo); - cmdErr = spawnProcess(cpBin, paramList, 3.0); + paramList.push_back(binPath); + // get user passed arguments + auto listArgs = execArg.getBinaryArgs(); + execArg.dbgPrint(); + for (auto elem : listArgs) { + paramList.push_back(elem); + } + // get additional arguments + paramList.push_back(dataFolder); + const char* version = FDB_VT_VERSION; + 
std::string versionString = "version="; + versionString += version; + paramList.push_back(versionString); + std::string roleString = "role=storage"; + paramList.push_back(roleString); + cmdErr = spawnProcess(binPath, paramList, 3.0); wait(success(cmdErr)); err = cmdErr.get(); + } else { + // copy the files + std::string folder = abspath(data->folder); + state std::string folderFrom = folder + "/."; + state std::string folderTo = folder + "-snap-" + uidStr; + vector paramList; + std::string mkdirBin = "/bin/mkdir"; + + paramList.push_back(mkdirBin); + paramList.push_back(folderTo); + cmdErr = spawnProcess(mkdirBin, paramList, 3.0); + wait(success(cmdErr)); + err = cmdErr.get(); + if (err == 0) { + vector paramList; + std::string cpBin = "/bin/cp"; + paramList.clear(); + paramList.push_back(cpBin); + paramList.push_back("-a"); + paramList.push_back(folderFrom); + paramList.push_back(folderTo); + cmdErr = spawnProcess(cpBin, paramList, 3.0); + wait(success(cmdErr)); + err = cmdErr.get(); + } } + clearExecOpInProgress(execUID); } auto tokenStr = "ExecTrace/storage/" + uidStr; TraceEvent te = TraceEvent("ExecTraceStorage"); diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index f4d33d8552..ea5152feaf 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -70,6 +70,28 @@ extern IKeyValueStore* keyValueStoreCompressTestData(IKeyValueStore* store); # define KV_STORE(filename,uid) keyValueStoreMemory(filename,uid) #endif + +std::map> execOpsInProgress; + +bool isExecOpInProgress(UID execUID) { + NetworkAddress addr = g_network->getLocalAddress(); + return (execOpsInProgress[addr].find(execUID) != execOpsInProgress[addr].end()); +} + +void setExecOpInProgress(UID execUID) { + NetworkAddress addr = g_network->getLocalAddress(); + ASSERT(execOpsInProgress[addr].find(execUID) == execOpsInProgress[addr].end()); + execOpsInProgress[addr].insert(execUID); + return; +} + +void clearExecOpInProgress(UID execUID) { + NetworkAddress addr = 
g_network->getLocalAddress(); + ASSERT(execOpsInProgress[addr].find(execUID) != execOpsInProgress[addr].end()); + execOpsInProgress[addr].erase(execUID); + return; +} + ACTOR static Future extractClientInfo( Reference> db, Reference> info ) { loop { info->set( db->get().client ); @@ -1174,7 +1196,6 @@ ACTOR Future workerServer( loggingTrigger = delay( loggingDelay, TaskFlushTrace ); } when(state ExecuteRequest req = waitNext(interf.execReq.getFuture())) { - int len = req.execPayLoad.size(); state ExecCmdValueString execArg(req.execPayLoad.toString()); execArg.dbgPrint(); state std::string uidStr = execArg.getBinaryArgValue("uid"); @@ -1211,7 +1232,7 @@ ACTOR Future workerServer( std::string mkdirBin = "/bin/mkdir"; paramList.push_back(mkdirBin); paramList.push_back(folderTo); - cmdErr = spawnProcess(mkdirBin, paramList, 3.0, true); + cmdErr = spawnProcess(mkdirBin, paramList, 3.0); wait(success(cmdErr)); err = cmdErr.get(); if (err == 0) { @@ -1227,7 +1248,6 @@ ACTOR Future workerServer( err = cmdErr.get(); } } - auto tokenStr = "ExecTrace/Coordinators/" + uidStr; auto te = TraceEvent("ExecTraceCoordinators"); te.detail("Uid", uidStr); From 61e93a93046808c57597ccd9b1f7c780e73bbaf0 Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Fri, 12 Apr 2019 13:23:02 -0700 Subject: [PATCH 27/69] Address review comments and minor fixes --- fdbcli/fdbcli.actor.cpp | 8 +- fdbclient/CommitTransaction.h | 5 +- fdbclient/ManagementAPI.actor.cpp | 2 +- fdbclient/MasterProxyInterface.h | 8 +- fdbclient/NativeAPI.actor.cpp | 89 +++++++++----------- fdbclient/NativeAPI.actor.h | 6 +- fdbclient/SystemData.cpp | 4 +- fdbserver/FDBExecArgs.cpp | 64 +++++++------- fdbserver/FDBExecArgs.h | 21 ++--- fdbserver/MasterProxyServer.actor.cpp | 102 +++++++++-------------- fdbserver/OldTLogServer_6_0.actor.cpp | 51 ++++++------ fdbserver/SimulatedCluster.actor.cpp | 31 ++++--- fdbserver/SimulatedCluster.h | 2 +- fdbserver/TLogServer.actor.cpp | 49 ++++++----- fdbserver/WorkerInterface.actor.h | 12 
+-- fdbserver/fdbserver.actor.cpp | 62 ++------------ fdbserver/fdbserver.vcxproj | 2 + fdbserver/storageserver.actor.cpp | 18 ++-- fdbserver/worker.actor.cpp | 30 +++---- tests/fast/SnapTestFailAndDisablePop.txt | 2 +- 20 files changed, 248 insertions(+), 320 deletions(-) diff --git a/fdbcli/fdbcli.actor.cpp b/fdbcli/fdbcli.actor.cpp index b76e674b98..00327def46 100644 --- a/fdbcli/fdbcli.actor.cpp +++ b/fdbcli/fdbcli.actor.cpp @@ -470,10 +470,10 @@ void initHelp() { "include all|
*", "permit previously-excluded servers to rejoin the database", "If `all' is specified, the excluded servers list is cleared.\n\nFor each IP address or IP:port pair in
*, removes any matching exclusions from the excluded servers list. (A specified IP will match all IP:* exclusion entries)"); - helpMap["snapshot"] = CommandHelp("snapshot :,,...", "snapshot the database", - "invokes binary provided in binary-path" - "with the arg,value pairs on TLog, Storage and " - "Coordinators nodes. uid is a reserved ARG key."); + helpMap["snapshot"] = CommandHelp( + "snapshot :,,...", + "snapshot the database", + "invokes binary provided in binary-path with the arg,value pairs on TLog, Storage and Coordinators nodes. UID is a reserved ARG key."); helpMap["setclass"] = CommandHelp( "setclass
", "change the class of a process", diff --git a/fdbclient/CommitTransaction.h b/fdbclient/CommitTransaction.h index e3eb3a9d1c..ec6c4a3723 100644 --- a/fdbclient/CommitTransaction.h +++ b/fdbclient/CommitTransaction.h @@ -44,7 +44,8 @@ static const char* typeString[] = { "SetValue", "ByteMax", "MinV2", "AndV2", - "CompareAndClear" }; + "CompareAndClear", + "Exec" }; struct MutationRef { static const int OVERHEAD_BYTES = 12; //12 is the size of Header in MutationList entries @@ -70,6 +71,8 @@ struct MutationRef { MinV2, AndV2, CompareAndClear, + // ExecOp is always set with FIRST_IN_BATCH option to quickly identify + // the op in a transaction batch while parsing it in TLog Exec, MAX_ATOMIC_OP }; diff --git a/fdbclient/ManagementAPI.actor.cpp b/fdbclient/ManagementAPI.actor.cpp index 7a13e7148d..76fac82107 100644 --- a/fdbclient/ManagementAPI.actor.cpp +++ b/fdbclient/ManagementAPI.actor.cpp @@ -1488,7 +1488,7 @@ ACTOR Future mgmtSnapCreate(Database cx, StringRef snapCmd) { ++retryCount; TraceEvent(retryCount > 3 ? 
SevWarn : SevInfo, "SnapCreateFailed").error(e); if (retryCount > 3) { - printf("Snapshot create failed, %d (%s)\n", e.code(), e.what()); + fprintf(stderr, "Snapshot create failed, %d (%s)\n", e.code(), e.what()); throw; } } diff --git a/fdbclient/MasterProxyInterface.h b/fdbclient/MasterProxyInterface.h index 0186cccfd6..f59ffb373c 100644 --- a/fdbclient/MasterProxyInterface.h +++ b/fdbclient/MasterProxyInterface.h @@ -301,16 +301,16 @@ struct GetHealthMetricsRequest struct ExecRequest { Arena arena; - StringRef execPayLoad; + StringRef execPayload; ReplyPromise reply; Optional debugID; - ExecRequest(Optional const& debugID = Optional()) : debugID(debugID) {} - ExecRequest(StringRef exec, Optional debugID = Optional()) : execPayLoad(exec), debugID(debugID) {} + explicit ExecRequest(Optional const& debugID = Optional()) : debugID(debugID) {} + explicit ExecRequest(StringRef exec, Optional debugID = Optional()) : execPayload(exec), debugID(debugID) {} template void serialize(Ar& ar) { - serializer(ar, execPayLoad, reply, arena, debugID); + serializer(ar, execPayload, reply, arena, debugID); } }; diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index fdd1a88e5b..3b76ae312c 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -2287,45 +2287,33 @@ void Transaction::atomicOp(const KeyRef& key, const ValueRef& operand, MutationR TEST(true); //NativeAPI atomic operation } -ACTOR Future executeCoordinators(DatabaseContext* cx, StringRef execPayLoad, Optional debugID) { +ACTOR Future executeCoordinators(DatabaseContext* cx, StringRef execPayload, Optional debugID) { try { if (debugID.present()) { g_traceBatch.addEvent("TransactionDebug", debugID.get().first(), "NativeAPI.executeCoordinators.Before"); } - loop { - state ExecRequest req(execPayLoad, debugID); - if (debugID.present()) { - g_traceBatch.addEvent("TransactionDebug", debugID.get().first(), - "NativeAPI.executeCoordinators.Inside loop"); - } - choose { - 
when(wait(cx->onMasterProxiesChanged())) { - if (debugID.present()) { - g_traceBatch.addEvent("TransactionDebug", debugID.get().first(), - "NativeAPI.executeCoordinators." - "MasterProxyChangeDuringStart"); - } - } - when(wait(loadBalance(cx->getMasterProxies(), &MasterProxyInterface::execReq, req, cx->taskID))) { - if (debugID.present()) - g_traceBatch.addEvent("TransactionDebug", debugID.get().first(), - "NativeAPI.executeCoordinators.After"); - return Void(); - } - } + state ExecRequest req(execPayload, debugID); + if (debugID.present()) { + g_traceBatch.addEvent("TransactionDebug", debugID.get().first(), + "NativeAPI.executeCoordinators.Inside loop"); } + wait(loadBalance(cx->getMasterProxies(), &MasterProxyInterface::execReq, req, cx->taskID)); + if (debugID.present()) + g_traceBatch.addEvent("TransactionDebug", debugID.get().first(), + "NativeAPI.executeCoordinators.After"); + return Void(); } catch (Error& e) { TraceEvent("NativeAPI.executeCoordinatorsError").error(e); throw; } } -void Transaction::execute(const KeyRef& cmdType, const ValueRef& cmdPayLoad) { - TraceEvent("Execute operation").detail("Key", cmdType.toString()).detail("Value", cmdPayLoad.toString()); +void Transaction::execute(const KeyRef& cmdType, const ValueRef& cmdPayload) { + TraceEvent("Execute operation").detail("Key", cmdType.toString()).detail("Value", cmdPayload.toString()); if (cmdType.size() > CLIENT_KNOBS->KEY_SIZE_LIMIT) throw key_too_large(); - if (cmdPayLoad.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT) throw value_too_large(); + if (cmdPayload.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT) throw value_too_large(); auto& req = tr; @@ -2334,9 +2322,8 @@ void Transaction::execute(const KeyRef& cmdType, const ValueRef& cmdPayLoad) { auto& t = req.transaction; auto r = singleKeyRange(cmdType, req.arena); - auto v = ValueRef(req.arena, cmdPayLoad); + auto v = ValueRef(req.arena, cmdPayload); t.mutations.push_back(req.arena, MutationRef(MutationRef::Exec, r.begin, v)); - return; } void 
Transaction::clear( const KeyRangeRef& range, bool addConflictRange ) { @@ -3319,7 +3306,6 @@ void enableClientInfoLogging() { ACTOR Future snapCreate(Database inputCx, StringRef snapCmd, UID snapUID) { state Transaction tr(inputCx); - state Database testCx = inputCx; state DatabaseContext* cx = inputCx.getPtr(); // remember the client ID before the snap operation state UID preSnapClientUID = cx->clientInfo->get().id; @@ -3330,36 +3316,37 @@ ACTOR Future snapCreate(Database inputCx, StringRef snapCmd, UID snapUID) .detail("PreSnapClientUID", preSnapClientUID); tr.debugTransaction(snapUID); - std::string snapString = "empty-binary:uid=" + snapUID.toString(); - state Standalone uidPayLoad = makeString(snapString.size()); - uint8_t* ptr = mutateString(uidPayLoad); - memcpy(ptr, ((uint8_t*)snapString.c_str()), snapString.size()); + state Standalone snapUIDRef(snapUID.toString()); + state Standalone + tLogCmdPayloadRef = LiteralStringRef("empty-binary:uid=").withSuffix(snapUIDRef); // disable popping of TLog loop { tr.reset(); try { - tr.execute(execDisableTLogPop, uidPayLoad); + tr.setOption(FDBTransactionOptions::LOCK_AWARE); + tr.execute(execDisableTLogPop, tLogCmdPayloadRef); wait(tr.commit()); break; } catch (Error& e) { - TraceEvent("DisableTLogPopFailed").detail("Error", e.what()); + TraceEvent("DisableTLogPopFailed").error(e); wait(tr.onError(e)); } } TraceEvent("SnapCreateAfterLockingTLogs").detail("UID", snapUID); - int p = snapCmd.toString().find_first_of(':', 0); - state std::string snapPayLoad; - - if (p == snapCmd.toString().npos) { - snapPayLoad = snapCmd.toString() + ":uid=" + snapUID.toString(); - } else { - snapPayLoad = snapCmd.toString() + ",uid=" + snapUID.toString(); + const uint8_t* ptr = snapCmd.begin(); + while (*ptr != ':' && ptr < snapCmd.end()) { + ptr++; + } + state Standalone snapPayloadRef; + if (ptr == snapCmd.end()) { + snapPayloadRef = + snapCmd.withSuffix(LiteralStringRef(":uid=")).withSuffix(snapUIDRef); + } else { + snapPayloadRef = 
+ snapCmd.withSuffix(LiteralStringRef(",uid=")).withSuffix(snapUIDRef); } - Standalone snapPayLoadRef = makeString(snapPayLoad.size()); - uint8_t* ptr = mutateString(snapPayLoadRef); - memcpy(ptr, ((uint8_t*)snapPayLoad.c_str()), snapPayLoad.size()); // snap the storage and Tlogs // if we retry the below command in failure cases with the same snapUID @@ -3368,10 +3355,11 @@ ACTOR Future snapCreate(Database inputCx, StringRef snapCmd, UID snapUID) // failure cases and let the caller retry with different snapUID try { tr.reset(); - tr.execute(execSnap, snapPayLoadRef); + tr.setOption(FDBTransactionOptions::LOCK_AWARE); + tr.execute(execSnap, snapPayloadRef); wait(tr.commit()); } catch (Error& e) { - TraceEvent("SnapCreateErroSnapTLogStorage").detail("Error", e.what()); + TraceEvent("SnapCreateErroSnapTLogStorage").error(e); throw; } @@ -3386,11 +3374,12 @@ ACTOR Future snapCreate(Database inputCx, StringRef snapCmd, UID snapUID) loop { tr.reset(); try { - tr.execute(execEnableTLogPop, uidPayLoad); + tr.execute(execEnableTLogPop, tLogCmdPayloadRef); + tr.setOption(FDBTransactionOptions::LOCK_AWARE); wait(tr.commit()); break; } catch (Error& e) { - TraceEvent("EnableTLogPopFailed").detail("Error", e.what()); + TraceEvent("EnableTLogPopFailed").error(e); wait(tr.onError(e)); } } @@ -3399,10 +3388,10 @@ ACTOR Future snapCreate(Database inputCx, StringRef snapCmd, UID snapUID) // snap the coordinators try { - Future exec = executeCoordinators(cx, snapPayLoad, snapUID); + Future exec = executeCoordinators(cx, snapPayloadRef, snapUID); wait(exec); } catch (Error& e) { - TraceEvent("SnapCreateErrorSnapCoords").detail("Error", e.what()); + TraceEvent("SnapCreateErrorSnapCoords").error(e); throw; } diff --git a/fdbclient/NativeAPI.actor.h b/fdbclient/NativeAPI.actor.h index a02d3c8a51..11b915a2ea 100644 --- a/fdbclient/NativeAPI.actor.h +++ b/fdbclient/NativeAPI.actor.h @@ -264,9 +264,9 @@ public: // instead of setting a key and value on the DB, it executes the command // that 
is passed in the value field. // - cmdType can be used for logging purposes - // - cmdPayLoad contains the details of the command to be executed: - // format of the cmdPayLoad : :,... - void execute(const KeyRef& cmdType, const ValueRef& cmdPayLoad); + // - cmdPayload contains the details of the command to be executed: + // format of the cmdPayload : :,... + void execute(const KeyRef& cmdType, const ValueRef& cmdPayload); void clear( const KeyRangeRef& range, bool addConflictRange = true ); void clear( const KeyRef& key, bool addConflictRange = true ); Future commit(); // Throws not_committed or commit_unknown_result errors in normal operation diff --git a/fdbclient/SystemData.cpp b/fdbclient/SystemData.cpp index 1ecbeffffd..888f9bc743 100644 --- a/fdbclient/SystemData.cpp +++ b/fdbclient/SystemData.cpp @@ -39,8 +39,8 @@ const KeyRef keyServersKeyServersKey = keyServersKeyServersKeys.begin; // list of reserved exec commands const StringRef execSnap = LiteralStringRef("snap"); // snapshot persistent state of // storage, TLog and coordinated state -const StringRef execDisableTLogPop = LiteralStringRef("\xff/tldp"); // disable pop on TLog -const StringRef execEnableTLogPop = LiteralStringRef("\xff/tlep"); // enable pop on TLog +const StringRef execDisableTLogPop = LiteralStringRef("\xff/TLogDisablePop"); // disable pop on TLog +const StringRef execEnableTLogPop = LiteralStringRef("\xff/TLogEnablePop"); // enable pop on TLog const Key keyServersKey( const KeyRef& k ) { return k.withPrefix( keyServersPrefix ); diff --git a/fdbserver/FDBExecArgs.cpp b/fdbserver/FDBExecArgs.cpp index 972bff265f..7a4a949aae 100644 --- a/fdbserver/FDBExecArgs.cpp +++ b/fdbserver/FDBExecArgs.cpp @@ -1,16 +1,15 @@ #include "fdbserver/FDBExecArgs.h" -#include -#include +#include "flow/Trace.h" +#include "flow/flow.h" -ExecCmdValueString::ExecCmdValueString(std::string const& pCmdValueString) { +ExecCmdValueString::ExecCmdValueString(StringRef pCmdValueString) { cmdValueString = 
pCmdValueString; parseCmdValue(); } -void ExecCmdValueString::setCmdValueString(std::string const& pCmdValueString) { +void ExecCmdValueString::setCmdValueString(StringRef pCmdValueString) { // reset everything - binaryPath = ""; - binaryArgs.clear(); + binaryPath = StringRef(); keyValueMap.clear(); // set the new cmdValueString @@ -20,20 +19,20 @@ void ExecCmdValueString::setCmdValueString(std::string const& pCmdValueString) { parseCmdValue(); } -std::string ExecCmdValueString::getCmdValueString() { - return cmdValueString; +StringRef ExecCmdValueString::getCmdValueString() { + return cmdValueString.toString(); } -std::string ExecCmdValueString::getBinaryPath() { +StringRef ExecCmdValueString::getBinaryPath() { return binaryPath; } -std::vector ExecCmdValueString::getBinaryArgs() { +VectorRef ExecCmdValueString::getBinaryArgs() { return binaryArgs; } -std::string ExecCmdValueString::getBinaryArgValue(const std::string& key) { - std::string res; +StringRef ExecCmdValueString::getBinaryArgValue(StringRef key) { + StringRef res; if (keyValueMap.find(key) != keyValueMap.end()) { res = keyValueMap[key]; } @@ -41,20 +40,20 @@ std::string ExecCmdValueString::getBinaryArgValue(const std::string& key) { } void ExecCmdValueString::parseCmdValue() { + StringRef param = this->cmdValueString; + const uint8_t* ptr = param.begin(); int p = 0; int pSemiColon = 0; - std::string const& param = this->cmdValueString; { // get the binary path - pSemiColon = param.find_first_of(':', p); - if (pSemiColon == param.npos) { - pSemiColon = param.size(); + while (*(ptr + pSemiColon) != ':' && (ptr + pSemiColon) < param.end()) { + pSemiColon++; } this->binaryPath = param.substr(p, pSemiColon - p); } // no arguments provided - if (pSemiColon >= param.size() - 1) { + if ((ptr + pSemiColon) >= param.end()) { return; } @@ -63,26 +62,27 @@ void ExecCmdValueString::parseCmdValue() { { // extract the arguments for (; p <= param.size();) { - int pComma = param.find_first_of(',', p); - if (pComma == 
param.npos) { - pComma = param.size(); + int pComma = p; + while (*(ptr + pComma) != ',' && (ptr + pComma) < param.end()) { + pComma++; } - std::string token = param.substr(p, pComma - p); - this->binaryArgs.push_back(token); + StringRef token = param.substr(p, pComma - p); + this->binaryArgs.push_back(this->binaryArgs.arena(), token); { // parse the token to get key,value int idx = 0; - int pEqual = token.find_first_of('=', idx); - if (pEqual == token.npos) { - pEqual = token.size(); + int pEqual = 0; + const uint8_t* tokenPtr = token.begin(); + while (*(tokenPtr + pEqual) != '=' + && (tokenPtr + pEqual) < token.end()) { + pEqual++; } - std::string key = token.substr(idx, pEqual - idx); - - std::string value; + StringRef key = token.substr(idx, pEqual - idx); + StringRef value; if (pEqual < token.size() - 1) { value = token.substr(pEqual + 1); } - keyValueMap.insert(std::pair(key, value)); + keyValueMap.insert(std::pair(key, value)); } p = pComma + 1; } @@ -93,12 +93,12 @@ void ExecCmdValueString::parseCmdValue() { void ExecCmdValueString::dbgPrint() { auto te = TraceEvent("ExecCmdValueString"); - te.detail("CmdValueString", cmdValueString); - te.detail("BinaryPath", binaryPath); + te.detail("CmdValueString", cmdValueString.toString()); + te.detail("BinaryPath", binaryPath.toString()); int i = 0; for (auto elem : binaryArgs) { - te.detail(format("Arg", ++i).c_str(), elem); + te.detail(format("Arg", ++i).c_str(), elem.toString()); } return; } diff --git a/fdbserver/FDBExecArgs.h b/fdbserver/FDBExecArgs.h index 96ba4615ef..caf9fc3858 100644 --- a/fdbserver/FDBExecArgs.h +++ b/fdbserver/FDBExecArgs.h @@ -4,6 +4,7 @@ #include #include #include +#include // execute/snapshot command takes two arguments: // param1 - represents the command type/name @@ -14,14 +15,14 @@ class ExecCmdValueString { public: // ctor & dtor ExecCmdValueString() {} - ExecCmdValueString(std::string const& cmdValueString); + explicit ExecCmdValueString(StringRef cmdValueString); public: // 
interfaces - std::string getBinaryPath(); - std::vector getBinaryArgs(); - std::string getBinaryArgValue(std::string const& key); - void setCmdValueString(std::string const& cmdValueString); - std::string getCmdValueString(void); + StringRef getBinaryPath(); + VectorRef getBinaryArgs(); + StringRef getBinaryArgValue(StringRef key); + void setCmdValueString(StringRef cmdValueString); + StringRef getCmdValueString(void); public: // helper functions void dbgPrint(); @@ -30,9 +31,9 @@ private: // functions void parseCmdValue(); private: // data - std::string cmdValueString; - std::vector binaryArgs; - std::string binaryPath; - std::map keyValueMap; + Standalone cmdValueString; + Standalone> binaryArgs; + StringRef binaryPath; + std::map keyValueMap; }; #endif diff --git a/fdbserver/MasterProxyServer.actor.cpp b/fdbserver/MasterProxyServer.actor.cpp index 6e2d498c8b..d1abbc1554 100644 --- a/fdbserver/MasterProxyServer.actor.cpp +++ b/fdbserver/MasterProxyServer.actor.cpp @@ -39,13 +39,13 @@ #include "fdbserver/ApplyMetadataMutation.h" #include "fdbserver/RecoveryState.h" #include "fdbserver/LatencyBandConfig.h" -#include "fdbserver/FDBExecArgs.h" #include "fdbclient/Atomic.h" #include "flow/TDMetric.actor.h" #include "flow/actorcompiler.h" // This must be the last #include. 
#include "fdbclient/DatabaseConfiguration.h" #include "fdbclient/FDBTypes.h" #include "fdbclient/Knobs.h" +#include "fdbserver/FDBExecArgs.h" struct ProxyStats { CounterCollection cc; @@ -233,7 +233,7 @@ struct ProxyCommitData { Deque> txsPopVersions; Version lastTxsPop; bool popRemoteTxs; - vector whiteListedBinPathVec; + vector> whitelistedBinPathVec; Optional latencyBandConfig; @@ -415,7 +415,7 @@ ACTOR Future commitBatcher(ProxyCommitData *commitData, PromiseStream& binPathVec) { +void createWhitelistBinPathVec(const std::string& binPath, vector>& binPathVec) { int p = 0; TraceEvent(SevDebug, "BinPathConverter").detail("Input", binPath); for (; p < binPath.size(); ) { @@ -423,8 +423,8 @@ void createWhiteListBinPathVec(const std::string& binPath, vector& if (pComma == binPath.npos) { pComma = binPath.size(); } - std::string token = binPath.substr(p, pComma - p); - TraceEvent(SevDebug, "BinPathItem").detail("Element", token); + Standalone token(binPath.substr(p, pComma - p)); + TraceEvent(SevDebug, "BinPathItem").detail("Element", token.toString()); binPathVec.push_back(token); p = pComma + 1; while (binPath[p] == ' ' && p < binPath.size()) { @@ -434,13 +434,12 @@ void createWhiteListBinPathVec(const std::string& binPath, vector& return; } -bool isWhiteListed(const vector& binPathVec, const std::string& binPath) { +bool isWhitelisted(vector>& binPathVec, StringRef binPath) { + TraceEvent("BinPath").detail("Value", binPath.toString()); for (auto item : binPathVec) { - if (item == binPath) { - return true; - } + TraceEvent("Element").detail("Value", item.toString()); } - return false; + return std::find(binPathVec.begin(), binPathVec.end(), binPath) != binPathVec.end(); } ACTOR Future commitBatch( @@ -782,12 +781,12 @@ ACTOR Future commitBatch( state std::string param2 = m.param2.toString(); state ExecCmdValueString execArg(param2); execArg.dbgPrint(); - state std::string binPath = execArg.getBinaryPath(); - state std::string uidStr = 
execArg.getBinaryArgValue("uid"); + state StringRef binPath = execArg.getBinaryPath(); + state StringRef uidStr = execArg.getBinaryArgValue(LiteralStringRef("uid")); if (m.param1 != execDisableTLogPop && m.param1 != execEnableTLogPop - && !isWhiteListed(self->whiteListedBinPathVec, binPath)) { + && !isWhitelisted(self->whitelistedBinPathVec, binPath)) { TraceEvent("ExecTransactionNotPermitted") .detail("TransactionNum", transactionNum); committed[transactionNum] = ConflictBatch::TransactionNotPermitted; @@ -805,16 +804,17 @@ ACTOR Future commitBatch( // - all the storage nodes in a single region and // - only to storage nodes in local region in multi-region setup // step 1: get the DatabaseConfiguration - state DatabaseConfiguration conf; - Standalone> results = wait( self->txnStateStore->readRange( configKeys ) ); - conf.fromKeyValues(results); + auto result = + self->txnStateStore->readValue(LiteralStringRef("usable_regions").withPrefix(configKeysPrefix)).get(); + ASSERT(result.present()); + state int usableRegions = atoi(result.get().toString().c_str()); + // step 2: find the tag.id from locality info of the master auto localityKey = self->txnStateStore->readValue(tagLocalityListKeyFor(self->master.locality.dcId())).get(); int8_t locality = tagLocalityInvalid; - if (localityKey.present()) { - locality = decodeTagLocalityListValue(localityKey.get()); - } + ASSERT(localityKey.present()); + locality = decodeTagLocalityListValue(localityKey.get()); auto ranges = self->keyInfo.intersectingRanges(allKeys); std::set allSources; @@ -825,40 +825,18 @@ ACTOR Future commitBatch( .detail("Mutation", m.toString()) .detail("Version", commitVersion); - for (auto r : ranges) { - auto& tags = r.value().tags; - if (!tags.size()) { - for (auto info : r.value().src_info) { - if (info->tag.locality == locality) { - tags.push_back(info->tag); - } - } - for (auto info : r.value().dest_info) { - if (info->tag.locality == locality) { - tags.push_back(info->tag); - } - } - 
uniquify(tags); + std::vector localTags; + auto tagKeys = self->txnStateStore->readRange(serverTagKeys).get(); + for( auto& kv : tagKeys ) { + Tag t = decodeServerTagValue( kv.value ); + if ((usableRegions > 1 && t.locality == locality) + || (usableRegions == 1)) { + localTags.push_back(t); } - std::vector localTags; - for (auto t : tags) { - if ( (!conf.isValid()) - || (conf.usableRegions > 1 && t.locality == locality) - || (conf.usableRegions == 1) ) { - // step 3: based on DatabaseConfiguration and locality - // information gathered in step 1 and step 2, - // - find all the relevant tags - localTags.push_back(t); - } - } - TraceEvent(SevDebug, "DebugTagInfo") - .detail("TagsSize", tags.size()) - .detail("LocalTagsSize", localTags.size()); allSources.insert(localTags.begin(), localTags.end()); } - - std::string tokenStr = "ExecTrace/Proxy/" + uidStr; + std::string tokenStr = "ExecTrace/Proxy/" + uidStr.toString(); auto te1 = TraceEvent("ProxyCommitTo", self->dbgid); te1.detail("To", "all sources"); te1.detail("Mutation", m.toString()); @@ -880,7 +858,7 @@ ACTOR Future commitBatch( UNREACHABLE(); - auto& m = (*pMutations)[mutationNum]; + // auto& m = (*pMutations)[mutationNum]; // Check on backing up key, if backup ranges are defined and a normal key if (self->vecBackupKeys.size() > 1 && (normalKeys.contains(m.param1) || m.param1 == metadataVersionKey)) { @@ -1481,12 +1459,12 @@ ACTOR Future healthMetricsRequestServer(MasterProxyInterface proxy, GetHea } } -ACTOR Future monitorRemoteCommitted(ProxyCommitData* self, Reference> db) { +ACTOR Future monitorRemoteCommitted(ProxyCommitData* self) { loop { wait(delay(0)); //allow this actor to be cancelled if we are removed after db changes. 
state Optional>> remoteLogs; - if(db->get().recoveryState >= RecoveryState::ALL_LOGS_RECRUITED) { - for(auto& logSet : db->get().logSystemConfig.tLogs) { + if(self->db->get().recoveryState >= RecoveryState::ALL_LOGS_RECRUITED) { + for(auto& logSet : self->db->get().logSystemConfig.tLogs) { if(!logSet.isLocal) { remoteLogs = logSet.tLogs; for(auto& tLog : logSet.tLogs) { @@ -1501,12 +1479,12 @@ ACTOR Future monitorRemoteCommitted(ProxyCommitData* self, ReferenceonChange()); + wait(self->db->onChange()); continue; } self->popRemoteTxs = true; - state Future onChange = db->onChange(); + state Future onChange = self->db->onChange(); loop { state std::vector> replies; for(auto &it : remoteLogs.get()) { @@ -1545,7 +1523,7 @@ ACTOR Future masterProxyServerCore( LogEpoch epoch, Version recoveryTransactionVersion, bool firstProxy, - std::string whiteListBinPaths) + std::string whitelistBinPaths) { state ProxyCommitData commitData(proxy.id(), master, proxy.getConsistentReadVersion, recoveryTransactionVersion, proxy.commit, db, firstProxy); @@ -1586,14 +1564,14 @@ ACTOR Future masterProxyServerCore( commitData.logSystem = ILogSystem::fromServerDBInfo(proxy.id(), commitData.db->get(), false, addActor); commitData.logAdapter = new LogSystemDiskQueueAdapter(commitData.logSystem, txsTag, Reference>(), false); commitData.txnStateStore = keyValueStoreLogSystem(commitData.logAdapter, proxy.id(), 2e9, true, true, true); - createWhiteListBinPathVec(whiteListBinPaths, commitData.whiteListedBinPathVec); + createWhitelistBinPathVec(whitelistBinPaths, commitData.whitelistedBinPathVec); // ((SERVER_MEM_LIMIT * COMMIT_BATCHES_MEM_FRACTION_OF_TOTAL) / COMMIT_BATCHES_MEM_TO_TOTAL_MEM_SCALE_FACTOR) is only a approximate formula for limiting the memory used. // COMMIT_BATCHES_MEM_TO_TOTAL_MEM_SCALE_FACTOR is an estimate based on experiments and not an accurate one. 
state int64_t commitBatchesMemoryLimit = std::min(SERVER_KNOBS->COMMIT_BATCHES_MEM_BYTES_HARD_LIMIT, static_cast((SERVER_KNOBS->SERVER_MEM_LIMIT * SERVER_KNOBS->COMMIT_BATCHES_MEM_FRACTION_OF_TOTAL) / SERVER_KNOBS->COMMIT_BATCHES_MEM_TO_TOTAL_MEM_SCALE_FACTOR)); TraceEvent(SevInfo, "CommitBatchesMemoryLimit").detail("BytesLimit", commitBatchesMemoryLimit); - addActor.send(monitorRemoteCommitted(&commitData, commitData.db)); + addActor.send(monitorRemoteCommitted(&commitData)); addActor.send(transactionStarter(proxy, commitData.db, addActor, &commitData, &healthMetricsReply, &detailedHealthMetricsReply)); addActor.send(readRequestServer(proxy, &commitData)); addActor.send(rejoinServer(proxy, &commitData)); @@ -1678,7 +1656,7 @@ ACTOR Future masterProxyServerCore( "MasterProxyServer.masterProxyServerCore." "ExecRequest"); - TraceEvent("ExecRequest").detail("Payload", execReq.execPayLoad.toString()); + TraceEvent("ExecRequest").detail("Payload", execReq.execPayload.toString()); // get the list of coordinators state Optional coordinators = commitData.txnStateStore->readValue(coordinatorsKey).get(); @@ -1701,7 +1679,7 @@ ACTOR Future masterProxyServerCore( if (coordinatorsAddrSet.find(workers[i].interf.address()) != coordinatorsAddrSet.end()) { TraceEvent("ExecReqToCoordinator").detail("WorkerAddr", workers[i].interf.address()); try { - wait(timeoutError(workers[i].interf.execReq.getReply(ExecuteRequest(execReq.execPayLoad)), 3.0)); + wait(timeoutError(workers[i].interf.execReq.getReply(ExecuteRequest(execReq.execPayload)), 3.0)); ++numSucc; } catch (Error& e) { TraceEvent("ExecReqFailed").detail("What", e.what()); @@ -1799,10 +1777,10 @@ ACTOR Future masterProxyServer( MasterProxyInterface proxy, InitializeMasterProxyRequest req, Reference> db, - std::string whiteListBinPaths) + std::string whitelistBinPaths) { try { - state Future core = masterProxyServerCore(proxy, req.master, db, req.recoveryCount, req.recoveryTransactionVersion, req.firstProxy, whiteListBinPaths); 
+ state Future core = masterProxyServerCore(proxy, req.master, db, req.recoveryCount, req.recoveryTransactionVersion, req.firstProxy, whitelistBinPaths); loop choose{ when(wait(core)) { return Void(); } when(wait(checkRemoved(db, req.recoveryCount, proxy))) {} diff --git a/fdbserver/OldTLogServer_6_0.actor.cpp b/fdbserver/OldTLogServer_6_0.actor.cpp index 2515f46b9c..4731870e31 100644 --- a/fdbserver/OldTLogServer_6_0.actor.cpp +++ b/fdbserver/OldTLogServer_6_0.actor.cpp @@ -955,12 +955,14 @@ ACTOR Future tLogPop( TLogData* self, TLogPopRequest req, Reference::iterator it; + state vector> ignoredPops; for (it = self->toBePopped.begin(); it != self->toBePopped.end(); it++) { TraceEvent("PlayIgnoredPop") .detail("Tag", it->first.toString()) .detail("Version", it->second); - wait(tLogPopCore(self, it->first, it->second, logData)); + ignoredPops.push_back(tLogPopCore(self, it->first, it->second, logData)); } + wait(waitForAll(ignoredPops)); self->toBePopped.clear(); self->ignorePopRequest = false; @@ -1319,6 +1321,8 @@ ACTOR Future tLogCommit( // This property is useful for snapshot kind of operations which wants to // take a snap of the disk image at a particular version (no data from // future version to be included) + // NOTE: execOpCommitInProgress will not be set for exec commands which + // start with \xff if (logData->execOpCommitInProgress) { wait(logData->execOpHold.getFuture()); } @@ -1383,17 +1387,19 @@ ACTOR Future tLogCommit( TraceEvent(SevDebug, "TLogExecCommandType", self->dbgid).detail("Value", execCmd.toString()); - execArg.setCmdValueString(param2.toString()); + execArg.setCmdValueString(param2); execArg.dbgPrint(); - state std::string uidStr = execArg.getBinaryArgValue("uid"); - execVersion = qe.version; + state StringRef uidStr = execArg.getBinaryArgValue(LiteralStringRef("uid")); + if (!execCmd.startsWith(LiteralStringRef("\xff"))) { + execVersion = qe.version; + } if (execCmd == execSnap) { // validation check specific to snap request std::string 
reason; if (!self->ignorePopRequest) { execVersion = invalidVersion; reason = "SnapFailIgnorePopNotSet"; - } else if (uidStr != self->ignorePopUid) { + } else if (uidStr.toString() != self->ignorePopUid) { execVersion = invalidVersion; reason = "SnapFailedDisableTLogUidMismatch"; } @@ -1407,9 +1413,9 @@ ACTOR Future tLogCommit( auto startTag = logData->allTags.begin(); std::string message = "ExecTrace/TLog/" + logData->allTags.begin()->toString(); - "/" + uidStr; + "/" + uidStr.toString(); TraceEvent("ExecCmdSnapCreate") - .detail("Uid", uidStr) + .detail("Uid", uidStr.toString()) .detail("Status", -1) .detail("Tag", logData->allTags.begin()->toString()) .detail("Role", "TLog") @@ -1417,30 +1423,28 @@ ACTOR Future tLogCommit( } } if (execCmd == execDisableTLogPop) { - execVersion = invalidVersion; self->ignorePopRequest = true; if (self->ignorePopUid != "") { TraceEvent(SevWarn, "TLogPopDisableonDisable") .detail("IgnorePopUid", self->ignorePopUid) - .detail("UidStr", uidStr); + .detail("UidStr", uidStr.toString()); } - self->ignorePopUid = uidStr; + self->ignorePopUid = uidStr.toString(); // ignorePopRequest will be turned off after 30 seconds self->ignorePopDeadline = g_network->now() + 30.0; TraceEvent("TLogExecCmdPopDisable") .detail("ExecCmd", execCmd.toString()) - .detail("UidStr", uidStr) + .detail("UidStr", uidStr.toString()) .detail("IgnorePopUid", self->ignorePopUid) .detail("IgnporePopRequest", self->ignorePopRequest) .detail("IgnporePopDeadline", self->ignorePopDeadline) .trackLatest("DisablePopTLog"); } if (execCmd == execEnableTLogPop) { - execVersion = invalidVersion; - if (self->ignorePopUid != uidStr) { + if (self->ignorePopUid != uidStr.toString()) { TraceEvent(SevWarn, "TLogPopDisableEnableUidMismatch") .detail("IgnorePopUid", self->ignorePopUid) - .detail("UidStr", uidStr) + .detail("UidStr", uidStr.toString()) .trackLatest("TLogPopDisableEnableUidMismatch"); } @@ -1460,7 +1464,7 @@ ACTOR Future tLogCommit( self->ignorePopUid = ""; 
TraceEvent("TLogExecCmdPopEnable") .detail("ExecCmd", execCmd.toString()) - .detail("UidStr", uidStr) + .detail("UidStr", uidStr.toString()) .detail("IgnorePopUid", self->ignorePopUid) .detail("IgnporePopRequest", self->ignorePopRequest) .detail("IgnporePopDeadline", self->ignorePopDeadline) @@ -1502,8 +1506,7 @@ ACTOR Future tLogCommit( if ((execVersion != invalidVersion) && execVersion <= logData->queueCommittedVersion.get()) { state int err = 0; state Future cmdErr; - auto uidStr = execArg.getBinaryArgValue("uid"); - state UID execUID = UID::fromString(uidStr); + state UID execUID = UID::fromString(uidStr.toString()); state bool otherRoleExeced = false; // TLog is special, we need to exec at the execVersion. // If storage on the same process has initiated the exec then wait for it to @@ -1519,11 +1522,11 @@ ACTOR Future tLogCommit( auto snapBin = execArg.getBinaryPath(); auto dataFolder = "path=" + self->dataFolder; vector paramList; - paramList.push_back(snapBin); + paramList.push_back(snapBin.toString()); // user passed arguments auto listArgs = execArg.getBinaryArgs(); for (auto elem : listArgs) { - paramList.push_back(elem); + paramList.push_back(elem.toString()); } // additional arguments paramList.push_back(dataFolder); @@ -1533,13 +1536,13 @@ ACTOR Future tLogCommit( paramList.push_back(versionString); std::string roleString = "role=tlog"; paramList.push_back(roleString); - cmdErr = spawnProcess(snapBin, paramList, 3.0); + cmdErr = spawnProcess(snapBin.toString(), paramList, 3.0); wait(success(cmdErr)); err = cmdErr.get(); } else { // copy the entire directory state std::string tLogFolderFrom = "./" + self->dataFolder + "/."; - state std::string tLogFolderTo = "./" + self->dataFolder + "-snap-" + uidStr; + state std::string tLogFolderTo = "./" + self->dataFolder + "-snap-" + uidStr.toString(); vector paramList; std::string mkdirBin = "/bin/mkdir"; paramList.push_back(mkdirBin); @@ -1563,7 +1566,7 @@ ACTOR Future tLogCommit( clearExecOpInProgress(execUID); 
} TraceEvent("TLogCommitExecTraceTLog") - .detail("UidStr", uidStr) + .detail("UidStr", uidStr.toString()) .detail("Status", err) .detail("Tag", logData->allTags.begin()->toString()) .detail("OldTagSize", logData->allTags.size()) @@ -1579,10 +1582,10 @@ ACTOR Future tLogCommit( poppedTagVersion = tagv->popped; int len = param2.size(); - state std::string message = "ExecTrace/TLog/" + tagv->tag.toString() + "/" + uidStr; + state std::string message = "ExecTrace/TLog/" + tagv->tag.toString() + "/" + uidStr.toString(); TraceEvent te = TraceEvent(SevDebug, "TLogExecTraceDetailed"); - te.detail("Uid", uidStr); + te.detail("Uid", uidStr.toString()); te.detail("Status", err); te.detail("Role", "TLog"); te.detail("ExecCmd", execCmd.toString()); diff --git a/fdbserver/SimulatedCluster.actor.cpp b/fdbserver/SimulatedCluster.actor.cpp index b271777394..25650a8dc7 100644 --- a/fdbserver/SimulatedCluster.actor.cpp +++ b/fdbserver/SimulatedCluster.actor.cpp @@ -197,7 +197,7 @@ ACTOR Future simulatedFDBDRebooter(ReferencerandomUniqueID(); state int cycles = 0; @@ -251,7 +251,7 @@ ACTOR Future simulatedFDBDRebooter(Reference fd = fdbd( connFile, localities, processClass, *dataFolder, *coordFolder, 500e6, "", "", -1, whiteListBinPaths); + Future fd = fdbd( connFile, localities, processClass, *dataFolder, *coordFolder, 500e6, "", "", -1, whitelistBinPaths); Future backup = runBackupAgents ? runBackup(connFile) : Future(Never()); Future dr = runBackupAgents ? 
runDr(connFile) : Future(Never()); @@ -360,7 +360,7 @@ std::map< Optional>, std::vector< std::vector< std::string ACTOR Future simulatedMachine(ClusterConnectionString connStr, std::vector ips, bool sslEnabled, Reference tlsOptions, LocalityData localities, ProcessClass processClass, std::string baseFolder, bool restarting, - bool useSeedFile, bool runBackupAgents, bool sslOnly, std::string whiteListBinPaths) { + bool useSeedFile, bool runBackupAgents, bool sslOnly, std::string whitelistBinPaths) { state int bootCount = 0; state std::vector myFolders; state std::vector coordFolders; @@ -402,7 +402,7 @@ ACTOR Future simulatedMachine(ClusterConnectionString connStr, std::vector std::string path = joinPath(myFolders[i], "fdb.cluster"); Reference clusterFile(useSeedFile ? new ClusterConnectionFile(path, connStr.toString()) : new ClusterConnectionFile(path)); const int listenPort = i*listenPerProcess + 1; - processes.push_back(simulatedFDBDRebooter(clusterFile, ips[i], sslEnabled, tlsOptions, listenPort, listenPerProcess, localities, processClass, &myFolders[i], &coordFolders[i], baseFolder, connStr, useSeedFile, runBackupAgents, whiteListBinPaths)); + processes.push_back(simulatedFDBDRebooter(clusterFile, ips[i], sslEnabled, tlsOptions, listenPort, listenPerProcess, localities, processClass, &myFolders[i], &coordFolders[i], baseFolder, connStr, useSeedFile, runBackupAgents, whitelistBinPaths)); TraceEvent("SimulatedMachineProcess", randomId).detail("Address", NetworkAddress(ips[i], listenPort, true, false)).detail("ZoneId", localities.zoneId()).detail("DataHall", localities.dataHallId()).detail("Folder", myFolders[i]); } @@ -607,7 +607,7 @@ IPAddress makeIPAddressForSim(bool isIPv6, std::array parts) { ACTOR Future restartSimulatedSystem(vector>* systemActors, std::string baseFolder, int* pTesterCount, Optional* pConnString, Standalone* pStartingConfiguration, - Reference tlsOptions, int extraDB, std::string whiteListBinPaths) { + Reference tlsOptions, int extraDB, 
std::string whitelistBinPaths) { CSimpleIni ini; ini.SetUnicode(); ini.LoadFile(joinPath(baseFolder, "restartInfo.ini").c_str()); @@ -705,7 +705,7 @@ ACTOR Future restartSimulatedSystem(vector>* systemActors, st systemActors->push_back(reportErrors( simulatedMachine(conn, ipAddrs, usingSSL, tlsOptions, localities, processClass, baseFolder, true, i == useSeedForMachine, enableExtraDB, - usingSSL && (listenersPerProcess == 1 || processClass == ProcessClass::TesterClass), whiteListBinPaths), + usingSSL && (listenersPerProcess == 1 || processClass == ProcessClass::TesterClass), whitelistBinPaths), processClass == ProcessClass::TesterClass ? "SimulatedTesterMachine" : "SimulatedMachine")); } @@ -1088,7 +1088,7 @@ void SimulationConfig::generateNormalConfig(int minimumReplication, int minimumR void setupSimulatedSystem(vector>* systemActors, std::string baseFolder, int* pTesterCount, Optional* pConnString, Standalone* pStartingConfiguration, int extraDB, int minimumReplication, int minimumRegions, Reference tlsOptions, - std::string whiteListBinPaths) { + std::string whitelistBinPaths) { // SOMEDAY: this does not test multi-interface configurations SimulationConfig simconfig(extraDB, minimumReplication, minimumRegions); StatusObject startingConfigJSON = simconfig.db.toJSON(true); @@ -1284,7 +1284,7 @@ void setupSimulatedSystem(vector>* systemActors, std::string baseFo LocalityData localities(Optional>(), zoneId, machineId, dcUID); localities.set(LiteralStringRef("data_hall"), dcUID); systemActors->push_back(reportErrors(simulatedMachine(conn, ips, sslEnabled, tlsOptions, - localities, processClass, baseFolder, false, machine == useSeedForMachine, true, sslOnly, whiteListBinPaths ), "SimulatedMachine")); + localities, processClass, baseFolder, false, machine == useSeedForMachine, true, sslOnly, whitelistBinPaths ), "SimulatedMachine")); if (extraDB && g_simulator.extraDB->toString() != conn.toString()) { std::vector extraIps; @@ -1298,7 +1298,7 @@ void 
setupSimulatedSystem(vector>* systemActors, std::string baseFo localities.set(LiteralStringRef("data_hall"), dcUID); systemActors->push_back(reportErrors(simulatedMachine(*g_simulator.extraDB, extraIps, sslEnabled, tlsOptions, localities, - processClass, baseFolder, false, machine == useSeedForMachine, false, sslOnly, whiteListBinPaths ), "SimulatedMachine")); + processClass, baseFolder, false, machine == useSeedForMachine, false, sslOnly, whitelistBinPaths ), "SimulatedMachine")); } assignedMachines++; @@ -1326,7 +1326,7 @@ void setupSimulatedSystem(vector>* systemActors, std::string baseFo systemActors->push_back( reportErrors( simulatedMachine( conn, ips, sslEnabled, tlsOptions, localities, ProcessClass(ProcessClass::TesterClass, ProcessClass::CommandLineSource), - baseFolder, false, i == useSeedForMachine, false, sslEnabled, whiteListBinPaths ), + baseFolder, false, i == useSeedForMachine, false, sslEnabled, whitelistBinPaths ), "SimulatedTesterMachine") ); } *pStartingConfiguration = startingConfigString; @@ -1382,7 +1382,7 @@ void checkExtraDB(const char *testFile, int &extraDB, int &minimumReplication, i ifs.close(); } -ACTOR void setupAndRun(std::string dataFolder, const char *testFile, bool rebooting, bool restoring, std::string whiteListBinPaths, Reference tlsOptions) { +ACTOR void setupAndRun(std::string dataFolder, const char *testFile, bool rebooting, bool restoring, std::string whitelistBinPaths, Reference tlsOptions) { state vector> systemActors; state Optional connFile; state Standalone startingConfiguration; @@ -1412,19 +1412,16 @@ ACTOR void setupAndRun(std::string dataFolder, const char *testFile, bool reboot try { //systemActors.push_back( startSystemMonitor(dataFolder) ); if (rebooting) { - wait( timeoutError( restartSimulatedSystem( &systemActors, dataFolder, &testerCount, &connFile, &startingConfiguration, tlsOptions, extraDB, whiteListBinPaths), 100.0 ) ); + wait( timeoutError( restartSimulatedSystem( &systemActors, dataFolder, &testerCount, 
&connFile, &startingConfiguration, tlsOptions, extraDB, whitelistBinPaths), 100.0 ) ); // FIXME: snapshot restore does not support multi-region restore, hence restore it as single region always if (restoring) { - std::string config = "usable_regions=1"; - startingConfiguration = makeString(config.size()); - uint8_t* ptr = mutateString(startingConfiguration); - memcpy(ptr, ((uint8_t*)config.c_str()), config.size()); + startingConfiguration = LiteralStringRef("usable_regions=1"); } } else { g_expect_full_pointermap = 1; setupSimulatedSystem(&systemActors, dataFolder, &testerCount, &connFile, &startingConfiguration, extraDB, - minimumReplication, minimumRegions, tlsOptions, whiteListBinPaths); + minimumReplication, minimumRegions, tlsOptions, whitelistBinPaths); wait( delay(1.0) ); // FIXME: WHY!!! //wait for machines to boot } std::string clusterFileDir = joinPath( dataFolder, deterministicRandom()->randomUniqueID().toString() ); diff --git a/fdbserver/SimulatedCluster.h b/fdbserver/SimulatedCluster.h index 9818d007ba..85dc44d655 100644 --- a/fdbserver/SimulatedCluster.h +++ b/fdbserver/SimulatedCluster.h @@ -24,6 +24,6 @@ #define FDBSERVER_SIMULATEDCLUSTER_H #pragma once -void setupAndRun(std::string const& dataFolder, const char* const& testFile, bool const& rebooting, bool const& restoring, std::string const& whiteListBinPath, Reference const& useSSL); +void setupAndRun(std::string const& dataFolder, const char* const& testFile, bool const& rebooting, bool const& restoring, std::string const& whitelistBinPath, Reference const& useSSL); #endif diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index 67998462c8..4f3d11ca7f 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -1206,12 +1206,14 @@ ACTOR Future tLogPop( TLogData* self, TLogPopRequest req, Reference::iterator it; + state vector> ignoredPops; for (it = self->toBePopped.begin(); it != self->toBePopped.end(); it++) { TraceEvent("PlayIgnoredPop") 
.detail("Tag", it->first.toString()) .detail("Version", it->second); - wait(tLogPopCore(self, it->first, it->second, logData)); + ignoredPops.push_back(tLogPopCore(self, it->first, it->second, logData)); } + wait(waitForAll(ignoredPops)); self->toBePopped.clear(); self->ignorePopRequest = false; @@ -1686,6 +1688,8 @@ ACTOR Future tLogCommit( // This property is useful for snapshot kind of operations which wants to // take a snap of the disk image at a particular version (not data from // future version to be included) + // NOTE: execOpCommitInProgress will not be set for exec commands which + // start with \xff if (logData->execOpCommitInProgress) { wait(logData->execOpHold.getFuture()); } @@ -1750,17 +1754,19 @@ ACTOR Future tLogCommit( TraceEvent(SevDebug, "TLogExecCommandType", self->dbgid).detail("Value", execCmd.toString()); - execArg.setCmdValueString(param2.toString()); + execArg.setCmdValueString(param2); execArg.dbgPrint(); - state std::string uidStr = execArg.getBinaryArgValue("uid"); - execVersion = qe.version; + state StringRef uidStr = execArg.getBinaryArgValue(LiteralStringRef("uid")); + if (!execCmd.startsWith(LiteralStringRef("\xff"))) { + execVersion = qe.version; + } if (execCmd == execSnap) { // validation check specific to snap request std::string reason; if (!self->ignorePopRequest) { execVersion = invalidVersion; reason = "SnapFailIgnorePopNotSet"; - } else if (uidStr != self->ignorePopUid) { + } else if (uidStr.toString() != self->ignorePopUid) { execVersion = invalidVersion; reason = "SnapFailedDisableTLogUidMismatch"; } @@ -1775,9 +1781,9 @@ ACTOR Future tLogCommit( auto startTag = logData->allTags.begin(); std::string message = "ExecTrace/TLog/" + logData->allTags.begin()->toString(); - "/" + uidStr; + "/" + uidStr.toString(); TraceEvent("ExecCmdSnapCreate") - .detail("Uid", uidStr) + .detail("Uid", uidStr.toString()) .detail("Status", -1) .detail("Tag", logData->allTags.begin()->toString()) .detail("Role", "TLog") @@ -1790,14 +1796,14 @@ 
ACTOR Future tLogCommit( if (self->ignorePopUid != "") { TraceEvent(SevWarn, "TLogPopDisableOnDisable") .detail("IgnorePopUid", self->ignorePopUid) - .detail("UidStr", uidStr); + .detail("UidStr", uidStr.toString()); } - self->ignorePopUid = uidStr; + self->ignorePopUid = uidStr.toString(); // ignorePopRequest will be turned off after 30 seconds self->ignorePopDeadline = g_network->now() + 30.0; TraceEvent("TLogExecCmdPopDisable") .detail("ExecCmd", execCmd.toString()) - .detail("UidStr", uidStr) + .detail("UidStr", uidStr.toString()) .detail("IgnorePopUid", self->ignorePopUid) .detail("IgnporePopRequest", self->ignorePopRequest) .detail("IgnporePopDeadline", self->ignorePopDeadline) @@ -1805,10 +1811,10 @@ ACTOR Future tLogCommit( } if (execCmd == execEnableTLogPop) { execVersion = invalidVersion; - if (self->ignorePopUid != uidStr) { + if (self->ignorePopUid != uidStr.toString()) { TraceEvent(SevWarn, "TLogPopDisableEnableUidMismatch") .detail("IgnorePopUid", self->ignorePopUid) - .detail("UidStr", uidStr) + .detail("UidStr", uidStr.toString()) .trackLatest("TLogPopDisableEnableUidMismatch"); } @@ -1828,7 +1834,7 @@ ACTOR Future tLogCommit( self->ignorePopUid = ""; TraceEvent("TLogExecCmdPopEnable") .detail("ExecCmd", execCmd.toString()) - .detail("UidStr", uidStr) + .detail("UidStr", uidStr.toString()) .detail("IgnorePopUid", self->ignorePopUid) .detail("IgnporePopRequest", self->ignorePopRequest) .detail("IgnporePopDeadline", self->ignorePopDeadline) @@ -1870,8 +1876,7 @@ ACTOR Future tLogCommit( if ((execVersion != invalidVersion) && execVersion <= logData->queueCommittedVersion.get()) { state int err = 0; - auto uidStr = execArg.getBinaryArgValue("uid"); - state UID execUID = UID::fromString(uidStr); + state UID execUID = UID::fromString(uidStr.toString()); state bool otherRoleExeced = false; // TLog is special, we need to exec at the execVersion. 
// If storage on the same process has initiated the exec then wait for it to @@ -1888,11 +1893,11 @@ ACTOR Future tLogCommit( auto snapBin = execArg.getBinaryPath(); auto dataFolder = "path=" + self->dataFolder; vector paramList; - paramList.push_back(snapBin); + paramList.push_back(snapBin.toString()); // user passed arguments auto listArgs = execArg.getBinaryArgs(); for (auto elem : listArgs) { - paramList.push_back(elem); + paramList.push_back(elem.toString()); } // additional arguments paramList.push_back(dataFolder); @@ -1902,13 +1907,13 @@ ACTOR Future tLogCommit( paramList.push_back(versionString); std::string roleString = "role=tlog"; paramList.push_back(roleString); - cmdErr = spawnProcess(snapBin, paramList, 3.0); + cmdErr = spawnProcess(snapBin.toString(), paramList, 3.0); wait(success(cmdErr)); err = cmdErr.get(); } else { // copy the entire directory state std::string tLogFolderFrom = "./" + self->dataFolder + "/."; - state std::string tLogFolderTo = "./" + self->dataFolder + "-snap-" + uidStr; + state std::string tLogFolderTo = "./" + self->dataFolder + "-snap-" + uidStr.toString(); vector paramList; std::string mkdirBin = "/bin/mkdir"; paramList.push_back(mkdirBin); @@ -1932,7 +1937,7 @@ ACTOR Future tLogCommit( clearExecOpInProgress(execUID); } TraceEvent("TLogCommitExecTraceLog") - .detail("UidStr", uidStr) + .detail("UidStr", uidStr.toString()) .detail("Status", err) .detail("Tag", logData->allTags.begin()->toString()) .detail("TagSize", logData->allTags.size()) @@ -1948,10 +1953,10 @@ ACTOR Future tLogCommit( poppedTagVersion = tagv->popped; int len = param2.size(); - state std::string message = "ExecTrace/TLog/" + tagv->tag.toString() + "/" + uidStr; + state std::string message = "ExecTrace/TLog/" + tagv->tag.toString() + "/" + uidStr.toString(); TraceEvent te = TraceEvent(SevDebug, "TLogExecTraceDetailed"); - te.detail("Uid", uidStr); + te.detail("Uid", uidStr.toString()); te.detail("Status", err); te.detail("Role", "TLog"); 
te.detail("ExecCmd", execCmd.toString()); diff --git a/fdbserver/WorkerInterface.actor.h b/fdbserver/WorkerInterface.actor.h index 7d55f0ecee..b20ebd81b6 100644 --- a/fdbserver/WorkerInterface.actor.h +++ b/fdbserver/WorkerInterface.actor.h @@ -244,15 +244,15 @@ struct ExecuteRequest { ReplyPromise reply; Arena arena; - StringRef execPayLoad; + StringRef execPayload; - ExecuteRequest(StringRef execPayLoad) : execPayLoad(execPayLoad) {} + ExecuteRequest(StringRef execPayload) : execPayload(execPayload) {} - ExecuteRequest() : execPayLoad() {} + ExecuteRequest() : execPayload() {} template void serialize(Ar& ar) { - serializer(ar, reply, execPayLoad, arena); + serializer(ar, reply, execPayload, arena); } }; @@ -398,7 +398,7 @@ ACTOR Future extractClusterInterface(Reference fdbd(Reference ccf, LocalityData localities, ProcessClass processClass, std::string dataFolder, std::string coordFolder, int64_t memoryLimit, std::string metricsConnFile, std::string metricsPrefix, int64_t memoryProfilingThreshold, - std::string whiteListBinPaths); + std::string whitelistBinPaths); ACTOR Future clusterController(Reference ccf, Reference>> currentCC, @@ -418,7 +418,7 @@ ACTOR Future storageServer(IKeyValueStore* persistentData, StorageServerIn ACTOR Future masterServer(MasterInterface mi, Reference> db, ServerCoordinators serverCoordinators, LifetimeToken lifetime, bool forceRecovery); ACTOR Future masterProxyServer(MasterProxyInterface proxy, InitializeMasterProxyRequest req, - Reference> db, std::string whiteListBinPaths); + Reference> db, std::string whitelistBinPaths); ACTOR Future tLog(IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference> db, LocalityData locality, PromiseStream tlogRequests, UID tlogId, bool restoreFromDisk, diff --git a/fdbserver/fdbserver.actor.cpp b/fdbserver/fdbserver.actor.cpp index 52fd71d971..8d2764562c 100644 --- a/fdbserver/fdbserver.actor.cpp +++ b/fdbserver/fdbserver.actor.cpp @@ -82,11 +82,7 @@ enum { OPT_CONNFILE, 
OPT_SEEDCONNFILE, OPT_SEEDCONNSTRING, OPT_ROLE, OPT_LISTEN, OPT_PUBLICADDR, OPT_DATAFOLDER, OPT_LOGFOLDER, OPT_PARENTPID, OPT_NEWCONSOLE, OPT_NOBOX, OPT_TESTFILE, OPT_RESTARTING, OPT_RESTORING, OPT_RANDOMSEED, OPT_KEY, OPT_MEMLIMIT, OPT_STORAGEMEMLIMIT, OPT_MACHINEID, OPT_DCID, OPT_MACHINE_CLASS, OPT_BUGGIFY, OPT_VERSION, OPT_CRASHONERROR, OPT_HELP, OPT_NETWORKIMPL, OPT_NOBUFSTDOUT, OPT_BUFSTDOUTERR, OPT_TRACECLOCK, OPT_NUMTESTERS, OPT_DEVHELP, OPT_ROLLSIZE, OPT_MAXLOGS, OPT_MAXLOGSSIZE, OPT_KNOB, OPT_TESTSERVERS, OPT_TEST_ON_SERVERS, OPT_METRICSCONNFILE, OPT_METRICSPREFIX, -<<<<<<< HEAD - OPT_LOGGROUP, OPT_LOCALITY, OPT_IO_TRUST_SECONDS, OPT_IO_TRUST_WARN_ONLY, OPT_FILESYSTEM, OPT_PROFILER_RSS_SIZE, OPT_KVFILE, OPT_TRACE_FORMAT, OPT_USE_OBJECT_SERIALIZER }; -======= - OPT_LOGGROUP, OPT_LOCALITY, OPT_IO_TRUST_SECONDS, OPT_IO_TRUST_WARN_ONLY, OPT_FILESYSTEM, OPT_KVFILE, OPT_TRACE_FORMAT, OPT_WHITELIST_BINPATH }; ->>>>>>> 2847e101... Allow only whitelisted binary path for exec op + OPT_LOGGROUP, OPT_LOCALITY, OPT_IO_TRUST_SECONDS, OPT_IO_TRUST_WARN_ONLY, OPT_FILESYSTEM, OPT_PROFILER_RSS_SIZE, OPT_KVFILE, OPT_TRACE_FORMAT, OPT_USE_OBJECT_SERIALIZER, OPT_WHITELIST_BINPATH }; CSimpleOpt::SOption g_rgOptions[] = { { OPT_CONNFILE, "-C", SO_REQ_SEP }, @@ -119,7 +115,6 @@ CSimpleOpt::SOption g_rgOptions[] = { { OPT_NOBOX, "-q", SO_NONE }, { OPT_NOBOX, "--no_dialog", SO_NONE }, #endif -<<<<<<< HEAD { OPT_KVFILE, "--kvfile", SO_REQ_SEP }, { OPT_TESTFILE, "-f", SO_REQ_SEP }, { OPT_TESTFILE, "--testfile", SO_REQ_SEP }, @@ -165,52 +160,7 @@ CSimpleOpt::SOption g_rgOptions[] = { { OPT_TRACE_FORMAT , "--trace_format", SO_REQ_SEP }, { OPT_USE_OBJECT_SERIALIZER, "-S", SO_REQ_SEP }, { OPT_USE_OBJECT_SERIALIZER, "--object-serializer", SO_REQ_SEP }, -======= - { OPT_KVFILE, "--kvfile", SO_REQ_SEP }, - { OPT_TESTFILE, "-f", SO_REQ_SEP }, - { OPT_TESTFILE, "--testfile", SO_REQ_SEP }, - { OPT_RESTARTING, "-R", SO_NONE }, - { OPT_RESTARTING, "--restarting", SO_NONE }, - { OPT_RANDOMSEED, 
"-s", SO_REQ_SEP }, - { OPT_RANDOMSEED, "--seed", SO_REQ_SEP }, - { OPT_KEY, "-k", SO_REQ_SEP }, - { OPT_KEY, "--key", SO_REQ_SEP }, - { OPT_MEMLIMIT, "-m", SO_REQ_SEP }, - { OPT_MEMLIMIT, "--memory", SO_REQ_SEP }, - { OPT_STORAGEMEMLIMIT, "-M", SO_REQ_SEP }, - { OPT_STORAGEMEMLIMIT, "--storage_memory", SO_REQ_SEP }, - { OPT_MACHINEID, "-i", SO_REQ_SEP }, - { OPT_MACHINEID, "--machine_id", SO_REQ_SEP }, - { OPT_DCID, "-a", SO_REQ_SEP }, - { OPT_DCID, "--datacenter_id", SO_REQ_SEP }, - { OPT_MACHINE_CLASS, "-c", SO_REQ_SEP }, - { OPT_MACHINE_CLASS, "--class", SO_REQ_SEP }, - { OPT_BUGGIFY, "-b", SO_REQ_SEP }, - { OPT_BUGGIFY, "--buggify", SO_REQ_SEP }, - { OPT_VERSION, "-v", SO_NONE }, - { OPT_VERSION, "--version", SO_NONE }, - { OPT_CRASHONERROR, "--crash", SO_NONE }, - { OPT_NETWORKIMPL, "-N", SO_REQ_SEP }, - { OPT_NETWORKIMPL, "--network", SO_REQ_SEP }, - { OPT_NOBUFSTDOUT, "--unbufferedout", SO_NONE }, - { OPT_BUFSTDOUTERR, "--bufferedout", SO_NONE }, - { OPT_TRACECLOCK, "--traceclock", SO_REQ_SEP }, - { OPT_NUMTESTERS, "--num_testers", SO_REQ_SEP }, - { OPT_HELP, "-?", SO_NONE }, - { OPT_HELP, "-h", SO_NONE }, - { OPT_HELP, "--help", SO_NONE }, - { OPT_DEVHELP, "--dev-help", SO_NONE }, - { OPT_KNOB, "--knob_", SO_REQ_SEP }, - { OPT_LOCALITY, "--locality_", SO_REQ_SEP }, - { OPT_TESTSERVERS, "--testservers", SO_REQ_SEP }, - { OPT_TEST_ON_SERVERS, "--testonservers", SO_NONE }, - { OPT_METRICSCONNFILE, "--metrics_cluster", SO_REQ_SEP }, - { OPT_METRICSPREFIX, "--metrics_prefix", SO_REQ_SEP }, - { OPT_IO_TRUST_SECONDS, "--io_trust_seconds", SO_REQ_SEP }, - { OPT_IO_TRUST_WARN_ONLY, "--io_trust_warn_only", SO_NONE }, - { OPT_TRACE_FORMAT , "--trace_format", SO_REQ_SEP }, - { OPT_WHITELIST_BINPATH, "--whitelist_binpath", SO_REQ_SEP }, ->>>>>>> 2847e101... 
Allow only whitelisted binary path for exec op + { OPT_WHITELIST_BINPATH, "--whitelist_binpath", SO_REQ_SEP }, #ifndef TLS_DISABLED TLS_OPTION_FLAGS @@ -966,7 +916,7 @@ int main(int argc, char* argv[]) { const char *testFile = "tests/default.txt"; std::string kvFile; std::string testServersStr; - std::string whiteListBinPaths; + std::string whitelistBinPaths; std::vector publicAddressStrs, listenAddressStrs; const char *targetKey = NULL; uint64_t memLimit = 8LL << 30; // Nice to maintain the same default value for memLimit and SERVER_KNOBS->SERVER_MEM_LIMIT and SERVER_KNOBS->COMMIT_BATCHES_MEM_BYTES_HARD_LIMIT @@ -1355,7 +1305,7 @@ int main(int argc, char* argv[]) { break; } case OPT_WHITELIST_BINPATH: - whiteListBinPaths = args.OptionArg(); + whitelistBinPaths = args.OptionArg(); break; #ifndef TLS_DISABLED case TLSOptions::OPT_TLS_PLUGIN: @@ -1781,7 +1731,7 @@ int main(int argc, char* argv[]) { } } } - setupAndRun( dataFolder, testFile, restarting, (isRestoring >= 1), whiteListBinPaths, tlsOptions); + setupAndRun( dataFolder, testFile, restarting, (isRestoring >= 1), whitelistBinPaths, tlsOptions); g_simulator.run(); } else if (role == FDBD) { ASSERT( connectionFile ); @@ -1792,7 +1742,7 @@ int main(int argc, char* argv[]) { dataFolder = format("fdb/%d/", publicAddresses.address.port); // SOMEDAY: Better default vector> actors(listenErrors.begin(), listenErrors.end()); - actors.push_back( fdbd(connectionFile, localities, processClass, dataFolder, dataFolder, storageMemLimit, metricsConnFile, metricsPrefix, rsssize, whiteListBinPaths) ); + actors.push_back( fdbd(connectionFile, localities, processClass, dataFolder, dataFolder, storageMemLimit, metricsConnFile, metricsPrefix, rsssize, whitelistBinPaths) ); //actors.push_back( recurring( []{}, .001 ) ); // for ASIO latency measurement f = stopAfter( waitForAll(actors) ); diff --git a/fdbserver/fdbserver.vcxproj b/fdbserver/fdbserver.vcxproj index 5563e2511e..1d54a9d7fb 100644 --- a/fdbserver/fdbserver.vcxproj +++ 
b/fdbserver/fdbserver.vcxproj @@ -47,6 +47,7 @@ + @@ -165,6 +166,7 @@ + diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp index fd0fa3efca..51e1c4af4f 100644 --- a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -1875,11 +1875,11 @@ snapHelper(StorageServer* data, MutationRef m, Version ver) TraceEvent("IgnoreNonSnapCommands").detail("ExecCommand", cmd); return Void(); } - ExecCmdValueString execArg(m.param2.toString()); - state std::string uidStr = execArg.getBinaryArgValue("uid"); + state ExecCmdValueString execArg(m.param2); + state StringRef uidStr = execArg.getBinaryArgValue(LiteralStringRef("uid")); state int err = 0; state Future cmdErr; - state UID execUID = UID::fromString(uidStr); + state UID execUID = UID::fromString(uidStr.toString()); bool otherRoleExeced = false; // other TLog or storage has initiated the exec, so we can skip @@ -1894,12 +1894,12 @@ snapHelper(StorageServer* data, MutationRef m, Version ver) auto binPath = execArg.getBinaryPath(); auto dataFolder = "path=" + data->folder; vector paramList; - paramList.push_back(binPath); + paramList.push_back(binPath.toString()); // get user passed arguments auto listArgs = execArg.getBinaryArgs(); execArg.dbgPrint(); for (auto elem : listArgs) { - paramList.push_back(elem); + paramList.push_back(elem.toString()); } // get additional arguments paramList.push_back(dataFolder); @@ -1909,14 +1909,14 @@ snapHelper(StorageServer* data, MutationRef m, Version ver) paramList.push_back(versionString); std::string roleString = "role=storage"; paramList.push_back(roleString); - cmdErr = spawnProcess(binPath, paramList, 3.0); + cmdErr = spawnProcess(binPath.toString(), paramList, 3.0); wait(success(cmdErr)); err = cmdErr.get(); } else { // copy the files std::string folder = abspath(data->folder); state std::string folderFrom = folder + "/."; - state std::string folderTo = folder + "-snap-" + uidStr; + state std::string folderTo = folder + "-snap-" + 
uidStr.toString(); vector paramList; std::string mkdirBin = "/bin/mkdir"; @@ -1940,9 +1940,9 @@ snapHelper(StorageServer* data, MutationRef m, Version ver) } clearExecOpInProgress(execUID); } - auto tokenStr = "ExecTrace/storage/" + uidStr; + auto tokenStr = "ExecTrace/storage/" + uidStr.toString(); TraceEvent te = TraceEvent("ExecTraceStorage"); - te.detail("Uid", uidStr); + te.detail("Uid", uidStr.toString()); te.detail("Status", err); te.detail("Role", "storage"); te.detail("Version", ver); diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index ea5152feaf..60c7929fa1 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -724,7 +724,7 @@ ACTOR Future workerServer( ProcessClass initialClass, std::string folder, int64_t memoryLimit, std::string metricsConnFile, std::string metricsPrefix, Promise recoveredDiskFiles, int64_t memoryProfileThreshold, - std::string _coordFolder, std::string whiteListBinPaths) { + std::string _coordFolder, std::string whitelistBinPaths) { state PromiseStream< ErrorInfo > errors; state Reference>> ddInterf( new AsyncVar>() ); state Reference>> rkInterf( new AsyncVar>() ); @@ -1082,7 +1082,7 @@ ACTOR Future workerServer( //printf("Recruited as masterProxyServer\n"); errorForwarders.add( zombie(recruited, forwardError( errors, Role::MASTER_PROXY, recruited.id(), - masterProxyServer( recruited, req, dbInfo, whiteListBinPaths ) ) ) ); + masterProxyServer( recruited, req, dbInfo, whitelistBinPaths ) ) ) ); req.reply.send(recruited); } when( InitializeResolverRequest req = waitNext(interf.resolver.getFuture()) ) { @@ -1196,9 +1196,9 @@ ACTOR Future workerServer( loggingTrigger = delay( loggingDelay, TaskFlushTrace ); } when(state ExecuteRequest req = waitNext(interf.execReq.getFuture())) { - state ExecCmdValueString execArg(req.execPayLoad.toString()); + state ExecCmdValueString execArg(req.execPayload); execArg.dbgPrint(); - state std::string uidStr = execArg.getBinaryArgValue("uid"); + state StringRef 
uidStr = execArg.getBinaryArgValue(LiteralStringRef("uid")); state int err = 0; state Future cmdErr; if (!g_network->isSimulated()) { @@ -1206,11 +1206,11 @@ ACTOR Future workerServer( auto snapBin = execArg.getBinaryPath(); auto dataFolder = "path=" + coordFolder; vector paramList; - paramList.push_back(snapBin); + paramList.push_back(snapBin.toString()); // get user passed arguments auto listArgs = execArg.getBinaryArgs(); for (auto elem : listArgs) { - paramList.push_back(elem); + paramList.push_back(elem.toString()); } // get additional arguments paramList.push_back(dataFolder); @@ -1220,14 +1220,14 @@ ACTOR Future workerServer( paramList.push_back(versionString); std::string roleString = "role=coordinator"; paramList.push_back(roleString); - cmdErr = spawnProcess(snapBin, paramList, 3.0); + cmdErr = spawnProcess(snapBin.toString(), paramList, 3.0); wait(success(cmdErr)); err = cmdErr.get(); } else { // copy the files std::string folder = coordFolder; state std::string folderFrom = "./" + folder + "/."; - state std::string folderTo = "./" + folder + "-snap-" + uidStr; + state std::string folderTo = "./" + folder + "-snap-" + uidStr.toString(); vector paramList; std::string mkdirBin = "/bin/mkdir"; paramList.push_back(mkdirBin); @@ -1248,13 +1248,13 @@ ACTOR Future workerServer( err = cmdErr.get(); } } - auto tokenStr = "ExecTrace/Coordinators/" + uidStr; + auto tokenStr = "ExecTrace/Coordinators/" + uidStr.toString(); auto te = TraceEvent("ExecTraceCoordinators"); - te.detail("Uid", uidStr); + te.detail("Uid", uidStr.toString()); te.detail("Status", err); te.detail("Role", "coordinator"); te.detail("Value", coordFolder); - te.detail("ExecPayLoad", execArg.getCmdValueString()); + te.detail("ExecPayload", execArg.getCmdValueString().toString()); te.trackLatest(tokenStr.c_str()); req.reply.send(Void()); } @@ -1410,15 +1410,15 @@ ACTOR Future fdbd( std::string metricsConnFile, std::string metricsPrefix, int64_t memoryProfileThreshold, - std::string 
whiteListBinPaths) + std::string whitelistBinPaths) { try { ServerCoordinators coordinators( connFile ); if (g_network->isSimulated()) { - whiteListBinPaths = "random_path, /bin/snap_create.sh"; + whitelistBinPaths = "random_path, /bin/snap_create.sh"; } - TraceEvent("StartingFDBD").detail("ZoneID", localities.zoneId()).detail("MachineId", localities.machineId()).detail("DiskPath", dataFolder).detail("CoordPath", coordFolder).detail("WhiteListBinPath", whiteListBinPaths); + TraceEvent("StartingFDBD").detail("ZoneID", localities.zoneId()).detail("MachineId", localities.machineId()).detail("DiskPath", dataFolder).detail("CoordPath", coordFolder).detail("WhiteListBinPath", whitelistBinPaths); // SOMEDAY: start the services on the machine in a staggered fashion in simulation? state vector> v; @@ -1440,7 +1440,7 @@ ACTOR Future fdbd( v.push_back( reportErrors( processClass == ProcessClass::TesterClass ? monitorLeader( connFile, cc ) : clusterController( connFile, cc , asyncPriorityInfo, recoveredDiskFiles.getFuture(), localities ), "ClusterController") ); v.push_back( reportErrors(extractClusterInterface( cc, ci ), "ExtractClusterInterface") ); v.push_back( reportErrors(failureMonitorClient( ci, true ), "FailureMonitorClient") ); - v.push_back( reportErrorsExcept(workerServer(connFile, cc, localities, asyncPriorityInfo, processClass, dataFolder, memoryLimit, metricsConnFile, metricsPrefix, recoveredDiskFiles, memoryProfileThreshold, coordFolder, whiteListBinPaths), "WorkerServer", UID(), &normalWorkerErrors()) ); + v.push_back( reportErrorsExcept(workerServer(connFile, cc, localities, asyncPriorityInfo, processClass, dataFolder, memoryLimit, metricsConnFile, metricsPrefix, recoveredDiskFiles, memoryProfileThreshold, coordFolder, whitelistBinPaths), "WorkerServer", UID(), &normalWorkerErrors()) ); state Future firstConnect = reportErrors( printOnFirstConnected(ci), "ClusterFirstConnectedError" ); wait( quorum(v,1) ); diff --git a/tests/fast/SnapTestFailAndDisablePop.txt 
b/tests/fast/SnapTestFailAndDisablePop.txt index 0dc8df1222..e19532be99 100644 --- a/tests/fast/SnapTestFailAndDisablePop.txt +++ b/tests/fast/SnapTestFailAndDisablePop.txt @@ -32,7 +32,7 @@ testID=7 snapCheck=true ; snapCreate with binary path that is not whitelisted -testTitle=SnapCreateNotWhiteListedBinaryPath +testTitle=SnapCreateNotWhitelistedBinaryPath testName=SnapTest numSnaps=1 maxSnapDelay=3.0 From e91c76834e367a2d7461b7ab5e758e4bd38bce16 Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Wed, 17 Apr 2019 04:38:27 -0700 Subject: [PATCH 28/69] tlog: move snap create part to indepdendent funcs --- fdbserver/OldTLogServer_6_0.actor.cpp | 478 +++++++++++++------------ fdbserver/TLogServer.actor.cpp | 480 ++++++++++++++------------ 2 files changed, 503 insertions(+), 455 deletions(-) diff --git a/fdbserver/OldTLogServer_6_0.actor.cpp b/fdbserver/OldTLogServer_6_0.actor.cpp index 4731870e31..2cb38dc909 100644 --- a/fdbserver/OldTLogServer_6_0.actor.cpp +++ b/fdbserver/OldTLogServer_6_0.actor.cpp @@ -1278,6 +1278,255 @@ ACTOR Future commitQueue( TLogData* self ) { } } +ACTOR Future execProcessingHelper(TLogData* self, + Reference logData, + TLogCommitRequest* req, + Standalone>* execTags, + ExecCmdValueString* execArg, + StringRef* execCmd, + Version* execVersion) +{ + // inspect the messages to find if there is an Exec type and print + // it. 
message are prefixed by the length of the message and each + // field is prefixed by the length too + uint8_t type = MutationRef::MAX_ATOMIC_OP; + state StringRef param2; + ArenaReader rd(req->arena, req->messages, Unversioned()); + int32_t messageLength, rawLength; + uint16_t tagCount; + uint32_t sub; + while (!rd.empty()) { + Tag tmpTag; + bool hasTxsTag = false; + rd.checkpoint(); + rd >> messageLength >> sub >> tagCount; + for (int i = 0; i < tagCount; i++) { + rd >> tmpTag; + if (tmpTag == txsTag) { + hasTxsTag = true; + } + execTags->push_back(execTags->arena(), tmpTag); + } + if (!hasTxsTag) { + rd >> type; + if (type == MutationRef::Exec) { + break; + } + } + rawLength = messageLength + sizeof(messageLength); + rd.rewind(); + rd.readBytes(rawLength); + } + + int32_t len = 0; + if (type == MutationRef::Exec) { + // get param1 + rd >> len; + *execCmd = StringRef((uint8_t const*)rd.readBytes(len), len); + // get param2 + rd >> len; + param2 = StringRef((uint8_t const*)rd.readBytes(len), len); + + TraceEvent(SevDebug, "TLogExecCommandType", self->dbgid).detail("Value", execCmd->toString()); + + execArg->setCmdValueString(param2); + execArg->dbgPrint(); + state StringRef uidStr = execArg->getBinaryArgValue(LiteralStringRef("uid")); + if (!execCmd->startsWith(LiteralStringRef("\xff"))) { + *execVersion = req->version; + } + if (*execCmd == execSnap) { + // validation check specific to snap request + std::string reason; + if (!self->ignorePopRequest) { + *execVersion = invalidVersion; + reason = "SnapFailIgnorePopNotSet"; + } else if (uidStr.toString() != self->ignorePopUid) { + *execVersion = invalidVersion; + reason = "SnapFailedDisableTLogUidMismatch"; + } + + if (*execVersion == invalidVersion) { + TraceEvent(SevWarn, "TLogSnapFailed") + .detail("IgnorePopUid", self->ignorePopUid) + .detail("IgnorePopRequest", self->ignorePopRequest) + .detail("Reason", reason) + .trackLatest(reason.c_str()); + + auto startTag = logData->allTags.begin(); + std::string message 
= "ExecTrace/TLog/" + logData->allTags.begin()->toString(); + "/" + uidStr.toString(); + TraceEvent("ExecCmdSnapCreate") + .detail("Uid", uidStr.toString()) + .detail("Status", -1) + .detail("Tag", logData->allTags.begin()->toString()) + .detail("Role", "TLog") + .trackLatest(message.c_str()); + } + } + if (*execCmd == execDisableTLogPop) { + self->ignorePopRequest = true; + if (self->ignorePopUid != "") { + TraceEvent(SevWarn, "TLogPopDisableonDisable") + .detail("IgnorePopUid", self->ignorePopUid) + .detail("UidStr", uidStr.toString()); + } + self->ignorePopUid = uidStr.toString(); + // ignorePopRequest will be turned off after 30 seconds + self->ignorePopDeadline = g_network->now() + 30.0; + TraceEvent("TLogExecCmdPopDisable") + .detail("ExecCmd", execCmd->toString()) + .detail("UidStr", uidStr.toString()) + .detail("IgnorePopUid", self->ignorePopUid) + .detail("IgnporePopRequest", self->ignorePopRequest) + .detail("IgnporePopDeadline", self->ignorePopDeadline) + .trackLatest("DisablePopTLog"); + } + if (*execCmd == execEnableTLogPop) { + if (self->ignorePopUid != uidStr.toString()) { + TraceEvent(SevWarn, "TLogPopDisableEnableUidMismatch") + .detail("IgnorePopUid", self->ignorePopUid) + .detail("UidStr", uidStr.toString()) + .trackLatest("TLogPopDisableEnableUidMismatch"); + } + + TraceEvent("EnableTLogPlayAllIgnoredPops"); + // use toBePopped and issue all the pops + state std::map::iterator it; + state vector> ignoredPops; + for (it = self->toBePopped.begin(); it != self->toBePopped.end(); it++) { + TraceEvent("PlayIgnoredPop") + .detail("Tag", it->first.toString()) + .detail("Version", it->second); + ignoredPops.push_back(tLogPopCore(self, it->first, it->second, logData)); + } + wait(waitForAll(ignoredPops)); + self->toBePopped.clear(); + + self->ignorePopRequest = false; + self->ignorePopDeadline = 0.0; + self->ignorePopUid = ""; + TraceEvent("TLogExecCmdPopEnable") + .detail("ExecCmd", execCmd->toString()) + .detail("UidStr", uidStr.toString()) + 
.detail("IgnorePopUid", self->ignorePopUid) + .detail("IgnporePopRequest", self->ignorePopRequest) + .detail("IgnporePopDeadline", self->ignorePopDeadline) + .trackLatest("EnablePopTLog"); + } + } + return Void(); +} + + +ACTOR Future tLogSnapHelper(TLogData* self, + Reference logData, + ExecCmdValueString* execArg, + Version version, + Version execVersion, + StringRef execCmd, + Standalone> execTags) +{ + state int err = 0; + state Future cmdErr; + state StringRef uidStr = execArg->getBinaryArgValue(LiteralStringRef("uid")); + state UID execUID = UID::fromString(uidStr.toString()); + state bool otherRoleExeced = false; + // TLog is special, we need to exec at the execVersion. + // If storage on the same process has initiated the exec then wait for it to + // finish and hold the tlog at the execVersion + while (isExecOpInProgress(execUID)) { + wait(delay(0.1)); + otherRoleExeced = true; + } + if (!otherRoleExeced) { + setExecOpInProgress(execUID); + if (!g_network->isSimulated()) { + // get the bin path + auto snapBin = execArg->getBinaryPath(); + auto dataFolder = "path=" + self->dataFolder; + vector paramList; + paramList.push_back(snapBin.toString()); + // user passed arguments + auto listArgs = execArg->getBinaryArgs(); + for (auto elem : listArgs) { + paramList.push_back(elem.toString()); + } + // additional arguments + paramList.push_back(dataFolder); + const char* version = FDB_VT_VERSION; + std::string versionString = "version="; + versionString += version; + paramList.push_back(versionString); + std::string roleString = "role=tlog"; + paramList.push_back(roleString); + cmdErr = spawnProcess(snapBin.toString(), paramList, 3.0); + wait(success(cmdErr)); + err = cmdErr.get(); + } else { + // copy the entire directory + state std::string tLogFolderFrom = "./" + self->dataFolder + "/."; + state std::string tLogFolderTo = "./" + self->dataFolder + "-snap-" + uidStr.toString(); + vector paramList; + std::string mkdirBin = "/bin/mkdir"; + 
paramList.push_back(mkdirBin); + paramList.push_back(tLogFolderTo); + cmdErr = spawnProcess(mkdirBin, paramList, 3.0); + wait(success(cmdErr)); + err = cmdErr.get(); + if (err == 0) { + vector paramList; + std::string cpBin = "/bin/cp"; + paramList.clear(); + paramList.push_back(cpBin); + paramList.push_back("-a"); + paramList.push_back(tLogFolderFrom); + paramList.push_back(tLogFolderTo); + cmdErr = spawnProcess(cpBin, paramList, 3.0); + wait(success(cmdErr)); + err = cmdErr.get(); + } + } + clearExecOpInProgress(execUID); + } + TraceEvent("TLogCommitExecTraceTLog") + .detail("UidStr", uidStr.toString()) + .detail("Status", err) + .detail("Tag", logData->allTags.begin()->toString()) + .detail("OldTagSize", logData->allTags.size()) + .detail("Role", "TLog"); + + // print the detailed status message + for (int i = 0; i < execTags.size(); i++) { + Version poppedTagVersion = -1; + auto tagv = logData->getTagData(execTags[i]); + if (!tagv) { + continue; + } + poppedTagVersion = tagv->popped; + + state std::string message = "ExecTrace/TLog/" + tagv->tag.toString() + "/" + uidStr.toString(); + + TraceEvent te = TraceEvent(SevDebug, "TLogExecTraceDetailed"); + te.detail("Uid", uidStr.toString()); + te.detail("Status", err); + te.detail("Role", "TLog"); + te.detail("ExecCmd", execCmd.toString()); + te.detail("Param2", execArg->getCmdValueString().toString()); + te.detail("Tag", tagv->tag.toString()); + te.detail("Version", version); + te.detail("PoppedTagVersion", poppedTagVersion); + te.detail("PersistentDataVersion", logData->persistentDataVersion); + te.detail("PersistentDatadurableVersion", logData->persistentDataDurableVersion); + te.detail("QueueCommittedVersion", logData->queueCommittedVersion.get()); + te.detail("IgnorePopUid", self->ignorePopUid); + if (execCmd == execSnap) { + te.trackLatest(message.c_str()); + } + } + return Void(); +} + ACTOR Future tLogCommit( TLogData* self, TLogCommitRequest req, @@ -1331,8 +1580,7 @@ ACTOR Future tLogCommit( state 
ExecCmdValueString execArg(); state TLogQueueEntryRef qe; state StringRef execCmd; - state StringRef param2; - state vector execTags; + state Standalone> execTags; if (logData->version.get() == req.prevVersion) { // Not a duplicate (check relies on no waiting between here and self->version.set() below!) if(req.debugID.present()) @@ -1345,133 +1593,7 @@ ACTOR Future tLogCommit( qe.id = logData->logId; if (req.hasExecOp) { - // inspect the messages to find if there is an Exec type and print - // it. message are prefixed by the length of the message and each - // field is prefixed by the length too - uint8_t type = MutationRef::MAX_ATOMIC_OP; - { - ArenaReader rd(req.arena, qe.messages, Unversioned()); - int32_t messageLength, rawLength; - uint16_t tagCount; - uint32_t sub; - while (!rd.empty()) { - Tag tmpTag; - bool hasTxsTag = false; - rd.checkpoint(); - rd >> messageLength >> sub >> tagCount; - for (int i = 0; i < tagCount; i++) { - rd >> tmpTag; - if (tmpTag == txsTag) { - hasTxsTag = true; - } - execTags.push_back(tmpTag); - } - if (!hasTxsTag) { - rd >> type; - if (type == MutationRef::Exec) { - break; - } - } - rawLength = messageLength + sizeof(messageLength); - rd.rewind(); - rd.readBytes(rawLength); - } - int32_t len = 0; - if (type == MutationRef::Exec) { - // get param1 - rd >> len; - execCmd = StringRef((uint8_t const*)rd.readBytes(len), len); - // get param2 - rd >> len; - param2 = StringRef((uint8_t const*)rd.readBytes(len), len); - - TraceEvent(SevDebug, "TLogExecCommandType", self->dbgid).detail("Value", execCmd.toString()); - - execArg.setCmdValueString(param2); - execArg.dbgPrint(); - state StringRef uidStr = execArg.getBinaryArgValue(LiteralStringRef("uid")); - if (!execCmd.startsWith(LiteralStringRef("\xff"))) { - execVersion = qe.version; - } - if (execCmd == execSnap) { - // validation check specific to snap request - std::string reason; - if (!self->ignorePopRequest) { - execVersion = invalidVersion; - reason = "SnapFailIgnorePopNotSet"; - } 
else if (uidStr.toString() != self->ignorePopUid) { - execVersion = invalidVersion; - reason = "SnapFailedDisableTLogUidMismatch"; - } - - if (execVersion == invalidVersion) { - TraceEvent(SevWarn, "TLogSnapFailed") - .detail("IgnorePopUid", self->ignorePopUid) - .detail("IgnorePopRequest", self->ignorePopRequest) - .detail("Reason", reason) - .trackLatest(reason.c_str()); - - auto startTag = logData->allTags.begin(); - std::string message = "ExecTrace/TLog/" + logData->allTags.begin()->toString(); - "/" + uidStr.toString(); - TraceEvent("ExecCmdSnapCreate") - .detail("Uid", uidStr.toString()) - .detail("Status", -1) - .detail("Tag", logData->allTags.begin()->toString()) - .detail("Role", "TLog") - .trackLatest(message.c_str()); - } - } - if (execCmd == execDisableTLogPop) { - self->ignorePopRequest = true; - if (self->ignorePopUid != "") { - TraceEvent(SevWarn, "TLogPopDisableonDisable") - .detail("IgnorePopUid", self->ignorePopUid) - .detail("UidStr", uidStr.toString()); - } - self->ignorePopUid = uidStr.toString(); - // ignorePopRequest will be turned off after 30 seconds - self->ignorePopDeadline = g_network->now() + 30.0; - TraceEvent("TLogExecCmdPopDisable") - .detail("ExecCmd", execCmd.toString()) - .detail("UidStr", uidStr.toString()) - .detail("IgnorePopUid", self->ignorePopUid) - .detail("IgnporePopRequest", self->ignorePopRequest) - .detail("IgnporePopDeadline", self->ignorePopDeadline) - .trackLatest("DisablePopTLog"); - } - if (execCmd == execEnableTLogPop) { - if (self->ignorePopUid != uidStr.toString()) { - TraceEvent(SevWarn, "TLogPopDisableEnableUidMismatch") - .detail("IgnorePopUid", self->ignorePopUid) - .detail("UidStr", uidStr.toString()) - .trackLatest("TLogPopDisableEnableUidMismatch"); - } - - TraceEvent("EnableTLogPlayAllIgnoredPops"); - // use toBePopped and issue all the pops - state std::map::iterator it; - for (it = self->toBePopped.begin(); it != self->toBePopped.end(); it++) { - TraceEvent("PlayIgnoredPop") - .detail("Tag", 
it->first.toString()) - .detail("Version", it->second); - wait(tLogPopCore(self, it->first, it->second, logData)); - } - self->toBePopped.clear(); - - self->ignorePopRequest = false; - self->ignorePopDeadline = 0.0; - self->ignorePopUid = ""; - TraceEvent("TLogExecCmdPopEnable") - .detail("ExecCmd", execCmd.toString()) - .detail("UidStr", uidStr.toString()) - .detail("IgnorePopUid", self->ignorePopUid) - .detail("IgnporePopRequest", self->ignorePopRequest) - .detail("IgnporePopDeadline", self->ignorePopDeadline) - .trackLatest("EnablePopTLog"); - } - } - } + wait(execProcessingHelper(self, logData, &req, &execTags, &execArg, &execCmd, &execVersion)); if (execVersion != invalidVersion) { TraceEvent(SevDebug, "SettingExecOpCommit") .detail("ExecVersion", execVersion) @@ -1504,103 +1626,7 @@ ACTOR Future tLogCommit( wait( timeoutWarning( logData->queueCommittedVersion.whenAtLeast( req.version ) || stopped, 0.1, warningCollectorInput ) ); if ((execVersion != invalidVersion) && execVersion <= logData->queueCommittedVersion.get()) { - state int err = 0; - state Future cmdErr; - state UID execUID = UID::fromString(uidStr.toString()); - state bool otherRoleExeced = false; - // TLog is special, we need to exec at the execVersion. 
- // If storage on the same process has initiated the exec then wait for it to - // finish and hold the tlog at the execVersion - while (isExecOpInProgress(execUID)) { - wait(delay(0.1)); - otherRoleExeced = true; - } - if (!otherRoleExeced) { - setExecOpInProgress(execUID); - if (!g_network->isSimulated()) { - // get the bin path - auto snapBin = execArg.getBinaryPath(); - auto dataFolder = "path=" + self->dataFolder; - vector paramList; - paramList.push_back(snapBin.toString()); - // user passed arguments - auto listArgs = execArg.getBinaryArgs(); - for (auto elem : listArgs) { - paramList.push_back(elem.toString()); - } - // additional arguments - paramList.push_back(dataFolder); - const char* version = FDB_VT_VERSION; - std::string versionString = "version="; - versionString += version; - paramList.push_back(versionString); - std::string roleString = "role=tlog"; - paramList.push_back(roleString); - cmdErr = spawnProcess(snapBin.toString(), paramList, 3.0); - wait(success(cmdErr)); - err = cmdErr.get(); - } else { - // copy the entire directory - state std::string tLogFolderFrom = "./" + self->dataFolder + "/."; - state std::string tLogFolderTo = "./" + self->dataFolder + "-snap-" + uidStr.toString(); - vector paramList; - std::string mkdirBin = "/bin/mkdir"; - paramList.push_back(mkdirBin); - paramList.push_back(tLogFolderTo); - cmdErr = spawnProcess(mkdirBin, paramList, 3.0); - wait(success(cmdErr)); - err = cmdErr.get(); - if (err == 0) { - vector paramList; - std::string cpBin = "/bin/cp"; - paramList.clear(); - paramList.push_back(cpBin); - paramList.push_back("-a"); - paramList.push_back(tLogFolderFrom); - paramList.push_back(tLogFolderTo); - cmdErr = spawnProcess(cpBin, paramList, 3.0); - wait(success(cmdErr)); - err = cmdErr.get(); - } - } - clearExecOpInProgress(execUID); - } - TraceEvent("TLogCommitExecTraceTLog") - .detail("UidStr", uidStr.toString()) - .detail("Status", err) - .detail("Tag", logData->allTags.begin()->toString()) - 
.detail("OldTagSize", logData->allTags.size()) - .detail("Role", "TLog"); - - // print the detailed status message - for (int i = 0; i < execTags.size(); i++) { - Version poppedTagVersion = -1; - auto tagv = logData->getTagData(execTags[i]); - if (!tagv) { - continue; - } - poppedTagVersion = tagv->popped; - - int len = param2.size(); - state std::string message = "ExecTrace/TLog/" + tagv->tag.toString() + "/" + uidStr.toString(); - - TraceEvent te = TraceEvent(SevDebug, "TLogExecTraceDetailed"); - te.detail("Uid", uidStr.toString()); - te.detail("Status", err); - te.detail("Role", "TLog"); - te.detail("ExecCmd", execCmd.toString()); - te.detail("Param2", param2.toString()); - te.detail("Tag", tagv->tag.toString()); - te.detail("Version", qe.version); - te.detail("PoppedTagVersion", poppedTagVersion); - te.detail("PersistentDataVersion", logData->persistentDataVersion); - te.detail("PersistentDatadurableVersion", logData->persistentDataDurableVersion); - te.detail("QueueCommittedVersion", logData->queueCommittedVersion.get()); - te.detail("IgnorePopUid", self->ignorePopUid); - if (execCmd == execSnap) { - te.trackLatest(message.c_str()); - } - } + wait(tLogSnapHelper(self, logData, &execArg, qe.version, execVersion, execCmd, execTags)); } if (execVersion != invalidVersion && logData->execOpCommitInProgress) { logData->execOpCommitInProgress = false; diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index 4f3d11ca7f..09c01f46b3 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -1645,6 +1645,254 @@ ACTOR Future commitQueue( TLogData* self ) { } } +ACTOR Future execProcessingHelper(TLogData* self, + Reference logData, + TLogCommitRequest* req, + Standalone>* execTags, + ExecCmdValueString* execArg, + StringRef* execCmd, + Version* execVersion) +{ + // inspect the messages to find if there is an Exec type and print + // it. 
message are prefixed by the length of the message and each + // field is prefixed by the length too + uint8_t type = MutationRef::MAX_ATOMIC_OP; + state StringRef param2; + ArenaReader rd(req->arena, req->messages, Unversioned()); + int32_t messageLength, rawLength; + uint16_t tagCount; + uint32_t sub; + while (!rd.empty()) { + Tag tmpTag; + bool hasTxsTag = false; + rd.checkpoint(); + rd >> messageLength >> sub >> tagCount; + for (int i = 0; i < tagCount; i++) { + rd >> tmpTag; + if (tmpTag == txsTag) { + hasTxsTag = true; + } + execTags->push_back(execTags->arena(), tmpTag); + } + if (!hasTxsTag) { + rd >> type; + if (type == MutationRef::Exec) { + break; + } + } + rawLength = messageLength + sizeof(messageLength); + rd.rewind(); + rd.readBytes(rawLength); + } + + int32_t len = 0; + if (type == MutationRef::Exec) { + // get param1 + rd >> len; + *execCmd = StringRef((uint8_t const*)rd.readBytes(len), len); + // get param2 + rd >> len; + param2 = StringRef((uint8_t const*)rd.readBytes(len), len); + + TraceEvent(SevDebug, "TLogExecCommandType", self->dbgid).detail("Value", execCmd->toString()); + + execArg->setCmdValueString(param2); + execArg->dbgPrint(); + state StringRef uidStr = execArg->getBinaryArgValue(LiteralStringRef("uid")); + if (!execCmd->startsWith(LiteralStringRef("\xff"))) { + *execVersion = req->version; + } + if (*execCmd == execSnap) { + // validation check specific to snap request + std::string reason; + if (!self->ignorePopRequest) { + *execVersion = invalidVersion; + reason = "SnapFailIgnorePopNotSet"; + } else if (uidStr.toString() != self->ignorePopUid) { + *execVersion = invalidVersion; + reason = "SnapFailedDisableTLogUidMismatch"; + } + + if (*execVersion == invalidVersion) { + TraceEvent(SevWarn, "TLogSnapFailed") + .detail("IgnorePopUid", self->ignorePopUid) + .detail("IgnorePopRequest", self->ignorePopRequest) + .detail("Reason", reason) + .trackLatest(reason.c_str()); + + auto startTag = logData->allTags.begin(); + std::string message 
= "ExecTrace/TLog/" + logData->allTags.begin()->toString(); + "/" + uidStr.toString(); + TraceEvent("ExecCmdSnapCreate") + .detail("Uid", uidStr.toString()) + .detail("Status", -1) + .detail("Tag", logData->allTags.begin()->toString()) + .detail("Role", "TLog") + .trackLatest(message.c_str()); + } + } + if (*execCmd == execDisableTLogPop) { + self->ignorePopRequest = true; + if (self->ignorePopUid != "") { + TraceEvent(SevWarn, "TLogPopDisableonDisable") + .detail("IgnorePopUid", self->ignorePopUid) + .detail("UidStr", uidStr.toString()); + } + self->ignorePopUid = uidStr.toString(); + // ignorePopRequest will be turned off after 30 seconds + self->ignorePopDeadline = g_network->now() + 30.0; + TraceEvent("TLogExecCmdPopDisable") + .detail("ExecCmd", execCmd->toString()) + .detail("UidStr", uidStr.toString()) + .detail("IgnorePopUid", self->ignorePopUid) + .detail("IgnporePopRequest", self->ignorePopRequest) + .detail("IgnporePopDeadline", self->ignorePopDeadline) + .trackLatest("DisablePopTLog"); + } + if (*execCmd == execEnableTLogPop) { + if (self->ignorePopUid != uidStr.toString()) { + TraceEvent(SevWarn, "TLogPopDisableEnableUidMismatch") + .detail("IgnorePopUid", self->ignorePopUid) + .detail("UidStr", uidStr.toString()) + .trackLatest("TLogPopDisableEnableUidMismatch"); + } + + TraceEvent("EnableTLogPlayAllIgnoredPops"); + // use toBePopped and issue all the pops + state std::map::iterator it; + state vector> ignoredPops; + for (it = self->toBePopped.begin(); it != self->toBePopped.end(); it++) { + TraceEvent("PlayIgnoredPop") + .detail("Tag", it->first.toString()) + .detail("Version", it->second); + ignoredPops.push_back(tLogPopCore(self, it->first, it->second, logData)); + } + wait(waitForAll(ignoredPops)); + self->toBePopped.clear(); + + self->ignorePopRequest = false; + self->ignorePopDeadline = 0.0; + self->ignorePopUid = ""; + TraceEvent("TLogExecCmdPopEnable") + .detail("ExecCmd", execCmd->toString()) + .detail("UidStr", uidStr.toString()) + 
.detail("IgnorePopUid", self->ignorePopUid) + .detail("IgnporePopRequest", self->ignorePopRequest) + .detail("IgnporePopDeadline", self->ignorePopDeadline) + .trackLatest("EnablePopTLog"); + } + } + return Void(); +} + +ACTOR Future tLogSnapHelper(TLogData* self, + Reference logData, + ExecCmdValueString* execArg, + Version version, + Version execVersion, + StringRef execCmd, + Standalone> execTags) +{ + state int err = 0; + state Future cmdErr; + state StringRef uidStr = execArg->getBinaryArgValue(LiteralStringRef("uid")); + state UID execUID = UID::fromString(uidStr.toString()); + state bool otherRoleExeced = false; + // TLog is special, we need to exec at the execVersion. + // If storage on the same process has initiated the exec then wait for it to + // finish and hold the tlog at the execVersion + while (isExecOpInProgress(execUID)) { + wait(delay(0.1)); + otherRoleExeced = true; + } + if (!otherRoleExeced) { + setExecOpInProgress(execUID); + if (!g_network->isSimulated()) { + // get the bin path + auto snapBin = execArg->getBinaryPath(); + auto dataFolder = "path=" + self->dataFolder; + vector paramList; + paramList.push_back(snapBin.toString()); + // user passed arguments + auto listArgs = execArg->getBinaryArgs(); + for (auto elem : listArgs) { + paramList.push_back(elem.toString()); + } + // additional arguments + paramList.push_back(dataFolder); + const char* version = FDB_VT_VERSION; + std::string versionString = "version="; + versionString += version; + paramList.push_back(versionString); + std::string roleString = "role=tlog"; + paramList.push_back(roleString); + cmdErr = spawnProcess(snapBin.toString(), paramList, 3.0); + wait(success(cmdErr)); + err = cmdErr.get(); + } else { + // copy the entire directory + state std::string tLogFolderFrom = "./" + self->dataFolder + "/."; + state std::string tLogFolderTo = "./" + self->dataFolder + "-snap-" + uidStr.toString(); + vector paramList; + std::string mkdirBin = "/bin/mkdir"; + 
paramList.push_back(mkdirBin); + paramList.push_back(tLogFolderTo); + cmdErr = spawnProcess(mkdirBin, paramList, 3.0); + wait(success(cmdErr)); + err = cmdErr.get(); + if (err == 0) { + vector paramList; + std::string cpBin = "/bin/cp"; + paramList.clear(); + paramList.push_back(cpBin); + paramList.push_back("-a"); + paramList.push_back(tLogFolderFrom); + paramList.push_back(tLogFolderTo); + cmdErr = spawnProcess(cpBin, paramList, 3.0); + wait(success(cmdErr)); + err = cmdErr.get(); + } + } + clearExecOpInProgress(execUID); + } + TraceEvent("TLogCommitExecTraceTLog") + .detail("UidStr", uidStr.toString()) + .detail("Status", err) + .detail("Tag", logData->allTags.begin()->toString()) + .detail("OldTagSize", logData->allTags.size()) + .detail("Role", "TLog"); + + // print the detailed status message + for (int i = 0; i < execTags.size(); i++) { + Version poppedTagVersion = -1; + auto tagv = logData->getTagData(execTags[i]); + if (!tagv) { + continue; + } + poppedTagVersion = tagv->popped; + + state std::string message = "ExecTrace/TLog/" + tagv->tag.toString() + "/" + uidStr.toString(); + + TraceEvent te = TraceEvent(SevDebug, "TLogExecTraceDetailed"); + te.detail("Uid", uidStr.toString()); + te.detail("Status", err); + te.detail("Role", "TLog"); + te.detail("ExecCmd", execCmd.toString()); + te.detail("Param2", execArg->getCmdValueString().toString()); + te.detail("Tag", tagv->tag.toString()); + te.detail("Version", version); + te.detail("PoppedTagVersion", poppedTagVersion); + te.detail("PersistentDataVersion", logData->persistentDataVersion); + te.detail("PersistentDatadurableVersion", logData->persistentDataDurableVersion); + te.detail("QueueCommittedVersion", logData->queueCommittedVersion.get()); + te.detail("IgnorePopUid", self->ignorePopUid); + if (execCmd == execSnap) { + te.trackLatest(message.c_str()); + } + } + return Void(); +} + ACTOR Future tLogCommit( TLogData* self, TLogCommitRequest req, @@ -1698,8 +1946,7 @@ ACTOR Future tLogCommit( state 
ExecCmdValueString execArg(); state TLogQueueEntryRef qe; state StringRef execCmd; - state StringRef param2; - state vector execTags; + state Standalone> execTags; if (logData->version.get() == req.prevVersion) { // Not a duplicate (check relies on no waiting between here and self->version.set() below!) if(req.debugID.present()) @@ -1712,136 +1959,7 @@ ACTOR Future tLogCommit( qe.id = logData->logId; if (req.hasExecOp) { - // inspect the messages to find if there is an Exec type and print - // it. message are prefixed by the length of the message and each - // field is prefixed by the length too - uint8_t type = MutationRef::MAX_ATOMIC_OP; - { - ArenaReader rd(req.arena, qe.messages, Unversioned()); - int32_t messageLength, rawLength; - uint16_t tagCount; - uint32_t sub; - while(!rd.empty()) { - Tag tmpTag; - bool hasTxsTag = false; - rd.checkpoint(); - rd >> messageLength >> sub >> tagCount; - for(int i = 0; i < tagCount; i++) { - rd >> tmpTag; - if (tmpTag == txsTag) { - hasTxsTag = true; - } - execTags.push_back(tmpTag); - } - if (!hasTxsTag) { - rd >> type; - if (type == MutationRef::Exec) { - break; - } - } - rawLength = messageLength + sizeof(messageLength); - rd.rewind(); - rd.readBytes(rawLength); - } - int32_t len = 0; - if (type == MutationRef::Exec) { - // get param1 - rd >> len; - execCmd = StringRef((uint8_t const*)rd.readBytes(len), len); - // get param2 - rd >> len; - param2 = StringRef((uint8_t const*)rd.readBytes(len), len); - - TraceEvent(SevDebug, "TLogExecCommandType", self->dbgid).detail("Value", execCmd.toString()); - - execArg.setCmdValueString(param2); - execArg.dbgPrint(); - state StringRef uidStr = execArg.getBinaryArgValue(LiteralStringRef("uid")); - if (!execCmd.startsWith(LiteralStringRef("\xff"))) { - execVersion = qe.version; - } - if (execCmd == execSnap) { - // validation check specific to snap request - std::string reason; - if (!self->ignorePopRequest) { - execVersion = invalidVersion; - reason = "SnapFailIgnorePopNotSet"; - } 
else if (uidStr.toString() != self->ignorePopUid) { - execVersion = invalidVersion; - reason = "SnapFailedDisableTLogUidMismatch"; - } - - if (execVersion == invalidVersion) { - TraceEvent(SevWarn, "TLogSnapFailed") - .detail("IgnorePopUid", self->ignorePopUid) - .detail("IgnorePopRequest", self->ignorePopRequest) - .detail("Reason", reason) - .trackLatest(reason.c_str()); - - auto startTag = logData->allTags.begin(); - std::string message = "ExecTrace/TLog/" + - logData->allTags.begin()->toString(); - "/" + uidStr.toString(); - TraceEvent("ExecCmdSnapCreate") - .detail("Uid", uidStr.toString()) - .detail("Status", -1) - .detail("Tag", logData->allTags.begin()->toString()) - .detail("Role", "TLog") - .trackLatest(message.c_str()); - } - } - if (execCmd == execDisableTLogPop) { - execVersion = invalidVersion; - self->ignorePopRequest = true; - if (self->ignorePopUid != "") { - TraceEvent(SevWarn, "TLogPopDisableOnDisable") - .detail("IgnorePopUid", self->ignorePopUid) - .detail("UidStr", uidStr.toString()); - } - self->ignorePopUid = uidStr.toString(); - // ignorePopRequest will be turned off after 30 seconds - self->ignorePopDeadline = g_network->now() + 30.0; - TraceEvent("TLogExecCmdPopDisable") - .detail("ExecCmd", execCmd.toString()) - .detail("UidStr", uidStr.toString()) - .detail("IgnorePopUid", self->ignorePopUid) - .detail("IgnporePopRequest", self->ignorePopRequest) - .detail("IgnporePopDeadline", self->ignorePopDeadline) - .trackLatest("DisablePopTLog"); - } - if (execCmd == execEnableTLogPop) { - execVersion = invalidVersion; - if (self->ignorePopUid != uidStr.toString()) { - TraceEvent(SevWarn, "TLogPopDisableEnableUidMismatch") - .detail("IgnorePopUid", self->ignorePopUid) - .detail("UidStr", uidStr.toString()) - .trackLatest("TLogPopDisableEnableUidMismatch"); - } - - TraceEvent("EnableTLogPlayAllIgnoredPops"); - // use toBePopped and issue all the pops - state std::map::iterator it; - for (it = self->toBePopped.begin(); it != self->toBePopped.end(); 
it++) { - TraceEvent("PlayIgnoredPop") - .detail("Tag", it->first.toString()) - .detail("Version", it->second); - wait(tLogPopCore(self, it->first, it->second, logData)); - } - self->toBePopped.clear(); - - self->ignorePopRequest = false; - self->ignorePopDeadline = 0.0; - self->ignorePopUid = ""; - TraceEvent("TLogExecCmdPopEnable") - .detail("ExecCmd", execCmd.toString()) - .detail("UidStr", uidStr.toString()) - .detail("IgnorePopUid", self->ignorePopUid) - .detail("IgnporePopRequest", self->ignorePopRequest) - .detail("IgnporePopDeadline", self->ignorePopDeadline) - .trackLatest("EnablePopTLog"); - } - } - } + wait(execProcessingHelper(self, logData, &req, &execTags, &execArg, &execCmd, &execVersion)); if (execVersion != invalidVersion) { TraceEvent(SevDebug, "SettingExecOpCommit") .detail("ExecVersion", execVersion) @@ -1875,103 +1993,7 @@ ACTOR Future tLogCommit( if ((execVersion != invalidVersion) && execVersion <= logData->queueCommittedVersion.get()) { - state int err = 0; - state UID execUID = UID::fromString(uidStr.toString()); - state bool otherRoleExeced = false; - // TLog is special, we need to exec at the execVersion. 
- // If storage on the same process has initiated the exec then wait for it to - // finish and hold the tlog at the execVersion - while (isExecOpInProgress(execUID)) { - wait(delay(0.1)); - otherRoleExeced = true; - } - state Future cmdErr; - if (!otherRoleExeced) { - setExecOpInProgress(execUID); - if (!g_network->isSimulated()) { - // get bin path - auto snapBin = execArg.getBinaryPath(); - auto dataFolder = "path=" + self->dataFolder; - vector paramList; - paramList.push_back(snapBin.toString()); - // user passed arguments - auto listArgs = execArg.getBinaryArgs(); - for (auto elem : listArgs) { - paramList.push_back(elem.toString()); - } - // additional arguments - paramList.push_back(dataFolder); - const char* version = FDB_VT_VERSION; - std::string versionString = "version="; - versionString += version; - paramList.push_back(versionString); - std::string roleString = "role=tlog"; - paramList.push_back(roleString); - cmdErr = spawnProcess(snapBin.toString(), paramList, 3.0); - wait(success(cmdErr)); - err = cmdErr.get(); - } else { - // copy the entire directory - state std::string tLogFolderFrom = "./" + self->dataFolder + "/."; - state std::string tLogFolderTo = "./" + self->dataFolder + "-snap-" + uidStr.toString(); - vector paramList; - std::string mkdirBin = "/bin/mkdir"; - paramList.push_back(mkdirBin); - paramList.push_back(tLogFolderTo); - cmdErr = spawnProcess(mkdirBin, paramList, 3.0); - wait(success(cmdErr)); - err = cmdErr.get(); - if (err == 0) { - vector paramList; - std::string cpBin = "/bin/cp"; - paramList.clear(); - paramList.push_back(cpBin); - paramList.push_back("-a"); - paramList.push_back(tLogFolderFrom); - paramList.push_back(tLogFolderTo); - cmdErr = spawnProcess(cpBin, paramList, 3.0); - wait(success(cmdErr)); - err = cmdErr.get(); - } - } - clearExecOpInProgress(execUID); - } - TraceEvent("TLogCommitExecTraceLog") - .detail("UidStr", uidStr.toString()) - .detail("Status", err) - .detail("Tag", logData->allTags.begin()->toString()) - 
.detail("TagSize", logData->allTags.size()) - .detail("Role", "TLog"); - - // print the status message - for (int i = 0; i < execTags.size(); i++) { - Version poppedTagVersion = -1; - auto tagv = logData->getTagData(execTags[i]); - if (!tagv) { - continue; - } - poppedTagVersion = tagv->popped; - - int len = param2.size(); - state std::string message = "ExecTrace/TLog/" + tagv->tag.toString() + "/" + uidStr.toString(); - - TraceEvent te = TraceEvent(SevDebug, "TLogExecTraceDetailed"); - te.detail("Uid", uidStr.toString()); - te.detail("Status", err); - te.detail("Role", "TLog"); - te.detail("ExecCmd", execCmd.toString()); - te.detail("Param2", param2.toString()); - te.detail("Tag", tagv->tag.toString()); - te.detail("Version", qe.version); - te.detail("PoppedTagVersion", poppedTagVersion); - te.detail("PersistentDataVersion", logData->persistentDataVersion); - te.detail("PersistentDatadurableVersion", logData->persistentDataDurableVersion); - te.detail("QueueCommittedVersion", logData->queueCommittedVersion.get()); - te.detail("IgnorePopUid", self->ignorePopUid); - if (execCmd == execSnap) { - te.trackLatest(message.c_str()); - } - } + wait(tLogSnapHelper(self, logData, &execArg, qe.version, execVersion, execCmd, execTags)); } if (execVersion != invalidVersion && logData->execOpCommitInProgress) { logData->execOpCommitInProgress = false; From c76cc84ded37f1616c1ad8f452799ab537bfa29e Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Tue, 16 Apr 2019 18:02:46 -0700 Subject: [PATCH 29/69] execute coordinators code reorganized --- fdbserver/MasterProxyServer.actor.cpp | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/fdbserver/MasterProxyServer.actor.cpp b/fdbserver/MasterProxyServer.actor.cpp index d1abbc1554..d15a291683 100644 --- a/fdbserver/MasterProxyServer.actor.cpp +++ b/fdbserver/MasterProxyServer.actor.cpp @@ -858,8 +858,6 @@ ACTOR Future commitBatch( UNREACHABLE(); - // auto& m = (*pMutations)[mutationNum]; - // 
Check on backing up key, if backup ranges are defined and a normal key if (self->vecBackupKeys.size() > 1 && (normalKeys.contains(m.param1) || m.param1 == metadataVersionKey)) { if (m.type != MutationRef::Type::ClearRange) { @@ -1673,24 +1671,22 @@ ACTOR Future masterProxyServerCore( // send the exec command to the list of workers which are // coordinators - state int i = 0; - state int numSucc = 0; - for (; i < workers.size(); i++) { + state vector> execCoords; + for (int i = 0; i < workers.size(); i++) { if (coordinatorsAddrSet.find(workers[i].interf.address()) != coordinatorsAddrSet.end()) { TraceEvent("ExecReqToCoordinator").detail("WorkerAddr", workers[i].interf.address()); - try { - wait(timeoutError(workers[i].interf.execReq.getReply(ExecuteRequest(execReq.execPayload)), 3.0)); - ++numSucc; - } catch (Error& e) { - TraceEvent("ExecReqFailed").detail("What", e.what()); - } + execCoords.push_back(workers[i].interf.execReq.getReply(ExecuteRequest(execReq.execPayload))); } } - if (numSucc >= (coordinatorsAddrSet.size() + 1) / 2) { - execReq.reply.send(Void()); - } else { - execReq.reply.sendError(operation_failed()); + wait(timeoutError(waitForAll(execCoords), 10.0)); + int numSucc = 0; + for (auto item : execCoords) { + if (item.isValid() && item.isReady()) { + ++numSucc; + } } + bool succ = numSucc >= ((execCoords.size() + 1) / 2); + succ ? 
execReq.reply.send(Void()) : execReq.reply.sendError(operation_failed()); } when(TxnStateRequest req = waitNext(proxy.txnState.getFuture())) { state ReplyPromise reply = req.reply; From b6e037ffbc4c21d10d82bf0d1893f628296482da Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Fri, 19 Apr 2019 06:33:18 -0700 Subject: [PATCH 30/69] Replace fork with boost::process::child --- fdbserver/FDBExecArgs.h | 2 +- fdbserver/OldTLogServer_6_0.actor.cpp | 11 ++-- fdbserver/TLogServer.actor.cpp | 11 ++-- fdbserver/WorkerInterface.actor.h | 2 +- fdbserver/storageserver.actor.cpp | 14 ++--- fdbserver/worker.actor.cpp | 83 +++++++++++++-------------- flow/Platform.cpp | 55 ------------------ flow/Platform.h | 17 ------ 8 files changed, 56 insertions(+), 139 deletions(-) diff --git a/fdbserver/FDBExecArgs.h b/fdbserver/FDBExecArgs.h index caf9fc3858..2b4b1dae40 100644 --- a/fdbserver/FDBExecArgs.h +++ b/fdbserver/FDBExecArgs.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include "flow/Arena.h" // execute/snapshot command takes two arguments: // param1 - represents the command type/name diff --git a/fdbserver/OldTLogServer_6_0.actor.cpp b/fdbserver/OldTLogServer_6_0.actor.cpp index 2cb38dc909..cb335b1967 100644 --- a/fdbserver/OldTLogServer_6_0.actor.cpp +++ b/fdbserver/OldTLogServer_6_0.actor.cpp @@ -1445,7 +1445,7 @@ ACTOR Future tLogSnapHelper(TLogData* self, // get the bin path auto snapBin = execArg->getBinaryPath(); auto dataFolder = "path=" + self->dataFolder; - vector paramList; + std::vector paramList; paramList.push_back(snapBin.toString()); // user passed arguments auto listArgs = execArg->getBinaryArgs(); @@ -1467,22 +1467,19 @@ ACTOR Future tLogSnapHelper(TLogData* self, // copy the entire directory state std::string tLogFolderFrom = "./" + self->dataFolder + "/."; state std::string tLogFolderTo = "./" + self->dataFolder + "-snap-" + uidStr.toString(); - vector paramList; + std::vector paramList; std::string mkdirBin = "/bin/mkdir"; - 
paramList.push_back(mkdirBin); paramList.push_back(tLogFolderTo); cmdErr = spawnProcess(mkdirBin, paramList, 3.0); wait(success(cmdErr)); err = cmdErr.get(); if (err == 0) { - vector paramList; + std::vector paramList; std::string cpBin = "/bin/cp"; - paramList.clear(); - paramList.push_back(cpBin); paramList.push_back("-a"); paramList.push_back(tLogFolderFrom); paramList.push_back(tLogFolderTo); - cmdErr = spawnProcess(cpBin, paramList, 3.0); + cmdErr = spawnProcess(cpBin, paramList, 3.0, true /*isSync*/); wait(success(cmdErr)); err = cmdErr.get(); } diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index 09c01f46b3..d8c0880af5 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -1811,7 +1811,7 @@ ACTOR Future tLogSnapHelper(TLogData* self, // get the bin path auto snapBin = execArg->getBinaryPath(); auto dataFolder = "path=" + self->dataFolder; - vector paramList; + std::vector paramList; paramList.push_back(snapBin.toString()); // user passed arguments auto listArgs = execArg->getBinaryArgs(); @@ -1833,22 +1833,19 @@ ACTOR Future tLogSnapHelper(TLogData* self, // copy the entire directory state std::string tLogFolderFrom = "./" + self->dataFolder + "/."; state std::string tLogFolderTo = "./" + self->dataFolder + "-snap-" + uidStr.toString(); - vector paramList; + std::vector paramList; std::string mkdirBin = "/bin/mkdir"; - paramList.push_back(mkdirBin); paramList.push_back(tLogFolderTo); cmdErr = spawnProcess(mkdirBin, paramList, 3.0); wait(success(cmdErr)); err = cmdErr.get(); if (err == 0) { - vector paramList; + std::vector paramList; std::string cpBin = "/bin/cp"; - paramList.clear(); - paramList.push_back(cpBin); paramList.push_back("-a"); paramList.push_back(tLogFolderFrom); paramList.push_back(tLogFolderTo); - cmdErr = spawnProcess(cpBin, paramList, 3.0); + cmdErr = spawnProcess(cpBin, paramList, 3.0, true /*isSync*/); wait(success(cmdErr)); err = cmdErr.get(); } diff --git 
a/fdbserver/WorkerInterface.actor.h b/fdbserver/WorkerInterface.actor.h index b20ebd81b6..cc26b77da3 100644 --- a/fdbserver/WorkerInterface.actor.h +++ b/fdbserver/WorkerInterface.actor.h @@ -451,7 +451,7 @@ typedef decltype(&tLog) TLogFn; // spawns a process pointed by `binPath` and the arguments provided at `paramList`, // if the process spawned takes more than `maxWaitTime` then it will be killed -ACTOR Future spawnProcess(std::string binPath, vector paramList, double maxWaitTime, bool isSync = false); +ACTOR Future spawnProcess(std::string binPath, std::vector paramList, double maxWaitTime, bool isSync = false); // returns true if the execUID op is in progress bool isExecOpInProgress(UID execUID); diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp index 51e1c4af4f..2c6967ee22 100644 --- a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -1889,11 +1889,11 @@ snapHelper(StorageServer* data, MutationRef m, Version ver) if (!otherRoleExeced) { setExecOpInProgress(execUID); - if (!g_network->isSimulated() || cmd != execSnap) { + if (!g_network->isSimulated()) { // get bin path auto binPath = execArg.getBinaryPath(); auto dataFolder = "path=" + data->folder; - vector paramList; + std::vector paramList; paramList.push_back(binPath.toString()); // get user passed arguments auto listArgs = execArg.getBinaryArgs(); @@ -1917,23 +1917,19 @@ snapHelper(StorageServer* data, MutationRef m, Version ver) std::string folder = abspath(data->folder); state std::string folderFrom = folder + "/."; state std::string folderTo = folder + "-snap-" + uidStr.toString(); - vector paramList; + std::vector paramList; std::string mkdirBin = "/bin/mkdir"; - - paramList.push_back(mkdirBin); paramList.push_back(folderTo); cmdErr = spawnProcess(mkdirBin, paramList, 3.0); wait(success(cmdErr)); err = cmdErr.get(); if (err == 0) { - vector paramList; + std::vector paramList; std::string cpBin = "/bin/cp"; - paramList.clear(); - 
paramList.push_back(cpBin); paramList.push_back("-a"); paramList.push_back(folderFrom); paramList.push_back(folderTo); - cmdErr = spawnProcess(cpBin, paramList, 3.0); + cmdErr = spawnProcess(cpBin, paramList, 3.0, true /*isSync*/); wait(success(cmdErr)); err = cmdErr.get(); } diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index 60c7929fa1..eb163a587c 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -19,6 +19,15 @@ */ #include +#define BOOST_SYSTEM_NO_LIB +#define BOOST_DATE_TIME_NO_LIB +#define BOOST_REGEX_NO_LIB +#include "boost/process.hpp" +// c.wait() conflicts with ACTOR compiler +void childWait(boost::process::child& c) { + c.wait(); + return; +} #include "flow/ActorCollection.h" #include "flow/SystemMonitor.h" @@ -1205,7 +1214,7 @@ ACTOR Future workerServer( // get bin path auto snapBin = execArg.getBinaryPath(); auto dataFolder = "path=" + coordFolder; - vector paramList; + std::vector paramList; paramList.push_back(snapBin.toString()); // get user passed arguments auto listArgs = execArg.getBinaryArgs(); @@ -1228,22 +1237,19 @@ ACTOR Future workerServer( std::string folder = coordFolder; state std::string folderFrom = "./" + folder + "/."; state std::string folderTo = "./" + folder + "-snap-" + uidStr.toString(); - vector paramList; + std::vector paramList; std::string mkdirBin = "/bin/mkdir"; - paramList.push_back(mkdirBin); paramList.push_back(folderTo); cmdErr = spawnProcess(mkdirBin, paramList, 3.0); wait(success(cmdErr)); err = cmdErr.get(); if (err == 0) { - vector paramList; + std::vector paramList; std::string cpBin = "/bin/cp"; - paramList.clear(); - paramList.push_back(cpBin); paramList.push_back("-a"); paramList.push_back(folderFrom); paramList.push_back(folderTo); - cmdErr = spawnProcess(cpBin, paramList, 3.0); + cmdErr = spawnProcess(cpBin, paramList, 3.0, true /*isSync*/); wait(success(cmdErr)); err = cmdErr.get(); } @@ -1452,49 +1458,42 @@ ACTOR Future fdbd( } } -ACTOR Future 
spawnProcess(std::string binPath, vector paramList, double maxWaitTime, bool isSync) +ACTOR Future spawnProcess(std::string binPath, std::vector paramList, double maxWaitTime, bool isSync) { - state pid_t pid = -1; - try { - pid = fdbForkSpawn(binPath, paramList); - } catch (Error& e) { - TraceEvent("fdbForkSpawnFailed") - .detail("Error", e.what()); - } - if (pid < 0) { - return -1; + std::string argsString; + for (int i = 0; i < paramList.size(); i++) { + argsString += paramList[i] + ","; } + TraceEvent("SpawnProcess").detail("Cmd", binPath).detail("Args", argsString); - if (!isSync && g_network->isSimulated()) { - wait(delay(g_random->random01())); - } - - state double sleepTime = 0; state int err = 0; - while (true) { - err = fdbForkWaitPid(pid, g_network->isSimulated() ? true : false); - if (g_network->isSimulated()) { - if (err == pid) { - return 0; - } - return err; + state double runTime = 0; + state boost::process::child c(binPath, boost::process::args(paramList)); + if (!isSync) { + while (c.running() && runTime <= maxWaitTime) { + wait(delay(0.1)); + runTime += 0.1; } - if (err != EINPROGRESS) { - break; + if (c.running()) { + c.terminate(); + err = -1; + } else { + err = c.exit_code(); } - - sleepTime += 0.1; - wait(delay(0.1)); - if (sleepTime > maxWaitTime) { - TraceEvent(SevWarnAlways, "SpawnProcessTookTooLong") - .detail("Error", EINPROGRESS); - kill(pid, SIGTERM); - // FIXME, we can end up here in a rare situation, - // make this asynchronous - fdbForkWaitPid(pid, true); - return -1; + childWait(c); + } else { + state std::error_code errCode; + bool succ = c.wait_for(std::chrono::seconds(3), errCode); + err = errCode.value(); + if (!succ) { + err = -1; + c.terminate(); + childWait(c); } } + TraceEvent("SpawnProcess") + .detail("Cmd", binPath) + .detail("Error", err); return err; } diff --git a/flow/Platform.cpp b/flow/Platform.cpp index 8efb6c4a45..04b51b407c 100644 --- a/flow/Platform.cpp +++ b/flow/Platform.cpp @@ -2683,61 +2683,6 @@ void* 
loadFunction(void* lib, const char* func_name) { return dlfcn; } -int -fdbForkSpawn(const std::string& path, const std::vector& args) -{ - std::vector paramList; - for (int i = 0; i < args.size(); i++) { - paramList.push_back(const_cast(args[i].c_str())); - } - paramList.push_back(nullptr); - - std::string argsString; - for (int i = 0; i < args.size(); i++) { - argsString += args[i] + ","; - } - TraceEvent("FdbFork").detail("Cmd", path).detail("Args", argsString); - - pid_t pid = fork(); - if (pid == -1) { - TraceEvent(SevWarnAlways, "CommandFailedToSpawn").detail("Cmd", path); - throw platform_error(); - } else if (pid > 0) { - // parent process returns with child's pid - return pid; - } - // child process - execv(const_cast(path.c_str()), ¶mList[0]); - _exit(EXIT_FAILURE); - return pid; -} - -int fdbForkWaitPid(pid_t pid, bool isSync) -{ - int status; - int err = waitpid(pid, &status, (!isSync) ? WNOHANG : 0); - if (isSync) { - err = WIFEXITED(status) ? WEXITSTATUS(status) : -1; - return err; - } - if (err == 0) { - return EINPROGRESS; - } - - if (err == -1 || WIFSIGNALED(status)) { - err = -1; - } else if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { - err = 0; - } else { - err = WIFEXITED(status) ? WEXITSTATUS(status) : -1; - } - TraceEvent((err == 0) ? SevInfo : SevWarnAlways, "CommandStatus") - .detail("Pid", pid) - .detail("Errno", WIFEXITED(status) ? WEXITSTATUS(status) : -1); - return err; -} - - void platformInit() { #ifdef WIN32 _set_FMA3_enable(0); // Workaround for VS 2013 code generation bug. 
See https://connect.microsoft.com/VisualStudio/feedback/details/811093/visual-studio-2013-rtm-c-x64-code-generation-bug-for-avx2-instructions diff --git a/flow/Platform.h b/flow/Platform.h index a2141f611f..b6183bbc1c 100644 --- a/flow/Platform.h +++ b/flow/Platform.h @@ -533,23 +533,6 @@ bool isLibraryLoaded(const char* lib_path); void* loadLibrary(const char* lib_path); void* loadFunction(void* lib, const char* func_name); -// spwans a process with fork and execv, caller needs to use fdbForkWaitPid to -// find the status of the process and cleanup the resources -// takes two arguments: -// 1. path to the binary -// 2. list of arguments -// returns: -// returns pid of the process being spawned -// throws platform_error() if it is not able to spawn the process -int fdbForkSpawn(const std::string& path, const std::vector& args); - -// checks the completion of the process spawned by fdbForkSpawn -// returns -// - 0 for successful completion and -// - EINPROGRESS if pid is still running -// - exit code or -1 otherwise -int fdbForkWaitPid(pid_t pid, bool isSync = false); - #ifdef _WIN32 inline static int ctzll( uint64_t value ) { unsigned long count = 0; From ec7834e2f7bbeb8912ca610364c3fd8e19bfd1bf Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Sat, 20 Apr 2019 12:58:24 -0700 Subject: [PATCH 31/69] code re-orgnaization and address comments --- fdbclient/ManagementAPI.actor.cpp | 3 +- fdbclient/NativeAPI.actor.cpp | 10 +- fdbserver/CMakeLists.txt | 4 +- fdbserver/FDBExecArgs.cpp | 104 ------------ fdbserver/FDBExecArgs.h | 39 ----- fdbserver/FDBExecHelper.actor.cpp | 217 +++++++++++++++++++++++++ fdbserver/FDBExecHelper.actor.h | 70 ++++++++ fdbserver/MasterProxyServer.actor.cpp | 35 ++-- fdbserver/OldTLogServer_6_0.actor.cpp | 68 ++------ fdbserver/TLogServer.actor.cpp | 73 ++------- fdbserver/WorkerInterface.actor.h | 11 -- fdbserver/fdbserver.actor.cpp | 2 - fdbserver/fdbserver.vcxproj | 7 +- fdbserver/storageserver.actor.cpp | 67 ++------ 
fdbserver/worker.actor.cpp | 126 +------------- fdbserver/workloads/SnapTest.actor.cpp | 129 +++++++-------- 16 files changed, 421 insertions(+), 544 deletions(-) delete mode 100644 fdbserver/FDBExecArgs.cpp delete mode 100644 fdbserver/FDBExecArgs.h create mode 100644 fdbserver/FDBExecHelper.actor.cpp create mode 100644 fdbserver/FDBExecHelper.actor.h diff --git a/fdbclient/ManagementAPI.actor.cpp b/fdbclient/ManagementAPI.actor.cpp index 76fac82107..6b344a51a9 100644 --- a/fdbclient/ManagementAPI.actor.cpp +++ b/fdbclient/ManagementAPI.actor.cpp @@ -1488,7 +1488,8 @@ ACTOR Future mgmtSnapCreate(Database cx, StringRef snapCmd) { ++retryCount; TraceEvent(retryCount > 3 ? SevWarn : SevInfo, "SnapCreateFailed").error(e); if (retryCount > 3) { - fprintf(stderr, "Snapshot create failed, %d (%s)\n", e.code(), e.what()); + fprintf(stderr, "Snapshot create failed, %d (%s)." + " Please cleanup any instance level snapshots created.\n", e.code(), e.what()); throw; } } diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 3b76ae312c..4b25b73c0c 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -3320,8 +3320,8 @@ ACTOR Future snapCreate(Database inputCx, StringRef snapCmd, UID snapUID) state Standalone tLogCmdPayloadRef = LiteralStringRef("empty-binary:uid=").withSuffix(snapUIDRef); // disable popping of TLog + tr.reset(); loop { - tr.reset(); try { tr.setOption(FDBTransactionOptions::LOCK_AWARE); tr.execute(execDisableTLogPop, tLogCmdPayloadRef); @@ -3353,8 +3353,8 @@ ACTOR Future snapCreate(Database inputCx, StringRef snapCmd, UID snapUID) // then the snapCreate can end up creating multiple snapshots with // the same name which needs additional handling, hence we fail in // failure cases and let the caller retry with different snapUID + tr.reset(); try { - tr.reset(); tr.setOption(FDBTransactionOptions::LOCK_AWARE); tr.execute(execSnap, snapPayloadRef); wait(tr.commit()); @@ -3371,11 +3371,11 @@ ACTOR Future 
snapCreate(Database inputCx, StringRef snapCmd, UID snapUID) } // enable popping of the TLog + tr.reset(); loop { - tr.reset(); try { - tr.execute(execEnableTLogPop, tLogCmdPayloadRef); tr.setOption(FDBTransactionOptions::LOCK_AWARE); + tr.execute(execEnableTLogPop, tLogCmdPayloadRef); wait(tr.commit()); break; } catch (Error& e) { @@ -3389,7 +3389,7 @@ ACTOR Future snapCreate(Database inputCx, StringRef snapCmd, UID snapUID) // snap the coordinators try { Future exec = executeCoordinators(cx, snapPayloadRef, snapUID); - wait(exec); + wait(timeoutError(exec, 5.0)); } catch (Error& e) { TraceEvent("SnapCreateErrorSnapCoords").error(e); throw; diff --git a/fdbserver/CMakeLists.txt b/fdbserver/CMakeLists.txt index 5dd48ec6d6..016e1cc6f6 100644 --- a/fdbserver/CMakeLists.txt +++ b/fdbserver/CMakeLists.txt @@ -17,8 +17,8 @@ set(FDBSERVER_SRCS DBCoreState.h DiskQueue.actor.cpp fdbserver.actor.cpp - FDBExecArgs.cpp - FDBExecArgs.h + FDBExecHelper.actor.cpp + FDBExecHelper.actor.h IDiskQueue.h IKeyValueStore.h IPager.h diff --git a/fdbserver/FDBExecArgs.cpp b/fdbserver/FDBExecArgs.cpp deleted file mode 100644 index 7a4a949aae..0000000000 --- a/fdbserver/FDBExecArgs.cpp +++ /dev/null @@ -1,104 +0,0 @@ -#include "fdbserver/FDBExecArgs.h" -#include "flow/Trace.h" -#include "flow/flow.h" - -ExecCmdValueString::ExecCmdValueString(StringRef pCmdValueString) { - cmdValueString = pCmdValueString; - parseCmdValue(); -} - -void ExecCmdValueString::setCmdValueString(StringRef pCmdValueString) { - // reset everything - binaryPath = StringRef(); - keyValueMap.clear(); - - // set the new cmdValueString - cmdValueString = pCmdValueString; - - // parse it out - parseCmdValue(); -} - -StringRef ExecCmdValueString::getCmdValueString() { - return cmdValueString.toString(); -} - -StringRef ExecCmdValueString::getBinaryPath() { - return binaryPath; -} - -VectorRef ExecCmdValueString::getBinaryArgs() { - return binaryArgs; -} - -StringRef ExecCmdValueString::getBinaryArgValue(StringRef key) { - 
StringRef res; - if (keyValueMap.find(key) != keyValueMap.end()) { - res = keyValueMap[key]; - } - return res; -} - -void ExecCmdValueString::parseCmdValue() { - StringRef param = this->cmdValueString; - const uint8_t* ptr = param.begin(); - int p = 0; - int pSemiColon = 0; - { - // get the binary path - while (*(ptr + pSemiColon) != ':' && (ptr + pSemiColon) < param.end()) { - pSemiColon++; - } - this->binaryPath = param.substr(p, pSemiColon - p); - } - - // no arguments provided - if ((ptr + pSemiColon) >= param.end()) { - return; - } - - p = pSemiColon + 1; - - { - // extract the arguments - for (; p <= param.size();) { - int pComma = p; - while (*(ptr + pComma) != ',' && (ptr + pComma) < param.end()) { - pComma++; - } - StringRef token = param.substr(p, pComma - p); - this->binaryArgs.push_back(this->binaryArgs.arena(), token); - { - // parse the token to get key,value - int idx = 0; - int pEqual = 0; - const uint8_t* tokenPtr = token.begin(); - while (*(tokenPtr + pEqual) != '=' - && (tokenPtr + pEqual) < token.end()) { - pEqual++; - } - StringRef key = token.substr(idx, pEqual - idx); - StringRef value; - if (pEqual < token.size() - 1) { - value = token.substr(pEqual + 1); - } - keyValueMap.insert(std::pair(key, value)); - } - p = pComma + 1; - } - } - return; -} - -void ExecCmdValueString::dbgPrint() { - auto te = TraceEvent("ExecCmdValueString"); - - te.detail("CmdValueString", cmdValueString.toString()); - te.detail("BinaryPath", binaryPath.toString()); - - int i = 0; - for (auto elem : binaryArgs) { - te.detail(format("Arg", ++i).c_str(), elem.toString()); - } - return; -} diff --git a/fdbserver/FDBExecArgs.h b/fdbserver/FDBExecArgs.h deleted file mode 100644 index 2b4b1dae40..0000000000 --- a/fdbserver/FDBExecArgs.h +++ /dev/null @@ -1,39 +0,0 @@ -#pragma once -#ifndef FDBCLIENT_EXECCMDARGS_H -#define FDBCLIENT_EXECCMDARGS_H -#include -#include -#include -#include "flow/Arena.h" - -// execute/snapshot command takes two arguments: -// param1 - represents 
the command type/name -// param2 - takes a binary path followed by a set of arguments in the following -// format :,... -// this class will abstract the format and give functions to get various pieces -// of information -class ExecCmdValueString { -public: // ctor & dtor - ExecCmdValueString() {} - explicit ExecCmdValueString(StringRef cmdValueString); - -public: // interfaces - StringRef getBinaryPath(); - VectorRef getBinaryArgs(); - StringRef getBinaryArgValue(StringRef key); - void setCmdValueString(StringRef cmdValueString); - StringRef getCmdValueString(void); - -public: // helper functions - void dbgPrint(); - -private: // functions - void parseCmdValue(); - -private: // data - Standalone cmdValueString; - Standalone> binaryArgs; - StringRef binaryPath; - std::map keyValueMap; -}; -#endif diff --git a/fdbserver/FDBExecHelper.actor.cpp b/fdbserver/FDBExecHelper.actor.cpp new file mode 100644 index 0000000000..48fabef789 --- /dev/null +++ b/fdbserver/FDBExecHelper.actor.cpp @@ -0,0 +1,217 @@ +#define BOOST_SYSTEM_NO_LIB +#define BOOST_DATE_TIME_NO_LIB +#define BOOST_REGEX_NO_LIB +#include +#include "fdbserver/FDBExecHelper.actor.h" +#include "flow/Trace.h" +#include "flow/flow.h" +#if defined(CMAKE_BUILD) || !defined(WIN32) +#include "versions.h" +#endif +#include "flow/actorcompiler.h" // This must be the last #include. 
+ +ExecCmdValueString::ExecCmdValueString(StringRef pCmdValueString) { + cmdValueString = pCmdValueString; + parseCmdValue(); +} + +void ExecCmdValueString::setCmdValueString(StringRef pCmdValueString) { + // reset everything + binaryPath = StringRef(); + keyValueMap.clear(); + + // set the new cmdValueString + cmdValueString = pCmdValueString; + + // parse it out + parseCmdValue(); +} + +StringRef ExecCmdValueString::getCmdValueString() { + return cmdValueString.toString(); +} + +StringRef ExecCmdValueString::getBinaryPath() { + return binaryPath; +} + +VectorRef ExecCmdValueString::getBinaryArgs() { + return binaryArgs; +} + +StringRef ExecCmdValueString::getBinaryArgValue(StringRef key) { + StringRef res; + if (keyValueMap.find(key) != keyValueMap.end()) { + res = keyValueMap[key]; + } + return res; +} + +void ExecCmdValueString::parseCmdValue() { + StringRef param = this->cmdValueString; + // get the binary path + this->binaryPath = param.eat(LiteralStringRef(":")); + + // no arguments provided + if (param == StringRef()) { + return; + } + + // extract the arguments + while (param != StringRef()) { + StringRef token = param.eat(LiteralStringRef(",")); + this->binaryArgs.push_back(this->binaryArgs.arena(), token); + + StringRef key = token.eat(LiteralStringRef("=")); + keyValueMap.insert(std::make_pair(key, token)); + } + return; +} + +void ExecCmdValueString::dbgPrint() { + auto te = TraceEvent("ExecCmdValueString"); + + te.detail("CmdValueString", cmdValueString.toString()); + te.detail("BinaryPath", binaryPath.toString()); + + int i = 0; + for (auto elem : binaryArgs) { + te.detail(format("Arg", ++i).c_str(), elem.toString()); + } + return; +} + +ACTOR Future spawnProcess(std::string binPath, std::vector paramList, double maxWaitTime, bool isSync) +{ + state std::string argsString; + for (auto const& elem : paramList) { + argsString += elem + ","; + } + TraceEvent("SpawnProcess").detail("Cmd", binPath).detail("Args", argsString); + + state int err = 0; + 
state double runTime = 0; + state boost::process::child c(binPath, boost::process::args(paramList), + boost::process::std_err > boost::process::null); + if (!isSync) { + while (c.running() && runTime <= maxWaitTime) { + wait(delay(0.1)); + runTime += 0.1; + } + if (c.running()) { + c.terminate(); + err = -1; + } else { + err = c.exit_code(); + } + if (!c.wait_for(std::chrono::seconds(1))) { + TraceEvent(SevWarnAlways, "SpawnProcessFailedToExit") + .detail("Cmd", binPath) + .detail("Args", argsString); + } + } else { + state std::error_code errCode; + bool succ = c.wait_for(std::chrono::seconds(3), errCode); + err = errCode.value(); + if (!succ) { + err = -1; + c.terminate(); + if (!c.wait_for(std::chrono::seconds(1))) { + TraceEvent(SevWarnAlways, "SpawnProcessFailedToExit") + .detail("Cmd", binPath) + .detail("Args", argsString); + } + } + } + TraceEvent("SpawnProcess") + .detail("Cmd", binPath) + .detail("Error", err); + return err; +} + +ACTOR Future execHelper(ExecCmdValueString* execArg, std::string folder, std::string role) { + state StringRef uidStr = execArg->getBinaryArgValue(LiteralStringRef("uid")); + state int err = 0; + state Future cmdErr; + if (!g_network->isSimulated()) { + // get bin path + auto snapBin = execArg->getBinaryPath(); + auto dataFolder = "path=" + folder; + std::vector paramList; + paramList.push_back(snapBin.toString()); + // get user passed arguments + auto listArgs = execArg->getBinaryArgs(); + for (auto elem : listArgs) { + paramList.push_back(elem.toString()); + } + // get additional arguments + paramList.push_back(dataFolder); + const char* version = FDB_VT_VERSION; + std::string versionString = "version="; + versionString += version; + paramList.push_back(versionString); + paramList.push_back(role); + cmdErr = spawnProcess(snapBin.toString(), paramList, 3.0, false /*isSync*/); + wait(success(cmdErr)); + err = cmdErr.get(); + } else { + // copy the files + state std::string folderFrom = "./" + folder + "/."; + state std::string 
folderTo = "./" + folder + "-snap-" + uidStr.toString(); + std::vector paramList; + std::string mkdirBin = "/bin/mkdir"; + paramList.push_back(folderTo); + cmdErr = spawnProcess(mkdirBin, paramList, 3.0, false /*isSync*/); + wait(success(cmdErr)); + err = cmdErr.get(); + if (err == 0) { + std::vector paramList; + std::string cpBin = "/bin/cp"; + paramList.push_back("-a"); + paramList.push_back(folderFrom); + paramList.push_back(folderTo); + cmdErr = spawnProcess(cpBin, paramList, 3.0, true /*isSync*/); + wait(success(cmdErr)); + err = cmdErr.get(); + } + } + return err; +} + +std::map> execOpsInProgress; + +bool isExecOpInProgress(UID execUID) { + NetworkAddress addr = g_network->getLocalAddress(); + return (execOpsInProgress[addr].find(execUID) != execOpsInProgress[addr].end()); +} + +void setExecOpInProgress(UID execUID) { + NetworkAddress addr = g_network->getLocalAddress(); + ASSERT(execOpsInProgress[addr].find(execUID) == execOpsInProgress[addr].end()); + execOpsInProgress[addr].insert(execUID); + return; +} + +void clearExecOpInProgress(UID execUID) { + NetworkAddress addr = g_network->getLocalAddress(); + ASSERT(execOpsInProgress[addr].find(execUID) != execOpsInProgress[addr].end()); + execOpsInProgress[addr].erase(execUID); + return; +} + +std::map> tLogsAlive; + +void registerTLog(UID uid) { + NetworkAddress addr = g_network->getLocalAddress(); + tLogsAlive[addr].insert(uid); +} +void unregisterTLog(UID uid) { + NetworkAddress addr = g_network->getLocalAddress(); + if (tLogsAlive[addr].find(uid) != tLogsAlive[addr].end()) { + tLogsAlive[addr].erase(uid); + } +} +bool isTLogInSameNode() { + NetworkAddress addr = g_network->getLocalAddress(); + return tLogsAlive[addr].size() >= 1; +} diff --git a/fdbserver/FDBExecHelper.actor.h b/fdbserver/FDBExecHelper.actor.h new file mode 100644 index 0000000000..23e290f90e --- /dev/null +++ b/fdbserver/FDBExecHelper.actor.h @@ -0,0 +1,70 @@ +#pragma once +#if defined(NO_INTELLISENSE) && 
!defined(FDBSERVER_EXEC_HELPER_ACTOR_G_H) + #define FDBSERVER_EXEC_HELPER_ACTOR_G_H + #include "fdbserver/FDBExecHelper.actor.g.h" +#elif !defined(FDBSERVER_EXEC_HELPER_ACTOR_H) + #define FDBSERVER_EXEC_HELPER_ACTOR_H + +#include +#include +#include +#include "flow/Arena.h" +#include "flow/flow.h" +#include "flow/actorcompiler.h" + +// execute/snapshot command takes two arguments: +// param1 - represents the command type/name +// param2 - takes a binary path followed by a set of arguments in the following +// format :,... +// this class will abstract the format and give functions to get various pieces +// of information +class ExecCmdValueString { +public: // ctor & dtor + ExecCmdValueString() {} + explicit ExecCmdValueString(StringRef cmdValueString); + +public: // interfaces + StringRef getBinaryPath(); + VectorRef getBinaryArgs(); + StringRef getBinaryArgValue(StringRef key); + void setCmdValueString(StringRef cmdValueString); + StringRef getCmdValueString(void); + +public: // helper functions + void dbgPrint(); + +private: // functions + void parseCmdValue(); + +private: // data + Standalone cmdValueString; + Standalone> binaryArgs; + StringRef binaryPath; + std::map keyValueMap; +}; + +// FIXME: move this function to a common location +// spawns a process pointed by `binPath` and the arguments provided at `paramList`, +// if the process spawned takes more than `maxWaitTime` then it will be killed +// if isSync is set to true then the process will be synchronously executed +ACTOR Future spawnProcess(std::string binPath, std::vector paramList, double maxWaitTime, bool isSync); + +// helper to run all the work related to running the exec command +ACTOR Future execHelper(ExecCmdValueString* execArg, std::string folder, std::string role); + +// returns true if the execUID op is in progress +bool isExecOpInProgress(UID execUID); +// adds the execUID op to the list of ops in progress +void setExecOpInProgress(UID execUID); +// clears the execUID op from the list of 
ops in progress +void clearExecOpInProgress(UID execUID); + + +// registers a non-stopped TLog instance +void registerTLog(UID uid); +// unregisters a stopped TLog instance +void unregisterTLog(UID uid); +// checks if there is any non-stopped TLog instance +bool isTLogInSameNode(); + +#endif diff --git a/fdbserver/MasterProxyServer.actor.cpp b/fdbserver/MasterProxyServer.actor.cpp index d15a291683..8fe9e90684 100644 --- a/fdbserver/MasterProxyServer.actor.cpp +++ b/fdbserver/MasterProxyServer.actor.cpp @@ -45,7 +45,7 @@ #include "fdbclient/DatabaseConfiguration.h" #include "fdbclient/FDBTypes.h" #include "fdbclient/Knobs.h" -#include "fdbserver/FDBExecArgs.h" +#include "fdbserver/FDBExecHelper.actor.h" struct ProxyStats { CounterCollection cc; @@ -416,27 +416,28 @@ ACTOR Future commitBatcher(ProxyCommitData *commitData, PromiseStream>& binPathVec) { - int p = 0; TraceEvent(SevDebug, "BinPathConverter").detail("Input", binPath); - for (; p < binPath.size(); ) { - int pComma = binPath.find_first_of(',', p); - if (pComma == binPath.npos) { - pComma = binPath.size(); - } - Standalone token(binPath.substr(p, pComma - p)); - TraceEvent(SevDebug, "BinPathItem").detail("Element", token.toString()); - binPathVec.push_back(token); - p = pComma + 1; - while (binPath[p] == ' ' && p < binPath.size()) { - p++; + StringRef input(binPath); + while (input != StringRef()) { + StringRef token = input.eat(LiteralStringRef(",")); + if (token != StringRef()) { + const uint8_t* ptr = token.begin(); + while (ptr != token.end() && *ptr == ' ') { + ptr++; + } + if (ptr != token.end()) { + Standalone newElement(token.substr(ptr - token.begin())); + TraceEvent(SevDebug, "BinPathItem").detail("Element", newElement.toString()); + binPathVec.push_back(newElement); + } } } return; } -bool isWhitelisted(vector>& binPathVec, StringRef binPath) { +bool isWhitelisted(const vector>& binPathVec, StringRef binPath) { TraceEvent("BinPath").detail("Value", binPath.toString()); - for (auto item : 
binPathVec) { + for (const auto& item : binPathVec) { TraceEvent("Element").detail("Value", item.toString()); } return std::find(binPathVec.begin(), binPathVec.end(), binPath) != binPathVec.end(); @@ -816,7 +817,6 @@ ACTOR Future commitBatch( ASSERT(localityKey.present()); locality = decodeTagLocalityListValue(localityKey.get()); - auto ranges = self->keyInfo.intersectingRanges(allKeys); std::set allSources; auto& m = (*pMutations)[mutationNum]; if (debugMutation("ProxyCommit", commitVersion, m)) @@ -845,12 +845,9 @@ ACTOR Future commitBatch( if (m.param1 == execSnap) { te1.trackLatest(tokenStr.c_str()); } - std::string allTagString; for (auto& tag : allSources) { - allTagString += tag.toString() + ","; toCommit.addTag(tag); } - TraceEvent(SevDebug, "TagInfo").detail("Tags", allTagString); toCommit.addTypedMessage(m, true /* allLocations */); toCommit.setHasExecOp(); } diff --git a/fdbserver/OldTLogServer_6_0.actor.cpp b/fdbserver/OldTLogServer_6_0.actor.cpp index cb335b1967..88c079ffe4 100644 --- a/fdbserver/OldTLogServer_6_0.actor.cpp +++ b/fdbserver/OldTLogServer_6_0.actor.cpp @@ -38,11 +38,8 @@ #include "fdbserver/LogSystem.h" #include "fdbserver/WaitFailure.h" #include "fdbserver/RecoveryState.h" -#include "fdbserver/FDBExecArgs.h" +#include "fdbserver/FDBExecHelper.actor.h" #include "flow/actorcompiler.h" // This must be the last #include. 
-#if defined(CMAKE_BUILD) || !defined(WIN32) -#include "versions.h" -#endif using std::pair; using std::make_pair; @@ -536,6 +533,7 @@ ACTOR Future tLogLock( TLogData* self, ReplyPromise< TLogLockResult > repl TEST( !logData->stopped ); TraceEvent("TLogStop", logData->logId).detail("Ver", stopVersion).detail("IsStopped", logData->stopped).detail("QueueCommitted", logData->queueCommittedVersion.get()); + unregisterTLog(logData->logId); logData->stopped = true; if(!logData->recoveryComplete.isSet()) { @@ -1353,9 +1351,8 @@ ACTOR Future execProcessingHelper(TLogData* self, .detail("Reason", reason) .trackLatest(reason.c_str()); - auto startTag = logData->allTags.begin(); - std::string message = "ExecTrace/TLog/" + logData->allTags.begin()->toString(); - "/" + uidStr.toString(); + std::string message = "ExecTrace/TLog/" + logData->allTags.begin()->toString() + + "/" + uidStr.toString(); TraceEvent("ExecCmdSnapCreate") .detail("Uid", uidStr.toString()) .detail("Status", -1) @@ -1432,58 +1429,13 @@ ACTOR Future tLogSnapHelper(TLogData* self, state StringRef uidStr = execArg->getBinaryArgValue(LiteralStringRef("uid")); state UID execUID = UID::fromString(uidStr.toString()); state bool otherRoleExeced = false; - // TLog is special, we need to exec at the execVersion. - // If storage on the same process has initiated the exec then wait for it to - // finish and hold the tlog at the execVersion - while (isExecOpInProgress(execUID)) { - wait(delay(0.1)); - otherRoleExeced = true; - } + // TLog is special, we need to snap at the execVersion. 
+ // storage on the same node should not initiate a snap before TLog which will make + // the snap version at TLog unpredictable + ASSERT(!isExecOpInProgress(execUID)); if (!otherRoleExeced) { setExecOpInProgress(execUID); - if (!g_network->isSimulated()) { - // get the bin path - auto snapBin = execArg->getBinaryPath(); - auto dataFolder = "path=" + self->dataFolder; - std::vector paramList; - paramList.push_back(snapBin.toString()); - // user passed arguments - auto listArgs = execArg->getBinaryArgs(); - for (auto elem : listArgs) { - paramList.push_back(elem.toString()); - } - // additional arguments - paramList.push_back(dataFolder); - const char* version = FDB_VT_VERSION; - std::string versionString = "version="; - versionString += version; - paramList.push_back(versionString); - std::string roleString = "role=tlog"; - paramList.push_back(roleString); - cmdErr = spawnProcess(snapBin.toString(), paramList, 3.0); - wait(success(cmdErr)); - err = cmdErr.get(); - } else { - // copy the entire directory - state std::string tLogFolderFrom = "./" + self->dataFolder + "/."; - state std::string tLogFolderTo = "./" + self->dataFolder + "-snap-" + uidStr.toString(); - std::vector paramList; - std::string mkdirBin = "/bin/mkdir"; - paramList.push_back(tLogFolderTo); - cmdErr = spawnProcess(mkdirBin, paramList, 3.0); - wait(success(cmdErr)); - err = cmdErr.get(); - if (err == 0) { - std::vector paramList; - std::string cpBin = "/bin/cp"; - paramList.push_back("-a"); - paramList.push_back(tLogFolderFrom); - paramList.push_back(tLogFolderTo); - cmdErr = spawnProcess(cpBin, paramList, 3.0, true /*isSync*/); - wait(success(cmdErr)); - err = cmdErr.get(); - } - } + int err = wait(execHelper(execArg, self->dataFolder, "role=tlog")); clearExecOpInProgress(execUID); } TraceEvent("TLogCommitExecTraceTLog") @@ -1849,6 +1801,7 @@ ACTOR Future serveTLogInterface( TLogData* self, TLogInterface tli, Refere void removeLog( TLogData* self, Reference logData ) { TraceEvent("TLogRemoved", 
logData->logId).detail("Input", logData->bytesInput.getValue()).detail("Durable", logData->bytesDurable.getValue()); logData->stopped = true; + unregisterTLog(logData->logId); if(!logData->recoveryComplete.isSet()) { logData->recoveryComplete.sendError(end_of_stream()); } @@ -2335,6 +2288,7 @@ ACTOR Future tLogStart( TLogData* self, InitializeTLogRequest req, Localit self->queueOrder.push_back(recruited.id()); TraceEvent("TLogStart", logData->logId); + registerTLog(logData->logId); state Future updater; state bool pulledRecoveryVersions = false; try { diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index d8c0880af5..d0b9ecf76a 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -38,11 +38,8 @@ #include "fdbserver/LogSystem.h" #include "fdbserver/WaitFailure.h" #include "fdbserver/RecoveryState.h" -#include "fdbserver/FDBExecArgs.h" +#include "fdbserver/FDBExecHelper.actor.h" #include "flow/actorcompiler.h" // This must be the last #include. 
-#if defined(CMAKE_BUILD) || !defined(WIN32) -#include "versions.h" -#endif using std::pair; using std::make_pair; @@ -608,6 +605,7 @@ ACTOR Future tLogLock( TLogData* self, ReplyPromise< TLogLockResult > repl TEST( !logData->stopped ); TraceEvent("TLogStop", logData->logId).detail("Ver", stopVersion).detail("IsStopped", logData->stopped).detail("QueueCommitted", logData->queueCommittedVersion.get()); + unregisterTLog(logData->logId); logData->stopped = true; if(!logData->recoveryComplete.isSet()) { @@ -1720,7 +1718,6 @@ ACTOR Future execProcessingHelper(TLogData* self, .detail("Reason", reason) .trackLatest(reason.c_str()); - auto startTag = logData->allTags.begin(); std::string message = "ExecTrace/TLog/" + logData->allTags.begin()->toString(); "/" + uidStr.toString(); TraceEvent("ExecCmdSnapCreate") @@ -1798,58 +1795,13 @@ ACTOR Future tLogSnapHelper(TLogData* self, state StringRef uidStr = execArg->getBinaryArgValue(LiteralStringRef("uid")); state UID execUID = UID::fromString(uidStr.toString()); state bool otherRoleExeced = false; - // TLog is special, we need to exec at the execVersion. - // If storage on the same process has initiated the exec then wait for it to - // finish and hold the tlog at the execVersion - while (isExecOpInProgress(execUID)) { - wait(delay(0.1)); - otherRoleExeced = true; - } + // TLog is special, we need to snap at the execVersion. 
+ // storage on the same node should not initiate a snap before TLog which will make + // the snap version at TLog unpredictable + ASSERT(!isExecOpInProgress(execUID)); if (!otherRoleExeced) { setExecOpInProgress(execUID); - if (!g_network->isSimulated()) { - // get the bin path - auto snapBin = execArg->getBinaryPath(); - auto dataFolder = "path=" + self->dataFolder; - std::vector paramList; - paramList.push_back(snapBin.toString()); - // user passed arguments - auto listArgs = execArg->getBinaryArgs(); - for (auto elem : listArgs) { - paramList.push_back(elem.toString()); - } - // additional arguments - paramList.push_back(dataFolder); - const char* version = FDB_VT_VERSION; - std::string versionString = "version="; - versionString += version; - paramList.push_back(versionString); - std::string roleString = "role=tlog"; - paramList.push_back(roleString); - cmdErr = spawnProcess(snapBin.toString(), paramList, 3.0); - wait(success(cmdErr)); - err = cmdErr.get(); - } else { - // copy the entire directory - state std::string tLogFolderFrom = "./" + self->dataFolder + "/."; - state std::string tLogFolderTo = "./" + self->dataFolder + "-snap-" + uidStr.toString(); - std::vector paramList; - std::string mkdirBin = "/bin/mkdir"; - paramList.push_back(tLogFolderTo); - cmdErr = spawnProcess(mkdirBin, paramList, 3.0); - wait(success(cmdErr)); - err = cmdErr.get(); - if (err == 0) { - std::vector paramList; - std::string cpBin = "/bin/cp"; - paramList.push_back("-a"); - paramList.push_back(tLogFolderFrom); - paramList.push_back(tLogFolderTo); - cmdErr = spawnProcess(cpBin, paramList, 3.0, true /*isSync*/); - wait(success(cmdErr)); - err = cmdErr.get(); - } - } + int err = wait(execHelper(execArg, self->dataFolder, "role=tlog")); clearExecOpInProgress(execUID); } TraceEvent("TLogCommitExecTraceTLog") @@ -1939,10 +1891,10 @@ ACTOR Future tLogCommit( wait(logData->execOpHold.getFuture()); } - state Version execVersion = invalidVersion; - state ExecCmdValueString execArg(); - 
state TLogQueueEntryRef qe; - state StringRef execCmd; + state Version execVersion = invalidVersion; + state ExecCmdValueString execArg(); + state TLogQueueEntryRef qe; + state StringRef execCmd; state Standalone> execTags; if (logData->version.get() == req.prevVersion) { // Not a duplicate (check relies on no waiting between here and self->version.set() below!) @@ -2217,6 +2169,7 @@ ACTOR Future serveTLogInterface( TLogData* self, TLogInterface tli, Refere void removeLog( TLogData* self, Reference logData ) { TraceEvent("TLogRemoved", self->dbgid).detail("LogId", logData->logId).detail("Input", logData->bytesInput.getValue()).detail("Durable", logData->bytesDurable.getValue()); logData->stopped = true; + unregisterTLog(logData->logId); if(!logData->recoveryComplete.isSet()) { logData->recoveryComplete.sendError(end_of_stream()); } @@ -2730,6 +2683,8 @@ ACTOR Future tLogStart( TLogData* self, InitializeTLogRequest req, Localit self->spillOrder.push_back(recruited.id()); TraceEvent("TLogStart", logData->logId); + registerTLog(logData->logId); + state Future updater; state bool pulledRecoveryVersions = false; try { diff --git a/fdbserver/WorkerInterface.actor.h b/fdbserver/WorkerInterface.actor.h index cc26b77da3..6ccdb253ae 100644 --- a/fdbserver/WorkerInterface.actor.h +++ b/fdbserver/WorkerInterface.actor.h @@ -449,16 +449,5 @@ ACTOR Future tLog(IKeyValueStore* persistentData, IDiskQueue* persistentQu typedef decltype(&tLog) TLogFn; -// spawns a process pointed by `binPath` and the arguments provided at `paramList`, -// if the process spawned takes more than `maxWaitTime` then it will be killed -ACTOR Future spawnProcess(std::string binPath, std::vector paramList, double maxWaitTime, bool isSync = false); - -// returns true if the execUID op is in progress -bool isExecOpInProgress(UID execUID); -// adds the execUID op to the list of ops in progress -void setExecOpInProgress(UID execUID); -// clears the execUID op from the list of ops in progress -void 
clearExecOpInProgress(UID execUID); - #include "flow/unactorcompiler.h" #endif diff --git a/fdbserver/fdbserver.actor.cpp b/fdbserver/fdbserver.actor.cpp index 8d2764562c..300921c927 100644 --- a/fdbserver/fdbserver.actor.cpp +++ b/fdbserver/fdbserver.actor.cpp @@ -1698,7 +1698,6 @@ int main(int argc, char* argv[]) { std::string childf = tmpFolder + "/" + returnList[i]; std::vector returnFiles = platform::listFiles(childf, ext); for (int j = 0; j < returnFiles.size(); j++) { - fprintf(stderr, "file : %s\n", returnFiles[j].c_str()); if (returnFiles[j] != "fdb.cluster" && returnFiles[j] != "fitness") { TraceEvent("DeletingNonSnapfiles") .detail("FileBeingDeleted", childf + "/" + returnFiles[j]); @@ -1709,7 +1708,6 @@ int main(int argc, char* argv[]) { // move the contents from snap folder to the original folder, // delete snap folders for (int i = 0; i < returnList.size(); i++) { - fprintf(stderr, "Dir : %s\n", returnList[i].c_str()); if (returnList[i] == "." || returnList[i] == "..") { continue; } diff --git a/fdbserver/fdbserver.vcxproj b/fdbserver/fdbserver.vcxproj index 1d54a9d7fb..43e6a0d91f 100644 --- a/fdbserver/fdbserver.vcxproj +++ b/fdbserver/fdbserver.vcxproj @@ -47,7 +47,7 @@ - + @@ -153,6 +153,7 @@ + @@ -166,7 +167,9 @@ - + + false + diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp index 2c6967ee22..0e9e583e3f 100644 --- a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -49,12 +49,9 @@ #include "fdbserver/RecoveryState.h" #include "fdbserver/LogProtocolMessage.h" #include "fdbserver/LatencyBandConfig.h" -#include "fdbserver/FDBExecArgs.h" +#include "fdbserver/FDBExecHelper.actor.h" #include "flow/TDMetric.actor.h" #include "flow/actorcompiler.h" // This must be the last #include. 
-#if defined(CMAKE_BUILD) || !defined(WIN32) -#include "versions.h" -#endif using std::pair; using std::make_pair; @@ -1875,65 +1872,24 @@ snapHelper(StorageServer* data, MutationRef m, Version ver) TraceEvent("IgnoreNonSnapCommands").detail("ExecCommand", cmd); return Void(); } + state ExecCmdValueString execArg(m.param2); state StringRef uidStr = execArg.getBinaryArgValue(LiteralStringRef("uid")); state int err = 0; state Future cmdErr; state UID execUID = UID::fromString(uidStr.toString()); - bool otherRoleExeced = false; - - // other TLog or storage has initiated the exec, so we can skip - if (isExecOpInProgress(execUID)) { - otherRoleExeced = true; + state bool skip = false; + if (cmd == execSnap && isTLogInSameNode()) { + skip = true; + } + // other storage has initiated the exec, so we can skip + if (!skip && isExecOpInProgress(execUID)) { + skip = true; } - if (!otherRoleExeced) { + if (!skip) { setExecOpInProgress(execUID); - if (!g_network->isSimulated()) { - // get bin path - auto binPath = execArg.getBinaryPath(); - auto dataFolder = "path=" + data->folder; - std::vector paramList; - paramList.push_back(binPath.toString()); - // get user passed arguments - auto listArgs = execArg.getBinaryArgs(); - execArg.dbgPrint(); - for (auto elem : listArgs) { - paramList.push_back(elem.toString()); - } - // get additional arguments - paramList.push_back(dataFolder); - const char* version = FDB_VT_VERSION; - std::string versionString = "version="; - versionString += version; - paramList.push_back(versionString); - std::string roleString = "role=storage"; - paramList.push_back(roleString); - cmdErr = spawnProcess(binPath.toString(), paramList, 3.0); - wait(success(cmdErr)); - err = cmdErr.get(); - } else { - // copy the files - std::string folder = abspath(data->folder); - state std::string folderFrom = folder + "/."; - state std::string folderTo = folder + "-snap-" + uidStr.toString(); - std::vector paramList; - std::string mkdirBin = "/bin/mkdir"; - 
paramList.push_back(folderTo); - cmdErr = spawnProcess(mkdirBin, paramList, 3.0); - wait(success(cmdErr)); - err = cmdErr.get(); - if (err == 0) { - std::vector paramList; - std::string cpBin = "/bin/cp"; - paramList.push_back("-a"); - paramList.push_back(folderFrom); - paramList.push_back(folderTo); - cmdErr = spawnProcess(cpBin, paramList, 3.0, true /*isSync*/); - wait(success(cmdErr)); - err = cmdErr.get(); - } - } + int err = wait(execHelper(&execArg, data->folder, "role=storage")); clearExecOpInProgress(execUID); } auto tokenStr = "ExecTrace/storage/" + uidStr.toString(); @@ -1947,6 +1903,7 @@ snapHelper(StorageServer* data, MutationRef m, Version ver) te.detail("DurableVersion", data->durableVersion.get()); te.detail("DataVersion", data->version.get()); te.detail("Tag", data->tag.toString()); + te.detail("SnapCreateSkipped", skip); if (cmd == execSnap) { te.trackLatest(tokenStr.c_str()); } diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index eb163a587c..4d0c28de6d 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -19,15 +19,6 @@ */ #include -#define BOOST_SYSTEM_NO_LIB -#define BOOST_DATE_TIME_NO_LIB -#define BOOST_REGEX_NO_LIB -#include "boost/process.hpp" -// c.wait() conflicts with ACTOR compiler -void childWait(boost::process::child& c) { - c.wait(); - return; -} #include "flow/ActorCollection.h" #include "flow/SystemMonitor.h" @@ -44,15 +35,12 @@ void childWait(boost::process::child& c) { #include "fdbserver/ClusterRecruitmentInterface.h" #include "fdbserver/DataDistributorInterface.h" #include "fdbserver/ServerDBInfo.h" -#include "fdbserver/FDBExecArgs.h" +#include "fdbserver/FDBExecHelper.actor.h" #include "fdbserver/CoordinationInterface.h" #include "fdbclient/FailureMonitorClient.h" #include "fdbclient/MonitorLeader.h" #include "fdbclient/ClientWorkerInterface.h" #include "flow/Profiler.h" -#if defined(CMAKE_BUILD) || !defined(WIN32) -#include "versions.h" -#endif #ifdef __linux__ #include @@ -80,27 
+68,6 @@ extern IKeyValueStore* keyValueStoreCompressTestData(IKeyValueStore* store); #endif -std::map> execOpsInProgress; - -bool isExecOpInProgress(UID execUID) { - NetworkAddress addr = g_network->getLocalAddress(); - return (execOpsInProgress[addr].find(execUID) != execOpsInProgress[addr].end()); -} - -void setExecOpInProgress(UID execUID) { - NetworkAddress addr = g_network->getLocalAddress(); - ASSERT(execOpsInProgress[addr].find(execUID) == execOpsInProgress[addr].end()); - execOpsInProgress[addr].insert(execUID); - return; -} - -void clearExecOpInProgress(UID execUID) { - NetworkAddress addr = g_network->getLocalAddress(); - ASSERT(execOpsInProgress[addr].find(execUID) != execOpsInProgress[addr].end()); - execOpsInProgress[addr].erase(execUID); - return; -} - ACTOR static Future extractClientInfo( Reference> db, Reference> info ) { loop { info->set( db->get().client ); @@ -1206,54 +1173,8 @@ ACTOR Future workerServer( } when(state ExecuteRequest req = waitNext(interf.execReq.getFuture())) { state ExecCmdValueString execArg(req.execPayload); - execArg.dbgPrint(); - state StringRef uidStr = execArg.getBinaryArgValue(LiteralStringRef("uid")); - state int err = 0; - state Future cmdErr; - if (!g_network->isSimulated()) { - // get bin path - auto snapBin = execArg.getBinaryPath(); - auto dataFolder = "path=" + coordFolder; - std::vector paramList; - paramList.push_back(snapBin.toString()); - // get user passed arguments - auto listArgs = execArg.getBinaryArgs(); - for (auto elem : listArgs) { - paramList.push_back(elem.toString()); - } - // get additional arguments - paramList.push_back(dataFolder); - const char* version = FDB_VT_VERSION; - std::string versionString = "version="; - versionString += version; - paramList.push_back(versionString); - std::string roleString = "role=coordinator"; - paramList.push_back(roleString); - cmdErr = spawnProcess(snapBin.toString(), paramList, 3.0); - wait(success(cmdErr)); - err = cmdErr.get(); - } else { - // copy the files 
- std::string folder = coordFolder; - state std::string folderFrom = "./" + folder + "/."; - state std::string folderTo = "./" + folder + "-snap-" + uidStr.toString(); - std::vector paramList; - std::string mkdirBin = "/bin/mkdir"; - paramList.push_back(folderTo); - cmdErr = spawnProcess(mkdirBin, paramList, 3.0); - wait(success(cmdErr)); - err = cmdErr.get(); - if (err == 0) { - std::vector paramList; - std::string cpBin = "/bin/cp"; - paramList.push_back("-a"); - paramList.push_back(folderFrom); - paramList.push_back(folderTo); - cmdErr = spawnProcess(cpBin, paramList, 3.0, true /*isSync*/); - wait(success(cmdErr)); - err = cmdErr.get(); - } - } + int err = wait(execHelper(&execArg, coordFolder, "role=coordinator")); + StringRef uidStr = execArg.getBinaryArgValue(LiteralStringRef("uid")); auto tokenStr = "ExecTrace/Coordinators/" + uidStr.toString(); auto te = TraceEvent("ExecTraceCoordinators"); te.detail("Uid", uidStr.toString()); @@ -1422,7 +1343,7 @@ ACTOR Future fdbd( ServerCoordinators coordinators( connFile ); if (g_network->isSimulated()) { - whitelistBinPaths = "random_path, /bin/snap_create.sh"; + whitelistBinPaths = ",, random_path, /bin/snap_create.sh,,"; } TraceEvent("StartingFDBD").detail("ZoneID", localities.zoneId()).detail("MachineId", localities.machineId()).detail("DiskPath", dataFolder).detail("CoordPath", coordFolder).detail("WhiteListBinPath", whitelistBinPaths); @@ -1458,45 +1379,6 @@ ACTOR Future fdbd( } } -ACTOR Future spawnProcess(std::string binPath, std::vector paramList, double maxWaitTime, bool isSync) -{ - std::string argsString; - for (int i = 0; i < paramList.size(); i++) { - argsString += paramList[i] + ","; - } - TraceEvent("SpawnProcess").detail("Cmd", binPath).detail("Args", argsString); - - state int err = 0; - state double runTime = 0; - state boost::process::child c(binPath, boost::process::args(paramList)); - if (!isSync) { - while (c.running() && runTime <= maxWaitTime) { - wait(delay(0.1)); - runTime += 0.1; - } - if 
(c.running()) { - c.terminate(); - err = -1; - } else { - err = c.exit_code(); - } - childWait(c); - } else { - state std::error_code errCode; - bool succ = c.wait_for(std::chrono::seconds(3), errCode); - err = errCode.value(); - if (!succ) { - err = -1; - c.terminate(); - childWait(c); - } - } - TraceEvent("SpawnProcess") - .detail("Cmd", binPath) - .detail("Error", err); - return err; -} - const Role Role::WORKER("Worker", "WK", false); const Role Role::STORAGE_SERVER("StorageServer", "SS"); const Role Role::TRANSACTION_LOG("TLog", "TL"); diff --git a/fdbserver/workloads/SnapTest.actor.cpp b/fdbserver/workloads/SnapTest.actor.cpp index aad6d2bccf..b9c41c3965 100644 --- a/fdbserver/workloads/SnapTest.actor.cpp +++ b/fdbserver/workloads/SnapTest.actor.cpp @@ -1,39 +1,38 @@ -#include "fdbserver/Status.h" -#include "flow/actorcompiler.h" -#include "fdbrpc/ContinuousSample.h" -#include "fdbclient/NativeAPI.actor.h" +#include #include "fdbclient/ManagementAPI.actor.h" +#include "fdbclient/NativeAPI.actor.h" +#include "fdbclient/ReadYourWrites.h" +#include "fdbrpc/ContinuousSample.h" +#include "fdbmonitor/SimpleIni.h" +#include "fdbserver/ClusterRecruitmentInterface.h" +#include "fdbserver/Status.h" #include "fdbserver/TesterInterface.actor.h" #include "fdbserver/WorkerInterface.actor.h" -#include "workloads.actor.h" -#include "BulkSetup.actor.h" -#include "fdbserver/ClusterRecruitmentInterface.h" -#include "fdbclient/ReadYourWrites.h" -#include "fdbmonitor/SimpleIni.h" -#include - -#undef FLOW_ACOMPILER_STATE -#define FLOW_ACOMPILER_STATE 1 +#include "fdbserver/workloads/BulkSetup.actor.h" +#include "fdbserver/workloads/workloads.actor.h" +#include "flow/actorcompiler.h" void getVersionAndnumTags(TraceEventFields md, Version& version, int& numTags) { version = -1; numTags = -1; - sscanf(md.getValue("Version").c_str(), "%lld", &version); - sscanf(md.getValue("NumTags").c_str(), "%d:%d", &numTags); + version = boost::lexical_cast(md.getValue("Version")); + numTags = 
boost::lexical_cast(md.getValue("NumTags")); } void getTagAndDurableVersion(TraceEventFields md, Version version, Tag& tag, Version& durableVersion) { Version verifyVersion; durableVersion = -1; - int tagLocality; - int tagId; - sscanf(md.getValue("Version").c_str(), "%lld", &verifyVersion); - sscanf(md.getValue("Tag").c_str(), "%d:%d", &tagLocality, &tagId); - tag.locality = tagLocality; - tag.id = tagId; - sscanf(md.getValue("DurableVersion").c_str(), "%lld", &durableVersion); + verifyVersion = boost::lexical_cast(md.getValue("Version")); + std::string tagString = md.getValue("Tag"); + int colon = tagString.find_first_of(':'); + std::string localityString = tagString.substr(0, colon); + std::string idString = tagString.substr(colon + 1); + tag.locality = boost::lexical_cast(localityString); + tag.id = boost::lexical_cast(idString); + + durableVersion = boost::lexical_cast(md.getValue("DurableVersion")); } void getMinAndMaxTLogVersions(TraceEventFields md, Version version, Tag tag, Version& minTLogVersion, @@ -42,29 +41,27 @@ void getMinAndMaxTLogVersions(TraceEventFields md, Version version, Tag tag, Ver Tag verifyTag; minTLogVersion = maxTLogVersion = -1; - sscanf(md.getValue("Version").c_str(), "%lld", &verifyVersion); - int tagLocality; - int tagId; - sscanf(md.getValue("Tag").c_str(), "%d:%d", &tagLocality, &tagId); - verifyTag.locality = tagLocality; - verifyTag.id = tagId; + verifyVersion = boost::lexical_cast(md.getValue("Version")); + std::string tagString = md.getValue("Tag"); + int colon = tagString.find_first_of(':'); + std::string localityString = tagString.substr(0, colon); + std::string idString = tagString.substr(colon + 1); + verifyTag.locality = boost::lexical_cast(localityString); + verifyTag.id = boost::lexical_cast(idString); if (tag != verifyTag) { return; } - sscanf(md.getValue("PoppedTagVersion").c_str(), "%lld", &minTLogVersion); - sscanf(md.getValue("QueueCommittedVersion").c_str(), "%lld", &maxTLogVersion); + minTLogVersion = 
boost::lexical_cast(md.getValue("PoppedTagVersion")); + maxTLogVersion = boost::lexical_cast(md.getValue("QueueCommittedVersion")); } void filterEmptyMessages(std::vector>& messages) { - std::string emptyStr; - auto it = messages.begin(); - while (it != messages.end()) { - if (!it->isReady() || it->get().toString() == emptyStr) { - it = messages.erase(it); - } else { - ++it; - } - } + messages.erase(std::remove_if(messages.begin(), messages.end(), + [](Futureconst & msgFuture) + { + return !msgFuture.isReady() || msgFuture.get().size() == 0; + } + ), messages.end()); return; } @@ -168,8 +165,8 @@ public: // workload functions } state int retry = 0; + tr.reset(); loop { - tr.reset(); try { for (auto id : keys) { if (even) { @@ -244,8 +241,8 @@ public: // workload functions // with snapKeys 1) validate that all key ids are even ie - // created before snap 2) values are same as the key id 3) # of // keys adds up to the total keys created before snap + tr.reset(); loop { - tr.reset(); try { Standalone kvRange = wait(tr.getRange(begin, end, CLIENT_KNOBS->TOO_MANY)); if (!kvRange.more && kvRange.size() == 0) { @@ -283,9 +280,9 @@ public: // workload functions // corresponding enable, then TLog will automatically enable the // popping of TLogs. 
this test case validates that we auto // enable the popping of TLogs + tr.reset(); loop { // disable pop of the TLog - tr.reset(); try { StringRef payLoadRef = LiteralStringRef("empty-binary:uid=a36b2ca0e8dab0452ac3e12b6b926f4b"); tr.execute(execDisableTLogPop, payLoadRef); @@ -302,9 +299,9 @@ public: // workload functions } else if (self->testID == 5) { // description: disable TLog pop and enable TLog pop with // different UIDs should mis-match and print an error + tr.reset(); loop { // disable pop of the TLog - tr.reset(); try { StringRef payLoadRef = LiteralStringRef("empty-binary:uid=956349f5f368d37a802f1f37d7f4b9c1"); tr.execute(execDisableTLogPop, payLoadRef); @@ -314,9 +311,9 @@ public: // workload functions wait(tr.onError(e)); } } + tr.reset(); loop { // enable pop of the TLog - tr.reset(); try { StringRef payLoadRef = LiteralStringRef("empty-binary:uid=5810898ca2f3143a246886c79d1bea92"); tr.execute(execEnableTLogPop, payLoadRef); @@ -329,9 +326,9 @@ public: // workload functions self->snapUID = UID::fromString("5810898ca2f3143a246886c79d1bea92"); } else if (self->testID == 6) { // snapshot create without disabling pop of the TLog + tr.reset(); loop { try { - tr.reset(); StringRef snapPayload = LiteralStringRef("/bin/" "snap_create.sh:uid=d78b08d47f341158e9a54d4baaf4a4dd"); tr.execute(execSnap, snapPayload); @@ -345,9 +342,9 @@ public: // workload functions self->snapUID = UID::fromString("d78b08d47f341158e9a54d4baaf4a4dd"); } else if (self->testID == 7) { // disable popping of TLog and snapshot create with mis-matching + tr.reset(); loop { // disable pop of the TLog - tr.reset(); try { StringRef payLoadRef = LiteralStringRef("empty-binary:uid=f49d27ddf7a28b6549d930743e0ebdbe"); tr.execute(execDisableTLogPop, payLoadRef); @@ -357,10 +354,10 @@ public: // workload functions wait(tr.onError(e)); } } + tr.reset(); loop { // snap create with different UID try { - tr.reset(); StringRef snapPayload = 
LiteralStringRef("/bin/snap_create.sh:uid=ba61e9612a561d60bd83ad83e1b63568"); tr.execute(execSnap, snapPayload); wait(tr.commit()); @@ -424,11 +421,11 @@ public: // workload functions } } - state int i = 0; + state int entryi = 0; state int foundTagServers = 0; - for (; i < tLogWorkers.size(); i++) { + for (; entryi < tLogWorkers.size(); entryi++) { tLogMessages.push_back( - timeoutError(tLogWorkers[i].eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 3.0)); + timeoutError(tLogWorkers[entryi].eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 3.0)); try { TraceEvent(SevDebug, "WaitingForTlogMessages"); @@ -444,7 +441,7 @@ public: // workload functions filterEmptyMessages(tLogMessages); if (tLogMessages.size() < 1) { TraceEvent("VerifyTLogTrackLatestMessageNotFound") - .detail("Address", tLogWorkers[i].address()) + .detail("Address", tLogWorkers[entryi].address()) .detail("Token", eventTokenRef.toString()); } else { ++foundTagServers; @@ -533,28 +530,28 @@ public: // workload functions return false; } - state int i = 0; + state int entryi = 0; state int numTags = -1; - for (; i < proxyMessages.size(); i++) { + for (; entryi < proxyMessages.size(); entryi++) { state Version execVersion = -1; state std::string emptyStr; - TraceEvent("RelevantProxyMessage").detail("Msg", proxyMessages[i].get().toString()); - if (proxyMessages[i].get().toString() != emptyStr) { - getVersionAndnumTags(proxyMessages[i].get(), execVersion, numTags); + TraceEvent("RelevantProxyMessage").detail("Msg", proxyMessages[entryi].get().toString()); + if (proxyMessages[entryi].get().toString() != emptyStr) { + getVersionAndnumTags(proxyMessages[entryi].get(), execVersion, numTags); ASSERT(numTags > 0); } - state int j = 0; - for (; (execVersion != -1) && j < storageMessages.size(); j++) { + state int entryj = 0; + for (; (execVersion != -1) && entryj < storageMessages.size(); entryj++) { // for each message that has this verison, get the tag and // the durable version state Tag tag; 
state Tag invalidTag; state Version durableVersion = -1; - TraceEvent("RelevantStorageMessage").detail("Msg", storageMessages[j].get().toString()); - ASSERT(storageMessages[j].get().toString() != emptyStr); - getTagAndDurableVersion(storageMessages[j].get(), execVersion, tag, durableVersion); + TraceEvent("RelevantStorageMessage").detail("Msg", storageMessages[entryj].get().toString()); + ASSERT(storageMessages[entryj].get().toString() != emptyStr); + getTagAndDurableVersion(storageMessages[entryj].get(), execVersion, tag, durableVersion); TraceEvent("SearchingTLogMessages").detail("Tag", tag.toString()); tLogMessages.clear(); @@ -577,18 +574,18 @@ public: // workload functions return false; } filterEmptyMessages(tLogMessages); - state int k = 0; + state int entryk = 0; numDurableVersionChecks = 0; - for (; (tag != invalidTag) && k < tLogMessages.size(); k++) { + for (; (tag != invalidTag) && entryk < tLogMessages.size(); entryk++) { // for each of the message that has this version and tag // verify that // 1) durableVersion >= minTLogVersion -1 // 2) durableVersion < maxTLogVersion Version minTLogVersion = -1; Version maxTLogVersion = -1; - TraceEvent("TLogMessage").detail("Msg", tLogMessages[k].get().toString()); - ASSERT(tLogMessages[k].get().toString() != emptyStr); - getMinAndMaxTLogVersions(tLogMessages[k].get(), execVersion, tag, minTLogVersion, maxTLogVersion); + TraceEvent("TLogMessage").detail("Msg", tLogMessages[entryk].get().toString()); + ASSERT(tLogMessages[entryk].get().toString() != emptyStr); + getMinAndMaxTLogVersions(tLogMessages[entryk].get(), execVersion, tag, minTLogVersion, maxTLogVersion); if (minTLogVersion != -1 && maxTLogVersion != -1) { if ((durableVersion >= minTLogVersion - 1) && (durableVersion < maxTLogVersion)) { ++numDurableVersionChecks; From d68a22977208f132566f6476d743ee1c59bf157a Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Mon, 22 Apr 2019 15:13:01 -0700 Subject: [PATCH 32/69] makefile changes to accommodate 
boost/process.hpp --- FDBLibTLS/local.mk | 2 +- fdbrpc/local.mk | 2 +- flow/local.mk | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/FDBLibTLS/local.mk b/FDBLibTLS/local.mk index 9dd492f132..0a0618f30a 100644 --- a/FDBLibTLS/local.mk +++ b/FDBLibTLS/local.mk @@ -23,6 +23,6 @@ FDBLibTLS_BUILD_SOURCES += -FDBLibTLS_CFLAGS := -fPIC -I/usr/local/include -I$(BOOSTDIR) -I. -DUSE_UCONTEXT +FDBLibTLS_CFLAGS := -fPIC -I/usr/local/include -isystem$(BOOSTDIR) -I. -DUSE_UCONTEXT lib/libFDBLibTLS.a: bin/coverage.FDBLibTLS.xml diff --git a/fdbrpc/local.mk b/fdbrpc/local.mk index 6d648b1740..fd3636aedd 100644 --- a/fdbrpc/local.mk +++ b/fdbrpc/local.mk @@ -22,7 +22,7 @@ fdbrpc_BUILD_SOURCES += fdbrpc/libeio/eio.c -fdbrpc_CFLAGS := -I$(BOOSTDIR) -I. -Ifdbrpc/libeio -DUSE_UCONTEXT +fdbrpc_CFLAGS := -isystem$(BOOSTDIR) -I. -Ifdbrpc/libeio -DUSE_UCONTEXT fdbrpc_LDFLAGS := ifeq ($(PLATFORM),osx) diff --git a/flow/local.mk b/flow/local.mk index 18cf9f71e0..6ff17bb62e 100644 --- a/flow/local.mk +++ b/flow/local.mk @@ -20,7 +20,7 @@ # -*- mode: makefile; -*- -flow_CFLAGS := -I$(BOOSTDIR) -I. -DUSE_UCONTEXT +flow_CFLAGS := -isystem$(BOOSTDIR) -I. 
-DUSE_UCONTEXT flow_LDFLAGS := ifeq ($(PLATFORM),osx) From 936ffc2ddefcd7c21dc96657f28903673d5b29cc Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Tue, 23 Apr 2019 06:55:55 -0700 Subject: [PATCH 33/69] rebase related changes --- fdbclient/NativeAPI.actor.cpp | 2 +- fdbserver/FDBExecHelper.actor.cpp | 4 ++-- fdbserver/MasterProxyServer.actor.cpp | 6 +++--- fdbserver/fdbserver.actor.cpp | 1 - fdbserver/worker.actor.cpp | 2 +- 5 files changed, 7 insertions(+), 8 deletions(-) diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 4b25b73c0c..d6a5458f3d 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -2298,7 +2298,7 @@ ACTOR Future executeCoordinators(DatabaseContext* cx, StringRef execPayloa g_traceBatch.addEvent("TransactionDebug", debugID.get().first(), "NativeAPI.executeCoordinators.Inside loop"); } - wait(loadBalance(cx->getMasterProxies(), &MasterProxyInterface::execReq, req, cx->taskID)); + wait(loadBalance(cx->getMasterProxies(false), &MasterProxyInterface::execReq, req, cx->taskID)); if (debugID.present()) g_traceBatch.addEvent("TransactionDebug", debugID.get().first(), "NativeAPI.executeCoordinators.After"); diff --git a/fdbserver/FDBExecHelper.actor.cpp b/fdbserver/FDBExecHelper.actor.cpp index 48fabef789..76af843cc1 100644 --- a/fdbserver/FDBExecHelper.actor.cpp +++ b/fdbserver/FDBExecHelper.actor.cpp @@ -156,8 +156,8 @@ ACTOR Future execHelper(ExecCmdValueString* execArg, std::string folder, st err = cmdErr.get(); } else { // copy the files - state std::string folderFrom = "./" + folder + "/."; - state std::string folderTo = "./" + folder + "-snap-" + uidStr.toString(); + state std::string folderFrom = folder + "/."; + state std::string folderTo = folder + "-snap-" + uidStr.toString(); std::vector paramList; std::string mkdirBin = "/bin/mkdir"; paramList.push_back(folderTo); diff --git a/fdbserver/MasterProxyServer.actor.cpp b/fdbserver/MasterProxyServer.actor.cpp index 8fe9e90684..572b7dd1b0 
100644 --- a/fdbserver/MasterProxyServer.actor.cpp +++ b/fdbserver/MasterProxyServer.actor.cpp @@ -427,7 +427,7 @@ void createWhitelistBinPathVec(const std::string& binPath, vector newElement(token.substr(ptr - token.begin())); - TraceEvent(SevDebug, "BinPathItem").detail("Element", newElement.toString()); + TraceEvent(SevDebug, "BinPathItem").detail("Element", newElement); binPathVec.push_back(newElement); } } @@ -436,9 +436,9 @@ void createWhitelistBinPathVec(const std::string& binPath, vector>& binPathVec, StringRef binPath) { - TraceEvent("BinPath").detail("Value", binPath.toString()); + TraceEvent("BinPath").detail("Value", binPath); for (const auto& item : binPathVec) { - TraceEvent("Element").detail("Value", item.toString()); + TraceEvent("Element").detail("Value", item); } return std::find(binPathVec.begin(), binPathVec.end(), binPath) != binPathVec.end(); } diff --git a/fdbserver/fdbserver.actor.cpp b/fdbserver/fdbserver.actor.cpp index 300921c927..856fedd98e 100644 --- a/fdbserver/fdbserver.actor.cpp +++ b/fdbserver/fdbserver.actor.cpp @@ -936,7 +936,6 @@ int main(int argc, char* argv[]) { LocalityData localities; int minTesterCount = 1; bool testOnServers = false; - bool isRestoring = false; Reference tlsOptions = Reference( new TLSOptions ); std::string tlsCertPath, tlsKeyPath, tlsCAPath, tlsPassword; diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index 4d0c28de6d..594dcabb13 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -721,7 +721,7 @@ ACTOR Future workerServer( // here is no, so that when running with log_version==3, all files should say V=3. 
state std::map, std::pair, PromiseStream>> sharedLogs; - state std::string coordFolder = _coordFolder; + state std::string coordFolder = abspath(_coordFolder); state WorkerInterface interf( locality ); From 4083af0b01db9405a77ec2841b9d305e027a4c09 Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Tue, 23 Apr 2019 16:17:54 -0700 Subject: [PATCH 34/69] Avoid using trackLatest for TLog pop test cases --- fdbclient/SystemData.cpp | 2 + fdbclient/SystemData.h | 1 + fdbserver/MasterProxyServer.actor.cpp | 5 +- fdbserver/OldTLogServer_6_0.actor.cpp | 52 +-- fdbserver/TLogServer.actor.cpp | 52 +-- fdbserver/storageserver.actor.cpp | 4 - fdbserver/workloads/SnapTest.actor.cpp | 325 ++----------------- tests/fast/SnapTestFailAndDisablePop.txt | 18 +- tests/restarting/SnapCycleRestart-1.txt | 1 - tests/restarting/SnapTestAttrition-1.txt | 3 - tests/restarting/SnapTestAttrition-2.txt | 1 - tests/restarting/SnapTestRestart-1.txt | 3 - tests/restarting/SnapTestRestart-2.txt | 1 - tests/restarting/SnapTestSimpleRestart-1.txt | 3 - tests/restarting/SnapTestSimpleRestart-2.txt | 1 - 15 files changed, 100 insertions(+), 372 deletions(-) diff --git a/fdbclient/SystemData.cpp b/fdbclient/SystemData.cpp index 888f9bc743..ea7d91544d 100644 --- a/fdbclient/SystemData.cpp +++ b/fdbclient/SystemData.cpp @@ -41,6 +41,8 @@ const StringRef execSnap = LiteralStringRef("snap"); // snapshot persistent stat // storage, TLog and coordinated state const StringRef execDisableTLogPop = LiteralStringRef("\xff/TLogDisablePop"); // disable pop on TLog const StringRef execEnableTLogPop = LiteralStringRef("\xff/TLogEnablePop"); // enable pop on TLog +// used to communicate snap failures between TLog and SnapTest Workload, used only in simulator +const StringRef snapTestFailStatus = LiteralStringRef("\xff/SnapTestFailStatus/"); const Key keyServersKey( const KeyRef& k ) { return k.withPrefix( keyServersPrefix ); diff --git a/fdbclient/SystemData.h b/fdbclient/SystemData.h index 0f7d5d591c..b29805208c 
100644 --- a/fdbclient/SystemData.h +++ b/fdbclient/SystemData.h @@ -285,6 +285,7 @@ std::pair decodeHealthyZoneValue( ValueRef const& ); extern const StringRef execSnap; extern const StringRef execDisableTLogPop; extern const StringRef execEnableTLogPop; +extern const StringRef snapTestFailStatus; // All mutations done to this range are blindly copied into txnStateStore. // Used to create artifically large txnStateStore instances in testing. diff --git a/fdbserver/MasterProxyServer.actor.cpp b/fdbserver/MasterProxyServer.actor.cpp index 572b7dd1b0..8af540ea33 100644 --- a/fdbserver/MasterProxyServer.actor.cpp +++ b/fdbserver/MasterProxyServer.actor.cpp @@ -836,15 +836,12 @@ ACTOR Future commitBatch( allSources.insert(localTags.begin(), localTags.end()); } - std::string tokenStr = "ExecTrace/Proxy/" + uidStr.toString(); auto te1 = TraceEvent("ProxyCommitTo", self->dbgid); te1.detail("To", "all sources"); + te1.detail("UidStr", uidStr); te1.detail("Mutation", m.toString()); te1.detail("Version", commitVersion); te1.detail("NumTags", allSources.size()); - if (m.param1 == execSnap) { - te1.trackLatest(tokenStr.c_str()); - } for (auto& tag : allSources) { toCommit.addTag(tag); } diff --git a/fdbserver/OldTLogServer_6_0.actor.cpp b/fdbserver/OldTLogServer_6_0.actor.cpp index 88c079ffe4..84991a489a 100644 --- a/fdbserver/OldTLogServer_6_0.actor.cpp +++ b/fdbserver/OldTLogServer_6_0.actor.cpp @@ -25,6 +25,7 @@ #include "fdbclient/Notified.h" #include "fdbclient/KeyRangeMap.h" #include "fdbclient/SystemData.h" +#include "fdbclient/RunTransaction.actor.h" #include "fdbserver/WorkerInterface.actor.h" #include "fdbserver/TLogInterface.h" #include "fdbserver/Knobs.h" @@ -252,6 +253,7 @@ struct TLogData : NonCopyable { AsyncVar largeDiskQueueCommitBytes; //becomes true when diskQueueCommitBytes is greater than MAX_QUEUE_COMMIT_BYTES Reference> dbInfo; + Database cx; NotifiedVersion queueCommitEnd; Version queueCommitBegin; @@ -295,6 +297,7 @@ struct TLogData : NonCopyable { 
concurrentLogRouterReads(SERVER_KNOBS->CONCURRENT_LOG_ROUTER_READS), ignorePopRequest(false), ignorePopDeadline(), ignorePopUid(), dataFolder(folder), toBePopped() { + cx = openDBOnServer(dbInfo, TaskDefaultEndpoint, true, true); } }; @@ -969,8 +972,7 @@ ACTOR Future tLogPop( TLogData* self, TLogPopRequest req, Referencenow()) .detail("IgnorePopRequest", self->ignorePopRequest) - .detail("IgnorePopDeadline", self->ignorePopDeadline) - .trackLatest("DisableTLogPopTimedOut"); + .detail("IgnorePopDeadline", self->ignorePopDeadline); } wait(tLogPopCore(self, req.tag, req.to, logData)); req.reply.send(Void()); @@ -1282,7 +1284,8 @@ ACTOR Future execProcessingHelper(TLogData* self, Standalone>* execTags, ExecCmdValueString* execArg, StringRef* execCmd, - Version* execVersion) + Version* execVersion, + vector>* snapFailKeySetters) { // inspect the messages to find if there is an Exec type and print // it. message are prefixed by the length of the message and each @@ -1335,7 +1338,7 @@ ACTOR Future execProcessingHelper(TLogData* self, } if (*execCmd == execSnap) { // validation check specific to snap request - std::string reason; + state std::string reason; if (!self->ignorePopRequest) { *execVersion = invalidVersion; reason = "SnapFailIgnorePopNotSet"; @@ -1348,17 +1351,21 @@ ACTOR Future execProcessingHelper(TLogData* self, TraceEvent(SevWarn, "TLogSnapFailed") .detail("IgnorePopUid", self->ignorePopUid) .detail("IgnorePopRequest", self->ignorePopRequest) - .detail("Reason", reason) - .trackLatest(reason.c_str()); + .detail("Reason", reason); - std::string message = "ExecTrace/TLog/" + logData->allTags.begin()->toString() - + "/" + uidStr.toString(); TraceEvent("ExecCmdSnapCreate") .detail("Uid", uidStr.toString()) .detail("Status", -1) .detail("Tag", logData->allTags.begin()->toString()) - .detail("Role", "TLog") - .trackLatest(message.c_str()); + .detail("Role", "TLog"); + if (g_network->isSimulated()) { + // write SnapFailedTLog.$UID + Standalone keyStr = 
snapTestFailStatus.withSuffix(uidStr); + Standalone valStr = LiteralStringRef("Success"); + TraceEvent(SevDebug, "TLogKeyStr").detail("Value", keyStr); + snapFailKeySetters->push_back(runRYWTransaction(self->cx, [=](Reference tr) -> Future + { tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); tr->set(keyStr, valStr); return Void(); })); + } } } if (*execCmd == execDisableTLogPop) { @@ -1376,15 +1383,13 @@ ACTOR Future execProcessingHelper(TLogData* self, .detail("UidStr", uidStr.toString()) .detail("IgnorePopUid", self->ignorePopUid) .detail("IgnporePopRequest", self->ignorePopRequest) - .detail("IgnporePopDeadline", self->ignorePopDeadline) - .trackLatest("DisablePopTLog"); + .detail("IgnporePopDeadline", self->ignorePopDeadline); } if (*execCmd == execEnableTLogPop) { if (self->ignorePopUid != uidStr.toString()) { TraceEvent(SevWarn, "TLogPopDisableEnableUidMismatch") .detail("IgnorePopUid", self->ignorePopUid) - .detail("UidStr", uidStr.toString()) - .trackLatest("TLogPopDisableEnableUidMismatch"); + .detail("UidStr", uidStr.toString()); } TraceEvent("EnableTLogPlayAllIgnoredPops"); @@ -1408,8 +1413,7 @@ ACTOR Future execProcessingHelper(TLogData* self, .detail("UidStr", uidStr.toString()) .detail("IgnorePopUid", self->ignorePopUid) .detail("IgnporePopRequest", self->ignorePopRequest) - .detail("IgnporePopDeadline", self->ignorePopDeadline) - .trackLatest("EnablePopTLog"); + .detail("IgnporePopDeadline", self->ignorePopDeadline); } } return Void(); @@ -1454,8 +1458,6 @@ ACTOR Future tLogSnapHelper(TLogData* self, } poppedTagVersion = tagv->popped; - state std::string message = "ExecTrace/TLog/" + tagv->tag.toString() + "/" + uidStr.toString(); - TraceEvent te = TraceEvent(SevDebug, "TLogExecTraceDetailed"); te.detail("Uid", uidStr.toString()); te.detail("Status", err); @@ -1469,9 +1471,6 @@ ACTOR Future tLogSnapHelper(TLogData* self, te.detail("PersistentDatadurableVersion", logData->persistentDataDurableVersion); te.detail("QueueCommittedVersion", 
logData->queueCommittedVersion.get()); te.detail("IgnorePopUid", self->ignorePopUid); - if (execCmd == execSnap) { - te.trackLatest(message.c_str()); - } } return Void(); } @@ -1530,6 +1529,7 @@ ACTOR Future tLogCommit( state TLogQueueEntryRef qe; state StringRef execCmd; state Standalone> execTags; + state vector> snapFailKeySetters; if (logData->version.get() == req.prevVersion) { // Not a duplicate (check relies on no waiting between here and self->version.set() below!) if(req.debugID.present()) @@ -1542,7 +1542,7 @@ ACTOR Future tLogCommit( qe.id = logData->logId; if (req.hasExecOp) { - wait(execProcessingHelper(self, logData, &req, &execTags, &execArg, &execCmd, &execVersion)); + wait(execProcessingHelper(self, logData, &req, &execTags, &execArg, &execCmd, &execVersion, &snapFailKeySetters)); if (execVersion != invalidVersion) { TraceEvent(SevDebug, "SettingExecOpCommit") .detail("ExecVersion", execVersion) @@ -1567,6 +1567,7 @@ ACTOR Future tLogCommit( // Notifies the commitQueue actor to commit persistentQueue, and also unblocks tLogPeekMessages actors logData->version.set( req.version ); + if(req.debugID.present()) g_traceBatch.addEvent("CommitDebug", tlogDebugID.get().first(), "TLog.tLogCommit.AfterTLogCommit"); } @@ -1593,6 +1594,13 @@ ACTOR Future tLogCommit( g_traceBatch.addEvent("CommitDebug", tlogDebugID.get().first(), "TLog.tLogCommit.After"); req.reply.send( logData->durableKnownCommittedVersion ); + if (g_network->isSimulated()) { + if (snapFailKeySetters.size() > 0) { + TraceEvent(SevDebug, "SettingSnapFailKey"); + wait(waitForAll(snapFailKeySetters)); + TraceEvent(SevDebug, "SettingSnapFailKeyDone"); + } + } return Void(); } diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index d0b9ecf76a..bc3256ac64 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -25,6 +25,7 @@ #include "fdbclient/Notified.h" #include "fdbclient/KeyRangeMap.h" #include "fdbclient/SystemData.h" +#include 
"fdbclient/RunTransaction.actor.h" #include "fdbserver/WorkerInterface.actor.h" #include "fdbserver/TLogInterface.h" #include "fdbserver/Knobs.h" @@ -302,6 +303,7 @@ struct TLogData : NonCopyable { AsyncVar largeDiskQueueCommitBytes; //becomes true when diskQueueCommitBytes is greater than MAX_QUEUE_COMMIT_BYTES Reference> dbInfo; + Database cx; NotifiedVersion queueCommitEnd; Version queueCommitBegin; @@ -347,6 +349,7 @@ struct TLogData : NonCopyable { concurrentLogRouterReads(SERVER_KNOBS->CONCURRENT_LOG_ROUTER_READS), ignorePopRequest(false), ignorePopDeadline(), ignorePopUid(), dataFolder(folder), toBePopped() { + cx = openDBOnServer(dbInfo, TaskDefaultEndpoint, true, true); } }; @@ -1220,8 +1223,7 @@ ACTOR Future tLogPop( TLogData* self, TLogPopRequest req, Referencenow()) .detail("IgnorePopRequest", self->ignorePopRequest) - .detail("IgnorePopDeadline", self->ignorePopDeadline) - .trackLatest("DisableTLogPopTimedOut"); + .detail("IgnorePopDeadline", self->ignorePopDeadline); } wait(tLogPopCore(self, req.tag, req.to, logData)); req.reply.send(Void()); @@ -1649,7 +1651,8 @@ ACTOR Future execProcessingHelper(TLogData* self, Standalone>* execTags, ExecCmdValueString* execArg, StringRef* execCmd, - Version* execVersion) + Version* execVersion, + vector>* snapFailKeySetters) { // inspect the messages to find if there is an Exec type and print // it. 
message are prefixed by the length of the message and each @@ -1702,7 +1705,7 @@ ACTOR Future execProcessingHelper(TLogData* self, } if (*execCmd == execSnap) { // validation check specific to snap request - std::string reason; + state std::string reason; if (!self->ignorePopRequest) { *execVersion = invalidVersion; reason = "SnapFailIgnorePopNotSet"; @@ -1715,17 +1718,22 @@ ACTOR Future execProcessingHelper(TLogData* self, TraceEvent(SevWarn, "TLogSnapFailed") .detail("IgnorePopUid", self->ignorePopUid) .detail("IgnorePopRequest", self->ignorePopRequest) - .detail("Reason", reason) - .trackLatest(reason.c_str()); + .detail("Reason", reason); - std::string message = "ExecTrace/TLog/" + logData->allTags.begin()->toString(); - "/" + uidStr.toString(); TraceEvent("ExecCmdSnapCreate") .detail("Uid", uidStr.toString()) .detail("Status", -1) .detail("Tag", logData->allTags.begin()->toString()) - .detail("Role", "TLog") - .trackLatest(message.c_str()); + .detail("Role", "TLog"); + + if (g_network->isSimulated()) { + // write SnapFailedTLog.$UID + Standalone keyStr = snapTestFailStatus.withSuffix(uidStr); + StringRef valStr = LiteralStringRef("Success"); + TraceEvent(SevDebug, "TLogKeyStr").detail("Value", keyStr); + snapFailKeySetters->push_back(runRYWTransaction(self->cx, [=](Reference tr) -> Future + { tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); tr->set(keyStr, valStr); return Void(); })); + } } } if (*execCmd == execDisableTLogPop) { @@ -1743,15 +1751,13 @@ ACTOR Future execProcessingHelper(TLogData* self, .detail("UidStr", uidStr.toString()) .detail("IgnorePopUid", self->ignorePopUid) .detail("IgnporePopRequest", self->ignorePopRequest) - .detail("IgnporePopDeadline", self->ignorePopDeadline) - .trackLatest("DisablePopTLog"); + .detail("IgnporePopDeadline", self->ignorePopDeadline); } if (*execCmd == execEnableTLogPop) { if (self->ignorePopUid != uidStr.toString()) { TraceEvent(SevWarn, "TLogPopDisableEnableUidMismatch") .detail("IgnorePopUid", 
self->ignorePopUid) - .detail("UidStr", uidStr.toString()) - .trackLatest("TLogPopDisableEnableUidMismatch"); + .detail("UidStr", uidStr.toString()); } TraceEvent("EnableTLogPlayAllIgnoredPops"); @@ -1775,8 +1781,7 @@ ACTOR Future execProcessingHelper(TLogData* self, .detail("UidStr", uidStr.toString()) .detail("IgnorePopUid", self->ignorePopUid) .detail("IgnporePopRequest", self->ignorePopRequest) - .detail("IgnporePopDeadline", self->ignorePopDeadline) - .trackLatest("EnablePopTLog"); + .detail("IgnporePopDeadline", self->ignorePopDeadline); } } return Void(); @@ -1820,8 +1825,6 @@ ACTOR Future tLogSnapHelper(TLogData* self, } poppedTagVersion = tagv->popped; - state std::string message = "ExecTrace/TLog/" + tagv->tag.toString() + "/" + uidStr.toString(); - TraceEvent te = TraceEvent(SevDebug, "TLogExecTraceDetailed"); te.detail("Uid", uidStr.toString()); te.detail("Status", err); @@ -1835,9 +1838,6 @@ ACTOR Future tLogSnapHelper(TLogData* self, te.detail("PersistentDatadurableVersion", logData->persistentDataDurableVersion); te.detail("QueueCommittedVersion", logData->queueCommittedVersion.get()); te.detail("IgnorePopUid", self->ignorePopUid); - if (execCmd == execSnap) { - te.trackLatest(message.c_str()); - } } return Void(); } @@ -1906,9 +1906,10 @@ ACTOR Future tLogCommit( qe.knownCommittedVersion = logData->knownCommittedVersion; qe.messages = req.messages; qe.id = logData->logId; + state vector> snapFailKeySetters; if (req.hasExecOp) { - wait(execProcessingHelper(self, logData, &req, &execTags, &execArg, &execCmd, &execVersion)); + wait(execProcessingHelper(self, logData, &req, &execTags, &execArg, &execCmd, &execVersion, &snapFailKeySetters)); if (execVersion != invalidVersion) { TraceEvent(SevDebug, "SettingExecOpCommit") .detail("ExecVersion", execVersion) @@ -1960,6 +1961,13 @@ ACTOR Future tLogCommit( g_traceBatch.addEvent("CommitDebug", tlogDebugID.get().first(), "TLog.tLogCommit.After"); req.reply.send( logData->durableKnownCommittedVersion ); + if 
(g_network->isSimulated()) { + if (snapFailKeySetters.size() > 0) { + TraceEvent(SevDebug, "SettingSnapFailKey"); + wait(waitForAll(snapFailKeySetters)); + TraceEvent(SevDebug, "SettingSnapFailKeyDone"); + } + } return Void(); } diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp index 0e9e583e3f..f3b0a1e9da 100644 --- a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -1892,7 +1892,6 @@ snapHelper(StorageServer* data, MutationRef m, Version ver) int err = wait(execHelper(&execArg, data->folder, "role=storage")); clearExecOpInProgress(execUID); } - auto tokenStr = "ExecTrace/storage/" + uidStr.toString(); TraceEvent te = TraceEvent("ExecTraceStorage"); te.detail("Uid", uidStr.toString()); te.detail("Status", err); @@ -1904,9 +1903,6 @@ snapHelper(StorageServer* data, MutationRef m, Version ver) te.detail("DataVersion", data->version.get()); te.detail("Tag", data->tag.toString()); te.detail("SnapCreateSkipped", skip); - if (cmd == execSnap) { - te.trackLatest(tokenStr.c_str()); - } return Void(); } diff --git a/fdbserver/workloads/SnapTest.actor.cpp b/fdbserver/workloads/SnapTest.actor.cpp index b9c41c3965..908d53c37a 100644 --- a/fdbserver/workloads/SnapTest.actor.cpp +++ b/fdbserver/workloads/SnapTest.actor.cpp @@ -77,7 +77,6 @@ public: // variables int numSnaps; // num of snapshots to be taken // FIXME: currently validation works on numSnap = 1 double maxSnapDelay; // max delay before which a snapshot will be taken - bool snapCheck; // check for the successful snap create int testID; // test id UID snapUID; // UID used for snap name std::string restartInfoLocation; // file location to store the snap restore info @@ -85,14 +84,13 @@ public: // variables public: // ctor & dtor SnapTestWorkload(WorkloadContext const& wcx) - : TestWorkload(wcx), numSnaps(0), maxSnapDelay(0.0), snapCheck(false), testID(0), snapUID() { + : TestWorkload(wcx), numSnaps(0), maxSnapDelay(0.0), testID(0), snapUID() { 
TraceEvent("SnapTestWorkload Constructor"); std::string workloadName = "SnapTest"; maxRetryCntToRetrieveMessage = 10; numSnaps = getOption(options, LiteralStringRef("numSnaps"), 0); maxSnapDelay = getOption(options, LiteralStringRef("maxSnapDelay"), 25.0); - snapCheck = getOption(options, LiteralStringRef("snapCheck"), false); testID = getOption(options, LiteralStringRef("testID"), 0); restartInfoLocation = getOption(options, LiteralStringRef("restartInfoLocation"), LiteralStringRef("simfdb/restartInfo.ini")) @@ -113,45 +111,33 @@ public: // workload functions return Void(); } + ACTOR Future _check(Database cx, SnapTestWorkload* self) { + state Transaction tr(cx); + // read the key SnapFailedTLog.$UID + loop { + try { + Standalone keyStr = snapTestFailStatus.withSuffix(StringRef(self->snapUID.toString())); + TraceEvent("TestKeyStr").detail("Value", keyStr); + tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); + Optional val = wait(tr.get(keyStr)); + ASSERT(val.present()); + break; + } catch (Error &e) { + wait(tr.onError(e)); + } + } + return true; + } + Future check(Database const& cx) override { - TraceEvent("SnapTestWorkloadCheck").detail("ClientID", clientId).detail("SnapCheck", this->snapCheck); - if (!this->snapCheck || clientId != 0) { - TraceEvent("SnapTestCheckSucc"); + TraceEvent("SnapTestWorkloadCheck").detail("ClientID", clientId); + if (clientId != 0) { return true; } - switch (this->testID) { - case 0: - case 1: - case 2: - case 3: { - Future> proxyIfaces; - return (verifyExecTraceVersion(cx, this)); - break; + if (this->testID != 5 && this->testID != 6) { + return true; } - case 4: { - std::string token = "DisableTLogPopTimedOut"; - return verifyTLogTrackLatest(cx, this, token); - break; - } - case 5: { - std::string token = "TLogPopDisableEnableUidMismatch"; - return verifyTLogTrackLatest(cx, this, token); - break; - } - case 6: { - std::string token = "SnapFailIgnorePopNotSet"; - return verifyTLogTrackLatest(cx, this, token); - break; - } - 
case 7: { - std::string token = "SnapFailedDisableTLogUidMismatch"; - return verifyTLogTrackLatest(cx, this, token); - break; - } - default: { break; } - } - TraceEvent(SevError, "InvalidPathCheckOptions"); - return false; + return _check(cx, this); } void getMetrics(vector& m) override { TraceEvent("SnapTestWorkloadGetMetrics"); } @@ -268,7 +254,6 @@ public: // workload functions begin = firstGreaterThan(kvRange.end()[-1].key); } catch (Error& e) { wait(tr.onError(e)); - cnt = 0; } } TraceEvent("SnapTestVerifyCntValue").detail("Value", cnt); @@ -292,45 +277,15 @@ public: // workload functions wait(tr.onError(e)); } } - // wait for 40 seconds and verify that the enabled pop happened - // automatically - wait(delay(40.0)); - self->snapUID = UID::fromString("a36b2ca0e8dab0452ac3e12b6b926f4b"); } else if (self->testID == 5) { - // description: disable TLog pop and enable TLog pop with - // different UIDs should mis-match and print an error - tr.reset(); - loop { - // disable pop of the TLog - try { - StringRef payLoadRef = LiteralStringRef("empty-binary:uid=956349f5f368d37a802f1f37d7f4b9c1"); - tr.execute(execDisableTLogPop, payLoadRef); - wait(tr.commit()); - break; - } catch (Error& e) { - wait(tr.onError(e)); - } - } - tr.reset(); - loop { - // enable pop of the TLog - try { - StringRef payLoadRef = LiteralStringRef("empty-binary:uid=5810898ca2f3143a246886c79d1bea92"); - tr.execute(execEnableTLogPop, payLoadRef); - wait(tr.commit()); - break; - } catch (Error& e) { - wait(tr.onError(e)); - } - } - self->snapUID = UID::fromString("5810898ca2f3143a246886c79d1bea92"); - } else if (self->testID == 6) { // snapshot create without disabling pop of the TLog tr.reset(); + state Standalone uidStr = LiteralStringRef("d78b08d47f341158e9a54d4baaf4a4dd"); + self->snapUID = UID::fromString(uidStr.toString()); loop { try { - StringRef snapPayload = LiteralStringRef("/bin/" - "snap_create.sh:uid=d78b08d47f341158e9a54d4baaf4a4dd"); + Standalone snapPayload = 
LiteralStringRef("/bin/" + "snap_create.sh:uid=").withSuffix(uidStr); tr.execute(execSnap, snapPayload); wait(tr.commit()); break; @@ -339,8 +294,7 @@ public: // workload functions wait(tr.onError(e)); } } - self->snapUID = UID::fromString("d78b08d47f341158e9a54d4baaf4a4dd"); - } else if (self->testID == 7) { + } else if (self->testID == 6) { // disable popping of TLog and snapshot create with mis-matching tr.reset(); loop { @@ -355,10 +309,12 @@ public: // workload functions } } tr.reset(); + uidStr = LiteralStringRef("ba61e9612a561d60bd83ad83e1b63568"); + self->snapUID = UID::fromString(uidStr.toString()); loop { // snap create with different UID try { - StringRef snapPayload = LiteralStringRef("/bin/snap_create.sh:uid=ba61e9612a561d60bd83ad83e1b63568"); + Standalone snapPayload = LiteralStringRef("/bin/snap_create.sh:uid=").withSuffix(uidStr); tr.execute(execSnap, snapPayload); wait(tr.commit()); break; @@ -367,8 +323,7 @@ public: // workload functions wait(tr.onError(e)); } } - self->snapUID = UID::fromString("ba61e9612a561d60bd83ad83e1b63568"); - } else if (self->testID == 8) { + } else if (self->testID == 7) { // create a snapshot with a non whitelisted binary path and operation // should fail state bool testedFailure = false; @@ -396,220 +351,6 @@ public: // workload functions wait(delay(0.0)); return Void(); } - - ACTOR Future verifyTLogTrackLatest(Database cx, SnapTestWorkload* self, std::string event) { - TraceEvent("VerifyTLogTrackLatest"); - state StringRef eventTokenRef(event); - state vector tLogWorkers; - state std::vector> tLogMessages; - state std::vector workers = wait(getWorkers(self->dbInfo)); - state std::map address_workers; - - for (auto const& worker : workers) { - address_workers[worker.interf.address()] = worker.interf; - } - vector tLogServers = self->dbInfo->get().logSystemConfig.allLocalLogs(); - - for (auto s : tLogServers) { - auto it = address_workers.find(s.address()); - if (it != address_workers.end()) { - 
tLogWorkers.push_back(it->second); - TraceEvent("TLogWorker") - .detail("Address", s.address()) - .detail("Id", s.id()) - .detail("Locality", s.locality.toString()); - } - } - - state int entryi = 0; - state int foundTagServers = 0; - for (; entryi < tLogWorkers.size(); entryi++) { - tLogMessages.push_back( - timeoutError(tLogWorkers[entryi].eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 3.0)); - - try { - TraceEvent(SevDebug, "WaitingForTlogMessages"); - wait(waitForAll(tLogMessages)); - } catch (Error& e) { - TraceEvent(SevError, "UnableToRetrieveTLogMessages") - .detail("Token", eventTokenRef.toString()) - .detail("Reason", "FailedToGetTLogMessages") - .detail("Code", e.what()); - return false; - } - printMessages(tLogMessages); - filterEmptyMessages(tLogMessages); - if (tLogMessages.size() < 1) { - TraceEvent("VerifyTLogTrackLatestMessageNotFound") - .detail("Address", tLogWorkers[entryi].address()) - .detail("Token", eventTokenRef.toString()); - } else { - ++foundTagServers; - } - tLogMessages.clear(); - } - // FIXME: logSystemConfig.allLocalLogs returns remote tlogServers also in few cases and hence the test fails. 
- // Verify that foundTagServers matches the number of TLogServers in the local region - if (foundTagServers < 1) { - TraceEvent(SevError, "VerifyTLogTrackLatestMessageNotReachAllTLogservers") - .detail("Token", eventTokenRef.toString()) - .detail("FoundaTagServers", foundTagServers); - return false; - } - TraceEvent("VerifyTLogTrackLatestDone"); - return true; - } - - ACTOR Future verifyExecTraceVersion(Database cx, SnapTestWorkload* self) { - state std::vector coordAddrs = wait(getCoordinators(cx)); - state vector proxyWorkers = wait(getWorkers(self->dbInfo)); - state vector storageWorkers = wait(getWorkers(self->dbInfo)); - state vector tLogWorkers = wait(getWorkers(self->dbInfo)); - state vector workers = wait(getWorkers(self->dbInfo)); - - state std::vector> proxyMessages; - state std::vector> tLogMessages; - state std::vector> storageMessages; - state std::vector> coordMessages; - state int numDurableVersionChecks = 0; - state std::map visitedStorageTags; - - for (int i = 0; i < workers.size(); i++) { - std::string eventToken = "ExecTrace/Coordinators/" + self->snapUID.toString(); - StringRef eventTokenRef(eventToken); - coordMessages.push_back( - timeoutError(workers[i].interf.eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 3.0)); - } - - for (int i = 0; i < workers.size(); i++) { - std::string eventToken = "ExecTrace/Proxy/" + self->snapUID.toString(); - StringRef eventTokenRef(eventToken); - proxyMessages.push_back( - timeoutError(workers[i].interf.eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 3.0)); - } - - for (int i = 0; i < storageWorkers.size(); i++) { - std::string eventToken = "ExecTrace/storage/" + self->snapUID.toString(); - StringRef eventTokenRef(eventToken); - storageMessages.push_back(timeoutError( - storageWorkers[i].interf.eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 3.0)); - } - - try { - wait(waitForAll(proxyMessages)); - wait(waitForAll(storageMessages)); - wait(waitForAll(coordMessages)); - } catch 
(Error& e) { - TraceEvent(SevError, "UnableToRetrieveProxyStorageCoordMessages"); - return false; - } - - // filter out empty messages - filterEmptyMessages(proxyMessages); - filterEmptyMessages(storageMessages); - filterEmptyMessages(coordMessages); - - TraceEvent("SnapTestProxyMessages"); - printMessages(proxyMessages); - TraceEvent("SnapTestStorageMessages"); - printMessages(storageMessages); - TraceEvent("SnapTestCoordMessages"); - printMessages(coordMessages); - - if (proxyMessages.size() != 1) { - // if no message from proxy or more than one fail the check - TraceEvent(SevError, "NoExecTraceMessageFromProxy"); - return false; - } - - TraceEvent("CoordinatorSnapStatus") - .detail("CoordMessageSize", coordMessages.size()) - .detail("CoordAddrssize", coordAddrs.size()); - if (coordMessages.size() < (coordAddrs.size() + 1) / 2) { - TraceEvent(SevError, "NoExecTraceMessageFromQuorumOfCoordinators"); - return false; - } - - state int entryi = 0; - state int numTags = -1; - - for (; entryi < proxyMessages.size(); entryi++) { - state Version execVersion = -1; - state std::string emptyStr; - - TraceEvent("RelevantProxyMessage").detail("Msg", proxyMessages[entryi].get().toString()); - if (proxyMessages[entryi].get().toString() != emptyStr) { - getVersionAndnumTags(proxyMessages[entryi].get(), execVersion, numTags); - ASSERT(numTags > 0); - } - state int entryj = 0; - for (; (execVersion != -1) && entryj < storageMessages.size(); entryj++) { - // for each message that has this verison, get the tag and - // the durable version - state Tag tag; - state Tag invalidTag; - state Version durableVersion = -1; - TraceEvent("RelevantStorageMessage").detail("Msg", storageMessages[entryj].get().toString()); - ASSERT(storageMessages[entryj].get().toString() != emptyStr); - getTagAndDurableVersion(storageMessages[entryj].get(), execVersion, tag, durableVersion); - TraceEvent("SearchingTLogMessages").detail("Tag", tag.toString()); - - tLogMessages.clear(); - for (int m = 0; (tag != 
invalidTag) && m < tLogWorkers.size(); m++) { - visitedStorageTags[tag] = true; - std::string eventToken = "ExecTrace/TLog/" + tag.toString() + "/" + self->snapUID.toString(); - StringRef eventTokenRef(eventToken); - tLogMessages.push_back(timeoutError( - tLogWorkers[m].interf.eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 3.0)); - } - try { - TraceEvent("WaitingForTlogMessages"); - if (tag != invalidTag) { - wait(waitForAll(tLogMessages)); - } - } catch (Error& e) { - TraceEvent(SevError, "VerifyExecTraceVersionFailure") - .detail("Reason", "FailedToGetTLogMessages") - .detail("Code", e.what()); - return false; - } - filterEmptyMessages(tLogMessages); - state int entryk = 0; - numDurableVersionChecks = 0; - for (; (tag != invalidTag) && entryk < tLogMessages.size(); entryk++) { - // for each of the message that has this version and tag - // verify that - // 1) durableVersion >= minTLogVersion -1 - // 2) durableVersion < maxTLogVersion - Version minTLogVersion = -1; - Version maxTLogVersion = -1; - TraceEvent("TLogMessage").detail("Msg", tLogMessages[entryk].get().toString()); - ASSERT(tLogMessages[entryk].get().toString() != emptyStr); - getMinAndMaxTLogVersions(tLogMessages[entryk].get(), execVersion, tag, minTLogVersion, maxTLogVersion); - if (minTLogVersion != -1 && maxTLogVersion != -1) { - if ((durableVersion >= minTLogVersion - 1) && (durableVersion < maxTLogVersion)) { - ++numDurableVersionChecks; - TraceEvent("Successs!!!"); - } - } - } - // if we did not find even one tlog for a given tag fail the check - if (numDurableVersionChecks < 1) { - TraceEvent(SevError, "NoTLogFoundForATag"); - return false; - } - tLogMessages.clear(); - } - } - - // validates that we encountered unique tags of value numTags - if (numTags != visitedStorageTags.size()) { - TraceEvent(SevError, "StorageMessagesWereNotFound"); - return false; - } - TraceEvent("VerifyExecTraceVersionSuccess"); - return true; - } }; WorkloadFactory SnapTestWorkloadFactory("SnapTest"); diff 
--git a/tests/fast/SnapTestFailAndDisablePop.txt b/tests/fast/SnapTestFailAndDisablePop.txt index e19532be99..00676a78bb 100644 --- a/tests/fast/SnapTestFailAndDisablePop.txt +++ b/tests/fast/SnapTestFailAndDisablePop.txt @@ -5,36 +5,24 @@ testName=SnapTest numSnaps=1 maxSnapDelay=3.0 testID=4 -snapCheck=true - -; TLog pop enable and disable UID mismatch -testTitle=SnapTLogPopEnableDisableMismatch -testName=SnapTest -numSnaps=1 -maxSnapDelay=3.0 -testID=5 -snapCheck=true ; snapCreate without TLogPopDisable testTitle=SnapCreateWithNoDisablePop testName=SnapTest numSnaps=1 maxSnapDelay=3.0 -testID=6 -snapCheck=true +testID=5 ; snapCreate and tlogPopDisable with mis-matched UID testTitle=SnapCreateDisableTLogPopMismatch testName=SnapTest numSnaps=1 maxSnapDelay=3.0 -testID=7 -snapCheck=true +testID=6 ; snapCreate with binary path that is not whitelisted testTitle=SnapCreateNotWhitelistedBinaryPath testName=SnapTest numSnaps=1 maxSnapDelay=3.0 -testID=8 -snapCheck=false +testID=7 diff --git a/tests/restarting/SnapCycleRestart-1.txt b/tests/restarting/SnapCycleRestart-1.txt index 0898e0b1ea..2a1a8f275c 100644 --- a/tests/restarting/SnapCycleRestart-1.txt +++ b/tests/restarting/SnapCycleRestart-1.txt @@ -12,7 +12,6 @@ testTitle=SnapCyclePre maxSnapDelay=10.0 testID=1 clearAfterTest=false - snapCheck=true testTitle=SnapCycleShutdown ;save and shutdown diff --git a/tests/restarting/SnapTestAttrition-1.txt b/tests/restarting/SnapTestAttrition-1.txt index e85c024ef9..2ff2d35437 100644 --- a/tests/restarting/SnapTestAttrition-1.txt +++ b/tests/restarting/SnapTestAttrition-1.txt @@ -5,7 +5,6 @@ testTitle=SnapTestPre maxSnapDelay=3.0 testID=0 clearAfterTest=false - snapCheck=false testTitle=SnapTestTakeSnap ;Take snap and do read/write @@ -26,7 +25,6 @@ testTitle=SnapTestTakeSnap maxSnapDelay=30.0 testID=1 clearAfterTest=false - snapCheck=false testName=Attrition testDuration=20.0 @@ -38,7 +36,6 @@ testTitle=SnapTestPost maxSnapDelay=25.0 testID=2 clearAfterTest=false - 
snapCheck=false ; save and shutdown testTitle=SnapSimpleShutdown diff --git a/tests/restarting/SnapTestAttrition-2.txt b/tests/restarting/SnapTestAttrition-2.txt index fd6a3ab7a3..07d71073e1 100644 --- a/tests/restarting/SnapTestAttrition-2.txt +++ b/tests/restarting/SnapTestAttrition-2.txt @@ -4,5 +4,4 @@ testName=SnapTest numSnaps=1 maxSnapDelay=3.0 testID=3 -snapCheck=false restartInfoLocation=simfdb/restartInfo.ini diff --git a/tests/restarting/SnapTestRestart-1.txt b/tests/restarting/SnapTestRestart-1.txt index 6ae2d6bb0c..97972ff2de 100644 --- a/tests/restarting/SnapTestRestart-1.txt +++ b/tests/restarting/SnapTestRestart-1.txt @@ -5,7 +5,6 @@ testTitle=SnapTestPre maxSnapDelay=3.0 testID=0 clearAfterTest=false - snapCheck=false testTitle=SnapTestTakeSnap ;Take snap and do read/write @@ -26,7 +25,6 @@ testTitle=SnapTestTakeSnap maxSnapDelay=30.0 testID=1 clearAfterTest=false - snapCheck=true testTitle=SnapTestPost ;write 1000 Keys ending with odd numbers @@ -35,7 +33,6 @@ testTitle=SnapTestPost maxSnapDelay=25.0 testID=2 clearAfterTest=false - snapCheck=false testTitle=SnapTestShutdown ;save and shutdown diff --git a/tests/restarting/SnapTestRestart-2.txt b/tests/restarting/SnapTestRestart-2.txt index d8dd4b711e..b8bdfc6b34 100644 --- a/tests/restarting/SnapTestRestart-2.txt +++ b/tests/restarting/SnapTestRestart-2.txt @@ -4,4 +4,3 @@ testName=SnapTest numSnaps=1 maxSnapDelay=3.0 testID=3 -snapCheck=false diff --git a/tests/restarting/SnapTestSimpleRestart-1.txt b/tests/restarting/SnapTestSimpleRestart-1.txt index 773ac6c909..bf74326ecc 100644 --- a/tests/restarting/SnapTestSimpleRestart-1.txt +++ b/tests/restarting/SnapTestSimpleRestart-1.txt @@ -5,7 +5,6 @@ testTitle=SnapSimplePre maxSnapDelay=30.0 testID=0 clearAfterTest=false - snapCheck=false ;take snap testTitle=SnapSimpleTakeSnap @@ -14,7 +13,6 @@ testTitle=SnapSimpleTakeSnap maxSnapDelay=5.0 testID=1 clearAfterTest=false - snapCheck=true ;write 1000 Keys ending with odd number testTitle=SnapSimplePost 
@@ -23,7 +21,6 @@ testTitle=SnapSimplePost maxSnapDelay=3.0 testID=2 clearAfterTest=false - snapCheck=false ; save and shutdown testTitle=SnapSimpleShutdown diff --git a/tests/restarting/SnapTestSimpleRestart-2.txt b/tests/restarting/SnapTestSimpleRestart-2.txt index 54cb126362..6e17c33151 100644 --- a/tests/restarting/SnapTestSimpleRestart-2.txt +++ b/tests/restarting/SnapTestSimpleRestart-2.txt @@ -4,4 +4,3 @@ testName=SnapTest numSnaps=1 maxSnapDelay=3.0 testID=3 -snapCheck=false From dcd2d96751d3b2664ffef62e5ef576ccd6c3255e Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Wed, 24 Apr 2019 12:45:28 -0700 Subject: [PATCH 35/69] make spawnProcess predictable in the simulator --- fdbserver/FDBExecHelper.actor.cpp | 9 ++++++++- fdbserver/OldTLogServer_6_0.actor.cpp | 2 +- fdbserver/TLogServer.actor.cpp | 2 +- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/fdbserver/FDBExecHelper.actor.cpp b/fdbserver/FDBExecHelper.actor.cpp index 76af843cc1..9e521d5c98 100644 --- a/fdbserver/FDBExecHelper.actor.cpp +++ b/fdbserver/FDBExecHelper.actor.cpp @@ -93,7 +93,14 @@ ACTOR Future spawnProcess(std::string binPath, std::vector par state double runTime = 0; state boost::process::child c(binPath, boost::process::args(paramList), boost::process::std_err > boost::process::null); - if (!isSync) { + + // for async calls in simulator, always delay by a fixed time, otherwise + // the predictability of the simulator breaks + if (!isSync && g_network->isSimulated()) { + wait(delay(g_random->random01())); + } + + if (!isSync && !g_network->isSimulated()) { while (c.running() && runTime <= maxWaitTime) { wait(delay(0.1)); runTime += 0.1; diff --git a/fdbserver/OldTLogServer_6_0.actor.cpp b/fdbserver/OldTLogServer_6_0.actor.cpp index 84991a489a..9e4bd88b75 100644 --- a/fdbserver/OldTLogServer_6_0.actor.cpp +++ b/fdbserver/OldTLogServer_6_0.actor.cpp @@ -24,8 +24,8 @@ #include "fdbclient/NativeAPI.actor.h" #include "fdbclient/Notified.h" #include "fdbclient/KeyRangeMap.h" 
-#include "fdbclient/SystemData.h" #include "fdbclient/RunTransaction.actor.h" +#include "fdbclient/SystemData.h" #include "fdbserver/WorkerInterface.actor.h" #include "fdbserver/TLogInterface.h" #include "fdbserver/Knobs.h" diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index bc3256ac64..d31b500bf6 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -24,8 +24,8 @@ #include "fdbclient/NativeAPI.actor.h" #include "fdbclient/Notified.h" #include "fdbclient/KeyRangeMap.h" -#include "fdbclient/SystemData.h" #include "fdbclient/RunTransaction.actor.h" +#include "fdbclient/SystemData.h" #include "fdbserver/WorkerInterface.actor.h" #include "fdbserver/TLogInterface.h" #include "fdbserver/Knobs.h" From 6f42337c099174aad235cbb71be422b87a0bc8fa Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Wed, 24 Apr 2019 17:36:44 -0700 Subject: [PATCH 36/69] TransactionNotPermitted instead of conflict error When the cluster has not recovered completely, return op not permitted instead of conflict error --- fdbserver/MasterProxyServer.actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbserver/MasterProxyServer.actor.cpp b/fdbserver/MasterProxyServer.actor.cpp index 8af540ea33..6e0df0f007 100644 --- a/fdbserver/MasterProxyServer.actor.cpp +++ b/fdbserver/MasterProxyServer.actor.cpp @@ -799,7 +799,7 @@ ACTOR Future commitBatch( // cluster is fully_recovered. 
TraceEvent("ExecTransactionConflict") .detail("TransactionNum", transactionNum); - committed[transactionNum] = ConflictBatch::TransactionConflict; + committed[transactionNum] = ConflictBatch::TransactionNotPermitted; } else { // Send the ExecOp to // - all the storage nodes in a single region and From 591ff96b9308fe4b0b74fc59bea67abde349a21b Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Mon, 29 Apr 2019 15:24:19 -0700 Subject: [PATCH 37/69] increase retry and use eat instead of parsing --- fdbclient/NativeAPI.actor.cpp | 21 +++++++-------------- fdbserver/workloads/SnapTest.actor.cpp | 5 +++-- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index d6a5458f3d..73f11f2324 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -3315,8 +3315,14 @@ ACTOR Future snapCreate(Database inputCx, StringRef snapCmd, UID snapUID) .detail("UID", snapUID) .detail("PreSnapClientUID", preSnapClientUID); - tr.debugTransaction(snapUID); + StringRef snapCmdArgs = snapCmd; + StringRef snapCmdPart = snapCmdArgs.eat(":"); state Standalone snapUIDRef(snapUID.toString()); + state Standalone snapPayloadRef = snapCmdPart + .withSuffix(LiteralStringRef(":uid=")) + .withSuffix(snapUIDRef) + .withSuffix(LiteralStringRef(",")) + .withSuffix(snapCmdArgs); state Standalone tLogCmdPayloadRef = LiteralStringRef("empty-binary:uid=").withSuffix(snapUIDRef); // disable popping of TLog @@ -3335,19 +3341,6 @@ ACTOR Future snapCreate(Database inputCx, StringRef snapCmd, UID snapUID) TraceEvent("SnapCreateAfterLockingTLogs").detail("UID", snapUID); - const uint8_t* ptr = snapCmd.begin(); - while (*ptr != ':' && ptr < snapCmd.end()) { - ptr++; - } - state Standalone snapPayloadRef; - if (ptr == snapCmd.end()) { - snapPayloadRef = - snapCmd.withSuffix(LiteralStringRef(":uid=")).withSuffix(snapUIDRef); - } else { - snapPayloadRef = - snapCmd.withSuffix(LiteralStringRef(",uid=")).withSuffix(snapUIDRef); - 
} - // snap the storage and Tlogs // if we retry the below command in failure cases with the same snapUID // then the snapCreate can end up creating multiple snapshots with diff --git a/fdbserver/workloads/SnapTest.actor.cpp b/fdbserver/workloads/SnapTest.actor.cpp index 908d53c37a..8e681b5661 100644 --- a/fdbserver/workloads/SnapTest.actor.cpp +++ b/fdbserver/workloads/SnapTest.actor.cpp @@ -202,8 +202,9 @@ public: // workload functions break; } catch (Error& e) { ++retry; - TraceEvent(retry > 3 ? SevWarn : SevInfo, "SnapCreateCommandFailed").detail("Error", e.what()); - if (retry > 3) { + TraceEvent(retry > 100 ? SevWarn : SevInfo, "SnapCreateCommandFailed").detail("Error", e.what()); + if (retry > 100) { + TraceEvent(SevError, "SnapCreateCommandFailed").detail("Error", e.what()); throw operation_failed(); } } From 925499954b869adc78176f0f9527c4346719d456 Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Mon, 29 Apr 2019 20:30:13 -0700 Subject: [PATCH 38/69] New status cluster_not_fully_recovered --- fdbclient/NativeAPI.actor.cpp | 3 ++- fdbserver/ConflictSet.h | 1 + fdbserver/MasterProxyServer.actor.cpp | 7 +++++-- fdbserver/workloads/SnapTest.actor.cpp | 2 +- flow/error_definitions.h | 1 + 5 files changed, 10 insertions(+), 4 deletions(-) diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 73f11f2324..f69bde4089 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -2697,7 +2697,8 @@ ACTOR static Future tryCommit( Database cx, Reference && e.code() != error_code_not_committed && e.code() != error_code_database_locked && e.code() != error_code_proxy_memory_limit_exceeded - && e.code() != error_code_transaction_not_permitted) + && e.code() != error_code_transaction_not_permitted + && e.code() != error_code_transaction_not_fully_recovered) TraceEvent(SevError, "TryCommitError").error(e); if (trLogInfo) trLogInfo->addLog(FdbClientLogEvents::EventCommitError(startTime, static_cast(e.code()), req)); diff --git 
a/fdbserver/ConflictSet.h b/fdbserver/ConflictSet.h index 5f458df828..09d72a8061 100644 --- a/fdbserver/ConflictSet.h +++ b/fdbserver/ConflictSet.h @@ -41,6 +41,7 @@ struct ConflictBatch { TransactionTooOld, TransactionCommitted, TransactionNotPermitted, + TransactionNotFullyRecovered, }; void addTransaction( const CommitTransactionRef& transaction ); diff --git a/fdbserver/MasterProxyServer.actor.cpp b/fdbserver/MasterProxyServer.actor.cpp index 6e0df0f007..4c57637dc9 100644 --- a/fdbserver/MasterProxyServer.actor.cpp +++ b/fdbserver/MasterProxyServer.actor.cpp @@ -797,9 +797,9 @@ ACTOR Future commitBatch( // Currently, snapshot of old tlog generation is not // supported and hence failing the snapshot request until // cluster is fully_recovered. - TraceEvent("ExecTransactionConflict") + TraceEvent("ExecTransactionNotFullyRecovered") .detail("TransactionNum", transactionNum); - committed[transactionNum] = ConflictBatch::TransactionNotPermitted; + committed[transactionNum] = ConflictBatch::TransactionNotFullyRecovered; } else { // Send the ExecOp to // - all the storage nodes in a single region and @@ -1070,6 +1070,9 @@ ACTOR Future commitBatch( else if (committed[t] == ConflictBatch::TransactionNotPermitted) { trs[t].reply.sendError(transaction_not_permitted()); } + else if (committed[t] == ConflictBatch::TransactionNotFullyRecovered) { + trs[t].reply.sendError(transaction_not_fully_recovered()); + } else { trs[t].reply.sendError(not_committed()); } diff --git a/fdbserver/workloads/SnapTest.actor.cpp b/fdbserver/workloads/SnapTest.actor.cpp index 8e681b5661..9c96e8d150 100644 --- a/fdbserver/workloads/SnapTest.actor.cpp +++ b/fdbserver/workloads/SnapTest.actor.cpp @@ -204,7 +204,7 @@ public: // workload functions ++retry; TraceEvent(retry > 100 ? 
SevWarn : SevInfo, "SnapCreateCommandFailed").detail("Error", e.what()); if (retry > 100) { - TraceEvent(SevError, "SnapCreateCommandFailed").detail("Error", e.what()); + TraceEvent(SevError, "SnapCreateCommandExhausted").detail("Error", e.what()); throw operation_failed(); } } diff --git a/flow/error_definitions.h b/flow/error_definitions.h index aebe3430f6..d2a365197b 100755 --- a/flow/error_definitions.h +++ b/flow/error_definitions.h @@ -66,6 +66,7 @@ ERROR( proxy_memory_limit_exceeded, 1042, "Proxy commit memory limit exceeded" ) ERROR( shutdown_in_progress, 1043, "Operation no longer supported due to shutdown" ) ERROR( serialization_failed, 1044, "Failed to deserialize an object" ) ERROR( transaction_not_permitted, 1045, "Operation not permitted") +ERROR( transaction_not_fully_recovered, 1046, "Cluster not fully_recovered") ERROR( broken_promise, 1100, "Broken promise" ) ERROR( operation_cancelled, 1101, "Asynchronous operation cancelled" ) From c53c4fa8982cee8ddeca8bbbe2c1b6c5cbc676cc Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Mon, 29 Apr 2019 20:52:52 -0700 Subject: [PATCH 39/69] reduce the snap test durations --- tests/restarting/SnapTestAttrition-1.txt | 6 +++--- tests/restarting/SnapTestRestart-1.txt | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/restarting/SnapTestAttrition-1.txt b/tests/restarting/SnapTestAttrition-1.txt index 2ff2d35437..d3ceed1584 100644 --- a/tests/restarting/SnapTestAttrition-1.txt +++ b/tests/restarting/SnapTestAttrition-1.txt @@ -9,7 +9,7 @@ testTitle=SnapTestPre testTitle=SnapTestTakeSnap ;Take snap and do read/write testName=ReadWrite - testDuration=20.0 + testDuration=10.0 transactionsPerSecond=10000 writesPerTransactionA=0 readsPerTransactionA=10 @@ -22,12 +22,12 @@ testTitle=SnapTestTakeSnap testName=SnapTest numSnaps=1 - maxSnapDelay=30.0 + maxSnapDelay=10.0 testID=1 clearAfterTest=false testName=Attrition - testDuration=20.0 + testDuration=10.0 testTitle=SnapTestPost ;write 1000 Keys 
ending with odd numbers diff --git a/tests/restarting/SnapTestRestart-1.txt b/tests/restarting/SnapTestRestart-1.txt index 97972ff2de..f3aeb58b2b 100644 --- a/tests/restarting/SnapTestRestart-1.txt +++ b/tests/restarting/SnapTestRestart-1.txt @@ -9,7 +9,7 @@ testTitle=SnapTestPre testTitle=SnapTestTakeSnap ;Take snap and do read/write testName=ReadWrite - testDuration=20.0 + testDuration=10.0 transactionsPerSecond=10000 writesPerTransactionA=0 readsPerTransactionA=10 @@ -22,7 +22,7 @@ testTitle=SnapTestTakeSnap testName=SnapTest numSnaps=1 - maxSnapDelay=30.0 + maxSnapDelay=10.0 testID=1 clearAfterTest=false From 16fc7b6aaa15db332ec3752b7f7fe64ad64edbd3 Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Mon, 29 Apr 2019 21:19:01 -0700 Subject: [PATCH 40/69] move SnapTests into restarting/from_6.2.0 --- tests/restarting/{ => from_6.2.0}/SnapCycleRestart-1.txt | 0 tests/restarting/{ => from_6.2.0}/SnapCycleRestart-2.txt | 0 tests/restarting/{ => from_6.2.0}/SnapTestAttrition-1.txt | 0 tests/restarting/{ => from_6.2.0}/SnapTestAttrition-2.txt | 0 tests/restarting/{ => from_6.2.0}/SnapTestRestart-1.txt | 0 tests/restarting/{ => from_6.2.0}/SnapTestRestart-2.txt | 0 tests/restarting/{ => from_6.2.0}/SnapTestSimpleRestart-1.txt | 0 tests/restarting/{ => from_6.2.0}/SnapTestSimpleRestart-2.txt | 0 8 files changed, 0 insertions(+), 0 deletions(-) rename tests/restarting/{ => from_6.2.0}/SnapCycleRestart-1.txt (100%) rename tests/restarting/{ => from_6.2.0}/SnapCycleRestart-2.txt (100%) rename tests/restarting/{ => from_6.2.0}/SnapTestAttrition-1.txt (100%) rename tests/restarting/{ => from_6.2.0}/SnapTestAttrition-2.txt (100%) rename tests/restarting/{ => from_6.2.0}/SnapTestRestart-1.txt (100%) rename tests/restarting/{ => from_6.2.0}/SnapTestRestart-2.txt (100%) rename tests/restarting/{ => from_6.2.0}/SnapTestSimpleRestart-1.txt (100%) rename tests/restarting/{ => from_6.2.0}/SnapTestSimpleRestart-2.txt (100%) diff --git a/tests/restarting/SnapCycleRestart-1.txt 
b/tests/restarting/from_6.2.0/SnapCycleRestart-1.txt similarity index 100% rename from tests/restarting/SnapCycleRestart-1.txt rename to tests/restarting/from_6.2.0/SnapCycleRestart-1.txt diff --git a/tests/restarting/SnapCycleRestart-2.txt b/tests/restarting/from_6.2.0/SnapCycleRestart-2.txt similarity index 100% rename from tests/restarting/SnapCycleRestart-2.txt rename to tests/restarting/from_6.2.0/SnapCycleRestart-2.txt diff --git a/tests/restarting/SnapTestAttrition-1.txt b/tests/restarting/from_6.2.0/SnapTestAttrition-1.txt similarity index 100% rename from tests/restarting/SnapTestAttrition-1.txt rename to tests/restarting/from_6.2.0/SnapTestAttrition-1.txt diff --git a/tests/restarting/SnapTestAttrition-2.txt b/tests/restarting/from_6.2.0/SnapTestAttrition-2.txt similarity index 100% rename from tests/restarting/SnapTestAttrition-2.txt rename to tests/restarting/from_6.2.0/SnapTestAttrition-2.txt diff --git a/tests/restarting/SnapTestRestart-1.txt b/tests/restarting/from_6.2.0/SnapTestRestart-1.txt similarity index 100% rename from tests/restarting/SnapTestRestart-1.txt rename to tests/restarting/from_6.2.0/SnapTestRestart-1.txt diff --git a/tests/restarting/SnapTestRestart-2.txt b/tests/restarting/from_6.2.0/SnapTestRestart-2.txt similarity index 100% rename from tests/restarting/SnapTestRestart-2.txt rename to tests/restarting/from_6.2.0/SnapTestRestart-2.txt diff --git a/tests/restarting/SnapTestSimpleRestart-1.txt b/tests/restarting/from_6.2.0/SnapTestSimpleRestart-1.txt similarity index 100% rename from tests/restarting/SnapTestSimpleRestart-1.txt rename to tests/restarting/from_6.2.0/SnapTestSimpleRestart-1.txt diff --git a/tests/restarting/SnapTestSimpleRestart-2.txt b/tests/restarting/from_6.2.0/SnapTestSimpleRestart-2.txt similarity index 100% rename from tests/restarting/SnapTestSimpleRestart-2.txt rename to tests/restarting/from_6.2.0/SnapTestSimpleRestart-2.txt From bb474dc3232705f534176f4d1e2a2a58b49e82cd Mon Sep 17 00:00:00 2001 From: 
sramamoorthy Date: Wed, 1 May 2019 18:11:05 -0700 Subject: [PATCH 41/69] if recovery < fully_recovered then fail the exec Will do more cleanup, pushing it for a test run in CI --- fdbclient/DatabaseContext.h | 1 + fdbclient/NativeAPI.actor.cpp | 31 +++++- fdbclient/NativeAPI.actor.h | 3 + fdbserver/MasterProxyServer.actor.cpp | 20 ++-- fdbserver/fdbserver.actor.cpp | 3 +- fdbserver/workloads/SnapTest.actor.cpp | 128 +++++++++++++------------ flow/error_definitions.h | 2 +- 7 files changed, 114 insertions(+), 74 deletions(-) diff --git a/fdbclient/DatabaseContext.h b/fdbclient/DatabaseContext.h index b59fce81a5..0245c2abdb 100644 --- a/fdbclient/DatabaseContext.h +++ b/fdbclient/DatabaseContext.h @@ -148,6 +148,7 @@ public: int64_t transactionsMaybeCommitted; int64_t transactionsResourceConstrained; int64_t transactionsProcessBehind; + int64_t transactionWaitsForFullRecovery; ContinuousSample latencies, readLatencies, commitLatencies, GRVLatencies, mutationsPerCommit, bytesPerCommit; int outstandingWatches; diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index f69bde4089..41023052e7 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -1964,7 +1964,7 @@ Future> getRange( Database const& cx, Future } Transaction::Transaction( Database const& cx ) - : cx(cx), info(cx->taskID), backoff(CLIENT_KNOBS->DEFAULT_BACKOFF), committedVersion(invalidVersion), versionstampPromise(Promise>()), options(cx), numErrors(0), trLogInfo(createTrLogInfoProbabilistically(cx)) + : cx(cx), info(cx->taskID), backoff(CLIENT_KNOBS->DEFAULT_BACKOFF), committedVersion(invalidVersion), versionstampPromise(Promise>()), options(cx), numErrors(0), numRetries(0), trLogInfo(createTrLogInfoProbabilistically(cx)) { setPriority(GetReadVersionRequest::PRIORITY_DEFAULT); } @@ -1987,6 +1987,7 @@ void Transaction::operator=(Transaction&& r) BOOST_NOEXCEPT { info = r.info; backoff = r.backoff; numErrors = r.numErrors; + numRetries = r.numRetries; 
committedVersion = r.committedVersion; versionstampPromise = std::move(r.versionstampPromise); watches = r.watches; @@ -2403,6 +2404,7 @@ TransactionOptions::TransactionOptions(Database const& cx) { if (BUGGIFY) { commitOnFirstProxy = true; } + maxRetries = cx->transactionMaxRetries; } TransactionOptions::TransactionOptions() { @@ -2412,11 +2414,19 @@ TransactionOptions::TransactionOptions() { void TransactionOptions::reset(Database const& cx) { double oldMaxBackoff = maxBackoff; + double oldMaxRetries = maxRetries; memset(this, 0, sizeof(*this)); maxBackoff = cx->apiVersionAtLeast(610) ? oldMaxBackoff : cx->transactionMaxBackoff; + maxRetries = oldMaxRetries; lockAware = cx->lockAware; } +void Transaction::onErrorReset() { + int32_t oldNumRetires = numRetries; + reset(); + numRetries = oldNumRetires; +} + void Transaction::reset() { tr = CommitTransactionRequest(); readVersion = Future(); @@ -2698,7 +2708,7 @@ ACTOR static Future tryCommit( Database cx, Reference && e.code() != error_code_database_locked && e.code() != error_code_proxy_memory_limit_exceeded && e.code() != error_code_transaction_not_permitted - && e.code() != error_code_transaction_not_fully_recovered) + && e.code() != error_code_cluster_not_fully_recovered) TraceEvent(SevError, "TryCommitError").error(e); if (trLogInfo) trLogInfo->addLog(FdbClientLogEvents::EventCommitError(startTime, static_cast(e.code()), req)); @@ -2809,6 +2819,7 @@ ACTOR Future commitAndWatch(Transaction *self) { } self->versionstampPromise.sendError(transaction_invalid_version()); + //self->onErrorReset(); self->reset(); } @@ -3068,6 +3079,9 @@ Future> Transaction::getVersionstamp() { } Future Transaction::onError( Error const& e ) { + if (numRetries < std::numeric_limits::max()) { + numRetries++; + } if (e.code() == error_code_success) { return client_invalid_operation(); @@ -3076,7 +3090,8 @@ Future Transaction::onError( Error const& e ) { e.code() == error_code_commit_unknown_result || e.code() == 
error_code_database_locked || e.code() == error_code_proxy_memory_limit_exceeded || - e.code() == error_code_process_behind) + e.code() == error_code_process_behind || + e.code() == error_code_cluster_not_fully_recovered) { if(e.code() == error_code_not_committed) cx->transactionsNotCommitted++; @@ -3086,9 +3101,15 @@ Future Transaction::onError( Error const& e ) { cx->transactionsResourceConstrained++; if (e.code() == error_code_process_behind) cx->transactionsProcessBehind++; + if (e.code() == error_code_cluster_not_fully_recovered) { + cx->transactionWaitsForFullRecovery++; + if (numRetries > options.maxRetries) { + return e; + } + } double backoff = getBackoff(e.code()); - reset(); + onErrorReset(); return delay( backoff, info.taskID ); } if (e.code() == error_code_transaction_too_old || @@ -3100,7 +3121,7 @@ Future Transaction::onError( Error const& e ) { cx->transactionsFutureVersions++; double maxBackoff = options.maxBackoff; - reset(); + onErrorReset(); return delay( std::min(CLIENT_KNOBS->FUTURE_VERSION_RETRY_DELAY, maxBackoff), info.taskID ); } diff --git a/fdbclient/NativeAPI.actor.h b/fdbclient/NativeAPI.actor.h index 11b915a2ea..3c8f9dc5b7 100644 --- a/fdbclient/NativeAPI.actor.h +++ b/fdbclient/NativeAPI.actor.h @@ -141,6 +141,7 @@ struct StorageMetrics; struct TransactionOptions { double maxBackoff; + uint32_t maxRetries; uint32_t getReadVersionFlags; uint32_t customTransactionSizeLimit; bool checkWritesEnabled : 1; @@ -286,6 +287,7 @@ public: void operator=(Transaction&& r) BOOST_NOEXCEPT; void reset(); + void onErrorReset(); void fullReset(); double getBackoff(int errCode); void debugTransaction(UID dID) { info.debugID = dID; } @@ -296,6 +298,7 @@ public: TransactionInfo info; int numErrors; + int numRetries; std::vector> watches; diff --git a/fdbserver/MasterProxyServer.actor.cpp b/fdbserver/MasterProxyServer.actor.cpp index 4c57637dc9..13f725d58f 100644 --- a/fdbserver/MasterProxyServer.actor.cpp +++ b/fdbserver/MasterProxyServer.actor.cpp @@ 
-1071,7 +1071,7 @@ ACTOR Future commitBatch( trs[t].reply.sendError(transaction_not_permitted()); } else if (committed[t] == ConflictBatch::TransactionNotFullyRecovered) { - trs[t].reply.sendError(transaction_not_fully_recovered()); + trs[t].reply.sendError(cluster_not_fully_recovered()); } else { trs[t].reply.sendError(not_committed()); @@ -1675,15 +1675,19 @@ ACTOR Future masterProxyServerCore( execCoords.push_back(workers[i].interf.execReq.getReply(ExecuteRequest(execReq.execPayload))); } } - wait(timeoutError(waitForAll(execCoords), 10.0)); - int numSucc = 0; - for (auto item : execCoords) { - if (item.isValid() && item.isReady()) { - ++numSucc; + try { + wait(timeoutError(waitForAll(execCoords), 10.0)); + int numSucc = 0; + for (auto item : execCoords) { + if (item.isValid() && item.isReady()) { + ++numSucc; + } } + bool succ = numSucc >= ((execCoords.size() + 1) / 2); + succ ? execReq.reply.send(Void()) : execReq.reply.sendError(operation_failed()); + } catch (Error& e) { + execReq.reply.sendError(e); } - bool succ = numSucc >= ((execCoords.size() + 1) / 2); - succ ? 
execReq.reply.send(Void()) : execReq.reply.sendError(operation_failed()); } when(TxnStateRequest req = waitNext(proxy.txnState.getFuture())) { state ReplyPromise reply = req.reply; diff --git a/fdbserver/fdbserver.actor.cpp b/fdbserver/fdbserver.actor.cpp index 856fedd98e..8b6e44fb3d 100644 --- a/fdbserver/fdbserver.actor.cpp +++ b/fdbserver/fdbserver.actor.cpp @@ -1676,7 +1676,8 @@ int main(int argc, char* argv[]) { std::string tmpFolder = abspath(dataFolder); ini.LoadFile(joinPath(tmpFolder, "restartInfo.ini").c_str()); isRestoring = atoi(ini.GetValue("RESTORE", "isRestoring")); - if (isRestoring) { + bool snapFailed = atoi(ini.GetValue("RESTORE", "BackupFailed")); + if (isRestoring && !snapFailed) { std::vector returnList; std::string ext = ""; returnList = platform::listDirectories(tmpFolder); diff --git a/fdbserver/workloads/SnapTest.actor.cpp b/fdbserver/workloads/SnapTest.actor.cpp index 9c96e8d150..55b1966c97 100644 --- a/fdbserver/workloads/SnapTest.actor.cpp +++ b/fdbserver/workloads/SnapTest.actor.cpp @@ -81,6 +81,7 @@ public: // variables UID snapUID; // UID used for snap name std::string restartInfoLocation; // file location to store the snap restore info int maxRetryCntToRetrieveMessage; // number of retires to do trackLatest + bool skipCheck; // disable check if the exec fails public: // ctor & dtor SnapTestWorkload(WorkloadContext const& wcx) @@ -95,6 +96,7 @@ public: // ctor & dtor restartInfoLocation = getOption(options, LiteralStringRef("restartInfoLocation"), LiteralStringRef("simfdb/restartInfo.ini")) .toString(); + skipCheck = false; } public: // workload functions @@ -112,6 +114,10 @@ public: // workload functions } ACTOR Future _check(Database cx, SnapTestWorkload* self) { + if (self->skipCheck) { + TraceEvent(SevWarnAlways, "SnapCheckIgnored"); + return true; + } state Transaction tr(cx); // read the key SnapFailedTLog.$UID loop { @@ -142,6 +148,29 @@ public: // workload functions void getMetrics(vector& m) override { 
TraceEvent("SnapTestWorkloadGetMetrics"); } + ACTOR Future snapExecHelper(SnapTestWorkload* self, Database cx, StringRef keyRef, StringRef valueRef) { + state Transaction tr(cx); + loop { + try { + tr.execute(keyRef, valueRef); + wait(tr.commit()); + break; + } catch (Error& e) { + try { + wait(tr.onError(e)); + } catch (Error& e) { + if (e.code() == error_code_cluster_not_fully_recovered) { + TraceEvent(SevWarnAlways, "ClusterNotFullyRecovered"); + self->skipCheck = true; + break; + } + throw; + } + } + } + return Void(); + } + ACTOR Future _create_keys(Database cx, std::string prefix, bool even = true) { state Transaction tr(cx); state vector keys; @@ -193,6 +222,7 @@ public: // workload functions wait(delay(toDelay)); state int retry = 0; + state bool snapFailed = false; loop { self->snapUID = g_random->randomUniqueID(); try { @@ -202,10 +232,12 @@ public: // workload functions break; } catch (Error& e) { ++retry; - TraceEvent(retry > 100 ? SevWarn : SevInfo, "SnapCreateCommandFailed").detail("Error", e.what()); - if (retry > 100) { - TraceEvent(SevError, "SnapCreateCommandExhausted").detail("Error", e.what()); - throw operation_failed(); + if (retry > 3) { + if (e.code() == error_code_cluster_not_fully_recovered) { + snapFailed = true; + break; + } + throw e; } } } @@ -214,13 +246,23 @@ public: // workload functions ini.LoadFile(self->restartInfoLocation.c_str()); std::string uidStr = self->snapUID.toString(); ini.SetValue("RESTORE", "RestoreSnapUID", uidStr.c_str()); + ini.SetValue("RESTORE", "BackupFailed", format("%d", snapFailed).c_str()); ini.SaveFile(self->restartInfoLocation.c_str()); // write the snapUID to a file - TraceEvent("Snapshot create succeeded"); + TraceEvent("SnapshotCreateStatus").detail("Status", !snapFailed ? 
"Success" : "Failure"); } else if (self->testID == 2) { // create odd keys after the snapshot wait(self->_create_keys(cx, "snapKey", false /*even*/)); } else if (self->testID == 3) { + CSimpleIni ini; + ini.SetUnicode(); + ini.LoadFile(self->restartInfoLocation.c_str()); + bool backupFailed = atoi(ini.GetValue("RESTORE", "BackupFailed")); + if (backupFailed) { + // since backup failed, skip the restore checking + TraceEvent(SevWarnAlways, "BackupFailedSkippingRestoreCheck"); + return Void(); + } state KeySelector begin = firstGreaterOrEqual(normalKeys.begin); state KeySelector end = firstGreaterOrEqual(normalKeys.end); state int cnt = 0; @@ -257,8 +299,8 @@ public: // workload functions wait(tr.onError(e)); } } - TraceEvent("SnapTestVerifyCntValue").detail("Value", cnt); if (cnt != 1000) { + TraceEvent(SevError, "SnapTestVerifyCntValue").detail("Value", cnt); throw operation_failed(); } } else if (self->testID == 4) { @@ -266,68 +308,32 @@ public: // workload functions // corresponding enable, then TLog will automatically enable the // popping of TLogs. 
this test case validates that we auto // enable the popping of TLogs - tr.reset(); - loop { - // disable pop of the TLog - try { - StringRef payLoadRef = LiteralStringRef("empty-binary:uid=a36b2ca0e8dab0452ac3e12b6b926f4b"); - tr.execute(execDisableTLogPop, payLoadRef); - wait(tr.commit()); - break; - } catch (Error& e) { - wait(tr.onError(e)); - } - } + state Standalone payLoadRef = LiteralStringRef("empty-binary:uid=a36b2ca0e8dab0452ac3e12b6b926f4b"); + wait(self->snapExecHelper(self, cx, execDisableTLogPop, payLoadRef)); } else if (self->testID == 5) { // snapshot create without disabling pop of the TLog - tr.reset(); - state Standalone uidStr = LiteralStringRef("d78b08d47f341158e9a54d4baaf4a4dd"); + StringRef uidStr = LiteralStringRef("d78b08d47f341158e9a54d4baaf4a4dd"); self->snapUID = UID::fromString(uidStr.toString()); - loop { - try { - Standalone snapPayload = LiteralStringRef("/bin/" - "snap_create.sh:uid=").withSuffix(uidStr); - tr.execute(execSnap, snapPayload); - wait(tr.commit()); - break; - } catch (Error& e) { - TraceEvent("SnapCreate").detail("SnapCreateErrorSnapTLogStorage", e.what()); - wait(tr.onError(e)); - } - } + state Standalone snapPayload = LiteralStringRef("/bin/" + "snap_create.sh:uid=").withSuffix(uidStr); + wait(self->snapExecHelper(self, cx, execSnap, snapPayload)); } else if (self->testID == 6) { // disable popping of TLog and snapshot create with mis-matching - tr.reset(); - loop { - // disable pop of the TLog - try { - StringRef payLoadRef = LiteralStringRef("empty-binary:uid=f49d27ddf7a28b6549d930743e0ebdbe"); - tr.execute(execDisableTLogPop, payLoadRef); - wait(tr.commit()); - break; - } catch (Error& e) { - wait(tr.onError(e)); - } + payLoadRef = LiteralStringRef("empty-binary:uid=f49d27ddf7a28b6549d930743e0ebdbe"); + wait(self->snapExecHelper(self, cx, execDisableTLogPop, payLoadRef)); + if (self->skipCheck) { + return Void(); } - tr.reset(); - uidStr = LiteralStringRef("ba61e9612a561d60bd83ad83e1b63568"); + + StringRef uidStr 
= LiteralStringRef("ba61e9612a561d60bd83ad83e1b63568"); self->snapUID = UID::fromString(uidStr.toString()); - loop { - // snap create with different UID - try { - Standalone snapPayload = LiteralStringRef("/bin/snap_create.sh:uid=").withSuffix(uidStr); - tr.execute(execSnap, snapPayload); - wait(tr.commit()); - break; - } catch (Error& e) { - TraceEvent("SnapCreate").detail("SnapCreateErrorSnapTLogStorage", e.what()); - wait(tr.onError(e)); - } - } + snapPayload = LiteralStringRef("/bin/snap_create.sh:uid=").withSuffix(uidStr); + wait(self->snapExecHelper(self, cx, execSnap, snapPayload)); } else if (self->testID == 7) { // create a snapshot with a non whitelisted binary path and operation // should fail state bool testedFailure = false; + snapFailed = false; retry = 0; loop { self->snapUID = g_random->randomUniqueID(); @@ -338,16 +344,20 @@ public: // workload functions break; } catch (Error& e) { ++retry; - if (retry >= 5) { + if (e.code() == error_code_cluster_not_fully_recovered) { + snapFailed = true; break; } if (e.code() == error_code_transaction_not_permitted) { testedFailure = true; break; } + if (retry >= 5) { + break; + } } } - ASSERT(testedFailure == true); + ASSERT(testedFailure || snapFailed); } wait(delay(0.0)); return Void(); diff --git a/flow/error_definitions.h b/flow/error_definitions.h index d2a365197b..5c5d25c8b2 100755 --- a/flow/error_definitions.h +++ b/flow/error_definitions.h @@ -66,7 +66,7 @@ ERROR( proxy_memory_limit_exceeded, 1042, "Proxy commit memory limit exceeded" ) ERROR( shutdown_in_progress, 1043, "Operation no longer supported due to shutdown" ) ERROR( serialization_failed, 1044, "Failed to deserialize an object" ) ERROR( transaction_not_permitted, 1045, "Operation not permitted") -ERROR( transaction_not_fully_recovered, 1046, "Cluster not fully_recovered") +ERROR( cluster_not_fully_recovered, 1046, "Cluster not fully recovered") ERROR( broken_promise, 1100, "Broken promise" ) ERROR( operation_cancelled, 1101, "Asynchronous 
operation cancelled" ) From d3a179b6f9dd25f95933fabc2e4565d470367a62 Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Thu, 2 May 2019 17:51:39 -0700 Subject: [PATCH 42/69] Multiple bug fixes - wait for snapTLogFailKeys in a loop, otherwise in some race condition it can cause a false assert - in single region, there does not seem to be a guarantee of tagLocalityListKey for a given DC ID, avoiding that assert for now - to find the workers that are coordinators, looking up by primary address is not sufficient in some cases, hence looking by both primary and secondary address - test make files to reflect the location of the new test cases --- fdbclient/NativeAPI.actor.cpp | 3 ++ fdbserver/MasterProxyServer.actor.cpp | 48 ++++++++++++++++++-------- fdbserver/OldTLogServer_6_0.actor.cpp | 20 +++++------ fdbserver/TLogServer.actor.cpp | 20 +++++------ fdbserver/worker.actor.cpp | 27 +++++++++------ fdbserver/workloads/SnapTest.actor.cpp | 7 ++-- tests/CMakeLists.txt | 16 ++++----- 7 files changed, 83 insertions(+), 58 deletions(-) diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 41023052e7..39e90b1a3a 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -2405,6 +2405,9 @@ TransactionOptions::TransactionOptions(Database const& cx) { commitOnFirstProxy = true; } maxRetries = cx->transactionMaxRetries; + if (maxRetries == -1) { + maxRetries = 10; + } } TransactionOptions::TransactionOptions() { diff --git a/fdbserver/MasterProxyServer.actor.cpp b/fdbserver/MasterProxyServer.actor.cpp index 13f725d58f..a8830aca71 100644 --- a/fdbserver/MasterProxyServer.actor.cpp +++ b/fdbserver/MasterProxyServer.actor.cpp @@ -813,9 +813,15 @@ ACTOR Future commitBatch( // step 2: find the tag.id from locality info of the master auto localityKey = self->txnStateStore->readValue(tagLocalityListKeyFor(self->master.locality.dcId())).get(); + int8_t locality = tagLocalityInvalid; - ASSERT(localityKey.present()); - locality = 
decodeTagLocalityListValue(localityKey.get()); + if (usableRegions > 1) { + if (!localityKey.present()) { + TraceEvent(SevError, "LocalityKeyNotPresentForMasterDCID"); + ASSERT(localityKey.present()); + } + locality = decodeTagLocalityListValue(localityKey.get()); + } std::set allSources; auto& m = (*pMutations)[mutationNum]; @@ -1659,6 +1665,7 @@ ACTOR Future masterProxyServerCore( ClusterConnectionString(coordinators.get().toString()).coordinators(); state std::set coordinatorsAddrSet; for (int i = 0; i < coordinatorsAddr.size(); i++) { + TraceEvent(SevDebug, "CoordinatorAddress").detail("Addr", coordinatorsAddr[i]); coordinatorsAddrSet.insert(coordinatorsAddr[i]); } @@ -1670,23 +1677,34 @@ ACTOR Future masterProxyServerCore( // coordinators state vector> execCoords; for (int i = 0; i < workers.size(); i++) { - if (coordinatorsAddrSet.find(workers[i].interf.address()) != coordinatorsAddrSet.end()) { - TraceEvent("ExecReqToCoordinator").detail("WorkerAddr", workers[i].interf.address()); - execCoords.push_back(workers[i].interf.execReq.getReply(ExecuteRequest(execReq.execPayload))); + NetworkAddress primary = workers[i].interf.address(); + Optional secondary = workers[i].interf.tLog.getEndpoint().addresses.secondaryAddress; + if (coordinatorsAddrSet.find(primary) != coordinatorsAddrSet.end() + || (secondary.present() && (coordinatorsAddrSet.find(secondary.get()) != coordinatorsAddrSet.end()))) { + TraceEvent("ExecReqToCoordinator") + .detail("PrimaryWorkerAddr", primary) + .detail("SecondaryWorkerAddr", secondary); + execCoords.push_back(brokenPromiseToNever(workers[i].interf.execReq.getReply(ExecuteRequest(execReq.execPayload)))); } } - try { - wait(timeoutError(waitForAll(execCoords), 10.0)); - int numSucc = 0; - for (auto item : execCoords) { - if (item.isValid() && item.isReady()) { - ++numSucc; + if (execCoords.size() <= 0) { + TraceEvent(SevDebug, "CoordinatorWorkersNotFound"); + execReq.reply.sendError(operation_failed()); + } else { + try { + 
wait(timeoutError(waitForAll(execCoords), 10.0)); + int numSucc = 0; + for (auto item : execCoords) { + if (item.isValid() && item.isReady()) { + ++numSucc; + } } + bool succ = (numSucc >= ((execCoords.size() + 1) / 2)); + succ ? execReq.reply.send(Void()) : execReq.reply.sendError(operation_failed()); + } catch (Error& e) { + TraceEvent("WaitingForAllExecCoords").error(e); + execReq.reply.sendError(broken_promise()); } - bool succ = numSucc >= ((execCoords.size() + 1) / 2); - succ ? execReq.reply.send(Void()) : execReq.reply.sendError(operation_failed()); - } catch (Error& e) { - execReq.reply.sendError(e); } } when(TxnStateRequest req = waitNext(proxy.txnState.getFuture())) { diff --git a/fdbserver/OldTLogServer_6_0.actor.cpp b/fdbserver/OldTLogServer_6_0.actor.cpp index 9e4bd88b75..110c54012a 100644 --- a/fdbserver/OldTLogServer_6_0.actor.cpp +++ b/fdbserver/OldTLogServer_6_0.actor.cpp @@ -957,18 +957,17 @@ ACTOR Future tLogPop( TLogData* self, TLogPopRequest req, Reference::iterator it; state vector> ignoredPops; + self->ignorePopRequest = false; + self->ignorePopUid = ""; + self->ignorePopDeadline = 0.0; for (it = self->toBePopped.begin(); it != self->toBePopped.end(); it++) { TraceEvent("PlayIgnoredPop") .detail("Tag", it->first.toString()) .detail("Version", it->second); ignoredPops.push_back(tLogPopCore(self, it->first, it->second, logData)); } - wait(waitForAll(ignoredPops)); self->toBePopped.clear(); - - self->ignorePopRequest = false; - self->ignorePopUid = ""; - self->ignorePopDeadline = 0.0; + wait(waitForAll(ignoredPops)); TraceEvent("ResetIgnorePopRequest") .detail("Now", g_network->now()) .detail("IgnorePopRequest", self->ignorePopRequest) @@ -1392,22 +1391,21 @@ ACTOR Future execProcessingHelper(TLogData* self, .detail("UidStr", uidStr.toString()); } - TraceEvent("EnableTLogPlayAllIgnoredPops"); + TraceEvent("EnableTLogPlayAllIgnoredPops2"); // use toBePopped and issue all the pops state std::map::iterator it; state vector> ignoredPops; + 
self->ignorePopRequest = false; + self->ignorePopDeadline = 0.0; + self->ignorePopUid = ""; for (it = self->toBePopped.begin(); it != self->toBePopped.end(); it++) { TraceEvent("PlayIgnoredPop") .detail("Tag", it->first.toString()) .detail("Version", it->second); ignoredPops.push_back(tLogPopCore(self, it->first, it->second, logData)); } - wait(waitForAll(ignoredPops)); self->toBePopped.clear(); - - self->ignorePopRequest = false; - self->ignorePopDeadline = 0.0; - self->ignorePopUid = ""; + wait(waitForAll(ignoredPops)); TraceEvent("TLogExecCmdPopEnable") .detail("ExecCmd", execCmd->toString()) .detail("UidStr", uidStr.toString()) diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index d31b500bf6..b0ec62186c 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -1208,18 +1208,17 @@ ACTOR Future tLogPop( TLogData* self, TLogPopRequest req, Reference::iterator it; state vector> ignoredPops; + self->ignorePopRequest = false; + self->ignorePopUid = ""; + self->ignorePopDeadline = 0.0; for (it = self->toBePopped.begin(); it != self->toBePopped.end(); it++) { TraceEvent("PlayIgnoredPop") .detail("Tag", it->first.toString()) .detail("Version", it->second); ignoredPops.push_back(tLogPopCore(self, it->first, it->second, logData)); } - wait(waitForAll(ignoredPops)); self->toBePopped.clear(); - - self->ignorePopRequest = false; - self->ignorePopUid = ""; - self->ignorePopDeadline = 0.0; + wait(waitForAll(ignoredPops)); TraceEvent("ResetIgnorePopRequest") .detail("Now", g_network->now()) .detail("IgnorePopRequest", self->ignorePopRequest) @@ -1760,22 +1759,21 @@ ACTOR Future execProcessingHelper(TLogData* self, .detail("UidStr", uidStr.toString()); } - TraceEvent("EnableTLogPlayAllIgnoredPops"); + TraceEvent("EnableTLogPlayAllIgnoredPops2"); // use toBePopped and issue all the pops state std::map::iterator it; state vector> ignoredPops; + self->ignorePopRequest = false; + self->ignorePopDeadline = 0.0; + 
self->ignorePopUid = ""; for (it = self->toBePopped.begin(); it != self->toBePopped.end(); it++) { TraceEvent("PlayIgnoredPop") .detail("Tag", it->first.toString()) .detail("Version", it->second); ignoredPops.push_back(tLogPopCore(self, it->first, it->second, logData)); } - wait(waitForAll(ignoredPops)); self->toBePopped.clear(); - - self->ignorePopRequest = false; - self->ignorePopDeadline = 0.0; - self->ignorePopUid = ""; + wait(waitForAll(ignoredPops)); TraceEvent("TLogExecCmdPopEnable") .detail("ExecCmd", execCmd->toString()) .detail("UidStr", uidStr.toString()) diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index 594dcabb13..aa2b37b404 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -1173,17 +1173,22 @@ ACTOR Future workerServer( } when(state ExecuteRequest req = waitNext(interf.execReq.getFuture())) { state ExecCmdValueString execArg(req.execPayload); - int err = wait(execHelper(&execArg, coordFolder, "role=coordinator")); - StringRef uidStr = execArg.getBinaryArgValue(LiteralStringRef("uid")); - auto tokenStr = "ExecTrace/Coordinators/" + uidStr.toString(); - auto te = TraceEvent("ExecTraceCoordinators"); - te.detail("Uid", uidStr.toString()); - te.detail("Status", err); - te.detail("Role", "coordinator"); - te.detail("Value", coordFolder); - te.detail("ExecPayload", execArg.getCmdValueString().toString()); - te.trackLatest(tokenStr.c_str()); - req.reply.send(Void()); + try { + int err = wait(execHelper(&execArg, coordFolder, "role=coordinator")); + StringRef uidStr = execArg.getBinaryArgValue(LiteralStringRef("uid")); + auto tokenStr = "ExecTrace/Coordinators/" + uidStr.toString(); + auto te = TraceEvent("ExecTraceCoordinators"); + te.detail("Uid", uidStr.toString()); + te.detail("Status", err); + te.detail("Role", "coordinator"); + te.detail("Value", coordFolder); + te.detail("ExecPayload", execArg.getCmdValueString().toString()); + te.trackLatest(tokenStr.c_str()); + req.reply.send(Void()); + } catch 
(Error& e) { + TraceEvent("ExecHelperError").error(e); + req.reply.sendError(broken_promise()); + } } when( wait( errorForwarders.getResult() ) ) {} when( wait( handleErrors ) ) {} diff --git a/fdbserver/workloads/SnapTest.actor.cpp b/fdbserver/workloads/SnapTest.actor.cpp index 55b1966c97..0aa812209d 100644 --- a/fdbserver/workloads/SnapTest.actor.cpp +++ b/fdbserver/workloads/SnapTest.actor.cpp @@ -126,8 +126,11 @@ public: // workload functions TraceEvent("TestKeyStr").detail("Value", keyStr); tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); Optional val = wait(tr.get(keyStr)); - ASSERT(val.present()); - break; + if (val.present()) { + break; + } + // wait for the key to be written out by TLogs + wait(delay(0.1)); } catch (Error &e) { wait(tr.onError(e)); } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 0d02e42c18..567683dd25 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -142,17 +142,17 @@ add_fdb_test( TEST_FILES restarting/StorefrontTestRestart-1.txt restarting/StorefrontTestRestart-2.txt) add_fdb_test( - TEST_FILES restarting/SnapTestSimpleRestart-1.txt - restarting/SnapTestSimpleRestart-2.txt) + TEST_FILES restarting/from_6.2.0/SnapTestSimpleRestart-1.txt + restarting/from_6.2.0/SnapTestSimpleRestart-2.txt) add_fdb_test( - TEST_FILES restarting/SnapTestRestart-1.txt - restarting/SnapTestRestart-2.txt) + TEST_FILES restarting/from_6.2.0/SnapTestRestart-1.txt + restarting/from_6.2.0/SnapTestRestart-2.txt) add_fdb_test( - TEST_FILES restarting/SnapCycleRestart-1.txt - restarting/SnapCycleRestart-2.txt) + TEST_FILES restarting/from_6.2.0/SnapCycleRestart-1.txt + restarting/from_6.2.0/SnapCycleRestart-2.txt) add_fdb_test( - TEST_FILES restarting/SnapTestAttrition-1.txt - restarting/SnapTestAttrition-2.txt) + TEST_FILES restarting/from_6.2.0/SnapTestAttrition-1.txt + restarting/from_6.2.0/SnapTestAttrition-2.txt) add_fdb_test( TEST_FILES restarting/from_5.1.7/DrUpgradeRestart-1.txt 
restarting/from_5.1.7/DrUpgradeRestart-2.txt IGNORE) From ceac68c990e9957ada965d64eebc1e95bb6a6194 Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Mon, 6 May 2019 18:57:26 -0700 Subject: [PATCH 43/69] restore - remove emtpy snapdir,snap loop retry fix - remove partially snapped directories to avoid no cluster file assert - snap create to retry max 3 times for not_fully_recovered and keep retrying for the other failures --- fdbserver/fdbserver.actor.cpp | 24 ++++++++++++++++-------- fdbserver/workloads/SnapTest.actor.cpp | 7 +++---- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/fdbserver/fdbserver.actor.cpp b/fdbserver/fdbserver.actor.cpp index 8b6e44fb3d..78a546706c 100644 --- a/fdbserver/fdbserver.actor.cpp +++ b/fdbserver/fdbserver.actor.cpp @@ -1673,17 +1673,17 @@ int main(int argc, char* argv[]) { } else { CSimpleIni ini; ini.SetUnicode(); - std::string tmpFolder = abspath(dataFolder); - ini.LoadFile(joinPath(tmpFolder, "restartInfo.ini").c_str()); + std::string absDataFolder = abspath(dataFolder); + ini.LoadFile(joinPath(absDataFolder, "restartInfo.ini").c_str()); isRestoring = atoi(ini.GetValue("RESTORE", "isRestoring")); bool snapFailed = atoi(ini.GetValue("RESTORE", "BackupFailed")); if (isRestoring && !snapFailed) { std::vector returnList; std::string ext = ""; - returnList = platform::listDirectories(tmpFolder); + returnList = platform::listDirectories(absDataFolder); std::string snapStr = ini.GetValue("RESTORE", "RestoreSnapUID"); - TraceEvent("RestoringDataFolder").detail("DataFolder", tmpFolder); + TraceEvent("RestoringDataFolder").detail("DataFolder", absDataFolder); TraceEvent("RestoreSnapUID").detail("UID", snapStr); // delete all files (except fdb.cluster) in non-snap directories @@ -1695,7 +1695,7 @@ int main(int argc, char* argv[]) { continue; } - std::string childf = tmpFolder + "/" + returnList[i]; + std::string childf = absDataFolder + "/" + returnList[i]; std::vector returnFiles = platform::listFiles(childf, ext); for (int 
j = 0; j < returnFiles.size(); j++) { if (returnFiles[j] != "fdb.cluster" && returnFiles[j] != "fitness") { @@ -1711,15 +1711,23 @@ int main(int argc, char* argv[]) { if (returnList[i] == "." || returnList[i] == "..") { continue; } + std::string dirSrc = absDataFolder + "/" + returnList[i]; + // delete snap directories which are not part of restoreSnapUID if (returnList[i].find(snapStr) == std::string::npos) { if (returnList[i].find("snap") != std::string::npos) { - platform::eraseDirectoryRecursive(tmpFolder + returnList[i]); + platform::eraseDirectoryRecursive(dirSrc); } continue; } + // remove empty/partial snap directories + std::vector childrenList = platform::listFiles(dirSrc); + if (childrenList.size() == 0) { + TraceEvent("RemovingEmptySnapDirectory").detail("DirBeingDeleted", dirSrc); + platform::eraseDirectoryRecursive(dirSrc); + continue; + } std::string origDir = returnList[i].substr(0, 32); - std::string dirToRemove = tmpFolder + "/" + origDir; - std::string dirSrc = tmpFolder + "/" + returnList[i]; + std::string dirToRemove = absDataFolder + "/" + origDir; TraceEvent("DeletingOriginalNonSnapDirectory").detail("FileBeingDeleted", dirToRemove); platform::eraseDirectoryRecursive(dirToRemove); renameFile(dirSrc, dirToRemove); diff --git a/fdbserver/workloads/SnapTest.actor.cpp b/fdbserver/workloads/SnapTest.actor.cpp index 0aa812209d..23d17ab342 100644 --- a/fdbserver/workloads/SnapTest.actor.cpp +++ b/fdbserver/workloads/SnapTest.actor.cpp @@ -234,13 +234,12 @@ public: // workload functions wait(status); break; } catch (Error& e) { - ++retry; - if (retry > 3) { - if (e.code() == error_code_cluster_not_fully_recovered) { + if (e.code() == error_code_cluster_not_fully_recovered) { + ++retry; + if (retry > 3) { snapFailed = true; break; } - throw e; } } } From f27a40f1188733707852c6940255ca446fa3eb21 Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Tue, 7 May 2019 10:42:17 -0700 Subject: [PATCH 44/69] execProcessingHelper made synchronous tLogCommit exects 
no blocking between duplicate check and setting of the new version, that constraint was broken when synchronous execProcessingHelper was introduced. As a fix, execProcessingHelper was made asynchronous. --- fdbserver/OldTLogServer_6_0.actor.cpp | 57 +++++++++++++++------------ fdbserver/TLogServer.actor.cpp | 56 +++++++++++++++----------- 2 files changed, 64 insertions(+), 49 deletions(-) diff --git a/fdbserver/OldTLogServer_6_0.actor.cpp b/fdbserver/OldTLogServer_6_0.actor.cpp index 110c54012a..2ee05ab0a4 100644 --- a/fdbserver/OldTLogServer_6_0.actor.cpp +++ b/fdbserver/OldTLogServer_6_0.actor.cpp @@ -1277,20 +1277,21 @@ ACTOR Future commitQueue( TLogData* self ) { } } -ACTOR Future execProcessingHelper(TLogData* self, - Reference logData, - TLogCommitRequest* req, - Standalone>* execTags, - ExecCmdValueString* execArg, - StringRef* execCmd, - Version* execVersion, - vector>* snapFailKeySetters) +void execProcessingHelper(TLogData* self, + Reference logData, + TLogCommitRequest* req, + Standalone>* execTags, + ExecCmdValueString* execArg, + StringRef* execCmd, + Version* execVersion, + vector>* snapFailKeySetters, + vector>* ignoredPops) { // inspect the messages to find if there is an Exec type and print // it. 
message are prefixed by the length of the message and each // field is prefixed by the length too uint8_t type = MutationRef::MAX_ATOMIC_OP; - state StringRef param2; + StringRef param2; ArenaReader rd(req->arena, req->messages, Unversioned()); int32_t messageLength, rawLength; uint16_t tagCount; @@ -1327,17 +1328,19 @@ ACTOR Future execProcessingHelper(TLogData* self, rd >> len; param2 = StringRef((uint8_t const*)rd.readBytes(len), len); - TraceEvent(SevDebug, "TLogExecCommandType", self->dbgid).detail("Value", execCmd->toString()); + TraceEvent(SevDebug, "TLogExecCommandType", self->dbgid) + .detail("Value", execCmd->toString()) + .detail("Version", req->version); execArg->setCmdValueString(param2); execArg->dbgPrint(); - state StringRef uidStr = execArg->getBinaryArgValue(LiteralStringRef("uid")); + StringRef uidStr = execArg->getBinaryArgValue(LiteralStringRef("uid")); if (!execCmd->startsWith(LiteralStringRef("\xff"))) { *execVersion = req->version; } if (*execCmd == execSnap) { // validation check specific to snap request - state std::string reason; + std::string reason; if (!self->ignorePopRequest) { *execVersion = invalidVersion; reason = "SnapFailIgnorePopNotSet"; @@ -1350,13 +1353,15 @@ ACTOR Future execProcessingHelper(TLogData* self, TraceEvent(SevWarn, "TLogSnapFailed") .detail("IgnorePopUid", self->ignorePopUid) .detail("IgnorePopRequest", self->ignorePopRequest) - .detail("Reason", reason); + .detail("Reason", reason) + .detail("Version", req->version); TraceEvent("ExecCmdSnapCreate") .detail("Uid", uidStr.toString()) .detail("Status", -1) .detail("Tag", logData->allTags.begin()->toString()) - .detail("Role", "TLog"); + .detail("Role", "TLog") + .detail("Version", req->version); if (g_network->isSimulated()) { // write SnapFailedTLog.$UID Standalone keyStr = snapTestFailStatus.withSuffix(uidStr); @@ -1372,7 +1377,8 @@ ACTOR Future execProcessingHelper(TLogData* self, if (self->ignorePopUid != "") { TraceEvent(SevWarn, "TLogPopDisableonDisable") 
.detail("IgnorePopUid", self->ignorePopUid) - .detail("UidStr", uidStr.toString()); + .detail("UidStr", uidStr.toString()) + .detail("Version", req->version); } self->ignorePopUid = uidStr.toString(); // ignorePopRequest will be turned off after 30 seconds @@ -1382,19 +1388,20 @@ ACTOR Future execProcessingHelper(TLogData* self, .detail("UidStr", uidStr.toString()) .detail("IgnorePopUid", self->ignorePopUid) .detail("IgnporePopRequest", self->ignorePopRequest) - .detail("IgnporePopDeadline", self->ignorePopDeadline); + .detail("IgnporePopDeadline", self->ignorePopDeadline) + .detail("Version", req->version); } if (*execCmd == execEnableTLogPop) { if (self->ignorePopUid != uidStr.toString()) { TraceEvent(SevWarn, "TLogPopDisableEnableUidMismatch") .detail("IgnorePopUid", self->ignorePopUid) - .detail("UidStr", uidStr.toString()); + .detail("UidStr", uidStr.toString()) + .detail("Version", req->version); } TraceEvent("EnableTLogPlayAllIgnoredPops2"); // use toBePopped and issue all the pops - state std::map::iterator it; - state vector> ignoredPops; + std::map::iterator it; self->ignorePopRequest = false; self->ignorePopDeadline = 0.0; self->ignorePopUid = ""; @@ -1402,19 +1409,18 @@ ACTOR Future execProcessingHelper(TLogData* self, TraceEvent("PlayIgnoredPop") .detail("Tag", it->first.toString()) .detail("Version", it->second); - ignoredPops.push_back(tLogPopCore(self, it->first, it->second, logData)); + ignoredPops->push_back(tLogPopCore(self, it->first, it->second, logData)); } self->toBePopped.clear(); - wait(waitForAll(ignoredPops)); TraceEvent("TLogExecCmdPopEnable") .detail("ExecCmd", execCmd->toString()) .detail("UidStr", uidStr.toString()) .detail("IgnorePopUid", self->ignorePopUid) .detail("IgnporePopRequest", self->ignorePopRequest) - .detail("IgnporePopDeadline", self->ignorePopDeadline); + .detail("IgnporePopDeadline", self->ignorePopDeadline) + .detail("Version", req->version); } } - return Void(); } @@ -1528,6 +1534,7 @@ ACTOR Future tLogCommit( state 
StringRef execCmd; state Standalone> execTags; state vector> snapFailKeySetters; + state vector> playIgnoredPops; if (logData->version.get() == req.prevVersion) { // Not a duplicate (check relies on no waiting between here and self->version.set() below!) if(req.debugID.present()) @@ -1540,7 +1547,7 @@ ACTOR Future tLogCommit( qe.id = logData->logId; if (req.hasExecOp) { - wait(execProcessingHelper(self, logData, &req, &execTags, &execArg, &execCmd, &execVersion, &snapFailKeySetters)); + execProcessingHelper(self, logData, &req, &execTags, &execArg, &execCmd, &execVersion, &snapFailKeySetters, &playIgnoredPops); if (execVersion != invalidVersion) { TraceEvent(SevDebug, "SettingExecOpCommit") .detail("ExecVersion", execVersion) @@ -1564,7 +1571,7 @@ ACTOR Future tLogCommit( // Notifies the commitQueue actor to commit persistentQueue, and also unblocks tLogPeekMessages actors logData->version.set( req.version ); - + wait(waitForAll(playIgnoredPops)); if(req.debugID.present()) g_traceBatch.addEvent("CommitDebug", tlogDebugID.get().first(), "TLog.tLogCommit.AfterTLogCommit"); diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index b0ec62186c..b6cfe3d21e 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -1644,20 +1644,21 @@ ACTOR Future commitQueue( TLogData* self ) { } } -ACTOR Future execProcessingHelper(TLogData* self, - Reference logData, - TLogCommitRequest* req, - Standalone>* execTags, - ExecCmdValueString* execArg, - StringRef* execCmd, - Version* execVersion, - vector>* snapFailKeySetters) +void execProcessingHelper(TLogData* self, + Reference logData, + TLogCommitRequest* req, + Standalone>* execTags, + ExecCmdValueString* execArg, + StringRef* execCmd, + Version* execVersion, + vector>* snapFailKeySetters, + vector>* ignoredPops) { // inspect the messages to find if there is an Exec type and print // it. 
message are prefixed by the length of the message and each // field is prefixed by the length too uint8_t type = MutationRef::MAX_ATOMIC_OP; - state StringRef param2; + StringRef param2; ArenaReader rd(req->arena, req->messages, Unversioned()); int32_t messageLength, rawLength; uint16_t tagCount; @@ -1694,17 +1695,19 @@ ACTOR Future execProcessingHelper(TLogData* self, rd >> len; param2 = StringRef((uint8_t const*)rd.readBytes(len), len); - TraceEvent(SevDebug, "TLogExecCommandType", self->dbgid).detail("Value", execCmd->toString()); + TraceEvent(SevDebug, "TLogExecCommandType", self->dbgid) + .detail("Value", execCmd->toString()) + .detail("Version", req->version); execArg->setCmdValueString(param2); execArg->dbgPrint(); - state StringRef uidStr = execArg->getBinaryArgValue(LiteralStringRef("uid")); + StringRef uidStr = execArg->getBinaryArgValue(LiteralStringRef("uid")); if (!execCmd->startsWith(LiteralStringRef("\xff"))) { *execVersion = req->version; } if (*execCmd == execSnap) { // validation check specific to snap request - state std::string reason; + std::string reason; if (!self->ignorePopRequest) { *execVersion = invalidVersion; reason = "SnapFailIgnorePopNotSet"; @@ -1717,13 +1720,15 @@ ACTOR Future execProcessingHelper(TLogData* self, TraceEvent(SevWarn, "TLogSnapFailed") .detail("IgnorePopUid", self->ignorePopUid) .detail("IgnorePopRequest", self->ignorePopRequest) - .detail("Reason", reason); + .detail("Reason", reason) + .detail("Version", req->version); TraceEvent("ExecCmdSnapCreate") .detail("Uid", uidStr.toString()) .detail("Status", -1) .detail("Tag", logData->allTags.begin()->toString()) - .detail("Role", "TLog"); + .detail("Role", "TLog") + .detail("Version", req->version); if (g_network->isSimulated()) { // write SnapFailedTLog.$UID @@ -1740,7 +1745,8 @@ ACTOR Future execProcessingHelper(TLogData* self, if (self->ignorePopUid != "") { TraceEvent(SevWarn, "TLogPopDisableonDisable") .detail("IgnorePopUid", self->ignorePopUid) - .detail("UidStr", 
uidStr.toString()); + .detail("UidStr", uidStr.toString()) + .detail("Version", req->version); } self->ignorePopUid = uidStr.toString(); // ignorePopRequest will be turned off after 30 seconds @@ -1750,19 +1756,20 @@ ACTOR Future execProcessingHelper(TLogData* self, .detail("UidStr", uidStr.toString()) .detail("IgnorePopUid", self->ignorePopUid) .detail("IgnporePopRequest", self->ignorePopRequest) - .detail("IgnporePopDeadline", self->ignorePopDeadline); + .detail("IgnporePopDeadline", self->ignorePopDeadline) + .detail("Version", req->version); } if (*execCmd == execEnableTLogPop) { if (self->ignorePopUid != uidStr.toString()) { TraceEvent(SevWarn, "TLogPopDisableEnableUidMismatch") .detail("IgnorePopUid", self->ignorePopUid) - .detail("UidStr", uidStr.toString()); + .detail("UidStr", uidStr.toString()) + .detail("Version", req->version); } TraceEvent("EnableTLogPlayAllIgnoredPops2"); // use toBePopped and issue all the pops - state std::map::iterator it; - state vector> ignoredPops; + std::map::iterator it; self->ignorePopRequest = false; self->ignorePopDeadline = 0.0; self->ignorePopUid = ""; @@ -1770,19 +1777,18 @@ ACTOR Future execProcessingHelper(TLogData* self, TraceEvent("PlayIgnoredPop") .detail("Tag", it->first.toString()) .detail("Version", it->second); - ignoredPops.push_back(tLogPopCore(self, it->first, it->second, logData)); + ignoredPops->push_back(tLogPopCore(self, it->first, it->second, logData)); } self->toBePopped.clear(); - wait(waitForAll(ignoredPops)); TraceEvent("TLogExecCmdPopEnable") .detail("ExecCmd", execCmd->toString()) .detail("UidStr", uidStr.toString()) .detail("IgnorePopUid", self->ignorePopUid) .detail("IgnporePopRequest", self->ignorePopRequest) - .detail("IgnporePopDeadline", self->ignorePopDeadline); + .detail("IgnporePopDeadline", self->ignorePopDeadline) + .detail("Version", req->version); } } - return Void(); } ACTOR Future tLogSnapHelper(TLogData* self, @@ -1894,6 +1900,7 @@ ACTOR Future tLogCommit( state TLogQueueEntryRef 
qe; state StringRef execCmd; state Standalone> execTags; + state vector> playIgnoredPops; if (logData->version.get() == req.prevVersion) { // Not a duplicate (check relies on no waiting between here and self->version.set() below!) if(req.debugID.present()) @@ -1907,7 +1914,7 @@ ACTOR Future tLogCommit( state vector> snapFailKeySetters; if (req.hasExecOp) { - wait(execProcessingHelper(self, logData, &req, &execTags, &execArg, &execCmd, &execVersion, &snapFailKeySetters)); + execProcessingHelper(self, logData, &req, &execTags, &execArg, &execCmd, &execVersion, &snapFailKeySetters, &playIgnoredPops); if (execVersion != invalidVersion) { TraceEvent(SevDebug, "SettingExecOpCommit") .detail("ExecVersion", execVersion) @@ -1931,6 +1938,7 @@ ACTOR Future tLogCommit( // Notifies the commitQueue actor to commit persistentQueue, and also unblocks tLogPeekMessages actors logData->version.set( req.version ); + wait(waitForAll(playIgnoredPops)); if(req.debugID.present()) g_traceBatch.addEvent("CommitDebug", tlogDebugID.get().first(), "TLog.tLogCommit.AfterTLogCommit"); From b56d8e648f1a24979064a122dd03f8af9bd2a32c Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Tue, 7 May 2019 13:35:28 -0700 Subject: [PATCH 45/69] bp::child->wait_for does not give correct err code boost::process::child->wait_for does not give the error code from the process being run. Re-arrange the code to work-around it. 
--- fdbserver/FDBExecHelper.actor.cpp | 34 +++++++++++++------------------ 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/fdbserver/FDBExecHelper.actor.cpp b/fdbserver/FDBExecHelper.actor.cpp index 9e521d5c98..6e139c743f 100644 --- a/fdbserver/FDBExecHelper.actor.cpp +++ b/fdbserver/FDBExecHelper.actor.cpp @@ -102,33 +102,27 @@ ACTOR Future spawnProcess(std::string binPath, std::vector par if (!isSync && !g_network->isSimulated()) { while (c.running() && runTime <= maxWaitTime) { - wait(delay(0.1)); - runTime += 0.1; - } - if (c.running()) { - c.terminate(); - err = -1; - } else { - err = c.exit_code(); + wait(delay(0.1)); + runTime += 0.1; } + } else { + int maxWaitTimeInt = static_cast(maxWaitTime + 1.0); + c.wait_for(std::chrono::seconds(maxWaitTimeInt)); + } + + if (c.running()) { + TraceEvent(SevWarnAlways, "ChildTermination") + .detail("Cmd", binPath) + .detail("Args", argsString); + c.terminate(); + err = -1; if (!c.wait_for(std::chrono::seconds(1))) { TraceEvent(SevWarnAlways, "SpawnProcessFailedToExit") .detail("Cmd", binPath) .detail("Args", argsString); } } else { - state std::error_code errCode; - bool succ = c.wait_for(std::chrono::seconds(3), errCode); - err = errCode.value(); - if (!succ) { - err = -1; - c.terminate(); - if (!c.wait_for(std::chrono::seconds(1))) { - TraceEvent(SevWarnAlways, "SpawnProcessFailedToExit") - .detail("Cmd", binPath) - .detail("Args", argsString); - } - } + err = c.exit_code(); } TraceEvent("SpawnProcess") .detail("Cmd", binPath) From dcb99c51384698e82fd0b34bfe44ef13e6d021ed Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Wed, 8 May 2019 10:00:05 -0700 Subject: [PATCH 46/69] txn to disable tlogPop to be timedout If the disable tlog pop txn takes more than 30 seconds then tlog will automatically start enabling pop, fix is to timeout the txn if it takes more than 10 seconds and retry a new txn to disable tlog pop. 
--- fdbclient/NativeAPI.actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 39e90b1a3a..5b1a84ba29 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -3356,7 +3356,7 @@ ACTOR Future snapCreate(Database inputCx, StringRef snapCmd, UID snapUID) try { tr.setOption(FDBTransactionOptions::LOCK_AWARE); tr.execute(execDisableTLogPop, tLogCmdPayloadRef); - wait(tr.commit()); + wait(timeoutError(tr.commit(), 10)); break; } catch (Error& e) { TraceEvent("DisableTLogPopFailed").error(e); From e6c0b87a4d43924d04bcff9a01cc344198e157bb Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Wed, 8 May 2019 11:02:06 -0700 Subject: [PATCH 47/69] remove unused variable --- fdbserver/OldTLogServer_6_0.actor.cpp | 4 ++-- fdbserver/TLogServer.actor.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fdbserver/OldTLogServer_6_0.actor.cpp b/fdbserver/OldTLogServer_6_0.actor.cpp index 2ee05ab0a4..86205a6707 100644 --- a/fdbserver/OldTLogServer_6_0.actor.cpp +++ b/fdbserver/OldTLogServer_6_0.actor.cpp @@ -1433,7 +1433,6 @@ ACTOR Future tLogSnapHelper(TLogData* self, Standalone> execTags) { state int err = 0; - state Future cmdErr; state StringRef uidStr = execArg->getBinaryArgValue(LiteralStringRef("uid")); state UID execUID = UID::fromString(uidStr.toString()); state bool otherRoleExeced = false; @@ -1443,7 +1442,8 @@ ACTOR Future tLogSnapHelper(TLogData* self, ASSERT(!isExecOpInProgress(execUID)); if (!otherRoleExeced) { setExecOpInProgress(execUID); - int err = wait(execHelper(execArg, self->dataFolder, "role=tlog")); + int tmpErr = wait(execHelper(execArg, self->dataFolder, "role=tlog")); + err = tmpErr; clearExecOpInProgress(execUID); } TraceEvent("TLogCommitExecTraceTLog") diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index b6cfe3d21e..382b17233b 100644 --- a/fdbserver/TLogServer.actor.cpp +++ 
b/fdbserver/TLogServer.actor.cpp @@ -1800,7 +1800,6 @@ ACTOR Future tLogSnapHelper(TLogData* self, Standalone> execTags) { state int err = 0; - state Future cmdErr; state StringRef uidStr = execArg->getBinaryArgValue(LiteralStringRef("uid")); state UID execUID = UID::fromString(uidStr.toString()); state bool otherRoleExeced = false; @@ -1810,7 +1809,8 @@ ACTOR Future tLogSnapHelper(TLogData* self, ASSERT(!isExecOpInProgress(execUID)); if (!otherRoleExeced) { setExecOpInProgress(execUID); - int err = wait(execHelper(execArg, self->dataFolder, "role=tlog")); + int tmpErr = wait(execHelper(execArg, self->dataFolder, "role=tlog")); + err = tmpErr; clearExecOpInProgress(execUID); } TraceEvent("TLogCommitExecTraceTLog") From 5749e220bd2d21b9644ae0d0e686f218704b837c Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Thu, 9 May 2019 14:56:54 -0700 Subject: [PATCH 48/69] use FlowLock for implementing critical section Instead of using Promises and future to implement critcal section use FlowLock --- fdbserver/OldTLogServer_6_0.actor.cpp | 21 +++++++++++++++++---- fdbserver/TLogServer.actor.cpp | 21 +++++++++++++++++---- 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/fdbserver/OldTLogServer_6_0.actor.cpp b/fdbserver/OldTLogServer_6_0.actor.cpp index 86205a6707..708aee957a 100644 --- a/fdbserver/OldTLogServer_6_0.actor.cpp +++ b/fdbserver/OldTLogServer_6_0.actor.cpp @@ -431,7 +431,7 @@ struct LogData : NonCopyable, public ReferenceCounted { UID recruitmentID; std::set allTags; Future terminated; - Promise execOpHold; + FlowLock execOpLock; bool execOpCommitInProgress; explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, bool isPrimary, int logRouterTags, UID recruitmentID, std::vector tags) : tLogData(tLogData), knownCommittedVersion(0), logId(interf.id()), @@ -1524,8 +1524,10 @@ ACTOR Future tLogCommit( // future version to be included) // NOTE: execOpCommitInProgress will not be set for exec commands which // start with \xff + state bool 
execOpLockTaken = false; if (logData->execOpCommitInProgress) { - wait(logData->execOpHold.getFuture()); + wait(logData->execOpLock.take()); + execOpLockTaken = true; } state Version execVersion = invalidVersion; @@ -1550,10 +1552,17 @@ ACTOR Future tLogCommit( execProcessingHelper(self, logData, &req, &execTags, &execArg, &execCmd, &execVersion, &snapFailKeySetters, &playIgnoredPops); if (execVersion != invalidVersion) { TraceEvent(SevDebug, "SettingExecOpCommit") + .detail("LogId", logData->logId) .detail("ExecVersion", execVersion) .detail("Version", req.version); logData->execOpCommitInProgress = true; - logData->execOpHold.reset(); + if (!execOpLockTaken) { + wait(logData->execOpLock.take()); + execOpLockTaken = true; + } else { + ASSERT(logData->execOpLock.available() == 0); + } + ASSERT(execOpLockTaken); } } @@ -1584,8 +1593,12 @@ ACTOR Future tLogCommit( wait(tLogSnapHelper(self, logData, &execArg, qe.version, execVersion, execCmd, execTags)); } if (execVersion != invalidVersion && logData->execOpCommitInProgress) { + ASSERT(execOpLockTaken); logData->execOpCommitInProgress = false; - logData->execOpHold.send(Void()); + } + if (execOpLockTaken) { + logData->execOpLock.release(); + execOpLockTaken = false; } execVersion = invalidVersion; diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index 382b17233b..97070181cd 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -489,7 +489,7 @@ struct LogData : NonCopyable, public ReferenceCounted { UID recruitmentID; std::set allTags; Future terminated; - Promise execOpHold; + FlowLock execOpLock; bool execOpCommitInProgress; explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, bool isPrimary, int logRouterTags, UID recruitmentID, uint64_t protocolVersion, std::vector tags) : tLogData(tLogData), knownCommittedVersion(0), logId(interf.id()), @@ -1891,8 +1891,10 @@ ACTOR Future tLogCommit( // future version to be included) // NOTE: 
execOpCommitInProgress will not be set for exec commands which // start with \xff + state bool execOpLockTaken = false; if (logData->execOpCommitInProgress) { - wait(logData->execOpHold.getFuture()); + wait(logData->execOpLock.take()); + execOpLockTaken = true; } state Version execVersion = invalidVersion; @@ -1917,10 +1919,17 @@ ACTOR Future tLogCommit( execProcessingHelper(self, logData, &req, &execTags, &execArg, &execCmd, &execVersion, &snapFailKeySetters, &playIgnoredPops); if (execVersion != invalidVersion) { TraceEvent(SevDebug, "SettingExecOpCommit") + .detail("LogId", logData->logId) .detail("ExecVersion", execVersion) .detail("Version", req.version); logData->execOpCommitInProgress = true; - logData->execOpHold.reset(); + if (!execOpLockTaken) { + wait(logData->execOpLock.take()); + execOpLockTaken = true; + } else { + ASSERT(logData->execOpLock.available() == 0); + } + ASSERT(execOpLockTaken); } } @@ -1952,8 +1961,12 @@ ACTOR Future tLogCommit( wait(tLogSnapHelper(self, logData, &execArg, qe.version, execVersion, execCmd, execTags)); } if (execVersion != invalidVersion && logData->execOpCommitInProgress) { + ASSERT(execOpLockTaken); logData->execOpCommitInProgress = false; - logData->execOpHold.send(Void()); + } + if (execOpLockTaken) { + logData->execOpLock.release(); + execOpLockTaken = false; } execVersion = invalidVersion; From b1b96946afe7033f3ebc7e0280923e1ba185481f Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Thu, 9 May 2019 18:15:17 -0700 Subject: [PATCH 49/69] logData->stop check right after execOpHold wait --- fdbserver/OldTLogServer_6_0.actor.cpp | 10 +++++----- fdbserver/TLogServer.actor.cpp | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fdbserver/OldTLogServer_6_0.actor.cpp b/fdbserver/OldTLogServer_6_0.actor.cpp index 708aee957a..5d72d26c96 100644 --- a/fdbserver/OldTLogServer_6_0.actor.cpp +++ b/fdbserver/OldTLogServer_6_0.actor.cpp @@ -1513,11 +1513,6 @@ ACTOR Future tLogCommit( wait( 
delayJittered(.005, TaskTLogCommit) ); } - if(logData->stopped) { - req.reply.sendError( tlog_stopped() ); - return Void(); - } - // while exec op is being committed, no new transactions will be admitted. // This property is useful for snapshot kind of operations which wants to // take a snap of the disk image at a particular version (no data from @@ -1530,6 +1525,11 @@ ACTOR Future tLogCommit( execOpLockTaken = true; } + if(logData->stopped) { + req.reply.sendError( tlog_stopped() ); + return Void(); + } + state Version execVersion = invalidVersion; state ExecCmdValueString execArg(); state TLogQueueEntryRef qe; diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index 97070181cd..d50c67b7f3 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -1880,11 +1880,6 @@ ACTOR Future tLogCommit( wait( delayJittered(.005, TaskTLogCommit) ); } - if(logData->stopped) { - req.reply.sendError( tlog_stopped() ); - return Void(); - } - // while exec op is being committed, no new transactions will be admitted. 
// This property is useful for snapshot kind of operations which wants to // take a snap of the disk image at a particular version (not data from @@ -1897,6 +1892,11 @@ ACTOR Future tLogCommit( execOpLockTaken = true; } + if(logData->stopped) { + req.reply.sendError( tlog_stopped() ); + return Void(); + } + state Version execVersion = invalidVersion; state ExecCmdValueString execArg(); state TLogQueueEntryRef qe; From 40358e1dd659daec5fdaac3fa54603291ba096eb Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Mon, 13 May 2019 17:15:55 -0700 Subject: [PATCH 50/69] limit of getRange in snapTest reduced With CLIENT_KNOBS->TOO_MANY in snapTest, by the time getRange gathers all the results, the storage server's oldest version has gone past the req->version and hence the transaction fails with transaction_too_old --- fdbserver/workloads/SnapTest.actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbserver/workloads/SnapTest.actor.cpp b/fdbserver/workloads/SnapTest.actor.cpp index 23d17ab342..9c8ac71329 100644 --- a/fdbserver/workloads/SnapTest.actor.cpp +++ b/fdbserver/workloads/SnapTest.actor.cpp @@ -275,7 +275,7 @@ public: // workload functions tr.reset(); loop { try { - Standalone kvRange = wait(tr.getRange(begin, end, CLIENT_KNOBS->TOO_MANY)); + Standalone kvRange = wait(tr.getRange(begin, end, 1000)); if (!kvRange.more && kvRange.size() == 0) { TraceEvent("SnapTestNoMoreEntries"); break; From 31b6c86650dafb0eaa56ac28b935d18c723e5882 Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Mon, 13 May 2019 15:43:27 -0700 Subject: [PATCH 51/69] ignorePopDeadline to have high limit in simulator - ignorePopDeadline to have highier limit in simulator to accommdate for the buggify delays and make snapshot succeed. 
- introduce a new knob for auto resetting the disabling of tlog pop --- fdbserver/Knobs.cpp | 1 + fdbserver/Knobs.h | 1 + fdbserver/OldTLogServer_6_0.actor.cpp | 3 +-- fdbserver/TLogServer.actor.cpp | 3 +-- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fdbserver/Knobs.cpp b/fdbserver/Knobs.cpp index e1a805e14b..68d73498c4 100644 --- a/fdbserver/Knobs.cpp +++ b/fdbserver/Knobs.cpp @@ -78,6 +78,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) { init( DISK_QUEUE_MAX_TRUNCATE_BYTES, 2<<30 ); if ( randomize && BUGGIFY ) DISK_QUEUE_MAX_TRUNCATE_BYTES = 0; init( TLOG_DEGRADED_DELAY_COUNT, 5 ); init( TLOG_DEGRADED_DURATION, 5.0 ); + init( TLOG_IGNORE_POP_AUTO_ENABLE_DELAY, 300.0 ); // Data distribution queue init( HEALTH_POLL_TIME, 1.0 ); diff --git a/fdbserver/Knobs.h b/fdbserver/Knobs.h index e7a43d0a2f..30ff70bf57 100644 --- a/fdbserver/Knobs.h +++ b/fdbserver/Knobs.h @@ -316,6 +316,7 @@ public: int64_t TLOG_SPILL_THRESHOLD; int64_t TLOG_HARD_LIMIT_BYTES; int64_t TLOG_RECOVER_MEMORY_LIMIT; + double TLOG_IGNORE_POP_AUTO_ENABLE_DELAY; double MAX_TRANSACTIONS_PER_BYTE; diff --git a/fdbserver/OldTLogServer_6_0.actor.cpp b/fdbserver/OldTLogServer_6_0.actor.cpp index 5d72d26c96..0aed431c7c 100644 --- a/fdbserver/OldTLogServer_6_0.actor.cpp +++ b/fdbserver/OldTLogServer_6_0.actor.cpp @@ -1381,8 +1381,7 @@ void execProcessingHelper(TLogData* self, .detail("Version", req->version); } self->ignorePopUid = uidStr.toString(); - // ignorePopRequest will be turned off after 30 seconds - self->ignorePopDeadline = g_network->now() + 30.0; + self->ignorePopDeadline = g_network->now() + SERVER_KNOBS->TLOG_IGNORE_POP_AUTO_ENABLE_DELAY; TraceEvent("TLogExecCmdPopDisable") .detail("ExecCmd", execCmd->toString()) .detail("UidStr", uidStr.toString()) diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index d50c67b7f3..6d1f82093f 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -1749,8 +1749,7 
@@ void execProcessingHelper(TLogData* self, .detail("Version", req->version); } self->ignorePopUid = uidStr.toString(); - // ignorePopRequest will be turned off after 30 seconds - self->ignorePopDeadline = g_network->now() + 30.0; + self->ignorePopDeadline = g_network->now() + SERVER_KNOBS->TLOG_IGNORE_POP_AUTO_ENABLE_DELAY; TraceEvent("TLogExecCmdPopDisable") .detail("ExecCmd", execCmd->toString()) .detail("UidStr", uidStr.toString()) From c906da1f625a030fbb31bc92302db3e2297b72f7 Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Mon, 13 May 2019 17:55:38 -0700 Subject: [PATCH 52/69] simulator: spawnProcess to wait for long duration spawnProcess was waiting for 3 seconds and terminating the child process for synchronous calls, but in the simulator, this can lead to non-determinism, because some cases the command can run in <3 or >3 seconds. The fix is to increase the wait for duration to be very long that it has to synchronously wait and get the results or the test will timeout. --- fdbserver/FDBExecHelper.actor.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/fdbserver/FDBExecHelper.actor.cpp b/fdbserver/FDBExecHelper.actor.cpp index 6e139c743f..35fa13b097 100644 --- a/fdbserver/FDBExecHelper.actor.cpp +++ b/fdbserver/FDBExecHelper.actor.cpp @@ -102,12 +102,19 @@ ACTOR Future spawnProcess(std::string binPath, std::vector par if (!isSync && !g_network->isSimulated()) { while (c.running() && runTime <= maxWaitTime) { - wait(delay(0.1)); - runTime += 0.1; + wait(delay(0.1)); + runTime += 0.1; } } else { - int maxWaitTimeInt = static_cast(maxWaitTime + 1.0); - c.wait_for(std::chrono::seconds(maxWaitTimeInt)); + if (g_network->isSimulated()) { + // to keep the simulator deterministic, wait till the process exits, + // hence giving a large wait time + c.wait_for(std::chrono::hours(24)); + ASSERT(!c.running()); + } else { + int maxWaitTimeInt = static_cast(maxWaitTime + 1.0); + c.wait_for(std::chrono::seconds(maxWaitTimeInt)); + } } 
if (c.running()) { From 3aa848b8afd831ecbb89ddbc76ae3d8c0d1ae669 Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Fri, 17 May 2019 11:52:01 -0700 Subject: [PATCH 53/69] minor bug in whitelist binary path testing --- fdbserver/workloads/SnapTest.actor.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fdbserver/workloads/SnapTest.actor.cpp b/fdbserver/workloads/SnapTest.actor.cpp index 9c8ac71329..c912d26af9 100644 --- a/fdbserver/workloads/SnapTest.actor.cpp +++ b/fdbserver/workloads/SnapTest.actor.cpp @@ -336,7 +336,6 @@ public: // workload functions // should fail state bool testedFailure = false; snapFailed = false; - retry = 0; loop { self->snapUID = g_random->randomUniqueID(); try { @@ -345,7 +344,6 @@ public: // workload functions wait(status); break; } catch (Error& e) { - ++retry; if (e.code() == error_code_cluster_not_fully_recovered) { snapFailed = true; break; @@ -354,9 +352,6 @@ public: // workload functions testedFailure = true; break; } - if (retry >= 5) { - break; - } } } ASSERT(testedFailure || snapFailed); From b17ad85497f2ee59fd49d1a76b894f03d949383e Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Fri, 17 May 2019 17:34:55 -0700 Subject: [PATCH 54/69] exec op not supported when log_anti_quorum > 0 --- fdbclient/NativeAPI.actor.cpp | 3 ++- fdbserver/ConflictSet.h | 1 + fdbserver/MasterProxyServer.actor.cpp | 18 +++++++++++++++++- fdbserver/workloads/SnapTest.actor.cpp | 9 ++++++--- flow/error_definitions.h | 1 + 5 files changed, 27 insertions(+), 5 deletions(-) diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 5b1a84ba29..5755e375df 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -2711,7 +2711,8 @@ ACTOR static Future tryCommit( Database cx, Reference && e.code() != error_code_database_locked && e.code() != error_code_proxy_memory_limit_exceeded && e.code() != error_code_transaction_not_permitted - && e.code() != error_code_cluster_not_fully_recovered) + && e.code() != 
error_code_cluster_not_fully_recovered + && e.code() != error_code_txn_exec_log_anti_quorum) TraceEvent(SevError, "TryCommitError").error(e); if (trLogInfo) trLogInfo->addLog(FdbClientLogEvents::EventCommitError(startTime, static_cast(e.code()), req)); diff --git a/fdbserver/ConflictSet.h b/fdbserver/ConflictSet.h index 09d72a8061..6a28e467eb 100644 --- a/fdbserver/ConflictSet.h +++ b/fdbserver/ConflictSet.h @@ -42,6 +42,7 @@ struct ConflictBatch { TransactionCommitted, TransactionNotPermitted, TransactionNotFullyRecovered, + TransactionExecLogAntiQuorum, }; void addTransaction( const CommitTransactionRef& transaction ); diff --git a/fdbserver/MasterProxyServer.actor.cpp b/fdbserver/MasterProxyServer.actor.cpp index a8830aca71..92d72883db 100644 --- a/fdbserver/MasterProxyServer.actor.cpp +++ b/fdbserver/MasterProxyServer.actor.cpp @@ -785,6 +785,13 @@ ACTOR Future commitBatch( state StringRef binPath = execArg.getBinaryPath(); state StringRef uidStr = execArg.getBinaryArgValue(LiteralStringRef("uid")); + auto result = + self->txnStateStore->readValue(LiteralStringRef("log_anti_quorum").withPrefix(configKeysPrefix)).get(); + state int logAntiQuorum = 0; + if (result.present()) { + logAntiQuorum = atoi(result.get().toString().c_str()); + } + if (m.param1 != execDisableTLogPop && m.param1 != execEnableTLogPop && !isWhitelisted(self->whitelistedBinPathVec, binPath)) { @@ -800,6 +807,13 @@ ACTOR Future commitBatch( TraceEvent("ExecTransactionNotFullyRecovered") .detail("TransactionNum", transactionNum); committed[transactionNum] = ConflictBatch::TransactionNotFullyRecovered; + } else if (logAntiQuorum > 0) { + // exec op is not supported when logAntiQuorum is configured + // FIXME: Add support for exec ops in the presence of log anti quorum + TraceEvent("ExecOpNotSupportedWithLogAntiQuorum") + .detail("LogAntiQuorum", logAntiQuorum) + .detail("TransactionNum", transactionNum); + committed[transactionNum] = ConflictBatch::TransactionExecLogAntiQuorum; } else { // Send 
the ExecOp to // - all the storage nodes in a single region and @@ -1079,7 +1093,9 @@ ACTOR Future commitBatch( else if (committed[t] == ConflictBatch::TransactionNotFullyRecovered) { trs[t].reply.sendError(cluster_not_fully_recovered()); } - else { + else if (committed[t] == ConflictBatch::TransactionExecLogAntiQuorum) { + trs[t].reply.sendError(txn_exec_log_anti_quorum()); + } else { trs[t].reply.sendError(not_committed()); } diff --git a/fdbserver/workloads/SnapTest.actor.cpp b/fdbserver/workloads/SnapTest.actor.cpp index c912d26af9..3574303b4f 100644 --- a/fdbserver/workloads/SnapTest.actor.cpp +++ b/fdbserver/workloads/SnapTest.actor.cpp @@ -162,7 +162,8 @@ public: // workload functions try { wait(tr.onError(e)); } catch (Error& e) { - if (e.code() == error_code_cluster_not_fully_recovered) { + if (e.code() == error_code_cluster_not_fully_recovered + || e.code() == error_code_txn_exec_log_anti_quorum) { TraceEvent(SevWarnAlways, "ClusterNotFullyRecovered"); self->skipCheck = true; break; @@ -234,7 +235,8 @@ public: // workload functions wait(status); break; } catch (Error& e) { - if (e.code() == error_code_cluster_not_fully_recovered) { + if (e.code() == error_code_cluster_not_fully_recovered || + e.code() == error_code_txn_exec_log_anti_quorum) { ++retry; if (retry > 3) { snapFailed = true; @@ -344,7 +346,8 @@ public: // workload functions wait(status); break; } catch (Error& e) { - if (e.code() == error_code_cluster_not_fully_recovered) { + if (e.code() == error_code_cluster_not_fully_recovered || + e.code() == error_code_txn_exec_log_anti_quorum) { snapFailed = true; break; } diff --git a/flow/error_definitions.h b/flow/error_definitions.h index 5c5d25c8b2..37ef476fb2 100755 --- a/flow/error_definitions.h +++ b/flow/error_definitions.h @@ -67,6 +67,7 @@ ERROR( shutdown_in_progress, 1043, "Operation no longer supported due to shutdow ERROR( serialization_failed, 1044, "Failed to deserialize an object" ) ERROR( transaction_not_permitted, 1045, "Operation not 
permitted") ERROR( cluster_not_fully_recovered, 1046, "Cluster not fully recovered") +ERROR( txn_exec_log_anti_quorum, 1047, "Execute Transaction not supported when log anti quorum is configured") ERROR( broken_promise, 1100, "Broken promise" ) ERROR( operation_cancelled, 1101, "Asynchronous operation cancelled" ) From 2a68b2859015f88fff7fd2fcb41805beacc20011 Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Tue, 21 May 2019 13:55:30 -0700 Subject: [PATCH 55/69] rebase related changes --- fdbclient/MasterProxyInterface.h | 4 +++- fdbserver/MasterProxyServer.actor.cpp | 16 ---------------- fdbserver/WorkerInterface.actor.h | 1 + 3 files changed, 4 insertions(+), 17 deletions(-) diff --git a/fdbclient/MasterProxyInterface.h b/fdbclient/MasterProxyInterface.h index f59ffb373c..9b65ec572c 100644 --- a/fdbclient/MasterProxyInterface.h +++ b/fdbclient/MasterProxyInterface.h @@ -299,7 +299,9 @@ struct GetHealthMetricsRequest } }; -struct ExecRequest { +struct ExecRequest +{ + constexpr static FileIdentifier file_identifier = 22403900; Arena arena; StringRef execPayload; ReplyPromise reply; diff --git a/fdbserver/MasterProxyServer.actor.cpp b/fdbserver/MasterProxyServer.actor.cpp index 92d72883db..3fc4665a15 100644 --- a/fdbserver/MasterProxyServer.actor.cpp +++ b/fdbserver/MasterProxyServer.actor.cpp @@ -747,24 +747,8 @@ ACTOR Future commitBatch( if (debugMutation("ProxyCommit", commitVersion, m)) TraceEvent("ProxyCommitTo", self->dbgid).detail("To", describe(ranges.begin().value().tags)).detail("Mutation", m.toString()).detail("Version", commitVersion); -<<<<<<< HEAD ranges.begin().value().populateTags(); toCommit.addTags(ranges.begin().value().tags); -======= - auto& tags = ranges.begin().value().tags; - if(!tags.size()) { - for( auto info : ranges.begin().value().src_info ) { - tags.push_back( info->tag ); - } - for( auto info : ranges.begin().value().dest_info ) { - tags.push_back( info->tag ); - } - uniquify(tags); - } - - for (auto& tag : tags) - 
toCommit.addTag(tag); ->>>>>>> c6dc6bf3... Exec op to tag only local storage nodes } else { TEST(true); //A clear range extends past a shard boundary diff --git a/fdbserver/WorkerInterface.actor.h b/fdbserver/WorkerInterface.actor.h index 6ccdb253ae..c72cd4e85d 100644 --- a/fdbserver/WorkerInterface.actor.h +++ b/fdbserver/WorkerInterface.actor.h @@ -241,6 +241,7 @@ struct TraceBatchDumpRequest { }; struct ExecuteRequest { + constexpr static FileIdentifier file_identifier = 8184128; ReplyPromise reply; Arena arena; From 3877f874811cb10a1b37b3109d779ce8f941dd97 Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Tue, 21 May 2019 16:19:40 -0700 Subject: [PATCH 56/69] comment change in tLogCommit --- fdbserver/OldTLogServer_6_0.actor.cpp | 2 +- fdbserver/TLogServer.actor.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fdbserver/OldTLogServer_6_0.actor.cpp b/fdbserver/OldTLogServer_6_0.actor.cpp index 0aed431c7c..9331141b26 100644 --- a/fdbserver/OldTLogServer_6_0.actor.cpp +++ b/fdbserver/OldTLogServer_6_0.actor.cpp @@ -1537,7 +1537,7 @@ ACTOR Future tLogCommit( state vector> snapFailKeySetters; state vector> playIgnoredPops; - if (logData->version.get() == req.prevVersion) { // Not a duplicate (check relies on no waiting between here and self->version.set() below!) + if (logData->version.get() == req.prevVersion) { // Not a duplicate (check relies on critical section between here self->version.set() below!) 
if(req.debugID.present()) g_traceBatch.addEvent("CommitDebug", tlogDebugID.get().first(), "TLog.tLogCommit.Before"); diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index 6d1f82093f..7d108955bc 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -1903,7 +1903,7 @@ ACTOR Future tLogCommit( state Standalone> execTags; state vector> playIgnoredPops; - if (logData->version.get() == req.prevVersion) { // Not a duplicate (check relies on no waiting between here and self->version.set() below!) + if (logData->version.get() == req.prevVersion) { // Not a duplicate (check relies on critical section between here self->version.set() below!) if(req.debugID.present()) g_traceBatch.addEvent("CommitDebug", tlogDebugID.get().first(), "TLog.tLogCommit.Before"); From 42c551a996e1ed7ca3dad68d32846bdab5b6c39a Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Wed, 22 May 2019 13:38:56 -0700 Subject: [PATCH 57/69] handle isRestoring & BackupFailed not being set restartInfo.in->BackupFailed and isRestoring may not be set in all cases, handle the absence of them. 
--- fdbserver/fdbserver.actor.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/fdbserver/fdbserver.actor.cpp b/fdbserver/fdbserver.actor.cpp index 78a546706c..a13e2c815d 100644 --- a/fdbserver/fdbserver.actor.cpp +++ b/fdbserver/fdbserver.actor.cpp @@ -1675,9 +1675,16 @@ int main(int argc, char* argv[]) { ini.SetUnicode(); std::string absDataFolder = abspath(dataFolder); ini.LoadFile(joinPath(absDataFolder, "restartInfo.ini").c_str()); - isRestoring = atoi(ini.GetValue("RESTORE", "isRestoring")); - bool snapFailed = atoi(ini.GetValue("RESTORE", "BackupFailed")); - if (isRestoring && !snapFailed) { + int backupFailed = true; + const char* isRestoringStr = ini.GetValue("RESTORE", "isRestoring", NULL); + if (isRestoringStr) { + isRestoring = atoi(isRestoringStr); + const char* backupFailedStr = ini.GetValue("RESTORE", "BackupFailed", NULL); + if (isRestoring && backupFailedStr) { + backupFailed = atoi(backupFailedStr); + } + } + if (isRestoring && !backupFailed) { std::vector returnList; std::string ext = ""; returnList = platform::listDirectories(absDataFolder); From b43c100e57b2420b56a9450f49b075dbd4ad7f54 Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Sat, 25 May 2019 14:12:18 -0700 Subject: [PATCH 58/69] TLog bug fixes --- fdbserver/OldTLogServer_6_0.actor.cpp | 11 +++++------ fdbserver/TLogServer.actor.cpp | 12 ++++++------ 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/fdbserver/OldTLogServer_6_0.actor.cpp b/fdbserver/OldTLogServer_6_0.actor.cpp index 9331141b26..2889e4a069 100644 --- a/fdbserver/OldTLogServer_6_0.actor.cpp +++ b/fdbserver/OldTLogServer_6_0.actor.cpp @@ -1541,12 +1541,6 @@ ACTOR Future tLogCommit( if(req.debugID.present()) g_traceBatch.addEvent("CommitDebug", tlogDebugID.get().first(), "TLog.tLogCommit.Before"); - // Log the changes to the persistent queue, to be committed by commitQueue() - qe.version = req.version; - qe.knownCommittedVersion = logData->knownCommittedVersion; - qe.messages 
= req.messages; - qe.id = logData->logId; - if (req.hasExecOp) { execProcessingHelper(self, logData, &req, &execTags, &execArg, &execCmd, &execVersion, &snapFailKeySetters, &playIgnoredPops); if (execVersion != invalidVersion) { @@ -1570,6 +1564,11 @@ ACTOR Future tLogCommit( logData->knownCommittedVersion = std::max(logData->knownCommittedVersion, req.knownCommittedVersion); + // Log the changes to the persistent queue, to be committed by commitQueue() + qe.version = req.version; + qe.knownCommittedVersion = logData->knownCommittedVersion; + qe.messages = req.messages; + qe.id = logData->logId; self->persistentQueue->push( qe, logData ); self->diskQueueCommitBytes += qe.expectedSize(); diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index 7d108955bc..e3d2960d76 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -1902,17 +1902,12 @@ ACTOR Future tLogCommit( state StringRef execCmd; state Standalone> execTags; state vector> playIgnoredPops; + state vector> snapFailKeySetters; if (logData->version.get() == req.prevVersion) { // Not a duplicate (check relies on critical section between here self->version.set() below!) 
if(req.debugID.present()) g_traceBatch.addEvent("CommitDebug", tlogDebugID.get().first(), "TLog.tLogCommit.Before"); - // Log the changes to the persistent queue, to be committed by commitQueue() - qe.version = req.version; - qe.knownCommittedVersion = logData->knownCommittedVersion; - qe.messages = req.messages; - qe.id = logData->logId; - state vector> snapFailKeySetters; if (req.hasExecOp) { execProcessingHelper(self, logData, &req, &execTags, &execArg, &execCmd, &execVersion, &snapFailKeySetters, &playIgnoredPops); @@ -1937,6 +1932,11 @@ ACTOR Future tLogCommit( logData->knownCommittedVersion = std::max(logData->knownCommittedVersion, req.knownCommittedVersion); + // Log the changes to the persistent queue, to be committed by commitQueue() + qe.version = req.version; + qe.knownCommittedVersion = logData->knownCommittedVersion; + qe.messages = req.messages; + qe.id = logData->logId; self->persistentQueue->push( qe, logData ); self->diskQueueCommitBytes += qe.expectedSize(); From 4bcb590f123cd9b497f19142172e0082436f73e4 Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Tue, 28 May 2019 17:29:05 -0700 Subject: [PATCH 59/69] g_random -> deterministicRandom() --- fdbclient/ManagementAPI.actor.cpp | 2 +- fdbclient/NativeAPI.actor.cpp | 2 +- fdbserver/FDBExecHelper.actor.cpp | 2 +- fdbserver/workloads/SnapTest.actor.cpp | 8 ++++---- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fdbclient/ManagementAPI.actor.cpp b/fdbclient/ManagementAPI.actor.cpp index 6b344a51a9..a371ac2624 100644 --- a/fdbclient/ManagementAPI.actor.cpp +++ b/fdbclient/ManagementAPI.actor.cpp @@ -1478,7 +1478,7 @@ ACTOR Future mgmtSnapCreate(Database cx, StringRef snapCmd) { state int retryCount = 0; loop { - state UID snapUID = g_random->randomUniqueID(); + state UID snapUID = deterministicRandom()->randomUniqueID(); try { wait(snapCreate(cx, snapCmd, snapUID)); printf("Snapshots tagged with UID: %s, check logs for status\n", snapUID.toString().c_str()); diff --git 
a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 5755e375df..9345425d97 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -3385,7 +3385,7 @@ ACTOR Future snapCreate(Database inputCx, StringRef snapCmd, UID snapUID) TraceEvent("SnapCreateAfterSnappingTLogStorage").detail("UID", snapUID); if (BUGGIFY) { - int32_t toDelay = g_random->randomInt(1, 30); + int32_t toDelay = deterministicRandom()->randomInt(1, 30); wait(delay(toDelay)); } diff --git a/fdbserver/FDBExecHelper.actor.cpp b/fdbserver/FDBExecHelper.actor.cpp index 35fa13b097..259136c009 100644 --- a/fdbserver/FDBExecHelper.actor.cpp +++ b/fdbserver/FDBExecHelper.actor.cpp @@ -97,7 +97,7 @@ ACTOR Future spawnProcess(std::string binPath, std::vector par // for async calls in simulator, always delay by a fixed time, otherwise // the predictability of the simulator breaks if (!isSync && g_network->isSimulated()) { - wait(delay(g_random->random01())); + wait(delay(deterministicRandom()->random01())); } if (!isSync && !g_network->isSimulated()) { diff --git a/fdbserver/workloads/SnapTest.actor.cpp b/fdbserver/workloads/SnapTest.actor.cpp index 3574303b4f..ec77cbccde 100644 --- a/fdbserver/workloads/SnapTest.actor.cpp +++ b/fdbserver/workloads/SnapTest.actor.cpp @@ -180,7 +180,7 @@ public: // workload functions state vector keys; for (int i = 0; i < 1000; i++) { - keys.push_back(g_random->randomInt64(0, INT64_MAX - 2)); + keys.push_back(deterministicRandom()->randomInt64(0, INT64_MAX - 2)); } state int retry = 0; @@ -220,7 +220,7 @@ public: // workload functions wait(self->_create_keys(cx, "snapKey")); } else if (self->testID == 1) { // create a snapshot - state double toDelay = fmod(g_random->randomUInt32(), self->maxSnapDelay); + state double toDelay = fmod(deterministicRandom()->randomUInt32(), self->maxSnapDelay); TraceEvent("ToDelay").detail("Value", toDelay); ASSERT(toDelay < self->maxSnapDelay); wait(delay(toDelay)); @@ -228,7 +228,7 @@ public: // workload 
functions state int retry = 0; state bool snapFailed = false; loop { - self->snapUID = g_random->randomUniqueID(); + self->snapUID = deterministicRandom()->randomUniqueID(); try { StringRef snapCmdRef = LiteralStringRef("/bin/snap_create.sh"); Future status = snapCreate(cx, snapCmdRef, self->snapUID); @@ -339,7 +339,7 @@ public: // workload functions state bool testedFailure = false; snapFailed = false; loop { - self->snapUID = g_random->randomUniqueID(); + self->snapUID = deterministicRandom()->randomUniqueID(); try { StringRef snapCmdRef = LiteralStringRef("/bin/snap_create1.sh"); Future status = snapCreate(cx, snapCmdRef, self->snapUID); From 1190f2f33d7345104e17e2a638ed67b4b82c60c8 Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Tue, 28 May 2019 18:21:06 -0700 Subject: [PATCH 60/69] rebased related changes --- fdbserver/OldTLogServer_6_0.actor.cpp | 2 +- fdbserver/TLogServer.actor.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fdbserver/OldTLogServer_6_0.actor.cpp b/fdbserver/OldTLogServer_6_0.actor.cpp index 2889e4a069..fc9251ec78 100644 --- a/fdbserver/OldTLogServer_6_0.actor.cpp +++ b/fdbserver/OldTLogServer_6_0.actor.cpp @@ -2419,7 +2419,7 @@ ACTOR Future tLogStart( TLogData* self, InitializeTLogRequest req, Localit // New tLog (if !recoverFrom.size()) or restore from network ACTOR Future tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference> db, LocalityData locality, PromiseStream tlogRequests, UID tlogId, bool restoreFromDisk, Promise oldLog, Promise recovered, std::string folder, Reference> degraded) { - state TLogData self( tlogId, persistentData, persistentQueue, db, folder, degraded ); + state TLogData self( tlogId, persistentData, persistentQueue, db, degraded, folder ); state Future error = actorCollection( self.sharedActors.getFuture() ); TraceEvent("SharedTlog", tlogId); diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index e3d2960d76..4638133442 100644 --- 
a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -2817,7 +2817,7 @@ ACTOR Future tLogStart( TLogData* self, InitializeTLogRequest req, Localit // New tLog (if !recoverFrom.size()) or restore from network ACTOR Future tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference> db, LocalityData locality, PromiseStream tlogRequests, UID tlogId, bool restoreFromDisk, Promise oldLog, Promise recovered, std::string folder, Reference> degraded ) { - state TLogData self( tlogId, persistentData, persistentQueue, db, folder, degraded ); + state TLogData self( tlogId, persistentData, persistentQueue, db, degraded, folder ); state Future error = actorCollection( self.sharedActors.getFuture() ); TraceEvent("SharedTlog", tlogId); From 19de1ae229bf850bf79294ac9a9eb26994a540f4 Mon Sep 17 00:00:00 2001 From: Pieter Joost Date: Wed, 29 May 2019 11:15:00 +0200 Subject: [PATCH 61/69] include errors in the dir layer documentation --- bindings/go/src/fdb/directory/directory.go | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/bindings/go/src/fdb/directory/directory.go b/bindings/go/src/fdb/directory/directory.go index 167cfc0bf3..78afc9a1ef 100644 --- a/bindings/go/src/fdb/directory/directory.go +++ b/bindings/go/src/fdb/directory/directory.go @@ -56,8 +56,14 @@ const ( ) var ( - ErrDirAlreadyExists = errors.New("the directory already exists") - ErrDirNotExists = errors.New("the directory does not exist") + // ErrDirAlreadyExists is returned when trying to create a directory while it already exists. + ErrDirAlreadyExists = errors.New("the directory already exists") + + // ErrDirNotExists is returned when opening or listing a directory that does not exists. + ErrDirNotExists = errors.New("the directory does not exist") + + // ErrParentDirDoesNotExist is returned when opening a directory and one or more + // parent directories in the path do not exist. 
ErrParentDirDoesNotExist = errors.New("the parent directory does not exist") ) @@ -76,8 +82,9 @@ type Directory interface { CreateOrOpen(t fdb.Transactor, path []string, layer []byte) (DirectorySubspace, error) // Open opens the directory specified by path (relative to this Directory), - // and returns the directory and its contents as a DirectorySubspace (or an - // error if the directory does not exist). + // and returns the directory and its contents as a DirectorySubspace (or ErrDirNotExists + // error if the directory does not exist, or ErrParentDirDoesNotExist if one of the parent + // directories in the path does not exist). // // If the byte slice layer is specified, it is compared against the layer // specified when the directory was created, and an error is returned if @@ -86,7 +93,7 @@ type Directory interface { // Create creates a directory specified by path (relative to this // Directory), and returns the directory and its contents as a - // DirectorySubspace (or an error if the directory already exists). + // DirectorySubspace (or ErrDirAlreadyExists if the directory already exists). // // If the byte slice layer is specified, it is recorded as the layer and // will be checked when opening the directory in the future. 
From f83531c8ee3defaed7ad06085a977f9dc18438d1 Mon Sep 17 00:00:00 2001 From: Timm Preetz <52437+tp@users.noreply.github.com> Date: Thu, 30 May 2019 11:08:22 +0200 Subject: [PATCH 62/69] Fix typos --- documentation/sphinx/source/flow.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/documentation/sphinx/source/flow.rst b/documentation/sphinx/source/flow.rst index 2dec52584b..b9ab3c99f8 100644 --- a/documentation/sphinx/source/flow.rst +++ b/documentation/sphinx/source/flow.rst @@ -40,7 +40,7 @@ Promises and futures can be used within a single process, but their real strengt wait() ------ -At the point when a receiver holding a ``Future`` needs the ``T`` to continue computation, it invokes the ``wait()`` statement with the ``Future`` as its parameter. The ``wait()`` statement allows the calling actor to pause execution until the value of the future is set, returning a value of type ``T`` During the wait, other actors can continue execution, providing asynchronous concurrency within a single process. +At the point when a receiver holding a ``Future`` needs the ``T`` to continue computation, it invokes the ``wait()`` statement with the ``Future`` as its parameter. The ``wait()`` statement allows the calling actor to pause execution until the value of the future is set, returning a value of type ``T``. During the wait, other actors can continue execution, providing asynchronous concurrency within a single process. ACTOR ----- @@ -154,5 +154,5 @@ Some preprocessor definitions will not fix all issues though. When programming f foo([x]() { x->bar(); }) } -- state variables in don't follow the normal scoping rules. So in flow a state variable can be defined in a inner scope and later it can be used in the outer scope. In order to not break compilation in IDE-mode, always define state variables in the outermost scope they will be used. +- state variables in flow don't follow the normal scoping rules. 
So in flow a state variable can be defined in a inner scope and later it can be used in the outer scope. In order to not break compilation in IDE-mode, always define state variables in the outermost scope they will be used. From b5773d2f6bfc34559f7bd78a0e10e8bc58bb7ea1 Mon Sep 17 00:00:00 2001 From: Pieter Joost van de Sande Date: Thu, 30 May 2019 18:16:29 +0200 Subject: [PATCH 63/69] fix spelling error Co-Authored-By: A.J. Beamon --- bindings/go/src/fdb/directory/directory.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bindings/go/src/fdb/directory/directory.go b/bindings/go/src/fdb/directory/directory.go index 78afc9a1ef..c47a48e5a2 100644 --- a/bindings/go/src/fdb/directory/directory.go +++ b/bindings/go/src/fdb/directory/directory.go @@ -59,7 +59,7 @@ var ( // ErrDirAlreadyExists is returned when trying to create a directory while it already exists. ErrDirAlreadyExists = errors.New("the directory already exists") - // ErrDirNotExists is returned when opening or listing a directory that does not exists. + // ErrDirNotExists is returned when opening or listing a directory that does not exist. ErrDirNotExists = errors.New("the directory does not exist") // ErrParentDirDoesNotExist is returned when opening a directory and one or more From 1acff2fdb7e8e3bf9341912311321783ea78d45f Mon Sep 17 00:00:00 2001 From: Meng Xu Date: Wed, 27 Mar 2019 15:53:05 -0700 Subject: [PATCH 64/69] Design:Backup data format --- design/backup-dataFormat.md | 95 +++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 design/backup-dataFormat.md diff --git a/design/backup-dataFormat.md b/design/backup-dataFormat.md new file mode 100644 index 0000000000..002cd88984 --- /dev/null +++ b/design/backup-dataFormat.md @@ -0,0 +1,95 @@ +## FDB Backup Data Format + +### Introduction +This document describes the data format of the files generated by FoundationDB (FDB) backup procedure. 
+The target readers who may benefit from reading this document are: +* who make changes on the current backup or restore procedure; +* who write tools to digest the backup data for analytical purposes; +* who want to understand the internals of how backup and restore work. + +The description of the backup data format is based on FDB 5.2 to FDB 6.1. The backup data format may (although unlikely) change after FDB 6.1. + + +### Files generated by backup +The backup procedure generates two types of files: range files and log files. +* A range file describes key-value pairs in a range at the version when the backup process takes a snapshot of the range. Different range files have data for different ranges at different versions. +* A log file describes the mutations taken from a version v1 to v2 during the backup procedure. + +With the key-value pairs in range file and the mutations in log file, the restore procedure can restore the database into a consistent state at a user-provided version vk if the backup data is claimed by the restore as restorable at vk. (The details of determining if a set of backup data is restorable at a version are out of scope of this document and can be found at [backup.md](https://github.com/xumengpanda/foundationdb/blob/cd873831ecd18653c5bf459d6f72d14a99b619c4/design/backup.md).) + + +### Filename conventions +The backup files will be saved in a directory (i.e., url) specified by users. Under the directory, the range files are in the `snapshots` folder. The log files are in the `logs` folder. + +The convention of the range filename is ` snapshots/snapshot,beginVersion,beginVersion,blockSize`, where `beginVersion` is the version when the key-values in the range file are recorded, and blockSize is the size of data blocks in the range file.
+ +The convention of the log filename is `logs/,versionPrefix/log,beginVersion,endVersion,randomUID, blockSize`, where the versionPrefix is a 2-level path (`x/y`) where beginVersion should go such that `x/y/*` contains (10^smallestBucket) possible versions; the randomUID is a random UID, the `beginVersion` and `endVersion` are the version range (left inclusive, right exclusive) when the mutations are recorded; and the `blockSize` is the data block size in the log file. + +We will use an example to explain what each field in the range and log filename means. +Suppose under the backup directory, we have a range file `snapshots/snapshot,78994177,78994177,97` and a log file `logs/0000/0000/log,78655645,98655645,149a0bdfedecafa2f648219d5eba816e,1048576`. +The range file’s filename tells us that all key-value pairs decoded from the file are the KV value in DB at the version `78994177`. The data block size is `97` bytes. +The log file’s filename tells us that the mutations in the log file were the mutations in the DB during the version range `[78655645,98655645)`, and the data block size is `1048576` bytes. + + +### Data format in a range file +A range file can have one to many data blocks. Each data block has a set of key-value pairs. +A data block is encoded as follows: `Header startKey k1v1 k2v2 Padding`. + + +Example: + + The client code writes keys in this sequence: + a c d e f g h i j z + The backup procedure records the key-value pairs in the database into range file. + + H = header P = padding a...z = keys v = value | = block boundary + + Encoded file: H a cv dv ev P | H e ev fv gv hv P | H h hv iv jv z + Decoded in blocks yields: + Block 1: range [a, e) with kv pairs cv, dv + Block 2: range [e, h) with kv pairs ev, fv, gv + Block 3: range [h, z) with kv pairs hv, iv, jv + +NOTE: All blocks except for the final block will have one last value which will not be used. 
This isn't actually a waste since if the next KV pair wouldn't fit within the block after the value then the space after the final key to the next 1MB boundary would just be padding anyway. + +The code related to how a range file is written is in the `struct RangeFileWriter` in `namespace fileBackup`. + +The code that decodes a range block is in `ACTOR Future<Standalone<VectorRef<KeyValueRef>>> decodeRangeFileBlock(Reference<IAsyncFile> file, int64_t offset, int len)`. + + +### Data format in a log file +A log file can have one to many data blocks. +Each block is encoded as `Header, [Param1, Param2]... padding`. +The first 32bits in Param1 and Param2 specifies the length of the Param1 and Param2. +Param1 specifies the version when the mutations happened; +Param2 encodes the group of mutations happened at the version. + +Note that if the group of mutations is bigger than the block size, the mutation group will be split across multiple data blocks. +For example, we may get [Param1, Param2_part0], [Param1, Param2_part1]. By concatenating the Param2_part0 and Param2_part1, we can get the group of all mutations happened in the version specified in Param1. + +The encoding format for Param1 is as follows: +`hashValue|commitVersion|part`, +where `hashValue` is the hash of the commitVersion, `commitVersion` is the version when the mutations in Param2(s) are taken, and `part` is the part number in case we need to concatenate the Param2 to get the group of all mutations. + `hashValue` takes 8bits, `commitVersion` takes 64bits, and `part` takes 32bits. + +Note that in case of concatenating the partial group of mutations in Param2 to get the full group of all mutations, the part number should be continuous. + +The encoding format for the group of mutations, which is Param2 or the concatenated Param2 in case of partial group of mutations in a block, is as follows: +`length_of_the_mutation_group | encoded_mutation_1 | … | encoded_mutation_k`. 
+The `encoded_mutation_i` is encoded as follows + `type|kLen|vLen|Key|Value` +where type is the mutation type, such as Set or Clear, `kLen` and `vLen` respectively are the length of the key and value in the mutation. `Key` and `Value` are the serialized value of the Key and Value in the mutation + +The code related to how a log file is written is in the `struct LogFileWriter` in `namespace fileBackup`. + +The code that decodes a mutation block is in `ACTOR Future<Standalone<VectorRef<KeyValueRef>>> decodeLogFileBlock(Reference<IAsyncFile> file, int64_t offset, int len)` + + + + + + + + + + From 7ff54290a22709de76987e864b4178df94fcc03c Mon Sep 17 00:00:00 2001 From: Meng Xu Date: Wed, 27 Mar 2019 15:59:30 -0700 Subject: [PATCH 65/69] Design:Backup format: Fix types --- design/backup-dataFormat.md | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/design/backup-dataFormat.md b/design/backup-dataFormat.md index 002cd88984..6448d29f26 100644 --- a/design/backup-dataFormat.md +++ b/design/backup-dataFormat.md @@ -36,7 +36,7 @@ A range file can have one to many data blocks. Each data block has a set of key- A data block is encoded as follows: `Header startKey k1v1 k2v2 Padding`. -Example: + Example: The client code writes keys in this sequence: a c d e f g h i j z @@ -60,36 +60,26 @@ The code that decodes a range block is in `ACTOR Future>> decodeLogFileBlock(Reference file, int64_t offset, int len)` - - - - - - - - - - +The code that decodes a mutation block is in `ACTOR Future<Standalone<VectorRef<KeyValueRef>>> decodeLogFileBlock(Reference<IAsyncFile> file, int64_t offset, int len)`. 
\ No newline at end of file From 15bfadbce4192f696849023c893c4030dc106206 Mon Sep 17 00:00:00 2001 From: Meng Xu Date: Thu, 28 Mar 2019 13:52:39 -0700 Subject: [PATCH 66/69] Design: Backup data format: Add endianness discussion --- design/backup-dataFormat.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/design/backup-dataFormat.md b/design/backup-dataFormat.md index 6448d29f26..c3e13def0c 100644 --- a/design/backup-dataFormat.md +++ b/design/backup-dataFormat.md @@ -82,4 +82,10 @@ where type is the mutation type, such as Set or Clear, `kLen` and `vLen` respect The code related to how a log file is written is in the `struct LogFileWriter` in `namespace fileBackup`. -The code that decodes a mutation block is in `ACTOR Future<Standalone<VectorRef<KeyValueRef>>> decodeLogFileBlock(Reference<IAsyncFile> file, int64_t offset, int len)`. \ No newline at end of file +The code that decodes a mutation block is in `ACTOR Future<Standalone<VectorRef<KeyValueRef>>> decodeLogFileBlock(Reference<IAsyncFile> file, int64_t offset, int len)`. + + +### Endianness +When the restore decodes a serialized integer from the backup file, it needs to convert the serialized value from big endian to little endian. + +The reason is as follows: When the backup procedure transfers the data to remote blob store, the backup data is encoded in big endian. However, FoundationDB currently only run on little endian machines. The endianness affects the interpretation of an integer, so we must perform the endianness convertion. 
\ No newline at end of file From a8c9432cedad89f79206cc53c8bc5530e6c8bc0a Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Thu, 30 May 2019 17:05:31 -0700 Subject: [PATCH 67/69] windows build fix : del sys/wait.h in Platform.cpp --- flow/Platform.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/flow/Platform.cpp b/flow/Platform.cpp index 04b51b407c..e3ee72fab1 100644 --- a/flow/Platform.cpp +++ b/flow/Platform.cpp @@ -42,7 +42,6 @@ #include #include #include -#include #include #include "flow/UnitTest.h" #include "flow/FaultInjection.h" From 21e574d33df9f520d4a92f3caefac87cbf6f9400 Mon Sep 17 00:00:00 2001 From: Adam Feldman Date: Fri, 31 May 2019 20:00:44 -0500 Subject: [PATCH 68/69] Fix minor typos in config docs the each --> east, datacenter have --> datacenters has, mean signal --> signal --- documentation/sphinx/source/configuration.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/documentation/sphinx/source/configuration.rst b/documentation/sphinx/source/configuration.rst index d226bc60fb..02ef379328 100644 --- a/documentation/sphinx/source/configuration.rst +++ b/documentation/sphinx/source/configuration.rst @@ -530,7 +530,7 @@ The second feature is the ability to add one or more synchronous replicas of the An example configuration would be four total datacenters, two on the east coast, two on the west coast, with a preference for fast write latencies from the west coast. One datacenter on each coast would be sized to store a full copy of the data. The second datacenter on each coast would only have a few FoundationDB processes. -While everything is healthy, writes need to be made durable in both west coast datacenters before a commit can succeed. The geographic proximity of the two datacenters minimizes the additional commit latency. Reads can be served from either region, and clients can get data from whichever region is closer. 
Getting a read version from the each coast region will still require communicating with a west coast datacenter. Clients can cache read versions if they can tolerate reading stale data to avoid waiting on read versions. +While everything is healthy, writes need to be made durable in both west coast datacenters before a commit can succeed. The geographic proximity of the two datacenters minimizes the additional commit latency. Reads can be served from either region, and clients can get data from whichever region is closer. Getting a read version from east coast region will still require communicating with a west coast datacenter. Clients can cache read versions if they can tolerate reading stale data to avoid waiting on read versions. If either west coast datacenter fails, the last few mutations will be propagated from the remaining west coast datacenter to the east coast. At this point, FoundationDB will start accepting commits on the east coast. Once the west coast comes back online, the system will automatically start copying all the data that was committed to the east coast back to the west coast replica. Once the west coast has caught up, the system will automatically switch back to accepting writes from the west coast again. @@ -615,7 +615,7 @@ The number of replicas in each region is controlled by redundancy level. For exa Asymmetric configurations ------------------------- -The fact that satellite policies are configured per region allows for asymmetric configurations. For example, FoudnationDB can have a three datacenter setup where there are two datacenters on the west coast (WC1, WC2) and one datacenter on the east coast (EC1). The west coast region can be set as the preferred active region by setting the priority of its primary datacenter higher than the east coast datacenter. The west coast region should have a satellite policy configured, so that when it is active, FoundationDB is making mutations durable in both west coast datacenters. 
In the rare event that one of the west coast datacenter have failed, FoundationDB will fail over to the east coast datacenter. Because this region does not a satellite datacenter, the mutations will only be made durable in one datacenter while the transaction subsystem is located here. However this is justifiable because the region will only be active if a datacenter has already been lost. +The fact that satellite policies are configured per region allows for asymmetric configurations. For example, FoudnationDB can have a three datacenter setup where there are two datacenters on the west coast (WC1, WC2) and one datacenter on the east coast (EC1). The west coast region can be set as the preferred active region by setting the priority of its primary datacenter higher than the east coast datacenter. The west coast region should have a satellite policy configured, so that when it is active, FoundationDB is making mutations durable in both west coast datacenters. In the rare event that one of the west coast datacenters has failed, FoundationDB will fail over to the east coast datacenter. Because this region does not a satellite datacenter, the mutations will only be made durable in one datacenter while the transaction subsystem is located here. However, this is justifiable because the region will only be active if a datacenter has already been lost. This is the region configuration that implements the example:: @@ -669,7 +669,7 @@ To configure an existing database to regions, do the following steps: 4. Configure ``usable_regions=2``. This will cause the cluster to start copying data between the regions. - 5. Watch ``status`` and wait until data movement is complete. This will mean signal that the remote datacenter has a full replica of all of the data in the database. + 5. Watch ``status`` and wait until data movement is complete. This will signal that the remote datacenter has a full replica of all of the data in the database. 6. 
Change the region configuration to have a non-negative priority for the primary datacenters in both regions. This will enable automatic failover between regions. @@ -680,7 +680,7 @@ When a primary datacenter fails, the cluster will go into a degraded state. It w .. warning:: While a datacenter has failed, the maximum write throughput of the cluster will be roughly 1/3 of normal performance. This is because the transaction logs need to store all of the mutations being committed, so that once the other datacenter comes back online, it can replay history to catch back up. -To drop the dead datacenter do the follow steps: +To drop the dead datacenter do the following steps: 1. Configure the region configuration so that the dead datacenter has a negative priority. From dc59f63d0ef31f812b77d1737bf7db7c7a6ca72e Mon Sep 17 00:00:00 2001 From: Meng Xu Date: Sun, 2 Jun 2019 21:40:18 -0700 Subject: [PATCH 69/69] TraceEvent:First letter must be capitalized --- fdbserver/workloads/Cycle.actor.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fdbserver/workloads/Cycle.actor.cpp b/fdbserver/workloads/Cycle.actor.cpp index 985aba2e36..e622ffe4a3 100644 --- a/fdbserver/workloads/Cycle.actor.cpp +++ b/fdbserver/workloads/Cycle.actor.cpp @@ -102,13 +102,13 @@ struct CycleWorkload : TestWorkload { try { // Reverse next and next^2 node Optional v = wait( tr.get( self->key(r) ) ); - if (!v.present()) self->badRead("r", r, tr); + if (!v.present()) self->badRead("KeyR", r, tr); state int r2 = self->fromValue(v.get()); Optional v2 = wait( tr.get( self->key(r2) ) ); - if (!v2.present()) self->badRead("r2", r2, tr); + if (!v2.present()) self->badRead("KeyR2", r2, tr); state int r3 = self->fromValue(v2.get()); Optional v3 = wait( tr.get( self->key(r3) ) ); - if (!v3.present()) self->badRead("r3", r3, tr); + if (!v3.present()) self->badRead("KeyR3", r3, tr); int r4 = self->fromValue(v3.get()); tr.clear( self->key(r) ); //< Shouldn't have an effect, but will break with 
wrong ordering