commit a92b6cd3d1
Merge branch 'master' into add-cache-memory-parameter

# Conflicts:
#	documentation/sphinx/source/release-notes.rst
@@ -158,7 +158,7 @@ function(add_flow_target)
     message(FATAL_ERROR "No sources provided")
   endif()
   if(OPEN_FOR_IDE)
-    set(sources ${AFT_SRCS} ${AFT_DISABLE_ACTOR_WRITHOUT_WAIT_WARNING} ${AFT_ADDL_SRCS})
+    set(sources ${AFT_SRCS} ${AFT_DISABLE_ACTOR_WITHOUT_WAIT_WARNING} ${AFT_ADDL_SRCS})
     add_library(${AFT_NAME} OBJECT ${sources})
   else()
     foreach(src IN LISTS AFT_SRCS AFT_DISABLE_ACTOR_WITHOUT_WAIT_WARNING)
@@ -9,6 +9,8 @@ Features
 --------
 * Improved team collection for data distribution that builds a balanced number of teams per server and gurantees that each server has at least one team. `(PR #1785) <https://github.com/apple/foundationdb/pull/1785>`_.
 
+* CMake is now our official build system. The Makefile based build system is deprecated.
+
 Performance
 -----------
 
@@ -37,6 +39,7 @@ Bindings
 
 * Added a new API to get the approximated transaction size before commit, e.g., ``fdb_transaction_get_approximate_size`` in the C binding. `(PR #1756) <https://github.com/apple/foundationdb/pull/1756>`_.
 * C: ``fdb_future_get_version`` has been renamed to ``fdb_future_get_int64``. `(PR #1756) <https://github.com/apple/foundationdb/pull/1756>`_.
+* C: Applications linking to libfdb_c can now use ``pkg-config foundationdb-client`` or ``find_package(FoundationDB-Client ...)`` (for cmake) to get the proper flags for compiling and linking. `(PR #1636) <https://github.com/apple/foundationdb/pull/1636>`_.
 * Go: The Go bindings now require Go version 1.11 or later.
 * Go: Fix issue with finalizers running too early that could lead to undefined behavior. `(PR #1451) <https://github.com/apple/foundationdb/pull/1451>`_.
 * Added transaction option to control the field length of keys and values in debug transaction logging in order to avoid truncation. `(PR #1844) <https://github.com/apple/foundationdb/pull/1844>`_.
@@ -45,6 +48,9 @@ Other Changes
 -------------
 
 * Trace files are now ordered lexicographically. This means that the filename format for trace files did change. `(PR #1828) <https://github.com/apple/foundationdb/pull/1828>`_.
+* FoundationDB can now be built with clang and libc++ on Linux `(PR #1666) <https://github.com/apple/foundationdb/pull/1666>`_.
+* Added experimental framework to run C and Java clients in simulator `(PR #1678) <https://github.com/apple/foundationdb/pull/1678>`_.
+* Added new network option for client buggify which will randomly throw expected exceptions in the client. Intended for client testing `(PR #1417) <https://github.com/apple/foundationdb/pull/1417>`_.
 * Added ``--cache_memory`` parameter for ``fdbserver`` processes to control the amount of memory dedicated to caching pages read from disk. `(PR #1889) <https://github.com/apple/foundationdb/pull/1889>`_.
 
 Earlier release notes
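Note: for the binding changes above, the following is a minimal sketch of how the new approximate-size API can be consumed through the C binding. The helper function is illustrative and not part of this commit; it assumes the client selected API version 620 before including the header.

    #include <foundationdb/fdb_c.h>

    // Returns the approximate size of the uncommitted transaction, or -1 on error.
    // `tr` is an existing FDBTransaction that already has mutations applied.
    int64_t approximate_transaction_size(FDBTransaction* tr) {
        FDBFuture* f = fdb_transaction_get_approximate_size(tr);
        if (fdb_future_block_until_ready(f) != 0) {
            fdb_future_destroy(f);
            return -1;
        }
        int64_t size = -1;
        // fdb_future_get_version was renamed to fdb_future_get_int64 in this release.
        if (fdb_future_get_int64(f, &size) != 0) {
            size = -1;
        }
        fdb_future_destroy(f);
        return size;
    }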
@@ -2170,7 +2170,19 @@ ACTOR Future<bool> exclude( Database db, std::vector<StringRef> tokens, Referenc
 }
 
 ACTOR Future<bool> createSnapshot(Database db, StringRef snapCmd) {
-    wait(makeInterruptable(mgmtSnapCreate(db, snapCmd)));
+    try {
+        UID snapUID = wait(makeInterruptable(mgmtSnapCreate(db, snapCmd, 2 /* version */)));
+        int version = 2;
+        if (version == 1) {
+            printf("Snapshots tagged with UID: %s, check logs for status\n", snapUID.toString().c_str());
+        } else {
+            printf("Snapshots create succeeded with UID: %s\n", snapUID.toString().c_str());
+        }
+    } catch (Error& e) {
+        fprintf(stderr, "Snapshot create failed, %d (%s)."
+                " Please cleanup any instance level snapshots created.\n", e.code(), e.what());
+        return true;
+    }
     return false;
 }
 
@@ -1403,11 +1403,12 @@ ACTOR Future<int> setDDMode( Database cx, int mode ) {
                 rd >> oldMode;
             }
         }
-        if (!mode) {
-            BinaryWriter wrMyOwner(Unversioned());
-            wrMyOwner << dataDistributionModeLock;
-            tr.set( moveKeysLockOwnerKey, wrMyOwner.toValue() );
-        }
+        BinaryWriter wrMyOwner(Unversioned());
+        wrMyOwner << dataDistributionModeLock;
+        tr.set( moveKeysLockOwnerKey, wrMyOwner.toValue() );
+        BinaryWriter wrLastWrite(Unversioned());
+        wrLastWrite << deterministicRandom()->randomUniqueID();
+        tr.set( moveKeysLockWriteKey, wrLastWrite.toValue() );
 
         tr.set( dataDistributionModeKey, wr.toValue() );
 
@@ -1481,27 +1482,23 @@ ACTOR Future<std::set<NetworkAddress>> checkForExcludingServers(Database cx, vec
     return inProgressExclusion;
 }
 
-ACTOR Future<Void> mgmtSnapCreate(Database cx, StringRef snapCmd) {
+ACTOR Future<UID> mgmtSnapCreate(Database cx, StringRef snapCmd, int version) {
     state int retryCount = 0;
 
     loop {
         state UID snapUID = deterministicRandom()->randomUniqueID();
         try {
-            wait(snapCreate(cx, snapCmd, snapUID));
-            printf("Snapshots tagged with UID: %s, check logs for status\n", snapUID.toString().c_str());
+            wait(snapCreate(cx, snapCmd, snapUID, version));
             TraceEvent("SnapCreateSucceeded").detail("snapUID", snapUID);
-            break;
+            return snapUID;
         } catch (Error& e) {
             ++retryCount;
             TraceEvent(retryCount > 3 ? SevWarn : SevInfo, "SnapCreateFailed").error(e);
             if (retryCount > 3) {
-                fprintf(stderr, "Snapshot create failed, %d (%s)."
-                        " Please cleanup any instance level snapshots created.\n", e.code(), e.what());
                 throw;
             }
         }
     }
-    return Void();
 }
 
 ACTOR Future<Void> waitForFullReplication( Database cx ) {
@@ -195,7 +195,7 @@ bool schemaMatch( json_spirit::mValue const& schema, json_spirit::mValue const&
 
 // execute payload in 'snapCmd' on all the coordinators, TLogs and
 // storage nodes
-ACTOR Future<Void> mgmtSnapCreate(Database cx, StringRef snapCmd);
+ACTOR Future<UID> mgmtSnapCreate(Database cx, StringRef snapCmd, int version);
 
 #include "flow/unactorcompiler.h"
 #endif
@@ -49,9 +49,9 @@ struct MasterProxyInterface {
 
     RequestStream< struct GetRawCommittedVersionRequest > getRawCommittedVersion;
     RequestStream< struct TxnStateRequest > txnState;
-    RequestStream<struct ExecRequest> execReq;
 
     RequestStream< struct GetHealthMetricsRequest > getHealthMetrics;
+    RequestStream< struct ExecRequest > execReq;
+    RequestStream< struct ProxySnapRequest > proxySnapReq;
 
     UID id() const { return commit.getEndpoint().token; }
     std::string toString() const { return id().shortString(); }
@@ -63,7 +63,7 @@ struct MasterProxyInterface {
     void serialize(Archive& ar) {
         serializer(ar, locality, provisional, commit, getConsistentReadVersion, getKeyServersLocations,
                    waitFailure, getStorageServerRejoinInfo, getRawCommittedVersion,
-                   txnState, getHealthMetrics, execReq);
+                   txnState, getHealthMetrics, execReq, proxySnapReq);
     }
 
     void initEndpoints() {
@@ -350,4 +350,22 @@ struct ExecRequest
     }
 };
 
+struct ProxySnapRequest
+{
+    constexpr static FileIdentifier file_identifier = 22204900;
+    Arena arena;
+    StringRef snapPayload;
+    UID snapUID;
+    ReplyPromise<Void> reply;
+    Optional<UID> debugID;
+
+    explicit ProxySnapRequest(Optional<UID> const& debugID = Optional<UID>()) : debugID(debugID) {}
+    explicit ProxySnapRequest(StringRef snap, UID snapUID, Optional<UID> debugID = Optional<UID>()) : snapPayload(snap), snapUID(snapUID), debugID(debugID) {}
+
+    template <class Ar>
+    void serialize(Ar& ar) {
+        serializer(ar, snapPayload, snapUID, reply, arena, debugID);
+    }
+};
+
 #endif
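Note: the new ProxySnapRequest stream is driven from the client; a minimal flow-actor sketch of how such a request is broadcast to a proxy follows, mirroring the snapshotDatabase actor added to NativeAPI later in this commit. The wrapper actor name is illustrative only.

    // Sketch only: assumes a DatabaseContext reference `cx` and the headers already used by NativeAPI.actor.cpp.
    ACTOR Future<Void> sendProxySnapRequest(Reference<DatabaseContext> cx, StringRef snapPayload, UID snapUID) {
        ProxySnapRequest req(snapPayload, snapUID, Optional<UID>());
        // loadBalance picks one proxy and delivers the request at most once
        wait(loadBalance(cx->getMasterProxies(false), &MasterProxyInterface::proxySnapReq, req, cx->taskID, true /*atmostOnce*/));
        return Void();
    }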
@@ -29,6 +29,7 @@
 #include "fdbclient/FailureMonitorClient.h"
 #include "fdbclient/KeyRangeMap.h"
 #include "fdbclient/Knobs.h"
+#include "fdbclient/ManagementAPI.actor.h"
 #include "fdbclient/MasterProxyInterface.h"
 #include "fdbclient/MonitorLeader.h"
 #include "fdbclient/MutationList.h"
@@ -3439,7 +3440,7 @@ void enableClientInfoLogging() {
     TraceEvent(SevInfo, "ClientInfoLoggingEnabled");
 }
 
-ACTOR Future<Void> snapCreate(Database inputCx, StringRef snapCmd, UID snapUID) {
+ACTOR Future<Void> snapCreateVersion1(Database inputCx, StringRef snapCmd, UID snapUID) {
     state Transaction tr(inputCx);
     state DatabaseContext* cx = inputCx.getPtr();
     // remember the client ID before the snap operation
@@ -3452,7 +3453,7 @@ ACTOR Future<Void> snapCreate(Database inputCx, StringRef snapCmd, UID snapUID)
 
     StringRef snapCmdArgs = snapCmd;
     StringRef snapCmdPart = snapCmdArgs.eat(":");
-    state Standalone<StringRef> snapUIDRef(snapUID.toString());
+    Standalone<StringRef> snapUIDRef(snapUID.toString());
     state Standalone<StringRef> snapPayloadRef = snapCmdPart
         .withSuffix(LiteralStringRef(":uid="))
         .withSuffix(snapUIDRef)
@@ -3537,3 +3538,89 @@ ACTOR Future<Void> snapCreate(Database inputCx, StringRef snapCmd, UID snapUID)
     TraceEvent("SnapCreateComplete").detail("UID", snapUID);
     return Void();
 }
+
+ACTOR Future<Void> snapshotDatabase(Reference<DatabaseContext> cx, StringRef snapPayload, UID snapUID, Optional<UID> debugID) {
+    TraceEvent("NativeAPI.SnapshotDatabaseEnter")
+        .detail("SnapPayload", snapPayload)
+        .detail("SnapUID", snapUID);
+    try {
+        if (debugID.present()) {
+            g_traceBatch.addEvent("TransactionDebug", debugID.get().first(), "NativeAPI.snapshotDatabase.Before");
+        }
+
+        ProxySnapRequest req(snapPayload, snapUID, debugID);
+        wait(loadBalance(cx->getMasterProxies(false), &MasterProxyInterface::proxySnapReq, req, cx->taskID, true /*atmostOnce*/ ));
+        if (debugID.present())
+            g_traceBatch.addEvent("TransactionDebug", debugID.get().first(),
+                                  "NativeAPI.SnapshotDatabase.After");
+    } catch (Error& e) {
+        TraceEvent("NativeAPI.SnapshotDatabaseError")
+            .detail("SnapPayload", snapPayload)
+            .detail("SnapUID", snapUID)
+            .error(e, true /* includeCancelled */);
+        throw;
+    }
+    return Void();
+}
+
+ACTOR Future<Void> snapCreateVersion2(Database cx, StringRef snapCmd, UID snapUID) {
+    // remember the client ID before the snap operation
+    state UID preSnapClientUID = cx->clientInfo->get().id;
+
+    TraceEvent("SnapCreateEnterVersion2")
+        .detail("SnapCmd", snapCmd.toString())
+        .detail("UID", snapUID)
+        .detail("PreSnapClientUID", preSnapClientUID);
+
+    StringRef snapCmdArgs = snapCmd;
+    StringRef snapCmdPart = snapCmdArgs.eat(":");
+    Standalone<StringRef> snapUIDRef(snapUID.toString());
+    Standalone<StringRef> snapPayloadRef = snapCmdPart
+        .withSuffix(LiteralStringRef(":uid="))
+        .withSuffix(snapUIDRef)
+        .withSuffix(LiteralStringRef(","))
+        .withSuffix(snapCmdArgs);
+
+    try {
+        Future<Void> exec = snapshotDatabase(Reference<DatabaseContext>::addRef(cx.getPtr()), snapPayloadRef, snapUID, snapUID);
+        wait(exec);
+    } catch (Error& e) {
+        TraceEvent("SnapshotDatabaseErrorVersion2")
+            .detail("SnapCmd", snapCmd.toString())
+            .detail("UID", snapUID)
+            .error(e);
+        throw;
+    }
+
+    UID postSnapClientUID = cx->clientInfo->get().id;
+    if (preSnapClientUID != postSnapClientUID) {
+        // if the client IDs changed then we fail the snapshot
+        TraceEvent("UIDMismatchVersion2")
+            .detail("SnapPreSnapClientUID", preSnapClientUID)
+            .detail("SnapPostSnapClientUID", postSnapClientUID);
+        throw coordinators_changed();
+    }
+
+    TraceEvent("SnapCreateExitVersion2")
+        .detail("SnapCmd", snapCmd.toString())
+        .detail("UID", snapUID)
+        .detail("PreSnapClientUID", preSnapClientUID);
+    return Void();
+}
+
+ACTOR Future<Void> snapCreate(Database cx, StringRef snapCmd, UID snapUID, int version) {
+    if (version == 1) {
+        wait(snapCreateVersion1(cx, snapCmd, snapUID));
+        return Void();
+    }
+    state int oldMode = wait( setDDMode( cx, 0 ) );
+    try {
+        wait(snapCreateVersion2(cx, snapCmd, snapUID));
+    } catch (Error& e) {
+        state Error err = e;
+        wait(success( setDDMode( cx, oldMode ) ));
+        throw err;
+    }
+    wait(success( setDDMode( cx, oldMode ) ));
+    return Void();
+}
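Note: a hedged sketch of how a caller might drive the new four-argument snapCreate entry point shown above. The caller actor is illustrative and not part of this commit; fdbcli's createSnapshot and mgmtSnapCreate are the real call sites.

    // Illustrative caller: issue a version-2 snapshot and report the outcome.
    ACTOR Future<Void> takeSnapshotExample(Database db, StringRef snapCmd) {
        state UID snapUID = deterministicRandom()->randomUniqueID();
        try {
            wait(snapCreate(db, snapCmd, snapUID, 2 /* version */));
            printf("Snapshot %s succeeded\n", snapUID.toString().c_str());
        } catch (Error& e) {
            fprintf(stderr, "Snapshot %s failed: %s\n", snapUID.toString().c_str(), e.what());
            throw;
        }
        return Void();
    }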
@@ -344,7 +344,7 @@ int64_t extractIntOption( Optional<StringRef> value, int64_t minValue = std::num
 
 // Takes a snapshot of the cluster, specifically the following persistent
 // states: coordinator, TLog and storage state
-ACTOR Future<Void> snapCreate(Database cx, StringRef snapCmd, UID snapUID);
+ACTOR Future<Void> snapCreate(Database cx, StringRef snapCmd, UID snapUID, int version);
 
 #include "flow/unactorcompiler.h"
 #endif
@@ -18,21 +18,24 @@
  * limitations under the License.
  */
 
-#include "flow/ActorCollection.h"
-#include "fdbserver/DataDistribution.actor.h"
-#include "fdbclient/SystemData.h"
-#include "fdbclient/DatabaseContext.h"
-#include "fdbserver/MoveKeys.actor.h"
-#include "fdbserver/Knobs.h"
 #include <set>
 #include <sstream>
-#include "fdbserver/WaitFailure.h"
-#include "fdbserver/ServerDBInfo.h"
-#include "fdbserver/IKeyValueStore.h"
+#include "fdbclient/SystemData.h"
+#include "fdbclient/DatabaseContext.h"
 #include "fdbclient/ManagementAPI.actor.h"
 #include "fdbrpc/Replication.h"
-#include "flow/UnitTest.h"
+#include "fdbserver/DataDistribution.actor.h"
+#include "fdbserver/FDBExecHelper.actor.h"
+#include "fdbserver/IKeyValueStore.h"
+#include "fdbserver/Knobs.h"
+#include "fdbserver/MoveKeys.actor.h"
+#include "fdbserver/QuietDatabase.h"
+#include "fdbserver/ServerDBInfo.h"
+#include "fdbserver/TLogInterface.h"
+#include "fdbserver/WaitFailure.h"
+#include "flow/ActorCollection.h"
 #include "flow/Trace.h"
+#include "flow/UnitTest.h"
 #include "flow/actorcompiler.h" // This must be the last #include.
 
 class TCTeamInfo;
@@ -3997,9 +4000,134 @@ static std::set<int> const& normalDataDistributorErrors() {
     return s;
 }
 
+ACTOR Future<Void> ddSnapCreateCore(DistributorSnapRequest snapReq, Reference<AsyncVar<struct ServerDBInfo>> db ) {
+    state Database cx = openDBOnServer(db, TaskPriority::DefaultDelay, true, true);
+    TraceEvent("SnapDataDistributor.SnapReqEnter")
+        .detail("SnapPayload", snapReq.snapPayload)
+        .detail("SnapUID", snapReq.snapUID);
+    try {
+        // disable tlog pop on local tlog nodes
+        state std::vector<TLogInterface> tlogs = db->get().logSystemConfig.allLocalLogs(false);
+        std::vector<Future<Void>> disablePops;
+        for (const auto & tlog : tlogs) {
+            disablePops.push_back(
+                transformErrors(throwErrorOr(tlog.disablePopRequest.tryGetReply(TLogDisablePopRequest(snapReq.snapUID))), operation_failed())
+            );
+        }
+        wait(waitForAll(disablePops));
+
+        TraceEvent("SnapDataDistributor.AfterDisableTLogPop")
+            .detail("SnapPayload", snapReq.snapPayload)
+            .detail("SnapUID", snapReq.snapUID);
+        // snap local storage nodes
+        std::vector<WorkerInterface> storageWorkers = wait(getStorageWorkers(cx, db, true /* localOnly */));
+        TraceEvent("SnapDataDistributor.GotStorageWorkers")
+            .detail("SnapPayload", snapReq.snapPayload)
+            .detail("SnapUID", snapReq.snapUID);
+        std::vector<Future<Void>> storageSnapReqs;
+        for (const auto & worker : storageWorkers) {
+            storageSnapReqs.push_back(
+                transformErrors(throwErrorOr(worker.workerSnapReq.tryGetReply(WorkerSnapRequest(snapReq.snapPayload, snapReq.snapUID, LiteralStringRef("storage")))), operation_failed())
+            );
+        }
+        wait(waitForAll(storageSnapReqs));
+
+        TraceEvent("SnapDataDistributor.AfterSnapStorage")
+            .detail("SnapPayload", snapReq.snapPayload)
+            .detail("SnapUID", snapReq.snapUID);
+        // snap local tlog nodes
+        std::vector<Future<Void>> tLogSnapReqs;
+        for (const auto & tlog : tlogs) {
+            tLogSnapReqs.push_back(
+                transformErrors(throwErrorOr(tlog.snapRequest.tryGetReply(TLogSnapRequest(snapReq.snapPayload, snapReq.snapUID, LiteralStringRef("tlog")))), operation_failed())
+            );
+        }
+        wait(waitForAll(tLogSnapReqs));
+
+        TraceEvent("SnapDataDistributor.AfterTLogStorage")
+            .detail("SnapPayload", snapReq.snapPayload)
+            .detail("SnapUID", snapReq.snapUID);
+        // enable tlog pop on local tlog nodes
+        std::vector<Future<Void>> enablePops;
+        for (const auto & tlog : tlogs) {
+            enablePops.push_back(
+                transformErrors(throwErrorOr(tlog.enablePopRequest.tryGetReply(TLogEnablePopRequest(snapReq.snapUID))), operation_failed())
+            );
+        }
+        wait(waitForAll(enablePops));
+
+        TraceEvent("SnapDataDistributor.AfterEnableTLogPops")
+            .detail("SnapPayload", snapReq.snapPayload)
+            .detail("SnapUID", snapReq.snapUID);
+        // snap the coordinators
+        std::vector<WorkerInterface> coordWorkers = wait(getCoordWorkers(cx, db));
+        TraceEvent("SnapDataDistributor.GotCoordWorkers")
+            .detail("SnapPayload", snapReq.snapPayload)
+            .detail("SnapUID", snapReq.snapUID);
+        std::vector<Future<Void>> coordSnapReqs;
+        for (const auto & worker : coordWorkers) {
+            coordSnapReqs.push_back(
+                transformErrors(throwErrorOr(worker.workerSnapReq.tryGetReply(WorkerSnapRequest(snapReq.snapPayload, snapReq.snapUID, LiteralStringRef("coord")))), operation_failed())
+            );
+        }
+        wait(waitForAll(coordSnapReqs));
+        TraceEvent("SnapDataDistributor.AfterSnapCoords")
+            .detail("SnapPayload", snapReq.snapPayload)
+            .detail("SnapUID", snapReq.snapUID);
+    } catch (Error& e) {
+        TraceEvent("SnapDataDistributor.SnapReqExit")
+            .detail("SnapPayload", snapReq.snapPayload)
+            .detail("SnapUID", snapReq.snapUID)
+            .error(e, true /*includeCancelled */);
+        throw e;
+    }
+    return Void();
+}
+
+ACTOR Future<Void> ddSnapCreate(DistributorSnapRequest snapReq, Reference<AsyncVar<struct ServerDBInfo>> db ) {
+    state Future<Void> dbInfoChange = db->onChange();
+    double delayTime = g_network->isSimulated() ? 70.0 : SERVER_KNOBS->SNAP_CREATE_MAX_TIMEOUT;
+    try {
+        choose {
+            when (wait(dbInfoChange)) {
+                TraceEvent("SnapDDCreateDBInfoChanged")
+                    .detail("SnapPayload", snapReq.snapPayload)
+                    .detail("SnapUID", snapReq.snapUID);
+                snapReq.reply.sendError(operation_failed());
+            }
+            when (wait(ddSnapCreateCore(snapReq, db))) {
+                TraceEvent("SnapDDCreateSuccess")
+                    .detail("SnapPayload", snapReq.snapPayload)
+                    .detail("SnapUID", snapReq.snapUID);
+                snapReq.reply.send(Void());
+            }
+            when (wait(delay(delayTime))) {
+                TraceEvent("SnapDDCreateTimedOut")
+                    .detail("SnapPayload", snapReq.snapPayload)
+                    .detail("SnapUID", snapReq.snapUID);
+                snapReq.reply.sendError(operation_failed());
+            }
+        }
+    } catch (Error& e) {
+        TraceEvent("SnapDDCreateError")
+            .detail("SnapPayload", snapReq.snapPayload)
+            .detail("SnapUID", snapReq.snapUID)
+            .error(e, true /*includeCancelled */);
+        if (e.code() != error_code_operation_cancelled) {
+            snapReq.reply.sendError(e);
+        } else {
+            throw e;
+        }
+    }
+    return Void();
+}
+
 ACTOR Future<Void> dataDistributor(DataDistributorInterface di, Reference<AsyncVar<struct ServerDBInfo>> db ) {
     state Reference<DataDistributorData> self( new DataDistributorData(db, di.id()) );
     state Future<Void> collection = actorCollection( self->addActor.getFuture() );
+    state Database cx = openDBOnServer(db, TaskPriority::DefaultDelay, true, true);
+    state ActorCollection actors(false);
+    self->addActor.send(actors.getResult());
+
     try {
         TraceEvent("DataDistributorRunning", di.id());
@@ -4016,6 +4144,9 @@ ACTOR Future<Void> dataDistributor(DataDistributorInterface di, Reference<AsyncV
                 TraceEvent("DataDistributorHalted", di.id()).detail("ReqID", req.requesterID);
                 break;
             }
+            when(DistributorSnapRequest snapReq = waitNext(di.distributorSnapReq.getFuture())) {
+                actors.add(ddSnapCreate(snapReq, db));
+            }
         }
     }
     catch ( Error &err ) {
@@ -29,6 +29,7 @@ struct DataDistributorInterface {
     RequestStream<ReplyPromise<Void>> waitFailure;
     RequestStream<struct HaltDataDistributorRequest> haltDataDistributor;
     struct LocalityData locality;
+    RequestStream<struct DistributorSnapRequest> distributorSnapReq;
 
     DataDistributorInterface() {}
     explicit DataDistributorInterface(const struct LocalityData& l) : locality(l) {}
@@ -45,7 +46,7 @@ struct DataDistributorInterface {
 
     template <class Archive>
     void serialize(Archive& ar) {
-        serializer(ar, waitFailure, haltDataDistributor, locality);
+        serializer(ar, waitFailure, haltDataDistributor, locality, distributorSnapReq);
     }
 };
 
@@ -63,4 +64,22 @@ struct HaltDataDistributorRequest {
     }
 };
 
+struct DistributorSnapRequest
+{
+    constexpr static FileIdentifier file_identifier = 22204900;
+    Arena arena;
+    StringRef snapPayload;
+    UID snapUID;
+    ReplyPromise<Void> reply;
+    Optional<UID> debugID;
+
+    explicit DistributorSnapRequest(Optional<UID> const& debugID = Optional<UID>()) : debugID(debugID) {}
+    explicit DistributorSnapRequest(StringRef snap, UID snapUID, Optional<UID> debugID = Optional<UID>()) : snapPayload(snap), snapUID(snapUID), debugID(debugID) {}
+
+    template <class Ar>
+    void serialize(Ar& ar) {
+        serializer(ar, snapPayload, snapUID, reply, arena, debugID);
+    }
+};
+
 #endif //FDBSERVER_DATADISTRIBUTORINTERFACE_H
@@ -10,6 +10,7 @@
 #if defined(CMAKE_BUILD) || !defined(_WIN32)
 #include "versions.h"
 #endif
+#include "fdbserver/Knobs.h"
 #include "flow/actorcompiler.h" // This must be the last #include.
 
 ExecCmdValueString::ExecCmdValueString(StringRef pCmdValueString) {
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(_WIN32) || defined(__APPLE__)
|
#if defined(_WIN32) || defined(__APPLE__)
|
||||||
ACTOR Future<int> spawnProcess(std::string binPath, std::vector<std::string> paramList, double maxWaitTime, bool isSync)
|
ACTOR Future<int> spawnProcess(std::string binPath, std::vector<std::string> paramList, double maxWaitTime, bool isSync, double maxSimDelayTime)
|
||||||
{
|
{
|
||||||
wait(delay(0.0));
|
wait(delay(0.0));
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
ACTOR Future<int> spawnProcess(std::string binPath, std::vector<std::string> paramList, double maxWaitTime, bool isSync)
|
ACTOR Future<int> spawnProcess(std::string binPath, std::vector<std::string> paramList, double maxWaitTime, bool isSync, double maxSimDelayTime)
|
||||||
{
|
{
|
||||||
state std::string argsString;
|
state std::string argsString;
|
||||||
for (auto const& elem : paramList) {
|
for (auto const& elem : paramList) {
|
||||||
|
@@ -103,10 +104,15 @@ ACTOR Future<int> spawnProcess(std::string binPath, std::vector<std::string> par
     state boost::process::child c(binPath, boost::process::args(paramList),
                                   boost::process::std_err > boost::process::null);
 
-    // for async calls in simulator, always delay by a fixed time, otherwise
-    // the predictability of the simulator breaks
+    // for async calls in simulator, always delay by a deterinistic amount of time and do the call
+    // synchronously, otherwise the predictability of the simulator breaks
     if (!isSync && g_network->isSimulated()) {
-        wait(delay(deterministicRandom()->random01()));
+        double snapDelay = std::max(maxSimDelayTime - 1, 0.0);
+        // add some randomness
+        snapDelay += deterministicRandom()->random01();
+        TraceEvent("SnapDelaySpawnProcess")
+            .detail("SnapDelay", snapDelay);
+        wait(delay(snapDelay));
     }
 
     if (!isSync && !g_network->isSimulated()) {
@@ -147,10 +153,11 @@ ACTOR Future<int> spawnProcess(std::string binPath, std::vector<std::string> par
 }
 #endif
 
-ACTOR Future<int> execHelper(ExecCmdValueString* execArg, std::string folder, std::string role) {
+ACTOR Future<int> execHelper(ExecCmdValueString* execArg, std::string folder, std::string role, int snapVersion) {
     state StringRef uidStr = execArg->getBinaryArgValue(LiteralStringRef("uid"));
     state int err = 0;
     state Future<int> cmdErr;
+    state double maxWaitTime = (snapVersion == 2) ? SERVER_KNOBS->SNAP_CREATE_MAX_TIMEOUT : 3.0;
     if (!g_network->isSimulated()) {
         // get bin path
         auto snapBin = execArg->getBinaryPath();
@@ -169,17 +176,24 @@ ACTOR Future<int> execHelper(ExecCmdValueString* execArg, std::string folder, st
         versionString += version;
         paramList.push_back(versionString);
         paramList.push_back(role);
-        cmdErr = spawnProcess(snapBin.toString(), paramList, 3.0, false /*isSync*/);
+        cmdErr = spawnProcess(snapBin.toString(), paramList, maxWaitTime, false /*isSync*/, 0);
         wait(success(cmdErr));
         err = cmdErr.get();
     } else {
         // copy the files
         state std::string folderFrom = folder + "/.";
         state std::string folderTo = folder + "-snap-" + uidStr.toString();
+        double maxSimDelayTime = 1.0;
+        if (snapVersion == 1) {
+            folderTo = folder + "-snap-" + uidStr.toString();
+        } else {
+            folderTo = folder + "-snap-" + uidStr.toString() + "-" + role;
+            maxSimDelayTime = 10.0;
+        }
         std::vector<std::string> paramList;
         std::string mkdirBin = "/bin/mkdir";
         paramList.push_back(folderTo);
-        cmdErr = spawnProcess(mkdirBin, paramList, 3.0, false /*isSync*/);
+        cmdErr = spawnProcess(mkdirBin, paramList, maxWaitTime, false /*isSync*/, maxSimDelayTime);
         wait(success(cmdErr));
         err = cmdErr.get();
         if (err == 0) {
@@ -188,7 +202,7 @@ ACTOR Future<int> execHelper(ExecCmdValueString* execArg, std::string folder, st
             paramList.push_back("-a");
             paramList.push_back(folderFrom);
             paramList.push_back(folderTo);
-            cmdErr = spawnProcess(cpBin, paramList, 3.0, true /*isSync*/);
+            cmdErr = spawnProcess(cpBin, paramList, maxWaitTime, true /*isSync*/, 1.0);
             wait(success(cmdErr));
             err = cmdErr.get();
         }
@@ -233,3 +247,34 @@ bool isTLogInSameNode() {
     NetworkAddress addr = g_network->getLocalAddress();
     return tLogsAlive[addr].size() >= 1;
 }
+
+struct StorageVersionInfo {
+    Version version;
+    Version durableVersion;
+};
+
+// storage nodes get snapshotted through the worker interface which does not have context about version information,
+// following info is gathered at worker level to facilitate printing of version info during storage snapshots.
+typedef std::map<UID, StorageVersionInfo> UidStorageVersionInfo;
+
+std::map<NetworkAddress, UidStorageVersionInfo> workerStorageVersionInfo;
+
+void setDataVersion(UID uid, Version version) {
+    NetworkAddress addr = g_network->getLocalAddress();
+    workerStorageVersionInfo[addr][uid].version = version;
+}
+
+void setDataDurableVersion(UID uid, Version durableVersion) {
+    NetworkAddress addr = g_network->getLocalAddress();
+    workerStorageVersionInfo[addr][uid].durableVersion = durableVersion;
+}
+
+void printStorageVersionInfo() {
+    NetworkAddress addr = g_network->getLocalAddress();
+    for (auto itr = workerStorageVersionInfo[addr].begin(); itr != workerStorageVersionInfo[addr].end(); itr++) {
+        TraceEvent("StorageVersionInfo")
+            .detail("UID", itr->first)
+            .detail("Version", itr->second.version)
+            .detail("DurableVersion", itr->second.durableVersion);
+    }
+}
@@ -11,6 +11,7 @@
 #include "flow/Arena.h"
 #include "flow/flow.h"
 #include "flow/actorcompiler.h"
+#include "fdbclient/FDBTypes.h"
 
 // execute/snapshot command takes two arguments: <param1> <param2>
 // param1 - represents the command type/name
@@ -47,10 +48,11 @@ private: // data
 // spawns a process pointed by `binPath` and the arguments provided at `paramList`,
 // if the process spawned takes more than `maxWaitTime` then it will be killed
 // if isSync is set to true then the process will be synchronously executed
-ACTOR Future<int> spawnProcess(std::string binPath, std::vector<std::string> paramList, double maxWaitTime, bool isSync);
+// if async and in simulator then delay spawning the process to max of maxSimDelayTime
+ACTOR Future<int> spawnProcess(std::string binPath, std::vector<std::string> paramList, double maxWaitTime, bool isSync, double maxSimDelayTime);
 
 // helper to run all the work related to running the exec command
-ACTOR Future<int> execHelper(ExecCmdValueString* execArg, std::string folder, std::string role);
+ACTOR Future<int> execHelper(ExecCmdValueString* execArg, std::string folder, std::string role, int version);
 
 // returns true if the execUID op is in progress
 bool isExecOpInProgress(UID execUID);
@@ -67,5 +69,12 @@ void unregisterTLog(UID uid);
 // checks if there is any non-stopped TLog instance
 bool isTLogInSameNode();
 
+// set the data version for the specified storage server UID
+void setDataVersion(UID uid, Version version);
+// set the data durable version for the specified storage server UID
+void setDataDurableVersion(UID uid, Version version);
+// print the version info all the storages servers on this node
+void printStorageVersionInfo();
+
 #include "flow/unactorcompiler.h"
 #endif
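Note: a minimal sketch of how a server role might invoke the updated helpers declared above, mirroring the TLog snapshot path added elsewhere in this commit. The wrapper actor and the "role=storage" payload are illustrative assumptions, not part of the commit.

    // Illustrative wrapper: run the snapshot payload for a given role under the version-2 timeout rules.
    ACTOR Future<int> runSnapshotPayload(std::string dataFolder, StringRef snapPayload) {
        state ExecCmdValueString execArg(snapPayload);
        int err = wait(execHelper(&execArg, dataFolder, "role=storage", 2 /* snapVersion */));
        return err;
    }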
@@ -81,6 +81,9 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
     init( TLOG_DEGRADED_DURATION, 5.0 );
     init( TLOG_IGNORE_POP_AUTO_ENABLE_DELAY, 300.0 );
 
+    // disk snapshot max timeout, to be put in TLog, storage and coordinator nodes
+    init( SNAP_CREATE_MAX_TIMEOUT, 300.0 );
+
     // Data distribution queue
     init( HEALTH_POLL_TIME, 1.0 );
     init( BEST_TEAM_STUCK_DELAY, 1.0 );
@@ -328,6 +328,9 @@ public:
     int64_t TLOG_RECOVER_MEMORY_LIMIT;
     double TLOG_IGNORE_POP_AUTO_ENABLE_DELAY;
 
+    // disk snapshot
+    double SNAP_CREATE_MAX_TIMEOUT;
+
     double MAX_TRANSACTIONS_PER_BYTE;
 
     int64_t MIN_FREE_SPACE;
@@ -214,9 +214,13 @@ struct LogSystemConfig {
         return format("type: %d oldGenerations: %d tags: %d %s", logSystemType, oldTLogs.size(), logRouterTags, describe(tLogs).c_str());
     }
 
-    std::vector<TLogInterface> allLocalLogs() const {
+    std::vector<TLogInterface> allLocalLogs(bool includeSatellite = true) const {
         std::vector<TLogInterface> results;
         for( int i = 0; i < tLogs.size(); i++ ) {
+            // skip satellite TLogs, if it was not needed
+            if (!includeSatellite && tLogs[i].locality == tagLocalitySatellite) {
+                continue;
+            }
             if(tLogs[i].isLocal) {
                 for( int j = 0; j < tLogs[i].tLogs.size(); j++ ) {
                     if( tLogs[i].tLogs[j].present() ) {
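Note: the new includeSatellite flag is consumed by the data distributor's snapshot path; a one-line usage sketch, matching ddSnapCreateCore earlier in this commit:

    // Collect only non-satellite local TLogs before disabling pops for a snapshot.
    std::vector<TLogInterface> tlogs = db->get().logSystemConfig.allLocalLogs(false /* includeSatellite */);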
@@ -30,6 +30,7 @@
 #include "fdbrpc/sim_validation.h"
 #include "fdbserver/ApplyMetadataMutation.h"
 #include "fdbserver/ConflictSet.h"
+#include "fdbserver/DataDistributorInterface.h"
 #include "fdbserver/FDBExecHelper.actor.h"
 #include "fdbserver/IKeyValueStore.h"
 #include "fdbserver/Knobs.h"
@@ -1523,6 +1524,85 @@ ACTOR Future<Void> monitorRemoteCommitted(ProxyCommitData* self) {
     }
 }
 
+ACTOR Future<Void>
+proxySnapCreate(ProxySnapRequest snapReq, ProxyCommitData* commitData)
+{
+    TraceEvent("SnapMasterProxy.SnapReqEnter")
+        .detail("SnapPayload", snapReq.snapPayload)
+        .detail("SnapUID", snapReq.snapUID);
+    try {
+        // whitelist check
+        ExecCmdValueString execArg(snapReq.snapPayload);
+        StringRef binPath = execArg.getBinaryPath();
+        if (!isWhitelisted(commitData->whitelistedBinPathVec, binPath)) {
+            TraceEvent("SnapMasterProxy.WhiteListCheckFailed")
+                .detail("SnapPayload", snapReq.snapPayload)
+                .detail("SnapUID", snapReq.snapUID);
+            throw transaction_not_permitted();
+        }
+        // db fully recovered check
+        if (commitData->db->get().recoveryState != RecoveryState::FULLY_RECOVERED) {
+            // Cluster is not fully recovered and needs TLogs
+            // from previous generation for full recovery.
+            // Currently, snapshot of old tlog generation is not
+            // supported and hence failing the snapshot request until
+            // cluster is fully_recovered.
+            TraceEvent("SnapMasterProxy.ClusterNotFullyRecovered")
+                .detail("SnapPayload", snapReq.snapPayload)
+                .detail("SnapUID", snapReq.snapUID);
+            throw cluster_not_fully_recovered();
+        }
+
+        auto result =
+            commitData->txnStateStore->readValue(LiteralStringRef("log_anti_quorum").withPrefix(configKeysPrefix)).get();
+        int logAntiQuorum = 0;
+        if (result.present()) {
+            logAntiQuorum = atoi(result.get().toString().c_str());
+        }
+        // FIXME: logAntiQuorum not supported, remove it later,
+        // In version2, we probably don't need this limtiation, but this needs to be tested.
+        if (logAntiQuorum > 0) {
+            TraceEvent("SnapMasterProxy.LogAnitQuorumNotSupported")
+                .detail("SnapPayload", snapReq.snapPayload)
+                .detail("SnapUID", snapReq.snapUID);
+            throw txn_exec_log_anti_quorum();
+        }
+
+        // send a snap request to DD
+        if (!commitData->db->get().distributor.present()) {
+            TraceEvent(SevWarnAlways, "DataDistributorNotPresent");
+            throw operation_failed();
+        }
+        state Future<ErrorOr<Void>> ddSnapReq =
+            commitData->db->get().distributor.get().distributorSnapReq.tryGetReply(DistributorSnapRequest(snapReq.snapPayload, snapReq.snapUID));
+        try {
+            wait(throwErrorOr(ddSnapReq));
+        } catch (Error& e) {
+            TraceEvent("SnapMasterProxy.DDSnapResponseError")
+                .detail("SnapPayload", snapReq.snapPayload)
+                .detail("SnapUID", snapReq.snapUID)
+                .error(e, true /*includeCancelled*/ );
+            throw e;
+        }
+        snapReq.reply.send(Void());
+    } catch (Error& e) {
+        TraceEvent("SnapMasterProxy.SnapReqError")
+            .detail("SnapPayload", snapReq.snapPayload)
+            .detail("SnapUID", snapReq.snapUID)
+            .error(e, true /*includeCancelled*/);
+        if (e.code() != error_code_operation_cancelled) {
+            snapReq.reply.sendError(e);
+        } else {
+            throw e;
+        }
+    }
+    TraceEvent("SnapMasterProxy.SnapReqExit")
+        .detail("SnapPayload", snapReq.snapPayload)
+        .detail("SnapUID", snapReq.snapUID);
+    return Void();
+}
+
 ACTOR Future<Void> masterProxyServerCore(
     MasterProxyInterface proxy,
     MasterInterface master,
@@ -1713,6 +1793,9 @@ ACTOR Future<Void> masterProxyServerCore(
                 }
             }
         }
+        when(ProxySnapRequest snapReq = waitNext(proxy.proxySnapReq.getFuture())) {
+            addActor.send(proxySnapCreate(snapReq, &commitData));
+        }
         when(TxnStateRequest req = waitNext(proxy.txnState.getFuture())) {
             state ReplyPromise<Void> reply = req.reply;
             if(req.last) maxSequence = req.sequence + 1;
@@ -920,8 +920,8 @@ std::deque<std::pair<Version, LengthPrefixedStringRef>> & getVersionMessages( Re
 };
 
 ACTOR Future<Void> tLogPopCore( TLogData* self, Tag inputTag, Version to, Reference<LogData> logData ) {
-    if (self->ignorePopRequest && inputTag.locality != tagLocalityTxs && inputTag != txsTag) {
-        TraceEvent("IgnoringPopRequest").detail("IgnorePopDeadline", self->ignorePopDeadline);
+    if (self->ignorePopRequest) {
+        TraceEvent(SevDebug, "IgnoringPopRequest").detail("IgnorePopDeadline", self->ignorePopDeadline);
 
         if (self->toBePopped.find(inputTag) == self->toBePopped.end()
             || to > self->toBePopped[inputTag]) {
@@ -1478,7 +1478,7 @@ ACTOR Future<Void> tLogSnapHelper(TLogData* self,
     ASSERT(!isExecOpInProgress(execUID));
     if (!otherRoleExeced) {
         setExecOpInProgress(execUID);
-        int tmpErr = wait(execHelper(execArg, self->dataFolder, "role=tlog"));
+        int tmpErr = wait(execHelper(execArg, self->dataFolder, "role=tlog", 1 /*version*/));
         err = tmpErr;
         clearExecOpInProgress(execUID);
     }
@@ -1796,6 +1796,81 @@ void getQueuingMetrics( TLogData* self, Reference<LogData> logData, TLogQueuingM
     req.reply.send( reply );
 }
 
+ACTOR Future<Void>
+tLogSnapCreate(TLogSnapRequest snapReq, TLogData* self, Reference<LogData> logData) {
+    if (self->ignorePopUid != snapReq.snapUID.toString()) {
+        snapReq.reply.sendError(operation_failed());
+        return Void();
+    }
+    ExecCmdValueString snapArg(snapReq.snapPayload);
+    try {
+        Standalone<StringRef> role = LiteralStringRef("role=").withSuffix(snapReq.role);
+        int err = wait(execHelper(&snapArg, self->dataFolder, role.toString(), 2 /* version */));
+
+        std::string uidStr = snapReq.snapUID.toString();
+        TraceEvent("ExecTraceTLog")
+            .detail("Uid", uidStr)
+            .detail("Status", err)
+            .detail("Role", snapReq.role)
+            .detail("Value", self->dataFolder)
+            .detail("ExecPayload", snapReq.snapPayload)
+            .detail("PersistentDataVersion", logData->persistentDataVersion)
+            .detail("PersistentDatadurableVersion", logData->persistentDataDurableVersion)
+            .detail("QueueCommittedVersion", logData->queueCommittedVersion.get())
+            .detail("Version", logData->version.get());
+
+        if (err != 0) {
+            throw operation_failed();
+        }
+        snapReq.reply.send(Void());
+    } catch (Error& e) {
+        TraceEvent("TLogExecHelperError").error(e, true /*includeCancelled */);
+        if (e.code() != error_code_operation_cancelled) {
+            snapReq.reply.sendError(e);
+        } else {
+            throw e;
+        }
+    }
+    return Void();
+}
+
+ACTOR Future<Void>
+tLogEnablePopReq(TLogEnablePopRequest enablePopReq, TLogData* self, Reference<LogData> logData) {
+    if (self->ignorePopUid != enablePopReq.snapUID.toString()) {
+        TraceEvent(SevWarn, "TLogPopDisableEnableUidMismatch")
+            .detail("IgnorePopUid", self->ignorePopUid)
+            .detail("UidStr", enablePopReq.snapUID.toString());
+        enablePopReq.reply.sendError(operation_failed());
+        return Void();
+    }
+    TraceEvent("EnableTLogPlayAllIgnoredPops2");
+    // use toBePopped and issue all the pops
+    std::map<Tag, Version>::iterator it;
+    vector<Future<Void>> ignoredPops;
+    self->ignorePopRequest = false;
+    self->ignorePopDeadline = 0.0;
+    self->ignorePopUid = "";
+    for (it = self->toBePopped.begin(); it != self->toBePopped.end(); it++) {
+        TraceEvent("PlayIgnoredPop")
+            .detail("Tag", it->first.toString())
+            .detail("Version", it->second);
+        ignoredPops.push_back(tLogPopCore(self, it->first, it->second, logData));
+    }
+    TraceEvent("TLogExecCmdPopEnable")
+        .detail("UidStr", enablePopReq.snapUID.toString())
+        .detail("IgnorePopUid", self->ignorePopUid)
+        .detail("IgnporePopRequest", self->ignorePopRequest)
+        .detail("IgnporePopDeadline", self->ignorePopDeadline)
+        .detail("PersistentDataVersion", logData->persistentDataVersion)
+        .detail("PersistentDatadurableVersion", logData->persistentDataDurableVersion)
+        .detail("QueueCommittedVersion", logData->queueCommittedVersion.get())
+        .detail("Version", logData->version.get());
+    wait(waitForAll(ignoredPops));
+    self->toBePopped.clear();
+    enablePopReq.reply.send(Void());
+    return Void();
+}
+
 ACTOR Future<Void> serveTLogInterface( TLogData* self, TLogInterface tli, Reference<LogData> logData, PromiseStream<Void> warningCollectorInput ) {
     state Future<Void> dbInfoChange = Void();
 
@@ -1857,6 +1932,30 @@ ACTOR Future<Void> serveTLogInterface( TLogData* self, TLogInterface tli, Refere
             else
                 req.reply.sendError( tlog_stopped() );
         }
+        when( TLogDisablePopRequest req = waitNext( tli.disablePopRequest.getFuture() ) ) {
+            if (self->ignorePopUid != "") {
+                TraceEvent(SevWarn, "TLogPopDisableonDisable")
+                    .detail("IgnorePopUid", self->ignorePopUid)
+                    .detail("UidStr", req.snapUID.toString())
+                    .detail("PersistentDataVersion", logData->persistentDataVersion)
+                    .detail("PersistentDatadurableVersion", logData->persistentDataDurableVersion)
+                    .detail("QueueCommittedVersion", logData->queueCommittedVersion.get())
+                    .detail("Version", logData->version.get());
+                req.reply.sendError(operation_failed());
+            } else {
+                //FIXME: As part of reverting snapshot V1, make ignorePopUid a UID instead of string
+                self->ignorePopRequest = true;
+                self->ignorePopUid = req.snapUID.toString();
+                self->ignorePopDeadline = g_network->now() + SERVER_KNOBS->TLOG_IGNORE_POP_AUTO_ENABLE_DELAY;
+                req.reply.send(Void());
+            }
+        }
+        when( TLogEnablePopRequest enablePopReq = waitNext( tli.enablePopRequest.getFuture() ) ) {
+            logData->addActor.send( tLogEnablePopReq( enablePopReq, self, logData) );
+        }
+        when( TLogSnapRequest snapReq = waitNext( tli.snapRequest.getFuture() ) ) {
+            logData->addActor.send( tLogSnapCreate( snapReq, self, logData) );
+        }
     }
 }
 
@@ -22,8 +22,10 @@
 #include "flow/ActorCollection.h"
 #include "fdbrpc/simulator.h"
 #include "flow/Trace.h"
-#include "fdbclient/NativeAPI.actor.h"
 #include "fdbclient/DatabaseContext.h"
+#include "fdbclient/NativeAPI.actor.h"
+#include "fdbclient/ReadYourWrites.h"
+#include "fdbclient/RunTransaction.actor.h"
 #include "fdbserver/TesterInterface.actor.h"
 #include "fdbserver/WorkerInterface.actor.h"
 #include "fdbserver/ServerDBInfo.h"
@@ -133,6 +135,39 @@ int64_t getPoppedVersionLag( const TraceEventFields& md ) {
     return persistentDataDurableVersion - queuePoppedVersion;
 }
 
+ACTOR Future<vector<WorkerInterface>> getCoordWorkers( Database cx, Reference<AsyncVar<ServerDBInfo>> dbInfo ) {
+    state std::vector<WorkerDetails> workers = wait(getWorkers(dbInfo));
+
+    Optional<Value> coordinators = wait(
+        runRYWTransaction(cx, [=](Reference<ReadYourWritesTransaction> tr) -> Future<Optional<Value>>
+        {
+            tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
+            tr->setOption(FDBTransactionOptions::LOCK_AWARE);
+            return tr->get(coordinatorsKey);
+        }));
+    if (!coordinators.present()) {
+        throw operation_failed();
+    }
+    std::vector<NetworkAddress> coordinatorsAddr =
+        ClusterConnectionString(coordinators.get().toString()).coordinators();
+    std::set<NetworkAddress> coordinatorsAddrSet;
+    for (const auto & addr : coordinatorsAddr) {
+        TraceEvent(SevDebug, "CoordinatorAddress").detail("Addr", addr);
+        coordinatorsAddrSet.insert(addr);
+    }
+
+    vector<WorkerInterface> result;
+    for(const auto & worker : workers) {
+        NetworkAddress primary = worker.interf.address();
+        Optional<NetworkAddress> secondary = worker.interf.tLog.getEndpoint().addresses.secondaryAddress;
+        if (coordinatorsAddrSet.find(primary) != coordinatorsAddrSet.end()
+            || (secondary.present() && (coordinatorsAddrSet.find(secondary.get()) != coordinatorsAddrSet.end()))) {
+            result.push_back(worker.interf);
+        }
+    }
+    return result;
+}
+
 // This is not robust in the face of a TLog failure
 ACTOR Future<std::pair<int64_t,int64_t>> getTLogQueueInfo( Database cx, Reference<AsyncVar<ServerDBInfo>> dbInfo ) {
     TraceEvent("MaxTLogQueueSize").detail("Stage", "ContactingLogs");
@@ -195,6 +230,42 @@ ACTOR Future<vector<StorageServerInterface>> getStorageServers( Database cx, boo
 	}
 }
 
+ACTOR Future<vector<WorkerInterface>> getStorageWorkers( Database cx, Reference<AsyncVar<ServerDBInfo>> dbInfo, bool localOnly ) {
+	state std::vector<StorageServerInterface> servers = wait(getStorageServers(cx));
+	state std::map<NetworkAddress, WorkerInterface> workersMap;
+	std::vector<WorkerDetails> workers = wait(getWorkers(dbInfo));
+
+	for (const auto & worker : workers) {
+		workersMap[worker.interf.address()] = worker.interf;
+	}
+	Optional<Value> regionsValue = wait(
+		runRYWTransaction(cx, [=](Reference<ReadYourWritesTransaction> tr) -> Future<Optional<Value>>
+		{
+			tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
+			tr->setOption(FDBTransactionOptions::LOCK_AWARE);
+			return tr->get(LiteralStringRef("usable_regions").withPrefix(configKeysPrefix));
+		}));
+	ASSERT(regionsValue.present());
+	int usableRegions = atoi(regionsValue.get().toString().c_str());
+	auto masterDcId = dbInfo->get().master.locality.dcId();
+
+	vector<WorkerInterface> result;
+	for (const auto & server : servers) {
+		TraceEvent(SevDebug, "DcIdInfo")
+			.detail("ServerLocalityID", server.locality.dcId())
+			.detail("MasterDcID", masterDcId);
+		if (!localOnly || (usableRegions == 1 || server.locality.dcId() == masterDcId)) {
+			auto itr = workersMap.find(server.address());
+			if (itr == workersMap.end()) {
+				TraceEvent(SevWarn, "GetStorageWorkers").detail("Reason", "Could not find worker for storage server").detail("SS", server.id());
+				throw operation_failed();
+			}
+			result.push_back(itr->second);
+		}
+	}
+	return result;
+}
+
 //Gets the maximum size of all the storage server queues
 ACTOR Future<int64_t> getMaxStorageServerQueueSize( Database cx, Reference<AsyncVar<ServerDBInfo>> dbInfo ) {
 	TraceEvent("MaxStorageServerQueueSize").detail("Stage", "ContactingStorageServers");
@@ -38,6 +38,8 @@ Future<vector<StorageServerInterface>> getStorageServers( Database const& cx, bo
 Future<vector<WorkerDetails>> getWorkers( Reference<AsyncVar<ServerDBInfo>> const& dbInfo, int const& flags = 0 );
 Future<WorkerInterface> getMasterWorker( Database const& cx, Reference<AsyncVar<ServerDBInfo>> const& dbInfo );
 Future<Void> repairDeadDatacenter(Database const& cx, Reference<AsyncVar<ServerDBInfo>> const& dbInfo, std::string const& context);
+Future<vector<WorkerInterface>> getStorageWorkers( Database const& cx, Reference<AsyncVar<ServerDBInfo>> const& dbInfo, bool const& localOnly );
+Future<vector<WorkerInterface>> getCoordWorkers( Database const& cx, Reference<AsyncVar<ServerDBInfo>> const& dbInfo );
 
 #include "flow/unactorcompiler.h"
 #endif
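The two helpers above are declared here so other server code can locate snapshot targets. A minimal usage sketch (not part of this diff; assumes the flow actor toolchain and that these declarations live in fdbserver/QuietDatabase.h):

// Illustrative sketch only -- not part of this commit.
#include "fdbserver/QuietDatabase.h"
#include "fdbserver/ServerDBInfo.h"
#include "flow/actorcompiler.h" // this must be the last include

ACTOR Future<Void> logSnapTargets(Database cx, Reference<AsyncVar<ServerDBInfo>> dbInfo) {
	// Workers hosting storage servers, restricted to the master's region.
	state vector<WorkerInterface> storageWorkers = wait(getStorageWorkers(cx, dbInfo, true /*localOnly*/));
	// Workers that are also coordinators.
	vector<WorkerInterface> coordWorkers = wait(getCoordWorkers(cx, dbInfo));
	TraceEvent("SnapTargets")
	    .detail("StorageWorkers", storageWorkers.size())
	    .detail("CoordWorkers", coordWorkers.size());
	return Void();
}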
@@ -45,6 +45,10 @@ struct TLogInterface {
 	RequestStream< struct TLogConfirmRunningRequest > confirmRunning; // used for getReadVersion requests from client
 	RequestStream<ReplyPromise<Void>> waitFailure;
 	RequestStream< struct TLogRecoveryFinishedRequest > recoveryFinished;
+	RequestStream< struct TLogDisablePopRequest> disablePopRequest;
+	RequestStream< struct TLogEnablePopRequest> enablePopRequest;
+	RequestStream< struct TLogSnapRequest> snapRequest;
 
 	TLogInterface() {}
 	explicit TLogInterface(LocalityData locality) : uniqueID( deterministicRandom()->randomUniqueID() ), locality(locality) { sharedTLogID = uniqueID; }
@@ -69,7 +73,8 @@ struct TLogInterface {
 			ASSERT(ar.isDeserializing || uniqueID != UID());
 		}
 		serializer(ar, uniqueID, sharedTLogID, locality, peekMessages, popMessages
-		           , commit, lock, getQueuingMetrics, confirmRunning, waitFailure, recoveryFinished);
+		           , commit, lock, getQueuingMetrics, confirmRunning, waitFailure, recoveryFinished
+		           , disablePopRequest, enablePopRequest, snapRequest);
 	}
 };
@@ -231,6 +236,7 @@ struct TLogCommitRequest {
 	}
 };
 
 struct TLogQueuingMetricsReply {
 	constexpr static FileIdentifier file_identifier = 12206626;
 	double localTime;
@@ -255,4 +261,53 @@ struct TLogQueuingMetricsRequest {
 	}
 };
 
+struct TLogDisablePopRequest {
+	constexpr static FileIdentifier file_identifier = 4022806;
+	Arena arena;
+	UID snapUID;
+	ReplyPromise<Void> reply;
+	Optional<UID> debugID;
+
+	TLogDisablePopRequest() = default;
+	TLogDisablePopRequest(const UID uid) : snapUID(uid) {}
+
+	template <class Ar>
+	void serialize(Ar& ar) {
+		serializer(ar, snapUID, reply, arena, debugID);
+	}
+};
+
+struct TLogEnablePopRequest {
+	constexpr static FileIdentifier file_identifier = 4022809;
+	Arena arena;
+	UID snapUID;
+	ReplyPromise<Void> reply;
+	Optional<UID> debugID;
+
+	TLogEnablePopRequest() = default;
+	TLogEnablePopRequest(const UID uid) : snapUID(uid) {}
+
+	template <class Ar>
+	void serialize(Ar& ar) {
+		serializer(ar, snapUID, reply, arena, debugID);
+	}
+};
+
+struct TLogSnapRequest {
+	constexpr static FileIdentifier file_identifier = 8184128;
+	ReplyPromise<Void> reply;
+	Arena arena;
+	StringRef snapPayload;
+	UID snapUID;
+	StringRef role;
+
+	TLogSnapRequest(StringRef snapPayload, UID snapUID, StringRef role) : snapPayload(snapPayload), snapUID(snapUID), role(role) {}
+	TLogSnapRequest() = default;
+
+	template <class Ar>
+	void serialize(Ar& ar) {
+		serializer(ar, reply, snapPayload, snapUID, role, arena);
+	}
+};
+
 #endif
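The three request structs above are the client-visible surface of TLog snapshotting. A rough sketch of how a caller might drive them over the new TLogInterface streams (illustrative only, not part of this commit; assumes the fdbrpc RequestStream::getReply API and the flow actor compiler, and a real caller would also handle timeouts and failed TLogs):

ACTOR Future<Void> snapSingleTLog(TLogInterface tli, UID snapUID, Standalone<StringRef> snapPayload) {
	// 1. Stop pops; the TLog records the UID and re-enables pops on its own
	//    after TLOG_IGNORE_POP_AUTO_ENABLE_DELAY if we never come back.
	wait(tli.disablePopRequest.getReply(TLogDisablePopRequest(snapUID)));
	// 2. Run the snapshot command against the TLog's data folder.
	wait(tli.snapRequest.getReply(TLogSnapRequest(snapPayload, snapUID, LiteralStringRef("tlog"))));
	// 3. Re-enable pops; the UID must match the one used to disable them.
	wait(tli.enablePopRequest.getReply(TLogEnablePopRequest(snapUID)));
	return Void();
}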
@@ -1170,8 +1170,8 @@ std::deque<std::pair<Version, LengthPrefixedStringRef>> & getVersionMessages( Re
 };
 
 ACTOR Future<Void> tLogPopCore( TLogData* self, Tag inputTag, Version to, Reference<LogData> logData ) {
-	if (self->ignorePopRequest && inputTag.locality != tagLocalityTxs && inputTag != txsTag) {
-		TraceEvent("IgnoringPopRequest").detail("IgnorePopDeadline", self->ignorePopDeadline);
+	if (self->ignorePopRequest) {
+		TraceEvent(SevDebug, "IgnoringPopRequest").detail("IgnorePopDeadline", self->ignorePopDeadline);
 
 		if (self->toBePopped.find(inputTag) == self->toBePopped.end()
 			|| to > self->toBePopped[inputTag]) {
@@ -1221,8 +1221,8 @@ ACTOR Future<Void> tLogPop( TLogData* self, TLogPopRequest req, Reference<LogDat
 
 		TraceEvent("EnableTLogPlayAllIgnoredPops");
 		// use toBePopped and issue all the pops
-		state std::map<Tag, Version>::iterator it;
-		state vector<Future<Void>> ignoredPops;
+		std::map<Tag, Version>::iterator it;
+		vector<Future<Void>> ignoredPops;
 		self->ignorePopRequest = false;
 		self->ignorePopUid = "";
 		self->ignorePopDeadline = 0.0;
@@ -1853,7 +1853,7 @@ ACTOR Future<Void> tLogSnapHelper(TLogData* self,
 	ASSERT(!isExecOpInProgress(execUID));
 	if (!otherRoleExeced) {
 		setExecOpInProgress(execUID);
-		int tmpErr = wait(execHelper(execArg, self->dataFolder, "role=tlog"));
+		int tmpErr = wait(execHelper(execArg, self->dataFolder, "role=tlog", 1 /*version*/));
 		err = tmpErr;
 		clearExecOpInProgress(execUID);
 	}
@@ -2174,6 +2174,83 @@ void getQueuingMetrics( TLogData* self, Reference<LogData> logData, TLogQueuingM
 	req.reply.send( reply );
 }
 
+ACTOR Future<Void>
+tLogSnapCreate(TLogSnapRequest snapReq, TLogData* self, Reference<LogData> logData) {
+	if (self->ignorePopUid != snapReq.snapUID.toString()) {
+		snapReq.reply.sendError(operation_failed());
+		return Void();
+	}
+	ExecCmdValueString snapArg(snapReq.snapPayload);
+	try {
+		Standalone<StringRef> role = LiteralStringRef("role=").withSuffix(snapReq.role);
+		int err = wait(execHelper(&snapArg, self->dataFolder, role.toString(), 2 /* version */));
+
+		std::string uidStr = snapReq.snapUID.toString();
+		TraceEvent("ExecTraceTLog")
+			.detail("Uid", uidStr)
+			.detail("Status", err)
+			.detail("Role", snapReq.role)
+			.detail("Value", self->dataFolder)
+			.detail("ExecPayload", snapReq.snapPayload)
+			.detail("PersistentDataVersion", logData->persistentDataVersion)
+			.detail("PersistentDatadurableVersion", logData->persistentDataDurableVersion)
+			.detail("QueueCommittedVersion", logData->queueCommittedVersion.get())
+			.detail("Version", logData->version.get());
+
+		if (err != 0) {
+			throw operation_failed();
+		}
+		snapReq.reply.send(Void());
+	} catch (Error& e) {
+		TraceEvent("TLogExecHelperError").error(e, true /*includeCancelled */);
+		if (e.code() != error_code_operation_cancelled) {
+			snapReq.reply.sendError(e);
+		} else {
+			throw e;
+		}
+	}
+	return Void();
+}
+
+ACTOR Future<Void>
+tLogEnablePopReq(TLogEnablePopRequest enablePopReq, TLogData* self, Reference<LogData> logData) {
+	if (self->ignorePopUid != enablePopReq.snapUID.toString()) {
+		TraceEvent(SevWarn, "TLogPopDisableEnableUidMismatch")
+			.detail("IgnorePopUid", self->ignorePopUid)
+			.detail("UidStr", enablePopReq.snapUID.toString());
+		enablePopReq.reply.sendError(operation_failed());
+		return Void();
+	}
+	TraceEvent("EnableTLogPlayAllIgnoredPops2");
+	// use toBePopped and issue all the pops
+	std::map<Tag, Version>::iterator it;
+	state vector<Future<Void>> ignoredPops;
+	self->ignorePopRequest = false;
+	self->ignorePopDeadline = 0.0;
+	self->ignorePopUid = "";
+	for (it = self->toBePopped.begin(); it != self->toBePopped.end(); it++) {
+		TraceEvent("PlayIgnoredPop")
+			.detail("Tag", it->first.toString())
+			.detail("Version", it->second);
+		ignoredPops.push_back(tLogPopCore(self, it->first, it->second, logData));
+	}
+	TraceEvent("TLogExecCmdPopEnable")
+		.detail("UidStr", enablePopReq.snapUID.toString())
+		.detail("IgnorePopUid", self->ignorePopUid)
+		.detail("IgnporePopRequest", self->ignorePopRequest)
+		.detail("IgnporePopDeadline", self->ignorePopDeadline)
+		.detail("PersistentDataVersion", logData->persistentDataVersion)
+		.detail("PersistentDatadurableVersion", logData->persistentDataDurableVersion)
+		.detail("QueueCommittedVersion", logData->queueCommittedVersion.get())
+		.detail("Version", logData->version.get());
+	wait(waitForAll(ignoredPops));
+	self->toBePopped.clear();
+	enablePopReq.reply.send(Void());
+	return Void();
+}
+
 ACTOR Future<Void> serveTLogInterface( TLogData* self, TLogInterface tli, Reference<LogData> logData, PromiseStream<Void> warningCollectorInput ) {
 	state Future<Void> dbInfoChange = Void();
@@ -2235,6 +2312,30 @@ ACTOR Future<Void> serveTLogInterface( TLogData* self, TLogInterface tli, Refere
 			else
 				req.reply.sendError( tlog_stopped() );
 		}
+		when( TLogDisablePopRequest req = waitNext( tli.disablePopRequest.getFuture() ) ) {
+			if (self->ignorePopUid != "") {
+				TraceEvent(SevWarn, "TLogPopDisableonDisable")
+					.detail("IgnorePopUid", self->ignorePopUid)
+					.detail("UidStr", req.snapUID.toString())
+					.detail("PersistentDataVersion", logData->persistentDataVersion)
+					.detail("PersistentDatadurableVersion", logData->persistentDataDurableVersion)
+					.detail("QueueCommittedVersion", logData->queueCommittedVersion.get())
+					.detail("Version", logData->version.get());
+				req.reply.sendError(operation_failed());
+			} else {
+				//FIXME: As part of reverting snapshot V1, make ignorePopUid a UID instead of string
+				self->ignorePopRequest = true;
+				self->ignorePopUid = req.snapUID.toString();
+				self->ignorePopDeadline = g_network->now() + SERVER_KNOBS->TLOG_IGNORE_POP_AUTO_ENABLE_DELAY;
+				req.reply.send(Void());
+			}
+		}
+		when( TLogEnablePopRequest enablePopReq = waitNext( tli.enablePopRequest.getFuture() ) ) {
+			logData->addActor.send( tLogEnablePopReq( enablePopReq, self, logData) );
+		}
+		when( TLogSnapRequest snapReq = waitNext( tli.snapRequest.getFuture() ) ) {
+			logData->addActor.send( tLogSnapCreate( snapReq, self, logData) );
+		}
 	}
 }
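On a UID mismatch the TLog answers the new streams with operation_failed(), so callers need to treat that as a recoverable outcome. A hedged sketch of such handling (hypothetical helper, not in this diff):

ACTOR Future<bool> tryEnablePop(TLogInterface tli, UID snapUID) {
	try {
		wait(tli.enablePopRequest.getReply(TLogEnablePopRequest(snapUID)));
		return true;
	} catch (Error& e) {
		if (e.code() == error_code_operation_failed) {
			// The UID did not match the one that disabled pops on this TLog.
			TraceEvent(SevWarn, "EnablePopUidMismatch").detail("SnapUID", snapUID);
			return false;
		}
		throw;
	}
}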
@@ -1302,6 +1302,10 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
 			knownCommittedVersion = std::max(knownCommittedVersion, results[i].knownCommittedVersion);
 		}
 
+		if (knownCommittedVersion > results[new_safe_range_begin].end) {
+			knownCommittedVersion = results[new_safe_range_begin].end;
+		}
+
 		TraceEvent("GetDurableResult", dbgid).detail("Required", requiredCount).detail("Present", results.size()).detail("ServerState", sServerState)
 			.detail("RecoveryVersion", ((safe_range_end > 0) && (safe_range_end-1 < results.size())) ? results[ safe_range_end-1 ].end : -1)
 			.detail("EndVersion", results[ new_safe_range_begin ].end).detail("SafeBegin", safe_range_begin).detail("SafeEnd", safe_range_end)
@@ -1588,17 +1592,14 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
 			logSystem->logSystemType = prevState.logSystemType;
 			logSystem->rejoins = rejoins;
 			logSystem->lockResults = lockResults;
-			logSystem->recoverAt = minEnd;
 			if (knownCommittedVersion > minEnd) {
-				// FIXME: Remove the Sev40 once disk snapshot v2 feature is enabled, in all other
-				// code paths we should never be here.
-				TraceEvent(SevError, "KCVIsInvalid")
-					.detail("KnownCommittedVersion", knownCommittedVersion)
-					.detail("MinEnd", minEnd);
-				logSystem->knownCommittedVersion = minEnd;
-			} else {
-				logSystem->knownCommittedVersion = knownCommittedVersion;
+				knownCommittedVersion = minEnd;
 			}
+			logSystem->recoverAt = minEnd;
+			logSystem->knownCommittedVersion = knownCommittedVersion;
+			TraceEvent(SevDebug, "FinalRecoveryVersionInfo")
+				.detail("KCV", knownCommittedVersion)
+				.detail("MinEnd", minEnd);
 			logSystem->remoteLogsWrittenToCoreState = true;
 			logSystem->stopped = true;
 			logSystem->pseudoLocalities = prevState.pseudoLocalities;
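The two hunks above clamp knownCommittedVersion to the end version of the least-advanced required log instead of logging a SevError. A standalone restatement with example numbers (assumes only that Version is a 64-bit integer, as in flow):

#include <algorithm>
#include <cassert>
#include <cstdint>

using Version = int64_t; // flow's Version is a 64-bit integer

int main() {
	Version knownCommittedVersion = 105; // highest KCV reported by the surviving logs
	Version minEnd = 100;                // smallest end version among the required logs
	// Equivalent to the clamp added above: recovery cannot claim a committed
	// version beyond what every required log can actually serve.
	knownCommittedVersion = std::min(knownCommittedVersion, minEnd);
	assert(knownCommittedVersion == 100);
	return 0;
}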
@@ -61,6 +61,7 @@ struct WorkerInterface {
 	RequestStream< struct TraceBatchDumpRequest > traceBatchDumpRequest;
 	RequestStream< struct DiskStoreRequest > diskStoreRequest;
 	RequestStream<struct ExecuteRequest> execReq;
+	RequestStream<struct WorkerSnapRequest> workerSnapReq;
 
 	TesterInterface testerInterface;
@@ -72,7 +73,7 @@ struct WorkerInterface {
 
 	template <class Ar>
 	void serialize(Ar& ar) {
-		serializer(ar, clientInterface, locality, tLog, master, masterProxy, dataDistributor, ratekeeper, resolver, storage, logRouter, debugPing, coordinationPing, waitFailure, setMetricsRate, eventLogRequest, traceBatchDumpRequest, testerInterface, diskStoreRequest, execReq);
+		serializer(ar, clientInterface, locality, tLog, master, masterProxy, dataDistributor, ratekeeper, resolver, storage, logRouter, debugPing, coordinationPing, waitFailure, setMetricsRate, eventLogRequest, traceBatchDumpRequest, testerInterface, diskStoreRequest, execReq, workerSnapReq);
 	}
 };
@@ -258,6 +259,23 @@ struct ExecuteRequest {
 	}
 };
 
+struct WorkerSnapRequest {
+	constexpr static FileIdentifier file_identifier = 8194122;
+	ReplyPromise<Void> reply;
+	Arena arena;
+	StringRef snapPayload;
+	UID snapUID;
+	StringRef role;
+
+	WorkerSnapRequest(StringRef snapPayload, UID snapUID, StringRef role) : snapPayload(snapPayload), snapUID(snapUID), role(role) {}
+	WorkerSnapRequest() = default;
+
+	template <class Ar>
+	void serialize(Ar& ar) {
+		serializer(ar, reply, snapPayload, snapUID, role, arena);
+	}
+};
+
 struct LoadedReply {
 	constexpr static FileIdentifier file_identifier = 9956350;
 	Standalone<StringRef> payload;
@@ -877,6 +877,20 @@ std::pair<NetworkAddressList, NetworkAddressList> buildNetworkAddresses(const Cl
 	return std::make_pair(publicNetworkAddresses, listenNetworkAddresses);
 }
 
+// moves files from 'dirSrc' to 'dirToMove' if their name contains 'role'
+void restoreRoleFilesHelper(std::string dirSrc, std::string dirToMove, std::string role) {
+	std::vector<std::string> returnFiles = platform::listFiles(dirSrc, "");
+	for (const auto & fileEntry: returnFiles) {
+		if (fileEntry != "fdb.cluster" && fileEntry.find(role) != std::string::npos) {
+			//rename files
+			TraceEvent("RenamingSnapFile")
+				.detail("Oldname", dirSrc + "/" + fileEntry)
+				.detail("Newname", dirToMove + "/" + fileEntry);
+			renameFile(dirSrc + "/" + fileEntry, dirToMove + "/" + fileEntry);
+		}
+	}
+}
+
 int main(int argc, char* argv[]) {
 	try {
 		platformInit();
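restoreRoleFilesHelper only renames files whose names contain the given role string and always leaves fdb.cluster in place. The same idea in plain C++17 with std::filesystem, runnable outside FDB (illustrative only; the paths in main() are hypothetical):

#include <filesystem>
#include <iostream>
#include <string>

namespace fs = std::filesystem;

void restoreRoleFiles(const fs::path& dirSrc, const fs::path& dirToMove, const std::string& role) {
	for (const auto& entry : fs::directory_iterator(dirSrc)) {
		const std::string name = entry.path().filename().string();
		// Same filter as restoreRoleFilesHelper: keep fdb.cluster, move files
		// whose name contains the role string.
		if (name != "fdb.cluster" && name.find(role) != std::string::npos) {
			std::cout << "renaming " << entry.path() << " -> " << (dirToMove / name) << "\n";
			fs::rename(entry.path(), dirToMove / name);
		}
	}
}

int main() {
	if (fs::exists("snap-dir") && fs::exists("data-dir"))
		restoreRoleFiles("snap-dir", "data-dir", "storage");
	return 0;
}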
@@ -1706,6 +1720,7 @@ int main(int argc, char* argv[]) {
 			std::string absDataFolder = abspath(dataFolder);
 			ini.LoadFile(joinPath(absDataFolder, "restartInfo.ini").c_str());
 			int backupFailed = true;
+			int backupVersion = 1;
 			const char* isRestoringStr = ini.GetValue("RESTORE", "isRestoring", NULL);
 			if (isRestoringStr) {
 				isRestoring = atoi(isRestoringStr);
@@ -1713,64 +1728,142 @@ int main(int argc, char* argv[]) {
 			if (isRestoring && backupFailedStr) {
 				backupFailed = atoi(backupFailedStr);
 			}
+			const char* backupVersionStr = ini.GetValue("RESTORE", "BackupVersion", NULL);
+			if (isRestoring && backupVersionStr) {
+				backupVersion = atoi(backupVersionStr);
+			}
 		}
 		if (isRestoring && !backupFailed) {
+			if (backupVersion == 1) {
 				std::vector<std::string> returnList;
 				std::string ext = "";
 				returnList = platform::listDirectories(absDataFolder);
 				std::string snapStr = ini.GetValue("RESTORE", "RestoreSnapUID");
 
 				TraceEvent("RestoringDataFolder").detail("DataFolder", absDataFolder);
 				TraceEvent("RestoreSnapUID").detail("UID", snapStr);
 
 				// delete all files (except fdb.cluster) in non-snap directories
 				for (int i = 0; i < returnList.size(); i++) {
 					if (returnList[i] == "." || returnList[i] == "..") {
 						continue;
 					}
 					if (returnList[i].find(snapStr) != std::string::npos) {
 						continue;
 					}
 
 					std::string childf = absDataFolder + "/" + returnList[i];
 					std::vector<std::string> returnFiles = platform::listFiles(childf, ext);
 					for (int j = 0; j < returnFiles.size(); j++) {
 						if (returnFiles[j] != "fdb.cluster" && returnFiles[j] != "fitness") {
 							TraceEvent("DeletingNonSnapfiles")
 								.detail("FileBeingDeleted", childf + "/" + returnFiles[j]);
 							deleteFile(childf + "/" + returnFiles[j]);
 						}
 					}
 				}
 				// move the contents from snap folder to the original folder,
 				// delete snap folders
 				for (int i = 0; i < returnList.size(); i++) {
 					if (returnList[i] == "." || returnList[i] == "..") {
 						continue;
 					}
 					std::string dirSrc = absDataFolder + "/" + returnList[i];
 					// delete snap directories which are not part of restoreSnapUID
 					if (returnList[i].find(snapStr) == std::string::npos) {
 						if (returnList[i].find("snap") != std::string::npos) {
 							platform::eraseDirectoryRecursive(dirSrc);
 						}
 						continue;
 					}
 					// remove empty/partial snap directories
 					std::vector<std::string> childrenList = platform::listFiles(dirSrc);
 					if (childrenList.size() == 0) {
 						TraceEvent("RemovingEmptySnapDirectory").detail("DirBeingDeleted", dirSrc);
 						platform::eraseDirectoryRecursive(dirSrc);
 						continue;
 					}
 					std::string origDir = returnList[i].substr(0, 32);
 					std::string dirToRemove = absDataFolder + "/" + origDir;
 					TraceEvent("DeletingOriginalNonSnapDirectory").detail("FileBeingDeleted", dirToRemove);
 					platform::eraseDirectoryRecursive(dirToRemove);
 					renameFile(dirSrc, dirToRemove);
 					TraceEvent("RenamingSnapToOriginalDirectory")
 						.detail("Oldname", dirSrc)
 						.detail("Newname", dirToRemove);
 				}
+			} else if (backupVersion == 2) {
+				std::vector<std::string> returnList;
+				std::string ext = "";
+				returnList = platform::listDirectories(absDataFolder);
+				std::string snapStr = ini.GetValue("RESTORE", "RestoreSnapUID");
+
+				TraceEvent("RestoringDataFolder").detail("DataFolder", absDataFolder);
+				TraceEvent("RestoreSnapUID").detail("UID", snapStr);
+
+				// delete all files (except fdb.cluster) in non-snap directories
+				for (const auto & dirEntry : returnList) {
+					if (dirEntry == "." || dirEntry == "..") {
+						continue;
+					}
+					if (dirEntry.find(snapStr) != std::string::npos) {
+						continue;
+					}
+
+					std::string childf = absDataFolder + "/" + dirEntry;
+					std::vector<std::string> returnFiles = platform::listFiles(childf, ext);
+					for (const auto & fileEntry : returnFiles) {
+						if (fileEntry != "fdb.cluster" && fileEntry != "fitness") {
+							TraceEvent("DeletingNonSnapfiles")
+								.detail("FileBeingDeleted", childf + "/" + fileEntry);
+							deleteFile(childf + "/" + fileEntry);
+						}
+					}
+				}
+				// cleanup unwanted and partial directories
+				for (const auto & dirEntry : returnList) {
+					if (dirEntry == "." || dirEntry == "..") {
+						continue;
+					}
+					std::string dirSrc = absDataFolder + "/" + dirEntry;
+					// delete snap directories which are not part of restoreSnapUID
+					if (dirEntry.find(snapStr) == std::string::npos) {
+						if (dirEntry.find("snap") != std::string::npos) {
+							platform::eraseDirectoryRecursive(dirSrc);
+						}
+						continue;
+					}
+					// remove empty/partial snap directories
+					std::vector<std::string> childrenList = platform::listFiles(dirSrc);
+					if (childrenList.size() == 0) {
+						TraceEvent("RemovingEmptySnapDirectory").detail("DirBeingDeleted", dirSrc);
+						platform::eraseDirectoryRecursive(dirSrc);
+						continue;
+					}
+				}
+				// move snapshotted files to appropriate locations
+				for (const auto & dirEntry : returnList) {
+					if (dirEntry == "." || dirEntry == "..") {
+						continue;
+					}
+					std::string dirSrc = absDataFolder + "/" + dirEntry;
+					std::string origDir = dirEntry.substr(0, 32);
+					std::string dirToMove = absDataFolder + "/" + origDir;
+					if ((dirEntry.find("snap") != std::string::npos) &&
+						(dirEntry.find("tlog") != std::string::npos)) {
+						// restore tlog files
+						restoreRoleFilesHelper(dirSrc, dirToMove, "log");
+					} else if ((dirEntry.find("snap") != std::string::npos) &&
+						(dirEntry.find("storage") != std::string::npos)) {
+						// restore storage files
+						restoreRoleFilesHelper(dirSrc, dirToMove, "storage");
+					} else if ((dirEntry.find("snap") != std::string::npos) &&
+						(dirEntry.find("coord") != std::string::npos)) {
+						// restore coordinator files
+						restoreRoleFilesHelper(dirSrc, dirToMove, "coordination");
+					}
+				}
+			}
 		}
 	}
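The restore path above is keyed entirely off the [RESTORE] section of restartInfo.ini (isRestoring, BackupFailed, BackupVersion, RestoreSnapUID). A small sketch of writing those keys with CSimpleIni, mirroring the SetValue/SaveFile calls used elsewhere in this commit; the helper name and values are illustrative, not part of this diff:

#include <string>
#include "SimpleIni.h"

void writeRestartInfo(const std::string& path, const std::string& snapUID) {
	CSimpleIni ini;
	ini.SetValue("RESTORE", "isRestoring", "1");
	ini.SetValue("RESTORE", "BackupFailed", "0");
	ini.SetValue("RESTORE", "BackupVersion", "2"); // 1 = legacy restore path, 2 = the new path above
	ini.SetValue("RESTORE", "RestoreSnapUID", snapUID.c_str());
	ini.SaveFile(path.c_str());
}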
@@ -1615,6 +1615,7 @@ bool changeDurableVersion( StorageServer* data, Version desiredDurableVersion )
 
 	Future<Void> checkFatalError = data->otherError.getFuture();
 	data->durableVersion.set( nextDurableVersion );
+	setDataDurableVersion(data->thisServerID, data->durableVersion.get());
 	if (checkFatalError.isReady()) checkFatalError.get();
 
 	//TraceEvent("ForgotVersionsBefore", data->thisServerID).detail("Version", nextDurableVersion);
@@ -1960,7 +1961,7 @@ snapHelper(StorageServer* data, MutationRef m, Version ver)
 
 	if (!skip) {
 		setExecOpInProgress(execUID);
-		int err = wait(execHelper(&execArg, data->folder, "role=storage"));
+		int err = wait(execHelper(&execArg, data->folder, "role=storage", 1 /*version*/));
 		clearExecOpInProgress(execUID);
 	}
 	TraceEvent te = TraceEvent("ExecTraceStorage");
@@ -2821,6 +2822,7 @@ ACTOR Future<Void> update( StorageServer* data, bool* pReceivedUpdate )
 		data->noRecentUpdates.set(false);
 		data->lastUpdate = now();
 		data->version.set( ver ); // Triggers replies to waiting gets for new version(s)
+		setDataVersion(data->thisServerID, data->version.get());
 		if (data->otherError.getFuture().isReady()) data->otherError.getFuture().get();
 
 		Version maxVersionsInMemory = SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS;
@@ -1024,6 +1024,7 @@ ACTOR Future<Void> runTests( Reference<AsyncVar<Optional<struct ClusterControlle
 	state double databasePingDelay = 1e9;
 	state ISimulator::BackupAgentType simBackupAgents = ISimulator::NoBackupAgents;
 	state ISimulator::BackupAgentType simDrAgents = ISimulator::NoBackupAgents;
+	state bool enableDD = false;
 	if (tests.empty()) useDB = true;
 	for( auto iter = tests.begin(); iter != tests.end(); ++iter ) {
 		if( iter->useDB ) useDB = true;
@@ -1036,6 +1037,7 @@ ACTOR Future<Void> runTests( Reference<AsyncVar<Optional<struct ClusterControlle
 		if (iter->simDrAgents != ISimulator::NoBackupAgents) {
 			simDrAgents = iter->simDrAgents;
 		}
+		enableDD = enableDD || getOption(iter->options[0], LiteralStringRef("enableDD"), false);
 	}
 
 	if (g_network->isSimulated()) {
@@ -1058,6 +1060,9 @@ ACTOR Future<Void> runTests( Reference<AsyncVar<Optional<struct ClusterControlle
 	if(useDB && startingConfiguration != StringRef()) {
 		try {
 			wait(timeoutError(changeConfiguration(cx, testers, startingConfiguration), 2000.0));
+			if (g_network->isSimulated() && enableDD) {
+				wait(success(setDDMode(cx, 1)));
+			}
 		}
 		catch(Error& e) {
 			TraceEvent(SevError, "TestFailure").error(e).detail("Reason", "Unable to set starting configuration");
@@ -655,6 +655,36 @@ void endRole(const Role &role, UID id, std::string reason, bool ok, Error e) {
 	}
 }
 
+ACTOR Future<Void> workerSnapCreate(WorkerSnapRequest snapReq, StringRef snapFolder) {
+	state ExecCmdValueString snapArg(snapReq.snapPayload);
+	try {
+		Standalone<StringRef> role = LiteralStringRef("role=").withSuffix(snapReq.role);
+		int err = wait(execHelper(&snapArg, snapFolder.toString(), role.toString(), 2 /* version */));
+		std::string uidStr = snapReq.snapUID.toString();
+		TraceEvent("ExecTraceWorker")
+			.detail("Uid", uidStr)
+			.detail("Status", err)
+			.detail("Role", snapReq.role)
+			.detail("Value", snapFolder)
+			.detail("ExecPayload", snapReq.snapPayload);
+		if (err != 0) {
+			throw operation_failed();
+		}
+		if (snapReq.role.toString() == "storage") {
+			printStorageVersionInfo();
+		}
+		snapReq.reply.send(Void());
+	} catch (Error& e) {
+		TraceEvent("ExecHelperError").error(e, true /*includeCancelled*/);
+		if (e.code() != error_code_operation_cancelled) {
+			snapReq.reply.sendError(e);
+		} else {
+			throw e;
+		}
+	}
+	return Void();
+}
+
 ACTOR Future<Void> monitorServerDBInfo( Reference<AsyncVar<Optional<ClusterControllerFullInterface>>> ccInterface, Reference<ClusterConnectionFile> connFile, LocalityData locality, Reference<AsyncVar<ServerDBInfo>> dbInfo ) {
 	// Initially most of the serverDBInfo is not known, but we know our locality right away
 	ServerDBInfo localInfo;
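workerSnapCreate runs on each worker; something has to fan the request out to the fleet. A hedged sketch of broadcasting WorkerSnapRequest to every worker (not in this diff; assumes getWorkers() from QuietDatabase and the fdbrpc RequestStream::getReply API):

ACTOR Future<Void> snapAllWorkers(Reference<AsyncVar<ServerDBInfo>> dbInfo, Standalone<StringRef> snapPayload,
                                  UID snapUID, Standalone<StringRef> role) {
	state std::vector<WorkerDetails> workers = wait(getWorkers(dbInfo));
	std::vector<Future<Void>> replies;
	for (const auto& w : workers) {
		// workerSnapReq is the RequestStream added to WorkerInterface in this commit.
		replies.push_back(w.interf.workerSnapReq.getReply(WorkerSnapRequest(snapPayload, snapUID, role)));
	}
	wait(waitForAll(replies));
	return Void();
}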
@@ -1201,7 +1231,7 @@ ACTOR Future<Void> workerServer(
 			when(state ExecuteRequest req = waitNext(interf.execReq.getFuture())) {
 				state ExecCmdValueString execArg(req.execPayload);
 				try {
-					int err = wait(execHelper(&execArg, coordFolder, "role=coordinator"));
+					int err = wait(execHelper(&execArg, coordFolder, "role=coordinator", 1 /*version*/));
 					StringRef uidStr = execArg.getBinaryArgValue(LiteralStringRef("uid"));
 					auto tokenStr = "ExecTrace/Coordinators/" + uidStr.toString();
 					auto te = TraceEvent("ExecTraceCoordinators");
@@ -1217,6 +1247,13 @@ ACTOR Future<Void> workerServer(
 					req.reply.sendError(broken_promise());
 				}
 			}
+			when(state WorkerSnapRequest snapReq = waitNext(interf.workerSnapReq.getFuture())) {
+				Standalone<StringRef> snapFolder = StringRef(folder);
+				if (snapReq.role.toString() == "coord") {
+					snapFolder = coordFolder;
+				}
+				errorForwarders.add(workerSnapCreate(snapReq, snapFolder));
+			}
 			when( wait( errorForwarders.getResult() ) ) {}
 			when( wait( handleErrors ) ) {}
 		}
@@ -82,6 +82,7 @@ public: // variables
 	std::string restartInfoLocation; // file location to store the snap restore info
 	int maxRetryCntToRetrieveMessage; // number of retires to do trackLatest
 	bool skipCheck; // disable check if the exec fails
+	int snapVersion; // snapVersion to invoke
 
 public: // ctor & dtor
 	SnapTestWorkload(WorkloadContext const& wcx)
@@ -97,6 +98,7 @@ public: // ctor & dtor
 		    getOption(options, LiteralStringRef("restartInfoLocation"), LiteralStringRef("simfdb/restartInfo.ini"))
 		        .toString();
 		skipCheck = false;
+		snapVersion = getOption(options, LiteralStringRef("version"), 1);
 	}
 
 public: // workload functions
@@ -235,7 +237,7 @@ public: // workload functions
 			self->snapUID = deterministicRandom()->randomUniqueID();
 			try {
 				StringRef snapCmdRef = LiteralStringRef("/bin/snap_create.sh");
-				Future<Void> status = snapCreate(cx, snapCmdRef, self->snapUID);
+				Future<Void> status = snapCreate(cx, snapCmdRef, self->snapUID, self->snapVersion);
 				wait(status);
 				break;
 			} catch (Error& e) {
@@ -250,6 +252,14 @@ public: // workload functions
 					break;
 				}
 			}
+			if (self->snapVersion == 2) {
+				++retry;
+				// snap v2 can fail for many reasons, so retry for 5 times and then fail it
+				if (retry > 5) {
+					snapFailed = true;
+					break;
+				}
+			}
 		}
 	}
 	CSimpleIni ini;
@@ -258,6 +268,7 @@ public: // workload functions
 	std::string uidStr = self->snapUID.toString();
 	ini.SetValue("RESTORE", "RestoreSnapUID", uidStr.c_str());
 	ini.SetValue("RESTORE", "BackupFailed", format("%d", snapFailed).c_str());
+	ini.SetValue("RESTORE", "BackupVersion", format("%d", self->snapVersion).c_str());
 	ini.SaveFile(self->restartInfoLocation.c_str());
 	// write the snapUID to a file
 	TraceEvent("SnapshotCreateStatus").detail("Status", !snapFailed ? "Success" : "Failure");
@@ -349,7 +360,7 @@ public: // workload functions
 			self->snapUID = deterministicRandom()->randomUniqueID();
 			try {
 				StringRef snapCmdRef = LiteralStringRef("/bin/snap_create1.sh");
-				Future<Void> status = snapCreate(cx, snapCmdRef, self->snapUID);
+				Future<Void> status = snapCreate(cx, snapCmdRef, self->snapUID, self->snapVersion);
 				wait(status);
 				break;
 			} catch (Error& e) {
@@ -198,6 +198,7 @@ Future<T> timeoutError( Future<T> what, double time, TaskPriority taskID = TaskP
 	}
 }
 
 ACTOR template <class T>
 Future<T> delayed( Future<T> what, double time = 0.0, TaskPriority taskID = TaskPriority::DefaultDelay ) {
 	try {
@@ -148,6 +148,9 @@ add_fdb_test(
 add_fdb_test(
   TEST_FILES restarting/StorefrontTestRestart-1.txt
   restarting/StorefrontTestRestart-2.txt)
+add_fdb_test(
+  TEST_FILES restarting/from_6.2.0/SnapTestAttrition-1.txt
+  restarting/from_6.2.0/SnapTestAttrition-2.txt)
 add_fdb_test(
   TEST_FILES restarting/from_6.2.0/SnapTestSimpleRestart-1.txt
   restarting/from_6.2.0/SnapTestSimpleRestart-2.txt)
@@ -12,6 +12,7 @@ testTitle=SnapCyclePre
 	maxSnapDelay=10.0
 	testID=1
 	clearAfterTest=false
+	version=2
 
 testTitle=SnapCycleShutdown
 ;save and shutdown
@@ -1,8 +1,10 @@
 testTitle=SnapCycleRestore
 ;Post snap restore test
 	runSetup=false
 	testName=Cycle
 	transactionsPerSecond=2500.0
 	nodeCount=2500
 	testDuration=10.0
 	expectedRate=0
+buggify=off
+enableDD=true
@@ -0,0 +1,46 @@
+testTitle=SnapTestPre
+;write 1000 Keys ending with even numbers
+	testName=SnapTest
+	numSnaps=1
+	maxSnapDelay=3.0
+	testID=0
+	clearAfterTest=false
+
+testTitle=SnapTestTakeSnap
+;Take snap and do read/write
+	testName=ReadWrite
+	testDuration=10.0
+	transactionsPerSecond=10000
+	writesPerTransactionA=0
+	readsPerTransactionA=10
+	writesPerTransactionB=10
+	readsPerTransactionB=1
+	alpha=0.5
+	nodeCount=100000
+	valueBytes=16
+	discardEdgeMeasurements=false
+
+	testName=SnapTest
+	numSnaps=1
+	maxSnapDelay=10.0
+	testID=1
+	clearAfterTest=false
+	version=2
+
+	testName=Attrition
+	testDuration=10.0
+
+testTitle=SnapTestPost
+;write 1000 Keys ending with odd numbers
+	testName=SnapTest
+	numSnaps=1
+	maxSnapDelay=25.0
+	testID=2
+	clearAfterTest=false
+
+; save and shutdown
+testTitle=SnapSimpleShutdown
+	testName=SaveAndKill
+	restartInfoLocation=simfdb/restartInfo.ini
+	testDuration=10.0
+	isRestoring=1
@@ -0,0 +1,9 @@
+; verify all keys are even numbered
+testTitle=SnapTestVerify
+	testName=SnapTest
+	numSnaps=1
+	maxSnapDelay=3.0
+	testID=3
+	restartInfoLocation=simfdb/restartInfo.ini
+buggify=off
+enableDD=true
@@ -25,6 +25,7 @@ testTitle=SnapTestTakeSnap
 	maxSnapDelay=10.0
 	testID=1
 	clearAfterTest=false
+	version=2
 
 testTitle=SnapTestPost
 ;write 1000 Keys ending with odd numbers
@@ -4,3 +4,5 @@ testName=SnapTest
 	numSnaps=1
 	maxSnapDelay=3.0
 	testID=3
+buggify=off
+enableDD=true
@@ -13,6 +13,7 @@ testTitle=SnapSimpleTakeSnap
 	maxSnapDelay=5.0
 	testID=1
 	clearAfterTest=false
+	version=2
 
 ;write 1000 Keys ending with odd number
 testTitle=SnapSimplePost
@@ -4,3 +4,5 @@ testName=SnapTest
 	numSnaps=1
 	maxSnapDelay=3.0
 	testID=3
+buggify=off
+enableDD=true