From 4083af0b01db9405a77ec2841b9d305e027a4c09 Mon Sep 17 00:00:00 2001 From: sramamoorthy Date: Tue, 23 Apr 2019 16:17:54 -0700 Subject: [PATCH] Avoid using trackLatest for TLog pop test cases --- fdbclient/SystemData.cpp | 2 + fdbclient/SystemData.h | 1 + fdbserver/MasterProxyServer.actor.cpp | 5 +- fdbserver/OldTLogServer_6_0.actor.cpp | 52 +-- fdbserver/TLogServer.actor.cpp | 52 +-- fdbserver/storageserver.actor.cpp | 4 - fdbserver/workloads/SnapTest.actor.cpp | 325 ++----------------- tests/fast/SnapTestFailAndDisablePop.txt | 18 +- tests/restarting/SnapCycleRestart-1.txt | 1 - tests/restarting/SnapTestAttrition-1.txt | 3 - tests/restarting/SnapTestAttrition-2.txt | 1 - tests/restarting/SnapTestRestart-1.txt | 3 - tests/restarting/SnapTestRestart-2.txt | 1 - tests/restarting/SnapTestSimpleRestart-1.txt | 3 - tests/restarting/SnapTestSimpleRestart-2.txt | 1 - 15 files changed, 100 insertions(+), 372 deletions(-) diff --git a/fdbclient/SystemData.cpp b/fdbclient/SystemData.cpp index 888f9bc743..ea7d91544d 100644 --- a/fdbclient/SystemData.cpp +++ b/fdbclient/SystemData.cpp @@ -41,6 +41,8 @@ const StringRef execSnap = LiteralStringRef("snap"); // snapshot persistent stat // storage, TLog and coordinated state const StringRef execDisableTLogPop = LiteralStringRef("\xff/TLogDisablePop"); // disable pop on TLog const StringRef execEnableTLogPop = LiteralStringRef("\xff/TLogEnablePop"); // enable pop on TLog +// used to communicate snap failures between TLog and SnapTest Workload, used only in simulator +const StringRef snapTestFailStatus = LiteralStringRef("\xff/SnapTestFailStatus/"); const Key keyServersKey( const KeyRef& k ) { return k.withPrefix( keyServersPrefix ); diff --git a/fdbclient/SystemData.h b/fdbclient/SystemData.h index 0f7d5d591c..b29805208c 100644 --- a/fdbclient/SystemData.h +++ b/fdbclient/SystemData.h @@ -285,6 +285,7 @@ std::pair decodeHealthyZoneValue( ValueRef const& ); extern const StringRef execSnap; extern const StringRef execDisableTLogPop; extern const StringRef execEnableTLogPop; +extern const StringRef snapTestFailStatus; // All mutations done to this range are blindly copied into txnStateStore. // Used to create artifically large txnStateStore instances in testing. diff --git a/fdbserver/MasterProxyServer.actor.cpp b/fdbserver/MasterProxyServer.actor.cpp index 572b7dd1b0..8af540ea33 100644 --- a/fdbserver/MasterProxyServer.actor.cpp +++ b/fdbserver/MasterProxyServer.actor.cpp @@ -836,15 +836,12 @@ ACTOR Future commitBatch( allSources.insert(localTags.begin(), localTags.end()); } - std::string tokenStr = "ExecTrace/Proxy/" + uidStr.toString(); auto te1 = TraceEvent("ProxyCommitTo", self->dbgid); te1.detail("To", "all sources"); + te1.detail("UidStr", uidStr); te1.detail("Mutation", m.toString()); te1.detail("Version", commitVersion); te1.detail("NumTags", allSources.size()); - if (m.param1 == execSnap) { - te1.trackLatest(tokenStr.c_str()); - } for (auto& tag : allSources) { toCommit.addTag(tag); } diff --git a/fdbserver/OldTLogServer_6_0.actor.cpp b/fdbserver/OldTLogServer_6_0.actor.cpp index 88c079ffe4..84991a489a 100644 --- a/fdbserver/OldTLogServer_6_0.actor.cpp +++ b/fdbserver/OldTLogServer_6_0.actor.cpp @@ -25,6 +25,7 @@ #include "fdbclient/Notified.h" #include "fdbclient/KeyRangeMap.h" #include "fdbclient/SystemData.h" +#include "fdbclient/RunTransaction.actor.h" #include "fdbserver/WorkerInterface.actor.h" #include "fdbserver/TLogInterface.h" #include "fdbserver/Knobs.h" @@ -252,6 +253,7 @@ struct TLogData : NonCopyable { AsyncVar largeDiskQueueCommitBytes; //becomes true when diskQueueCommitBytes is greater than MAX_QUEUE_COMMIT_BYTES Reference> dbInfo; + Database cx; NotifiedVersion queueCommitEnd; Version queueCommitBegin; @@ -295,6 +297,7 @@ struct TLogData : NonCopyable { concurrentLogRouterReads(SERVER_KNOBS->CONCURRENT_LOG_ROUTER_READS), ignorePopRequest(false), ignorePopDeadline(), ignorePopUid(), dataFolder(folder), toBePopped() { + cx = openDBOnServer(dbInfo, TaskDefaultEndpoint, true, true); } }; @@ -969,8 +972,7 @@ ACTOR Future tLogPop( TLogData* self, TLogPopRequest req, Referencenow()) .detail("IgnorePopRequest", self->ignorePopRequest) - .detail("IgnorePopDeadline", self->ignorePopDeadline) - .trackLatest("DisableTLogPopTimedOut"); + .detail("IgnorePopDeadline", self->ignorePopDeadline); } wait(tLogPopCore(self, req.tag, req.to, logData)); req.reply.send(Void()); @@ -1282,7 +1284,8 @@ ACTOR Future execProcessingHelper(TLogData* self, Standalone>* execTags, ExecCmdValueString* execArg, StringRef* execCmd, - Version* execVersion) + Version* execVersion, + vector>* snapFailKeySetters) { // inspect the messages to find if there is an Exec type and print // it. message are prefixed by the length of the message and each @@ -1335,7 +1338,7 @@ ACTOR Future execProcessingHelper(TLogData* self, } if (*execCmd == execSnap) { // validation check specific to snap request - std::string reason; + state std::string reason; if (!self->ignorePopRequest) { *execVersion = invalidVersion; reason = "SnapFailIgnorePopNotSet"; @@ -1348,17 +1351,21 @@ ACTOR Future execProcessingHelper(TLogData* self, TraceEvent(SevWarn, "TLogSnapFailed") .detail("IgnorePopUid", self->ignorePopUid) .detail("IgnorePopRequest", self->ignorePopRequest) - .detail("Reason", reason) - .trackLatest(reason.c_str()); + .detail("Reason", reason); - std::string message = "ExecTrace/TLog/" + logData->allTags.begin()->toString() - + "/" + uidStr.toString(); TraceEvent("ExecCmdSnapCreate") .detail("Uid", uidStr.toString()) .detail("Status", -1) .detail("Tag", logData->allTags.begin()->toString()) - .detail("Role", "TLog") - .trackLatest(message.c_str()); + .detail("Role", "TLog"); + if (g_network->isSimulated()) { + // write SnapFailedTLog.$UID + Standalone keyStr = snapTestFailStatus.withSuffix(uidStr); + Standalone valStr = LiteralStringRef("Success"); + TraceEvent(SevDebug, "TLogKeyStr").detail("Value", keyStr); + snapFailKeySetters->push_back(runRYWTransaction(self->cx, [=](Reference tr) -> Future + { tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); tr->set(keyStr, valStr); return Void(); })); + } } } if (*execCmd == execDisableTLogPop) { @@ -1376,15 +1383,13 @@ ACTOR Future execProcessingHelper(TLogData* self, .detail("UidStr", uidStr.toString()) .detail("IgnorePopUid", self->ignorePopUid) .detail("IgnporePopRequest", self->ignorePopRequest) - .detail("IgnporePopDeadline", self->ignorePopDeadline) - .trackLatest("DisablePopTLog"); + .detail("IgnporePopDeadline", self->ignorePopDeadline); } if (*execCmd == execEnableTLogPop) { if (self->ignorePopUid != uidStr.toString()) { TraceEvent(SevWarn, "TLogPopDisableEnableUidMismatch") .detail("IgnorePopUid", self->ignorePopUid) - .detail("UidStr", uidStr.toString()) - .trackLatest("TLogPopDisableEnableUidMismatch"); + .detail("UidStr", uidStr.toString()); } TraceEvent("EnableTLogPlayAllIgnoredPops"); @@ -1408,8 +1413,7 @@ ACTOR Future execProcessingHelper(TLogData* self, .detail("UidStr", uidStr.toString()) .detail("IgnorePopUid", self->ignorePopUid) .detail("IgnporePopRequest", self->ignorePopRequest) - .detail("IgnporePopDeadline", self->ignorePopDeadline) - .trackLatest("EnablePopTLog"); + .detail("IgnporePopDeadline", self->ignorePopDeadline); } } return Void(); @@ -1454,8 +1458,6 @@ ACTOR Future tLogSnapHelper(TLogData* self, } poppedTagVersion = tagv->popped; - state std::string message = "ExecTrace/TLog/" + tagv->tag.toString() + "/" + uidStr.toString(); - TraceEvent te = TraceEvent(SevDebug, "TLogExecTraceDetailed"); te.detail("Uid", uidStr.toString()); te.detail("Status", err); @@ -1469,9 +1471,6 @@ ACTOR Future tLogSnapHelper(TLogData* self, te.detail("PersistentDatadurableVersion", logData->persistentDataDurableVersion); te.detail("QueueCommittedVersion", logData->queueCommittedVersion.get()); te.detail("IgnorePopUid", self->ignorePopUid); - if (execCmd == execSnap) { - te.trackLatest(message.c_str()); - } } return Void(); } @@ -1530,6 +1529,7 @@ ACTOR Future tLogCommit( state TLogQueueEntryRef qe; state StringRef execCmd; state Standalone> execTags; + state vector> snapFailKeySetters; if (logData->version.get() == req.prevVersion) { // Not a duplicate (check relies on no waiting between here and self->version.set() below!) if(req.debugID.present()) @@ -1542,7 +1542,7 @@ ACTOR Future tLogCommit( qe.id = logData->logId; if (req.hasExecOp) { - wait(execProcessingHelper(self, logData, &req, &execTags, &execArg, &execCmd, &execVersion)); + wait(execProcessingHelper(self, logData, &req, &execTags, &execArg, &execCmd, &execVersion, &snapFailKeySetters)); if (execVersion != invalidVersion) { TraceEvent(SevDebug, "SettingExecOpCommit") .detail("ExecVersion", execVersion) @@ -1567,6 +1567,7 @@ ACTOR Future tLogCommit( // Notifies the commitQueue actor to commit persistentQueue, and also unblocks tLogPeekMessages actors logData->version.set( req.version ); + if(req.debugID.present()) g_traceBatch.addEvent("CommitDebug", tlogDebugID.get().first(), "TLog.tLogCommit.AfterTLogCommit"); } @@ -1593,6 +1594,13 @@ ACTOR Future tLogCommit( g_traceBatch.addEvent("CommitDebug", tlogDebugID.get().first(), "TLog.tLogCommit.After"); req.reply.send( logData->durableKnownCommittedVersion ); + if (g_network->isSimulated()) { + if (snapFailKeySetters.size() > 0) { + TraceEvent(SevDebug, "SettingSnapFailKey"); + wait(waitForAll(snapFailKeySetters)); + TraceEvent(SevDebug, "SettingSnapFailKeyDone"); + } + } return Void(); } diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index d0b9ecf76a..bc3256ac64 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -25,6 +25,7 @@ #include "fdbclient/Notified.h" #include "fdbclient/KeyRangeMap.h" #include "fdbclient/SystemData.h" +#include "fdbclient/RunTransaction.actor.h" #include "fdbserver/WorkerInterface.actor.h" #include "fdbserver/TLogInterface.h" #include "fdbserver/Knobs.h" @@ -302,6 +303,7 @@ struct TLogData : NonCopyable { AsyncVar largeDiskQueueCommitBytes; //becomes true when diskQueueCommitBytes is greater than MAX_QUEUE_COMMIT_BYTES Reference> dbInfo; + Database cx; NotifiedVersion queueCommitEnd; Version queueCommitBegin; @@ -347,6 +349,7 @@ struct TLogData : NonCopyable { concurrentLogRouterReads(SERVER_KNOBS->CONCURRENT_LOG_ROUTER_READS), ignorePopRequest(false), ignorePopDeadline(), ignorePopUid(), dataFolder(folder), toBePopped() { + cx = openDBOnServer(dbInfo, TaskDefaultEndpoint, true, true); } }; @@ -1220,8 +1223,7 @@ ACTOR Future tLogPop( TLogData* self, TLogPopRequest req, Referencenow()) .detail("IgnorePopRequest", self->ignorePopRequest) - .detail("IgnorePopDeadline", self->ignorePopDeadline) - .trackLatest("DisableTLogPopTimedOut"); + .detail("IgnorePopDeadline", self->ignorePopDeadline); } wait(tLogPopCore(self, req.tag, req.to, logData)); req.reply.send(Void()); @@ -1649,7 +1651,8 @@ ACTOR Future execProcessingHelper(TLogData* self, Standalone>* execTags, ExecCmdValueString* execArg, StringRef* execCmd, - Version* execVersion) + Version* execVersion, + vector>* snapFailKeySetters) { // inspect the messages to find if there is an Exec type and print // it. message are prefixed by the length of the message and each @@ -1702,7 +1705,7 @@ ACTOR Future execProcessingHelper(TLogData* self, } if (*execCmd == execSnap) { // validation check specific to snap request - std::string reason; + state std::string reason; if (!self->ignorePopRequest) { *execVersion = invalidVersion; reason = "SnapFailIgnorePopNotSet"; @@ -1715,17 +1718,22 @@ ACTOR Future execProcessingHelper(TLogData* self, TraceEvent(SevWarn, "TLogSnapFailed") .detail("IgnorePopUid", self->ignorePopUid) .detail("IgnorePopRequest", self->ignorePopRequest) - .detail("Reason", reason) - .trackLatest(reason.c_str()); + .detail("Reason", reason); - std::string message = "ExecTrace/TLog/" + logData->allTags.begin()->toString(); - "/" + uidStr.toString(); TraceEvent("ExecCmdSnapCreate") .detail("Uid", uidStr.toString()) .detail("Status", -1) .detail("Tag", logData->allTags.begin()->toString()) - .detail("Role", "TLog") - .trackLatest(message.c_str()); + .detail("Role", "TLog"); + + if (g_network->isSimulated()) { + // write SnapFailedTLog.$UID + Standalone keyStr = snapTestFailStatus.withSuffix(uidStr); + StringRef valStr = LiteralStringRef("Success"); + TraceEvent(SevDebug, "TLogKeyStr").detail("Value", keyStr); + snapFailKeySetters->push_back(runRYWTransaction(self->cx, [=](Reference tr) -> Future + { tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); tr->set(keyStr, valStr); return Void(); })); + } } } if (*execCmd == execDisableTLogPop) { @@ -1743,15 +1751,13 @@ ACTOR Future execProcessingHelper(TLogData* self, .detail("UidStr", uidStr.toString()) .detail("IgnorePopUid", self->ignorePopUid) .detail("IgnporePopRequest", self->ignorePopRequest) - .detail("IgnporePopDeadline", self->ignorePopDeadline) - .trackLatest("DisablePopTLog"); + .detail("IgnporePopDeadline", self->ignorePopDeadline); } if (*execCmd == execEnableTLogPop) { if (self->ignorePopUid != uidStr.toString()) { TraceEvent(SevWarn, "TLogPopDisableEnableUidMismatch") .detail("IgnorePopUid", self->ignorePopUid) - .detail("UidStr", uidStr.toString()) - .trackLatest("TLogPopDisableEnableUidMismatch"); + .detail("UidStr", uidStr.toString()); } TraceEvent("EnableTLogPlayAllIgnoredPops"); @@ -1775,8 +1781,7 @@ ACTOR Future execProcessingHelper(TLogData* self, .detail("UidStr", uidStr.toString()) .detail("IgnorePopUid", self->ignorePopUid) .detail("IgnporePopRequest", self->ignorePopRequest) - .detail("IgnporePopDeadline", self->ignorePopDeadline) - .trackLatest("EnablePopTLog"); + .detail("IgnporePopDeadline", self->ignorePopDeadline); } } return Void(); @@ -1820,8 +1825,6 @@ ACTOR Future tLogSnapHelper(TLogData* self, } poppedTagVersion = tagv->popped; - state std::string message = "ExecTrace/TLog/" + tagv->tag.toString() + "/" + uidStr.toString(); - TraceEvent te = TraceEvent(SevDebug, "TLogExecTraceDetailed"); te.detail("Uid", uidStr.toString()); te.detail("Status", err); @@ -1835,9 +1838,6 @@ ACTOR Future tLogSnapHelper(TLogData* self, te.detail("PersistentDatadurableVersion", logData->persistentDataDurableVersion); te.detail("QueueCommittedVersion", logData->queueCommittedVersion.get()); te.detail("IgnorePopUid", self->ignorePopUid); - if (execCmd == execSnap) { - te.trackLatest(message.c_str()); - } } return Void(); } @@ -1906,9 +1906,10 @@ ACTOR Future tLogCommit( qe.knownCommittedVersion = logData->knownCommittedVersion; qe.messages = req.messages; qe.id = logData->logId; + state vector> snapFailKeySetters; if (req.hasExecOp) { - wait(execProcessingHelper(self, logData, &req, &execTags, &execArg, &execCmd, &execVersion)); + wait(execProcessingHelper(self, logData, &req, &execTags, &execArg, &execCmd, &execVersion, &snapFailKeySetters)); if (execVersion != invalidVersion) { TraceEvent(SevDebug, "SettingExecOpCommit") .detail("ExecVersion", execVersion) @@ -1960,6 +1961,13 @@ ACTOR Future tLogCommit( g_traceBatch.addEvent("CommitDebug", tlogDebugID.get().first(), "TLog.tLogCommit.After"); req.reply.send( logData->durableKnownCommittedVersion ); + if (g_network->isSimulated()) { + if (snapFailKeySetters.size() > 0) { + TraceEvent(SevDebug, "SettingSnapFailKey"); + wait(waitForAll(snapFailKeySetters)); + TraceEvent(SevDebug, "SettingSnapFailKeyDone"); + } + } return Void(); } diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp index 0e9e583e3f..f3b0a1e9da 100644 --- a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -1892,7 +1892,6 @@ snapHelper(StorageServer* data, MutationRef m, Version ver) int err = wait(execHelper(&execArg, data->folder, "role=storage")); clearExecOpInProgress(execUID); } - auto tokenStr = "ExecTrace/storage/" + uidStr.toString(); TraceEvent te = TraceEvent("ExecTraceStorage"); te.detail("Uid", uidStr.toString()); te.detail("Status", err); @@ -1904,9 +1903,6 @@ snapHelper(StorageServer* data, MutationRef m, Version ver) te.detail("DataVersion", data->version.get()); te.detail("Tag", data->tag.toString()); te.detail("SnapCreateSkipped", skip); - if (cmd == execSnap) { - te.trackLatest(tokenStr.c_str()); - } return Void(); } diff --git a/fdbserver/workloads/SnapTest.actor.cpp b/fdbserver/workloads/SnapTest.actor.cpp index b9c41c3965..908d53c37a 100644 --- a/fdbserver/workloads/SnapTest.actor.cpp +++ b/fdbserver/workloads/SnapTest.actor.cpp @@ -77,7 +77,6 @@ public: // variables int numSnaps; // num of snapshots to be taken // FIXME: currently validation works on numSnap = 1 double maxSnapDelay; // max delay before which a snapshot will be taken - bool snapCheck; // check for the successful snap create int testID; // test id UID snapUID; // UID used for snap name std::string restartInfoLocation; // file location to store the snap restore info @@ -85,14 +84,13 @@ public: // variables public: // ctor & dtor SnapTestWorkload(WorkloadContext const& wcx) - : TestWorkload(wcx), numSnaps(0), maxSnapDelay(0.0), snapCheck(false), testID(0), snapUID() { + : TestWorkload(wcx), numSnaps(0), maxSnapDelay(0.0), testID(0), snapUID() { TraceEvent("SnapTestWorkload Constructor"); std::string workloadName = "SnapTest"; maxRetryCntToRetrieveMessage = 10; numSnaps = getOption(options, LiteralStringRef("numSnaps"), 0); maxSnapDelay = getOption(options, LiteralStringRef("maxSnapDelay"), 25.0); - snapCheck = getOption(options, LiteralStringRef("snapCheck"), false); testID = getOption(options, LiteralStringRef("testID"), 0); restartInfoLocation = getOption(options, LiteralStringRef("restartInfoLocation"), LiteralStringRef("simfdb/restartInfo.ini")) @@ -113,45 +111,33 @@ public: // workload functions return Void(); } + ACTOR Future _check(Database cx, SnapTestWorkload* self) { + state Transaction tr(cx); + // read the key SnapFailedTLog.$UID + loop { + try { + Standalone keyStr = snapTestFailStatus.withSuffix(StringRef(self->snapUID.toString())); + TraceEvent("TestKeyStr").detail("Value", keyStr); + tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); + Optional val = wait(tr.get(keyStr)); + ASSERT(val.present()); + break; + } catch (Error &e) { + wait(tr.onError(e)); + } + } + return true; + } + Future check(Database const& cx) override { - TraceEvent("SnapTestWorkloadCheck").detail("ClientID", clientId).detail("SnapCheck", this->snapCheck); - if (!this->snapCheck || clientId != 0) { - TraceEvent("SnapTestCheckSucc"); + TraceEvent("SnapTestWorkloadCheck").detail("ClientID", clientId); + if (clientId != 0) { return true; } - switch (this->testID) { - case 0: - case 1: - case 2: - case 3: { - Future> proxyIfaces; - return (verifyExecTraceVersion(cx, this)); - break; + if (this->testID != 5 && this->testID != 6) { + return true; } - case 4: { - std::string token = "DisableTLogPopTimedOut"; - return verifyTLogTrackLatest(cx, this, token); - break; - } - case 5: { - std::string token = "TLogPopDisableEnableUidMismatch"; - return verifyTLogTrackLatest(cx, this, token); - break; - } - case 6: { - std::string token = "SnapFailIgnorePopNotSet"; - return verifyTLogTrackLatest(cx, this, token); - break; - } - case 7: { - std::string token = "SnapFailedDisableTLogUidMismatch"; - return verifyTLogTrackLatest(cx, this, token); - break; - } - default: { break; } - } - TraceEvent(SevError, "InvalidPathCheckOptions"); - return false; + return _check(cx, this); } void getMetrics(vector& m) override { TraceEvent("SnapTestWorkloadGetMetrics"); } @@ -268,7 +254,6 @@ public: // workload functions begin = firstGreaterThan(kvRange.end()[-1].key); } catch (Error& e) { wait(tr.onError(e)); - cnt = 0; } } TraceEvent("SnapTestVerifyCntValue").detail("Value", cnt); @@ -292,45 +277,15 @@ public: // workload functions wait(tr.onError(e)); } } - // wait for 40 seconds and verify that the enabled pop happened - // automatically - wait(delay(40.0)); - self->snapUID = UID::fromString("a36b2ca0e8dab0452ac3e12b6b926f4b"); } else if (self->testID == 5) { - // description: disable TLog pop and enable TLog pop with - // different UIDs should mis-match and print an error - tr.reset(); - loop { - // disable pop of the TLog - try { - StringRef payLoadRef = LiteralStringRef("empty-binary:uid=956349f5f368d37a802f1f37d7f4b9c1"); - tr.execute(execDisableTLogPop, payLoadRef); - wait(tr.commit()); - break; - } catch (Error& e) { - wait(tr.onError(e)); - } - } - tr.reset(); - loop { - // enable pop of the TLog - try { - StringRef payLoadRef = LiteralStringRef("empty-binary:uid=5810898ca2f3143a246886c79d1bea92"); - tr.execute(execEnableTLogPop, payLoadRef); - wait(tr.commit()); - break; - } catch (Error& e) { - wait(tr.onError(e)); - } - } - self->snapUID = UID::fromString("5810898ca2f3143a246886c79d1bea92"); - } else if (self->testID == 6) { // snapshot create without disabling pop of the TLog tr.reset(); + state Standalone uidStr = LiteralStringRef("d78b08d47f341158e9a54d4baaf4a4dd"); + self->snapUID = UID::fromString(uidStr.toString()); loop { try { - StringRef snapPayload = LiteralStringRef("/bin/" - "snap_create.sh:uid=d78b08d47f341158e9a54d4baaf4a4dd"); + Standalone snapPayload = LiteralStringRef("/bin/" + "snap_create.sh:uid=").withSuffix(uidStr); tr.execute(execSnap, snapPayload); wait(tr.commit()); break; @@ -339,8 +294,7 @@ public: // workload functions wait(tr.onError(e)); } } - self->snapUID = UID::fromString("d78b08d47f341158e9a54d4baaf4a4dd"); - } else if (self->testID == 7) { + } else if (self->testID == 6) { // disable popping of TLog and snapshot create with mis-matching tr.reset(); loop { @@ -355,10 +309,12 @@ public: // workload functions } } tr.reset(); + uidStr = LiteralStringRef("ba61e9612a561d60bd83ad83e1b63568"); + self->snapUID = UID::fromString(uidStr.toString()); loop { // snap create with different UID try { - StringRef snapPayload = LiteralStringRef("/bin/snap_create.sh:uid=ba61e9612a561d60bd83ad83e1b63568"); + Standalone snapPayload = LiteralStringRef("/bin/snap_create.sh:uid=").withSuffix(uidStr); tr.execute(execSnap, snapPayload); wait(tr.commit()); break; @@ -367,8 +323,7 @@ public: // workload functions wait(tr.onError(e)); } } - self->snapUID = UID::fromString("ba61e9612a561d60bd83ad83e1b63568"); - } else if (self->testID == 8) { + } else if (self->testID == 7) { // create a snapshot with a non whitelisted binary path and operation // should fail state bool testedFailure = false; @@ -396,220 +351,6 @@ public: // workload functions wait(delay(0.0)); return Void(); } - - ACTOR Future verifyTLogTrackLatest(Database cx, SnapTestWorkload* self, std::string event) { - TraceEvent("VerifyTLogTrackLatest"); - state StringRef eventTokenRef(event); - state vector tLogWorkers; - state std::vector> tLogMessages; - state std::vector workers = wait(getWorkers(self->dbInfo)); - state std::map address_workers; - - for (auto const& worker : workers) { - address_workers[worker.interf.address()] = worker.interf; - } - vector tLogServers = self->dbInfo->get().logSystemConfig.allLocalLogs(); - - for (auto s : tLogServers) { - auto it = address_workers.find(s.address()); - if (it != address_workers.end()) { - tLogWorkers.push_back(it->second); - TraceEvent("TLogWorker") - .detail("Address", s.address()) - .detail("Id", s.id()) - .detail("Locality", s.locality.toString()); - } - } - - state int entryi = 0; - state int foundTagServers = 0; - for (; entryi < tLogWorkers.size(); entryi++) { - tLogMessages.push_back( - timeoutError(tLogWorkers[entryi].eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 3.0)); - - try { - TraceEvent(SevDebug, "WaitingForTlogMessages"); - wait(waitForAll(tLogMessages)); - } catch (Error& e) { - TraceEvent(SevError, "UnableToRetrieveTLogMessages") - .detail("Token", eventTokenRef.toString()) - .detail("Reason", "FailedToGetTLogMessages") - .detail("Code", e.what()); - return false; - } - printMessages(tLogMessages); - filterEmptyMessages(tLogMessages); - if (tLogMessages.size() < 1) { - TraceEvent("VerifyTLogTrackLatestMessageNotFound") - .detail("Address", tLogWorkers[entryi].address()) - .detail("Token", eventTokenRef.toString()); - } else { - ++foundTagServers; - } - tLogMessages.clear(); - } - // FIXME: logSystemConfig.allLocalLogs returns remote tlogServers also in few cases and hence the test fails. - // Verify that foundTagServers matches the number of TLogServers in the local region - if (foundTagServers < 1) { - TraceEvent(SevError, "VerifyTLogTrackLatestMessageNotReachAllTLogservers") - .detail("Token", eventTokenRef.toString()) - .detail("FoundaTagServers", foundTagServers); - return false; - } - TraceEvent("VerifyTLogTrackLatestDone"); - return true; - } - - ACTOR Future verifyExecTraceVersion(Database cx, SnapTestWorkload* self) { - state std::vector coordAddrs = wait(getCoordinators(cx)); - state vector proxyWorkers = wait(getWorkers(self->dbInfo)); - state vector storageWorkers = wait(getWorkers(self->dbInfo)); - state vector tLogWorkers = wait(getWorkers(self->dbInfo)); - state vector workers = wait(getWorkers(self->dbInfo)); - - state std::vector> proxyMessages; - state std::vector> tLogMessages; - state std::vector> storageMessages; - state std::vector> coordMessages; - state int numDurableVersionChecks = 0; - state std::map visitedStorageTags; - - for (int i = 0; i < workers.size(); i++) { - std::string eventToken = "ExecTrace/Coordinators/" + self->snapUID.toString(); - StringRef eventTokenRef(eventToken); - coordMessages.push_back( - timeoutError(workers[i].interf.eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 3.0)); - } - - for (int i = 0; i < workers.size(); i++) { - std::string eventToken = "ExecTrace/Proxy/" + self->snapUID.toString(); - StringRef eventTokenRef(eventToken); - proxyMessages.push_back( - timeoutError(workers[i].interf.eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 3.0)); - } - - for (int i = 0; i < storageWorkers.size(); i++) { - std::string eventToken = "ExecTrace/storage/" + self->snapUID.toString(); - StringRef eventTokenRef(eventToken); - storageMessages.push_back(timeoutError( - storageWorkers[i].interf.eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 3.0)); - } - - try { - wait(waitForAll(proxyMessages)); - wait(waitForAll(storageMessages)); - wait(waitForAll(coordMessages)); - } catch (Error& e) { - TraceEvent(SevError, "UnableToRetrieveProxyStorageCoordMessages"); - return false; - } - - // filter out empty messages - filterEmptyMessages(proxyMessages); - filterEmptyMessages(storageMessages); - filterEmptyMessages(coordMessages); - - TraceEvent("SnapTestProxyMessages"); - printMessages(proxyMessages); - TraceEvent("SnapTestStorageMessages"); - printMessages(storageMessages); - TraceEvent("SnapTestCoordMessages"); - printMessages(coordMessages); - - if (proxyMessages.size() != 1) { - // if no message from proxy or more than one fail the check - TraceEvent(SevError, "NoExecTraceMessageFromProxy"); - return false; - } - - TraceEvent("CoordinatorSnapStatus") - .detail("CoordMessageSize", coordMessages.size()) - .detail("CoordAddrssize", coordAddrs.size()); - if (coordMessages.size() < (coordAddrs.size() + 1) / 2) { - TraceEvent(SevError, "NoExecTraceMessageFromQuorumOfCoordinators"); - return false; - } - - state int entryi = 0; - state int numTags = -1; - - for (; entryi < proxyMessages.size(); entryi++) { - state Version execVersion = -1; - state std::string emptyStr; - - TraceEvent("RelevantProxyMessage").detail("Msg", proxyMessages[entryi].get().toString()); - if (proxyMessages[entryi].get().toString() != emptyStr) { - getVersionAndnumTags(proxyMessages[entryi].get(), execVersion, numTags); - ASSERT(numTags > 0); - } - state int entryj = 0; - for (; (execVersion != -1) && entryj < storageMessages.size(); entryj++) { - // for each message that has this verison, get the tag and - // the durable version - state Tag tag; - state Tag invalidTag; - state Version durableVersion = -1; - TraceEvent("RelevantStorageMessage").detail("Msg", storageMessages[entryj].get().toString()); - ASSERT(storageMessages[entryj].get().toString() != emptyStr); - getTagAndDurableVersion(storageMessages[entryj].get(), execVersion, tag, durableVersion); - TraceEvent("SearchingTLogMessages").detail("Tag", tag.toString()); - - tLogMessages.clear(); - for (int m = 0; (tag != invalidTag) && m < tLogWorkers.size(); m++) { - visitedStorageTags[tag] = true; - std::string eventToken = "ExecTrace/TLog/" + tag.toString() + "/" + self->snapUID.toString(); - StringRef eventTokenRef(eventToken); - tLogMessages.push_back(timeoutError( - tLogWorkers[m].interf.eventLogRequest.getReply(EventLogRequest(eventTokenRef)), 3.0)); - } - try { - TraceEvent("WaitingForTlogMessages"); - if (tag != invalidTag) { - wait(waitForAll(tLogMessages)); - } - } catch (Error& e) { - TraceEvent(SevError, "VerifyExecTraceVersionFailure") - .detail("Reason", "FailedToGetTLogMessages") - .detail("Code", e.what()); - return false; - } - filterEmptyMessages(tLogMessages); - state int entryk = 0; - numDurableVersionChecks = 0; - for (; (tag != invalidTag) && entryk < tLogMessages.size(); entryk++) { - // for each of the message that has this version and tag - // verify that - // 1) durableVersion >= minTLogVersion -1 - // 2) durableVersion < maxTLogVersion - Version minTLogVersion = -1; - Version maxTLogVersion = -1; - TraceEvent("TLogMessage").detail("Msg", tLogMessages[entryk].get().toString()); - ASSERT(tLogMessages[entryk].get().toString() != emptyStr); - getMinAndMaxTLogVersions(tLogMessages[entryk].get(), execVersion, tag, minTLogVersion, maxTLogVersion); - if (minTLogVersion != -1 && maxTLogVersion != -1) { - if ((durableVersion >= minTLogVersion - 1) && (durableVersion < maxTLogVersion)) { - ++numDurableVersionChecks; - TraceEvent("Successs!!!"); - } - } - } - // if we did not find even one tlog for a given tag fail the check - if (numDurableVersionChecks < 1) { - TraceEvent(SevError, "NoTLogFoundForATag"); - return false; - } - tLogMessages.clear(); - } - } - - // validates that we encountered unique tags of value numTags - if (numTags != visitedStorageTags.size()) { - TraceEvent(SevError, "StorageMessagesWereNotFound"); - return false; - } - TraceEvent("VerifyExecTraceVersionSuccess"); - return true; - } }; WorkloadFactory SnapTestWorkloadFactory("SnapTest"); diff --git a/tests/fast/SnapTestFailAndDisablePop.txt b/tests/fast/SnapTestFailAndDisablePop.txt index e19532be99..00676a78bb 100644 --- a/tests/fast/SnapTestFailAndDisablePop.txt +++ b/tests/fast/SnapTestFailAndDisablePop.txt @@ -5,36 +5,24 @@ testName=SnapTest numSnaps=1 maxSnapDelay=3.0 testID=4 -snapCheck=true - -; TLog pop enable and disable UID mismatch -testTitle=SnapTLogPopEnableDisableMismatch -testName=SnapTest -numSnaps=1 -maxSnapDelay=3.0 -testID=5 -snapCheck=true ; snapCreate without TLogPopDisable testTitle=SnapCreateWithNoDisablePop testName=SnapTest numSnaps=1 maxSnapDelay=3.0 -testID=6 -snapCheck=true +testID=5 ; snapCreate and tlogPopDisable with mis-matched UID testTitle=SnapCreateDisableTLogPopMismatch testName=SnapTest numSnaps=1 maxSnapDelay=3.0 -testID=7 -snapCheck=true +testID=6 ; snapCreate with binary path that is not whitelisted testTitle=SnapCreateNotWhitelistedBinaryPath testName=SnapTest numSnaps=1 maxSnapDelay=3.0 -testID=8 -snapCheck=false +testID=7 diff --git a/tests/restarting/SnapCycleRestart-1.txt b/tests/restarting/SnapCycleRestart-1.txt index 0898e0b1ea..2a1a8f275c 100644 --- a/tests/restarting/SnapCycleRestart-1.txt +++ b/tests/restarting/SnapCycleRestart-1.txt @@ -12,7 +12,6 @@ testTitle=SnapCyclePre maxSnapDelay=10.0 testID=1 clearAfterTest=false - snapCheck=true testTitle=SnapCycleShutdown ;save and shutdown diff --git a/tests/restarting/SnapTestAttrition-1.txt b/tests/restarting/SnapTestAttrition-1.txt index e85c024ef9..2ff2d35437 100644 --- a/tests/restarting/SnapTestAttrition-1.txt +++ b/tests/restarting/SnapTestAttrition-1.txt @@ -5,7 +5,6 @@ testTitle=SnapTestPre maxSnapDelay=3.0 testID=0 clearAfterTest=false - snapCheck=false testTitle=SnapTestTakeSnap ;Take snap and do read/write @@ -26,7 +25,6 @@ testTitle=SnapTestTakeSnap maxSnapDelay=30.0 testID=1 clearAfterTest=false - snapCheck=false testName=Attrition testDuration=20.0 @@ -38,7 +36,6 @@ testTitle=SnapTestPost maxSnapDelay=25.0 testID=2 clearAfterTest=false - snapCheck=false ; save and shutdown testTitle=SnapSimpleShutdown diff --git a/tests/restarting/SnapTestAttrition-2.txt b/tests/restarting/SnapTestAttrition-2.txt index fd6a3ab7a3..07d71073e1 100644 --- a/tests/restarting/SnapTestAttrition-2.txt +++ b/tests/restarting/SnapTestAttrition-2.txt @@ -4,5 +4,4 @@ testName=SnapTest numSnaps=1 maxSnapDelay=3.0 testID=3 -snapCheck=false restartInfoLocation=simfdb/restartInfo.ini diff --git a/tests/restarting/SnapTestRestart-1.txt b/tests/restarting/SnapTestRestart-1.txt index 6ae2d6bb0c..97972ff2de 100644 --- a/tests/restarting/SnapTestRestart-1.txt +++ b/tests/restarting/SnapTestRestart-1.txt @@ -5,7 +5,6 @@ testTitle=SnapTestPre maxSnapDelay=3.0 testID=0 clearAfterTest=false - snapCheck=false testTitle=SnapTestTakeSnap ;Take snap and do read/write @@ -26,7 +25,6 @@ testTitle=SnapTestTakeSnap maxSnapDelay=30.0 testID=1 clearAfterTest=false - snapCheck=true testTitle=SnapTestPost ;write 1000 Keys ending with odd numbers @@ -35,7 +33,6 @@ testTitle=SnapTestPost maxSnapDelay=25.0 testID=2 clearAfterTest=false - snapCheck=false testTitle=SnapTestShutdown ;save and shutdown diff --git a/tests/restarting/SnapTestRestart-2.txt b/tests/restarting/SnapTestRestart-2.txt index d8dd4b711e..b8bdfc6b34 100644 --- a/tests/restarting/SnapTestRestart-2.txt +++ b/tests/restarting/SnapTestRestart-2.txt @@ -4,4 +4,3 @@ testName=SnapTest numSnaps=1 maxSnapDelay=3.0 testID=3 -snapCheck=false diff --git a/tests/restarting/SnapTestSimpleRestart-1.txt b/tests/restarting/SnapTestSimpleRestart-1.txt index 773ac6c909..bf74326ecc 100644 --- a/tests/restarting/SnapTestSimpleRestart-1.txt +++ b/tests/restarting/SnapTestSimpleRestart-1.txt @@ -5,7 +5,6 @@ testTitle=SnapSimplePre maxSnapDelay=30.0 testID=0 clearAfterTest=false - snapCheck=false ;take snap testTitle=SnapSimpleTakeSnap @@ -14,7 +13,6 @@ testTitle=SnapSimpleTakeSnap maxSnapDelay=5.0 testID=1 clearAfterTest=false - snapCheck=true ;write 1000 Keys ending with odd number testTitle=SnapSimplePost @@ -23,7 +21,6 @@ testTitle=SnapSimplePost maxSnapDelay=3.0 testID=2 clearAfterTest=false - snapCheck=false ; save and shutdown testTitle=SnapSimpleShutdown diff --git a/tests/restarting/SnapTestSimpleRestart-2.txt b/tests/restarting/SnapTestSimpleRestart-2.txt index 54cb126362..6e17c33151 100644 --- a/tests/restarting/SnapTestSimpleRestart-2.txt +++ b/tests/restarting/SnapTestSimpleRestart-2.txt @@ -4,4 +4,3 @@ testName=SnapTest numSnaps=1 maxSnapDelay=3.0 testID=3 -snapCheck=false