From 56053c565b9a920ae69551c3157bd52b62647216 Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Wed, 12 Feb 2020 15:11:38 -0800 Subject: [PATCH 1/2] Improve TLog "Role" event by adding the worker ID, the TLog version, and under what circumstances the TLog is being started (Restored, Recruited, or Recovered). The SharedTLog role was being started and stopped twice, so remove one instance of it. --- fdbserver/OldTLogServer_4_6.actor.cpp | 14 ++++++---- fdbserver/OldTLogServer_6_0.actor.cpp | 39 ++++++++++++++------------- fdbserver/TLogServer.actor.cpp | 34 ++++++++++++----------- fdbserver/WorkerInterface.actor.h | 10 +++---- fdbserver/worker.actor.cpp | 4 +-- 5 files changed, 56 insertions(+), 45 deletions(-) diff --git a/fdbserver/OldTLogServer_4_6.actor.cpp b/fdbserver/OldTLogServer_4_6.actor.cpp index 0e02cd57b6..b6444efdd5 100644 --- a/fdbserver/OldTLogServer_4_6.actor.cpp +++ b/fdbserver/OldTLogServer_4_6.actor.cpp @@ -43,6 +43,7 @@ using std::make_pair; using std::min; using std::max; +#define TLOG_VERSION "4.6" namespace oldTLog_4_6 { typedef int16_t OldTag; @@ -270,6 +271,7 @@ namespace oldTLog_4_6 { std::map> id_data; UID dbgid; + UID workerID; IKeyValueStore* persistentData; IDiskQueue* rawPersistentQueue; @@ -303,8 +305,8 @@ namespace oldTLog_4_6 { PromiseStream> sharedActors; bool terminated; - TLogData(UID dbgid, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference> const& dbInfo) - : dbgid(dbgid), instanceID(deterministicRandom()->randomUniqueID().first()), + TLogData(UID dbgid, UID workerID, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference> const& dbInfo) + : dbgid(dbgid), workerID(workerID), instanceID(deterministicRandom()->randomUniqueID().first()), persistentData(persistentData), rawPersistentQueue(persistentQueue), persistentQueue(new TLogQueue(persistentQueue, dbgid)), dbInfo(dbInfo), queueCommitBegin(0), queueCommitEnd(0), prevVersion(0), diskQueueCommitBytes(0), largeDiskQueueCommitBytes(false), @@ -412,7 +414,9 @@ namespace oldTLog_4_6 { // These are initialized differently on init() or recovery recoveryCount(), stopped(false), initialized(false), queueCommittingVersion(0), newPersistentDataVersion(invalidVersion), recovery(Void()) { - startRole(Role::TRANSACTION_LOG,interf.id(), UID()); + std::map details; + details["TLogVersion"] = TLOG_VERSION; + startRole(Role::TRANSACTION_LOG, interf.id(), tLogData->workerID, details, "Restored"); persistentDataVersion.init(LiteralStringRef("TLog.PersistentDataVersion"), cc.id); persistentDataDurableVersion.init(LiteralStringRef("TLog.PersistentDataDurableVersion"), cc.id); @@ -1421,9 +1425,9 @@ namespace oldTLog_4_6 { return Void(); } - ACTOR Future tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference> db, LocalityData locality, UID tlogId ) + ACTOR Future tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference> db, LocalityData locality, UID tlogId, UID workerID ) { - state TLogData self( tlogId, persistentData, persistentQueue, db ); + state TLogData self( tlogId, workerID, persistentData, persistentQueue, db ); state Future error = actorCollection( self.sharedActors.getFuture() ); TraceEvent("SharedTlog", tlogId); diff --git a/fdbserver/OldTLogServer_6_0.actor.cpp b/fdbserver/OldTLogServer_6_0.actor.cpp index 36a8ac77bb..53f7625f04 100644 --- a/fdbserver/OldTLogServer_6_0.actor.cpp +++ b/fdbserver/OldTLogServer_6_0.actor.cpp @@ -47,6 +47,7 @@ using std::make_pair; using std::min; using std::max; +#define TLOG_VERSION "6.0" namespace oldTLog_6_0 { struct TLogQueueEntryRef { @@ -245,6 +246,7 @@ struct TLogData : NonCopyable { std::map> id_data; UID dbgid; + UID workerID; IKeyValueStore* persistentData; IDiskQueue* rawPersistentQueue; @@ -286,8 +288,8 @@ struct TLogData : NonCopyable { Reference> degraded; std::vector tempTagMessages; - TLogData(UID dbgid, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference> dbInfo, Reference> degraded, std::string folder) - : dbgid(dbgid), instanceID(deterministicRandom()->randomUniqueID().first()), + TLogData(UID dbgid, UID workerID, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference> dbInfo, Reference> degraded, std::string folder) + : dbgid(dbgid), workerID(workerID), instanceID(deterministicRandom()->randomUniqueID().first()), persistentData(persistentData), rawPersistentQueue(persistentQueue), persistentQueue(new TLogQueue(persistentQueue, dbgid)), dbInfo(dbInfo), degraded(degraded), queueCommitBegin(0), queueCommitEnd(0), diskQueueCommitBytes(0), largeDiskQueueCommitBytes(false), bytesInput(0), bytesDurable(0), targetVolatileBytes(SERVER_KNOBS->TLOG_SPILL_THRESHOLD), overheadBytesInput(0), overheadBytesDurable(0), @@ -439,14 +441,17 @@ struct LogData : NonCopyable, public ReferenceCounted { bool execOpCommitInProgress; int txsTags; - explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, bool isPrimary, int logRouterTags, int txsTags, UID recruitmentID, std::vector tags) : tLogData(tLogData), knownCommittedVersion(0), logId(interf.id()), - cc("TLog", interf.id().toString()), bytesInput("BytesInput", cc), bytesDurable("BytesDurable", cc), remoteTag(remoteTag), isPrimary(isPrimary), logRouterTags(logRouterTags), txsTags(txsTags), recruitmentID(recruitmentID), - logSystem(new AsyncVar>()), logRouterPoppedVersion(0), durableKnownCommittedVersion(0), minKnownCommittedVersion(0), allTags(tags.begin(), tags.end()), terminated(tLogData->terminated.getFuture()), - // These are initialized differently on init() or recovery - recoveryCount(), stopped(false), initialized(false), queueCommittingVersion(0), newPersistentDataVersion(invalidVersion), unrecoveredBefore(1), recoveredAt(1), unpoppedRecoveredTags(0), - logRouterPopToVersion(0), locality(tagLocalityInvalid), execOpCommitInProgress(false) + explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, bool isPrimary, int logRouterTags, int txsTags, UID recruitmentID, std::vector tags, std::string context) + : tLogData(tLogData), knownCommittedVersion(0), logId(interf.id()), + cc("TLog", interf.id().toString()), bytesInput("BytesInput", cc), bytesDurable("BytesDurable", cc), remoteTag(remoteTag), isPrimary(isPrimary), logRouterTags(logRouterTags), txsTags(txsTags), recruitmentID(recruitmentID), + logSystem(new AsyncVar>()), logRouterPoppedVersion(0), durableKnownCommittedVersion(0), minKnownCommittedVersion(0), allTags(tags.begin(), tags.end()), terminated(tLogData->terminated.getFuture()), + // These are initialized differently on init() or recovery + recoveryCount(), stopped(false), initialized(false), queueCommittingVersion(0), newPersistentDataVersion(invalidVersion), unrecoveredBefore(1), recoveredAt(1), unpoppedRecoveredTags(0), + logRouterPopToVersion(0), locality(tagLocalityInvalid), execOpCommitInProgress(false) { - startRole(Role::TRANSACTION_LOG, interf.id(), UID()); + std::map details; + details["TLogVersion"] = TLOG_VERSION; + startRole(Role::TRANSACTION_LOG, interf.id(), tLogData->workerID, details, context); persistentDataVersion.init(LiteralStringRef("TLog.PersistentDataVersion"), cc.id); persistentDataDurableVersion.init(LiteralStringRef("TLog.PersistentDataDurableVersion"), cc.id); @@ -1973,7 +1978,7 @@ ACTOR Future restorePersistentState( TLogData* self, LocalityData locality tlogRequests.getFuture().pop().reply.sendError(recruitment_failed()); } - wait( oldTLog_4_6::tLog(self->persistentData, self->rawPersistentQueue, self->dbInfo, locality, self->dbgid) ); + wait( oldTLog_4_6::tLog(self->persistentData, self->rawPersistentQueue, self->dbInfo, locality, self->dbgid, self->workerID) ); throw internal_error(); } @@ -2019,7 +2024,7 @@ ACTOR Future restorePersistentState( TLogData* self, LocalityData locality DUMPTOKEN( recruited.confirmRunning ); //We do not need the remoteTag, because we will not be loading any additional data - logData = Reference( new LogData(self, recruited, Tag(), true, id_logRouterTags[id1], id_txsTags[id1], UID(), std::vector()) ); + logData = Reference( new LogData(self, recruited, Tag(), true, id_logRouterTags[id1], id_txsTags[id1], UID(), std::vector(), "Restored") ); logData->locality = id_locality[id1]; logData->stopped = true; self->id_data[id1] = logData; @@ -2202,7 +2207,8 @@ ACTOR Future tLogStart( TLogData* self, InitializeTLogRequest req, Localit it.second->stopCommit.trigger(); } - state Reference logData = Reference( new LogData(self, recruited, req.remoteTag, req.isPrimary, req.logRouterTags, req.txsTags, req.recruitmentID, req.allTags) ); + bool recovering = (req.recoverFrom.logSystemType == LogSystemType::tagPartitioned); + state Reference logData = Reference( new LogData(self, recruited, req.remoteTag, req.isPrimary, req.logRouterTags, req.txsTags, req.recruitmentID, req.allTags, recovering ? "Recovered" : "Recruited") ); self->id_data[recruited.id()] = logData; logData->locality = req.locality; logData->recoveryCount = req.epoch; @@ -2218,7 +2224,7 @@ ACTOR Future tLogStart( TLogData* self, InitializeTLogRequest req, Localit throw logData->removed.getError(); } - if (req.recoverFrom.logSystemType == LogSystemType::tagPartitioned) { + if (recovering) { logData->unrecoveredBefore = req.startVersion; logData->recoveredAt = req.recoverAt; logData->knownCommittedVersion = req.startVersion - 1; @@ -2324,13 +2330,11 @@ ACTOR Future startSpillingInTenSeconds(TLogData* self, UID tlogId, Referen } // New tLog (if !recoverFrom.size()) or restore from network -ACTOR Future tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference> db, LocalityData locality, PromiseStream tlogRequests, UID tlogId, bool restoreFromDisk, Promise oldLog, Promise recovered, std::string folder, Reference> degraded, Reference> activeSharedTLog) { - state TLogData self( tlogId, persistentData, persistentQueue, db, degraded, folder ); +ACTOR Future tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference> db, LocalityData locality, PromiseStream tlogRequests, UID tlogId, UID workerID, bool restoreFromDisk, Promise oldLog, Promise recovered, std::string folder, Reference> degraded, Reference> activeSharedTLog) { + state TLogData self( tlogId, workerID, persistentData, persistentQueue, db, degraded, folder ); state Future error = actorCollection( self.sharedActors.getFuture() ); TraceEvent("SharedTlog", tlogId); - // FIXME: Pass the worker id instead of stubbing it - startRole(Role::SHARED_TRANSACTION_LOG, tlogId, UID()); try { if(restoreFromDisk) { wait( restorePersistentState( &self, locality, oldLog, recovered, tlogRequests ) ); @@ -2371,7 +2375,6 @@ ACTOR Future tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQ } catch (Error& e) { self.terminated.send(Void()); TraceEvent("TLogError", tlogId).error(e, true); - endRole(Role::SHARED_TRANSACTION_LOG, tlogId, "Error", true); if(recovered.canBeSet()) recovered.send(Void()); while(!tlogRequests.isEmpty()) { diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index f58b97f179..97c91ba33b 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -47,6 +47,8 @@ using std::make_pair; using std::min; using std::max; +#define TLOG_VERSION "6.2" + struct TLogQueueEntryRef { UID id; Version version; @@ -295,6 +297,7 @@ struct TLogData : NonCopyable { std::map> id_data; UID dbgid; + UID workerID; IKeyValueStore* persistentData; IDiskQueue* rawPersistentQueue; @@ -337,8 +340,8 @@ struct TLogData : NonCopyable { Reference> degraded; std::vector tempTagMessages; - TLogData(UID dbgid, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference> dbInfo, Reference> degraded, std::string folder) - : dbgid(dbgid), instanceID(deterministicRandom()->randomUniqueID().first()), + TLogData(UID dbgid, UID workerID, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference> dbInfo, Reference> degraded, std::string folder) + : dbgid(dbgid), workerID(workerID), instanceID(deterministicRandom()->randomUniqueID().first()), persistentData(persistentData), rawPersistentQueue(persistentQueue), persistentQueue(new TLogQueue(persistentQueue, dbgid)), dbInfo(dbInfo), degraded(degraded), queueCommitBegin(0), queueCommitEnd(0), diskQueueCommitBytes(0), largeDiskQueueCommitBytes(false), bytesInput(0), bytesDurable(0), targetVolatileBytes(SERVER_KNOBS->TLOG_SPILL_THRESHOLD), overheadBytesInput(0), overheadBytesDurable(0), @@ -499,15 +502,18 @@ struct LogData : NonCopyable, public ReferenceCounted { bool execOpCommitInProgress; int txsTags; - explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, bool isPrimary, int logRouterTags, int txsTags, UID recruitmentID, ProtocolVersion protocolVersion, std::vector tags) : tLogData(tLogData), knownCommittedVersion(0), logId(interf.id()), - cc("TLog", interf.id().toString()), bytesInput("BytesInput", cc), bytesDurable("BytesDurable", cc), remoteTag(remoteTag), isPrimary(isPrimary), logRouterTags(logRouterTags), txsTags(txsTags), recruitmentID(recruitmentID), protocolVersion(protocolVersion), - logSystem(new AsyncVar>()), logRouterPoppedVersion(0), durableKnownCommittedVersion(0), minKnownCommittedVersion(0), queuePoppedVersion(0), allTags(tags.begin(), tags.end()), terminated(tLogData->terminated.getFuture()), - minPoppedTagVersion(0), minPoppedTag(invalidTag), + explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, bool isPrimary, int logRouterTags, int txsTags, UID recruitmentID, ProtocolVersion protocolVersion, std::vector tags, std::string context) + : tLogData(tLogData), knownCommittedVersion(0), logId(interf.id()), + cc("TLog", interf.id().toString()), bytesInput("BytesInput", cc), bytesDurable("BytesDurable", cc), remoteTag(remoteTag), isPrimary(isPrimary), logRouterTags(logRouterTags), txsTags(txsTags), recruitmentID(recruitmentID), protocolVersion(protocolVersion), + logSystem(new AsyncVar>()), logRouterPoppedVersion(0), durableKnownCommittedVersion(0), minKnownCommittedVersion(0), queuePoppedVersion(0), allTags(tags.begin(), tags.end()), terminated(tLogData->terminated.getFuture()), + minPoppedTagVersion(0), minPoppedTag(invalidTag), // These are initialized differently on init() or recovery recoveryCount(), stopped(false), initialized(false), queueCommittingVersion(0), newPersistentDataVersion(invalidVersion), unrecoveredBefore(1), recoveredAt(1), unpoppedRecoveredTags(0), logRouterPopToVersion(0), locality(tagLocalityInvalid), execOpCommitInProgress(false) { - startRole(Role::TRANSACTION_LOG, interf.id(), UID()); + std::map details; + details["TLogVersion"] = TLOG_VERSION; + startRole(Role::TRANSACTION_LOG, interf.id(), tLogData->workerID, details, context); persistentDataVersion.init(LiteralStringRef("TLog.PersistentDataVersion"), cc.id); persistentDataDurableVersion.init(LiteralStringRef("TLog.PersistentDataDurableVersion"), cc.id); @@ -2424,7 +2430,7 @@ ACTOR Future restorePersistentState( TLogData* self, LocalityData locality ProtocolVersion protocolVersion = BinaryReader::fromStringRef( fProtocolVersions.get()[idx].value, Unversioned() ); //We do not need the remoteTag, because we will not be loading any additional data - logData = Reference( new LogData(self, recruited, Tag(), true, id_logRouterTags[id1], id_txsTags[id1], UID(), protocolVersion, std::vector()) ); + logData = Reference( new LogData(self, recruited, Tag(), true, id_logRouterTags[id1], id_txsTags[id1], UID(), protocolVersion, std::vector(), "Restored") ); logData->locality = id_locality[id1]; logData->stopped = true; self->id_data[id1] = logData; @@ -2631,7 +2637,8 @@ ACTOR Future tLogStart( TLogData* self, InitializeTLogRequest req, Localit stopAllTLogs(self, recruited.id()); - state Reference logData = Reference( new LogData(self, recruited, req.remoteTag, req.isPrimary, req.logRouterTags, req.txsTags, req.recruitmentID, currentProtocolVersion, req.allTags) ); + bool recovering = (req.recoverFrom.logSystemType == LogSystemType::tagPartitioned); + state Reference logData = Reference( new LogData(self, recruited, req.remoteTag, req.isPrimary, req.logRouterTags, req.txsTags, req.recruitmentID, currentProtocolVersion, req.allTags, recovering ? "Recovered" : "Recruited") ); self->id_data[recruited.id()] = logData; logData->locality = req.locality; logData->recoveryCount = req.epoch; @@ -2649,7 +2656,7 @@ ACTOR Future tLogStart( TLogData* self, InitializeTLogRequest req, Localit throw logData->removed.getError(); } - if (req.recoverFrom.logSystemType == LogSystemType::tagPartitioned) { + if (recovering) { logData->unrecoveredBefore = req.startVersion; logData->recoveredAt = req.recoverAt; logData->knownCommittedVersion = req.startVersion - 1; @@ -2758,13 +2765,11 @@ ACTOR Future startSpillingInTenSeconds(TLogData* self, UID tlogId, Referen } // New tLog (if !recoverFrom.size()) or restore from network -ACTOR Future tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference> db, LocalityData locality, PromiseStream tlogRequests, UID tlogId, bool restoreFromDisk, Promise oldLog, Promise recovered, std::string folder, Reference> degraded, Reference> activeSharedTLog ) { - state TLogData self( tlogId, persistentData, persistentQueue, db, degraded, folder ); +ACTOR Future tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference> db, LocalityData locality, PromiseStream tlogRequests, UID tlogId, UID workerID, bool restoreFromDisk, Promise oldLog, Promise recovered, std::string folder, Reference> degraded, Reference> activeSharedTLog ) { + state TLogData self( tlogId, workerID, persistentData, persistentQueue, db, degraded, folder ); state Future error = actorCollection( self.sharedActors.getFuture() ); TraceEvent("SharedTlog", tlogId); - // FIXME: Pass the worker id instead of stubbing it - startRole(Role::SHARED_TRANSACTION_LOG, tlogId, UID()); try { if(restoreFromDisk) { wait( restorePersistentState( &self, locality, oldLog, recovered, tlogRequests ) ); @@ -2808,7 +2813,6 @@ ACTOR Future tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQ } catch (Error& e) { self.terminated.send(Void()); TraceEvent("TLogError", tlogId).error(e, true); - endRole(Role::SHARED_TRANSACTION_LOG, tlogId, "Error", true); if(recovered.canBeSet()) recovered.send(Void()); while(!tlogRequests.isEmpty()) { diff --git a/fdbserver/WorkerInterface.actor.h b/fdbserver/WorkerInterface.actor.h index 114b1f1c36..6b15142180 100644 --- a/fdbserver/WorkerInterface.actor.h +++ b/fdbserver/WorkerInterface.actor.h @@ -455,8 +455,8 @@ ACTOR Future masterProxyServer(MasterProxyInterface proxy, InitializeMaste Reference> db, std::string whitelistBinPaths); ACTOR Future tLog(IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference> db, LocalityData locality, - PromiseStream tlogRequests, UID tlogId, bool restoreFromDisk, - Promise oldLog, Promise recovered, std::string folder, + PromiseStream tlogRequests, UID tlogId, UID workerID, + bool restoreFromDisk, Promise oldLog, Promise recovered, std::string folder, Reference> degraded, Reference> activeSharedTLog); ACTOR Future monitorServerDBInfo(Reference>> ccInterface, @@ -474,13 +474,13 @@ void updateCpuProfiler(ProfilerRequest req); namespace oldTLog_4_6 { ACTOR Future tLog(IKeyValueStore* persistentData, IDiskQueue* persistentQueue, - Reference> db, LocalityData locality, UID tlogId); + Reference> db, LocalityData locality, UID tlogId, UID workerID); } namespace oldTLog_6_0 { ACTOR Future tLog(IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference> db, LocalityData locality, - PromiseStream tlogRequests, UID tlogId, bool restoreFromDisk, - Promise oldLog, Promise recovered, std::string folder, + PromiseStream tlogRequests, UID tlogId, UID workerID, + bool restoreFromDisk, Promise oldLog, Promise recovered, std::string folder, Reference> degraded, Reference> activeSharedTLog); } diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index 0d86c86974..6cdfd19375 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -921,7 +921,7 @@ ACTOR Future workerServer( auto& logData = sharedLogs[std::make_tuple(s.tLogOptions.version, s.storeType, s.tLogOptions.spillType)]; // FIXME: Shouldn't if logData.first isValid && !isReady, shouldn't we // be sending a fake InitializeTLogRequest rather than calling tLog() ? - Future tl = tLogFn( kv, queue, dbInfo, locality, !logData.actor.isValid() || logData.actor.isReady() ? logData.requests : PromiseStream(), s.storeID, true, oldLog, recovery, folder, degraded, activeSharedTLog ); + Future tl = tLogFn( kv, queue, dbInfo, locality, !logData.actor.isValid() || logData.actor.isReady() ? logData.requests : PromiseStream(), s.storeID, interf.id(), true, oldLog, recovery, folder, degraded, activeSharedTLog ); recoveries.push_back(recovery.getFuture()); activeSharedTLog->set(s.storeID); @@ -1087,7 +1087,7 @@ ACTOR Future workerServer( filesClosed.add( data->onClosed() ); filesClosed.add( queue->onClosed() ); - Future tLogCore = tLogFn( data, queue, dbInfo, locality, logData.requests, logId, false, Promise(), Promise(), folder, degraded, activeSharedTLog ); + Future tLogCore = tLogFn( data, queue, dbInfo, locality, logData.requests, logId, interf.id(), false, Promise(), Promise(), folder, degraded, activeSharedTLog ); tLogCore = handleIOErrors( tLogCore, data, logId ); tLogCore = handleIOErrors( tLogCore, queue, logId ); errorForwarders.add( forwardError( errors, Role::SHARED_TRANSACTION_LOG, logId, tLogCore ) ); From 1d9140d87492b46c7ddcfd7ccd60678012f47319 Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Fri, 14 Feb 2020 12:33:43 -0800 Subject: [PATCH 2/2] Removed TLogVersion logging. Added logging of SharedTLog ID for each TLog. Switched ID logged for TLogRejoining event to the TLog instead of the SharedTLog. Made some parameters to startRole passed by reference. --- fdbserver/OldTLogServer_4_6.actor.cpp | 7 ++----- fdbserver/OldTLogServer_6_0.actor.cpp | 7 ++----- fdbserver/TLogServer.actor.cpp | 8 ++------ fdbserver/WorkerInterface.actor.h | 2 +- fdbserver/worker.actor.cpp | 2 +- 5 files changed, 8 insertions(+), 18 deletions(-) diff --git a/fdbserver/OldTLogServer_4_6.actor.cpp b/fdbserver/OldTLogServer_4_6.actor.cpp index b6444efdd5..388f6effed 100644 --- a/fdbserver/OldTLogServer_4_6.actor.cpp +++ b/fdbserver/OldTLogServer_4_6.actor.cpp @@ -43,7 +43,6 @@ using std::make_pair; using std::min; using std::max; -#define TLOG_VERSION "4.6" namespace oldTLog_4_6 { typedef int16_t OldTag; @@ -414,9 +413,7 @@ namespace oldTLog_4_6 { // These are initialized differently on init() or recovery recoveryCount(), stopped(false), initialized(false), queueCommittingVersion(0), newPersistentDataVersion(invalidVersion), recovery(Void()) { - std::map details; - details["TLogVersion"] = TLOG_VERSION; - startRole(Role::TRANSACTION_LOG, interf.id(), tLogData->workerID, details, "Restored"); + startRole(Role::TRANSACTION_LOG, interf.id(), tLogData->workerID, {{"SharedTLog", tLogData->dbgid.shortString()}}, "Restored"); persistentDataVersion.init(LiteralStringRef("TLog.PersistentDataVersion"), cc.id); persistentDataDurableVersion.init(LiteralStringRef("TLog.PersistentDataDurableVersion"), cc.id); @@ -1124,7 +1121,7 @@ namespace oldTLog_4_6 { // The TLogRejoinRequest is needed to establish communications with a new master, which doesn't have our TLogInterface TLogRejoinRequest req; req.myInterface = tli; - TraceEvent("TLogRejoining", self->dbgid).detail("Master", self->dbInfo->get().master.id()); + TraceEvent("TLogRejoining", tli.id()).detail("Master", self->dbInfo->get().master.id()); choose { when(TLogRejoinReply rep = wait(brokenPromiseToNever(self->dbInfo->get().master.tlogRejoin.getReply(req)))) { diff --git a/fdbserver/OldTLogServer_6_0.actor.cpp b/fdbserver/OldTLogServer_6_0.actor.cpp index 53f7625f04..20bf5ec392 100644 --- a/fdbserver/OldTLogServer_6_0.actor.cpp +++ b/fdbserver/OldTLogServer_6_0.actor.cpp @@ -47,7 +47,6 @@ using std::make_pair; using std::min; using std::max; -#define TLOG_VERSION "6.0" namespace oldTLog_6_0 { struct TLogQueueEntryRef { @@ -449,9 +448,7 @@ struct LogData : NonCopyable, public ReferenceCounted { recoveryCount(), stopped(false), initialized(false), queueCommittingVersion(0), newPersistentDataVersion(invalidVersion), unrecoveredBefore(1), recoveredAt(1), unpoppedRecoveredTags(0), logRouterPopToVersion(0), locality(tagLocalityInvalid), execOpCommitInProgress(false) { - std::map details; - details["TLogVersion"] = TLOG_VERSION; - startRole(Role::TRANSACTION_LOG, interf.id(), tLogData->workerID, details, context); + startRole(Role::TRANSACTION_LOG, interf.id(), tLogData->workerID, {{"SharedTLog", tLogData->dbgid.shortString()}}, context); persistentDataVersion.init(LiteralStringRef("TLog.PersistentDataVersion"), cc.id); persistentDataDurableVersion.init(LiteralStringRef("TLog.PersistentDataDurableVersion"), cc.id); @@ -1483,7 +1480,7 @@ ACTOR Future rejoinMasters( TLogData* self, TLogInterface tli, DBRecoveryC if ( self->dbInfo->get().master.id() != lastMasterID) { // The TLogRejoinRequest is needed to establish communications with a new master, which doesn't have our TLogInterface TLogRejoinRequest req(tli); - TraceEvent("TLogRejoining", self->dbgid).detail("Master", self->dbInfo->get().master.id()); + TraceEvent("TLogRejoining", tli.id()).detail("Master", self->dbInfo->get().master.id()); choose { when(TLogRejoinReply rep = wait(brokenPromiseToNever(self->dbInfo->get().master.tlogRejoin.getReply(req)))) { diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index 97c91ba33b..fcfb40a3ab 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -47,8 +47,6 @@ using std::make_pair; using std::min; using std::max; -#define TLOG_VERSION "6.2" - struct TLogQueueEntryRef { UID id; Version version; @@ -511,9 +509,7 @@ struct LogData : NonCopyable, public ReferenceCounted { recoveryCount(), stopped(false), initialized(false), queueCommittingVersion(0), newPersistentDataVersion(invalidVersion), unrecoveredBefore(1), recoveredAt(1), unpoppedRecoveredTags(0), logRouterPopToVersion(0), locality(tagLocalityInvalid), execOpCommitInProgress(false) { - std::map details; - details["TLogVersion"] = TLOG_VERSION; - startRole(Role::TRANSACTION_LOG, interf.id(), tLogData->workerID, details, context); + startRole(Role::TRANSACTION_LOG, interf.id(), tLogData->workerID, {{"SharedTLog", tLogData->dbgid.shortString()}}, context); persistentDataVersion.init(LiteralStringRef("TLog.PersistentDataVersion"), cc.id); persistentDataDurableVersion.init(LiteralStringRef("TLog.PersistentDataDurableVersion"), cc.id); @@ -1876,7 +1872,7 @@ ACTOR Future rejoinMasters( TLogData* self, TLogInterface tli, DBRecoveryC if ( self->dbInfo->get().master.id() != lastMasterID) { // The TLogRejoinRequest is needed to establish communications with a new master, which doesn't have our TLogInterface TLogRejoinRequest req(tli); - TraceEvent("TLogRejoining", self->dbgid).detail("Master", self->dbInfo->get().master.id()); + TraceEvent("TLogRejoining", tli.id()).detail("Master", self->dbInfo->get().master.id()); choose { when(TLogRejoinReply rep = wait(brokenPromiseToNever(self->dbInfo->get().master.tlogRejoin.getReply(req)))) { diff --git a/fdbserver/WorkerInterface.actor.h b/fdbserver/WorkerInterface.actor.h index 6b15142180..a2eb851db2 100644 --- a/fdbserver/WorkerInterface.actor.h +++ b/fdbserver/WorkerInterface.actor.h @@ -418,7 +418,7 @@ private: } }; -void startRole(const Role &role, UID roleId, UID workerId, std::map details = std::map(), std::string origination = "Recruited"); +void startRole(const Role &role, UID roleId, UID workerId, const std::map &details = std::map(), const std::string &origination = "Recruited"); void endRole(const Role &role, UID id, std::string reason, bool ok = true, Error e = Error()); struct ServerDBInfo; diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index 6cdfd19375..3c4f0f48c9 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -618,7 +618,7 @@ Standalone roleString(std::set> r return StringRef(result); } -void startRole(const Role &role, UID roleId, UID workerId, std::map details, std::string origination) { +void startRole(const Role &role, UID roleId, UID workerId, const std::map &details, const std::string &origination) { if(role.includeInTraceRoles) { addTraceRole(role.abbreviation); }