shard the txs tag so that more transaction logs are involved in its recovery

This commit is contained in:
Evan Tschannen 2019-06-19 18:15:09 -07:00
parent df0baa0066
commit e0be631414
12 changed files with 193 additions and 83 deletions

View File

@ -43,6 +43,7 @@ enum {
tagLocalityUpgraded = -4,
tagLocalitySatellite = -5,
tagLocalityLogRouterMapped = -6,
tagLocalityTxs = -7,
tagLocalityInvalid = -99
}; //The TLog and LogRouter require these number to be as compact as possible

View File

@ -89,9 +89,9 @@ public:
return result;
}
void populateSatelliteTagLocations(int logRouterTags, int oldLogRouterTags) {
void populateSatelliteTagLocations(int logRouterTags, int oldLogRouterTags, int txsTags, int oldTxsTags) {
satelliteTagLocations.clear();
satelliteTagLocations.resize(std::max(logRouterTags,oldLogRouterTags) + 1);
satelliteTagLocations.resize(std::max({logRouterTags,oldLogRouterTags,txsTags,oldTxsTags})+1);
std::map<int,int> server_usedBest;
std::set<std::pair<int,int>> used_servers;
@ -235,7 +235,7 @@ public:
bool allLocations = false) {
if(locality == tagLocalitySatellite) {
for(auto& t : tags) {
if(t == txsTag || t.locality == tagLocalityLogRouter) {
if(t == txsTag || t.locality == tagLocalityTxs || t.locality == tagLocalityLogRouter) {
for(int loc : satelliteTagLocations[t == txsTag ? 0 : t.id + 1]) {
locations.push_back(locationOffset + loc);
}
@ -520,8 +520,9 @@ struct ILogSystem {
std::vector<Reference<IPeekCursor>> cursors;
std::vector<LogMessageVersion> epochEnds;
Version poppedVersion;
bool needsPopped;
MultiCursor( std::vector<Reference<IPeekCursor>> cursors, std::vector<LogMessageVersion> epochEnds );
MultiCursor( std::vector<Reference<IPeekCursor>> cursors, std::vector<LogMessageVersion> epochEnds, bool needsPopped = true );
virtual Reference<IPeekCursor> cloneNoMore();
virtual void setProtocolVersion( ProtocolVersion version );
@ -575,13 +576,14 @@ struct ILogSystem {
LogMessageVersion messageVersion;
Version end;
bool hasNextMessage;
bool withTags;
//FIXME: collectTags is needed to support upgrades from 5.X to 6.0. Remove this code when we no longer support that upgrade.
bool collectTags;
std::vector<Tag> tags;
void combineMessages();
BufferedCursor( std::vector<Reference<IPeekCursor>> cursors, Version begin, Version end, bool collectTags );
BufferedCursor( std::vector<Reference<IPeekCursor>> cursors, Version begin, Version end, bool withTags, bool collectTags = false );
virtual Reference<IPeekCursor> cloneNoMore();
virtual void setProtocolVersion( ProtocolVersion version );
@ -652,13 +654,15 @@ struct ILogSystem {
// Same contract as peek(), but can only peek from the logs elected in the same generation.
// If the preferred log server is down, a different log from the same generation will merge results locally before sending them to the log router.
virtual Reference<IPeekCursor> peekSpecial( UID dbgid, Version begin, Tag tag, int8_t peekLocality, Version localEnd ) = 0;
// Same contract as peek(), but it allows specifying a preferred peek locality for tags that do not have locality
virtual Reference<IPeekCursor> peekTxs( UID dbgid, Version begin, int8_t peekLocality, Version localEnd ) = 0;
// Same contract as peek(), but only for peeking the txsLocality. It allows specifying a preferred peek locality.
virtual Version getKnownCommittedVersion() = 0;
virtual Future<Void> onKnownCommittedVersionChange() = 0;
virtual void popTxs( Version upTo, int8_t popLocality = tagLocalityInvalid ) = 0;
virtual void pop( Version upTo, Tag tag, Version knownCommittedVersion = 0, int8_t popLocality = tagLocalityInvalid ) = 0;
// Permits, but does not require, the log subsystem to strip `tag` from any or all messages with message versions < (upTo,0)
// The popping of any given message may be arbitrarily delayed.
@ -705,6 +709,8 @@ struct ILogSystem {
virtual Tag getRandomRouterTag() = 0;
virtual Tag getRandomTxsTag() = 0;
virtual void stopRejoins() = 0;
// Returns the pseudo tag to be popped for the given process class. If the
@ -752,6 +758,10 @@ struct LogPushData : NonCopyable {
}
}
void addTxsTag() {
next_message_tags.push_back( logSystem->getRandomTxsTag() );
}
// addTag() adds a tag for the *next* message to be added
void addTag( Tag tag ) {
next_message_tags.push_back( tag );

View File

@ -42,19 +42,19 @@ public:
break;
}
when( wait( self->localityChanged ) ) {
self->cursor = self->logSystem->peekSpecial( UID(), self->recoveryLoc, self->tag, self->peekLocality ? self->peekLocality->get().primaryLocality : tagLocalityInvalid, self->peekLocality ? self->peekLocality->get().knownCommittedVersion : invalidVersion );
self->cursor = self->logSystem->peekTxs( UID(), self->recoveryLoc, self->peekLocality ? self->peekLocality->get().primaryLocality : tagLocalityInvalid, self->peekLocality ? self->peekLocality->get().knownCommittedVersion : invalidVersion );
self->localityChanged = self->peekLocality->onChange();
}
when( wait( delay(self->peekTypeSwitches==0 ? SERVER_KNOBS->DISK_QUEUE_ADAPTER_MIN_SWITCH_TIME : SERVER_KNOBS->DISK_QUEUE_ADAPTER_MAX_SWITCH_TIME)) ) {
self->peekTypeSwitches++;
if(self->peekTypeSwitches%3==1) {
self->cursor = self->logSystem->peek( UID(), self->recoveryLoc, self->tag, true );
self->cursor = self->logSystem->peekTxs( UID(), self->recoveryLoc, tagLocalityInvalid, invalidVersion );
self->localityChanged = Never();
} else if(self->peekTypeSwitches%3==2) {
self->cursor = self->logSystem->peekSpecial( UID(), self->recoveryLoc, self->tag, self->peekLocality ? self->peekLocality->get().secondaryLocality : tagLocalityInvalid, self->peekLocality ? self->peekLocality->get().knownCommittedVersion : invalidVersion );
self->cursor = self->logSystem->peekTxs( UID(), self->recoveryLoc, self->peekLocality ? self->peekLocality->get().secondaryLocality : tagLocalityInvalid, self->peekLocality ? self->peekLocality->get().knownCommittedVersion : invalidVersion );
self->localityChanged = self->peekLocality->onChange();
} else {
self->cursor = self->logSystem->peekSpecial( UID(), self->recoveryLoc, self->tag, self->peekLocality ? self->peekLocality->get().primaryLocality : tagLocalityInvalid, self->peekLocality ? self->peekLocality->get().knownCommittedVersion : invalidVersion );
self->cursor = self->logSystem->peekTxs( UID(), self->recoveryLoc, self->peekLocality ? self->peekLocality->get().primaryLocality : tagLocalityInvalid, self->peekLocality ? self->peekLocality->get().knownCommittedVersion : invalidVersion );
self->localityChanged = self->peekLocality->onChange();
}
}
@ -168,6 +168,6 @@ Future<LogSystemDiskQueueAdapter::CommitMessage> LogSystemDiskQueueAdapter::getC
return pcm.getFuture();
}
LogSystemDiskQueueAdapter* openDiskQueueAdapter( Reference<ILogSystem> logSystem, Tag tag, Reference<AsyncVar<PeekSpecialInfo>> peekLocality ) {
return new LogSystemDiskQueueAdapter( logSystem, tag, peekLocality );
LogSystemDiskQueueAdapter* openDiskQueueAdapter( Reference<ILogSystem> logSystem, Reference<AsyncVar<PeekTxsInfo>> peekLocality ) {
return new LogSystemDiskQueueAdapter( logSystem, peekLocality );
}

View File

@ -25,16 +25,16 @@
#include "fdbclient/FDBTypes.h"
#include "fdbserver/IDiskQueue.h"
struct PeekSpecialInfo {
struct PeekTxsInfo {
int8_t primaryLocality;
int8_t secondaryLocality;
Version knownCommittedVersion;
bool operator == (const PeekSpecialInfo& r) const {
bool operator == (const PeekTxsInfo& r) const {
return primaryLocality == r.primaryLocality && secondaryLocality == r.secondaryLocality && knownCommittedVersion == r.knownCommittedVersion;
}
PeekSpecialInfo(int8_t primaryLocality, int8_t secondaryLocality, Version knownCommittedVersion) : primaryLocality(primaryLocality), secondaryLocality(secondaryLocality), knownCommittedVersion(knownCommittedVersion) {}
PeekTxsInfo(int8_t primaryLocality, int8_t secondaryLocality, Version knownCommittedVersion) : primaryLocality(primaryLocality), secondaryLocality(secondaryLocality), knownCommittedVersion(knownCommittedVersion) {}
};
class LogSystemDiskQueueAdapter : public IDiskQueue {
@ -52,10 +52,10 @@ public:
// It does, however, peek the specified tag directly at recovery time.
LogSystemDiskQueueAdapter( Reference<ILogSystem> logSystem, Tag tag, Reference<AsyncVar<PeekSpecialInfo>> peekLocality, bool recover=true ) : logSystem(logSystem), tag(tag), peekLocality(peekLocality), enableRecovery(recover), recoveryLoc(1), recoveryQueueLoc(1), poppedUpTo(0), nextCommit(1), recoveryQueueDataSize(0), peekTypeSwitches(0) {
LogSystemDiskQueueAdapter( Reference<ILogSystem> logSystem, Reference<AsyncVar<PeekTxsInfo>> peekLocality, bool recover=true ) : logSystem(logSystem), peekLocality(peekLocality), enableRecovery(recover), recoveryLoc(1), recoveryQueueLoc(1), poppedUpTo(0), nextCommit(1), recoveryQueueDataSize(0), peekTypeSwitches(0) {
if (enableRecovery) {
localityChanged = peekLocality ? peekLocality->onChange() : Never();
cursor = logSystem->peekSpecial( UID(), 1, tag, peekLocality ? peekLocality->get().primaryLocality : tagLocalityInvalid, peekLocality ? peekLocality->get().knownCommittedVersion : invalidVersion );
cursor = logSystem->peekTxs( UID(), 1, peekLocality ? peekLocality->get().primaryLocality : tagLocalityInvalid, peekLocality ? peekLocality->get().knownCommittedVersion : invalidVersion );
}
}
@ -92,11 +92,10 @@ public:
virtual int getCommitOverhead() { return 0; } //SOMEDAY: could this be more accurate?
private:
Reference<AsyncVar<PeekSpecialInfo>> peekLocality;
Reference<AsyncVar<PeekTxsInfo>> peekLocality;
Future<Void> localityChanged;
Reference<ILogSystem::IPeekCursor> cursor;
int peekTypeSwitches;
Tag tag;
// Recovery state (used while readNext() is being called repeatedly)
bool enableRecovery;
@ -114,6 +113,6 @@ private:
friend class LogSystemDiskQueueAdapterImpl;
};
LogSystemDiskQueueAdapter* openDiskQueueAdapter( Reference<ILogSystem> logSystem, Tag tag, Reference<AsyncVar<PeekSpecialInfo>> peekLocality );
LogSystemDiskQueueAdapter* openDiskQueueAdapter( Reference<ILogSystem> logSystem, Reference<AsyncVar<PeekTxsInfo>> peekLocality );
#endif

View File

@ -797,7 +797,7 @@ Version ILogSystem::SetPeekCursor::popped() {
return poppedVersion;
}
ILogSystem::MultiCursor::MultiCursor( std::vector<Reference<IPeekCursor>> cursors, std::vector<LogMessageVersion> epochEnds ) : cursors(cursors), epochEnds(epochEnds), poppedVersion(0) {
ILogSystem::MultiCursor::MultiCursor( std::vector<Reference<IPeekCursor>> cursors, std::vector<LogMessageVersion> epochEnds, bool needsPopped ) : cursors(cursors), epochEnds(epochEnds), needsPopped(needsPopped), poppedVersion(0) {
for(int i = 0; i < std::min<int>(cursors.size(),SERVER_KNOBS->MULTI_CURSOR_PRE_FETCH_LIMIT); i++) {
cursors[cursors.size()-i-1]->getMore();
}
@ -841,7 +841,7 @@ const std::vector<Tag>& ILogSystem::MultiCursor::getTags() {
void ILogSystem::MultiCursor::advanceTo(LogMessageVersion n) {
while( cursors.size() > 1 && n >= epochEnds.back() ) {
poppedVersion = std::max(poppedVersion, cursors.back()->popped());
if(needsPopped) poppedVersion = std::max(poppedVersion, cursors.back()->popped());
cursors.pop_back();
epochEnds.pop_back();
}
@ -851,7 +851,7 @@ void ILogSystem::MultiCursor::advanceTo(LogMessageVersion n) {
Future<Void> ILogSystem::MultiCursor::getMore(int taskID) {
LogMessageVersion startVersion = cursors.back()->version();
while( cursors.size() > 1 && cursors.back()->version() >= epochEnds.back() ) {
poppedVersion = std::max(poppedVersion, cursors.back()->popped());
if(needsPopped) poppedVersion = std::max(poppedVersion, cursors.back()->popped());
cursors.pop_back();
epochEnds.pop_back();
}
@ -882,10 +882,11 @@ Version ILogSystem::MultiCursor::getMinKnownCommittedVersion() {
}
Version ILogSystem::MultiCursor::popped() {
ASSERT(needsPopped);
return std::max(poppedVersion, cursors.back()->popped());
}
ILogSystem::BufferedCursor::BufferedCursor( std::vector<Reference<IPeekCursor>> cursors, Version begin, Version end, bool collectTags ) : cursors(cursors), messageVersion(begin), end(end), collectTags(collectTags), hasNextMessage(false), messageIndex(0) {
ILogSystem::BufferedCursor::BufferedCursor( std::vector<Reference<IPeekCursor>> cursors, Version begin, Version end, bool withTags, bool collectTags ) : cursors(cursors), messageVersion(begin), end(end), withTags(withTags), collectTags(collectTags), hasNextMessage(false), messageIndex(0) {
messages.reserve(10000);
}
@ -948,15 +949,17 @@ void ILogSystem::BufferedCursor::nextMessage() {
}
StringRef ILogSystem::BufferedCursor::getMessage() {
ASSERT(false);
return StringRef();
ASSERT(!withTags);
return messages[messageIndex].message;
}
StringRef ILogSystem::BufferedCursor::getMessageWithTags() {
ASSERT(withTags);
return messages[messageIndex].message;
}
const std::vector<Tag>& ILogSystem::BufferedCursor::getTags() {
ASSERT(withTags);
return messages[messageIndex].tags;
}
@ -971,7 +974,7 @@ ACTOR Future<Void> bufferedGetMoreLoader( ILogSystem::BufferedCursor* self, Refe
return Void();
}
while(cursor->hasMessage()) {
self->messages.push_back(ILogSystem::BufferedCursor::BufferedMessage(cursor->arena(), self->collectTags ? cursor->getMessage() : cursor->getMessageWithTags(), cursor->getTags(), cursor->version()));
self->messages.push_back(ILogSystem::BufferedCursor::BufferedMessage(cursor->arena(), (!self->withTags || self->collectTags) ? cursor->getMessage() : cursor->getMessageWithTags(), !self->withTags ? std::vector<Tag>() : cursor->getTags(), cursor->version()));
cursor->nextMessage();
if(cursor->version().version >= maxVersion) {
return Void();

View File

@ -986,7 +986,7 @@ ACTOR Future<Void> commitBatch(
bool firstMessage = true;
for(auto m : msg.messages) {
if(firstMessage) {
toCommit.addTag(txsTag);
toCommit.addTxsTag();
}
toCommit.addMessage(StringRef(m.begin(), m.size()), !firstMessage);
firstMessage = false;
@ -1033,7 +1033,7 @@ ACTOR Future<Void> commitBatch(
self->txsPopVersions.emplace_back(commitVersion, msg.popTo);
}
self->logSystem->pop(msg.popTo, txsTag);
self->logSystem->popTxs(msg.popTo);
/////// Phase 5: Replies (CPU bound; no particular order required, though ordered execution would be best for latency)
if ( prevVersion && commitVersion - prevVersion < SERVER_KNOBS->MAX_VERSIONS_IN_FLIGHT/2 )
@ -1505,7 +1505,7 @@ ACTOR Future<Void> monitorRemoteCommitted(ProxyCommitData* self) {
while(self->txsPopVersions.size() && self->txsPopVersions.front().first <= minVersion) {
self->lastTxsPop = self->txsPopVersions.front().second;
self->logSystem->pop(self->txsPopVersions.front().second, txsTag, 0, tagLocalityRemoteLog);
self->logSystem->popTxs(self->txsPopVersions.front().second, tagLocalityRemoteLog);
self->txsPopVersions.pop_front();
}
@ -1563,7 +1563,7 @@ ACTOR Future<Void> masterProxyServerCore(
r->value().emplace_back(0,0);
commitData.logSystem = ILogSystem::fromServerDBInfo(proxy.id(), commitData.db->get(), false, addActor);
commitData.logAdapter = new LogSystemDiskQueueAdapter(commitData.logSystem, txsTag, Reference<AsyncVar<PeekSpecialInfo>>(), false);
commitData.logAdapter = new LogSystemDiskQueueAdapter(commitData.logSystem, Reference<AsyncVar<PeekTxsInfo>>(), false);
commitData.txnStateStore = keyValueStoreLogSystem(commitData.logAdapter, proxy.id(), 2e9, true, true, true);
createWhitelistBinPathVec(whitelistBinPaths, commitData.whitelistedBinPathVec);
@ -1595,7 +1595,7 @@ ACTOR Future<Void> masterProxyServerCore(
for(auto it : commitData.tag_popped) {
commitData.logSystem->pop(it.second, it.first);
}
commitData.logSystem->pop(commitData.lastTxsPop, txsTag, 0, tagLocalityRemoteLog);
commitData.logSystem->popTxs(commitData.lastTxsPop, tagLocalityRemoteLog);
}
Optional<LatencyBandConfig> newLatencyBandConfig = commitData.db->get().latencyBandConfig;

View File

@ -48,7 +48,7 @@ namespace oldTLog_4_6 {
typedef int16_t OldTag;
OldTag convertTag( Tag tag ) {
if(tag == invalidTag) return invalidTagOld;
if(tag == invalidTag || tag.locality == tagLocalityTxs) return invalidTagOld;
if(tag == txsTag) return txsTagOld;
ASSERT(tag.id >= 0);
return tag.id;

View File

@ -195,6 +195,7 @@ static const KeyRangeRef persistCurrentVersionKeys = KeyRangeRef( LiteralStringR
static const KeyRangeRef persistKnownCommittedVersionKeys = KeyRangeRef( LiteralStringRef( "knownCommitted/" ), LiteralStringRef( "knownCommitted0" ) );
static const KeyRangeRef persistLocalityKeys = KeyRangeRef( LiteralStringRef( "Locality/" ), LiteralStringRef( "Locality0" ) );
static const KeyRangeRef persistLogRouterTagsKeys = KeyRangeRef( LiteralStringRef( "LogRouterTags/" ), LiteralStringRef( "LogRouterTags0" ) );
static const KeyRangeRef persistTxsTagsKeys = KeyRangeRef( LiteralStringRef( "TxsTags/" ), LiteralStringRef( "TxsTags0" ) );
static const KeyRange persistTagMessagesKeys = prefixRange(LiteralStringRef("TagMsg/"));
static const KeyRange persistTagPoppedKeys = prefixRange(LiteralStringRef("TagPop/"));
@ -333,7 +334,7 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
auto const& m = self->versionMessages.front();
++messagesErased;
if(self->tag != txsTag) {
if(self->tag.locality != tagLocalityTxs && self->tag != txsTag) {
sizes.first -= m.second.expectedSize();
} else {
sizes.second -= m.second.expectedSize();
@ -433,9 +434,10 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
Future<Void> terminated;
FlowLock execOpLock;
bool execOpCommitInProgress;
int txsTags;
explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, bool isPrimary, int logRouterTags, UID recruitmentID, std::vector<Tag> tags) : tLogData(tLogData), knownCommittedVersion(0), logId(interf.id()),
cc("TLog", interf.id().toString()), bytesInput("BytesInput", cc), bytesDurable("BytesDurable", cc), remoteTag(remoteTag), isPrimary(isPrimary), logRouterTags(logRouterTags), recruitmentID(recruitmentID),
explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, bool isPrimary, int logRouterTags, int txsTags, UID recruitmentID, std::vector<Tag> tags) : tLogData(tLogData), knownCommittedVersion(0), logId(interf.id()),
cc("TLog", interf.id().toString()), bytesInput("BytesInput", cc), bytesDurable("BytesDurable", cc), remoteTag(remoteTag), isPrimary(isPrimary), logRouterTags(logRouterTags), txsTags(txsTags), recruitmentID(recruitmentID),
logSystem(new AsyncVar<Reference<ILogSystem>>()), logRouterPoppedVersion(0), durableKnownCommittedVersion(0), minKnownCommittedVersion(0), allTags(tags.begin(), tags.end()), terminated(tLogData->terminated.getFuture()),
// These are initialized differently on init() or recovery
recoveryCount(), stopped(false), initialized(false), queueCommittingVersion(0), newPersistentDataVersion(invalidVersion), unrecoveredBefore(1), recoveredAt(1), unpoppedRecoveredTags(0),
@ -482,6 +484,7 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
tLogData->persistentData->clear( singleKeyRange(logIdKey.withPrefix(persistKnownCommittedVersionKeys.begin)) );
tLogData->persistentData->clear( singleKeyRange(logIdKey.withPrefix(persistLocalityKeys.begin)) );
tLogData->persistentData->clear( singleKeyRange(logIdKey.withPrefix(persistLogRouterTagsKeys.begin)) );
tLogData->persistentData->clear( singleKeyRange(logIdKey.withPrefix(persistTxsTagsKeys.begin)) );
tLogData->persistentData->clear( singleKeyRange(logIdKey.withPrefix(persistRecoveryCountKeys.begin)) );
Key msgKey = logIdKey.withPrefix(persistTagMessagesKeys.begin);
tLogData->persistentData->clear( KeyRangeRef( msgKey, strinc(msgKey) ) );
@ -814,7 +817,7 @@ void commitMessages( TLogData* self, Reference<LogData> logData, Version version
block.append(block.arena(), msg.message.begin(), msg.message.size());
for(auto tag : msg.tags) {
if(logData->locality == tagLocalitySatellite) {
if(!(tag == txsTag || tag.locality == tagLocalityLogRouter)) {
if(!(tag.locality == tagLocalityTxs || tag.locality == tagLocalityLogRouter || tag == txsTag)) {
continue;
}
} else if(!(logData->locality == tagLocalitySpecial || logData->locality == tag.locality || tag.locality < 0)) {
@ -827,6 +830,9 @@ void commitMessages( TLogData* self, Reference<LogData> logData, Version version
}
tag.id = tag.id % logData->logRouterTags;
}
if(tag.locality == tagLocalityTxs) {
tag.id = tag.id % logData->txsTags;
}
Reference<LogData::TagData> tagData = logData->getTagData(tag);
if(!tagData) {
tagData = logData->createTagData(tag, 0, true, true, false);
@ -837,7 +843,7 @@ void commitMessages( TLogData* self, Reference<LogData> logData, Version version
if(tagData->versionMessages.back().second.expectedSize() > SERVER_KNOBS->MAX_MESSAGE_SIZE) {
TraceEvent(SevWarnAlways, "LargeMessage").detail("Size", tagData->versionMessages.back().second.expectedSize());
}
if (tag != txsTag) {
if (tag.locality != tagLocalityTxs && tag != txsTag) {
expectedBytes += tagData->versionMessages.back().second.expectedSize();
} else {
txsBytes += tagData->versionMessages.back().second.expectedSize();
@ -905,7 +911,7 @@ std::deque<std::pair<Version, LengthPrefixedStringRef>> & getVersionMessages( Re
};
ACTOR Future<Void> tLogPopCore( TLogData* self, Tag inputTag, Version to, Reference<LogData> logData ) {
if (self->ignorePopRequest && inputTag != txsTag) {
if (self->ignorePopRequest && inputTag.locality != tagLocalityTxs && inputTag != txsTag) {
TraceEvent("IgnoringPopRequest").detail("IgnorePopDeadline", self->ignorePopDeadline);
if (self->toBePopped.find(inputTag) == self->toBePopped.end()
@ -1062,7 +1068,7 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
wait( delay(0.0, TaskLowPriority) );
}
if( req.begin <= logData->persistentDataDurableVersion && req.tag != txsTag) {
if( req.begin <= logData->persistentDataDurableVersion && req.tag.locality != tagLocalityTxs && req.tag != txsTag) {
// Reading spilled data will almost always imply that the storage server is >5s behind the rest
// of the cluster. We shouldn't prioritize spending CPU on helping this server catch up
// slightly faster over keeping the rest of the cluster operating normally.
@ -1303,7 +1309,7 @@ void execProcessingHelper(TLogData* self,
rd >> messageLength >> sub >> tagCount;
for (int i = 0; i < tagCount; i++) {
rd >> tmpTag;
if (tmpTag == txsTag) {
if (tmpTag.locality == tagLocalityTxs || tmpTag == txsTag) {
hasTxsTag = true;
}
execTags->push_back(execTags->arena(), tmpTag);
@ -1632,6 +1638,7 @@ ACTOR Future<Void> initPersistentState( TLogData* self, Reference<LogData> logDa
storage->set( KeyValueRef( BinaryWriter::toValue(logData->logId,Unversioned()).withPrefix(persistKnownCommittedVersionKeys.begin), BinaryWriter::toValue(logData->knownCommittedVersion, Unversioned()) ) );
storage->set( KeyValueRef( BinaryWriter::toValue(logData->logId,Unversioned()).withPrefix(persistLocalityKeys.begin), BinaryWriter::toValue(logData->locality, Unversioned()) ) );
storage->set( KeyValueRef( BinaryWriter::toValue(logData->logId,Unversioned()).withPrefix(persistLogRouterTagsKeys.begin), BinaryWriter::toValue(logData->logRouterTags, Unversioned()) ) );
storage->set( KeyValueRef( BinaryWriter::toValue(logData->logId,Unversioned()).withPrefix(persistTxsTagsKeys.begin), BinaryWriter::toValue(logData->txsTags, Unversioned()) ) );
storage->set( KeyValueRef( BinaryWriter::toValue(logData->logId,Unversioned()).withPrefix(persistRecoveryCountKeys.begin), BinaryWriter::toValue(logData->recoveryCount, Unversioned()) ) );
for(auto tag : logData->allTags) {
@ -2039,12 +2046,13 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
state Future<Standalone<VectorRef<KeyValueRef>>> fKnownCommitted = storage->readRange(persistKnownCommittedVersionKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fLocality = storage->readRange(persistLocalityKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fLogRouterTags = storage->readRange(persistLogRouterTagsKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fTxsTags = storage->readRange(persistTxsTagsKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fRecoverCounts = storage->readRange(persistRecoveryCountKeys);
// FIXME: metadata in queue?
wait( waitForAll( (vector<Future<Optional<Value>>>(), fFormat ) ) );
wait( waitForAll( (vector<Future<Standalone<VectorRef<KeyValueRef>>>>(), fVers, fKnownCommitted, fLocality, fLogRouterTags, fRecoverCounts) ) );
wait( waitForAll( (vector<Future<Standalone<VectorRef<KeyValueRef>>>>(), fVers, fKnownCommitted, fLocality, fLogRouterTags, fTxsTags, fRecoverCounts) ) );
if (fFormat.get().present() && !persistFormatReadableRange.contains( fFormat.get().get() )) {
//FIXME: remove when we no longer need to test upgrades from 4.X releases
@ -2096,6 +2104,11 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
id_logRouterTags[ BinaryReader::fromStringRef<UID>(it.key.removePrefix(persistLogRouterTagsKeys.begin), Unversioned())] = BinaryReader::fromStringRef<int>( it.value, Unversioned() );
}
state std::map<UID, int> id_txsTags;
for(auto it : fTxsTags.get()) {
id_txsTags[ BinaryReader::fromStringRef<UID>(it.key.removePrefix(persistTxsTagsKeys.begin), Unversioned())] = BinaryReader::fromStringRef<int>( it.value, Unversioned() );
}
state std::map<UID, Version> id_knownCommitted;
for(auto it : fKnownCommitted.get()) {
id_knownCommitted[ BinaryReader::fromStringRef<UID>(it.key.removePrefix(persistKnownCommittedVersionKeys.begin), Unversioned())] = BinaryReader::fromStringRef<Version>( it.value, Unversioned() );
@ -2121,7 +2134,7 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
DUMPTOKEN( recruited.confirmRunning );
//We do not need the remoteTag, because we will not be loading any additional data
logData = Reference<LogData>( new LogData(self, recruited, Tag(), true, id_logRouterTags[id1], UID(), std::vector<Tag>()) );
logData = Reference<LogData>( new LogData(self, recruited, Tag(), true, id_logRouterTags[id1], id_txsTags[id1], UID(), std::vector<Tag>()) );
logData->locality = id_locality[id1];
logData->stopped = true;
self->id_data[id1] = logData;
@ -2304,7 +2317,7 @@ ACTOR Future<Void> tLogStart( TLogData* self, InitializeTLogRequest req, Localit
it.second->stopCommit.trigger();
}
state Reference<LogData> logData = Reference<LogData>( new LogData(self, recruited, req.remoteTag, req.isPrimary, req.logRouterTags, req.recruitmentID, req.allTags) );
state Reference<LogData> logData = Reference<LogData>( new LogData(self, recruited, req.remoteTag, req.isPrimary, req.logRouterTags, req.txsTags, req.recruitmentID, req.allTags) );
self->id_data[recruited.id()] = logData;
logData->locality = req.locality;
logData->recoveryCount = req.epoch;

View File

@ -205,6 +205,7 @@ static const KeyRangeRef persistKnownCommittedVersionKeys = KeyRangeRef( Literal
static const KeyRef persistRecoveryLocationKey = KeyRef( LiteralStringRef( "recoveryLocation" ) );
static const KeyRangeRef persistLocalityKeys = KeyRangeRef( LiteralStringRef( "Locality/" ), LiteralStringRef( "Locality0" ) );
static const KeyRangeRef persistLogRouterTagsKeys = KeyRangeRef( LiteralStringRef( "LogRouterTags/" ), LiteralStringRef( "LogRouterTags0" ) );
static const KeyRangeRef persistTxsTagsKeys = KeyRangeRef( LiteralStringRef( "TxsTags/" ), LiteralStringRef( "TxsTags0" ) );
static const KeyRange persistTagMessagesKeys = prefixRange(LiteralStringRef("TagMsg/"));
static const KeyRange persistTagMessageRefsKeys = prefixRange(LiteralStringRef("TagMsgRef/"));
static const KeyRange persistTagPoppedKeys = prefixRange(LiteralStringRef("TagPop/"));
@ -389,7 +390,7 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
auto const& m = self->versionMessages.front();
++messagesErased;
if(self->tag != txsTag) {
if(self->tag.locality != tagLocalityTxs && self->tag != txsTag) {
sizes.first -= m.second.expectedSize();
} else {
sizes.second -= m.second.expectedSize();
@ -491,9 +492,10 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
Future<Void> terminated;
FlowLock execOpLock;
bool execOpCommitInProgress;
int txsTags;
explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, bool isPrimary, int logRouterTags, UID recruitmentID, ProtocolVersion protocolVersion, std::vector<Tag> tags) : tLogData(tLogData), knownCommittedVersion(0), logId(interf.id()),
cc("TLog", interf.id().toString()), bytesInput("BytesInput", cc), bytesDurable("BytesDurable", cc), remoteTag(remoteTag), isPrimary(isPrimary), logRouterTags(logRouterTags), recruitmentID(recruitmentID), protocolVersion(protocolVersion),
explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, bool isPrimary, int logRouterTags, int txsTags, UID recruitmentID, ProtocolVersion protocolVersion, std::vector<Tag> tags) : tLogData(tLogData), knownCommittedVersion(0), logId(interf.id()),
cc("TLog", interf.id().toString()), bytesInput("BytesInput", cc), bytesDurable("BytesDurable", cc), remoteTag(remoteTag), isPrimary(isPrimary), logRouterTags(logRouterTags), txsTags(txsTags), recruitmentID(recruitmentID), protocolVersion(protocolVersion),
logSystem(new AsyncVar<Reference<ILogSystem>>()), logRouterPoppedVersion(0), durableKnownCommittedVersion(0), minKnownCommittedVersion(0), queuePoppedVersion(0), allTags(tags.begin(), tags.end()), terminated(tLogData->terminated.getFuture()),
// These are initialized differently on init() or recovery
recoveryCount(), stopped(false), initialized(false), queueCommittingVersion(0), newPersistentDataVersion(invalidVersion), unrecoveredBefore(1), recoveredAt(1), unpoppedRecoveredTags(0),
@ -542,6 +544,7 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
tLogData->persistentData->clear( singleKeyRange(logIdKey.withPrefix(persistKnownCommittedVersionKeys.begin)) );
tLogData->persistentData->clear( singleKeyRange(logIdKey.withPrefix(persistLocalityKeys.begin)) );
tLogData->persistentData->clear( singleKeyRange(logIdKey.withPrefix(persistLogRouterTagsKeys.begin)) );
tLogData->persistentData->clear( singleKeyRange(logIdKey.withPrefix(persistTxsTagsKeys.begin)) );
tLogData->persistentData->clear( singleKeyRange(logIdKey.withPrefix(persistRecoveryCountKeys.begin)) );
tLogData->persistentData->clear( singleKeyRange(logIdKey.withPrefix(persistProtocolVersionKeys.begin)) );
tLogData->persistentData->clear( singleKeyRange(logIdKey.withPrefix(persistRecoveryLocationKey)) );
@ -637,7 +640,7 @@ void updatePersistentPopped( TLogData* self, Reference<LogData> logData, Referen
if (data->nothingPersistent) return;
if (data->tag == txsTag) {
if (data->tag.locality == tagLocalityTxs || data->tag == txsTag) {
self->persistentData->clear( KeyRangeRef(
persistTagMessagesKey( logData->logId, data->tag, Version(0) ),
persistTagMessagesKey( logData->logId, data->tag, data->popped ) ) );
@ -654,7 +657,7 @@ void updatePersistentPopped( TLogData* self, Reference<LogData> logData, Referen
ACTOR Future<Void> updatePoppedLocation( TLogData* self, Reference<LogData> logData, Reference<LogData::TagData> data ) {
// txsTag is spilled by value, so we do not need to track its popped location.
if (data->tag == txsTag) {
if (data->tag.locality == tagLocalityTxs || data->tag == txsTag) {
return Void();
}
@ -724,7 +727,7 @@ ACTOR Future<Void> popDiskQueue( TLogData* self, Reference<LogData> logData ) {
for(int tagLocality = 0; tagLocality < logData->tag_data.size(); tagLocality++) {
for(int tagId = 0; tagId < logData->tag_data[tagLocality].size(); tagId++) {
Reference<LogData::TagData> tagData = logData->tag_data[tagLocality][tagId];
if (tagData && tagData->tag != txsTag && !tagData->nothingPersistent) {
if (tagData && tagData->tag.locality != tagLocalityTxs && tagData->tag != txsTag && !tagData->nothingPersistent) {
minLocation = std::min(minLocation, tagData->poppedLocation);
minVersion = std::min(minVersion, tagData->popped);
}
@ -783,7 +786,7 @@ ACTOR Future<Void> updatePersistentData( TLogData* self, Reference<LogData> logD
anyData = true;
tagData->nothingPersistent = false;
if (tagData->tag == txsTag) {
if (tagData->tag.locality == tagLocalityTxs || tagData->tag == txsTag) {
// spill txsTag by value
wr = BinaryWriter( Unversioned() );
for(; msg != tagData->versionMessages.end() && msg->first == currentVersion; ++msg) {
@ -889,7 +892,7 @@ ACTOR Future<Void> updatePersistentData( TLogData* self, Reference<LogData> logD
for(tagId = 0; tagId < logData->tag_data[tagLocality].size(); tagId++) {
Reference<LogData::TagData> tagData = logData->tag_data[tagLocality][tagId];
if (tagData) {
if (tagData->tag == txsTag) {
if (tagData->tag.locality == tagLocalityTxs || tagData->tag == txsTag) {
minVersion = std::min(minVersion, newPersistentDataVersion);
} else {
minVersion = std::min(minVersion, tagData->popped);
@ -1064,7 +1067,7 @@ void commitMessages( TLogData* self, Reference<LogData> logData, Version version
block.append(block.arena(), msg.message.begin(), msg.message.size());
for(auto tag : msg.tags) {
if(logData->locality == tagLocalitySatellite) {
if(!(tag == txsTag || tag.locality == tagLocalityLogRouter)) {
if(!(tag.locality == tagLocalityTxs || tag.locality == tagLocalityLogRouter || tag == txsTag)) {
continue;
}
} else if(!(logData->locality == tagLocalitySpecial || logData->locality == tag.locality || tag.locality < 0)) {
@ -1077,6 +1080,9 @@ void commitMessages( TLogData* self, Reference<LogData> logData, Version version
}
tag.id = tag.id % logData->logRouterTags;
}
if(tag.locality == tagLocalityTxs) {
tag.id = tag.id % logData->txsTags;
}
Reference<LogData::TagData> tagData = logData->getTagData(tag);
if(!tagData) {
tagData = logData->createTagData(tag, 0, true, true, false);
@ -1087,7 +1093,7 @@ void commitMessages( TLogData* self, Reference<LogData> logData, Version version
if(tagData->versionMessages.back().second.expectedSize() > SERVER_KNOBS->MAX_MESSAGE_SIZE) {
TraceEvent(SevWarnAlways, "LargeMessage").detail("Size", tagData->versionMessages.back().second.expectedSize());
}
if (tag != txsTag) {
if (tag.locality != tagLocalityTxs && tag != txsTag) {
expectedBytes += tagData->versionMessages.back().second.expectedSize();
} else {
txsBytes += tagData->versionMessages.back().second.expectedSize();
@ -1155,7 +1161,7 @@ std::deque<std::pair<Version, LengthPrefixedStringRef>> & getVersionMessages( Re
};
ACTOR Future<Void> tLogPopCore( TLogData* self, Tag inputTag, Version to, Reference<LogData> logData ) {
if (self->ignorePopRequest && inputTag != txsTag) {
if (self->ignorePopRequest && inputTag.locality != tagLocalityTxs && inputTag != txsTag) {
TraceEvent("IgnoringPopRequest").detail("IgnorePopDeadline", self->ignorePopDeadline);
if (self->toBePopped.find(inputTag) == self->toBePopped.end()
@ -1296,7 +1302,7 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
state BinaryWriter messages2(Unversioned());
state int sequence = -1;
state UID peekId;
if(req.sequence.present()) {
try {
peekId = req.sequence.get().first;
@ -1349,7 +1355,7 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
wait( delay(0.0, TaskLowPriority) );
}
if( req.begin <= logData->persistentDataDurableVersion && req.tag != txsTag) {
if( req.begin <= logData->persistentDataDurableVersion && req.tag.locality != tagLocalityTxs && req.tag != txsTag) {
// Reading spilled data will almost always imply that the storage server is >5s behind the rest
// of the cluster. We shouldn't prioritize spending CPU on helping this server catch up
// slightly faster over keeping the rest of the cluster operating normally.
@ -1402,7 +1408,7 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
peekMessagesFromMemory( logData, req, messages2, endVersion );
if (req.tag == txsTag) {
if (req.tag.locality == tagLocalityTxs || req.tag == txsTag) {
Standalone<VectorRef<KeyValueRef>> kvs = wait(
self->persistentData->readRange(KeyRangeRef(
persistTagMessagesKey(logData->logId, req.tag, req.begin),
@ -1670,7 +1676,7 @@ void execProcessingHelper(TLogData* self,
rd >> messageLength >> sub >> tagCount;
for (int i = 0; i < tagCount; i++) {
rd >> tmpTag;
if (tmpTag == txsTag) {
if (tmpTag.locality == tagLocalityTxs || tmpTag == txsTag) {
hasTxsTag = true;
}
execTags->push_back(execTags->arena(), tmpTag);
@ -2001,6 +2007,7 @@ ACTOR Future<Void> initPersistentState( TLogData* self, Reference<LogData> logDa
storage->set( KeyValueRef( BinaryWriter::toValue(logData->logId,Unversioned()).withPrefix(persistKnownCommittedVersionKeys.begin), BinaryWriter::toValue(logData->knownCommittedVersion, Unversioned()) ) );
storage->set( KeyValueRef( BinaryWriter::toValue(logData->logId,Unversioned()).withPrefix(persistLocalityKeys.begin), BinaryWriter::toValue(logData->locality, Unversioned()) ) );
storage->set( KeyValueRef( BinaryWriter::toValue(logData->logId,Unversioned()).withPrefix(persistLogRouterTagsKeys.begin), BinaryWriter::toValue(logData->logRouterTags, Unversioned()) ) );
storage->set( KeyValueRef( BinaryWriter::toValue(logData->logId,Unversioned()).withPrefix(persistTxsTagsKeys.begin), BinaryWriter::toValue(logData->txsTags, Unversioned()) ) );
storage->set( KeyValueRef( BinaryWriter::toValue(logData->logId,Unversioned()).withPrefix(persistRecoveryCountKeys.begin), BinaryWriter::toValue(logData->recoveryCount, Unversioned()) ) );
storage->set( KeyValueRef( BinaryWriter::toValue(logData->logId,Unversioned()).withPrefix(persistProtocolVersionKeys.begin), BinaryWriter::toValue(logData->protocolVersion, Unversioned()) ) );
@ -2417,13 +2424,14 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
state Future<Standalone<VectorRef<KeyValueRef>>> fKnownCommitted = storage->readRange(persistKnownCommittedVersionKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fLocality = storage->readRange(persistLocalityKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fLogRouterTags = storage->readRange(persistLogRouterTagsKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fTxsTags = storage->readRange(persistTxsTagsKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fRecoverCounts = storage->readRange(persistRecoveryCountKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fProtocolVersions = storage->readRange(persistProtocolVersionKeys);
// FIXME: metadata in queue?
wait( waitForAll( (vector<Future<Optional<Value>>>(), fFormat, fRecoveryLocation ) ) );
wait( waitForAll( (vector<Future<Standalone<VectorRef<KeyValueRef>>>>(), fVers, fKnownCommitted, fLocality, fLogRouterTags, fRecoverCounts, fProtocolVersions ) ) );
wait( waitForAll( (vector<Future<Standalone<VectorRef<KeyValueRef>>>>(), fVers, fKnownCommitted, fLocality, fLogRouterTags, fTxsTags, fRecoverCounts, fProtocolVersions ) ) );
if (fFormat.get().present() && !persistFormatReadableRange.contains( fFormat.get().get() )) {
//FIXME: remove when we no longer need to test upgrades from 4.X releases
@ -2465,6 +2473,11 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
id_logRouterTags[ BinaryReader::fromStringRef<UID>(it.key.removePrefix(persistLogRouterTagsKeys.begin), Unversioned())] = BinaryReader::fromStringRef<int>( it.value, Unversioned() );
}
state std::map<UID, int> id_txsTags;
for(auto it : fTxsTags.get()) {
id_txsTags[ BinaryReader::fromStringRef<UID>(it.key.removePrefix(persistTxsTagsKeys.begin), Unversioned())] = BinaryReader::fromStringRef<int>( it.value, Unversioned() );
}
state std::map<UID, Version> id_knownCommitted;
for(auto it : fKnownCommitted.get()) {
id_knownCommitted[ BinaryReader::fromStringRef<UID>(it.key.removePrefix(persistKnownCommittedVersionKeys.begin), Unversioned())] = BinaryReader::fromStringRef<Version>( it.value, Unversioned() );
@ -2498,7 +2511,7 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
ProtocolVersion protocolVersion = BinaryReader::fromStringRef<ProtocolVersion>( fProtocolVersions.get()[idx].value, Unversioned() );
//We do not need the remoteTag, because we will not be loading any additional data
logData = Reference<LogData>( new LogData(self, recruited, Tag(), true, id_logRouterTags[id1], UID(), protocolVersion, std::vector<Tag>()) );
logData = Reference<LogData>( new LogData(self, recruited, Tag(), true, id_logRouterTags[id1], id_txsTags[id1], UID(), protocolVersion, std::vector<Tag>()) );
logData->locality = id_locality[id1];
logData->stopped = true;
self->id_data[id1] = logData;
@ -2700,7 +2713,7 @@ ACTOR Future<Void> tLogStart( TLogData* self, InitializeTLogRequest req, Localit
it.second->stopCommit.trigger();
}
state Reference<LogData> logData = Reference<LogData>( new LogData(self, recruited, req.remoteTag, req.isPrimary, req.logRouterTags, req.recruitmentID, currentProtocolVersion, req.allTags) );
state Reference<LogData> logData = Reference<LogData>( new LogData(self, recruited, req.remoteTag, req.isPrimary, req.logRouterTags, req.txsTags, req.recruitmentID, currentProtocolVersion, req.allTags) );
self->id_data[recruited.id()] = logData;
logData->locality = req.locality;
logData->recoveryCount = req.epoch;

View File

@ -454,7 +454,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
foundSpecial = true;
}
if(log->isLocal && log->logServers.size() && (log->locality == tagLocalitySpecial || log->locality == tagLocalityUpgraded || log->locality == tag.locality ||
tag == txsTag || tag.locality == tagLocalityLogRouter || (tag.locality == tagLocalityUpgraded && log->locality != tagLocalitySatellite))) {
tag == txsTag || tag.locality == tagLocalityTxs || tag.locality == tagLocalityLogRouter || (tag.locality == tagLocalityUpgraded && log->locality != tagLocalitySatellite))) {
lastBegin = std::max(lastBegin, log->startVersion);
localSets.push_back(log);
if(log->locality != tagLocalitySatellite) {
@ -481,7 +481,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
int i = 0;
while(begin < lastBegin) {
if(i == oldLogData.size()) {
if(tag == txsTag) {
if(tag == txsTag || tag.locality == tagLocalityTxs) {
break;
}
TraceEvent("TLogPeekAllDead", dbgid).detail("Tag", tag.toString()).detail("Begin", begin).detail("End", end).detail("LastBegin", lastBegin).detail("OldLogDataSize", oldLogData.size());
@ -497,7 +497,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
thisSpecial = true;
}
if(log->isLocal && log->logServers.size() && (log->locality == tagLocalitySpecial || log->locality == tagLocalityUpgraded || log->locality == tag.locality ||
tag == txsTag || tag.locality == tagLocalityLogRouter || (tag.locality == tagLocalityUpgraded && log->locality != tagLocalitySatellite))) {
tag == txsTag || tag.locality == tagLocalityTxs || tag.locality == tagLocalityLogRouter || (tag.locality == tagLocalityUpgraded && log->locality != tagLocalitySatellite))) {
thisBegin = std::max(thisBegin, log->startVersion);
localOldSets.push_back(log);
if(log->locality != tagLocalitySatellite) {
@ -624,7 +624,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
for(auto tag : tags) {
cursors.push_back(peek(dbgid, begin, tag, parallelGetMore));
}
return Reference<ILogSystem::BufferedCursor>( new ILogSystem::BufferedCursor(cursors, begin, end.present() ? end.get() + 1 : getPeekEnd(), tLogs[0]->locality == tagLocalityUpgraded) );
return Reference<ILogSystem::BufferedCursor>( new ILogSystem::BufferedCursor(cursors, begin, end.present() ? end.get() + 1 : getPeekEnd(), true, tLogs[0]->locality == tagLocalityUpgraded) );
}
Reference<IPeekCursor> peekLocal( UID dbgid, Tag tag, Version begin, Version end, bool useMergePeekCursors, int8_t peekLocality = tagLocalityInvalid ) {
@ -682,7 +682,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
int i = 0;
while(begin < lastBegin) {
if(i == oldLogData.size()) {
if(tag == txsTag && cursors.size()) {
if((tag == txsTag || tag.locality == tagLocalityTxs) && cursors.size()) {
break;
}
TraceEvent("TLogPeekLocalDead", dbgid).detail("Tag", tag.toString()).detail("Begin", begin).detail("End", end).detail("LastBegin", lastBegin).detail("OldLogDataSize", oldLogData.size());
@ -738,30 +738,67 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
}
}
virtual Reference<IPeekCursor> peekSpecial( UID dbgid, Version begin, Tag tag, int8_t peekLocality, Version localEnd ) {
virtual Reference<IPeekCursor> peekTxs( UID dbgid, Version begin, int8_t peekLocality, Version localEnd ) {
Version end = getEnd();
TraceEvent("TLogPeekSpecial", dbgid).detail("Begin", begin).detail("End", end).detail("LocalEnd", localEnd).detail("PeekLocality", peekLocality);
if(!tLogs.size()) {
TraceEvent("TLogPeekTxsNoLogs", dbgid);
return Reference<ILogSystem::ServerPeekCursor>( new ILogSystem::ServerPeekCursor( Reference<AsyncVar<OptionalInterface<TLogInterface>>>(), txsTag, begin, end, false, false ) );
}
TraceEvent("TLogPeekTxs", dbgid).detail("Begin", begin).detail("End", end).detail("LocalEnd", localEnd).detail("PeekLocality", peekLocality);
if(peekLocality < 0 || localEnd == invalidVersion || localEnd <= begin) {
return peekAll(dbgid, begin, end, tag, true);
std::vector< Reference<ILogSystem::IPeekCursor> > cursors;
for(int i = 0; i < tLogs[0]->logServers.size(); i++) {
cursors.push_back(peekAll(dbgid, begin, end, Tag(tagLocalityTxs, i), true));
}
//SOMEDAY: remove once upgrades from 6.2 are no longer supported
cursors.push_back(peekAll(dbgid, begin, end, txsTag, true));
return Reference<ILogSystem::BufferedCursor>( new ILogSystem::BufferedCursor(cursors, begin, end, false) );
}
try {
if(localEnd >= end) {
return peekLocal(dbgid, tag, begin, end, true, peekLocality);
std::vector< Reference<ILogSystem::IPeekCursor> > cursors;
for(int i = 0; i < tLogs[0]->logServers.size(); i++) {
cursors.push_back(peekLocal(dbgid, Tag(tagLocalityTxs, i), begin, end, true, peekLocality));
}
//SOMEDAY: remove once upgrades from 6.2 are no longer supported
cursors.push_back(peekLocal(dbgid, txsTag, begin, end, true, peekLocality));
return Reference<ILogSystem::BufferedCursor>( new ILogSystem::BufferedCursor(cursors, begin, end, false) );
}
std::vector< Reference<ILogSystem::IPeekCursor> > cursors;
std::vector< LogMessageVersion > epochEnds;
cursors.resize(2);
cursors[1] = peekLocal(dbgid, tag, begin, localEnd, true, peekLocality);
cursors[0] = peekAll(dbgid, localEnd, end, tag, true);
std::vector< Reference<ILogSystem::IPeekCursor> > localCursors;
std::vector< Reference<ILogSystem::IPeekCursor> > allCursors;
for(int i = 0; i < tLogs[0]->logServers.size(); i++) {
localCursors.push_back(peekLocal(dbgid, Tag(tagLocalityTxs, i), begin, localEnd, true, peekLocality));
allCursors.push_back(peekAll(dbgid, localEnd, end, Tag(tagLocalityTxs, i), true));
}
//SOMEDAY: remove once upgrades from 6.2 are no longer supported
localCursors.push_back(peekLocal(dbgid, txsTag, begin, localEnd, true, peekLocality));
allCursors.push_back(peekAll(dbgid, localEnd, end, txsTag, true));
cursors[1] = Reference<ILogSystem::BufferedCursor>( new ILogSystem::BufferedCursor(localCursors, begin, localEnd, false) );
cursors[0] = Reference<ILogSystem::BufferedCursor>( new ILogSystem::BufferedCursor(allCursors, localEnd, end, false) );
epochEnds.emplace_back(localEnd);
return Reference<ILogSystem::MultiCursor>( new ILogSystem::MultiCursor(cursors, epochEnds) );
return Reference<ILogSystem::MultiCursor>( new ILogSystem::MultiCursor(cursors, epochEnds, false) );
} catch( Error& e ) {
if(e.code() == error_code_worker_removed) {
return peekAll(dbgid, begin, end, tag, true);
std::vector< Reference<ILogSystem::IPeekCursor> > cursors;
for(int i = 0; i < tLogs[0]->logServers.size(); i++) {
cursors.push_back(peekAll(dbgid, begin, end, Tag(tagLocalityTxs, i), true));
}
//SOMEDAY: remove once upgrades from 6.2 are no longer supported
cursors.push_back(peekAll(dbgid, begin, end, txsTag, true));
return Reference<ILogSystem::BufferedCursor>( new ILogSystem::BufferedCursor(cursors, begin, end, false) );
}
throw;
}
@ -909,6 +946,16 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
}
}
virtual void popTxs( Version upTo, int8_t popLocality ) {
if(tLogs.size()) {
for(int i = 0; i < tLogs[0]->logServers.size(); i++) {
pop(upTo, Tag(tagLocalityTxs, i), 0, popLocality);
}
}
//SOMEDAY: remove once upgrades from 6.2 are no longer supported
pop(upTo, txsTag, 0, popLocality);
}
virtual void pop( Version upTo, Tag tag, Version durableKnownCommittedVersion, int8_t popLocality ) {
if (upTo <= 0) return;
if( tag.locality == tagLocalityRemoteLog) {
@ -1126,6 +1173,11 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
return Tag(tagLocalityLogRouter, deterministicRandom()->randomInt(0, logRouterTags));
}
virtual Tag getRandomTxsTag() {
ASSERT(tLogs.size());
return Tag(tagLocalityTxs, deterministicRandom()->randomInt(0, tLogs[0]->logServers.size()));
}
ACTOR static Future<Void> monitorLog(Reference<AsyncVar<OptionalInterface<TLogInterface>>> logServer, Reference<AsyncVar<bool>> failed) {
state Future<Void> waitFailure;
loop {
@ -1730,6 +1782,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
req.allTags = localTags;
req.startVersion = logSet->startVersion;
req.logRouterTags = 0;
req.txsTags = self->tLogs[0]->logServers.size();
}
logSet->tLogLocalities.resize( remoteWorkers.remoteTLogs.size() );
@ -1823,7 +1876,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
logSystem->tLogs[1]->logServers.resize( recr.satelliteTLogs.size() ); // Dummy interfaces, so that logSystem->getPushLocations() below uses the correct size
logSystem->tLogs[1]->updateLocalitySet(logSystem->tLogs[1]->tLogLocalities);
logSystem->tLogs[1]->populateSatelliteTagLocations(logSystem->logRouterTags,oldLogSystem->logRouterTags);
logSystem->tLogs[1]->populateSatelliteTagLocations(logSystem->logRouterTags,oldLogSystem->logRouterTags,recr.tLogs.size(),oldLogSystem->tLogs.size() ? oldLogSystem->tLogs[0]->logServers.size() : 0);
logSystem->expectedLogSets++;
}
@ -1903,6 +1956,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
req.allTags = localTags;
req.startVersion = logSystem->tLogs[0]->startVersion;
req.logRouterTags = logSystem->logRouterTags;
req.txsTags = recr.tLogs.size();
}
logSystem->tLogs[0]->tLogLocalities.resize( recr.tLogs.size() );
@ -1927,7 +1981,11 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
state std::vector<Future<Void>> recoveryComplete;
if(region.satelliteTLogReplicationFactor > 0) {
std::vector<Tag> satelliteTags(1, txsTag);
std::vector<Tag> satelliteTags;
for(int i = 0; i < recr.tLogs.size(); i++) {
satelliteTags.push_back(Tag(tagLocalityTxs, i));
}
satelliteTags.push_back(txsTag);
state vector<Future<TLogInterface>> satelliteInitializationReplies;
vector< InitializeTLogRequest > sreqs( recr.satelliteTLogs.size() );
@ -1947,6 +2005,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
req.allTags = satelliteTags;
req.startVersion = oldLogSystem->knownCommittedVersion + 1;
req.logRouterTags = logSystem->logRouterTags;
req.txsTags = recr.tLogs.size();
}
for(int i = -1; i < oldLogSystem->logRouterTags; i++) {
@ -1957,6 +2016,14 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
sreqs[ loc ].recoverTags.push_back( tag );
}
for(int i = 0; i < recr.tLogs.size(); i++) {
Tag tag = Tag(tagLocalityTxs, i);
locations.clear();
logSystem->tLogs[1]->getPushLocations( vector<Tag>(1, tag), locations, 0 );
for(int loc : locations)
sreqs[ loc ].recoverTags.push_back( tag );
}
for( int i = 0; i < recr.satelliteTLogs.size(); i++ )
satelliteInitializationReplies.push_back( transformErrors( throwErrorOr( recr.satelliteTLogs[i].tLog.getReplyUnlessFailedFor( sreqs[i], SERVER_KNOBS->TLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY ) ), master_recovery_failed() ) );

View File

@ -108,6 +108,7 @@ struct InitializeTLogRequest {
bool isPrimary;
Version startVersion;
int logRouterTags;
int txsTags;
ReplyPromise< struct TLogInterface > reply;
@ -115,7 +116,7 @@ struct InitializeTLogRequest {
template <class Ar>
void serialize( Ar& ar ) {
serializer(ar, recruitmentID, recoverFrom, recoverAt, knownCommittedVersion, epoch, recoverTags, allTags, storeType, remoteTag, locality, isPrimary, startVersion, logRouterTags, reply, logVersion, spillType);
serializer(ar, recruitmentID, recoverFrom, recoverAt, knownCommittedVersion, epoch, recoverTags, allTags, storeType, remoteTag, locality, isPrimary, startVersion, logRouterTags, reply, logVersion, spillType, txsTags);
}
};

View File

@ -605,21 +605,21 @@ ACTOR Future<vector<Standalone<CommitTransactionRef>>> recruitEverything( Refere
return confChanges;
}
ACTOR Future<Void> updateLocalityForDcId(Optional<Key> dcId, Reference<ILogSystem> oldLogSystem, Reference<AsyncVar<PeekSpecialInfo>> locality) {
ACTOR Future<Void> updateLocalityForDcId(Optional<Key> dcId, Reference<ILogSystem> oldLogSystem, Reference<AsyncVar<PeekTxsInfo>> locality) {
loop {
std::pair<int8_t,int8_t> loc = oldLogSystem->getLogSystemConfig().getLocalityForDcId(dcId);
Version ver = locality->get().knownCommittedVersion;
if(ver == invalidVersion) {
ver = oldLogSystem->getKnownCommittedVersion();
}
locality->set( PeekSpecialInfo(loc.first,loc.second,ver) );
locality->set( PeekTxsInfo(loc.first,loc.second,ver) );
TraceEvent("UpdatedLocalityForDcId").detail("DcId", dcId).detail("Locality0", loc.first).detail("Locality1", loc.second).detail("Version", ver);
wait( oldLogSystem->onLogSystemConfigChange() || oldLogSystem->onKnownCommittedVersionChange() );
}
}
ACTOR Future<Void> readTransactionSystemState( Reference<MasterData> self, Reference<ILogSystem> oldLogSystem ) {
state Reference<AsyncVar<PeekSpecialInfo>> myLocality = Reference<AsyncVar<PeekSpecialInfo>>( new AsyncVar<PeekSpecialInfo>(PeekSpecialInfo(tagLocalityInvalid,tagLocalityInvalid,invalidVersion) ) );
state Reference<AsyncVar<PeekTxsInfo>> myLocality = Reference<AsyncVar<PeekTxsInfo>>( new AsyncVar<PeekTxsInfo>(PeekTxsInfo(tagLocalityInvalid,tagLocalityInvalid,invalidVersion) ) );
state Future<Void> localityUpdater = updateLocalityForDcId(self->myInterface.locality.dcId(), oldLogSystem, myLocality);
// Peek the txnStateTag in oldLogSystem and recover self->txnStateStore
@ -630,7 +630,7 @@ ACTOR Future<Void> readTransactionSystemState( Reference<MasterData> self, Refer
// Recover transaction state store
if(self->txnStateStore) self->txnStateStore->close();
self->txnStateLogAdapter = openDiskQueueAdapter( oldLogSystem, txsTag, myLocality );
self->txnStateLogAdapter = openDiskQueueAdapter( oldLogSystem, myLocality );
self->txnStateStore = keyValueStoreLogSystem( self->txnStateLogAdapter, self->dbgid, self->memoryLimit, false, false, true );
// Versionstamped operations (particularly those applied from DR) define a minimum commit version
@ -676,6 +676,9 @@ ACTOR Future<Void> readTransactionSystemState( Reference<MasterData> self, Refer
Standalone<VectorRef<KeyValueRef>> rawTags = wait( self->txnStateStore->readRange( serverTagKeys ) );
self->allTags.clear();
if(self->lastEpochEnd > 0) {
for(int i = 0; i < oldLogSystem->getLogSystemConfig().tLogs[0].tLogs.size(); i++) {
self->allTags.push_back(Tag(tagLocalityTxs, i));
}
self->allTags.push_back(txsTag);
}