Add Alex comment on tLog

This commit is contained in:
Meng Xu 2020-11-12 13:29:11 -08:00
parent c2dd7d1d38
commit 046a6e8427
2 changed files with 29 additions and 14 deletions

View File

@ -325,7 +325,7 @@ struct TLogData : NonCopyable {
FlowLock concurrentLogRouterReads;
FlowLock persistentDataCommitLock;
// fields used by snapshot based backup and restore: start
// Beginning of fields used by snapshot based backup and restore
bool ignorePopRequest; // ignore pop request from storage servers
double ignorePopDeadline; // time until which the ignorePopRequest will be
// honored
@ -337,7 +337,7 @@ struct TLogData : NonCopyable {
std::map<Tag, Version> toBePopped; // map of Tag->Version for all the pops
// that came when ignorePopRequest was set
Reference<AsyncVar<bool>> degraded;
// fields used by snapshot based backup and restore: end
// End of fields used by snapshot based backup and restore
std::vector<TagsAndMessage> tempTagMessages;
@ -435,13 +435,19 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
bool stopped, initialized;
DBRecoveryCount recoveryCount;
// If persistentDataVersion != persistentDurableDataVersion,
// then spilling is happening from persistentDurableDataVersion to persistentDataVersion.
// Data less than persistentDataDurableVersion is spilled on disk (or fully popped from the TLog);
VersionMetricHandle persistentDataVersion, persistentDataDurableVersion; // The last version number in the portion of the log (written|durable) to persistentData
NotifiedVersion version, queueCommittedVersion;
NotifiedVersion version;
NotifiedVersion queueCommittedVersion; // The disk queue has committed up until the queueCommittedVersion version.
Version queueCommittingVersion;
Version knownCommittedVersion, durableKnownCommittedVersion, minKnownCommittedVersion;
Version queuePoppedVersion;
Version knownCommittedVersion; // The maximum version that a proxy has told us that is committed (all TLogs have
// ack'd a commit for this version).
Version durableKnownCommittedVersion, minKnownCommittedVersion;
Version queuePoppedVersion; // The disk queue has been popped up until the location which represents this version.
Version minPoppedTagVersion;
Tag minPoppedTag;
Tag minPoppedTag; // The tag that makes tLog hold its data and cause tLog's disk queue increasing.
Deque<std::pair<Version, Standalone<VectorRef<uint8_t>>>> messageBlocks;
std::vector<std::vector<Reference<TagData>>> tag_data; //tag.locality | tag.id
@ -558,14 +564,15 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
queueCommittedVersion.initMetric(LiteralStringRef("TLog.QueueCommittedVersion"), cc.id);
specialCounter(cc, "Version", [this](){ return this->version.get(); });
specialCounter(cc, "QueueCommittedVersion",
[this]() { return this->queueCommittedVersion.get(); }); // Q: What is this?
specialCounter(cc, "QueueCommittedVersion", [this]() { return this->queueCommittedVersion.get(); });
specialCounter(cc, "PersistentDataVersion", [this](){ return this->persistentDataVersion; });
specialCounter(cc, "PersistentDataDurableVersion", [this](){ return this->persistentDataDurableVersion; });
specialCounter(cc, "KnownCommittedVersion", [this](){ return this->knownCommittedVersion; });
specialCounter(cc, "QueuePoppedVersion", [this](){ return this->queuePoppedVersion; });
specialCounter(cc, "MinPoppedTagVersion",
[this]() { return this->minPoppedTagVersion; }); // Q: why can it be 0 in some trace logs
specialCounter(cc, "MinPoppedTagVersion", [this]() { return this->minPoppedTagVersion; });
// The locality and id of the tag that is responsible for making the TLog hold onto its oldest piece of data.
// If disk queues are growing and no one is sure why, then you shall look at this to find the tag responsible
// for why the TLog thinks it can't throw away data.
specialCounter(cc, "MinPoppedTagLocality", [this](){ return this->minPoppedTag.locality; });
specialCounter(cc, "MinPoppedTagId", [this](){ return this->minPoppedTag.id; });
specialCounter(cc, "SharedBytesInput", [tLogData](){ return tLogData->bytesInput; });
@ -765,7 +772,9 @@ ACTOR Future<Void> updatePoppedLocation( TLogData* self, Reference<LogData> logD
return Void();
}
// Q: pop disk queue to what?
// It runs against the oldest TLog instance, calculates the first location in the disk queue that contains un-popped
// data, and then issues a pop to the disk queue at that location so that anything earlier can be
// removed/forgotten/overwritten. In effect, it applies the effect of TLogPop RPCs to disk.
ACTOR Future<Void> popDiskQueue( TLogData* self, Reference<LogData> logData ) {
if (!logData->initialized) return Void();
@ -983,7 +992,8 @@ ACTOR Future<Void> updatePersistentData( TLogData* self, Reference<LogData> logD
// This function (and updatePersistentData, which is called by this function) run at a low priority and can soak up all
// CPU resources. For this reason, they employ aggressive use of yields to avoid causing slow tasks that could introduce
// latencies for more important work (e.g. commits).
// Q: Can someone explain what this actor is doing?
// This actor is just a loop that calls updatePersistentData and popDiskQueue whenever
// (a) there's data to be spilled or (b) we should update metadata after some commits have been fully popped.
ACTOR Future<Void> updateStorage( TLogData* self ) {
while(self->spillOrder.size() && !self->id_data.count(self->spillOrder.front())) {
self->spillOrder.pop_front();

View File

@ -184,9 +184,14 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
Optional<Version> recoveredAt;
Version knownCommittedVersion;
LocalityData locality;
// For each currently running popFromLog actor, outstandingPops is
// (logID, tag)->(max popped version, durableKnownCommittedVersion).
// Why do we need durableKnownCommittedVersion? knownCommittedVersion gives the lower bound of what data
// will need to be copied into the next generation to restore the replication factor.
// Guess: It probably serves as a minimum version of what data should be on a TLog in the next generation and
// sending a pop for anything less than durableKnownCommittedVersion for the TLog will be absurd.
std::map<std::pair<UID, Tag>, std::pair<Version, Version>> outstandingPops;
// For each currently running popFromLog actor, (logID, tag)->(max popped version, durableKnownCommittedVersion)
// Q: why do we need tag and durableKnownCommittedVersion?
Optional<PromiseStream<Future<Void>>> addActor;
ActorCollection popActors;
std::vector<OldLogData> oldLogData;