From 297d831192b797157851125a71af4deec9a4773c Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Tue, 19 Apr 2022 11:22:35 -0700 Subject: [PATCH 1/2] Put guard pages next to fast alloc memory (#6885) * Put guard pages next to fast alloc memory I verified that we can now detect #6753 without creating tons of threads. * Use pageSize instead of 4096 * Don't include mmapInternal for windows --- flow/FastAlloc.cpp | 2 +- flow/Platform.actor.cpp | 27 +++++++++++++++++++++------ flow/Platform.h | 2 +- 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/flow/FastAlloc.cpp b/flow/FastAlloc.cpp index 262d1b017c..dd6f68c542 100644 --- a/flow/FastAlloc.cpp +++ b/flow/FastAlloc.cpp @@ -519,7 +519,7 @@ void FastAllocator::getMagazine() { --g_allocation_tracing_disabled; } #endif - block = (void**)::allocate(magazine_size * Size, false); + block = (void**)::allocate(magazine_size * Size, /*allowLargePages*/ false, /*includeGuardPages*/ true); #endif // void** block = new void*[ magazine_size * PSize ]; diff --git a/flow/Platform.actor.cpp b/flow/Platform.actor.cpp index c07ff01bca..4661c0c6ea 100644 --- a/flow/Platform.actor.cpp +++ b/flow/Platform.actor.cpp @@ -2037,7 +2037,22 @@ static void enableLargePages() { #endif } -static void* allocateInternal(size_t length, bool largePages) { +#ifndef _WIN32 +static void* mmapInternal(size_t length, int flags, bool guardPages) { + if (guardPages) { + constexpr size_t pageSize = 4096; + length += 2 * pageSize; // Map enough for the guard pages + void* resultWithGuardPages = mmap(nullptr, length, PROT_READ | PROT_WRITE, flags, -1, 0); + mprotect(resultWithGuardPages, pageSize, PROT_NONE); // left guard page + mprotect((void*)(uintptr_t(resultWithGuardPages) + length - pageSize), pageSize, PROT_NONE); // right guard page + return (void*)(uintptr_t(resultWithGuardPages) + pageSize); + } else { + return mmap(nullptr, length, PROT_READ | PROT_WRITE, flags, -1, 0); + } +} +#endif + +static void* allocateInternal(size_t length, bool largePages, bool guardPages) { #ifdef _WIN32 DWORD allocType = MEM_COMMIT | MEM_RESERVE; @@ -2052,31 +2067,31 @@ static void* allocateInternal(size_t length, bool largePages) { if (largePages) flags |= MAP_HUGETLB; - return mmap(nullptr, length, PROT_READ | PROT_WRITE, flags, -1, 0); + return mmapInternal(length, flags, guardPages); #elif defined(__APPLE__) || defined(__FreeBSD__) int flags = MAP_PRIVATE | MAP_ANON; - return mmap(nullptr, length, PROT_READ | PROT_WRITE, flags, -1, 0); + return mmapInternal(length, flags, guardPages); #else #error Port me! #endif } static bool largeBlockFail = false; -void* allocate(size_t length, bool allowLargePages) { +void* allocate(size_t length, bool allowLargePages, bool includeGuardPages) { if (allowLargePages) enableLargePages(); void* block = ALLOC_FAIL; if (allowLargePages && !largeBlockFail) { - block = allocateInternal(length, true); + block = allocateInternal(length, true, includeGuardPages); if (block == ALLOC_FAIL) largeBlockFail = true; } if (block == ALLOC_FAIL) - block = allocateInternal(length, false); + block = allocateInternal(length, false, includeGuardPages); // FIXME: SevWarnAlways trace if "close" to out of memory diff --git a/flow/Platform.h b/flow/Platform.h index 020544178f..272c8e3588 100644 --- a/flow/Platform.h +++ b/flow/Platform.h @@ -284,7 +284,7 @@ std::string epochsToGMTString(double epochs); void setMemoryQuota(size_t limit); -void* allocate(size_t length, bool allowLargePages); +void* allocate(size_t length, bool allowLargePages, bool includeGuardPages); void setAffinity(int proc); From 442d2b34c794f05b4f0cc0c7d9dd39aee2a6d9a6 Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Tue, 19 Apr 2022 16:57:41 -0700 Subject: [PATCH 2/2] fix: pops which were ignored during a snapshot would not be replayed on the proper tlogs within a shared tlog (#6892) --- fdbserver/OldTLogServer_6_0.actor.cpp | 17 ++++---- fdbserver/OldTLogServer_6_2.actor.cpp | 17 ++++---- fdbserver/TLogServer.actor.cpp | 56 +++++++++++++-------------- 3 files changed, 45 insertions(+), 45 deletions(-) diff --git a/fdbserver/OldTLogServer_6_0.actor.cpp b/fdbserver/OldTLogServer_6_0.actor.cpp index e52ab4955c..2d016d4a6a 100644 --- a/fdbserver/OldTLogServer_6_0.actor.cpp +++ b/fdbserver/OldTLogServer_6_0.actor.cpp @@ -291,8 +291,6 @@ struct TLogData : NonCopyable { // the set and for callers that unset will // be able to match it up std::string dataFolder; // folder where data is stored - std::map toBePopped; // map of Tag->Version for all the pops - // that came when ignorePopRequest was set Reference> degraded; std::vector tempTagMessages; @@ -514,6 +512,9 @@ struct LogData : NonCopyable, public ReferenceCounted { bool execOpCommitInProgress; int txsTags; + std::map toBePopped; // map of Tag->Version for all the pops + // that came when ignorePopRequest was set + explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, @@ -819,8 +820,8 @@ ACTOR Future tLogPopCore(TLogData* self, Tag inputTag, Version to, Referen if (self->ignorePopRequest) { TraceEvent(SevDebug, "IgnoringPopRequest").detail("IgnorePopDeadline", self->ignorePopDeadline); - if (self->toBePopped.find(inputTag) == self->toBePopped.end() || to > self->toBePopped[inputTag]) { - self->toBePopped[inputTag] = to; + if (logData->toBePopped.find(inputTag) == logData->toBePopped.end() || to > logData->toBePopped[inputTag]) { + logData->toBePopped[inputTag] = to; } // add the pop to the toBePopped map TraceEvent(SevDebug, "IgnoringPopRequest") @@ -882,11 +883,11 @@ ACTOR Future tLogPop(TLogData* self, TLogPopRequest req, ReferenceignorePopRequest = false; self->ignorePopUid = ""; self->ignorePopDeadline = 0.0; - for (it = self->toBePopped.begin(); it != self->toBePopped.end(); it++) { + for (it = logData->toBePopped.begin(); it != logData->toBePopped.end(); it++) { TraceEvent("PlayIgnoredPop").detail("Tag", it->first.toString()).detail("Version", it->second); ignoredPops.push_back(tLogPopCore(self, it->first, it->second, logData)); } - self->toBePopped.clear(); + logData->toBePopped.clear(); wait(waitForAll(ignoredPops)); TraceEvent("ResetIgnorePopRequest") .detail("Now", g_network->now()) @@ -1937,7 +1938,7 @@ ACTOR Future tLogEnablePopReq(TLogEnablePopRequest enablePopReq, TLogData* self->ignorePopRequest = false; self->ignorePopDeadline = 0.0; self->ignorePopUid = ""; - for (it = self->toBePopped.begin(); it != self->toBePopped.end(); it++) { + for (it = logData->toBePopped.begin(); it != logData->toBePopped.end(); it++) { TraceEvent("PlayIgnoredPop").detail("Tag", it->first.toString()).detail("Version", it->second); ignoredPops.push_back(tLogPopCore(self, it->first, it->second, logData)); } @@ -1951,7 +1952,7 @@ ACTOR Future tLogEnablePopReq(TLogEnablePopRequest enablePopReq, TLogData* .detail("QueueCommittedVersion", logData->queueCommittedVersion.get()) .detail("Version", logData->version.get()); wait(waitForAll(ignoredPops)); - self->toBePopped.clear(); + logData->toBePopped.clear(); enablePopReq.reply.send(Void()); return Void(); } diff --git a/fdbserver/OldTLogServer_6_2.actor.cpp b/fdbserver/OldTLogServer_6_2.actor.cpp index 35818053f9..ffb449aff5 100644 --- a/fdbserver/OldTLogServer_6_2.actor.cpp +++ b/fdbserver/OldTLogServer_6_2.actor.cpp @@ -355,8 +355,6 @@ struct TLogData : NonCopyable { // the set and for callers that unset will // be able to match it up std::string dataFolder; // folder where data is stored - std::map toBePopped; // map of Tag->Version for all the pops - // that came when ignorePopRequest was set Reference> degraded; TLogData(UID dbgid, @@ -596,6 +594,9 @@ struct LogData : NonCopyable, public ReferenceCounted { bool execOpCommitInProgress; int txsTags; + std::map toBePopped; // map of Tag->Version for all the pops + // that came when ignorePopRequest was set + explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, @@ -1400,8 +1401,8 @@ ACTOR Future tLogPopCore(TLogData* self, Tag inputTag, Version to, Referen if (self->ignorePopRequest) { TraceEvent(SevDebug, "IgnoringPopRequest").detail("IgnorePopDeadline", self->ignorePopDeadline); - if (self->toBePopped.find(inputTag) == self->toBePopped.end() || to > self->toBePopped[inputTag]) { - self->toBePopped[inputTag] = to; + if (logData->toBePopped.find(inputTag) == logData->toBePopped.end() || to > logData->toBePopped[inputTag]) { + logData->toBePopped[inputTag] = to; } // add the pop to the toBePopped map TraceEvent(SevDebug, "IgnoringPopRequest") @@ -1478,11 +1479,11 @@ ACTOR Future tLogPop(TLogData* self, TLogPopRequest req, ReferenceignorePopRequest = false; self->ignorePopUid = ""; self->ignorePopDeadline = 0.0; - for (it = self->toBePopped.begin(); it != self->toBePopped.end(); it++) { + for (it = logData->toBePopped.begin(); it != logData->toBePopped.end(); it++) { TraceEvent("PlayIgnoredPop").detail("Tag", it->first.toString()).detail("Version", it->second); ignoredPops.push_back(tLogPopCore(self, it->first, it->second, logData)); } - self->toBePopped.clear(); + logData->toBePopped.clear(); wait(waitForAll(ignoredPops)); TraceEvent("ResetIgnorePopRequest") .detail("Now", g_network->now()) @@ -2382,7 +2383,7 @@ ACTOR Future tLogEnablePopReq(TLogEnablePopRequest enablePopReq, TLogData* self->ignorePopRequest = false; self->ignorePopDeadline = 0.0; self->ignorePopUid = ""; - for (it = self->toBePopped.begin(); it != self->toBePopped.end(); it++) { + for (it = logData->toBePopped.begin(); it != logData->toBePopped.end(); it++) { TraceEvent("PlayIgnoredPop").detail("Tag", it->first.toString()).detail("Version", it->second); ignoredPops.push_back(tLogPopCore(self, it->first, it->second, logData)); } @@ -2396,7 +2397,7 @@ ACTOR Future tLogEnablePopReq(TLogEnablePopRequest enablePopReq, TLogData* .detail("QueueCommittedVersion", logData->queueCommittedVersion.get()) .detail("Version", logData->version.get()); wait(waitForAll(ignoredPops)); - self->toBePopped.clear(); + logData->toBePopped.clear(); enablePopReq.reply.send(Void()); return Void(); } diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index 7184da95cd..749f63949a 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -358,7 +358,6 @@ struct TLogData : NonCopyable { FlowLock persistentDataCommitLock; // Beginning of fields used by snapshot based backup and restore - bool ignorePopRequest; // ignore pop request from storage servers double ignorePopDeadline; // time until which the ignorePopRequest will be // honored std::string ignorePopUid; // callers that set ignorePopRequest will set this @@ -366,8 +365,6 @@ struct TLogData : NonCopyable { // the set and for callers that unset will // be able to match it up std::string dataFolder; // folder where data is stored - std::map toBePopped; // map of Tag->Version for all the pops - // that came when ignorePopRequest was set Reference> degraded; // End of fields used by snapshot based backup and restore @@ -388,11 +385,10 @@ struct TLogData : NonCopyable { instanceID(deterministicRandom()->randomUniqueID().first()), bytesInput(0), bytesDurable(0), targetVolatileBytes(SERVER_KNOBS->TLOG_SPILL_THRESHOLD), overheadBytesInput(0), overheadBytesDurable(0), peekMemoryLimiter(SERVER_KNOBS->TLOG_SPILL_REFERENCE_MAX_PEEK_MEMORY_BYTES), - concurrentLogRouterReads(SERVER_KNOBS->CONCURRENT_LOG_ROUTER_READS), ignorePopRequest(false), - dataFolder(folder), degraded(degraded), - commitLatencyDist(Histogram::getHistogram(LiteralStringRef("tLog"), - LiteralStringRef("commit"), - Histogram::Unit::microseconds)) { + concurrentLogRouterReads(SERVER_KNOBS->CONCURRENT_LOG_ROUTER_READS), ignorePopDeadline(0), dataFolder(folder), + degraded(degraded), commitLatencyDist(Histogram::getHistogram(LiteralStringRef("tLog"), + LiteralStringRef("commit"), + Histogram::Unit::microseconds)) { cx = openDBOnServer(dbInfo, TaskPriority::DefaultEndpoint, LockAware::True); } }; @@ -629,6 +625,9 @@ struct LogData : NonCopyable, public ReferenceCounted { bool execOpCommitInProgress; int txsTags; + std::map toBePopped; // map of Tag->Version for all the pops + // that came when ignorePopRequest was set + explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, @@ -1234,14 +1233,17 @@ ACTOR Future processPopRequests(TLogData* self, Reference logData state std::map::const_iterator it; state int ignoredPopsPlayed = 0; state std::map toBePopped; - toBePopped = std::move(self->toBePopped); - self->toBePopped.clear(); - self->ignorePopRequest = false; - self->ignorePopDeadline = 0.0; + + while (now() < self->ignorePopDeadline) { + wait(delayUntil(self->ignorePopDeadline + 0.0001)); + } + + toBePopped = std::move(logData->toBePopped); + logData->toBePopped.clear(); self->ignorePopUid = ""; for (it = toBePopped.cbegin(); it != toBePopped.cend(); ++it) { const auto& [tag, version] = *it; - TraceEvent("PlayIgnoredPop").detail("Tag", tag.toString()).detail("Version", version); + TraceEvent("PlayIgnoredPop", logData->logId).detail("Tag", tag.toString()).detail("Version", version); ignoredPops.push_back(tLogPopCore(self, tag, version, logData)); if (++ignoredPopsPlayed % SERVER_KNOBS->TLOG_POP_BATCH_SIZE == 0) { TEST(true); // Yielding while processing pop requests @@ -1249,20 +1251,22 @@ ACTOR Future processPopRequests(TLogData* self, Reference logData } } wait(waitForAll(ignoredPops)); - TraceEvent("ResetIgnorePopRequest") - .detail("IgnorePopRequest", self->ignorePopRequest) - .detail("IgnorePopDeadline", self->ignorePopDeadline); + TraceEvent("ResetIgnorePopRequest", logData->logId).detail("IgnorePopDeadline", self->ignorePopDeadline); return Void(); } ACTOR Future tLogPop(TLogData* self, TLogPopRequest req, Reference logData) { - if (self->ignorePopRequest) { - TraceEvent(SevDebug, "IgnoringPopRequest").detail("IgnorePopDeadline", self->ignorePopDeadline); + if (now() < self->ignorePopDeadline) { + TraceEvent(SevDebug, "IgnoringPopRequest", logData->logId).detail("IgnorePopDeadline", self->ignorePopDeadline); - auto& v = self->toBePopped[req.tag]; + if (logData->toBePopped.empty()) { + logData->addActor.send(processPopRequests(self, logData)); + } + + auto& v = logData->toBePopped[req.tag]; v = std::max(v, req.to); - TraceEvent(SevDebug, "IgnoringPopRequest") + TraceEvent(SevDebug, "IgnoringPopRequest", logData->logId) .detail("IgnorePopDeadline", self->ignorePopDeadline) .detail("Tag", req.tag.toString()) .detail("Version", req.to); @@ -2571,15 +2575,15 @@ ACTOR Future tLogEnablePopReq(TLogEnablePopRequest enablePopReq, TLogData* enablePopReq.reply.sendError(operation_failed()); return Void(); } - TraceEvent("EnableTLogPlayAllIgnoredPops2") + TraceEvent("EnableTLogPlayAllIgnoredPops2", logData->logId) .detail("UidStr", enablePopReq.snapUID.toString()) .detail("IgnorePopUid", self->ignorePopUid) - .detail("IgnorePopRequest", self->ignorePopRequest) .detail("IgnorePopDeadline", self->ignorePopDeadline) .detail("PersistentDataVersion", logData->persistentDataVersion) .detail("PersistentDataDurableVersion", logData->persistentDataDurableVersion) .detail("QueueCommittedVersion", logData->queueCommittedVersion.get()) .detail("Version", logData->version.get()); + self->ignorePopDeadline = 0; wait(processPopRequests(self, logData)); enablePopReq.reply.send(Void()); return Void(); @@ -2681,9 +2685,8 @@ ACTOR Future serveTLogInterface(TLogData* self, req.reply.sendError(operation_failed()); } else { // FIXME: As part of reverting snapshot V1, make ignorePopUid a UID instead of string - self->ignorePopRequest = true; self->ignorePopUid = req.snapUID.toString(); - self->ignorePopDeadline = g_network->now() + SERVER_KNOBS->TLOG_IGNORE_POP_AUTO_ENABLE_DELAY; + self->ignorePopDeadline = now() + SERVER_KNOBS->TLOG_IGNORE_POP_AUTO_ENABLE_DELAY; req.reply.send(Void()); } } @@ -2693,11 +2696,6 @@ ACTOR Future serveTLogInterface(TLogData* self, when(TLogSnapRequest snapReq = waitNext(tli.snapRequest.getFuture())) { logData->addActor.send(tLogSnapCreate(snapReq, self, logData)); } - when(wait(self->ignorePopRequest ? delayUntil(self->ignorePopDeadline) : Never())) { - TEST(true); // Hit ignorePopDeadline - TraceEvent("EnableTLogPlayAllIgnoredPops").detail("IgnoredPopDeadline", self->ignorePopDeadline); - logData->addActor.send(processPopRequests(self, logData)); - } } }