From f0d79b3d86f8d8b58d977e690ae9e556339e43a7 Mon Sep 17 00:00:00 2001 From: Nim Wijetunga Date: Mon, 15 Mar 2021 23:23:56 +0000 Subject: [PATCH 01/49] Inital implementation of network busyness --- bindings/c/fdb_c.cpp | 4 +++ bindings/c/foundationdb/fdb_c.h | 2 ++ fdbclient/IClientApi.h | 1 + fdbclient/Knobs.cpp | 1 + fdbclient/Knobs.h | 1 + fdbclient/MultiVersionTransaction.actor.cpp | 21 +++++++++++ fdbclient/MultiVersionTransaction.h | 3 ++ fdbclient/NativeAPI.actor.cpp | 4 +++ fdbclient/ThreadSafeTransaction.cpp | 11 ++++++ fdbclient/ThreadSafeTransaction.h | 1 + flow/Net2.actor.cpp | 39 ++++++++++++++------- flow/SystemMonitor.cpp | 38 ++++++++++++++++++++ flow/SystemMonitor.h | 2 ++ flow/network.h | 7 ++-- 14 files changed, 120 insertions(+), 15 deletions(-) diff --git a/bindings/c/fdb_c.cpp b/bindings/c/fdb_c.cpp index a5d6a206d8..f98c1c2404 100644 --- a/bindings/c/fdb_c.cpp +++ b/bindings/c/fdb_c.cpp @@ -366,6 +366,10 @@ extern "C" DLLEXPORT FDBFuture* fdb_database_create_snapshot(FDBDatabase* db, .extractPtr()); } +extern "C" DLLEXPORT double fdb_database_get_main_thread_busyness(FDBDatabase* d) { + return DB(d)->getMainThreadBusyness(); +} + extern "C" DLLEXPORT void fdb_transaction_destroy(FDBTransaction* tr) { try { TXN(tr)->delref(); diff --git a/bindings/c/foundationdb/fdb_c.h b/bindings/c/foundationdb/fdb_c.h index 0bda60790b..4fdd852890 100644 --- a/bindings/c/foundationdb/fdb_c.h +++ b/bindings/c/foundationdb/fdb_c.h @@ -190,6 +190,8 @@ extern "C" { int uid_length, uint8_t const *snap_command, int snap_command_length); + DLLEXPORT WARN_UNUSED_RESULT double fdb_database_get_main_thread_busyness(FDBDatabase* db); + DLLEXPORT void fdb_transaction_destroy( FDBTransaction* tr); DLLEXPORT void fdb_transaction_cancel( FDBTransaction* tr); diff --git a/fdbclient/IClientApi.h b/fdbclient/IClientApi.h index 25f5098b0f..6f3ad07cd1 100644 --- a/fdbclient/IClientApi.h +++ b/fdbclient/IClientApi.h @@ -96,6 +96,7 @@ public: virtual Reference createTransaction() 
= 0; virtual void setOption(FDBDatabaseOptions::Option option, Optional value = Optional()) = 0; + virtual double getMainThreadBusyness() = 0; virtual void addref() = 0; virtual void delref() = 0; diff --git a/fdbclient/Knobs.cpp b/fdbclient/Knobs.cpp index 47f858e58f..13fc34222d 100644 --- a/fdbclient/Knobs.cpp +++ b/fdbclient/Knobs.cpp @@ -38,6 +38,7 @@ void ClientKnobs::initialize(bool randomize) { init( TOO_MANY, 1000000 ); init( SYSTEM_MONITOR_INTERVAL, 5.0 ); + init( NETWORK_BUSYNESS_MONITOR_INTERVAL, 1.0 ); init( FAILURE_MAX_DELAY, 5.0 ); init( FAILURE_MIN_DELAY, 4.0 ); if( randomize && BUGGIFY ) FAILURE_MIN_DELAY = 1.0; diff --git a/fdbclient/Knobs.h b/fdbclient/Knobs.h index 3c21e58bff..3280005efb 100644 --- a/fdbclient/Knobs.h +++ b/fdbclient/Knobs.h @@ -30,6 +30,7 @@ public: int TOO_MANY; // FIXME: this should really be split up so we can control these more specifically double SYSTEM_MONITOR_INTERVAL; + double NETWORK_BUSYNESS_MONITOR_INTERVAL; double FAILURE_MAX_DELAY; double FAILURE_MIN_DELAY; diff --git a/fdbclient/MultiVersionTransaction.actor.cpp b/fdbclient/MultiVersionTransaction.actor.cpp index 3fe56620c5..66a1634359 100644 --- a/fdbclient/MultiVersionTransaction.actor.cpp +++ b/fdbclient/MultiVersionTransaction.actor.cpp @@ -347,6 +347,14 @@ ThreadFuture DLDatabase::createSnapshot(const StringRef& uid, const String return toThreadFuture(api, f, [](FdbCApi::FDBFuture* f, FdbCApi* api) { return Void(); }); } +double DLDatabase::getMainThreadBusyness() { + if (api->databaseGetMainThreadBusyness != nullptr) { + return api->databaseGetMainThreadBusyness(db); + } + + return 0; +} + // DLApi template void loadClientFunction(T* fp, void* lib, std::string libPath, const char* functionName, bool requireFunction = true) { @@ -388,6 +396,11 @@ void DLApi::init() { loadClientFunction(&api->databaseCreateTransaction, lib, fdbCPath, "fdb_database_create_transaction"); loadClientFunction(&api->databaseSetOption, lib, fdbCPath, "fdb_database_set_option"); + 
loadClientFunction(&api->databaseGetMainThreadBusyness, + lib, + fdbCPath, + "fdb_database_get_main_thread_busyness", + headerVersion >= 700); loadClientFunction(&api->databaseDestroy, lib, fdbCPath, "fdb_database_destroy"); loadClientFunction(&api->databaseRebootWorker, lib, fdbCPath, "fdb_database_reboot_worker", headerVersion >= 700); loadClientFunction(&api->databaseForceRecoveryWithDataLoss, @@ -917,6 +930,14 @@ ThreadFuture MultiVersionDatabase::createSnapshot(const StringRef& uid, co return abortableFuture(f, dbState->dbVar->get().onChange); } +double MultiVersionDatabase::getMainThreadBusyness() { + if (dbState->db) { + return dbState->db->getMainThreadBusyness(); + } + + return 0; +} + void MultiVersionDatabase::Connector::connect() { addref(); onMainThreadVoid( diff --git a/fdbclient/MultiVersionTransaction.h b/fdbclient/MultiVersionTransaction.h index 625eea061c..ea16f4f35e 100644 --- a/fdbclient/MultiVersionTransaction.h +++ b/fdbclient/MultiVersionTransaction.h @@ -80,6 +80,7 @@ struct FdbCApi : public ThreadSafeReferenceCounted { int uidLength, uint8_t const* snapshotCommmand, int snapshotCommandLength); + double (*databaseGetMainThreadBusyness)(FDBDatabase* database); // Transaction fdb_error_t (*transactionSetOption)(FDBTransaction* tr, @@ -262,6 +263,7 @@ public: Reference createTransaction() override; void setOption(FDBDatabaseOptions::Option option, Optional value = Optional()) override; + double getMainThreadBusyness() override; void addref() override { ThreadSafeReferenceCounted::addref(); } void delref() override { ThreadSafeReferenceCounted::delref(); } @@ -422,6 +424,7 @@ public: Reference createTransaction() override; void setOption(FDBDatabaseOptions::Option option, Optional value = Optional()) override; + double getMainThreadBusyness() override; void addref() override { ThreadSafeReferenceCounted::addref(); } void delref() override { ThreadSafeReferenceCounted::delref(); } diff --git a/fdbclient/NativeAPI.actor.cpp 
b/fdbclient/NativeAPI.actor.cpp index f0a5fc1ad6..142cfedb2a 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -1746,6 +1746,10 @@ void setupNetwork(uint64_t transportId, bool useMetrics) { g_network->addStopCallback(TLS::DestroyOpenSSLGlobalState); FlowTransport::createInstance(true, transportId); Net2FileSystem::newFileSystem(); + + TraceEvent("Nim_setupNetwork"); + systemMonitorNetworkBusyness(); + uncancellable(recurring(&systemMonitorNetworkBusyness, CLIENT_KNOBS->NETWORK_BUSYNESS_MONITOR_INTERVAL, TaskPriority::FlushTrace)); } void runNetwork() { diff --git a/fdbclient/ThreadSafeTransaction.cpp b/fdbclient/ThreadSafeTransaction.cpp index 2eb59ebc15..765e1e1270 100644 --- a/fdbclient/ThreadSafeTransaction.cpp +++ b/fdbclient/ThreadSafeTransaction.cpp @@ -91,6 +91,17 @@ ThreadFuture ThreadSafeDatabase::createSnapshot(const StringRef& uid, cons return onMainThread([db, snapUID, cmd]() -> Future { return db->createSnapshot(snapUID, cmd); }); } +double ThreadSafeDatabase::getMainThreadBusyness() { + // Return the main network thread busyness + if (!g_network) { + TraceEvent("Nim_getBusyness g_network null"); + // TODO: Is this the right thing to do in this case? 
+ return 0.0; + } + TraceEvent("Nim_getBusyness g_network good"); + return g_network->networkInfo.metrics.networkBusyness; +} + ThreadSafeDatabase::ThreadSafeDatabase(std::string connFilename, int apiVersion) { ClusterConnectionFile* connFile = new ClusterConnectionFile(ClusterConnectionFile::lookupClusterFileName(connFilename).first); diff --git a/fdbclient/ThreadSafeTransaction.h b/fdbclient/ThreadSafeTransaction.h index 4c7b26fb94..a62e503c11 100644 --- a/fdbclient/ThreadSafeTransaction.h +++ b/fdbclient/ThreadSafeTransaction.h @@ -35,6 +35,7 @@ public: Reference createTransaction() override; void setOption(FDBDatabaseOptions::Option option, Optional value = Optional()) override; + double getMainThreadBusyness() override; ThreadFuture onConnected(); // Returns after a majority of coordination servers are available and have reported a leader. The diff --git a/flow/Net2.actor.cpp b/flow/Net2.actor.cpp index 3a1e52dc4a..7086b89614 100644 --- a/flow/Net2.actor.cpp +++ b/flow/Net2.actor.cpp @@ -131,6 +131,8 @@ thread_local INetwork* thread_network = 0; class Net2 final : public INetwork, public INetworkConnections { +private: + void updateStarvationTrackers(struct NetworkMetrics::PriorityStats &binStats, TaskPriority priority, TaskPriority lastPriority, double now); public: Net2(const TLSConfig& tlsConfig, bool useThreadPool, bool useMetrics); void initTLS(ETLSInitState targetState) override; @@ -1572,6 +1574,24 @@ void Net2::run() { #endif } +void Net2::updateStarvationTrackers(struct NetworkMetrics::PriorityStats &binStats, TaskPriority priority, TaskPriority lastPriority, double now) { + // Updates the PriorityStats found in NetworkMetrics + + // Busy -> idle at binStats.priority + if (binStats.priority > priority && binStats.priority <= lastPriority) { + binStats.active = false; + binStats.duration += now - binStats.windowedTimer; + binStats.maxDuration = std::max(binStats.maxDuration, now - binStats.timer); + } + + // Idle -> busy at binStats.priority + else if 
(binStats.priority <= priority && binStats.priority > lastPriority) { + binStats.active = true; + binStats.timer = now; + binStats.windowedTimer = now; + } +} + void Net2::trackAtPriority(TaskPriority priority, double now) { if (lastPriorityStats == nullptr || priority != lastPriorityStats->priority) { // Start tracking current priority @@ -1591,20 +1611,15 @@ void Net2::trackAtPriority(TaskPriority priority, double now) { if (binStats.priority > lastPriority && binStats.priority > priority) { break; } + updateStarvationTrackers(binStats, priority, lastPriority, now); + } - // Busy -> idle at binStats.priority - if (binStats.priority > priority && binStats.priority <= lastPriority) { - binStats.active = false; - binStats.duration += now - binStats.windowedTimer; - binStats.maxDuration = std::max(binStats.maxDuration, now - binStats.timer); - } - - // Idle -> busy at binStats.priority - else if (binStats.priority <= priority && binStats.priority > lastPriority) { - binStats.active = true; - binStats.timer = now; - binStats.windowedTimer = now; + // Update starvation trackers for 1s measurment interval + for (auto& binStats : networkInfo.metrics.starvationTrackersOneSecondInterval) { + if (binStats.priority > lastPriority && binStats.priority > priority) { + break; } + updateStarvationTrackers(binStats, priority, lastPriority, now); } lastPriorityStats = &activeStatsItr.first->second; diff --git a/flow/SystemMonitor.cpp b/flow/SystemMonitor.cpp index 37dadf9dc1..22800a2ae3 100644 --- a/flow/SystemMonitor.cpp +++ b/flow/SystemMonitor.cpp @@ -42,6 +42,11 @@ void systemMonitor() { customSystemMonitor("ProcessMetrics", &statState, true); } +void systemMonitorNetworkBusyness() { + static StatisticsState statStateNetworkBusyness = StatisticsState(); + customSystemMonitorNetworkBusyness("ProcessMetricsNetworkBusyness", &statStateNetworkBusyness); +} + SystemStatistics getSystemStatistics() { static StatisticsState statState = StatisticsState(); const IPAddress ipAddr = 
machineState.ip.present() ? machineState.ip.get() : IPAddress(); @@ -61,6 +66,39 @@ SystemStatistics getSystemStatistics() { .detail("ApproximateUnusedMemory" #size, FastAllocator::getApproximateMemoryUnused()) \ .detail("ActiveThreads" #size, FastAllocator::getActiveThreads()) +SystemStatistics customSystemMonitorNetworkBusyness(std::string eventName, StatisticsState* statState) { + const IPAddress ipAddr = IPAddress(); + SystemStatistics currentStats = getSystemStatistics("", &ipAddr, &statState->systemState, true); + NetworkData netData; + netData.init(); + if (!g_network->isSimulated() && currentStats.initialized) { + { + bool firstTracker = true; + for (auto& itr : g_network->networkInfo.metrics.starvationTrackersOneSecondInterval) { + if (itr.active) { + itr.duration += now() - itr.windowedTimer; + itr.maxDuration = std::max(itr.maxDuration, now() - itr.timer); + itr.windowedTimer = now(); + } + + if (firstTracker) { + g_network->networkInfo.metrics.networkBusyness = + std::min(currentStats.elapsed, itr.duration) / currentStats.elapsed; + TraceEvent("Nim_system monitor").detail("busyness", g_network->networkInfo.metrics.networkBusyness); + firstTracker = false; + } + + itr.duration = 0; + itr.maxDuration = 0; + } + } + } + + statState->networkMetricsState = g_network->networkInfo.metrics; + statState->networkState = netData; + return currentStats; +} + SystemStatistics customSystemMonitor(std::string eventName, StatisticsState* statState, bool machineMetrics) { const IPAddress ipAddr = machineState.ip.present() ? 
machineState.ip.get() : IPAddress(); SystemStatistics currentStats = getSystemStatistics( diff --git a/flow/SystemMonitor.h b/flow/SystemMonitor.h index 2d2470bd48..28bbcb100c 100644 --- a/flow/SystemMonitor.h +++ b/flow/SystemMonitor.h @@ -148,7 +148,9 @@ struct StatisticsState { }; void systemMonitor(); +void systemMonitorNetworkBusyness(); SystemStatistics customSystemMonitor(std::string eventName, StatisticsState* statState, bool machineMetrics = false); +SystemStatistics customSystemMonitorNetworkBusyness(std::string eventName, StatisticsState* statState); SystemStatistics getSystemStatistics(); #endif /* FLOW_SYSTEM_MONITOR_H */ diff --git a/flow/network.h b/flow/network.h index 33fb7b0f26..fb6c5cf5ec 100644 --- a/flow/network.h +++ b/flow/network.h @@ -340,14 +340,15 @@ struct NetworkMetrics { }; std::unordered_map activeTrackers; - double lastRunLoopBusyness; - std::vector starvationTrackers; + double lastRunLoopBusyness, networkBusyness; + std::vector starvationTrackers, starvationTrackersOneSecondInterval; static const std::vector starvationBins; - NetworkMetrics() : lastRunLoopBusyness(0) { + NetworkMetrics() : lastRunLoopBusyness(0), networkBusyness(0) { for (int priority : starvationBins) { starvationTrackers.emplace_back(static_cast(priority)); + starvationTrackersOneSecondInterval.emplace_back(static_cast(priority)); } } }; From cc6e3956648c8dc176d162d7c0d9ecf09130bd9d Mon Sep 17 00:00:00 2001 From: Nim Wijetunga Date: Tue, 16 Mar 2021 02:16:43 +0000 Subject: [PATCH 02/49] atomic network busyness --- flow/SystemMonitor.cpp | 3 +-- flow/network.h | 19 ++++++++++++++++++- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/flow/SystemMonitor.cpp b/flow/SystemMonitor.cpp index 22800a2ae3..d6c2cbdb9d 100644 --- a/flow/SystemMonitor.cpp +++ b/flow/SystemMonitor.cpp @@ -68,7 +68,7 @@ SystemStatistics getSystemStatistics() { SystemStatistics customSystemMonitorNetworkBusyness(std::string eventName, StatisticsState* statState) { const IPAddress 
ipAddr = IPAddress(); - SystemStatistics currentStats = getSystemStatistics("", &ipAddr, &statState->systemState, true); + SystemStatistics currentStats = getSystemStatistics("", &ipAddr, &statState->systemState, true); NetworkData netData; netData.init(); if (!g_network->isSimulated() && currentStats.initialized) { @@ -84,7 +84,6 @@ SystemStatistics customSystemMonitorNetworkBusyness(std::string eventName, Stati if (firstTracker) { g_network->networkInfo.metrics.networkBusyness = std::min(currentStats.elapsed, itr.duration) / currentStats.elapsed; - TraceEvent("Nim_system monitor").detail("busyness", g_network->networkInfo.metrics.networkBusyness); firstTracker = false; } diff --git a/flow/network.h b/flow/network.h index fb6c5cf5ec..ef18fbda97 100644 --- a/flow/network.h +++ b/flow/network.h @@ -27,6 +27,7 @@ #include #include #include +#include #include "boost/asio.hpp" #ifndef TLS_DISABLED #include "boost/asio/ssl.hpp" @@ -340,7 +341,8 @@ struct NetworkMetrics { }; std::unordered_map activeTrackers; - double lastRunLoopBusyness, networkBusyness; + double lastRunLoopBusyness; + std::atomic networkBusyness; std::vector starvationTrackers, starvationTrackersOneSecondInterval; static const std::vector starvationBins; @@ -351,6 +353,21 @@ struct NetworkMetrics { starvationTrackersOneSecondInterval.emplace_back(static_cast(priority)); } } + + NetworkMetrics& operator=(const NetworkMetrics& rhs) { + // Since networkBusyness is atomic we need to redfine copy assignment oeprator + for (int i = 0; i < SLOW_EVENT_BINS; i++) { + countSlowEvents[i] = rhs.countSlowEvents[i]; + } + secSquaredSubmit = rhs.secSquaredSubmit; + secSquaredDiskStall = rhs.secSquaredDiskStall; + activeTrackers = rhs.activeTrackers; + lastRunLoopBusyness = rhs.lastRunLoopBusyness; + networkBusyness = rhs.networkBusyness.load(); + starvationTrackers = rhs.starvationTrackers; + starvationTrackersOneSecondInterval = starvationTrackersOneSecondInterval; + return *this; + } }; struct FlowLock; From 
17f9da69623808e92988a77ac4cf17252572d93c Mon Sep 17 00:00:00 2001 From: Nim Wijetunga Date: Tue, 16 Mar 2021 16:47:50 +0000 Subject: [PATCH 03/49] remove traces --- fdbclient/NativeAPI.actor.cpp | 1 - fdbclient/ThreadSafeTransaction.cpp | 2 -- flow/network.h | 4 ++-- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 142cfedb2a..3b845e2a8f 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -1747,7 +1747,6 @@ void setupNetwork(uint64_t transportId, bool useMetrics) { FlowTransport::createInstance(true, transportId); Net2FileSystem::newFileSystem(); - TraceEvent("Nim_setupNetwork"); systemMonitorNetworkBusyness(); uncancellable(recurring(&systemMonitorNetworkBusyness, CLIENT_KNOBS->NETWORK_BUSYNESS_MONITOR_INTERVAL, TaskPriority::FlushTrace)); } diff --git a/fdbclient/ThreadSafeTransaction.cpp b/fdbclient/ThreadSafeTransaction.cpp index 765e1e1270..1141d60f2a 100644 --- a/fdbclient/ThreadSafeTransaction.cpp +++ b/fdbclient/ThreadSafeTransaction.cpp @@ -94,11 +94,9 @@ ThreadFuture ThreadSafeDatabase::createSnapshot(const StringRef& uid, cons double ThreadSafeDatabase::getMainThreadBusyness() { // Return the main network thread busyness if (!g_network) { - TraceEvent("Nim_getBusyness g_network null"); // TODO: Is this the right thing to do in this case? 
return 0.0; } - TraceEvent("Nim_getBusyness g_network good"); return g_network->networkInfo.metrics.networkBusyness; } diff --git a/flow/network.h b/flow/network.h index ef18fbda97..fbebf33075 100644 --- a/flow/network.h +++ b/flow/network.h @@ -355,7 +355,7 @@ struct NetworkMetrics { } NetworkMetrics& operator=(const NetworkMetrics& rhs) { - // Since networkBusyness is atomic we need to redfine copy assignment oeprator + // Since networkBusyness is atomic we need to redfine copy assignment operator for (int i = 0; i < SLOW_EVENT_BINS; i++) { countSlowEvents[i] = rhs.countSlowEvents[i]; } @@ -365,7 +365,7 @@ struct NetworkMetrics { lastRunLoopBusyness = rhs.lastRunLoopBusyness; networkBusyness = rhs.networkBusyness.load(); starvationTrackers = rhs.starvationTrackers; - starvationTrackersOneSecondInterval = starvationTrackersOneSecondInterval; + starvationTrackersOneSecondInterval = rhs.starvationTrackersOneSecondInterval; return *this; } }; From 5f079904d40543f642c0b85df4cd7025d7bcd420 Mon Sep 17 00:00:00 2001 From: Nim Wijetunga Date: Tue, 16 Mar 2021 18:30:39 +0000 Subject: [PATCH 04/49] add unit test --- bindings/c/test/unit/unit_tests.cpp | 19 +++++++++++++++++++ fdbclient/NativeAPI.actor.cpp | 1 + flow/Net2.actor.cpp | 1 + flow/SystemMonitor.cpp | 2 ++ 4 files changed, 23 insertions(+) diff --git a/bindings/c/test/unit/unit_tests.cpp b/bindings/c/test/unit/unit_tests.cpp index d54f604a24..67692f4af9 100644 --- a/bindings/c/test/unit/unit_tests.cpp +++ b/bindings/c/test/unit/unit_tests.cpp @@ -35,6 +35,7 @@ #include #include #include +#include #define DOCTEST_CONFIG_IMPLEMENT #include "doctest.h" @@ -2112,6 +2113,24 @@ TEST_CASE("block_from_callback") { context.event.wait(); } +TEST_CASE("monitor_network_busyness") { + // monitors network busyness for 2 sec (40 readings) + bool containsGreaterZero = false; + for (int i = 0; i < 40; i++) { + double busyness = fdb_database_get_main_thread_busyness(db); + // make sure the busyness is between 0 and 1 + 
CHECK(busyness >= 0); + CHECK(busyness <= 1); + if (busyness > 0) { + containsGreaterZero = true; + } + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + } + + // assert that at least one of the busyness readings was greater than 0 + CHECK(containsGreaterZero); +} + int main(int argc, char** argv) { if (argc != 3 && argc != 4) { std::cout << "Unit tests for the FoundationDB C API.\n" diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 3b845e2a8f..da038d12af 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -1734,6 +1734,7 @@ void setNetworkOption(FDBNetworkOptions::Option option, Optional valu } void setupNetwork(uint64_t transportId, bool useMetrics) { + // Setup g_network and start monitoring for network busyness if (g_network) throw network_already_setup(); diff --git a/flow/Net2.actor.cpp b/flow/Net2.actor.cpp index 7086b89614..196304fd10 100644 --- a/flow/Net2.actor.cpp +++ b/flow/Net2.actor.cpp @@ -1593,6 +1593,7 @@ void Net2::updateStarvationTrackers(struct NetworkMetrics::PriorityStats &binSta } void Net2::trackAtPriority(TaskPriority priority, double now) { + // Update both vectors of starvation trackers (one that updates every 5s and the other every 1s) if (lastPriorityStats == nullptr || priority != lastPriorityStats->priority) { // Start tracking current priority auto activeStatsItr = networkInfo.metrics.activeTrackers.try_emplace(priority, priority); diff --git a/flow/SystemMonitor.cpp b/flow/SystemMonitor.cpp index d6c2cbdb9d..ece7172fa0 100644 --- a/flow/SystemMonitor.cpp +++ b/flow/SystemMonitor.cpp @@ -43,6 +43,7 @@ void systemMonitor() { } void systemMonitorNetworkBusyness() { + // update network busyness and save the state in StatisticsState static StatisticsState statStateNetworkBusyness = StatisticsState(); customSystemMonitorNetworkBusyness("ProcessMetricsNetworkBusyness", &statStateNetworkBusyness); } @@ -67,6 +68,7 @@ SystemStatistics getSystemStatistics() { 
.detail("ActiveThreads" #size, FastAllocator::getActiveThreads()) SystemStatistics customSystemMonitorNetworkBusyness(std::string eventName, StatisticsState* statState) { + // Custom system monitor to update the network busyness on a 1s cadence const IPAddress ipAddr = IPAddress(); SystemStatistics currentStats = getSystemStatistics("", &ipAddr, &statState->systemState, true); NetworkData netData; From bf5f83d2bff73bc3c3ba6ee0ab4240a33d3e4964 Mon Sep 17 00:00:00 2001 From: Nim Wijetunga Date: Tue, 16 Mar 2021 21:49:36 +0000 Subject: [PATCH 05/49] address pr comments --- bindings/c/fdb_c.cpp | 1 + fdbclient/Knobs.h | 2 +- fdbclient/MultiVersionTransaction.actor.cpp | 3 ++ fdbclient/NativeAPI.actor.cpp | 6 +-- flow/Net2.actor.cpp | 15 ++++-- flow/SystemMonitor.cpp | 53 ++++++++------------- flow/SystemMonitor.h | 3 +- flow/network.h | 6 +-- 8 files changed, 44 insertions(+), 45 deletions(-) diff --git a/bindings/c/fdb_c.cpp b/bindings/c/fdb_c.cpp index f98c1c2404..26ba181c67 100644 --- a/bindings/c/fdb_c.cpp +++ b/bindings/c/fdb_c.cpp @@ -366,6 +366,7 @@ extern "C" DLLEXPORT FDBFuture* fdb_database_create_snapshot(FDBDatabase* db, .extractPtr()); } +// Get network thread busyness (updated every 1s) extern "C" DLLEXPORT double fdb_database_get_main_thread_busyness(FDBDatabase* d) { return DB(d)->getMainThreadBusyness(); } diff --git a/fdbclient/Knobs.h b/fdbclient/Knobs.h index 3280005efb..5b3cb1160c 100644 --- a/fdbclient/Knobs.h +++ b/fdbclient/Knobs.h @@ -30,7 +30,7 @@ public: int TOO_MANY; // FIXME: this should really be split up so we can control these more specifically double SYSTEM_MONITOR_INTERVAL; - double NETWORK_BUSYNESS_MONITOR_INTERVAL; + double NETWORK_BUSYNESS_MONITOR_INTERVAL; // The interval in which we should up the network busyness metric double FAILURE_MAX_DELAY; double FAILURE_MIN_DELAY; diff --git a/fdbclient/MultiVersionTransaction.actor.cpp b/fdbclient/MultiVersionTransaction.actor.cpp index 66a1634359..b47c18a482 100644 --- 
a/fdbclient/MultiVersionTransaction.actor.cpp +++ b/fdbclient/MultiVersionTransaction.actor.cpp @@ -347,6 +347,7 @@ ThreadFuture DLDatabase::createSnapshot(const StringRef& uid, const String return toThreadFuture(api, f, [](FdbCApi::FDBFuture* f, FdbCApi* api) { return Void(); }); } +// Get network thread busyness double DLDatabase::getMainThreadBusyness() { if (api->databaseGetMainThreadBusyness != nullptr) { return api->databaseGetMainThreadBusyness(db); @@ -368,6 +369,7 @@ void loadClientFunction(T* fp, void* lib, std::string libPath, const char* funct DLApi::DLApi(std::string fdbCPath, bool unlinkOnLoad) : api(new FdbCApi()), fdbCPath(fdbCPath), unlinkOnLoad(unlinkOnLoad), networkSetup(false) {} +// Loads client API functions (definitions are in FdbCApi struct) void DLApi::init() { if (isLibraryLoaded(fdbCPath.c_str())) { throw external_client_already_loaded(); @@ -930,6 +932,7 @@ ThreadFuture MultiVersionDatabase::createSnapshot(const StringRef& uid, co return abortableFuture(f, dbState->dbVar->get().onChange); } +// Get network thread busyness double MultiVersionDatabase::getMainThreadBusyness() { if (dbState->db) { return dbState->db->getMainThreadBusyness(); diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index da038d12af..5666a831c4 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -1733,8 +1733,8 @@ void setNetworkOption(FDBNetworkOptions::Option option, Optional valu } } +// Setup g_network and start monitoring for network busyness void setupNetwork(uint64_t transportId, bool useMetrics) { - // Setup g_network and start monitoring for network busyness if (g_network) throw network_already_setup(); @@ -1748,8 +1748,8 @@ void setupNetwork(uint64_t transportId, bool useMetrics) { FlowTransport::createInstance(true, transportId); Net2FileSystem::newFileSystem(); - systemMonitorNetworkBusyness(); - uncancellable(recurring(&systemMonitorNetworkBusyness, CLIENT_KNOBS->NETWORK_BUSYNESS_MONITOR_INTERVAL, 
TaskPriority::FlushTrace)); + monitorNetworkBusyness(); + uncancellable(recurring(&monitorNetworkBusyness, CLIENT_KNOBS->NETWORK_BUSYNESS_MONITOR_INTERVAL, TaskPriority::FlushTrace)); } void runNetwork() { diff --git a/flow/Net2.actor.cpp b/flow/Net2.actor.cpp index 196304fd10..28c1ef1e1f 100644 --- a/flow/Net2.actor.cpp +++ b/flow/Net2.actor.cpp @@ -132,7 +132,11 @@ thread_local INetwork* thread_network = 0; class Net2 final : public INetwork, public INetworkConnections { private: - void updateStarvationTrackers(struct NetworkMetrics::PriorityStats &binStats, TaskPriority priority, TaskPriority lastPriority, double now); + void updateStarvationTrackers(struct NetworkMetrics::PriorityStats& binStats, + TaskPriority priority, + TaskPriority lastPriority, + double now); + public: Net2(const TLSConfig& tlsConfig, bool useThreadPool, bool useMetrics); void initTLS(ETLSInitState targetState) override; @@ -1574,8 +1578,11 @@ void Net2::run() { #endif } -void Net2::updateStarvationTrackers(struct NetworkMetrics::PriorityStats &binStats, TaskPriority priority, TaskPriority lastPriority, double now) { - // Updates the PriorityStats found in NetworkMetrics +// Updates the PriorityStats found in NetworkMetrics +void Net2::updateStarvationTrackers(struct NetworkMetrics::PriorityStats& binStats, + TaskPriority priority, + TaskPriority lastPriority, + double now) { // Busy -> idle at binStats.priority if (binStats.priority > priority && binStats.priority <= lastPriority) { @@ -1616,7 +1623,7 @@ void Net2::trackAtPriority(TaskPriority priority, double now) { } // Update starvation trackers for 1s measurment interval - for (auto& binStats : networkInfo.metrics.starvationTrackersOneSecondInterval) { + for (auto& binStats : networkInfo.metrics.starvationTrackersNetworkBusyness) { if (binStats.priority > lastPriority && binStats.priority > priority) { break; } diff --git a/flow/SystemMonitor.cpp b/flow/SystemMonitor.cpp index ece7172fa0..76fa78e0b8 100644 --- a/flow/SystemMonitor.cpp 
+++ b/flow/SystemMonitor.cpp @@ -42,12 +42,6 @@ void systemMonitor() { customSystemMonitor("ProcessMetrics", &statState, true); } -void systemMonitorNetworkBusyness() { - // update network busyness and save the state in StatisticsState - static StatisticsState statStateNetworkBusyness = StatisticsState(); - customSystemMonitorNetworkBusyness("ProcessMetricsNetworkBusyness", &statStateNetworkBusyness); -} - SystemStatistics getSystemStatistics() { static StatisticsState statState = StatisticsState(); const IPAddress ipAddr = machineState.ip.present() ? machineState.ip.get() : IPAddress(); @@ -67,37 +61,32 @@ SystemStatistics getSystemStatistics() { .detail("ApproximateUnusedMemory" #size, FastAllocator::getApproximateMemoryUnused()) \ .detail("ActiveThreads" #size, FastAllocator::getActiveThreads()) -SystemStatistics customSystemMonitorNetworkBusyness(std::string eventName, StatisticsState* statState) { - // Custom system monitor to update the network busyness on a 1s cadence +// update the network busyness on a 1s cadence +void monitorNetworkBusyness() { const IPAddress ipAddr = IPAddress(); - SystemStatistics currentStats = getSystemStatistics("", &ipAddr, &statState->systemState, true); - NetworkData netData; - netData.init(); + static StatisticsState statState = StatisticsState(); + SystemStatistics currentStats = getSystemStatistics("", &ipAddr, &(&statState)->systemState, true); if (!g_network->isSimulated() && currentStats.initialized) { - { - bool firstTracker = true; - for (auto& itr : g_network->networkInfo.metrics.starvationTrackersOneSecondInterval) { - if (itr.active) { - itr.duration += now() - itr.windowedTimer; - itr.maxDuration = std::max(itr.maxDuration, now() - itr.timer); - itr.windowedTimer = now(); - } - - if (firstTracker) { - g_network->networkInfo.metrics.networkBusyness = - std::min(currentStats.elapsed, itr.duration) / currentStats.elapsed; - firstTracker = false; - } - - itr.duration = 0; - itr.maxDuration = 0; + bool firstTracker = true; 
+ // iterate over the starvation trackers which are updated in Net2.actor.cpp + for (auto& itr : g_network->networkInfo.metrics.starvationTrackersNetworkBusyness) { + if (itr.active) { + itr.duration += now() - itr.windowedTimer; + itr.maxDuration = std::max(itr.maxDuration, now() - itr.timer); + itr.windowedTimer = now(); } + + if (firstTracker) { + g_network->networkInfo.metrics.networkBusyness = + std::min(currentStats.elapsed, itr.duration) / + currentStats.elapsed; // average duration spent doing "work" + firstTracker = false; + } + + itr.duration = 0; + itr.maxDuration = 0; } } - - statState->networkMetricsState = g_network->networkInfo.metrics; - statState->networkState = netData; - return currentStats; } SystemStatistics customSystemMonitor(std::string eventName, StatisticsState* statState, bool machineMetrics) { diff --git a/flow/SystemMonitor.h b/flow/SystemMonitor.h index 28bbcb100c..6e25f2fff9 100644 --- a/flow/SystemMonitor.h +++ b/flow/SystemMonitor.h @@ -148,9 +148,8 @@ struct StatisticsState { }; void systemMonitor(); -void systemMonitorNetworkBusyness(); +void monitorNetworkBusyness(); SystemStatistics customSystemMonitor(std::string eventName, StatisticsState* statState, bool machineMetrics = false); -SystemStatistics customSystemMonitorNetworkBusyness(std::string eventName, StatisticsState* statState); SystemStatistics getSystemStatistics(); #endif /* FLOW_SYSTEM_MONITOR_H */ diff --git a/flow/network.h b/flow/network.h index fbebf33075..04b93653dd 100644 --- a/flow/network.h +++ b/flow/network.h @@ -343,14 +343,14 @@ struct NetworkMetrics { std::unordered_map activeTrackers; double lastRunLoopBusyness; std::atomic networkBusyness; - std::vector starvationTrackers, starvationTrackersOneSecondInterval; + std::vector starvationTrackers, starvationTrackersNetworkBusyness; static const std::vector starvationBins; NetworkMetrics() : lastRunLoopBusyness(0), networkBusyness(0) { for (int priority : starvationBins) { 
starvationTrackers.emplace_back(static_cast(priority)); - starvationTrackersOneSecondInterval.emplace_back(static_cast(priority)); + starvationTrackersNetworkBusyness.emplace_back(static_cast(priority)); } } @@ -365,7 +365,7 @@ struct NetworkMetrics { lastRunLoopBusyness = rhs.lastRunLoopBusyness; networkBusyness = rhs.networkBusyness.load(); starvationTrackers = rhs.starvationTrackers; - starvationTrackersOneSecondInterval = rhs.starvationTrackersOneSecondInterval; + starvationTrackersNetworkBusyness = rhs.starvationTrackersNetworkBusyness; return *this; } }; From 0c6b9bc541382529f4743694b70369058a1ef288 Mon Sep 17 00:00:00 2001 From: Nim Wijetunga Date: Tue, 16 Mar 2021 21:53:24 +0000 Subject: [PATCH 06/49] remove guard around g_network --- fdbclient/ThreadSafeTransaction.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fdbclient/ThreadSafeTransaction.cpp b/fdbclient/ThreadSafeTransaction.cpp index 1141d60f2a..5a999afb6a 100644 --- a/fdbclient/ThreadSafeTransaction.cpp +++ b/fdbclient/ThreadSafeTransaction.cpp @@ -93,10 +93,6 @@ ThreadFuture ThreadSafeDatabase::createSnapshot(const StringRef& uid, cons double ThreadSafeDatabase::getMainThreadBusyness() { // Return the main network thread busyness - if (!g_network) { - // TODO: Is this the right thing to do in this case? 
- return 0.0; - } return g_network->networkInfo.metrics.networkBusyness; } From 822cbf895d3eaf8da073fea80f7d6f9068a6c235 Mon Sep 17 00:00:00 2001 From: Nim Wijetunga Date: Tue, 16 Mar 2021 23:29:02 +0000 Subject: [PATCH 07/49] address pr comments --- documentation/sphinx/source/api-c.rst | 6 +++++- fdbclient/NativeAPI.actor.cpp | 30 +++++++++++++++++++++++++-- fdbclient/ThreadSafeTransaction.cpp | 1 + flow/Net2.actor.cpp | 9 ++------ flow/SystemMonitor.cpp | 28 ------------------------- flow/SystemMonitor.h | 1 - flow/network.h | 10 +++++---- 7 files changed, 42 insertions(+), 43 deletions(-) diff --git a/documentation/sphinx/source/api-c.rst b/documentation/sphinx/source/api-c.rst index 2c2e256884..2de7056baf 100644 --- a/documentation/sphinx/source/api-c.rst +++ b/documentation/sphinx/source/api-c.rst @@ -481,7 +481,11 @@ An |database-blurb1| Modifications to a database are performed via transactions. |length-of| ``snapshot_command`` .. note:: The function is exposing the functionality of the fdbcli command ``snapshot``. Please take a look at the documentation before using (see :ref:`disk-snapshot-backups`). - + +.. function:: double fdb_database_get_main_thread_busyness(FDBDatabase* database) + + Returns a value between `0` and `1` representing the percentage of time the Network Thread is busy executing work from the TaskQueue. This value is updated every second. 
+ Transaction =========== diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 5666a831c4..1f576d27f8 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -1733,6 +1733,33 @@ void setNetworkOption(FDBNetworkOptions::Option option, Optional valu } } +// update the network busyness on a 1s cadence +ACTOR Future monitorNetworkBusyness() { + state double prevTime = now(); + loop choose { + when(wait(delay(CLIENT_KNOBS->NETWORK_BUSYNESS_MONITOR_INTERVAL, TaskPriority::FlushTrace))) { + double elapsed = now() - prevTime; // get elapsed time from last execution + prevTime = now(); + if (!g_network->isSimulated()) { + struct NetworkMetrics::PriorityStats& itr = + g_network->networkInfo.metrics.starvationTrackerNetworkBusyness; + + if (itr.active) { // update metrics + itr.duration += now() - itr.windowedTimer; + itr.maxDuration = std::max(itr.maxDuration, now() - itr.timer); + itr.windowedTimer = now(); + } + + g_network->networkInfo.metrics.networkBusyness = + std::min(elapsed, itr.duration) / elapsed; // average duration spent doing "work" + + itr.duration = 0; + itr.maxDuration = 0; + } + } + } +} + // Setup g_network and start monitoring for network busyness void setupNetwork(uint64_t transportId, bool useMetrics) { if (g_network) @@ -1748,8 +1775,7 @@ void setupNetwork(uint64_t transportId, bool useMetrics) { FlowTransport::createInstance(true, transportId); Net2FileSystem::newFileSystem(); - monitorNetworkBusyness(); - uncancellable(recurring(&monitorNetworkBusyness, CLIENT_KNOBS->NETWORK_BUSYNESS_MONITOR_INTERVAL, TaskPriority::FlushTrace)); + uncancellable(monitorNetworkBusyness()); } void runNetwork() { diff --git a/fdbclient/ThreadSafeTransaction.cpp b/fdbclient/ThreadSafeTransaction.cpp index 5a999afb6a..1d07e30782 100644 --- a/fdbclient/ThreadSafeTransaction.cpp +++ b/fdbclient/ThreadSafeTransaction.cpp @@ -93,6 +93,7 @@ ThreadFuture ThreadSafeDatabase::createSnapshot(const StringRef& uid, cons double 
ThreadSafeDatabase::getMainThreadBusyness() { // Return the main network thread busyness + ASSERT(g_network); return g_network->networkInfo.metrics.networkBusyness; } diff --git a/flow/Net2.actor.cpp b/flow/Net2.actor.cpp index 28c1ef1e1f..feb6dad8f2 100644 --- a/flow/Net2.actor.cpp +++ b/flow/Net2.actor.cpp @@ -1622,13 +1622,8 @@ void Net2::trackAtPriority(TaskPriority priority, double now) { updateStarvationTrackers(binStats, priority, lastPriority, now); } - // Update starvation trackers for 1s measurment interval - for (auto& binStats : networkInfo.metrics.starvationTrackersNetworkBusyness) { - if (binStats.priority > lastPriority && binStats.priority > priority) { - break; - } - updateStarvationTrackers(binStats, priority, lastPriority, now); - } + // Update starvation trackers for network busyness + updateStarvationTrackers(networkInfo.metrics.starvationTrackerNetworkBusyness, priority, lastPriority, now); lastPriorityStats = &activeStatsItr.first->second; } diff --git a/flow/SystemMonitor.cpp b/flow/SystemMonitor.cpp index 76fa78e0b8..37dadf9dc1 100644 --- a/flow/SystemMonitor.cpp +++ b/flow/SystemMonitor.cpp @@ -61,34 +61,6 @@ SystemStatistics getSystemStatistics() { .detail("ApproximateUnusedMemory" #size, FastAllocator::getApproximateMemoryUnused()) \ .detail("ActiveThreads" #size, FastAllocator::getActiveThreads()) -// update the network busyness on a 1s cadence -void monitorNetworkBusyness() { - const IPAddress ipAddr = IPAddress(); - static StatisticsState statState = StatisticsState(); - SystemStatistics currentStats = getSystemStatistics("", &ipAddr, &(&statState)->systemState, true); - if (!g_network->isSimulated() && currentStats.initialized) { - bool firstTracker = true; - // iterate over the starvation trackers which are updated in Net2.actor.cpp - for (auto& itr : g_network->networkInfo.metrics.starvationTrackersNetworkBusyness) { - if (itr.active) { - itr.duration += now() - itr.windowedTimer; - itr.maxDuration = std::max(itr.maxDuration, now() 
- itr.timer); - itr.windowedTimer = now(); - } - - if (firstTracker) { - g_network->networkInfo.metrics.networkBusyness = - std::min(currentStats.elapsed, itr.duration) / - currentStats.elapsed; // average duration spent doing "work" - firstTracker = false; - } - - itr.duration = 0; - itr.maxDuration = 0; - } - } -} - SystemStatistics customSystemMonitor(std::string eventName, StatisticsState* statState, bool machineMetrics) { const IPAddress ipAddr = machineState.ip.present() ? machineState.ip.get() : IPAddress(); SystemStatistics currentStats = getSystemStatistics( diff --git a/flow/SystemMonitor.h b/flow/SystemMonitor.h index 6e25f2fff9..2d2470bd48 100644 --- a/flow/SystemMonitor.h +++ b/flow/SystemMonitor.h @@ -148,7 +148,6 @@ struct StatisticsState { }; void systemMonitor(); -void monitorNetworkBusyness(); SystemStatistics customSystemMonitor(std::string eventName, StatisticsState* statState, bool machineMetrics = false); SystemStatistics getSystemStatistics(); diff --git a/flow/network.h b/flow/network.h index 04b93653dd..fc641d39a8 100644 --- a/flow/network.h +++ b/flow/network.h @@ -343,14 +343,16 @@ struct NetworkMetrics { std::unordered_map activeTrackers; double lastRunLoopBusyness; std::atomic networkBusyness; - std::vector starvationTrackers, starvationTrackersNetworkBusyness; + std::vector starvationTrackers; + struct PriorityStats starvationTrackerNetworkBusyness; static const std::vector starvationBins; - NetworkMetrics() : lastRunLoopBusyness(0), networkBusyness(0) { + NetworkMetrics() + : lastRunLoopBusyness(0), networkBusyness(0), + starvationTrackerNetworkBusyness(PriorityStats(static_cast(starvationBins.at(0)))) { for (int priority : starvationBins) { starvationTrackers.emplace_back(static_cast(priority)); - starvationTrackersNetworkBusyness.emplace_back(static_cast(priority)); } } @@ -365,7 +367,7 @@ struct NetworkMetrics { lastRunLoopBusyness = rhs.lastRunLoopBusyness; networkBusyness = rhs.networkBusyness.load(); starvationTrackers = 
rhs.starvationTrackers; - starvationTrackersNetworkBusyness = rhs.starvationTrackersNetworkBusyness; + starvationTrackerNetworkBusyness = rhs.starvationTrackerNetworkBusyness; return *this; } }; From ea922aa648335af2a4f1b593c14ac60b7108c0de Mon Sep 17 00:00:00 2001 From: Nim Wijetunga Date: Wed, 17 Mar 2021 18:16:18 +0000 Subject: [PATCH 08/49] address pr comments --- bindings/c/fdb_c.cpp | 2 ++ documentation/sphinx/source/api-c.rst | 2 +- fdbclient/Knobs.h | 2 +- flow/network.h | 9 ++++++--- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/bindings/c/fdb_c.cpp b/bindings/c/fdb_c.cpp index 26ba181c67..b491c2c0ec 100644 --- a/bindings/c/fdb_c.cpp +++ b/bindings/c/fdb_c.cpp @@ -367,6 +367,8 @@ extern "C" DLLEXPORT FDBFuture* fdb_database_create_snapshot(FDBDatabase* db, } // Get network thread busyness (updated every 1s) +// A value of 0 indicates that the client is more or less idle +// A value of 1 (or more) indicates that the client is saturated extern "C" DLLEXPORT double fdb_database_get_main_thread_busyness(FDBDatabase* d) { return DB(d)->getMainThreadBusyness(); } diff --git a/documentation/sphinx/source/api-c.rst b/documentation/sphinx/source/api-c.rst index 2de7056baf..0ad77ad43c 100644 --- a/documentation/sphinx/source/api-c.rst +++ b/documentation/sphinx/source/api-c.rst @@ -484,7 +484,7 @@ An |database-blurb1| Modifications to a database are performed via transactions. .. function:: double fdb_database_get_main_thread_busyness(FDBDatabase* database) - Returns a value between `0` and `1` representing the percentage of time the Network Thread is busy executing work from the TaskQueue. This value is updated every second. + Returns a value where `0` indicates that the client is idle, whereas a value of 1 (or larger) indicates that the client is saturated. By default, this value is updated every second. 
Transaction =========== diff --git a/fdbclient/Knobs.h b/fdbclient/Knobs.h index 5b3cb1160c..3d22b5a24b 100644 --- a/fdbclient/Knobs.h +++ b/fdbclient/Knobs.h @@ -30,7 +30,7 @@ public: int TOO_MANY; // FIXME: this should really be split up so we can control these more specifically double SYSTEM_MONITOR_INTERVAL; - double NETWORK_BUSYNESS_MONITOR_INTERVAL; // The interval in which we should up the network busyness metric + double NETWORK_BUSYNESS_MONITOR_INTERVAL; // The interval in which we should update the network busyness metric double FAILURE_MAX_DELAY; double FAILURE_MIN_DELAY; diff --git a/flow/network.h b/flow/network.h index fc641d39a8..8ca620803e 100644 --- a/flow/network.h +++ b/flow/network.h @@ -322,6 +322,7 @@ template class Promise; struct NetworkMetrics { + // Metrics which represent various network properties enum { SLOW_EVENT_BINS = 16 }; uint64_t countSlowEvents[SLOW_EVENT_BINS] = {}; @@ -341,8 +342,10 @@ struct NetworkMetrics { }; std::unordered_map activeTrackers; - double lastRunLoopBusyness; - std::atomic networkBusyness; + double lastRunLoopBusyness; // network thread busyness (measured every 5s by default) + std::atomic networkBusyness; // network thread busyness which is returned to the the client (measured every 1s by default) + + // starvation trackers which keeps track of different task priorities std::vector starvationTrackers; struct PriorityStats starvationTrackerNetworkBusyness; @@ -351,7 +354,7 @@ struct NetworkMetrics { NetworkMetrics() : lastRunLoopBusyness(0), networkBusyness(0), starvationTrackerNetworkBusyness(PriorityStats(static_cast(starvationBins.at(0)))) { - for (int priority : starvationBins) { + for (int priority : starvationBins) { // initalize starvation trackers with given priorities starvationTrackers.emplace_back(static_cast(priority)); } } From 27060cf4a920245c69bb6377c7a7117f59f1e4fb Mon Sep 17 00:00:00 2001 From: Zhe Wu Date: Wed, 17 Mar 2021 15:45:12 -0700 Subject: [PATCH 09/49] Populate min_replicas_remaining 
field for all regions during cluster status computation. This provides more information about the number of available replicas currently in the database. --- fdbclient/DatabaseConfiguration.h | 43 +++++++---- fdbserver/Status.actor.cpp | 117 ++++++++++++++++-------------- 2 files changed, 91 insertions(+), 69 deletions(-) diff --git a/fdbclient/DatabaseConfiguration.h b/fdbclient/DatabaseConfiguration.h index e087139912..4b1b29c021 100644 --- a/fdbclient/DatabaseConfiguration.h +++ b/fdbclient/DatabaseConfiguration.h @@ -160,33 +160,44 @@ struct DatabaseConfiguration { } // Retuns the maximum number of discrete failures a cluster can tolerate. - // In HA mode, `fullyReplicatedRegions` is set to false initially when data is being - // replicated to remote, and will be true later. `forAvailablity` is set to true + // In HA mode, `fullyReplicatedRegions` is set to 1 initially when data is being + // replicated to remote, and will be incremented later. `forAvailablity` is set to true // if we want to account the number for machines that can recruit new tLogs/SS after failures. - // Killing an entire datacenter counts as killing one zone in modes that support it + // Killing an entire datacenter counts as killing one zone in modes that support it. int32_t maxZoneFailuresTolerated(int fullyReplicatedRegions, bool forAvailability) const { - int worstSatellite = regions.size() ? std::numeric_limits::max() : 0; + int worstSatelliteTLogReplicationFactor = regions.size() ? 
std::numeric_limits::max() : 0; int regionsWithNonNegativePriority = 0; for (auto& r : regions) { if (r.priority >= 0) { regionsWithNonNegativePriority++; } - worstSatellite = - std::min(worstSatellite, r.satelliteTLogReplicationFactor - r.satelliteTLogWriteAntiQuorum); + worstSatelliteTLogReplicationFactor = std::min( + worstSatelliteTLogReplicationFactor, r.satelliteTLogReplicationFactor - r.satelliteTLogWriteAntiQuorum); if (r.satelliteTLogUsableDcsFallback > 0) { - worstSatellite = std::min( - worstSatellite, r.satelliteTLogReplicationFactorFallback - r.satelliteTLogWriteAntiQuorumFallback); + worstSatelliteTLogReplicationFactor = + std::min(worstSatelliteTLogReplicationFactor, + r.satelliteTLogReplicationFactorFallback - r.satelliteTLogWriteAntiQuorumFallback); } } - if (usableRegions > 1 && fullyReplicatedRegions > 1 && worstSatellite > 0 && - (!forAvailability || regionsWithNonNegativePriority > 1)) { - return 1 + std::min(std::max(tLogReplicationFactor - 1 - tLogWriteAntiQuorum, worstSatellite - 1), - storageTeamSize - 1); - } else if (worstSatellite > 0) { - // Primary and Satellite tLogs are synchronously replicated, hence we can lose all but 1. - return std::min(tLogReplicationFactor + worstSatellite - 1 - tLogWriteAntiQuorum, storageTeamSize - 1); + + if (worstSatelliteTLogReplicationFactor <= 0) { + // HA is not enabled in this database. Return single cluster zone failures to tolerate. + return std::min(tLogReplicationFactor - 1 - tLogWriteAntiQuorum, storageTeamSize - 1); } - return std::min(tLogReplicationFactor - 1 - tLogWriteAntiQuorum, storageTeamSize - 1); + + // Compute HA enabled database zone failure tolerance. 
+ auto isGeoReplicatedData = [this, &fullyReplicatedRegions]() { + return usableRegions > 1 && fullyReplicatedRegions > 1; + }; + + if (isGeoReplicatedData() && (!forAvailability || regionsWithNonNegativePriority > 1)) { + return 1 + std::min(std::max(tLogReplicationFactor - 1 - tLogWriteAntiQuorum, + worstSatelliteTLogReplicationFactor - 1), + storageTeamSize - 1); + } + // Primary and Satellite tLogs are synchronously replicated, hence we can lose all but 1. + return std::min(tLogReplicationFactor + worstSatelliteTLogReplicationFactor - 1 - tLogWriteAntiQuorum, + storageTeamSize - 1); } // CommitProxy Servers diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp index b53c5cd041..4430593bb8 100644 --- a/fdbserver/Status.actor.cpp +++ b/fdbserver/Status.actor.cpp @@ -1585,7 +1585,7 @@ static JsonBuilderObject configurationFetcher(Optional co ACTOR static Future dataStatusFetcher(WorkerDetails ddWorker, DatabaseConfiguration configuration, - int* minReplicasRemaining) { + int* minStorageReplicasRemaining) { state JsonBuilderObject statusObjData; try { @@ -1648,9 +1648,9 @@ ACTOR static Future dataStatusFetcher(WorkerDetails ddWorker, } JsonBuilderArray teamTrackers; - for (int i = 0; i < 2; i++) { - TraceEventFields inFlight = dataInfo[3 + i]; - if (!inFlight.size()) { + for (int i = 3; i < 5; i++) { + const TraceEventFields& inFlight = dataInfo[i]; + if (inFlight.size() == 0) { continue; } @@ -1674,19 +1674,16 @@ ACTOR static Future dataStatusFetcher(WorkerDetails ddWorker, stateSectionObj["healthy"] = false; stateSectionObj["name"] = "missing_data"; stateSectionObj["description"] = "No replicas remain of some data"; - stateSectionObj["min_replicas_remaining"] = 0; replicas = 0; } else if (highestPriority >= SERVER_KNOBS->PRIORITY_TEAM_1_LEFT) { stateSectionObj["healthy"] = false; stateSectionObj["name"] = "healing"; stateSectionObj["description"] = "Only one replica remains of some data"; - stateSectionObj["min_replicas_remaining"] = 1; replicas = 
1; } else if (highestPriority >= SERVER_KNOBS->PRIORITY_TEAM_2_LEFT) { stateSectionObj["healthy"] = false; stateSectionObj["name"] = "healing"; stateSectionObj["description"] = "Only two replicas remain of some data"; - stateSectionObj["min_replicas_remaining"] = 2; replicas = 2; } else if (highestPriority >= SERVER_KNOBS->PRIORITY_TEAM_UNHEALTHY) { stateSectionObj["healthy"] = false; @@ -1720,6 +1717,10 @@ ACTOR static Future dataStatusFetcher(WorkerDetails ddWorker, stateSectionObj["name"] = "healthy"; } + // Track the number of min replicas the storage servers in this region has. The sum of the replicas from + // both primary and remote region give the total number of data replicas this database currently has. + stateSectionObj["min_replicas_remaining"] = replicas; + if (!stateSectionObj.empty()) { team_tracker["state"] = stateSectionObj; teamTrackers.push_back(team_tracker); @@ -1728,10 +1729,13 @@ ACTOR static Future dataStatusFetcher(WorkerDetails ddWorker, } } + // Update minStorageReplicasRemaining. It is mainly used for fault tolerance computation later. Note that + // FDB treats the entire remote region as one zone, and all the zones in the remote region are in the same + // failure domain. if (primary) { - *minReplicasRemaining = std::max(*minReplicasRemaining, 0) + replicas; + *minStorageReplicasRemaining = std::max(*minStorageReplicasRemaining, 0) + replicas; } else if (replicas > 0) { - *minReplicasRemaining = std::max(*minReplicasRemaining, 0) + 1; + *minStorageReplicasRemaining = std::max(*minStorageReplicasRemaining, 0) + 1; } } statusObjData["team_trackers"] = teamTrackers; @@ -1850,7 +1854,7 @@ ACTOR static Future>> getGrvProxie return results; } -// Returns the number of zones eligble for recruiting new tLogs after failures, to maintain the current replication +// Returns the number of zones eligble for recruiting new tLogs after zone failures, to maintain the current replication // factor. 
static int getExtraTLogEligibleZones(const vector& workers, const DatabaseConfiguration& configuration) { std::set allZones; @@ -1868,17 +1872,20 @@ static int getExtraTLogEligibleZones(const vector& workers, const if (configuration.regions.size() == 0) { return allZones.size() - std::max(configuration.tLogReplicationFactor, configuration.storageTeamSize); } + int extraTlogEligibleZones = 0; int regionsWithNonNegativePriority = 0; - for (auto& region : configuration.regions) { + int maxRequiredReplicationFactor = + std::max(configuration.remoteTLogReplicationFactor, + std::max(configuration.tLogReplicationFactor, configuration.storageTeamSize)); + for (const auto& region : configuration.regions) { if (region.priority >= 0) { - int eligible = dcId_zone[region.dcId].size() - - std::max(configuration.remoteTLogReplicationFactor, - std::max(configuration.tLogReplicationFactor, configuration.storageTeamSize)); + int eligible = dcId_zone[region.dcId].size() - maxRequiredReplicationFactor; + // FIXME: does not take into account fallback satellite policies if (region.satelliteTLogReplicationFactor > 0 && configuration.usableRegions > 1) { int totalSatelliteEligible = 0; - for (auto& sat : region.satellites) { + for (const auto& sat : region.satellites) { totalSatelliteEligible += dcId_zone[sat.dcId].size(); } eligible = std::min(eligible, totalSatelliteEligible - region.satelliteTLogReplicationFactor); @@ -1890,6 +1897,8 @@ static int getExtraTLogEligibleZones(const vector& workers, const } } if (regionsWithNonNegativePriority > 1) { + // If the database is replicated across multiple regions, we can afford to lose one entire region without + // losing data. 
extraTlogEligibleZones++; } return extraTlogEligibleZones; @@ -2229,9 +2238,9 @@ static JsonBuilderObject tlogFetcher(int* logFaultTolerance, int minFaultTolerance = 1000; int localSetsWithNonNegativeFaultTolerance = 0; - for (int i = 0; i < tLogs.size(); i++) { + for (const auto& tLogSet : tLogs) { int failedLogs = 0; - for (auto& log : tLogs[i].tLogs) { + for (auto& log : tLogSet.tLogs) { JsonBuilderObject logObj; bool failed = !log.present() || !address_workers.count(log.interf().address()); logObj["id"] = log.id().shortString(); @@ -2245,13 +2254,13 @@ static JsonBuilderObject tlogFetcher(int* logFaultTolerance, } } - if (tLogs[i].isLocal) { - int currentFaultTolerance = tLogs[i].tLogReplicationFactor - 1 - tLogs[i].tLogWriteAntiQuorum - failedLogs; + if (tLogSet.isLocal) { + int currentFaultTolerance = tLogSet.tLogReplicationFactor - 1 - tLogSet.tLogWriteAntiQuorum - failedLogs; if (currentFaultTolerance >= 0) { localSetsWithNonNegativeFaultTolerance++; } - if (tLogs[i].locality == tagLocalitySatellite) { + if (tLogSet.locality == tagLocalitySatellite) { // FIXME: This hack to bump satellite fault tolerance, is to make it consistent // with 6.2. 
minFaultTolerance = std::min(minFaultTolerance, currentFaultTolerance + 1); @@ -2260,17 +2269,17 @@ static JsonBuilderObject tlogFetcher(int* logFaultTolerance, } } - if (tLogs[i].isLocal && tLogs[i].locality == tagLocalitySatellite) { - sat_log_replication_factor = tLogs[i].tLogReplicationFactor; - sat_log_write_anti_quorum = tLogs[i].tLogWriteAntiQuorum; - sat_log_fault_tolerance = tLogs[i].tLogReplicationFactor - 1 - tLogs[i].tLogWriteAntiQuorum - failedLogs; - } else if (tLogs[i].isLocal) { - log_replication_factor = tLogs[i].tLogReplicationFactor; - log_write_anti_quorum = tLogs[i].tLogWriteAntiQuorum; - log_fault_tolerance = tLogs[i].tLogReplicationFactor - 1 - tLogs[i].tLogWriteAntiQuorum - failedLogs; + if (tLogSet.isLocal && tLogSet.locality == tagLocalitySatellite) { + sat_log_replication_factor = tLogSet.tLogReplicationFactor; + sat_log_write_anti_quorum = tLogSet.tLogWriteAntiQuorum; + sat_log_fault_tolerance = tLogSet.tLogReplicationFactor - 1 - tLogSet.tLogWriteAntiQuorum - failedLogs; + } else if (tLogSet.isLocal) { + log_replication_factor = tLogSet.tLogReplicationFactor; + log_write_anti_quorum = tLogSet.tLogWriteAntiQuorum; + log_fault_tolerance = tLogSet.tLogReplicationFactor - 1 - tLogSet.tLogWriteAntiQuorum - failedLogs; } else { - remote_log_replication_factor = tLogs[i].tLogReplicationFactor; - remote_log_fault_tolerance = tLogs[i].tLogReplicationFactor - 1 - failedLogs; + remote_log_replication_factor = tLogSet.tLogReplicationFactor; + remote_log_fault_tolerance = tLogSet.tLogReplicationFactor - 1 - failedLogs; } } if (minFaultTolerance == 1000) { @@ -2313,6 +2322,8 @@ static JsonBuilderArray tlogFetcher(int* logFaultTolerance, std::unordered_map const& address_workers) { JsonBuilderArray tlogsArray; JsonBuilderObject tlogsStatus; + + // First, fetch from the current TLog generation. 
tlogsStatus = tlogFetcher(logFaultTolerance, db->get().logSystemConfig.tLogs, address_workers); tlogsStatus["epoch"] = db->get().logSystemConfig.epoch; tlogsStatus["current"] = true; @@ -2320,6 +2331,8 @@ static JsonBuilderArray tlogFetcher(int* logFaultTolerance, tlogsStatus["begin_version"] = db->get().logSystemConfig.recoveredAt.get(); } tlogsArray.push_back(tlogsStatus); + + // fetch all the old generations of TLogs. for (auto it : db->get().logSystemConfig.oldTLogs) { JsonBuilderObject oldTlogsStatus = tlogFetcher(logFaultTolerance, it.tLogs, address_workers); oldTlogsStatus["epoch"] = it.epoch; @@ -2335,7 +2348,7 @@ static JsonBuilderObject faultToleranceStatusFetcher(DatabaseConfiguration confi ServerCoordinators coordinators, std::vector& workers, int extraTlogEligibleZones, - int minReplicasRemaining, + int minStorageReplicasRemaining, int oldLogFaultTolerance, int fullyReplicatedRegions, bool underMaintenance) { @@ -2375,8 +2388,8 @@ static JsonBuilderObject faultToleranceStatusFetcher(DatabaseConfiguration confi // max zone failures that we can tolerate to not lose data int zoneFailuresWithoutLosingData = std::min(maxZoneFailures, maxCoordinatorZoneFailures); - if (minReplicasRemaining >= 0) { - zoneFailuresWithoutLosingData = std::min(zoneFailuresWithoutLosingData, minReplicasRemaining - 1); + if (minStorageReplicasRemaining >= 0) { + zoneFailuresWithoutLosingData = std::min(zoneFailuresWithoutLosingData, minStorageReplicasRemaining - 1); } // oldLogFaultTolerance means max failures we can tolerate to lose logs data. -1 means we lose data or availability. 
@@ -2625,10 +2638,9 @@ ACTOR Future clusterGetStatus( Version datacenterVersionDifference) { state double tStart = timer(); - // Check if master worker is present state JsonBuilderArray messages; state std::set status_incomplete_reasons; - state WorkerDetails mWorker; + state WorkerDetails mWorker; // Master worker state WorkerDetails ddWorker; // DataDistributor worker state WorkerDetails rkWorker; // Ratekeeper worker @@ -2641,6 +2653,7 @@ ACTOR Future clusterGetStatus( messages.push_back( JsonString::makeMessage("unreachable_master_worker", "Unable to locate the master worker.")); } + // Get the DataDistributor worker interface Optional _ddWorker; if (db->get().distributor.present()) { @@ -2669,12 +2682,12 @@ ACTOR Future clusterGetStatus( // Get latest events for various event types from ALL workers // WorkerEvents is a map of worker's NetworkAddress to its event string - // The pair represents worker responses and a set of worker NetworkAddress strings which did not respond + // The pair represents worker responses and a set of worker NetworkAddress strings which did not respond. std::vector>>>> futures; futures.push_back(latestEventOnWorkers(workers, "MachineMetrics")); futures.push_back(latestEventOnWorkers(workers, "ProcessMetrics")); futures.push_back(latestEventOnWorkers(workers, "NetworkMetrics")); - futures.push_back(latestErrorOnWorkers(workers)); + futures.push_back(latestErrorOnWorkers(workers)); // Get all latest errors. futures.push_back(latestEventOnWorkers(workers, "TraceFileOpenError")); futures.push_back(latestEventOnWorkers(workers, "ProgramStart")); @@ -2689,13 +2702,13 @@ ACTOR Future clusterGetStatus( // For each (optional) pair, if the pair is present and not empty then add the unreachable workers to the set. 
for (auto pair : workerEventsVec) { - if (pair.present() && pair.get().second.size()) + if (pair.present() && !pair.get().second.empty()) mergeUnreachable.insert(pair.get().second.begin(), pair.get().second.end()); } // We now have a unique set of workers who were in some way unreachable. If there is anything in that set, // create a message for it and include the list of unreachable processes. - if (mergeUnreachable.size()) { + if (!mergeUnreachable.empty()) { JsonBuilderObject message = JsonBuilder::makeMessage("unreachable_processes", "The cluster has some unreachable processes."); JsonBuilderArray unreachableProcs; @@ -2806,11 +2819,11 @@ ACTOR Future clusterGetStatus( state Future>>> grvProxyFuture = errorOr(getGrvProxiesAndMetrics(db, address_workers)); - state int minReplicasRemaining = -1; + state int minStorageReplicasRemaining = -1; state int fullyReplicatedRegions = -1; state Future> primaryDCFO = getActivePrimaryDC(cx, &fullyReplicatedRegions, &messages); std::vector> futures2; - futures2.push_back(dataStatusFetcher(ddWorker, configuration.get(), &minReplicasRemaining)); + futures2.push_back(dataStatusFetcher(ddWorker, configuration.get(), &minStorageReplicasRemaining)); futures2.push_back(workloadStatusFetcher( db, workers, mWorker, rkWorker, &qos, &data_overlay, &status_incomplete_reasons, storageServerFuture)); futures2.push_back(layerStatusFetcher(cx, &messages, &status_incomplete_reasons)); @@ -2825,18 +2838,16 @@ ACTOR Future clusterGetStatus( statusObj["logs"] = tlogFetcher(&logFaultTolerance, db, address_workers); } - if (configuration.present()) { - int extraTlogEligibleZones = getExtraTLogEligibleZones(workers, configuration.get()); - statusObj["fault_tolerance"] = - faultToleranceStatusFetcher(configuration.get(), - coordinators, - workers, - extraTlogEligibleZones, - minReplicasRemaining, - logFaultTolerance, - fullyReplicatedRegions, - loadResult.present() && loadResult.get().healthyZone.present()); - } + int extraTlogEligibleZones = 
getExtraTLogEligibleZones(workers, configuration.get()); + statusObj["fault_tolerance"] = + faultToleranceStatusFetcher(configuration.get(), + coordinators, + workers, + extraTlogEligibleZones, + minStorageReplicasRemaining, + logFaultTolerance, + fullyReplicatedRegions, + loadResult.present() && loadResult.get().healthyZone.present()); state JsonBuilderObject configObj = configurationFetcher(configuration, coordinators, &status_incomplete_reasons); From 6552d7e0a72da9e43ddc7f4fb59818c75102afe7 Mon Sep 17 00:00:00 2001 From: Nim Wijetunga Date: Wed, 17 Mar 2021 23:49:52 +0000 Subject: [PATCH 10/49] fix docs --- documentation/sphinx/source/api-c.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/documentation/sphinx/source/api-c.rst b/documentation/sphinx/source/api-c.rst index 0ad77ad43c..4909ae8dea 100644 --- a/documentation/sphinx/source/api-c.rst +++ b/documentation/sphinx/source/api-c.rst @@ -484,7 +484,7 @@ An |database-blurb1| Modifications to a database are performed via transactions. .. function:: double fdb_database_get_main_thread_busyness(FDBDatabase* database) - Returns a value where `0` indicates that the client is idle, whereas a value of 1 (or larger) indicates that the client is saturated. By default, this value is updated every second. + Returns a value where 0 indicates that the client is idle and a value of 1 (or larger) indicates that the client is saturated. By default, this value is updated every second. Transaction =========== From cb26576b95da88fe31430da103a48832cde309dd Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Sun, 21 Mar 2021 18:25:02 -0700 Subject: [PATCH 11/49] Fix DD assertion failure This fixes #4493, where DDTeamCollection::~DDTeamCollection creates new teams that hold pointer to the DDTeamCollection, thus later causes assertion failure because the memory is invalid. The fix is to cancel teamBuilder at the begining of the ~DDTeamCollection. 
--- fdbserver/DataDistribution.actor.cpp | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index 00fa458916..d70d9d490a 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -620,6 +620,7 @@ struct DDTeamCollection : ReferenceCounted { int healthyTeamCount; Reference> zeroHealthyTeams; + bool shuttingDown = false; int optimalTeamCount; AsyncVar zeroOptimalTeams; @@ -717,6 +718,12 @@ struct DDTeamCollection : ReferenceCounted { } ~DDTeamCollection() { + shuttingDown = true; // Prevent recruiting new teams when zeroHealthyTeams signals + teamBuilder.cancel(); + // TraceEvent("DDTeamCollectionDestructed", distributorId) + // .detail("Primary", primary) + // .detail("TeamBuilderDestroyed", server_info.size()); + TraceEvent("DDTeamCollectionDestructed", distributorId).detail("Primary", primary); // Other teamCollections also hold pointer to this teamCollection; // TeamTracker may access the destructed DDTeamCollection if we do not reset the pointer @@ -753,13 +760,9 @@ struct DDTeamCollection : ReferenceCounted { info->tracker.cancel(); info->collection = nullptr; } - // TraceEvent("DDTeamCollectionDestructed", distributorId) - // .detail("Primary", primary) - // .detail("ServerTrackerDestroyed", server_info.size()); - teamBuilder.cancel(); - // TraceEvent("DDTeamCollectionDestructed", distributorId) - // .detail("Primary", primary) - // .detail("TeamBuilderDestroyed", server_info.size()); + TraceEvent("DDTeamCollectionDestructed", distributorId) + .detail("Primary", primary) + .detail("ServerTrackerDestroyed", server_info.size()); } void addLaggingStorageServer(Key zoneId) { @@ -4688,7 +4691,11 @@ ACTOR Future monitorHealthyTeams(DDTeamCollection* self) { self->doBuildTeams = true; wait(DDTeamCollection::checkBuildTeams(self)); } - when(wait(self->zeroHealthyTeams->onChange())) {} + 
when(wait(self->zeroHealthyTeams->onChange())) { + if (self->shuttingDown) { + return Void(); + } + } } } @@ -4754,7 +4761,7 @@ ACTOR Future dataDistributionTeamCollection(Reference te self->restartRecruiting.trigger(); } when(wait(self->zeroHealthyTeams->onChange())) { - if (self->zeroHealthyTeams->get()) { + if (self->zeroHealthyTeams->get() && !self->shuttingDown) { self->restartRecruiting.trigger(); self->noHealthyTeams(); } From 0c3bc09524ab46f9eedbe17e71845b9d53607117 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Sun, 21 Mar 2021 20:03:05 -0700 Subject: [PATCH 12/49] Remove the shuttingDown flag --- fdbserver/DataDistribution.actor.cpp | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index d70d9d490a..50e79e5384 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -620,7 +620,6 @@ struct DDTeamCollection : ReferenceCounted { int healthyTeamCount; Reference> zeroHealthyTeams; - bool shuttingDown = false; int optimalTeamCount; AsyncVar zeroOptimalTeams; @@ -718,13 +717,14 @@ struct DDTeamCollection : ReferenceCounted { } ~DDTeamCollection() { - shuttingDown = true; // Prevent recruiting new teams when zeroHealthyTeams signals + TraceEvent("DDTeamCollectionDestructed", distributorId).detail("Primary", primary); + + // Cancel the teamBuilder to avoid creating new teams after teams are cancelled. 
teamBuilder.cancel(); // TraceEvent("DDTeamCollectionDestructed", distributorId) // .detail("Primary", primary) // .detail("TeamBuilderDestroyed", server_info.size()); - TraceEvent("DDTeamCollectionDestructed", distributorId).detail("Primary", primary); // Other teamCollections also hold pointer to this teamCollection; // TeamTracker may access the destructed DDTeamCollection if we do not reset the pointer for (int i = 0; i < teamCollections.size(); i++) { @@ -760,9 +760,9 @@ struct DDTeamCollection : ReferenceCounted { info->tracker.cancel(); info->collection = nullptr; } - TraceEvent("DDTeamCollectionDestructed", distributorId) - .detail("Primary", primary) - .detail("ServerTrackerDestroyed", server_info.size()); + // TraceEvent("DDTeamCollectionDestructed", distributorId) + // .detail("Primary", primary) + // .detail("ServerTrackerDestroyed", server_info.size()); } void addLaggingStorageServer(Key zoneId) { @@ -4691,11 +4691,7 @@ ACTOR Future monitorHealthyTeams(DDTeamCollection* self) { self->doBuildTeams = true; wait(DDTeamCollection::checkBuildTeams(self)); } - when(wait(self->zeroHealthyTeams->onChange())) { - if (self->shuttingDown) { - return Void(); - } - } + when(wait(self->zeroHealthyTeams->onChange())) {} } } @@ -4761,7 +4757,7 @@ ACTOR Future dataDistributionTeamCollection(Reference te self->restartRecruiting.trigger(); } when(wait(self->zeroHealthyTeams->onChange())) { - if (self->zeroHealthyTeams->get() && !self->shuttingDown) { + if (self->zeroHealthyTeams->get()) { self->restartRecruiting.trigger(); self->noHealthyTeams(); } From 0ba99c8e83bc0ff58ac6c3803275bfbb7f1c7c3b Mon Sep 17 00:00:00 2001 From: Markus Pilman Date: Tue, 23 Mar 2021 09:38:07 -0600 Subject: [PATCH 13/49] Pretty-print topology when starting simulation --- cmake/Jemalloc.cmake | 2 +- fdbserver/tester.actor.cpp | 54 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/cmake/Jemalloc.cmake b/cmake/Jemalloc.cmake index 
6dff173b93..e89ef3ce82 100644 --- a/cmake/Jemalloc.cmake +++ b/cmake/Jemalloc.cmake @@ -3,7 +3,7 @@ add_library(jemalloc INTERFACE) set(USE_JEMALLOC ON) # We don't want to use jemalloc on Windows # Nor on FreeBSD, where jemalloc is the default system allocator -if(USE_SANITIZER OR WIN32 OR (CMAKE_SYSTEM_NAME STREQUAL "FreeBSD")) +if(USE_SANITIZER OR WIN32 OR (CMAKE_SYSTEM_NAME STREQUAL "FreeBSD") OR APPLE) set(USE_JEMALLOC OFF) return() endif() diff --git a/fdbserver/tester.actor.cpp b/fdbserver/tester.actor.cpp index 20a71b3efc..1b7725cb76 100644 --- a/fdbserver/tester.actor.cpp +++ b/fdbserver/tester.actor.cpp @@ -363,6 +363,59 @@ TestWorkload* getWorkloadIface(WorkloadRequest work, ReferenceisSimulated()) { + return; + } + auto processes = g_simulator.getAllProcesses(); + std::sort(processes.begin(), processes.end(), [](ISimulator::ProcessInfo* lhs, ISimulator::ProcessInfo* rhs) { + auto l = lhs->locality; + auto r = rhs->locality; + if (l.dcId() != r.dcId()) { + return l.dcId() < r.dcId(); + } + if (l.dataHallId() != r.dataHallId()) { + return l.dataHallId() < r.dataHallId(); + } + if (l.zoneId() != r.zoneId()) { + return l.zoneId() < r.zoneId(); + } + if (l.machineId() != r.zoneId()) { + return l.machineId() < r.machineId(); + } + return lhs->address < rhs->address; + }); + printf("Simulated Cluster Topology:\n"); + printf("===========================\n"); + Optional> dcId, dataHallId, zoneId, machineId; + for (auto p : processes) { + std::string indent = ""; + if (dcId != p->locality.dcId()) { + dcId = p->locality.dcId(); + printf("%sdcId: %s\n", indent.c_str(), p->locality.describeDcId().c_str()); + } + indent += " "; + if (dataHallId != p->locality.dataHallId()) { + dataHallId = p->locality.dataHallId(); + printf("%sdataHallId: %s\n", indent.c_str(), p->locality.describeDataHall().c_str()); + } + indent += " "; + if (zoneId != p->locality.zoneId()) { + zoneId = p->locality.zoneId(); + printf("%szoneId: %s\n", indent.c_str(), 
p->locality.describeZone().c_str()); + } + indent += " "; + if (machineId != p->locality.machineId()) { + machineId = p->locality.machineId(); + printf("%smachineId: %s\n", indent.c_str(), p->locality.describeMachineId().c_str()); + } + indent += " "; + printf("%sAddress: %s\n", indent.c_str(), p->address.toString().c_str(), p->name); + printf("%sClass: %s\n", indent.c_str(), p->startingClass.toString().c_str()); + printf("%sName: %s\n", indent.c_str(), p->name); + } +} + ACTOR Future databaseWarmer(Database cx) { loop { state Transaction tr(cx); @@ -1346,6 +1399,7 @@ ACTOR Future runTests(Reference Date: Tue, 23 Mar 2021 10:51:16 -0600 Subject: [PATCH 14/49] Add documentation --- fdbserver/tester.actor.cpp | 66 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/fdbserver/tester.actor.cpp b/fdbserver/tester.actor.cpp index 1b7725cb76..1c7c407a44 100644 --- a/fdbserver/tester.actor.cpp +++ b/fdbserver/tester.actor.cpp @@ -363,6 +363,10 @@ TestWorkload* getWorkloadIface(WorkloadRequest work, ReferenceisSimulated()) { return; @@ -1344,6 +1348,24 @@ ACTOR Future monitorServerDBInfo(Reference runTests(Reference>> cc, Reference>> ci, vector testers, @@ -1456,6 +1478,24 @@ ACTOR Future runTests(Reference runTests(Reference>> cc, Reference>> ci, vector tests, @@ -1497,6 +1537,32 @@ ACTOR Future runTests(Reference runTests(Reference connFile, test_type_t whatToRun, test_location_t at, From af869ac2658cdc410436af48aa9a2bdc2ac4efa7 Mon Sep 17 00:00:00 2001 From: Markus Pilman Date: Tue, 23 Mar 2021 10:59:01 -0600 Subject: [PATCH 15/49] minor formatting fix --- fdbserver/tester.actor.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/fdbserver/tester.actor.cpp b/fdbserver/tester.actor.cpp index 1c7c407a44..d28865fde8 100644 --- a/fdbserver/tester.actor.cpp +++ b/fdbserver/tester.actor.cpp @@ -415,6 +415,7 @@ void printSimulatedTopology() { } indent += " "; printf("%sAddress: %s\n", indent.c_str(), p->address.toString().c_str(), p->name); + 
indent += " "; printf("%sClass: %s\n", indent.c_str(), p->startingClass.toString().c_str()); printf("%sName: %s\n", indent.c_str(), p->name); } From 1254a21caa49eb34651906e86e9c35d1bbdc8889 Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Tue, 23 Mar 2021 14:58:00 -0400 Subject: [PATCH 16/49] Initialize extraDB in simulationConfig according to TestConfig parameter --- fdbserver/SimulatedCluster.actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbserver/SimulatedCluster.actor.cpp b/fdbserver/SimulatedCluster.actor.cpp index 76f392aea5..d6ea3e2e35 100644 --- a/fdbserver/SimulatedCluster.actor.cpp +++ b/fdbserver/SimulatedCluster.actor.cpp @@ -857,7 +857,7 @@ private: void generateNormalConfig(const TestConfig& testConfig); }; -SimulationConfig::SimulationConfig(const TestConfig& testConfig) { +SimulationConfig::SimulationConfig(const TestConfig& testConfig) : extraDB(testConfig.extraDB) { generateNormalConfig(testConfig); } From 0daf6cf6326f9faa9bb3fcfa44def26e056ac48d Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Tue, 23 Mar 2021 19:40:47 +0000 Subject: [PATCH 17/49] Consider extending a file with truncate as a "pending modification" Before this, truncating and reading concurrently could cause to read uninitialized memory. So could truncating then reading, since the effect of the truncate in the actual file was allowed to be delayed. Now reads will wait for a truncate that extends the file to complete if they intersect the newly-zeroed region. 
--- fdbrpc/AsyncFileNonDurable.actor.h | 32 +++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/fdbrpc/AsyncFileNonDurable.actor.h b/fdbrpc/AsyncFileNonDurable.actor.h index 49fe0e2c8f..7df6d2c491 100644 --- a/fdbrpc/AsyncFileNonDurable.actor.h +++ b/fdbrpc/AsyncFileNonDurable.actor.h @@ -145,8 +145,17 @@ private: // The maximum amount of time a write is delayed before being passed along to the underlying file double maxWriteDelay; - // Modifications which haven't been pushed to file, mapped by the location in the file that is being modified + // Modifications which haven't been pushed to file, mapped by the location in the file that is being modified. + // Be sure to update minSizeAfterPendingModifications when modifying pendingModifications. RangeMap> pendingModifications; + // The size of the file after the set of pendingModifications completes, + // (the set pending at the time of reading this member). Must be updated in + // lockstep with any inserts into the pendingModifications map. Tracking + // this variable is necessary so that we can know the range of the file a + // truncate is modifying, so we can insert it into the pendingModifications + // map. Until minSizeAfterPendingModificationsIsExact is true, this is only a lower bound. 
+ mutable int64_t minSizeAfterPendingModifications = 0; + mutable bool minSizeAfterPendingModificationsIsExact = false; // Will be blocked whenever kill is running Promise killed; @@ -437,6 +446,7 @@ private: Future writeEnded = wait(ownFuture); std::vector> priorModifications = self->getModificationsAndInsert(offset, length, true, writeEnded); + self->minSizeAfterPendingModifications = offset + length; if (BUGGIFY_WITH_PROB(0.001)) priorModifications.push_back( @@ -603,9 +613,20 @@ private: //TraceEvent("AsyncFileNonDurable_Truncate", self->id).detail("Delay", delayDuration).detail("Filename", self->filename); wait(checkKilled(self, "Truncate")); - Future truncateEnded = wait(ownFuture); + state Future truncateEnded = wait(ownFuture); + + // Need to know the size of the file directly before this truncate + // takes effect to see what range it modifies. + if (!self->minSizeAfterPendingModificationsIsExact) { + wait(success(self->size())); + } + ASSERT(self->minSizeAfterPendingModificationsIsExact); + int64_t beginModifiedRange = std::min(size, self->minSizeAfterPendingModifications); + int64_t endModifiedRange = std::max(size, self->minSizeAfterPendingModifications); + self->minSizeAfterPendingModifications = size; + std::vector> priorModifications = - self->getModificationsAndInsert(size, -1, true, truncateEnded); + self->getModificationsAndInsert(beginModifiedRange, endModifiedRange, true, truncateEnded); if (BUGGIFY_WITH_PROB(0.001)) priorModifications.push_back( @@ -751,8 +772,9 @@ private: wait(checkKilled(self, "SizeEnd")); // Include any modifications which extend past the end of the file - uint64_t maxModification = self->pendingModifications.lastItem().begin(); - self->approximateSize = std::max(sizeFuture.get(), maxModification); + self->approximateSize = self->minSizeAfterPendingModifications = + std::max(sizeFuture.get(), self->minSizeAfterPendingModifications); + self->minSizeAfterPendingModificationsIsExact = true; return self->approximateSize; } From 
de1c354b129dbb6dcaf13943b25f4a4f9747165c Mon Sep 17 00:00:00 2001 From: Nim Wijetunga Date: Tue, 23 Mar 2021 20:56:37 +0000 Subject: [PATCH 18/49] address pr comments --- bindings/c/test/unit/unit_tests.cpp | 2 +- fdbclient/NativeAPI.actor.cpp | 34 +++++++++++++---------------- fdbclient/ThreadSafeTransaction.cpp | 2 +- flow/Net2.actor.cpp | 10 ++++----- flow/network.h | 4 ++-- 5 files changed, 24 insertions(+), 28 deletions(-) diff --git a/bindings/c/test/unit/unit_tests.cpp b/bindings/c/test/unit/unit_tests.cpp index 67692f4af9..59264dd4f2 100644 --- a/bindings/c/test/unit/unit_tests.cpp +++ b/bindings/c/test/unit/unit_tests.cpp @@ -2113,8 +2113,8 @@ TEST_CASE("block_from_callback") { context.event.wait(); } +// monitors network busyness for 2 sec (40 readings) TEST_CASE("monitor_network_busyness") { - // monitors network busyness for 2 sec (40 readings) bool containsGreaterZero = false; for (int i = 0; i < 40; i++) { double busyness = fdb_database_get_main_thread_busyness(db); diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index a7e0d81bff..8b55757621 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -1746,27 +1746,23 @@ void setNetworkOption(FDBNetworkOptions::Option option, Optional valu // update the network busyness on a 1s cadence ACTOR Future monitorNetworkBusyness() { state double prevTime = now(); - loop choose { - when(wait(delay(CLIENT_KNOBS->NETWORK_BUSYNESS_MONITOR_INTERVAL, TaskPriority::FlushTrace))) { - double elapsed = now() - prevTime; // get elapsed time from last execution - prevTime = now(); - if (!g_network->isSimulated()) { - struct NetworkMetrics::PriorityStats& itr = - g_network->networkInfo.metrics.starvationTrackerNetworkBusyness; + loop { + wait(delay(CLIENT_KNOBS->NETWORK_BUSYNESS_MONITOR_INTERVAL, TaskPriority::FlushTrace)); + double elapsed = now() - prevTime; // get elapsed time from last execution + prevTime = now(); + struct NetworkMetrics::PriorityStats& tracker = 
g_network->networkInfo.metrics.starvationTrackerNetworkBusyness; - if (itr.active) { // update metrics - itr.duration += now() - itr.windowedTimer; - itr.maxDuration = std::max(itr.maxDuration, now() - itr.timer); - itr.windowedTimer = now(); - } - - g_network->networkInfo.metrics.networkBusyness = - std::min(elapsed, itr.duration) / elapsed; // average duration spent doing "work" - - itr.duration = 0; - itr.maxDuration = 0; - } + if (tracker.active) { // update metrics + tracker.duration += now() - tracker.windowedTimer; + tracker.maxDuration = std::max(tracker.maxDuration, now() - tracker.timer); + tracker.windowedTimer = now(); } + + g_network->networkInfo.metrics.networkBusyness = + std::min(elapsed, tracker.duration) / elapsed; // average duration spent doing "work" + + tracker.duration = 0; + tracker.maxDuration = 0; } } diff --git a/fdbclient/ThreadSafeTransaction.cpp b/fdbclient/ThreadSafeTransaction.cpp index 1d07e30782..4691ffb557 100644 --- a/fdbclient/ThreadSafeTransaction.cpp +++ b/fdbclient/ThreadSafeTransaction.cpp @@ -91,8 +91,8 @@ ThreadFuture ThreadSafeDatabase::createSnapshot(const StringRef& uid, cons return onMainThread([db, snapUID, cmd]() -> Future { return db->createSnapshot(snapUID, cmd); }); } +// Return the main network thread busyness double ThreadSafeDatabase::getMainThreadBusyness() { - // Return the main network thread busyness ASSERT(g_network); return g_network->networkInfo.metrics.networkBusyness; } diff --git a/flow/Net2.actor.cpp b/flow/Net2.actor.cpp index 7676232897..eca3d41acb 100644 --- a/flow/Net2.actor.cpp +++ b/flow/Net2.actor.cpp @@ -136,7 +136,7 @@ thread_local INetwork* thread_network = 0; class Net2 final : public INetwork, public INetworkConnections { private: - void updateStarvationTrackers(struct NetworkMetrics::PriorityStats& binStats, + void updateStarvationTracker(struct NetworkMetrics::PriorityStats& binStats, TaskPriority priority, TaskPriority lastPriority, double now); @@ -1588,7 +1588,7 @@ void Net2::run() { 
} // Updates the PriorityStats found in NetworkMetrics -void Net2::updateStarvationTrackers(struct NetworkMetrics::PriorityStats& binStats, +void Net2::updateStarvationTracker(struct NetworkMetrics::PriorityStats& binStats, TaskPriority priority, TaskPriority lastPriority, double now) { @@ -1608,8 +1608,8 @@ void Net2::updateStarvationTrackers(struct NetworkMetrics::PriorityStats& binSta } } +// Update both vectors of starvation trackers (one that updates every 5s and the other every 1s) void Net2::trackAtPriority(TaskPriority priority, double now) { - // Update both vectors of starvation trackers (one that updates every 5s and the other every 1s) if (lastPriorityStats == nullptr || priority != lastPriorityStats->priority) { // Start tracking current priority auto activeStatsItr = networkInfo.metrics.activeTrackers.try_emplace(priority, priority); @@ -1628,11 +1628,11 @@ void Net2::trackAtPriority(TaskPriority priority, double now) { if (binStats.priority > lastPriority && binStats.priority > priority) { break; } - updateStarvationTrackers(binStats, priority, lastPriority, now); + updateStarvationTracker(binStats, priority, lastPriority, now); } // Update starvation trackers for network busyness - updateStarvationTrackers(networkInfo.metrics.starvationTrackerNetworkBusyness, priority, lastPriority, now); + updateStarvationTracker(networkInfo.metrics.starvationTrackerNetworkBusyness, priority, lastPriority, now); lastPriorityStats = &activeStatsItr.first->second; } diff --git a/flow/network.h b/flow/network.h index 8ca620803e..d4970fa33c 100644 --- a/flow/network.h +++ b/flow/network.h @@ -321,8 +321,8 @@ class Future; template class Promise; +// Metrics which represent various network properties struct NetworkMetrics { - // Metrics which represent various network properties enum { SLOW_EVENT_BINS = 16 }; uint64_t countSlowEvents[SLOW_EVENT_BINS] = {}; @@ -359,8 +359,8 @@ struct NetworkMetrics { } } + // Since networkBusyness is atomic we need to redfine copy 
assignment operator NetworkMetrics& operator=(const NetworkMetrics& rhs) { - // Since networkBusyness is atomic we need to redfine copy assignment operator for (int i = 0; i < SLOW_EVENT_BINS; i++) { countSlowEvents[i] = rhs.countSlowEvents[i]; } From e83de2b799febe2cb3f6a8207829453f3fc25c14 Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Tue, 23 Mar 2021 21:00:21 +0000 Subject: [PATCH 19/49] Fix bug: minSizeAfterPendingModifications needs to be maxed --- fdbrpc/AsyncFileNonDurable.actor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbrpc/AsyncFileNonDurable.actor.h b/fdbrpc/AsyncFileNonDurable.actor.h index 7df6d2c491..1fc7bd572a 100644 --- a/fdbrpc/AsyncFileNonDurable.actor.h +++ b/fdbrpc/AsyncFileNonDurable.actor.h @@ -446,7 +446,7 @@ private: Future writeEnded = wait(ownFuture); std::vector> priorModifications = self->getModificationsAndInsert(offset, length, true, writeEnded); - self->minSizeAfterPendingModifications = offset + length; + self->minSizeAfterPendingModifications = std::max(self->minSizeAfterPendingModifications, offset + length); if (BUGGIFY_WITH_PROB(0.001)) priorModifications.push_back( From 170c197c4cd10c2c4a556507fc124e5e5cf00d24 Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Tue, 23 Mar 2021 21:00:35 +0000 Subject: [PATCH 20/49] Truncate marks everything after size modified --- fdbrpc/AsyncFileNonDurable.actor.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fdbrpc/AsyncFileNonDurable.actor.h b/fdbrpc/AsyncFileNonDurable.actor.h index 1fc7bd572a..b682a7741b 100644 --- a/fdbrpc/AsyncFileNonDurable.actor.h +++ b/fdbrpc/AsyncFileNonDurable.actor.h @@ -622,11 +622,10 @@ private: } ASSERT(self->minSizeAfterPendingModificationsIsExact); int64_t beginModifiedRange = std::min(size, self->minSizeAfterPendingModifications); - int64_t endModifiedRange = std::max(size, self->minSizeAfterPendingModifications); self->minSizeAfterPendingModifications = size; std::vector> priorModifications = - 
self->getModificationsAndInsert(beginModifiedRange, endModifiedRange, true, truncateEnded); + self->getModificationsAndInsert(beginModifiedRange, /*through end of file*/ -1, true, truncateEnded); if (BUGGIFY_WITH_PROB(0.001)) priorModifications.push_back( From 9fd3c559c2d4d41ff6fcd4bf5bbf4ce8139bad74 Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Tue, 23 Mar 2021 17:24:00 -0400 Subject: [PATCH 21/49] Added some missing comments for structs --- fdbserver/SimulatedCluster.actor.cpp | 1 + fdbserver/TesterInterface.actor.h | 2 ++ 2 files changed, 3 insertions(+) diff --git a/fdbserver/SimulatedCluster.actor.cpp b/fdbserver/SimulatedCluster.actor.cpp index d6ea3e2e35..9099914cd5 100644 --- a/fdbserver/SimulatedCluster.actor.cpp +++ b/fdbserver/SimulatedCluster.actor.cpp @@ -839,6 +839,7 @@ ACTOR Future restartSimulatedSystem(vector>* systemActors, return Void(); } +// Configuration details compiled in a structure used when setting up a simulated cluster struct SimulationConfig { explicit SimulationConfig(const TestConfig& testConfig); int extraDB; diff --git a/fdbserver/TesterInterface.actor.h b/fdbserver/TesterInterface.actor.h index 6d3ddffa67..f5e84a2a58 100644 --- a/fdbserver/TesterInterface.actor.h +++ b/fdbserver/TesterInterface.actor.h @@ -99,6 +99,8 @@ struct WorkloadRequest { } }; +// Configuration details specified in workload test files that change the simulation +// environment details struct TestConfig { int extraDB = 0; int minimumReplication = 0; From e174ea0dfde5869e34dc66ed500bb0c10364e4a2 Mon Sep 17 00:00:00 2001 From: Nim Wijetunga Date: Tue, 23 Mar 2021 22:04:32 +0000 Subject: [PATCH 22/49] fix comment typo --- flow/network.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flow/network.h b/flow/network.h index d4970fa33c..d0f117dede 100644 --- a/flow/network.h +++ b/flow/network.h @@ -359,7 +359,7 @@ struct NetworkMetrics { } } - // Since networkBusyness is atomic we need to redfine copy assignment operator + // Since networkBusyness 
is atomic we need to redefine copy assignment operator NetworkMetrics& operator=(const NetworkMetrics& rhs) { for (int i = 0; i < SLOW_EVENT_BINS; i++) { countSlowEvents[i] = rhs.countSlowEvents[i]; From ea16d2cccd7877a74c54dc7cd82e44e19978d319 Mon Sep 17 00:00:00 2001 From: Nim Wijetunga Date: Tue, 23 Mar 2021 23:09:08 +0000 Subject: [PATCH 23/49] address pr comments --- documentation/sphinx/source/api-c.rst | 2 +- flow/Net2.actor.cpp | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/documentation/sphinx/source/api-c.rst b/documentation/sphinx/source/api-c.rst index 4909ae8dea..0d02dc18dd 100644 --- a/documentation/sphinx/source/api-c.rst +++ b/documentation/sphinx/source/api-c.rst @@ -484,7 +484,7 @@ An |database-blurb1| Modifications to a database are performed via transactions. .. function:: double fdb_database_get_main_thread_busyness(FDBDatabase* database) - Returns a value where 0 indicates that the client is idle and a value of 1 (or larger) indicates that the client is saturated. By default, this value is updated every second. + Returns a value where 0 indicates that the client is idle and 1 (or larger) indicates that the client is saturated. By default, this value is updated every second. 
Transaction =========== diff --git a/flow/Net2.actor.cpp b/flow/Net2.actor.cpp index eca3d41acb..d781b1fc1b 100644 --- a/flow/Net2.actor.cpp +++ b/flow/Net2.actor.cpp @@ -137,9 +137,9 @@ class Net2 final : public INetwork, public INetworkConnections { private: void updateStarvationTracker(struct NetworkMetrics::PriorityStats& binStats, - TaskPriority priority, - TaskPriority lastPriority, - double now); + TaskPriority priority, + TaskPriority lastPriority, + double now); public: Net2(const TLSConfig& tlsConfig, bool useThreadPool, bool useMetrics); @@ -1589,9 +1589,9 @@ void Net2::run() { // Updates the PriorityStats found in NetworkMetrics void Net2::updateStarvationTracker(struct NetworkMetrics::PriorityStats& binStats, - TaskPriority priority, - TaskPriority lastPriority, - double now) { + TaskPriority priority, + TaskPriority lastPriority, + double now) { // Busy -> idle at binStats.priority if (binStats.priority > priority && binStats.priority <= lastPriority) { From 17a19021fb260794bfc495874a58539fb845261d Mon Sep 17 00:00:00 2001 From: Steve Atherton Date: Wed, 24 Mar 2021 06:38:50 -0700 Subject: [PATCH 24/49] Added mutation batch filtering during restore, to the extent possible where each restore mutation log task only sees one block of data (usually 1MB) which can contain incomplete and therefore unfilterable mutation sets. --- .../release-notes/release-notes-630.rst | 1 + fdbclient/FileBackupAgent.actor.cpp | 167 +++++++++++++++++- 2 files changed, 163 insertions(+), 5 deletions(-) diff --git a/documentation/sphinx/source/release-notes/release-notes-630.rst b/documentation/sphinx/source/release-notes/release-notes-630.rst index a6ad388b44..076f85d74d 100644 --- a/documentation/sphinx/source/release-notes/release-notes-630.rst +++ b/documentation/sphinx/source/release-notes/release-notes-630.rst @@ -6,6 +6,7 @@ Release Notes ====== * Change the default for --knob_tls_server_handshake_threads to 64. The previous was 1000. 
This avoids starting 1000 threads by default, but may adversely affect recovery time for large clusters using tls. Users with large tls clusters should consider explicitly setting this knob in their foundationdb.conf file. `(PR #4421) `_ * Fix accounting error that could cause commits to incorrectly fail with ``proxy_memory_limit_exceeded``. `(PR #4526) `_ +* As an optimization, partial restore using target key ranges now filters backup log data prior to loading it into the database. `(PR #4554) `_ 6.3.11 ====== diff --git a/fdbclient/FileBackupAgent.actor.cpp b/fdbclient/FileBackupAgent.actor.cpp index 87433d6679..1eaf151cfb 100644 --- a/fdbclient/FileBackupAgent.actor.cpp +++ b/fdbclient/FileBackupAgent.actor.cpp @@ -3192,6 +3192,155 @@ struct RestoreRangeTaskFunc : RestoreFileTaskFuncBase { StringRef RestoreRangeTaskFunc::name = LiteralStringRef("restore_range_data"); REGISTER_TASKFUNC(RestoreRangeTaskFunc); +// Decodes a mutation log key, which contains (hash, commitVersion, chunkNumber) and +// returns (commitVersion, chunkNumber) +std::pair decodeLogKey(const StringRef& key) { + ASSERT(key.size() == sizeof(uint8_t) + sizeof(Version) + sizeof(int32_t)); + + uint8_t hash; + Version version; + int32_t part; + BinaryReader rd(key, Unversioned()); + rd >> hash >> version >> part; + version = bigEndian64(version); + part = bigEndian32(part); + + int32_t v = version / CLIENT_KNOBS->LOG_RANGE_BLOCK_SIZE; + ASSERT(((uint8_t)hashlittle(&v, sizeof(v), 0)) == hash); + + return std::make_pair(version, part); +} + +// Decodes an encoded list of mutations in the format of: +// [includeVersion:uint64_t][val_length:uint32_t][mutation_1][mutation_2]...[mutation_k], +// where a mutation is encoded as: +// [type:uint32_t][keyLength:uint32_t][valueLength:uint32_t][param1][param2] +std::vector decodeLogValue(const StringRef& value) { + StringRefReader reader(value, restore_corrupted_data()); + + Version protocolVersion = reader.consume(); + if (protocolVersion <= 
0x0FDB00A200090001) { + throw incompatible_protocol_version(); + } + + uint32_t val_length = reader.consume(); + if (val_length != value.size() - sizeof(uint64_t) - sizeof(uint32_t)) { + TraceEvent(SevError, "FileRestoreLogValueError") + .detail("ValueLen", val_length) + .detail("ValueSize", value.size()) + .detail("Value", printable(value)); + } + + std::vector mutations; + while (1) { + if (reader.eof()) + break; + + // Deserialization of a MutationRef, which was packed by MutationListRef::push_back_deep() + uint32_t type, p1len, p2len; + type = reader.consume(); + p1len = reader.consume(); + p2len = reader.consume(); + + const uint8_t* key = reader.consume(p1len); + const uint8_t* val = reader.consume(p2len); + + mutations.emplace_back((MutationRef::Type)type, StringRef(key, p1len), StringRef(val, p2len)); + } + return mutations; +} + +// Accumulates mutation log value chunks, as both a vector of chunks and as a combined chunk, +// in chunk order, and can check the chunk set for completion or intersection with a set +// of ranges. +struct AccumulatedMutations { + AccumulatedMutations() : lastChunkNumber(-1) {} + + // Add a KV pair for this mutation chunk set + // It will be accumulated onto serializedMutations if the chunk number is + // the next expected value. 
+ void addChunk(int chunkNumber, const KeyValueRef &kv) { + if(chunkNumber == lastChunkNumber + 1) { + lastChunkNumber = chunkNumber; + serializedMutations += kv.value.toString(); + } + else { + lastChunkNumber = -2; + serializedMutations.clear(); + } + kvs.push_back(kv); + } + + // Returns true if both + // - 1 or more chunks were added to this set + // - The header of the first chunk contains a valid protocol version and a length + // that matches the bytes after the header in the combined value in serializedMutations + bool isComplete() const { + if(lastChunkNumber >= 0) { + StringRefReader reader(serializedMutations, restore_corrupted_data()); + + Version protocolVersion = reader.consume(); + if (protocolVersion <= 0x0FDB00A200090001) { + throw incompatible_protocol_version(); + } + + uint32_t vLen = reader.consume(); + return vLen == reader.remainder().size(); + } + + return false; + } + + // Returns true if a complete chunk contains any MutationRefs which intersect with any + // range in ranges. + // It is undefined behavior to run this if isComplete() does not return true. 
+ bool matchesAnyRange(const std::vector &ranges) { + std::vector mutations = decodeLogValue(serializedMutations); + for(auto &m : mutations) { + for(auto &r : ranges) { + if(m.type == MutationRef::ClearRange) { + if(r.intersects(KeyRangeRef(m.param1, m.param2))) { + return true; + } + } else { + if(r.contains(m.param1)) { + return true; + } + } + } + } + + return false; + } + + std::vector kvs; + std::string serializedMutations; + int lastChunkNumber; +}; + +// Returns a vector of filtered KV refs from data which are either part of incomplete mutation groups OR complete +// and have data relevant to one of the KV ranges in ranges +std::vector filterLogMutationKVPairs(VectorRef data, const std::vector &ranges) { + std::unordered_map mutationBlocksByVersion; + + for(auto &kv : data) { + auto versionAndChunkNumber = decodeLogKey(kv.key); + mutationBlocksByVersion[versionAndChunkNumber.first].addChunk(versionAndChunkNumber.second, kv); + } + + std::vector output; + + for(auto &vb : mutationBlocksByVersion) { + AccumulatedMutations &m = vb.second; + + // If the mutations are incomplete or match one of the ranges, include in results. 
+ if(!m.isComplete() || m.matchesAnyRange(ranges)) { + output.insert(output.end(), m.kvs.begin(), m.kvs.end()); + } + } + + return output; +} struct RestoreLogDataTaskFunc : RestoreFileTaskFuncBase { static StringRef name; static constexpr uint32_t version = 1; @@ -3223,6 +3372,7 @@ struct RestoreLogDataTaskFunc : RestoreFileTaskFuncBase { state Reference tr(new ReadYourWritesTransaction(cx)); state Reference bc; + state std::vector ranges; loop { try { @@ -3232,6 +3382,8 @@ struct RestoreLogDataTaskFunc : RestoreFileTaskFuncBase { Reference _bc = wait(restore.sourceContainer().getOrThrow(tr)); bc = _bc; + wait(store(ranges, restore.getRestoreRangesOrDefault(tr))); + wait(checkTaskVersion(tr->getDatabase(), task, name, version)); wait(taskBucket->keepRunning(tr, task)); @@ -3243,10 +3395,14 @@ struct RestoreLogDataTaskFunc : RestoreFileTaskFuncBase { state Key mutationLogPrefix = restore.mutationLogPrefix(); state Reference inFile = wait(bc->readFile(logFile.fileName)); - state Standalone> data = wait(decodeLogFileBlock(inFile, readOffset, readLen)); + state Standalone> dataOriginal = wait(decodeLogFileBlock(inFile, readOffset, readLen)); + + // Filter the KV pairs extracted from the log file block to remove any records known to not be needed for this + // restore based on the restore range set. + state std::vector dataFiltered = filterLogMutationKVPairs(dataOriginal, ranges); state int start = 0; - state int end = data.size(); + state int end = dataFiltered.size(); state int dataSizeLimit = BUGGIFY ? 
deterministicRandom()->randomInt(256 * 1024, 10e6) : CLIENT_KNOBS->RESTORE_WRITE_TX_SIZE; @@ -3262,8 +3418,8 @@ struct RestoreLogDataTaskFunc : RestoreFileTaskFuncBase { state int i = start; state int txBytes = 0; for (; i < end && txBytes < dataSizeLimit; ++i) { - Key k = data[i].key.withPrefix(mutationLogPrefix); - ValueRef v = data[i].value; + Key k = dataFiltered[i].key.withPrefix(mutationLogPrefix); + ValueRef v = dataFiltered[i].value; tr->set(k, v); txBytes += k.expectedSize(); txBytes += v.expectedSize(); @@ -3291,7 +3447,8 @@ struct RestoreLogDataTaskFunc : RestoreFileTaskFuncBase { .detail("CommitVersion", tr->getCommittedVersion()) .detail("StartIndex", start) .detail("EndIndex", i) - .detail("DataSize", data.size()) + .detail("RecordCountOriginal", dataOriginal.size()) + .detail("RecordCountFiltered", dataFiltered.size()) .detail("Bytes", txBytes) .detail("TaskInstance", THIS_ADDR); From a6136ffc628e156a4f71d519715529c0f2dd4be0 Mon Sep 17 00:00:00 2001 From: Steve Atherton Date: Wed, 24 Mar 2021 06:51:08 -0700 Subject: [PATCH 25/49] Applied clang-format to recent changes. 
--- fdbclient/FileBackupAgent.actor.cpp | 39 ++++++++++++++--------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/fdbclient/FileBackupAgent.actor.cpp b/fdbclient/FileBackupAgent.actor.cpp index 1eaf151cfb..8b4cc3355f 100644 --- a/fdbclient/FileBackupAgent.actor.cpp +++ b/fdbclient/FileBackupAgent.actor.cpp @@ -3192,7 +3192,7 @@ struct RestoreRangeTaskFunc : RestoreFileTaskFuncBase { StringRef RestoreRangeTaskFunc::name = LiteralStringRef("restore_range_data"); REGISTER_TASKFUNC(RestoreRangeTaskFunc); -// Decodes a mutation log key, which contains (hash, commitVersion, chunkNumber) and +// Decodes a mutation log key, which contains (hash, commitVersion, chunkNumber) and // returns (commitVersion, chunkNumber) std::pair decodeLogKey(const StringRef& key) { ASSERT(key.size() == sizeof(uint8_t) + sizeof(Version) + sizeof(int32_t)); @@ -3255,16 +3255,15 @@ std::vector decodeLogValue(const StringRef& value) { // of ranges. struct AccumulatedMutations { AccumulatedMutations() : lastChunkNumber(-1) {} - + // Add a KV pair for this mutation chunk set // It will be accumulated onto serializedMutations if the chunk number is // the next expected value. 
- void addChunk(int chunkNumber, const KeyValueRef &kv) { - if(chunkNumber == lastChunkNumber + 1) { + void addChunk(int chunkNumber, const KeyValueRef& kv) { + if (chunkNumber == lastChunkNumber + 1) { lastChunkNumber = chunkNumber; serializedMutations += kv.value.toString(); - } - else { + } else { lastChunkNumber = -2; serializedMutations.clear(); } @@ -3276,15 +3275,15 @@ struct AccumulatedMutations { // - The header of the first chunk contains a valid protocol version and a length // that matches the bytes after the header in the combined value in serializedMutations bool isComplete() const { - if(lastChunkNumber >= 0) { + if (lastChunkNumber >= 0) { StringRefReader reader(serializedMutations, restore_corrupted_data()); - Version protocolVersion = reader.consume(); + Version protocolVersion = reader.consume(); if (protocolVersion <= 0x0FDB00A200090001) { throw incompatible_protocol_version(); } - uint32_t vLen = reader.consume(); + uint32_t vLen = reader.consume(); return vLen == reader.remainder().size(); } @@ -3294,16 +3293,16 @@ struct AccumulatedMutations { // Returns true if a complete chunk contains any MutationRefs which intersect with any // range in ranges. // It is undefined behavior to run this if isComplete() does not return true. 
- bool matchesAnyRange(const std::vector &ranges) { + bool matchesAnyRange(const std::vector& ranges) { std::vector mutations = decodeLogValue(serializedMutations); - for(auto &m : mutations) { - for(auto &r : ranges) { - if(m.type == MutationRef::ClearRange) { - if(r.intersects(KeyRangeRef(m.param1, m.param2))) { + for (auto& m : mutations) { + for (auto& r : ranges) { + if (m.type == MutationRef::ClearRange) { + if (r.intersects(KeyRangeRef(m.param1, m.param2))) { return true; } } else { - if(r.contains(m.param1)) { + if (r.contains(m.param1)) { return true; } } @@ -3320,21 +3319,21 @@ struct AccumulatedMutations { // Returns a vector of filtered KV refs from data which are either part of incomplete mutation groups OR complete // and have data relevant to one of the KV ranges in ranges -std::vector filterLogMutationKVPairs(VectorRef data, const std::vector &ranges) { +std::vector filterLogMutationKVPairs(VectorRef data, const std::vector& ranges) { std::unordered_map mutationBlocksByVersion; - for(auto &kv : data) { + for (auto& kv : data) { auto versionAndChunkNumber = decodeLogKey(kv.key); mutationBlocksByVersion[versionAndChunkNumber.first].addChunk(versionAndChunkNumber.second, kv); } std::vector output; - for(auto &vb : mutationBlocksByVersion) { - AccumulatedMutations &m = vb.second; + for (auto& vb : mutationBlocksByVersion) { + AccumulatedMutations& m = vb.second; // If the mutations are incomplete or match one of the ranges, include in results. 
- if(!m.isComplete() || m.matchesAnyRange(ranges)) { + if (!m.isComplete() || m.matchesAnyRange(ranges)) { output.insert(output.end(), m.kvs.begin(), m.kvs.end()); } } From c186d363c6b2360b043b34e452750040468c29d0 Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Wed, 24 Mar 2021 17:32:07 +0000 Subject: [PATCH 26/49] Add unit test --- fdbrpc/IAsyncFile.actor.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/fdbrpc/IAsyncFile.actor.cpp b/fdbrpc/IAsyncFile.actor.cpp index 902e86a98b..3d8c9d6fe1 100644 --- a/fdbrpc/IAsyncFile.actor.cpp +++ b/fdbrpc/IAsyncFile.actor.cpp @@ -182,3 +182,22 @@ TEST_CASE("/fileio/rename") { wait(IAsyncFileSystem::filesystem()->deleteFile(renamedFile, true)); return Void(); } + +// Truncating to extend size should zero the new data +TEST_CASE("/fileio/truncateAndRead") { + state std::string filename = "/tmp/__JUNK__"; + state Reference f = wait(IAsyncFileSystem::filesystem()->open( + filename, IAsyncFile::OPEN_ATOMIC_WRITE_AND_CREATE | IAsyncFile::OPEN_CREATE | IAsyncFile::OPEN_READWRITE, 0)); + state std::array data; + wait(f->sync()); + wait(f->truncate(4096)); + int length = wait(f->read(data.begin(), 4096, 0)); + ASSERT(length == 4096); + for (auto c : data) { + ASSERT(c == '\0'); + } + // close the file by deleting the reference + f.clear(); + wait(IAsyncFileSystem::filesystem()->incrementalDeleteFile(filename, true)); + return Void(); +} From eb80321ea317fdef3adbf2da393a4227d0961737 Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Wed, 24 Mar 2021 18:48:10 +0000 Subject: [PATCH 27/49] Attempt to fix windows build --- fdbrpc/IAsyncFile.actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbrpc/IAsyncFile.actor.cpp b/fdbrpc/IAsyncFile.actor.cpp index 3d8c9d6fe1..bfe955b635 100644 --- a/fdbrpc/IAsyncFile.actor.cpp +++ b/fdbrpc/IAsyncFile.actor.cpp @@ -191,7 +191,7 @@ TEST_CASE("/fileio/truncateAndRead") { state std::array data; wait(f->sync()); wait(f->truncate(4096)); - int length = 
wait(f->read(data.begin(), 4096, 0)); + int length = wait(f->read(&data[0], 4096, 0)); ASSERT(length == 4096); for (auto c : data) { ASSERT(c == '\0'); From 720c0b3a4fc6328cabe039d502c100287b221e9d Mon Sep 17 00:00:00 2001 From: Steve Atherton Date: Wed, 24 Mar 2021 14:36:55 -0700 Subject: [PATCH 28/49] Added const. Co-authored-by: Trevor Clinkenbeard --- fdbclient/FileBackupAgent.actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbclient/FileBackupAgent.actor.cpp b/fdbclient/FileBackupAgent.actor.cpp index 8b4cc3355f..72a3ab984f 100644 --- a/fdbclient/FileBackupAgent.actor.cpp +++ b/fdbclient/FileBackupAgent.actor.cpp @@ -3293,7 +3293,7 @@ struct AccumulatedMutations { // Returns true if a complete chunk contains any MutationRefs which intersect with any // range in ranges. // It is undefined behavior to run this if isComplete() does not return true. - bool matchesAnyRange(const std::vector& ranges) { + bool matchesAnyRange(const std::vector& ranges) const { std::vector mutations = decodeLogValue(serializedMutations); for (auto& m : mutations) { for (auto& r : ranges) { From 73d0c2ac154e7b2b1cc3466e55fdbec8f065cf8e Mon Sep 17 00:00:00 2001 From: Aaron Molitor Date: Wed, 24 Mar 2021 19:59:03 -0500 Subject: [PATCH 29/49] Docker updates (#4551) * add alias to start joshua with default correctness package add lxml to pip packages being installed add byobu, fish, ripgrep, the_silver_searcher and zsh to devel images (fish and ripgrep not avaialbe in c6) sort yum packages being installed add aws cli and kubectl for starting correctness/joshua fix cmk output location, remove cluster-file in j add docker to centos7 build image use printf to create/update files in devel image * add ycsb incomplete Dockerfile, remove buildspec.yml --- build/docker/buildspec.yml | 158 -------------------------- build/docker/centos6/build/Dockerfile | 8 +- build/docker/centos6/devel/Dockerfile | 60 ++++++---- build/docker/centos7/build/Dockerfile | 7 +- 
build/docker/centos7/devel/Dockerfile | 87 ++++++++++---- build/docker/centos7/ycsb/Dockerfile | 20 ++++ 6 files changed, 134 insertions(+), 206 deletions(-) delete mode 100644 build/docker/buildspec.yml create mode 100644 build/docker/centos7/ycsb/Dockerfile diff --git a/build/docker/buildspec.yml b/build/docker/buildspec.yml deleted file mode 100644 index edb522d5ee..0000000000 --- a/build/docker/buildspec.yml +++ /dev/null @@ -1,158 +0,0 @@ -version: 0.2 - -env: - secrets-manager: - DOCKERHUB_AUTH: dockerhub_foundationdb:foundationdb -phases: - install: - commands: - - echo "install phase" - - 'ACCOUNT_ID=$(echo $CODEBUILD_BUILD_ARN | cut -d : -f 5)' - - REGISTRY=${ACCOUNT_ID}.dkr.ecr.${AWS_DEFAULT_REGION}.amazonaws.com - - aws ecr get-login-password | docker login --username AWS --password-stdin ${REGISTRY} - - docker pull ${REGISTRY}/centos:6 - - docker tag ${REGISTRY}/centos:6 centos:6 - - docker pull ${REGISTRY}/centos:7 - - docker tag ${REGISTRY}/centos:7 centos:7 - pre_build: - commands: - - echo "pre_build phase" - - COMMIT_HASH=$(echo $CODEBUILD_RESOLVED_SOURCE_VERSION | cut -c 1-7) - - DATE_STR=$(date +"%Y%m%d%H%M%S") - build: - commands: - - echo "build phase" - - ################################################################################ - - # CENTOS 7 foundationdb/build - - ################################################################################ - - cd ${CODEBUILD_SRC_DIR}/build/docker/centos7/build - - docker pull ${REGISTRY}/foundationdb/build:centos7-latest || true - - docker build --cache-from ${REGISTRY}/foundationdb/build:centos7-latest - --tag ${REGISTRY}/foundationdb/build:centos7-${DATE_STR}-${COMMIT_HASH} - --tag ${REGISTRY}/foundationdb/build:centos7-latest - --tag ${REGISTRY}/foundationdb/build:latest - --tag foundationdb/build:centos7-${DATE_STR}-${COMMIT_HASH} - --tag foundationdb/build:centos7-latest - --tag foundationdb/build:latest - . 
- - ################################################################################ - - # CENTOS 7 foundationdb/devel - - ################################################################################ - - cd ${CODEBUILD_SRC_DIR}/build/docker/centos7/devel - - docker pull ${REGISTRY}/foundationdb/devel:centos7-latest || true - - docker build --cache-from ${REGISTRY}/foundationdb/devel:centos7-latest - --build-arg REPOSITORY=${REGISTRY}/foundationdb/build - --tag ${REGISTRY}/foundationdb/devel:centos7-${DATE_STR}-${COMMIT_HASH} - --tag ${REGISTRY}/foundationdb/devel:centos7-latest - --tag ${REGISTRY}/foundationdb/devel:latest - --tag foundationdb/devel:centos7-${DATE_STR}-${COMMIT_HASH} - --tag foundationdb/devel:centos7-latest - --tag foundationdb/devel:latest - . - - ################################################################################ - - # CENTOS 7 foundationdb/distcc - - ################################################################################ - - cd ${CODEBUILD_SRC_DIR}/build/docker/centos7/distcc - - docker pull ${REGISTRY}/foundationdb/distcc:centos7-latest || true - - docker build --cache-from ${REGISTRY}/foundationdb/distcc:centos7-latest - --build-arg REPOSITORY=${REGISTRY}/foundationdb/build - --tag ${REGISTRY}/foundationdb/distcc:centos7-${DATE_STR}-${COMMIT_HASH} - --tag ${REGISTRY}/foundationdb/distcc:centos7-latest - --tag ${REGISTRY}/foundationdb/distcc:latest - --tag foundationdb/distcc:centos7-${DATE_STR}-${COMMIT_HASH} - --tag foundationdb/distcc:centos7-latest - --tag foundationdb/distcc:latest - . 
- - ################################################################################ - - # CENTOS 6 foundationdb/build - - ################################################################################ - - cd ${CODEBUILD_SRC_DIR}/build/docker/centos6/build - - docker pull ${REGISTRY}/foundationdb/build:centos6-latest || true - - docker build --cache-from ${REGISTRY}/foundationdb/build:centos6-latest - --tag ${REGISTRY}/foundationdb/build:centos6-${DATE_STR}-${COMMIT_HASH} - --tag ${REGISTRY}/foundationdb/build:centos6-latest - --tag foundationdb/build:centos6-${DATE_STR}-${COMMIT_HASH} - --tag foundationdb/build:centos6-latest - . - - ################################################################################ - - # CENTOS 6 foundationdb/devel - - ################################################################################ - - cd ${CODEBUILD_SRC_DIR}/build/docker/centos6/devel - - docker pull ${REGISTRY}/foundationdb/devel:centos6-latest || true - - docker build --cache-from ${REGISTRY}/foundationdb/devel:centos6-latest - --build-arg REPOSITORY=${REGISTRY}/foundationdb/build - --tag ${REGISTRY}/foundationdb/devel:centos6-${DATE_STR}-${COMMIT_HASH} - --tag ${REGISTRY}/foundationdb/devel:centos6-latest - --tag foundationdb/devel:centos6-${DATE_STR}-${COMMIT_HASH} - --tag foundationdb/devel:centos6-latest - . 
- - ################################################################################ - - # CENTOS 6 foundationdb/distcc - - ################################################################################ - - cd ${CODEBUILD_SRC_DIR}/build/docker/centos6/distcc - - docker pull ${REGISTRY}/foundationdb/distcc:centos6-latest || true - - docker build --cache-from ${REGISTRY}/foundationdb/distcc:centos6-latest - --build-arg REPOSITORY=${REGISTRY}/foundationdb/build - --tag ${REGISTRY}/foundationdb/distcc:centos6-${DATE_STR}-${COMMIT_HASH} - --tag ${REGISTRY}/foundationdb/distcc:centos6-latest - --tag foundationdb/distcc:centos6-${DATE_STR}-${COMMIT_HASH} - --tag foundationdb/distcc:centos6-latest - . - post_build: - commands: - - echo "post_build phase" - - echo ${DOCKERHUB_AUTH} | docker login --username foundationdb --password-stdin - - ################################################################################ - - # CENTOS 7 PUSH TO ECR - - ################################################################################ - - # PUSH TO build ECR - - docker push ${REGISTRY}/foundationdb/build:centos7-${DATE_STR}-${COMMIT_HASH} - - docker push ${REGISTRY}/foundationdb/build:centos7-latest - - docker push ${REGISTRY}/foundationdb/build:latest - - # PUSH TO devel ECR - - docker push ${REGISTRY}/foundationdb/devel:centos7-${DATE_STR}-${COMMIT_HASH} - - docker push ${REGISTRY}/foundationdb/devel:centos7-latest - - docker push ${REGISTRY}/foundationdb/devel:latest - - # PUSH TO distcc ECR - - docker push ${REGISTRY}/foundationdb/distcc:centos7-${DATE_STR}-${COMMIT_HASH} - - docker push ${REGISTRY}/foundationdb/distcc:centos7-latest - - docker push ${REGISTRY}/foundationdb/distcc:latest - - ################################################################################ - - # CENTOS 7 PUSH TO DOCKERHUB - - ################################################################################ - - # PUSH TO build DOCKERHUB - - docker push 
foundationdb/build:centos7-${DATE_STR}-${COMMIT_HASH} - - docker push foundationdb/build:centos7-latest - - docker push foundationdb/build:latest - - # PUSH TO devel DOCKERHUB - - docker push foundationdb/devel:centos7-${DATE_STR}-${COMMIT_HASH} - - docker push foundationdb/devel:centos7-latest - - docker push foundationdb/devel:latest - - # PUSH TO distcc DOCKERHUB - - docker push foundationdb/distcc:centos7-${DATE_STR}-${COMMIT_HASH} - - docker push foundationdb/distcc:centos7-latest - - docker push foundationdb/distcc:latest - - ################################################################################ - - # CENTOS 6 PUSH TO ECR - - ################################################################################ - - # PUSH TO build ECR - - docker push ${REGISTRY}/foundationdb/build:centos6-${DATE_STR}-${COMMIT_HASH} - - docker push ${REGISTRY}/foundationdb/build:centos6-latest - - # PUSH TO devel ECR - - docker push ${REGISTRY}/foundationdb/devel:centos6-${DATE_STR}-${COMMIT_HASH} - - docker push ${REGISTRY}/foundationdb/devel:centos6-latest - - # PUSH TO distcc ECR - - docker push ${REGISTRY}/foundationdb/distcc:centos6-${DATE_STR}-${COMMIT_HASH} - - docker push ${REGISTRY}/foundationdb/distcc:centos6-latest - - ################################################################################ - - # CENTOS 6 PUSH TO DOCKERHUB - - ################################################################################ - - # PUSH TO build DOCKERHUB - - docker push foundationdb/build:centos6-${DATE_STR}-${COMMIT_HASH} - - docker push foundationdb/build:centos6-latest - - # PUSH TO devel DOCKERHUB - - docker push foundationdb/devel:centos6-${DATE_STR}-${COMMIT_HASH} - - docker push foundationdb/devel:centos6-latest - - # PUSH TO distcc DOCKERHUB - - docker push foundationdb/distcc:centos6-${DATE_STR}-${COMMIT_HASH} - - docker push foundationdb/distcc:centos6-latest \ No newline at end of file diff --git a/build/docker/centos6/build/Dockerfile 
b/build/docker/centos6/build/Dockerfile index 9c094cb88a..aec0a4dd11 100644 --- a/build/docker/centos6/build/Dockerfile +++ b/build/docker/centos6/build/Dockerfile @@ -37,13 +37,13 @@ RUN sed -i -e '/enabled/d' /etc/yum.repos.d/CentOS-Base.repo && \ lz4-devel \ lz4-static \ mono-devel \ - rh-python36 \ - rh-python36-python-devel \ - rh-ruby24 \ rpm-build \ tcl-devel \ unzip \ - wget && \ + wget \ + rh-python36 \ + rh-python36-python-devel \ + rh-ruby24 && \ yum clean all && \ rm -rf /var/cache/yum diff --git a/build/docker/centos6/devel/Dockerfile b/build/docker/centos6/devel/Dockerfile index 12e891e701..6dd0b822a6 100644 --- a/build/docker/centos6/devel/Dockerfile +++ b/build/docker/centos6/devel/Dockerfile @@ -5,13 +5,16 @@ FROM ${REPOSITORY}:${VERSION} # add vscode server RUN yum repolist && \ yum -y install \ + bash-completion \ + byobu \ + cgdb \ + emacs-nox \ + jq \ + the_silver_searcher \ tmux \ tree \ - emacs-nox \ vim \ - bash-completion \ - jq \ - cgdb && \ + zsh && \ yum clean all && \ rm -rf /var/cache/yum @@ -19,14 +22,25 @@ WORKDIR /tmp RUN source /opt/rh/devtoolset-8/enable && \ source /opt/rh/rh-python36/enable && \ pip3 install \ + lxml \ + psutil \ python-dateutil \ - subprocess32 \ - psutil && \ + subprocess32 && \ mkdir fdb-joshua && \ cd fdb-joshua && \ git clone --branch code_pipeline https://github.com/FoundationDB/fdb-joshua . 
&& \ pip3 install /tmp/fdb-joshua && \ cd /tmp && \ + curl -Ls https://amazon-eks.s3.us-west-2.amazonaws.com/1.18.9/2020-11-02/bin/linux/amd64/kubectl -o kubectl && \ + echo "3dbe69e6deb35fbd6fec95b13d20ac1527544867ae56e3dae17e8c4d638b25b9 kubectl" > kubectl.txt && \ + sha256sum -c kubectl.txt && \ + mv kubectl /usr/local/bin/kubectl && \ + chmod 755 /usr/local/bin/kubectl && \ + curl https://awscli.amazonaws.com/awscli-exe-linux-x86_64-2.0.30.zip -o "awscliv2.zip" && \ + echo "7ee475f22c1b35cc9e53affbf96a9ffce91706e154a9441d0d39cbf8366b718e awscliv2.zip" > awscliv2.txt && \ + sha256sum -c awscliv2.txt && \ + unzip -qq awscliv2.zip && \ + ./aws/install && \ rm -rf /tmp/* ARG OLD_FDB_BINARY_DIR=/app/deploy/global_data/oldBinaries/ @@ -45,17 +59,23 @@ RUN mkdir -p ${OLD_FDB_BINARY_DIR} \ ln -s ${OLD_TLS_LIBRARY_DIR}/FDBGnuTLS.so /usr/lib/foundationdb/plugins/FDBGnuTLS.so WORKDIR /root -RUN echo -en "\n"\ - "source /opt/rh/devtoolset-8/enable\n"\ - "source /opt/rh/rh-python36/enable\n"\ - "source /opt/rh/rh-ruby24/enable\n"\ - "\n"\ - "function cmk() {\n"\ - " cmake -S ${HOME}/src/foundationdb -B build_output -D USE_CCACHE=1 -D RocksDB_ROOT=/opt/rocksdb-6.10.1 -G Ninja && ninja -C build_output -j 84\n"\ - "}\n"\ - "function ct() {\n"\ - " cd ${HOME}/build_output && ctest -j 32 --output-on-failure\n"\ - "}\n"\ - "function j() {\n"\ - " python3 -m joshua.joshua --cluster-file /etc/foundationdb/cluster-file \"\${@}\"\n"\ - "}\n" >> .bashrc +RUN rm -f /root/anaconda-ks.cfg && \ + printf '%s\n' \ + 'source /opt/rh/devtoolset-8/enable' \ + 'source /opt/rh/rh-python36/enable' \ + 'source /opt/rh/rh-ruby26/enable' \ + '' \ + 'function cmk() {' \ + ' cmake -S ${HOME}/src/foundationdb -B ${HOME}/build_output -D USE_CCACHE=1 -D RocksDB_ROOT=/opt/rocksdb-6.10.1 -G Ninja && ninja -C build_output -j 84' \ + '}' \ + 'function ct() {' \ + ' cd ${HOME}/build_output && ctest -j 32 --output-on-failure' \ + '}' \ + 'function j() {' \ + ' python3 -m joshua.joshua "${@}"' \ + '}' \ + 
'function jsd() {' \ + ' j start --tarball $(find ${HOME}/build_output/packages -name correctness\*.tar.gz) "${@}"' \ + '}' \ + '' \ + >> .bashrc \ No newline at end of file diff --git a/build/docker/centos7/build/Dockerfile b/build/docker/centos7/build/Dockerfile index 133cab010d..16c1bb4164 100644 --- a/build/docker/centos7/build/Dockerfile +++ b/build/docker/centos7/build/Dockerfile @@ -10,6 +10,7 @@ RUN rpmkeys --import mono-project.com.rpmkey.pgp && \ epel-release \ scl-utils \ yum-utils && \ + yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo && \ yum install -y \ autoconf \ automake \ @@ -19,6 +20,7 @@ RUN rpmkeys --import mono-project.com.rpmkey.pgp && \ devtoolset-8 \ devtoolset-8-libubsan-devel \ devtoolset-8-valgrind-devel \ + docker-ce \ dos2unix \ dpkg \ gettext-devel \ @@ -59,9 +61,10 @@ RUN source /opt/rh/devtoolset-8/enable && \ tar --strip-components 1 --no-same-owner --directory git -xf git.tar.gz && \ cd git && \ make configure && \ - ./configure \ - && make && \ + ./configure && \ + make && \ make install && \ + cd ../ && \ rm -rf /tmp/* # build/install ninja diff --git a/build/docker/centos7/devel/Dockerfile b/build/docker/centos7/devel/Dockerfile index 5e7d2f94c7..48d459bf7b 100644 --- a/build/docker/centos7/devel/Dockerfile +++ b/build/docker/centos7/devel/Dockerfile @@ -3,15 +3,21 @@ ARG VERSION=centos7-latest FROM ${REPOSITORY}:${VERSION} # add vscode server -RUN yum repolist && \ +RUN yum-config-manager --add-repo=https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/repo/epel-7/carlwgeorge-ripgrep-epel-7.repo && \ + yum repolist && \ yum -y install \ + bash-completion \ + byobu \ + cgdb \ + emacs-nox \ + fish \ + jq \ + ripgrep \ + the_silver_searcher \ tmux \ tree \ - emacs-nox \ vim \ - bash-completion \ - jq \ - cgdb && \ + zsh && \ yum clean all && \ rm -rf /var/cache/yum @@ -19,14 +25,25 @@ WORKDIR /tmp RUN source /opt/rh/devtoolset-8/enable && \ source /opt/rh/rh-python36/enable && \ pip3 
install \ + lxml \ + psutil \ python-dateutil \ - subprocess32 \ - psutil && \ + subprocess32 && \ mkdir fdb-joshua && \ cd fdb-joshua && \ git clone --branch code_pipeline https://github.com/FoundationDB/fdb-joshua . && \ pip3 install /tmp/fdb-joshua && \ cd /tmp && \ + curl -Ls https://amazon-eks.s3.us-west-2.amazonaws.com/1.18.9/2020-11-02/bin/linux/amd64/kubectl -o kubectl && \ + echo "3dbe69e6deb35fbd6fec95b13d20ac1527544867ae56e3dae17e8c4d638b25b9 kubectl" > kubectl.txt && \ + sha256sum -c kubectl.txt && \ + mv kubectl /usr/local/bin/kubectl && \ + chmod 755 /usr/local/bin/kubectl && \ + curl https://awscli.amazonaws.com/awscli-exe-linux-x86_64-2.0.30.zip -o "awscliv2.zip" && \ + echo "7ee475f22c1b35cc9e53affbf96a9ffce91706e154a9441d0d39cbf8366b718e awscliv2.zip" > awscliv2.txt && \ + sha256sum -c awscliv2.txt && \ + unzip -qq awscliv2.zip && \ + ./aws/install && \ rm -rf /tmp/* ARG OLD_FDB_BINARY_DIR=/app/deploy/global_data/oldBinaries/ @@ -49,18 +66,44 @@ RUN curl -Ls https://update.code.visualstudio.com/latest/server-linux-x64/stable mkdir -p .vscode-server/bin/latest && \ tar --strip-components 1 --no-same-owner --directory .vscode-server/bin/latest -xf /tmp/vscode-server-linux-x64.tar.gz && \ touch .vscode-server/bin/latest/0 && \ - rm /tmp/* -RUN echo -en "\n"\ - "source /opt/rh/devtoolset-8/enable\n"\ - "source /opt/rh/rh-python36/enable\n"\ - "source /opt/rh/rh-ruby26/enable\n"\ - "\n"\ - "function cmk() {\n"\ - " cmake -S ${HOME}/src/foundationdb -B build_output -D USE_CCACHE=1 -D RocksDB_ROOT=/opt/rocksdb-6.10.1 -G Ninja && ninja -C build_output -j 84\n"\ - "}\n"\ - "function ct() {\n"\ - " cd ${HOME}/build_output && ctest -j 32 --output-on-failure\n"\ - "}\n"\ - "function j() {\n"\ - " python3 -m joshua.joshua --cluster-file /etc/foundationdb/cluster-file \"\${@}\"\n"\ - "}\n" >> .bashrc + rm -rf /tmp/* +RUN rm -f /root/anaconda-ks.cfg && \ + printf '%s\n' \ + '#!/usr/bin/env bash' \ + 'set -Eeuo pipefail' \ + '' \ + 'mkdir -p ~/.docker' \ + 'cat > 
~/.docker/config.json << EOF' \ + '{' \ + ' "proxies":' \ + ' {' \ + ' "default":' \ + ' {' \ + ' "httpProxy": "${HTTP_PROXY}",' \ + ' "httpsProxy": "${HTTPS_PROXY}",' \ + ' "noProxy": "${NO_PROXY}"' \ + ' }' \ + ' }' \ + '}' \ + 'EOF' \ + > docker_proxy.sh && \ + chmod 755 docker_proxy.sh && \ + printf '%s\n' \ + 'source /opt/rh/devtoolset-8/enable' \ + 'source /opt/rh/rh-python36/enable' \ + 'source /opt/rh/rh-ruby26/enable' \ + '' \ + 'function cmk() {' \ + ' cmake -S ${HOME}/src/foundationdb -B ${HOME}/build_output -D USE_CCACHE=1 -D RocksDB_ROOT=/opt/rocksdb-6.10.1 -G Ninja && ninja -C build_output -j 84' \ + '}' \ + 'function ct() {' \ + ' cd ${HOME}/build_output && ctest -j 32 --output-on-failure' \ + '}' \ + 'function j() {' \ + ' python3 -m joshua.joshua "${@}"' \ + '}' \ + 'function jsd() {' \ + ' j start --tarball $(find ${HOME}/build_output/packages -name correctness\*.tar.gz) "${@}"' \ + '}' \ + '' \ + >> .bashrc \ No newline at end of file diff --git a/build/docker/centos7/ycsb/Dockerfile b/build/docker/centos7/ycsb/Dockerfile new file mode 100644 index 0000000000..a8b60230b3 --- /dev/null +++ b/build/docker/centos7/ycsb/Dockerfile @@ -0,0 +1,20 @@ +ARG REPOSITORY=foundationdb/build +ARG VERSION=centos7-latest +FROM ${REPOSITORY}:${VERSION} + +ENV YCSB_VERSION=ycsb-foundationdb-binding-0.17.0 \ + PATH=${PATH}:/usr/bin + +RUN cd /opt \ + && eval curl "-Ls https://github.com/brianfrankcooper/YCSB/releases/download/0.17.0/ycsb-foundationdb-binding-0.17.0.tar.gz" \ + | tar -xzvf - + +RUN rm -Rf /opt/${YCSB_VERSION}/lib/fdb-java-5.2.5.jar + +# COPY The Appropriate fdb-java-.jar Aaron from packages +# COPY binary RPM for foundationd-db +# Install Binary + +WORKDIR "/opt/${YCSB_VERSION}" + +ENTRYPOINT ["bin/ycsb.sh"] \ No newline at end of file From a9d880682837b911c5051f8a395fcd4b3559a984 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Wed, 24 Mar 2021 15:54:06 -0700 Subject: [PATCH 30/49] Use the restored range in the actual restore --- 
fdbbackup/backup.actor.cpp | 2 +- fdbclient/BackupContainerFileSystem.actor.cpp | 4 ++++ fdbclient/FileBackupAgent.actor.cpp | 14 +++++++++++--- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/fdbbackup/backup.actor.cpp b/fdbbackup/backup.actor.cpp index dee6c87eee..ea3847d035 100644 --- a/fdbbackup/backup.actor.cpp +++ b/fdbbackup/backup.actor.cpp @@ -2328,7 +2328,7 @@ ACTOR Future runRestore(Database db, printf("Restored to version %" PRId64 "\n", restoredVersion); } } else { - state Optional rset = wait(bc->getRestoreSet(targetVersion)); + state Optional rset = wait(bc->getRestoreSet(targetVersion, ranges)); if (!rset.present()) { fprintf(stderr, diff --git a/fdbclient/BackupContainerFileSystem.actor.cpp b/fdbclient/BackupContainerFileSystem.actor.cpp index 040796d35f..66fdff3ee8 100644 --- a/fdbclient/BackupContainerFileSystem.actor.cpp +++ b/fdbclient/BackupContainerFileSystem.actor.cpp @@ -903,6 +903,10 @@ public: return Optional(); } + for (const auto& range : keyRangesFilter) { + TraceEvent("BackupContainerGetRestoreSet").detail("RangeFilter", printable(range)); + } + if (logsOnly) { state RestorableFileSet restorableSet; state std::vector logFiles; diff --git a/fdbclient/FileBackupAgent.actor.cpp b/fdbclient/FileBackupAgent.actor.cpp index 72a3ab984f..e77e50b5f9 100644 --- a/fdbclient/FileBackupAgent.actor.cpp +++ b/fdbclient/FileBackupAgent.actor.cpp @@ -4010,6 +4010,7 @@ struct StartFullRestoreTaskFunc : RestoreTaskFuncBase { state Version restoreVersion; state Version beginVersion; state Reference bc; + state std::vector ranges; loop { try { @@ -4017,10 +4018,12 @@ struct StartFullRestoreTaskFunc : RestoreTaskFuncBase { tr->setOption(FDBTransactionOptions::LOCK_AWARE); wait(checkTaskVersion(tr->getDatabase(), task, name, version)); - Version _restoreVersion = wait(restore.restoreVersion().getOrThrow(tr)); - restoreVersion = _restoreVersion; Optional _beginVersion = wait(restore.beginVersion().get(tr)); beginVersion = 
_beginVersion.present() ? _beginVersion.get() : invalidVersion; + + wait(store(restoreVersion, restore.restoreVersion().getOrThrow(tr))); + wait(store(ranges, restore.getRestoreRangesOrDefault(tr))); + wait(taskBucket->keepRunning(tr, task)); ERestoreState oldState = wait(restore.stateEnum().getD(tr)); @@ -4065,13 +4068,18 @@ struct StartFullRestoreTaskFunc : RestoreTaskFuncBase { wait(tr->onError(e)); } } + Optional _incremental = wait(restore.incrementalBackupOnly().get(tr)); state bool incremental = _incremental.present() ? _incremental.get() : false; if (beginVersion == invalidVersion) { beginVersion = 0; } + state Standalone> keyRangesFilter; + for (auto const& r : ranges) { + keyRangesFilter.push_back_deep(keyRangesFilter.arena(), KeyRangeRef(r)); + } Optional restorable = - wait(bc->getRestoreSet(restoreVersion, VectorRef(), incremental, beginVersion)); + wait(bc->getRestoreSet(restoreVersion, keyRangesFilter, incremental, beginVersion)); if (!incremental) { beginVersion = restorable.get().snapshot.beginVersion; } From da5930171e6798751adf873ab8a27b6fc4b49807 Mon Sep 17 00:00:00 2001 From: Chaoguang Lin Date: Thu, 25 Mar 2021 11:30:22 -0700 Subject: [PATCH 31/49] Fix boost build on OS X --- cmake/CompileBoost.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/CompileBoost.cmake b/cmake/CompileBoost.cmake index 9e7fbd2971..0b1cc68502 100644 --- a/cmake/CompileBoost.cmake +++ b/cmake/CompileBoost.cmake @@ -10,7 +10,7 @@ function(compile_boost) set(BOOST_COMPILER_FLAGS -fvisibility=hidden -fPIC -std=c++14 -w) set(BOOST_CXX_COMPILER "${CMAKE_CXX_COMPILER}") if(APPLE) - set(BOOST_TOOLSET "darwin") + set(BOOST_TOOLSET "clang-darwin") # this is to fix a weird macOS issue -- by default # cmake would otherwise pass a compiler that can't # compile boost From 1541f4234acd5b59a326836c45b537c0e006abfe Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Thu, 25 Mar 2021 14:40:06 -0400 Subject: [PATCH 32/49] Updated test parser with new option and also 
added option to test file with default value --- fdbserver/tester.actor.cpp | 4 +++- tests/restarting/to_6.3.10/CycleTestRestart-1.txt | 2 ++ tests/restarting/to_6.3.10/CycleTestRestart-2.txt | 2 ++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/fdbserver/tester.actor.cpp b/fdbserver/tester.actor.cpp index 20a71b3efc..4eeb3e1034 100644 --- a/fdbserver/tester.actor.cpp +++ b/fdbserver/tester.actor.cpp @@ -977,7 +977,9 @@ std::map> testSpecGlobalKey TraceEvent("TestParserTest").detail("ClientInfoLogging", value); } }, { "startIncompatibleProcess", - [](const std::string& value) { TraceEvent("TestParserTest").detail("ParsedStartIncompatibleProcess", value); } } + [](const std::string& value) { TraceEvent("TestParserTest").detail("ParsedStartIncompatibleProcess", value); } }, + { "storageEngineExcludeType", + [](const std::string& value) { TraceEvent("TestParserTest").detail("ParsedStorageEngineExcludeType", ""); } } }; std::map> testSpecTestKeys = { diff --git a/tests/restarting/to_6.3.10/CycleTestRestart-1.txt b/tests/restarting/to_6.3.10/CycleTestRestart-1.txt index 647c2f3fe3..1c866784a3 100644 --- a/tests/restarting/to_6.3.10/CycleTestRestart-1.txt +++ b/tests/restarting/to_6.3.10/CycleTestRestart-1.txt @@ -28,3 +28,5 @@ testTitle=Clogged testName=SaveAndKill restartInfoLocation=simfdb/restartInfo.ini testDuration=10.0 + +storageEngineExcludeType=-1 \ No newline at end of file diff --git a/tests/restarting/to_6.3.10/CycleTestRestart-2.txt b/tests/restarting/to_6.3.10/CycleTestRestart-2.txt index 7d498f2be1..107f8fe84d 100644 --- a/tests/restarting/to_6.3.10/CycleTestRestart-2.txt +++ b/tests/restarting/to_6.3.10/CycleTestRestart-2.txt @@ -24,3 +24,5 @@ testTitle=Clogged machinesToLeave=3 reboot=true testDuration=10.0 + +storageEngineExcludeType=-1 \ No newline at end of file From 69d88dcf96e564e2076af2bb35f5c2c8abde8061 Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Thu, 25 Mar 2021 15:25:50 -0400 Subject: [PATCH 33/49] adjust placement of option in 
test workload --- tests/restarting/to_6.3.10/CycleTestRestart-1.txt | 3 +-- tests/restarting/to_6.3.10/CycleTestRestart-2.txt | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/restarting/to_6.3.10/CycleTestRestart-1.txt b/tests/restarting/to_6.3.10/CycleTestRestart-1.txt index 1c866784a3..59e764c697 100644 --- a/tests/restarting/to_6.3.10/CycleTestRestart-1.txt +++ b/tests/restarting/to_6.3.10/CycleTestRestart-1.txt @@ -1,3 +1,4 @@ +storageEngineExcludeType=-1 testTitle=Clogged clearAfterTest=false testName=Cycle @@ -28,5 +29,3 @@ testTitle=Clogged testName=SaveAndKill restartInfoLocation=simfdb/restartInfo.ini testDuration=10.0 - -storageEngineExcludeType=-1 \ No newline at end of file diff --git a/tests/restarting/to_6.3.10/CycleTestRestart-2.txt b/tests/restarting/to_6.3.10/CycleTestRestart-2.txt index 107f8fe84d..ecd3c77b52 100644 --- a/tests/restarting/to_6.3.10/CycleTestRestart-2.txt +++ b/tests/restarting/to_6.3.10/CycleTestRestart-2.txt @@ -1,3 +1,4 @@ +storageEngineExcludeType=-1 testTitle=Clogged runSetup=false testName=Cycle @@ -24,5 +25,3 @@ testTitle=Clogged machinesToLeave=3 reboot=true testDuration=10.0 - -storageEngineExcludeType=-1 \ No newline at end of file From 59db01a7be95535f23e9569554e108484f5523b3 Mon Sep 17 00:00:00 2001 From: Aaron Molitor Date: Thu, 25 Mar 2021 19:02:07 -0500 Subject: [PATCH 34/49] update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index e42dfccd63..b12183255f 100755 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ FoundationDB logo +![Build Status](https://codebuild.us-west-2.amazonaws.com/badges?uuid=eyJlbmNyeXB0ZWREYXRhIjoiSCtVR1ZmOE1QSW8wMEtJNnAzNmVaY1hpMldYbFF5L3l1ZW10bUtpZGV1Zy9YTHA0dkJqYXFocElKUTZyZU91UlB2cUl6NU5HZ0g0UmpyQ0Y1V294L3hBPSIsIml2UGFyYW1ldGVyU3BlYyI6InRBVXROL3JCQlJFWm5hNHciLCJtYXRlcmlhbFNldFNlcmlhbCI6MX0%3D&branch=master) + FoundationDB is a distributed database designed to handle large volumes of structured data across 
clusters of commodity servers. It organizes data as an ordered key-value store and employs ACID transactions for all operations. It is especially well-suited for read/write workloads but also has excellent performance for write-intensive workloads. Users interact with the database using API language binding. To learn more about FoundationDB, visit [foundationdb.org](https://www.foundationdb.org/) From a8601024dd06dc7b050c87d49eaf226e492f3719 Mon Sep 17 00:00:00 2001 From: Aaron Molitor Date: Thu, 25 Mar 2021 23:03:00 -0500 Subject: [PATCH 35/49] Update build badge in README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b12183255f..41afbb7eef 100755 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ FoundationDB logo -![Build Status](https://codebuild.us-west-2.amazonaws.com/badges?uuid=eyJlbmNyeXB0ZWREYXRhIjoiSCtVR1ZmOE1QSW8wMEtJNnAzNmVaY1hpMldYbFF5L3l1ZW10bUtpZGV1Zy9YTHA0dkJqYXFocElKUTZyZU91UlB2cUl6NU5HZ0g0UmpyQ0Y1V294L3hBPSIsIml2UGFyYW1ldGVyU3BlYyI6InRBVXROL3JCQlJFWm5hNHciLCJtYXRlcmlhbFNldFNlcmlhbCI6MX0%3D&branch=master) +![Build Status](https://codebuild.us-west-2.amazonaws.com/badges?uuid=eyJlbmNyeXB0ZWREYXRhIjoidTh3TTlZa2FQdkdPL1drZzJUQnA1NWg0MzQ3c1VnMXlaVWQ0MVUwcUJpRlltUExBYmRCc3h2c0p1TXZLdWhDQ3BoS0Jmc2ZZdG5yVmxGUHNJM0JtV0MwPSIsIml2UGFyYW1ldGVyU3BlYyI6InBrclM3R0J2d3hmRUFDTjgiLCJtYXRlcmlhbFNldFNlcmlhbCI6MX0%3D&branch=master) FoundationDB is a distributed database designed to handle large volumes of structured data across clusters of commodity servers. It organizes data as an ordered key-value store and employs ACID transactions for all operations. It is especially well-suited for read/write workloads but also has excellent performance for write-intensive workloads. Users interact with the database using API language binding. 
From bd68591ea22adecc6d8899e4b67f4744ab0c7f23 Mon Sep 17 00:00:00 2001 From: Dan Lambright Date: Tue, 23 Mar 2021 14:15:37 -0400 Subject: [PATCH 36/49] add SYSTEM_MONITOR_FREQUENCY knob. --- fdbserver/Knobs.cpp | 1 + fdbserver/Knobs.h | 1 + fdbserver/fdbserver.actor.cpp | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/fdbserver/Knobs.cpp b/fdbserver/Knobs.cpp index 100e770ec5..2a69139316 100644 --- a/fdbserver/Knobs.cpp +++ b/fdbserver/Knobs.cpp @@ -492,6 +492,7 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi init( MAX_REBOOT_TIME, 5.0 ); if( longReboots ) MAX_REBOOT_TIME = 20.0; init( LOG_DIRECTORY, "."); // Will be set to the command line flag. init( SERVER_MEM_LIMIT, 8LL << 30 ); + init( SYSTEM_MONITOR_FREQUENCY, 5.0 ); //Ratekeeper bool slowRatekeeper = randomize && BUGGIFY; diff --git a/fdbserver/Knobs.h b/fdbserver/Knobs.h index 759014fd21..9a5f2a528c 100644 --- a/fdbserver/Knobs.h +++ b/fdbserver/Knobs.h @@ -416,6 +416,7 @@ public: double MAX_REBOOT_TIME; std::string LOG_DIRECTORY; int64_t SERVER_MEM_LIMIT; + double SYSTEM_MONITOR_FREQUENCY; // Ratekeeper double SMOOTHING_AMOUNT; diff --git a/fdbserver/fdbserver.actor.cpp b/fdbserver/fdbserver.actor.cpp index ff28269e4f..545d407953 100644 --- a/fdbserver/fdbserver.actor.cpp +++ b/fdbserver/fdbserver.actor.cpp @@ -464,7 +464,7 @@ Future startSystemMonitor(std::string dataFolder, SystemMonitorMachineState(dataFolder, dcId, zoneId, machineId, g_network->getLocalAddress().ip)); systemMonitor(); - return recurring(&systemMonitor, 5.0, TaskPriority::FlushTrace); + return recurring(&systemMonitor, SERVER_KNOBS->SYSTEM_MONITOR_FREQUENCY, TaskPriority::FlushTrace); } void testIndexedSet(); From 43bed1d9ddbd28913f4e52294948c860f762bf01 Mon Sep 17 00:00:00 2001 From: Markus Pilman Date: Fri, 26 Mar 2021 15:06:59 -0600 Subject: [PATCH 37/49] Fix bug where betterMasterExist and recruitment disagree --- fdbserver/ClusterController.actor.cpp | 140 
++++++++++++++------------ 1 file changed, 78 insertions(+), 62 deletions(-) diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index e36ddaf2b0..d1ffcf2354 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -339,7 +339,7 @@ public: bool checkStable = false, std::set> dcIds = std::set>(), std::vector exclusionWorkerIds = {}) { - std::map, vector> fitness_workers; + std::map, vector> fitness_workers; std::vector results; std::vector unavailableLocals; Reference logServerSet; @@ -406,80 +406,93 @@ public: } // This worker is a candidate for TLog recruitment. - fitness_workers[std::make_pair(fitness, worker_details.degraded)].push_back(worker_details); + fitness_workers[std::make_tuple(fitness, 0 /* id_used[worker_process_id]*/, worker_details.degraded)] + .push_back(worker_details); } - results.reserve(results.size() + id_worker.size()); - for (int fitness = ProcessClass::BestFit; fitness != ProcessClass::NeverAssign && !bCompleted; fitness++) { + // FIXME: it's not clear whether this is necessary. 
+ for (int fitness = ProcessClass::BestFit; fitness != ProcessClass::NeverAssign; fitness++) { auto fitnessEnum = (ProcessClass::Fitness)fitness; for (int addingDegraded = 0; addingDegraded < 2; addingDegraded++) { - auto workerItr = fitness_workers.find(std::make_pair(fitnessEnum, (bool)addingDegraded)); - if (workerItr != fitness_workers.end()) { - for (auto& worker : workerItr->second) { - logServerMap->add(worker.interf.locality, &worker); - } - } + fitness_workers[std::make_tuple(fitnessEnum, 0, addingDegraded)]; + } + } + results.reserve(results.size() + id_worker.size()); + for (auto workerIter = fitness_workers.begin(); workerIter != fitness_workers.end(); ++workerIter) { + auto fitness = std::get<0>(workerIter->first); + auto used = std::get<1>(workerIter->first); + auto addingDegraded = std::get<2>(workerIter->first); + ASSERT(fitness < ProcessClass::NeverAssign); + if (bCompleted) { + break; + } - if (logServerSet->size() < (addingDegraded == 0 ? desired : required)) { - } else if (logServerSet->size() == required || logServerSet->size() <= desired) { - if (logServerSet->validate(policy)) { - for (auto& object : logServerMap->getObjects()) { - results.push_back(*object); - } - bCompleted = true; - break; + for (auto& worker : workerIter->second) { + logServerMap->add(worker.interf.locality, &worker); + } + + if (logServerSet->size() < (std::get<2>(workerIter->first) ? 
required : desired)) { + } else if (logServerSet->size() == required || logServerSet->size() <= desired) { + if (logServerSet->validate(policy)) { + for (auto& object : logServerMap->getObjects()) { + results.push_back(*object); } - TraceEvent(SevWarn, "GWFTADNotAcceptable", id) + bCompleted = true; + break; + } + TraceEvent(SevWarn, "GWFTADNotAcceptable", id) + .detail("DcIds", dcList) + .detail("Fitness", fitness) + .detail("Processes", logServerSet->size()) + .detail("Required", required) + .detail("TLogPolicy", policy->info()) + .detail("DesiredLogs", desired) + .detail("Used", used) + .detail("AddingDegraded", addingDegraded); + } + // Try to select the desired size, if larger + else { + std::vector bestSet; + std::vector tLocalities; + + // Try to find the best team of servers to fulfill the policy + if (findBestPolicySet(bestSet, + logServerSet, + policy, + desired, + SERVER_KNOBS->POLICY_RATING_TESTS, + SERVER_KNOBS->POLICY_GENERATIONS)) { + results.reserve(results.size() + bestSet.size()); + for (auto& entry : bestSet) { + auto object = logServerMap->getObject(entry); + ASSERT(object); + results.push_back(*object); + tLocalities.push_back(object->interf.locality); + } + TraceEvent("GWFTADBestResults", id) .detail("DcIds", dcList) .detail("Fitness", fitness) + .detail("Used", used) .detail("Processes", logServerSet->size()) - .detail("Required", required) - .detail("TLogPolicy", policy->info()) - .detail("DesiredLogs", desired) - .detail("AddingDegraded", addingDegraded); - } - // Try to select the desired size, if larger - else { - std::vector bestSet; - std::vector tLocalities; - - // Try to find the best team of servers to fulfill the policy - if (findBestPolicySet(bestSet, - logServerSet, - policy, - desired, - SERVER_KNOBS->POLICY_RATING_TESTS, - SERVER_KNOBS->POLICY_GENERATIONS)) { - results.reserve(results.size() + bestSet.size()); - for (auto& entry : bestSet) { - auto object = logServerMap->getObject(entry); - ASSERT(object); - 
results.push_back(*object); - tLocalities.push_back(object->interf.locality); - } - TraceEvent("GWFTADBestResults", id) - .detail("DcIds", dcList) - .detail("Fitness", fitness) - .detail("Processes", logServerSet->size()) - .detail("BestCount", bestSet.size()) - .detail("BestZones", ::describeZones(tLocalities)) - .detail("BestDataHalls", ::describeDataHalls(tLocalities)) - .detail("TLogPolicy", policy->info()) - .detail("TotalResults", results.size()) - .detail("DesiredLogs", desired) - .detail("AddingDegraded", addingDegraded); - bCompleted = true; - break; - } - TraceEvent(SevWarn, "GWFTADNoBest", id) - .detail("DcIds", dcList) - .detail("Fitness", fitness) - .detail("Processes", logServerSet->size()) - .detail("Required", required) + .detail("BestCount", bestSet.size()) + .detail("BestZones", ::describeZones(tLocalities)) + .detail("BestDataHalls", ::describeDataHalls(tLocalities)) .detail("TLogPolicy", policy->info()) + .detail("TotalResults", results.size()) .detail("DesiredLogs", desired) .detail("AddingDegraded", addingDegraded); + bCompleted = true; + break; } + TraceEvent(SevWarn, "GWFTADNoBest", id) + .detail("DcIds", dcList) + .detail("Fitness", fitness) + .detail("Used", used) + .detail("Processes", logServerSet->size()) + .detail("Required", required) + .detail("TLogPolicy", policy->info()) + .detail("DesiredLogs", desired) + .detail("AddingDegraded", addingDegraded); } } @@ -1157,12 +1170,14 @@ public: req.configuration, used, first_commit_proxy); + auto grv_proxies = getWorkersForRoleInDatacenter(dcId, ProcessClass::GrvProxy, req.configuration.getDesiredGrvProxies(), req.configuration, used, first_grv_proxy); + auto resolvers = getWorkersForRoleInDatacenter(dcId, ProcessClass::Resolver, req.configuration.getDesiredResolvers(), @@ -1216,6 +1231,7 @@ public: } if (bestDC != clusterControllerDcId) { + TraceEvent("BestDCIsNotClusterDC"); vector> dcPriority; dcPriority.push_back(bestDC); desiredDcIds.set(dcPriority); From 
8555723b980e456f35913096142d1c67f0791c41 Mon Sep 17 00:00:00 2001 From: Markus Pilman Date: Fri, 26 Mar 2021 15:46:54 -0600 Subject: [PATCH 38/49] removing testing case --- fdbserver/ClusterController.actor.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index d1ffcf2354..91dcf281a5 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -406,8 +406,8 @@ public: } // This worker is a candidate for TLog recruitment. - fitness_workers[std::make_tuple(fitness, 0 /* id_used[worker_process_id]*/, worker_details.degraded)] - .push_back(worker_details); + fitness_workers[std::make_tuple(fitness, id_used[worker_process_id], worker_details.degraded)].push_back( + worker_details); } // FIXME: it's not clear whether this is necessary. From 1a0ef39e5d425635aecd1d7d6926ac393b8efa71 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Sat, 27 Mar 2021 21:48:27 -0700 Subject: [PATCH 39/49] Fix keyRangesFilter with logsOnly flag Remove the Sev 40 trace event. 
--- fdbclient/BackupContainerFileSystem.actor.cpp | 7 ------- fdbclient/FileBackupAgent.actor.cpp | 2 +- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/fdbclient/BackupContainerFileSystem.actor.cpp b/fdbclient/BackupContainerFileSystem.actor.cpp index 66fdff3ee8..e0dbc0baaa 100644 --- a/fdbclient/BackupContainerFileSystem.actor.cpp +++ b/fdbclient/BackupContainerFileSystem.actor.cpp @@ -896,13 +896,6 @@ public: VectorRef keyRangesFilter, bool logsOnly = false, Version beginVersion = invalidVersion) { - // Does not support use keyRangesFilter for logsOnly yet - if (logsOnly && !keyRangesFilter.empty()) { - TraceEvent(SevError, "BackupContainerRestoreSetUnsupportedAPI") - .detail("KeyRangesFilter", keyRangesFilter.size()); - return Optional(); - } - for (const auto& range : keyRangesFilter) { TraceEvent("BackupContainerGetRestoreSet").detail("RangeFilter", printable(range)); } diff --git a/fdbclient/FileBackupAgent.actor.cpp b/fdbclient/FileBackupAgent.actor.cpp index e77e50b5f9..b6e3f5a338 100644 --- a/fdbclient/FileBackupAgent.actor.cpp +++ b/fdbclient/FileBackupAgent.actor.cpp @@ -5229,7 +5229,7 @@ public: } Optional restoreSet = - wait(bc->getRestoreSet(targetVersion, VectorRef(), incrementalBackupOnly, beginVersion)); + wait(bc->getRestoreSet(targetVersion, ranges, incrementalBackupOnly, beginVersion)); if (!restoreSet.present()) { TraceEvent(SevWarn, "FileBackupAgentRestoreNotPossible") From 55ef40864fba244442c4a7cadb42be50b44c40b6 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Mon, 29 Mar 2021 10:09:07 -0700 Subject: [PATCH 40/49] Add comments for touched functions --- fdbbackup/backup.actor.cpp | 2 ++ fdbclient/BackupContainerFileSystem.actor.cpp | 7 +++++++ fdbclient/FileBackupAgent.actor.cpp | 20 +++++++++++++++++++ 3 files changed, 29 insertions(+) diff --git a/fdbbackup/backup.actor.cpp b/fdbbackup/backup.actor.cpp index ea3847d035..7614324afc 100644 --- a/fdbbackup/backup.actor.cpp +++ b/fdbbackup/backup.actor.cpp @@ -2236,6 +2236,8 @@ 
Reference openBackupContainer(const char* name, std::string de return c; } +// Submit the restore request to the database if "performRestore" is true. Otherwise, +// check if the restore can be performed. ACTOR Future runRestore(Database db, std::string originalClusterFile, std::string tagName, diff --git a/fdbclient/BackupContainerFileSystem.actor.cpp b/fdbclient/BackupContainerFileSystem.actor.cpp index e0dbc0baaa..796748181e 100644 --- a/fdbclient/BackupContainerFileSystem.actor.cpp +++ b/fdbclient/BackupContainerFileSystem.actor.cpp @@ -891,6 +891,13 @@ public: return Optional(); } + // Get a set of files that can restore the given "keyRangesFilter" to the "targetVersion". + // If "keyRangesFilter" is empty, the restore will recover the whole database. It's generally + // a good idea to specify "keyRangesFilter" to reduce the number of files for restore times. + // + // If "logsOnly" is true, then only log files are returned and "keyRangesFilter" is ignored, + // because the log can contain mutations of the whole key space, unlike range files that each + // is limited to a smaller key range. ACTOR static Future> getRestoreSet(Reference bc, Version targetVersion, VectorRef keyRangesFilter, diff --git a/fdbclient/FileBackupAgent.actor.cpp b/fdbclient/FileBackupAgent.actor.cpp index b6e3f5a338..d876594ade 100644 --- a/fdbclient/FileBackupAgent.actor.cpp +++ b/fdbclient/FileBackupAgent.actor.cpp @@ -4001,6 +4001,8 @@ struct StartFullRestoreTaskFunc : RestoreTaskFuncBase { static TaskParam firstVersion() { return LiteralStringRef(__FUNCTION__); } } Params; + // Find all files needed for the restore and save them in the RestoreConfig for the task. + // Update the total number of files and blocks and change state to starting. 
ACTOR static Future _execute(Database cx, Reference taskBucket, Reference futureBucket, @@ -5198,6 +5200,24 @@ public: return r; } + // Submits the restore request to the database and throws "restore_invalid_version" error if + // restore is not possible. Parameters: + // cx: the database to be restored to + // cxOrig: if present, is used to resolve the restore timestamp into a version. + // tagName: restore tag + // url: the backup container's URL that contains all backup files + // ranges: the restored key ranges; if empty, restore the whole database + // waitForComplete: if set, wait until the restore is completed before returning; otherwise, + // return when the request is submitted to the database. + // targetVersion: the version to be restored. + // verbose: print verbose information. + // addPrefix: each key is added this prefix during restore. + // removePrefix: for each key to be restored, remove this prefix first. + // lockDB: if set lock the database with randomUid before performing restore; + // otherwise, check database is locked with the randomUid + // incrementalBackupOnly: only perform incremental backup + // beginVersion: restore's begin version + // randomUid: the UID for lock the database ACTOR static Future restore(FileBackupAgent* backupAgent, Database cx, Optional cxOrig, From 41faa51c7a77e4c203c894eb4eb907e0d1fdb87f Mon Sep 17 00:00:00 2001 From: Markus Pilman Date: Mon, 29 Mar 2021 13:31:16 -0600 Subject: [PATCH 41/49] re-add debugRandom --- flow/IRandom.h | 6 ++++++ flow/flow.cpp | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/flow/IRandom.h b/flow/IRandom.h index 9e7b0b8829..eb04a318b7 100644 --- a/flow/IRandom.h +++ b/flow/IRandom.h @@ -184,6 +184,12 @@ Reference deterministicRandom(); // non-deterministic contexts. Reference nondeterministicRandom(); +// This returns a deterministic random number generator initialized with the same seed as the one returned by +// deterministicRandom. 
The main use-case for this is to generate deterministic random numbers without changing the +// determinism of the simulator. This is useful for things like generating random UIDs for debug transactions. +// WARNING: This is not thread safe and must not be called from any other thread than the network thread! +Reference debugRandom(); + // Populates a buffer with a random sequence of bytes void generateRandomData(uint8_t* buffer, int length); diff --git a/flow/flow.cpp b/flow/flow.cpp index 50d138b9cc..74f0b334f5 100644 --- a/flow/flow.cpp +++ b/flow/flow.cpp @@ -47,11 +47,17 @@ INetwork* g_network = 0; FILE* randLog = 0; thread_local Reference seededRandom; +Reference seededDebugRandom; uint64_t debug_lastLoadBalanceResultEndpointToken = 0; bool noUnseed = false; void setThreadLocalDeterministicRandomSeed(uint32_t seed) { seededRandom = Reference(new DeterministicRandom(seed, true)); + seededDebugRandom = Reference(new DeterministicRandom(seed)); +} + +Reference debugRandom() { + return seededDebugRandom; } Reference deterministicRandom() { From 50342b5082d5ab158fdfe52c50e1ad4a69b1c0ad Mon Sep 17 00:00:00 2001 From: Markus Pilman Date: Mon, 29 Mar 2021 13:31:26 -0600 Subject: [PATCH 42/49] fix a second low-latency bug --- fdbserver/ClusterController.actor.cpp | 9 +++++---- fdbserver/workloads/LowLatency.actor.cpp | 2 ++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 91dcf281a5..55770d6f3b 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -339,7 +339,7 @@ public: bool checkStable = false, std::set> dcIds = std::set>(), std::vector exclusionWorkerIds = {}) { - std::map, vector> fitness_workers; + std::map, vector> fitness_workers; std::vector results; std::vector unavailableLocals; Reference logServerSet; @@ -406,15 +406,16 @@ public: } // This worker is a candidate for TLog recruitment. 
- fitness_workers[std::make_tuple(fitness, id_used[worker_process_id], worker_details.degraded)].push_back( - worker_details); + bool inCCDC = worker_details.interf.locality.dcId() == clusterControllerDcId; + fitness_workers[std::make_tuple(fitness, id_used[worker_process_id], worker_details.degraded, inCCDC)] + .push_back(worker_details); } // FIXME: it's not clear whether this is necessary. for (int fitness = ProcessClass::BestFit; fitness != ProcessClass::NeverAssign; fitness++) { auto fitnessEnum = (ProcessClass::Fitness)fitness; for (int addingDegraded = 0; addingDegraded < 2; addingDegraded++) { - fitness_workers[std::make_tuple(fitnessEnum, 0, addingDegraded)]; + fitness_workers[std::make_tuple(fitnessEnum, 0, addingDegraded, false)]; } } results.reserve(results.size() + id_worker.size()); diff --git a/fdbserver/workloads/LowLatency.actor.cpp b/fdbserver/workloads/LowLatency.actor.cpp index 8fdcbf175e..90b03dd8e9 100644 --- a/fdbserver/workloads/LowLatency.actor.cpp +++ b/fdbserver/workloads/LowLatency.actor.cpp @@ -74,6 +74,7 @@ struct LowLatencyWorkload : TestWorkload { ++self->operations; loop { try { + TraceEvent("StartLowLatencyTransaction"); tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE); tr.setOption(FDBTransactionOptions::LOCK_AWARE); if (doCommit) { @@ -84,6 +85,7 @@ struct LowLatencyWorkload : TestWorkload { } break; } catch (Error& e) { + TraceEvent("LowLatencyTransactionFailed").error(e, true); wait(tr.onError(e)); ++self->retries; } From 2b4744ad8a0e37cc986e306fe3e03fae62d24e3c Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Mon, 29 Mar 2021 11:49:23 -0700 Subject: [PATCH 43/49] Reword comments. 
--- fdbclient/BackupContainerFileSystem.actor.cpp | 5 +++-- fdbclient/FileBackupAgent.actor.cpp | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fdbclient/BackupContainerFileSystem.actor.cpp b/fdbclient/BackupContainerFileSystem.actor.cpp index 796748181e..87e4ffcbf0 100644 --- a/fdbclient/BackupContainerFileSystem.actor.cpp +++ b/fdbclient/BackupContainerFileSystem.actor.cpp @@ -892,8 +892,9 @@ public: } // Get a set of files that can restore the given "keyRangesFilter" to the "targetVersion". - // If "keyRangesFilter" is empty, the restore will recover the whole database. It's generally - // a good idea to specify "keyRangesFilter" to reduce the number of files for restore times. + // If "keyRangesFilter" is empty, the file set will cover all key ranges present in the backup. + // It's generally a good idea to specify "keyRangesFilter" to reduce the number of files for + // restore times. // // If "logsOnly" is true, then only log files are returned and "keyRangesFilter" is ignored, // because the log can contain mutations of the whole key space, unlike range files that each diff --git a/fdbclient/FileBackupAgent.actor.cpp b/fdbclient/FileBackupAgent.actor.cpp index d876594ade..17112ca8f9 100644 --- a/fdbclient/FileBackupAgent.actor.cpp +++ b/fdbclient/FileBackupAgent.actor.cpp @@ -5206,7 +5206,7 @@ public: // cxOrig: if present, is used to resolve the restore timestamp into a version. // tagName: restore tag // url: the backup container's URL that contains all backup files - // ranges: the restored key ranges; if empty, restore the whole database + // ranges: the restored key ranges; if empty, restore all key ranges in the backup // waitForComplete: if set, wait until the restore is completed before returning; otherwise, // return when the request is submitted to the database. // targetVersion: the version to be restored. 
From 2648d19eddae167f708e2d377150541cf3a9fc45 Mon Sep 17 00:00:00 2001 From: Zhe Wu Date: Mon, 29 Mar 2021 15:10:57 -0700 Subject: [PATCH 44/49] Update comment --- fdbclient/DatabaseConfiguration.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbclient/DatabaseConfiguration.h b/fdbclient/DatabaseConfiguration.h index 4b1b29c021..bc64a6c9c5 100644 --- a/fdbclient/DatabaseConfiguration.h +++ b/fdbclient/DatabaseConfiguration.h @@ -160,7 +160,7 @@ struct DatabaseConfiguration { } // Retuns the maximum number of discrete failures a cluster can tolerate. - // In HA mode, `fullyReplicatedRegions` is set to 1 initially when data is being + // In HA mode, `fullyReplicatedRegions` is set to "1" initially when data is being // replicated to remote, and will be incremented later. `forAvailablity` is set to true // if we want to account the number for machines that can recruit new tLogs/SS after failures. // Killing an entire datacenter counts as killing one zone in modes that support it. From 4c73e838abc851b07a91957488dee82f893d8bf9 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Tue, 30 Mar 2021 09:26:48 -0700 Subject: [PATCH 45/49] Fix a comment on backup log data location --- fdbclient/BackupAgentBase.actor.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fdbclient/BackupAgentBase.actor.cpp b/fdbclient/BackupAgentBase.actor.cpp index 67fc4ae197..d6be426dab 100644 --- a/fdbclient/BackupAgentBase.actor.cpp +++ b/fdbclient/BackupAgentBase.actor.cpp @@ -142,8 +142,9 @@ Version getVersionFromString(std::string const& value) { } // Transaction log data is stored by the FoundationDB core in the -// \xff / bklog / keyspace in a funny order for performance reasons. -// Return the ranges of keys that contain the data for the given range +// "backupLogKeys" (i.e., \xff\x02/blog/) keyspace in a funny order for +// performance reasons. +// Returns the ranges of keys that contain the data for the given range // of versions. 
// assert CLIENT_KNOBS->LOG_RANGE_BLOCK_SIZE % blocksize = 0. Otherwise calculation of hash will be incorrect Standalone> getLogRanges(Version beginVersion, From d2061554db50c68146845b6784bf6f69b551e162 Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Tue, 30 Mar 2021 19:48:54 -0700 Subject: [PATCH 46/49] Fix: simulations with region configurations that enables automatic failover could failover before both regions are fully replicated --- fdbserver/workloads/ChangeConfig.actor.cpp | 26 +++++++++++++++++----- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/fdbserver/workloads/ChangeConfig.actor.cpp b/fdbserver/workloads/ChangeConfig.actor.cpp index bbb880bfc3..0cc14a1eb4 100644 --- a/fdbserver/workloads/ChangeConfig.actor.cpp +++ b/fdbserver/workloads/ChangeConfig.actor.cpp @@ -52,6 +52,8 @@ struct ChangeConfigWorkload : TestWorkload { void getMetrics(vector& m) override {} + // When simulated two clusters for DR tests, this actor sets the starting configuration + // for the extra cluster. 
ACTOR Future extraDatabaseConfigure(ChangeConfigWorkload* self) { if (g_network->isSimulated() && g_simulator.extraDB) { auto extraFile = makeReference(*g_simulator.extraDB); @@ -59,10 +61,15 @@ struct ChangeConfigWorkload : TestWorkload { wait(delay(5 * deterministicRandom()->random01())); if (self->configMode.size()) { + if (g_simulator.usableRegions == 2) { + // It is not safe to allow automatic failover to a region which is not fully replicated, + // so wait for both regions to be fully replicated before enabling failover + wait(success(changeConfig(extraDB, g_simulator.disableRemote, true))); + TraceEvent("WaitForReplicasExtra"); + wait(waitForFullReplication(extraDB)); + TraceEvent("WaitForReplicasExtraEnd"); + } wait(success(changeConfig(extraDB, self->configMode, true))); - TraceEvent("WaitForReplicasExtra"); - wait(waitForFullReplication(extraDB)); - TraceEvent("WaitForReplicasExtraEnd"); } if (self->networkAddresses.size()) { if (self->networkAddresses == "auto") @@ -75,6 +82,8 @@ struct ChangeConfigWorkload : TestWorkload { return Void(); } + // Either changes the database configuration, or changes the coordinators based on the parameters + // of the workload. 
ACTOR Future ChangeConfigClient(Database cx, ChangeConfigWorkload* self) { wait(delay(self->minDelayBeforeChange + deterministicRandom()->random01() * (self->maxDelayBeforeChange - self->minDelayBeforeChange))); @@ -86,10 +95,15 @@ struct ChangeConfigWorkload : TestWorkload { } if (self->configMode.size()) { + // It is not safe to allow automatic failover to a region which is not fully replicated, + // so wait for both regions to be fully replicated before enabling failover + if (g_network->isSimulated() && g_simulator.usableRegions == 2) { + wait(success(changeConfig(cx, g_simulator.disableRemote, true))); + TraceEvent("WaitForReplicas"); + wait(waitForFullReplication(cx)); + TraceEvent("WaitForReplicasEnd"); + } wait(success(changeConfig(cx, self->configMode, true))); - TraceEvent("WaitForReplicas"); - wait(waitForFullReplication(cx)); - TraceEvent("WaitForReplicasEnd"); } if (self->networkAddresses.size()) { if (self->networkAddresses == "auto") From e774262046bfb63fbae227e09a5ed7e64385e821 Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Tue, 30 Mar 2021 21:11:26 -0700 Subject: [PATCH 47/49] fix: g_simulator.disableRemote did not contain the rest of the configuration --- fdbrpc/simulator.h | 1 + fdbserver/SimulatedCluster.actor.cpp | 14 +++++++++++--- fdbserver/workloads/ChangeConfig.actor.cpp | 12 ++++++------ 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/fdbrpc/simulator.h b/fdbrpc/simulator.h index cde0eb0dda..4b74ed91ba 100644 --- a/fdbrpc/simulator.h +++ b/fdbrpc/simulator.h @@ -391,6 +391,7 @@ public: std::string disablePrimary; std::string disableRemote; std::string originalRegions; + std::string startingDisabledConfiguration; bool allowLogSetKills; Optional> remoteDcId; bool hasSatelliteReplication; diff --git a/fdbserver/SimulatedCluster.actor.cpp b/fdbserver/SimulatedCluster.actor.cpp index 76f392aea5..aaf114cb21 100644 --- a/fdbserver/SimulatedCluster.actor.cpp +++ b/fdbserver/SimulatedCluster.actor.cpp @@ -1179,9 +1179,6 @@ void 
SimulationConfig::generateNormalConfig(const TestConfig& testConfig) { regionArr.push_back(remoteObj); } - set_config("regions=" + - json_spirit::write_string(json_spirit::mValue(regionArr), json_spirit::Output_options::none)); - if (needsRemote) { g_simulator.originalRegions = "regions=" + json_spirit::write_string(json_spirit::mValue(regionArr), json_spirit::Output_options::none); @@ -1195,6 +1192,11 @@ void SimulationConfig::generateNormalConfig(const TestConfig& testConfig) { disableRemote[1].get_obj()["datacenters"].get_array()[0].get_obj()["priority"] = -1; g_simulator.disableRemote = "regions=" + json_spirit::write_string(json_spirit::mValue(disableRemote), json_spirit::Output_options::none); + } else { + // In order to generate a starting configuration with the remote disabled, do not apply the region + // configuration to the DatabaseConfiguration until after creating the starting conf string. + set_config("regions=" + + json_spirit::write_string(json_spirit::mValue(regionArr), json_spirit::Output_options::none)); } } @@ -1276,6 +1278,12 @@ void setupSimulatedSystem(vector>* systemActors, } } + if (g_simulator.originalRegions != "") { + simconfig.set_config(g_simulator.originalRegions); + g_simulator.startingDisabledConfiguration = startingConfigString + " " + g_simulator.disableRemote; + startingConfigString += " " + g_simulator.originalRegions; + } + g_simulator.storagePolicy = simconfig.db.storagePolicy; g_simulator.tLogPolicy = simconfig.db.tLogPolicy; g_simulator.tLogWriteAntiQuorum = simconfig.db.tLogWriteAntiQuorum; diff --git a/fdbserver/workloads/ChangeConfig.actor.cpp b/fdbserver/workloads/ChangeConfig.actor.cpp index 0cc14a1eb4..b891367c64 100644 --- a/fdbserver/workloads/ChangeConfig.actor.cpp +++ b/fdbserver/workloads/ChangeConfig.actor.cpp @@ -61,10 +61,10 @@ struct ChangeConfigWorkload : TestWorkload { wait(delay(5 * deterministicRandom()->random01())); if (self->configMode.size()) { - if (g_simulator.usableRegions == 2) { + if 
(g_simulator.startingDisabledConfiguration != "") { // It is not safe to allow automatic failover to a region which is not fully replicated, // so wait for both regions to be fully replicated before enabling failover - wait(success(changeConfig(extraDB, g_simulator.disableRemote, true))); + wait(success(changeConfig(extraDB, g_simulator.startingDisabledConfiguration, true))); TraceEvent("WaitForReplicasExtra"); wait(waitForFullReplication(extraDB)); TraceEvent("WaitForReplicasExtraEnd"); @@ -95,10 +95,10 @@ struct ChangeConfigWorkload : TestWorkload { } if (self->configMode.size()) { - // It is not safe to allow automatic failover to a region which is not fully replicated, - // so wait for both regions to be fully replicated before enabling failover - if (g_network->isSimulated() && g_simulator.usableRegions == 2) { - wait(success(changeConfig(cx, g_simulator.disableRemote, true))); + if (g_network->isSimulated() && g_simulator.startingDisabledConfiguration != "") { + // It is not safe to allow automatic failover to a region which is not fully replicated, + // so wait for both regions to be fully replicated before enabling failover + wait(success(changeConfig(cx, g_simulator.startingDisabledConfiguration, true))); TraceEvent("WaitForReplicas"); wait(waitForFullReplication(cx)); TraceEvent("WaitForReplicasEnd"); From 8be216b5c9f8728281662bd6e2413d430f6420b4 Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Wed, 31 Mar 2021 14:24:44 -0700 Subject: [PATCH 48/49] fix: storage servers would not attempt to become a cluster controller even when they were the only process in a region because they were not notified that the existing cluster controller was in the remote region --- fdbserver/Coordination.actor.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fdbserver/Coordination.actor.cpp b/fdbserver/Coordination.actor.cpp index 7cabd4b6e5..80f945fa38 100644 --- a/fdbserver/Coordination.actor.cpp +++ b/fdbserver/Coordination.actor.cpp @@ 
-404,9 +404,16 @@ ACTOR Future leaderRegister(LeaderElectionRegInterface interf, Key key) { nextNominee = *availableLeaders.begin(); } + // If the current leader's priority became worse, we still need to notify all clients because now one + // of them might be better than the leader. In addition, even though FitnessRemote is better than + // FitnessUnknown, we still need to notify clients so that monitorLeaderRemotely has a change to switch + // from passively monitoring the leader to actively attempting to become the leader. if (!currentNominee.present() || !nextNominee.present() || !currentNominee.get().equalInternalId(nextNominee.get()) || - nextNominee.get() > currentNominee.get()) { + nextNominee.get() > currentNominee.get() || + (currentNominee.get().getPriorityInfo().dcFitness == + ClusterControllerPriorityInfo::FitnessUnknown && + nextNominee.get().getPriorityInfo().dcFitness == ClusterControllerPriorityInfo::FitnessRemote)) { TraceEvent("NominatingLeader") .detail("NextNominee", nextNominee.present() ? nextNominee.get().changeID : UID()) .detail("CurrentNominee", currentNominee.present() ? currentNominee.get().changeID : UID()) From d612dbbbe2c793d72c5997dd612db8f9660523a9 Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Thu, 1 Apr 2021 10:05:49 -0700 Subject: [PATCH 49/49] Update fdbserver/Coordination.actor.cpp Co-authored-by: Jingyu Zhou --- fdbserver/Coordination.actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbserver/Coordination.actor.cpp b/fdbserver/Coordination.actor.cpp index 80f945fa38..6e9be53ddb 100644 --- a/fdbserver/Coordination.actor.cpp +++ b/fdbserver/Coordination.actor.cpp @@ -406,7 +406,7 @@ ACTOR Future leaderRegister(LeaderElectionRegInterface interf, Key key) { // If the current leader's priority became worse, we still need to notify all clients because now one // of them might be better than the leader.
In addition, even though FitnessRemote is better than - // FitnessUnknown, we still need to notify clients so that monitorLeaderRemotely has a change to switch + // FitnessUnknown, we still need to notify clients so that monitorLeaderRemotely has a chance to switch // from passively monitoring the leader to actively attempting to become the leader. if (!currentNominee.present() || !nextNominee.present() || !currentNominee.get().equalInternalId(nextNominee.get()) ||