From e73c0a31e6640d44fc7daa3c8c22a9d02ff3cbdb Mon Sep 17 00:00:00 2001 From: Xiaoxi Wang Date: Wed, 2 Mar 2022 10:03:23 -0800 Subject: [PATCH 1/2] add wiggle_server_ids and wiggle_server_addresses in status json --- fdbcli/StatusCommand.actor.cpp | 1 + fdbclient/NativeAPI.actor.cpp | 26 ++++++++++++++++++ fdbclient/NativeAPI.actor.h | 4 +++ fdbclient/Schemas.cpp | 2 ++ fdbserver/DDTeamCollection.actor.cpp | 21 ++------------ fdbserver/Status.actor.cpp | 29 ++++++++++++++++++-- fdbserver/workloads/StatusWorkload.actor.cpp | 3 +- 7 files changed, 64 insertions(+), 22 deletions(-) diff --git a/fdbcli/StatusCommand.actor.cpp b/fdbcli/StatusCommand.actor.cpp index ea6bcb5293..e0b368ebe8 100644 --- a/fdbcli/StatusCommand.actor.cpp +++ b/fdbcli/StatusCommand.actor.cpp @@ -810,6 +810,7 @@ void printStatus(StatusObjectReader statusObj, outputString = outputStringCache; outputString += "\n Unable to retrieve data status"; } + // Storage Wiggle section // Operating space section outputString += "\n\nOperating space:"; diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 90d9e85cfd..4467fd64d0 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -6783,6 +6783,32 @@ ACTOR Future setPerpetualStorageWiggle(Database cx, bool enable, LockAware return Void(); } +ACTOR Future>> readStorageWiggleValues(Database cx, + bool primary, + bool use_system_priority) { + state const Key readKey = perpetualStorageWiggleIDPrefix.withSuffix(primary ? "primary/"_sr : "remote/"_sr); + state KeyBackedObjectMap metadataMap(readKey, + IncludeVersion()); + state Reference tr(new ReadYourWritesTransaction(cx)); + state std::vector> res; + // read the wiggling pairs + loop { + try { + tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS); + tr->setOption(FDBTransactionOptions::READ_LOCK_AWARE); + if (use_system_priority) { + tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE); + } + wait(store(res, metadataMap.getRange(tr, UID(0, 0), Optional(), CLIENT_KNOBS->TOO_MANY))); + wait(tr->commit()); + break; + } catch (Error& e) { + wait(tr->onError(e)); + } + } + return res; +} + ACTOR Future>> splitStorageMetrics(Database cx, KeyRange keys, StorageMetrics limit, diff --git a/fdbclient/NativeAPI.actor.h b/fdbclient/NativeAPI.actor.h index a3fb18574a..505bbefd3a 100644 --- a/fdbclient/NativeAPI.actor.h +++ b/fdbclient/NativeAPI.actor.h @@ -480,5 +480,9 @@ inline uint64_t getWriteOperationCost(uint64_t bytes) { // will be 1. Otherwise, the value will be 0. ACTOR Future setPerpetualStorageWiggle(Database cx, bool enable, LockAware lockAware = LockAware::False); +ACTOR Future>> readStorageWiggleValues(Database cx, + bool primary, + bool use_system_priority); + #include "flow/unactorcompiler.h" #endif diff --git a/fdbclient/Schemas.cpp b/fdbclient/Schemas.cpp index 2e9fcc53a6..c5e2f32792 100644 --- a/fdbclient/Schemas.cpp +++ b/fdbclient/Schemas.cpp @@ -25,6 +25,8 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema( { "cluster":{ "storage_wiggler": { + "wiggle_server_ids":["0ccb4e0feddb55"], + "wiggle_server_addresses": ["127.0.0.1"], "primary": { "last_round_start_datetime": "Wed Feb 4 09:36:37 2022 +0000", "last_round_start_timestamp": 63811229797, diff --git a/fdbserver/DDTeamCollection.actor.cpp b/fdbserver/DDTeamCollection.actor.cpp index bc22b1a7eb..75747c9c65 100644 --- a/fdbserver/DDTeamCollection.actor.cpp +++ b/fdbserver/DDTeamCollection.actor.cpp @@ -2828,25 +2828,8 @@ public: // read the current map of `perpetualStorageWiggleIDPrefix`, then restore wigglingId. ACTOR static Future readStorageWiggleMap(DDTeamCollection* self) { - - state const Key readKey = - perpetualStorageWiggleIDPrefix.withSuffix(self->primary ? "primary/"_sr : "remote/"_sr); - state KeyBackedObjectMap metadataMap(readKey, - IncludeVersion()); - state Reference tr(new ReadYourWritesTransaction(self->cx)); - state std::vector> res; - // read the wiggling pairs - loop { - try { - tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS); - tr->setOption(FDBTransactionOptions::READ_LOCK_AWARE); - wait(store(res, metadataMap.getRange(tr, UID(0, 0), Optional(), CLIENT_KNOBS->TOO_MANY))); - wait(tr->commit()); - break; - } catch (Error& e) { - wait(tr->onError(e)); - } - } + state std::vector> res = + wait(readStorageWiggleValues(self->cx, self->primary, false)); if (res.size() > 0) { // SOMEDAY: support wiggle multiple SS at once ASSERT(!self->wigglingId.present()); // only single process wiggle is allowed diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp index c34830311d..b2da99f2f7 100644 --- a/fdbserver/Status.actor.cpp +++ b/fdbserver/Status.actor.cpp @@ -2928,6 +2928,7 @@ ACTOR Future clusterGetStatus( state JsonBuilderObject qos; state JsonBuilderObject dataOverlay; state JsonBuilderObject storageWiggler; + state std::unordered_set wiggleServers; statusObj["protocol_version"] = format("%" PRIx64, g_network->protocolVersion().version()); statusObj["connection_string"] = coordinators.ccr->getConnectionString().toString(); @@ -3018,8 +3019,18 @@ ACTOR Future clusterGetStatus( clusterSummaryStatisticsFetcher(pMetrics, storageServerFuture, tLogFuture, &status_incomplete_reasons)); if (configuration.get().perpetualStorageWiggleSpeed > 0) { - wait(store(storageWiggler, storageWigglerStatsFetcher(configuration.get(), cx, true))); - statusObj["storage_wiggler"] = storageWiggler; + state Future>> primaryWiggleValues; + state Future>> remoteWiggleValues; + + primaryWiggleValues = readStorageWiggleValues(cx, true, true); + remoteWiggleValues = readStorageWiggleValues(cx, false, true); + wait(store(storageWiggler, storageWigglerStatsFetcher(configuration.get(), cx, true)) && + success(primaryWiggleValues) && success(remoteWiggleValues)); + + for (auto& p : primaryWiggleValues.get()) + wiggleServers.insert(p.first); + for (auto& p : remoteWiggleValues.get()) + wiggleServers.insert(p.first); } state std::vector workerStatuses = wait(getAll(futures2)); @@ -3178,13 +3189,27 @@ ACTOR Future clusterGetStatus( statusObj["datacenter_lag"] = getLagObject(datacenterVersionDifference); int activeTSSCount = 0; + JsonBuilderArray wiggleServerAddress; for (auto& it : storageServers) { if (it.first.isTss()) { activeTSSCount++; } + if (wiggleServers.count(it.first.id())) { + wiggleServerAddress.push_back(it.first.address().toString()); + } } statusObj["active_tss_count"] = activeTSSCount; + if (!wiggleServers.empty()) { + JsonBuilderArray wiggleServerUID; + for (auto& id : wiggleServers) + wiggleServerUID.push_back(id.shortString()); + + storageWiggler["wiggle_server_ids"] = wiggleServerUID; + storageWiggler["wiggle_server_addresses"] = wiggleServerAddress; + statusObj["storage_wiggler"] = storageWiggler; + } + int totalDegraded = 0; for (auto& it : workers) { if (it.degraded) { diff --git a/fdbserver/workloads/StatusWorkload.actor.cpp b/fdbserver/workloads/StatusWorkload.actor.cpp index 8d7cc36490..afd8b5c65e 100644 --- a/fdbserver/workloads/StatusWorkload.actor.cpp +++ b/fdbserver/workloads/StatusWorkload.actor.cpp @@ -88,7 +88,8 @@ struct StatusWorkload : TestWorkload { schemaCoverage(spath, false); if (skv.second.type() == json_spirit::array_type && skv.second.get_array().size()) { - schemaCoverageRequirements(skv.second.get_array()[0].get_obj(), spath + "[0]"); + if (skv.second.get_array()[0].type() != json_spirit::str_type) + schemaCoverageRequirements(skv.second.get_array()[0].get_obj(), spath + "[0]"); } else if (skv.second.type() == json_spirit::obj_type) { if (skv.second.get_obj().count("$enum")) { for (auto& enum_item : skv.second.get_obj().at("$enum").get_array()) From 887d5a25cb3ec9bac6a4cd196b48bb76a5c9f087 Mon Sep 17 00:00:00 2001 From: Xiaoxi Wang Date: Wed, 2 Mar 2022 11:31:55 -0800 Subject: [PATCH 2/2] add storage wiggle to status --- fdbcli/StatusCommand.actor.cpp | 21 +++++++++++++++++++++ fdbserver/Status.actor.cpp | 2 +- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/fdbcli/StatusCommand.actor.cpp b/fdbcli/StatusCommand.actor.cpp index e0b368ebe8..d1a772e0a0 100644 --- a/fdbcli/StatusCommand.actor.cpp +++ b/fdbcli/StatusCommand.actor.cpp @@ -811,6 +811,27 @@ void printStatus(StatusObjectReader statusObj, outputString += "\n Unable to retrieve data status"; } // Storage Wiggle section + StatusObjectReader storageWigglerObj; + std::string storageWigglerString; + try { + if (statusObjCluster.get("storage_wiggler", storageWigglerObj)) { + int size = 0; + if (storageWigglerObj.has("wiggle_server_addresses")) { + storageWigglerString += "\n Wiggle server addresses-"; + for (auto& v : storageWigglerObj.obj().at("wiggle_server_addresses").get_array()) { + storageWigglerString += " " + v.get_str(); + size += 1; + } + } + storageWigglerString += "\n Wiggle server count - " + std::to_string(size); + } + } catch (std::runtime_error&) { + storageWigglerString += "\n Unable to retrieve storage wiggler status"; + } + if (storageWigglerString.size()) { + outputString += "\n\nStorage wiggle:"; + outputString += storageWigglerString; + } // Operating space section outputString += "\n\nOperating space:"; diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp index b2da99f2f7..d00b48cf67 100644 --- a/fdbserver/Status.actor.cpp +++ b/fdbserver/Status.actor.cpp @@ -3200,7 +3200,7 @@ ACTOR Future clusterGetStatus( } statusObj["active_tss_count"] = activeTSSCount; - if (!wiggleServers.empty()) { + if (!storageWiggler.empty()) { JsonBuilderArray wiggleServerUID; for (auto& id : wiggleServers) wiggleServerUID.push_back(id.shortString());