From 7273723a430067251eb196518c13de2574d4d965 Mon Sep 17 00:00:00 2001 From: Chaoguang Lin Date: Mon, 6 Mar 2023 14:46:52 -0800 Subject: [PATCH] Add the hotrange fdbcli command (#9570) * Add the hotrange fdbcli command * Remove the unnecessary state * Add the doc about the hotrange command --- .../sphinx/source/command-line-interface.rst | 13 ++ fdbcli/HotRangeCommand.actor.cpp | 128 ++++++++++++++++++ fdbcli/Util.actor.cpp | 26 ++++ fdbcli/fdbcli.actor.cpp | 11 ++ fdbcli/include/fdbcli/fdbcli.actor.h | 9 ++ fdbclient/NativeAPI.actor.cpp | 24 ++++ fdbclient/include/fdbclient/DatabaseContext.h | 4 + .../include/fdbserver/StorageMetrics.actor.h | 7 +- 8 files changed, 216 insertions(+), 6 deletions(-) create mode 100644 fdbcli/HotRangeCommand.actor.cpp diff --git a/documentation/sphinx/source/command-line-interface.rst b/documentation/sphinx/source/command-line-interface.rst index 251110e9ff..a9855289da 100644 --- a/documentation/sphinx/source/command-line-interface.rst +++ b/documentation/sphinx/source/command-line-interface.rst @@ -755,3 +755,16 @@ Removes a TSS process from quarantine, disposing of the TSS and allowing Data Di Lists the storage UIDs of all TSS processes currently in quarantine. +hotrange +-------- + +Utility commands for fetching sampled read bytes/ops metrics from the specified storage server. + +``hotrange`` + +It will populate a list of available storage servers' network addresses. Users need to run this first before fetching metrics from a specific storage server. Otherwise, the address is not recognized. + +``hotrange `` + +Fetch read metrics from the given storage server to find the hot range. Run ``help hotrange`` to read the guide. + diff --git a/fdbcli/HotRangeCommand.actor.cpp b/fdbcli/HotRangeCommand.actor.cpp new file mode 100644 index 0000000000..dce0337abc --- /dev/null +++ b/fdbcli/HotRangeCommand.actor.cpp @@ -0,0 +1,128 @@ +/* + * HotRangeCommand.actor.cpp + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2023 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "fdbcli/fdbcli.actor.h" + +#include "fdbclient/FDBOptions.g.h" +#include "fdbclient/IClientApi.h" +#include "fdbclient/StorageServerInterface.h" + +#include "fdbclient/json_spirit/json_spirit_value.h" +#include "flow/Arena.h" +#include "flow/FastRef.h" +#include "flow/NetworkAddress.h" +#include "flow/ThreadHelper.actor.h" +#include "flow/actorcompiler.h" // This must be the last #include. + +namespace { + +ReadHotSubRangeRequest::SplitType parseSplitType(const std::string& typeStr) { + auto type = ReadHotSubRangeRequest::SplitType::BYTES; + if (typeStr == "bytes") { + type = ReadHotSubRangeRequest::BYTES; + } else if (typeStr == "readBytes") { + type = ReadHotSubRangeRequest::READ_BYTES; + } else if (typeStr == "readOps") { + type = ReadHotSubRangeRequest::READ_OPS; + } else { + fmt::print("Error: {} is not a valid split type. Will use bytes as the default split type\n", typeStr); + } + return type; +} + +} // namespace + +namespace fdb_cli { + +ACTOR Future hotRangeCommandActor(Database localdb, + Reference db, + std::vector tokens, + std::map* storage_interface) { + + if (tokens.size() == 1) { + // initialize storage interfaces + storage_interface->clear(); + wait(getStorageServerInterfaces(db, storage_interface)); + fmt::print("\nThe following {} storage servers can be queried:\n", storage_interface->size()); + for (const auto& [addr, _] : *storage_interface) { + fmt::print("{}\n", addr); + } + fmt::print("\n"); + } else if (tokens.size() == 6) { + if (storage_interface->size() == 0) { + fprintf(stderr, "ERROR: no storage processes to query. You must run the `hotrange’ command first.\n"); + return false; + } + state Key address = tokens[1]; + // At present we only support one process(IP:Port) at a time + if (!storage_interface->count(address.toString())) { + fprintf(stderr, "ERROR: storage process `%s' not recognized.\n", printable(address).c_str()); + return false; + } + state int splitCount; + try { + splitCount = boost::lexical_cast(tokens[5].toString()); + } catch (...) { + fmt::print("Error: splitCount value: '{}', cannot be parsed to an Integer\n", tokens[5].toString()); + return false; + } + ReadHotSubRangeRequest::SplitType splitType = parseSplitType(tokens[2].toString()); + KeyRangeRef range(tokens[3], tokens[4]); + // TODO: refactor this to support multiversion fdbcli in the future + Standalone> metrics = + wait(localdb->getHotRangeMetrics((*storage_interface)[address.toString()], range, splitType, splitCount)); + // next parse the result and form a json array for each object + json_spirit::mArray resultArray; + for (const auto& metric : metrics) { + json_spirit::mObject metricObj; + metricObj["begin"] = metric.keys.begin.toString(); + metricObj["end"] = metric.keys.end.toString(); + metricObj["readBytesPerSec"] = metric.readBandwidthSec; + metricObj["readOpsPerSec"] = metric.readOpsSec; + metricObj["bytes"] = metric.bytes; + resultArray.push_back(metricObj); + } + // print out the json array + const std::string result = + json_spirit::write_string(json_spirit::mValue(resultArray), json_spirit::pretty_print); + fmt::print("\n{}\n", result); + } else { + printUsage(tokens[0]); + return false; + } + + return true; +} + +CommandFactory hotRangeFactory( + "hotrange", + CommandHelp( + "hotrange ", + "Fetch read metrics from a given storage server to detect hot range", + "If no arguments are specified, populates the list of storage processes that can be queried. " + " specify the range you are interested in, " + " is the metric used to divide ranges, " + "splitCount is the number of returned ranges divided by the given metric. " + "The command will return an array of json object for each range with their metrics." + "Notice: the three metrics are sampled by a different way, so their values are not perfectly matched.\n")); + +} // namespace fdb_cli diff --git a/fdbcli/Util.actor.cpp b/fdbcli/Util.actor.cpp index aed1133047..72c5767a83 100644 --- a/fdbcli/Util.actor.cpp +++ b/fdbcli/Util.actor.cpp @@ -153,4 +153,30 @@ ACTOR Future getWorkers(Reference db, std::vector* } } +ACTOR Future getStorageServerInterfaces(Reference db, + std::map* interfaces) { + state Reference tr = db->createTransaction(); + loop { + interfaces->clear(); + try { + tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS); + tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE); + tr->setOption(FDBTransactionOptions::LOCK_AWARE); + state ThreadFuture serverListF = tr->getRange(serverListKeys, CLIENT_KNOBS->TOO_MANY); + wait(success(safeThreadFutureToFuture(serverListF))); + ASSERT(!serverListF.get().more); + ASSERT_LT(serverListF.get().size(), CLIENT_KNOBS->TOO_MANY); + RangeResult serverList = serverListF.get(); + // decode server interfaces + for (int i = 0; i < serverList.size(); i++) { + auto ssi = decodeServerListValue(serverList[i].value); + (*interfaces)[ssi.address().toString()] = ssi; + } + return Void(); + } catch (Error& e) { + wait(safeThreadFutureToFuture(tr->onError(e))); + } + } +} + } // namespace fdb_cli diff --git a/fdbcli/fdbcli.actor.cpp b/fdbcli/fdbcli.actor.cpp index e02cc20c28..4f5cf367c2 100644 --- a/fdbcli/fdbcli.actor.cpp +++ b/fdbcli/fdbcli.actor.cpp @@ -28,6 +28,7 @@ #include "fdbclient/Status.h" #include "fdbclient/KeyBackedTypes.h" #include "fdbclient/StatusClient.h" +#include "fdbclient/StorageServerInterface.h" #include "fdbclient/DatabaseContext.h" #include "fdbclient/GlobalConfig.actor.h" #include "fdbclient/IKnobCollection.h" @@ -1090,6 +1091,7 @@ ACTOR Future cli(CLIOptions opt, LineNoise* plinenoise, Reference> address_interface; + state std::map storage_interface; state FdbOptions globalOptions; state FdbOptions activeOptions; @@ -2145,6 +2147,15 @@ ACTOR Future cli(CLIOptions opt, LineNoise* plinenoise, Reference>(); + } else { + return fs.get().readHotRanges; + } +} + +Future>> DatabaseContext::getHotRangeMetrics( + StorageServerInterface ssi, + const KeyRange& keys, + ReadHotSubRangeRequest::SplitType type, + int splitCount) { + + return getHotRangeMetricsActor( + Reference::addRef(this), ssi, ReadHotSubRangeRequest(keys, type, splitCount)); +} + int64_t getMaxKeySize(KeyRef const& key) { return getMaxWriteKeySize(key, true); } diff --git a/fdbclient/include/fdbclient/DatabaseContext.h b/fdbclient/include/fdbclient/DatabaseContext.h index 7fbdf25a26..08c9183cf5 100644 --- a/fdbclient/include/fdbclient/DatabaseContext.h +++ b/fdbclient/include/fdbclient/DatabaseContext.h @@ -313,6 +313,10 @@ public: Optional const& minSplitBytes = {}); Future>> getReadHotRanges(KeyRange const& keys); + Future>> getHotRangeMetrics(StorageServerInterface ssi, + KeyRange const& keys, + ReadHotSubRangeRequest::SplitType type, + int splitCount); // Returns the protocol version reported by the coordinator this client is connected to // If an expected version is given, the future won't return until the protocol version is different than expected diff --git a/fdbserver/include/fdbserver/StorageMetrics.actor.h b/fdbserver/include/fdbserver/StorageMetrics.actor.h index 14a8d86d9d..2bd0888db7 100644 --- a/fdbserver/include/fdbserver/StorageMetrics.actor.h +++ b/fdbserver/include/fdbserver/StorageMetrics.actor.h @@ -216,12 +216,7 @@ Future serveStorageMetricsRequests(ServiceType* self, StorageServerInterfa self->getStorageMetrics(req); } when(ReadHotSubRangeRequest req = waitNext(ssi.getReadHotRanges.getFuture())) { - if (!self->isReadable(req.keys)) { - CODE_PROBE(true, "readHotSubRanges immediate wrong_shard_server()", probe::decoration::rare); - self->sendErrorWithPenalty(req.reply, wrong_shard_server(), self->getPenalty()); - } else { - self->metrics.getReadHotRanges(req); - } + self->metrics.getReadHotRanges(req); } when(SplitRangeRequest req = waitNext(ssi.getRangeSplitPoints.getFuture())) { if (!self->isReadable(req.keys)) {