From 41f09e9c2ebc63a1eb5ef536070e8854caf4b9cb Mon Sep 17 00:00:00 2001 From: Chaoguang Lin Date: Fri, 9 Jul 2021 19:32:28 +0000 Subject: [PATCH] Refactor kill command --- fdbcli/CMakeLists.txt | 1 + fdbcli/KillCommand.actor.cpp | 117 +++++++++++++++++++++++++++++++++++ fdbcli/fdbcli.actor.cpp | 84 +++++-------------------- fdbcli/fdbcli.actor.h | 10 +++ 4 files changed, 144 insertions(+), 68 deletions(-) create mode 100644 fdbcli/KillCommand.actor.cpp diff --git a/fdbcli/CMakeLists.txt b/fdbcli/CMakeLists.txt index 41c2639dad..f09c98dfd4 100644 --- a/fdbcli/CMakeLists.txt +++ b/fdbcli/CMakeLists.txt @@ -8,6 +8,7 @@ set(FDBCLI_SRCS FlowLineNoise.actor.cpp FlowLineNoise.h ForceRecoveryWithDataLossCommand.actor.cpp + KillCommand.actor.cpp MaintenanceCommand.actor.cpp SetClassCommand.actor.cpp SnapshotCommand.actor.cpp diff --git a/fdbcli/KillCommand.actor.cpp b/fdbcli/KillCommand.actor.cpp new file mode 100644 index 0000000000..9f6016f1f3 --- /dev/null +++ b/fdbcli/KillCommand.actor.cpp @@ -0,0 +1,117 @@ +/* + * KillCommand.actor.cpp + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2021 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "fdbcli/fdbcli.actor.h" + +#include "fdbclient/FDBOptions.g.h" +#include "fdbclient/IClientApi.h" +#include "fdbclient/Knobs.h" + +#include "flow/Arena.h" +#include "flow/FastRef.h" +#include "flow/ThreadHelper.actor.h" +#include "flow/actorcompiler.h" // This must be the last #include. + +namespace fdb_cli { + +ACTOR Future killCommandActor(Reference db, + Reference tr, + std::vector tokens, + std::map>* address_interface) { + state bool result = true; + if (tokens.size() == 1) { + // Hold the reference to the standalone's memory + state ThreadFuture kvsFuture = + tr->getRange(KeyRangeRef(LiteralStringRef("\xff\xff/worker_interfaces/"), + LiteralStringRef("\xff\xff/worker_interfaces0")), + CLIENT_KNOBS->TOO_MANY); + RangeResult kvs = wait(safeThreadFutureToFuture(kvsFuture)); + ASSERT(!kvs.more); + auto connectLock = makeReference(CLIENT_KNOBS->CLI_CONNECT_PARALLELISM); + std::vector> addInterfs; + for (auto it : kvs) { + addInterfs.push_back(addInterface(address_interface, connectLock, it)); + } + wait(waitForAll(addInterfs)); + } + if (tokens.size() == 1 || tokencmp(tokens[1], "list")) { + if (address_interface->size() == 0) { + printf("\nNo addresses can be killed.\n"); + } else if (address_interface->size() == 1) { + printf("\nThe following address can be killed:\n"); + } else { + printf("\nThe following %zu addresses can be killed:\n", address_interface->size()); + } + for (auto it : *address_interface) { + printf("%s\n", printable(it.first).c_str()); + } + printf("\n"); + } else if (tokencmp(tokens[1], "all")) { + state std::map>::const_iterator it; + for (it = address_interface->cbegin(); it != address_interface->cend(); it++) { + int64_t killRequestSent = wait(safeThreadFutureToFuture(db->rebootWorker(it->first, false, 0))); + if (!killRequestSent) { + result = false; + fprintf(stderr, "ERROR: failed to send request to kill process `%s'.\n", it->first.toString().c_str()); + } + } + if (address_interface->size() == 0) { + result = false; + 
fprintf(stderr, + "ERROR: no processes to kill. You must run the `kill’ command before " + "running `kill all’.\n"); + } else { + printf("Attempted to kill %zu processes\n", address_interface->size()); + } + } else { + state int i; + for (i = 1; i < tokens.size(); i++) { + if (!address_interface->count(tokens[i])) { + fprintf(stderr, "ERROR: process `%s' not recognized.\n", printable(tokens[i]).c_str()); + result = false; + break; + } + } + + if (result) { + for (i = 1; i < tokens.size(); i++) { + int64_t killRequestSent = wait(safeThreadFutureToFuture(db->rebootWorker(tokens[i], false, 0))); + if (!killRequestSent) { + result = false; + fprintf( + stderr, "ERROR: failed to send request to kill process `%s'.\n", tokens[i].toString().c_str()); + } + } + printf("Attempted to kill %zu processes\n", tokens.size() - 1); + } + } + return result; +} + +CommandFactory killFactory( + "kill", + CommandHelp( + "kill all|list|", + "attempts to kill one or more processes in the cluster", + "If no addresses are specified, populates the list of processes which can be killed. Processes cannot be " + "killed before this list has been populated.\n\nIf `all' is specified, attempts to kill all known " + "processes.\n\nIf `list' is specified, displays all known processes. This is only useful when the database is " + "unresponsive.\n\nFor each IP:port pair in
, attempt to kill the specified process.")); +} // namespace fdb_cli \ No newline at end of file diff --git a/fdbcli/fdbcli.actor.cpp b/fdbcli/fdbcli.actor.cpp index bb31b112e0..c82846921a 100644 --- a/fdbcli/fdbcli.actor.cpp +++ b/fdbcli/fdbcli.actor.cpp @@ -40,6 +40,7 @@ #include "fdbclient/Tuple.h" #include "fdbclient/ThreadSafeTransaction.h" +#include "flow/Arena.h" #include "flow/DeterministicRandom.h" #include "flow/Platform.h" @@ -623,13 +624,6 @@ void initHelp() { helpMap["writemode"] = CommandHelp("writemode ", "enables or disables sets and clears", "Setting or clearing keys from the CLI is not recommended."); - helpMap["kill"] = CommandHelp( - "kill all|list|", - "attempts to kill one or more processes in the cluster", - "If no addresses are specified, populates the list of processes which can be killed. Processes cannot be " - "killed before this list has been populated.\n\nIf `all' is specified, attempts to kill all known " - "processes.\n\nIf `list' is specified, displays all known processes. This is only useful when the database is " - "unresponsive.\n\nFor each IP:port pair in
, attempt to kill the specified process."); helpMap["suspend"] = CommandHelp( "suspend ", "attempts to suspend one or more processes in the cluster", @@ -3209,13 +3203,19 @@ Future stopNetworkAfter(Future what) { } } -ACTOR Future addInterface(std::map>* address_interface, - Reference connectLock, - KeyValue kv) { +ACTOR Future fdb_cli::addInterface(std::map>* address_interface, + Reference connectLock, + KeyValue kv) { wait(connectLock->take()); state FlowLock::Releaser releaser(*connectLock); - state ClientWorkerInterface workerInterf = - BinaryReader::fromStringRef(kv.value, IncludeVersion()); + state ClientWorkerInterface workerInterf; + try { + // The interface is backward compatible, so a parse failure here means this CLI is too old and needs to be upgraded + workerInterf = BinaryReader::fromStringRef(kv.value, IncludeVersion()); + } catch (Error& e) { + fprintf(stderr, "Error: %s; CLI version is too old, please update to use a newer version\n", e.what()); + return Void(); + } state ClientLeaderRegInterface leaderInterf(workerInterf.address()); choose { when(Optional rep = @@ -3757,62 +3757,10 @@ ACTOR Future cli(CLIOptions opt, LineNoise* plinenoise) { } if (tokencmp(tokens[0], "kill")) { - getTransaction(db, tr, options, intrans); - if (tokens.size() == 1) { - RangeResult kvs = wait( - makeInterruptable(tr->getRange(KeyRangeRef(LiteralStringRef("\xff\xff/worker_interfaces/"), - LiteralStringRef("\xff\xff/worker_interfaces0")), - CLIENT_KNOBS->TOO_MANY))); - ASSERT(!kvs.more); - auto connectLock = makeReference(CLIENT_KNOBS->CLI_CONNECT_PARALLELISM); - std::vector> addInterfs; - for (auto it : kvs) { - addInterfs.push_back(addInterface(&address_interface, connectLock, it)); - } - wait(waitForAll(addInterfs)); - } - if (tokens.size() == 1 || tokencmp(tokens[1], "list")) { - if (address_interface.size() == 0) { - printf("\nNo addresses can be killed.\n"); - } else if (address_interface.size() == 1) { - printf("\nThe following address can be killed:\n"); - } else { - 
printf("\nThe following %zu addresses can be killed:\n", address_interface.size()); - } - for (auto it : address_interface) { - printf("%s\n", printable(it.first).c_str()); - } - printf("\n"); - } else if (tokencmp(tokens[1], "all")) { - for (auto it : address_interface) { - BinaryReader::fromStringRef(it.second.first, IncludeVersion()) - .reboot.send(RebootRequest()); - } - if (address_interface.size() == 0) { - fprintf(stderr, - "ERROR: no processes to kill. You must run the `kill’ command before " - "running `kill all’.\n"); - } else { - printf("Attempted to kill %zu processes\n", address_interface.size()); - } - } else { - for (int i = 1; i < tokens.size(); i++) { - if (!address_interface.count(tokens[i])) { - fprintf(stderr, "ERROR: process `%s' not recognized.\n", printable(tokens[i]).c_str()); - is_error = true; - break; - } - } - - if (!is_error) { - for (int i = 1; i < tokens.size(); i++) { - BinaryReader::fromStringRef(address_interface[tokens[i]].first, - IncludeVersion()) - .reboot.send(RebootRequest()); - } - printf("Attempted to kill %zu processes\n", tokens.size() - 1); - } - } + getTransaction(db, tr, tr2, options, intrans); + bool _result = wait(makeInterruptable(killCommandActor(db2, tr2, tokens, &address_interface))); + if (!_result) + is_error = true; continue; } diff --git a/fdbcli/fdbcli.actor.h b/fdbcli/fdbcli.actor.h index 3190b326d5..ed352716ed 100644 --- a/fdbcli/fdbcli.actor.h +++ b/fdbcli/fdbcli.actor.h @@ -28,6 +28,7 @@ #elif !defined(FDBCLI_FDBCLI_ACTOR_H) #define FDBCLI_FDBCLI_ACTOR_H +#include "fdbclient/CoordinationInterface.h" #include "fdbclient/IClientApi.h" #include "flow/Arena.h" @@ -73,6 +74,10 @@ extern const KeyRangeRef processClassTypeSpecialKeyRange; // Other special keys inline const KeyRef errorMsgSpecialKey = LiteralStringRef("\xff\xff/error_message"); // help functions (Copied from fdbcli.actor.cpp) +// decode worker interfaces +ACTOR Future addInterface(std::map>* address_interface, + Reference connectLock, + KeyValue 
kv); // compare StringRef with the given c string bool tokencmp(StringRef token, const char* command); @@ -93,6 +98,11 @@ ACTOR Future consistencyCheckCommandActor(Reference tr, std: ACTOR Future dataDistributionCommandActor(Reference db, std::vector tokens); // force_recovery_with_data_loss command ACTOR Future forceRecoveryWithDataLossCommandActor(Reference db, std::vector tokens); +// kill command +ACTOR Future killCommandActor(Reference db, + Reference tr, + std::vector tokens, + std::map>* address_interface); // maintenance command ACTOR Future setHealthyZone(Reference db, StringRef zoneId, double seconds, bool printWarning = false); ACTOR Future clearHealthyZone(Reference db,