Refactor kill command

This commit is contained in:
Chaoguang Lin 2021-07-09 19:32:28 +00:00
parent 5d84ffe019
commit 41f09e9c2e
4 changed files with 144 additions and 68 deletions

View File

@ -8,6 +8,7 @@ set(FDBCLI_SRCS
FlowLineNoise.actor.cpp
FlowLineNoise.h
ForceRecoveryWithDataLossCommand.actor.cpp
KillCommand.actor.cpp
MaintenanceCommand.actor.cpp
SetClassCommand.actor.cpp
SnapshotCommand.actor.cpp

View File

@ -0,0 +1,117 @@
/*
* KillCommand.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2021 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "fdbcli/fdbcli.actor.h"
#include "fdbclient/FDBOptions.g.h"
#include "fdbclient/IClientApi.h"
#include "fdbclient/Knobs.h"
#include "flow/Arena.h"
#include "flow/FastRef.h"
#include "flow/ThreadHelper.actor.h"
#include "flow/actorcompiler.h" // This must be the last #include.
namespace fdb_cli {
// Implements the fdbcli `kill' command.
//
// tokens[0] is the command name; the remaining tokens select the mode:
//   kill                  - reads the \xff\xff/worker_interfaces/ special-key range through tr, passes every
//                           entry to addInterface() to populate *address_interface, then lists the addresses
//   kill list             - lists the addresses currently held in *address_interface
//   kill all              - asks db to reboot every address held in *address_interface
//   kill <ADDRESS...>     - asks db to reboot each given address; nothing is sent unless every address is
//                           already present in *address_interface
//
// Returns true on success and false if any error was reported to stderr.
ACTOR Future<bool> killCommandActor(Reference<IDatabase> db,
                                    Reference<ITransaction> tr,
                                    std::vector<StringRef> tokens,
                                    std::map<Key, std::pair<Value, ClientLeaderRegInterface>>* address_interface) {
	// tokens[1] is inspected below whenever the size is not exactly one, so an empty vector must be rejected
	ASSERT(!tokens.empty());
	state bool result = true;
	if (tokens.size() == 1) {
		// Hold the reference to the standalone's memory
		state ThreadFuture<RangeResult> kvsFuture =
		    tr->getRange(KeyRangeRef(LiteralStringRef("\xff\xff/worker_interfaces/"),
		                             LiteralStringRef("\xff\xff/worker_interfaces0")),
		                 CLIENT_KNOBS->TOO_MANY);
		RangeResult kvs = wait(safeThreadFutureToFuture(kvsFuture));
		ASSERT(!kvs.more);
		// All addInterface actors share one lock created from CLI_CONNECT_PARALLELISM
		auto connectLock = makeReference<FlowLock>(CLIENT_KNOBS->CLI_CONNECT_PARALLELISM);
		std::vector<Future<Void>> addInterfs;
		for (const auto& kv : kvs) {
			addInterfs.push_back(addInterface(address_interface, connectLock, kv));
		}
		wait(waitForAll(addInterfs));
	}
	if (tokens.size() == 1 || tokencmp(tokens[1], "list")) {
		if (address_interface->size() == 0) {
			printf("\nNo addresses can be killed.\n");
		} else if (address_interface->size() == 1) {
			printf("\nThe following address can be killed:\n");
		} else {
			printf("\nThe following %zu addresses can be killed:\n", address_interface->size());
		}
		// Iterate by reference: only the key is printed, so copying each entry is wasted work
		for (const auto& entry : *address_interface) {
			printf("%s\n", printable(entry.first).c_str());
		}
		printf("\n");
	} else if (tokencmp(tokens[1], "all")) {
		if (address_interface->empty()) {
			// Nothing was discovered yet, so there is nothing to send a request to
			result = false;
			fprintf(stderr,
			        "ERROR: no processes to kill. You must run the `kill' command before "
			        "running `kill all'.\n");
		} else {
			// The iterator lives across wait(), so it has to be a state variable
			state std::map<Key, std::pair<Value, ClientLeaderRegInterface>>::const_iterator it;
			for (it = address_interface->cbegin(); it != address_interface->cend(); ++it) {
				int64_t killRequestSent = wait(safeThreadFutureToFuture(db->rebootWorker(it->first, false, 0)));
				if (!killRequestSent) {
					result = false;
					fprintf(
					    stderr, "ERROR: failed to send request to kill process `%s'.\n", it->first.toString().c_str());
				}
			}
			printf("Attempted to kill %zu processes\n", address_interface->size());
		}
	} else {
		// The index lives across wait() in the second loop, so it has to be a state variable
		state size_t i;
		// First pass: refuse the whole command if any address is unknown
		for (i = 1; i < tokens.size(); i++) {
			if (!address_interface->count(tokens[i])) {
				fprintf(stderr, "ERROR: process `%s' not recognized.\n", printable(tokens[i]).c_str());
				result = false;
				break;
			}
		}
		if (result) {
			// Second pass: send one reboot request per address
			for (i = 1; i < tokens.size(); i++) {
				int64_t killRequestSent = wait(safeThreadFutureToFuture(db->rebootWorker(tokens[i], false, 0)));
				if (!killRequestSent) {
					result = false;
					fprintf(
					    stderr, "ERROR: failed to send request to kill process `%s'.\n", tokens[i].toString().c_str());
				}
			}
			printf("Attempted to kill %zu processes\n", tokens.size() - 1);
		}
	}
	return result;
}
// Help entry for the `kill' command: usage line, one-line summary, and the long description.
// The long description is written as adjacent string literals, one sentence per literal.
CommandFactory killFactory(
    "kill",
    CommandHelp("kill all|list|<ADDRESS...>",
                "attempts to kill one or more processes in the cluster",
                "If no addresses are specified, populates the list of processes which can be killed. "
                "Processes cannot be killed before this list has been populated.\n\n"
                "If `all' is specified, attempts to kill all known processes.\n\n"
                "If `list' is specified, displays all known processes. "
                "This is only useful when the database is unresponsive.\n\n"
                "For each IP:port pair in <ADDRESS ...>, attempt to kill the specified process."));
} // namespace fdb_cli

View File

@ -40,6 +40,7 @@
#include "fdbclient/Tuple.h"
#include "fdbclient/ThreadSafeTransaction.h"
#include "flow/Arena.h"
#include "flow/DeterministicRandom.h"
#include "flow/Platform.h"
@ -623,13 +624,6 @@ void initHelp() {
helpMap["writemode"] = CommandHelp("writemode <on|off>",
"enables or disables sets and clears",
"Setting or clearing keys from the CLI is not recommended.");
helpMap["kill"] = CommandHelp(
"kill all|list|<ADDRESS...>",
"attempts to kill one or more processes in the cluster",
"If no addresses are specified, populates the list of processes which can be killed. Processes cannot be "
"killed before this list has been populated.\n\nIf `all' is specified, attempts to kill all known "
"processes.\n\nIf `list' is specified, displays all known processes. This is only useful when the database is "
"unresponsive.\n\nFor each IP:port pair in <ADDRESS ...>, attempt to kill the specified process.");
helpMap["suspend"] = CommandHelp(
"suspend <SECONDS> <ADDRESS...>",
"attempts to suspend one or more processes in the cluster",
@ -3209,13 +3203,19 @@ Future<T> stopNetworkAfter(Future<T> what) {
}
}
ACTOR Future<Void> addInterface(std::map<Key, std::pair<Value, ClientLeaderRegInterface>>* address_interface,
Reference<FlowLock> connectLock,
KeyValue kv) {
ACTOR Future<Void> fdb_cli::addInterface(std::map<Key, std::pair<Value, ClientLeaderRegInterface>>* address_interface,
Reference<FlowLock> connectLock,
KeyValue kv) {
wait(connectLock->take());
state FlowLock::Releaser releaser(*connectLock);
state ClientWorkerInterface workerInterf =
BinaryReader::fromStringRef<ClientWorkerInterface>(kv.value, IncludeVersion());
state ClientWorkerInterface workerInterf;
try {
// the interface is backward compatible, so a parse failure means the CLI version is too old and needs to be upgraded
workerInterf = BinaryReader::fromStringRef<ClientWorkerInterface>(kv.value, IncludeVersion());
} catch (Error& e) {
fprintf(stderr, "Error: %s; CLI version is too old, please update to use a newer version\n", e.what());
return Void();
}
state ClientLeaderRegInterface leaderInterf(workerInterf.address());
choose {
when(Optional<LeaderInfo> rep =
@ -3757,62 +3757,10 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
}
if (tokencmp(tokens[0], "kill")) {
getTransaction(db, tr, options, intrans);
if (tokens.size() == 1) {
RangeResult kvs = wait(
makeInterruptable(tr->getRange(KeyRangeRef(LiteralStringRef("\xff\xff/worker_interfaces/"),
LiteralStringRef("\xff\xff/worker_interfaces0")),
CLIENT_KNOBS->TOO_MANY)));
ASSERT(!kvs.more);
auto connectLock = makeReference<FlowLock>(CLIENT_KNOBS->CLI_CONNECT_PARALLELISM);
std::vector<Future<Void>> addInterfs;
for (auto it : kvs) {
addInterfs.push_back(addInterface(&address_interface, connectLock, it));
}
wait(waitForAll(addInterfs));
}
if (tokens.size() == 1 || tokencmp(tokens[1], "list")) {
if (address_interface.size() == 0) {
printf("\nNo addresses can be killed.\n");
} else if (address_interface.size() == 1) {
printf("\nThe following address can be killed:\n");
} else {
printf("\nThe following %zu addresses can be killed:\n", address_interface.size());
}
for (auto it : address_interface) {
printf("%s\n", printable(it.first).c_str());
}
printf("\n");
} else if (tokencmp(tokens[1], "all")) {
for (auto it : address_interface) {
BinaryReader::fromStringRef<ClientWorkerInterface>(it.second.first, IncludeVersion())
.reboot.send(RebootRequest());
}
if (address_interface.size() == 0) {
fprintf(stderr,
"ERROR: no processes to kill. You must run the `kill command before "
"running `kill all.\n");
} else {
printf("Attempted to kill %zu processes\n", address_interface.size());
}
} else {
for (int i = 1; i < tokens.size(); i++) {
if (!address_interface.count(tokens[i])) {
fprintf(stderr, "ERROR: process `%s' not recognized.\n", printable(tokens[i]).c_str());
is_error = true;
break;
}
}
if (!is_error) {
for (int i = 1; i < tokens.size(); i++) {
BinaryReader::fromStringRef<ClientWorkerInterface>(address_interface[tokens[i]].first,
IncludeVersion())
.reboot.send(RebootRequest());
}
printf("Attempted to kill %zu processes\n", tokens.size() - 1);
}
}
getTransaction(db, tr, tr2, options, intrans);
bool _result = wait(makeInterruptable(killCommandActor(db2, tr2, tokens, &address_interface)));
if (!_result)
is_error = true;
continue;
}

View File

@ -28,6 +28,7 @@
#elif !defined(FDBCLI_FDBCLI_ACTOR_H)
#define FDBCLI_FDBCLI_ACTOR_H
#include "fdbclient/CoordinationInterface.h"
#include "fdbclient/IClientApi.h"
#include "flow/Arena.h"
@ -73,6 +74,10 @@ extern const KeyRangeRef processClassTypeSpecialKeyRange;
// Other special keys
inline const KeyRef errorMsgSpecialKey = LiteralStringRef("\xff\xff/error_message");
// helper functions (Copied from fdbcli.actor.cpp)
// decode worker interfaces
// Takes connectLock, then deserializes kv.value as a ClientWorkerInterface. If deserialization throws, an error
// is printed to stderr and the actor finishes without doing anything else for that entry.
// NOTE(review): presumably entries that decode and respond are recorded in *address_interface -- confirm against
// the full implementation in fdbcli.actor.cpp.
ACTOR Future<Void> addInterface(std::map<Key, std::pair<Value, ClientLeaderRegInterface>>* address_interface,
Reference<FlowLock> connectLock,
KeyValue kv);
// compare StringRef with the given c string
bool tokencmp(StringRef token, const char* command);
@ -93,6 +98,11 @@ ACTOR Future<bool> consistencyCheckCommandActor(Reference<ITransaction> tr, std:
ACTOR Future<bool> dataDistributionCommandActor(Reference<IDatabase> db, std::vector<StringRef> tokens);
// force_recovery_with_data_loss command
ACTOR Future<bool> forceRecoveryWithDataLossCommandActor(Reference<IDatabase> db, std::vector<StringRef> tokens);
// kill command
// tokens holds the parsed command line, with tokens[0] being the command name.
// With no further tokens, the worker interfaces are read through tr, *address_interface is populated from them,
// and the resulting addresses are printed. `list' prints the addresses already in *address_interface.
// `all' and explicit addresses send reboot requests through db for addresses present in *address_interface.
// Returns true on success, false if an error was reported.
ACTOR Future<bool> killCommandActor(Reference<IDatabase> db,
Reference<ITransaction> tr,
std::vector<StringRef> tokens,
std::map<Key, std::pair<Value, ClientLeaderRegInterface>>* address_interface);
// maintenance command
ACTOR Future<bool> setHealthyZone(Reference<IDatabase> db, StringRef zoneId, double seconds, bool printWarning = false);
ACTOR Future<bool> clearHealthyZone(Reference<IDatabase> db,