Merge pull request #4845 from sfc-gh-clin/refactor-fdbcli-2

Refactor fdbcli commands: advanceversion, maintenance, snapshot, force_recovery_with_data_loss
This commit is contained in:
Andrew Noyes 2021-06-15 11:13:18 -07:00 committed by GitHub
commit 6c2165dfee
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 391 additions and 69 deletions

View File

@ -0,0 +1,75 @@
/*
* AdvanceVersionCommand.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2021 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cinttypes>
#include "boost/lexical_cast.hpp"
#include "fdbcli/fdbcli.actor.h"
#include "fdbclient/IClientApi.h"
#include "flow/Arena.h"
#include "flow/FastRef.h"
#include "flow/ThreadHelper.actor.h"
#include "flow/actorcompiler.h" // This must be the last #include.
namespace fdb_cli {
// Special key that advanceVersionCommandActor sets to the version requested on the command line.
const KeyRef advanceVersionSpecialKey = LiteralStringRef("\xff\xff/management/min_required_commit_version");
// Implements the `advanceversion <VERSION>` command.
//
// tokens[0] is the command name; tokens[1] must be a decimal version with no trailing characters.
// Returns false (after printing usage) when the token count or the number is malformed.
// Otherwise the actor loops: while the read version is not greater than the requested version it
// writes the requested version to advanceVersionSpecialKey and commits; as soon as the read version
// exceeds the requested version it prints the read version and returns true.
ACTOR Future<bool> advanceVersionCommandActor(Reference<IDatabase> db, std::vector<StringRef> tokens) {
	if (tokens.size() != 2) {
		printUsage(tokens[0]);
		return false;
	}

	// Parse into an explicit int64_t using the <cinttypes> macro so the conversion specifier always
	// matches the argument type; "%ld" is only correct where the 64-bit type happens to be `long`.
	int64_t parsedVersion = 0;
	int n = 0;
	const std::string versionStr = tokens[1].toString();
	if (sscanf(versionStr.c_str(), "%" SCNd64 "%n", &parsedVersion, &n) != 1 ||
	    n != static_cast<int>(versionStr.size())) {
		// Either no number was found or characters were left over after it.
		printUsage(tokens[0]);
		return false;
	}

	state Version v = parsedVersion;
	state Reference<ITransaction> tr = db->createTransaction();
	loop {
		// The option is applied again on every attempt because it sits inside the retry loop.
		tr->setOption(FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES);
		try {
			Version rv = wait(safeThreadFutureToFuture(tr->getReadVersion()));
			if (rv <= v) {
				tr->set(advanceVersionSpecialKey, boost::lexical_cast<std::string>(v));
				wait(safeThreadFutureToFuture(tr->commit()));
				// No return here: the loop runs again and only exits once the read version is past v.
			} else {
				printf("Current read version is %" PRId64 "\n", static_cast<int64_t>(rv));
				return true;
			}
		} catch (Error& e) {
			wait(safeThreadFutureToFuture(tr->onError(e)));
		}
	}
}
// CommandFactory entry for "advanceversion": command name plus its usage line, one-line summary
// and long help text.
// NOTE(review): presumably the CommandFactory constructor registers these at static-initialization
// time — confirm against CommandFactory in fdbcli.actor.h.
CommandFactory advanceVersionFactory(
"advanceversion",
CommandHelp(
"advanceversion <VERSION>",
"Force the cluster to recover at the specified version",
"Forces the cluster to recover at the specified version. If the specified version is larger than the current "
"version of the cluster, the cluster version is advanced "
"to the specified version via a forced recovery."));
} // namespace fdb_cli

View File

@ -1,9 +1,13 @@
# Files collected in FDBCLI_SRCS.
# NOTE(review): presumably consumed by the fdbcli target definition — confirm where the variable is used.
set(FDBCLI_SRCS
fdbcli.actor.cpp
fdbcli.actor.h
AdvanceVersionCommand.actor.cpp
ConsistencyCheckCommand.actor.cpp
FlowLineNoise.actor.cpp
FlowLineNoise.h
ForceRecoveryWithDataLossCommand.actor.cpp
MaintenanceCommand.actor.cpp
SnapshotCommand.actor.cpp
Util.cpp
linenoise/linenoise.h)

View File

@ -0,0 +1,50 @@
/*
* ForceRecoveryWithDataLossCommand.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2021 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "fdbcli/fdbcli.actor.h"
#include "fdbclient/IClientApi.h"
#include "flow/Arena.h"
#include "flow/FastRef.h"
#include "flow/ThreadHelper.actor.h"
#include "flow/actorcompiler.h" // This must be the last #include.
namespace fdb_cli {
// Implements `force_recovery_with_data_loss <DCID>`.
//
// With exactly one argument, tokens[1] is passed to IDatabase::forceRecoveryWithDataLoss and the
// actor returns true once that future completes. Any other token count prints the usage for
// tokens[0] and returns false.
ACTOR Future<bool> forceRecoveryWithDataLossCommandActor(Reference<IDatabase> db, std::vector<StringRef> tokens) {
	if (tokens.size() == 2) {
		wait(safeThreadFutureToFuture(db->forceRecoveryWithDataLoss(tokens[1])));
		return true;
	}

	printUsage(tokens[0]);
	return false;
}
// CommandFactory entry for "force_recovery_with_data_loss": command name plus its usage line,
// one-line summary and long help text.
CommandFactory forceRecoveryWithDataLossFactory(
"force_recovery_with_data_loss",
CommandHelp("force_recovery_with_data_loss <DCID>",
"Force the database to recover into DCID",
"A forced recovery will cause the database to lose the most recently committed mutations. The "
"amount of mutations that will be lost depends on how far behind the remote datacenter is. This "
"command will change the region configuration to have a positive priority for the chosen DCID, and "
"a negative priority for all other DCIDs. This command will set usable_regions to 1. If the "
"database has already recovered, this command does nothing.\n"));
} // namespace fdb_cli

View File

@ -0,0 +1,174 @@
/*
* MaintenanceCommand.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2021 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cinttypes>
#include "boost/lexical_cast.hpp"
#include "fdbcli/fdbcli.actor.h"
#include "fdbclient/FDBTypes.h"
#include "fdbclient/IClientApi.h"
#include "fdbclient/Knobs.h"
#include "flow/Arena.h"
#include "flow/FastRef.h"
#include "flow/ThreadHelper.actor.h"
#include "flow/actorcompiler.h" // This must be the last #include.
namespace {
// Prints the current maintenance state to stdout. The maintenance range is expected to hold at
// most one entry (asserted below), so only one zone can be reported.
ACTOR Future<Void> printHealthyZone(Reference<IDatabase> db) {
	state Reference<ITransaction> txn = db->createTransaction();
	loop {
		txn->setOption(FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES);
		try {
			// Keep the ThreadFuture alive in a state variable: with an external client the returned
			// standalone is not guaranteed to own its memory, but the future references it.
			state ThreadFuture<RangeResult> rangeFuture =
			    txn->getRange(fdb_cli::maintenanceSpecialKeyRange, CLIENT_KNOBS->TOO_MANY);
			RangeResult entries = wait(safeThreadFutureToFuture(rangeFuture));
			ASSERT(entries.size() <= 1);

			// Case 1: the only entry is the marker for "ignore storage server failures".
			if (entries.size() == 1 && entries[0].key == fdb_cli::ignoreSSFailureSpecialKey) {
				printf("Data distribution has been disabled for all storage server failures in this cluster and thus "
				       "maintenance mode is not active.\n");
				return Void();
			}

			// Case 2: nothing stored, or the stored duration is not positive.
			if (entries.size() == 0 || boost::lexical_cast<double>(entries[0].value.toString()) <= 0) {
				printf("No ongoing maintenance.\n");
				return Void();
			}

			// Case 3: a zone is under maintenance; the zone id is the key with the range prefix removed.
			std::string zoneId = entries[0].key.removePrefix(fdb_cli::maintenanceSpecialKeyRange.begin).toString();
			int64_t seconds = static_cast<int64_t>(boost::lexical_cast<double>(entries[0].value.toString()));
			printf("Maintenance for zone %s will continue for %" PRId64 " seconds.\n", zoneId.c_str(), seconds);
			return Void();
		} catch (Error& err) {
			wait(safeThreadFutureToFuture(txn->onError(err)));
		}
	}
}
// Clears the whole maintenance special-key range and commits.
//
// If the range currently holds the "ignore storage server failures" marker and
// clearSSFailureZoneString is false, nothing is cleared and false is returned (with an error
// message when printWarning is set). Pass clearSSFailureZoneString = true to clear that marker too.
// Returns true after a successful commit.
ACTOR Future<bool> clearHealthyZone(Reference<IDatabase> db,
                                    bool printWarning = false,
                                    bool clearSSFailureZoneString = false) {
	state Reference<ITransaction> txn = db->createTransaction();
	TraceEvent("ClearHealthyZone").detail("ClearSSFailureZoneString", clearSSFailureZoneString);
	loop {
		txn->setOption(FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES);
		try {
			// The future is a state variable so the memory behind the result stays referenced.
			state ThreadFuture<RangeResult> rangeFuture =
			    txn->getRange(fdb_cli::maintenanceSpecialKeyRange, CLIENT_KNOBS->TOO_MANY);
			RangeResult entries = wait(safeThreadFutureToFuture(rangeFuture));
			ASSERT(entries.size() <= 1);

			const bool ssFailuresIgnored =
			    entries.size() == 1 && entries[0].key == fdb_cli::ignoreSSFailureSpecialKey;
			if (ssFailuresIgnored && !clearSSFailureZoneString) {
				if (printWarning) {
					printf("ERROR: Maintenance mode cannot be used while data distribution is disabled for storage "
					       "server failures. Use 'datadistribution on' to reenable data distribution.\n");
				}
				return false;
			}

			txn->clear(fdb_cli::maintenanceSpecialKeyRange);
			wait(safeThreadFutureToFuture(txn->commit()));
			return true;
		} catch (Error& err) {
			wait(safeThreadFutureToFuture(txn->onError(err)));
		}
	}
}
// Puts zoneId under maintenance for the given number of seconds by writing
// <maintenance range prefix><zoneId> = seconds and committing.
//
// Returns false without writing when the range currently holds the "ignore storage server
// failures" marker (an error is printed when printWarning is set); returns true after a
// successful commit.
ACTOR Future<bool> setHealthyZone(Reference<IDatabase> db,
                                  StringRef zoneId,
                                  double seconds,
                                  bool printWarning = false) {
	state Reference<ITransaction> txn = db->createTransaction();
	TraceEvent("SetHealthyZone").detail("Zone", zoneId).detail("DurationSeconds", seconds);
	loop {
		txn->setOption(FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES);
		try {
			// The future is a state variable so the memory behind the result stays referenced.
			state ThreadFuture<RangeResult> rangeFuture =
			    txn->getRange(fdb_cli::maintenanceSpecialKeyRange, CLIENT_KNOBS->TOO_MANY);
			RangeResult entries = wait(safeThreadFutureToFuture(rangeFuture));
			ASSERT(entries.size() <= 1);

			const bool ssFailuresIgnored =
			    entries.size() == 1 && entries[0].key == fdb_cli::ignoreSSFailureSpecialKey;
			if (ssFailuresIgnored) {
				if (printWarning) {
					printf("ERROR: Maintenance mode cannot be used while data distribution is disabled for storage "
					       "server failures. Use 'datadistribution on' to reenable data distribution.\n");
				}
				return false;
			}

			const Key zoneKey = fdb_cli::maintenanceSpecialKeyRange.begin.withSuffix(zoneId);
			txn->set(zoneKey, boost::lexical_cast<std::string>(seconds));
			wait(safeThreadFutureToFuture(txn->commit()));
			return true;
		} catch (Error& err) {
			wait(safeThreadFutureToFuture(txn->onError(err)));
		}
	}
}
} // namespace
namespace fdb_cli {
// Range covering every key that starts with "\xff\xff/management/maintenance/": the end key
// replaces the trailing '/' (0x2F) with '0' (0x30), the next byte value.
const KeyRangeRef maintenanceSpecialKeyRange = KeyRangeRef(LiteralStringRef("\xff\xff/management/maintenance/"),
LiteralStringRef("\xff\xff/management/maintenance0"));
// The special key, if present, means data distribution is disabled for storage failures.
// It lies inside maintenanceSpecialKeyRange, so range reads over that range return it.
const KeyRef ignoreSSFailureSpecialKey = LiteralStringRef("\xff\xff/management/maintenance/IgnoreSSFailures");
// Implements the `maintenance` command.
//
//   maintenance                      -> print the current maintenance state
//   maintenance off                  -> clear ongoing maintenance
//   maintenance on <ZONEID> <SECS>   -> put ZONEID under maintenance for SECS seconds
//
// Returns false when the arguments do not match one of the forms above (usage is printed) or when
// the clear/set helper reports failure; returns true otherwise.
ACTOR Future<bool> maintenanceCommandActor(Reference<IDatabase> db, std::vector<StringRef> tokens) {
	if (tokens.size() == 1) {
		wait(printHealthyZone(db));
		return true;
	}

	if (tokens.size() == 2 && tokencmp(tokens[1], "off")) {
		bool cleared = wait(clearHealthyZone(db, true));
		return cleared;
	}

	if (tokens.size() == 4 && tokencmp(tokens[1], "on")) {
		double duration;
		int consumed = 0;
		auto durationText = tokens[3].toString();
		// Accept the duration only if a number was scanned and it spans the entire token.
		const bool parsedOk = sscanf(durationText.c_str(), "%lf%n", &duration, &consumed) == 1 &&
		                      consumed == durationText.size();
		if (!parsedOk) {
			printUsage(tokens[0]);
			return false;
		}
		bool applied = wait(setHealthyZone(db, tokens[2], duration, true));
		return applied;
	}

	printUsage(tokens[0]);
	return false;
}
// CommandFactory entry for "maintenance": command name plus its usage line, one-line summary and
// long help text.
CommandFactory maintenanceFactory(
"maintenance",
CommandHelp(
"maintenance [on|off] [ZONEID] [SECONDS]",
"mark a zone for maintenance",
"Calling this command with `on' prevents data distribution from moving data away from the processes with the "
"specified ZONEID. Data distribution will automatically be turned back on for ZONEID after the specified "
"SECONDS have elapsed, or after a storage server with a different ZONEID fails. Only one ZONEID can be marked "
"for maintenance. Calling this command with no arguments will display any ongoing maintenance. Calling this "
"command with `off' will disable maintenance.\n"));
} // namespace fdb_cli

View File

@ -0,0 +1,64 @@
/*
* SnapshotCommand.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2021 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "fdbcli/fdbcli.actor.h"
#include "fdbclient/IClientApi.h"
#include "flow/Arena.h"
#include "flow/FastRef.h"
#include "flow/ThreadHelper.actor.h"
#include "flow/actorcompiler.h" // This must be the last #include.
namespace fdb_cli {
// Implements the `snapshot <COMMAND...>` command.
//
// All tokens after the command name are joined with single spaces into one snapshot command
// string, which is passed to IDatabase::createSnapshot together with a freshly generated UID.
// Returns false when no arguments were given (usage is printed) or when createSnapshot fails with
// an Error (the failure and the UID are reported on stderr); returns true on success.
ACTOR Future<bool> snapshotCommandActor(Reference<IDatabase> db, std::vector<StringRef> tokens) {
	if (tokens.size() < 2) {
		printUsage(tokens[0]);
		return false;
	}

	Standalone<StringRef> command;
	state Key uid(deterministicRandom()->randomUniqueID().toString());
	for (int idx = 1; idx < tokens.size(); idx++) {
		// Separator goes in front of every argument except the first one.
		if (idx > 1) {
			command = command.withSuffix(LiteralStringRef(" "));
		}
		command = command.withSuffix(tokens[idx]);
	}

	try {
		wait(safeThreadFutureToFuture(db->createSnapshot(uid, command)));
		printf("Snapshot command succeeded with UID %s\n", uid.toString().c_str());
		return true;
	} catch (Error& err) {
		fprintf(stderr,
		        "Snapshot command failed %d (%s)."
		        " Please cleanup any instance level snapshots created with UID %s.\n",
		        err.code(),
		        err.what(),
		        uid.toString().c_str());
		return false;
	}
}
// Hidden command: constructed with the command name only, so no help text is attached for now.
// NOTE(review): the variable is called dataDistributionFactory although it is constructed for
// "snapshot" — this looks like a copy/paste leftover. Consider renaming it (e.g. snapshotFactory)
// after confirming nothing else references the symbol.
CommandFactory dataDistributionFactory("snapshot");
} // namespace fdb_cli

View File

@ -600,12 +600,6 @@ void initHelp() {
CommandHelp("getversion",
"Fetch the current read version",
"Displays the current read version of the database or currently running transaction.");
helpMap["advanceversion"] = CommandHelp(
"advanceversion <VERSION>",
"Force the cluster to recover at the specified version",
"Forces the cluster to recover at the specified version. If the specified version is larger than the current "
"version of the cluster, the cluster version is advanced "
"to the specified version via a forced recovery.");
helpMap["reset"] =
CommandHelp("reset",
"reset the current transaction",
@ -647,22 +641,6 @@ void initHelp() {
"namespace for all the profiling-related commands.",
"Different types support different actions. Run `profile` to get a list of "
"types, and iteratively explore the help.\n");
helpMap["force_recovery_with_data_loss"] =
CommandHelp("force_recovery_with_data_loss <DCID>",
"Force the database to recover into DCID",
"A forced recovery will cause the database to lose the most recently committed mutations. The "
"amount of mutations that will be lost depends on how far behind the remote datacenter is. This "
"command will change the region configuration to have a positive priority for the chosen DCID, and "
"a negative priority for all other DCIDs. This command will set usable_regions to 1. If the "
"database has already recovered, this command does nothing.\n");
helpMap["maintenance"] = CommandHelp(
"maintenance [on|off] [ZONEID] [SECONDS]",
"mark a zone for maintenance",
"Calling this command with `on' prevents data distribution from moving data away from the processes with the "
"specified ZONEID. Data distribution will automatically be turned back on for ZONEID after the specified "
"SECONDS have elapsed, or after a storage server with a different ZONEID fails. Only one ZONEID can be marked "
"for maintenance. Calling this command with no arguments will display any ongoing maintenance. Calling this "
"command with `off' will disable maintenance.\n");
helpMap["throttle"] =
CommandHelp("throttle <on|off|enable auto|disable auto|list> [ARGS]",
"view and control throttled tags",
@ -695,7 +673,6 @@ void initHelp() {
hiddenCommands.insert("expensive_data_check");
hiddenCommands.insert("datadistribution");
hiddenCommands.insert("snapshot");
}
void printVersion() {
@ -3597,14 +3574,9 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
}
if (tokencmp(tokens[0], "snapshot")) {
if (tokens.size() < 2) {
printUsage(tokens[0]);
bool _result = wait(snapshotCommandActor(db2, tokens));
if (!_result)
is_error = true;
} else {
bool err = wait(createSnapshot(db, tokens));
if (err)
is_error = true;
}
continue;
}
@ -3759,19 +3731,9 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
}
if (tokencmp(tokens[0], "advanceversion")) {
if (tokens.size() != 2) {
printUsage(tokens[0]);
bool _result = wait(makeInterruptable(advanceVersionCommandActor(db2, tokens)));
if (!_result)
is_error = true;
} else {
Version v;
int n = 0;
if (sscanf(tokens[1].toString().c_str(), "%ld%n", &v, &n) != 1 || n != tokens[1].size()) {
printUsage(tokens[0]);
is_error = true;
} else {
wait(makeInterruptable(advanceVersion(db, v)));
}
}
continue;
}
@ -3907,43 +3869,24 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
}
if (tokencmp(tokens[0], "force_recovery_with_data_loss")) {
if (tokens.size() != 2) {
printUsage(tokens[0]);
bool _result = wait(makeInterruptable(forceRecoveryWithDataLossCommandActor(db2, tokens)));
if (!_result)
is_error = true;
continue;
}
wait(makeInterruptable(forceRecovery(db->getConnectionFile(), tokens[1])));
continue;
}
if (tokencmp(tokens[0], "maintenance")) {
if (tokens.size() == 1) {
wait(makeInterruptable(printHealthyZone(db)));
} else if (tokens.size() == 2 && tokencmp(tokens[1], "off")) {
bool clearResult = wait(makeInterruptable(clearHealthyZone(db, true)));
is_error = !clearResult;
} else if (tokens.size() == 4 && tokencmp(tokens[1], "on")) {
double seconds;
int n = 0;
auto secondsStr = tokens[3].toString();
if (sscanf(secondsStr.c_str(), "%lf%n", &seconds, &n) != 1 || n != secondsStr.size()) {
printUsage(tokens[0]);
is_error = true;
} else {
bool setResult = wait(makeInterruptable(setHealthyZone(db, tokens[2], seconds, true)));
is_error = !setResult;
}
} else {
printUsage(tokens[0]);
bool _result = wait(makeInterruptable(maintenanceCommandActor(db2, tokens)));
if (!_result)
is_error = true;
}
continue;
}
if (tokencmp(tokens[0], "consistencycheck")) {
getTransaction(db, tr, tr2, options, intrans);
bool _result = wait(consistencyCheckCommandActor(tr2, tokens));
is_error = !_result;
bool _result = wait(makeInterruptable(consistencyCheckCommandActor(tr2, tokens)));
if (!_result)
is_error = true;
continue;
}

View File

@ -57,9 +57,13 @@ struct CommandFactory {
};
// Special keys used by fdbcli commands
// advanceversion: key that advanceVersionCommandActor sets to the requested version
extern const KeyRef advanceVersionSpecialKey;
// consistencycheck
extern const KeyRef consistencyCheckSpecialKey;
// maintenance: the key range read and cleared by the maintenance helpers, and the key inside that
// range whose presence means data distribution is disabled for storage server failures
extern const KeyRangeRef maintenanceSpecialKeyRange;
extern const KeyRef ignoreSSFailureSpecialKey;
// help functions (Copied from fdbcli.actor.cpp)
@ -69,8 +73,16 @@ bool tokencmp(StringRef token, const char* command);
void printUsage(StringRef command);
// All fdbcli commands (alphabetically)
// advanceversion command; returns false after printing usage when the arguments are malformed
ACTOR Future<bool> advanceVersionCommandActor(Reference<IDatabase> db, std::vector<StringRef> tokens);
// consistencycheck command
ACTOR Future<bool> consistencyCheckCommandActor(Reference<ITransaction> tr, std::vector<StringRef> tokens);
// force_recovery_with_data_loss command; returns false after printing usage unless exactly one argument is given
ACTOR Future<bool> forceRecoveryWithDataLossCommandActor(Reference<IDatabase> db, std::vector<StringRef> tokens);
// maintenance command; returns false on a usage error or when clearing/setting maintenance is refused
ACTOR Future<bool> maintenanceCommandActor(Reference<IDatabase> db, std::vector<StringRef> tokens);
// snapshot command; returns false on a usage error or when creating the snapshot fails
ACTOR Future<bool> snapshotCommandActor(Reference<IDatabase> db, std::vector<StringRef> tokens);
} // namespace fdb_cli