Merge pull request #2014 from etschannen/feature-fdbcli-sleep

Added a sleep command to fdbcli
This commit is contained in:
A.J. Beamon 2019-08-30 11:22:13 -07:00 committed by GitHub
commit 3f9e392668
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 67 additions and 62 deletions

View File

@ -477,6 +477,9 @@
"full_replication":true,
"maintenance_zone":"0ccb4e0fdbdb5583010f6b77d9d10ece",
"maintenance_seconds_remaining":1.0,
"data_distribution_disabled_for_ss_failures":true,
"data_distribution_disabled_for_rebalance":true,
"data_distribution_disabled":true,
"configuration":{
"log_anti_quorum":0,
"log_replicas":2,

View File

@ -498,6 +498,10 @@ void initHelp() {
helpMap["quit"] = CommandHelp();
helpMap["waitconnected"] = CommandHelp();
helpMap["waitopen"] = CommandHelp();
// Register the help entry for the `sleep` command. The last argument is
// deliberately left as an empty string here.
// NOTE(review): the three strings look like usage line, short description and
// long description (compare the "get" entry below) - confirm against CommandHelp.
helpMap["sleep"] = CommandHelp(
"sleep <SECONDS>",
"sleep for a period of time",
"");
helpMap["get"] = CommandHelp(
"get <KEY>",
"fetch the value for a given key",
@ -1493,6 +1497,17 @@ void printStatus(StatusObjectReader statusObj, StatusClient::StatusLevel level,
outputString += "\n\nWARNING: A single process is both a transaction log and a storage server.\n For best performance use dedicated disks for the transaction logs by setting process classes.";
}
// Append data distribution warnings based on which keys are present in the
// cluster status object. The global "off" warning replaces the two partial
// warnings; the partial ones are only considered when the global key is absent.
if (!statusObjCluster.has("data_distribution_disabled")) {
	// Data distribution as a whole is on; report each partially disabled feature.
	if (statusObjCluster.has("data_distribution_disabled_for_ss_failures")) {
		outputString += "\n\nWARNING: Data distribution is currently turned on but disabled for all storage server failures.";
	}

	if (statusObjCluster.has("data_distribution_disabled_for_rebalance")) {
		outputString += "\n\nWARNING: Data distribution is currently turned on but shard size balancing is currently disabled.";
	}
} else {
	outputString += "\n\nWARNING: Data distribution is off.";
}
printf("%s\n", outputString.c_str());
}
@ -2593,8 +2608,7 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
if (!opt.exec.present()) {
if(opt.initialStatusCheck) {
Future<Void> checkStatusF = checkStatus(Void(), db->getConnectionFile());
Future<Void> checkDDStatusF = checkDataDistributionStatus(db, true);
wait(makeInterruptable(success(checkStatusF) && success(checkDDStatusF)));
wait(makeInterruptable(success(checkStatusF)));
}
else {
printf("\n");
@ -2740,6 +2754,23 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
continue;
}
// `sleep <SECONDS>`: pause the CLI for the given number of seconds.
// Exactly one argument is required and it must parse completely as a
// non-negative number; anything else prints the usage text and flags an error.
if (tokencmp(tokens[0], "sleep")) {
	if (tokens.size() != 2) {
		printUsage(tokens[0]);
		is_error = true;
	} else {
		double v;
		int n = 0;
		// %n stores how many characters %lf consumed; comparing it with the token
		// length rejects trailing garbage such as "5abc".
		// %lf also accepts "nan" and negative numbers, which make no sense as a
		// duration. !(v >= 0) is true for both (every comparison with NaN is false),
		// and is only evaluated once sscanf has actually stored a value in v.
		if (sscanf(tokens[1].toString().c_str(), "%lf%n", &v, &n) != 1 || n != tokens[1].size() || !(v >= 0)) {
			printUsage(tokens[0]);
			is_error = true;
		} else {
			// Wrapped in makeInterruptable like the other potentially long waits in
			// this function, so a long sleep does not have to be waited out.
			wait(makeInterruptable(delay(v)));
		}
	}
	continue;
}
if (tokencmp(tokens[0], "status")) {
// Warn at 7 seconds since status will spend as long as 5 seconds trying to read/write from the database
warn = timeWarning( 7.0, "\nWARNING: Long delay (Ctrl-C to interrupt)\n" );
@ -3430,13 +3461,11 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
if (tokencmp(tokens[0], "datadistribution")) {
if (tokens.size() != 2 && tokens.size() != 3) {
printf("Usage: datadistribution <status|on|off|disable <ssfailure|rebalance>|enable "
printf("Usage: datadistribution <on|off|disable <ssfailure|rebalance>|enable "
"<ssfailure|rebalance>>\n");
is_error = true;
} else {
if (tokencmp(tokens[1], "status")) {
wait(makeInterruptable(checkDataDistributionStatus(db)));
} else if (tokencmp(tokens[1], "on")) {
if (tokencmp(tokens[1], "on")) {
wait(success(setDDMode(db, 1)));
printf("Data distribution is turned on.\n");
} else if (tokencmp(tokens[1], "off")) {
@ -3450,7 +3479,7 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
wait(makeInterruptable(setDDIgnoreRebalanceSwitch(db, true)));
printf("Data distribution is disabled for rebalance.\n");
} else {
printf("Usage: datadistribution <status|on|off|disable <ssfailure|rebalance>|enable "
printf("Usage: datadistribution <on|off|disable <ssfailure|rebalance>|enable "
"<ssfailure|rebalance>>\n");
is_error = true;
}
@ -3462,12 +3491,12 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
wait(makeInterruptable(setDDIgnoreRebalanceSwitch(db, false)));
printf("Data distribution is enabled for rebalance.\n");
} else {
printf("Usage: datadistribution <status|on|off|disable <ssfailure|rebalance>|enable "
printf("Usage: datadistribution <on|off|disable <ssfailure|rebalance>|enable "
"<ssfailure|rebalance>>\n");
is_error = true;
}
} else {
printf("Usage: datadistribution <status|on|off|disable <ssfailure|rebalance>|enable "
printf("Usage: datadistribution <on|off|disable <ssfailure|rebalance>|enable "
"<ssfailure|rebalance>>\n");
is_error = true;
}

View File

@ -1339,61 +1339,17 @@ ACTOR Future<vector<AddressExclusion>> getExcludedServers( Database cx ) {
}
}
// Reads the data distribution switches from the database and prints their
// state to stdout.
//
// cx:               database to read from.
// printWarningOnly: when true, the "Data distribution is on." confirmation is
//                   suppressed and only WARNING lines are printed.
//
// dataDistributionModeKey, healthyZoneKey and rebalanceDDIgnoreKey are read in
// a single transaction with the LOCK_AWARE option set. Any error is handed to
// tr.onError() and the whole read is retried. Nothing is printed until all
// three reads have completed, so a retry never duplicates output.
ACTOR Future<Void> checkDataDistributionStatus(Database cx, bool printWarningOnly) {
	state Transaction tr(cx);
	loop {
		try {
			tr.setOption(FDBTransactionOptions::LOCK_AWARE);
			// Issue the three reads together and wait for all of them.
			state Future<Optional<Value>> overallSwitchF = tr.get(dataDistributionModeKey);
			state Future<Optional<Value>> healthyZoneValueF = tr.get(healthyZoneKey);
			state Future<Optional<Value>> rebalanceDDIgnoreValueF = tr.get(rebalanceDDIgnoreKey);
			wait(success(overallSwitchF) && success(healthyZoneValueF) && success(rebalanceDDIgnoreValueF));

			// A stored mode of 0 means data distribution is off; an absent key is
			// treated as "on".
			if (overallSwitchF.get().present()) {
				BinaryReader rd(overallSwitchF.get().get(), Unversioned());
				int currentMode;
				rd >> currentMode;
				if (currentMode == 0) {
					printf("WARNING: Data distribution is off.\n");
					return Void();
				}
			}
			if (!printWarningOnly) {
				printf("Data distribution is on.\n");
			}

			if (healthyZoneValueF.get().present()) {
				auto healthyZoneKV = decodeHealthyZoneValue(healthyZoneValueF.get().get());
				if (healthyZoneKV.first == ignoreSSFailuresZoneString) {
					printf("WARNING: Data distribution is currently turned on but disabled for all storage server "
					       "failures.\n");
				} else if (healthyZoneKV.second > tr.getReadVersion().get()) {
					// Only warn while the maintenance window is still open. An entry
					// whose end version is not after the read version has expired
					// (printHealthyZone reports that case as "No ongoing maintenance")
					// and would otherwise be shown with a zero or negative remaining time.
					printf("WARNING: Data distribution is currently turned on but zone %s is under maintenance and "
					       "will continue for %" PRId64 " seconds.\n",
					       healthyZoneKV.first.toString().c_str(),
					       (healthyZoneKV.second - tr.getReadVersion().get()) / CLIENT_KNOBS->CORE_VERSIONSPERSECOND);
				}
			}

			// Mere presence of the key disables rebalancing; its value is not inspected.
			if (rebalanceDDIgnoreValueF.get().present()) {
				printf("WARNING: Data distribution is currently turned on but shard size balancing is currently "
				       "disabled.\n");
			}
			return Void();
		} catch (Error& e) {
			wait(tr.onError(e));
		}
	}
}
ACTOR Future<Void> printHealthyZone( Database cx ) {
state Transaction tr(cx);
loop {
try {
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
Optional<Value> val = wait( tr.get(healthyZoneKey) );
if(!val.present() || decodeHealthyZoneValue(val.get()).second <= tr.getReadVersion().get()) {
printf("No ongoing maintenance.\n");
} else if (val.present() && decodeHealthyZoneValue(val.get()).first == ignoreSSFailuresZoneString) {
if (val.present() && decodeHealthyZoneValue(val.get()).first == ignoreSSFailuresZoneString) {
printf("Data distribution has been disabled for all storage server failures in this cluster and thus "
"maintenance mode is not active.\n");
} else if(!val.present() || decodeHealthyZoneValue(val.get()).second <= tr.getReadVersion().get()) {
printf("No ongoing maintenance.\n");
} else {
auto healthyZone = decodeHealthyZoneValue(val.get());
printf("Maintenance for zone %s will continue for %" PRId64 " seconds.\n", healthyZone.first.toString().c_str(), (healthyZone.second-tr.getReadVersion().get())/CLIENT_KNOBS->CORE_VERSIONSPERSECOND);

View File

@ -181,7 +181,6 @@ ACTOR Future<int> setDDMode( Database cx, int mode );
ACTOR Future<Void> forceRecovery( Reference<ClusterConnectionFile> clusterFile, Standalone<StringRef> dcId );
ACTOR Future<Void> checkDataDistributionStatus(Database cx, bool printWarningOnly = false);
ACTOR Future<Void> printHealthyZone( Database cx );
ACTOR Future<Void> setDDIgnoreRebalanceSwitch(Database cx, bool ignoreRebalance);
ACTOR Future<bool> clearHealthyZone(Database cx, bool printWarning = false, bool clearSSFailureZoneString = false);

View File

@ -501,6 +501,9 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
"full_replication":true,
"maintenance_zone":"0ccb4e0fdbdb5583010f6b77d9d10ece",
"maintenance_seconds_remaining":1.0,
"data_distribution_disabled_for_ss_failures":true,
"data_distribution_disabled_for_rebalance":true,
"data_distribution_disabled":true,
"configuration":{
"log_anti_quorum":0,
"log_replicas":2,

View File

@ -1158,8 +1158,9 @@ struct LoadConfigurationResult {
Optional<Key> healthyZone;
double healthyZoneSeconds;
bool rebalanceDDIgnored;
bool dataDistributionDisabled;
LoadConfigurationResult() : fullReplication(true), healthyZoneSeconds(0), rebalanceDDIgnored(false) {}
LoadConfigurationResult() : fullReplication(true), healthyZoneSeconds(0), rebalanceDDIgnored(false), dataDistributionDisabled(false) {}
};
ACTOR static Future<std::pair<Optional<DatabaseConfiguration>,Optional<LoadConfigurationResult>>> loadConfiguration(Database cx, JsonBuilderArray *messages, std::set<std::string> *status_incomplete_reasons){
@ -1201,12 +1202,13 @@ ACTOR static Future<std::pair<Optional<DatabaseConfiguration>,Optional<LoadConfi
}
state Future<Optional<Value>> healthyZoneValue = tr.get(healthyZoneKey);
state Future<Optional<Value>> rebalanceDDIgnored = tr.get(rebalanceDDIgnoreKey);
state Future<Optional<Value>> ddModeKey = tr.get(dataDistributionModeKey);
choose {
when(wait(waitForAll(replicasFutures) && success(healthyZoneValue) && success(rebalanceDDIgnored))) {
when(wait(waitForAll(replicasFutures) && success(healthyZoneValue) && success(rebalanceDDIgnored) && success(ddModeKey))) {
int unreplicated = 0;
for(int i = 0; i < result.get().regions.size(); i++) {
if( !replicasFutures[i].get().present() || decodeDatacenterReplicasValue(replicasFutures[i].get().get()) < result.get().storageTeamSize ) {
if( !replicasFutures[i].get().present() || decodeDatacenterReplicasValue(replicasFutures[i].get().get()) < result.get().storageTeamSize ) {
unreplicated++;
}
}
@ -1214,12 +1216,23 @@ ACTOR static Future<std::pair<Optional<DatabaseConfiguration>,Optional<LoadConfi
res.fullReplication = (!unreplicated || (result.get().usableRegions == 1 && unreplicated < result.get().regions.size()));
if(healthyZoneValue.get().present()) {
auto healthyZone = decodeHealthyZoneValue(healthyZoneValue.get().get());
if(healthyZone.second > tr.getReadVersion().get()) {
if(healthyZone.first == ignoreSSFailuresZoneString) {
res.healthyZone = healthyZone.first;
}
else if(healthyZone.second > tr.getReadVersion().get()) {
res.healthyZone = healthyZone.first;
res.healthyZoneSeconds = (healthyZone.second-tr.getReadVersion().get())/CLIENT_KNOBS->CORE_VERSIONSPERSECOND;
}
}
res.rebalanceDDIgnored = rebalanceDDIgnored.get().present();
if (ddModeKey.get().present()) {
BinaryReader rd(ddModeKey.get().get(), Unversioned());
int currentMode;
rd >> currentMode;
if (currentMode == 0) {
res.dataDistributionDisabled = true;
}
}
loadResult = res;
}
when(wait(getConfTimeout)) {
@ -2188,6 +2201,9 @@ ACTOR Future<StatusReply> clusterGetStatus(
if (loadResult.get().rebalanceDDIgnored) {
statusObj["data_distribution_disabled_for_rebalance"] = true;
}
if (loadResult.get().dataDistributionDisabled) {
statusObj["data_distribution_disabled"] = true;
}
}
statusObj["machines"] = machineStatusFetcher(mMetrics, workers, configuration, &status_incomplete_reasons);
@ -2222,7 +2238,6 @@ ACTOR Future<StatusReply> clusterGetStatus(
futures2.push_back(layerStatusFetcher(cx, &messages, &status_incomplete_reasons));
futures2.push_back(lockedStatusFetcher(db, &messages, &status_incomplete_reasons));
futures2.push_back(clusterSummaryStatisticsFetcher(pMetrics, storageServerFuture, tLogFuture, &status_incomplete_reasons));
state std::vector<JsonBuilderObject> workerStatuses = wait(getAll(futures2));
int oldLogFaultTolerance = 100;