Merge pull request #2014 from etschannen/feature-fdbcli-sleep
Added a sleep command to fdbcli
This commit is contained in:
commit
3f9e392668
|
@ -477,6 +477,9 @@
|
|||
"full_replication":true,
|
||||
"maintenance_zone":"0ccb4e0fdbdb5583010f6b77d9d10ece",
|
||||
"maintenance_seconds_remaining":1.0,
|
||||
"data_distribution_disabled_for_ss_failures":true,
|
||||
"data_distribution_disabled_for_rebalance":true,
|
||||
"data_distribution_disabled":true,
|
||||
"configuration":{
|
||||
"log_anti_quorum":0,
|
||||
"log_replicas":2,
|
||||
|
|
|
@ -498,6 +498,10 @@ void initHelp() {
|
|||
helpMap["quit"] = CommandHelp();
|
||||
helpMap["waitconnected"] = CommandHelp();
|
||||
helpMap["waitopen"] = CommandHelp();
|
||||
helpMap["sleep"] = CommandHelp(
|
||||
"sleep <SECONDS>",
|
||||
"sleep for a period of time",
|
||||
"");
|
||||
helpMap["get"] = CommandHelp(
|
||||
"get <KEY>",
|
||||
"fetch the value for a given key",
|
||||
|
@ -1493,6 +1497,17 @@ void printStatus(StatusObjectReader statusObj, StatusClient::StatusLevel level,
|
|||
outputString += "\n\nWARNING: A single process is both a transaction log and a storage server.\n For best performance use dedicated disks for the transaction logs by setting process classes.";
|
||||
}
|
||||
|
||||
if (statusObjCluster.has("data_distribution_disabled")) {
|
||||
outputString += "\n\nWARNING: Data distribution is off.";
|
||||
} else {
|
||||
if (statusObjCluster.has("data_distribution_disabled_for_ss_failures")) {
|
||||
outputString += "\n\nWARNING: Data distribution is currently turned on but disabled for all storage server failures.";
|
||||
}
|
||||
if (statusObjCluster.has("data_distribution_disabled_for_rebalance")) {
|
||||
outputString += "\n\nWARNING: Data distribution is currently turned on but shard size balancing is currently disabled.";
|
||||
}
|
||||
}
|
||||
|
||||
printf("%s\n", outputString.c_str());
|
||||
}
|
||||
|
||||
|
@ -2593,8 +2608,7 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
|
|||
if (!opt.exec.present()) {
|
||||
if(opt.initialStatusCheck) {
|
||||
Future<Void> checkStatusF = checkStatus(Void(), db->getConnectionFile());
|
||||
Future<Void> checkDDStatusF = checkDataDistributionStatus(db, true);
|
||||
wait(makeInterruptable(success(checkStatusF) && success(checkDDStatusF)));
|
||||
wait(makeInterruptable(success(checkStatusF)));
|
||||
}
|
||||
else {
|
||||
printf("\n");
|
||||
|
@ -2740,6 +2754,23 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
|
|||
continue;
|
||||
}
|
||||
|
||||
// sleep <SECONDS>: pause the CLI for the requested number of seconds.
// Exactly one argument is required and it must parse completely as a
// non-negative floating-point number; anything else prints the usage text
// and marks the command as failed.
if (tokencmp(tokens[0], "sleep")) {
	if (tokens.size() != 2) {
		printUsage(tokens[0]);
		is_error = true;
	} else {
		double v;
		int n = 0;
		// %n stores how many characters %lf consumed; requiring it to equal the
		// token length rejects trailing garbage such as "5x".
		const bool parsed =
		    sscanf(tokens[1].toString().c_str(), "%lf%n", &v, &n) == 1 && n == tokens[1].size();
		// !(v >= 0) is true both for negative values and for NaN (which %lf
		// accepts as "nan"); neither is a meaningful duration.
		if (!parsed || !(v >= 0)) {
			printUsage(tokens[0]);
			is_error = true;
		} else {
			// Wrapped in makeInterruptable like the other long-running waits in
			// this function, so the sleep goes through the same interruption path.
			wait(makeInterruptable(delay(v)));
		}
	}
	continue;
}
|
||||
|
||||
if (tokencmp(tokens[0], "status")) {
|
||||
// Warn at 7 seconds since status will spend as long as 5 seconds trying to read/write from the database
|
||||
warn = timeWarning( 7.0, "\nWARNING: Long delay (Ctrl-C to interrupt)\n" );
|
||||
|
@ -3430,13 +3461,11 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
|
|||
|
||||
if (tokencmp(tokens[0], "datadistribution")) {
|
||||
if (tokens.size() != 2 && tokens.size() != 3) {
|
||||
printf("Usage: datadistribution <status|on|off|disable <ssfailure|rebalance>|enable "
|
||||
printf("Usage: datadistribution <on|off|disable <ssfailure|rebalance>|enable "
|
||||
"<ssfailure|rebalance>>\n");
|
||||
is_error = true;
|
||||
} else {
|
||||
if (tokencmp(tokens[1], "status")) {
|
||||
wait(makeInterruptable(checkDataDistributionStatus(db)));
|
||||
} else if (tokencmp(tokens[1], "on")) {
|
||||
if (tokencmp(tokens[1], "on")) {
|
||||
wait(success(setDDMode(db, 1)));
|
||||
printf("Data distribution is turned on.\n");
|
||||
} else if (tokencmp(tokens[1], "off")) {
|
||||
|
@ -3450,7 +3479,7 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
|
|||
wait(makeInterruptable(setDDIgnoreRebalanceSwitch(db, true)));
|
||||
printf("Data distribution is disabled for rebalance.\n");
|
||||
} else {
|
||||
printf("Usage: datadistribution <status|on|off|disable <ssfailure|rebalance>|enable "
|
||||
printf("Usage: datadistribution <on|off|disable <ssfailure|rebalance>|enable "
|
||||
"<ssfailure|rebalance>>\n");
|
||||
is_error = true;
|
||||
}
|
||||
|
@ -3462,12 +3491,12 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
|
|||
wait(makeInterruptable(setDDIgnoreRebalanceSwitch(db, false)));
|
||||
printf("Data distribution is enabled for rebalance.\n");
|
||||
} else {
|
||||
printf("Usage: datadistribution <status|on|off|disable <ssfailure|rebalance>|enable "
|
||||
printf("Usage: datadistribution <on|off|disable <ssfailure|rebalance>|enable "
|
||||
"<ssfailure|rebalance>>\n");
|
||||
is_error = true;
|
||||
}
|
||||
} else {
|
||||
printf("Usage: datadistribution <status|on|off|disable <ssfailure|rebalance>|enable "
|
||||
printf("Usage: datadistribution <on|off|disable <ssfailure|rebalance>|enable "
|
||||
"<ssfailure|rebalance>>\n");
|
||||
is_error = true;
|
||||
}
|
||||
|
|
|
@ -1339,61 +1339,17 @@ ACTOR Future<vector<AddressExclusion>> getExcludedServers( Database cx ) {
|
|||
}
|
||||
}
|
||||
|
||||
// Prints the current data distribution state to stdout.
//
// Reads dataDistributionModeKey, healthyZoneKey and rebalanceDDIgnoreKey in one
// LOCK_AWARE transaction (retried through tr.onError) and reports:
//   - a single warning, and nothing else, when the stored mode is 0 (off);
//   - "Data distribution is on." unless printWarningOnly is set;
//   - a warning when the healthy-zone entry is ignoreSSFailuresZoneString, or
//     when it names a zone whose maintenance window is still open;
//   - a warning when rebalanceDDIgnoreKey is present.
//
// cx               - database to query.
// printWarningOnly - when true, omit the informational "on" line and print
//                    WARNING lines only.
ACTOR Future<Void> checkDataDistributionStatus(Database cx, bool printWarningOnly) {
	state Transaction tr(cx);
	loop {
		try {
			tr.setOption(FDBTransactionOptions::LOCK_AWARE);
			// Start all three reads before waiting on any of them.
			state Future<Optional<Value>> overallSwitchF = tr.get(dataDistributionModeKey);
			state Future<Optional<Value>> healthyZoneValueF = tr.get(healthyZoneKey);
			state Future<Optional<Value>> rebalanceDDIgnoreValueF = tr.get(rebalanceDDIgnoreKey);
			wait(success(overallSwitchF) && success(healthyZoneValueF) && success(rebalanceDDIgnoreValueF));

			// An absent mode key is treated the same as "on"; only an explicit 0 means off.
			if (overallSwitchF.get().present()) {
				BinaryReader rd(overallSwitchF.get().get(), Unversioned());
				int currentMode;
				rd >> currentMode;
				if (currentMode == 0) {
					printf("WARNING: Data distribution is off.\n");
					return Void();
				}
			}

			if (!printWarningOnly) {
				printf("Data distribution is on.\n");
			}

			if (healthyZoneValueF.get().present()) {
				auto healthyZoneKV = decodeHealthyZoneValue(healthyZoneValueF.get().get());
				if (healthyZoneKV.first == ignoreSSFailuresZoneString) {
					printf("WARNING: Data distribution is currently turned on but disabled for all storage server "
					       "failures.\n");
				} else if (healthyZoneKV.second > tr.getReadVersion().get()) {
					// Only warn while the maintenance window is still open (the same
					// expiry test printHealthyZone applies); an entry whose end version
					// has passed would otherwise report a zero or negative duration.
					printf("WARNING: Data distribution is currently turned on but zone %s is under maintenance and "
					       "will continue for %" PRId64 " seconds.\n",
					       healthyZoneKV.first.toString().c_str(),
					       (healthyZoneKV.second - tr.getReadVersion().get()) / CLIENT_KNOBS->CORE_VERSIONSPERSECOND);
				}
			}

			if (rebalanceDDIgnoreValueF.get().present()) {
				printf("WARNING: Data distribution is currently turned on but shard size balancing is currently "
				       "disabled.\n");
			}
			return Void();
		} catch (Error& e) {
			// Let the transaction decide whether the error is retryable, then loop.
			wait(tr.onError(e));
		}
	}
}
|
||||
|
||||
ACTOR Future<Void> printHealthyZone( Database cx ) {
|
||||
state Transaction tr(cx);
|
||||
loop {
|
||||
try {
|
||||
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
|
||||
Optional<Value> val = wait( tr.get(healthyZoneKey) );
|
||||
if(!val.present() || decodeHealthyZoneValue(val.get()).second <= tr.getReadVersion().get()) {
|
||||
printf("No ongoing maintenance.\n");
|
||||
} else if (val.present() && decodeHealthyZoneValue(val.get()).first == ignoreSSFailuresZoneString) {
|
||||
if (val.present() && decodeHealthyZoneValue(val.get()).first == ignoreSSFailuresZoneString) {
|
||||
printf("Data distribution has been disabled for all storage server failures in this cluster and thus "
|
||||
"maintenance mode is not active.\n");
|
||||
} else if(!val.present() || decodeHealthyZoneValue(val.get()).second <= tr.getReadVersion().get()) {
|
||||
printf("No ongoing maintenance.\n");
|
||||
} else {
|
||||
auto healthyZone = decodeHealthyZoneValue(val.get());
|
||||
printf("Maintenance for zone %s will continue for %" PRId64 " seconds.\n", healthyZone.first.toString().c_str(), (healthyZone.second-tr.getReadVersion().get())/CLIENT_KNOBS->CORE_VERSIONSPERSECOND);
|
||||
|
|
|
@ -181,7 +181,6 @@ ACTOR Future<int> setDDMode( Database cx, int mode );
|
|||
|
||||
ACTOR Future<Void> forceRecovery( Reference<ClusterConnectionFile> clusterFile, Standalone<StringRef> dcId );
|
||||
|
||||
// Prints the data distribution state read from the database to stdout. With
// printWarningOnly set, the informational "Data distribution is on." line is
// omitted and only WARNING lines are printed.
ACTOR Future<Void> checkDataDistributionStatus(Database cx, bool printWarningOnly = false);
|
||||
// Reads healthyZoneKey and prints the current maintenance state to stdout.
ACTOR Future<Void> printHealthyZone( Database cx );
|
||||
// NOTE(review): definition not visible here. The fdbcli call sites pass true
// before reporting "Data distribution is disabled for rebalance." and false
// before "Data distribution is enabled for rebalance."
ACTOR Future<Void> setDDIgnoreRebalanceSwitch(Database cx, bool ignoreRebalance);
|
||||
ACTOR Future<bool> clearHealthyZone(Database cx, bool printWarning = false, bool clearSSFailureZoneString = false);
|
||||
|
|
|
@ -501,6 +501,9 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
|
|||
"full_replication":true,
|
||||
"maintenance_zone":"0ccb4e0fdbdb5583010f6b77d9d10ece",
|
||||
"maintenance_seconds_remaining":1.0,
|
||||
"data_distribution_disabled_for_ss_failures":true,
|
||||
"data_distribution_disabled_for_rebalance":true,
|
||||
"data_distribution_disabled":true,
|
||||
"configuration":{
|
||||
"log_anti_quorum":0,
|
||||
"log_replicas":2,
|
||||
|
|
|
@ -1158,8 +1158,9 @@ struct LoadConfigurationResult {
|
|||
Optional<Key> healthyZone;
|
||||
double healthyZoneSeconds;
|
||||
bool rebalanceDDIgnored;
|
||||
bool dataDistributionDisabled;
|
||||
|
||||
LoadConfigurationResult() : fullReplication(true), healthyZoneSeconds(0), rebalanceDDIgnored(false) {}
|
||||
LoadConfigurationResult() : fullReplication(true), healthyZoneSeconds(0), rebalanceDDIgnored(false), dataDistributionDisabled(false) {}
|
||||
};
|
||||
|
||||
ACTOR static Future<std::pair<Optional<DatabaseConfiguration>,Optional<LoadConfigurationResult>>> loadConfiguration(Database cx, JsonBuilderArray *messages, std::set<std::string> *status_incomplete_reasons){
|
||||
|
@ -1201,12 +1202,13 @@ ACTOR static Future<std::pair<Optional<DatabaseConfiguration>,Optional<LoadConfi
|
|||
}
|
||||
state Future<Optional<Value>> healthyZoneValue = tr.get(healthyZoneKey);
|
||||
state Future<Optional<Value>> rebalanceDDIgnored = tr.get(rebalanceDDIgnoreKey);
|
||||
state Future<Optional<Value>> ddModeKey = tr.get(dataDistributionModeKey);
|
||||
|
||||
choose {
|
||||
when(wait(waitForAll(replicasFutures) && success(healthyZoneValue) && success(rebalanceDDIgnored))) {
|
||||
when(wait(waitForAll(replicasFutures) && success(healthyZoneValue) && success(rebalanceDDIgnored) && success(ddModeKey))) {
|
||||
int unreplicated = 0;
|
||||
for(int i = 0; i < result.get().regions.size(); i++) {
|
||||
if( !replicasFutures[i].get().present() || decodeDatacenterReplicasValue(replicasFutures[i].get().get()) < result.get().storageTeamSize ) {
|
||||
if( !replicasFutures[i].get().present() || decodeDatacenterReplicasValue(replicasFutures[i].get().get()) < result.get().storageTeamSize ) {
|
||||
unreplicated++;
|
||||
}
|
||||
}
|
||||
|
@ -1214,12 +1216,23 @@ ACTOR static Future<std::pair<Optional<DatabaseConfiguration>,Optional<LoadConfi
|
|||
res.fullReplication = (!unreplicated || (result.get().usableRegions == 1 && unreplicated < result.get().regions.size()));
|
||||
if(healthyZoneValue.get().present()) {
|
||||
auto healthyZone = decodeHealthyZoneValue(healthyZoneValue.get().get());
|
||||
if(healthyZone.second > tr.getReadVersion().get()) {
|
||||
if(healthyZone.first == ignoreSSFailuresZoneString) {
|
||||
res.healthyZone = healthyZone.first;
|
||||
}
|
||||
else if(healthyZone.second > tr.getReadVersion().get()) {
|
||||
res.healthyZone = healthyZone.first;
|
||||
res.healthyZoneSeconds = (healthyZone.second-tr.getReadVersion().get())/CLIENT_KNOBS->CORE_VERSIONSPERSECOND;
|
||||
}
|
||||
}
|
||||
res.rebalanceDDIgnored = rebalanceDDIgnored.get().present();
|
||||
if (ddModeKey.get().present()) {
|
||||
BinaryReader rd(ddModeKey.get().get(), Unversioned());
|
||||
int currentMode;
|
||||
rd >> currentMode;
|
||||
if (currentMode == 0) {
|
||||
res.dataDistributionDisabled = true;
|
||||
}
|
||||
}
|
||||
loadResult = res;
|
||||
}
|
||||
when(wait(getConfTimeout)) {
|
||||
|
@ -2188,6 +2201,9 @@ ACTOR Future<StatusReply> clusterGetStatus(
|
|||
if (loadResult.get().rebalanceDDIgnored) {
|
||||
statusObj["data_distribution_disabled_for_rebalance"] = true;
|
||||
}
|
||||
if (loadResult.get().dataDistributionDisabled) {
|
||||
statusObj["data_distribution_disabled"] = true;
|
||||
}
|
||||
}
|
||||
|
||||
statusObj["machines"] = machineStatusFetcher(mMetrics, workers, configuration, &status_incomplete_reasons);
|
||||
|
@ -2222,7 +2238,6 @@ ACTOR Future<StatusReply> clusterGetStatus(
|
|||
futures2.push_back(layerStatusFetcher(cx, &messages, &status_incomplete_reasons));
|
||||
futures2.push_back(lockedStatusFetcher(db, &messages, &status_incomplete_reasons));
|
||||
futures2.push_back(clusterSummaryStatisticsFetcher(pMetrics, storageServerFuture, tLogFuture, &status_incomplete_reasons));
|
||||
|
||||
state std::vector<JsonBuilderObject> workerStatuses = wait(getAll(futures2));
|
||||
|
||||
int oldLogFaultTolerance = 100;
|
||||
|
|
Loading…
Reference in New Issue