Merge pull request #3320 from ajbeamon/backport-region-config-status-changes
Backport region config status changes to release-6.2
This commit is contained in:
commit
f6f9fb1147
|
@ -494,6 +494,7 @@
|
|||
"data_distribution_disabled_for_ss_failures":true,
|
||||
"data_distribution_disabled_for_rebalance":true,
|
||||
"data_distribution_disabled":true,
|
||||
"active_primary_dc":"pv",
|
||||
"configuration":{
|
||||
"log_anti_quorum":0,
|
||||
"log_replicas":2,
|
||||
|
|
|
@ -2,6 +2,14 @@
|
|||
Release Notes
|
||||
#############
|
||||
|
||||
6.2.23
|
||||
======
|
||||
|
||||
Status
|
||||
------
|
||||
|
||||
* Added ``cluster.active_primary_dc`` that indicates which datacenter is serving as the primary datacenter in multi-region setups. `(PR #3320) <https://github.com/apple/foundationdb/pull/3320>`_
|
||||
|
||||
6.2.22
|
||||
======
|
||||
|
||||
|
|
|
@ -945,7 +945,11 @@ void printStatus(StatusObjectReader statusObj, StatusClient::StatusLevel level,
|
|||
|
||||
StatusObjectReader statusObjConfig;
|
||||
StatusArray excludedServersArr;
|
||||
Optional<std::string> activePrimaryDC;
|
||||
|
||||
if (statusObjCluster.has("active_primary_dc")) {
|
||||
activePrimaryDC = statusObjCluster["active_primary_dc"].get_str();
|
||||
}
|
||||
if (statusObjCluster.get("configuration", statusObjConfig)) {
|
||||
if (statusObjConfig.has("excluded_servers"))
|
||||
excludedServersArr = statusObjConfig.last().get_array();
|
||||
|
@ -1001,6 +1005,73 @@ void printStatus(StatusObjectReader statusObj, StatusClient::StatusLevel level,
|
|||
|
||||
if (statusObjConfig.get("log_routers", intVal))
|
||||
outputString += format("\n Desired Log Routers - %d", intVal);
|
||||
|
||||
outputString += "\n Usable Regions - ";
|
||||
if (statusObjConfig.get("usable_regions", intVal)) {
|
||||
outputString += std::to_string(intVal);
|
||||
} else {
|
||||
outputString += "unknown";
|
||||
}
|
||||
|
||||
StatusArray regions;
|
||||
if (statusObjConfig.has("regions")) {
|
||||
outputString += "\n Regions: ";
|
||||
regions = statusObjConfig["regions"].get_array();
|
||||
bool isPrimary = false;
|
||||
std::vector<std::string> regionSatelliteDCs;
|
||||
std::string regionDC;
|
||||
for (StatusObjectReader region : regions) {
|
||||
for (StatusObjectReader dc : region["datacenters"].get_array()) {
|
||||
if (!dc.has("satellite")) {
|
||||
regionDC = dc["id"].get_str();
|
||||
if (activePrimaryDC.present() && dc["id"].get_str() == activePrimaryDC.get()) {
|
||||
isPrimary = true;
|
||||
}
|
||||
} else if (dc["satellite"].get_int() == 1) {
|
||||
regionSatelliteDCs.push_back(dc["id"].get_str());
|
||||
}
|
||||
}
|
||||
if (activePrimaryDC.present()) {
|
||||
if (isPrimary) {
|
||||
outputString += "\n Primary -";
|
||||
} else {
|
||||
outputString += "\n Remote -";
|
||||
}
|
||||
} else {
|
||||
outputString += "\n Region -";
|
||||
}
|
||||
outputString += format("\n Datacenter - %s", regionDC.c_str());
|
||||
if (regionSatelliteDCs.size() > 0) {
|
||||
outputString += "\n Satellite datacenters - ";
|
||||
for (int i = 0; i < regionSatelliteDCs.size(); i++) {
|
||||
if (i != regionSatelliteDCs.size() - 1) {
|
||||
outputString += format("%s, ", regionSatelliteDCs[i].c_str());
|
||||
} else {
|
||||
outputString += format("%s", regionSatelliteDCs[i].c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
isPrimary = false;
|
||||
if (region.get("satellite_redundancy_mode", strVal)) {
|
||||
outputString += format("\n Satellite Redundancy Mode - %s", strVal.c_str());
|
||||
}
|
||||
if (region.get("satellite_anti_quorum", intVal)) {
|
||||
outputString += format("\n Satellite Anti Quorum - %d", intVal);
|
||||
}
|
||||
if (region.get("satellite_logs", intVal)) {
|
||||
outputString += format("\n Satellite Logs - %d", intVal);
|
||||
}
|
||||
if (region.get("satellite_log_policy", strVal)) {
|
||||
outputString += format("\n Satellite Log Policy - %s", strVal.c_str());
|
||||
}
|
||||
if (region.get("satellite_log_replicas", intVal)) {
|
||||
outputString += format("\n Satellite Log Replicas - %d", intVal);
|
||||
}
|
||||
if (region.get("satellite_usable_dcs", intVal)) {
|
||||
outputString += format("\n Satellite Usable DCs - %d", intVal);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (std::runtime_error& ) {
|
||||
outputString = outputStringCache;
|
||||
|
|
|
@ -520,6 +520,7 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
|
|||
"data_distribution_disabled_for_ss_failures":true,
|
||||
"data_distribution_disabled_for_rebalance":true,
|
||||
"data_distribution_disabled":true,
|
||||
"active_primary_dc":"pv",
|
||||
"configuration":{
|
||||
"log_anti_quorum":0,
|
||||
"log_replicas":2,
|
||||
|
|
|
@ -2145,6 +2145,35 @@ ACTOR Future<JsonBuilderObject> lockedStatusFetcher(Reference<AsyncVar<CachedSer
|
|||
return statusObj;
|
||||
}
|
||||
|
||||
// Reads primaryDatacenterKey through a ReadYourWritesTransaction and returns its value.
// Returns an empty Optional when the key is absent (after adding a "primary_dc_missing"
// message to *messages) or when a timed_out error is caught (after adding a
// "fetch_primary_dc_timedout" message). Any other error is handed to tr.onError()
// and the read is attempted again on the next loop iteration.
ACTOR Future<Optional<Value>> getActivePrimaryDC(Database cx, JsonBuilderArray* messages) {
|
||||
state ReadYourWritesTransaction tr(cx);
|
||||

||||
state Future<Void> readTimeout = delay(5); // so that we won't loop forever
|
||||
loop {
|
||||
try {
|
||||
if (readTimeout.isReady()) { // overall deadline across retries has elapsed
|
||||
throw timed_out(); // caught below and reported as fetch_primary_dc_timedout
|
||||
}
|
||||
tr.setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
|
||||
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
||||
// NOTE(review): timeoutError(..., 5) presumably raises timed_out if the read takes longer than 5 seconds — confirm
Optional<Value> res = wait(timeoutError(tr.get(primaryDatacenterKey), 5));
|
||||
if (!res.present()) {
|
||||
messages->push_back(
|
||||
JsonString::makeMessage("primary_dc_missing", "Unable to determine primary datacenter."));
|
||||
}
|
||||
return res; // may be an empty Optional when the key was not found
|
||||
} catch (Error& e) {
|
||||
if (e.code() == error_code_timed_out) {
|
||||
messages->push_back(
|
||||
JsonString::makeMessage("fetch_primary_dc_timedout", "Fetching primary DC timed out."));
|
||||
return Optional<Value>();
|
||||
} else {
|
||||
wait(tr.onError(e)); // non-timeout error: wait on onError, then loop and retry
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// constructs the cluster section of the json status output
|
||||
ACTOR Future<StatusReply> clusterGetStatus(
|
||||
Reference<AsyncVar<CachedSerialization<ServerDBInfo>>> db,
|
||||
|
@ -2323,6 +2352,7 @@ ACTOR Future<StatusReply> clusterGetStatus(
|
|||
state Future<ErrorOr<vector<std::pair<MasterProxyInterface, EventMap>>>> proxyFuture = errorOr(getProxiesAndMetrics(db, address_workers));
|
||||
|
||||
state int minReplicasRemaining = -1;
|
||||
state Future<Optional<Value>> primaryDCFO = getActivePrimaryDC(cx, &messages);
|
||||
std::vector<Future<JsonBuilderObject>> futures2;
|
||||
futures2.push_back(dataStatusFetcher(ddWorker, configuration.get(), &minReplicasRemaining));
|
||||
futures2.push_back(workloadStatusFetcher(db, workers, mWorker, rkWorker, &qos, &data_overlay, &status_incomplete_reasons, storageServerFuture));
|
||||
|
@ -2341,11 +2371,17 @@ ACTOR Future<StatusReply> clusterGetStatus(
|
|||
statusObj["fault_tolerance"] = faultToleranceStatusFetcher(configuration.get(), coordinators, workers, extraTlogEligibleZones, minReplicasRemaining, loadResult.present() && loadResult.get().healthyZone.present());
|
||||
}
|
||||
|
||||
JsonBuilderObject configObj = configurationFetcher(configuration, coordinators, &status_incomplete_reasons);
|
||||
state JsonBuilderObject configObj =
|
||||
configurationFetcher(configuration, coordinators, &status_incomplete_reasons);
|
||||
|
||||
wait(success(primaryDCFO));
|
||||
if (primaryDCFO.get().present()) {
|
||||
statusObj["active_primary_dc"] = primaryDCFO.get().get();
|
||||
}
|
||||
// configObj could be empty
|
||||
if (!configObj.empty())
|
||||
if (!configObj.empty()) {
|
||||
statusObj["configuration"] = configObj;
|
||||
}
|
||||
|
||||
// workloadStatusFetcher returns the workload section but also optionally writes the qos section and adds to the data_overlay object
|
||||
if (!workerStatuses[1].empty())
|
||||
|
|
|
@ -19,7 +19,7 @@ struct TriggerRecoveryLoopWorkload : TestWorkload {
|
|||
numRecoveries = getOption(options, LiteralStringRef("numRecoveries"), deterministicRandom()->randomInt(1, 10));
|
||||
delayBetweenRecoveries = getOption(options, LiteralStringRef("delayBetweenRecoveries"), 0.0);
|
||||
killAllProportion = getOption(options, LiteralStringRef("killAllProportion"), 0.1);
|
||||
ASSERT(numRecoveries > 0 && startTime >= 0 and delayBetweenRecoveries >= 0);
|
||||
ASSERT((numRecoveries > 0) && (startTime >= 0) && (delayBetweenRecoveries >= 0));
|
||||
TraceEvent(SevInfo, "TriggerRecoveryLoopSetup")
|
||||
.detail("StartTime", startTime)
|
||||
.detail("NumRecoveries", numRecoveries)
|
||||
|
|
Loading…
Reference in New Issue