Merge pull request #3320 from ajbeamon/backport-region-config-status-changes
Backport region config status changes to release-6.2
This commit is contained in:
commit
f6f9fb1147
|
@ -494,6 +494,7 @@
|
||||||
"data_distribution_disabled_for_ss_failures":true,
|
"data_distribution_disabled_for_ss_failures":true,
|
||||||
"data_distribution_disabled_for_rebalance":true,
|
"data_distribution_disabled_for_rebalance":true,
|
||||||
"data_distribution_disabled":true,
|
"data_distribution_disabled":true,
|
||||||
|
"active_primary_dc":"pv",
|
||||||
"configuration":{
|
"configuration":{
|
||||||
"log_anti_quorum":0,
|
"log_anti_quorum":0,
|
||||||
"log_replicas":2,
|
"log_replicas":2,
|
||||||
|
|
|
@ -2,6 +2,14 @@
|
||||||
Release Notes
|
Release Notes
|
||||||
#############
|
#############
|
||||||
|
|
||||||
|
6.2.23
|
||||||
|
======
|
||||||
|
|
||||||
|
Status
|
||||||
|
------
|
||||||
|
|
||||||
|
* Added ``cluster.active_primary_dc`` that indicates which datacenter is serving as the primary datacenter in multi-region setups. `(PR #3320) <https://github.com/apple/foundationdb/pull/3320>`_
|
||||||
|
|
||||||
6.2.22
|
6.2.22
|
||||||
======
|
======
|
||||||
|
|
||||||
|
|
|
@ -945,7 +945,11 @@ void printStatus(StatusObjectReader statusObj, StatusClient::StatusLevel level,
|
||||||
|
|
||||||
StatusObjectReader statusObjConfig;
|
StatusObjectReader statusObjConfig;
|
||||||
StatusArray excludedServersArr;
|
StatusArray excludedServersArr;
|
||||||
|
Optional<std::string> activePrimaryDC;
|
||||||
|
|
||||||
|
if (statusObjCluster.has("active_primary_dc")) {
|
||||||
|
activePrimaryDC = statusObjCluster["active_primary_dc"].get_str();
|
||||||
|
}
|
||||||
if (statusObjCluster.get("configuration", statusObjConfig)) {
|
if (statusObjCluster.get("configuration", statusObjConfig)) {
|
||||||
if (statusObjConfig.has("excluded_servers"))
|
if (statusObjConfig.has("excluded_servers"))
|
||||||
excludedServersArr = statusObjConfig.last().get_array();
|
excludedServersArr = statusObjConfig.last().get_array();
|
||||||
|
@ -1001,6 +1005,73 @@ void printStatus(StatusObjectReader statusObj, StatusClient::StatusLevel level,
|
||||||
|
|
||||||
if (statusObjConfig.get("log_routers", intVal))
|
if (statusObjConfig.get("log_routers", intVal))
|
||||||
outputString += format("\n Desired Log Routers - %d", intVal);
|
outputString += format("\n Desired Log Routers - %d", intVal);
|
||||||
|
|
||||||
|
outputString += "\n Usable Regions - ";
|
||||||
|
if (statusObjConfig.get("usable_regions", intVal)) {
|
||||||
|
outputString += std::to_string(intVal);
|
||||||
|
} else {
|
||||||
|
outputString += "unknown";
|
||||||
|
}
|
||||||
|
|
||||||
|
StatusArray regions;
|
||||||
|
if (statusObjConfig.has("regions")) {
|
||||||
|
outputString += "\n Regions: ";
|
||||||
|
regions = statusObjConfig["regions"].get_array();
|
||||||
|
bool isPrimary = false;
|
||||||
|
std::vector<std::string> regionSatelliteDCs;
|
||||||
|
std::string regionDC;
|
||||||
|
for (StatusObjectReader region : regions) {
|
||||||
|
for (StatusObjectReader dc : region["datacenters"].get_array()) {
|
||||||
|
if (!dc.has("satellite")) {
|
||||||
|
regionDC = dc["id"].get_str();
|
||||||
|
if (activePrimaryDC.present() && dc["id"].get_str() == activePrimaryDC.get()) {
|
||||||
|
isPrimary = true;
|
||||||
|
}
|
||||||
|
} else if (dc["satellite"].get_int() == 1) {
|
||||||
|
regionSatelliteDCs.push_back(dc["id"].get_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (activePrimaryDC.present()) {
|
||||||
|
if (isPrimary) {
|
||||||
|
outputString += "\n Primary -";
|
||||||
|
} else {
|
||||||
|
outputString += "\n Remote -";
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
outputString += "\n Region -";
|
||||||
|
}
|
||||||
|
outputString += format("\n Datacenter - %s", regionDC.c_str());
|
||||||
|
if (regionSatelliteDCs.size() > 0) {
|
||||||
|
outputString += "\n Satellite datacenters - ";
|
||||||
|
for (int i = 0; i < regionSatelliteDCs.size(); i++) {
|
||||||
|
if (i != regionSatelliteDCs.size() - 1) {
|
||||||
|
outputString += format("%s, ", regionSatelliteDCs[i].c_str());
|
||||||
|
} else {
|
||||||
|
outputString += format("%s", regionSatelliteDCs[i].c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
isPrimary = false;
|
||||||
|
if (region.get("satellite_redundancy_mode", strVal)) {
|
||||||
|
outputString += format("\n Satellite Redundancy Mode - %s", strVal.c_str());
|
||||||
|
}
|
||||||
|
if (region.get("satellite_anti_quorum", intVal)) {
|
||||||
|
outputString += format("\n Satellite Anti Quorum - %d", intVal);
|
||||||
|
}
|
||||||
|
if (region.get("satellite_logs", intVal)) {
|
||||||
|
outputString += format("\n Satellite Logs - %d", intVal);
|
||||||
|
}
|
||||||
|
if (region.get("satellite_log_policy", strVal)) {
|
||||||
|
outputString += format("\n Satellite Log Policy - %s", strVal.c_str());
|
||||||
|
}
|
||||||
|
if (region.get("satellite_log_replicas", intVal)) {
|
||||||
|
outputString += format("\n Satellite Log Replicas - %d", intVal);
|
||||||
|
}
|
||||||
|
if (region.get("satellite_usable_dcs", intVal)) {
|
||||||
|
outputString += format("\n Satellite Usable DCs - %d", intVal);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
catch (std::runtime_error& ) {
|
catch (std::runtime_error& ) {
|
||||||
outputString = outputStringCache;
|
outputString = outputStringCache;
|
||||||
|
|
|
@ -520,6 +520,7 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
|
||||||
"data_distribution_disabled_for_ss_failures":true,
|
"data_distribution_disabled_for_ss_failures":true,
|
||||||
"data_distribution_disabled_for_rebalance":true,
|
"data_distribution_disabled_for_rebalance":true,
|
||||||
"data_distribution_disabled":true,
|
"data_distribution_disabled":true,
|
||||||
|
"active_primary_dc":"pv",
|
||||||
"configuration":{
|
"configuration":{
|
||||||
"log_anti_quorum":0,
|
"log_anti_quorum":0,
|
||||||
"log_replicas":2,
|
"log_replicas":2,
|
||||||
|
|
|
@ -2145,6 +2145,35 @@ ACTOR Future<JsonBuilderObject> lockedStatusFetcher(Reference<AsyncVar<CachedSer
|
||||||
return statusObj;
|
return statusObj;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fetches the value stored at primaryDatacenterKey for inclusion in cluster status.
//
// cx       - database used to create the read transaction.
// messages - status message array. A "primary_dc_missing" message is appended when the
//            key has no value, and a "fetch_primary_dc_timedout" message when the fetch
//            times out.
//
// Returns the value read, or an empty Optional when the key is unset or the fetch timed
// out. Any error other than timed_out is handed to tr.onError(); if onError() rethrows,
// that error propagates to the caller.
ACTOR Future<Optional<Value>> getActivePrimaryDC(Database cx, JsonBuilderArray* messages) {
	state ReadYourWritesTransaction tr(cx);

	state Future<Void> readTimeout = delay(5); // so that we won't loop forever
	loop {
		try {
			// Checked at the top of every attempt so retries stop once the deadline has passed.
			if (readTimeout.isReady()) {
				throw timed_out();
			}
			tr.setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
			tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
			// Status should remain available while the database is locked. Without this
			// option the read is expected to fail with database_locked, which onError()
			// does not retry, so the error would escape this actor.
			tr.setOption(FDBTransactionOptions::LOCK_AWARE);
			Optional<Value> res = wait(timeoutError(tr.get(primaryDatacenterKey), 5));
			if (!res.present()) {
				messages->push_back(
				    JsonString::makeMessage("primary_dc_missing", "Unable to determine primary datacenter."));
			}
			return res;
		} catch (Error& e) {
			if (e.code() == error_code_timed_out) {
				messages->push_back(
				    JsonString::makeMessage("fetch_primary_dc_timedout", "Fetching primary DC timed out."));
				return Optional<Value>();
			} else {
				wait(tr.onError(e));
			}
		}
	}
}
|
||||||
|
|
||||||
// constructs the cluster section of the json status output
|
// constructs the cluster section of the json status output
|
||||||
ACTOR Future<StatusReply> clusterGetStatus(
|
ACTOR Future<StatusReply> clusterGetStatus(
|
||||||
Reference<AsyncVar<CachedSerialization<ServerDBInfo>>> db,
|
Reference<AsyncVar<CachedSerialization<ServerDBInfo>>> db,
|
||||||
|
@ -2323,6 +2352,7 @@ ACTOR Future<StatusReply> clusterGetStatus(
|
||||||
state Future<ErrorOr<vector<std::pair<MasterProxyInterface, EventMap>>>> proxyFuture = errorOr(getProxiesAndMetrics(db, address_workers));
|
state Future<ErrorOr<vector<std::pair<MasterProxyInterface, EventMap>>>> proxyFuture = errorOr(getProxiesAndMetrics(db, address_workers));
|
||||||
|
|
||||||
state int minReplicasRemaining = -1;
|
state int minReplicasRemaining = -1;
|
||||||
|
state Future<Optional<Value>> primaryDCFO = getActivePrimaryDC(cx, &messages);
|
||||||
std::vector<Future<JsonBuilderObject>> futures2;
|
std::vector<Future<JsonBuilderObject>> futures2;
|
||||||
futures2.push_back(dataStatusFetcher(ddWorker, configuration.get(), &minReplicasRemaining));
|
futures2.push_back(dataStatusFetcher(ddWorker, configuration.get(), &minReplicasRemaining));
|
||||||
futures2.push_back(workloadStatusFetcher(db, workers, mWorker, rkWorker, &qos, &data_overlay, &status_incomplete_reasons, storageServerFuture));
|
futures2.push_back(workloadStatusFetcher(db, workers, mWorker, rkWorker, &qos, &data_overlay, &status_incomplete_reasons, storageServerFuture));
|
||||||
|
@ -2341,11 +2371,17 @@ ACTOR Future<StatusReply> clusterGetStatus(
|
||||||
statusObj["fault_tolerance"] = faultToleranceStatusFetcher(configuration.get(), coordinators, workers, extraTlogEligibleZones, minReplicasRemaining, loadResult.present() && loadResult.get().healthyZone.present());
|
statusObj["fault_tolerance"] = faultToleranceStatusFetcher(configuration.get(), coordinators, workers, extraTlogEligibleZones, minReplicasRemaining, loadResult.present() && loadResult.get().healthyZone.present());
|
||||||
}
|
}
|
||||||
|
|
||||||
JsonBuilderObject configObj = configurationFetcher(configuration, coordinators, &status_incomplete_reasons);
|
state JsonBuilderObject configObj =
|
||||||
|
configurationFetcher(configuration, coordinators, &status_incomplete_reasons);
|
||||||
|
|
||||||
|
wait(success(primaryDCFO));
|
||||||
|
if (primaryDCFO.get().present()) {
|
||||||
|
statusObj["active_primary_dc"] = primaryDCFO.get().get();
|
||||||
|
}
|
||||||
// configArr could be empty
|
// configArr could be empty
|
||||||
if (!configObj.empty())
|
if (!configObj.empty()) {
|
||||||
statusObj["configuration"] = configObj;
|
statusObj["configuration"] = configObj;
|
||||||
|
}
|
||||||
|
|
||||||
// workloadStatusFetcher returns the workload section but also optionally writes the qos section and adds to the data_overlay object
|
// workloadStatusFetcher returns the workload section but also optionally writes the qos section and adds to the data_overlay object
|
||||||
if (!workerStatuses[1].empty())
|
if (!workerStatuses[1].empty())
|
||||||
|
|
|
@ -19,7 +19,7 @@ struct TriggerRecoveryLoopWorkload : TestWorkload {
|
||||||
numRecoveries = getOption(options, LiteralStringRef("numRecoveries"), deterministicRandom()->randomInt(1, 10));
|
numRecoveries = getOption(options, LiteralStringRef("numRecoveries"), deterministicRandom()->randomInt(1, 10));
|
||||||
delayBetweenRecoveries = getOption(options, LiteralStringRef("delayBetweenRecoveries"), 0.0);
|
delayBetweenRecoveries = getOption(options, LiteralStringRef("delayBetweenRecoveries"), 0.0);
|
||||||
killAllProportion = getOption(options, LiteralStringRef("killAllProportion"), 0.1);
|
killAllProportion = getOption(options, LiteralStringRef("killAllProportion"), 0.1);
|
||||||
ASSERT(numRecoveries > 0 && startTime >= 0 and delayBetweenRecoveries >= 0);
|
ASSERT((numRecoveries > 0) && (startTime >= 0) && (delayBetweenRecoveries >= 0));
|
||||||
TraceEvent(SevInfo, "TriggerRecoveryLoopSetup")
|
TraceEvent(SevInfo, "TriggerRecoveryLoopSetup")
|
||||||
.detail("StartTime", startTime)
|
.detail("StartTime", startTime)
|
||||||
.detail("NumRecoveries", numRecoveries)
|
.detail("NumRecoveries", numRecoveries)
|
||||||
|
|
Loading…
Reference in New Issue