Merge pull request #2230 from ajbeamon/fix-fault-tolerance-reporting-with-remote-regions

Fix: status would fail to account for remote regions when reporting fault tolerance.
This commit is contained in:
Evan Tschannen 2019-10-16 14:51:48 -07:00 committed by GitHub
commit 552eb44bf8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 14 additions and 11 deletions

View File

@ -51,6 +51,7 @@ Fixes
* Loading a 6.1 or newer ``fdb_c`` library as a secondary client using the multi-version client could lead to an infinite recursion when run with API versions older than 610. [6.2.5] `(PR #2169) <https://github.com/apple/foundationdb/pull/2169>`_
* Using C API functions that were removed in 6.1 when using API version 610 or above now results in a compilation error. [6.2.5] `(PR #2169) <https://github.com/apple/foundationdb/pull/2169>`_
* Coordinator changes could fail to complete if the database wasn't allowing any transactions to start. [6.2.6] `(PR #2191) <https://github.com/apple/foundationdb/pull/2191>`_
* Status would report incorrect fault tolerance metrics when a remote region was configured and the primary region lost a storage replica. [6.2.6] `(PR #2230) <https://github.com/apple/foundationdb/pull/2230>`_
Status
------

View File

@ -1274,7 +1274,7 @@ static JsonBuilderObject configurationFetcher(Optional<DatabaseConfiguration> co
return statusObj; return statusObj;
} }
ACTOR static Future<JsonBuilderObject> dataStatusFetcher(WorkerDetails ddWorker, int *minReplicasRemaining) { ACTOR static Future<JsonBuilderObject> dataStatusFetcher(WorkerDetails ddWorker, DatabaseConfiguration configuration, int *minReplicasRemaining) {
state JsonBuilderObject statusObjData; state JsonBuilderObject statusObjData;
try { try {
@ -1339,6 +1339,7 @@ ACTOR static Future<JsonBuilderObject> dataStatusFetcher(WorkerDetails ddWorker,
continue; continue;
} }
int replicas = configuration.storageTeamSize;
bool primary = inFlight.getInt("Primary"); bool primary = inFlight.getInt("Primary");
int highestPriority = inFlight.getInt("HighestPriority"); int highestPriority = inFlight.getInt("HighestPriority");
@ -1359,27 +1360,21 @@ ACTOR static Future<JsonBuilderObject> dataStatusFetcher(WorkerDetails ddWorker,
stateSectionObj["name"] = "missing_data"; stateSectionObj["name"] = "missing_data";
stateSectionObj["description"] = "No replicas remain of some data"; stateSectionObj["description"] = "No replicas remain of some data";
stateSectionObj["min_replicas_remaining"] = 0; stateSectionObj["min_replicas_remaining"] = 0;
if(primary) { replicas = 0;
*minReplicasRemaining = 0;
}
} }
else if (highestPriority >= SERVER_KNOBS->PRIORITY_TEAM_1_LEFT) { else if (highestPriority >= SERVER_KNOBS->PRIORITY_TEAM_1_LEFT) {
stateSectionObj["healthy"] = false; stateSectionObj["healthy"] = false;
stateSectionObj["name"] = "healing"; stateSectionObj["name"] = "healing";
stateSectionObj["description"] = "Only one replica remains of some data"; stateSectionObj["description"] = "Only one replica remains of some data";
stateSectionObj["min_replicas_remaining"] = 1; stateSectionObj["min_replicas_remaining"] = 1;
if(primary) { replicas = 1;
*minReplicasRemaining = 1;
}
} }
else if (highestPriority >= SERVER_KNOBS->PRIORITY_TEAM_2_LEFT) { else if (highestPriority >= SERVER_KNOBS->PRIORITY_TEAM_2_LEFT) {
stateSectionObj["healthy"] = false; stateSectionObj["healthy"] = false;
stateSectionObj["name"] = "healing"; stateSectionObj["name"] = "healing";
stateSectionObj["description"] = "Only two replicas remain of some data"; stateSectionObj["description"] = "Only two replicas remain of some data";
stateSectionObj["min_replicas_remaining"] = 2; stateSectionObj["min_replicas_remaining"] = 2;
if(primary) { replicas = 2;
*minReplicasRemaining = 2;
}
} }
else if (highestPriority >= SERVER_KNOBS->PRIORITY_TEAM_UNHEALTHY) { else if (highestPriority >= SERVER_KNOBS->PRIORITY_TEAM_UNHEALTHY) {
stateSectionObj["healthy"] = false; stateSectionObj["healthy"] = false;
@ -1416,6 +1411,13 @@ ACTOR static Future<JsonBuilderObject> dataStatusFetcher(WorkerDetails ddWorker,
statusObjData["state"] = stateSectionObj; statusObjData["state"] = stateSectionObj;
} }
} }
if(primary) {
*minReplicasRemaining = std::max(*minReplicasRemaining, 0) + replicas;
}
else if(replicas > 0) {
*minReplicasRemaining = std::max(*minReplicasRemaining, 0) + 1;
}
} }
statusObjData["team_trackers"] = teamTrackers; statusObjData["team_trackers"] = teamTrackers;
} }
@ -2235,7 +2237,7 @@ ACTOR Future<StatusReply> clusterGetStatus(
state int minReplicasRemaining = -1; state int minReplicasRemaining = -1;
std::vector<Future<JsonBuilderObject>> futures2; std::vector<Future<JsonBuilderObject>> futures2;
futures2.push_back(dataStatusFetcher(ddWorker, &minReplicasRemaining)); futures2.push_back(dataStatusFetcher(ddWorker, configuration.get(), &minReplicasRemaining));
futures2.push_back(workloadStatusFetcher(db, workers, mWorker, rkWorker, &qos, &data_overlay, &status_incomplete_reasons, storageServerFuture)); futures2.push_back(workloadStatusFetcher(db, workers, mWorker, rkWorker, &qos, &data_overlay, &status_incomplete_reasons, storageServerFuture));
futures2.push_back(layerStatusFetcher(cx, &messages, &status_incomplete_reasons)); futures2.push_back(layerStatusFetcher(cx, &messages, &status_incomplete_reasons));
futures2.push_back(lockedStatusFetcher(db, &messages, &status_incomplete_reasons)); futures2.push_back(lockedStatusFetcher(db, &messages, &status_incomplete_reasons));