Merge pull request #9449 from sfc-gh-akejriwal/exclusion

Improve space estimation in checkExclusion()
This commit is contained in:
Ankita Kejriwal 2023-02-27 08:31:52 -08:00 committed by GitHub
commit f7108958bf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 25 additions and 7 deletions

View File

@ -973,9 +973,9 @@ ACTOR Future<bool> checkExclusion(Database db,
}
StatusObject status = wait(StatusClient::statusFetcher(db));
state std::string errorString =
"ERROR: Could not calculate the impact of this exclude on the total free space in the cluster.\n"
"ERROR: Could not calculate the impact of this exclude on the total available space in the cluster.\n"
"Please try the exclude again in 30 seconds.\n"
"Call set(\"0xff0xff/management/options/exclude/force\", ...) first to exclude without checking free "
"Call set(\"0xff0xff/management/options/exclude/force\", ...) first to exclude without checking available "
"space.\n";
StatusObjectReader statusObj(status);
@ -997,8 +997,11 @@ ACTOR Future<bool> checkExclusion(Database db,
state std::unordered_set<std::string> diskLocalities;
state int64_t totalKvStoreFreeBytes = 0;
state int64_t totalKvStoreFreeBytesNotExcluded = 0;
state int64_t totalKvStoreUsedBytes = 0;
state int64_t totalKvStoreUsedBytesNonExcluded = 0;
state int64_t totalKvStoreAvailableBytes = 0;
try {
for (auto proc : processesMap.obj()) {
StatusObjectReader process(proc.second);
@ -1036,13 +1039,22 @@ ACTOR Future<bool> checkExclusion(Database db,
return false;
}
int64_t available_bytes;
if (!role.get("kvstore_available_bytes", available_bytes)) {
*msg = ManagementAPIError::toJsonString(
false, markFailed ? "exclude failed" : "exclude", errorString);
return false;
}
totalKvStoreUsedBytes += used_bytes;
totalKvStoreFreeBytes += free_bytes;
totalKvStoreAvailableBytes += available_bytes;
if (!excluded) {
totalKvStoreUsedBytesNonExcluded += used_bytes;
if (disk_id.empty() || diskLocalities.find(disk_id) == diskLocalities.end()) {
totalKvStoreFreeBytes += free_bytes;
totalKvStoreFreeBytesNotExcluded += free_bytes;
if (!disk_id.empty()) {
diskLocalities.insert(disk_id);
}
@ -1061,11 +1073,17 @@ ACTOR Future<bool> checkExclusion(Database db,
return false;
}
double finalFreeRatio = 1 - (totalKvStoreUsedBytes / (totalKvStoreUsedBytesNonExcluded + totalKvStoreFreeBytes));
if (ssExcludedCount == ssTotalCount || finalFreeRatio <= 0.1) {
std::string temp = "ERROR: This exclude may cause the total free space in the cluster to drop below 10%.\n"
// The numerator is the total space in use by FDB that is not immediately reusable.
// This is calculated as: used + free - available = used + free - (free + reusable) = used - reusable.
// The denominator is the total capacity usable by FDB (either used or unused currently).
double finalUnavailableRatio =
(totalKvStoreUsedBytes + totalKvStoreFreeBytes - totalKvStoreAvailableBytes) /
std::max((totalKvStoreUsedBytesNonExcluded + totalKvStoreFreeBytesNotExcluded), (int64_t)1);
if (ssExcludedCount == ssTotalCount || finalUnavailableRatio > 0.9) {
std::string temp = "ERROR: This exclude may cause the total available space in the cluster to drop below 10%.\n"
"Call set(\"0xff0xff/management/options/exclude/force\", ...) first to exclude without "
"checking free space.\n";
"checking available space.\n";
*msg = ManagementAPIError::toJsonString(false, markFailed ? "exclude failed" : "exclude", temp);
return false;
}