Report lag with the usual "seconds" and "versions" fields. Rename and deprecate the qos.*version_lag_storage_server fields.

This commit is contained in:
A.J. Beamon 2019-08-15 13:42:39 -07:00
parent 02ba73917b
commit bb72cdd36a
4 changed files with 51 additions and 23 deletions

View File

@ -265,8 +265,22 @@
"worst_queue_bytes_storage_server":0,
"limiting_version_lag_storage_server":0,
"worst_version_lag_storage_server":0,
"limiting_durability_lag_storage_server":0,
"worst_durability_lag_storage_server":0
"limiting_data_lag_storage_server":{
"versions":0,
"seconds":0.0
},
"worst_data_lag_storage_server":{
"versions":0,
"seconds":0.0
},
"limiting_durability_lag_storage_server":{
"versions":0,
"seconds":0.0
},
"worst_durability_lag_storage_server":{
"versions":0,
"seconds":0.0
}
},
"incompatible_connections":[
],

View File

@ -54,7 +54,8 @@ Status
* ``connected_clients`` is now only a sample of the connected clients, rather than a complete list. `(PR #1902) <https://github.com/apple/foundationdb/pull/1902>`_.
* Added ``max_protocol_clients`` to the ``supported_versions`` section, which provides a sample of connected clients which cannot connect to any higher protocol version. `(PR #1902) <https://github.com/apple/foundationdb/pull/1902>`_.
* Clients which connect without specifying their supported versions are tracked as an ``Unknown`` version in the ``supported_versions`` section. [6.2.2] `(PR #1990) <https://github.com/apple/foundationdb/pull/1990>`_.
* Added ``worst_durability_lag_storage_server`` and ``limiting_durability_lag_storage_server`` to the ``cluster.qos`` section. These report the durability lag values being used by ratekeeper to potentially limit the transaction rate. [6.2.3] `(PR #2003) <https://github.com/apple/foundationdb/pull/2003>`_.
* Added ``worst_durability_lag_storage_server`` and ``limiting_durability_lag_storage_server`` to the ``cluster.qos`` section, each with subfields ``versions`` and ``seconds``. These report the durability lag values being used by ratekeeper to potentially limit the transaction rate. [6.2.3] `(PR #2003) <https://github.com/apple/foundationdb/pull/2003>`_.
* Added ``worst_data_lag_storage_server`` and ``limiting_data_lag_storage_server`` to the ``cluster.qos`` section, each with subfields ``versions`` and ``seconds``. These are meant to replace ``worst_version_lag_storage_server`` and ``limiting_version_lag_storage_server``, which are now deprecated. [6.2.3] `(PR #2003) <https://github.com/apple/foundationdb/pull/2003>`_.
Bindings
--------

View File

@ -287,8 +287,22 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
"worst_queue_bytes_storage_server":0,
"limiting_version_lag_storage_server":0,
"worst_version_lag_storage_server":0,
"limiting_durability_lag_storage_server":0,
"worst_durability_lag_storage_server":0
"limiting_data_lag_storage_server":{
"versions":0,
"seconds":0.0
},
"worst_data_lag_storage_server":{
"versions":0,
"seconds":0.0
},
"limiting_durability_lag_storage_server":{
"versions":0,
"seconds":0.0
},
"worst_durability_lag_storage_server":{
"versions":0,
"seconds":0.0
}
},
"incompatible_connections":[

View File

@ -388,6 +388,13 @@ static JsonBuilderObject machineStatusFetcher(WorkerEvents mMetrics, vector<Work
return machineMap;
}
// Builds a JSON object describing a lag value with two fields:
//   "versions" - the lag exactly as passed in
//   "seconds"  - the same lag divided by SERVER_KNOBS->VERSIONS_PER_SECOND,
//                computed in floating point
JsonBuilderObject getLagObject(int64_t versions) {
	// Convert the knob to double before dividing so the quotient is not truncated.
	const double seconds = versions / static_cast<double>(SERVER_KNOBS->VERSIONS_PER_SECOND);

	JsonBuilderObject result;
	result["versions"] = versions;
	result["seconds"] = seconds;
	return result;
}
struct MachineMemoryInfo {
double memoryUsage;
double numProcesses;
@ -474,17 +481,8 @@ struct RolesInfo {
obj["read_latency_bands"] = addLatencyBandInfo(readLatencyMetrics);
}
JsonBuilderObject dataLag;
dataLag["versions"] = versionLag;
dataLagSeconds = versionLag / (double)SERVER_KNOBS->VERSIONS_PER_SECOND;
dataLag["seconds"] = dataLagSeconds;
JsonBuilderObject durabilityLag;
durabilityLag["versions"] = version - durableVersion;
durabilityLag["seconds"] = (version - durableVersion) / (double)SERVER_KNOBS->VERSIONS_PER_SECOND;
obj["data_lag"] = dataLag;
obj["durability_lag"] = durabilityLag;
obj["data_lag"] = getLagObject(versionLag);
obj["durability_lag"] = getLagObject(version - durableVersion);
} catch (Error& e) {
if(e.code() != error_code_attribute_not_found)
@ -1611,10 +1609,15 @@ ACTOR static Future<JsonBuilderObject> workloadStatusFetcher(Reference<AsyncVar<
(*data_overlay)["least_operating_space_bytes_storage_server"] = std::max(worstFreeSpaceStorageServer, (int64_t)0);
(*qos).setKeyRawNumber("worst_queue_bytes_storage_server", ratekeeper.getValue("WorstStorageServerQueue"));
(*qos).setKeyRawNumber("limiting_queue_bytes_storage_server", ratekeeper.getValue("LimitingStorageServerQueue"));
// TODO: These can be removed in the next release after 6.2
(*qos).setKeyRawNumber("worst_version_lag_storage_server", ratekeeper.getValue("WorstStorageServerVersionLag"));
(*qos).setKeyRawNumber("limiting_version_lag_storage_server", ratekeeper.getValue("LimitingStorageServerVersionLag"));
(*qos).setKeyRawNumber("worst_durability_lag_storage_server", ratekeeper.getValue("WorstStorageServerDurabilityLag"));
(*qos).setKeyRawNumber("limiting_durability_lag_storage_server", ratekeeper.getValue("LimitingStorageServerDurabilityLag"));
(*qos)["worst_data_lag_storage_server"] = getLagObject(ratekeeper.getInt64("WorstStorageServerVersionLag"));
(*qos)["limiting_data_lag_storage_server"] = getLagObject(ratekeeper.getInt64("LimitingStorageServerVersionLag"));
(*qos)["worst_durability_lag_storage_server"] = getLagObject(ratekeeper.getInt64("WorstStorageServerDurabilityLag"));
(*qos)["limiting_durability_lag_storage_server"] = getLagObject(ratekeeper.getInt64("LimitingStorageServerDurabilityLag"));
}
if(tlogCount > 0) {
@ -2306,11 +2309,7 @@ ACTOR Future<StatusReply> clusterGetStatus(
incompatibleConnectionsArray.push_back(it.toString());
}
statusObj["incompatible_connections"] = incompatibleConnectionsArray;
StatusObject datacenterLag;
datacenterLag["versions"] = datacenterVersionDifference;
datacenterLag["seconds"] = datacenterVersionDifference / (double)SERVER_KNOBS->VERSIONS_PER_SECOND;
statusObj["datacenter_lag"] = datacenterLag;
statusObj["datacenter_lag"] = getLagObject(datacenterVersionDifference);
int totalDegraded = 0;
for(auto& it : workers) {