Merge pull request #4069 from apple/revert-4067-add-limiting-health-metrics
Revert "Add limiting health metrics"
This commit is contained in:
commit
84b5e3f1ea
|
@ -866,7 +866,7 @@ Some of this information is also available in ``\xff\xff/status/json``, but thes
|
||||||
>>> for k, v in db.get_range_startswith('\xff\xff/metrics/health/'):
|
>>> for k, v in db.get_range_startswith('\xff\xff/metrics/health/'):
|
||||||
... print(k, v)
|
... print(k, v)
|
||||||
...
|
...
|
||||||
('\xff\xff/metrics/health/aggregate', '{"batch_limited":false,"limiting_storage_durability_lag":5000000,"limiting_storage_queue":1000,"tps_limit":483988.66315011407,"worst_storage_durability_lag":5000001,"worst_storage_queue":2036,"worst_log_queue":300}')
|
('\xff\xff/metrics/health/aggregate', '{"batch_limited":false,"tps_limit":483988.66315011407,"worst_storage_durability_lag":5000001,"worst_storage_queue":2036,"worst_log_queue":300}')
|
||||||
('\xff\xff/metrics/health/log/e639a9ad0373367784cc550c615c469b', '{"log_queue":300}')
|
('\xff\xff/metrics/health/log/e639a9ad0373367784cc550c615c469b', '{"log_queue":300}')
|
||||||
('\xff\xff/metrics/health/storage/ab2ce4caf743c9c1ae57063629c6678a', '{"cpu_usage":2.398696781487125,"disk_usage":0.059995917598039405,"storage_durability_lag":5000001,"storage_queue":2036}')
|
('\xff\xff/metrics/health/storage/ab2ce4caf743c9c1ae57063629c6678a', '{"cpu_usage":2.398696781487125,"disk_usage":0.059995917598039405,"storage_durability_lag":5000001,"storage_queue":2036}')
|
||||||
|
|
||||||
|
@ -874,17 +874,15 @@ Some of this information is also available in ``\xff\xff/status/json``, but thes
|
||||||
|
|
||||||
Aggregate stats about cluster health. Reading this key alone is slightly cheaper than reading any of the per-process keys.
|
Aggregate stats about cluster health. Reading this key alone is slightly cheaper than reading any of the per-process keys.
|
||||||
|
|
||||||
=================================== ======== ===============
|
============================ ======== ===============
|
||||||
**Field** **Type** **Description**
|
**Field** **Type** **Description**
|
||||||
---------------------------- -------- ---------------
|
---------------------------- -------- ---------------
|
||||||
batch_limited boolean Whether or not the cluster is limiting batch priority transactions
|
batch_limited boolean Whether or not the cluster is limiting batch priority transactions
|
||||||
limiting_storage_durability_lag number storage_durability_lag that ratekeeper is using to determine throttling (see the description for storage_durability_lag)
|
tps_limit number The rate at which normal priority transactions are allowed to start
|
||||||
limiting_storage_queue number storage_queue that ratekeeper is using to determine throttling (see the description for storage_queue)
|
worst_storage_durability_lag number See the description for storage_durability_lag
|
||||||
tps_limit number The rate at which normal priority transactions are allowed to start
|
worst_storage_queue number See the description for storage_queue
|
||||||
worst_storage_durability_lag number See the description for storage_durability_lag
|
worst_log_queue number See the description for log_queue
|
||||||
worst_storage_queue number See the description for storage_queue
|
============================ ======== ===============
|
||||||
worst_log_queue number See the description for log_queue
|
|
||||||
============================ ======== ===============
|
|
||||||
|
|
||||||
``\xff\xff/metrics/health/log/<id>``
|
``\xff\xff/metrics/health/log/<id>``
|
||||||
|
|
||||||
|
|
|
@ -997,9 +997,7 @@ struct HealthMetrics {
|
||||||
};
|
};
|
||||||
|
|
||||||
int64_t worstStorageQueue;
|
int64_t worstStorageQueue;
|
||||||
int64_t limitingStorageQueue;
|
|
||||||
int64_t worstStorageDurabilityLag;
|
int64_t worstStorageDurabilityLag;
|
||||||
int64_t limitingStorageDurabilityLag;
|
|
||||||
int64_t worstTLogQueue;
|
int64_t worstTLogQueue;
|
||||||
double tpsLimit;
|
double tpsLimit;
|
||||||
bool batchLimited;
|
bool batchLimited;
|
||||||
|
@ -1007,15 +1005,17 @@ struct HealthMetrics {
|
||||||
std::map<UID, int64_t> tLogQueue;
|
std::map<UID, int64_t> tLogQueue;
|
||||||
|
|
||||||
HealthMetrics()
|
HealthMetrics()
|
||||||
: worstStorageQueue(0), limitingStorageQueue(0), worstStorageDurabilityLag(0), limitingStorageDurabilityLag(0),
|
: worstStorageQueue(0)
|
||||||
worstTLogQueue(0), tpsLimit(0.0), batchLimited(false) {}
|
, worstStorageDurabilityLag(0)
|
||||||
|
, worstTLogQueue(0)
|
||||||
|
, tpsLimit(0.0)
|
||||||
|
, batchLimited(false)
|
||||||
|
{}
|
||||||
|
|
||||||
void update(const HealthMetrics& hm, bool detailedInput, bool detailedOutput)
|
void update(const HealthMetrics& hm, bool detailedInput, bool detailedOutput)
|
||||||
{
|
{
|
||||||
worstStorageQueue = hm.worstStorageQueue;
|
worstStorageQueue = hm.worstStorageQueue;
|
||||||
limitingStorageQueue = hm.limitingStorageQueue;
|
|
||||||
worstStorageDurabilityLag = hm.worstStorageDurabilityLag;
|
worstStorageDurabilityLag = hm.worstStorageDurabilityLag;
|
||||||
limitingStorageDurabilityLag = hm.limitingStorageDurabilityLag;
|
|
||||||
worstTLogQueue = hm.worstTLogQueue;
|
worstTLogQueue = hm.worstTLogQueue;
|
||||||
tpsLimit = hm.tpsLimit;
|
tpsLimit = hm.tpsLimit;
|
||||||
batchLimited = hm.batchLimited;
|
batchLimited = hm.batchLimited;
|
||||||
|
@ -1030,16 +1030,19 @@ struct HealthMetrics {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool operator==(HealthMetrics const& r) const {
|
bool operator==(HealthMetrics const& r) const {
|
||||||
return (worstStorageQueue == r.worstStorageQueue && limitingStorageQueue == r.limitingStorageQueue &&
|
return (
|
||||||
worstStorageDurabilityLag == r.worstStorageDurabilityLag &&
|
worstStorageQueue == r.worstStorageQueue &&
|
||||||
limitingStorageDurabilityLag == r.limitingStorageDurabilityLag && worstTLogQueue == r.worstTLogQueue &&
|
worstStorageDurabilityLag == r.worstStorageDurabilityLag &&
|
||||||
storageStats == r.storageStats && tLogQueue == r.tLogQueue && batchLimited == r.batchLimited);
|
worstTLogQueue == r.worstTLogQueue &&
|
||||||
|
storageStats == r.storageStats &&
|
||||||
|
tLogQueue == r.tLogQueue &&
|
||||||
|
batchLimited == r.batchLimited
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Ar>
|
template <class Ar>
|
||||||
void serialize(Ar& ar) {
|
void serialize(Ar& ar) {
|
||||||
serializer(ar, worstStorageQueue, worstStorageDurabilityLag, worstTLogQueue, tpsLimit, batchLimited,
|
serializer(ar, worstStorageQueue, worstStorageDurabilityLag, worstTLogQueue, tpsLimit, batchLimited, storageStats, tLogQueue);
|
||||||
storageStats, tLogQueue, limitingStorageQueue, limitingStorageDurabilityLag);
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -760,9 +760,7 @@ static Standalone<RangeResultRef> healthMetricsToKVPairs(const HealthMetrics& me
|
||||||
statsObj["batch_limited"] = metrics.batchLimited;
|
statsObj["batch_limited"] = metrics.batchLimited;
|
||||||
statsObj["tps_limit"] = metrics.tpsLimit;
|
statsObj["tps_limit"] = metrics.tpsLimit;
|
||||||
statsObj["worst_storage_durability_lag"] = metrics.worstStorageDurabilityLag;
|
statsObj["worst_storage_durability_lag"] = metrics.worstStorageDurabilityLag;
|
||||||
statsObj["limiting_storage_durability_lag"] = metrics.limitingStorageDurabilityLag;
|
|
||||||
statsObj["worst_storage_queue"] = metrics.worstStorageQueue;
|
statsObj["worst_storage_queue"] = metrics.worstStorageQueue;
|
||||||
statsObj["limiting_storage_queue"] = metrics.limitingStorageQueue;
|
|
||||||
statsObj["worst_log_queue"] = metrics.worstTLogQueue;
|
statsObj["worst_log_queue"] = metrics.worstTLogQueue;
|
||||||
std::string statsString =
|
std::string statsString =
|
||||||
json_spirit::write_string(json_spirit::mValue(statsObj), json_spirit::Output_options::raw_utf8);
|
json_spirit::write_string(json_spirit::mValue(statsObj), json_spirit::Output_options::raw_utf8);
|
||||||
|
|
|
@ -939,8 +939,6 @@ const KeyRef JSONSchemas::storageHealthSchema = LiteralStringRef(R"""(
|
||||||
const KeyRef JSONSchemas::aggregateHealthSchema = LiteralStringRef(R"""(
|
const KeyRef JSONSchemas::aggregateHealthSchema = LiteralStringRef(R"""(
|
||||||
{
|
{
|
||||||
"batch_limited": false,
|
"batch_limited": false,
|
||||||
"limiting_storage_durability_lag": 5050809,
|
|
||||||
"limiting_storage_queue": 2030,
|
|
||||||
"tps_limit": 457082.8105811302,
|
"tps_limit": 457082.8105811302,
|
||||||
"worst_storage_durability_lag": 5050809,
|
"worst_storage_durability_lag": 5050809,
|
||||||
"worst_storage_queue": 2030,
|
"worst_storage_queue": 2030,
|
||||||
|
|
|
@ -1118,9 +1118,7 @@ void updateRate(RatekeeperData* self, RatekeeperLimits* limits) {
|
||||||
}
|
}
|
||||||
|
|
||||||
self->healthMetrics.worstStorageQueue = worstStorageQueueStorageServer;
|
self->healthMetrics.worstStorageQueue = worstStorageQueueStorageServer;
|
||||||
self->healthMetrics.limitingStorageQueue = limitingStorageQueueStorageServer;
|
|
||||||
self->healthMetrics.worstStorageDurabilityLag = worstDurabilityLag;
|
self->healthMetrics.worstStorageDurabilityLag = worstDurabilityLag;
|
||||||
self->healthMetrics.limitingStorageDurabilityLag = limitingDurabilityLag;
|
|
||||||
|
|
||||||
double writeToReadLatencyLimit = 0;
|
double writeToReadLatencyLimit = 0;
|
||||||
Version worstVersionLag = 0;
|
Version worstVersionLag = 0;
|
||||||
|
|
|
@ -28,9 +28,7 @@
|
||||||
struct HealthMetricsApiWorkload : TestWorkload {
|
struct HealthMetricsApiWorkload : TestWorkload {
|
||||||
// Performance Metrics
|
// Performance Metrics
|
||||||
int64_t worstStorageQueue = 0;
|
int64_t worstStorageQueue = 0;
|
||||||
int64_t worstLimitingStorageQueue = 0;
|
|
||||||
int64_t worstStorageDurabilityLag = 0;
|
int64_t worstStorageDurabilityLag = 0;
|
||||||
int64_t worstLimitingStorageDurabilityLag = 0;
|
|
||||||
int64_t worstTLogQueue = 0;
|
int64_t worstTLogQueue = 0;
|
||||||
int64_t detailedWorstStorageQueue = 0;
|
int64_t detailedWorstStorageQueue = 0;
|
||||||
int64_t detailedWorstStorageDurabilityLag = 0;
|
int64_t detailedWorstStorageDurabilityLag = 0;
|
||||||
|
@ -93,9 +91,7 @@ struct HealthMetricsApiWorkload : TestWorkload {
|
||||||
if (!correctHealthMetricsState) {
|
if (!correctHealthMetricsState) {
|
||||||
TraceEvent(SevError, "IncorrectHealthMetricsState")
|
TraceEvent(SevError, "IncorrectHealthMetricsState")
|
||||||
.detail("WorstStorageQueue", worstStorageQueue)
|
.detail("WorstStorageQueue", worstStorageQueue)
|
||||||
.detail("WorstLimitingStorageQueue", worstLimitingStorageQueue)
|
|
||||||
.detail("WorstStorageDurabilityLag", worstStorageDurabilityLag)
|
.detail("WorstStorageDurabilityLag", worstStorageDurabilityLag)
|
||||||
.detail("WorstLimitingStorageDurabilityLag", worstLimitingStorageDurabilityLag)
|
|
||||||
.detail("WorstTLogQueue", worstTLogQueue)
|
.detail("WorstTLogQueue", worstTLogQueue)
|
||||||
.detail("DetailedWorstStorageQueue", detailedWorstStorageQueue)
|
.detail("DetailedWorstStorageQueue", detailedWorstStorageQueue)
|
||||||
.detail("DetailedWorstStorageDurabilityLag", detailedWorstStorageDurabilityLag)
|
.detail("DetailedWorstStorageDurabilityLag", detailedWorstStorageDurabilityLag)
|
||||||
|
@ -132,19 +128,13 @@ struct HealthMetricsApiWorkload : TestWorkload {
|
||||||
healthMetrics = newHealthMetrics;
|
healthMetrics = newHealthMetrics;
|
||||||
|
|
||||||
self->worstStorageQueue = std::max(self->worstStorageQueue, healthMetrics.worstStorageQueue);
|
self->worstStorageQueue = std::max(self->worstStorageQueue, healthMetrics.worstStorageQueue);
|
||||||
self->worstLimitingStorageQueue =
|
|
||||||
std::max(self->worstLimitingStorageQueue, healthMetrics.limitingStorageQueue);
|
|
||||||
self->worstStorageDurabilityLag =
|
self->worstStorageDurabilityLag =
|
||||||
std::max(self->worstStorageDurabilityLag, healthMetrics.worstStorageDurabilityLag);
|
std::max(self->worstStorageDurabilityLag, healthMetrics.worstStorageDurabilityLag);
|
||||||
self->worstLimitingStorageDurabilityLag =
|
|
||||||
std::max(self->worstLimitingStorageDurabilityLag, healthMetrics.limitingStorageDurabilityLag);
|
|
||||||
self->worstTLogQueue = std::max(self->worstTLogQueue, healthMetrics.worstTLogQueue);
|
self->worstTLogQueue = std::max(self->worstTLogQueue, healthMetrics.worstTLogQueue);
|
||||||
|
|
||||||
TraceEvent("HealthMetrics")
|
TraceEvent("HealthMetrics")
|
||||||
.detail("WorstStorageQueue", healthMetrics.worstStorageQueue)
|
.detail("WorstStorageQueue", healthMetrics.worstStorageQueue)
|
||||||
.detail("LimitingStorageQueue", healthMetrics.limitingStorageQueue)
|
|
||||||
.detail("WorstStorageDurabilityLag", healthMetrics.worstStorageDurabilityLag)
|
.detail("WorstStorageDurabilityLag", healthMetrics.worstStorageDurabilityLag)
|
||||||
.detail("LimitingStorageDurabilityLag", healthMetrics.limitingStorageDurabilityLag)
|
|
||||||
.detail("WorstTLogQueue", healthMetrics.worstTLogQueue)
|
.detail("WorstTLogQueue", healthMetrics.worstTLogQueue)
|
||||||
.detail("TpsLimit", healthMetrics.tpsLimit);
|
.detail("TpsLimit", healthMetrics.tpsLimit);
|
||||||
|
|
||||||
|
|
|
@ -87,6 +87,7 @@ if(WITH_PYTHON)
|
||||||
add_fdb_test(TEST_FILES StorageMetricsSampleTests.txt IGNORE)
|
add_fdb_test(TEST_FILES StorageMetricsSampleTests.txt IGNORE)
|
||||||
add_fdb_test(TEST_FILES StreamingWrite.txt IGNORE)
|
add_fdb_test(TEST_FILES StreamingWrite.txt IGNORE)
|
||||||
add_fdb_test(TEST_FILES ThreadSafety.txt IGNORE)
|
add_fdb_test(TEST_FILES ThreadSafety.txt IGNORE)
|
||||||
|
add_fdb_test(TEST_FILES Throttling.toml IGNORE)
|
||||||
add_fdb_test(TEST_FILES TraceEventMetrics.txt IGNORE)
|
add_fdb_test(TEST_FILES TraceEventMetrics.txt IGNORE)
|
||||||
add_fdb_test(TEST_FILES PopulateTPCC.txt IGNORE)
|
add_fdb_test(TEST_FILES PopulateTPCC.txt IGNORE)
|
||||||
add_fdb_test(TEST_FILES TPCC.txt IGNORE)
|
add_fdb_test(TEST_FILES TPCC.txt IGNORE)
|
||||||
|
@ -102,6 +103,7 @@ if(WITH_PYTHON)
|
||||||
add_fdb_test(TEST_FILES pt.TXT IGNORE)
|
add_fdb_test(TEST_FILES pt.TXT IGNORE)
|
||||||
add_fdb_test(TEST_FILES randomSelector.txt IGNORE)
|
add_fdb_test(TEST_FILES randomSelector.txt IGNORE)
|
||||||
add_fdb_test(TEST_FILES selectorCorrectness.txt IGNORE)
|
add_fdb_test(TEST_FILES selectorCorrectness.txt IGNORE)
|
||||||
|
add_fdb_test(TEST_FILES WriteTagThrottling.txt IGNORE)
|
||||||
add_fdb_test(TEST_FILES fast/AtomicBackupCorrectness.toml)
|
add_fdb_test(TEST_FILES fast/AtomicBackupCorrectness.toml)
|
||||||
add_fdb_test(TEST_FILES fast/AtomicBackupToDBCorrectness.toml)
|
add_fdb_test(TEST_FILES fast/AtomicBackupToDBCorrectness.toml)
|
||||||
add_fdb_test(TEST_FILES fast/AtomicOps.toml)
|
add_fdb_test(TEST_FILES fast/AtomicOps.toml)
|
||||||
|
@ -165,7 +167,6 @@ if(WITH_PYTHON)
|
||||||
add_fdb_test(TEST_FILES rare/RandomReadWriteTest.toml)
|
add_fdb_test(TEST_FILES rare/RandomReadWriteTest.toml)
|
||||||
add_fdb_test(TEST_FILES rare/SwizzledLargeApiCorrectness.toml)
|
add_fdb_test(TEST_FILES rare/SwizzledLargeApiCorrectness.toml)
|
||||||
add_fdb_test(TEST_FILES rare/RedwoodCorrectnessBTree.toml)
|
add_fdb_test(TEST_FILES rare/RedwoodCorrectnessBTree.toml)
|
||||||
add_fdb_test(TEST_FILES rare/Throttling.toml)
|
|
||||||
add_fdb_test(TEST_FILES rare/TransactionTagApiCorrectness.toml)
|
add_fdb_test(TEST_FILES rare/TransactionTagApiCorrectness.toml)
|
||||||
add_fdb_test(TEST_FILES rare/TransactionTagSwizzledApiCorrectness.toml)
|
add_fdb_test(TEST_FILES rare/TransactionTagSwizzledApiCorrectness.toml)
|
||||||
add_fdb_test(TEST_FILES rare/WriteTagThrottling.toml)
|
add_fdb_test(TEST_FILES rare/WriteTagThrottling.toml)
|
||||||
|
|
Loading…
Reference in New Issue