From 209ebcc5950fe246ee0a6b995dcbe9ecc2688c68 Mon Sep 17 00:00:00 2001 From: Trevor Clinkenbeard <65554662+sfc-gh-tclinkenbeard@users.noreply.github.com> Date: Fri, 13 Nov 2020 17:08:46 -0800 Subject: [PATCH] Revert "Add limiting health metrics" --- .../sphinx/source/developer-guide.rst | 22 +++++++-------- fdbclient/FDBTypes.h | 27 ++++++++++--------- fdbclient/NativeAPI.actor.cpp | 2 -- fdbclient/Schemas.cpp | 2 -- fdbserver/Ratekeeper.actor.cpp | 2 -- .../workloads/HealthMetricsApi.actor.cpp | 10 ------- tests/CMakeLists.txt | 3 ++- tests/{rare => }/Throttling.toml | 0 8 files changed, 27 insertions(+), 41 deletions(-) rename tests/{rare => }/Throttling.toml (100%) diff --git a/documentation/sphinx/source/developer-guide.rst b/documentation/sphinx/source/developer-guide.rst index e143cab3e3..e82b73392b 100644 --- a/documentation/sphinx/source/developer-guide.rst +++ b/documentation/sphinx/source/developer-guide.rst @@ -866,7 +866,7 @@ Some of this information is also available in ``\xff\xff/status/json``, but thes >>> for k, v in db.get_range_startswith('\xff\xff/metrics/health/'): ... print(k, v) ... - ('\xff\xff/metrics/health/aggregate', '{"batch_limited":false,"limiting_storage_durability_lag":5000000,"limiting_storage_queue":1000,"tps_limit":483988.66315011407,"worst_storage_durability_lag":5000001,"worst_storage_queue":2036,"worst_log_queue":300}') + ('\xff\xff/metrics/health/aggregate', '{"batch_limited":false,"tps_limit":483988.66315011407,"worst_storage_durability_lag":5000001,"worst_storage_queue":2036,"worst_log_queue":300}') ('\xff\xff/metrics/health/log/e639a9ad0373367784cc550c615c469b', '{"log_queue":300}') ('\xff\xff/metrics/health/storage/ab2ce4caf743c9c1ae57063629c6678a', '{"cpu_usage":2.398696781487125,"disk_usage":0.059995917598039405,"storage_durability_lag":5000001,"storage_queue":2036}') @@ -874,17 +874,15 @@ Some of this information is also available in ``\xff\xff/status/json``, but thes Aggregate stats about cluster health. Reading this key alone is slightly cheaper than reading any of the per-process keys. -=================================== ======== =============== -**Field** **Type** **Description** ----------------------------- -------- --------------- -batch_limited boolean Whether or not the cluster is limiting batch priority transactions -limiting_storage_durability_lag number storage_durability_lag that ratekeeper is using to determing throttling (see the description for storage_durability_lag) -limiting_storage_queue number storage_queue that ratekeeper is using to determing throttling (see the description for storage_queue) -tps_limit number The rate at which normal priority transactions are allowed to start -worst_storage_durability_lag number See the description for storage_durability_lag -worst_storage_queue number See the description for storage_queue -worst_log_queue number See the description for log_queue -============================ ======== =============== +============================ ======== =============== +**Field** **Type** **Description** +---------------------------- -------- --------------- +batch_limited boolean Whether or not the cluster is limiting batch priority transactions +tps_limit number The rate at which normal priority transactions are allowed to start +worst_storage_durability_lag number See the description for storage_durability_lag +worst_storage_queue number See the description for storage_queue +worst_log_queue number See the description for log_queue +============================ ======== =============== ``\xff\xff/metrics/health/log/`` diff --git a/fdbclient/FDBTypes.h b/fdbclient/FDBTypes.h index 1b95e63447..7bae9ec78d 100644 --- a/fdbclient/FDBTypes.h +++ b/fdbclient/FDBTypes.h @@ -997,9 +997,7 @@ struct HealthMetrics { }; int64_t worstStorageQueue; - int64_t limitingStorageQueue; int64_t worstStorageDurabilityLag; - int64_t limitingStorageDurabilityLag; int64_t worstTLogQueue; double tpsLimit; bool batchLimited; @@ -1007,15 +1005,17 @@ struct HealthMetrics { std::map tLogQueue; HealthMetrics() - : worstStorageQueue(0), limitingStorageQueue(0), worstStorageDurabilityLag(0), limitingStorageDurabilityLag(0), - worstTLogQueue(0), tpsLimit(0.0), batchLimited(false) {} + : worstStorageQueue(0) + , worstStorageDurabilityLag(0) + , worstTLogQueue(0) + , tpsLimit(0.0) + , batchLimited(false) + {} void update(const HealthMetrics& hm, bool detailedInput, bool detailedOutput) { worstStorageQueue = hm.worstStorageQueue; - limitingStorageQueue = hm.limitingStorageQueue; worstStorageDurabilityLag = hm.worstStorageDurabilityLag; - limitingStorageDurabilityLag = hm.limitingStorageDurabilityLag; worstTLogQueue = hm.worstTLogQueue; tpsLimit = hm.tpsLimit; batchLimited = hm.batchLimited; @@ -1030,16 +1030,19 @@ struct HealthMetrics { } bool operator==(HealthMetrics const& r) const { - return (worstStorageQueue == r.worstStorageQueue && limitingStorageQueue == r.limitingStorageQueue && - worstStorageDurabilityLag == r.worstStorageDurabilityLag && - limitingStorageDurabilityLag == r.limitingStorageDurabilityLag && worstTLogQueue == r.worstTLogQueue && - storageStats == r.storageStats && tLogQueue == r.tLogQueue && batchLimited == r.batchLimited); + return ( + worstStorageQueue == r.worstStorageQueue && + worstStorageDurabilityLag == r.worstStorageDurabilityLag && + worstTLogQueue == r.worstTLogQueue && + storageStats == r.storageStats && + tLogQueue == r.tLogQueue && + batchLimited == r.batchLimited + ); } template void serialize(Ar& ar) { - serializer(ar, worstStorageQueue, worstStorageDurabilityLag, worstTLogQueue, tpsLimit, batchLimited, - storageStats, tLogQueue, limitingStorageQueue, limitingStorageDurabilityLag); + serializer(ar, worstStorageQueue, worstStorageDurabilityLag, worstTLogQueue, tpsLimit, batchLimited, storageStats, tLogQueue); } }; diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index e537e17c6e..622f967a3a 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -760,9 +760,7 @@ static Standalone healthMetricsToKVPairs(const HealthMetrics& me statsObj["batch_limited"] = metrics.batchLimited; statsObj["tps_limit"] = metrics.tpsLimit; statsObj["worst_storage_durability_lag"] = metrics.worstStorageDurabilityLag; - statsObj["limiting_storage_durability_lag"] = metrics.limitingStorageDurabilityLag; statsObj["worst_storage_queue"] = metrics.worstStorageQueue; - statsObj["limiting_storage_queue"] = metrics.limitingStorageQueue; statsObj["worst_log_queue"] = metrics.worstTLogQueue; std::string statsString = json_spirit::write_string(json_spirit::mValue(statsObj), json_spirit::Output_options::raw_utf8); diff --git a/fdbclient/Schemas.cpp b/fdbclient/Schemas.cpp index 62eed02d0c..f3e72d8efd 100644 --- a/fdbclient/Schemas.cpp +++ b/fdbclient/Schemas.cpp @@ -939,8 +939,6 @@ const KeyRef JSONSchemas::storageHealthSchema = LiteralStringRef(R"""( const KeyRef JSONSchemas::aggregateHealthSchema = LiteralStringRef(R"""( { "batch_limited": false, - "limiting_storage_durability_lag": 5050809, - "limiting_storage_queue": 2030, "tps_limit": 457082.8105811302, "worst_storage_durability_lag": 5050809, "worst_storage_queue": 2030, diff --git a/fdbserver/Ratekeeper.actor.cpp b/fdbserver/Ratekeeper.actor.cpp index 7c57e0b04c..43f82345a4 100644 --- a/fdbserver/Ratekeeper.actor.cpp +++ b/fdbserver/Ratekeeper.actor.cpp @@ -1118,9 +1118,7 @@ void updateRate(RatekeeperData* self, RatekeeperLimits* limits) { } self->healthMetrics.worstStorageQueue = worstStorageQueueStorageServer; - self->healthMetrics.limitingStorageQueue = limitingStorageQueueStorageServer; self->healthMetrics.worstStorageDurabilityLag = worstDurabilityLag; - self->healthMetrics.limitingStorageDurabilityLag = limitingDurabilityLag; double writeToReadLatencyLimit = 0; Version worstVersionLag = 0; diff --git a/fdbserver/workloads/HealthMetricsApi.actor.cpp b/fdbserver/workloads/HealthMetricsApi.actor.cpp index 9566514b04..76ccf24624 100644 --- a/fdbserver/workloads/HealthMetricsApi.actor.cpp +++ b/fdbserver/workloads/HealthMetricsApi.actor.cpp @@ -28,9 +28,7 @@ struct HealthMetricsApiWorkload : TestWorkload { // Performance Metrics int64_t worstStorageQueue = 0; - int64_t worstLimitingStorageQueue = 0; int64_t worstStorageDurabilityLag = 0; - int64_t worstLimitingStorageDurabilityLag = 0; int64_t worstTLogQueue = 0; int64_t detailedWorstStorageQueue = 0; int64_t detailedWorstStorageDurabilityLag = 0; @@ -93,9 +91,7 @@ struct HealthMetricsApiWorkload : TestWorkload { if (!correctHealthMetricsState) { TraceEvent(SevError, "IncorrectHealthMetricsState") .detail("WorstStorageQueue", worstStorageQueue) - .detail("WorstLimitingStorageQueue", worstLimitingStorageQueue) .detail("WorstStorageDurabilityLag", worstStorageDurabilityLag) - .detail("WorstLimitingStorageDurabilityLag", worstLimitingStorageDurabilityLag) .detail("WorstTLogQueue", worstTLogQueue) .detail("DetailedWorstStorageQueue", detailedWorstStorageQueue) .detail("DetailedWorstStorageDurabilityLag", detailedWorstStorageDurabilityLag) @@ -132,19 +128,13 @@ struct HealthMetricsApiWorkload : TestWorkload { healthMetrics = newHealthMetrics; self->worstStorageQueue = std::max(self->worstStorageQueue, healthMetrics.worstStorageQueue); - self->worstLimitingStorageQueue = - std::max(self->worstLimitingStorageQueue, healthMetrics.limitingStorageQueue); self->worstStorageDurabilityLag = std::max(self->worstStorageDurabilityLag, healthMetrics.worstStorageDurabilityLag); - self->worstLimitingStorageDurabilityLag = - std::max(self->worstLimitingStorageDurabilityLag, healthMetrics.limitingStorageDurabilityLag); self->worstTLogQueue = std::max(self->worstTLogQueue, healthMetrics.worstTLogQueue); TraceEvent("HealthMetrics") .detail("WorstStorageQueue", healthMetrics.worstStorageQueue) - .detail("LimitingStorageQueue", healthMetrics.limitingStorageQueue) .detail("WorstStorageDurabilityLag", healthMetrics.worstStorageDurabilityLag) - .detail("LimitingStorageDurabilityLag", healthMetrics.limitingStorageDurabilityLag) .detail("WorstTLogQueue", healthMetrics.worstTLogQueue) .detail("TpsLimit", healthMetrics.tpsLimit); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 01a8459e71..bdf8147381 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -87,6 +87,7 @@ if(WITH_PYTHON) add_fdb_test(TEST_FILES StorageMetricsSampleTests.txt IGNORE) add_fdb_test(TEST_FILES StreamingWrite.txt IGNORE) add_fdb_test(TEST_FILES ThreadSafety.txt IGNORE) + add_fdb_test(TEST_FILES Throttling.toml IGNORE) add_fdb_test(TEST_FILES TraceEventMetrics.txt IGNORE) add_fdb_test(TEST_FILES PopulateTPCC.txt IGNORE) add_fdb_test(TEST_FILES TPCC.txt IGNORE) @@ -102,6 +103,7 @@ if(WITH_PYTHON) add_fdb_test(TEST_FILES pt.TXT IGNORE) add_fdb_test(TEST_FILES randomSelector.txt IGNORE) add_fdb_test(TEST_FILES selectorCorrectness.txt IGNORE) + add_fdb_test(TEST_FILES WriteTagThrottling.txt IGNORE) add_fdb_test(TEST_FILES fast/AtomicBackupCorrectness.toml) add_fdb_test(TEST_FILES fast/AtomicBackupToDBCorrectness.toml) add_fdb_test(TEST_FILES fast/AtomicOps.toml) @@ -165,7 +167,6 @@ if(WITH_PYTHON) add_fdb_test(TEST_FILES rare/RandomReadWriteTest.toml) add_fdb_test(TEST_FILES rare/SwizzledLargeApiCorrectness.toml) add_fdb_test(TEST_FILES rare/RedwoodCorrectnessBTree.toml) - add_fdb_test(TEST_FILES rare/Throttling.toml) add_fdb_test(TEST_FILES rare/TransactionTagApiCorrectness.toml) add_fdb_test(TEST_FILES rare/TransactionTagSwizzledApiCorrectness.toml) add_fdb_test(TEST_FILES rare/WriteTagThrottling.toml) diff --git a/tests/rare/Throttling.toml b/tests/Throttling.toml similarity index 100% rename from tests/rare/Throttling.toml rename to tests/Throttling.toml