add more statistics
This commit is contained in:
parent
3b241955e7
commit
f40a2a68fc
|
@ -164,13 +164,20 @@ struct GetMetricsRequest {
|
|||
GetMetricsRequest(KeyRange const& keys) : keys(keys) {}
|
||||
};
|
||||
|
||||
// Reply payload for a GetTopKMetricsRequest: the selected shard metrics plus the
// range of read load observed while the shards were being examined.
struct GetTopKMetricsReply {
|
||||
// Metrics of the selected shards. fetchTopKShardMetrics_impl sends at most topK
// entries; the timeout path sends a default-constructed reply, so this may be empty.
std::vector<StorageMetrics> metrics;
|
||||
// Smallest / largest bytesReadPerKSecond recorded by the sender. Both remain -1 in a
// default-constructed reply (e.g. the one sent on DD_SHARD_METRICS_TIMEOUT).
// NOTE(review): the tracker accumulates these as int64_t and they are stored as
// double here — confirm the implicit conversion is intended.
double minReadLoad = -1, maxReadLoad = -1;
|
||||
// Default reply: no metrics, min/max read load left at -1.
GetTopKMetricsReply() {}
|
||||
// Builds a reply from a copy of `m` and the given read-load bounds.
GetTopKMetricsReply(std::vector<StorageMetrics> const& m, double minReadLoad, double maxReadLoad)
|
||||
: metrics(m), minReadLoad(minReadLoad), maxReadLoad(maxReadLoad) {}
|
||||
};
|
||||
struct GetTopKMetricsRequest {
|
||||
// Comparator contract: returns true when a's score is greater than b's (a > b)
|
||||
typedef std::function<bool(const StorageMetrics& a, const StorageMetrics& b)> MetricsComparator;
|
||||
int topK = 1; // default only return the top 1 shard based on the comparator
|
||||
MetricsComparator comparator; // Return true if a.score > b.score, return the largest topK in keys
|
||||
std::vector<KeyRange> keys;
|
||||
Promise<std::vector<StorageMetrics>> reply; // topK storage metrics
|
||||
Promise<GetTopKMetricsReply> reply; // topK storage metrics
|
||||
double maxBytesReadPerKSecond = 0; // all returned shards won't exceed this read load
|
||||
|
||||
GetTopKMetricsRequest() {}
|
||||
|
|
|
@ -1554,7 +1554,7 @@ ACTOR Future<bool> rebalanceReadLoad(DDQueueData* self,
|
|||
return a.bytesReadPerKSecond / std::max(a.bytes * 1.0, 1.0 * SERVER_KNOBS->MIN_SHARD_BYTES) >
|
||||
b.bytesReadPerKSecond / std::max(b.bytes * 1.0, 1.0 * SERVER_KNOBS->MIN_SHARD_BYTES);
|
||||
};
|
||||
state std::vector<StorageMetrics> metricsList = wait(brokenPromiseToNever(self->getTopKMetrics.getReply(req)));
|
||||
state GetTopKMetricsReply reply = wait(brokenPromiseToNever(self->getTopKMetrics.getReply(req)));
|
||||
wait(ready(healthMetrics));
|
||||
auto cpu = getWorstCpu(healthMetrics.get(), sourceTeam->getServerIDs());
|
||||
if (cpu < SERVER_KNOBS->READ_REBALANCE_CPU_THRESHOLD) { // 15.0 +- (0.3 * 15) < 20.0
|
||||
|
@ -1562,10 +1562,10 @@ ACTOR Future<bool> rebalanceReadLoad(DDQueueData* self,
|
|||
return false;
|
||||
}
|
||||
|
||||
if (!metricsList.empty()) {
|
||||
traceEvent->detail("KthReadLoad1", metricsList[metricsList.size() - 1].bytesReadPerKSecond)
|
||||
.detail("KthReadLoad2", metricsList[0].bytesReadPerKSecond);
|
||||
}
|
||||
auto& metricsList = reply.metrics;
|
||||
// NOTE: randomize is important here since we don't want to always push the same shard into the queue
|
||||
deterministicRandom()->randomShuffle(metricsList);
|
||||
traceEvent->detail("MinReadLoad", reply.minReadLoad).detail("MaxReadLoad", reply.maxReadLoad);
|
||||
|
||||
int chosenIdx = -1;
|
||||
for (int i = 0; i < metricsList.size(); ++i) {
|
||||
|
|
|
@ -844,7 +844,8 @@ ACTOR Future<Void> fetchTopKShardMetrics_impl(DataDistributionTracker* self, Get
|
|||
loop {
|
||||
onChange = Future<Void>();
|
||||
returnMetrics.clear();
|
||||
|
||||
state int64_t minReadLoad = std::numeric_limits<int64_t>::max();
|
||||
state int64_t maxReadLoad = std::numeric_limits<int64_t>::min();
|
||||
state int i;
|
||||
for (i = 0; i < SERVER_KNOBS->DD_SHARD_COMPARE_LIMIT && i < req.keys.size(); ++i) {
|
||||
auto range = req.keys[i];
|
||||
|
@ -863,6 +864,9 @@ ACTOR Future<Void> fetchTopKShardMetrics_impl(DataDistributionTracker* self, Get
|
|||
break;
|
||||
}
|
||||
|
||||
minReadLoad = std::min(metrics.bytesReadPerKSecond, minReadLoad);
|
||||
maxReadLoad = std::max(metrics.bytesReadPerKSecond, maxReadLoad);
|
||||
|
||||
if (metrics.bytesReadPerKSecond > 0 && metrics.bytesReadPerKSecond <= req.maxBytesReadPerKSecond) {
|
||||
metrics.keys = range;
|
||||
returnMetrics.push_back(metrics);
|
||||
|
@ -873,14 +877,16 @@ ACTOR Future<Void> fetchTopKShardMetrics_impl(DataDistributionTracker* self, Get
|
|||
// FIXME(xwang): Do we need to track slow task here?
|
||||
if (!onChange.isValid()) {
|
||||
if (req.topK >= returnMetrics.size())
|
||||
req.reply.send(returnMetrics);
|
||||
req.reply.send(GetTopKMetricsReply(returnMetrics, minReadLoad, maxReadLoad));
|
||||
else {
|
||||
std::nth_element(returnMetrics.begin(),
|
||||
returnMetrics.begin() + req.topK - 1,
|
||||
returnMetrics.end(),
|
||||
req.comparator);
|
||||
req.reply.send(
|
||||
std::vector<StorageMetrics>(returnMetrics.begin(), returnMetrics.begin() + req.topK));
|
||||
req.reply.send(GetTopKMetricsReply(
|
||||
std::vector<StorageMetrics>(returnMetrics.begin(), returnMetrics.begin() + req.topK),
|
||||
minReadLoad,
|
||||
maxReadLoad));
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
@ -898,7 +904,7 @@ ACTOR Future<Void> fetchTopKShardMetrics(DataDistributionTracker* self, GetTopKM
|
|||
when(wait(fetchTopKShardMetrics_impl(self, req))) {}
|
||||
when(wait(delay(SERVER_KNOBS->DD_SHARD_METRICS_TIMEOUT))) {
|
||||
TEST(true); // TopK DD_SHARD_METRICS_TIMEOUT
|
||||
req.reply.send(std::vector<StorageMetrics>(1));
|
||||
req.reply.send(GetTopKMetricsReply());
|
||||
}
|
||||
}
|
||||
return Void();
|
||||
|
|
Loading…
Reference in New Issue