shard size log (#11342)

Yao Xiao 2024-04-29 13:42:19 -07:00 committed by GitHub
parent d0eb68bd25
commit 67a588380e
4 changed files with 14 additions and 8 deletions


@@ -143,6 +143,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( WIGGLING_RELOCATION_PARALLELISM_PER_SOURCE_SERVER, 2 ); if( randomize && BUGGIFY ) WIGGLING_RELOCATION_PARALLELISM_PER_SOURCE_SERVER = 1;
init( RELOCATION_PARALLELISM_PER_SOURCE_SERVER, 2 ); if( randomize && BUGGIFY ) RELOCATION_PARALLELISM_PER_SOURCE_SERVER = 1;
init( RELOCATION_PARALLELISM_PER_DEST_SERVER, 10 ); if( randomize && BUGGIFY ) RELOCATION_PARALLELISM_PER_DEST_SERVER = 1; // Note: if this is smaller than FETCH_KEYS_PARALLELISM, this will artificially reduce performance. The current default of 10 is probably too high but is set conservatively for now.
init( MERGE_RELOCATION_PARALLELISM_PER_TEAM, 6 ); if (randomize && BUGGIFY ) MERGE_RELOCATION_PARALLELISM_PER_TEAM = 1;
init( DD_QUEUE_MAX_KEY_SERVERS, 100 ); // Do not buggify
init( DD_REBALANCE_PARALLELISM, 50 );
init( DD_REBALANCE_RESET_AMOUNT, 30 );


@@ -132,6 +132,7 @@ public:
// than healthy priority
double RELOCATION_PARALLELISM_PER_SOURCE_SERVER;
double RELOCATION_PARALLELISM_PER_DEST_SERVER;
double MERGE_RELOCATION_PARALLELISM_PER_TEAM;
int DD_QUEUE_MAX_KEY_SERVERS;
int DD_REBALANCE_PARALLELISM;
int DD_REBALANCE_RESET_AMOUNT;


@@ -433,8 +433,13 @@ int getSrcWorkFactor(RelocateData const& relocation, int singleRegionTeamSize) {
// we want to set PRIORITY_PERPETUAL_STORAGE_WIGGLE to a reasonably large value
// to make this parallelism take effect
return WORK_FULL_UTILIZATION / SERVER_KNOBS->WIGGLING_RELOCATION_PARALLELISM_PER_SOURCE_SERVER;
else // for now we assume that any message at a lower priority can best be assumed to have a full team left for work
else if (relocation.priority == SERVER_KNOBS->PRIORITY_MERGE_SHARD)
return WORK_FULL_UTILIZATION / SERVER_KNOBS->MERGE_RELOCATION_PARALLELISM_PER_TEAM;
else { // for now we assume that any message at a lower priority can best be assumed to have a full team left for
// work
return WORK_FULL_UTILIZATION / singleRegionTeamSize / SERVER_KNOBS->RELOCATION_PARALLELISM_PER_SOURCE_SERVER;
}
}
int getDestWorkFactor() {
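
For context, a rough standalone sketch (not the actual data distribution queue code) of how a per-team work factor such as WORK_FULL_UTILIZATION / MERGE_RELOCATION_PARALLELISM_PER_TEAM bounds concurrent merge relocations. The budget constant of 10000 and the helper/main scaffolding below are assumptions for illustration only:

#include <cstdio>

constexpr int WORK_FULL_UTILIZATION = 10000;              // assumed per-team work budget
constexpr int MERGE_RELOCATION_PARALLELISM_PER_TEAM = 6;  // mirrors the new knob's default

int mergeWorkFactor() {
    // Each in-flight merge relocation charges this much against the team's budget,
    // so at most MERGE_RELOCATION_PARALLELISM_PER_TEAM merges fit concurrently.
    return WORK_FULL_UTILIZATION / MERGE_RELOCATION_PARALLELISM_PER_TEAM;
}

int main() {
    int busy = 0, launched = 0;
    while (busy + mergeWorkFactor() <= WORK_FULL_UTILIZATION) {
        busy += mergeWorkFactor();
        ++launched;
    }
    printf("concurrent merge relocations allowed: %d\n", launched); // prints 6
    return 0;
}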


@@ -127,6 +127,12 @@ int64_t getMaxShardSize(double dbSizeEstimate) {
size = std::max(size, static_cast<int64_t>(SERVER_KNOBS->MAX_LARGE_SHARD_BYTES));
}
TraceEvent("MaxShardSize")
.suppressFor(60.0)
.detail("Bytes", size)
.detail("EstimatedDbSize", dbSizeEstimate)
.detail("SqrtBytes", SERVER_KNOBS->SHARD_BYTES_PER_SQRT_BYTES)
.detail("AllowLargeShard", SERVER_KNOBS->ALLOW_LARGE_SHARD);
return size;
}
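
The hunk above shows only the tail of getMaxShardSize plus the new trace event, which is rate-limited to once per 60 seconds via suppressFor(60.0). A rough sketch of the overall shape of that computation follows; the knob stand-ins, their values, and the exact formula are assumptions for illustration, not the real defaults:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Assumed stand-ins for the knobs referenced above; values are illustrative only.
constexpr int64_t MIN_SHARD_BYTES = 50'000'000;
constexpr int64_t MAX_SHARD_BYTES = 500'000'000;
constexpr int64_t SHARD_BYTES_PER_SQRT_BYTES = 45;
constexpr bool    ALLOW_LARGE_SHARD = false;
constexpr int64_t MAX_LARGE_SHARD_BYTES = 10'000'000'000;

int64_t getMaxShardSizeSketch(double dbSizeEstimate) {
    // Max shard size grows with the square root of the estimated database size,
    // clamped to [MIN_SHARD_BYTES, MAX_SHARD_BYTES].
    int64_t size = MIN_SHARD_BYTES +
                   static_cast<int64_t>(std::sqrt(std::max(dbSizeEstimate, 0.0))) * SHARD_BYTES_PER_SQRT_BYTES;
    size = std::clamp(size, MIN_SHARD_BYTES, MAX_SHARD_BYTES);
    if (ALLOW_LARGE_SHARD) {
        // With large shards enabled, the cap is raised to MAX_LARGE_SHARD_BYTES,
        // matching the std::max(size, MAX_LARGE_SHARD_BYTES) branch shown above.
        size = std::max(size, MAX_LARGE_SHARD_BYTES);
    }
    return size;
}

int main() {
    printf("max shard size for a 1 TB estimate: %lld bytes\n",
           static_cast<long long>(getMaxShardSizeSketch(1e12)));
    return 0;
}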
@@ -933,9 +939,6 @@ Future<Void> shardMerger(DataDistributionTracker* self,
const UID actionId = deterministicRandom()->randomUniqueID();
const Severity stSev = static_cast<Severity>(SERVER_KNOBS->DD_SHARD_TRACKING_LOG_SEVERITY);
int64_t maxShardSize = self->maxShardSize->get().get();
if (SERVER_KNOBS->ALLOW_LARGE_SHARD) {
maxShardSize = SERVER_KNOBS->MAX_LARGE_SHARD_BYTES;
}
auto prevIter = self->shards->rangeContaining(keys.begin);
auto nextIter = self->shards->rangeContaining(keys.begin);
@@ -1125,10 +1128,6 @@ ACTOR Future<Void> shardEvaluator(DataDistributionTracker* self,
StorageMetrics const& stats = shardSize->get().get().metrics;
auto bandwidthStatus = getBandwidthStatus(stats);
if (SERVER_KNOBS->ALLOW_LARGE_SHARD) {
shardBounds.max.bytes = SERVER_KNOBS->MAX_LARGE_SHARD_BYTES;
}
bool sizeSplit = stats.bytes > shardBounds.max.bytes,
writeSplit = bandwidthStatus == BandwidthStatusHigh && keys.begin < keyServersKeys.begin;
bool shouldSplit = sizeSplit || writeSplit;
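
With the local ALLOW_LARGE_SHARD override gone, shardEvaluator compares against bounds that already reflect the large-shard cap (via getMaxShardSize). Below is a simplified stand-in for the split predicate visible above; the types, the enum, and the writeSplitEligible flag (which takes the place of the keys.begin < keyServersKeys.begin check) are illustrative assumptions:

#include <cstdint>

enum class BandwidthStatus { Low, Normal, High };

struct StorageMetricsSketch {
    int64_t bytes = 0;                   // current shard size
    int64_t bytesWrittenPerKSecond = 0;  // write bandwidth proxy
};

// Split when the shard exceeds the (already large-shard-aware) max size,
// or when write bandwidth is high and the shard is eligible for a write-driven split.
bool shouldSplitSketch(const StorageMetricsSketch& stats,
                       int64_t maxShardBytes,
                       BandwidthStatus bandwidthStatus,
                       bool writeSplitEligible) {
    const bool sizeSplit = stats.bytes > maxShardBytes;
    const bool writeSplit = bandwidthStatus == BandwidthStatus::High && writeSplitEligible;
    return sizeSplit || writeSplit;
}

int main() {
    StorageMetricsSketch stats;
    stats.bytes = 600'000'000;
    // Splits on size alone here: 600 MB exceeds the assumed 500 MB bound.
    bool split = shouldSplitSketch(stats, 500'000'000, BandwidthStatus::Normal, true);
    return split ? 0 : 1;
}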