From 1e249143a40337cc69de59d07523365afc463599 Mon Sep 17 00:00:00 2001
From: He Liu <86634338+liquid-helium@users.noreply.github.com>
Date: Thu, 5 Oct 2023 13:35:06 -0700
Subject: [PATCH] Allow large shard (#10961)

* Added large shard.

* getMaxShardSize() returns the fixed max shard size.

* Resolved comments.

* fmt.
---
 fdbclient/ServerKnobs.cpp                  |  4 +++-
 fdbclient/include/fdbclient/ServerKnobs.h  |  3 +++
 fdbserver/DDShardTracker.actor.cpp         | 20 ++++++++++++++++----
 3 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/fdbclient/ServerKnobs.cpp b/fdbclient/ServerKnobs.cpp
index 7f1de75bfb..5335db4b9f 100644
--- a/fdbclient/ServerKnobs.cpp
+++ b/fdbclient/ServerKnobs.cpp
@@ -179,6 +179,8 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
 	init( MAX_DEST_CPU_PERCENT, 100.0 );
 	init( DD_TEAM_PIVOT_UPDATE_DELAY, 5.0 );
+	init( ALLOW_LARGE_SHARD, false ); if( randomize && BUGGIFY ) ALLOW_LARGE_SHARD = true;
+	init( MAX_LARGE_SHARD_BYTES, 1000000000 ); // 1G
 	init( SHARD_ENCODE_LOCATION_METADATA, false ); if( randomize && BUGGIFY ) SHARD_ENCODE_LOCATION_METADATA = true;
 	init( ENABLE_DD_PHYSICAL_SHARD, false ); // EXPERIMENTAL; If true, SHARD_ENCODE_LOCATION_METADATA must be true; When true, optimization of data move between DCs is disabled
 	init( DD_PHYSICAL_SHARD_MOVE_PROBABILITY, 0.0 ); if( isSimulated ) DD_PHYSICAL_SHARD_MOVE_PROBABILITY = 0.5;
@@ -333,7 +335,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
 	init( TENANT_CACHE_STORAGE_USAGE_TRACE_INTERVAL, 300 );
 	init( CP_FETCH_TENANTS_OVER_STORAGE_QUOTA_INTERVAL, 5 ); if( randomize && BUGGIFY ) CP_FETCH_TENANTS_OVER_STORAGE_QUOTA_INTERVAL = deterministicRandom()->randomInt(1, 10);
 	init( DD_BUILD_EXTRA_TEAMS_OVERRIDE, 10 ); if( randomize && BUGGIFY ) DD_BUILD_EXTRA_TEAMS_OVERRIDE = 2;
-	init( DD_SHARD_TRACKING_LOG_SEVERITY, 1);
+	init( DD_SHARD_TRACKING_LOG_SEVERITY, 1 );
 	init( ENFORCE_SHARD_COUNT_PER_TEAM, false ); if( randomize && BUGGIFY ) ENFORCE_SHARD_COUNT_PER_TEAM = true;
 	init( DESIRED_MAX_SHARDS_PER_TEAM, 1000 ); if( randomize && BUGGIFY ) DESIRED_MAX_SHARDS_PER_TEAM = 10;
diff --git a/fdbclient/include/fdbclient/ServerKnobs.h b/fdbclient/include/fdbclient/ServerKnobs.h
index 10c2a3bb8a..279649c74b 100644
--- a/fdbclient/include/fdbclient/ServerKnobs.h
+++ b/fdbclient/include/fdbclient/ServerKnobs.h
@@ -205,6 +205,9 @@ public:
 	// min(STORAGE_METRICS_POLLING_DELAY,DETAILED_METRIC_UPDATE_RATE) otherwise the pivot won't change;
 	double DD_TEAM_PIVOT_UPDATE_DELAY;
 
+	bool ALLOW_LARGE_SHARD;
+	int MAX_LARGE_SHARD_BYTES;
+
 	bool SHARD_ENCODE_LOCATION_METADATA; // If true, location metadata will contain shard ID.
 	bool ENABLE_DD_PHYSICAL_SHARD; // EXPERIMENTAL; If true, SHARD_ENCODE_LOCATION_METADATA must be true.
 	double DD_PHYSICAL_SHARD_MOVE_PROBABILITY; // Percentage of physical shard move, in the range of [0, 1].
diff --git a/fdbserver/DDShardTracker.actor.cpp b/fdbserver/DDShardTracker.actor.cpp
index 0831aacce4..51f2de9481 100644
--- a/fdbserver/DDShardTracker.actor.cpp
+++ b/fdbserver/DDShardTracker.actor.cpp
@@ -119,10 +119,15 @@ ShardSizeBounds getShardSizeBounds(KeyRangeRef shard, int64_t maxShardSize) {
 }
 
 int64_t getMaxShardSize(double dbSizeEstimate) {
-	return std::min((SERVER_KNOBS->MIN_SHARD_BYTES + (int64_t)std::sqrt(std::max<double>(dbSizeEstimate, 0)) *
-	                                                     SERVER_KNOBS->SHARD_BYTES_PER_SQRT_BYTES) *
-	                    SERVER_KNOBS->SHARD_BYTES_RATIO,
-	                (int64_t)SERVER_KNOBS->MAX_SHARD_BYTES);
+	int64_t size = std::min((SERVER_KNOBS->MIN_SHARD_BYTES + (int64_t)std::sqrt(std::max<double>(dbSizeEstimate, 0)) *
+	                                                             SERVER_KNOBS->SHARD_BYTES_PER_SQRT_BYTES) *
+	                            SERVER_KNOBS->SHARD_BYTES_RATIO,
+	                        (int64_t)SERVER_KNOBS->MAX_SHARD_BYTES);
+	if (SERVER_KNOBS->ALLOW_LARGE_SHARD) {
+		size = std::max(size, static_cast<int64_t>(SERVER_KNOBS->MAX_LARGE_SHARD_BYTES));
+	}
+
+	return size;
 }
 
 bool ddLargeTeamEnabled() {
@@ -928,6 +933,9 @@ Future<Void> shardMerger(DataDistributionTracker* self,
 	const UID actionId = deterministicRandom()->randomUniqueID();
 	const Severity stSev = static_cast<Severity>(SERVER_KNOBS->DD_SHARD_TRACKING_LOG_SEVERITY);
 	int64_t maxShardSize = self->maxShardSize->get().get();
+	if (SERVER_KNOBS->ALLOW_LARGE_SHARD) {
+		maxShardSize = SERVER_KNOBS->MAX_LARGE_SHARD_BYTES;
+	}
 
 	auto prevIter = self->shards->rangeContaining(keys.begin);
 	auto nextIter = self->shards->rangeContaining(keys.begin);
@@ -1117,6 +1125,10 @@ ACTOR Future<Void> shardEvaluator(DataDistributionTracker* self,
 	StorageMetrics const& stats = shardSize->get().get().metrics;
 	auto bandwidthStatus = getBandwidthStatus(stats);
 
+	if (SERVER_KNOBS->ALLOW_LARGE_SHARD) {
+		shardBounds.max.bytes = SERVER_KNOBS->MAX_LARGE_SHARD_BYTES;
+	}
+
 	bool sizeSplit = stats.bytes > shardBounds.max.bytes,
 	     writeSplit = bandwidthStatus == BandwidthStatusHigh && keys.begin < keyServersKeys.begin;
 	bool shouldSplit = sizeSplit || writeSplit;
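
Note for reviewers: the snippet below is a minimal standalone sketch (not FoundationDB source) of the behavior this patch introduces in getMaxShardSize(). The knob values in the Knobs struct are illustrative placeholders rather than the real ServerKnobs defaults (except MAX_LARGE_SHARD_BYTES, which the patch initializes to 1G); with ALLOW_LARGE_SHARD enabled, the computed per-shard size cap is raised to at least MAX_LARGE_SHARD_BYTES.

// Standalone sketch of the patched getMaxShardSize() logic; not FoundationDB
// source. Knob values below are placeholders, except MAX_LARGE_SHARD_BYTES (1G
// as in the patch).
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>

struct Knobs {
	int64_t MIN_SHARD_BYTES = 200000;            // placeholder value
	int64_t SHARD_BYTES_PER_SQRT_BYTES = 45;     // placeholder value
	int64_t SHARD_BYTES_RATIO = 4;               // placeholder value
	int64_t MAX_SHARD_BYTES = 500000000;         // placeholder value
	bool ALLOW_LARGE_SHARD = false;              // new knob from this patch
	int64_t MAX_LARGE_SHARD_BYTES = 1000000000;  // 1G, as in the patch
};

int64_t getMaxShardSize(const Knobs& k, double dbSizeEstimate) {
	// The per-shard size cap grows with sqrt(database size) and is clamped at
	// MAX_SHARD_BYTES, mirroring the original formula.
	int64_t size = std::min((k.MIN_SHARD_BYTES +
	                         (int64_t)std::sqrt(std::max<double>(dbSizeEstimate, 0)) *
	                             k.SHARD_BYTES_PER_SQRT_BYTES) *
	                            k.SHARD_BYTES_RATIO,
	                        k.MAX_SHARD_BYTES);
	// New behavior: with ALLOW_LARGE_SHARD set, the cap is raised to at least
	// MAX_LARGE_SHARD_BYTES, so shards may grow to that size before data
	// distribution considers splitting them.
	if (k.ALLOW_LARGE_SHARD) {
		size = std::max(size, k.MAX_LARGE_SHARD_BYTES);
	}
	return size;
}

int main() {
	Knobs k;
	std::cout << "default cap:          " << getMaxShardSize(k, 1e12) << " bytes\n";
	k.ALLOW_LARGE_SHARD = true;
	std::cout << "ALLOW_LARGE_SHARD on: " << getMaxShardSize(k, 1e12) << " bytes\n";
}

In the tracker itself, shardMerger() and shardEvaluator() use MAX_LARGE_SHARD_BYTES directly as the merge and split bound when the knob is enabled, as shown in the hunks above.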