diff --git a/fdbclient/ServerKnobs.cpp b/fdbclient/ServerKnobs.cpp
index 115a588763..9e1f86907b 100644
--- a/fdbclient/ServerKnobs.cpp
+++ b/fdbclient/ServerKnobs.cpp
@@ -143,6 +143,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
 	init( WIGGLING_RELOCATION_PARALLELISM_PER_SOURCE_SERVER, 2 ); if( randomize && BUGGIFY ) WIGGLING_RELOCATION_PARALLELISM_PER_SOURCE_SERVER = 1;
 	init( RELOCATION_PARALLELISM_PER_SOURCE_SERVER, 2 ); if( randomize && BUGGIFY ) RELOCATION_PARALLELISM_PER_SOURCE_SERVER = 1;
 	init( RELOCATION_PARALLELISM_PER_DEST_SERVER, 10 ); if( randomize && BUGGIFY ) RELOCATION_PARALLELISM_PER_DEST_SERVER = 1; // Note: if this is smaller than FETCH_KEYS_PARALLELISM, this will artificially reduce performance. The current default of 10 is probably too high but is set conservatively for now.
+	init( MERGE_RELOCATION_PARALLELISM_PER_TEAM, 6 ); if( randomize && BUGGIFY ) MERGE_RELOCATION_PARALLELISM_PER_TEAM = 1;
 	init( DD_QUEUE_MAX_KEY_SERVERS, 100 ); // Do not buggify
 	init( DD_REBALANCE_PARALLELISM, 50 );
 	init( DD_REBALANCE_RESET_AMOUNT, 30 );
diff --git a/fdbclient/include/fdbclient/ServerKnobs.h b/fdbclient/include/fdbclient/ServerKnobs.h
index c1c9ef8c9b..0e75161652 100644
--- a/fdbclient/include/fdbclient/ServerKnobs.h
+++ b/fdbclient/include/fdbclient/ServerKnobs.h
@@ -132,6 +132,7 @@ public:
 	// than healthy priority
 	double RELOCATION_PARALLELISM_PER_SOURCE_SERVER;
 	double RELOCATION_PARALLELISM_PER_DEST_SERVER;
+	double MERGE_RELOCATION_PARALLELISM_PER_TEAM;
 	int DD_QUEUE_MAX_KEY_SERVERS;
 	int DD_REBALANCE_PARALLELISM;
 	int DD_REBALANCE_RESET_AMOUNT;
diff --git a/fdbserver/DDRelocationQueue.actor.cpp b/fdbserver/DDRelocationQueue.actor.cpp
index d5ede05fae..df81b8052a 100644
--- a/fdbserver/DDRelocationQueue.actor.cpp
+++ b/fdbserver/DDRelocationQueue.actor.cpp
@@ -433,8 +433,13 @@ int getSrcWorkFactor(RelocateData const& relocation, int singleRegionTeamSize) {
 		// we want to set PRIORITY_PERPETUAL_STORAGE_WIGGLE to a reasonably large value
 		// to make this parallelism take effect
 		return WORK_FULL_UTILIZATION / SERVER_KNOBS->WIGGLING_RELOCATION_PARALLELISM_PER_SOURCE_SERVER;
-	else // for now we assume that any message at a lower priority can best be assumed to have a full team left for work
+	else if (relocation.priority == SERVER_KNOBS->PRIORITY_MERGE_SHARD)
+		return WORK_FULL_UTILIZATION / SERVER_KNOBS->MERGE_RELOCATION_PARALLELISM_PER_TEAM;
+	else { // for now we assume that any message at a lower priority can best be assumed to have a full team left for
+	       // work
+		return WORK_FULL_UTILIZATION / singleRegionTeamSize / SERVER_KNOBS->RELOCATION_PARALLELISM_PER_SOURCE_SERVER;
+	}
 }
 
 int getDestWorkFactor() {
diff --git a/fdbserver/DDShardTracker.actor.cpp b/fdbserver/DDShardTracker.actor.cpp
index 1e0d0e91e2..3bb481a84d 100644
--- a/fdbserver/DDShardTracker.actor.cpp
+++ b/fdbserver/DDShardTracker.actor.cpp
@@ -127,6 +127,12 @@ int64_t getMaxShardSize(double dbSizeEstimate) {
 		size = std::max(size, static_cast<int64_t>(SERVER_KNOBS->MAX_LARGE_SHARD_BYTES));
 	}
 
+	TraceEvent("MaxShardSize")
+	    .suppressFor(60.0)
+	    .detail("Bytes", size)
+	    .detail("EstimatedDbSize", dbSizeEstimate)
+	    .detail("SqrtBytes", SERVER_KNOBS->SHARD_BYTES_PER_SQRT_BYTES)
+	    .detail("AllowLargeShard", SERVER_KNOBS->ALLOW_LARGE_SHARD);
 	return size;
 }
 
@@ -933,9 +939,6 @@ Future<Void> shardMerger(DataDistributionTracker* self,
 	const UID actionId = deterministicRandom()->randomUniqueID();
 	const Severity stSev = static_cast<Severity>(SERVER_KNOBS->DD_SHARD_TRACKING_LOG_SEVERITY);
 	int64_t maxShardSize = self->maxShardSize->get().get();
-	if (SERVER_KNOBS->ALLOW_LARGE_SHARD) {
-		maxShardSize = SERVER_KNOBS->MAX_LARGE_SHARD_BYTES;
-	}
 
 	auto prevIter = self->shards->rangeContaining(keys.begin);
 	auto nextIter = self->shards->rangeContaining(keys.begin);
@@ -1125,10 +1128,6 @@ ACTOR Future<Void> shardEvaluator(DataDistributionTracker* self,
 	StorageMetrics const& stats = shardSize->get().get().metrics;
 	auto bandwidthStatus = getBandwidthStatus(stats);
 
-	if (SERVER_KNOBS->ALLOW_LARGE_SHARD) {
-		shardBounds.max.bytes = SERVER_KNOBS->MAX_LARGE_SHARD_BYTES;
-	}
-
 	bool sizeSplit = stats.bytes > shardBounds.max.bytes,
 	     writeSplit = bandwidthStatus == BandwidthStatusHigh && keys.begin < keyServersKeys.begin;
 	bool shouldSplit = sizeSplit || writeSplit;