diff --git a/documentation/sphinx/source/release-notes.rst b/documentation/sphinx/source/release-notes.rst index e48735b015..3f32c76882 100644 --- a/documentation/sphinx/source/release-notes.rst +++ b/documentation/sphinx/source/release-notes.rst @@ -27,6 +27,7 @@ Fixes * During an upgrade, the multi-version client now persists database default options and transaction options that aren't reset on retry (e.g. transaction timeout). In order for these options to function correctly during an upgrade, a 6.2 or later client should be used as the primary client. `(PR #1767) <https://github.com/apple/foundationdb/pull/1767>`_. * If a cluster is upgraded during an ``onError`` call, the cluster could return a ``cluster_version_changed`` error. `(PR #1734) <https://github.com/apple/foundationdb/pull/1734>`_. * Do not set doBuildTeams in StorageServerTracker unless a storage server's interface changes, in order to avoid unnecessary work. `(PR #1779) <https://github.com/apple/foundationdb/pull/1779>`_. +* Data distribution will now pick a random destination when merging shards in the ``\xff`` keyspace. This avoids an issue with backup where the write-heavy mutation log shards could concentrate on a single process that has less data than everybody else. `(PR #1916) <https://github.com/apple/foundationdb/pull/1916>`_. 
Status ------ diff --git a/fdbserver/DataDistributionQueue.actor.cpp b/fdbserver/DataDistributionQueue.actor.cpp index 51d287e190..bd05502a41 100644 --- a/fdbserver/DataDistributionQueue.actor.cpp +++ b/fdbserver/DataDistributionQueue.actor.cpp @@ -52,7 +52,14 @@ struct RelocateData { rs.priority == PRIORITY_REBALANCE_OVERUTILIZED_TEAM || rs.priority == PRIORITY_REBALANCE_UNDERUTILIZED_TEAM || rs.priority == PRIORITY_SPLIT_SHARD || - rs.priority == PRIORITY_TEAM_REDUNDANT ), interval("QueuedRelocation") {} + rs.priority == PRIORITY_TEAM_REDUNDANT || + mergeWantsNewServers(rs.keys, rs.priority)), interval("QueuedRelocation") {} + + /* Returns true only for a relocation whose priority is PRIORITY_MERGE_SHARD, and then only when the MERGE_ONTO_NEW_TEAM knob is 2, or the knob is 1 and keys.begin starts with the single byte 0xFF. Any other priority or knob value yields false. */ static bool mergeWantsNewServers(KeyRangeRef keys, int priority) { + return priority == PRIORITY_MERGE_SHARD && + (SERVER_KNOBS->MERGE_ONTO_NEW_TEAM == 2 || + (SERVER_KNOBS->MERGE_ONTO_NEW_TEAM == 1 && keys.begin.startsWith(LiteralStringRef("\xff")))); + } bool operator> (const RelocateData& rhs) const { return priority != rhs.priority ? priority > rhs.priority : ( startTime != rhs.startTime ? startTime < rhs.startTime : randomId > rhs.randomId ); diff --git a/fdbserver/Knobs.cpp b/fdbserver/Knobs.cpp index 280b1dda58..0171dd8eec 100644 --- a/fdbserver/Knobs.cpp +++ b/fdbserver/Knobs.cpp @@ -101,6 +101,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) { init( INFLIGHT_PENALTY_HEALTHY, 1.0 ); init( INFLIGHT_PENALTY_UNHEALTHY, 10.0 ); init( INFLIGHT_PENALTY_ONE_LEFT, 1000.0 ); + init( MERGE_ONTO_NEW_TEAM, 1 ); if( randomize && BUGGIFY ) MERGE_ONTO_NEW_TEAM = deterministicRandom()->coinflip() ? 0 : 2; // Data distribution init( RETRY_RELOCATESHARD_DELAY, 0.1 ); diff --git a/fdbserver/Knobs.h b/fdbserver/Knobs.h index a378a822b5..582b225357 100644 --- a/fdbserver/Knobs.h +++ b/fdbserver/Knobs.h @@ -102,6 +102,7 @@ public: double INFLIGHT_PENALTY_REDUNDANT; double INFLIGHT_PENALTY_UNHEALTHY; double INFLIGHT_PENALTY_ONE_LEFT; + int MERGE_ONTO_NEW_TEAM; // Merges will request new servers. 
0 for off, 1 for \xff only, 2 for all shards. // Data distribution double RETRY_RELOCATESHARD_DELAY;