From 8f0348d5e02a325b98a4e1d8fdb0bc91f844fa7e Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Thu, 31 Oct 2019 16:38:33 -0700 Subject: [PATCH 1/2] fix: merges which cross over systemKeys.begin did not properly decrement the systemSizeEstimate --- documentation/sphinx/source/release-notes.rst | 1 - fdbserver/DataDistributionTracker.actor.cpp | 10 ++++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/documentation/sphinx/source/release-notes.rst b/documentation/sphinx/source/release-notes.rst index 6dabb859b9..18027022ff 100644 --- a/documentation/sphinx/source/release-notes.rst +++ b/documentation/sphinx/source/release-notes.rst @@ -133,7 +133,6 @@ Fixes only impacting 6.2.0+ * The cluster controller would saturate its CPU for a few seconds when sending configuration information to all of the worker processes. [6.2.4] `(PR #2086) `_. * The data distributor would build all possible team combinations if it was tracking an unhealthy server with less than 10 teams. [6.2.4] `(PR #2099) `_. * The cluster controller could crash if a coordinator was unreachable when compiling cluster status. [6.2.4] `(PR #2065) `_. -* The cluster controller could crash if a coordinator was unreachable when compiling cluster status. [6.2.4] `(PR #2065) `_. * A storage server could crash if it took longer than 10 minutes to fetch a key range from another server. [6.2.5] `(PR #2170) `_. * Excluding or including servers would restart the data distributor. [6.2.5] `(PR #2170) `_. * The data distributor could read invalid memory when estimating database size. [6.2.6] `(PR #2225) `_. 
diff --git a/fdbserver/DataDistributionTracker.actor.cpp b/fdbserver/DataDistributionTracker.actor.cpp index 2a785a2882..90756a2063 100644 --- a/fdbserver/DataDistributionTracker.actor.cpp +++ b/fdbserver/DataDistributionTracker.actor.cpp @@ -402,6 +402,7 @@ Future shardMerger( bool forwardComplete = false; KeyRangeRef merged; StorageMetrics endingStats = shardSize->get().get(); + int64_t systemBytes = keys.begin >= systemKeys.begin ? shardSize->get().get().bytes : 0; loop { Optional newMetrics; @@ -439,6 +440,9 @@ Future shardMerger( merged = KeyRangeRef( prevIter->range().begin, nextIter->range().end ); endingStats += newMetrics.get(); + if((forwardComplete ? prevIter->range().begin : nextIter->range().begin) >= systemKeys.begin) { + systemBytes += newMetrics.get().bytes; + } shardsMerged++; auto shardBounds = getShardSizeBounds( merged, maxShardSize ); @@ -457,6 +461,9 @@ Future shardMerger( // If going forward, remove most recently added range endingStats -= newMetrics.get(); + if(nextIter->range().begin >= systemKeys.begin) { + systemBytes -= newMetrics.get().bytes; + } shardsMerged--; --nextIter; merged = KeyRangeRef( prevIter->range().begin, nextIter->range().end ); @@ -473,6 +480,9 @@ Future shardMerger( .detail("EndingSize", endingStats.bytes) .detail("BatchedMerges", shardsMerged); + if(mergeRange.begin < systemKeys.begin) { + self->systemSizeEstimate -= systemBytes; + } restartShardTrackers( self, mergeRange, endingStats ); self->shardsAffectedByTeamFailure->defineShard( mergeRange ); self->output.send( RelocateShard( mergeRange, SERVER_KNOBS->PRIORITY_MERGE_SHARD ) ); From 7f75eca7cbf661e26b63449010ed3c25d8ac4bc6 Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Thu, 31 Oct 2019 17:06:58 -0700 Subject: [PATCH 2/2] updated release notes --- documentation/sphinx/source/release-notes.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/documentation/sphinx/source/release-notes.rst b/documentation/sphinx/source/release-notes.rst index 
18027022ff..98e18f76fc 100644 --- a/documentation/sphinx/source/release-notes.rst +++ b/documentation/sphinx/source/release-notes.rst @@ -2,6 +2,14 @@ Release Notes ############# +6.2.8 +===== + +Fixes +----- + +* The ``system_kv_size_bytes`` status field could report a size much larger than the actual size of the system keyspace. `(PR #2305) <https://github.com/apple/foundationdb/pull/2305>`_. + 6.2.7 =====