From 7e97bd181abc76efa09d8cdfa5e30741cea21af1 Mon Sep 17 00:00:00 2001
From: Evan Tschannen
Date: Sun, 28 Jul 2019 19:31:21 -0700
Subject: [PATCH 1/3] fix: we need to build teams when a server becomes
 healthy if it is possible another server does not have enough teams

---
 fdbserver/DataDistribution.actor.cpp | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp
index eb9a281bee..1d70622da3 100644
--- a/fdbserver/DataDistribution.actor.cpp
+++ b/fdbserver/DataDistribution.actor.cpp
@@ -540,6 +540,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
 	DatabaseConfiguration configuration;
 
 	bool doBuildTeams;
+	bool lastBuildTeamsFailed;
 	Future<Void> teamBuilder;
 	AsyncTrigger restartTeamBuilder;
 
@@ -626,7 +627,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
 	                 Reference<AsyncVar<bool>> zeroHealthyTeams, bool primary,
 	                 Reference<AsyncVar<bool>> processingUnhealthy)
 	  : cx(cx), distributorId(distributorId), lock(lock), output(output),
-	    shardsAffectedByTeamFailure(shardsAffectedByTeamFailure), doBuildTeams(true), teamBuilder(Void()),
+	    shardsAffectedByTeamFailure(shardsAffectedByTeamFailure), doBuildTeams(true), lastBuildTeamsFailed(false), teamBuilder(Void()),
 	    badTeamRemover(Void()), redundantMachineTeamRemover(Void()), redundantServerTeamRemover(Void()),
 	    configuration(configuration), readyToStart(readyToStart), clearHealthyZoneFuture(Void()),
 	    checkTeamDelay(delay(SERVER_KNOBS->CHECK_TEAM_DELAY, TaskPriority::DataDistribution)),
@@ -1449,6 +1450,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
 				TraceEvent(SevWarn, "DataDistributionBuildTeams", distributorId)
 				    .detail("Primary", primary)
 				    .detail("Reason", "Unable to make desired machine Teams");
+				lastBuildTeamsFailed = true;
 				break;
 			}
 		}
@@ -1874,6 +1876,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
 
 			if (bestServerTeam.size() != configuration.storageTeamSize) {
 				// Not find any team and will unlikely find a team
+				lastBuildTeamsFailed = true;
 				break;
 			}
 
@@ -2018,7 +2021,8 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
 			    .detail("MachineTeamCount", self->machineTeams.size())
 			    .detail("MachineCount", self->machine_info.size())
 			    .detail("DesiredTeamsPerServer", SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER);
-
+
+			self->lastBuildTeamsFailed = false;
 			if (teamsToBuild > 0 || self->notEnoughTeamsForAServer()) {
 				state vector<Reference<TCTeamInfo>> builtTeams;
 
@@ -3099,7 +3103,7 @@ ACTOR Future<Void> storageServerFailureTracker(
 		choose {
 			when ( wait(healthChanged) ) {
 				status->isFailed = !status->isFailed;
-				if(!status->isFailed && !server->teams.size()) {
+				if(!status->isFailed && (server->teams.size() < SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER || self->lastBuildTeamsFailed)) {
 					self->doBuildTeams = true;
 				}
 				if(status->isFailed && self->healthyZone.get().present() && self->clearHealthyZoneFuture.isReady()) {
@@ -3221,7 +3225,7 @@ ACTOR Future<Void> storageServerTracker(
 			self->restartRecruiting.trigger();
 
 			if (lastIsUnhealthy && !status.isUnhealthy() &&
-			    server->teams.size() < SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER) {
+			    ( server->teams.size() < SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER || self->lastBuildTeamsFailed)) {
				self->doBuildTeams = true;
 				self->restartTeamBuilder.trigger(); // This does not trigger building teams if there exist healthy teams
 			}
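A note on what this first patch changes, for review context: previously a team
rebuild was scheduled on a failed-to-healthy transition only when the newly
healthy server itself had zero teams, so a build pass that had given up while
some *other* server was short on teams was never retried. The patch records
that give-up in a sticky lastBuildTeamsFailed flag and also rechecks the
server's team count against DESIRED_TEAMS_PER_SERVER. The sketch below is a
minimal plain-C++ illustration of that flag pattern, not Flow actor code; the
names TeamBuilder, buildTeams's parameter, and onServerHealthy are invented
for the example.

#include <cstddef>
#include <iostream>

// Illustrative stand-in for DDTeamCollection's team-building state. All
// names here are invented; only the flag logic mirrors the patch.
struct TeamBuilder {
	static constexpr std::size_t DESIRED_TEAMS_PER_SERVER = 5; // stand-in for the knob

	bool doBuildTeams = true;
	// Sticky failure flag: set when a build pass gives up before every
	// server has enough teams, cleared at the start of the next pass.
	bool lastBuildTeamsFailed = false;

	// Runs one build pass. enoughMachinesAvailable abstracts the real
	// failure checks ("Unable to make desired machine Teams", no usable
	// server team of the configured size).
	void buildTeams(bool enoughMachinesAvailable) {
		lastBuildTeamsFailed = false;
		if (!enoughMachinesAvailable) {
			lastBuildTeamsFailed = true; // remember the failure for later health events
		}
		doBuildTeams = false;
	}

	// Called when a server transitions from failed to healthy. Pre-patch,
	// a rebuild was scheduled only if this server had no teams at all;
	// post-patch it is also scheduled when the last pass failed, since a
	// different server may still be short on teams.
	void onServerHealthy(std::size_t serverTeamCount) {
		if (serverTeamCount < DESIRED_TEAMS_PER_SERVER || lastBuildTeamsFailed) {
			doBuildTeams = true;
		}
	}
};

int main() {
	TeamBuilder tb;
	tb.buildTeams(/*enoughMachinesAvailable=*/false); // pass fails; flag sticks
	tb.onServerHealthy(/*serverTeamCount=*/10);       // this server has plenty of teams
	std::cout << std::boolalpha << tb.doBuildTeams << "\n"; // prints true
}

Clearing the flag at the top of each pass and re-setting it on failure mirrors
how the patch clears self->lastBuildTeamsFailed before attempting to build.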
From 9a0db742307f0c587d59af14db11a217bdef0178 Mon Sep 17 00:00:00 2001
From: Evan Tschannen
Date: Sun, 28 Jul 2019 19:31:53 -0700
Subject: [PATCH 2/3] fix: forced recovery did not copy txsTags properly

---
 fdbserver/TagPartitionedLogSystem.actor.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fdbserver/TagPartitionedLogSystem.actor.cpp b/fdbserver/TagPartitionedLogSystem.actor.cpp
index f14fd61804..caae64f284 100644
--- a/fdbserver/TagPartitionedLogSystem.actor.cpp
+++ b/fdbserver/TagPartitionedLogSystem.actor.cpp
@@ -1547,6 +1547,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogSystem> {
 
 		if(maxRecoveryIndex > 0) {
 			logServers = oldLogData[maxRecoveryIndex-1].tLogs;
+			prevState.txsTags = oldLogData[maxRecoveryIndex-1].txsTags;
 			lockResults[0] = allLockResults[maxRecoveryIndex];
 			lockResults[0].isCurrent = true;
 		}

From d8b14fe37241cb56ae714eb04f1a6c231881049e Mon Sep 17 00:00:00 2001
From: Evan Tschannen
Date: Sun, 28 Jul 2019 19:34:17 -0700
Subject: [PATCH 3/3] we cannot buggify replace content bytes because it takes
 too long to recover when the txnStateStore is too large

---
 fdbserver/Knobs.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fdbserver/Knobs.cpp b/fdbserver/Knobs.cpp
index 596ac17bfd..280b1dda58 100644
--- a/fdbserver/Knobs.cpp
+++ b/fdbserver/Knobs.cpp
@@ -246,7 +246,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
 	init( SPRING_CLEANING_MAX_VACUUM_PAGES,                      1e9 ); if( randomize && BUGGIFY ) SPRING_CLEANING_MAX_VACUUM_PAGES = deterministicRandom()->coinflip() ? 0 : deterministicRandom()->randomInt(1, 1e4);
 
 	// KeyValueStoreMemory
-	init( REPLACE_CONTENTS_BYTES,                                1e5 ); if( randomize && BUGGIFY ) REPLACE_CONTENTS_BYTES = 1e3;
+	init( REPLACE_CONTENTS_BYTES,                                1e5 );
 
 	// Leader election
 	bool longLeaderElection = randomize && BUGGIFY;
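The third patch removes a BUGGIFY randomization rather than changing a
default: in simulation, shrinking REPLACE_CONTENTS_BYTES from 1e5 to 1e3
forces the KeyValueStoreMemory path to rewrite its contents far more often,
and with a large txnStateStore that made recoveries take too long. Below is a
minimal sketch of the knob-buggification pattern involved, in plain C++;
Knobs, initialize, randomize, and buggify are invented stand-ins for
FoundationDB's init() helper and BUGGIFY macro.

#include <iostream>

// Minimal sketch of the pattern the third patch changes. All names here are
// illustrative, not FoundationDB's real mechanism.
struct Knobs {
	double REPLACE_CONTENTS_BYTES = 0;

	void initialize(bool randomize, bool buggify) {
		REPLACE_CONTENTS_BYTES = 1e5;
		// Before the patch, simulation could shrink the knob to stress the
		// rewrite path, roughly:
		//     if (randomize && buggify) REPLACE_CONTENTS_BYTES = 1e3;
		// The patch deletes that randomization because the resulting
		// frequent rewrites made recoveries with a large txnStateStore
		// unacceptably slow; the knob now always keeps its 1e5 default.
		(void)randomize;
		(void)buggify;
	}
};

int main() {
	Knobs k;
	k.initialize(/*randomize=*/true, /*buggify=*/true);
	std::cout << k.REPLACE_CONTENTS_BYTES << "\n"; // always 1e5 after the patch
}

Keeping init( REPLACE_CONTENTS_BYTES, 1e5 ) while deleting only the
randomization leaves production behavior unchanged; only simulated clusters
lose this stress case.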